diff --git a/programming_examples/ml/bottleneck/Makefile b/programming_examples/ml/bottleneck/Makefile index 0133e02c7a..5c696af170 100755 --- a/programming_examples/ml/bottleneck/Makefile +++ b/programming_examples/ml/bottleneck/Makefile @@ -5,31 +5,36 @@ # # Copyright (C) 2024, Advanced Micro Devices, Inc. -include ../../makefile-common +srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + +include ${srcdir}/../../makefile-common mlirFileName = aie +VPATH := ${srcdir}/../../../aie_kernels/aie2 + all: build/conv2dk1.o build/conv2dk3.o build/conv2dk1_skip.o build/final.xclbin -build/${mlirFileName}.mlir: aie2.py +build/${mlirFileName}.mlir: ${srcdir}/aie2.py mkdir -p ${@D} python3 $< > $@ insts.txt: build/${mlirFileName}.mlir aiecc.py -v --aie-only-generate-npu --npu-insts-name=$@ $< -build/conv2dk1.o: ../../../aie_kernels/aie2/conv2dk1.cc +build/conv2dk1.o: conv2dk1.cc xchesscc -d ${CHESSCC2_FLAGS} -DINT8_ACT -c $< -o $@ -build/conv2dk3.o: ../../../aie_kernels/aie2/conv2dk3.cc +build/conv2dk3.o: conv2dk3.cc xchesscc -d ${CHESSCC2_FLAGS} -DUINT8_ACT -c $< -o $@ -build/conv2dk1_skip.o: ../../../aie_kernels/aie2/conv2dk1_skip.cc +build/conv2dk1_skip.o: conv2dk1_skip.cc xchesscc -d ${CHESSCC2_FLAGS} -DINT8_ACT -c $< -o $@ -build/final.xclbin: build/${mlirFileName}.mlir +build/final.xclbin: build/${mlirFileName}.mlir build/conv2dk1.o build/conv2dk3.o build/conv2dk1_skip.o cd build && aiecc.py -v --aie-generate-cdo --aie-generate-npu --no-compile-host \ - --xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%) + --basic-alloc-scheme \ + --xclbin-name=${@F} --npu-insts-name=insts.txt ${ $@ - -insts.txt: build/${mlirFileName}.mlir - aiecc.py -v --aie-only-generate-npu --npu-insts-name=$@ $< - -build/conv2dk1_i8.o: conv2dk1_i8.cc - xchesscc -d ${CHESSCC2_FLAGS} -DINT8_ACT -c $< -o $@ - -build/final.xclbin: build/${mlirFileName}.mlir - cd build && aiecc.py -v --aie-generate-cdo --aie-generate-npu --no-compile-host \ +build/final.xclbin: build/${mlirFileName}.mlir build/conv2dk1_i8.o + mkdir -p ${@D} + cd ${@D} && aiecc.py -v --aie-generate-cdo --aie-generate-npu --no-compile-host \ --xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%) +run_py: build/final.xclbin build/insts.txt + ${powershell} python3 ${srcdir}/test.py -x build/final.xclbin -i build/insts.txt -k MLIR_AIE + clean: rm -rf build *.elf* *.lst *.bif ${mlirFileName}.mlir.prj log* *.xclbin sim \ chess* *.o insts.txt \ - *.log aie_partition.json *.bin BOOT.BIN _x test.exe -run_py: - ${powershell} python3 test.py -x build/final.xclbin -i build/insts.txt -k MLIR_AIE \ No newline at end of file + *.log aie_partition.json *.bin BOOT.BIN _x test.exe \ No newline at end of file diff --git a/programming_examples/ml/conv2d/aie2.py b/programming_examples/ml/conv2d/aie2.py index 82584170cf..a4c8b5a1c9 100644 --- a/programming_examples/ml/conv2d/aie2.py +++ b/programming_examples/ml/conv2d/aie2.py @@ -34,10 +34,6 @@ bufOut = actOut * 2 # double buffer actOutInt32s = actOut // 4 -enableTrace = False -trace_size = 16384 -traceSizeInInt32s = trace_size // 4 - def conv2dk1(): with mlir_mod_ctx() as ctx: @@ -83,9 +79,6 @@ def device_body(): ComputeTile2 = tile(0, 2) compute_tile2_col, compute_tile2_row = 0, 2 - if enableTrace: - flow(ComputeTile2, WireBundle.Trace, 0, ShimTile, WireBundle.DMA, 1) - # AIE-array data movement with object fifos # Input of_inOF_act_L3L2 = object_fifo( @@ -155,85 +148,6 @@ def core_body(): @FuncOp.from_py_func(tensor_ty, memRef_wts_ty, tensor_ty) def sequence(I, W, O): - if enableTrace: - # 0x340D0: Trace Control 0 - # 0xAABB---C - # AA <- Event to stop trace capture - # BB <- Event to start trace capture - # C <- Trace mode, 00=event=time, 01=event-PC, 10=execution - # Configure so that "Event 1" (always true) causes tracing to start - npu_write32( - column=compute_tile2_col, - row=compute_tile2_row, - address=0x340D0, - value=0x00010000, - ) - # 0x340D4: Trace Control 1 - npu_write32( - column=compute_tile2_col, - row=compute_tile2_row, - address=0x340D4, - value=0x00000000, - ) - # 0x340E0: Trace Event Group 1 (Which events to trace) - # 0xAABBCCDD AA, BB, CC, DD <- four event slots - npu_write32( - column=compute_tile2_col, - row=compute_tile2_row, - address=0x340E0, - value=0x4B222125, - ) - # 0x340E4: Trace Event Group 2 (Which events to trace) - # 0xAABBCCDD AA, BB, CC, DD <- four event slots - npu_write32( - column=compute_tile2_col, - row=compute_tile2_row, - address=0x340E4, - value=0x2D2C1A4F, - ) - - npu_write32( - column=compute_tile2_col, - row=compute_tile2_row, - address=0x3FF00, - value=0x00000121, - ) - - # Configure a buffer descriptor to write tracing information that has been routed into this shim tile - # out to host DDR memory - trace_bd_id = 13 # use BD 13 for writing trace output from compute tile to DDR host memory - output_size = bufOut - npu_writebd_shimtile( - bd_id=trace_bd_id, - buffer_length=trace_size, - buffer_offset=output_size, - enable_packet=0, - out_of_order_id=0, - packet_id=0, - packet_type=0, - column=0, - column_num=1, - d0_size=0, - d0_stride=0, - d1_size=0, - d1_stride=0, - d2_stride=0, - ddr_id=2, - iteration_current=0, - iteration_size=0, - iteration_stride=0, - lock_acq_enable=0, - lock_acq_id=0, - lock_acq_val=0, - lock_rel_id=0, - lock_rel_val=0, - next_bd=0, - use_next_bd=0, - valid_bd=1, - ) - # Set start BD to our shim bd_Id (3) - npu_write32(column=0, row=0, address=0x1D20C, value=trace_bd_id) - NpuWriteRTPOp("rtp2", col=0, row=2, index=0, value=10) npu_dma_memcpy_nd( diff --git a/programming_examples/ml/conv2d/run.lit b/programming_examples/ml/conv2d/run.lit deleted file mode 100644 index 59c3c8b031..0000000000 --- a/programming_examples/ml/conv2d/run.lit +++ /dev/null @@ -1,10 +0,0 @@ -// (c) Copyright 2024 Advanced Micro Devices, Inc. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// REQUIRES: ryzen_ai, chess, torch -// -// RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -DINT8_ACT -c %S/../../../aie_kernels/aie2/conv2dk1_i8.cc -o conv2dk1_i8.o -// RUN: %python %S/aie2.py | aie-opt -cse -canonicalize -o ./aie.mlir -// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir -// RUN: %run_on_npu %python %S/test.py -x aie.xclbin -i insts.txt -k MLIR_AIE | FileCheck %s -// CHECK: PASS! \ No newline at end of file diff --git a/programming_examples/ml/conv2d/run_makefile.lit b/programming_examples/ml/conv2d/run_makefile.lit new file mode 100644 index 0000000000..6097345491 --- /dev/null +++ b/programming_examples/ml/conv2d/run_makefile.lit @@ -0,0 +1,9 @@ +// (c) Copyright 2024 Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: ryzen_ai, chess, torch +// +// RUN: make -f %S/Makefile clean +// RUN: make -f %S/Makefile +// RUN: %run_on_npu make -f %S/Makefile run_py | FileCheck %s +// CHECK: PASS! diff --git a/programming_examples/ml/conv2d/test.py b/programming_examples/ml/conv2d/test.py index 1a8d2e7712..6ad9a02e41 100644 --- a/programming_examples/ml/conv2d/test.py +++ b/programming_examples/ml/conv2d/test.py @@ -147,13 +147,17 @@ def forward(self, x): print("\nAvg NPU time: {}us.".format(int((npu_time_total / num_iter) / 1000))) - assert np.allclose( + if np.allclose( ofm_mem_fmt_out.detach().numpy(), golden_output.detach().numpy(), rtol=0, atol=2 * int8_scale, - ) - print("\nPASS!\n") + ): + print("\nPASS!\n") + exit(0) + else: + print("\nFailed.\n") + exit(-1) if __name__ == "__main__": diff --git a/programming_examples/ml/conv2d_fused_relu/Makefile b/programming_examples/ml/conv2d_fused_relu/Makefile index 22ebe63104..755e2d4f7f 100755 --- a/programming_examples/ml/conv2d_fused_relu/Makefile +++ b/programming_examples/ml/conv2d_fused_relu/Makefile @@ -5,15 +5,17 @@ # # Copyright (C) 2024, Advanced Micro Devices, Inc. -include ../../makefile-common +srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + +include ${srcdir}/../../makefile-common mlirFileName = aieWithTrace_1core -VPATH := ../../../aie_kernels/aie2 +VPATH := ${srcdir}/../../../aie_kernels/aie2 all: build/conv2dk1.o build/final.xclbin -build/${mlirFileName}.mlir: aie2.py +build/${mlirFileName}.mlir: ${srcdir}/aie2.py mkdir -p ${@D} python3 $< > $@ @@ -24,9 +26,9 @@ insts.txt: build/${mlirFileName}.mlir build/conv2dk1.o: conv2dk1.cc xchesscc -d ${CHESSCC2_FLAGS} -DINT8_ACT -c $< -o $@ -build/final.xclbin: build/${mlirFileName}.mlir +build/final.xclbin: build/${mlirFileName}.mlir build/conv2dk1.o cd build && aiecc.py -v --aie-generate-cdo --aie-generate-npu --no-compile-host \ - --xclbin-name=${@F} --npu-insts-name=insts.txt $(<:%=../%) + --xclbin-name=${@F} --npu-insts-name=insts.txt ${ $@ + cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2_FLAGS} -c $< -o ${@F} -build/aie_trace.mlir: aie2.py +build/aie.mlir: ${srcdir}/aie2.py mkdir -p ${@D} - python3 $< ${trace_size} > $@ - + python3 $< 0 > $@ -build/aie_trace.mlir: aie2.py +build/aie_trace.mlir: ${srcdir}/aie2.py mkdir -p ${@D} python3 $< ${trace_size} > $@ @@ -42,10 +39,10 @@ build/final_trace.xclbin: build/aie_trace.mlir build/add.o cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \ --xclbin-name=${@F} --npu-insts-name=insts.txt ${ $@ run: ${targetname}.exe build/final.xclbin build/insts.txt ${powershell} ./$< -x build/final.xclbin -i build/insts.txt -k MLIR_AIE diff --git a/programming_examples/ml/eltwise_add/run.lit b/programming_examples/ml/eltwise_add/run.lit deleted file mode 100644 index 863e0d23c4..0000000000 --- a/programming_examples/ml/eltwise_add/run.lit +++ /dev/null @@ -1,11 +0,0 @@ -// (c) Copyright 2023 Advanced Micro Devices, Inc. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// REQUIRES: ryzen_ai, chess -// -// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../aie_kernels/aie2/add.cc -o add.o -// RUN: %python %S/aie2.py 4096 | aie-opt -cse -canonicalize -o ./aie.mlir -// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir -// RUN: g++-13 %S/test.cpp -o test.exe -std=c++23 -Wall -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s -// CHECK: PASS! diff --git a/programming_examples/ml/eltwise_add/run_makefile.lit b/programming_examples/ml/eltwise_add/run_makefile.lit new file mode 100644 index 0000000000..6875524001 --- /dev/null +++ b/programming_examples/ml/eltwise_add/run_makefile.lit @@ -0,0 +1,9 @@ +// (c) Copyright 2024 Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: ryzen_ai, chess +// +// RUN: make -f %S/Makefile clean +// RUN: make -f %S/Makefile +// RUN: %run_on_npu make -f %S/Makefile run | FileCheck %s +// CHECK: PASS! diff --git a/programming_examples/ml/eltwise_mul/Makefile b/programming_examples/ml/eltwise_mul/Makefile index 9f24b09350..2dcd94923e 100644 --- a/programming_examples/ml/eltwise_mul/Makefile +++ b/programming_examples/ml/eltwise_mul/Makefile @@ -6,24 +6,26 @@ # ##===----------------------------------------------------------------------===## -include ../../makefile-common +srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + +include ${srcdir}/../../makefile-common all: build/final.xclbin targetname = myEltwiseMul trace_size = 8192 -VPATH := ../../../aie_kernels/aie2 +VPATH := ${srcdir}/../../../aie_kernels/aie2 build/%.o: %.cc mkdir -p ${@D} - cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2_FLAGS} -c $(<:%=../%) -o ${@F} + cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2_FLAGS} -c $< -o ${@F} -build/aie.mlir: aie2.py +build/aie.mlir: ${srcdir}/aie2.py mkdir -p ${@D} python3 $< > $@ -build/aie_trace.mlir: aie2.py +build/aie_trace.mlir: ${srcdir}/aie2.py mkdir -p ${@D} python3 $< ${trace_size} > $@ @@ -38,10 +40,10 @@ build/final_trace.xclbin: build/aie_trace.mlir build/mul.o --xclbin-name=${@F} --npu-insts-name=insts.txt ${ $@ -build/aie_trace.mlir: aie2.py +build/aie_trace.mlir: ${srcdir}/aie2.py mkdir -p ${@D} python3 $< ${trace_size} > $@ @@ -37,10 +39,10 @@ build/final_trace.xclbin: build/aie_trace.mlir build/relu.o cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-npu --no-compile-host \ --xclbin-name=${@F} --npu-insts-name=insts.txt ${ $@ -build/aie_trace.mlir: aie2.py +build/aie_trace.mlir: ${srcdir}/aie2.py mkdir -p ${@D} python3 $< ${trace_size} > $@ @@ -51,10 +53,10 @@ build/final_trace.xclbin: build/aie_trace.mlir build/kernels.a --aie-generate-npu --npu-insts-name=insts.txt $(<:%=../%) -${targetname}.exe: test.cpp +${targetname}.exe: ${srcdir}/test.cpp rm -rf _build mkdir -p _build - cd _build && ${powershell} cmake .. -DTARGET_NAME=${targetname} + cd _build && ${powershell} cmake ${srcdir} -DTARGET_NAME=${targetname} cd _build && ${powershell} cmake --build . --config Release ifeq "${powershell}" "powershell.exe" cp _build/${targetname}.exe $@ diff --git a/programming_examples/ml/softmax/run.lit b/programming_examples/ml/softmax/run.lit deleted file mode 100644 index 42441e898a..0000000000 --- a/programming_examples/ml/softmax/run.lit +++ /dev/null @@ -1,15 +0,0 @@ -// (c) Copyright 2024 Advanced Micro Devices, Inc. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// REQUIRES: ryzen_ai, chess -// -// RUN: aie-opt %S/bf16_softmax.mlir --affine-super-vectorize="virtual-vector-size=16 test-fastest-varying=0 vectorize-reductions=true" --convert-vector-to-aievec="aie-target=aieml" -lower-affine | aie-translate -aieml=true --aievec-to-cpp -o dut.cc -// RUN: xchesscc_wrapper aie2 -I %aietools/include -I%S/../../../aie_runtime_lib/AIE2 -c dut.cc -o dut.o -// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../aie_runtime_lib/AIE2/lut_based_ops.cpp -o lut_based_ops.o -// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/softmax.cc -o softmax.o -// RUN: ar rvs kernels.a dut.o lut_based_ops.o softmax.o -// RUN: %python %S/aie2.py | aie-opt -cse -canonicalize -o ./aie.mlir -// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.txt ./aie.mlir -// RUN: g++-13 %S/test.cpp -o test.exe -std=c++23 -Wall -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem -// RUN: %run_on_npu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s -// CHECK: PASS! diff --git a/programming_examples/ml/softmax/run_makefile.lit b/programming_examples/ml/softmax/run_makefile.lit new file mode 100644 index 0000000000..6875524001 --- /dev/null +++ b/programming_examples/ml/softmax/run_makefile.lit @@ -0,0 +1,9 @@ +// (c) Copyright 2024 Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: ryzen_ai, chess +// +// RUN: make -f %S/Makefile clean +// RUN: make -f %S/Makefile +// RUN: %run_on_npu make -f %S/Makefile run | FileCheck %s +// CHECK: PASS!