From 9716dc78b8a3fe04a018b2739b261732f2e27264 Mon Sep 17 00:00:00 2001 From: James Lin Date: Mon, 10 Jun 2024 18:44:17 -0500 Subject: [PATCH] [aievec] add aievec.min/max e2e to-llvm tests (#1528) --- lib/Conversion/AIEVecToLLVM/AIEVecToLLVM.cpp | 10 ++++--- test/Conversion/AIEVecToLLVM/test-max.mlir | 6 ++-- test/Conversion/AIEVecToLLVM/test-min.mlir | 6 ++-- .../i16xi16_max_elem-llvm.mlir | 28 +++++++++++++++++++ .../i16xi16_max_elem/i16xi16_max_elem.mlir | 10 +++++-- .../i16xi16_max_elem/testbench.cc | 16 +++++++++++ .../i16xi16_min_elem-llvm.mlir | 28 +++++++++++++++++++ .../i16xi16_min_elem/i16xi16_min_elem.mlir | 10 +++++-- .../i16xi16_min_elem/testbench.cc | 14 ++++++++++ .../i32xi32_max_elem-llvm.mlir | 28 +++++++++++++++++++ .../i32xi32_max_elem/i32xi32_max_elem.mlir | 10 +++++-- .../i32xi32_max_elem/testbench.cc | 16 +++++++++++ .../i32xi32_min_elem-llvm.mlir | 28 +++++++++++++++++++ .../i32xi32_min_elem/i32xi32_min_elem.mlir | 10 +++++-- .../i32xi32_min_elem/testbench.cc | 16 +++++++++++ .../i8xi8_max_elem/i8xi8_max_elem-llvm.mlir | 28 +++++++++++++++++++ .../i8xi8_max_elem/i8xi8_max_elem.mlir | 23 ++++++++------- .../aievec_tests/i8xi8_max_elem/testbench.cc | 16 +++++++++++ .../i8xi8_min_elem/i8xi8_min_elem-llvm.mlir | 28 +++++++++++++++++++ .../i8xi8_min_elem/i8xi8_min_elem.mlir | 23 ++++++++------- .../aievec_tests/i8xi8_min_elem/testbench.cc | 16 +++++++++++ 21 files changed, 328 insertions(+), 42 deletions(-) create mode 100644 test/unit_tests/aievec_tests/i16xi16_max_elem/i16xi16_max_elem-llvm.mlir create mode 100644 test/unit_tests/aievec_tests/i16xi16_min_elem/i16xi16_min_elem-llvm.mlir create mode 100644 test/unit_tests/aievec_tests/i32xi32_max_elem/i32xi32_max_elem-llvm.mlir create mode 100644 test/unit_tests/aievec_tests/i32xi32_min_elem/i32xi32_min_elem-llvm.mlir create mode 100644 test/unit_tests/aievec_tests/i8xi8_max_elem/i8xi8_max_elem-llvm.mlir create mode 100644 test/unit_tests/aievec_tests/i8xi8_min_elem/i8xi8_min_elem-llvm.mlir diff --git a/lib/Conversion/AIEVecToLLVM/AIEVecToLLVM.cpp b/lib/Conversion/AIEVecToLLVM/AIEVecToLLVM.cpp index 059c2101a4..6a0cb062da 100644 --- a/lib/Conversion/AIEVecToLLVM/AIEVecToLLVM.cpp +++ b/lib/Conversion/AIEVecToLLVM/AIEVecToLLVM.cpp @@ -1584,9 +1584,10 @@ class MaxOpConversion : public mlir::ConvertOpToLLVMPattern { // create xllvm intrinsic Value maxOp = nullptr; if (llvm::isa(resultScaTy)) { - // create constant for cmp + // create constant for third operand `cmp` + // Note: `cmp` is implicitly treated as `sign` to the vmax intrinsic auto cmpCst = rewriter.create( - loc, rewriter.getI32Type(), rewriter.getI32IntegerAttr(0)); + loc, rewriter.getI32Type(), rewriter.getI32IntegerAttr(1)); SmallVector operands{adaptor.getLhs(), adaptor.getRhs(), cmpCst}; if (resultBitWidth == 8) { maxOp = rewriter.create( @@ -1681,9 +1682,10 @@ class MinOpConversion : public mlir::ConvertOpToLLVMPattern { // create xllvm intrinsic Value minOp = nullptr; if (llvm::isa(resultScaTy)) { - // create constant for cmp + // create constant for third operand `cmp` + // Note: `cmp` is implicitly treated as `sign` to the vmin intrinsic auto cmpCst = rewriter.create( - loc, rewriter.getI32Type(), rewriter.getI32IntegerAttr(0)); + loc, rewriter.getI32Type(), rewriter.getI32IntegerAttr(1)); SmallVector operands{adaptor.getLhs(), adaptor.getRhs(), cmpCst}; if (resultBitWidth == 8) { minOp = rewriter.create( diff --git a/test/Conversion/AIEVecToLLVM/test-max.mlir b/test/Conversion/AIEVecToLLVM/test-max.mlir index 59d4f49bcd..033ec75e71 100644 --- a/test/Conversion/AIEVecToLLVM/test-max.mlir +++ b/test/Conversion/AIEVecToLLVM/test-max.mlir @@ -7,7 +7,7 @@ func.func @i8_max(%arg0 : vector<64xi8>) -> vector<64xi8> { // CHECK-LABEL: @i8_max // CHECK-SAME: %[[ARG0:.*]]: vector<64xi8> -// CHECK: %[[CST:.*]] = llvm.mlir.constant(0 : i32) : i32 +// CHECK: %[[CST:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK-NEXT: %[[VMAX:.*]] = "xllvm.intr.aie2.vmax.lt8"( // CHECK-SAME: %[[ARG0]], %[[ARG0]], %[[CST]]) : // CHECK-SAME: (vector<64xi8>, vector<64xi8>, i32) -> !llvm.struct<(vector<64xi8>, vector<2xi32>)> @@ -23,7 +23,7 @@ func.func @i16_max(%arg0 : vector<32xi16>) -> vector<32xi16> { // CHECK-LABEL: @i16_max // CHECK-SAME: %[[ARG0:.*]]: vector<32xi16> -// CHECK: %[[CST:.*]] = llvm.mlir.constant(0 : i32) : i32 +// CHECK: %[[CST:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK-NEXT: %[[VMAX:.*]] = "xllvm.intr.aie2.vmax.lt16"( // CHECK-SAME: %[[ARG0]], %[[ARG0]], %[[CST]]) : // CHECK-SAME: (vector<32xi16>, vector<32xi16>, i32) -> !llvm.struct<(vector<32xi16>, i32)> @@ -39,7 +39,7 @@ func.func @i32_max(%arg0 : vector<16xi32>) -> vector<16xi32> { // CHECK-LABEL: @i32_max // CHECK-SAME: %[[ARG0:.*]]: vector<16xi32> -// CHECK: %[[CST:.*]] = llvm.mlir.constant(0 : i32) : i32 +// CHECK: %[[CST:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK-NEXT: %[[VMAX:.*]] = "xllvm.intr.aie2.vmax.lt32"( // CHECK-SAME: %[[ARG0]], %[[ARG0]], %[[CST]]) : // CHECK-SAME: (vector<16xi32>, vector<16xi32>, i32) -> !llvm.struct<(vector<16xi32>, i32)> diff --git a/test/Conversion/AIEVecToLLVM/test-min.mlir b/test/Conversion/AIEVecToLLVM/test-min.mlir index 4930d639c9..595d759438 100644 --- a/test/Conversion/AIEVecToLLVM/test-min.mlir +++ b/test/Conversion/AIEVecToLLVM/test-min.mlir @@ -7,7 +7,7 @@ func.func @i8_min(%arg0 : vector<64xi8>) -> vector<64xi8> { // CHECK-LABEL: @i8_min // CHECK-SAME: %[[ARG0:.*]]: vector<64xi8> -// CHECK: %[[CST:.*]] = llvm.mlir.constant(0 : i32) : i32 +// CHECK: %[[CST:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK-NEXT: %[[VMIN:.*]] = "xllvm.intr.aie2.vmin.ge8"( // CHECK-SAME: %[[ARG0]], %[[ARG0]], %[[CST]]) : // CHECK-SAME: (vector<64xi8>, vector<64xi8>, i32) -> !llvm.struct<(vector<64xi8>, vector<2xi32>)> @@ -23,7 +23,7 @@ func.func @i16_min(%arg0 : vector<32xi16>) -> vector<32xi16> { // CHECK-LABEL: @i16_min // CHECK-SAME: %[[ARG0:.*]]: vector<32xi16> -// CHECK: %[[CST:.*]] = llvm.mlir.constant(0 : i32) : i32 +// CHECK: %[[CST:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK-NEXT: %[[VMIN:.*]] = "xllvm.intr.aie2.vmin.ge16"( // CHECK-SAME: %[[ARG0]], %[[ARG0]], %[[CST]]) : // CHECK-SAME: (vector<32xi16>, vector<32xi16>, i32) -> !llvm.struct<(vector<32xi16>, i32)> @@ -39,7 +39,7 @@ func.func @i32_min(%arg0 : vector<16xi32>) -> vector<16xi32> { // CHECK-LABEL: @i32_min // CHECK-SAME: %[[ARG0:.*]]: vector<16xi32> -// CHECK: %[[CST:.*]] = llvm.mlir.constant(0 : i32) : i32 +// CHECK: %[[CST:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK-NEXT: %[[VMIN:.*]] = "xllvm.intr.aie2.vmin.ge32"( // CHECK-SAME: %[[ARG0]], %[[ARG0]], %[[CST]]) : // CHECK-SAME: (vector<16xi32>, vector<16xi32>, i32) -> !llvm.struct<(vector<16xi32>, i32)> diff --git a/test/unit_tests/aievec_tests/i16xi16_max_elem/i16xi16_max_elem-llvm.mlir b/test/unit_tests/aievec_tests/i16xi16_max_elem/i16xi16_max_elem-llvm.mlir new file mode 100644 index 0000000000..c1d3d88390 --- /dev/null +++ b/test/unit_tests/aievec_tests/i16xi16_max_elem/i16xi16_max_elem-llvm.mlir @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2024, Advanced Micro Devices, Inc. + +// REQUIRES: valid_xchess_license +// REQUIRES: peano +// RUN: mkdir -p %t/data; cd %t +// RUN: aie-opt %s -affine-super-vectorize="virtual-vector-size=32" %vector-to-llvmir% -o llvmir.mlir +// RUN: aie-translate llvmir.mlir %llvmir-to-ll% -o dut.ll +// RUN: %PEANO_INSTALL_DIR/bin/clang %clang_aie2_args -c dut.ll -o dut.o +// RUN: xchesscc_wrapper %xchesscc_aie2_args -DTO_LLVM +w work +o work -I%S -I. %S/testbench.cc dut.o +// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout +// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s +// CHECK: TEST PASSED + +module { + func.func @dut(%arg0: memref<1024xi16>, %arg1: memref<1024xi16>, %arg2: memref<1024xi16>) { + memref.assume_alignment %arg0, 32 : memref<1024xi16> + memref.assume_alignment %arg1, 32 : memref<1024xi16> + memref.assume_alignment %arg2, 32 : memref<1024xi16> + affine.for %arg3 = 0 to 1024 { + %0 = affine.load %arg0[%arg3] : memref<1024xi16> + %1 = affine.load %arg1[%arg3] : memref<1024xi16> + %2 = arith.maxsi %0, %1 : i16 + affine.store %2, %arg2[%arg3] : memref<1024xi16> + } + return + } +} diff --git a/test/unit_tests/aievec_tests/i16xi16_max_elem/i16xi16_max_elem.mlir b/test/unit_tests/aievec_tests/i16xi16_max_elem/i16xi16_max_elem.mlir index 1ae4491387..339ab0b0cc 100644 --- a/test/unit_tests/aievec_tests/i16xi16_max_elem/i16xi16_max_elem.mlir +++ b/test/unit_tests/aievec_tests/i16xi16_max_elem/i16xi16_max_elem.mlir @@ -1,7 +1,11 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + +// REQUIRES: valid_xchess_license +// RUN: mkdir -p %t/data; cd %t // RUN: aie-opt %s -affine-super-vectorize="virtual-vector-size=32" --convert-vector-to-aievec="aie-target=aieml" -lower-affine | aie-translate -aieml=true --aievec-to-cpp -o dut.cc -// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. -c dut.cc -o dut.o -// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc work/dut.o -// RUN: mkdir -p data +// RUN: xchesscc_wrapper %xchesscc_aie2_args +w work +o work -I%S -I. -c dut.cc -o dut.o +// RUN: xchesscc_wrapper %xchesscc_aie2_args -DTO_CPP +w work +o work -I%S -I. %S/testbench.cc work/dut.o // RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout // RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s // CHECK: TEST PASSED diff --git a/test/unit_tests/aievec_tests/i16xi16_max_elem/testbench.cc b/test/unit_tests/aievec_tests/i16xi16_max_elem/testbench.cc index f8fe0a969f..60a6264401 100644 --- a/test/unit_tests/aievec_tests/i16xi16_max_elem/testbench.cc +++ b/test/unit_tests/aievec_tests/i16xi16_max_elem/testbench.cc @@ -4,7 +4,19 @@ #include #include #include + +#ifdef TO_CPP void dut(int16_t *restrict in0, int16_t *restrict in1, int16_t *restrict out0); +#elif TO_LLVM +extern "C" { +void dut(int16_t *in0_allocated, int16_t *in0_aligned, int64_t in0_offset, + int64_t in0_sizes_0, int64_t in0_strides_0, int16_t *in1_allocated, + int16_t *in1_aligned, int64_t in1_offset, int64_t in1_sizes_0, + int64_t in1_strides_0, int16_t *out0_allocated, int16_t *out0_aligned, + int64_t out0_offset, int64_t out0_sizes_0, int64_t out0_strides_0); +} +#endif + void dut_ref(int16_t *in0, int16_t *in1, int16_t *out0); alignas(32) int16_t g_in0[IN0_SIZE]; @@ -26,7 +38,11 @@ int main(int argc, char *argv[]) { chess_memory_fence(); auto cyclesBegin = chess_cycle_count(); +#ifdef TO_CPP dut(g_in0, g_in1, g_out0); +#elif TO_LLVM + dut(g_in0, g_in0, 0, 0, 0, g_in1, g_in1, 0, 0, 0, g_out0, g_out0, 0, 0, 0); +#endif auto cyclesEnd = chess_cycle_count(); chess_memory_fence(); diff --git a/test/unit_tests/aievec_tests/i16xi16_min_elem/i16xi16_min_elem-llvm.mlir b/test/unit_tests/aievec_tests/i16xi16_min_elem/i16xi16_min_elem-llvm.mlir new file mode 100644 index 0000000000..dfdf3d3db0 --- /dev/null +++ b/test/unit_tests/aievec_tests/i16xi16_min_elem/i16xi16_min_elem-llvm.mlir @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2024, Advanced Micro Devices, Inc. + +// REQUIRES: valid_xchess_license +// REQUIRES: peano +// RUN: mkdir -p %t/data; cd %t +// RUN: aie-opt %s -affine-super-vectorize="virtual-vector-size=32" %vector-to-llvmir% -o llvmir.mlir +// RUN: aie-translate llvmir.mlir %llvmir-to-ll% -o dut.ll +// RUN: %PEANO_INSTALL_DIR/bin/clang %clang_aie2_args -c dut.ll -o dut.o +// RUN: xchesscc_wrapper %xchesscc_aie2_args -DTO_LLVM +w work +o work -I%S -I. %S/testbench.cc dut.o +// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout +// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s +// CHECK: TEST PASSED + +module { + func.func @dut(%arg0: memref<1024xi16>, %arg1: memref<1024xi16>, %arg2: memref<1024xi16>) { + memref.assume_alignment %arg0, 32 : memref<1024xi16> + memref.assume_alignment %arg1, 32 : memref<1024xi16> + memref.assume_alignment %arg2, 32 : memref<1024xi16> + affine.for %arg3 = 0 to 1024 { + %0 = affine.load %arg0[%arg3] : memref<1024xi16> + %1 = affine.load %arg1[%arg3] : memref<1024xi16> + %2 = arith.minsi %0, %1 : i16 + affine.store %2, %arg2[%arg3] : memref<1024xi16> + } + return + } +} diff --git a/test/unit_tests/aievec_tests/i16xi16_min_elem/i16xi16_min_elem.mlir b/test/unit_tests/aievec_tests/i16xi16_min_elem/i16xi16_min_elem.mlir index c73cd2137d..377a4b42b5 100644 --- a/test/unit_tests/aievec_tests/i16xi16_min_elem/i16xi16_min_elem.mlir +++ b/test/unit_tests/aievec_tests/i16xi16_min_elem/i16xi16_min_elem.mlir @@ -1,7 +1,11 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + +// REQUIRES: valid_xchess_license +// RUN: mkdir -p %t/data; cd %t // RUN: aie-opt %s -affine-super-vectorize="virtual-vector-size=32" --convert-vector-to-aievec="aie-target=aieml" -lower-affine | aie-translate -aieml=true --aievec-to-cpp -o dut.cc -// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. -c dut.cc -o dut.o -// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc work/dut.o -// RUN: mkdir -p data +// RUN: xchesscc_wrapper %xchesscc_aie2_args +w work +o work -I%S -I. -c dut.cc -o dut.o +// RUN: xchesscc_wrapper %xchesscc_aie2_args -DTO_CPP +w work +o work -I%S -I. %S/testbench.cc work/dut.o // RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout // RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s // CHECK: TEST PASSED diff --git a/test/unit_tests/aievec_tests/i16xi16_min_elem/testbench.cc b/test/unit_tests/aievec_tests/i16xi16_min_elem/testbench.cc index 9d7a4bd39d..35240fbda2 100644 --- a/test/unit_tests/aievec_tests/i16xi16_min_elem/testbench.cc +++ b/test/unit_tests/aievec_tests/i16xi16_min_elem/testbench.cc @@ -4,7 +4,17 @@ #include #include #include +#ifdef TO_CPP void dut(int16_t *restrict in0, int16_t *restrict in1, int16_t *restrict out0); +#elif TO_LLVM +extern "C" { +void dut(int16_t *in0_allocated, int16_t *in0_aligned, int64_t in0_offset, + int64_t in0_sizes_0, int64_t in0_strides_0, int16_t *in1_allocated, + int16_t *in1_aligned, int64_t in1_offset, int64_t in1_sizes_0, + int64_t in1_strides_0, int16_t *out0_allocated, int16_t *out0_aligned, + int64_t out0_offset, int64_t out0_sizes_0, int64_t out0_strides_0); +} +#endif void dut_ref(int16_t *in0, int16_t *in1, int16_t *out0); alignas(32) int16_t g_in0[IN0_SIZE]; @@ -26,7 +36,11 @@ int main(int argc, char *argv[]) { chess_memory_fence(); auto cyclesBegin = chess_cycle_count(); +#ifdef TO_CPP dut(g_in0, g_in1, g_out0); +#elif TO_LLVM + dut(g_in0, g_in0, 0, 0, 0, g_in1, g_in1, 0, 0, 0, g_out0, g_out0, 0, 0, 0); +#endif auto cyclesEnd = chess_cycle_count(); chess_memory_fence(); diff --git a/test/unit_tests/aievec_tests/i32xi32_max_elem/i32xi32_max_elem-llvm.mlir b/test/unit_tests/aievec_tests/i32xi32_max_elem/i32xi32_max_elem-llvm.mlir new file mode 100644 index 0000000000..0e3c3590b3 --- /dev/null +++ b/test/unit_tests/aievec_tests/i32xi32_max_elem/i32xi32_max_elem-llvm.mlir @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2024, Advanced Micro Devices, Inc. + +// REQUIRES: valid_xchess_license +// REQUIRES: peano +// RUN: mkdir -p %t/data; cd %t +// RUN: aie-opt %s -affine-super-vectorize="virtual-vector-size=16" %vector-to-llvmir% -o llvmir.mlir +// RUN: aie-translate llvmir.mlir %llvmir-to-ll% -o dut.ll +// RUN: %PEANO_INSTALL_DIR/bin/clang %clang_aie2_args -c dut.ll -o dut.o +// RUN: xchesscc_wrapper %xchesscc_aie2_args -DTO_LLVM +w work +o work -I%S -I. %S/testbench.cc dut.o +// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout +// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s +// CHECK: TEST PASSED + +module { + func.func @dut(%arg0: memref<1024xi32>, %arg1: memref<1024xi32>, %arg2: memref<1024xi32>) { + memref.assume_alignment %arg0, 32 : memref<1024xi32> + memref.assume_alignment %arg1, 32 : memref<1024xi32> + memref.assume_alignment %arg2, 32 : memref<1024xi32> + affine.for %arg3 = 0 to 1024 { + %0 = affine.load %arg0[%arg3] : memref<1024xi32> + %1 = affine.load %arg1[%arg3] : memref<1024xi32> + %2 = arith.maxsi %0, %1 : i32 + affine.store %2, %arg2[%arg3] : memref<1024xi32> + } + return + } +} diff --git a/test/unit_tests/aievec_tests/i32xi32_max_elem/i32xi32_max_elem.mlir b/test/unit_tests/aievec_tests/i32xi32_max_elem/i32xi32_max_elem.mlir index dc5be1d5f3..7ffd6698d0 100644 --- a/test/unit_tests/aievec_tests/i32xi32_max_elem/i32xi32_max_elem.mlir +++ b/test/unit_tests/aievec_tests/i32xi32_max_elem/i32xi32_max_elem.mlir @@ -1,7 +1,11 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + +// REQUIRES: valid_xchess_license +// RUN: mkdir -p %t/data; cd %t // RUN: aie-opt %s -affine-super-vectorize="virtual-vector-size=16" --convert-vector-to-aievec="aie-target=aieml" -lower-affine | aie-translate -aieml=true --aievec-to-cpp -o dut.cc -// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. -c dut.cc -o dut.o -// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc work/dut.o -// RUN: mkdir -p data +// RUN: xchesscc_wrapper %xchesscc_aie2_args +w work +o work -I%S -I. -c dut.cc -o dut.o +// RUN: xchesscc_wrapper %xchesscc_aie2_args -DTO_CPP +w work +o work -I%S -I. %S/testbench.cc work/dut.o // RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout // RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s // CHECK: TEST PASSED diff --git a/test/unit_tests/aievec_tests/i32xi32_max_elem/testbench.cc b/test/unit_tests/aievec_tests/i32xi32_max_elem/testbench.cc index 5c0c91ff42..036f7ecb85 100644 --- a/test/unit_tests/aievec_tests/i32xi32_max_elem/testbench.cc +++ b/test/unit_tests/aievec_tests/i32xi32_max_elem/testbench.cc @@ -4,7 +4,19 @@ #include #include #include + +#ifdef TO_CPP void dut(int32_t *restrict in0, int32_t *restrict in1, int32_t *restrict out0); +#elif TO_LLVM +extern "C" { +void dut(int32_t *in0_allocated, int32_t *in0_aligned, int64_t in0_offset, + int64_t in0_sizes_0, int64_t in0_strides_0, int32_t *in1_allocated, + int32_t *in1_aligned, int64_t in1_offset, int64_t in1_sizes_0, + int64_t in1_strides_0, int32_t *out0_allocated, int32_t *out0_aligned, + int64_t out0_offset, int64_t out0_sizes_0, int64_t out0_strides_0); +} +#endif + void dut_ref(int32_t *in0, int32_t *in1, int32_t *out0); alignas(32) int32_t g_in0[IN0_SIZE]; @@ -26,7 +38,11 @@ int main(int argc, char *argv[]) { chess_memory_fence(); auto cyclesBegin = chess_cycle_count(); +#ifdef TO_CPP dut(g_in0, g_in1, g_out0); +#elif TO_LLVM + dut(g_in0, g_in0, 0, 0, 0, g_in1, g_in1, 0, 0, 0, g_out0, g_out0, 0, 0, 0); +#endif auto cyclesEnd = chess_cycle_count(); chess_memory_fence(); diff --git a/test/unit_tests/aievec_tests/i32xi32_min_elem/i32xi32_min_elem-llvm.mlir b/test/unit_tests/aievec_tests/i32xi32_min_elem/i32xi32_min_elem-llvm.mlir new file mode 100644 index 0000000000..96c20735d6 --- /dev/null +++ b/test/unit_tests/aievec_tests/i32xi32_min_elem/i32xi32_min_elem-llvm.mlir @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2024, Advanced Micro Devices, Inc. + +// REQUIRES: valid_xchess_license +// REQUIRES: peano +// RUN: mkdir -p %t/data; cd %t +// RUN: aie-opt %s -affine-super-vectorize="virtual-vector-size=16" %vector-to-llvmir% -o llvmir.mlir +// RUN: aie-translate llvmir.mlir %llvmir-to-ll% -o dut.ll +// RUN: %PEANO_INSTALL_DIR/bin/clang %clang_aie2_args -c dut.ll -o dut.o +// RUN: xchesscc_wrapper %xchesscc_aie2_args -DTO_LLVM +w work +o work -I%S -I. %S/testbench.cc dut.o +// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout +// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s +// CHECK: TEST PASSED + +module { + func.func @dut(%arg0: memref<1024xi32>, %arg1: memref<1024xi32>, %arg2: memref<1024xi32>) { + memref.assume_alignment %arg0, 32 : memref<1024xi32> + memref.assume_alignment %arg1, 32 : memref<1024xi32> + memref.assume_alignment %arg2, 32 : memref<1024xi32> + affine.for %arg3 = 0 to 1024 { + %0 = affine.load %arg0[%arg3] : memref<1024xi32> + %1 = affine.load %arg1[%arg3] : memref<1024xi32> + %2 = arith.minsi %0, %1 : i32 + affine.store %2, %arg2[%arg3] : memref<1024xi32> + } + return + } +} diff --git a/test/unit_tests/aievec_tests/i32xi32_min_elem/i32xi32_min_elem.mlir b/test/unit_tests/aievec_tests/i32xi32_min_elem/i32xi32_min_elem.mlir index bf2db4c50a..e0de66a437 100644 --- a/test/unit_tests/aievec_tests/i32xi32_min_elem/i32xi32_min_elem.mlir +++ b/test/unit_tests/aievec_tests/i32xi32_min_elem/i32xi32_min_elem.mlir @@ -1,7 +1,11 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + +// REQUIRES: valid_xchess_license +// RUN: mkdir -p %t/data; cd %t // RUN: aie-opt %s -affine-super-vectorize="virtual-vector-size=16" --convert-vector-to-aievec="aie-target=aieml" -lower-affine | aie-translate -aieml=true --aievec-to-cpp -o dut.cc -// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. -c dut.cc -o dut.o -// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc work/dut.o -// RUN: mkdir -p data +// RUN: xchesscc_wrapper %xchesscc_aie2_args +w work +o work -I%S -I. -c dut.cc -o dut.o +// RUN: xchesscc_wrapper %xchesscc_aie2_args -DTO_CPP +w work +o work -I%S -I. %S/testbench.cc work/dut.o // RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout // RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s // CHECK: TEST PASSED diff --git a/test/unit_tests/aievec_tests/i32xi32_min_elem/testbench.cc b/test/unit_tests/aievec_tests/i32xi32_min_elem/testbench.cc index b4e019a3ce..f8ee9c2716 100644 --- a/test/unit_tests/aievec_tests/i32xi32_min_elem/testbench.cc +++ b/test/unit_tests/aievec_tests/i32xi32_min_elem/testbench.cc @@ -4,7 +4,19 @@ #include #include #include + +#ifdef TO_CPP void dut(int32_t *restrict in0, int32_t *restrict in1, int32_t *restrict out0); +#elif TO_LLVM +extern "C" { +void dut(int32_t *in0_allocated, int32_t *in0_aligned, int64_t in0_offset, + int64_t in0_sizes_0, int64_t in0_strides_0, int32_t *in1_allocated, + int32_t *in1_aligned, int64_t in1_offset, int64_t in1_sizes_0, + int64_t in1_strides_0, int32_t *out0_allocated, int32_t *out0_aligned, + int64_t out0_offset, int64_t out0_sizes_0, int64_t out0_strides_0); +} +#endif + void dut_ref(int32_t *in0, int32_t *in1, int32_t *out0); alignas(32) int32_t g_in0[IN0_SIZE]; @@ -26,7 +38,11 @@ int main(int argc, char *argv[]) { chess_memory_fence(); auto cyclesBegin = chess_cycle_count(); +#ifdef TO_CPP dut(g_in0, g_in1, g_out0); +#elif TO_LLVM + dut(g_in0, g_in0, 0, 0, 0, g_in1, g_in1, 0, 0, 0, g_out0, g_out0, 0, 0, 0); +#endif auto cyclesEnd = chess_cycle_count(); chess_memory_fence(); diff --git a/test/unit_tests/aievec_tests/i8xi8_max_elem/i8xi8_max_elem-llvm.mlir b/test/unit_tests/aievec_tests/i8xi8_max_elem/i8xi8_max_elem-llvm.mlir new file mode 100644 index 0000000000..9745852914 --- /dev/null +++ b/test/unit_tests/aievec_tests/i8xi8_max_elem/i8xi8_max_elem-llvm.mlir @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2024, Advanced Micro Devices, Inc. + +// REQUIRES: valid_xchess_license +// REQUIRES: peano +// RUN: mkdir -p %t/data; cd %t +// RUN: aie-opt %s -affine-super-vectorize="virtual-vector-size=64" %vector-to-llvmir% -o llvmir.mlir +// RUN: aie-translate llvmir.mlir %llvmir-to-ll% -o dut.ll +// RUN: %PEANO_INSTALL_DIR/bin/clang %clang_aie2_args -c dut.ll -o dut.o +// RUN: xchesscc_wrapper %xchesscc_aie2_args -DTO_LLVM +w work +o work -I%S -I. %S/testbench.cc dut.o +// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout +// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s +// CHECK: TEST PASSED + +module { + func.func @dut(%arg0: memref<1024xi8>, %arg1: memref<1024xi8>, %arg2: memref<1024xi8>) { + memref.assume_alignment %arg0, 32 : memref<1024xi8> + memref.assume_alignment %arg1, 32 : memref<1024xi8> + memref.assume_alignment %arg2, 32 : memref<1024xi8> + affine.for %arg3 = 0 to 1024 { + %0 = affine.load %arg0[%arg3] : memref<1024xi8> + %1 = affine.load %arg1[%arg3] : memref<1024xi8> + %2 = arith.maxsi %0, %1 : i8 + affine.store %2, %arg2[%arg3] : memref<1024xi8> + } + return + } +} diff --git a/test/unit_tests/aievec_tests/i8xi8_max_elem/i8xi8_max_elem.mlir b/test/unit_tests/aievec_tests/i8xi8_max_elem/i8xi8_max_elem.mlir index 6e869eb9e4..b69055b52a 100644 --- a/test/unit_tests/aievec_tests/i8xi8_max_elem/i8xi8_max_elem.mlir +++ b/test/unit_tests/aievec_tests/i8xi8_max_elem/i8xi8_max_elem.mlir @@ -1,19 +1,22 @@ -// RUN: aie-opt %s --convert-vector-to-aievec="aie-target=aieml" -lower-affine | aie-translate -aieml=true --aievec-to-cpp -o dut.cc -// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. -c dut.cc -o dut.o -// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc work/dut.o -// RUN: mkdir -p data +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + +// REQUIRES: valid_xchess_license +// RUN: mkdir -p %t/data; cd %t +// RUN: aie-opt %s -affine-super-vectorize="virtual-vector-size=64" --convert-vector-to-aievec="aie-target=aieml" -lower-affine | aie-translate -aieml=true --aievec-to-cpp -o dut.cc +// RUN: xchesscc_wrapper %xchesscc_aie2_args +w work +o work -I%S -I. -c dut.cc -o dut.o +// RUN: xchesscc_wrapper %xchesscc_aie2_args -DTO_CPP +w work +o work -I%S -I. %S/testbench.cc work/dut.o // RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout // RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s // CHECK: TEST PASSED module { func.func @dut(%arg0: memref<1024xi8>, %arg1: memref<1024xi8>, %arg2: memref<1024xi8>) { - %c0_i8 = arith.constant 0 : i8 - affine.for %arg3 = 0 to 1024 step 32 { - %0 = vector.transfer_read %arg0[%arg3], %c0_i8 : memref<1024xi8>, vector<64xi8> - %1 = vector.transfer_read %arg1[%arg3], %c0_i8 : memref<1024xi8>, vector<64xi8> - %2 = arith.maxsi %0, %1 : vector<64xi8> - vector.transfer_write %2, %arg2[%arg3] : vector<64xi8>, memref<1024xi8> + affine.for %arg3 = 0 to 1024 { + %0 = affine.load %arg0[%arg3] : memref<1024xi8> + %1 = affine.load %arg1[%arg3] : memref<1024xi8> + %2 = arith.maxsi %0, %1 : i8 + affine.store %2, %arg2[%arg3] : memref<1024xi8> } return } diff --git a/test/unit_tests/aievec_tests/i8xi8_max_elem/testbench.cc b/test/unit_tests/aievec_tests/i8xi8_max_elem/testbench.cc index 6e3f1ba3d9..f9fdb84a62 100644 --- a/test/unit_tests/aievec_tests/i8xi8_max_elem/testbench.cc +++ b/test/unit_tests/aievec_tests/i8xi8_max_elem/testbench.cc @@ -4,7 +4,19 @@ #include #include #include + +#ifdef TO_CPP void dut(int8_t *restrict in0, int8_t *restrict in1, int8_t *restrict out0); +#elif TO_LLVM +extern "C" { +void dut(int8_t *in0_allocated, int8_t *in0_aligned, int64_t in0_offset, + int64_t in0_sizes_0, int64_t in0_strides_0, int8_t *in1_allocated, + int8_t *in1_aligned, int64_t in1_offset, int64_t in1_sizes_0, + int64_t in1_strides_0, int8_t *out0_allocated, int8_t *out0_aligned, + int64_t out0_offset, int64_t out0_sizes_0, int64_t out0_strides_0); +} +#endif + void dut_ref(int8_t *in0, int8_t *in1, int8_t *out0); alignas(32) int8_t g_in0[IN0_SIZE]; @@ -26,7 +38,11 @@ int main(int argc, char *argv[]) { chess_memory_fence(); auto cyclesBegin = chess_cycle_count(); +#ifdef TO_CPP dut(g_in0, g_in1, g_out0); +#elif TO_LLVM + dut(g_in0, g_in0, 0, 0, 0, g_in1, g_in1, 0, 0, 0, g_out0, g_out0, 0, 0, 0); +#endif auto cyclesEnd = chess_cycle_count(); chess_memory_fence(); diff --git a/test/unit_tests/aievec_tests/i8xi8_min_elem/i8xi8_min_elem-llvm.mlir b/test/unit_tests/aievec_tests/i8xi8_min_elem/i8xi8_min_elem-llvm.mlir new file mode 100644 index 0000000000..0b02971e0d --- /dev/null +++ b/test/unit_tests/aievec_tests/i8xi8_min_elem/i8xi8_min_elem-llvm.mlir @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2024, Advanced Micro Devices, Inc. + +// REQUIRES: valid_xchess_license +// REQUIRES: peano +// RUN: mkdir -p %t/data; cd %t +// RUN: aie-opt %s -affine-super-vectorize="virtual-vector-size=64" %vector-to-llvmir% -o llvmir.mlir +// RUN: aie-translate llvmir.mlir %llvmir-to-ll% -o dut.ll +// RUN: %PEANO_INSTALL_DIR/bin/clang %clang_aie2_args -c dut.ll -o dut.o +// RUN: xchesscc_wrapper %xchesscc_aie2_args -DTO_LLVM +w work +o work -I%S -I. %S/testbench.cc dut.o +// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout +// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s +// CHECK: TEST PASSED + +module { + func.func @dut(%arg0: memref<1024xi8>, %arg1: memref<1024xi8>, %arg2: memref<1024xi8>) { + memref.assume_alignment %arg0, 32 : memref<1024xi8> + memref.assume_alignment %arg1, 32 : memref<1024xi8> + memref.assume_alignment %arg2, 32 : memref<1024xi8> + affine.for %arg3 = 0 to 1024 { + %0 = affine.load %arg0[%arg3] : memref<1024xi8> + %1 = affine.load %arg1[%arg3] : memref<1024xi8> + %2 = arith.minsi %0, %1 : i8 + affine.store %2, %arg2[%arg3] : memref<1024xi8> + } + return + } +} diff --git a/test/unit_tests/aievec_tests/i8xi8_min_elem/i8xi8_min_elem.mlir b/test/unit_tests/aievec_tests/i8xi8_min_elem/i8xi8_min_elem.mlir index 0e8522b990..dcdf2cc288 100644 --- a/test/unit_tests/aievec_tests/i8xi8_min_elem/i8xi8_min_elem.mlir +++ b/test/unit_tests/aievec_tests/i8xi8_min_elem/i8xi8_min_elem.mlir @@ -1,19 +1,22 @@ -// RUN: aie-opt %s --convert-vector-to-aievec="aie-target=aieml" -lower-affine | aie-translate -aieml=true --aievec-to-cpp -o dut.cc -// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. -c dut.cc -o dut.o -// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc work/dut.o -// RUN: mkdir -p data +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// Copyright (C) 2023-2024, Advanced Micro Devices, Inc. + +// REQUIRES: valid_xchess_license +// RUN: mkdir -p %t/data; cd %t +// RUN: aie-opt %s -affine-super-vectorize="virtual-vector-size=64" --convert-vector-to-aievec="aie-target=aieml" -lower-affine | aie-translate -aieml=true --aievec-to-cpp -o dut.cc +// RUN: xchesscc_wrapper %xchesscc_aie2_args +w work +o work -I%S -I. -c dut.cc -o dut.o +// RUN: xchesscc_wrapper %xchesscc_aie2_args -DTO_CPP +w work +o work -I%S -I. %S/testbench.cc work/dut.o // RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout // RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s // CHECK: TEST PASSED module { func.func @dut(%arg0: memref<1024xi8>, %arg1: memref<1024xi8>, %arg2: memref<1024xi8>) { - %c0_i8 = arith.constant 0 : i8 - affine.for %arg3 = 0 to 1024 step 32 { - %0 = vector.transfer_read %arg0[%arg3], %c0_i8 : memref<1024xi8>, vector<64xi8> - %1 = vector.transfer_read %arg1[%arg3], %c0_i8 : memref<1024xi8>, vector<64xi8> - %2 = arith.minsi %0, %1 : vector<64xi8> - vector.transfer_write %2, %arg2[%arg3] : vector<64xi8>, memref<1024xi8> + affine.for %arg3 = 0 to 1024 { + %0 = affine.load %arg0[%arg3] : memref<1024xi8> + %1 = affine.load %arg1[%arg3] : memref<1024xi8> + %2 = arith.minsi %0, %1 : i8 + affine.store %2, %arg2[%arg3] : memref<1024xi8> } return } diff --git a/test/unit_tests/aievec_tests/i8xi8_min_elem/testbench.cc b/test/unit_tests/aievec_tests/i8xi8_min_elem/testbench.cc index b4b1102193..0aed91af2f 100644 --- a/test/unit_tests/aievec_tests/i8xi8_min_elem/testbench.cc +++ b/test/unit_tests/aievec_tests/i8xi8_min_elem/testbench.cc @@ -4,7 +4,19 @@ #include #include #include + +#ifdef TO_CPP void dut(int8_t *restrict in0, int8_t *restrict in1, int8_t *restrict out0); +#elif TO_LLVM +extern "C" { +void dut(int8_t *in0_allocated, int8_t *in0_aligned, int64_t in0_offset, + int64_t in0_sizes_0, int64_t in0_strides_0, int8_t *in1_allocated, + int8_t *in1_aligned, int64_t in1_offset, int64_t in1_sizes_0, + int64_t in1_strides_0, int8_t *out0_allocated, int8_t *out0_aligned, + int64_t out0_offset, int64_t out0_sizes_0, int64_t out0_strides_0); +} +#endif + void dut_ref(int8_t *in0, int8_t *in1, int8_t *out0); alignas(32) int8_t g_in0[IN0_SIZE]; @@ -26,7 +38,11 @@ int main(int argc, char *argv[]) { chess_memory_fence(); auto cyclesBegin = chess_cycle_count(); +#ifdef TO_CPP dut(g_in0, g_in1, g_out0); +#elif TO_LLVM + dut(g_in0, g_in0, 0, 0, 0, g_in1, g_in1, 0, 0, 0, g_out0, g_out0, 0, 0, 0); +#endif auto cyclesEnd = chess_cycle_count(); chess_memory_fence();