-
Notifications
You must be signed in to change notification settings - Fork 86
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix TOSA broadcast and mixed precision tests (#631)
Fix the following TOSA tests: bf16xbf16_sub_elem_2d_broadcast_2d and i8xi16_sub_elem. Add the following new TOSA tests: i16xi16_sub_elem_2d_broadcast_scalar (pass), i16xi16_sub_elem_2d_broadcast_1d_unit_dim (pass), and bf16xbf16_sub_elem_2d_broadcast_scalar (xfail).
- Loading branch information
1 parent
83da581
commit ceb4dfe
Showing
17 changed files
with
373 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1 change: 0 additions & 1 deletion
1
...n/Dialect/TOSA/bf16xbf16_sub_elem_2d_broadcast_2d/bf16xbf16_sub_elem_2d_broadcast_2d.mlir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
29 changes: 29 additions & 0 deletions
29
test/Integration/Dialect/TOSA/bf16xbf16_sub_elem_2d_broadcast_2d/dut.cc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
// clang-format off | ||
// Kernel under test: writes v1 minus v2 into v3, 16 bfloat16 lanes per step. | ||
// v1 and v3 are addressed as 16 rows of 1024 elements (offset 1024*row + col); | ||
// v2 is addressed by the column offset only, so the same 1024 elements of v2 | ||
// are subtracted from every row. | ||
void dut(bfloat16 * restrict v1, bfloat16 * restrict v2, bfloat16 * restrict v3) { | ||
// v4 is a plain alias of v2. | ||
bfloat16 * restrict v4 = v2; | ||
// Row loop bounds: rows 0..15, one row per iteration. | ||
size_t v5 = 0; | ||
size_t v6 = 16; | ||
size_t v7 = 1; | ||
for (size_t v8 = v5; v8 < v6; v8 += v7) | ||
chess_prepare_for_pipelining | ||
chess_loop_range(16, 16) | ||
{ | ||
// Column loop bounds: 1024 columns in steps of 16, i.e. 1024 / 16 = 64 | ||
// iterations, matching the chess_loop_range(64, 64) annotation below. | ||
size_t v9 = 0; | ||
size_t v10 = 1024; | ||
size_t v11 = 16; | ||
for (size_t v12 = v9; v12 < v10; v12 += v11) | ||
chess_prepare_for_pipelining | ||
chess_loop_range(64, 64) | ||
{ | ||
// Load 16 elements of the current row of v1 and the matching columns of v2. | ||
v16bfloat16 v13 = *(v16bfloat16 *)(v1 + 1024*v8+v12); | ||
v16bfloat16 v14 = *(v16bfloat16 *)(v4 + v12); | ||
// Both operands are converted to v16accfloat before the subtraction and the | ||
// difference is converted back to v16bfloat16 for the store. | ||
// NOTE(review): presumably the subtraction is only available on the | ||
// accumulator type -- confirm against the AIE-ML intrinsics reference. | ||
v16accfloat v15 = ups_to_v16accfloat(v13); | ||
v16accfloat v16 = ups_to_v16accfloat(v14); | ||
v16accfloat v17 = sub(v15, v16); | ||
v16bfloat16 v18 = to_v16bfloat16(v17); | ||
*(v16bfloat16 *)(v3 + 1024*v8+v12) = v18; | ||
} | ||
} | ||
return; | ||
} | ||
// clang-format on |
24 changes: 24 additions & 0 deletions
24
...t/TOSA/bf16xbf16_sub_elem_2d_broadcast_scalar/bf16xbf16_sub_elem_2d_broadcast_scalar.mlir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// Copyright (C) 2023, Advanced Micro Devices, Inc. | ||
|
||
// XFAIL: * | ||
// REQUIRES: valid_xchess_license | ||
// RUN: mlir-opt %s --pass-pipeline="builtin.module(func.func(tosa-make-broadcastable, tosa-to-linalg-named, tosa-to-linalg))" -o linalg.mlir | ||
// RUN: mlir-opt linalg.mlir --linalg-fuse-elementwise-ops --eliminate-empty-tensors --empty-tensor-to-alloc-tensor --one-shot-bufferize="allow-return-allocs allow-unknown-ops bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" --drop-equivalent-buffer-results --buffer-results-to-out-params --buffer-deallocation --canonicalize --cse --convert-linalg-to-affine-loops --affine-super-vectorize="virtual-vector-size=16" -o affine.mlir | ||
// RUN: aie-opt affine.mlir --convert-vector-to-aievec="aie-target=aieml" -lower-affine -o aievec.mlir | ||
// RUN: aie-translate aievec.mlir -aieml=true --aievec-to-cpp -o dut.cc | ||
// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc dut.cc | ||
// RUN: mkdir -p data | ||
// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout | ||
// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s | ||
// CHECK: TEST PASSED | ||
|
||
// Test input: @dut subtracts a rank-0 bf16 tensor (%arg1) from a 16x1024 bf16 | ||
// tensor (%arg0). %arg1 is first reshaped to 1x1 so that tosa.sub receives two | ||
// rank-2 operands; the result has the 16x1024 shape of %arg0. | ||
module { | ||
func.func @dut(%arg0: tensor<16x1024xbf16>, %arg1: tensor<bf16>) -> (tensor<16x1024xbf16>) { | ||
%0 = "tosa.reshape"(%arg1) { new_shape = array<i64: 1, 1>} : (tensor<bf16>) -> (tensor<1x1xbf16>) | ||
%1 = "tosa.sub"(%arg0,%0) : (tensor<16x1024xbf16>, tensor<1x1xbf16>) -> (tensor<16x1024xbf16>) | ||
return %1 : tensor<16x1024xbf16> | ||
} | ||
} | ||
|
||
|
4 changes: 4 additions & 0 deletions
4
test/Integration/Dialect/TOSA/bf16xbf16_sub_elem_2d_broadcast_scalar/defines.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#pragma once | ||
// Element counts used by testbench.cc to size its buffers: the first input and | ||
// the output each hold 16 * 1024 elements, the second input holds one element. | ||
constexpr unsigned const IN0_SIZE = 16 * 1024; | ||
constexpr unsigned const IN1_SIZE = 1; | ||
constexpr unsigned const OUT0_SIZE = 16 * 1024; |
56 changes: 56 additions & 0 deletions
56
test/Integration/Dialect/TOSA/bf16xbf16_sub_elem_2d_broadcast_scalar/testbench.cc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
#include "../common/testbench.h" | ||
#include "defines.h" | ||
#include <algorithm> | ||
#include <cstdint> | ||
#include <cstdio> | ||
#include <cstdlib> | ||
// Kernel under test. It is not defined in this file; the test's RUN line | ||
// compiles this testbench together with a separate dut.cc. | ||
void dut(bfloat16 *restrict in0, bfloat16 *restrict in1, | ||
bfloat16 *restrict out0); | ||
// Scalar reference implementation, defined at the bottom of this file. | ||
void dut_ref(bfloat16 *in0, bfloat16 *in1, bfloat16 *out0); | ||
|
||
// Input, kernel-output and reference-output buffers, sized by defines.h and | ||
// 32-byte aligned (presumably for the kernel's vector accesses -- confirm). | ||
alignas(32) bfloat16 g_in0[IN0_SIZE]; | ||
alignas(32) bfloat16 g_in1[IN1_SIZE]; | ||
alignas(32) bfloat16 g_out0[OUT0_SIZE]; | ||
alignas(32) bfloat16 g_out0Ref[OUT0_SIZE]; | ||
|
||
// Test driver: fills the inputs, runs and times dut(), runs dut_ref(), dumps | ||
// all buffers under dataDir, and prints TEST PASSED / TEST FAILED. | ||
// Returns 0 when the kernel output matches the reference, 1 otherwise. | ||
int main(int argc, char *argv[]) { | ||
std::string dataDir(TO_STR(DATA_DIR)); | ||
// Fixed seed; presumably keeps the random_bfloat16 inputs reproducible | ||
// between runs (the helper's use of rand() is not visible here -- confirm). | ||
srand(10); | ||
std::generate(g_in0, g_in0 + IN0_SIZE, | ||
[&]() { return random_bfloat16(-10, 10, 2); }); | ||
std::generate(g_in1, g_in1 + IN1_SIZE, | ||
[&]() { return random_bfloat16(-10, 10, 2); }); | ||
|
||
// Dump the generated inputs. | ||
writeData(g_in0, IN0_SIZE, dataDir + "/in0.txt"); | ||
writeData(g_in1, IN1_SIZE, dataDir + "/in1.txt"); | ||
|
||
// The cycle counter is sampled immediately before and after the kernel call, | ||
// with a memory fence on either side of the measured region. | ||
chess_memory_fence(); | ||
auto cyclesBegin = chess_cycle_count(); | ||
dut(g_in0, g_in1, g_out0); | ||
auto cyclesEnd = chess_cycle_count(); | ||
chess_memory_fence(); | ||
|
||
auto cycleCount = (int)(cyclesEnd - cyclesBegin); | ||
reportCycleCount(cycleCount, dataDir + "/cycle_count.txt"); | ||
|
||
writeData(g_out0, OUT0_SIZE, dataDir + "/out0.txt"); | ||
|
||
// Compute and dump the reference result from the same inputs. | ||
dut_ref(g_in0, g_in1, g_out0Ref); | ||
writeData(g_out0Ref, OUT0_SIZE, dataDir + "/out0_ref.txt"); | ||
|
||
// Compare the kernel output against the reference. | ||
bool ok = true; | ||
ok &= checkData(g_out0, g_out0Ref, OUT0_SIZE); | ||
|
||
// The "TEST PASSED" text is what the FileCheck CHECK line of the .mlir test | ||
// looks for in the simulator output. | ||
if (ok) | ||
printf("TEST PASSED\n"); | ||
else | ||
printf("TEST FAILED\n"); | ||
|
||
return ok ? 0 : 1; | ||
} | ||
|
||
// Scalar reference: out0[k] = in0[k] - in1[k % IN1_SIZE] for each of the | ||
// OUT0_SIZE output elements. The modulo wraps the index into in1, so with | ||
// IN1_SIZE = 1 (defines.h) every element has in1[0] subtracted from it. | ||
void dut_ref(bfloat16 *in0, bfloat16 *in1, bfloat16 *out0) { | ||
for (unsigned k = 0; k < OUT0_SIZE; k += 1) { | ||
out0[k] = in0[k] - in1[k % IN1_SIZE]; | ||
} | ||
} |
4 changes: 4 additions & 0 deletions
4
test/Integration/Dialect/TOSA/i16xi16_sub_elem_2d_broadcast_1d_unit_dim/defines.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#pragma once | ||
// Element counts used by testbench.cc to size its buffers: the first input and | ||
// the output each hold 16 * 1024 elements, the second input holds one element. | ||
constexpr unsigned const IN0_SIZE = 16 * 1024; | ||
constexpr unsigned const IN1_SIZE = 1; | ||
constexpr unsigned const OUT0_SIZE = 16 * 1024; |
27 changes: 27 additions & 0 deletions
27
test/Integration/Dialect/TOSA/i16xi16_sub_elem_2d_broadcast_1d_unit_dim/dut.cc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
// clang-format off | ||
// Kernel under test: subtracts the vector v6, built once from the start of v2, | ||
// from every 32-lane chunk of v1 and stores the result at the same offset in | ||
// v3. v1 and v3 are addressed as 16 rows of 1024 int16 elements. | ||
void dut(int16_t * restrict v1, int16_t * restrict v2, int16_t * restrict v3) { | ||
size_t v4 = 0; | ||
// NOTE(review): this loads a full v32int16 starting at v2[0], while the | ||
// testbench's second input buffer holds IN1_SIZE = 1 element -- confirm that | ||
// reading past it is acceptable on the target. | ||
v32int16 v5 = *(v32int16 *)(v2 + v4); | ||
// Presumably replicates element 0 of v5 into every lane -- confirm against | ||
// the AIE-ML intrinsics reference. | ||
v32int16 v6 = broadcast_elem(v5, 0); | ||
// Row loop bounds: rows 0..15, one row per iteration. | ||
size_t v7 = 0; | ||
size_t v8 = 16; | ||
size_t v9 = 1; | ||
for (size_t v10 = v7; v10 < v8; v10 += v9) | ||
chess_prepare_for_pipelining | ||
chess_loop_range(16, 16) | ||
{ | ||
// Column loop bounds: 1024 columns in steps of 32, i.e. 1024 / 32 = 32 | ||
// iterations, matching the chess_loop_range(32, 32) annotation below. | ||
size_t v11 = 0; | ||
size_t v12 = 1024; | ||
size_t v13 = 32; | ||
for (size_t v14 = v11; v14 < v12; v14 += v13) | ||
chess_prepare_for_pipelining | ||
chess_loop_range(32, 32) | ||
{ | ||
v32int16 v15 = *(v32int16 *)(v1 + 1024*v10+v14); | ||
v32int16 v16 = sub(v15, v6); | ||
*(v32int16 *)(v3 + 1024*v10+v14) = v16; | ||
} | ||
} | ||
return; | ||
} | ||
// clang-format on |
23 changes: 23 additions & 0 deletions
23
.../i16xi16_sub_elem_2d_broadcast_1d_unit_dim/i16xi16_sub_elem_2d_broadcast_1d_unit_dim.mlir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// Copyright (C) 2023, Advanced Micro Devices, Inc. | ||
|
||
// REQUIRES: valid_xchess_license | ||
// RUN: mlir-opt %s --pass-pipeline="builtin.module(func.func(tosa-make-broadcastable, tosa-to-linalg-named, tosa-to-linalg))" -o linalg.mlir | ||
// RUN: mlir-opt linalg.mlir --linalg-fuse-elementwise-ops --eliminate-empty-tensors --empty-tensor-to-alloc-tensor --one-shot-bufferize="allow-return-allocs allow-unknown-ops bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" --drop-equivalent-buffer-results --buffer-results-to-out-params --buffer-deallocation --canonicalize --cse --convert-linalg-to-affine-loops --affine-super-vectorize="virtual-vector-size=32" -o affine.mlir | ||
// RUN: aie-opt affine.mlir --convert-vector-to-aievec="aie-target=aieml" -lower-affine -o aievec.mlir | ||
// RUN: aie-translate aievec.mlir -aieml=true --aievec-to-cpp -o dut.cc | ||
// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc dut.cc >& xchesscc_wrapper.stdout | ||
// RUN: mkdir -p data | ||
// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout | ||
// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s | ||
// CHECK: TEST PASSED | ||
|
||
// Test input: @dut subtracts a one-element i16 tensor (%arg1) from a 16x1024 | ||
// i16 tensor (%arg0). %arg1 is first reshaped from shape 1 to 1x1 so that | ||
// tosa.sub receives two rank-2 operands; the result has the shape of %arg0. | ||
module { | ||
func.func @dut(%arg0: tensor<16x1024xi16>, %arg1: tensor<1xi16>) -> (tensor<16x1024xi16>) { | ||
%0 = "tosa.reshape"(%arg1) { new_shape = array<i64: 1, 1>} : (tensor<1xi16>) -> (tensor<1x1xi16>) | ||
%1 = "tosa.sub"(%arg0,%0) : (tensor<16x1024xi16>, tensor<1x1xi16>) -> (tensor<16x1024xi16>) | ||
return %1 : tensor<16x1024xi16> | ||
} | ||
} | ||
|
||
|
55 changes: 55 additions & 0 deletions
55
test/Integration/Dialect/TOSA/i16xi16_sub_elem_2d_broadcast_1d_unit_dim/testbench.cc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#include "../common/testbench.h" | ||
#include "defines.h" | ||
#include <algorithm> | ||
#include <cstdint> | ||
#include <cstdio> | ||
#include <cstdlib> | ||
// Kernel under test. It is not defined in this file; the test's RUN line | ||
// compiles this testbench together with a separate dut.cc. | ||
void dut(int16_t *restrict in0, int16_t *restrict in1, int16_t *restrict out0); | ||
// Scalar reference implementation, defined at the bottom of this file. | ||
void dut_ref(int16_t *in0, int16_t *in1, int16_t *out0); | ||
|
||
// Input, kernel-output and reference-output buffers, sized by defines.h and | ||
// 32-byte aligned (presumably for the kernel's vector accesses -- confirm). | ||
alignas(32) int16_t g_in0[IN0_SIZE]; | ||
alignas(32) int16_t g_in1[IN1_SIZE]; | ||
alignas(32) int16_t g_out0[OUT0_SIZE]; | ||
alignas(32) int16_t g_out0Ref[OUT0_SIZE]; | ||
|
||
// Test driver: fills the inputs, runs and times dut(), runs dut_ref(), dumps | ||
// all buffers under dataDir, and prints TEST PASSED / TEST FAILED. | ||
// Returns 0 when the kernel output matches the reference, 1 otherwise. | ||
int main(int argc, char *argv[]) { | ||
std::string dataDir(TO_STR(DATA_DIR)); | ||
// Fixed seed; presumably keeps the random_integer inputs reproducible | ||
// between runs (the helper's use of rand() is not visible here -- confirm). | ||
srand(10); | ||
std::generate(g_in0, g_in0 + IN0_SIZE, | ||
[&]() { return random_integer<int16_t>(); }); | ||
std::generate(g_in1, g_in1 + IN1_SIZE, | ||
[&]() { return random_integer<int16_t>(); }); | ||
|
||
// Dump the generated inputs. | ||
writeData(g_in0, IN0_SIZE, dataDir + "/in0.txt"); | ||
writeData(g_in1, IN1_SIZE, dataDir + "/in1.txt"); | ||
|
||
// The cycle counter is sampled immediately before and after the kernel call, | ||
// with a memory fence on either side of the measured region. | ||
chess_memory_fence(); | ||
auto cyclesBegin = chess_cycle_count(); | ||
dut(g_in0, g_in1, g_out0); | ||
auto cyclesEnd = chess_cycle_count(); | ||
chess_memory_fence(); | ||
|
||
auto cycleCount = (int)(cyclesEnd - cyclesBegin); | ||
reportCycleCount(cycleCount, dataDir + "/cycle_count.txt"); | ||
|
||
writeData(g_out0, OUT0_SIZE, dataDir + "/out0.txt"); | ||
|
||
// Compute and dump the reference result from the same inputs. | ||
dut_ref(g_in0, g_in1, g_out0Ref); | ||
writeData(g_out0Ref, OUT0_SIZE, dataDir + "/out0_ref.txt"); | ||
|
||
// Compare the kernel output against the reference. | ||
bool ok = true; | ||
ok &= checkData(g_out0, g_out0Ref, OUT0_SIZE); | ||
|
||
// The "TEST PASSED" text is what the FileCheck CHECK line of the .mlir test | ||
// looks for in the simulator output. | ||
if (ok) | ||
printf("TEST PASSED\n"); | ||
else | ||
printf("TEST FAILED\n"); | ||
|
||
return ok ? 0 : 1; | ||
} | ||
|
||
// Scalar reference: out0[k] = in0[k] - in1[k % IN1_SIZE] for each of the | ||
// OUT0_SIZE output elements. The modulo wraps the index into in1, so with | ||
// IN1_SIZE = 1 (defines.h) every element has in1[0] subtracted from it. | ||
void dut_ref(int16_t *in0, int16_t *in1, int16_t *out0) { | ||
for (unsigned k = 0; k < OUT0_SIZE; k += 1) { | ||
out0[k] = in0[k] - in1[k % IN1_SIZE]; | ||
} | ||
} |
4 changes: 4 additions & 0 deletions
4
test/Integration/Dialect/TOSA/i16xi16_sub_elem_2d_broadcast_scalar/defines.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#pragma once | ||
// Element counts for the test's buffers: the first input and the output each | ||
// hold 16 * 1024 elements, the second input holds one element. | ||
constexpr unsigned const IN0_SIZE = 16 * 1024; | ||
constexpr unsigned const IN1_SIZE = 1; | ||
constexpr unsigned const OUT0_SIZE = 16 * 1024; |
28 changes: 28 additions & 0 deletions
28
test/Integration/Dialect/TOSA/i16xi16_sub_elem_2d_broadcast_scalar/dut.cc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
// clang-format off | ||
// Kernel under test: subtracts the vector v7, built once from the start of v2, | ||
// from every 32-lane chunk of v1 and stores the result at the same offset in | ||
// v3. v1 and v3 are addressed as 16 rows of 1024 int16 elements. | ||
void dut(int16_t * restrict v1, int16_t * restrict v2, int16_t * restrict v3) { | ||
size_t v4 = 0; | ||
// v5 is a plain alias of v2. | ||
int16_t * restrict v5 = v2; | ||
// NOTE(review): this loads a full v32int16 starting at v2[0], while this | ||
// test's defines.h sets IN1_SIZE = 1 -- confirm that reading past a | ||
// one-element second input is acceptable on the target. | ||
v32int16 v6 = *(v32int16 *)(v5 + v4); | ||
// Presumably replicates element 0 of v6 into every lane -- confirm against | ||
// the AIE-ML intrinsics reference. | ||
v32int16 v7 = broadcast_elem(v6, 0); | ||
// Row loop bounds: rows 0..15, one row per iteration. | ||
size_t v8 = 0; | ||
size_t v9 = 16; | ||
size_t v10 = 1; | ||
for (size_t v11 = v8; v11 < v9; v11 += v10) | ||
chess_prepare_for_pipelining | ||
chess_loop_range(16, 16) | ||
{ | ||
// Column loop bounds: 1024 columns in steps of 32, i.e. 1024 / 32 = 32 | ||
// iterations, matching the chess_loop_range(32, 32) annotation below. | ||
size_t v12 = 0; | ||
size_t v13 = 1024; | ||
size_t v14 = 32; | ||
for (size_t v15 = v12; v15 < v13; v15 += v14) | ||
chess_prepare_for_pipelining | ||
chess_loop_range(32, 32) | ||
{ | ||
v32int16 v16 = *(v32int16 *)(v1 + 1024*v11+v15); | ||
v32int16 v17 = sub(v16, v7); | ||
*(v32int16 *)(v3 + 1024*v11+v15) = v17; | ||
} | ||
} | ||
return; | ||
} | ||
// clang-format on |
23 changes: 23 additions & 0 deletions
23
.../TOSA/i16xi16_sub_elem_2d_broadcast_scalar/i16xi16_sub_elem_2d_broadcast_1d_unit_dim.mlir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// Copyright (C) 2023, Advanced Micro Devices, Inc. | ||
|
||
// REQUIRES: valid_xchess_license | ||
// RUN: mlir-opt %s --pass-pipeline="builtin.module(func.func(tosa-make-broadcastable, tosa-to-linalg-named, tosa-to-linalg))" -o linalg.mlir | ||
// RUN: mlir-opt linalg.mlir --linalg-fuse-elementwise-ops --eliminate-empty-tensors --empty-tensor-to-alloc-tensor --one-shot-bufferize="allow-return-allocs allow-unknown-ops bufferize-function-boundaries function-boundary-type-conversion=identity-layout-map" --drop-equivalent-buffer-results --buffer-results-to-out-params --buffer-deallocation --canonicalize --cse --convert-linalg-to-affine-loops --affine-super-vectorize="virtual-vector-size=32" -o affine.mlir | ||
// RUN: aie-opt affine.mlir --convert-vector-to-aievec="aie-target=aieml" -lower-affine -o aievec.mlir | ||
// RUN: aie-translate aievec.mlir -aieml=true --aievec-to-cpp -o dut.cc | ||
// RUN: xchesscc_wrapper aie2 -f -g +s +w work +o work -I%S -I. %S/testbench.cc dut.cc >& xchesscc_wrapper.stdout | ||
// RUN: mkdir -p data | ||
// RUN: xca_udm_dbg --aiearch aie-ml -qf -T -P %aietools/data/aie_ml/lib/ -t "%S/../profiling.tcl ./work/a.out" >& xca_udm_dbg.stdout | ||
// RUN: FileCheck --input-file=./xca_udm_dbg.stdout %s | ||
// CHECK: TEST PASSED | ||
|
||
// Test input: @dut subtracts a rank-0 i16 tensor (%arg1) from a 16x1024 i16 | ||
// tensor (%arg0). %arg1 is first reshaped to 1x1 so that tosa.sub receives two | ||
// rank-2 operands; the result has the 16x1024 shape of %arg0. | ||
module { | ||
func.func @dut(%arg0: tensor<16x1024xi16>, %arg1: tensor<i16>) -> (tensor<16x1024xi16>) { | ||
%0 = "tosa.reshape"(%arg1) { new_shape = array<i64: 1, 1>} : (tensor<i16>) -> (tensor<1x1xi16>) | ||
%1 = "tosa.sub"(%arg0,%0) : (tensor<16x1024xi16>, tensor<1x1xi16>) -> (tensor<16x1024xi16>) | ||
return %1 : tensor<16x1024xi16> | ||
} | ||
} | ||
|
||
|
Oops, something went wrong.