Skip to content

Commit

Permalink
Fix reduce min (#1263)
Browse files Browse the repository at this point in the history
  • Loading branch information
denolf authored and fifield committed Apr 17, 2024
1 parent 15fe206 commit a54d5b3
Show file tree
Hide file tree
Showing 18 changed files with 46 additions and 694 deletions.
6 changes: 3 additions & 3 deletions aie_kernels/aie2/reduce_max.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
void _reduce_max_vector(int32_t *restrict in, int32_t *restrict out,
const int32_t input_size) {

v16int32 tiny = broadcast_to_v16int32((int32_t)-2147483648);
v16int32 tiny = broadcast_to_v16int32((int32_t)INT32_MIN);
const int32_t vector_size = 16;
v16int32 after_vector;
v16int32 running_max = tiny;
for (int32_t i = 0; i < input_size; i += vector_size)
chess_prepare_for_pipelining chess_loop_range(64, 64) {
chess_prepare_for_pipelining chess_loop_range(8, ) {
v16int32 next = *(v16int32 *)(in + i);
v16int32 test = max(running_max, next);
running_max = test;
Expand All @@ -34,7 +34,7 @@ void _reduce_max_vector(int32_t *restrict in, int32_t *restrict out,

void _reduce_max_scalar(int32_t *restrict in, int32_t *restrict out,
const int32_t input_size) {
int32_t running_max = (int32_t)-2147483648;
int32_t running_max = (int32_t)INT32_MIN;
for (int32_t i = 0; i < input_size; i++) {
if (in[i] > running_max)
running_max = in[i];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@

#include <aie_api/aie.hpp>

void vector(int32_t *restrict in, int32_t *restrict out) {
void _reduce_min_vector(int32_t *restrict in, int32_t *restrict out,
const int32_t input_size) {

v16int32 massive = broadcast_to_v16int32((int32_t)2147483647);
int32_t input_size = 1024;
int32_t vector_size = 16;
v16int32 massive = broadcast_to_v16int32((int32_t)INT32_MAX);
const int32_t vector_size = 16;
v16int32 after_vector;
v16int32 running_min = massive;
for (int32_t i = 0; i < input_size; i += vector_size)
chess_prepare_for_pipelining chess_loop_range(64, 64) {
chess_prepare_for_pipelining chess_loop_range(8, ) {
v16int32 next = *(v16int32 *)(in + i);
v16int32 test = min(running_min, next);
running_min = test;
Expand All @@ -32,9 +32,9 @@ void vector(int32_t *restrict in, int32_t *restrict out) {
return;
}

void scalar(int32_t *restrict in, int32_t *restrict out) {
size_t input_size = 1024;
int32_t running_min = (int32_t)2147483647;
void _reduce_min_scalar(int32_t *restrict in, int32_t *restrict out,
const int32_t input_size) {
int32_t running_min = (int32_t)INT32_MAX;
for (int32_t i = 0; i < input_size; i++) {
if (in[i] < running_min)
running_min = in[i];
Expand All @@ -46,8 +46,12 @@ void scalar(int32_t *restrict in, int32_t *restrict out) {

// C-linkage entry points. Each wrapper below does nothing except forward its
// arguments to a C++ function defined earlier in this file; extern "C" keeps
// the exported symbol names unmangled.
extern "C" {

// Forwards (a_in, c_out) to vector(); takes no size argument.
void vector_min(int32_t *a_in, int32_t *c_out) { vector(a_in, c_out); }
// Forwards (a_in, c_out, input_size) to _reduce_min_vector().
// input_size is used there as the loop bound over `in`, counted in int32_t
// elements and stepped 16 at a time.
// NOTE(review): presumably c_out receives the minimum -- the store is not
// visible here, confirm against the full kernel.
void reduce_min_vector(int32_t *a_in, int32_t *c_out, int32_t input_size) {
_reduce_min_vector(a_in, c_out, input_size);
}

// Forwards (a_in, c_out) to scalar(); takes no size argument.
void scalar_min(int32_t *a_in, int32_t *c_out) { scalar(a_in, c_out); }
// Forwards (a_in, c_out, input_size) to _reduce_min_scalar().
// input_size is used there as the loop bound over in[i], one element per
// iteration.
void reduce_min_scalar(int32_t *a_in, int32_t *c_out, int32_t input_size) {
_reduce_min_scalar(a_in, c_out, input_size);
}

} // extern "C"
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,17 @@ project(${ProjectName})
find_package(Boost REQUIRED)

add_executable(${currentTarget}
${CMAKE_CURRENT_SOURCE_DIR}/../../../runtime_lib/test_lib/test_utils.cpp
test.cpp
)

target_compile_definitions(${currentTarget} PUBLIC DISABLE_ABI_CHECK=1)

target_include_directories (${currentTarget} PUBLIC
../../utils
${CMAKE_CURRENT_SOURCE_DIR}/../../../runtime_lib/test_lib
${XRT_INC_DIR}
${Boost_INCLUDE_DIRS}
../../../programming_examples/utils
)

target_link_directories(${currentTarget} PUBLIC
Expand All @@ -66,4 +68,4 @@ else()
target_link_libraries(${currentTarget} PUBLIC
xrt_coreutil
)
endif()
endif()
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,23 @@ include ../../makefile-common

ACDC_AIE = $(dir $(shell which aie-opt))/..

targetname = vector_min
targetname = reduce_min
devicename = ipu
col = 0
CHESS_FLAGS=${CHESSCCWRAP2_FLAGS}
KERNEL_LIB=${ACDC_AIE}/../../aie_kernels/generic/
KERNEL_LIB=../../../aie_kernels/aie2

all: build/final.xclbin build/insts.txt

build/vector_min.o: ${KERNEL_LIB}/vector_min.cc
build/reduce_min.cc.o: ${KERNEL_LIB}/reduce_min.cc
mkdir -p ${@D}
cd ${@D} && xchesscc_wrapper ${CHESS_FLAGS} -DBIT_WIDTH=8 -c $< -o ${@F}
cd ${@D} && xchesscc_wrapper ${CHESS_FLAGS} -c $(<:%=../%) -o ${@F}

build/aie.mlir: aie2.py
mkdir -p ${@D}
python3 $< ${devicename} ${col} > $@

build/final.xclbin: build/aie.mlir build/vector_min.o
build/final.xclbin: build/aie.mlir build/reduce_min.cc.o
mkdir -p ${@D}
cd ${@D} && aiecc.py --aie-generate-cdo --no-compile-host --xclbin-name=${@F} \
--aie-generate-ipu --ipu-insts-name=insts.txt $(<:%=../%)
Expand Down Expand Up @@ -73,4 +73,4 @@ vck5000: build/aie.mlir build/scale.o
-Wl,--whole-archive -Wl,--no-whole-archive -lstdc++ -ldl -lelf -o test.elf

clean: clean_trace
rm -rf build _build inst aie.mlir.prj core_* test.elf ${targetname}.exe vector_min.o* vector_min.cc
rm -rf build _build inst aie.mlir.prj core_* test.elf ${targetname}.exe
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import sys


def my_vector_max():
def my_reduce_min():
N = 1024

buffer_depth = 2
Expand All @@ -39,10 +39,9 @@ def device_body():
memRef_O_ty = T.memref(1, T.i32())

# AIE Core Function declarations

vector_min = external_func("vector_min", inputs=[memRef_I_ty, memRef_O_ty])

scalar_min = external_func("scalar_min", inputs=[memRef_I_ty, memRef_O_ty])
reduce_min_vector = external_func(
"reduce_min_vector", inputs=[memRef_I_ty, memRef_O_ty, T.i32()]
)

# Tile declarations
ShimTile = tile(int(sys.argv[2]), 0)
Expand All @@ -57,16 +56,12 @@ def device_body():
# Set up compute tiles

# Compute tile 2
@core(ComputeTile2, "vector_min.o")
@core(ComputeTile2, "reduce_min.cc.o")
def core_body():
for _ in for_(0xFFFFFFFF):
elem_out = of_out.acquire(ObjectFifoPort.Produce, 1)
elem_in = of_in.acquire(ObjectFifoPort.Consume, 1)

call(
vector_min,
[elem_in, elem_out],
)
call(reduce_min_vector, [elem_in, elem_out, N])
of_in.release(ObjectFifoPort.Consume, 1)
of_out.release(ObjectFifoPort.Produce, 1)
yield_([])
Expand All @@ -83,4 +78,4 @@ def sequence(A, C):
print(ctx.module)


my_vector_max()
my_reduce_min()
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
// (c) Copyright 2023 Advanced Micro Devices, Inc.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// REQUIRES: ryzen_ai
// REQUIRES: ryzen_ai, chess
//
// RUN: %python %S/aie2.py ipu 0 > ./aie.mlir
// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../aie_kernels/generic/vector_min.cc -o vector_min.o
// RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/../../../aie_kernels/aie2/reduce_min.cc -o reduce_min.cc.o
// RUN: %python %S/aie2.py ipu 0 | aie-opt -cse -canonicalize -o ./aie.mlir
// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir
// RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
// RUN: g++ %S/test.cpp -o test.exe -std=c++23 -Wall -I%S/../../../runtime_lib/test_lib %S/../../../runtime_lib/test_lib/test_utils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
// CHECK: PASS!

Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,8 @@ int main(int argc, const char *argv[]) {
int n_warmup_iterations = vm["warmup"].as<int>();
int trace_size = vm["trace_sz"].as<int>();

// ------------------------------------------------------
// Configure this to match your design's buffer size
// ------------------------------------------------------
int INOUT0_VOLUME = 1024; // Input only, 64x uint32_t in this example
int INOUT1_VOLUME = 1; // Not used in this example
int INOUT0_VOLUME = 1024;
int INOUT1_VOLUME = 1;

size_t INOUT0_SIZE = INOUT0_VOLUME * sizeof(INOUT0_DATATYPE);
size_t INOUT1_SIZE = INOUT1_VOLUME * sizeof(INOUT1_DATATYPE);
Expand All @@ -76,46 +73,12 @@ int main(int argc, const char *argv[]) {
// Get device, load the xclbin & kernel and register them
// ------------------------------------------------------
// Get a device handle
unsigned int device_index = 0;
auto device = xrt::device(device_index);

// Load the xclbin
if (verbosity >= 1)
std::cout << "Loading xclbin: " << vm["xclbin"].as<std::string>() << "\n";
auto xclbin = xrt::xclbin(vm["xclbin"].as<std::string>());

// Load the kernel
if (verbosity >= 1)
std::cout << "Kernel opcode: " << vm["kernel"].as<std::string>() << "\n";
std::string Node = vm["kernel"].as<std::string>();

// Get the kernel from the xclbin
auto xkernels = xclbin.get_kernels();
auto xkernel = *std::find_if(xkernels.begin(), xkernels.end(),
[Node, verbosity](xrt::xclbin::kernel &k) {
auto name = k.get_name();
if (verbosity >= 1) {
std::cout << "Name: " << name << std::endl;
}
return name.rfind(Node, 0) == 0;
});
auto kernelName = xkernel.get_name();

// Register xclbin
if (verbosity >= 1)
std::cout << "Registering xclbin: " << vm["xclbin"].as<std::string>()
<< "\n";
device.register_xclbin(xclbin);
xrt::device device;
xrt::kernel kernel;

// Get a hardware context
if (verbosity >= 1)
std::cout << "Getting hardware context.\n";
xrt::hw_context context(device, xclbin.get_uuid());

// Get a kernel handle
if (verbosity >= 1)
std::cout << "Getting handle to kernel:" << kernelName << "\n";
auto kernel = xrt::kernel(context, kernelName);
test_utils::init_xrt_load_kernel(device, kernel, verbosity,
vm["xclbin"].as<std::string>(),
vm["kernel"].as<std::string>());

// ------------------------------------------------------
// Initialize input/ output buffer sizes and sync them
Expand All @@ -136,7 +99,7 @@ int main(int argc, const char *argv[]) {

// Initialize Inout buffer 0
INOUT0_DATATYPE *bufInOut0 = bo_inout0.map<INOUT0_DATATYPE *>();
std::int32_t min = (std::int32_t)2147483647;
std::int32_t min = (std::int32_t)INT32_MAX;
for (int i = 0; i < INOUT0_VOLUME; i++) {
std::int32_t next = test_utils::random_int32_t(100000);
if (next < min)
Expand Down
76 changes: 0 additions & 76 deletions programming_examples/basic/vector_min/Makefile

This file was deleted.

Loading

0 comments on commit a54d5b3

Please sign in to comment.