Skip to content

Commit

Permalink
add two-col example (#989)
Browse files Browse the repository at this point in the history
  • Loading branch information
makslevental authored Feb 9, 2024
1 parent 98d5fdb commit 9225c56
Show file tree
Hide file tree
Showing 7 changed files with 698 additions and 1 deletion.
2 changes: 1 addition & 1 deletion .github/workflows/buildAndTestRyzenAI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ env:

jobs:
build-repo:
name: Build and Test
name: Build and Test with AIE tools on Ryzen AI

runs-on: amd7940hs

Expand Down
23 changes: 23 additions & 0 deletions test/ipu-xrt/two_col/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Build rules for the two_col example: compiles the threshold kernel, then
# drives aiecc.py over aie.mlir to produce final.xclbin and insts.txt.
# Tool flags and paths (CHESSCC2_FLAGS, VISION_KERNELS_VPATH_BASE, CXX flags)
# come from the shared makefile-common include.
include ../makefile-common

# threshold.cc is looked up in the shared vision-kernel sources via VPATH.
VPATH := $(VISION_KERNELS_VPATH_BASE)/threshold

# 'all' and 'clean' name actions rather than files; declaring them phony keeps
# them working even if a file called 'all' or 'clean' appears in this directory.
.PHONY: all clean

all: final.xclbin

# Instruction stream only (no CDO/xclbin generation).
insts.txt: aie.mlir
	aiecc.py -v --aie-only-generate-ipu --ipu-insts-name=$@ $<

# AIE core kernel object referenced by link_with in aie.mlir.
threshold.o: threshold.cc
	xchesscc -d ${CHESSCC2_FLAGS} -DBIT_WIDTH=8 -c $< -o $@

# Full device image; this invocation also writes insts.txt as a side product.
final.xclbin: aie.mlir threshold.o
	aiecc.py -v --aie-generate-cdo --aie-generate-ipu --no-compile-host \
		--xclbin-name=$@ --ipu-insts-name=insts.txt $<

clean:
	rm -rf *.elf* *.bif aie.mlir.prj *.xclbin sim \
		chess* *.o* insts.txt \
		*.log aie_partition.json *.bin BOOT.BIN _x test.exe

# Host-side XRT test program.
test.exe: test.cpp
	$(CXX) $(LD_FLAGS) $< -o $@ $(CFLAGS) $(INC) $(LD_PATHS)
141 changes: 141 additions & 0 deletions test/ipu-xrt/two_col/aie.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
module {
aie.device(ipu) {
// Tiles used by the design. %0..%3 share first coordinate 0 and %4/%5 have
// first coordinate 1 (presumably aie.tile takes (column, row), which is what
// makes this a two-column example - confirm against the AIE dialect docs).
%0 = aie.tile(0, 0)
%1 = aie.tile(0, 1)
%2 = aie.tile(0, 2)
%3 = aie.tile(0, 3)
%4 = aie.tile(1, 4)
%5 = aie.tile(1, 5)
// One 16 x i32 parameter buffer per core tile. Each core below reads word 0
// and word 1 of its own buffer, and @sequence writes the same two words.
%rtp0 = aie.buffer(%2) {sym_name = "rtp0"} : memref<16xi32>
%rtp1 = aie.buffer(%3) {sym_name = "rtp1"} : memref<16xi32>
%rtp2 = aie.buffer(%4) {sym_name = "rtp2"} : memref<16xi32>
%rtp3 = aie.buffer(%5) {sym_name = "rtp3"} : memref<16xi32>
// Input path: a 512-byte object fifo from %0 to %1 is linked to four 128-byte
// fifos, one from %1 to each core tile (%2, %3, %4, %5). Every fifo has depth 2.
aie.objectfifo @objFifo_in0(%0, {%1}, 2 : i32) : !aie.objectfifo<memref<512xui8>>
aie.objectfifo @objFifo_in1(%1, {%2}, 2 : i32) : !aie.objectfifo<memref<128xui8>>
aie.objectfifo @objFifo_in2(%1, {%3}, 2 : i32) : !aie.objectfifo<memref<128xui8>>
aie.objectfifo @objFifo_in3(%1, {%4}, 2 : i32) : !aie.objectfifo<memref<128xui8>>
aie.objectfifo @objFifo_in4(%1, {%5}, 2 : i32) : !aie.objectfifo<memref<128xui8>>
aie.objectfifo.link [@objFifo_in0] -> [@objFifo_in1, @objFifo_in2, @objFifo_in3, @objFifo_in4] ()
// Output path: the mirror image. Four 128-byte fifos from the core tiles back
// to %1 are linked into one 512-byte fifo from %1 to %0.
aie.objectfifo @objFifo_out0(%1, {%0}, 2 : i32) : !aie.objectfifo<memref<512xui8>>
aie.objectfifo @objFifo_out1(%2, {%1}, 2 : i32) : !aie.objectfifo<memref<128xui8>>
aie.objectfifo @objFifo_out2(%3, {%1}, 2 : i32) : !aie.objectfifo<memref<128xui8>>
aie.objectfifo @objFifo_out3(%4, {%1}, 2 : i32) : !aie.objectfifo<memref<128xui8>>
aie.objectfifo @objFifo_out4(%5, {%1}, 2 : i32) : !aie.objectfifo<memref<128xui8>>
aie.objectfifo.link [@objFifo_out1, @objFifo_out2, @objFifo_out3, @objFifo_out4] -> [@objFifo_out0] ()
// Declaration only (no body): the cores below call this function and carry
// link_with = "threshold.o", which presumably supplies the definition.
func.func private @thresholdLine(%in: memref<128xui8>, %out: memref<128xui8>, %lineWidth: i32, %thresholdValue: i32, %maxValue: i32, %thresholdType: i8) -> ()
// Core program for tile %2: consumes from @objFifo_in1, produces into
// @objFifo_out1, and takes its threshold parameters from the rtp0 buffer.
%24 = aie.core(%2) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c4096 = arith.constant 4096 : index
%lineWidth = arith.constant 128 : i32
%maxValue = arith.constant 255 : i32
%th = arith.constant 50 : i32
%v0 = arith.constant 0 : i32
// Seed rtp0[0] = 50 and rtp0[1] = 0 before the loop. These are the same
// words that are later loaded as the threshold value and threshold type.
memref.store %th, %rtp0[%c0] : memref<16xi32>
memref.store %v0, %rtp0[%c1] : memref<16xi32>
// 4096 iterations; each one handles a single 128-byte element.
scf.for %arg0 = %c0 to %c4096 step %c1 {
// Acquire one element to consume and one element to produce into.
%subview0 = aie.objectfifo.acquire @objFifo_in1(Consume, 1) : !aie.objectfifosubview<memref<128xui8>>
%elem0 = aie.objectfifo.subview.access %subview0[0] : !aie.objectfifosubview<memref<128xui8>> -> memref<128xui8>
%subview1 = aie.objectfifo.acquire @objFifo_out1(Produce, 1) : !aie.objectfifosubview<memref<128xui8>>
%elem1 = aie.objectfifo.subview.access %subview1[0] : !aie.objectfifosubview<memref<128xui8>> -> memref<128xui8>
// Parameters are re-loaded from rtp0 on every iteration; the type word is
// truncated to i8 to match the thresholdLine signature.
%thresh = memref.load %rtp0[%c0] : memref<16xi32>
%tt = memref.load %rtp0[%c1] : memref<16xi32>
%threshType = arith.trunci %tt : i32 to i8
func.call @thresholdLine(%elem0,%elem1,%lineWidth,%thresh,%maxValue,%threshType) : (memref<128xui8>, memref<128xui8>, i32, i32, i32, i8) -> ()
// Hand both elements back to their fifos.
aie.objectfifo.release @objFifo_in1(Consume, 1)
aie.objectfifo.release @objFifo_out1(Produce, 1)
}
aie.end
} {link_with = "threshold.o"}
// Core program for tile %3: consumes from @objFifo_in2, produces into
// @objFifo_out2, and takes its threshold parameters from the rtp1 buffer.
%34 = aie.core(%3) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c4096 = arith.constant 4096 : index
%lineWidth = arith.constant 128 : i32
%maxValue = arith.constant 255 : i32
%th = arith.constant 50 : i32
%v0 = arith.constant 0 : i32
// Seed rtp1[0] = 50 and rtp1[1] = 0 before the loop. These are the same
// words that are later loaded as the threshold value and threshold type.
memref.store %th, %rtp1[%c0] : memref<16xi32>
memref.store %v0, %rtp1[%c1] : memref<16xi32>
// 4096 iterations; each one handles a single 128-byte element.
scf.for %arg0 = %c0 to %c4096 step %c1 {
// Acquire one element to consume and one element to produce into.
%subview0 = aie.objectfifo.acquire @objFifo_in2(Consume, 1) : !aie.objectfifosubview<memref<128xui8>>
%elem0 = aie.objectfifo.subview.access %subview0[0] : !aie.objectfifosubview<memref<128xui8>> -> memref<128xui8>
%subview1 = aie.objectfifo.acquire @objFifo_out2(Produce, 1) : !aie.objectfifosubview<memref<128xui8>>
%elem1 = aie.objectfifo.subview.access %subview1[0] : !aie.objectfifosubview<memref<128xui8>> -> memref<128xui8>
// Parameters are re-loaded from rtp1 on every iteration; the type word is
// truncated to i8 to match the thresholdLine signature.
%thresh = memref.load %rtp1[%c0] : memref<16xi32>
%tt = memref.load %rtp1[%c1] : memref<16xi32>
%threshType = arith.trunci %tt : i32 to i8
func.call @thresholdLine(%elem0,%elem1,%lineWidth,%thresh,%maxValue,%threshType) : (memref<128xui8>, memref<128xui8>, i32, i32, i32, i8) -> ()
// Hand both elements back to their fifos.
aie.objectfifo.release @objFifo_in2(Consume, 1)
aie.objectfifo.release @objFifo_out2(Produce, 1)
}
aie.end
} {link_with = "threshold.o"}
// Core program for tile %4: consumes from @objFifo_in3, produces into
// @objFifo_out3, and takes its threshold parameters from the rtp2 buffer.
%44 = aie.core(%4) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c4096 = arith.constant 4096 : index
%lineWidth = arith.constant 128 : i32
%maxValue = arith.constant 255 : i32
%th = arith.constant 50 : i32
%v0 = arith.constant 0 : i32
// Seed rtp2[0] = 50 and rtp2[1] = 0 before the loop. These are the same
// words that are later loaded as the threshold value and threshold type.
memref.store %th, %rtp2[%c0] : memref<16xi32>
memref.store %v0, %rtp2[%c1] : memref<16xi32>
// 4096 iterations; each one handles a single 128-byte element.
scf.for %arg0 = %c0 to %c4096 step %c1 {
// Acquire one element to consume and one element to produce into.
%subview0 = aie.objectfifo.acquire @objFifo_in3(Consume, 1) : !aie.objectfifosubview<memref<128xui8>>
%elem0 = aie.objectfifo.subview.access %subview0[0] : !aie.objectfifosubview<memref<128xui8>> -> memref<128xui8>
%subview1 = aie.objectfifo.acquire @objFifo_out3(Produce, 1) : !aie.objectfifosubview<memref<128xui8>>
%elem1 = aie.objectfifo.subview.access %subview1[0] : !aie.objectfifosubview<memref<128xui8>> -> memref<128xui8>
// Parameters are re-loaded from rtp2 on every iteration; the type word is
// truncated to i8 to match the thresholdLine signature.
%thresh = memref.load %rtp2[%c0] : memref<16xi32>
%tt = memref.load %rtp2[%c1] : memref<16xi32>
%threshType = arith.trunci %tt : i32 to i8
func.call @thresholdLine(%elem0,%elem1,%lineWidth,%thresh,%maxValue,%threshType) : (memref<128xui8>, memref<128xui8>, i32, i32, i32, i8) -> ()
// Hand both elements back to their fifos.
aie.objectfifo.release @objFifo_in3(Consume, 1)
aie.objectfifo.release @objFifo_out3(Produce, 1)
}
aie.end
} {link_with = "threshold.o"}
// Core program for tile %5: consumes from @objFifo_in4, produces into
// @objFifo_out4, and takes its threshold parameters from the rtp3 buffer.
%54 = aie.core(%5) {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c4096 = arith.constant 4096 : index
%lineWidth = arith.constant 128 : i32
%maxValue = arith.constant 255 : i32
%th = arith.constant 50 : i32
%v0 = arith.constant 0 : i32
// Seed rtp3[0] = 50 and rtp3[1] = 0 before the loop. These are the same
// words that are later loaded as the threshold value and threshold type.
memref.store %th, %rtp3[%c0] : memref<16xi32>
memref.store %v0, %rtp3[%c1] : memref<16xi32>
// 4096 iterations; each one handles a single 128-byte element.
scf.for %arg0 = %c0 to %c4096 step %c1 {
// Acquire one element to consume and one element to produce into.
%subview0 = aie.objectfifo.acquire @objFifo_in4(Consume, 1) : !aie.objectfifosubview<memref<128xui8>>
%elem0 = aie.objectfifo.subview.access %subview0[0] : !aie.objectfifosubview<memref<128xui8>> -> memref<128xui8>
%subview1 = aie.objectfifo.acquire @objFifo_out4(Produce, 1) : !aie.objectfifosubview<memref<128xui8>>
%elem1 = aie.objectfifo.subview.access %subview1[0] : !aie.objectfifosubview<memref<128xui8>> -> memref<128xui8>
// Parameters are re-loaded from rtp3 on every iteration; the type word is
// truncated to i8 to match the thresholdLine signature.
%thresh = memref.load %rtp3[%c0] : memref<16xi32>
%tt = memref.load %rtp3[%c1] : memref<16xi32>
%threshType = arith.trunci %tt : i32 to i8
func.call @thresholdLine(%elem0,%elem1,%lineWidth,%thresh,%maxValue,%threshType) : (memref<128xui8>, memref<128xui8>, i32, i32, i32, i8) -> ()
// Hand both elements back to their fifos.
aie.objectfifo.release @objFifo_in4(Consume, 1)
aie.objectfifo.release @objFifo_out4(Produce, 1)
}
aie.end
} {link_with = "threshold.o"}
// Host-issued control sequence. Takes three buffers: %in and %out are
// 2048 x i32 each (8192 bytes), %buf is 32 x i32 and is not referenced in
// the body.
func.func @sequence(%in : memref<2048xi32>, %buf : memref<32xi32>, %out : memref<2048xi32>) {
%c0 = arith.constant 0 : i64
%c1 = arith.constant 1 : i64
%c2048 = arith.constant 2048 : i64
// Write 50 into word 0 of each rtp buffer. The leading operands match the
// coordinates of the tile that owns the named buffer (presumably
// (column, row, index, value) - confirm against the AIEX dialect docs).
aiex.ipu.rtp_write(0, 2, 0, 50) { buffer_sym_name = "rtp0" }
aiex.ipu.rtp_write(0, 3, 0, 50) { buffer_sym_name = "rtp1" }
aiex.ipu.rtp_write(1, 4, 0, 50) { buffer_sym_name = "rtp2" }
aiex.ipu.rtp_write(1, 5, 0, 50) { buffer_sym_name = "rtp3" }
// Write 0 into word 1 of each rtp buffer (the word the cores load as the
// threshold type).
aiex.ipu.rtp_write(0, 2, 1, 0) { buffer_sym_name = "rtp0" }
aiex.ipu.rtp_write(0, 3, 1, 0) { buffer_sym_name = "rtp1" }
aiex.ipu.rtp_write(1, 4, 1, 0) { buffer_sym_name = "rtp2" }
aiex.ipu.rtp_write(1, 5, 1, 0) { buffer_sym_name = "rtp3" }
// Transfers of 2048 i32 words each, bound to the output and input object
// fifos. The output transfer is issued first, then the input transfer.
aiex.ipu.dma_memcpy_nd (0, 0, %out[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c2048][%c0,%c0,%c0]) { metadata = @objFifo_out0, id = 1 : i64 } : memref<2048xi32>
aiex.ipu.dma_memcpy_nd (0, 0, %in[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c2048][%c0,%c0,%c0]) { metadata = @objFifo_in0, id = 0 : i64 } : memref<2048xi32>
// Presumably blocks until the transfer on column 0, row 0, channel 0 has
// completed - confirm the direction/channel encoding against the AIEX docs.
aiex.ipu.sync { column = 0 : i32, row = 0 : i32, direction = 0 : i32, channel = 0 : i32, column_num = 1 : i32, row_num = 1 : i32 }
return
}
}
}

10 changes: 10 additions & 0 deletions test/ipu-xrt/two_col/run.lit
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// (c) Copyright 2023 Advanced Micro Devices, Inc.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Hardware test for the two_col example. The steps below, in order: compile
// the threshold kernel for aie2, generate aie.xclbin and insts.txt from
// aie.mlir, build the XRT host program, then execute it on the device and
// match the pass message it prints on success.
//
// REQUIRES: ryzen_ai
//
// RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -c %S/threshold.cc -o ./threshold.o
// RUN: %python aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt %S/aie.mlir
// RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s
// CHECK: PASS!
1 change: 1 addition & 0 deletions test/ipu-xrt/two_col/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Runs the host test against the Makefile outputs: final.xclbin as the device
# image, MLIR_AIE as the kernel-name prefix, insts.txt as the instruction file.
./test.exe -x final.xclbin -k MLIR_AIE -i insts.txt
209 changes: 209 additions & 0 deletions test/ipu-xrt/two_col/test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
#include <boost/program_options.hpp>
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include "xrt/xrt_bo.h"
#include "xrt/xrt_device.h"
#include "xrt/xrt_kernel.h"

// #define IMAGE_WIDTH_IN 256
// #define IMAGE_HEIGHT_IN 256

#define IMAGE_WIDTH_IN 128
#define IMAGE_HEIGHT_IN 64

#define IMAGE_WIDTH_OUT IMAGE_WIDTH_IN
#define IMAGE_HEIGHT_OUT IMAGE_HEIGHT_IN

#define IMAGE_AREA_IN (IMAGE_HEIGHT_IN * IMAGE_WIDTH_IN)
#define IMAGE_AREA_OUT (IMAGE_HEIGHT_OUT * IMAGE_WIDTH_OUT)

constexpr int IN_SIZE = (IMAGE_AREA_IN * sizeof(uint8_t));
constexpr int OUT_SIZE = (IMAGE_AREA_OUT * sizeof(uint8_t));

namespace po = boost::program_options;

/// Verifies that the option `name` was supplied and names a readable file.
///
/// @param vm_in parsed command-line options to look the option up in
/// @param name  option key whose value is interpreted as a file path
/// @throws std::runtime_error if the option is absent, or if the file it
///         names cannot be opened for reading
void check_arg_file_exists(po::variables_map &vm_in, std::string name) {
  // Guard: the option must have been provided at all.
  if (vm_in.count(name) == 0)
    throw std::runtime_error("Error: no " + name + " file was provided\n");

  // Probe the path by trying to open it; the stream is closed on return.
  const std::string path = vm_in[name].as<std::string>();
  std::ifstream probe(path);
  if (probe.fail())
    throw std::runtime_error("The " + name + " file " + path +
                             " does not exist.\n");
}

/// Reads an instruction file containing one hexadecimal 32-bit word per line.
///
/// Lines that are empty or contain only whitespace (for example a trailing
/// newline at end of file, or a bare carriage return) are skipped.
///
/// @param instr_path path of the text file to read
/// @return the parsed words, in file order
/// @throws std::runtime_error if the file cannot be opened, or if a non-blank
///         line does not start with a hexadecimal number
std::vector<uint32_t> load_instr_sequence(std::string instr_path) {
  std::ifstream instr_file(instr_path);
  // Without this check an unreadable path would silently yield an empty
  // instruction sequence.
  if (!instr_file) {
    throw std::runtime_error("Unable to open instruction file " + instr_path +
                             "\n");
  }

  std::vector<uint32_t> instr_v;
  std::string line;
  while (std::getline(instr_file, line)) {
    // Tolerate blank lines instead of failing the whole parse.
    if (line.find_first_not_of(" \t\r") == std::string::npos)
      continue;

    std::istringstream iss(line);
    uint32_t a;
    if (!(iss >> std::hex >> a)) {
      throw std::runtime_error("Unable to parse instruction file\n");
    }
    instr_v.push_back(a);
  }
  return instr_v;
}

/// Host-side test driver for the two-column threshold design.
///
/// Parses the command line, loads the instruction sequence and the xclbin,
/// fills the input buffer with pseudo-random bytes, runs the kernel once and
/// compares every output byte with the expected binary-threshold result.
///
/// @return 0 when every output byte matches (after printing "PASS!"), 1 on
///         any usage error, missing kernel, or mismatch
int main(int argc, const char *argv[]) {

  // Program arguments parsing
  po::options_description desc("Allowed options");
  desc.add_options()("help,h", "produce help message")(
      "xclbin,x", po::value<std::string>()->required(),
      "the input xclbin path")(
      "kernel,k", po::value<std::string>()->required(),
      "the kernel name in the XCLBIN (for instance PP_PRE_FD)")(
      "verbosity,v", po::value<int>()->default_value(0),
      "the verbosity of the output")(
      "instr,i", po::value<std::string>()->required(),
      "path of file containing userspace instructions to be sent to the LX6");
  po::variables_map vm;
  std::vector<uint32_t> instr_v;

  try {
    po::store(po::parse_command_line(argc, argv, desc), vm);

    // Handle --help before notify(): notify() rejects a command line that
    // lacks the required options, which would make "-h" on its own fail.
    if (vm.count("help")) {
      std::cout << desc << "\n";
      return 1;
    }
    po::notify(vm);

    // These helpers report problems by throwing; keeping them inside the try
    // turns a bad path into a readable message instead of std::terminate.
    check_arg_file_exists(vm, "xclbin");
    check_arg_file_exists(vm, "instr");
    instr_v = load_instr_sequence(vm["instr"].as<std::string>());
  } catch (const std::exception &ex) {
    std::cerr << ex.what() << "\n\n";
    std::cerr << "Usage:\n" << desc << "\n";
    return 1;
  }

  int verbosity = vm["verbosity"].as<int>();
  if (verbosity >= 1)
    std::cout << "Sequence instr count: " << instr_v.size() << "\n";

  // Start the XRT test code
  // Get a device handle
  unsigned int device_index = 0;
  auto device = xrt::device(device_index);

  // Load the xclbin
  if (verbosity >= 1)
    std::cout << "Loading xclbin: " << vm["xclbin"].as<std::string>() << "\n";
  auto xclbin = xrt::xclbin(vm["xclbin"].as<std::string>());

  if (verbosity >= 1)
    std::cout << "Kernel opcode: " << vm["kernel"].as<std::string>() << "\n";
  std::string Node = vm["kernel"].as<std::string>();

  // Get the kernel from the xclbin: first kernel whose name starts with Node
  auto xkernels = xclbin.get_kernels();
  auto xkernel_it = std::find_if(xkernels.begin(), xkernels.end(),
                                 [&Node](xrt::xclbin::kernel &k) {
                                   auto name = k.get_name();
                                   std::cout << "Name: " << name << std::endl;
                                   return name.rfind(Node, 0) == 0;
                                 });
  // Dereferencing end() is undefined behaviour, so fail explicitly when the
  // requested kernel is not present in the xclbin.
  if (xkernel_it == xkernels.end()) {
    std::cerr << "Error: no kernel with a name starting with \"" << Node
              << "\" was found in the xclbin\n";
    return 1;
  }
  auto kernelName = xkernel_it->get_name();

  if (verbosity >= 1)
    std::cout << "Registering xclbin: " << vm["xclbin"].as<std::string>()
              << "\n";

  device.register_xclbin(xclbin);

  // get a hardware context
  if (verbosity >= 1)
    std::cout << "Getting hardware context.\n";
  xrt::hw_context context(device, xclbin.get_uuid());

  // get a kernel handle
  if (verbosity >= 1)
    std::cout << "Getting handle to kernel:" << kernelName << "\n";
  auto kernel = xrt::kernel(context, kernelName);

  // Buffer objects, in kernel-argument order: instructions (arg 0), input
  // (arg 2), debug (arg 3) and output (arg 4). Arg 1 is the instruction count.
  auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(uint32_t),
                          XCL_BO_FLAGS_CACHEABLE, kernel.group_id(0));
  auto bo_in =
      xrt::bo(device, IN_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(2));
  auto debug =
      xrt::bo(device, IN_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3));
  auto bo_out =
      xrt::bo(device, OUT_SIZE, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(4));

  if (verbosity >= 1)
    std::cout << "Writing data into buffer objects.\n";
  uint8_t *bufIn = bo_in.map<uint8_t *>();
  // A host-side copy of the input is kept so the output can be checked later.
  std::vector<uint8_t> srcVec;
  srcVec.reserve(IMAGE_AREA_IN);
  for (int i = 0; i < IMAGE_AREA_IN; i++)
    srcVec.push_back(rand() % UINT8_MAX);
  std::memcpy(bufIn, srcVec.data(), (srcVec.size() * sizeof(uint8_t)));

  void *bufInstr = bo_instr.map<void *>();
  std::memcpy(bufInstr, instr_v.data(), instr_v.size() * sizeof(uint32_t));

  bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE);
  bo_in.sync(XCL_BO_SYNC_BO_TO_DEVICE);

  if (verbosity >= 1)
    std::cout << "Running Kernel.\n";
  auto run = kernel(bo_instr, instr_v.size(), bo_in, debug, bo_out);
  run.wait();

  bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE);

  uint8_t *bufOut = bo_out.map<uint8_t *>();

  // Threshold the device is expected to apply: inputs at or below it must
  // come back as 0, everything above it as UINT8_MAX.
  constexpr int kThreshold = 50;
  int errors = 0;
  const int max_errors = 64000;

  std::cout << std::dec;
  for (int i = 0; i < IMAGE_AREA_OUT; i++) {
    // Widen to int so the values print as numbers; streaming a uint8_t
    // writes it as a raw character.
    const int input = srcVec[i];
    const int actual = bufOut[i];
    const bool expect_zero = input <= kThreshold;
    const int expected = expect_zero ? 0 : UINT8_MAX;

    if (actual != expected) {
      if (errors < max_errors)
        std::cout << "Error: " << actual << " at " << i
                  << (expect_zero ? " should be zero "
                                  : " should be UINT8_MAX ")
                  << " : input " << input << std::endl;
      errors++;
    } else if (verbosity >= 2) {
      // Per-pixel success lines are debug output only; at the default
      // verbosity they would bury the final verdict under thousands of lines.
      std::cout << (expect_zero ? "Wow: " : "WowT: ") << actual << " at " << i
                << " is correct "
                << " : input " << input << "\n";
    }
  }

  if (!errors) {
    std::cout << "\nPASS!\n\n";
    return 0;
  } else {
    std::cout << "\nfailed.\n\n";
    return 1;
  }
}
Loading

0 comments on commit 9225c56

Please sign in to comment.