-
Notifications
You must be signed in to change notification settings - Fork 86
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
POC multiple kernels in one xclbin (#1508)
- Loading branch information
Showing
6 changed files
with
363 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
//===- aie1.mlir -----------------------------------------------*- MLIR -*-===// | ||
// | ||
// Copyright (C) 2023, Advanced Micro Devices, Inc. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
// Add-one design: the core on tile (0, 2) takes 8-element i32 objects from
// @objFifo_in1, adds 1 to each element and writes them to @objFifo_out1.
module {
  aie.device(npu1_1col) {
    %tile_0_0 = aie.tile(0, 0)
    %tile_0_1 = aie.tile(0, 1)
    %tile_0_2 = aie.tile(0, 2)

    // Input path: 16-element objects from tile (0, 0) to tile (0, 1), linked
    // to 8-element objects from tile (0, 1) to tile (0, 2).
    aie.objectfifo @objFifo_in0(%tile_0_0, {%tile_0_1}, 2 : i32) : !aie.objectfifo<memref<16xi32>>
    aie.objectfifo @objFifo_in1(%tile_0_1, {%tile_0_2}, 2 : i32) : !aie.objectfifo<memref<8xi32>>
    aie.objectfifo.link [@objFifo_in0] -> [@objFifo_in1] ()

    // Output path: the mirror image of the input path.
    aie.objectfifo @objFifo_out1(%tile_0_2, {%tile_0_1}, 2 : i32) : !aie.objectfifo<memref<8xi32>>
    aie.objectfifo @objFifo_out0(%tile_0_1, {%tile_0_0}, 2 : i32) : !aie.objectfifo<memref<16xi32>>
    aie.objectfifo.link [@objFifo_out1] -> [@objFifo_out0] ()

    aie.core(%tile_0_2) {
      %lower = arith.constant 0 : index
      %stride = arith.constant 1 : index
      %upper = arith.constant 8 : index
      %addend = arith.constant 1 : i32

      // 8 objects of 8 elements each = 64 elements in total.
      scf.for %iter = %lower to %upper step %stride {
        %in_view = aie.objectfifo.acquire @objFifo_in1(Consume, 1) : !aie.objectfifosubview<memref<8xi32>>
        %in_buf = aie.objectfifo.subview.access %in_view[0] : !aie.objectfifosubview<memref<8xi32>> -> memref<8xi32>
        %out_view = aie.objectfifo.acquire @objFifo_out1(Produce, 1) : !aie.objectfifosubview<memref<8xi32>>
        %out_buf = aie.objectfifo.subview.access %out_view[0] : !aie.objectfifosubview<memref<8xi32>> -> memref<8xi32>
        // out_buf[i] = in_buf[i] + 1
        scf.for %i = %lower to %upper step %stride {
          %val = memref.load %in_buf[%i] : memref<8xi32>
          %sum = arith.addi %val, %addend : i32
          memref.store %sum, %out_buf[%i] : memref<8xi32>
        }
        aie.objectfifo.release @objFifo_in1(Consume, 1)
        aie.objectfifo.release @objFifo_out1(Produce, 1)
      }
      aie.end
    }

    // Host-side sequence: describes a 64-element transfer for %out through
    // @objFifo_out0 and one for %in through @objFifo_in0. %buf is not used.
    func.func @sequence(%in : memref<64xi32>, %buf : memref<32xi32>, %out : memref<64xi32>) {
      %zero = arith.constant 0 : i64
      %one = arith.constant 1 : i64
      %count = arith.constant 64 : i64
      aiex.npu.dma_memcpy_nd (0, 0, %out[%zero,%zero,%zero,%zero][%one,%one,%one,%count][%zero,%zero,%zero]) { metadata = @objFifo_out0, id = 1 : i64 } : memref<64xi32>
      aiex.npu.dma_memcpy_nd (0, 0, %in[%zero,%zero,%zero,%zero][%one,%one,%one,%count][%zero,%zero,%zero]) { metadata = @objFifo_in0, id = 0 : i64 } : memref<64xi32>
      // NOTE(review): presumably waits for the output transfer on column 0,
      // channel 0 to finish - confirm against the aiex dialect documentation.
      aiex.npu.sync { column = 0 : i32, row = 0 : i32, direction = 0 : i32, channel = 0 : i32, column_num = 1 : i32, row_num = 1 : i32 }
      return
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
//===- aie2.mlir -----------------------------------------------*- MLIR -*-===// | ||
// | ||
// Copyright (C) 2023, Advanced Micro Devices, Inc. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
// Add-two design: the core on tile (0, 2) takes 8-element i32 objects from
// @objFifo_in1, adds 2 to each element and writes them to @objFifo_out1.
module {
  aie.device(npu1_1col) {
    %tile_0_0 = aie.tile(0, 0)
    %tile_0_1 = aie.tile(0, 1)
    %tile_0_2 = aie.tile(0, 2)

    // Input path: 16-element objects from tile (0, 0) to tile (0, 1), linked
    // to 8-element objects from tile (0, 1) to tile (0, 2).
    aie.objectfifo @objFifo_in0(%tile_0_0, {%tile_0_1}, 2 : i32) : !aie.objectfifo<memref<16xi32>>
    aie.objectfifo @objFifo_in1(%tile_0_1, {%tile_0_2}, 2 : i32) : !aie.objectfifo<memref<8xi32>>
    aie.objectfifo.link [@objFifo_in0] -> [@objFifo_in1] ()

    // Output path: the mirror image of the input path.
    aie.objectfifo @objFifo_out1(%tile_0_2, {%tile_0_1}, 2 : i32) : !aie.objectfifo<memref<8xi32>>
    aie.objectfifo @objFifo_out0(%tile_0_1, {%tile_0_0}, 2 : i32) : !aie.objectfifo<memref<16xi32>>
    aie.objectfifo.link [@objFifo_out1] -> [@objFifo_out0] ()

    aie.core(%tile_0_2) {
      %lower = arith.constant 0 : index
      %stride = arith.constant 1 : index
      %upper = arith.constant 8 : index
      %addend = arith.constant 2 : i32

      // 8 objects of 8 elements each = 64 elements in total.
      scf.for %iter = %lower to %upper step %stride {
        %in_view = aie.objectfifo.acquire @objFifo_in1(Consume, 1) : !aie.objectfifosubview<memref<8xi32>>
        %in_buf = aie.objectfifo.subview.access %in_view[0] : !aie.objectfifosubview<memref<8xi32>> -> memref<8xi32>
        %out_view = aie.objectfifo.acquire @objFifo_out1(Produce, 1) : !aie.objectfifosubview<memref<8xi32>>
        %out_buf = aie.objectfifo.subview.access %out_view[0] : !aie.objectfifosubview<memref<8xi32>> -> memref<8xi32>
        // out_buf[i] = in_buf[i] + 2
        scf.for %i = %lower to %upper step %stride {
          %val = memref.load %in_buf[%i] : memref<8xi32>
          %sum = arith.addi %val, %addend : i32
          memref.store %sum, %out_buf[%i] : memref<8xi32>
        }
        aie.objectfifo.release @objFifo_in1(Consume, 1)
        aie.objectfifo.release @objFifo_out1(Produce, 1)
      }
      aie.end
    }

    // Host-side sequence: describes a 64-element transfer for %out through
    // @objFifo_out0 and one for %in through @objFifo_in0. %buf is not used.
    func.func @sequence(%in : memref<64xi32>, %buf : memref<32xi32>, %out : memref<64xi32>) {
      %zero = arith.constant 0 : i64
      %one = arith.constant 1 : i64
      %count = arith.constant 64 : i64
      aiex.npu.dma_memcpy_nd (0, 0, %out[%zero,%zero,%zero,%zero][%one,%one,%one,%count][%zero,%zero,%zero]) { metadata = @objFifo_out0, id = 1 : i64 } : memref<64xi32>
      aiex.npu.dma_memcpy_nd (0, 0, %in[%zero,%zero,%zero,%zero][%one,%one,%one,%count][%zero,%zero,%zero]) { metadata = @objFifo_in0, id = 0 : i64 } : memref<64xi32>
      // NOTE(review): presumably waits for the output transfer on column 0,
      // channel 0 to finish - confirm against the aiex dialect documentation.
      aiex.npu.sync { column = 0 : i32, row = 0 : i32, direction = 0 : i32, channel = 0 : i32, column_num = 1 : i32, row_num = 1 : i32 }
      return
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
// (c) Copyright 2023 Advanced Micro Devices, Inc. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
// REQUIRES: ryzen_ai | ||
// | ||
// Test flow, as spelled out by the commands below:
//   1. aiecc.py compiles aie1.mlir into add_one.xclbin with kernel name
//      ADDONE (kernel id 0x901).
//   2. aiecc.py compiles aie2.mlir with kernel name ADDTWO (kernel id 0x902),
//      taking add_one.xclbin through --xclbin-input and writing
//      add_two.xclbin. Both steps name insts.txt as their instruction file,
//      so the copy left on disk presumably comes from this second step.
//   3. test.cpp is built with clang, run on the NPU against add_two.xclbin
//      and insts.txt, and its output is piped to FileCheck.
// RUN: %python aiecc.py --xclbin-kernel-name=ADDONE --xclbin-kernel-id=0x901 --xclbin-instance-name=ADDONEINST --no-aiesim --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=add_one.xclbin --npu-insts-name=insts.txt %S/aie1.mlir | ||
// RUN: %python aiecc.py --xclbin-kernel-name=ADDTWO --xclbin-kernel-id=0x902 --xclbin-instance-name=ADDTWOINST --no-aiesim --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-input=add_one.xclbin --xclbin-name=add_two.xclbin --npu-insts-name=insts.txt %S/aie2.mlir | ||
// RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem | ||
// RUN: %run_on_npu ./test.exe -x add_two.xclbin -i insts.txt | FileCheck %s | ||
// CHECK: PASS! | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,221 @@ | ||
//===- test.cpp -------------------------------------------------*- C++ -*-===// | ||
// | ||
// This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
// Copyright (C) 2023, Advanced Micro Devices, Inc. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include <boost/program_options.hpp>
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
|
||
#include "xrt/xrt_bo.h" | ||
#include "xrt/xrt_device.h" | ||
#include "xrt/xrt_kernel.h" | ||
|
||
constexpr int IN_SIZE = 64; | ||
constexpr int OUT_SIZE = 64; | ||
|
||
namespace po = boost::program_options; | ||
|
||
// Verifies that option `name` is present in `vm_in` and that the path stored
// under it can be opened for reading.
//
// Throws std::runtime_error when the option is absent or the file cannot be
// opened.
void check_arg_file_exists(po::variables_map &vm_in, std::string name) {
  // Guard: the option was never supplied on the command line.
  if (vm_in.count(name) == 0) {
    throw std::runtime_error("Error: no " + name + " file was provided\n");
  }

  const std::string path = vm_in[name].as<std::string>();
  std::ifstream probe(path);
  if (probe) {
    return;
  }

  throw std::runtime_error("The " + name + " file " + path +
                           " does not exist.\n");
}
|
||
// Reads the instruction file at `instr_path` and returns its contents as
// 32-bit words.
//
// Each line of the file must start with one hexadecimal number; that number
// becomes one element of the result, in file order.
//
// Throws std::runtime_error when the file cannot be opened (previously this
// silently produced an empty sequence) or when a line does not start with a
// parsable hexadecimal value.
std::vector<uint32_t> load_instr_sequence(const std::string &instr_path) {
  std::ifstream instr_file(instr_path);
  if (!instr_file.is_open()) {
    throw std::runtime_error("Unable to open instruction file " + instr_path +
                             "\n");
  }

  std::vector<uint32_t> instr_v;
  std::string line;
  while (std::getline(instr_file, line)) {
    std::istringstream iss(line);
    uint32_t word = 0;
    if (!(iss >> std::hex >> word)) {
      throw std::runtime_error("Unable to parse instruction file\n");
    }
    instr_v.push_back(word);
  }
  return instr_v;
}
|
||
int main(int argc, const char *argv[]) { | ||
|
||
// Program arguments parsing | ||
po::options_description desc("Allowed options"); | ||
desc.add_options()("help,h", "produce help message")( | ||
"xclbin,x", po::value<std::string>()->required(), | ||
"the input xclbin path")("verbosity,v", | ||
po::value<int>()->default_value(0), | ||
"the verbosity of the output")( | ||
"instr,i", po::value<std::string>()->required(), | ||
"path of file containing userspace instructions to be sent to the LX6"); | ||
po::variables_map vm; | ||
|
||
try { | ||
po::store(po::parse_command_line(argc, argv, desc), vm); | ||
po::notify(vm); | ||
|
||
if (vm.count("help")) { | ||
std::cout << desc << "\n"; | ||
return 1; | ||
} | ||
} catch (const std::exception &ex) { | ||
std::cerr << ex.what() << "\n\n"; | ||
std::cerr << "Usage:\n" << desc << "\n"; | ||
return 1; | ||
} | ||
|
||
check_arg_file_exists(vm, "xclbin"); | ||
check_arg_file_exists(vm, "instr"); | ||
|
||
std::vector<uint32_t> instr_v = | ||
load_instr_sequence(vm["instr"].as<std::string>()); | ||
|
||
int verbosity = vm["verbosity"].as<int>(); | ||
if (verbosity >= 1) | ||
std::cout << "Sequence instr count: " << instr_v.size() << "\n"; | ||
|
||
// Start the XRT test code | ||
// Get a device handle | ||
unsigned int device_index = 0; | ||
auto device = xrt::device(device_index); | ||
|
||
// Load the xclbin | ||
if (verbosity >= 1) | ||
std::cout << "Loading xclbin: " << vm["xclbin"].as<std::string>() << "\n"; | ||
auto xclbin = xrt::xclbin(vm["xclbin"].as<std::string>()); | ||
|
||
// Get the kernel from the xclbin | ||
auto xkernels = xclbin.get_kernels(); | ||
auto xkernel0 = *std::find_if(xkernels.begin(), xkernels.end(), | ||
[](xrt::xclbin::kernel &k) { | ||
auto name = k.get_name(); | ||
std::cout << "Name: " << name << std::endl; | ||
return name == "ADDONE"; | ||
}); | ||
auto kernelName0 = xkernel0.get_name(); | ||
auto xkernel1 = *std::find_if(xkernels.begin(), xkernels.end(), | ||
[](xrt::xclbin::kernel &k) { | ||
auto name = k.get_name(); | ||
std::cout << "Name: " << name << std::endl; | ||
return name == "ADDTWO"; | ||
}); | ||
auto kernelName1 = xkernel1.get_name(); | ||
|
||
if (verbosity >= 1) | ||
std::cout << "Registering xclbin: " << vm["xclbin"].as<std::string>() | ||
<< "\n"; | ||
|
||
device.register_xclbin(xclbin); | ||
|
||
// get a hardware context | ||
if (verbosity >= 1) | ||
std::cout << "Getting hardware context.\n"; | ||
xrt::hw_context context(device, xclbin.get_uuid()); | ||
|
||
// get a kernel handle | ||
if (verbosity >= 1) | ||
std::cout << "Getting handle to kernels: " << kernelName0 << " and " | ||
<< kernelName1 << "\n"; | ||
|
||
auto kernel0 = xrt::kernel(context, kernelName0); | ||
|
||
auto bo0_instr = xrt::bo(device, instr_v.size() * sizeof(int), | ||
XCL_BO_FLAGS_CACHEABLE, kernel0.group_id(0)); | ||
auto bo0_inA = xrt::bo(device, IN_SIZE * sizeof(int32_t), | ||
XRT_BO_FLAGS_HOST_ONLY, kernel0.group_id(2)); | ||
auto bo0_inB = xrt::bo(device, IN_SIZE * sizeof(int32_t), | ||
XRT_BO_FLAGS_HOST_ONLY, kernel0.group_id(3)); | ||
auto bo0_out = xrt::bo(device, OUT_SIZE * sizeof(int32_t), | ||
XRT_BO_FLAGS_HOST_ONLY, kernel0.group_id(4)); | ||
|
||
auto kernel1 = xrt::kernel(context, kernelName1); | ||
|
||
auto bo1_instr = xrt::bo(device, instr_v.size() * sizeof(int), | ||
XCL_BO_FLAGS_CACHEABLE, kernel1.group_id(0)); | ||
auto bo1_inA = xrt::bo(device, IN_SIZE * sizeof(int32_t), | ||
XRT_BO_FLAGS_HOST_ONLY, kernel1.group_id(2)); | ||
auto bo1_inB = xrt::bo(device, IN_SIZE * sizeof(int32_t), | ||
XRT_BO_FLAGS_HOST_ONLY, kernel1.group_id(3)); | ||
auto bo1_out = xrt::bo(device, OUT_SIZE * sizeof(int32_t), | ||
XRT_BO_FLAGS_HOST_ONLY, kernel1.group_id(4)); | ||
|
||
if (verbosity >= 1) | ||
std::cout << "Writing data into buffer objects.\n"; | ||
|
||
uint32_t *bufInA = bo0_inA.map<uint32_t *>(); | ||
std::vector<uint32_t> srcVecA; | ||
for (int i = 0; i < IN_SIZE; i++) | ||
srcVecA.push_back(i + 1); | ||
memcpy(bufInA, srcVecA.data(), (srcVecA.size() * sizeof(uint32_t))); | ||
|
||
void *bufInstr = bo0_instr.map<void *>(); | ||
memcpy(bufInstr, instr_v.data(), instr_v.size() * sizeof(int)); | ||
|
||
bo0_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); | ||
bo0_inA.sync(XCL_BO_SYNC_BO_TO_DEVICE); | ||
|
||
if (verbosity >= 1) | ||
std::cout << "Running Kernel 0.\n"; | ||
|
||
auto run0 = kernel0(bo0_instr, instr_v.size(), bo0_inA, bo0_inB, bo0_out); | ||
run0.wait(); | ||
|
||
bo0_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE); | ||
uint32_t *bufOut = bo0_out.map<uint32_t *>(); | ||
|
||
// same instructions as kernel1 | ||
bufInstr = bo1_instr.map<void *>(); | ||
memcpy(bufInstr, instr_v.data(), instr_v.size() * sizeof(int)); | ||
bo1_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); | ||
|
||
// copy kernel0 output to kernel1 input | ||
bufInA = bo1_inA.map<uint32_t *>(); | ||
memcpy(bufInA, bufOut, IN_SIZE * sizeof(uint32_t)); | ||
bo1_inA.sync(XCL_BO_SYNC_BO_TO_DEVICE); | ||
|
||
if (verbosity >= 1) | ||
std::cout << "Running Kernel 1.\n"; | ||
auto run1 = kernel1(bo1_instr, instr_v.size(), bo1_inA, bo1_inB, bo1_out); | ||
run1.wait(); | ||
|
||
bo1_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE); | ||
bufOut = bo1_out.map<uint32_t *>(); | ||
|
||
int errors = 0; | ||
|
||
for (uint32_t i = 0; i < 64; i++) { | ||
uint32_t ref = (i + 1) + 1 + 2; | ||
if (*(bufOut + i) != ref) { | ||
std::cout << "Error in output " << *(bufOut + i) << " != " << ref | ||
<< std::endl; | ||
errors++; | ||
} else { | ||
std::cout << "Correct output " << *(bufOut + i) << " == " << ref | ||
<< std::endl; | ||
} | ||
} | ||
|
||
if (!errors) { | ||
std::cout << "\nPASS!\n\n"; | ||
return 0; | ||
} else { | ||
std::cout << "\nfailed.\n\n"; | ||
return 1; | ||
} | ||
} |