Skip to content

Commit

Permalink
hack tests for xrt-lite
Browse files (browse the repository at this point in the history)
  • Loading branch information
makslevental committed Oct 12, 2024
1 parent be60159 commit bb239ad
Show file tree
Hide file tree
Showing 22 changed files with 208 additions and 201 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/ci-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ jobs:
# Remove-Item -Path "$pwd\llvm-build" -Force
$env:llvm_install_dir = "$pwd\llvm-install"
echo $env:llvm_install_dir
.\build_tools\download_peano.ps1
$env:peano_install_dir = "$pwd\llvm-aie"
.\build_tools.\build_test_cpp.ps1
- name: Create artifacts
Expand Down
6 changes: 4 additions & 2 deletions build_tools/build_test_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -155,5 +155,7 @@ if [ -d "$llvm_install_dir" ]; then
fi

cp "$build_dir/tools/testing/e2e/iree-e2e-matmul-test" "$install_dir/bin"
mkdir -p "$install_dir/device_tests"
cp "$build_dir"/runtime/plugins/AMD-AIE/iree-amd-aie/driver/xrt-lite/cts/*test "$install_dir/device_tests"
if [[ "$OSTYPE" == "linux"* ]]; then
mkdir -p "$install_dir/device_tests"
cp "$build_dir"/runtime/plugins/AMD-AIE/iree-amd-aie/driver/xrt-lite/cts/*test "$install_dir/device_tests"
fi
2 changes: 1 addition & 1 deletion build_tools/ci/cpu_comparison/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def generate_aie_output(config, aie_vmfb, input_args, function_name, name, outpu
config.iree_run_exe,
f"--module={aie_vmfb}",
*input_args,
"--device=xrt",
"--device=xrt-lite",
f"--output=@{aie_bin}",
]
if function_name:
Expand Down
4 changes: 2 additions & 2 deletions build_tools/ci/run_matmul_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ function run_matmul_test() {

local target_device="npu1_4col"

local device="xrt"
local device="xrt-lite"

local peano_install_path="${PEANO}"

Expand Down Expand Up @@ -530,7 +530,7 @@ run_matmul_test \
--acc_type "f32" \
--target_backend "amd-aie" \
--target_device "npu1_4col" \
--device "xrt" \
--device "xrt-lite" \
--peano_install_path "${PEANO}" \
--amd_aie_install_path "${IREE_INSTALL_DIR}" \
--vitis_path "${VITIS}" \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ struct AMDAIESession
}

void populateHALTargetDevices(IREE::HAL::TargetDeviceList &targets) override {
// #hal.device.target<"xrt", ...
// #hal.device.target<"xrt-lite", ...
// #hal.executable.target<"amd-aie", ...
targets.add("xrt", [=]() { return AMDAIE::createTarget(options); });
targets.add("xrt-lite", [=]() { return AMDAIE::createTarget(options); });
}

void populateHALTargetBackends(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ class AIETargetDevice final : public IREE::HAL::TargetDevice {
targetRegistry.getTargetBackend("amd-aie")->getDefaultExecutableTargets(
context, "amd-aie", configAttr, executableTargetAttrs);

return IREE::HAL::DeviceTargetAttr::get(context, b.getStringAttr("xrt"),
return IREE::HAL::DeviceTargetAttr::get(context, b.getStringAttr("xrt-lite"),
configAttr, executableTargetAttrs);
}

Expand All @@ -113,7 +113,7 @@ class AIETargetBackend final : public IREE::HAL::TargetBackend {
public:
explicit AIETargetBackend(const AMDAIEOptions &options) : options(options) {}

std::string getLegacyDefaultDeviceID() const override { return "xrt"; }
std::string getLegacyDefaultDeviceID() const override { return "xrt-lite"; }

void getDefaultExecutableTargets(
MLIRContext *context, StringRef deviceID, DictionaryAttr deviceConfigAttr,
Expand Down
10 changes: 3 additions & 7 deletions runtime/src/iree-amd-aie/driver/xrt-lite/allocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ extern const iree_hal_allocator_vtable_t iree_hal_xrt_lite_allocator_vtable;
struct iree_hal_xrt_lite_allocator {
iree_hal_resource_t resource;
iree_allocator_t host_allocator;
std::shared_ptr<shim_xdna::device> shim_device;
shim_xdna::device* shim_device;
IREE_STATISTICS(iree_hal_allocator_statistics_t statistics;)

iree_hal_xrt_lite_allocator(iree_allocator_t host_allocator,
std::shared_ptr<shim_xdna::device> shim_device)
shim_xdna::device* shim_device)
: host_allocator(host_allocator), shim_device(shim_device) {
IREE_TRACE_ZONE_BEGIN(z0);
iree_hal_resource_initialize(&iree_hal_xrt_lite_allocator_vtable,
Expand Down Expand Up @@ -266,7 +266,7 @@ static iree_hal_xrt_lite_allocator* iree_hal_xrt_lite_allocator_cast(
}

iree_status_t iree_hal_xrt_lite_allocator_create(
iree_allocator_t host_allocator, std::shared_ptr<shim_xdna::device> device,
iree_allocator_t host_allocator, shim_xdna::device* device,
iree_hal_allocator_t** out_allocator) {
IREE_ASSERT_ARGUMENT(out_allocator);
IREE_TRACE_ZONE_BEGIN(z0);
Expand Down Expand Up @@ -296,11 +296,7 @@ static void iree_hal_xrt_lite_allocator_destroy(
iree_hal_xrt_lite_allocator_cast(base_allocator);
IREE_TRACE_ZONE_BEGIN(z0);

// TODO(max): shouldn't this be happening automatically via the refcounting
// (or just the dtor of device?)
allocator->shim_device.reset();
iree_hal_resource_release(&allocator->resource);
// something's not happening here?
iree_allocator_free(allocator->host_allocator, allocator);

IREE_TRACE_ZONE_END(z0);
Expand Down
2 changes: 1 addition & 1 deletion runtime/src/iree-amd-aie/driver/xrt-lite/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

// Creates a buffer allocator used for persistent allocations.
iree_status_t iree_hal_xrt_lite_allocator_create(
iree_allocator_t host_allocator, std::shared_ptr<shim_xdna::device> device,
iree_allocator_t host_allocator, shim_xdna::device* device,
iree_hal_allocator_t** out_allocator);

#endif // IREE_HAL_DRIVERS_XRT_LITE_ALLOCATOR_H_
6 changes: 3 additions & 3 deletions runtime/src/iree-amd-aie/driver/xrt-lite/device.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ struct iree_hal_xrt_lite_device {
// Block pool used for command buffers with a larger block size (as command
// buffers can contain inlined data uploads).
iree_arena_block_pool_t block_pool;
std::shared_ptr<shim_xdna::device> shim_device;
shim_xdna::device* shim_device;

iree_status_t create_executable_cache(
iree_string_view_t identifier, iree_loop_t loop,
Expand Down Expand Up @@ -162,7 +162,7 @@ iree_status_t iree_hal_xrt_lite_device_create(
identifier, &device->identifier,
reinterpret_cast<char*>(device) + total_size - identifier.size);
device->host_allocator = host_allocator;
device->shim_device = std::make_shared<shim_xdna::device>();
device->shim_device = new shim_xdna::device;

// TODO(null): pass device handles and pool configuration to the allocator.
// Some implementations may share allocators across multiple devices created
Expand Down Expand Up @@ -205,7 +205,7 @@ static void iree_hal_xrt_lite_device_destroy(iree_hal_device_t* base_device) {
// and joined first.

iree_hal_allocator_release(device->device_allocator);
device->shim_device.reset();
delete device->shim_device;
iree_allocator_free(host_allocator, device);

IREE_TRACE_ZONE_END(z0);
Expand Down
37 changes: 8 additions & 29 deletions runtime/src/iree-amd-aie/driver/xrt-lite/direct_command_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "iree-amd-aie/driver/xrt-lite/buffer.h"
#include "iree-amd-aie/driver/xrt-lite/executable.h"
#include "iree-amd-aie/driver/xrt-lite/shim/linux/kmq/hwq.h"
#include "iree-amd-aie/driver/xrt-lite/shim/linux/kmq/kernel.h"
#include "iree/hal/utils/resource_set.h"

// The max number of bindings per descriptor set allowed in the XRT HAL
Expand All @@ -29,7 +30,7 @@ struct iree_hal_xrt_lite_direct_command_buffer {
// Staging arena used for host->device transfers.
iree_arena_allocator_t arena;

std::shared_ptr<shim_xdna::device> shim_device;
shim_xdna::device* shim_device;

struct {
shim_xdna::bo* bindings[IREE_HAL_XRT_LITE_MAX_DESCRIPTOR_SET_BINDING_COUNT];
Expand All @@ -52,12 +53,12 @@ iree_hal_xrt_lite_direct_command_buffer_cast(
iree_hal_command_buffer_t* base_value) {
IREE_HAL_ASSERT_TYPE(base_value,
&iree_hal_xrt_lite_direct_command_buffer_vtable);
return (iree_hal_xrt_lite_direct_command_buffer*)base_value;
return reinterpret_cast<iree_hal_xrt_lite_direct_command_buffer*>(base_value);
}

iree_status_t iree_hal_xrt_lite_direct_command_buffer_create(
std::shared_ptr<shim_xdna::device> shim_device,
iree_hal_allocator_t* device_allocator, iree_hal_command_buffer_mode_t mode,
shim_xdna::device* shim_device, iree_hal_allocator_t* device_allocator,
iree_hal_command_buffer_mode_t mode,
iree_hal_command_category_t command_categories,
iree_host_size_t binding_capacity, iree_arena_block_pool_t* block_pool,
iree_allocator_t host_allocator,
Expand Down Expand Up @@ -106,7 +107,7 @@ static void iree_hal_xrt_lite_direct_command_buffer_destroy(
iree_hal_xrt_lite_direct_command_buffer_cast(base_command_buffer);
iree_allocator_t host_allocator = command_buffer->host_allocator;
IREE_TRACE_ZONE_BEGIN(z0);
command_buffer->shim_device.reset();

iree_hal_resource_set_free(command_buffer->resource_set);
iree_arena_deinitialize(&command_buffer->arena);
iree_allocator_free(host_allocator, command_buffer);
Expand Down Expand Up @@ -135,18 +136,6 @@ static iree_status_t iree_hal_xrt_lite_direct_command_buffer_end(
return iree_ok_status();
}

static void iree_hal_xrt_lite_direct_command_buffer_begin_debug_group(
iree_hal_command_buffer_t* base_command_buffer, iree_string_view_t label,
iree_hal_label_color_t label_color,
const iree_hal_label_location_t* location) {
(void)iree_status_from_code(IREE_STATUS_UNIMPLEMENTED);
}

static void iree_hal_xrt_lite_direct_command_buffer_end_debug_group(
iree_hal_command_buffer_t* base_command_buffer) {
(void)iree_status_from_code(IREE_STATUS_UNIMPLEMENTED);
}

static iree_status_t iree_hal_xrt_lite_direct_command_buffer_execution_barrier(
iree_hal_command_buffer_t* base_command_buffer,
iree_hal_execution_stage_t source_stage_mask,
Expand Down Expand Up @@ -300,8 +289,7 @@ static iree_status_t iree_hal_xrt_lite_direct_command_buffer_dispatch(
cu_name += ":IREE";
shim_xdna::cuidx_t cu_idx = kernel_params.context->open_cu_context(cu_name);

shim_xdna::exec_buf ebuf(command_buffer->shim_device->get_pdev(),
ERT_START_CU);
shim_xdna::kernel ebuf(command_buffer->shim_device->get_pdev(), ERT_START_CU);
ebuf.set_cu_idx(cu_idx);
unsigned int opcode = 3;
ebuf.add_arg_64(opcode);
Expand All @@ -311,13 +299,9 @@ static iree_status_t iree_hal_xrt_lite_direct_command_buffer_dispatch(
shim_xdna::bo* bo = iree_hal_xrt_lite_buffer_handle(
iree_hal_buffer_allocated_buffer(bindings.values[j].buffer));
ebuf.add_arg_bo(*bo);
}

for (iree_host_size_t j = 0; j < bindings.count; ++j) {
shim_xdna::bo* bo = iree_hal_xrt_lite_buffer_handle(
iree_hal_buffer_allocated_buffer(bindings.values[j].buffer));
bo->sync(shim_xdna::direction::host2device);
}

shim_xdna::hw_q* hwq = kernel_params.context->get_hw_queue();
hwq->issue_command(ebuf.get_exec_buf_bo());
hwq->wait_command(ebuf.get_exec_buf_bo(), 0);
Expand All @@ -328,11 +312,6 @@ static iree_status_t iree_hal_xrt_lite_direct_command_buffer_dispatch(
bo->sync(shim_xdna::direction::device2host);
}

for (iree_host_size_t j = 0; j < bindings.count; ++j) {
shim_xdna::bo* bo = iree_hal_xrt_lite_buffer_handle(
iree_hal_buffer_allocated_buffer(bindings.values[j].buffer));
}

IREE_TRACE_ZONE_END(z0);

return iree_ok_status();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ extern "C" {
// |out_command_buffer| must be released by the caller (see
// iree_hal_command_buffer_release).
iree_status_t iree_hal_xrt_lite_direct_command_buffer_create(
std::shared_ptr<shim_xdna::device> shim_device,
shim_xdna::device* shim_device,
iree_hal_allocator_t* device_allocator, iree_hal_command_buffer_mode_t mode,
iree_hal_command_category_t command_categories,
iree_host_size_t binding_capacity, iree_arena_block_pool_t* block_pool,
Expand Down
20 changes: 10 additions & 10 deletions runtime/src/iree-amd-aie/driver/xrt-lite/executable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ iree_amd_aie_hal_xrt_lite_native_executable_flatbuffer_verify(
}

iree_status_t iree_hal_xrt_lite_native_executable_create(
std::shared_ptr<shim_xdna::device> shim_device,
shim_xdna::device* shim_device,
const iree_hal_executable_params_t* executable_params,
iree_allocator_t host_allocator, iree_hal_executable_t** out_executable) {
IREE_ASSERT_ARGUMENT(executable_params);
Expand Down Expand Up @@ -176,8 +176,8 @@ iree_status_t iree_hal_xrt_lite_native_executable_create(
std::vector<char> xclbinVector(
xclbin_fb, xclbin_fb + flatbuffers_string_len(xclbin_fb));
params->xclbinVector = xclbinVector;
// xrt::xclbin xclbin = xrt::xclbin(xclbinVector);
// params->context = shim_device->create_hw_context(xclbin);
// xrt::xclbin xclbin = xrt::xclbin(xclbinVector);
// params->context = shim_device->create_hw_context(xclbin);

uint32_t asm_instr_index =
flatbuffers_uint32_vec_at(asm_instr_indices_vec, entry_ordinal);
Expand All @@ -186,13 +186,13 @@ iree_status_t iree_hal_xrt_lite_native_executable_create(
params->asm_inst =
iree_amd_aie_hal_xrt_AsmInstDef_asm_inst_get(asminst_def);

// uint32_t num_instr = flatbuffers_uint32_vec_len(asm_inst);
// size_t ctrl_code_size = num_instr * sizeof(uint32_t);
// params->bo_ctrl_code =
// shim_device->alloc_bo(ctrl_code_size, XCL_BO_FLAGS_CACHEABLE);
// uint32_t* instr_buffer =
// static_cast<uint32_t*>(params->bo_ctrl_code->map());
// memcpy(instr_buffer, asm_inst, ctrl_code_size);
// uint32_t num_instr = flatbuffers_uint32_vec_len(asm_inst);
// size_t ctrl_code_size = num_instr * sizeof(uint32_t);
// params->bo_ctrl_code =
// shim_device->alloc_bo(ctrl_code_size, XCL_BO_FLAGS_CACHEABLE);
// uint32_t* instr_buffer =
// static_cast<uint32_t*>(params->bo_ctrl_code->map());
// memcpy(instr_buffer, asm_inst, ctrl_code_size);

// Stash the entry point name in the string table for use when tracing.
IREE_TRACE({
Expand Down
2 changes: 1 addition & 1 deletion runtime/src/iree-amd-aie/driver/xrt-lite/executable.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ struct iree_hal_xrt_lite_kernel_params_t {
// |out_executable| must be released by the caller (see
// iree_hal_executable_release).
iree_status_t iree_hal_xrt_lite_native_executable_create(
std::shared_ptr<shim_xdna::device> shim_device,
shim_xdna::device* shim_device,
const iree_hal_executable_params_t* executable_params,
iree_allocator_t host_allocator, iree_hal_executable_t** out_executable);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ struct iree_hal_xrt_lite_nop_executable_cache_t {
// Abstract resource used for injecting reference counting and vtable; must be
// at offset 0.
iree_hal_resource_t resource;
std::shared_ptr<shim_xdna::device> shim_device;
shim_xdna::device* shim_device;
iree_allocator_t host_allocator;
};

Expand All @@ -35,8 +35,8 @@ iree_hal_xrt_lite_nop_executable_cache_cast(
}

iree_status_t iree_hal_xrt_lite_nop_executable_cache_create(
std::shared_ptr<shim_xdna::device> shim_device,
iree_string_view_t identifier, iree_allocator_t host_allocator,
shim_xdna::device* shim_device, iree_string_view_t identifier,
iree_allocator_t host_allocator,
iree_hal_executable_cache_t** out_executable_cache) {
IREE_ASSERT_ARGUMENT(out_executable_cache);
*out_executable_cache = nullptr;
Expand All @@ -62,7 +62,6 @@ static void iree_hal_xrt_lite_nop_executable_cache_destroy(
iree_hal_xrt_lite_nop_executable_cache_cast(base_executable_cache);
IREE_TRACE_ZONE_BEGIN(z0);

executable_cache->shim_device.reset();
iree_allocator_free(executable_cache->host_allocator, executable_cache);

IREE_TRACE_ZONE_END(z0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ extern "C" {
// |out_executable_cache| must be released by the caller (see
// iree_hal_executable_cache_release).
iree_status_t iree_hal_xrt_lite_nop_executable_cache_create(
std::shared_ptr<shim_xdna::device> shim_device,
shim_xdna::device* shim_device,
iree_string_view_t identifier, iree_allocator_t host_allocator,
iree_hal_executable_cache_t** out_executable_cache);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ iree_cc_library(
hwctx.h
hwq.cpp
hwq.h
kernel.cpp
kernel.h
shim_debug.cpp
shim_debug.h
DEPS
Expand Down
Loading

0 comments on commit bb239ad

Please sign in to comment.