diff --git a/README.md b/README.md
index 03289fdef..18c7cbe92 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
# AMD AIE Plugin for IREE
-This repository contains an early-phase IREE compiler and runtime plugin for interfacing the AMD AIE accelerator to IREE.
+This repository contains an early-phase IREE compiler and runtime plugin for targeting AMD NPUs with IREE.
## Developer Setup
@@ -21,8 +21,7 @@ git clone --recursive git@github.com:nod-ai/iree-amd-aie.git
git clone --recursive https://github.com/nod-ai/iree-amd-aie.git
```
-or if you want a faster checkout
-
+or, if you want a faster checkout,
```
git \
@@ -32,10 +31,11 @@ git \
clone \
--recursive \
--shallow-submodules \
- https://github.com/nod-ai/iree-amd-aie.git
+ git@github.com:nod-ai/iree-amd-aie.git # https://github.com/nod-ai/iree-amd-aie.git
```
-The above avoids cloning entire repo histories, and skips unused nested submodules.
+The above avoids cloning the entire history of each submodule, and skips a few currently unused
+submodules that are nested in IREE.
## Building (along with IREE)
@@ -62,7 +62,7 @@ cmake --build
### Instructions
-The bare minimum configure command for IREE with the amd-aie plugin
+The bare minimum configure command for IREE with the amd-aie plugin
```
cmake \
@@ -81,7 +81,8 @@ Very likely, you will want to use `ccache` and `lld` (or some other modern linke
-DCMAKE_SHARED_LINKER_FLAGS="-fuse-ld=lld"
```
-If you don't plan on using any of IREE's frontends or backends/targets (e.g., you're doing work on this code base itself), you can opt-out of everything (except the `llvm-cpu` backend) with
+If you don't plan on using any of IREE's frontends or backends/targets (e.g., you're doing work on this code base itself),
+you can opt out of everything (except the `llvm-cpu` backend) with
```
-DIREE_INPUT_STABLEHLO=OFF \
@@ -104,9 +105,29 @@ If you're "bringing your own LLVM", i.e., you have a prebuilt/compiled distribut
-DIREE_BUILD_BUNDLED_LLVM=OFF
```
-In this case you will need to supply `-DLLVM_EXTERNAL_LIT=` (e.g., `pip install lit; SOMEWHERE=$(which lit)`).
+In this case you will need `lit` somewhere in your environment, and you will need to pass `-DLLVM_EXTERNAL_LIT=<SOMEWHERE>` to CMake
+(e.g., `pip install lit; SOMEWHERE=$(which lit)`).
+
+See [Bringing your own LLVM](#bringing-your-own-llvm) below for more information on using prebuilt/compiled distributions of LLVM.
+
+## Testing
+
+Lit tests (i.e., compiler tests) specific to AIE can be run with something like
+
+```
+cd <iree-build-dir>
+ctest -R amd-aie --output-on-failure -j 10
+```
+
+(the `-j 10` runs `10` tests in parallel)
+
+Other tests, which run on device, are in the `build_tools` subdirectory.
-Note, getting the right/matching build of LLVM, that works with IREE is tough (besides the commit hash, there are various flags to set).
+## Pro-tips
+
+### Bringing your own LLVM
+
+When using a pre-built distribution of LLVM, getting the right/matching build that works with IREE is tough (besides the commit hash, there are various flags to set).
To enable adventurous users to avail themselves of `-DIREE_BUILD_BUNDLED_LLVM=OFF` we cache/store/save the LLVM distribution for every successful CI run.
These can then be downloaded by checking the artifacts section of any recent CI run's [Summary page](https://github.com/nod-ai/iree-amd-aie/actions/runs/10713474448):
@@ -114,16 +135,31 @@ These can then be downloaded by checking the artifacts section of any recent CI
-## Testing
-Lit tests specific to AIE can be run with something like
+### Debugging HAL
+
+You can turn on HAL API tracing by adding to CMake:
```
-cd
-ctest -R amd-aie
+-DIREE_ENABLE_RUNTIME_TRACING=ON
+-DIREE_TRACING_PROVIDER=console
+# optional but recommended
+-DIREE_TRACING_CONSOLE_FLUSH=1
```
-Other tests, which run on device, are in the `build_tools` subdirectory.
+This will show you all the HAL APIs that have `IREE_TRACE_ZONE_BEGIN ... IREE_TRACE_ZONE_END` that are hit during a run/execution (of, e.g., `iree-run-module`).
+
+You can turn on VM tracing by adding to CMake:
+
+```
+-DIREE_VM_EXECUTION_TRACING_ENABLE=1
+-DIREE_VM_EXECUTION_TRACING_FORCE_ENABLE=1
+# optional
+-DIREE_VM_EXECUTION_TRACING_SRC_LOC_ENABLE=1
+```
+
+This will show you all of the [VM dispatches](https://github.com/iree-org/iree/blob/0e8a5737dfe49a48a4e9c15ba7a7d24dd2fd7623/runtime/src/iree/vm/bytecode/dispatch.c#L661) that actually occur during a run/execution.
+Note, this is roughly equivalent to [passing](https://github.com/nod-ai/iree-amd-aie/blob/737092791dc2428ad71bc172f69804c583b0f60e/build_tools/ci/run_matmul_test.sh#L420) `--compile-to=vm` to `iree-compile`.
## Architectural overview (out of date)
diff --git a/build_tools/ci/run_matmul_test.sh b/build_tools/ci/run_matmul_test.sh
index f4954a0d5..1ed121c44 100755
--- a/build_tools/ci/run_matmul_test.sh
+++ b/build_tools/ci/run_matmul_test.sh
@@ -441,7 +441,7 @@ function run_matmul_test() {
fi
fi
- # Renable exit on failure:
+ # Re-enable exit on failure:
echo "**** Generating calls .vmfb file for ${name} ****"
${IREE_COMPILE_EXE} "${calls_ir}" \
--iree-hal-target-backends=${target_backend} \
@@ -793,7 +793,19 @@ if [ -d "$VITIS" ]; then
fi
-echo "\n\n"
+# Note: this will not actually show any devices because --xrt_lite_n_core_rows and --xrt_lite_n_core_cols are not passed;
+# they are omitted here to keep the conditional slightly more succinct.
+if [[ $($IREE_INSTALL_DIR/bin/iree-benchmark-module --dump_devices | grep xrt-lite) ]]; then
+ $IREE_INSTALL_DIR/bin/iree-benchmark-module \
+ --module=$OUTPUT_DIR/mm_test1_bf16_f32_m64_n64_k64.vmfb \
+ --function=matmul_64x64_64xbf16_ \
+ --input=64x64xbf16 \
+ --input=64x64xbf16 \
+ --device=xrt-lite \
+ --benchmark_repetitions=10 \
+ --xrt_lite_n_core_rows=$XRT_LITE_N_CORE_ROWS \
+ --xrt_lite_n_core_cols=$XRT_LITE_N_CORE_COLS
+fi
echo "$MATMUL_TESTS_RUN matmul tests run!"
if [ $MATMUL_TESTS_FAILS -ne 0 ]; then
diff --git a/runtime/src/iree-amd-aie/driver/xrt-lite/driver.cc b/runtime/src/iree-amd-aie/driver/xrt-lite/driver.cc
index 87a7b9c1f..3dbba529f 100644
--- a/runtime/src/iree-amd-aie/driver/xrt-lite/driver.cc
+++ b/runtime/src/iree-amd-aie/driver/xrt-lite/driver.cc
@@ -127,6 +127,7 @@ namespace {
const iree_hal_driver_vtable_t iree_hal_xrt_lite_driver_vtable = {
.destroy = iree_hal_xrt_lite_driver_destroy,
.query_available_devices = iree_hal_xrt_lite_driver_query_available_devices,
+ .dump_device_info = unimplemented_ok_status,
.create_device_by_id = iree_hal_xrt_lite_driver_create_device_by_id,
.create_device_by_path = iree_hal_xrt_lite_driver_create_device_by_path,
};