forked from Xilinx/mlir-aie
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[ASPLOS][WIP] Passthrough kernel in basic examples (Xilinx#1216)
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
- Loading branch information
Showing
16 changed files
with
444 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
75 changes: 75 additions & 0 deletions
75
programming_examples/basic/passthrough_kernel/CMakeLists.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
# This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
# See https://llvm.org/LICENSE.txt for license information. | ||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
# | ||
# (c) Copyright 2024 Xilinx Inc. | ||
|
||
# parameters | ||
# -DBOOST_ROOT: Path to Boost install | ||
# -DXRT_INC_DIR: Full path to src/runtime_src/core/include in XRT cloned repo | ||
# -DXRT_LIB_DIR: Path to xrt_coreutil.lib | ||
# -DTARGET_NAME: Target name to be built | ||
# -DPASSTHROUGH_SIZE: Value of the PASSTHROUGH_SIZE compile definition (default 4096) | ||
|
||
# cmake needs this line | ||
# 3.13 is the first release that provides target_link_directories(), | ||
# which this file calls further down. | ||
cmake_minimum_required(VERSION 3.13) | ||
|
||
# Look for powershell.exe on the search path. WSL holds its location when | ||
# found and is used below as the switch between the two sets of defaults. | ||
find_program(WSL NAMES powershell.exe) | ||
|
||
# Default locations of Boost and XRT. These are cache entries, so a value | ||
# supplied with -D on the command line is kept instead of the default. | ||
if (NOT WSL) | ||
set(BOOST_ROOT /usr/include/boost CACHE STRING "Path to Boost install") | ||
set(XRT_INC_DIR /opt/xilinx/xrt/include CACHE STRING "Path to XRT cloned repo") | ||
set(XRT_LIB_DIR /opt/xilinx/xrt/lib CACHE STRING "Path to xrt_coreutil.lib") | ||
else() | ||
# powershell.exe was found: use the Windows-style default paths. | ||
set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") | ||
set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") | ||
set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") | ||
endif () | ||
|
||
# User-tunable options: PASSTHROUGH_SIZE is forwarded to the sources as a | ||
# compile definition; TARGET_NAME names both the project and the executable. | ||
set(PASSTHROUGH_SIZE 4096 CACHE STRING "size") | ||
set(TARGET_NAME test CACHE STRING "Target to be built") | ||
|
||
# Both aliases carry the same value: the requested target name. | ||
SET (ProjectName ${TARGET_NAME}) | ||
SET (currentTarget ${TARGET_NAME}) | ||
|
||
# When powershell.exe was found, place Release binaries directly in the | ||
# top-level build directory. | ||
if ( WSL ) | ||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}) | ||
endif () | ||
|
||
project(${ProjectName}) | ||
|
||
# Find packages | ||
find_package(Boost REQUIRED) | ||
|
||
add_executable(${currentTarget} | ||
${CMAKE_CURRENT_SOURCE_DIR}/../../../runtime_lib/test_lib/test_utils.cpp | ||
test.cpp | ||
) | ||
|
||
target_compile_definitions(${currentTarget} PUBLIC | ||
PASSTHROUGH_SIZE=${PASSTHROUGH_SIZE} | ||
DISABLE_ABI_CHECK=1 | ||
) | ||
|
||
target_include_directories (${currentTarget} PUBLIC | ||
${CMAKE_CURRENT_SOURCE_DIR}/../../utils | ||
${CMAKE_CURRENT_SOURCE_DIR}/../../../runtime_lib/test_lib | ||
${XRT_INC_DIR} | ||
${Boost_INCLUDE_DIRS} | ||
) | ||
|
||
target_link_directories(${currentTarget} PUBLIC | ||
${XRT_LIB_DIR} | ||
${Boost_LIBRARY_DIRS} | ||
) | ||
|
||
# Libraries are linked by name and resolved through the link directories | ||
# set above. The Boost libraries are only named in the non-WSL branch. | ||
if (NOT WSL) | ||
target_link_libraries(${currentTarget} PUBLIC | ||
xrt_coreutil | ||
boost_program_options | ||
boost_filesystem | ||
) | ||
else() | ||
# powershell.exe was found: only xrt_coreutil is listed explicitly. | ||
target_link_libraries(${currentTarget} PUBLIC | ||
xrt_coreutil | ||
) | ||
endif() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
##===- Makefile -----------------------------------------------------------===## | ||
# | ||
# This file licensed under the Apache License v2.0 with LLVM Exceptions. | ||
# See https://llvm.org/LICENSE.txt for license information. | ||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
# | ||
##===----------------------------------------------------------------------===## | ||
|
||
include ../../makefile-common | ||
|
||
VPATH := ../../../aie_kernels/aie_generic | ||
|
||
PASSTHROUGH_SIZE = 4096 | ||
|
||
targetname = passThroughKernel | ||
|
||
# 'run' creates no file of that name, so it is declared phony alongside | ||
# the other command-style targets. | ||
.PHONY: all template clean run | ||
|
||
all: build/final_${PASSTHROUGH_SIZE}.xclbin | ||
|
||
build/aie2_lineBased_8b_${PASSTHROUGH_SIZE}.mlir: aie2.py | ||
mkdir -p ${@D} | ||
python3 $< ${PASSTHROUGH_SIZE} > $@ | ||
|
||
build/passThrough.cc.o: passThrough.cc | ||
mkdir -p ${@D} | ||
cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2_FLAGS} -DBIT_WIDTH=8 -c $(<:%=../%) -o ${@F} | ||
|
||
build/final_${PASSTHROUGH_SIZE}.xclbin: build/aie2_lineBased_8b_${PASSTHROUGH_SIZE}.mlir build/passThrough.cc.o | ||
mkdir -p ${@D} | ||
cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \ | ||
--xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%) | ||
|
||
${targetname}.exe: test.cpp | ||
rm -rf _build | ||
mkdir -p _build | ||
cd _build && ${powershell} cmake .. -DTARGET_NAME=${targetname} -DPASSTHROUGH_SIZE=${PASSTHROUGH_SIZE} | ||
cd _build && ${powershell} cmake --build . --config Release | ||
ifeq "${powershell}" "powershell.exe" | ||
cp _build/${targetname}.exe $@ | ||
else | ||
cp _build/${targetname} $@ | ||
endif | ||
|
||
run: ${targetname}.exe build/final_${PASSTHROUGH_SIZE}.xclbin build/insts.txt | ||
${powershell} ./$< -x build/final_${PASSTHROUGH_SIZE}.xclbin -i build/insts.txt -k MLIR_AIE | ||
|
||
clean: | ||
rm -rf build _build ${targetname}.exe |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
# | ||
# This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
# See https://llvm.org/LICENSE.txt for license information. | ||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
# | ||
# (c) Copyright 2024 AMD Inc. | ||
|
||
import sys | ||
|
||
from aie.dialects.aie import * | ||
from aie.dialects.aiex import * | ||
from aie.dialects.scf import * | ||
from aie.extras.context import mlir_mod_ctx | ||
|
||
N = 1024 | ||
|
||
if len(sys.argv) == 2: | ||
N = int(sys.argv[1]) | ||
|
||
lineWidthInBytes = N // 4 # chop input in 4 sub-tensors | ||
lineWidthInInt32s = lineWidthInBytes // 4 | ||
|
||
enableTrace = False | ||
traceSizeInBytes = 8192 | ||
traceSizeInInt32s = traceSizeInBytes // 4 | ||
|
||
|
||
def passthroughKernel(): | ||
with mlir_mod_ctx() as ctx: | ||
|
||
@device(AIEDevice.ipu) | ||
def device_body(): | ||
# define types | ||
memRef_ty = T.memref(lineWidthInBytes, T.ui8()) | ||
|
||
# AIE Core Function declarations | ||
passThroughLine = external_func( | ||
"passThroughLine", inputs=[memRef_ty, memRef_ty, T.i32()] | ||
) | ||
|
||
# Tile declarations | ||
ShimTile = tile(0, 0) | ||
ComputeTile2 = tile(0, 2) | ||
|
||
if enableTrace: | ||
flow(ComputeTile2, "Trace", 0, ShimTile, "DMA", 1) | ||
|
||
# AIE-array data movement with object fifos | ||
of_in = object_fifo("in", ShimTile, ComputeTile2, 2, memRef_ty) | ||
of_out = object_fifo("out", ComputeTile2, ShimTile, 2, memRef_ty) | ||
|
||
# Set up compute tiles | ||
|
||
# Compute tile 2 | ||
# Core program for ComputeTile2, linked against passThrough.cc.o. | ||
@core(ComputeTile2, "passThrough.cc.o") | ||
def core_body(): | ||
# Loop sys.maxsize times; each iteration forwards one line. | ||
for _ in for_(sys.maxsize): | ||
# Acquire one output slot (as producer) and one input line (as consumer). | ||
elemOut = of_out.acquire(ObjectFifoPort.Produce, 1) | ||
elemIn = of_in.acquire(ObjectFifoPort.Consume, 1) | ||
# Call the external kernel on the pair, passing the line width in bytes. | ||
call(passThroughLine, [elemIn, elemOut, lineWidthInBytes]) | ||
# Release both objects back to their fifos. | ||
of_in.release(ObjectFifoPort.Consume, 1) | ||
of_out.release(ObjectFifoPort.Produce, 1) | ||
yield_([]) | ||
|
||
# print(ctx.module.operation.verify()) | ||
|
||
tensorSize = N | ||
tensorSizeInInt32s = tensorSize // 4 | ||
tensor_ty = T.memref(lineWidthInInt32s, T.i32()) | ||
|
||
@FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty) | ||
def sequence(inTensor, outTensor, notUsed): | ||
if enableTrace: | ||
# Trace output | ||
|
||
# Trace_Event0, Trace_Event1: Select which events to trace. | ||
# Note that the event buffers only appear to be transferred to DDR in | ||
# bursts of 256 bytes. If less than 256 bytes are written, you may not | ||
# see trace output, or only see it on the next iteration of your | ||
# kernel invocation, as the buffer gets filled up. Note that, even | ||
# though events are encoded as 4 byte words, it may take more than 64 | ||
# events to fill the buffer to 256 bytes and cause a flush, since | ||
# multiple repeating events can be 'compressed' by the trace mechanism. | ||
# In order to always generate sufficient events, we add the "assert | ||
# TRUE" event to one slot, which fires every cycle, and thus fills our | ||
# buffer quickly. | ||
|
||
# Some events: | ||
# TRUE (0x01) | ||
# STREAM_STALL (0x18) | ||
# LOCK_STALL (0x1A) | ||
# EVENTS_CORE_INSTR_EVENT_1 (0x22) | ||
# EVENTS_CORE_INSTR_EVENT_0 (0x21) | ||
# INSTR_VECTOR (0x25) Core executes a vector MAC, ADD or compare instruction | ||
# INSTR_LOCK_ACQUIRE_REQ (0x2C) Core executes a lock acquire instruction | ||
# INSTR_LOCK_RELEASE_REQ (0x2D) Core executes a lock release instruction | ||
# EVENTS_CORE_PORT_RUNNING_1 (0x4F) | ||
# EVENTS_CORE_PORT_RUNNING_0 (0x4B) | ||
|
||
# Trace_Event0 (4 slots) | ||
IpuWrite32(0, 2, 0x340E0, 0x4B222125) | ||
# Trace_Event1 (4 slots) | ||
IpuWrite32(0, 2, 0x340E4, 0x2D2C1A4F) | ||
|
||
# Event slots as configured above: | ||
# 0: Kernel executes vector instruction | ||
# 1: Event 0 -- Kernel starts | ||
# 2: Event 1 -- Kernel done | ||
# 3: Port_Running_0 | ||
# 4: Port_Running_1 | ||
# 5: Lock Stall | ||
# 6: Lock Acquire Instr | ||
# 7: Lock Release Instr | ||
|
||
# Stream_Switch_Event_Port_Selection_0 | ||
# This is necessary to capture the Port_Running_0 and Port_Running_1 events | ||
IpuWrite32(0, 2, 0x3FF00, 0x121) | ||
|
||
# Trace_Control0: Define trace start and stop triggers. Set start event TRUE. | ||
IpuWrite32(0, 2, 0x340D0, 0x10000) | ||
|
||
# Start trace copy out. | ||
IpuWriteBdShimTile( | ||
bd_id=3, | ||
buffer_length=traceSizeInBytes, | ||
buffer_offset=tensorSize, | ||
enable_packet=0, | ||
out_of_order_id=0, | ||
packet_id=0, | ||
packet_type=0, | ||
column=0, | ||
column_num=1, | ||
d0_stride=0, | ||
d0_wrap=0, | ||
d1_stride=0, | ||
d1_wrap=0, | ||
d2_stride=0, | ||
ddr_id=2, | ||
iteration_current=0, | ||
iteration_stride=0, | ||
iteration_wrap=0, | ||
lock_acq_enable=0, | ||
lock_acq_id=0, | ||
lock_acq_val=0, | ||
lock_rel_id=0, | ||
lock_rel_val=0, | ||
next_bd=0, | ||
use_next_bd=0, | ||
valid_bd=1, | ||
) | ||
IpuWrite32(0, 0, 0x1D20C, 0x3) | ||
|
||
ipu_dma_memcpy_nd( | ||
metadata="in", | ||
bd_id=0, | ||
mem=inTensor, | ||
sizes=[1, 1, 1, tensorSizeInInt32s], | ||
) | ||
ipu_dma_memcpy_nd( | ||
metadata="out", | ||
bd_id=1, | ||
mem=outTensor, | ||
sizes=[1, 1, 1, tensorSizeInInt32s], | ||
) | ||
ipu_sync(column=0, row=0, direction=0, channel=0) | ||
|
||
print(ctx.module) | ||
|
||
|
||
passthroughKernel() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
// (c) Copyright 2023 Advanced Micro Devices, Inc. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
// REQUIRES: ryzen_ai, chess | ||
// | ||
// RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -c %S/../../../aie_kernels/aie_generic/passThrough.cc -o passThrough.cc.o | ||
// RUN: %python %S/aie2.py 4096 | aie-opt -cse -canonicalize -o ./aie.mlir | ||
// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir | ||
// RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall -DPASSTHROUGH_SIZE=4096 -I%S/../../utils %S/../../utils/xrtUtils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem | ||
// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s | ||
// CHECK: PASS! | ||
|
Oops, something went wrong.