-
Notifications
You must be signed in to change notification settings - Fork 86
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ASPLOS][WIP] Passthrough kernel in basic examples #1216
Merged
Merged
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
cd2345a
basic passthrough_kernel
denolf b17f178
rename and clean up
denolf ab84b08
Merge branch 'asplos' into passthroughKernel
denolf 61511b2
fix passThrough with images
denolf 2f7ce0e
Apply suggestions from code review
denolf a9a77a7
apply clang-format patch
denolf ec069ab
fix format with black
denolf e96c54c
merge asplos
denolf 509b2c1
chop in 4 sub tensors and add events to kernel
denolf File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
75 changes: 75 additions & 0 deletions
75
programming_examples/basic/passthrough_kernel/CMakeLists.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024 Xilinx Inc.

# parameters
# -DBOOST_ROOT: Path to Boost install
# -DXRT_INC_DIR: Full path to src/runtime_src/core/include in XRT cloned repo
# -DXRT_LIB_DIR: Path to xrt_coreutil.lib
# -DPASSTHROUGH_SIZE: Value forwarded to the sources as the PASSTHROUGH_SIZE
#                     preprocessor definition
# -DTARGET_NAME: Target name to be built

# cmake needs this line.
# target_link_directories() (used below) only exists since CMake 3.13, so that
# is the lowest version this file can really be configured with.
cmake_minimum_required(VERSION 3.13)

# WSL holds the location of powershell.exe when it can be found. The variable
# is used below to choose between the two sets of default paths and link
# libraries.
find_program(WSL NAMES powershell.exe)

if(NOT WSL)
  set(BOOST_ROOT /usr/include/boost CACHE STRING "Path to Boost install")
  set(XRT_INC_DIR /opt/xilinx/xrt/include CACHE STRING "Path to XRT cloned repo")
  set(XRT_LIB_DIR /opt/xilinx/xrt/lib CACHE STRING "Path to xrt_coreutil.lib")
else()
  set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
  set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
  set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
endif()

set(PASSTHROUGH_SIZE 4096 CACHE STRING "size")
set(TARGET_NAME test CACHE STRING "Target to be built")

# The project and its single executable share the user-selected name.
set(ProjectName ${TARGET_NAME})
set(currentTarget ${TARGET_NAME})

if(WSL)
  # Put Release executables directly in the build directory rather than in a
  # per-configuration sub-directory.
  set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR})
endif()

project(${ProjectName})

# Find packages
find_package(Boost REQUIRED)

add_executable(${currentTarget}
  ${CMAKE_CURRENT_SOURCE_DIR}/../../../runtime_lib/test_lib/test_utils.cpp
  test.cpp
)

target_compile_definitions(${currentTarget} PUBLIC
  PASSTHROUGH_SIZE=${PASSTHROUGH_SIZE}
  DISABLE_ABI_CHECK=1
)

target_include_directories(${currentTarget} PUBLIC
  ${CMAKE_CURRENT_SOURCE_DIR}/../../utils
  ${CMAKE_CURRENT_SOURCE_DIR}/../../../runtime_lib/test_lib
  ${XRT_INC_DIR}
  ${Boost_INCLUDE_DIRS}
)

target_link_directories(${currentTarget} PUBLIC
  ${XRT_LIB_DIR}
  ${Boost_LIBRARY_DIRS}
)

# xrt_coreutil is linked in every configuration.
target_link_libraries(${currentTarget} PUBLIC
  xrt_coreutil
)

# The Boost libraries are only named explicitly when powershell.exe was not
# found; the other configuration links xrt_coreutil alone.
if(NOT WSL)
  target_link_libraries(${currentTarget} PUBLIC
    boost_program_options
    boost_filesystem
  )
endif()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
##===- Makefile -----------------------------------------------------------===##
#
# This file licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
##===----------------------------------------------------------------------===##

include ../../makefile-common

# Lets make find passThrough.cc, which does not live in this directory.
VPATH := ../../../aie_kernels/aie_generic

# Passed to aie2.py and to the host build; also part of the artifact names.
PASSTHROUGH_SIZE = 4096

targetname = passThroughKernel

# `run` never produces a file called "run", so it is declared phony alongside
# the other command-only targets; otherwise a stray file with that name would
# stop the rule from executing.
.PHONY: all template run clean

all: build/final_${PASSTHROUGH_SIZE}.xclbin

# Generate the MLIR design by running the Python description.
build/aie2_lineBased_8b_${PASSTHROUGH_SIZE}.mlir: aie2.py
	mkdir -p ${@D}
	python3 $< ${PASSTHROUGH_SIZE} > $@

# Compile the kernel source. The command runs inside build/, hence the
# prerequisite path is prefixed with ../
build/passThrough.cc.o: passThrough.cc
	mkdir -p ${@D}
	cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2_FLAGS} -DBIT_WIDTH=8 -c $(<:%=../%) -o ${@F}

# Produce the xclbin; the same aiecc.py invocation is asked to write insts.txt.
build/final_${PASSTHROUGH_SIZE}.xclbin: build/aie2_lineBased_8b_${PASSTHROUGH_SIZE}.mlir build/passThrough.cc.o
	mkdir -p ${@D}
	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \
		--xclbin-name=${@F} --ipu-insts-name=insts.txt $(<:%=../%)

# Build the host executable with CMake in a fresh _build directory and copy
# it next to this Makefile. Only the powershell build produces a .exe suffix.
${targetname}.exe: test.cpp
	rm -rf _build
	mkdir -p _build
	cd _build && ${powershell} cmake .. -DTARGET_NAME=${targetname} -DPASSTHROUGH_SIZE=${PASSTHROUGH_SIZE}
	cd _build && ${powershell} cmake --build . --config Release
ifeq "${powershell}" "powershell.exe"
	cp _build/${targetname}.exe $@
else
	cp _build/${targetname} $@
endif

run: ${targetname}.exe build/final_${PASSTHROUGH_SIZE}.xclbin build/insts.txt
	${powershell} ./$< -x build/final_${PASSTHROUGH_SIZE}.xclbin -i build/insts.txt -k MLIR_AIE

clean:
	rm -rf build _build ${targetname}.exe
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024 AMD Inc.

import sys

from aie.dialects.aie import *
from aie.dialects.aiex import *
from aie.dialects.scf import *
from aie.extras.context import mlir_mod_ctx

# Total tensor size in bytes; may be overridden by the single command-line
# argument.
N = 1024

if len(sys.argv) == 2:
    N = int(sys.argv[1])

lineWidthInBytes = N // 4  # chop input in 4 sub-tensors
lineWidthInInt32s = lineWidthInBytes // 4

enableTrace = False
traceSizeInBytes = 8192
traceSizeInInt32s = traceSizeInBytes // 4


def passthroughKernel():
    """Build the pass-through design and print the resulting MLIR module.

    The design declares a shim tile (0, 0) and a compute tile (0, 2), connects
    them with an "in" and an "out" object FIFO, and has the compute tile call
    the external `passThroughLine` function on one `lineWidthInBytes`-sized
    buffer per loop iteration. A `sequence` function issues the DMA transfers
    for the whole tensor and, when `enableTrace` is set, the trace register
    configuration. The module is written to stdout.
    """
    with mlir_mod_ctx() as ctx:

        @device(AIEDevice.ipu)
        def device_body():
            # define types
            memRef_ty = T.memref(lineWidthInBytes, T.ui8())

            # AIE Core Function declarations
            passThroughLine = external_func(
                "passThroughLine", inputs=[memRef_ty, memRef_ty, T.i32()]
            )

            # Tile declarations
            ShimTile = tile(0, 0)
            ComputeTile2 = tile(0, 2)

            if enableTrace:
                flow(ComputeTile2, "Trace", 0, ShimTile, "DMA", 1)

            # AIE-array data movement with object fifos
            of_in = object_fifo("in", ShimTile, ComputeTile2, 2, memRef_ty)
            of_out = object_fifo("out", ComputeTile2, ShimTile, 2, memRef_ty)

            # Set up compute tiles

            # Compute tile 2
            @core(ComputeTile2, "passThrough.cc.o")
            def core_body():
                # sys.maxsize as trip count: the loop is meant to run for as
                # long as data keeps arriving.
                for _ in for_(sys.maxsize):
                    elemOut = of_out.acquire(ObjectFifoPort.Produce, 1)
                    elemIn = of_in.acquire(ObjectFifoPort.Consume, 1)
                    call(passThroughLine, [elemIn, elemOut, lineWidthInBytes])
                    of_in.release(ObjectFifoPort.Consume, 1)
                    of_out.release(ObjectFifoPort.Produce, 1)
                    yield_([])

            #    print(ctx.module.operation.verify())

            tensorSize = N
            tensorSizeInInt32s = tensorSize // 4
            # The host-side tensors span the whole N bytes, i.e.
            # tensorSizeInInt32s words -- the same element count that the DMA
            # transfers below request -- not a single quarter-size line.
            tensor_ty = T.memref(tensorSizeInInt32s, T.i32())

            @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty)
            def sequence(inTensor, outTensor, notUsed):
                if enableTrace:
                    # Trace output

                    # Trace_Event0, Trace_Event1: Select which events to trace.
                    # Note that the event buffers only appear to be transferred to DDR in
                    # bursts of 256 bytes. If less than 256 bytes are written, you may not
                    # see trace output, or only see it on the next iteration of your
                    # kernel invocation, as the buffer gets filled up. Note that, even
                    # though events are encoded as 4 byte words, it may take more than 64
                    # events to fill the buffer to 256 bytes and cause a flush, since
                    # multiple repeating events can be 'compressed' by the trace mechanism.
                    # In order to always generate sufficient events, we add the "assert
                    # TRUE" event to one slot, which fires every cycle, and thus fills our
                    # buffer quickly.

                    # Some events:
                    # TRUE                       (0x01)
                    # STREAM_STALL               (0x18)
                    # LOCK_STALL                 (0x1A)
                    # EVENTS_CORE_INSTR_EVENT_1  (0x22)
                    # EVENTS_CORE_INSTR_EVENT_0  (0x21)
                    # INSTR_VECTOR               (0x25)  Core executes a vector MAC, ADD or compare instruction
                    # INSTR_LOCK_ACQUIRE_REQ     (0x2C)  Core executes a lock acquire instruction
                    # INSTR_LOCK_RELEASE_REQ     (0x2D)  Core executes a lock release instruction
                    # EVENTS_CORE_PORT_RUNNING_1 (0x4F)
                    # EVENTS_CORE_PORT_RUNNING_0 (0x4B)

                    # Trace_Event0  (4 slots)
                    IpuWrite32(0, 2, 0x340E0, 0x4B222125)
                    # Trace_Event1  (4 slots)
                    IpuWrite32(0, 2, 0x340E4, 0x2D2C1A4F)

                    # Event slots as configured above:
                    # 0: Kernel executes vector instruction
                    # 1: Event 0 -- Kernel starts
                    # 2: Event 1 -- Kernel done
                    # 3: Port_Running_0
                    # 4: Port_Running_1
                    # 5: Lock Stall
                    # 6: Lock Acquire Instr
                    # 7: Lock Release Instr

                    # Stream_Switch_Event_Port_Selection_0
                    # This is necessary to capture the Port_Running_0 and Port_Running_1 events
                    IpuWrite32(0, 2, 0x3FF00, 0x121)

                    # Trace_Control0: Define trace start and stop triggers. Set start event TRUE.
                    IpuWrite32(0, 2, 0x340D0, 0x10000)

                    # Start trace copy out.
                    IpuWriteBdShimTile(
                        bd_id=3,
                        buffer_length=traceSizeInBytes,
                        buffer_offset=tensorSize,
                        enable_packet=0,
                        out_of_order_id=0,
                        packet_id=0,
                        packet_type=0,
                        column=0,
                        column_num=1,
                        d0_stride=0,
                        d0_wrap=0,
                        d1_stride=0,
                        d1_wrap=0,
                        d2_stride=0,
                        ddr_id=2,
                        iteration_current=0,
                        iteration_stride=0,
                        iteration_wrap=0,
                        lock_acq_enable=0,
                        lock_acq_id=0,
                        lock_acq_val=0,
                        lock_rel_id=0,
                        lock_rel_val=0,
                        next_bd=0,
                        use_next_bd=0,
                        valid_bd=1,
                    )
                    IpuWrite32(0, 0, 0x1D20C, 0x3)

                # Move the whole tensor in and out through the two object
                # FIFOs, then synchronise.
                ipu_dma_memcpy_nd(
                    metadata="in",
                    bd_id=0,
                    mem=inTensor,
                    sizes=[1, 1, 1, tensorSizeInInt32s],
                )
                ipu_dma_memcpy_nd(
                    metadata="out",
                    bd_id=1,
                    mem=outTensor,
                    sizes=[1, 1, 1, tensorSizeInInt32s],
                )
                ipu_sync(column=0, row=0, direction=0, channel=0)

    print(ctx.module)


passthroughKernel()
denolf marked this conversation as resolved.
Show resolved
Hide resolved
denolf marked this conversation as resolved.
Show resolved
Hide resolved
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
// (c) Copyright 2023 Advanced Micro Devices, Inc. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
// REQUIRES: ryzen_ai, chess | ||
// | ||
// RUN: xchesscc_wrapper aie2 -I %aietools/include -DBIT_WIDTH=8 -c %S/../../../aie_kernels/aie_generic/passThrough.cc -o passThrough.cc.o | ||
// RUN: %python %S/aie2.py 4096 | aie-opt -cse -canonicalize -o ./aie.mlir | ||
// RUN: %python aiecc.py --xbridge --aie-generate-cdo --aie-generate-ipu --no-compile-host --xclbin-name=aie.xclbin --ipu-insts-name=insts.txt ./aie.mlir | ||
// RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall -DPASSTHROUGH_SIZE=4096 -I%S/../../utils %S/../../utils/xrtUtils.cpp %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem | ||
// RUN: %run_on_ipu ./test.exe -x aie.xclbin -k MLIR_AIE -i insts.txt | FileCheck %s | ||
// CHECK: PASS! | ||
|
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[clang-format] reported by reviewdog 🐶