Skip to content

Commit

Permalink
Update programming examples from tutorial branch (#1217)
Browse files Browse the repository at this point in the history
Co-authored-by: Philip James-Roxby <phil.jamesroxby@gmail.com>
Co-authored-by: pjr <pjr@xilinx.com>
Co-authored-by: Joseph Melber <jgmelber@gmail.com>
  • Loading branch information
4 people authored Apr 10, 2024
1 parent 41e89be commit c835974
Show file tree
Hide file tree
Showing 17 changed files with 1,643 additions and 96 deletions.
61 changes: 0 additions & 61 deletions programming_examples/basic/eltwise_add/add.cc

This file was deleted.

69 changes: 69 additions & 0 deletions programming_examples/basic/eltwise_mul/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2023 Advanced Micro Devices, Inc.

# parameters
# -DBOOST_ROOT: Path to Boost install
# -DXRT_INC_DIR: Full path to src/runtime_src/core/include in XRT cloned repo
# -DXRT_LIB_DIR: Path to xrt_coreutil.lib
# -DTARGET_NAME: Target name to be built

# CMake needs this line, and it must come before project().
# 3.13 is the floor because this file calls target_link_directories(), which
# was introduced in CMake 3.13. A floor below 3.5 is also deprecated by recent
# CMake releases.
cmake_minimum_required(VERSION 3.13)

# WSL is set to the path of powershell.exe when it can be found, and is used
# below as the switch between the Windows-style and the Linux defaults.
find_program(WSL NAMES powershell.exe)

# Default locations of the external dependencies. All three are cache
# variables, so they can be overridden with -D<name>=<path> on the command line.
if (NOT WSL)
    set(BOOST_ROOT /usr/include/boost CACHE STRING "Path to Boost install")
    set(XRT_INC_DIR /opt/xilinx/xrt/include CACHE STRING "Path to XRT cloned repo")
    set(XRT_LIB_DIR /opt/xilinx/xrt/lib CACHE STRING "Path to xrt_coreutil.lib")
else()
    set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
    set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
    set(XRT_LIB_DIR C:/Technical/xrtIPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
endif()

# Name of the executable; it is also used as the project name.
set(TARGET_NAME test CACHE STRING "Target to be built")

set(ProjectName ${TARGET_NAME})
set(currentTarget ${TARGET_NAME})

# When powershell.exe was found, place Release binaries directly in the build
# directory rather than in a per-configuration subdirectory.
if (WSL)
    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR})
endif()

project(${ProjectName})

# Locate Boost; configuration stops here if it cannot be found.
find_package(Boost REQUIRED)

# The host program is built from a single translation unit.
add_executable(${currentTarget} test.cpp)

target_compile_definitions(${currentTarget} PUBLIC DISABLE_ABI_CHECK=1)

# Header search path: XRT, Boost and the shared example utilities.
target_include_directories(${currentTarget} PUBLIC
    ${XRT_INC_DIR}
    ${Boost_INCLUDE_DIRS}
    ../../../programming_examples/utils
)

# Library search path for the libraries named below.
target_link_directories(${currentTarget} PUBLIC
    ${XRT_LIB_DIR}
    ${Boost_LIBRARY_DIRS}
)

# xrt_coreutil is linked in every environment.
target_link_libraries(${currentTarget} PUBLIC xrt_coreutil)

# The Boost libraries are linked by name only when powershell.exe was not found.
if (NOT WSL)
    target_link_libraries(${currentTarget} PUBLIC
        boost_program_options
        boost_filesystem
    )
endif()
47 changes: 47 additions & 0 deletions programming_examples/basic/eltwise_mul/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
##===- Makefile -----------------------------------------------------------===##
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
##===----------------------------------------------------------------------===##

# Shared settings for the basic examples. CHESSCCWRAP2_FLAGS, REPO_ROOT and
# powershell are not defined in this file -- presumably they come from this
# include (TODO confirm).
include ../../../programming_examples/basic/makefile-common

# `all` names a goal, not a file, so it must run even if a file called `all`
# exists in this directory.
.PHONY: all
all: build/final.xclbin

# Base name of the host executable built further down.
targetname = myEltwiseMul

# Compile the AIE2 multiply kernel into build/mul.o. The kernel source is
# listed as a prerequisite so the object is rebuilt whenever mul.cc changes.
build/mul.o: ${REPO_ROOT}/aie_kernels/aie2/mul.cc
	mkdir -p ${@D}
	cd ${@D} && xchesscc_wrapper ${CHESSCCWRAP2_FLAGS} -I. -c ${REPO_ROOT}/aie_kernels/aie2/mul.cc -o ${@F}

# Run the Python design description and capture the MLIR it prints.
build/aie.mlir: aie2.py
	mkdir -p ${@D}
	python3 $< > $@

# Compile the MLIR design (first prerequisite, referenced as ${<F}) into the
# xclbin. The same aiecc.py invocation is asked to write insts.txt.
build/final.xclbin: build/aie.mlir build/mul.o
	mkdir -p ${@D}
	cd ${@D} && aiecc.py --aie-generate-cdo --aie-generate-ipu --no-compile-host \
		--xclbin-name=${@F} --ipu-insts-name=insts.txt ${<F}

# Build the host test program with CMake in a scratch _build directory (wiped
# on every run) and copy the resulting binary to ${targetname}.exe.
# ${powershell} is not defined in this file -- presumably makefile-common sets
# it to powershell.exe on Windows/WSL and leaves it empty elsewhere (TODO confirm).
# NOTE(review): ${subdir} is not set in this file -- confirm whether
# -Dsubdir is still needed.
${targetname}.exe: test.cpp
	rm -rf _build
	mkdir -p _build
	# cd _build && ${powershell} cmake .. -DTARGET_NAME=${targetname}
	cd _build && ${powershell} cmake -E env CXXFLAGS="-std=c++23 -ggdb" cmake .. -D CMAKE_C_COMPILER=gcc-13 -D CMAKE_CXX_COMPILER=g++-13 -DTARGET_NAME=${targetname} -Dsubdir=${subdir}
	cd _build && ${powershell} cmake --build . --config Release
# The powershell branch copies <name>.exe; the other branch copies the
# extension-less binary. Both end up as ${targetname}.exe.
ifeq "${powershell}" "powershell.exe"
	cp _build/${targetname}.exe $@
else
	cp _build/${targetname} $@
endif

# None of these goals produces a file with its own name. Declaring them phony
# keeps them working even if a file called run, run_py or clean exists.
.PHONY: run run_py clean

# Run the C++ host program ($< is the executable) against the built design.
run: ${targetname}.exe build/final.xclbin build/insts.txt
	${powershell} ./$< -x build/final.xclbin -i build/insts.txt -k MLIR_AIE

# Run the Python host script against the built design.
run_py: build/final.xclbin build/insts.txt
	${powershell} python3 test.py -x build/final.xclbin -i build/insts.txt -k MLIR_AIE

# Remove both build directories and the copied host executable.
clean:
	rm -rf build _build ${targetname}.exe
20 changes: 20 additions & 0 deletions programming_examples/basic/eltwise_mul/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<!---//===- README.md --------------------------*- Markdown -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Copyright (C) 2022, Advanced Micro Devices, Inc.
//
//===----------------------------------------------------------------------===//-->

# <ins>Section 3 - My First Program</ins>

In this section, we'll put together what you learned in [section-1](../section-1) about defining a basic structural design in Python and combine it with the data movement part from [section-2](../section-2) to build our first program. We will then run a simulation of this program and also run the design on hardware (Ryzen AI).

* Introduce example of first simple program (Bias Add)
* Walk through syntax of aie2.py, test.cpp, test_utils.h, maybe CMakeLists.txt and Makefile/ makefile-common as well
* need to remove trace parts from test.cpp for now and move it to Section-4

* Illustrate how to run the built-in simulation of a single-core design
* Illustrate how to run designs on Ryzen AI enabled hardware
151 changes: 151 additions & 0 deletions programming_examples/basic/eltwise_mul/aie2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2023 AMD Inc.

import sys

from aie.dialects.aie import *
from aie.dialects.aiex import *
from aie.dialects.scf import *
from aie.extras.context import mlir_mod_ctx


def my_eltwise_mul():
    """Build the element-wise multiply design and print its MLIR module.

    The design moves two bf16 input tensors (A, B) from the shim tile through
    the memory tile to ``n_cores`` compute tiles, calls the external
    ``eltwise_mul_bf16_vector`` kernel on each ``n``-element chunk, and moves
    the result (C) back the same way. All sizes are fixed constants below;
    the function takes no arguments and writes the module to stdout.
    """

    word_size_in = 2  # bytes per input element (the buffers below are bf16)
    N = 65536  # number of elements in each of A, B and C
    N_in_bytes = N * word_size_in

    # Transfer lengths expressed in 32-bit words, as used by the DMA calls in
    # sequence() below.
    A_sz_in_i32s = N_in_bytes // 4
    B_sz_in_i32s = N_in_bytes // 4
    C_sz_in_i32s = N_in_bytes // 4

    # Tile sizes
    n = 1024  # elements handed to one kernel call
    N_div_n = N // n  # number of n-element chunks in a full tensor

    n_cores = 2
    tiles = N_div_n // n_cores  # chunks processed by each core
    buffer_depth = 2  # depth argument passed to every object fifo

    with mlir_mod_ctx() as ctx:

        @device(AIEDevice.ipu)
        def device_body():
            memRef_ty = T.memref(n, T.bf16())

            # Type used in the tile memory
            memRef_A_ty = T.memref(n, T.bf16())
            memRef_B_ty = T.memref(n, T.bf16())
            memRef_C_ty = T.memref(n, T.bf16())

            # Type used in the memory tile which aggregates across the
            # n_cores cores
            memRef_A_MT_ty = T.memref(n * n_cores, T.bf16())
            memRef_B_MT_ty = T.memref(n * n_cores, T.bf16())
            memRef_C_MT_ty = T.memref(n * n_cores, T.bf16())

            # AIE Core Function declarations
            # Both kernels are declared, but only the vector variant is
            # called in core_body() below.

            eltwise_mul_bf16_scalar = external_func(
                "eltwise_mul_bf16_scalar", inputs=[memRef_ty, memRef_ty, memRef_ty]
            )
            eltwise_mul_bf16_vector = external_func(
                "eltwise_mul_bf16_vector", inputs=[memRef_ty, memRef_ty, memRef_ty]
            )
            # elwise_int32 = external_func("scale_int32", inputs=[memRef_ty, memRef_ty])

            # Tile declarations
            # NOTE(review): arguments are presumably (column, row) -- confirm.
            ShimTile = tile(0, 0)

            MemTile = tile(0, 1)
            cores = [tile(0, 2 + i) for i in range(n_cores)]

            # Per-core fifo names; the dicts below map each name to its fifo.
            inA_fifo_names = [f"memA{i}" for i in range(n_cores)]
            inB_fifo_names = [f"memB{i}" for i in range(n_cores)]
            outC_fifo_names = [f"memC{i}" for i in range(n_cores)]

            inA_fifos = {}
            inB_fifos = {}
            outC_fifos = {}

            # AIE-array data movement with object fifos
            # Input A: shim -> memory tile, then one fifo per core, linked
            # to the shim-side fifo.
            inA = object_fifo("inA", ShimTile, MemTile, buffer_depth, memRef_A_MT_ty)
            for i in range(n_cores):
                inA_fifos[inA_fifo_names[i]] = object_fifo(
                    inA_fifo_names[i], MemTile, cores[i], buffer_depth, memRef_A_ty
                )
            object_fifo_link(inA, inA_fifo_names)

            # Input B: same structure as input A.
            inB = object_fifo("inB", ShimTile, MemTile, buffer_depth, memRef_B_MT_ty)
            for i in range(n_cores):
                inB_fifos[inB_fifo_names[i]] = object_fifo(
                    inB_fifo_names[i], MemTile, cores[i], buffer_depth, memRef_B_ty
                )
            object_fifo_link(inB, inB_fifo_names[0:n_cores])

            # Output C: one fifo per core into the memory tile, linked into
            # a single fifo from the memory tile to the shim.
            for i in range(n_cores):
                outC_fifos[outC_fifo_names[i]] = object_fifo(
                    outC_fifo_names[i], cores[i], MemTile, buffer_depth, memRef_C_ty
                )
            outC = object_fifo("outC", MemTile, ShimTile, buffer_depth, memRef_C_MT_ty)
            object_fifo_link(outC_fifo_names[0:n_cores], outC)

            # Set up compute tiles
            for i in range(n_cores):
                # Compute tile i
                @core(cores[i], "mul.o")
                def core_body():
                    # Outer loop has a very large trip count -- presumably
                    # intended to keep the core running (TODO confirm).
                    for _ in for_(0xFFFFFFFF):
                        # One iteration per chunk assigned to this core.
                        for _ in for_(tiles):
                            elem_out = outC_fifos[outC_fifo_names[i]].acquire(
                                ObjectFifoPort.Produce, 1
                            )
                            elem_in_a = inA_fifos[inA_fifo_names[i]].acquire(
                                ObjectFifoPort.Consume, 1
                            )
                            elem_in_b = inB_fifos[inB_fifo_names[i]].acquire(
                                ObjectFifoPort.Consume, 1
                            )

                            call(
                                eltwise_mul_bf16_vector,
                                [elem_in_a, elem_in_b, elem_out],
                            )
                            # Release everything acquired for this chunk.
                            inA_fifos[inA_fifo_names[i]].release(
                                ObjectFifoPort.Consume, 1
                            )
                            inB_fifos[inB_fifo_names[i]].release(
                                ObjectFifoPort.Consume, 1
                            )
                            outC_fifos[outC_fifo_names[i]].release(
                                ObjectFifoPort.Produce, 1
                            )
                            yield_([])
                        yield_([])

            # To/from AIE-array data movement
            # NOTE(review): this declares N i32 elements, while each transfer
            # below moves N_in_bytes // 4 i32 words -- confirm the intended
            # tensor size.
            tensor_ty = T.memref(N, T.i32())

            @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty)
            def sequence(A, B, C):
                # One transfer per tensor, each with its own bd_id; metadata
                # matches the name of the shim-side object fifo.
                ipu_dma_memcpy_nd(
                    metadata="outC", bd_id=0, mem=C, sizes=[1, 1, 1, C_sz_in_i32s]
                )
                ipu_dma_memcpy_nd(
                    metadata="inA", bd_id=1, mem=A, sizes=[1, 1, 1, A_sz_in_i32s]
                )
                ipu_dma_memcpy_nd(
                    metadata="inB", bd_id=2, mem=B, sizes=[1, 1, 1, B_sz_in_i32s]
                )
                ipu_sync(column=0, row=0, direction=0, channel=0)

    # Emit the generated module on stdout.
    print(ctx.module)


# Generate the design when this file is run as a script.
my_eltwise_mul()
Loading

0 comments on commit c835974

Please sign in to comment.