diff --git a/include/aie-c/Translation.h b/include/aie-c/Translation.h index 37f075729a..e326dd7917 100644 --- a/include/aie-c/Translation.h +++ b/include/aie-c/Translation.h @@ -34,10 +34,6 @@ MLIR_CAPI_EXPORTED MlirLogicalResult aieTranslateToCDODirect(MlirOperation moduleOp, MlirStringRef workDirPath, bool bigEndian, bool emitUnified, bool cdoDebug, bool aieSim, bool xaieDebug, bool enableCores); - -MLIR_CAPI_EXPORTED MlirLogicalResult aieTranslateToTxn( - MlirOperation moduleOp, MlirStringRef outputFile, MlirStringRef workDirPath, - bool aieSim, bool xaieDebug, bool enableCores); MLIR_CAPI_EXPORTED MlirLogicalResult aieTranslateToCtrlpkt( MlirOperation moduleOp, MlirStringRef outputFile, MlirStringRef workDirPath, bool aieSim, bool xaieDebug, bool enableCores); diff --git a/include/aie/Conversion/AIEToConfiguration/AIEToConfiguration.h b/include/aie/Conversion/AIEToConfiguration/AIEToConfiguration.h new file mode 100644 index 0000000000..7f937b933f --- /dev/null +++ b/include/aie/Conversion/AIEToConfiguration/AIEToConfiguration.h @@ -0,0 +1,32 @@ +//===- AIEToConfiguration.h -------------------------------------*- C++ -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. 
+// +//===----------------------------------------------------------------------===// + +#ifndef AIE_CONVERSION_AIETOCONFIGURATION_AIETOCONFIGURATION_H +#define AIE_CONVERSION_AIETOCONFIGURATION_AIETOCONFIGURATION_H + +#include "aie/Dialect/AIE/IR/AIEDialect.h" +#include "aie/Dialect/AIEX/IR/AIEXDialect.h" + +#include "mlir/Pass/Pass.h" + +#include + +namespace xilinx::AIE { + +std::unique_ptr> +createConvertAIEToTransactionPass(); + +std::optional +convertTransactionBinaryToMLIR(mlir::MLIRContext *ctx, + std::vector &binary); + +} // namespace xilinx::AIE + +#endif // AIE_CONVERSION_AIETOCONFIGURATION_AIETOCONFIGURATION_H diff --git a/include/aie/Conversion/CMakeLists.txt b/include/aie/Conversion/CMakeLists.txt index 40f9c3ec7e..6579f14ff1 100644 --- a/include/aie/Conversion/CMakeLists.txt +++ b/include/aie/Conversion/CMakeLists.txt @@ -1,3 +1,8 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2024 Advanced Micro Devices, Inc. 
set(LLVM_TARGET_DEFINITIONS Passes.td) mlir_tablegen(Passes.h.inc -gen-pass-decls -name Conversion) mlir_tablegen(Passes.capi.h.inc -gen-pass-capi-header -prefix Conversion) diff --git a/include/aie/Conversion/Passes.h b/include/aie/Conversion/Passes.h index 53797352cf..366e85793a 100644 --- a/include/aie/Conversion/Passes.h +++ b/include/aie/Conversion/Passes.h @@ -11,6 +11,7 @@ #ifndef AIE_CONVERSION_PASSES_H #define AIE_CONVERSION_PASSES_H +#include "aie/Conversion/AIEToConfiguration/AIEToConfiguration.h" #include "aie/Conversion/AIEVecToLLVM/AIEVecToLLVM.h" #include "aie/Conversion/PassesEnums.h.inc" diff --git a/include/aie/Conversion/Passes.td b/include/aie/Conversion/Passes.td index 4dc6247b30..1ef348b8d7 100644 --- a/include/aie/Conversion/Passes.td +++ b/include/aie/Conversion/Passes.td @@ -51,4 +51,28 @@ def ConvertAIEVecToLLVM : Pass<"convert-aievec-to-llvm", "mlir::ModuleOp"> { ]; } +//===----------------------------------------------------------------------===// +// AIEToTransaction +//===----------------------------------------------------------------------===// + +def ConvertAIEToTransaction : Pass<"convert-aie-to-transaction", + "xilinx::AIE::DeviceOp"> { + let summary = "Convert AIE dialect to npu transaction operations"; + let description = [{ + This pass converts aie.device operations to a sequence of transaction binary + operations (writes, masked writes, and block writes) that can be used to + configure the npu device. The transaction operations are emitted as + `npu.write32`, `npu.maskwrite32`, and `npu.blockwrite` operations. A new + `aiex.runtime_sequence` operation is inserted into the `aie.device` to + contain the new transaction operations sequence. 
+ }]; + let constructor = "xilinx::AIE::createConvertAIEToTransactionPass()"; + let dependentDialects = ["xilinx::AIE::AIEDialect", + "xilinx::AIEX::AIEXDialect"]; + let options = [ + Option<"clElfDir", "elf-dir", "std::string", /*default=*/"", + "Where to find ELF files">, + ]; +} + #endif // AIE_CONVERSION_PASSES diff --git a/include/aie/Dialect/AIEX/Transforms/AIEXPasses.h b/include/aie/Dialect/AIEX/Transforms/AIEXPasses.h index 14d7e87a5a..489b019999 100644 --- a/include/aie/Dialect/AIEX/Transforms/AIEXPasses.h +++ b/include/aie/Dialect/AIEX/Transforms/AIEXPasses.h @@ -1,4 +1,4 @@ -//===- AIEPasses.h ----------------------------------------------*- C++ -*-===// +//===- AIEXPasses.h ---------------------------------------------*- C++ -*-===// // // This file is licensed under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/include/aie/Targets/AIETargets.h b/include/aie/Targets/AIETargets.h index 4dd1581591..9a6473ddb2 100644 --- a/include/aie/Targets/AIETargets.h +++ b/include/aie/Targets/AIETargets.h @@ -35,13 +35,17 @@ mlir::LogicalResult AIETranslateShimSolution(mlir::ModuleOp module, mlir::LogicalResult AIETranslateGraphXPE(mlir::ModuleOp module, llvm::raw_ostream &); mlir::LogicalResult AIETranslateToNPU(mlir::ModuleOp module, - llvm::raw_ostream &output); -mlir::LogicalResult AIETranslateToNPU(mlir::ModuleOp, std::vector &); + llvm::raw_ostream &output, + llvm::StringRef sequenceName = ""); +mlir::LogicalResult AIETranslateToNPU(mlir::ModuleOp, std::vector &, + llvm::StringRef sequenceName = ""); mlir::LogicalResult AIETranslateControlPacketsToUI32Vec(mlir::ModuleOp module, - llvm::raw_ostream &output); + llvm::raw_ostream &output, + llvm::StringRef sequenceName = ""); mlir::LogicalResult -AIETranslateControlPacketsToUI32Vec(mlir::ModuleOp, std::vector &); +AIETranslateControlPacketsToUI32Vec(mlir::ModuleOp, std::vector &, + llvm::StringRef sequenceName = ""); mlir::LogicalResult 
AIETranslateToLdScript(mlir::ModuleOp module, llvm::raw_ostream &output, int tileCol, int tileRow); @@ -60,11 +64,6 @@ AIETranslateToCDODirect(mlir::ModuleOp m, llvm::StringRef workDirPath, bool cdoDebug = false, bool aieSim = false, bool xaieDebug = false, bool enableCores = true); mlir::LogicalResult -AIETranslateToTxn(mlir::ModuleOp m, llvm::raw_ostream &output, - llvm::StringRef workDirPath, bool outputBinary = false, - bool aieSim = false, bool xaieDebug = false, - bool enableCores = true); -mlir::LogicalResult AIETranslateToControlPackets(mlir::ModuleOp m, llvm::raw_ostream &output, llvm::StringRef workDirPath, bool outputBinary = false, bool aieSim = false, @@ -80,9 +79,6 @@ mlir::LogicalResult AIETranslateToAirbin(mlir::ModuleOp module, mlir::LogicalResult AIETranslateToTargetArch(mlir::ModuleOp module, llvm::raw_ostream &output); -std::optional -AIETranslateBinaryToTxn(mlir::MLIRContext *ctx, std::vector &binary); - std::optional AIETranslateBinaryToCtrlpkt(mlir::MLIRContext *ctx, std::vector &binary); diff --git a/lib/CAPI/CMakeLists.txt b/lib/CAPI/CMakeLists.txt index 342ea0ac19..2784395eb8 100644 --- a/lib/CAPI/CMakeLists.txt +++ b/lib/CAPI/CMakeLists.txt @@ -15,6 +15,7 @@ add_mlir_public_c_api_library(AIECAPI AIEX AIEXTransforms AIEXUtils + MLIRAIEToConfiguration MLIRAIEVecDialect MLIRAIEVecAIE1Dialect MLIRAIEVecToLLVM diff --git a/lib/CAPI/Translation.cpp b/lib/CAPI/Translation.cpp index 264bebf478..637eed4a2c 100644 --- a/lib/CAPI/Translation.cpp +++ b/lib/CAPI/Translation.cpp @@ -10,6 +10,7 @@ #include "aie-c/Translation.h" +#include "aie/Conversion/AIEToConfiguration/AIEToConfiguration.h" #include "aie/Dialect/AIE/IR/AIETargetModel.h" #include "aie/Targets/AIERT.h" #include "aie/Targets/AIETargets.h" @@ -83,39 +84,6 @@ MlirLogicalResult aieTranslateToCDODirect(MlirOperation moduleOp, return wrap(status); } -MlirLogicalResult aieTranslateToTxn(MlirOperation moduleOp, - MlirStringRef outputFile, - MlirStringRef workDirPath, bool aieSim, - bool 
xaieDebug, bool enableCores) { - ModuleOp mod = llvm::cast(unwrap(moduleOp)); - bool outputBinary = false; - - std::string errorMessage; - auto output = openOutputFile(StringRef(outputFile.data, outputFile.length), - &errorMessage); - if (!output) { - llvm::errs() << errorMessage << "\n"; - return wrap(failure()); - } - - auto status = AIETranslateToTxn( - mod, output->os(), llvm::StringRef(workDirPath.data, workDirPath.length), - outputBinary, aieSim, xaieDebug, enableCores); - - std::vector diagnostics; - ScopedDiagnosticHandler handler(mod.getContext(), [&](Diagnostic &d) { - llvm::raw_string_ostream(diagnostics.emplace_back()) - << d.getLocation() << ": " << d; - }); - - if (failed(status)) - for (const auto &diagnostic : diagnostics) - std::cerr << diagnostic << "\n"; - else - output->keep(); - return wrap(status); -} - MlirLogicalResult aieTranslateToCtrlpkt(MlirOperation moduleOp, MlirStringRef outputFile, MlirStringRef workDirPath, bool aieSim, @@ -151,7 +119,7 @@ MlirLogicalResult aieTranslateToCtrlpkt(MlirOperation moduleOp, MlirOperation aieTranslateBinaryToTxn(MlirContext ctx, MlirStringRef binary) { std::vector binaryData(binary.data, binary.data + binary.length); - auto mod = AIETranslateBinaryToTxn(unwrap(ctx), binaryData); + auto mod = convertTransactionBinaryToMLIR(unwrap(ctx), binaryData); if (!mod) return wrap(ModuleOp().getOperation()); return wrap(mod->getOperation()); diff --git a/lib/Conversion/AIEToConfiguration/AIEToConfiguration.cpp b/lib/Conversion/AIEToConfiguration/AIEToConfiguration.cpp new file mode 100644 index 0000000000..4b8f32419d --- /dev/null +++ b/lib/Conversion/AIEToConfiguration/AIEToConfiguration.cpp @@ -0,0 +1,357 @@ +//===- AIEToConfiguration.cpp -----------------------------------*- C++ -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. +// +//===----------------------------------------------------------------------===// + +#include "../PassDetail.h" + +#include "aie/Conversion/AIEToConfiguration/AIEToConfiguration.h" +#include "aie/Targets/AIERT.h" + +#include "llvm/Support/Debug.h" + +#include + +#define DEBUG_TYPE "aie-convert-to-config" + +using namespace mlir; +using namespace xilinx; +using namespace xilinx::AIE; + +namespace { + +// A TransactionBinaryOperation encapsulates an aie-rt XAie_TxnCmd struct +struct TransactionBinaryOperation { + struct XAie_TxnCmd cmd; + TransactionBinaryOperation(XAie_TxnOpcode opc, uint32_t mask, uint64_t addr, + uint32_t value, const uint8_t *data, + uint32_t size) { + cmd.Opcode = opc; + cmd.Mask = mask; + cmd.RegOff = addr; + cmd.Value = value; + cmd.DataPtr = reinterpret_cast(data); + cmd.Size = size; + } +}; +} // namespace + +// Parse a TXN binary blob. On success return the number of columns from the +// header and a vector of parsed operations. On failure return std::nullopt. 
+static std::optional +parseTransactionBinary(const std::vector &data, + std::vector &ops) { + + uint32_t major = data[0]; + uint32_t minor = data[1]; + uint32_t num_cols = data[4]; + + uint32_t num_ops, txn_size; + std::memcpy(&num_ops, &data[8], 4); + std::memcpy(&txn_size, &data[12], 4); + + LLVM_DEBUG(llvm::dbgs() << "Major: " << major << "\n"); + LLVM_DEBUG(llvm::dbgs() << "Minor: " << minor << "\n"); + LLVM_DEBUG(llvm::dbgs() << "DevGen: " << data[2] << "\n"); + LLVM_DEBUG(llvm::dbgs() << "NumRows: " << data[3] << "\n"); + LLVM_DEBUG(llvm::dbgs() << "NumCols: " << num_cols << "\n"); + LLVM_DEBUG(llvm::dbgs() << "NumMemTileRows: " << data[5] << "\n"); + LLVM_DEBUG(llvm::dbgs() << "NumOps: " << num_ops << "\n"); + LLVM_DEBUG(llvm::dbgs() << "TxnSize: " << txn_size << " bytes\n"); + + size_t i = 16; + + // Convert opcode from uint8 to enum + auto convertOpcode = [](uint8_t opc) { + switch (opc) { + case 0: + return XAie_TxnOpcode::XAIE_IO_WRITE; + case 1: + return XAie_TxnOpcode::XAIE_IO_BLOCKWRITE; + case 3: + return XAie_TxnOpcode::XAIE_IO_MASKWRITE; + default: + llvm::errs() << "Unhandled opcode: " << std::to_string(opc) << "\n"; + return XAie_TxnOpcode::XAIE_IO_CUSTOM_OP_MAX; + } + }; + + // Parse the binary blob. There are two versions supported, 0.1 and 1.0. + // For both versions, build a list of TransactionBinaryOperation objects + // representing the parsed operations. 
+ if (major == 0 && minor == 1) { + while (i < data.size()) { + + XAie_TxnOpcode opc = convertOpcode(data[i]); + LLVM_DEBUG(llvm::dbgs() << "opcode: " + std::to_string(opc) + "\n"); + + uint64_t addr = 0; + uint32_t value = 0; + uint32_t size = 0; + uint32_t mask = 0; + const uint8_t *data_ptr = nullptr; + + if (opc == XAie_TxnOpcode::XAIE_IO_WRITE) { + LLVM_DEBUG(llvm::dbgs() << "opcode: WRITE (0x00)\n"); + uint32_t addr0, addr1; + std::memcpy(&addr0, &data[i + 8], 4); + std::memcpy(&addr1, &data[i + 12], 4); + std::memcpy(&value, &data[i + 16], 4); + std::memcpy(&size, &data[i + 20], 4); + addr = static_cast(addr1) << 32 | addr0; + i += size; + } else if (opc == XAie_TxnOpcode::XAIE_IO_BLOCKWRITE) { + LLVM_DEBUG(llvm::dbgs() << "opcode: BLOCKWRITE (0x01)\n"); + std::memcpy(&addr, &data[i + 8], 4); + std::memcpy(&size, &data[i + 12], 4); + data_ptr = data.data() + i + 16; + i += size; + size = size - 16; + } else if (opc == XAie_TxnOpcode::XAIE_IO_MASKWRITE) { + LLVM_DEBUG(llvm::dbgs() << "opcode: MASKWRITE (0x03)\n"); + uint32_t addr0, addr1; + std::memcpy(&addr0, &data[i + 8], 4); + std::memcpy(&addr1, &data[i + 12], 4); + std::memcpy(&value, &data[i + 16], 4); + std::memcpy(&mask, &data[i + 20], 4); + std::memcpy(&size, &data[i + 24], 4); + addr = static_cast(addr1) << 32 | addr0; + i += size; + } else { + llvm::errs() << "Unhandled opcode: " << std::to_string(opc) << "\n"; + return std::nullopt; + } + ops.emplace_back(opc, mask, addr, value, data_ptr, size); + LLVM_DEBUG(llvm::dbgs() << "addr: " << addr << "\n"); + LLVM_DEBUG(llvm::dbgs() << "value: " << value << "\n"); + LLVM_DEBUG(llvm::dbgs() << "size: " << size << "\n"); + LLVM_DEBUG(llvm::dbgs() << "mask: " << mask << "\n"); + LLVM_DEBUG(llvm::dbgs() + << "data: " << reinterpret_cast(data_ptr) << "\n"); + } + } else if (major == 1 && minor == 0) { + while (i < data.size()) { + + XAie_TxnOpcode opc = convertOpcode(data[i]); + LLVM_DEBUG(llvm::dbgs() << "opcode: " + std::to_string(opc) + "\n"); + + uint64_t 
addr = 0; + uint32_t value = 0; + uint32_t size = 0; + uint32_t mask = 0; + const uint8_t *data_ptr = nullptr; + + if (opc == XAie_TxnOpcode::XAIE_IO_WRITE) { + LLVM_DEBUG(llvm::dbgs() << "opcode: WRITE (0x00)\n"); + std::memcpy(&addr, &data[i + 4], 4); + std::memcpy(&value, &data[i + 8], 4); + i += 12; + } else if (opc == XAie_TxnOpcode::XAIE_IO_BLOCKWRITE) { + LLVM_DEBUG(llvm::dbgs() << "opcode: BLOCKWRITE (0x01)\n"); + std::memcpy(&addr, &data[i + 4], 4); + std::memcpy(&size, &data[i + 8], 4); + data_ptr = data.data() + i + 12; + i += size; + size = size - 12; + } else if (opc == XAie_TxnOpcode::XAIE_IO_MASKWRITE) { + LLVM_DEBUG(llvm::dbgs() << "opcode: MASKWRITE (0x03)\n"); + std::memcpy(&addr, &data[i + 4], 4); + std::memcpy(&value, &data[i + 8], 4); + std::memcpy(&mask, &data[i + 12], 4); + i += 16; + } else { + llvm::errs() << "Unhandled opcode: " << std::to_string(opc) << "\n"; + return std::nullopt; + } + LLVM_DEBUG(llvm::dbgs() << "addr: " << addr << "\n"); + LLVM_DEBUG(llvm::dbgs() << "value: " << value << "\n"); + LLVM_DEBUG(llvm::dbgs() << "size: " << size << "\n"); + LLVM_DEBUG(llvm::dbgs() << "mask: " << mask << "\n"); + LLVM_DEBUG(llvm::dbgs() + << "data: " << reinterpret_cast(data_ptr) << "\n"); + ops.emplace_back(opc, mask, addr, value, data_ptr, size); + } + } else { + llvm::errs() << "Unsupported TXN binary version: " << major << "." 
<< minor + << "\n"; + return std::nullopt; + } + + return num_cols; +} + +static LogicalResult generateTxn(AIERTControl &ctl, const StringRef workDirPath, + DeviceOp &targetOp, bool aieSim, + bool enableElfs, bool enableInit, + bool enableCores) { + if (enableElfs && !targetOp.getOps().empty() && + failed(ctl.addAieElfs(targetOp, workDirPath, aieSim))) + return failure(); + if (enableInit && failed(ctl.addInitConfig(targetOp))) + return failure(); + if (enableCores && !targetOp.getOps().empty() && + failed(ctl.addCoreEnable(targetOp))) + return failure(); + return success(); +} + +static LogicalResult convertTransactionOpsToMLIR( + OpBuilder builder, AIE::DeviceOp device, + std::vector &operations) { + + auto loc = builder.getUnknownLoc(); + + // for each blockwrite in the binary, create a GlobalOp with the data + std::vector global_data; + for (auto &op : operations) { + if (op.cmd.Opcode != XAIE_IO_BLOCKWRITE) { + global_data.push_back(nullptr); + continue; + } + uint32_t size = op.cmd.Size / 4; + const uint32_t *d = reinterpret_cast(op.cmd.DataPtr); + std::vector data32(d, d + size); + + int id = 0; + std::string name = "blockwrite_data"; + while (device.lookupSymbol(name)) + name = "blockwrite_data_" + std::to_string(id++); + + MemRefType memrefType = MemRefType::get({size}, builder.getI32Type()); + TensorType tensorType = RankedTensorType::get({size}, builder.getI32Type()); + auto global = builder.create( + loc, name, builder.getStringAttr("private"), memrefType, + DenseElementsAttr::get(tensorType, data32), true, nullptr); + global_data.push_back(global); + } + + // create aiex.runtime_sequence + int id = 0; + std::string seq_name = "configure"; + while (device.lookupSymbol(seq_name)) + seq_name = "configure" + std::to_string(id++); + StringAttr seq_sym_name = builder.getStringAttr(seq_name); + auto seq = builder.create(loc, seq_sym_name); + seq.getBody().push_back(new Block); + + // create the txn ops + 
builder.setInsertionPointToStart(&seq.getBody().front()); + for (auto p : llvm::zip(operations, global_data)) { + auto op = std::get<0>(p); + memref::GlobalOp payload = std::get<1>(p); + + if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_WRITE) { + builder.create(loc, op.cmd.RegOff, op.cmd.Value, + nullptr, nullptr, nullptr); + } else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_BLOCKWRITE) { + auto memref = builder.create(loc, payload.getType(), + payload.getName()); + builder.create( + loc, builder.getUI32IntegerAttr(op.cmd.RegOff), memref.getResult(), + nullptr, nullptr, nullptr); + } else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_MASKWRITE) { + builder.create(loc, op.cmd.RegOff, op.cmd.Value, + op.cmd.Mask, nullptr, nullptr, + nullptr); + } else { + llvm::errs() << "Unhandled txn opcode: " << op.cmd.Opcode << "\n"; + return failure(); + } + } + + return success(); +} + +// Convert a transaction binary to MLIR. On success return a new ModuleOp +std::optional +xilinx::AIE::convertTransactionBinaryToMLIR(mlir::MLIRContext *ctx, + std::vector &binary) { + + // parse the binary + std::vector operations; + auto c = parseTransactionBinary(binary, operations); + if (!c) { + llvm::errs() << "Failed to parse binary\n"; + return std::nullopt; + } + int columns = *c; + + auto loc = mlir::UnknownLoc::get(ctx); + + // create a new ModuleOp and set the insertion point + auto module = ModuleOp::create(loc); + OpBuilder builder(module.getBodyRegion()); + builder.setInsertionPointToStart(module.getBody()); + + // create aie.device + std::vector devices{AIEDevice::npu1_1col, AIEDevice::npu1_2col, + AIEDevice::npu1_3col, AIEDevice::npu1_4col, + AIEDevice::npu1}; + auto device = builder.create(loc, devices[columns - 1]); + device.getRegion().emplaceBlock(); + builder.setInsertionPointToStart(device.getBody()); + + if (failed(convertTransactionOpsToMLIR(builder, device, operations))) + return std::nullopt; + + return module; +} + +namespace { + +struct ConvertAIEToTransactionPass + : 
ConvertAIEToTransactionBase { + void runOnOperation() override { + + auto device = getOperation(); + + const BaseNPUTargetModel &targetModel = + (const BaseNPUTargetModel &)device.getTargetModel(); + + if (!targetModel.isNPU()) + return; + + bool aieSim = false; + bool xaieDebug = false; + + AIERTControl ctl(targetModel); + if (failed(ctl.setIOBackend(aieSim, xaieDebug))) + return signalPassFailure(); + + // start collecting transactions + XAie_StartTransaction(&ctl.devInst, XAIE_TRANSACTION_DISABLE_AUTO_FLUSH); + + auto result = generateTxn(ctl, clElfDir, device, aieSim, true, true, true); + if (failed(result)) + return signalPassFailure(); + + // Export the transactions to a binary buffer + uint8_t *txn_ptr = XAie_ExportSerializedTransaction(&ctl.devInst, 0, 0); + XAie_TxnHeader *hdr = (XAie_TxnHeader *)txn_ptr; + std::vector txn_data(txn_ptr, txn_ptr + hdr->TxnSize); + + // parse the binary data + std::vector operations; + if (!parseTransactionBinary(txn_data, operations)) { + llvm::errs() << "Failed to parse binary\n"; + return signalPassFailure(); + } + + OpBuilder builder(device.getBodyRegion()); + + if (failed(convertTransactionOpsToMLIR(builder, device, operations))) + return signalPassFailure(); + } +}; + +} // end anonymous namespace + +std::unique_ptr> +xilinx::AIE::createConvertAIEToTransactionPass() { + return std::make_unique(); +} diff --git a/lib/Conversion/AIEToConfiguration/CMakeLists.txt b/lib/Conversion/AIEToConfiguration/CMakeLists.txt new file mode 100644 index 0000000000..f08d57a2cf --- /dev/null +++ b/lib/Conversion/AIEToConfiguration/CMakeLists.txt @@ -0,0 +1,20 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2024 Advanced Micro Devices, Inc. 
+add_mlir_conversion_library(MLIRAIEToConfiguration + AIEToConfiguration.cpp + + ADDITIONAL_HEADER_DIRS + $(CMAKE_CURRENT_SRC_DIR)/../../../../include/aie/Conversion/AIEToConfiguration + + DEPENDS + MLIRAIEConversionPassIncGen + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC + AIERT + ) diff --git a/lib/Conversion/AIEVecToLLVM/CMakeLists.txt b/lib/Conversion/AIEVecToLLVM/CMakeLists.txt index acba6d9780..268b06782d 100644 --- a/lib/Conversion/AIEVecToLLVM/CMakeLists.txt +++ b/lib/Conversion/AIEVecToLLVM/CMakeLists.txt @@ -1,3 +1,8 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2024 Advanced Micro Devices, Inc. add_mlir_conversion_library(MLIRAIEVecToLLVM AIEVecToLLVM.cpp diff --git a/lib/Conversion/CMakeLists.txt b/lib/Conversion/CMakeLists.txt index 8c78b250dc..660e9c6669 100644 --- a/lib/Conversion/CMakeLists.txt +++ b/lib/Conversion/CMakeLists.txt @@ -1 +1,7 @@ -add_subdirectory(AIEVecToLLVM) +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2024 Advanced Micro Devices, Inc. 
+add_subdirectory(AIEToConfiguration) +add_subdirectory(AIEVecToLLVM) \ No newline at end of file diff --git a/lib/Conversion/PassDetail.h b/lib/Conversion/PassDetail.h index 537986193e..dc31948472 100644 --- a/lib/Conversion/PassDetail.h +++ b/lib/Conversion/PassDetail.h @@ -12,6 +12,7 @@ #define AIE_CONVERSION_PASSDETAIL_H_ #include "aie/Conversion/Passes.h" + #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" #include "mlir/IR/BuiltinOps.h" diff --git a/lib/Dialect/AIEX/IR/AIEXDialect.cpp b/lib/Dialect/AIEX/IR/AIEXDialect.cpp index 89c94f3bfe..e0e547a09d 100644 --- a/lib/Dialect/AIEX/IR/AIEXDialect.cpp +++ b/lib/Dialect/AIEX/IR/AIEXDialect.cpp @@ -532,16 +532,6 @@ LogicalResult AIEX::RuntimeSequenceOp::verify() { (*this)->emitOpError() << "must be inside AIE device operation."; return failure(); } - auto seq_ops = device.getOps(); - if (std::distance(seq_ops.begin(), seq_ops.end()) > 1) { - auto err = device.emitOpError() - << "Cannot have more than one runtime sequence per device."; - for (auto it = seq_ops.begin(); it != seq_ops.end(); ++it) { - AIEX::RuntimeSequenceOp seq_op = *it; - err.attachNote(seq_op.getLoc()) << "Sequence operation definition here."; - } - return failure(); - } return success(); } diff --git a/lib/Targets/AIERT.cpp b/lib/Targets/AIERT.cpp index c0ec5290ad..2746b2e1c4 100644 --- a/lib/Targets/AIERT.cpp +++ b/lib/Targets/AIERT.cpp @@ -592,8 +592,7 @@ LogicalResult AIERTControl::addAieElf(uint8_t col, uint8_t row, } LogicalResult AIERTControl::addAieElfs(DeviceOp &targetOp, - const StringRef workDirPath, - bool aieSim) { + const StringRef elfPath, bool aieSim) { for (auto tileOp : targetOp.getOps()) if (tileOp.isShimNOCorPLTile()) { // Resets no needed with V2 kernel driver @@ -611,8 +610,7 @@ LogicalResult AIERTControl::addAieElfs(DeviceOp &targetOp, auto ps = std::filesystem::path::preferred_separator; if (failed(addAieElf( col, row, - (llvm::Twine(workDirPath) + std::string(1, ps) + fileName) - 
.str(), + (llvm::Twine(elfPath) + std::string(1, ps) + fileName).str(), aieSim))) return failure(); } diff --git a/lib/Targets/AIETargetCDODirect.cpp b/lib/Targets/AIETargetCDODirect.cpp index 17e3bacd36..b25760532a 100644 --- a/lib/Targets/AIETargetCDODirect.cpp +++ b/lib/Targets/AIETargetCDODirect.cpp @@ -379,90 +379,6 @@ LogicalResult xilinx::AIE::AIETranslateToCDODirect( aieSim, xaieDebug, enableCores); } -std::optional -xilinx::AIE::AIETranslateBinaryToTxn(mlir::MLIRContext *ctx, - std::vector &binary) { - - // parse the binary - std::vector operations; - auto c = parseTransactionBinary(binary, operations); - if (!c) { - llvm::errs() << "Failed to parse binary\n"; - return std::nullopt; - } - int columns = *c; - - auto loc = mlir::UnknownLoc::get(ctx); - - // create a new ModuleOp and set the insertion point - auto module = ModuleOp::create(loc); - OpBuilder builder(module.getBodyRegion()); - builder.setInsertionPointToStart(module.getBody()); - - // create aie.device - std::vector devices{AIEDevice::npu1_1col, AIEDevice::npu1_2col, - AIEDevice::npu1_3col, AIEDevice::npu1_4col, - AIEDevice::npu1}; - auto device = builder.create(loc, devices[columns - 1]); - device.getRegion().emplaceBlock(); - builder.setInsertionPointToStart(device.getBody()); - - // for each blockwrite in the binary, create a GlobalOp with the data - std::vector global_data; - for (auto &op : operations) { - if (op.cmd.Opcode != XAIE_IO_BLOCKWRITE) { - global_data.push_back(nullptr); - continue; - } - uint32_t size = op.cmd.Size / 4; - const uint32_t *d = reinterpret_cast(op.cmd.DataPtr); - std::vector data32(d, d + size); - - int id = 0; - std::string name = "blockwrite_data"; - while (device.lookupSymbol(name)) - name = "blockwrite_data_" + std::to_string(id++); - - MemRefType memrefType = MemRefType::get({size}, builder.getI32Type()); - TensorType tensorType = RankedTensorType::get({size}, builder.getI32Type()); - auto global = builder.create( - loc, name, 
builder.getStringAttr("private"), memrefType, - DenseElementsAttr::get(tensorType, data32), true, nullptr); - global_data.push_back(global); - } - - // create aiex.runtime_sequence - auto seq = builder.create(loc, nullptr); - seq.getBody().push_back(new Block); - - // create the txn ops - builder.setInsertionPointToStart(&seq.getBody().front()); - for (auto p : llvm::zip(operations, global_data)) { - auto op = std::get<0>(p); - memref::GlobalOp payload = std::get<1>(p); - - if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_WRITE) { - builder.create(loc, op.cmd.RegOff, op.cmd.Value, - nullptr, nullptr, nullptr); - } else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_BLOCKWRITE) { - auto memref = builder.create(loc, payload.getType(), - payload.getName()); - builder.create( - loc, builder.getUI32IntegerAttr(op.cmd.RegOff), memref.getResult(), - nullptr, nullptr, nullptr); - } else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_MASKWRITE) { - builder.create(loc, op.cmd.RegOff, op.cmd.Value, - op.cmd.Mask, nullptr, nullptr, - nullptr); - } else { - llvm::errs() << "Unhandled txn opcode: " << op.cmd.Opcode << "\n"; - return std::nullopt; - } - } - - return module; -} - std::optional xilinx::AIE::AIETranslateBinaryToCtrlpkt(mlir::MLIRContext *ctx, std::vector &binary) { @@ -572,29 +488,6 @@ xilinx::AIE::AIETranslateBinaryToCtrlpkt(mlir::MLIRContext *ctx, return module; } -LogicalResult xilinx::AIE::AIETranslateToTxn(ModuleOp m, - llvm::raw_ostream &output, - llvm::StringRef workDirPath, - bool outputBinary, bool enableSim, - bool xaieDebug, bool enableCores) { - std::vector bin; - auto result = - translateToTxn(m, bin, workDirPath, enableSim, xaieDebug, enableCores); - if (failed(result)) - return result; - - if (outputBinary) { - output.write(reinterpret_cast(bin.data()), bin.size()); - return success(); - } - - auto new_module = AIETranslateBinaryToTxn(m.getContext(), bin); - if (!new_module) - return failure(); - new_module->print(output); - return success(); -} - 
LogicalResult xilinx::AIE::AIETranslateToControlPackets( ModuleOp m, llvm::raw_ostream &output, llvm::StringRef workDirPath, bool outputBinary, bool enableSim, bool xaieDebug, bool enableCores) { diff --git a/lib/Targets/AIETargetNPU.cpp b/lib/Targets/AIETargetNPU.cpp index 0439c427e5..bf62b73497 100644 --- a/lib/Targets/AIETargetNPU.cpp +++ b/lib/Targets/AIETargetNPU.cpp @@ -187,7 +187,8 @@ void appendBlockWrite(std::vector &instructions, NpuBlockWriteOp op) { LogicalResult xilinx::AIE::AIETranslateToNPU(ModuleOp module, - std::vector &instructions) { + std::vector &instructions, + StringRef sequenceName) { auto words = reserveAndGetTail(instructions, 4); @@ -206,8 +207,10 @@ xilinx::AIE::AIETranslateToNPU(ModuleOp module, words[1] = (numMemTileRows << 8) | numCols; auto sequenceOps = deviceOp.getOps(); - for (auto f : sequenceOps) { - Block &entry = f.getBody().front(); + for (auto seq : sequenceOps) { + if (sequenceName.size() && sequenceName != seq.getSymName()) + continue; + Block &entry = seq.getBody().front(); for (auto &o : entry) { llvm::TypeSwitch(&o) .Case([&](auto op) { @@ -240,9 +243,10 @@ xilinx::AIE::AIETranslateToNPU(ModuleOp module, } LogicalResult xilinx::AIE::AIETranslateToNPU(ModuleOp module, - raw_ostream &output) { + raw_ostream &output, + StringRef sequenceName) { std::vector instructions; - auto r = AIETranslateToNPU(module, instructions); + auto r = AIETranslateToNPU(module, instructions, sequenceName); if (failed(r)) return r; for (auto w : instructions) @@ -251,12 +255,14 @@ LogicalResult xilinx::AIE::AIETranslateToNPU(ModuleOp module, } LogicalResult xilinx::AIE::AIETranslateControlPacketsToUI32Vec( - ModuleOp module, std::vector &instructions) { - + ModuleOp module, std::vector &instructions, + StringRef sequenceName) { DeviceOp deviceOp = *module.getOps().begin(); auto sequenceOps = deviceOp.getOps(); - for (auto f : sequenceOps) { - Block &entry = f.getBody().front(); + for (auto seq : sequenceOps) { + if (sequenceName.size() && 
sequenceName != seq.getSymName()) + continue; + Block &entry = seq.getBody().front(); for (auto &o : entry) { llvm::TypeSwitch(&o).Case([&](auto op) { uint32_t size = 0; @@ -290,11 +296,11 @@ LogicalResult xilinx::AIE::AIETranslateControlPacketsToUI32Vec( return success(); } -LogicalResult -xilinx::AIE::AIETranslateControlPacketsToUI32Vec(ModuleOp module, - raw_ostream &output) { +LogicalResult xilinx::AIE::AIETranslateControlPacketsToUI32Vec( + ModuleOp module, raw_ostream &output, StringRef sequenceName) { std::vector instructions; - auto r = AIETranslateControlPacketsToUI32Vec(module, instructions); + auto r = + AIETranslateControlPacketsToUI32Vec(module, instructions, sequenceName); if (failed(r)) return r; for (auto w : instructions) diff --git a/lib/Targets/AIETargets.cpp b/lib/Targets/AIETargets.cpp index 7638c45aaf..b13ce23845 100644 --- a/lib/Targets/AIETargets.cpp +++ b/lib/Targets/AIETargets.cpp @@ -159,6 +159,11 @@ void registerAIETranslations() { "Select binary (true) or text (false) output for supported " "translations. e.g. aie-npu-instgen, aie-ctrlpkt-to-bin")); + static llvm::cl::opt sequenceName( + "aie-sequence-name", llvm::cl::init(""), + llvm::cl::desc( + "Specify the name of the aiex.runtime_sequence to translate")); + TranslateFromMLIRRegistration registrationMMap( "aie-generate-mmap", "Generate AIE memory map", [](ModuleOp module, raw_ostream &output) { @@ -338,36 +343,19 @@ void registerAIETranslations() { cdoXaieDebug, cdoEnableCores); }, registerDialects); - TranslateFromMLIRRegistration registrationCDOWithTxn( - "aie-generate-txn", - "Generate TXN configuration. 
Use --aie-output-binary to select between " - "mlir (default) and binary output", - [](ModuleOp module, raw_ostream &output) { - SmallString<128> workDirPath_; - if (workDirPath.getNumOccurrences() == 0) { - if (llvm::sys::fs::current_path(workDirPath_)) - llvm::report_fatal_error( - "couldn't get cwd to use as work-dir-path"); - } else - workDirPath_ = workDirPath.getValue(); - LLVM_DEBUG(llvm::dbgs() << "work-dir-path: " << workDirPath_ << "\n"); - return AIETranslateToTxn(module, output, workDirPath_, outputBinary, - cdoAieSim, cdoXaieDebug, cdoEnableCores); - }, - registerDialects); TranslateFromMLIRRegistration registrationNPU( "aie-npu-instgen", "Translate npu instructions to binary", [](ModuleOp module, raw_ostream &output) { if (outputBinary == true) { std::vector instructions; - auto r = AIETranslateToNPU(module, instructions); + auto r = AIETranslateToNPU(module, instructions, sequenceName); if (failed(r)) return r; output.write(reinterpret_cast(instructions.data()), instructions.size() * sizeof(uint32_t)); return success(); } - return AIETranslateToNPU(module, output); + return AIETranslateToNPU(module, output, sequenceName); }, registerDialects); TranslateFromMLIRRegistration registrationCtrlPkt( @@ -375,7 +363,8 @@ void registerAIETranslations() { [](ModuleOp module, raw_ostream &output) { if (outputBinary == true) { std::vector instructions; - auto r = AIETranslateControlPacketsToUI32Vec(module, instructions); + auto r = AIETranslateControlPacketsToUI32Vec(module, instructions, + sequenceName); if (failed(r)) return r; output.write(reinterpret_cast(instructions.data()), diff --git a/python/AIEMLIRModule.cpp b/python/AIEMLIRModule.cpp index 2d71249e72..fa3d214dac 100644 --- a/python/AIEMLIRModule.cpp +++ b/python/AIEMLIRModule.cpp @@ -107,23 +107,6 @@ PYBIND11_MODULE(_aie, m) { "emit_unified"_a = false, "cdo_debug"_a = false, "aiesim"_a = false, "xaie_debug"_a = false, "enable_cores"_a = true); - m.def( - "generate_txn", - [](MlirOperation op, const 
std::string &outputFile, - const std::string &workDirPath, bool aieSim, bool xaieDebug, - bool enableCores) { - mlir::python::CollectDiagnosticsToStringScope scope( - mlirOperationGetContext(op)); - if (mlirLogicalResultIsFailure( - aieTranslateToTxn(op, {outputFile.data(), outputFile.size()}, - {workDirPath.data(), workDirPath.size()}, - aieSim, xaieDebug, enableCores))) - throw py::value_error("Failed to generate txn binary because: " + - scope.takeMessage()); - }, - "module"_a, "output_file"_a, "work_dir_path"_a, "aiesim"_a = false, - "xaie_debug"_a = false, "enable_cores"_a = true); - m.def( "generate_ctrlpkt", [](MlirOperation op, const std::string &outputFile, diff --git a/python/compiler/aiecc/main.py b/python/compiler/aiecc/main.py index dddb65a910..35e34c22c9 100644 --- a/python/compiler/aiecc/main.py +++ b/python/compiler/aiecc/main.py @@ -564,7 +564,6 @@ async def process_cdo(self): generate_cdo(input_physical.operation, self.tmpdirname) async def process_txn(self): - from aie.dialects.aie import generate_txn with Context(), Location.unknown(): for elf in glob.glob("*.elf"): @@ -577,11 +576,17 @@ async def process_txn(self): shutil.copy(elf_map, self.tmpdirname) except shutil.SameFileError: pass - input_physical = Module.parse( - await read_file_async(self.prepend_tmp("input_physical.mlir")) + input_physical = await read_file_async( + self.prepend_tmp("input_physical.mlir") + ) + run_passes( + "builtin.module(aie.device(convert-aie-to-transaction{elf-dir=" + + self.tmpdirname + + "}))", + input_physical, + self.prepend_tmp("txn.mlir"), + self.opts.verbose, ) - txn_file = os.path.join(self.tmpdirname, "txn.mlir") - generate_txn(input_physical.operation, txn_file, self.tmpdirname) async def process_ctrlpkt(self): from aie.dialects.aie import generate_ctrlpkt diff --git a/python/dialects/aie.py b/python/dialects/aie.py index 09f2451e30..c810aab0a6 100644 --- a/python/dialects/aie.py +++ b/python/dialects/aie.py @@ -22,7 +22,6 @@ aie_llvm_link, generate_bcf, 
generate_cdo, - generate_txn, generate_ctrlpkt, generate_xaie, generate_control_packets, diff --git a/test/npu-xrt/add_one_two_txn/run.lit b/test/npu-xrt/add_one_two_txn/run.lit index 9410178e12..295fca13af 100644 --- a/test/npu-xrt/add_one_two_txn/run.lit +++ b/test/npu-xrt/add_one_two_txn/run.lit @@ -6,6 +6,6 @@ // RUN: clang %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem // RUN: %python aiecc.py --xclbin-kernel-name=ADDONE --xclbin-kernel-id=0x901 --xclbin-instance-name=ADDONEINST --no-aiesim --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=add_one.xclbin --npu-insts-name=add_one_insts.txt %S/aie1.mlir // RUN: %python aiecc.py --no-aiesim --aie-generate-txn --aie-generate-npu --no-compile-host --npu-insts-name=add_two_insts.txt %S/aie2.mlir -// RUN: aie-translate -aie-npu-instgen -aie-output-binary=true aie2.mlir.prj/txn.mlir -o add_two_cfg.bin +// RUN: aie-translate -aie-npu-instgen -aie-output-binary=true -aie-sequence-name=configure aie2.mlir.prj/txn.mlir -o add_two_cfg.bin // RUN: %run_on_npu ./test.exe -x add_one.xclbin -i add_one_insts.txt -c add_two_cfg.bin -j add_two_insts.txt | FileCheck %s // CHECK: PASS! diff --git a/test/npu-xrt/nd_memcpy_transforms/aie2.py b/test/npu-xrt/nd_memcpy_transforms/aie2.py index 1e13f7bd24..8569a84fe4 100644 --- a/test/npu-xrt/nd_memcpy_transforms/aie2.py +++ b/test/npu-xrt/nd_memcpy_transforms/aie2.py @@ -5,7 +5,7 @@ # # (c) Copyright 2024 AMD Inc. 
-# REQUIRES: ryzen_ai, valid_chess_license +# REQUIRES: ryzen_ai, valid_xchess_license # # RUN: xchesscc_wrapper aie2 -I %aietools/include -c %S/kernel.cc -o ./kernel.o # RUN: %python %S/aie2.py > ./aie2.mlir diff --git a/tools/aie-opt/CMakeLists.txt b/tools/aie-opt/CMakeLists.txt index 53b08c2eea..3a02f4a030 100644 --- a/tools/aie-opt/CMakeLists.txt +++ b/tools/aie-opt/CMakeLists.txt @@ -30,6 +30,7 @@ set(LIBS AIEX AIEXTransforms AIEXUtils + MLIRAIEToConfiguration MLIRAIEVecDialect MLIRAIEVecAIE1Dialect MLIRAIEVecTransformOps