diff --git a/lib/Targets/AIETargetNPU.cpp b/lib/Targets/AIETargetNPU.cpp index 062e71cb91..6b192462d6 100644 --- a/lib/Targets/AIETargetNPU.cpp +++ b/lib/Targets/AIETargetNPU.cpp @@ -21,6 +21,7 @@ #include "llvm/Support/Format.h" #include +#include using namespace mlir; using namespace xilinx; @@ -29,6 +30,10 @@ using namespace xilinx::AIEX; namespace { +std::vector getHeader() { + return {0x06030100, 0x00000101, 0x00000000, 0x00000000}; +} + // Example: // - instructions = {3,4,5} // - tailSize = 2 @@ -43,7 +48,7 @@ reserveAndGetTail(std::vector &instructions, uint64_t tailSize) { tailSize); } -void appendSync(std::vector &instructions, NpuSyncOp op) { +void appendSync(std::vector &instructions, uint16_t &num_ops, NpuSyncOp op) { auto words = reserveAndGetTail(instructions, 4); @@ -64,9 +69,11 @@ void appendSync(std::vector &instructions, NpuSyncOp op) { words[3] |= (op.getRowNum() & 0xff) << 8; words[3] |= (op.getColumnNum() & 0xff) << 16; words[3] |= (op.getChannel() & 0xff) << 24; + + num_ops++; } -void appendWrite32(std::vector &instructions, NpuWrite32Op op) { +void appendWrite32(std::vector &instructions, uint16_t &num_ops, NpuWrite32Op op) { auto words = reserveAndGetTail(instructions, 6); @@ -85,9 +92,12 @@ void appendWrite32(std::vector &instructions, NpuWrite32Op op) { words[4] = 6; // Operation Size words[5] = 0; // Padding + + num_ops++; } -void appendWriteBdShimTile(std::vector &instructions, +void appendWriteBdShimTile(std::vector &instructions, + uint16_t &num_ops, NpuWriteBdExShimTileOp op) { auto words = reserveAndGetTail(instructions, 12); @@ -151,13 +161,16 @@ void appendWriteBdShimTile(std::vector &instructions, words[11] |= (op.getLockAcqEnable() & 0x1) << 12; words[11] |= (op.getLockAcqVal() & 0xef) << 5; words[11] |= op.getLockAcqId() & 0xf; + + num_ops++; } } // namespace std::vector xilinx::AIE::AIETranslateToNPU(ModuleOp module) { - std::vector instructions; + std::vector instructions = getHeader(); + uint16_t num_ops = 0; DeviceOp deviceOp = *module.getOps().begin(); auto funcOps = deviceOp.getOps(); @@ -167,20 +180,26 @@ std::vector xilinx::AIE::AIETranslateToNPU(ModuleOp module) { Block &entry = f.getRegion().front(); for (auto &o : entry) { llvm::TypeSwitch(&o) - .Case([&](auto op) { appendSync(instructions, op); }) - .Case([&](auto op) { appendWrite32(instructions, op); }) + .Case([&](auto op) { appendSync(instructions, num_ops, op); }) + .Case([&](auto op) { appendWrite32(instructions, num_ops, op); }) .Case( - [&](auto op) { appendWriteBdShimTile(instructions, op); }); + [&](auto op) { appendWriteBdShimTile(instructions, num_ops, op); }); } } + num_ops = 3; + instructions.at(2) = (num_ops & 0xFFFF); return instructions; } LogicalResult xilinx::AIE::AIETranslateToNPU(ModuleOp module, raw_ostream &output) { + //auto instructions = AIETranslateToNPU(module); + //instructions.at(1) = (instructions.size() * 4 & 0xFFFF) << 16; + //for (auto w : instructions) + // output << llvm::format("%08X\n", w); auto instructions = AIETranslateToNPU(module); - for (auto w : instructions) - output << llvm::format("%08X\n", w); + instructions.at(3) = (instructions.size() * 4 & 0xFFFF); + output.write(reinterpret_cast(instructions.data()), instructions.size() * sizeof(uint32_t)); return success(); }