Skip to content

Commit

Permalink
[WIP] switch to transaction binary
Browse files Browse the repository at this point in the history
  • Loading branch information
jgmelber committed May 2, 2024
1 parent 6b117d1 commit df28652
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 71 deletions.
131 changes: 81 additions & 50 deletions lib/Targets/AIETargetNPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,81 +45,112 @@ reserveAndGetTail(std::vector<uint32_t> &instructions, uint64_t tailSize) {

/// Append a sync instruction to the transaction stream as a 4-word custom op.
///
/// Words written (bit positions are within each 32-bit word):
///   [0] opcode 0x80 in bits 7:0, column in 15:8, row in 23:16, 31:24 padding
///   [1] operation size (4, matching the number of words reserved here)
///   [2] direction in bits 7:0, row in 15:8, column in 23:16
///   [3] row count in bits 15:8, column count in 23:16, channel in 31:24
///
/// Only the transaction-binary layout is emitted; the earlier 2-word packing
/// (opcode 3 in the top byte) must not be mixed into the same words.
void appendSync(std::vector<uint32_t> &instructions, NpuSyncOp op) {

  // NOTE(review): the |= packing below assumes reserveAndGetTail hands back
  // zero-initialized words — confirm against its definition.
  auto words = reserveAndGetTail(instructions, 4);

  // XAIE_IO_CUSTOM_OP_BEGIN
  // Wait until the number of BDs in the same channel of all tiles equal to 0
  const uint32_t opCode = 0x80;
  words[0] |= (opCode & 0xff);
  words[0] |= (op.getColumn() & 0xff) << 8;
  words[0] |= (op.getRow() & 0xff) << 16;
  // Bits 31:24 of words[0] are padding and stay zero.

  words[1] = 4; // Operation Size

  words[2] |= op.getDirection() & 0xff;
  words[2] |= (op.getRow() & 0xff) << 8;
  words[2] |= (op.getColumn() & 0xff) << 16;

  words[3] |= (op.getRowNum() & 0xff) << 8;
  words[3] |= (op.getColumnNum() & 0xff) << 16;
  words[3] |= (op.getChannel() & 0xff) << 24;
}

/// Append a 32-bit register write to the transaction stream as a 6-word op.
///
/// Words written:
///   [0] opcode 0 in bits 7:0, column in 15:8, row in 23:16, 31:24 padding
///   [1] low 32 bits of the target address
///   [2] high 32 bits of the target address (always 0 here)
///   [3] value to write
///   [4] operation size (6, matching the number of words reserved here)
///   [5] padding
///
/// Only the transaction-binary layout is emitted; the earlier 3-word packing
/// (opcode 2, address in word 1, value in word 2) would collide with the
/// ADDR_HIGH slot and must not be mixed in.
void appendWrite32(std::vector<uint32_t> &instructions, NpuWrite32Op op) {

  // NOTE(review): the |= packing of words[0] assumes reserveAndGetTail hands
  // back zero-initialized words — confirm against its definition.
  auto words = reserveAndGetTail(instructions, 6);

  // XAIE_IO_WRITE
  const uint32_t opCode = 0;
  words[0] |= (opCode & 0xff);
  words[0] |= (op.getColumn() & 0xff) << 8;
  words[0] |= (op.getRow() & 0xff) << 16;
  // Bits 31:24 of words[0] are padding and stay zero.

  words[1] = op.getAddress(); // ADDR_LOW
  words[2] = 0;               // ADDR_HIGH

  words[3] = op.getValue(); // Value

  words[4] = 6; // Operation Size

  words[5] = 0; // Padding
}

void appendWriteBdShimTile(std::vector<uint32_t> &instructions,
NpuWriteBdExShimTileOp op) {

auto words = reserveAndGetTail(instructions, 10);
auto words = reserveAndGetTail(instructions, 12);

uint32_t opCode = 1;
words[0] |= (opCode & 0xff);
words[0] |= (op.getColumn() & 0xff) << 8;
words[0] |= (0 & 0xff) << 16;
words[0] |= (op.getColumn() & 0xff) << 24;

words[1] |= (0 & 0xff);
words[1] |= (0 & 0xff) << 8;
words[1] |= (op.getColumn() & 0xff) << 16;
words[1] |= 0 << 24; // Padding

auto bd_id = op.getBdId();
uint32_t bd_addr = 0x1D000 + bd_id * 0x20;
words[2] = bd_addr; // ADDR

uint32_t opCode = 6;
words[0] |= (opCode & 0xff) << 24;
words[0] |= (op.getColumn() & 0xff) << 16;
words[0] |= (op.getColumnNum() & 0xff) << 8;
words[0] |= (op.getDdrId() & 0xf) << 4;
words[0] |= (op.getBdId() & 0xf);
words[3] = 12; // Operation Size;

// TODO: Address Incr
// words[1] = ...
// DMA_BDX_0
words[4] = op.getBufferLength();

words[2] = op.getBufferLength();
words[3] = op.getBufferOffset();
// DMA_BDX_1
words[5] = op.getBufferOffset();

// DMA_BDX_2
// En Packet , OoO BD ID , Packet ID , Packet Type
words[4] |= (op.getEnablePacket() & 0x1) << 30;
words[4] |= (op.getOutOfOrderId() & 0x3f) << 24;
words[4] |= (op.getPacketId() & 0x1f) << 19;
words[4] |= (op.getPacketType() & 0x7) << 16;
words[6] |= (op.getEnablePacket() & 0x1) << 30;
words[6] |= (op.getOutOfOrderId() & 0x3f) << 24;
words[6] |= (op.getPacketId() & 0x1f) << 19;
words[6] |= (op.getPacketType() & 0x7) << 16;

// DMA_BDX_3
// TODO: Secure Access
words[5] |= (op.getD0Size() & 0x3ff) << 20;
words[5] |= op.getD0Stride() & 0xfffff;
words[7] |= (op.getD0Size() & 0x3ff) << 20;
words[7] |= op.getD0Stride() & 0xfffff;

words[6] = 0x80000000; // burst length;
words[6] |= (op.getD1Size() & 0x3ff) << 20;
words[6] |= op.getD1Stride() & 0xfffff;
// DMA_BDX_4
words[8] = 0x80000000; // burst length;
words[8] |= (op.getD1Size() & 0x3ff) << 20;
words[8] |= op.getD1Stride() & 0xfffff;

// DMA_BDX_5
// TODO: SIMID, AxCache, AXQoS
words[7] = op.getD2Stride() & 0xfffff;
words[9] = op.getD2Stride() & 0xfffff;

words[8] |= (op.getIterationCurrent() & 0x3f) << 26;
words[8] |= (op.getIterationSize() & 0x3f) << 20;
words[8] |= op.getIterationStride() & 0xfffff;
// DMA_BDX_6
words[10] |= (op.getIterationCurrent() & 0x3f) << 26;
words[10] |= (op.getIterationSize() & 0x3f) << 20;
words[10] |= op.getIterationStride() & 0xfffff;

// DMA_BDX_7
// TODO: TLAST Suppress
words[9] |= (op.getNextBd() & 0xf) << 27;
words[9] |= (op.getUseNextBd() & 0x1) << 26;
words[9] |= (op.getValidBd() & 0x1) << 25;
words[9] |= (op.getLockRelVal() & 0xef) << 18;
words[9] |= (op.getLockRelId() & 0xf) << 13;
words[9] |= (op.getLockAcqEnable() & 0x1) << 12;
words[9] |= (op.getLockAcqVal() & 0xef) << 5;
words[9] |= op.getLockAcqId() & 0xf;
words[11] |= (op.getNextBd() & 0xf) << 27;
words[11] |= (op.getUseNextBd() & 0x1) << 26;
words[11] |= (op.getValidBd() & 0x1) << 25;
words[11] |= (op.getLockRelVal() & 0xef) << 18;
words[11] |= (op.getLockRelId() & 0xf) << 13;
words[11] |= (op.getLockAcqEnable() & 0x1) << 12;
words[11] |= (op.getLockAcqVal() & 0xef) << 5;
words[11] |= op.getLockAcqId() & 0xf;
}

} // namespace
Expand Down
33 changes: 12 additions & 21 deletions python/compiler/aiecc/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def emit_design_kernel_json(
buffer_args=None,
):
if buffer_args is None:
buffer_args = [f"bo{i}" for i in range(4)]
buffer_args = [f"bo{i}" for i in range(5)]

arguments = [
{
Expand All @@ -119,18 +119,7 @@ def emit_design_kernel_json(
"offset" : "0x00"
},
]

offset = 0x08
for buf in buffer_args:
arg = {
"name": buf,
"memory-connection": "HOST",
"address-qualifier": "GLOBAL",
"type": "char *",
"offset": str(hex(offset)),
}
arguments.append(arg)
offset += 0x8

inst_arguments = [
{
Expand All @@ -150,14 +139,16 @@ def emit_design_kernel_json(
arguments.append(inst_arguments[0])
arguments.append(inst_arguments[1])
offset += 12
arg = {
"name": "mc",
"memory-connection": "HOST",
"address-qualifier": "GLOBAL",
"type": "char *",
"offset": str(hex(offset)),
}
arguments.append(arg)

for buf in buffer_args:
arg = {
"name": buf,
"address-qualifier": "SCALAR",
"type": "uint64_t",
"offset": str(hex(offset)),
}
arguments.append(arg)
offset += 0x8

return {
"ps-kernels": {
Expand Down Expand Up @@ -601,7 +592,7 @@ async def process_xclbin_gen(self, has_cores):
self.prepend_tmp("aie_partition.json"),
)

buffer_arg_names = [f"bo{i}" for i in range(4)]
buffer_arg_names = [f"bo{i}" for i in range(5)]
await write_file_async(
json.dumps(
emit_design_kernel_json(
Expand Down

0 comments on commit df28652

Please sign in to comment.