From 87d347d4704215047d03bae0d3056a381c5a2a9c Mon Sep 17 00:00:00 2001 From: Jorn Tuyls Date: Tue, 11 Jun 2024 15:52:17 -0700 Subject: [PATCH] [aie.objectfifo.link] Enable multiple consumers with dimensionsToStream --- lib/Dialect/AIE/IR/AIEDialect.cpp | 5 - test/dialect/AIE/roundtrip.mlir | 37 ++++ .../nd_dma_distribute_broadcast_AIE2.mlir | 174 ++++++++++++++++++ .../nd_dma_distribute_broadcast_AIE2_bad.mlir | 39 ---- 4 files changed, 211 insertions(+), 44 deletions(-) create mode 100644 test/dialect/AIE/roundtrip.mlir create mode 100644 test/objectFifo-stateful-transform/nd_dma_distribute_broadcast_AIE2.mlir delete mode 100644 test/objectFifo-stateful-transform/nd_dma_distribute_broadcast_AIE2_bad.mlir diff --git a/lib/Dialect/AIE/IR/AIEDialect.cpp b/lib/Dialect/AIE/IR/AIEDialect.cpp index e7e1dd6b69..b32aca2f33 100644 --- a/lib/Dialect/AIE/IR/AIEDialect.cpp +++ b/lib/Dialect/AIE/IR/AIEDialect.cpp @@ -625,11 +625,6 @@ LogicalResult ObjectFifoLinkOp::verify() { int outputSize = 0; for (auto fifoOut : getOutputObjectFifos()) { - if (!fifoOut.getDimensionsToStream().empty() && - fifoOut.getConsumerTiles().size() > 1) { - return emitOpError("currently does not support objectFifos with " - "dimensionsToStream and multiple consumers."); - } for (auto dims : fifoOut.getDimensionsFromStreamPerConsumer()) { if (!dims.empty()) return emitOpError("currently does not support objectFifos with " diff --git a/test/dialect/AIE/roundtrip.mlir b/test/dialect/AIE/roundtrip.mlir new file mode 100644 index 0000000000..4859090f99 --- /dev/null +++ b/test/dialect/AIE/roundtrip.mlir @@ -0,0 +1,37 @@ +//===- roundtrip.mlir ------------------------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2024, Advanced Micro Devices, Inc. 
+// +//===----------------------------------------------------------------------===// + +// RUN: aie-opt --split-input-file %s | FileCheck %s + +// aie.objectfifo.link with multiple consumers with toStream +// CHECK: aie.device +// CHECK: %[[TILE_0_2:.+]] = aie.tile(0, 2) +// CHECK: %[[TILE_0_3:.+]] = aie.tile(0, 3) +// CHECK: %[[TILE_1_2:.+]] = aie.tile(1, 2) +// CHECK: %[[TILE_1_3:.+]] = aie.tile(1, 3) +// CHECK: %[[TILE_0_0:.+]] = aie.tile(0, 0) +// CHECK: %[[TILE_0_1:.+]] = aie.tile(0, 1) +// CHECK: aie.objectfifo @obj1(%[[TILE_0_0]], {%[[TILE_0_1]]}, 4 : i32) : !aie.objectfifo> +// CHECK: aie.objectfifo @obj2(%[[TILE_0_1]] toStream [, , ], {%[[TILE_0_2]], %[[TILE_0_3]]}, 4 : i32) : !aie.objectfifo> +// CHECK: aie.objectfifo @obj3(%[[TILE_0_1]] toStream [, , ], {%[[TILE_1_2]], %[[TILE_1_3]]}, 4 : i32) : !aie.objectfifo> +// CHECK: aie.objectfifo.link [@obj1] -> [@obj2, @obj3]() +aie.device(npu1_4col) { + memref.global "public" @out0 : memref<16xi32> + %tile_0_2 = aie.tile(0, 2) + %tile_0_3 = aie.tile(0, 3) + %tile_1_2 = aie.tile(1, 2) + %tile_1_3 = aie.tile(1, 3) + %tile_0_0 = aie.tile(0, 0) + %tile_0_1 = aie.tile(0, 1) + aie.objectfifo @obj1(%tile_0_0, {%tile_0_1}, 4 : i32) : !aie.objectfifo> + aie.objectfifo @obj2(%tile_0_1 toStream [, , ], {%tile_0_2, %tile_0_3}, 4 : i32) : !aie.objectfifo> + aie.objectfifo @obj3(%tile_0_1 toStream [, , ], {%tile_1_2, %tile_1_3}, 4 : i32) : !aie.objectfifo> + aie.objectfifo.link [@obj1] -> [@obj2, @obj3]() +} diff --git a/test/objectFifo-stateful-transform/nd_dma_distribute_broadcast_AIE2.mlir b/test/objectFifo-stateful-transform/nd_dma_distribute_broadcast_AIE2.mlir new file mode 100644 index 0000000000..2364bb8df6 --- /dev/null +++ b/test/objectFifo-stateful-transform/nd_dma_distribute_broadcast_AIE2.mlir @@ -0,0 +1,174 @@ +//===- nd_dma_distribute_broadcast_AIE2.mlir -------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2023, Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// + +// RUN: aie-opt --aie-objectFifo-stateful-transform %s | FileCheck %s + +// CHECK-LABEL: aie.device(xcve2302) { +// CHECK-DAG: memref.global "public" @[[OF2_0_CONS:.+]] : memref<128xi32> +// CHECK-DAG: memref.global "public" @[[OF2_1_CONS:.+]] : memref<128xi32> +// CHECK-DAG: memref.global "public" @[[OF2:.+]] : memref<128xi32> +// CHECK-DAG: memref.global "public" @[[OF1_0_CONS:.+]] : memref<128xi32> +// CHECK-DAG: memref.global "public" @[[OF1_1_CONS:.+]] : memref<128xi32> +// CHECK-DAG: memref.global "public" @[[OF1:.+]] : memref<128xi32> +// CHECK-DAG: memref.global "public" @[[OF0_CONS:.+]] : memref<256xi32> +// CHECK-DAG: memref.global "public" @[[OF0:.+]] : memref<256xi32> +// CHECK-DAG: %[[TILE_1_0:.+]] = aie.tile(1, 0) +// CHECK-DAG: %[[TILE_1_1:.+]] = aie.tile(1, 1) +// CHECK-DAG: %[[TILE_1_2:.+]] = aie.tile(1, 2) +// CHECK-DAG: %[[TILE_2_2:.+]] = aie.tile(2, 2) +// CHECK-DAG: %[[TILE_1_3:.+]] = aie.tile(1, 3) +// CHECK-DAG: %[[TILE_2_3:.+]] = aie.tile(2, 3) +// CHECK-DAG: %[[OF2_0_CONS_BUFF_0:.+]] = aie.buffer(%[[TILE_1_3]]) +// CHECK-DAG: %[[OF2_0_CONS_BUFF_1:.+]] = aie.buffer(%[[TILE_1_3]]) +// CHECK-DAG: %[[OF2_0_CONS_PROD_LOCK:.+]] = aie.lock(%tile_1_3, 0) {init = 2 : i32 +// CHECK-DAG: %[[OF2_0_CONS_CONS_LOCK:.+]] = aie.lock(%tile_1_3, 1) {init = 0 : i32 +// CHECK-DAG: %[[OF2_1_CONS_BUFF_0:.+]] = aie.buffer(%[[TILE_2_3]]) +// CHECK-DAG: %[[OF2_1_CONS_BUFF_1:.+]] = aie.buffer(%[[TILE_2_3]]) +// CHECK-DAG: %[[OF2_1_CONS_PROD_LOCK:.+]] = aie.lock(%tile_2_3, 0) {init = 2 : i32 +// CHECK-DAG: %[[OF2_1_CONS_CONS_LOCK:.+]] = aie.lock(%tile_2_3, 1) {init = 0 : i32 +// CHECK-DAG: %[[OF1_0_CONS_BUFF_0:.+]] = aie.buffer(%[[TILE_1_2]]) +// CHECK-DAG: %[[OF1_0_CONS_BUFF_1:.+]] = 
aie.buffer(%[[TILE_1_2]]) +// CHECK-DAG: %[[OF1_0_CONS_PROD_LOCK:.+]] = aie.lock(%tile_1_2, 0) {init = 2 : i32 +// CHECK-DAG: %[[OF1_0_CONS_CONS_LOCK:.+]] = aie.lock(%tile_1_2, 1) {init = 0 : i32 +// CHECK-DAG: %[[OF1_1_CONS_BUFF_0:.+]] = aie.buffer(%[[TILE_2_2]]) +// CHECK-DAG: %[[OF1_1_CONS_BUFF_1:.+]] = aie.buffer(%[[TILE_2_2]]) +// CHECK-DAG: %[[OF1_1_CONS_PROD_LOCK:.+]] = aie.lock(%tile_2_2, 0) {init = 2 : i32 +// CHECK-DAG: %[[OF1_1_CONS_CONS_LOCK:.+]] = aie.lock(%tile_2_2, 1) {init = 0 : i32 +// CHECK-DAG: %[[OF0_CONS_BUFF_0:.+]] = aie.buffer(%[[TILE_1_1]]) +// CHECK-DAG: %[[OF0_CONS_BUFF_1:.+]] = aie.buffer(%[[TILE_1_1]]) +// CHECK-DAG: %[[OF0_CONS_PROD_LOCK:.+]] = aie.lock(%[[TILE_1_1]], 0) {init = 4 : i32 +// CHECK-DAG: %[[OF0_CONS_CONS_LOCK:.+]] = aie.lock(%[[TILE_1_1]], 1) {init = 0 : i32 +// CHECK-DAG: %[[OF0_PROD_LOCK:.+]] = aie.lock(%[[TILE_1_0]], 0) {init = 0 : i32 +// CHECK-DAG: %[[OF0_CONS_LOCK:.+]] = aie.lock(%[[TILE_1_0]], 1) {init = 0 : i32 +// CHECK-DAG: aie.flow(%[[TILE_1_0]], DMA : 0, %[[TILE_1_1]], DMA : 0) +// CHECK-DAG: aie.flow(%[[TILE_1_1]], DMA : 0, %[[TILE_2_2]], DMA : 0) +// CHECK-DAG: aie.flow(%[[TILE_1_1]], DMA : 0, %[[TILE_1_2]], DMA : 0) +// CHECK-DAG: aie.flow(%[[TILE_1_1]], DMA : 1, %[[TILE_2_3]], DMA : 0) +// CHECK-DAG: aie.flow(%[[TILE_1_1]], DMA : 1, %[[TILE_1_3]], DMA : 0) +// CHECK: aie.shim_dma_allocation @[[OF0]](MM2S, 0, 1) +// CHECK: %{{.+}} = aie.memtile_dma(%[[TILE_1_1]]) { +// CHECK: %[[VAL_0:.*]] = aie.dma_start(S2MM, 0, ^bb1, ^bb3) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[OF0_CONS_PROD_LOCK]], AcquireGreaterEqual, 2) +// CHECK: aie.dma_bd(%[[OF0_CONS_BUFF_0]] : memref<256xi32>, 0, 256) +// CHECK: aie.use_lock(%[[OF0_CONS_CONS_LOCK]], Release, 2) +// CHECK: aie.next_bd ^bb2 +// CHECK: ^bb2: +// CHECK: aie.use_lock(%[[OF0_CONS_PROD_LOCK]], AcquireGreaterEqual, 2) +// CHECK: aie.dma_bd(%[[OF0_CONS_BUFF_1]] : memref<256xi32>, 0, 256) +// CHECK: aie.use_lock(%[[OF0_CONS_CONS_LOCK]], Release, 2) +// CHECK: 
aie.next_bd ^bb1 +// CHECK: ^bb3: +// CHECK: %[[VAL_1:.+]] = aie.dma_start(MM2S, 0, ^bb4, ^bb6) +// CHECK: ^bb4: +// CHECK: aie.use_lock(%[[OF0_CONS_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF0_CONS_BUFF_0]] : memref<256xi32>, 0, 128, [, , , ]) +// CHECK: aie.use_lock(%[[OF0_CONS_PROD_LOCK]], Release, 1) +// CHECK: aie.next_bd ^bb5 +// CHECK: ^bb5: +// CHECK: aie.use_lock(%[[OF0_CONS_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF0_CONS_BUFF_1]] : memref<256xi32>, 0, 128, [, , , ]) +// CHECK: aie.use_lock(%[[OF0_CONS_PROD_LOCK]], Release, 1) +// CHECK: aie.next_bd ^bb4 +// CHECK: ^bb6: +// CHECK: %[[VAL_2:.+]] = aie.dma_start(MM2S, 1, ^bb7, ^bb9) +// CHECK: ^bb7: +// CHECK: aie.use_lock(%[[OF0_CONS_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF0_CONS_BUFF_0]] : memref<256xi32>, 128, 128, [, , , ]) +// CHECK: aie.use_lock(%[[OF0_CONS_PROD_LOCK]], Release, 1) +// CHECK: aie.next_bd ^bb8 +// CHECK: ^bb8: +// CHECK: aie.use_lock(%[[OF0_CONS_CONS_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF0_CONS_BUFF_1]] : memref<256xi32>, 128, 128, [, , , ]) +// CHECK: aie.use_lock(%[[OF0_CONS_PROD_LOCK]], Release, 1) +// CHECK: aie.next_bd ^bb7 +// CHECK: ^bb9: +// CHECK: aie.end +// CHECK: } +// CHECK: %{{.+}} = aie.mem(%[[TILE_1_2]]) { +// CHECK: %{{.+}} = aie.dma_start(S2MM, 0, ^bb1, ^bb3) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[OF1_0_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF1_0_CONS_BUFF_0]] : memref<128xi32>, 0, 128) +// CHECK: aie.use_lock(%[[OF1_0_CONS_CONS_LOCK]], Release, 1) +// CHECK: aie.next_bd ^bb2 +// CHECK: ^bb2: +// CHECK: aie.use_lock(%[[OF1_0_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF1_0_CONS_BUFF_1]] : memref<128xi32>, 0, 128) +// CHECK: aie.use_lock(%[[OF1_0_CONS_CONS_LOCK]], Release, 1) +// CHECK: aie.next_bd ^bb1 +// CHECK: ^bb3: +// CHECK: aie.end +// CHECK: } +// CHECK: %{{.+}} = aie.mem(%[[TILE_2_2]]) { +// CHECK: %{{.+}} = 
aie.dma_start(S2MM, 0, ^bb1, ^bb3) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[OF1_1_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF1_1_CONS_BUFF_0]] : memref<128xi32>, 0, 128) +// CHECK: aie.use_lock(%[[OF1_1_CONS_CONS_LOCK]], Release, 1) +// CHECK: aie.next_bd ^bb2 +// CHECK: ^bb2: +// CHECK: aie.use_lock(%[[OF1_1_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF1_1_CONS_BUFF_1]] : memref<128xi32>, 0, 128) +// CHECK: aie.use_lock(%[[OF1_1_CONS_CONS_LOCK]], Release, 1) +// CHECK: aie.next_bd ^bb1 +// CHECK: ^bb3: +// CHECK: aie.end +// CHECK: } +// CHECK: %{{.+}} = aie.mem(%[[TILE_1_3]]) { +// CHECK: %{{.+}} = aie.dma_start(S2MM, 0, ^bb1, ^bb3) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[OF2_0_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_0_CONS_BUFF_0]] : memref<128xi32>, 0, 128) +// CHECK: aie.use_lock(%[[OF2_0_CONS_CONS_LOCK]], Release, 1) +// CHECK: aie.next_bd ^bb2 +// CHECK: ^bb2: +// CHECK: aie.use_lock(%[[OF2_0_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_0_CONS_BUFF_1]] : memref<128xi32>, 0, 128) +// CHECK: aie.use_lock(%[[OF2_0_CONS_CONS_LOCK]], Release, 1) +// CHECK: aie.next_bd ^bb1 +// CHECK: ^bb3: +// CHECK: aie.end +// CHECK: } +// CHECK: %{{.+}} = aie.mem(%[[TILE_2_3]]) { +// CHECK: %{{.+}} = aie.dma_start(S2MM, 0, ^bb1, ^bb3) +// CHECK: ^bb1: +// CHECK: aie.use_lock(%[[OF2_1_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_1_CONS_BUFF_0]] : memref<128xi32>, 0, 128) +// CHECK: aie.use_lock(%[[OF2_1_CONS_CONS_LOCK]], Release, 1) +// CHECK: aie.next_bd ^bb2 +// CHECK: ^bb2: +// CHECK: aie.use_lock(%[[OF2_1_CONS_PROD_LOCK]], AcquireGreaterEqual, 1) +// CHECK: aie.dma_bd(%[[OF2_1_CONS_BUFF_1]] : memref<128xi32>, 0, 128) +// CHECK: aie.use_lock(%[[OF2_1_CONS_CONS_LOCK]], Release, 1) +// CHECK: aie.next_bd ^bb1 +// CHECK: ^bb3: +// CHECK: aie.end +// CHECK: } +aie.device(xcve2302) { + %tile10 = aie.tile(1, 0) + %tile11 = aie.tile(1, 1) + 
%tile12 = aie.tile(1, 2) + %tile22 = aie.tile(2, 2) + %tile13 = aie.tile(1, 3) + %tile23 = aie.tile(2, 3) + aie.objectfifo @of0 (%tile10, {%tile11}, 2 : i32) : !aie.objectfifo> + aie.objectfifo @of1 (%tile11 toStream [, + , + , + ], + {%tile12, %tile22}, 2 : i32) : !aie.objectfifo> + aie.objectfifo @of2 (%tile11 toStream [, + , + , + ], + {%tile13, %tile23}, 2 : i32) : !aie.objectfifo> + aie.objectfifo.link [ @of0 ] -> [ @of1, @of2 ] () +} diff --git a/test/objectFifo-stateful-transform/nd_dma_distribute_broadcast_AIE2_bad.mlir b/test/objectFifo-stateful-transform/nd_dma_distribute_broadcast_AIE2_bad.mlir deleted file mode 100644 index 4c456e2c47..0000000000 --- a/test/objectFifo-stateful-transform/nd_dma_distribute_broadcast_AIE2_bad.mlir +++ /dev/null @@ -1,39 +0,0 @@ -//===- nd_dma_distribute_broadcast_AIE2_bad.mlir ---------------*- MLIR -*-===// -// -// This file is licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -// Copyright (C) 2023, Advanced Micro Devices, Inc. -// -//===----------------------------------------------------------------------===// - -// RUN: aie-opt --aie-objectFifo-stateful-transform --verify-diagnostics %s - -module @ndDMAObjFifoAIE2 { - aie.device(xcve2302) { - %tile10 = aie.tile(1, 0) - %tile11 = aie.tile(1, 1) - %tile12 = aie.tile(1, 2) - %tile22 = aie.tile(2, 2) - %tile13 = aie.tile(1, 3) - %tile23 = aie.tile(2, 3) - - aie.objectfifo @of0 (%tile10, {%tile11}, - 2 : i32) : !aie.objectfifo> - - aie.objectfifo @of1 (%tile11 toStream [, - , - , - ], - {%tile12, %tile22}, 2 : i32) : !aie.objectfifo> - - aie.objectfifo @of2 (%tile11 toStream [, - , - , - ], - {%tile13, %tile23}, 2 : i32) : !aie.objectfifo> - // expected-error@+1 {{'aie.objectfifo.link' op currently does not support objectFifos with dimensionsToStream and multiple consumers.}} - aie.objectfifo.link [ @of0 ] -> [ @of1, @of2 ] () - } -}