Skip to content

Commit

Permalink
Merge branch 'main' into tiler-helper
Browse files Browse the repository at this point in the history
  • Loading branch information
hunhoffe committed Oct 23, 2024
2 parents d51e5c8 + 655adb1 commit 87df9a7
Show file tree
Hide file tree
Showing 15 changed files with 41 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
//
// REQUIRES: ryzen_ai, peano
//
// RUN: mkdir -p test
// RUN: cd test
// RUN: make -f %S/Makefile clean
// RUN: make -f %S/Makefile
// RUN: %run_on_npu make -f %S/Makefile run | FileCheck %s
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
//
// REQUIRES: ryzen_ai, chess
//
// RUN: mkdir -p test_chess
// RUN: cd test_chess
// RUN: make -f %S/Makefile.chess clean
// RUN: make -f %S/Makefile.chess
// RUN: %run_on_npu make -f %S/Makefile.chess run | FileCheck %s
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
//
// REQUIRES: ryzen_ai, peano
//
// RUN: mkdir -p %S/test_1
// RUN: cd %S/test_1
// RUN: mkdir -p test_1
// RUN: cd test_1
// RUN: make -f %S/Makefile clean
// RUN: make -f %S/Makefile
// RUN: %run_on_npu make -f %S/Makefile run | FileCheck %s
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
//
// REQUIRES: ryzen_ai, peano
//
// RUN: mkdir -p %S/test_2
// RUN: cd %S/test_2
// RUN: mkdir -p test_2
// RUN: cd test_2
// RUN: make -f %S/Makefile clean
// RUN: env M=768 K=512 N=512 m=64 k=64 n=64 dtype_in=i16 dtype_out=i16 make -f %S/Makefile
// RUN: %run_on_npu env M=768 K=512 N=512 m=64 k=64 n=64 dtype_in=i16 dtype_out=i16 make -f %S/Makefile run | FileCheck %s
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
//
// REQUIRES: ryzen_ai, peano
//
// RUN: mkdir -p %S/test_alt
// RUN: cd %S/test_alt
// RUN: mkdir -p test_alt
// RUN: cd test_alt
// RUN: make -f %S/Makefile clean
// RUN: env use_alt=1 make -f %S/Makefile
// RUN: %run_on_npu env use_alt=1 make -f %S/Makefile run | FileCheck %s
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
//
// REQUIRES: ryzen_ai, chess
//
// RUN: mkdir -p test_chess
// RUN: cd test_chess
// RUN: make -f %S/Makefile.chess clean
// RUN: make -f %S/Makefile.chess
// RUN: %run_on_npu make -f %S/Makefile.chess run | FileCheck %s
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
//
// REQUIRES: ryzen_ai, peano
//
// RUN: mkdir -p %S/test_i8
// RUN: cd %S/test_i8
// RUN: mkdir -p test_i8
// RUN: cd test_i8
// RUN: make -f %S/Makefile clean
// RUN: env dtype_in=i8 dtype_out=i8 m=64 k=128 n=64 M=512 K=512 N=512 make -f %S/Makefile
// RUN: %run_on_npu env dtype_in=i8 dtype_out=i8 m=64 k=128 n=64 M=512 K=512 N=512 make -f %S/Makefile run | FileCheck %s
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
//
// REQUIRES: ryzen_ai, peano
//
// RUN: mkdir -p %S/test_1_col
// RUN: cd %S/test_1_col
// RUN: mkdir -p test_1_col
// RUN: cd test_1_col
// RUN: make -f %S/Makefile clean
// RUN: env n_aie_cols=1 make -f %S/Makefile
// RUN: %run_on_npu env n_aie_cols=1 make -f %S/Makefile run | FileCheck %s
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
//
// REQUIRES: ryzen_ai, peano
//
// RUN: mkdir -p %S/test_2_col
// RUN: cd %S/test_2_col
// RUN: mkdir -p test_2_col
// RUN: cd test_2_col
// RUN: make -f %S/Makefile clean
// RUN: env n_aie_cols=2 make -f %S/Makefile
// RUN: %run_on_npu env n_aie_cols=2 make -f %S/Makefile run | FileCheck %s
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
//
// REQUIRES: ryzen_ai, peano
//
// RUN: mkdir -p %S/test_4_col
// RUN: cd %S/test_4_col
// RUN: mkdir -p test_4_col
// RUN: cd test_4_col
// RUN: make -f %S/Makefile clean
// RUN: env n_aie_cols=4 make -f %S/Makefile
// RUN: %run_on_npu env n_aie_cols=4 make -f %S/Makefile run | FileCheck %s
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
//
// REQUIRES: ryzen_ai, peano
//
// RUN: mkdir -p %S/test_4_col_i8
// RUN: cd %S/test_4_col_i8
// RUN: mkdir -p test_4_col_i8
// RUN: cd test_4_col_i8
// RUN: make -f %S/Makefile clean
// RUN: env n_aie_cols=4 dtype_in=i8 dtype_out=i8 M=512 K=512 N=512 m=64 k=128 n=64 make -f %S/Makefile
// RUN: %run_on_npu env n_aie_cols=4 dtype_in=i8 dtype_out=i8 M=512 K=512 N=512 m=64 k=128 n=64 make -f %S/Makefile run | FileCheck %s
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
//
// REQUIRES: ryzen_ai, chess
//
// RUN: mkdir -p test_chess
// RUN: cd test_chess
// RUN: make -f %S/Makefile.chess clean
// RUN: make -f %S/Makefile.chess
// RUN: %run_on_npu make -f %S/Makefile.chess run | FileCheck %s
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
//
// REQUIRES: ryzen_ai, peano
//
// RUN: mkdir -p %S/test_b_col_maj
// RUN: cd %S/test_b_col_maj
// RUN: mkdir -p test_b_col_maj
// RUN: cd test_b_col_maj
// RUN: make -f %S/Makefile clean
// RUN: env n_aie_cols=4 b_col_maj=1 dtype_in=bf16 dtype_out=f32 M=256 K=256 N=256 m=32 k=32 n=32 make -f %S/Makefile
// RUN: %run_on_npu env n_aie_cols=4 b_col_maj=1 dtype_in=bf16 dtype_out=f32 M=256 K=256 N=256 m=32 k=32 n=32 make -f %S/Makefile run | FileCheck %s
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ class object_fifo_link(ObjectFifoLinkOp):
self,
fifoIns,
fifoOuts,
srcOffsets=[],
dstOffsets=[],
)
```
A link allows the user to specify a set of input Object FIFOs via the `fifoIns` input and a set of output ones via the `fifoOuts` input. Each Object FIFO may be specified either using its `name` or its Python object. Both inputs can be either a single Object FIFO or an array of them. It is required that there exists at least one shared tile between the consumer tiles of `fifoIns` and the producer tiles of `fifoOuts` for a link to be valid. This is because the implicit copy of data will be done using the Data Movement Accelerators (DMAs) of that tile.
Expand All @@ -47,16 +49,17 @@ Currently, the Object FIFO lowering uses the order in which the output FIFOs are

<img src="./../../../assets/Distribute.png" height="200">

The following code snippet describes the figure above. There are three Object FIFOs: `of0` has a producer tile A and a consumer tile B, while `of1` and `of2` have B as their producer tile and C and D respectively as their consumer tiles. The link specifies that data from `of0` is distributed to `of1` and `of2`. In this link, B is the shared tile where the implicit data copy will take place via B's DMAs. We can also note how `of1` and `of2`'s datatypes are half of `of0`'s, which means that the first half of objects in `of0` will go to `of1` and the second half to `of2`, based on their order in the link.
The following code snippet describes the figure above. There are three Object FIFOs: `of0` has a producer tile A and a consumer tile B, while `of1` and `of2` have B as their producer tile and C and D respectively as their consumer tiles. The link specifies that data from `of0` is distributed to `of1` and `of2`. In this link, B is the shared tile where the implicit data copy will take place via B's DMAs. Note that the datatypes of `of1` and `of2` are half the size of `of0`'s (128 elements versus 256), which means that the first half of each object in `of0` will go to `of1` and the second half to `of2`, based on their order in the link. This is set explicitly by specifying the `dstOffsets` option on the link (`[0, 128]` in the snippet below).

```python
A = tile(1, 0)
B = tile(1, 1)
C = tile(1, 3)
D = tile(2, 3)
of0 = object_fifo("objfifo0", A, B, 2, np.ndarray[(256,), np.dtype[np.int32]])
of1 = object_fifo("objfifo1", B, C, 2, np.ndarray[(256,), np.dtype[np.int32]])
of2 = object_fifo("objfifo2", B, D, 2, np.ndarray[(256,), np.dtype[np.int32]])
object_fifo_link(of0, [of1, of2])
of1 = object_fifo("objfifo1", B, C, 2, np.ndarray[(128,), np.dtype[np.int32]])
of2 = object_fifo("objfifo2", B, D, 2, np.ndarray[(128,), np.dtype[np.int32]])
object_fifo_link(of0, [of1, of2], [], [0, 128])
```

A full design example that uses this feature is available in Section 2e: [04_distribute_L2](../../section-2e/04_distribute_L2/).
Expand All @@ -76,9 +79,9 @@ B = tile(1, 1)
C = tile(1, 3)
D = tile(2, 3)
of0 = object_fifo("objfifo0", B, A, 2, np.ndarray[(256,), np.dtype[np.int32]])
of1 = object_fifo("objfifo1", C, B, 2, np.ndarray[(256,), np.dtype[np.int32]])
of2 = object_fifo("objfifo2", D, B, 2, np.ndarray[(256,), np.dtype[np.int32]])
object_fifo_link([of1, of2], of0)
of1 = object_fifo("objfifo1", C, B, 2, np.ndarray[(128,), np.dtype[np.int32]])
of2 = object_fifo("objfifo2", D, B, 2, np.ndarray[(128,), np.dtype[np.int32]])
object_fifo_link([of1, of2], of0, [0, 128], [])
```

A full design example that uses these features is available in Section 2e: [05_join_L2](../../section-2e/05_join_L2/).
Expand Down
10 changes: 5 additions & 5 deletions programming_guide/section-2/section-2f/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ tile_a = tile(1, 3)

prod_lock = lock(tile_a, lock_id=0, init=1)
cons_lock = lock(tile_a, lock_id=1, init=0)
buff_in = buffer(tile=tile_a, shape=(256,), dtype=np.int32) # 256xi32
buff_in = buffer(tile=tile_a, datatype=np.ndarray[(256,), np.dtype[np.int32]]) # 256xi32

@mem(tile_a)
def mem_body():
Expand All @@ -78,8 +78,8 @@ tile_a = tile(1, 3)

prod_lock = lock(tile_a, lock_id=0, init=2) # note that the producer lock now has 2 tokens
cons_lock = lock(tile_a, lock_id=1, init=0)
buff_ping = buffer(tile=tile_a, shape=(256,), dtype=np.int32) # 256xi32
buff_pong = buffer(tile=tile_a, shape=(256,), dtype=np.int32) # 256xi32
buff_ping = buffer(tile=tile_a, datatype=np.ndarray[(256,), np.dtype[np.int32]]) # 256xi32
buff_pong = buffer(tile=tile_a, datatype=np.ndarray[(256,), np.dtype[np.int32]]) # 256xi32

@mem(tile_a)
def mem_body():
Expand Down Expand Up @@ -130,11 +130,11 @@ tile_b = tile(1, 3)

prod_lock_a = lock(tile_a, lock_id=0, init=1)
cons_lock_a = lock(tile_a, lock_id=1, init=0)
buff_a = buffer(tile=tile_a, shape=(256,), dtype=np.int32) # 256xi32
buff_a = buffer(tile=tile_a, datatype=np.ndarray[(256,), np.dtype[np.int32]]) # 256xi32

prod_lock_b = lock(tile_b, lock_id=0, init=1)
cons_lock_b = lock(tile_b, lock_id=1, init=0)
buff_b = buffer(tile=tile_b, shape=(256,), dtype=np.int32) # 256xi32
buff_b = buffer(tile=tile_b, datatype=np.ndarray[(256,), np.dtype[np.int32]]) # 256xi32

aie.flow(tile_a, WireBundle.DMA, 0, tile_b, WireBundle.DMA, 1)

Expand Down

0 comments on commit 87df9a7

Please sign in to comment.