Skip to content

Commit

Permalink
[aievec] Add new shuffle ops
Browse files Browse the repository at this point in the history
This replaces the old `aievec.shuffle` op with a new one with a better
syntax that supports all cases supported by the intrinsics, and has
strong type guarantees.

For legacy purposes, we leave the old shuffle instruction renamed as
`aievec.legacyshuffle`.
  • Loading branch information
jsetoain committed May 28, 2024
1 parent 555f014 commit 362ad18
Show file tree
Hide file tree
Showing 18 changed files with 664 additions and 28 deletions.
96 changes: 96 additions & 0 deletions include/aie/Dialect/AIEVec/IR/AIEVecAttributes.td
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
//===- AIEVecAttributes.td - AIE vector attributes def. ----*- tablegen -*-====//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
// Defines AIE vector attributes.
//===----------------------------------------------------------------------===//

#ifndef AIE_DIALECT_AIEVEC_IR_AIEVECATTRIBUTES_TD
#define AIE_DIALECT_AIEVEC_IR_AIEVECATTRIBUTES_TD

include "aie/Dialect/AIEVec/IR/AIEVecDialect.td"
include "mlir/IR/EnumAttr.td"

// Shuffle modes for shuffle ops.
//
// Each case below binds three things: the C++ enumerator symbol, its 32-bit
// integer value, and the lowercase keyword used to spell the mode in the
// textual IR.
//
// Mode names follow the pattern `t<width>_<r>x<c>(_(hi|lo))?`:
//   * `<width>` is the bitwidth of the vector element type,
//   * `<r>` and `<c>` are the number of rows and columns being transposed,
//   * the optional `_lo`/`_hi` suffix marks modes that return only one half of
//     a wider shuffled result.
// `t16_1x2_flip` is the one mode that does not follow this pattern.
//
// NOTE(review): the values are dense (0..47) and presumably mirror the shuffle
// mode encoding expected by the AIE2 intrinsics -- confirm before renumbering
// or inserting cases in the middle of the list.
def SHUFFLE_MODE_T8_64X2_LO : I32EnumAttrCase<"T8_64X2_LO", 0, "t8_64x2_lo">;
def SHUFFLE_MODE_T8_64X2_HI : I32EnumAttrCase<"T8_64X2_HI", 1, "t8_64x2_hi">;
def SHUFFLE_MODE_T16_32X2_LO : I32EnumAttrCase<"T16_32X2_LO", 2, "t16_32x2_lo">;
def SHUFFLE_MODE_T16_32X2_HI : I32EnumAttrCase<"T16_32X2_HI", 3, "t16_32x2_hi">;
def SHUFFLE_MODE_T32_16X2_LO : I32EnumAttrCase<"T32_16X2_LO", 4, "t32_16x2_lo">;
def SHUFFLE_MODE_T32_16X2_HI : I32EnumAttrCase<"T32_16X2_HI", 5, "t32_16x2_hi">;
def SHUFFLE_MODE_T64_8X2_LO : I32EnumAttrCase<"T64_8X2_LO", 6, "t64_8x2_lo">;
def SHUFFLE_MODE_T64_8X2_HI : I32EnumAttrCase<"T64_8X2_HI", 7, "t64_8x2_hi">;
def SHUFFLE_MODE_T128_4X2_LO : I32EnumAttrCase<"T128_4X2_LO", 8, "t128_4x2_lo">;
def SHUFFLE_MODE_T128_4X2_HI : I32EnumAttrCase<"T128_4X2_HI", 9, "t128_4x2_hi">;
def SHUFFLE_MODE_T256_2X2_LO : I32EnumAttrCase<"T256_2X2_LO", 10, "t256_2x2_lo">;
def SHUFFLE_MODE_T256_2X2_HI : I32EnumAttrCase<"T256_2X2_HI", 11, "t256_2x2_hi">;
def SHUFFLE_MODE_T128_2X4_LO : I32EnumAttrCase<"T128_2X4_LO", 12, "t128_2x4_lo">;
def SHUFFLE_MODE_T128_2X4_HI : I32EnumAttrCase<"T128_2X4_HI", 13, "t128_2x4_hi">;
def SHUFFLE_MODE_T64_2X8_LO : I32EnumAttrCase<"T64_2X8_LO", 14, "t64_2x8_lo">;
def SHUFFLE_MODE_T64_2X8_HI : I32EnumAttrCase<"T64_2X8_HI", 15, "t64_2x8_hi">;
def SHUFFLE_MODE_T32_2X16_LO : I32EnumAttrCase<"T32_2X16_LO", 16, "t32_2x16_lo">;
def SHUFFLE_MODE_T32_2X16_HI : I32EnumAttrCase<"T32_2X16_HI", 17, "t32_2x16_hi">;
def SHUFFLE_MODE_T16_2X32_LO : I32EnumAttrCase<"T16_2X32_LO", 18, "t16_2x32_lo">;
def SHUFFLE_MODE_T16_2X32_HI : I32EnumAttrCase<"T16_2X32_HI", 19, "t16_2x32_hi">;
def SHUFFLE_MODE_T8_2X64_LO : I32EnumAttrCase<"T8_2X64_LO", 20, "t8_2x64_lo">;
def SHUFFLE_MODE_T8_2X64_HI : I32EnumAttrCase<"T8_2X64_HI", 21, "t8_2x64_hi">;
def SHUFFLE_MODE_T512_1X2_LO : I32EnumAttrCase<"T512_1X2_LO", 22, "t512_1x2_lo">;
def SHUFFLE_MODE_T512_1X2_HI : I32EnumAttrCase<"T512_1X2_HI", 23, "t512_1x2_hi">;
def SHUFFLE_MODE_T16_16X4_LO : I32EnumAttrCase<"T16_16X4_LO", 24, "t16_16x4_lo">;
def SHUFFLE_MODE_T16_16X4_HI : I32EnumAttrCase<"T16_16X4_HI", 25, "t16_16x4_hi">;
def SHUFFLE_MODE_T16_4X16_LO : I32EnumAttrCase<"T16_4X16_LO", 26, "t16_4x16_lo">;
def SHUFFLE_MODE_T16_4X16_HI : I32EnumAttrCase<"T16_4X16_HI", 27, "t16_4x16_hi">;
def SHUFFLE_MODE_T16_8X4 : I32EnumAttrCase<"T16_8X4", 28, "t16_8x4">;
def SHUFFLE_MODE_T16_4X8 : I32EnumAttrCase<"T16_4X8", 29, "t16_4x8">;
def SHUFFLE_MODE_T32_8X4_LO : I32EnumAttrCase<"T32_8X4_LO", 30, "t32_8x4_lo">;
def SHUFFLE_MODE_T32_8X4_HI : I32EnumAttrCase<"T32_8X4_HI", 31, "t32_8x4_hi">;
def SHUFFLE_MODE_T32_4X8_LO : I32EnumAttrCase<"T32_4X8_LO", 32, "t32_4x8_lo">;
def SHUFFLE_MODE_T32_4X8_HI : I32EnumAttrCase<"T32_4X8_HI", 33, "t32_4x8_hi">;
def SHUFFLE_MODE_T32_4X4 : I32EnumAttrCase<"T32_4X4", 34, "t32_4x4">;
def SHUFFLE_MODE_T8_8X8 : I32EnumAttrCase<"T8_8X8", 35, "t8_8x8">;
def SHUFFLE_MODE_T8_16X4 : I32EnumAttrCase<"T8_16X4", 36, "t8_16x4">;
def SHUFFLE_MODE_T8_4X16 : I32EnumAttrCase<"T8_4X16", 37, "t8_4x16">;
def SHUFFLE_MODE_T16_1X2_flip : I32EnumAttrCase<"T16_1X2_flip", 38, "t16_1x2_flip">;
def SHUFFLE_MODE_T16_4X4 : I32EnumAttrCase<"T16_4X4", 39, "t16_4x4">;
def SHUFFLE_MODE_T16_4X2 : I32EnumAttrCase<"T16_4X2", 40, "t16_4x2">;
def SHUFFLE_MODE_T16_2X4 : I32EnumAttrCase<"T16_2X4", 41, "t16_2x4">;
def SHUFFLE_MODE_T16_8X2 : I32EnumAttrCase<"T16_8X2", 42, "t16_8x2">;
def SHUFFLE_MODE_T16_2X8 : I32EnumAttrCase<"T16_2X8", 43, "t16_2x8">;
def SHUFFLE_MODE_T16_16X2 : I32EnumAttrCase<"T16_16X2", 44, "t16_16x2">;
def SHUFFLE_MODE_T16_2X16 : I32EnumAttrCase<"T16_2X16", 45, "t16_2x16">;
def SHUFFLE_MODE_T8_8X4 : I32EnumAttrCase<"T8_8X4", 46, "t8_8x4">;
def SHUFFLE_MODE_T8_4X8 : I32EnumAttrCase<"T8_4X8", 47, "t8_4x8">;

// 32-bit integer enum collecting every SHUFFLE_MODE_* case defined in this
// file. The case list is kept in the same order as the case definitions
// (ascending integer value); a new mode needs both a case definition and an
// entry in this list.
def ShuffleMode : I32EnumAttr<
"ShuffleMode",
"Shuffle mode for AIEVec shuffle operations",
[SHUFFLE_MODE_T8_64X2_LO, SHUFFLE_MODE_T8_64X2_HI, SHUFFLE_MODE_T16_32X2_LO,
SHUFFLE_MODE_T16_32X2_HI, SHUFFLE_MODE_T32_16X2_LO, SHUFFLE_MODE_T32_16X2_HI,
SHUFFLE_MODE_T64_8X2_LO, SHUFFLE_MODE_T64_8X2_HI, SHUFFLE_MODE_T128_4X2_LO,
SHUFFLE_MODE_T128_4X2_HI, SHUFFLE_MODE_T256_2X2_LO, SHUFFLE_MODE_T256_2X2_HI,
SHUFFLE_MODE_T128_2X4_LO, SHUFFLE_MODE_T128_2X4_HI, SHUFFLE_MODE_T64_2X8_LO,
SHUFFLE_MODE_T64_2X8_HI, SHUFFLE_MODE_T32_2X16_LO, SHUFFLE_MODE_T32_2X16_HI,
SHUFFLE_MODE_T16_2X32_LO, SHUFFLE_MODE_T16_2X32_HI, SHUFFLE_MODE_T8_2X64_LO,
SHUFFLE_MODE_T8_2X64_HI, SHUFFLE_MODE_T512_1X2_LO, SHUFFLE_MODE_T512_1X2_HI,
SHUFFLE_MODE_T16_16X4_LO, SHUFFLE_MODE_T16_16X4_HI, SHUFFLE_MODE_T16_4X16_LO,
SHUFFLE_MODE_T16_4X16_HI, SHUFFLE_MODE_T16_8X4, SHUFFLE_MODE_T16_4X8,
SHUFFLE_MODE_T32_8X4_LO, SHUFFLE_MODE_T32_8X4_HI, SHUFFLE_MODE_T32_4X8_LO,
SHUFFLE_MODE_T32_4X8_HI, SHUFFLE_MODE_T32_4X4, SHUFFLE_MODE_T8_8X8,
SHUFFLE_MODE_T8_16X4, SHUFFLE_MODE_T8_4X16, SHUFFLE_MODE_T16_1X2_flip,
SHUFFLE_MODE_T16_4X4, SHUFFLE_MODE_T16_4X2, SHUFFLE_MODE_T16_2X4,
SHUFFLE_MODE_T16_8X2, SHUFFLE_MODE_T16_2X8, SHUFFLE_MODE_T16_16X2,
SHUFFLE_MODE_T16_2X16, SHUFFLE_MODE_T8_8X4, SHUFFLE_MODE_T8_4X8]> {
// Emit the generated C++ enum into the dialect's namespace.
let cppNamespace = "::xilinx::aievec";
// Only generate the plain C++ enum (and its helpers) from this record; the
// attribute class wrapping it is defined separately through `EnumAttr`, so
// the specialized attribute must not be generated a second time here.
let genSpecializedAttr = 0;
}

// Dialect attribute wrapping the `ShuffleMode` enum, registered in the AIEVec
// dialect under the mnemonic `mode`.
def AIEVec_ShuffleModeAttr : EnumAttr<AIEVec_Dialect, ShuffleMode, "mode"> {
// Custom syntax: the mode keyword is written between square brackets,
// e.g. `[t32_4x8_lo]`.
let assemblyFormat = "`[` $value `]`";
}

#endif // AIE_DIALECT_AIEVEC_IR_AIEVECATTRIBUTES_TD
5 changes: 4 additions & 1 deletion include/aie/Dialect/AIEVec/IR/AIEVecDialect.td
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2022 Xilinx Inc.
// (c) Copyright 2022-2024 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
// Defines AIE vector dialect.
Expand All @@ -19,6 +19,9 @@ def AIEVec_Dialect : Dialect {
let name = "aievec";
let summary = "Types and operations for AIE vector dialect";
let cppNamespace = "::xilinx::aievec";

let useDefaultAttributePrinterParser = 1;

let extraClassDeclaration = [{
void registerTypes();
}];
Expand Down
6 changes: 5 additions & 1 deletion include/aie/Dialect/AIEVec/IR/AIEVecOps.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2022 Xilinx Inc.
// (c) Copyright 2022-2024 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
// This file defines the AIE vector dialect and the operations.
Expand All @@ -17,6 +17,10 @@
#include "mlir/Interfaces/InferTypeOpInterface.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"

#include "aie/Dialect/AIEVec/IR/AIEVecEnums.h.inc"
#define GET_ATTRDEF_CLASSES
#include "aie/Dialect/AIEVec/IR/AIEVecAttributes.h.inc"

#include "AIEVecDialect.h"

#define GET_OP_CLASSES
Expand Down
156 changes: 153 additions & 3 deletions include/aie/Dialect/AIEVec/IR/AIEVecOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2023 AMD Inc.
// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
// Defines AIE vector operations.
Expand All @@ -14,6 +14,7 @@
#define AIEVEC_OPS

include "aie/Dialect/AIE/IR/AIEAttrs.td"
include "aie/Dialect/AIEVec/IR/AIEVecAttributes.td"
include "aie/Dialect/AIEVec/IR/AIEVecTypes.td"
include "aie/Dialect/AIEVec/IR/AIEVecTypeConstraints.td"

Expand Down Expand Up @@ -587,8 +588,8 @@ def AIEVec_ShiftOp:
}];
}

def AIEVec_ShuffleOp:
AIEVec_Op<"shuffle", [
def AIEVec_LegacyShuffleOp:
AIEVec_Op<"legacyshuffle", [
Pure
]>,
Arguments<(ins AnyVector:$source,
Expand Down Expand Up @@ -872,4 +873,153 @@ def AIEVec_MatMulOp:
let hasVerifier = 0;
}

def AIEVec_ShuffleOp : AIEVec_Op<"shuffle",
[Pure, AllTypesMatch<["lhs", "result"]>,
OptionalTypesMatchWith<"result and rhs have the same type", "result", "rhs",
"::llvm::cast<Type>($_self)">]>,
Arguments<(ins VectorOfBitWidthAndElementTypes<
512, [I8, I16, I32, I64, I128, I256,
I512, BF16, F32]>:$lhs,
Optional<VectorOfBitWidthAndElementTypes<
512, [I8, I16, I32, I64, I128, I256,
I512, BF16, F32]>>:$rhs,
AIEVec_ShuffleModeAttr:$mode)>,
Results<(outs AnyVector:$result)> {
let summary = "AIE2 shuffle";
let description = [{
AMD AIEv2-specific vector shuffle. It performs a shuffle of the elements of
1 or 2 input vectors using the specified shuffle mode. The shuffle mode is
specified as:

`t<width>_<r>x<c>(_(hi|lo))?`

where `<width>` is the bitwidth of the vector element type, `<r>` and `<c>`
are the number of rows and columns that will be transposed to perform the
shuffle, and, for modes that require two 512-bit vectors, `hi` and `lo`
indicate which part of the resulting extended 1024-bit vector will be
assembled and returned.

E.g.: `t32_4x8` would take two 512-bit vectors, `lhs` and `rhs`, with 16
elements of 32 bits each. The resulting vector would contain either the
least (`lo`) or most (`hi`) significant 16 elements of the 32 element vector
that would result from selecting, out of the concatenated vectors `lhs:rhs`,
8 blocks of 4 elements, each block taking one of every 8 elements starting
from the block index.

That is, for two `vector<16xi32>` operands containing:
```
lhs = [0, 1, 2, 3, ..., 15]
rhs = [16, 17, 18, 19, ..., 31]
```

The first 8 blocks would be:
```
b0 = [0, 8, 16, 24]
b1 = [1, 9, 17, 25]
b2 = [2, 10, 18, 26]
b3 = [3, 11, 19, 27]
...
b7 = [7, 15, 23, 31]
```

`t32_4x8_lo` would return the first four blocks:
```
result = [0, 8, 16, 24, 1, 9, 17, 25, ..., 3, 11, 19, 27]
```

And `t32_4x8_hi` would return the last four blocks:
```
result = [4, 12, 20, 28, 5, 13, 21, 29, ..., 7, 15, 23, 31]
```

It can be seen as a flattened 4x8 matrix, split in two 16-element halves, being
transposed to an 8x4 arrangement. In the example above:

```
lhs = [ 0, 1, 2, 3, 4, 5, 6, 7]
[ 8, 9, 10, 11, 12, 13, 14, 15]
rhs = [16, 17, 18, 19, 20, 21, 22, 23]
[24, 25, 26, 27, 28, 29, 30, 31]
```

Would result in:
```
t32_4x8_lo = [0, 8, 16, 24]
[1, 9, 17, 25]
[2, 10, 18, 26]
[3, 11, 19, 27]
t32_4x8_hi = [4, 12, 20, 28]
[5, 13, 21, 29]
[6, 14, 22, 30]
[7, 15, 23, 31]
```

A special mode, `t16_1x2_flip`, swaps each pair of elements in a vector with
32 16-bit elements. E.g.:
```
lhs = [0, 1, 2, 3, ..., 28, 29, 30, 31]
```
Would result in:
```
t16_1x2_flip = [1, 0, 3, 2, ..., 29, 28, 31, 30]
```

The list of supported shuffle modes, required operands, and associated
vector types are the following:

Shuffle Mode | Operands | Types Supported
:------------------:|:------------------:|:------------------:
t8_8x4 | `lhs` | `vector<2x32xi8>`
t8_4x8 | ^ | ^
t8_8x8 | ^ | `vector<64xi8>`
t8_16x4 | ^ | ^
t8_4x16 | ^ | ^
t8_64x2_lo | `lhs` & `rhs` | ^
t8_64x2_hi | ^ | ^
t8_2x64_lo | ^ | ^
t8_2x64_hi | ^ | ^
t16_4x2 | `lhs` | `vector<4x8xi16>` or `vector<4x8xbf16>`
t16_2x4 | ^ | ^
t16_4x4 | ^ | `vector<2x16xi16>` or `vector<2x16xbf16>`
t16_8x2 | ^ | ^
t16_2x8 | ^ | ^
t16_8x4 | ^ | `vector<32xi16>` or `vector<32xbf16>`
t16_4x8 | ^ | ^
t16_16x2 | ^ | ^
t16_2x16 | ^ | ^
t16_1x2_flip | ^ | ^
t16_32x2_lo | `lhs` & `rhs` | ^
t16_32x2_hi | ^ | ^
t16_2x32_lo | ^ | ^
t16_2x32_hi | ^ | ^
t16_16x4_lo | ^ | ^
t16_16x4_hi | ^ | ^
t16_4x16_lo | ^ | ^
t16_4x16_hi | ^ | ^
t32_4x4 | `lhs` | `vector<16xi32>` or `vector<16xf32>`
t32_16x2_lo | `lhs` & `rhs` | ^
t32_16x2_hi | ^ | ^
t32_2x16_lo | ^ | ^
t32_2x16_hi | ^ | ^
t32_8x4_lo | ^ | ^
t32_8x4_hi | ^ | ^
t32_4x8_lo | ^ | ^
t32_4x8_hi | ^ | ^
t64_8x2_lo | `lhs` & `rhs` | `vector<8xi64>`
t64_8x2_hi | ^ | ^
t64_2x8_lo | ^ | ^
t64_2x8_hi | ^ | ^
t128_4x2_lo | ^ | `vector<4xi128>`
t128_4x2_hi | ^ | ^
t128_2x4_lo | ^ | ^
t128_2x4_hi | ^ | ^
t256_2x2_lo | ^ | `vector<2xi256>`
t256_2x2_hi | ^ | ^
t512_1x2_lo | ^ | `vector<1xi512>`
t512_1x2_hi | ^ | ^
}];
let assemblyFormat = [{$lhs (`,` $rhs^)? $mode attr-dict `:` type($result)}];
let hasVerifier = 1;
}

#endif // AIEVEC_OPS
2 changes: 2 additions & 0 deletions include/aie/Dialect/AIEVec/IR/AIEVecTypeConstraints.td
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ include "mlir/IR/BuiltinTypes.td"
include "mlir/IR/OpBase.td"

def I4 : I<4>;
def I256 : I<256>;
def I512 : I<512>;

class TypeShape<string name> :
StrFunc<"cast<::mlir::ShapedType>($" # name # ").getShape()">;
Expand Down
2 changes: 1 addition & 1 deletion include/aie/Dialect/AIEVec/IR/AIEVecTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2022 Xilinx Inc.
// (c) Copyright 2022 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//

Expand Down
10 changes: 9 additions & 1 deletion include/aie/Dialect/AIEVec/IR/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,16 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2022 Xilinx Inc.
# (c) Copyright 2022-2024 Advanced Micro Devices, Inc. or its affiliates

add_mlir_dialect(AIEVecOps aievec)
add_mlir_doc(AIEVecOps AIEVecDialect ./ -gen-dialect-doc -dialect=aievec)

# Add AIEVec attributes
set(LLVM_TARGET_DEFINITIONS AIEVecAttributes.td)
mlir_tablegen(AIEVecEnums.h.inc -gen-enum-decls)
mlir_tablegen(AIEVecEnums.cpp.inc -gen-enum-defs)
mlir_tablegen(AIEVecAttributes.h.inc -gen-attrdef-decls)
mlir_tablegen(AIEVecAttributes.cpp.inc -gen-attrdef-defs)
add_public_tablegen_target(MLIRAIEVecAttributesIncGen)
add_dependencies(mlir-generic-headers MLIRAIEVecAttributesIncGen)
Loading

0 comments on commit 362ad18

Please sign in to comment.