From 89c8fe5dfd8328df9cfc352438457883e2b94b45 Mon Sep 17 00:00:00 2001 From: Javier Setoain Date: Thu, 30 May 2024 12:21:40 +0200 Subject: [PATCH] [aievec] Add new shuffle ops (#1516) --- .../aie/Dialect/AIEVec/IR/AIEVecAttributes.td | 96 +++++++++ .../aie/Dialect/AIEVec/IR/AIEVecDialect.td | 5 +- include/aie/Dialect/AIEVec/IR/AIEVecOps.h | 6 +- include/aie/Dialect/AIEVec/IR/AIEVecOps.td | 156 +++++++++++++- .../AIEVec/IR/AIEVecTypeConstraints.td | 2 + include/aie/Dialect/AIEVec/IR/AIEVecTypes.h | 2 +- include/aie/Dialect/AIEVec/IR/CMakeLists.txt | 10 +- lib/Dialect/AIEVec/IR/AIEVecOps.cpp | 122 ++++++++++- lib/Dialect/AIEVec/IR/CMakeLists.txt | 1 + .../AIEVec/Transforms/AIEVectorize.cpp | 10 +- .../Transforms/FoldMulAddChainToConvOp.cpp | 9 +- .../AIEVecToCpp/TranslateAIEVecToCpp.cpp | 34 ++- .../VectorToAIEVec/test-conv-op-i8-init.mlir | 2 +- .../VectorToAIEVec/test-conv-op-i8.mlir | 2 +- test/aievec/conv2d_i8_after_polygeist.mlir | 2 +- test/aievec/conv2d_uij_i8_noinit_aie-ml.mlir | 2 +- test/dialect/AIEVec/invalid.mlir | 27 +++ test/dialect/AIEVec/roundtrip.mlir | 204 ++++++++++++++++++ 18 files changed, 664 insertions(+), 28 deletions(-) create mode 100644 include/aie/Dialect/AIEVec/IR/AIEVecAttributes.td diff --git a/include/aie/Dialect/AIEVec/IR/AIEVecAttributes.td b/include/aie/Dialect/AIEVec/IR/AIEVecAttributes.td new file mode 100644 index 0000000000..87f2e4f4f2 --- /dev/null +++ b/include/aie/Dialect/AIEVec/IR/AIEVecAttributes.td @@ -0,0 +1,96 @@ +//===- AIEVecAttributes.td - AIE vector attributes def. ----*- tablegen -*-====// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates +// +//===----------------------------------------------------------------------===// +// Defines AIE vector attributes. 
+//===----------------------------------------------------------------------===// + +#ifndef AIE_DIALECT_AIEVEC_IR_AIEVECATTRIBUTES_TD +#define AIE_DIALECT_AIEVEC_IR_AIEVECATTRIBUTES_TD + +include "aie/Dialect/AIEVec/IR/AIEVecDialect.td" +include "mlir/IR/EnumAttr.td" + +// Shuffle modes for shuffle ops. +def SHUFFLE_MODE_T8_64X2_LO : I32EnumAttrCase<"T8_64X2_LO", 0, "t8_64x2_lo">; +def SHUFFLE_MODE_T8_64X2_HI : I32EnumAttrCase<"T8_64X2_HI", 1, "t8_64x2_hi">; +def SHUFFLE_MODE_T16_32X2_LO : I32EnumAttrCase<"T16_32X2_LO", 2, "t16_32x2_lo">; +def SHUFFLE_MODE_T16_32X2_HI : I32EnumAttrCase<"T16_32X2_HI", 3, "t16_32x2_hi">; +def SHUFFLE_MODE_T32_16X2_LO : I32EnumAttrCase<"T32_16X2_LO", 4, "t32_16x2_lo">; +def SHUFFLE_MODE_T32_16X2_HI : I32EnumAttrCase<"T32_16X2_HI", 5, "t32_16x2_hi">; +def SHUFFLE_MODE_T64_8X2_LO : I32EnumAttrCase<"T64_8X2_LO", 6, "t64_8x2_lo">; +def SHUFFLE_MODE_T64_8X2_HI : I32EnumAttrCase<"T64_8X2_HI", 7, "t64_8x2_hi">; +def SHUFFLE_MODE_T128_4X2_LO : I32EnumAttrCase<"T128_4X2_LO", 8, "t128_4x2_lo">; +def SHUFFLE_MODE_T128_4X2_HI : I32EnumAttrCase<"T128_4X2_HI", 9, "t128_4x2_hi">; +def SHUFFLE_MODE_T256_2X2_LO : I32EnumAttrCase<"T256_2X2_LO", 10, "t256_2x2_lo">; +def SHUFFLE_MODE_T256_2X2_HI : I32EnumAttrCase<"T256_2X2_HI", 11, "t256_2x2_hi">; +def SHUFFLE_MODE_T128_2X4_LO : I32EnumAttrCase<"T128_2X4_LO", 12, "t128_2x4_lo">; +def SHUFFLE_MODE_T128_2X4_HI : I32EnumAttrCase<"T128_2X4_HI", 13, "t128_2x4_hi">; +def SHUFFLE_MODE_T64_2X8_LO : I32EnumAttrCase<"T64_2X8_LO", 14, "t64_2x8_lo">; +def SHUFFLE_MODE_T64_2X8_HI : I32EnumAttrCase<"T64_2X8_HI", 15, "t64_2x8_hi">; +def SHUFFLE_MODE_T32_2X16_LO : I32EnumAttrCase<"T32_2X16_LO", 16, "t32_2x16_lo">; +def SHUFFLE_MODE_T32_2X16_HI : I32EnumAttrCase<"T32_2X16_HI", 17, "t32_2x16_hi">; +def SHUFFLE_MODE_T16_2X32_LO : I32EnumAttrCase<"T16_2X32_LO", 18, "t16_2x32_lo">; +def SHUFFLE_MODE_T16_2X32_HI : I32EnumAttrCase<"T16_2X32_HI", 19, "t16_2x32_hi">; +def SHUFFLE_MODE_T8_2X64_LO : I32EnumAttrCase<"T8_2X64_LO", 
20, "t8_2x64_lo">; +def SHUFFLE_MODE_T8_2X64_HI : I32EnumAttrCase<"T8_2X64_HI", 21, "t8_2x64_hi">; +def SHUFFLE_MODE_T512_1X2_LO : I32EnumAttrCase<"T512_1X2_LO", 22, "t512_1x2_lo">; +def SHUFFLE_MODE_T512_1X2_HI : I32EnumAttrCase<"T512_1X2_HI", 23, "t512_1x2_hi">; +def SHUFFLE_MODE_T16_16X4_LO : I32EnumAttrCase<"T16_16X4_LO", 24, "t16_16x4_lo">; +def SHUFFLE_MODE_T16_16X4_HI : I32EnumAttrCase<"T16_16X4_HI", 25, "t16_16x4_hi">; +def SHUFFLE_MODE_T16_4X16_LO : I32EnumAttrCase<"T16_4X16_LO", 26, "t16_4x16_lo">; +def SHUFFLE_MODE_T16_4X16_HI : I32EnumAttrCase<"T16_4X16_HI", 27, "t16_4x16_hi">; +def SHUFFLE_MODE_T16_8X4 : I32EnumAttrCase<"T16_8X4", 28, "t16_8x4">; +def SHUFFLE_MODE_T16_4X8 : I32EnumAttrCase<"T16_4X8", 29, "t16_4x8">; +def SHUFFLE_MODE_T32_8X4_LO : I32EnumAttrCase<"T32_8X4_LO", 30, "t32_8x4_lo">; +def SHUFFLE_MODE_T32_8X4_HI : I32EnumAttrCase<"T32_8X4_HI", 31, "t32_8x4_hi">; +def SHUFFLE_MODE_T32_4X8_LO : I32EnumAttrCase<"T32_4X8_LO", 32, "t32_4x8_lo">; +def SHUFFLE_MODE_T32_4X8_HI : I32EnumAttrCase<"T32_4X8_HI", 33, "t32_4x8_hi">; +def SHUFFLE_MODE_T32_4X4 : I32EnumAttrCase<"T32_4X4", 34, "t32_4x4">; +def SHUFFLE_MODE_T8_8X8 : I32EnumAttrCase<"T8_8X8", 35, "t8_8x8">; +def SHUFFLE_MODE_T8_16X4 : I32EnumAttrCase<"T8_16X4", 36, "t8_16x4">; +def SHUFFLE_MODE_T8_4X16 : I32EnumAttrCase<"T8_4X16", 37, "t8_4x16">; +def SHUFFLE_MODE_T16_1X2_flip : I32EnumAttrCase<"T16_1X2_flip", 38, "t16_1x2_flip">; +def SHUFFLE_MODE_T16_4X4 : I32EnumAttrCase<"T16_4X4", 39, "t16_4x4">; +def SHUFFLE_MODE_T16_4X2 : I32EnumAttrCase<"T16_4X2", 40, "t16_4x2">; +def SHUFFLE_MODE_T16_2X4 : I32EnumAttrCase<"T16_2X4", 41, "t16_2x4">; +def SHUFFLE_MODE_T16_8X2 : I32EnumAttrCase<"T16_8X2", 42, "t16_8x2">; +def SHUFFLE_MODE_T16_2X8 : I32EnumAttrCase<"T16_2X8", 43, "t16_2x8">; +def SHUFFLE_MODE_T16_16X2 : I32EnumAttrCase<"T16_16X2", 44, "t16_16x2">; +def SHUFFLE_MODE_T16_2X16 : I32EnumAttrCase<"T16_2X16", 45, "t16_2x16">; +def SHUFFLE_MODE_T8_8X4 : I32EnumAttrCase<"T8_8X4", 46, "t8_8x4">; 
+def SHUFFLE_MODE_T8_4X8 : I32EnumAttrCase<"T8_4X8", 47, "t8_4x8">; + +def ShuffleMode : I32EnumAttr< + "ShuffleMode", + "Shuffle mode for AIEVec shuffle operations", + [SHUFFLE_MODE_T8_64X2_LO, SHUFFLE_MODE_T8_64X2_HI, SHUFFLE_MODE_T16_32X2_LO, + SHUFFLE_MODE_T16_32X2_HI, SHUFFLE_MODE_T32_16X2_LO, SHUFFLE_MODE_T32_16X2_HI, + SHUFFLE_MODE_T64_8X2_LO, SHUFFLE_MODE_T64_8X2_HI, SHUFFLE_MODE_T128_4X2_LO, + SHUFFLE_MODE_T128_4X2_HI, SHUFFLE_MODE_T256_2X2_LO, SHUFFLE_MODE_T256_2X2_HI, + SHUFFLE_MODE_T128_2X4_LO, SHUFFLE_MODE_T128_2X4_HI, SHUFFLE_MODE_T64_2X8_LO, + SHUFFLE_MODE_T64_2X8_HI, SHUFFLE_MODE_T32_2X16_LO, SHUFFLE_MODE_T32_2X16_HI, + SHUFFLE_MODE_T16_2X32_LO, SHUFFLE_MODE_T16_2X32_HI, SHUFFLE_MODE_T8_2X64_LO, + SHUFFLE_MODE_T8_2X64_HI, SHUFFLE_MODE_T512_1X2_LO, SHUFFLE_MODE_T512_1X2_HI, + SHUFFLE_MODE_T16_16X4_LO, SHUFFLE_MODE_T16_16X4_HI, SHUFFLE_MODE_T16_4X16_LO, + SHUFFLE_MODE_T16_4X16_HI, SHUFFLE_MODE_T16_8X4, SHUFFLE_MODE_T16_4X8, + SHUFFLE_MODE_T32_8X4_LO, SHUFFLE_MODE_T32_8X4_HI, SHUFFLE_MODE_T32_4X8_LO, + SHUFFLE_MODE_T32_4X8_HI, SHUFFLE_MODE_T32_4X4, SHUFFLE_MODE_T8_8X8, + SHUFFLE_MODE_T8_16X4, SHUFFLE_MODE_T8_4X16, SHUFFLE_MODE_T16_1X2_flip, + SHUFFLE_MODE_T16_4X4, SHUFFLE_MODE_T16_4X2, SHUFFLE_MODE_T16_2X4, + SHUFFLE_MODE_T16_8X2, SHUFFLE_MODE_T16_2X8, SHUFFLE_MODE_T16_16X2, + SHUFFLE_MODE_T16_2X16, SHUFFLE_MODE_T8_8X4, SHUFFLE_MODE_T8_4X8]> { + let cppNamespace = "::xilinx::aievec"; + let genSpecializedAttr = 0; +} + +def AIEVec_ShuffleModeAttr : EnumAttr { + let assemblyFormat = "`[` $value `]`"; +} + +#endif // AIE_DIALECT_AIEVEC_IR_AIEVECATTRIBUTES_TD diff --git a/include/aie/Dialect/AIEVec/IR/AIEVecDialect.td b/include/aie/Dialect/AIEVec/IR/AIEVecDialect.td index 4741fdba97..569995ec63 100644 --- a/include/aie/Dialect/AIEVec/IR/AIEVecDialect.td +++ b/include/aie/Dialect/AIEVec/IR/AIEVecDialect.td @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2022 Xilinx Inc. +// (c) Copyright 2022-2024 Advanced Micro Devices, Inc. or its affiliates // //===----------------------------------------------------------------------===// // Defines AIE vector dialect. @@ -19,6 +19,9 @@ def AIEVec_Dialect : Dialect { let name = "aievec"; let summary = "Types and operations for AIE vector dialect"; let cppNamespace = "::xilinx::aievec"; + + let useDefaultAttributePrinterParser = 1; + let extraClassDeclaration = [{ void registerTypes(); }]; diff --git a/include/aie/Dialect/AIEVec/IR/AIEVecOps.h b/include/aie/Dialect/AIEVec/IR/AIEVecOps.h index b1a26c3c43..1bffeaff19 100644 --- a/include/aie/Dialect/AIEVec/IR/AIEVecOps.h +++ b/include/aie/Dialect/AIEVec/IR/AIEVecOps.h @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2022 Xilinx Inc. +// (c) Copyright 2022-2024 Advanced Micro Devices, Inc. or its affiliates // //===----------------------------------------------------------------------===// // This file defines the AIE vector dialect and the operations. @@ -17,6 +17,10 @@ #include "mlir/Interfaces/InferTypeOpInterface.h" #include "mlir/Interfaces/SideEffectInterfaces.h" +#include "aie/Dialect/AIEVec/IR/AIEVecEnums.h.inc" +#define GET_ATTRDEF_CLASSES +#include "aie/Dialect/AIEVec/IR/AIEVecAttributes.h.inc" + #include "AIEVecDialect.h" #define GET_OP_CLASSES diff --git a/include/aie/Dialect/AIEVec/IR/AIEVecOps.td b/include/aie/Dialect/AIEVec/IR/AIEVecOps.td index 3861dc32d6..73a5ef80b7 100644 --- a/include/aie/Dialect/AIEVec/IR/AIEVecOps.td +++ b/include/aie/Dialect/AIEVec/IR/AIEVecOps.td @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2023 AMD Inc. +// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. 
or its affiliates // //===----------------------------------------------------------------------===// // Defines AIE vector operations. @@ -14,6 +14,7 @@ #define AIEVEC_OPS include "aie/Dialect/AIE/IR/AIEAttrs.td" +include "aie/Dialect/AIEVec/IR/AIEVecAttributes.td" include "aie/Dialect/AIEVec/IR/AIEVecTypes.td" include "aie/Dialect/AIEVec/IR/AIEVecTypeConstraints.td" @@ -587,8 +588,8 @@ def AIEVec_ShiftOp: }]; } -def AIEVec_ShuffleOp: - AIEVec_Op<"shuffle", [ +def AIEVec_LegacyShuffleOp: + AIEVec_Op<"legacyshuffle", [ Pure ]>, Arguments<(ins AnyVector:$source, @@ -872,4 +873,153 @@ def AIEVec_MatMulOp: let hasVerifier = 0; } +def AIEVec_ShuffleOp : AIEVec_Op<"shuffle", + [Pure, AllTypesMatch<["lhs", "result"]>, + OptionalTypesMatchWith<"result and rhs have the same type", "result", "rhs", + "::llvm::cast($_self)">]>, + Arguments<(ins VectorOfBitWidthAndElementTypes< + 512, [I8, I16, I32, I64, I128, I256, + I512, BF16, F32]>:$lhs, + Optional>:$rhs, + AIEVec_ShuffleModeAttr:$mode)>, + Results<(outs AnyVector:$result)> { + let summary = "AIE2 shuffle"; + let description = [{ + AMD AIEv2-specific vector shuffle. It performs a shuffle of the elements of + 1 or 2 input vectors using the specified shuffle mode. The shuffle mode is + specified as: + + `t<width>_<r>x<c>(_(hi|lo))?` + + where `<width>` is the bitwidth of the vector element type, `<r>` and `<c>` + are the number of rows and columns that will be transposed to perform the + shuffle, and, for modes that require two 512-bit vectors, `hi` and `lo` + indicate which part of the resulting extended 1024-bit vector will be + assembled and returned. + + E.g.: `t32_4x8` would take two 512-bit vectors, `lhs` and `rhs`, with 16 + elements of 32 bits each. 
The resulting vector would contain either the + least (`lo`) or most (`hi`) significant 16 elements of the 32 element vector + that would result from selecting, out of the concatenated vectors `lhs:rhs`, + 8 blocks of 4 elements, each block taking one of every 8 elements starting + from the block index. + + That is, for two `vector<16xi32>` operands containing: + ``` + lhs = [0, 1, 2, 3, ..., 15] + rhs = [16, 17, 18, 19, ..., 31] + ``` + + The 8 blocks would be: + ``` + b0 = [0, 8, 16, 24] + b1 = [1, 9, 17, 25] + b2 = [2, 10, 18, 26] + b3 = [3, 11, 19, 27] + ... + b7 = [7, 15, 23, 31] + ``` + + `t32_4x8_lo` would return the first four blocks: + ``` + result = [0, 8, 16, 24, 1, 9, 17, 25, ..., 3, 11, 19, 27] + ``` + + And `t32_4x8_hi` would return the last four blocks: + ``` + result = [4, 12, 20, 28, 5, 13, 21, 29, ..., 7, 15, 23, 31] + ``` + + It can be seen as a flattened 4x8 matrix, split in two 16-element halves, being + transposed to an 8x4 arrangement. In the example above: + + ``` + lhs = [ 0, 1, 2, 3, 4, 5, 6, 7] + [ 8, 9, 10, 11, 12, 13, 14, 15] + rhs = [16, 17, 18, 19, 20, 21, 22, 23] + [24, 25, 26, 27, 28, 29, 30, 31] + ``` + + Would result in: + ``` + t32_4x8_lo = [0, 8, 16, 24] + [1, 9, 17, 25] + [2, 10, 18, 26] + [3, 11, 19, 27] + t32_4x8_hi = [4, 12, 20, 28] + [5, 13, 21, 29] + [6, 14, 22, 30] + [7, 15, 23, 31] + ``` + + A special mode, `t16_1x2_flip`, swaps each pair of elements in a vector with + 32 16-bit elements. 
E.g.: + ``` + lhs = [0, 1, 2, 3, ..., 28, 29, 30, 31] + ``` + Would result in: + ``` + t16_1x2_flip = [1, 0, 3, 2, ..., 29, 28, 31, 30] + ``` + + The list of supported shuffle modes, required operands, and associated + vector types are the following: + + Shuffle Mode | Operands | Types Supported + :------------------:|:------------------:|:------------------: + t8_8x4 | `lhs` | `vector<64xi8>` + t8_4x8 | ^ | ^ + t8_8x8 | ^ | ^ + t8_16x4 | ^ | ^ + t8_4x16 | ^ | ^ + t8_64x2_lo | `lhs` & `rhs` | ^ + t8_64x2_hi | ^ | ^ + t8_2x64_lo | ^ | ^ + t8_2x64_hi | ^ | ^ + t16_4x2 | `lhs` | `vector<32xi16>` or `vector<32xbf16>` + t16_2x4 | ^ | ^ + t16_4x4 | ^ | ^ + t16_8x2 | ^ | ^ + t16_2x8 | ^ | ^ + t16_8x4 | ^ | ^ + t16_4x8 | ^ | ^ + t16_16x2 | ^ | ^ + t16_2x16 | ^ | ^ + t16_1x2_flip | ^ | ^ + t16_32x2_lo | `lhs` & `rhs` | ^ + t16_32x2_hi | ^ | ^ + t16_2x32_lo | ^ | ^ + t16_2x32_hi | ^ | ^ + t16_16x4_lo | ^ | ^ + t16_16x4_hi | ^ | ^ + t16_4x16_lo | ^ | ^ + t16_4x16_hi | ^ | ^ + t32_4x4 | `lhs` | `vector<16xi32>` or `vector<16xf32>` + t32_16x2_lo | `lhs` & `rhs` | ^ + t32_16x2_hi | ^ | ^ + t32_2x16_lo | ^ | ^ + t32_2x16_hi | ^ | ^ + t32_8x4_lo | ^ | ^ + t32_8x4_hi | ^ | ^ + t32_4x8_lo | ^ | ^ + t32_4x8_hi | ^ | ^ + t64_8x2_lo | ^ | `vector<8xi64>` + t64_8x2_hi | ^ | ^ + t64_2x8_lo | ^ | ^ + t64_2x8_hi | ^ | ^ + t128_4x2_lo | ^ | `vector<4xi128>` + t128_4x2_hi | ^ | ^ + t128_2x4_lo | ^ | ^ + t128_2x4_hi | ^ | ^ + t256_2x2_lo | ^ | `vector<2xi256>` + t256_2x2_hi | ^ | ^ + t512_1x2_lo | ^ | `vector<1xi512>` + t512_1x2_hi | ^ | ^ + }]; + let assemblyFormat = [{$lhs (`,` $rhs^)? 
$mode attr-dict `:` type($result)}]; + let hasVerifier = 1; +} + #endif // AIEVEC_OPS diff --git a/include/aie/Dialect/AIEVec/IR/AIEVecTypeConstraints.td b/include/aie/Dialect/AIEVec/IR/AIEVecTypeConstraints.td index cf22124fc6..52ba841596 100644 --- a/include/aie/Dialect/AIEVec/IR/AIEVecTypeConstraints.td +++ b/include/aie/Dialect/AIEVec/IR/AIEVecTypeConstraints.td @@ -17,6 +17,8 @@ include "mlir/IR/BuiltinTypes.td" include "mlir/IR/OpBase.td" def I4 : I<4>; +def I256 : I<256>; +def I512 : I<512>; class TypeShape : StrFunc<"cast<::mlir::ShapedType>($" # name # ").getShape()">; diff --git a/include/aie/Dialect/AIEVec/IR/AIEVecTypes.h b/include/aie/Dialect/AIEVec/IR/AIEVecTypes.h index f69a9a77fc..698c6bd584 100644 --- a/include/aie/Dialect/AIEVec/IR/AIEVecTypes.h +++ b/include/aie/Dialect/AIEVec/IR/AIEVecTypes.h @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2022 Xilinx Inc. +// (c) Copyright 2022 Advanced Micro Devices, Inc. or its affiliates // //===----------------------------------------------------------------------===// diff --git a/include/aie/Dialect/AIEVec/IR/CMakeLists.txt b/include/aie/Dialect/AIEVec/IR/CMakeLists.txt index 2d3bd90b90..91e3475555 100644 --- a/include/aie/Dialect/AIEVec/IR/CMakeLists.txt +++ b/include/aie/Dialect/AIEVec/IR/CMakeLists.txt @@ -3,8 +3,16 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # -# (c) Copyright 2022 Xilinx Inc. +# (c) Copyright 2022-2024 Advanced Micro Devices, Inc. 
or its affiliates add_mlir_dialect(AIEVecOps aievec) add_mlir_doc(AIEVecOps AIEVecDialect ./ -gen-dialect-doc -dialect=aievec) +# Add AIEVec attributes +set(LLVM_TARGET_DEFINITIONS AIEVecAttributes.td) +mlir_tablegen(AIEVecEnums.h.inc -gen-enum-decls) +mlir_tablegen(AIEVecEnums.cpp.inc -gen-enum-defs) +mlir_tablegen(AIEVecAttributes.h.inc -gen-attrdef-decls) +mlir_tablegen(AIEVecAttributes.cpp.inc -gen-attrdef-defs) +add_public_tablegen_target(MLIRAIEVecAttributesIncGen) +add_dependencies(mlir-generic-headers MLIRAIEVecAttributesIncGen) \ No newline at end of file diff --git a/lib/Dialect/AIEVec/IR/AIEVecOps.cpp b/lib/Dialect/AIEVec/IR/AIEVecOps.cpp index b0055be060..572f1f7b61 100644 --- a/lib/Dialect/AIEVec/IR/AIEVecOps.cpp +++ b/lib/Dialect/AIEVec/IR/AIEVecOps.cpp @@ -4,25 +4,28 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2022 Xilinx Inc. +// (c) Copyright 2022-2024 Advanced Micro Devices, Inc. or its affiliates // //===----------------------------------------------------------------------===// // This file implements AIE vector op printing, pasing, and verification. 
//===----------------------------------------------------------------------===// -#include "aie/Dialect/AIEVec/IR/AIEVecOps.h" -#include "aie/Dialect/AIEVec/AIEVecUtils.h" - #include "mlir/Dialect/LLVMIR/LLVMTypes.h" +#include "mlir/IR/DialectImplementation.h" #include "mlir/IR/OpDefinition.h" #include "mlir/IR/TypeUtilities.h" #include "mlir/Transforms/FoldUtils.h" +#include "llvm/ADT/TypeSwitch.h" + +#include "aie/Dialect/AIEVec/AIEVecUtils.h" +#include "aie/Dialect/AIEVec/IR/AIEVecOps.h" using namespace llvm; using namespace mlir; using namespace xilinx; using namespace xilinx::aievec; +#include "aie/Dialect/AIEVec/IR/AIEVecEnums.cpp.inc" #include "aie/Dialect/AIEVec/IR/AIEVecOpsDialect.cpp.inc" //===----------------------------------------------------------------------===// @@ -31,6 +34,10 @@ using namespace xilinx::aievec; void AIEVecDialect::initialize() { registerTypes(); + addAttributes< +#define GET_ATTRDEF_LIST +#include "aie/Dialect/AIEVec/IR/AIEVecAttributes.cpp.inc" + >(); addOperations< #define GET_OP_LIST #include "aie/Dialect/AIEVec/IR/AIEVecOps.cpp.inc" @@ -1620,8 +1627,105 @@ ParseResult ShiftOp::parse(OpAsmParser &parser, OperationState &result) { // ShuffleOp //===----------------------------------------------------------------------===// +// This verification function makes sure that the shuffle mode supports the +// number and type of operands provided. 
+LogicalResult ShuffleOp::verify() { + unsigned modeBitWidth; + bool requireRhs = true; + auto mode = getMode(); + switch (mode) { + case ShuffleMode::T8_8X8: // 35 + case ShuffleMode::T8_16X4: // 36 + case ShuffleMode::T8_4X16: // 37 + case ShuffleMode::T8_8X4: // 46 + case ShuffleMode::T8_4X8: // 47 + requireRhs = false; + LLVM_FALLTHROUGH; + case ShuffleMode::T8_64X2_LO: // 0 + case ShuffleMode::T8_64X2_HI: // 1 + case ShuffleMode::T8_2X64_LO: // 20 + case ShuffleMode::T8_2X64_HI: // 21 + modeBitWidth = 8u; + break; + case ShuffleMode::T16_8X4: // 28 + case ShuffleMode::T16_4X8: // 29 + case ShuffleMode::T16_1X2_flip: // 38 + case ShuffleMode::T16_4X4: // 39 + case ShuffleMode::T16_4X2: // 40 + case ShuffleMode::T16_2X4: // 41 + case ShuffleMode::T16_8X2: // 42 + case ShuffleMode::T16_2X8: // 43 + case ShuffleMode::T16_16X2: // 44 + case ShuffleMode::T16_2X16: // 45 + requireRhs = false; + LLVM_FALLTHROUGH; + case ShuffleMode::T16_32X2_LO: // 2 + case ShuffleMode::T16_32X2_HI: // 3 + case ShuffleMode::T16_2X32_LO: // 18 + case ShuffleMode::T16_2X32_HI: // 19 + case ShuffleMode::T16_16X4_LO: // 24 + case ShuffleMode::T16_16X4_HI: // 25 + case ShuffleMode::T16_4X16_LO: // 26 + case ShuffleMode::T16_4X16_HI: // 27 + modeBitWidth = 16u; + break; + case ShuffleMode::T32_4X4: // 34 + requireRhs = false; + LLVM_FALLTHROUGH; + case ShuffleMode::T32_16X2_LO: // 4 + case ShuffleMode::T32_16X2_HI: // 5 + case ShuffleMode::T32_2X16_LO: // 16 + case ShuffleMode::T32_2X16_HI: // 17 + case ShuffleMode::T32_8X4_LO: // 30 + case ShuffleMode::T32_8X4_HI: // 31 + case ShuffleMode::T32_4X8_LO: // 32 + case ShuffleMode::T32_4X8_HI: // 33 + modeBitWidth = 32u; + break; + case ShuffleMode::T64_8X2_LO: // 6 + case ShuffleMode::T64_8X2_HI: // 7 + case ShuffleMode::T64_2X8_LO: // 14 + case ShuffleMode::T64_2X8_HI: // 15 + modeBitWidth = 64u; + break; + case ShuffleMode::T128_4X2_LO: // 8 + case ShuffleMode::T128_4X2_HI: // 9 + case ShuffleMode::T128_2X4_LO: // 12 + case 
ShuffleMode::T128_2X4_HI: // 13 + modeBitWidth = 128u; + break; + case ShuffleMode::T256_2X2_LO: // 10 + case ShuffleMode::T256_2X2_HI: // 11 + modeBitWidth = 256u; + break; + case ShuffleMode::T512_1X2_LO: // 22 + case ShuffleMode::T512_1X2_HI: // 23 + modeBitWidth = 512u; + break; + } + + // Verify number of operands + if (requireRhs && !getRhs()) + return emitError() << "shuffle mode '" << stringifyEnum(mode) + << "' requires a second operand"; + + if (!requireRhs && getRhs()) + return emitError() << "shuffle mode '" << stringifyEnum(mode) + << "' does not admit a second operand"; + + // Verify vector element type + auto elemBitWidth = + cast(getLhs().getType()).getElementTypeBitWidth(); + if (modeBitWidth != elemBitWidth) + return emitError() << "shuffle mode '" << stringifyEnum(mode) + << "' requires vectors of " << modeBitWidth + << "-bit elements"; + + return success(); +} + // Print out Shuffle op. -void ShuffleOp::print(OpAsmPrinter &p) { +void LegacyShuffleOp::print(OpAsmPrinter &p) { // Print the source vector p << " " << getSource(); @@ -1633,7 +1737,7 @@ void ShuffleOp::print(OpAsmPrinter &p) { } // Verify Shuffle op. -LogicalResult ShuffleOp::verify() { +LogicalResult LegacyShuffleOp::verify() { // Verify the types VectorType sourceType = llvm::dyn_cast(getSource().getType()); VectorType resultType = llvm::dyn_cast(getResult().getType()); @@ -1660,7 +1764,8 @@ LogicalResult ShuffleOp::verify() { } // Parse Shuffle op. 
-ParseResult ShuffleOp::parse(OpAsmParser &parser, OperationState &result) { +ParseResult LegacyShuffleOp::parse(OpAsmParser &parser, + OperationState &result) { llvm::SMLoc typesLoc; SmallVector types; OpAsmParser::UnresolvedOperand source; @@ -1886,5 +1991,8 @@ ParseResult FMAConvOp::parse(OpAsmParser &parser, OperationState &result) { return parseMulFMAConvOp(parser, result, true); } +#define GET_ATTRDEF_CLASSES +#include "aie/Dialect/AIEVec/IR/AIEVecAttributes.cpp.inc" + #define GET_OP_CLASSES #include "aie/Dialect/AIEVec/IR/AIEVecOps.cpp.inc" diff --git a/lib/Dialect/AIEVec/IR/CMakeLists.txt b/lib/Dialect/AIEVec/IR/CMakeLists.txt index 632851b995..dc10d46704 100644 --- a/lib/Dialect/AIEVec/IR/CMakeLists.txt +++ b/lib/Dialect/AIEVec/IR/CMakeLists.txt @@ -14,6 +14,7 @@ add_mlir_dialect_library(MLIRAIEVecDialect DEPENDS MLIRAIEVecOpsIncGen + MLIRAIEVecAttributesIncGen LINK_LIBS PUBLIC MLIRIR diff --git a/lib/Dialect/AIEVec/Transforms/AIEVectorize.cpp b/lib/Dialect/AIEVec/Transforms/AIEVectorize.cpp index 19da8bf6f2..07c51cf5cb 100644 --- a/lib/Dialect/AIEVec/Transforms/AIEVectorize.cpp +++ b/lib/Dialect/AIEVec/Transforms/AIEVectorize.cpp @@ -739,9 +739,9 @@ static aievec::ShiftOp generateShiftOp(Value lhs, Value rhs, int32_t shiftBytes, return shiftOp; } -static aievec::ShuffleOp generateShuffleOp(Value source, VectState *state, - Location loc, unsigned mode, - VectorType resType = nullptr) { +static aievec::LegacyShuffleOp generateShuffleOp(Value source, VectState *state, + Location loc, unsigned mode, + VectorType resType = nullptr) { auto vecType = cast(source.getType()); if (!resType) { @@ -750,8 +750,8 @@ static aievec::ShuffleOp generateShuffleOp(Value source, VectState *state, resType = createVectorType(lanes, scalarType); } - auto shuffleOp = - state->builder.create(loc, resType, source, mode); + auto shuffleOp = state->builder.create(loc, resType, + source, mode); return shuffleOp; } diff --git a/lib/Dialect/AIEVec/Transforms/FoldMulAddChainToConvOp.cpp 
b/lib/Dialect/AIEVec/Transforms/FoldMulAddChainToConvOp.cpp index a458b4367c..942eda66d0 100644 --- a/lib/Dialect/AIEVec/Transforms/FoldMulAddChainToConvOp.cpp +++ b/lib/Dialect/AIEVec/Transforms/FoldMulAddChainToConvOp.cpp @@ -447,10 +447,11 @@ struct FoldMulAddChainToConvOpPattern .getResult(); // If the filter has duplicate elements, pack them. if (group.bcastDist == 2) - grpRhs = - rewriter - .create(loc, signalVecTy, grpRhs, /*mode=*/0) - .getResult(); + // NOTE: This shuffle mode works for `vector<64xi8>` + grpRhs = rewriter + .create(loc, signalVecTy, grpRhs, + grpRhs, ShuffleMode::T8_64X2_LO) + .getResult(); // If the first element of the filter to be used is not 0, shift the // filter to align the first element to the beginning. if (group.bcastShift) { diff --git a/lib/Targets/AIEVecToCpp/TranslateAIEVecToCpp.cpp b/lib/Targets/AIEVecToCpp/TranslateAIEVecToCpp.cpp index df35d3ba88..fd71fb79bd 100644 --- a/lib/Targets/AIEVecToCpp/TranslateAIEVecToCpp.cpp +++ b/lib/Targets/AIEVecToCpp/TranslateAIEVecToCpp.cpp @@ -1041,6 +1041,37 @@ static LogicalResult printOperation(CppEmitter &emitter, // Generate the shuffle intrinsic static LogicalResult printOperation(CppEmitter &emitter, aievec::ShuffleOp shuffleOp) { + Value lhs = shuffleOp.getLhs(); + Value rhs = shuffleOp.getRhs(); + aievec::ShuffleMode mode = shuffleOp.getMode(); + + raw_indented_ostream &os = emitter.ostream(); + + // Generate the initialization for the result + if (failed(emitter.emitAssignPrefix(*shuffleOp))) + return failure(); + + os << "shuffle"; + os << "("; + if (!emitter.hasValueInScope(lhs)) + return failure(); + os << emitter.getOrCreateName(lhs); + os << ", "; + if (rhs) { + if (!emitter.hasValueInScope(rhs)) + return failure(); + os << emitter.getOrCreateName(rhs); + os << ", "; + } + os << "eShuffleMode::shuffle_T" << stringifyEnum(mode).substr(1); + os << ")"; + + return success(); +} + +// Generate the shuffle intrinsic +static LogicalResult printOperation(CppEmitter &emitter, + 
aievec::LegacyShuffleOp shuffleOp) { Value source = shuffleOp.getSource(); unsigned mode = shuffleOp.getMode(); @@ -3233,7 +3264,8 @@ LogicalResult CppEmitter::emitOperation(Operation &op, bool trailingSemicolon) { SelectOp, SRSOp, SubOp, SubElemOp, UPDOp, UPSOp, FMAElemOp, MulElemOp, BroadcastOp, BroadcastScalarOp, MulConvOp, FMAConvOp, ShiftOp, ShuffleOp, CastOp, MinOp, MaxOp, NegOp, CmpOp, SelOp, - ExtElemOp, BxorOp, BnegOp, BandOp, BorOp, UnpackOp, MatMulOp>( + ExtElemOp, BxorOp, BnegOp, BandOp, BorOp, UnpackOp, MatMulOp, + LegacyShuffleOp>( [&](auto op) { return printOperation(*this, op); }) .Default([&](Operation *) { return op.emitOpError("unable to find printer for op"); diff --git a/test/Conversion/VectorToAIEVec/test-conv-op-i8-init.mlir b/test/Conversion/VectorToAIEVec/test-conv-op-i8-init.mlir index 3701ae2051..27950f04ef 100644 --- a/test/Conversion/VectorToAIEVec/test-conv-op-i8-init.mlir +++ b/test/Conversion/VectorToAIEVec/test-conv-op-i8-init.mlir @@ -36,7 +36,7 @@ func.func @conv2d(%arg0: memref<18x288xi8>, %arg1: memref<48xi8>, %arg2: memref< // CHECK: %[[C0:.*]] = arith.constant 0 : index // CHECK: %[[T0:.*]] = aievec.upd %[[A1]][%[[C0]]] {index = 0 : i8, offset = 0 : i32} : memref<48xi8>, vector<32xi8> // CHECK: %[[T1:.*]] = aievec.concat %[[T0]], %[[T0]] : vector<32xi8>, vector<64xi8> -// CHECK: %[[T2:.*]] = aievec.shuffle %[[T1]] {mode = 0 : i32} : vector<64xi8>, vector<64xi8> +// CHECK: %[[T2:.*]] = aievec.shuffle %[[T1]], %[[T1]] [t8_64x2_lo] : vector<64xi8> // CHECK: affine.for %[[I:.*]] = 0 to 16 { // CHECK: affine.for %[[J:.*]] = 0 to 256 step 32 { // CHECK: %[[T3:.*]] = aievec.upd %[[A2]][%[[I]], %[[J]]] {index = 0 : i8, offset = 0 : i32} : memref<16x256xi8>, vector<32xi8> diff --git a/test/Conversion/VectorToAIEVec/test-conv-op-i8.mlir b/test/Conversion/VectorToAIEVec/test-conv-op-i8.mlir index 1f920f4f38..d0db180f3a 100644 --- a/test/Conversion/VectorToAIEVec/test-conv-op-i8.mlir +++ 
b/test/Conversion/VectorToAIEVec/test-conv-op-i8.mlir @@ -34,7 +34,7 @@ func.func @conv2d(%arg0: memref<18x288xi8>, %arg1: memref<48xi8>, %arg2: memref< // CHECK: %[[C0:.*]] = arith.constant 0 : index // CHECK: %[[T0:.*]] = aievec.upd %[[A1]][%[[C0]]] {index = 0 : i8, offset = 0 : i32} : memref<48xi8>, vector<32xi8> // CHECK: %[[T1:.*]] = aievec.concat %[[T0]], %[[T0]] : vector<32xi8>, vector<64xi8> -// CHECK: %[[T2:.*]] = aievec.shuffle %[[T1]] {mode = 0 : i32} : vector<64xi8>, vector<64xi8> +// CHECK: %[[T2:.*]] = aievec.shuffle %[[T1]], %[[T1]] [t8_64x2_lo] : vector<64xi8> // CHECK: affine.for %[[I:.*]] = 0 to 16 { // CHECK: affine.for %[[J:.*]] = 0 to 256 step 32 { // CHECK: %[[T3:.*]] = aievec.upd %[[A0]][%[[I]], %[[J]]] {index = 0 : i8, offset = 0 : i32} : memref<18x288xi8>, vector<64xi8> diff --git a/test/aievec/conv2d_i8_after_polygeist.mlir b/test/aievec/conv2d_i8_after_polygeist.mlir index de4949bff5..eaa5e40451 100644 --- a/test/aievec/conv2d_i8_after_polygeist.mlir +++ b/test/aievec/conv2d_i8_after_polygeist.mlir @@ -38,7 +38,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.endianness" // CHECK: %[[C16:.*]] = arith.constant 16 : index // CHECK: %[[C0:.*]] = arith.constant 0 : index // CHECK: %[[T0:.*]] = aievec.upd %[[A1]][%[[C0:.*]]] {index = 0 : i8, offset = 0 : i32} : memref, vector<64xi8> -// CHECK: %[[T1:.*]] = aievec.shuffle %[[T0:.*]] {mode = 0 : i32} : vector<64xi8>, vector<64xi8> +// CHECK: %[[T1:.*]] = aievec.legacyshuffle %[[T0:.*]] {mode = 0 : i32} : vector<64xi8>, vector<64xi8> // CHECK: scf.for %[[A3:.*]] = %[[C0:.*]] to %[[C16:.*]] step %[[C1:.*]] { // CHECK: scf.for %[[A4:.*]] = %[[C0:.*]] to %[[C256:.*]] step %[[C32:.*]] { // CHECK: %[[T2:.*]] = aievec.upd %[[A0]][%[[A3:.*]], %[[A4:.*]]] {index = 0 : i8, offset = 0 : i32} : memref, vector<64xi8> diff --git a/test/aievec/conv2d_uij_i8_noinit_aie-ml.mlir b/test/aievec/conv2d_uij_i8_noinit_aie-ml.mlir index f6e7667ad5..7e5f8d90a8 100644 --- 
a/test/aievec/conv2d_uij_i8_noinit_aie-ml.mlir +++ b/test/aievec/conv2d_uij_i8_noinit_aie-ml.mlir @@ -80,7 +80,7 @@ func.func @conv2d (%A: memref<18x288xi8>, %B: memref<48xi8>, %C: memref<16x256xi // CHECK: %[[C8:.*]] = arith.constant 8 : i32 // CHECK: %[[C0:.*]] = arith.constant 0 : index // CHECK: %[[T0:.*]] = aievec.upd %[[A1]][%[[C0]]] {index = 0 : i8, offset = 0 : i32} : memref<48xi8>, vector<64xi8> -// CHECK: %[[T1:.*]] = aievec.shuffle %[[T0]] {mode = 0 : i32} : vector<64xi8>, vector<64xi8> +// CHECK: %[[T1:.*]] = aievec.legacyshuffle %[[T0]] {mode = 0 : i32} : vector<64xi8>, vector<64xi8> // CHECK: %[[T2:.*]] = aievec.shift %[[T1]], %[[T1]], %[[C8]] {isAcc = false} : vector<64xi8>, vector<64xi8>, i32, vector<64xi8> // CHECK: %[[T3:.*]] = aievec.shift %[[T1]], %[[T1]], %[[C16_i32]] {isAcc = false} : vector<64xi8>, vector<64xi8>, i32, vector<64xi8> // CHECK: scf.for %[[A3:.*]] = %[[C0]] to %[[C16]] step %[[C1]] { diff --git a/test/dialect/AIEVec/invalid.mlir b/test/dialect/AIEVec/invalid.mlir index 05b274cac2..1333670e7b 100644 --- a/test/dialect/AIEVec/invalid.mlir +++ b/test/dialect/AIEVec/invalid.mlir @@ -37,3 +37,30 @@ func.func @invalidAccumulatorType(%A : vector<2x4xi16>, %B : vector<4x8xi16>, into vector<2x8xi32> return %0 : vector<2x8xi32> } + +// ----- + +func.func @invalidShuffleModeElementType(%v : vector<32xi16>) + -> vector<32xi16> { + // expected-error @+1 {{shuffle mode 't32_4x4' requires vectors of 32-bit elements}} + %r = aievec.shuffle %v [t32_4x4] : vector<32xi16> + return %r : vector<32xi16> +} + +// ----- + +func.func @invalidShuffleModeExtraOperand(%v : vector<32xi16>) + -> vector<32xi16> { + // expected-error @+1 {{shuffle mode 't16_4x8' does not admit a second operand}} + %r = aievec.shuffle %v, %v [t16_4x8] : vector<32xi16> + return %r : vector<32xi16> +} + +// ----- + +func.func @invalidShuffleModeMissingOperand(%v : vector<32xi16>) + -> vector<32xi16> { + // expected-error @+1 {{shuffle mode 't16_16x4_lo' requires a second operand}} 
+ %r = aievec.shuffle %v [t16_16x4_lo] : vector<32xi16> + return %r : vector<32xi16> +} diff --git a/test/dialect/AIEVec/roundtrip.mlir b/test/dialect/AIEVec/roundtrip.mlir index c1e88056ed..c7c20011cb 100644 --- a/test/dialect/AIEVec/roundtrip.mlir +++ b/test/dialect/AIEVec/roundtrip.mlir @@ -157,3 +157,207 @@ func.func @matmul_bf16(%A : vector<4x8xbf16>, %B : vector<8x4xbf16>, into vector<4x4xf32> return %0 : vector<4x4xf32> } + +// ----- + +func.func @shuffle_i8(%v : vector<64xi8>) -> vector<64xi8> { + // CHECK: aievec.shuffle %{{.*}} [t8_8x8] + %0 = aievec.shuffle %v [t8_8x8] : vector<64xi8> + // CHECK: aievec.shuffle %{{.*}} [t8_16x4] + %1 = aievec.shuffle %0 [t8_16x4] : vector<64xi8> + // CHECK: aievec.shuffle %{{.*}} [t8_4x16] + %2 = aievec.shuffle %1 [t8_4x16] : vector<64xi8> + // CHECK: aievec.shuffle %{{.*}} [t8_8x4] + %3 = aievec.shuffle %2 [t8_8x4] : vector<64xi8> + // CHECK: aievec.shuffle %{{.*}} [t8_4x8] + %4 = aievec.shuffle %3 [t8_4x8] : vector<64xi8> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t8_64x2_lo] + %5 = aievec.shuffle %v, %4 [t8_64x2_lo] : vector<64xi8> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t8_64x2_hi] + %6 = aievec.shuffle %5, %v [t8_64x2_hi] : vector<64xi8> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t8_2x64_lo] + %7 = aievec.shuffle %v, %6 [t8_2x64_lo] : vector<64xi8> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t8_2x64_hi] + %8 = aievec.shuffle %7, %v [t8_2x64_hi] : vector<64xi8> + return %8 : vector<64xi8> +} + +// ----- + +func.func @shuffle_i16(%v : vector<32xi16>) -> vector<32xi16> { + // CHECK: aievec.shuffle %{{.*}} [t16_8x4] + %0 = aievec.shuffle %v [t16_8x4] : vector<32xi16> + // CHECK: aievec.shuffle %{{.*}} [t16_4x8] + %1 = aievec.shuffle %0 [t16_4x8] : vector<32xi16> + // CHECK: aievec.shuffle %{{.*}} [t16_1x2_flip] + %2 = aievec.shuffle %1 [t16_1x2_flip] : vector<32xi16> + // CHECK: aievec.shuffle %{{.*}} [t16_4x4] + %3 = aievec.shuffle %2 [t16_4x4] : vector<32xi16> + // CHECK: aievec.shuffle %{{.*}} [t16_4x2] + 
%4 = aievec.shuffle %3 [t16_4x2] : vector<32xi16> + // CHECK: aievec.shuffle %{{.*}} [t16_2x4] + %5 = aievec.shuffle %4 [t16_2x4] : vector<32xi16> + // CHECK: aievec.shuffle %{{.*}} [t16_8x2] + %6 = aievec.shuffle %5 [t16_8x2] : vector<32xi16> + // CHECK: aievec.shuffle %{{.*}} [t16_2x8] + %7 = aievec.shuffle %6 [t16_2x8] : vector<32xi16> + // CHECK: aievec.shuffle %{{.*}} [t16_16x2] + %8 = aievec.shuffle %7 [t16_16x2] : vector<32xi16> + // CHECK: aievec.shuffle %{{.*}} [t16_2x16] + %9 = aievec.shuffle %8 [t16_2x16] : vector<32xi16> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t16_32x2_lo] + %10 = aievec.shuffle %v, %9 [t16_32x2_lo] : vector<32xi16> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t16_32x2_hi] + %11 = aievec.shuffle %10, %v [t16_32x2_hi] : vector<32xi16> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t16_2x32_lo] + %12 = aievec.shuffle %v, %11 [t16_2x32_lo] : vector<32xi16> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t16_2x32_hi] + %13 = aievec.shuffle %12, %v [t16_2x32_hi] : vector<32xi16> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t16_16x4_lo] + %14 = aievec.shuffle %v, %13 [t16_16x4_lo] : vector<32xi16> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t16_16x4_hi] + %15 = aievec.shuffle %14, %v [t16_16x4_hi] : vector<32xi16> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t16_4x16_lo] + %16 = aievec.shuffle %v, %15 [t16_4x16_lo] : vector<32xi16> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t16_4x16_hi] + %17 = aievec.shuffle %16, %v [t16_4x16_hi] : vector<32xi16> + return %17 : vector<32xi16> +} + +// ----- + +func.func @shuffle_bf16(%v : vector<32xbf16>) -> vector<32xbf16> { + // CHECK: aievec.shuffle %{{.*}} [t16_8x4] + %0 = aievec.shuffle %v [t16_8x4] : vector<32xbf16> + // CHECK: aievec.shuffle %{{.*}} [t16_4x8] + %1 = aievec.shuffle %0 [t16_4x8] : vector<32xbf16> + // CHECK: aievec.shuffle %{{.*}} [t16_1x2_flip] + %2 = aievec.shuffle %1 [t16_1x2_flip] : vector<32xbf16> + // CHECK: aievec.shuffle %{{.*}} [t16_4x4] + %3 = aievec.shuffle %2 [t16_4x4] : 
vector<32xbf16> + // CHECK: aievec.shuffle %{{.*}} [t16_4x2] + %4 = aievec.shuffle %3 [t16_4x2] : vector<32xbf16> + // CHECK: aievec.shuffle %{{.*}} [t16_2x4] + %5 = aievec.shuffle %4 [t16_2x4] : vector<32xbf16> + // CHECK: aievec.shuffle %{{.*}} [t16_8x2] + %6 = aievec.shuffle %5 [t16_8x2] : vector<32xbf16> + // CHECK: aievec.shuffle %{{.*}} [t16_2x8] + %7 = aievec.shuffle %6 [t16_2x8] : vector<32xbf16> + // CHECK: aievec.shuffle %{{.*}} [t16_16x2] + %8 = aievec.shuffle %7 [t16_16x2] : vector<32xbf16> + // CHECK: aievec.shuffle %{{.*}} [t16_2x16] + %9 = aievec.shuffle %8 [t16_2x16] : vector<32xbf16> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t16_32x2_lo] + %10 = aievec.shuffle %v, %9 [t16_32x2_lo] : vector<32xbf16> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t16_32x2_hi] + %11 = aievec.shuffle %10, %v [t16_32x2_hi] : vector<32xbf16> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t16_2x32_lo] + %12 = aievec.shuffle %v, %11 [t16_2x32_lo] : vector<32xbf16> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t16_2x32_hi] + %13 = aievec.shuffle %12, %v [t16_2x32_hi] : vector<32xbf16> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t16_16x4_lo] + %14 = aievec.shuffle %v, %13 [t16_16x4_lo] : vector<32xbf16> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t16_16x4_hi] + %15 = aievec.shuffle %14, %v [t16_16x4_hi] : vector<32xbf16> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t16_4x16_lo] + %16 = aievec.shuffle %v, %15 [t16_4x16_lo] : vector<32xbf16> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t16_4x16_hi] + %17 = aievec.shuffle %16, %v [t16_4x16_hi] : vector<32xbf16> + return %17 : vector<32xbf16> +} + +// ----- + +func.func @shuffle_i32(%v : vector<16xi32>) -> vector<16xi32> { + // CHECK: aievec.shuffle %{{.*}} [t32_4x4] + %0 = aievec.shuffle %v [t32_4x4] : vector<16xi32> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t32_16x2_lo] + %1 = aievec.shuffle %0, %v [t32_16x2_lo] : vector<16xi32> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t32_16x2_hi] + %2 = aievec.shuffle %v, %1
[t32_16x2_hi] : vector<16xi32> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t32_2x16_lo] + %3 = aievec.shuffle %2, %v [t32_2x16_lo] : vector<16xi32> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t32_2x16_hi] + %4 = aievec.shuffle %v, %3 [t32_2x16_hi] : vector<16xi32> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t32_8x4_lo] + %5 = aievec.shuffle %4, %v [t32_8x4_lo] : vector<16xi32> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t32_8x4_hi] + %6 = aievec.shuffle %v, %5 [t32_8x4_hi] : vector<16xi32> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t32_4x8_lo] + %7 = aievec.shuffle %6, %v [t32_4x8_lo] : vector<16xi32> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t32_4x8_hi] + %8 = aievec.shuffle %v, %7 [t32_4x8_hi] : vector<16xi32> + return %8 : vector<16xi32> +} + +// ----- + +func.func @shuffle_f32(%v : vector<16xf32>) -> vector<16xf32> { + // CHECK: aievec.shuffle %{{.*}} [t32_4x4] + %0 = aievec.shuffle %v [t32_4x4] : vector<16xf32> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t32_16x2_lo] + %1 = aievec.shuffle %0, %v [t32_16x2_lo] : vector<16xf32> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t32_16x2_hi] + %2 = aievec.shuffle %v, %1 [t32_16x2_hi] : vector<16xf32> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t32_2x16_lo] + %3 = aievec.shuffle %2, %v [t32_2x16_lo] : vector<16xf32> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t32_2x16_hi] + %4 = aievec.shuffle %v, %3 [t32_2x16_hi] : vector<16xf32> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t32_8x4_lo] + %5 = aievec.shuffle %4, %v [t32_8x4_lo] : vector<16xf32> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t32_8x4_hi] + %6 = aievec.shuffle %v, %5 [t32_8x4_hi] : vector<16xf32> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t32_4x8_lo] + %7 = aievec.shuffle %6, %v [t32_4x8_lo] : vector<16xf32> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t32_4x8_hi] + %8 = aievec.shuffle %v, %7 [t32_4x8_hi] : vector<16xf32> + return %8 : vector<16xf32> +} + +// ----- + +func.func @shuffle_i64(%v : vector<8xi64>) -> vector<8xi64> { + // CHECK:
aievec.shuffle %{{.*}}, %{{.*}} [t64_8x2_lo] + %0 = aievec.shuffle %v, %v [t64_8x2_lo] : vector<8xi64> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t64_8x2_hi] + %1 = aievec.shuffle %0, %v [t64_8x2_hi] : vector<8xi64> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t64_2x8_lo] + %2 = aievec.shuffle %v, %1 [t64_2x8_lo] : vector<8xi64> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t64_2x8_hi] + %3 = aievec.shuffle %2, %v [t64_2x8_hi] : vector<8xi64> + return %3 : vector<8xi64> +} + +// ----- + +func.func @shuffle_i128(%v : vector<4xi128>) -> vector<4xi128> { + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t128_4x2_lo] + %0 = aievec.shuffle %v, %v [t128_4x2_lo] : vector<4xi128> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t128_4x2_hi] + %1 = aievec.shuffle %0, %v [t128_4x2_hi] : vector<4xi128> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t128_2x4_lo] + %2 = aievec.shuffle %v, %1 [t128_2x4_lo] : vector<4xi128> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t128_2x4_hi] + %3 = aievec.shuffle %2, %v [t128_2x4_hi] : vector<4xi128> + return %3 : vector<4xi128> +} + +// ----- + +func.func @shuffle_i256(%v : vector<2xi256>) -> vector<2xi256> { + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t256_2x2_lo] + %0 = aievec.shuffle %v, %v [t256_2x2_lo] : vector<2xi256> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t256_2x2_hi] + %1 = aievec.shuffle %0, %v [t256_2x2_hi] : vector<2xi256> + return %1 : vector<2xi256> +} + +// ----- + +func.func @shuffle_i512(%v : vector<1xi512>) -> vector<1xi512> { + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t512_1x2_lo] + %0 = aievec.shuffle %v, %v [t512_1x2_lo] : vector<1xi512> + // CHECK: aievec.shuffle %{{.*}}, %{{.*}} [t512_1x2_hi] + %1 = aievec.shuffle %0, %v [t512_1x2_hi] : vector<1xi512> + return %1 : vector<1xi512> +}