[aievec] Improve alignment offset calculation

To compute the alignment offset for a given transfer read operation, we currently make many assumptions about the source of its indices. This patch reduces the number of those assumptions to improve misalignment detection and offset computation.
Xilinx · Aug 18, 2023 · 4207a22 · 4207a22
1 parent 4dad574
commit 4207a22
Show file tree

Hide file tree

Showing 5 changed files with 73 additions and 53 deletions.
diff --git a/include/aie/Dialect/AIEVec/Analysis/Passes.h b/include/aie/Dialect/AIEVec/Analysis/Passes.h
@@ -34,7 +34,7 @@ namespace aievec {
 #define GEN_PASS_CLASSES
 #include "aie/Dialect/AIEVec/Analysis/Passes.h.inc"
 
-std::unique_ptr<Pass> createAIEVecConvolutionAnalysisPass();
+std::unique_ptr<mlir::Pass> createAIEVecConvolutionAnalysisPass();
 
 /// Generate the code for registering passes.
 #define GEN_PASS_REGISTRATION

diff --git a/include/aie/Dialect/AIEVec/Utils/Utils.h b/include/aie/Dialect/AIEVec/Utils/Utils.h
@@ -13,6 +13,7 @@
 
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
 #include <cstdint>
+#include <optional>
 #include <type_traits>
 
 namespace mlir {
@@ -31,10 +32,9 @@ template <
         std::is_same_v<TransferReadLikeOp, mlir::vector::TransferReadOp> ||
         std::is_same_v<TransferReadLikeOp,
                        mlir::vector::TransferReadOp::Adaptor>>>
-int64_t getTransferReadAlignmentOffset(TransferReadLikeOp readOp,
-                                       mlir::VectorType vType,
-                                       int64_t alignment);
-
+std::optional<int64_t> getTransferReadAlignmentOffset(TransferReadLikeOp readOp,
+                                                      mlir::VectorType vType,
+                                                      int64_t alignment);
 }
 
 #endif // AIE_DIALECT_AIEVEC_UTILS_UTILS_H
diff --git a/lib/Dialect/AIEVec/Transforms/VectorToAIEVecConversions.cpp b/lib/Dialect/AIEVec/Transforms/VectorToAIEVecConversions.cpp
@@ -780,7 +780,8 @@ struct LowerVectorTransferReadToAIEUPD
 
     // Misaligned accesses
     auto vType = readOp.getVectorType();
-    if (getTransferReadAlignmentOffset(adaptor, vType, vectorAlignment) != 0)
+    if (getTransferReadAlignmentOffset(adaptor, vType, vectorAlignment)
+            .value_or(0) != 0)
       return failure();
 
     // Invalid vector size.

diff --git a/lib/Dialect/AIEVec/Transforms/VectorToVectorConversions.cpp b/lib/Dialect/AIEVec/Transforms/VectorToVectorConversions.cpp
@@ -74,7 +74,8 @@ struct SplitUnalignedTransferReadPattern
     // Check if the transfer is unaligned.
     auto vType = readOp.getVectorType();
     int64_t offset =
-        getTransferReadAlignmentOffset(adaptor, vType, vectorAlignment);
+        getTransferReadAlignmentOffset(adaptor, vType, vectorAlignment)
+            .value_or(0);
     if (offset == 0)
       return failure();
 
@@ -89,23 +90,14 @@ struct SplitUnalignedTransferReadPattern
     // TODO: Add support for cases where the offset is greater than the
     // TODO: vector length.
     auto loc = readOp.getLoc();
-    auto newInnerMostIdx =
-        TypeSwitch<Operation *, Value>(
-            adaptor.getIndices().back().getDefiningOp())
-            .Case<AffineApplyOp>(
-                [&](auto applyOp) { return applyOp.getMapOperands()[0]; })
-            .Case<arith::ConstantOp>([&](auto constantOp) {
-              auto cstValue = cast<IntegerAttr>(constantOp.getValue()).getInt();
-              auto newCstValue = cstValue - offset;
-              auto newConstantIdxOp = rewriter.create<arith::ConstantOp>(
-                  loc,
-                  rewriter.getIntegerAttr(constantOp.getType(), newCstValue));
-              return newConstantIdxOp.getResult();
-            })
-            .Default([&](auto) {
-              llvm_unreachable("Unexpected index type");
-              return nullptr;
-            });
+    Value oldInnerMostIdx = adaptor.getIndices().back();
+    auto offsetCorrectionMap =
+        AffineMap::get(1, 0, getAffineDimExpr(0, readOp.getContext()) - offset);
+    Value newInnerMostIdx =
+        rewriter
+            .create<AffineApplyOp>(readOp.getLoc(), offsetCorrectionMap,
+                                   SmallVector<Value, 1>({oldInnerMostIdx}))
+            .getResult();
     SmallVector<Value, 8> alignedIdx;
     alignedIdx.append(adaptor.getIndices().begin(), adaptor.getIndices().end());
     alignedIdx[alignedIdx.size() - 1] = newInnerMostIdx;
@@ -263,7 +255,8 @@ static void configureAIEv1CanonicalizeLegalizations(ConversionTarget &target) {
   target.addDynamicallyLegalOp<vector::TransferReadOp>(
       [](vector::TransferReadOp op) {
         return !op.getPermutationMap().isConstant() &&
-               getTransferReadAlignmentOffset(op, op.getVectorType(), 128) == 0;
+               getTransferReadAlignmentOffset(op, op.getVectorType(), 128)
+                       .value_or(0) == 0;
       });
 }
 
@@ -294,7 +287,8 @@ static void configureAIEMLCanonicalizeLegalizations(ConversionTarget &target) {
   target.addDynamicallyLegalOp<vector::TransferReadOp>(
       [](vector::TransferReadOp op) {
         return !op.getPermutationMap().isConstant() &&
-               getTransferReadAlignmentOffset(op, op.getVectorType(), 256) == 0;
+               getTransferReadAlignmentOffset(op, op.getVectorType(), 256)
+                       .value_or(0) == 0;
       });
 }
 

diff --git a/lib/Dialect/AIEVec/Utils/Utils.cpp b/lib/Dialect/AIEVec/Utils/Utils.cpp
@@ -18,56 +18,81 @@
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/Debug.h"
-#include <optional>
 
 #define DEBUG_TYPE "aievec-utils"
 
 using namespace mlir;
 
 namespace xilinx::aievec {
+
+static std::optional<int64_t> getLowerBoundValue(Value idx) {
+  if (auto blkArg = dyn_cast<BlockArgument>(idx)) {
+    auto parentOp = blkArg.getOwner()->getParentOp();
+    return TypeSwitch<Operation *, std::optional<int64_t>>(parentOp)
+        .Case<AffineForOp>([&blkArg](AffineForOp forOp) {
+          if (forOp.getInductionVar() == blkArg &&
+              forOp.hasConstantLowerBound())
+            return std::optional<int64_t>(forOp.getConstantLowerBound());
+          // If it's an iteration argument or the lower bound is an
+          // affine expression.
+          // TODO: Compute the value of the lower bound affine expression
+          // TODO: if it's constant.
+          return std::optional<int64_t>();
+        })
+        .Default([](auto) { return std::optional<int64_t>(); });
+  }
+  return TypeSwitch<Operation *, std::optional<int64_t>>(idx.getDefiningOp())
+      .Case<arith::ConstantOp>([](auto constantOp) {
+        return std::optional<int64_t>(
+            cast<IntegerAttr>(constantOp.getValue()).getInt());
+      })
+      .Case<AffineApplyOp>([](auto applyOp) {
+        if (applyOp.getAffineMap().getNumResults() == 1) {
+          SmallVector<int64_t, 4> srcIndices;
+          for (auto index : applyOp.getMapOperands()) {
+            std::optional<int64_t> lbv = getLowerBoundValue(index);
+            // XXX: We assume block arguments to either have well-defined
+            // XXX: compile-time values, or to be aligned.
+            if (!lbv && !isa<BlockArgument>(index))
+              return std::optional<int64_t>();
+            srcIndices.push_back(lbv.value_or(0L));
+          }
+          return std::optional<int64_t>(
+              applyOp.getAffineMap().compose(srcIndices)[0]);
+        }
+        return std::optional<int64_t>();
+      })
+      .Default([&](auto) { return std::optional<int64_t>(); });
+}
+
 // Return the offset of a given transfer read operation with regards to the
 // specified vector type. If the read is aligned to the specified alignment
 // parameter (in bits), then the offset is 0. Otherwise, the offset is the
 // number of elements past the immediately preceding aligned vector length.
 template <typename TransferReadLikeOp, typename>
-int64_t getTransferReadAlignmentOffset(TransferReadLikeOp readOp,
-                                       VectorType vType, int64_t alignment) {
+std::optional<int64_t> getTransferReadAlignmentOffset(TransferReadLikeOp readOp,
+                                                      VectorType vType,
+                                                      int64_t alignment) {
   // TODO: Add support for cases where the index is not comming from an
   // TODO: `affine.apply` op or when the affine map has more than one
   // TODO: dimension. We also need to address the case where the index is an
   // TODO: induction variable.
   auto innerMostIndex = readOp.getIndices().back();
   auto vectorLength = vType.getShape().back();
-  auto idxDefOp = innerMostIndex.getDefiningOp();
-  if (!idxDefOp)
-    return 0L;
-  int64_t vectorLengthAlignmentOffset =
-      TypeSwitch<Operation *, int64_t>(idxDefOp)
-          .Case<arith::ConstantOp>([&](auto constantOp) {
-            return cast<IntegerAttr>(constantOp.getValue()).getInt() %
-                   vectorLength;
-          })
-          .template Case<AffineApplyOp>([&](auto applyOp) {
-            if (applyOp.getAffineMap().getNumDims() == 1)
-              return applyOp.getAffineMap().compose(ArrayRef<int64_t>{0})[0] %
-                     vectorLength;
-            return 0L;
-          })
-          .Default([&](auto) {
-            // XXX: If we can't determine the offset, we assume the access is
-            // XXX: aligned.
-            return 0L;
-          });
+  std::optional<int64_t> lbv = getLowerBoundValue(innerMostIndex);
+  if (!lbv)
+    return std::nullopt;
+  int64_t vectorLengthAlignmentOffset = lbv.value() % vectorLength;
   int64_t absoluteAlignmentOffset = alignment / vType.getElementTypeBitWidth();
   if (vectorLengthAlignmentOffset % absoluteAlignmentOffset)
     return vectorLengthAlignmentOffset;
   return 0;
 }
 
-template int64_t getTransferReadAlignmentOffset(vector::TransferReadOp readOp,
-                                                VectorType vType,
-                                                int64_t alignment);
-template int64_t
+template std::optional<int64_t>
+getTransferReadAlignmentOffset(vector::TransferReadOp readOp, VectorType vType,
+                               int64_t alignment);
+template std::optional<int64_t>
 getTransferReadAlignmentOffset(vector::TransferReadOp::Adaptor readOp,
                                VectorType vType, int64_t alignment);