[mlir][Bufferization] castOrReallocMemRefValue: Use BufferizationOptions (llvm#89175)

This allows configuring both the op used for allocation and the op used to
copy memrefs.
It also changes the default behavior: the default allocation in
`BufferizationOptions` creates `memref.alloc` with `alignment = 64`,
whereas we used to create `memref.alloc` without any alignment before.
Fixes:
```
// TODO: Use alloc/memcpy callback from BufferizationOptions if called via
// BufferizableOpInterface impl of ToMemrefOp.
```
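A minimal sketch of what this enables (not part of the commit; it assumes the existing `allocationFn`/`memCpyFn` hooks on `BufferizationOptions`, which `createAlloc`/`createMemCpy` dispatch to):

```
// Route castOrReallocMemRefValue's realloc-and-copy path through custom ops
// by overriding the BufferizationOptions callbacks.
BufferizationOptions options;
options.allocationFn = [](OpBuilder &b, Location loc, MemRefType type,
                          ValueRange dynShape,
                          unsigned alignment) -> FailureOr<Value> {
  // For illustration: allocate on the stack instead of the heap.
  return b.create<memref::AllocaOp>(loc, type, dynShape).getResult();
};
options.memCpyFn = [](OpBuilder &b, Location loc, Value from,
                      Value to) -> LogicalResult {
  // Keep the default copy behavior, made explicit here.
  b.create<memref::CopyOp>(loc, from, to);
  return success();
};
```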
mgehre-amd authored Apr 18, 2024
1 parent 4c3514f commit c515c78
Showing 5 changed files with 29 additions and 20 deletions.
6 changes: 4 additions & 2 deletions mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h
@@ -53,12 +53,14 @@ void populateDynamicDimSizes(OpBuilder &b, Location loc, Value shapedValue,
 /// This function returns `failure()` in case of unsupported casts. E.g., casts
 /// with differing element types or memory spaces.
 FailureOr<Value> castOrReallocMemRefValue(OpBuilder &b, Value value,
-                                          MemRefType type);
+                                          MemRefType type,
+                                          const BufferizationOptions &options);
 
 /// Try to fold to_memref(to_tensor(x)). If x's type and the result type of the
 /// to_memref op are different, a memref.cast is needed.
 LogicalResult foldToMemrefToTensorPair(RewriterBase &rewriter,
-                                       ToMemrefOp toMemref);
+                                       ToMemrefOp toMemref,
+                                       const BufferizationOptions &options);
 
 /// Add the canonicalization patterns for bufferization.dealloc to the given
 /// pattern set to make them available to other passes (such as
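For orientation, a hedged usage sketch of the new signature (`builder`, `buffer`, and `destType` are hypothetical placeholders, not names from this commit):

```
// If `buffer` can legally be memref.cast to `destType`, a cast is built;
// otherwise a new buffer is created via options.createAlloc and filled via
// options.createMemCpy.
BufferizationOptions options; // default: memref.alloc with alignment = 64
FailureOr<Value> adjusted =
    castOrReallocMemRefValue(builder, buffer, destType, options);
if (failed(adjusted))
  return failure(); // unsupported cast, e.g. differing element types
Value result = *adjusted;
```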
31 changes: 18 additions & 13 deletions mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
@@ -23,9 +23,9 @@ using namespace mlir::bufferization;
 // Helper functions
 //===----------------------------------------------------------------------===//
 
-FailureOr<Value>
-mlir::bufferization::castOrReallocMemRefValue(OpBuilder &b, Value value,
-                                              MemRefType destType) {
+FailureOr<Value> mlir::bufferization::castOrReallocMemRefValue(
+    OpBuilder &b, Value value, MemRefType destType,
+    const BufferizationOptions &options) {
   auto srcType = llvm::cast<MemRefType>(value.getType());
 
   // Element type, rank and memory space must match.
@@ -73,18 +73,21 @@ mlir::bufferization::castOrReallocMemRefValue(OpBuilder &b, Value value,
     Value size = b.create<memref::DimOp>(loc, value, i);
     dynamicOperands.push_back(size);
   }
-  // TODO: Use alloc/memcpy callback from BufferizationOptions if called via
-  // BufferizableOpInterface impl of ToMemrefOp.
-  Value copy = b.create<memref::AllocOp>(loc, destType, dynamicOperands);
-  b.create<memref::CopyOp>(loc, value, copy);
+
+  FailureOr<Value> copy =
+      options.createAlloc(b, loc, destType, dynamicOperands);
+  if (failed(copy))
+    return failure();
+  if (failed(options.createMemCpy(b, loc, value, *copy)))
+    return failure();
   return copy;
 }
 
 /// Try to fold to_memref(to_tensor(x)). If x's type and the result type of the
 /// to_memref op are different, a memref.cast is needed.
-LogicalResult
-mlir::bufferization::foldToMemrefToTensorPair(RewriterBase &rewriter,
-                                              ToMemrefOp toMemref) {
+LogicalResult mlir::bufferization::foldToMemrefToTensorPair(
+    RewriterBase &rewriter, ToMemrefOp toMemref,
+    const BufferizationOptions &options) {
   auto memrefToTensor = toMemref.getTensor().getDefiningOp<ToTensorOp>();
   if (!memrefToTensor)
     return failure();
@@ -105,7 +108,7 @@ mlir::bufferization::foldToMemrefToTensorPair(RewriterBase &rewriter,
   // Ranked memref -> Ranked memref cast.
   if (rankedSrcType && rankedDestType) {
     FailureOr<Value> replacement = castOrReallocMemRefValue(
-        rewriter, memrefToTensor.getMemref(), rankedDestType);
+        rewriter, memrefToTensor.getMemref(), rankedDestType, options);
     if (failed(replacement))
       return failure();
 
@@ -795,7 +798,9 @@ struct ToMemrefToTensorFolding : public OpRewritePattern<ToMemrefOp> {

   LogicalResult matchAndRewrite(ToMemrefOp toMemref,
                                 PatternRewriter &rewriter) const final {
-    return foldToMemrefToTensorPair(rewriter, toMemref);
+    BufferizationOptions options;
+    options.bufferAlignment = 0;
+    return foldToMemrefToTensorPair(rewriter, toMemref, options);
   }
 };

@@ -843,7 +848,7 @@ void ToMemrefOp::getCanonicalizationPatterns(RewritePatternSet &results,
 LogicalResult ToMemrefOp::bufferize(RewriterBase &rewriter,
                                     const BufferizationOptions &options) {
   // Fold to_memref(to_tensor(x)) to x. Insert a cast if necessary.
-  (void)foldToMemrefToTensorPair(rewriter, *this);
+  (void)foldToMemrefToTensorPair(rewriter, *this, options);
   // Note: The return value of `bufferize` indicates whether there was an error
   // or not. (And not whether the pattern matched or not.)
   return success();
8 changes: 5 additions & 3 deletions mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
@@ -74,8 +74,10 @@ BufferizeTypeConverter::BufferizeTypeConverter() {
     auto rankedDestType = dyn_cast<MemRefType>(type);
     if (!rankedDestType)
       return nullptr;
+    BufferizationOptions options;
+    options.bufferAlignment = 0;
     FailureOr<Value> replacement =
-        castOrReallocMemRefValue(builder, inputs[0], rankedDestType);
+        castOrReallocMemRefValue(builder, inputs[0], rankedDestType, options);
     if (failed(replacement))
       return nullptr;
     return *replacement;
@@ -512,8 +514,8 @@ LogicalResult bufferization::bufferizeOp(Operation *op,
   // Fold all to_memref(to_tensor(x)) pairs.
   for (Operation *op : toMemrefOps) {
     rewriter.setInsertionPoint(op);
-    (void)bufferization::foldToMemrefToTensorPair(rewriter,
-                                                  cast<ToMemrefOp>(op));
+    (void)bufferization::foldToMemrefToTensorPair(
+        rewriter, cast<ToMemrefOp>(op), options);
   }
 
   // Remove all dead to_tensor ops.
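The two call sites without caller-supplied options (the `ToMemrefToTensorFolding` pattern and the `BufferizeTypeConverter` materialization above) construct a local fallback with `bufferAlignment = 0`. A sketch of why (the default of 64 would otherwise change their output; the same attribute is what the `{{.*}}` wildcards in the tests below absorb):

```
// Not commit code; illustrates the effect of bufferAlignment on createAlloc.
BufferizationOptions aligned;   // bufferAlignment defaults to 64
// createAlloc emits: memref.alloc() {alignment = 64 : i64} : memref<...>

BufferizationOptions unaligned;
unaligned.bufferAlignment = 0;  // matches the pre-commit plain memref.alloc
// createAlloc emits: memref.alloc() : memref<...>
```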
@@ -84,7 +84,7 @@ func.func @main(%t: tensor<5xf32>) -> (f32, f32) {
 // Note: This alloc is not needed, but it is inserted before the returned buffer
 // is promoted to an out param to reconcile mismatching layout maps on return
 // value and function signature.
-// CHECK-NO-LAYOUT: %[[alloc2:.*]] = memref.alloc() : memref<2x5xf32>
+// CHECK-NO-LAYOUT: %[[alloc2:.*]] = memref.alloc() {{.*}} : memref<2x5xf32>
 // CHECK-NO-LAYOUT: memref.copy %[[subview]], %[[alloc2]]
 // CHECK-NO-LAYOUT: memref.copy %[[alloc2]], %[[r]]
 
@@ -52,7 +52,7 @@ func.func private @external_func_with_return_val(tensor<4xi32>) -> f32
 // CHECK-NO-LAYOUT-MAP-LABEL: func @return_extract_slice(%{{.*}}) -> memref<2x?xf32>
 // CHECK-NO-LAYOUT-MAP: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<20x10xf32>
 // CHECK-NO-LAYOUT-MAP: %[[subview:.*]] = memref.subview {{.*}} : memref<20x10xf32> to memref<2x?xf32, strided<[10, 1], offset: ?>>
-// CHECK-NO-LAYOUT-MAP: %[[alloc_no_layout:.*]] = memref.alloc(%{{.*}}) : memref<2x?xf32>
+// CHECK-NO-LAYOUT-MAP: %[[alloc_no_layout:.*]] = memref.alloc(%{{.*}}) {{.*}} : memref<2x?xf32>
 // CHECK-NO-LAYOUT-MAP: memref.copy %[[subview]], %[[alloc_no_layout]]
 // TODO: %alloc should be deallocated here, but we currently do not dealloc
 // buffers that are inserted due to to_tensor/to_memref canonicalization (when
