
Commit

[GPU] Bail out in GPUReduceBankConflicts if we have collapse_shape user (iree-org#18863)

This is unsupported by upstream and can lead to a compiler error.
llvm/llvm-project#112994
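
For context, a rough sketch of the problematic interaction follows; the shapes, padding amount, and strides are illustrative assumptions, not taken from the pass. Padding re-routes the alloc's users through a strided memref.subview, and an existing memref.collapse_shape user then has to merge dimensions that are no longer contiguous:

// Before the pass: a contiguous workgroup alloc collapsed by one of its users.
%alloc = memref.alloc() : memref<4x32x64xf32, #gpu.address_space<workgroup>>
%c = memref.collapse_shape %alloc [[0], [1, 2]]
    : memref<4x32x64xf32, #gpu.address_space<workgroup>>
    into memref<4x2048xf32, #gpu.address_space<workgroup>>

// After padding the inner dim (say 64 -> 68), users would instead see a view like
//   memref<4x32x64xf32, strided<[2176, 68, 1]>, #gpu.address_space<workgroup>>.
// Collapsing [1, 2] on that view needs stride(dim 1) == 64 * stride(dim 2),
// i.e. 68 == 64, which does not hold, so the collapse_shape can no longer be
// rebuilt and compilation fails.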

Progress towards: iree-org#18858

---------

Signed-off-by: Nirvedh <nirvedh@gmail.com>
nirvedhmeshram authored Oct 24, 2024
1 parent 8ce8bed commit aef6e1f
Showing 2 changed files with 83 additions and 0 deletions.
@@ -18,6 +18,23 @@ namespace mlir::iree_compiler {

namespace {

/// Check if the AllocOp has a CollapseShapeOp user, looking through
/// view-like ops.
static bool hasCollapseShapeUser(memref::AllocOp allocOp) {
  SmallVector<Operation *> users(allocOp->getUsers());
  while (!users.empty()) {
    auto user = users.pop_back_val();
    if (isa<memref::CollapseShapeOp>(user)) {
      return true;
    }
    if (isa<ViewLikeOpInterface>(user)) {
      for (auto u : user->getUsers()) {
        users.push_back(u);
      }
    }
  }
  return false;
}

/// Pad out the inner dimension of the `memref.alloc` op in order to reduce the
/// chance of bank conflicts when reading 2D shapes within shared memory.
static void padAlloc(MLIRContext *context, memref::AllocOp allocOp,
@@ -28,6 +45,12 @@ static void padAlloc(MLIRContext *context, memref::AllocOp allocOp,
  int64_t innerDim = allocOpShape.back();
  if (ShapedType::isDynamic(innerDim))
    return;

  // Bail out if there is a CollapseShapeOp user, since padding is unsupported
  // in that case.
  if (hasCollapseShapeUser(allocOp))
    return;

  Type elType = allocOp.getType().getElementType();
  unsigned bitwidth =
      mlir::DataLayout::closest(allocOp).getTypeSizeInBits(elType);
@@ -47,6 +47,66 @@ func.func @pad_alloc_expand_shape(%a: memref<1024x1024xf32>) {
  return
}

// -----
// CHECK-LABEL: func.func @no_pad_alloc_collapse_shape
// CHECK: %[[A:.*]] = memref.alloc() : memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>>
// CHECK: %[[C:.*]] = memref.collapse_shape %[[A]] {{\[}}[0], [1, 2], [3, 4]]
// CHECK-SAME: memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>> into
// CHECK-SAME: memref<4x32x64xf32, #gpu.address_space<workgroup>>
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[CST_0:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[VEC_READ:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %[[CST_0]] {in_bounds = [true]} :
// CHECK-SAME: memref<1024x1024xf32>, vector<4xf32>
// CHECK: vector.transfer_write %[[VEC_READ]], %[[C]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true]} :
// CHECK-SAME: vector<4xf32>, memref<4x32x64xf32, #gpu.address_space<workgroup>>


func.func @no_pad_alloc_collapse_shape(%a: memref<1024x1024xf32>) {
  %0 = memref.alloc() : memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>>
  %1 = memref.collapse_shape %0 [[0], [1, 2], [3, 4]]
      : memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>> into memref<4x32x64xf32, #gpu.address_space<workgroup>>
  %c0 = arith.constant 0 : index
  %cst_0 = arith.constant 0.000000e+00 : f32
  %3 = vector.transfer_read %a[%c0, %c0], %cst_0 {in_bounds = [true]} :
      memref<1024x1024xf32>, vector<4xf32>
  vector.transfer_write %3, %1[%c0, %c0, %c0] {in_bounds = [true]} :
      vector<4xf32>, memref<4x32x64xf32, #gpu.address_space<workgroup>>
  return
}

// -----

// CHECK-LABEL: func.func @no_pad_alloc_collapse_shape_throughsubview
// CHECK: %[[A:.*]] = memref.alloc() : memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>>
// CHECK: %[[S:.*]] = memref.subview %[[A]][0, 0, 0, 0, 0] [4, 2, 16, 8, 8] [1, 1, 1, 1, 1] :
// CHECK-SAME: memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>> to
// CHECK-SAME: memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>>
// CHECK: %[[C:.*]] = memref.collapse_shape %[[S]] {{\[}}[0], [1, 2], [3, 4]]
// CHECK-SAME: memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>> into
// CHECK-SAME: memref<4x32x64xf32, #gpu.address_space<workgroup>>
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[VEC_READ:.*]] = vector.transfer_read %{{.*}}[%[[C0]], %[[C0]]], %[[CST]] {in_bounds = [true]} :
// CHECK-SAME: memref<1024x1024xf32>, vector<4xf32>
// CHECK: vector.transfer_write %[[VEC_READ]], %[[C]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true]} :
// CHECK-SAME: vector<4xf32>, memref<4x32x64xf32, #gpu.address_space<workgroup>>


func.func @no_pad_alloc_collapse_shape_throughsubview(%a: memref<1024x1024xf32>) {
  %0 = memref.alloc() : memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>>
  %subview = memref.subview %0[0, 0, 0, 0, 0] [4, 2, 16, 8, 8] [1, 1, 1, 1, 1]
      : memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>> to memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>>
  %1 = memref.collapse_shape %subview [[0], [1, 2], [3, 4]]
      : memref<4x2x16x8x8xf32, #gpu.address_space<workgroup>> into memref<4x32x64xf32, #gpu.address_space<workgroup>>
  %c0 = arith.constant 0 : index
  %cst_0 = arith.constant 0.000000e+00 : f32
  %3 = vector.transfer_read %a[%c0, %c0], %cst_0 {in_bounds = [true]} :
      memref<1024x1024xf32>, vector<4xf32>
  vector.transfer_write %3, %1[%c0, %c0, %c0] {in_bounds = [true]} :
      vector<4xf32>, memref<4x32x64xf32, #gpu.address_space<workgroup>>
  return
}

// -----

// CHECK-LABEL: func.func @pad_alloc_negative
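The new cases above are lit tests driven by FileCheck; the RUN line sits at the top of the file, outside this hunk. A plausible invocation, assuming the pass is exposed as iree-codegen-gpu-reduce-bank-conflicts (the exact flag and any options are assumptions), would be:

// RUN: iree-opt --split-input-file \
// RUN:   --pass-pipeline='builtin.module(func.func(iree-codegen-gpu-reduce-bank-conflicts))' %s | FileCheck %s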
