diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPromoteMatmulOperands.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPromoteMatmulOperands.cpp
index d5a4a280e83a..dd498fad50e8 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPromoteMatmulOperands.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUPromoteMatmulOperands.cpp
@@ -44,6 +44,15 @@ void promoteOperand(OpBuilder &builder, Operation *op, unsigned index) {
   Value operand = op->getOperand(index);
 
   if (auto producer = operand.getDefiningOp()) {
+    // Skip promotion of fills.
+    if (isa<linalg::FillOp>(producer)) {
+      return;
+    }
+    if (auto generic = dyn_cast<linalg::GenericOp>(&*producer)) {
+      if (linalg::isaFillOpInterface(generic)) {
+        return;
+      }
+    }
     setLoweringConfig(producer, IREE::GPU::DerivedThreadConfigAttr::get(
                                     builder.getContext()));
     return;
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_promote_matmul_operands.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_promote_matmul_operands.mlir
index 5ec02698451a..f05cf7b1890b 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_promote_matmul_operands.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_promote_matmul_operands.mlir
@@ -64,3 +64,21 @@ func.func @lhs_only_matmul(%a: tensor<32x1024xf32>, %b: tensor<1024x128xf32>) -> tensor<32x128xf32> {
 // CHECK-SAME: %[[B:[A-Za-z0-9]+]]: tensor<1024x128xf32>
 // CHECK-DAG: %[[PA:.+]] = linalg.copy {{.*}} ins(%[[A]] : tensor<32x1024xf32>)
 // CHECK: linalg.generic {{.*}} ins(%[[PA]], %[[B]] : tensor<32x1024xf32>, tensor<1024x128xf32>)
+
+// -----
+
+#lowering_config = #iree_gpu.lowering_config<{promote_operands = [0]}>
+
+func.func @no_promote_fill(%b: tensor<128x128xf32>) -> tensor<4x128xf32> {
+  %cst = arith.constant 0.000000e+00 : f32
+  %empty = tensor.empty() : tensor<4x128xf32>
+  %fill = linalg.fill ins(%cst : f32) outs(%empty : tensor<4x128xf32>) -> tensor<4x128xf32>
+  %mm = linalg.matmul {lowering_config = #lowering_config}
+    ins(%fill, %b : tensor<4x128xf32>, tensor<128x128xf32>) outs(%fill : tensor<4x128xf32>) -> tensor<4x128xf32>
+  return %mm : tensor<4x128xf32>
+}
+
+// Verify that fills are not promoted.
+// CHECK-LABEL: func.func @no_promote_fill
+// CHECK-NOT: iree_gpu.derived_thread_config
+// CHECK: return