diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUGeneralizeNamedOps.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUGeneralizeNamedOps.cpp
index 547a3ca0553a..52cf01139902 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUGeneralizeNamedOps.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUGeneralizeNamedOps.cpp
@@ -15,11 +15,61 @@
 #include "iree/compiler/Codegen/Common/GPU/Passes.h"
 #include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
+#include "mlir/Dialect/Linalg/IR/LinalgInterfaces.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Pass/Pass.h"
 
 namespace mlir::iree_compiler {
 
+LogicalResult
+generalizeCandidates(MLIRContext *context,
+                     ArrayRef<linalg::LinalgOp> namedOpCandidates) {
+  IRRewriter rewriter(context);
+  for (auto linalgOp : namedOpCandidates) {
+    // Pass down lowering configuration. It can exist due to user set
+    // configuration from the input.
+    auto config = getLoweringConfig(linalgOp);
+    rewriter.setInsertionPoint(linalgOp);
+    FailureOr<linalg::GenericOp> generalizedOp =
+        linalg::generalizeNamedOp(rewriter, linalgOp);
+    if (failed(generalizedOp)) {
+      linalgOp->emitOpError("failed to generalize operation");
+      return failure();
+    }
+    if (config) {
+      setLoweringConfig(*generalizedOp, config);
+    }
+  }
+  return success();
+}
+
+namespace {
+struct GPUGeneralizeNamedConvolutionOpsPass
+    : public GPUGeneralizeNamedConvolutionOpsBase<
+          GPUGeneralizeNamedConvolutionOpsPass> {
+
+  void runOnOperation() override;
+};
+} // namespace
+
+void GPUGeneralizeNamedConvolutionOpsPass::runOnOperation() {
+  auto funcOp = getOperation();
+  SmallVector<linalg::LinalgOp> namedOpCandidates;
+  funcOp.walk([&](linalg::LinalgOp linalgOp) {
+    if (isa<linalg::ConvolutionOpInterface>(*linalgOp))
+      namedOpCandidates.push_back(linalgOp);
+  });
+
+  if (failed(generalizeCandidates(&getContext(), namedOpCandidates))) {
+    return signalPassFailure();
+  }
+}
+
+std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
+createGPUGeneralizeNamedConvolutionOpsPass() {
+  return std::make_unique<GPUGeneralizeNamedConvolutionOpsPass>();
+}
+
 namespace {
 struct GPUGeneralizeNamedOpsPass
     : public GPUGeneralizeNamedOpsBase<GPUGeneralizeNamedOpsPass> {
@@ -37,21 +87,8 @@ void GPUGeneralizeNamedOpsPass::runOnOperation() {
       namedOpCandidates.push_back(linalgOp);
   });
 
-  IRRewriter rewriter(&getContext());
-  for (auto linalgOp : namedOpCandidates) {
-    // Pass down lowering configuration. It can exist due to user set
-    // configuration from the input.
-    auto config = getLoweringConfig(linalgOp);
-    rewriter.setInsertionPoint(linalgOp);
-    FailureOr<linalg::GenericOp> generalizedOp =
-        linalg::generalizeNamedOp(rewriter, linalgOp);
-    if (failed(generalizedOp)) {
-      linalgOp->emitOpError("failed to generalize operation");
-      return signalPassFailure();
-    }
-    if (config) {
-      setLoweringConfig(*generalizedOp, config);
-    }
+  if (failed(generalizeCandidates(&getContext(), namedOpCandidates))) {
+    return signalPassFailure();
   }
 }
 
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.h b/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.h
index f1a672fdcefc..9aba897d9c19 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.h
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.h
@@ -140,6 +140,11 @@ createWorkgroupSpecializationPass();
 std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
 createWorkGroupSwizzle(unsigned swizzleLogTile = 0);
 
+// This pass generalizes named Linalg convolution ops to allow for better
+// folding of unit dimensions.
+std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
+createGPUGeneralizeNamedConvolutionOpsPass();
+
 // This pass generalizes named Linalg ops that are better off as generics.
 std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
 createGPUGeneralizeNamedOpsPass();
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td b/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td
index 0d53803013fd..cf73d11655fa 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td
@@ -31,6 +31,12 @@ def GPUDistributeSharedMemoryCopy :
   let constructor = "mlir::iree_compiler::createGPUDistributeSharedMemoryCopy()";
 }
 
+def GPUGeneralizeNamedConvolutionOps :
+    InterfacePass<"iree-codegen-gpu-generalize-named-convolution-ops", "mlir::FunctionOpInterface"> {
+  let summary = "Convert named Linalg convolution ops to linalg.generic ops";
+  let constructor = "mlir::iree_compiler::createGPUGeneralizeNamedConvolutionOpsPass()";
+}
+
 def GPUGeneralizeNamedOps :
     InterfacePass<"iree-codegen-gpu-generalize-named-ops", "mlir::FunctionOpInterface"> {
   let summary = "Convert named Linalg ops to linalg.generic ops";
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel
index 4c7cfd38a288..f11011f161d7 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/BUILD.bazel
@@ -21,6 +21,7 @@ iree_lit_test_suite(
             "gpu_check_resource_usage.mlir",
             "gpu_distribute.mlir",
             "gpu_distribute_shared_memory.mlir",
+            "gpu_generalize_named_convolution_ops.mlir",
             "gpu_generalize_named_ops.mlir",
            "gpu_lower_to_ukernels.mlir",
            "gpu_nested_layout_vector_distribution.mlir",
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Common/GPU/test/CMakeLists.txt
index b376523b04c2..f71e6972bfae 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/CMakeLists.txt
@@ -17,6 +17,7 @@ iree_lit_test_suite(
     "gpu_check_resource_usage.mlir"
     "gpu_distribute.mlir"
     "gpu_distribute_shared_memory.mlir"
+    "gpu_generalize_named_convolution_ops.mlir"
     "gpu_generalize_named_ops.mlir"
    "gpu_lower_to_ukernels.mlir"
    "gpu_nested_layout_contract_amdgpu.mlir"
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_generalize_named_convolution_ops.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_generalize_named_convolution_ops.mlir
new file mode 100644
index 000000000000..563c54904f8e
--- /dev/null
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_generalize_named_convolution_ops.mlir
@@ -0,0 +1,22 @@
+// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-generalize-named-convolution-ops))" %s | FileCheck %s
+
+func.func @nhwc_convolution(%arg0: tensor<1x1x32x32xf16>, %arg1: tensor<1x1x32x128xf16>) -> tensor<1x1x32x128xf16> {
+  %cst = arith.constant 0.000000e+00 : f16
+  %0 = tensor.empty() : tensor<1x1x32x128xf16>
+  %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1x1x32x128xf16>) -> tensor<1x1x32x128xf16>
+  %2 = linalg.conv_2d_nhwc_hwcf {
+    dilations = dense<1> : vector<2xi64>,
+    strides = dense<1> : vector<2xi64>,
+    lowering_config = #iree_codegen.lowering_config
+  }
+    ins(%arg0, %arg1 : tensor<1x1x32x32xf16>, tensor<1x1x32x128xf16>)
+    outs(%1 : tensor<1x1x32x128xf16>) -> tensor<1x1x32x128xf16>
+  return %2 : tensor<1x1x32x128xf16>
+}
+
+// CHECK: #[[$CONFIG:.+]] = #iree_codegen.lowering_config
+// CHECK-SAME{LITERAL}:
+
+// CHECK-LABEL: func.func @nhwc_convolution
+// CHECK:         linalg.generic
+// CHECK-SAME:      lowering_config = #[[$CONFIG]]
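
For reference (not part of the patch): with unit strides and dilations, linalg::generalizeNamedOp is expected to turn the linalg.conv_2d_nhwc_hwcf from the test above into roughly the linalg.generic sketched below, and the pass re-attaches the original lowering_config to the new op (which is what the CHECK lines verify). Exact SSA names and attribute printing may differ; the lowering_config attribute is omitted here for brevity.

  // Loop order (d0..d6) = (n, oh, ow, oc, kh, kw, ic); kh/kw/ic are reductions.
  #map  = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1 + d4, d2 + d5, d6)>  // input
  #map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5, d6, d3)>            // filter
  #map2 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)>            // output

  %2 = linalg.generic {
         indexing_maps = [#map, #map1, #map2],
         iterator_types = ["parallel", "parallel", "parallel", "parallel",
                           "reduction", "reduction", "reduction"]}
         ins(%arg0, %arg1 : tensor<1x1x32x32xf16>, tensor<1x1x32x128xf16>)
         outs(%1 : tensor<1x1x32x128xf16>) {
  ^bb0(%in: f16, %in_0: f16, %out: f16):
    %3 = arith.mulf %in, %in_0 : f16
    %4 = arith.addf %out, %3 : f16
    linalg.yield %4 : f16
  } -> tensor<1x1x32x128xf16>

Because the result is a plain linalg.generic, downstream unit-dimension folding patterns can drop the size-1 batch and spatial dimensions, which is the motivation stated in the Passes.h comment.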