[Codegen][GPU] Add pass to generalize named convolution ops (#16575)
After tiling the filter dimensions to 1, the simplest way to both handle
residual unit dimensions and vectorize the convolution is to generalize
it. This change adds a pass that generalizes all named convolution ops for
this intended use.
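
For illustration, generalizing a conv_2d_nhwc_hwcf with unit filter dimensions produces a linalg.generic along the following lines (a hand-written sketch using the shapes from the new test; value names are assumed, and this is not verbatim output of the pass):

#inp = affine_map<(n, oh, ow, f, kh, kw, c) -> (n, oh + kh, ow + kw, c)>
#fil = affine_map<(n, oh, ow, f, kh, kw, c) -> (kh, kw, c, f)>
#out = affine_map<(n, oh, ow, f, kh, kw, c) -> (n, oh, ow, f)>

// Generalized 1x1-filter convolution (strides and dilations of 1).
%res = linalg.generic {
    indexing_maps = [#inp, #fil, #out],
    iterator_types = ["parallel", "parallel", "parallel", "parallel",
                      "reduction", "reduction", "reduction"]}
    ins(%input, %filter : tensor<1x1x32x32xf16>, tensor<1x1x32x128xf16>)
    outs(%init : tensor<1x1x32x128xf16>) {
^bb0(%in: f16, %flt: f16, %acc: f16):
  %mul = arith.mulf %in, %flt : f16
  %add = arith.addf %acc, %mul : f16
  linalg.yield %add : f16
} -> tensor<1x1x32x128xf16>

Once the op is in this form, the unit n, oh, kh, and kw iteration dimensions can be folded away, leaving a contraction that vectorizes directly.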
qedawkins authored Feb 26, 2024
1 parent bb68472 commit baeffa7
Showing 6 changed files with 87 additions and 15 deletions.
@@ -15,11 +15,61 @@
#include "iree/compiler/Codegen/Common/GPU/Passes.h"
#include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/IR/LinalgInterfaces.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Pass/Pass.h"

namespace mlir::iree_compiler {

LogicalResult
generalizeCandidates(MLIRContext *context,
                     ArrayRef<linalg::LinalgOp> namedOpCandidates) {
  IRRewriter rewriter(context);
  for (auto linalgOp : namedOpCandidates) {
    // Pass down the lowering configuration, which may exist due to a
    // user-set configuration on the input.
    auto config = getLoweringConfig(linalgOp);
    rewriter.setInsertionPoint(linalgOp);
    FailureOr<linalg::GenericOp> generalizedOp =
        linalg::generalizeNamedOp(rewriter, linalgOp);
    if (failed(generalizedOp)) {
      linalgOp->emitOpError("failed to generalize operation");
      return failure();
    }
    if (config) {
      setLoweringConfig(*generalizedOp, config);
    }
  }
  return success();
}

namespace {
struct GPUGeneralizeNamedConvolutionOpsPass
    : public GPUGeneralizeNamedConvolutionOpsBase<
          GPUGeneralizeNamedConvolutionOpsPass> {

  void runOnOperation() override;
};
} // namespace

void GPUGeneralizeNamedConvolutionOpsPass::runOnOperation() {
  auto funcOp = getOperation();
  // Collect every named op implementing the convolution interface.
  SmallVector<linalg::LinalgOp> namedOpCandidates;
  funcOp.walk([&](linalg::LinalgOp linalgOp) {
    if (isa<linalg::ConvolutionOpInterface>(*linalgOp))
      namedOpCandidates.push_back(linalgOp);
  });

  if (failed(generalizeCandidates(&getContext(), namedOpCandidates))) {
    return signalPassFailure();
  }
}

std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
createGPUGeneralizeNamedConvolutionOpsPass() {
  return std::make_unique<GPUGeneralizeNamedConvolutionOpsPass>();
}

namespace {
struct GPUGeneralizeNamedOpsPass
    : public GPUGeneralizeNamedOpsBase<GPUGeneralizeNamedOpsPass> {
@@ -37,21 +37,8 @@ void GPUGeneralizeNamedOpsPass::runOnOperation() {
    namedOpCandidates.push_back(linalgOp);
  });

  IRRewriter rewriter(&getContext());
  for (auto linalgOp : namedOpCandidates) {
    // Pass down lowering configuration. It can exist due to user set
    // configuration from the input.
    auto config = getLoweringConfig(linalgOp);
    rewriter.setInsertionPoint(linalgOp);
    FailureOr<linalg::GenericOp> generalizedOp =
        linalg::generalizeNamedOp(rewriter, linalgOp);
    if (failed(generalizedOp)) {
      linalgOp->emitOpError("failed to generalize operation");
      return signalPassFailure();
    }
    if (config) {
      setLoweringConfig(*generalizedOp, config);
    }
  if (failed(generalizeCandidates(&getContext(), namedOpCandidates))) {
    return signalPassFailure();
  }
}

5 changes: 5 additions & 0 deletions compiler/src/iree/compiler/Codegen/Common/GPU/Passes.h
@@ -140,6 +140,11 @@ createWorkgroupSpecializationPass();
std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
createWorkGroupSwizzle(unsigned swizzleLogTile = 0);

// This pass generalizes named Linalg convolution ops to allow for better
// folding of unit dimensions.
std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
createGPUGeneralizeNamedConvolutionOpsPass();

// This pass generalizes named Linalg ops that are better off as generics.
std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
createGPUGeneralizeNamedOpsPass();
6 changes: 6 additions & 0 deletions compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td
@@ -31,6 +31,12 @@ def GPUDistributeSharedMemoryCopy :
  let constructor = "mlir::iree_compiler::createGPUDistributeSharedMemoryCopy()";
}

def GPUGeneralizeNamedConvolutionOps :
    InterfacePass<"iree-codegen-gpu-generalize-named-convolution-ops", "mlir::FunctionOpInterface"> {
  let summary = "Convert named Linalg convolution ops to linalg.generic ops";
  let constructor = "mlir::iree_compiler::createGPUGeneralizeNamedConvolutionOpsPass()";
}

def GPUGeneralizeNamedOps :
    InterfacePass<"iree-codegen-gpu-generalize-named-ops", "mlir::FunctionOpInterface"> {
  let summary = "Convert named Linalg ops to linalg.generic ops";
@@ -21,6 +21,7 @@ iree_lit_test_suite(
"gpu_check_resource_usage.mlir",
"gpu_distribute.mlir",
"gpu_distribute_shared_memory.mlir",
"gpu_generalize_named_convolution_ops.mlir",
"gpu_generalize_named_ops.mlir",
"gpu_lower_to_ukernels.mlir",
"gpu_nested_layout_vector_distribution.mlir",
@@ -17,6 +17,7 @@ iree_lit_test_suite(
"gpu_check_resource_usage.mlir"
"gpu_distribute.mlir"
"gpu_distribute_shared_memory.mlir"
"gpu_generalize_named_convolution_ops.mlir"
"gpu_generalize_named_ops.mlir"
"gpu_lower_to_ukernels.mlir"
"gpu_nested_layout_contract_amdgpu.mlir"
gpu_generalize_named_convolution_ops.mlir (new test file)
@@ -0,0 +1,22 @@
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-generalize-named-convolution-ops))" %s | FileCheck %s

func.func @nhwc_convolution(%arg0: tensor<1x1x32x32xf16>, %arg1: tensor<1x1x32x128xf16>) -> tensor<1x1x32x128xf16> {
  %cst = arith.constant 0.000000e+00 : f16
  %0 = tensor.empty() : tensor<1x1x32x128xf16>
  %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1x1x32x128xf16>) -> tensor<1x1x32x128xf16>
  %2 = linalg.conv_2d_nhwc_hwcf {
         dilations = dense<1> : vector<2xi64>,
         strides = dense<1> : vector<2xi64>,
         lowering_config = #iree_codegen.lowering_config<tile_sizes = [[1, 1, 32, 128, 1, 1, 32]]>
       }
       ins(%arg0, %arg1 : tensor<1x1x32x32xf16>, tensor<1x1x32x128xf16>)
       outs(%1 : tensor<1x1x32x128xf16>) -> tensor<1x1x32x128xf16>
  return %2 : tensor<1x1x32x128xf16>
}

// CHECK: #[[$CONFIG:.+]] = #iree_codegen.lowering_config
// CHECK-SAME{LITERAL}: <tile_sizes = [[1, 1, 32, 128, 1, 1, 32]]>

// CHECK-LABEL: func.func @nhwc_convolution
// CHECK: linalg.generic
// CHECK-SAME: lowering_config = #[[$CONFIG]]
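
For context, a subsequent unit-dimension folding pass would collapse the generalized op above into a plain contraction over the channel dimension, roughly as follows (a hypothetical sketch with assumed value names; this test only checks generalization and the preserved lowering_config):

#lhs = affine_map<(w, f, c) -> (w, c)>
#rhs = affine_map<(w, f, c) -> (c, f)>
#res = affine_map<(w, f, c) -> (w, f)>

// Matmul-like 32x32 * 32x128 generic left after dropping unit dimensions.
%folded = linalg.generic {
    indexing_maps = [#lhs, #rhs, #res],
    iterator_types = ["parallel", "parallel", "reduction"]}
    ins(%in2d, %flt2d : tensor<32x32xf16>, tensor<32x128xf16>)
    outs(%out2d : tensor<32x128xf16>) {
^bb0(%a: f16, %b: f16, %acc: f16):
  %m = arith.mulf %a, %b : f16
  %s = arith.addf %acc, %m : f16
  linalg.yield %s : f16
} -> tensor<32x128xf16>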
