[Codegen][GPU] Add pass to generalize named convolution ops (#16575)
After tiling the filter dimensions to 1, the simplest way to both handle
residual unit dimensions and vectorize the convolution is to generalize
it. This change adds a pass that generalizes all named convolution ops for
this intended use.
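
For illustration, generalizing a conv_2d_nhwc_hwcf with unit filter dimensions produces a linalg.generic along the following lines (a hand-written sketch using the shapes from the new test; value names are assumed, and this is not verbatim output of the pass):

#inp = affine_map<(n, oh, ow, f, kh, kw, c) -> (n, oh + kh, ow + kw, c)>
#fil = affine_map<(n, oh, ow, f, kh, kw, c) -> (kh, kw, c, f)>
#out = affine_map<(n, oh, ow, f, kh, kw, c) -> (n, oh, ow, f)>

// Generalized 1x1-filter convolution (strides and dilations of 1).
%res = linalg.generic {
    indexing_maps = [#inp, #fil, #out],
    iterator_types = ["parallel", "parallel", "parallel", "parallel",
                      "reduction", "reduction", "reduction"]}
    ins(%input, %filter : tensor<1x1x32x32xf16>, tensor<1x1x32x128xf16>)
    outs(%init : tensor<1x1x32x128xf16>) {
^bb0(%in: f16, %flt: f16, %acc: f16):
  %mul = arith.mulf %in, %flt : f16
  %add = arith.addf %acc, %mul : f16
  linalg.yield %add : f16
} -> tensor<1x1x32x128xf16>

Once the op is in this form, the unit n, oh, kh, and kw iteration dimensions can be folded away, leaving a contraction that vectorizes directly.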
qedawkins authored Feb 26, 2024
1 parent bb68472 commit baeffa7
Showing 6 changed files with 87 additions and 15 deletions.
@@ -15,11 +15,61 @@
#include "iree/compiler/Codegen/Common/GPU/Passes.h"
#include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenAttrs.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/IR/LinalgInterfaces.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Pass/Pass.h"

namespace mlir::iree_compiler {

LogicalResult
generalizeCandidates(MLIRContext *context,
                     ArrayRef<linalg::LinalgOp> namedOpCandidates) {
  IRRewriter rewriter(context);
  for (auto linalgOp : namedOpCandidates) {
    // Pass down the lowering configuration, which may exist due to a
    // user-set configuration on the input.
    auto config = getLoweringConfig(linalgOp);
    rewriter.setInsertionPoint(linalgOp);
    FailureOr<linalg::GenericOp> generalizedOp =
        linalg::generalizeNamedOp(rewriter, linalgOp);
    if (failed(generalizedOp)) {
      linalgOp->emitOpError("failed to generalize operation");
      return failure();
    }
    if (config) {
      setLoweringConfig(*generalizedOp, config);
    }
  }
  return success();
}

namespace {
struct GPUGeneralizeNamedConvolutionOpsPass
    : public GPUGeneralizeNamedConvolutionOpsBase<
          GPUGeneralizeNamedConvolutionOpsPass> {

  void runOnOperation() override;
};
} // namespace

void GPUGeneralizeNamedConvolutionOpsPass::runOnOperation() {
  auto funcOp = getOperation();
  // Collect every named op implementing the convolution interface.
  SmallVector<linalg::LinalgOp> namedOpCandidates;
  funcOp.walk([&](linalg::LinalgOp linalgOp) {
    if (isa<linalg::ConvolutionOpInterface>(*linalgOp))
      namedOpCandidates.push_back(linalgOp);
  });

  if (failed(generalizeCandidates(&getContext(), namedOpCandidates))) {
    return signalPassFailure();
  }
}

std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
createGPUGeneralizeNamedConvolutionOpsPass() {
  return std::make_unique<GPUGeneralizeNamedConvolutionOpsPass>();
}

namespace {
struct GPUGeneralizeNamedOpsPass
    : public GPUGeneralizeNamedOpsBase<GPUGeneralizeNamedOpsPass> {
@@ -37,21 +37,8 @@ void GPUGeneralizeNamedOpsPass::runOnOperation() {
    namedOpCandidates.push_back(linalgOp);
  });

  IRRewriter rewriter(&getContext());
  for (auto linalgOp : namedOpCandidates) {
    // Pass down lowering configuration. It can exist due to user set
    // configuration from the input.
    auto config = getLoweringConfig(linalgOp);
    rewriter.setInsertionPoint(linalgOp);
    FailureOr<linalg::GenericOp> generalizedOp =
        linalg::generalizeNamedOp(rewriter, linalgOp);
    if (failed(generalizedOp)) {
      linalgOp->emitOpError("failed to generalize operation");
      return signalPassFailure();
    }
    if (config) {
      setLoweringConfig(*generalizedOp, config);
    }
  if (failed(generalizeCandidates(&getContext(), namedOpCandidates))) {
    return signalPassFailure();
  }
}

5 changes: 5 additions & 0 deletions compiler/src/iree/compiler/Codegen/Common/GPU/Passes.h
@@ -140,6 +140,11 @@ createWorkgroupSpecializationPass();
std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
createWorkGroupSwizzle(unsigned swizzleLogTile = 0);

// This pass generalizes named Linalg convolution ops to allow for better
// folding of unit dimensions.
std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
createGPUGeneralizeNamedConvolutionOpsPass();

// This pass generalizes named Linalg ops that are better off as generics.
std::unique_ptr<InterfacePass<mlir::FunctionOpInterface>>
createGPUGeneralizeNamedOpsPass();
6 changes: 6 additions & 0 deletions compiler/src/iree/compiler/Codegen/Common/GPU/Passes.td
@@ -31,6 +31,12 @@ def GPUDistributeSharedMemoryCopy :
  let constructor = "mlir::iree_compiler::createGPUDistributeSharedMemoryCopy()";
}

def GPUGeneralizeNamedConvolutionOps :
    InterfacePass<"iree-codegen-gpu-generalize-named-convolution-ops", "mlir::FunctionOpInterface"> {
  let summary = "Convert named Linalg convolution ops to linalg.generic ops";
  let constructor = "mlir::iree_compiler::createGPUGeneralizeNamedConvolutionOpsPass()";
}

def GPUGeneralizeNamedOps :
    InterfacePass<"iree-codegen-gpu-generalize-named-ops", "mlir::FunctionOpInterface"> {
  let summary = "Convert named Linalg ops to linalg.generic ops";
@@ -21,6 +21,7 @@ iree_lit_test_suite(
"gpu_check_resource_usage.mlir",
"gpu_distribute.mlir",
"gpu_distribute_shared_memory.mlir",
"gpu_generalize_named_convolution_ops.mlir",
"gpu_generalize_named_ops.mlir",
"gpu_lower_to_ukernels.mlir",
"gpu_nested_layout_vector_distribution.mlir",
@@ -17,6 +17,7 @@ iree_lit_test_suite(
"gpu_check_resource_usage.mlir"
"gpu_distribute.mlir"
"gpu_distribute_shared_memory.mlir"
"gpu_generalize_named_convolution_ops.mlir"
"gpu_generalize_named_ops.mlir"
"gpu_lower_to_ukernels.mlir"
"gpu_nested_layout_contract_amdgpu.mlir"
gpu_generalize_named_convolution_ops.mlir (new test file)
@@ -0,0 +1,22 @@
// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(func.func(iree-codegen-gpu-generalize-named-convolution-ops))" %s | FileCheck %s

func.func @nhwc_convolution(%arg0: tensor<1x1x32x32xf16>, %arg1: tensor<1x1x32x128xf16>) -> tensor<1x1x32x128xf16> {
  %cst = arith.constant 0.000000e+00 : f16
  %0 = tensor.empty() : tensor<1x1x32x128xf16>
  %1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1x1x32x128xf16>) -> tensor<1x1x32x128xf16>
  %2 = linalg.conv_2d_nhwc_hwcf {
         dilations = dense<1> : vector<2xi64>,
         strides = dense<1> : vector<2xi64>,
         lowering_config = #iree_codegen.lowering_config<tile_sizes = [[1, 1, 32, 128, 1, 1, 32]]>
       }
       ins(%arg0, %arg1 : tensor<1x1x32x32xf16>, tensor<1x1x32x128xf16>)
       outs(%1 : tensor<1x1x32x128xf16>) -> tensor<1x1x32x128xf16>
  return %2 : tensor<1x1x32x128xf16>
}

// CHECK: #[[$CONFIG:.+]] = #iree_codegen.lowering_config
// CHECK-SAME{LITERAL}: <tile_sizes = [[1, 1, 32, 128, 1, 1, 32]]>

// CHECK-LABEL: func.func @nhwc_convolution
// CHECK: linalg.generic
// CHECK-SAME: lowering_config = #[[$CONFIG]]
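
For context, a subsequent unit-dimension folding pass would collapse the generalized op above into a plain contraction over the channel dimension, roughly as follows (a hypothetical sketch with assumed value names; this test only checks generalization and the preserved lowering_config):

#lhs = affine_map<(w, f, c) -> (w, c)>
#rhs = affine_map<(w, f, c) -> (c, f)>
#res = affine_map<(w, f, c) -> (w, f)>

// Matmul-like 32x32 * 32x128 generic left after dropping unit dimensions.
%folded = linalg.generic {
    indexing_maps = [#lhs, #rhs, #res],
    iterator_types = ["parallel", "parallel", "reduction"]}
    ins(%in2d, %flt2d : tensor<32x32xf16>, tensor<32x128xf16>)
    outs(%out2d : tensor<32x128xf16>) {
^bb0(%a: f16, %b: f16, %acc: f16):
  %m = arith.mulf %a, %b : f16
  %s = arith.addf %acc, %m : f16
  linalg.yield %s : f16
} -> tensor<32x128xf16>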
