Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Buddy GPU] GEMM Codegen Pipeline Pass - Linalg-Tensor-Opt & Linalg-Memref-Opt #385

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ project(buddy-mlir LANGUAGES CXX C)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED YES)
# Only default to Debug when the user has not chosen a build type:
# unconditionally setting CMAKE_BUILD_TYPE here would override a
# -DCMAKE_BUILD_TYPE=Release given on the command line and make every
# consumer build unoptimized binaries.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Debug)
endif()
include(ExternalProject)

#-------------------------------------------------------------------------------
Expand Down
21 changes: 21 additions & 0 deletions examples/BuddyMatmul/linalg-matmul.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Test input for the GEMM codegen pipeline: C = A x B with
// A: 1024x512xf16 and B: 512x1024xf16, written as a linalg.generic.
// The maps index A(d0, d2), B(d2, d1) and C(d0, d1) over an
// (M, N, K) = (d0, d1, d2) iteration space.
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
module attributes {} {
// Zero-fills an f16 accumulator, then multiply-accumulates with d2 as the
// reduction dimension.
func.func private @Matmul(%arg0: tensor<1024x512xf16>, %arg1: tensor<512x1024xf16>) -> tensor<1024x1024xf16> attributes {} {
%cst = arith.constant 0.000000e+00 : f16
%0 = tensor.empty() : tensor<1024x1024xf16>
%1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1024x1024xf16>) -> tensor<1024x1024xf16>
%2 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]} ins(%arg0, %arg1 : tensor<1024x512xf16>, tensor<512x1024xf16>) outs(%1 : tensor<1024x1024xf16>) {
^bb0(%in: f16, %in_0: f16, %out: f16):
%3 = arith.mulf %in, %in_0 : f16
%4 = arith.addf %out, %3 : f16
linalg.yield %4 : f16
} -> tensor<1024x1024xf16>
return %2 : tensor<1024x1024xf16>
}
// Public entry point: forwards both operands to @Matmul.
func.func @forward(%arg0: tensor<1024x512xf16>, %arg1: tensor<512x1024xf16>) -> tensor<1024x1024xf16> {
%0 = call @Matmul(%arg0, %arg1) : (tensor<1024x512xf16>, tensor<512x1024xf16>) -> tensor<1024x1024xf16>
return %0 : tensor<1024x1024xf16>
}
}
2 changes: 1 addition & 1 deletion midend/include/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
add_subdirectory(Dialect)
add_subdirectory(Dialect)
3 changes: 3 additions & 0 deletions midend/include/Dialect/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@ add_subdirectory(RVV)
add_subdirectory(VectorExp)
add_subdirectory(Gemmini)
add_subdirectory(Sche)
add_subdirectory(Transform)
add_subdirectory(Linalg)
add_subdirectory(GPU)
3 changes: 3 additions & 0 deletions midend/include/Dialect/GPU/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Generate pass declarations (Passes.h.inc) from Passes.td for the GPU passes.
set(LLVM_TARGET_DEFINITIONS Passes.td)
mlir_tablegen(Passes.h.inc -gen-pass-decls)
add_public_tablegen_target(BuddyGPUPassIncGen)
18 changes: 18 additions & 0 deletions midend/include/Dialect/GPU/Passes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#ifndef DIALECT_GPU_PASSES_H
#define DIALECT_GPU_PASSES_H

// Include the constructor of passes in GPU Dialect
// NOTE(review): "Reduntant" misspells "Redundant"; the typo spans the header
// file name, pass name and constructor — fixing it needs a coordinated rename.
#include "GPU/Transforms/GPUDistributeToWarp.h"
#include "GPU/Transforms/RemoveReduntantLoops.h"

namespace mlir {
// Generate the definition of GPU Passes
#define GEN_PASS_DECL
#include "GPU/Passes.h.inc"

// Generate the registerXXXPass() hooks for the passes declared in Passes.td.
#define GEN_PASS_REGISTRATION
#include "GPU/Passes.h.inc"

} // namespace mlir

#endif // DIALECT_GPU_PASSES_H
11 changes: 11 additions & 0 deletions midend/include/Dialect/GPU/Passes.td
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
include "mlir/Pass/PassBase.td"

// Distributes computation tiled at the GPU block (workgroup) level to warps.
def GPUDistributeToWarp : Pass<"gpu-distribute-to-warp", "func::FuncOp"> {
let summary = "GPU distribute from Block level to Warp";
let constructor = "mlir::createGPUDistributeToWarpPass()";
}

// NOTE(review): "Reduntant" misspells "Redundant" here, in the constructor,
// and in the header file name — renaming requires a coordinated change.
def RemoveReduntantLoops : Pass<"remove-reduntant-loops", "func::FuncOp"> {
let summary = "Remove the loops that only run once in gpu kernel";
let constructor = "mlir::createRemoveReduntantLoops()";
}
15 changes: 15 additions & 0 deletions midend/include/Dialect/GPU/Transforms/GPUDistributeToWarp.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Include guard made ALL_CAPS for consistency with the sibling headers
// (e.g. REMOVEREDUNTANTLOOPS_H) and LLVM convention.
#ifndef DIALECT_GPU_TRANSFORMS_GPUDISTRIBUTETOWARP_H
#define DIALECT_GPU_TRANSFORMS_GPUDISTRIBUTETOWARP_H

#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/Pass.h"
#include "llvm/ADT/StringRef.h"
#include <memory>

namespace mlir {

/// Creates the "gpu-distribute-to-warp" pass (see GPU/Passes.td), which
/// distributes work from the GPU block level to warps within a func.func.
std::unique_ptr<OperationPass<func::FuncOp>> createGPUDistributeToWarpPass();

} // namespace mlir

#endif // DIALECT_GPU_TRANSFORMS_GPUDISTRIBUTETOWARP_H
16 changes: 16 additions & 0 deletions midend/include/Dialect/GPU/Transforms/RemoveReduntantLoops.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#ifndef DIALECT_GPU_TRANSFORMS_REMOVEREDUNTANTLOOPS_H
#define DIALECT_GPU_TRANSFORMS_REMOVEREDUNTANTLOOPS_H

#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Operation.h"
#include "mlir/Pass/Pass.h"
#include "llvm/ADT/StringRef.h"
#include <memory>

namespace mlir {

// Creates the "remove-reduntant-loops" pass (see GPU/Passes.td), which removes
// loops in a GPU kernel that only run once.
// NOTE(review): "Reduntant" misspells "Redundant"; the typo is shared by the
// file name, pass name and constructor, so fixing it needs a coordinated
// rename across the PR — TODO confirm with author.
std::unique_ptr<OperationPass<func::FuncOp>> createRemoveReduntantLoops();

} // namespace mlir

#endif // DIALECT_GPU_TRANSFORMS_REMOVEREDUNTANTLOOPS_H
3 changes: 3 additions & 0 deletions midend/include/Dialect/Linalg/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Generate pass declarations (Passes.h.inc) from Passes.td for Linalg passes.
set(LLVM_TARGET_DEFINITIONS Passes.td)
mlir_tablegen(Passes.h.inc -gen-pass-decls)
add_public_tablegen_target(BuddyLinalgPassIncGen)
17 changes: 17 additions & 0 deletions midend/include/Dialect/Linalg/Passes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#ifndef DIALECT_LINALG_PASSES_H
#define DIALECT_LINALG_PASSES_H

// Include the constructor of passes in Linalg Dialect
#include "Linalg/Transforms/LinalgPromotion.h"

namespace mlir {
// Generate the definition of Linalg Passes
#define GEN_PASS_DECL
#include "Linalg/Passes.h.inc"

// Generate the registerXXXPass() hooks for the passes declared in Passes.td.
#define GEN_PASS_REGISTRATION
#include "Linalg/Passes.h.inc"

} // namespace mlir

#endif // DIALECT_LINALG_PASSES_H
6 changes: 6 additions & 0 deletions midend/include/Dialect/Linalg/Passes.td
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include "mlir/Pass/PassBase.td"

// Registered as "linalg-promotion"; constructor declared in
// Linalg/Transforms/LinalgPromotion.h.
def LinalgPromotion : Pass<"linalg-promotion", "func::FuncOp"> {
let summary = "promote Linalg's MatmulOp operand subview to memref.alloca and linalg.copy";
let constructor = "mlir::createLinalgPromotionPass()";
}
16 changes: 16 additions & 0 deletions midend/include/Dialect/Linalg/Transforms/LinalgPromotion.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#ifndef DIALECT_LINALG_TRANSFORMS_LINALGPROMOTION_H
#define DIALECT_LINALG_TRANSFORMS_LINALGPROMOTION_H

// Includes reordered (mlir before llvm) to match the sibling transform
// headers and LLVM include-order convention.
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/Pass.h"
#include "llvm/ADT/StringRef.h"
#include <memory>

namespace mlir {

/// Creates the "linalg-promotion" pass (see Linalg/Passes.td), which promotes
/// the subview operands of linalg matmul ops into local buffers
/// (memref.alloca + linalg.copy).
std::unique_ptr<OperationPass<func::FuncOp>> createLinalgPromotionPass();

} // namespace mlir

#endif // DIALECT_LINALG_TRANSFORMS_LINALGPROMOTION_H
3 changes: 3 additions & 0 deletions midend/include/Dialect/Transform/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Generate pass declarations (Passes.h.inc) from Passes.td for Transform passes.
set(LLVM_TARGET_DEFINITIONS Passes.td)
mlir_tablegen(Passes.h.inc -gen-pass-decls)
add_public_tablegen_target(BuddyTransformPassIncGen)
22 changes: 22 additions & 0 deletions midend/include/Dialect/Transform/Passes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#ifndef DIALECT_TRANSFORM_PASSES_H
#define DIALECT_TRANSFORM_PASSES_H

#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassRegistry.h"

// Include the constructor of passes in Transform Dialect
#include "Transform/Transforms/TransformDialectInterpreter.h"
#include "Transform/Transforms/TransformInsertion.h"

namespace mlir {
class ModuleOp;
// Generate the definition of Transform Passes
#define GEN_PASS_DECL
#include "Transform/Passes.h.inc"

// Generate the registerXXXPass() hooks for the passes declared in Passes.td.
#define GEN_PASS_REGISTRATION
#include "Transform/Passes.h.inc"

} // namespace mlir

#endif // DIALECT_TRANSFORM_PASSES_H
10 changes: 10 additions & 0 deletions midend/include/Dialect/Transform/Passes.td
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
include "mlir/Pass/PassBase.td"

// TransformDialectInterpreter
// Applies the transform-dialect ops found in the module; "erase-after"
// removes the transform IR once it has been applied.
def TransformDialectInterpreter : Pass<"transform-dialect-interpreter", "ModuleOp"> {
let summary = "Apply transform dialect operations one by one";
let constructor = "mlir::createTransformDialectInterpreter()";
let options = [
Option<"eraseAfter", "erase-after", "bool", "false", "erase Transform Ops after applied">
];
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#ifndef TRANSFORM_TRANSFORMS_TRANSFORMDIALECTINTERPRETER_H
#define TRANSFORM_TRANSFORMS_TRANSFORMDIALECTINTERPRETER_H

#include "mlir/Pass/Pass.h"
#include <memory>

namespace mlir {

class ModuleOp;

// Creates the "transform-dialect-interpreter" pass (see Transform/Passes.td).
// When `eraseAfter` is true, the applied transform ops are erased afterwards.
std::unique_ptr<OperationPass<ModuleOp>>
createTransformDialectInterpreter(bool eraseAfter = false);

} // namespace mlir

#endif // TRANSFORM_TRANSFORMS_TRANSFORMDIALECTINTERPRETER_H
25 changes: 25 additions & 0 deletions midend/include/Dialect/Transform/Transforms/TransformInsertion.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#ifndef TRANSFORM_TRANSFORMS_TRANSFORMINSERTION_H
#define TRANSFORM_TRANSFORMS_TRANSFORMINSERTION_H

#include "mlir/Pass/Pass.h"
#include <functional> // std::function members below — was missing (relied on transitive includes)
#include <memory>
#include <string>

namespace mlir {

class ModuleOp;
class ImplicitLocOpBuilder;

/// Configuration for the generic transform-insertion pass.
struct TransformInsertionConfig {
  // Anchor attribute name; only functions carrying it are matched.
  std::string funcAnchor;
  // Prefix used for the match annotations placed on target ops.
  std::string matchPrefix;
  // Predicate selecting which operations receive transform IR.
  std::function<bool(Operation *)> opFilter;
  // Callback emitting the transform IR for a matched op; the Value is the
  // transform handle associated with that op.
  std::function<void(ImplicitLocOpBuilder &, Operation *, Value)>
      transformBuilder;
};

/// Creates a ModuleOp pass that inserts transform-dialect IR as described by
/// `config`.
std::unique_ptr<OperationPass<ModuleOp>>
createGenericTransformInsertionPass(const TransformInsertionConfig &config);

} // namespace mlir

#endif // TRANSFORM_TRANSFORMS_TRANSFORMINSERTION_H
29 changes: 29 additions & 0 deletions midend/include/Pipelines/BufferizeOpt.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#ifndef PIPELINES_BUFFERIZEOPT_H
#define PIPELINES_BUFFERIZEOPT_H

#include "Pipelines/LinalgTensorOpt.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Pass/PassOptions.h"
#include "mlir/Pass/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include <string>

namespace mlir {
namespace buddy {

// Pipeline options for the bufferization optimization pipeline.
struct BuddyBufferizeOptOptions :
public PassPipelineOptions<BuddyBufferizeOptOptions> {
// Target backend; no llvm::cl::init, so empty unless set on the command line.
Option<std::string> target {
*this, "target",
llvm::cl::desc("An option to specify target"),
};
};

// Populates `pm` with the bufferize-opt passes according to `options`.
void createBufferizeOptPipeline(OpPassManager &pm, const BuddyBufferizeOptOptions &options);

// Registers the pipeline with the global pipeline registry.
void registerBufferizeOptPassPipeline();

} // namespace buddy
} // namespace mlir

#endif // PIPELINES_BUFFERIZEOPT_H
53 changes: 53 additions & 0 deletions midend/include/Pipelines/GPU/GemmCodegenTransform.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Guard typo fixed: TRANSOFRM -> TRANSFORM.
#ifndef PIPELINES_GPU_GEMM_CODEGEN_TRANSFORM_H
#define PIPELINES_GPU_GEMM_CODEGEN_TRANSFORM_H

#include "mlir/Pass/PassManager.h"
#include "mlir/Pass/PassOptions.h"
#include "mlir/Pass/PassRegistry.h"
#include "llvm/Support/CommandLine.h" // llvm::cl::desc / init used below
#include <string>

namespace mlir {
namespace buddy {

/// Options describing the GEMM codegen configuration (tile sizes, workgroup
/// size, pipeline stages) to attach to matmul ops.
struct GPUGemmCodegenConfigOptions
    : public PassPipelineOptions<GPUGemmCodegenConfigOptions> {
  Option<std::string> funcAnchor{
      *this, "func-anchor",
      llvm::cl::desc(
          "An optional Unit attribute anchoring on target functions."),
      llvm::cl::init("")};
  Option<std::string> annotatePrefix{
      *this, "annotate-prefix",
      llvm::cl::desc("An optional annotate prefix attribute on target ops."),
      llvm::cl::init("__buddy_gpu_gemm__")};
  // Tile sizes for the matmul; empty list when not specified.
  ListOption<int64_t> tileConfig{
      *this, "tile-config",
      llvm::cl::desc("An optional tile config for matmul op")};
  // Workgroup size; empty list when not specified.
  ListOption<int64_t> workGroup{
      *this, "work-group",
      llvm::cl::desc("An optional workgroup size config for matmul op")};
  // Software-pipeline stages.
  // NOTE(review): no llvm::cl::init — value is unspecified when the flag is
  // omitted; confirm the pipeline handles the unset case.
  Option<int64_t> stages{
      *this, "stages",
      llvm::cl::desc("An optional stages config for matmul op")};
};

/// Options shared by the general GEMM tiling transforms.
struct GPUGemmGeneralOptions
    : public PassPipelineOptions<GPUGemmGeneralOptions> {
  Option<std::string> funcAnchor{
      *this, "func-anchor",
      llvm::cl::desc(
          "An optional Unit attribute anchoring on target functions."),
      llvm::cl::init("")};
  Option<std::string> annotatePrefix{
      *this, "annotate-prefix",
      llvm::cl::desc("An optional annotate prefix attribute on target ops."),
      llvm::cl::init("__buddy_gpu_gemm__")};
};

/// Inserts transform IR annotating matmul ops with the tile configuration.
void createGemmTileConfigInsertTransform(
    OpPassManager &pm, const GPUGemmCodegenConfigOptions &options);

/// Inserts the transform IR that performs the GEMM tiling.
void createGemmTileTransform(OpPassManager &pm,
                             const GPUGemmGeneralOptions &options);

} // namespace buddy
} // namespace mlir

#endif // PIPELINES_GPU_GEMM_CODEGEN_TRANSFORM_H
29 changes: 29 additions & 0 deletions midend/include/Pipelines/LinalgMemrefOpt.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#ifndef PIPELINES_MEMREFOPT_H
#define PIPELINES_MEMREFOPT_H

#include "mlir/Pass/PassManager.h"
#include "mlir/Pass/PassOptions.h"
#include "mlir/Pass/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include <string>

namespace mlir {
namespace buddy {

struct LinalgMemrefOptPipelineOptions :
public PassPipelineOptions<LinalgMemrefOptPipelineOptions> {
Option<std::string> target {
*this, "target",
llvm::cl::desc("An optional attribute to speicify target."),
};
};

void createLinalgMemrefOptPipeline(OpPassManager &pm,
const LinalgMemrefOptPipelineOptions &options);

void registerLinalgMemrefOptPipeline();

} // mlir::buddy
} // mlir

#endif
31 changes: 31 additions & 0 deletions midend/include/Pipelines/LinalgTensorOpt.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#ifndef PIPELINES_GPU_LINALGTENSOROPT_H
#define PIPELINES_GPU_LINALGTENSOROPT_H

#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
#include <string>
#include <memory>

namespace mlir {
namespace buddy {

struct LinalgTensorOptPipelineOptions
: public PassPipelineOptions<LinalgTensorOptPipelineOptions> {
Option<std::string> target{
*this, "target",
llvm::cl::desc("An optional attribute to speicify target."),
llvm::cl::init("gpu")};
Option<std::string> arch{
*this, "arch", llvm::cl::desc("An optional attribute to speicify arch."),
llvm::cl::init("nv_sm_80")};
};

void createLinalgTensorOptPassPipeline(OpPassManager &pm, const LinalgTensorOptPipelineOptions &options);

void registerLinalgTensorOptPassPipeline();


} // namespace mlir::buddy
} // namespace mlir

#endif
Loading