Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Buddy GPU] GEMM Codegen Pipeline Pass - Linalg-Tensor-Opt & Linalg-Memref-Opt #385

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ project(buddy-mlir LANGUAGES CXX C)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED YES)
# Only default to Debug when the user has not chosen a build type:
# unconditionally setting CMAKE_BUILD_TYPE here would override a
# -DCMAKE_BUILD_TYPE=Release given on the command line and make every
# consumer build unoptimized binaries.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Debug)
endif()
include(ExternalProject)

#-------------------------------------------------------------------------------
Expand Down
21 changes: 21 additions & 0 deletions examples/BuddyMatmul/linalg-matmul.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Test input for the GEMM codegen pipeline: C = A x B with
// A: 1024x512xf16 and B: 512x1024xf16, written as a linalg.generic.
// The maps index A(d0, d2), B(d2, d1) and C(d0, d1) over an
// (M, N, K) = (d0, d1, d2) iteration space.
#map = affine_map<(d0, d1, d2) -> (d0, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
module attributes {} {
// Zero-fills an f16 accumulator, then multiply-accumulates with d2 as the
// reduction dimension.
func.func private @Matmul(%arg0: tensor<1024x512xf16>, %arg1: tensor<512x1024xf16>) -> tensor<1024x1024xf16> attributes {} {
%cst = arith.constant 0.000000e+00 : f16
%0 = tensor.empty() : tensor<1024x1024xf16>
%1 = linalg.fill ins(%cst : f16) outs(%0 : tensor<1024x1024xf16>) -> tensor<1024x1024xf16>
%2 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]} ins(%arg0, %arg1 : tensor<1024x512xf16>, tensor<512x1024xf16>) outs(%1 : tensor<1024x1024xf16>) {
^bb0(%in: f16, %in_0: f16, %out: f16):
%3 = arith.mulf %in, %in_0 : f16
%4 = arith.addf %out, %3 : f16
linalg.yield %4 : f16
} -> tensor<1024x1024xf16>
return %2 : tensor<1024x1024xf16>
}
// Public entry point: forwards both operands to @Matmul.
func.func @forward(%arg0: tensor<1024x512xf16>, %arg1: tensor<512x1024xf16>) -> tensor<1024x1024xf16> {
%0 = call @Matmul(%arg0, %arg1) : (tensor<1024x512xf16>, tensor<512x1024xf16>) -> tensor<1024x1024xf16>
return %0 : tensor<1024x1024xf16>
}
}
2 changes: 1 addition & 1 deletion midend/include/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
add_subdirectory(Dialect)
add_subdirectory(Dialect)
3 changes: 3 additions & 0 deletions midend/include/Dialect/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@ add_subdirectory(RVV)
add_subdirectory(VectorExp)
add_subdirectory(Gemmini)
add_subdirectory(Sche)
add_subdirectory(Transform)
add_subdirectory(Linalg)
add_subdirectory(GPU)
3 changes: 3 additions & 0 deletions midend/include/Dialect/GPU/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Generate pass declarations (Passes.h.inc) from Passes.td for the GPU passes.
set(LLVM_TARGET_DEFINITIONS Passes.td)
mlir_tablegen(Passes.h.inc -gen-pass-decls)
add_public_tablegen_target(BuddyGPUPassIncGen)
18 changes: 18 additions & 0 deletions midend/include/Dialect/GPU/Passes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#ifndef DIALECT_GPU_PASSES_H
#define DIALECT_GPU_PASSES_H

// Include the constructor of passes in GPU Dialect
// NOTE(review): "Reduntant" misspells "Redundant"; the typo spans the header
// file name, pass name and constructor — fixing it needs a coordinated rename.
#include "GPU/Transforms/GPUDistributeToWarp.h"
#include "GPU/Transforms/RemoveReduntantLoops.h"

namespace mlir {
// Generate the definition of GPU Passes
#define GEN_PASS_DECL
#include "GPU/Passes.h.inc"

// Generate the registerXXXPass() hooks for the passes declared in Passes.td.
#define GEN_PASS_REGISTRATION
#include "GPU/Passes.h.inc"

} // namespace mlir

#endif // DIALECT_GPU_PASSES_H
11 changes: 11 additions & 0 deletions midend/include/Dialect/GPU/Passes.td
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
include "mlir/Pass/PassBase.td"

// Distributes computation tiled at the GPU block (workgroup) level to warps.
def GPUDistributeToWarp : Pass<"gpu-distribute-to-warp", "func::FuncOp"> {
let summary = "GPU distribute from Block level to Warp";
let constructor = "mlir::createGPUDistributeToWarpPass()";
}

// NOTE(review): "Reduntant" misspells "Redundant" here, in the constructor,
// and in the header file name — renaming requires a coordinated change.
def RemoveReduntantLoops : Pass<"remove-reduntant-loops", "func::FuncOp"> {
let summary = "Remove the loops that only run once in gpu kernel";
let constructor = "mlir::createRemoveReduntantLoops()";
}
15 changes: 15 additions & 0 deletions midend/include/Dialect/GPU/Transforms/GPUDistributeToWarp.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Include guard made ALL_CAPS for consistency with the sibling headers
// (e.g. REMOVEREDUNTANTLOOPS_H) and LLVM convention.
#ifndef DIALECT_GPU_TRANSFORMS_GPUDISTRIBUTETOWARP_H
#define DIALECT_GPU_TRANSFORMS_GPUDISTRIBUTETOWARP_H

#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/Pass.h"
#include "llvm/ADT/StringRef.h"
#include <memory>

namespace mlir {

/// Creates the "gpu-distribute-to-warp" pass (see GPU/Passes.td), which
/// distributes work from the GPU block level to warps within a func.func.
std::unique_ptr<OperationPass<func::FuncOp>> createGPUDistributeToWarpPass();

} // namespace mlir

#endif // DIALECT_GPU_TRANSFORMS_GPUDISTRIBUTETOWARP_H
16 changes: 16 additions & 0 deletions midend/include/Dialect/GPU/Transforms/RemoveReduntantLoops.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#ifndef DIALECT_GPU_TRANSFORMS_REMOVEREDUNTANTLOOPS_H
#define DIALECT_GPU_TRANSFORMS_REMOVEREDUNTANTLOOPS_H

#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Operation.h"
#include "mlir/Pass/Pass.h"
#include "llvm/ADT/StringRef.h"
#include <memory>

namespace mlir {

// Creates the "remove-reduntant-loops" pass (see GPU/Passes.td), which removes
// loops in a GPU kernel that only run once.
// NOTE(review): "Reduntant" misspells "Redundant"; the typo is shared by the
// file name, pass name and constructor, so fixing it needs a coordinated
// rename across the PR — TODO confirm with author.
std::unique_ptr<OperationPass<func::FuncOp>> createRemoveReduntantLoops();

} // namespace mlir

#endif // DIALECT_GPU_TRANSFORMS_REMOVEREDUNTANTLOOPS_H
3 changes: 3 additions & 0 deletions midend/include/Dialect/Linalg/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Generate pass declarations (Passes.h.inc) from Passes.td for Linalg passes.
set(LLVM_TARGET_DEFINITIONS Passes.td)
mlir_tablegen(Passes.h.inc -gen-pass-decls)
add_public_tablegen_target(BuddyLinalgPassIncGen)
17 changes: 17 additions & 0 deletions midend/include/Dialect/Linalg/Passes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#ifndef DIALECT_LINALG_PASSES_H
#define DIALECT_LINALG_PASSES_H

// Include the constructor of passes in Linalg Dialect
#include "Linalg/Transforms/LinalgPromotion.h"

namespace mlir {
// Generate the definition of Linalg Passes
#define GEN_PASS_DECL
#include "Linalg/Passes.h.inc"

// Generate the registerXXXPass() hooks for the passes declared in Passes.td.
#define GEN_PASS_REGISTRATION
#include "Linalg/Passes.h.inc"

} // namespace mlir

#endif // DIALECT_LINALG_PASSES_H
6 changes: 6 additions & 0 deletions midend/include/Dialect/Linalg/Passes.td
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
include "mlir/Pass/PassBase.td"

// Registered as "linalg-promotion"; constructor declared in
// Linalg/Transforms/LinalgPromotion.h.
def LinalgPromotion : Pass<"linalg-promotion", "func::FuncOp"> {
let summary = "promote Linalg's MatmulOp operand subview to memref.alloca and linalg.copy";
let constructor = "mlir::createLinalgPromotionPass()";
}
16 changes: 16 additions & 0 deletions midend/include/Dialect/Linalg/Transforms/LinalgPromotion.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#ifndef DIALECT_LINALG_TRANSFORMS_LINALGPROMOTION_H
#define DIALECT_LINALG_TRANSFORMS_LINALGPROMOTION_H

// Includes reordered (mlir before llvm) to match the sibling transform
// headers and LLVM include-order convention.
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/Pass.h"
#include "llvm/ADT/StringRef.h"
#include <memory>

namespace mlir {

/// Creates the "linalg-promotion" pass (see Linalg/Passes.td), which promotes
/// the subview operands of linalg matmul ops into local buffers
/// (memref.alloca + linalg.copy).
std::unique_ptr<OperationPass<func::FuncOp>> createLinalgPromotionPass();

} // namespace mlir

#endif // DIALECT_LINALG_TRANSFORMS_LINALGPROMOTION_H
3 changes: 3 additions & 0 deletions midend/include/Dialect/Transform/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Generate pass declarations (Passes.h.inc) from Passes.td for Transform passes.
set(LLVM_TARGET_DEFINITIONS Passes.td)
mlir_tablegen(Passes.h.inc -gen-pass-decls)
add_public_tablegen_target(BuddyTransformPassIncGen)
22 changes: 22 additions & 0 deletions midend/include/Dialect/Transform/Passes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#ifndef DIALECT_TRANSFORM_PASSES_H
#define DIALECT_TRANSFORM_PASSES_H

#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassRegistry.h"

// Include the constructor of passes in Transform Dialect
#include "Transform/Transforms/TransformDialectInterpreter.h"
#include "Transform/Transforms/TransformInsertion.h"

namespace mlir {
class ModuleOp;
// Generate the definition of Transform Passes
#define GEN_PASS_DECL
#include "Transform/Passes.h.inc"

// Generate the registerXXXPass() hooks for the passes declared in Passes.td.
#define GEN_PASS_REGISTRATION
#include "Transform/Passes.h.inc"

} // namespace mlir

#endif // DIALECT_TRANSFORM_PASSES_H
10 changes: 10 additions & 0 deletions midend/include/Dialect/Transform/Passes.td
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
include "mlir/Pass/PassBase.td"

// TransformDialectInterpreter
// Applies the transform-dialect ops found in the module; "erase-after"
// removes the transform IR once it has been applied.
def TransformDialectInterpreter : Pass<"transform-dialect-interpreter", "ModuleOp"> {
let summary = "Apply transform dialect operations one by one";
let constructor = "mlir::createTransformDialectInterpreter()";
let options = [
Option<"eraseAfter", "erase-after", "bool", "false", "erase Transform Ops after applied">
];
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#ifndef TRANSFORM_TRANSFORMS_TRANSFORMDIALECTINTERPRETER_H
#define TRANSFORM_TRANSFORMS_TRANSFORMDIALECTINTERPRETER_H

#include "mlir/Pass/Pass.h"
#include <memory>

namespace mlir {

class ModuleOp;

// Creates the "transform-dialect-interpreter" pass (see Transform/Passes.td).
// When `eraseAfter` is true, the applied transform ops are erased afterwards.
std::unique_ptr<OperationPass<ModuleOp>>
createTransformDialectInterpreter(bool eraseAfter = false);

} // namespace mlir

#endif // TRANSFORM_TRANSFORMS_TRANSFORMDIALECTINTERPRETER_H
25 changes: 25 additions & 0 deletions midend/include/Dialect/Transform/Transforms/TransformInsertion.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#ifndef TRANSFORM_TRANSFORMS_TRANSFORMINSERTION_H
#define TRANSFORM_TRANSFORMS_TRANSFORMINSERTION_H

#include "mlir/Pass/Pass.h"
#include <functional> // std::function members below — was missing (relied on transitive includes)
#include <memory>
#include <string>

namespace mlir {

class ModuleOp;
class ImplicitLocOpBuilder;

/// Configuration for the generic transform-insertion pass.
struct TransformInsertionConfig {
  // Anchor attribute name; only functions carrying it are matched.
  std::string funcAnchor;
  // Prefix used for the match annotations placed on target ops.
  std::string matchPrefix;
  // Predicate selecting which operations receive transform IR.
  std::function<bool(Operation *)> opFilter;
  // Callback emitting the transform IR for a matched op; the Value is the
  // transform handle associated with that op.
  std::function<void(ImplicitLocOpBuilder &, Operation *, Value)>
      transformBuilder;
};

/// Creates a ModuleOp pass that inserts transform-dialect IR as described by
/// `config`.
std::unique_ptr<OperationPass<ModuleOp>>
createGenericTransformInsertionPass(const TransformInsertionConfig &config);

} // namespace mlir

#endif // TRANSFORM_TRANSFORMS_TRANSFORMINSERTION_H
29 changes: 29 additions & 0 deletions midend/include/Pipelines/BufferizeOpt.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#ifndef PIPELINES_BUFFERIZEOPT_H
#define PIPELINES_BUFFERIZEOPT_H

#include "Pipelines/LinalgTensorOpt.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Pass/PassOptions.h"
#include "mlir/Pass/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include <string>

namespace mlir {
namespace buddy {

// Pipeline options for the bufferization optimization pipeline.
struct BuddyBufferizeOptOptions :
public PassPipelineOptions<BuddyBufferizeOptOptions> {
// Target backend; no llvm::cl::init, so empty unless set on the command line.
Option<std::string> target {
*this, "target",
llvm::cl::desc("An option to specify target"),
};
};

// Populates `pm` with the bufferize-opt passes according to `options`.
void createBufferizeOptPipeline(OpPassManager &pm, const BuddyBufferizeOptOptions &options);

// Registers the pipeline with the global pipeline registry.
void registerBufferizeOptPassPipeline();

} // namespace buddy
} // namespace mlir

#endif // PIPELINES_BUFFERIZEOPT_H
53 changes: 53 additions & 0 deletions midend/include/Pipelines/GPU/GemmCodegenTransform.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Guard typo fixed: TRANSOFRM -> TRANSFORM.
#ifndef PIPELINES_GPU_GEMM_CODEGEN_TRANSFORM_H
#define PIPELINES_GPU_GEMM_CODEGEN_TRANSFORM_H

#include "mlir/Pass/PassManager.h"
#include "mlir/Pass/PassOptions.h"
#include "mlir/Pass/PassRegistry.h"
#include "llvm/Support/CommandLine.h" // llvm::cl::desc / init used below
#include <string>

namespace mlir {
namespace buddy {

/// Options describing the GEMM codegen configuration (tile sizes, workgroup
/// size, pipeline stages) to attach to matmul ops.
struct GPUGemmCodegenConfigOptions
    : public PassPipelineOptions<GPUGemmCodegenConfigOptions> {
  Option<std::string> funcAnchor{
      *this, "func-anchor",
      llvm::cl::desc(
          "An optional Unit attribute anchoring on target functions."),
      llvm::cl::init("")};
  Option<std::string> annotatePrefix{
      *this, "annotate-prefix",
      llvm::cl::desc("An optional annotate prefix attribute on target ops."),
      llvm::cl::init("__buddy_gpu_gemm__")};
  // Tile sizes for the matmul; empty list when not specified.
  ListOption<int64_t> tileConfig{
      *this, "tile-config",
      llvm::cl::desc("An optional tile config for matmul op")};
  // Workgroup size; empty list when not specified.
  ListOption<int64_t> workGroup{
      *this, "work-group",
      llvm::cl::desc("An optional workgroup size config for matmul op")};
  // Software-pipeline stages.
  // NOTE(review): no llvm::cl::init — value is unspecified when the flag is
  // omitted; confirm the pipeline handles the unset case.
  Option<int64_t> stages{
      *this, "stages",
      llvm::cl::desc("An optional stages config for matmul op")};
};

/// Options shared by the general GEMM tiling transforms.
struct GPUGemmGeneralOptions
    : public PassPipelineOptions<GPUGemmGeneralOptions> {
  Option<std::string> funcAnchor{
      *this, "func-anchor",
      llvm::cl::desc(
          "An optional Unit attribute anchoring on target functions."),
      llvm::cl::init("")};
  Option<std::string> annotatePrefix{
      *this, "annotate-prefix",
      llvm::cl::desc("An optional annotate prefix attribute on target ops."),
      llvm::cl::init("__buddy_gpu_gemm__")};
};

/// Inserts transform IR annotating matmul ops with the tile configuration.
void createGemmTileConfigInsertTransform(
    OpPassManager &pm, const GPUGemmCodegenConfigOptions &options);

/// Inserts the transform IR that performs the GEMM tiling.
void createGemmTileTransform(OpPassManager &pm,
                             const GPUGemmGeneralOptions &options);

} // namespace buddy
} // namespace mlir

#endif // PIPELINES_GPU_GEMM_CODEGEN_TRANSFORM_H
29 changes: 29 additions & 0 deletions midend/include/Pipelines/LinalgMemrefOpt.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#ifndef PIPELINES_MEMREFOPT_H
#define PIPELINES_MEMREFOPT_H

#include "mlir/Pass/PassManager.h"
#include "mlir/Pass/PassOptions.h"
#include "mlir/Pass/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include <string>

namespace mlir {
namespace buddy {

struct LinalgMemrefOptPipelineOptions :
public PassPipelineOptions<LinalgMemrefOptPipelineOptions> {
Option<std::string> target {
*this, "target",
llvm::cl::desc("An optional attribute to speicify target."),
};
};

void createLinalgMemrefOptPipeline(OpPassManager &pm,
const LinalgMemrefOptPipelineOptions &options);

void registerLinalgMemrefOptPipeline();

} // mlir::buddy
} // mlir

#endif
31 changes: 31 additions & 0 deletions midend/include/Pipelines/LinalgTensorOpt.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#ifndef PIPELINES_GPU_LINALGTENSOROPT_H
#define PIPELINES_GPU_LINALGTENSOROPT_H

#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
#include <string>
#include <memory>

namespace mlir {
namespace buddy {

struct LinalgTensorOptPipelineOptions
: public PassPipelineOptions<LinalgTensorOptPipelineOptions> {
Option<std::string> target{
*this, "target",
llvm::cl::desc("An optional attribute to speicify target."),
llvm::cl::init("gpu")};
Option<std::string> arch{
*this, "arch", llvm::cl::desc("An optional attribute to speicify arch."),
llvm::cl::init("nv_sm_80")};
};

void createLinalgTensorOptPassPipeline(OpPassManager &pm, const LinalgTensorOptPipelineOptions &options);

void registerLinalgTensorOptPassPipeline();


} // namespace mlir::buddy
} // namespace mlir

#endif
Loading