Skip to content

Commit

Permalink
Reland '[flang][cuda] Add cuf.register_kernel operation' (llvm#112389)
Browse files Browse the repository at this point in the history
The operation will be used in the CUF constructor to register the kernel
functions. This allows registration to be delayed until codegen, when the
gpu.binary will be available.

Reland of llvm#112268 with correct shared library build support.
  • Loading branch information
clementval authored Oct 15, 2024
1 parent 583fa4f commit 7e72e5b
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 0 deletions.
19 changes: 19 additions & 0 deletions flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -288,4 +288,23 @@ def cuf_KernelOp : cuf_Op<"kernel", [AttrSizedOperandSegments,
let hasVerifier = 1;
}

def cuf_RegisterKernelOp : cuf_Op<"register_kernel", []> {
let summary = "Register a CUDA kernel";

let arguments = (ins
SymbolRefAttr:$name
);

let assemblyFormat = [{
$name attr-dict
}];

let hasVerifier = 1;

let extraClassDeclaration = [{
mlir::StringAttr getKernelName();
mlir::StringAttr getKernelModuleName();
}];
}

#endif // FORTRAN_DIALECT_CUF_CUF_OPS
1 change: 1 addition & 0 deletions flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ add_flang_library(CUFDialect
FIRDialect
FIRDialectSupport
MLIRIR
MLIRGPUDialect
MLIRTargetLLVMIRExport

LINK_COMPONENTS
Expand Down
37 changes: 37 additions & 0 deletions flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
#include "flang/Optimizer/Dialect/FIRAttr.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinOps.h"
Expand Down Expand Up @@ -253,6 +254,42 @@ llvm::LogicalResult cuf::KernelOp::verify() {
return mlir::success();
}

//===----------------------------------------------------------------------===//
// RegisterKernelOp
//===----------------------------------------------------------------------===//

mlir::StringAttr cuf::RegisterKernelOp::getKernelModuleName() {
return getName().getRootReference();
}

mlir::StringAttr cuf::RegisterKernelOp::getKernelName() {
return getName().getLeafReference();
}

mlir::LogicalResult cuf::RegisterKernelOp::verify() {
if (getKernelName() == getKernelModuleName())
return emitOpError("expect a module and a kernel name");

auto mod = getOperation()->getParentOfType<mlir::ModuleOp>();
if (!mod)
return emitOpError("expect to be in a module");

mlir::SymbolTable symTab(mod);
auto gpuMod = symTab.lookup<mlir::gpu::GPUModuleOp>(getKernelModuleName());
if (!gpuMod)
return emitOpError("gpu module not found");

mlir::SymbolTable gpuSymTab(gpuMod);
auto func = gpuSymTab.lookup<mlir::gpu::GPUFuncOp>(getKernelName());
if (!func)
return emitOpError("device function not found");

if (!func.isKernel())
return emitOpError("only kernel gpu.func can be registered");

return mlir::success();
}

// Tablegen operators

#define GET_OP_CLASSES
Expand Down
20 changes: 20 additions & 0 deletions flang/test/Fir/CUDA/cuda-register-func.fir
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// RUN: fir-opt %s | FileCheck %s

module attributes {gpu.container_module} {
gpu.module @cuda_device_mod {
gpu.func @_QPsub_device1() kernel {
gpu.return
}
gpu.func @_QPsub_device2(%arg0: !fir.ref<f32>) kernel {
gpu.return
}
}
llvm.func internal @__cudaFortranConstructor() {
cuf.register_kernel @cuda_device_mod::@_QPsub_device1
cuf.register_kernel @cuda_device_mod::@_QPsub_device2
llvm.return
}
}

// CHECK: cuf.register_kernel @cuda_device_mod::@_QPsub_device1
// CHECK: cuf.register_kernel @cuda_device_mod::@_QPsub_device2
50 changes: 50 additions & 0 deletions flang/test/Fir/cuf-invalid.fir
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,53 @@ func.func @_QPsub1(%arg0: !fir.ref<!fir.array<?xf32>> {cuf.data_attr = #cuf.cuda
cuf.data_transfer %20#0 to %11#0, %19 : !fir.shape<1> {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>
return
}

// -----

module attributes {gpu.container_module} {
gpu.module @cuda_device_mod {
gpu.func @_QPsub_device1() {
gpu.return
}
}
llvm.func internal @__cudaFortranConstructor() {
// expected-error@+1{{'cuf.register_kernel' op only kernel gpu.func can be registered}}
cuf.register_kernel @cuda_device_mod::@_QPsub_device1
llvm.return
}
}

// -----

module attributes {gpu.container_module} {
gpu.module @cuda_device_mod {
gpu.func @_QPsub_device1() {
gpu.return
}
}
llvm.func internal @__cudaFortranConstructor() {
// expected-error@+1{{'cuf.register_kernel' op device function not found}}
cuf.register_kernel @cuda_device_mod::@_QPsub_device2
llvm.return
}
}

// -----

module attributes {gpu.container_module} {
llvm.func internal @__cudaFortranConstructor() {
// expected-error@+1{{'cuf.register_kernel' op gpu module not found}}
cuf.register_kernel @cuda_device_mod::@_QPsub_device1
llvm.return
}
}

// -----

module attributes {gpu.container_module} {
llvm.func internal @__cudaFortranConstructor() {
// expected-error@+1{{'cuf.register_kernel' op expect a module and a kernel name}}
cuf.register_kernel @_QPsub_device1
llvm.return
}
}
1 change: 1 addition & 0 deletions flang/tools/fir-opt/fir-opt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ int main(int argc, char **argv) {
#endif
DialectRegistry registry;
fir::support::registerDialects(registry);
registry.insert<mlir::gpu::GPUDialect>();
fir::support::addFIRExtensions(registry);
return failed(MlirOptMain(argc, argv, "FIR modular optimizer driver\n",
registry));
Expand Down

0 comments on commit 7e72e5b

Please sign in to comment.