diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.h b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.h
index 4132db672e394..1edded090f8ce 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.h
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.h
@@ -12,6 +12,7 @@
 #include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
 #include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
 #include "flang/Optimizer/Dialect/FIRType.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/IR/OpDefinition.h"
 
 #define GET_OP_CLASSES
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
index 98d1ef529738c..d34a8af0394a4 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
@@ -18,6 +18,7 @@ include "flang/Optimizer/Dialect/CUF/CUFDialect.td"
 include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td"
 include "flang/Optimizer/Dialect/FIRTypes.td"
 include "flang/Optimizer/Dialect/FIRAttr.td"
+include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
 include "mlir/Interfaces/LoopLikeInterface.td"
 include "mlir/IR/BuiltinAttributes.td"
 
@@ -288,15 +289,30 @@ def cuf_KernelOp : cuf_Op<"kernel", [AttrSizedOperandSegments,
   let hasVerifier = 1;
 }
 
+def cuf_RegisterModuleOp : cuf_Op<"register_module", []> {
+  let summary = "Register a CUDA module";
+
+  let arguments = (ins
+    SymbolRefAttr:$name
+  );
+
+  let assemblyFormat = [{
+    $name attr-dict `->` type($modulePtr)
+  }];
+
+  let results = (outs LLVM_AnyPointer:$modulePtr);
+}
+
 def cuf_RegisterKernelOp : cuf_Op<"register_kernel", []> {
   let summary = "Register a CUDA kernel";
 
   let arguments = (ins
-    SymbolRefAttr:$name
+    SymbolRefAttr:$name,
+    LLVM_AnyPointer:$modulePtr
   );
 
   let assemblyFormat = [{
-    $name attr-dict
+    $name `(` $modulePtr `:` type($modulePtr) `)` attr-dict
   }];
 
   let hasVerifier = 1;
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFToLLVMIRTranslation.h b/flang/include/flang/Optimizer/Dialect/CUF/CUFToLLVMIRTranslation.h
new file mode 100644
index 0000000000000..f3edb7fca649d
--- /dev/null
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFToLLVMIRTranslation.h
@@ -0,0 +1,29 @@
+//===- CUFToLLVMIRTranslation.h - CUF Dialect to LLVM IR --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This provides registration calls for CUF dialect to LLVM IR translation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FLANG_OPTIMIZER_DIALECT_CUF_CUFTOLLVMIRTRANSLATION_H_
+#define FLANG_OPTIMIZER_DIALECT_CUF_CUFTOLLVMIRTRANSLATION_H_
+
+namespace mlir {
+class DialectRegistry;
+class MLIRContext;
+} // namespace mlir
+
+namespace cuf {
+
+/// Register the CUF dialect and the translation from it to the LLVM IR in
+/// the given registry.
+void registerCUFDialectTranslation(mlir::DialectRegistry &registry);
+
+} // namespace cuf
+
+#endif // FLANG_OPTIMIZER_DIALECT_CUF_CUFTOLLVMIRTRANSLATION_H_
diff --git a/flang/include/flang/Optimizer/Support/InitFIR.h b/flang/include/flang/Optimizer/Support/InitFIR.h
index 04a5dd323e550..1c61c36719992 100644
--- a/flang/include/flang/Optimizer/Support/InitFIR.h
+++ b/flang/include/flang/Optimizer/Support/InitFIR.h
@@ -14,6 +14,7 @@
 #define FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H
 
 #include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
+#include "flang/Optimizer/Dialect/CUF/CUFToLLVMIRTranslation.h"
 #include "flang/Optimizer/Dialect/FIRDialect.h"
 #include "flang/Optimizer/HLFIR/HLFIRDialect.h"
 #include "mlir/Conversion/Passes.h"
@@ -61,6 +62,7 @@ inline void addFIRExtensions(mlir::DialectRegistry &registry,
   if (addFIRInlinerInterface)
     addFIRInlinerExtension(registry);
   addFIRToLLVMIRExtension(registry);
+  cuf::registerCUFDialectTranslation(registry);
 }
 
 inline void loadNonCodegenDialects(mlir::MLIRContext &context) {
diff --git a/flang/include/flang/Runtime/CUDA/registration.h b/flang/include/flang/Runtime/CUDA/registration.h
new file mode 100644
index 0000000000000..cbe202c4d23e0
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/registration.h
@@ -0,0 +1,28 @@
+//===-- include/flang/Runtime/CUDA/registration.h -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_REGISTRATION_H_
+#define FORTRAN_RUNTIME_CUDA_REGISTRATION_H_
+
+#include "flang/Runtime/entry-names.h"
+#include <cstddef>
+
+namespace Fortran::runtime::cuda {
+
+extern "C" {
+
+/// Register a CUDA module.
+void *RTDECL(CUFRegisterModule)(void *data);
+
+/// Register a device function.
+void RTDECL(CUFRegisterFunction)(void **module, const char *fct);
+
+} // extern "C"
+
+} // namespace Fortran::runtime::cuda
+#endif // FORTRAN_RUNTIME_CUDA_REGISTRATION_H_
diff --git a/flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt b/flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt
index b2221199995d5..5d4bd0785971f 100644
--- a/flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt
+++ b/flang/lib/Optimizer/Dialect/CUF/CMakeLists.txt
@@ -3,6 +3,7 @@ add_subdirectory(Attributes)
 add_flang_library(CUFDialect
   CUFDialect.cpp
   CUFOps.cpp
+  CUFToLLVMIRTranslation.cpp
 
   DEPENDS
   MLIRIR
diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFToLLVMIRTranslation.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFToLLVMIRTranslation.cpp
new file mode 100644
index 0000000000000..c6c9f96b81135
--- /dev/null
+++ b/flang/lib/Optimizer/Dialect/CUF/CUFToLLVMIRTranslation.cpp
@@ -0,0 +1,104 @@
+//===- CUFToLLVMIRTranslation.cpp - Translate CUF dialect to LLVM IR ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a translation between the MLIR CUF dialect and LLVM IR.
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Dialect/CUF/CUFToLLVMIRTranslation.h"
+#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
+#include "flang/Runtime/entry-names.h"
+#include "mlir/Target/LLVMIR/LLVMTranslationInterface.h"
+#include "mlir/Target/LLVMIR/ModuleTranslation.h"
+#include "llvm/ADT/TypeSwitch.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/FormatVariadic.h"
+
+using namespace mlir;
+
+namespace {
+
+LogicalResult registerModule(cuf::RegisterModuleOp op,
+                             llvm::IRBuilderBase &builder,
+                             LLVM::ModuleTranslation &moduleTranslation) {
+  std::string binaryIdentifier =
+      op.getName().getLeafReference().str() + "_bin_cst";
+  llvm::Module *module = moduleTranslation.getLLVMModule();
+  llvm::Value *binary = module->getGlobalVariable(binaryIdentifier, true);
+  if (!binary)
+    return op.emitError() << "Couldn't find the binary: " << binaryIdentifier;
+
+  llvm::Type *ptrTy = builder.getPtrTy(0);
+  llvm::FunctionCallee fct = module->getOrInsertFunction(
+      RTNAME_STRING(CUFRegisterModule),
+      llvm::FunctionType::get(ptrTy, ArrayRef<llvm::Type *>({ptrTy}), false));
+  auto *handle = builder.CreateCall(fct, {binary});
+  moduleTranslation.mapValue(op->getResults().front()) = handle;
+  return mlir::success();
+}
+
+llvm::Value *getOrCreateFunctionName(llvm::Module *module,
+                                     llvm::IRBuilderBase &builder,
+                                     llvm::StringRef moduleName,
+                                     llvm::StringRef kernelName) {
+  std::string globalName =
+      std::string(llvm::formatv("{0}_{1}_kernel_name", moduleName, kernelName));
+
+  if (llvm::GlobalVariable *gv = module->getGlobalVariable(globalName))
+    return gv;
+
+  return builder.CreateGlobalString(kernelName, globalName);
+}
+
+LogicalResult registerKernel(cuf::RegisterKernelOp op,
+                             llvm::IRBuilderBase &builder,
+                             LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::Module *module = moduleTranslation.getLLVMModule();
+  llvm::Type *ptrTy = builder.getPtrTy(0);
+  llvm::FunctionCallee fct = module->getOrInsertFunction(
+      RTNAME_STRING(CUFRegisterFunction),
+      llvm::FunctionType::get(builder.getVoidTy(),
+                              ArrayRef<llvm::Type *>({ptrTy, ptrTy}), false));
+  llvm::Value *modulePtr = moduleTranslation.lookupValue(op.getModulePtr());
+  builder.CreateCall(
+      fct, {modulePtr, getOrCreateFunctionName(module, builder,
+                                               op.getKernelModuleName().str(),
+                                               op.getKernelName().str())});
+  return mlir::success();
+}
+
+class CUFDialectLLVMIRTranslationInterface
+    : public LLVMTranslationDialectInterface {
+public:
+  using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface;
+
+  LogicalResult
+  convertOperation(Operation *operation, llvm::IRBuilderBase &builder,
+                   LLVM::ModuleTranslation &moduleTranslation) const override {
+    return llvm::TypeSwitch<Operation *, LogicalResult>(operation)
+        .Case([&](cuf::RegisterModuleOp op) {
+          return registerModule(op, builder, moduleTranslation);
+        })
+        .Case([&](cuf::RegisterKernelOp op) {
+          return registerKernel(op, builder, moduleTranslation);
+        })
+        .Default([&](Operation *op) {
+          return op->emitError("unsupported CUF operation: ") << op->getName();
+        });
+  }
+};
+
+} // namespace
+
+void cuf::registerCUFDialectTranslation(DialectRegistry &registry) {
+  registry.insert<cuf::CUFDialect>();
+  registry.addExtension(+[](MLIRContext *ctx, cuf::CUFDialect *dialect) {
+    dialect->addInterfaces<CUFDialectLLVMIRTranslationInterface>();
+  });
+}
diff --git a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
index 3db24226e7504..f260437e71041 100644
--- a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
@@ -62,12 +62,15 @@ struct CUFAddConstructor
     // Register kernels
     auto gpuMod = symTab.lookup<mlir::gpu::GPUModuleOp>(cudaModName);
     if (gpuMod) {
+      auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(ctx);
+      auto registeredMod = builder.create<cuf::RegisterModuleOp>(
+          loc, llvmPtrTy, mlir::SymbolRefAttr::get(ctx, gpuMod.getName()));
       for (auto func : gpuMod.getOps<mlir::gpu::GPUFuncOp>()) {
         if (func.isKernel()) {
           auto kernelName = mlir::SymbolRefAttr::get(
               builder.getStringAttr(cudaModName),
               {mlir::SymbolRefAttr::get(builder.getContext(), func.getName())});
-          builder.create<cuf::RegisterKernelOp>(loc, kernelName);
+          builder.create<cuf::RegisterKernelOp>(loc, kernelName, registeredMod);
         }
       }
     }
diff --git a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
index 91ef1259332de..e81fafb529a27 100644
--- a/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CufOpConversion.cpp
@@ -20,6 +20,7 @@
 #include "flang/Runtime/CUDA/descriptor.h"
 #include "flang/Runtime/CUDA/memory.h"
 #include "flang/Runtime/allocatable.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt
index 193dd77e93455..86523b419f871 100644
--- a/flang/runtime/CUDA/CMakeLists.txt
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -18,6 +18,7 @@ add_flang_library(${CUFRT_LIBNAME}
   allocatable.cpp
   descriptor.cpp
   memory.cpp
+  registration.cpp
 )
 
 if (BUILD_SHARED_LIBS)
diff --git a/flang/runtime/CUDA/registration.cpp b/flang/runtime/CUDA/registration.cpp
new file mode 100644
index 0000000000000..aed275e964680
--- /dev/null
+++ b/flang/runtime/CUDA/registration.cpp
@@ -0,0 +1,31 @@
+//===-- runtime/CUDA/registration.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Runtime/CUDA/registration.h"
+
+#include "cuda_runtime.h"
+
+namespace Fortran::runtime::cuda {
+
+extern "C" {
+
+extern void **__cudaRegisterFatBinary(void *data);
+extern void __cudaRegisterFunction(void **fatCubinHandle, const char *hostFun,
+    char *deviceFun, const char *deviceName, int thread_limit, uint3 *tid,
+    uint3 *bid, dim3 *bDim, dim3 *gDim, int *wSize);
+
+void *RTDEF(CUFRegisterModule)(void *data) {
+  return __cudaRegisterFatBinary(data);
+}
+
+void RTDEF(CUFRegisterFunction)(void **module, const char *fct) {
+  __cudaRegisterFunction(module, fct, const_cast<char *>(fct), fct, -1,
+      nullptr, nullptr, nullptr, nullptr, nullptr);
+}
+} // extern "C"
+} // namespace Fortran::runtime::cuda
diff --git a/flang/test/Fir/CUDA/cuda-register-func.fir b/flang/test/Fir/CUDA/cuda-register-func.fir
index 277475f0883dc..6b0cbfd3aca63 100644
--- a/flang/test/Fir/CUDA/cuda-register-func.fir
+++ b/flang/test/Fir/CUDA/cuda-register-func.fir
@@ -12,5 +12,6 @@ module attributes {gpu.container_module} {
 }
 
 // CHECK-LABEL: llvm.func internal @__cudaFortranConstructor()
-// CHECK: cuf.register_kernel @cuda_device_mod::@_QPsub_device1
-// CHECK: cuf.register_kernel @cuda_device_mod::@_QPsub_device2
+// CHECK: %[[MOD_HANDLE:.*]] = cuf.register_module @cuda_device_mod -> !llvm.ptr
+// CHECK: cuf.register_kernel @cuda_device_mod::@_QPsub_device1(%[[MOD_HANDLE]] : !llvm.ptr)
+// CHECK: cuf.register_kernel @cuda_device_mod::@_QPsub_device2(%[[MOD_HANDLE]] : !llvm.ptr)
diff --git a/flang/test/Fir/cuf-invalid.fir b/flang/test/Fir/cuf-invalid.fir
index 8a1eb48576832..a3b9be3ee8223 100644
--- a/flang/test/Fir/cuf-invalid.fir
+++ b/flang/test/Fir/cuf-invalid.fir
@@ -135,8 +135,9 @@ module attributes {gpu.container_module} {
     }
   }
   llvm.func internal @__cudaFortranConstructor() {
+    %0 = cuf.register_module @cuda_device_mod -> !llvm.ptr
     // expected-error@+1{{'cuf.register_kernel' op only kernel gpu.func can be registered}}
-    cuf.register_kernel @cuda_device_mod::@_QPsub_device1
+    cuf.register_kernel @cuda_device_mod::@_QPsub_device1(%0 : !llvm.ptr)
     llvm.return
   }
 }
@@ -150,8 +151,9 @@ module attributes {gpu.container_module} {
     }
   }
   llvm.func internal @__cudaFortranConstructor() {
+    %0 = cuf.register_module @cuda_device_mod -> !llvm.ptr
     // expected-error@+1{{'cuf.register_kernel' op device function not found}}
-    cuf.register_kernel @cuda_device_mod::@_QPsub_device2
+    cuf.register_kernel @cuda_device_mod::@_QPsub_device2(%0 : !llvm.ptr)
     llvm.return
   }
 }
@@ -160,8 +162,9 @@ module attributes {gpu.container_module} {
 
 module attributes {gpu.container_module} {
   llvm.func internal @__cudaFortranConstructor() {
+    %0 = cuf.register_module @cuda_device_mod -> !llvm.ptr
     // expected-error@+1{{'cuf.register_kernel' op gpu module not found}}
-    cuf.register_kernel @cuda_device_mod::@_QPsub_device1
+    cuf.register_kernel @cuda_device_mod::@_QPsub_device1(%0 : !llvm.ptr)
     llvm.return
   }
 }
@@ -170,8 +173,9 @@ module attributes {gpu.container_module} {
 
 module attributes {gpu.container_module} {
   llvm.func internal @__cudaFortranConstructor() {
+    %0 = cuf.register_module @cuda_device_mod -> !llvm.ptr
     // expected-error@+1{{'cuf.register_kernel' op expect a module and a kernel name}}
-    cuf.register_kernel @_QPsub_device1
+    cuf.register_kernel @_QPsub_device1(%0 : !llvm.ptr)
     llvm.return
   }
 }
@@ -185,8 +189,9 @@ module attributes {gpu.container_module} {
     }
   }
   llvm.func internal @__cudaFortranConstructor() {
+    %0 = cuf.register_module @cuda_device_mod -> !llvm.ptr
     // expected-error@+1{{'cuf.register_kernel' op only gpu.kernel llvm.func can be registered}}
-    cuf.register_kernel @cuda_device_mod::@_QPsub_device1
+    cuf.register_kernel @cuda_device_mod::@_QPsub_device1(%0 : !llvm.ptr)
     llvm.return
   }
 }