================
@@ -1634,11 +1645,151 @@ void LoweringPreparePass::runOnOp(mlir::Operation *op)
{
globalCtorList.emplace_back(fnOp.getName(), globalCtor.value());
else if (auto globalDtor = fnOp.getGlobalDtorPriority())
globalDtorList.emplace_back(fnOp.getName(), globalDtor.value());
+
+ if (auto attr = fnOp->getAttr(cir::CUDAKernelNameAttr::getMnemonic())) {
+ auto kernelNameAttr = dyn_cast<CUDAKernelNameAttr>(attr);
+ std::string kernelName = kernelNameAttr.getKernelName();
+ cudaKernelMap[kernelName] = fnOp;
+ }
} else if (auto threeWayCmp = dyn_cast<cir::CmpThreeWayOp>(op)) {
lowerThreeWayCmpOp(threeWayCmp);
}
}
+/// Returns the prefix used when forming GPU runtime symbol names: "hip" when
+/// the HIP language option is enabled on \p astCtx, and "cuda" otherwise.
+static std::string getCUDAPrefix(clang::ASTContext *astCtx) {
+  const bool targetsHIP = astCtx->getLangOpts().HIP;
+  return targetsHIP ? "hip" : "cuda";
+}
+
+/// Returns the concatenation of "__", \p prefix and \p name as an owned
+/// string.
+static std::string addUnderscoredPrefix(llvm::StringRef prefix,
+                                        llvm::StringRef name) {
+  std::string underscored("__");
+  underscored.append(prefix.begin(), prefix.end());
+  underscored.append(name.begin(), name.end());
+  return underscored;
+}
+
+/// Creates a global constructor function for the module:
+///
+/// For CUDA:
+/// \code
+/// void __cuda_module_ctor() {
+/// Handle = __cudaRegisterFatBinary(GpuBinaryBlob);
+/// __cuda_register_globals(Handle);
+/// }
+/// \endcode
+///
+/// For HIP:
+/// \code
+/// void __hip_module_ctor() {
+/// if (__hip_gpubin_handle == 0) {
+/// __hip_gpubin_handle = __hipRegisterFatBinary(GpuBinaryBlob);
+/// __hip_register_globals(__hip_gpubin_handle);
+/// }
+/// }
+/// \endcode
+void LoweringPreparePass::buildCUDAModuleCtor() {
+ bool isHIP = astCtx->getLangOpts().HIP;
+
+ if (isHIP)
+ assert(!cir::MissingFeatures::hipModuleCtor());
+ if (astCtx->getLangOpts().GPURelocatableDeviceCode)
+ llvm_unreachable("GPU RDC NYI");
+
+ // For CUDA without -fgpu-rdc, it's safe to stop generating ctor
+ // if there's nothing to register.
+ if (cudaKernelMap.empty())
+ return;
+
+ // There's no device-side binary, so no need to proceed for CUDA.
+ // HIP has to create an external symbol in this case, which is NYI.
+ mlir::Attribute cudaBinaryHandleAttr =
+ mlirModule->getAttr(CIRDialect::getCUDABinaryHandleAttrName());
+ if (!cudaBinaryHandleAttr) {
+ if (astCtx->getLangOpts().HIP)
+ assert(!cir::MissingFeatures::hipModuleCtor());
+ return;
+ }
+
+ std::string cudaGPUBinaryName =
+ mlir::cast<CUDABinaryHandleAttr>(cudaBinaryHandleAttr).getName();
+
+ llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> gpuBinaryOrErr =
+ llvm::MemoryBuffer::getFile(cudaGPUBinaryName);
+ if (std::error_code ec = gpuBinaryOrErr.getError()) {
+ mlirModule->emitError("cannot open GPU binary file: " + cudaGPUBinaryName +
+ ": " + ec.message());
+ return;
+ }
+ std::unique_ptr<llvm::MemoryBuffer> gpuBinary =
+ std::move(gpuBinaryOrErr.get());
+
+ // Set up common types and builder.
+ std::string cudaPrefix = getCUDAPrefix(astCtx);
----------------
andykaylor wrote:
```suggestion
StringRef cudaPrefix = getCUDAPrefix(astCtx);
```
https://github.com/llvm/llvm-project/pull/187636
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits