https://github.com/RiverDave updated https://github.com/llvm/llvm-project/pull/187636
>From 2e61735445211aa3b15dfbcafad1209ec124016d Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Wed, 18 Mar 2026 21:10:59 -0400 Subject: [PATCH 01/11] [CIR][CUDA] Global emission for fatbin symbols --- .../clang/CIR/Dialect/IR/CIRCUDAAttrs.td | 17 ++ .../clang/CIR/Dialect/IR/CIRDialect.td | 1 + clang/include/clang/CIR/MissingFeatures.h | 2 + clang/lib/CIR/CodeGen/CIRGenModule.cpp | 10 ++ .../Dialect/Transforms/LoweringPrepare.cpp | 154 ++++++++++++++++++ clang/test/CIR/CodeGenCUDA/device-stub.cu | 50 ++++++ 6 files changed, 234 insertions(+) create mode 100644 clang/test/CIR/CodeGenCUDA/device-stub.cu diff --git a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td index 5932db8323196..a5374f4ffd79b 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td @@ -50,5 +50,22 @@ def CUDAExternallyInitializedAttr : CIR_Attr<"CUDAExternallyInitialized", }]; let canHaveIllegalCXXABIType = 0; } +def CIR_CUDABinaryHandleAttr : CIR_Attr< + "CUDABinaryHandle", "cu.binary_handle" +> { + let summary = "Fat binary handle for device code."; + let description = + [{ + This attribute is attached to the ModuleOp and records the binary file + name passed to host. + + CUDA first compiles device-side code into a fat binary file. The file + name is then passed into host-side code, which is used to create a handle + and then generate various registration functions. + }]; + + let parameters = (ins "std::string":$name); + let assemblyFormat = "`<` $name `>`"; +} #endif // CLANG_CIR_DIALECT_IR_CIRCUDAATTRS_TD diff --git a/clang/include/clang/CIR/Dialect/IR/CIRDialect.td b/clang/include/clang/CIR/Dialect/IR/CIRDialect.td index f1f94c868e5b0..f14478e36f3c0 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRDialect.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRDialect.td @@ -75,6 +75,7 @@ def CIR_Dialect : Dialect { static llvm::StringRef getDefaultFuncAttrsAttrName() { return "default_func_attrs"; } static llvm::StringRef getResAttrsAttrName() { return "res_attrs"; } static llvm::StringRef getArgAttrsAttrName() { return "arg_attrs"; } + static llvm::StringRef getCUDABinaryHandleAttrName() { return "cir.cu.binary_handle"; } static llvm::StringRef getAMDGPUCodeObjectVersionAttrName() { return "cir.amdhsa_code_object_version"; } static llvm::StringRef getAMDGPUPrintfKindAttrName() { return "cir.amdgpu_printf_kind"; } diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index b9a6b83daa13c..ac02433fb504a 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -244,6 +244,8 @@ struct MissingFeatures { static bool ctorConstLvalueToRvalueConversion() { return false; } static bool ctorMemcpyizer() { return false; } static bool cudaSupport() { return false; } + static bool hipModuleCtor() { return false; } + static bool globalRegistration() { return false; } static bool dataLayoutTypeIsSized() { return false; } static bool dataLayoutTypeAllocSize() { return false; } static bool dataLayoutTypeStoreSize() { return false; } diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index e0681eb760249..4877781b89ad5 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -162,6 +162,16 @@ CIRGenModule::CIRGenModule(mlir::MLIRContext &mlirContext, /*line=*/0, /*column=*/0)); } + + // Set CUDA GPU binary handle. + if (langOpts.CUDA) { + std::string cudaBinaryName = codeGenOpts.CudaGpuBinaryFileName; + if (!cudaBinaryName.empty()) { + theModule->setAttr( + cir::CIRDialect::getCUDABinaryHandleAttrName(), + cir::CUDABinaryHandleAttr::get(&mlirContext, cudaBinaryName)); + } + } } CIRGenModule::~CIRGenModule() = default; diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp index 5a8e3be51a947..c4996efc4b50d 100644 --- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp @@ -10,6 +10,7 @@ #include "mlir/IR/Attributes.h" #include "mlir/IR/IRMapping.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/Attrs.inc" #include "clang/AST/Mangle.h" #include "clang/Basic/Module.h" #include "clang/Basic/Specifiers.h" @@ -108,6 +109,17 @@ struct LoweringPreparePass cir::GlobalLinkageKind linkage = cir::GlobalLinkageKind::ExternalLinkage, cir::VisibilityKind visibility = cir::VisibilityKind::Default); + + /// ------------ + /// CUDA registration related + /// ------------ + + llvm::StringMap<FuncOp> cudaKernelMap; + + /// Build the CUDA module constructor that registers the fat binary + /// with the CUDA runtime. + void buildCUDAModuleCtor(); + /// Handle static local variable initialization with guard variables. void handleStaticLocal(cir::GlobalOp globalOp, cir::GetGlobalOp getGlobalOp); @@ -1641,11 +1653,150 @@ void LoweringPreparePass::runOnOp(mlir::Operation *op) { globalCtorList.emplace_back(fnOp.getName(), globalCtor.value()); else if (auto globalDtor = fnOp.getGlobalDtorPriority()) globalDtorList.emplace_back(fnOp.getName(), globalDtor.value()); + + if (auto attr = fnOp->getAttr(cir::CUDAKernelNameAttr::getMnemonic())) { + auto kernelNameAttr = dyn_cast<CUDAKernelNameAttr>(attr); + std::string kernelName = kernelNameAttr.getKernelName(); + cudaKernelMap[kernelName] = fnOp; + } } else if (auto threeWayCmp = dyn_cast<cir::CmpThreeWayOp>(op)) { lowerThreeWayCmpOp(threeWayCmp); } } +static std::string getCUDAPrefix(clang::ASTContext *astCtx) { + if (astCtx->getLangOpts().HIP) + return "hip"; + return "cuda"; +} + +static std::string addUnderscoredPrefix(llvm::StringRef prefix, + llvm::StringRef name) { + return ("__" + prefix + name).str(); +} + +/// Creates a global constructor function for the module: +/// +/// For CUDA: +/// \code +/// void __cuda_module_ctor() { +/// Handle = __cudaRegisterFatBinary(GpuBinaryBlob); +/// __cuda_register_globals(Handle); +/// } +/// \endcode +/// +/// For HIP: +/// \code +/// void __hip_module_ctor() { +/// if (__hip_gpubin_handle == 0) { +/// __hip_gpubin_handle = __hipRegisterFatBinary(GpuBinaryBlob); +/// __hip_register_globals(__hip_gpubin_handle); +/// } +/// } +/// \endcode +void LoweringPreparePass::buildCUDAModuleCtor() { + bool isHIP = astCtx->getLangOpts().HIP; + + if (isHIP) + assert(!cir::MissingFeatures::hipModuleCtor()); + if (astCtx->getLangOpts().GPURelocatableDeviceCode) + llvm_unreachable("GPU RDC NYI"); + + // For CUDA without -fgpu-rdc, it's safe to stop generating ctor + // if there's nothing to register. + if (cudaKernelMap.empty()) + return; + + // There's no device-side binary, so no need to proceed for CUDA. + // HIP has to create an external symbol in this case, which is NYI. + mlir::Attribute cudaBinaryHandleAttr = + mlirModule->getAttr(CIRDialect::getCUDABinaryHandleAttrName()); + if (!cudaBinaryHandleAttr) { + if (astCtx->getLangOpts().HIP) + assert(!cir::MissingFeatures::hipModuleCtor()); + return; + } + + std::string cudaGPUBinaryName = + mlir::cast<CUDABinaryHandleAttr>(cudaBinaryHandleAttr).getName(); + + llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> gpuBinaryOrErr = + llvm::MemoryBuffer::getFile(cudaGPUBinaryName); + if (std::error_code ec = gpuBinaryOrErr.getError()) { + mlirModule->emitError("cannot open GPU binary file: " + cudaGPUBinaryName + + ": " + ec.message()); + return; + } + std::unique_ptr<llvm::MemoryBuffer> gpuBinary = + std::move(gpuBinaryOrErr.get()); + + // Set up common types and builder. + std::string cudaPrefix = getCUDAPrefix(astCtx); + mlir::Location loc = mlirModule->getLoc(); + CIRBaseBuilderTy builder(getContext()); + builder.setInsertionPointToStart(mlirModule.getBody()); + + auto voidTy = builder.getVoidTy(); + auto voidPtrTy = builder.getVoidPtrTy(); + auto voidPtrPtrTy = builder.getPointerTo(voidPtrTy); + auto intTy = builder.getSIntNTy(32); + auto charTy = cir::IntType::get(&getContext(), astCtx->getCharWidth(), + /*isSigned=*/false); + + // --- Create fatbin globals --- + + // Create the fatbin string constant with GPU binary contents. + auto fatbinType = + ArrayType::get(&getContext(), charTy, gpuBinary->getBuffer().size()); + std::string fatbinStrName = addUnderscoredPrefix(cudaPrefix, "_fatbin_str"); + GlobalOp fatbinStr = + GlobalOp::create(builder, loc, fatbinStrName, fatbinType, + /*isConstant=*/true, GlobalLinkageKind::PrivateLinkage); + fatbinStr.setAlignment(8); + fatbinStr.setInitialValueAttr(cir::ConstArrayAttr::get( + fatbinType, builder.getStringAttr(gpuBinary->getBuffer()))); + assert(!cir::MissingFeatures::opGlobalSection()); + fatbinStr.setPrivate(); + + // Create the fatbin wrapper struct: + // struct { int magic; int version; void *fatbin; void *unused; }; + auto fatbinWrapperType = RecordType::get( + &getContext(), {intTy, intTy, voidPtrTy, voidPtrTy}, + /*packed=*/false, /*padded=*/false, RecordType::RecordKind::Struct); + std::string fatbinWrapperName = + addUnderscoredPrefix(cudaPrefix, "_fatbin_wrapper"); + GlobalOp fatbinWrapper = + GlobalOp::create(builder, loc, fatbinWrapperName, fatbinWrapperType, + /*isConstant=*/true, GlobalLinkageKind::PrivateLinkage); + + constexpr unsigned cudaFatMagic = 0x466243b1; + constexpr unsigned hipFatMagic = 0x48495046; + unsigned fatMagic = isHIP ? hipFatMagic : cudaFatMagic; + + auto magicInit = IntAttr::get(intTy, fatMagic); + auto versionInit = IntAttr::get(intTy, 1); + auto fatbinStrSymbol = + mlir::FlatSymbolRefAttr::get(fatbinStr.getSymNameAttr()); + auto fatbinInit = GlobalViewAttr::get(voidPtrTy, fatbinStrSymbol); + auto unusedInit = builder.getConstNullPtrAttr(voidPtrTy); + fatbinWrapper.setInitialValueAttr(cir::ConstRecordAttr::get( + fatbinWrapperType, + mlir::ArrayAttr::get(&getContext(), + {magicInit, versionInit, fatbinInit, unusedInit}))); + + // Create the GPU binary handle global variable. + std::string gpubinHandleName = + addUnderscoredPrefix(cudaPrefix, "_gpubin_handle"); + GlobalOp gpuBinHandle = GlobalOp::create( + builder, loc, gpubinHandleName, voidPtrPtrTy, + /*isConstant=*/false, GlobalLinkageKind::InternalLinkage); + gpuBinHandle.setInitialValueAttr(builder.getConstNullPtrAttr(voidPtrPtrTy)); + gpuBinHandle.setPrivate(); + + // TODO: ctor/dtor/register_globals + assert(!cir::MissingFeatures::globalRegistration()); +} + void LoweringPreparePass::runOnOperation() { mlir::Operation *op = getOperation(); if (isa<::mlir::ModuleOp>(op)) @@ -1666,6 +1817,9 @@ void LoweringPreparePass::runOnOperation() { runOnOp(o); buildCXXGlobalInitFunc(); + if (astCtx->getLangOpts().CUDA && !astCtx->getLangOpts().CUDAIsDevice) + buildCUDAModuleCtor(); + buildGlobalCtorDtorList(); } diff --git a/clang/test/CIR/CodeGenCUDA/device-stub.cu b/clang/test/CIR/CodeGenCUDA/device-stub.cu new file mode 100644 index 0000000000000..59bfd5b31d522 --- /dev/null +++ b/clang/test/CIR/CodeGenCUDA/device-stub.cu @@ -0,0 +1,50 @@ +// Based on clang/test/CodeGenCUDA/device-stub.cu (incubator). + +// Create a dummy GPU binary file for registration. +// RUN: echo -n "GPU binary would be here." > %t + +// CIR output — check fatbin globals are created correctly. +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-cir %s -x cuda \ +// RUN: -target-sdk-version=12.3 -fcuda-include-gpubinary %t -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR + +// OGCG output — check LLVM IR parity with original codegen. +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -x cuda \ +// RUN: -target-sdk-version=12.3 -fcuda-include-gpubinary %t -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG + +// No GPU binary — nothing should be generated. +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-cir %s -x cuda \ +// RUN: -target-sdk-version=12.3 -o %t.nogpu.cir +// RUN: FileCheck --input-file=%t.nogpu.cir %s --check-prefix=NOGPUBIN + +#include "Inputs/cuda.h" + +__global__ void kernelfunc(int i, int j, int k) {} + +void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } + +// Check the fatbin string constant with GPU binary contents. +// CIR: cir.global "private" constant cir_private @__cuda_fatbin_str = #cir.const_array<"GPU binary would be here."> : !cir.array<!u8i x 25> {alignment = 8 : i64} + +// Check the fatbin wrapper struct: { magic, version, ptr to fatbin, null }. +// CIR: cir.global constant cir_private @__cuda_fatbin_wrapper = #cir.const_record<{ +// CIR-SAME: #cir.int<1180844977> : !s32i, +// CIR-SAME: #cir.int<1> : !s32i, +// CIR-SAME: #cir.global_view<@__cuda_fatbin_str> : !cir.ptr<!void>, +// CIR-SAME: #cir.ptr<null> : !cir.ptr<!void> +// CIR-SAME: }> + +// Check the GPU binary handle global. +// CIR: cir.global "private" internal @__cuda_gpubin_handle = #cir.ptr<null> : !cir.ptr<!cir.ptr<!void>> + +// OGCG: constant [25 x i8] c"GPU binary would be here.", section ".nv_fatbin", align 8 +// OGCG: @__cuda_fatbin_wrapper = internal constant { i32, i32, ptr, ptr } { i32 1180844977, i32 1, ptr @{{.*}}, ptr null }, section ".nvFatBinSegment" +// OGCG: @__cuda_gpubin_handle = internal global ptr null + +// No GPU binary — no registration infrastructure at all. +// NOGPUBIN-NOT: fatbin +// NOGPUBIN-NOT: gpubin +// NOGPUBIN-NOT: __cuda_register_globals +// NOGPUBIN-NOT: __cuda_module_ctor +// NOGPUBIN-NOT: __cuda_module_dtor >From 74ff77f84666fb4f48644775c11feb23b090bae4 Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Mon, 23 Mar 2026 20:19:09 -0400 Subject: [PATCH 02/11] fix tests and remove unnecessary comments. --- clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 1 - clang/test/CIR/CodeGenCUDA/device-stub.cu | 3 --- 2 files changed, 4 deletions(-) diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp index c4996efc4b50d..05574b9e30733 100644 --- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp @@ -109,7 +109,6 @@ struct LoweringPreparePass cir::GlobalLinkageKind linkage = cir::GlobalLinkageKind::ExternalLinkage, cir::VisibilityKind visibility = cir::VisibilityKind::Default); - /// ------------ /// CUDA registration related /// ------------ diff --git a/clang/test/CIR/CodeGenCUDA/device-stub.cu b/clang/test/CIR/CodeGenCUDA/device-stub.cu index 59bfd5b31d522..b3e8baa17c7a4 100644 --- a/clang/test/CIR/CodeGenCUDA/device-stub.cu +++ b/clang/test/CIR/CodeGenCUDA/device-stub.cu @@ -3,17 +3,14 @@ // Create a dummy GPU binary file for registration. // RUN: echo -n "GPU binary would be here." > %t -// CIR output — check fatbin globals are created correctly. // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-cir %s -x cuda \ // RUN: -target-sdk-version=12.3 -fcuda-include-gpubinary %t -o %t.cir // RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR -// OGCG output — check LLVM IR parity with original codegen. // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -x cuda \ // RUN: -target-sdk-version=12.3 -fcuda-include-gpubinary %t -o %t.ll // RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG -// No GPU binary — nothing should be generated. // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-cir %s -x cuda \ // RUN: -target-sdk-version=12.3 -o %t.nogpu.cir // RUN: FileCheck --input-file=%t.nogpu.cir %s --check-prefix=NOGPUBIN >From 0c6b8530013334367876381a4f6614c409ab21ab Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Mon, 23 Mar 2026 20:46:17 -0400 Subject: [PATCH 03/11] fix global builder ordering --- .../CIR/Dialect/Transforms/LoweringPrepare.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp index 05574b9e30733..5b241e2bcbad0 100644 --- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp @@ -1748,9 +1748,9 @@ void LoweringPreparePass::buildCUDAModuleCtor() { auto fatbinType = ArrayType::get(&getContext(), charTy, gpuBinary->getBuffer().size()); std::string fatbinStrName = addUnderscoredPrefix(cudaPrefix, "_fatbin_str"); - GlobalOp fatbinStr = - GlobalOp::create(builder, loc, fatbinStrName, fatbinType, - /*isConstant=*/true, GlobalLinkageKind::PrivateLinkage); + GlobalOp fatbinStr = GlobalOp::create(builder, loc, fatbinStrName, fatbinType, + /*isConstant=*/true, {}, + GlobalLinkageKind::PrivateLinkage); fatbinStr.setAlignment(8); fatbinStr.setInitialValueAttr(cir::ConstArrayAttr::get( fatbinType, builder.getStringAttr(gpuBinary->getBuffer()))); @@ -1764,9 +1764,9 @@ void LoweringPreparePass::buildCUDAModuleCtor() { /*packed=*/false, /*padded=*/false, RecordType::RecordKind::Struct); std::string fatbinWrapperName = addUnderscoredPrefix(cudaPrefix, "_fatbin_wrapper"); - GlobalOp fatbinWrapper = - GlobalOp::create(builder, loc, fatbinWrapperName, fatbinWrapperType, - /*isConstant=*/true, GlobalLinkageKind::PrivateLinkage); + GlobalOp fatbinWrapper = GlobalOp::create( + builder, loc, fatbinWrapperName, fatbinWrapperType, + /*isConstant=*/true, {}, GlobalLinkageKind::PrivateLinkage); constexpr unsigned cudaFatMagic = 0x466243b1; constexpr unsigned hipFatMagic = 0x48495046; @@ -1786,9 +1786,10 @@ void LoweringPreparePass::buildCUDAModuleCtor() { // Create the GPU binary handle global variable. std::string gpubinHandleName = addUnderscoredPrefix(cudaPrefix, "_gpubin_handle"); + GlobalOp gpuBinHandle = GlobalOp::create( builder, loc, gpubinHandleName, voidPtrPtrTy, - /*isConstant=*/false, GlobalLinkageKind::InternalLinkage); + /*isConstant=*/false, {}, cir::GlobalLinkageKind::InternalLinkage); gpuBinHandle.setInitialValueAttr(builder.getConstNullPtrAttr(voidPtrPtrTy)); gpuBinHandle.setPrivate(); >From 0fff8db0dcfa23a73bff71cedaf8f415466b6597 Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Wed, 25 Mar 2026 23:14:58 -0400 Subject: [PATCH 04/11] Avoid copies from `std::string` --- .../clang/CIR/Dialect/IR/CIRCUDAAttrs.td | 2 +- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 11 +++-- .../Dialect/Transforms/LoweringPrepare.cpp | 40 +++++++++++-------- 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td index a5374f4ffd79b..d9aabd602a279 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td @@ -64,7 +64,7 @@ def CIR_CUDABinaryHandleAttr : CIR_Attr< and then generate various registration functions. }]; - let parameters = (ins "std::string":$name); + let parameters = (ins "mlir::StringAttr":$name); let assemblyFormat = "`<` $name `>`"; } diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 4877781b89ad5..2d27aff2cc2c8 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -32,10 +32,12 @@ #include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/Interfaces/CIROpInterfaces.h" #include "clang/CIR/MissingFeatures.h" +#include "llvm/ADT/StringRef.h" #include "CIRGenFunctionInfo.h" #include "TargetInfo.h" #include "mlir/Dialect/Ptr/IR/MemorySpaceInterfaces.h" +#include "mlir/IR/Attributes.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Location.h" #include "mlir/IR/MLIRContext.h" @@ -165,11 +167,12 @@ CIRGenModule::CIRGenModule(mlir::MLIRContext &mlirContext, // Set CUDA GPU binary handle. if (langOpts.CUDA) { - std::string cudaBinaryName = codeGenOpts.CudaGpuBinaryFileName; + llvm::StringRef cudaBinaryName = codeGenOpts.CudaGpuBinaryFileName; if (!cudaBinaryName.empty()) { - theModule->setAttr( - cir::CIRDialect::getCUDABinaryHandleAttrName(), - cir::CUDABinaryHandleAttr::get(&mlirContext, cudaBinaryName)); + theModule->setAttr(cir::CIRDialect::getCUDABinaryHandleAttrName(), + cir::CUDABinaryHandleAttr::get( + &mlirContext, mlir::StringAttr::get( + &mlirContext, cudaBinaryName))); } } } diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp index 5b241e2bcbad0..a7162eb86b511 100644 --- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp @@ -9,6 +9,7 @@ #include "PassDetail.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/IRMapping.h" +#include "mlir/IR/BuiltinAttributeInterfaces.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attrs.inc" #include "clang/AST/Mangle.h" @@ -21,9 +22,11 @@ #include "clang/CIR/Dialect/IR/CIRDataLayout.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/Dialect/IR/CIROpsEnums.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/Dialect/Passes.h" #include "clang/CIR/Interfaces/ASTAttrInterfaces.h" #include "clang/CIR/MissingFeatures.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/Support/Path.h" @@ -1653,7 +1656,7 @@ void LoweringPreparePass::runOnOp(mlir::Operation *op) { else if (auto globalDtor = fnOp.getGlobalDtorPriority()) globalDtorList.emplace_back(fnOp.getName(), globalDtor.value()); - if (auto attr = fnOp->getAttr(cir::CUDAKernelNameAttr::getMnemonic())) { + if (mlir::Attribute attr = fnOp->getAttr(cir::CUDAKernelNameAttr::getMnemonic())) { auto kernelNameAttr = dyn_cast<CUDAKernelNameAttr>(attr); std::string kernelName = kernelNameAttr.getKernelName(); cudaKernelMap[kernelName] = fnOp; @@ -1663,15 +1666,15 @@ void LoweringPreparePass::runOnOp(mlir::Operation *op) { } } -static std::string getCUDAPrefix(clang::ASTContext *astCtx) { +static llvm::StringRef getCUDAPrefix(clang::ASTContext *astCtx) { if (astCtx->getLangOpts().HIP) return "hip"; return "cuda"; } -static std::string addUnderscoredPrefix(llvm::StringRef prefix, +static llvm::StringRef addUnderscoredPrefix(llvm::StringRef prefix, llvm::StringRef name) { - return ("__" + prefix + name).str(); + return ("__" + prefix + name).getSingleStringRef(); } /// Creates a global constructor function for the module: @@ -1716,8 +1719,10 @@ void LoweringPreparePass::buildCUDAModuleCtor() { return; } - std::string cudaGPUBinaryName = - mlir::cast<CUDABinaryHandleAttr>(cudaBinaryHandleAttr).getName(); + llvm::StringRef cudaGPUBinaryName = + mlir::cast<CUDABinaryHandleAttr>(cudaBinaryHandleAttr) + .getName() + .getValue(); llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> gpuBinaryOrErr = llvm::MemoryBuffer::getFile(cudaGPUBinaryName); @@ -1730,24 +1735,25 @@ void LoweringPreparePass::buildCUDAModuleCtor() { std::move(gpuBinaryOrErr.get()); // Set up common types and builder. - std::string cudaPrefix = getCUDAPrefix(astCtx); + llvm::StringRef cudaPrefix = getCUDAPrefix(astCtx); mlir::Location loc = mlirModule->getLoc(); CIRBaseBuilderTy builder(getContext()); builder.setInsertionPointToStart(mlirModule.getBody()); - auto voidTy = builder.getVoidTy(); - auto voidPtrTy = builder.getVoidPtrTy(); - auto voidPtrPtrTy = builder.getPointerTo(voidPtrTy); - auto intTy = builder.getSIntNTy(32); - auto charTy = cir::IntType::get(&getContext(), astCtx->getCharWidth(), - /*isSigned=*/false); + VoidType voidTy = builder.getVoidTy(); + PointerType voidPtrTy = builder.getVoidPtrTy(); + PointerType voidPtrPtrTy = builder.getPointerTo(voidPtrTy); + IntType intTy = builder.getSIntNTy(32); + IntType charTy = cir::IntType::get(&getContext(), astCtx->getCharWidth(), + /*isSigned=*/false); // --- Create fatbin globals --- // Create the fatbin string constant with GPU binary contents. auto fatbinType = ArrayType::get(&getContext(), charTy, gpuBinary->getBuffer().size()); - std::string fatbinStrName = addUnderscoredPrefix(cudaPrefix, "_fatbin_str"); + llvm::StringRef fatbinStrName = + addUnderscoredPrefix(cudaPrefix, "_fatbin_str"); GlobalOp fatbinStr = GlobalOp::create(builder, loc, fatbinStrName, fatbinType, /*isConstant=*/true, {}, GlobalLinkageKind::PrivateLinkage); @@ -1762,7 +1768,7 @@ void LoweringPreparePass::buildCUDAModuleCtor() { auto fatbinWrapperType = RecordType::get( &getContext(), {intTy, intTy, voidPtrTy, voidPtrTy}, /*packed=*/false, /*padded=*/false, RecordType::RecordKind::Struct); - std::string fatbinWrapperName = + llvm::StringRef fatbinWrapperName = addUnderscoredPrefix(cudaPrefix, "_fatbin_wrapper"); GlobalOp fatbinWrapper = GlobalOp::create( builder, loc, fatbinWrapperName, fatbinWrapperType, @@ -1777,14 +1783,14 @@ void LoweringPreparePass::buildCUDAModuleCtor() { auto fatbinStrSymbol = mlir::FlatSymbolRefAttr::get(fatbinStr.getSymNameAttr()); auto fatbinInit = GlobalViewAttr::get(voidPtrTy, fatbinStrSymbol); - auto unusedInit = builder.getConstNullPtrAttr(voidPtrTy); + mlir::TypedAttr unusedInit = builder.getConstNullPtrAttr(voidPtrTy); fatbinWrapper.setInitialValueAttr(cir::ConstRecordAttr::get( fatbinWrapperType, mlir::ArrayAttr::get(&getContext(), {magicInit, versionInit, fatbinInit, unusedInit}))); // Create the GPU binary handle global variable. - std::string gpubinHandleName = + llvm::StringRef gpubinHandleName = addUnderscoredPrefix(cudaPrefix, "_gpubin_handle"); GlobalOp gpuBinHandle = GlobalOp::create( >From 9930dd3e867cb32c1edb08cacc1db403e047130b Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Thu, 26 Mar 2026 01:09:34 -0400 Subject: [PATCH 05/11] address more string copies stuff yo --- clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td | 2 +- clang/lib/CIR/CodeGen/CIRGenCall.cpp | 6 ++++-- clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 7 ++++--- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td index d9aabd602a279..8341819e84c62 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRCUDAAttrs.td @@ -32,7 +32,7 @@ def CIR_CUDAKernelNameAttr : CIR_Attr<"CUDAKernelName", "cu.kernel_name"> { respective function runtime registration on the host. }]; - let parameters = (ins "std::string":$kernel_name); + let parameters = (ins "mlir::StringAttr":$kernel_name); let assemblyFormat = "`<` $kernel_name `>`"; let canHaveIllegalCXXABIType = 0; } diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.cpp b/clang/lib/CIR/CodeGen/CIRGenCall.cpp index 35479fa8097ce..800343f4336b6 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCall.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCall.cpp @@ -16,6 +16,7 @@ #include "CIRGenFunction.h" #include "CIRGenFunctionInfo.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/IR/Attributes.h" #include "clang/CIR/ABIArgInfo.h" #include "clang/CIR/MissingFeatures.h" #include "llvm/Support/TypeSize.h" @@ -431,8 +432,9 @@ void CIRGenModule::constructAttributeList( GlobalDecl kernel(calleeInfo.getCalleeDecl()); llvm::StringRef kernelName = getMangledName( kernel.getWithKernelReferenceKind(KernelReferenceKind::Kernel)); - auto attr = - cir::CUDAKernelNameAttr::get(&getMLIRContext(), kernelName.str()); + auto attr = cir::CUDAKernelNameAttr::get( + &getMLIRContext(), + mlir::StringAttr::get(&getMLIRContext(), kernelName)); attrs.set(attr.getMnemonic(), attr); } diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp index a7162eb86b511..94b73915d7758 100644 --- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp @@ -1656,9 +1656,10 @@ void LoweringPreparePass::runOnOp(mlir::Operation *op) { else if (auto globalDtor = fnOp.getGlobalDtorPriority()) globalDtorList.emplace_back(fnOp.getName(), globalDtor.value()); - if (mlir::Attribute attr = fnOp->getAttr(cir::CUDAKernelNameAttr::getMnemonic())) { + if (mlir::Attribute attr = + fnOp->getAttr(cir::CUDAKernelNameAttr::getMnemonic())) { auto kernelNameAttr = dyn_cast<CUDAKernelNameAttr>(attr); - std::string kernelName = kernelNameAttr.getKernelName(); + llvm::StringRef kernelName = kernelNameAttr.getKernelName(); cudaKernelMap[kernelName] = fnOp; } } else if (auto threeWayCmp = dyn_cast<cir::CmpThreeWayOp>(op)) { @@ -1714,7 +1715,7 @@ void LoweringPreparePass::buildCUDAModuleCtor() { mlir::Attribute cudaBinaryHandleAttr = mlirModule->getAttr(CIRDialect::getCUDABinaryHandleAttrName()); if (!cudaBinaryHandleAttr) { - if (astCtx->getLangOpts().HIP) + if (isHIP) assert(!cir::MissingFeatures::hipModuleCtor()); return; } >From 67c07153db68f381dbc88d851641d10431c292fd Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Fri, 27 Mar 2026 03:38:57 -0400 Subject: [PATCH 06/11] fix twine crashes --- clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp index 94b73915d7758..0fd10567146ec 100644 --- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp @@ -1673,9 +1673,9 @@ static llvm::StringRef getCUDAPrefix(clang::ASTContext *astCtx) { return "cuda"; } -static llvm::StringRef addUnderscoredPrefix(llvm::StringRef prefix, +static std::string addUnderscoredPrefix(llvm::StringRef prefix, llvm::StringRef name) { - return ("__" + prefix + name).getSingleStringRef(); + return ("__" + prefix + name).str(); } /// Creates a global constructor function for the module: @@ -1753,8 +1753,7 @@ void LoweringPreparePass::buildCUDAModuleCtor() { // Create the fatbin string constant with GPU binary contents. auto fatbinType = ArrayType::get(&getContext(), charTy, gpuBinary->getBuffer().size()); - llvm::StringRef fatbinStrName = - addUnderscoredPrefix(cudaPrefix, "_fatbin_str"); + std::string fatbinStrName = addUnderscoredPrefix(cudaPrefix, "_fatbin_str"); GlobalOp fatbinStr = GlobalOp::create(builder, loc, fatbinStrName, fatbinType, /*isConstant=*/true, {}, GlobalLinkageKind::PrivateLinkage); @@ -1769,7 +1768,7 @@ void LoweringPreparePass::buildCUDAModuleCtor() { auto fatbinWrapperType = RecordType::get( &getContext(), {intTy, intTy, voidPtrTy, voidPtrTy}, /*packed=*/false, /*padded=*/false, RecordType::RecordKind::Struct); - llvm::StringRef fatbinWrapperName = + std::string fatbinWrapperName = addUnderscoredPrefix(cudaPrefix, "_fatbin_wrapper"); GlobalOp fatbinWrapper = GlobalOp::create( builder, loc, fatbinWrapperName, fatbinWrapperType, @@ -1791,7 +1790,7 @@ void LoweringPreparePass::buildCUDAModuleCtor() { {magicInit, versionInit, fatbinInit, unusedInit}))); // Create the GPU binary handle global variable. - llvm::StringRef gpubinHandleName = + std::string gpubinHandleName = addUnderscoredPrefix(cudaPrefix, "_gpubin_handle"); GlobalOp gpuBinHandle = GlobalOp::create( >From 3b1cadf1980d50878eb0b9b10b4d311af53a5d1d Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Thu, 2 Apr 2026 13:45:13 -0400 Subject: [PATCH 07/11] fix fmt --- clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp index 0fd10567146ec..7135683ecbb3b 100644 --- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp @@ -8,8 +8,8 @@ #include "PassDetail.h" #include "mlir/IR/Attributes.h" -#include "mlir/IR/IRMapping.h" #include "mlir/IR/BuiltinAttributeInterfaces.h" +#include "mlir/IR/IRMapping.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attrs.inc" #include "clang/AST/Mangle.h" >From beb2302b6bdfe37e8ee9b37f991cdc3b654a1e3a Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Thu, 2 Apr 2026 15:58:55 -0400 Subject: [PATCH 08/11] Fix conflicts and add section to fatbin globals --- clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 10 +++++++++- clang/test/CIR/CodeGenCUDA/device-stub.cu | 7 +++---- clang/test/CIR/CodeGenCUDA/kernel-call.cu | 4 ++-- clang/test/CIR/CodeGenCUDA/kernel-stub-name.cu | 6 +++--- clang/test/CIR/CodeGenHIP/simple.cpp | 2 +- 5 files changed, 18 insertions(+), 11 deletions(-) diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp index 7135683ecbb3b..7ba81856f8eb2 100644 --- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp @@ -1750,6 +1750,13 @@ void LoweringPreparePass::buildCUDAModuleCtor() { // --- Create fatbin globals --- + // The section names are different for MAC OS X. + llvm::StringRef fatbinConstName = + astCtx->getLangOpts().HIP ? ".hip_fatbin" : ".nv_fatbin"; + + llvm::StringRef fatbinSectionName = + astCtx->getLangOpts().HIP ? ".hipFatBinSegment" : ".nvFatBinSegment"; + // Create the fatbin string constant with GPU binary contents. auto fatbinType = ArrayType::get(&getContext(), charTy, gpuBinary->getBuffer().size()); @@ -1760,7 +1767,7 @@ void LoweringPreparePass::buildCUDAModuleCtor() { fatbinStr.setAlignment(8); fatbinStr.setInitialValueAttr(cir::ConstArrayAttr::get( fatbinType, builder.getStringAttr(gpuBinary->getBuffer()))); - assert(!cir::MissingFeatures::opGlobalSection()); + fatbinStr.setSection(fatbinConstName); fatbinStr.setPrivate(); // Create the fatbin wrapper struct: @@ -1773,6 +1780,7 @@ void LoweringPreparePass::buildCUDAModuleCtor() { GlobalOp fatbinWrapper = GlobalOp::create( builder, loc, fatbinWrapperName, fatbinWrapperType, /*isConstant=*/true, {}, GlobalLinkageKind::PrivateLinkage); + fatbinWrapper.setSection(fatbinSectionName); constexpr unsigned cudaFatMagic = 0x466243b1; constexpr unsigned hipFatMagic = 0x48495046; diff --git a/clang/test/CIR/CodeGenCUDA/device-stub.cu b/clang/test/CIR/CodeGenCUDA/device-stub.cu index b3e8baa17c7a4..2e9deaee9b225 100644 --- a/clang/test/CIR/CodeGenCUDA/device-stub.cu +++ b/clang/test/CIR/CodeGenCUDA/device-stub.cu @@ -21,16 +21,15 @@ __global__ void kernelfunc(int i, int j, int k) {} void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } -// Check the fatbin string constant with GPU binary contents. -// CIR: cir.global "private" constant cir_private @__cuda_fatbin_str = #cir.const_array<"GPU binary would be here."> : !cir.array<!u8i x 25> {alignment = 8 : i64} +// CIR: cir.global "private" constant cir_private @__cuda_fatbin_str = #cir.const_array<"GPU binary would be here."> : !cir.array<!u8i x 25> {alignment = 8 : i64, section = ".nv_fatbin"} -// Check the fatbin wrapper struct: { magic, version, ptr to fatbin, null }. +// Check the fatbin wrapper struct: { magic, version, ptr to fatbin, null }, with section. // CIR: cir.global constant cir_private @__cuda_fatbin_wrapper = #cir.const_record<{ // CIR-SAME: #cir.int<1180844977> : !s32i, // CIR-SAME: #cir.int<1> : !s32i, // CIR-SAME: #cir.global_view<@__cuda_fatbin_str> : !cir.ptr<!void>, // CIR-SAME: #cir.ptr<null> : !cir.ptr<!void> -// CIR-SAME: }> +// CIR-SAME: }> : !rec_anon_struct {section = ".nvFatBinSegment"} // Check the GPU binary handle global. // CIR: cir.global "private" internal @__cuda_gpubin_handle = #cir.ptr<null> : !cir.ptr<!cir.ptr<!void>> diff --git a/clang/test/CIR/CodeGenCUDA/kernel-call.cu b/clang/test/CIR/CodeGenCUDA/kernel-call.cu index 2d37b6eef73af..3d153f581cec7 100644 --- a/clang/test/CIR/CodeGenCUDA/kernel-call.cu +++ b/clang/test/CIR/CodeGenCUDA/kernel-call.cu @@ -106,13 +106,13 @@ int main(void) { // CUDA-NEW: } else { // CUDA-NEW: cir.const #cir.int<42> : !s32i // CUDA-NEW: cir.const #cir.fp<1.000000e+00> : !cir.float - // CUDA-NEW: cir.call @_Z21__device_stub__kernelif({{.*}}) {cu.kernel_name = #cir.cu.kernel_name<_Z6kernelif>} : (!s32i {llvm.noundef}, !cir.float {llvm.noundef}) -> () + // CUDA-NEW: cir.call @_Z21__device_stub__kernelif({{.*}}) {cu.kernel_name = #cir.cu.kernel_name<"_Z6kernelif">} : (!s32i {llvm.noundef}, !cir.float {llvm.noundef}) -> () // CUDA-NEW: } // HIP-NEW: cir.if %{{.*}} { // HIP-NEW: } else { // HIP-NEW: cir.const #cir.int<42> : !s32i // HIP-NEW: cir.const #cir.fp<1.000000e+00> : !cir.float - // HIP-NEW: cir.call @_Z21__device_stub__kernelif({{.*}}) {cu.kernel_name = #cir.cu.kernel_name<_Z6kernelif>} : (!s32i {llvm.noundef}, !cir.float {llvm.noundef}) -> () + // HIP-NEW: cir.call @_Z21__device_stub__kernelif({{.*}}) {cu.kernel_name = #cir.cu.kernel_name<"_Z6kernelif">} : (!s32i {llvm.noundef}, !cir.float {llvm.noundef}) -> () // HIP-NEW: } kernel<<<1, 1>>>(42, 1.0f); } diff --git a/clang/test/CIR/CodeGenCUDA/kernel-stub-name.cu b/clang/test/CIR/CodeGenCUDA/kernel-stub-name.cu index 1a874d9e9fada..42c8f10430b1f 100644 --- a/clang/test/CIR/CodeGenCUDA/kernel-stub-name.cu +++ b/clang/test/CIR/CodeGenCUDA/kernel-stub-name.cu @@ -6,17 +6,17 @@ #include "Inputs/cuda.h" -// CHECK: cir.func {{.*}} @[[CSTUB:__device_stub__ckernel]]() attributes {cu.kernel_name = #cir.cu.kernel_name<ckernel>{{.*}}} +// CHECK: cir.func {{.*}} @[[CSTUB:__device_stub__ckernel]]() attributes {cu.kernel_name = #cir.cu.kernel_name<"ckernel">{{.*}}} // CHECK: cir.return // CHECK-NEXT: } extern "C" __global__ void ckernel() {} -// CHECK: cir.func {{.*}} @_ZN2ns23__device_stub__nskernelEv() attributes {cu.kernel_name = #cir.cu.kernel_name<_ZN2ns8nskernelEv>{{.*}}} +// CHECK: cir.func {{.*}} @_ZN2ns23__device_stub__nskernelEv() attributes {cu.kernel_name = #cir.cu.kernel_name<"_ZN2ns8nskernelEv">{{.*}}} namespace ns { __global__ void nskernel() {} } // namespace ns -// CHECK: cir.func {{.*}} @_Z25__device_stub__kernelfuncIiEvv() attributes {cu.kernel_name = #cir.cu.kernel_name<_Z10kernelfuncIiEvv>{{.*}}} +// CHECK: cir.func {{.*}} @_Z25__device_stub__kernelfuncIiEvv() attributes {cu.kernel_name = #cir.cu.kernel_name<"_Z10kernelfuncIiEvv">{{.*}}} template <class T> __global__ void kernelfunc() {} template __global__ void kernelfunc<int>(); diff --git a/clang/test/CIR/CodeGenHIP/simple.cpp b/clang/test/CIR/CodeGenHIP/simple.cpp index 15240fd7a3038..b3df34aed6afb 100644 --- a/clang/test/CIR/CodeGenHIP/simple.cpp +++ b/clang/test/CIR/CodeGenHIP/simple.cpp @@ -42,7 +42,7 @@ __global__ void global_fn(int a) {} // CIR-DEVICE: cir.func {{.*}}{{.*}} @_Z9global_fni // OGCG-DEVICE: define protected amdgpu_kernel void @_Z9global_fni -// CIR-HOST: @_Z24__device_stub__global_fni{{.*}}attributes {cu.kernel_name = #cir.cu.kernel_name<_Z9global_fni>{{.*}}} +// CIR-HOST: @_Z24__device_stub__global_fni{{.*}}attributes {cu.kernel_name = #cir.cu.kernel_name<"_Z9global_fni">{{.*}}} // CIR-HOST: %[[#CIRKernelArgs:]] = cir.alloca {{.*}}"kernel_args" // CIR-HOST: %[[#Decayed:]] = cir.cast array_to_ptrdecay %[[#CIRKernelArgs]] // CIR-HOST: cir.call @__hipPopCallConfiguration >From 9ede868137d722f87b0903e37fe2967840b81ead Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Thu, 2 Apr 2026 17:19:29 -0400 Subject: [PATCH 09/11] remove accidental .inc include --- clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp index 7ba81856f8eb2..8943a63b60536 100644 --- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp @@ -11,7 +11,6 @@ #include "mlir/IR/BuiltinAttributeInterfaces.h" #include "mlir/IR/IRMapping.h" #include "clang/AST/ASTContext.h" -#include "clang/AST/Attrs.inc" #include "clang/AST/Mangle.h" #include "clang/Basic/Module.h" #include "clang/Basic/Specifiers.h" >From 08764284a1c22bd0a7f83abbc03d1d7ffd74d00d Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Thu, 2 Apr 2026 17:31:27 -0400 Subject: [PATCH 10/11] Fix missing include for memoryBuffer on linux ci --- clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp index 8943a63b60536..b5140c281ed2d 100644 --- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp @@ -27,6 +27,7 @@ #include "clang/CIR/MissingFeatures.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/TypeSwitch.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include <memory> >From f41fc9f0f01be7eca3000ec69b406002c0fddfe7 Mon Sep 17 00:00:00 2001 From: David Rivera <[email protected]> Date: Thu, 2 Apr 2026 17:47:02 -0400 Subject: [PATCH 11/11] remove unused var --- clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp index b5140c281ed2d..8185385f92b50 100644 --- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp @@ -1741,7 +1741,6 @@ void LoweringPreparePass::buildCUDAModuleCtor() { CIRBaseBuilderTy builder(getContext()); builder.setInsertionPointToStart(mlirModule.getBody()); - VoidType voidTy = builder.getVoidTy(); PointerType voidPtrTy = builder.getVoidPtrTy(); PointerType voidPtrPtrTy = builder.getPointerTo(voidPtrTy); IntType intTy = builder.getSIntNTy(32); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
