Author: David Rivera
Date: 2026-05-05T03:09:49-04:00
New Revision: 28360f7802d0b24753e95cce339960da3575d85a

URL: 
https://github.com/llvm/llvm-project/commit/28360f7802d0b24753e95cce339960da3575d85a
DIFF: 
https://github.com/llvm/llvm-project/commit/28360f7802d0b24753e95cce339960da3575d85a.diff

LOG: [CIR][CUDA] Fix typed StringAttr on globals and Add CIR-to-LLVM lowering 
checks for existing registration support (#195002)

Added: 
    

Modified: 
    clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
    clang/test/CIR/CodeGenCUDA/device-stub.cu

Removed: 
    


################################################################################
diff  --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp 
b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index bd3c8bc0aa8d1..dc56a2f21aced 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -1958,7 +1958,7 @@ void LoweringPreparePass::buildCUDAModuleCtor() {
                                         GlobalLinkageKind::PrivateLinkage);
   fatbinStr.setAlignment(8);
   fatbinStr.setInitialValueAttr(cir::ConstArrayAttr::get(
-      fatbinType, builder.getStringAttr(gpuBinary->getBuffer())));
+      fatbinType, StringAttr::get(gpuBinary->getBuffer(), fatbinType)));
   fatbinStr.setSection(fatbinConstName);
   fatbinStr.setPrivate();
 
@@ -2198,8 +2198,8 @@ void 
LoweringPreparePass::buildCUDARegisterGlobalFunctions(
         /*linkage=*/cir::GlobalLinkageKind::PrivateLinkage);
 
     // We must make the string zero-terminated.
-    tmpString.setInitialValueAttr(ConstArrayAttr::get(
-        strType, StringAttr::get(&getContext(), str + "\0")));
+    tmpString.setInitialValueAttr(
+        ConstArrayAttr::get(strType, StringAttr::get(str + "\0", strType)));
     tmpString.setPrivate();
     return tmpString;
   };

diff  --git a/clang/test/CIR/CodeGenCUDA/device-stub.cu 
b/clang/test/CIR/CodeGenCUDA/device-stub.cu
index 0f9d4d68d67ff..0e95c74324592 100644
--- a/clang/test/CIR/CodeGenCUDA/device-stub.cu
+++ b/clang/test/CIR/CodeGenCUDA/device-stub.cu
@@ -7,6 +7,10 @@
 // RUN:   -target-sdk-version=12.3 -fcuda-include-gpubinary %t -o %t.cir
 // RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
 
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -fclangir -emit-llvm %s -x cuda \
+// RUN:   -target-sdk-version=12.3 -fcuda-include-gpubinary %t -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM
+
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -x cuda \
 // RUN:   -target-sdk-version=12.3 -fcuda-include-gpubinary %t -o %t.ll
 // RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
@@ -43,7 +47,7 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
 // CIR: cir.func private @__cudaRegisterFunction(!cir.ptr<!cir.ptr<!void>>, 
!cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>, !s32i, !cir.ptr<!void>, 
!cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>) -> !s32i
 
 // Check the device-side name string for kernelfunc (mangled, null-terminated).
-// CIR: cir.global "private" constant cir_private @".str_Z10kernelfunciii" = 
#cir.const_array<"_Z10kernelfunciii", trailing_zeros> : !cir.array<!u8i x 18>
+// CIR: cir.global "private" constant cir_private @".str_Z10kernelfunciii" = 
#cir.const_array<"_Z10kernelfunciii" : !cir.array<!u8i x 18>, trailing_zeros> : 
!cir.array<!u8i x 18>
 
 // Check __cuda_register_globals body: one __cudaRegisterFunction call per 
kernel.
 // CIR: cir.func internal private @__cuda_register_globals(%arg0: 
!cir.ptr<!cir.ptr<!void>>
@@ -56,7 +60,7 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
 // CIR-NEXT: cir.call @__cudaRegisterFunction(%{{.*}}, %[[HOST_FUNC]], 
%[[DEVICE_FUNC]], %[[DEVICE_FUNC]], %[[THREAD_LIMIT]], %[[NULL]], %[[NULL]], 
%[[NULL]], %[[NULL]], %[[NULL]])
 // CIR-NEXT: cir.return
 
-// CIR: cir.global "private" constant cir_private @__cuda_fatbin_str = 
#cir.const_array<"GPU binary would be here."> : !cir.array<!u8i x 25> 
{alignment = 8 : i64, section = ".nv_fatbin"}
+// CIR: cir.global "private" constant cir_private @__cuda_fatbin_str = 
#cir.const_array<"GPU binary would be here." : !cir.array<!u8i x 25>> : 
!cir.array<!u8i x 25> {alignment = 8 : i64, section = ".nv_fatbin"}
 
 // Check the fatbin wrapper struct: { magic, version, ptr to fatbin, null }, 
with section.
 // CIR: cir.global constant cir_private @__cuda_fatbin_wrapper = 
#cir.const_record<{
@@ -105,6 +109,25 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
 // OGCG: load ptr, ptr @__cuda_gpubin_handle
 // OGCG: call void @__cudaUnregisterFatBinary
 
+// LLVM: constant [25 x i8] c"GPU binary would be here.", section 
".nv_fatbin", align 8
+// LLVM: @__cuda_fatbin_wrapper = {{.*}}constant { i32, i32, ptr, ptr } { i32 
1180844977, i32 1, ptr @{{.*}}, ptr null }, section ".nvFatBinSegment"
+// LLVM: @__cuda_gpubin_handle = internal global ptr null
+// LLVM: @llvm.global_ctors = appending global {{.*}}@__cuda_module_ctor
+
+// LLVM: define internal void @__cuda_module_dtor
+// LLVM: load ptr, ptr @__cuda_gpubin_handle
+// LLVM: call void @__cudaUnregisterFatBinary
+
+// LLVM: define internal void @__cuda_register_globals
+// LLVM: call{{.*}}@__cudaRegisterFunction(ptr %{{.*}}, ptr 
@{{.*}}kernelfunc{{.*}}, ptr @{{.*}}, ptr @{{.*}}, i32 -1, ptr null, ptr null, 
ptr null, ptr null, ptr null)
+// LLVM: ret void
+
+// LLVM: define internal void @__cuda_module_ctor
+// LLVM: call{{.*}}@__cudaRegisterFatBinary(ptr @__cuda_fatbin_wrapper)
+// LLVM: store ptr %{{.*}}, ptr @__cuda_gpubin_handle
+// LLVM-NEXT: call void @__cuda_register_globals
+// LLVM: call i32 @atexit(ptr @__cuda_module_dtor)
+
 // No GPU binary — no registration infrastructure at all.
 // NOGPUBIN-NOT: fatbin
 // NOGPUBIN-NOT: gpubin


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to