Author: David Rivera Date: 2026-05-05T03:09:49-04:00 New Revision: 28360f7802d0b24753e95cce339960da3575d85a
URL: https://github.com/llvm/llvm-project/commit/28360f7802d0b24753e95cce339960da3575d85a DIFF: https://github.com/llvm/llvm-project/commit/28360f7802d0b24753e95cce339960da3575d85a.diff LOG: [CIR][CUDA] Fix typed StringAttr on globals and Add CIR-to-LLVM lowering checks for existing registration support (#195002) Added: Modified: clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp clang/test/CIR/CodeGenCUDA/device-stub.cu Removed: ################################################################################ diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp index bd3c8bc0aa8d1..dc56a2f21aced 100644 --- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp +++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp @@ -1958,7 +1958,7 @@ void LoweringPreparePass::buildCUDAModuleCtor() { GlobalLinkageKind::PrivateLinkage); fatbinStr.setAlignment(8); fatbinStr.setInitialValueAttr(cir::ConstArrayAttr::get( - fatbinType, builder.getStringAttr(gpuBinary->getBuffer()))); + fatbinType, StringAttr::get(gpuBinary->getBuffer(), fatbinType))); fatbinStr.setSection(fatbinConstName); fatbinStr.setPrivate(); @@ -2198,8 +2198,8 @@ void LoweringPreparePass::buildCUDARegisterGlobalFunctions( /*linkage=*/cir::GlobalLinkageKind::PrivateLinkage); // We must make the string zero-terminated. - tmpString.setInitialValueAttr(ConstArrayAttr::get( - strType, StringAttr::get(&getContext(), str + "\0"))); + tmpString.setInitialValueAttr( + ConstArrayAttr::get(strType, StringAttr::get(str + "\0", strType))); tmpString.setPrivate(); return tmpString; }; diff --git a/clang/test/CIR/CodeGenCUDA/device-stub.cu b/clang/test/CIR/CodeGenCUDA/device-stub.cu index 0f9d4d68d67ff..0e95c74324592 100644 --- a/clang/test/CIR/CodeGenCUDA/device-stub.cu +++ b/clang/test/CIR/CodeGenCUDA/device-stub.cu @@ -7,6 +7,10 @@ // RUN: -target-sdk-version=12.3 -fcuda-include-gpubinary %t -o %t.cir // RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fclangir -emit-llvm %s -x cuda \ +// RUN: -target-sdk-version=12.3 -fcuda-include-gpubinary %t -o %t-cir.ll +// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM + // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -x cuda \ // RUN: -target-sdk-version=12.3 -fcuda-include-gpubinary %t -o %t.ll // RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG @@ -43,7 +47,7 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } // CIR: cir.func private @__cudaRegisterFunction(!cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>, !s32i, !cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>, !cir.ptr<!void>) -> !s32i // Check the device-side name string for kernelfunc (mangled, null-terminated). -// CIR: cir.global "private" constant cir_private @".str_Z10kernelfunciii" = #cir.const_array<"_Z10kernelfunciii", trailing_zeros> : !cir.array<!u8i x 18> +// CIR: cir.global "private" constant cir_private @".str_Z10kernelfunciii" = #cir.const_array<"_Z10kernelfunciii" : !cir.array<!u8i x 18>, trailing_zeros> : !cir.array<!u8i x 18> // Check __cuda_register_globals body: one __cudaRegisterFunction call per kernel. // CIR: cir.func internal private @__cuda_register_globals(%arg0: !cir.ptr<!cir.ptr<!void>> @@ -56,7 +60,7 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } // CIR-NEXT: cir.call @__cudaRegisterFunction(%{{.*}}, %[[HOST_FUNC]], %[[DEVICE_FUNC]], %[[DEVICE_FUNC]], %[[THREAD_LIMIT]], %[[NULL]], %[[NULL]], %[[NULL]], %[[NULL]], %[[NULL]]) // CIR-NEXT: cir.return -// CIR: cir.global "private" constant cir_private @__cuda_fatbin_str = #cir.const_array<"GPU binary would be here."> : !cir.array<!u8i x 25> {alignment = 8 : i64, section = ".nv_fatbin"} +// CIR: cir.global "private" constant cir_private @__cuda_fatbin_str = #cir.const_array<"GPU binary would be here." : !cir.array<!u8i x 25>> : !cir.array<!u8i x 25> {alignment = 8 : i64, section = ".nv_fatbin"} // Check the fatbin wrapper struct: { magic, version, ptr to fatbin, null }, with section. // CIR: cir.global constant cir_private @__cuda_fatbin_wrapper = #cir.const_record<{ @@ -105,6 +109,25 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } // OGCG: load ptr, ptr @__cuda_gpubin_handle // OGCG: call void @__cudaUnregisterFatBinary +// LLVM: constant [25 x i8] c"GPU binary would be here.", section ".nv_fatbin", align 8 +// LLVM: @__cuda_fatbin_wrapper = {{.*}}constant { i32, i32, ptr, ptr } { i32 1180844977, i32 1, ptr @{{.*}}, ptr null }, section ".nvFatBinSegment" +// LLVM: @__cuda_gpubin_handle = internal global ptr null +// LLVM: @llvm.global_ctors = appending global {{.*}}@__cuda_module_ctor + +// LLVM: define internal void @__cuda_module_dtor +// LLVM: load ptr, ptr @__cuda_gpubin_handle +// LLVM: call void @__cudaUnregisterFatBinary + +// LLVM: define internal void @__cuda_register_globals +// LLVM: call{{.*}}@__cudaRegisterFunction(ptr %{{.*}}, ptr @{{.*}}kernelfunc{{.*}}, ptr @{{.*}}, ptr @{{.*}}, i32 -1, ptr null, ptr null, ptr null, ptr null, ptr null) +// LLVM: ret void + +// LLVM: define internal void @__cuda_module_ctor +// LLVM: call{{.*}}@__cudaRegisterFatBinary(ptr @__cuda_fatbin_wrapper) +// LLVM: store ptr %{{.*}}, ptr @__cuda_gpubin_handle +// LLVM-NEXT: call void @__cuda_register_globals +// LLVM: call i32 @atexit(ptr @__cuda_module_dtor) + // No GPU binary — no registration infrastructure at all. // NOGPUBIN-NOT: fatbin // NOGPUBIN-NOT: gpubin _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
