https://github.com/Artem-B updated https://github.com/llvm/llvm-project/pull/151760
>From a3359f2959d2f161be88c49e5fece6d6fc73bf6b Mon Sep 17 00:00:00 2001 From: Artem Belevich <[email protected]> Date: Fri, 1 Aug 2025 12:23:06 -0700 Subject: [PATCH] [CUDA] Use --image3 to construct fat binary CUDA-12.9 has removed fatbinary tool's `--image` argument we've been using till now. --image3 has been supported since cuda-9, so we do not need CUDA SDK version checks. --- clang/lib/Driver/ToolChains/Cuda.cpp | 20 ++++++----------- clang/test/Driver/cuda-arch-translation.cu | 26 +++++++++++----------- clang/test/Driver/cuda-options.cu | 20 ++++++++--------- 3 files changed, 30 insertions(+), 36 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 1f0b478c02b25..fdfcea852b4f2 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -549,22 +549,16 @@ void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA, auto *A = II.getAction(); assert(A->getInputs().size() == 1 && "Device offload action is expected to have a single input"); - const char *gpu_arch_str = A->getOffloadingArch(); - assert(gpu_arch_str && + StringRef GpuArch = A->getOffloadingArch(); + assert(!GpuArch.empty() && "Device action expected to have associated a GPU architecture!"); - OffloadArch gpu_arch = StringToOffloadArch(gpu_arch_str); - if (II.getType() == types::TY_PP_Asm && - !shouldIncludePTX(Args, gpu_arch_str)) + if (II.getType() == types::TY_PP_Asm && !shouldIncludePTX(Args, GpuArch)) continue; - // We need to pass an Arch of the form "sm_XX" for cubin files and - // "compute_XX" for ptx. - const char *Arch = (II.getType() == types::TY_PP_Asm) - ? OffloadArchToVirtualArchString(gpu_arch) - : gpu_arch_str; - CmdArgs.push_back( - Args.MakeArgString(llvm::Twine("--image=profile=") + Arch + - ",file=" + getToolChain().getInputFilename(II))); + StringRef Kind = (II.getType() == types::TY_PP_Asm) ? "ptx" : "elf"; + CmdArgs.push_back(Args.MakeArgString( + "--image3=kind=" + Kind + ",sm=" + GpuArch.drop_front(3) + + ",file=" + getToolChain().getInputFilename(II))); } for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary)) diff --git a/clang/test/Driver/cuda-arch-translation.cu b/clang/test/Driver/cuda-arch-translation.cu index e4f83740a92eb..b4a521df2e6f5 100644 --- a/clang/test/Driver/cuda-arch-translation.cu +++ b/clang/test/Driver/cuda-arch-translation.cu @@ -68,19 +68,19 @@ // HIP: clang-offload-bundler -// SM20:--image=profile=sm_20{{.*}} -// SM21:--image=profile=sm_21{{.*}} -// SM30:--image=profile=sm_30{{.*}} -// SM32:--image=profile=sm_32{{.*}} -// SM35:--image=profile=sm_35{{.*}} -// SM37:--image=profile=sm_37{{.*}} -// SM50:--image=profile=sm_50{{.*}} -// SM52:--image=profile=sm_52{{.*}} -// SM53:--image=profile=sm_53{{.*}} -// SM60:--image=profile=sm_60{{.*}} -// SM61:--image=profile=sm_61{{.*}} -// SM62:--image=profile=sm_62{{.*}} -// SM70:--image=profile=sm_70{{.*}} +// SM20:--image3=kind=elf,sm=20{{.*}} +// SM21:--image3=kind=elf,sm=21{{.*}} +// SM30:--image3=kind=elf,sm=30{{.*}} +// SM32:--image3=kind=elf,sm=32{{.*}} +// SM35:--image3=kind=elf,sm=35{{.*}} +// SM37:--image3=kind=elf,sm=37{{.*}} +// SM50:--image3=kind=elf,sm=50{{.*}} +// SM52:--image3=kind=elf,sm=52{{.*}} +// SM53:--image3=kind=elf,sm=53{{.*}} +// SM60:--image3=kind=elf,sm=60{{.*}} +// SM61:--image3=kind=elf,sm=61{{.*}} +// SM62:--image3=kind=elf,sm=62{{.*}} +// SM70:--image3=kind=elf,sm=70{{.*}} // GFX600:-targets=host-x86_64-unknown-linux-gnu,hipv4-amdgcn-amd-amdhsa--gfx600 // GFX601:-targets=host-x86_64-unknown-linux-gnu,hipv4-amdgcn-amd-amdhsa--gfx601 // GFX602:-targets=host-x86_64-unknown-linux-gnu,hipv4-amdgcn-amd-amdhsa--gfx602 diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu index db6536ca9e03b..fc8e83a2bb279 100644 --- a/clang/test/Driver/cuda-options.cu +++ b/clang/test/Driver/cuda-options.cu @@ -243,10 +243,10 @@ // INCLUDES-DEVICE:fatbinary // INCLUDES-DEVICE-DAG: "--create" "[[FATBINARY:[^"]*]]" -// INCLUDES-DEVICE-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE]]" -// INCLUDES-DEVICE-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE]]" -// INCLUDES-DEVICE2-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE2]]" -// INCLUDES-DEVICE2-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE2]]" +// INCLUDES-DEVICE-DAG: "--image3=kind=elf,sm={{[0-9]+}},file=[[CUBINFILE]]" +// INCLUDES-DEVICE-DAG: "--image3=kind=ptx,sm={{[0-9]+}},file=[[PTXFILE]]" +// INCLUDES-DEVICE2-DAG: "--image3=kind=elf,sm={{[0-9]+}},file=[[CUBINFILE2]]" +// INCLUDES-DEVICE2-DAG: "--image3=kind=ptx,sm={{[0-9]+}},file=[[PTXFILE2]]" // Match host-side preprocessor job with -save-temps. // HOST-SAVE: "-cc1" "-triple" "x86_64-unknown-linux-gnu" @@ -288,9 +288,9 @@ // FATBIN-COMMON:fatbinary // FATBIN-COMMON: "--create" "[[FATBINARY:[^"]*]]" -// FATBIN-COMMON: "--image=profile=sm_52,file= -// PTX-SM52: "--image=profile=compute_52,file= -// NOPTX-SM52-NOT: "--image=profile=compute_52,file= -// FATBIN-COMMON: "--image=profile=sm_60,file= -// PTX-SM60: "--image=profile=compute_60,file= -// NOPTX-SM60-NOT: "--image=profile=compute_60,file= +// FATBIN-COMMON: "--image3=kind=elf,sm=52,file= +// PTX-SM52: "--image3=kind=ptx,sm=52,file= +// NOPTX-SM52-NOT: "--image3=kind=ptx,sm=52,file= +// FATBIN-COMMON: "--image3=kind=elf,sm=60,file= +// PTX-SM60: "--image3=kind=ptx,sm=60,file= +// NOPTX-SM60-NOT: "--image3=kind=ptx,sm=60,file= _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
