Author: Joseph Huber Date: 2026-03-19T09:18:10-05:00 New Revision: 44e306ecdb02bdf3fff067a39bf5a763c786a78d
URL: https://github.com/llvm/llvm-project/commit/44e306ecdb02bdf3fff067a39bf5a763c786a78d DIFF: https://github.com/llvm/llvm-project/commit/44e306ecdb02bdf3fff067a39bf5a763c786a78d.diff LOG: [Clang] Correctly link and handle PGO options on the GPU (#185761) Summary: Currently, the GPU targets ignore the standard profiling arguments. This PR changes the behavior to use the standard handling, which links in the now-present `libclang_rt.profile.a` if the user built with the compiler-rt support enabled. If it is not present this is a linker error and we can always suppress with `-Xarch_host` and `-Xarch_device`. Hopefully this doesn't cause some people pain if they're used to doing `-fprofile-generate` on a CPU unguarded since it was a strange mix of a no-op and not a no-op on the GPU until now. Added: clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/amdgcn-amd-amdhsa/libclang_rt.profile.a clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/nvptx64-nvidia-cuda/libclang_rt.profile.a Modified: clang/lib/Driver/ToolChains/AMDGPU.cpp clang/lib/Driver/ToolChains/Clang.cpp clang/lib/Driver/ToolChains/Cuda.cpp clang/test/Driver/amdgpu-toolchain.c clang/test/Driver/cuda-cross-compiling.c clang/test/Driver/openmp-offload-gpu.c Removed: ################################################################################ diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 7bbdb71b1e24f..54fbd86168602 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -632,6 +632,8 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, Args.MakeArgString("-plugin-opt=-mattr=" + llvm::join(Features, ","))); } + getToolChain().addProfileRTLibs(Args, CmdArgs); + if (Args.hasArg(options::OPT_stdlib)) CmdArgs.append({"-lc", "-lm"}); if (Args.hasArg(options::OPT_startfiles)) { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp index 3b852528d92c4..6416baf9126ff 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -9357,9 +9357,23 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, OPT_flto_partitions_EQ, OPT_flto_EQ, OPT_hipspv_pass_plugin_EQ, - OPT_use_spirv_backend}; + OPT_use_spirv_backend, + OPT_fprofile_generate, + OPT_fprofile_generate_EQ, + OPT_fprofile_instr_generate, + OPT_fprofile_instr_generate_EQ}; const llvm::DenseSet<unsigned> LinkerOptions{OPT_mllvm, OPT_Zlinker_input}; auto ShouldForwardForToolChain = [&](Arg *A, const ToolChain &TC) { + auto HasProfileRT = TC.getVFS().exists( + TC.getCompilerRT(Args, "profile", ToolChain::FT_Static)); + // Don't forward profiling arguments if the toolchain doesn't support it. + // Without this check using it on the host would result in linker errors. + if (!HasProfileRT && + (A->getOption().matches(OPT_fprofile_generate) || + A->getOption().matches(OPT_fprofile_generate_EQ) || + A->getOption().matches(OPT_fprofile_instr_generate) || + A->getOption().matches(OPT_fprofile_instr_generate_EQ))) + return false; // Don't forward -mllvm to toolchains that don't support LLVM. 
return TC.HasNativeLLVMSupport() || A->getOption().getID() != OPT_mllvm; }; diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index e0020176800fd..2ca8886936f6c 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -643,6 +643,8 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, llvm::sys::path::append(DefaultLibPath, CLANG_INSTALL_LIBDIR_BASENAME); CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath)); + getToolChain().addProfileRTLibs(Args, CmdArgs); + if (Args.hasArg(options::OPT_stdlib)) CmdArgs.append({"-lc", "-lm"}); if (Args.hasArg(options::OPT_startfiles)) { diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/amdgcn-amd-amdhsa/libclang_rt.profile.a b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/amdgcn-amd-amdhsa/libclang_rt.profile.a new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/nvptx64-nvidia-cuda/libclang_rt.profile.a b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/nvptx64-nvidia-cuda/libclang_rt.profile.a new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/amdgpu-toolchain.c b/clang/test/Driver/amdgpu-toolchain.c index 459c1bdac246f..2a48ca6bb7670 100644 --- a/clang/test/Driver/amdgpu-toolchain.c +++ b/clang/test/Driver/amdgpu-toolchain.c @@ -46,3 +46,9 @@ // RUN: --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode 2>&1 \ // RUN: | FileCheck -check-prefix=DEVICE-LIBS %s // DEVICE-LIBS: "-mlink-builtin-bitcode" "[[ROCM_PATH:.+]]ockl.bc" + +// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \ +// RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \ +// RUN: -fprofile-generate %s 2>&1 | FileCheck -check-prefixes=PROFILE %s +// PROFILE: ld.lld +// PROFILE-SAME: 
"[[RESOURCE_DIR:.+]]{{/|\\\\}}lib{{/|\\\\}}amdgcn-amd-amdhsa{{/|\\\\}}libclang_rt.profile.a" diff --git a/clang/test/Driver/cuda-cross-compiling.c b/clang/test/Driver/cuda-cross-compiling.c index ed2853cae3ccc..1dea9426f75ce 100644 --- a/clang/test/Driver/cuda-cross-compiling.c +++ b/clang/test/Driver/cuda-cross-compiling.c @@ -112,3 +112,9 @@ // RUN: -nogpulib -nogpuinc -### %s 2>&1 | FileCheck -check-prefix=PATH %s // PATH: clang-nvlink-wrapper{{.*}}"--cuda-path={{.*}}/Inputs/CUDA/usr/local/cuda" + +// RUN: %clang -### --target=nvptx64-nvidia-cuda -march=sm_89 -nogpulib \ +// RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \ +// RUN: -fprofile-generate %s 2>&1 | FileCheck -check-prefixes=PROFILE %s +// PROFILE: clang-nvlink-wrapper +// PROFILE-SAME: "[[RESOURCE_DIR:.+]]{{/|\\\\}}lib{{/|\\\\}}nvptx64-nvidia-cuda{{/|\\\\}}libclang_rt.profile.a" diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c index fb1bc9ffdbbd4..e057959d62044 100644 --- a/clang/test/Driver/openmp-offload-gpu.c +++ b/clang/test/Driver/openmp-offload-gpu.c @@ -410,3 +410,21 @@ // RUN: | FileCheck --check-prefix=SHOULD-EXTRACT %s // // SHOULD-EXTRACT: clang-linker-wrapper{{.*}}"--should-extract=gfx906" + +// +// Check that `-fprofile-generate` flags are forwarded to link in the runtime +// only if present in the resource directory. 
+// +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \ +// RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \ +// RUN: --offload-arch=gfx906 -fprofile-generate -nogpulib -nogpuinc %s 2>&1 \ +// RUN: | FileCheck --check-prefix=PROFILE %s +// +// PROFILE: clang-linker-wrapper{{.*}}--device-compiler=amdgcn-amd-amdhsa=-fprofile-generate +// +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \ +// RUN: -resource-dir=%S/Inputs/resource_dir \ +// RUN: --offload-arch=gfx906 -fprofile-generate -nogpulib -nogpuinc %s 2>&1 \ +// RUN: | FileCheck --check-prefix=NO-PROFILE %s +// +// NO-PROFILE-NOT: --device-compiler=amdgcn-amd-amdhsa=-fprofile-generate _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
