https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/185761
>From aac151d5b037557704ab35c01acdbe6062a95895 Mon Sep 17 00:00:00 2001 From: Joseph Huber <[email protected]> Date: Tue, 10 Mar 2026 16:17:09 -0500 Subject: [PATCH 1/3] [Clang] Correctly link and handle PGO options on the GPU Summary: Currently, the GPU targets ignore the standard profiling arguments. This PR changes the behavior to use the standard handling, which links in the now-present `libclang_rt.profile.a` if the user built with the compiler-rt support enabled. If it is not present this is a linker error and we can always suppress with `-Xarch_host` and `-Xarch_device`. Hopefully this doesn't cause some people pain if they're used to doing `-fprofile-generate` on a CPU unguarded since it was a strange mix of a no-op and not a no-op on the GPU until now. --- clang/lib/Driver/ToolChains/AMDGPU.cpp | 2 ++ clang/lib/Driver/ToolChains/Clang.cpp | 6 +++++- clang/lib/Driver/ToolChains/Cuda.cpp | 2 ++ clang/test/Driver/amdgpu-toolchain.c | 4 ++++ clang/test/Driver/cuda-cross-compiling.c | 5 +++++ clang/test/Driver/openmp-offload-gpu.c | 9 +++++++++ 6 files changed, 27 insertions(+), 1 deletion(-) diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 7bbdb71b1e24f..54fbd86168602 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -632,6 +632,8 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, Args.MakeArgString("-plugin-opt=-mattr=" + llvm::join(Features, ","))); } + getToolChain().addProfileRTLibs(Args, CmdArgs); + if (Args.hasArg(options::OPT_stdlib)) CmdArgs.append({"-lc", "-lm"}); if (Args.hasArg(options::OPT_startfiles)) { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 3b852528d92c4..5f9edb205a0bb 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -9357,7 +9357,11 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, 
OPT_flto_partitions_EQ, OPT_flto_EQ, OPT_hipspv_pass_plugin_EQ, - OPT_use_spirv_backend}; + OPT_use_spirv_backend, + OPT_fprofile_generate, + OPT_fprofile_generate_EQ, + OPT_fprofile_instr_generate, + OPT_fprofile_instr_generate_EQ}; const llvm::DenseSet<unsigned> LinkerOptions{OPT_mllvm, OPT_Zlinker_input}; auto ShouldForwardForToolChain = [&](Arg *A, const ToolChain &TC) { // Don't forward -mllvm to toolchains that don't support LLVM. diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index e0020176800fd..2ca8886936f6c 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -643,6 +643,8 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, llvm::sys::path::append(DefaultLibPath, CLANG_INSTALL_LIBDIR_BASENAME); CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath)); + getToolChain().addProfileRTLibs(Args, CmdArgs); + if (Args.hasArg(options::OPT_stdlib)) CmdArgs.append({"-lc", "-lm"}); if (Args.hasArg(options::OPT_startfiles)) { diff --git a/clang/test/Driver/amdgpu-toolchain.c b/clang/test/Driver/amdgpu-toolchain.c index 459c1bdac246f..384a7617f8859 100644 --- a/clang/test/Driver/amdgpu-toolchain.c +++ b/clang/test/Driver/amdgpu-toolchain.c @@ -46,3 +46,7 @@ // RUN: --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode 2>&1 \ // RUN: | FileCheck -check-prefix=DEVICE-LIBS %s // DEVICE-LIBS: "-mlink-builtin-bitcode" "[[ROCM_PATH:.+]]ockl.bc" + +// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \ +// RUN: -fprofile-generate %s 2>&1 | FileCheck -check-prefixes=PROFILE %s +// PROFILE: ld.lld{{.*}}libclang_rt.profile.a diff --git a/clang/test/Driver/cuda-cross-compiling.c b/clang/test/Driver/cuda-cross-compiling.c index ed2853cae3ccc..10323408a3732 100644 --- a/clang/test/Driver/cuda-cross-compiling.c +++ b/clang/test/Driver/cuda-cross-compiling.c @@ -112,3 +112,8 @@ // RUN: -nogpulib -nogpuinc -### %s 2>&1 | FileCheck -check-prefix=PATH %s 
// PATH: clang-nvlink-wrapper{{.*}}"--cuda-path={{.*}}/Inputs/CUDA/usr/local/cuda" + +// RUN: %clang -### --target=nvptx64-nvidia-cuda -march=sm_89 -nogpulib \ +// RUN: -fprofile-generate %s 2>&1 | FileCheck -check-prefixes=PROFILE %s + +// PROFILE: clang-nvlink-wrapper{{.*}}libclang_rt.profile.a diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c index fb1bc9ffdbbd4..727d2387a99a0 100644 --- a/clang/test/Driver/openmp-offload-gpu.c +++ b/clang/test/Driver/openmp-offload-gpu.c @@ -410,3 +410,12 @@ // RUN: | FileCheck --check-prefix=SHOULD-EXTRACT %s // // SHOULD-EXTRACT: clang-linker-wrapper{{.*}}"--should-extract=gfx906" + +// +// Check that `-fprofile-generate` flags are forwarded to link in the runtime. +// +// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \ +// RUN: --offload-arch=gfx906 -fprofile-generate -nogpulib -nogpuinc %s 2>&1 \ +// RUN: | FileCheck --check-prefix=PROFILE %s +// +// PROFILE: clang-linker-wrapper{{.*}}--device-compiler=amdgcn-amd-amdhsa=-fprofile-generate >From f0ae4734fe4ef91f64ba0caef4be9754a63cfc31 Mon Sep 17 00:00:00 2001 From: Joseph Huber <[email protected]> Date: Wed, 18 Mar 2026 15:48:15 -0500 Subject: [PATCH 2/3] update tests --- .../lib/amdgcn-amd-amdhsa/libclang_rt.profile.a | 0 .../lib/nvptx64-nvidia-cuda/libclang_rt.profile.a | 0 clang/test/Driver/amdgpu-toolchain.c | 5 ++++- clang/test/Driver/cuda-cross-compiling.c | 6 ++++-- 4 files changed, 8 insertions(+), 3 deletions(-) create mode 100644 clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/amdgcn-amd-amdhsa/libclang_rt.profile.a create mode 100644 clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/nvptx64-nvidia-cuda/libclang_rt.profile.a diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/amdgcn-amd-amdhsa/libclang_rt.profile.a b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/amdgcn-amd-amdhsa/libclang_rt.profile.a new file mode 100644 
index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/nvptx64-nvidia-cuda/libclang_rt.profile.a b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/nvptx64-nvidia-cuda/libclang_rt.profile.a new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/amdgpu-toolchain.c b/clang/test/Driver/amdgpu-toolchain.c index 384a7617f8859..6ad245e1dd31f 100644 --- a/clang/test/Driver/amdgpu-toolchain.c +++ b/clang/test/Driver/amdgpu-toolchain.c @@ -48,5 +48,8 @@ // DEVICE-LIBS: "-mlink-builtin-bitcode" "[[ROCM_PATH:.+]]ockl.bc" // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \ +// RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \ // RUN: -fprofile-generate %s 2>&1 | FileCheck -check-prefixes=PROFILE %s -// PROFILE: ld.lld{{.*}}libclang_rt.profile.a +// PROFILE: ld.lld +// PROFILE-SAME: "-L[[RESOURCE_DIR:.*]]/lib/amdgcn-amd-amdhsa" +// PROFILE-SAME: "[[RESOURCE_DIR:.*]]/lib/amdgcn-amd-amdhsa/libclang_rt.profile.a" diff --git a/clang/test/Driver/cuda-cross-compiling.c b/clang/test/Driver/cuda-cross-compiling.c index 10323408a3732..841205843937e 100644 --- a/clang/test/Driver/cuda-cross-compiling.c +++ b/clang/test/Driver/cuda-cross-compiling.c @@ -114,6 +114,8 @@ // PATH: clang-nvlink-wrapper{{.*}}"--cuda-path={{.*}}/Inputs/CUDA/usr/local/cuda" // RUN: %clang -### --target=nvptx64-nvidia-cuda -march=sm_89 -nogpulib \ +// RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \ // RUN: -fprofile-generate %s 2>&1 | FileCheck -check-prefixes=PROFILE %s - -// PROFILE: clang-nvlink-wrapper{{.*}}libclang_rt.profile.a +// PROFILE: clang-nvlink-wrapper +// PROFILE-SAME: "-L[[RESOURCE_DIR:.*]]/lib/nvptx64-nvidia-cuda" +// PROFILE-SAME: "[[RESOURCE_DIR:.*]]/lib/nvptx64-nvidia-cuda/libclang_rt.profile.a" >From 7924ca87bf14fec7ce1ebc9181300ca6cdfded67 Mon Sep 17 00:00:00 2001 From: Joseph Huber <[email protected]> Date: Wed, 18 Mar 2026 
16:03:43 -0500 Subject: [PATCH 3/3] Only add flags if user has support --- clang/lib/Driver/ToolChains/Clang.cpp | 10 ++++++++++ clang/test/Driver/amdgpu-toolchain.c | 2 +- clang/test/Driver/cuda-cross-compiling.c | 2 +- clang/test/Driver/openmp-offload-gpu.c | 1 + 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 5f9edb205a0bb..6416baf9126ff 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -9364,6 +9364,16 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, OPT_fprofile_instr_generate_EQ}; const llvm::DenseSet<unsigned> LinkerOptions{OPT_mllvm, OPT_Zlinker_input}; auto ShouldForwardForToolChain = [&](Arg *A, const ToolChain &TC) { + auto HasProfileRT = TC.getVFS().exists( + TC.getCompilerRT(Args, "profile", ToolChain::FT_Static)); + // Don't forward profiling arguments if the toolchain doesn't support it. + // Without this check using it on the host would result in linker errors. + if (!HasProfileRT && + (A->getOption().matches(OPT_fprofile_generate) || + A->getOption().matches(OPT_fprofile_generate_EQ) || + A->getOption().matches(OPT_fprofile_instr_generate) || + A->getOption().matches(OPT_fprofile_instr_generate_EQ))) + return false; // Don't forward -mllvm to toolchains that don't support LLVM. 
return TC.HasNativeLLVMSupport() || A->getOption().getID() != OPT_mllvm; }; diff --git a/clang/test/Driver/amdgpu-toolchain.c b/clang/test/Driver/amdgpu-toolchain.c index 6ad245e1dd31f..712149a7a39f3 100644 --- a/clang/test/Driver/amdgpu-toolchain.c +++ b/clang/test/Driver/amdgpu-toolchain.c @@ -52,4 +52,4 @@ // RUN: -fprofile-generate %s 2>&1 | FileCheck -check-prefixes=PROFILE %s // PROFILE: ld.lld // PROFILE-SAME: "-L[[RESOURCE_DIR:.*]]/lib/amdgcn-amd-amdhsa" -// PROFILE-SAME: "[[RESOURCE_DIR:.*]]/lib/amdgcn-amd-amdhsa/libclang_rt.profile.a" +// PROFILE-SAME: "[[RESOURCE_DIR]]/lib/amdgcn-amd-amdhsa/libclang_rt.profile.a" diff --git a/clang/test/Driver/cuda-cross-compiling.c b/clang/test/Driver/cuda-cross-compiling.c index 841205843937e..80f1804d90267 100644 --- a/clang/test/Driver/cuda-cross-compiling.c +++ b/clang/test/Driver/cuda-cross-compiling.c @@ -118,4 +118,4 @@ // RUN: -fprofile-generate %s 2>&1 | FileCheck -check-prefixes=PROFILE %s // PROFILE: clang-nvlink-wrapper // PROFILE-SAME: "-L[[RESOURCE_DIR:.*]]/lib/nvptx64-nvidia-cuda" -// PROFILE-SAME: "[[RESOURCE_DIR:.*]]/lib/nvptx64-nvidia-cuda/libclang_rt.profile.a" +// PROFILE-SAME: "[[RESOURCE_DIR]]/lib/nvptx64-nvidia-cuda/libclang_rt.profile.a" diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c index 727d2387a99a0..f7608ad1a0e22 100644 --- a/clang/test/Driver/openmp-offload-gpu.c +++ b/clang/test/Driver/openmp-offload-gpu.c @@ -415,6 +415,7 @@ // Check that `-fprofile-generate` flags are forwarded to link in the runtime. // // RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \ +// RUN: -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \ // RUN: --offload-arch=gfx906 -fprofile-generate -nogpulib -nogpuinc %s 2>&1 \ // RUN: | FileCheck --check-prefix=PROFILE %s // _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
