https://github.com/jhuber6 updated 
https://github.com/llvm/llvm-project/pull/185761

>From aac151d5b037557704ab35c01acdbe6062a95895 Mon Sep 17 00:00:00 2001
From: Joseph Huber <[email protected]>
Date: Tue, 10 Mar 2026 16:17:09 -0500
Subject: [PATCH 1/2] [Clang] Correctly link and handle PGO options on the GPU

Summary:
Currently, the GPU targets ignore the standard profiling arguments. This
PR changes the behavior to use the standard handling, which links the in
the now-present `libclang_rt.profile.a` if the user built with the
compiler-rt support enabled. If it is not present this is a linker error
and we can always suppress with `-Xarch_host` and `-Xarch_device`.
Hopefully this doesn't cause some people pain if they're used to doing
`-fprofile-generate` on a CPU unguarded since it was a stange mix of a
no-op and not a no-op on the GPU until now.
---
 clang/lib/Driver/ToolChains/AMDGPU.cpp   | 2 ++
 clang/lib/Driver/ToolChains/Clang.cpp    | 6 +++++-
 clang/lib/Driver/ToolChains/Cuda.cpp     | 2 ++
 clang/test/Driver/amdgpu-toolchain.c     | 4 ++++
 clang/test/Driver/cuda-cross-compiling.c | 5 +++++
 clang/test/Driver/openmp-offload-gpu.c   | 9 +++++++++
 6 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp 
b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 7bbdb71b1e24f..54fbd86168602 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -632,6 +632,8 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
         Args.MakeArgString("-plugin-opt=-mattr=" + llvm::join(Features, ",")));
   }
 
+  getToolChain().addProfileRTLibs(Args, CmdArgs);
+
   if (Args.hasArg(options::OPT_stdlib))
     CmdArgs.append({"-lc", "-lm"});
   if (Args.hasArg(options::OPT_startfiles)) {
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp
index 3b852528d92c4..5f9edb205a0bb 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -9357,7 +9357,11 @@ void LinkerWrapper::ConstructJob(Compilation &C, const 
JobAction &JA,
       OPT_flto_partitions_EQ,
       OPT_flto_EQ,
       OPT_hipspv_pass_plugin_EQ,
-      OPT_use_spirv_backend};
+      OPT_use_spirv_backend,
+      OPT_fprofile_generate,
+      OPT_fprofile_generate_EQ,
+      OPT_fprofile_instr_generate,
+      OPT_fprofile_instr_generate_EQ};
   const llvm::DenseSet<unsigned> LinkerOptions{OPT_mllvm, OPT_Zlinker_input};
   auto ShouldForwardForToolChain = [&](Arg *A, const ToolChain &TC) {
     // Don't forward -mllvm to toolchains that don't support LLVM.
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp 
b/clang/lib/Driver/ToolChains/Cuda.cpp
index e0020176800fd..2ca8886936f6c 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -643,6 +643,8 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
   llvm::sys::path::append(DefaultLibPath, CLANG_INSTALL_LIBDIR_BASENAME);
   CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
 
+  getToolChain().addProfileRTLibs(Args, CmdArgs);
+
   if (Args.hasArg(options::OPT_stdlib))
     CmdArgs.append({"-lc", "-lm"});
   if (Args.hasArg(options::OPT_startfiles)) {
diff --git a/clang/test/Driver/amdgpu-toolchain.c 
b/clang/test/Driver/amdgpu-toolchain.c
index 459c1bdac246f..384a7617f8859 100644
--- a/clang/test/Driver/amdgpu-toolchain.c
+++ b/clang/test/Driver/amdgpu-toolchain.c
@@ -46,3 +46,7 @@
 // RUN:  --rocm-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode 2>&1 \
 // RUN: | FileCheck -check-prefix=DEVICE-LIBS %s
 // DEVICE-LIBS: "-mlink-builtin-bitcode" "[[ROCM_PATH:.+]]ockl.bc"
+
+// RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
+// RUN:   -fprofile-generate %s 2>&1 | FileCheck -check-prefixes=PROFILE %s
+// PROFILE: ld.lld{{.*}}libclang_rt.profile.a
diff --git a/clang/test/Driver/cuda-cross-compiling.c 
b/clang/test/Driver/cuda-cross-compiling.c
index ed2853cae3ccc..10323408a3732 100644
--- a/clang/test/Driver/cuda-cross-compiling.c
+++ b/clang/test/Driver/cuda-cross-compiling.c
@@ -112,3 +112,8 @@
 // RUN:   -nogpulib -nogpuinc -### %s 2>&1 | FileCheck -check-prefix=PATH %s
 
 // PATH: 
clang-nvlink-wrapper{{.*}}"--cuda-path={{.*}}/Inputs/CUDA/usr/local/cuda"
+
+// RUN: %clang -### --target=nvptx64-nvidia-cuda -march=sm_89 -nogpulib \
+// RUN:   -fprofile-generate %s 2>&1 | FileCheck -check-prefixes=PROFILE %s
+
+// PROFILE: clang-nvlink-wrapper{{.*}}libclang_rt.profile.a
diff --git a/clang/test/Driver/openmp-offload-gpu.c 
b/clang/test/Driver/openmp-offload-gpu.c
index fb1bc9ffdbbd4..727d2387a99a0 100644
--- a/clang/test/Driver/openmp-offload-gpu.c
+++ b/clang/test/Driver/openmp-offload-gpu.c
@@ -410,3 +410,12 @@
 // RUN:   | FileCheck --check-prefix=SHOULD-EXTRACT %s
 //
 // SHOULD-EXTRACT: clang-linker-wrapper{{.*}}"--should-extract=gfx906"
+
+//
+// Check that `-fprofile-generate` flags are forwarded to link in the runtime.
+//
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp \
+// RUN:     --offload-arch=gfx906 -fprofile-generate -nogpulib -nogpuinc %s 
2>&1 \
+// RUN:   | FileCheck --check-prefix=PROFILE %s
+//
+// PROFILE: 
clang-linker-wrapper{{.*}}--device-compiler=amdgcn-amd-amdhsa=-fprofile-generate

>From f0ae4734fe4ef91f64ba0caef4be9754a63cfc31 Mon Sep 17 00:00:00 2001
From: Joseph Huber <[email protected]>
Date: Wed, 18 Mar 2026 15:48:15 -0500
Subject: [PATCH 2/2] update tests

---
 .../lib/amdgcn-amd-amdhsa/libclang_rt.profile.a             | 0
 .../lib/nvptx64-nvidia-cuda/libclang_rt.profile.a           | 0
 clang/test/Driver/amdgpu-toolchain.c                        | 5 ++++-
 clang/test/Driver/cuda-cross-compiling.c                    | 6 ++++--
 4 files changed, 8 insertions(+), 3 deletions(-)
 create mode 100644 
clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/amdgcn-amd-amdhsa/libclang_rt.profile.a
 create mode 100644 
clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/nvptx64-nvidia-cuda/libclang_rt.profile.a

diff --git 
a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/amdgcn-amd-amdhsa/libclang_rt.profile.a
 
b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/amdgcn-amd-amdhsa/libclang_rt.profile.a
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git 
a/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/nvptx64-nvidia-cuda/libclang_rt.profile.a
 
b/clang/test/Driver/Inputs/resource_dir_with_per_target_subdir/lib/nvptx64-nvidia-cuda/libclang_rt.profile.a
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/amdgpu-toolchain.c 
b/clang/test/Driver/amdgpu-toolchain.c
index 384a7617f8859..6ad245e1dd31f 100644
--- a/clang/test/Driver/amdgpu-toolchain.c
+++ b/clang/test/Driver/amdgpu-toolchain.c
@@ -48,5 +48,8 @@
 // DEVICE-LIBS: "-mlink-builtin-bitcode" "[[ROCM_PATH:.+]]ockl.bc"
 
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
+// RUN:   -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
 // RUN:   -fprofile-generate %s 2>&1 | FileCheck -check-prefixes=PROFILE %s
-// PROFILE: ld.lld{{.*}}libclang_rt.profile.a
+//      PROFILE: ld.lld
+// PROFILE-SAME: "-L[[RESOURCE_DIR:.*]]/lib/amdgcn-amd-amdhsa"
+// PROFILE-SAME: 
"[[RESOURCE_DIR:.*]]/lib/amdgcn-amd-amdhsa/libclang_rt.profile.a"
diff --git a/clang/test/Driver/cuda-cross-compiling.c 
b/clang/test/Driver/cuda-cross-compiling.c
index 10323408a3732..841205843937e 100644
--- a/clang/test/Driver/cuda-cross-compiling.c
+++ b/clang/test/Driver/cuda-cross-compiling.c
@@ -114,6 +114,8 @@
 // PATH: 
clang-nvlink-wrapper{{.*}}"--cuda-path={{.*}}/Inputs/CUDA/usr/local/cuda"
 
 // RUN: %clang -### --target=nvptx64-nvidia-cuda -march=sm_89 -nogpulib \
+// RUN:   -resource-dir=%S/Inputs/resource_dir_with_per_target_subdir \
 // RUN:   -fprofile-generate %s 2>&1 | FileCheck -check-prefixes=PROFILE %s
-
-// PROFILE: clang-nvlink-wrapper{{.*}}libclang_rt.profile.a
+//      PROFILE: clang-nvlink-wrapper
+// PROFILE-SAME: "-L[[RESOURCE_DIR:.*]]/lib/nvptx64-nvidia-cuda"
+// PROFILE-SAME: 
"[[RESOURCE_DIR:.*]]/lib/nvptx64-nvidia-cuda/libclang_rt.profile.a"

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to