================
@@ -9658,6 +9658,21 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
(TC->getTriple().isAMDGPU() || TC->getTriple().isNVPTX()))
LinkerArgs.emplace_back("-lompdevice");
+ // With PGO/coverage instrumentation, GPU device code references the
+ // device profile runtime (__llvm_profile_instrument_gpu and the
+ // __llvm_profile_sections bounds table emitted by
+ // InstrProfilingPlatformGPU). The offload device link does not otherwise
+ // pull it in, so forward the static device profile runtime to the GPU
+ // device linker. The archive is arch-suffixed, so pass its full path
+ // rather than a -l name.
+ if (ToolChain::needsProfileRT(Args) &&
+ (TC->getTriple().isAMDGPU() || TC->getTriple().isNVPTX())) {
+ std::string ProfileRT =
+ TC->getCompilerRT(Args, "profile", ToolChain::FT_Static);
+ if (TC->getVFS().exists(ProfileRT))
+ LinkerArgs.emplace_back(Args.MakeArgString(ProfileRT));
----------------
jhuber6 wrote:
I thought the intended way to handle this was to just forward `-fprofile` (or
whatever the relevant flag is) through the linker wrapper, and have the generated
`clang --target=amdgcn-amd-amdhsa` compile job link it in like any other target would?
https://github.com/llvm/llvm-project/pull/203056
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits