https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/202699
>From 230a76e5fe5b54c88a1d9fc5fe2c9515e0bb13d6 Mon Sep 17 00:00:00 2001 From: Joseph Huber <[email protected]> Date: Tue, 9 Jun 2026 10:53:33 -0500 Subject: [PATCH] [HIP] Fix `-flto` overriding `--no-lto` now that it is the default Summary: The previous changes to LTO caused the `-flto` flag to be passed by default, which overrode the hack we did to revert to the old non-LTO pipeline. This is a temporary hack so I'm hacking it even further to fix it. --- .../linker-wrapper-hip-no-rdc.c | 6 +++--- .../clang-linker-wrapper/linker-wrapper.c | 14 +++++++------- .../clang-linker-wrapper/ClangLinkerWrapper.cpp | 17 ++++++----------- 3 files changed, 16 insertions(+), 21 deletions(-) diff --git a/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper-hip-no-rdc.c b/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper-hip-no-rdc.c index 5c5b7b1eabfab..80ac493825aad 100644 --- a/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper-hip-no-rdc.c +++ b/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper-hip-no-rdc.c @@ -59,11 +59,11 @@ __attribute__((visibility("protected"), used)) int x; // Without --no-lto the AMDGPU device compilation uses the LTO pipeline // (-flto). // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --wrapper-verbose --dry-run --emit-fatbin-only --linker-path=/usr/bin/ld %t.out -o %t.lto.hipfb 2>&1 | FileCheck %s --check-prefix=LTO -// LTO: clang{{.*}} -mcpu=gfx1200{{.*}} -flto +// LTO: clang{{.*}} -mcpu=gfx1200 // With --no-lto the AMDGPU device compilation uses the conventional non-LTO // pipeline: -flto must not be passed, and '-x ir' must be passed so Clang // compiles the bitcode (stored in an object-extension file) instead of // handing it to the LTO link. 
-// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --wrapper-verbose --dry-run --no-lto --emit-fatbin-only --linker-path=/usr/bin/ld %t.out -o %t.nolto.hipfb 2>&1 | FileCheck %s --check-prefix=NO-LTO --implicit-check-not=-flto -// NO-LTO: clang{{.*}} -mcpu=gfx1200{{.*}} -x ir +// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --wrapper-verbose --dry-run --no-lto --emit-fatbin-only --linker-path=/usr/bin/ld %t.out -o %t.nolto.hipfb 2>&1 | FileCheck %s --check-prefix=NO-LTO +// NO-LTO: clang{{.*}} -mcpu=gfx1200{{.*}} -x ir {{.*}}-flto=none diff --git a/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper.c b/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper.c index 39b9bcd7425ab..a19a11e87afdb 100644 --- a/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper.c +++ b/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper.c @@ -40,7 +40,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LINK -// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. --target=amdgcn-amd-amdhsa -mcpu=gfx908 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o +// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. 
--target=amdgcn-amd-amdhsa -mcpu=gfx908 -Wl,--no-undefined {{.*}}.o {{.*}}.o // RUN: llvm-offload-binary -o %t.out \ // RUN: --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 \ @@ -49,7 +49,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --device-compiler=--save-temps \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LTO-TEMPS -// AMDGPU-LTO-TEMPS: clang{{.*}} --target=amdgcn-amd-amdhsa -mcpu=gfx1030 -flto {{.*}}-save-temps +// AMDGPU-LTO-TEMPS: clang{{.*}} --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}-save-temps // RUN: llvm-offload-binary -o %t.out \ // RUN: --image=file=%t.spirv.bc,kind=sycl,triple=spirv64-unknown-unknown,arch=generic @@ -159,7 +159,7 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --clang-backend \ // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CLANG-BACKEND -// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. --target=amdgcn-amd-amdhsa -mcpu=gfx908 -flto -Wl,--no-undefined {{.*}}.o +// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. --target=amdgcn-amd-amdhsa -mcpu=gfx908 -Wl,--no-undefined {{.*}}.o // RUN: llvm-offload-binary -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 @@ -182,8 +182,8 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld %t-on.o %t-off.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=AMD-TARGET-ID -// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx90a:xnack+.img. 
--target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o -// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx90a:xnack-.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o +// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx90a:xnack+.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -Wl,--no-undefined {{.*}}.o {{.*}}.o +// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx90a:xnack-.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -Wl,--no-undefined {{.*}}.o {{.*}}.o // RUN: llvm-offload-binary -o %t-lib.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=generic @@ -198,8 +198,8 @@ __attribute__((visibility("protected"), used)) int x; // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \ // RUN: --linker-path=/usr/bin/ld %t1.o %t2.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=ARCH-ALL -// ARCH-ALL: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx90a.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o -// ARCH-ALL: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. --target=amdgcn-amd-amdhsa -mcpu=gfx908 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o +// ARCH-ALL: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx90a.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a -Wl,--no-undefined {{.*}}.o {{.*}}.o +// ARCH-ALL: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. 
--target=amdgcn-amd-amdhsa -mcpu=gfx908 -Wl,--no-undefined {{.*}}.o {{.*}}.o // RUN: llvm-offload-binary -o %t.out \ // RUN: --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \ diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 6e4fc7060389c..cfdd11e1d298d 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -534,16 +534,6 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args, Triple.isAMDGPU() ? CmdArgs.push_back(Args.MakeArgString("-mcpu=" + Arch)) : CmdArgs.push_back(Args.MakeArgString("-march=" + Arch)); - // AMDGPU defaults to the LTO pipeline. Non-RDC HIP uses the conventional - // non-LTO pipeline so device codegen still runs here, in parallel, instead - // of being deferred to the LTO link. - // FIXME: This is a stop-gap for non-RDC. Longer term, RDC and non-RDC should - // share a unified interface so runtime libraries can be provided to non-RDC - // compilations without relying on -mlink-builtin-bitcode. - bool NonLTOAMDGPU = Triple.isAMDGPU() && Args.hasArg(OPT_no_lto); - if (Triple.isAMDGPU() && !NonLTOAMDGPU) - CmdArgs.push_back("-flto"); - // Forward all of the `--offload-opt` and `-mllvm` options to the device. for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm)) CmdArgs.append( @@ -557,7 +547,9 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args, // Force the IR input language so Clang runs the compile and backend phases // instead of treating them as linker inputs, which would defer codegen to // the LTO link and defeat the non-LTO pipeline. - if (NonLTOAMDGPU) + // FIXME: This is a stop-gap for non-RDC. Longer term, RDC and non-RDC should + // share a unified interface. 
+ if (Args.hasArg(OPT_no_lto)) CmdArgs.append({"-x", "ir"}); for (StringRef InputFile : InputFiles) CmdArgs.push_back(InputFile); @@ -621,6 +613,9 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args, for (StringRef Arg : Args.getAllArgValues(OPT_compiler_arg_EQ)) CmdArgs.push_back(Args.MakeArgString(Arg)); + if (Args.hasArg(OPT_no_lto)) + CmdArgs.append({"-flto=none", "-Wno-unused-command-line-argument"}); + if (Error Err = executeCommands(*ClangPath, CmdArgs)) return std::move(Err); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
