Author: Joseph Huber Date: 2026-06-18T10:27:26-05:00 New Revision: 154115307ba171489125550ece373302f0ab01e1
URL: https://github.com/llvm/llvm-project/commit/154115307ba171489125550ece373302f0ab01e1 DIFF: https://github.com/llvm/llvm-project/commit/154115307ba171489125550ece373302f0ab01e1.diff LOG: [HIP] Remove default `-flto-partitions=8` in the HIP toolchain (#203948) Summary: This was added and made it into a release, but it never should've been a default argument. Partitioning the LTO is a fundamentally different compilation model and has real impacts on the generated code. Right now it is added silently, which breaks non-Hostcall printf and degrades performance due to split uselists. This is a contract that should not be made default. "Compile times" is not a justification to silently change compilation semantics; that is the user's build system's job. Partitioning to a magic number is not an appropriate solution when passing -flto-partitions=8 or `-Xarch_device -flto-partitions=8` is perfectly viable and not hidden from the user. This resolves the 12% performance regression observed when switching to the LTO toolchain in HIP for dcsrgemm. 
Added: Modified: clang/lib/Driver/ToolChains/HIPAMD.cpp clang/test/Driver/hip-toolchain-rdc-static-lib.hip clang/test/Driver/hip-toolchain-rdc.hip Removed: ################################################################################ diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp index 48d210b82784c..e48c28408dc67 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.cpp +++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -294,12 +294,6 @@ HIPAMDToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, llvm::opt::DerivedArgList *DAL = ROCMToolChain::TranslateArgs(Args, BoundArch, DeviceOffloadKind); - if (!Args.hasArg(options::OPT_flto_partitions_EQ)) { - const OptTable &Opts = getDriver().getOpts(); - DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_flto_partitions_EQ), - "8"); - } - return DAL; } diff --git a/clang/test/Driver/hip-toolchain-rdc-static-lib.hip b/clang/test/Driver/hip-toolchain-rdc-static-lib.hip index 32a45296fcd96..71ac901b69299 100644 --- a/clang/test/Driver/hip-toolchain-rdc-static-lib.hip +++ b/clang/test/Driver/hip-toolchain-rdc-static-lib.hip @@ -2,6 +2,7 @@ // RUN: -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \ // RUN: --no-offload-new-driver --emit-static-lib -nogpulib \ // RUN: -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \ +// RUN: -flto-partitions=8 \ // RUN: %S/Inputs/hip_multiple_inputs/a.cu \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s diff --git a/clang/test/Driver/hip-toolchain-rdc.hip b/clang/test/Driver/hip-toolchain-rdc.hip index 414561a8eadb1..7e100c521441d 100644 --- a/clang/test/Driver/hip-toolchain-rdc.hip +++ b/clang/test/Driver/hip-toolchain-rdc.hip @@ -5,6 +5,7 @@ // RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib2 \ // RUN: -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \ // RUN: --no-offload-new-driver -fhip-dump-offload-linker-script \ +// RUN: -flto-partitions=8 \ // RUN: %S/Inputs/hip_multiple_inputs/a.cu \ // 
RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck -check-prefixes=CHECK,LNX %s @@ -16,6 +17,7 @@ // RUN: --hip-device-lib-path=%S/Inputs/hip_multiple_inputs/lib2 \ // RUN: -fuse-ld=lld -B%S/Inputs/lld -fgpu-rdc -nogpuinc \ // RUN: --no-offload-new-driver -fhip-dump-offload-linker-script \ +// RUN: -flto-partitions=8 \ // RUN: %S/Inputs/hip_multiple_inputs/a.cu \ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck -check-prefixes=CHECK,MSVC %s @@ -170,11 +172,13 @@ // RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \ // RUN: -L. -foffload-lto %s 2>&1 | FileCheck -check-prefix=LTO_DEFAULT %s -// LTO_DEFAULT: lld{{.*}}"--lto-partitions=8" +// LTO_DEFAULT: lld +// LTO_DEFAULT-NOT: "--lto-partitions= // RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --offload-new-driver \ // RUN: -L. -foffload-lto %s 2>&1 | FileCheck -check-prefix=LTO_DEFAULT_NEW %s -// LTO_DEFAULT_NEW: clang-linker-wrapper{{.*}}"--device-compiler=amdgcn-amd-amdhsa=-flto-partitions=8" +// LTO_DEFAULT_NEW: clang-linker-wrapper +// LTO_DEFAULT_NEW-NOT: "--device-compiler=amdgcn-amd-amdhsa=-flto-partitions= // RUN: %clang -### -fgpu-rdc --offload-arch=gfx90a -nogpulib -nogpuinc --no-offload-new-driver \ // RUN: -L. -foffload-lto -flto-partitions=42 %s 2>&1 | FileCheck -check-prefix=LTO_PARTS %s _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
