Author: Yaxun (Sam) Liu Date: 2026-03-26T10:57:53-04:00 New Revision: 15c75e170f3bebf5e6b3b58c463fef0896e58e17
URL: https://github.com/llvm/llvm-project/commit/15c75e170f3bebf5e6b3b58c463fef0896e58e17 DIFF: https://github.com/llvm/llvm-project/commit/15c75e170f3bebf5e6b3b58c463fef0896e58e17.diff LOG: [Driver][HIP] Bundle AMDGPU -S output under the new offload driver (#188262) [Driver][HIP] Bundle AMDGPU -S output under the new offload driver The old offload driver emits bundled assembly code for -S in textual clang-offload-bundler format. This allows a single .s file to contain assembly code for both host and devices, which can be consumed by clang. This eases manual optimization of assembly code for host and device. There are existing HIP tests and examples depending on this feature. The new offload driver does not support it, causing regressions. This patch adds support for this feature with minor changes to the job action creations. Fixes: LCOMPILER-553 Added: Modified: clang/include/clang/Driver/Driver.h clang/lib/Driver/Driver.cpp clang/test/Driver/hip-phases.hip Removed: ################################################################################ diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index b355ee6e15007..5490e06b6f3f3 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -524,10 +524,14 @@ class Driver { /// \param Input - The input type and arguments /// \param CUID - The CUID for \p Input /// \param HostAction - The host action used in the offloading toolchain. - Action *BuildOffloadingActions(Compilation &C, - llvm::opt::DerivedArgList &Args, - const InputTy &Input, StringRef CUID, - Action *HostAction) const; + /// \param HIPAsmBundleDeviceOut - If non-null, HIP non-RDC \c -S (AMDGCN) + /// device actions are appended here and \p HostAction is returned unchanged + /// so the caller can emit a bundled \c .s via \c OffloadBundlingJobAction. 
+ Action * + BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args, + const InputTy &Input, StringRef CUID, + Action *HostAction, + ActionList *HIPAsmBundleDeviceOut = nullptr) const; /// Returns the set of bound architectures active for this offload kind. /// If there are no bound architctures we return a set containing only the diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 3e2553686d87f..ba5e50381c9f8 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4439,6 +4439,33 @@ void Driver::handleArguments(Compilation &C, DerivedArgList &Args, } } +/// HIP non-RDC \c -S for AMDGCN: emit host and device assembly separately and +/// bundle with \c clang-offload-bundler (new offload driver), instead of +/// \c llvm-offload-binary / \c clang-linker-wrapper fatbin embedding. +static bool +shouldBundleHIPAsmWithNewDriver(const Compilation &C, + const llvm::opt::DerivedArgList &Args, + const Driver &D) { + if (!C.isOffloadingHostKind(Action::OFK_HIP) || + !Args.hasArg(options::OPT_S) || Args.hasArg(options::OPT_emit_llvm) || + D.offloadDeviceOnly() || + Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) + return false; + + bool HasAMDGCNHIPDevice = false; + auto HIPTCs = C.getOffloadToolChains(Action::OFK_HIP); + for (auto It = HIPTCs.first; It != HIPTCs.second; ++It) { + const ToolChain *TC = It->second; + if (!TC) + continue; + const llvm::Triple &Tr = TC->getTriple(); + if (Tr.isSPIRV() || Tr.getArch() != llvm::Triple::amdgcn) + return false; + HasAMDGCNHIPDevice = true; + } + return HasAMDGCNHIPDevice; +} + void Driver::BuildActions(Compilation &C, DerivedArgList &Args, const InputList &Inputs, ActionList &Actions) const { llvm::PrettyStackTraceString CrashInfo("Building compilation actions"); @@ -4484,6 +4511,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, cast<InputAction>(Current)->setId(CUID); } + ActionList HIPAsmDeviceActions; + // Use the current host 
action in any of the offloading actions, if // required. if (!UseNewOffloadingDriver) @@ -4547,7 +4576,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, // Try to build the offloading actions and add the result as a dependency // to the host. if (UseNewOffloadingDriver) - Current = BuildOffloadingActions(C, Args, I, CUID, Current); + Current = BuildOffloadingActions(C, Args, I, CUID, Current, + &HIPAsmDeviceActions); // Use the current host action in any of the offloading actions, if // required. else if (OffloadBuilder->addHostDependenceToDeviceActions(Current, @@ -4558,6 +4588,16 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, break; } + // HIP non-RDC -S (AMDGCN): bundle host and device assembly like the + // classic driver instead of embedding a fat binary in host asm. + if (Current && !HIPAsmDeviceActions.empty()) { + assert(UseNewOffloadingDriver && "unexpected HIP asm bundle list"); + ActionList BundleInputs; + BundleInputs.append(HIPAsmDeviceActions); + BundleInputs.push_back(Current); + Current = C.MakeAction<OffloadBundlingJobAction>(BundleInputs); + } + // If we ended with something, add to the output list. if (Current) Actions.push_back(Current); @@ -4897,10 +4937,11 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, return Sorted; } -Action *Driver::BuildOffloadingActions(Compilation &C, - llvm::opt::DerivedArgList &Args, - const InputTy &Input, StringRef CUID, - Action *HostAction) const { +Action * +Driver::BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args, + const InputTy &Input, StringRef CUID, + Action *HostAction, + ActionList *HIPAsmBundleDeviceOut) const { // Don't build offloading actions if explicitly disabled or we do not have a // valid source input. 
if (offloadHostOnly() || !types::isSrcFile(Input.first)) @@ -5104,6 +5145,14 @@ Action *Driver::BuildOffloadingActions(Compilation &C, *C.getOffloadToolChains<Action::OFK_HIP>().first->second, nullptr, Action::OFK_HIP); } else if (HIPNoRDC) { + // Host + device assembly: defer to clang-offload-bundler (see + // BuildActions). + if (HIPAsmBundleDeviceOut && + shouldBundleHIPAsmWithNewDriver(C, Args, C.getDriver())) { + for (Action *OA : OffloadActions) + HIPAsmBundleDeviceOut->push_back(OA); + return HostAction; + } // Package all the offloading actions into a single output that can be // embedded in the host and linked. Action *PackagerAction = @@ -5257,7 +5306,8 @@ Action *Driver::ConstructPhaseAction( Args.hasFlag(options::OPT_offload_new_driver, options::OPT_no_offload_new_driver, C.getActiveOffloadKinds() != Action::OFK_None) && - !offloadDeviceOnly() && !isSaveTempsEnabled()) + !offloadDeviceOnly() && !isSaveTempsEnabled() && + !(Args.hasArg(options::OPT_S) && !Args.hasArg(options::OPT_emit_llvm))) return Input; if (isUsingLTO() && TargetDeviceOffloadKind == Action::OFK_None) { diff --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip index 47e4bfca68a39..ae4a2a6e53cda 100644 --- a/clang/test/Driver/hip-phases.hip +++ b/clang/test/Driver/hip-phases.hip @@ -67,6 +67,9 @@ // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \ // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 %s -S 2>&1 \ // RUN: | FileCheck -check-prefixes=ASM %s +// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \ +// RUN: --offload-new-driver --cuda-gpu-arch=gfx803 %s -S 2>&1 \ +// RUN: | FileCheck -check-prefixes=ASM %s // ASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) // ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // ASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) 
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
