Author: Yaxun (Sam) Liu
Date: 2026-03-26T10:57:53-04:00
New Revision: 15c75e170f3bebf5e6b3b58c463fef0896e58e17

URL: 
https://github.com/llvm/llvm-project/commit/15c75e170f3bebf5e6b3b58c463fef0896e58e17
DIFF: 
https://github.com/llvm/llvm-project/commit/15c75e170f3bebf5e6b3b58c463fef0896e58e17.diff

LOG: [Driver][HIP] Bundle AMDGPU -S output under the new offload driver 
(#188262)

[Driver][HIP] Bundle AMDGPU -S output under the new offload driver

The old offload driver emits bundled assembly code for -S in textual
clang-offload-bundler format. This allows a single .s file to contain
assembly
code for both host and devices, which can be consumed by clang. This
eases
manual optimization of assembly code for host and device. There are
existing
HIP tests and examples depending on this feature. The new offload driver
does
not support it, causing regressions. This patch adds support for this
feature
with minor changes to the job action creations.

Fixes: LCOMPILER-553

Added: 
    

Modified: 
    clang/include/clang/Driver/Driver.h
    clang/lib/Driver/Driver.cpp
    clang/test/Driver/hip-phases.hip

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Driver/Driver.h 
b/clang/include/clang/Driver/Driver.h
index b355ee6e15007..5490e06b6f3f3 100644
--- a/clang/include/clang/Driver/Driver.h
+++ b/clang/include/clang/Driver/Driver.h
@@ -524,10 +524,14 @@ class Driver {
   /// \param Input - The input type and arguments
   /// \param CUID - The CUID for \p Input
   /// \param HostAction - The host action used in the offloading toolchain.
-  Action *BuildOffloadingActions(Compilation &C,
-                                 llvm::opt::DerivedArgList &Args,
-                                 const InputTy &Input, StringRef CUID,
-                                 Action *HostAction) const;
+  /// \param HIPAsmBundleDeviceOut - If non-null, HIP non-RDC \c -S (AMDGCN)
+  /// device actions are appended here and \p HostAction is returned unchanged
+  /// so the caller can emit a bundled \c .s via \c OffloadBundlingJobAction.
+  Action *
+  BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args,
+                         const InputTy &Input, StringRef CUID,
+                         Action *HostAction,
+                         ActionList *HIPAsmBundleDeviceOut = nullptr) const;
 
   /// Returns the set of bound architectures active for this offload kind.
   /// If there are no bound architctures we return a set containing only the

diff  --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 3e2553686d87f..ba5e50381c9f8 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -4439,6 +4439,33 @@ void Driver::handleArguments(Compilation &C, 
DerivedArgList &Args,
   }
 }
 
+/// HIP non-RDC \c -S for AMDGCN: emit host and device assembly separately and
+/// bundle with \c clang-offload-bundler (new offload driver), instead of
+/// \c llvm-offload-binary / \c clang-linker-wrapper fatbin embedding.
+static bool
+shouldBundleHIPAsmWithNewDriver(const Compilation &C,
+                                const llvm::opt::DerivedArgList &Args,
+                                const Driver &D) {
+  if (!C.isOffloadingHostKind(Action::OFK_HIP) ||
+      !Args.hasArg(options::OPT_S) || Args.hasArg(options::OPT_emit_llvm) ||
+      D.offloadDeviceOnly() ||
+      Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
+    return false;
+
+  bool HasAMDGCNHIPDevice = false;
+  auto HIPTCs = C.getOffloadToolChains(Action::OFK_HIP);
+  for (auto It = HIPTCs.first; It != HIPTCs.second; ++It) {
+    const ToolChain *TC = It->second;
+    if (!TC)
+      continue;
+    const llvm::Triple &Tr = TC->getTriple();
+    if (Tr.isSPIRV() || Tr.getArch() != llvm::Triple::amdgcn)
+      return false;
+    HasAMDGCNHIPDevice = true;
+  }
+  return HasAMDGCNHIPDevice;
+}
+
 void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
                           const InputList &Inputs, ActionList &Actions) const {
   llvm::PrettyStackTraceString CrashInfo("Building compilation actions");
@@ -4484,6 +4511,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList 
&Args,
       cast<InputAction>(Current)->setId(CUID);
     }
 
+    ActionList HIPAsmDeviceActions;
+
     // Use the current host action in any of the offloading actions, if
     // required.
     if (!UseNewOffloadingDriver)
@@ -4547,7 +4576,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList 
&Args,
       // Try to build the offloading actions and add the result as a dependency
       // to the host.
       if (UseNewOffloadingDriver)
-        Current = BuildOffloadingActions(C, Args, I, CUID, Current);
+        Current = BuildOffloadingActions(C, Args, I, CUID, Current,
+                                         &HIPAsmDeviceActions);
       // Use the current host action in any of the offloading actions, if
       // required.
       else if (OffloadBuilder->addHostDependenceToDeviceActions(Current,
@@ -4558,6 +4588,16 @@ void Driver::BuildActions(Compilation &C, DerivedArgList 
&Args,
         break;
     }
 
+    // HIP non-RDC -S (AMDGCN): bundle host and device assembly like the
+    // classic driver instead of embedding a fat binary in host asm.
+    if (Current && !HIPAsmDeviceActions.empty()) {
+      assert(UseNewOffloadingDriver && "unexpected HIP asm bundle list");
+      ActionList BundleInputs;
+      BundleInputs.append(HIPAsmDeviceActions);
+      BundleInputs.push_back(Current);
+      Current = C.MakeAction<OffloadBundlingJobAction>(BundleInputs);
+    }
+
     // If we ended with something, add to the output list.
     if (Current)
       Actions.push_back(Current);
@@ -4897,10 +4937,11 @@ Driver::getOffloadArchs(Compilation &C, const 
llvm::opt::DerivedArgList &Args,
   return Sorted;
 }
 
-Action *Driver::BuildOffloadingActions(Compilation &C,
-                                       llvm::opt::DerivedArgList &Args,
-                                       const InputTy &Input, StringRef CUID,
-                                       Action *HostAction) const {
+Action *
+Driver::BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args,
+                               const InputTy &Input, StringRef CUID,
+                               Action *HostAction,
+                               ActionList *HIPAsmBundleDeviceOut) const {
   // Don't build offloading actions if explicitly disabled or we do not have a
   // valid source input.
   if (offloadHostOnly() || !types::isSrcFile(Input.first))
@@ -5104,6 +5145,14 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
              *C.getOffloadToolChains<Action::OFK_HIP>().first->second, nullptr,
              Action::OFK_HIP);
   } else if (HIPNoRDC) {
+    // Host + device assembly: defer to clang-offload-bundler (see
+    // BuildActions).
+    if (HIPAsmBundleDeviceOut &&
+        shouldBundleHIPAsmWithNewDriver(C, Args, C.getDriver())) {
+      for (Action *OA : OffloadActions)
+        HIPAsmBundleDeviceOut->push_back(OA);
+      return HostAction;
+    }
     // Package all the offloading actions into a single output that can be
     // embedded in the host and linked.
     Action *PackagerAction =
@@ -5257,7 +5306,8 @@ Action *Driver::ConstructPhaseAction(
         Args.hasFlag(options::OPT_offload_new_driver,
                      options::OPT_no_offload_new_driver,
                      C.getActiveOffloadKinds() != Action::OFK_None) &&
-        !offloadDeviceOnly() && !isSaveTempsEnabled())
+        !offloadDeviceOnly() && !isSaveTempsEnabled() &&
+        !(Args.hasArg(options::OPT_S) && !Args.hasArg(options::OPT_emit_llvm)))
       return Input;
 
     if (isUsingLTO() && TargetDeviceOffloadKind == Action::OFK_None) {

diff  --git a/clang/test/Driver/hip-phases.hip 
b/clang/test/Driver/hip-phases.hip
index 47e4bfca68a39..ae4a2a6e53cda 100644
--- a/clang/test/Driver/hip-phases.hip
+++ b/clang/test/Driver/hip-phases.hip
@@ -67,6 +67,9 @@
 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 %s -S 2>&1 \
 // RUN: | FileCheck -check-prefixes=ASM %s
+// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
+// RUN: --offload-new-driver --cuda-gpu-arch=gfx803 %s -S 2>&1 \
+// RUN: | FileCheck -check-prefixes=ASM %s
 // ASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], 
(device-[[T]], [[ARCH:gfx803]])
 // ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, 
(device-[[T]], [[ARCH]])
 // ASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to