llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Yaxun (Sam) Liu (yxsamliu) <details> <summary>Changes</summary> [Driver][HIP] Bundle AMDGPU -S output under the new offload driver The old offload driver emits bundled assembly code for -S in textual clang-offload-bundler format. This allows a single .s file to contain assembly code for both host and devices, which can be consumed by clang. This eases manual optimization of assembly code for host and device. There are existing HIP tests and examples depending on this feature. The new offload driver does not support it, causing regressions. This patch adds support for this feature with minor changes to the job action creations. Fixes: LCOMPILER-553 --- Full diff: https://github.com/llvm/llvm-project/pull/188262.diff 3 Files Affected: - (modified) clang/include/clang/Driver/Driver.h (+5-1) - (modified) clang/lib/Driver/Driver.cpp (+55-3) - (modified) clang/test/Driver/hip-phases.hip (+3) ``````````diff diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index b355ee6e15007..e9a9914647cb2 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -524,10 +524,14 @@ class Driver { /// \param Input - The input type and arguments /// \param CUID - The CUID for \p Input /// \param HostAction - The host action used in the offloading toolchain. + /// \param HIPAsmBundleDeviceOut - If non-null, HIP non-RDC \c -S (AMDGCN) + /// device actions are appended here and \p HostAction is returned unchanged + /// so the caller can emit a bundled \c .s via \c OffloadBundlingJobAction. Action *BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args, const InputTy &Input, StringRef CUID, - Action *HostAction) const; + Action *HostAction, + ActionList *HIPAsmBundleDeviceOut = nullptr) const; /// Returns the set of bound architectures active for this offload kind. /// If there are no bound architctures we return a set containing only the diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 3e2553686d87f..0f47009470595 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4439,6 +4439,35 @@ void Driver::handleArguments(Compilation &C, DerivedArgList &Args, } } +/// HIP non-RDC \c -S for AMDGCN: emit host and device assembly separately and +/// bundle with \c clang-offload-bundler (new offload driver), instead of +/// \c llvm-offload-binary / \c clang-linker-wrapper fatbin embedding. +static bool shouldBundleHIPAsmWithNewDriver(const Compilation &C, + const llvm::opt::DerivedArgList &Args, + const Driver &D) { + if (!C.isOffloadingHostKind(Action::OFK_HIP)) + return false; + if (!Args.hasArg(options::OPT_S) || Args.hasArg(options::OPT_emit_llvm)) + return false; + if (D.offloadDeviceOnly()) + return false; + if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) + return false; + + bool HasAMDGCNHIPDevice = false; + auto HIPTCs = C.getOffloadToolChains(Action::OFK_HIP); + for (auto It = HIPTCs.first; It != HIPTCs.second; ++It) { + const ToolChain *TC = It->second; + if (!TC) + continue; + const llvm::Triple &Tr = TC->getTriple(); + if (Tr.isSPIRV() || Tr.getArch() != llvm::Triple::amdgcn) + return false; + HasAMDGCNHIPDevice = true; + } + return HasAMDGCNHIPDevice; +} + void Driver::BuildActions(Compilation &C, DerivedArgList &Args, const InputList &Inputs, ActionList &Actions) const { llvm::PrettyStackTraceString CrashInfo("Building compilation actions"); @@ -4484,6 +4513,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, cast<InputAction>(Current)->setId(CUID); } + ActionList HIPAsmDeviceActions; + // Use the current host action in any of the offloading actions, if // required. if (!UseNewOffloadingDriver) @@ -4547,7 +4578,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, // Try to build the offloading actions and add the result as a dependency // to the host. if (UseNewOffloadingDriver) - Current = BuildOffloadingActions(C, Args, I, CUID, Current); + Current = BuildOffloadingActions(C, Args, I, CUID, Current, + &HIPAsmDeviceActions); // Use the current host action in any of the offloading actions, if // required. else if (OffloadBuilder->addHostDependenceToDeviceActions(Current, @@ -4558,6 +4590,16 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, break; } + // HIP non-RDC -S (AMDGCN): bundle host and device assembly like the + // classic driver instead of embedding a fat binary in host asm. + if (Current && !HIPAsmDeviceActions.empty()) { + assert(UseNewOffloadingDriver && "unexpected HIP asm bundle list"); + ActionList BundleInputs; + BundleInputs.append(HIPAsmDeviceActions); + BundleInputs.push_back(Current); + Current = C.MakeAction<OffloadBundlingJobAction>(BundleInputs); + } + // If we ended with something, add to the output list. if (Current) Actions.push_back(Current); @@ -4900,7 +4942,8 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, Action *Driver::BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args, const InputTy &Input, StringRef CUID, - Action *HostAction) const { + Action *HostAction, + ActionList *HIPAsmBundleDeviceOut) const { // Don't build offloading actions if explicitly disabled or we do not have a // valid source input. if (offloadHostOnly() || !types::isSrcFile(Input.first)) @@ -5104,6 +5147,13 @@ Action *Driver::BuildOffloadingActions(Compilation &C, *C.getOffloadToolChains<Action::OFK_HIP>().first->second, nullptr, Action::OFK_HIP); } else if (HIPNoRDC) { + // Host + device assembly: defer to clang-offload-bundler (see BuildActions). + if (HIPAsmBundleDeviceOut && + shouldBundleHIPAsmWithNewDriver(C, Args, C.getDriver())) { + for (Action *OA : OffloadActions) + HIPAsmBundleDeviceOut->push_back(OA); + return HostAction; + } // Package all the offloading actions into a single output that can be // embedded in the host and linked. Action *PackagerAction = @@ -5257,7 +5307,9 @@ Action *Driver::ConstructPhaseAction( Args.hasFlag(options::OPT_offload_new_driver, options::OPT_no_offload_new_driver, C.getActiveOffloadKinds() != Action::OFK_None) && - !offloadDeviceOnly() && !isSaveTempsEnabled()) + !offloadDeviceOnly() && !isSaveTempsEnabled() && + !(Args.hasArg(options::OPT_S) && + !Args.hasArg(options::OPT_emit_llvm))) return Input; if (isUsingLTO() && TargetDeviceOffloadKind == Action::OFK_None) { diff --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip index 47e4bfca68a39..ae4a2a6e53cda 100644 --- a/clang/test/Driver/hip-phases.hip +++ b/clang/test/Driver/hip-phases.hip @@ -67,6 +67,9 @@ // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \ // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 %s -S 2>&1 \ // RUN: | FileCheck -check-prefixes=ASM %s +// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \ +// RUN: --offload-new-driver --cuda-gpu-arch=gfx803 %s -S 2>&1 \ +// RUN: | FileCheck -check-prefixes=ASM %s // ASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) // ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // ASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) `````````` </details> https://github.com/llvm/llvm-project/pull/188262 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
