This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG2f9ace9e9a58: [OpenMP] Introduce new flag to change
offloading driver pipeline (authored by jhuber6).
Changed prior to commit:
https://reviews.llvm.org/D116541?vs=397089&id=404684#toc
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D116541/new/
https://reviews.llvm.org/D116541
Files:
clang/include/clang/Driver/Driver.h
clang/include/clang/Driver/Options.td
clang/lib/Driver/Driver.cpp
clang/lib/Driver/ToolChains/Clang.cpp
clang/test/Driver/openmp-offload-gpu.c
Index: clang/test/Driver/openmp-offload-gpu.c
===================================================================
--- clang/test/Driver/openmp-offload-gpu.c
+++ clang/test/Driver/openmp-offload-gpu.c
@@ -350,3 +350,13 @@
// TRIPLE: "-triple" "nvptx64-nvidia-cuda"
// TRIPLE: "-target-cpu" "sm_35"
+
+// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN: -fopenmp-new-driver -no-canonical-prefixes -ccc-print-bindings %s -o openmp-offload-gpu 2>&1 \
+// RUN: | FileCheck -check-prefix=NEW_DRIVER %s
+
+// NEW_DRIVER: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_INPUT:.+]]"], output: "[[HOST_BC:.+]]"
+// NEW_DRIVER: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[DEVICE_INPUT:.+]]", "[[HOST_BC]]"], output: "[[DEVICE_ASM:.+]]"
+// NEW_DRIVER: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[DEVICE_ASM]]"], output: "[[DEVICE_OBJ:.+]]"
+// NEW_DRIVER: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[HOST_BC]]", "[[DEVICE_OBJ]]"], output: "[[HOST_OBJ:.+]]"
+// NEW_DRIVER: "x86_64-unknown-linux-gnu" - "[[LINKER:.+]]", inputs: ["[[HOST_OBJ]]"], output: "openmp-offload-gpu"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4351,6 +4351,7 @@
bool IsHIP = JA.isOffloading(Action::OFK_HIP);
bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP);
bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP);
+ bool IsOpenMPHost = JA.isHostOffloading(Action::OFK_OpenMP);
bool IsHeaderModulePrecompile = isa<HeaderModulePrecompileJobAction>(JA);
bool IsDeviceOffloadAction = !(JA.isDeviceOffloading(Action::OFK_None) ||
JA.isDeviceOffloading(Action::OFK_Host));
@@ -4371,6 +4372,7 @@
InputInfoList ModuleHeaderInputs;
const InputInfo *CudaDeviceInput = nullptr;
const InputInfo *OpenMPDeviceInput = nullptr;
+ const InputInfo *OpenMPHostInput = nullptr;
for (const InputInfo &I : Inputs) {
if (&I == &Input) {
// This is the primary input.
@@ -4387,6 +4389,8 @@
CudaDeviceInput = &I;
} else if (IsOpenMPDevice && !OpenMPDeviceInput) {
OpenMPDeviceInput = &I;
+ } else if (IsOpenMPHost && !OpenMPHostInput) {
+ OpenMPHostInput = &I;
} else {
llvm_unreachable("unexpectedly given multiple inputs");
}
Index: clang/lib/Driver/Driver.cpp
===================================================================
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -3830,6 +3830,11 @@
// Builder to be used to build offloading actions.
OffloadingActionBuilder OffloadBuilder(C, Args, Inputs);
+ // Offload kinds active for this compilation.
+ unsigned OffloadKinds = Action::OFK_None;
+ if (C.hasOffloadToolChain<Action::OFK_OpenMP>())
+ OffloadKinds |= Action::OFK_OpenMP;
+
// Construct the actions to perform.
HeaderModulePrecompileJobAction *HeaderModuleAction = nullptr;
ActionList LinkerInputs;
@@ -3850,14 +3855,16 @@
// Use the current host action in any of the offloading actions, if
// required.
- if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
- break;
+ if (!Args.hasArg(options::OPT_fopenmp_new_driver))
+ if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
+ break;
for (phases::ID Phase : PL) {
// Add any offload action the host action depends on.
- Current = OffloadBuilder.addDeviceDependencesToHostAction(
- Current, InputArg, Phase, PL.back(), FullPL);
+ if (!Args.hasArg(options::OPT_fopenmp_new_driver))
+ Current = OffloadBuilder.addDeviceDependencesToHostAction(
+ Current, InputArg, Phase, PL.back(), FullPL);
if (!Current)
break;
@@ -3890,6 +3897,11 @@
break;
}
+ // Try to build the offloading actions and add the result as a dependency
+ // to the host.
+ if (Args.hasArg(options::OPT_fopenmp_new_driver))
+ Current = BuildOffloadingActions(C, Args, I, Current);
+
// FIXME: Should we include any prior module file outputs as inputs of
// later actions in the same command line?
@@ -3907,8 +3919,9 @@
// Use the current host action in any of the offloading actions, if
// required.
- if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
- break;
+ if (!Args.hasArg(options::OPT_fopenmp_new_driver))
+ if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg))
+ break;
if (Current->getType() == types::TY_Nothing)
break;
@@ -3919,7 +3932,11 @@
Actions.push_back(Current);
// Add any top level actions generated for offloading.
- OffloadBuilder.appendTopLevelActions(Actions, Current, InputArg);
+ if (!Args.hasArg(options::OPT_fopenmp_new_driver))
+ OffloadBuilder.appendTopLevelActions(Actions, Current, InputArg);
+ else if (Current)
+ Current->propagateHostOffloadInfo(OffloadKinds,
+ /*BoundArch=*/nullptr);
}
// Add a link action if necessary.
@@ -3931,8 +3948,9 @@
}
if (!LinkerInputs.empty()) {
- if (Action *Wrapper = OffloadBuilder.makeHostLinkAction())
- LinkerInputs.push_back(Wrapper);
+ if (!Args.hasArg(options::OPT_fopenmp_new_driver))
+ if (Action *Wrapper = OffloadBuilder.makeHostLinkAction())
+ LinkerInputs.push_back(Wrapper);
Action *LA;
// Check if this Linker Job should emit a static library.
if (ShouldEmitStaticLibrary(Args)) {
@@ -3940,7 +3958,11 @@
} else {
LA = C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image);
}
- LA = OffloadBuilder.processHostLinkAction(LA);
+ if (!Args.hasArg(options::OPT_fopenmp_new_driver))
+ LA = OffloadBuilder.processHostLinkAction(LA);
+ if (Args.hasArg(options::OPT_fopenmp_new_driver))
+ LA->propagateHostOffloadInfo(OffloadKinds,
+ /*BoundArch=*/nullptr);
Actions.push_back(LA);
}
@@ -4026,6 +4048,68 @@
Args.ClaimAllArgs(options::OPT_cuda_compile_host_device);
}
+Action *Driver::BuildOffloadingActions(Compilation &C,
+                                       llvm::opt::DerivedArgList &Args,
+                                       const InputTy &Input,
+                                       Action *HostAction) const {
+  // Device actions are only attached to a host compile job; every other host
+  // action is handed back untouched.
+  if (!isa<CompileJobAction>(HostAction))
+    return HostAction;
+
+  SmallVector<const ToolChain *, 2> ToolChains;
+  ActionList DeviceActions;
+
+  types::ID InputType = Input.first;
+  const Arg *InputArg = Input.second;
+
+  // Gather every OpenMP offloading toolchain registered with the compilation.
+  auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>();
+  for (auto TI = OpenMPTCRange.first, TE = OpenMPTCRange.second; TI != TE; ++TI)
+    ToolChains.push_back(TI->second);
+
+  // Create one input action per toolchain, so that ToolChains and
+  // DeviceActions stay index-aligned for the parallel walks below.
+  for (unsigned I = 0; I < ToolChains.size(); ++I)
+    DeviceActions.push_back(C.MakeAction<InputAction>(*InputArg, InputType));
+
+  // No OpenMP offloading toolchain: nothing to add to the host action.
+  if (DeviceActions.empty())
+    return HostAction;
+
+  auto PL = types::getCompilationPhases(*this, Args, InputType);
+
+  // Advance each device action through every phase before the link phase.
+  for (phases::ID Phase : PL) {
+    if (Phase == phases::Link) {
+      assert(Phase == PL.back() && "linking must be final compilation step.");
+      break;
+    }
+
+    auto TC = ToolChains.begin();
+    for (Action *&A : DeviceActions) {
+      A = ConstructPhaseAction(C, Args, Phase, A);
+
+      // A device compile job is wrapped in an OffloadAction that carries the
+      // host action as its host dependence.
+      if (isa<CompileJobAction>(A)) {
+        HostAction->setCannotBeCollapsedWithNextDependentAction();
+        OffloadAction::HostDependence HDep(
+            *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
+            /*BoundArch=*/nullptr, Action::OFK_OpenMP);
+        OffloadAction::DeviceDependences DDep;
+        DDep.add(*A, **TC, /*BoundArch=*/nullptr, Action::OFK_OpenMP);
+        A = C.MakeAction<OffloadAction>(HDep, DDep);
+      }
+      ++TC;
+    }
+  }
+
+  // Attach every finished device action to the host action as a device
+  // dependence and return the combined offload action.
+  OffloadAction::DeviceDependences DDeps;
+
+  auto TC = ToolChains.begin();
+  for (Action *A : DeviceActions) {
+    DDeps.add(*A, **TC, /*BoundArch=*/nullptr, Action::OFK_OpenMP);
+    ++TC;
+  }
+
+  OffloadAction::HostDependence HDep(
+      *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
+      /*BoundArch=*/nullptr, DDeps);
+  return C.MakeAction<OffloadAction>(HDep, DDeps);
+}
+
Action *Driver::ConstructPhaseAction(
Compilation &C, const ArgList &Args, phases::ID Phase, Action *Input,
Action::OffloadKind TargetDeviceOffloadKind) const {
@@ -4188,7 +4272,7 @@
ArchNames.insert(A->getValue());
// Set of (Action, canonical ToolChain triple) pairs we've built jobs for.
- std::map<std::pair<const Action *, std::string>, InputInfo> CachedResults;
+ std::map<std::pair<const Action *, std::string>, InputInfoList> CachedResults;
for (Action *A : C.getActions()) {
// If we are linking an image for multiple archs then the linker wants
// -arch_multiple and -final_output <final image name>. Unfortunately, this
@@ -4645,10 +4729,11 @@
return TriplePlusArch;
}
-InputInfo Driver::BuildJobsForAction(
+InputInfoList Driver::BuildJobsForAction(
Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch,
bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput,
- std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults,
+ std::map<std::pair<const Action *, std::string>, InputInfoList>
+ &CachedResults,
Action::OffloadKind TargetDeviceOffloadKind) const {
std::pair<const Action *, std::string> ActionTC = {
A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)};
@@ -4656,17 +4741,18 @@
if (CachedResult != CachedResults.end()) {
return CachedResult->second;
}
- InputInfo Result = BuildJobsForActionNoCache(
+ InputInfoList Result = BuildJobsForActionNoCache(
C, A, TC, BoundArch, AtTopLevel, MultipleArchs, LinkingOutput,
CachedResults, TargetDeviceOffloadKind);
CachedResults[ActionTC] = Result;
return Result;
}
-InputInfo Driver::BuildJobsForActionNoCache(
+InputInfoList Driver::BuildJobsForActionNoCache(
Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch,
bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput,
- std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults,
+ std::map<std::pair<const Action *, std::string>, InputInfoList>
+ &CachedResults,
Action::OffloadKind TargetDeviceOffloadKind) const {
llvm::PrettyStackTraceString CrashInfo("Building compilation jobs");
@@ -4704,7 +4790,7 @@
// If there is a single device option, just generate the job for it.
if (OA->hasSingleDeviceDependence()) {
- InputInfo DevA;
+ InputInfoList DevA;
OA->doOnEachDeviceDependence([&](Action *DepA, const ToolChain *DepTC,
const char *DepBoundArch) {
DevA =
@@ -4722,7 +4808,7 @@
OA->doOnEachDependence(
/*IsHostDependence=*/BuildingForOffloadDevice,
[&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) {
- OffloadDependencesInputInfo.push_back(BuildJobsForAction(
+ OffloadDependencesInputInfo.append(BuildJobsForAction(
C, DepA, DepTC, DepBoundArch, /*AtTopLevel=*/false,
/*MultipleArchs*/ !!DepBoundArch, LinkingOutput, CachedResults,
DepA->getOffloadingDeviceKind()));
@@ -4731,6 +4817,17 @@
A = BuildingForOffloadDevice
? OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true)
: OA->getHostDependence();
+
+ // We may have already built this action as a part of the offloading
+ // toolchain, return the cached input if so.
+ std::pair<const Action *, std::string> ActionTC = {
+ OA->getHostDependence(),
+ GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)};
+ if (CachedResults.find(ActionTC) != CachedResults.end()) {
+ InputInfoList Inputs = CachedResults[ActionTC];
+ Inputs.append(OffloadDependencesInputInfo);
+ return Inputs;
+ }
}
if (const InputAction *IA = dyn_cast<InputAction>(A)) {
@@ -4740,9 +4837,9 @@
Input.claim();
if (Input.getOption().matches(options::OPT_INPUT)) {
const char *Name = Input.getValue();
- return InputInfo(A, Name, /* _BaseInput = */ Name);
+ return {InputInfo(A, Name, /* _BaseInput = */ Name)};
}
- return InputInfo(A, &Input, /* _BaseInput = */ "");
+ return {InputInfo(A, &Input, /* _BaseInput = */ "")};
}
if (const BindArchAction *BAA = dyn_cast<BindArchAction>(A)) {
@@ -4772,7 +4869,7 @@
const Tool *T = TS.getTool(Inputs, CollapsedOffloadActions);
if (!T)
- return InputInfo();
+ return {InputInfo()};
if (BuildingForOffloadDevice &&
A->getOffloadingDeviceKind() == Action::OFK_OpenMP) {
@@ -4799,7 +4896,7 @@
cast<OffloadAction>(OA)->doOnEachDependence(
/*IsHostDependence=*/BuildingForOffloadDevice,
[&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) {
- OffloadDependencesInputInfo.push_back(BuildJobsForAction(
+ OffloadDependencesInputInfo.append(BuildJobsForAction(
C, DepA, DepTC, DepBoundArch, /* AtTopLevel */ false,
/*MultipleArchs=*/!!DepBoundArch, LinkingOutput, CachedResults,
DepA->getOffloadingDeviceKind()));
@@ -4813,7 +4910,7 @@
// FIXME: Clean this up.
bool SubJobAtTopLevel =
AtTopLevel && (isa<DsymutilJobAction>(A) || isa<VerifyJobAction>(A));
- InputInfos.push_back(BuildJobsForAction(
+ InputInfos.append(BuildJobsForAction(
C, Input, TC, BoundArch, SubJobAtTopLevel, MultipleArchs, LinkingOutput,
CachedResults, A->getOffloadingDeviceKind()));
}
@@ -4897,8 +4994,8 @@
Arch = BoundArch;
CachedResults[{A, GetTriplePlusArchString(UI.DependentToolChain, Arch,
- UI.DependentOffloadKind)}] =
- CurI;
+ UI.DependentOffloadKind)}] = {
+ CurI};
}
// Now that we have all the results generated, select the one that should be
@@ -4907,9 +5004,9 @@
A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)};
assert(CachedResults.find(ActionTC) != CachedResults.end() &&
"Result does not exist??");
- Result = CachedResults[ActionTC];
+ Result = CachedResults[ActionTC].front();
} else if (JA->getType() == types::TY_Nothing)
- Result = InputInfo(A, BaseInput);
+ Result = {InputInfo(A, BaseInput)};
else {
// We only have to generate a prefix for the host if this is not a top-level
// action.
@@ -4962,7 +5059,7 @@
C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()),
LinkingOutput);
}
- return Result;
+ return {Result};
}
const char *Driver::getDefaultImageName() const {
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2472,6 +2472,8 @@
PosFlag<SetTrue, [CC1Option]>, NegFlag<SetFalse>, BothFlags<[NoArgumentUnused, HelpHidden]>>;
def static_openmp: Flag<["-"], "static-openmp">,
HelpText<"Use the static host OpenMP runtime while linking.">;
+def fopenmp_new_driver : Flag<["-"], "fopenmp-new-driver">, Flags<[CC1Option]>, Group<Action_Group>,
+ HelpText<"Use the new driver for OpenMP offloading.">;
def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group<f_Group>;
def foptimize_sibling_calls : Flag<["-"], "foptimize-sibling-calls">, Group<f_Group>;
defm escaping_block_tail_calls : BoolFOption<"escaping-block-tail-calls",
Index: clang/include/clang/Driver/Driver.h
===================================================================
--- clang/include/clang/Driver/Driver.h
+++ clang/include/clang/Driver/Driver.h
@@ -12,6 +12,7 @@
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/LLVM.h"
#include "clang/Driver/Action.h"
+#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/Phases.h"
#include "clang/Driver/ToolChain.h"
@@ -38,13 +39,14 @@
namespace driver {
- class Command;
- class Compilation;
- class InputInfo;
- class JobList;
- class JobAction;
- class SanitizerArgs;
- class ToolChain;
+/// A list of InputInfo objects.
+using InputInfoList = SmallVector<InputInfo, 4>;
+
+class Command;
+class Compilation;
+class JobList;
+class JobAction;
+class SanitizerArgs;
+class ToolChain;
/// Describes the kind of LTO mode selected via -f(no-)?lto(=.*)? options.
enum LTOKind {
@@ -171,9 +173,11 @@
/// The file to log CC_LOG_DIAGNOSTICS output to, if enabled.
std::string CCLogDiagnosticsFilename;
+ /// An input type and its arguments.
+ using InputTy = std::pair<types::ID, const llvm::opt::Arg *>;
+
/// A list of inputs and their types for the given arguments.
- typedef SmallVector<std::pair<types::ID, const llvm::opt::Arg *>, 16>
- InputList;
+ using InputList = SmallVector<InputTy, 16>;
/// Whether the driver should follow g++ like behavior.
bool CCCIsCXX() const { return Mode == GXXMode; }
@@ -413,6 +417,18 @@
void BuildUniversalActions(Compilation &C, const ToolChain &TC,
const InputList &BAInputs) const;
+ /// BuildOffloadingActions - Construct the list of actions to perform for the
+ /// offloading toolchain that will be embedded in the host.
+ ///
+ /// \param C - The compilation that is being built.
+ /// \param Args - The input arguments.
+ /// \param Input - The input type and arguments.
+ /// \param HostAction - The host action used in the offloading toolchain.
+ Action *BuildOffloadingActions(Compilation &C,
+ llvm::opt::DerivedArgList &Args,
+ const InputTy &Input,
+ Action *HostAction) const;
+
/// Check that the file referenced by Value exists. If it doesn't,
/// issue a diagnostic and return false.
/// If TypoCorrect is true and the file does not exist, see if it looks
@@ -503,13 +519,12 @@
/// BuildJobsForAction - Construct the jobs to perform for the action \p A and
/// return an InputInfo for the result of running \p A. Will only construct
/// jobs for a given (Action, ToolChain, BoundArch, DeviceKind) tuple once.
- InputInfo
- BuildJobsForAction(Compilation &C, const Action *A, const ToolChain *TC,
- StringRef BoundArch, bool AtTopLevel, bool MultipleArchs,
- const char *LinkingOutput,
- std::map<std::pair<const Action *, std::string>, InputInfo>
- &CachedResults,
- Action::OffloadKind TargetDeviceOffloadKind) const;
+ InputInfoList BuildJobsForAction(
+ Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch,
+ bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput,
+ std::map<std::pair<const Action *, std::string>, InputInfoList>
+ &CachedResults,
+ Action::OffloadKind TargetDeviceOffloadKind) const;
/// Returns the default name for linked images (e.g., "a.out").
const char *getDefaultImageName() const;
@@ -617,10 +632,10 @@
/// Helper used in BuildJobsForAction. Doesn't use the cache when building
/// jobs specifically for the given action, but will use the cache when
/// building jobs for the Action's inputs.
- InputInfo BuildJobsForActionNoCache(
+ InputInfoList BuildJobsForActionNoCache(
Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch,
bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput,
- std::map<std::pair<const Action *, std::string>, InputInfo>
+ std::map<std::pair<const Action *, std::string>, InputInfoList>
&CachedResults,
Action::OffloadKind TargetDeviceOffloadKind) const;
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits