jhuber6 updated this revision to Diff 412812.
jhuber6 added a comment.
Herald added a subscriber: dang.
Herald added a project: All.
Updating the patch: fatbinaries are now embedded, along with a few small changes.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D120272/new/
https://reviews.llvm.org/D120272
Files:
clang/include/clang/Basic/DiagnosticDriverKinds.td
clang/include/clang/Driver/Options.td
clang/lib/Driver/Driver.cpp
clang/lib/Driver/ToolChains/Clang.cpp
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4391,6 +4391,7 @@
// one input.
bool IsCuda = JA.isOffloading(Action::OFK_Cuda);
bool IsCudaDevice = JA.isDeviceOffloading(Action::OFK_Cuda);
+ bool IsCudaHost = JA.isHostOffloading(Action::OFK_Cuda);
bool IsHIP = JA.isOffloading(Action::OFK_HIP);
bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP);
bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP);
@@ -4414,6 +4415,7 @@
InputInfoList ModuleHeaderInputs;
InputInfoList OpenMPHostInputs;
+ InputInfoList CudaHostInputs;
const InputInfo *CudaDeviceInput = nullptr;
const InputInfo *OpenMPDeviceInput = nullptr;
for (const InputInfo &I : Inputs) {
@@ -4428,6 +4430,8 @@
<< types::getTypeName(Expected);
}
ModuleHeaderInputs.push_back(I);
+ } else if (IsCudaHost && Args.hasArg(options::OPT_fopenmp_new_driver)) {
+ CudaHostInputs.push_back(I);
} else if ((IsCuda || IsHIP) && !CudaDeviceInput) {
CudaDeviceInput = &I;
} else if (IsOpenMPDevice && !OpenMPDeviceInput) {
@@ -6957,6 +6961,7 @@
auto OpenMPTCs = C.getOffloadToolChains<Action::OFK_OpenMP>();
for (auto TI = OpenMPTCs.first, TE = OpenMPTCs.second; TI != TE;
++TI, ++InputFile) {
+ assert(InputFile->isFilename() && "Offloading requires a filename");
const ToolChain *TC = TI->second;
const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP);
StringRef File =
@@ -6969,6 +6974,21 @@
TC->getTripleString() + "." +
TCArgs.getLastArgValue(options::OPT_march_EQ) + "." + InputName));
}
+ } else if (IsCudaHost && !CudaHostInputs.empty()) {
+ const ToolChain *TC = C.getSingleOffloadToolChain<Action::OFK_Cuda>();
+ for (const auto &InputFile : CudaHostInputs) {
+ assert(InputFile.isFilename() && "Offloading requires a filename");
+ StringRef File =
+ C.getArgs().MakeArgString(TC->getInputFilename(InputFile));
+ StringRef InputName = Clang::getBaseInputStem(Args, Inputs);
+ // The CUDA toolchain should have a bound arch appended to the filename.
+ StringRef Arch = File.split(".").first.rsplit('-').second;
+ CmdArgs.push_back(Args.MakeArgString(
+ "-fembed-offload-object=" + File + "," +
+ Action::GetOffloadKindName(Action::OFK_Cuda) + "." +
+ TC->getTripleString() + "." +
+ Arch + "." + InputName));
+ }
}
if (Triple.isAMDGPU()) {
@@ -8217,17 +8237,21 @@
const Driver &D = getToolChain().getDriver();
const llvm::Triple TheTriple = getToolChain().getTriple();
auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>();
+ auto CudaTCRange = C.getOffloadToolChains<Action::OFK_Cuda>();
ArgStringList CmdArgs;
// Pass the CUDA path to the linker wrapper tool.
- for (auto &I : llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) {
- const ToolChain *TC = I.second;
- if (TC->getTriple().isNVPTX()) {
- CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
- if (CudaInstallation.isValid())
- CmdArgs.push_back(Args.MakeArgString(
- "--cuda-path=" + CudaInstallation.getInstallPath()));
- break;
+ for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP}) {
+ auto TCRange = C.getOffloadToolChains(Kind);
+ for (auto &I : llvm::make_range(TCRange.first, TCRange.second)) {
+ const ToolChain *TC = I.second;
+ if (TC->getTriple().isNVPTX()) {
+ CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
+ if (CudaInstallation.isValid())
+ CmdArgs.push_back(Args.MakeArgString(
+ "--cuda-path=" + CudaInstallation.getInstallPath()));
+ break;
+ }
}
}
Index: clang/lib/Driver/Driver.cpp
===================================================================
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -4045,6 +4045,67 @@
Args.ClaimAllArgs(options::OPT_cuda_compile_host_device);
}
+/// Canonicalize the offload architecture string \p ArchStr for \p Kind.
+///
+/// For Action::OFK_Cuda the string is looked up with StringToCudaArch() and is
+/// rejected when the result is CudaArch::UNKNOWN or IsNVIDIAGpuArch() is false.
+/// Every other kind is parsed as a HIP target ID with parseTargetID().
+///
+/// \returns the canonical spelling, passed through Args.MakeArgStringRef(), or
+/// an empty StringRef after emitting a driver diagnostic when \p ArchStr is
+/// not accepted.
+static StringRef getCanonicalArchString(Compilation &C,
+                                        llvm::opt::DerivedArgList &Args,
+                                        StringRef ArchStr,
+                                        Action::OffloadKind Kind) {
+  if (Kind != Action::OFK_Cuda) {
+    // Anything that is not CUDA is treated as a HIP target ID.
+    // getHIPOffloadTargetTriple() is known to return a valid value here, as
+    // it has already been called successfully in
+    // CreateOffloadingDeviceToolChains().
+    llvm::StringMap<bool> FeatureMap;
+    auto TargetID = parseTargetID(
+        *getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs()), ArchStr,
+        &FeatureMap);
+    if (TargetID)
+      return Args.MakeArgStringRef(
+          getCanonicalTargetID(TargetID.getValue(), FeatureMap));
+
+    C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << ArchStr;
+    C.setContainsError();
+    return StringRef();
+  }
+
+  const CudaArch GpuArch = StringToCudaArch(ArchStr);
+  const bool IsKnownNVIDIAArch =
+      GpuArch != CudaArch::UNKNOWN && IsNVIDIAGpuArch(GpuArch);
+  if (!IsKnownNVIDIAArch) {
+    C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
+    return StringRef();
+  }
+  return Args.MakeArgStringRef(CudaArchToString(GpuArch));
+}
+
+/// Collect the set of bound architectures to compile for under \p Kind.
+///
+/// OpenMP offloading does not use a bound architecture, so it yields a set
+/// holding a single empty StringRef. For the other kinds every
+/// --offload-arch= value is canonicalized and added, then every
+/// --no-offload-arch= value is canonicalized and removed. If nothing remains,
+/// a default is used: CudaArch::SM_35 for CUDA and CudaArch::GFX803 for HIP.
+///
+/// A diagnostic is emitted when --offload is combined with --offload-arch= or
+/// --no-offload-arch=, since those options are mutually exclusive.
+static llvm::DenseSet<StringRef>
+getOffloadArchs(Compilation &C, llvm::opt::DerivedArgList &Args,
+                Action::OffloadKind Kind) {
+
+  // If this is OpenMP offloading we don't use a bound architecture.
+  if (Kind == Action::OFK_OpenMP)
+    return llvm::DenseSet<StringRef>{StringRef()};
+
+  // --offload and --offload-arch options are mutually exclusive.
+  if (Args.hasArgNoClaim(options::OPT_offload_EQ) &&
+      Args.hasArgNoClaim(options::OPT_offload_arch_EQ,
+                         options::OPT_no_offload_arch_EQ)) {
+    C.getDriver().Diag(diag::err_opt_not_valid_with_opt) << "--offload-arch"
+                                                         << "--offload";
+  }
+
+  llvm::DenseSet<StringRef> Archs;
+  for (auto &Arg : Args.getAllArgValues(options::OPT_offload_arch_EQ))
+    Archs.insert(getCanonicalArchString(C, Args, Arg, Kind));
+  for (auto &Arg : Args.getAllArgValues(options::OPT_no_offload_arch_EQ))
+    Archs.erase(getCanonicalArchString(C, Args, Arg, Kind));
+
+  // Fall back to a per-kind default when no architecture was requested. The
+  // second branch must test for HIP: testing OFK_Cuda again would make the
+  // GFX803 default unreachable and leave HIP with an empty set.
+  if (Archs.empty()) {
+    if (Kind == Action::OFK_Cuda)
+      Archs.insert(CudaArchToString(CudaArch::SM_35));
+    else if (Kind == Action::OFK_HIP)
+      Archs.insert(CudaArchToString(CudaArch::GFX803));
+  }
+
+  return Archs;
+}
+
Action *Driver::BuildOffloadingActions(Compilation &C,
llvm::opt::DerivedArgList &Args,
const InputTy &Input,
@@ -4057,12 +4118,18 @@
types::ID InputType = Input.first;
const Arg *InputArg = Input.second;
- const Action::OffloadKind OffloadKinds[] = {Action::OFK_OpenMP};
+ const Action::OffloadKind OffloadKinds[] = {
+ Action::OFK_OpenMP, Action::OFK_Cuda, Action::OFK_HIP};
for (Action::OffloadKind Kind : OffloadKinds) {
SmallVector<const ToolChain *, 2> ToolChains;
ActionList DeviceActions;
+ const bool Relocatable =
+ Kind == Action::OFK_OpenMP ||
+ Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
+ /*Default=*/false);
+
auto TCRange = C.getOffloadToolChains(Kind);
for (auto TI = TCRange.first, TE = TCRange.second; TI != TE; ++TI)
ToolChains.push_back(TI->second);
@@ -4070,7 +4137,16 @@
if (ToolChains.empty())
continue;
- for (unsigned I = 0; I < ToolChains.size(); ++I)
+ if (!Relocatable)
+ Diags.Report(diag::err_drv_non_relocatable);
+
+ // Get the product of all bound architectures and toolchains.
+ SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs;
+ for (const ToolChain *TC : ToolChains)
+ for (StringRef Arch : getOffloadArchs(C, Args, Kind))
+ TCAndArchs.push_back(std::make_pair(TC, Arch));
+
+ for (unsigned I = 0, E = TCAndArchs.size(); I != E; ++I)
DeviceActions.push_back(C.MakeAction<InputAction>(*InputArg, InputType));
if (DeviceActions.empty())
@@ -4084,7 +4160,7 @@
break;
}
- auto TC = ToolChains.begin();
+ auto TCAndArch = TCAndArchs.begin();
for (Action *&A : DeviceActions) {
A = ConstructPhaseAction(C, Args, Phase, A, Kind);
@@ -4092,19 +4168,28 @@
HostAction->setCannotBeCollapsedWithNextDependentAction();
OffloadAction::HostDependence HDep(
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
- /*BourdArch=*/nullptr, Action::OFK_OpenMP);
+ /*BoundArch=*/nullptr, Kind);
OffloadAction::DeviceDependences DDep;
- DDep.add(*A, **TC, /*BoundArch=*/nullptr, Kind);
+ DDep.add(*A, *TCAndArch->first, /*BoundArch=*/nullptr, Kind);
A = C.MakeAction<OffloadAction>(HDep, DDep);
+ ++TCAndArch;
+ } else if (isa<AssembleJobAction>(A) && Kind == Action::OFK_Cuda) {
+ ActionList FatbinActions;
+ for (Action *A : {A, A->getInputs()[0]}) {
+ OffloadAction::DeviceDependences DDep;
+ DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind);
+ FatbinActions.emplace_back(
+ C.MakeAction<OffloadAction>(DDep, A->getType()));
+ }
+ A = C.MakeAction<LinkJobAction>(FatbinActions, types::TY_CUDA_FATBIN);
}
- ++TC;
}
}
- auto TC = ToolChains.begin();
+ auto TCAndArch = TCAndArchs.begin();
for (Action *A : DeviceActions) {
- DDeps.add(*A, **TC, /*BoundArch=*/nullptr, Kind);
- TC++;
+ DDeps.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind);
+ ++TCAndArch;
}
}
@@ -4206,7 +4291,7 @@
return C.MakeAction<BackendJobAction>(Input, Output);
}
if (isUsingLTO(/* IsOffload */ true) &&
- TargetDeviceOffloadKind == Action::OFK_OpenMP) {
+ TargetDeviceOffloadKind != Action::OFK_None) {
types::ID Output =
Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC;
return C.MakeAction<BackendJobAction>(Input, Output);
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2511,6 +2511,7 @@
HelpText<"Use the static host OpenMP runtime while linking.">;
def fopenmp_new_driver : Flag<["-"], "fopenmp-new-driver">, Flags<[CC1Option]>, Group<Action_Group>,
HelpText<"Use the new driver for OpenMP offloading.">;
+def : Flag<["-"], "foffload-new-driver">, Alias<fopenmp_new_driver>;
def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group<f_Group>;
def foptimize_sibling_calls : Flag<["-"], "foptimize-sibling-calls">, Group<f_Group>;
defm escaping_block_tail_calls : BoolFOption<"escaping-block-tail-calls",
Index: clang/include/clang/Basic/DiagnosticDriverKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -327,6 +327,8 @@
"invalid value '%1' in '%0', value must be 'none' or a positive integer">;
def err_drv_small_columns : Error<
"invalid value '%1' in '%0', value must be '%2' or greater">;
+def err_drv_non_relocatable : Error<
+ "the new driver requires relocatable code, compile with '-fgpu-rdc' enabled">;
def err_drv_invalid_malign_branch_EQ : Error<
"invalid argument '%0' to -malign-branch=; each element must be one of: %1">;
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits