jhuber6 updated this revision to Diff 415622.
jhuber6 added a comment.
Fix wrong condition for picking up input.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D120272/new/
https://reviews.llvm.org/D120272
Files:
clang/include/clang/Basic/Cuda.h
clang/include/clang/Driver/Options.td
clang/lib/Driver/Driver.cpp
clang/lib/Driver/ToolChains/Clang.cpp
clang/test/Driver/cuda-openmp-driver.cu
Index: clang/test/Driver/cuda-openmp-driver.cu
===================================================================
--- /dev/null
+++ clang/test/Driver/cuda-openmp-driver.cu
@@ -0,0 +1,16 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
+// RUN: -foffload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefix CHECK %s
+
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[HOST_OBJ:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4391,6 +4391,7 @@
// one input.
bool IsCuda = JA.isOffloading(Action::OFK_Cuda);
bool IsCudaDevice = JA.isDeviceOffloading(Action::OFK_Cuda);
+ bool IsCudaHost = JA.isHostOffloading(Action::OFK_Cuda);
bool IsHIP = JA.isOffloading(Action::OFK_HIP);
bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP);
bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP);
@@ -4414,6 +4415,7 @@
InputInfoList ModuleHeaderInputs;
InputInfoList OpenMPHostInputs;
+ InputInfoList CudaHostInputs;
const InputInfo *CudaDeviceInput = nullptr;
const InputInfo *OpenMPDeviceInput = nullptr;
for (const InputInfo &I : Inputs) {
@@ -4428,6 +4430,9 @@
<< types::getTypeName(Expected);
}
ModuleHeaderInputs.push_back(I);
+ } else if (IsCudaHost && Args.hasArg(options::OPT_fopenmp_new_driver)) {
+ CudaDeviceInput = &I;
+ CudaHostInputs.push_back(I);
} else if ((IsCuda || IsHIP) && !CudaDeviceInput) {
CudaDeviceInput = &I;
} else if (IsOpenMPDevice && !OpenMPDeviceInput) {
@@ -6982,6 +6987,20 @@
TC->getTripleString() + "." +
TCArgs.getLastArgValue(options::OPT_march_EQ) + "." + InputName));
}
+ } else if (IsCudaHost && !CudaHostInputs.empty()) {
+ const ToolChain *TC = C.getSingleOffloadToolChain<Action::OFK_Cuda>();
+ for (const auto &InputFile : CudaHostInputs) {
+ assert(InputFile.isFilename() && "Offloading requires a filename");
+ StringRef File =
+ C.getArgs().MakeArgString(TC->getInputFilename(InputFile));
+ StringRef InputName = Clang::getBaseInputStem(Args, Inputs);
+ // The CUDA toolchain should have a bound arch appended to the filename.
+ StringRef Arch = File.rsplit(".").first.rsplit('-').second;
+ CmdArgs.push_back(Args.MakeArgString(
+ "-fembed-offload-object=" + File + "," +
+ Action::GetOffloadKindName(Action::OFK_Cuda) + "." +
+ TC->getTripleString() + "." + Arch + "." + InputName));
+ }
}
if (Triple.isAMDGPU()) {
@@ -8234,14 +8253,17 @@
ArgStringList CmdArgs;
// Pass the CUDA path to the linker wrapper tool.
- for (auto &I : llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) {
- const ToolChain *TC = I.second;
- if (TC->getTriple().isNVPTX()) {
- CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
- if (CudaInstallation.isValid())
- CmdArgs.push_back(Args.MakeArgString(
- "--cuda-path=" + CudaInstallation.getInstallPath()));
- break;
+ for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP}) {
+ auto TCRange = C.getOffloadToolChains(Kind);
+ for (auto &I : llvm::make_range(TCRange.first, TCRange.second)) {
+ const ToolChain *TC = I.second;
+ if (TC->getTriple().isNVPTX()) {
+ CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
+ if (CudaInstallation.isValid())
+ CmdArgs.push_back(Args.MakeArgString(
+ "--cuda-path=" + CudaInstallation.getInstallPath()));
+ break;
+ }
}
}
Index: clang/lib/Driver/Driver.cpp
===================================================================
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -4088,6 +4088,101 @@
Args.ClaimAllArgs(options::OPT_cuda_compile_host_device);
}
+/// Returns the canonical name for the offloading architecture when using HIP or
+/// CUDA.
+static StringRef getCanonicalArchString(Compilation &C,
+ llvm::opt::DerivedArgList &Args,
+ StringRef ArchStr,
+ Action::OffloadKind Kind) {
+ if (Kind == Action::OFK_Cuda) {
+ CudaArch Arch = StringToCudaArch(ArchStr);
+ if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch)) {
+ C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
+ return StringRef();
+ }
+ return Args.MakeArgStringRef(CudaArchToString(Arch));
+ } else if (Kind == Action::OFK_HIP) {
+ llvm::StringMap<bool> Features;
+ // getHIPOffloadTargetTriple() is known to return a valid value as it has
+ // been called successfully in the CreateOffloadingDeviceToolChains().
+ auto Arch = parseTargetID(
+ *getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs()), ArchStr,
+ &Features);
+ if (!Arch) {
+ C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << ArchStr;
+ C.setContainsError();
+ return StringRef();
+ }
+ return Args.MakeArgStringRef(
+ getCanonicalTargetID(Arch.getValue(), Features));
+ }
+ return StringRef();
+}
+
+/// Checks if the set of offloading architectures does not conflict. Returns the
+/// incompatible pair if a conflict occurs.
+static llvm::Optional<std::pair<llvm::StringRef, llvm::StringRef>>
+getConflictOffloadArchCombination(const llvm::DenseSet<StringRef> &Archs,
+ Action::OffloadKind Kind) {
+ if (Kind != Action::OFK_HIP)
+ return None;
+
+ std::set<StringRef> ArchSet;
+ llvm::copy(Archs, std::inserter(ArchSet, ArchSet.begin()));
+ return getConflictTargetIDCombination(ArchSet);
+}
+
+/// Returns the set of bound architectures active for this compilation kind.
+/// This function returns a set of bound architectures; if there are no bound
+/// architectures we return a set containing only the empty string.
+static llvm::DenseSet<StringRef>
+getOffloadArchs(Compilation &C, llvm::opt::DerivedArgList &Args,
+ Action::OffloadKind Kind) {
+
+ // If this is OpenMP offloading we don't use a bound architecture.
+ if (Kind == Action::OFK_OpenMP)
+ return llvm::DenseSet<StringRef>{StringRef()};
+
+ // --offload and --offload-arch options are mutually exclusive.
+ if (Args.hasArgNoClaim(options::OPT_offload_EQ) &&
+ Args.hasArgNoClaim(options::OPT_offload_arch_EQ,
+ options::OPT_no_offload_arch_EQ)) {
+ C.getDriver().Diag(diag::err_opt_not_valid_with_opt)
+ << "--offload"
+ << (Args.hasArgNoClaim(options::OPT_offload_arch_EQ)
+ ? "--offload-arch"
+ : "--no-offload-arch");
+ }
+
+ llvm::DenseSet<StringRef> Archs;
+ for (auto &Arg : Args) {
+ if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) {
+ Archs.insert(getCanonicalArchString(C, Args, Arg->getValue(), Kind));
+ } else if (Arg->getOption().matches(options::OPT_no_offload_arch_EQ)) {
+ if (Arg->getValue() == StringRef("all"))
+ Archs.clear();
+ else
+ Archs.erase(getCanonicalArchString(C, Args, Arg->getValue(), Kind));
+ }
+ }
+
+ if (auto ConflictingArchs = getConflictOffloadArchCombination(Archs, Kind)) {
+ C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo)
+ << ConflictingArchs.getValue().first
+ << ConflictingArchs.getValue().second;
+ C.setContainsError();
+ }
+
+ if (Archs.empty()) {
+ if (Kind == Action::OFK_Cuda)
+ Archs.insert(CudaArchToString(DefaultCudaArch));
+ else if (Kind == Action::OFK_HIP)
+ Archs.insert(CudaArchToString(DefaultHIPArch));
+ }
+
+ return Archs;
+}
+
Action *Driver::BuildOffloadingActions(Compilation &C,
llvm::opt::DerivedArgList &Args,
const InputTy &Input,
@@ -4100,7 +4195,8 @@
types::ID InputType = Input.first;
const Arg *InputArg = Input.second;
- const Action::OffloadKind OffloadKinds[] = {Action::OFK_OpenMP};
+ const Action::OffloadKind OffloadKinds[] = {
+ Action::OFK_OpenMP, Action::OFK_Cuda, Action::OFK_HIP};
for (Action::OffloadKind Kind : OffloadKinds) {
SmallVector<const ToolChain *, 2> ToolChains;
@@ -4113,7 +4209,13 @@
if (ToolChains.empty())
continue;
- for (unsigned I = 0; I < ToolChains.size(); ++I)
+ // Get the product of all bound architectures and toolchains.
+ SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs;
+ for (const ToolChain *TC : ToolChains)
+ for (StringRef Arch : getOffloadArchs(C, Args, Kind))
+ TCAndArchs.push_back(std::make_pair(TC, Arch));
+
+ for (unsigned I = 0, E = TCAndArchs.size(); I != E; ++I)
DeviceActions.push_back(C.MakeAction<InputAction>(*InputArg, InputType));
if (DeviceActions.empty())
@@ -4127,7 +4229,7 @@
break;
}
- auto TC = ToolChains.begin();
+ auto TCAndArch = TCAndArchs.begin();
for (Action *&A : DeviceActions) {
A = ConstructPhaseAction(C, Args, Phase, A, Kind);
@@ -4135,19 +4237,28 @@
HostAction->setCannotBeCollapsedWithNextDependentAction();
OffloadAction::HostDependence HDep(
*HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(),
- /*BourdArch=*/nullptr, Action::OFK_OpenMP);
+ /*BoundArch=*/nullptr, Kind);
OffloadAction::DeviceDependences DDep;
- DDep.add(*A, **TC, /*BoundArch=*/nullptr, Kind);
+ DDep.add(*A, *TCAndArch->first, /*BoundArch=*/nullptr, Kind);
A = C.MakeAction<OffloadAction>(HDep, DDep);
+ ++TCAndArch;
+ } else if (isa<AssembleJobAction>(A) && Kind == Action::OFK_Cuda) {
+ ActionList FatbinActions;
+ for (Action *A : {A, A->getInputs()[0]}) {
+ OffloadAction::DeviceDependences DDep;
+ DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind);
+ FatbinActions.emplace_back(
+ C.MakeAction<OffloadAction>(DDep, A->getType()));
+ }
+ A = C.MakeAction<LinkJobAction>(FatbinActions, types::TY_CUDA_FATBIN);
}
- ++TC;
}
}
- auto TC = ToolChains.begin();
+ auto TCAndArch = TCAndArchs.begin();
for (Action *A : DeviceActions) {
- DDeps.add(*A, **TC, /*BoundArch=*/nullptr, Kind);
- TC++;
+ DDeps.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind);
+ ++TCAndArch;
}
}
@@ -4249,7 +4360,7 @@
return C.MakeAction<BackendJobAction>(Input, Output);
}
if (isUsingLTO(/* IsOffload */ true) &&
- TargetDeviceOffloadKind == Action::OFK_OpenMP) {
+ TargetDeviceOffloadKind != Action::OFK_None) {
types::ID Output =
Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC;
return C.MakeAction<BackendJobAction>(Input, Output);
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2512,6 +2512,8 @@
HelpText<"Use the static host OpenMP runtime while linking.">;
def fopenmp_new_driver : Flag<["-"], "fopenmp-new-driver">, Flags<[CC1Option]>, Group<Action_Group>,
HelpText<"Use the new driver for OpenMP offloading.">;
+def foffload_new_driver : Flag<["-"], "foffload-new-driver">, Flags<[CC1Option]>, Group<Action_Group>,
+ HelpText<"Use the new driver for offloading.">, Alias<fopenmp_new_driver>;
def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group<f_Group>;
def foptimize_sibling_calls : Flag<["-"], "foptimize-sibling-calls">, Group<f_Group>;
defm escaping_block_tail_calls : BoolFOption<"escaping-block-tail-calls",
Index: clang/include/clang/Basic/Cuda.h
===================================================================
--- clang/include/clang/Basic/Cuda.h
+++ clang/include/clang/Basic/Cuda.h
@@ -102,6 +102,9 @@
LAST,
};
+constexpr CudaArch DefaultCudaArch = CudaArch::SM_35;
+constexpr CudaArch DefaultHIPArch = CudaArch::GFX803;
+
static inline bool IsNVIDIAGpuArch(CudaArch A) {
return A >= CudaArch::SM_20 && A < CudaArch::GFX600;
}
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits