gtbercea updated this revision to Diff 105355.
gtbercea added a comment.
Address Comments.
https://reviews.llvm.org/D34784
Files:
include/clang/Basic/DiagnosticDriverKinds.td
include/clang/Driver/Options.td
lib/Driver/ToolChains/Cuda.cpp
test/Driver/openmp-offload.c
Index: test/Driver/openmp-offload.c
===================================================================
--- test/Driver/openmp-offload.c
+++ test/Driver/openmp-offload.c
@@ -597,3 +597,19 @@
// RUN: | FileCheck -check-prefix=CHK-FOPENMP-IS-DEVICE %s
// CHK-FOPENMP-IS-DEVICE: clang{{.*}} "-aux-triple" "powerpc64le--linux" {{.*}}.c" "-fopenmp-is-device" "-fopenmp-host-ir-file-path"
+
+/// ###########################################################################
+
+/// Check -Xopenmp-target=powerpc64le-ibm-linux-gnu -march=pwr8 is diagnosed as unused during compilation for a powerpc64le offload target.
+// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu -Xopenmp-target=powerpc64le-ibm-linux-gnu -march=pwr8 %s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-FOPENMP-EQ-TARGET %s
+
+// CHK-FOPENMP-EQ-TARGET: clang{{.*}} argument unused during compilation: '-Xopenmp-target=powerpc64le-ibm-linux-gnu -march=pwr8'
+
+/// ###########################################################################
+
+/// Check -Xopenmp-target -march=pwr8 is diagnosed as unused during compilation for a powerpc64le offload target.
+// RUN: %clang -### -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu -Xopenmp-target -march=pwr8 %s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHK-FOPENMP-TARGET %s
+
+// CHK-FOPENMP-TARGET: clang{{.*}} argument unused during compilation: '-Xopenmp-target -march=pwr8'
Index: lib/Driver/ToolChains/Cuda.cpp
===================================================================
--- lib/Driver/ToolChains/Cuda.cpp
+++ lib/Driver/ToolChains/Cuda.cpp
@@ -212,8 +212,20 @@
static_cast<const toolchains::CudaToolChain &>(getToolChain());
assert(TC.getTriple().isNVPTX() && "Wrong platform");
+ StringRef GPUArchName;
+ std::vector<std::string> GPUArchNames;
+ // If this is an OpenMP action we need to extract the device architecture from
+ // the -march= option (derived from -Xopenmp-target in TranslateArgs).
+ if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
+ GPUArchNames = Args.getAllArgValues(options::OPT_march_EQ);
+ assert(GPUArchNames.size() == 1 &&
+ "Exactly one GPU Arch required for ptxas.");
+ GPUArchName = GPUArchNames[0];
+ } else
+ GPUArchName = JA.getOffloadingArch();
+
// Obtain architecture from the action.
- CudaArch gpu_arch = StringToCudaArch(JA.getOffloadingArch());
+ CudaArch gpu_arch = StringToCudaArch(GPUArchName);
assert(gpu_arch != CudaArch::UNKNOWN &&
"Device action expected to have an architecture.");
@@ -392,6 +404,22 @@
CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args);
}
+void AddMArchOption(DerivedArgList *DAL,
+ const OptTable &Opts,
+ StringRef Opt) {
+ if (Opt.startswith("-march=")) {
+ StringRef Arch = Opt.split("=").second;
+ // Check if the arch provided is valid for this toolchain.
+ // If not valid, ignore it.
+ if (StringToCudaArch(Arch) != CudaArch::UNKNOWN) {
+ DAL->eraseArg(options::OPT_march_EQ);
+ DAL->AddJoinedArg(nullptr,
+ Opts.getOption(options::OPT_march_EQ),
+ Arch.str());
+ }
+ }
+}
+
llvm::opt::DerivedArgList *
CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
StringRef BoundArch,
@@ -405,7 +433,7 @@
// For OpenMP device offloading, append derived arguments. Make sure
// flags are not duplicated.
- // TODO: Append the compute capability.
+ // Also append the compute capability.
if (DeviceOffloadKind == Action::OFK_OpenMP) {
for (Arg *A : Args){
bool IsDuplicate = false;
@@ -418,6 +446,49 @@
if (!IsDuplicate)
DAL->append(A);
}
+
+ // Get the compute capability from the -Xopenmp-target flag.
+ auto OptList = Args.getAllArgValues(options::OPT_Xopenmp_target_EQ);
+
+ // For each OPT_Xopenmp_target_EQ option, the function returns
+ // two strings, the triple and the option.
+ // The following format is assumed:
+ //
+ // -Xopenmp-target=nvptx64-nvidia-cuda -opt=val
+ for (unsigned i = 0; i < OptList.size(); i+=2) {
+ StringRef Opt = OptList[i+1];
+ if (OptList[i] == getTripleString())
+ AddMArchOption(DAL, Opts, Opt);
+ }
+
+ OptList = Args.getAllArgValues(options::OPT_Xopenmp_target);
+ // When there is only one option in the list, the following format
+ // is assumed:
+ //
+ // -Xopenmp-target -opt=val
+
+ // By default, if no triple is explicitly specified, we
+ // associate -opt=val with the toolchain specified under the
+ // -fopenmp-targets flag (provided that there is only one such
+ // toolchain specified).
+ if (!OptList.empty() &&
+ Args.getAllArgValues(
+ options::OPT_fopenmp_targets_EQ).size() != 1)
+ getDriver().Diag(diag::err_drv_Xopenmp_target_missing_triple);
+
+ // Add arch
+ for (StringRef Opt : OptList) {
+ AddMArchOption(DAL, Opts, Opt);
+ }
+
+ auto MArchList = DAL->getAllArgValues(options::OPT_march_EQ);
+ assert(MArchList.size() < 2 &&
+ "Too many archs under -Xopenmp-targets");
+ if (MArchList.empty())
+ // Default compute capability for CUDA toolchain is sm_20.
+ DAL->AddJoinedArg(nullptr,
+ Opts.getOption(options::OPT_march_EQ), "sm_20");
+
return DAL;
}
Index: include/clang/Driver/Options.td
===================================================================
--- include/clang/Driver/Options.td
+++ include/clang/Driver/Options.td
@@ -459,6 +459,10 @@
HelpText<"Pass <arg> to fatbinary invocation">, MetaVarName<"<arg>">;
def Xcuda_ptxas : Separate<["-"], "Xcuda-ptxas">,
HelpText<"Pass <arg> to the ptxas assembler">, MetaVarName<"<arg>">;
+def Xopenmp_target : Separate<["-"], "Xopenmp-target">,
+ HelpText<"Pass <arg> to the target offloading toolchain.">, MetaVarName<"<arg>">;
+def Xopenmp_target_EQ : JoinedAndSeparate<["-"], "Xopenmp-target=">,
+ HelpText<"Pass <arg> to the specified target offloading toolchain. The triple that identifies the toolchain must be provided after the equals sign.">, MetaVarName<"<arg>">;
def z : Separate<["-"], "z">, Flags<[LinkerInput, RenderAsInput]>,
HelpText<"Pass -z <arg> to the linker">, MetaVarName<"<arg>">,
Group<Link_Group>;
Index: include/clang/Basic/DiagnosticDriverKinds.td
===================================================================
--- include/clang/Basic/DiagnosticDriverKinds.td
+++ include/clang/Basic/DiagnosticDriverKinds.td
@@ -69,6 +69,8 @@
"invalid Xarch argument: '%0', options requiring arguments are unsupported">;
def err_drv_invalid_Xarch_argument_isdriver : Error<
"invalid Xarch argument: '%0', cannot change driver behavior inside Xarch argument">;
+def err_drv_Xopenmp_target_missing_triple : Error<
+ "cannot deduce implicit triple value for -Xopenmp-target, specify triple using -Xopenmp-target=<triple>">;
def err_drv_argument_only_allowed_with : Error<
"invalid argument '%0' only allowed with '%1'">;
def err_drv_argument_not_allowed_with : Error<
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits