gtbercea updated this revision to Diff 104427.
gtbercea added a comment.
Herald added subscribers: aheejin, jgravelle-google.

Updated diff to address comments.


Repository:
  rL LLVM

https://reviews.llvm.org/D29647

Files:
  include/clang/Driver/ToolChain.h
  lib/Driver/ToolChain.cpp
  lib/Driver/ToolChains/BareMetal.cpp
  lib/Driver/ToolChains/BareMetal.h
  lib/Driver/ToolChains/Clang.cpp
  lib/Driver/ToolChains/Cuda.cpp
  lib/Driver/ToolChains/Cuda.h
  lib/Driver/ToolChains/Fuchsia.cpp
  lib/Driver/ToolChains/Fuchsia.h
  lib/Driver/ToolChains/Gnu.cpp
  lib/Driver/ToolChains/Gnu.h
  lib/Driver/ToolChains/Hexagon.cpp
  lib/Driver/ToolChains/Hexagon.h
  lib/Driver/ToolChains/WebAssembly.cpp
  lib/Driver/ToolChains/WebAssembly.h
  lib/Driver/ToolChains/XCore.cpp
  lib/Driver/ToolChains/XCore.h
  test/Driver/openmp-offload.c

Index: test/Driver/openmp-offload.c
===================================================================
--- test/Driver/openmp-offload.c
+++ test/Driver/openmp-offload.c
@@ -607,3 +607,12 @@
 // CHK-FOPENMP-IS-DEVICE: clang{{.*}}.i" {{.*}}" "-fopenmp-is-device"
 // CHK-FOPENMP-IS-DEVICE-NEXT: clang{{.*}}.bc" {{.*}}.i" "-fopenmp-is-device" "-fopenmp-host-ir-file-path"
 // CHK-FOPENMP-IS-DEVICE-NEXT: clang{{.*}}.s" {{.*}}.bc" "-fopenmp-is-device"
+
+/// ###########################################################################
+
+/// Check -march propagates compute capability to device offloading toolchain.
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -save-temps -no-canonical-prefixes -march=sm_35 %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-COMPUTE-CAPABILITY %s
+
+// CHK-COMPUTE-CAPABILITY: ptxas{{.*}}" "--gpu-name" "sm_35"
+// CHK-COMPUTE-CAPABILITY-NEXT: nvlink{{.*}}" "-arch" "sm_35"
Index: lib/Driver/ToolChains/XCore.h
===================================================================
--- lib/Driver/ToolChains/XCore.h
+++ lib/Driver/ToolChains/XCore.h
@@ -67,7 +67,8 @@
   AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
                             llvm::opt::ArgStringList &CC1Args) const override;
   void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
-                             llvm::opt::ArgStringList &CC1Args) const override;
+                             llvm::opt::ArgStringList &CC1Args,
+                             Action::OffloadKind DeviceOffloadKind) const override;
   void AddClangCXXStdlibIncludeArgs(
       const llvm::opt::ArgList &DriverArgs,
       llvm::opt::ArgStringList &CC1Args) const override;
Index: lib/Driver/ToolChains/XCore.cpp
===================================================================
--- lib/Driver/ToolChains/XCore.cpp
+++ lib/Driver/ToolChains/XCore.cpp
@@ -124,7 +124,8 @@
 }
 
 void XCoreToolChain::addClangTargetOptions(const ArgList &DriverArgs,
-                                           ArgStringList &CC1Args) const {
+                                           ArgStringList &CC1Args,
+                                           Action::OffloadKind) const {
   CC1Args.push_back("-nostdsysteminc");
 }
 
Index: lib/Driver/ToolChains/WebAssembly.h
===================================================================
--- lib/Driver/ToolChains/WebAssembly.h
+++ lib/Driver/ToolChains/WebAssembly.h
@@ -53,7 +53,8 @@
   bool SupportsProfiling() const override;
   bool HasNativeLLVMSupport() const override;
   void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
-                             llvm::opt::ArgStringList &CC1Args) const override;
+                             llvm::opt::ArgStringList &CC1Args,
+                             Action::OffloadKind DeviceOffloadKind) const override;
   RuntimeLibType GetDefaultRuntimeLibType() const override;
   CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
   void AddClangSystemIncludeArgs(
Index: lib/Driver/ToolChains/WebAssembly.cpp
===================================================================
--- lib/Driver/ToolChains/WebAssembly.cpp
+++ lib/Driver/ToolChains/WebAssembly.cpp
@@ -134,7 +134,8 @@
 bool WebAssembly::HasNativeLLVMSupport() const { return true; }
 
 void WebAssembly::addClangTargetOptions(const ArgList &DriverArgs,
-                                        ArgStringList &CC1Args) const {
+                                        ArgStringList &CC1Args,
+                                        Action::OffloadKind) const {
   if (DriverArgs.hasFlag(clang::driver::options::OPT_fuse_init_array,
                          options::OPT_fno_use_init_array, true))
     CC1Args.push_back("-fuse-init-array");
Index: lib/Driver/ToolChains/Hexagon.h
===================================================================
--- lib/Driver/ToolChains/Hexagon.h
+++ lib/Driver/ToolChains/Hexagon.h
@@ -69,7 +69,8 @@
   ~HexagonToolChain() override;
 
   void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
-                             llvm::opt::ArgStringList &CC1Args) const override;
+                             llvm::opt::ArgStringList &CC1Args,
+                             Action::OffloadKind DeviceOffloadKind) const override;
   void
   AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
                             llvm::opt::ArgStringList &CC1Args) const override;
Index: lib/Driver/ToolChains/Hexagon.cpp
===================================================================
--- lib/Driver/ToolChains/Hexagon.cpp
+++ lib/Driver/ToolChains/Hexagon.cpp
@@ -428,7 +428,8 @@
 }
 
 void HexagonToolChain::addClangTargetOptions(const ArgList &DriverArgs,
-                                             ArgStringList &CC1Args) const {
+                                             ArgStringList &CC1Args,
+                                             Action::OffloadKind) const {
   if (DriverArgs.hasArg(options::OPT_ffp_contract))
     return;
   unsigned OptLevel = getOptimizationLevel(DriverArgs);
Index: lib/Driver/ToolChains/Gnu.h
===================================================================
--- lib/Driver/ToolChains/Gnu.h
+++ lib/Driver/ToolChains/Gnu.h
@@ -341,7 +341,8 @@
       : Generic_GCC(D, Triple, Args) {}
 
   void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
-                             llvm::opt::ArgStringList &CC1Args) const override;
+                             llvm::opt::ArgStringList &CC1Args,
+                             Action::OffloadKind DeviceOffloadKind) const override;
 };
 
 } // end namespace toolchains
Index: lib/Driver/ToolChains/Gnu.cpp
===================================================================
--- lib/Driver/ToolChains/Gnu.cpp
+++ lib/Driver/ToolChains/Gnu.cpp
@@ -2461,7 +2461,8 @@
 void Generic_ELF::anchor() {}
 
 void Generic_ELF::addClangTargetOptions(const ArgList &DriverArgs,
-                                        ArgStringList &CC1Args) const {
+                                        ArgStringList &CC1Args,
+                                        Action::OffloadKind) const {
   const Generic_GCC::GCCVersion &V = GCCInstallation.getVersion();
   bool UseInitArrayDefault =
       getTriple().getArch() == llvm::Triple::aarch64 ||
Index: lib/Driver/ToolChains/Fuchsia.h
===================================================================
--- lib/Driver/ToolChains/Fuchsia.h
+++ lib/Driver/ToolChains/Fuchsia.h
@@ -55,7 +55,8 @@
   GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
 
   void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
-                             llvm::opt::ArgStringList &CC1Args) const override;
+                             llvm::opt::ArgStringList &CC1Args,
+                             Action::OffloadKind DeviceOffloadKind) const override;
   void
   AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
                             llvm::opt::ArgStringList &CC1Args) const override;
Index: lib/Driver/ToolChains/Fuchsia.cpp
===================================================================
--- lib/Driver/ToolChains/Fuchsia.cpp
+++ lib/Driver/ToolChains/Fuchsia.cpp
@@ -172,7 +172,8 @@
 }
 
 void Fuchsia::addClangTargetOptions(const ArgList &DriverArgs,
-                                    ArgStringList &CC1Args) const {
+                                    ArgStringList &CC1Args,
+                                    Action::OffloadKind) const {
   if (DriverArgs.hasFlag(options::OPT_fuse_init_array,
                          options::OPT_fno_use_init_array, true))
     CC1Args.push_back("-fuse-init-array");
Index: lib/Driver/ToolChains/Cuda.h
===================================================================
--- lib/Driver/ToolChains/Cuda.h
+++ lib/Driver/ToolChains/Cuda.h
@@ -130,7 +130,8 @@
   TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
                 Action::OffloadKind DeviceOffloadKind) const override;
   void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
-                             llvm::opt::ArgStringList &CC1Args) const override;
+                             llvm::opt::ArgStringList &CC1Args,
+                             Action::OffloadKind DeviceOffloadKind) const override;
 
   // Never try to use the integrated assembler with CUDA; always fork out to
   // ptxas.
Index: lib/Driver/ToolChains/Cuda.cpp
===================================================================
--- lib/Driver/ToolChains/Cuda.cpp
+++ lib/Driver/ToolChains/Cuda.cpp
@@ -212,8 +212,21 @@
       static_cast<const toolchains::CudaToolChain &>(getToolChain());
   assert(TC.getTriple().isNVPTX() && "Wrong platform");
 
+  StringRef GPUArchName;
+  std::vector<std::string> GPUArchNames;
+  // If this is an OpenMP action we need to extract the device architecture from
+  // the -march option.
+  if (JA.isDeviceOffloading(Action::OFK_OpenMP)) {
+    GPUArchNames = Args.getAllArgValues(options::OPT_march_EQ);
+    assert(GPUArchNames.size() == 1 &&
+           "Exactly one GPU Arch required for ptxas.");
+    // TODO: get compute capability if a flag is used to pass it to the driver.
+    GPUArchName = GPUArchNames[0];
+  } else
+    GPUArchName = JA.getOffloadingArch();
+
   // Obtain architecture from the action.
-  CudaArch gpu_arch = StringToCudaArch(JA.getOffloadingArch());
+  CudaArch gpu_arch = StringToCudaArch(GPUArchName);
   assert(gpu_arch != CudaArch::UNKNOWN &&
          "Device action expected to have an architecture.");
 
@@ -346,26 +359,32 @@
 
 void CudaToolChain::addClangTargetOptions(
     const llvm::opt::ArgList &DriverArgs,
-    llvm::opt::ArgStringList &CC1Args) const {
-  HostTC.addClangTargetOptions(DriverArgs, CC1Args);
+    llvm::opt::ArgStringList &CC1Args,
+    Action::OffloadKind DeviceOffloadingKind) const {
+  HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
 
-  CC1Args.push_back("-fcuda-is-device");
+  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
+  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
+  assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
+          DeviceOffloadingKind == Action::OFK_Cuda) &&
+         "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
 
-  if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
-                         options::OPT_fno_cuda_flush_denormals_to_zero, false))
-    CC1Args.push_back("-fcuda-flush-denormals-to-zero");
+  if (DeviceOffloadingKind == Action::OFK_Cuda) {
+    CC1Args.push_back("-fcuda-is-device");
 
-  if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
-                         options::OPT_fno_cuda_approx_transcendentals, false))
-    CC1Args.push_back("-fcuda-approx-transcendentals");
+    if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
+                           options::OPT_fno_cuda_flush_denormals_to_zero, false))
+      CC1Args.push_back("-fcuda-flush-denormals-to-zero");
 
-  if (DriverArgs.hasArg(options::OPT_nocudalib))
-    return;
+    if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
+                           options::OPT_fno_cuda_approx_transcendentals, false))
+      CC1Args.push_back("-fcuda-approx-transcendentals");
 
-  StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
-  assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
-  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
+    if (DriverArgs.hasArg(options::OPT_nocudalib))
+      return;
+  }
 
+  std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
   if (LibDeviceFile.empty()) {
     getDriver().Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
     return;
@@ -404,6 +423,29 @@
 
   const OptTable &Opts = getDriver().getOpts();
 
+  // For OpenMP device offloading, append derived arguments along with
+  // the compute capability. Make sure flags are not duplicated.
+  if (DeviceOffloadKind == Action::OFK_OpenMP) {
+    for (Arg *A : Args){
+      bool IsDuplicate = false;
+      for (Arg *DALArg : *DAL){
+        if (A == DALArg) {
+          IsDuplicate = true;
+          break;
+        }
+      }
+      if (!IsDuplicate)
+        DAL->append(A);
+    }
+
+    // TODO: get the compute capability from offloading arguments when not
+    // using the default compute capability of sm_20.
+    if (Args.getAllArgValues(options::OPT_march_EQ).empty())
+      DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
+                      "sm_20");
+    return DAL;
+  }
+
   for (Arg *A : Args) {
     if (A->getOption().matches(options::OPT_Xarch__)) {
       // Skip this argument unless the architecture matches BoundArch
Index: lib/Driver/ToolChains/Clang.cpp
===================================================================
--- lib/Driver/ToolChains/Clang.cpp
+++ lib/Driver/ToolChains/Clang.cpp
@@ -2574,7 +2574,8 @@
                    AsynchronousUnwindTables))
     CmdArgs.push_back("-munwind-tables");
 
-  getToolChain().addClangTargetOptions(Args, CmdArgs);
+  getToolChain().addClangTargetOptions(Args, CmdArgs,
+                                       JA.getOffloadingDeviceKind());
 
   if (Arg *A = Args.getLastArg(options::OPT_flimited_precision_EQ)) {
     CmdArgs.push_back("-mlimit-float-precision");
Index: lib/Driver/ToolChains/BareMetal.h
===================================================================
--- lib/Driver/ToolChains/BareMetal.h
+++ lib/Driver/ToolChains/BareMetal.h
@@ -54,7 +54,8 @@
   void AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
                                  llvm::opt::ArgStringList &CC1Args) const override;
   void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
-                             llvm::opt::ArgStringList &CC1Args) const override;
+                             llvm::opt::ArgStringList &CC1Args,
+                             Action::OffloadKind DeviceOffloadKind) const override;
   std::string findLibCxxIncludePath(ToolChain::CXXStdlibType LibType) const;
   void AddClangCXXStdlibIncludeArgs(
       const llvm::opt::ArgList &DriverArgs,
Index: lib/Driver/ToolChains/BareMetal.cpp
===================================================================
--- lib/Driver/ToolChains/BareMetal.cpp
+++ lib/Driver/ToolChains/BareMetal.cpp
@@ -98,7 +98,8 @@
 }
 
 void BareMetal::addClangTargetOptions(const ArgList &DriverArgs,
-                                      ArgStringList &CC1Args) const {
+                                      ArgStringList &CC1Args,
+                                      Action::OffloadKind) const {
   CC1Args.push_back("-nostdsysteminc");
 }
 
Index: lib/Driver/ToolChain.cpp
===================================================================
--- lib/Driver/ToolChain.cpp
+++ lib/Driver/ToolChain.cpp
@@ -544,9 +544,9 @@
   // Each toolchain should provide the appropriate include flags.
 }
 
-void ToolChain::addClangTargetOptions(const ArgList &DriverArgs,
-                                      ArgStringList &CC1Args) const {
-}
+void ToolChain::addClangTargetOptions(
+    const ArgList &DriverArgs, ArgStringList &CC1Args,
+    Action::OffloadKind DeviceOffloadKind) const {}
 
 void ToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {}
 
Index: include/clang/Driver/ToolChain.h
===================================================================
--- include/clang/Driver/ToolChain.h
+++ include/clang/Driver/ToolChain.h
@@ -411,7 +411,8 @@
 
   /// \brief Add options that need to be passed to cc1 for this target.
   virtual void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
-                                     llvm::opt::ArgStringList &CC1Args) const;
+                                     llvm::opt::ArgStringList &CC1Args,
+                                     Action::OffloadKind DeviceOffloadKind) const;
 
   /// \brief Add warning options that need to be passed to cc1 for this target.
   virtual void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to