jhuber6 created this revision.
jhuber6 added reviewers: JonChesterfield, tianshilei1992, jdoerfert, 
ggeorgakoudis.
Herald added subscribers: asavonic, dang, kerbowa, guansong, yaxunl, jvesely.
Herald added a project: All.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1.
Herald added a project: clang.

This patch adds support for linking in the OpenMP math wrappers library.
The math library first replaces every math call with a call to an OpenMP
wrapper function. That wrapper call is linked early against a library
that associates the wrapper with the original math function. This is
necessary to get access to the math function symbols without including
the math header, which contains code that is incompatible with the GPU.
A second library maps these wrapper functions to the device library's
version of each function; it is linked late when doing LTO. Linking
libdevice must happen after this library, and must also be done late
when doing LTO. Unfortunately, doing LTO with libdevice is very slow
right now.

Depends on D121467 <https://reviews.llvm.org/D121467>


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D121468

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Driver/ToolChains/CommonArgs.cpp
  clang/lib/Driver/ToolChains/CommonArgs.h
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/test/Driver/Inputs/libomptarget/libomptarget-nvptx-math.bc
  clang/test/Driver/Inputs/libomptarget/libomptarget-nvptx-wrapper.bc
  clang/test/Driver/openmp-offload-gpu.c

Index: clang/test/Driver/openmp-offload-gpu.c
===================================================================
--- clang/test/Driver/openmp-offload-gpu.c
+++ clang/test/Driver/openmp-offload-gpu.c
@@ -346,3 +346,13 @@
 // RUN:   | FileCheck -check-prefix=NEW_DRIVER_EMBEDDING %s
 
 // NEW_DRIVER_EMBEDDING: -fembed-offload-object=[[CUBIN:.*\.cubin]],nvptx64-nvidia-cuda.sm_70
+
+// RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target=nvptx64-nvidia-cuda -march=sm_70 \
+// RUN:          --libomptarget-nvptx-wrapper-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-wrapper.bc \
+// RUN:          --libomptarget-nvptx-math-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-math.bc \
+// RUN:          --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-new-nvptx-test.bc \
+// RUN:          -foffload-lto -fopenmp-device-libm -fopenmp-new-driver -no-canonical-prefixes %s -o openmp-offload-gpu 2>&1 \
+// RUN:   | FileCheck -check-prefix=DEVICE_LIBM %s
+
+// DEVICE_LIBM: clang{{.*}}"-mlink-builtin-bitcode" "{{.*}}/Inputs/libomptarget/libomptarget-nvptx-wrapper.bc"
+// DEVICE_LIBM: clang-linker-wrapper{{.*}}"-target-library=nvptx64-nvidia-cuda-sm_70={{.*}}/Inputs/libomptarget/libomptarget-new-nvptx-test.bc" "-target-library=nvptx64-nvidia-cuda-sm_70={{.*}}/Inputs/libomptarget/libomptarget-nvptx-math.bc"
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -696,9 +696,6 @@
     return;
   }
 
-  CC1Args.push_back("-mlink-builtin-bitcode");
-  CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
-
   clang::CudaVersion CudaInstallationVersion = CudaInstallation.version();
 
   // New CUDA versions often introduce new instructions that are only supported
@@ -744,12 +741,19 @@
       return;
     }
 
+    if (DriverArgs.hasArg(options::OPT_fopenmp_device_libm))
+      addOpenMPMathRTL(getDriver(), DriverArgs, CC1Args, getTriple());
+
     // Link the bitcode library late if we're using device LTO.
     if (getDriver().isUsingLTO(/* IsOffload */ true))
       return;
 
     addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, GpuArch.str(),
                        getTriple());
+
+    CC1Args.push_back("-mlink-builtin-bitcode");
+    CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
+
     AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx",
                                    GpuArch, /*isBitCodeSDL=*/true,
                                    /*postClangLink=*/true);
Index: clang/lib/Driver/ToolChains/CommonArgs.h
===================================================================
--- clang/lib/Driver/ToolChains/CommonArgs.h
+++ clang/lib/Driver/ToolChains/CommonArgs.h
@@ -189,6 +189,10 @@
                             llvm::opt::ArgStringList &CmdArgs,
                             const llvm::Triple &Triple, bool IsLTO);
 
+void addOpenMPMathRTL(const Driver &D, const llvm::opt::ArgList &DriverArgs,
+                      llvm::opt::ArgStringList &CC1Args,
+                      const llvm::Triple &Triple, bool IncludeLibm = false);
+
 void addOpenMPDeviceRTL(const Driver &D, const llvm::opt::ArgList &DriverArgs,
                         llvm::opt::ArgStringList &CC1Args,
                         StringRef BitcodeSuffix, const llvm::Triple &Triple);
Index: clang/lib/Driver/ToolChains/CommonArgs.cpp
===================================================================
--- clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -1990,6 +1990,94 @@
   }
 }
 
+static void
+addBitcodeLibrary(const Driver &D, const llvm::opt::ArgList &DriverArgs,
+                  llvm::opt::ArgStringList &CC1Args, const llvm::Triple &Triple,
+                  ArrayRef<StringRef> LibraryPaths,
+                  StringRef BitcodeLibraryName, OptSpecifier BCPathOpt) {
+  // First check whether user specified the bc library.
+  if (const Arg *A = DriverArgs.getLastArg(BCPathOpt)) {
+    SmallString<128> LibOmpTargetFile(A->getValue());
+    if (llvm::sys::fs::exists(LibOmpTargetFile) &&
+        llvm::sys::fs::is_directory(LibOmpTargetFile)) {
+      llvm::sys::path::append(LibOmpTargetFile, BitcodeLibraryName);
+    }
+
+    if (llvm::sys::fs::exists(LibOmpTargetFile)) {
+      CC1Args.push_back("-mlink-builtin-bitcode");
+      CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
+    } else {
+      D.Diag(diag::err_drv_omp_offload_target_bcruntime_not_found)
+          << LibOmpTargetFile;
+    }
+    return;
+  }
+
+  bool FoundBCLibrary = false;
+
+  for (StringRef LibraryPath : LibraryPaths) {
+    SmallString<128> LibOmpTargetFile(LibraryPath);
+    llvm::sys::path::append(LibOmpTargetFile, BitcodeLibraryName);
+    if (llvm::sys::fs::exists(LibOmpTargetFile)) {
+      CC1Args.push_back("-mlink-builtin-bitcode");
+      CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
+      FoundBCLibrary = true;
+      break;
+    }
+  }
+
+  if (!FoundBCLibrary)
+    D.Diag(diag::err_drv_omp_offload_target_missingbcruntime)
+        << BitcodeLibraryName << (Triple.isAMDGCN() ? "amdgpu" : "nvptx");
+}
+
+void tools::addOpenMPMathRTL(const Driver &D,
+                             const llvm::opt::ArgList &DriverArgs,
+                             llvm::opt::ArgStringList &CC1Args,
+                             const llvm::Triple &Triple, bool IncludeLibm) {
+  SmallVector<StringRef, 8> LibraryPaths;
+
+  // Add path to clang lib / lib64 folder.
+  SmallString<256> DefaultLibPath = llvm::sys::path::parent_path(D.Dir);
+  llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
+  LibraryPaths.emplace_back(DefaultLibPath.c_str());
+
+  // Add user defined library paths from LIBRARY_PATH.
+  llvm::Optional<std::string> LibPath =
+      llvm::sys::Process::GetEnv("LIBRARY_PATH");
+  if (LibPath) {
+    SmallVector<StringRef, 8> Frags;
+    const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
+    llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
+    for (StringRef Path : Frags)
+      LibraryPaths.emplace_back(Path.trim());
+  }
+
+  StringRef ArchPrefix = Triple.isAMDGCN() ? "amdgpu" : "nvptx";
+
+  OptSpecifier MathWrapperBCPathOpt =
+      Triple.isAMDGCN() ? options::OPT_libomptarget_amdgpu_wrapper_bc_path_EQ
+                        : options::OPT_libomptarget_nvptx_wrapper_bc_path_EQ;
+  std::string MathWrapperName =
+      ("libomptarget-" + ArchPrefix + "-math-wrappers.bc").str();
+
+  addBitcodeLibrary(D, DriverArgs, CC1Args, Triple, LibraryPaths,
+                    MathWrapperName, MathWrapperBCPathOpt);
+
+  // If we are doing LTO only link the OpenMP math wrappers.
+  if (D.isUsingLTO(/* IsOffload */ true) && !IncludeLibm)
+    return;
+
+  OptSpecifier MathBCPathOpt =
+      Triple.isAMDGCN() ? options::OPT_libomptarget_amdgpu_math_bc_path_EQ
+                        : options::OPT_libomptarget_nvptx_math_bc_path_EQ;
+  std::string DeviceMathName =
+      ("libomptarget-" + ArchPrefix + "-libm.bc").str();
+
+  addBitcodeLibrary(D, DriverArgs, CC1Args, Triple, LibraryPaths,
+                    DeviceMathName, MathBCPathOpt);
+}
+
 void tools::addOpenMPDeviceRTL(const Driver &D,
                                const llvm::opt::ArgList &DriverArgs,
                                llvm::opt::ArgStringList &CC1Args,
@@ -2021,37 +2109,6 @@
   std::string LibOmpTargetName =
       ("libomptarget-" + ArchPrefix + "-" + BitcodeSuffix + ".bc").str();
 
-  // First check whether user specifies bc library
-  if (const Arg *A = DriverArgs.getLastArg(LibomptargetBCPathOpt)) {
-    SmallString<128> LibOmpTargetFile(A->getValue());
-    if (llvm::sys::fs::exists(LibOmpTargetFile) &&
-        llvm::sys::fs::is_directory(LibOmpTargetFile)) {
-      llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
-    }
-
-    if (llvm::sys::fs::exists(LibOmpTargetFile)) {
-      CC1Args.push_back("-mlink-builtin-bitcode");
-      CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
-    } else {
-      D.Diag(diag::err_drv_omp_offload_target_bcruntime_not_found)
-          << LibOmpTargetFile;
-    }
-  } else {
-    bool FoundBCLibrary = false;
-
-    for (StringRef LibraryPath : LibraryPaths) {
-      SmallString<128> LibOmpTargetFile(LibraryPath);
-      llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
-      if (llvm::sys::fs::exists(LibOmpTargetFile)) {
-        CC1Args.push_back("-mlink-builtin-bitcode");
-        CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
-        FoundBCLibrary = true;
-        break;
-      }
-    }
-
-    if (!FoundBCLibrary)
-      D.Diag(diag::err_drv_omp_offload_target_missingbcruntime)
-          << LibOmpTargetName << ArchPrefix;
-  }
+  addBitcodeLibrary(D, DriverArgs, CC1Args, Triple, LibraryPaths,
+                    LibOmpTargetName, LibomptargetBCPathOpt);
 }
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -8244,7 +8244,13 @@
       if (CudaInstallation.isValid())
         CmdArgs.push_back(Args.MakeArgString(
             "--cuda-path=" + CudaInstallation.getInstallPath()));
-      break;
+
+      if (D.isUsingLTO(/* IsOffload */ true)) {
+        const ArgList &TCArgs =
+            C.getArgsForToolChain(TC, "", Action::OFK_OpenMP);
+        StringRef Arch = TCArgs.getLastArgValue(options::OPT_march_EQ);
+        std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(Arch);
+      }
     }
   }
 
@@ -8305,6 +8311,21 @@
         CmdArgs.push_back(
             Args.MakeArgString("-target-library=" + TC->getTripleString() +
                                "-" + Arch + "=" + BitcodeLibrary.back()));
+
+      ArgStringList MathLibrary;
+      addOpenMPMathRTL(TCDriver, TCArgs, MathLibrary, TC->getTriple(), true);
+
+      if (!MathLibrary.empty())
+        CmdArgs.push_back(
+            Args.MakeArgString("-target-library=" + TC->getTripleString() +
+                               "-" + Arch + "=" + MathLibrary.back()));
+
+      CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
+      std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(Arch);
+      if (!LibDeviceFile.empty())
+        CmdArgs.push_back(
+            Args.MakeArgString("-target-library=" + TC->getTripleString() +
+                               "-" + Arch + "=" + LibDeviceFile));
     }
 
     // Pass in the optimization level to use for LTO.
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -285,6 +285,9 @@
   if (DriverArgs.hasArg(options::OPT_nogpulib))
     return;
 
+  if (DriverArgs.hasArg(options::OPT_fopenmp_device_libm))
+    addOpenMPMathRTL(getDriver(), DriverArgs, CC1Args, getTriple());
+
   // Link the bitcode library late if we're using device LTO.
   if (getDriver().isUsingLTO(/* IsOffload */ true))
     return;
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -1046,6 +1046,14 @@
   HelpText<"Path to libomptarget-amdgcn bitcode library">, Alias<libomptarget_amdgpu_bc_path_EQ>;
 def libomptarget_nvptx_bc_path_EQ : Joined<["--"], "libomptarget-nvptx-bc-path=">, Group<i_Group>,
   HelpText<"Path to libomptarget-nvptx bitcode library">;
+def libomptarget_amdgpu_wrapper_bc_path_EQ : Joined<["--"], "libomptarget-amdgpu-wrapper-bc-path=">,
+  Group<i_Group>, HelpText<"Path to libomptarget-amdgpu math wrapper bitcode library">;
+def libomptarget_nvptx_wrapper_bc_path_EQ : Joined<["--"], "libomptarget-nvptx-wrapper-bc-path=">,
+  Group<i_Group>, HelpText<"Path to libomptarget-nvptx math wrapper bitcode library">;
+def libomptarget_amdgpu_math_bc_path_EQ : Joined<["--"], "libomptarget-amdgpu-math-bc-path=">,
+  Group<i_Group>, HelpText<"Path to libomptarget-amdgpu math bitcode library">;
+def libomptarget_nvptx_math_bc_path_EQ : Joined<["--"], "libomptarget-nvptx-math-bc-path=">,
+  Group<i_Group>, HelpText<"Path to libomptarget-nvptx math bitcode library">;
 def dD : Flag<["-"], "dD">, Group<d_Group>, Flags<[CC1Option]>,
   HelpText<"Print macro definitions in -E mode in addition to normal output">;
 def dI : Flag<["-"], "dI">, Group<d_Group>, Flags<[CC1Option]>,
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to