arsenm updated this revision to Diff 252692.
arsenm added a comment.

Use -nogpulib instead of -nodefaultlibs


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59321/new/

https://reviews.llvm.org/D59321

Files:
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPU.h
  clang/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl
  clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
  clang/test/Driver/Inputs/rocm-device-libs/lib/ockl.amdgcn.bc
  
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc
  
clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_correctly_rounded_sqrt_on.amdgcn.bc
  clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_daz_opt_off.amdgcn.bc
  clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_daz_opt_on.amdgcn.bc
  clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_finite_only_off.amdgcn.bc
  clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_finite_only_on.amdgcn.bc
  clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_803.amdgcn.bc
  clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_isa_version_900.amdgcn.bc
  clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_unsafe_math_off.amdgcn.bc
  clang/test/Driver/Inputs/rocm-device-libs/lib/oclc_unsafe_math_on.amdgcn.bc
  clang/test/Driver/Inputs/rocm-device-libs/lib/ocml.amdgcn.bc
  clang/test/Driver/Inputs/rocm-device-libs/lib/opencl.amdgcn.bc
  clang/test/Driver/amdgpu-visibility.cl
  clang/test/Driver/rocm-detect.cl
  clang/test/Driver/rocm-device-libs.cl
  clang/test/Driver/rocm-not-found.cl

Index: clang/test/Driver/rocm-not-found.cl
===================================================================
--- /dev/null
+++ clang/test/Driver/rocm-not-found.cl
@@ -0,0 +1,11 @@
+// REQUIRES: clang-driver
+
+// Check that we raise an error if we're trying to compile OpenCL code for amdhsa
+// but can't find a ROCm install, unless -nogpulib was passed.
+
+// RUN: %clang -### --sysroot=%s/no-rocm-there -target amdgcn--amdhsa %s 2>&1 | FileCheck %s --check-prefix ERR
+// RUN: %clang -### --rocm-path=%s/no-rocm-there -target amdgcn--amdhsa %s 2>&1 | FileCheck %s --check-prefix ERR
+// ERR: cannot find ROCm installation. Provide its path via --rocm-path, or pass -nogpulib.
+
+// RUN: %clang -### -nogpulib --rocm-path=%s/no-rocm-there -target amdgcn--amdhsa %s 2>&1 | FileCheck %s --check-prefix OK
+// OK-NOT: cannot find ROCm installation.
Index: clang/test/Driver/rocm-device-libs.cl
===================================================================
--- /dev/null
+++ clang/test/Driver/rocm-device-libs.cl
@@ -0,0 +1,121 @@
+// REQUIRES: clang-driver
+// REQUIRES: amdgpu-registered-target
+
+// Test that gfx900 without -cl-denorms-are-zero defaults to oclc_daz_opt_off
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN:   -x cl -mcpu=gfx900 \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX900-DEFAULT,GFX900 %s
+
+
+
+// Make sure the different denormal default is respected for gfx8
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN:   -x cl -mcpu=gfx803 \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803 %s
+
+
+
+// Make sure the non-canonical name works
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN:   -x cl -mcpu=fiji \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DEFAULT,GFX803-DEFAULT,GFX803 %s
+
+
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN:   -x cl -mcpu=gfx900 \
+// RUN:   -cl-denorms-are-zero \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DAZ,GFX900 %s
+
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN:   -x cl -mcpu=gfx803 \
+// RUN:   -cl-denorms-are-zero \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-DAZ,GFX803 %s
+
+
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa \
+// RUN:   -x cl -mcpu=gfx803 \
+// RUN:   -cl-finite-math-only \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-FINITE-ONLY,GFX803 %s
+
+
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa    \
+// RUN:   -x cl -mcpu=gfx803                     \
+// RUN:   -cl-fp32-correctly-rounded-divide-sqrt \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-CORRECT-SQRT,GFX803 %s
+
+
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa    \
+// RUN:   -x cl -mcpu=gfx803                     \
+// RUN:   -cl-fast-relaxed-math \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-FAST-RELAXED,GFX803 %s
+
+
+
+// RUN: %clang -### -target amdgcn-amd-amdhsa    \
+// RUN:   -x cl -mcpu=gfx803                     \
+// RUN:   -cl-unsafe-math-optimizations \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs \
+// RUN:   %S/opencl.cl \
+// RUN: 2>&1 | FileCheck -dump-input-on-failure --check-prefixes=COMMON,COMMON-UNSAFE-MATH,GFX803 %s
+
+
+
+// COMMON: "-triple" "amdgcn-amd-amdhsa"
+// COMMON-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/ocml.amdgcn.bc"
+// COMMON-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/ockl.amdgcn.bc"
+
+// GFX900-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_daz_opt_off.amdgcn.bc"
+// GFX803-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_daz_opt_on.amdgcn.bc"
+// GFX700-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_daz_opt_on.amdgcn.bc"
+// COMMON-DAZ-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_daz_opt_on.amdgcn.bc"
+
+
+// COMMON-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_unsafe_math_off.amdgcn.bc"
+// COMMON-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_finite_only_off.amdgcn.bc"
+// COMMON-DEFAULT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc"
+
+
+// COMMON-FINITE-ONLY-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_unsafe_math_off.amdgcn.bc"
+// COMMON-FINITE-ONLY-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_finite_only_on.amdgcn.bc"
+// COMMON-FINITE-ONLY-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc"
+
+
+// COMMON-CORRECT-SQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_unsafe_math_off.amdgcn.bc"
+// COMMON-CORRECT-SQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_finite_only_off.amdgcn.bc"
+// COMMON-CORRECT-SQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_correctly_rounded_sqrt_on.amdgcn.bc"
+
+
+// COMMON-FAST-RELAXED-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_unsafe_math_on.amdgcn.bc"
+// COMMON-FAST-RELAXED-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_finite_only_on.amdgcn.bc"
+// COMMON-FAST-RELAXED-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc"
+
+
+// COMMON-UNSAFE-MATH-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_unsafe_math_on.amdgcn.bc"
+// COMMON-UNSAFE-MATH-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_finite_only_off.amdgcn.bc"
+// COMMON-UNSAFE-MATH-SAME: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_correctly_rounded_sqrt_off.amdgcn.bc"
+
+
+// GFX900: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_isa_version_900.amdgcn.bc"
+// GFX803: "-mlink-builtin-bitcode" "{{.*}}/lib/oclc_isa_version_803.amdgcn.bc"
Index: clang/test/Driver/rocm-detect.cl
===================================================================
--- /dev/null
+++ clang/test/Driver/rocm-detect.cl
@@ -0,0 +1,21 @@
+// REQUIRES: clang-driver
+// REQUIRES: amdgpu-registered-target
+
+// Make sure a missing device-specific library is diagnosed unless -nogpulib is passed.
+
+// We don't include every target in the test directory, so just pick a valid
+// target not included in the test.
+
+// RUN: %clang -### -v -target amdgcn-amd-amdhsa -mcpu=gfx902 \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=COMMON,GFX902-DEFAULTLIBS %s
+
+
+// RUN: %clang -### -v -target amdgcn-amd-amdhsa -mcpu=gfx902 -nogpulib \
+// RUN:   --rocm-path=%S/Inputs/rocm-device-libs %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=COMMON,GFX902,NODEFAULTLIBS %s
+
+
+// GFX902-DEFAULTLIBS: error: cannot find device library for gfx902. Provide path to different ROCm installation via --rocm-path, or pass -nogpulib to build without linking default libraries.
+
+// NODEFAULTLIBS-NOT: error: cannot find
Index: clang/test/Driver/amdgpu-visibility.cl
===================================================================
--- clang/test/Driver/amdgpu-visibility.cl
+++ clang/test/Driver/amdgpu-visibility.cl
@@ -2,6 +2,10 @@
 // RUN: %clang -### -target amdgcn-amd-amdhsa -x cl -c -emit-llvm -fvisibility=protected  %s 2>&1 | FileCheck -check-prefix=OVERRIDE-PROTECTED  %s
 // RUN: %clang -### -target amdgcn-amd-amdhsa -x cl -c -emit-llvm -fvisibility-ms-compat  %s 2>&1 | FileCheck -check-prefix=OVERRIDE-MS  %s
 
+// RUN: %clang -### -target amdgcn-mesa-mesa3d -x cl -c -emit-llvm %s 2>&1 | FileCheck -check-prefix=DEFAULT %s
+// RUN: %clang -### -target amdgcn-mesa-mesa3d -x cl -c -emit-llvm -fvisibility=protected  %s 2>&1 | FileCheck -check-prefix=OVERRIDE-PROTECTED  %s
+// RUN: %clang -### -target amdgcn-mesa-mesa3d -x cl -c -emit-llvm -fvisibility-ms-compat  %s 2>&1 | FileCheck -check-prefix=OVERRIDE-MS  %s
+
 // DEFAULT-DAG: "-fvisibility" "hidden"
 // DEFAULT-DAG: "-fapply-global-visibility-to-externs"
 
Index: clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
===================================================================
--- clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
+++ clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl
@@ -1,5 +1,5 @@
-// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -target amdgcn-amd-amdhsa -mcpu=fiji -o - %s | FileCheck %s
-// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -target amdgcn-amd-amdhsa-opencl -mcpu=fiji -o - %s | FileCheck %s
+// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -nogpulib -target amdgcn-amd-amdhsa -mcpu=fiji -o - %s | FileCheck %s
+// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -nogpulib -target amdgcn-amd-amdhsa-opencl -mcpu=fiji -o - %s | FileCheck %s
 
 // CHECK-DAG: ![[FILEVAR0:[0-9]+]] = distinct !DIGlobalVariable(name: "FileVar0", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: false, isDefinition: true)
 // CHECK-DAG: !DIGlobalVariableExpression(var: ![[FILEVAR0]], expr: !DIExpression())
Index: clang/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl
===================================================================
--- clang/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl
+++ clang/test/CodeGenOpenCL/amdgpu-debug-info-pointer-address-space.cl
@@ -1,5 +1,5 @@
-// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -target amdgcn-amd-amdhsa -mcpu=fiji -o - %s | FileCheck %s
-// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -target amdgcn-amd-amdhsa-opencl -mcpu=fiji -o - %s | FileCheck %s
+// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -nogpulib -target amdgcn-amd-amdhsa -mcpu=fiji -o - %s | FileCheck %s
+// RUN: %clang -cl-std=CL2.0 -emit-llvm -g -O0 -S -nogpulib -target amdgcn-amd-amdhsa-opencl -mcpu=fiji -o - %s | FileCheck %s
 
 // CHECK-DAG: ![[DWARF_ADDRESS_SPACE_NONE:[0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !{{[0-9]+}}, size: {{[0-9]+}})
 // CHECK-DAG: ![[DWARF_ADDRESS_SPACE_LOCAL:[0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !{{[0-9]+}}, size: {{[0-9]+}}, dwarfAddressSpace: 2)
Index: clang/lib/Driver/ToolChains/AMDGPU.h
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPU.h
+++ clang/lib/Driver/ToolChains/AMDGPU.h
@@ -13,10 +13,132 @@
 #include "clang/Driver/Options.h"
 #include "clang/Driver/Tool.h"
 #include "clang/Driver/ToolChain.h"
+#include "llvm/ADT/SmallString.h"
+
 #include <map>
 
 namespace clang {
 namespace driver {
+
+/// A class to find a viable ROCm installation and the device bitcode
+/// libraries it contains. TODO: Generalize to handle libclc.
+class RocmInstallationDetector {
+private:
+  struct ConditionalLibrary {
+    SmallString<0> On;
+    SmallString<0> Off;
+    // Usable only once both the "on" and the "off" variant were recorded.
+    bool isValid() const {
+      return !On.empty() && !Off.empty();
+    }
+    // Select the variant for Enabled; asserts that both variants are present.
+    StringRef get(bool Enabled) const {
+      assert(isValid());
+      return Enabled ? On : Off;
+    }
+  };
+
+  const Driver &D;
+  bool IsValid = false;
+  //RocmVersion Version = RocmVersion::UNKNOWN;
+  SmallString<0> InstallPath;
+  //SmallString<0> BinPath;
+  SmallString<0> LibPath;
+  SmallString<0> LibDevicePath;
+  SmallString<0> IncludePath;
+  llvm::StringMap<std::string> LibDeviceMap;
+
+  // Libraries that are always linked.
+  SmallString<0> OCML;
+  SmallString<0> OCKL;
+
+  // Libraries swapped based on compile flags.
+  ConditionalLibrary FiniteOnly;
+  ConditionalLibrary UnsafeMath;
+  ConditionalLibrary DenormalsAreZero;
+  ConditionalLibrary CorrectlyRoundedSqrt;
+
+  bool allGenericLibsValid() const {
+    return !OCML.empty() &&
+           !OCKL.empty() &&
+           FiniteOnly.isValid() &&
+           UnsafeMath.isValid() &&
+           DenormalsAreZero.isValid() &&
+           CorrectlyRoundedSqrt.isValid();
+  }
+
+  // Architectures (as CudaArch values) for which we have raised an error in
+  // CheckRocmVersionSupportsArch.
+  mutable llvm::SmallSet<CudaArch, 4> ArchsWithBadVersion;
+
+public:
+  RocmInstallationDetector(const Driver &D, const llvm::Triple &HostTriple,
+                           const llvm::opt::ArgList &Args);
+
+  void AddRocmIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+                          llvm::opt::ArgStringList &CC1Args) const;
+
+  /// Emit an error if Version does not support the given Arch.
+  ///
+  /// If either Version or Arch is unknown, does not emit an error.  Emits at
+  /// most one error per Arch.
+  void CheckRocmVersionSupportsArch(CudaArch Arch) const;
+
+  /// Check whether we detected a valid ROCm install.
+  bool isValid() const { return IsValid; }
+  /// Print information about the detected ROCm installation.
+  void print(raw_ostream &OS) const;
+
+  /// Get the detected ROCm install's version.
+  //RocmVersion version() const { return Version; }
+
+  /// Get the detected ROCm installation path.
+  StringRef getInstallPath() const { return InstallPath; }
+
+  /// Get the detected path to ROCm's bin directory.
+  //StringRef getBinPath() const { return BinPath; }
+
+  /// Get the detected ROCm include path.
+  StringRef getIncludePath() const { return IncludePath; }
+
+  /// Get the detected ROCm library path.
+  StringRef getLibPath() const { return LibPath; }
+
+  /// Get the detected ROCm device library path.
+  StringRef getLibDevicePath() const { return LibDevicePath; }
+
+  StringRef getOCMLPath() const {
+    assert(!OCML.empty());
+    return OCML;
+  }
+
+  StringRef getOCKLPath() const {
+    assert(!OCKL.empty());
+    return OCKL;
+  }
+
+  StringRef getFiniteOnlyPath(bool Enabled) const {
+    return FiniteOnly.get(Enabled);
+  }
+
+  StringRef getUnsafeMathPath(bool Enabled) const {
+    return UnsafeMath.get(Enabled);
+  }
+
+  StringRef getDenormalsAreZeroPath(bool Enabled) const {
+    return DenormalsAreZero.get(Enabled);
+  }
+
+  StringRef getCorrectlyRoundedSqrtPath(bool Enabled) const {
+    return CorrectlyRoundedSqrt.get(Enabled);
+  }
+
+  /// Get the device library file recorded for the given GPU name, if any.
+  std::string getLibDeviceFile(StringRef Gpu) const {
+    return LibDeviceMap.lookup(Gpu);
+  }
+};
+
 namespace tools {
 namespace amdgpu {
 
@@ -40,11 +162,9 @@
 namespace toolchains {
 
 class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF {
-
-private:
+protected:
   const std::map<options::ID, const StringRef> OptionsDefault;
 
-protected:
   Tool *buildLinker() const override;
   const StringRef getOptionDefault(options::ID OptID) const {
     auto opt = OptionsDefault.find(OptID);
@@ -73,6 +193,20 @@
       const llvm::fltSemantics *FPType = nullptr) const override;
 };
 
+/// AMDGPU toolchain that additionally links the ROCm device libraries.
+class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain {
+private:
+  RocmInstallationDetector RocmInstallation;
+
+public:
+  ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
+                const llvm::opt::ArgList &Args);
+  void addClangTargetOptions(
+    const llvm::opt::ArgList &DriverArgs,
+    llvm::opt::ArgStringList &CC1Args,
+    Action::OffloadKind DeviceOffloadKind) const override;
+};
+
 } // end namespace toolchains
 } // end namespace driver
 } // end namespace clang
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -12,7 +12,9 @@
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/DriverDiagnostic.h"
 #include "llvm/Option/ArgList.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/TargetParser.h"
+#include "llvm/Support/VirtualFileSystem.h"
 
 using namespace clang::driver;
 using namespace clang::driver::tools;
@@ -20,6 +22,153 @@
 using namespace clang;
 using namespace llvm::opt;
 
+/// Probe the candidate ROCm install directories and record the device bitcode
+/// libraries found in <candidate>/lib; IsValid is set for the first usable one.
+RocmInstallationDetector::RocmInstallationDetector(
+    const Driver &D, const llvm::Triple &HostTriple,
+    const llvm::opt::ArgList &Args)
+    : D(D) {
+  struct Candidate {
+    std::string Path;
+    bool StrictChecking;
+
+    Candidate(std::string Path, bool StrictChecking = false)
+        : Path(Path), StrictChecking(StrictChecking) {}
+  };
+
+  SmallVector<Candidate, 4> Candidates;
+
+  if (Args.hasArg(clang::driver::options::OPT_rocm_path_EQ)) {
+    Candidates.emplace_back(
+        Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ).str());
+  } else {
+    // Try to find relative to the compiler binary.
+    const char *InstallDir = D.getInstalledDir();
+
+    // Check both a normal Unix prefix position of the clang binary, as well as
+    // the Windows-esque layout the ROCm packages use with the host architecture
+    // subdirectory of bin. Only the last path component is compared: a whole
+    // directory path never equals "bin" or the bare architecture name.
+    StringRef BinDir = InstallDir;
+    if (llvm::sys::path::filename(BinDir) == HostTriple.getArchName())
+      BinDir = llvm::sys::path::parent_path(BinDir);
+
+    if (llvm::sys::path::filename(BinDir) == "bin") {
+      Candidates.emplace_back(llvm::sys::path::parent_path(BinDir).str(),
+                              /*StrictChecking=*/true);
+    }
+
+    Candidates.emplace_back(D.SysRoot + "/opt/rocm");
+  }
+
+  bool NoBuiltinLibs = Args.hasArg(options::OPT_nogpulib);
+
+  for (const auto &Candidate : Candidates) {
+    InstallPath = Candidate.Path;
+    if (InstallPath.empty() || !D.getVFS().exists(InstallPath))
+      continue;
+
+    // Forget anything recorded while scanning a previously rejected candidate.
+    OCML.clear();
+    OCKL.clear();
+    FiniteOnly = UnsafeMath = DenormalsAreZero = CorrectlyRoundedSqrt =
+        ConditionalLibrary();
+    LibDeviceMap.clear();
+
+    // FIXME: The install path situation is a real mess.
+    //  - A cmake install places these directly in ${INSTALL_PREFIX}/lib.
+    //  - The separate OpenCL builds place the bitcode libraries in
+    //    ${OPENCL_ROOT}/lib/x86_64/bitcode/*.
+    //  - The rocm installed packages place them at
+    //    /opt/rocm/opencl/lib/x86_64/bitcode.
+    //  - An additional copy is installed, in scattered locations between
+    //    /opt/rocm/hcc/rocdl/oclc, /opt/rocm/hcc/rocdl/ockl and
+    //    /opt/rocm/hcc/rocdl/lib.
+    //  - Yet another complete set is installed to /opt/rocm/hcc/rocdl/lib.
+    // For now just recognize the <install path>/lib layout.
+
+    // Rebuild both paths from scratch: appending to the members directly would
+    // extend whatever an earlier candidate left in them.
+    IncludePath = InstallPath;
+    llvm::sys::path::append(IncludePath, "include");
+    LibDevicePath = InstallPath;
+    llvm::sys::path::append(LibDevicePath, "lib");
+
+    // We don't need the include path for OpenCL, since clang already ships with
+    // the default header.
+    auto &FS = D.getVFS();
+    bool CheckLibDevice = (!NoBuiltinLibs || Candidate.StrictChecking);
+    if (CheckLibDevice && !FS.exists(LibDevicePath))
+      continue;
+
+    const StringRef Suffix(".amdgcn.bc");
+
+    std::error_code EC;
+    for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
+         !EC && LI != LE; LI.increment(EC)) {
+      StringRef FilePath = LI->path();
+      StringRef FileName = llvm::sys::path::filename(FilePath);
+      if (!FileName.endswith(Suffix))
+        continue;
+
+      StringRef BaseName = FileName.drop_back(Suffix.size());
+
+      if (BaseName == "ocml") {
+        OCML = FilePath;
+      } else if (BaseName == "ockl") {
+        OCKL = FilePath;
+      } else if (BaseName == "oclc_finite_only_off") {
+        FiniteOnly.Off = FilePath;
+      } else if (BaseName == "oclc_finite_only_on") {
+        FiniteOnly.On = FilePath;
+      } else if (BaseName == "oclc_daz_opt_on") {
+        DenormalsAreZero.On = FilePath;
+      } else if (BaseName == "oclc_daz_opt_off") {
+        DenormalsAreZero.Off = FilePath;
+      } else if (BaseName == "oclc_correctly_rounded_sqrt_on") {
+        CorrectlyRoundedSqrt.On = FilePath;
+      } else if (BaseName == "oclc_correctly_rounded_sqrt_off") {
+        CorrectlyRoundedSqrt.Off = FilePath;
+      } else if (BaseName == "oclc_unsafe_math_on") {
+        UnsafeMath.On = FilePath;
+      } else if (BaseName == "oclc_unsafe_math_off") {
+        UnsafeMath.Off = FilePath;
+      } else {
+        // Process all bitcode filenames that look like
+        // oclc_isa_version_XXX.amdgcn.bc
+        const StringRef DeviceLibPrefix = "oclc_isa_version_";
+        if (!BaseName.startswith(DeviceLibPrefix))
+          continue;
+
+        StringRef IsaVersionNumber = BaseName.drop_front(DeviceLibPrefix.size());
+        // Build the key in real storage; a Twine must not be kept in a local.
+        SmallString<8> GfxName("gfx");
+        GfxName += IsaVersionNumber;
+        LibDeviceMap.insert(std::make_pair(GfxName.str(), FilePath.str()));
+      }
+    }
+
+    if (!NoBuiltinLibs) {
+      // Check that the required non-target libraries are all available.
+      if (!allGenericLibsValid())
+        continue;
+
+      // Check that we have found at least one libdevice that we can link in if
+      // -nogpulib hasn't been specified.
+      if (LibDeviceMap.empty())
+        continue;
+    }
+
+    IsValid = true;
+    break;
+  }
+}
+
+void RocmInstallationDetector::print(raw_ostream &OS) const {
+  if (isValid())
+    OS << "Found ROCm installation: " << InstallPath << '\n';
+}
+
 void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                   const InputInfo &Output,
                                   const InputInfoList &Inputs,
@@ -138,6 +287,12 @@
                llvm::DenormalMode::getIEEE();
 }
 
+/// ROCM Toolchain
+ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
+                             const ArgList &Args)
+  : AMDGPUToolChain(D, Triple, Args),
+    RocmInstallation(D, Triple, Args) { }
+
 void AMDGPUToolChain::addClangTargetOptions(
     const llvm::opt::ArgList &DriverArgs,
     llvm::opt::ArgStringList &CC1Args,
@@ -151,3 +306,75 @@
     CC1Args.push_back("-fapply-global-visibility-to-externs");
   }
 }
+
+/// Add the cc1 options shared by all AMDGPU targets, then, unless -nogpulib was
+/// given, one "-mlink-builtin-bitcode" <path> pair per ROCm device library.
+/// Diagnoses a missing ROCm installation and a missing library for the
+/// requested GPU; in both cases no device library is added.
+void ROCMToolChain::addClangTargetOptions(
+    const llvm::opt::ArgList &DriverArgs,
+    llvm::opt::ArgStringList &CC1Args,
+    Action::OffloadKind DeviceOffloadingKind) const {
+  AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
+
+  // Nothing to link when the user opted out of the GPU libraries.
+  if (DriverArgs.hasArg(options::OPT_nogpulib))
+    return;
+
+  if (!RocmInstallation.isValid()) {
+    getDriver().Diag(diag::err_drv_no_rocm_installation);
+    return;
+  }
+
+  // Get the device name and canonicalize it
+  const StringRef RequestedCpu = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
+  auto GpuKind = llvm::AMDGPU::parseArchAMDGCN(RequestedCpu);
+  const StringRef CanonicalCpu = llvm::AMDGPU::getArchNameAMDGCN(GpuKind);
+
+  // The ISA-specific library is looked up by canonical name, but the
+  // diagnostic reports the name exactly as the user wrote it.
+  std::string IsaLibrary = RocmInstallation.getLibDeviceFile(CanonicalCpu);
+  if (IsaLibrary.empty()) {
+    getDriver().Diag(diag::err_drv_no_rocm_device_lib) << RequestedCpu;
+    return;
+  }
+
+  // Targets with both fast f32 FMA and fast f32 denormals get the daz "off"
+  // library unless -cl-denorms-are-zero is given; all others get daz "on".
+  const unsigned ArchFeatures = llvm::AMDGPU::getArchAttrAMDGCN(GpuKind);
+  const bool KeepsDenormalsByDefault =
+      (ArchFeatures & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
+      (ArchFeatures & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);
+
+  // TODO: There are way too many flags that change this. Do we need to check
+  // them all?
+  const bool FlushDenormals =
+      DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
+      !KeepsDenormalsByDefault;
+  const bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);
+  const bool UnsafeMath =
+      DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
+  const bool FastRelaxed = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
+  const bool CorrectSqrt =
+      DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
+
+  // Every library is handed to cc1 as a flag followed by the library path.
+  auto LinkBitcode = [&](StringRef BitcodePath) {
+    CC1Args.push_back("-mlink-builtin-bitcode");
+    CC1Args.push_back(DriverArgs.MakeArgString(BitcodePath));
+  };
+
+  // Libraries that are always linked.
+  LinkBitcode(RocmInstallation.getOCMLPath());
+  LinkBitcode(RocmInstallation.getOCKLPath());
+
+  // Variants selected by the floating-point options; -cl-fast-relaxed-math
+  // implies both the unsafe-math and the finite-only variant.
+  LinkBitcode(RocmInstallation.getDenormalsAreZeroPath(FlushDenormals));
+  LinkBitcode(RocmInstallation.getUnsafeMathPath(UnsafeMath || FastRelaxed));
+  LinkBitcode(RocmInstallation.getFiniteOnlyPath(FiniteOnly || FastRelaxed));
+  LinkBitcode(RocmInstallation.getCorrectlyRoundedSqrtPath(CorrectSqrt));
+
+  // ISA-specific library for the selected GPU.
+  LinkBitcode(IsaLibrary);
+}
Index: clang/lib/Driver/Driver.cpp
===================================================================
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -4854,6 +4854,8 @@
       TC = std::make_unique<toolchains::Solaris>(*this, Target, Args);
       break;
     case llvm::Triple::AMDHSA:
+      TC = std::make_unique<toolchains::ROCMToolChain>(*this, Target, Args);
+      break;
     case llvm::Triple::AMDPAL:
     case llvm::Triple::Mesa3D:
       TC = std::make_unique<toolchains::AMDGPUToolChain>(*this, Target, Args);
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -604,6 +604,8 @@
 def fcuda_short_ptr : Flag<["-"], "fcuda-short-ptr">, Flags<[CC1Option]>,
   HelpText<"Use 32-bit pointers for accessing const/local/shared address spaces.">;
 def fno_cuda_short_ptr : Flag<["-"], "fno-cuda-short-ptr">;
+def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group<Link_Group>,
+  HelpText<"ROCm installation path">;
 def hip_device_lib_path_EQ : Joined<["--"], "hip-device-lib-path=">, Group<Link_Group>,
   HelpText<"HIP device library path">;
 def hip_device_lib_EQ : Joined<["--"], "hip-device-lib=">, Group<Link_Group>,
Index: clang/include/clang/Basic/DiagnosticDriverKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -55,6 +55,14 @@
 def err_drv_no_cuda_libdevice : Error<
   "cannot find libdevice for %0. Provide path to different CUDA installation "
   "via --cuda-path, or pass -nocudalib to build without linking with libdevice.">;
+
+def err_drv_no_rocm_installation : Error<
+  "cannot find ROCm installation.  Provide its path via --rocm-path, or pass "
+  "-nogpulib.">;
+def err_drv_no_rocm_device_lib : Error<
+  "cannot find device library for %0. Provide path to different ROCm installation "
+  "via --rocm-path, or pass -nogpulib to build without linking default libraries.">;
+
 def err_drv_cuda_version_unsupported : Error<
   "GPU arch %0 is supported by CUDA versions between %1 and %2 (inclusive), "
   "but installation at %3 is %4.  Use --cuda-path to specify a different CUDA "
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to