[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)

2024-01-30 Thread Joseph Huber via cfe-commits

https://github.com/jhuber6 closed 
https://github.com/llvm/llvm-project/pull/79873
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)

2024-01-30 Thread Joseph Huber via cfe-commits

https://github.com/jhuber6 updated 
https://github.com/llvm/llvm-project/pull/79873

>From 35e12c3d83f3be93618805ffaf05e3424689f32f Mon Sep 17 00:00:00 2001
From: Joseph Huber 
Date: Mon, 29 Jan 2024 11:08:04 -0600
Subject: [PATCH 1/3] [NVPTX] Allow compiling LLVM-IR without `-march` set

Summary:
The NVPTX tools require an architecture to be used, however if we are
creating generic LLVM-IR we should be able to leave it unspecified. This
will result in the `target-cpu` attributes not being set on the
functions so it can be changed when linked into code. This allows the
standalone `--target=nvptx64-nvidia-cuda` toolchain to create LLVM-IR
similar to how CUDA's deviceRTL looks from C/C++
---
 .../clang/Basic/DiagnosticDriverKinds.td  |  2 ++
 clang/lib/Basic/Targets/NVPTX.cpp |  7 +-
 clang/lib/Basic/Targets/NVPTX.h   |  3 ++-
 clang/lib/Driver/ToolChains/Cuda.cpp  | 19 ++-
 clang/test/Driver/cuda-cross-compiling.c  | 24 +++
 .../Preprocessor/predefined-arch-macros.c | 12 ++
 6 files changed, 49 insertions(+), 18 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td 
b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 094fe19509412..476528375fb88 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -57,6 +57,8 @@ def warn_drv_avr_stdlib_not_linked: Warning<
   InGroup;
 def err_drv_cuda_bad_gpu_arch : Error<"unsupported CUDA gpu architecture: %0">;
 def err_drv_offload_bad_gpu_arch : Error<"unsupported %0 gpu architecture: 
%1">;
+def err_drv_offload_missing_gpu_arch : Error<
+  "Must pass in an explicit %0 gpu architecture to '%1'">;
 def err_drv_no_cuda_installation : Error<
   "cannot find CUDA installation; provide its path via '--cuda-path', or pass "
   "'-nocudainc' to build without CUDA includes">;
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp 
b/clang/lib/Basic/Targets/NVPTX.cpp
index 0b9d97f69d146..7687e3faad770 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -59,7 +59,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
   // Define available target features
   // These must be defined in sorted order!
   NoAsmVariants = true;
-  GPU = CudaArch::SM_20;
+  GPU = CudaArch::UNUSED;
 
   if (TargetPointerWidth == 32)
 resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
@@ -169,6 +169,11 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions 
&Opts,
MacroBuilder &Builder) const {
   Builder.defineMacro("__PTX__");
   Builder.defineMacro("__NVPTX__");
+
+  // Skip setting architecture dependent macros if undefined.
+  if (GPU == CudaArch::UNUSED && !HostTarget)
+return;
+
   if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
 // Set __CUDA_ARCH__ for the GPU specified.
 std::string CUDAArchCode = [this] {
diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h
index 20d76b702a942..f476d49047c01 100644
--- a/clang/lib/Basic/Targets/NVPTX.h
+++ b/clang/lib/Basic/Targets/NVPTX.h
@@ -79,7 +79,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public 
TargetInfo {
   initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags,
  StringRef CPU,
  const std::vector &FeaturesVec) const override {
-Features[CudaArchToString(GPU)] = true;
+if (GPU != CudaArch::UNUSED)
+  Features[CudaArchToString(GPU)] = true;
 Features["ptx" + std::to_string(PTXVersion)] = true;
 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec);
   }
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp 
b/clang/lib/Driver/ToolChains/Cuda.cpp
index 8a9d0caaccf30..ca54d2d55426b 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -389,7 +389,11 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const 
JobAction &JA,
 GPUArchName = JA.getOffloadingArch();
   } else {
 GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
-assert(!GPUArchName.empty() && "Must have an architecture passed in.");
+if (GPUArchName.empty()) {
+  C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch)
+  << getToolChain().getArchName() << getShortName();
+  return;
+}
   }
 
   // Obtain architecture from the action.
@@ -593,7 +597,11 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
 CmdArgs.push_back("-v");
 
   StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ);
-  assert(!GPUArch.empty() && "At least one GPU Arch required for nvlink.");
+  if (GPUArch.empty()) {
+C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch)
+<< getToolChain().getArchName() << getShortName();
+return;
+  }
 
   CmdArgs.push_back("-arch");
   CmdArgs.push_back(Args.MakeArgString(GPUArch));
@@ -726,9 +734,8 @@ N

[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)

2024-01-30 Thread Joseph Huber via cfe-commits

https://github.com/jhuber6 updated 
https://github.com/llvm/llvm-project/pull/79873

>From 35e12c3d83f3be93618805ffaf05e3424689f32f Mon Sep 17 00:00:00 2001
From: Joseph Huber 
Date: Mon, 29 Jan 2024 11:08:04 -0600
Subject: [PATCH 1/2] [NVPTX] Allow compiling LLVM-IR without `-march` set

Summary:
The NVPTX tools require an architecture to be used, however if we are
creating generic LLVM-IR we should be able to leave it unspecified. This
will result in the `target-cpu` attributes not being set on the
functions so it can be changed when linked into code. This allows the
standalone `--target=nvptx64-nvidia-cuda` toolchain to create LLVM-IR
similar to how CUDA's deviceRTL looks from C/C++
---
 .../clang/Basic/DiagnosticDriverKinds.td  |  2 ++
 clang/lib/Basic/Targets/NVPTX.cpp |  7 +-
 clang/lib/Basic/Targets/NVPTX.h   |  3 ++-
 clang/lib/Driver/ToolChains/Cuda.cpp  | 19 ++-
 clang/test/Driver/cuda-cross-compiling.c  | 24 +++
 .../Preprocessor/predefined-arch-macros.c | 12 ++
 6 files changed, 49 insertions(+), 18 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td 
b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 094fe19509412..476528375fb88 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -57,6 +57,8 @@ def warn_drv_avr_stdlib_not_linked: Warning<
   InGroup;
 def err_drv_cuda_bad_gpu_arch : Error<"unsupported CUDA gpu architecture: %0">;
 def err_drv_offload_bad_gpu_arch : Error<"unsupported %0 gpu architecture: 
%1">;
+def err_drv_offload_missing_gpu_arch : Error<
+  "Must pass in an explicit %0 gpu architecture to '%1'">;
 def err_drv_no_cuda_installation : Error<
   "cannot find CUDA installation; provide its path via '--cuda-path', or pass "
   "'-nocudainc' to build without CUDA includes">;
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp 
b/clang/lib/Basic/Targets/NVPTX.cpp
index 0b9d97f69d146..7687e3faad770 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -59,7 +59,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
   // Define available target features
   // These must be defined in sorted order!
   NoAsmVariants = true;
-  GPU = CudaArch::SM_20;
+  GPU = CudaArch::UNUSED;
 
   if (TargetPointerWidth == 32)
 resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
@@ -169,6 +169,11 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions 
&Opts,
MacroBuilder &Builder) const {
   Builder.defineMacro("__PTX__");
   Builder.defineMacro("__NVPTX__");
+
+  // Skip setting architecture dependent macros if undefined.
+  if (GPU == CudaArch::UNUSED && !HostTarget)
+return;
+
   if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
 // Set __CUDA_ARCH__ for the GPU specified.
 std::string CUDAArchCode = [this] {
diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h
index 20d76b702a942..f476d49047c01 100644
--- a/clang/lib/Basic/Targets/NVPTX.h
+++ b/clang/lib/Basic/Targets/NVPTX.h
@@ -79,7 +79,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public 
TargetInfo {
   initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags,
  StringRef CPU,
  const std::vector &FeaturesVec) const override {
-Features[CudaArchToString(GPU)] = true;
+if (GPU != CudaArch::UNUSED)
+  Features[CudaArchToString(GPU)] = true;
 Features["ptx" + std::to_string(PTXVersion)] = true;
 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec);
   }
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp 
b/clang/lib/Driver/ToolChains/Cuda.cpp
index 8a9d0caaccf30..ca54d2d55426b 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -389,7 +389,11 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const 
JobAction &JA,
 GPUArchName = JA.getOffloadingArch();
   } else {
 GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
-assert(!GPUArchName.empty() && "Must have an architecture passed in.");
+if (GPUArchName.empty()) {
+  C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch)
+  << getToolChain().getArchName() << getShortName();
+  return;
+}
   }
 
   // Obtain architecture from the action.
@@ -593,7 +597,11 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
 CmdArgs.push_back("-v");
 
   StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ);
-  assert(!GPUArch.empty() && "At least one GPU Arch required for nvlink.");
+  if (GPUArch.empty()) {
+C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch)
+<< getToolChain().getArchName() << getShortName();
+return;
+  }
 
   CmdArgs.push_back("-arch");
   CmdArgs.push_back(Args.MakeArgString(GPUArch));
@@ -726,9 +734,8 @@ N

[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)

2024-01-30 Thread Artem Belevich via cfe-commits

https://github.com/Artem-B approved this pull request.


https://github.com/llvm/llvm-project/pull/79873
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)

2024-01-30 Thread Artem Belevich via cfe-commits

Artem-B wrote:

Considering that it's for the stand-alone compilation only, I'm not going to 
block this patch.
That said, please add a `TODO` somewhere to address the issue with explicitly 
targeting the generic variant.
 

https://github.com/llvm/llvm-project/pull/79873
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)

2024-01-30 Thread Joseph Huber via cfe-commits

jhuber6 wrote:

> > Right now if you specify target-cpu you get target-cpu attributes, which is 
> > what we don't want.
> 
> I'm fine handling 'generic' in a special way under the hood and not 
> specifying target-CPU.
> 
> My concern is about user-facing interface. Command line options must be 
> overridable. For the CPU I would be able to specify the variant that matches 
> the default. For GPU I'll have no way to explicitly pick 'generic' as the 
> target. I think this is important.

I can add `generic` for overriding purposes, but I'd prefer if leaving it off 
implied generic as well, at least for this standalone target version. This is a 
little divergent from standard CPU targets because GPUs have no backward 
compatibility, so something being "generic" requires a bit more work.

https://github.com/llvm/llvm-project/pull/79873
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)

2024-01-29 Thread Artem Belevich via cfe-commits

Artem-B wrote:

> Right now if you specify target-cpu you get target-cpu attributes, which is 
> what we don't want. 

I'm fine handling 'generic' in a special way under the hood and not specifying 
target-CPU.

My concern is about user-facing interface. Command line options must be 
overridable. 
For the CPU I would be able to specify the variant that matches the default.
For GPU I'll have no way to explicitly pick 'generic' as the target. I think 
this is important.




https://github.com/llvm/llvm-project/pull/79873
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)

2024-01-29 Thread Joseph Huber via cfe-commits

jhuber6 wrote:

> > I think there's some precedent from both vendors to treat missing 
> > attributes as a more generic target.
> 
> It sounds more like a bug than a feature to me.
> 
> The major difference between "you get sm_xx by default" and this "you get 
> generic by default" is that with specific sm_XX, I can override it both ways 
> -- I can enable/disable it if I need to regardless of how it was specified 
> before my overriding options.
> 
> With the magic unnameable 'generic' target, I can only disable it by 
> specifying it, but there's no way to enable it once a preceding option names 
> some specific architecture.
> 
> It makes little difference where you control the complete build, but that is not 
> the case for all builds. E.g. Tensorflow builds with bazel and the end user 
> does not have access to whatever compiler flags global build rules may set. 
> So if you want to build for generic GPU target, you will have to jump through 
> way more hoops than is reasonable, as opposed to specifying a few overriding 
> options you're interested in.
> 
> I'm fine with defaulting to such generic target, but I do believe we need to 
> handle it the same way as specific targets.

We could make the driver require some special argument, but the question is how 
that should be reflected in the LLVM-IR. Right now if you specify `target-cpu` 
you get `target-cpu` attributes, which is what we don't want. The standard 
behavior on all other targets is to ignore target-dependent attributes when 
`-march` or `-mcpu` isn't set, which is exactly what we want here. On the 
driver level we could hide this behind `-march=generic` if that's important, 
but I think inside of `clang` it should just be if it's missing as that's the 
standard handling.

https://github.com/llvm/llvm-project/pull/79873
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)

2024-01-29 Thread Artem Belevich via cfe-commits

Artem-B wrote:

> I think there's some precedent from both vendors to treat missing attributes 
> as a more generic target.

It sounds more like a bug than a feature to me.

The major difference between "you get sm_xx by default" and this "you get 
generic by default" is that with specific sm_XX, I can override it both ways -- 
I can enable/disable it if I need to regardless of how it was specified before 
my overriding options.

With the magic unnameable 'generic' target, I can only disable it by specifying 
it, but there's no way to enable it once a preceding option names some specific 
architecture.

It makes little difference where you control the complete build, but that is not 
the case for all builds. E.g. Tensorflow builds with bazel and the end user 
does not have access to whatever compiler flags global build rules may set. So 
if you want to build for generic GPU target, you will have to jump through way 
more hoops than is reasonable, as opposed to specifying a few overriding 
options you're interested in.

I'm fine with defaulting to such generic target, but I do believe we need to 
handle it the same way as specific targets.

https://github.com/llvm/llvm-project/pull/79873
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)

2024-01-29 Thread Joseph Huber via cfe-commits

jhuber6 wrote:

> Relying on something _not_ being defined is probably not the best way to 
> handle a 'generic' target. For starters, it makes it hard or impossible to 
> recreate the same compilation state by undoing an already-specified option. It 
> also breaks the established assumption that there _is_ a default target CPU/GPU. 
> If we do want to have a generic GPU target, then we should grow an explicit 
> 'generic' GPU variant, IMO. It would be a functional opposite of 'native'.

AMDGPU uses a missing `-mcpu` on the OpenCL target to build their "generic" 
device libraries and it's also missing from NVIDIA's `libdevice.10.bc` in the 
same way. I think there's some precedent from both vendors to treat missing 
attributes as a more generic target. The default target to me is more of the 
domain of the driver. So if you're using CUDA and don't specify anything you 
get `sm_52`. This patch also creates a hard error if it's unspecified before it 
makes it to the tools like ptxas and nvlink.

https://github.com/llvm/llvm-project/pull/79873
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)

2024-01-29 Thread Artem Belevich via cfe-commits

Artem-B wrote:

Relying on something *not* being defined is probably not the best way to handle 
a 'generic' target. For starters, it makes it hard or impossible to recreate the 
same compilation state by undoing an already-specified option. It also breaks 
the established assumption that there *is* a default target CPU/GPU. If we do want 
to have a generic GPU target, then we should grow an explicit 'generic' GPU 
variant, IMO. It would be a functional opposite of 'native'.

https://github.com/llvm/llvm-project/pull/79873
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)

2024-01-29 Thread Justin Lebar via cfe-commits

https://github.com/jlebar approved this pull request.


https://github.com/llvm/llvm-project/pull/79873
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)

2024-01-29 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-clang-driver

Author: Joseph Huber (jhuber6)


Changes

Summary:
The NVPTX tools require an architecture to be used, however if we are
creating generic LLVM-IR we should be able to leave it unspecified. This
will result in the `target-cpu` attributes not being set on the
functions so it can be changed when linked into code. This allows the
standalone `--target=nvptx64-nvidia-cuda` toolchain to create LLVM-IR
similar to how CUDA's deviceRTL looks from C/C++


---
Full diff: https://github.com/llvm/llvm-project/pull/79873.diff


6 Files Affected:

- (modified) clang/include/clang/Basic/DiagnosticDriverKinds.td (+2) 
- (modified) clang/lib/Basic/Targets/NVPTX.cpp (+6-1) 
- (modified) clang/lib/Basic/Targets/NVPTX.h (+2-1) 
- (modified) clang/lib/Driver/ToolChains/Cuda.cpp (+13-6) 
- (modified) clang/test/Driver/cuda-cross-compiling.c (+14-10) 
- (modified) clang/test/Preprocessor/predefined-arch-macros.c (+12) 


``diff
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td 
b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 094fe1950941270..476528375fb8889 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -57,6 +57,8 @@ def warn_drv_avr_stdlib_not_linked: Warning<
   InGroup;
 def err_drv_cuda_bad_gpu_arch : Error<"unsupported CUDA gpu architecture: %0">;
 def err_drv_offload_bad_gpu_arch : Error<"unsupported %0 gpu architecture: 
%1">;
+def err_drv_offload_missing_gpu_arch : Error<
+  "Must pass in an explicit %0 gpu architecture to '%1'">;
 def err_drv_no_cuda_installation : Error<
   "cannot find CUDA installation; provide its path via '--cuda-path', or pass "
   "'-nocudainc' to build without CUDA includes">;
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp 
b/clang/lib/Basic/Targets/NVPTX.cpp
index 0b9d97f69d146af..7687e3faad770d0 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -59,7 +59,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
   // Define available target features
   // These must be defined in sorted order!
   NoAsmVariants = true;
-  GPU = CudaArch::SM_20;
+  GPU = CudaArch::UNUSED;
 
   if (TargetPointerWidth == 32)
 resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
@@ -169,6 +169,11 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions 
&Opts,
MacroBuilder &Builder) const {
   Builder.defineMacro("__PTX__");
   Builder.defineMacro("__NVPTX__");
+
+  // Skip setting architecture dependent macros if undefined.
+  if (GPU == CudaArch::UNUSED && !HostTarget)
+return;
+
   if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
 // Set __CUDA_ARCH__ for the GPU specified.
 std::string CUDAArchCode = [this] {
diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h
index 20d76b702a9426e..f476d49047c0138 100644
--- a/clang/lib/Basic/Targets/NVPTX.h
+++ b/clang/lib/Basic/Targets/NVPTX.h
@@ -79,7 +79,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public 
TargetInfo {
   initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags,
  StringRef CPU,
  const std::vector &FeaturesVec) const override {
-Features[CudaArchToString(GPU)] = true;
+if (GPU != CudaArch::UNUSED)
+  Features[CudaArchToString(GPU)] = true;
 Features["ptx" + std::to_string(PTXVersion)] = true;
 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec);
   }
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp 
b/clang/lib/Driver/ToolChains/Cuda.cpp
index 8a9d0caaccf30bb..ca54d2d55426b9f 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -389,7 +389,11 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const 
JobAction &JA,
 GPUArchName = JA.getOffloadingArch();
   } else {
 GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
-assert(!GPUArchName.empty() && "Must have an architecture passed in.");
+if (GPUArchName.empty()) {
+  C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch)
+  << getToolChain().getArchName() << getShortName();
+  return;
+}
   }
 
   // Obtain architecture from the action.
@@ -593,7 +597,11 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
 CmdArgs.push_back("-v");
 
   StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ);
-  assert(!GPUArch.empty() && "At least one GPU Arch required for nvlink.");
+  if (GPUArch.empty()) {
+C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch)
+<< getToolChain().getArchName() << getShortName();
+return;
+  }
 
   CmdArgs.push_back("-arch");
   CmdArgs.push_back(Args.MakeArgString(GPUArch));
@@ -726,9 +734,8 @@ NVPTXToolChain::NVPTXToolChain(const Driver &D, const 
llvm::Triple &Triple,
 llvm::opt::DerivedArgList *
 NVPTXToolChain::Translat

[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)

2024-01-29 Thread Joseph Huber via cfe-commits

https://github.com/jhuber6 created 
https://github.com/llvm/llvm-project/pull/79873

Summary:
The NVPTX tools require an architecture to be used, however if we are
creating generic LLVM-IR we should be able to leave it unspecified. This
will result in the `target-cpu` attributes not being set on the
functions so it can be changed when linked into code. This allows the
standalone `--target=nvptx64-nvidia-cuda` toolchain to create LLVM-IR
similar to how CUDA's deviceRTL looks from C/C++


>From 35e12c3d83f3be93618805ffaf05e3424689f32f Mon Sep 17 00:00:00 2001
From: Joseph Huber 
Date: Mon, 29 Jan 2024 11:08:04 -0600
Subject: [PATCH] [NVPTX] Allow compiling LLVM-IR without `-march` set

Summary:
The NVPTX tools require an architecture to be used, however if we are
creating generic LLVM-IR we should be able to leave it unspecified. This
will result in the `target-cpu` attributes not being set on the
functions so it can be changed when linked into code. This allows the
standalone `--target=nvptx64-nvidia-cuda` toolchain to create LLVM-IR
similar to how CUDA's deviceRTL looks from C/C++
---
 .../clang/Basic/DiagnosticDriverKinds.td  |  2 ++
 clang/lib/Basic/Targets/NVPTX.cpp |  7 +-
 clang/lib/Basic/Targets/NVPTX.h   |  3 ++-
 clang/lib/Driver/ToolChains/Cuda.cpp  | 19 ++-
 clang/test/Driver/cuda-cross-compiling.c  | 24 +++
 .../Preprocessor/predefined-arch-macros.c | 12 ++
 6 files changed, 49 insertions(+), 18 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td 
b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 094fe1950941270..476528375fb8889 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -57,6 +57,8 @@ def warn_drv_avr_stdlib_not_linked: Warning<
   InGroup;
 def err_drv_cuda_bad_gpu_arch : Error<"unsupported CUDA gpu architecture: %0">;
 def err_drv_offload_bad_gpu_arch : Error<"unsupported %0 gpu architecture: 
%1">;
+def err_drv_offload_missing_gpu_arch : Error<
+  "Must pass in an explicit %0 gpu architecture to '%1'">;
 def err_drv_no_cuda_installation : Error<
   "cannot find CUDA installation; provide its path via '--cuda-path', or pass "
   "'-nocudainc' to build without CUDA includes">;
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp 
b/clang/lib/Basic/Targets/NVPTX.cpp
index 0b9d97f69d146af..7687e3faad770d0 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -59,7 +59,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
   // Define available target features
   // These must be defined in sorted order!
   NoAsmVariants = true;
-  GPU = CudaArch::SM_20;
+  GPU = CudaArch::UNUSED;
 
   if (TargetPointerWidth == 32)
 resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
@@ -169,6 +169,11 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions 
&Opts,
MacroBuilder &Builder) const {
   Builder.defineMacro("__PTX__");
   Builder.defineMacro("__NVPTX__");
+
+  // Skip setting architecture dependent macros if undefined.
+  if (GPU == CudaArch::UNUSED && !HostTarget)
+return;
+
   if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
 // Set __CUDA_ARCH__ for the GPU specified.
 std::string CUDAArchCode = [this] {
diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h
index 20d76b702a9426e..f476d49047c0138 100644
--- a/clang/lib/Basic/Targets/NVPTX.h
+++ b/clang/lib/Basic/Targets/NVPTX.h
@@ -79,7 +79,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public 
TargetInfo {
   initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags,
  StringRef CPU,
  const std::vector &FeaturesVec) const override {
-Features[CudaArchToString(GPU)] = true;
+if (GPU != CudaArch::UNUSED)
+  Features[CudaArchToString(GPU)] = true;
 Features["ptx" + std::to_string(PTXVersion)] = true;
 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec);
   }
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp 
b/clang/lib/Driver/ToolChains/Cuda.cpp
index 8a9d0caaccf30bb..ca54d2d55426b9f 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -389,7 +389,11 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const 
JobAction &JA,
 GPUArchName = JA.getOffloadingArch();
   } else {
 GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
-assert(!GPUArchName.empty() && "Must have an architecture passed in.");
+if (GPUArchName.empty()) {
+  C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch)
+  << getToolChain().getArchName() << getShortName();
+  return;
+}
   }
 
   // Obtain architecture from the action.
@@ -593,7 +597,11 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const 
JobAction &JA,
 CmdArgs.push_back("-v");