[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-04-29 Thread Joseph Huber via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGc5e5b54350fe: [CUDA] Add driver support for compiling CUDA 
with the new driver (authored by jhuber6).

Changed prior to commit:
  https://reviews.llvm.org/D120272?vs=425261&id=426036#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272

Files:
  clang/include/clang/Basic/Cuda.h
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/cuda-openmp-driver.cu
  clang/test/Driver/cuda-phases.cu

Index: clang/test/Driver/cuda-phases.cu
===
--- clang/test/Driver/cuda-phases.cu
+++ clang/test/Driver/cuda-phases.cu
@@ -217,3 +217,32 @@
 // DASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]])
 // DASM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler
 // DASM2-NOT: host
+
+//
+// Test the phases generated when using the new offloading driver.
+//
+// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver \
+// RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s 2>&1 | FileCheck --check-prefix=NEW_DRIVER %s
+// NEW_DRIVER: 0: input, "[[INPUT:.*]]", cuda, (host-cuda)
+// NEW_DRIVER: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
+// NEW_DRIVER: 2: compiler, {1}, ir, (host-cuda)
+// NEW_DRIVER: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_52)
+// NEW_DRIVER: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52)
+// NEW_DRIVER: 5: compiler, {4}, ir, (device-cuda, sm_52)
+// NEW_DRIVER: 6: backend, {5}, assembler, (device-cuda, sm_52)
+// NEW_DRIVER: 7: assembler, {6}, object, (device-cuda, sm_52)
+// NEW_DRIVER: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object
+// NEW_DRIVER: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, assembler
+// NEW_DRIVER: 10: linker, {8, 9}, cuda-fatbin, (device-cuda, sm_52)
+// NEW_DRIVER: 11: input, "[[INPUT]]", cuda, (device-cuda, sm_70)
+// NEW_DRIVER: 12: preprocessor, {11}, cuda-cpp-output, (device-cuda, sm_70)
+// NEW_DRIVER: 13: compiler, {12}, ir, (device-cuda, sm_70)
+// NEW_DRIVER: 14: backend, {13}, assembler, (device-cuda, sm_70)
+// NEW_DRIVER: 15: assembler, {14}, object, (device-cuda, sm_70)
+// NEW_DRIVER: 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {15}, object
+// NEW_DRIVER: 17: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {14}, assembler
+// NEW_DRIVER: 18: linker, {16, 17}, cuda-fatbin, (device-cuda, sm_70)
+// NEW_DRIVER: 19: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {10}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {18}, ir
+// NEW_DRIVER: 20: backend, {19}, assembler, (host-cuda)
+// NEW_DRIVER: 21: assembler, {20}, object, (host-cuda)
+// NEW_DRIVER: 22: clang-linker-wrapper, {21}, image, (host-cuda)
Index: clang/test/Driver/cuda-openmp-driver.cu
===
--- /dev/null
+++ clang/test/Driver/cuda-openmp-driver.cu
@@ -0,0 +1,18 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
+// RUN:--offload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefix BINDINGS %s
+
+// BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]"
+// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[HOST_OBJ:.+]]"
+// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+
+// RUN: %clang -### -nocudalib --offload-new-driver %s 2>&1 | FileCheck -check-prefix RDC %s
+// RDC: error: Using '--offload-new-driver' requires '-fgpu-rdc'
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -6236,6 +6236,9 @@
   }
 
   if (IsCuda || IsHIP) {
+if 

[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-04-26 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:6223-6224
   if (IsCuda || IsHIP) {
-if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
+if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false) ||
+Args.hasArg(options::OPT_foffload_new_driver))
   CmdArgs.push_back("-fgpu-rdc");

rnk wrote:
> tra wrote:
> > jhuber6 wrote:
> > > tra wrote:
> > > > tra wrote:
> > > > > jhuber6 wrote:
> > > > > > tra wrote:
> > > > > > > If user specifies both `-fno-gpu-rdc` and `-foffload-new-driver` 
> > > > > > > we would still enable RDC compilation.
> > > > > > > We may want to at least issue a warning. 
> > > > > > > 
> > > > > > > Considering that  we have multiple places where we may check for 
> > > > > > > `-f[no]gpu-rdc` we should make sure we don't get different ideas 
> > > > > > > whether RDC has been enabled.
> > > > > > > I think it may make sense to provide a common way to figure it 
> > > > > > > out. Either via a helper function that would process CLI 
> > > > > > > arguments or calculate it once and save it somewhere.
> > > > > > I haven't quite finalized how to handle this. The new driver should 
> > > > > > be compatible with a non-RDC build since we simply wouldn't embed 
> > > > > > the device image or create offloading entries. It's a little bit 
> > > > > > more difficult here since the new method is opt-in so it requires a 
> > > > > > flag. We should definitely emit a warning if both are enabled (I'm 
> > > > > > assuming there's one for passing both `fgpu-rdc` and 
> > > > > > `fno-gpu-rdc`). I'll add one in.
> > > > > > 
> > > > > > Also we could consider the new driver *the* RDC in the future which 
> > > > > > would be the easiest. The problem is if we want to support CUDA's 
> > > > > > method of RDC considering how other build systems seem to expect 
> > > > > > it. I could see us embedding the fatbinary in the object file, even 
> > > > > > if unused, just so that cuobjdump works. However we couldn't 
> > > > > > support the generation of `__cudaRegisterFatBinary_nv` 
> > > > > > functions because then those would cause linker errors. WDYT?
> > > > > > I'm assuming there's one for passing both fgpu-rdc and fno-gpu-rdc
> > > > > 
> > > > > This is not a valid assumption. The whole idea behind 
> > > > > `-fno-something` is that the options can be overridden. E.g. if the 
> > > > > build specifies a standard set of compiler options, but we need to 
> > > > > override some of them when building a particular file. We can only do 
> > > > > so by appending to the standard options. Potentially we may end up 
> > > > > having those options overridden again. While it's not very common, 
> > > > > it's certainly possible. It's also possible to start with 
> > > > > '-fno-gpu-rdc' and then override it with `-fgpu-rdc`.
> > > > > 
> > > > > In this case, we care about the final state of RDC specified by 
> > > > > -f*gpu-rdc options, not by the fact that `-fno-gpu-rdc` is present. 
> > > > > `Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, 
> > > > > false)` does exactly that -- gives you the final state. If it returns 
> > > > > false, but we have `-foffload-new-driver`, then we need a warning as 
> > > > > these options are contradictory.
> > > > > 
> > > > > The new driver should be compatible with a non-RDC build 
> > > > 
> > > > In that case, we don't need a warning, but we do need a test verifying 
> > > > this behavior.
> > > > 
> > > > > Also we could consider the new driver *the* RDC in the future which 
> > > > > would be the easiest. 
> > > > 
> > > > SGTM. I do not know how it all will work out in the end. Your proposed 
> > > > model makes a lot of sense, and I'm guardedly optimistic about it.
> > > > Eventually we would deprecate RDC options, but we still need to work 
> > > > sensibly when user specifies a mix of these options. 
> > > > 
> > > > 
> > > > In that case, we don't need a warning, but we do need a test verifying 
> > > > this behavior.
> > > > 
> > > It's possible but I don't have the logic here to do it, figured we can 
> > > cross that bridge later.
> > > 
> > > > SGTM. I do not know how it all will work out in the end. Your proposed 
> > > > model makes a lot of sense, and I'm guardedly optimistic about it.
> > > >
> > > So the only downsides I know of, is that we don't currently replicate 
> > > CUDA's magic to JIT RDC code (We can do this with LTO anyway), and that 
> > > registering offload entries relies on the linker defining `__start / 
> > > __stop` variables, which I don't know if linkers on Windows / MacOS 
> > > provide. I'd be really interested if someone on the LLD team knew the 
> > > answer to that.
> > > relies on the linker defining __start / __stop variables, which I don't 
> > > know if linkers on Windows / MacOS provide. I'd be really interested if 
> > > someone on the LLD team knew the answer to that.
> > 
> 

[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-04-26 Thread Reid Kleckner via Phabricator via cfe-commits
rnk added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:6223-6224
   if (IsCuda || IsHIP) {
-if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
+if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false) ||
+Args.hasArg(options::OPT_foffload_new_driver))
   CmdArgs.push_back("-fgpu-rdc");

tra wrote:
> jhuber6 wrote:
> > tra wrote:
> > > tra wrote:
> > > > jhuber6 wrote:
> > > > > tra wrote:
> > > > > > If user specifies both `-fno-gpu-rdc` and `-foffload-new-driver` we 
> > > > > > would still enable RDC compilation.
> > > > > > We may want to at least issue a warning. 
> > > > > > 
> > > > > > Considering that  we have multiple places where we may check for 
> > > > > > `-f[no]gpu-rdc` we should make sure we don't get different ideas 
> > > > > > whether RDC has been enabled.
> > > > > > I think it may make sense to provide a common way to figure it out. 
> > > > > > Either via a helper function that would process CLI arguments or 
> > > > > > calculate it once and save it somewhere.
> > > > > I haven't quite finalized how to handle this. The new driver should 
> > > > > be compatible with a non-RDC build since we simply wouldn't embed the 
> > > > > device image or create offloading entries. It's a little bit more 
> > > > > difficult here since the new method is opt-in so it requires a flag. 
> > > > > We should definitely emit a warning if both are enabled (I'm assuming 
> > > > > there's one for passing both `fgpu-rdc` and `fno-gpu-rdc`). I'll add 
> > > > > one in.
> > > > > 
> > > > > Also we could consider the new driver *the* RDC in the future which 
> > > > > would be the easiest. The problem is if we want to support CUDA's 
> > > > > method of RDC considering how other build systems seem to expect it. 
> > > > > I could see us embedding the fatbinary in the object file, even if 
> > > > > unused, just so that cuobjdump works. However we couldn't support the 
> > > > > generation of `__cudaRegisterFatBinary_nv` functions because then 
> > > > > those would cause linker errors. WDYT?
> > > > > I'm assuming there's one for passing both fgpu-rdc and fno-gpu-rdc
> > > > 
> > > > This is not a valid assumption. The whole idea behind `-fno-something` 
> > > > is that the options can be overridden. E.g. if the build specifies a 
> > > > standard set of compiler options, but we need to override some of them 
> > > > when building a particular file. We can only do so by appending to the 
> > > > standard options. Potentially we may end up having those options 
> > > > overridden again. While it's not very common, it's certainly possible. 
> > > > It's also possible to start with '-fno-gpu-rdc' and then override it 
> > > > with `-fgpu-rdc`.
> > > > 
> > > > In this case, we care about the final state of RDC specified by 
> > > > -f*gpu-rdc options, not by the fact that `-fno-gpu-rdc` is present. 
> > > > `Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)` 
> > > > does exactly that -- gives you the final state. If it returns false, 
> > > > but we have `-foffload-new-driver`, then we need a warning as these 
> > > > options are contradictory.
> > > > 
> > > > The new driver should be compatible with a non-RDC build 
> > > 
> > > In that case, we don't need a warning, but we do need a test verifying 
> > > this behavior.
> > > 
> > > > Also we could consider the new driver *the* RDC in the future which 
> > > > would be the easiest. 
> > > 
> > > SGTM. I do not know how it all will work out in the end. Your proposed 
> > > model makes a lot of sense, and I'm guardedly optimistic about it.
> > > Eventually we would deprecate RDC options, but we still need to work 
> > > sensibly when user specifies a mix of these options. 
> > > 
> > > 
> > > In that case, we don't need a warning, but we do need a test verifying 
> > > this behavior.
> > > 
> > It's possible but I don't have the logic here to do it, figured we can 
> > cross that bridge later.
> > 
> > > SGTM. I do not know how it all will work out in the end. Your proposed 
> > > model makes a lot of sense, and I'm guardedly optimistic about it.
> > >
> > So the only downsides I know of, is that we don't currently replicate 
> > CUDA's magic to JIT RDC code (We can do this with LTO anyway), and that 
> > registering offload entries relies on the linker defining `__start / 
> > __stop` variables, which I don't know if linkers on Windows / MacOS 
> > provide. I'd be really interested if someone on the LLD team knew the 
> > answer to that.
> > relies on the linker defining __start / __stop variables, which I don't 
> > know if linkers on Windows / MacOS provide. I'd be really interested if 
> > someone on the LLD team knew the answer to that.
> 
> @MaskRay, @rnk - would you happen to know the answer?
I believe MachO has an equivalent mechanism, but I'm not familiar with it. For 
PE/COFF, you can search the ASan code to see 
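(For context, the COFF analogue usually relies on the linker's alphabetical merging of
`$`-suffixed section groups, with sentinel objects bracketing the real entries. A minimal
sketch with made-up section and type names, not what any runtime actually uses:)

  // MSVC/COFF merges ".offl$A", ".offl$B", ".offl$C" into ".offl" in
  // alphabetical order, so $A/$C sentinels bracket whatever lands in $B.
  struct OffloadEntry { void *Address; const char *Name; };

  #pragma section(".offl$A", read)
  #pragma section(".offl$C", read)
  __declspec(allocate(".offl$A")) const OffloadEntry EntriesBegin = {};
  __declspec(allocate(".offl$C")) const OffloadEntry EntriesEnd = {};

  // Walk the bracketed range, skipping any zero padding the linker inserts:
  // for (const OffloadEntry *E = &EntriesBegin + 1; E < &EntriesEnd; ++E)
  //   if (E->Address) { /* register the entry */ }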

[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-04-26 Thread Artem Belevich via Phabricator via cfe-commits
tra accepted this revision.
tra added a comment.
This revision is now accepted and ready to land.

In D120272#3475155 , @jhuber6 wrote:

> Changing this to simply require that the user manually passes `-fgpu-rdc` in 
> order to use the new driver. I think this makes more sense in the short term 
> and we can move to make the new driver the default RDC approach later. I 
> tested this and the following should work:

SGTM.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272


[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-04-26 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 425261.
jhuber6 added a comment.

Changing this to simply require that the user manually passes `-fgpu-rdc` in 
order to use the new driver. I think this makes more sense in the short term 
and we can move to make the new driver the default RDC approach later. I tested 
this and the following should work:

  clang foo.cu -fgpu-rdc -foffload-new-driver -c
  clang bar.cu -c
  clang foo.o bar.o -fgpu-rdc -foffload-new-driver


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272

Files:
  clang/include/clang/Basic/Cuda.h
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/cuda-openmp-driver.cu
  clang/test/Driver/cuda-phases.cu

Index: clang/test/Driver/cuda-phases.cu
===
--- clang/test/Driver/cuda-phases.cu
+++ clang/test/Driver/cuda-phases.cu
@@ -217,3 +217,32 @@
 // DASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]])
 // DASM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler
 // DASM2-NOT: host
+
+//
+// Test the phases generated when using the new offloading driver.
+//
+// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases -foffload-new-driver \
+// RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s 2>&1 | FileCheck --check-prefix=NEW_DRIVER %s
+// NEW_DRIVER: 0: input, "[[INPUT:.*]]", cuda, (host-cuda)
+// NEW_DRIVER: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
+// NEW_DRIVER: 2: compiler, {1}, ir, (host-cuda)
+// NEW_DRIVER: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_52)
+// NEW_DRIVER: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52)
+// NEW_DRIVER: 5: compiler, {4}, ir, (device-cuda, sm_52)
+// NEW_DRIVER: 6: backend, {5}, assembler, (device-cuda, sm_52)
+// NEW_DRIVER: 7: assembler, {6}, object, (device-cuda, sm_52)
+// NEW_DRIVER: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object
+// NEW_DRIVER: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, assembler
+// NEW_DRIVER: 10: linker, {8, 9}, cuda-fatbin, (device-cuda, sm_52)
+// NEW_DRIVER: 11: input, "[[INPUT]]", cuda, (device-cuda, sm_70)
+// NEW_DRIVER: 12: preprocessor, {11}, cuda-cpp-output, (device-cuda, sm_70)
+// NEW_DRIVER: 13: compiler, {12}, ir, (device-cuda, sm_70)
+// NEW_DRIVER: 14: backend, {13}, assembler, (device-cuda, sm_70)
+// NEW_DRIVER: 15: assembler, {14}, object, (device-cuda, sm_70)
+// NEW_DRIVER: 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {15}, object
+// NEW_DRIVER: 17: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {14}, assembler
+// NEW_DRIVER: 18: linker, {16, 17}, cuda-fatbin, (device-cuda, sm_70)
+// NEW_DRIVER: 19: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {10}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {18}, ir
+// NEW_DRIVER: 20: backend, {19}, assembler, (host-cuda)
+// NEW_DRIVER: 21: assembler, {20}, object, (host-cuda)
+// NEW_DRIVER: 22: clang-linker-wrapper, {21}, image, (host-cuda)
Index: clang/test/Driver/cuda-openmp-driver.cu
===
--- /dev/null
+++ clang/test/Driver/cuda-openmp-driver.cu
@@ -0,0 +1,18 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
+// RUN:-foffload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefix BINDINGS %s
+
+// BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]"
+// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[HOST_OBJ:.+]]"
+// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+
+// RUN: %clang -### -nocudalib -foffload-new-driver %s 2>&1 | FileCheck -check-prefix RDC %s
+// RDC: error: Using '-foffload-new-driver' requires '-fgpu-rdc'
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ 

[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-04-22 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 424543.
jhuber6 added a comment.

Making suggested changes.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272

Files:
  clang/include/clang/Basic/Cuda.h
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/lib/Driver/ToolChains/HIPAMD.cpp
  clang/test/Driver/cuda-openmp-driver.cu
  clang/test/Driver/cuda-phases.cu

Index: clang/test/Driver/cuda-phases.cu
===
--- clang/test/Driver/cuda-phases.cu
+++ clang/test/Driver/cuda-phases.cu
@@ -217,3 +217,32 @@
 // DASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]])
 // DASM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler
 // DASM2-NOT: host
+
+//
+// Test the phases generated when using the new offloading driver.
+//
+// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases -foffload-new-driver \
+// RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s 2>&1 | FileCheck --check-prefix=NEW_DRIVER %s
+// NEW_DRIVER: 0: input, "[[INPUT:.*]]", cuda, (host-cuda)
+// NEW_DRIVER: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
+// NEW_DRIVER: 2: compiler, {1}, ir, (host-cuda)
+// NEW_DRIVER: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_52)
+// NEW_DRIVER: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52)
+// NEW_DRIVER: 5: compiler, {4}, ir, (device-cuda, sm_52)
+// NEW_DRIVER: 6: backend, {5}, assembler, (device-cuda, sm_52)
+// NEW_DRIVER: 7: assembler, {6}, object, (device-cuda, sm_52)
+// NEW_DRIVER: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object
+// NEW_DRIVER: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, assembler
+// NEW_DRIVER: 10: linker, {8, 9}, cuda-fatbin, (device-cuda, sm_52)
+// NEW_DRIVER: 11: input, "[[INPUT]]", cuda, (device-cuda, sm_70)
+// NEW_DRIVER: 12: preprocessor, {11}, cuda-cpp-output, (device-cuda, sm_70)
+// NEW_DRIVER: 13: compiler, {12}, ir, (device-cuda, sm_70)
+// NEW_DRIVER: 14: backend, {13}, assembler, (device-cuda, sm_70)
+// NEW_DRIVER: 15: assembler, {14}, object, (device-cuda, sm_70)
+// NEW_DRIVER: 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {15}, object
+// NEW_DRIVER: 17: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {14}, assembler
+// NEW_DRIVER: 18: linker, {16, 17}, cuda-fatbin, (device-cuda, sm_70)
+// NEW_DRIVER: 19: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {10}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {18}, ir
+// NEW_DRIVER: 20: backend, {19}, assembler, (host-cuda)
+// NEW_DRIVER: 21: assembler, {20}, object, (host-cuda)
+// NEW_DRIVER: 22: clang-linker-wrapper, {21}, image, (host-cuda)
Index: clang/test/Driver/cuda-openmp-driver.cu
===
--- /dev/null
+++ clang/test/Driver/cuda-openmp-driver.cu
@@ -0,0 +1,19 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
+// RUN:-foffload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefix BINDINGS %s
+
+// BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]"
+// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[HOST_OBJ:.+]]"
+// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+
+// RUN: %clang -### -nocudalib -foffload-new-driver %s 2>&1 | FileCheck -check-prefix RDC %s
+// RDC: clang{{.*}}"-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-fgpu-rdc"
+// RDC: ptxas{{.*}}-c
Index: clang/lib/Driver/ToolChains/HIPAMD.cpp
===
--- clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -222,7 +222,8 @@
 CC1Args.push_back("-fcuda-approx-transcendentals");
 
   if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
-  false))
+   

[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-04-22 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:6225
   if (IsCuda || IsHIP) {
-if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
+if (Args.hasArg(options::OPT_fno_gpu_rdc) && IsCudaDevice &&
+Args.hasArg(options::OPT_foffload_new_driver))

tra wrote:
> This has to be `Args.hasArg(options::OPT_fno_gpu_rdc) && 
> Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false) == false`
> 
> E.g. we don't want a warning if we have `-foffload-new-driver -fno-gpu-rdc 
> -fgpu-rdc`.
OK, will do.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:6228
+D.Diag(diag::warn_drv_no_rdc_new_driver) 
+<< "SampleUse with PGO options";
+if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false) ||

tra wrote:
> The warning does not take any parameters and this one looks wrong anyways.
Whoops, deleted that previously but had a little SNAFU with my commits and 
forgot to do it again.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:6896
+  if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
+   false))
 CmdArgs.push_back("-fgpu-rdc");

tra wrote:
> I'm not sure why we're no longer checking for `OPT_foffload_new_driver` here. 
> Don't we want to have the same RDC mode on the host and device sides?
> I think we do as that affects the way we mangle some symbols and it has to be 
> consistent on both sides.
> 
> 
This is only checked with `CudaDeviceInput`, which we don't use with the new 
driver.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272


[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-04-22 Thread Artem Belevich via Phabricator via cfe-commits
tra added a subscriber: rnk.
tra added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:6225
   if (IsCuda || IsHIP) {
-if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
+if (Args.hasArg(options::OPT_fno_gpu_rdc) && IsCudaDevice &&
+Args.hasArg(options::OPT_foffload_new_driver))

This has to be `Args.hasArg(options::OPT_fno_gpu_rdc) && 
Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false) == false`

E.g. we don't want a warning if we have `-foffload-new-driver -fno-gpu-rdc 
-fgpu-rdc`.
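
A minimal sketch of that check, using the diagnostic name already floated in this
thread (note the landed patch ultimately diagnoses the new driver without -fgpu-rdc
as an error, per the RDC check in cuda-openmp-driver.cu above):

  // Warn only when -fno-gpu-rdc "wins" (last-wins state is off) but the new
  // driver was requested; "-foffload-new-driver -fno-gpu-rdc -fgpu-rdc" stays
  // silent.
  bool RDCEnabled =
      Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false);
  if (!RDCEnabled && Args.hasArg(options::OPT_fno_gpu_rdc) &&
      Args.hasArg(options::OPT_foffload_new_driver))
    D.Diag(diag::warn_drv_no_rdc_new_driver);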



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:6228
+D.Diag(diag::warn_drv_no_rdc_new_driver) 
+<< "SampleUse with PGO options";
+if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false) ||

The warning does not take any parameters and this one looks wrong anyways.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:6896
+  if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
+   false))
 CmdArgs.push_back("-fgpu-rdc");

I'm not sure why we're no longer checking for `OPT_foffload_new_driver` here. 
Don't we want to have the same RDC mode on the host and device sides?
I think we do as that affects the way we mangle some symbols and it has to be 
consistent on both sides.





Comment at: clang/lib/Driver/ToolChains/Clang.cpp:6223-6224
   if (IsCuda || IsHIP) {
-if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
+if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false) ||
+Args.hasArg(options::OPT_foffload_new_driver))
   CmdArgs.push_back("-fgpu-rdc");

jhuber6 wrote:
> tra wrote:
> > tra wrote:
> > > jhuber6 wrote:
> > > > tra wrote:
> > > > > If user specifies both `-fno-gpu-rdc` and `-foffload-new-driver` we 
> > > > > would still enable RDC compilation.
> > > > > We may want to at least issue a warning. 
> > > > > 
> > > > > Considering that  we have multiple places where we may check for 
> > > > > `-f[no]gpu-rdc` we should make sure we don't get different ideas 
> > > > > whether RDC has been enabled.
> > > > > I think it may make sense to provide a common way to figure it out. 
> > > > > Either via a helper function that would process CLI arguments or 
> > > > > calculate it once and save it somewhere.
> > > > I haven't quite finalized how to handle this. The new driver should be 
> > > > compatible with a non-RDC build since we simply wouldn't embed the 
> > > > device image or create offloading entries. It's a little bit more 
> > > > difficult here since the new method is opt-in so it requires a flag. We 
> > > > should definitely emit a warning if both are enabled (I'm assuming 
> > > > there's one for passing both `fgpu-rdc` and `fno-gpu-rdc`). I'll add 
> > > > one in.
> > > > 
> > > > Also we could consider the new driver *the* RDC in the future which 
> > > > would be the easiest. The problem is if we want to support CUDA's 
> > > > method of RDC considering how other build systems seem to expect it. I 
> > > > could see us embedding the fatbinary in the object file, even if 
> > > > unused, just so that cuobjdump works. However we couldn't support the 
> > > > generation of `__cudaRegisterFatBinary_nv` functions because then 
> > > > those would cause linker errors. WDYT?
> > > > I'm assuming there's one for passing both fgpu-rdc and fno-gpu-rdc
> > > 
> > > This is not a valid assumption. The whole idea behind `-fno-something` is 
> > > that the options can be overridden. E.g. if the build specifies a 
> > > standard set of compiler options, but we need to override some of them 
> > > when building a particular file. We can only do so by appending to the 
> > > standard options. Potentially we may end up having those options 
> > > overridden again. While it's not very common, it's certainly possible. 
> > > It's also possible to start with '-fno-gpu-rdc' and then override it with 
> > > `-fgpu-rdc`.
> > > 
> > > In this case, we care about the final state of RDC specified by 
> > > -f*gpu-rdc options, not by the fact that `-fno-gpu-rdc` is present. 
> > > `Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)` 
> > > does exactly that -- gives you the final state. If it returns false, but 
> > > we have `-foffload-new-driver`, then we need a warning as these options 
> > > are contradictory.
> > > 
> > > The new driver should be compatible with a non-RDC build 
> > 
> > In that case, we don't need a warning, but we do need a test verifying this 
> > behavior.
> > 
> > > Also we could consider the new driver *the* RDC in the future which would 
> > > be the easiest. 
> > 
> > SGTM. I do not know how it all will work out in the end. Your proposed 
> > model makes a lot of sense, and I'm guardedly optimistic about it.
> > Eventually we would 

[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-04-22 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:6223-6224
   if (IsCuda || IsHIP) {
-if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
+if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false) ||
+Args.hasArg(options::OPT_foffload_new_driver))
   CmdArgs.push_back("-fgpu-rdc");

tra wrote:
> tra wrote:
> > jhuber6 wrote:
> > > tra wrote:
> > > > If user specifies both `-fno-gpu-rdc` and `-foffload-new-driver` we 
> > > > would still enable RDC compilation.
> > > > We may want to at least issue a warning. 
> > > > 
> > > > Considering that  we have multiple places where we may check for 
> > > > `-f[no]gpu-rdc` we should make sure we don't get different ideas 
> > > > whether RDC has been enabled.
> > > > I think it may make sense to provide a common way to figure it out. 
> > > > Either via a helper function that would process CLI arguments or 
> > > > calculate it once and save it somewhere.
> > > I haven't quite finalized how to handle this. The new driver should be 
> > > compatible with a non-RDC build since we simply wouldn't embed the device 
> > > image or create offloading entries. It's a little bit more difficult here 
> > > since the new method is opt-in so it requires a flag. We should 
> > > definitely emit a warning if both are enabled (I'm assuming there's one 
> > > for passing both `fgpu-rdc` and `fno-gpu-rdc`). I'll add one in.
> > > 
> > > Also we could consider the new driver *the* RDC in the future which would 
> > > be the easiest. The problem is if we want to support CUDA's method of RDC 
> > > considering how other build systems seem to expect it. I could see us 
> > > embedding the fatbinary in the object file, even if unused, just so that 
> > > cuobjdump works. However we couldn't support the generation of 
> > > `__cudaRegisterFatBinary_nv` functions because then those would cause 
> > > linker errors. WDYT?
> > > I'm assuming there's one for passing both fgpu-rdc and fno-gpu-rdc
> > 
> > This is not a valid assumption. The whole idea behind `-fno-something` is 
> > that the options can be overridden. E.g. if the build specifies a standard 
> > set of compiler options, but we need to override some of them when building 
> > a particular file. We can only do so by appending to the standard options. 
> > Potentially we may end up having those options overridden again. While it's 
> > not very common, it's certainly possible. It's also possible to start with 
> > '-fno-gpu-rdc' and then override it with `-fgpu-rdc`.
> > 
> > In this case, we care about the final state of RDC specified by -f*gpu-rdc 
> > options, not by the fact that `-fno-gpu-rdc` is present. 
> > `Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)` does 
> > exactly that -- gives you the final state. If it returns false, but we have 
> > `-foffload-new-driver`, then we need a warning as these options are 
> > contradictory.
> > 
> > The new driver should be compatible with a non-RDC build 
> 
> In that case, we don't need a warning, but we do need a test verifying this 
> behavior.
> 
> > Also we could consider the new driver *the* RDC in the future which would 
> > be the easiest. 
> 
> SGTM. I do not know how it all will work out in the end. Your proposed model 
> makes a lot of sense, and I'm guardedly optimistic about it.
> Eventually we would deprecate RDC options, but we still need to work sensibly 
> when user specifies a mix of these options. 
> 
> 
> In that case, we don't need a warning, but we do need a test verifying this 
> behavior.
> 
It's possible, but I don't have the logic here to do it; I figured we can cross 
that bridge later.

> SGTM. I do not know how it all will work out in the end. Your proposed model 
> makes a lot of sense, and I'm guardedly optimistic about it.
>
So the only downsides I know of are that we don't currently replicate CUDA's 
magic to JIT RDC code (we can do this with LTO anyway), and that registering 
offload entries relies on the linker defining `__start / __stop` variables, 
which I don't know whether linkers on Windows / MacOS provide. I'd be really 
interested if someone on the LLD team knew the answer to that.
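
(For reference, a minimal sketch of the section-bounds registration scheme being
discussed, assuming an ELF linker that synthesizes `__start_<section>` /
`__stop_<section>` symbols; the entry layout and section name here are placeholders
rather than the runtime's actual definitions:)

  // Stand-in for the per-kernel/per-global record the compiler emits.
  struct OffloadEntry {
    void *Address;
    char *Name;
    unsigned long Size;
  };

  // For any section whose name is a valid C identifier, ELF linkers define
  // these bracketing symbols automatically.
  extern "C" OffloadEntry __start_offload_entries[];
  extern "C" OffloadEntry __stop_offload_entries[];

  static void registerAllEntries() {
    for (OffloadEntry *E = __start_offload_entries;
         E != __stop_offload_entries; ++E) {
      // Hand each entry to the offloading runtime here.
    }
  }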


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272


[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-04-22 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 424537.
jhuber6 added a comment.

Adding a warning for using both -fno-gpu-rdc and -foffload-new-driver. I think 
this is a good warning to have for now while this remains opt-in. Once this has 
matured I plan on adding the necessary logic to handle RDC and non-RDC builds 
correctly with this. But for the purposes of this patch, just warning is fine.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272

Files:
  clang/include/clang/Basic/Cuda.h
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/lib/Driver/ToolChains/HIPAMD.cpp
  clang/test/Driver/cuda-openmp-driver.cu
  clang/test/Driver/cuda-phases.cu

Index: clang/test/Driver/cuda-phases.cu
===
--- clang/test/Driver/cuda-phases.cu
+++ clang/test/Driver/cuda-phases.cu
@@ -217,3 +217,32 @@
 // DASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]])
 // DASM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler
 // DASM2-NOT: host
+
+//
+// Test the phases generated when using the new offloading driver.
+//
+// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases -foffload-new-driver \
+// RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s 2>&1 | FileCheck --check-prefix=NEW_DRIVER %s
+// NEW_DRIVER: 0: input, "[[INPUT:.*]]", cuda, (host-cuda)
+// NEW_DRIVER: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
+// NEW_DRIVER: 2: compiler, {1}, ir, (host-cuda)
+// NEW_DRIVER: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_52)
+// NEW_DRIVER: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52)
+// NEW_DRIVER: 5: compiler, {4}, ir, (device-cuda, sm_52)
+// NEW_DRIVER: 6: backend, {5}, assembler, (device-cuda, sm_52)
+// NEW_DRIVER: 7: assembler, {6}, object, (device-cuda, sm_52)
+// NEW_DRIVER: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object
+// NEW_DRIVER: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, assembler
+// NEW_DRIVER: 10: linker, {8, 9}, cuda-fatbin, (device-cuda, sm_52)
+// NEW_DRIVER: 11: input, "[[INPUT]]", cuda, (device-cuda, sm_70)
+// NEW_DRIVER: 12: preprocessor, {11}, cuda-cpp-output, (device-cuda, sm_70)
+// NEW_DRIVER: 13: compiler, {12}, ir, (device-cuda, sm_70)
+// NEW_DRIVER: 14: backend, {13}, assembler, (device-cuda, sm_70)
+// NEW_DRIVER: 15: assembler, {14}, object, (device-cuda, sm_70)
+// NEW_DRIVER: 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {15}, object
+// NEW_DRIVER: 17: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {14}, assembler
+// NEW_DRIVER: 18: linker, {16, 17}, cuda-fatbin, (device-cuda, sm_70)
+// NEW_DRIVER: 19: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {10}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {18}, ir
+// NEW_DRIVER: 20: backend, {19}, assembler, (host-cuda)
+// NEW_DRIVER: 21: assembler, {20}, object, (host-cuda)
+// NEW_DRIVER: 22: clang-linker-wrapper, {21}, image, (host-cuda)
Index: clang/test/Driver/cuda-openmp-driver.cu
===
--- /dev/null
+++ clang/test/Driver/cuda-openmp-driver.cu
@@ -0,0 +1,19 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
+// RUN:-foffload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefix BINDINGS %s
+
+// BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]"
+// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[HOST_OBJ:.+]]"
+// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+
+// RUN: %clang -### -nocudalib -foffload-new-driver %s 2>&1 | FileCheck -check-prefix RDC %s
+// RDC: clang{{.*}}"-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-fgpu-rdc"
+// RDC: ptxas{{.*}}-c
Index: clang/lib/Driver/ToolChains/HIPAMD.cpp

[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-04-22 Thread Artem Belevich via Phabricator via cfe-commits
tra added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:6223-6224
   if (IsCuda || IsHIP) {
-if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
+if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false) ||
+Args.hasArg(options::OPT_foffload_new_driver))
   CmdArgs.push_back("-fgpu-rdc");

tra wrote:
> jhuber6 wrote:
> > tra wrote:
> > > If user specifies both `-fno-gpu-rdc` and `-foffload-new-driver` we would 
> > > still enable RDC compilation.
> > > We may want to at least issue a warning. 
> > > 
> > > Considering that  we have multiple places where we may check for 
> > > `-f[no]gpu-rdc` we should make sure we don't get different ideas whether 
> > > RDC has been enabled.
> > > I think it may make sense to provide a common way to figure it out. 
> > > Either via a helper function that would process CLI arguments or 
> > > calculate it once and save it somewhere.
> > I haven't quite finalized how to handle this. The new driver should be 
> > compatible with a non-RDC build since we simply wouldn't embed the device 
> > image or create offloading entries. It's a little bit more difficult here 
> > since the new method is opt-in so it requires a flag. We should definitely 
> > emit a warning if both are enabled (I'm assuming there's one for passing 
> > both `fgpu-rdc` and `fno-gpu-rdc`). I'll add one in.
> > 
> > Also we could consider the new driver *the* RDC in the future which would 
> > be the easiest. The problem is if we want to support CUDA's method of RDC 
> > considering how other build systems seem to expect it. I could see us 
> > embedding the fatbinary in the object file, even if unused, just so that 
> > cuobjdump works. However we couldn't support the generation of 
> > `__cudaRegisterFatBinary_nv` functions because then those would cause 
> > linker errors. WDYT?
> > I'm assuming there's one for passing both fgpu-rdc and fno-gpu-rdc
> 
> This is not a valid assumption. The whole idea behind `-fno-something` is 
> that the options can be overridden. E.g. if the build specifies a standard 
> set of compiler options, but we need to override some of them when building a 
> particular file. We can only do so by appending to the standard options. 
> Potentially we may end up having those options overridden again. While it's 
> not very common, it's certainly possible. It's also possible to start with 
> '-fno-gpu-rdc' and then override it with `-fgpu-rdc`.
> 
> In this case, we care about the final state of RDC specified by -f*gpu-rdc 
> options, not by the fact that `-fno-gpu-rdc` is present. 
> `Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)` does 
> exactly that -- gives you the final state. If it returns false, but we have 
> `-foffload-new-driver`, then we need a warning as these options are 
> contradictory.
> 
> The new driver should be compatible with a non-RDC build 

In that case, we don't need a warning, but we do need a test verifying this 
behavior.

> Also we could consider the new driver *the* RDC in the future which would be 
> the easiest. 

SGTM. I do not know how it all will work out in the end. Your proposed model 
makes a lot of sense, and I'm guardedly optimistic about it.
Eventually we would deprecate RDC options, but we still need to work sensibly 
when user specifies a mix of these options. 




Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272


[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-04-22 Thread Artem Belevich via Phabricator via cfe-commits
tra added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:6223-6224
   if (IsCuda || IsHIP) {
-if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
+if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false) ||
+Args.hasArg(options::OPT_foffload_new_driver))
   CmdArgs.push_back("-fgpu-rdc");

jhuber6 wrote:
> tra wrote:
> > If user specifies both `-fno-gpu-rdc` and `-foffload-new-driver` we would 
> > still enable RDC compilation.
> > We may want to at least issue a warning. 
> > 
> > Considering that  we have multiple places where we may check for 
> > `-f[no]gpu-rdc` we should make sure we don't get different ideas whether 
> > RDC has been enabled.
> > I think it may make sense to provide a common way to figure it out. Either 
> > via a helper function that would process CLI arguments or calculate it once 
> > and save it somewhere.
> I haven't quite finalized how to handle this. The new driver should be 
> compatible with a non-RDC build since we simply wouldn't embed the device 
> image or create offloading entries. It's a little bit more difficult here 
> since the new method is opt-in so it requires a flag. We should definitely 
> emit a warning if both are enabled (I'm assuming there's one for passing both 
> `fgpu-rdc` and `fno-gpu-rdc`). I'll add one in.
> 
> Also we could consider the new driver *the* RDC in the future which would be 
> the easiest. The problem is if we want to support CUDA's method of RDC 
> considering how other build systems seem to expect it. I could see us 
> embedding the fatbinary in the object file, even if unused, just so that 
> cuobjdump works. However we couldn't support the generation of 
> `__cudaRegisterFatBinary_nv` functions because then those would cause 
> linker errors. WDYT?
> I'm assuming there's one for passing both fgpu-rdc and fno-gpu-rdc

This is not a valid assumption. The whole idea behind `-fno-something` is that 
the options can be overridden. E.g. if the build specifies a standard set of 
compiler options, but we need to override some of them when building a 
particular file. We can only do so by appending to the standard options. 
Potentially we may end up having those options overridden again. While it's not 
very common, it's certainly possible. It's also possible to start with 
'-fno-gpu-rdc' and then override it with `-fgpu-rdc`.

In this case, we care about the final state of RDC specified by -f*gpu-rdc 
options, not by the fact that `-fno-gpu-rdc` is present. 
`Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)` does 
exactly that -- gives you the final state. If it returns false, but we have 
`-foffload-new-driver`, then we need a warning as these options are 
contradictory.



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272


[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-04-22 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:6223-6224
   if (IsCuda || IsHIP) {
-if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
+if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false) ||
+Args.hasArg(options::OPT_foffload_new_driver))
   CmdArgs.push_back("-fgpu-rdc");

tra wrote:
> If user specifies both `-fno-gpu-rdc` and `-foffload-new-driver` we would 
> still enable RDC compilation.
> We may want to at least issue a warning. 
> 
> Considering that  we have multiple places where we may check for 
> `-f[no]gpu-rdc` we should make sure we don't get different ideas whether RDC 
> has been enabled.
> I think it may make sense to provide a common way to figure it out. Either 
> via a helper function that would process CLI arguments or calculate it once 
> and save it somewhere.
I haven't quite finalized how to handle this. The new driver should be 
compatible with a non-RDC build since we simply wouldn't embed the device image 
or create offloading entries. It's a little bit more difficult here since the 
new method is opt-in so it requires a flag. We should definitely emit a warning 
if both are enabled (I'm assuming there's one for passing both `fgpu-rdc` and 
`fno-gpu-rdc`). I'll add one in.

Also we could consider the new driver *the* RDC in the future which would be 
the easiest. The problem is if we want to support CUDA's method of RDC 
considering how other build systems seem to expect it. I could see us embedding 
the fatbinary in the object file, even if unused, just so that cuobjdump works. 
However we couldn't support the generation of `__cudaRegisterFatBinary_nv` 
functions because then those would cause linker errors. WDYT?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272


[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-04-22 Thread Artem Belevich via Phabricator via cfe-commits
tra added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:6223-6224
   if (IsCuda || IsHIP) {
-if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
+if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false) ||
+Args.hasArg(options::OPT_foffload_new_driver))
   CmdArgs.push_back("-fgpu-rdc");

If user specifies both `-fno-gpu-rdc` and `-foffload-new-driver` we would still 
enable RDC compilation.
We may want to at least issue a warning. 

Considering that  we have multiple places where we may check for 
`-f[no]gpu-rdc` we should make sure we don't get different ideas whether RDC 
has been enabled.
I think it may make sense to provide a common way to figure it out. Either via 
a helper function that would process CLI arguments or calculate it once and 
save it somewhere.
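
As a sketch of the helper-function route (the name and exact placement are
hypothetical; the condition mirrors the one this patch adds):

  // One place to answer "is this an RDC compile?" so the driver and the
  // tool chains cannot disagree about it.
  static bool isRDCCompilation(const llvm::opt::ArgList &Args) {
    // For now the new driver implies RDC-style separate compilation.
    if (Args.hasArg(options::OPT_foffload_new_driver))
      return true;
    // Otherwise the last of -fgpu-rdc / -fno-gpu-rdc wins; default is off.
    return Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
                        /*Default=*/false);
  }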


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272


[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-04-22 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added a comment.

Ping


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272


[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-04-19 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 423685.
jhuber6 added a comment.

Making suggested changes.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272

Files:
  clang/include/clang/Basic/Cuda.h
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/lib/Driver/ToolChains/HIPAMD.cpp
  clang/test/Driver/cuda-openmp-driver.cu
  clang/test/Driver/cuda-phases.cu

Index: clang/test/Driver/cuda-phases.cu
===
--- clang/test/Driver/cuda-phases.cu
+++ clang/test/Driver/cuda-phases.cu
@@ -217,3 +217,32 @@
 // DASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]])
 // DASM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler
 // DASM2-NOT: host
+
+//
+// Test the phases generated when using the new offloading driver.
+//
+// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases -foffload-new-driver \
+// RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s 2>&1 | FileCheck --check-prefix=NEW_DRIVER %s
+// NEW_DRIVER: 0: input, "[[INPUT:.*]]", cuda, (host-cuda)
+// NEW_DRIVER: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
+// NEW_DRIVER: 2: compiler, {1}, ir, (host-cuda)
+// NEW_DRIVER: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_52)
+// NEW_DRIVER: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52)
+// NEW_DRIVER: 5: compiler, {4}, ir, (device-cuda, sm_52)
+// NEW_DRIVER: 6: backend, {5}, assembler, (device-cuda, sm_52)
+// NEW_DRIVER: 7: assembler, {6}, object, (device-cuda, sm_52)
+// NEW_DRIVER: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object
+// NEW_DRIVER: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, assembler
+// NEW_DRIVER: 10: linker, {8, 9}, cuda-fatbin, (device-cuda, sm_52)
+// NEW_DRIVER: 11: input, "[[INPUT]]", cuda, (device-cuda, sm_70)
+// NEW_DRIVER: 12: preprocessor, {11}, cuda-cpp-output, (device-cuda, sm_70)
+// NEW_DRIVER: 13: compiler, {12}, ir, (device-cuda, sm_70)
+// NEW_DRIVER: 14: backend, {13}, assembler, (device-cuda, sm_70)
+// NEW_DRIVER: 15: assembler, {14}, object, (device-cuda, sm_70)
+// NEW_DRIVER: 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {15}, object
+// NEW_DRIVER: 17: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {14}, assembler
+// NEW_DRIVER: 18: linker, {16, 17}, cuda-fatbin, (device-cuda, sm_70)
+// NEW_DRIVER: 19: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {10}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {18}, ir
+// NEW_DRIVER: 20: backend, {19}, assembler, (host-cuda)
+// NEW_DRIVER: 21: assembler, {20}, object, (host-cuda)
+// NEW_DRIVER: 22: clang-linker-wrapper, {21}, image, (host-cuda)
Index: clang/test/Driver/cuda-openmp-driver.cu
===
--- /dev/null
+++ clang/test/Driver/cuda-openmp-driver.cu
@@ -0,0 +1,19 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
+// RUN:-foffload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefix BINDINGS %s
+
+// BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]"
+// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[HOST_OBJ:.+]]"
+// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+
+// RUN: %clang -### -nocudalib -foffload-new-driver %s 2>&1 | FileCheck -check-prefix RDC %s
+// RDC: clang{{.*}}"-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-fgpu-rdc"
+// RDC: ptxas{{.*}}-c
Index: clang/lib/Driver/ToolChains/HIPAMD.cpp
===
--- clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -188,7 +188,8 @@
 CC1Args.push_back("-fcuda-approx-transcendentals");
 
   if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
-  false))
+  false) ||
+  DriverArgs.hasArg(options::OPT_foffload_new_driver))

[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-04-19 Thread Artem Belevich via Phabricator via cfe-commits
tra added a comment.

Thank you for adding the compilation pipeline tests.

LGTM overall.




Comment at: clang/lib/Driver/ToolChains/Clang.cpp:6223-6226
 if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
   CmdArgs.push_back("-fgpu-rdc");
+if (Args.hasArg(options::OPT_foffload_new_driver))
+  CmdArgs.push_back("-fgpu-rdc");

Combine both ifs, so we don't add `-fgpu-rdc` twice?
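
For illustration, a minimal sketch of the combined form being suggested (hypothetical; the exact final shape is up to the author):

```
// Emit -fgpu-rdc once, either when it was requested explicitly or when the
// new offloading driver (which implies RDC mode) is in use.
if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false) ||
    Args.hasArg(options::OPT_foffload_new_driver))
  CmdArgs.push_back("-fgpu-rdc");
```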



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:6892-6893
 CmdArgs.push_back("-fgpu-rdc");
+  if (Args.hasArg(options::OPT_foffload_new_driver))
+CmdArgs.push_back("-fgpu-rdc");
   }

Ditto.



Comment at: clang/test/Driver/cuda-openmp-driver.cu:18
+// RUN: %clang -### -nocudalib -foffload-new-driver %s 2>&1 | FileCheck -check-prefix RDC %s
+// RDC: ptxas{{.*}}-c

You probably want to check for `clang -cc1 ... -fgpu-rdc`, too.
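
For reference, the test that eventually landed adds exactly such a check alongside the `ptxas` line:

```
// RDC: clang{{.*}}"-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-fgpu-rdc"
// RDC: ptxas{{.*}}-c
```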


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272


[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-04-19 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 423645.
jhuber6 added a comment.
Herald added a subscriber: mattd.

Adding new test for CUDA phases.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272

Files:
  clang/include/clang/Basic/Cuda.h
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/lib/Driver/ToolChains/HIPAMD.cpp
  clang/test/Driver/cuda-openmp-driver.cu
  clang/test/Driver/cuda-phases.cu

Index: clang/test/Driver/cuda-phases.cu
===
--- clang/test/Driver/cuda-phases.cu
+++ clang/test/Driver/cuda-phases.cu
@@ -217,3 +217,32 @@
 // DASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]])
 // DASM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler
 // DASM2-NOT: host
+
+//
+// Test the phases generated when using the new offloading driver.
+//
+// RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases -foffload-new-driver \
+// RUN: --offload-arch=sm_52 --offload-arch=sm_70 %s 2>&1 | FileCheck --check-prefix=NEW_DRIVER %s
+// NEW_DRIVER: 0: input, "[[INPUT:.*]]", cuda, (host-cuda)
+// NEW_DRIVER: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
+// NEW_DRIVER: 2: compiler, {1}, ir, (host-cuda)
+// NEW_DRIVER: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_52)
+// NEW_DRIVER: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52)
+// NEW_DRIVER: 5: compiler, {4}, ir, (device-cuda, sm_52)
+// NEW_DRIVER: 6: backend, {5}, assembler, (device-cuda, sm_52)
+// NEW_DRIVER: 7: assembler, {6}, object, (device-cuda, sm_52)
+// NEW_DRIVER: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object
+// NEW_DRIVER: 9: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, assembler
+// NEW_DRIVER: 10: linker, {8, 9}, cuda-fatbin, (device-cuda, sm_52)
+// NEW_DRIVER: 11: input, "[[INPUT]]", cuda, (device-cuda, sm_70)
+// NEW_DRIVER: 12: preprocessor, {11}, cuda-cpp-output, (device-cuda, sm_70)
+// NEW_DRIVER: 13: compiler, {12}, ir, (device-cuda, sm_70)
+// NEW_DRIVER: 14: backend, {13}, assembler, (device-cuda, sm_70)
+// NEW_DRIVER: 15: assembler, {14}, object, (device-cuda, sm_70)
+// NEW_DRIVER: 16: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {15}, object
+// NEW_DRIVER: 17: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {14}, assembler
+// NEW_DRIVER: 18: linker, {16, 17}, cuda-fatbin, (device-cuda, sm_70)
+// NEW_DRIVER: 19: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {10}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {18}, ir
+// NEW_DRIVER: 20: backend, {19}, assembler, (host-cuda)
+// NEW_DRIVER: 21: assembler, {20}, object, (host-cuda)
+// NEW_DRIVER: 22: clang-linker-wrapper, {21}, image, (host-cuda)
Index: clang/test/Driver/cuda-openmp-driver.cu
===
--- /dev/null
+++ clang/test/Driver/cuda-openmp-driver.cu
@@ -0,0 +1,18 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
+// RUN:-foffload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefix BINDINGS %s
+
+// BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]"
+// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[HOST_OBJ:.+]]"
+// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+
+// RUN: %clang -### -nocudalib -foffload-new-driver %s 2>&1 | FileCheck -check-prefix RDC %s
+// RDC: ptxas{{.*}}-c
Index: clang/lib/Driver/ToolChains/HIPAMD.cpp
===
--- clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -188,7 +188,8 @@
 CC1Args.push_back("-fcuda-approx-transcendentals");
 
   if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
-  false))
+  false) ||
+  DriverArgs.hasArg(options::OPT_foffload_new_driver))
 

[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-04-07 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 421270.
jhuber6 added a comment.

Make `-foffload-new-driver` imply GPU-RDC mode, since it won't work otherwise. Also adjust tests.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272

Files:
  clang/include/clang/Basic/Cuda.h
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/lib/Driver/ToolChains/HIPAMD.cpp
  clang/test/Driver/cuda-openmp-driver.cu

Index: clang/test/Driver/cuda-openmp-driver.cu
===
--- /dev/null
+++ clang/test/Driver/cuda-openmp-driver.cu
@@ -0,0 +1,18 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
+// RUN:-foffload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefix BINDINGS %s
+
+// BINDINGS: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]"
+// BINDINGS-NEXT: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]"
+// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[HOST_OBJ:.+]]"
+// BINDINGS-NEXT: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
+
+// RUN: %clang -### -nocudalib -foffload-new-driver %s 2>&1 | FileCheck -check-prefix RDC %s
+// RDC: ptxas{{.*}}-c
Index: clang/lib/Driver/ToolChains/HIPAMD.cpp
===
--- clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -188,7 +188,7 @@
 CC1Args.push_back("-fcuda-approx-transcendentals");
 
   if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
-  false))
+  false) || DriverArgs.hasArg(options::OPT_foffload_new_driver))
 CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"});
 
   StringRef MaxThreadsPerBlock =
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -461,7 +461,8 @@
options::OPT_fnoopenmp_relocatable_target,
/*Default=*/true);
   else if (JA.isOffloading(Action::OFK_Cuda))
-Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
+Relocatable = Args.hasArg(options::OPT_foffload_new_driver) || 
+  Args.hasFlag(options::OPT_fgpu_rdc,
options::OPT_fno_gpu_rdc, /*Default=*/false);
 
   if (Relocatable)
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -6248,6 +6248,8 @@
   if (IsCuda || IsHIP) {
 if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
   CmdArgs.push_back("-fgpu-rdc");
+if (Args.hasArg(options::OPT_foffload_new_driver))
+  CmdArgs.push_back("-fgpu-rdc");
 if (Args.hasFlag(options::OPT_fgpu_defer_diag,
  options::OPT_fno_gpu_defer_diag, false))
   CmdArgs.push_back("-fgpu-defer-diag");
@@ -6930,6 +6932,8 @@
   CmdArgs.push_back(CudaDeviceInput->getFilename());
   if (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false))
 CmdArgs.push_back("-fgpu-rdc");
+  if (Args.hasArg(options::OPT_foffload_new_driver))
+CmdArgs.push_back("-fgpu-rdc");
   }
 
   if (IsCuda) {
@@ -8250,14 +8254,17 @@
   ArgStringList CmdArgs;
 
   // Pass the CUDA path to the linker wrapper tool.
-  for (auto &I : llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) {
-const ToolChain *TC = I.second;
-if (TC->getTriple().isNVPTX()) {
-  CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
-  if (CudaInstallation.isValid())
-CmdArgs.push_back(Args.MakeArgString(
-"--cuda-path=" + CudaInstallation.getInstallPath()));
-  break;
+  for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP}) {
+auto TCRange = C.getOffloadToolChains(Kind);
+for (auto &I : llvm::make_range(TCRange.first, TCRange.second)) {
+ 

[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-04-07 Thread Fangrui Song via Phabricator via cfe-commits
MaskRay added inline comments.



Comment at: clang/test/Driver/cuda-openmp-driver.cu:1
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target

clang-driver is unneeded. 


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272


[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-04-07 Thread Fangrui Song via Phabricator via cfe-commits
MaskRay added inline comments.



Comment at: clang/test/Driver/cuda-openmp-driver.cu:10
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: 
"[[PTX_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: 
["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", 
"[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]"

Better to add -NEXT whenever applicable


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272


[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-04-07 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 421232.
jhuber6 added a comment.

Update with the more generic clang argument handling.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272

Files:
  clang/include/clang/Basic/Cuda.h
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/cuda-openmp-driver.cu

Index: clang/test/Driver/cuda-openmp-driver.cu
===
--- /dev/null
+++ clang/test/Driver/cuda-openmp-driver.cu
@@ -0,0 +1,16 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
+// RUN:-foffload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefix CHECK %s
+
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[HOST_OBJ:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -8248,14 +8248,17 @@
   ArgStringList CmdArgs;
 
   // Pass the CUDA path to the linker wrapper tool.
-  for (auto &I : llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) {
-const ToolChain *TC = I.second;
-if (TC->getTriple().isNVPTX()) {
-  CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
-  if (CudaInstallation.isValid())
-CmdArgs.push_back(Args.MakeArgString(
-"--cuda-path=" + CudaInstallation.getInstallPath()));
-  break;
+  for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP}) {
+auto TCRange = C.getOffloadToolChains(Kind);
+for (auto &I : llvm::make_range(TCRange.first, TCRange.second)) {
+  const ToolChain *TC = I.second;
+  if (TC->getTriple().isNVPTX()) {
+CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
+if (CudaInstallation.isValid())
+  CmdArgs.push_back(Args.MakeArgString(
+  "--cuda-path=" + CudaInstallation.getInstallPath()));
+break;
+  }
 }
   }
 
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -4105,6 +4105,101 @@
   Args.ClaimAllArgs(options::OPT_cuda_compile_host_device);
 }
 
+/// Returns the canonical name for the offloading architecture when using HIP or
+/// CUDA.
+static StringRef getCanonicalArchString(Compilation &C,
+llvm::opt::DerivedArgList &Args,
+StringRef ArchStr,
+Action::OffloadKind Kind) {
+  if (Kind == Action::OFK_Cuda) {
+CudaArch Arch = StringToCudaArch(ArchStr);
+if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch)) {
+  C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
+  return StringRef();
+}
+return Args.MakeArgStringRef(CudaArchToString(Arch));
+  } else if (Kind == Action::OFK_HIP) {
+llvm::StringMap<bool> Features;
+// getHIPOffloadTargetTriple() is known to return valid value as it has
+// been called successfully in the CreateOffloadingDeviceToolChains().
+auto Arch = parseTargetID(
+*getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs()), ArchStr,
+&Features);
+if (!Arch) {
+  C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << ArchStr;
+  C.setContainsError();
+  return StringRef();
+}
+return Args.MakeArgStringRef(
+getCanonicalTargetID(Arch.getValue(), Features));
+  }
+  return StringRef();
+}
+
+/// Checks if the set offloading architectures does not conflict. Returns the
+/// incompatible pair if a conflict occurs.
+static llvm::Optional<std::pair<StringRef, StringRef>>
+getConflictOffloadArchCombination(const llvm::DenseSet<StringRef> &Archs,
+  Action::OffloadKind Kind) {
+  if (Kind != Action::OFK_HIP)
+return 

[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-03-17 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 416292.
jhuber6 added a comment.

Addressing comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272

Files:
  clang/include/clang/Basic/Cuda.h
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/cuda-openmp-driver.cu

Index: clang/test/Driver/cuda-openmp-driver.cu
===
--- /dev/null
+++ clang/test/Driver/cuda-openmp-driver.cu
@@ -0,0 +1,16 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
+// RUN:-foffload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefix CHECK %s
+
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[HOST_OBJ:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4391,6 +4391,7 @@
   // one input.
   bool IsCuda = JA.isOffloading(Action::OFK_Cuda);
   bool IsCudaDevice = JA.isDeviceOffloading(Action::OFK_Cuda);
+  bool IsCudaHost = JA.isHostOffloading(Action::OFK_Cuda);
   bool IsHIP = JA.isOffloading(Action::OFK_HIP);
   bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP);
   bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP);
@@ -4414,6 +4415,7 @@
 
   InputInfoList ModuleHeaderInputs;
   InputInfoList OpenMPHostInputs;
+  InputInfoList CudaHostInputs;
   const InputInfo *CudaDeviceInput = nullptr;
   const InputInfo *OpenMPDeviceInput = nullptr;
   for (const InputInfo &I : Inputs) {
@@ -4428,6 +4430,9 @@
 << types::getTypeName(Expected);
   }
   ModuleHeaderInputs.push_back(I);
+} else if (IsCudaHost && Args.hasArg(options::OPT_fopenmp_new_driver)) {
+  CudaDeviceInput = &I;
+  CudaHostInputs.push_back(I);
 } else if ((IsCuda || IsHIP) && !CudaDeviceInput) {
   CudaDeviceInput = &I;
 } else if (IsOpenMPDevice && !OpenMPDeviceInput) {
@@ -6982,6 +6987,19 @@
   TC->getTripleString() + "." +
   TCArgs.getLastArgValue(options::OPT_march_EQ) + "." + InputName));
 }
+  } else if (IsCudaHost && !CudaHostInputs.empty()) {
+const ToolChain *TC = C.getSingleOffloadToolChain<Action::OFK_Cuda>();
+for (const auto &InputFile : CudaHostInputs) {
+  assert(InputFile.isFilename() && "Offloading requires a filename");
+  StringRef File =
+  C.getArgs().MakeArgString(TC->getInputFilename(InputFile));
+  StringRef InputName = Clang::getBaseInputStem(Args, Inputs);
+  CmdArgs.push_back(Args.MakeArgString(
+  "-fembed-offload-object=" + File + "," +
+  Action::GetOffloadKindName(Action::OFK_Cuda) + "." +
+  TC->getTripleString() + "." +
+  InputFile.getAction()->getOffloadingArch() + "." + InputName));
+}
   }
 
   if (Triple.isAMDGPU()) {
@@ -8234,14 +8252,17 @@
   ArgStringList CmdArgs;
 
   // Pass the CUDA path to the linker wrapper tool.
-  for (auto &I : llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) {
-const ToolChain *TC = I.second;
-if (TC->getTriple().isNVPTX()) {
-  CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
-  if (CudaInstallation.isValid())
-CmdArgs.push_back(Args.MakeArgString(
-"--cuda-path=" + CudaInstallation.getInstallPath()));
-  break;
+  for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP}) {
+auto TCRange = C.getOffloadToolChains(Kind);
+for (auto &I : llvm::make_range(TCRange.first, TCRange.second)) {
+  const ToolChain *TC = I.second;
+  if (TC->getTriple().isNVPTX()) {
+CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
+if (CudaInstallation.isValid())
+  CmdArgs.push_back(Args.MakeArgString(
+  "--cuda-path=" + CudaInstallation.getInstallPath()));
+break;
+  }
 

[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-03-17 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/include/clang/Basic/Cuda.h:105-107
+constexpr CudaArch DefaultCudaArch = CudaArch::SM_35;
+constexpr CudaArch DefaultHIPArch = CudaArch::GFX803;
+

tra wrote:
> Nit: those could be just enum values.
> ```
>   ...
>   LAST,
>   DefaultCudaArch = SM_35,
>   DefaultHIPArch = GFX803,
> };
> ```
Will do.



Comment at: clang/lib/Driver/Driver.cpp:4219
+for (unsigned I = 0, E = TCAndArchs.size(); I != E; ++I)
   DeviceActions.push_back(C.MakeAction<InputAction>(*InputArg, InputType));
 

tra wrote:
> This loop can be folded into the loop above.
> 
> Alternatively, for simple loops like this one could use `llvm::for_each`.
It could. I think the intention is clearer (i.e., make an input for every
toolchain and architecture) when they're kept separate, but I can merge them if
that's better.



Comment at: clang/lib/Driver/Driver.cpp:4244
   A = C.MakeAction<OffloadAction>(HDep, DDep);
+  ++TCAndArch;
+} else if (isa(A) && Kind == Action::OFK_Cuda) {

tra wrote:
> Could you elaborate why TCAndArch is incremented only here? Don't other cases 
> need to advance it, too? At the very least we need some comments here and for 
> the loop in general, describing what's going on.
It shouldn't be; I forgot to move this out of the conditional once I added more
things. I'll explain the usage as well.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:6998
+  // The CUDA toolchain should have a bound arch appended to the filename.
+  StringRef Arch = File.rsplit(".").first.rsplit('-').second;
+  CmdArgs.push_back(Args.MakeArgString(

tra wrote:
> Extracting arch name from the file name looks... questionable. Where does 
> that file name come from? Can't we get this information directly?
Yeah, it's not ideal but it was the easiest way to do it. The alternative is to 
find a way to traverse the job action list and find the nodes with a bound 
architecture set and hope they're in the same order. I can try to do that.
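
To make that string surgery concrete, a small self-contained sketch of what the expression extracts; the file name below is invented purely for illustration:

```
#include "llvm/ADT/StringRef.h"
#include <cstdio>

int main() {
  // Hypothetical bound-arch temporary name of the form "<stem>-<arch>.<ext>".
  llvm::StringRef File = "cuda-openmp-driver-sm_70.cubin";
  // Drop the extension (split on the last '.'), then take what follows the last '-'.
  llvm::StringRef Arch = File.rsplit(".").first.rsplit('-').second;
  std::printf("%s\n", Arch.str().c_str()); // prints "sm_70"
}
```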


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272


[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-03-17 Thread Artem Belevich via Phabricator via cfe-commits
tra added inline comments.



Comment at: clang/include/clang/Basic/Cuda.h:105-107
+constexpr CudaArch DefaultCudaArch = CudaArch::SM_35;
+constexpr CudaArch DefaultHIPArch = CudaArch::GFX803;
+

Nit: those could be just enum values.
```
  ...
  LAST,
  DefaultCudaArch = SM_35,
  DefaultHIPArch = GFX803,
};
```



Comment at: clang/lib/Driver/Driver.cpp:4219
+for (unsigned I = 0, E = TCAndArchs.size(); I != E; ++I)
   DeviceActions.push_back(C.MakeAction<InputAction>(*InputArg, InputType));
 

This loop can be folded into the loop above.

Alternatively, for simple loops like this one could use `llvm::for_each`.
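
For reference, `llvm::for_each` (from `llvm/ADT/STLExtras.h`) is a range-based wrapper over `std::for_each`; a tiny self-contained sketch of the suggested pattern, with placeholder containers rather than the driver's types:

```
#include "llvm/ADT/STLExtras.h"
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> TCAndArchs = {1, 2, 3}; // stand-in for the toolchain/arch list
  std::vector<int> DeviceActions;
  // One entry pushed per toolchain/arch element, mirroring the indexed loop above.
  llvm::for_each(TCAndArchs, [&](int) { DeviceActions.push_back(0); });
  std::printf("%zu\n", DeviceActions.size()); // prints 3
}
```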



Comment at: clang/lib/Driver/Driver.cpp:4244
   A = C.MakeAction<OffloadAction>(HDep, DDep);
+  ++TCAndArch;
+} else if (isa(A) && Kind == Action::OFK_Cuda) {

Could you elaborate why TCAndArch is incremented only here? Don't other cases 
need to advance it, too? At the very least we need some comments here and for 
the loop in general, describing what's going on.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:6998
+  // The CUDA toolchain should have a bound arch appended to the filename.
+  StringRef Arch = File.rsplit(".").first.rsplit('-').second;
+  CmdArgs.push_back(Args.MakeArgString(

Extracting arch name from the file name looks... questionable. Where does that 
file name come from? Can't we get this information directly?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272


[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-03-15 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 415622.
jhuber6 added a comment.

Fix wrong condition for picking up input.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272

Files:
  clang/include/clang/Basic/Cuda.h
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/cuda-openmp-driver.cu

Index: clang/test/Driver/cuda-openmp-driver.cu
===
--- /dev/null
+++ clang/test/Driver/cuda-openmp-driver.cu
@@ -0,0 +1,16 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
+// RUN:-foffload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefix CHECK %s
+
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[HOST_OBJ:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4391,6 +4391,7 @@
   // one input.
   bool IsCuda = JA.isOffloading(Action::OFK_Cuda);
   bool IsCudaDevice = JA.isDeviceOffloading(Action::OFK_Cuda);
+  bool IsCudaHost = JA.isHostOffloading(Action::OFK_Cuda);
   bool IsHIP = JA.isOffloading(Action::OFK_HIP);
   bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP);
   bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP);
@@ -4414,6 +4415,7 @@
 
   InputInfoList ModuleHeaderInputs;
   InputInfoList OpenMPHostInputs;
+  InputInfoList CudaHostInputs;
   const InputInfo *CudaDeviceInput = nullptr;
   const InputInfo *OpenMPDeviceInput = nullptr;
   for (const InputInfo &I : Inputs) {
@@ -4428,6 +4430,9 @@
 << types::getTypeName(Expected);
   }
   ModuleHeaderInputs.push_back(I);
+} else if (IsCudaHost && Args.hasArg(options::OPT_fopenmp_new_driver)) {
+  CudaDeviceInput = &I;
+  CudaHostInputs.push_back(I);
 } else if ((IsCuda || IsHIP) && !CudaDeviceInput) {
   CudaDeviceInput = &I;
 } else if (IsOpenMPDevice && !OpenMPDeviceInput) {
@@ -6982,6 +6987,20 @@
   TC->getTripleString() + "." +
   TCArgs.getLastArgValue(options::OPT_march_EQ) + "." + InputName));
 }
+  } else if (IsCudaHost && !CudaHostInputs.empty()) {
+const ToolChain *TC = C.getSingleOffloadToolChain<Action::OFK_Cuda>();
+for (const auto &InputFile : CudaHostInputs) {
+  assert(InputFile.isFilename() && "Offloading requires a filename");
+  StringRef File =
+  C.getArgs().MakeArgString(TC->getInputFilename(InputFile));
+  StringRef InputName = Clang::getBaseInputStem(Args, Inputs);
+  // The CUDA toolchain should have a bound arch appended to the filename.
+  StringRef Arch = File.rsplit(".").first.rsplit('-').second;
+  CmdArgs.push_back(Args.MakeArgString(
+  "-fembed-offload-object=" + File + "," +
+  Action::GetOffloadKindName(Action::OFK_Cuda) + "." +
+  TC->getTripleString() + "." + Arch + "." + InputName));
+}
   }
 
   if (Triple.isAMDGPU()) {
@@ -8234,14 +8253,17 @@
   ArgStringList CmdArgs;
 
   // Pass the CUDA path to the linker wrapper tool.
-  for (auto &I : llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) {
-const ToolChain *TC = I.second;
-if (TC->getTriple().isNVPTX()) {
-  CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
-  if (CudaInstallation.isValid())
-CmdArgs.push_back(Args.MakeArgString(
-"--cuda-path=" + CudaInstallation.getInstallPath()));
-  break;
+  for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP}) {
+auto TCRange = C.getOffloadToolChains(Kind);
+for (auto &I : llvm::make_range(TCRange.first, TCRange.second)) {
+  const ToolChain *TC = I.second;
+  if (TC->getTriple().isNVPTX()) {
+CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
+if (CudaInstallation.isValid())
+  

[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-03-15 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 415460.
jhuber6 added a comment.

Accidentally clang formatted an entire file.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272

Files:
  clang/include/clang/Basic/Cuda.h
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/cuda-openmp-driver.cu

Index: clang/test/Driver/cuda-openmp-driver.cu
===
--- /dev/null
+++ clang/test/Driver/cuda-openmp-driver.cu
@@ -0,0 +1,16 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
+// RUN:-foffload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefix CHECK %s
+
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[HOST_OBJ:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4391,6 +4391,7 @@
   // one input.
   bool IsCuda = JA.isOffloading(Action::OFK_Cuda);
   bool IsCudaDevice = JA.isDeviceOffloading(Action::OFK_Cuda);
+  bool IsCudaHost = JA.isHostOffloading(Action::OFK_Cuda);
   bool IsHIP = JA.isOffloading(Action::OFK_HIP);
   bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP);
   bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP);
@@ -4414,6 +4415,7 @@
 
   InputInfoList ModuleHeaderInputs;
   InputInfoList OpenMPHostInputs;
+  InputInfoList CudaHostInputs;
   const InputInfo *CudaDeviceInput = nullptr;
   const InputInfo *OpenMPDeviceInput = nullptr;
   for (const InputInfo &I : Inputs) {
@@ -4428,6 +4430,9 @@
 << types::getTypeName(Expected);
   }
   ModuleHeaderInputs.push_back(I);
+} else if (IsOpenMPDevice && !OpenMPDeviceInput) {
+  CudaDeviceInput = &I;
+  CudaHostInputs.push_back(I);
 } else if ((IsCuda || IsHIP) && !CudaDeviceInput) {
   CudaDeviceInput = &I;
 } else if (IsOpenMPDevice && !OpenMPDeviceInput) {
@@ -6982,6 +6987,20 @@
   TC->getTripleString() + "." +
   TCArgs.getLastArgValue(options::OPT_march_EQ) + "." + InputName));
 }
+  } else if (IsCudaHost && !CudaHostInputs.empty()) {
+const ToolChain *TC = C.getSingleOffloadToolChain<Action::OFK_Cuda>();
+for (const auto &InputFile : CudaHostInputs) {
+  assert(InputFile.isFilename() && "Offloading requires a filename");
+  StringRef File =
+  C.getArgs().MakeArgString(TC->getInputFilename(InputFile));
+  StringRef InputName = Clang::getBaseInputStem(Args, Inputs);
+  // The CUDA toolchain should have a bound arch appended to the filename.
+  StringRef Arch = File.rsplit(".").first.rsplit('-').second;
+  CmdArgs.push_back(Args.MakeArgString(
+  "-fembed-offload-object=" + File + "," +
+  Action::GetOffloadKindName(Action::OFK_Cuda) + "." +
+  TC->getTripleString() + "." + Arch + "." + InputName));
+}
   }
 
   if (Triple.isAMDGPU()) {
@@ -8234,14 +8253,17 @@
   ArgStringList CmdArgs;
 
   // Pass the CUDA path to the linker wrapper tool.
-  for (auto &I : llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) {
-const ToolChain *TC = I.second;
-if (TC->getTriple().isNVPTX()) {
-  CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
-  if (CudaInstallation.isValid())
-CmdArgs.push_back(Args.MakeArgString(
-"--cuda-path=" + CudaInstallation.getInstallPath()));
-  break;
+  for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP}) {
+auto TCRange = C.getOffloadToolChains(Kind);
+for (auto &I : llvm::make_range(TCRange.first, TCRange.second)) {
+  const ToolChain *TC = I.second;
+  if (TC->getTriple().isNVPTX()) {
+CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
+if (CudaInstallation.isValid())
+  

[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-03-15 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 415439.
jhuber6 added a comment.

We shouldn't need to restrict this to RDC only if implemented properly.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272

Files:
  clang/include/clang/Basic/Cuda.h
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/cuda-openmp-driver.cu

Index: clang/test/Driver/cuda-openmp-driver.cu
===
--- /dev/null
+++ clang/test/Driver/cuda-openmp-driver.cu
@@ -0,0 +1,16 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
+// RUN:-foffload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefix CHECK %s
+
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[HOST_OBJ:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -71,8 +71,8 @@
   if (Args.hasArg(options::OPT_static))
 if (const Arg *A =
 Args.getLastArg(options::OPT_dynamic, options::OPT_mdynamic_no_pic))
-  D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args)
-  << "-static";
+  D.Diag(diag::err_drv_argument_not_allowed_with)
+  << A->getAsString(Args) << "-static";
 }
 
 // Add backslashes to escape spaces and other backslashes.
@@ -157,8 +157,8 @@
 /// parameter in reciprocal argument strings. Return false if there is an error
 /// parsing the refinement step. Otherwise, return true and set the Position
 /// of the refinement step in the input string.
-static bool getRefinementStep(StringRef In, const Driver &D,
-  const Arg &A, size_t &Position) {
+static bool getRefinementStep(StringRef In, const Driver &D, const Arg &A,
+  size_t &Position) {
   const char RefinementStepToken = ':';
   Position = In.find(RefinementStepToken);
   if (Position != StringRef::npos) {
@@ -510,7 +510,7 @@
 }
 
 static bool mustUseNonLeafFramePointerForTarget(const llvm::Triple &Triple) {
-  switch (Triple.getArch()){
+  switch (Triple.getArch()) {
   default:
 return false;
   case llvm::Triple::arm:
@@ -705,7 +705,7 @@
 
 /// Add a CC1 and CC1AS option to specify the coverage file path prefix map.
 static void addCoveragePrefixMapArg(const Driver &D, const ArgList &Args,
-   ArgStringList &CmdArgs) {
+ArgStringList &CmdArgs) {
   for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ,
 options::OPT_fcoverage_prefix_map_EQ)) {
 StringRef Map = A->getValue();
@@ -801,13 +801,12 @@
   CSPGOGenerateArg->getOption().matches(options::OPT_fno_profile_generate))
 CSPGOGenerateArg = nullptr;
 
-  auto *ProfileGenerateArg = Args.getLastArg(
-  options::OPT_fprofile_instr_generate,
-  options::OPT_fprofile_instr_generate_EQ,
-  options::OPT_fno_profile_instr_generate);
-  if (ProfileGenerateArg &&
-  ProfileGenerateArg->getOption().matches(
-  options::OPT_fno_profile_instr_generate))
+  auto *ProfileGenerateArg =
+  Args.getLastArg(options::OPT_fprofile_instr_generate,
+  options::OPT_fprofile_instr_generate_EQ,
+  options::OPT_fno_profile_instr_generate);
+  if (ProfileGenerateArg && ProfileGenerateArg->getOption().matches(
+options::OPT_fno_profile_instr_generate))
 ProfileGenerateArg = nullptr;
 
   if (PGOGenerateArg && ProfileGenerateArg)
@@ -1334,8 +1333,8 @@
   }
 
   if (ThroughHeader.empty()) {
-CmdArgs.push_back(Args.MakeArgString(
-Twine("-pch-through-hdrstop-") + (YcArg ? "create" : "use")));
+

[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-03-10 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 414371.
jhuber6 added a comment.
Herald added subscribers: abrachet, phosek.

Fix architecture parsing and still include the GPU binaries so cuobjcopy can use them.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272

Files:
  clang/include/clang/Basic/Cuda.h
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/cuda-openmp-driver.cu

Index: clang/test/Driver/cuda-openmp-driver.cu
===
--- /dev/null
+++ clang/test/Driver/cuda-openmp-driver.cu
@@ -0,0 +1,16 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
+// RUN:-foffload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefix CHECK %s
+
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[HOST_OBJ:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -71,8 +71,8 @@
   if (Args.hasArg(options::OPT_static))
 if (const Arg *A =
 Args.getLastArg(options::OPT_dynamic, options::OPT_mdynamic_no_pic))
-  D.Diag(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args)
-  << "-static";
+  D.Diag(diag::err_drv_argument_not_allowed_with)
+  << A->getAsString(Args) << "-static";
 }
 
 // Add backslashes to escape spaces and other backslashes.
@@ -157,8 +157,8 @@
 /// parameter in reciprocal argument strings. Return false if there is an error
 /// parsing the refinement step. Otherwise, return true and set the Position
 /// of the refinement step in the input string.
-static bool getRefinementStep(StringRef In, const Driver &D,
-  const Arg &A, size_t &Position) {
+static bool getRefinementStep(StringRef In, const Driver &D, const Arg &A,
+  size_t &Position) {
   const char RefinementStepToken = ':';
   Position = In.find(RefinementStepToken);
   if (Position != StringRef::npos) {
@@ -510,7 +510,7 @@
 }
 
 static bool mustUseNonLeafFramePointerForTarget(const llvm::Triple &Triple) {
-  switch (Triple.getArch()){
+  switch (Triple.getArch()) {
   default:
 return false;
   case llvm::Triple::arm:
@@ -705,7 +705,7 @@
 
 /// Add a CC1 and CC1AS option to specify the coverage file path prefix map.
 static void addCoveragePrefixMapArg(const Driver &D, const ArgList &Args,
-   ArgStringList &CmdArgs) {
+ArgStringList &CmdArgs) {
   for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ,
 options::OPT_fcoverage_prefix_map_EQ)) {
 StringRef Map = A->getValue();
@@ -801,13 +801,12 @@
   CSPGOGenerateArg->getOption().matches(options::OPT_fno_profile_generate))
 CSPGOGenerateArg = nullptr;
 
-  auto *ProfileGenerateArg = Args.getLastArg(
-  options::OPT_fprofile_instr_generate,
-  options::OPT_fprofile_instr_generate_EQ,
-  options::OPT_fno_profile_instr_generate);
-  if (ProfileGenerateArg &&
-  ProfileGenerateArg->getOption().matches(
-  options::OPT_fno_profile_instr_generate))
+  auto *ProfileGenerateArg =
+  Args.getLastArg(options::OPT_fprofile_instr_generate,
+  options::OPT_fprofile_instr_generate_EQ,
+  options::OPT_fno_profile_instr_generate);
+  if (ProfileGenerateArg && ProfileGenerateArg->getOption().matches(
+options::OPT_fno_profile_instr_generate))
 ProfileGenerateArg = nullptr;
 
   if (PGOGenerateArg && ProfileGenerateArg)
@@ -1334,8 +1333,8 @@
   }
 
   if (ThroughHeader.empty()) {
-CmdArgs.push_back(Args.MakeArgString(
-

[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-03-04 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/lib/Driver/Driver.cpp:4132-4134
+  Archs.insert(CudaArchToString(CudaArch::SM_35));
+else if (Kind == Action::OFK_HIP)
+  Archs.insert(CudaArchToString(CudaArch::GFX803));

tra wrote:
> JonChesterfield wrote:
> > tra wrote:
> > > If we do not have constants for the default CUDA/HIP arch yet, we should 
> > > probably add them and use them here.
> > Defaulting hip to gfx803 is unlikely to be helpful. It won't run on other 
> > architectures, i.e. there's no conservative default that will run on most 
> > things. I guess that's an existing quirk of the hip toolchain?
> I agree that there's no "safe" choice of GPU target. It applies to CUDA, as 
> well, at least for GPU binaries.
> That said, we still want `clang -c foo.cu` to work.
> 
> For CUDA we use the oldest variant still supported by the vendor. It produces 
> PTX assembly which we embed along with the GPU binary. 
> That PTX is valid for newer GPU architectures and the CUDA runtime will be able to
> compile it for the GPU the app happens to run on. It's not ideal, but it 
> works.
> 
> While for AMDGPU we do not have such forward compatibility mode as we have 
> with CUDA, being able to compile for *something* by default is still useful, 
> IMO.
> 
> 
I just copied GFX803 because that's what the previous driver used. I agree we
should just default to something; maybe in the AMD case we can issue a warning
telling users to specify the architecture explicitly with the flag.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272


[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-03-04 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 413087.
jhuber6 added a comment.
Herald added a subscriber: dexonsmith.

Addressing nits.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272

Files:
  clang/include/clang/Basic/Cuda.h
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/cuda-openmp-driver.cu

Index: clang/test/Driver/cuda-openmp-driver.cu
===
--- /dev/null
+++ clang/test/Driver/cuda-openmp-driver.cu
@@ -0,0 +1,16 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
+// RUN:-foffload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefix CHECK %s
+
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[HOST_OBJ:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4391,6 +4391,7 @@
   // one input.
   bool IsCuda = JA.isOffloading(Action::OFK_Cuda);
   bool IsCudaDevice = JA.isDeviceOffloading(Action::OFK_Cuda);
+  bool IsCudaHost = JA.isHostOffloading(Action::OFK_Cuda);
   bool IsHIP = JA.isOffloading(Action::OFK_HIP);
   bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP);
   bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP);
@@ -4414,6 +4415,7 @@
 
   InputInfoList ModuleHeaderInputs;
   InputInfoList OpenMPHostInputs;
+  InputInfoList CudaHostInputs;
   const InputInfo *CudaDeviceInput = nullptr;
   const InputInfo *OpenMPDeviceInput = nullptr;
   for (const InputInfo &I : Inputs) {
@@ -4428,6 +4430,8 @@
 << types::getTypeName(Expected);
   }
   ModuleHeaderInputs.push_back(I);
+} else if (IsCudaHost && Args.hasArg(options::OPT_fopenmp_new_driver)) {
+  CudaHostInputs.push_back(I);
 } else if ((IsCuda || IsHIP) && !CudaDeviceInput) {
   CudaDeviceInput = &I;
 } else if (IsOpenMPDevice && !OpenMPDeviceInput) {
@@ -6957,6 +6961,7 @@
 auto OpenMPTCs = C.getOffloadToolChains<Action::OFK_OpenMP>();
 for (auto TI = OpenMPTCs.first, TE = OpenMPTCs.second; TI != TE;
  ++TI, ++InputFile) {
+  assert(InputFile->isFilename() && "Offloading requires a filename");
   const ToolChain *TC = TI->second;
   const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP);
   StringRef File =
@@ -6969,6 +6974,20 @@
   TC->getTripleString() + "." +
   TCArgs.getLastArgValue(options::OPT_march_EQ) + "." + InputName));
 }
+  } else if (IsCudaHost && !CudaHostInputs.empty()) {
+const ToolChain *TC = C.getSingleOffloadToolChain<Action::OFK_Cuda>();
+for (const auto &InputFile : CudaHostInputs) {
+  assert(InputFile.isFilename() && "Offloading requires a filename");
+  StringRef File =
+  C.getArgs().MakeArgString(TC->getInputFilename(InputFile));
+  StringRef InputName = Clang::getBaseInputStem(Args, Inputs);
+  // The CUDA toolchain should have a bound arch appended to the filename.
+  StringRef Arch = File.split(".").first.rsplit('-').second;
+  CmdArgs.push_back(Args.MakeArgString(
+  "-fembed-offload-object=" + File + "," +
+  Action::GetOffloadKindName(Action::OFK_Cuda) + "." +
+  TC->getTripleString() + "." + Arch + "." + InputName));
+}
   }
 
   if (Triple.isAMDGPU()) {
@@ -8220,14 +8239,17 @@
   ArgStringList CmdArgs;
 
   // Pass the CUDA path to the linker wrapper tool.
-  for (auto &I : llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) {
-const ToolChain *TC = I.second;
-if (TC->getTriple().isNVPTX()) {
-  CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
-  if (CudaInstallation.isValid())
-CmdArgs.push_back(Args.MakeArgString(
-"--cuda-path=" + 

[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-03-04 Thread Artem Belevich via Phabricator via cfe-commits
tra added inline comments.



Comment at: clang/lib/Driver/Driver.cpp:4132-4134
+  Archs.insert(CudaArchToString(CudaArch::SM_35));
+else if (Kind == Action::OFK_HIP)
+  Archs.insert(CudaArchToString(CudaArch::GFX803));

JonChesterfield wrote:
> tra wrote:
> > If we do not have constants for the default CUDA/HIP arch yet, we should 
> > probably add them and use them here.
> Defaulting hip to gfx803 is unlikely to be helpful. It won't run on other 
> architectures, i.e. there's no conservative default that will run on most 
> things. I guess that's an existing quirk of the hip toolchain?
I agree that there's no "safe" choice of GPU target. It applies to CUDA, as 
well, at least for GPU binaries.
That said, we still want `clang -c foo.cu` to work.

For CUDA we use the oldest variant still supported by the vendor. It produces 
PTX assembly which we embed along with the GPU binary. 
That PTX is valid for newer GPU architectures and the CUDA runtime will be able to
compile it for the GPU the app happens to run on. It's not ideal, but it works.

While for AMDGPU we do not have such forward compatibility mode as we have with 
CUDA, being able to compile for *something* by default is still useful, IMO.




Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272


[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-03-04 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield added inline comments.



Comment at: clang/lib/Driver/Driver.cpp:4132-4134
+  Archs.insert(CudaArchToString(CudaArch::SM_35));
+else if (Kind == Action::OFK_HIP)
+  Archs.insert(CudaArchToString(CudaArch::GFX803));

tra wrote:
> If we do not have constants for the default CUDA/HIP arch yet, we should 
> probably add them and use them here.
Defaulting hip to gfx803 is unlikely to be helpful. It won't run on other 
architectures, i.e. there's no conservative default that will run on most 
things. I guess that's an existing quirk of the hip toolchain?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272


[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-03-04 Thread Artem Belevich via Phabricator via cfe-commits
tra added inline comments.



Comment at: clang/lib/Driver/Driver.cpp:4107
+ options::OPT_no_offload_arch_EQ)) {
+C.getDriver().Diag(diag::err_opt_not_valid_with_opt) << "--offload-arch"
+ << "--offload";

Nit: It would be nice to report specific option which triggered the diag. 
Reporting `--offload-arch` when it's triggered by `--no-offload-arch` would be 
somewhat confusing.

`Args.hasArgNoClaim(options::OPT_offload_arch_EQ) ? "--offload-arch" : "--no-offload-arch"` ?
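
Spelled out a bit more, the suggestion might look like this (a hypothetical sketch; the variable name is invented here):

```
// Report whichever spelling the user actually passed.
bool FromOffloadArch = Args.hasArgNoClaim(options::OPT_offload_arch_EQ);
C.getDriver().Diag(diag::err_opt_not_valid_with_opt)
    << (FromOffloadArch ? "--offload-arch" : "--no-offload-arch") << "--offload";
```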



Comment at: clang/lib/Driver/Driver.cpp:4132-4134
+  Archs.insert(CudaArchToString(CudaArch::SM_35));
+else if (Kind == Action::OFK_HIP)
+  Archs.insert(CudaArchToString(CudaArch::GFX803));

If we do not have constants for the default CUDA/HIP arch yet, we should 
probably add them and use them here.



Comment at: clang/lib/Driver/Driver.cpp:4171
 
-for (unsigned I = 0; I < ToolChains.size(); ++I)
+if (!Relocatable)
+  Diags.Report(diag::err_drv_non_relocatable);

If we do not allow non-relocatable compilation with the new driver, perhaps we 
should make `-fgpu-rdc` enabled by default with the new driver and error out if 
someone attempts to use `-fno-gpu-rdc`. Otherwise we're virtually guaranteed 
that everyone attempting to use `-foffload-new-driver` will hit this error.
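
A rough sketch of the shape being suggested at the point where `Relocatable` is computed (hypothetical; names like `Args` and `Diags` stand for whatever the surrounding code has in scope):

```
// With -foffload-new-driver, treat RDC as the default and diagnose an explicit
// -fno-gpu-rdc up front instead of failing later.
bool Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
                                options::OPT_fno_gpu_rdc, /*Default=*/true);
if (!Relocatable)
  Diags.Report(diag::err_drv_non_relocatable);
```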




Comment at: clang/lib/Driver/Driver.cpp:4172
+if (!Relocatable)
+  Diags.Report(diag::err_drv_non_relocatable);
+

Do we want to return early here?



Comment at: clang/lib/Driver/Driver.cpp:4221
+auto TCAndArch = TCAndArchs.begin();
 for (Action *A : DeviceActions) {
+  DDeps.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind);

Nit: We do have `llvm::zip` for iterating over multiple containers. However,
unpacking the loop variable may be more trouble than it's worth in such a
small loop. Up to you.
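
A small self-contained sketch of the `llvm::zip` pattern being referred to, with placeholder containers instead of the driver's action and toolchain lists:

```
#include "llvm/ADT/STLExtras.h"
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> Actions = {10, 20, 30};      // stand-ins only
  std::vector<char> TCAndArchs = {'a', 'b', 'c'};
  // llvm::zip walks both ranges in lock step; structured bindings unpack each tuple.
  for (auto [A, TC] : llvm::zip(Actions, TCAndArchs))
    std::printf("%d -> %c\n", A, TC);
}
```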


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272


[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-03-04 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 412996.
jhuber6 added a comment.

Fix


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272

Files:
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/cuda-openmp-driver.cu

Index: clang/test/Driver/cuda-openmp-driver.cu
===
--- /dev/null
+++ clang/test/Driver/cuda-openmp-driver.cu
@@ -0,0 +1,16 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
+// RUN:-foffload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefix CHECK %s
+
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[HOST_OBJ:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4391,6 +4391,7 @@
   // one input.
   bool IsCuda = JA.isOffloading(Action::OFK_Cuda);
   bool IsCudaDevice = JA.isDeviceOffloading(Action::OFK_Cuda);
+  bool IsCudaHost = JA.isHostOffloading(Action::OFK_Cuda);
   bool IsHIP = JA.isOffloading(Action::OFK_HIP);
   bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP);
   bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP);
@@ -4414,6 +4415,7 @@
 
   InputInfoList ModuleHeaderInputs;
   InputInfoList OpenMPHostInputs;
+  InputInfoList CudaHostInputs;
   const InputInfo *CudaDeviceInput = nullptr;
   const InputInfo *OpenMPDeviceInput = nullptr;
   for (const InputInfo &I : Inputs) {
@@ -4428,6 +4430,8 @@
 << types::getTypeName(Expected);
   }
   ModuleHeaderInputs.push_back(I);
+} else if (IsCudaHost && Args.hasArg(options::OPT_fopenmp_new_driver)) {
+  CudaHostInputs.push_back(I);
 } else if ((IsCuda || IsHIP) && !CudaDeviceInput) {
   CudaDeviceInput = &I;
 } else if (IsOpenMPDevice && !OpenMPDeviceInput) {
@@ -6957,6 +6961,7 @@
  auto OpenMPTCs = C.getOffloadToolChains<Action::OFK_OpenMP>();
 for (auto TI = OpenMPTCs.first, TE = OpenMPTCs.second; TI != TE;
  ++TI, ++InputFile) {
+  assert(InputFile->isFilename() && "Offloading requires a filename");
   const ToolChain *TC = TI->second;
   const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP);
   StringRef File =
@@ -6969,6 +6974,21 @@
   TC->getTripleString() + "." +
   TCArgs.getLastArgValue(options::OPT_march_EQ) + "." + InputName));
 }
+  } else if (IsCudaHost && !CudaHostInputs.empty()) {
+const ToolChain *TC = C.getSingleOffloadToolChain<Action::OFK_Cuda>();
+for (const auto &InputFile : CudaHostInputs) {
+  assert(InputFile.isFilename() && "Offloading requires a filename");
+  StringRef File =
+  C.getArgs().MakeArgString(TC->getInputFilename(InputFile));
+  StringRef InputName = Clang::getBaseInputStem(Args, Inputs);
+  // The CUDA toolchain should have a bound arch appended to the filename.
+  StringRef Arch = File.split(".").first.rsplit('-').second;
+  CmdArgs.push_back(Args.MakeArgString(
+  "-fembed-offload-object=" + File + "," +
+  Action::GetOffloadKindName(Action::OFK_Cuda) + "." +
+  TC->getTripleString() + "." +
+  Arch + "." + InputName));
+}
   }
 
   if (Triple.isAMDGPU()) {
@@ -8220,14 +8240,17 @@
   ArgStringList CmdArgs;
 
   // Pass the CUDA path to the linker wrapper tool.
-  for (auto &I : llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) {
-const ToolChain *TC = I.second;
-if (TC->getTriple().isNVPTX()) {
-  CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
-  if (CudaInstallation.isValid())
-CmdArgs.push_back(Args.MakeArgString(
-"--cuda-path=" + CudaInstallation.getInstallPath()));
-  break;
+  for (Action::OffloadKind Kind : {Action::OFK_Cuda, 

[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-03-03 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 412908.
jhuber6 added a comment.

Correctly handle offloading architecture options.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272

Files:
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/cuda-openmp-driver.cu

Index: clang/test/Driver/cuda-openmp-driver.cu
===
--- /dev/null
+++ clang/test/Driver/cuda-openmp-driver.cu
@@ -0,0 +1,16 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
+// RUN:-foffload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefix CHECK %s
+
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[HOST_OBJ:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4391,6 +4391,7 @@
   // one input.
   bool IsCuda = JA.isOffloading(Action::OFK_Cuda);
   bool IsCudaDevice = JA.isDeviceOffloading(Action::OFK_Cuda);
+  bool IsCudaHost = JA.isHostOffloading(Action::OFK_Cuda);
   bool IsHIP = JA.isOffloading(Action::OFK_HIP);
   bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP);
   bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP);
@@ -4414,6 +4415,7 @@
 
   InputInfoList ModuleHeaderInputs;
   InputInfoList OpenMPHostInputs;
+  InputInfoList CudaHostInputs;
   const InputInfo *CudaDeviceInput = nullptr;
   const InputInfo *OpenMPDeviceInput = nullptr;
   for (const InputInfo &I : Inputs) {
@@ -4428,6 +4430,8 @@
 << types::getTypeName(Expected);
   }
   ModuleHeaderInputs.push_back(I);
+} else if (IsCudaHost && Args.hasArg(options::OPT_fopenmp_new_driver)) {
+  CudaHostInputs.push_back(I);
 } else if ((IsCuda || IsHIP) && !CudaDeviceInput) {
   CudaDeviceInput = &I;
 } else if (IsOpenMPDevice && !OpenMPDeviceInput) {
@@ -6957,6 +6961,7 @@
  auto OpenMPTCs = C.getOffloadToolChains<Action::OFK_OpenMP>();
 for (auto TI = OpenMPTCs.first, TE = OpenMPTCs.second; TI != TE;
  ++TI, ++InputFile) {
+  assert(InputFile->isFilename() && "Offloading requires a filename");
   const ToolChain *TC = TI->second;
   const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP);
   StringRef File =
@@ -6969,6 +6974,21 @@
   TC->getTripleString() + "." +
   TCArgs.getLastArgValue(options::OPT_march_EQ) + "." + InputName));
 }
+  } else if (IsCudaHost && !CudaHostInputs.empty()) {
+const ToolChain *TC = C.getSingleOffloadToolChain<Action::OFK_Cuda>();
+for (const auto &InputFile : CudaHostInputs) {
+  assert(InputFile.isFilename() && "Offloading requires a filename");
+  StringRef File =
+  C.getArgs().MakeArgString(TC->getInputFilename(InputFile));
+  StringRef InputName = Clang::getBaseInputStem(Args, Inputs);
+  // The CUDA toolchain should have a bound arch appended to the filename.
+  StringRef Arch = File.split(".").first.rsplit('-').second;
+  CmdArgs.push_back(Args.MakeArgString(
+  "-fembed-offload-object=" + File + "," +
+  Action::GetOffloadKindName(Action::OFK_Cuda) + "." +
+  TC->getTripleString() + "." +
+  Arch + "." + InputName));
+}
   }
 
   if (Triple.isAMDGPU()) {
@@ -8217,17 +8237,21 @@
   const Driver  = getToolChain().getDriver();
   const llvm::Triple TheTriple = getToolChain().getTriple();
   auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>();
+  auto CudaTCRange = C.getOffloadToolChains<Action::OFK_Cuda>();
   ArgStringList CmdArgs;
 
   // Pass the CUDA path to the linker wrapper tool.
-  for (auto &I : llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) {
-const ToolChain *TC = I.second;
-if (TC->getTriple().isNVPTX()) {
-  CudaInstallationDetector 

[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-03-03 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 added inline comments.



Comment at: clang/lib/Driver/Driver.cpp:4099-4102
+  for (auto &Arg : Args.getAllArgValues(options::OPT_offload_arch_EQ))
+    Archs.insert(getCanonicalArchString(C, Args, Arg, Kind));
+  for (auto &Arg : Args.getAllArgValues(options::OPT_no_offload_arch_EQ))
+    Archs.erase(getCanonicalArchString(C, Args, Arg, Kind));

yaxunl wrote:
> The final set depends on the order of the -offload-arch and -no-offload-arch
> options, e.g. `--offload-arch=gfx906 --no-offload-arch=gfx906` and
> `--no-offload-arch=gfx906 --offload-arch=gfx906` are different. Also there is
> `--no-offload-arch=all`, which removes all preceding --offload-arch= options.
I see, so we need to iterate the arguments in order and insert or erase them as
they appear on the command line. I'll fix it.
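
A minimal sketch of that in-order handling (an illustration only, not the final
patch; it reuses the getCanonicalArchString helper from this revision and
treats the "all" spelling yaxunl mentions specially):

  // Process -offload-arch/-no-offload-arch in command-line order so later
  // options override earlier ones; --no-offload-arch=all clears the set.
  // (Uses llvm::DenseSet and the llvm::opt Arg/ArgList APIs.)
  llvm::DenseSet<StringRef> Archs;
  for (Arg *A : Args.filtered(options::OPT_offload_arch_EQ,
                              options::OPT_no_offload_arch_EQ)) {
    A->claim();
    StringRef ArchStr = A->getValue();
    if (A->getOption().matches(options::OPT_offload_arch_EQ))
      Archs.insert(getCanonicalArchString(C, Args, ArchStr, Kind));
    else if (ArchStr == "all")
      Archs.clear();
    else
      Archs.erase(getCanonicalArchString(C, Args, ArchStr, Kind));
  }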



Comment at: clang/lib/Driver/Driver.cpp:4107
+  Archs.insert(CudaArchToString(CudaArch::SM_35));
+else if (Kind == Action::OFK_Cuda)
+  Archs.insert(CudaArchToString(CudaArch::GFX803));

yaxunl wrote:
> should this be HIP?
Whoops, haven't tested it with HIP yet.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272



[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-03-03 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added inline comments.



Comment at: clang/lib/Driver/Driver.cpp:4099-4102
+  for (auto &Arg : Args.getAllArgValues(options::OPT_offload_arch_EQ))
+    Archs.insert(getCanonicalArchString(C, Args, Arg, Kind));
+  for (auto &Arg : Args.getAllArgValues(options::OPT_no_offload_arch_EQ))
+    Archs.erase(getCanonicalArchString(C, Args, Arg, Kind));

The final set depends on the order of the -offload-arch and -no-offload-arch
options, e.g. `--offload-arch=gfx906 --no-offload-arch=gfx906` and
`--no-offload-arch=gfx906 --offload-arch=gfx906` are different. Also there is
`--no-offload-arch=all`, which removes all preceding --offload-arch= options.



Comment at: clang/lib/Driver/Driver.cpp:4107
+  Archs.insert(CudaArchToString(CudaArch::SM_35));
+else if (Kind == Action::OFK_Cuda)
+  Archs.insert(CudaArchToString(CudaArch::GFX803));

should this be HIP?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272



[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-03-03 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 412821.
jhuber6 added a comment.

Adding test


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272

Files:
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/cuda-openmp-driver.cu

Index: clang/test/Driver/cuda-openmp-driver.cu
===
--- /dev/null
+++ clang/test/Driver/cuda-openmp-driver.cu
@@ -0,0 +1,16 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang -### -target x86_64-linux-gnu -nocudalib -ccc-print-bindings -fgpu-rdc \
+// RUN:-foffload-new-driver --offload-arch=sm_35 --offload-arch=sm_70 %s 2>&1 \
+// RUN: | FileCheck -check-prefix CHECK %s
+
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_35]]"], output: "[[CUBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_35]]", "[[PTX_SM_35]]"], output: "[[FATBIN_SM_35:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_SM_70:.+]]"], output: "[[CUBIN_SM_70:.+]]"
+// CHECK: "nvptx64-nvidia-cuda" - "NVPTX::Linker", inputs: ["[[CUBIN_SM_70]]", "[[PTX_SM_70:.+]]"], output: "[[FATBIN_SM_70:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[INPUT]]", "[[FATBIN_SM_35]]", "[[FATBIN_SM_70]]"], output: "[[HOST_OBJ:.+]]"
+// CHECK: "x86_64-unknown-linux-gnu" - "Offload::Linker", inputs: ["[[HOST_OBJ]]"], output: "a.out"
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4391,6 +4391,7 @@
   // one input.
   bool IsCuda = JA.isOffloading(Action::OFK_Cuda);
   bool IsCudaDevice = JA.isDeviceOffloading(Action::OFK_Cuda);
+  bool IsCudaHost = JA.isHostOffloading(Action::OFK_Cuda);
   bool IsHIP = JA.isOffloading(Action::OFK_HIP);
   bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP);
   bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP);
@@ -4414,6 +4415,7 @@
 
   InputInfoList ModuleHeaderInputs;
   InputInfoList OpenMPHostInputs;
+  InputInfoList CudaHostInputs;
   const InputInfo *CudaDeviceInput = nullptr;
   const InputInfo *OpenMPDeviceInput = nullptr;
   for (const InputInfo &I : Inputs) {
@@ -4428,6 +4430,8 @@
 << types::getTypeName(Expected);
   }
   ModuleHeaderInputs.push_back(I);
+} else if (IsCudaHost && Args.hasArg(options::OPT_fopenmp_new_driver)) {
+  CudaHostInputs.push_back(I);
 } else if ((IsCuda || IsHIP) && !CudaDeviceInput) {
   CudaDeviceInput = &I;
 } else if (IsOpenMPDevice && !OpenMPDeviceInput) {
@@ -6957,6 +6961,7 @@
  auto OpenMPTCs = C.getOffloadToolChains<Action::OFK_OpenMP>();
 for (auto TI = OpenMPTCs.first, TE = OpenMPTCs.second; TI != TE;
  ++TI, ++InputFile) {
+  assert(InputFile->isFilename() && "Offloading requires a filename");
   const ToolChain *TC = TI->second;
   const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP);
   StringRef File =
@@ -6969,6 +6974,21 @@
   TC->getTripleString() + "." +
   TCArgs.getLastArgValue(options::OPT_march_EQ) + "." + InputName));
 }
+  } else if (IsCudaHost && !CudaHostInputs.empty()) {
+const ToolChain *TC = C.getSingleOffloadToolChain<Action::OFK_Cuda>();
+for (const auto &InputFile : CudaHostInputs) {
+  assert(InputFile.isFilename() && "Offloading requires a filename");
+  StringRef File =
+  C.getArgs().MakeArgString(TC->getInputFilename(InputFile));
+  StringRef InputName = Clang::getBaseInputStem(Args, Inputs);
+  // The CUDA toolchain should have a bound arch appended to the filename.
+  StringRef Arch = File.split(".").first.rsplit('-').second;
+  CmdArgs.push_back(Args.MakeArgString(
+  "-fembed-offload-object=" + File + "," +
+  Action::GetOffloadKindName(Action::OFK_Cuda) + "." +
+  TC->getTripleString() + "." +
+  Arch + "." + InputName));
+}
   }
 
   if (Triple.isAMDGPU()) {
@@ -8217,17 +8237,21 @@
   const Driver  = getToolChain().getDriver();
   const llvm::Triple TheTriple = getToolChain().getTriple();
   auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>();
+  auto CudaTCRange = C.getOffloadToolChains<Action::OFK_Cuda>();
   ArgStringList CmdArgs;
 
   // Pass the CUDA path to the linker wrapper tool.
-  for (auto &I : llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) {
-const ToolChain *TC = I.second;
-if (TC->getTriple().isNVPTX()) {
-  CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
-  if 

[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-03-03 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 412816.
jhuber6 added a comment.

Adding comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272

Files:
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp

Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4391,6 +4391,7 @@
   // one input.
   bool IsCuda = JA.isOffloading(Action::OFK_Cuda);
   bool IsCudaDevice = JA.isDeviceOffloading(Action::OFK_Cuda);
+  bool IsCudaHost = JA.isHostOffloading(Action::OFK_Cuda);
   bool IsHIP = JA.isOffloading(Action::OFK_HIP);
   bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP);
   bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP);
@@ -4414,6 +4415,7 @@
 
   InputInfoList ModuleHeaderInputs;
   InputInfoList OpenMPHostInputs;
+  InputInfoList CudaHostInputs;
   const InputInfo *CudaDeviceInput = nullptr;
   const InputInfo *OpenMPDeviceInput = nullptr;
   for (const InputInfo &I : Inputs) {
@@ -4428,6 +4430,8 @@
 << types::getTypeName(Expected);
   }
   ModuleHeaderInputs.push_back(I);
+} else if (IsCudaHost && Args.hasArg(options::OPT_fopenmp_new_driver)) {
+  CudaHostInputs.push_back(I);
 } else if ((IsCuda || IsHIP) && !CudaDeviceInput) {
   CudaDeviceInput = &I;
 } else if (IsOpenMPDevice && !OpenMPDeviceInput) {
@@ -6957,6 +6961,7 @@
  auto OpenMPTCs = C.getOffloadToolChains<Action::OFK_OpenMP>();
 for (auto TI = OpenMPTCs.first, TE = OpenMPTCs.second; TI != TE;
  ++TI, ++InputFile) {
+  assert(InputFile->isFilename() && "Offloading requires a filename");
   const ToolChain *TC = TI->second;
   const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP);
   StringRef File =
@@ -6969,6 +6974,21 @@
   TC->getTripleString() + "." +
   TCArgs.getLastArgValue(options::OPT_march_EQ) + "." + InputName));
 }
+  } else if (IsCudaHost && !CudaHostInputs.empty()) {
+const ToolChain *TC = C.getSingleOffloadToolChain<Action::OFK_Cuda>();
+for (const auto &InputFile : CudaHostInputs) {
+  assert(InputFile.isFilename() && "Offloading requires a filename");
+  StringRef File =
+  C.getArgs().MakeArgString(TC->getInputFilename(InputFile));
+  StringRef InputName = Clang::getBaseInputStem(Args, Inputs);
+  // The CUDA toolchain should have a bound arch appended to the filename.
+  StringRef Arch = File.split(".").first.rsplit('-').second;
+  CmdArgs.push_back(Args.MakeArgString(
+  "-fembed-offload-object=" + File + "," +
+  Action::GetOffloadKindName(Action::OFK_Cuda) + "." +
+  TC->getTripleString() + "." +
+  Arch + "." + InputName));
+}
   }
 
   if (Triple.isAMDGPU()) {
@@ -8217,17 +8237,21 @@
   const Driver  = getToolChain().getDriver();
   const llvm::Triple TheTriple = getToolChain().getTriple();
   auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>();
+  auto CudaTCRange = C.getOffloadToolChains<Action::OFK_Cuda>();
   ArgStringList CmdArgs;
 
   // Pass the CUDA path to the linker wrapper tool.
-  for (auto &I : llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) {
-const ToolChain *TC = I.second;
-if (TC->getTriple().isNVPTX()) {
-  CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
-  if (CudaInstallation.isValid())
-CmdArgs.push_back(Args.MakeArgString(
-"--cuda-path=" + CudaInstallation.getInstallPath()));
-  break;
+  for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP}) {
+auto TCRange = C.getOffloadToolChains(Kind);
+for (auto &I : llvm::make_range(TCRange.first, TCRange.second)) {
+  const ToolChain *TC = I.second;
+  if (TC->getTriple().isNVPTX()) {
+CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
+if (CudaInstallation.isValid())
+  CmdArgs.push_back(Args.MakeArgString(
+  "--cuda-path=" + CudaInstallation.getInstallPath()));
+break;
+  }
 }
   }
 
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -4045,6 +4045,72 @@
   Args.ClaimAllArgs(options::OPT_cuda_compile_host_device);
 }
 
+/// Returns the canonical name for the offloading architecture when using HIP or
+/// CUDA.
+static StringRef getCanonicalArchString(Compilation &C,
+llvm::opt::DerivedArgList &Args,
+StringRef ArchStr,
+Action::OffloadKind Kind) {
+  if (Kind == Action::OFK_Cuda) {
+CudaArch Arch = StringToCudaArch(ArchStr);
+if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch)) {
+  

[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-03-03 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 updated this revision to Diff 412812.
jhuber6 added a comment.
Herald added a subscriber: dang.
Herald added a project: All.

Updating: embed fatbinaries now, plus other small changes.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272

Files:
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp

Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4391,6 +4391,7 @@
   // one input.
   bool IsCuda = JA.isOffloading(Action::OFK_Cuda);
   bool IsCudaDevice = JA.isDeviceOffloading(Action::OFK_Cuda);
+  bool IsCudaHost = JA.isHostOffloading(Action::OFK_Cuda);
   bool IsHIP = JA.isOffloading(Action::OFK_HIP);
   bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP);
   bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP);
@@ -4414,6 +4415,7 @@
 
   InputInfoList ModuleHeaderInputs;
   InputInfoList OpenMPHostInputs;
+  InputInfoList CudaHostInputs;
   const InputInfo *CudaDeviceInput = nullptr;
   const InputInfo *OpenMPDeviceInput = nullptr;
   for (const InputInfo &I : Inputs) {
@@ -4428,6 +4430,8 @@
 << types::getTypeName(Expected);
   }
   ModuleHeaderInputs.push_back(I);
+} else if (IsCudaHost && Args.hasArg(options::OPT_fopenmp_new_driver)) {
+  CudaHostInputs.push_back(I);
 } else if ((IsCuda || IsHIP) && !CudaDeviceInput) {
   CudaDeviceInput = &I;
 } else if (IsOpenMPDevice && !OpenMPDeviceInput) {
@@ -6957,6 +6961,7 @@
  auto OpenMPTCs = C.getOffloadToolChains<Action::OFK_OpenMP>();
 for (auto TI = OpenMPTCs.first, TE = OpenMPTCs.second; TI != TE;
  ++TI, ++InputFile) {
+  assert(InputFile->isFilename() && "Offloading requires a filename");
   const ToolChain *TC = TI->second;
   const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP);
   StringRef File =
@@ -6969,6 +6974,21 @@
   TC->getTripleString() + "." +
   TCArgs.getLastArgValue(options::OPT_march_EQ) + "." + InputName));
 }
+  } else if (IsCudaHost && !CudaHostInputs.empty()) {
+const ToolChain *TC = C.getSingleOffloadToolChain<Action::OFK_Cuda>();
+for (const auto &InputFile : CudaHostInputs) {
+  assert(InputFile.isFilename() && "Offloading requires a filename");
+  StringRef File =
+  C.getArgs().MakeArgString(TC->getInputFilename(InputFile));
+  StringRef InputName = Clang::getBaseInputStem(Args, Inputs);
+  // The CUDA toolchain should have a bound arch appended to the filename.
+  StringRef Arch = File.split(".").first.rsplit('-').second;
+  CmdArgs.push_back(Args.MakeArgString(
+  "-fembed-offload-object=" + File + "," +
+  Action::GetOffloadKindName(Action::OFK_Cuda) + "." +
+  TC->getTripleString() + "." +
+  Arch + "." + InputName));
+}
   }
 
   if (Triple.isAMDGPU()) {
@@ -8217,17 +8237,21 @@
   const Driver  = getToolChain().getDriver();
   const llvm::Triple TheTriple = getToolChain().getTriple();
   auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>();
+  auto CudaTCRange = C.getOffloadToolChains<Action::OFK_Cuda>();
   ArgStringList CmdArgs;
 
   // Pass the CUDA path to the linker wrapper tool.
-  for (auto &I : llvm::make_range(OpenMPTCRange.first, OpenMPTCRange.second)) {
-const ToolChain *TC = I.second;
-if (TC->getTriple().isNVPTX()) {
-  CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
-  if (CudaInstallation.isValid())
-CmdArgs.push_back(Args.MakeArgString(
-"--cuda-path=" + CudaInstallation.getInstallPath()));
-  break;
+  for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP}) {
+auto TCRange = C.getOffloadToolChains(Kind);
+for (auto &I : llvm::make_range(TCRange.first, TCRange.second)) {
+  const ToolChain *TC = I.second;
+  if (TC->getTriple().isNVPTX()) {
+CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
+if (CudaInstallation.isValid())
+  CmdArgs.push_back(Args.MakeArgString(
+  "--cuda-path=" + CudaInstallation.getInstallPath()));
+break;
+  }
 }
   }
 
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -4045,6 +4045,67 @@
   Args.ClaimAllArgs(options::OPT_cuda_compile_host_device);
 }
 
+static StringRef getCanonicalArchString(Compilation &C,
+llvm::opt::DerivedArgList &Args,
+StringRef ArchStr,
+Action::OffloadKind Kind) {
+  if (Kind == Action::OFK_Cuda) {
+CudaArch Arch = StringToCudaArch(ArchStr);
+if (Arch == CudaArch::UNKNOWN || !IsNVIDIAGpuArch(Arch)) {
+  

[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-02-21 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

Tests?




Comment at: clang/lib/Driver/Driver.cpp:3956
   LA = C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image);
-  LA->propagateHostOffloadInfo(OffloadKinds,
+  LA->propagateHostOffloadInfo(C.getActiveOffloadKinds(),
/*BoundArch=*/nullptr);

Everything up to here can be an NFC commit, right? Let's split it off



Comment at: clang/lib/Driver/Driver.cpp:4108
+}
+
 Action *Driver::BuildOffloadingActions(Compilation &C,

Can we have a doxygen comment explaining what these helpers do?



Comment at: clang/lib/Driver/Driver.cpp:4122
+  const Action::OffloadKind OffloadKinds[] = {
+  Action::OFK_OpenMP, Action::OFK_Cuda, Action::OFK_HIP};
 

With the NFC commit we can probably also include some of this, but restricted to
OFK_OpenMP. Try to minimize the functional change that one has to think about.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120272/new/

https://reviews.llvm.org/D120272



[PATCH] D120272: [CUDA] Add driver support for compiling CUDA with the new driver

2022-02-21 Thread Joseph Huber via Phabricator via cfe-commits
jhuber6 created this revision.
jhuber6 added reviewers: jdoerfert, JonChesterfield, tra, yaxunl.
Herald added a subscriber: carlosgalvezp.
jhuber6 requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1.
Herald added a project: clang.

This patch adds basic support for the clang driver to compile and link CUDA
using the new offloading driver. This requires handling the CUDA offloading kind
and embedding the generated device files into the host. This will allow us to
link OpenMP code with CUDA code in the linker wrapper.
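
As a usage illustration (a hypothetical invocation mirroring the test RUN lines
added later in this review, not a command taken from the patch itself), a
new-driver CUDA compile would look roughly like
`clang foo.cu -fgpu-rdc -foffload-new-driver --offload-arch=sm_70 -nocudalib`.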

Depends on D120270  D120271 



Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D120272

Files:
  clang/include/clang/Driver/Compilation.h
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/ToolChains/Clang.cpp

Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4374,6 +4374,7 @@
   // one input.
   bool IsCuda = JA.isOffloading(Action::OFK_Cuda);
   bool IsCudaDevice = JA.isDeviceOffloading(Action::OFK_Cuda);
+  bool IsCudaHost = JA.isHostOffloading(Action::OFK_Cuda);
   bool IsHIP = JA.isOffloading(Action::OFK_HIP);
   bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP);
   bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP);
@@ -4397,6 +4398,7 @@
 
   InputInfoList ModuleHeaderInputs;
   InputInfoList OpenMPHostInputs;
+  InputInfoList CudaHostInputs;
   const InputInfo *CudaDeviceInput = nullptr;
   const InputInfo *OpenMPDeviceInput = nullptr;
   for (const InputInfo &I : Inputs) {
@@ -4411,6 +4413,8 @@
 << types::getTypeName(Expected);
   }
   ModuleHeaderInputs.push_back(I);
+} else if (IsCudaHost && Args.hasArg(options::OPT_fopenmp_new_driver)) {
+  CudaHostInputs.push_back(I);
 } else if ((IsCuda || IsHIP) && !CudaDeviceInput) {
   CudaDeviceInput = &I;
 } else if (IsOpenMPDevice && !OpenMPDeviceInput) {
@@ -6929,6 +6933,7 @@
  auto OpenMPTCs = C.getOffloadToolChains<Action::OFK_OpenMP>();
 for (auto TI = OpenMPTCs.first, TE = OpenMPTCs.second; TI != TE;
  ++TI, ++InputFile) {
+  assert(InputFile->isFilename() && "Offloading requires a filename");
   const ToolChain *TC = TI->second;
   const ArgList &TCArgs = C.getArgsForToolChain(TC, "", Action::OFK_OpenMP);
   StringRef File =
@@ -6941,6 +6946,25 @@
   TC->getTripleString() + "." +
   TCArgs.getLastArgValue(options::OPT_march_EQ) + "." + InputName));
 }
+  } else if (IsCudaHost && !CudaHostInputs.empty()) {
+const ToolChain *CudaTC = C.getSingleOffloadToolChain<Action::OFK_Cuda>();
+for (const auto &InputFile : CudaHostInputs) {
+  assert(InputFile.isFilename() && "Offloading requires a filename");
+  StringRef File =
+  C.getArgs().MakeArgString(CudaTC->getInputFilename(InputFile));
+  StringRef InputName = Clang::getBaseInputStem(Args, Inputs);
+  // The CUDA toolchain should have a bound arch appended to the filename.
+  StringRef Arch = File.split(".").first.rsplit('-').second;
+  // CUDA offloads both the PTX and the cubin, so we need a unique section name.
+  if (File.endswith(".s"))
+CmdArgs.push_back(Args.MakeArgString(
+"-fembed-offload-object=" + File + "," + "cuda." +
+CudaTC->getTripleString() + "." + Arch + ".ptx." + InputName));
+  else
+CmdArgs.push_back(Args.MakeArgString(
+"-fembed-offload-object=" + File + "," + "cuda." +
+CudaTC->getTripleString() + "." + Arch + "." + InputName));
+}
   }
 
   if (Triple.isAMDGPU()) {
@@ -8189,6 +8213,7 @@
   const Driver  = getToolChain().getDriver();
   const llvm::Triple TheTriple = getToolChain().getTriple();
   auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>();
+  auto CudaTCRange = C.getOffloadToolChains<Action::OFK_Cuda>();
   ArgStringList CmdArgs;
 
   // Pass the CUDA path to the linker wrapper tool.
@@ -8202,6 +8227,16 @@
   break;
 }
   }
+  for (auto &I : llvm::make_range(CudaTCRange.first, CudaTCRange.second)) {
+const ToolChain *TC = I.second;
+if (TC->getTriple().isNVPTX()) {
+  CudaInstallationDetector CudaInstallation(D, TheTriple, Args);
+  if (CudaInstallation.isValid())
+CmdArgs.push_back(Args.MakeArgString(
+"--cuda-path=" + CudaInstallation.getInstallPath()));
+  break;
+}
+  }
 
   // Get the AMDGPU math libraries.
   // FIXME: This method is bad, remove once AMDGPU has a proper math library
Index: clang/lib/Driver/Driver.cpp
===
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -3830,11 +3830,6 @@
   // Builder to be used to build offloading actions.
   OffloadingActionBuilder OffloadBuilder(C, Args, Inputs);
 
-  // Offload kinds active for this compilation.
-  unsigned OffloadKinds = Action::OFK_None;
-  if (C.hasOffloadToolChain<Action::OFK_OpenMP>())
-