[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)
https://github.com/jhuber6 closed https://github.com/llvm/llvm-project/pull/79873 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)
https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/79873 >From 35e12c3d83f3be93618805ffaf05e3424689f32f Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 29 Jan 2024 11:08:04 -0600 Subject: [PATCH 1/3] [NVPTX] Allow compiling LLVM-IR without `-march` set Summary: The NVPTX tools require an architecture to be used, however if we are creating generic LLVM-IR we should be able to leave it unspecified. This will result in the `target-cpu` attributes not being set on the functions so it can be changed when linked into code. This allows the standalone `--target=nvptx64-nvidia-cuda` toolchain to create LLVM-IR simmilar to how CUDA's deviceRTL looks from C/C++ --- .../clang/Basic/DiagnosticDriverKinds.td | 2 ++ clang/lib/Basic/Targets/NVPTX.cpp | 7 +- clang/lib/Basic/Targets/NVPTX.h | 3 ++- clang/lib/Driver/ToolChains/Cuda.cpp | 19 ++- clang/test/Driver/cuda-cross-compiling.c | 24 +++ .../Preprocessor/predefined-arch-macros.c | 12 ++ 6 files changed, 49 insertions(+), 18 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 094fe19509412..476528375fb88 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -57,6 +57,8 @@ def warn_drv_avr_stdlib_not_linked: Warning< InGroup; def err_drv_cuda_bad_gpu_arch : Error<"unsupported CUDA gpu architecture: %0">; def err_drv_offload_bad_gpu_arch : Error<"unsupported %0 gpu architecture: %1">; +def err_drv_offload_missing_gpu_arch : Error< + "Must pass in an explicit %0 gpu architecture to '%1'">; def err_drv_no_cuda_installation : Error< "cannot find CUDA installation; provide its path via '--cuda-path', or pass " "'-nocudainc' to build without CUDA includes">; diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 0b9d97f69d146..7687e3faad770 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -59,7 +59,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple, // Define available target features // These must be defined in sorted order! NoAsmVariants = true; - GPU = CudaArch::SM_20; + GPU = CudaArch::UNUSED; if (TargetPointerWidth == 32) resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"); @@ -169,6 +169,11 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { Builder.defineMacro("__PTX__"); Builder.defineMacro("__NVPTX__"); + + // Skip setting architecture dependent macros if undefined. + if (GPU == CudaArch::UNUSED && !HostTarget) +return; + if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) { // Set __CUDA_ARCH__ for the GPU specified. std::string CUDAArchCode = [this] { diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h index 20d76b702a942..f476d49047c01 100644 --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -79,7 +79,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo { initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector &FeaturesVec) const override { -Features[CudaArchToString(GPU)] = true; +if (GPU != CudaArch::UNUSED) + Features[CudaArchToString(GPU)] = true; Features["ptx" + std::to_string(PTXVersion)] = true; return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec); } diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 8a9d0caaccf30..ca54d2d55426b 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -389,7 +389,11 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA, GPUArchName = JA.getOffloadingArch(); } else { GPUArchName = Args.getLastArgValue(options::OPT_march_EQ); -assert(!GPUArchName.empty() && "Must have an architecture passed in."); +if (GPUArchName.empty()) { + C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch) + << getToolChain().getArchName() << getShortName(); + return; +} } // Obtain architecture from the action. @@ -593,7 +597,11 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-v"); StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ); - assert(!GPUArch.empty() && "At least one GPU Arch required for nvlink."); + if (GPUArch.empty()) { +C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch) +<< getToolChain().getArchName() << getShortName(); +return; + } CmdArgs.push_back("-arch"); CmdArgs.push_back(Args.MakeArgString(GPUArch)); @@ -726,9 +734,8 @@ N
[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)
https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/79873 >From 35e12c3d83f3be93618805ffaf05e3424689f32f Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 29 Jan 2024 11:08:04 -0600 Subject: [PATCH 1/2] [NVPTX] Allow compiling LLVM-IR without `-march` set Summary: The NVPTX tools require an architecture to be used, however if we are creating generic LLVM-IR we should be able to leave it unspecified. This will result in the `target-cpu` attributes not being set on the functions so it can be changed when linked into code. This allows the standalone `--target=nvptx64-nvidia-cuda` toolchain to create LLVM-IR simmilar to how CUDA's deviceRTL looks from C/C++ --- .../clang/Basic/DiagnosticDriverKinds.td | 2 ++ clang/lib/Basic/Targets/NVPTX.cpp | 7 +- clang/lib/Basic/Targets/NVPTX.h | 3 ++- clang/lib/Driver/ToolChains/Cuda.cpp | 19 ++- clang/test/Driver/cuda-cross-compiling.c | 24 +++ .../Preprocessor/predefined-arch-macros.c | 12 ++ 6 files changed, 49 insertions(+), 18 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 094fe19509412..476528375fb88 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -57,6 +57,8 @@ def warn_drv_avr_stdlib_not_linked: Warning< InGroup; def err_drv_cuda_bad_gpu_arch : Error<"unsupported CUDA gpu architecture: %0">; def err_drv_offload_bad_gpu_arch : Error<"unsupported %0 gpu architecture: %1">; +def err_drv_offload_missing_gpu_arch : Error< + "Must pass in an explicit %0 gpu architecture to '%1'">; def err_drv_no_cuda_installation : Error< "cannot find CUDA installation; provide its path via '--cuda-path', or pass " "'-nocudainc' to build without CUDA includes">; diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 0b9d97f69d146..7687e3faad770 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -59,7 +59,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple, // Define available target features // These must be defined in sorted order! NoAsmVariants = true; - GPU = CudaArch::SM_20; + GPU = CudaArch::UNUSED; if (TargetPointerWidth == 32) resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"); @@ -169,6 +169,11 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { Builder.defineMacro("__PTX__"); Builder.defineMacro("__NVPTX__"); + + // Skip setting architecture dependent macros if undefined. + if (GPU == CudaArch::UNUSED && !HostTarget) +return; + if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) { // Set __CUDA_ARCH__ for the GPU specified. std::string CUDAArchCode = [this] { diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h index 20d76b702a942..f476d49047c01 100644 --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -79,7 +79,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo { initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector &FeaturesVec) const override { -Features[CudaArchToString(GPU)] = true; +if (GPU != CudaArch::UNUSED) + Features[CudaArchToString(GPU)] = true; Features["ptx" + std::to_string(PTXVersion)] = true; return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec); } diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 8a9d0caaccf30..ca54d2d55426b 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -389,7 +389,11 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA, GPUArchName = JA.getOffloadingArch(); } else { GPUArchName = Args.getLastArgValue(options::OPT_march_EQ); -assert(!GPUArchName.empty() && "Must have an architecture passed in."); +if (GPUArchName.empty()) { + C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch) + << getToolChain().getArchName() << getShortName(); + return; +} } // Obtain architecture from the action. @@ -593,7 +597,11 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-v"); StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ); - assert(!GPUArch.empty() && "At least one GPU Arch required for nvlink."); + if (GPUArch.empty()) { +C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch) +<< getToolChain().getArchName() << getShortName(); +return; + } CmdArgs.push_back("-arch"); CmdArgs.push_back(Args.MakeArgString(GPUArch)); @@ -726,9 +734,8 @@ N
[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)
https://github.com/Artem-B approved this pull request. https://github.com/llvm/llvm-project/pull/79873 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)
Artem-B wrote: Considering that it's for the stand-alone compilation only, I'm not going to block this patch. That said, please add a `TODO` somewhere to address an issue w/ explicitly targeting generic variant. https://github.com/llvm/llvm-project/pull/79873 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)
jhuber6 wrote: > > Right now if you specify target-cpu you get target-cpu attributes, which is > > what we don't want. > > I'm fine handling 'generic' in a special way under the hood and not > specifying target-CPU. > > My concern is about user-facing interface. Command line options must be > overridable. For the CPU I would be able to specify the variant that matches > the default. For GPU I'll have no way to explicitly pick 'generic' as the > target. I think this is important. I can add `generic` for overloading purposes, but I'd prefer if leaving it off implied generic as well, at least for this standalone target version. This is a little divergent from standard CPU targets because GPUs have no backward compatibility, so something being "generic" requires a bit more work. https://github.com/llvm/llvm-project/pull/79873 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)
Artem-B wrote: > Right now if you specify target-cpu you get target-cpu attributes, which is > what we don't want. I'm fine handling 'generic' in a special way under the hood and not specifying target-CPU. My concern is about user-facing interface. Command line options must be overridable. For the CPU I would be able to specify the variant that matches the default. For GPU I'll have no way to explicitly pick 'generic' as the target. I think this is important. https://github.com/llvm/llvm-project/pull/79873 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)
jhuber6 wrote: > > I think there's some precedent from both vendors to treat missing > > attributes as a more generic target. > > It sounds more like a bug than a feature to me. > > The major difference between "you get sm_xx by default" and this "you get > generic by default" is that With specific sm_XX, I can override it both ways > -- I wan enable/disable it if I need to regardless of how it was specified > before my overriding options. > > With the magic unnameable 'generic' target, I can only disable it by > specifying it, but there's no way to enable it once a preceding option names > some specific architecture. > > It makes little difference where you control complete build, but that is not > the case for all builds. E.g. Tensorflow builds with bazel and the end user > does not have access to whatever compiler flags global build rules may set. > So if you want to build for generic GPU target, you will have to jump through > way more hoops than is reasonable, as opposed to specifying a few overriding > options you're interested in. > > I'm fine with defaulting to such generic target, but I do believe we need to > handle it the same way as specific targets. We could make the driver require some special argument, but the question is how that should be reflected in the LLVM-IR. Right now if you specify `target-cpu` you get `target-cpu` attributes, which is what we don't want. The standard behavior on all other targets is to ignore target-dependent attributes when `-march` or `-mcpu` isn't set, which is exactly what we want here. On the driver level we could hide this behind `-march=generic` if that's important, but I think inside of `clang` it should just be if it's missing as that's the standard handling. https://github.com/llvm/llvm-project/pull/79873 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)
Artem-B wrote: > I think there's some precedent from both vendors to treat missing attributes > as a more generic target. It sounds more like a bug than a feature to me. The major difference between "you get sm_xx by default" and this "you get generic by default" is that With specific sm_XX, I can override it both ways -- I wan enable/disable it if I need to regardless of how it was specified before my overriding options. With the magic unnameable 'generic' target, I can only disable it by specifying it, but there's no way to enable it once a preceding option names some specific architecture. It makes little difference where you control complete build, but that is not the case for all builds. E.g. Tensorflow builds with bazel and the end user does not have access to whatever compiler flags global build rules may set. So if you want to build for generic GPU target, you will have to jump through way more hoops than is reasonable, as opposed to specifying a few overriding options you're interested in. I'm fine with defaulting to such generic target, but I do believe we need to handle it the same way as specific targets. https://github.com/llvm/llvm-project/pull/79873 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)
jhuber6 wrote: > Relying on something _not_ being defined is probably not the best way to > handle 'generic' target. For starters it makes it hard or impossible to > recreate the same compilation state by undoing already-specified option. It > also breaks established assumption that there _is_ a default target CPU/GPU. > If we do want to have a generic GPU target, then we should grow an explicit > 'generic' GPU variant, IMO. It would be a functional opposite of 'native'. AMDGPU uses a missing `-mcpu` on the OpenCL target to build their "generic" device libraries and it's also missing from NVIDIA's `libdevice.10.bc` in the same way. I think there's some precedent from both vendors to treat missing attributes as a more generic target. The default target to me is more of the domain of the driver. So if you're using CUDA and don't specify anything you get `sm_52`. This patch also creates a hard error if it's unspecified before it makes it to the tools like ptxas and nvlink. https://github.com/llvm/llvm-project/pull/79873 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)
Artem-B wrote: Relying on something *not* being defined is probably not the best way to handle 'generic' target. For starters it makes it hard or impossible to recreate the same compilation state by undoing already-specified option. It also breaks established assumption that there *is* a default target CPU/GPU. If we do want to have a generic GPU target, then we should grow an explicit 'generic' GPU variant, IMO. It would be a functional opposite of 'native'. https://github.com/llvm/llvm-project/pull/79873 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)
https://github.com/jlebar approved this pull request. https://github.com/llvm/llvm-project/pull/79873 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)
llvmbot wrote: @llvm/pr-subscribers-clang-driver Author: Joseph Huber (jhuber6) Changes Summary: The NVPTX tools require an architecture to be used, however if we are creating generic LLVM-IR we should be able to leave it unspecified. This will result in the `target-cpu` attributes not being set on the functions so it can be changed when linked into code. This allows the standalone `--target=nvptx64-nvidia-cuda` toolchain to create LLVM-IR simmilar to how CUDA's deviceRTL looks from C/C++ --- Full diff: https://github.com/llvm/llvm-project/pull/79873.diff 6 Files Affected: - (modified) clang/include/clang/Basic/DiagnosticDriverKinds.td (+2) - (modified) clang/lib/Basic/Targets/NVPTX.cpp (+6-1) - (modified) clang/lib/Basic/Targets/NVPTX.h (+2-1) - (modified) clang/lib/Driver/ToolChains/Cuda.cpp (+13-6) - (modified) clang/test/Driver/cuda-cross-compiling.c (+14-10) - (modified) clang/test/Preprocessor/predefined-arch-macros.c (+12) ``diff diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 094fe1950941270..476528375fb8889 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -57,6 +57,8 @@ def warn_drv_avr_stdlib_not_linked: Warning< InGroup; def err_drv_cuda_bad_gpu_arch : Error<"unsupported CUDA gpu architecture: %0">; def err_drv_offload_bad_gpu_arch : Error<"unsupported %0 gpu architecture: %1">; +def err_drv_offload_missing_gpu_arch : Error< + "Must pass in an explicit %0 gpu architecture to '%1'">; def err_drv_no_cuda_installation : Error< "cannot find CUDA installation; provide its path via '--cuda-path', or pass " "'-nocudainc' to build without CUDA includes">; diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 0b9d97f69d146af..7687e3faad770d0 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -59,7 +59,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple, // Define available target features // These must be defined in sorted order! NoAsmVariants = true; - GPU = CudaArch::SM_20; + GPU = CudaArch::UNUSED; if (TargetPointerWidth == 32) resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"); @@ -169,6 +169,11 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { Builder.defineMacro("__PTX__"); Builder.defineMacro("__NVPTX__"); + + // Skip setting architecture dependent macros if undefined. + if (GPU == CudaArch::UNUSED && !HostTarget) +return; + if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) { // Set __CUDA_ARCH__ for the GPU specified. std::string CUDAArchCode = [this] { diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h index 20d76b702a9426e..f476d49047c0138 100644 --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -79,7 +79,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo { initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector &FeaturesVec) const override { -Features[CudaArchToString(GPU)] = true; +if (GPU != CudaArch::UNUSED) + Features[CudaArchToString(GPU)] = true; Features["ptx" + std::to_string(PTXVersion)] = true; return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec); } diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 8a9d0caaccf30bb..ca54d2d55426b9f 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -389,7 +389,11 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA, GPUArchName = JA.getOffloadingArch(); } else { GPUArchName = Args.getLastArgValue(options::OPT_march_EQ); -assert(!GPUArchName.empty() && "Must have an architecture passed in."); +if (GPUArchName.empty()) { + C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch) + << getToolChain().getArchName() << getShortName(); + return; +} } // Obtain architecture from the action. @@ -593,7 +597,11 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-v"); StringRef GPUArch = Args.getLastArgValue(options::OPT_march_EQ); - assert(!GPUArch.empty() && "At least one GPU Arch required for nvlink."); + if (GPUArch.empty()) { +C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch) +<< getToolChain().getArchName() << getShortName(); +return; + } CmdArgs.push_back("-arch"); CmdArgs.push_back(Args.MakeArgString(GPUArch)); @@ -726,9 +734,8 @@ NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple, llvm::opt::DerivedArgList * NVPTXToolChain::Translat
[clang] [NVPTX] Allow compiling LLVM-IR without `-march` set (PR #79873)
https://github.com/jhuber6 created https://github.com/llvm/llvm-project/pull/79873 Summary: The NVPTX tools require an architecture to be used, however if we are creating generic LLVM-IR we should be able to leave it unspecified. This will result in the `target-cpu` attributes not being set on the functions so it can be changed when linked into code. This allows the standalone `--target=nvptx64-nvidia-cuda` toolchain to create LLVM-IR simmilar to how CUDA's deviceRTL looks from C/C++ >From 35e12c3d83f3be93618805ffaf05e3424689f32f Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 29 Jan 2024 11:08:04 -0600 Subject: [PATCH] [NVPTX] Allow compiling LLVM-IR without `-march` set Summary: The NVPTX tools require an architecture to be used, however if we are creating generic LLVM-IR we should be able to leave it unspecified. This will result in the `target-cpu` attributes not being set on the functions so it can be changed when linked into code. This allows the standalone `--target=nvptx64-nvidia-cuda` toolchain to create LLVM-IR simmilar to how CUDA's deviceRTL looks from C/C++ --- .../clang/Basic/DiagnosticDriverKinds.td | 2 ++ clang/lib/Basic/Targets/NVPTX.cpp | 7 +- clang/lib/Basic/Targets/NVPTX.h | 3 ++- clang/lib/Driver/ToolChains/Cuda.cpp | 19 ++- clang/test/Driver/cuda-cross-compiling.c | 24 +++ .../Preprocessor/predefined-arch-macros.c | 12 ++ 6 files changed, 49 insertions(+), 18 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 094fe1950941270..476528375fb8889 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -57,6 +57,8 @@ def warn_drv_avr_stdlib_not_linked: Warning< InGroup; def err_drv_cuda_bad_gpu_arch : Error<"unsupported CUDA gpu architecture: %0">; def err_drv_offload_bad_gpu_arch : Error<"unsupported %0 gpu architecture: %1">; +def err_drv_offload_missing_gpu_arch : Error< + "Must pass in an explicit %0 gpu architecture to '%1'">; def err_drv_no_cuda_installation : Error< "cannot find CUDA installation; provide its path via '--cuda-path', or pass " "'-nocudainc' to build without CUDA includes">; diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 0b9d97f69d146af..7687e3faad770d0 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -59,7 +59,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple, // Define available target features // These must be defined in sorted order! NoAsmVariants = true; - GPU = CudaArch::SM_20; + GPU = CudaArch::UNUSED; if (TargetPointerWidth == 32) resetDataLayout("e-p:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64"); @@ -169,6 +169,11 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { Builder.defineMacro("__PTX__"); Builder.defineMacro("__NVPTX__"); + + // Skip setting architecture dependent macros if undefined. + if (GPU == CudaArch::UNUSED && !HostTarget) +return; + if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) { // Set __CUDA_ARCH__ for the GPU specified. std::string CUDAArchCode = [this] { diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h index 20d76b702a9426e..f476d49047c0138 100644 --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -79,7 +79,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo { initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector &FeaturesVec) const override { -Features[CudaArchToString(GPU)] = true; +if (GPU != CudaArch::UNUSED) + Features[CudaArchToString(GPU)] = true; Features["ptx" + std::to_string(PTXVersion)] = true; return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec); } diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 8a9d0caaccf30bb..ca54d2d55426b9f 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -389,7 +389,11 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA, GPUArchName = JA.getOffloadingArch(); } else { GPUArchName = Args.getLastArgValue(options::OPT_march_EQ); -assert(!GPUArchName.empty() && "Must have an architecture passed in."); +if (GPUArchName.empty()) { + C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch) + << getToolChain().getArchName() << getShortName(); + return; +} } // Obtain architecture from the action. @@ -593,7 +597,11 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-v");