[llvm] [clang] [CUDA] Add support for CUDA-12.3 and sm_90a (PR #74895)
https://github.com/Artem-B closed https://github.com/llvm/llvm-project/pull/74895 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [CUDA] Add support for CUDA-12.3 and sm_90a (PR #74895)
Artem-B wrote: Tested the changes with cuda test-suite, with cuda-12.1 and 12.3 targeting `sm_{60,70,80,90,90a}`. https://github.com/llvm/llvm-project/pull/74895 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [CUDA] Add support for CUDA-12.3 and sm_90a (PR #74895)
https://github.com/Artem-B updated https://github.com/llvm/llvm-project/pull/74895 >From eace5f13ee62c770a84cdaae441d4c1c6eeb07c2 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Wed, 6 Dec 2023 12:11:38 -0800 Subject: [PATCH 1/3] [CUDA] Add support for CUDA-12.3 and sm_90a --- clang/docs/ReleaseNotes.rst | 3 +++ clang/include/clang/Basic/BuiltinsNVPTX.def | 13 +++-- clang/include/clang/Basic/Cuda.h| 7 +-- clang/lib/Basic/Cuda.cpp| 5 + clang/lib/Basic/Targets/NVPTX.cpp | 3 +++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp| 1 + clang/lib/Driver/ToolChains/Cuda.cpp| 6 ++ clang/test/Misc/target-invalid-cpu-note.c | 2 +- llvm/lib/Target/NVPTX/NVPTX.td | 19 ++- llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp| 7 ++- llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 11 +-- 11 files changed, 60 insertions(+), 17 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 6e4009deaf874..783dc7333af7e 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -973,6 +973,9 @@ CUDA/HIP Language Changes CUDA Support +- Clang now supports CUDA SDK up to 12.3 +- Added support for sm_90a + AIX Support ^^^ diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def index d74a7d1e55dd2..0f2e8260143be 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -26,7 +26,9 @@ #pragma push_macro("SM_87") #pragma push_macro("SM_89") #pragma push_macro("SM_90") -#define SM_90 "sm_90" +#pragma push_macro("SM_90a") +#define SM_90a "sm_90a" +#define SM_90 "sm_90|" SM_90a #define SM_89 "sm_89|" SM_90 #define SM_87 "sm_87|" SM_89 #define SM_86 "sm_86|" SM_87 @@ -56,7 +58,11 @@ #pragma push_macro("PTX78") #pragma push_macro("PTX80") #pragma push_macro("PTX81") -#define PTX81 "ptx81" +#pragma push_macro("PTX82") +#pragma push_macro("PTX83") +#define PTX83 "ptx83" +#define PTX82 "ptx82|" PTX83 +#define PTX81 "ptx81|" PTX82 #define PTX80 "ptx80|" PTX81 #define PTX78 
"ptx78|" PTX80 #define PTX77 "ptx77|" PTX78 @@ -1055,6 +1061,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78)) #pragma pop_macro("SM_87") #pragma pop_macro("SM_89") #pragma pop_macro("SM_90") +#pragma pop_macro("SM_90a") #pragma pop_macro("PTX42") #pragma pop_macro("PTX60") #pragma pop_macro("PTX61") @@ -1072,3 +1079,5 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78)) #pragma pop_macro("PTX78") #pragma pop_macro("PTX80") #pragma pop_macro("PTX81") +#pragma pop_macro("PTX82") +#pragma pop_macro("PTX83") diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index 2d912bdbbd1bc..916cb4b7ef34a 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -39,9 +39,11 @@ enum class CudaVersion { CUDA_118, CUDA_120, CUDA_121, - FULLY_SUPPORTED = CUDA_118, + CUDA_122, + CUDA_123, + FULLY_SUPPORTED = CUDA_123, PARTIALLY_SUPPORTED = - CUDA_121, // Partially supported. Proceed with a warning. + CUDA_123, // Partially supported. Proceed with a warning. NEW = 1, // Too new. Issue a warning, but allow using it. }; const char *CudaVersionToString(CudaVersion V); @@ -71,6 +73,7 @@ enum class CudaArch { SM_87, SM_89, SM_90, + SM_90a, GFX600, GFX601, GFX602, diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index 65840b9f20252..1b1da6a1356f2 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -39,6 +39,8 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = { CUDA_ENTRY(11, 8), CUDA_ENTRY(12, 0), CUDA_ENTRY(12, 1), +CUDA_ENTRY(12, 2), +CUDA_ENTRY(12, 3), {"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits::max())}, {"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone. 
}; @@ -93,6 +95,7 @@ static const CudaArchToStringMap arch_names[] = { SM(87), // Jetson/Drive AGX Orin SM(89), // Ada Lovelace SM(90), // Hopper +SM(90a), // Hopper GFX(600), // gfx600 GFX(601), // gfx601 GFX(602), // gfx602 @@ -209,6 +212,8 @@ CudaVersion MinVersionForCudaArch(CudaArch A) { case CudaArch::SM_89: case CudaArch::SM_90: return CudaVersion::CUDA_118; + case CudaArch::SM_90a: +return CudaVersion::CUDA_120; default: llvm_unreachable("invalid enum"); } diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 3a4a75b0348f2..5c601812f6175 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -262,11 +262,14 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions , case
[llvm] [clang] [CUDA] Add support for CUDA-12.3 and sm_90a (PR #74895)
https://github.com/Artem-B updated https://github.com/llvm/llvm-project/pull/74895 >From 3ce8e08b94e33480139e13ca9f0fd7b719ff2c3d Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Wed, 6 Dec 2023 12:11:38 -0800 Subject: [PATCH 1/3] [CUDA] Add support for CUDA-12.3 and sm_90a --- clang/docs/ReleaseNotes.rst | 3 +++ clang/include/clang/Basic/BuiltinsNVPTX.def | 13 +++-- clang/include/clang/Basic/Cuda.h| 7 +-- clang/lib/Basic/Cuda.cpp| 5 + clang/lib/Basic/Targets/NVPTX.cpp | 3 +++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp| 1 + clang/lib/Driver/ToolChains/Cuda.cpp| 6 ++ clang/test/Misc/target-invalid-cpu-note.c | 2 +- llvm/lib/Target/NVPTX/NVPTX.td | 19 ++- llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp| 7 ++- llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 11 +-- 11 files changed, 60 insertions(+), 17 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 89ea2f0930ceca..1bf68a46a64dac 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -937,6 +937,9 @@ CUDA/HIP Language Changes CUDA Support +- Clang now supports CUDA SDK up to 12.3 +- Added support for sm_90a + AIX Support ^^^ diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def index d74a7d1e55dd28..0f2e8260143be7 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -26,7 +26,9 @@ #pragma push_macro("SM_87") #pragma push_macro("SM_89") #pragma push_macro("SM_90") -#define SM_90 "sm_90" +#pragma push_macro("SM_90a") +#define SM_90a "sm_90a" +#define SM_90 "sm_90|" SM_90a #define SM_89 "sm_89|" SM_90 #define SM_87 "sm_87|" SM_89 #define SM_86 "sm_86|" SM_87 @@ -56,7 +58,11 @@ #pragma push_macro("PTX78") #pragma push_macro("PTX80") #pragma push_macro("PTX81") -#define PTX81 "ptx81" +#pragma push_macro("PTX82") +#pragma push_macro("PTX83") +#define PTX83 "ptx83" +#define PTX82 "ptx82|" PTX83 +#define PTX81 "ptx81|" PTX82 #define PTX80 "ptx80|" PTX81 #define PTX78 
"ptx78|" PTX80 #define PTX77 "ptx77|" PTX78 @@ -1055,6 +1061,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78)) #pragma pop_macro("SM_87") #pragma pop_macro("SM_89") #pragma pop_macro("SM_90") +#pragma pop_macro("SM_90a") #pragma pop_macro("PTX42") #pragma pop_macro("PTX60") #pragma pop_macro("PTX61") @@ -1072,3 +1079,5 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78)) #pragma pop_macro("PTX78") #pragma pop_macro("PTX80") #pragma pop_macro("PTX81") +#pragma pop_macro("PTX82") +#pragma pop_macro("PTX83") diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index 2d912bdbbd1bc5..916cb4b7ef34a7 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -39,9 +39,11 @@ enum class CudaVersion { CUDA_118, CUDA_120, CUDA_121, - FULLY_SUPPORTED = CUDA_118, + CUDA_122, + CUDA_123, + FULLY_SUPPORTED = CUDA_123, PARTIALLY_SUPPORTED = - CUDA_121, // Partially supported. Proceed with a warning. + CUDA_123, // Partially supported. Proceed with a warning. NEW = 1, // Too new. Issue a warning, but allow using it. }; const char *CudaVersionToString(CudaVersion V); @@ -71,6 +73,7 @@ enum class CudaArch { SM_87, SM_89, SM_90, + SM_90a, GFX600, GFX601, GFX602, diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index 65840b9f20252b..1b1da6a1356f2c 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -39,6 +39,8 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = { CUDA_ENTRY(11, 8), CUDA_ENTRY(12, 0), CUDA_ENTRY(12, 1), +CUDA_ENTRY(12, 2), +CUDA_ENTRY(12, 3), {"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits::max())}, {"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone. 
}; @@ -93,6 +95,7 @@ static const CudaArchToStringMap arch_names[] = { SM(87), // Jetson/Drive AGX Orin SM(89), // Ada Lovelace SM(90), // Hopper +SM(90a), // Hopper GFX(600), // gfx600 GFX(601), // gfx601 GFX(602), // gfx602 @@ -209,6 +212,8 @@ CudaVersion MinVersionForCudaArch(CudaArch A) { case CudaArch::SM_89: case CudaArch::SM_90: return CudaVersion::CUDA_118; + case CudaArch::SM_90a: +return CudaVersion::CUDA_120; default: llvm_unreachable("invalid enum"); } diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 3a4a75b0348f20..5c601812f61759 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -262,11 +262,14 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions ,
[llvm] [clang] [CUDA] Add support for CUDA-12.3 and sm_90a (PR #74895)
@@ -80,8 +85,10 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo { bool allowFP16Math() const; bool hasMaskOperator() const { return PTXVersion >= 71; } bool hasNoReturn() const { return SmVersion >= 30 && PTXVersion >= 64; } - unsigned int getSmVersion() const { return SmVersion; } + unsigned int getSmVersion() const { return FullSmVersion / 10; } + unsigned int getFullSmVersion() const { return FullSmVersion; } std::string getTargetName() const { return TargetName; } + bool isSm90a() const { return getFullSmVersion() == 901; } Artem-B wrote: Done. https://github.com/llvm/llvm-project/pull/74895 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [CUDA] Add support for CUDA-12.3 and sm_90a (PR #74895)
https://github.com/Artem-B updated https://github.com/llvm/llvm-project/pull/74895 >From 3ce8e08b94e33480139e13ca9f0fd7b719ff2c3d Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Wed, 6 Dec 2023 12:11:38 -0800 Subject: [PATCH 1/2] [CUDA] Add support for CUDA-12.3 and sm_90a --- clang/docs/ReleaseNotes.rst | 3 +++ clang/include/clang/Basic/BuiltinsNVPTX.def | 13 +++-- clang/include/clang/Basic/Cuda.h| 7 +-- clang/lib/Basic/Cuda.cpp| 5 + clang/lib/Basic/Targets/NVPTX.cpp | 3 +++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp| 1 + clang/lib/Driver/ToolChains/Cuda.cpp| 6 ++ clang/test/Misc/target-invalid-cpu-note.c | 2 +- llvm/lib/Target/NVPTX/NVPTX.td | 19 ++- llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp| 7 ++- llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 11 +-- 11 files changed, 60 insertions(+), 17 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 89ea2f0930cec..1bf68a46a64da 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -937,6 +937,9 @@ CUDA/HIP Language Changes CUDA Support +- Clang now supports CUDA SDK up to 12.3 +- Added support for sm_90a + AIX Support ^^^ diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def index d74a7d1e55dd2..0f2e8260143be 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -26,7 +26,9 @@ #pragma push_macro("SM_87") #pragma push_macro("SM_89") #pragma push_macro("SM_90") -#define SM_90 "sm_90" +#pragma push_macro("SM_90a") +#define SM_90a "sm_90a" +#define SM_90 "sm_90|" SM_90a #define SM_89 "sm_89|" SM_90 #define SM_87 "sm_87|" SM_89 #define SM_86 "sm_86|" SM_87 @@ -56,7 +58,11 @@ #pragma push_macro("PTX78") #pragma push_macro("PTX80") #pragma push_macro("PTX81") -#define PTX81 "ptx81" +#pragma push_macro("PTX82") +#pragma push_macro("PTX83") +#define PTX83 "ptx83" +#define PTX82 "ptx82|" PTX83 +#define PTX81 "ptx81|" PTX82 #define PTX80 "ptx80|" PTX81 #define PTX78 
"ptx78|" PTX80 #define PTX77 "ptx77|" PTX78 @@ -1055,6 +1061,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78)) #pragma pop_macro("SM_87") #pragma pop_macro("SM_89") #pragma pop_macro("SM_90") +#pragma pop_macro("SM_90a") #pragma pop_macro("PTX42") #pragma pop_macro("PTX60") #pragma pop_macro("PTX61") @@ -1072,3 +1079,5 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78)) #pragma pop_macro("PTX78") #pragma pop_macro("PTX80") #pragma pop_macro("PTX81") +#pragma pop_macro("PTX82") +#pragma pop_macro("PTX83") diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index 2d912bdbbd1bc..916cb4b7ef34a 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -39,9 +39,11 @@ enum class CudaVersion { CUDA_118, CUDA_120, CUDA_121, - FULLY_SUPPORTED = CUDA_118, + CUDA_122, + CUDA_123, + FULLY_SUPPORTED = CUDA_123, PARTIALLY_SUPPORTED = - CUDA_121, // Partially supported. Proceed with a warning. + CUDA_123, // Partially supported. Proceed with a warning. NEW = 1, // Too new. Issue a warning, but allow using it. }; const char *CudaVersionToString(CudaVersion V); @@ -71,6 +73,7 @@ enum class CudaArch { SM_87, SM_89, SM_90, + SM_90a, GFX600, GFX601, GFX602, diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index 65840b9f20252..1b1da6a1356f2 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -39,6 +39,8 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = { CUDA_ENTRY(11, 8), CUDA_ENTRY(12, 0), CUDA_ENTRY(12, 1), +CUDA_ENTRY(12, 2), +CUDA_ENTRY(12, 3), {"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits::max())}, {"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone. 
}; @@ -93,6 +95,7 @@ static const CudaArchToStringMap arch_names[] = { SM(87), // Jetson/Drive AGX Orin SM(89), // Ada Lovelace SM(90), // Hopper +SM(90a), // Hopper GFX(600), // gfx600 GFX(601), // gfx601 GFX(602), // gfx602 @@ -209,6 +212,8 @@ CudaVersion MinVersionForCudaArch(CudaArch A) { case CudaArch::SM_89: case CudaArch::SM_90: return CudaVersion::CUDA_118; + case CudaArch::SM_90a: +return CudaVersion::CUDA_120; default: llvm_unreachable("invalid enum"); } diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 3a4a75b0348f2..5c601812f6175 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -262,11 +262,14 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions , case
[llvm] [clang] [CUDA] Add support for CUDA-12.3 and sm_90a (PR #74895)
https://github.com/Artem-B edited https://github.com/llvm/llvm-project/pull/74895 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [CUDA] Add support for CUDA-12.3 and sm_90a (PR #74895)
@@ -80,8 +85,10 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo { bool allowFP16Math() const; bool hasMaskOperator() const { return PTXVersion >= 71; } bool hasNoReturn() const { return SmVersion >= 30 && PTXVersion >= 64; } - unsigned int getSmVersion() const { return SmVersion; } + unsigned int getSmVersion() const { return FullSmVersion / 10; } + unsigned int getFullSmVersion() const { return FullSmVersion; } std::string getTargetName() const { return TargetName; } + bool isSm90a() const { return getFullSmVersion() == 901; } jhuber6 wrote: Yeah, I was thinking that the internal representation would just be what "FullSmVersion" is now, but `getSmVersion` would return `/ 10` and `getFeatures` or something would be `% 10`. https://github.com/llvm/llvm-project/pull/74895 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [CUDA] Add support for CUDA-12.3 and sm_90a (PR #74895)
@@ -80,8 +85,10 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo { bool allowFP16Math() const; bool hasMaskOperator() const { return PTXVersion >= 71; } bool hasNoReturn() const { return SmVersion >= 30 && PTXVersion >= 64; } - unsigned int getSmVersion() const { return SmVersion; } + unsigned int getSmVersion() const { return FullSmVersion / 10; } + unsigned int getFullSmVersion() const { return FullSmVersion; } std::string getTargetName() const { return TargetName; } + bool isSm90a() const { return getFullSmVersion() == 901; } Artem-B wrote: According to [CUDA docs](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html?highlight=sm_90a#ptx-module-directives-target) > Target architectures with suffix “a”, such as sm_90a, include > architecture-accelerated features that are supported on the specified > architecture only, hence such targets do not follow the onion layer model. > Therefore, PTX code generated for such targets cannot be run on later > generation devices. Architecture-accelerated features can only be used with > targets that support these features. It's not clear where they are going with this approach. I can make it a more generic `int hasAAFeatures() { return FullSmVersion % 10; }` if that's what you're looking for. https://github.com/llvm/llvm-project/pull/74895 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits