[llvm] [clang] [CUDA] Add support for CUDA-12.3 and sm_90a (PR #74895)

2023-12-11 Thread Artem Belevich via cfe-commits

https://github.com/Artem-B closed 
https://github.com/llvm/llvm-project/pull/74895
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [CUDA] Add support for CUDA-12.3 and sm_90a (PR #74895)

2023-12-11 Thread Artem Belevich via cfe-commits

Artem-B wrote:

Tested the changes with cuda test-suite, with cuda-12.1 and 12.3 targeting 
`sm_{60,70,80,90,90a}`.

https://github.com/llvm/llvm-project/pull/74895
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [CUDA] Add support for CUDA-12.3 and sm_90a (PR #74895)

2023-12-11 Thread Artem Belevich via cfe-commits

https://github.com/Artem-B updated 
https://github.com/llvm/llvm-project/pull/74895

>From eace5f13ee62c770a84cdaae441d4c1c6eeb07c2 Mon Sep 17 00:00:00 2001
From: Artem Belevich 
Date: Wed, 6 Dec 2023 12:11:38 -0800
Subject: [PATCH 1/3] [CUDA] Add support for CUDA-12.3 and sm_90a

---
 clang/docs/ReleaseNotes.rst |  3 +++
 clang/include/clang/Basic/BuiltinsNVPTX.def | 13 +++--
 clang/include/clang/Basic/Cuda.h|  7 +--
 clang/lib/Basic/Cuda.cpp|  5 +
 clang/lib/Basic/Targets/NVPTX.cpp   |  3 +++
 clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp|  1 +
 clang/lib/Driver/ToolChains/Cuda.cpp|  6 ++
 clang/test/Misc/target-invalid-cpu-note.c   |  2 +-
 llvm/lib/Target/NVPTX/NVPTX.td  | 19 ++-
 llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp|  7 ++-
 llvm/lib/Target/NVPTX/NVPTXSubtarget.h  | 11 +--
 11 files changed, 60 insertions(+), 17 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 6e4009deaf874..783dc7333af7e 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -973,6 +973,9 @@ CUDA/HIP Language Changes
 CUDA Support
 
 
+- Clang now supports CUDA SDK up to 12.3
+- Added support for sm_90a
+
 AIX Support
 ^^^
 
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def 
b/clang/include/clang/Basic/BuiltinsNVPTX.def
index d74a7d1e55dd2..0f2e8260143be 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -26,7 +26,9 @@
 #pragma push_macro("SM_87")
 #pragma push_macro("SM_89")
 #pragma push_macro("SM_90")
-#define SM_90 "sm_90"
+#pragma push_macro("SM_90a")
+#define SM_90a "sm_90a"
+#define SM_90 "sm_90|" SM_90a
 #define SM_89 "sm_89|" SM_90
 #define SM_87 "sm_87|" SM_89
 #define SM_86 "sm_86|" SM_87
@@ -56,7 +58,11 @@
 #pragma push_macro("PTX78")
 #pragma push_macro("PTX80")
 #pragma push_macro("PTX81")
-#define PTX81 "ptx81"
+#pragma push_macro("PTX82")
+#pragma push_macro("PTX83")
+#define PTX83 "ptx83"
+#define PTX82 "ptx82|" PTX83
+#define PTX81 "ptx81|" PTX82
 #define PTX80 "ptx80|" PTX81
 #define PTX78 "ptx78|" PTX80
 #define PTX77 "ptx77|" PTX78
@@ -1055,6 +1061,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", 
"", AND(SM_90,PTX78))
 #pragma pop_macro("SM_87")
 #pragma pop_macro("SM_89")
 #pragma pop_macro("SM_90")
+#pragma pop_macro("SM_90a")
 #pragma pop_macro("PTX42")
 #pragma pop_macro("PTX60")
 #pragma pop_macro("PTX61")
@@ -1072,3 +1079,5 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", 
"", AND(SM_90,PTX78))
 #pragma pop_macro("PTX78")
 #pragma pop_macro("PTX80")
 #pragma pop_macro("PTX81")
+#pragma pop_macro("PTX82")
+#pragma pop_macro("PTX83")
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index 2d912bdbbd1bc..916cb4b7ef34a 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -39,9 +39,11 @@ enum class CudaVersion {
   CUDA_118,
   CUDA_120,
   CUDA_121,
-  FULLY_SUPPORTED = CUDA_118,
+  CUDA_122,
+  CUDA_123,
+  FULLY_SUPPORTED = CUDA_123,
   PARTIALLY_SUPPORTED =
-  CUDA_121, // Partially supported. Proceed with a warning.
+  CUDA_123, // Partially supported. Proceed with a warning.
   NEW = 1,  // Too new. Issue a warning, but allow using it.
 };
 const char *CudaVersionToString(CudaVersion V);
@@ -71,6 +73,7 @@ enum class CudaArch {
   SM_87,
   SM_89,
   SM_90,
+  SM_90a,
   GFX600,
   GFX601,
   GFX602,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 65840b9f20252..1b1da6a1356f2 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -39,6 +39,8 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
 CUDA_ENTRY(11, 8),
 CUDA_ENTRY(12, 0),
 CUDA_ENTRY(12, 1),
+CUDA_ENTRY(12, 2),
+CUDA_ENTRY(12, 3),
 {"", CudaVersion::NEW, 
llvm::VersionTuple(std::numeric_limits<int>::max())},
 {"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
 };
@@ -93,6 +95,7 @@ static const CudaArchToStringMap arch_names[] = {
 SM(87),  // Jetson/Drive AGX Orin
 SM(89),  // Ada Lovelace
 SM(90),  // Hopper
+SM(90a), // Hopper
 GFX(600),  // gfx600
 GFX(601),  // gfx601
 GFX(602),  // gfx602
@@ -209,6 +212,8 @@ CudaVersion MinVersionForCudaArch(CudaArch A) {
   case CudaArch::SM_89:
   case CudaArch::SM_90:
 return CudaVersion::CUDA_118;
+  case CudaArch::SM_90a:
+return CudaVersion::CUDA_120;
   default:
 llvm_unreachable("invalid enum");
   }
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp 
b/clang/lib/Basic/Targets/NVPTX.cpp
index 3a4a75b0348f2..5c601812f6175 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -262,11 +262,14 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions 
&Opts,
   case 

[llvm] [clang] [CUDA] Add support for CUDA-12.3 and sm_90a (PR #74895)

2023-12-08 Thread Artem Belevich via cfe-commits

https://github.com/Artem-B updated 
https://github.com/llvm/llvm-project/pull/74895

>From 3ce8e08b94e33480139e13ca9f0fd7b719ff2c3d Mon Sep 17 00:00:00 2001
From: Artem Belevich 
Date: Wed, 6 Dec 2023 12:11:38 -0800
Subject: [PATCH 1/3] [CUDA] Add support for CUDA-12.3 and sm_90a

---
 clang/docs/ReleaseNotes.rst |  3 +++
 clang/include/clang/Basic/BuiltinsNVPTX.def | 13 +++--
 clang/include/clang/Basic/Cuda.h|  7 +--
 clang/lib/Basic/Cuda.cpp|  5 +
 clang/lib/Basic/Targets/NVPTX.cpp   |  3 +++
 clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp|  1 +
 clang/lib/Driver/ToolChains/Cuda.cpp|  6 ++
 clang/test/Misc/target-invalid-cpu-note.c   |  2 +-
 llvm/lib/Target/NVPTX/NVPTX.td  | 19 ++-
 llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp|  7 ++-
 llvm/lib/Target/NVPTX/NVPTXSubtarget.h  | 11 +--
 11 files changed, 60 insertions(+), 17 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 89ea2f0930ceca..1bf68a46a64dac 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -937,6 +937,9 @@ CUDA/HIP Language Changes
 CUDA Support
 
 
+- Clang now supports CUDA SDK up to 12.3
+- Added support for sm_90a
+
 AIX Support
 ^^^
 
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def 
b/clang/include/clang/Basic/BuiltinsNVPTX.def
index d74a7d1e55dd28..0f2e8260143be7 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -26,7 +26,9 @@
 #pragma push_macro("SM_87")
 #pragma push_macro("SM_89")
 #pragma push_macro("SM_90")
-#define SM_90 "sm_90"
+#pragma push_macro("SM_90a")
+#define SM_90a "sm_90a"
+#define SM_90 "sm_90|" SM_90a
 #define SM_89 "sm_89|" SM_90
 #define SM_87 "sm_87|" SM_89
 #define SM_86 "sm_86|" SM_87
@@ -56,7 +58,11 @@
 #pragma push_macro("PTX78")
 #pragma push_macro("PTX80")
 #pragma push_macro("PTX81")
-#define PTX81 "ptx81"
+#pragma push_macro("PTX82")
+#pragma push_macro("PTX83")
+#define PTX83 "ptx83"
+#define PTX82 "ptx82|" PTX83
+#define PTX81 "ptx81|" PTX82
 #define PTX80 "ptx80|" PTX81
 #define PTX78 "ptx78|" PTX80
 #define PTX77 "ptx77|" PTX78
@@ -1055,6 +1061,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", 
"", AND(SM_90,PTX78))
 #pragma pop_macro("SM_87")
 #pragma pop_macro("SM_89")
 #pragma pop_macro("SM_90")
+#pragma pop_macro("SM_90a")
 #pragma pop_macro("PTX42")
 #pragma pop_macro("PTX60")
 #pragma pop_macro("PTX61")
@@ -1072,3 +1079,5 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", 
"", AND(SM_90,PTX78))
 #pragma pop_macro("PTX78")
 #pragma pop_macro("PTX80")
 #pragma pop_macro("PTX81")
+#pragma pop_macro("PTX82")
+#pragma pop_macro("PTX83")
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index 2d912bdbbd1bc5..916cb4b7ef34a7 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -39,9 +39,11 @@ enum class CudaVersion {
   CUDA_118,
   CUDA_120,
   CUDA_121,
-  FULLY_SUPPORTED = CUDA_118,
+  CUDA_122,
+  CUDA_123,
+  FULLY_SUPPORTED = CUDA_123,
   PARTIALLY_SUPPORTED =
-  CUDA_121, // Partially supported. Proceed with a warning.
+  CUDA_123, // Partially supported. Proceed with a warning.
   NEW = 1,  // Too new. Issue a warning, but allow using it.
 };
 const char *CudaVersionToString(CudaVersion V);
@@ -71,6 +73,7 @@ enum class CudaArch {
   SM_87,
   SM_89,
   SM_90,
+  SM_90a,
   GFX600,
   GFX601,
   GFX602,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 65840b9f20252b..1b1da6a1356f2c 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -39,6 +39,8 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
 CUDA_ENTRY(11, 8),
 CUDA_ENTRY(12, 0),
 CUDA_ENTRY(12, 1),
+CUDA_ENTRY(12, 2),
+CUDA_ENTRY(12, 3),
 {"", CudaVersion::NEW, 
llvm::VersionTuple(std::numeric_limits<int>::max())},
 {"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
 };
@@ -93,6 +95,7 @@ static const CudaArchToStringMap arch_names[] = {
 SM(87),  // Jetson/Drive AGX Orin
 SM(89),  // Ada Lovelace
 SM(90),  // Hopper
+SM(90a), // Hopper
 GFX(600),  // gfx600
 GFX(601),  // gfx601
 GFX(602),  // gfx602
@@ -209,6 +212,8 @@ CudaVersion MinVersionForCudaArch(CudaArch A) {
   case CudaArch::SM_89:
   case CudaArch::SM_90:
 return CudaVersion::CUDA_118;
+  case CudaArch::SM_90a:
+return CudaVersion::CUDA_120;
   default:
 llvm_unreachable("invalid enum");
   }
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp 
b/clang/lib/Basic/Targets/NVPTX.cpp
index 3a4a75b0348f20..5c601812f61759 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -262,11 +262,14 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions 
&Opts,
   

[llvm] [clang] [CUDA] Add support for CUDA-12.3 and sm_90a (PR #74895)

2023-12-08 Thread Artem Belevich via cfe-commits


@@ -80,8 +85,10 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   bool allowFP16Math() const;
   bool hasMaskOperator() const { return PTXVersion >= 71; }
   bool hasNoReturn() const { return SmVersion >= 30 && PTXVersion >= 64; }
-  unsigned int getSmVersion() const { return SmVersion; }
+  unsigned int getSmVersion() const { return FullSmVersion / 10; }
+  unsigned int getFullSmVersion() const { return FullSmVersion; }
   std::string getTargetName() const { return TargetName; }
+  bool isSm90a() const { return getFullSmVersion() == 901; }

Artem-B wrote:

Done.

https://github.com/llvm/llvm-project/pull/74895
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [CUDA] Add support for CUDA-12.3 and sm_90a (PR #74895)

2023-12-08 Thread Artem Belevich via cfe-commits

https://github.com/Artem-B updated 
https://github.com/llvm/llvm-project/pull/74895

>From 3ce8e08b94e33480139e13ca9f0fd7b719ff2c3d Mon Sep 17 00:00:00 2001
From: Artem Belevich 
Date: Wed, 6 Dec 2023 12:11:38 -0800
Subject: [PATCH 1/2] [CUDA] Add support for CUDA-12.3 and sm_90a

---
 clang/docs/ReleaseNotes.rst |  3 +++
 clang/include/clang/Basic/BuiltinsNVPTX.def | 13 +++--
 clang/include/clang/Basic/Cuda.h|  7 +--
 clang/lib/Basic/Cuda.cpp|  5 +
 clang/lib/Basic/Targets/NVPTX.cpp   |  3 +++
 clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp|  1 +
 clang/lib/Driver/ToolChains/Cuda.cpp|  6 ++
 clang/test/Misc/target-invalid-cpu-note.c   |  2 +-
 llvm/lib/Target/NVPTX/NVPTX.td  | 19 ++-
 llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp|  7 ++-
 llvm/lib/Target/NVPTX/NVPTXSubtarget.h  | 11 +--
 11 files changed, 60 insertions(+), 17 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 89ea2f0930cec..1bf68a46a64da 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -937,6 +937,9 @@ CUDA/HIP Language Changes
 CUDA Support
 
 
+- Clang now supports CUDA SDK up to 12.3
+- Added support for sm_90a
+
 AIX Support
 ^^^
 
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def 
b/clang/include/clang/Basic/BuiltinsNVPTX.def
index d74a7d1e55dd2..0f2e8260143be 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -26,7 +26,9 @@
 #pragma push_macro("SM_87")
 #pragma push_macro("SM_89")
 #pragma push_macro("SM_90")
-#define SM_90 "sm_90"
+#pragma push_macro("SM_90a")
+#define SM_90a "sm_90a"
+#define SM_90 "sm_90|" SM_90a
 #define SM_89 "sm_89|" SM_90
 #define SM_87 "sm_87|" SM_89
 #define SM_86 "sm_86|" SM_87
@@ -56,7 +58,11 @@
 #pragma push_macro("PTX78")
 #pragma push_macro("PTX80")
 #pragma push_macro("PTX81")
-#define PTX81 "ptx81"
+#pragma push_macro("PTX82")
+#pragma push_macro("PTX83")
+#define PTX83 "ptx83"
+#define PTX82 "ptx82|" PTX83
+#define PTX81 "ptx81|" PTX82
 #define PTX80 "ptx80|" PTX81
 #define PTX78 "ptx78|" PTX80
 #define PTX77 "ptx77|" PTX78
@@ -1055,6 +1061,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", 
"", AND(SM_90,PTX78))
 #pragma pop_macro("SM_87")
 #pragma pop_macro("SM_89")
 #pragma pop_macro("SM_90")
+#pragma pop_macro("SM_90a")
 #pragma pop_macro("PTX42")
 #pragma pop_macro("PTX60")
 #pragma pop_macro("PTX61")
@@ -1072,3 +1079,5 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", 
"", AND(SM_90,PTX78))
 #pragma pop_macro("PTX78")
 #pragma pop_macro("PTX80")
 #pragma pop_macro("PTX81")
+#pragma pop_macro("PTX82")
+#pragma pop_macro("PTX83")
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index 2d912bdbbd1bc..916cb4b7ef34a 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -39,9 +39,11 @@ enum class CudaVersion {
   CUDA_118,
   CUDA_120,
   CUDA_121,
-  FULLY_SUPPORTED = CUDA_118,
+  CUDA_122,
+  CUDA_123,
+  FULLY_SUPPORTED = CUDA_123,
   PARTIALLY_SUPPORTED =
-  CUDA_121, // Partially supported. Proceed with a warning.
+  CUDA_123, // Partially supported. Proceed with a warning.
   NEW = 1,  // Too new. Issue a warning, but allow using it.
 };
 const char *CudaVersionToString(CudaVersion V);
@@ -71,6 +73,7 @@ enum class CudaArch {
   SM_87,
   SM_89,
   SM_90,
+  SM_90a,
   GFX600,
   GFX601,
   GFX602,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 65840b9f20252..1b1da6a1356f2 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -39,6 +39,8 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
 CUDA_ENTRY(11, 8),
 CUDA_ENTRY(12, 0),
 CUDA_ENTRY(12, 1),
+CUDA_ENTRY(12, 2),
+CUDA_ENTRY(12, 3),
 {"", CudaVersion::NEW, 
llvm::VersionTuple(std::numeric_limits<int>::max())},
 {"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
 };
@@ -93,6 +95,7 @@ static const CudaArchToStringMap arch_names[] = {
 SM(87),  // Jetson/Drive AGX Orin
 SM(89),  // Ada Lovelace
 SM(90),  // Hopper
+SM(90a), // Hopper
 GFX(600),  // gfx600
 GFX(601),  // gfx601
 GFX(602),  // gfx602
@@ -209,6 +212,8 @@ CudaVersion MinVersionForCudaArch(CudaArch A) {
   case CudaArch::SM_89:
   case CudaArch::SM_90:
 return CudaVersion::CUDA_118;
+  case CudaArch::SM_90a:
+return CudaVersion::CUDA_120;
   default:
 llvm_unreachable("invalid enum");
   }
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp 
b/clang/lib/Basic/Targets/NVPTX.cpp
index 3a4a75b0348f2..5c601812f6175 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -262,11 +262,14 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions 
&Opts,
   case 

[llvm] [clang] [CUDA] Add support for CUDA-12.3 and sm_90a (PR #74895)

2023-12-08 Thread Artem Belevich via cfe-commits

https://github.com/Artem-B edited 
https://github.com/llvm/llvm-project/pull/74895
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [CUDA] Add support for CUDA-12.3 and sm_90a (PR #74895)

2023-12-08 Thread Joseph Huber via cfe-commits


@@ -80,8 +85,10 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   bool allowFP16Math() const;
   bool hasMaskOperator() const { return PTXVersion >= 71; }
   bool hasNoReturn() const { return SmVersion >= 30 && PTXVersion >= 64; }
-  unsigned int getSmVersion() const { return SmVersion; }
+  unsigned int getSmVersion() const { return FullSmVersion / 10; }
+  unsigned int getFullSmVersion() const { return FullSmVersion; }
   std::string getTargetName() const { return TargetName; }
+  bool isSm90a() const { return getFullSmVersion() == 901; }

jhuber6 wrote:

Yeah, I was thinking that the internal representation would just be what 
`FullSmVersion` is now, but `getSmVersion` would return `/ 10` and 
`getFeatures` or something would be `% 10`.

https://github.com/llvm/llvm-project/pull/74895
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[llvm] [clang] [CUDA] Add support for CUDA-12.3 and sm_90a (PR #74895)

2023-12-08 Thread Artem Belevich via cfe-commits


@@ -80,8 +85,10 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   bool allowFP16Math() const;
   bool hasMaskOperator() const { return PTXVersion >= 71; }
   bool hasNoReturn() const { return SmVersion >= 30 && PTXVersion >= 64; }
-  unsigned int getSmVersion() const { return SmVersion; }
+  unsigned int getSmVersion() const { return FullSmVersion / 10; }
+  unsigned int getFullSmVersion() const { return FullSmVersion; }
   std::string getTargetName() const { return TargetName; }
+  bool isSm90a() const { return getFullSmVersion() == 901; }

Artem-B wrote:

According to [CUDA 
docs](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html?highlight=sm_90a#ptx-module-directives-target):

> Target architectures with suffix “a”, such as sm_90a, include 
> architecture-accelerated features that are supported on the specified 
> architecture only, hence such targets do not follow the onion layer model. 
> Therefore, PTX code generated for such targets cannot be run on later 
> generation devices. Architecture-accelerated features can only be used with 
> targets that support these features.

It's not clear where they are going with this approach.

I can make it a more generic `int hasAAFeatures() { return FullSmVersion % 10;  
}` if that's what you're looking for.


https://github.com/llvm/llvm-project/pull/74895
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits