llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: None (NeKon69)

<details>
<summary>Changes</summary>

Fixes #<!-- -->184754

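Adds the `_CG_CLUSTER_INTRINSICS_AVAILABLE` predefined macro for NVPTX targets with compute capability 9.0 or higher (`__CUDA_ARCH__ >= 900`); the definition of the `this_cluster` function depends on this macro.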

---
Full diff: https://github.com/llvm/llvm-project/pull/187039.diff


2 Files Affected:

- (modified) clang/lib/Basic/Targets/NVPTX.cpp (+6) 
- (modified) clang/test/Preprocessor/predefined-arch-macros.c (+6) 


``````````diff
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 6526eeff3d718..5b4286f8bfd5e 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -184,6 +184,12 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
     unsigned ArchID = CudaArchToID(GPU);
     Builder.defineMacro("__CUDA_ARCH__", llvm::Twine(ArchID));
 
+    // Per
+    // https://docs.nvidia.com/cuda/cuda-programming-guide/04-special-topics/cooperative-groups.html#id4
+    // clusters are only available with >=9.0 compute capability
+    if (ArchID >= 900)
+      Builder.defineMacro("_CG_CLUSTER_INTRINSICS_AVAILABLE");
+
     if (IsNVIDIAAcceleratedOffloadArch(GPU))
       Builder.defineMacro(
           "__CUDA_ARCH_FEAT_SM" + llvm::Twine(ArchID / 10) + "_ALL", "1");
diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c
index cb2d13d59d8bf..7c2139c2f6a40 100644
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -4602,11 +4602,17 @@
 // RUN: %clang -march=sm_75 -E -dM %s -o - 2>&1 \
 // RUN:     -target nvptx64-unknown-unknown \
 // RUN:   | FileCheck -match-full-lines %s -check-prefixes=CHECK_NVPTX,CHECK_ARCH_SM_75
+// RUN: %clang -march=sm_90 -E -dM %s -o - 2>&1 \
+// RUN:     -target nvptx64-unknown-unknown \
+// RUN:   | FileCheck -match-full-lines %s -check-prefixes=CHECK_NVPTX,CHECK_ARCH_SM_90
 // RUN: %clang -E -dM %s -o - 2>&1 \
 // RUN:     -target nvptx64-unknown-unknown \
 // RUN:   | FileCheck -match-full-lines %s -check-prefixes=CHECK_NVPTX,CHECK_ARCH_UNSET
 // CHECK_ARCH_SM_75: #define __CUDA_ARCH__ 750
+// CHECK_ARCH_SM_90: #define __CUDA_ARCH__ 900
+// CHECK_ARCH_SM_90: #define _CG_CLUSTER_INTRINSICS_AVAILABLE 1
 // CHECK_ARCH_UNSET-NOT: #define __CUDA_ARCH__
+// CHECK_ARCH_UNSET-NOT: #define _CG_CLUSTER_INTRINSICS_AVAILABLE
 // CHECK_NVPTX: #define __NVPTX__ 1
 
 // Begin amdgcn tests ----------------

``````````

</details>


https://github.com/llvm/llvm-project/pull/187039
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to