llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: None (NeKon69) <details> <summary>Changes</summary> Fixes #<!-- -->184754 Adds the `_CG_CLUSTER_INTRINSICS_AVAILABLE` predefined macro for NVPTX targets whose `__CUDA_ARCH__` is 900 or higher (compute capability >= 9.0); the definition of the `this_cluster` function is conditional on this macro. --- Full diff: https://github.com/llvm/llvm-project/pull/187039.diff 2 Files Affected: - (modified) clang/lib/Basic/Targets/NVPTX.cpp (+6) - (modified) clang/test/Preprocessor/predefined-arch-macros.c (+6) ``````````diff diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 6526eeff3d718..5b4286f8bfd5e 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -184,6 +184,12 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, unsigned ArchID = CudaArchToID(GPU); Builder.defineMacro("__CUDA_ARCH__", llvm::Twine(ArchID)); + // Per + // https://docs.nvidia.com/cuda/cuda-programming-guide/04-special-topics/cooperative-groups.html#id4 + // clusters are only available with >=9.0 compute capability + if (ArchID >= 900) + Builder.defineMacro("_CG_CLUSTER_INTRINSICS_AVAILABLE"); + if (IsNVIDIAAcceleratedOffloadArch(GPU)) Builder.defineMacro( "__CUDA_ARCH_FEAT_SM" + llvm::Twine(ArchID / 10) + "_ALL", "1"); diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index cb2d13d59d8bf..7c2139c2f6a40 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -4602,11 +4602,17 @@ // RUN: %clang -march=sm_75 -E -dM %s -o - 2>&1 \ // RUN: -target nvptx64-unknown-unknown \ // RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_NVPTX,CHECK_ARCH_SM_75 +// RUN: %clang -march=sm_90 -E -dM %s -o - 2>&1 \ +// RUN: -target nvptx64-unknown-unknown \ +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_NVPTX,CHECK_ARCH_SM_90 // RUN: %clang -E -dM %s -o - 2>&1 \ // RUN: -target nvptx64-unknown-unknown \ // RUN: | FileCheck -match-full-lines %s 
-check-prefixes=CHECK_NVPTX,CHECK_ARCH_UNSET // CHECK_ARCH_SM_75: #define __CUDA_ARCH__ 750 +// CHECK_ARCH_SM_90: #define __CUDA_ARCH__ 900 +// CHECK_ARCH_SM_90: #define _CG_CLUSTER_INTRINSICS_AVAILABLE 1 // CHECK_ARCH_UNSET-NOT: #define __CUDA_ARCH__ +// CHECK_ARCH_UNSET-NOT: #define _CG_CLUSTER_INTRINSICS_AVAILABLE // CHECK_NVPTX: #define __NVPTX__ 1 // Begin amdgcn tests ---------------- `````````` </details> https://github.com/llvm/llvm-project/pull/187039 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
