https://github.com/NeKon69 updated https://github.com/llvm/llvm-project/pull/187039
>From f4d922cd241cc428d12ff6abf7948f8c29650d2b Mon Sep 17 00:00:00 2001
From: NeKon69 <[email protected]>
Date: Tue, 17 Mar 2026 17:46:21 +0300
Subject: [PATCH] [NVPTX] add a macro to correctly resolve `cg::this_cluster`

---
 clang/lib/Basic/Targets/NVPTX.cpp                | 6 ++++++
 clang/test/Preprocessor/predefined-arch-macros.c | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 6526eeff3d718..5b4286f8bfd5e 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -184,6 +184,12 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
     unsigned ArchID = CudaArchToID(GPU);
     Builder.defineMacro("__CUDA_ARCH__", llvm::Twine(ArchID));
 
+    // Per
+    // https://docs.nvidia.com/cuda/cuda-programming-guide/04-special-topics/cooperative-groups.html#id4
+    // clusters are only available with >=9.0 compute capability
+    if (ArchID >= 900)
+      Builder.defineMacro("_CG_CLUSTER_INTRINSICS_AVAILABLE");
+
     if (IsNVIDIAAcceleratedOffloadArch(GPU))
       Builder.defineMacro(
           "__CUDA_ARCH_FEAT_SM" + llvm::Twine(ArchID / 10) + "_ALL", "1");
diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c
index cb2d13d59d8bf..7c2139c2f6a40 100644
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -4602,11 +4602,17 @@
 // RUN: %clang -march=sm_75 -E -dM %s -o - 2>&1 \
 // RUN:     -target nvptx64-unknown-unknown \
 // RUN:   | FileCheck -match-full-lines %s -check-prefixes=CHECK_NVPTX,CHECK_ARCH_SM_75
+// RUN: %clang -march=sm_90 -E -dM %s -o - 2>&1 \
+// RUN:     -target nvptx64-unknown-unknown \
+// RUN:   | FileCheck -match-full-lines %s -check-prefixes=CHECK_NVPTX,CHECK_ARCH_SM_90
 // RUN: %clang -E -dM %s -o - 2>&1 \
 // RUN:     -target nvptx64-unknown-unknown \
 // RUN:   | FileCheck -match-full-lines %s -check-prefixes=CHECK_NVPTX,CHECK_ARCH_UNSET
 // CHECK_ARCH_SM_75: #define __CUDA_ARCH__ 750
+// CHECK_ARCH_SM_90: #define __CUDA_ARCH__ 900
+// CHECK_ARCH_SM_90: #define _CG_CLUSTER_INTRINSICS_AVAILABLE 1
 // CHECK_ARCH_UNSET-NOT: #define __CUDA_ARCH__
+// CHECK_ARCH_UNSET-NOT: #define _CG_CLUSTER_INTRINSICS_AVAILABLE
 // CHECK_NVPTX: #define __NVPTX__ 1
 
 // Begin amdgcn tests ----------------
 
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
