https://github.com/fenodem created https://github.com/llvm/llvm-project/pull/187696
@localspook @yxsamliu @emankov Do not merge! This PR was created to document my efforts on https://github.com/llvm/llvm-project/issues/119661 I used this .cu file: ``` // test_cuda_math_minimal.cu __global__ void minimal_test() { double d = -1.0; float f = -1.0f; // These 8 calls trigger all the errors in the issue int a = isfinite(d); int b = isfinite(f); int c = isinf(d); int d2 = isinf(f); int e = isnan(d); int f2 = isnan(f); int g = signbit(d); int h = signbit(f); (void)a; (void)b; (void)c; (void)d2; (void)e; (void)f2; (void)g; (void)h; } int main() { minimal_test<<<1, 1>>>(); cudaDeviceSynchronize(); return 0; } ``` with this command: ``` clang++ "C:\downloads\test_cuda_math_minimal.cu" ^ -o "C:\Downloads\test_cuda_math_minimal.cu.exe" ^ --target=x86_64-windows-gnu ^ --cuda-path="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.6" ^ --cuda-gpu-arch=sm_89 ^ --sysroot="c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64" ^ -lcudart ``` Before this commit, I was getting the same errors that were present in the 2nd listing from https://github.com/llvm/llvm-project/issues/119661#issue-2734927853 If anyone knows how to make clang++.exe take the CUDA headers from a nearby directory rather than from --sysroot, please report it here! After applying these patches, I found that the macro poisoning was not working (strange). 
These errors are after applying them (I couldn't find how to make clang use llvm headers, not those from gcc; I applied these diff to llvm headers where clang++.exe lives): ``` In file included from C:/Downloads/winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1/mingw64/lib/clang/18/include/__clang_cuda_runtime_wrapper.h:41: In file included from C:/Downloads/winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1/mingw64/lib/clang/18/include/cuda_wrappers/cmath:27: In file included from c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/cmath:49: c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/bits/std_abs.h:137:7: error: __float128 is not supported on this target 137 | abs(__float128 __x) | ^ c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/bits/std_abs.h:136:3: error: __float128 is not supported on this target 136 | __float128 | ^ c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/bits/std_abs.h:137:18: note: '__x' defined here 137 | abs(__float128 __x) | ^ c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/bits/std_abs.h:137:18: note: '__x' defined here c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/bits/std_abs.h:137:18: note: '__x' defined here In file included from <built-in>:1: In file included from C:/Downloads/winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1/mingw64/lib/clang/18/include/__clang_cuda_runtime_wrapper.h:41: In file included from C:/Downloads/winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1/mingw64/lib/clang/18/include/cuda_wrappers/cmath:27: In file included from 
c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/cmath:3898: In file included from c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/bits/specfun.h:43: In file included from c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/bits/stl_algobase.h:64: In file included from c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/bits/stl_pair.h:60: c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/type_traits:514:39: error: __float128 is not supported on this target 514 | struct __is_floating_point_helper<__float128> | ^ In file included from <built-in>:1: In file included from C:/Downloads/winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1/mingw64/lib/clang/18/include/__clang_cuda_runtime_wrapper.h:41: In file included from C:/Downloads/winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1/mingw64/lib/clang/18/include/cuda_wrappers/cmath:27: In file included from c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/cmath:3898: In file included from c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/bits/specfun.h:43: c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/bits/stl_algobase.h:1079:21: error: __float128 is not supported on this target 1079 | __size_to_integer(__float128 __n) { return (long long)__n; } | ^ In file included from <built-in>:1: In file included from C:/Downloads/winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1/mingw64/lib/clang/18/include/__clang_cuda_runtime_wrapper.h:41: In file included from 
C:/Downloads/winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1/mingw64/lib/clang/18/include/cuda_wrappers/cmath:27: In file included from c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/cmath:3898: In file included from c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/bits/specfun.h:44: c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/limits:2089:27: error: __float128 is not supported on this target 2089 | struct numeric_limits<__float128> | ^ c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/limits:2093:33: error: __float128 is not supported on this target 2093 | static _GLIBCXX_CONSTEXPR __float128 | ^ c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/limits:2104:33: error: __float128 is not supported on this target 2104 | static _GLIBCXX_CONSTEXPR __float128 | ^ c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/limits:2118:33: error: __float128 is not supported on this target 2118 | static _GLIBCXX_CONSTEXPR __float128 | ^ c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/limits:2132:33: error: __float128 is not supported on this target 2132 | static _GLIBCXX_CONSTEXPR __float128 | ^ c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/limits:2136:33: error: __float128 is not supported on this target 2136 | static _GLIBCXX_CONSTEXPR __float128 | ^ c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/limits:2156:33: error: __float128 is not supported on this target 2156 | static 
_GLIBCXX_CONSTEXPR __float128 | ^ c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/limits:2160:33: error: __float128 is not supported on this target 2160 | static _GLIBCXX_CONSTEXPR __float128 | ^ c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/limits:2164:33: error: __float128 is not supported on this target 2164 | static _GLIBCXX_CONSTEXPR __float128 | ^ c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/limits:2176:33: error: __float128 is not supported on this target 2176 | static _GLIBCXX_CONSTEXPR __float128 | ^ c:\Downloads\winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1\mingw64/include/c++/14.2.0/limits:2170:28: error: __float128 is not supported on this target 2170 | return __builtin_bit_cast(__float128, __builtin_nansf128("")); | ^ In file included from <built-in>:1: In file included from C:/Downloads/winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1/mingw64/lib/clang/18/include/__clang_cuda_runtime_wrapper.h:349: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.6/include/crt/math_functions.hpp:413:10: error: no matching function for call to '__signbitl' 413 | return __signbitl(a); | ^~~~~~~~~~ C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.6/include/crt/math_functions.hpp:2672:14: note: candidate function not viable: call to __host__ function from __device__ function 2672 | __func__(int __signbitl(const long double a)) | ^ In file included from <built-in>:1: In file included from C:/Downloads/winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1/mingw64/lib/clang/18/include/__clang_cuda_runtime_wrapper.h:349: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.6/include/crt/math_functions.hpp:418:10: error: no matching function for call to '__signbit' 418 | return __signbit(a); | ^~~~~~~~~ 
C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.6/include/crt/math_functions.hpp:1289:14: note: candidate function not viable: call to __host__ function from __device__ function 1289 | __func__(int __signbit(double a)) | ^ In file included from <built-in>:1: In file included from C:/Downloads/winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1/mingw64/lib/clang/18/include/__clang_cuda_runtime_wrapper.h:349: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.6/include/crt/math_functions.hpp:430:10: error: no matching function for call to '__isinfl' 430 | return __isinfl(a); | ^~~~~~~~ C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.6/include/crt/math_functions.hpp:2692:14: note: candidate function not viable: call to __host__ function from __device__ function 2692 | __func__(int __isinfl(const long double a)) | ^ In file included from <built-in>:1: In file included from C:/winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1/mingw64/lib/clang/18/include/__clang_cuda_runtime_wrapper.h:349: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.6/include/crt/math_functions.hpp:444:62: error: functions that differ only in their return type cannot be overloaded 444 | static __inline__ __host__ __device__ __cudart_builtin__ int isinf(const double a) | ~~~ ^ C:/Downloads/winlibs-x86_64-posix-seh-gcc-14.2.0-llvm-18.1.8-mingw-w64ucrt-12.0.0-r1/mingw64/lib/clang/18/include/__clang_cuda_math_forward_declares.h:99:17: note: previous declaration is here 99 | __DEVICE__ bool isinf(double); | ~~~~ ^ fatal error: too many errors emitted, stopping now [-ferror-limit=] 20 errors generated when compiling for sm_89. 
``` >From f3af2cc29fb5109416c3f7a847c097e644e8eeea Mon Sep 17 00:00:00 2001 From: fenodem <[email protected]> Date: Fri, 20 Mar 2026 09:34:28 +0000 Subject: [PATCH 1/4] Update __clang_cuda_math_forward_declares.h --- .../__clang_cuda_math_forward_declares.h | 65 ++++++++++++------- 1 file changed, 40 insertions(+), 25 deletions(-) diff --git a/clang/lib/Headers/__clang_cuda_math_forward_declares.h b/clang/lib/Headers/__clang_cuda_math_forward_declares.h index 45fe1e5b1772d..d8091ea77ff49 100644 --- a/clang/lib/Headers/__clang_cuda_math_forward_declares.h +++ b/clang/lib/Headers/__clang_cuda_math_forward_declares.h @@ -12,12 +12,23 @@ #error "This file is for CUDA/HIP compilation only." #endif -// This file forward-declares of some math functions we (or the CUDA headers) -// will define later. We need to do this, and do it before cmath is included, -// because the standard library may have constexpr math functions. In the -// absence of a prior __device__ decl, those constexpr functions may become -// implicitly host+device. host+device functions can't be overloaded, so that -// would preclude the use of our own __device__ overloads for these functions. +// PURPOSE: Forward-declare __device__ math functions before <cmath> is included. +// Prevents standard library constexpr functions from becoming implicit +// __host__ __device__, which would clash with our __device__ overloads. + +// --------------------------------------------------------------------------- +// Return Type: CUDA headers return 'bool' on MSVC, but 'int' on POSIX. +// Mismatches here cause "functions differ only in return type" errors. +// --------------------------------------------------------------------------- +// CORRECTED: Force 'int' for all CUDA compilations to match CUDA SDK headers +// (math_functions.hpp), which define these as returning int regardless of host. 
+#if defined(__CUDA__) +#define __CUDA_CLASSIFIER_RET_TYPE int +#elif defined(__OPENMP_NVPTX__) +#define __CUDA_CLASSIFIER_RET_TYPE int +#else +#define __CUDA_CLASSIFIER_RET_TYPE int +#endif #pragma push_macro("__DEVICE__") #define __DEVICE__ \ @@ -89,31 +100,38 @@ __DEVICE__ double hypot(double, double); __DEVICE__ float hypot(float, float); __DEVICE__ int ilogb(double); __DEVICE__ int ilogb(float); -#ifdef _MSC_VER -__DEVICE__ bool isfinite(long double); + +// --------------------------------------------------------------------------- +// Classification Functions +// --------------------------------------------------------------------------- +// Note: We declare long double versions here if not MSVC to match +// __clang_cuda_cmath.h logic, but they require implementations in +// __clang_cuda_device_functions.h to avoid link errors. +#if !defined(_MSC_VER) +__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE isfinite(long double); #endif -__DEVICE__ bool isfinite(double); -__DEVICE__ bool isfinite(float); +__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE isfinite(double); +__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE isfinite(float); __DEVICE__ bool isgreater(double, double); __DEVICE__ bool isgreaterequal(double, double); __DEVICE__ bool isgreaterequal(float, float); __DEVICE__ bool isgreater(float, float); -#ifdef _MSC_VER -__DEVICE__ bool isinf(long double); +#if !defined(_MSC_VER) +__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE isinf(long double); #endif -__DEVICE__ bool isinf(double); -__DEVICE__ bool isinf(float); +__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE isinf(double); +__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE isinf(float); __DEVICE__ bool isless(double, double); __DEVICE__ bool islessequal(double, double); __DEVICE__ bool islessequal(float, float); __DEVICE__ bool isless(float, float); __DEVICE__ bool islessgreater(double, double); __DEVICE__ bool islessgreater(float, float); -#ifdef _MSC_VER -__DEVICE__ bool isnan(long double); +#if !defined(_MSC_VER) +__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE isnan(long 
double); #endif -__DEVICE__ bool isnan(double); -__DEVICE__ bool isnan(float); +__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE isnan(double); +__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE isnan(float); __DEVICE__ bool isnormal(double); __DEVICE__ bool isnormal(float); __DEVICE__ bool isunordered(double, double); @@ -165,11 +183,11 @@ __DEVICE__ double scalbln(double, long); __DEVICE__ float scalbln(float, long); __DEVICE__ double scalbn(double, int); __DEVICE__ float scalbn(float, int); -#ifdef _MSC_VER -__DEVICE__ bool signbit(long double); +#if !defined(_MSC_VER) +__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE signbit(long double); #endif -__DEVICE__ bool signbit(double); -__DEVICE__ bool signbit(float); +__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE signbit(double); +__DEVICE__ __CUDA_CLASSIFIER_RET_TYPE signbit(float); __DEVICE__ double sin(double); __DEVICE__ float sin(float); __DEVICE__ double sinh(double); @@ -185,9 +203,6 @@ __DEVICE__ float tgamma(float); __DEVICE__ double trunc(double); __DEVICE__ float trunc(float); -// Notably missing above is nexttoward, which we don't define on -// the device side because libdevice doesn't give us an implementation, and we -// don't want to be in the business of writing one ourselves. 
// We need to define these overloads in exactly the namespace our standard // library uses (including the right inline namespace), otherwise they won't be >From a291032af6d170b921e028555c6f257ba47eece9 Mon Sep 17 00:00:00 2001 From: fenodem <[email protected]> Date: Fri, 20 Mar 2026 09:48:57 +0000 Subject: [PATCH 2/4] Update __clang_cuda_device_functions.h --- .../Headers/__clang_cuda_device_functions.h | 79 ++++++++++++++----- 1 file changed, 61 insertions(+), 18 deletions(-) diff --git a/clang/lib/Headers/__clang_cuda_device_functions.h b/clang/lib/Headers/__clang_cuda_device_functions.h index 0226fe95abab6..4658c92d37206 100644 --- a/clang/lib/Headers/__clang_cuda_device_functions.h +++ b/clang/lib/Headers/__clang_cuda_device_functions.h @@ -223,11 +223,65 @@ __DEVICE__ float __fdividef(float __a, float __b) { } __DEVICE__ int __ffs(int __a) { return __nv_ffs(__a); } __DEVICE__ int __ffsll(long long __a) { return __nv_ffsll(__a); } -__DEVICE__ int __finite(double __a) { return __nv_isfinited(__a); } -__DEVICE__ int __finitef(float __a) { return __nv_finitef(__a); } -#ifdef _MSC_VER -__DEVICE__ int __finitel(long double __a); -#endif + +// --------------------------------------------------------------------------- +// Classification Function Internal Names +// --------------------------------------------------------------------------- +// WARNING: Do NOT consolidate these functions. CUDA's math_functions.hpp calls +// distinct names (e.g., __signbit vs __signbitd). Removing one causes +// "no matching function" errors. +// +// Note: We use __inline__ without static. This provides external linkage +// semantics which matches the expectations of CUDA headers declaring these +// as 'extern' for the GCC/MinGW environment, while still allowing inlining. 
+// --------------------------------------------------------------------------- + +// Float implementations +__inline__ __host__ __device__ __attribute__((always_inline)) +int __finitef(float __a) { return __builtin_isfinite(__a); } +__inline__ __host__ __device__ __attribute__((always_inline)) +int __isinff(float __a) { return __builtin_isinf(__a); } +__inline__ __host__ __device__ __attribute__((always_inline)) +int __isnanf(float __a) { return __builtin_isnan(__a); } +__inline__ __host__ __device__ __attribute__((always_inline)) +int __signbitf(float __a) { return __builtin_signbit(__a); } + +// Double implementations +// Note: Both __finite and __isfinited are defined because CUDA headers +// reference distinct names in different contexts (similar to __signbit/__signbitd). +__inline__ __host__ __device__ __attribute__((always_inline)) +int __finite(double __a) { return __builtin_isfinite(__a); } +__inline__ __host__ __device__ __attribute__((always_inline)) +int __isfinited(double __a) { return __builtin_isfinite(__a); } +__inline__ __host__ __device__ __attribute__((always_inline)) +int __isinf(double __a) { return __builtin_isinf(__a); } +__inline__ __host__ __device__ __attribute__((always_inline)) +int __isnan(double __a) { return __builtin_isnan(__a); } +__inline__ __host__ __device__ __attribute__((always_inline)) +int __signbit(double __a) { return __builtin_signbit(__a); } +__inline__ __host__ __device__ __attribute__((always_inline)) +int __signbitd(double __a) { return __builtin_signbit(__a); } + +// Long double implementations (UNGUARDED - intentional) +// IMPORTANT: Do NOT cast to double. Clang's builtins natively support long double. +// Casting causes incorrect results on MinGW/Linux where long double has higher +// precision than double (e.g. finite values that overflow double). +// NOTE: Clang does NOT support __builtin_isfinitel. Using suffixed builtins +// will fail. 
The generic builtin preserves precision for 80-bit long double +// on MinGW hosts and handles double demotion on devices automatically. +// NOTE: Do NOT add #if !defined(_MSC_VER) here. Unlike wrappers, these +// are __inline__ with distinct names (__finitel vs __finite). +// They have no linker visibility and are optimized away if unused. +// CUDA headers may call these on any platform - define unconditionally. +__inline__ __host__ __device__ __attribute__((always_inline)) +int __finitel(long double __a) { return __builtin_isfinite(__a); } +__inline__ __host__ __device__ __attribute__((always_inline)) +int __isinfl(long double __a) { return __builtin_isinf(__a); } +__inline__ __host__ __device__ __attribute__((always_inline)) +int __isnanl(long double __a) { return __builtin_isnan(__a); } +__inline__ __host__ __device__ __attribute__((always_inline)) +int __signbitl(long double __a) { return __builtin_signbit(__a); } + __DEVICE__ int __float2int_rd(float __a) { return __nv_float2int_rd(__a); } __DEVICE__ int __float2int_rn(float __a) { return __nv_float2int_rn(__a); } __DEVICE__ int __float2int_ru(float __a) { return __nv_float2int_ru(__a); } @@ -433,17 +487,7 @@ __DEVICE__ float __int2float_rn(int __a) { return __nv_int2float_rn(__a); } __DEVICE__ float __int2float_ru(int __a) { return __nv_int2float_ru(__a); } __DEVICE__ float __int2float_rz(int __a) { return __nv_int2float_rz(__a); } __DEVICE__ float __int_as_float(int __a) { return __nv_int_as_float(__a); } -__DEVICE__ int __isfinited(double __a) { return __nv_isfinited(__a); } -__DEVICE__ int __isinf(double __a) { return __nv_isinfd(__a); } -__DEVICE__ int __isinff(float __a) { return __nv_isinff(__a); } -#ifdef _MSC_VER -__DEVICE__ int __isinfl(long double __a); -#endif -__DEVICE__ int __isnan(double __a) { return __nv_isnand(__a); } -__DEVICE__ int __isnanf(float __a) { return __nv_isnanf(__a); } -#ifdef _MSC_VER -__DEVICE__ int __isnanl(long double __a); -#endif + __DEVICE__ double __ll2double_rd(long long 
__a) { return __nv_ll2double_rd(__a); } @@ -515,8 +559,7 @@ __DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c) { return __nv_sad(__a, __b, __c); } __DEVICE__ float __saturatef(float __a) { return __nv_saturatef(__a); } -__DEVICE__ int __signbitd(double __a) { return __nv_signbitd(__a); } -__DEVICE__ int __signbitf(float __a) { return __nv_signbitf(__a); } + __DEVICE__ void __sincosf(float __a, float *__s, float *__c) { return __nv_fast_sincosf(__a, __s, __c); } >From eaa6e8296429c55b7d59f425a781a4b27a906b11 Mon Sep 17 00:00:00 2001 From: fenodem <[email protected]> Date: Fri, 20 Mar 2026 10:01:39 +0000 Subject: [PATCH 3/4] Update __clang_cuda_cmath.h --- clang/lib/Headers/__clang_cuda_cmath.h | 81 +++++++++++++------------- 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/clang/lib/Headers/__clang_cuda_cmath.h b/clang/lib/Headers/__clang_cuda_cmath.h index 5bbb59a93b9e5..b78cad5f94544 100644 --- a/clang/lib/Headers/__clang_cuda_cmath.h +++ b/clang/lib/Headers/__clang_cuda_cmath.h @@ -65,54 +65,57 @@ __DEVICE__ float frexp(float __arg, int *__exp) { return ::frexpf(__arg, __exp); } -// For inscrutable reasons, the CUDA headers define these functions for us on -// Windows. -#if !defined(_MSC_VER) || defined(__OPENMP_NVPTX__) - -// For OpenMP we work around some old system headers that have non-conforming -// `isinf(float)` and `isnan(float)` implementations that return an `int`. We do -// this by providing two versions of these functions, differing only in the -// return type. To avoid conflicting definitions we disable implicit base -// function generation. That means we will end up with two specializations, one -// per type, but only one has a base function defined by the system header. +// --------------------------------------------------------------------------- +// Standard Classification Functions +// --------------------------------------------------------------------------- +// OpenMP variants return 'int' (legacy compatibility). 
+// Base functions return __CUDA_CLASSIFIER_RET_TYPE (bool/int per ABI). +// --------------------------------------------------------------------------- + #if defined(__OPENMP_NVPTX__) #pragma omp begin declare variant match( \ implementation = {extension(disable_implicit_base)}) -// FIXME: We lack an extension to customize the mangling of the variants, e.g., -// add a suffix. This means we would clash with the names of the variants -// (note that we do not create implicit base functions here). To avoid -// this clash we add a new trait to some of them that is always true -// (this is LLVM after all ;)). It will only influence the mangled name -// of the variants inside the inner region and avoid the clash. #pragma omp begin declare variant match(implementation = {vendor(llvm)}) -__DEVICE__ int isinf(float __x) { return ::__isinff(__x); } -__DEVICE__ int isinf(double __x) { return ::__isinf(__x); } -__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); } -__DEVICE__ int isfinite(double __x) { return ::__isfinited(__x); } -__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); } -__DEVICE__ int isnan(double __x) { return ::__isnan(__x); } +// OpenMP path: Return 'int' for legacy compatibility. 
+static __host__ __device__ int isinf(float __x) { return ::__isinff(__x); } +static __host__ __device__ int isinf(double __x) { return ::__isinf(__x); } +static __host__ __device__ int isfinite(float __x) { return ::__finitef(__x); } +static __host__ __device__ int isfinite(double __x) { return ::__isfinited(__x); } +static __host__ __device__ int isnan(float __x) { return ::__isnanf(__x); } +static __host__ __device__ int isnan(double __x) { return ::__isnan(__x); } +static __host__ __device__ int signbit(float __x) { return ::__signbitf(__x); } +static __host__ __device__ int signbit(double __x) { return ::__signbitd(__x); } #pragma omp end declare variant -#endif - -__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); } -__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); } -__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); } -// For inscrutable reasons, __finite(), the double-precision version of -// __finitef, does not exist when compiling for MacOS. __isfinited is available -// everywhere and is just as good. -__DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); } -__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); } -__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); } - -#if defined(__OPENMP_NVPTX__) #pragma omp end declare variant -#endif +#else // !__OPENMP_NVPTX__ + +// Base path (CUDA): Return type matches __CUDA_CLASSIFIER_RET_TYPE. +// 'int' for MinGW, 'bool' for MSVC. +static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE isinf(float __x) { return ::__isinff(__x); } +static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE isinf(double __x) { return ::__isinf(__x); } +static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE isfinite(float __x) { return ::__finitef(__x); } +// MacOS: __finite is unavailable; __isfinited works everywhere. 
+static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE isfinite(double __x) { return ::__isfinited(__x); } +static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE isnan(float __x) { return ::__isnanf(__x); } +static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE isnan(double __x) { return ::__isnan(__x); } +static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE signbit(float __x) { return ::__signbitf(__x); } +static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE signbit(double __x) { return ::__signbitd(__x); } + +// Long double support (MinGW/Linux only). +// Long double wrappers (MSVC-guarded - intentional) +// On MSVC, long double == double, causing overload conflicts. +#if !defined(_MSC_VER) +static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE isfinite(long double __x) { return ::__finitel(__x); } +static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE isinf(long double __x) { return ::__isinfl(__x); } +static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE isnan(long double __x) { return ::__isnanl(__x); } +static __host__ __device__ __CUDA_CLASSIFIER_RET_TYPE signbit(long double __x) { return ::__signbitl(__x); } +#endif // !_MSC_VER -#endif +#endif // __OPENMP_NVPTX__ __DEVICE__ bool isgreater(float __x, float __y) { return __builtin_isgreater(__x, __y); @@ -167,8 +170,6 @@ __DEVICE__ float pow(float __base, int __iexp) { __DEVICE__ double pow(double __base, int __iexp) { return ::powi(__base, __iexp); } -__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); } -__DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); } __DEVICE__ float sin(float __x) { return ::sinf(__x); } __DEVICE__ float sinh(float __x) { return ::sinhf(__x); } __DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); } @@ -289,7 +290,7 @@ __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter); __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow); __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder); __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint); 
-__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round); +__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin) __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh) >From ff13ec335dace51851cb803b13180505deb52c4e Mon Sep 17 00:00:00 2001 From: fenodem <[email protected]> Date: Fri, 20 Mar 2026 12:26:24 +0000 Subject: [PATCH 4/4] Update __clang_cuda_runtime_wrapper.h --- .../Headers/__clang_cuda_runtime_wrapper.h | 65 ++++++++++++++++++- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h index 295f4191f9927..ee1313092d4c6 100644 --- a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h +++ b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h @@ -39,12 +39,45 @@ // Include some standard headers to avoid CUDA headers including them // while some required macros (like __THROW) are in a weird state. #include <climits> + +// --------------------------------------------------------------------------- +// MinGW (GCC) Compatibility Fix +// --------------------------------------------------------------------------- +// MinGW's math.h declares internal names like __isnanf, __signbitf as +// __host__ functions. This conflicts with our __host__ __device__ definitions. +// We rename them out of the way before including <cmath>. 
+// --------------------------------------------------------------------------- +#define __isnanf __mingw_hidden_isnanf +#define __isinf __mingw_hidden_isinf +#define __isinff __mingw_hidden_isinff +#define __finite __mingw_hidden_finite +#define __finitef __mingw_hidden_finitef +#define __signbit __mingw_hidden_signbit +#define __signbitf __mingw_hidden_signbitf +#define __isnanl __mingw_hidden_isnanl +#define __isinfl __mingw_hidden_isinfl +#define __finitel __mingw_hidden_finitel +#define __signbitl __mingw_hidden_signbitl + #include <cmath> #include <cstdlib> #include <stdlib.h> #include <string.h> #undef __CUDACC__ +// Restore the names so we can use them for our own definitions. +#undef __isnanf +#undef __isinf +#undef __isinff +#undef __finite +#undef __finitef +#undef __signbit +#undef __signbitf +#undef __isnanl +#undef __isinfl +#undef __finitel +#undef __signbitl + // math_functions.h from CUDA 13.2+ defines _NV_RSQRT_SPECIFIER. // Clang does not include it, so we need to define it ourselves. #if defined(__GNUC__) && defined(__GLIBC_PREREQ) @@ -218,12 +251,31 @@ inline __host__ double __signbitd(double x) { #define __USE_FAST_MATH__ 1 #endif +// --------------------------------------------------------------------------- +// Macro Poisoning - Universal (NOT Platform-Specific) +// --------------------------------------------------------------------------- +// Poison the standard names to prevent ODR violations or incorrect overloads +// from CUDA headers. 
+// --------------------------------------------------------------------------- +#define isfinite __cuda_disabled_isfinite +#define isinf __cuda_disabled_isinf +#define isnan __cuda_disabled_isnan +#define signbit __cuda_disabled_signbit + #if CUDA_VERSION >= 9000 #include "crt/math_functions.hpp" #else #include "math_functions.hpp" #endif +// --------------------------------------------------------------------------- +// Macro Restoration +// --------------------------------------------------------------------------- +#undef isfinite +#undef isinf +#undef isnan +#undef signbit + #pragma pop_macro("__USE_FAST_MATH__") #if CUDA_VERSION < 9000 @@ -342,7 +394,11 @@ __DEVICE__ unsigned int __isLocal(const void *p) { // conditional on __GNUC__. :) #pragma push_macro("signbit") #pragma push_macro("__GNUC__") -#undef __GNUC__ +#ifndef __GNUC__ +#define __GNUC__ 4 +#define __CLANG_CUDA_DEFINED_GNUC +#endif + #define signbit __ignored_cuda_signbit // CUDA-9 omits device-side definitions of some math functions if it sees @@ -365,6 +421,12 @@ __DEVICE__ unsigned int __isLocal(const void *p) { #endif #pragma pop_macro("_GLIBCXX_MATH_H") #pragma pop_macro("_LIBCPP_VERSION") + +// Restore original __GNUC__ state +#ifdef __CLANG_CUDA_DEFINED_GNUC +#undef __GNUC__ +#undef __CLANG_CUDA_DEFINED_GNUC +#endif #pragma pop_macro("__GNUC__") #pragma pop_macro("signbit") @@ -505,7 +567,6 @@ __device__ inline __cuda_builtin_gridDim_t::operator uint3() const { #include "curand_mtgp32_kernel.h" #pragma pop_macro("dim3") #pragma pop_macro("uint3") -#pragma pop_macro("__USE_FAST_MATH__") #pragma pop_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__") // CUDA runtime uses this undocumented function to access kernel launch _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
