https://github.com/fineg74 updated https://github.com/llvm/llvm-project/pull/198644
>From cfd211713f2e0071e51652c6e899455383550dc7 Mon Sep 17 00:00:00 2001 From: "Fine, Gregory" <[email protected]> Date: Tue, 19 May 2026 13:20:27 -0700 Subject: [PATCH 1/3] Refactor cmath OpenMP wrappers and add support for SPIRV backend. --- clang/lib/Headers/CMakeLists.txt | 1 + .../Headers/openmp_wrappers/__clang_cmath.h | 202 ++++++++++++++++++ clang/lib/Headers/openmp_wrappers/cmath | 94 ++------ .../test/Headers/openmp_device_math_isnan.cpp | 8 +- 4 files changed, 219 insertions(+), 86 deletions(-) create mode 100644 clang/lib/Headers/openmp_wrappers/__clang_cmath.h diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index d60ae2b5961e0..1e74ddce1a764 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -385,6 +385,7 @@ set(openmp_wrapper_files openmp_wrappers/__clang_openmp_device_functions.h openmp_wrappers/complex_cmath.h openmp_wrappers/new + openmp_wrappers/__clang_cmath.h ) set(llvm_offload_wrapper_files diff --git a/clang/lib/Headers/openmp_wrappers/__clang_cmath.h b/clang/lib/Headers/openmp_wrappers/__clang_cmath.h new file mode 100644 index 0000000000000..8d549f121033c --- /dev/null +++ b/clang/lib/Headers/openmp_wrappers/__clang_cmath.h @@ -0,0 +1,202 @@ +/*===---- __clang_spirv_cmath.h - SPIRV cmath decls -----------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CLANG_CMATH_H__ +#define __CLANG_CMATH_H__ + +#if !defined(__OPENMP_SPIRV__) && !defined(__OPENMP_AMDGCN__) && \ + !defined(__OPENMP_NVPTX__) +#error "This file is for SPIRV/HIP/CUDA OpenMP device compilation only." +#endif +#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow)) + +__DEVICE__ float sin(float __x) { return ::sinf(__x); } +__DEVICE__ float sinh(float __x) { return ::sinhf(__x); } +__DEVICE__ float cos(float __x) { return ::cosf(__x); } +__DEVICE__ float cosh(float __x) { return ::coshf(__x); } +__DEVICE__ double abs(double __x) { return ::fabs(__x); } +__DEVICE__ float abs(float __x) { return ::fabsf(__x); } +__DEVICE__ long long abs(long long __n) { return ::llabs(__n); } +__DEVICE__ long abs(long __n) { return ::labs(__n); } +__DEVICE__ float fma(float __x, float __y, float __z) { + return ::fmaf(__x, __y, __z); +} +__DEVICE__ int fpclassify(float __x) { + return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, + FP_ZERO, __x); +} +__DEVICE__ int fpclassify(double __x) { + return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, + FP_ZERO, __x); +} +__DEVICE__ float frexp(float __arg, int *__exp) { + return ::frexpf(__arg, __exp); +} +__DEVICE__ float acos(float __x) { return ::acosf(__x); } +__DEVICE__ float acosh(float __x) { return ::acoshf(__x); } +__DEVICE__ float asin(float __x) { return ::asinf(__x); } +__DEVICE__ float asinh(float __x) { return ::asinhf(__x); } +__DEVICE__ float atan(float __x) { return ::atanf(__x); } +__DEVICE__ float atanh(float __x) { return ::atanhf(__x); } +__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); } +__DEVICE__ float ceil(float __x) { return ::ceilf(__x); } +__DEVICE__ float exp(float __x) { return ::expf(__x); } +__DEVICE__ float exp2(float __x) { return ::exp2f(__x); } +__DEVICE__ float expm1(float __x) { return ::expm1f(__x); } +__DEVICE__ float fabs(float __x) { return ::fabsf(__x); } +__DEVICE__ float floor(float __x) { return ::floorf(__x); } +__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); } +__DEVICE__ float hypot(float __x, float __y) { return ::hypotf(__x, __y); } + +// For OpenMP we work around some old system headers that have non-conforming +// `isinf(float)` and `isnan(float)` implementations that return an `int`. We do +// this by providing two versions of these functions, differing only in the +// return type. To avoid conflicting definitions we disable implicit base +// function generation. That means we will end up with two specializations, one +// per type, but only one has a base function defined by the system header. +#pragma omp begin declare variant match( \ + implementation = {extension(disable_implicit_base)}) + +// FIXME: We lack an extension to customize the mangling of the variants, e.g., +// add a suffix. This means we would clash with the names of the variants +// (note that we do not create implicit base functions here). To avoid +// this clash we add a new trait to some of them that is always true +// (this is LLVM after all ;)). It will only influence the mangled name +// of the variants inside the inner region and avoid the clash. +#pragma omp begin declare variant match(implementation = {vendor(llvm)}) + +__DEVICE__ int isinf(float __x) { return ::__isinff(__x); } +__DEVICE__ int isinf(double __x) { return ::__isinf(__x); } +__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); } +__DEVICE__ int isfinite(double __x) { return ::__finite(__x); } +__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); } +__DEVICE__ int isnan(double __x) { return ::__isnan(__x); } + +#pragma omp end declare variant + +__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); } +__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); } +__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); } +__DEVICE__ bool isfinite(double __x) { return ::__finite(__x); } +__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); } +__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); } + +#pragma omp end declare variant + +__DEVICE__ bool isgreater(float __x, float __y) { + return __builtin_isgreater(__x, __y); +} +__DEVICE__ bool isgreater(double __x, double __y) { + return __builtin_isgreater(__x, __y); +} +__DEVICE__ bool isgreaterequal(float __x, float __y) { + return __builtin_isgreaterequal(__x, __y); +} +__DEVICE__ bool isgreaterequal(double __x, double __y) { + return __builtin_isgreaterequal(__x, __y); +} +__DEVICE__ bool isless(float __x, float __y) { + return __builtin_isless(__x, __y); +} +__DEVICE__ bool isless(double __x, double __y) { + return __builtin_isless(__x, __y); +} +__DEVICE__ bool islessequal(float __x, float __y) { + return __builtin_islessequal(__x, __y); +} +__DEVICE__ bool islessequal(double __x, double __y) { + return __builtin_islessequal(__x, __y); +} +__DEVICE__ bool islessgreater(float __x, float __y) { + return __builtin_islessgreater(__x, __y); +} +__DEVICE__ bool islessgreater(double __x, double __y) { + return __builtin_islessgreater(__x, __y); +} +__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); } +__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); } +__DEVICE__ bool isunordered(float __x, float __y) { + return __builtin_isunordered(__x, __y); +} +__DEVICE__ bool isunordered(double __x, double __y) { + return __builtin_isunordered(__x, __y); +} +__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); } +__DEVICE__ float pow(float __base, int __iexp) { + return ::powif(__base, __iexp); +} +__DEVICE__ double pow(double __base, int __iexp) { + return ::powi(__base, __iexp); +} +__DEVICE__ float remquo(float __x, float __y, int *__quo) { + return ::remquof(__x, __y, __quo); +} +__DEVICE__ float scalbln(float __x, long int __n) { + return ::scalblnf(__x, __n); +} +__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); } +__DEVICE__ bool signbit(double __x) { +#if defined(__OPENMP_NVPTX__) + return ::__signbitd(__x); +#else + return ::__signbit(__x); +#endif +} +__DEVICE__ float ldexp(float __arg, int __exp) { + return ::ldexpf(__arg, __exp); +} +__DEVICE__ float log(float __x) { return ::logf(__x); } +__DEVICE__ float log10(float __x) { return ::log10f(__x); } +__DEVICE__ float log1p(float __x) { return ::log1pf(__x); } +__DEVICE__ float log2(float __x) { return ::log2f(__x); } +__DEVICE__ float logb(float __x) { return ::logbf(__x); } + +__DEVICE__ float pow(float __base, float __exp) { + return ::powf(__base, __exp); +} +__DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); } +__DEVICE__ float tan(float __x) { return ::tanf(__x); } +__DEVICE__ float tanh(float __x) { return ::tanhf(__x); } +__DEVICE__ float cbrt(float __x) { return ::cbrtf(__x); } +__DEVICE__ float copysign(float __a, float __b) { + return ::copysignf(__a, __b); +} +__DEVICE__ float erf(float __x) { return ::erff(__x); } +__DEVICE__ float erfc(float __x) { return ::erfcf(__x); } +__DEVICE__ float fdim(float __a, float __b) { return ::fdimf(__a, __b); } +__DEVICE__ int ilogb(float __x) { return ::ilogbf(__x); } +__DEVICE__ float lgamma(float __x) { return ::lgammaf(__x); } +__DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); } +__DEVICE__ long long llrint(float __x) { return ::llrintf(__x); } +__DEVICE__ long long llround(float __x) { return ::llroundf(__x); } +__DEVICE__ long lrint(float __x) { return ::lrintf(__x); } +__DEVICE__ long lround(float __x) { return ::lroundf(__x); } +__DEVICE__ float rint(float __x) { return ::rintf(__x); } +__DEVICE__ float round(float __x) { return ::roundf(__x); } +__DEVICE__ float trunc(float __x) { return ::truncf(__x); } +__DEVICE__ float nearbyint(float __x) { return ::nearbyintf(__x); } +__DEVICE__ float nextafter(float __a, float __b) { + return ::nextafterf(__a, __b); +} +__DEVICE__ float remainder(float __a, float __b) { + return ::remainderf(__a, __b); +} +__DEVICE__ float scalbn(float __a, int __b) { return ::scalbnf(__a, __b); } + +#if defined(__OPENMP_AMDGCN__) +__DEVICE__ _Float16 fma(_Float16 __x, _Float16 __y, _Float16 __z) { + return __builtin_fmaf16(__x, __y, __z); +} +__DEVICE__ _Float16 pow(_Float16 __base, int __iexp) { + return __ocml_pown_f16(__base, __iexp); +} +#endif + +#undef __DEVICE__ +#endif // __CLANG_CMATH_H__ \ No newline at end of file diff --git a/clang/lib/Headers/openmp_wrappers/cmath b/clang/lib/Headers/openmp_wrappers/cmath index e1b71516e72c2..6df8e574e6528 100644 --- a/clang/lib/Headers/openmp_wrappers/cmath +++ b/clang/lib/Headers/openmp_wrappers/cmath @@ -26,107 +26,37 @@ // We need limits because __clang_cuda_cmath.h below uses `std::numeric_limit`. #include <limits> - +#ifdef __NVPTX__ #pragma omp begin declare variant match( \ device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any, allow_templates)}) #define __CUDA__ #define __OPENMP_NVPTX__ -#include <__clang_cuda_cmath.h> +#include <__clang_cmath.h> #undef __OPENMP_NVPTX__ #undef __CUDA__ -// Overloads not provided by the CUDA wrappers but by the CUDA system headers. -// Since we do not include the latter we define them ourselves. -#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow)) - -__DEVICE__ float acosh(float __x) { return ::acoshf(__x); } -__DEVICE__ float asinh(float __x) { return ::asinhf(__x); } -__DEVICE__ float atanh(float __x) { return ::atanhf(__x); } -__DEVICE__ float cbrt(float __x) { return ::cbrtf(__x); } -__DEVICE__ float erf(float __x) { return ::erff(__x); } -__DEVICE__ float erfc(float __x) { return ::erfcf(__x); } -__DEVICE__ float exp2(float __x) { return ::exp2f(__x); } -__DEVICE__ float expm1(float __x) { return ::expm1f(__x); } -__DEVICE__ float fdim(float __x, float __y) { return ::fdimf(__x, __y); } -__DEVICE__ float hypot(float __x, float __y) { return ::hypotf(__x, __y); } -__DEVICE__ int ilogb(float __x) { return ::ilogbf(__x); } -__DEVICE__ float lgamma(float __x) { return ::lgammaf(__x); } -__DEVICE__ long long int llrint(float __x) { return ::llrintf(__x); } -__DEVICE__ long long int llround(float __x) { return ::llroundf(__x); } -__DEVICE__ float log1p(float __x) { return ::log1pf(__x); } -__DEVICE__ float log2(float __x) { return ::log2f(__x); } -__DEVICE__ float logb(float __x) { return ::logbf(__x); } -__DEVICE__ long int lrint(float __x) { return ::lrintf(__x); } -__DEVICE__ long int lround(float __x) { return ::lroundf(__x); } -__DEVICE__ float nextafter(float __x, float __y) { - return ::nextafterf(__x, __y); -} -__DEVICE__ float remainder(float __x, float __y) { - return ::remainderf(__x, __y); -} -__DEVICE__ float scalbln(float __x, long int __y) { - return ::scalblnf(__x, __y); -} -__DEVICE__ float scalbn(float __x, int __y) { return ::scalbnf(__x, __y); } -__DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); } - -#undef __DEVICE__ - #pragma omp end declare variant +#endif // __NVPTX__ #ifdef __AMDGCN__ #pragma omp begin declare variant match(device = {arch(amdgcn)}) -#pragma push_macro("__constant__") -#define __constant__ __attribute__((constant)) #define __OPENMP_AMDGCN__ - -#include <__clang_hip_cmath.h> - -#pragma pop_macro("__constant__") +#include <__clang_cmath.h> #undef __OPENMP_AMDGCN__ -// Define overloads otherwise which are absent -#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow)) +#pragma omp end declare variant +#endif // __AMDGCN__ -__DEVICE__ float acos(float __x) { return ::acosf(__x); } -__DEVICE__ float acosh(float __x) { return ::acoshf(__x); } -__DEVICE__ float asin(float __x) { return ::asinf(__x); } -__DEVICE__ float asinh(float __x) { return ::asinhf(__x); } -__DEVICE__ float atan(float __x) { return ::atanf(__x); } -__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); } -__DEVICE__ float atanh(float __x) { return ::atanhf(__x); } -__DEVICE__ float cbrt(float __x) { return ::cbrtf(__x); } -__DEVICE__ float cosh(float __x) { return ::coshf(__x); } -__DEVICE__ float erf(float __x) { return ::erff(__x); } -__DEVICE__ float erfc(float __x) { return ::erfcf(__x); } -__DEVICE__ float exp2(float __x) { return ::exp2f(__x); } -__DEVICE__ float expm1(float __x) { return ::expm1f(__x); } -__DEVICE__ float fdim(float __x, float __y) { return ::fdimf(__x, __y); } -__DEVICE__ float hypot(float __x, float __y) { return ::hypotf(__x, __y); } -__DEVICE__ int ilogb(float __x) { return ::ilogbf(__x); } -__DEVICE__ float ldexp(float __arg, int __exp) { - return ::ldexpf(__arg, __exp); -} -__DEVICE__ float lgamma(float __x) { return ::lgammaf(__x); } -__DEVICE__ float log1p(float __x) { return ::log1pf(__x); } -__DEVICE__ float logb(float __x) { return ::logbf(__x); } -__DEVICE__ float nextafter(float __x, float __y) { - return ::nextafterf(__x, __y); -} -__DEVICE__ float remainder(float __x, float __y) { - return ::remainderf(__x, __y); -} -__DEVICE__ float scalbn(float __x, int __y) { return ::scalbnf(__x, __y); } -__DEVICE__ float sinh(float __x) { return ::sinhf(__x); } -__DEVICE__ float tan(float __x) { return ::tanf(__x); } -__DEVICE__ float tanh(float __x) { return ::tanhf(__x); } -__DEVICE__ float tgamma(float __x) { return ::tgammaf(__x); } +#ifdef __SPIRV__ +#pragma omp begin declare variant match(device = {arch(spirv64)}) -#undef __DEVICE__ +#define __OPENMP_SPIRV__ +#include <__clang_cmath.h> +#undef __OPENMP_SPIRV__ #pragma omp end declare variant -#endif // __AMDGCN__ +#endif // __SPIRV__ #endif diff --git a/clang/test/Headers/openmp_device_math_isnan.cpp b/clang/test/Headers/openmp_device_math_isnan.cpp index 3fd98813f2480..d67a71f887f8a 100644 --- a/clang/test/Headers/openmp_device_math_isnan.cpp +++ b/clang/test/Headers/openmp_device_math_isnan.cpp @@ -30,18 +30,18 @@ double math(float f, double d) { // INT_RETURN: call noundef i32 @__nv_isnanf(float // AMD_INT_RETURN_SAFE: call i1 @llvm.is.fpclass.f32(float{{.*}}, i32 3) // AMD_INT_RETURN_FAST: sitofp i32 {{.*}} to double - // SPIRV_INT_RETURN: call spir_func noundef i32 @_Z5isnanf(float + // SPIRV_INT_RETURN: call spir_func zeroext i1 @_Z13__spirv_IsNanf(float // BOOL_RETURN: call noundef i32 @__nv_isnanf(float - // SPIRV_BOOL_RETURN: call spir_func noundef zeroext i1 @_Z5isnanf(float + // SPIRV_BOOL_RETURN: call spir_func zeroext i1 @_Z13__spirv_IsNanf(float // AMD_BOOL_RETURN_SAFE: call i1 @llvm.is.fpclass.f32(float{{.*}}, i32 3) // AMD_BOOL_RETURN_FAST: icmp ne i32 {{.*}}, 0 r += std::isnan(f); // INT_RETURN: call noundef i32 @__nv_isnand(double - // SPIRV_INT_RETURN: call spir_func noundef i32 @_Z5isnand(double + // SPIRV_INT_RETURN: call spir_func zeroext i1 @_Z13__spirv_IsNand(double // AMD_INT_RETURN_SAFE: call i1 @llvm.is.fpclass.f64(double{{.*}}, i32 3) // AMD_INT_RETURN_FAST: sitofp i32 {{.*}} to double // BOOL_RETURN: call noundef i32 @__nv_isnand(double - // SPIRV_BOOL_RETURN: call spir_func noundef zeroext i1 @_Z5isnand(double + // SPIRV_BOOL_RETURN: call spir_func zeroext i1 @_Z13__spirv_IsNand(double // AMD_BOOL_RETURN_SAFE: call i1 @llvm.is.fpclass.f64(double{{.*}}, i32 3) // AMD_BOOL_RETURN_FAST: icmp ne i32 {{.*}}, 0 r += std::isnan(d); >From 76917dfcf956114040a3ac037f233b3468cbd7f5 Mon Sep 17 00:00:00 2001 From: "Fine, Gregory" <[email protected]> Date: Wed, 3 Jun 2026 20:11:17 -0700 Subject: [PATCH 2/3] Fix incorrect header description --- clang/lib/Headers/openmp_wrappers/__clang_cmath.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Headers/openmp_wrappers/__clang_cmath.h b/clang/lib/Headers/openmp_wrappers/__clang_cmath.h index 8d549f121033c..9580b3749570a 100644 --- a/clang/lib/Headers/openmp_wrappers/__clang_cmath.h +++ b/clang/lib/Headers/openmp_wrappers/__clang_cmath.h @@ -1,4 +1,4 @@ -/*===---- __clang_spirv_cmath.h - SPIRV cmath decls -----------------------=== +/*===---- __clang_cmath.h - cmath decls ------------------------------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. >From 64eed1b1f901ae3a315c075b6c2576d83435688c Mon Sep 17 00:00:00 2001 From: "Fine, Gregory" <[email protected]> Date: Thu, 4 Jun 2026 09:35:28 -0700 Subject: [PATCH 3/3] Fix incorrect merge --- clang/test/Headers/openmp_device_math_isnan.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Headers/openmp_device_math_isnan.cpp b/clang/test/Headers/openmp_device_math_isnan.cpp index 5973d513f1910..f392ad1f324f8 100644 --- a/clang/test/Headers/openmp_device_math_isnan.cpp +++ b/clang/test/Headers/openmp_device_math_isnan.cpp @@ -30,7 +30,7 @@ double math(float f, double d) { // INT_RETURN: call noundef i32 @__nv_isnanf(float // AMD_INT_RETURN_SAFE: call i1 @llvm.is.fpclass.f32(float{{.*}}, i32 3) // AMD_INT_RETURN_FAST: sitofp fast i32 {{.*}} to double - // SPIRV_INT_RETURN: call spir_func noundef i32 @_Z5isnanf(float + // SPIRV_INT_RETURN: call spir_func zeroext i1 @_Z13__spirv_IsNanf(float // BOOL_RETURN: call noundef i32 @__nv_isnanf(float // SPIRV_BOOL_RETURN: call spir_func zeroext i1 @_Z13__spirv_IsNanf(float // AMD_BOOL_RETURN_SAFE: call i1 @llvm.is.fpclass.f32(float{{.*}}, i32 3) _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
