https://github.com/fineg74 updated https://github.com/llvm/llvm-project/pull/179846
>From 05956a95c3ecb6408654fb26957e61697c61ea33 Mon Sep 17 00:00:00 2001 From: "Fine, Gregory" <[email protected]> Date: Wed, 4 Feb 2026 18:11:08 -0800 Subject: [PATCH 1/2] Add headers to support complex math for SPIRV --- clang/lib/Headers/CMakeLists.txt | 1 + .../Headers/__clang_spirv_complex_builtins.h | 267 ++++++++++++++++++ clang/lib/Headers/openmp_wrappers/complex | 8 +- clang/lib/Headers/openmp_wrappers/complex.h | 6 + 4 files changed, 281 insertions(+), 1 deletion(-) create mode 100644 clang/lib/Headers/__clang_spirv_complex_builtins.h diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index c92b370b88d2d..9e1c3988b4b83 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -141,6 +141,7 @@ set(riscv_files set(spirv_files __clang_spirv_builtins.h + __clang_spirv_complex_builtins.h ) set(systemz_files diff --git a/clang/lib/Headers/__clang_spirv_complex_builtins.h b/clang/lib/Headers/__clang_spirv_complex_builtins.h new file mode 100644 index 0000000000000..fd7a492b5744d --- /dev/null +++ b/clang/lib/Headers/__clang_spirv_complex_builtins.h @@ -0,0 +1,267 @@ +/*===-- __clang_spirv_complex_builtins - SPIRV impls of runtime complex fns ---=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CLANG_SPIRV_COMPLEX_BUILTINS +#define __CLANG_SPIRV_COMPLEX_BUILTINS + +// This header defines __muldc3, __mulsc3, __divdc3, and __divsc3. These are +// libgcc functions that clang assumes are available when compiling c99 complex +// operations. (These implementations come from libc++, and have been modified +// to work with SPIRV and OpenMP target offloading [in C and C++ mode].) 
+ +#pragma push_macro("__DEVICE__") +#if defined(__OPENMP_SPIRV__) +#include <__clang_spirv_libdevice_declares.h> +#pragma omp declare target +#define __DEVICE__ __attribute__((noinline, nothrow, cold, weak)) +#else +#define __DEVICE__ __device__ inline +#endif + +// To make the algorithms available for C and C++ in SPIRV and OpenMP we select +// different but equivalent function versions. TODO: For OpenMP we currently +// select the native builtins as the overload support for templates is lacking. +#if !defined(__OPENMP_SPIRV__) +#define _ISNANd std::isnan +#define _ISNANf std::isnan +#define _ISINFd std::isinf +#define _ISINFf std::isinf +#define _ISFINITEd std::isfinite +#define _ISFINITEf std::isfinite +#define _COPYSIGNd std::copysign +#define _COPYSIGNf std::copysign +#define _SCALBNd std::scalbn +#define _SCALBNf std::scalbn +#define _ABSd std::abs +#define _ABSf std::abs +#define _LOGBd std::logb +#define _LOGBf std::logb +// Rather than pulling in std::max from algorithm everytime, use available ::max. 
+#define _fmaxd max +#define _fmaxf max +#else +#define _ISNANd __spirv_IsNan +#define _ISNANf __spirv_IsNan +#define _ISINFd __spirv_IsInf +#define _ISINFf __spirv_IsInf +#define _ISFINITEd __spirv_IsFinite +#define _ISFINITEf __spirv_IsFinite +#define _COPYSIGNd __spirv_ocl_copysign +#define _COPYSIGNf __spirv_ocl_copysign +#define _SCALBNd __spirv_ocl_ldexp +#define _SCALBNf __spirv_ocl_ldexp +#define _ABSd __spirv_ocl_fabs +#define _ABSf __spirv_ocl_fabs +#define _LOGBd __spirv_ocl_logb +#define _LOGBf __spirv_ocl_logb +#define _fmaxd __spirv_ocl_fmax +#define _fmaxf __spirv_ocl_fmax +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + +__DEVICE__ double _Complex __muldc3(double __a, double __b, double __c, + double __d) { + double __ac = __a * __c; + double __bd = __b * __d; + double __ad = __a * __d; + double __bc = __b * __c; + double _Complex z; + __real__(z) = __ac - __bd; + __imag__(z) = __ad + __bc; + if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) { + int __recalc = 0; + if (_ISINFd(__a) || _ISINFd(__b)) { + __a = _COPYSIGNd(_ISINFd(__a) ? 1 : 0, __a); + __b = _COPYSIGNd(_ISINFd(__b) ? 1 : 0, __b); + if (_ISNANd(__c)) + __c = _COPYSIGNd(0, __c); + if (_ISNANd(__d)) + __d = _COPYSIGNd(0, __d); + __recalc = 1; + } + if (_ISINFd(__c) || _ISINFd(__d)) { + __c = _COPYSIGNd(_ISINFd(__c) ? 1 : 0, __c); + __d = _COPYSIGNd(_ISINFd(__d) ? 1 : 0, __d); + if (_ISNANd(__a)) + __a = _COPYSIGNd(0, __a); + if (_ISNANd(__b)) + __b = _COPYSIGNd(0, __b); + __recalc = 1; + } + if (!__recalc && + (_ISINFd(__ac) || _ISINFd(__bd) || _ISINFd(__ad) || _ISINFd(__bc))) { + if (_ISNANd(__a)) + __a = _COPYSIGNd(0, __a); + if (_ISNANd(__b)) + __b = _COPYSIGNd(0, __b); + if (_ISNANd(__c)) + __c = _COPYSIGNd(0, __c); + if (_ISNANd(__d)) + __d = _COPYSIGNd(0, __d); + __recalc = 1; + } + if (__recalc) { + // Can't use std::numeric_limits<double>::infinity() -- that doesn't have + // a device overload (and isn't constexpr before C++11, naturally). 
+ __real__(z) = __builtin_huge_val() * (__a * __c - __b * __d); + __imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c); + } + } + return z; +} + +__DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) { + float __ac = __a * __c; + float __bd = __b * __d; + float __ad = __a * __d; + float __bc = __b * __c; + float _Complex z; + __real__(z) = __ac - __bd; + __imag__(z) = __ad + __bc; + if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) { + int __recalc = 0; + if (_ISINFf(__a) || _ISINFf(__b)) { + __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a); + __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b); + if (_ISNANf(__c)) + __c = _COPYSIGNf(0, __c); + if (_ISNANf(__d)) + __d = _COPYSIGNf(0, __d); + __recalc = 1; + } + if (_ISINFf(__c) || _ISINFf(__d)) { + __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c); + __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d); + if (_ISNANf(__a)) + __a = _COPYSIGNf(0, __a); + if (_ISNANf(__b)) + __b = _COPYSIGNf(0, __b); + __recalc = 1; + } + if (!__recalc && + (_ISINFf(__ac) || _ISINFf(__bd) || _ISINFf(__ad) || _ISINFf(__bc))) { + if (_ISNANf(__a)) + __a = _COPYSIGNf(0, __a); + if (_ISNANf(__b)) + __b = _COPYSIGNf(0, __b); + if (_ISNANf(__c)) + __c = _COPYSIGNf(0, __c); + if (_ISNANf(__d)) + __d = _COPYSIGNf(0, __d); + __recalc = 1; + } + if (__recalc) { + __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d); + __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c); + } + } + return z; +} + +__DEVICE__ double _Complex __divdc3(double __a, double __b, double __c, + double __d) { + int __ilogbw = 0; + // Can't use std::max, because that's defined in <algorithm>, and we don't + // want to pull that in for every compile. The CUDA headers define + // ::max(float, float) and ::max(double, double), which is sufficient for us. 
+ double __logbw = _LOGBd(_fmaxd(_ABSd(__c), _ABSd(__d))); + if (_ISFINITEd(__logbw)) { + __ilogbw = (int)__logbw; + __c = _SCALBNd(__c, -__ilogbw); + __d = _SCALBNd(__d, -__ilogbw); + } + double __denom = __c * __c + __d * __d; + double _Complex z; + __real__(z) = _SCALBNd((__a * __c + __b * __d) / __denom, -__ilogbw); + __imag__(z) = _SCALBNd((__b * __c - __a * __d) / __denom, -__ilogbw); + if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) { + if ((__denom == 0.0) && (!_ISNANd(__a) || !_ISNANd(__b))) { + __real__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __a; + __imag__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __b; + } else if ((_ISINFd(__a) || _ISINFd(__b)) && _ISFINITEd(__c) && + _ISFINITEd(__d)) { + __a = _COPYSIGNd(_ISINFd(__a) ? 1.0 : 0.0, __a); + __b = _COPYSIGNd(_ISINFd(__b) ? 1.0 : 0.0, __b); + __real__(z) = __builtin_huge_val() * (__a * __c + __b * __d); + __imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d); + } else if (_ISINFd(__logbw) && __logbw > 0.0 && _ISFINITEd(__a) && + _ISFINITEd(__b)) { + __c = _COPYSIGNd(_ISINFd(__c) ? 1.0 : 0.0, __c); + __d = _COPYSIGNd(_ISINFd(__d) ? 
1.0 : 0.0, __d); + __real__(z) = 0.0 * (__a * __c + __b * __d); + __imag__(z) = 0.0 * (__b * __c - __a * __d); + } + } + return z; +} + +__DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) { + int __ilogbw = 0; + float __logbw = _LOGBf(_fmaxf(_ABSf(__c), _ABSf(__d))); + if (_ISFINITEf(__logbw)) { + __ilogbw = (int)__logbw; + __c = _SCALBNf(__c, -__ilogbw); + __d = _SCALBNf(__d, -__ilogbw); + } + float __denom = __c * __c + __d * __d; + float _Complex z; + __real__(z) = _SCALBNf((__a * __c + __b * __d) / __denom, -__ilogbw); + __imag__(z) = _SCALBNf((__b * __c - __a * __d) / __denom, -__ilogbw); + if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) { + if ((__denom == 0) && (!_ISNANf(__a) || !_ISNANf(__b))) { + __real__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __a; + __imag__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __b; + } else if ((_ISINFf(__a) || _ISINFf(__b)) && _ISFINITEf(__c) && + _ISFINITEf(__d)) { + __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a); + __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b); + __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d); + __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d); + } else if (_ISINFf(__logbw) && __logbw > 0 && _ISFINITEf(__a) && + _ISFINITEf(__b)) { + __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c); + __d = _COPYSIGNf(_ISINFf(__d) ? 
1 : 0, __d); + __real__(z) = 0 * (__a * __c + __b * __d); + __imag__(z) = 0 * (__b * __c - __a * __d); + } + } + return z; +} + +#if defined(__cplusplus) +} // extern "C" +#endif + +#undef _ISNANd +#undef _ISNANf +#undef _ISINFd +#undef _ISINFf +#undef _COPYSIGNd +#undef _COPYSIGNf +#undef _ISFINITEd +#undef _ISFINITEf +#undef _SCALBNd +#undef _SCALBNf +#undef _ABSd +#undef _ABSf +#undef _LOGBd +#undef _LOGBf +#undef _fmaxd +#undef _fmaxf + +#if defined(__OPENMP_SPIRV__) +#pragma omp end declare target +#endif + +#pragma pop_macro("__DEVICE__") + +#endif // __CLANG_SPIRV_COMPLEX_BUILTINS \ No newline at end of file diff --git a/clang/lib/Headers/openmp_wrappers/complex b/clang/lib/Headers/openmp_wrappers/complex index 1ceecc1af8aec..b23c4b6d7d2be 100644 --- a/clang/lib/Headers/openmp_wrappers/complex +++ b/clang/lib/Headers/openmp_wrappers/complex @@ -29,6 +29,12 @@ #undef __OPENMP_AMDGCN__ #endif // __AMDGCN__ +#ifdef __SPIRV__ +#define __OPENMP_SPIRV__ +#include <__clang_spirv_complex_builtins.h> +#undef __OPENMP_SPIRV__ +#endif // __SPIRV__ + #endif // Grab the host header too. @@ -45,7 +51,7 @@ #ifndef _LIBCPP_STD_VER #pragma omp begin declare variant match( \ - device = {arch(amdgcn, nvptx, nvptx64)}, \ + device = {arch(amdgcn, nvptx, nvptx64, spirv64)}, \ implementation = {extension(match_any, allow_templates)}) #include <complex_cmath.h> diff --git a/clang/lib/Headers/openmp_wrappers/complex.h b/clang/lib/Headers/openmp_wrappers/complex.h index 7e7c0866426bc..81501428b0f90 100644 --- a/clang/lib/Headers/openmp_wrappers/complex.h +++ b/clang/lib/Headers/openmp_wrappers/complex.h @@ -29,6 +29,12 @@ #undef __OPENMP_AMDGCN__ #endif +#ifdef __SPIRV__ +#define __OPENMP_SPIRV__ +#include <__clang_spirv_complex_builtins.h> +#undef __OPENMP_SPIRV__ +#endif // __SPIRV__ + #endif // Grab the host header too. 
>From 1c95f420df1e797085ba9b7b611d7eed3a2dde46 Mon Sep 17 00:00:00 2001 From: "Fine, Gregory" <[email protected]> Date: Wed, 4 Feb 2026 18:40:22 -0800 Subject: [PATCH 2/2] Fix formatting issues --- clang/lib/Headers/__clang_spirv_complex_builtins.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/clang/lib/Headers/__clang_spirv_complex_builtins.h b/clang/lib/Headers/__clang_spirv_complex_builtins.h index fd7a492b5744d..43ad92fd5972b 100644 --- a/clang/lib/Headers/__clang_spirv_complex_builtins.h +++ b/clang/lib/Headers/__clang_spirv_complex_builtins.h @@ -1,4 +1,4 @@ -/*===-- __clang_spirv_complex_builtins - SPIRV impls of runtime complex fns ---=== +/*==-- __clang_spirv_complex_builtins - SPIRV impls of runtime complex fns --== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. @@ -17,7 +17,6 @@ #pragma push_macro("__DEVICE__") #if defined(__OPENMP_SPIRV__) -#include <__clang_spirv_libdevice_declares.h> #pragma omp declare target #define __DEVICE__ __attribute__((noinline, nothrow, cold, weak)) #else @@ -42,7 +41,6 @@ #define _ABSf std::abs #define _LOGBd std::logb #define _LOGBf std::logb -// Rather than pulling in std::max from algorithm everytime, use available ::max. #define _fmaxd max #define _fmaxf max #else _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
