Author: Matt Arsenault Date: 2026-03-24T12:01:30+01:00 New Revision: f046f4518dc7c707289b2fc8b748d3034cfa4c15
URL: https://github.com/llvm/llvm-project/commit/f046f4518dc7c707289b2fc8b748d3034cfa4c15 DIFF: https://github.com/llvm/llvm-project/commit/f046f4518dc7c707289b2fc8b748d3034cfa4c15.diff LOG: libclc: Update tanh (#188215) This was originally ported from rocm device libs in f51df5ba8c4512dbeb7828ac0c34f89177b551d6. Merge in more recent changes. Added: Modified: libclc/clc/lib/generic/math/clc_tanh.cl libclc/clc/lib/generic/math/clc_tanh.inc Removed: ################################################################################ diff --git a/libclc/clc/lib/generic/math/clc_tanh.cl b/libclc/clc/lib/generic/math/clc_tanh.cl index c0e6f60f6fe4c..cf9d5679c199d 100644 --- a/libclc/clc/lib/generic/math/clc_tanh.cl +++ b/libclc/clc/lib/generic/math/clc_tanh.cl @@ -7,15 +7,14 @@ //===----------------------------------------------------------------------===// #include "clc/clc_convert.h" -#include "clc/internal/clc.h" +#include "clc/math/clc_copysign.h" +#include "clc/math/clc_ep.h" #include "clc/math/clc_exp.h" -#include "clc/math/clc_fma.h" +#include "clc/math/clc_exp2_fast.h" +#include "clc/math/clc_fabs.h" #include "clc/math/clc_mad.h" -#include "clc/math/math.h" -#include "clc/math/tables.h" -#include "clc/relational/clc_isinf.h" -#include "clc/relational/clc_isnan.h" -#include "clc/shared/clc_min.h" +#include "clc/math/clc_recip_fast.h" +#include "clc/math/clc_tanh.h" #define __CLC_BODY "clc_tanh.inc" #include "clc/math/gentype.inc" diff --git a/libclc/clc/lib/generic/math/clc_tanh.inc b/libclc/clc/lib/generic/math/clc_tanh.inc index a25fd58fcbeaf..4daf879001b1d 100644 --- a/libclc/clc/lib/generic/math/clc_tanh.inc +++ b/libclc/clc/lib/generic/math/clc_tanh.inc @@ -8,130 +8,54 @@ #if __CLC_FPSIZE == 32 -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_tanh(__CLC_GENTYPE x) { - // The definition of tanh(x) is sinh(x)/cosh(x), which is also equivalent - // to the following three formulae: - // 1. (exp(x) - exp(-x))/(exp(x) + exp(-x)) - // 2. (1 - (2/(exp(2*x) + 1 ))) - // 3. 
(exp(2*x) - 1)/(exp(2*x) + 1) - // but computationally, some formulae are better on some ranges. - - const __CLC_GENTYPE large_threshold = 0x1.0a2b24p+3f; - - __CLC_UINTN ux = __CLC_AS_UINTN(x); - __CLC_UINTN aux = ux & EXSIGNBIT_SP32; - __CLC_UINTN xs = ux ^ aux; - - __CLC_GENTYPE y = __CLC_AS_GENTYPE(aux); - __CLC_GENTYPE y2 = y * y; - - __CLC_GENTYPE a1 = __clc_mad( - y2, __clc_mad(y2, 0.4891631088530669873e-4F, -0.14628356048797849e-2F), - -0.28192806108402678e0F); - __CLC_GENTYPE b1 = - __clc_mad(y2, 0.3427017942262751343e0F, 0.845784192581041099e0F); - - __CLC_GENTYPE a2 = __clc_mad( - y2, __clc_mad(y2, 0.3827534993599483396e-4F, -0.12325644183611929e-2F), - -0.24069858695196524e0F); - __CLC_GENTYPE b2 = - __clc_mad(y2, 0.292529068698052819e0F, 0.72209738473684982e0F); - - __CLC_INTN c = y < 0.9f; - __CLC_GENTYPE a = c ? a1 : a2; - __CLC_GENTYPE b = c ? b1 : b2; - __CLC_GENTYPE zlo = __clc_mad(MATH_DIVIDE(a, b), y * y2, y); - - __CLC_GENTYPE p = __clc_exp(2.0f * y) + 1.0f; - __CLC_GENTYPE zhi = 1.0F - MATH_DIVIDE(2.0F, p); - - __CLC_GENTYPE z = y <= 1.0f ? zlo : zhi; - z = __CLC_AS_GENTYPE(xs | __CLC_AS_UINTN(z)); +static _CLC_OVERLOAD _CLC_CONST __CLC_FLOATN __clc_tanh_small(__CLC_FLOATN y) { + __CLC_FLOATN y2 = y * y; + __CLC_FLOATN p = __clc_mad( + y2, + __clc_mad(y2, + __clc_mad(y2, __clc_mad(y2, -0x1.758e7ap-8f, 0x1.521192p-6f), + -0x1.b8389cp-5f), + 0x1.110704p-3f), + -0x1.555532p-2f); + return __clc_mad(y2, y * p, y); +} - // Edge cases - __CLC_GENTYPE sone = __CLC_AS_GENTYPE(0x3f800000U | xs); - z = y > large_threshold ? sone : z; - z = aux < 0x39000000 || aux > 0x7f800000 ? x : z; +static _CLC_OVERLOAD _CLC_CONST __CLC_FLOATN __clc_tanh_large(__CLC_FLOATN y) { + __CLC_FLOATN t = __clc_exp(2.0f * y); + return __clc_mad(-2.0f, __clc_recip_fast(t + 1.0f), 1.0f); +} - return z; +_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_FLOATN __clc_tanh(__CLC_FLOATN x) { + __CLC_FLOATN y = __clc_fabs(x); + __CLC_FLOATN z = y < 0.625f ? 
__clc_tanh_small(y) : __clc_tanh_large(y); + return __clc_copysign(z, x); } #elif __CLC_FPSIZE == 64 -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_tanh(__CLC_GENTYPE x) { - // The definition of tanh(x) is sinh(x)/cosh(x), which is also equivalent - // to the following three formulae: - // 1. (exp(x) - exp(-x))/(exp(x) + exp(-x)) - // 2. (1 - (2/(exp(2*x) + 1 ))) - // 3. (exp(2*x) - 1)/(exp(2*x) + 1) - // but computationally, some formulae are better on some ranges. - - // The point at which e^-x is insignificant compared to e^x = ln(2^27) - const __CLC_GENTYPE large_threshold = 0x1.2b708872320e2p+4; - - __CLC_ULONGN ux = __CLC_AS_ULONGN(x); - __CLC_ULONGN ax = ux & ~SIGNBIT_DP64; - __CLC_ULONGN sx = ux ^ ax; - __CLC_GENTYPE y = __CLC_AS_GENTYPE(ax); - __CLC_GENTYPE y2 = y * y; - - // y < 0.9 - __CLC_GENTYPE znl = - __clc_fma(y2, - __clc_fma(y2, - __clc_fma(y2, -0.142077926378834722618091e-7, - -0.200047621071909498730453e-3), - -0.176016349003044679402273e-1), - -0.274030424656179760118928e0); - - __CLC_GENTYPE zdl = - __clc_fma(y2, - __clc_fma(y2, - __clc_fma(y2, 0.2091140262529164482568557e-3, - 0.201562166026937652780575e-1), - 0.381641414288328849317962e0), - 0.822091273968539282568011e0); - - // 0.9 <= y <= 1 - __CLC_GENTYPE znm = - __clc_fma(y2, - __clc_fma(y2, - __clc_fma(y2, -0.115475878996143396378318e-7, - -0.165597043903549960486816e-3), - -0.146173047288731678404066e-1), - -0.227793870659088295252442e0); - - __CLC_GENTYPE zdm = - __clc_fma(y2, - __clc_fma(y2, - __clc_fma(y2, 0.173076050126225961768710e-3, - 0.167358775461896562588695e-1), - 0.317204558977294374244770e0), - 0.683381611977295894959554e0); - - __CLC_LONGN c = y < 0.9; - __CLC_GENTYPE zn = c ? znl : znm; - __CLC_GENTYPE zd = c ? zdl : zdm; - __CLC_GENTYPE z = y + y * y2 * MATH_DIVIDE(zn, zd); - - // y > 1 - __CLC_GENTYPE p = __clc_exp(2.0 * y) + 1.0; - __CLC_GENTYPE zg = 1.0 - 2.0 / p; - - z = y > 1.0 ? zg : z; - - // Other cases - z = y < 0x1.0p-28 || ax > PINFBITPATT_DP64 ? 
x : z; +_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_DOUBLEN __clc_tanh(__CLC_DOUBLEN x) { + __CLC_DOUBLEN y = __clc_fabs(x); + __CLC_EP_PAIR e = __clc_ep_exp_extended(__clc_ep_make_pair(y, 0.0)); + __CLC_EP_PAIR ei = __clc_ep_recip(e); + __CLC_EP_PAIR t = + __clc_ep_fast_div(__clc_ep_fast_sub(e, ei), __clc_ep_fast_add(e, ei)); + __CLC_DOUBLEN z = t.hi; - z = y > large_threshold ? 1.0 : z; + z = y > 19.0625 ? 1.0 : z; + z = y < 0x1.0p-27 ? y : z; - return __CLC_AS_GENTYPE(sx | __CLC_AS_ULONGN(z)); + return __clc_copysign(z, x); } #elif __CLC_FPSIZE == 16 -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_tanh(__CLC_GENTYPE x) { - return __CLC_CONVERT_GENTYPE(__clc_tanh(__CLC_CONVERT_FLOATN(x))); +_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_HALFN __clc_tanh(__CLC_HALFN hx) { + __CLC_FLOATN x = __CLC_CONVERT_FLOATN(hx) * (__CLC_FLOATN)M_LOG2E; + __CLC_FLOATN a = __clc_exp2_fast(x); + __CLC_FLOATN b = __clc_exp2_fast(-x); + __CLC_HALFN one = __clc_copysign(1.0h, hx); + __CLC_HALFN ret = __CLC_CONVERT_HALFN((a - b) * __clc_recip_fast(a + b)); + return __clc_fabs(hx) > 4.5h ? one : ret; } #endif _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
