https://github.com/jhuber6 created https://github.com/llvm/llvm-project/pull/203804
Summary: This should recombine the split constant for this case. The performance impact should be negligible for such large math functions: we get an extra add, but in exchange the results should improve by 1 ULP. This was primarily done to match what AMD's math libraries do; with this change we are byte-for-byte identical in output. From f711108fa03d09f35cc51ccd061c29d9e3d62d31 Mon Sep 17 00:00:00 2001 From: Joseph Huber <[email protected]> Date: Sun, 14 Jun 2026 17:39:48 -0500 Subject: [PATCH] [libclc] Use FMA for the pi reconstruction in acos / atan Summary: This should recombine the split constant for this case. The performance impact should be negligible for such large math functions: we get an extra add, but in exchange the results should improve by 1 ULP. This was primarily done to match what AMD's math libraries do; with this change we are byte-for-byte identical in output. --- libclc/clc/lib/generic/math/clc_acos.inc | 8 ++++---- libclc/clc/lib/generic/math/clc_atan.inc | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/libclc/clc/lib/generic/math/clc_acos.inc b/libclc/clc/lib/generic/math/clc_acos.inc index 32e007a542799..01feed0f35636 100644 --- a/libclc/clc/lib/generic/math/clc_acos.inc +++ b/libclc/clc/lib/generic/math/clc_acos.inc @@ -75,7 +75,7 @@ _CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE __clc_acos(__CLC_GENTYPE x) { static _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE __clc_acos_identity_reduction( __CLC_GENTYPE x, __CLC_GENTYPE r, __CLC_GENTYPE u, __CLC_GENTYPE z) { __CLC_EP_PAIR s = __clc_ep_sqrt(r); - __CLC_GENTYPE zm = __clc_mad(0x1.dd9ad336a0500p+0, 0x1.af154eeb562d6p+0, + __CLC_GENTYPE zm = __clc_fma(0x1.dd9ad336a0500p+0, 0x1.af154eeb562d6p+0, -2.0 * __clc_mad(s.hi, u, s.hi)); __CLC_GENTYPE zp = 2.0 * (s.hi + __clc_mad(s.hi, u, s.lo)); z = x < 0.0 ? 
zm : zp; @@ -114,7 +114,7 @@ _CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE __clc_acos(__CLC_GENTYPE x) { 0x1.8ed60a300c8d2p-7), 0x1.c6fa84b77012bp-7), 0x1.1c6c111dccb70p-6), 0x1.6e89f0a0adacfp-6), 0x1.f1c72c668963fp-6), 0x1.6db6db41ce4bdp-5), 0x1.333333336fd5bp-4), 0x1.5555555555380p-3); - __CLC_GENTYPE z = __clc_mad(0x1.dd9ad336a0500p-1, 0x1.af154eeb562d6p+0, + __CLC_GENTYPE z = __clc_fma(0x1.dd9ad336a0500p-1, 0x1.af154eeb562d6p+0, -__clc_mad(x, u, x)); #ifdef __CLC_SCALAR @@ -156,9 +156,9 @@ _CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE __clc_acos(__CLC_GENTYPE x) { __CLC_GENTYPE s = __clc_sqrt_fast(r); __CLC_GENTYPE ztp = 2.0h * __clc_mad(s, u, s); - __CLC_GENTYPE ztn = __clc_mad(0x1.ea8p+0h, 0x1.a3cp+0h, -ztp); + __CLC_GENTYPE ztn = __clc_fma(0x1.ea8p+0h, 0x1.a3cp+0h, -ztp); __CLC_GENTYPE zt = x < 0.0h ? ztn : ztp; - __CLC_GENTYPE z = __clc_mad(0x1.ea8p-1h, 0x1.a3cp+0h, -__clc_mad(x, u, x)); + __CLC_GENTYPE z = __clc_fma(0x1.ea8p-1h, 0x1.a3cp+0h, -__clc_mad(x, u, x)); z = ax > 0.5h ? zt : z; return z; diff --git a/libclc/clc/lib/generic/math/clc_atan.inc b/libclc/clc/lib/generic/math/clc_atan.inc index 83d849cc54590..8dae127af7d28 100644 --- a/libclc/clc/lib/generic/math/clc_atan.inc +++ b/libclc/clc/lib/generic/math/clc_atan.inc @@ -49,7 +49,7 @@ _CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_HALFN __clc_atan(__CLC_HALFN x) { __CLC_HALFN a = __clc_atan_reduced(v); - __CLC_HALFN y = __clc_mad(0x1.ea8p-1h, 0x1.a3cp+0h, -a); + __CLC_HALFN y = __clc_fma(0x1.ea8p-1h, 0x1.a3cp+0h, -a); a = g ? y : a; return __clc_copysign(a, x); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
