Author: Joseph Huber Date: 2026-06-14T20:32:57-05:00 New Revision: 8b6551e49395be0dc878e7acff1edd31f4204143
URL: https://github.com/llvm/llvm-project/commit/8b6551e49395be0dc878e7acff1edd31f4204143 DIFF: https://github.com/llvm/llvm-project/commit/8b6551e49395be0dc878e7acff1edd31f4204143.diff LOG: [libclc] Use FMA for the pi reconstruction in acos / atan (#203804) Summary: This should recombine the split constant for this case. The performance impact should be negligible for such large math functions: we get an extra add, but in exchange the results should improve by 1 ULP. This was primarily done to match what AMD's math libraries do; with this change we are byte-for-byte identical in output. Added: Modified: libclc/clc/lib/generic/math/clc_acos.inc libclc/clc/lib/generic/math/clc_atan.inc Removed: ################################################################################ diff --git a/libclc/clc/lib/generic/math/clc_acos.inc b/libclc/clc/lib/generic/math/clc_acos.inc index 32e007a542799..01feed0f35636 100644 --- a/libclc/clc/lib/generic/math/clc_acos.inc +++ b/libclc/clc/lib/generic/math/clc_acos.inc @@ -75,7 +75,7 @@ _CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE __clc_acos(__CLC_GENTYPE x) { static _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE __clc_acos_identity_reduction( __CLC_GENTYPE x, __CLC_GENTYPE r, __CLC_GENTYPE u, __CLC_GENTYPE z) { __CLC_EP_PAIR s = __clc_ep_sqrt(r); - __CLC_GENTYPE zm = __clc_mad(0x1.dd9ad336a0500p+0, 0x1.af154eeb562d6p+0, + __CLC_GENTYPE zm = __clc_fma(0x1.dd9ad336a0500p+0, 0x1.af154eeb562d6p+0, -2.0 * __clc_mad(s.hi, u, s.hi)); __CLC_GENTYPE zp = 2.0 * (s.hi + __clc_mad(s.hi, u, s.lo)); z = x < 0.0 ? 
zm : zp; @@ -114,7 +114,7 @@ _CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE __clc_acos(__CLC_GENTYPE x) { 0x1.8ed60a300c8d2p-7), 0x1.c6fa84b77012bp-7), 0x1.1c6c111dccb70p-6), 0x1.6e89f0a0adacfp-6), 0x1.f1c72c668963fp-6), 0x1.6db6db41ce4bdp-5), 0x1.333333336fd5bp-4), 0x1.5555555555380p-3); - __CLC_GENTYPE z = __clc_mad(0x1.dd9ad336a0500p-1, 0x1.af154eeb562d6p+0, + __CLC_GENTYPE z = __clc_fma(0x1.dd9ad336a0500p-1, 0x1.af154eeb562d6p+0, -__clc_mad(x, u, x)); #ifdef __CLC_SCALAR @@ -156,9 +156,9 @@ _CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE __clc_acos(__CLC_GENTYPE x) { __CLC_GENTYPE s = __clc_sqrt_fast(r); __CLC_GENTYPE ztp = 2.0h * __clc_mad(s, u, s); - __CLC_GENTYPE ztn = __clc_mad(0x1.ea8p+0h, 0x1.a3cp+0h, -ztp); + __CLC_GENTYPE ztn = __clc_fma(0x1.ea8p+0h, 0x1.a3cp+0h, -ztp); __CLC_GENTYPE zt = x < 0.0h ? ztn : ztp; - __CLC_GENTYPE z = __clc_mad(0x1.ea8p-1h, 0x1.a3cp+0h, -__clc_mad(x, u, x)); + __CLC_GENTYPE z = __clc_fma(0x1.ea8p-1h, 0x1.a3cp+0h, -__clc_mad(x, u, x)); z = ax > 0.5h ? zt : z; return z; diff --git a/libclc/clc/lib/generic/math/clc_atan.inc b/libclc/clc/lib/generic/math/clc_atan.inc index 83d849cc54590..8dae127af7d28 100644 --- a/libclc/clc/lib/generic/math/clc_atan.inc +++ b/libclc/clc/lib/generic/math/clc_atan.inc @@ -49,7 +49,7 @@ _CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_HALFN __clc_atan(__CLC_HALFN x) { __CLC_HALFN a = __clc_atan_reduced(v); - __CLC_HALFN y = __clc_mad(0x1.ea8p-1h, 0x1.a3cp+0h, -a); + __CLC_HALFN y = __clc_fma(0x1.ea8p-1h, 0x1.a3cp+0h, -a); a = g ? y : a; return __clc_copysign(a, x); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
