Author: Joseph Huber
Date: 2026-06-14T20:32:57-05:00
New Revision: 8b6551e49395be0dc878e7acff1edd31f4204143

URL: 
https://github.com/llvm/llvm-project/commit/8b6551e49395be0dc878e7acff1edd31f4204143
DIFF: 
https://github.com/llvm/llvm-project/commit/8b6551e49395be0dc878e7acff1edd31f4204143.diff

LOG: [libclc] Use FMA for the pi reconstruction in acos / atan (#203804)

Summary:
This should recombine the split constant for this case. The performance
impact should be negligible for such large math functions: we get an extra
add, but in exchange the results should improve by 1 ULP.

This was primarily done to match what AMD's math libraries do; with this
change we are byte-for-byte identical in output.

Added: 
    

Modified: 
    libclc/clc/lib/generic/math/clc_acos.inc
    libclc/clc/lib/generic/math/clc_atan.inc

Removed: 
    


################################################################################
diff  --git a/libclc/clc/lib/generic/math/clc_acos.inc 
b/libclc/clc/lib/generic/math/clc_acos.inc
index 32e007a542799..01feed0f35636 100644
--- a/libclc/clc/lib/generic/math/clc_acos.inc
+++ b/libclc/clc/lib/generic/math/clc_acos.inc
@@ -75,7 +75,7 @@ _CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE 
__clc_acos(__CLC_GENTYPE x) {
 static _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE __clc_acos_identity_reduction(
     __CLC_GENTYPE x, __CLC_GENTYPE r, __CLC_GENTYPE u, __CLC_GENTYPE z) {
   __CLC_EP_PAIR s = __clc_ep_sqrt(r);
-  __CLC_GENTYPE zm = __clc_mad(0x1.dd9ad336a0500p+0, 0x1.af154eeb562d6p+0,
+  __CLC_GENTYPE zm = __clc_fma(0x1.dd9ad336a0500p+0, 0x1.af154eeb562d6p+0,
                                -2.0 * __clc_mad(s.hi, u, s.hi));
   __CLC_GENTYPE zp = 2.0 * (s.hi + __clc_mad(s.hi, u, s.lo));
   z = x < 0.0 ? zm : zp;
@@ -114,7 +114,7 @@ _CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE 
__clc_acos(__CLC_GENTYPE x) {
                             0x1.8ed60a300c8d2p-7), 0x1.c6fa84b77012bp-7), 
0x1.1c6c111dccb70p-6), 0x1.6e89f0a0adacfp-6),
                             0x1.f1c72c668963fp-6), 0x1.6db6db41ce4bdp-5), 
0x1.333333336fd5bp-4), 0x1.5555555555380p-3);
 
-  __CLC_GENTYPE z = __clc_mad(0x1.dd9ad336a0500p-1, 0x1.af154eeb562d6p+0,
+  __CLC_GENTYPE z = __clc_fma(0x1.dd9ad336a0500p-1, 0x1.af154eeb562d6p+0,
                               -__clc_mad(x, u, x));
 
 #ifdef __CLC_SCALAR
@@ -156,9 +156,9 @@ _CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE 
__clc_acos(__CLC_GENTYPE x) {
 
   __CLC_GENTYPE s = __clc_sqrt_fast(r);
   __CLC_GENTYPE ztp = 2.0h * __clc_mad(s, u, s);
-  __CLC_GENTYPE ztn = __clc_mad(0x1.ea8p+0h, 0x1.a3cp+0h, -ztp);
+  __CLC_GENTYPE ztn = __clc_fma(0x1.ea8p+0h, 0x1.a3cp+0h, -ztp);
   __CLC_GENTYPE zt = x < 0.0h ? ztn : ztp;
-  __CLC_GENTYPE z = __clc_mad(0x1.ea8p-1h, 0x1.a3cp+0h, -__clc_mad(x, u, x));
+  __CLC_GENTYPE z = __clc_fma(0x1.ea8p-1h, 0x1.a3cp+0h, -__clc_mad(x, u, x));
   z = ax > 0.5h ? zt : z;
 
   return z;

diff  --git a/libclc/clc/lib/generic/math/clc_atan.inc 
b/libclc/clc/lib/generic/math/clc_atan.inc
index 83d849cc54590..8dae127af7d28 100644
--- a/libclc/clc/lib/generic/math/clc_atan.inc
+++ b/libclc/clc/lib/generic/math/clc_atan.inc
@@ -49,7 +49,7 @@ _CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_HALFN 
__clc_atan(__CLC_HALFN x) {
 
   __CLC_HALFN a = __clc_atan_reduced(v);
 
-  __CLC_HALFN y = __clc_mad(0x1.ea8p-1h, 0x1.a3cp+0h, -a);
+  __CLC_HALFN y = __clc_fma(0x1.ea8p-1h, 0x1.a3cp+0h, -a);
   a = g ? y : a;
 
   return __clc_copysign(a, x);


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to