https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/188186

This was originally ported from rocm device libs in
bf9f76fbe0fb8b8af3377d2c6ce7d7cc290894ad, merge in more
recent changes.

>From d710fbaebe628cb1c9a4ac0d14f88d742c03527c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <[email protected]>
Date: Thu, 19 Mar 2026 14:07:04 +0100
Subject: [PATCH] libclc: Update log1p

This was originally ported from rocm device libs in
bf9f76fbe0fb8b8af3377d2c6ce7d7cc290894ad, merge in more
recent changes.
---
 libclc/clc/include/clc/math/clc_ep_decl.inc |   4 +-
 libclc/clc/lib/generic/math/clc_ep.inc      |  60 +++++++-
 libclc/clc/lib/generic/math/clc_log1p.cl    |   4 +-
 libclc/clc/lib/generic/math/clc_log1p.inc   | 154 +++-----------------
 4 files changed, 82 insertions(+), 140 deletions(-)

diff --git a/libclc/clc/include/clc/math/clc_ep_decl.inc 
b/libclc/clc/include/clc/math/clc_ep_decl.inc
index 9bb06cba91a69..5f83d4f6af1dd 100644
--- a/libclc/clc/include/clc/math/clc_ep_decl.inc
+++ b/libclc/clc/include/clc/math/clc_ep_decl.inc
@@ -82,7 +82,7 @@ _CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR
 __clc_ep_fast_sub(__CLC_EP_PAIR a, __CLC_EP_PAIR b);
 
 _CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_ldexp(__CLC_EP_PAIR 
a,
-                                                                int e);
+                                                                __CLC_INTN e);
 
 _CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_mul(__CLC_EP_PAIR a,
                                                               __CLC_GENTYPE b);
@@ -135,4 +135,6 @@ _CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR 
__clc_ep_sqrt(__CLC_EP_PAIR a);
 #if __CLC_FPSIZE == 32 || __CLC_FPSIZE == 64
 _CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE __clc_ep_exp(__CLC_EP_PAIR a);
 _CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_ln(__CLC_GENTYPE a);
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE __clc_ep_ln_hi(__CLC_EP_PAIR 
a,
+                                                                __CLC_INTN ea);
 #endif
diff --git a/libclc/clc/lib/generic/math/clc_ep.inc 
b/libclc/clc/lib/generic/math/clc_ep.inc
index fc45c892b6c80..8f844644c43fd 100644
--- a/libclc/clc/lib/generic/math/clc_ep.inc
+++ b/libclc/clc/lib/generic/math/clc_ep.inc
@@ -245,7 +245,7 @@ __clc_ep_fast_sub(__CLC_EP_PAIR a, __CLC_EP_PAIR b) {
 }
 
 _CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR __clc_ep_ldexp(__CLC_EP_PAIR a,
-                                                               int e) {
+                                                               __CLC_INTN e) {
   return __clc_ep_make_pair(__clc_ldexp(a.hi, e), __clc_ldexp(a.lo, e));
 }
 
@@ -469,6 +469,28 @@ _CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR 
__clc_ep_ln(__CLC_GENTYPE a) {
   return r;
 }
 
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_FLOATN __clc_ep_ln_hi(__CLC_EP_PAIR a,
+                                                              __CLC_INTN ea) {
+  __CLC_INTN a_hi_exp;
+  __CLC_FLOATN a_hi_m = __clc_frexp(a.hi, &a_hi_exp);
+  __CLC_INTN b = a_hi_m < (2.0f / 3.0f);
+  __CLC_INTN e = a_hi_exp - b;
+  __CLC_EP_PAIR m = __clc_ep_ldexp(a, -e);
+  __CLC_EP_PAIR x =
+      __clc_ep_div(__clc_ep_fast_add(-1.0f, m), __clc_ep_fast_add(1.0f, m));
+  __CLC_FLOATN s = x.hi * x.hi;
+  __CLC_FLOATN p = __clc_mad(s, __clc_mad(s, 0x1.36db58p-2f, 0x1.992b46p-2f),
+                             0x1.5555b4p-1f);
+
+  const __CLC_EP_PAIR ln2 = __clc_ep_make_pair(__CLC_FP_LIT(0x1.62e430p-1),
+                                               __CLC_FP_LIT(-0x1.05c610p-29));
+
+  __CLC_EP_PAIR r =
+      __clc_ep_add(__clc_ep_mul(ln2, __CLC_CONVERT_FLOATN(e + ea)),
+                   __clc_ep_fast_add(__clc_ep_ldexp(x, 1), s * x.hi * p));
+  return r.hi;
+}
+
 #elif __CLC_FPSIZE == 64
 
 _CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_GENTYPE __clc_ep_exp(__CLC_EP_PAIR x) {
@@ -509,6 +531,42 @@ _CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_EP_PAIR 
__clc_ep_ln(__CLC_DOUBLEN a) {
   return r;
 }
 
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST __CLC_DOUBLEN __clc_ep_ln_hi(__CLC_EP_PAIR a,
+                                                               __CLC_INTN ea) {
+  __CLC_INTN a_hi_exp;
+  __CLC_DOUBLEN m_hi = __clc_frexp(a.hi, &a_hi_exp);
+
+  __CLC_LONGN b = m_hi < (2.0 / 3.0);
+  __CLC_INTN e = a_hi_exp - __CLC_CONVERT_INTN(b);
+  __CLC_EP_PAIR m = __clc_ep_ldexp(a, -e);
+  __CLC_EP_PAIR x =
+      __clc_ep_div(__clc_ep_fast_add(-1.0, m), __clc_ep_fast_add(1.0, m));
+  __CLC_DOUBLEN s = x.hi * x.hi;
+
+  const __CLC_DOUBLEN ln_c1 = 0x1.3ab76bf559e2bp-3;
+  const __CLC_DOUBLEN ln_c2 = 0x1.385386b47b09ap-3;
+  const __CLC_DOUBLEN ln_c3 = 0x1.7474dd7f4df2ep-3;
+  const __CLC_DOUBLEN ln_c4 = 0x1.c71c016291751p-3;
+  const __CLC_DOUBLEN ln_c5 = 0x1.249249b27acf1p-2;
+  const __CLC_DOUBLEN ln_c6 = 0x1.99999998ef7b6p-2;
+  const __CLC_DOUBLEN ln_c7 = 0x1.5555555555780p-1;
+
+  __CLC_DOUBLEN q1 = __clc_mad(s, ln_c1, ln_c2);
+  __CLC_DOUBLEN q2 = __clc_mad(s, q1, ln_c3);
+  __CLC_DOUBLEN q3 = __clc_mad(s, q2, ln_c4);
+  __CLC_DOUBLEN q4 = __clc_mad(s, q3, ln_c5);
+  __CLC_DOUBLEN q5 = __clc_mad(s, q4, ln_c6);
+  __CLC_DOUBLEN p = __clc_mad(s, q5, ln_c7);
+
+  const __CLC_EP_PAIR ln2 = __clc_ep_make_pair(
+      __CLC_FP_LIT(0x1.62e42fefa39efp-1), __CLC_FP_LIT(0x1.abc9e3b39803fp-56));
+
+  __CLC_EP_PAIR r =
+      __clc_ep_add(__clc_ep_mul(ln2, __CLC_CONVERT_DOUBLEN(e + ea)),
+                   __clc_ep_fast_add(__clc_ep_ldexp(x, 1), s * x.hi * p));
+  return r.hi;
+}
+
 #endif
 
 #undef __CLC_EP_USE_FMA
diff --git a/libclc/clc/lib/generic/math/clc_log1p.cl 
b/libclc/clc/lib/generic/math/clc_log1p.cl
index 6c32c1ffc361d..557bd0d1441dc 100644
--- a/libclc/clc/lib/generic/math/clc_log1p.cl
+++ b/libclc/clc/lib/generic/math/clc_log1p.cl
@@ -10,9 +10,11 @@
 #include "clc/float/definitions.h"
 #include "clc/internal/clc.h"
 #include "clc/math/clc_fma.h"
+#include "clc/math/clc_ep.h"
 #include "clc/math/clc_mad.h"
+#include "clc/math/clc_fabs.h"
+#include "clc/math/clc_log2_fast.h"
 #include "clc/math/math.h"
-#include "clc/math/tables.h"
 #include "clc/relational/clc_isinf.h"
 
 #define __CLC_BODY "clc_log1p.inc"
diff --git a/libclc/clc/lib/generic/math/clc_log1p.inc 
b/libclc/clc/lib/generic/math/clc_log1p.inc
index 2dd616818c715..c86b5afa6cb75 100644
--- a/libclc/clc/lib/generic/math/clc_log1p.inc
+++ b/libclc/clc/lib/generic/math/clc_log1p.inc
@@ -19,152 +19,32 @@
 
 #if __CLC_FPSIZE == 32
 
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_log1p(__CLC_GENTYPE x) {
-  __CLC_GENTYPE w = x;
-  __CLC_UINTN ux = __CLC_AS_UINTN(x);
-  __CLC_UINTN ax = ux & EXSIGNBIT_SP32;
-
-  // |x| < 2^-4
-  __CLC_GENTYPE u2 = MATH_DIVIDE(x, 2.0f + x);
-  __CLC_GENTYPE u = u2 + u2;
-  __CLC_GENTYPE v = u * u;
-  // 2/(5 * 2^5), 2/(3 * 2^3)
-  __CLC_GENTYPE zsmall =
-      __clc_mad(-u2, x, __clc_mad(v, 0x1.99999ap-7f, 0x1.555556p-4f) * v * u) +
-      x;
-
-  // |x| >= 2^-4
-  ux = __CLC_AS_UINTN(x + 1.0f);
-
-  __CLC_INTN m = __CLC_AS_INTN((ux >> EXPSHIFTBITS_SP32) & 0xff) - 
EXPBIAS_SP32;
-  __CLC_GENTYPE mf = __CLC_CONVERT_GENTYPE(m);
-  __CLC_UINTN indx = (ux & 0x007f0000) + ((ux & 0x00008000) << 1);
-  __CLC_GENTYPE F = __CLC_AS_GENTYPE(indx | 0x3f000000);
-
-  // x > 2^24
-  __CLC_GENTYPE fg24 = F - __CLC_AS_GENTYPE(0x3f000000 | (ux & MANTBITS_SP32));
-
-  // x <= 2^24
-  __CLC_UINTN xhi = ux & 0xffff8000;
-  __CLC_GENTYPE xh = __CLC_AS_GENTYPE(xhi);
-  __CLC_GENTYPE xt = (1.0f - xh) + w;
-  __CLC_UINTN xnm = ((~(xhi & 0x7f800000)) - 0x00800000) & 0x7f800000;
-  xt = xt * __CLC_AS_GENTYPE(xnm) * 0.5f;
-  __CLC_GENTYPE fl24 =
-      F - __CLC_AS_GENTYPE(0x3f000000 | (xhi & MANTBITS_SP32)) - xt;
-
-  __CLC_GENTYPE f = mf > 24.0f ? fg24 : fl24;
-
-  indx = indx >> 16;
-  __CLC_GENTYPE r = f * __CLC_USE_TABLE(log_inv_tbl, __CLC_CONVERT_INTN(indx));
-
-  // 1/3, 1/2
-  __CLC_GENTYPE poly =
-      __clc_mad(__clc_mad(r, 0x1.555556p-2f, 0x1.0p-1f), r * r, r);
-
-  const __CLC_GENTYPE LOG2_HEAD = 0x1.62e000p-1f;  // 0.693115234
-  const __CLC_GENTYPE LOG2_TAIL = 0x1.0bfbe8p-15f; // 0.0000319461833
-
-  __CLC_GENTYPE tv0 = __CLC_USE_TABLE(loge_tbl_lo, __CLC_AS_INTN(indx));
-  __CLC_GENTYPE tv1 = __CLC_USE_TABLE(loge_tbl_hi, __CLC_AS_INTN(indx));
-  __CLC_GENTYPE z1 = __clc_mad(mf, LOG2_HEAD, tv0);
-  __CLC_GENTYPE z2 = __clc_mad(mf, LOG2_TAIL, -poly) + tv1;
-  __CLC_GENTYPE z = z1 + z2;
-
-  z = ax < 0x3d800000U ? zsmall : z;
-
-  // Edge cases
-  z = ax >= PINFBITPATT_SP32 ? w : z;
-  z = w < -1.0f ? __CLC_GENTYPE_NAN : z;
-  z = w == -1.0f ? __CLC_AS_GENTYPE((__CLC_UINTN)NINFBITPATT_SP32) : z;
-  // Fix subnormals
-  z = ax < 0x33800000 ? x : z;
-
-  return z;
+_CLC_OVERLOAD _CLC_DEF _CLC_CONST __CLC_FLOATN __clc_log1p(__CLC_FLOATN x) {
+  __CLC_FLOATN z = __clc_ep_ln_hi(__clc_ep_add(__CLC_FP_LIT(1.0), x), 0);
+  z = x == __CLC_GENTYPE_INF ? x : z;
+  z = x < -1.0f ? __CLC_GENTYPE_NAN : z;
+  z = x == -1.0f ? -__CLC_GENTYPE_INF : z;
+  return __clc_fabs(x) < 0x1.0p-24f ? x : z;
 }
 
 #elif __CLC_FPSIZE == 64
 
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_log1p(__CLC_GENTYPE x) {
-  // Process Inside the threshold now
-  __CLC_ULONGN ux = __CLC_AS_ULONGN((__CLC_GENTYPE)1.0 + x);
-  __CLC_INTN xexp =
-      __CLC_CONVERT_INTN((ux >> EXPSHIFTBITS_DP64) & 0x7ff) - EXPBIAS_DP64;
-  __CLC_GENTYPE f =
-      __CLC_AS_GENTYPE((__CLC_ULONGN)ONEEXPBITS_DP64 | (ux & MANTBITS_DP64));
+_CLC_OVERLOAD _CLC_DEF _CLC_CONST __CLC_DOUBLEN __clc_log1p(__CLC_DOUBLEN x) {
+  __CLC_GENTYPE z = __clc_ep_ln_hi(__clc_ep_add(1.0, x), 0);
 
-  __CLC_INTN j = __CLC_CONVERT_INTN(ux >> 45);
-  j = ((0x80 | (j & 0x7e)) >> 1) + (j & 0x1);
-  __CLC_GENTYPE f1 = __CLC_CONVERT_GENTYPE(j) * 0x1.0p-6;
-  j -= 64;
-
-  __CLC_GENTYPE f2temp = f - f1;
-  __CLC_GENTYPE m2 =
-      __CLC_AS_GENTYPE(__CLC_CONVERT_ULONGN(0x3ff - xexp) << 
EXPSHIFTBITS_DP64);
-  __CLC_GENTYPE f2l = __clc_fma(m2, x, m2 - f1);
-  __CLC_GENTYPE f2g = __clc_fma(m2, x, -f1) + m2;
-  __CLC_GENTYPE f2 =
-      __CLC_CONVERT_LONGN(xexp <= MANTLENGTH_DP64 - 1) ? f2l : f2g;
-  f2 = __CLC_CONVERT_LONGN(xexp <= -2 || (xexp >= MANTLENGTH_DP64 + 8)) ? 
f2temp
-                                                                        : f2;
-
-  __CLC_GENTYPE z1 = __CLC_USE_TABLE(ln_tbl_lo, j);
-  __CLC_GENTYPE q = __CLC_USE_TABLE(ln_tbl_hi, j);
-
-  __CLC_GENTYPE u = MATH_DIVIDE(f2, __clc_fma(0.5, f2, f1));
-  __CLC_GENTYPE v = u * u;
-
-  __CLC_GENTYPE poly = v * __clc_fma(v,
-                                     __clc_fma(v, 2.23219810758559851206e-03,
-                                               1.24999999978138668903e-02),
-                                     8.33333333333333593622e-02);
-
-  // log2_lead and log2_tail sum to an extra-precise version of log(2)
-  // 0x3fe62e42e0000000
-  const __CLC_GENTYPE log2_lead = 6.93147122859954833984e-01;
-  // 0x3e6efa39ef35793c
-  const __CLC_GENTYPE log2_tail = 5.76999904754328540596e-08;
-
-  __CLC_GENTYPE z2 = q + __clc_fma(u, poly, u);
-  __CLC_GENTYPE dxexp = __CLC_CONVERT_GENTYPE(xexp);
-  __CLC_GENTYPE r1 = __clc_fma(dxexp, log2_lead, z1);
-  __CLC_GENTYPE r2 = __clc_fma(dxexp, log2_tail, z2);
-  __CLC_GENTYPE result1 = r1 + r2;
-
-  // Process Outside the threshold now
-  __CLC_GENTYPE r = x;
-  u = r / (2.0 + r);
-  __CLC_GENTYPE correction = r * u;
-  u = u + u;
-  v = u * u;
-  r1 = r;
-
-  poly = __clc_fma(v,
-                   __clc_fma(v,
-                             __clc_fma(v, 4.34887777707614552256e-04,
-                                       2.23213998791944806202e-03),
-                             1.25000000037717509602e-02),
-                   8.33333333333317923934e-02);
-
-  r2 = __clc_fma(u * v, poly, -correction);
-
-  // The values exp(-1/16)-1 and exp(1/16)-1
-  const __CLC_GENTYPE log1p_thresh1 = -0x1.f0540438fd5c3p-5;
-  const __CLC_GENTYPE log1p_thresh2 = 0x1.082b577d34ed8p-4;
-  __CLC_GENTYPE result2 = r1 + r2;
-  result2 = x < log1p_thresh1 || x > log1p_thresh2 ? result1 : result2;
-
-  result2 = __clc_isinf(x) ? x : result2;
-  result2 = x < -1.0 ? __CLC_GENTYPE_NAN : result2;
-  result2 =
-      x == -1.0 ? __CLC_AS_GENTYPE((__CLC_ULONGN)NINFBITPATT_DP64) : result2;
-  return result2;
+  z = x == __CLC_GENTYPE_INF ? x : z;
+  z = x < -1.0 ? __CLC_GENTYPE_NAN : z;
+  z = x == -1.0 ? -__CLC_GENTYPE_INF : z;
+  return z;
 }
 
+
 #elif __CLC_FPSIZE == 16
 
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_log1p(__CLC_GENTYPE x) {
-  return __CLC_CONVERT_GENTYPE(__clc_log1p(__CLC_CONVERT_FLOATN(x)));
+_CLC_OVERLOAD _CLC_DEF _CLC_CONST __CLC_HALFN __clc_log1p(__CLC_HALFN x) {
+  __CLC_HALFN ret = 
__CLC_CONVERT_HALFN(__clc_log2_fast(__CLC_CONVERT_FLOATN(x) + 1.0f) * 
0x1.62e430p-1f);
+  __CLC_HALFN p = __clc_mad(x, x * __clc_mad(x, 0x1.555556p-2h, -0.5h), x);
+  return __clc_fabs(x) < 0x1.0p-6h ? p : ret;
 }
 
 #endif

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to