On systems that don't have a native version, we use an implementation of
sqrtl() (square-root of long double) that -- to do its job -- pokes at the
floating-point exception state and rounding mode. In particular, at a key
point it clears any previous "inexact" exception and sets the rounding
mode to "toward zero" and then does a division. It then tests whether
that raised an "inexact" exception and fixes the result up based on that,
and finally restores the rounding mode before returning.
The current version does that using the old, non-standard fp* routines:
fp{set,get}sticky() and fpsetround(). This diff switches it to the new,
standardized fe* routines: fe{clear,test}except() and fe{get,set}round().
(Why bother? The fp* routines are defined in libc, while the fe* routines
are defined inside libm itself...which means that with some symbol
redirection they can be made to call directly, without going through the
PLT. This diff is thus a prelude to the larger diff I have sitting in my
tree to do exactly that, reducing 136 PLT entries to just 22 on amd64, for
example. Even on a HW-FP-poor arch like mips64 it gets reduced from 204
to only 56 PLT entries.)
ok?
Philip Guenther
Index: src/e_sqrtl.c
===
RCS file: /data/src/openbsd/src/lib/libm/src/e_sqrtl.c,v
retrieving revision 1.1
diff -u -p -r1.1 e_sqrtl.c
--- src/e_sqrtl.c 9 Dec 2008 20:00:35 - 1.1
+++ src/e_sqrtl.c 11 Sep 2016 03:47:45 -
@@ -26,9 +26,9 @@
*/
#include
-#include
+#include /* for struct ieee_ext */
+#include
#include
-#include
#include
#ifdef EXT_IMPLICIT_NBIT
@@ -204,27 +204,28 @@ sqrtl(long double x)
u.e = xn + lo; /* Combine everything. */
u.bits.ext_exp += (k >> 1) - 1;
- fpsetsticky(fpgetsticky() & ~FP_X_IMP);
- r = fpsetround(FP_RZ); /* Set to round-toward-zero. */
+ feclearexcept(FE_INEXACT);
+ r = fegetround();
+ fesetround(FE_TOWARDZERO); /* Set to round-toward-zero. */
xn = x / u.e; /* Chopped quotient (inexact?). */
- if (!(fpgetsticky() & FP_X_IMP)) { /* Quotient is exact. */
+ if (!fetestexcept(FE_INEXACT)) { /* Quotient is exact. */
if (xn == u.e) {
- fpsetround(r);
+ fesetround(r);
return (u.e);
}
/* Round correctly for inputs like x = y**2 - ulp. */
xn = dec(xn); /* xn = xn - ulp. */
}
- if (r == FP_RN) {
+ if (r == FE_TONEAREST) {
xn = inc(xn); /* xn = xn + ulp. */
- } else if (r == FP_RP) {
+ } else if (r == FE_UPWARD) {
u.e = inc(u.e); /* u.e = u.e + ulp. */
xn = inc(xn); /* xn = xn + ulp. */
}
u.e = u.e + xn; /* Chopped sum. */
- fpsetround(r); /* Restore env and raise inexact */
+ fesetround(r); /* Restore env and raise inexact */
u.bits.ext_exp--;
return (u.e);
}