fma{, f}.c: Implement FMA for `double` and `float` properly

Liu Hao Thu, 12 Dec 2019 19:53:49 -0800

Two testcases have been attached, the first one is for `double` and the
other is for `float`. They compile and run fine with Glibc on Linux but
fails on x64.


Side note: The test may fail on x86 without excess casts, due to the
fact that x87 extending format is used for intermediate results, which
always cause double-rounding errors. In order for the test to pass,
hardware single-precision floating-point arithmetic must be enabled by
using GCC's `-msse -mfpmath=sse` options.






FMA of `double` shall not be implemented using `long double`, as
the result has 106 bits and cannot be stored accurately in a
`long double`, whose mantissa only has 64 bits.

Let's calculate `FMA(0x1.0000000000001p0, 1, 0x0.00000000000007FFp0)`:
First, we multiply `0x1.0000000000001p0` and `1`, which yields
`0x1.0000000000001p0`. Then we add `0x0.00000000000007FFp0` to it:

      0x1.0000 0000 0000 1     p0
  +)  0x0.0000 0000 0000 07FF  p0
  ---------------------------------
      0x1.0000 0000 0000 17FF  p0   (long double)

This result has 65 significant bits. When it is stored into a
`long double`, the last bit is rounded to even, as follows:

      0x1.0000 0000 0000 17FF  p0
                            1       <= rounded UP as this bit is set
  ---------------------------------
      0x1.0000 0000 0000 1800  p0   (long double)

If we attempt to round this value into `double` again, we get:

      0x1.0000 0000 0000 1800  p0
                          8         <= rounded UP as this bit is set
  ---------------------------------
      0x1.0000 0000 0000 2000  p0   (double)

This is wrong. Because FMA shall round the result exactly once. If we
round the first result to double we get a different result:

      0x0.0000 0000 0000 17FF  p0
                          8         <= rounded DOWN as this bit is clear
  ---------------------------------
      0x1.0000 0000 0000 1000  p0   (double)

`float` suffers from a similar problem.

This patch fixes such issues.

Testcase for `double`:

  #include <assert.h>
  #include <math.h>

  int main(void)
    {
      volatile double a = 0x1.0000000000001p0;
      volatile double b = 1;
      volatile double c = 0x0.00000000000007FFp0;

      assert(a * b + c    == 0x1.0000000000001p0);
      assert(fma(a, b, c) == 0x1.0000000000001p0);
      assert((double)((long double)a * b + c)
                          == 0x1.0000000000002p0);
    }

Testcase for `float`:

  #include <assert.h>
  #include <math.h>

  int main(void)
    {
      volatile float a = 0x0.800001p0f;
      volatile float b = 0x0.5FFFFFp0f;;
      volatile float c = 0x0.000000000000FFFFFFp0f;

      assert(a * b + c     == 0x0.2FFFFFCp0);
      assert(fmaf(a, b, c) == 0x0.2FFFFFCp0);
      assert((float)((double)a * b + c)
                           == 0x0.3000000p0);
    }

Reference:
https://www.exploringbinary.com/double-rounding-errors-in-floating
Signed-off-by: Liu Hao <[email protected]>
---
 mingw-w64-crt/math/fma.c  | 66 ++++++++++++++++++++++++++++++++++++---
 mingw-w64-crt/math/fmaf.c | 66 ++++++++++++++++++++++++++++++++++++---
 2 files changed, 122 insertions(+), 10 deletions(-)

diff --git a/mingw-w64-crt/math/fma.c b/mingw-w64-crt/math/fma.c
index c4ce738b..a7226c2d 100644
--- a/mingw-w64-crt/math/fma.c
+++ b/mingw-w64-crt/math/fma.c
@@ -29,13 +29,69 @@ double fma(double x, double y, double z){
   return z;
 }
 -#else
+#elif defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) ||
defined(__i386__)
 -long double fmal(long double x, long double y, long double z);
+#include <math.h>
+#include <stdint.h>
 -/* For platforms that don't have hardware FMA, emulate it. */
-double fma(double x, double y, double z){
-  return (double)fmal(x, y, z);
+/* This is in accordance with the IEC 559 double-precision format.
+ * Be advised that due to the hidden bit, the higher half actually has
26 bits.
+ * Multiplying two 27-bit numbers will cause a 1-ULP error, which we cannot
+ * avoid. It is kept in the very last position.
+ */
+typedef union iec559_double_ {
+  struct __attribute__((__packed__)) {
+    uint64_t mlo : 27;
+    uint64_t mhi : 25;
+    uint64_t exp : 11;
+    uint64_t sgn :  1;
+  };
+  double f;
+} iec559_double;
+
+static inline void break_down(iec559_double *restrict lo, iec559_double
*restrict hi, double x) {
+  hi->f = x;
+  /* Erase low-order significant bits. `hi->f` now has only 32
significant bits. */
+  hi->mlo = 0;
+  /* Store the low-order half. It will be normalized by the hardware. */
+  lo->f = x - hi->f;
+  /* Preserve signness in case of zero. */
+  lo->sgn = hi->sgn;
 }
 +double fma(double x, double y, double z) {
+  /*
+    POSIX-2013:
+    1. If x or y are NaN, a NaN shall be returned.
+    2. If x multiplied by y is an exact infinity and z is also an infinity
+       but with the opposite sign, a domain error shall occur, and
either a NaN
+       (if supported), or an implementation-defined value shall be
returned.
+    3. If one of x and y is infinite, the other is zero, and z is not a
NaN,
+       a domain error shall occur, and either a NaN (if supported), or an
+       implementation-defined value shall be returned.
+    4. If one of x and y is infinite, the other is zero, and z is a
NaN, a NaN
+       shall be returned and a domain error may occur.
+    5. If x* y is not 0*Inf nor Inf*0 and z is a NaN, a NaN shall be
returned.
+  */
+  /* Check whether the result is finite. */
+  double ret = x * y + z;
+  if(!isfinite(ret)) {
+    return ret; /* If this naive check doesn't yield a finite value,
the FMA isn't
+                   likely to return one either. Forward the value as is. */
+  }
+  iec559_double xlo, xhi, ylo, yhi;
+  break_down(&xlo, &xhi, x);
+  break_down(&ylo, &yhi, y);
+  /* The order of these four statements is essential. Don't move them
around. */
+  ret = z;
+  ret += xhi.f * yhi.f;                 /* The most significant item
comes first. */
+  ret += xhi.f * ylo.f + xlo.f * yhi.f; /* They are equally significant. */
+  ret += xlo.f * ylo.f;                 /* The least significant item
comes last. */
+  return ret;
+}
+
+#else
+
+#error Please add FMA implementation for this platform.
+
 #endif
diff --git a/mingw-w64-crt/math/fmaf.c b/mingw-w64-crt/math/fmaf.c
index b3f58a84..0d0bd959 100644
--- a/mingw-w64-crt/math/fmaf.c
+++ b/mingw-w64-crt/math/fmaf.c
@@ -29,13 +29,69 @@ float fmaf(float x, float y, float z){
   return z;
 }
 -#else
+#elif defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) ||
defined(__i386__)
 -long double fmal(long double x, long double y, long double z);
+#include <math.h>
+#include <stdint.h>
 -/* For platforms that don't have hardware FMA, emulate it. */
-float fmaf(float x, float y, float z){
-  return (float)fmal(x, y, z);
+/* This is in accordance with the IEC 559 single-precision format.
+ * Be advised that due to the hidden bit, the higher half actually has
11 bits.
+ * Multiplying two 13-bit numbers will cause a 1-ULP error, which we cannot
+ * avoid. It is kept in the very last position.
+ */
+typedef union iec559_float_ {
+  struct __attribute__((__packed__)) {
+    uint32_t mlo : 13;
+    uint32_t mhi : 10;
+    uint32_t exp :  8;
+    uint32_t sgn :  1;
+  };
+  float f;
+} iec559_float;
+
+static inline void break_down(iec559_float *restrict lo, iec559_float
*restrict hi, float x) {
+  hi->f = x;
+  /* Erase low-order significant bits. `hi->f` now has only 32
significant bits. */
+  hi->mlo = 0;
+  /* Store the low-order half. It will be normalized by the hardware. */
+  lo->f = x - hi->f;
+  /* Preserve signness in case of zero. */
+  lo->sgn = hi->sgn;
 }
 +float fmaf(float x, float y, float z) {
+  /*
+    POSIX-2013:
+    1. If x or y are NaN, a NaN shall be returned.
+    2. If x multiplied by y is an exact infinity and z is also an infinity
+       but with the opposite sign, a domain error shall occur, and
either a NaN
+       (if supported), or an implementation-defined value shall be
returned.
+    3. If one of x and y is infinite, the other is zero, and z is not a
NaN,
+       a domain error shall occur, and either a NaN (if supported), or an
+       implementation-defined value shall be returned.
+    4. If one of x and y is infinite, the other is zero, and z is a
NaN, a NaN
+       shall be returned and a domain error may occur.
+    5. If x* y is not 0*Inf nor Inf*0 and z is a NaN, a NaN shall be
returned.
+  */
+  /* Check whether the result is finite. */
+  float ret = x * y + z;
+  if(!isfinite(ret)) {
+    return ret; /* If this naive check doesn't yield a finite value,
the FMA isn't
+                   likely to return one either. Forward the value as is. */
+  }
+  iec559_float xlo, xhi, ylo, yhi;
+  break_down(&xlo, &xhi, x);
+  break_down(&ylo, &yhi, y);
+  /* The order of these four statements is essential. Don't move them
around. */
+  ret = z;
+  ret += xhi.f * yhi.f;                 /* The most significant item
comes first. */
+  ret += xhi.f * ylo.f + xlo.f * yhi.f; /* They are equally significant. */
+  ret += xlo.f * ylo.f;                 /* The least significant item
comes last. */
+  return ret;
+}
+
+#else
+
+#error Please add FMA implementation for this platform.
+
 #endif
-- 
2.24.0

From e8fcb00bb56bc7b337fa7f59a73da10a4dae8ff6 Mon Sep 17 00:00:00 2001
From: Liu Hao <[email protected]>
Date: Thu, 12 Dec 2019 21:04:04 +0800
Subject: [PATCH 2/2] crt/fma{,f}.c: Implement FMA for `double` and `float`
 properly

FMA of `double` shall not be implemented using `long double`, as
the result has 106 bits and cannot be stored accurately in a
`long double`, whose mantissa only has 64 bits.

Let's calculate `FMA(0x1.0000000000001p0, 1, 0x0.00000000000007FFp0)`:
First, we multiply `0x1.0000000000001p0` and `1`, which yields
`0x1.0000000000001p0`. Then we add `0x0.00000000000007FFp0` to it:

      0x1.0000 0000 0000 1     p0
  +)  0x0.0000 0000 0000 07FF  p0
  ---------------------------------
      0x1.0000 0000 0000 17FF  p0   (long double)

This result has 65 significant bits. When it is stored into a
`long double`, the last bit is rounded to even, as follows:

      0x1.0000 0000 0000 17FF  p0
                            1       <= rounded UP as this bit is set
  ---------------------------------
      0x1.0000 0000 0000 1800  p0   (long double)

If we attempt to round this value into `double` again, we get:

      0x1.0000 0000 0000 1800  p0
                          8         <= rounded UP as this bit is set
  ---------------------------------
      0x1.0000 0000 0000 2000  p0   (double)

This is wrong. Because FMA shall round the result exactly once. If we
round the first result to double we get a different result:

      0x0.0000 0000 0000 17FF  p0
                          8         <= rounded DOWN as this bit is clear
  ---------------------------------
      0x1.0000 0000 0000 1000  p0   (double)

`float` suffers from a similar problem.

This patch fixes such issues.

Testcase for `double`:

  #include <assert.h>
  #include <math.h>

  int main(void)
    {
      volatile double a = 0x1.0000000000001p0;
      volatile double b = 1;
      volatile double c = 0x0.00000000000007FFp0;

      assert(a * b + c    == 0x1.0000000000001p0);
      assert(fma(a, b, c) == 0x1.0000000000001p0);
      assert((double)((long double)a * b + c)
                          == 0x1.0000000000002p0);
    }

Testcase for `float`:

  #include <assert.h>
  #include <math.h>

  int main(void)
    {
      volatile float a = 0x0.800001p0f;
      volatile float b = 0x0.5FFFFFp0f;;
      volatile float c = 0x0.000000000000FFFFFFp0f;

      assert(a * b + c     == 0x0.2FFFFFCp0);
      assert(fmaf(a, b, c) == 0x0.2FFFFFCp0);
      assert((float)((double)a * b + c)
                           == 0x0.3000000p0);
    }

Reference: https://www.exploringbinary.com/double-rounding-errors-in-floating
Signed-off-by: Liu Hao <[email protected]>
---
 mingw-w64-crt/math/fma.c  | 66 ++++++++++++++++++++++++++++++++++++---
 mingw-w64-crt/math/fmaf.c | 66 ++++++++++++++++++++++++++++++++++++---
 2 files changed, 122 insertions(+), 10 deletions(-)

diff --git a/mingw-w64-crt/math/fma.c b/mingw-w64-crt/math/fma.c
index c4ce738b..a7226c2d 100644
--- a/mingw-w64-crt/math/fma.c
+++ b/mingw-w64-crt/math/fma.c
@@ -29,13 +29,69 @@ double fma(double x, double y, double z){
   return z;
 }
 
-#else
+#elif defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || 
defined(__i386__)
 
-long double fmal(long double x, long double y, long double z);
+#include <math.h>
+#include <stdint.h>
 
-/* For platforms that don't have hardware FMA, emulate it. */
-double fma(double x, double y, double z){
-  return (double)fmal(x, y, z);
+/* This is in accordance with the IEC 559 double-precision format.
+ * Be advised that due to the hidden bit, the higher half actually has 26 bits.
+ * Multiplying two 27-bit numbers will cause a 1-ULP error, which we cannot
+ * avoid. It is kept in the very last position.
+ */
+typedef union iec559_double_ {
+  struct __attribute__((__packed__)) {
+    uint64_t mlo : 27;
+    uint64_t mhi : 25;
+    uint64_t exp : 11;
+    uint64_t sgn :  1;
+  };
+  double f;
+} iec559_double;
+
+static inline void break_down(iec559_double *restrict lo, iec559_double 
*restrict hi, double x) {
+  hi->f = x;
+  /* Erase low-order significant bits. `hi->f` now has only 32 significant 
bits. */
+  hi->mlo = 0;
+  /* Store the low-order half. It will be normalized by the hardware. */
+  lo->f = x - hi->f;
+  /* Preserve signness in case of zero. */
+  lo->sgn = hi->sgn;
 }
 
+double fma(double x, double y, double z) {
+  /*
+    POSIX-2013:
+    1. If x or y are NaN, a NaN shall be returned.
+    2. If x multiplied by y is an exact infinity and z is also an infinity
+       but with the opposite sign, a domain error shall occur, and either a NaN
+       (if supported), or an implementation-defined value shall be returned.
+    3. If one of x and y is infinite, the other is zero, and z is not a NaN,
+       a domain error shall occur, and either a NaN (if supported), or an
+       implementation-defined value shall be returned.
+    4. If one of x and y is infinite, the other is zero, and z is a NaN, a NaN
+       shall be returned and a domain error may occur.
+    5. If x* y is not 0*Inf nor Inf*0 and z is a NaN, a NaN shall be returned.
+  */
+  /* Check whether the result is finite. */
+  double ret = x * y + z;
+  if(!isfinite(ret)) {
+    return ret; /* If this naive check doesn't yield a finite value, the FMA 
isn't
+                   likely to return one either. Forward the value as is. */
+  }
+  iec559_double xlo, xhi, ylo, yhi;
+  break_down(&xlo, &xhi, x);
+  break_down(&ylo, &yhi, y);
+  /* The order of these four statements is essential. Don't move them around. 
*/
+  ret = z;
+  ret += xhi.f * yhi.f;                 /* The most significant item comes 
first. */
+  ret += xhi.f * ylo.f + xlo.f * yhi.f; /* They are equally significant. */
+  ret += xlo.f * ylo.f;                 /* The least significant item comes 
last. */
+  return ret;
+}
+
+#else
+
+#error Please add FMA implementation for this platform.
+
 #endif
diff --git a/mingw-w64-crt/math/fmaf.c b/mingw-w64-crt/math/fmaf.c
index b3f58a84..0d0bd959 100644
--- a/mingw-w64-crt/math/fmaf.c
+++ b/mingw-w64-crt/math/fmaf.c
@@ -29,13 +29,69 @@ float fmaf(float x, float y, float z){
   return z;
 }
 
-#else
+#elif defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || 
defined(__i386__)
 
-long double fmal(long double x, long double y, long double z);
+#include <math.h>
+#include <stdint.h>
 
-/* For platforms that don't have hardware FMA, emulate it. */
-float fmaf(float x, float y, float z){
-  return (float)fmal(x, y, z);
+/* This is in accordance with the IEC 559 single-precision format.
+ * Be advised that due to the hidden bit, the higher half actually has 11 bits.
+ * Multiplying two 13-bit numbers will cause a 1-ULP error, which we cannot
+ * avoid. It is kept in the very last position.
+ */
+typedef union iec559_float_ {
+  struct __attribute__((__packed__)) {
+    uint32_t mlo : 13;
+    uint32_t mhi : 10;
+    uint32_t exp :  8;
+    uint32_t sgn :  1;
+  };
+  float f;
+} iec559_float;
+
+static inline void break_down(iec559_float *restrict lo, iec559_float 
*restrict hi, float x) {
+  hi->f = x;
+  /* Erase low-order significant bits. `hi->f` now has only 32 significant 
bits. */
+  hi->mlo = 0;
+  /* Store the low-order half. It will be normalized by the hardware. */
+  lo->f = x - hi->f;
+  /* Preserve signness in case of zero. */
+  lo->sgn = hi->sgn;
 }
 
+float fmaf(float x, float y, float z) {
+  /*
+    POSIX-2013:
+    1. If x or y are NaN, a NaN shall be returned.
+    2. If x multiplied by y is an exact infinity and z is also an infinity
+       but with the opposite sign, a domain error shall occur, and either a NaN
+       (if supported), or an implementation-defined value shall be returned.
+    3. If one of x and y is infinite, the other is zero, and z is not a NaN,
+       a domain error shall occur, and either a NaN (if supported), or an
+       implementation-defined value shall be returned.
+    4. If one of x and y is infinite, the other is zero, and z is a NaN, a NaN
+       shall be returned and a domain error may occur.
+    5. If x* y is not 0*Inf nor Inf*0 and z is a NaN, a NaN shall be returned.
+  */
+  /* Check whether the result is finite. */
+  float ret = x * y + z;
+  if(!isfinite(ret)) {
+    return ret; /* If this naive check doesn't yield a finite value, the FMA 
isn't
+                   likely to return one either. Forward the value as is. */
+  }
+  iec559_float xlo, xhi, ylo, yhi;
+  break_down(&xlo, &xhi, x);
+  break_down(&ylo, &yhi, y);
+  /* The order of these four statements is essential. Don't move them around. 
*/
+  ret = z;
+  ret += xhi.f * yhi.f;                 /* The most significant item comes 
first. */
+  ret += xhi.f * ylo.f + xlo.f * yhi.f; /* They are equally significant. */
+  ret += xlo.f * ylo.f;                 /* The least significant item comes 
last. */
+  return ret;
+}
+
+#else
+
+#error Please add FMA implementation for this platform.
+
 #endif
-- 
2.24.0

signature.asc
Description: OpenPGP digital signature

_______________________________________________
Mingw-w64-public mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/mingw-w64-public

[Mingw-w64-public] [PATCH 2/2] crt/fma{, f}.c: Implement FMA for `double` and `float` properly

Reply via email to