[llvm-branch-commits] [compiler-rt] [compiler-rt][ARM] Double-precision FP support functions (PR #179920)

Simon Tatham via llvm-branch-commits Fri, 06 Feb 2026 03:54:46 -0800

https://github.com/statham-arm updated 
https://github.com/llvm/llvm-project/pull/179920


>From cdcb4cc7e66916f2b4a068d38825b1e6244b589c Mon Sep 17 00:00:00 2001
From: Simon Tatham <[email protected]>
Date: Thu, 29 Jan 2026 16:06:10 +0000
Subject: [PATCH] [compiler-rt][ARM] Double-precision FP support functions

This commit adds C helper functions `dnan2`, `dnorm2` and `dunder` for
handling the less critical edge cases of double-precision arithmetic,
similar to `fnan2`, `fnorm2` and `funder` that were added in commit
f7e652127772e93.

It also adds a header file that defines some register aliases for
handling double-precision numbers in AArch32 software floating point
in an endianness-independent way, by providing aliases `xh` and `xl`
for the high and low words of the first double-precision function
argument, regardless of which of them is in r0 and which in r1, and
similarly `yh` and `yl` for the second argument in r2/r3.
---
 compiler-rt/lib/builtins/CMakeLists.txt |  3 +
 compiler-rt/lib/builtins/arm/dnan2.c    | 45 ++++++++++++++
 compiler-rt/lib/builtins/arm/dnorm2.c   | 59 +++++++++++++++++++
 compiler-rt/lib/builtins/arm/dunder.c   | 78 +++++++++++++++++++++++++
 compiler-rt/lib/builtins/arm/endian.h   | 37 ++++++++++++
 5 files changed, 222 insertions(+)
 create mode 100644 compiler-rt/lib/builtins/arm/dnan2.c
 create mode 100644 compiler-rt/lib/builtins/arm/dnorm2.c
 create mode 100644 compiler-rt/lib/builtins/arm/dunder.c
 create mode 100644 compiler-rt/lib/builtins/arm/endian.h

diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index f43ef4743ff97..82ebfe0ccb322 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -455,6 +455,9 @@ if(COMPILER_RT_ARM_OPTIMIZED_FP AND BUILTIN_SUPPORTED_ARCH MATCHES "arm")
       arm/fnan2.c
       arm/fnorm2.c
       arm/funder.c
+      arm/dnan2.c
+      arm/dnorm2.c
+      arm/dunder.c
       )
   endif()
 endif()
diff --git a/compiler-rt/lib/builtins/arm/dnan2.c b/compiler-rt/lib/builtins/arm/dnan2.c
new file mode 100644
index 0000000000000..3b3bb50d3f587
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/dnan2.c
@@ -0,0 +1,45 @@
+//===-- dnan2.c - Handle double-precision NaN inputs to binary operation --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This helper function is available for use by double-precision float
+// arithmetic implementations to handle propagating NaNs from the input
+// operands to the output, in a way that matches Arm hardware FP.
+//
+// On input, a and b are floating-point numbers in IEEE 754 encoding, and at
+// least one of them must be a NaN. The return value is the correct output NaN.
+//
+// A signalling NaN in the input (with bit 51 clear) takes priority over any
+// quiet NaN, and is adjusted on return by setting bit 51 to make it quiet. If
+// both inputs are the same type of NaN then the first input takes priority:
+// the input a is used instead of b.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+
+uint64_t __compiler_rt_dnan2(uint64_t a, uint64_t b) {
+  // Shift a and b left by one to discard the sign bit, which moves the quiet
+  // bit from bit 51 up to bit 52, then add 1 at that position. A signalling
+  // NaN (quiet bit clear, mantissa nonzero) then lies strictly above
+  // 0xfff0000000000000, while a shifted infinity lands exactly on that value
+  // and is excluded by the strict '>' below. A quiet NaN (quiet bit set)
+  // carries out of the top of the word and wraps round to a value below
+  // 0x0010000000000000. No other input can get there: with any smaller
+  // exponent field the sum cannot wrap, so it stays at or above that bound.
+  uint64_t aadj = (a << 1) + 0x0010000000000000;
+  uint64_t badj = (b << 1) + 0x0010000000000000;
+
+  if (aadj > 0xfff0000000000000)   // a is a signalling NaN?
+    return a | 0x0008000000000000; //   if so, return it with the quiet bit set
+  if (badj > 0xfff0000000000000)   // b is a signalling NaN?
+    return b | 0x0008000000000000; //   if so, return it with the quiet bit set
+  if (aadj < 0x0010000000000000)   // a is a quiet NaN?
+    return a;                      // if so, return it
+  else /* expect (badj < 0x0010000000000000) */
+    return b;                      // in that case b must be a quiet NaN
+}
diff --git a/compiler-rt/lib/builtins/arm/dnorm2.c b/compiler-rt/lib/builtins/arm/dnorm2.c
new file mode 100644
index 0000000000000..da605ca22aabe
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/dnorm2.c
@@ -0,0 +1,59 @@
+//===-- dnorm2.c - Handle double-precision denormal inputs to binary op ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This helper function is available for use by double-precision float
+// arithmetic implementations, to handle denormal inputs on entry by
+// renormalizing the mantissa and modifying the exponent to match.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+
+// Structure containing the function's inputs and outputs.
+//
+// On entry: a, b are two input floating-point numbers, still in IEEE 754
+// encoding. expa and expb are the 11-bit exponents of those numbers, extracted
+// and shifted down to the low 11 bits of the word, with no other change.
+// Neither value should be zero, or have the maximum exponent (indicating an
+// infinity or NaN).
+//
+// On exit: each of a and b contains the mantissa of the input value, with the
+// leading 1 bit made explicit, and shifted up to bit 52 (the same place it
+// would have been if the number was normalized already). If expa was zero
+// (indicating that a was denormal) then it is now represented as a normalized
+// number with an out-of-range exponent (zero or negative). The same applies to
+// expb and b.
+//
+// The sign bits from the input floating-point numbers are discarded
+// completely. The caller is expected to have stored those somewhere
+// safe already.
+struct dnorm2 {
+  uint64_t a, b;       // in: IEEE 754 encodings; out: normalized mantissas
+  uint32_t expa, expb; // in: raw exponent fields; out: adjusted exponents
+};
+
+void __compiler_rt_dnorm2(struct dnorm2 *values) {
+  values->a &= ~0xFFF0000000000000; // clear sign and exponent, keep mantissa
+  values->b &= ~0xFFF0000000000000;
+
+  if (values->expa == 0) { // denormal: mantissa must be nonzero for clzll
+    unsigned shift = __builtin_clzll(values->a) - 11; // clz is 11 at bit 52
+    values->a <<= shift;      // moves the top set bit up to bit 52
+    values->expa = 1 - shift; // zero or negative; negatives wrap as unsigned
+  } else {
+    values->a |= 0x0010000000000000; // normalized: add the implicit leading 1
+  }
+
+  if (values->expb == 0) { // same treatment for b
+    unsigned shift = __builtin_clzll(values->b) - 11;
+    values->b <<= shift;
+    values->expb = 1 - shift;
+  } else {
+    values->b |= 0x0010000000000000;
+  }
+}
diff --git a/compiler-rt/lib/builtins/arm/dunder.c b/compiler-rt/lib/builtins/arm/dunder.c
new file mode 100644
index 0000000000000..026bf76d50261
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/dunder.c
@@ -0,0 +1,78 @@
+//===-- dunder.c - Handle double-precision floating-point underflow -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This helper function is available for use by double-precision float
+// arithmetic implementations to handle underflowed output values, if they were
+// computed in the form of a normalized mantissa and an out-of-range exponent.
+//
+// On input: x should be a complete IEEE 754 floating-point value representing
+// the desired output scaled up by 2^1536 (the same value that would have been
+// passed to an underflow trap handler in IEEE 754:1985).
+//
+// This isn't enough information to re-round to the correct output denormal
+// without also knowing whether x itself has already been rounded, and which
+// way. 'errsign' gives this information, by indicating the sign of the value
+// (true result - x). That is, if errsign > 0 it means the true value was
+// larger (x was rounded down); if errsign < 0 then x was rounded up; if
+// errsign == 0 then x represents the _exact_ desired output value.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+
+#define SIGNBIT 0x8000000000000000 // sign bit of an IEEE 754 double
+#define MANTSIZE 52                // number of stored mantissa bits
+#define BIAS 0x600                 // exponent offset (1536) applied to input x
+
+uint64_t __compiler_rt_dunder(uint64_t x, uint32_t errsign) {
+  uint64_t sign = x & SIGNBIT;
+  uint64_t exponent = (x << 1) >> 53; // drop the sign, keep 11 exponent bits
+
+  // Rule out exponents so small (or large!) that no denormalisation
+  // is needed.
+  if (exponent > BIAS) {
+    // Exponent 0x601 or above means a normalised number got here by
+    // mistake, so we just remove the 0x600 exponent bias and go
+    // straight home.
+    return x - ((uint64_t)BIAS << MANTSIZE);
+  }
+  uint32_t bits_lost = BIAS + 1 - exponent; // at least 1, as exponent <= BIAS
+  if (bits_lost > MANTSIZE + 1) {
+    // The implicit leading 1 of the intermediate value's mantissa is
+    // below the lowest mantissa bit of a denormal by at least 2 bits.
+    // Round down to 0 unconditionally.
+    return sign;
+  }
+
+  // Make the full mantissa (with leading bit) at the top of the word.
+  uint64_t mantissa = 0x8000000000000000 | (x << 11); // low 11 bits are zero
+  // Nudge by 1 unit in those zero low bits, in the direction of the error.
+  mantissa -= errsign >> 31;    // -1 if errsign's top bit is set (negative)
+  mantissa += (-errsign) >> 31; // +1 if -errsign's top bit is set (positive)
+
+  // Shift down to the output position, keeping the bits shifted off.
+  uint64_t outmant, shifted_off;
+  if (bits_lost == MANTSIZE + 1) {
+    // Special case for the exponent where we have to shift the whole
+    // of 'mantissa' off the bottom of the word.
+    outmant = 0;
+    shifted_off = mantissa;
+  } else {
+    outmant = mantissa >> (11 + bits_lost);            // shift is 12..63 here
+    shifted_off = mantissa << (64 - (11 + bits_lost)); // shift is 1..52 here
+  }
+
+  // Re-round.
+  if (shifted_off >> 63) { // discarded part is at least half a unit
+    outmant++;
+    if (!(shifted_off << 1))
+      outmant &= ~1; // halfway case: round to even
+  }
+
+  return sign | outmant;
+}
diff --git a/compiler-rt/lib/builtins/arm/endian.h b/compiler-rt/lib/builtins/arm/endian.h
new file mode 100644
index 0000000000000..f603fa9cc678f
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/endian.h
@@ -0,0 +1,37 @@
+//===-- endian.h - make double-prec software FP work in both endiannesses -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file should be included from assembly source code (not C). It
+// defines two pairs of register aliases, for handling 64-bit values passed and
+// returned from functions in the AArch32 integer registers:
+//
+//   xh, xl      the high and low words of a 64-bit value passed in {r0,r1}
+//   yh, yl      the high and low words of a 64-bit value passed in {r2,r3}
+//
+// Which alias goes with which register depends on endianness.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef COMPILER_RT_ARM_FP_ENDIAN_H
+#define COMPILER_RT_ARM_FP_ENDIAN_H
+
+#ifdef __BIG_ENDIAN__
+// Big-endian: each pair's high word is in its lower-numbered register.
+xh .req r0
+xl .req r1
+yh .req r2
+yl .req r3
+#else
+// Little-endian: each pair's low word is in its lower-numbered register.
+xl .req r0
+xh .req r1
+yl .req r2
+yh .req r3
+#endif
+
+#endif // COMPILER_RT_ARM_FP_ENDIAN_H

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [compiler-rt] [compiler-rt][ARM] Double-precision FP support functions (PR #179920)

Reply via email to