[llvm-branch-commits] [compiler-rt] [compiler-rt][ARM] Optimized double-precision FP comparisons (PR #179924)

via llvm-branch-commits Thu, 05 Feb 2026 04:04:30 -0800

github-actions[bot] wrote:

<!--LLVM CODE FORMAT COMMENT: {clang-format}-->



:warning: C/C++ code formatter, clang-format found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
git-clang-format --diff origin/main HEAD --extensions h,c -- compiler-rt/lib/builtins/arm/dcmp.h compiler-rt/lib/builtins/arm/thumb1/dcmp.h compiler-rt/test/builtins/Unit/comparedf2new_test.c --diff_from_common_commit
``````````

:warning:
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing `origin/main` to the base branch/commit you want to compare against.
:warning:

</details>

<details>
<summary>
View the diff from clang-format here.
</summary>

``````````diff
diff --git a/compiler-rt/lib/builtins/arm/dcmp.h b/compiler-rt/lib/builtins/arm/dcmp.h
index c9fd0ef32..ed790e4f7 100644
--- a/compiler-rt/lib/builtins/arm/dcmp.h
+++ b/compiler-rt/lib/builtins/arm/dcmp.h
@@ -55,156 +55,161 @@
 //  - if the 11 exponent bits of the output are not all 1, then there are
 //    definitely no NaNs, so a fast path can handle most non-NaN cases.
 
-  // First diverge control for the negative-numbers case.
-  orrs    r12, op0h, op1h
-  bmi     LOCAL_LABEL(negative)         // high bit set => at least one negative input
+// First diverge control for the negative-numbers case.
+orrs r12, op0h,
+    op1h bmi
+    LOCAL_LABEL(negative) // high bit set => at least one negative input
 
-  // Here, both inputs are positive. Try adding 1<<20 to their bitwise OR in
-  // r12. This will carry all the way into the top bit, setting the N flag, if
-  // all 11 exponent bits were set.
-  cmn     r12, #1 << 20
-  bmi     LOCAL_LABEL(NaNInf_check_positive) // need to look harder for NaNs
+// Here, both inputs are positive. Try adding 1<<20 to their bitwise OR in
+// r12. This will carry all the way into the top bit, setting the N flag, if
+// all 11 exponent bits were set.
+cmn r12, #1 << 20 bmi LOCAL_LABEL(
+             NaNInf_check_positive) // need to look harder for NaNs
 
-  // The fastest fast path: both inputs positive and we could easily tell there
-  // were no NaNs. So we just compare op0 and op1 as unsigned integers.
-  cmp     op0h, op1h
-  SetReturnRegisterNE
-  bxne    lr
-  cmp     op0l, op1l
-  SetReturnRegister
-  bx      lr
+// The fastest fast path: both inputs positive and we could easily tell there
+// were no NaNs. So we just compare op0 and op1 as unsigned integers.
+cmp op0h, op1h SetReturnRegisterNE bxne lr cmp op0l,
+    op1l SetReturnRegister bx lr
 
-LOCAL_LABEL(NaNInf_check_positive):
-  // Second tier for positive numbers. We come here if both inputs are
-  // positive, but our fast initial check didn't manage to rule out a NaN. But
-  // it's not guaranteed that there _is_ a NaN, for two reasons:
-  //
-  //  1. An input with exponent 0x7FF might be an infinity instead. Those
-  //     behave normally under comparison.
-  //
-  //  2. There might not even _be_ an input with exponent 0x7FF. All we know so
-  //     far is that the two inputs ORed together had all the exponent bits
-  //     set. So each of those bits is set in _at least one_ of the inputs, but
-  //     not necessarily all in the _same_ input.
-  //
-  // Test each exponent individually for 0x7FF, using the same CMN idiom as
-  // above. If neither one carries into the sign bit then we have no NaNs _or_
-  // infinities and can compare the registers and return again.
-  cmn     op0h, #1 << 20
-  cmnpl   op1h, #1 << 20
-  bmi     LOCAL_LABEL(NaN_check_positive)
+    LOCAL_LABEL(NaNInf_check_positive)
+    : // Second tier for positive numbers. We come here if both inputs are
+      // positive, but our fast initial check didn't manage to rule out a NaN.
+      // But it's not guaranteed that there _is_ a NaN, for two reasons:
+      //
+      //  1. An input with exponent 0x7FF might be an infinity instead. Those
+      //     behave normally under comparison.
+      //
+      //  2. There might not even _be_ an input with exponent 0x7FF. All we know
+      //  so
+      //     far is that the two inputs ORed together had all the exponent bits
+      //     set. So each of those bits is set in _at least one_ of the inputs,
+      //     but not necessarily all in the _same_ input.
+      //
+      // Test each exponent individually for 0x7FF, using the same CMN idiom as
+      // above. If neither one carries into the sign bit then we have no NaNs
+      // _or_ infinities and can compare the registers and return again.
+      cmn op0h, #1 << 20 cmnpl op1h,
+      #1 << 20 bmi LOCAL_LABEL(NaN_check_positive)
 
-  // Second-tier return path, now we've ruled out anything difficult. By this
-  // time we know that the two operands have different exponents (because the
-  // exponents' bitwise OR is 0x7FF but neither one is 0x7FF by itself, so each
-  // must have a set bit not present in the other). So we only need to compare
-  // the high words.
-  cmp     op0h, op1h
-  SetReturnRegister
-  bx      lr
+// Second-tier return path, now we've ruled out anything difficult. By this
+// time we know that the two operands have different exponents (because the
+// exponents' bitwise OR is 0x7FF but neither one is 0x7FF by itself, so each
+// must have a set bit not present in the other). So we only need to compare
+// the high words.
+cmp op0h,
+    op1h SetReturnRegister bx lr
 
-LOCAL_LABEL(NaN_check_positive):
-  // Third tier for positive numbers. Here we know that at least one of the
-  // inputs has exponent 0x7FF. But they might still be infinities rather than
-  // NaNs. So now we must check whether there's an actual NaN.
-  //
-  // We do this by shifting the high word of each input left to get rid of the
-  // sign bit, shifting a bit in at the bottom which is 1 if any bit is set in
-  // the low word. Then we check if the result is _greater_ than 0xFFE00000
-  // (but not equal), via adding 0x00200000 to it and testing for the HI
-  // condition (carry flag set, but Z clear).
-  //
-  // We could have skipped the second-tier check and done this more rigorous
-  // test immediately. But that would cost an extra instruction in the case
-  // where there are no infinities or NaNs, and we assume that that is so much
-  // more common that it's worth optimizing for.
-  cmp     op0l, #1           // set C if op0l is nonzero
-  adc     op0h, op0h, op0h   // shift op0h left, bringing in the C bit
-  cmp     op1l, #1           // set C if op1l is nonzero
-  adc     op1h, op1h, op1h   // shift op1h left, bringing in the C bit
-  cmn     op0h, #1 << 21     // if HI, then op0 is a NaN
-  cmnls   op1h, #1 << 21     // if not HI, then do the same check for op1
-  bhi     LOCAL_LABEL(NaN)           // now, if HI, there's definitely a NaN
+    LOCAL_LABEL(NaN_check_positive)
+    : // Third tier for positive numbers. Here we know that at least one of the
+      // inputs has exponent 0x7FF. But they might still be infinities rather
+      // than NaNs. So now we must check whether there's an actual NaN.
+      //
+      // We do this by shifting the high word of each input left to get rid of
+      // the sign bit, shifting a bit in at the bottom which is 1 if any bit is
+      // set in the low word. Then we check if the result is _greater_ than
+      // 0xFFE00000 (but not equal), via adding 0x00200000 to it and testing for
+      // the HI condition (carry flag set, but Z clear).
+      //
+      // We could have skipped the second-tier check and done this more rigorous
+      // test immediately. But that would cost an extra instruction in the case
+      // where there are no infinities or NaNs, and we assume that that is so
+      // much more common that it's worth optimizing for.
+      cmp op0l, #1 // set C if op0l is nonzero
+      adc op0h,
+      op0h, op0h // shift op0h left, bringing in the C bit
+      cmp op1l,
+      #1 // set C if op1l is nonzero
+      adc op1h,
+      op1h, op1h // shift op1h left, bringing in the C bit
+      cmn op0h,
+      #1 << 21 // if HI, then op0 is a NaN
+          cmnls op1h,
+      #1 << 21                 // if not HI, then do the same check for op1
+          bhi LOCAL_LABEL(NaN) // now, if HI, there's definitely a NaN
 
-  // Now we've finally ruled out NaNs! And we still know both inputs are
-  // positive. So the third-tier return path can just compare the top words
-  // again. (The fact that we've just shifted them left doesn't make a
-  // difference.)
-  cmp     op0h, op1h
-  SetReturnRegister
-  bx      lr
+// Now we've finally ruled out NaNs! And we still know both inputs are
+// positive. So the third-tier return path can just compare the top words
+// again. (The fact that we've just shifted them left doesn't make a
+// difference.)
+cmp op0h,
+    op1h SetReturnRegister bx lr
 
-LOCAL_LABEL(negative):
-  // We come here if at least one operand is negative. We haven't checked for
-  // NaNs at all yet (the sign check came first), so repeat the first-tier
-  // check strategy of seeing if all exponent bits are set in r12.
-  //
-  // On this path, the sign bit in r12 is set, so if adding 1 to the low
-  // exponent bit carries all the way through into the sign bit, it will
-  // _clear_ the sign bit rather than setting it. So we expect MI to be the
-  // "definitely no NaNs" result, where it was PL on the positive branch.
-  cmn     r12, #1 << 20
-  bpl     LOCAL_LABEL(NaNInf_check_negative)
+    LOCAL_LABEL(negative)
+    : // We come here if at least one operand is negative. We haven't checked
+      // for NaNs at all yet (the sign check came first), so repeat the
+      // first-tier check strategy of seeing if all exponent bits are set in
+      // r12.
+      //
+      // On this path, the sign bit in r12 is set, so if adding 1 to the low
+      // exponent bit carries all the way through into the sign bit, it will
+      // _clear_ the sign bit rather than setting it. So we expect MI to be the
+      // "definitely no NaNs" result, where it was PL on the positive branch.
+      cmn r12, #1 << 20 bpl LOCAL_LABEL(NaNInf_check_negative)
 
-  // Now we have no NaNs, but at least one negative number. This gives us two
-  // complications:
-  //
-  //  1. Floating-point numbers are sign/magnitude, not two's complement, so we
-  //     have to consider separately the cases of "both negative" and "one of
-  //     each sign".
-  //
-  //  2. -0 and +0 are required to compare equal.
-  //
-  // But problem #1 is not as hard as it sounds! If both operands are negative,
-  // then we can get the result we want by comparing them as unsigned integers
-  // the opposite way round, because the input with the smaller value (as an
-  // integer) is the larger number in an FP ordering sense. And if one operand
-  // is negative and the other is positive, the _same_ reversed comparison
-  // works, because the positive number (with zero sign bit) will always
-  // compare less than the negative one in an unsigned-integers sense.
-  //
-  // So we only have to worry about problem #2, signed zeroes. This only
-  // affects the answer if _both_ operands are zero. So we check that by
-  // testing all bits of both operands apart from the sign bit.
-  orrs    r12, op0l, op0h, LSL #1 // EQ if op0 is zero
-  orrseq  r12, op1l, op1h, LSL #1 // now only EQ if both are zero
-  cmpne   op1h, op0h              // otherwise, compare them backwards
-  SetReturnRegisterNE
-  bxne    lr
-  cmp     op1l, op0l
-  SetReturnRegister
-  bx      lr
+// Now we have no NaNs, but at least one negative number. This gives us two
+// complications:
+//
+//  1. Floating-point numbers are sign/magnitude, not two's complement, so we
+//     have to consider separately the cases of "both negative" and "one of
+//     each sign".
+//
+//  2. -0 and +0 are required to compare equal.
+//
+// But problem #1 is not as hard as it sounds! If both operands are negative,
+// then we can get the result we want by comparing them as unsigned integers
+// the opposite way round, because the input with the smaller value (as an
+// integer) is the larger number in an FP ordering sense. And if one operand
+// is negative and the other is positive, the _same_ reversed comparison
+// works, because the positive number (with zero sign bit) will always
+// compare less than the negative one in an unsigned-integers sense.
+//
+// So we only have to worry about problem #2, signed zeroes. This only
+// affects the answer if _both_ operands are zero. So we check that by
+// testing all bits of both operands apart from the sign bit.
+orrs r12, op0l, op0h,
+    LSL #1 // EQ if op0 is zero
+    orrseq r12,
+    op1l, op1h,
+    LSL #1 // now only EQ if both are zero
+    cmpne op1h,
+    op0h // otherwise, compare them backwards
+        SetReturnRegisterNE bxne lr cmp op1l,
+    op0l SetReturnRegister bx lr
 
-LOCAL_LABEL(NaNInf_check_negative):
-  // Second tier for negative numbers: we know the OR of the exponents is 0xFF,
-  // but again, we might not have either _actual_ exponent 0xFF, and also, an
-  // exponent 0xFF might be an infinity instead of a NaN.
-  //
-  // On this path we've already branched twice (once for negative numbers and
-  // once for the first-tier NaN check), so we'll just go straight to the
-  // precise check for NaNs.
-  //
-  // Like the NaNInf_check_positive case, we do each NaN check by making a
-  // word consisting of (high word << 1) OR (1 if low word is nonzero). But
-  // unlike the positive case, we can't make those words _in place_,
-  // overwriting op0h and op1h themselves, because that would shift the sign
-  // bits off the top, and we still need the sign bits to get the comparison
-  // right. (In the positive case, we knew both sign bits were 0, enabling a
-  // shortcut.)
-  cmp     op0l, #1           // set C if op0l is nonzero
-  adc     r12, op0h, op0h    // shift op0h left, bringing in the C bit
-  cmn     r12, #1 << 21      // if HI, then op0 is a NaN
-  bhi     LOCAL_LABEL(NaN)
-  cmp     op1l, #1           // set C if op1l is nonzero
-  adc     r12, op1h, op1h    // shift op1h left, bringing in the C bit
-  cmn     r12, #1 << 21      // if HI, then op1 is a NaN
-  bhi     LOCAL_LABEL(NaN)
+    LOCAL_LABEL(NaNInf_check_negative)
+    : // Second tier for negative numbers: we know the OR of the exponents is
+      // 0xFF, but again, we might not have either _actual_ exponent 0xFF, and
+      // also, an exponent 0xFF might be an infinity instead of a NaN.
+      //
+      // On this path we've already branched twice (once for negative numbers
+      // and once for the first-tier NaN check), so we'll just go straight to
+      // the precise check for NaNs.
+      //
+      // Like the NaNInf_check_positive case, we do each NaN check by making a
+      // word consisting of (high word << 1) OR (1 if low word is nonzero). But
+      // unlike the positive case, we can't make those words _in place_,
+      // overwriting op0h and op1h themselves, because that would shift the sign
+      // bits off the top, and we still need the sign bits to get the comparison
+      // right. (In the positive case, we knew both sign bits were 0, enabling a
+      // shortcut.)
+      cmp op0l, #1 // set C if op0l is nonzero
+      adc r12,
+      op0h, op0h // shift op0h left, bringing in the C bit
+      cmn r12,
+      #1 << 21 // if HI, then op0 is a NaN
+          bhi LOCAL_LABEL(NaN)
+cmp op1l,
+    #1 // set C if op1l is nonzero
+    adc r12,
+    op1h,
+    op1h // shift op1h left, bringing in the C bit
+        cmn r12,
+    #1 << 21 // if HI, then op1 is a NaN
+        bhi LOCAL_LABEL(NaN)
 
-  // Now we've ruled out NaNs, so we can just compare the two input registers
-  // and return. On this path we _don't_ need to check for the special case of
-  // comparing two zeroes, because we only came here if the bitwise OR of the
-  // exponent fields was 0x7FF, which means the exponents can't both have been
-  // zero! So we can _just_ do the reversed CMP and finish.
-  cmp     op1h, op0h
-  SetReturnRegister
-  bx      lr
+// Now we've ruled out NaNs, so we can just compare the two input registers
+// and return. On this path we _don't_ need to check for the special case of
+// comparing two zeroes, because we only came here if the bitwise OR of the
+// exponent fields was 0x7FF, which means the exponents can't both have been
+// zero! So we can _just_ do the reversed CMP and finish.
+cmp op1h, op0h SetReturnRegister bx lr
diff --git a/compiler-rt/lib/builtins/arm/thumb1/dcmp.h b/compiler-rt/lib/builtins/arm/thumb1/dcmp.h
index fc45b5d46..2867e94f4 100644
--- a/compiler-rt/lib/builtins/arm/thumb1/dcmp.h
+++ b/compiler-rt/lib/builtins/arm/thumb1/dcmp.h
@@ -49,188 +49,195 @@
 //  - if the 11 exponent bits of the output are not all 1, then there are
 //    definitely no NaNs, so a fast path can handle most non-NaN cases.
 
-  push    {r4,r5,r6,lr}
-
-  // Set up the constant 1 << 20 in a register, which we'll need on all
-  // branches.
-  movs    r5, #1
-  lsls    r5, r5, #20
-
-  // First diverge control for the negative-numbers case.
-  movs    r4, op0h
-  orrs    r4, r4, op1h
-  bmi     LOCAL_LABEL(negative)         // high bit set => at least one negative input
-
-  // Here, both inputs are positive. Try adding 1<<20 to their bitwise OR in
-  // r4. This will carry all the way into the top bit, setting the N flag, if
-  // all 11 exponent bits were set.
-  cmn     r4, r5
-  bmi     LOCAL_LABEL(NaNInf_check_positive) // need to look harder for NaNs
-
-  // The fastest fast path: both inputs positive and we could easily tell there
-  // were no NaNs. So we just compare op0 and op1 as unsigned integers.
-  cmp     op0h, op1h
-  beq     LOCAL_LABEL(low_word_positive)
-  SetReturnRegister
-  pop     {r4,r5,r6,pc}
-LOCAL_LABEL(low_word_positive):
-  cmp     op0l, op1l
-  SetReturnRegister
-  pop     {r4,r5,r6,pc}
-
-LOCAL_LABEL(NaNInf_check_positive):
-  // Second tier for positive numbers. We come here if both inputs are
-  // positive, but our fast initial check didn't manage to rule out a NaN. But
-  // it's not guaranteed that there _is_ a NaN, for two reasons:
-  //
-  //  1. An input with exponent 0x7FF might be an infinity instead. Those
-  //     behave normally under comparison.
-  //
-  //  2. There might not even _be_ an input with exponent 0x7FF. All we know so
-  //     far is that the two inputs ORed together had all the exponent bits
-  //     set. So each of those bits is set in _at least one_ of the inputs, but
-  //     not necessarily all in the _same_ input.
-  //
-  // Test each exponent individually for 0x7FF, using the same CMN idiom as
-  // above. If neither one carries into the sign bit then we have no NaNs _or_
-  // infinities and can compare the registers and return again.
-  cmn     op0h, r5
-  bmi     LOCAL_LABEL(NaN_check_positive)
-  cmn     op1h, r5
-  bmi     LOCAL_LABEL(NaN_check_positive)
-
-  // Second-tier return path, now we've ruled out anything difficult. By this
-  // time we know that the two operands have different exponents (because the
-  // exponents' bitwise OR is 0x7FF but neither one is 0x7FF by itself, so each
-  // must have a set bit not present in the other). So we only need to compare
-  // the high words.
-  cmp     op0h, op1h
-  SetReturnRegister
-  pop     {r4,r5,r6,pc}
-
-LOCAL_LABEL(NaN_check_positive):
-  // Third tier for positive numbers. Here we know that at least one of the
-  // inputs has exponent 0x7FF. But they might still be infinities rather than
-  // NaNs. So now we must check whether there's an actual NaN.
-  //
-  // We do this by shifting the high word of each input left to get rid of the
-  // sign bit, shifting a bit in at the bottom which is 1 if any bit is set in
-  // the low word. Then we check if the result is _greater_ than 0xFFE00000
-  // (but not equal), via adding 0x00200000 to it and testing for the HI
-  // condition (carry flag set, but Z clear).
-  //
-  // We could have skipped the second-tier check and done this more rigorous
-  // test immediately. But that would cost an extra instruction in the case
-  // where there are no infinities or NaNs, and we assume that that is so much
-  // more common that it's worth optimizing for.
-  lsls    r6, r5, #1         // set r6 = 1<<21
-  cmp     op0l, #1           // set C if op0l is nonzero
-  adcs    op0h, op0h, op0h   // shift op0h left, bringing in the C bit
-  cmn     op0h, r6           // if HI, then op0 is a NaN
-  bhi     LOCAL_LABEL(NaN)
-  cmp     op1l, #1           // set C if op1l is nonzero
-  adcs    op1h, op1h, op1h   // shift op1h left, bringing in the C bit
-  cmn     op1h, r6           // if HI, then op1 is a NaN
-  bhi     LOCAL_LABEL(NaN)
-
-  // Now we've finally ruled out NaNs! And we still know both inputs are
-  // positive. So the third-tier return path can just compare the top words
-  // again. (The fact that we've just shifted them left doesn't make a
-  // difference.)
-  cmp     op0h, op1h
-  SetReturnRegister
-  pop     {r4,r5,r6,pc}
-
-LOCAL_LABEL(negative):
-  // We come here if at least one operand is negative. We haven't checked for
-  // NaNs at all yet (the sign check came first), so repeat the first-tier
-  // check strategy of seeing if all exponent bits are set in r12.
-  //
-  // On this path, the sign bit in r12 is set, so if adding 1 to the low
-  // exponent bit carries all the way through into the sign bit, it will
-  // _clear_ the sign bit rather than setting it. So we expect MI to be the
-  // "definitely no NaNs" result, where it was PL on the positive branch.
-  cmn     r4, r5
-  bpl     LOCAL_LABEL(NaNInf_check_negative)
-
-  // Now we have no NaNs, but at least one negative number. This gives us two
-  // complications:
-  //
-  //  1. Floating-point numbers are sign/magnitude, not two's complement, so we
-  //     have to consider separately the cases of "both negative" and "one of
-  //     each sign".
-  //
-  //  2. -0 and +0 are required to compare equal.
-  //
-  // But problem #1 is not as hard as it sounds! If both operands are negative,
-  // then we can get the result we want by comparing them as unsigned integers
-  // the opposite way round, because the input with the smaller value (as an
-  // integer) is the larger number in an FP ordering sense. And if one operand
-  // is negative and the other is positive, the _same_ reversed comparison
-  // works, because the positive number (with zero sign bit) will always
-  // compare less than the negative one in an unsigned-integers sense.
-  //
-  // So we only have to worry about problem #2, signed zeroes. This only
-  // affects the answer if _both_ operands are zero. So we check that by
-  // testing all bits of both operands apart from the sign bit.
-  lsls    r6, r4, #1         // logical OR of both high words except the signs
-  orrs    r6, r6, op0l       // combine that with the low word of op0
-  orrs    r6, r6, op1l       // and op1, so now only EQ if both are zero
-  beq     LOCAL_LABEL(equal)
-  // Now we've ruled out confusing zero cases, just compare the operands in
-  // reverse sense.
-  cmp     op1h, op0h
-  beq     LOCAL_LABEL(low_word_negative)
-  SetReturnRegister
-  pop     {r4,r5,r6,pc}
-LOCAL_LABEL(low_word_negative):
-  cmp     op1l, op0l
-  SetReturnRegister
-  pop     {r4,r5,r6,pc}
-
-LOCAL_LABEL(equal):
-  // We come here if we know the inputs are supposed to compare equal. Set up
-  // the flags by comparing a register with itself.
-  //
-  // (We might have come here via a BEQ, in which case we know Z=1, but we also
-  // need C=1 for our caller to get _all_ the right flags.)
-  cmp     r0, r0             // compare a register with itself
-  SetReturnRegister
-  pop     {r4,r5,r6,pc}
-
-LOCAL_LABEL(NaNInf_check_negative):
-  // Second tier for negative numbers: we know the OR of the exponents is 0xFF,
-  // but again, we might not have either _actual_ exponent 0xFF, and also, an
-  // exponent 0xFF might be an infinity instead of a NaN.
-  //
-  // On this path we've already branched twice (once for negative numbers and
-  // once for the first-tier NaN check), so we'll just go straight to the
-  // precise check for NaNs.
-  //
-  // Like the NaNInf_check_positive case, we do each NaN check by making a
-  // word consisting of (high word << 1) OR (1 if low word is nonzero). But
-  // unlike the positive case, we can't make those words _in place_,
-  // overwriting op0h and op1h themselves, because that would shift the sign
-  // bits off the top, and we still need the sign bits to get the comparison
-  // right. (In the positive case, we knew both sign bits were 0, enabling a
-  // shortcut.)
-  lsls    r6, r5, #1         // set r6 = 1<<21
-  movs    r4, op0h           // copy op0h into a scratch register to modify
-  cmp     op0l, #1           // set C if op0l is nonzero
-  adcs    r4, r4, r4         // shift left, bringing in the C bit
-  cmn     r4, r6             // if HI, then op0 is a NaN
-  bhi     LOCAL_LABEL(NaN)
-  movs    r4, op1h           // copy op1h into a scratch register to modify
-  cmp     op1l, #1           // set C if op1l is nonzero
-  adcs    r4, r4, r4         // shift left, bringing in the C bit
-  cmn     r4, r6             // if HI, then op1 is a NaN
-  bhi     LOCAL_LABEL(NaN)
-
-  // Now we've ruled out NaNs, so we can just compare the two input registers
-  // and return. On this path we _don't_ need to check for the special case of
-  // comparing two zeroes, because we only came here if the bitwise OR of the
-  // exponent fields was 0x7FF, which means the exponents can't both have been
-  // zero! So we can _just_ do the reversed CMP and finish.
-  cmp     op1h, op0h
-  SetReturnRegister
-  pop     {r4,r5,r6,pc}
+push{r4, r5, r6, lr}
+
+// Set up the constant 1 << 20 in a register, which we'll need on all
+// branches.
+movs r5,
+    #1 lsls r5, r5,
+    #20
+
+    // First diverge control for the negative-numbers case.
+    movs r4,
+    op0h orrs r4, r4,
+    op1h
+        bmi LOCAL_LABEL(negative) // high bit set => at least one negative input
+
+// Here, both inputs are positive. Try adding 1<<20 to their bitwise OR in
+// r4. This will carry all the way into the top bit, setting the N flag, if
+// all 11 exponent bits were set.
+cmn r4,
+    r5 bmi LOCAL_LABEL(NaNInf_check_positive) // need to look harder for NaNs
+
+// The fastest fast path: both inputs positive and we could easily tell there
+// were no NaNs. So we just compare op0 and op1 as unsigned integers.
+cmp op0h, op1h beq LOCAL_LABEL(low_word_positive)
+SetReturnRegister pop{r4, r5, r6, pc} LOCAL_LABEL(low_word_positive)
+    : cmp op0l, op1l SetReturnRegister pop{r4, r5, r6, pc}
+
+      LOCAL_LABEL(NaNInf_check_positive)
+    : // Second tier for positive numbers. We come here if both inputs are
+      // positive, but our fast initial check didn't manage to rule out a NaN.
+      // But it's not guaranteed that there _is_ a NaN, for two reasons:
+      //
+      //  1. An input with exponent 0x7FF might be an infinity instead. Those
+      //     behave normally under comparison.
+      //
+      //  2. There might not even _be_ an input with exponent 0x7FF. All we know
+      //  so
+      //     far is that the two inputs ORed together had all the exponent bits
+      //     set. So each of those bits is set in _at least one_ of the inputs,
+      //     but not necessarily all in the _same_ input.
+      //
+      // Test each exponent individually for 0x7FF, using the same CMN idiom as
+      // above. If neither one carries into the sign bit then we have no NaNs
+      // _or_ infinities and can compare the registers and return again.
+      cmn op0h, r5 bmi LOCAL_LABEL(NaN_check_positive)
+cmn op1h, r5 bmi LOCAL_LABEL(NaN_check_positive)
+
+// Second-tier return path, now we've ruled out anything difficult. By this
+// time we know that the two operands have different exponents (because the
+// exponents' bitwise OR is 0x7FF but neither one is 0x7FF by itself, so each
+// must have a set bit not present in the other). So we only need to compare
+// the high words.
+cmp op0h,
+    op1h SetReturnRegister pop{r4, r5, r6, pc}
+
+LOCAL_LABEL(NaN_check_positive)
+    : // Third tier for positive numbers. Here we know that at least one of the
+      // inputs has exponent 0x7FF. But they might still be infinities rather
+      // than NaNs. So now we must check whether there's an actual NaN.
+      //
+      // We do this by shifting the high word of each input left to get rid of
+      // the sign bit, shifting a bit in at the bottom which is 1 if any bit is
+      // set in the low word. Then we check if the result is _greater_ than
+      // 0xFFE00000 (but not equal), via adding 0x00200000 to it and testing for
+      // the HI condition (carry flag set, but Z clear).
+      //
+      // We could have skipped the second-tier check and done this more rigorous
+      // test immediately. But that would cost an extra instruction in the case
+      // where there are no infinities or NaNs, and we assume that that is so
+      // much more common that it's worth optimizing for.
+      lsls r6, r5, #1 // set r6 = 1<<21
+      cmp op0l,
+      #1 // set C if op0l is nonzero
+      adcs op0h,
+      op0h, op0h // shift op0h left, bringing in the C bit
+      cmn op0h,
+      r6 // if HI, then op0 is a NaN
+      bhi LOCAL_LABEL(NaN)
+cmp op1l,
+    #1 // set C if op1l is nonzero
+    adcs op1h,
+    op1h,
+    op1h // shift op1h left, bringing in the C bit
+        cmn op1h,
+    r6 // if HI, then op1 is a NaN
+        bhi LOCAL_LABEL(NaN)
+
+// Now we've finally ruled out NaNs! And we still know both inputs are
+// positive. So the third-tier return path can just compare the top words
+// again. (The fact that we've just shifted them left doesn't make a
+// difference.)
+cmp op0h,
+    op1h SetReturnRegister pop{r4, r5, r6, pc}
+
+LOCAL_LABEL(negative)
+    : // We come here if at least one operand is negative. We haven't checked
+      // for NaNs at all yet (the sign check came first), so repeat the
+      // first-tier check strategy of seeing if all exponent bits are set in
+      // r12.
+      //
+      // On this path, the sign bit in r12 is set, so if adding 1 to the low
+      // exponent bit carries all the way through into the sign bit, it will
+      // _clear_ the sign bit rather than setting it. So we expect MI to be the
+      // "definitely no NaNs" result, where it was PL on the positive branch.
+      cmn r4, r5 bpl LOCAL_LABEL(NaNInf_check_negative)
+
+// Now we have no NaNs, but at least one negative number. This gives us two
+// complications:
+//
+//  1. Floating-point numbers are sign/magnitude, not two's complement, so we
+//     have to consider separately the cases of "both negative" and "one of
+//     each sign".
+//
+//  2. -0 and +0 are required to compare equal.
+//
+// But problem #1 is not as hard as it sounds! If both operands are negative,
+// then we can get the result we want by comparing them as unsigned integers
+// the opposite way round, because the input with the smaller value (as an
+// integer) is the larger number in an FP ordering sense. And if one operand
+// is negative and the other is positive, the _same_ reversed comparison
+// works, because the positive number (with zero sign bit) will always
+// compare less than the negative one in an unsigned-integers sense.
+//
+// So we only have to worry about problem #2, signed zeroes. This only
+// affects the answer if _both_ operands are zero. So we check that by
+// testing all bits of both operands apart from the sign bit.
+lsls r6, r4,
+    #1 // logical OR of both high words except the signs
+    orrs r6,
+    r6,
+    op0l // combine that with the low word of op0
+        orrs r6,
+    r6,
+    op1l // and op1, so now only EQ if both are zero
+        beq LOCAL_LABEL(equal)
+// Now we've ruled out confusing zero cases, just compare the operands in
+// reverse sense.
+cmp op1h, op0h beq LOCAL_LABEL(low_word_negative)
+SetReturnRegister pop{r4, r5, r6, pc} LOCAL_LABEL(low_word_negative)
+    : cmp op1l, op0l SetReturnRegister pop{r4, r5, r6, pc}
+
+      LOCAL_LABEL(equal)
+    : // We come here if we know the inputs are supposed to compare equal. Set
+      // up the flags by comparing a register with itself.
+      //
+      // (We might have come here via a BEQ, in which case we know Z=1, but we
+      // also need C=1 for our caller to get _all_ the right flags.)
+      cmp r0, r0 // compare a register with itself
+      SetReturnRegister pop{r4, r5, r6, pc}
+
+      LOCAL_LABEL(NaNInf_check_negative)
+    : // Second tier for negative numbers: we know the OR of the exponents is
+      // 0xFF, but again, we might not have either _actual_ exponent 0xFF, and
+      // also, an exponent 0xFF might be an infinity instead of a NaN.
+      //
+      // On this path we've already branched twice (once for negative numbers
+      // and once for the first-tier NaN check), so we'll just go straight to
+      // the precise check for NaNs.
+      //
+      // Like the NaNInf_check_positive case, we do each NaN check by making a
+      // word consisting of (high word << 1) OR (1 if low word is nonzero). But
+      // unlike the positive case, we can't make those words _in place_,
+      // overwriting op0h and op1h themselves, because that would shift the sign
+      // bits off the top, and we still need the sign bits to get the comparison
+      // right. (In the positive case, we knew both sign bits were 0, enabling a
+      // shortcut.)
+      lsls r6, r5, #1 // set r6 = 1<<21
+      movs r4,
+      op0h // copy op0h into a scratch register to modify
+      cmp op0l,
+      #1 // set C if op0l is nonzero
+      adcs r4,
+      r4, r4 // shift left, bringing in the C bit
+      cmn r4,
+      r6 // if HI, then op0 is a NaN
+      bhi LOCAL_LABEL(NaN)
+movs r4,
+    op1h // copy op1h into a scratch register to modify
+        cmp op1l,
+    #1 // set C if op1l is nonzero
+    adcs r4,
+    r4,
+    r4 // shift left, bringing in the C bit
+        cmn r4,
+    r6 // if HI, then op1 is a NaN
+        bhi LOCAL_LABEL(NaN)
+
+// Now we've ruled out NaNs, so we can just compare the two input registers
+// and return. On this path we _don't_ need to check for the special case of
+// comparing two zeroes, because we only came here if the bitwise OR of the
+// exponent fields was 0x7FF, which means the exponents can't both have been
+// zero! So we can _just_ do the reversed CMP and finish.
+cmp op1h, op0h SetReturnRegister pop { r4, r5, r6, pc }
diff --git a/compiler-rt/test/builtins/Unit/comparedf2new_test.c b/compiler-rt/test/builtins/Unit/comparedf2new_test.c
index f78a1a6aa..8a91f9051 100644
--- a/compiler-rt/test/builtins/Unit/comparedf2new_test.c
+++ b/compiler-rt/test/builtins/Unit/comparedf2new_test.c
@@ -20,21 +20,19 @@ COMPILER_RT_ABI int __ltdf2(double, double);
 COMPILER_RT_ABI int __cmpdf2(double, double);
 COMPILER_RT_ABI int __unorddf2(double, double);
 
-enum Result {
-  RESULT_LT,
-  RESULT_GT,
-  RESULT_EQ,
-  RESULT_UN
-};
+enum Result { RESULT_LT, RESULT_GT, RESULT_EQ, RESULT_UN };
 
-int expect(int line, uint64_t a_rep, uint64_t b_rep, const char *name, int result, int ok, const char *expected) {
+int expect(int line, uint64_t a_rep, uint64_t b_rep, const char *name,
+           int result, int ok, const char *expected) {
   if (!ok)
-    printf("error at line %d: %s(%016" PRIx64 ", %016" PRIx64 ") = %d, expected %s\n",
+    printf("error at line %d: %s(%016" PRIx64 ", %016" PRIx64
+           ") = %d, expected %s\n",
            line, name, a_rep, b_rep, result, expected);
   return !ok;
 }
 
-int test__comparedf2(int line, uint64_t a_rep, uint64_t b_rep, enum Result result) {
+int test__comparedf2(int line, uint64_t a_rep, uint64_t b_rep,
+                     enum Result result) {
   double a = fromRep64(a_rep), b = fromRep64(b_rep);
 
   int eq = __eqdf2(a, b);
@@ -94,7 +92,7 @@ int test__comparedf2(int line, uint64_t a_rep, uint64_t b_rep, enum Result resul
   return ret;
 }
 
-#define test__comparedf2(a,b,x) test__comparedf2(__LINE__,a,b,x)
+#define test__comparedf2(a, b, x) test__comparedf2(__LINE__, a, b, x)
 
 int main(void) {
   int status = 0;

``````````

</details>


https://github.com/llvm/llvm-project/pull/179924
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [compiler-rt] [compiler-rt][ARM] Optimized double-precision FP comparisons (PR #179924)

Reply via email to