https://github.com/statham-arm created https://github.com/llvm/llvm-project/pull/179926
This commit provides assembly versions of the conversions both ways between double and float. >From 35dd800cbe1eb4d571c47254530ee75e7b98f500 Mon Sep 17 00:00:00 2001 From: Simon Tatham <[email protected]> Date: Thu, 29 Jan 2026 16:12:53 +0000 Subject: [PATCH] [compiler-rt][ARM] Optimized FP double <-> single conversion This commit provides assembly versions of the conversions both ways between double and float. --- compiler-rt/lib/builtins/CMakeLists.txt | 2 + compiler-rt/lib/builtins/arm/extendsfdf2.S | 195 ++++++++++ compiler-rt/lib/builtins/arm/truncdfsf2.S | 198 ++++++++++ .../test/builtins/Unit/extendsfdf2new_test.c | 123 ++++++ .../test/builtins/Unit/truncdfsf2new_test.c | 367 ++++++++++++++++++ 5 files changed, 885 insertions(+) create mode 100644 compiler-rt/lib/builtins/arm/extendsfdf2.S create mode 100644 compiler-rt/lib/builtins/arm/truncdfsf2.S create mode 100644 compiler-rt/test/builtins/Unit/extendsfdf2new_test.c create mode 100644 compiler-rt/test/builtins/Unit/truncdfsf2new_test.c diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index 0c53781a51392..6b392c8eb22f0 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -456,6 +456,8 @@ if(COMPILER_RT_ARM_OPTIMIZED_FP AND BUILTIN_SUPPORTED_ARCH MATCHES "arm") arm/gesf2.S arm/unorddf2.S arm/unordsf2.S + arm/extendsfdf2.S + arm/truncdfsf2.S ) set_source_files_properties(${assembly_files} PROPERTIES COMPILE_OPTIONS ${implicit_it_flag}) diff --git a/compiler-rt/lib/builtins/arm/extendsfdf2.S b/compiler-rt/lib/builtins/arm/extendsfdf2.S new file mode 100644 index 0000000000000..21518d4a75b1a --- /dev/null +++ b/compiler-rt/lib/builtins/arm/extendsfdf2.S @@ -0,0 +1,195 @@ +//===-- extendsfdf2.S - single- to double-precision FP conversion ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __extendsfdf2 function (single to double precision +// floating point conversion) for the Arm and Thumb2 ISAs. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" +#include "endian.h" + + .syntax unified + .text + .p2align 2 + +#if __ARM_PCS_VFP +DEFINE_COMPILERRT_FUNCTION(__extendsfdf2) + push {r4, lr} + vmov r0, s0 + bl __aeabi_f2d + VMOV_TO_DOUBLE(d0, r0, r1) + pop {r4, pc} +#else +DEFINE_COMPILERRT_FUNCTION_ALIAS(__extendsfdf2, __aeabi_f2d) +#endif + +DEFINE_COMPILERRT_FUNCTION(__aeabi_f2d) + + // Start with the fast path, dealing with normalized single-precision inputs. + // We handle these as quickly as possible in straight-line code, and branch + // out of line to a single 'handle everything else' label which will have to + // figure out what kind of unusual thing has happened. + + // Extend the exponent field by 3 bits, by shifting the sign bit off the top + // of r0 into the carry flag, shifting the rest of the input word right by 3, + // then using RRX to put the sign back. So we end up with a word shaped like + // the top half of a double, but the exponent field is still biased by the + // single-precision offset of 0x7f instead of the double-precision 0x3ff. + lsls r3, r0, #1 + lsr r12, r3, #3 + rrx r12, r12 + + // For a normalized number, the remaining steps are to rebias the exponent, + // recover the remaining 3 mantissa bits from r0 which aren't included in the + // word we've just made, and move both into the right output registers. + // + // But we must also check for the difficult cases. These occur when the input + // exponent is either 0 or 0xFF. Those two values can be identified by the + // property that exp XOR (exp << 1) has the top 7 bits all zero. + + // Do the test for uncommon values. 
Instead of using a shifter operand in the + // obvious way (EOR output, r0, r0, lsl #1), we use the fact that the setup + // code above already has a shifted-left copy of the input word in r3. In + // Thumb, this makes the EORS a 16-bit instruction instead of 32-bit. + eors r3, r3, r0 + + // Now prepare the output, for normal inputs. + // + // We make this pair of instructions conditional on NE, i.e. we skip it if r3 + // and r0 were actually equal (which could only happen if r0 was 0, i.e. the + // input was +0). This is fine, because in that situation the input wasn't + // normalized, so we aren't going to return this output anyway. + // + // The _point_ of conditionalizing these two instructions is that this way we + // have only one IT instruction on the fast path, and it's _here_, where this + // comment is, so that it comes immediately after the above 16-bit EORS and + // can be executed in the same cycle by Cortex-M3. + lslne xl, r0, #29 // xl now has the bottom 3 input mantissa bits + addne xh, r12, #(0x3ff - 0x7f) << 20 // rebias exponent in xh + + // Finally, check whether the test word in r3 has its top 7 exponent bits + // zero. If not, we can return the fast-path answer. + tstne r3, #0x7f000000 + bxne lr + + // Now we've handled the fast-path cases as fast as we know how, what do we + // do next? We almost certainly don't have the input value in r0 any more, + // because we overwrote it by writing an unused output to xh:xl in the above + // code. Worse, we didn't _reliably_ overwrite it, because those writes to + // xh:xl might not have happened if the whole test word in r3 was zero. So + // where can we find the input bits? + // + // We have r3 = input XOR (input << 1). That's actually an invertible + // transformation, so in principle we could recover the full original input + // float from just r3. 
The quickest way to do that involves these five + // instructions (in any order, since they commute): + // + // EOR r3, r3, r3, lsl #16 + // EOR r3, r3, r3, lsl #8 + // EOR r3, r3, r3, lsl #4 + // EOR r3, r3, r3, lsl #2 + // EOR r3, r3, r3, lsl #1 + // + // But that's rather slow, and we can do better. r12 contains most of the + // input bits in a more usable form: we inserted three zero bits between the + // sign and the top of the exponent, but everything from the input is there + // _somewhere_, except for the low 3 bits. + // + // However, on one code path below we'll use a subset of those EOR + // instructions to recover the low 3 bits of the input. + + // First, find out whether the input exponent was 0 (zero or denormal), or + // 0xFF (infinity or NaN). We know it was one of the two, or we would have + // taken the early return from the fast path. So it's enough to test any + // single bit of the exponent in r12. + tst r12, #1<<27 // bit 27 is topmost bit of the 8-bit exponent + bne LOCAL_LABEL(inf_or_nan) + + // If we didn't take that branch, we have a denormal or zero. Zeroes are + // likely to be common, so we'd prefer to handle those with highest priority. + // + // r3 = (input XOR (input << 1)) will take the values 0 or 0x80000000 for a + // zero input. So it contains precisely the right value to return in xh. + // + // The BICS here combines the zeroing of xl with the test of r3, because it + // sets Z if and only if the input was one of those two values, and if so, + // sets xl=0. + // + // Unfortunately this has the side effect of clobbering xl in the case where + // we _don't_ take the early return, so now we've lost our verbatim copy of + // the low 3 input bits! On the denormal-handling path we'll have to recover + // those from r3 more awkwardly. But denormal handling is rare, and slow + // anyway, so it's worth the awkwardness to save a cycle in the much more + // common case of a zero input. 
+ bics xl, r3, #0x80000000 // EQ if output is zero + moveq xh, r3 // if so, copy input sign into xh + bxeq lr // and return + + // Now we know we're dealing with a denormal, so we need to recover the whole + // input mantissa. Most of it is in r12, but those last three bits now need + // to be reconstructed from r3 by using part of the shift+EOR trick shown + // above. We only need the left shifts by 1 and by 2, because the other three + // don't affect the bottom 3 bits at all. + eor r3, r3, r3, lsl #2 + eor r3, r3, r3, lsl #1 + and r3, r3, #7 + + // Now r3 contains just the low bits of the mantissa. The rest of the + // mantissa is in r12, shifted right by 3 bits, so this instruction rebuilds + // the entire input mantissa in xh. (The exponent field is known to be zero, + // and the sign bit at the top of r12 is discarded by the left shift.) + orr xh, r3, r12, lsl #3 + + // Renormalize that input mantissa so that its high bit is at the top of the + // word. + clz r2, xh + lsl xh, xh, r2 + + // Compute the right sign + exponent to go with that mantissa. + // + // If the input mantissa had had only its low bit set, then the input float + // would be 2^-149, which has a double-precision exponent of 0x36a. In that + // situation we'd have r2 = 31 (output from the CLZ). So we need the output + // exponent to be (0x389 - r2). But the leading bit of the mantissa will + // increment the exponent field when we add them together, so in fact we want + // to calculate (0x388 - r2). That's particularly convenient, because 0x388 + // fits in an AArch32 immediate field! + and r3, r12, #0x80000000 // get the sign bit from the top of r12 + add r3, r3, #0x388 << 20 // add the exponent bias as calculated above + sub r3, r3, r2, lsl #20 // subtract the CLZ output + + // Finally, distribute the normalized mantissa across the two output words, + // and combine the top half with the exponent we just computed. 
+ lsls xl, xh, #21 // low word = low 3 bits of normalized mantissa + add xh, r3, xh, lsr #11 // high word = sign + exp + rest of mantissa + bx lr + +LOCAL_LABEL(inf_or_nan): + // We come here if the input was either infinity or a NaN. In this situation + // we can be sure that the instructions that set up the fast-path return + // value _did_ happen, because the input was nonzero. Also we branched away + // before the test for a zero input clobbered xl. + // + // So xh:xl will contain what _would_ be the right output value if 0xFF were + // not a special input: the exponent field will be 0x47f, and the sign and + // mantissa will be in place. + // + // This is almost exactly what we really want to return, except for two + // things: the exponent should be corrected to 0x7ff for an output infinity + // or NaN, and if the mantissa is nonzero at all (so that we're returning a + // NaN and not an infinity) then we should set its top bit to make it a quiet + // NaN. + orrs xh, xh, #0x7f000000 // set the missing bits in the exponent field + orrs r2, xl, xh, lsl #12 // is any bit of the mantissa set? + orrne xh, xh, #0x00080000 // if so, set the top mantissa bit + bx lr + +END_COMPILERRT_FUNCTION(__aeabi_f2d) + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/lib/builtins/arm/truncdfsf2.S b/compiler-rt/lib/builtins/arm/truncdfsf2.S new file mode 100644 index 0000000000000..d87fce8d1bcbb --- /dev/null +++ b/compiler-rt/lib/builtins/arm/truncdfsf2.S @@ -0,0 +1,198 @@ +//===-- truncdfsf2.S - double- to single precision FP conversion ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __truncdfsf2 function (double to single precision +// floating point conversion), with the IEEE-754 default rounding (to nearest, +// ties to even), for the Arm and Thumb2 ISAs. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" +#include "endian.h" + + .syntax unified + .text + .p2align 2 + +#if __ARM_PCS_VFP +DEFINE_COMPILERRT_FUNCTION(__truncdfsf2) + push {r4, lr} + VMOV_FROM_DOUBLE(r0, r1, d0) + bl __aeabi_d2f + vmov s0, r0 + pop {r4, pc} +#else +DEFINE_COMPILERRT_FUNCTION_ALIAS(__truncdfsf2, __aeabi_d2f) +#endif + +DEFINE_COMPILERRT_FUNCTION(__aeabi_d2f) + + // Start with the fast path, dealing with input values that give a normalized + // single-precision output. We handle these as quickly as possible in + // straight-line code, and branch out of line to a single 'handle everything + // else' label which will have to figure out what kind of unusual thing has + // happened. + + // Split xh into the sign bit (in r3) and everything else (r2), so that we + // can change the width of the exponent field and then put the sign back on + // later. + bic r2, xh, #0x80000000 + and r3, xh, #0x80000000 + + // Rebias the exponent, still in its double-precision location, to account + // for the difference between double- and single-precision exponents. + sub r2, r2, #(0x3ff-0x7f) << 20 + + // If the exponent field is now 0 or less, we have an underflow or an exact + // zero. If it's 0xFF or more, we have an overflow, or a NaN or infinity as + // input. Detect all of those in a combined test, and branch out of line. 
+ cmp r2, #0x00100000 // LO if output too small + rsbshs r12, r2, #0x0ff00000 // otherwise, set LS if output too large + bls LOCAL_LABEL(uncommon) // so now LS means one or the other happened + + // We've disposed of all the uncommon cases, so we know we're returning a + // normalized float, but we might still need to round it. Shift the round bit + // into the C flag, also setting Z if everything below that is zero. + lsls r12, xl, #4 + + // Put the result back together, by recombining the sign (in r3) with the + // exponent and top of the mantissa (in r2, needing to be shifted left 3 + // bits), plus the top 3 bits of xl. The last of those is put on with an ADC + // instruction, which also rounds up if the bit we just shifted into C was + // set. + orr r2, r3, r2, lsl #3 // sign + exponent + most of mantissa + adc r0, r2, xl, lsr #29 // low 3 bits of mantissa + maybe round up + + // If C=1 and Z=1, we need to round to even. Otherwise we're finished. So we + // conditionally return based on one of those flags, then clear the low + // output bit based on the other. + // + // Which way round? On the assumption that input mantissas are roughly + // uniformly distributed, _almost all_ input doubles will contain a 1 bit + // somewhere in the bottom 28 bits, so we return early in the vast majority + // of cases by testing Z first. If we tested C first, we'd expect to return + // early only half the time, costing two extra instructions half the time + // instead of 1/2^28 of the time. + // + // (That's a bit optimistic, because of course in some situations input + // mantissas _won't_ be that uniform. In particular, if you converted from a + // float, did a small amount of calculation in double, and converted back, + // the round-to-even case might come up more often. But at least _some_ + // applications will be passing doubles that make use of the whole mantissa, + // so I think this is still the more sensible way round to do the test.) 
+ bxne lr // return if Z=0 + biccs r0, r0, #1 // Z=1, so round to even if C=1 too + bx lr // and now return unconditionally + +LOCAL_LABEL(uncommon): + // We come here if anything at all goes wrong on the fast path. We could have + // an interesting kind of input - zero, denormal, infinity or NaN - or we + // could have a normalized double-precision input too large or too small to + // yield a normalized single-precision output. + // + // Of the various cases, the most important one to handle quickly is a zero + // input, because those are probably fairly common. So the very first thing + // we do is test if the input is zero, and if so, return the same sign of + // zero by simply using xh as the return value. + orrs r12, xl, xh, lsl #1 // are all bits of xh:xl 0 except the sign bit? + +#ifndef __BIG_ENDIAN__ + // In little-endian, xh (containing the desired sign bit) and r0 (the output + // register) aren't the same. This instruction can be skipped in big-endian, + // where the correct output value is already in r0. + moveq r0, xh +#endif + bxeq lr + + // Separate the remaining cases into three types: too small (underflow, + // whether or not the input was a denormal), too big (overflow or input + // infinity, which we treat the same in the absence of FP exceptions), and + // NaN. + // + // At this stage r2 contains the output exponent, rebiased to its + // single-precision value, but at bit 20 (that is, still in the + // double-precision position). Detect underflow by doing a signed comparison + // against the minimum normalized single-precision exponent. + cmp r2, #0x00100000 + blt LOCAL_LABEL(underflow) + + // Now figure out whether we had a NaN as input, by shifting xh left by a bit + // (discarding the sign) and setting the new low bit if xl != 0. This gives a + // value which is greater than 0xFFE00000 (in an unsigned comparison) for + // precisely NaN inputs. 
+ cmp xl, #1 // set C if xl != 0 + adc r12, xh, xh // shift that in to the bottom of xh + cmn r12, #0x00200000 // is the result > 0xFFE00000? + bhi LOCAL_LABEL(nan) // if so, go and handle a NaN + + // If we're still here, we have a finite overflow, or an input infinity. We + // don't have to figure out which: we return an infinity of the appropriate + // sign in both cases. So keep just the sign of xh, and make an infinity out + // of the rest of the bits. + mvn r0, xh, lsr #31 // shift sign bit down to bit 0 and flip it + mvn r0, r0, lsl #8 // flip it back, putting 8 set bits below it + lsl r0, r0, #23 // and shift those 9 bits back up to the top + bx lr + +LOCAL_LABEL(nan): + // We have a double-precision NaN input. The Arm NaN handling rules say that + // we make the output single-precision NaN by keeping the sign and as much of + // the mantissa as possible (starting from the top bit). But we also set the + // top bit of the mantissa, which makes the output NaN quiet even if the + // input one was signaling. + // + // So this code looks a bit like a miniature version of the fast path: we + // keep the bottom 8 bits of the exponent in xh as the output exponent (we + // know it's all 1s, which is what we want), plus all the mantissa bits below + // it; shift all of that 3 bits left and recombine with the sign; then + // combine with the top 3 bits of xl. Finally, set the top mantissa bit. + bic r2, xh, #0xF0000000 // everything from xh we want to shift left + orr r0, r3, xl, lsr #29 // combine sign with low 3 output mantissa bits + orr r0, r0, r2, lsl #3 // combine that with the shifted-up value in r2 + orr r0, r0, #0x00400000 // set the top mantissa bit to make it a QNaN + bx lr + +LOCAL_LABEL(underflow): + // We have an input value small enough to underflow. The basic strategy is to + // leave __funder to deal with the details. 
+ // + // Normally __funder expects to get a value that's already been rounded, and + // will re-round it, for which it also needs to know which way the value has + // been rounded already. In this case we haven't rounded _yet_. Rather than + // carefully rounding to nearest, it's easier to just make the __funder input + // value by truncating the mantissa (i.e. round towards zero), and set the + // rounding direction accordingly. + + // Rebias the exponent (again) to make an IEEE 754 underflow intermediate. If + // this still doesn't make r2 positive, then the result is so small that it + // will underflow to 0 anyway, so it doesn't really matter what exponent we + // do provide - we just clear the top 8 bits of r2 to ensure the sign is + // right and the exponent is _something_ small. + adds r2, r2, #0x0c000000 // exponent bias (still shifted down 3 bits) + bicmi r2, r2, #0xff000000 // handle exponent still being negative + + // Test the bits we're going to shift off the mantissa, to see if any are + // nonzero. This will determine the rounding direction we pass to __funder, + // because although we never round _up_ on this path, we must still tell it + // whether the value we pass it was rounded down or was already exact. + lsls r12, xl, #3 // set Z if the intermediate value is exact + + // Put together the intermediate value to pass to __funder. + orr r2, r3, r2, lsl #3 // sign + exponent + most of mantissa + orr r0, r2, xl, lsr #29 // combine with top 3 bits of xl + + // Set the rounding direction flag based on the test above. + moveq r1, #0 // intermediate is exact + movne r1, #1 // intermediate is too small (we didn't round) + + // And tailcall __funder to do the rest of the job.
+ b SYMBOL_NAME(__compiler_rt_funder) + +END_COMPILERRT_FUNCTION(__aeabi_d2f) + +NO_EXEC_STACK_DIRECTIVE diff --git a/compiler-rt/test/builtins/Unit/extendsfdf2new_test.c b/compiler-rt/test/builtins/Unit/extendsfdf2new_test.c new file mode 100644 index 0000000000000..04446488f73bf --- /dev/null +++ b/compiler-rt/test/builtins/Unit/extendsfdf2new_test.c @@ -0,0 +1,123 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_extendsfdf2 + +#include "int_lib.h" +#include <inttypes.h> +#include <stdio.h> + +#include "fp_test.h" + +// By default this test uses compareResultD to check the returned doubles, which +// accepts any returned NaN if the expected result is the canonical NaN value +// 0x7ff8000000000000. For the Arm optimized FP implementation, which commits +// to a more detailed handling of NaNs, we tighten up the check and include +// some extra test cases specific to that NaN policy.
+#if (__arm__ && !(__thumb__ && !__thumb2__)) && COMPILER_RT_ARM_OPTIMIZED_FP +# define EXPECT_EXACT_RESULTS +# define ARM_NAN_HANDLING +#endif + +// Returns: a converted from float to double +COMPILER_RT_ABI double __extendsfdf2(float a); + +int test__extendsfdf2(int line, uint32_t a_rep, uint64_t expected_rep) { + float a = fromRep32(a_rep); + double x = __extendsfdf2(a); +#ifdef EXPECT_EXACT_RESULTS + int ret = toRep64(x) != expected_rep; +#else + int ret = compareResultD(x, expected_rep); +#endif + + if (ret) { + printf("error at line %d: __extendsfdf2(%08" PRIx32 ") = %016" PRIx64 + ", expected %016" PRIx64 "\n", + line, a_rep, toRep64(x), expected_rep); + } + return ret; +} + +#define test__extendsfdf2(a,x) test__extendsfdf2(__LINE__,a,x) + +int main(void) { + int status = 0; + + status |= test__extendsfdf2(0x00000001, 0x36a0000000000000); + status |= test__extendsfdf2(0x00000003, 0x36b8000000000000); + status |= test__extendsfdf2(0x00000005, 0x36c4000000000000); + status |= test__extendsfdf2(0x00000009, 0x36d2000000000000); + status |= test__extendsfdf2(0x00000011, 0x36e1000000000000); + status |= test__extendsfdf2(0x00000021, 0x36f0800000000000); + status |= test__extendsfdf2(0x00000041, 0x3700400000000000); + status |= test__extendsfdf2(0x00000081, 0x3710200000000000); + status |= test__extendsfdf2(0x00000101, 0x3720100000000000); + status |= test__extendsfdf2(0x00000201, 0x3730080000000000); + status |= test__extendsfdf2(0x00000401, 0x3740040000000000); + status |= test__extendsfdf2(0x00000801, 0x3750020000000000); + status |= test__extendsfdf2(0x00001001, 0x3760010000000000); + status |= test__extendsfdf2(0x00002001, 0x3770008000000000); + status |= test__extendsfdf2(0x00004001, 0x3780004000000000); + status |= test__extendsfdf2(0x00008001, 0x3790002000000000); + status |= test__extendsfdf2(0x00010001, 0x37a0001000000000); + status |= test__extendsfdf2(0x00020001, 0x37b0000800000000); + status |= test__extendsfdf2(0x00040001, 0x37c0000400000000); + 
status |= test__extendsfdf2(0x00080001, 0x37d0000200000000); + status |= test__extendsfdf2(0x00100001, 0x37e0000100000000); + status |= test__extendsfdf2(0x00200001, 0x37f0000080000000); + status |= test__extendsfdf2(0x00400001, 0x3800000040000000); + status |= test__extendsfdf2(0x00800001, 0x3810000020000000); + status |= test__extendsfdf2(0x01000001, 0x3820000020000000); + status |= test__extendsfdf2(0x20000001, 0x3c00000020000000); + status |= test__extendsfdf2(0x30000001, 0x3e00000020000000); + status |= test__extendsfdf2(0x3f800000, 0x3ff0000000000000); + status |= test__extendsfdf2(0x7f000000, 0x47e0000000000000); + status |= test__extendsfdf2(0x7f7fffff, 0x47efffffe0000000); + status |= test__extendsfdf2(0x7f800000, 0x7ff0000000000000); + status |= test__extendsfdf2(0xff000000, 0xc7e0000000000000); + status |= test__extendsfdf2(0xff7fffff, 0xc7efffffe0000000); + status |= test__extendsfdf2(0xff800000, 0xfff0000000000000); + status |= test__extendsfdf2(0x80800000, 0xb810000000000000); + status |= test__extendsfdf2(0x807fffff, 0xb80fffffc0000000); + status |= test__extendsfdf2(0x80400000, 0xb800000000000000); + status |= test__extendsfdf2(0x803fffff, 0xb7ffffff80000000); + status |= test__extendsfdf2(0x80000003, 0xb6b8000000000000); + status |= test__extendsfdf2(0x80000002, 0xb6b0000000000000); + status |= test__extendsfdf2(0x80000001, 0xb6a0000000000000); + status |= test__extendsfdf2(0x80000000, 0x8000000000000000); + + // Test that the result of an operation is a NaN at all when it should be. + // + // In most configurations these tests' results are checked using + // compareResultD, so we set all the answers to the canonical NaN + // 0x7ff8000000000000, which causes compareResultD to accept any NaN + // encoding. We also use the canonical single-precision NaN 0x7fc00000 as + // the input, so that even in EXPECT_EXACT_RESULTS mode these tests should + // pass, because 0x7ff8000000000000 is still the exact expected NaN.
+ status |= test__extendsfdf2(0x7fc00000, 0x7ff8000000000000); + +#ifdef ARM_NAN_HANDLING + // Tests specific to the NaN handling of Arm hardware, mimicked by + // arm/extendsfdf2.S: + // + // - a quiet NaN is distinguished by the top mantissa bit being 1 + // + // - converting a quiet NaN from float to double is done by copying + // the input mantissa bits to the top of the output mantissa and + // appending 0 bits below them + // + // - if the input is a signalling NaN, its top mantissa bit is set + // to turn it quiet, and then that quiet NaN is converted to + // double as above + status |= test__extendsfdf2(0x7faf53b1, 0x7ffdea7620000000); + status |= test__extendsfdf2(0x7fe111d3, 0x7ffc223a60000000); + status |= test__extendsfdf2(0xffaf53b1, 0xfffdea7620000000); + status |= test__extendsfdf2(0xffe111d3, 0xfffc223a60000000); + +#endif // ARM_NAN_HANDLING + + return status; +} diff --git a/compiler-rt/test/builtins/Unit/truncdfsf2new_test.c b/compiler-rt/test/builtins/Unit/truncdfsf2new_test.c new file mode 100644 index 0000000000000..0542f97643618 --- /dev/null +++ b/compiler-rt/test/builtins/Unit/truncdfsf2new_test.c @@ -0,0 +1,367 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_truncdfsf2 + +#include "int_lib.h" +#include <inttypes.h> +#include <stdio.h> + +#include "fp_test.h" + +// By default this test uses compareResultF to check the returned floats, which +// accepts any returned NaN if the expected result is the canonical NaN value +// 0x7fc00000. For the Arm optimized FP implementation, which commits to a more +// detailed handling of NaNs, we tighten up the check and include some extra +// test cases specific to that NaN policy. 
+#if (__arm__ && !(__thumb__ && !__thumb2__)) && COMPILER_RT_ARM_OPTIMIZED_FP +# define EXPECT_EXACT_RESULTS +# define ARM_NAN_HANDLING +#endif + +// Returns: a converted from double to float +COMPILER_RT_ABI float __truncdfsf2(double a); + +int test__truncdfsf2(int line, uint64_t a_rep, uint32_t expected_rep) { + double a = fromRep64(a_rep); + float x = __truncdfsf2(a); +#ifdef EXPECT_EXACT_RESULTS + int ret = toRep32(x) != expected_rep; +#else + int ret = compareResultF(x, expected_rep); +#endif + + if (ret) { + printf("error at line %d: __truncdfsf2(%016" PRIx64 ") = %08" PRIx32 + ", expected %08" PRIx32 "\n", + line, a_rep, toRep32(x), expected_rep); + } + return ret; +} + +#define test__truncdfsf2(a,x) test__truncdfsf2(__LINE__,a,x) + +int main(void) { + int status = 0; + + status |= test__truncdfsf2(0x0000000000000001, 0x00000000); + status |= test__truncdfsf2(0x0000000000000002, 0x00000000); + status |= test__truncdfsf2(0x0000000000000004, 0x00000000); + status |= test__truncdfsf2(0x0000000000000008, 0x00000000); + status |= test__truncdfsf2(0x000000000000001a, 0x00000000); + status |= test__truncdfsf2(0x0000000000000020, 0x00000000); + status |= test__truncdfsf2(0x0000000000000040, 0x00000000); + status |= test__truncdfsf2(0x0000000000000080, 0x00000000); + status |= test__truncdfsf2(0x000000000000019a, 0x00000000); + status |= test__truncdfsf2(0x0000000000000200, 0x00000000); + status |= test__truncdfsf2(0x0000000000000400, 0x00000000); + status |= test__truncdfsf2(0x0000000000000800, 0x00000000); + status |= test__truncdfsf2(0x000000000000189a, 0x00000000); + status |= test__truncdfsf2(0x0000000000002000, 0x00000000); + status |= test__truncdfsf2(0x0000000000004000, 0x00000000); + status |= test__truncdfsf2(0x0000000000008000, 0x00000000); + status |= test__truncdfsf2(0x000000000001789a, 0x00000000); + status |= test__truncdfsf2(0x0000000000020000, 0x00000000); + status |= test__truncdfsf2(0x0000000000040000, 0x00000000); + status |= 
test__truncdfsf2(0x0000000000080000, 0x00000000); + status |= test__truncdfsf2(0x000000000016789a, 0x00000000); + status |= test__truncdfsf2(0x0000000000200000, 0x00000000); + status |= test__truncdfsf2(0x0000000000400000, 0x00000000); + status |= test__truncdfsf2(0x0000000000800000, 0x00000000); + status |= test__truncdfsf2(0x000000000156789a, 0x00000000); + status |= test__truncdfsf2(0x0000000002000000, 0x00000000); + status |= test__truncdfsf2(0x0000000004000000, 0x00000000); + status |= test__truncdfsf2(0x0000000008000000, 0x00000000); + status |= test__truncdfsf2(0x000000001456789a, 0x00000000); + status |= test__truncdfsf2(0x0000000020000000, 0x00000000); + status |= test__truncdfsf2(0x0000000040000000, 0x00000000); + status |= test__truncdfsf2(0x0000000080000000, 0x00000000); + status |= test__truncdfsf2(0x000000013465789a, 0x00000000); + status |= test__truncdfsf2(0x0000000200000000, 0x00000000); + status |= test__truncdfsf2(0x0000000400000000, 0x00000000); + status |= test__truncdfsf2(0x0000000800000000, 0x00000000); + status |= test__truncdfsf2(0x000000123456789a, 0x00000000); + status |= test__truncdfsf2(0x0000002000000000, 0x00000000); + status |= test__truncdfsf2(0x0000004000000000, 0x00000000); + status |= test__truncdfsf2(0x0000008000000000, 0x00000000); + status |= test__truncdfsf2(0x000001123456789a, 0x00000000); + status |= test__truncdfsf2(0x0000020000000000, 0x00000000); + status |= test__truncdfsf2(0x0000040000000000, 0x00000000); + status |= test__truncdfsf2(0x0000080000000000, 0x00000000); + status |= test__truncdfsf2(0x000010123456789a, 0x00000000); + status |= test__truncdfsf2(0x0000200000000000, 0x00000000); + status |= test__truncdfsf2(0x0000400000000000, 0x00000000); + status |= test__truncdfsf2(0x0000800000000000, 0x00000000); + status |= test__truncdfsf2(0x000100123456789a, 0x00000000); + status |= test__truncdfsf2(0x0002000000000000, 0x00000000); + status |= test__truncdfsf2(0x0004000000000000, 0x00000000); + status |= 
test__truncdfsf2(0x0008000000000000, 0x00000000); + status |= test__truncdfsf2(0x0010000000000000, 0x00000000); + status |= test__truncdfsf2(0x36a0000000000000, 0x00000001); + status |= test__truncdfsf2(0x36b0000000000000, 0x00000002); + status |= test__truncdfsf2(0x36b2000000000000, 0x00000002); + status |= test__truncdfsf2(0x36b4000000000000, 0x00000002); + status |= test__truncdfsf2(0x36b6000000000000, 0x00000003); + status |= test__truncdfsf2(0x36b8000000000000, 0x00000003); + status |= test__truncdfsf2(0x36ba000000000000, 0x00000003); + status |= test__truncdfsf2(0x36bc000000000000, 0x00000004); + status |= test__truncdfsf2(0x36be000000000000, 0x00000004); + status |= test__truncdfsf2(0x36c0000000000000, 0x00000004); + status |= test__truncdfsf2(0x36c1000000000000, 0x00000004); + status |= test__truncdfsf2(0x36c2000000000000, 0x00000004); + status |= test__truncdfsf2(0x36c3000000000000, 0x00000005); + status |= test__truncdfsf2(0x36c4000000000000, 0x00000005); + status |= test__truncdfsf2(0x36c5000000000000, 0x00000005); + status |= test__truncdfsf2(0x36c6000000000000, 0x00000006); + status |= test__truncdfsf2(0x36c7000000000000, 0x00000006); + status |= test__truncdfsf2(0x36d0000000000000, 0x00000008); + status |= test__truncdfsf2(0x36d0800000000000, 0x00000008); + status |= test__truncdfsf2(0x36d1000000000000, 0x00000008); + status |= test__truncdfsf2(0x36d1800000000000, 0x00000009); + status |= test__truncdfsf2(0x36d2000000000000, 0x00000009); + status |= test__truncdfsf2(0x36d2800000000000, 0x00000009); + status |= test__truncdfsf2(0x36d3000000000000, 0x0000000a); + status |= test__truncdfsf2(0x36d3800000000000, 0x0000000a); + status |= test__truncdfsf2(0x36e0000000000000, 0x00000010); + status |= test__truncdfsf2(0x36e0400000000000, 0x00000010); + status |= test__truncdfsf2(0x36e0800000000000, 0x00000010); + status |= test__truncdfsf2(0x36e0c00000000000, 0x00000011); + status |= test__truncdfsf2(0x36e1000000000000, 0x00000011); + status |= 
test__truncdfsf2(0x36e1400000000000, 0x00000011); + status |= test__truncdfsf2(0x36e1800000000000, 0x00000012); + status |= test__truncdfsf2(0x36e1c00000000000, 0x00000012); + status |= test__truncdfsf2(0x36f0000000000000, 0x00000020); + status |= test__truncdfsf2(0x36f0200000000000, 0x00000020); + status |= test__truncdfsf2(0x36f0400000000000, 0x00000020); + status |= test__truncdfsf2(0x36f0600000000000, 0x00000021); + status |= test__truncdfsf2(0x36f0800000000000, 0x00000021); + status |= test__truncdfsf2(0x36f0a00000000000, 0x00000021); + status |= test__truncdfsf2(0x36f0c00000000000, 0x00000022); + status |= test__truncdfsf2(0x36f0e00000000000, 0x00000022); + status |= test__truncdfsf2(0x3700000000000000, 0x00000040); + status |= test__truncdfsf2(0x3700100000000000, 0x00000040); + status |= test__truncdfsf2(0x3700200000000000, 0x00000040); + status |= test__truncdfsf2(0x3700300000000000, 0x00000041); + status |= test__truncdfsf2(0x3700400000000000, 0x00000041); + status |= test__truncdfsf2(0x3700500000000000, 0x00000041); + status |= test__truncdfsf2(0x3700600000000000, 0x00000042); + status |= test__truncdfsf2(0x3700700000000000, 0x00000042); + status |= test__truncdfsf2(0x3710000000000000, 0x00000080); + status |= test__truncdfsf2(0x3710080000000000, 0x00000080); + status |= test__truncdfsf2(0x3710100000000000, 0x00000080); + status |= test__truncdfsf2(0x3710180000000000, 0x00000081); + status |= test__truncdfsf2(0x3710200000000000, 0x00000081); + status |= test__truncdfsf2(0x3710280000000000, 0x00000081); + status |= test__truncdfsf2(0x3710300000000000, 0x00000082); + status |= test__truncdfsf2(0x3710380000000000, 0x00000082); + status |= test__truncdfsf2(0x3720000000000000, 0x00000100); + status |= test__truncdfsf2(0x3720040000000000, 0x00000100); + status |= test__truncdfsf2(0x3720080000000000, 0x00000100); + status |= test__truncdfsf2(0x37200c0000000000, 0x00000101); + status |= test__truncdfsf2(0x3720100000000000, 0x00000101); + status |= 
test__truncdfsf2(0x3720140000000000, 0x00000101); + status |= test__truncdfsf2(0x3720180000000000, 0x00000102); + status |= test__truncdfsf2(0x37201c0000000000, 0x00000102); + status |= test__truncdfsf2(0x3730000000000000, 0x00000200); + status |= test__truncdfsf2(0x3730020000000000, 0x00000200); + status |= test__truncdfsf2(0x3730040000000000, 0x00000200); + status |= test__truncdfsf2(0x3730060000000000, 0x00000201); + status |= test__truncdfsf2(0x3730080000000000, 0x00000201); + status |= test__truncdfsf2(0x37300a0000000000, 0x00000201); + status |= test__truncdfsf2(0x37300c0000000000, 0x00000202); + status |= test__truncdfsf2(0x37300e0000000000, 0x00000202); + status |= test__truncdfsf2(0x3740000000000000, 0x00000400); + status |= test__truncdfsf2(0x3740010000000000, 0x00000400); + status |= test__truncdfsf2(0x3740020000000000, 0x00000400); + status |= test__truncdfsf2(0x3740030000000000, 0x00000401); + status |= test__truncdfsf2(0x3740040000000000, 0x00000401); + status |= test__truncdfsf2(0x3740050000000000, 0x00000401); + status |= test__truncdfsf2(0x3740060000000000, 0x00000402); + status |= test__truncdfsf2(0x3740070000000000, 0x00000402); + status |= test__truncdfsf2(0x3750000000000000, 0x00000800); + status |= test__truncdfsf2(0x3750008000000000, 0x00000800); + status |= test__truncdfsf2(0x3750010000000000, 0x00000800); + status |= test__truncdfsf2(0x3750018000000000, 0x00000801); + status |= test__truncdfsf2(0x3750020000000000, 0x00000801); + status |= test__truncdfsf2(0x3750028000000000, 0x00000801); + status |= test__truncdfsf2(0x3750030000000000, 0x00000802); + status |= test__truncdfsf2(0x3750038000000000, 0x00000802); + status |= test__truncdfsf2(0x3760000000000000, 0x00001000); + status |= test__truncdfsf2(0x3760004000000000, 0x00001000); + status |= test__truncdfsf2(0x3760008000000000, 0x00001000); + status |= test__truncdfsf2(0x376000c000000000, 0x00001001); + status |= test__truncdfsf2(0x3760010000000000, 0x00001001); + status |= 
test__truncdfsf2(0x3760014000000000, 0x00001001); + status |= test__truncdfsf2(0x3760018000000000, 0x00001002); + status |= test__truncdfsf2(0x376001c000000000, 0x00001002); + status |= test__truncdfsf2(0x3770000000000000, 0x00002000); + status |= test__truncdfsf2(0x3770002000000000, 0x00002000); + status |= test__truncdfsf2(0x3770004000000000, 0x00002000); + status |= test__truncdfsf2(0x3770006000000000, 0x00002001); + status |= test__truncdfsf2(0x3770008000000000, 0x00002001); + status |= test__truncdfsf2(0x377000a000000000, 0x00002001); + status |= test__truncdfsf2(0x377000c000000000, 0x00002002); + status |= test__truncdfsf2(0x377000e000000000, 0x00002002); + status |= test__truncdfsf2(0x3780000000000000, 0x00004000); + status |= test__truncdfsf2(0x3780001000000000, 0x00004000); + status |= test__truncdfsf2(0x3780002000000000, 0x00004000); + status |= test__truncdfsf2(0x3780003000000000, 0x00004001); + status |= test__truncdfsf2(0x3780004000000000, 0x00004001); + status |= test__truncdfsf2(0x3780005000000000, 0x00004001); + status |= test__truncdfsf2(0x3780006000000000, 0x00004002); + status |= test__truncdfsf2(0x3780007000000000, 0x00004002); + status |= test__truncdfsf2(0x3790000000000000, 0x00008000); + status |= test__truncdfsf2(0x3790000800000000, 0x00008000); + status |= test__truncdfsf2(0x3790001000000000, 0x00008000); + status |= test__truncdfsf2(0x3790001800000000, 0x00008001); + status |= test__truncdfsf2(0x3790002000000000, 0x00008001); + status |= test__truncdfsf2(0x3790002800000000, 0x00008001); + status |= test__truncdfsf2(0x3790003000000000, 0x00008002); + status |= test__truncdfsf2(0x3790003800000000, 0x00008002); + status |= test__truncdfsf2(0x37a0000000000000, 0x00010000); + status |= test__truncdfsf2(0x37a0000400000000, 0x00010000); + status |= test__truncdfsf2(0x37a0000800000000, 0x00010000); + status |= test__truncdfsf2(0x37a0000c00000000, 0x00010001); + status |= test__truncdfsf2(0x37a0001000000000, 0x00010001); + status |= 
test__truncdfsf2(0x37a0001400000000, 0x00010001); + status |= test__truncdfsf2(0x37a0001800000000, 0x00010002); + status |= test__truncdfsf2(0x37a0001c00000000, 0x00010002); + status |= test__truncdfsf2(0x37b0000000000000, 0x00020000); + status |= test__truncdfsf2(0x37b0000200000000, 0x00020000); + status |= test__truncdfsf2(0x37b0000400000000, 0x00020000); + status |= test__truncdfsf2(0x37b0000600000000, 0x00020001); + status |= test__truncdfsf2(0x37b0000800000000, 0x00020001); + status |= test__truncdfsf2(0x37b0000a00000000, 0x00020001); + status |= test__truncdfsf2(0x37b0000c00000000, 0x00020002); + status |= test__truncdfsf2(0x37b0000e00000000, 0x00020002); + status |= test__truncdfsf2(0x37c0000000000000, 0x00040000); + status |= test__truncdfsf2(0x37c0000100000000, 0x00040000); + status |= test__truncdfsf2(0x37c0000200000000, 0x00040000); + status |= test__truncdfsf2(0x37c0000300000000, 0x00040001); + status |= test__truncdfsf2(0x37c0000400000000, 0x00040001); + status |= test__truncdfsf2(0x37c0000500000000, 0x00040001); + status |= test__truncdfsf2(0x37c0000600000000, 0x00040002); + status |= test__truncdfsf2(0x37c0000700000000, 0x00040002); + status |= test__truncdfsf2(0x37d0000000000000, 0x00080000); + status |= test__truncdfsf2(0x37d0000080000000, 0x00080000); + status |= test__truncdfsf2(0x37d0000100000000, 0x00080000); + status |= test__truncdfsf2(0x37d0000180000000, 0x00080001); + status |= test__truncdfsf2(0x37d0000200000000, 0x00080001); + status |= test__truncdfsf2(0x37d0000280000000, 0x00080001); + status |= test__truncdfsf2(0x37d0000300000000, 0x00080002); + status |= test__truncdfsf2(0x37d0000380000000, 0x00080002); + status |= test__truncdfsf2(0x37e0000000000000, 0x00100000); + status |= test__truncdfsf2(0x37e0000040000000, 0x00100000); + status |= test__truncdfsf2(0x37e0000080000000, 0x00100000); + status |= test__truncdfsf2(0x37e00000c0000000, 0x00100001); + status |= test__truncdfsf2(0x37e0000100000000, 0x00100001); + status |= 
test__truncdfsf2(0x37e0000140000000, 0x00100001); + status |= test__truncdfsf2(0x37e0000180000000, 0x00100002); + status |= test__truncdfsf2(0x37e00001c0000000, 0x00100002); + status |= test__truncdfsf2(0x37f0000000000000, 0x00200000); + status |= test__truncdfsf2(0x37f0000020000000, 0x00200000); + status |= test__truncdfsf2(0x37f000003fffffff, 0x00200000); + status |= test__truncdfsf2(0x37f0000040000000, 0x00200000); + status |= test__truncdfsf2(0x37f0000040000001, 0x00200001); + status |= test__truncdfsf2(0x37f0000060000000, 0x00200001); + status |= test__truncdfsf2(0x37f0000080000000, 0x00200001); + status |= test__truncdfsf2(0x37f00000a0000000, 0x00200001); + status |= test__truncdfsf2(0x37f00000bfffffff, 0x00200001); + status |= test__truncdfsf2(0x37f00000c0000000, 0x00200002); + status |= test__truncdfsf2(0x37f00000c0000001, 0x00200002); + status |= test__truncdfsf2(0x37f00000e0000000, 0x00200002); + status |= test__truncdfsf2(0x3800000000000000, 0x00400000); + status |= test__truncdfsf2(0x3800000010000000, 0x00400000); + status |= test__truncdfsf2(0x3800000020000000, 0x00400000); + status |= test__truncdfsf2(0x3800000030000000, 0x00400001); + status |= test__truncdfsf2(0x3800000040000000, 0x00400001); + status |= test__truncdfsf2(0x3800000050000000, 0x00400001); + status |= test__truncdfsf2(0x3800000060000000, 0x00400002); + status |= test__truncdfsf2(0x3800000070000000, 0x00400002); + status |= test__truncdfsf2(0x380fffffffffffff, 0x00800000); + status |= test__truncdfsf2(0x3810000000000000, 0x00800000); + status |= test__truncdfsf2(0x3810000008000000, 0x00800000); + status |= test__truncdfsf2(0x3810000010000000, 0x00800000); + status |= test__truncdfsf2(0x3810000018000000, 0x00800001); + status |= test__truncdfsf2(0x3810000020000000, 0x00800001); + status |= test__truncdfsf2(0x3810000028000000, 0x00800001); + status |= test__truncdfsf2(0x3810000030000000, 0x00800002); + status |= test__truncdfsf2(0x3810000038000000, 0x00800002); + status |= 
test__truncdfsf2(0x3ff0000000000000, 0x3f800000); + status |= test__truncdfsf2(0x3ff0000008000000, 0x3f800000); + status |= test__truncdfsf2(0x3ff0000010000000, 0x3f800000); + status |= test__truncdfsf2(0x3ff0000018000000, 0x3f800001); + status |= test__truncdfsf2(0x3ff0000028000000, 0x3f800001); + status |= test__truncdfsf2(0x3ff0000030000000, 0x3f800002); + status |= test__truncdfsf2(0x3ff0000038000000, 0x3f800002); + status |= test__truncdfsf2(0x4000000000000000, 0x40000000); + status |= test__truncdfsf2(0x47efffffe8000000, 0x7f7fffff); + status |= test__truncdfsf2(0x47effffff0000000, 0x7f800000); + status |= test__truncdfsf2(0x47effffff8000000, 0x7f800000); + status |= test__truncdfsf2(0x7fc0000000000000, 0x7f800000); + status |= test__truncdfsf2(0x7ff0000000000000, 0x7f800000); + status |= test__truncdfsf2(0x8010000000000000, 0x80000000); + status |= test__truncdfsf2(0xbff0000008000000, 0xbf800000); + status |= test__truncdfsf2(0xbff0000010000000, 0xbf800000); + status |= test__truncdfsf2(0xbff0000018000000, 0xbf800001); + status |= test__truncdfsf2(0xbff0000028000000, 0xbf800001); + status |= test__truncdfsf2(0xbff0000030000000, 0xbf800002); + status |= test__truncdfsf2(0xbff0000038000000, 0xbf800002); + status |= test__truncdfsf2(0xc024000000000000, 0xc1200000); + status |= test__truncdfsf2(0xc7efffffe8000000, 0xff7fffff); + status |= test__truncdfsf2(0xc7effffff0000000, 0xff800000); + status |= test__truncdfsf2(0xc7effffff8000000, 0xff800000); + status |= test__truncdfsf2(0xffc0000000000000, 0xff800000); + status |= test__truncdfsf2(0xfff0000000000000, 0xff800000); + status |= test__truncdfsf2(0x3780000000000000, 0x00004000); + status |= test__truncdfsf2(0xb780000000000000, 0x80004000); + status |= test__truncdfsf2(0x0000000080000000, 0x00000000); + status |= test__truncdfsf2(0x8000000080000000, 0x80000000); + status |= test__truncdfsf2(0x380ffffff0000000, 0x00800000); + status |= test__truncdfsf2(0x380fffffd0000000, 0x007fffff); + status |= 
test__truncdfsf2(0x380fffffe8000000, 0x00800000); + status |= test__truncdfsf2(0x380fffffc8000000, 0x007fffff); + status |= test__truncdfsf2(0xb80ffffff0000000, 0x80800000); + status |= test__truncdfsf2(0xb80fffffd0000000, 0x807fffff); + status |= test__truncdfsf2(0xb80fffffe8000000, 0x80800000); + status |= test__truncdfsf2(0xb80fffffc8000000, 0x807fffff); + status |= test__truncdfsf2(0x0000000000000000, 0x00000000); + status |= test__truncdfsf2(0x8000000000000000, 0x80000000); + status |= test__truncdfsf2(0xc7e0000010000000, 0xff000000); + + // Test that the result of an operation is a NaN at all when it should be. + // + // In most configurations these tests' results are compared using + // compareResultF, so we set all the answers to the canonical NaN 0x7fc00000, + // which causes compareResultF to accept any NaN encoding. We also use the + // same value as the input NaN in tests that have one, so that even in + // EXPECT_EXACT_RESULTS mode these tests should pass, because 0x7fc00000 is + // still the exact expected NaN. 
+ status |= test__truncdfsf2(0x7ff8000000000000, 0x7fc00000); + +#ifdef ARM_NAN_HANDLING + // Tests specific to the NaN handling of Arm hardware, mimicked by + // arm/truncdfsf2.S: + // + // - a quiet NaN is distinguished by the top mantissa bit being 1 + // + // - converting a quiet NaN from double to float is done by keeping + // the topmost 23 bits of the mantissa and discarding the lower + // ones + // + // - if the input is a signalling NaN, its top mantissa bit is set + // to turn it quiet, and then that quiet NaN is converted to + // float as above + status |= test__truncdfsf2(0x7ff0000000000001, 0x7fc00000); + status |= test__truncdfsf2(0x7ff753b1887bcf03, 0x7ffa9d8c); + status |= test__truncdfsf2(0x7ff911d3c0abfdda, 0x7fc88e9e); + status |= test__truncdfsf2(0xfff0000000000001, 0xffc00000); + status |= test__truncdfsf2(0xfff753b1887bcf03, 0xfffa9d8c); + status |= test__truncdfsf2(0xfff911d3c0abfdda, 0xffc88e9e); + +#endif // ARM_NAN_HANDLING + + return status; +} _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
