https://gcc.gnu.org/bugzilla/show_bug.cgi?id=125525

            Bug ID: 125525
           Summary: equivalent signed zero detection produces different
                    assembler
           Product: gcc
           Version: 16.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: manu at gcc dot gnu.org
  Target Milestone: ---

When implementing radix sort for floating-point values, it is common practice
to canonicalize -0.0 and +0.0 to 0.0. There are at least two ways to do this:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Return the object representation of X reinterpreted as a uint64_t.
   The bytes are copied with memcpy, so no pointer-cast aliasing is
   involved.  */
static inline uint64_t
load_u64_from_double(double x)
{
    uint64_t bits;

    memcpy(&bits, &x, sizeof bits);
    return bits;
}

// Variant 1: compute a 64-bit key from the bit pattern of x, using an
// explicit `if` to canonicalize zero.
//
// Steps:
//   1. Copy the bits of x into u via load_u64_from_double().
//   2. If every bit except bit 63 is zero (per the report, the +0.0 and
//      -0.0 patterns), replace u with 0.
//   3. XOR u with a mask that is all ones when bit 63 of u is set and
//      only bit 63 otherwise.
//
// NOTE(review): the "_asc" name suggests that unsigned comparison of the
// returned keys is meant to follow ascending order of the doubles; NaN
// handling is not addressed here -- confirm against the caller.
uint64_t
double_to_sort_key_asc(double x)
{
    uint64_t u = load_u64_from_double(x);
    // (u << 1) discards bit 63, so the test ignores the sign bit.
    if ((u << 1) == 0)
        u = 0;
    // -(int64_t)(u >> 63) is 0 or -1 (all ones); OR-ing in bit 63 yields
    // either just bit 63 or all ones.
    uint64_t mask = (uint64_t)-(int64_t)(u >> 63) |
UINT64_C(0x8000000000000000);
    return u ^ mask;
}

// Variant 2: same computation as double_to_sort_key_asc(), but zero is
// canonicalized with bit arithmetic instead of an `if`.
//
// Steps:
//   1. Copy the bits of x into u via load_u64_from_double().
//   2. is_zero is 1 when every bit except bit 63 is zero, else 0; shifting
//      it to bit 63 and AND-ing with the complement clears bit 63 of u
//      only in that case.
//   3. XOR u with a mask that is all ones when bit 63 of u is set and
//      only bit 63 otherwise.
uint64_t
double_to_sort_key_asc2(double x)
{
    uint64_t u = load_u64_from_double(x);
    // (u << 1) discards bit 63, so the comparison ignores the sign bit.
    uint64_t is_zero = ((u << 1) == 0);   
    u &= ~(is_zero << 63);      // clear sign bit only for zero
    // -(int64_t)(u >> 63) is 0 or -1 (all ones); OR-ing in bit 63 yields
    // either just bit 63 or all ones.
    uint64_t mask = (uint64_t)-(int64_t)(u >> 63) |
UINT64_C(0x8000000000000000);
    return u ^ mask;
}


I expected both functions to produce the same assembler; however, -O2
-march=x86-64-v2 produces:

"double_to_sort_key_asc":
        movabs  rcx, -9223372036854775808
        movq    rax, xmm0
        cqo
        or      rdx, rcx
        xor     rdx, rax
        add     rax, rax
        cmovne  rcx, rdx
        mov     rax, rcx
        ret
"double_to_sort_key_asc2":
        movq    rax, xmm0
        xor     edx, edx
        mov     rcx, rax
        add     rcx, rcx
        sete    dl
        sal     rdx, 63
        not     rdx
        and     rdx, rax
        mov     rax, rdx
        sar     rax, 63
        bts     rax, 63
        xor     rax, rdx
        ret

Note that GCC < 16.1 produced a conditional jump for the first variant, but it
now produces cmovne. Using __builtin_expect to tell GCC 16.1 that the condition
is likely false makes it produce a conditional jump again. Clang also produces
a conditional jump.

Reply via email to