On OpenBSD 7.5/i386, the two tests still fail, except when compiling
without optimization (e.g. with CFLAGS="-ggdb").

So it's a compiler optimization bug, as can be seen from the code
generated by the OpenBSD compiler (a clang 16.0.6 derivative):
compare the attached totalorderf.O0.s and totalorderf.O2.s.

The patch below works around the compiler bug; see the attached
totalorderf.O2-fixed.s.


2024-04-09  Bruno Haible  <br...@clisp.org>

        totalorder, totalorderf: Avoid miscompilation by clang on OpenBSD/i386.
        * lib/totalorder.c (totalorder): Insert a compiler optimization barrier.
        * lib/totalorderf.c (totalorderf): Likewise.

diff --git a/lib/totalorder.c b/lib/totalorder.c
index 635e3cb276..eaef9a582d 100644
--- a/lib/totalorder.c
+++ b/lib/totalorder.c
@@ -57,6 +57,11 @@ totalorder (double const *x, double const *y)
   xu.f = *x;
   yu.f = *y;
 #else
+# ifdef __clang__
+  /* Prevent clang 16.0.6 on OpenBSD 7.5 from reusing the values of *x and *y
+     (fetched above) in optimized inlined memcpy expansions.  */
+  __asm__ __volatile__ ("" : : : "memory");
+# endif
   /* On 32-bit x86 processors, as well as on x86_64 processors with
      CC="gcc -mfpmath=387", the evaluation of *x and *y above is done through
      an 'fldl' instruction, which converts a signalling NaN to a quiet NaN. See
diff --git a/lib/totalorderf.c b/lib/totalorderf.c
index 75024b6839..f5f1b40112 100644
--- a/lib/totalorderf.c
+++ b/lib/totalorderf.c
@@ -57,6 +57,11 @@ totalorderf (float const *x, float const *y)
   xu.f = *x;
   yu.f = *y;
 #else
+# ifdef __clang__
+  /* Prevent clang 16.0.6 on OpenBSD 7.5 from reusing the values of *x and *y
+     (fetched above) in optimized inlined memcpy expansions.  */
+  __asm__ __volatile__ ("" : : : "memory");
+# endif
   /* On 32-bit x86 processors, as well as on x86_64 processors with
      CC="gcc -mfpmath=387", the evaluation of *x and *y above is done through
      an 'flds' instruction, which converts a signalling NaN to a quiet NaN. See
        # Listing 1 of 3: presumably the attached totalorderf.O0.s (clang output
        # without optimization); i386, AT&T syntax.  The patch's hunk header
        # gives the prototype totalorderf (float const *x, float const *y), so
        # 8(%ebp) is x and 12(%ebp) is y.
        #
        # Stack slots as used by the code below:
        #    -8(%ebp)  copy of __guard_local (stack-protector cookie)
        #   -12(%ebp)  raw 32-bit image of *x (integer copy)
        #   -16(%ebp)  raw 32-bit image of *y (integer copy)
        #   -20(%ebp)  return value
        #   -24(%ebp)  sign bit of *x            -28(%ebp)  sign bit of *y
        #   -32(%ebp)  1 if *x is a NaN          -36(%ebp)  1 if *y is a NaN
        #   -40(%ebp)  0 or 0xFFFFFFFF, derived from the sign bit of *x
        #   -44(%ebp)  scratch for *y stored back from the x87 stack
        #   -48(%ebp)  spilled GOT base          -52(%ebp)  spilled return value
        .text
        .file   "totalorderf.c"
        .globl  totalorderf                     # -- Begin function totalorderf
        .p2align        4, 0xcc
        .type   totalorderf,@function
totalorderf:                            # @totalorderf
        .cfi_startproc
# %bb.0:
        pushl   %ebp
        .cfi_def_cfa_offset 8
        .cfi_offset %ebp, -8
        movl    %esp, %ebp
        .cfi_def_cfa_register %ebp
        pushl   %ebx
        subl    $60, %esp
        .cfi_offset %ebx, -12
        # PIC setup: call/pop yields the current address, from which the GOT
        # base is computed and spilled for later @GOTOFF / @PLT references.
        calll   .L0$pb
.L0$pb:
        popl    %eax
.Ltmp0:
        addl    $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %eax
        movl    %eax, -48(%ebp)                 # 4-byte Spill
        # Both argument loads target %ecx; the value is not read before %ecx
        # is next written.
        movl    12(%ebp), %ecx
        movl    8(%ebp), %ecx
        # Save the stack-protector cookie in the frame.
        movl    __guard_local@GOTOFF(%eax), %eax
        movl    %eax, -8(%ebp)
        # Sign bit of *x, taken from an integer load of the float's bits.
        movl    8(%ebp), %eax
        movl    (%eax), %eax
        shrl    $31, %eax
        movl    %eax, -24(%ebp)
        # Sign bit of *y: the value goes through the x87 stack (flds/fstps)
        # and is then tested as a signed integer (negative <=> sign bit set).
        movl    12(%ebp), %eax
        flds    (%eax)
        fstps   -44(%ebp)
        movl    -44(%ebp), %eax
        cmpl    $0, %eax
        setl    %al
        andb    $1, %al
        movzbl  %al, %eax
        movl    %eax, -28(%ebp)
        # Compare (sign_x == 0) with (sign_y == 0); equal means same sign.
        cmpl    $0, -24(%ebp)
        setne   %al
        xorb    $-1, %al
        andb    $1, %al
        movzbl  %al, %eax
        cmpl    $0, -28(%ebp)
        setne   %cl
        xorb    $-1, %cl
        andb    $1, %cl
        movzbl  %cl, %ecx
        cmpl    %ecx, %eax
        je      .LBB0_2
# %bb.1:
        # Signs differ: the result is the sign bit of *x.
        movl    -24(%ebp), %eax
        movl    %eax, -20(%ebp)
        jmp     .LBB0_7
.LBB0_2:
        # NaN test for *x: compare the value with itself; the parity flag
        # (x87 C2 via fnstsw/sahf) is set only for an unordered result.
        movl    8(%ebp), %eax
        flds    (%eax)
        fucomp  %st(0)
        fnstsw  %ax
                                        # kill: def $ah killed $ah killed $ax
        sahf
        setp    %al
        movzbl  %al, %eax
        movl    %eax, -32(%ebp)
        # Same NaN test for *y.
        movl    12(%ebp), %eax
        flds    (%eax)
        fucomp  %st(0)
        fnstsw  %ax
                                        # kill: def $ah killed $ah killed $ax
        sahf
        setp    %al
        andb    $1, %al
        movzbl  %al, %eax
        movl    %eax, -36(%ebp)
        # Compare (x is not NaN) with (y is not NaN); equal means both are
        # NaNs or neither is.
        cmpl    $0, -32(%ebp)
        setne   %al
        xorb    $-1, %al
        andb    $1, %al
        movzbl  %al, %eax
        cmpl    $0, -36(%ebp)
        setne   %cl
        xorb    $-1, %cl
        andb    $1, %cl
        movzbl  %cl, %ecx
        cmpl    %ecx, %eax
        je      .LBB0_4
# %bb.3:
        # Exactly one operand is a NaN:
        # result = ((x is not NaN) == (sign_x == 0)).
        cmpl    $0, -32(%ebp)
        setne   %al
        xorb    $-1, %al
        andb    $1, %al
        movzbl  %al, %eax
        cmpl    $0, -24(%ebp)
        setne   %cl
        xorb    $-1, %cl
        andb    $1, %cl
        movzbl  %cl, %ecx
        cmpl    %ecx, %eax
        sete    %al
        andb    $1, %al
        movzbl  %al, %eax
        movl    %eax, -20(%ebp)
        jmp     .LBB0_7
.LBB0_4:
        cmpl    $0, -32(%ebp)
        jne     .LBB0_6
# %bb.5:
        # Neither operand is a NaN: ordinary x87 comparison with st(0) = *y
        # and st(1) = *x; setae (carry clear) yields *x <= *y.
        movl    8(%ebp), %eax
        flds    (%eax)
        movl    12(%ebp), %eax
        flds    (%eax)
        fucompp
        fnstsw  %ax
                                        # kill: def $ah killed $ah killed $ax
        sahf
        setae   %al
        andb    $1, %al
        movzbl  %al, %eax
        movl    %eax, -20(%ebp)
        jmp     .LBB0_7
.LBB0_6:
        # Both operands are NaNs with the same sign.
        # Build the mask 0 - (sign_x != 0), i.e. 0 or 0xFFFFFFFF.
        movl    -48(%ebp), %ebx                 # 4-byte Reload
        cmpl    $0, -24(%ebp)
        setne   %al
        xorb    $-1, %al
        xorb    $-1, %al
        andb    $1, %al
        movzbl  %al, %ecx
        xorl    %eax, %eax
        subl    %ecx, %eax
        movl    %eax, -40(%ebp)
        # Zero the two 4-byte buffers with out-of-line memset calls.
        leal    -12(%ebp), %eax
        xorl    %ecx, %ecx
        movl    %eax, (%esp)
        movl    $0, 4(%esp)
        movl    $4, 8(%esp)
        calll   memset@PLT
        movl    -48(%ebp), %ebx                 # 4-byte Reload
        leal    -16(%ebp), %eax
        xorl    %ecx, %ecx
        movl    %eax, (%esp)
        movl    $0, 4(%esp)
        movl    $4, 8(%esp)
        calll   memset@PLT
        # Copy *x and *y with integer moves straight from memory: no x87
        # load is involved, so the bit patterns arrive unchanged.
        movl    8(%ebp), %eax
        movl    (%eax), %eax
        movl    %eax, -12(%ebp)
        movl    12(%ebp), %eax
        movl    (%eax), %eax
        movl    %eax, -16(%ebp)
        # result = (bits_x ^ mask) <= (bits_y ^ mask), unsigned.
        movl    -12(%ebp), %eax
        xorl    -40(%ebp), %eax
        movl    -16(%ebp), %ecx
        xorl    -40(%ebp), %ecx
        cmpl    %ecx, %eax
        setbe   %al
        andb    $1, %al
        movzbl  %al, %eax
        movl    %eax, -20(%ebp)
.LBB0_7:
        # Common exit: verify the stack-protector cookie before returning.
        movl    -48(%ebp), %eax                 # 4-byte Reload
        movl    -20(%ebp), %ecx
        movl    %ecx, -52(%ebp)                 # 4-byte Spill
        movl    __guard_local@GOTOFF(%eax), %eax
        movl    -8(%ebp), %ecx
        cmpl    %ecx, %eax
        jne     .LBB0_9
# %bb.8:
        movl    -52(%ebp), %eax                 # 4-byte Reload
        addl    $60, %esp
        popl    %ebx
        popl    %ebp
        .cfi_def_cfa %esp, 4
        retl
.LBB0_9:
        # Cookie mismatch: call the smash handler with the function's name.
        .cfi_def_cfa %ebp, 8
        movl    -48(%ebp), %ebx                 # 4-byte Reload
        leal    .LSSH@GOTOFF(%ebx), %eax
        movl    %eax, (%esp)
        calll   __stack_smash_handler@PLT
.Lfunc_end0:
        .size   totalorderf, .Lfunc_end0-totalorderf
        .cfi_endproc
                                        # -- End function
        .hidden __guard_local
        .type   .LSSH,@object                   # @SSH
        .section        .rodata.str1.1,"aMS",@progbits,1
.LSSH:
        .asciz  "totalorderf"
        .size   .LSSH, 12

        .section        ".note.GNU-stack","",@progbits
        .addrsig
        .addrsig_sym __stack_smash_handler
        .addrsig_sym __guard_local
        # Listing 2 of 3: presumably the attached totalorderf.O2.s (optimized
        # build WITHOUT the patch), i.e. the miscompiled version.
        #
        # Register / x87 roles established in the entry block:
        #   %ecx = 32-bit image of *x as stored back by fsts (after flds)
        #   %edx = 32-bit image of *y as stored back by fsts (after flds)
        #   x87 stack: st(0) = *y, st(1) = *x
        # The patch comment states that flds converts a signalling NaN to a
        # quiet NaN, so %ecx/%edx need not equal the bits in memory.
        .text
        .file   "totalorderf.c"
        .globl  totalorderf                     # -- Begin function totalorderf
        .p2align        4, 0xcc
        .type   totalorderf,@function
totalorderf:                            # @totalorderf
        .cfi_startproc
# %bb.0:
        pushl   %ebp
        .cfi_def_cfa_offset 8
        .cfi_offset %ebp, -8
        movl    %esp, %ebp
        .cfi_def_cfa_register %ebp
        pushl   %ebx
        subl    $8, %esp
        .cfi_offset %ebx, -12
        # Load both operands through the x87 unit, store each back to the
        # frame (fsts does not pop) and pick up the stored bits as integers.
        movl    12(%ebp), %eax
        movl    8(%ebp), %ecx
        flds    (%ecx)
        fsts    -8(%ebp)
        movl    -8(%ebp), %ecx
        flds    (%eax)
        fsts    -12(%ebp)
        movl    -12(%ebp), %edx
        # Sign of (bits_y ^ bits_x) is set when the two sign bits differ.
        movl    %edx, %eax
        xorl    %ecx, %eax
        js      .LBB0_1
# %bb.2:
        # %bl = (*x is not NaN), %al = (*y is NaN); each value is compared
        # with itself and the parity flag reports "unordered".
        fxch    %st(1)
        fucom   %st(0)
        fnstsw  %ax
                                        # kill: def $ah killed $ah killed $ax
        sahf
        setnp   %bl
        fxch    %st(1)
        fucom   %st(0)
        fnstsw  %ax
                                        # kill: def $ah killed $ah killed $ax
        sahf
        setp    %al
        # Equal flags <=> exactly one of the operands is a NaN.
        xorb    %bl, %al
        je      .LBB0_3
# %bb.4:
        # Both NaN or both non-NaN: re-test *x to tell the cases apart.
        fxch    %st(1)
        fucom   %st(0)
        fnstsw  %ax
                                        # kill: def $ah killed $ah killed $ax
        sahf
        jp      .LBB0_6
# %bb.5:
        # Neither is a NaN: st(0) = *y, st(1) = *x; setae yields *x <= *y.
        fxch    %st(1)
        fucompp
        fnstsw  %ax
                                        # kill: def $ah killed $ah killed $ax
        sahf
        setae   %al
        jmp     .LBB0_7
.LBB0_1:
        # Sign bits differ: drop both x87 values, return the sign bit of *x.
        fstp    %st(1)
        fstp    %st(0)
        shrl    $31, %ecx
        jmp     .LBB0_8
.LBB0_3:
        # Exactly one NaN: pop *y, test *x (popping it too), and return
        # (sign bit of x) XOR (x is not NaN).
        fstp    %st(0)
        fucomp  %st(0)
        fnstsw  %ax
                                        # kill: def $ah killed $ah killed $ax
        sahf
        setnp   %dl
        testl   %ecx, %ecx
        sets    %al
        xorb    %dl, %al
.LBB0_7:
        movzbl  %al, %ecx
.LBB0_8:
        movl    %ecx, %eax
        addl    $8, %esp
        popl    %ebx
        popl    %ebp
        .cfi_def_cfa %esp, 4
        retl
.LBB0_6:
        # Both operands are NaNs with the same sign.  This is where the
        # miscompilation described in the email shows: the bit-pattern
        # comparison uses %ecx and %edx, i.e. the images produced by the
        # flds/fsts round trip in the entry block, and never re-reads *x
        # and *y from memory.
        # mask = bits_x >> 31 (arithmetic) = 0 or 0xFFFFFFFF;
        # result = (bits_x ^ mask) <= (bits_y ^ mask), unsigned.
        .cfi_def_cfa %ebp, 8
        fstp    %st(0)
        fstp    %st(0)
        movl    %ecx, %eax
        sarl    $31, %eax
        xorl    %eax, %ecx
        xorl    %edx, %eax
        cmpl    %eax, %ecx
        setbe   %al
        jmp     .LBB0_7
.Lfunc_end0:
        .size   totalorderf, .Lfunc_end0-totalorderf
        .cfi_endproc
                                        # -- End function
        .section        ".note.GNU-stack","",@progbits
        .addrsig
        # Listing 3 of 3: presumably the attached totalorderf.O2-fixed.s
        # (optimized build WITH the patch's optimization barrier).
        #
        # Register / x87 roles established in the entry block:
        #   %esi = x (pointer), %edx = y (pointer); both stay live until the
        #          NaN/NaN path at .LBB0_6, so %esi is saved and restored
        #   %ecx = 32-bit image of *x as stored back by fsts (after flds)
        #   x87 stack: st(0) = *y, st(1) = *x
        .text
        .file   "totalorderf.c"
        .globl  totalorderf                     # -- Begin function totalorderf
        .p2align        4, 0xcc
        .type   totalorderf,@function
totalorderf:                            # @totalorderf
        .cfi_startproc
# %bb.0:
        pushl   %ebp
        .cfi_def_cfa_offset 8
        .cfi_offset %ebp, -8
        movl    %esp, %ebp
        .cfi_def_cfa_register %ebp
        pushl   %ebx
        pushl   %esi
        subl    $8, %esp
        .cfi_offset %esi, -16
        .cfi_offset %ebx, -12
        # Load both operands through the x87 unit, store each back to the
        # frame (fsts does not pop) and pick up the stored bits as integers.
        movl    12(%ebp), %edx
        movl    8(%ebp), %esi
        flds    (%esi)
        fsts    -12(%ebp)
        movl    -12(%ebp), %ecx
        flds    (%edx)
        fsts    -16(%ebp)
        movl    -16(%ebp), %eax
        # Sign of (bits_y ^ bits_x) is set when the two sign bits differ.
        xorl    %ecx, %eax
        js      .LBB0_1
# %bb.2:
        # %bl = (*x is not NaN), %al = (*y is NaN); each value is compared
        # with itself and the parity flag reports "unordered".
        fxch    %st(1)
        fucom   %st(0)
        fnstsw  %ax
                                        # kill: def $ah killed $ah killed $ax
        sahf
        setnp   %bl
        fxch    %st(1)
        fucom   %st(0)
        fnstsw  %ax
                                        # kill: def $ah killed $ah killed $ax
        sahf
        setp    %al
        # Equal flags <=> exactly one of the operands is a NaN.
        xorb    %bl, %al
        je      .LBB0_3
# %bb.4:
        # Both NaN or both non-NaN: re-test *x to tell the cases apart.
        fxch    %st(1)
        fucom   %st(0)
        fnstsw  %ax
                                        # kill: def $ah killed $ah killed $ax
        sahf
        jp      .LBB0_6
# %bb.5:
        # Neither is a NaN: st(0) = *y, st(1) = *x; setae yields *x <= *y.
        fxch    %st(1)
        fucompp
        fnstsw  %ax
                                        # kill: def $ah killed $ah killed $ax
        sahf
        setae   %al
        jmp     .LBB0_7
.LBB0_1:
        # Sign bits differ: drop both x87 values, return the sign bit of *x.
        fstp    %st(1)
        fstp    %st(0)
        shrl    $31, %ecx
        jmp     .LBB0_8
.LBB0_3:
        # Exactly one NaN: pop *y, test *x (popping it too), and return
        # (sign bit of x) XOR (x is not NaN).
        fstp    %st(0)
        fucomp  %st(0)
        fnstsw  %ax
                                        # kill: def $ah killed $ah killed $ax
        sahf
        setnp   %dl
        testl   %ecx, %ecx
        sets    %al
        xorb    %dl, %al
.LBB0_7:
        movzbl  %al, %ecx
.LBB0_8:
        movl    %ecx, %eax
        addl    $8, %esp
        popl    %esi
        popl    %ebx
        popl    %ebp
        .cfi_def_cfa %esp, 4
        retl
.LBB0_6:
        # Both operands are NaNs with the same sign.  Only the mask
        # (0 or 0xFFFFFFFF) is derived from the round-tripped image in %ecx.
        # The empty #APP/#NO_APP pair below is presumably the patch's
        # __asm__ __volatile__ ("" : : : "memory") barrier; after it, *x and
        # *y are read afresh from memory through %esi and %edx.
        # result = (*x ^ mask) <= (*y ^ mask), unsigned.
        .cfi_def_cfa %ebp, 8
        fstp    %st(0)
        fstp    %st(0)
        sarl    $31, %ecx
        #APP
        #NO_APP
        movl    (%esi), %eax
        xorl    %ecx, %eax
        xorl    (%edx), %ecx
        cmpl    %ecx, %eax
        setbe   %al
        jmp     .LBB0_7
.Lfunc_end0:
        .size   totalorderf, .Lfunc_end0-totalorderf
        .cfi_endproc
                                        # -- End function
        .section        ".note.GNU-stack","",@progbits
        .addrsig

Reply via email to