On Thu, Aug 28, 2025 at 09:28:57AM +0200, Tomasz Kaminski wrote:
> I have no experience with backend optimization, so I would need help to fix
> the above.
> From my side I could only offer reverting the change, but I do not think we
> should do so,
> see below.

I'll look at it momentarily.

That said, I've used my + Jonathan's testcase
g++ -S -O2 -std=c++23
#include <compare>
bool f1 (std::partial_ordering x) { return x <= 0; }
bool f2 (std::partial_ordering x) { return x >= 0; }
bool f3 (std::partial_ordering x) { return 0 <= x; }
bool f4 (std::partial_ordering x) { return 0 >= x; }
auto f5 (std::partial_ordering x) { return 0 <=> x; }
struct S { friend std::partial_ordering operator<=>(S, int); };
bool f6 (S x) { return x <= 0; }
bool f7 (S x) { return x >= 0; }
bool f8 (S x) { return 0 <= x; }
bool f9 (S x) { return 0 >= x; }
auto f10 (S x) { return 0 <=> x; }

to look at the generated code before/after your changes (i.e. same compiler
for compilation, source preprocessed with GCC 15 and latest trunk).
Not doing any benchmarks on that, so just judging from insn count changes
on x86_64 it is in most cases a wash (but insn size-wise they are in some
cases smaller) and on the rest fewer insns, on ia32 (but who cares about
that) it is mostly a regression or wash with 2 routines with significant
improvements, on ppc64le and aarch64 it is always a win.  diff -U200
edited using script and by hand to leave out unneeded labels or stuff
outside actual functions.

insn count change
    x86_64 ia32 ppc64le aarch64
+1          4
 0    6     4
-1    2                   8
-2    1           10
-3    1
-4                        2
-5          1
-8          1

===x86_64===
 _Z2f1St16partial_ordering:
-       testb   %dil, %dil
-       setle   %al
+       leal    -1(%rdi), %eax
+       shrb    $7, %al
        ret
 _Z2f2St16partial_ordering:
-       movsbl  %dil, %edi
-       cmpl    $1, %edi
-       setbe   %al
+       movl    %edi, %eax
+       notl    %eax
+       shrb    $7, %al
        ret
 _Z2f3St16partial_ordering:
-       movsbl  %dil, %edi
-       cmpl    $1, %edi
-       setbe   %al
+       movl    %edi, %eax
+       notl    %eax
+       shrb    $7, %al
        ret
 _Z2f4St16partial_ordering:
-       testb   %dil, %dil
-       setle   %al
+       leal    -1(%rdi), %eax
+       shrb    $7, %al
        ret
 _Z2f5St16partial_ordering:
        movl    %edi, %eax
        negl    %eax
-       testb   $1, %dil
-       cmove   %edi, %eax
        ret
 _Z2f61S:
        subq    $8, %rsp
        xorl    %edi, %edi
        call    _Zss1Si
-       testb   %al, %al
-       setle   %al
        addq    $8, %rsp
+       subl    $1, %eax
+       shrb    $7, %al
        ret
 _Z2f71S:
        subq    $8, %rsp
        xorl    %edi, %edi
        call    _Zss1Si
-       movsbl  %al, %eax
-       cmpl    $1, %eax
-       setbe   %al
        addq    $8, %rsp
+       notl    %eax
+       shrb    $7, %al
        ret
 _Z2f81S:
        subq    $8, %rsp
        xorl    %edi, %edi
        call    _Zss1Si
-       movsbl  %al, %eax
-       cmpl    $1, %eax
-       setbe   %al
        addq    $8, %rsp
+       notl    %eax
+       shrb    $7, %al
        ret
 _Z2f91S:
        subq    $8, %rsp
        xorl    %edi, %edi
        call    _Zss1Si
-       testb   %al, %al
-       setle   %al
        addq    $8, %rsp
+       subl    $1, %eax
+       shrb    $7, %al
        ret
 _Z3f101S:
        subq    $8, %rsp
        xorl    %edi, %edi
        call    _Zss1Si
-       movl    %eax, %edx
-       negl    %eax
-       testb   $1, %dl
-       cmove   %edx, %eax
        addq    $8, %rsp
+       negl    %eax
        ret
===ia32===
 _Z2f1St16partial_ordering:
-       cmpb    $0, 4(%esp)
-       setle   %al
+       movzbl  4(%esp), %eax
+       subl    $1, %eax
+       shrb    $7, %al
        ret
 _Z2f2St16partial_ordering:
-       movsbl  4(%esp), %eax
-       cmpl    $1, %eax
-       setbe   %al
+       movzbl  4(%esp), %eax
+       notl    %eax
+       shrb    $7, %al
        ret
 _Z2f3St16partial_ordering:
-       movsbl  4(%esp), %eax
-       cmpl    $1, %eax
-       setbe   %al
+       movzbl  4(%esp), %eax
+       notl    %eax
+       shrb    $7, %al
        ret
 _Z2f4St16partial_ordering:
-       cmpb    $0, 4(%esp)
-       setle   %al
+       movzbl  4(%esp), %eax
+       subl    $1, %eax
+       shrb    $7, %al
        ret
 _Z2f5St16partial_ordering:
        movzbl  8(%esp), %eax
        movl    4(%esp), %edx
-       testb   $1, %al
-       je      .L7
        negl    %eax
        movb    %al, (%edx)
        movl    %edx, %eax
        ret     $4
-.L7:
-       movb    %al, (%edx)
-       movl    %edx, %eax
-       ret     $4
 _Z2f61S:
        subl    $28, %esp
        leal    15(%esp), %eax
        subl    $4, %esp
        pushl   $0
        pushl   $0
        pushl   %eax
        call    _Zss1Si
-       cmpb    $0, 27(%esp)
-       setle   %al
+       movzbl  27(%esp), %eax
        addl    $40, %esp
+       subl    $1, %eax
+       shrb    $7, %al
        ret
 _Z2f71S:
        subl    $28, %esp
        leal    15(%esp), %eax
        subl    $4, %esp
        pushl   $0
        pushl   $0
        pushl   %eax
        call    _Zss1Si
-       movsbl  27(%esp), %eax
-       cmpl    $1, %eax
-       setbe   %al
+       movzbl  27(%esp), %eax
        addl    $40, %esp
+       notl    %eax
+       shrb    $7, %al
        ret
 _Z2f81S:
        subl    $28, %esp
        leal    15(%esp), %eax
        subl    $4, %esp
        pushl   $0
        pushl   $0
        pushl   %eax
        call    _Zss1Si
-       movsbl  27(%esp), %eax
-       cmpl    $1, %eax
-       setbe   %al
+       movzbl  27(%esp), %eax
        addl    $40, %esp
+       notl    %eax
+       shrb    $7, %al
        ret
 _Z2f91S:
        subl    $28, %esp
        leal    15(%esp), %eax
        subl    $4, %esp
        pushl   $0
        pushl   $0
        pushl   %eax
        call    _Zss1Si
-       cmpb    $0, 27(%esp)
-       setle   %al
+       movzbl  27(%esp), %eax
        addl    $40, %esp
+       subl    $1, %eax
+       shrb    $7, %al
        ret
 _Z3f101S:
        subl    $28, %esp
        leal    15(%esp), %eax
        subl    $4, %esp
        pushl   $0
        pushl   $0
        pushl   %eax
        call    _Zss1Si
        movzbl  27(%esp), %eax
-       addl    $12, %esp
-       testb   $1, %al
-       je      .L18
-       movl    32(%esp), %edx
+       movl    44(%esp), %edx
        negl    %eax
        movb    %al, (%edx)
-       movl    32(%esp), %eax
-       addl    $28, %esp
-       ret     $4
-.L18:
-       movl    32(%esp), %ecx
-       movb    %al, (%ecx)
-       movl    32(%esp), %eax
-       addl    $28, %esp
+       movl    %edx, %eax
+       addl    $40, %esp
        ret     $4
===ppc64le===
 _Z2f1St16partial_ordering:
-       extsb 3,3
-       neg 3,3
-       srdi 3,3,63
-       xori 3,3,0x1
+       addi 3,3,-1
+       rldicl 3,3,57,63
        blr
 _Z2f2St16partial_ordering:
-       rlwinm 3,3,0,0xff
-       subfic 3,3,1
-       srdi 3,3,63
-       xori 3,3,0x1
+       not 3,3
+       rldicl 3,3,57,63
        blr
 _Z2f3St16partial_ordering:
-       rlwinm 3,3,0,0xff
-       subfic 3,3,1
-       srdi 3,3,63
-       xori 3,3,0x1
+       not 3,3
+       rldicl 3,3,57,63
        blr
 _Z2f4St16partial_ordering:
-       extsb 3,3
-       neg 3,3
-       srdi 3,3,63
-       xori 3,3,0x1
+       addi 3,3,-1
+       rldicl 3,3,57,63
        blr
 _Z2f5St16partial_ordering:
-       andi. 9,3,0x1
-       beqlr 0
        neg 3,3
        blr
 _Z2f61S:
 0:     addis 2,12,.TOC.-.LCF5@ha
        addi 2,2,.TOC.-.LCF5@l
        .localentry     _Z2f61S,.-_Z2f61S
        mflr 0
        li 4,0
        li 3,0
        std 0,16(1)
        stdu 1,-32(1)
        bl _Zss1Si
        nop
        addi 1,1,32
-       extsb 3,3
        ld 0,16(1)
-       neg 3,3
-       srdi 3,3,63
+       addi 3,3,-1
+       rldicl 3,3,57,63
        mtlr 0
-       xori 3,3,0x1
        blr
 _Z2f71S:
 0:     addis 2,12,.TOC.-.LCF6@ha
        addi 2,2,.TOC.-.LCF6@l
        .localentry     _Z2f71S,.-_Z2f71S
        mflr 0
        li 4,0
        li 3,0
        std 0,16(1)
        stdu 1,-32(1)
        bl _Zss1Si
        nop
        addi 1,1,32
-       rlwinm 3,3,0,0xff
        ld 0,16(1)
-       subfic 3,3,1
-       srdi 3,3,63
+       not 3,3
+       rldicl 3,3,57,63
        mtlr 0
-       xori 3,3,0x1
        blr
 _Z2f81S:
 0:     addis 2,12,.TOC.-.LCF7@ha
        addi 2,2,.TOC.-.LCF7@l
        .localentry     _Z2f81S,.-_Z2f81S
        mflr 0
        li 4,0
        li 3,0
        std 0,16(1)
        stdu 1,-32(1)
        bl _Zss1Si
        nop
        addi 1,1,32
-       rlwinm 3,3,0,0xff
        ld 0,16(1)
-       subfic 3,3,1
-       srdi 3,3,63
+       not 3,3
+       rldicl 3,3,57,63
        mtlr 0
-       xori 3,3,0x1
        blr
 _Z2f91S:
 0:     addis 2,12,.TOC.-.LCF8@ha
        addi 2,2,.TOC.-.LCF8@l
        .localentry     _Z2f91S,.-_Z2f91S
        mflr 0
        li 4,0
        li 3,0
        std 0,16(1)
        stdu 1,-32(1)
        bl _Zss1Si
        nop
        addi 1,1,32
-       extsb 3,3
        ld 0,16(1)
-       neg 3,3
-       srdi 3,3,63
+       addi 3,3,-1
+       rldicl 3,3,57,63
        mtlr 0
-       xori 3,3,0x1
        blr
 _Z3f101S:
 0:     addis 2,12,.TOC.-.LCF9@ha
        addi 2,2,.TOC.-.LCF9@l
        .localentry     _Z3f101S,.-_Z3f101S
        mflr 0
        li 4,0
        li 3,0
        std 0,16(1)
        stdu 1,-32(1)
        bl _Zss1Si
        nop
-       andi. 9,3,0x1
-       beq 0,.L20
-       neg 3,3
-.L20:
        addi 1,1,32
        ld 0,16(1)
+       neg 3,3
        mtlr 0
        blr
===aarch64===
 _Z2f1St16partial_ordering:
-       sxtb    w0, w0
-       cmp     w0, 0
-       cset    w0, le
+       sub     w0, w0, #1
+       ubfx    w0, w0, 7, 1
        ret
 _Z2f2St16partial_ordering:
-       and     w0, w0, 255
-       cmp     w0, 1
-       cset    w0, ls
+       mvn     w0, w0
+       ubfx    w0, w0, 7, 1
        ret
 _Z2f3St16partial_ordering:
-       and     w0, w0, 255
-       cmp     w0, 1
-       cset    w0, ls
+       mvn     w0, w0
+       ubfx    w0, w0, 7, 1
        ret
 _Z2f4St16partial_ordering:
-       sxtb    w0, w0
-       cmp     w0, 0
-       cset    w0, le
+       sub     w0, w0, #1
+       ubfx    w0, w0, 7, 1
        ret
 _Z2f5St16partial_ordering:
-       sxtb    w1, w0
-       tst     x0, 1
-       neg     w0, w1
-       sxtb    w0, w0
-       csel    w0, w1, w0, eq
+       neg     w0, w0
        ret
 _Z2f61S:
        stp     x29, x30, [sp, -16]!
        mov     w1, 0
        mov     w0, 0
        mov     x29, sp
        bl      _Zss1Si
-       sxtb    w0, w0
-       cmp     w0, 0
-       cset    w0, le
+       sub     w0, w0, #1
        ldp     x29, x30, [sp], 16
+       ubfx    w0, w0, 7, 1
        ret
 _Z2f71S:
        stp     x29, x30, [sp, -16]!
        mov     w1, 0
        mov     w0, 0
        mov     x29, sp
        bl      _Zss1Si
-       and     w0, w0, 255
-       cmp     w0, 1
-       cset    w0, ls
+       mvn     w0, w0
        ldp     x29, x30, [sp], 16
+       ubfx    w0, w0, 7, 1
        ret
 _Z2f81S:
        stp     x29, x30, [sp, -16]!
        mov     w1, 0
        mov     w0, 0
        mov     x29, sp
        bl      _Zss1Si
-       and     w0, w0, 255
-       cmp     w0, 1
-       cset    w0, ls
+       mvn     w0, w0
        ldp     x29, x30, [sp], 16
+       ubfx    w0, w0, 7, 1
        ret
 _Z2f91S:
        stp     x29, x30, [sp, -16]!
        mov     w1, 0
        mov     w0, 0
        mov     x29, sp
        bl      _Zss1Si
-       sxtb    w0, w0
-       cmp     w0, 0
-       cset    w0, le
+       sub     w0, w0, #1
        ldp     x29, x30, [sp], 16
+       ubfx    w0, w0, 7, 1
        ret
 _Z3f101S:
        stp     x29, x30, [sp, -16]!
        mov     w1, 0
        mov     w0, 0
        mov     x29, sp
        bl      _Zss1Si
-       sxtb    w1, w0
-       tst     x0, 1
-       neg     w0, w1
        ldp     x29, x30, [sp], 16
-       sxtb    w0, w0
-       csel    w0, w1, w0, eq
+       neg     w0, w0
        ret

        Jakub

Reply via email to