On Thu, Aug 28, 2025 at 09:28:57AM +0200, Tomasz Kaminski wrote:
> I have no experience with backend optimization, so I would need help to fix
> the above.
> From my side I could only offer reverting the change, but I do not think we
> should do so, see below.
I'll look at it momentarily.

That said, I've used my + Jonathan's testcase (compiled with
g++ -S -O2 -std=c++23):

#include <compare>
bool f1 (std::partial_ordering x) { return x <= 0; }
bool f2 (std::partial_ordering x) { return x >= 0; }
bool f3 (std::partial_ordering x) { return 0 <= x; }
bool f4 (std::partial_ordering x) { return 0 >= x; }
auto f5 (std::partial_ordering x) { return 0 <=> x; }
struct S { friend std::partial_ordering operator<=>(S, int); };
bool f6 (S x) { return x <= 0; }
bool f7 (S x) { return x >= 0; }
bool f8 (S x) { return 0 <= x; }
bool f9 (S x) { return 0 >= x; }
auto f10 (S x) { return 0 <=> x; }

to look at the generated code before/after your changes (i.e. the same
compiler for compilation, the source preprocessed with GCC 15 and with
latest trunk).  I'm not doing any benchmarks on that, so judging just from
the insn count changes: on x86_64 it is in most cases a wash (though insn
size-wise they are in some cases smaller) and fewer insns in the rest; on
ia32 (but who cares about that) it is mostly a regression or a wash, with
2 routines seeing significant improvements; on ppc64le and aarch64 it is
always a win.

The diffs below are diff -U200 output, edited using a script and by hand
to leave out unneeded labels and stuff outside of the actual functions.

insn count change  x86_64  ia32  ppc64le  aarch64
               +1             4
                0       6     4
               -1       2                      8
               -2       1          10
               -3       1
               -4                              2
               -5             1
               -8             1
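
Before the per-target diffs, a minimal sketch (mine, not the actual
libstdc++ <compare> implementation; the helper names le_zero, ge_zero and
rev are invented for illustration) of what the new branch-free sequences
appear to compute, assuming the value representation less = -1,
equivalent = 0, greater = 1.  Note that the subl $1/shrb $7 and
notl/shrb $7 patterns are only correct if the unordered value is -128
(0x80), so the sketch assumes that as well:

// Minimal sketch, not the actual <compare> implementation.
constexpr signed char less = -1, equivalent = 0, greater = 1;
constexpr signed char unordered = -128;	// assumption inferred from the asm

// x <= 0 holds only for less and equivalent.  v - 1 maps {-1, 0} to
// {-2, -1} and wraps -128 to 127 in the low byte, so bit 7 of the low
// byte of v - 1 is set exactly for v in {-1, 0}.
constexpr bool le_zero (signed char v)
{ return (static_cast<unsigned char> (v - 1) >> 7) & 1; }

// x >= 0 holds only for equivalent and greater.  Bit 7 of ~v is set
// exactly when the byte v is non-negative, which rules out both
// less (-1) and unordered (-128).
constexpr bool ge_zero (signed char v)
{ return (static_cast<unsigned char> (~v) >> 7) & 1; }

// 0 <=> x reverses the ordering.  Byte negation swaps -1 and 1, fixes
// 0 and fixes -128 (its own negation mod 256), hence the plain neg
// without the previous test + conditional move in f5/f10.
constexpr signed char rev (signed char v)
{ return static_cast<signed char> (-v); }

static_assert (le_zero (less) && le_zero (equivalent)
	       && !le_zero (greater) && !le_zero (unordered));
static_assert (ge_zero (equivalent) && ge_zero (greater)
	       && !ge_zero (less) && !ge_zero (unordered));
static_assert (rev (less) == greater && rev (greater) == less
	       && rev (equivalent) == equivalent
	       && rev (unordered) == unordered);
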
===x86_64===
 _Z2f1St16partial_ordering:
-	testb	%dil, %dil
-	setle	%al
+	leal	-1(%rdi), %eax
+	shrb	$7, %al
 	ret
 _Z2f2St16partial_ordering:
-	movsbl	%dil, %edi
-	cmpl	$1, %edi
-	setbe	%al
+	movl	%edi, %eax
+	notl	%eax
+	shrb	$7, %al
 	ret
 _Z2f3St16partial_ordering:
-	movsbl	%dil, %edi
-	cmpl	$1, %edi
-	setbe	%al
+	movl	%edi, %eax
+	notl	%eax
+	shrb	$7, %al
 	ret
 _Z2f4St16partial_ordering:
-	testb	%dil, %dil
-	setle	%al
+	leal	-1(%rdi), %eax
+	shrb	$7, %al
 	ret
 _Z2f5St16partial_ordering:
 	movl	%edi, %eax
 	negl	%eax
-	testb	$1, %dil
-	cmove	%edi, %eax
 	ret
 _Z2f61S:
 	subq	$8, %rsp
 	xorl	%edi, %edi
 	call	_Zss1Si
-	testb	%al, %al
-	setle	%al
 	addq	$8, %rsp
+	subl	$1, %eax
+	shrb	$7, %al
 	ret
 _Z2f71S:
 	subq	$8, %rsp
 	xorl	%edi, %edi
 	call	_Zss1Si
-	movsbl	%al, %eax
-	cmpl	$1, %eax
-	setbe	%al
 	addq	$8, %rsp
+	notl	%eax
+	shrb	$7, %al
 	ret
 _Z2f81S:
 	subq	$8, %rsp
 	xorl	%edi, %edi
 	call	_Zss1Si
-	movsbl	%al, %eax
-	cmpl	$1, %eax
-	setbe	%al
 	addq	$8, %rsp
+	notl	%eax
+	shrb	$7, %al
 	ret
 _Z2f91S:
 	subq	$8, %rsp
 	xorl	%edi, %edi
 	call	_Zss1Si
-	testb	%al, %al
-	setle	%al
 	addq	$8, %rsp
+	subl	$1, %eax
+	shrb	$7, %al
 	ret
 _Z3f101S:
 	subq	$8, %rsp
 	xorl	%edi, %edi
 	call	_Zss1Si
-	movl	%eax, %edx
-	negl	%eax
-	testb	$1, %dl
-	cmove	%edx, %eax
 	addq	$8, %rsp
+	negl	%eax
 	ret

===ia32===
 _Z2f1St16partial_ordering:
-	cmpb	$0, 4(%esp)
-	setle	%al
+	movzbl	4(%esp), %eax
+	subl	$1, %eax
+	shrb	$7, %al
 	ret
 _Z2f2St16partial_ordering:
-	movsbl	4(%esp), %eax
-	cmpl	$1, %eax
-	setbe	%al
+	movzbl	4(%esp), %eax
+	notl	%eax
+	shrb	$7, %al
 	ret
 _Z2f3St16partial_ordering:
-	movsbl	4(%esp), %eax
-	cmpl	$1, %eax
-	setbe	%al
+	movzbl	4(%esp), %eax
+	notl	%eax
+	shrb	$7, %al
 	ret
 _Z2f4St16partial_ordering:
-	cmpb	$0, 4(%esp)
-	setle	%al
+	movzbl	4(%esp), %eax
+	subl	$1, %eax
+	shrb	$7, %al
 	ret
 _Z2f5St16partial_ordering:
 	movzbl	8(%esp), %eax
 	movl	4(%esp), %edx
-	testb	$1, %al
-	je	.L7
 	negl	%eax
 	movb	%al, (%edx)
 	movl	%edx, %eax
 	ret	$4
-.L7:
-	movb	%al, (%edx)
-	movl	%edx, %eax
-	ret	$4
 _Z2f61S:
 	subl	$28, %esp
 	leal	15(%esp), %eax
 	subl	$4, %esp
 	pushl	$0
 	pushl	$0
 	pushl	%eax
 	call	_Zss1Si
-	cmpb	$0, 27(%esp)
-	setle	%al
+	movzbl	27(%esp), %eax
 	addl	$40, %esp
+	subl	$1, %eax
+	shrb	$7, %al
 	ret
 _Z2f71S:
 	subl	$28, %esp
 	leal	15(%esp), %eax
 	subl	$4, %esp
 	pushl	$0
 	pushl	$0
 	pushl	%eax
 	call	_Zss1Si
-	movsbl	27(%esp), %eax
-	cmpl	$1, %eax
-	setbe	%al
+	movzbl	27(%esp), %eax
 	addl	$40, %esp
+	notl	%eax
+	shrb	$7, %al
 	ret
 _Z2f81S:
 	subl	$28, %esp
 	leal	15(%esp), %eax
 	subl	$4, %esp
 	pushl	$0
 	pushl	$0
 	pushl	%eax
 	call	_Zss1Si
-	movsbl	27(%esp), %eax
-	cmpl	$1, %eax
-	setbe	%al
+	movzbl	27(%esp), %eax
 	addl	$40, %esp
+	notl	%eax
+	shrb	$7, %al
 	ret
 _Z2f91S:
 	subl	$28, %esp
 	leal	15(%esp), %eax
 	subl	$4, %esp
 	pushl	$0
 	pushl	$0
 	pushl	%eax
 	call	_Zss1Si
-	cmpb	$0, 27(%esp)
-	setle	%al
+	movzbl	27(%esp), %eax
 	addl	$40, %esp
+	subl	$1, %eax
+	shrb	$7, %al
 	ret
 _Z3f101S:
 	subl	$28, %esp
 	leal	15(%esp), %eax
 	subl	$4, %esp
 	pushl	$0
 	pushl	$0
 	pushl	%eax
 	call	_Zss1Si
 	movzbl	27(%esp), %eax
-	addl	$12, %esp
-	testb	$1, %al
-	je	.L18
-	movl	32(%esp), %edx
+	movl	44(%esp), %edx
 	negl	%eax
 	movb	%al, (%edx)
-	movl	32(%esp), %eax
-	addl	$28, %esp
-	ret	$4
-.L18:
-	movl	32(%esp), %ecx
-	movb	%al, (%ecx)
-	movl	32(%esp), %eax
-	addl	$28, %esp
+	movl	%edx, %eax
+	addl	$40, %esp
 	ret	$4

===ppc64le===
 _Z2f1St16partial_ordering:
-	extsb 3,3
-	neg 3,3
-	srdi 3,3,63
-	xori 3,3,0x1
+	addi 3,3,-1
+	rldicl 3,3,57,63
 	blr
 _Z2f2St16partial_ordering:
-	rlwinm 3,3,0,0xff
-	subfic 3,3,1
-	srdi 3,3,63
-	xori 3,3,0x1
+	not 3,3
+	rldicl 3,3,57,63
 	blr
 _Z2f3St16partial_ordering:
-	rlwinm 3,3,0,0xff
-	subfic 3,3,1
-	srdi 3,3,63
-	xori 3,3,0x1
+	not 3,3
+	rldicl 3,3,57,63
 	blr
 _Z2f4St16partial_ordering:
-	extsb 3,3
-	neg 3,3
-	srdi 3,3,63
-	xori 3,3,0x1
+	addi 3,3,-1
+	rldicl 3,3,57,63
 	blr
 _Z2f5St16partial_ordering:
-	andi. 9,3,0x1
-	beqlr 0
 	neg 3,3
 	blr
 _Z2f61S:
 0:	addis 2,12,.TOC.-.LCF5@ha
 	addi 2,2,.TOC.-.LCF5@l
 	.localentry	_Z2f61S,.-_Z2f61S
 	mflr 0
 	li 4,0
 	li 3,0
 	std 0,16(1)
 	stdu 1,-32(1)
 	bl _Zss1Si
 	nop
 	addi 1,1,32
-	extsb 3,3
 	ld 0,16(1)
-	neg 3,3
-	srdi 3,3,63
+	addi 3,3,-1
+	rldicl 3,3,57,63
 	mtlr 0
-	xori 3,3,0x1
 	blr
 _Z2f71S:
 0:	addis 2,12,.TOC.-.LCF6@ha
 	addi 2,2,.TOC.-.LCF6@l
 	.localentry	_Z2f71S,.-_Z2f71S
 	mflr 0
 	li 4,0
 	li 3,0
 	std 0,16(1)
 	stdu 1,-32(1)
 	bl _Zss1Si
 	nop
 	addi 1,1,32
-	rlwinm 3,3,0,0xff
 	ld 0,16(1)
-	subfic 3,3,1
-	srdi 3,3,63
+	not 3,3
+	rldicl 3,3,57,63
 	mtlr 0
-	xori 3,3,0x1
 	blr
 _Z2f81S:
 0:	addis 2,12,.TOC.-.LCF7@ha
 	addi 2,2,.TOC.-.LCF7@l
 	.localentry	_Z2f81S,.-_Z2f81S
 	mflr 0
 	li 4,0
 	li 3,0
 	std 0,16(1)
 	stdu 1,-32(1)
 	bl _Zss1Si
 	nop
 	addi 1,1,32
-	rlwinm 3,3,0,0xff
 	ld 0,16(1)
-	subfic 3,3,1
-	srdi 3,3,63
+	not 3,3
+	rldicl 3,3,57,63
 	mtlr 0
-	xori 3,3,0x1
 	blr
 _Z2f91S:
 0:	addis 2,12,.TOC.-.LCF8@ha
 	addi 2,2,.TOC.-.LCF8@l
 	.localentry	_Z2f91S,.-_Z2f91S
 	mflr 0
 	li 4,0
 	li 3,0
 	std 0,16(1)
 	stdu 1,-32(1)
 	bl _Zss1Si
 	nop
 	addi 1,1,32
-	extsb 3,3
 	ld 0,16(1)
-	neg 3,3
-	srdi 3,3,63
+	addi 3,3,-1
+	rldicl 3,3,57,63
 	mtlr 0
-	xori 3,3,0x1
 	blr
 _Z3f101S:
 0:	addis 2,12,.TOC.-.LCF9@ha
 	addi 2,2,.TOC.-.LCF9@l
 	.localentry	_Z3f101S,.-_Z3f101S
 	mflr 0
 	li 4,0
 	li 3,0
 	std 0,16(1)
 	stdu 1,-32(1)
 	bl _Zss1Si
 	nop
-	andi. 9,3,0x1
-	beq 0,.L20
-	neg 3,3
-.L20:
 	addi 1,1,32
 	ld 0,16(1)
+	neg 3,3
 	mtlr 0
 	blr
===aarch64===
 _Z2f1St16partial_ordering:
-	sxtb	w0, w0
-	cmp	w0, 0
-	cset	w0, le
+	sub	w0, w0, #1
+	ubfx	w0, w0, 7, 1
 	ret
 _Z2f2St16partial_ordering:
-	and	w0, w0, 255
-	cmp	w0, 1
-	cset	w0, ls
+	mvn	w0, w0
+	ubfx	w0, w0, 7, 1
 	ret
 _Z2f3St16partial_ordering:
-	and	w0, w0, 255
-	cmp	w0, 1
-	cset	w0, ls
+	mvn	w0, w0
+	ubfx	w0, w0, 7, 1
 	ret
 _Z2f4St16partial_ordering:
-	sxtb	w0, w0
-	cmp	w0, 0
-	cset	w0, le
+	sub	w0, w0, #1
+	ubfx	w0, w0, 7, 1
 	ret
 _Z2f5St16partial_ordering:
-	sxtb	w1, w0
-	tst	x0, 1
-	neg	w0, w1
-	sxtb	w0, w0
-	csel	w0, w1, w0, eq
+	neg	w0, w0
 	ret
 _Z2f61S:
 	stp	x29, x30, [sp, -16]!
 	mov	w1, 0
 	mov	w0, 0
 	mov	x29, sp
 	bl	_Zss1Si
-	sxtb	w0, w0
-	cmp	w0, 0
-	cset	w0, le
+	sub	w0, w0, #1
 	ldp	x29, x30, [sp], 16
+	ubfx	w0, w0, 7, 1
 	ret
 _Z2f71S:
 	stp	x29, x30, [sp, -16]!
 	mov	w1, 0
 	mov	w0, 0
 	mov	x29, sp
 	bl	_Zss1Si
-	and	w0, w0, 255
-	cmp	w0, 1
-	cset	w0, ls
+	mvn	w0, w0
 	ldp	x29, x30, [sp], 16
+	ubfx	w0, w0, 7, 1
 	ret
 _Z2f81S:
 	stp	x29, x30, [sp, -16]!
 	mov	w1, 0
 	mov	w0, 0
 	mov	x29, sp
 	bl	_Zss1Si
-	and	w0, w0, 255
-	cmp	w0, 1
-	cset	w0, ls
+	mvn	w0, w0
 	ldp	x29, x30, [sp], 16
+	ubfx	w0, w0, 7, 1
 	ret
 _Z2f91S:
 	stp	x29, x30, [sp, -16]!
 	mov	w1, 0
 	mov	w0, 0
 	mov	x29, sp
 	bl	_Zss1Si
-	sxtb	w0, w0
-	cmp	w0, 0
-	cset	w0, le
+	sub	w0, w0, #1
 	ldp	x29, x30, [sp], 16
+	ubfx	w0, w0, 7, 1
 	ret
 _Z3f101S:
 	stp	x29, x30, [sp, -16]!
 	mov	w1, 0
 	mov	w0, 0
 	mov	x29, sp
 	bl	_Zss1Si
-	sxtb	w1, w0
-	tst	x0, 1
-	neg	w0, w1
 	ldp	x29, x30, [sp], 16
-	sxtb	w0, w0
-	csel	w0, w1, w0, eq
+	neg	w0, w0
 	ret

	Jakub