Hi,
GCC 10.2.0 (and GCC 8.3; other versions, and targets other than i386
and amd64, were not tested) generates rather bad code for the following
ternary expression:
--- repro.c ---
/* Minimal reproducer: the ternary in dummy() is the expression under discussion. */
#define NULL (char *) 0
char *dummy(char *string, long count) {
/* Yields the null pointer when count is 0, otherwise string advanced by one char. */
return count == 0 ? NULL : string + 1;
}
--- EOF ---
$ gcc -m64 -o- -O3 -S repro.c
dummy: # string in %rdi, count in %rsi; result returned in %rax
addq $1, %rdi # rdi = string + 1
movl $0, %eax # rax = 0 (32-bit write clears the upper half); 5-byte encoding
testq %rsi, %rsi # ZF = (count == 0)
cmovne %rdi, %rax # count != 0: rax = string + 1, else rax stays 0
ret
Just for the record: why does GCC NOT generate the shorter "XOR %eax, %eax"
instead of "MOV $0, %eax" here?
$ gcc -m64 -O3 -c dummy.c
$ objdump -D dummy.o
0000000000000000 <dummy>:
0: 48 83 c7 01 add $0x1,%rdi
4: b8 00 00 00 00 mov $0x0,%eax
9: 48 85 f6 test %rsi,%rsi
c: 48 0f 45 c7 cmovne %rdi,%rax
10: c3 retq
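i386 and AMD64 use the ILP32 and LP64 data models, respectively, in which
a "long" and a pointer have the same size, and 0L and the null pointer
have the same binary representation. Whenever count == 0, RSI therefore
already holds the value to be returned, so the contents of RSI should be
used to load RAX with 0 conditionally instead of a separate constant: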
dummy: # string in %rdi, count in %rsi; result returned in %rax
leaq 1(%rdi), %rax # rax = string + 1; LEA modifies neither flags nor %rdi
testq %rsi, %rsi # ZF = (count == 0)
cmoveq %rsi, %rax # count == 0: rax = count = 0, i.e. the null pointer
ret
$ gcc -m32 -o- -O3 -S dummy.c
_dummy: # stack arguments: string at 4(%esp), count at 8(%esp); result in %eax
movl 8(%esp), %edx # edx = count
movl 4(%esp), %eax # eax = string
addl $1, %eax # eax = string + 1
testl %edx, %edx # ZF = (count == 0)
movl $0, %edx # edx = 0; MOV leaves the flags set by TEST intact
cmove %edx, %eax # OUCH: if this executes, EDX was 0 before,
ret # so the MOV is really a NOP!
$ gcc -m32 -O3 -c dummy.c
$ objdump -D dummy.o
00000000 <_dummy>:
0: 8b 54 24 08 mov 0x8(%esp),%edx
4: 8b 44 24 04 mov 0x4(%esp),%eax
8: 83 c0 01 add $0x1,%eax
b: 85 d2 test %edx,%edx
d: ba 00 00 00 00 mov $0x0,%edx
12: 0f 44 c2 cmove %edx,%eax
15: c3 ret
Here's what GCC should generate instead:
00000000 <_dummy>:
0: 8b 44 24 04 mov 0x4(%esp),%eax # eax = string
4: 8b 4c 24 08 mov 0x8(%esp),%ecx # ecx = count
8: 40 inc %eax # eax = string + 1
9: f7 d9 neg %ecx # CF = (count != 0)
b: 19 c9 sbb %ecx,%ecx # ecx = -CF: all ones if count != 0, else 0
d: 21 c8 and %ecx,%eax # eax = count != 0 ? string + 1 : 0
f: c3 ret
For (pre)historic processors which don't support CMOVcc the
following code is generated:
$ gcc -m32 -mtune=i386 -o- -S dummy.c
_dummy: # stack arguments: string at 4(%esp), count at 8(%esp); result in %eax
movl 8(%esp), %eax # eax = count
testl %eax, %eax # ZF = (count == 0)
je L3 # count == 0: take the null-pointer path
movl 4(%esp), %eax # eax = string
incl %eax # eax = string + 1
ret
.p2align 2 # pad so that L3 starts on a 4-byte boundary
L3: # OUCH: EAX is already 0 here!
xorl %eax, %eax
ret
00000000 <dummy>:
0: 8b 44 24 08 mov 0x8(%esp),%eax
4: 85 c0 test %eax,%eax
6: 74 08 je 10 <dummy+0x10>
8: 8b 44 24 04 mov 0x4(%esp),%eax
c: 40 inc %eax
d: c3 ret
e: 66 90 xchg %ax,%ax
10: 31 c0 xor %eax,%eax
12: c3 ret
not amused
Stefan Kanthak