I found that gcc 4.1.1 (and 4.2) cannot optimize this simple code well.
/*
 * Test case: add a[1] to a[0] one hundred times.
 * Only a[0] is written and only a[0] and a[1] are read; the loop index i
 * is not used to address the array.
 */
void foo(int *a)
{
int i;
for (i = 0; i < 100; i++)
a[0] += a[1]; /* same two elements on every iteration */
}
If I compile this code with -O2, instructions to load from a[1] and
store to a[0] are both inside a loop.
Is this a known issue? PR20463 or PR21676 or any other?
The following are outputs from some gcc versions on mips and i386.
gcc 4.1.1 on mips: (no good)
# foo(int *a) as emitted by gcc 4.1.1 for MIPS.
# $4 holds a; offsets 0 and 4 from it are a[0] and a[1].
# With .set noreorder the instruction after each branch/jump is its delay slot.
# Inside the loop: a[1] is reloaded and a[0] is stored on every iteration.
foo:
.set noreorder
.set nomacro
lw $3,0($4) # $3 = a[0], running sum
addiu $6,$4,4 # $6 = &a[1]
move $5,$0 # $5 = i = 0
li $7,100 # 0x64: loop bound for i
$L2:
lw $2,0($6) # load a[1] again (inside the loop)
addiu $5,$5,1 # i++
addu $2,$3,$2 # $2 = running sum + a[1]
move $3,$2 # carry the sum into the next iteration
bne $5,$7,$L2 # loop while i != 100
sw $2,0($4) # delay slot: store a[0] on every iteration
j $31 # return
nop # delay slot of the return
gcc 4.2 on mips: (no good)
# foo(int *a) as emitted by gcc 4.2 for MIPS.
# $4 holds a; offsets 0 and 4 from it are a[0] and a[1].
# The first iteration is done ahead of the loop, which then starts at i = 1.
# Inside the loop: a[1] is still reloaded and a[0] still stored every iteration.
foo:
.set noreorder
.set nomacro
lw $3,0($4) # $3 = a[0]
lw $2,4($4) # $2 = a[1]
addiu $6,$4,4 # $6 = &a[1]
addu $3,$3,$2 # first iteration: $3 = a[0] + a[1]
sw $3,0($4) # store a[0] for the first iteration
li $5,1 # 0x1: i = 1, one iteration already done
$L2:
lw $2,0($6) # load a[1] again (inside the loop)
addiu $5,$5,1 # i++
addu $3,$3,$2 # running sum += a[1]
li $2,100 # 0x64: loop bound, re-materialized each iteration
bne $5,$2,$L2 # loop while i != 100
sw $3,0($4) # delay slot: store a[0] on every iteration
j $31 # return
nop # delay slot of the return
gcc 4.2 on i386:
# foo(int *a) as emitted by gcc 4.2 for i386 (AT&T syntax: op src, dst).
# The first iteration is done ahead of the loop, which then starts at i = 1.
# Inside the loop: a[1] is read from memory and a[0] written on every iteration.
foo:
pushl %ebp
movl $1, %edx # edx = i = 1, one iteration done before the loop
movl %esp, %ebp
pushl %ebx # ebx is used below, so save it
movl 8(%ebp), %ebx # ebx = a (first stack argument)
movl 4(%ebx), %eax # eax = a[1]
leal 4(%ebx), %ecx # ecx = &a[1]
addl (%ebx), %eax # eax = a[1] + a[0]
movl %eax, (%ebx) # store a[0] for the first iteration
.p2align 4,,7 # align loop head to 16 bytes if that needs at most 7 bytes of padding
.L2:
addl (%ecx), %eax # running sum += a[1], read from memory each time
addl $1, %edx # i++
cmpl $100, %edx # compare i with the loop bound
movl %eax, (%ebx) # store a[0] every iteration (movl leaves the flags intact)
jne .L2 # loop while i != 100
popl %ebx
popl %ebp
ret
gcc 3.4.6 on mips: (good)
# foo(int *a) as emitted by gcc 3.4.6 for MIPS.
# $4 holds a; offsets 0 and 4 from it are a[0] and a[1].
# Both elements are loaded once before the loop and a[0] is stored once
# after it; the loop body touches registers only.
foo:
.set noreorder
.set nomacro
lw $5,0($4) # $5 = a[0], running sum
lw $6,4($4) # $6 = a[1], loaded once
li $2,99 # 0x63: down-counter, 99..0 gives 100 iterations
$L5:
addu $3,$5,$6 # $3 = running sum + a[1]
addiu $2,$2,-1 # counter--
bgez $2,$L5 # loop while counter >= 0
move $5,$3 # delay slot: carry the sum into the next iteration
j $31 # return
sw $3,0($4) # delay slot of the return: single store to a[0]
gcc 3.4.4 on i386: (good)
# foo(int *a) as emitted by gcc 3.4.4 for i386 (AT&T syntax: op src, dst).
# Both elements are loaded once before the loop and a[0] is stored once
# after it; the loop body touches registers only.
foo:
pushl %ebp
movl %esp, %ebp
pushl %esi # esi and ebx are used below, so save them
pushl %ebx
movl 8(%ebp), %esi # esi = a (first stack argument)
movl (%esi), %ecx # ecx = a[0], running sum
movl 4(%esi), %ebx # ebx = a[1], loaded once
movl $99, %eax # down-counter, 99..0 gives 100 iterations
.p2align 4,,15 # align loop head to 16 bytes
.L5:
leal (%ecx,%ebx), %edx # edx = running sum + a[1] (lea leaves the flags intact)
decl %eax # counter--, sets the sign flag
movl %edx, %ecx # carry the sum into the next iteration
jns .L5 # loop while counter >= 0
movl %edx, (%esi) # single store to a[0] after the loop
popl %ebx
popl %esi
popl %ebp
ret
---
Atsushi Nemoto