How does the i386 backend optimise the stack slot assignment to minimize
the displacement offset?
What code should I look at?
Or is there some other optimisation at work here...?
I.e.:
; -O0 => large offset
leal 8268(%esp), %eax
incl (%eax)
; -O3 => small offset
incl 40(%esp)
The source for a test case and the compiler output are attached. The output was generated with:
gcc-4.0 -S stackframe.c -fomit-frame-pointer -O0 -o stackframe-O0.s
gcc-4.0 -S stackframe.c -fomit-frame-pointer -O3 -o stackframe-O3.s
The thread linked below has a stack slot assignment optimisation patch that
has never been committed to GCC CVS, but the above indicates that there is
already some sort of mechanism in GCC to mitigate this problem...
http://gcc.gnu.org/ml/gcc-patches/2003-01/msg00019.html
--
Øyvind Harboe
http://www.zylin.com
/*
 * Stack-frame test case: two 1024-element int arrays are declared on either
 * side of a group of scalar loop counters, and a deep nest of loops keeps the
 * counters in use across calls to the external functions bar() and test1().
 */
int bar(int a);
int test1(int *);

/*
 * foo - run the nested loops.
 *
 * The parameters a, b, c and d are not used.  The locals q, x, y and z are
 * declared but never referenced; they are kept so that the set of locals in
 * the function is unchanged.
 *
 * No value is returned explicitly, so callers must not use the result.
 */
int foo(int a, int b, int c, int d)
{
  int abc[1024];
  int j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z;
  int def[1024];

  for (j = 0; j < bar(j); j++)
  {
    test1(abc);
    for (k = 0; k < bar(k); k++)
    {
      for (l = 0; l < bar(l); l++)
      {
        for (m = 0; m < bar(m); m++)
        {
          test1(def);
          /*
           * Fixed: this loop used to start with "m=0", which left n
           * uninitialised when first read (undefined behaviour) and reset
           * the enclosing loop's counter on every pass.  Compiler listings
           * produced from the uncorrected source differ at this point.
           */
          for (n = 0; n < bar(n); n++)
          {
            for (o = 0; o < bar(o); o++)
            {
              for (p = 0; p < bar(p); p++)
              {
                for (r = 0; r < bar(r); r++)
                {
                  for (s = 0; s < bar(s); s++)
                  {
                    for (t = 0; t < bar(t); t++)
                    {
                      for (u = 0; u < bar(u); u++)
                      {
                        for (v = 0; v < bar(v); v++)
                        {
                          /* Innermost loop: intentionally empty body. */
                          for (w = 0; w < bar(w); w++)
                          {
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}
# Listing of foo() in which every loop counter lives in a stack slot addressed
# off %esp; no frame pointer is set up and no registers are saved.
# NOTE(review): presumably the -O0 output (stackframe-O0.s) named in the
# message above - confirm.
#
# Frame layout as used by the code below. Offsets are relative to %esp right
# after the prologue; inside a call sequence ("subl $12, %esp" already done)
# the same slot is addressed 12 bytes higher.
#   array for the second test1 call (def in the C source) .... 24(%esp)
#   array for the first test1 call (abc in the C source) ..... 4120(%esp)
#   loop counters j,k,l,m,n,o,p ............................. 8216..8240(%esp)
#   loop counters r,s,t,u,v,w ............................... 8248..8268(%esp)
#
# Every loop has the same shape: zero the counter, jump to the test label,
# and at the test call bar(counter) and branch back to the body while
# bar(counter) > counter (signed compare).
# Each call sequence reserves 12 bytes of padding plus the 4-byte argument and
# releases all 16 bytes with "addl $16, %esp" afterwards.
.file "stackframe.c"
.text
.globl foo
.type foo, @function
foo:
# Prologue: reserve the whole frame in one step.
subl $8284, %esp
# j = 0, then go to the j-loop test.
movl $0, 8216(%esp)
jmp .L2
.L3:
# Body of the j loop: test1(&array at frame offset 4120); 4132 here because
# %esp was just lowered by 12.
subl $12, %esp
leal 4132(%esp), %eax
pushl %eax
call test1
addl $16, %esp
# k = 0
movl $0, 8220(%esp)
jmp .L4
.L5:
# Body of the k loop: l = 0
movl $0, 8224(%esp)
jmp .L6
.L7:
# Body of the l loop: m = 0
movl $0, 8228(%esp)
jmp .L8
.L9:
# Body of the m loop: test1(&array at frame offset 24).
subl $12, %esp
leal 36(%esp), %eax
pushl %eax
call test1
addl $16, %esp
# Zeroes the m slot (8228) again, not the slot the following loop tests.
# The loop at .L10 reads 8232(%esp), which no instruction in this listing
# stores to before that first read.
movl $0, 8228(%esp)
jmp .L10
.L11:
# Body of the n loop: o = 0
movl $0, 8236(%esp)
jmp .L12
.L13:
# Body of the o loop: p = 0
movl $0, 8240(%esp)
jmp .L14
.L15:
# Body of the p loop: r = 0. Slot 8244 is skipped; nothing in this listing
# ever addresses it.
movl $0, 8248(%esp)
jmp .L16
.L17:
# Body of the r loop: s = 0
movl $0, 8252(%esp)
jmp .L18
.L19:
# Body of the s loop: t = 0
movl $0, 8256(%esp)
jmp .L20
.L21:
# Body of the t loop: u = 0
movl $0, 8260(%esp)
jmp .L22
.L23:
# Body of the u loop: v = 0
movl $0, 8264(%esp)
jmp .L24
.L25:
# Body of the v loop: w = 0
movl $0, 8268(%esp)
jmp .L26
.L27:
# The w loop has no body, only the increment. The slot address is formed
# with leal first and then incremented through %eax; this is the
# large-displacement pattern quoted in the message above.
leal 8268(%esp), %eax
incl (%eax)
.L26:
# Test for w: push slot 8268 (addressed as 8280 after the subl) and call bar.
subl $12, %esp
pushl 8280(%esp)
call bar
addl $16, %esp
# Flags are set from %eax - w; loop while bar(w) > w.
cmpl 8268(%esp), %eax
jg .L27
# End of the w loop: v++
leal 8264(%esp), %eax
incl (%eax)
.L24:
# Test for v.
subl $12, %esp
pushl 8276(%esp)
call bar
addl $16, %esp
cmpl 8264(%esp), %eax
jg .L25
# End of the v loop: u++
leal 8260(%esp), %eax
incl (%eax)
.L22:
# Test for u.
subl $12, %esp
pushl 8272(%esp)
call bar
addl $16, %esp
cmpl 8260(%esp), %eax
jg .L23
# End of the u loop: t++
leal 8256(%esp), %eax
incl (%eax)
.L20:
# Test for t.
subl $12, %esp
pushl 8268(%esp)
call bar
addl $16, %esp
cmpl 8256(%esp), %eax
jg .L21
# End of the t loop: s++
leal 8252(%esp), %eax
incl (%eax)
.L18:
# Test for s.
subl $12, %esp
pushl 8264(%esp)
call bar
addl $16, %esp
cmpl 8252(%esp), %eax
jg .L19
# End of the s loop: r++
leal 8248(%esp), %eax
incl (%eax)
.L16:
# Test for r.
subl $12, %esp
pushl 8260(%esp)
call bar
addl $16, %esp
cmpl 8248(%esp), %eax
jg .L17
# End of the r loop: p++
leal 8240(%esp), %eax
incl (%eax)
.L14:
# Test for p.
subl $12, %esp
pushl 8252(%esp)
call bar
addl $16, %esp
cmpl 8240(%esp), %eax
jg .L15
# End of the p loop: o++
leal 8236(%esp), %eax
incl (%eax)
.L12:
# Test for o.
subl $12, %esp
pushl 8248(%esp)
call bar
addl $16, %esp
cmpl 8236(%esp), %eax
jg .L13
# End of the o loop: n++ (the only instruction that writes slot 8232).
leal 8232(%esp), %eax
incl (%eax)
.L10:
# Test for n: reads slot 8232 (addressed as 8244 after the subl).
subl $12, %esp
pushl 8244(%esp)
call bar
addl $16, %esp
cmpl 8232(%esp), %eax
jg .L11
# End of the n loop: m++
leal 8228(%esp), %eax
incl (%eax)
.L8:
# Test for m.
subl $12, %esp
pushl 8240(%esp)
call bar
addl $16, %esp
cmpl 8228(%esp), %eax
jg .L9
# End of the m loop: l++
leal 8224(%esp), %eax
incl (%eax)
.L6:
# Test for l.
subl $12, %esp
pushl 8236(%esp)
call bar
addl $16, %esp
cmpl 8224(%esp), %eax
jg .L7
# End of the l loop: k++
leal 8220(%esp), %eax
incl (%eax)
.L4:
# Test for k.
subl $12, %esp
pushl 8232(%esp)
call bar
addl $16, %esp
cmpl 8220(%esp), %eax
jg .L5
# End of the k loop: j++
leal 8216(%esp), %eax
incl (%eax)
.L2:
# Test for j (outermost loop).
subl $12, %esp
pushl 8228(%esp)
call bar
addl $16, %esp
cmpl 8216(%esp), %eax
jg .L3
# Epilogue: release the frame. %eax is not loaded with a return value here;
# it still holds the result of the last call to bar.
addl $8284, %esp
ret
.size foo, .-foo
.ident "GCC: (GNU) 4.0.0 20050410 (prerelease) (Debian 4.0-0pre10)"
.section .note.GNU-stack,"",@progbits
# Second listing of foo(). Four registers (%ebp, %edi, %esi, %ebx) are saved
# in the prologue and used as loop counters; the remaining counters are kept
# in stack slots with small displacements.
# NOTE(review): presumably the -O3 output (stackframe-O3.s) named in the
# message above - confirm.
#
# Frame layout as used by the code below. Offsets are relative to %esp right
# after the prologue; after "subl $12, %esp" the same slot is addressed 12
# bytes higher, and after the following push 16 bytes higher.
#   loop counters j,k,l,n,o,p,r,s ........................... 12..40(%esp)
#   array for the second test1 call (def in the C source) .... 44(%esp)
#   array for the first test1 call (abc in the C source) ..... 4140(%esp)
# Register counters: m in %ebx (outer part), then t=%ebp, u=%edi, v=%esi,
# w=%ebx for the four innermost loops.
#
# Loop tests call bar(counter) and compare the counter with the result using
# signed conditions; the exit branch targets hold the enclosing loop's
# increment and are collected at the end of the function.
.file "stackframe.c"
.text
# Align the function entry to 2^4 bytes, skipping at most 15 bytes.
.p2align 4,,15
.globl foo
.type foo, @function
foo:
# Prologue: save the four registers used as counters, then reserve the frame.
pushl %ebp
pushl %edi
pushl %esi
pushl %ebx
subl $8236, %esp
# j = 0
movl $0, 12(%esp)
.L2:
# Test for j: slot 12 is read as 24(%esp) because of the subl just before.
subl $12, %esp
movl 24(%esp), %eax
pushl %eax
call bar
addl $16, %esp
# Flags from j - bar(j); leave the function when j >= bar(j).
cmpl %eax, 12(%esp)
jge .L43
# Body of the j loop: test1(&array at frame offset 4140).
subl $12, %esp
leal 4152(%esp), %eax
pushl %eax
call test1
# k = 0. Done before the stack is restored, so slot 16 is written as 32(%esp).
movl $0, 32(%esp)
addl $16, %esp
.L4:
# Test for k (slot 16); exit to .L38, which does j++.
subl $12, %esp
movl 28(%esp), %edx
pushl %edx
call bar
addl $16, %esp
cmpl %eax, 16(%esp)
jge .L38
# l = 0
movl $0, 20(%esp)
.L36:
# Test for l (slot 20); exit to .L44, which does k++.
subl $12, %esp
movl 32(%esp), %ecx
pushl %ecx
call bar
addl $16, %esp
cmpl %eax, 20(%esp)
jge .L44
# m = 0, held in %ebx.
xorl %ebx, %ebx
.L33:
# Test for m; exit to .L45, which does l++.
subl $12, %esp
pushl %ebx
call bar
addl $16, %esp
cmpl %eax, %ebx
jge .L45
# Body of the m loop: test1(&array at frame offset 44).
subl $12, %esp
leal 56(%esp), %eax
pushl %eax
call test1
addl $16, %esp
.L6:
# Test for n (slot 24, read as 36(%esp)). No instruction in this listing
# stores to slot 24 before this first read; the only write is the incl at
# .L29. %ebx is reused as scratch here, so the m value it held is lost.
subl $12, %esp
movl 36(%esp), %ebx
pushl %ebx
call bar
addl $16, %esp
# Flags from bar(n) - n; leave the n loop when bar(n) <= n.
cmpl 24(%esp), %eax
jle .L46
# o = 0
movl $0, 28(%esp)
.L28:
# Test for o (slot 28); exit to .L29, which does n++.
subl $12, %esp
movl 40(%esp), %esi
pushl %esi
call bar
addl $16, %esp
cmpl %eax, 28(%esp)
jge .L29
# p = 0
movl $0, 32(%esp)
.L25:
# Test for p (slot 32); exit to .L26, which does o++.
subl $12, %esp
movl 44(%esp), %edi
pushl %edi
call bar
addl $16, %esp
cmpl %eax, 32(%esp)
jge .L26
# r = 0
movl $0, 36(%esp)
.L22:
# Test for r (slot 36); exit to .L23, which does p++.
subl $12, %esp
movl 48(%esp), %ebp
pushl %ebp
call bar
addl $16, %esp
cmpl %eax, 36(%esp)
jge .L23
# s = 0
movl $0, 40(%esp)
.L19:
# Test for s (slot 40); exit to .L20, which does r++.
subl $12, %esp
movl 52(%esp), %eax
pushl %eax
call bar
addl $16, %esp
cmpl %eax, 40(%esp)
jge .L20
# t = 0 in %ebp, followed by the first test of the t loop;
# exit to .L17, which does s++.
xorl %ebp, %ebp
subl $12, %esp
pushl %ebp
call bar
addl $16, %esp
cmpl %eax, %ebp
jge .L17
.L49:
# Body of the t loop: u = 0 in %edi and the first test of the u loop.
xorl %edi, %edi
subl $12, %esp
pushl %edi
call bar
addl $16, %esp
cmpl %eax, %edi
jge .L14
.L48:
# Body of the u loop: v = 0 in %esi and the first test of the v loop.
xorl %esi, %esi
subl $12, %esp
pushl %esi
call bar
addl $16, %esp
cmpl %eax, %esi
jge .L11
.L47:
# Body of the v loop: w = 0 in %ebx, then jump to the w test.
xorl %ebx, %ebx
jmp .L8
# Align the innermost loop to 2^4 bytes, skipping at most 15 bytes.
.p2align 4,,15
.L7:
# Innermost loop: w++
incl %ebx
.L8:
# Test for w; loop while w < bar(w).
subl $12, %esp
pushl %ebx
call bar
addl $16, %esp
cmpl %eax, %ebx
jl .L7
# End of the w loop: v++ and re-test v; back to .L47 while v < bar(v).
incl %esi
subl $12, %esp
pushl %esi
call bar
addl $16, %esp
cmpl %eax, %esi
jl .L47
.L11:
# End of the v loop: u++ and re-test u.
incl %edi
subl $12, %esp
pushl %edi
call bar
addl $16, %esp
cmpl %eax, %edi
jl .L48
.L14:
# End of the u loop: t++ and re-test t.
incl %ebp
subl $12, %esp
pushl %ebp
call bar
addl $16, %esp
cmpl %eax, %ebp
jl .L49
.L17:
# End of the t loop: s++. The counter is incremented in place with a small
# displacement; this is the short form quoted in the message above.
incl 40(%esp)
jmp .L19
.L20:
# End of the s loop: r++
incl 36(%esp)
jmp .L22
.L23:
# End of the r loop: p++
incl 32(%esp)
jmp .L25
.L26:
# End of the p loop: o++
incl 28(%esp)
jmp .L28
.L29:
# End of the o loop: n++ (the only instruction that writes slot 24).
incl 24(%esp)
jmp .L6
.L46:
# End of the n loop: m is set to the constant 1 rather than incremented,
# then the m test at .L33 is re-run.
movl $1, %ebx
jmp .L33
.L45:
# End of the m loop: l++
incl 20(%esp)
jmp .L36
.L44:
# End of the l loop: k++
incl 16(%esp)
jmp .L4
.L38:
# End of the k loop: j++
incl 12(%esp)
jmp .L2
.L43:
# Epilogue: release the frame and restore the saved registers in reverse
# order of the pushes.
addl $8236, %esp
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
.size foo, .-foo
.ident "GCC: (GNU) 4.0.0 20050410 (prerelease) (Debian 4.0-0pre10)"
.section .note.GNU-stack,"",@progbits