How does the i386 backend optimise the stack slot assignment to minimize the displacement offset?
What code should I look at? Or is there some other optimisation at work here? For example:

    ; -O0 => large offset
    leal 8268(%esp), %eax
    incl (%eax)

    ; -O3 => small offset
    incl 40(%esp)

The source for a test case and the compiler output are attached. The output was generated with:

    gcc-4.0 -S stackframe.c -fomit-frame-pointer -O0 -o stackframe-O0.s
    gcc-4.0 -S stackframe.c -fomit-frame-pointer -O3 -o stackframe-O3.s

This thread has a stack slot assignment optimisation patch that was never committed to GCC CVS, but the above indicates that there is already some sort of mechanism in GCC to mitigate this problem:

http://gcc.gnu.org/ml/gcc-patches/2003-01/msg00019.html

--
Øyvind Harboe
http://www.zylin.com
/*
 * stackframe.c -- test case for stack slot assignment on i386.
 *
 * foo() declares two 1024-int arrays with seventeen scalar ints between
 * them, then uses thirteen of the scalars as counters of nested loops whose
 * bounds come from the external function bar().  The two compiler listings
 * that follow in this file were generated from exactly this text, so the
 * declarations and loops are left untouched.
 */
int bar(int a);      /* external; defined elsewhere */
int test1(int *);    /* external; receives the address of a local array */

/*
 * NOTE(review): foo is declared to return int but contains no return
 * statement, and its parameters a, b, c, d are never used.
 */
int foo(int a, int b, int c, int d)
{
  int abc[1024];
  /* q, x, y and z are declared but never referenced below. */
  int j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z;
  int def[1024];

  for (j=0; j<bar(j); j++)
  {
    test1(abc);
    for (k=0; k<bar(k); k++)
    {
      for (l=0; l<bar(l); l++)
      {
        for (m=0; m<bar(m); m++)
        {
          test1(def);
          /*
           * NOTE(review): the init clause re-zeroes m while the loop tests
           * and increments n, so n is read without ever being assigned in
           * this function.  Looks like a typo for "n=0" -- confirm with the
           * author before changing, since the attached listings reflect
           * the code as written.
           */
          for (m=0; n<bar(n); n++)
          {
            for (o=0; o<bar(o); o++)
            {
              /* The sequence goes from p straight to r; there is no q loop. */
              for (p=0; p<bar(p); p++)
              {
                for (r=0; r<bar(r); r++)
                {
                  for (s=0; s<bar(s); s++)
                  {
                    for (t=0; t<bar(t); t++)
                    {
                      for (u=0; u<bar(u); u++)
                      {
                        for (v=0; v<bar(v); v++)
                        {
                          for (w=0; w<bar(w); w++)
                          {
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}
# foo() from stackframe.c -- GCC 4.0 i386 output, AT&T syntax.
# This listing matches the "-O0 => large offset" snippet quoted in the
# message (leal 8268(%esp) / incl (%eax)).
#
# No frame pointer is set up: every local is addressed relative to %esp.
# Stack slots as inferred by matching the loop order against the C test case
# (offsets are relative to %esp after the prologue's subl):
#     def  =   24(%esp)        abc  = 4120(%esp)
#     j = 8216   k = 8220   l = 8224   m = 8228   n = 8232   o = 8236
#     p = 8240   r = 8248   s = 8252   t = 8256   u = 8260   v = 8264
#     w = 8268
#   8244(%esp) is never referenced (presumably the unused q -- not confirmed).
#
# Every call site has the shape
#     subl $12, %esp ; push <arg> ; call ; addl $16, %esp
# so while the argument is being pushed %esp is 12 lower than in the table
# above, which is why "pushl N+12(%esp)" pushes the variable living at N(%esp).
#
# Layout: loop-entry code (.L3 ... .L25) comes first, followed by the
# increment + test for each loop, innermost (w) first and outermost (j) last.
        .file "stackframe.c"
        .text
        .globl foo
        .type foo, @function
foo:
        subl $8284, %esp                # reserve the whole frame (arrays + scalars)
        movl $0, 8216(%esp)             # j = 0
        jmp .L2                         # go to the j-loop test
.L3:                                    # --- j-loop body ---
        subl $12, %esp
        leal 4132(%esp), %eax           # &abc (4120(%esp) before the subl above)
        pushl %eax
        call test1                      # test1(abc)
        addl $16, %esp
        movl $0, 8220(%esp)             # k = 0
        jmp .L4
.L5:                                    # --- k-loop body ---
        movl $0, 8224(%esp)             # l = 0
        jmp .L6
.L7:                                    # --- l-loop body ---
        movl $0, 8228(%esp)             # m = 0
        jmp .L8
.L9:                                    # --- m-loop body ---
        subl $12, %esp
        leal 36(%esp), %eax             # &def (24(%esp) before the subl above)
        pushl %eax
        call test1                      # test1(def)
        addl $16, %esp
        movl $0, 8228(%esp)             # m = 0 again: the C source's "for (m=0; n<bar(n); n++)"
        jmp .L10                        # n-loop test; 8232(%esp) is never initialised in this listing
.L11:                                   # --- n-loop body ---
        movl $0, 8236(%esp)             # o = 0
        jmp .L12
.L13:                                   # --- o-loop body ---
        movl $0, 8240(%esp)             # p = 0
        jmp .L14
.L15:                                   # --- p-loop body ---
        movl $0, 8248(%esp)             # r = 0 (slot 8244 is skipped)
        jmp .L16
.L17:                                   # --- r-loop body ---
        movl $0, 8252(%esp)             # s = 0
        jmp .L18
.L19:                                   # --- s-loop body ---
        movl $0, 8256(%esp)             # t = 0
        jmp .L20
.L21:                                   # --- t-loop body ---
        movl $0, 8260(%esp)             # u = 0
        jmp .L22
.L23:                                   # --- u-loop body ---
        movl $0, 8264(%esp)             # v = 0
        jmp .L24
.L25:                                   # --- v-loop body ---
        movl $0, 8268(%esp)             # w = 0
        jmp .L26
.L27:                                   # --- w-loop body (empty) + increment ---
        leal 8268(%esp), %eax           # address of w is materialised in %eax first ...
        incl (%eax)                     # ... then w++ through the pointer (the "large offset" form)
.L26:                                   # w-loop test
        subl $12, %esp
        pushl 8280(%esp)                # arg = w
        call bar
        addl $16, %esp
        cmpl 8268(%esp), %eax
        jg .L27                         # loop while bar(w) > w
        leal 8264(%esp), %eax
        incl (%eax)                     # v++
.L24:                                   # v-loop test
        subl $12, %esp
        pushl 8276(%esp)                # arg = v
        call bar
        addl $16, %esp
        cmpl 8264(%esp), %eax
        jg .L25                         # loop while bar(v) > v
        leal 8260(%esp), %eax
        incl (%eax)                     # u++
.L22:                                   # u-loop test
        subl $12, %esp
        pushl 8272(%esp)                # arg = u
        call bar
        addl $16, %esp
        cmpl 8260(%esp), %eax
        jg .L23                         # loop while bar(u) > u
        leal 8256(%esp), %eax
        incl (%eax)                     # t++
.L20:                                   # t-loop test
        subl $12, %esp
        pushl 8268(%esp)                # arg = t
        call bar
        addl $16, %esp
        cmpl 8256(%esp), %eax
        jg .L21                         # loop while bar(t) > t
        leal 8252(%esp), %eax
        incl (%eax)                     # s++
.L18:                                   # s-loop test
        subl $12, %esp
        pushl 8264(%esp)                # arg = s
        call bar
        addl $16, %esp
        cmpl 8252(%esp), %eax
        jg .L19                         # loop while bar(s) > s
        leal 8248(%esp), %eax
        incl (%eax)                     # r++
.L16:                                   # r-loop test
        subl $12, %esp
        pushl 8260(%esp)                # arg = r
        call bar
        addl $16, %esp
        cmpl 8248(%esp), %eax
        jg .L17                         # loop while bar(r) > r
        leal 8240(%esp), %eax
        incl (%eax)                     # p++
.L14:                                   # p-loop test
        subl $12, %esp
        pushl 8252(%esp)                # arg = p
        call bar
        addl $16, %esp
        cmpl 8240(%esp), %eax
        jg .L15                         # loop while bar(p) > p
        leal 8236(%esp), %eax
        incl (%eax)                     # o++
.L12:                                   # o-loop test
        subl $12, %esp
        pushl 8248(%esp)                # arg = o
        call bar
        addl $16, %esp
        cmpl 8236(%esp), %eax
        jg .L13                         # loop while bar(o) > o
        leal 8232(%esp), %eax
        incl (%eax)                     # n++
.L10:                                   # n-loop test
        subl $12, %esp
        pushl 8244(%esp)                # arg = n
        call bar
        addl $16, %esp
        cmpl 8232(%esp), %eax
        jg .L11                         # loop while bar(n) > n
        leal 8228(%esp), %eax
        incl (%eax)                     # m++
.L8:                                    # m-loop test
        subl $12, %esp
        pushl 8240(%esp)                # arg = m
        call bar
        addl $16, %esp
        cmpl 8228(%esp), %eax
        jg .L9                          # loop while bar(m) > m
        leal 8224(%esp), %eax
        incl (%eax)                     # l++
.L6:                                    # l-loop test
        subl $12, %esp
        pushl 8236(%esp)                # arg = l
        call bar
        addl $16, %esp
        cmpl 8224(%esp), %eax
        jg .L7                          # loop while bar(l) > l
        leal 8220(%esp), %eax
        incl (%eax)                     # k++
.L4:                                    # k-loop test
        subl $12, %esp
        pushl 8232(%esp)                # arg = k
        call bar
        addl $16, %esp
        cmpl 8220(%esp), %eax
        jg .L5                          # loop while bar(k) > k
        leal 8216(%esp), %eax
        incl (%eax)                     # j++
.L2:                                    # j-loop test
        subl $12, %esp
        pushl 8228(%esp)                # arg = j
        call bar
        addl $16, %esp
        cmpl 8216(%esp), %eax
        jg .L3                          # loop while bar(j) > j
        addl $8284, %esp                # release the frame
        ret                             # %eax is not set explicitly: the C source has no return statement
        .size foo, .-foo
        .ident "GCC: (GNU) 4.0.0 20050410 (prerelease) (Debian 4.0-0pre10)"
        .section .note.GNU-stack,"",@progbits
# foo() from stackframe.c -- GCC 4.0 i386 output, AT&T syntax.
# This listing matches the "-O3 => small offset" snippet quoted in the
# message (incl 40(%esp)).
#
# No frame pointer is set up; %ebp is used as an ordinary register.
# Locations as inferred by matching the loop order against the C test case
# (offsets are relative to %esp after the prologue's subl):
#     j = 12(%esp)   k = 16(%esp)   l = 20(%esp)   n = 24(%esp)
#     o = 28(%esp)   p = 32(%esp)   r = 36(%esp)   s = 40(%esp)
#     def = 44(%esp)                abc = 4140(%esp)
#     m = %ebx (outer use)   t = %ebp   u = %edi   v = %esi   w = %ebx
# i.e. in this listing the scalar counters sit at small offsets below both
# arrays, and the five innermost-used counters live in registers.
#
# Every call site has the shape
#     subl $12, %esp ; push <arg> ; call ; addl $16, %esp
# so between the subl and the push a slot N(%esp) is addressed as N+12(%esp).
        .file "stackframe.c"
        .text
        .p2align 4,,15                  # align function entry to 16 bytes
        .globl foo
        .type foo, @function
foo:
        pushl %ebp                      # save the four registers this function overwrites
        pushl %edi
        pushl %esi
        pushl %ebx
        subl $8236, %esp                # reserve the frame (arrays + spilled scalars)
        movl $0, 12(%esp)               # j = 0
.L2:                                    # --- j-loop test ---
        subl $12, %esp
        movl 24(%esp), %eax             # load j (12(%esp) + 12)
        pushl %eax
        call bar
        addl $16, %esp
        cmpl %eax, 12(%esp)
        jge .L43                        # j >= bar(j): leave the outermost loop -> epilogue
        subl $12, %esp
        leal 4152(%esp), %eax           # &abc (4140(%esp) before the subl above)
        pushl %eax
        call test1                      # test1(abc)
        movl $0, 32(%esp)               # k = 0; %esp is still 16 low here, so this is 16(%esp)
        addl $16, %esp
.L4:                                    # --- k-loop test ---
        subl $12, %esp
        movl 28(%esp), %edx             # load k
        pushl %edx
        call bar
        addl $16, %esp
        cmpl %eax, 16(%esp)
        jge .L38                        # k >= bar(k): j++ and retest j
        movl $0, 20(%esp)               # l = 0
.L36:                                   # --- l-loop test ---
        subl $12, %esp
        movl 32(%esp), %ecx             # load l
        pushl %ecx
        call bar
        addl $16, %esp
        cmpl %eax, 20(%esp)
        jge .L44                        # l >= bar(l): k++ and retest k
        xorl %ebx, %ebx                 # m = 0 (kept in %ebx)
.L33:                                   # --- m-loop test ---
        subl $12, %esp
        pushl %ebx
        call bar
        addl $16, %esp
        cmpl %eax, %ebx
        jge .L45                        # m >= bar(m): l++ and retest l
        subl $12, %esp
        leal 56(%esp), %eax             # &def (44(%esp) before the subl above)
        pushl %eax
        call test1                      # test1(def)
        addl $16, %esp
.L6:                                    # --- n-loop test; 24(%esp) is never initialised in this listing ---
        subl $12, %esp
        movl 36(%esp), %ebx             # load n; overwrites m in %ebx (m is reloaded as 1 at .L46)
        pushl %ebx
        call bar
        addl $16, %esp
        cmpl 24(%esp), %eax
        jle .L46                        # bar(n) <= n: leave the n loop
        movl $0, 28(%esp)               # o = 0
.L28:                                   # --- o-loop test ---
        subl $12, %esp
        movl 40(%esp), %esi             # load o
        pushl %esi
        call bar
        addl $16, %esp
        cmpl %eax, 28(%esp)
        jge .L29                        # o >= bar(o): n++ and retest n
        movl $0, 32(%esp)               # p = 0
.L25:                                   # --- p-loop test ---
        subl $12, %esp
        movl 44(%esp), %edi             # load p
        pushl %edi
        call bar
        addl $16, %esp
        cmpl %eax, 32(%esp)
        jge .L26                        # p >= bar(p): o++ and retest o
        movl $0, 36(%esp)               # r = 0
.L22:                                   # --- r-loop test ---
        subl $12, %esp
        movl 48(%esp), %ebp             # load r
        pushl %ebp
        call bar
        addl $16, %esp
        cmpl %eax, 36(%esp)
        jge .L23                        # r >= bar(r): p++ and retest p
        movl $0, 40(%esp)               # s = 0
.L19:                                   # --- s-loop test ---
        subl $12, %esp
        movl 52(%esp), %eax             # load s
        pushl %eax
        call bar
        addl $16, %esp
        cmpl %eax, 40(%esp)
        jge .L20                        # s >= bar(s): r++ and retest r
        xorl %ebp, %ebp                 # t = 0 (kept in %ebp)
        subl $12, %esp
        pushl %ebp
        call bar                        # first t-loop test
        addl $16, %esp
        cmpl %eax, %ebp
        jge .L17                        # t >= bar(t): s++ and retest s
.L49:
        xorl %edi, %edi                 # u = 0 (kept in %edi)
        subl $12, %esp
        pushl %edi
        call bar                        # first u-loop test
        addl $16, %esp
        cmpl %eax, %edi
        jge .L14                        # u >= bar(u): t++ and retest t
.L48:
        xorl %esi, %esi                 # v = 0 (kept in %esi)
        subl $12, %esp
        pushl %esi
        call bar                        # first v-loop test
        addl $16, %esp
        cmpl %eax, %esi
        jge .L11                        # v >= bar(v): u++ and retest u
.L47:
        xorl %ebx, %ebx                 # w = 0 (reuses %ebx)
        jmp .L8
        .p2align 4,,15                  # align the innermost loop head to 16 bytes
.L7:
        incl %ebx                       # w++
.L8:                                    # --- w-loop test (innermost, body is empty) ---
        subl $12, %esp
        pushl %ebx
        call bar
        addl $16, %esp
        cmpl %eax, %ebx
        jl .L7                          # loop while w < bar(w)
        incl %esi                       # v++
        subl $12, %esp
        pushl %esi
        call bar
        addl $16, %esp
        cmpl %eax, %esi
        jl .L47                         # v < bar(v): restart the w loop
.L11:
        incl %edi                       # u++
        subl $12, %esp
        pushl %edi
        call bar
        addl $16, %esp
        cmpl %eax, %edi
        jl .L48                         # u < bar(u): restart the v loop
.L14:
        incl %ebp                       # t++
        subl $12, %esp
        pushl %ebp
        call bar
        addl $16, %esp
        cmpl %eax, %ebp
        jl .L49                         # t < bar(t): restart the u loop
.L17:
        incl 40(%esp)                   # s++ directly in memory (the "small offset" form)
        jmp .L19
.L20:
        incl 36(%esp)                   # r++
        jmp .L22
.L23:
        incl 32(%esp)                   # p++
        jmp .L25
.L26:
        incl 28(%esp)                   # o++
        jmp .L28
.L29:
        incl 24(%esp)                   # n++
        jmp .L6
.L46:                                   # n loop finished
        movl $1, %ebx                   # m = 1: the n loop's init set m to 0, then m++ is folded to a constant
        jmp .L33                        # retest m
.L45:
        incl 20(%esp)                   # l++
        jmp .L36
.L44:
        incl 16(%esp)                   # k++
        jmp .L4
.L38:
        incl 12(%esp)                   # j++
        jmp .L2
.L43:                                   # --- epilogue ---
        addl $8236, %esp                # release the frame
        popl %ebx                       # restore saved registers in reverse push order
        popl %esi
        popl %edi
        popl %ebp
        ret                             # %eax is not set explicitly: the C source has no return statement
        .size foo, .-foo
        .ident "GCC: (GNU) 4.0.0 20050410 (prerelease) (Debian 4.0-0pre10)"
        .section .note.GNU-stack,"",@progbits