yonghong-song wrote:
The following is an example:
```
extern long tar(void);
__attribute__((noinline))
long foo(int a1, int a2, int a3, int a4, int a5, int a6, int a7, int a8, int
a9, int a10) {
return a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + a9 + a10;
}
long bar(void) { return foo(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) + tar(); }
```
Compiled with 'clang -O2 -S t.c' on both x86_64 and arm64 machines.
For x86_64 asm:
```
...
foo: # @foo
.cfi_startproc
# %bb.0:
addl %esi, %edi
addl %ecx, %edx
addl %edi, %edx
addl %r9d, %r8d
addl %edx, %r8d
addl 8(%rsp), %r8d
addl 16(%rsp), %r8d
addl 24(%rsp), %r8d
addl 32(%rsp), %r8d
movslq %r8d, %rax
retq
...
bar: # @bar
.cfi_startproc
# %bb.0:
pushq %rbx
.cfi_def_cfa_offset 16
.cfi_offset %rbx, -16
movl $1, %edi
movl $2, %esi
movl $3, %edx
movl $4, %ecx
movl $5, %r8d
movl $6, %r9d
pushq $10
.cfi_adjust_cfa_offset 8
pushq $9
.cfi_adjust_cfa_offset 8
pushq $8
.cfi_adjust_cfa_offset 8
pushq $7
.cfi_adjust_cfa_offset 8
callq foo
addq $32, %rsp
.cfi_adjust_cfa_offset -32
movq %rax, %rbx
callq tar@PLT
addq %rbx, %rax
popq %rbx
.cfi_def_cfa_offset 8
retq
```
You can see that for bar(), argument 6 is moved to r9. Arguments 7-10 are pushed
in reverse order.
For foo(), the incoming stack arguments are read from the caller's frame, at 8(%rsp) through 32(%rsp).
The following is an arm64 asm snippet:
```
...
foo: // @foo
.cfi_startproc
// %bb.0:
ldr w8, [sp]
add w9, w1, w0
add w10, w2, w3
add w11, w4, w5
add w9, w9, w10
add w10, w11, w6
ldr w11, [sp, #8]
add w8, w7, w8
add w9, w9, w10
add w8, w9, w8
add w8, w8, w11
sxtw x0, w8
ret
...
bar: // @bar
.cfi_startproc
// %bb.0:
sub sp, sp, #48
.cfi_def_cfa_offset 48
stp x29, x30, [sp, #16] // 16-byte Folded Spill
str x19, [sp, #32] // 8-byte Spill
add x29, sp, #16
.cfi_def_cfa w29, 32
.cfi_offset w19, -16
.cfi_offset w30, -24
.cfi_offset w29, -32
mov w8, #10 // =0xa
mov w9, #9 // =0x9
mov w0, #1 // =0x1
mov w1, #2 // =0x2
mov w2, #3 // =0x3
mov w3, #4 // =0x4
mov w4, #5 // =0x5
mov w5, #6 // =0x6
mov w6, #7 // =0x7
mov w7, #8 // =0x8
str w8, [sp, #8]
str w9, [sp]
bl foo
mov x19, x0
bl tar
add x0, x0, x19
.cfi_def_cfa wsp, 48
ldp x29, x30, [sp, #16] // 16-byte Folded Reload
ldr x19, [sp, #32] // 8-byte Reload
add sp, sp, #48
.cfi_def_cfa_offset 0
.cfi_restore w19
.cfi_restore w30
.cfi_restore w29
ret
...
```
For arm64, the first 8 parameters can be passed in registers. Parameters 9
and 10 are stored on the stack. Look at the above code:
```
mov w8, #10 // =0xa
mov w9, #9 // =0x9
...
str w8, [sp, #8]
str w9, [sp]
```
So w8 (the second stack argument) is in [sp + 8], and w9 (the first stack
argument) is in [sp]. So the stack layout (higher addresses first) will be
```
second stack argument (argument number 10)
first stack argument (argument number 9)
```
This is similar to the x86_64 calling convention: the first stack argument is at the lowest address.
https://github.com/llvm/llvm-project/pull/189060
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits