yonghong-song wrote: I struggled with Claude and didn't get a good solution for the random placement of r12-related instructions. I think we can enforce more restrictions in the verifier in order to minimize stack usage.
The following is an example:
```
$ cat t.c
/* Returns the sum of its seven int arguments. Declared noinline and static. */
__attribute__((noinline)) static int foo1(int a1, int a2, int a3, int a4, int
a5, int a6, int a7) {
return a1 + a2 + a3 + a4 + a5 + a6 + a7;
}
/* Returns the sum of its eight int arguments. Declared noinline and static. */
__attribute__((noinline)) static int foo2(int a1, int a2, int a3, int a4, int
a5, int a6, int a7, int a8) {
return a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8;
}
/*
 * Selects a callee based on a1 + a2: when the sum is non-zero, calls foo1
 * with the seven arguments; otherwise calls foo2 with the same seven
 * arguments plus a6 + a7 as the eighth.
 * Returns the callee's result plus a3 + a4.
 */
int bar(int a1, int a2, int a3, int a4, int a5, int a6, int a7) {
int ret;
if (a1 + a2)
ret = foo1(a1, a2, a3, a4, a5, a6, a7);
else
ret = foo2(a1, a2, a3, a4, a5, a6, a7, a6 + a7);
return ret + a3 + a4;
}
```
Compiled with 'clang --target=bpf -O2 -S t.c', I got the following:
```
.file "t.c"
.text
.globl bar # -- Begin function bar
.p2align 3
.type bar,@function
bar: # @bar
# %bb.0:
w6 = w4
w7 = w3
r4 = *(u64 *)(r12 + 8)
r3 = *(u64 *)(r12 + 16)
w0 = w2
w0 = -w0
if w1 == w0 goto .LBB0_2
# %bb.1:
*(u64 *)(r12 - 16) = r4
*(u64 *)(r12 - 8) = r3
w3 = w7
w4 = w6
call foo1
goto .LBB0_3
.LBB0_2:
*(u64 *)(r12 - 24) = r4
*(u64 *)(r12 - 16) = r3
w3 += w4
*(u64 *)(r12 - 8) = r3
w3 = w7
w4 = w6
call foo2
.LBB0_3:
w6 += w7
w6 += w0
w0 = w6
exit
.Lfunc_end0:
.size bar, .Lfunc_end0-bar
# -- End function
.p2align 3 # -- Begin function foo1
.type foo1,@function
foo1: # @foo1
# %bb.0:
w0 = w2
w0 += w1
w0 += w3
w0 += w4
w0 += w5
r1 = *(u64 *)(r12 + 8)
w0 += w1
r1 = *(u64 *)(r12 + 16)
w0 += w1
exit
.Lfunc_end1:
.size foo1, .Lfunc_end1-foo1
# -- End function
.p2align 3 # -- Begin function foo2
.type foo2,@function
foo2: # @foo2
# %bb.0:
w0 = w2
w0 += w1
w0 += w3
w0 += w4
w0 += w5
r1 = *(u64 *)(r12 + 8)
w0 += w1
r1 = *(u64 *)(r12 + 16)
w0 += w1
r1 = *(u64 *)(r12 + 24)
w0 += w1
exit
.Lfunc_end2:
.size foo2, .Lfunc_end2-foo2
# -- End function
.addrsig
```
I think the verifier can enforce the following restriction on r12-based stores:
- r12-based stores must come before a call in the same basic block
The above is exactly what LLVM generates.
This way, in the JIT, whenever we see an r12-based store insn, with additional
info about the stack depth annotated to this insn, additional stack can be added.
When the call is processed, the additional stack will be released.
WDYT?
https://github.com/llvm/llvm-project/pull/189060
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
