yonghong-song wrote:

I struggled with Claude and didn't get a good solution for arbitrary placement of
r12-related instructions. I think we can enforce more restrictions in the verifier
in order to minimize stack usage.

The following is an example:
```
$ cat t.c
__attribute__((noinline)) static int foo1(int a1, int a2, int a3, int a4, int 
a5, int a6, int a7) {
  return a1 + a2 + a3 + a4 + a5 + a6 + a7;
}
__attribute__((noinline)) static int foo2(int a1, int a2, int a3, int a4, int 
a5, int a6, int a7, int a8) {
  return a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8;
}

int bar(int a1, int a2, int a3, int a4, int a5, int a6, int a7) {
  int ret;

  if (a1 + a2)
    ret = foo1(a1, a2, a3, a4, a5, a6, a7);
  else
    ret = foo2(a1, a2, a3, a4, a5, a6, a7, a6 + a7);
  return ret + a3 + a4;
}
```
Compiled with 'clang --target=bpf -O2 -S t.c', I got the following:
```
        .file   "t.c"                                                           
                                                                  
        .text                                                                   
                                                                  
        .globl  bar                             # -- Begin function bar         
                                                                  
        .p2align        3                                                       
                                                                  
        .type   bar,@function                                                   
                                                                  
bar:                                    # @bar                                  
                                                                  
# %bb.0:                                                                        
                                                                  
        w6 = w4                                                                 
                                                                  
        w7 = w3                                                                 
                                                                  
        r4 = *(u64 *)(r12 + 8)                                                  
                                                                  
        r3 = *(u64 *)(r12 + 16)                                                 
                                                                  
        w0 = w2                                                                 
                                                                  
        w0 = -w0                                                                
                                                                  
        if w1 == w0 goto .LBB0_2                                                
                                                                  
# %bb.1:                                                                        
                                                                  
        *(u64 *)(r12 - 16) = r4                                                 
                                                                  
        *(u64 *)(r12 - 8) = r3                                                  
                                                                  
        w3 = w7                                                                 
                                                                  
        w4 = w6                                                                 
                                                                  
        call foo1                                                               
                                                                  
        goto .LBB0_3                                                            
                                                                  
.LBB0_2:                                                                        
                                                                  
        *(u64 *)(r12 - 24) = r4
        *(u64 *)(r12 - 16) = r3
        w3 += w4
        *(u64 *)(r12 - 8) = r3
        w3 = w7
        w4 = w6
        call foo2
.LBB0_3:
        w6 += w7
        w6 += w0
        w0 = w6
        exit
.Lfunc_end0:
        .size   bar, .Lfunc_end0-bar
                                        # -- End function
        .p2align        3                               # -- Begin function foo1
        .type   foo1,@function
foo1:                                   # @foo1
# %bb.0:
        w0 = w2
        w0 += w1
        w0 += w3
        w0 += w4
        w0 += w5
        r1 = *(u64 *)(r12 + 8)
        w0 += w1
        r1 = *(u64 *)(r12 + 16)
        w0 += w1
        exit
.Lfunc_end1:
        .size   foo1, .Lfunc_end1-foo1
                                        # -- End function
        .p2align        3                               # -- Begin function foo2
        .type   foo2,@function
foo2:                                   # @foo2
# %bb.0:
        w0 = w2
        w0 += w1
        w0 += w3
        w0 += w4
        w0 += w5
        r1 = *(u64 *)(r12 + 8)
        w0 += w1
        r1 = *(u64 *)(r12 + 16)
        w0 += w1
        r1 = *(u64 *)(r12 + 24)
        w0 += w1
        exit
.Lfunc_end2:
        .size   foo2, .Lfunc_end2-foo2
                                        # -- End function
        .addrsig
```

I think the verifier can enforce the following restriction on r12-based stores:
   - r12-based stores must come before a call in the same basic block
This is exactly what LLVM generates.

This way, in the JIT, whenever we see an r12-based store insn (annotated with
additional info about the stack depth), additional stack can be allocated.
When the call is processed, the additional stack will be released.

WDYT?

https://github.com/llvm/llvm-project/pull/189060
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to