Le 14/01/2026 à 12:44, [email protected] a écrit :
From: Abhishek Dubey <[email protected]>

Enable tailcalls with subprog combinations by using a reference-based
method. The actual tailcall count is always maintained in the
tail_call_info variable present in the frame of the main function
(also called the entry function). The tail_call_info variables in
the frames of all other subprogs contain a reference to the
tail_call_info present in the frame of the main function.

Dynamic resolution interprets the tail_call_info either as a
value or as a reference, depending on the context of the active
frame when the tailcall is invoked.

Signed-off-by: Abhishek Dubey <[email protected]>
---
  arch/powerpc/net/bpf_jit.h        | 12 +++++-
  arch/powerpc/net/bpf_jit_comp.c   | 10 ++++-
  arch/powerpc/net/bpf_jit_comp64.c | 68 +++++++++++++++++++++++--------
  3 files changed, 70 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 45d419c0ee73..5d735bc5e6bd 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -51,6 +51,12 @@
                EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset 
& 0xfffc));                                        \
        } while (0)
+/* Same as PPC_BCC_SHORT, except valid dest is known prior to call. */
+#define PPC_COND_BRANCH(cond, dest)                                           \
+       do {                                                                  \
+               long offset = (long)(dest) - CTX_NIA(ctx);                    \
+               EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset 
& 0xfffc));    \
+       } while (0)

I don't like the idea of duplicating PPC_BCC_SHORT() just to kick the verification out. Now we will have two macros doing the exact same thing, with one handling the failure case and one ignoring it. There is a big risk that one day or another someone will use the wrong macro.

Could you change bpf_jit_build_prologue() to return an int and use PPC_BCC_SHORT() instead of that new PPC_COND_BRANCH()?


  /*
   * Sign-extended 32-bit immediate load
   *
@@ -75,6 +81,8 @@
/* for tailcall counter */
  #define BPF_PPC_TAILCALL        8
+/* for gpr non volatile registers BPG_REG_6 to 10 */
+#define BPF_PPC_STACK_SAVE      (6*8)

Add spaces before and after the *, i.e. `(6 * 8)`.

/* If dummy pass (!image), account for maximum possible instructions */
  #define PPC_LI64(d, i)                do {                                    
      \
@@ -170,6 +178,7 @@ struct codegen_context {
        unsigned int alt_exit_addr;
        u64 arena_vm_start;
        u64 user_vm_start;
+       bool is_subprog;
  };
#define bpf_to_ppc(r) (ctx->b2p[r])
@@ -204,11 +213,10 @@ void bpf_jit_build_epilogue(u32 *image, struct 
codegen_context *ctx);
  void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx);
  void bpf_jit_realloc_regs(struct codegen_context *ctx);
  int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int 
tmp_reg, long exit_addr);
-
  int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int 
pass,
                          struct codegen_context *ctx, int insn_idx,
                          int jmp_off, int dst_reg, u32 code);
-

Not sure why this patch needs to remove those blank lines here and above.

+int bpf_jit_stack_tailcallinfo_offset(struct codegen_context *ctx);
  #endif
#endif
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 5e976730b2f5..069a8822c30d 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -206,6 +206,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
        cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
        cgctx.arena_vm_start = bpf_arena_get_kern_vm_start(fp->aux->arena);
        cgctx.user_vm_start = bpf_arena_get_user_vm_start(fp->aux->arena);
+       cgctx.is_subprog = bpf_is_subprog(fp);
/* Scouting faux-generate pass 0 */
        if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
@@ -435,6 +436,11 @@ void bpf_jit_free(struct bpf_prog *fp)
        bpf_prog_unlock_free(fp);
  }
+bool bpf_jit_supports_subprog_tailcalls(void)
+{
+       return IS_ENABLED(CONFIG_PPC64);
+}
+
  bool bpf_jit_supports_kfunc_call(void)
  {
        return true;
@@ -604,7 +610,7 @@ static void bpf_trampoline_setup_tail_call_cnt(u32 *image, 
struct codegen_contex
                                               int func_frame_offset, int 
r4_off)
  {
        if (IS_ENABLED(CONFIG_PPC64)) {
-               /* See bpf_jit_stack_tailcallcnt() */
+               /* See bpf_jit_stack_tailcallinfo_offset() */
                int tailcallcnt_offset = 7 * 8;
EMIT(PPC_RAW_LL(_R3, _R1, func_frame_offset - tailcallcnt_offset));
@@ -619,7 +625,7 @@ static void bpf_trampoline_restore_tail_call_cnt(u32 
*image, struct codegen_cont
                                                 int func_frame_offset, int 
r4_off)
  {
        if (IS_ENABLED(CONFIG_PPC64)) {
-               /* See bpf_jit_stack_tailcallcnt() */
+               /* See bpf_jit_stack_tailcallinfo_offset() */
                int tailcallcnt_offset = 7 * 8;
EMIT(PPC_RAW_LL(_R3, _R1, -tailcallcnt_offset));
diff --git a/arch/powerpc/net/bpf_jit_comp64.c 
b/arch/powerpc/net/bpf_jit_comp64.c
index 39061cd742c1..cebf81fbd59f 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -26,8 +26,12 @@
   * Ensure the top half (upto local_tmp_var) stays consistent
   * with our redzone usage.
   *
+ * tail_call_info - stores tailcall count value in main program's
+ *                  frame, stores reference to tail_call_info of
+ *                  main's frame in sub-prog's frame.
+ *
   *            [       prev sp         ] <-------------
- *             [    tail_call_cnt      ] 8             |
+ *             [    tail_call_info     ] 8             |
   *            [   nv gpr save area    ] 6*8           |
   *            [    local_tmp_var      ] 24            |
   * fp (r31) -->    [   ebpf stack space    ] upto 512      |
@@ -35,8 +39,6 @@
   * sp (r1) --->    [    stack pointer      ] --------------
   */
-/* for gpr non volatile registers BPG_REG_6 to 10 */
-#define BPF_PPC_STACK_SAVE     (6*8)
  /* for bpf JIT code internal usage */
  #define BPF_PPC_STACK_LOCALS  24
  /* stack frame excluding BPF stack, ensure this is quadword aligned */
@@ -98,7 +100,7 @@ static inline bool bpf_has_stack_frame(struct 
codegen_context *ctx)
   *            [       prev sp         ] <-------------
   *            [         ...           ]               |
   * sp (r1) --->    [    stack pointer      ] --------------
- *             [    tail_call_cnt      ] 8
+ *             [    tail_call_info     ] 8
   *            [   nv gpr save area    ] 6*8
   *            [    local_tmp_var      ] 24
   *            [   unused red zone     ] 224
@@ -114,7 +116,7 @@ static int bpf_jit_stack_local(struct codegen_context *ctx)
        }
  }
-static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
+int bpf_jit_stack_tailcallinfo_offset(struct codegen_context *ctx)
  {
        return bpf_jit_stack_local(ctx) + BPF_PPC_STACK_LOCALS + 
BPF_PPC_STACK_SAVE;
  }
@@ -147,17 +149,32 @@ void bpf_jit_build_prologue(u32 *image, struct 
codegen_context *ctx)
  #endif
/*
-        * Initialize tail_call_cnt if we do tail calls.
-        * Otherwise, put in NOPs so that it can be skipped when we are
-        * invoked through a tail call.
+        * Tail call count(tcc) is saved & updated only in main
+        * program's frame and the address of tcc in main program's
+        * frame (tcc_ptr) is saved in subprogs frame.
+        *
+        * Offset of tail_call_info on any frame will be interpreted
+        * as either tcc_ptr or tcc value depending on whether it is
+        * greater than MAX_TAIL_CALL_CNT or not.
         */
-       if (ctx->seen & SEEN_TAILCALL) {
+       if (!ctx->is_subprog) {
                EMIT(PPC_RAW_LI(bpf_to_ppc(TMP_REG_1), 0));
                /* this goes in the redzone */
                EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, 
-(BPF_PPC_TAILCALL)));
        } else {
-               EMIT(PPC_RAW_NOP());
-               EMIT(PPC_RAW_NOP());
+               /*
+                * if tail_call_info < MAX_TAIL_CALL_CNT
+                *     main prog calling first subprog -> copy reference
+                * else
+                *     subsequent subprog calling another subprog -> directly 
copy content
+                */
+               EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_2), _R1, 0));
+               EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_2), 
-(BPF_PPC_TAILCALL)));
+               EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT));
+               PPC_COND_BRANCH(COND_GT, CTX_NIA(ctx) + 8);
+               EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_2),
+                                                       -(BPF_PPC_TAILCALL)));
+               EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, 
-(BPF_PPC_TAILCALL)));
        }
if (bpf_has_stack_frame(ctx)) {
@@ -352,19 +369,38 @@ static int bpf_jit_emit_tail_call(u32 *image, struct 
codegen_context *ctx, u32 o
        EMIT(PPC_RAW_CMPLW(b2p_index, bpf_to_ppc(TMP_REG_1)));
        PPC_BCC_SHORT(COND_GE, out);
+ EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallinfo_offset(ctx)));
+       EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT));
+       PPC_COND_BRANCH(COND_LE, CTX_NIA(ctx) + 8);
+
+       /* dereference TMP_REG_1 */
+       EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), 0));
+
        /*
-        * if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
+        * if (tail_call_info == MAX_TAIL_CALL_CNT)
         *   goto out;
         */
-       EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R1, 
bpf_jit_stack_tailcallcnt(ctx)));
        EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT));
-       PPC_BCC_SHORT(COND_GE, out);
+       PPC_COND_BRANCH(COND_EQ, out);
/*
-        * tail_call_cnt++;
+        * tail_call_info++; <- Actual value of tcc here
         */
        EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), 1));
-       EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, 
bpf_jit_stack_tailcallcnt(ctx)));
+
+       /*
+        * Before writing updated tail_call_info, distinguish if current frame
+        * is storing a reference to tail_call_info or actual tcc value in
+        * tail_call_info.
+        */
+       EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_2), _R1, 
bpf_jit_stack_tailcallinfo_offset(ctx)));
+       EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_2), MAX_TAIL_CALL_CNT));
+       PPC_COND_BRANCH(COND_GT, CTX_NIA(ctx) + 8);
+
+       /* First get address of tail_call_info */
+       EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_2), _R1, 
bpf_jit_stack_tailcallinfo_offset(ctx)));
+       /* Writeback updated value to tail_call_info */
+       EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_2), 0));
/* prog = array->ptrs[index]; */
        EMIT(PPC_RAW_MULI(bpf_to_ppc(TMP_REG_1), b2p_index, 8));


Reply via email to