From: Abhishek Dubey <[email protected]> Enabling tailcalls with subprog combinations by referencing method. The actual tailcall count is always maintained in the tail_call_info variable present in the frame of main function (also called entry function). The tail_call_info variables in the frames of all other subprog contains reference to the tail_call_info present in frame of main function.
Dynamic resolution interprets the tail_call_info either as value or reference depending on the context of active frame while tailcall is invoked. Following is selftest run: #./test_progs -t tailcalls #425/1 tailcalls/tailcall_1:OK #425/2 tailcalls/tailcall_2:OK #425/3 tailcalls/tailcall_3:OK #425/4 tailcalls/tailcall_4:OK #425/5 tailcalls/tailcall_5:OK #425/6 tailcalls/tailcall_6:OK #425/7 tailcalls/tailcall_bpf2bpf_1:OK #425/8 tailcalls/tailcall_bpf2bpf_2:OK #425/9 tailcalls/tailcall_bpf2bpf_3:OK #425/10 tailcalls/tailcall_bpf2bpf_4:OK #425/11 tailcalls/tailcall_bpf2bpf_5:OK #425/12 tailcalls/tailcall_bpf2bpf_6:OK #425/13 tailcalls/tailcall_bpf2bpf_fentry:OK #425/14 tailcalls/tailcall_bpf2bpf_fexit:OK #425/15 tailcalls/tailcall_bpf2bpf_fentry_fexit:OK #425/16 tailcalls/tailcall_bpf2bpf_fentry_entry:OK #425/17 tailcalls/tailcall_poke:OK #425/18 tailcalls/tailcall_bpf2bpf_hierarchy_1:OK #425/19 tailcalls/tailcall_bpf2bpf_hierarchy_fentry:OK #425/20 tailcalls/tailcall_bpf2bpf_hierarchy_fexit:OK #425/21 tailcalls/tailcall_bpf2bpf_hierarchy_fentry_fexit:OK #425/22 tailcalls/tailcall_bpf2bpf_hierarchy_fentry_entry:OK #425/23 tailcalls/tailcall_bpf2bpf_hierarchy_2:OK #425/24 tailcalls/tailcall_bpf2bpf_hierarchy_3:OK #425/25 tailcalls/tailcall_freplace:OK #425/26 tailcalls/tailcall_bpf2bpf_freplace:OK #425/27 tailcalls/tailcall_failure:OK #425/28 tailcalls/reject_tail_call_spin_lock:OK #425/29 tailcalls/reject_tail_call_rcu_lock:OK #425/30 tailcalls/reject_tail_call_preempt_lock:OK #425/31 tailcalls/reject_tail_call_ref:OK #425 tailcalls:OK Summary: 1/31 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Abhishek Dubey <[email protected]> --- arch/powerpc/net/bpf_jit.h | 14 ++++++- arch/powerpc/net/bpf_jit_comp.c | 10 ++++- arch/powerpc/net/bpf_jit_comp64.c | 67 +++++++++++++++++++++++-------- 3 files changed, 71 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 8334cd667bba..98e8b1f9c2f9 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -51,6 +51,12 @@ EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc)); \ } while (0) +/* Same as PPC_BCC_SHORT, except valid dest is known prior to call. */ +#define PPC_COND_BRANCH(cond, dest) \ + do { \ + long offset = (long)(dest) - CTX_NIA(ctx); \ + EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc)); \ + } while (0) /* * Sign-extended 32-bit immediate load * @@ -72,6 +78,10 @@ } } while (0) #ifdef CONFIG_PPC64 + +/* for gpr non volatile registers BPG_REG_6 to 10 */ +#define BPF_PPC_STACK_SAVE (6*8) + /* If dummy pass (!image), account for maximum possible instructions */ #define PPC_LI64(d, i) do { \ if (!image) \ @@ -166,6 +176,7 @@ struct codegen_context { unsigned int alt_exit_addr; u64 arena_vm_start; u64 user_vm_start; + bool is_subprog; }; #define bpf_to_ppc(r) (ctx->b2p[r]) @@ -200,11 +211,10 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx); void bpf_jit_realloc_regs(struct codegen_context *ctx); int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr); - int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass, struct codegen_context *ctx, int insn_idx, int jmp_off, int dst_reg, u32 code); - +int bpf_jit_stack_tailcallinfo_offset(struct codegen_context *ctx); #endif #endif diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 5e976730b2f5..069a8822c30d 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -206,6 +206,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) cgctx.stack_size = round_up(fp->aux->stack_depth, 16); cgctx.arena_vm_start = bpf_arena_get_kern_vm_start(fp->aux->arena); cgctx.user_vm_start = bpf_arena_get_user_vm_start(fp->aux->arena); + cgctx.is_subprog = bpf_is_subprog(fp); /* Scouting faux-generate pass 0 */ if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) { @@ -435,6 +436,11 @@ void bpf_jit_free(struct bpf_prog *fp) bpf_prog_unlock_free(fp); } +bool bpf_jit_supports_subprog_tailcalls(void) +{ + return IS_ENABLED(CONFIG_PPC64); +} + bool bpf_jit_supports_kfunc_call(void) { return true; @@ -604,7 +610,7 @@ static void bpf_trampoline_setup_tail_call_cnt(u32 *image, struct codegen_contex int func_frame_offset, int r4_off) { if (IS_ENABLED(CONFIG_PPC64)) { - /* See bpf_jit_stack_tailcallcnt() */ + /* See bpf_jit_stack_tailcallinfo_offset() */ int tailcallcnt_offset = 7 * 8; EMIT(PPC_RAW_LL(_R3, _R1, func_frame_offset - tailcallcnt_offset)); @@ -619,7 +625,7 @@ static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_cont int func_frame_offset, int r4_off) { if (IS_ENABLED(CONFIG_PPC64)) { - /* See bpf_jit_stack_tailcallcnt() */ + /* See bpf_jit_stack_tailcallinfo_offset() */ int tailcallcnt_offset = 7 * 8; EMIT(PPC_RAW_LL(_R3, _R1, -tailcallcnt_offset)); diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 1fe37128c876..37c547b49da8 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -24,17 +24,19 @@ * Ensure the top half (upto local_tmp_var) stays consistent * with our redzone usage. * + * tail_call_info - stores tailcall count value in main program's + * frame, stores reference to tail_call_info of + * main's frame in sub-prog's frame. + * * [ prev sp ] <------------- * [ nv gpr save area ] 6*8 | - * [ tail_call_cnt ] 8 | + * [ tail_call_info ] 8 | * [ local_tmp_var ] 24 | * fp (r31) --> [ ebpf stack space ] upto 512 | * [ frame header ] 32/112 | * sp (r1) ---> [ stack pointer ] -------------- */ -/* for gpr non volatile registers BPG_REG_6 to 10 */ -#define BPF_PPC_STACK_SAVE (6*8) /* for bpf JIT code internal usage */ #define BPF_PPC_STACK_LOCALS 32 /* stack frame excluding BPF stack, ensure this is quadword aligned */ @@ -93,7 +95,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * [ ... ] | * sp (r1) ---> [ stack pointer ] -------------- * [ nv gpr save area ] 6*8 - * [ tail_call_cnt ] 8 + * [ tail_call_info ] 8 * [ local_tmp_var ] 24 * [ unused red zone ] 224 */ @@ -105,7 +107,7 @@ static int bpf_jit_stack_local(struct codegen_context *ctx) return -(BPF_PPC_STACK_SAVE + 32); } -static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx) +int bpf_jit_stack_tailcallinfo_offset(struct codegen_context *ctx) { return bpf_jit_stack_local(ctx) + 24; } @@ -138,17 +140,31 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) #endif /* - * Initialize tail_call_cnt if we do tail calls. - * Otherwise, put in NOPs so that it can be skipped when we are - * invoked through a tail call. + * Tail call count(tcc) is saved & updated only in main + * program's frame and the address of tcc in main program's + * frame (tcc_ptr) is saved in subprogs frame. + * + * Offset of tail_call_info on any frame will be interpreted + * as either tcc_ptr or tcc value depending on whether it is + * greater than MAX_TAIL_CALL_CNT or not. */ - if (ctx->seen & SEEN_TAILCALL) { + if (!ctx->is_subprog) { EMIT(PPC_RAW_LI(bpf_to_ppc(TMP_REG_1), 0)); /* this goes in the redzone */ EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, -(BPF_PPC_STACK_SAVE + 8))); } else { - EMIT(PPC_RAW_NOP()); - EMIT(PPC_RAW_NOP()); + /* + * if tail_call_info < MAX_TAIL_CALL_CNT + * main prog calling first subprog -> copy reference + * else + * subsequent subprog calling another subprog -> directly copy content + */ + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_2), _R1, 0)); + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_2), -(BPF_PPC_STACK_SAVE+8))); + EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT)); + PPC_COND_BRANCH(COND_GT, CTX_NIA(ctx) + 8); + EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_2), -(BPF_PPC_STACK_SAVE + 8))); + EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, -(BPF_PPC_STACK_SAVE + 8))); } if (bpf_has_stack_frame(ctx)) { @@ -343,19 +359,38 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_CMPLW(b2p_index, bpf_to_ppc(TMP_REG_1))); PPC_BCC_SHORT(COND_GE, out); + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallinfo_offset(ctx))); + EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT)); + PPC_COND_BRANCH(COND_LE, CTX_NIA(ctx) + 8); + + /* dereference TMP_REG_1 */ + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), 0)); + /* - * if (tail_call_cnt >= MAX_TAIL_CALL_CNT) + * if (tail_call_info == MAX_TAIL_CALL_CNT) * goto out; */ - EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallcnt(ctx))); EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_1), MAX_TAIL_CALL_CNT)); - PPC_BCC_SHORT(COND_GE, out); + PPC_COND_BRANCH(COND_EQ, out); /* - * tail_call_cnt++; + * tail_call_info++; <- Actual value of tcc here */ EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_1), 1)); - EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, bpf_jit_stack_tailcallcnt(ctx))); + + /* + * Before writing updated tail_call_info, distinguish if current frame + * is storing a reference to tail_call_info or actual tcc value in + * tail_call_info. + */ + EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_2), _R1, bpf_jit_stack_tailcallinfo_offset(ctx))); + EMIT(PPC_RAW_CMPLWI(bpf_to_ppc(TMP_REG_2), MAX_TAIL_CALL_CNT)); + PPC_COND_BRANCH(COND_GT, CTX_NIA(ctx) + 8); + + /* First get address of tail_call_info */ + EMIT(PPC_RAW_ADDI(bpf_to_ppc(TMP_REG_2), _R1, bpf_jit_stack_tailcallinfo_offset(ctx))); + /* Writeback updated value to tail_call_info */ + EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_2), 0)); /* prog = array->ptrs[index]; */ EMIT(PPC_RAW_MULI(bpf_to_ppc(TMP_REG_1), b2p_index, 8)); -- 2.48.1

