Since bpf2bpf tailcall support is enabled for 64-bit powerpc with
kernel commit 2ed2d8f6fb38 ("powerpc64/bpf: Support tailcalls with
subprogs"), 'tailcalls/tailcall_bpf2bpf_hierarchy_fexit' BPF selftest
is triggering "corrupted stack end detected inside scheduler" with the
config option CONFIG_SCHED_STACK_END_CHECK enabled. While reviewing
the stack layout for BPF trampoline, observed that the dummy frame is
trying to protect the redzone of BPF program. This is because tail
call info and NVRs save area are in redzone at the time of tailcall
as the current BPF program stack frame is teared down before the
tailcall. But saving this redzone in the dummy frame of trampoline
is unnecessary because of the follow reasons:

  1) Firstly, trampoline can be attached to BPF entry/main program
     or subprog. Prologue part of the BPF entry/main program, where
     the trampoline attachpoint is, is skipped during tailcall. So,
     protecting the redzone does not arise when the trampoline is
     not even triggered in this scenario.
  2) In case of subprog, the caller's stackframe is already setup
     and the subprog's stackframe is yet to be setup. So, nothing
     on the redzone to be protected.

Also, using dummy frame in BPF trampoline, wastes critically scarce
kernel stack space, especially in tailcall sequence, for marginal
benefit in stack unwinding. So, drop setting up the dummy frame.
Instead, save return address in bpf trampoline frame and use it as
appropriate. Pruning this unnecessary stack usage mitigates the
likelihood of stack overflow in scenarios where bpf2bpf tailcalls
and fexit programs are mixed.

Reported-by: Saket Kumar Bhaskar <[email protected]>
Fixes: 2ed2d8f6fb38 ("powerpc64/bpf: Support tailcalls with subprogs")
Signed-off-by: Hari Bathini <[email protected]>
---
 arch/powerpc/net/bpf_jit_comp.c | 84 ++++++++++++---------------------
 1 file changed, 30 insertions(+), 54 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 1ff8030faf1f..35cea97a1bcd 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -638,15 +638,10 @@ static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, 
struct codegen_context
  * for the traced function (BPF subprog/callee) to fetch it.
  */
 static void bpf_trampoline_setup_tail_call_info(u32 *image, struct 
codegen_context *ctx,
-                                               int func_frame_offset,
-                                               int bpf_dummy_frame_size, int 
r4_off)
+                                               int bpf_frame_size, int r4_off)
 {
        if (IS_ENABLED(CONFIG_PPC64)) {
-               /*
-                * func_frame_offset =                                   ...(1)
-                *      bpf_dummy_frame_size + trampoline_frame_size
-                */
-               EMIT(PPC_RAW_LD(_R4, _R1, func_frame_offset));
+               EMIT(PPC_RAW_LD(_R4, _R1, bpf_frame_size));
                /* Refer to trampoline's Generated stack layout */
                EMIT(PPC_RAW_LD(_R3, _R4, -BPF_PPC_TAILCALL));
 
@@ -657,21 +652,12 @@ static void bpf_trampoline_setup_tail_call_info(u32 
*image, struct codegen_conte
                EMIT(PPC_RAW_CMPLWI(_R3, MAX_TAIL_CALL_CNT));
                PPC_BCC_CONST_SHORT(COND_GT, 8);
                EMIT(PPC_RAW_ADDI(_R3, _R4, -BPF_PPC_TAILCALL));
+
                /*
-                * From ...(1) above:
-                * trampoline_frame_bottom =                            ...(2)
-                *      func_frame_offset - bpf_dummy_frame_size
-                *
-                * Using ...(2) derived above:
-                * trampoline_tail_call_info_offset =                  ...(3)
-                *      trampoline_frame_bottom - tailcallinfo_offset
-                *
-                * From ...(3):
                 * Use trampoline_tail_call_info_offset to write reference of 
main's
                 * tail_call_info in trampoline frame.
                 */
-               EMIT(PPC_RAW_STL(_R3, _R1, (func_frame_offset - 
bpf_dummy_frame_size)
-                                                               - 
BPF_PPC_TAILCALL));
+               EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size - BPF_PPC_TAILCALL));
        } else {
                /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */
                EMIT(PPC_RAW_LL(_R4, _R1, r4_off));
@@ -679,7 +665,7 @@ static void bpf_trampoline_setup_tail_call_info(u32 *image, 
struct codegen_conte
 }
 
 static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct 
codegen_context *ctx,
-                                                int func_frame_offset, int 
r4_off)
+                                                int bpf_frame_size, int r4_off)
 {
        if (IS_ENABLED(CONFIG_PPC32)) {
                /*
@@ -690,12 +676,12 @@ static void bpf_trampoline_restore_tail_call_cnt(u32 
*image, struct codegen_cont
        }
 }
 
-static void bpf_trampoline_save_args(u32 *image, struct codegen_context *ctx, 
int func_frame_offset,
-                                    int nr_regs, int regs_off)
+static void bpf_trampoline_save_args(u32 *image, struct codegen_context *ctx,
+                                    int bpf_frame_size, int nr_regs, int 
regs_off)
 {
        int param_save_area_offset;
 
-       param_save_area_offset = func_frame_offset; /* the two frames we 
alloted */
+       param_save_area_offset = bpf_frame_size;
        param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is 
past frame header */
 
        for (int i = 0; i < nr_regs; i++) {
@@ -718,11 +704,11 @@ static void bpf_trampoline_restore_args_regs(u32 *image, 
struct codegen_context
 
 /* Used when we call into the traced function. Replicate parameter save area */
 static void bpf_trampoline_restore_args_stack(u32 *image, struct 
codegen_context *ctx,
-                                             int func_frame_offset, int 
nr_regs, int regs_off)
+                                             int bpf_frame_size, int nr_regs, 
int regs_off)
 {
        int param_save_area_offset;
 
-       param_save_area_offset = func_frame_offset; /* the two frames we 
alloted */
+       param_save_area_offset = bpf_frame_size;
        param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is 
past frame header */
 
        for (int i = 8; i < nr_regs; i++) {
@@ -739,10 +725,10 @@ static int __arch_prepare_bpf_trampoline(struct 
bpf_tramp_image *im, void *rw_im
                                         void *func_addr)
 {
        int regs_off, nregs_off, ip_off, run_ctx_off, retval_off, nvr_off, 
alt_lr_off, r4_off = 0;
-       int i, ret, nr_regs, bpf_frame_size = 0, bpf_dummy_frame_size = 0, 
func_frame_offset;
        struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
        struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
        struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
+       int i, ret, nr_regs, retaddr_off, bpf_frame_size = 0;
        struct codegen_context codegen_ctx, *ctx;
        u32 *image = (u32 *)rw_image;
        ppc_inst_t branch_insn;
@@ -768,16 +754,11 @@ static int __arch_prepare_bpf_trampoline(struct 
bpf_tramp_image *im, void *rw_im
         * Generated stack layout:
         *
         * func prev back chain         [ back chain        ]
-        *                              [                   ]
-        * bpf prog redzone/tailcallcnt [ ...               ] 64 bytes (64-bit 
powerpc)
-        *                              [                   ] --
-        * LR save area                 [ r0 save (64-bit)  ]   | header
-        *                              [ r0 save (32-bit)  ]   |
-        * dummy frame for unwind       [ back chain 1      ] --
         *                              [ tail_call_info    ] optional - 64-bit 
powerpc
         *                              [ padding           ] align stack frame
         *       r4_off                 [ r4 (tailcallcnt)  ] optional - 32-bit 
powerpc
         *       alt_lr_off             [ real lr (ool stub)] optional - actual 
lr
+        *       retaddr_off            [ return address    ]
         *                              [ r26               ]
         *       nvr_off                [ r25               ] nvr save area
         *       retval_off             [ return value      ]
@@ -841,6 +822,10 @@ static int __arch_prepare_bpf_trampoline(struct 
bpf_tramp_image *im, void *rw_im
        nvr_off = bpf_frame_size;
        bpf_frame_size += 2 * SZL;
 
+       /* Save area for return address */
+       retaddr_off = bpf_frame_size;
+       bpf_frame_size += SZL;
+
        /* Optional save area for actual LR in case of ool ftrace */
        if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) {
                alt_lr_off = bpf_frame_size;
@@ -867,16 +852,8 @@ static int __arch_prepare_bpf_trampoline(struct 
bpf_tramp_image *im, void *rw_im
        /* Padding to align stack frame, if any */
        bpf_frame_size = round_up(bpf_frame_size, SZL * 2);
 
-       /* Dummy frame size for proper unwind - includes 64-bytes red zone for 
64-bit powerpc */
-       bpf_dummy_frame_size = STACK_FRAME_MIN_SIZE + 64;
-
-       /* Offset to the traced function's stack frame */
-       func_frame_offset = bpf_dummy_frame_size + bpf_frame_size;
-
-       /* Create dummy frame for unwind, store original return value */
+       /*  Store original return value */
        EMIT(PPC_RAW_STL(_R0, _R1, PPC_LR_STKOFF));
-       /* Protect red zone where tail call count goes */
-       EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_dummy_frame_size));
 
        /* Create our stack frame */
        EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_frame_size));
@@ -891,14 +868,14 @@ static int __arch_prepare_bpf_trampoline(struct 
bpf_tramp_image *im, void *rw_im
        if (IS_ENABLED(CONFIG_PPC32) && nr_regs < 2)
                EMIT(PPC_RAW_STL(_R4, _R1, r4_off));
 
-       bpf_trampoline_save_args(image, ctx, func_frame_offset, nr_regs, 
regs_off);
+       bpf_trampoline_save_args(image, ctx, bpf_frame_size, nr_regs, regs_off);
 
        /* Save our LR/return address */
        EMIT(PPC_RAW_MFLR(_R3));
        if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE))
                EMIT(PPC_RAW_STL(_R3, _R1, alt_lr_off));
        else
-               EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF));
+               EMIT(PPC_RAW_STL(_R3, _R1, retaddr_off));
 
        /*
         * Get IP address of the traced function.
@@ -920,9 +897,9 @@ static int __arch_prepare_bpf_trampoline(struct 
bpf_tramp_image *im, void *rw_im
                EMIT(PPC_RAW_STL(_R3, _R1, ip_off));
 
        if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) {
-               /* Fake our LR for unwind */
+               /* Fake our LR for BPF_TRAMP_F_CALL_ORIG case */
                EMIT(PPC_RAW_ADDI(_R3, _R3, 4));
-               EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF));
+               EMIT(PPC_RAW_STL(_R3, _R1, retaddr_off));
        }
 
        /* Save function arg count -- see bpf_get_func_arg_cnt() */
@@ -961,20 +938,19 @@ static int __arch_prepare_bpf_trampoline(struct 
bpf_tramp_image *im, void *rw_im
        /* Call the traced function */
        if (flags & BPF_TRAMP_F_CALL_ORIG) {
                /*
-                * The address in LR save area points to the correct point in 
the original function
+                * retaddr on trampoline stack points to the correct point in 
the original function
                 * with both PPC_FTRACE_OUT_OF_LINE as well as with traditional 
ftrace instruction
                 * sequence
                 */
-               EMIT(PPC_RAW_LL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF));
+               EMIT(PPC_RAW_LL(_R3, _R1, retaddr_off));
                EMIT(PPC_RAW_MTCTR(_R3));
 
                /* Replicate tail_call_cnt before calling the original BPF prog 
*/
                if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
-                       bpf_trampoline_setup_tail_call_info(image, ctx, 
func_frame_offset,
-                                                               
bpf_dummy_frame_size, r4_off);
+                       bpf_trampoline_setup_tail_call_info(image, ctx, 
bpf_frame_size, r4_off);
 
                /* Restore args */
-               bpf_trampoline_restore_args_stack(image, ctx, 
func_frame_offset, nr_regs, regs_off);
+               bpf_trampoline_restore_args_stack(image, ctx, bpf_frame_size, 
nr_regs, regs_off);
 
                /* Restore TOC for 64-bit */
                if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && 
!IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
@@ -988,7 +964,7 @@ static int __arch_prepare_bpf_trampoline(struct 
bpf_tramp_image *im, void *rw_im
 
                /* Restore updated tail_call_cnt */
                if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
-                       bpf_trampoline_restore_tail_call_cnt(image, ctx, 
func_frame_offset, r4_off);
+                       bpf_trampoline_restore_tail_call_cnt(image, ctx, 
bpf_frame_size, r4_off);
 
                /* Reserve space to patch branch instruction to skip fexit 
progs */
                if (ro_image) /* image is NULL for dummy pass */
@@ -1040,7 +1016,7 @@ static int __arch_prepare_bpf_trampoline(struct 
bpf_tramp_image *im, void *rw_im
                EMIT(PPC_RAW_LD(_R2, _R1, 24));
        if (flags & BPF_TRAMP_F_SKIP_FRAME) {
                /* Skip the traced function and return to parent */
-               EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset));
+               EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size));
                EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF));
                EMIT(PPC_RAW_MTLR(_R0));
                EMIT(PPC_RAW_BLR());
@@ -1048,13 +1024,13 @@ static int __arch_prepare_bpf_trampoline(struct 
bpf_tramp_image *im, void *rw_im
                if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) {
                        EMIT(PPC_RAW_LL(_R0, _R1, alt_lr_off));
                        EMIT(PPC_RAW_MTLR(_R0));
-                       EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset));
+                       EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size));
                        EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF));
                        EMIT(PPC_RAW_BLR());
                } else {
-                       EMIT(PPC_RAW_LL(_R0, _R1, bpf_frame_size + 
PPC_LR_STKOFF));
+                       EMIT(PPC_RAW_LL(_R0, _R1, retaddr_off));
                        EMIT(PPC_RAW_MTCTR(_R0));
-                       EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset));
+                       EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size));
                        EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF));
                        EMIT(PPC_RAW_MTLR(_R0));
                        EMIT(PPC_RAW_BCTR());
-- 
2.53.0


Reply via email to