Since bpf2bpf tailcall support was enabled for 64-bit powerpc by kernel
commit 2ed2d8f6fb38 ("powerpc64/bpf: Support tailcalls with subprogs"),
the 'tailcalls/tailcall_bpf2bpf_hierarchy_fexit' BPF selftest triggers
"corrupted stack end detected inside scheduler" when the config option
CONFIG_SCHED_STACK_END_CHECK is enabled. While reviewing the stack
layout for the BPF trampoline, it was observed that the dummy frame
tries to protect the redzone of the BPF program. This is because the
tail call info and the NVRs save area are in the redzone at the time of
a tailcall, as the current BPF program's stack frame is torn down
before the tailcall. But saving this redzone in the dummy frame of the
trampoline is unnecessary, for the following reasons:
1) A trampoline can be attached to the BPF entry/main program or to a
   subprog. The prologue of the BPF entry/main program, where the
   trampoline attach point is, is skipped during a tailcall. So, the
   trampoline is not even triggered in this scenario and the question
   of protecting the redzone does not arise.
2) In case of a subprog, the caller's stack frame is already set up
   and the subprog's stack frame is yet to be set up. So, there is
   nothing in the redzone to protect.

Also, using a dummy frame in the BPF trampoline wastes critically
scarce kernel stack space, especially in a tailcall sequence, for
marginal benefit in stack unwinding. So, drop setting up the dummy
frame. Instead, save the return address in the BPF trampoline frame and
use it as appropriate. Pruning this unnecessary stack usage mitigates
the likelihood of stack overflow in scenarios where bpf2bpf tailcalls
and fexit programs are mixed.
Reported-by: Saket Kumar Bhaskar <[email protected]>
Fixes: 2ed2d8f6fb38 ("powerpc64/bpf: Support tailcalls with subprogs")
Signed-off-by: Hari Bathini <[email protected]>
---
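Note for reviewers: a minimal sketch of the frame arithmetic this change
removes, using the variable names from __arch_prepare_bpf_trampoline()
(illustrative only, not part of the patch):

	/* Before: a dummy frame sized to cover the 64-byte redzone sat
	 * between the traced function's frame and the trampoline frame,
	 * so offsets to the traced function's frame spanned both:
	 */
	bpf_dummy_frame_size = STACK_FRAME_MIN_SIZE + 64;
	func_frame_offset = bpf_dummy_frame_size + bpf_frame_size;

	/* After: only the trampoline frame is allocated, so the traced
	 * function's frame sits directly at bpf_frame_size from r1:
	 */
	EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_frame_size));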
arch/powerpc/net/bpf_jit_comp.c | 84 ++++++++++++---------------------
1 file changed, 30 insertions(+), 54 deletions(-)
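With the dummy frame gone, bpf_frame_size + PPC_LR_STKOFF from the
trampoline's r1 now points at the slot where the original return address
(r0) is stored, so the trampoline's own return address gets a dedicated
retaddr_off slot in its frame instead. Roughly, as a sketch mirroring the
hunks below:

	/* reserve one SZL-sized slot while laying out the frame */
	retaddr_off = bpf_frame_size;
	bpf_frame_size += SZL;

	/* save the return address ... */
	EMIT(PPC_RAW_MFLR(_R3));
	EMIT(PPC_RAW_STL(_R3, _R1, retaddr_off));

	/* ... and reload it before calling the traced function */
	EMIT(PPC_RAW_LL(_R3, _R1, retaddr_off));
	EMIT(PPC_RAW_MTCTR(_R3));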
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 1ff8030faf1f..35cea97a1bcd 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -638,15 +638,10 @@ static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context
  * for the traced function (BPF subprog/callee) to fetch it.
  */
 static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_context *ctx,
-						int func_frame_offset,
-						int bpf_dummy_frame_size, int r4_off)
+						int bpf_frame_size, int r4_off)
 {
 	if (IS_ENABLED(CONFIG_PPC64)) {
-		/*
-		 * func_frame_offset = ...(1)
-		 *	bpf_dummy_frame_size + trampoline_frame_size
-		 */
-		EMIT(PPC_RAW_LD(_R4, _R1, func_frame_offset));
+		EMIT(PPC_RAW_LD(_R4, _R1, bpf_frame_size));
 
 		/* Refer to trampoline's Generated stack layout */
 		EMIT(PPC_RAW_LD(_R3, _R4, -BPF_PPC_TAILCALL));
@@ -657,21 +652,12 @@ static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_conte
 		EMIT(PPC_RAW_CMPLWI(_R3, MAX_TAIL_CALL_CNT));
 		PPC_BCC_CONST_SHORT(COND_GT, 8);
 		EMIT(PPC_RAW_ADDI(_R3, _R4, -BPF_PPC_TAILCALL));
+
 		/*
-		 * From ...(1) above:
-		 * trampoline_frame_bottom = ...(2)
-		 *	func_frame_offset - bpf_dummy_frame_size
-		 *
-		 * Using ...(2) derived above:
-		 * trampoline_tail_call_info_offset = ...(3)
-		 *	trampoline_frame_bottom - tailcallinfo_offset
-		 *
-		 * From ...(3):
 		 * Use trampoline_tail_call_info_offset to write reference of main's
 		 * tail_call_info in trampoline frame.
 		 */
-		EMIT(PPC_RAW_STL(_R3, _R1, (func_frame_offset - bpf_dummy_frame_size)
-					   - BPF_PPC_TAILCALL));
+		EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size - BPF_PPC_TAILCALL));
 	} else {
 		/* See bpf_jit_stack_offsetof() and BPF_PPC_TC */
 		EMIT(PPC_RAW_LL(_R4, _R1, r4_off));
@@ -679,7 +665,7 @@ static void bpf_trampoline_setup_tail_call_info(u32 *image, struct codegen_conte
 }
 
 static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_context *ctx,
-						 int func_frame_offset, int r4_off)
+						 int bpf_frame_size, int r4_off)
 {
 	if (IS_ENABLED(CONFIG_PPC32)) {
 		/*
@@ -690,12 +676,12 @@ static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_cont
 	}
 }
 
-static void bpf_trampoline_save_args(u32 *image, struct codegen_context *ctx, int func_frame_offset,
-				     int nr_regs, int regs_off)
+static void bpf_trampoline_save_args(u32 *image, struct codegen_context *ctx,
+				     int bpf_frame_size, int nr_regs, int regs_off)
 {
 	int param_save_area_offset;
 
-	param_save_area_offset = func_frame_offset; /* the two frames we alloted */
+	param_save_area_offset = bpf_frame_size;
 	param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */
 
 	for (int i = 0; i < nr_regs; i++) {
@@ -718,11 +704,11 @@ static void bpf_trampoline_restore_args_regs(u32 *image, struct codegen_context
 
 /* Used when we call into the traced function. Replicate parameter save area */
 static void bpf_trampoline_restore_args_stack(u32 *image, struct codegen_context *ctx,
-					      int func_frame_offset, int nr_regs, int regs_off)
+					      int bpf_frame_size, int nr_regs, int regs_off)
 {
 	int param_save_area_offset;
 
-	param_save_area_offset = func_frame_offset; /* the two frames we alloted */
+	param_save_area_offset = bpf_frame_size;
 	param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */
 
 	for (int i = 8; i < nr_regs; i++) {
@@ -739,10 +725,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 					 void *func_addr)
 {
 	int regs_off, nregs_off, ip_off, run_ctx_off, retval_off, nvr_off, alt_lr_off, r4_off = 0;
-	int i, ret, nr_regs, bpf_frame_size = 0, bpf_dummy_frame_size = 0, func_frame_offset;
 	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
 	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
 	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
+	int i, ret, nr_regs, retaddr_off, bpf_frame_size = 0;
 	struct codegen_context codegen_ctx, *ctx;
 	u32 *image = (u32 *)rw_image;
 	ppc_inst_t branch_insn;
@@ -768,16 +754,11 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 	 * Generated stack layout:
 	 *
 	 * func prev back chain   [ back chain        ]
-	 *                        [                   ]
-	 * bpf prog redzone/tailcallcnt [ ...         ] 64 bytes (64-bit powerpc)
-	 *                        [                   ] --
-	 * LR save area           [ r0 save (64-bit)  ] | header
-	 *                        [ r0 save (32-bit)  ] |
-	 * dummy frame for unwind [ back chain 1      ] --
 	 *                        [ tail_call_info    ] optional - 64-bit powerpc
 	 *                        [ padding           ] align stack frame
 	 * r4_off                 [ r4 (tailcallcnt)  ] optional - 32-bit powerpc
 	 * alt_lr_off             [ real lr (ool stub)] optional - actual lr
+	 * retaddr_off            [ return address    ]
 	 *                        [ r26               ]
 	 * nvr_off                [ r25               ] nvr save area
 	 * retval_off             [ return value      ]
@@ -841,6 +822,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 	nvr_off = bpf_frame_size;
 	bpf_frame_size += 2 * SZL;
 
+	/* Save area for return address */
+	retaddr_off = bpf_frame_size;
+	bpf_frame_size += SZL;
+
 	/* Optional save area for actual LR in case of ool ftrace */
 	if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) {
 		alt_lr_off = bpf_frame_size;
@@ -867,16 +852,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 	/* Padding to align stack frame, if any */
 	bpf_frame_size = round_up(bpf_frame_size, SZL * 2);
 
-	/* Dummy frame size for proper unwind - includes 64-bytes red zone for 64-bit powerpc */
-	bpf_dummy_frame_size = STACK_FRAME_MIN_SIZE + 64;
-
-	/* Offset to the traced function's stack frame */
-	func_frame_offset = bpf_dummy_frame_size + bpf_frame_size;
-
-	/* Create dummy frame for unwind, store original return value */
+	/* Store original return value */
 	EMIT(PPC_RAW_STL(_R0, _R1, PPC_LR_STKOFF));
-	/* Protect red zone where tail call count goes */
-	EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_dummy_frame_size));
 	/* Create our stack frame */
 	EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_frame_size));
 
@@ -891,14 +868,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 	if (IS_ENABLED(CONFIG_PPC32) && nr_regs < 2)
 		EMIT(PPC_RAW_STL(_R4, _R1, r4_off));
 
-	bpf_trampoline_save_args(image, ctx, func_frame_offset, nr_regs, regs_off);
+	bpf_trampoline_save_args(image, ctx, bpf_frame_size, nr_regs, regs_off);
 
 	/* Save our LR/return address */
 	EMIT(PPC_RAW_MFLR(_R3));
 	if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE))
 		EMIT(PPC_RAW_STL(_R3, _R1, alt_lr_off));
 	else
-		EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF));
+		EMIT(PPC_RAW_STL(_R3, _R1, retaddr_off));
 
 	/*
 	 * Get IP address of the traced function.
@@ -920,9 +897,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 	EMIT(PPC_RAW_STL(_R3, _R1, ip_off));
 
 	if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) {
-		/* Fake our LR for unwind */
+		/* Fake our LR for BPF_TRAMP_F_CALL_ORIG case */
 		EMIT(PPC_RAW_ADDI(_R3, _R3, 4));
-		EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF));
+		EMIT(PPC_RAW_STL(_R3, _R1, retaddr_off));
 	}
 
 	/* Save function arg count -- see bpf_get_func_arg_cnt() */
@@ -961,20 +938,19 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 	/* Call the traced function */
 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
 		/*
-		 * The address in LR save area points to the correct point in the original function
+		 * retaddr on trampoline stack points to the correct point in the original function
		 * with both PPC_FTRACE_OUT_OF_LINE as well as with traditional ftrace instruction
 		 * sequence
 		 */
-		EMIT(PPC_RAW_LL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF));
+		EMIT(PPC_RAW_LL(_R3, _R1, retaddr_off));
 		EMIT(PPC_RAW_MTCTR(_R3));
 
 		/* Replicate tail_call_cnt before calling the original BPF prog */
 		if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
-			bpf_trampoline_setup_tail_call_info(image, ctx, func_frame_offset,
-							    bpf_dummy_frame_size, r4_off);
+			bpf_trampoline_setup_tail_call_info(image, ctx, bpf_frame_size, r4_off);
 
 		/* Restore args */
-		bpf_trampoline_restore_args_stack(image, ctx, func_frame_offset, nr_regs, regs_off);
+		bpf_trampoline_restore_args_stack(image, ctx, bpf_frame_size, nr_regs, regs_off);
 
 		/* Restore TOC for 64-bit */
 		if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
@@ -988,7 +964,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 
 		/* Restore updated tail_call_cnt */
 		if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
-			bpf_trampoline_restore_tail_call_cnt(image, ctx, func_frame_offset, r4_off);
+			bpf_trampoline_restore_tail_call_cnt(image, ctx, bpf_frame_size, r4_off);
 
 		/* Reserve space to patch branch instruction to skip fexit progs */
 		if (ro_image) /* image is NULL for dummy pass */
@@ -1040,7 +1016,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 		EMIT(PPC_RAW_LD(_R2, _R1, 24));
 	if (flags & BPF_TRAMP_F_SKIP_FRAME) {
 		/* Skip the traced function and return to parent */
-		EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset));
+		EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size));
 		EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF));
 		EMIT(PPC_RAW_MTLR(_R0));
 		EMIT(PPC_RAW_BLR());
@@ -1048,13 +1024,13 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 		if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) {
 			EMIT(PPC_RAW_LL(_R0, _R1, alt_lr_off));
 			EMIT(PPC_RAW_MTLR(_R0));
-			EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset));
+			EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size));
 			EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF));
 			EMIT(PPC_RAW_BLR());
 		} else {
-			EMIT(PPC_RAW_LL(_R0, _R1, bpf_frame_size + PPC_LR_STKOFF));
+			EMIT(PPC_RAW_LL(_R0, _R1, retaddr_off));
 			EMIT(PPC_RAW_MTCTR(_R0));
-			EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset));
+			EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_frame_size));
 			EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF));
 			EMIT(PPC_RAW_MTLR(_R0));
 			EMIT(PPC_RAW_BCTR());
--
2.53.0