> diff --git a/arch/powerpc/net/bpf_jit_comp64.c 
> b/arch/powerpc/net/bpf_jit_comp64.c
> index db121b1404fe..17de8b53a962 100644
> --- a/arch/powerpc/net/bpf_jit_comp64.c
> +++ b/arch/powerpc/net/bpf_jit_comp64.c

[ ... ]

> +/*
> + * For exception boundary & exception_cb progs:
> + *     return increased size to accommodate additional NVRs.
> + */
> +static int bpf_jit_stack_size(struct codegen_context *ctx)
> +{
> +     return ctx->exception_boundary || ctx->exception_cb ?
> +                                     BPF_PPC_EXC_STACKFRAME :
> +                                     BPF_PPC_STACKFRAME;
> +}

[ ... ]

> @@ -198,7 +249,19 @@ void bpf_jit_build_prologue(u32 *image, struct 
> codegen_context *ctx)
>                       EMIT(PPC_RAW_STD(_R0, _R1, PPC_LR_STKOFF));
>               }
>
> -             EMIT(PPC_RAW_STDU(_R1, _R1, -(BPF_PPC_STACKFRAME + 
> ctx->stack_size)));
> +             EMIT(PPC_RAW_STDU(_R1, _R1,
> +                             -(bpf_jit_stack_size(ctx) + ctx->stack_size)));
> +     }

[ ... ]

> @@ -248,9 +311,19 @@ static void bpf_jit_emit_common_epilogue(u32 *image, 
> struct codegen_context *ctx)
>               EMIT(PPC_RAW_LD(bpf_to_ppc(ARENA_VM_START), _R1,
>                               bpf_jit_stack_offsetof(ctx, 
> bpf_to_ppc(ARENA_VM_START))));
>
> +     if (ctx->exception_cb) {
> +             /*
> +              * Recover additionally saved non volatile registers from stack
> +              * frame of exception boundary program.
> +              */
> +             for (i = _R14; i <= _R25; i++)
> +                     EMIT(PPC_RAW_LD(i, _R1, bpf_jit_stack_offsetof(ctx, 
> i)));
> +     }
> +
>       /* Tear down our stack frame */
>       if (bpf_has_stack_frame(ctx)) {
> -             EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME + 
> ctx->stack_size));
> +             EMIT(PPC_RAW_ADDI(_R1, _R1, bpf_jit_stack_size(ctx) + 
> ctx->stack_size));

For the exception callback case, doesn't this use the wrong stack_size?

In bpf_jit_build_prologue(), the exception callback does not create its own
frame - it sets R1 to the boundary's frame pointer via EMIT(PPC_RAW_MR(_R1,
_R5)). However here in the epilogue, the frame teardown uses ctx->stack_size
which is the callback's stack depth, not the boundary's.

The boundary program creates its frame with:
    STDU R1, R1, -(bpf_jit_stack_size(boundary_ctx) + boundary_stack_size)

But the callback tears it down with:
    ADDI R1, R1, bpf_jit_stack_size(callback_ctx) + callback_stack_size

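bpf_jit_stack_size() returns BPF_PPC_EXC_STACKFRAME for both the boundary and
the callback, so the fixed part of the two expressions matches; the mismatch
comes only from ctx->stack_size. Since each BPF subprogram can have a
different stack_depth (set per-subprog in kernel/bpf/verifier.c via
func[i]->aux->stack_depth), if the exception callback has a different stack
depth than the boundary, R1 will end up at the wrong location after this ADDI.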

After the incorrect R1 adjustment, the LR restore below loads from
R1 + 16 (PPC_LR_STKOFF), which no longer points at the LR save slot written
by the boundary program, so the callback may return to an invalid address.

Would it be correct to store the boundary's stack_size somewhere accessible
to the callback, or ensure the callback uses the same stack layout as the
boundary?

> +
>               if (ctx->seen & SEEN_FUNC || ctx->exception_cb) {
>                       EMIT(PPC_RAW_LD(_R0, _R1, PPC_LR_STKOFF));
>                       EMIT(PPC_RAW_MTLR(_R0));


---
AI reviewed your patch. Please fix the bug, or reply by email explaining why it is not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md

CI run summary: https://github.com/kernel-patches/bpf/actions/runs/21311936056

Reply via email to