On Wed, Oct 14, 2015 at 12:41:27PM +0000, Wang Nan wrote:
> From: He Kuang <[email protected]>
> 
> This patch generates a prologue for a BPF program which fetches arguments
> for it. With this patch, the program can have arguments as follows:
> 
>  SEC("lock_page=__lock_page page->flags")
>  int lock_page(struct pt_regs *ctx, int err, unsigned long flags)
>  {
>        return 1;
>  }
> 
> This patch passes at most 3 arguments in r3, r4 and r5. r1 is still
> the ctx pointer. r2 indicates whether dereferencing succeeded
> (0 on success, 1 on failure).
> 
> This patch uses r6 to hold ctx (struct pt_regs) and r7 to hold the stack
> pointer for the result. The result of each argument is first stored on
> the stack:
> 
>  low address
>  BPF_REG_FP - 24  ARG3
>  BPF_REG_FP - 16  ARG2
>  BPF_REG_FP - 8   ARG1
>  BPF_REG_FP
>  high address
> 
> Then loaded into r3, r4 and r5.
> 
> The output prologue for offn(...off2(off1(reg))) should be:
> 
>      r6 <- r1                 // save ctx into a callee saved register
>      r7 <- fp
>      r7 <- r7 - stack_offset  // pointer to result slot
>      /* load r3 with the offset in pt_regs of 'reg' */
>      (r7) <- r3                       // make slot valid
>      r3 <- r3 + off1          // prepare to read unsafe pointer
>      r2 <- 8
>      r1 <- r7                 // result put onto stack
>      call probe_read          // read unsafe pointer
>      jnei r0, 0, err          // error checking
>      r3 <- (r7)                       // read result
>      r3 <- r3 + off2          // prepare to read unsafe pointer
>      r2 <- 8
>      r1 <- r7
>      call probe_read
>      jnei r0, 0, err
>      ...
>      /* load r3, r4, r5 from stack */
>      goto success
> err:
>      r2 <- 1
>      /* load r3, r4, r5 with 0 */
>      goto usercode
> success:
>      r2 <- 0
> usercode:
>      r1 <- r6 // restore ctx
>      // original user code
> 
> If all of the arguments reside in registers (dereferencing is not
> required), gen_prologue_fastpath() will be used to create a
> fast prologue:
> 
>      r3 <- (r1 + offset of reg1)
>      r4 <- (r1 + offset of reg2)
>      r5 <- (r1 + offset of reg3)
>      r2 <- 0
> 
> P.S.
> 
> eBPF calling convention is defined as:
> 
> * r0          - return value from in-kernel function, and exit value
>                   for eBPF program
> * r1 - r5     - arguments from eBPF program to in-kernel function
> * r6 - r9     - callee saved registers that in-kernel function will
>                   preserve
> * r10         - read-only frame pointer to access stack
> 
> Signed-off-by: He Kuang <[email protected]>
> Signed-off-by: Wang Nan <[email protected]>
> Cc: Alexei Starovoitov <[email protected]>
> Cc: Brendan Gregg <[email protected]>
> Cc: Daniel Borkmann <[email protected]>
> Cc: David Ahern <[email protected]>
> Cc: He Kuang <[email protected]>
> Cc: Jiri Olsa <[email protected]>
> Cc: Kaixu Xia <[email protected]>
> Cc: Masami Hiramatsu <[email protected]>
> Cc: Namhyung Kim <[email protected]>
> Cc: Paul Mackerras <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Cc: Zefan Li <[email protected]>
> Cc: [email protected]
> Cc: Arnaldo Carvalho de Melo <[email protected]>
> Link: http://lkml.kernel.org/n/[email protected]
> ---

[SNIP]
> +int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
> +                   struct bpf_insn *new_prog, size_t *new_cnt,
> +                   size_t cnt_space)
> +{
> +     struct bpf_insn *success_code = NULL;
> +     struct bpf_insn *error_code = NULL;
> +     struct bpf_insn *user_code = NULL;
> +     struct bpf_insn_pos pos;
> +     bool fastpath = true;
> +     int i;
> +
> +     if (!new_prog || !new_cnt)
> +             return -EINVAL;
> +
> +     pos.begin = new_prog;
> +     pos.end = new_prog + cnt_space;
> +     pos.pos = new_prog;
> +
> +     if (!nargs) {
> +             ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0),
> +                 &pos);
> +
> +             if (check_pos(&pos))
> +                     goto errout;
> +
> +             *new_cnt = pos_get_cnt(&pos);
> +             return 0;
> +     }
> +
> +     if (nargs > BPF_PROLOGUE_MAX_ARGS)
> +             nargs = BPF_PROLOGUE_MAX_ARGS;

Wouldn't it be better to inform the user if some arguments are ignored?

Thanks,
Namhyung


> +     if (cnt_space > BPF_MAXINSNS)
> +             cnt_space = BPF_MAXINSNS;
> +
> +     /* First pass: validation */
> +     for (i = 0; i < nargs; i++) {
> +             struct probe_trace_arg_ref *ref = args[i].ref;
> +
> +             if (args[i].value[0] == '@') {
> +                     /* TODO: fetch global variable */
> +                     pr_err("bpf: prologue: global %s%+ld not support\n",
> +                             args[i].value, ref ? ref->offset : 0);
> +                     return -ENOTSUP;
> +             }
> +
> +             while (ref) {
> +                     /* fastpath is true if all args have ref == NULL */
> +                     fastpath = false;
> +
> +                     /*
> +                      * Instruction encodes immediate value using
> +                      * s32, ref->offset is long. On systems which
> +                      * can't fill long in s32, refuse to process if
> +                      * ref->offset too large (or small).
> +                      */
> +#ifdef __LP64__
> +#define OFFSET_MAX   ((1LL << 31) - 1)
> +#define OFFSET_MIN   ((1LL << 31) * -1)
> +                     if (ref->offset > OFFSET_MAX ||
> +                                     ref->offset < OFFSET_MIN) {
> +                             pr_err("bpf: prologue: offset out of bound: %ld\n",
> +                                    ref->offset);
> +                             return -E2BIG;
> +                     }
> +#endif
> +                     ref = ref->next;
> +             }
> +     }
> +     pr_debug("prologue: pass validation\n");
> +
> +     if (fastpath) {
> +             /* If all variables are registers... */
> +             pr_debug("prologue: fast path\n");
> +             if (gen_prologue_fastpath(&pos, args, nargs))
> +                     goto errout;
> +     } else {
> +             pr_debug("prologue: slow path\n");
> +
> +             /* Initialization: move ctx to a callee saved register. */
> +             ins(BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1), &pos);
> +
> +             if (gen_prologue_slowpath(&pos, args, nargs))
> +                     goto errout;
> +             /*
> +              * start of ERROR_CODE (only slow path needs error code)
> +              *   mov r2 <- 1
> +              *   goto usercode
> +              */
> +             error_code = pos.pos;
> +             ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 1),
> +                 &pos);
> +
> +             for (i = 0; i < nargs; i++)
> +                     ins(BPF_ALU64_IMM(BPF_MOV,
> +                                       BPF_PROLOGUE_START_ARG_REG + i,
> +                                       0),
> +                         &pos);
> +             ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_USER_CODE),
> +                             &pos);
> +     }
> +
> +     /*
> +      * start of SUCCESS_CODE:
> +      *   mov r2 <- 0
> +      *   goto usercode  // skip
> +      */
> +     success_code = pos.pos;
> +     ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), &pos);
> +
> +     /*
> +      * start of USER_CODE:
> +      *   Restore ctx to r1
> +      */
> +     user_code = pos.pos;
> +     if (!fastpath) {
> +             /*
> +              * Only slow path needs restoring of ctx. In fast path,
> +              * register are loaded directly from r1.
> +              */
> +             ins(BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX), &pos);
> +             if (prologue_relocate(&pos, error_code, success_code,
> +                                   user_code))
> +                     goto errout;
> +     }
> +
> +     if (check_pos(&pos))
> +             goto errout;
> +
> +     *new_cnt = pos_get_cnt(&pos);
> +     return 0;
> +errout:
> +     return -ERANGE;
> +}
> diff --git a/tools/perf/util/bpf-prologue.h b/tools/perf/util/bpf-prologue.h
> new file mode 100644
> index 0000000..f1e4c5d
> --- /dev/null
> +++ b/tools/perf/util/bpf-prologue.h
> @@ -0,0 +1,34 @@
> +/*
> + * Copyright (C) 2015, He Kuang <[email protected]>
> + * Copyright (C) 2015, Huawei Inc.
> + */
> +#ifndef __BPF_PROLOGUE_H
> +#define __BPF_PROLOGUE_H
> +
> +#include <linux/compiler.h>
> +#include <linux/filter.h>
> +#include "probe-event.h"
> +
> +#define BPF_PROLOGUE_MAX_ARGS 3
> +#define BPF_PROLOGUE_START_ARG_REG BPF_REG_3
> +#define BPF_PROLOGUE_FETCH_RESULT_REG BPF_REG_2
> +
> +#ifdef HAVE_BPF_PROLOGUE
> +int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
> +                   struct bpf_insn *new_prog, size_t *new_cnt,
> +                   size_t cnt_space);
> +#else
> +static inline int
> +bpf__gen_prologue(struct probe_trace_arg *args __maybe_unused,
> +               int nargs __maybe_unused,
> +               struct bpf_insn *new_prog __maybe_unused,
> +               size_t *new_cnt,
> +               size_t cnt_space __maybe_unused)
> +{
> +     if (!new_cnt)
> +             return -EINVAL;
> +     *new_cnt = 0;
> +     return 0;
> +}
> +#endif
> +#endif /* __BPF_PROLOGUE_H */
> -- 
> 1.8.3.4
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to