There are scenarios where we need an eBPF program to record not only the arguments at a kprobe point, but also PMU counter values, time latencies, or cache-miss counts between two probe points, and other information available when a probe point is entered.
This helper function gives an eBPF program the ability to output data as a perf sample event. It works like kprobe_perf_func(): it packs data from the BPF stack into a sample record and submits it to the ring buffer of the perf_events bound to the BPF ftrace entry. Userspace perf tools can then record the BPF ftrace event to collect those records.

Signed-off-by: He Kuang <[email protected]>
---
 include/uapi/linux/bpf.h  |  3 +++
 kernel/trace/bpf_trace.c  | 43 +++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace.h      |  5 +++++
 samples/bpf/bpf_helpers.h |  2 ++
 4 files changed, 53 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a9ebdf5..f44b0aa 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -210,6 +210,9 @@ enum bpf_func_id {
 	 * Return: 0 on success
 	 */
 	BPF_FUNC_l4_csum_replace,
+
+	/* int bpf_output_data(void *src, int size, void *regs) */
+	BPF_FUNC_output_data,
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 2d56ce5..45dbeab 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -79,6 +79,47 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+static u64 bpf_output_data(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	void *src = (void *) (long) r1;
+	int dsize = (int) r2, __size, size;
+	void *regs = (void *) (long) r3;
+	struct bpf_trace_entry_head *entry;
+	struct hlist_head *head;
+	int rctx;
+
+	if (dsize > TRACE_BPF_MAX_SIZE)
+		return -ENOMEM;
+
+	head = this_cpu_ptr(event_bpf.perf_events);
+	if (hlist_empty(head))
+		return -ENOENT;
+
+	__size = sizeof(*entry) + dsize;
+	size = ALIGN(__size + sizeof(u32), sizeof(u64));
+	size -= sizeof(u32);
+
+	entry = perf_trace_buf_prepare(size, TRACE_BPF, NULL, &rctx);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->size = dsize;
+	memcpy(&entry[1], src, dsize);
+
+	perf_tp_event(0, 1, entry, size, regs, head, rctx, NULL);
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_output_data_proto = {
+	.func		= bpf_output_data,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_STACK,
+	.arg2_type	= ARG_CONST_STACK_SIZE,
+	.arg3_type	= ARG_PTR_TO_CTX,
+};
+
 static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 {
 	/* NMI safe access to clock monotonic */
@@ -170,6 +211,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
 		return &bpf_map_delete_elem_proto;
 	case BPF_FUNC_probe_read:
 		return &bpf_probe_read_proto;
+	case BPF_FUNC_output_data:
+		return &bpf_output_data_proto;
 	case BPF_FUNC_ktime_get_ns:
 		return &bpf_ktime_get_ns_proto;
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d135f55..8d9100d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -113,6 +113,11 @@ struct kretprobe_trace_entry_head {
 	unsigned long ret_ip;
 };
 
+struct bpf_trace_entry_head {
+	struct trace_entry ent;
+	unsigned long size;
+};
+
 /*
  * trace_flag_type is an enumeration that holds different
  * states when a trace occurs. These are:
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index f960b5f..bc7f13c 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -49,5 +49,7 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
 	(void *) BPF_FUNC_l3_csum_replace;
 static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
 	(void *) BPF_FUNC_l4_csum_replace;
+static int (*bpf_output_data)(void *src, int size, void *regs) =
+	(void *) BPF_FUNC_output_data;
 
 #endif
-- 
1.8.5.2
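Not part of the patch, but for illustration: a minimal sketch of how a samples/bpf program could call the new helper through the wrapper added to bpf_helpers.h above. The probe point, the timestamp payload, and the surrounding samples/bpf boilerplate are assumptions rather than anything this patch adds; the data pointer must live on the BPF stack (ARG_PTR_TO_STACK), and the program must be GPL since the helper is gpl_only.

/* Illustrative sketch only -- not part of this patch.  Emits one
 * 8-byte value as a perf sample each time the probed function is
 * entered, using the bpf_output_data() wrapper from bpf_helpers.h.
 */
#include <linux/ptrace.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

SEC("kprobe/sys_write")
int bpf_prog1(struct pt_regs *ctx)
{
	/* value kept on the BPF stack, as ARG_PTR_TO_STACK requires */
	unsigned long long ts = bpf_ktime_get_ns();

	/* pack &ts into a sample record and submit it to the perf
	 * ring buffer bound to the BPF ftrace entry */
	bpf_output_data(&ts, sizeof(ts), ctx);
	return 0;
}

/* the helper is gpl_only, so a GPL-compatible license is required */
char _license[] SEC("license") = "GPL";
unsigned int _version SEC("version") = LINUX_VERSION_CODE;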

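The +sizeof(u32) / -sizeof(u32) step in bpf_output_data() follows the usual perf_trace sizing pattern: the sample buffer carries a u32 size word in front of the record, and the whole thing must stay u64-aligned. A standalone userspace illustration with hypothetical sizes; entry_head here merely stands in for struct bpf_trace_entry_head:

/* Hypothetical sizes -- illustrates the record layout and size math. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ALIGN(x, a) (((x) + ((a) - 1)) & ~((unsigned long)(a) - 1))

struct entry_head {
	unsigned long long ent;		/* placeholder for struct trace_entry */
	unsigned long size;
};

int main(void)
{
	const char payload[] = "sample!";	/* data from the BPF stack */
	unsigned int dsize = sizeof(payload);

	/* header + payload, rounded up so record + u32 size word is
	 * u64-aligned, then the u32 is taken back out */
	unsigned int __size = sizeof(struct entry_head) + dsize;
	unsigned int size = ALIGN(__size + sizeof(unsigned int),
				  sizeof(unsigned long long)) - sizeof(unsigned int);

	struct entry_head *entry = calloc(1, size);

	if (!entry)
		return 1;
	entry->size = dsize;
	memcpy(&entry[1], payload, dsize);	/* payload right after header */

	printf("dsize=%u __size=%u size=%u\n", dsize, __size, size);
	free(entry);
	return 0;
}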
