There are scenarios where we need an eBPF program to record not only the arguments at a kprobe point, but also PMU counter values, time latencies, or cache-miss counts between two probe points, and other information available when a probe point is entered.
This helper function gives an eBPF program the ability to output data as a perf sample event. It works like kprobe_perf_func(): it packs data from the BPF stack into a sample record and submits it to the ring buffer of the perf_events bound to the BPF ftrace entry. Userspace perf tools can then record the BPF ftrace event to collect those records.

Signed-off-by: He Kuang <[email protected]>
---
 include/uapi/linux/bpf.h  |  3 +++
 kernel/trace/bpf_trace.c  | 43 +++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace.h      |  5 +++++
 samples/bpf/bpf_helpers.h |  2 ++
 4 files changed, 53 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a9ebdf5..f44b0aa 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -210,6 +210,9 @@ enum bpf_func_id {
 	 * Return: 0 on success
 	 */
 	BPF_FUNC_l4_csum_replace,
+
+	/* int bpf_output_data(void *src, int size, void *regs) */
+	BPF_FUNC_output_data,
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 2d56ce5..45dbeab 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -79,6 +79,47 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+static u64 bpf_output_data(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	void *src = (void *) (long) r1;
+	int dsize = (int) r2, __size, size;
+	void *regs = (void *) (long) r3;
+	struct bpf_trace_entry_head *entry;
+	struct hlist_head *head;
+	int rctx;
+
+	if (dsize > TRACE_BPF_MAX_SIZE)
+		return -ENOMEM;
+
+	head = this_cpu_ptr(event_bpf.perf_events);
+	if (hlist_empty(head))
+		return -ENOENT;
+
+	__size = sizeof(*entry) + dsize;
+	size = ALIGN(__size + sizeof(u32), sizeof(u64));
+	size -= sizeof(u32);
+
+	entry = perf_trace_buf_prepare(size, TRACE_BPF, NULL, &rctx);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->size = dsize;
+	memcpy(&entry[1], src, dsize);
+
+	perf_tp_event(0, 1, entry, size, regs, head, rctx, NULL);
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_output_data_proto = {
+	.func		= bpf_output_data,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_STACK,
+	.arg2_type	= ARG_CONST_STACK_SIZE,
+	.arg3_type	= ARG_PTR_TO_CTX,
+};
+
 static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 {
 	/* NMI safe access to clock monotonic */
@@ -170,6 +211,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
 		return &bpf_map_delete_elem_proto;
 	case BPF_FUNC_probe_read:
 		return &bpf_probe_read_proto;
+	case BPF_FUNC_output_data:
+		return &bpf_output_data_proto;
 	case BPF_FUNC_ktime_get_ns:
 		return &bpf_ktime_get_ns_proto;
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d135f55..8d9100d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -113,6 +113,11 @@ struct kretprobe_trace_entry_head {
 	unsigned long ret_ip;
 };
 
+struct bpf_trace_entry_head {
+	struct trace_entry ent;
+	unsigned long size;
+};
+
 /*
  * trace_flag_type is an enumeration that holds different
  * states when a trace occurs. These are:
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index f960b5f..bc7f13c 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -49,5 +49,7 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
 	(void *) BPF_FUNC_l3_csum_replace;
 static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
 	(void *) BPF_FUNC_l4_csum_replace;
+static int (*bpf_output_data)(void *src, int size, void *regs) =
+	(void *) BPF_FUNC_output_data;
 
 #endif
-- 
1.8.5.2
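Not part of the patch, but for illustration: a minimal sketch of how a samples/bpf program could call the new helper through the wrapper added to bpf_helpers.h above. The probe point, the timestamp payload, and the surrounding samples/bpf boilerplate are assumptions rather than anything this patch adds; the data pointer must live on the BPF stack (ARG_PTR_TO_STACK), and the program must be GPL since the helper is gpl_only.

/* Illustrative sketch only -- not part of this patch.  Emits one
 * 8-byte value as a perf sample each time the probed function is
 * entered, using the bpf_output_data() wrapper from bpf_helpers.h.
 */
#include <linux/ptrace.h>
#include <linux/version.h>
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

SEC("kprobe/sys_write")
int bpf_prog1(struct pt_regs *ctx)
{
	/* value kept on the BPF stack, as ARG_PTR_TO_STACK requires */
	unsigned long long ts = bpf_ktime_get_ns();

	/* pack &ts into a sample record and submit it to the perf
	 * ring buffer bound to the BPF ftrace entry */
	bpf_output_data(&ts, sizeof(ts), ctx);
	return 0;
}

/* the helper is gpl_only, so a GPL-compatible license is required */
char _license[] SEC("license") = "GPL";
unsigned int _version SEC("version") = LINUX_VERSION_CODE;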

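The +sizeof(u32) / -sizeof(u32) step in bpf_output_data() follows the usual perf_trace sizing pattern: the sample buffer carries a u32 size word in front of the record, and the whole thing must stay u64-aligned. A standalone userspace illustration with hypothetical sizes; entry_head here merely stands in for struct bpf_trace_entry_head:

/* Hypothetical sizes -- illustrates the record layout and size math. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ALIGN(x, a) (((x) + ((a) - 1)) & ~((unsigned long)(a) - 1))

struct entry_head {
	unsigned long long ent;		/* placeholder for struct trace_entry */
	unsigned long size;
};

int main(void)
{
	const char payload[] = "sample!";	/* data from the BPF stack */
	unsigned int dsize = sizeof(payload);

	/* header + payload, rounded up so record + u32 size word is
	 * u64-aligned, then the u32 is taken back out */
	unsigned int __size = sizeof(struct entry_head) + dsize;
	unsigned int size = ALIGN(__size + sizeof(unsigned int),
				  sizeof(unsigned long long)) - sizeof(unsigned int);

	struct entry_head *entry = calloc(1, size);

	if (!entry)
		return 1;
	entry->size = dsize;
	memcpy(&entry[1], payload, dsize);	/* payload right after header */

	printf("dsize=%u __size=%u size=%u\n", dsize, __size, size);
	free(entry);
	return 0;
}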
