[PATCH v5 tip 2/7] tracing: attach BPF programs to kprobes
User interface: struct perf_event_attr attr = {.type = PERF_TYPE_TRACEPOINT, .config = event_id, ...}; event_fd = perf_event_open(,...); ioctl(event_fd, PERF_EVENT_IOC_SET_BPF, prog_fd); prog_fd is a file descriptor associated with BPF program previously loaded. event_id is an ID of created kprobe close(event_fd) - automatically detaches BPF program from it BPF programs can call in-kernel helper functions to: - lookup/update/delete elements in maps - probe_read - wraper of probe_kernel_read() used to access any kernel data structures BPF programs receive 'struct pt_regs *' as an input ('struct pt_regs' is architecture dependent) Note, kprobes are _not_ a stable kernel ABI, so bpf programs attached to kprobes must be recompiled for every kernel version and user must supply correct LINUX_VERSION_CODE in attr.kern_version during bpf_prog_load() call. Signed-off-by: Alexei Starovoitov --- include/linux/ftrace_event.h| 14 ++ include/uapi/linux/bpf.h|3 ++ include/uapi/linux/perf_event.h |1 + kernel/bpf/syscall.c|7 ++- kernel/events/core.c| 59 +++ kernel/trace/Makefile |1 + kernel/trace/bpf_trace.c| 99 +++ kernel/trace/trace_kprobe.c | 10 +++- 8 files changed, 192 insertions(+), 2 deletions(-) create mode 100644 kernel/trace/bpf_trace.c diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index c674ee8f7fca..0aa535bc9f05 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -13,6 +13,7 @@ struct trace_array; struct trace_buffer; struct tracer; struct dentry; +struct bpf_prog; struct trace_print_flags { unsigned long mask; @@ -252,6 +253,7 @@ enum { TRACE_EVENT_FL_WAS_ENABLED_BIT, TRACE_EVENT_FL_USE_CALL_FILTER_BIT, TRACE_EVENT_FL_TRACEPOINT_BIT, + TRACE_EVENT_FL_KPROBE_BIT, }; /* @@ -265,6 +267,7 @@ enum { * it is best to clear the buffers that used it). * USE_CALL_FILTER - For ftrace internal events, don't use file filter * TRACEPOINT- Event is a tracepoint + * KPROBE- Event is a kprobe */ enum { TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), @@ -274,6 +277,7 @@ enum { TRACE_EVENT_FL_WAS_ENABLED = (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT), TRACE_EVENT_FL_USE_CALL_FILTER = (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT), TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT), + TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT), }; struct ftrace_event_call { @@ -303,6 +307,7 @@ struct ftrace_event_call { #ifdef CONFIG_PERF_EVENTS int perf_refcount; struct hlist_head __percpu *perf_events; + struct bpf_prog *prog; int (*perf_perm)(struct ftrace_event_call *, struct perf_event *); @@ -548,6 +553,15 @@ event_trigger_unlock_commit_regs(struct ftrace_event_file *file, event_triggers_post_call(file, tt); } +#ifdef CONFIG_BPF_SYSCALL +unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx); +#else +static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) +{ + return 1; +} +#endif + enum { FILTER_OTHER = 0, FILTER_STATIC_STRING, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 45da7ec7d274..4486d36d2e9e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -118,6 +118,7 @@ enum bpf_map_type { enum bpf_prog_type { BPF_PROG_TYPE_UNSPEC, BPF_PROG_TYPE_SOCKET_FILTER, + BPF_PROG_TYPE_KPROBE, }; /* flags for BPF_MAP_UPDATE_ELEM command */ @@ -151,6 +152,7 @@ union bpf_attr { __u32 log_level; /* verbosity level of verifier */ __u32 log_size; /* size of user buffer */ __aligned_u64 log_buf;/* user supplied buffer */ + __u32 kern_version; /* checked when type=kprobe */ }; } __attribute__((aligned(8))); @@ -162,6 +164,7 @@ enum bpf_func_id { BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(, ) */ BPF_FUNC_map_update_elem, /* int map_update_elem(, , , flags) */ BPF_FUNC_map_delete_elem, /* int map_delete_elem(, ) */ + BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void *src) */ __BPF_FUNC_MAX_ID, }; diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 3c8b45de57ec..ad4dade2a502 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -382,6 +382,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) #define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) #define PERF_EVENT_IOC_ID _IOR('$', 7,
[PATCH v5 tip 2/7] tracing: attach BPF programs to kprobes
User interface: struct perf_event_attr attr = {.type = PERF_TYPE_TRACEPOINT, .config = event_id, ...}; event_fd = perf_event_open(attr,...); ioctl(event_fd, PERF_EVENT_IOC_SET_BPF, prog_fd); prog_fd is a file descriptor associated with BPF program previously loaded. event_id is an ID of created kprobe close(event_fd) - automatically detaches BPF program from it BPF programs can call in-kernel helper functions to: - lookup/update/delete elements in maps - probe_read - wraper of probe_kernel_read() used to access any kernel data structures BPF programs receive 'struct pt_regs *' as an input ('struct pt_regs' is architecture dependent) Note, kprobes are _not_ a stable kernel ABI, so bpf programs attached to kprobes must be recompiled for every kernel version and user must supply correct LINUX_VERSION_CODE in attr.kern_version during bpf_prog_load() call. Signed-off-by: Alexei Starovoitov a...@plumgrid.com --- include/linux/ftrace_event.h| 14 ++ include/uapi/linux/bpf.h|3 ++ include/uapi/linux/perf_event.h |1 + kernel/bpf/syscall.c|7 ++- kernel/events/core.c| 59 +++ kernel/trace/Makefile |1 + kernel/trace/bpf_trace.c| 99 +++ kernel/trace/trace_kprobe.c | 10 +++- 8 files changed, 192 insertions(+), 2 deletions(-) create mode 100644 kernel/trace/bpf_trace.c diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index c674ee8f7fca..0aa535bc9f05 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -13,6 +13,7 @@ struct trace_array; struct trace_buffer; struct tracer; struct dentry; +struct bpf_prog; struct trace_print_flags { unsigned long mask; @@ -252,6 +253,7 @@ enum { TRACE_EVENT_FL_WAS_ENABLED_BIT, TRACE_EVENT_FL_USE_CALL_FILTER_BIT, TRACE_EVENT_FL_TRACEPOINT_BIT, + TRACE_EVENT_FL_KPROBE_BIT, }; /* @@ -265,6 +267,7 @@ enum { * it is best to clear the buffers that used it). * USE_CALL_FILTER - For ftrace internal events, don't use file filter * TRACEPOINT- Event is a tracepoint + * KPROBE- Event is a kprobe */ enum { TRACE_EVENT_FL_FILTERED = (1 TRACE_EVENT_FL_FILTERED_BIT), @@ -274,6 +277,7 @@ enum { TRACE_EVENT_FL_WAS_ENABLED = (1 TRACE_EVENT_FL_WAS_ENABLED_BIT), TRACE_EVENT_FL_USE_CALL_FILTER = (1 TRACE_EVENT_FL_USE_CALL_FILTER_BIT), TRACE_EVENT_FL_TRACEPOINT = (1 TRACE_EVENT_FL_TRACEPOINT_BIT), + TRACE_EVENT_FL_KPROBE = (1 TRACE_EVENT_FL_KPROBE_BIT), }; struct ftrace_event_call { @@ -303,6 +307,7 @@ struct ftrace_event_call { #ifdef CONFIG_PERF_EVENTS int perf_refcount; struct hlist_head __percpu *perf_events; + struct bpf_prog *prog; int (*perf_perm)(struct ftrace_event_call *, struct perf_event *); @@ -548,6 +553,15 @@ event_trigger_unlock_commit_regs(struct ftrace_event_file *file, event_triggers_post_call(file, tt); } +#ifdef CONFIG_BPF_SYSCALL +unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx); +#else +static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) +{ + return 1; +} +#endif + enum { FILTER_OTHER = 0, FILTER_STATIC_STRING, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 45da7ec7d274..4486d36d2e9e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -118,6 +118,7 @@ enum bpf_map_type { enum bpf_prog_type { BPF_PROG_TYPE_UNSPEC, BPF_PROG_TYPE_SOCKET_FILTER, + BPF_PROG_TYPE_KPROBE, }; /* flags for BPF_MAP_UPDATE_ELEM command */ @@ -151,6 +152,7 @@ union bpf_attr { __u32 log_level; /* verbosity level of verifier */ __u32 log_size; /* size of user buffer */ __aligned_u64 log_buf;/* user supplied buffer */ + __u32 kern_version; /* checked when type=kprobe */ }; } __attribute__((aligned(8))); @@ -162,6 +164,7 @@ enum bpf_func_id { BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(map, key) */ BPF_FUNC_map_update_elem, /* int map_update_elem(map, key, value, flags) */ BPF_FUNC_map_delete_elem, /* int map_delete_elem(map, key) */ + BPF_FUNC_probe_read, /* int bpf_probe_read(void *dst, int size, void *src) */ __BPF_FUNC_MAX_ID, }; diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 3c8b45de57ec..ad4dade2a502 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -382,6 +382,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) #define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) #define