[PATCH v5 tip 2/7] tracing: attach BPF programs to kprobes

2015-03-01 Thread Alexei Starovoitov
User interface:
struct perf_event_attr attr = {.type = PERF_TYPE_TRACEPOINT, .config = 
event_id, ...};
event_fd = perf_event_open(&attr,...);
ioctl(event_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);

prog_fd is a file descriptor associated with BPF program previously loaded.
event_id is an ID of created kprobe

close(event_fd) - automatically detaches BPF program from it

BPF programs can call in-kernel helper functions to:
- lookup/update/delete elements in maps
- probe_read - wrapper of probe_kernel_read() used to access any kernel
  data structures

BPF programs receive 'struct pt_regs *' as an input
('struct pt_regs' is architecture dependent)

Note, kprobes are _not_ a stable kernel ABI, so bpf programs attached to
kprobes must be recompiled for every kernel version and user must supply correct
LINUX_VERSION_CODE in attr.kern_version during bpf_prog_load() call.

Signed-off-by: Alexei Starovoitov 
---
 include/linux/ftrace_event.h|   14 ++
 include/uapi/linux/bpf.h|3 ++
 include/uapi/linux/perf_event.h |1 +
 kernel/bpf/syscall.c|7 ++-
 kernel/events/core.c|   59 +++
 kernel/trace/Makefile   |1 +
 kernel/trace/bpf_trace.c|   99 +++
 kernel/trace/trace_kprobe.c |   10 +++-
 8 files changed, 192 insertions(+), 2 deletions(-)
 create mode 100644 kernel/trace/bpf_trace.c

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index c674ee8f7fca..0aa535bc9f05 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -13,6 +13,7 @@ struct trace_array;
 struct trace_buffer;
 struct tracer;
 struct dentry;
+struct bpf_prog;
 
 struct trace_print_flags {
unsigned long   mask;
@@ -252,6 +253,7 @@ enum {
TRACE_EVENT_FL_WAS_ENABLED_BIT,
TRACE_EVENT_FL_USE_CALL_FILTER_BIT,
TRACE_EVENT_FL_TRACEPOINT_BIT,
+   TRACE_EVENT_FL_KPROBE_BIT,
 };
 
 /*
@@ -265,6 +267,7 @@ enum {
  * it is best to clear the buffers that used it).
  *  USE_CALL_FILTER - For ftrace internal events, don't use file filter
  *  TRACEPOINT- Event is a tracepoint
+ *  KPROBE- Event is a kprobe
  */
 enum {
TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT),
@@ -274,6 +277,7 @@ enum {
TRACE_EVENT_FL_WAS_ENABLED  = (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT),
TRACE_EVENT_FL_USE_CALL_FILTER  = (1 << 
TRACE_EVENT_FL_USE_CALL_FILTER_BIT),
TRACE_EVENT_FL_TRACEPOINT   = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
+   TRACE_EVENT_FL_KPROBE   = (1 << TRACE_EVENT_FL_KPROBE_BIT),
 };
 
 struct ftrace_event_call {
@@ -303,6 +307,7 @@ struct ftrace_event_call {
 #ifdef CONFIG_PERF_EVENTS
int perf_refcount;
struct hlist_head __percpu  *perf_events;
+   struct bpf_prog *prog;
 
int (*perf_perm)(struct ftrace_event_call *,
 struct perf_event *);
@@ -548,6 +553,15 @@ event_trigger_unlock_commit_regs(struct ftrace_event_file 
*file,
event_triggers_post_call(file, tt);
 }
 
+#ifdef CONFIG_BPF_SYSCALL
+unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
+#else
+static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
+{
+   return 1;
+}
+#endif
+
 enum {
FILTER_OTHER = 0,
FILTER_STATIC_STRING,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 45da7ec7d274..4486d36d2e9e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -118,6 +118,7 @@ enum bpf_map_type {
 enum bpf_prog_type {
BPF_PROG_TYPE_UNSPEC,
BPF_PROG_TYPE_SOCKET_FILTER,
+   BPF_PROG_TYPE_KPROBE,
 };
 
 /* flags for BPF_MAP_UPDATE_ELEM command */
@@ -151,6 +152,7 @@ union bpf_attr {
__u32   log_level;  /* verbosity level of verifier 
*/
__u32   log_size;   /* size of user buffer */
__aligned_u64   log_buf;/* user supplied buffer */
+   __u32   kern_version;   /* checked when type=kprobe */
};
 } __attribute__((aligned(8)));
 
@@ -162,6 +164,7 @@ enum bpf_func_id {
BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */
BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, 
flags) */
BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
+   BPF_FUNC_probe_read,  /* int bpf_probe_read(void *dst, int size, 
void *src) */
__BPF_FUNC_MAX_ID,
 };
 
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 3c8b45de57ec..ad4dade2a502 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -382,6 +382,7 @@ struct perf_event_attr {
 #define PERF_EVENT_IOC_SET_OUTPUT  _IO ('$', 5)
 #define PERF_EVENT_IOC_SET_FILTER  _IOW('$', 6, char *)
 #define PERF_EVENT_IOC_ID  _IOR('$', 7, 

[PATCH v5 tip 2/7] tracing: attach BPF programs to kprobes

2015-03-01 Thread Alexei Starovoitov
User interface:
struct perf_event_attr attr = {.type = PERF_TYPE_TRACEPOINT, .config = 
event_id, ...};
event_fd = perf_event_open(&attr,...);
ioctl(event_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);

prog_fd is a file descriptor associated with BPF program previously loaded.
event_id is an ID of created kprobe

close(event_fd) - automatically detaches BPF program from it

BPF programs can call in-kernel helper functions to:
- lookup/update/delete elements in maps
- probe_read - wrapper of probe_kernel_read() used to access any kernel
  data structures

BPF programs receive 'struct pt_regs *' as an input
('struct pt_regs' is architecture dependent)

Note, kprobes are _not_ a stable kernel ABI, so bpf programs attached to
kprobes must be recompiled for every kernel version and user must supply correct
LINUX_VERSION_CODE in attr.kern_version during bpf_prog_load() call.

Signed-off-by: Alexei Starovoitov a...@plumgrid.com
---
 include/linux/ftrace_event.h|   14 ++
 include/uapi/linux/bpf.h|3 ++
 include/uapi/linux/perf_event.h |1 +
 kernel/bpf/syscall.c|7 ++-
 kernel/events/core.c|   59 +++
 kernel/trace/Makefile   |1 +
 kernel/trace/bpf_trace.c|   99 +++
 kernel/trace/trace_kprobe.c |   10 +++-
 8 files changed, 192 insertions(+), 2 deletions(-)
 create mode 100644 kernel/trace/bpf_trace.c

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index c674ee8f7fca..0aa535bc9f05 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -13,6 +13,7 @@ struct trace_array;
 struct trace_buffer;
 struct tracer;
 struct dentry;
+struct bpf_prog;
 
 struct trace_print_flags {
unsigned long   mask;
@@ -252,6 +253,7 @@ enum {
TRACE_EVENT_FL_WAS_ENABLED_BIT,
TRACE_EVENT_FL_USE_CALL_FILTER_BIT,
TRACE_EVENT_FL_TRACEPOINT_BIT,
+   TRACE_EVENT_FL_KPROBE_BIT,
 };
 
 /*
@@ -265,6 +267,7 @@ enum {
  * it is best to clear the buffers that used it).
  *  USE_CALL_FILTER - For ftrace internal events, don't use file filter
  *  TRACEPOINT- Event is a tracepoint
+ *  KPROBE- Event is a kprobe
  */
 enum {
TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT),
@@ -274,6 +277,7 @@ enum {
TRACE_EVENT_FL_WAS_ENABLED  = (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT),
TRACE_EVENT_FL_USE_CALL_FILTER  = (1 << 
TRACE_EVENT_FL_USE_CALL_FILTER_BIT),
TRACE_EVENT_FL_TRACEPOINT   = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT),
+   TRACE_EVENT_FL_KPROBE   = (1 << TRACE_EVENT_FL_KPROBE_BIT),
 };
 
 struct ftrace_event_call {
@@ -303,6 +307,7 @@ struct ftrace_event_call {
 #ifdef CONFIG_PERF_EVENTS
int perf_refcount;
struct hlist_head __percpu  *perf_events;
+   struct bpf_prog *prog;
 
int (*perf_perm)(struct ftrace_event_call *,
 struct perf_event *);
@@ -548,6 +553,15 @@ event_trigger_unlock_commit_regs(struct ftrace_event_file 
*file,
event_triggers_post_call(file, tt);
 }
 
+#ifdef CONFIG_BPF_SYSCALL
+unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
+#else
+static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
+{
+   return 1;
+}
+#endif
+
 enum {
FILTER_OTHER = 0,
FILTER_STATIC_STRING,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 45da7ec7d274..4486d36d2e9e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -118,6 +118,7 @@ enum bpf_map_type {
 enum bpf_prog_type {
BPF_PROG_TYPE_UNSPEC,
BPF_PROG_TYPE_SOCKET_FILTER,
+   BPF_PROG_TYPE_KPROBE,
 };
 
 /* flags for BPF_MAP_UPDATE_ELEM command */
@@ -151,6 +152,7 @@ union bpf_attr {
__u32   log_level;  /* verbosity level of verifier 
*/
__u32   log_size;   /* size of user buffer */
__aligned_u64   log_buf;/* user supplied buffer */
+   __u32   kern_version;   /* checked when type=kprobe */
};
 } __attribute__((aligned(8)));
 
@@ -162,6 +164,7 @@ enum bpf_func_id {
BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */
BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, 
flags) */
BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
+   BPF_FUNC_probe_read,  /* int bpf_probe_read(void *dst, int size, 
void *src) */
__BPF_FUNC_MAX_ID,
 };
 
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 3c8b45de57ec..ad4dade2a502 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -382,6 +382,7 @@ struct perf_event_attr {
 #define PERF_EVENT_IOC_SET_OUTPUT  _IO ('$', 5)
 #define PERF_EVENT_IOC_SET_FILTER  _IOW('$', 6, char *)
 #define