Re: [PATCH net-next 4/6] perf, bpf: add perf events core support for BPF_PROG_TYPE_PERF_EVENT programs

2016-08-30 Thread Alexei Starovoitov
On Mon, Aug 29, 2016 at 02:17:18PM +0200, Peter Zijlstra wrote:
> On Fri, Aug 26, 2016 at 07:31:22PM -0700, Alexei Starovoitov wrote:
> > +static int perf_event_set_bpf_handler(struct perf_event *event, u32 
> > prog_fd)
> > +{
> > +   struct bpf_prog *prog;
> > +
> > +   if (event->overflow_handler_context)
> > +   /* hw breakpoint or kernel counter */
> > +   return -EINVAL;
> > +
> > +   if (event->prog)
> > +   return -EEXIST;
> > +
> > +   prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
> > +   if (IS_ERR(prog))
> > +   return PTR_ERR(prog);
> > +
> > +   event->prog = prog;
> > +   event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
> > +   WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
> > +   return 0;
> > +}
> > +
> > +static void perf_event_free_bpf_handler(struct perf_event *event)
> > +{
> > +   struct bpf_prog *prog = event->prog;
> > +
> > +   if (!prog)
> > +   return;
> 
> Does it make sense to do something like:
> 
>   WARN_ON_ONCE(event->overflow_handler != bpf_overflow_handler);

Yes that's an implicit assumption here, but checking for that
would be overkill. event->overflow_handler and event->prog are set
back to back in two places and reset here once together.
Such warn_on will only make people reading this code in the future
think that this bit is too complex to analyze by hand.

> > +
> > +   WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler);
> > +   event->prog = NULL;
> > +   bpf_prog_put(prog);
> > +}
> 
> 
> >  static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
> >  {
> > bool is_kprobe, is_tracepoint;
> > struct bpf_prog *prog;
> >  
> > +   if (event->attr.type == PERF_TYPE_HARDWARE ||
> > +   event->attr.type == PERF_TYPE_SOFTWARE)
> > +   return perf_event_set_bpf_handler(event, prog_fd);
> > +
> > if (event->attr.type != PERF_TYPE_TRACEPOINT)
> > return -EINVAL;
> >  
> > @@ -7647,6 +7711,8 @@ static void perf_event_free_bpf_prog(struct 
> > perf_event *event)
> >  {
> > struct bpf_prog *prog;
> >  
> > +   perf_event_free_bpf_handler(event);
> > +
> > if (!event->tp_event)
> > return;
> >  
> 
> Does it at all make sense to merge the tp_event->prog thing into this
> new event->prog?

'struct trace_event_call *tp_event' is global while tp_event->perf_events
are per cpu, so I don't see how we can do that without breaking user space
logic. Right now users do single perf_event_open of kprobe and attach prog
that is executed on all cpus where kprobe is firing. Additional per-cpu
filtering is done from within bpf prog.

> >  #ifdef CONFIG_HAVE_HW_BREAKPOINT
> > @@ -8957,6 +9029,14 @@ perf_event_alloc(struct perf_event_attr *attr, int 
> > cpu,
> > if (!overflow_handler && parent_event) {
> > overflow_handler = parent_event->overflow_handler;
> > context = parent_event->overflow_handler_context;
> > +   if (overflow_handler == bpf_overflow_handler) {
> > +   event->prog = bpf_prog_inc(parent_event->prog);
> > +   event->orig_overflow_handler = 
> > parent_event->orig_overflow_handler;
> > +   if (IS_ERR(event->prog)) {
> > +   event->prog = NULL;
> > +   overflow_handler = NULL;
> > +   }
> > +   }
> > }
> 
> Should we not fail the entire perf_event_alloc() call in that IS_ERR()
> case?

Yes. Good point. Will do.



Re: [PATCH net-next 4/6] perf, bpf: add perf events core support for BPF_PROG_TYPE_PERF_EVENT programs

2016-08-30 Thread Alexei Starovoitov
On Mon, Aug 29, 2016 at 02:17:18PM +0200, Peter Zijlstra wrote:
> On Fri, Aug 26, 2016 at 07:31:22PM -0700, Alexei Starovoitov wrote:
> > +static int perf_event_set_bpf_handler(struct perf_event *event, u32 
> > prog_fd)
> > +{
> > +   struct bpf_prog *prog;
> > +
> > +   if (event->overflow_handler_context)
> > +   /* hw breakpoint or kernel counter */
> > +   return -EINVAL;
> > +
> > +   if (event->prog)
> > +   return -EEXIST;
> > +
> > +   prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
> > +   if (IS_ERR(prog))
> > +   return PTR_ERR(prog);
> > +
> > +   event->prog = prog;
> > +   event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
> > +   WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
> > +   return 0;
> > +}
> > +
> > +static void perf_event_free_bpf_handler(struct perf_event *event)
> > +{
> > +   struct bpf_prog *prog = event->prog;
> > +
> > +   if (!prog)
> > +   return;
> 
> Does it make sense to do something like:
> 
>   WARN_ON_ONCE(event->overflow_handler != bpf_overflow_handler);

Yes that's an implicit assumption here, but checking for that
would be overkill. event->overflow_handler and event->prog are set
back to back in two places and reset here once together.
Such warn_on will only make people reading this code in the future
think that this bit is too complex to analyze by hand.

> > +
> > +   WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler);
> > +   event->prog = NULL;
> > +   bpf_prog_put(prog);
> > +}
> 
> 
> >  static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
> >  {
> > bool is_kprobe, is_tracepoint;
> > struct bpf_prog *prog;
> >  
> > +   if (event->attr.type == PERF_TYPE_HARDWARE ||
> > +   event->attr.type == PERF_TYPE_SOFTWARE)
> > +   return perf_event_set_bpf_handler(event, prog_fd);
> > +
> > if (event->attr.type != PERF_TYPE_TRACEPOINT)
> > return -EINVAL;
> >  
> > @@ -7647,6 +7711,8 @@ static void perf_event_free_bpf_prog(struct 
> > perf_event *event)
> >  {
> > struct bpf_prog *prog;
> >  
> > +   perf_event_free_bpf_handler(event);
> > +
> > if (!event->tp_event)
> > return;
> >  
> 
> Does it at all make sense to merge the tp_event->prog thing into this
> new event->prog?

'struct trace_event_call *tp_event' is global while tp_event->perf_events
are per cpu, so I don't see how we can do that without breaking user space
logic. Right now users do single perf_event_open of kprobe and attach prog
that is executed on all cpus where kprobe is firing. Additional per-cpu
filtering is done from within bpf prog.

> >  #ifdef CONFIG_HAVE_HW_BREAKPOINT
> > @@ -8957,6 +9029,14 @@ perf_event_alloc(struct perf_event_attr *attr, int 
> > cpu,
> > if (!overflow_handler && parent_event) {
> > overflow_handler = parent_event->overflow_handler;
> > context = parent_event->overflow_handler_context;
> > +   if (overflow_handler == bpf_overflow_handler) {
> > +   event->prog = bpf_prog_inc(parent_event->prog);
> > +   event->orig_overflow_handler = 
> > parent_event->orig_overflow_handler;
> > +   if (IS_ERR(event->prog)) {
> > +   event->prog = NULL;
> > +   overflow_handler = NULL;
> > +   }
> > +   }
> > }
> 
> Should we not fail the entire perf_event_alloc() call in that IS_ERR()
> case?

Yes. Good point. Will do.



Re: [PATCH net-next 4/6] perf, bpf: add perf events core support for BPF_PROG_TYPE_PERF_EVENT programs

2016-08-29 Thread Peter Zijlstra
On Fri, Aug 26, 2016 at 07:31:22PM -0700, Alexei Starovoitov wrote:
> +static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
> +{
> + struct bpf_prog *prog;
> +
> + if (event->overflow_handler_context)
> + /* hw breakpoint or kernel counter */
> + return -EINVAL;
> +
> + if (event->prog)
> + return -EEXIST;
> +
> + prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
> + if (IS_ERR(prog))
> + return PTR_ERR(prog);
> +
> + event->prog = prog;
> + event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
> + WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
> + return 0;
> +}
> +
> +static void perf_event_free_bpf_handler(struct perf_event *event)
> +{
> + struct bpf_prog *prog = event->prog;
> +
> + if (!prog)
> + return;

Does it make sense to do something like:

WARN_ON_ONCE(event->overflow_handler != bpf_overflow_handler);

?

> +
> + WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler);
> + event->prog = NULL;
> + bpf_prog_put(prog);
> +}


>  static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
>  {
>   bool is_kprobe, is_tracepoint;
>   struct bpf_prog *prog;
>  
> + if (event->attr.type == PERF_TYPE_HARDWARE ||
> + event->attr.type == PERF_TYPE_SOFTWARE)
> + return perf_event_set_bpf_handler(event, prog_fd);
> +
>   if (event->attr.type != PERF_TYPE_TRACEPOINT)
>   return -EINVAL;
>  
> @@ -7647,6 +7711,8 @@ static void perf_event_free_bpf_prog(struct perf_event 
> *event)
>  {
>   struct bpf_prog *prog;
>  
> + perf_event_free_bpf_handler(event);
> +
>   if (!event->tp_event)
>   return;
>  

Does it at all make sense to merge the tp_event->prog thing into this
new event->prog?

>  #ifdef CONFIG_HAVE_HW_BREAKPOINT
> @@ -8957,6 +9029,14 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
>   if (!overflow_handler && parent_event) {
>   overflow_handler = parent_event->overflow_handler;
>   context = parent_event->overflow_handler_context;
> + if (overflow_handler == bpf_overflow_handler) {
> + event->prog = bpf_prog_inc(parent_event->prog);
> + event->orig_overflow_handler = 
> parent_event->orig_overflow_handler;
> + if (IS_ERR(event->prog)) {
> + event->prog = NULL;
> + overflow_handler = NULL;
> + }
> + }
>   }

Should we not fail the entire perf_event_alloc() call in that IS_ERR()
case?



Re: [PATCH net-next 4/6] perf, bpf: add perf events core support for BPF_PROG_TYPE_PERF_EVENT programs

2016-08-29 Thread Peter Zijlstra
On Fri, Aug 26, 2016 at 07:31:22PM -0700, Alexei Starovoitov wrote:
> +static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
> +{
> + struct bpf_prog *prog;
> +
> + if (event->overflow_handler_context)
> + /* hw breakpoint or kernel counter */
> + return -EINVAL;
> +
> + if (event->prog)
> + return -EEXIST;
> +
> + prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
> + if (IS_ERR(prog))
> + return PTR_ERR(prog);
> +
> + event->prog = prog;
> + event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
> + WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
> + return 0;
> +}
> +
> +static void perf_event_free_bpf_handler(struct perf_event *event)
> +{
> + struct bpf_prog *prog = event->prog;
> +
> + if (!prog)
> + return;

Does it make sense to do something like:

WARN_ON_ONCE(event->overflow_handler != bpf_overflow_handler);

?

> +
> + WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler);
> + event->prog = NULL;
> + bpf_prog_put(prog);
> +}


>  static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
>  {
>   bool is_kprobe, is_tracepoint;
>   struct bpf_prog *prog;
>  
> + if (event->attr.type == PERF_TYPE_HARDWARE ||
> + event->attr.type == PERF_TYPE_SOFTWARE)
> + return perf_event_set_bpf_handler(event, prog_fd);
> +
>   if (event->attr.type != PERF_TYPE_TRACEPOINT)
>   return -EINVAL;
>  
> @@ -7647,6 +7711,8 @@ static void perf_event_free_bpf_prog(struct perf_event 
> *event)
>  {
>   struct bpf_prog *prog;
>  
> + perf_event_free_bpf_handler(event);
> +
>   if (!event->tp_event)
>   return;
>  

Does it at all make sense to merge the tp_event->prog thing into this
new event->prog?

>  #ifdef CONFIG_HAVE_HW_BREAKPOINT
> @@ -8957,6 +9029,14 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
>   if (!overflow_handler && parent_event) {
>   overflow_handler = parent_event->overflow_handler;
>   context = parent_event->overflow_handler_context;
> + if (overflow_handler == bpf_overflow_handler) {
> + event->prog = bpf_prog_inc(parent_event->prog);
> + event->orig_overflow_handler = 
> parent_event->orig_overflow_handler;
> + if (IS_ERR(event->prog)) {
> + event->prog = NULL;
> + overflow_handler = NULL;
> + }
> + }
>   }

Should we not fail the entire perf_event_alloc() call in that IS_ERR()
case?



[PATCH net-next 4/6] perf, bpf: add perf events core support for BPF_PROG_TYPE_PERF_EVENT programs

2016-08-26 Thread Alexei Starovoitov
Allow attaching BPF_PROG_TYPE_PERF_EVENT programs to sw and hw perf events
via overflow_handler mechanism.
When program is attached the overflow_handlers become stacked.
The program acts as a filter.
Returning zero from the program means that the normal perf_event_output handler
will not be called and sampling event won't be stored in the ring buffer.

The overflow_handler_context==NULL is an additional safety check
to make sure programs are not attached to hw breakpoints and watchdog
in case other checks (that prevent that now anyway) get accidentally
relaxed in the future.

The program refcnt is incremented in case perf_events are inhereted
when target task is forked.
Similar to kprobe and tracepoint programs there is no ioctl to
detach the program or swap already attached program. The user space
expected to close(perf_event_fd) like it does right now for kprobe+bpf.
That restriction simplifies the code quite a bit.

The invocation of overflow_handler in __perf_event_overflow() is now
done via READ_ONCE, since that pointer can be replaced when the program
is attached while perf_event itself could have been active already.
There is no need to do similar treatment for event->prog, since it's
assigned only once before it's accessed.

Signed-off-by: Alexei Starovoitov 
---
 include/linux/bpf.h|  4 +++
 include/linux/perf_event.h |  2 ++
 kernel/events/core.c   | 82 +-
 3 files changed, 87 insertions(+), 1 deletion(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 11134238417d..9a904f63f8c1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -297,6 +297,10 @@ static inline struct bpf_prog *bpf_prog_add(struct 
bpf_prog *prog, int i)
 static inline void bpf_prog_put(struct bpf_prog *prog)
 {
 }
+static inline struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
+{
+   return ERR_PTR(-EOPNOTSUPP);
+}
 #endif /* CONFIG_BPF_SYSCALL */
 
 /* verifier prototypes for helper functions called from eBPF programs */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 97bfe62f30d7..dcaaaf3ec8e6 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -679,6 +679,8 @@ struct perf_event {
u64 (*clock)(void);
perf_overflow_handler_t overflow_handler;
void*overflow_handler_context;
+   perf_overflow_handler_t orig_overflow_handler;
+   struct bpf_prog *prog;
 
 #ifdef CONFIG_EVENT_TRACING
struct trace_event_call *tp_event;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1903b8f3a705..282920cd8f5f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6987,7 +6987,7 @@ static int __perf_event_overflow(struct perf_event *event,
irq_work_queue(>pending);
}
 
-   event->overflow_handler(event, data, regs);
+   READ_ONCE(event->overflow_handler)(event, data, regs);
 
if (*perf_event_fasync(event) && event->pending_kill) {
event->pending_wakeup = 1;
@@ -7602,11 +7602,75 @@ static void perf_event_free_filter(struct perf_event 
*event)
ftrace_profile_free_filter(event);
 }
 
+static void bpf_overflow_handler(struct perf_event *event,
+struct perf_sample_data *data,
+struct pt_regs *regs)
+{
+   struct bpf_perf_event_data_kern ctx = {
+   .data = data,
+   .regs = regs,
+   };
+   int ret = 0;
+
+#ifdef CONFIG_BPF_SYSCALL
+   preempt_disable();
+   if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
+   goto out;
+   rcu_read_lock();
+   ret = BPF_PROG_RUN(event->prog, (void *));
+   rcu_read_unlock();
+ out:
+   __this_cpu_dec(bpf_prog_active);
+   preempt_enable();
+#endif
+   if (!ret)
+   return;
+
+   event->orig_overflow_handler(event, data, regs);
+}
+
+static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+{
+   struct bpf_prog *prog;
+
+   if (event->overflow_handler_context)
+   /* hw breakpoint or kernel counter */
+   return -EINVAL;
+
+   if (event->prog)
+   return -EEXIST;
+
+   prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
+   if (IS_ERR(prog))
+   return PTR_ERR(prog);
+
+   event->prog = prog;
+   event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
+   WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
+   return 0;
+}
+
+static void perf_event_free_bpf_handler(struct perf_event *event)
+{
+   struct bpf_prog *prog = event->prog;
+
+   if (!prog)
+   return;
+
+   WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler);
+   event->prog = NULL;
+   bpf_prog_put(prog);
+}
+
 static int 

[PATCH net-next 4/6] perf, bpf: add perf events core support for BPF_PROG_TYPE_PERF_EVENT programs

2016-08-26 Thread Alexei Starovoitov
Allow attaching BPF_PROG_TYPE_PERF_EVENT programs to sw and hw perf events
via overflow_handler mechanism.
When program is attached the overflow_handlers become stacked.
The program acts as a filter.
Returning zero from the program means that the normal perf_event_output handler
will not be called and sampling event won't be stored in the ring buffer.

The overflow_handler_context==NULL is an additional safety check
to make sure programs are not attached to hw breakpoints and watchdog
in case other checks (that prevent that now anyway) get accidentally
relaxed in the future.

The program refcnt is incremented in case perf_events are inhereted
when target task is forked.
Similar to kprobe and tracepoint programs there is no ioctl to
detach the program or swap already attached program. The user space
expected to close(perf_event_fd) like it does right now for kprobe+bpf.
That restriction simplifies the code quite a bit.

The invocation of overflow_handler in __perf_event_overflow() is now
done via READ_ONCE, since that pointer can be replaced when the program
is attached while perf_event itself could have been active already.
There is no need to do similar treatment for event->prog, since it's
assigned only once before it's accessed.

Signed-off-by: Alexei Starovoitov 
---
 include/linux/bpf.h|  4 +++
 include/linux/perf_event.h |  2 ++
 kernel/events/core.c   | 82 +-
 3 files changed, 87 insertions(+), 1 deletion(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 11134238417d..9a904f63f8c1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -297,6 +297,10 @@ static inline struct bpf_prog *bpf_prog_add(struct 
bpf_prog *prog, int i)
 static inline void bpf_prog_put(struct bpf_prog *prog)
 {
 }
+static inline struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
+{
+   return ERR_PTR(-EOPNOTSUPP);
+}
 #endif /* CONFIG_BPF_SYSCALL */
 
 /* verifier prototypes for helper functions called from eBPF programs */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 97bfe62f30d7..dcaaaf3ec8e6 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -679,6 +679,8 @@ struct perf_event {
u64 (*clock)(void);
perf_overflow_handler_t overflow_handler;
void*overflow_handler_context;
+   perf_overflow_handler_t orig_overflow_handler;
+   struct bpf_prog *prog;
 
 #ifdef CONFIG_EVENT_TRACING
struct trace_event_call *tp_event;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1903b8f3a705..282920cd8f5f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6987,7 +6987,7 @@ static int __perf_event_overflow(struct perf_event *event,
irq_work_queue(>pending);
}
 
-   event->overflow_handler(event, data, regs);
+   READ_ONCE(event->overflow_handler)(event, data, regs);
 
if (*perf_event_fasync(event) && event->pending_kill) {
event->pending_wakeup = 1;
@@ -7602,11 +7602,75 @@ static void perf_event_free_filter(struct perf_event 
*event)
ftrace_profile_free_filter(event);
 }
 
+static void bpf_overflow_handler(struct perf_event *event,
+struct perf_sample_data *data,
+struct pt_regs *regs)
+{
+   struct bpf_perf_event_data_kern ctx = {
+   .data = data,
+   .regs = regs,
+   };
+   int ret = 0;
+
+#ifdef CONFIG_BPF_SYSCALL
+   preempt_disable();
+   if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
+   goto out;
+   rcu_read_lock();
+   ret = BPF_PROG_RUN(event->prog, (void *));
+   rcu_read_unlock();
+ out:
+   __this_cpu_dec(bpf_prog_active);
+   preempt_enable();
+#endif
+   if (!ret)
+   return;
+
+   event->orig_overflow_handler(event, data, regs);
+}
+
+static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+{
+   struct bpf_prog *prog;
+
+   if (event->overflow_handler_context)
+   /* hw breakpoint or kernel counter */
+   return -EINVAL;
+
+   if (event->prog)
+   return -EEXIST;
+
+   prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
+   if (IS_ERR(prog))
+   return PTR_ERR(prog);
+
+   event->prog = prog;
+   event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
+   WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
+   return 0;
+}
+
+static void perf_event_free_bpf_handler(struct perf_event *event)
+{
+   struct bpf_prog *prog = event->prog;
+
+   if (!prog)
+   return;
+
+   WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler);
+   event->prog = NULL;
+   bpf_prog_put(prog);
+}
+
 static int perf_event_set_bpf_prog(struct