Re: [PATCH bpf-next v2] bpf: support raw tracepoints in modules

2018-12-18 Thread Alexei Starovoitov
On Wed, Dec 12, 2018 at 04:42:37PM -0800, Matt Mullins wrote:
> Distributions build drivers as modules, including network and filesystem
> drivers which export numerous tracepoints.  This enables
> bpf(BPF_RAW_TRACEPOINT_OPEN) to attach to those tracepoints.
> 
> Signed-off-by: Matt Mullins 
> ---
> v1->v2:
>   * avoid taking the mutex in bpf_event_notify when op is neither COMING nor
> GOING.
>   * check that kzalloc actually succeeded

Applied, Thanks
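
For context, the capability being merged is exercised from user space by loading a
BPF_PROG_TYPE_RAW_TRACEPOINT program and attaching it to a tracepoint by name.  A
minimal sketch using the libbpf wrapper around bpf(BPF_RAW_TRACEPOINT_OPEN); the
helper attach_raw_tp() and the assumption that prog_fd was loaded elsewhere are
illustrative, not taken from the patch:

#include <bpf/bpf.h>	/* bpf_raw_tracepoint_open() */
#include <stdio.h>

/* prog_fd: an already-loaded BPF_PROG_TYPE_RAW_TRACEPOINT program.
 * With this patch, tp_name is also resolved against tracepoints
 * exported by loaded modules, not just built-in ones. */
int attach_raw_tp(int prog_fd, const char *tp_name)
{
	int fd = bpf_raw_tracepoint_open(tp_name, prog_fd);

	if (fd < 0)
		perror("bpf_raw_tracepoint_open");
	return fd;	/* keeping this fd open keeps the program attached */
}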



Re: [PATCH bpf-next v2] bpf: support raw tracepoints in modules

2018-12-13 Thread Martin Lau
On Thu, Dec 13, 2018 at 11:38:51AM -0800, Matt Mullins wrote:
> On Thu, 2018-12-13 at 19:22 +, Martin Lau wrote:
> > On Wed, Dec 12, 2018 at 04:42:37PM -0800, Matt Mullins wrote:
> > > Distributions build drivers as modules, including network and filesystem
> > > drivers which export numerous tracepoints.  This enables
> > > bpf(BPF_RAW_TRACEPOINT_OPEN) to attach to those tracepoints.
> > >
Acked-by: Martin KaFai Lau 

[ ... ]

> > > +#ifdef CONFIG_MODULES
> > > +int bpf_event_notify(struct notifier_block *nb, unsigned long op, void *module)
> > > +{
> > > + struct bpf_trace_module *btm, *tmp;
> > > + struct module *mod = module;
> > > +
> > > + if (mod->num_bpf_raw_events == 0 ||
> > > + (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
> > > + return 0;
> > > +
> > > + mutex_lock(&bpf_module_mutex);
> > > +
> > > + switch (op) {
> > > + case MODULE_STATE_COMING:
> > > + btm = kzalloc(sizeof(*btm), GFP_KERNEL);
> > > + if (btm) {
> > > + btm->module = module;
> > > + list_add(&btm->list, &bpf_trace_modules);
> > > + }
> > 
> > Is it fine to return 0 on !btm case?
> 
> That effectively just means we'll be ignoring tracepoints for a module
> that is loaded while we can't allocate a bpf_trace_module (24 bytes) to
> track it.  That feels like reasonable behavior to me.
ok.
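
To see why this is harmless: the attach path can only discover module tracepoints
through the bpf_trace_modules list, so a module whose entry was never allocated is
simply not found, and BPF_RAW_TRACEPOINT_OPEN fails with -ENOENT.  A sketch of that
lookup, reconstructed from this discussion since the corresponding bpf_trace.c hunk
is not quoted here (the function name and exact body are assumptions):

static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
{
	struct bpf_raw_event_map *btp, *ret = NULL;
	struct bpf_trace_module *btm;
	unsigned int i;

	mutex_lock(&bpf_module_mutex);
	list_for_each_entry(btm, &bpf_trace_modules, list) {
		for (i = 0; i < btm->module->num_bpf_raw_events; ++i) {
			btp = &btm->module->bpf_raw_events[i];
			if (!strcmp(btp->tp->name, name)) {
				/* pin the module while a program may be attached */
				if (try_module_get(btm->module))
					ret = btp;
				goto out;
			}
		}
	}
out:
	mutex_unlock(&bpf_module_mutex);
	return ret;	/* NULL makes the caller return -ENOENT */
}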


Re: [PATCH bpf-next v2] bpf: support raw tracepoints in modules

2018-12-13 Thread Matt Mullins
On Thu, 2018-12-13 at 19:22 +, Martin Lau wrote:
> On Wed, Dec 12, 2018 at 04:42:37PM -0800, Matt Mullins wrote:
> > Distributions build drivers as modules, including network and filesystem
> > drivers which export numerous tracepoints.  This enables
> > bpf(BPF_RAW_TRACEPOINT_OPEN) to attach to those tracepoints.
> > 
> > [ ... ]

Re: [PATCH bpf-next v2] bpf: support raw tracepoints in modules

2018-12-13 Thread Martin Lau
On Wed, Dec 12, 2018 at 04:42:37PM -0800, Matt Mullins wrote:
> Distributions build drivers as modules, including network and filesystem
> drivers which export numerous tracepoints.  This enables
> bpf(BPF_RAW_TRACEPOINT_OPEN) to attach to those tracepoints.
> 
> [ ... ]

[PATCH bpf-next v2] bpf: support raw tracepoints in modules

2018-12-12 Thread Matt Mullins
Distributions build drivers as modules, including network and filesystem
drivers which export numerous tracepoints.  This enables
bpf(BPF_RAW_TRACEPOINT_OPEN) to attach to those tracepoints.

Signed-off-by: Matt Mullins 
---
v1->v2:
  * avoid taking the mutex in bpf_event_notify when op is neither COMING nor
GOING.
  * check that kzalloc actually succeeded

I didn't try to check list_empty before taking the mutex since I want to avoid
races between bpf_event_notify and bpf_get_raw_tracepoint.  Additionally,
list_for_each_entry_safe is not strictly necessary upon MODULE_STATE_GOING, but
Alexei suggested I use it to protect against fragility if the subsequent break;
eventually disappears.
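
For the MODULE_STATE_GOING note above, the cleanup amounts to the following; this is
a reconstruction sketched as a standalone helper (the patch performs it inside the
switch in bpf_event_notify, and the helper name here is only for illustration):

static void bpf_event_module_going(struct module *mod)
{
	struct bpf_trace_module *btm, *tmp;

	/* _safe form: the walk remains valid even if the break below is
	 * ever removed and the matching entry has already been freed. */
	list_for_each_entry_safe(btm, tmp, &bpf_trace_modules, list) {
		if (btm->module == mod) {
			list_del(&btm->list);
			kfree(btm);
			break;
		}
	}
}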

 include/linux/module.h   |  4 ++
 include/linux/trace_events.h |  8 ++-
 kernel/bpf/syscall.c | 11 ++--
 kernel/module.c  |  5 ++
 kernel/trace/bpf_trace.c | 99 +++-
 5 files changed, 120 insertions(+), 7 deletions(-)

diff --git a/include/linux/module.h b/include/linux/module.h
index fce6b4335e36..5f147dd5e709 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -432,6 +432,10 @@ struct module {
unsigned int num_tracepoints;
tracepoint_ptr_t *tracepoints_ptrs;
 #endif
+#ifdef CONFIG_BPF_EVENTS
+   unsigned int num_bpf_raw_events;
+   struct bpf_raw_event_map *bpf_raw_events;
+#endif
 #ifdef HAVE_JUMP_LABEL
struct jump_entry *jump_entries;
unsigned int num_jump_entries;
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 4130a5497d40..8a62731673f7 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -471,7 +471,8 @@ void perf_event_detach_bpf_prog(struct perf_event *event);
 int perf_event_query_prog_array(struct perf_event *event, void __user *info);
 int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
 int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
-struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name);
+struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name);
+void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp);
 int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
u32 *fd_type, const char **buf,
u64 *probe_offset, u64 *probe_addr);
@@ -502,10 +503,13 @@ static inline int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf
 {
return -EOPNOTSUPP;
 }
-static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
+static inline struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
 {
return NULL;
 }
+static inline void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
+{
+}
 static inline int bpf_get_perf_event_info(const struct perf_event *event,
  u32 *prog_id, u32 *fd_type,
  const char **buf, u64 *probe_offset,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 70fb11106fc2..754370e3155e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1609,6 +1609,7 @@ static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp)
bpf_probe_unregister(raw_tp->btp, raw_tp->prog);
bpf_prog_put(raw_tp->prog);
}
+   bpf_put_raw_tracepoint(raw_tp->btp);
kfree(raw_tp);
return 0;
 }
@@ -1634,13 +1635,15 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
return -EFAULT;
tp_name[sizeof(tp_name) - 1] = 0;
 
-   btp = bpf_find_raw_tracepoint(tp_name);
+   btp = bpf_get_raw_tracepoint(tp_name);
if (!btp)
return -ENOENT;
 
raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER);
-   if (!raw_tp)
-   return -ENOMEM;
+   if (!raw_tp) {
+   err = -ENOMEM;
+   goto out_put_btp;
+   }
raw_tp->btp = btp;
 
prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd,
@@ -1668,6 +1671,8 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
bpf_prog_put(prog);
 out_free_tp:
kfree(raw_tp);
+out_put_btp:
+   bpf_put_raw_tracepoint(btp);
return err;
 }
 
diff --git a/kernel/module.c b/kernel/module.c
index 49a405891587..06ec68f08387 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3093,6 +3093,11 @@ static int find_module_sections(struct module *mod, struct load_info *info)
 sizeof(*mod->tracepoints_ptrs),
 &mod->num_tracepoints);
 #endif
+#ifdef CONFIG_BPF_EVENTS
+   mod->bpf_raw_events = section_objs(info, "__bpf_raw_tp_map",
+  sizeof(*mod->bpf_raw_events),
+  &mod->num_bpf_raw_events);
+#endif
 #ifdef HAVE_JUMP_LABEL
mod->jump_entries