On Wed, Dec 12, 2018 at 04:42:37PM -0800, Matt Mullins wrote:
> Distributions build drivers as modules, including network and filesystem
> drivers which export numerous tracepoints. This enables
> bpf(BPF_RAW_TRACEPOINT_OPEN) to attach to those tracepoints.
>
> Signed-off-by: Matt Mullins <mmull...@fb.com>
> ---
> v1->v2:
>   * avoid taking the mutex in bpf_event_notify when op is neither COMING nor
>     GOING.
>   * check that kzalloc actually succeeded
>
> I didn't try to check list_empty before taking the mutex since I want to avoid
> races between bpf_event_notify and bpf_get_raw_tracepoint. Additionally,
> list_for_each_entry_safe is not strictly necessary upon MODULE_STATE_GOING, but
> Alexei suggested I use it to protect against fragility if the subsequent break;
> eventually disappears.
>
>  include/linux/module.h       |  4 ++
>  include/linux/trace_events.h |  8 ++-
>  kernel/bpf/syscall.c         | 11 ++--
>  kernel/module.c              |  5 ++
>  kernel/trace/bpf_trace.c     | 99 +++++++++++++++++++++++++++++++++++-
>  5 files changed, 120 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/module.h b/include/linux/module.h
> index fce6b4335e36..5f147dd5e709 100644
> --- a/include/linux/module.h
> +++ b/include/linux/module.h
> @@ -432,6 +432,10 @@ struct module {
>  	unsigned int num_tracepoints;
>  	tracepoint_ptr_t *tracepoints_ptrs;
>  #endif
> +#ifdef CONFIG_BPF_EVENTS
> +	unsigned int num_bpf_raw_events;
> +	struct bpf_raw_event_map *bpf_raw_events;
> +#endif
>  #ifdef HAVE_JUMP_LABEL
>  	struct jump_entry *jump_entries;
>  	unsigned int num_jump_entries;
> diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
> index 4130a5497d40..8a62731673f7 100644
> --- a/include/linux/trace_events.h
> +++ b/include/linux/trace_events.h
> @@ -471,7 +471,8 @@ void perf_event_detach_bpf_prog(struct perf_event *event);
>  int perf_event_query_prog_array(struct perf_event *event, void __user *info);
>  int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
>  int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
> -struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name);
> +struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name);
> +void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp);
>  int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
>  			    u32 *fd_type, const char **buf,
>  			    u64 *probe_offset, u64 *probe_addr);
> @@ -502,10 +503,13 @@ static inline int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf
>  {
>  	return -EOPNOTSUPP;
>  }
> -static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
> +static inline struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
>  {
>  	return NULL;
>  }
> +static inline void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
> +{
> +}
>  static inline int bpf_get_perf_event_info(const struct perf_event *event,
>  					  u32 *prog_id, u32 *fd_type,
>  					  const char **buf, u64 *probe_offset,
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 70fb11106fc2..754370e3155e 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -1609,6 +1609,7 @@ static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp)
>  		bpf_probe_unregister(raw_tp->btp, raw_tp->prog);
>  		bpf_prog_put(raw_tp->prog);
>  	}
> +	bpf_put_raw_tracepoint(raw_tp->btp);
>  	kfree(raw_tp);
>  	return 0;
>  }
> @@ -1634,13 +1635,15 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
>  		return -EFAULT;
>  	tp_name[sizeof(tp_name) - 1] = 0;
>
> -	btp = bpf_find_raw_tracepoint(tp_name);
> +	btp = bpf_get_raw_tracepoint(tp_name);
>  	if (!btp)
>  		return -ENOENT;
>
>  	raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER);
> -	if (!raw_tp)
> -		return -ENOMEM;
> +	if (!raw_tp) {
> +		err = -ENOMEM;
> +		goto out_put_btp;
> +	}
>  	raw_tp->btp = btp;
>
>  	prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd,
> @@ -1668,6 +1671,8 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
>  	bpf_prog_put(prog);
>  out_free_tp:
>  	kfree(raw_tp);
> +out_put_btp:
> +	bpf_put_raw_tracepoint(btp);
>  	return err;
>  }
>
> diff --git a/kernel/module.c b/kernel/module.c
> index 49a405891587..06ec68f08387 100644
> --- a/kernel/module.c
> +++ b/kernel/module.c
> @@ -3093,6 +3093,11 @@ static int find_module_sections(struct module *mod, struct load_info *info)
>  					     sizeof(*mod->tracepoints_ptrs),
>  					     &mod->num_tracepoints);
>  #endif
> +#ifdef CONFIG_BPF_EVENTS
> +	mod->bpf_raw_events = section_objs(info, "__bpf_raw_tp_map",
> +					   sizeof(*mod->bpf_raw_events),
> +					   &mod->num_bpf_raw_events);
> +#endif
>  #ifdef HAVE_JUMP_LABEL
>  	mod->jump_entries = section_objs(info, "__jump_table",
>  					sizeof(*mod->jump_entries),
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 9864a35c8bb5..9ddb6fddb4e0 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -17,6 +17,43 @@
>  #include "trace_probe.h"
>  #include "trace.h"
>
> +#ifdef CONFIG_MODULES
> +struct bpf_trace_module {
> +	struct module *module;
> +	struct list_head list;
> +};
> +
> +static LIST_HEAD(bpf_trace_modules);
> +static DEFINE_MUTEX(bpf_module_mutex);
> +
> +static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
> +{
> +	struct bpf_raw_event_map *btp, *ret = NULL;
> +	struct bpf_trace_module *btm;
> +	unsigned int i;
> +
> +	mutex_lock(&bpf_module_mutex);
> +	list_for_each_entry(btm, &bpf_trace_modules, list) {
> +		for (i = 0; i < btm->module->num_bpf_raw_events; ++i) {
> +			btp = &btm->module->bpf_raw_events[i];
> +			if (!strcmp(btp->tp->name, name)) {
> +				if (try_module_get(btm->module))
> +					ret = btp;
> +				goto out;
> +			}
> +		}
> +	}
> +out:
> +	mutex_unlock(&bpf_module_mutex);
> +	return ret;
> +}
> +#else
> +static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
> +{
> +	return NULL;
> +}
> +#endif /* CONFIG_MODULES */
> +
>  u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
>  u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
>
> @@ -1076,7 +1113,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
>  extern struct bpf_raw_event_map __start__bpf_raw_tp[];
>  extern struct bpf_raw_event_map __stop__bpf_raw_tp[];
>
> -struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
> +struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
>  {
>  	struct bpf_raw_event_map *btp = __start__bpf_raw_tp;
>
> @@ -1084,7 +1121,16 @@ struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
>  		if (!strcmp(btp->tp->name, name))
>  			return btp;
>  	}
> -	return NULL;
> +
> +	return bpf_get_raw_tracepoint_module(name);
> +}
> +
> +void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
> +{
> +	struct module *mod = __module_address((unsigned long)btp);
> +
> +	if (mod)
> +		module_put(mod);
>  }
>
>  static __always_inline
> @@ -1222,3 +1268,52 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
>
>  	return err;
>  }
> +
> +#ifdef CONFIG_MODULES
> +int bpf_event_notify(struct notifier_block *nb, unsigned long op, void *module)
> +{
> +	struct bpf_trace_module *btm, *tmp;
> +	struct module *mod = module;
> +
> +	if (mod->num_bpf_raw_events == 0 ||
> +	    (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
> +		return 0;
> +
> +	mutex_lock(&bpf_module_mutex);
> +
> +	switch (op) {
> +	case MODULE_STATE_COMING:
> +		btm = kzalloc(sizeof(*btm), GFP_KERNEL);
> +		if (btm) {
> +			btm->module = module;
> +			list_add(&btm->list, &bpf_trace_modules);
> +		}

Is it fine to return 0 in the !btm case?
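If it is not, one way to surface the failure (an untested sketch, not part of this patch: notifier_from_errno() is from <linux/notifier.h>, a local "ret" would need to be declared, initialized to 0, and returned in place of the literal 0, and whether the module loader acts on the error is a separate question) could be:

	case MODULE_STATE_COMING:
		btm = kzalloc(sizeof(*btm), GFP_KERNEL);
		if (btm) {
			btm->module = module;
			list_add(&btm->list, &bpf_trace_modules);
		} else {
			/* report the allocation failure to the notifier chain
			 * instead of silently dropping this module's raw
			 * tracepoints
			 */
			ret = notifier_from_errno(-ENOMEM);
		}
		break;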
Otherwise this looks good to me.

> +		break;
> +	case MODULE_STATE_GOING:
> +		list_for_each_entry_safe(btm, tmp, &bpf_trace_modules, list) {
> +			if (btm->module == module) {
> +				list_del(&btm->list);
> +				kfree(btm);
> +				break;
> +			}
> +		}
> +		break;
> +	}
> +
> +	mutex_unlock(&bpf_module_mutex);
> +
> +	return 0;
> +}
> +
> +static struct notifier_block bpf_module_nb = {
> +	.notifier_call = bpf_event_notify,
> +};
> +
> +int __init bpf_event_init(void)
> +{
> +	register_module_notifier(&bpf_module_nb);
> +	return 0;
> +}
> +
> +fs_initcall(bpf_event_init);
> +#endif /* CONFIG_MODULES */
> -- 
> 2.17.1
>
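For anyone following along, attaching to one of these module-exported tracepoints from userspace would look roughly like the sketch below. The names here are placeholders, not from the patch: prog_fd is assumed to be an already-loaded BPF_PROG_TYPE_RAW_TRACEPOINT program, and tp_name is assumed to be a tracepoint exported by a loaded module.

#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* Attach an already-loaded raw-tracepoint program to a tracepoint by name.
 * The returned fd keeps the attachment alive; with this patch it also pins
 * the module that exports the tracepoint until the fd is closed.
 */
static int raw_tp_attach(const char *tp_name, int prog_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.raw_tracepoint.name = (uint64_t)(unsigned long)tp_name;
	attr.raw_tracepoint.prog_fd = prog_fd;

	return syscall(__NR_bpf, BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
}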