Distributions build drivers as modules, including network and filesystem
drivers which export numerous tracepoints.  This enables
bpf(BPF_RAW_TRACEPOINT_OPEN) to attach to those tracepoints.

Signed-off-by: Matt Mullins <mmull...@fb.com>
---
v1->v2:
  * avoid taking the mutex in bpf_event_notify when op is neither COMING nor
    GOING.
  * check that kzalloc actually succeeded

I didn't try to check list_empty before taking the mutex since I want to avoid
races between bpf_event_notify and bpf_get_raw_tracepoint.  Additionally,
list_for_each_entry_safe is not strictly necessary upon MODULE_STATE_GOING, but
Alexei suggested I use it to protect against fragility if the subsequent break;
eventually disappears.

 include/linux/module.h       |  4 ++
 include/linux/trace_events.h |  8 ++-
 kernel/bpf/syscall.c         | 11 ++--
 kernel/module.c              |  5 ++
 kernel/trace/bpf_trace.c     | 99 +++++++++++++++++++++++++++++++++++-
 5 files changed, 120 insertions(+), 7 deletions(-)

diff --git a/include/linux/module.h b/include/linux/module.h
index fce6b4335e36..5f147dd5e709 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -432,6 +432,10 @@ struct module {
        unsigned int num_tracepoints;
        tracepoint_ptr_t *tracepoints_ptrs;
 #endif
+#ifdef CONFIG_BPF_EVENTS
+       unsigned int num_bpf_raw_events;
+       struct bpf_raw_event_map *bpf_raw_events;
+#endif
 #ifdef HAVE_JUMP_LABEL
        struct jump_entry *jump_entries;
        unsigned int num_jump_entries;
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 4130a5497d40..8a62731673f7 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -471,7 +471,8 @@ void perf_event_detach_bpf_prog(struct perf_event *event);
 int perf_event_query_prog_array(struct perf_event *event, void __user *info);
 int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
 int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
-struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name);
+struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name);
+void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp);
 int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
                            u32 *fd_type, const char **buf,
                            u64 *probe_offset, u64 *probe_addr);
@@ -502,10 +503,13 @@ static inline int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf
 {
        return -EOPNOTSUPP;
 }
-static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
+static inline struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
 {
        return NULL;
 }
+static inline void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
+{
+}
 static inline int bpf_get_perf_event_info(const struct perf_event *event,
                                          u32 *prog_id, u32 *fd_type,
                                          const char **buf, u64 *probe_offset,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 70fb11106fc2..754370e3155e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1609,6 +1609,7 @@ static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp)
                bpf_probe_unregister(raw_tp->btp, raw_tp->prog);
                bpf_prog_put(raw_tp->prog);
        }
+       bpf_put_raw_tracepoint(raw_tp->btp);
        kfree(raw_tp);
        return 0;
 }
@@ -1634,13 +1635,15 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
                return -EFAULT;
        tp_name[sizeof(tp_name) - 1] = 0;
 
-       btp = bpf_find_raw_tracepoint(tp_name);
+       btp = bpf_get_raw_tracepoint(tp_name);
        if (!btp)
                return -ENOENT;
 
        raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER);
-       if (!raw_tp)
-               return -ENOMEM;
+       if (!raw_tp) {
+               err = -ENOMEM;
+               goto out_put_btp;
+       }
        raw_tp->btp = btp;
 
        prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd,
@@ -1668,6 +1671,8 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
        bpf_prog_put(prog);
 out_free_tp:
        kfree(raw_tp);
+out_put_btp:
+       bpf_put_raw_tracepoint(btp);
        return err;
 }
 
diff --git a/kernel/module.c b/kernel/module.c
index 49a405891587..06ec68f08387 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3093,6 +3093,11 @@ static int find_module_sections(struct module *mod, struct load_info *info)
                                             sizeof(*mod->tracepoints_ptrs),
                                             &mod->num_tracepoints);
 #endif
+#ifdef CONFIG_BPF_EVENTS
+       mod->bpf_raw_events = section_objs(info, "__bpf_raw_tp_map",
+                                          sizeof(*mod->bpf_raw_events),
+                                          &mod->num_bpf_raw_events);
+#endif
 #ifdef HAVE_JUMP_LABEL
        mod->jump_entries = section_objs(info, "__jump_table",
                                        sizeof(*mod->jump_entries),
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 9864a35c8bb5..9ddb6fddb4e0 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -17,6 +17,43 @@
 #include "trace_probe.h"
 #include "trace.h"
 
+#ifdef CONFIG_MODULES
+struct bpf_trace_module {
+       struct module *module;
+       struct list_head list;
+};
+
+static LIST_HEAD(bpf_trace_modules);
+static DEFINE_MUTEX(bpf_module_mutex);
+
+static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
+{
+       struct bpf_raw_event_map *btp, *ret = NULL;
+       struct bpf_trace_module *btm;
+       unsigned int i;
+
+       mutex_lock(&bpf_module_mutex);
+       list_for_each_entry(btm, &bpf_trace_modules, list) {
+               for (i = 0; i < btm->module->num_bpf_raw_events; ++i) {
+                       btp = &btm->module->bpf_raw_events[i];
+                       if (!strcmp(btp->tp->name, name)) {
+                               if (try_module_get(btm->module))
+                                       ret = btp;
+                               goto out;
+                       }
+               }
+       }
+out:
+       mutex_unlock(&bpf_module_mutex);
+       return ret;
+}
+#else
+static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
+{
+       return NULL;
+}
+#endif /* CONFIG_MODULES */
+
 u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
@@ -1076,7 +1113,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
 extern struct bpf_raw_event_map __start__bpf_raw_tp[];
 extern struct bpf_raw_event_map __stop__bpf_raw_tp[];
 
-struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
+struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
 {
        struct bpf_raw_event_map *btp = __start__bpf_raw_tp;
 
@@ -1084,7 +1121,16 @@ struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
                if (!strcmp(btp->tp->name, name))
                        return btp;
        }
-       return NULL;
+
+       return bpf_get_raw_tracepoint_module(name);
+}
+
+void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
+{
+       struct module *mod = __module_address((unsigned long)btp);
+
+       if (mod)
+               module_put(mod);
 }
 
 static __always_inline
@@ -1222,3 +1268,52 @@ int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
 
        return err;
 }
+
+#ifdef CONFIG_MODULES
+int bpf_event_notify(struct notifier_block *nb, unsigned long op, void *module)
+{
+       struct bpf_trace_module *btm, *tmp;
+       struct module *mod = module;
+
+       if (mod->num_bpf_raw_events == 0 ||
+           (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
+               return 0;
+
+       mutex_lock(&bpf_module_mutex);
+
+       switch (op) {
+       case MODULE_STATE_COMING:
+               btm = kzalloc(sizeof(*btm), GFP_KERNEL);
+               if (btm) {
+                       btm->module = module;
+                       list_add(&btm->list, &bpf_trace_modules);
+               }
+               break;
+       case MODULE_STATE_GOING:
+               list_for_each_entry_safe(btm, tmp, &bpf_trace_modules, list) {
+                       if (btm->module == module) {
+                               list_del(&btm->list);
+                               kfree(btm);
+                               break;
+                       }
+               }
+               break;
+       }
+
+       mutex_unlock(&bpf_module_mutex);
+
+       return 0;
+}
+
+static struct notifier_block bpf_module_nb = {
+       .notifier_call = bpf_event_notify,
+};
+
+int __init bpf_event_init(void)
+{
+       register_module_notifier(&bpf_module_nb);
+       return 0;
+}
+
+fs_initcall(bpf_event_init);
+#endif /* CONFIG_MODULES */
-- 
2.17.1

Reply via email to