From: Peter Zijlstra <[email protected]>

kallsym_tree is based on rbtree_latch. It is designed to hold dynamic
kernel symbols, such as those of BPF programs and ftrace kallsyms.
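
For illustration only (not part of the diff below), a user of this API
might register a JITed text region roughly as follows. Everything named
example_* is hypothetical; only the kallsym_node API added by this patch
is real:

	static struct kallsym_node example_kn;

	static void example_names(struct kallsym_node *kn, char *sym_name,
				  char **mod_name)
	{
		/* Name buffer is KSYM_NAME_LEN bytes, as in kallsym_tree_add(). */
		strlcpy(sym_name, "example_jit_func", KSYM_NAME_LEN);
		*mod_name = NULL;	/* no module association */
	}

	static void example_register(void *image, unsigned long len)
	{
		example_kn.kn_addr = (unsigned long)image;
		example_kn.kn_len = len;
		example_kn.ksym_type = PERF_RECORD_KSYMBOL_TYPE_BPF;
		example_kn.kn_names = example_names;

		/* Inserts into the latch tree and emits PERF_RECORD_KSYMBOL. */
		kallsym_tree_add(&example_kn);
	}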

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Song Liu <[email protected]>
---
 include/linux/kallsyms.h |  16 ++++
 kernel/extable.c         |   2 +
 kernel/kallsyms.c        | 189 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 206 insertions(+), 1 deletion(-)

diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 657a83b943f0..be83ac3d8228 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -11,6 +11,8 @@
 #include <linux/stddef.h>
 #include <linux/mm.h>
 #include <linux/module.h>
+#include <linux/rbtree_latch.h>
+#include <uapi/linux/perf_event.h>
 
 #include <asm/sections.h>
 
@@ -20,6 +22,20 @@
 
 struct module;
 
+struct kallsym_node
+{
+       struct latch_tree_node kn_node;
+       unsigned long kn_addr;
+       unsigned long kn_len;
+       enum perf_record_ksymbol_type ksym_type;
+       void (*kn_names)(struct kallsym_node *kn, char *sym_name, char **mod_name);
+};
+
+extern void kallsym_tree_add(struct kallsym_node *kn);
+extern void kallsym_tree_del(struct kallsym_node *kn);
+
+extern bool is_kallsym_tree_text_address(unsigned long addr);
+
 static inline int is_kernel_inittext(unsigned long addr)
 {
        if (addr >= (unsigned long)_sinittext
diff --git a/kernel/extable.c b/kernel/extable.c
index 6a5b61ebc66c..5271e9b649b1 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -145,6 +145,8 @@ int kernel_text_address(unsigned long addr)
 
        if (is_module_text_address(addr))
                goto out;
+       if (is_kallsym_tree_text_address(addr))
+               goto out;
        if (is_ftrace_trampoline(addr))
                goto out;
        if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr))
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 14934afa9e68..30611a5379fd 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -24,6 +24,8 @@
 #include <linux/filter.h>
 #include <linux/ftrace.h>
 #include <linux/compiler.h>
+#include <linux/spinlock.h>
+#include <linux/perf_event.h>
 
 /*
  * These will be re-linked against their real values
@@ -48,6 +50,166 @@ extern const u16 kallsyms_token_index[] __weak;
 
 extern const unsigned int kallsyms_markers[] __weak;
 
+static DEFINE_SPINLOCK(kallsym_lock);
+static struct latch_tree_root kallsym_tree __cacheline_aligned;
+
+static __always_inline unsigned long
+kallsym_node_addr(struct latch_tree_node *node)
+{
+       struct kallsym_node *kn;
+
+       kn = container_of(node, struct kallsym_node, kn_node);
+       return kn->kn_addr;
+}
+
+static __always_inline bool kallsym_tree_less(struct latch_tree_node *a,
+                                             struct latch_tree_node *b)
+{
+       return kallsym_node_addr(a) < kallsym_node_addr(b);
+}
+
+static __always_inline int kallsym_tree_comp(void *key,
+                                            struct latch_tree_node *n)
+{
+       unsigned long val = (unsigned long)key;
+       unsigned long sym_start, sym_end;
+       const struct kallsym_node *kn;
+
+       kn = container_of(n, struct kallsym_node, kn_node);
+       sym_start = kn->kn_addr;
+       sym_end = sym_start + kn->kn_len;
+
+       if (val < sym_start)
+               return -1;
+       if (val >= sym_end)
+               return 1;
+
+       return 0;
+}
+
+static const struct latch_tree_ops kallsym_tree_ops = {
+       .less = kallsym_tree_less,
+       .comp = kallsym_tree_comp,
+};
+
+void kallsym_tree_add(struct kallsym_node *kn)
+{
+       char namebuf[KSYM_NAME_LEN] = "";
+       char *modname = NULL;
+
+       spin_lock_irq(&kallsym_lock);
+       latch_tree_insert(&kn->kn_node, &kallsym_tree, &kallsym_tree_ops);
+       spin_unlock_irq(&kallsym_lock);
+
+       kn->kn_names(kn, namebuf, &modname);
+
+       if (modname) {
+               int len = strlen(namebuf);
+
+               snprintf(namebuf + len, sizeof(namebuf) - len, " [%s]", modname);
+       }
+
+       perf_event_ksymbol(kn->ksym_type, kn->kn_addr, kn->kn_len, false, namebuf);
+}
+
+void kallsym_tree_del(struct kallsym_node *kn)
+{
+       char namebuf[KSYM_NAME_LEN] = "";
+       char *modname = NULL;
+
+       kn->kn_names(kn, namebuf, &modname);
+
+       if (modname) {
+               int len = strlen(namebuf);
+
+               snprintf(namebuf + len, sizeof(namebuf) - len, " [%s]", modname);
+       }
+
+       perf_event_ksymbol(kn->ksym_type, kn->kn_addr, kn->kn_len, true, namebuf);
+
+       spin_lock_irq(&kallsym_lock);
+       latch_tree_erase(&kn->kn_node, &kallsym_tree, &kallsym_tree_ops);
+       spin_unlock_irq(&kallsym_lock);
+}
+
+static struct kallsym_node *kallsym_tree_find(unsigned long addr)
+{
+       struct kallsym_node *kn = NULL;
+       struct latch_tree_node *n;
+
+       n = latch_tree_find((void *)addr, &kallsym_tree, &kallsym_tree_ops);
+       if (n)
+               kn = container_of(n, struct kallsym_node, kn_node);
+
+       return kn;
+}
+
+static char *kallsym_tree_address_lookup(unsigned long addr, unsigned long *size,
+                                        unsigned long *off, char **modname, char *sym)
+{
+       struct kallsym_node *kn;
+       char *ret = NULL;
+
+       rcu_read_lock();
+       kn = kallsym_tree_find(addr);
+       if (kn) {
+               kn->kn_names(kn, sym, modname);
+
+               ret = sym;
+               if (size)
+                       *size = kn->kn_len;
+               if (off)
+                       *off = addr - kn->kn_addr;
+       }
+       rcu_read_unlock();
+
+       return ret;
+}
+
+bool is_kallsym_tree_text_address(unsigned long addr)
+{
+       bool ret;
+
+       rcu_read_lock();
+       ret = kallsym_tree_find(addr) != NULL;
+       rcu_read_unlock();
+
+       return ret;
+}
+
+static int kallsym_tree_kallsym(unsigned int symnum, unsigned long *value, char *type,
+                               char *sym, char *modname, int *exported)
+{
+       struct latch_tree_node *ltn;
+       int i, ret = -ERANGE;
+
+       rcu_read_lock();
+       for (i = 0, ltn = latch_tree_first(&kallsym_tree); i < symnum && ltn;
+            i++, ltn = latch_tree_next(&kallsym_tree, ltn))
+               ;
+
+       if (ltn) {
+               struct kallsym_node *kn;
+               char *mod;
+
+               kn = container_of(ltn, struct kallsym_node, kn_node);
+
+               kn->kn_names(kn, sym, &mod);
+               if (mod)
+                       strlcpy(modname, mod, MODULE_NAME_LEN);
+               else
+                       modname[0] = '\0';
+
+       *value = kn->kn_addr;
+       *type = 't';
+               *exported = 0;
+               ret = 0;
+       }
+       rcu_read_unlock();
+
+       return ret;
+}
+
 /*
  * Expand a compressed symbol data into the resulting uncompressed string,
  * if uncompressed string is too long (>= maxlen), it will be truncated,
@@ -265,6 +427,7 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
        if (is_ksym_addr(addr))
                return !!get_symbol_pos(addr, symbolsize, offset);
        return !!module_address_lookup(addr, symbolsize, offset, NULL, namebuf) ||
+              !!kallsym_tree_address_lookup(addr, symbolsize, offset, NULL, namebuf) ||
               !!__bpf_address_lookup(addr, symbolsize, offset, namebuf);
 }
 
@@ -300,6 +463,10 @@ const char *kallsyms_lookup(unsigned long addr,
        /* See if it's in a module or a BPF JITed image. */
        ret = module_address_lookup(addr, symbolsize, offset,
                                    modname, namebuf);
+       if (!ret)
+               ret = kallsym_tree_address_lookup(addr, symbolsize,
+                                                 offset, modname, namebuf);
+
        if (!ret)
                ret = bpf_address_lookup(addr, symbolsize,
                                         offset, modname, namebuf);
@@ -434,6 +601,7 @@ struct kallsym_iter {
        loff_t pos;
        loff_t pos_arch_end;
        loff_t pos_mod_end;
+       loff_t pos_tree_end;
        loff_t pos_ftrace_mod_end;
        unsigned long value;
        unsigned int nameoff; /* If iterating in core kernel symbols. */
@@ -478,9 +646,24 @@ static int get_ksymbol_mod(struct kallsym_iter *iter)
        return 1;
 }
 
+static int get_ksymbol_tree(struct kallsym_iter *iter)
+{
+       int ret = kallsym_tree_kallsym(iter->pos - iter->pos_mod_end,
+                                      &iter->value, &iter->type,
+                                      iter->name, iter->module_name,
+                                      &iter->exported);
+
+       if (ret < 0) {
+               iter->pos_tree_end = iter->pos;
+               return 0;
+       }
+
+       return 1;
+}
+
 static int get_ksymbol_ftrace_mod(struct kallsym_iter *iter)
 {
-       int ret = ftrace_mod_get_kallsym(iter->pos - iter->pos_mod_end,
+       int ret = ftrace_mod_get_kallsym(iter->pos - iter->pos_tree_end,
                                         &iter->value, &iter->type,
                                         iter->name, iter->module_name,
                                         &iter->exported);
@@ -545,6 +728,10 @@ static int update_iter_mod(struct kallsym_iter *iter, loff_t pos)
            get_ksymbol_mod(iter))
                return 1;
 
+       if ((!iter->pos_tree_end || iter->pos_tree_end > pos) &&
+           get_ksymbol_tree(iter))
+               return 1;
+
        if ((!iter->pos_ftrace_mod_end || iter->pos_ftrace_mod_end > pos) &&
            get_ksymbol_ftrace_mod(iter))
                return 1;
-- 
2.17.1
