Currently __module_address() uses a linear search through all modules
to find the module corresponding to the provided address. With many
modules loaded this can take a long time.

One of the users of this is kernel_text_address(), which is employed
by many stack unwinders; these in turn are used by perf-callchain and
ftrace (possibly from NMI context).

So by optimizing __module_address() we optimize the many stack
unwinders used by both perf and tracing in performance-sensitive code.
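
For illustration, the effect on the lookup path (a minimal sketch using
the names introduced by this patch, not the literal diff):

	/* before: O(n) walk of the RCU-protected module list */
	list_for_each_entry_rcu(mod, &modules, list) {
		if (mod->state == MODULE_STATE_UNFORMED)
			continue;
		if (within_module(addr, mod))
			return mod;
	}
	return NULL;

	/* after: O(log n) lookup in a latched RB-tree, usable from
	 * any context (including NMI) under RCU-sched */
	mod = mod_tree_find(addr);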

Cc: Mathieu Desnoyers <mathieu.desnoy...@efficios.com>
Cc: Oleg Nesterov <o...@redhat.com>
Cc: "Paul E. McKenney" <paul...@linux.vnet.ibm.com>
Cc: Rusty Russell <ru...@rustcorp.com.au>
Cc: Steven Rostedt <rost...@goodmis.org>
Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
---
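
A note on usage: lookups require either module_mutex or a
preempt-disabled (RCU-sched read side) section, as asserted by
module_assert_mutex_or_preempt(). A minimal, hypothetical caller
sketch; addr_in_module() is illustrative only and not part of this
patch:

	static bool addr_in_module(unsigned long addr)
	{
		struct module *mod;
		bool ret;

		preempt_disable();	/* RCU-sched read-side critical section */
		mod = __module_address(addr);
		ret = mod != NULL;
		preempt_enable();

		return ret;
	}
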
 include/linux/module.h |   22 ++++++++--
 kernel/module.c        |  107 ++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 121 insertions(+), 8 deletions(-)

--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -17,6 +17,7 @@
 #include <linux/moduleparam.h>
 #include <linux/jump_label.h>
 #include <linux/export.h>
+#include <linux/rbtree_latch.h>
 
 #include <linux/percpu.h>
 #include <asm/module.h>
@@ -269,8 +270,15 @@ struct module {
        /* Startup function. */
        int (*init)(void);
 
-       /* If this is non-NULL, vfree after init() returns */
-       void *module_init;
+       /*
+        * If this is non-NULL, vfree after init() returns.
+        *
+        * Cacheline align here, such that:
+        *   module_init, module_core, init_size, core_size,
+        *   init_text_size, core_text_size and ltn_core.node[0]
+        * are on the same cacheline.
+        */
+       void *module_init       ____cacheline_aligned;
 
        /* Here is the actual code + data, vfree'd on unload. */
        void *module_core;
@@ -281,6 +289,14 @@ struct module {
        /* The size of the executable code in each section.  */
        unsigned int init_text_size, core_text_size;
 
+       /*
+        * We rely on the order of these two entries; not only do we want
+        * ltn_core.node[0] to be in the same cacheline as the above entries,
+        * we also assume ltn_init has a higher address than ltn_core.
+        */
+       struct latch_tree_nodes ltn_core;
+       struct latch_tree_nodes ltn_init;
+
        /* Size of RO sections of the module (text+rodata) */
        unsigned int init_ro_size, core_ro_size;
 
@@ -361,7 +377,7 @@ struct module {
        ctor_fn_t *ctors;
        unsigned int num_ctors;
 #endif
-};
+} ____cacheline_aligned;
 #ifndef MODULE_ARCH_INIT
 #define MODULE_ARCH_INIT {}
 #endif
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -102,6 +102,99 @@
 DEFINE_MUTEX(module_mutex);
 EXPORT_SYMBOL_GPL(module_mutex);
 static LIST_HEAD(modules);
+
+/*
+ * Use a latched RB-tree for __module_address(); this allows us to use
+ * RCU-sched lookups of the address from any context.
+ *
+ * Because modules have two address ranges: init and core, we need two
+ * latch_tree_nodes entries. We use the order they appear in struct module to
+ * determine if we need to use the init or core values for the comparisons.
+ */
+
+static __always_inline unsigned long __mod_tree_val(struct latch_tree_node *n)
+{
+       struct module *mod = n->priv;
+
+       if (n >= mod->ltn_init.node)
+               return (unsigned long)mod->module_init;
+       else
+               return (unsigned long)mod->module_core;
+}
+
+static __always_inline unsigned long __mod_tree_size(struct latch_tree_node *n)
+{
+       struct module *mod = n->priv;
+
+       if (n >= mod->ltn_init.node)
+               return (unsigned long)mod->init_size;
+       else
+               return (unsigned long)mod->core_size;
+}
+
+static __always_inline bool
+mod_tree_less(struct latch_tree_node *a, struct latch_tree_node *b)
+{
+       return __mod_tree_val(a) < __mod_tree_val(b);
+}
+
+static __always_inline int
+mod_tree_comp(void *key, struct latch_tree_node *n)
+{
+       unsigned long val = (unsigned long)key;
+       unsigned long start, end;
+
+       end = start = __mod_tree_val(n);
+       end += __mod_tree_size(n);
+
+       if (val < start)
+               return -1;
+
+       if (val >= end)
+               return 1;
+
+       return 0;
+}
+
+static const struct latch_tree_ops mod_tree_ops = {
+       .less = mod_tree_less,
+       .comp = mod_tree_comp,
+};
+
+static struct latch_tree_root mod_tree __cacheline_aligned;
+
+/*
+ * These modifications: insert, remove_init and remove; are serialized by the
+ * module_mutex.
+ */
+static void mod_tree_insert(struct module *mod)
+{
+       latch_tree_insert(&mod->ltn_core, &mod_tree, mod, &mod_tree_ops);
+       if (mod->init_size)
+               latch_tree_insert(&mod->ltn_init, &mod_tree, mod, &mod_tree_ops);
+}
+
+static void mod_tree_remove_init(struct module *mod)
+{
+       if (mod->init_size)
+               latch_tree_erase(&mod->ltn_init, &mod_tree, &mod_tree_ops);
+}
+
+static void mod_tree_remove(struct module *mod)
+{
+       latch_tree_erase(&mod->ltn_core, &mod_tree, &mod_tree_ops);
+       mod_tree_remove_init(mod);
+}
+
+static struct module *mod_tree_find(unsigned long addr)
+{
+       struct latch_tree_node *ltn;
+
+       ltn = latch_tree_find((void *)addr, &mod_tree, &mod_tree_ops);
+
+       return ltn ? ltn->priv : NULL;
+}
+
 #ifdef CONFIG_KGDB_KDB
 struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
 #endif /* CONFIG_KGDB_KDB */
@@ -1876,6 +1969,7 @@ static void free_module(struct module *m
        mutex_lock(&module_mutex);
        /* Unlink carefully: kallsyms could be walking list. */
        list_del_rcu(&mod->list);
+       mod_tree_remove(mod);
        /* Remove this module from bug list, this uses list_del_rcu */
        module_bug_cleanup(mod);
        /* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */
@@ -3120,6 +3214,7 @@ static noinline int do_init_module(struc
        mod->symtab = mod->core_symtab;
        mod->strtab = mod->core_strtab;
 #endif
+       mod_tree_remove_init(mod);
        unset_module_init_ro_nx(mod);
        module_arch_freeing_init(mod);
        mod->module_init = NULL;
@@ -3190,6 +3285,7 @@ static int add_unformed_module(struct mo
                goto out;
        }
        list_add_rcu(&mod->list, &modules);
+       mod_tree_insert(mod);
        err = 0;
 
 out:
@@ -3389,6 +3485,7 @@ static int load_module(struct load_info
        mutex_lock(&module_mutex);
        /* Unlink carefully: kallsyms could be walking list. */
        list_del_rcu(&mod->list);
+       mod_tree_remove(mod);
        wake_up_all(&module_wq);
        /* Wait for RCU-sched synchronizing before releasing mod->list. */
        synchronize_sched();
@@ -3833,13 +3930,13 @@ struct module *__module_address(unsigned
 
        module_assert_mutex_or_preempt();
 
-       list_for_each_entry_rcu(mod, &modules, list) {
+       mod = mod_tree_find(addr);
+       if (mod) {
+               BUG_ON(!within_module(addr, mod));
                if (mod->state == MODULE_STATE_UNFORMED)
-                       continue;
-               if (within_module(addr, mod))
-                       return mod;
+                       mod = NULL;
        }
-       return NULL;
+       return mod;
 }
 EXPORT_SYMBOL_GPL(__module_address);
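
Aside, for reviewers: __mod_tree_val() picks the init vs core range by
comparing the node pointer against mod->ltn_init.node, relying on
ltn_init being declared after ltn_core in struct module. A toy,
self-contained userspace demonstration of that pointer-comparison
trick (the struct and field names only mimic the patch; this is not
kernel code):

	#include <stdio.h>

	struct nodes { int node[2]; };		/* stand-in for latch_tree_nodes */

	struct obj {
		struct nodes ltn_core;		/* declared first: lower address */
		struct nodes ltn_init;		/* declared second: higher address */
	};

	/* classify a pointer known to point into one of the two members */
	static const char *which(struct obj *o, int *n)
	{
		return n >= o->ltn_init.node ? "init" : "core";
	}

	int main(void)
	{
		struct obj o;

		printf("%s\n", which(&o, &o.ltn_core.node[1]));	/* core */
		printf("%s\n", which(&o, &o.ltn_init.node[0]));	/* init */
		return 0;
	}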
 

