Module symbol lookup via find_kallsyms_symbol() performs a linear scan
over the entire symtab when resolving an address. The number of symbols
in module symtabs has grown over the years, largely due to additional
metadata in non-standard sections, making this lookup very slow.

Improve this by separating function symbols during module load, placing
them at the beginning of the symtab, sorting them by address, and using
binary search when resolving addresses in module text.

This also should improve times for linear symbol name lookups, as valid
function symbols are now located at the beginning of the symtab.

The cost of sorting is small relative to module load time. In repeated
module load tests [1], depending on .config options, this change
increases load time between 2% and 4%. With cold caches, the difference
is not measurable, as memory access latency dominates.

The sorting theoretically could be done in compile time, but much more
complicated as we would have to simulate kernel addresses resolution
for symbols, and then correct relocation entries. That would be risky
if get out of sync.

The improvement can be observed when listing ftrace filter functions:

root@nano:~# time cat /sys/kernel/tracing/available_filter_functions | wc -l
74908

real    0m1.315s
user    0m0.000s
sys     0m1.312s

After:

root@nano:~# time cat /sys/kernel/tracing/available_filter_functions | wc -l
74911

real    0m0.167s
user    0m0.004s
sys     0m0.175s

(there are three more symbols introduced by the patch)

For livepatch modules, the symtab layout is preserved and the existing
linear search is used. For this case, it should be possible to keep
the original ELF symtab instead of copying it 1:1, but that is outside
the scope of this patch.

Link: https://gist.github.com/sgruszka/09f3fb1dad53a97b1aad96e1927ab117 [1]
Signed-off-by: Stanislaw Gruszka <[email protected]>
---
 include/linux/module.h   |   6 ++
 kernel/module/internal.h |   1 +
 kernel/module/kallsyms.c | 169 ++++++++++++++++++++++++++++-----------
 3 files changed, 131 insertions(+), 45 deletions(-)

diff --git a/include/linux/module.h b/include/linux/module.h
index ac254525014c..4da0289fba02 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -379,8 +379,14 @@ struct module_memory {
 struct mod_kallsyms {
        Elf_Sym *symtab;
        unsigned int num_symtab;
+       unsigned int num_func_syms;
        char *strtab;
        char *typetab;
+
+       unsigned int (*search)(struct mod_kallsyms *kallsyms,
+                              struct module_memory *mod_mem,
+                              unsigned long addr, unsigned long *bestval,
+                              unsigned long *nextval);
 };
 
 #ifdef CONFIG_LIVEPATCH
diff --git a/kernel/module/internal.h b/kernel/module/internal.h
index 618202578b42..6a4d498619b1 100644
--- a/kernel/module/internal.h
+++ b/kernel/module/internal.h
@@ -73,6 +73,7 @@ struct load_info {
        bool sig_ok;
 #ifdef CONFIG_KALLSYMS
        unsigned long mod_kallsyms_init_off;
+       unsigned long num_func_syms;
 #endif
 #ifdef CONFIG_MODULE_DECOMPRESS
 #ifdef CONFIG_MODULE_STATS
diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c
index 0fc11e45df9b..9e131baae441 100644
--- a/kernel/module/kallsyms.c
+++ b/kernel/module/kallsyms.c
@@ -10,6 +10,7 @@
 #include <linux/kallsyms.h>
 #include <linux/buildid.h>
 #include <linux/bsearch.h>
+#include <linux/sort.h>
 #include "internal.h"
 
 /* Lookup exported symbol in given range of kernel_symbols */
@@ -103,6 +104,95 @@ static bool is_core_symbol(const Elf_Sym *src, const 
Elf_Shdr *sechdrs,
        return true;
 }
 
+static inline bool is_func_symbol(const Elf_Sym *sym)
+{
+       return sym->st_shndx != SHN_UNDEF && sym->st_size != 0 &&
+              ELF_ST_TYPE(sym->st_info) == STT_FUNC;
+}
+
+static unsigned int bsearch_kallsyms_symbol(struct mod_kallsyms *kallsyms,
+                                           struct module_memory *mod_mem,
+                                           unsigned long addr,
+                                           unsigned long *bestval,
+                                           unsigned long *nextval)
+
+{
+       unsigned int mid, low = 1, high = kallsyms->num_func_syms + 1;
+       unsigned int best = 0;
+       unsigned long thisval;
+
+       while (low < high) {
+               mid = low + (high - low) / 2;
+               thisval = kallsyms_symbol_value(&kallsyms->symtab[mid]);
+
+               if (thisval <= addr) {
+                       *bestval = thisval;
+                       best = mid;
+                       low = mid + 1;
+               } else {
+                       *nextval = thisval;
+                       high = mid;
+               }
+       }
+
+       return best;
+}
+
+static const char *kallsyms_symbol_name(struct mod_kallsyms *kallsyms,
+                                       unsigned int symnum)
+{
+       return kallsyms->strtab + kallsyms->symtab[symnum].st_name;
+}
+
+static unsigned int search_kallsyms_symbol(struct mod_kallsyms *kallsyms,
+                                          struct module_memory *mod_mem,
+                                          unsigned long addr,
+                                          unsigned long *bestval,
+                                          unsigned long *nextval)
+{
+       unsigned int i, best = 0;
+
+       /*
+        * Scan for closest preceding symbol, and next symbol. (ELF
+        * starts real symbols at 1).
+        */
+       for (i = 1; i < kallsyms->num_symtab; i++) {
+               const Elf_Sym *sym = &kallsyms->symtab[i];
+               unsigned long thisval = kallsyms_symbol_value(sym);
+
+               if (sym->st_shndx == SHN_UNDEF)
+                       continue;
+
+               /*
+                * We ignore unnamed symbols: they're uninformative
+                * and inserted at a whim.
+                */
+               if (*kallsyms_symbol_name(kallsyms, i) == '\0' ||
+                   is_mapping_symbol(kallsyms_symbol_name(kallsyms, i)))
+                       continue;
+
+               if (thisval <= addr && thisval > *bestval) {
+                       best = i;
+                       *bestval = thisval;
+               }
+               if (thisval > addr && thisval < *nextval)
+                       *nextval = thisval;
+       }
+
+       return best;
+}
+
+static int elf_sym_cmp(const void *a, const void *b)
+{
+       const Elf_Sym *sym_a = a;
+       const Elf_Sym *sym_b = b;
+
+       if (sym_a->st_value < sym_b->st_value)
+               return -1;
+
+       return sym_a->st_value > sym_b->st_value;
+}
+
 /*
  * We only allocate and copy the strings needed by the parts of symtab
  * we keep.  This is simple, but has the effect of making multiple
@@ -115,9 +205,10 @@ void layout_symtab(struct module *mod, struct load_info 
*info)
        Elf_Shdr *symsect = info->sechdrs + info->index.sym;
        Elf_Shdr *strsect = info->sechdrs + info->index.str;
        const Elf_Sym *src;
-       unsigned int i, nsrc, ndst, strtab_size = 0;
+       unsigned int i, nsrc, ndst, nfunc, strtab_size = 0;
        struct module_memory *mod_mem_data = &mod->mem[MOD_DATA];
        struct module_memory *mod_mem_init_data = &mod->mem[MOD_INIT_DATA];
+       bool is_lp_mod = is_livepatch_module(mod);
 
        /* Put symbol section at end of init part of module. */
        symsect->sh_flags |= SHF_ALLOC;
@@ -129,12 +220,14 @@ void layout_symtab(struct module *mod, struct load_info 
*info)
        nsrc = symsect->sh_size / sizeof(*src);
 
        /* Compute total space required for the core symbols' strtab. */
-       for (ndst = i = 0; i < nsrc; i++) {
-               if (i == 0 || is_livepatch_module(mod) ||
+       for (ndst = nfunc = i = 0; i < nsrc; i++) {
+               if (i == 0 || is_lp_mod ||
                    is_core_symbol(src + i, info->sechdrs, info->hdr->e_shnum,
                                   info->index.pcpu)) {
                        strtab_size += strlen(&info->strtab[src[i].st_name]) + 
1;
                        ndst++;
+                       if (!is_lp_mod && is_func_symbol(src + i))
+                               nfunc++;
                }
        }
 
@@ -156,6 +249,7 @@ void layout_symtab(struct module *mod, struct load_info 
*info)
        mod_mem_init_data->size = ALIGN(mod_mem_init_data->size,
                                        __alignof__(struct mod_kallsyms));
        info->mod_kallsyms_init_off = mod_mem_init_data->size;
+       info->num_func_syms = nfunc;
 
        mod_mem_init_data->size += sizeof(struct mod_kallsyms);
        info->init_typeoffs = mod_mem_init_data->size;
@@ -169,7 +263,7 @@ void layout_symtab(struct module *mod, struct load_info 
*info)
  */
 void add_kallsyms(struct module *mod, const struct load_info *info)
 {
-       unsigned int i, ndst;
+       unsigned int i, di, nfunc, ndst;
        const Elf_Sym *src;
        Elf_Sym *dst;
        char *s;
@@ -178,6 +272,7 @@ void add_kallsyms(struct module *mod, const struct 
load_info *info)
        void *data_base = mod->mem[MOD_DATA].base;
        void *init_data_base = mod->mem[MOD_INIT_DATA].base;
        struct mod_kallsyms *kallsyms;
+       bool is_lp_mod = is_livepatch_module(mod);
 
        kallsyms = init_data_base + info->mod_kallsyms_init_off;
 
@@ -186,6 +281,7 @@ void add_kallsyms(struct module *mod, const struct 
load_info *info)
        /* Make sure we get permanent strtab: don't use info->strtab. */
        kallsyms->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
        kallsyms->typetab = init_data_base + info->init_typeoffs;
+       kallsyms->search = search_kallsyms_symbol;
 
        /*
         * Now populate the cut down core kallsyms for after init
@@ -194,19 +290,30 @@ void add_kallsyms(struct module *mod, const struct 
load_info *info)
        mod->core_kallsyms.symtab = dst = data_base + info->symoffs;
        mod->core_kallsyms.strtab = s = data_base + info->stroffs;
        mod->core_kallsyms.typetab = data_base + info->core_typeoffs;
+       mod->core_kallsyms.search = is_lp_mod ? search_kallsyms_symbol :
+                                               bsearch_kallsyms_symbol;
+
        strtab_size = info->core_typeoffs - info->stroffs;
        src = kallsyms->symtab;
-       for (ndst = i = 0; i < kallsyms->num_symtab; i++) {
+       ndst = info->num_func_syms + 1;
+
+       for (nfunc = i = 0; i < kallsyms->num_symtab; i++) {
                kallsyms->typetab[i] = elf_type(src + i, info);
-               if (i == 0 || is_livepatch_module(mod) ||
+               if (i == 0 || is_lp_mod ||
                    is_core_symbol(src + i, info->sechdrs, info->hdr->e_shnum,
                                   info->index.pcpu)) {
                        ssize_t ret;
 
-                       mod->core_kallsyms.typetab[ndst] =
-                               kallsyms->typetab[i];
-                       dst[ndst] = src[i];
-                       dst[ndst++].st_name = s - mod->core_kallsyms.strtab;
+                       if (i == 0)
+                               di = 0;
+                       else if (!is_lp_mod && is_func_symbol(src + i))
+                               di = 1 + nfunc++;
+                       else
+                               di = ndst++;
+
+                       mod->core_kallsyms.typetab[di] = kallsyms->typetab[i];
+                       dst[di] = src[i];
+                       dst[di].st_name = s - mod->core_kallsyms.strtab;
                        ret = strscpy(s, &kallsyms->strtab[src[i].st_name],
                                      strtab_size);
                        if (ret < 0)
@@ -216,9 +323,13 @@ void add_kallsyms(struct module *mod, const struct 
load_info *info)
                }
        }
 
+       WARN_ON_ONCE(nfunc != info->num_func_syms);
+       sort(dst + 1, nfunc, sizeof(Elf_Sym), elf_sym_cmp, NULL);
+
        /* Set up to point into init section. */
        rcu_assign_pointer(mod->kallsyms, kallsyms);
        mod->core_kallsyms.num_symtab = ndst;
+       mod->core_kallsyms.num_func_syms = nfunc;
 }
 
 #if IS_ENABLED(CONFIG_STACKTRACE_BUILD_ID)
@@ -241,11 +352,6 @@ void init_build_id(struct module *mod, const struct 
load_info *info)
 }
 #endif
 
-static const char *kallsyms_symbol_name(struct mod_kallsyms *kallsyms, 
unsigned int symnum)
-{
-       return kallsyms->strtab + kallsyms->symtab[symnum].st_name;
-}
-
 /*
  * Given a module and address, find the corresponding symbol and return its 
name
  * while providing its size and offset if needed.
@@ -255,7 +361,7 @@ static const char *find_kallsyms_symbol(struct module *mod,
                                        unsigned long *size,
                                        unsigned long *offset)
 {
-       unsigned int i, best = 0;
+       unsigned int best;
        unsigned long nextval, bestval;
        struct mod_kallsyms *kallsyms = rcu_dereference(mod->kallsyms);
        struct module_memory *mod_mem;
@@ -267,36 +373,9 @@ static const char *find_kallsyms_symbol(struct module *mod,
                mod_mem = &mod->mem[MOD_TEXT];
 
        nextval = (unsigned long)mod_mem->base + mod_mem->size;
+       bestval = kallsyms_symbol_value(&kallsyms->symtab[0]);
 
-       bestval = kallsyms_symbol_value(&kallsyms->symtab[best]);
-
-       /*
-        * Scan for closest preceding symbol, and next symbol. (ELF
-        * starts real symbols at 1).
-        */
-       for (i = 1; i < kallsyms->num_symtab; i++) {
-               const Elf_Sym *sym = &kallsyms->symtab[i];
-               unsigned long thisval = kallsyms_symbol_value(sym);
-
-               if (sym->st_shndx == SHN_UNDEF)
-                       continue;
-
-               /*
-                * We ignore unnamed symbols: they're uninformative
-                * and inserted at a whim.
-                */
-               if (*kallsyms_symbol_name(kallsyms, i) == '\0' ||
-                   is_mapping_symbol(kallsyms_symbol_name(kallsyms, i)))
-                       continue;
-
-               if (thisval <= addr && thisval > bestval) {
-                       best = i;
-                       bestval = thisval;
-               }
-               if (thisval > addr && thisval < nextval)
-                       nextval = thisval;
-       }
-
+       best = kallsyms->search(kallsyms, mod_mem, addr, &bestval, &nextval);
        if (!best)
                return NULL;
 
-- 
2.50.1


Reply via email to