Re: [PATCH v3 04/20] gendwarfksyms: Add address matching

2024-10-01 Thread Sami Tolvanen
On Tue, Oct 1, 2024 at 2:06 PM Petr Pavlu wrote:
>
> On 9/23/24 20:18, Sami Tolvanen wrote:
> > - for_each(name, get_symbol, &sym);
> > + for_each(name, false, get_symbol, &sym);
> >   return sym;
> >  }
>
> What is the reason that the for_each() call in symbol_get() is invoked
> with name_only=false?

It was initially added to skip address checking when reading the
symbol list, but it's redundant since there are no addresses to check
at that point anyway. I think we can just drop the name_only argument
completely. I'll change this in v4.

> > + for (n = 1; n < nsyms; ++n) {
> > + const char *name = NULL;
> > + Elf32_Word xndx = 0;
> > + GElf_Sym sym_mem;
> > + GElf_Sym *sym;
> > +
> > + sym = gelf_getsymshndx(data, xndx_data, n,
> > +&sym_mem, &xndx);
>
> Please check for sym==NULL in case the file is malformed, e.g.
> .symtab_shndx is truncated.

Good catch, I'll add a check.

Sami



Re: [PATCH v3 04/20] gendwarfksyms: Add address matching

2024-10-01 Thread Petr Pavlu
On 9/23/24 20:18, Sami Tolvanen wrote:
> The compiler may choose not to emit type information in DWARF for all
> aliases, but it's possible for each alias to be exported separately.
> To ensure we find type information for the aliases as well, read
> {section, address} tuples from the symbol table and match symbols also
> by address.
> 
> Signed-off-by: Sami Tolvanen 
> ---
>  scripts/gendwarfksyms/gendwarfksyms.c |   2 +
>  scripts/gendwarfksyms/gendwarfksyms.h |  13 +++
>  scripts/gendwarfksyms/symbols.c   | 153 +-
>  3 files changed, 165 insertions(+), 3 deletions(-)
> 
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
> index 096a334fa5b3..5032ec487626 100644
> --- a/scripts/gendwarfksyms/gendwarfksyms.c
> +++ b/scripts/gendwarfksyms/gendwarfksyms.c
> @@ -105,6 +105,8 @@ int main(int argc, char **argv)
>   return -1;
>   }
>  
> + symbol_read_symtab(fd);
> +
>   dwfl = dwfl_begin(&callbacks);
>   if (!dwfl) {
>   error("dwfl_begin failed for '%s': %s", argv[n],
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
> index 1a10d18f178e..a058647e2361 100644
> --- a/scripts/gendwarfksyms/gendwarfksyms.h
> +++ b/scripts/gendwarfksyms/gendwarfksyms.h
> @@ -66,14 +66,27 @@ extern int dump_dies;
>   * symbols.c
>   */
>  
> +static inline unsigned int addr_hash(uintptr_t addr)
> +{
> + return hash_ptr((const void *)addr);
> +}
> +
> +struct symbol_addr {
> + uint32_t section;
> + Elf64_Addr address;
> +};
> +
>  struct symbol {
>   const char *name;
> + struct symbol_addr addr;
> + struct hlist_node addr_hash;
>   struct hlist_node name_hash;
>  };
>  
>  typedef void (*symbol_callback_t)(struct symbol *, void *arg);
>  
>  void symbol_read_exports(FILE *file);
> +void symbol_read_symtab(int fd);
>  struct symbol *symbol_get(const char *name);
>  
>  /*
> diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c
> index 1809be93d18c..d84b46675dd1 100644
> --- a/scripts/gendwarfksyms/symbols.c
> +++ b/scripts/gendwarfksyms/symbols.c
> @@ -6,9 +6,41 @@
>  #include "gendwarfksyms.h"
>  
>  #define SYMBOL_HASH_BITS 15
> +
> +/* struct symbol_addr -> struct symbol */
> +static HASHTABLE_DEFINE(symbol_addrs, 1 << SYMBOL_HASH_BITS);
> +/* name -> struct symbol */
>  static HASHTABLE_DEFINE(symbol_names, 1 << SYMBOL_HASH_BITS);
>  
> -static int for_each(const char *name, symbol_callback_t func, void *data)
> +static inline unsigned int symbol_addr_hash(const struct symbol_addr *addr)
> +{
> + return hash_32(addr->section ^ addr_hash(addr->address));
> +}
> +
> +static int __for_each_addr(struct symbol *sym, symbol_callback_t func,
> +void *data)
> +{
> + struct hlist_node *tmp;
> + struct symbol *match = NULL;
> + int processed = 0;
> +
> + hash_for_each_possible_safe(symbol_addrs, match, tmp, addr_hash,
> + symbol_addr_hash(&sym->addr)) {
> + if (match == sym)
> + continue; /* Already processed */
> +
> + if (match->addr.section == sym->addr.section &&
> + match->addr.address == sym->addr.address) {
> + func(match, data);
> + ++processed;
> + }
> + }
> +
> + return processed;
> +}
> +
> +static int for_each(const char *name, bool name_only, symbol_callback_t func,
> + void *data)
>  {
>   struct hlist_node *tmp;
>   struct symbol *match;
> @@ -21,9 +53,13 @@ static int for_each(const char *name, symbol_callback_t func, void *data)
>   if (strcmp(match->name, name))
>   continue;
>  
> + /* Call func for the match, and all address matches */
>   if (func)
>   func(match, data);
>  
> + if (!name_only && match->addr.section != SHN_UNDEF)
> + return checkp(__for_each_addr(match, func, data)) + 1;
> +
>   return 1;
>   }
>  
> @@ -32,7 +68,7 @@ static int for_each(const char *name, symbol_callback_t func, void *data)
>  
>  static bool is_exported(const char *name)
>  {
> - return checkp(for_each(name, NULL, NULL)) > 0;
> + return checkp(for_each(name, true, NULL, NULL)) > 0;
>  }
>  
>  void symbol_read_exports(FILE *file)
> @@ -55,6 +91,7 @@ void symbol_read_exports(FILE *file)
>  
>   sym = xcalloc(1, sizeof(struct symbol));
>   sym->name = name;
> + sym->addr.section = SHN_UNDEF;
>  
>   hash_add(symbol_names, &sym->name_hash, hash_str(sym->name));
>   ++nsym;
> @@ -77,6 +114,116 @@ struct symbol *symbol_get(const char *name)
>  {
>   struct symbol *sym = NULL;
>  
> - for_each(name, get_symbol, &sym);
> + for_each(name, false, get_symbol, &sym)