Pierrick Bouvier <[email protected]> writes: > On 10/15/25 1:40 PM, Philippe Mathieu-Daudé wrote: >> On 13/10/25 23:39, Pierrick Bouvier wrote: >>> Enhance uftrace_symbols.py to generate .dbg files, containing >>> source location for every symbol present in .sym file. >>> It allows to use uftrace {replay,dump} --srcline and show origin of >> `uftrace {replay,dump} --srcline` >> >>> functions, connecting trace to original source code. >>> >>> It was first implemented with pyelftools DWARF parser, which was way >>> to slow (~minutes) to get locations for every symbol in the linux >> s/to/too/ >> >>> kernel. Thus, we use addr2line instead, which runs in seconds. >>> >>> As well, there were some bugs with latest pyelftools release, >>> requiring to run master version, which is not installable with pip. >>> Thus, since we now require binutils (addr2line), we can ditch pyelftools >>> based implementation and simply rely on nm to get symbols information, >> `nm` >> >>> which is faster and better. >>> >>> Signed-off-by: Pierrick Bouvier <[email protected]> >>> --- >>> contrib/plugins/uftrace_symbols.py | 108 +++++++++++++++++++---------- >>> 1 file changed, 72 insertions(+), 36 deletions(-) >>> >>> diff --git a/contrib/plugins/uftrace_symbols.py >>> b/contrib/plugins/uftrace_symbols.py >>> index b49e03203c8..728bf04ce54 100755 >>> --- a/contrib/plugins/uftrace_symbols.py >>> +++ b/contrib/plugins/uftrace_symbols.py >>> @@ -1,7 +1,7 @@ >>> #!/usr/bin/env python3 >>> # -*- coding: utf-8 -*- >>> # >>> -# Create symbols and mapping files for uftrace. >>> +# Create symbols, debug and mapping files for uftrace. >>> # >>> # Copyright 2025 Linaro Ltd >>> # Author: Pierrick Bouvier <[email protected]> >>> @@ -9,44 +9,71 @@ >>> # SPDX-License-Identifier: GPL-2.0-or-later >>> import argparse >>> -import elftools # pip install pyelftools >>> import os >>> +import subprocess >>> -from elftools.elf.elffile import ELFFile >>> -from elftools.elf.sections import SymbolTableSection >>> +class Symbol: >>> + def __init__(self, name, addr, size): >>> + self.name = name >>> + # clamp addr to 48 bits, like uftrace entries >>> + self.addr = addr & 0xffffffffffff >>> + self.full_addr = addr >>> + self.size = size >>> -def elf_func_symbols(elf): >>> - symbol_tables = [(idx, s) for idx, s in enumerate(elf.iter_sections()) >>> - if isinstance(s, SymbolTableSection)] >>> - symbols = [] >>> - for _, section in symbol_tables: >>> - for _, symbol in enumerate(section.iter_symbols()): >>> - if symbol_size(symbol) == 0: >>> - continue >>> - type = symbol['st_info']['type'] >>> - if type == 'STT_FUNC' or type == 'STT_NOTYPE': >>> - symbols.append(symbol) >>> - symbols.sort(key = lambda x: symbol_addr(x)) >>> + def set_loc(self, file, line): >>> + self.file = file >>> + self.line = line >>> + >>> +def get_symbols(elf_file): >>> + symbols=[] >>> + try: >>> + out = subprocess.check_output(['nm', '--print-size', elf_file], >>> + stderr=subprocess.STDOUT, >>> + text=True) >> Nitpicking, we might be using cross-compiled `nm`, so maybe not >> hardcode >> the binary name. >> >>> + except subprocess.CalledProcessError as e: >>> + print(e.output) >>> + raise >>> + out = out.strip().split('\n') >>> + for line in out: >>> + info = line.split(' ') >>> + if len(info) == 3: >>> + # missing size information >>> + continue >>> + addr, size, type, name = info >>> + # add only symbols from .text section >>> + if type.lower() != 't': >>> + continue >>> + addr = int(addr, 16) >>> + size = int(size, 16) >>> + symbols.append(Symbol(name, addr, size)) >>> + symbols.sort(key = lambda x: x.addr) >>> return symbols >>> -def symbol_size(symbol): >>> - return symbol['st_size'] >>> - >>> -def symbol_addr(symbol): >>> - addr = symbol['st_value'] >>> - # clamp addr to 48 bits, like uftrace entries >>> - return addr & 0xffffffffffff >>> - >>> -def symbol_name(symbol): >>> - return symbol.name >>> +def find_symbols_locations(elf_file, symbols): >>> + addresses = '\n'.join([hex(x.full_addr) for x in symbols]) >>> + try: >>> + out = subprocess.check_output(['addr2line', '--exe', elf_file], >> Ditto (cross compiled)? >> >>> + stderr=subprocess.STDOUT, >>> + input=addresses, text=True) >>> + except subprocess.CalledProcessError as e: >>> + print(e.output) >>> + raise >>> + out = out.strip().split('\n') >>> + assert len(out) == len(symbols) >>> + for i in range(len(symbols)): >>> + s = symbols[i] >>> + file, line = out[i].split(':') >>> + # addr2line may return 'line (discriminator [0-9]+)' sometimes, >>> + # remove this to keep only line number. >>> + line = line.split(' ')[0] >>> + s.set_loc(file, line) >>> class BinaryFile: >>> def __init__(self, path, map_offset): >>> self.fullpath = os.path.realpath(path) >>> self.map_offset = map_offset >>> - with open(path, 'rb') as f: >>> - self.elf = ELFFile(f) >>> - self.symbols = elf_func_symbols(self.elf) >>> + self.symbols = get_symbols(self.fullpath) >>> + find_symbols_locations(self.fullpath, self.symbols) >>> def path(self): >>> return self.fullpath >>> @@ -56,7 +83,7 @@ def addr_start(self): >>> def addr_end(self): >>> last_sym = self.symbols[-1] >>> - return symbol_addr(last_sym) + symbol_size(last_sym) + >>> self.map_offset >>> + return last_sym.addr + last_sym.size + self.map_offset >>> def generate_symbol_file(self, prefix_symbols): >>> binary_name = os.path.basename(self.fullpath) >>> @@ -66,14 +93,21 @@ def generate_symbol_file(self, prefix_symbols): >>> # print hexadecimal addresses on 48 bits >>> addrx = "0>12x" >>> for s in self.symbols: >>> - addr = symbol_addr(s) >>> + addr = s.addr >>> addr = f'{addr:{addrx}}' >>> - size = f'{symbol_size(s):{addrx}}' >>> - name = symbol_name(s) >>> + size = f'{s.size:{addrx}}' >>> if prefix_symbols: >>> - name = f'{binary_name}:{name}' >>> + name = f'{binary_name}:{s.name}' >>> print(addr, size, 'T', name, file=sym_file) >>> + def generate_debug_file(self): >>> + binary_name = os.path.basename(self.fullpath) >>> + dbg_file_path = f'./uftrace.data/{binary_name}.dbg' >> Prefer os.path.join(). >> >>> + with open(dbg_file_path, 'w') as dbg_file: >>> + for s in self.symbols: >>> + print(f'F: {hex(s.addr)} {s.name}', file=dbg_file) >>> + print(f'L: {s.line} {s.file}', file=dbg_file) >>> + >>> def parse_parameter(p): >>> s = p.split(":") >>> path = s[0] >>> @@ -84,7 +118,7 @@ def parse_parameter(p): >>> offset = s[1] >>> if not offset.startswith('0x'): >>> err = f'offset "{offset}" is not an hexadecimal constant. ' >>> - err += 'It should starts with "0x".' >>> + err += 'It should start with "0x".' >>> raise ValueError(err) >>> offset = int(offset, 16) >>> return path, offset >>> @@ -124,7 +158,8 @@ def generate_map(binaries): >>> def main(): >>> parser = argparse.ArgumentParser(description= >>> - 'generate symbol files for uftrace') >>> + 'generate symbol files for uftrace. ' >>> + 'Require binutils (nm and >>> addr2line).') >>> parser.add_argument('elf_file', nargs='+', >>> help='path to an ELF file. ' >>> 'Use /path/to/file:0xdeadbeef to add a mapping >>> offset.') >>> @@ -145,6 +180,7 @@ def main(): >>> for b in binaries: >>> b.generate_symbol_file(args.prefix_symbols) >>> + b.generate_debug_file() >>> generate_map(binaries) >>> >> No blocking comments: >> Reviewed-by: Philippe Mathieu-Daudé <[email protected]> >> > > Thanks Philippe. > > For the cross compiled tools, I'm not really sure it's worth making > this more complex. Having tooling for a cross architecture is an > advanced setup, and I think it's fair to expect someone to have > binutils installed if they have any cross compiler and cross binutils. > Plus, they can always create a symlink if needed. > > Alex, can you eventually integrate the (other) cosmetic changes? > If not, I can send a new patch if you prefer.
If you could send a v2 that would be great. > > Thanks, > Pierrick -- Alex Bennée Virtualisation Tech Lead @ Linaro
