This is an automated email from the ASF dual-hosted git repository. xiaoxiang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-nuttx.git
commit 87772796e3463472cd62f261e688f17cb8665fbd Author: Lingao Meng <[email protected]> AuthorDate: Wed Dec 1 14:09:20 2021 +0800 tools: Add size report script Add a resource statistics script, which can be used to analyze the resource occupation of ELF files, including BSS, data, ROM, etc. Signed-off-by: Lingao Meng <[email protected]> LICENSE: Add size_report to license file Declare license for intel Corporation. Signed-off-by: Lingao Meng <[email protected]> --- LICENSE | 1 + tools/size_report | 803 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 804 insertions(+) diff --git a/LICENSE b/LICENSE index 6762630..3f095af 100644 --- a/LICENSE +++ b/LICENSE @@ -2397,6 +2397,7 @@ libs/libc/math/lib_copysignf.c drivers/wireless/bluetooth/bt_uart.c drivers/wireless/bluetooth/bt_uart.h wireless/bluetooth +tools/size_report =========================== Copyright (c) 2016, Intel Corporation All rights reserved. diff --git a/tools/size_report b/tools/size_report new file mode 100755 index 0000000..7b4ae7d --- /dev/null +++ b/tools/size_report @@ -0,0 +1,803 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2016, 2020 Intel Corporation +# +# SPDX-License-Identifier: Apache-2.0 + +# Based on a script by: +# Chereau, Fabien <[email protected]> + +""" +Process an ELF file to generate size report on RAM and ROM. +""" + +import argparse +import os +import sys +import re +from pathlib import Path +import json + +from packaging import version + +from colorama import init, Fore + +from anytree import RenderTree, NodeMixin, findall_by_attr +from anytree.exporter import DictExporter + +import elftools +from elftools.elf.elffile import ELFFile +from elftools.elf.sections import SymbolTableSection +from elftools.dwarf.descriptions import describe_form_class +from elftools.dwarf.descriptions import ( + describe_DWARF_expr, set_global_machine_arch) +from elftools.dwarf.locationlists import ( + LocationExpr, LocationParser) + +if version.parse(elftools.__version__) < version.parse('0.24'): + sys.exit("pyelftools is out of date, need version 0.24 or later") + + +# ELF section flags +SHF_WRITE = 0x1 +SHF_ALLOC = 0x2 +SHF_EXEC = 0x4 +SHF_WRITE_ALLOC = SHF_WRITE | SHF_ALLOC +SHF_ALLOC_EXEC = SHF_ALLOC | SHF_EXEC + +DT_LOCATION = re.compile(r"\(DW_OP_addr: ([0-9a-f]+)\)") + +SRC_FILE_EXT = ('.h', '.c', '.hpp', '.cpp', '.hxx', '.cxx', '.c++') + + +def get_symbol_addr(sym): + """Get the address of a symbol""" + return sym['st_value'] + + +def get_symbol_size(sym): + """Get the size of a symbol""" + return sym['st_size'] + + +def is_symbol_in_ranges(sym, ranges): + """ + Given a list of start/end addresses, test if the symbol + lies within any of these address ranges. + """ + for bound in ranges: + if bound['start'] <= sym['st_value'] <= bound['end']: + return True + + return False + + +def get_die_mapped_address(die, parser, dwarfinfo): + """Get the bounding addresses from a DIE variable or subprogram""" + low = None + high = None + + if die.tag == 'DW_TAG_variable': + if 'DW_AT_location' in die.attributes: + loc_attr = die.attributes['DW_AT_location'] + if parser.attribute_has_location(loc_attr, die.cu['version']): + loc = parser.parse_from_attribute(loc_attr, die.cu['version']) + if isinstance(loc, LocationExpr): + addr = describe_DWARF_expr(loc.loc_expr, + dwarfinfo.structs) + + matcher = DT_LOCATION.match(addr) + if matcher: + low = int(matcher.group(1), 16) + high = low + 1 + + if die.tag == 'DW_TAG_subprogram': + if 'DW_AT_low_pc' in die.attributes: + low = die.attributes['DW_AT_low_pc'].value + + high_pc = die.attributes['DW_AT_high_pc'] + high_pc_class = describe_form_class(high_pc.form) + if high_pc_class == 'address': + high = high_pc.value + elif high_pc_class == 'constant': + high = low + high_pc.value + + return low, high + + +def match_symbol_address(symlist, die, parser, dwarfinfo): + """ + Find the symbol from a symbol list + where it matches the address in DIE variable, + or within the range of a DIE subprogram. + """ + low, high = get_die_mapped_address(die, parser, dwarfinfo) + + if low is None: + return None + + for sym in symlist: + if low <= sym['symbol']['st_value'] < high: + return sym + + return None + + +def get_symbols(elf, addr_ranges): + """ + Fetch the symbols from the symbol table and put them + into ROM, RAM buckets. + """ + rom_syms = dict() + ram_syms = dict() + unassigned_syms = dict() + + rom_addr_ranges = addr_ranges['rom'] + ram_addr_ranges = addr_ranges['ram'] + + for section in elf.iter_sections(): + if isinstance(section, SymbolTableSection): + for sym in section.iter_symbols(): + # Ignore symbols with size == 0 + if get_symbol_size(sym) == 0: + continue + + found_sec = False + entry = {'name': sym.name, + 'symbol': sym, + 'mapped_files': set()} + + # If symbol is in ROM area? + if is_symbol_in_ranges(sym, rom_addr_ranges): + if sym.name not in rom_syms: + rom_syms[sym.name] = list() + rom_syms[sym.name].append(entry) + found_sec = True + + # If symbol is in RAM area? + if is_symbol_in_ranges(sym, ram_addr_ranges): + if sym.name not in ram_syms: + ram_syms[sym.name] = list() + ram_syms[sym.name].append(entry) + found_sec = True + + if not found_sec: + unassigned_syms['sym_name'] = entry + + ret = {'rom': rom_syms, + 'ram': ram_syms, + 'unassigned': unassigned_syms} + return ret + + +def get_section_ranges(elf): + """ + Parse ELF header to find out the address ranges of ROM or RAM sections + and their total sizes. + """ + rom_addr_ranges = list() + ram_addr_ranges = list() + rom_size = 0 + ram_size = 0 + + for section in elf.iter_sections(): + size = section['sh_size'] + sec_start = section['sh_addr'] + sec_end = sec_start + size - 1 + bound = {'start': sec_start, 'end': sec_end} + + if section['sh_type'] == 'SHT_NOBITS': + # BSS and noinit sections + ram_addr_ranges.append(bound) + ram_size += size + elif section['sh_type'] == 'SHT_PROGBITS': + # Sections to be in flash or memory + flags = section['sh_flags'] + if (flags & SHF_ALLOC_EXEC) == SHF_ALLOC_EXEC: + # Text section + rom_addr_ranges.append(bound) + rom_size += size + elif (flags & SHF_WRITE_ALLOC) == SHF_WRITE_ALLOC: + # Data occupies both ROM and RAM + # since at boot, content is copied from ROM to RAM + rom_addr_ranges.append(bound) + rom_size += size + + ram_addr_ranges.append(bound) + ram_size += size + elif (flags & SHF_ALLOC) == SHF_ALLOC: + # Read only data + rom_addr_ranges.append(bound) + rom_size += size + + ret = {'rom': rom_addr_ranges, + 'rom_total_size': rom_size, + 'ram': ram_addr_ranges, + 'ram_total_size': ram_size} + return ret + + +def get_die_filename(die, lineprog): + """Get the source code filename associated with a DIE""" + file_index = die.attributes['DW_AT_decl_file'].value + file_entry = lineprog['file_entry'][file_index - 1] + + dir_index = file_entry['dir_index'] + if dir_index == 0: + filename = file_entry.name + else: + directory = lineprog.header['include_directory'][dir_index - 1] + filename = os.path.join(directory, file_entry.name) + + path = Path(filename.decode()) + + # Prepend output path to relative path + if not path.is_absolute(): + output = Path(args.output) + path = output.joinpath(path) + + # Change path to relative to Zephyr base + try: + path = path.resolve() + except OSError as e: + # built-ins can't be resolved, so it's not an issue + if '<built-in>' not in str(path): + raise e + + return path + + +def do_simple_name_matching(elf, symbol_dict, processed): + """ + Sequentially process DIEs in compiler units with direct file mappings + within the DIEs themselves, and do simply matching between DIE names + and symbol names. + """ + mapped_symbols = processed['mapped_symbols'] + mapped_addresses = processed['mapped_addr'] + unmapped_symbols = processed['unmapped_symbols'] + newly_mapped_syms = set() + + dwarfinfo = elf.get_dwarf_info() + location_lists = dwarfinfo.location_lists() + location_parser = LocationParser(location_lists) + + unmapped_dies = set() + + # Loop through all compile units + for compile_unit in dwarfinfo.iter_CUs(): + lineprog = dwarfinfo.line_program_for_CU(compile_unit) + if lineprog is None: + continue + + # Loop through each DIE and find variables and + # subprograms (i.e. functions) + for die in compile_unit.iter_DIEs(): + sym_name = None + + # Process variables + if die.tag == 'DW_TAG_variable': + # DW_AT_declaration + + # having 'DW_AT_location' means this maps + # to an actual address (e.g. not an extern) + if 'DW_AT_location' in die.attributes: + sym_name = die.get_full_path() + + # Process subprograms (i.e. functions) if they are valid + if die.tag == 'DW_TAG_subprogram': + # Refer to another DIE for name + if ('DW_AT_abstract_origin' in die.attributes) or ( + 'DW_AT_specification' in die.attributes): + unmapped_dies.add(die) + + # having 'DW_AT_low_pc' means it maps to + # an actual address + elif 'DW_AT_low_pc' in die.attributes: + # DW_AT_low_pc == 0 is a weak function + # which has been overriden + if die.attributes['DW_AT_low_pc'].value != 0: + sym_name = die.get_full_path() + + # For mangled function names, the linkage name + # is what appears in the symbol list + if 'DW_AT_linkage_name' in die.attributes: + linkage = die.attributes['DW_AT_linkage_name'] + sym_name = linkage.value.decode() + + if sym_name is not None: + # Skip DIE with no reference back to a file + if not 'DW_AT_decl_file' in die.attributes: + continue + + is_die_mapped = False + if sym_name in symbol_dict: + mapped_symbols.add(sym_name) + symlist = symbol_dict[sym_name] + symbol = match_symbol_address(symlist, die, + location_parser, + dwarfinfo) + + if symbol is not None: + symaddr = symbol['symbol']['st_value'] + if symaddr not in mapped_addresses: + is_die_mapped = True + path = get_die_filename(die, lineprog) + symbol['mapped_files'].add(path) + mapped_addresses.add(symaddr) + newly_mapped_syms.add(sym_name) + + if not is_die_mapped: + unmapped_dies.add(die) + + mapped_symbols = mapped_symbols.union(newly_mapped_syms) + unmapped_symbols = unmapped_symbols.difference(newly_mapped_syms) + + processed['mapped_symbols'] = mapped_symbols + processed['mapped_addr'] = mapped_addresses + processed['unmapped_symbols'] = unmapped_symbols + processed['unmapped_dies'] = unmapped_dies + + +def mark_address_aliases(symbol_dict, processed): + """ + Mark symbol aliases as already mapped to prevent + double counting. + + There are functions and variables which are aliases to + other functions/variables. So this marks them as mapped + so they will not get counted again when a tree is being + built for display. + """ + mapped_symbols = processed['mapped_symbols'] + mapped_addresses = processed['mapped_addr'] + unmapped_symbols = processed['unmapped_symbols'] + already_mapped_syms = set() + + for ums in unmapped_symbols: + for one_sym in symbol_dict[ums]: + symbol = one_sym['symbol'] + if symbol['st_value'] in mapped_addresses: + already_mapped_syms.add(ums) + + mapped_symbols = mapped_symbols.union(already_mapped_syms) + unmapped_symbols = unmapped_symbols.difference(already_mapped_syms) + + processed['mapped_symbols'] = mapped_symbols + processed['mapped_addr'] = mapped_addresses + processed['unmapped_symbols'] = unmapped_symbols + + +def do_address_range_matching(elf, symbol_dict, processed): + """ + Match symbols indirectly using address ranges. + + This uses the address ranges of DIEs and map them to symbols + residing within those ranges, and works on DIEs that have not + been mapped in previous steps. This works on symbol names + that do not match the names in DIEs, e.g. "<func>" in DIE, + but "<func>.constprop.*" in symbol name list. This also + helps with mapping the mangled function names in C++, + since the names in DIE are actual function names in source + code and not mangled version of them. + """ + if 'unmapped_dies' not in processed: + return + + mapped_symbols = processed['mapped_symbols'] + mapped_addresses = processed['mapped_addr'] + unmapped_symbols = processed['unmapped_symbols'] + newly_mapped_syms = set() + + dwarfinfo = elf.get_dwarf_info() + location_lists = dwarfinfo.location_lists() + location_parser = LocationParser(location_lists) + + unmapped_dies = processed['unmapped_dies'] + + # Group DIEs by compile units + cu_list = dict() + + for die in unmapped_dies: + cu = die.cu + if cu not in cu_list: + cu_list[cu] = {'dies': set()} + cu_list[cu]['dies'].add(die) + + # Loop through all compile units + for cu in cu_list: + lineprog = dwarfinfo.line_program_for_CU(cu) + + # Map offsets from DIEs + offset_map = dict() + for die in cu.iter_DIEs(): + offset_map[die.offset] = die + + for die in cu_list[cu]['dies']: + if not die.tag == 'DW_TAG_subprogram': + continue + + path = None + + # Has direct reference to file, so use it + if 'DW_AT_decl_file' in die.attributes: + path = get_die_filename(die, lineprog) + + # Loop through indirect reference until a direct + # reference to file is found + if ('DW_AT_abstract_origin' in die.attributes) or ( + 'DW_AT_specification' in die.attributes): + die_ptr = die + while path is None: + if not (die_ptr.tag == 'DW_TAG_subprogram') or not ( + ('DW_AT_abstract_origin' in die_ptr.attributes) or + ('DW_AT_specification' in die_ptr.attributes)): + break + + if 'DW_AT_abstract_origin' in die_ptr.attributes: + ofname = 'DW_AT_abstract_origin' + elif 'DW_AT_specification' in die_ptr.attributes: + ofname = 'DW_AT_specification' + + offset = die_ptr.attributes[ofname].value + offset += die_ptr.cu.cu_offset + + # There is nothing to reference so no need to continue + if offset not in offset_map: + break + + die_ptr = offset_map[offset] + if 'DW_AT_decl_file' in die_ptr.attributes: + path = get_die_filename(die_ptr, lineprog) + + # Nothing to map + if path is not None: + low, high = get_die_mapped_address(die, location_parser, + dwarfinfo) + if low is None: + continue + + for ums in unmapped_symbols: + for one_sym in symbol_dict[ums]: + symbol = one_sym['symbol'] + symaddr = symbol['st_value'] + + if symaddr not in mapped_addresses: + if low <= symaddr < high: + one_sym['mapped_files'].add(path) + mapped_addresses.add(symaddr) + newly_mapped_syms.add(ums) + + mapped_symbols = mapped_symbols.union(newly_mapped_syms) + unmapped_symbols = unmapped_symbols.difference(newly_mapped_syms) + + processed['mapped_symbols'] = mapped_symbols + processed['mapped_addr'] = mapped_addresses + processed['unmapped_symbols'] = unmapped_symbols + + +def set_root_path_for_unmapped_symbols(symbol_dict, addr_range, processed): + """ + Set root path for unmapped symbols. + + Any unmapped symbols are added under the root node if those + symbols reside within the desired memory address ranges + (e.g. ROM or RAM). + """ + mapped_symbols = processed['mapped_symbols'] + mapped_addresses = processed['mapped_addr'] + unmapped_symbols = processed['unmapped_symbols'] + newly_mapped_syms = set() + + for ums in unmapped_symbols: + for one_sym in symbol_dict[ums]: + symbol = one_sym['symbol'] + symaddr = symbol['st_value'] + + if is_symbol_in_ranges(symbol, addr_range): + if symaddr not in mapped_addresses: + path = Path(':') + one_sym['mapped_files'].add(path) + mapped_addresses.add(symaddr) + newly_mapped_syms.add(ums) + + mapped_symbols = mapped_symbols.union(newly_mapped_syms) + unmapped_symbols = unmapped_symbols.difference(newly_mapped_syms) + + processed['mapped_symbols'] = mapped_symbols + processed['mapped_addr'] = mapped_addresses + processed['unmapped_symbols'] = unmapped_symbols + +def find_common_path_prefix(symbol_dict): + """ + Find the common path prefix of all mapped files. + Must be called before set_root_path_for_unmapped_symbols(). + """ + paths = list() + + for _, sym in symbol_dict.items(): + for symbol in sym: + for file in symbol['mapped_files']: + paths.append(file) + + return os.path.commonpath(paths) + + +class TreeNode(NodeMixin): + """ + A symbol node. + """ + + def __init__(self, name, identifier, size=0, parent=None, children=None): + super().__init__() + self.name = name + self.size = size + self.parent = parent + self.identifier = identifier + if children: + self.children = children + + def __repr__(self): + return self.name + + +def sum_node_children_size(node): + """ + Calculate the sum of symbol size of all direct children. + """ + size = 0 + + for child in node.children: + size += child.size + + return size + + +def generate_any_tree(symbol_dict, total_size, path_prefix): + """ + Generate a symbol tree for output. + """ + root = TreeNode('Root', "root") + node_no_paths = TreeNode('(no paths)', ":", parent=root) + + if Path(path_prefix) == Path(args.zephyrbase): + # All source files are under ZEPHYR_BASE so there is + # no need for another level. + node_zephyr_base = root + node_output_dir = root + node_workspace = root + node_others = root + else: + node_zephyr_base = TreeNode('ZEPHYR_BASE', args.zephyrbase) + node_output_dir = TreeNode('OUTPUT_DIR', args.output) + node_others = TreeNode("/", "/") + + if args.workspace: + node_workspace = TreeNode('WORKSPACE', args.workspace) + else: + node_workspace = node_others + + # A set of helper function for building a simple tree with a path-like + # hierarchy. + def _insert_one_elem(root, path, size): + cur = None + node = None + parent = root + for part in path.parts: + if cur is None: + cur = part + else: + cur = str(Path(cur, part)) + + results = findall_by_attr(root, cur, name="identifier") + if results: + item = results[0] + item.size += size + parent = item + else: + if node: + parent = node + node = TreeNode(name=str(part), identifier=cur, size=size, parent=parent) + + # Mapping paths to tree nodes + path_node_map = [ + [Path(args.zephyrbase), node_zephyr_base], + [Path(args.output), node_output_dir], + ] + + if args.workspace: + path_node_map.append( + [Path(args.workspace), node_workspace] + ) + + for name, sym in symbol_dict.items(): + for symbol in sym: + size = get_symbol_size(symbol['symbol']) + for file in symbol['mapped_files']: + path = Path(file, name) + if path.is_absolute(): + has_node = False + + for one_path in path_node_map: + if one_path[0] in path.parents: + path = path.relative_to(one_path[0]) + dest_node = one_path[1] + has_node = True + break + + if not has_node: + dest_node = node_others + else: + dest_node = node_no_paths + + _insert_one_elem(dest_node, path, size) + + + if node_zephyr_base is not root: + # ZEPHYR_BASE and OUTPUT_DIR nodes don't have sum of symbol size + # so calculate them here. + node_zephyr_base.size = sum_node_children_size(node_zephyr_base) + node_output_dir.size = sum_node_children_size(node_output_dir) + + # Find out which nodes need to be in the tree. + # "(no path)", ZEPHYR_BASE nodes are essential. + children = [node_no_paths, node_zephyr_base] + if node_output_dir.height != 0: + # OUTPUT_DIR may be under ZEPHYR_BASE. + children.append(node_output_dir) + if node_others.height != 0: + # Only include "others" node if there is something. + children.append(node_others) + + if args.workspace: + node_workspace.size = sum_node_children_size(node_workspace) + if node_workspace.height != 0: + children.append(node_workspace) + + root.children = children + + root.size = total_size + + # Need to account for code and data where there are not emitted + # symbols associated with them. + node_hidden_syms = TreeNode('(hidden)', "(hidden)", parent=root) + node_hidden_syms.size = root.size - sum_node_children_size(root) + + return root + + +def node_sort(items): + """ + Node sorting used with RenderTree. + """ + return sorted(items, key=lambda item: item.name) + + +def print_any_tree(root, total_size, depth): + """ + Print the symbol tree. + """ + print('{:101s} {:7s} {:8s}'.format( + Fore.YELLOW + "Path", "Size", "%" + Fore.RESET)) + print('=' * 110) + for row in RenderTree(root, childiter=node_sort, maxlevel=depth): + f = len(row.pre) + len(row.node.name) + s = str(row.node.size).rjust(100-f) + percent = 100 * float(row.node.size) / float(total_size) + + cc = cr = "" + if not row.node.children: + if row.node.name != "(hidden)": + cc = Fore.CYAN + cr = Fore.RESET + elif row.node.name.endswith(SRC_FILE_EXT): + cc = Fore.GREEN + cr = Fore.RESET + + print(f"{row.pre}{cc}{row.node.name} {s} {cr}{Fore.BLUE}{percent:6.2f}%{Fore.RESET}") + print('=' * 110) + print(f'{total_size:>101}') + + +def parse_args(): + """ + Parse command line arguments. + """ + global args + + parser = argparse.ArgumentParser() + + parser.add_argument("-k", "--kernel", required=True, + help="Zephyr ELF binary") + parser.add_argument("-z", "--zephyrbase", required=True, + help="Zephyr base path") + parser.add_argument("-q", "--quiet", action="store_true", + help="Do not output anything on the screen.") + parser.add_argument("-o", "--output", required=True, + help="Output path") + parser.add_argument("-w", "--workspace", default=None, + help="Workspace path (Usually the same as WEST_TOPDIR)") + parser.add_argument("target", choices=['rom', 'ram', 'all']) + parser.add_argument("-d", "--depth", dest="depth", + type=int, default=None, + help="How deep should we go into the tree", + metavar="DEPTH") + parser.add_argument("-v", "--verbose", action="store_true", + help="Print extra debugging information") + parser.add_argument("--json", help="store results in a JSON file.") + args = parser.parse_args() + + +def main(): + """ + Main program. + """ + parse_args() + + # Init colorama + init() + + assert os.path.exists(args.kernel), "{0} does not exist.".format(args.kernel) + if args.target == 'ram': + targets = ['ram'] + elif args.target == 'rom': + targets = ['rom'] + elif args.target == 'all': + targets = ['rom', 'ram'] + + for t in targets: + + elf = ELFFile(open(args.kernel, "rb")) + + assert elf.has_dwarf_info(), "ELF file has no DWARF information" + + set_global_machine_arch(elf.get_machine_arch()) + + addr_ranges = get_section_ranges(elf) + + symbols = get_symbols(elf, addr_ranges) + + for sym in symbols['unassigned'].values(): + print("WARN: Symbol '{0}' is not in RAM or ROM".format(sym['name'])) + + symbol_dict = None + + if args.json: + jsonout = args.json + else: + jsonout = os.path.join(args.output, f'{t}.json') + + symbol_dict = symbols[t] + symsize = addr_ranges[f'{t}_total_size'] + ranges = addr_ranges[t] + + if symbol_dict is not None: + processed = {"mapped_symbols": set(), + "mapped_addr": set(), + "unmapped_symbols": set(symbol_dict.keys())} + + do_simple_name_matching(elf, symbol_dict, processed) + mark_address_aliases(symbol_dict, processed) + do_address_range_matching(elf, symbol_dict, processed) + mark_address_aliases(symbol_dict, processed) + common_path_prefix = find_common_path_prefix(symbol_dict) + set_root_path_for_unmapped_symbols(symbol_dict, ranges, processed) + + if args.verbose: + for sym in processed['unmapped_symbols']: + print("INFO: Unmapped symbol: {0}".format(sym)) + + root = generate_any_tree(symbol_dict, symsize, common_path_prefix) + if not args.quiet: + print_any_tree(root, symsize, args.depth) + + exporter = DictExporter() + data = dict() + data["symbols"] = exporter.export(root) + data["total_size"] = symsize + with open(jsonout, "w") as fp: + json.dump(data, fp, indent=4) + + +if __name__ == "__main__": + main()
