This is an automated email from the ASF dual-hosted git repository. simbit18 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nuttx.git
commit adddfc3bfda337fa71975407e1165e29467200aa Author: anjiahao <[email protected]> AuthorDate: Fri Nov 8 09:38:25 2024 +0800 abi_check.py:Check ABI compatibility between different ELF versions. 1. The input consists of multiple static libraries and an ELF file. The tool searches for external APIs used by the static libraries, then locates these API function signatures in the ELF file, and outputs the results as a JSON file. 2. Using the first feature, with the static libraries unchanged, the tool can take a new ELF file and an old ELF file as input, output two JSON files, and compare the function signatures of functions with the same name in the two JSON files. The comparison includes return values, parameters, and if they are structures, it also compares the structure size, member offsets, member types, etc. 3.When the input is a single ELF file, the tool can check if structures with the same name have different members. Signed-off-by: anjiahao <[email protected]> --- tools/abi_check.py | 594 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 594 insertions(+) diff --git a/tools/abi_check.py b/tools/abi_check.py new file mode 100755 index 00000000000..148c06f62ca --- /dev/null +++ b/tools/abi_check.py @@ -0,0 +1,594 @@ +#!/usr/bin/env python3 +# tools/abi_check.py +# +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. The +# ASF licenses this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance with the +# License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# + +import argparse +import json +import os +import re +import subprocess +import sys +import tempfile +from itertools import zip_longest + +from elftools.elf.elffile import ELFFile + +program_description = """ +This tool is used to check the binary compatibility of static libraries and has the following features: + 1. The input consists of multiple static libraries and an ELF file. The tool searches + for external APIs used by the static libraries, then locates these API function signatures + in the ELF file, and outputs the results as a JSON file. + 2. Using the first feature, with the static libraries unchanged, + the tool can take a new ELF file and an old ELF file as input, output two JSON files, + and compare the function signatures of functions with the same name in the two JSON files. + The comparison includes return values, parameters, and if they are structures, + it also compares the structure size, member offsets, member types, etc. + 3.When the input is a single ELF file, the tool can check if structures with the same name have different members. +""" + +struct_re = re.compile(r"struct\s+(\w+)\s+{") +member_re = re.compile(r"{(.*?)};", re.DOTALL) +file_re = re.compile(r"/\*\s*<[\da-f]+>\s+([\w\/\.\-_]+\.h:\d+)\s*\*/", re.DOTALL) + + +def member_mismatch(member1, member2): + member1 = member1.replace("_Bool", "bool") + member1 = member1.replace(" ", "") + member2 = member2.replace("_Bool", "bool") + member2 = member2.replace(" ", "") + + return member1 != member2 + + +def print_struct(name, member, fileinfo): + print(f"struct {name} {{") + print(member) + print(f"}}; at {fileinfo}") + + +def struct_check(elf): + p = subprocess.Popen( + ["pahole", "-M", "--sort", "-I", elf], + env=os.environ.copy(), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + out, err = p.communicate() + if p.returncode != 0: + print(f"Error: {err}") + sys.exit(1) + + struct_blocks = re.split(r"/\* Used at:", out) + structs = {} + for block in struct_blocks: + name_match = struct_re.search(block) + if not name_match: + continue + + struct_name = name_match.group(1) + if not struct_name: + continue + + member_match = member_re.search(block) + if not member_match: + continue + + file_match = file_re.search(block) + if not file_match: + continue + + members = member_match.group(1) + fileinfo = file_match.group(1) + if struct_name in structs and member_mismatch(structs[struct_name][0], members): + print_struct(struct_name, structs[struct_name][0], structs[struct_name][1]) + print("------") + print_struct(struct_name, members, fileinfo) + print("") + + else: + structs[struct_name] = (members, fileinfo) + + +def objfile_iter(path): + """Get .obj file from .a file""" + + temp_dir = tempfile.mkdtemp() + for p in path: + subprocess.run(["ar", "x", p], cwd=temp_dir) + + for filename in os.listdir(temp_dir): + yield os.path.join(temp_dir, filename) + + +def symbol_collect(path): + """get undefined and defined symbols from an object file""" + + obj = ELFFile.load_from_path(path) + symtab = obj.get_section_by_name(".symtab") + if symtab is None: + return (None, None) + + undef = [] + defined = [] + + for symbol in symtab.iter_symbols(): + if ( + symbol["st_info"]["type"] == "STT_NOTYPE" + and symbol["st_info"]["bind"] == "STB_GLOBAL" + ): + undef.append(symbol.name) + else: + defined.append(symbol.name) + + return undef, defined + + +def resolve_type(dwarfinfo, die): + """Get type name and size from DIE""" + + if die is None: + return ("void", 0) + + type_size = attr.value if (attr := die.attributes.get("DW_AT_byte_size")) else 0 + + def get_name(die, prefix="", suffix="", default="void", size=0): + name = die.attributes.get("DW_AT_name") + return ( + f"{prefix}{name.value.decode('utf-8')}{suffix}" if name else default, + size, + ) + + if die.tag == "DW_TAG_base_type": + return get_name(die, size=type_size) + + if die.tag == "DW_TAG_pointer_type": + base_type_die = resolve_referenced_die(dwarfinfo, die) + if base_type_die: + name, size = resolve_type(dwarfinfo, base_type_die) + return (f"{name}*", type_size) + else: + return ("void*", type_size) + + if die.tag == "DW_TAG_typedef": + base_type_die = resolve_referenced_die(dwarfinfo, die) + _, size = resolve_type(dwarfinfo, base_type_die) + + return get_name(die, default="unknown", size=size) + + if die.tag == "DW_TAG_structure_type": + return get_name( + die, prefix="struct ", default="anonymous struct", size=type_size + ) + + if die.tag == "DW_TAG_union_type": + return get_name(die, prefix="union ", default="anonymous union", size=type_size) + + if die.tag == "DW_TAG_enumeration_type": + return get_name(die, prefix="enum ", default="anonymous enum", size=type_size) + + if die.tag == "DW_TAG_subroutine_type": + base_type_die = resolve_referenced_die(dwarfinfo, die) + return resolve_type(dwarfinfo, base_type_die) + + if die.tag == "DW_TAG_array_type": + base_type_die = resolve_referenced_die(dwarfinfo, die) + name, _ = resolve_type(dwarfinfo, base_type_die) + size = 0 + for child in die.iter_children(): + if ( + child.tag == "DW_TAG_subrange_type" + and "DW_AT_upper_bound" in child.attributes + ): + size = child.attributes["DW_AT_upper_bound"].value + 1 + break + + return (f"{name}[{size}]", size) + + if die.tag == "DW_TAG_const_type": + base_type_die = resolve_referenced_die(dwarfinfo, die) + name, size = resolve_type(dwarfinfo, base_type_die) + + return (f"const {name}", size) + + return ("void", type_size) + + +def resolve_referenced_die(dwarfinfo, die): + """Get the referenced DIE""" + + if "DW_AT_type" in die.attributes: + type_offset = die.attributes["DW_AT_type"].value + cu_offset = die.cu.cu_offset + absolute_offset = type_offset + cu_offset + return dwarfinfo.get_DIE_from_refaddr(absolute_offset) + elif "DW_AT_specification" in die.attributes: + type_offset = die.attributes["DW_AT_specification"].value + cu_offset = die.cu.cu_offset + absolute_offset = type_offset + cu_offset + return dwarfinfo.get_DIE_from_refaddr(absolute_offset()) + + return None + + +def resolve_field_info(dwarfinfo, die): + """Get field information from DIE""" + + if die.tag != "DW_TAG_member": + return None + + field_name = die.attributes.get("DW_AT_name", None) + field_name = field_name.value.decode("utf-8") + field_type_die = resolve_referenced_die(dwarfinfo, die) + field_type, field_size = resolve_type(dwarfinfo, field_type_die) + field_offset = die.attributes.get("DW_AT_data_member_location", None) + field_offset = field_offset.value if field_offset else "unknown" + + return { + "name": field_name, + "type": field_type, + "size": field_size, + "offset": field_offset, + } + + +def find_combination_die(dwarfinfo, die): + if die.tag == "DW_TAG_structure_type" or die.tag == "DW_TAG_structure_type": + return die + + if "DW_AT_type" in die.attributes: + base_type_die = resolve_referenced_die(dwarfinfo, die) + return find_combination_die(dwarfinfo, base_type_die) + + return None + + +def resolve_combination_type(dwarfinfo, die): + combination_die = find_combination_die(dwarfinfo, die) + if not combination_die: + return [] + + field_info = [] + for child in combination_die.iter_children(): + if child.tag == "DW_TAG_member": + field_info.append(resolve_field_info(dwarfinfo, child)) + + return field_info + + +def get_die_file_path(die, dwarfinfo): + cu = die.cu + name = cu.get_top_DIE().attributes.get("DW_AT_name") + return name.value.decode("utf-8") if name else "unknown file" + + +def die_is_prototyped(die): + return ( + die.tag == "DW_TAG_subprogram" + and "DW_AT_external" in die.attributes + and "DW_AT_name" in die.attributes + and "DW_AT_prototyped" in die.attributes + ) + + +def die_is_real_function(die): + return ( + die.tag == "DW_TAG_subprogram" + and "DW_AT_external" in die.attributes + and "DW_AT_name" in die.attributes + and ("DW_AT_low_pc" in die.attributes or "DW_AT_inline" in die.attributes) + ) + + +def find_function_signature(elf_path, function_list, die_check): + + signature = [] + function_list = function_list.copy() + with open(elf_path, "rb") as f: + elffile = ELFFile(f) + if not elffile.has_dwarf_info(): + print("No DWARF debug info found in the ELF file.") + return (signature, function_list) + + dwarf_info = elffile.get_dwarf_info() + + for CU in dwarf_info.iter_CUs(): + for die in (die for die in CU.iter_DIEs() if die_check(die)): + name = die.attributes["DW_AT_name"].value.decode("utf-8") + if "DW_AT_linkage_name" in die.attributes: + if name not in function_list: + name = die.attributes["DW_AT_linkage_name"].value.decode( + "utf-8" + ) + + if name not in function_list: + continue + + prototype = {} + + return_type = "void" + return_type_size = 0 + retrun_type_field = [] + + if "DW_AT_type" in die.attributes: + return_type_ref = resolve_referenced_die(dwarf_info, die) + return_type, return_type_size = resolve_type( + dwarf_info, return_type_ref + ) + retrun_type_field = resolve_combination_type( + dwarf_info, return_type_ref + ) + + prototype["return"] = { + "type": return_type, + "size": return_type_size, + "field": retrun_type_field, + } + prototype["parameters"] = [] + for child in die.iter_children(): + if child.tag != "DW_TAG_formal_parameter": + continue + + param_type_ref = resolve_referenced_die(dwarf_info, child) + param_type, param_type_size = resolve_type( + dwarf_info, param_type_ref + ) + param_name = ( + child.attributes["DW_AT_name"].value.decode("utf-8") + if "DW_AT_name" in child.attributes + else "unnamed" + ) + + param_type_field = resolve_combination_type( + dwarf_info, param_type_ref + ) + prototype["parameters"].append( + { + "name": param_name, + "type": param_type, + "size": param_type_size, + "field": param_type_field, + } + ) + + prototype["file_path"] = get_die_file_path(die, dwarf_info) + signature.append((name, prototype)) + function_list.remove(name) + + dwarf_info._cu_cache.clear() + dwarf_info._cu_offsets_map.clear() + + return (signature, function_list) + + +def print_function_signature(function_name, prototype): + print(f"{prototype['return']['type']} {function_name} (", end="") + for i in range(len(prototype["parameters"])): + print( + f"{prototype['parameters'][i]['type']} {prototype['parameters'][i]['name']}", + end="", + ) + if i != len(prototype["parameters"]) - 1: + print(", ", end="") + print(");") + + print( + f" Return type {prototype['return']['type']} size: {prototype['return']['size']}" + ) + if len(prototype["return"]["field"]) != 0: + print(" Return type fields:") + for field in prototype["return"]["field"]: + print( + f" - Name: {field['name']}, Type: {field['type']}, Size: {field['size']}, Offset: {field['offset']}" + ) + + for param in prototype["parameters"]: + print(f" Parameter {param['name']} type {param['type']} size: {param['size']}") + if len(param["field"]) != 0: + print(" Parameter fields:") + for field in param["field"]: + print( + f" - Name: {field['name']}, Type: {field['type']}, Size: {field['size']}, Offset: {field['offset']}" + ) + + print(f"File at {prototype['file_path']}") + + +def dump_result(result): + for function_name, prototype in result[0]: + print_function_signature(function_name, prototype) + + for u in result[1]: + print(f"Function {u} not found in the elf file") + + +def prototype_diff(prototype1, prototype2): + if prototype1["type"] != prototype2["type"]: + print( + f" Prototype {prototype1['type']} is different with {prototype2['type']}" + ) + + print(f" {prototype1['type']} have Different:") + if prototype1["field"] != [] and prototype2["field"] != []: + for field1, field2 in zip_longest(prototype1["field"], prototype2["field"]): + if field1 is None and field2 is not None: + print(f" Field2 {field2['type']} not found in field1") + continue + + if field1 is None and field2 is not None: + print(f" Field1 {field1['type']} not found in field2") + continue + + if field1["type"] != field2["type"]: + print(f" Field {field1['type']} type is different") + if field1["size"] != field2["size"]: + print(f" Field {field1['type']} size is different") + if field1["offset"] != field2["offset"]: + print(f" Field {field1['type']} offset is different") + + else: + if prototype1["size"] != prototype2["size"]: + print( + f" size is different {prototype1['size']} != {prototype2['size']}" + ) + + +def diff(result1, result2): + dict1 = dict(result1) + dict2 = dict(result2) + + for function_name in dict1.keys(): + if function_name not in dict2: + print(f"Function {function_name} not found in the second result") + continue + + prototype1 = dict1[function_name] + prototype2 = dict2[function_name] + if prototype1["return"] != prototype2["return"]: + print(f"Function {function_name} return type is different") + prototype_diff(prototype1["return"], prototype2["return"]) + print("") + + if len(prototype1["parameters"]) != len(prototype2["parameters"]): + print(f"Function {function_name} parameters count is different") + print("") + continue + + i = 0 + for param1, param2 in zip(prototype1["parameters"], prototype2["parameters"]): + i += 1 + if ( + param1["type"] != param2["type"] + or param1["size"] != param2["size"] + or param1["field"] != param2["field"] + ): + print(f"Function {function_name} parameter {i} is different") + prototype_diff(param1, param2) + print("") + + +def find_signature_lib(args, undef): + signature = [] + for objfile in objfile_iter(args.lib): + if result := find_function_signature(objfile, undef, die_is_prototyped): + signature += result[0] + undef = result[1] + + result = [] + result.append(signature) + result.append(undef) + return result + + +def parse_symbols(args): + undef = [] + defined = [] + + for objfile in objfile_iter(args.lib): + + u, d = symbol_collect(objfile) + if u is not None: + undef.extend(u) + if d is not None: + defined.extend(d) + + undef = list(set(undef)) + defined = list(set(defined)) + for i in range(len(undef) - 1, -1, -1): + if undef[i] in defined: + undef.remove(undef[i]) + + signature, not_found = find_function_signature( + args.elf, undef, die_is_real_function + ) + signature = sorted(signature, key=lambda x: x[0]) + not_found = sorted(not_found) + + result = [] + result.append(signature) + result.append(not_found) + + if args.check: + lib_result = find_signature_lib(args, undef) + json.dump(list(lib_result), open(f"lib_{args.json}", "w"), indent=4) + + if args.dump: + print("Dump Elf function signature") + dump_result(result) + + json.dump(list(result), open(f"{args.json}", "w"), indent=4) + + +def args_parse(): + parser = argparse.ArgumentParser( + description=program_description, formatter_class=argparse.RawTextHelpFormatter + ) + parser.add_argument("-a", "--lib", nargs="+", help="Path to liba.so or lib.a") + + parser.add_argument("-e", "--elf", help="Path to elf file") + parser.add_argument( + "-c", + "--check", + action="store_true", + help="""If the static library contains debug information, +try to find the function in the static library, +and output the result to lib_<json> file + """, + ) + parser.add_argument("-d", "--dump", action="store_true", help="Dump result") + parser.add_argument( + "-j", "--json", default="out.json", help="Save result to json file" + ) + parser.add_argument( + "-s", "--struct_check", action="store_true", help="Dump struct different" + ) + parser.add_argument("-i", "--input_json", nargs=2, help="Diff two json files") + + args = parser.parse_args() + + if (not args.input_json and not args.struct_check) and ( + args.elf is None or args.lib is None + ): + print("Error: elf and lib must be provided") + exit(1) + + if args.struct_check and args.elf is None: + print("Error: elf must be provided") + exit(1) + + if os.path.exists(args.json): + print(f"Warning file {args.json} already exists, Will be overwritten") + + if args.check and os.path.exists(f"lib_{args.json}"): + print(f"Warning file lib_{args.json} already exists, Will be overwritten") + + return args + + +if __name__ == "__main__": + args = args_parse() + if args.input_json: + result1 = json.load(open(args.input_json[0])) + result2 = json.load(open(args.input_json[1])) + diff(result1[0], result2[0]) + elif args.struct_check: + struct_check(args.elf) + else: + parse_symbols(args)
