This is an automated email from the ASF dual-hosted git repository. xiaoxiang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nuttx.git
commit 8edb9283bad1a90f0a4e977f1a9a60fd683c0a60 Author: Gao Jiawei <gaojia...@xiaomi.com> AuthorDate: Tue Jul 9 11:26:27 2024 +0800 trivial modification on the utility module 1. add a macro-related getter interface; for now we haven't fully implemented the way to expand and evaluate macros at runtime. We just deal with some macros that can be expanded and evaluated into essential constants that will be needed later. 2. rearrange utility functions in a different order 3. reimplement the get register API to make it more commonly used Signed-off-by: Gao Jiawei <gaojia...@xiaomi.com> --- tools/gdb/macros.py | 176 +++++++++++++++++++++++++++++++++++ tools/gdb/thread.py | 22 +---- tools/gdb/utils.py | 263 +++++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 385 insertions(+), 76 deletions(-) diff --git a/tools/gdb/macros.py b/tools/gdb/macros.py new file mode 100644 index 0000000000..b74c47e2e7 --- /dev/null +++ b/tools/gdb/macros.py @@ -0,0 +1,176 @@ +############################################################################ +# tools/gdb/macros.py +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. The +# ASF licenses this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance with the +# License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+# +############################################################################ + +# NOTE: GDB stores macro information based on the current stack frame's scope, +# including the source file and line number. Therefore, there may be missing +# macro definitions when you are at different stack frames. +# +# To resolve this issue, we need to retrieve all macro information from the ELF file +# then parse and evaluate it by ourselves. +# +# There might be two ways to achieve this, one is to leverage the C preprocessor +# to directly preprocess all the macros instereted into python constants +# gcc -E -x c -P <file_with_macros> -I/path/to/nuttx/include +# +# While the other way is to leverage the dwarf info stored in the ELF file, +# with -g3 switch, we have a `.debug_macro` section containing all the information +# about the macros. +# +# Currently, we using the second method. + +import os +import re +import subprocess +import tempfile + +PUNCTUATORS = [ + "\[", "\]", "\(", "\)", "\{", "\}", "\?", ";", ",", "~", + "\.\.\.", "\.", + "\-\>", "\-\-", "\-\=", "\-", + "\+\+", "\+\=", "\+", + "\*\=", "\*", + "\!\=", "\!", + "\&\&", "\&\=", "\&", + "\/\=", "\/", + "\%\>", "%:%:", "%:", "%=", "%", + "\^\=", "\^", + "\#\#", "\#", + "\:\>", "\:", + "\|\|", "\|\=", "\|", + "<<=", "<<", "<=", "<:", "<%", "<", + ">>=", ">>", ">=", ">", + "\=\=", "\=", +] + + +def parse_macro(line, macros, pattern): + # grep name, value + # the first group matches the token, the second matches the replacement + m = pattern.match(line) + if not m: + return False + + name, value = m.group(1), m.group(2) + + if name in macros: + # FIXME: what should we do if we got a redefinition/duplication here? 
+ # for now I think it's ok just overwrite the old value + pass + + # emplace, for all undefined macros we evalute it to zero + macros[name] = value if value else "0" + + return True + + +def fetch_macro_info(file): + if not os.path.isfile(file): + raise FileNotFoundError("No given ELF target found") + + # FIXME: we don't use subprocess here because + # it's broken on some GDB distribution :(, I haven't + # found a solution to it. + + with tempfile.NamedTemporaryFile(delete=False) as f1: + + # # os.system(f"readelf -wm {file} > {output}") + process = subprocess.Popen( + f"readelf -wm {file}", + shell=True, + stdout=f1, + stderr=subprocess.STDOUT) + + process.communicate() + errcode = process.returncode + + f1.close() + + if errcode != 0: + return {} + + p = re.compile(".*macro[ ]*:[ ]*([\S]+\(.*?\)|[\w]+)[ ]*(.*)") + macros = {} + + with open(f1.name, 'rb') as f2: + for line in f2.readlines(): + line = line.decode("utf-8") + if not line.startswith(" DW_MACRO_define") and \ + not line.startswith(" DW_MACRO_undef"): + continue + + if not parse_macro(line, macros, p): + print(f"Failed to parse {line}") + + return macros + + +def split_tokens(expr): + p = "(" + "|".join(PUNCTUATORS) + ")" + res = list(filter(lambda e : e != "", + map(lambda e: e.rstrip().lstrip(), re.split(p, expr)))) + return res + + +def do_expand(expr, macro_map): + if expr in PUNCTUATORS: + return expr + + tokens = split_tokens(expr) + + res = [] + + for t in tokens: + if t not in macro_map: + res.append(t) + continue + res += do_expand(macro_map[t], macro_map) + + return res + + +# NOTE: Implement a fully functional parser which can +# preprocessing all the C marcos according to ISO 9899 standard +# may be an overkill, what we really care about are those +# macros that can be evaluted to an constant value. 
+# +# #define A (B + C + D) +# #define B 1 +# #define C 2 +# #define D 3 +# invoking try_expand('A', macro_map) will give you "(1 + 2 + 3)" +# +# However, +# #define SUM(B,C,D) (B + C + D) +# invoking try_expand('SUM(1,2,3)', macro_map) will give you "SUM(1,2,3)" +# +# We have not implemented this feature as we have not found a practical +# use case for it in our GDB plugin. +# +# However, you can switch to the correct stack frame that has this macro defined +# and let GDB expand and evaluate it for you if you really want to evalue some very +# complex macros. + + +def try_expand(expr, macro): + res = [] + + res += do_expand(expr, macro) + + return "".join(res) diff --git a/tools/gdb/thread.py b/tools/gdb/thread.py index e05780ead8..2e5c9c11f1 100644 --- a/tools/gdb/thread.py +++ b/tools/gdb/thread.py @@ -118,24 +118,6 @@ class Nxsetregs(gdb.Command): i += 1 -def get_pc_value(tcb): - arch = gdb.selected_frame().architecture() - tcbinfo = gdb.parse_and_eval("g_tcbinfo") - - i = 0 - for reg in arch.registers(): - if reg.name == "pc" or reg.name == "rip" or reg.name == "eip": - break - i += 1 - - regs = tcb["xcp"]["regs"].cast(gdb.lookup_type("char").pointer()) - value = gdb.Value(regs + tcbinfo["reg_off"]["p"][i]).cast( - gdb.lookup_type("uintptr_t").pointer() - )[0] - - return int(value) - - class Nxinfothreads(gdb.Command): def __init__(self): super(Nxinfothreads, self).__init__("info threads", gdb.COMMAND_USER) @@ -162,10 +144,10 @@ class Nxinfothreads(gdb.Command): if pidhash[i]["task_state"] == gdb.parse_and_eval("TSTATE_TASK_RUNNING"): index = "*%s" % i - pc = int(gdb.parse_and_eval("$pc")) + pc = utils.get_register_byname(utils.get_arch_pc_name(), tcb=None) else: index = " %s" % i - pc = get_pc_value(pidhash[i]) + pc = utils.get_register_byname(utils.get_arch_pc_name(), tcb=pidhash[i]) thread = "Thread 0x%x" % pidhash[i] diff --git a/tools/gdb/utils.py b/tools/gdb/utils.py index 17474a2609..13ea083c94 100644 --- a/tools/gdb/utils.py +++ b/tools/gdb/utils.py @@ 
-20,8 +20,10 @@ # ############################################################################ -import gdb import re +import gdb +from macros import fetch_macro_info, try_expand + class CachedType: """Cache a type object, so that we can reconnect to the new_objfile event""" @@ -38,7 +40,8 @@ class CachedType: if self._type is None: self._type = gdb.lookup_type(self._name) if self._type is None: - raise gdb.GdbError("cannot resolve type '{0}'".format(self._name)) + raise gdb.GdbError( + "cannot resolve type '{0}'".format(self._name)) if hasattr(gdb, "events") and hasattr(gdb.events, "new_objfile"): gdb.events.new_objfile.connect(self._new_objfile_handler) return self._type @@ -47,6 +50,44 @@ class CachedType: long_type = CachedType("long") +class MacroCtx: + """ + This is a singleton class wich only initializes once to + cache a context of macro definition which can be queried later + TODO: we only deal with single ELF at the moment for simplicity + If you load more object files while debugging, only the first one gets loaded + will be used to retrieve macro information + """ + + def __new__(cls, *args, **kwargs): + if not hasattr(cls, 'instance'): + cls.instance = super(MacroCtx, cls).__new__(cls) + return cls.instance + + def __init__(self, filename): + self._macro_map = {} + self._file = filename + + self._macro_map = fetch_macro_info(filename) + + @property + def macro_map(self): + return self._macro_map + + @property + def objfile(self): + return self._file + + +if len(gdb.objfiles()) > 0: + macroctx = MacroCtx(gdb.objfiles()[0].filename) +else: + raise gdb.GdbError("An executable file must be provided") + + +# Common Helper Functions + + def get_long_type(): """Return the cached long type object""" global long_type @@ -76,13 +117,90 @@ class ContainerOf(gdb.Function): def invoke(self, ptr, typename, elementname): return container_of( - ptr, gdb.lookup_type(typename.string()).pointer(), elementname.string() + ptr, gdb.lookup_type( + 
typename.string()).pointer(), elementname.string() ) ContainerOf() +def gdb_eval_or_none(expresssion): + """Evaluate an expression and return None if it fails""" + try: + return gdb.parse_and_eval(expresssion) + except gdb.error: + return None + + +def get_symbol_value(name): + """Return the value of a symbol value etc: Variable, Marco""" + + expr = None + + try: + gdb.execute("set $_%s = %s" % (name, name)) + expr = "$_%s" % (name) + except gdb.error: + expr = try_expand(name, macroctx.macro_map) + return gdb_eval_or_none(expr) + + +def hexdump(address, size): + inf = gdb.inferiors()[0] + mem = inf.read_memory(address, size) + bytes = mem.tobytes() + for i in range(0, len(bytes), 16): + chunk = bytes[i:i + 16] + gdb.write(f"{i + address:08x} ") + hex_values = " ".join(f"{byte:02x}" for byte in chunk) + hex_display = f"{hex_values:<47}" + gdb.write(hex_display) + ascii_values = "".join(chr(byte) if 32 <= byte + <= 126 else "." for byte in chunk) + gdb.write(f" {ascii_values} \n") + + +def is_decimal(s): + return re.fullmatch(r"\d+", s) is not None + + +def is_hexadecimal(s): + return re.fullmatch(r"0[xX][0-9a-fA-F]+|[0-9a-fA-F]+", s) is not None + + +class Hexdump(gdb.Command): + """hexdump address/symbol <size>""" + + def __init__(self): + super(Hexdump, self).__init__("hexdump", gdb.COMMAND_USER) + + def invoke(self, args, from_tty): + argv = args.split(" ") + address = 0 + size = 0 + if (argv[0] == ""): + gdb.write("Usage: hexdump address/symbol <size>\n") + return + + if is_decimal(argv[0]) or is_hexadecimal(argv[0]): + address = int(argv[0], 0) + size = int(argv[1], 0) + else: + var = gdb.parse_and_eval(f'{argv[0]}') + address = int(var.address) + size = int(var.type.sizeof) + gdb.write(f"{argv[0]} {hex(address)} {int(size)}\n") + + hexdump(address, size) + + +Hexdump() + + +# Machine Specific Helper Functions + + BIG_ENDIAN = 0 LITTLE_ENDIAN = 1 target_endianness = None @@ -112,7 +230,7 @@ def read_memoryview(inf, start, length): def read_u16(buffer, offset): 
"""Read a 16-bit unsigned integer from a buffer""" - buffer_val = buffer[offset : offset + 2] + buffer_val = buffer[offset: offset + 2] value = [0, 0] if type(buffer_val[0]) is str: @@ -155,23 +273,39 @@ def read_ulong(buffer, offset): target_arch = None -def is_target_arch(arch): - """Return True if the target architecture is ARCH""" +def is_target_arch(arch, exact=False): + """ + For non exactly match, this function will + return True if the target architecture contains + keywords of an ARCH family. For example, x86 is + contained in i386:x86_64. + For exact match, this function will return True if + the target architecture is exactly the same as ARCH. + """ if hasattr(gdb.Frame, "architecture"): - return arch in gdb.newest_frame().architecture().name() + archname = gdb.newest_frame().architecture().name() + + return arch in archname \ + if not exact else arch == archname else: global target_arch if target_arch is None: target_arch = gdb.execute("show architecture", to_string=True) - return arch in target_arch + pattern = r'set to "(.*?)"\s*(\(currently (".*")\))?' + match = re.search(pattern, target_arch) + candidate = match.group(1) -def gdb_eval_or_none(expresssion): - """Evaluate an expression and return None if it fails""" - try: - return gdb.parse_and_eval(expresssion) - except gdb.error: - return None + if candidate == "auto": + target_arch = match.group(3) + else: + target_arch = candidate + + return arch in target_arch \ + if not exact else arch == target_arch + + +# Kernel Specific Helper Functions def is_target_smp(): @@ -183,55 +317,72 @@ def is_target_smp(): return False -def get_symbol_value(name): - """Return the value of a symbol value etc: Variable, Marco""" +# FIXME: support RISC-V/X86/ARM64 etc. 
+def in_interrupt_context(cpuid=0): + frame = gdb.selected_frame() - gdb.execute("set $_%s = %s" % (name, name)) - return gdb.parse_and_eval("$_%s" % name) + if is_target_arch("arm"): + xpsr = int(frame.read_register('xpsr')) + return xpsr & 0xf + else: + # TODO: figure out a more proper way to detect if + # we are in an interrupt context + g_current_regs = gdb_eval_or_none("g_current_regs") + return not g_current_regs[cpuid] + + +def get_arch_sp_name(): + if is_target_arch("arm", exact=True): + return "sp" + elif is_target_arch("i386", exact=True): + return "esp" + elif is_target_arch("i386:x86-64", exact=True): + return "rsp" + else: + raise gdb.GdbError("Not implemented yet") -def hexdump(address, size): - inf = gdb.inferiors()[0] - mem = inf.read_memory(address, size) - bytes = mem.tobytes() - for i in range(0, len(bytes), 16): - chunk = bytes[i:i+16] - gdb.write(f"{i + address:08x} ") - hex_values = " ".join(f"{byte:02x}" for byte in chunk) - hex_display = f"{hex_values:<47}" - gdb.write(hex_display) - ascii_values = "".join(chr(byte) if 32 <= byte <= 126 else "." 
for byte in chunk) - gdb.write(f" {ascii_values} \n") +def get_arch_pc_name(): + if is_target_arch("arm", exact=True): + return "pc" + elif is_target_arch("i386", exact=True): + return "eip" + elif is_target_arch("i386:x86-64", exact=True): + return "rip" + else: + raise gdb.GdbError("Not implemented yet") -def is_decimal(s): - return re.fullmatch(r"\d+", s) is not None -def is_hexadecimal(s): - return re.fullmatch(r"0[xX][0-9a-fA-F]+|[0-9a-fA-F]+", s) is not None +def get_register_byname(regname, tcb=None): + frame = gdb.selected_frame() -class Hexdump(gdb.Command): - """hexdump address/symbol <size>""" + # If no tcb is given then we can directly used the register from + # the cached frame by GDB + if not tcb: + return int(frame.read_register(regname)) - def __init__(self): - super(Hexdump, self).__init__("hexdump", gdb.COMMAND_USER) - def invoke(self, args, from_tty): - argv = args.split(" ") - argc = len(argv) - address = 0 - size = 0 - if (argv[0] == ""): - gdb.write("Usage: hexdump address/symbol <size>\n") - return + # Ok, let's take it from the context in the given tcb + arch = frame.architecture() + tcbinfo = gdb.parse_and_eval("g_tcbinfo") - if is_decimal(argv[0]) or is_hexadecimal(argv[0]): - address = int(argv[0], 0) - size = int(argv[1], 0) - else: - var = gdb.parse_and_eval(f'{argv[0]}') - address = int(var.address) - size = int(var.type.sizeof) - gdb.write(f"{argv[0]} {hex(address)} {int(size)}\n") + i = 0 + for reg in arch.registers(): + if reg.name == regname: + break + i += 1 - hexdump(address, size) + regs = tcb["xcp"]["regs"].cast(gdb.lookup_type("char").pointer()) + value = gdb.Value(regs + tcbinfo["reg_off"]["p"][i]).cast( + gdb.lookup_type("uintptr_t").pointer() + )[0] -Hexdump() + return int(value) + + +def get_tcbs(): + # In case we have created/deleted tasks at runtime, the tcbs will change + # so keep it as fresh as possible + pidhash = gdb.parse_and_eval("g_pidhash") + npidhash = gdb.parse_and_eval("g_npidhash") + + return [pidhash[i] for 
i in range(0, npidhash) if pidhash[i]]