Gustavo Romero <[email protected]> writes:
> This commit removes Avocado as a dependency for running the
> reverse_debugging test.
>
> The main benefit, beyond eliminating an extra dependency, is that there
> is no longer any need to handle GDB packets manually. This removes the
> need for ad-hoc functions dealing with endianness and arch-specific
> register numbers, making the test easier to read. The timeout variable
> is also removed, since Meson now manages timeouts automatically.
>
> reverse_debugging now uses the pygdbmi module to interact with GDB, if
> it's available in the test environment, otherwise the test is skipped.
> GDB is detect via the QEMU_TEST_GDB env. variable.
>
> This commit also significantly improves the output for the test and
> now prints all the GDB commands used in sequence. It also adds
> some clarifications to existing comments, for example, clarifying that
> once the replay-break is reached, a SIGINT is captured in GDB.
>
> reverse_debugging is kept "skipped" for aarch64, ppc64, and x86_64, so
> won't run unless QEMU_TEST_FLAKY_TESTS=1 is set in the test environment,
> before running 'make check-functional' or 'meson test [...]'.
>
> Signed-off-by: Gustavo Romero <[email protected]>
> ---
> tests/functional/reverse_debugging.py | 308 ++++++++++++++++----------
> 1 file changed, 188 insertions(+), 120 deletions(-)
>
> diff --git a/tests/functional/reverse_debugging.py
> b/tests/functional/reverse_debugging.py
> index f9a1d395f1..38161beab8 100644
> --- a/tests/functional/reverse_debugging.py
> +++ b/tests/functional/reverse_debugging.py
> @@ -1,21 +1,94 @@
> -# Reverse debugging test
> -#
> # SPDX-License-Identifier: GPL-2.0-or-later
> #
> +# Reverse debugging test
> +#
> # Copyright (c) 2020 ISP RAS
> +# Copyright (c) 2025 Linaro Limited
> #
> # Author:
> # Pavel Dovgalyuk <[email protected]>
> +# Gustavo Romero <[email protected]> (Run without Avocado)
> #
> # This work is licensed under the terms of the GNU GPL, version 2 or
> # later. See the COPYING file in the top-level directory.
> -import os
> +
> import logging
> +import os
> +import re
> +import subprocess
> +from pygdbmi.gdbcontroller import GdbController
> +from pygdbmi.constants import GdbTimeoutError
> +
>
> from qemu_test import LinuxKernelTest, get_qemu_img
> from qemu_test.ports import Ports
>
>
> +class GDB:
> + def __init__(self, gdb_path, echo=True, suffix='# ', prompt="$ "):
> + gdb_cmd = [gdb_path, "-q", "--interpreter=mi2"]
> + self.gdbmi = GdbController(gdb_cmd)
> + self.echo = echo
> + self.suffix = suffix
> + self.prompt = prompt
> +
> +
> + def get_payload(self, response, kind):
> + output = []
> + for o in response:
> + # Unpack payloads of the same type.
> + _type, _, payload, *_ = o.values()
> + if _type == kind:
> + output += [payload]
> +
> + # Some output lines do not end with \n but begin with it,
> + # so remove the leading \n and merge them with the next line
> + # that ends with \n.
> + lines = [line.lstrip('\n') for line in output]
> + lines = "".join(lines)
> + lines = lines.splitlines(keepends=True)
> +
> + return lines
> +
> +
> + def cli(self, cmd, timeout=4.0):
> + self.response = self.gdbmi.write(cmd, timeout_sec=timeout)
> + self.cmd_output = self.get_payload(self.response, "console")
> + if self.echo:
> + print(self.suffix + self.prompt + cmd)
> +
> + if len(self.cmd_output) > 0:
> + cmd_output = self.suffix.join(self.cmd_output)
> + print(self.suffix + cmd_output, end="")
> +
> + return self
> +
> +
> + def get_addr(self):
> + pattern = r"0x[0-9A-Fa-f]+"
> + cmd_output = "".join(self.cmd_output)
> + match = re.search(pattern, cmd_output)
> +
> + return int(match[0], 16) if match else None
> +
> +
> + def get_log(self):
> + r = self.get_payload(self.response, kind="log")
> + r = "".join(r)
> +
> + return r
> +
> +
> + def get_console(self):
> + r = "".join(self.cmd_output)
> +
> + return r
> +
> +
> + def exit(self):
> + self.gdbmi.exit()
> +
> +
Could this re-factor into a class have been a separate commit?
> class ReverseDebugging(LinuxKernelTest):
> """
> Test GDB reverse debugging commands: reverse step and reverse continue.
> @@ -28,21 +101,17 @@ class ReverseDebugging(LinuxKernelTest):
> that the execution is stopped at the last of them.
> """
>
> - timeout = 10
> STEPS = 10
> - endian_is_le = True
>
> def run_vm(self, record, shift, args, replay_path, image_path, port):
> - from avocado.utils import datadrainer
> -
> logger = logging.getLogger('replay')
> vm = self.get_vm(name='record' if record else 'replay')
> vm.set_console()
> if record:
> - logger.info('recording the execution...')
> + logger.info('Recording the execution...')
Mixing capitalisation fixes with logical change makes reviewing a pain.
> mode = 'record'
> else:
> - logger.info('replaying the execution...')
> + logger.info('Replaying the execution...')
> mode = 'replay'
> vm.add_args('-gdb', 'tcp::%d' % port, '-S')
> vm.add_args('-icount', 'shift=%s,rr=%s,rrfile=%s,rrsnapshot=init' %
> @@ -52,145 +121,144 @@ def run_vm(self, record, shift, args, replay_path,
> image_path, port):
> if args:
> vm.add_args(*args)
> vm.launch()
> - console_drainer = datadrainer.LineLogger(vm.console_socket.fileno(),
> - logger=self.log.getChild('console'),
> - stop_check=(lambda : not
> vm.is_running()))
> - console_drainer.start()
> - return vm
I suspect dropping the console drainer could be a separate commit like
in Daniels series.
>
> - @staticmethod
> - def get_reg_le(g, reg):
> - res = g.cmd(b'p%x' % reg)
> - num = 0
> - for i in range(len(res))[-2::-2]:
> - num = 0x100 * num + int(res[i:i + 2], 16)
> - return num
> -
> - @staticmethod
> - def get_reg_be(g, reg):
> - res = g.cmd(b'p%x' % reg)
> - return int(res, 16)
> -
> - def get_reg(self, g, reg):
> - # value may be encoded in BE or LE order
> - if self.endian_is_le:
> - return self.get_reg_le(g, reg)
> - else:
> - return self.get_reg_be(g, reg)
> -
> - def get_pc(self, g):
> - return self.get_reg(g, self.REG_PC)
> -
> - def check_pc(self, g, addr):
> - pc = self.get_pc(g)
> - if pc != addr:
> - self.fail('Invalid PC (read %x instead of %x)' % (pc, addr))
> -
> - @staticmethod
> - def gdb_step(g):
> - g.cmd(b's', b'T05thread:01;')
> -
> - @staticmethod
> - def gdb_bstep(g):
> - g.cmd(b'bs', b'T05thread:01;')
> + return vm
>
> @staticmethod
> def vm_get_icount(vm):
> return vm.qmp('query-replay')['return']['icount']
>
> def reverse_debugging(self, shift=7, args=None):
> - from avocado.utils import gdb
> - from avocado.utils import process
> -
> logger = logging.getLogger('replay')
>
> - # create qcow2 for snapshots
> - logger.info('creating qcow2 image for VM snapshots')
> + # Create qcow2 for snapshots
> + logger.info('Creating qcow2 image for VM snapshots')
> image_path = os.path.join(self.workdir, 'disk.qcow2')
> qemu_img = get_qemu_img(self)
> if qemu_img is None:
> self.skipTest('Could not find "qemu-img", which is required to '
> 'create the temporary qcow2 image')
> cmd = '%s create -f qcow2 %s 128M' % (qemu_img, image_path)
> - process.run(cmd)
> + r = subprocess.run(cmd, capture_output=True, shell=True, text=True)
> + logger.info(r.args)
> + logger.info(r.stdout)
>
> replay_path = os.path.join(self.workdir, 'replay.bin')
>
> - # record the log
> + # Record the log.
> vm = self.run_vm(True, shift, args, replay_path, image_path, -1)
> while self.vm_get_icount(vm) <= self.STEPS:
> pass
> last_icount = self.vm_get_icount(vm)
> vm.shutdown()
>
> - logger.info("recorded log with %s+ steps" % last_icount)
> + logger.info("Recorded log with %s+ steps" % last_icount)
> +
> + # Replay and run debug commands.
> + gdb_cmd = os.getenv('QEMU_TEST_GDB')
> + if not gdb_cmd:
> + test.skipTest(f"Test skipped because there is no GDB
> available!")
This fails:
test: qemu:func-thorough+func-aarch64-thorough+thorough /
func-aarch64-reverse_debug
start time: 09:24:25
duration: 0.88s
result: exit status 1
command: ASAN_OPTIONS=halt_on_error=1:abort_on_error=1:print_summary=1
MALLOC_PERTURB_=220
QEMU_TEST_QEMU_BINARY=/home/alex/lsrc/qemu.git/builds/all/qemu-system-aarch64
LD_LIBRARY_PATH=/home/alex/lsrc/qemu.git/builds/all/contrib/plugins:/home/alex/lsrc/qemu.git/builds/all/tests/tcg/plugins
UBSAN_OPTIONS=halt_on_error=1:abort_on_error=1:print
_summary=1:print_stacktrace=1
PYTHONPATH=/home/alex/lsrc/qemu.git/python:/home/alex/lsrc/qemu.git/tests/functional
RUST_BACKTRACE=1 QEMU_BUILD_ROOT=/home/alex/lsrc/qemu.git/b
uilds/all
MSAN_OPTIONS=halt_on_error=1:abort_on_error=1:print_summary=1:print_stacktrace=1
QEMU_TEST_QEMU_IMG=/home/alex/lsrc/qemu.git/builds/all/qemu-img
MESON_TEST_ITERATIO
N=1 /home/alex/lsrc/qemu.git/builds/all/pyvenv/bin/python3
/home/alex/lsrc/qemu.git/tests/functional/aarch64/test_reverse_debug.py
----------------------------------- stdout -----------------------------------
TAP version 13
not ok 1 test_reverse_debug.ReverseDebugging_AArch64.test_aarch64_virt
1..1
----------------------------------- stderr -----------------------------------
Traceback (most recent call last):
File
"/home/alex/lsrc/qemu.git/tests/functional/aarch64/test_reverse_debug.py", line
31, in test_aarch64_virt
self.reverse_debugging(args=('-kernel', kernel_path))
~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/alex/lsrc/qemu.git/tests/functional/reverse_debugging.py", line
160, in reverse_debugging
test.skipTest(f"Test skipped because there is no GDB available!")
^^^^
NameError: name 'test' is not defined
More information on
test_reverse_debug.ReverseDebugging_AArch64.test_aarch64_virt could be found
here:
/home/alex/lsrc/qemu.git/builds/all/tests/functional/aarch64/test_reverse_debug.ReverseDebugging_AArch64.test_aarch64_virt/base.log
/home/alex/lsrc/qemu.git/builds/all/tests/functional/aarch64/test_reverse_debug.ReverseDebugging_AArch64.test_aarch64_virt/console.log
(test program exited with status code 1)
Not sure why though as:
cat config-host.mak
# Automatically generated by configure - do not modify
all:
SRC_PATH=/home/alex/lsrc/qemu.git
TARGET_DIRS=aarch64-linux-user aarch64_be-linux-user alpha-linux-user
arm-linux-user armeb-linux-user hexagon-linux-user hppa-linux-user
i386-linux-user loongarch64-linux-user m68k-linux-user microblaze-linux-user
microblazeel-linux-user mips-linux-user mips64-linux-user mips64el-linux-user
mipsel-linux-user mipsn32-linux-user mipsn32el-linux-user or1k-linux-user
ppc-linux-user ppc64-linux-user ppc64le-linux-user riscv32-linux-user
riscv64-linux-user s390x-linux-user sh4-linux-user sh4eb-linux-user
sparc-linux-user sparc32plus-linux-user sparc64-linux-user x86_64-linux-user
xtensa-linux-user xtensaeb-linux-user aarch64-softmmu alpha-softmmu arm-softmmu
avr-softmmu hppa-softmmu i386-softmmu loongarch64-softmmu m68k-softmmu
microblaze-softmmu microblazeel-softmmu mips-softmmu mips64-softmmu
mips64el-softmmu mipsel-softmmu or1k-softmmu ppc-softmmu ppc64-softmmu
riscv32-softmmu riscv64-softmmu rx-softmmu s390x-softmmu sh4-softmmu
sh4eb-softmmu sparc-softmmu sparc64-softmmu tricore-softmmu x86_64-softmmu
xtensa-softmmu xtensaeb-softmmu
GDB=/usr/bin/gdb-multiarch
>
> - # replay and run debug commands
> with Ports() as ports:
> port = ports.find_free_port()
> vm = self.run_vm(False, shift, args, replay_path, image_path,
> port)
> - logger.info('connecting to gdbstub')
> - g = gdb.GDBRemote('127.0.0.1', port, False, False)
> - g.connect()
> - r = g.cmd(b'qSupported')
> - if b'qXfer:features:read+' in r:
> - g.cmd(b'qXfer:features:read:target.xml:0,ffb')
> - if b'ReverseStep+' not in r:
> - self.fail('Reverse step is not supported by QEMU')
> - if b'ReverseContinue+' not in r:
> - self.fail('Reverse continue is not supported by QEMU')
> -
> - logger.info('stepping forward')
> - steps = []
> - # record first instruction addresses
> - for _ in range(self.STEPS):
> - pc = self.get_pc(g)
> - logger.info('saving position %x' % pc)
> - steps.append(pc)
> - self.gdb_step(g)
> -
> - # visit the recorded instruction in reverse order
> - logger.info('stepping backward')
> - for addr in steps[::-1]:
> - self.gdb_bstep(g)
> - self.check_pc(g, addr)
> - logger.info('found position %x' % addr)
> -
> - # visit the recorded instruction in forward order
> - logger.info('stepping forward')
> - for addr in steps:
> - self.check_pc(g, addr)
> - self.gdb_step(g)
> - logger.info('found position %x' % addr)
> -
> - # set breakpoints for the instructions just stepped over
> - logger.info('setting breakpoints')
> - for addr in steps:
> - # hardware breakpoint at addr with len=1
> - g.cmd(b'Z1,%x,1' % addr, b'OK')
> -
> - # this may hit a breakpoint if first instructions are executed
> - # again
> - logger.info('continuing execution')
> - vm.qmp('replay-break', icount=last_icount - 1)
> - # continue - will return after pausing
> - # This could stop at the end and get a T02 return, or by
> - # re-executing one of the breakpoints and get a T05 return.
> - g.cmd(b'c')
> - if self.vm_get_icount(vm) == last_icount - 1:
> - logger.info('reached the end (icount %s)' % (last_icount - 1))
> - else:
> - logger.info('hit a breakpoint again at %x (icount %s)' %
> - (self.get_pc(g), self.vm_get_icount(vm)))
>
> - logger.info('running reverse continue to reach %x' % steps[-1])
> - # reverse continue - will return after stopping at the breakpoint
> - g.cmd(b'bc', b'T05thread:01;')
> + try:
> + gdb = GDB(gdb_cmd)
>
> - # assume that none of the first instructions is executed again
> - # breaking the order of the breakpoints
> - self.check_pc(g, steps[-1])
> - logger.info('successfully reached %x' % steps[-1])
> + logger.info('Connecting to gdbstub...')
>
> - logger.info('exiting gdb and qemu')
> - vm.shutdown()
> + gdb.cli("set debug remote 1")
> +
> + c = gdb.cli(f"target remote localhost:{port}").get_console()
> + if not f"Remote debugging using localhost:{port}" in c:
> + self.fail("Could not connect to gdbstub!")
> +
> + # Remote debug messages are in 'log' payloads.
> + r = gdb.get_log()
> + if 'ReverseStep+' not in r:
> + self.fail('Reverse step is not supported by QEMU')
> + if 'ReverseContinue+' not in r:
> + self.fail('Reverse continue is not supported by QEMU')
> +
> + gdb.cli("set debug remote 0")
> +
> + logger.info('Stepping forward')
> + steps = []
> + # Record first instruction addresses.
> + for _ in range(self.STEPS):
> + pc = gdb.cli("print $pc").get_addr()
> + logger.info('Saving position %x' % pc)
> + steps.append(pc)
> +
> + gdb.cli("stepi")
> +
> + # Visit the recorded instructions in reverse order.
> + logger.info('Stepping backward')
> + for saved_pc in steps[::-1]:
> + logger.info('Found position %x' % saved_pc)
> + gdb.cli("reverse-stepi")
> + pc = gdb.cli("print $pc").get_addr()
> + if pc != saved_pc:
> + logger.info('Invalid PC (read %x instead of %x)' % (pc,
> saved_pc))
> + self.fail('Reverse stepping failed!')
> +
> + # Visit the recorded instructions in forward order.
> + logger.info('Stepping forward')
> + for saved_pc in steps:
> + logger.info('Found position %x' % saved_pc)
> + pc = gdb.cli("print $pc").get_addr()
> + if pc != saved_pc:
> + logger.info('Invalid PC (read %x instead of %x)' % (pc,
> saved_pc))
> + self.fail('Forward stepping failed!')
> +
> + gdb.cli("stepi")
> +
> + # Set breakpoints for the instructions just stepped over.
> + logger.info('Setting breakpoints')
> + for saved_pc in steps:
> + gdb.cli(f"break *{hex(saved_pc)}")
> +
> + # This may hit a breakpoint if first instructions are executed
> again.
> + logger.info('Continuing execution')
> + vm.qmp('replay-break', icount=last_icount - 1)
> + # continue - will return after pausing.
> + # This can stop at the end of the replay-break and gdb gets a
> SIGINT,
> + # or by re-executing one of the breakpoints and gdb stops at a
> + # breakpoint.
> + gdb.cli("continue")
> +
> + pc = gdb.cli("print $pc").get_addr()
> + current_icount = self.vm_get_icount(vm)
> + if current_icount == last_icount - 1:
> + print(f"# **** Hit replay-break at icount={current_icount},
> pc={hex(pc)} ****")
> + logger.info('Reached the end (icount %s)' % (current_icount))
> + else:
> + print(f"# **** Hit breakpoint at icount={current_icount},
> pc={hex(pc)} ****")
> + logger.info('Hit a breakpoint again at %x (icount %s)' %
> + (pc, current_icount))
> +
> + logger.info('Running reverse continue to reach %x' % steps[-1])
> + # reverse-continue - will return after stopping at the
> breakpoint.
> + gdb.cli("reverse-continue")
> +
> + # Assume that none of the first instructions are executed again
> + # breaking the order of the breakpoints.
> + # steps[-1] is the first saved $pc in reverse order.
> + pc = gdb.cli("print $pc").get_addr()
> + first_pc_in_rev_order = steps[-1]
> + if pc == first_pc_in_rev_order:
> + print(f"# **** Hit breakpoint at the first PC in reverse
> order ({hex(pc)}) ****")
> + logger.info('Successfully reached breakpoint at %x' %
> first_pc_in_rev_order)
> + else:
> + logger.info('Failed to reach breakpoint at %x' %
> first_pc_in_rev_order)
> + self.fail("'reverse-continue' did not hit the first PC in
> reverse order!")
> +
> + logger.info('Exiting GDB and QEMU...')
> + gdb.exit()
> + vm.shutdown()
> +
> + logger.info('Test passed.')
> +
> + except GdbTimeoutError:
> + self.fail("Connection to gdbstub timeouted...")
--
Alex Bennée
Virtualisation Tech Lead @ Linaro