On Thu, Feb 22, 2024 at 10:52:56AM +0530, Aditya Gupta wrote:
> Currently, gdb passthroughs of 'bt', 'frame', 'up', 'down', 'info
> locals' don't work. This is due to gdb not knowing the register values to
> unwind the stack frames
>
> Every gdb passthrough goes through `gdb_interface`. And then, gdb expects
> `crash_target::fetch_registers` to give it the register values, which is
> dependent on `machdep->get_cpu_reg` to read the register values for
> specific architecture.
>
> ----------------------------
> gdb passthrough (eg. "bt") | |
> crash -------------------------> | |
> | gdb_interface |
> | |
> | |
> | ---------------------- |
> fetch_registers | | | |
> crash_target<-------------------------+--| gdb | |
> --------------------------+->| | |
> Registers (SP,NIP, etc.)| | | |
> | | | |
> | ---------------------- |
> ----------------------------
>
> Implement `machdep->get_cpu_reg` on PPC64, so that crash provides the
> register values to gdb to unwind stack frames properly
>
> With these changes, on powerpc, 'bt' command output in gdb mode, will look
> like this:
>
> gdb> bt
> #0 0xc0000000002a53e8 in crash_setup_regs (oldregs=<optimized out>,
> newregs=0xc00000000486f8d8) at ./arch/powerpc/include/asm/kexec.h:69
> #1 __crash_kexec (regs=<optimized out>) at kernel/kexec_core.c:974
> #2 0xc000000000168918 in panic (fmt=<optimized out>) at
> kernel/panic.c:358
> #3 0xc000000000b735f8 in sysrq_handle_crash (key=<optimized out>) at
> drivers/tty/sysrq.c:155
> #4 0xc000000000b742cc in __handle_sysrq (key=key@entry=99,
> check_mask=check_mask@entry=false) at drivers/tty/sysrq.c:602
> #5 0xc000000000b7506c in write_sysrq_trigger (file=<optimized out>,
> buf=<optimized out>, count=2, ppos=<optimized out>) at
> drivers/tty/sysrq.c:1163
> #6 0xc00000000069a7bc in pde_write (ppos=<optimized out>,
> count=<optimized out>, buf=<optimized out>, file=<optimized out>,
> pde=0xc000000009ed3a80) at fs/proc/inode.c:340
> #7 proc_reg_write (file=<optimized out>, buf=<optimized out>,
> count=<optimized out>, ppos=<optimized out>) at fs/proc/inode.c:352
> #8 0xc0000000005b3bbc in vfs_write (file=file@entry=0xc00000009dda7d00,
> buf=buf@entry=0xebcfc7c6040 <error: Cannot access memory at address
> 0xebcfc7c6040>, count=count@entry=2, pos=pos@entry=0xc00000000486fda0) at
> fs/read_write.c:582
>
> instead of earlier output without this patch:
>
> gdb> bt
> #0 <unavailable> in ?? ()
> Backtrace stopped: previous frame identical to this frame (corrupt stack?)
>
> Also, 'get_dumpfile_regs' has been introduced to get registers from
> multiple supported vmcore formats. Correspondingly a flag 'BT_NO_PRINT_REGS'
> has been introduced to tell helper functions to get registers, to not
> print registers with every call to backtrace in gdb.
>
> Note: This feature to support GDB unwinding doesn't support live debugging
>
> Signed-off-by: Aditya Gupta <[email protected]>
> ---
> defs.h | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> kernel.c | 33 +++++++++++++++
> ppc64.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++++--
> 3 files changed, 263 insertions(+), 3 deletions(-)
>
> diff --git a/defs.h b/defs.h
> index 98650e8780bf..414f039b35f1 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -6095,6 +6095,7 @@ int load_module_symbols_helper(char *);
> void unlink_module(struct load_module *);
> int check_specified_module_tree(char *, char *);
> int is_system_call(char *, ulong);
> +void get_dumpfile_regs(struct bt_info*, ulong*, ulong*);
> void generic_dump_irq(int);
> void generic_get_irq_affinity(int);
> void generic_show_interrupts(int, ulong *);
> @@ -6194,6 +6195,7 @@ ulong cpu_map_addr(const char *type);
> #define BT_REGS_NOT_FOUND (0x4000000000000ULL)
> #define BT_OVERFLOW_STACK (0x8000000000000ULL)
> #define BT_SKIP_IDLE (0x10000000000000ULL)
> +#define BT_NO_PRINT_REGS (0x20000000000000ULL)
> #define BT_SYMBOL_OFFSET (BT_SYMBOLIC_ARGS)
>
> #define BT_REF_HEXVAL (0x1)
> @@ -8050,4 +8052,124 @@ enum x86_64_regnum {
> LAST_REGNUM
> };
>
> +/*
> + * Register numbers to make crash_target->fetch_registers()
> + * ---> machdep->get_cpu_reg() work properly.
> + *
> + * These register numbers and names are given according to output of
> + * `rs6000_register_name`, because that is what was being used by
> + * crash_target::fetch_registers in case of PPC64
> + */
> +enum ppc64_regnum {
> + PPC64_R0_REGNUM = 0,
> + PPC64_R1_REGNUM,
> + PPC64_R2_REGNUM,
> + PPC64_R3_REGNUM,
> + PPC64_R4_REGNUM,
> + PPC64_R5_REGNUM,
> + PPC64_R6_REGNUM,
> + PPC64_R7_REGNUM,
> + PPC64_R8_REGNUM,
> + PPC64_R9_REGNUM,
> + PPC64_R10_REGNUM,
> + PPC64_R11_REGNUM,
> + PPC64_R12_REGNUM,
> + PPC64_R13_REGNUM,
> + PPC64_R14_REGNUM,
> + PPC64_R15_REGNUM,
> + PPC64_R16_REGNUM,
> + PPC64_R17_REGNUM,
> + PPC64_R18_REGNUM,
> + PPC64_R19_REGNUM,
> + PPC64_R20_REGNUM,
> + PPC64_R21_REGNUM,
> + PPC64_R22_REGNUM,
> + PPC64_R23_REGNUM,
> + PPC64_R24_REGNUM,
> + PPC64_R25_REGNUM,
> + PPC64_R26_REGNUM,
> + PPC64_R27_REGNUM,
> + PPC64_R28_REGNUM,
> + PPC64_R29_REGNUM,
> + PPC64_R30_REGNUM,
> + PPC64_R31_REGNUM,
> +
> + PPC64_F0_REGNUM = 32,
> + PPC64_F1_REGNUM,
> + PPC64_F2_REGNUM,
> + PPC64_F3_REGNUM,
> + PPC64_F4_REGNUM,
> + PPC64_F5_REGNUM,
> + PPC64_F6_REGNUM,
> + PPC64_F7_REGNUM,
> + PPC64_F8_REGNUM,
> + PPC64_F9_REGNUM,
> + PPC64_F10_REGNUM,
> + PPC64_F11_REGNUM,
> + PPC64_F12_REGNUM,
> + PPC64_F13_REGNUM,
> + PPC64_F14_REGNUM,
> + PPC64_F15_REGNUM,
> + PPC64_F16_REGNUM,
> + PPC64_F17_REGNUM,
> + PPC64_F18_REGNUM,
> + PPC64_F19_REGNUM,
> + PPC64_F20_REGNUM,
> + PPC64_F21_REGNUM,
> + PPC64_F22_REGNUM,
> + PPC64_F23_REGNUM,
> + PPC64_F24_REGNUM,
> + PPC64_F25_REGNUM,
> + PPC64_F26_REGNUM,
> + PPC64_F27_REGNUM,
> + PPC64_F28_REGNUM,
> + PPC64_F29_REGNUM,
> + PPC64_F30_REGNUM,
> + PPC64_F31_REGNUM,
> +
> + PPC64_PC_REGNUM = 64,
> + PPC64_MSR_REGNUM = 65,
> + PPC64_CR_REGNUM = 66,
> + PPC64_LR_REGNUM = 67,
> + PPC64_CTR_REGNUM = 68,
> + PPC64_XER_REGNUM = 69,
> + PPC64_FPSCR_REGNUM = 70,
> +
> + PPC64_VR0_REGNUM = 106,
> + PPC64_VR1_REGNUM,
> + PPC64_VR2_REGNUM,
> + PPC64_VR3_REGNUM,
> + PPC64_VR4_REGNUM,
> + PPC64_VR5_REGNUM,
> + PPC64_VR6_REGNUM,
> + PPC64_VR7_REGNUM,
> + PPC64_VR8_REGNUM,
> + PPC64_VR9_REGNUM,
> + PPC64_VR10_REGNUM,
> + PPC64_VR11_REGNUM,
> + PPC64_VR12_REGNUM,
> + PPC64_VR13_REGNUM,
> + PPC64_VR14_REGNUM,
> + PPC64_VR15_REGNUM,
> + PPC64_VR16_REGNUM,
> + PPC64_VR17_REGNUM,
> + PPC64_VR18_REGNUM,
> + PPC64_VR19_REGNUM,
> + PPC64_VR20_REGNUM,
> + PPC64_VR21_REGNUM,
> + PPC64_VR22_REGNUM,
> + PPC64_VR23_REGNUM,
> + PPC64_VR24_REGNUM,
> + PPC64_VR25_REGNUM,
> + PPC64_VR26_REGNUM,
> + PPC64_VR27_REGNUM,
> + PPC64_VR28_REGNUM,
> + PPC64_VR29_REGNUM,
> + PPC64_VR30_REGNUM,
> + PPC64_VR31_REGNUM,
> +
> + PPC64_VSCR_REGNUM = 138,
> + PPC64_VRSAVE_REGNU = 139
> +};
> +
> #endif /* !GDB_COMMON */
> diff --git a/kernel.c b/kernel.c
> index 1728b70c1b5c..661ef0e237eb 100644
> --- a/kernel.c
> +++ b/kernel.c
> @@ -3533,6 +3533,39 @@ get_lkcd_regs(struct bt_info *bt, ulong *eip, ulong
> *esp)
> machdep->get_stack_frame(bt, eip, esp);
> }
>
> +void
> +get_dumpfile_regs(struct bt_info *bt, ulong *eip, ulong *esp)
> +{
> + bt->flags |= BT_NO_PRINT_REGS;
> +
> + if (NETDUMP_DUMPFILE())
> + get_netdump_regs(bt, eip, esp);
> + else if (KDUMP_DUMPFILE())
> + get_kdump_regs(bt, eip, esp);
> + else if (DISKDUMP_DUMPFILE())
> + get_diskdump_regs(bt, eip, esp);
> + else if (KVMDUMP_DUMPFILE())
> + get_kvmdump_regs(bt, eip, esp);
> + else if (LKCD_DUMPFILE())
> + get_lkcd_regs(bt, eip, esp);
> + else if (XENDUMP_DUMPFILE())
> + get_xendump_regs(bt, eip, esp);
> + else if (SADUMP_DUMPFILE())
> + get_sadump_regs(bt, eip, esp);
> + else if (VMSS_DUMPFILE())
> + get_vmware_vmss_regs(bt, eip, esp);
> + else if (REMOTE_PAUSED()) {
> + if (!is_task_active(bt->task) || !get_remote_regs(bt, eip, esp))
> + machdep->get_stack_frame(bt, eip, esp);
> + } else
> + machdep->get_stack_frame(bt, eip, esp);
> +
> + bt->flags &= ~BT_NO_PRINT_REGS;
> +
> + bt->instptr = *eip;
> + bt->stkptr = *esp;
> +}
> +
>
> /*
> * Store the head of the kernel module list for future use.
> diff --git a/ppc64.c b/ppc64.c
> index e8930a139e0d..870b0fbb49ce 100644
> --- a/ppc64.c
> +++ b/ppc64.c
> @@ -55,6 +55,8 @@ static void ppc64_set_bt_emergency_stack(enum
> emergency_stack_type type,
> static char * ppc64_check_eframe(struct ppc64_pt_regs *);
> static void ppc64_print_eframe(char *, struct ppc64_pt_regs *,
> struct bt_info *);
> +static int ppc64_get_cpu_reg(int cpu, int regno, const char *name, int size,
> + void *value);
> static void parse_cmdline_args(void);
> static int ppc64_paca_percpu_offset_init(int);
> static void ppc64_init_cpu_info(void);
> @@ -704,6 +706,8 @@ ppc64_init(int when)
> error(FATAL, "cannot malloc hwirqstack buffer
> space.");
> }
>
> + machdep->get_cpu_reg = ppc64_get_cpu_reg;
> +
> ppc64_init_paca_info();
>
> if (!machdep->hz) {
> @@ -2501,6 +2505,105 @@ ppc64_print_eframe(char *efrm_str, struct
> ppc64_pt_regs *regs,
> ppc64_print_nip_lr(regs, 1);
> }
>
> +static int
> +ppc64_get_cpu_reg(int cpu, int regno, const char *name, int size,
> + void *value)
> +{
> + struct bt_info bt_info, bt_setup;
> + struct task_context *tc;
> + ulong task;
> + struct ppc64_pt_regs *pt_regs;
> + ulong ip, sp;
> +
> + if (LIVE()) {
> + /* doesn't support reading registers in live dump */
> + return FALSE;
> + }
What do you think if we remove the LIVE() check for v10? According to my
test(with my x86_64 patchsets), with ppc64 arbitrary task stack unwinding
support, we can view the stack unwinding for non-active tasks for live debug:
crash> sys
KERNEL: /usr/lib/debug/lib/modules/5.14.0-425.el9.ppc64le/vmlinux
DUMPFILE: /proc/kcore
CPUS: 8
DATE: Tue Feb 27 04:19:36 EST 2024
UPTIME: 01:11:56
LOAD AVERAGE: 0.26, 0.09, 1.70
TASKS: 196
NODENAME: ibm-p9z-26-lp13.virt.pnr.lab.eng.rdu2.redhat.com
RELEASE: 5.14.0-425.el9.ppc64le
VERSION: #1 SMP Wed Feb 21 15:29:04 EST 2024
MACHINE: ppc64le (2900 Mhz)
MEMORY: 8 GB
crash> set 1
PID: 1
COMMAND: "systemd"
TASK: c0000000035fc900 [THREAD_INFO: c0000000035fc900]
CPU: 1
STATE: TASK_INTERRUPTIBLE
crash> bt
PID: 1 TASK: c0000000035fc900 CPU: 1 COMMAND: "systemd"
#0 [c00000000369fa60] __schedule at c000000000fc3c58
#1 [c00000000369fb20] schedule at c000000000fc411c
#2 [c00000000369fb50] schedule_hrtimeout_range_clock at c000000000fcd2a4
#3 [c00000000369fc00] ep_poll at c00000000063640c
#4 [c00000000369fcf0] do_epoll_wait at c000000000636584
#5 [c00000000369fd40] sys_epoll_wait at c000000000636608
#6 [c00000000369fdb0] system_call_exception at c00000000002e994
#7 [c00000000369fe10] system_call_vectored_common at c00000000000bfe8
crash> gdb bt
#0 0xc000000000fc3c58 in context_switch (rf=0xc00000000369fac0, next=0x0,
prev=0x0, rq=0x0) at kernel/sched/core.c:5409
#1 __schedule (sched_mode=sched_mode@entry=0) at kernel/sched/core.c:6737
#2 0xc000000000fc411c in schedule_loop (sched_mode=<optimized out>) at
kernel/sched/core.c:6807
#3 schedule () at kernel/sched/core.c:6816
#4 0xc000000000fcd2a4 in schedule_hrtimeout_range_clock (expires=<optimized
out>, delta=<optimized out>, mode=<optimized out>, clock_id=<optimized out>) at
kernel/time/hrtimer.c:2297
#5 0xc00000000063640c in ep_poll (ep=0xc000000034b32e80, events=0x0,
events@entry=0x1001d23a9e0, maxevents=maxevents@entry=82,
timeout=timeout@entry=0x0) at fs/eventpoll.c:1904
#6 0xc000000000636584 in do_epoll_wait (epfd=epfd@entry=4,
events=events@entry=0x1001d23a9e0, maxevents=maxevents@entry=82, to=0x0) at
fs/eventpoll.c:2288
#7 0xc000000000636608 in __do_sys_epoll_wait (timeout=<optimized out>,
maxevents=82, events=0x1001d23a9e0, epfd=4) at fs/eventpoll.c:2300
#8 __se_sys_epoll_wait (epfd=4, events=1100000504288, maxevents=82,
timeout=<optimized out>) at fs/eventpoll.c:2295
#9 0xc00000000002e994 in system_call_exception (r3=4, r4=1100000504288, r5=82,
r6=-1, r7=140735513318112, r8=140735513287592, r0=<optimized out>,
regs=0xc00000000369fe80) at arch/powerpc/kernel/interrupt.c:221
#10 0xc00000000000bfe8 in system_call_vectored_common () at
arch/powerpc/kernel/interrupt_64.S:209
Backtrace stopped: previous frame inner to this frame (corrupt stack?)
However for those active tasks in live mode, stack unwind will fail, but I
think it doesn't matter because "bt" and "gdb bt" both cannot show stacks:
crash> ps
PID PPID CPU TASK ST %MEM VSZ RSS COMM
0 0 0 c000000002af6380 RU 0.0 0 0 [swapper/0]
> 0 0 1 c0000000035f9000 RU 0.0 0 0 [swapper/1]
> 0 0 2 c0000000035f0180 RU 0.0 0 0 [swapper/2]
> 0 0 3 c0000000035c5580 RU 0.0 0 0 [swapper/3]
> 0 0 4 c0000000035fac80 RU 0.0 0 0 [swapper/4]
> 0 0 5 c0000000035c7200 RU 0.0 0 0 [swapper/5]
> 0 0 6 c0000000035c0000 RU 0.0 0 0 [swapper/6]
> 0 0 7 c0000000035dc800 RU 0.0 0 0 [swapper/7]
1 0 1 c0000000035fc900 IN 0.2 182272 14016 systemd
crash> set c0000000035f0180
PID: 0
COMMAND: "swapper/2"
TASK: c0000000035f0180 (1 of 8) [THREAD_INFO: c0000000035f0180]
CPU: 2
STATE: TASK_RUNNING (ACTIVE)
crash> bt
PID: 0 TASK: c0000000035f0180 CPU: 2 COMMAND: "swapper/2"
(active)
crash> gdb bt
#0 0xc000000003847d50 in ?? ()
gdb: invalid kernel virtual address: fffffffffffffffc type: "gdb_readmem
callback"
gdb: invalid kernel virtual address: fffffffffffffff8 type: "gdb_readmem
callback"
gdb: invalid kernel virtual address: fffffffffffffffc type: "gdb_readmem
callback"
gdb: invalid kernel virtual address: fffffffffffffff8 type: "gdb_readmem
callback"
#1 0x0000000000000000 in ?? ()
What do you think?
Thanks,
Tao Liu
> +
> + /* Currently only handling registers available in ppc64_pt_regs:
> + *
> + * 0-31: r0-r31
> + * 64: pc/nip
> + * 65: msr
> + *
> + * 67: lr
> + * 68: ctr
> + */
> + switch (regno) {
> + case PPC64_R0_REGNUM ... PPC64_R31_REGNUM:
> +
> + case PPC64_PC_REGNUM:
> + case PPC64_MSR_REGNUM:
> + case PPC64_LR_REGNUM:
> + case PPC64_CTR_REGNUM:
> + break;
> +
> + default:
> + // return false if we can't get that register
> + if (CRASHDEBUG(1))
> + error(WARNING, "unsupported register, regno=%d\n",
> regno);
> + return FALSE;
> + }
> +
> + task = get_active_task(cpu);
> + tc = task_to_context(task);
> + BZERO(&bt_setup, sizeof(struct bt_info));
> + clone_bt_info(&bt_setup, &bt_info, tc);
> + fill_stackbuf(&bt_info);
> +
> + // reusing the get_dumpfile_regs function to get pt regs structure
> + get_dumpfile_regs(&bt_info, &sp, &ip);
> + pt_regs = (struct ppc64_pt_regs *)bt_info.machdep;
> +
> + if (!pt_regs) {
> + error(WARNING, "pt_regs not available for cpu %d\n", cpu);
> + return FALSE;
> + }
> +
> + switch (regno) {
> + case PPC64_R0_REGNUM ... PPC64_R31_REGNUM:
> + if (size != sizeof(pt_regs->gpr[regno]))
> + return FALSE; // size mismatch
> +
> + memcpy(value, &pt_regs->gpr[regno], size);
> + break;
> +
> + case PPC64_PC_REGNUM:
> + if (size != sizeof(pt_regs->nip))
> + return FALSE; // size mismatch
> +
> + memcpy(value, &pt_regs->nip, size);
> + break;
> +
> + case PPC64_MSR_REGNUM:
> + if (size != sizeof(pt_regs->msr))
> + return FALSE; // size mismatch
> +
> + memcpy(value, &pt_regs->msr, size);
> + break;
> +
> + case PPC64_LR_REGNUM:
> + if (size != sizeof(pt_regs->link))
> + return FALSE; // size mismatch
> +
> + memcpy(value, &pt_regs->link, size);
> + break;
> +
> + case PPC64_CTR_REGNUM:
> + if (size != sizeof(pt_regs->ctr))
> + return FALSE; // size mismatch
> +
> + memcpy(value, &pt_regs->ctr, size);
> + break;
> + }
> +
> + /* free buffer allocated by fill_stackbuf */
> + if (bt_info.stackbuf)
> + FREEBUF(bt_info.stackbuf);
> +
> + return TRUE;
> +}
> +
> /*
> * For vmcore typically saved with KDump or FADump, get SP and IP values
> * from the saved ptregs.
> @@ -2613,9 +2716,11 @@ ppc64_get_dumpfile_stack_frame(struct bt_info *bt_in,
> ulong *nip, ulong *ksp)
> pt_regs = (struct ppc64_pt_regs *)bt->machdep;
> ur_nip = pt_regs->nip;
> ur_ksp = pt_regs->gpr[1];
> - /* Print the collected regs for panic task. */
> - ppc64_print_regs(pt_regs);
> - ppc64_print_nip_lr(pt_regs, 1);
> + if (!(bt->flags & BT_NO_PRINT_REGS)) {
> + /* Print the collected regs for panic task. */
> + ppc64_print_regs(pt_regs);
> + ppc64_print_nip_lr(pt_regs, 1);
> + }
> } else if ((pc->flags & KDUMP) ||
> ((pc->flags & DISKDUMP) &&
> (*diskdump_flags & KDUMP_CMPRS_LOCAL))) {
> --
> 2.43.0
>
--
Crash-utility mailing list -- [email protected]
To unsubscribe send an email to [email protected]
https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/
Contribution Guidelines: https://github.com/crash-utility/crash/wiki