On Fri, May 31, 2024 at 5:33 PM <devel-requ...@lists.crash-utility.osci.io>
wrote:

> Date: Fri, 31 May 2024 17:19:31 +0800
> From: Tao Liu <l...@redhat.com>
> Subject: [Crash-utility] [PATCH v4 08/16] ppc64: correct gdb
>         passthroughs by implementing machdep->get_cpu_reg
> To: devel@lists.crash-utility.osci.io
> Cc: Mahesh J Salgaonkar <mah...@linux.ibm.com>, "Naveen N . Rao"
>         <naveen.n....@linux.vnet.ibm.com>, Lianbo Jiang <
> liji...@redhat.com>,
>         Alexey Makhalov <alexey.makha...@broadcom.com>
> Message-ID: <20240531091939.97828-9-l...@redhat.com>
> Content-Type: text/plain; charset=UTF-8
>
> From: Aditya Gupta <adit...@linux.ibm.com>
>
> Currently, gdb passthroughs of 'bt', 'frame', 'up', 'down', 'info
> locals' don't work. This is due to gdb not knowing the register values to
> unwind the stack frames
>
> Every gdb passthrough goes through `gdb_interface`. And then, gdb expects
> `crash_target::fetch_registers` to give it the register values, which is
> dependent on `machdep->get_cpu_reg` to read the register values for
> specific architecture.
>
>                                       ----------------------------
>            gdb passthrough (eg. "bt") |                          |
>    crash   -------------------------> |                          |
>                                       |      gdb_interface       |
>                                       |                          |
>                                       |                          |
>                                       |  ----------------------  |
>                  fetch_registers      |  |                    |  |
> crash_target<-------------------------+--|        gdb         |  |
>             --------------------------+->|                    |  |
>               Registers (SP,NIP, etc.)|  |                    |  |
>                                       |  |                    |  |
>                                       |  ----------------------  |
>                                       ----------------------------
>
> Implement `machdep->get_cpu_reg` on PPC64, so that crash provides the
> register values to gdb to unwind stack frames properly
>
> With these changes, on powerpc, 'bt' command output in gdb mode, will look
> like this:
>
>     gdb> bt
>     #0  0xc0000000002a53e8 in crash_setup_regs (oldregs=<optimized out>,
> newregs=0xc00000000486f8d8) at ./arch/powerpc/include/asm/kexec.h:69
>     #1  __crash_kexec (regs=<optimized out>) at kernel/kexec_core.c:974
>     #2  0xc000000000168918 in panic (fmt=<optimized out>) at
> kernel/panic.c:358
>     #3  0xc000000000b735f8 in sysrq_handle_crash (key=<optimized out>) at
> drivers/tty/sysrq.c:155
>     #4  0xc000000000b742cc in __handle_sysrq (key=key@entry=99,
> check_mask=check_mask@entry=false) at drivers/tty/sysrq.c:602
>     #5  0xc000000000b7506c in write_sysrq_trigger (file=<optimized out>,
> buf=<optimized out>, count=2, ppos=<optimized out>) at
> drivers/tty/sysrq.c:1163
>     #6  0xc00000000069a7bc in pde_write (ppos=<optimized out>,
> count=<optimized out>, buf=<optimized out>, file=<optimized out>,
> pde=0xc000000009ed3a80) at fs/proc/inode.c:340
>     #7  proc_reg_write (file=<optimized out>, buf=<optimized out>,
> count=<optimized out>, ppos=<optimized out>) at fs/proc/inode.c:352
>     #8  0xc0000000005b3bbc in vfs_write (file=file@entry=0xc00000009dda7d00,
> buf=buf@entry=0xebcfc7c6040 <error: Cannot access memory at address
> 0xebcfc7c6040>, count=count@entry=2, pos=pos@entry=0xc00000000486fda0) at
> fs/read_write.c:582
>
> instead of earlier output without this patch:
>
>     gdb> bt
>     #0  <unavailable> in ?? ()
>     Backtrace stopped: previous frame identical to this frame (corrupt
> stack?)
>
> Also, 'get_dumpfile_regs' has been introduced to get registers from
> multiple supported vmcore formats. Correspondingly a flag
> 'BT_NO_PRINT_REGS'
> has been introduced to tell helper functions to get registers, to not
> print registers with every call to backtrace in gdb.
>
>  Note: This feature to support GDB unwinding doesn't support live debugging
>
> Cc: Sourabh Jain <sourabhj...@linux.ibm.com>
> Cc: Hari Bathini <hbath...@linux.ibm.com>
> Cc: Mahesh J Salgaonkar <mah...@linux.ibm.com>
> Cc: Naveen N. Rao <naveen.n....@linux.vnet.ibm.com>
> Cc: Lianbo Jiang <liji...@redhat.com>
> Cc: HAGIO KAZUHITO(萩尾 一仁) <k-hagio...@nec.com>
> Cc: Tao Liu <l...@redhat.com>
> Cc: Alexey Makhalov <alexey.makha...@broadcom.com>
> Improved-by: Tao Liu <l...@redhat.com>
> Signed-off-by: Aditya Gupta <adit...@linux.ibm.com>
> ---
>  defs.h   | 123 +++++++++++++++++++++++++++++++++++++++++
>  kernel.c |  33 +++++++++++
>  ppc64.c  | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
>  3 files changed, 312 insertions(+), 7 deletions(-)
>
> diff --git a/defs.h b/defs.h
> index c0e6a29..8450aea 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -982,6 +982,7 @@ struct bt_info {
>         ulong eframe_ip;
>         ulong radix;
>         ulong *cpumask;
> +       bool need_free;


Comment on it later.


>  };
>
>  #define STACK_OFFSET_TYPE(OFF) \
> @@ -6096,6 +6097,7 @@ int load_module_symbols_helper(char *);
>  void unlink_module(struct load_module *);
>  int check_specified_module_tree(char *, char *);
>  int is_system_call(char *, ulong);
> +void get_dumpfile_regs(struct bt_info*, ulong*, ulong*);
>  void generic_dump_irq(int);
>  void generic_get_irq_affinity(int);
>  void generic_show_interrupts(int, ulong *);
> @@ -6195,6 +6197,7 @@ ulong cpu_map_addr(const char *type);
>  #define BT_REGS_NOT_FOUND (0x4000000000000ULL)
>  #define BT_OVERFLOW_STACK (0x8000000000000ULL)
>  #define BT_SKIP_IDLE     (0x10000000000000ULL)
> +#define BT_NO_PRINT_REGS (0x20000000000000ULL)
>  #define BT_SYMBOL_OFFSET   (BT_SYMBOLIC_ARGS)
>
>  #define BT_REF_HEXVAL         (0x1)
> @@ -8052,6 +8055,126 @@ enum x86_64_regnum {
>          LAST_REGNUM
>  };
>
> +/*
> + * Register numbers to make crash_target->fetch_registers()
> + * ---> machdep->get_current_task_reg() work properly.
> + *
> + *  These register numbers and names are given according to output of
> + *  `rs6000_register_name`, because that is what was being used by
> + *  crash_target::fetch_registers in case of PPC64
> + */
> +enum ppc64_regnum {
> +       PPC64_R0_REGNUM = 0,
> +       PPC64_R1_REGNUM,
> +       PPC64_R2_REGNUM,
> +       PPC64_R3_REGNUM,
> +       PPC64_R4_REGNUM,
> +       PPC64_R5_REGNUM,
> +       PPC64_R6_REGNUM,
> +       PPC64_R7_REGNUM,
> +       PPC64_R8_REGNUM,
> +       PPC64_R9_REGNUM,
> +       PPC64_R10_REGNUM,
> +       PPC64_R11_REGNUM,
> +       PPC64_R12_REGNUM,
> +       PPC64_R13_REGNUM,
> +       PPC64_R14_REGNUM,
> +       PPC64_R15_REGNUM,
> +       PPC64_R16_REGNUM,
> +       PPC64_R17_REGNUM,
> +       PPC64_R18_REGNUM,
> +       PPC64_R19_REGNUM,
> +       PPC64_R20_REGNUM,
> +       PPC64_R21_REGNUM,
> +       PPC64_R22_REGNUM,
> +       PPC64_R23_REGNUM,
> +       PPC64_R24_REGNUM,
> +       PPC64_R25_REGNUM,
> +       PPC64_R26_REGNUM,
> +       PPC64_R27_REGNUM,
> +       PPC64_R28_REGNUM,
> +       PPC64_R29_REGNUM,
> +       PPC64_R30_REGNUM,
> +       PPC64_R31_REGNUM,
> +
> +       PPC64_F0_REGNUM = 32,
> +       PPC64_F1_REGNUM,
> +       PPC64_F2_REGNUM,
> +       PPC64_F3_REGNUM,
> +       PPC64_F4_REGNUM,
> +       PPC64_F5_REGNUM,
> +       PPC64_F6_REGNUM,
> +       PPC64_F7_REGNUM,
> +       PPC64_F8_REGNUM,
> +       PPC64_F9_REGNUM,
> +       PPC64_F10_REGNUM,
> +       PPC64_F11_REGNUM,
> +       PPC64_F12_REGNUM,
> +       PPC64_F13_REGNUM,
> +       PPC64_F14_REGNUM,
> +       PPC64_F15_REGNUM,
> +       PPC64_F16_REGNUM,
> +       PPC64_F17_REGNUM,
> +       PPC64_F18_REGNUM,
> +       PPC64_F19_REGNUM,
> +       PPC64_F20_REGNUM,
> +       PPC64_F21_REGNUM,
> +       PPC64_F22_REGNUM,
> +       PPC64_F23_REGNUM,
> +       PPC64_F24_REGNUM,
> +       PPC64_F25_REGNUM,
> +       PPC64_F26_REGNUM,
> +       PPC64_F27_REGNUM,
> +       PPC64_F28_REGNUM,
> +       PPC64_F29_REGNUM,
> +       PPC64_F30_REGNUM,
> +       PPC64_F31_REGNUM,
> +
> +       PPC64_PC_REGNUM = 64,
> +       PPC64_MSR_REGNUM = 65,
> +       PPC64_CR_REGNUM = 66,
> +       PPC64_LR_REGNUM = 67,
> +       PPC64_CTR_REGNUM = 68,
> +       PPC64_XER_REGNUM = 69,
> +       PPC64_FPSCR_REGNUM = 70,
> +
> +       PPC64_VR0_REGNUM = 106,
> +       PPC64_VR1_REGNUM,
> +       PPC64_VR2_REGNUM,
> +       PPC64_VR3_REGNUM,
> +       PPC64_VR4_REGNUM,
> +       PPC64_VR5_REGNUM,
> +       PPC64_VR6_REGNUM,
> +       PPC64_VR7_REGNUM,
> +       PPC64_VR8_REGNUM,
> +       PPC64_VR9_REGNUM,
> +       PPC64_VR10_REGNUM,
> +       PPC64_VR11_REGNUM,
> +       PPC64_VR12_REGNUM,
> +       PPC64_VR13_REGNUM,
> +       PPC64_VR14_REGNUM,
> +       PPC64_VR15_REGNUM,
> +       PPC64_VR16_REGNUM,
> +       PPC64_VR17_REGNUM,
> +       PPC64_VR18_REGNUM,
> +       PPC64_VR19_REGNUM,
> +       PPC64_VR20_REGNUM,
> +       PPC64_VR21_REGNUM,
> +       PPC64_VR22_REGNUM,
> +       PPC64_VR23_REGNUM,
> +       PPC64_VR24_REGNUM,
> +       PPC64_VR25_REGNUM,
> +       PPC64_VR26_REGNUM,
> +       PPC64_VR27_REGNUM,
> +       PPC64_VR28_REGNUM,
> +       PPC64_VR29_REGNUM,
> +       PPC64_VR30_REGNUM,
> +       PPC64_VR31_REGNUM,
> +
> +       PPC64_VSCR_REGNUM = 138,
> +       PPC64_VRSAVE_REGNU = 139
> +};
> +
>

Is it possible to move the regnum definitions to ppc64.c? I did not see
them used in other modules. But, not sure if there are any compilation
issues, I haven't tried it.

 /* crash_target.c */
>  extern int gdb_change_thread_context ();
>
> diff --git a/kernel.c b/kernel.c
> index 78b7b1e..3730c55 100644
> --- a/kernel.c
> +++ b/kernel.c
> @@ -3533,6 +3533,39 @@ get_lkcd_regs(struct bt_info *bt, ulong *eip, ulong
> *esp)
>         machdep->get_stack_frame(bt, eip, esp);
>  }
>
> +void
> +get_dumpfile_regs(struct bt_info *bt, ulong *eip, ulong *esp)
> +{
> +       bt->flags |= BT_NO_PRINT_REGS;
> +
> +       if (NETDUMP_DUMPFILE())
> +               get_netdump_regs(bt, eip, esp);
> +       else if (KDUMP_DUMPFILE())
> +               get_kdump_regs(bt, eip, esp);
> +       else if (DISKDUMP_DUMPFILE())
> +               get_diskdump_regs(bt, eip, esp);
> +       else if (KVMDUMP_DUMPFILE())
> +               get_kvmdump_regs(bt, eip, esp);
> +       else if (LKCD_DUMPFILE())
> +               get_lkcd_regs(bt, eip, esp);
> +       else if (XENDUMP_DUMPFILE())
> +               get_xendump_regs(bt, eip, esp);
> +       else if (SADUMP_DUMPFILE())
> +               get_sadump_regs(bt, eip, esp);
> +       else if (VMSS_DUMPFILE())
> +               get_vmware_vmss_regs(bt, eip, esp);
> +       else if (REMOTE_PAUSED()) {
> +               if (!is_task_active(bt->task) || !get_remote_regs(bt, eip,
> esp))
> +                       machdep->get_stack_frame(bt, eip, esp);
> +       } else
> +               machdep->get_stack_frame(bt, eip, esp);
> +
> +       bt->flags &= ~BT_NO_PRINT_REGS;
> +
> +       bt->instptr = *eip;
> +       bt->stkptr = *esp;
> +}
> +
>
>  /*
>   *  Store the head of the kernel module list for future use.
> diff --git a/ppc64.c b/ppc64.c
> index e8930a1..ee1995a 100644
> --- a/ppc64.c
> +++ b/ppc64.c
> @@ -55,6 +55,8 @@ static void ppc64_set_bt_emergency_stack(enum
> emergency_stack_type type,
>  static char * ppc64_check_eframe(struct ppc64_pt_regs *);
>  static void ppc64_print_eframe(char *, struct ppc64_pt_regs *,
>                 struct bt_info *);
> +static int ppc64_get_current_task_reg(int regno, const char *name, int
> size,
> +                 void *value);
>  static void parse_cmdline_args(void);
>  static int ppc64_paca_percpu_offset_init(int);
>  static void ppc64_init_cpu_info(void);
> @@ -73,6 +75,26 @@ static ulong pmd_page_vaddr_l4(ulong pmd);
>  static int is_opal_context(ulong sp, ulong nip);
>  void opalmsg(void);
>
> +struct user_regs_bitmap_struct {
> +       struct {
> +               long gpr[32];
> +               long nip;
> +               long msr;
> +               long orig_gpr3;      /* Used for restarting system calls */
> +               long ctr;
> +               long link;
> +               long xer;
> +               long ccr;
> +               long mq;             /* 601 only (not used at present) */
> +                                       /* Used on APUS to hold IPL value.
> */
> +               long trap;           /* Reason for being here */
> +               long dar;            /* Fault registers */
> +               long dsisr;
> +               long result;         /* Result of a system call */
> +       };
> +       ulong bitmap;
> +};
> +
>  static int is_opal_context(ulong sp, ulong nip)
>  {
>         uint64_t opal_start, opal_end;
> @@ -704,6 +726,8 @@ ppc64_init(int when)
>                                 error(FATAL, "cannot malloc hwirqstack
> buffer space.");
>                 }
>
> +               machdep->get_current_task_reg = ppc64_get_current_task_reg;
> +
>                 ppc64_init_paca_info();
>
>                 if (!machdep->hz) {
> @@ -2501,6 +2525,120 @@ ppc64_print_eframe(char *efrm_str, struct
> ppc64_pt_regs *regs,
>         ppc64_print_nip_lr(regs, 1);
>  }
>
> +static int
> +ppc64_get_current_task_reg(int regno, const char *name, int size,
> +                 void *value)
> +{
> +       struct bt_info bt_info, bt_setup;
> +       struct task_context *tc;
> +       ulong task;
> +       struct user_regs_bitmap_struct *ur_bitmap;
> +       ulong ip, sp;
> +       bool ret = FALSE;
> +
> +       /* Currently only handling registers available in ppc64_pt_regs:
> +        *
> +        * 0-31:   r0-r31
> +        * 64:     pc/nip
> +        * 65:     msr
> +        *
> +        * 67:     lr
> +        * 68:     ctr
> +        */
> +       switch (regno) {
> +       case PPC64_R0_REGNUM ... PPC64_R31_REGNUM:
> +
> +       case PPC64_PC_REGNUM:
> +       case PPC64_MSR_REGNUM:
> +       case PPC64_LR_REGNUM:
> +       case PPC64_CTR_REGNUM:
> +               break;
> +
> +       default:
> +               // return false if we can't get that register
> +               if (CRASHDEBUG(1))
> +                       error(WARNING, "unsupported register, regno=%d\n",
> regno);
> +               return FALSE;
> +       }
> +
>

The above switch-case code block can be defined as a new static function to
check the validity of regnum, for example:
static int sanity_check_regs(int regno)

And then it can be invoked in the current function. Just an idea, maybe
this looks more concise and readable. Any thoughts?

+       tc = CURRENT_CONTEXT();
> +       if (!tc)
> +               return FALSE;
> +       BZERO(&bt_setup, sizeof(struct bt_info));
> +       clone_bt_info(&bt_setup, &bt_info, tc);
> +       fill_stackbuf(&bt_info);
> +
> +       // reusing the get_dumpfile_regs function to get pt regs structure
> +       get_dumpfile_regs(&bt_info, &sp, &ip);
> +       if (bt_info.stackbuf)
> +               FREEBUF(bt_info.stackbuf);
> +       ur_bitmap = (struct user_regs_bitmap_struct *)bt_info.machdep;
> +
> +       if (!ur_bitmap) {
> +               error(WARNING, "pt_regs not available for cpu %d\n",
> tc->processor);
> +               return FALSE;
> +       }
> +       if (!bt_info.need_free) {
> +               goto get_all;
> +       }
>

Could you please explain why the need_free is used to determine if it
should jump into the get_all code block?  The flag looks very strange, or
is there any another solution?

+
> +       switch (ur_bitmap->bitmap) {
> +       case 0x100000002:
> +               switch (regno) {
> +               case PPC64_R1_REGNUM:
> +               case PPC64_PC_REGNUM:
> +                       break;
> +               default:
> +                       return FALSE;
> +               }
> +               break;
> +       }
>

The switch-case code block should be equivalent to the implementation
below(if I understood correctly), and looks more readable?

+    /* Only for R1 and PC values */
+    if ((ur_bitmap->bitmap == 0x100000002) &&
+        (regno != PPC64_R1_REGNUM && regno != PPC64_PC_REGNUM))
+        return FALSE;



> +
> +get_all:
> +       switch (regno) {
> +       case PPC64_R0_REGNUM ... PPC64_R31_REGNUM:
> +               if (size != sizeof(ur_bitmap->gpr[regno])) {
> +                       ret = FALSE; break;  // size mismatch
> +               }
>

In fact , the variable ret can be initialized to FALSE one time at start of
this function, and then it can be simplified as below:
+       switch (regno) {
+       case PPC64_R0_REGNUM ... PPC64_R31_REGNUM:
+               if (size != sizeof(ur_bitmap->gpr[regno]))
+                     break;
+               memcpy(value, &ur_bitmap->gpr[regno], size);
+               ret = TRUE;
+               break;

+               memcpy(value, &ur_bitmap->gpr[regno], size);
> +               ret = TRUE; break;
> +
> +       case PPC64_PC_REGNUM:
> +               if (size != sizeof(ur_bitmap->nip)) {
> +                       ret = FALSE; break;  // size mismatch
> +               }
> +               memcpy(value, &ur_bitmap->nip, size);
> +               ret = TRUE; break;
> +
> +       case PPC64_MSR_REGNUM:
> +               if (size != sizeof(ur_bitmap->msr)) {
> +                       ret = FALSE; break;  // size mismatch
> +               }
> +               memcpy(value, &ur_bitmap->msr, size);
> +               ret = TRUE; break;
> +
> +       case PPC64_LR_REGNUM:
> +               if (size != sizeof(ur_bitmap->link)) {
> +                       ret = FALSE; break;  // size mismatch
> +               }
> +               memcpy(value, &ur_bitmap->link, size);
> +               ret = TRUE; break;
> +
> +       case PPC64_CTR_REGNUM:
> +               if (size != sizeof(ur_bitmap->ctr)) {
> +                       ret = FALSE; break;  // size mismatch
> +               }
> +               memcpy(value, &ur_bitmap->ctr, size);
> +               ret = TRUE; break;
> +       }
> +       if (bt_info.need_free) {
> +               FREEBUF(ur_bitmap);
> +               bt_info.need_free = FALSE;
> +       }
> +
> +       return ret;
> +}
> +
>  /*
>   * For vmcore typically saved with KDump or FADump, get SP and IP values
>   * from the saved ptregs.
> @@ -2613,9 +2751,11 @@ ppc64_get_dumpfile_stack_frame(struct bt_info
> *bt_in, ulong *nip, ulong *ksp)
>                 pt_regs = (struct ppc64_pt_regs *)bt->machdep;
>                 ur_nip = pt_regs->nip;
>                 ur_ksp = pt_regs->gpr[1];
> -               /* Print the collected regs for panic task. */
> -               ppc64_print_regs(pt_regs);
> -               ppc64_print_nip_lr(pt_regs, 1);
> +               if (!(bt->flags & BT_NO_PRINT_REGS)) {
> +                       /* Print the collected regs for panic task. */
> +                       ppc64_print_regs(pt_regs);
> +                       ppc64_print_nip_lr(pt_regs, 1);
> +               }
>         } else if ((pc->flags & KDUMP) ||
>                    ((pc->flags & DISKDUMP) &&
>                     (*diskdump_flags & KDUMP_CMPRS_LOCAL))) {
> @@ -2779,19 +2919,28 @@ static void
>  ppc64_get_stack_frame(struct bt_info *bt, ulong *pcp, ulong *spp)
>  {
>         ulong ksp, nip;
> -
> +       struct user_regs_bitmap_struct *ur_bitmap;
> +
>         nip = ksp = 0;
>
> -       if (DUMPFILE() && is_task_active(bt->task))
> +       if (DUMPFILE() && is_task_active(bt->task)) {
>                 ppc64_get_dumpfile_stack_frame(bt, &nip, &ksp);
> -       else
> +               bt->need_free = FALSE;
> +       } else {
>                 get_ppc64_frame(bt, &nip, &ksp);
> +               ur_bitmap = (struct user_regs_bitmap_struct
> *)GETBUF(sizeof(*ur_bitmap));
> +               memset(ur_bitmap, 0, sizeof(*ur_bitmap));
> +               ur_bitmap->nip = nip;
> +               ur_bitmap->gpr[1] = ksp;
> +               ur_bitmap->bitmap += 0x100000002;
>

This confused me a little bit. Maybe it's good enough like this?

+               ur_bitmap->bitmap = 0x100000002;

Thanks
Lianbo

+               bt->machdep = ur_bitmap;
> +               bt->need_free = TRUE;
> +       }
>
>         if (pcp)
>                 *pcp = nip;
>         if (spp)
>                 *spp = ksp;
> -
>  }
>
>  static ulong
> --
> 2.40.1
>
--
Crash-utility mailing list -- devel@lists.crash-utility.osci.io
To unsubscribe send an email to devel-le...@lists.crash-utility.osci.io
https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/
Contribution Guidelines: https://github.com/crash-utility/crash/wiki

Reply via email to