On 2022/06/29 4:19, Hari Bathini wrote:
> A CPU could be in an emergency stack when it is running in real mode
> or in a special scenario such as a TM Bad Thing. Also, there are dedicated
> emergency stacks for machine check and system reset interrupt. Right
> now, no backtrace is provided if a CPU is in any of these stacks.
> This change ensures backtrace is processed appropriately even when
> a CPU is in any one of these emergency stacks. Also, if stack info
> cannot be found, print that message always instead of only when
> verbose logs are enabled.
> 
> Signed-off-by: Hari Bathini <hbath...@linux.ibm.com>
> ---
>   defs.h  |  12 ++++
>   ppc64.c | 196 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
>   2 files changed, 196 insertions(+), 12 deletions(-)
> 
> diff --git a/defs.h b/defs.h
> index d1d3ea9..9b1b69a 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -6288,6 +6288,13 @@ struct ppc64_elf_prstatus {
>   
>   #ifdef PPC64
>   
> +enum emergency_stack_type {
> +     NONE_STACK              = 0,
> +     EMERGENCY_STACK,
> +     NMI_EMERGENCY_STACK,
> +     MC_EMERGENCY_STACK
> +};
> +
>   struct ppc64_opal {
>       uint64_t base;
>       uint64_t entry;
> @@ -6307,6 +6314,11 @@ struct machine_specific {
>           char *hwstackbuf;
>           uint hwstacksize;
>   
> +     /* Emergency stacks */
> +     ulong *emergency_sp;
> +     ulong *nmi_emergency_sp;
> +     ulong *mc_emergency_sp;
> +
>       uint l4_index_size;
>       uint l3_index_size;
>       uint l2_index_size;
> diff --git a/ppc64.c b/ppc64.c
> index 5d4c951..333a775 100644
> --- a/ppc64.c
> +++ b/ppc64.c
> @@ -48,6 +48,10 @@ static ulong ppc64_get_stackbase(ulong);
>   static ulong ppc64_get_stacktop(ulong);
>   void ppc64_compiler_warning_stub(void);
>   static ulong ppc64_in_irqstack(ulong);
> +static enum emergency_stack_type ppc64_in_emergency_stack(int cpu, ulong 
> addr,
> +                                                       bool verbose);
> +static void ppc64_set_bt_emergency_stack(enum emergency_stack_type type,
> +                                      struct bt_info *bt);
>   static char * ppc64_check_eframe(struct ppc64_pt_regs *);
>   static void ppc64_print_eframe(char *, struct ppc64_pt_regs *,
>               struct bt_info *);
> @@ -56,6 +60,7 @@ static int ppc64_paca_percpu_offset_init(int);
>   static void ppc64_init_cpu_info(void);
>   static int ppc64_get_cpu_map(void);
>   static void ppc64_clear_machdep_cache(void);
> +static void ppc64_init_paca_info(void);
>   static void ppc64_vmemmap_init(void);
>   static int ppc64_get_kvaddr_ranges(struct vaddr_range *);
>   static uint get_ptetype(ulong pte);
> @@ -692,6 +697,8 @@ ppc64_init(int when)
>                               error(FATAL, "cannot malloc hwirqstack buffer 
> space.");
>               }
>   
> +             ppc64_init_paca_info();
> +
>               if (!machdep->hz) {
>                       machdep->hz = HZ;
>                       if (THIS_KERNEL_VERSION >= LINUX(2,6,0))
> @@ -1203,6 +1210,63 @@ ppc64_kvtop(struct task_context *tc, ulong kvaddr,
>               return ppc64_vtop(kvaddr, (ulong *)vt->kernel_pgd[0], paddr, 
> verbose);
>   }
>   
> +static void
> +ppc64_init_paca_info(void)
> +{
> +     struct machine_specific *ms = machdep->machspec;
> +     ulong paca_ptr[kt->cpus];

Please don't use a variable-length array here; it might be fragile, since its size depends on kt->cpus at run time.

Thanks,
Kazu

> +     int i;
> +
> +     /* Get paca pointers for all CPUs. */
> +     if (symbol_exists("paca_ptrs")) {
> +             ulong paca_loc;
> +
> +             readmem(symbol_value("paca_ptrs"), KVADDR, &paca_loc, 
> sizeof(void *),
> +                     "paca double pointer", FAULT_ON_ERROR);
> +             readmem(paca_loc, KVADDR, paca_ptr, sizeof(void *) * kt->cpus,
> +                     "paca pointers", FAULT_ON_ERROR);
> +     } else if (symbol_exists("paca") &&
> +                (get_symbol_type("paca", NULL, NULL) == TYPE_CODE_PTR)) {
> +             readmem(symbol_value("paca"), KVADDR, paca_ptr, sizeof(void *) 
> * kt->cpus,
> +                     "paca pointers", FAULT_ON_ERROR);
> +     } else
> +             return;
> +
> +     /* Initialize emergency stacks info. */
> +     if (MEMBER_EXISTS("paca_struct", "emergency_sp")) {
> +             ulong offset = MEMBER_OFFSET("paca_struct", "emergency_sp");
> +
> +             if (!(ms->emergency_sp = (ulong *)calloc(kt->cpus, 
> sizeof(ulong))))
> +                     error(FATAL, "cannot malloc emergency stack space.");
> +             for (i = 0; i < kt->cpus; i++)
> +                     readmem(paca_ptr[i] + offset, KVADDR, 
> &ms->emergency_sp[i],
> +                             sizeof(void *), "paca->emergency_sp",
> +                             FAULT_ON_ERROR);
> +     }
> +
> +     if (MEMBER_EXISTS("paca_struct", "nmi_emergency_sp")) {
> +             ulong offset = MEMBER_OFFSET("paca_struct", "nmi_emergency_sp");
> +
> +             if (!(ms->nmi_emergency_sp = (ulong *)calloc(kt->cpus, 
> sizeof(ulong))))
> +                     error(FATAL, "cannot malloc NMI emergency stack 
> space.");
> +             for (i = 0; i < kt->cpus; i++)
> +                     readmem(paca_ptr[i] + offset, KVADDR, 
> &ms->nmi_emergency_sp[i],
> +                             sizeof(void *), "paca->nmi_emergency_sp",
> +                             FAULT_ON_ERROR);
> +     }
> +
> +     if (MEMBER_EXISTS("paca_struct", "mc_emergency_sp")) {
> +             ulong offset = MEMBER_OFFSET("paca_struct", "mc_emergency_sp");
> +
> +             if (!(ms->mc_emergency_sp = (ulong *)calloc(kt->cpus, 
> sizeof(ulong))))
> +                     error(FATAL, "cannot malloc machine check emergency 
> stack space.");
> +             for (i = 0; i < kt->cpus; i++)
> +                     readmem(paca_ptr[i] + offset, KVADDR, 
> &ms->mc_emergency_sp[i],
> +                             sizeof(void *), "paca->mc_emergency_sp",
> +                             FAULT_ON_ERROR);
> +     }
> +}
> +
>   /*
>    *  Verify that the kernel has made the vmemmap list available,
>    *  and if so, stash the relevant data required to make vtop
> @@ -1754,6 +1818,11 @@ ppc64_eframe_search(struct bt_info *bt_in)
>                       addr = bt->stackbase +
>                               roundup(SIZE(thread_info), sizeof(ulong));
>               } else if (!INSTACK(addr, bt)) {
> +                     enum emergency_stack_type estype;
> +
> +                     if ((estype = 
> ppc64_in_emergency_stack(bt->tc->processor, addr, false)))
> +                             ppc64_set_bt_emergency_stack(estype, bt);
> +
>                       /*
>                        * If the user specified SP is in HW interrupt stack
>                        * (only for tasks running on other CPUs and in 2.4
> @@ -1855,6 +1924,84 @@ ppc64_in_irqstack(ulong addr)
>       return 0;
>   }
>   
> +/*
> + * Check if the CPU is running in any of its emergency stacks.
> + * Returns
> + *   NONE_STACK          : if input is invalid or addr is not within any 
> emergency stack.
> + *   EMERGENCY_STACK     : if the addr is within emergency stack.
> + *   NMI_EMERGENCY_STACK : if the addr is within NMI emergency stack.
> + *   MC_EMERGENCY_STACK  : if the addr is within machine check emergency 
> stack.
> + */
> +static enum emergency_stack_type
> +ppc64_in_emergency_stack(int cpu, ulong addr, bool verbose)
> +{
> +     struct machine_specific *ms = machdep->machspec;
> +     ulong base, top;
> +
> +     if (cpu < 0  || cpu >= kt->cpus)
> +             return NONE_STACK;
> +
> +     if (ms->emergency_sp) {
> +             top = ms->emergency_sp[cpu];
> +             base =  top - STACKSIZE();
> +             if (addr >= base && addr < top) {
> +                     if (verbose)
> +                             fprintf(fp, "---<Emergency Stack>---\n");
> +                     return EMERGENCY_STACK;
> +             }
> +     }
> +
> +     if (ms->nmi_emergency_sp) {
> +             top = ms->nmi_emergency_sp[cpu];
> +             base = top - STACKSIZE();
> +             if (addr >= base && addr < top) {
> +                     if (verbose)
> +                             fprintf(fp, "---<NMI Emergency Stack>---\n");
> +                     return NMI_EMERGENCY_STACK;
> +             }
> +     }
> +
> +     if (ms->mc_emergency_sp) {
> +             top = ms->mc_emergency_sp[cpu];
> +             base =  top - STACKSIZE();
> +             if (addr >= base && addr < top) {
> +                     if (verbose)
> +                             fprintf(fp, "---<Machine Check Emergency 
> Stack>---\n");
> +                     return MC_EMERGENCY_STACK;
> +             }
> +     }
> +
> +     return NONE_STACK;
> +}
> +
> +static void
> +ppc64_set_bt_emergency_stack(enum emergency_stack_type type, struct bt_info 
> *bt)
> +{
> +     struct machine_specific *ms = machdep->machspec;
> +     ulong top;
> +
> +     switch (type) {
> +     case EMERGENCY_STACK:
> +             top = ms->emergency_sp[bt->tc->processor];
> +             break;
> +     case NMI_EMERGENCY_STACK:
> +             top = ms->nmi_emergency_sp[bt->tc->processor];
> +             break;
> +     case MC_EMERGENCY_STACK:
> +             top = ms->mc_emergency_sp[bt->tc->processor];
> +             break;
> +     default:
> +             top = 0;
> +             break;
> +     }
> +
> +     if (top) {
> +             bt->stackbase = top - STACKSIZE();
> +             bt->stacktop = top;
> +             alter_stackbuf(bt);
> +     }
> +}
> +
>   /*
>    *  Unroll a kernel stack.
>    */
> @@ -1935,10 +2082,13 @@ ppc64_back_trace_cmd(struct bt_info *bt)
>   static void
>   ppc64_back_trace(struct gnu_request *req, struct bt_info *bt)
>   {
> -     int frame = 0;
> -     ulong lr = 0; /* hack...need to pass in initial lr reg */
> +     enum emergency_stack_type estype;
>       ulong newpc = 0, newsp, marker;
> +     int c = bt->tc->processor;
> +     ulong nmi_sp = 0;
>       int eframe_found;
> +     int frame = 0;
> +     ulong lr = 0; /* hack...need to pass in initial lr reg */
>   
>       if (!INSTACK(req->sp, bt)) {
>               ulong irqstack;
> @@ -1948,6 +2098,10 @@ ppc64_back_trace(struct gnu_request *req, struct 
> bt_info *bt)
>                       bt->stackbase = irqstack;
>                       bt->stacktop = bt->stackbase + STACKSIZE();
>                       alter_stackbuf(bt);
> +             } else if ((estype = ppc64_in_emergency_stack(c, req->sp, 
> true))) {
> +                     if (estype == NMI_EMERGENCY_STACK)
> +                             nmi_sp = req->sp;
> +                     ppc64_set_bt_emergency_stack(estype, bt);
>               } else if (ms->hwintrstack) {
>                       bt->stacktop = ms->hwintrstack[bt->tc->processor] +
>                               sizeof(ulong);
> @@ -1956,9 +2110,7 @@ ppc64_back_trace(struct gnu_request *req, struct 
> bt_info *bt)
>                       bt->stackbuf = ms->hwstackbuf;
>                       alter_stackbuf(bt);
>               } else {
> -                     if (CRASHDEBUG(1)) {
> -                             fprintf(fp, "cannot find the stack info.\n");
> -                     }
> +                     fprintf(fp, "cannot find the stack info.\n");
>                       return;
>               }
>       }
> @@ -1988,13 +2140,20 @@ ppc64_back_trace(struct gnu_request *req, struct 
> bt_info *bt)
>                               newsp =
>                               *(ulong *)&bt->stackbuf[newsp - bt->stackbase];
>                       if (!INSTACK(newsp, bt)) {
> -                             /*
> -                             * Switch HW interrupt stack to process's stack.
> -                             */
> -                             bt->stackbase = GET_STACKBASE(bt->task);
> -                             bt->stacktop = GET_STACKTOP(bt->task);
> -                             alter_stackbuf(bt);
> -                     }
> +                             if ((estype = ppc64_in_emergency_stack(c, 
> newsp, true))) {
> +                                     if (!nmi_sp && estype == 
> NMI_EMERGENCY_STACK)
> +                                             nmi_sp = newsp;
> +                                     ppc64_set_bt_emergency_stack(estype, 
> bt);
> +                             } else {
> +                                     /*
> +                                      * Switch HW interrupt stack or 
> emergency stack
> +                                      * to process's stack.
> +                                      */
> +                                     bt->stackbase = GET_STACKBASE(bt->task);
> +                                     bt->stacktop = GET_STACKTOP(bt->task);
> +                                     alter_stackbuf(bt);
> +                             }
> +                     }
>                       if (IS_KVADDR(newsp) && INSTACK(newsp, bt))
>                               newpc = *(ulong *)&bt->stackbuf[newsp + 16 -
>                                               bt->stackbase];
> @@ -2038,6 +2197,16 @@ ppc64_back_trace(struct gnu_request *req, struct 
> bt_info *bt)
>                       }
>               }
>   
> +             /*
> +              * NMI stack may not be re-entrant. If so, an SP in the NMI 
> stack
> +              * is likely to point back to an SP within the NMI stack, in 
> case
> +              * of a nested NMI.
> +              */
> +             if (nmi_sp && nmi_sp == newsp) {
> +                     fprintf(fp, "---<Nested NMI>---\n");
> +                     break;
> +             }
> +
>               /*
>                * Some Linux 3.7 kernel threads have been seen to have
>                * their end-of-trace stack linkage pointer pointing
> @@ -2415,6 +2584,9 @@ ppc64_get_dumpfile_stack_frame(struct bt_info *bt_in, 
> ulong *nip, ulong *ksp)
>               pt_regs = (struct ppc64_pt_regs *)bt->machdep;
>               ur_nip = pt_regs->nip;
>               ur_ksp = pt_regs->gpr[1];
> +             /* Print the collected regs for panic task. */
> +             ppc64_print_regs(pt_regs);
> +             ppc64_print_nip_lr(pt_regs, 1);
>       } else if ((pc->flags & KDUMP) ||
>                  ((pc->flags & DISKDUMP) &&
>                   (*diskdump_flags & KDUMP_CMPRS_LOCAL))) {
--
Crash-utility mailing list
Crash-utility@redhat.com
https://listman.redhat.com/mailman/listinfo/crash-utility
Contribution Guidelines: https://github.com/crash-utility/crash/wiki

Reply via email to