Hi, Tao Thank you for the fix.
On Wed, Sep 18, 2024 at 7:44 AM <devel-requ...@lists.crash-utility.osci.io> wrote: > Date: Wed, 18 Sep 2024 11:42:03 +1200 > From: Tao Liu <l...@redhat.com> > Subject: [Crash-utility] [PATCH v1 1/3] ppc64: Fix bt printing error > stack trace > To: devel@lists.crash-utility.osci.io > Cc: adit...@linux.ibm.com, Tao Liu <l...@redhat.com> > Message-ID: <20240917234205.7783-2-l...@redhat.com> > Content-Type: text/plain; charset="US-ASCII"; x-default=true > > A error stack trace of bt cmd observed: > > crash> bt 1 > PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd" > #0 [c0000000037735c0] _end at c0000000037154b0 (unreliable) > #1 [c000000003773770] __switch_to at c00000000001fa9c > #2 [c0000000037737d0] __schedule at c00000000112e4ec > #3 [c0000000037738b0] schedule at c00000000112ea80 > ... > > The #0 stack trace is incorrect, the function address shouldn't exceed > _end. > The reason is for kernel>=v6.2, the offset of pt_regs to sp changed from > STACK_FRAME_OVERHEAD, i.e 112, to STACK_SWITCH_FRAME_REGS. For > CONFIG_PPC64_ELF_ABI_V1, it's 112, for ABI_V2, it's 48. So the nip will > read a > wrong value from stack when ABI_V2 enabled. > > Can you help to add the related kernel commits to patch log? That will help me a lot to review the patches. > To determine if ABI_V2 enabled is tricky. This patch do it by check the > following: > Can you try to read the value of e_flags from the elf header and to determine what ABI version it is? Thanks Lianbo > > In arch/powerpc/include/asm/ppc_asm.h: > #ifdef CONFIG_PPC64_ELF_ABI_V2 > #define STK_GOT 24 > #else > #define STK_GOT 40 > > In arch/powerpc/kernel/tm.S: > _GLOBAL(tm_reclaim) > mfcr r5 > mflr r0 > stw r5, 8(r1) > std r0, 16(r1) > std r2, STK_GOT(r1) > ... > > So a disassemble on tm_reclaim, and extract the STK_GOT value from std > instruction is used as the approach. > > After the patch: > crash> bt 1 > PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd" > #0 [c0000000037737d0] __schedule at c00000000112e4ec > #1 [c0000000037738b0] schedule at c00000000112ea80 > ... > > Signed-off-by: Tao Liu <l...@redhat.com> > --- > defs.h | 1 + > ppc64.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- > 2 files changed, 58 insertions(+), 3 deletions(-) > > diff --git a/defs.h b/defs.h > index 2231cb6..d5cb8cc 100644 > --- a/defs.h > +++ b/defs.h > @@ -4643,6 +4643,7 @@ struct efi_memory_desc_t { > #define MSR_PR_LG 14 /* Problem State / Privilege Level */ > /* Used to find the user or kernel-mode > frame*/ > > +#define STACK_SWITCH_FRAME_REGS 48 > #define STACK_FRAME_OVERHEAD 112 > #define EXCP_FRAME_MARKER 0x7265677368657265 > > diff --git a/ppc64.c b/ppc64.c > index e8930a1..6e5f155 100644 > --- a/ppc64.c > +++ b/ppc64.c > @@ -72,6 +72,7 @@ static ulong pud_page_vaddr_l4(ulong pud); > static ulong pmd_page_vaddr_l4(ulong pmd); > static int is_opal_context(ulong sp, ulong nip); > void opalmsg(void); > +static bool is_ppc64_elf_abi_v2(void); > > static int is_opal_context(ulong sp, ulong nip) > { > @@ -2813,6 +2814,51 @@ ppc64_get_sp(ulong task) > return sp; > } > > +static bool > +is_ppc64_elf_abi_v2(void) > +{ > + char buf1[BUFSIZE]; > + char *pos1, *pos2; > + int errflag = 0; > + ulong stk_got = 0; > + static bool ret = false; > + static bool checked = false; > + > + if (checked == true || !symbol_exists("tm_reclaim")) > + return ret; > + > + sprintf(buf1, "x/16i tm_reclaim"); > + open_tmpfile(); > + if (!gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR)) > + goto out; > + checked = true; > + rewind(pc->tmpfile); > + while (fgets(buf1, BUFSIZE, pc->tmpfile)) { > + // "std r2, STK_GOT(r1)" is expected > + if (strstr(buf1, "std") && > + strstr(buf1, "(r1)") && > + (pos1 = strstr(buf1, "r2,"))) { > + pos1 += strlen("r2,"); > + for (pos2 = pos1; *pos2 != '\0' && *pos2 != '('; > pos2++); > + *pos2 = '\0'; > + stk_got = stol(pos1, RETURN_ON_ERROR|QUIET, > &errflag); > + break; > + } > + } > + > + if (!errflag) { > + switch (stk_got) { > + case 24: > + ret = true; > + case 40: > + goto out; > + } > + } > + error(WARNING, "Unstable elf_abi v1/v2 detection.\n"); > +out: > + close_tmpfile(); > + return ret; > +} > > /* > * get the SP and PC values for idle tasks. > @@ -2834,9 +2880,17 @@ get_ppc64_frame(struct bt_info *bt, ulong *getpc, > ulong *getsp) > sp = ppc64_get_sp(task); > if (!INSTACK(sp, bt)) > goto out; > - readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, ®s, > - sizeof(struct ppc64_pt_regs), > - "PPC64 pt_regs", FAULT_ON_ERROR); > + > + if (THIS_KERNEL_VERSION >= LINUX(6,2,0) && is_ppc64_elf_abi_v2()) { > + readmem(sp+STACK_SWITCH_FRAME_REGS, KVADDR, ®s, > + sizeof(struct ppc64_pt_regs), > + "PPC64 pt_regs", FAULT_ON_ERROR); > + } else { > + readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, ®s, > + sizeof(struct ppc64_pt_regs), > + "PPC64 pt_regs", FAULT_ON_ERROR); > + } > + > ip = regs.nip; > closest = closest_symbol(ip); > if (STREQ(closest, ".__switch_to") || STREQ(closest, > "__switch_to")) { > -- > 2.40.1 >
-- Crash-utility mailing list -- devel@lists.crash-utility.osci.io To unsubscribe send an email to devel-le...@lists.crash-utility.osci.io https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/ Contribution Guidelines: https://github.com/crash-utility/crash/wiki