On Thu, Sep 19, 2024 at 11:28 AM Tao Liu <l...@redhat.com> wrote: > Hi lianbo, > > On Thu, Sep 19, 2024 at 2:41 PM lijiang <liji...@redhat.com> wrote: > > > > Hi, Tao > > > > Thank you for the fix. > > > > On Wed, Sep 18, 2024 at 7:44 AM < > devel-requ...@lists.crash-utility.osci.io> wrote: > >> > >> Date: Wed, 18 Sep 2024 11:42:03 +1200 > >> From: Tao Liu <l...@redhat.com> > >> Subject: [Crash-utility] [PATCH v1 1/3] ppc64: Fix bt printing error > >> stack trace > >> To: devel@lists.crash-utility.osci.io > >> Cc: adit...@linux.ibm.com, Tao Liu <l...@redhat.com> > >> Message-ID: <20240917234205.7783-2-l...@redhat.com> > >> Content-Type: text/plain; charset="US-ASCII"; x-default=true > >> > >> A error stack trace of bt cmd observed: > >> > >> crash> bt 1 > >> PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd" > >> #0 [c0000000037735c0] _end at c0000000037154b0 (unreliable) > >> #1 [c000000003773770] __switch_to at c00000000001fa9c > >> #2 [c0000000037737d0] __schedule at c00000000112e4ec > >> #3 [c0000000037738b0] schedule at c00000000112ea80 > >> ... > >> > >> The #0 stack trace is incorrect, the function address shouldn't exceed > _end. > >> The reason is for kernel>=v6.2, the offset of pt_regs to sp changed from > >> STACK_FRAME_OVERHEAD, i.e 112, to STACK_SWITCH_FRAME_REGS. For > >> CONFIG_PPC64_ELF_ABI_V1, it's 112, for ABI_V2, it's 48. So the nip will > read a > >> wrong value from stack when ABI_V2 enabled. > >> > > > > Can you help to add the related kernel commits to patch log? That will > help me a lot to review the patches. > > > Sure, will update it in v2. > > >> > >> To determine if ABI_V2 enabled is tricky. This patch do it by check the > >> following: > > > > > > Can you try to read the value of e_flags from the elf header and to > determine what ABI version it is? > > > Do you mean by checking the following? > > arch/powerpc/kernel/fadump.c:fadump_init_elfcore_header(): > > if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) > elf->e_flags = 2; > else if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1)) > elf->e_flags = 1; > else > elf->e_flags = 0; > > I think this only works for fadump, but it will not work for normal >
Maybe "not". I did not try it, but I noticed that there is a similar checking in kernel module, please refer to this one: arch/powerpc/kernel/module_64.c bool module_elf_check_arch(Elf_Ehdr *hdr) { unsigned long abi_level = hdr->e_flags & 0x3; if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) return abi_level == 2; else return abi_level < 2; } kdump. Also for kcore, the e_flags are set as: > > read_kcore_iter(): > #ifndef ELF_CORE_EFLAGS > #define ELF_CORE_EFLAGS 0 > #endif > .e_flags = ELF_CORE_EFLAGS, > > So it seems the e_flags is not a suitable approach for me... > The ELF_CORE_EFLAGS should be defined in the arch/powerpc/include/asm/elf.h: #define ELF_CORE_EFLAGS (is_elf2_task() ? 2 : 0) And also refer to this one(arch/powerpc/include/asm/thread_info.h): #if defined(CONFIG_PPC64) #define is_elf2_task() (test_thread_flag(TIF_ELF2ABI)) #else #define is_elf2_task() (0) #endif Anyway, can you try it? Or have you already tried it? Lianbo > Thanks, > Tao Liu > > > > Thanks > > Lianbo > > > >> > >> > >> In arch/powerpc/include/asm/ppc_asm.h: > >> #ifdef CONFIG_PPC64_ELF_ABI_V2 > >> #define STK_GOT 24 > >> #else > >> #define STK_GOT 40 > >> > >> In arch/powerpc/kernel/tm.S: > >> _GLOBAL(tm_reclaim) > >> mfcr r5 > >> mflr r0 > >> stw r5, 8(r1) > >> std r0, 16(r1) > >> std r2, STK_GOT(r1) > >> ... > >> > >> So a disassemble on tm_reclaim, and extract the STK_GOT value from std > >> instruction is used as the approach. > >> > >> After the patch: > >> crash> bt 1 > >> PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd" > >> #0 [c0000000037737d0] __schedule at c00000000112e4ec > >> #1 [c0000000037738b0] schedule at c00000000112ea80 > >> ... > >> > >> Signed-off-by: Tao Liu <l...@redhat.com> > >> --- > >> defs.h | 1 + > >> ppc64.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- > >> 2 files changed, 58 insertions(+), 3 deletions(-) > >> > >> diff --git a/defs.h b/defs.h > >> index 2231cb6..d5cb8cc 100644 > >> --- a/defs.h > >> +++ b/defs.h > >> @@ -4643,6 +4643,7 @@ struct efi_memory_desc_t { > >> #define MSR_PR_LG 14 /* Problem State / Privilege Level */ > >> /* Used to find the user or kernel-mode > frame*/ > >> > >> +#define STACK_SWITCH_FRAME_REGS 48 > >> #define STACK_FRAME_OVERHEAD 112 > >> #define EXCP_FRAME_MARKER 0x7265677368657265 > >> > >> diff --git a/ppc64.c b/ppc64.c > >> index e8930a1..6e5f155 100644 > >> --- a/ppc64.c > >> +++ b/ppc64.c > >> @@ -72,6 +72,7 @@ static ulong pud_page_vaddr_l4(ulong pud); > >> static ulong pmd_page_vaddr_l4(ulong pmd); > >> static int is_opal_context(ulong sp, ulong nip); > >> void opalmsg(void); > >> +static bool is_ppc64_elf_abi_v2(void); > >> > >> static int is_opal_context(ulong sp, ulong nip) > >> { > >> @@ -2813,6 +2814,51 @@ ppc64_get_sp(ulong task) > >> return sp; > >> } > >> > >> +static bool > >> +is_ppc64_elf_abi_v2(void) > >> +{ > >> + char buf1[BUFSIZE]; > >> + char *pos1, *pos2; > >> + int errflag = 0; > >> + ulong stk_got = 0; > >> + static bool ret = false; > >> + static bool checked = false; > >> + > >> + if (checked == true || !symbol_exists("tm_reclaim")) > >> + return ret; > >> + > >> + sprintf(buf1, "x/16i tm_reclaim"); > >> + open_tmpfile(); > >> + if (!gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR)) > >> + goto out; > >> + checked = true; > >> + rewind(pc->tmpfile); > >> + while (fgets(buf1, BUFSIZE, pc->tmpfile)) { > >> + // "std r2, STK_GOT(r1)" is expected > >> + if (strstr(buf1, "std") && > >> + strstr(buf1, "(r1)") && > >> + (pos1 = strstr(buf1, "r2,"))) { > >> + pos1 += strlen("r2,"); > >> + for (pos2 = pos1; *pos2 != '\0' && *pos2 != > '('; pos2++); > >> + *pos2 = '\0'; > >> + stk_got = stol(pos1, RETURN_ON_ERROR|QUIET, > &errflag); > >> + break; > >> + } > >> + } > >> + > >> + if (!errflag) { > >> + switch (stk_got) { > >> + case 24: > >> + ret = true; > >> + case 40: > >> + goto out; > >> + } > >> + } > >> + error(WARNING, "Unstable elf_abi v1/v2 detection.\n"); > >> +out: > >> + close_tmpfile(); > >> + return ret; > >> +} > >> > >> /* > >> * get the SP and PC values for idle tasks. > >> @@ -2834,9 +2880,17 @@ get_ppc64_frame(struct bt_info *bt, ulong > *getpc, ulong *getsp) > >> sp = ppc64_get_sp(task); > >> if (!INSTACK(sp, bt)) > >> goto out; > >> - readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, ®s, > >> - sizeof(struct ppc64_pt_regs), > >> - "PPC64 pt_regs", FAULT_ON_ERROR); > >> + > >> + if (THIS_KERNEL_VERSION >= LINUX(6,2,0) && > is_ppc64_elf_abi_v2()) { > >> + readmem(sp+STACK_SWITCH_FRAME_REGS, KVADDR, ®s, > >> + sizeof(struct ppc64_pt_regs), > >> + "PPC64 pt_regs", FAULT_ON_ERROR); > >> + } else { > >> + readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, ®s, > >> + sizeof(struct ppc64_pt_regs), > >> + "PPC64 pt_regs", FAULT_ON_ERROR); > >> + } > >> + > >> ip = regs.nip; > >> closest = closest_symbol(ip); > >> if (STREQ(closest, ".__switch_to") || STREQ(closest, > "__switch_to")) { > >> -- > >> 2.40.1 > >
-- Crash-utility mailing list -- devel@lists.crash-utility.osci.io To unsubscribe send an email to devel-le...@lists.crash-utility.osci.io https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/ Contribution Guidelines: https://github.com/crash-utility/crash/wiki