On Fri, Oct 11, 2024 at 1:19 PM Tao Liu <l...@redhat.com> wrote: > On Tue, Oct 8, 2024 at 10:46 PM Tao Liu <l...@redhat.com> wrote: > > > > Hi Aditya, > > > > On Tue, Oct 8, 2024 at 10:18 PM Aditya Gupta <adit...@linux.ibm.com> > wrote: > > > > > > Hi Tao, > > > > > > > > > On 04/10/24 10:40, Tao Lou wrote: > > > > Hi Aditya & lianbo, > > > > > > > > On Tue, Sep 24, 2024 at 2:28 AM Aditya Gupta <adit...@linux.ibm.com> > wrote: > > > >> Hi Tao, > > > >> > > > >> On 24/09/22 10:25PM, Aditya Gupta wrote: > > > >>> Hi Tao, > > > >>> > > > >>> > > > >>> On 18/09/24 05:12, Tao Liu wrote: > > > >>>> A error stack trace of bt cmd observed: > > > >>>> > > > >>>> crash> bt 1 > > > >>>> PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd" > > > >>>> #0 [c0000000037735c0] _end at c0000000037154b0 (unreliable) > > > >>>> #1 [c000000003773770] __switch_to at c00000000001fa9c > > > >>>> #2 [c0000000037737d0] __schedule at c00000000112e4ec > > > >>>> #3 [c0000000037738b0] schedule at c00000000112ea80 > > > >>>> ... > > > >>>> > > > >>>> The #0 stack trace is incorrect, the function address shouldn't > exceed _end. > > > >>>> The reason is for kernel>=v6.2, the offset of pt_regs to sp > changed from > > > >>>> STACK_FRAME_OVERHEAD, i.e 112, to STACK_SWITCH_FRAME_REGS. For > > > >>>> CONFIG_PPC64_ELF_ABI_V1, it's 112, for ABI_V2, it's 48. So the > nip will read a > > > >>>> wrong value from stack when ABI_V2 enabled. > > > >>>> > > > >>>> To determine if ABI_V2 enabled is tricky. This patch do it by > check the > > > >>>> following: > > > >>>> > > > >>>> In arch/powerpc/include/asm/ppc_asm.h: > > > >>>> #ifdef CONFIG_PPC64_ELF_ABI_V2 > > > >>>> #define STK_GOT 24 > > > >>>> #else > > > >>>> #define STK_GOT 40 > > > >>>> > > > >>>> In arch/powerpc/kernel/tm.S: > > > >>>> _GLOBAL(tm_reclaim) > > > >>>> mfcr r5 > > > >>>> mflr r0 > > > >>>> stw r5, 8(r1) > > > >>>> std r0, 16(r1) > > > >>>> std r2, STK_GOT(r1) > > > >>>> ... > > > >>>> > > > >>>> So a disassemble on tm_reclaim, and extract the STK_GOT value > from std > > > >>>> instruction is used as the approach. > > > >>>> > > > >>>> After the patch: > > > >>>> crash> bt 1 > > > >>>> PID: 1 TASK: c000000003714b80 CPU: 2 COMMAND: "systemd" > > > >>>> #0 [c0000000037737d0] __schedule at c00000000112e4ec > > > >>>> #1 [c0000000037738b0] schedule at c00000000112ea80 > > > >>>> ... > > > >>>> > > > >>>> Signed-off-by: Tao Liu <l...@redhat.com> > > > >>>> --- > > > >>>> defs.h | 1 + > > > >>>> ppc64.c | 60 > ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- > > > >>>> 2 files changed, 58 insertions(+), 3 deletions(-) > > > >>>> > > > >>>> diff --git a/defs.h b/defs.h > > > >>>> index 2231cb6..d5cb8cc 100644 > > > >>>> --- a/defs.h > > > >>>> +++ b/defs.h > > > >>>> @@ -4643,6 +4643,7 @@ struct efi_memory_desc_t { > > > >>>> #define MSR_PR_LG 14 /* Problem State / Privilege Level */ > > > >>>> /* Used to find the user or > kernel-mode frame*/ > > > >>>> +#define STACK_SWITCH_FRAME_REGS 48 > > > >>>> #define STACK_FRAME_OVERHEAD 112 > > > >>>> #define EXCP_FRAME_MARKER 0x7265677368657265 > > > >>>> diff --git a/ppc64.c b/ppc64.c > > > >>>> index e8930a1..6e5f155 100644 > > > >>>> --- a/ppc64.c > > > >>>> +++ b/ppc64.c > > > >>>> @@ -72,6 +72,7 @@ static ulong pud_page_vaddr_l4(ulong pud); > > > >>>> static ulong pmd_page_vaddr_l4(ulong pmd); > > > >>>> static int is_opal_context(ulong sp, ulong nip); > > > >>>> void opalmsg(void); > > > >>>> +static bool is_ppc64_elf_abi_v2(void); > > > >>>> static int is_opal_context(ulong sp, ulong nip) > > > >>>> { > > > >>>> @@ -2813,6 +2814,51 @@ ppc64_get_sp(ulong task) > > > >>>> return sp; > > > >>>> } > > > >>>> +static bool > > > >>>> +is_ppc64_elf_abi_v2(void) > > > >>>> +{ > > > >>> > > > >>> Thanks for fixing these issues on ppc64 ! > > > >>> > > > >>> > > > >>> It is good. I am also checking if there is a way to implement > > > >>> 'is_ppc64_elf_abi_v2' by getting ABI version from the ELF notes, > as 'file' > > > >>> command somehow does know: > > > >>> > > > >>> $ file vmlinux-little-endian vmlinux-little-endian: ELF 64-bit LSB > > > >>> executable, 64-bit PowerPC or cisco 7500, OpenPOWER ELF V2 ABI, > version 1 > > > >>> (SYSV), statically linked, > > > >>> BuildID[sha1]=f6449970e6af6cddeed1e260393e2377ccc0f369, not > stripped > > > >>> > > > >>> That might be more dependable (if we can do that). > > > >> How about this approach ? > > > >> > > > >> I feel this might be more dependable, as the file command also uses > > > >> this, as well as fadump (PowerPC firmware-assisted dump) in the > kernel. > > > >> > > > >>> As a sidenote, there is still some issue either in patch #1 or #3 > > > >>> causing all secondary CPUs (where swapper task is running) to show > all > > > >>> threads as '_end' > > > >> What do you think ? > > > > I think your approach will work, thanks for your code! Please see my > > > > comments inlined in the code: > > > > > > > >> Either way what you chose, I do realise the idea to utilise the > > > >> 'std r2, STK_GOT(r1)' in such a way, was interesting and smart. > > > >> > > > >> static bool > > > >> is_ppc64_elf_abi_v2(void) > > > >> { > > > >> const char *kernel_file = pc->namelist; > > > >> int fd, swap; > > > >> char eheader[BUFSIZE]; > > > >> Elf64_Ehdr *elf64; > > > >> > > > >> if ((fd = open(kernel_file, O_RDONLY)) < 0) { > > > >> error(INFO, "%s: %s\n", kernel_file, > strerror(errno)); > > > >> return FALSE; > > > >> } > > > >> if (read(fd, eheader, BUFSIZE) != BUFSIZE) { > > > >> error(INFO, "%s: %s\n", kernel_file, strerror(errno)); > > > >> close(fd); > > > >> return FALSE; > > > >> } > > > >> close(fd); > > > >> > > > >> if (!STRNEQ(eheader, ELFMAG) || eheader[EI_VERSION] != > EV_CURRENT) > > > >> return FALSE; > > > >> > > > >> elf64 = (Elf64_Ehdr *)&eheader[0]; > > > >> > > > >> swap = (((eheader[EI_DATA] == ELFDATA2LSB) && > > > >> (__BYTE_ORDER == __BIG_ENDIAN)) || > > > >> ((eheader[EI_DATA] == ELFDATA2MSB) && > > > >> (__BYTE_ORDER == __LITTLE_ENDIAN))); > > > > The above code seems duplicated with the one in is_kernel(), in fact, > > > > is_kernel() will be invoked at the very beginning to determine if one > > > > file is vmlinux, so I would suggest to add our abi_v2 check here, > > > > like: > > > > > > > > in ppc64.c: > > > > int ppc64_elf_abi = 0; > > > > > > > > static bool is_ppc64_elf_abi_v2(void) > > > > { > > > > if (ppc64_elf_abi == 1) > > > > ... > > > > else if (ppc64_elf_abi == 2) > > > > ... > > > > else > > > > ... > > > > } > > > > > > > > in symbols.c:is_kernel(): > > > > ... > > > > } else if ((elf64->e_ident[EI_CLASS] == ELFCLASS64) && > > > > ((swap16(elf64->e_type, swap) == ET_EXEC) || > > > > ... > > > > case EM_PPC64: > > > > if (machine_type_mismatch(file, "PPC64", NULL, 0)) > > > > goto bailout; > > > > + else > > > > + ppc64_elf_abi = swap32(elf64->e_flags, swap); > > > > break; > > > > > > > > So we can initial the abi version at crash start only once, and get > > > > rid of the code duplication. > > > > > > Yes, I actually copied pasted the initial part from is_kernel. > > > > > > The approach is good, just to clarify, we know that is_kernel will be > > > called everytime before this check right ? > > > > Actually I used another approach rather than is_kernel(), see [1] for > > v2 patchset of ppc64 fixing. > > The reason is keeping a ppc64 specific variable in the multi-cpu arch > > used function is_kernel(), it is hard to make the code as clean. > > > > Hi Aditya, > > There is one more question about the abi_v1/2. Say if I have a abi_v2 > machine, but I want to run a abi_v1 elf executable file. I don't know > 1) is it possible to run v1 elf on a v2 kernel machine; 2) if it can, > which STACK_FRAME_OVERHEAD will the kernel use, 48 or 112? If > STACK_FRAME_OVERHEAD will vary depending on abi_v1/2 of elf executable > file, then our approach needs to change. > > That is my concern, as I discussed with Tao offline, is it possible to allow different ABI versions in a ppc64 system? E.g: kernel tasks or kernel modules.
Because Tao did the test, and found that there are different values of e_flags among various tasks, see: https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg01168.html If the above is not true, could you please try to read the value of e_flags from the kcore/vmcore elf header? (If possible) Thanks Lianbo > Currently I have failed to build a abi_v1 elf executable file on the > abi_v2 machine, which reports: > > $ gcc test.c -mabi=elfv1 > fatal error: gnu/stubs-64-v1.h: No such file or directory > > But I don't know where to install the missing header or rpms, or if it > is doable. > > Any ideas? > > Thanks, > Tao Liu > > > [1]: > https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg01170.html > > > > Thanks, > > Tao Liu > > > > > > > > > > > Thanks, > > > Aditya Gupta > > > > > > > > > > >> /* > > > >> * The ABI version is stored in E_FLAGS in the ELF header, > atleast for > > > >> * ppc64 binaries. > > > >> * > > > >> * These logic is also used by the 'file' command, which > states this in > > > >> * its magic file: > > > >> * > > > >> * >18 leshort 20 PowerPC or > cisco 4500, > > > >> * >18 leshort 21 64-bit > PowerPC or cisco 7500, > > > >> * >>48 lelong 0 > Unspecified or Power ELF V1 ABI, > > > >> * >>48 lelong 1 Power ELF > V1 ABI, > > > >> * >>48 lelong 2 OpenPOWER > ELF V2 ABI, > > > >> * > > > >> * The '>18' above means, data at 18 (0x12), which is > basically > > > >> * 'elf64->e_machine', which will be 20 for PPC, and 21 > for PPC64 > > > >> * > > > >> * Similarly, '>>48' is offset 48 (0x30) in the file, > which is > > > >> * basically 'elf64->e_flags', which has the ELF ABI > version > > > >> */ > > > >> if (elf64->e_flags == 2) { > > > >> /* ABI v2 */ > > > >> return TRUE; > > > >> } else if (elf64->e_flags == 1) { > > > >> /* ABI v1 */ > > > >> return FALSE; > > > >> } > > > >> > > > >> error(WARNING, "Unstable elf_abi v1/v2 detection.\n"); > > > >> return FALSE; > > > >> } > > > >> > > > >> Also, we can those values in the ELF header: > > > >> > > > >> $ hexdump vmlinux-little-endian > > > >> 0000000 457f 464c 0102 0001 0000 0000 0000 0000 > > > >> 0000010 0002 0015 (= 21 = E_MACHINE) 0001 0000 0000 0000 0000 > c000 > > > >> 0000020 0040 0000 0000 0000 8bf8 0040 0000 0000 > > > >> 0000030 0002 (= E_FLAGS = ABI) 0000 0040 0038 0002 0040 0027 > 0026 > > > >> 0000040 0001 0000 0007 0000 0000 0001 0000 0000 > > > >> > > > > I agree with the elf header check on the vmlinux file, it is much > > > > simpler. Also I compiled the kernel with CONFIG_PPC64_ELF_ABI_V1 on, > > > > the file cmd will give: Power ELF V1 ABI string. So the approach can > > > > work. > > > > > > > >> Also, I see 'pc->namelist' is basically the kernel filename, is that > > > >> correct ? Can that be renamed to kernel_filename or something ? > > > >> > > > > Yes, by reading through the code, pc->namelist is vmlinux, and > > > > pc->namelist_debug is the debug symbol if vmlinux doesn't have the > > > > symbol. But I didn't see the need for the rename. > > > > > > > > Thanks, > > > > Tao Liu > > > > > > > >> Thanks, > > > >> Aditya Gupta > > > >> > > > >>> Thanks, > > > >>> > > > >>> Aditya Gupta > > > >>> > > > >>>> + char buf1[BUFSIZE]; > > > >>>> + char *pos1, *pos2; > > > >>>> + int errflag = 0; > > > >>>> + ulong stk_got = 0; > > > >>>> + static bool ret = false; > > > >>>> + static bool checked = false; > > > >>>> + > > > >>>> + if (checked == true || !symbol_exists("tm_reclaim")) > > > >>>> + return ret; > > > >>>> + > > > >>>> + sprintf(buf1, "x/16i tm_reclaim"); > > > >>>> + open_tmpfile(); > > > >>>> + if (!gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR)) > > > >>>> + goto out; > > > >>>> + checked = true; > > > >>>> + rewind(pc->tmpfile); > > > >>>> + while (fgets(buf1, BUFSIZE, pc->tmpfile)) { > > > >>>> + // "std r2, STK_GOT(r1)" is expected > > > >>>> + if (strstr(buf1, "std") && > > > >>>> + strstr(buf1, "(r1)") && > > > >>>> + (pos1 = strstr(buf1, "r2,"))) { > > > >>>> + pos1 += strlen("r2,"); > > > >>>> + for (pos2 = pos1; *pos2 != '\0' && *pos2 != > '('; pos2++); > > > >>>> + *pos2 = '\0'; > > > >>>> + stk_got = stol(pos1, RETURN_ON_ERROR|QUIET, > &errflag); > > > >>>> + break; > > > >>>> + } > > > >>>> + } > > > >>>> + > > > >>>> + if (!errflag) { > > > >>>> + switch (stk_got) { > > > >>>> + case 24: > > > >>>> + ret = true; > > > >>>> + case 40: > > > >>>> + goto out; > > > >>>> + } > > > >>>> + } > > > >>>> + error(WARNING, "Unstable elf_abi v1/v2 detection.\n"); > > > >>>> +out: > > > >>>> + close_tmpfile(); > > > >>>> + return ret; > > > >>>> +} > > > >>>> /* > > > >>>> * get the SP and PC values for idle tasks. > > > >>>> @@ -2834,9 +2880,17 @@ get_ppc64_frame(struct bt_info *bt, ulong > *getpc, ulong *getsp) > > > >>>> sp = ppc64_get_sp(task); > > > >>>> if (!INSTACK(sp, bt)) > > > >>>> goto out; > > > >>>> - readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, ®s, > > > >>>> - sizeof(struct ppc64_pt_regs), > > > >>>> - "PPC64 pt_regs", FAULT_ON_ERROR); > > > >>>> + > > > >>>> + if (THIS_KERNEL_VERSION >= LINUX(6,2,0) && > is_ppc64_elf_abi_v2()) { > > > >>>> + readmem(sp+STACK_SWITCH_FRAME_REGS, KVADDR, ®s, > > > >>>> + sizeof(struct ppc64_pt_regs), > > > >>>> + "PPC64 pt_regs", FAULT_ON_ERROR); > > > >>>> + } else { > > > >>>> + readmem(sp+STACK_FRAME_OVERHEAD, KVADDR, ®s, > > > >>>> + sizeof(struct ppc64_pt_regs), > > > >>>> + "PPC64 pt_regs", FAULT_ON_ERROR); > > > >>>> + } > > > >>>> + > > > >>>> ip = regs.nip; > > > >>>> closest = closest_symbol(ip); > > > >>>> if (STREQ(closest, ".__switch_to") || STREQ(closest, > "__switch_to")) { > > > > >
-- Crash-utility mailing list -- devel@lists.crash-utility.osci.io To unsubscribe send an email to devel-le...@lists.crash-utility.osci.io https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/ Contribution Guidelines: https://github.com/crash-utility/crash/wiki