On Thu, Jun 27, 2024 at 4:27 PM lijiang <liji...@redhat.com> wrote:
> On Fri, May 31, 2024 at 5:33 PM <devel-requ...@lists.crash-utility.osci.io> > wrote: > >> Date: Fri, 31 May 2024 17:19:32 +0800 >> From: Tao Liu <l...@redhat.com> >> Subject: [Crash-utility] [PATCH v4 09/16] x86_64: Add gdb stack >> unwinding support >> To: devel@lists.crash-utility.osci.io >> Cc: Mahesh J Salgaonkar <mah...@linux.ibm.com>, "Naveen N . Rao" >> <naveen.n....@linux.vnet.ibm.com>, Lianbo Jiang < >> liji...@redhat.com>, >> Alexey Makhalov <alexey.makha...@broadcom.com> >> Message-ID: <20240531091939.97828-10-l...@redhat.com> >> Content-Type: text/plain; charset=UTF-8 >> >> This patch will add stack unwinding support for x86_64 CPU arch. It >> follows the tech path of ppc64_get_stack_frame() & ppc64_get_cpu_reg() >> in ppc64.c, to get and consume the cpu regs. >> >> One different case is, we need to handle inactive_task_frame structure >> specifically in x86_64. >> >> If inactive_task_frame structure enabled, for inactive tasks, 7 regs can >> be get from inactive_task_frame in stack, and sp need to rewind back to >> skip inactive_task_frame structure (See code comments in >> x86_64.c:x86_64_get_stack_frame()); for active tasks, we get regs by the >> original way. >> >> If inactive_task_frame structure is not enabled, for inactive tasks, the >> stack frame is organized as linked list, and sp/bp can be get from stack; >> for active tasks, we get regs by the original way. >> >> Cc: Sourabh Jain <sourabhj...@linux.ibm.com> >> Cc: Hari Bathini <hbath...@linux.ibm.com> >> Cc: Mahesh J Salgaonkar <mah...@linux.ibm.com> >> Cc: Naveen N. Rao <naveen.n....@linux.vnet.ibm.com> >> Cc: Lianbo Jiang <liji...@redhat.com> >> Cc: HAGIO KAZUHITO(萩尾 一仁) <k-hagio...@nec.com> >> Cc: Tao Liu <l...@redhat.com> >> Cc: Alexey Makhalov <alexey.makha...@broadcom.com> >> Signed-off-by: Tao Liu <l...@redhat.com> >> --- >> defs.h | 19 +++- >> x86_64.c | 291 ++++++++++++++++++++++++++++++++++++++++++++++++++----- >> 2 files changed, 283 insertions(+), 27 deletions(-) >> >> diff --git a/defs.h b/defs.h >> index 8450aea..ed52cc3 100644 >> --- a/defs.h >> +++ b/defs.h >> @@ -2241,6 +2241,20 @@ struct offset_table { /* stash >> of commonly-used offsets */ >> long mnt_namespace_nr_mounts; >> long mount_mnt_node; >> long log_caller_id; >> + long inactive_task_frame_r15; >> + long inactive_task_frame_r14; >> + long inactive_task_frame_r13; >> + long inactive_task_frame_r12; >> + long inactive_task_frame_flags; >> + long inactive_task_frame_si; >> + long inactive_task_frame_di; >> + long inactive_task_frame_bx; >> + long thread_struct_es; >> + long thread_struct_ds; >> + long thread_struct_fsbase; >> + long thread_struct_gsbase; >> + long thread_struct_fs; >> + long thread_struct_gs; >> }; >> > Also please dump them in the dump_offset_table(). Thanks Lianbo > >> struct size_table { /* stash of commonly-used sizes */ >> @@ -8008,7 +8022,7 @@ extern int have_full_symbols(void); >> >> /* >> * Register numbers must be in sync with gdb/features/i386/64bit-core.c >> - * to make crash_target->fetch_registers() ---> machdep->get_cpu_reg() >> + * to make crash_target->fetch_registers() ---> >> machdep->get_current_task_reg() >> * working properly. >> */ >> enum x86_64_regnum { >> @@ -8052,6 +8066,9 @@ enum x86_64_regnum { >> FOSEG_REGNUM, >> FOOFF_REGNUM, >> FOP_REGNUM, >> + FS_BASE_REGNUM = 152, >> + GS_BASE_REGNUM, >> + ORIG_RAX_REGNUM, >> LAST_REGNUM >> }; >> >> diff --git a/x86_64.c b/x86_64.c >> index 5ab2852..4ba0b40 100644 >> --- a/x86_64.c >> +++ b/x86_64.c >> @@ -126,7 +126,7 @@ static int x86_64_get_framesize(struct bt_info *, >> ulong, ulong, char *); >> static void x86_64_framesize_debug(struct bt_info *); >> static void x86_64_get_active_set(void); >> static int x86_64_get_kvaddr_ranges(struct vaddr_range *); >> -static int x86_64_get_cpu_reg(int, int, const char *, int, void *); >> +static int x86_64_get_current_task_reg(int, const char *, int, void *); >> static int x86_64_verify_paddr(uint64_t); >> static void GART_init(void); >> static void x86_64_exception_stacks_init(void); >> @@ -143,6 +143,29 @@ struct machine_specific x86_64_machine_specific = { >> 0 }; >> static const char *exception_functions_orig[]; >> static const char *exception_functions_5_8[]; >> >> +/* Use this hardwired version -- sometimes the >> + * debuginfo doesn't pick this up even though >> + * it exists in the kernel; it shouldn't change. >> + */ >> +struct x86_64_user_regs_struct { >> + unsigned long r15, r14, r13, r12, bp, bx; >> + unsigned long r11, r10, r9, r8, ax, cx, dx; >> + unsigned long si, di, orig_ax, ip, cs; >> + unsigned long flags, sp, ss, fs_base; >> + unsigned long gs_base, ds, es, fs, gs; >> +}; >> + >> +struct user_regs_bitmap_struct { >> + struct { >> + unsigned long r15, r14, r13, r12, bp, bx; >> + unsigned long r11, r10, r9, r8, ax, cx, dx; >> + unsigned long si, di, orig_ax, ip, cs; >> + unsigned long flags, sp, ss, fs_base; >> + unsigned long gs_base, ds, es, fs, gs; >> + }; >> + ulong bitmap; >> +}; >> > > The struct user_regs_bitmap_struct can de defined as below: > +struct user_regs_bitmap_struct { > + struct user_regs_bitmap_struct uregs; > + ulong bitmap; > +}; > > > + >> /* >> * Do all necessary machine-specific setup here. This is called several >> * times during initialization. >> @@ -195,7 +218,7 @@ x86_64_init(int when) >> machdep->machspec->irq_eframe_link = UNINITIALIZED; >> machdep->machspec->irq_stack_gap = UNINITIALIZED; >> machdep->get_kvaddr_ranges = x86_64_get_kvaddr_ranges; >> - machdep->get_current_task_reg = x86_64_get_cpu_reg; >> + machdep->get_current_task_reg = >> x86_64_get_current_task_reg; >> if (machdep->cmdline_args[0]) >> parse_cmdline_args(); >> if ((string = pc->read_vmcoreinfo("relocate"))) { >> @@ -500,6 +523,12 @@ x86_64_init(int when) >> MEMBER_OFFSET_INIT(thread_struct_rsp, >> "thread_struct", "sp"); >> if (INVALID_MEMBER(thread_struct_rsp0)) >> MEMBER_OFFSET_INIT(thread_struct_rsp0, >> "thread_struct", "sp0"); >> + MEMBER_OFFSET_INIT(thread_struct_es, "thread_struct", >> "es"); >> + MEMBER_OFFSET_INIT(thread_struct_ds, "thread_struct", >> "ds"); >> + MEMBER_OFFSET_INIT(thread_struct_fsbase, "thread_struct", >> "fsbase"); >> + MEMBER_OFFSET_INIT(thread_struct_gsbase, "thread_struct", >> "gsbase"); >> + MEMBER_OFFSET_INIT(thread_struct_fs, "thread_struct", >> "fs"); >> + MEMBER_OFFSET_INIT(thread_struct_gs, "thread_struct", >> "gs"); >> STRUCT_SIZE_INIT(tss_struct, "tss_struct"); >> MEMBER_OFFSET_INIT(tss_struct_ist, "tss_struct", "ist"); >> if (INVALID_MEMBER(tss_struct_ist)) { >> @@ -583,17 +612,6 @@ x86_64_init(int when) >> "user_regs_struct", "r15"); >> STRUCT_SIZE_INIT(user_regs_struct, "user_regs_struct"); >> if (!VALID_STRUCT(user_regs_struct)) { >> - /* Use this hardwired version -- sometimes the >> - * debuginfo doesn't pick this up even though >> - * it exists in the kernel; it shouldn't change. >> - */ >> - struct x86_64_user_regs_struct { >> - unsigned long r15, r14, r13, r12, bp, bx; >> - unsigned long r11, r10, r9, r8, ax, cx, >> dx; >> - unsigned long si, di, orig_ax, ip, cs; >> - unsigned long flags, sp, ss, fs_base; >> - unsigned long gs_base, ds, es, fs, gs; >> - }; >> ASSIGN_SIZE(user_regs_struct) = >> sizeof(struct x86_64_user_regs_struct); >> ASSIGN_OFFSET(user_regs_struct_rip) = >> @@ -891,7 +909,7 @@ x86_64_dump_machdep_table(ulong arg) >> fprintf(fp, " is_page_ptr: x86_64_is_page_ptr()\n"); >> fprintf(fp, " verify_paddr: x86_64_verify_paddr()\n"); >> fprintf(fp, " get_kvaddr_ranges: x86_64_get_kvaddr_ranges()\n"); >> - fprintf(fp, " get_cpu_reg: x86_64_get_cpu_reg()\n"); >> + fprintf(fp, "get_current_task_reg: >> x86_64_get_current_task_reg()\n"); >> fprintf(fp, " init_kernel_pgd: x86_64_init_kernel_pgd()\n"); >> fprintf(fp, "clear_machdep_cache: >> x86_64_clear_machdep_cache()\n"); >> fprintf(fp, " xendump_p2m_create: %s\n", PVOPS_XEN() ? >> @@ -4971,22 +4989,124 @@ x86_64_eframe_verify(struct bt_info *bt, long >> kvaddr, long cs, long ss, >> return FALSE; >> } >> >> +#define GET_REG_FROM_INACTIVE_TASK_FRAME(reg) \ >> +({ \ >> + ulong offset, reg_value = 0, rsp; \ >> + if (VALID_MEMBER(inactive_task_frame_bp)) { \ >> + offset = OFFSET(task_struct_thread) + >> OFFSET(thread_struct_rsp); \ >> + readmem(bt->task + offset, KVADDR, &rsp, \ >> + sizeof(ulong), "thread_struct.rsp", >> FAULT_ON_ERROR); \ >> + readmem(rsp + OFFSET(inactive_task_frame_##reg), KVADDR, >> ®_value, \ >> + sizeof(ulong), "inactive_task_frame.##reg", >> FAULT_ON_ERROR); \ >> + } \ >> + reg_value; \ >> +}) >> > > I would suggest changing the above macro definition to a function, at > least there are two advantages: > [1] once the task_struct or inactive_task_frame is changed in the future, > it is easy to follow its changes. > [2] easy to debug, and more readable. > > > >> + >> /* >> * Get a stack frame combination of pc and ra from the most relevent >> spot. >> */ >> static void >> x86_64_get_stack_frame(struct bt_info *bt, ulong *pcp, ulong *spp) >> { >> - if (bt->flags & BT_DUMPFILE_SEARCH) >> - return x86_64_get_dumpfile_stack_frame(bt, pcp, spp); >> + struct user_regs_bitmap_struct *ur_bitmap; >> + ulong pcp_save = *pcp; >> + ulong spp_save = *spp; >> + ulong sp; >> >> if (bt->flags & BT_SKIP_IDLE) >> bt->flags &= ~BT_SKIP_IDLE; >> + if (pcp) >> + *pcp = x86_64_get_pc(bt); >> + if (spp) >> + sp = *spp = x86_64_get_sp(bt); >> + ur_bitmap = (struct user_regs_bitmap_struct >> *)GETBUF(sizeof(*ur_bitmap)); >> + memset(ur_bitmap, 0, sizeof(struct user_regs_bitmap_struct)); >> > > Can you keep the same style?(sorry to be picky :-)) > + ur_bitmap = (struct user_regs_bitmap_struct > *)GETBUF(sizeof(*ur_bitmap)); > + memset(ur_bitmap, 0, sizeof(*ur_bitmap)); > or > + ur_bitmap = (struct user_regs_bitmap_struct *)GETBUF(sizeof(struct > user_regs_bitmap_struct))); > + memset(ur_bitmap, 0, sizeof(struct user_regs_bitmap_struct)); > > + >> + if (VALID_MEMBER(inactive_task_frame_bp)) { >> + if (!is_task_active(bt->task)) { >> + /* >> + * For inactive tasks in live and dumpfile, regs >> can be >> + * get from inactive_task_frame struct. >> + */ >> + ur_bitmap->r15 = >> GET_REG_FROM_INACTIVE_TASK_FRAME(r15); >> + ur_bitmap->r14 = >> GET_REG_FROM_INACTIVE_TASK_FRAME(r14); >> + ur_bitmap->r13 = >> GET_REG_FROM_INACTIVE_TASK_FRAME(r13); >> + ur_bitmap->r12 = >> GET_REG_FROM_INACTIVE_TASK_FRAME(r12); >> + ur_bitmap->bx = >> GET_REG_FROM_INACTIVE_TASK_FRAME(bx); >> + ur_bitmap->bp = >> GET_REG_FROM_INACTIVE_TASK_FRAME(bp); >> + /* >> + For inactive tasks: >> + crash> task -x 1|grep sp >> + sp = 0xffffc90000013d00 >> + crash> rd ffffc90000013d00 32 >> + ffffc90000013d00: ffff888104dad4a8 >> 0000000000000000 r15,r14 >> + ffffc90000013d10: ffff888100280000 >> ffff888100216500 r13,r12 >> + ffffc90000013d20: ffff888100217018 >> ffff88817fd2c800 rbx,rbp >> + ffffc90000013d30: ffffffff81a6a1b3 >> ffffc90000013de0 saved_rip,... >> + ffffc90000013d40: ffff888100000004 >> 99ccbf53ea493000 >> + ffffc90000013d50: ffff888100216500 >> ffff888100216500 >> + >> + crash> dis __schedule >> + ... >> + 0xffffffff81a6a1ab <__schedule+507>: mov >> %r13,%rsi >> + 0xffffffff81a6a1ae <__schedule+510>: call >> 0xffffffff81003490 <__switch_to_asm> >> + 0xffffffff81a6a1b3 <__schedule+515>: mov >> %rax,%rdi <<=== saved_rip >> + ... >> + crash> dis __switch_to_asm >> + 0xffffffff81003490 <__switch_to_asm>: push >> %rbp >> + 0xffffffff81003491 <__switch_to_asm+1>: push >> %rbx >> + 0xffffffff81003492 <__switch_to_asm+2>: push >> %r12 >> + 0xffffffff81003494 <__switch_to_asm+4>: push >> %r13 >> + 0xffffffff81003496 <__switch_to_asm+6>: push >> %r14 >> + 0xffffffff81003498 <__switch_to_asm+8>: push >> %r15 >> + 0xffffffff8100349a <__switch_to_asm+10>: >> mov %rsp,0x14d8(%rdi) >> + ... >> + Now saved_rip = ffffffff81a6a1b3, and we are >> starting >> + the stack unwind at saved_rip, which is function >> __schedule() >> + instead of function __switch_to_asm(), so the >> stack pointer should >> + be rewind from ffffc90000013d00 back to >> ffffc90000013d38, >> + aka *spp += 7 * reg_len. Otherwise we are >> unwinding function >> + __schedule() but with __switch_to_asm()'s stack >> frame, which >> + will fail. >> + */ >> + sp += 7 * sizeof(unsigned long); >> > > It seems this may still rely on the compiler, have you tried another > compiler? I'm not sure if there are the same results, just my concerns. > > Anyway, If there is no better way or solution, at least we should comment > on it and point out the current limitations. What do you think? > > + ur_bitmap->bitmap += 0x3f; >> > > Also why is it 0x3f? Can you explain the details? > > >> + } else { >> + /* >> + * For active tasks in dumpfile, we get regs >> through the >> + * original way. For active tasks in live, we only >> get >> + * ip and sp in the end of the function. >> + */ >> + if (bt->flags & BT_DUMPFILE_SEARCH) { >> + FREEBUF(ur_bitmap); >> + bt->need_free = FALSE; >> > > See the comments in the previous thread. > > >> + *pcp = pcp_save; >> + *spp = spp_save; >> + return >> x86_64_get_dumpfile_stack_frame(bt, pcp, spp); >> + } >> + } >> + } else { >> + if (!is_task_active(bt->task)) { >> + readmem(*spp, KVADDR, &(ur_bitmap->bp), >> + sizeof(ulong), "ur_bitmap->bp", >> FAULT_ON_ERROR); >> + ur_bitmap->bitmap += 0x10; >> + } else { >> + if (bt->flags & BT_DUMPFILE_SEARCH) { >> + FREEBUF(ur_bitmap); >> + bt->need_free = FALSE; >> + *pcp = pcp_save; >> + *spp = spp_save; >> + return >> x86_64_get_dumpfile_stack_frame(bt, pcp, spp); >> + } >> + } >> + } >> >> - if (pcp) >> - *pcp = x86_64_get_pc(bt); >> - if (spp) >> - *spp = x86_64_get_sp(bt); >> + ur_bitmap->ip = *pcp; >> + ur_bitmap->sp = sp; >> + ur_bitmap->bitmap += 0x50000; >> + >> + bt->machdep = ur_bitmap; >> + bt->need_free = TRUE; >> } >> >> /* >> @@ -6458,6 +6578,14 @@ x86_64_ORC_init(void) >> >> MEMBER_OFFSET_INIT(inactive_task_frame_bp, "inactive_task_frame", >> "bp"); >> MEMBER_OFFSET_INIT(inactive_task_frame_ret_addr, >> "inactive_task_frame", "ret_addr"); >> + MEMBER_OFFSET_INIT(inactive_task_frame_r15, >> "inactive_task_frame", "r15"); >> + MEMBER_OFFSET_INIT(inactive_task_frame_r14, >> "inactive_task_frame", "r14"); >> + MEMBER_OFFSET_INIT(inactive_task_frame_r13, >> "inactive_task_frame", "r13"); >> + MEMBER_OFFSET_INIT(inactive_task_frame_r12, >> "inactive_task_frame", "r12"); >> + MEMBER_OFFSET_INIT(inactive_task_frame_flags, >> "inactive_task_frame", "flags"); >> + MEMBER_OFFSET_INIT(inactive_task_frame_si, "inactive_task_frame", >> "si"); >> + MEMBER_OFFSET_INIT(inactive_task_frame_di, "inactive_task_frame", >> "di"); >> + MEMBER_OFFSET_INIT(inactive_task_frame_bx, "inactive_task_frame", >> "bx"); >> >> orc->has_signal = MEMBER_EXISTS("orc_entry", "signal"); /* added >> at 6.3 */ >> orc->has_end = MEMBER_EXISTS("orc_entry", "end"); /* >> removed at 6.4 */ >> @@ -9071,17 +9199,128 @@ x86_64_get_kvaddr_ranges(struct vaddr_range *vrp) >> return cnt; >> } >> >> +#define REG_CASE(R, r) \ >> + case R##_REGNUM: \ >> + if (size != sizeof(ur_bitmap->r)) { \ >> + ret = FALSE; break; \ >> + } else { \ >> + memcpy(value, &ur_bitmap->r, size); \ >> + ret = TRUE; break; \ >> + } >> + >> static int >> -x86_64_get_cpu_reg(int cpu, int regno, const char *name, >> +x86_64_get_current_task_reg(int regno, const char *name, >> int size, void *value) >> { >> - if (regno >= LAST_REGNUM) >> - return FALSE; >> + struct bt_info bt_info, bt_setup; >> + struct task_context *tc; >> + struct user_regs_bitmap_struct *ur_bitmap; >> + ulong ip, sp; >> + bool ret = FALSE; >> >> - if (VMSS_DUMPFILE()) >> - return vmware_vmss_get_cpu_reg(cpu, regno, name, size, >> value); >> + switch (regno) { >> + case RAX_REGNUM ... GS_REGNUM: >> + case FS_BASE_REGNUM ... ORIG_RAX_REGNUM: >> + break; >> + default: >> + return FALSE; >> + } >> >> > Ditto. > > >> - return FALSE; >> + tc = CURRENT_CONTEXT(); >> + if (!tc) >> + return FALSE; >> + >> + if (VMSS_DUMPFILE()) >> + return vmware_vmss_get_cpu_reg(tc->processor, regno, >> name, size, value); >> + >> + BZERO(&bt_setup, sizeof(struct bt_info)); >> + clone_bt_info(&bt_setup, &bt_info, tc); >> + fill_stackbuf(&bt_info); >> + >> + // reusing the get_dumpfile_regs function to get pt regs structure >> + get_dumpfile_regs(&bt_info, &sp, &ip); >> + if (bt_info.stackbuf) >> + FREEBUF(bt_info.stackbuf); >> + ur_bitmap = (struct user_regs_bitmap_struct *)bt_info.machdep; >> + if (!ur_bitmap) >> + return FALSE; >> + >> + /* Get all registers from elf notes*/ >> + if (!bt_info.need_free) { >> + goto get_all; >> + } >> + >> > Ditto. > > >> + /* Get subset registers from stack frame*/ >> + switch (ur_bitmap->bitmap) { >> + case 0x5003f: >> + switch (regno) { >> + case R12_REGNUM ... R15_REGNUM: >> + case RBP_REGNUM: >> + case RBX_REGNUM: >> + case RIP_REGNUM: >> + case RSP_REGNUM: >> + break; >> + default: >> + return FALSE; >> + } >> + break; >> + case 0x50010: >> + switch (regno) { >> + case RBP_REGNUM: >> + case RIP_REGNUM: >> + case RSP_REGNUM: >> + break; >> + default: >> + return FALSE; >> + } >> + break; >> + case 0x50000: >> + switch (regno) { >> + case RIP_REGNUM: >> + case RSP_REGNUM: >> + break; >> + default: >> + return FALSE; >> + } >> + break; >> + } >> > > Can you help to explain more details about several magic number values? > Such as 0x5003f, 0x50000, 0x50010. Or you may point out where they come > from, it may be helpful to add them here as code comment. > > Thanks > Lianbo > > + >> +get_all: >> + switch (regno) { >> + REG_CASE(RAX, ax); >> + REG_CASE(RBX, bx); >> + REG_CASE(RCX, cx); >> + REG_CASE(RDX, dx); >> + REG_CASE(RSI, si); >> + REG_CASE(RDI, di); >> + REG_CASE(RBP, bp); >> + REG_CASE(RSP, sp); >> + REG_CASE(R8, r8); >> + REG_CASE(R9, r9); >> + REG_CASE(R10, r10); >> + REG_CASE(R11, r11); >> + REG_CASE(R12, r12); >> + REG_CASE(R13, r13); >> + REG_CASE(R14, r14); >> + REG_CASE(R15, r15); >> + REG_CASE(RIP, ip); >> + REG_CASE(EFLAGS, flags); >> + REG_CASE(CS, cs); >> + REG_CASE(SS, ss); >> + REG_CASE(DS, ds); >> + REG_CASE(ES, es); >> + REG_CASE(FS, fs); >> + REG_CASE(GS, gs); >> + REG_CASE(FS_BASE, fs_base); >> + REG_CASE(GS_BASE, gs_base); >> + REG_CASE(ORIG_RAX, orig_ax); >> + } >> + >> + if (bt_info.need_free) { >> + FREEBUF(ur_bitmap); >> + bt_info.need_free = FALSE; >> + } >> + return ret; >> } >> >> /* >> -- >> 2.40.1 >> >
-- Crash-utility mailing list -- devel@lists.crash-utility.osci.io To unsubscribe send an email to devel-le...@lists.crash-utility.osci.io https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/ Contribution Guidelines: https://github.com/crash-utility/crash/wiki