On Fri, May 31, 2024 at 5:21 PM <devel-requ...@lists.crash-utility.osci.io>
wrote:

> Date: Fri, 31 May 2024 13:14:02 +0800
> From: Tao Liu <l...@redhat.com>
> Subject: [Crash-utility] Re: [PATCH] x86_64: Add
>         top_of_kernel_stack_padding for kernel stack
> To: Lianbo Jiang <liji...@redhat.com>
> Cc: devel@lists.crash-utility.osci.io
> Message-ID:
>         <
> cao7dbbwzgeijbn2sv-3e00tsu2uo_kmbsz_ngvcft6xzdj3...@mail.gmail.com>
> Content-Type: text/plain; charset="UTF-8"
>
> Hi Lianbo,
>
> On Mon, May 27, 2024 at 11:30 AM Lianbo Jiang <liji...@redhat.com> wrote:
> >
> > Hi, Tao
> >
> > Thank you for the fix.
> >
> > On 5/23/24 12:06 PM, devel-requ...@lists.crash-utility.osci.io wrote:
> > > Date: Thu, 23 May 2024 12:06:03 +0800
> > > From: Tao Liu<l...@redhat.com>
> > > Subject: [Crash-utility] [PATCH] x86_64: Add
> > >       top_of_kernel_stack_padding for kernel stack
> > > To:devel@lists.crash-utility.osci.io
> > > Cc: Tao Liu<l...@redhat.com>
> > > Message-ID:<20240523040603.10304-1-l...@redhat.com>
> > > Content-Type: text/plain; charset="US-ASCII"; x-default=true
> > >
> > > With kernel patch [1], x86_64 will add extra padding for kernel stack,
> > > as a result, the pt_regs will be shift down by the offset of padding.
> > > Without the patch, the values of registers read from pt_regs will be
> > > incorrect.
> > >
> > > Though currently the TOP_OF_KERNEL_STACK_PADDING is configured by
> > > Kconfig, according to kernel code comment [2], the value may be made
> > > dynamicly later. In addition there might be systems compiled without
> > > Kconfig avaliable. So in this patch, we will calculate the value of
> > > TOP_OF_KERNEL_STACK_PADDING.
> > >
> > > The calculation is as follows:
> > >
> > > 1) in startup_64(), there is a lea instruction as:
> > >     leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING -
> PTREGS_SIZE)(%rip), %rsp
> > >
> > > 2) in rewind_stack_and_make_dead(), there is a lea instruction as:
> > >     leaq      -PTREGS_SIZE(%rax), %rsp
> > >
> > > The disassembled 2 instructions will be like:
> > >
> > > 1) 0xffffffff93a0007d <startup_64+3>:      lea
> 0x1e03ec4(%rip),%rsp        # 0xffffffff95803f48
> > >
>         ^^^^^^^^^^^^^^^^^^^^
> > > 2) 0xffffffff93a0465a <rewind_stack_and_make_dead+10>:     lea
> -0xa8(%rax),%rsp
> > >
>  ^^^^
> > > 0xffffffff95803f48 is the value of (__end_init_task -
> > > TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE), and 0xa8 is the value of
> > > PTREGS_SIZE, __end_init_task can be get by symbol reading.
> >
> > Calculating the value of TOP_OF_KERNEL_STACK_PADDING, which looks good,
> but it heavily relies on compiler.
> > Normally we would use this way unless there is no other choice.
> >
> > How about the following changes? Although it doesn't handle the case
> that the value is dynamic, let's see
> > how to change in the kernel in future, and then consider how to reflect
> it in crash-utility.
> >
> Sure, looks good to me, so let's go with this, and update it later
> when kernel changes.
>

Ok. Thanks, Tao.

Applied with minor changes:

https://github.com/crash-utility/crash/commit/48764a14bc5856f0b0bb30685336c68b832154fc

Lianbo


>
> Thanks,
> Tao Liu
>
> >
> > diff --git a/defs.h b/defs.h
> > index 01f316e67dde..42d875965256 100644
> > --- a/defs.h
> > +++ b/defs.h
> > @@ -2414,6 +2414,7 @@ struct size_table {         /* stash of
> commonly-used sizes */
> >         long maple_tree;
> >         long maple_node;
> >         long module_memory;
> > +       long fred_frame;
> >   };
> >
> >   struct array_table {
> > diff --git a/kernel.c b/kernel.c
> > index 1728b70c1b5c..cd3d6044cc9a 100644
> > --- a/kernel.c
> > +++ b/kernel.c
> > @@ -668,6 +668,7 @@ kernel_init()
> >         STRUCT_SIZE_INIT(softirq_state, "softirq_state");
> >         STRUCT_SIZE_INIT(softirq_action, "softirq_action");
> >         STRUCT_SIZE_INIT(desc_struct, "desc_struct");
> > +       STRUCT_SIZE_INIT(fred_frame, "fred_frame");
> >
> >         STRUCT_SIZE_INIT(char_device_struct, "char_device_struct");
> >         if (VALID_STRUCT(char_device_struct)) {
> > diff --git a/x86_64.c b/x86_64.c
> > index 0c21eb827e4a..6777c93e6b47 100644
> > --- a/x86_64.c
> > +++ b/x86_64.c
> > @@ -4086,10 +4086,11 @@ in_exception_stack:
> >
> >           if (!irq_eframe && !is_kernel_thread(bt->tc->task) &&
> >               (GET_STACKBASE(bt->tc->task) == bt->stackbase)) {
> > +               long stack_padding_size = SIZE(fred_frame) > 0 ? (2*8) :
> 0;
> >                 user_mode_eframe = bt->stacktop - SIZE(pt_regs);
> >                 if (last_process_stack_eframe < user_mode_eframe)
> >                         x86_64_exception_frame(EFRAME_PRINT, 0,
> bt->stackbuf +
> > -                               (bt->stacktop - bt->stackbase) -
> SIZE(pt_regs),
> > +                               (bt->stacktop - stack_padding_size -
> bt->stackbase) - SIZE(pt_regs),
> >                                 bt, ofp);
> >         }
> >
> > @@ -4407,10 +4408,11 @@ in_exception_stack:
> >
> >           if (!irq_eframe && !is_kernel_thread(bt->tc->task) &&
> >               (GET_STACKBASE(bt->tc->task) == bt->stackbase)) {
> > +               long stack_padding_size = SIZE(fred_frame) > 0 ? (2*8) :
> 0;
> >                 user_mode_eframe = bt->stacktop - SIZE(pt_regs);
> >                 if (last_process_stack_eframe < user_mode_eframe)
> >                         x86_64_exception_frame(EFRAME_PRINT, 0,
> bt->stackbuf +
> > -                               (bt->stacktop - bt->stackbase) -
> SIZE(pt_regs),
> > +                               (bt->stacktop - stack_padding_size -
> bt->stackbase) - SIZE(pt_regs),
> >                                 bt, ofp);
> >         }
> >
> > Thanks
> > Lianbo
> >
> > > [1]:
> https://lore.kernel.org/all/170668568261.398.10403890006820046961.tip-bot2@tip-bot2/
> > > [2]:
> https://elixir.bootlin.com/linux/v6.9.1/source/arch/x86/include/asm/thread_info.h#L34
> > >
> > > Signed-off-by: Tao Liu<l...@redhat.com>
> > > ---
> > >   x86_64.c | 84
> ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
> > >   1 file changed, 82 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/x86_64.c b/x86_64.c
> > > index 0c21eb8..43a31c2 100644
> > > --- a/x86_64.c
> > > +++ b/x86_64.c
> > > @@ -137,6 +137,7 @@ static orc_entry *orc_find(ulong);
> > >   static orc_entry *orc_module_find(ulong);
> > >   static ulong ip_table_to_vaddr(ulong);
> > >   static void orc_dump(ulong);
> > > +static long top_of_kernel_stack_padding(void);
> > >
> > >   struct machine_specific x86_64_machine_specific = { 0 };
> > >
> > > @@ -4089,7 +4090,8 @@ in_exception_stack:
> > >               user_mode_eframe = bt->stacktop - SIZE(pt_regs);
> > >               if (last_process_stack_eframe < user_mode_eframe)
> > >                       x86_64_exception_frame(EFRAME_PRINT, 0,
> bt->stackbuf +
> > > -                             (bt->stacktop - bt->stackbase) -
> SIZE(pt_regs),
> > > +                             (bt->stacktop - bt->stackbase) -
> SIZE(pt_regs) -
> > > +                             top_of_kernel_stack_padding(),
> > >                               bt, ofp);
> > >       }
> > >
> > > @@ -4410,7 +4412,8 @@ in_exception_stack:
> > >               user_mode_eframe = bt->stacktop - SIZE(pt_regs);
> > >               if (last_process_stack_eframe < user_mode_eframe)
> > >                       x86_64_exception_frame(EFRAME_PRINT, 0,
> bt->stackbuf +
> > > -                             (bt->stacktop - bt->stackbase) -
> SIZE(pt_regs),
> > > +                             (bt->stacktop - bt->stackbase) -
> SIZE(pt_regs) -
> > > +                             top_of_kernel_stack_padding(),
> > >                               bt, ofp);
> > >       }
> > >
> > > @@ -9541,4 +9544,81 @@ x86_64_swp_offset(ulong entry)
> > >       return SWP_OFFSET(entry);
> > >   }
> > >
> > > +static long
> > > +top_of_kernel_stack_padding(void)
> > > +{
> > > +     char buf1[BUFSIZE];
> > > +     char *cursor;
> > > +     long final_value, ptregs_size_value;
> > > +     char *arglist[MAXARGS];
> > > +     bool found = FALSE;
> > > +
> > > +     static long kernel_stack_padding = -1;
> > > +
> > > +     if (kernel_stack_padding >= 0)
> > > +             return kernel_stack_padding;
> > > +
> > > +     /*
> > > +     * startup_64:
> > > +     * ...
> > > +     * mov %rsi,%r15
> > > +     * leaq  (__end_init_task - TOP_OF_KERNEL_STACK_PADDING -
> PTREGS_SIZE)(%rip), %rsp
> > > +     */
> > > +     sprintf(buf1, "disass /r startup_64");
> > > +     open_tmpfile2();
> > > +     if (!gdb_pass_through(buf1, pc->tmpfile2, GNU_RETURN_ON_ERROR)) {
> > > +             kernel_stack_padding = 0;
> > > +             goto out;
> > > +     }
> > > +
> > > +     rewind(pc->tmpfile2);
> > > +     while (fgets(buf1, BUFSIZE, pc->tmpfile2) && !found) {
> > > +             // machine code of "mov %rsi,%r15"
> > > +             if (strstr(buf1, "49 89 f7"))
> > > +                     found = TRUE;
> > > +     }
> > > +     if (!found || !(cursor = strstr(buf1, "# 0x"))) {
> > > +             kernel_stack_padding = 0;
> > > +             goto out;
> > > +     }
> > > +
> > > +     parse_line(cursor, arglist);
> > > +     final_value = stol(arglist[1], FAULT_ON_ERROR, NULL);
> > > +
> > > +     /*
> > > +     * rewind_stack_and_make_dead:
> > > +     * ...
> > > +     * leaq  -PTREGS_SIZE(%rax), %rsp
> > > +     */
> > > +     found = FALSE;
> > > +     rewind(pc->tmpfile2);
> > > +     sprintf(buf1, "disass rewind_stack_and_make_dead");
> > > +     if (!gdb_pass_through(buf1, pc->tmpfile2, GNU_RETURN_ON_ERROR)) {
> > > +             kernel_stack_padding = 0;
> > > +             goto out;
> > > +     }
> > > +     rewind(pc->tmpfile2);
> > > +     while (fgets(buf1, BUFSIZE, pc->tmpfile2)) {
> > > +             // find leaq -PTREGS_SIZE(%rax), %rsp
> > > +             if (strstr(buf1, "lea") && (cursor = strstr(buf1,
> "-0x"))) {
> > > +                     parse_line(cursor, arglist);
> > > +                     char *p = strchr(arglist[0], '(');
> > > +                     *p = '\0';
> > > +                     ptregs_size_value = stol(arglist[0] + 1,
> FAULT_ON_ERROR, NULL);
> > > +                     found = TRUE;
> > > +                     break;
> > > +             }
> > > +     }
> > > +     if (!found) {
> > > +             kernel_stack_padding = 0;
> > > +             goto out;
> > > +     }
> > > +
> > > +     struct syment *s = symbol_search("__end_init_task");
> > > +     kernel_stack_padding = s->value - final_value -
> ptregs_size_value;
> > > +out:
> > > +     close_tmpfile2();
> > > +     return kernel_stack_padding;
> > > +}
> > > +
> > >   #endif  /* X86_64 */
> > > -- 2.40.1
> >
>
--
Crash-utility mailing list -- devel@lists.crash-utility.osci.io
To unsubscribe send an email to devel-le...@lists.crash-utility.osci.io
https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/
Contribution Guidelines: https://github.com/crash-utility/crash/wiki

Reply via email to