Hi Tao,

Thanks for your suggestion, and this method works well as expected.

Can we make it simpler, since the value we are looking for is the 2nd
> oprand of tbnz, there must be a ',' after the value.
>
> tbnz    w2, #0x1f, 0xffff80008001bed0
>
> Can we make it like the following? I haven't tested the following code...
>
> if ((pos2 = strchr(pos1, '#'))) {
>   pos2 += 1;
>   for (pos1 = pos2; *pos2 != '\0' && *pos2 != ','; pos2++);
>   *pos2 = '\0';
>   thread_shift = stol(pos1, RETURN_ON_ERROR|QUIET, &errflag);
>   if (errflag) {
>     thread_shift = 0;
>   }
>   break;
> }
>


Thanks.
Yeping.ZHENG

Tao Liu <l...@redhat.com> 于2024年7月30日周二 08:55写道:

> Hi Yeping,
>
> On Mon, Jul 29, 2024 at 2:20 PM yp z <wonder...@gmail.com> wrote:
> >
> > Hi Lianbo and Tao,
> > Thank you for your suggestions, and I have rewrote a patch in two cases:
> >
> > [1] with vmcoreinfo:
> > +       if (kernel_symbol_exists("kasan_enable_current")) {
> > +               min_thread_shift += 1;
> > +               thread_shift = (min_thread_shift < machdep->pageshift) ?
> machdep->pageshift : min_thread_shift;
> > +       }
> >
> This looks fine to me.
>
> >> BTW:  can you help point out the current issue is with or without the
> vmcoreinfo? Yeping
> >
> > My issue is with vmcoreinfo of "SYMBOL("kasan_enable_current")", and
> works well with this patch.
> >
> > [2] without vmcoreinfo:
> >>
> >> What about somehow the disassembly gives us hex values like:
> >> #0xe? It won't work by then. Any ideas for this?
> >
> >  In order to use stol() as expected, I have changed the first
> non-numeric char to '\0':
> > +                       if ((pos1 = strstr(buf1, "tbnz"))) {
> > +                               if ((pos2 = strchr(pos1, '#'))) {
> > +                                       pos2 += 1;
> > +                                       pos1 = pos2;
> > +                                       while (*pos2 != '\0') {
> > +                                               if (!((*pos2 >= '0' &&
> *pos2 <= '9')
> > +                                                       || (*pos2 >= 'A'
> && *pos2 <= 'F')
> > +                                                       || (*pos2 >= 'a'
> && *pos2 <= 'f'))) {
>
> There are 'x' 'X' not covered, it will not work for the "#0xe" case.
> Can we make it simpler, since the value we are looking for is the 2nd
> oprand of tbnz, there must be a ',' after the value.
>
> tbnz    w2, #0x1f, 0xffff80008001bed0
>
> Can we make it like the following? I haven't tested the following code...
>
> if ((pos2 = strchr(pos1, '#'))) {
>   pos2 += 1;
>   for (pos1 = pos2; *pos2 != '\0' && *pos2 != ','; pos2++);
>   *pos2 = '\0';
>   thread_shift = stol(pos1, RETURN_ON_ERROR|QUIET, &errflag);
>   if (errflag) {
>     thread_shift = 0;
>   }
>   break;
> }
>
> Thanks,
> Tao Liu
>
> > +                                                       *pos2 = '\0';
> > +                                                       break;
> > +                                               }
> > +                                               ++pos2;
> > +                                       }
> > +                                       thread_shift = stol(pos1,
> RETURN_ON_ERROR|QUIET, &errflag);
> >
> > Thanks.
> > Yeping.ZHENG
> >
> > lijiang <liji...@redhat.com> 于2024年7月26日周五 17:23写道:
> >>
> >> On Fri, Jul 19, 2024 at 6:40 AM Tao Liu <l...@redhat.com> wrote:
> >>>
> >>> Hi Lianbo,
> >>>
> >>> On Thu, Jul 18, 2024 at 10:50 PM Lianbo Jiang <liji...@redhat.com>
> wrote:
> >>> >
> >>> > On 7/16/24 4:22 PM, Tao Liu wrote:
> >>> >
> >>> > > Hi Yeping,
> >>> > >
> >>> > > Thanks for the fix.
> >>> > >
> >>> > > On Thu, Jul 11, 2024 at 1:38 PM <wonder...@gmail.com> wrote:
> >>> > >> When using the crash tool to parse the ARM64 dump file with KASAN
> enabled, I found that using the bt -a command will cause this tool to
> crash, the following is the backtrace infomation.
> >>> > >>
> >>> > >> (gdb) bt
> >>> > >> #0  0x00005635ac2b166b in arm64_unwind_frame
> (frame=0x7ffdaf35cb70, bt=0x7ffdaf35d430)
> >>> > >>      at arm64.c:2821
> >>> > >> #1  arm64_back_trace_cmd (bt=0x7ffdaf35d430) at arm64.c:3306
> >>> > >> #2  0x00005635ac27b108 in back_trace (bt=bt@entry=0x7ffdaf35d430)
> at kernel.c:3239
> >>> > >> #3  0x00005635ac2880ae in cmd_bt () at kernel.c:2863
> >>> > >> #4  0x00005635ac1f16dc in exec_command () at main.c:893
> >>> > >> #5  0x00005635ac1f192a in main_loop () at main.c:840
> >>> > >> #6  0x00005635ac50df81 in captured_main (data=<optimized out>) at
> main.c:1284
> >>> > >> #7  gdb_main (args=<optimized out>) at main.c:1313
> >>> > >> #8  0x00005635ac50e000 in gdb_main_entry (argc=<optimized out>,
> argv=<optimized out>)
> >>> > >>      at main.c:1338
> >>> > >> #9  0x00005635ac1ea2a5 in main (argc=5, argv=0x7ffdaf35dde8) at
> main.c:721
> >>> > >> Eventually, I found that it was may caused by not setting
> irq_stack_size properly, and provide this patch to solve it.
> >>> > >>
> >>> > > Could you please re-draft your commit message? The original one
> looks
> >>> > > informal. E.g:
> >>> > >
> >>> > > A segfault issue was observed on KASAN enabled arm64 kernel due to
> the
> >>> > > incorrect irq_stack_size, see the following stack trace:
> >>> > > ...
> >>> > > The issue was caused by ...., and this patch will fix the issue by
> ....
> >>> > >
> >>> > >>  From 34b28aa8c11e77d20adec4f7705a14d239c8a55f Mon Sep 17
> 00:00:00 2001
> >>> > >> From: wonderzyp <wonder...@qq.com>
> >>> > >> Date: Mon, 8 Jul 2024 20:11:38 +0800
> >>> > >> Subject: [PATCH 1131/1131] set_arm64_irq_stack_size
> >>> > >>
> >>> > >> Signed-off-by: Yeping Zheng <wonder...@gmail.com>
> >>> > >> ---
> >>> > >>   arm64.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++--
> >>> > >>   1 file changed, 47 insertions(+), 2 deletions(-)
> >>> > >>
> >>> > >> diff --git a/arm64.c b/arm64.c
> >>> > >> index b3040d7..39d891b 100644
> >>> > >> --- a/arm64.c
> >>> > >> +++ b/arm64.c
> >>> > >> @@ -93,6 +93,7 @@ static void arm64_calc_VA_BITS(void);
> >>> > >>   static int arm64_is_uvaddr(ulong, struct task_context *);
> >>> > >>   static void arm64_calc_KERNELPACMASK(void);
> >>> > >>   static int arm64_get_vmcoreinfo(unsigned long *vaddr, const
> char *label, int base);
> >>> > >> +static ulong arm64_set_irq_stack_size(struct machine_specific
> *ms);
> >>> > >>
> >>> > >>   struct kernel_range {
> >>> > >>          unsigned long modules_vaddr, modules_end;
> >>> > >> @@ -2223,8 +2224,14 @@ arm64_irq_stack_init(void)
> >>> > >>                  if (MEMBER_EXISTS("thread_union", "stack")) {
> >>> > >>                          if ((sz = MEMBER_SIZE("thread_union",
> "stack")) > 0)
> >>> > >>                                  ms->irq_stack_size = sz;
> >>> > >> -               } else
> >>> > >> -                       ms->irq_stack_size = ARM64_IRQ_STACK_SIZE;
> >>> > >> +               } else {
> >>> > >> +                       ulong res = arm64_set_irq_stack_size(ms);
> >>> > >> +                       if (res > 0){
> >>> > >> +                               ms->irq_stack_size = res;
> >>> > >> +                       } else {
> >>> > >> +                               ms->irq_stack_size =
> ARM64_IRQ_STACK_SIZE;
> >>> > >> +                       }
> >>> > >> +               }
> >>> > >>
> >>> > >>                  machdep->flags |= IRQ_STACKS;
> >>> > >>
> >>> > >> @@ -4921,6 +4928,44 @@ static void arm64_calc_KERNELPACMASK(void)
> >>> > >>          }
> >>> > >>   }
> >>> > >>
> >>> > >> +static ulong arm64_set_irq_stack_size(struct machine_specific
> *ms)
> >>> > >> +{
> >>> > >> +       char *string;
> >>> > >> +       int ret;
> >>> > >> +       int KASAN_THREAD_SHIFT = 0;
> >>> > >> +       int MIN_THREAD_SHIFT;
> >>> > >> +       ulong ARM64_PAGE_SHIFT;
> >>> > >> +       ulong THREAD_SHIFT = 0;
> >>> > >> +       ulong THREAD_SIZE;
> >>> > > I guess the upper case of variable names is not encouraged, though
> it
> >>> > > is the variable that comes from kernel config file.
> >>> > >
> >>> > >> +       if (kt->ikconfig_flags & IKCONFIG_AVAIL) {
> >>> > >> +               if ((ret =
> get_kernel_config("CONFIG_KASAN_GENERIC", NULL) == IKCONFIG_Y) ||
> >>> > >> +                       (ret =
> get_kernel_config("CONFIG_KASAN_SW_TAGS", NULL) == IKCONFIG_Y)) {
> >>> > >> +                               KASAN_THREAD_SHIFT = 1;
> >>> > >> +                       }
> >>> > >> +       }
> >>> > >> +       MIN_THREAD_SHIFT = 14 + KASAN_THREAD_SHIFT;
> >>> > >> +
> >>> > >> +       if (kt->ikconfig_flags & IKCONFIG_AVAIL) {
> >>> > > Could the if condition be merged with the prior one?
> >>> > >
> >>> > >> +               if ((ret = get_kernel_config("CONFIG_VMAP_STACK",
> NULL)) == IKCONFIG_Y){
> >>> > >> +                       if ((ret =
> get_kernel_config("CONFIG_ARM64_PAGE_SHIFT", &string)) ==
> >>> >
> >>> > The "CONFIG_ARM64_PAGE_SHIFT " has been removed since kernel
> v6.9-rc1,
> >>> > so this can not work on the latest kernel. See:
> >>> >
> >>> > d3e5bab923d3 ("arch: simplify architecture specific page size
> >>> > configuration")
> >>> >
> >>> >
> >>> > In addition, the IKCONFIG is not available in most distributions,  it
> >>> > can not cover this case. There is a similar discussion there:
> >>> >
> >>> >
> https://www.mail-archive.com/devel@lists.crash-utility.osci.io/msg00880.html
> >>> >
> >>> > Can you try to handle the current issue with a similar solution?
> >>> >
> >>> >
> >>> > >> IKCONFIG_STR){
> >>> > >> +                               ARM64_PAGE_SHIFT = atol(string);
> >>> > >> +                       }
> >>> > >> +                       if (MIN_THREAD_SHIFT < ARM64_PAGE_SHIFT){
> >>> > >> +                               THREAD_SHIFT = ARM64_PAGE_SHIFT;
> >>> > >> +                       } else {
> >>> > >> +                               THREAD_SHIFT = MIN_THREAD_SHIFT;
> >>> > >> +                       }
> >>> > >> +               }
> >>> > >> +       }
> >>> > >> +
> >>> > >> +       if (THREAD_SHIFT == 0) {
> >>> > >> +               return -1;
> >>> > >> +       }
> >>> > >> +
> >>> > >> +       THREAD_SIZE = ((1UL) << THREAD_SHIFT);
> >>> > >> +       return THREAD_SIZE;
> >>> > >> +}
> >>> > > I'm OK with the approach above, since it directly came from the
> kernel
> >>> > > source. However I'm not a fan of checking kernel configs, there
> might
> >>> > > be kernels which are compiled without CONFIG_IKCONFIG.
> >>> > >
> >>> > > Could we add an approach here, to get the value from disassembly
> when
> >>> > > CONFIG_IKCONFIG is negative?
> >>> > >
> >>> > > kernel source: arch/arm64/kernel/entry.S:
> >>> > >
> >>> > > .macro kernel_ventry, el:req, ht:req, regsize:req, label:req
> >>> > > ....
> >>> > > add sp, sp, x0 // sp' = sp + x0
> >>> > > sub x0, sp, x0 // x0' = sp' - x0 = (sp + x0) - x0 = sp
> >>> > > tbnz x0, #THREAD_SHIFT, 0f <<<<<<<<
> >>> > >
> >>> > > $ objdump -d vmlinux
> >>> > > ...
> >>> > > ffff800080010800 <vectors>:
> >>> > > ffff800080010800:       d10543ff        sub     sp, sp, #0x150
> >>> > > ffff800080010804:       8b2063ff        add     sp, sp, x0
> >>> > > ffff800080010808:       cb2063e0        sub     x0, sp, x0
> >>> > > ffff80008001080c:       37800080        tbnz    w0, #16,
> >>> > > ffff80008001081c <vectors+0x1c> <<<<<<<<<<
> >>> > >
> >>> > > It is easy to get the THREAD_SHIFT value by disassembling the tbnz
> >>> > > instruction. What do you think @Lianbo Jiang
> >>> >
> >>> > This is a good idea, but it still relies on compiler.
> >>> >
> >>> > As we discussed, usually I would recommend finding its values via
> some
> >>> > kernel symbols.
> >>>
> >>> Yeah, but I didn't find a suitable symbol so far... In addition, the
> >>
> >>
> >> It's true. For the KASAN, it is easy to find a suitable symbol, but
> it's not easy for the PAGESIZE or PAGESHIFT.
> >>
> >> Given that I would suggest covering two cases:
> >> [1] with vmcoreinfo(see mm/kasan/common.c)
> >> ...
> >> int min_thread_shift = 14;
> >> if (kernel_symbol_exists("kasan_enable_current"))
> >>     min_thread_shift += 1;
> >>
> >> thread_shift = (min_thread_shift < machdep->pageshift) ?
> machdep->pageshift : min_thread_shift;
> >> ...
> >>
> >> [2] without vmcoreinfo
> >> Use Tao's solution(disassemble kernel code)
> >>
> >> The first one is better than the second one in case of having
> vmcoreinfo, If I understand correctly.
> >> But we still need to cover the above two cases in the crash tools.
> >>
> >> BTW:  can you help point out the current issue is with or without the
> vmcoreinfo? Yeping
> >>
> >> Thanks
> >> Lianbo
> >>
> >>> above case is a little different here, the tbnz instruction is not
> >>> generated from C code, it is written in assembly in
> >>> arch/arm64/kernel/entry.S file. So I guess there would be a lower
> >>> possibility if tbnz instruction gets replaced or eliminated by the
> >>> compilers. Anyway, it's OK to me if you don't want to accept the
> >>> approach.
> >>>
> >>> Thanks,
> >>> Tao Liu
> >>>
> >>>
> >>> >
> >>> >
> >>> > Thanks
> >>> >
> >>> > Lianbo
> >>> >
> >>> > >
> >>> > > Thanks,
> >>> > > Tao Liu
> >>> > >
> >>> > >> +
> >>> > >>   #endif  /* ARM64 */
> >>> > >>
> >>> > >>
> >>> > >> --
> >>> > >> 2.25.1
> >>> > >>
> >>> >
> >>>
>
>
From 99318d1aeb980ca902aab9a5b8831d5314bff1c2 Mon Sep 17 00:00:00 2001
From: "yeping.zheng" <yeping.zh...@nio.com>
Date: Tue, 30 Jul 2024 14:16:54 +0800
Subject: [PATCH] A segfault issue due to the incorrect irq_stack_size on ARM64

See the following stack trace:
(gdb) bt
 #0  0x00005635ac2b166b in arm64_unwind_frame (frame=0x7ffdaf35cb70, bt=0x7ffdaf35d430) at arm64.c:2821
 #1  arm64_back_trace_cmd (bt=0x7ffdaf35d430) at arm64.c:3306
 #2  0x00005635ac27b108 in back_trace (bt=bt@entry=0x7ffdaf35d430) at kernel.c:3239
 #3  0x00005635ac2880ae in cmd_bt () at kernel.c:2863
 #4  0x00005635ac1f16dc in exec_command () at main.c:893
 #5  0x00005635ac1f192a in main_loop () at main.c:840
 #6  0x00005635ac50df81 in captured_main (data=<optimized out>) at main.c:1284
 #7  gdb_main (args=<optimized out>) at main.c:1313
 #8  0x00005635ac50e000 in gdb_main_entry (argc=<optimized out>, argv=<optimized out>) at main.c:1338
 #9  0x00005635ac1ea2a5 in main (argc=5, argv=0x7ffdaf35dde8) at main.c:721

The issue may be encountered when thread_union symbol not found in vmlinux
due to compiling optimization.

This patch will try the following 2 methods to get the irq_stack_size
when thread_union symbol unavailable:

1. Try getting the value from THREAD_SIZE when KASAN is enabled

   In arm64/include/asm/memory.h:

   #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
   ...
   #define IRQ_STACK_SIZE		THREAD_SIZE

   Since enabling the KASAN will affect the final value,
   this patch reset IRQ_STACK_SIZE according to the calculation process in kernel code.

2. Try getting the value from kernel code disassembly, to get
   THREAD_SHIFT directly from tbnz instruction.

   In arch/arm64/kernel/entry.S:

   .macro kernel_ventry, el:req, ht:req, regsize:req, label:req
   ...
         add     sp, sp, x0
         sub     x0, sp, x0
         tbnz    x0, #THREAD_SHIFT, 0f

   $ gdb vmlinux
   (gdb) disass vectors
   Dump of assembler code for function vectors:
      ...
      0xffff800080010804 <+4>:     add     sp, sp, x0
      0xffff800080010808 <+8>:     sub     x0, sp, x0
      0xffff80008001080c <+12>:    tbnz    w0, #16, 0xffff80008001081c <vectors+28>

Signed-off-by: yeping.zheng <yeping.zheng(a)nio.com&gt;
---
---
 arm64.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 47 insertions(+), 2 deletions(-)

diff --git a/arm64.c b/arm64.c
index 78e6609..82489d8 100644
--- a/arm64.c
+++ b/arm64.c
@@ -94,6 +94,7 @@ static int arm64_is_uvaddr(ulong, struct task_context *);
 static void arm64_calc_KERNELPACMASK(void);
 static void arm64_recalc_KERNELPACMASK(void);
 static int arm64_get_vmcoreinfo(unsigned long *vaddr, const char *label, int base);
+static ulong arm64_set_irq_stack_size(void);
 
 struct kernel_range {
 	unsigned long modules_vaddr, modules_end;
@@ -2234,8 +2235,10 @@ arm64_irq_stack_init(void)
 		if (MEMBER_EXISTS("thread_union", "stack")) { 
 			if ((sz = MEMBER_SIZE("thread_union", "stack")) > 0)
 				ms->irq_stack_size = sz;
-		} else
-			ms->irq_stack_size = ARM64_IRQ_STACK_SIZE;
+		} else {
+			ulong res = arm64_set_irq_stack_size();
+			ms->irq_stack_size = (res > 0) ? res : ARM64_IRQ_STACK_SIZE;
+		}
 
 		machdep->flags |= IRQ_STACKS;
 
@@ -4950,6 +4953,48 @@ static void arm64_recalc_KERNELPACMASK(void){
 	}
 }
 
+static ulong arm64_set_irq_stack_size(void)
+{
+	int min_thread_shift = 14;
+	ulong thread_shift = 0;
+	char buf1[BUFSIZE];
+	char *pos1, *pos2;
+	int errflag = 0;
+
+	if (kernel_symbol_exists("kasan_enable_current")) {
+		min_thread_shift += 1;
+		thread_shift = (min_thread_shift < machdep->pageshift) ? machdep->pageshift : min_thread_shift;
+	} else {
+		sprintf(buf1, "x/32i vectors");
+		open_tmpfile();
+		if (!gdb_pass_through(buf1, pc->tmpfile, GNU_RETURN_ON_ERROR)) {
+			goto out;
+		}
+		rewind(pc->tmpfile);
+		while (fgets(buf1, BUFSIZE, pc->tmpfile)) {
+			if ((pos1 = strstr(buf1, "tbnz"))) {
+				if ((pos2 = strchr(pos1, '#'))) {
+					pos2 += 1;
+					for(pos1=pos2; *pos2!='\0' && *pos2!=','; pos2++);
+					*pos2 = '\0';
+					thread_shift = stol(pos1, RETURN_ON_ERROR|QUIET, &errflag);
+					if (errflag) {
+						thread_shift = 0;
+					}
+					break;
+				}
+			}
+		}
+out:
+		close_tmpfile();
+	}
+
+	if (!thread_shift)
+		return 0;
+	else
+		return ((1UL) << thread_shift);
+}
+
 #endif  /* ARM64 */
 
 
-- 
2.25.1

--
Crash-utility mailing list -- devel@lists.crash-utility.osci.io
To unsubscribe send an email to devel-le...@lists.crash-utility.osci.io
https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/
Contribution Guidelines: https://github.com/crash-utility/crash/wiki

Reply via email to