On 2023/05/30 19:38, HATAYAMA Daisuke wrote:
> There's no NMI on ARM. Hence, stopping the non-panicking CPUs from the
> panicking CPU via IPI can fail easily if interrupts are being masked
> in those moment. Moreover, crash_notes are not initialized for such
> unstopped CPUs and the corresponding NT_PRSTATUS notes are not
> attached to vmcore. However, crash utility never takes it
> consideration such uninitialized crash_notes and then ends with
> mapping different NT_PRSTATUS to actually unstopped CPUs. This corrupt
> mapping can result crash utility into segmentation fault in the
> operations where register values in NT_PRSTATUS notes are used.
> 
> For example:
> 
>      crash> bt 1408
>      PID: 1408     TASK: ffff000003e22200  CPU: 2    COMMAND: "repro"
>      Segmentation fault (core dumped)
> 
>      crash> help -D
>      diskdump_data:
>                     filename: 127.0.0.1-2023-05-26-02:21:27/vmcore-ld1
>                            flags: 46 
> (KDUMP_CMPRS_LOCAL|ERROR_EXCLUDED|LZO_SUPPORTED)
>      ...snip...
>                      notes_buf: 1815df0
>        num_vmcoredd_notes: 0
>        num_prstatus_notes: 5
>                           notes[0]: 1815df0 (NT_PRSTATUS)
>                                             si.signo: 0  si.code: 0  
> si.errno: 0
>      ...snip...
>                                             PSTATE: 80400005   FPVALID: 
> 00000000
>                           notes[4]: 1808f10 (NT_PRSTATUS)
>      Segmentation fault (core dumped)
> 
> To fix this issue, let's map NT_PRSTATUS to some CPU only if the
> corresponding crash_notes is checked to be initialized.
> 
> Signed-off-by: HATAYAMA Daisuke <d.hatay...@fujitsu.com>
> ---
>   defs.h     |  1 +
>   diskdump.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++-
>   netdump.c  |  5 ++++-
>   3 files changed, 55 insertions(+), 2 deletions(-)
> 
> diff --git a/defs.h b/defs.h
> index bfa07c3..655de55 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -7113,6 +7113,7 @@ int dumpfile_is_split(void);
>   void show_split_dumpfiles(void);
>   void x86_process_elf_notes(void *, unsigned long);
>   void *diskdump_get_prstatus_percpu(int);
> +int have_crash_notes(int cpu);
>   void map_cpus_to_prstatus_kdump_cmprs(void);
>   void diskdump_display_regs(int, FILE *);
>   void process_elf32_notes(void *, ulong);
> diff --git a/diskdump.c b/diskdump.c
> index 94bca4d..af65816 100644
> --- a/diskdump.c
> +++ b/diskdump.c
> @@ -101,6 +101,55 @@ int dumpfile_is_split(void)
>       return KDUMP_SPLIT();
>   }
>   
> +int have_crash_notes(int cpu)
> +{
> +     ulong crash_notes, notes_ptr;
> +     char *buf, *p;
> +     Elf64_Nhdr *note = NULL;
> +
> +     if (!readmem(symbol_value("crash_notes"),
> +                  KVADDR,
> +                  &crash_notes,
> +                  sizeof(crash_notes),
> +                  "crash_notes",
> +                  RETURN_ON_ERROR)) {
> +             error(WARNING, "cannot read \"crash_notes\"\n");
> +             return FALSE;
> +     }
> +
> +     if ((kt->flags & SMP) && (kt->flags & PER_CPU_OFF))
> +             notes_ptr = crash_notes + kt->__per_cpu_offset[cpu];
> +     else
> +             notes_ptr = crash_notes;
> +
> +     buf = GETBUF(SIZE(note_buf));
> +
> +     if (!readmem(notes_ptr,
> +                  KVADDR,
> +                  buf,
> +                  SIZE(note_buf),
> +                  "note_buf_t",
> +                  RETURN_ON_ERROR)) {
> +             error(WARNING, "cpu %d: cannot read NT_PRSTATUS note\n", cpu);
> +             return FALSE;
> +     }
> +
> +     note = (Elf64_Nhdr *)buf;
> +     p = buf + sizeof(Elf64_Nhdr);
> +
> +     if (note->n_type != NT_PRSTATUS) {
> +             error(WARNING, "cpu %d: invalid NT_PRSTATUS note (n_type != 
> NT_PRSTATUS)\n", cpu);
> +             return FALSE;
> +     }
> +
> +     if (!STRNEQ(p, "CORE")) {
> +             error(WARNING, "cpu %d: invalid NT_PRSTATUS note (name != 
> \"CORE\")\n", cpu);
> +             return FALSE;
> +     }
> +
> +     return TRUE;
> +}
> +
>   void
>   map_cpus_to_prstatus_kdump_cmprs(void)
>   {
> @@ -131,7 +180,7 @@ map_cpus_to_prstatus_kdump_cmprs(void)
>       nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS);
>   
>       for (i = 0, j = 0; i < nrcpus; i++) {
> -             if (in_cpu_map(ONLINE_MAP, i)) {
> +             if (in_cpu_map(ONLINE_MAP, i) && (!symbol_exists("crash_notes") 
> || have_crash_notes(i))) {

Thank you for the update.

kernel_symbol_exists() is better.
And it might be also good to check it out of the loop only once. 
Lianbo, what do you think?

   int crash_notes_exists = kernel_symbol_exists("crash_notes");

Anyway, we can change it when merging, so for the series,
Acked-by: Kazuhito Hagio <k-hagio...@nec.com>

Thanks,
Kazu


>                       dd->nt_prstatus_percpu[i] = nt_ptr[j++];
>                       dd->num_prstatus_notes =
>                               MAX(dd->num_prstatus_notes, i+1);
> diff --git a/netdump.c b/netdump.c
> index 4eba66c..9500cf4 100644
> --- a/netdump.c
> +++ b/netdump.c
> @@ -99,8 +99,11 @@ map_cpus_to_prstatus(void)
>       nrcpus = (kt->kernel_NR_CPUS ? kt->kernel_NR_CPUS : NR_CPUS);
>   
>       for (i = 0, j = 0; i < nrcpus; i++) {
> -             if (in_cpu_map(ONLINE_MAP, i))
> +             if (in_cpu_map(ONLINE_MAP, i) && (!symbol_exists("crash_notes") 
> || have_crash_notes(i))) {
>                       nd->nt_prstatus_percpu[i] = nt_ptr[j++];
> +                     nd->num_prstatus_notes =
> +                             MAX(nd->num_prstatus_notes, i+1);
> +             }
>       }
>   
>       FREEBUF(nt_ptr);
--
Crash-utility mailing list
Crash-utility@redhat.com
https://listman.redhat.com/mailman/listinfo/crash-utility
Contribution Guidelines: https://github.com/crash-utility/crash/wiki

Reply via email to