----- Original Message -----
> The kernel might have added a new elf-note of type NT_NOCOREDUMP for various
> reasons. This patch teaches crash tool to look for the same inside a vmcore
> before further analysis. If present, display the error description and exit
> early.
> 
> Signed-off-by: K.Prasad <[email protected]>

At this point, I'll admit I'm not sure I totally understand 
this patch or what the dumpfile header layout would look like.

Your new "myload64" pointer is not pointing to a PT_LOAD, but
rather the first PT_NOTE, so its name doesn't even make sense 
in that respect.  And for that matter, I don't see why you 
didn't just use the currently existing nd->notes64 pointer, 
which points to the same place?  Also, the re-definition of the 
currently-existing "size" value scares the hell out of me 
w/respect to backwards-compatibility.  And lastly, if I'm not
mistaken, when you do the realloc() of tmp_elf_header, it may
return a different address -- so wouldn't nd->elf_header be
left pointing to the old buffer?  And by extension, stale pointer
values could be left in nd->elf64, nd->num_pt_load_segments, 
nd->notes64, nd->load64, etc?   

I would rather that is_netdump() be left intact -- except for 
a call to a new "check_nocoredump()" function, one which does not 
tinker with the is_netdump() pointers, sizes, buffers, etc...
Let that function do its own thing, and if it finds that there's 
no coredump, then it's not going to return and we're done.  But 
99.99% of the time, there will be a coredump, and your function
will not have screwed around with any of is_netdump()'s bookkeeping.

Anyway, when the feature is accepted upstream in the kernel, 
and then by makedumpfile, we'll revisit this.

Thanks,
  Dave

> ---
>  netdump.c |  136
>  ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 files changed, 131 insertions(+), 5 deletions(-)
> 
> diff --git a/netdump.c b/netdump.c
> index 1e9960c..3b4edec 100644
> --- a/netdump.c
> +++ b/netdump.c
> @@ -95,6 +95,74 @@ map_cpus_to_prstatus(void)
>  }
>  
>  /*
> + * Temporary definition of new elf-note type for compilation
> purposes.
> + * Not required when run on a new kernel containing this definition.
> + */
> +#define NT_NOCOREDUMP 21
> +
> +/*
> + * Function to verify if the vmcore contains and elf-note of type
> NT_NOCOREDUMP.
> + * The kernel adds such an elf-note when it is known that the crash
> is
> + * triggered due to a reason that does not need analysis of the
> entire kernel
> + * memory dump (e.g. crash triggered due to a faulty memory DIMM).
> + */
> +static void
> +has_nt_nocoredump(void *note_ptr, unsigned long size_note)
> +{
> +     Elf32_Nhdr *note32 = NULL;
> +     Elf64_Nhdr *note64 = NULL;
> +     size_t tot, len = 0;
> +     int num = 0;
> +
> +     for (tot = 0; tot < size_note; tot += len) {
> +             if (machine_type("X86_64")) {
> +                     note64 = note_ptr + tot;
> +                     /*
> +                      * If vmcore is generated due to fatal hardware
> +                      * errors (such as Machine Check Exception, we only have
> +                      * a 'slim' crashdump. Don't analyse further, inform the
> +                      * user about it and exit.
> +                      */
> +                     if (note64->n_type == NT_NOCOREDUMP) {
> +                             fprintf(fp, "\"System crashed due to a hardware"
> +                                     " memory error. No coredump"
> +                                     " available.\"\n");
> +
> +                             /* Do we have an accompanying error message? */
> +                             if (note64->n_descsz == 0)
> +                                     goto exit;
> +                             fprintf(fp,"Nocoredump Reason: %s",
> +                                     (char *)note64 + sizeof(Elf64_Nhdr));
> +                     }
> +
> +                     len = sizeof(Elf64_Nhdr);
> +                     len = roundup(len + note64->n_namesz, 4);
> +                     len = roundup(len + note64->n_descsz, 4);
> +             } else if (machine_type("X86")) {
> +                     note32 = note_ptr + tot;
> +                     if (note32->n_type == NT_NOCOREDUMP) {
> +                             fprintf(fp, "\"System crashed due to a hardware"
> +                                     " memory error. No coredump"
> +                                     " available.\"\n");
> +
> +                             /* Do we have an accompanying error message? */
> +                             if (note32->n_descsz == 0)
> +                                     goto exit;
> +
> +                             fprintf(fp,"Nocoredump Reason: %s",
> +                                     (char *)note32 + sizeof(Elf32_Nhdr));
> +exit:
> +                             clean_exit(0);
> +                     }
> +
> +                     len = sizeof(Elf32_Nhdr);
> +                     len = roundup(len + note32->n_namesz, 4);
> +                     len = roundup(len + note32->n_descsz, 4);
> +             }
> +     }
> +}
> +
> +/*
>   *  Determine whether a file is a netdump/diskdump/kdump creation,
>   *  and if TRUE, initialize the vmcore_data structure.
>   */
> @@ -103,12 +171,12 @@ is_netdump(char *file, ulong source_query)
>  {
>          int i, fd, swap;
>       Elf32_Ehdr *elf32;
> -     Elf32_Phdr *load32;
> +     Elf32_Phdr *load32, *myload32;
>       Elf64_Ehdr *elf64;
> -     Elf64_Phdr *load64;
> +     Elf64_Phdr *load64, *myload64;
>       char eheader[MIN_NETDUMP_ELF_HEADER_SIZE];
>       char buf[BUFSIZE];
> -     size_t size, len, tot;
> +     size_t size, mysize, len, tot;
>          Elf32_Off offset32;
>          Elf64_Off offset64;
>       ulong tmp_flags;
> @@ -195,7 +263,10 @@ is_netdump(char *file, ulong source_query)
>  
>                  load32 = (Elf32_Phdr *)
>                          &eheader[sizeof(Elf32_Ehdr)+sizeof(Elf32_Phdr)];
> -                size = (size_t)load32->p_offset;
> +                myload32 = (Elf32_Phdr *)
> +                        &eheader[sizeof(Elf32_Ehdr)];
> +
> +             size = (size_t)myload32->p_offset;
>  
>               if ((load32->p_offset & (MIN_PAGE_SIZE-1)) &&
>                   (load32->p_align == 0))
> @@ -249,7 +320,10 @@ is_netdump(char *file, ulong source_query)
>  
>                  load64 = (Elf64_Phdr *)
>                          &eheader[sizeof(Elf64_Ehdr)+sizeof(Elf64_Phdr)];
> -                size = (size_t)load64->p_offset;
> +                myload64 = (Elf64_Phdr *)
> +                        &eheader[sizeof(Elf64_Ehdr)];
> +
> +             size = (size_t)myload64->p_offset;
>               if ((load64->p_offset & (MIN_PAGE_SIZE-1)) &&
>                   (load64->p_align == 0))
>                       tmp_flags |= KDUMP_ELF64;
> @@ -362,6 +436,58 @@ is_netdump(char *file, ulong source_query)
>                      &nd->elf_header[sizeof(Elf64_Ehdr)];
>                  nd->load64 = (Elf64_Phdr *)
>                      &nd->elf_header[sizeof(Elf64_Ehdr)+sizeof(Elf64_Phdr)];
> +             /*
> +              * Find out if there exists an elf-note of type NT_NOCOREDUMP.
> +              * If so, exit early from crash analysis after displaying the
> +              * description string.
> +              *
> +              * Allocate a temporary buffer to store the PT_NOTE section and
> +              * loop through them to look for NT_NOCOREDUMP.
> +              */
> +             for (i = 0; i < elf64->e_phnum; i++, myload64++) {
> +                     mysize += myload64->p_memsz;
> +                     if (myload64->p_type == PT_NOTE) {
> +                             break;
> +                     }
> +             }
> +
> +             if (mysize == 0) {
> +                     fprintf(stderr, "No PT_NOTE section found\n");
> +                     clean_exit(1);
> +             }
> +
> +             /*
> +              * Size of the buffer should accommodate the Elf_Ehdr, Elf_Phdr
> +              * and all sections upto the first PT_NOTE.
> +              */
> +             mysize += size;
> +             tmp_elf_header = realloc(tmp_elf_header, mysize);
> +             if (tmp_elf_header == NULL) {
> +                     fprintf(stderr, "cannot malloc notes buffer\n");
> +                     clean_exit(1);
> +             }
> +             if (FLAT_FORMAT()) {
> +                     if (!read_flattened_format(fd, 0, tmp_elf_header, 
> mysize)) {
> +                             free(tmp_elf_header);
> +                             goto bailout;
> +                     }
> +             } else {
> +                     if (lseek(fd, 0, SEEK_SET) != 0) {
> +                             sprintf(buf, "%s: lseek", file);
> +                             perror(buf);
> +                             goto bailout;
> +                     }
> +                     if (read(fd, tmp_elf_header, mysize) != mysize) {
> +                             sprintf(buf, "%s: read", file);
> +                             perror(buf);
> +                             free(tmp_elf_header);
> +                             goto bailout;
> +                     }
> +             }
> +
> +             has_nt_nocoredump((char *)tmp_elf_header + myload64->p_offset,
> +                               myload64->p_memsz);
> +
>               if (DUMPFILE_FORMAT(nd->flags) == NETDUMP_ELF64)
>                       nd->page_size = (uint)nd->load64->p_align;
>                  dump_Elf64_Ehdr(nd->elf64);
> --
> 1.7.4.1
> 
> 

--
Crash-utility mailing list
[email protected]
https://www.redhat.com/mailman/listinfo/crash-utility

Reply via email to