On Mon, Sep 18, 2006 at 03:07:25PM -0400, Vivek Goyal wrote:
> 
> From: Vivek Goyal <[EMAIL PROTECTED]>
> 
> o With relocatable kernel in picture, the kernel text map offset
>   (__START_KERNEL_map) is no longer constant. It depends on where kernel
>   is loaded.
> 
> o Now /proc/kcore is read to determine the virtual address the kernel
>   is mapped at and /porc/iomem is read for determining the physical
>   address where kernel is loaded at. This information is enough to
>   create to fill virtual address and phy addr info for elf header
>   mapping kernel text and data.
> 
> o Virtual address for kernel text are needed by gdb as well as crash to
>   retrieve the meaningful data from core file.
> 
> o This patch requires "elf note memsz" fix in the kernel. Currently
>   that fix is in -mm tree. It will still work with older kernels. It will
>   display the warning messages (/proc/kcore could not be parsed) and hardcode
>   the kernel virtual address and size.
> 
> Signed-off-by: Vivek Goyal <[EMAIL PROTECTED]>

Hi Vivek,

that seems generally fine to me. I'm not sure that I understand the
finer points of the elf parsing that you are doing, but again, it
seems fine. I've put some comments, mainly about formatting and the
like, inline.

-- 
Horms
  H: http://www.vergenet.net/~horms/
  W: http://www.valinux.co.jp/en/

> ---
> 
>  kexec/Makefile                       |    1 
>  kexec/arch/x86_64/crashdump-x86_64.c |  213 
> ++++++++++++++++++++++++++++++----
>  kexec/kexec-elf-core.c               |   28 ++++
>  kexec/kexec-elf.c                    |   42 ++++---
>  kexec/kexec-elf.h                    |    3 
>  kexec/kexec.h                        |    3 
>  6 files changed, 251 insertions(+), 39 deletions(-)
> 
> diff --git a/kexec/Makefile b/kexec/Makefile
> index f8c2f18..c059153 100644
> --- a/kexec/Makefile
> +++ b/kexec/Makefile
> @@ -13,6 +13,7 @@ KEXEC_C_SRCS:= kexec/kexec.c 
>  KEXEC_C_SRCS+= kexec/ifdown.c
>  KEXEC_C_SRCS+= kexec/kexec-elf.c 
>  KEXEC_C_SRCS+= kexec/kexec-elf-exec.c 
> +KEXEC_C_SRCS+= kexec/kexec-elf-core.c
>  KEXEC_C_SRCS+= kexec/kexec-elf-rel.c 
>  KEXEC_C_SRCS+= kexec/kexec-elf-boot.c 
>  KEXEC_C_SRCS+= kexec/crashdump.c
> diff --git a/kexec/arch/x86_64/crashdump-x86_64.c 
> b/kexec/arch/x86_64/crashdump-x86_64.c
> index 6776277..3c9c0dd 100644
> --- a/kexec/arch/x86_64/crashdump-x86_64.c
> +++ b/kexec/arch/x86_64/crashdump-x86_64.c
> @@ -24,6 +24,7 @@ #include <stdlib.h>
>  #include <errno.h>
>  #include <limits.h>
>  #include <elf.h>
> +#include <fcntl.h>
>  #include <sys/types.h>
>  #include <sys/stat.h>
>  #include <unistd.h>
> @@ -38,6 +39,147 @@ #include <x86/x86-linux.h>
>  /* Forward Declaration. */
>  static int exclude_crash_reserve_region(int *nr_ranges);
>  
> +/* Read kernel physical load addr from /proc/iomem (Kernel Code) and
> + * store in kexec_info */
> +static int get_kernel_paddr(struct kexec_info *info)
> +{
> +     const char iomem[]= "/proc/iomem";
> +     char line[MAX_LINE];
> +     FILE *fp;
> +     unsigned long long start, end;
> +
> +     fp = fopen(iomem, "r");
> +     if (!fp) {
> +             fprintf(stderr, "Cannot open %s: %s\n", iomem, strerror(errno));
> +             return -1;
> +     }
> +     while(fgets(line, sizeof(line), fp) != 0) {
> +             char *str;
> +             int consumed, count;
> +             count = sscanf(line, "%Lx-%Lx : %n",
> +                     &start, &end, &consumed);
> +             if (count != 2)
> +                     continue;
> +             str = line + consumed;
> +#if 0
> +             printf("%016Lx-%016Lx : %s",
> +                     start, end, str);
> +#endif

I really don't like the use of #if 0. I know its elsewhere in the code,
but I think it would be a lot nicer if we moved to something
like #if DEBUG, which could be toggled by ./configure. Not really
relevant to this patch, but I thought I would mention it anyway.

> +             if (memcmp(str, "Kernel code\n", 12) == 0) {
> +                     info->kern_paddr_start = start;
> +
> +#if 0
> +                     printf("kernel load physical addr start = 0x%016Lx\n",
> +                                     start);
> +#endif
> +                     fclose(fp);
> +                     return 0;
> +             }
> +     }
> +     fprintf(stderr, "Cannot determine kernel physical load addr\n");
> +     fclose(fp);
> +     return -1;
> +}
> +
> +/* Retrieve info regarding virtual address kernel has been compiled for and
> + * size of the kernel from /proc/kcore. Current /proc/kcore parsing from
> + * from kexec-tools fails because of malformed elf notes. A kernel patch has
> + * been submitted. For the folks using older kernels, this function
> + * hard codes the values to remain backward compatible. Once things stablize
> + * we should get rid of backward compatible code. */
> +
> +static int get_kernel_vaddr_and_size(struct kexec_info *info)
> +{
> +     int fd, result;
> +     const char kcore[] = "/proc/kcore";
> +     char *buf;
> +     struct mem_ehdr ehdr;
> +     struct mem_phdr *phdr, *end_phdr;
> +     int align;
> +     unsigned long size, progress;
> +
> +     align = getpagesize();
> +
> +     fd = open(kcore, O_RDONLY);
> +     if (fd < 0) {
> +             fprintf(stderr, "Cannot open %s: %s\n", kcore, strerror(errno));
> +             goto backward_compatible;
> +     }
> +#define KCORE_ELF_HEADERS_SIZE       4096

Personally I'm much more comofortable with #defines going
either towards the top a .c file, or in a .h file. Could you
move this and the definition of KERN_VADDR_ALIGN below to
one of these two locations, the .c one seems most appropriate 
here.

> +     size = KCORE_ELF_HEADERS_SIZE;
> +     buf = xmalloc(size);

Is there is a reason buf needs to be on the heap rather than the stack?
As it is a static size it seems easier just to declare it as
a fixed-size array. For starters it means there is no chance
of it leaking.

> +     memset(buf, 0, size);
> +     progress = 0;
> +     while(progress < size) {
> +             result = read(fd, buf + progress, size - progress);
> +             if (result < 0) {
> +                     if ((errno == EINTR) || (errno == EAGAIN))
> +                             continue;
> +                     die("read on %s of %ld bytes failed: %s\n",
> +                             kcore, (size - progress)+ 0UL, strerror(errno));
> +             }
> +             if (result == 0)
> +                     /* EOF */
> +                     break;
> +             progress += result;
> +     }

Is this kind of read X bytes untill you have X, EOF or error loop
a common thing in kexec-tools? It might be worth breaking it out into
some common code somewhere.

> +     result = close(fd);
> +     if (result < 0)
> +             die("Close of %s failed: %s\n", kcore, strerror(errno));

The result of close() is checked here, but the result of fclose()
is not checked in some code above. Is there a reason for this
inconsistency?

> +
> +     /* Don't perform checks to make sure stated phdrs and shdrs are
> +      * actually present in the core file. It is not practical
> +      * to read the GB size file into a user space buffer, Given the
> +      * fact that we don't use any info from that.
> +      */

That is true, but it is might be practical to memmap it.
Have you given some thought to whether that is worthwile or not?

> +     ignore_elf_len_check = 1;

Should ignore_elf_len_check be turned off again at some point,
or alternatively changed into a parameter rather than a global?

> +     result = build_elf_core_info(buf, size, &ehdr);
> +     if (result < 0) {
> +             fprintf(stderr, "ELF core (kcore) parse failed\n");
> +             goto backward_compatible;

Could the backward_compatible handling be moved into a
separate function, rather than a goto? It seems that
only info would need to be passed.

> +     }
> +     /* Traverse through the Elf headers and find the region where
> +      * kernel is mapped. */
> +#define KERN_VADDR_ALIGN     0x200000        /* 2MB */
> +     end_phdr = &ehdr.e_phdr[ehdr.e_phnum];
> +     for(phdr = ehdr.e_phdr; phdr != end_phdr; phdr++) {
> +             if (phdr->p_type == PT_LOAD) {
> +                     unsigned long saddr = phdr->p_vaddr;
> +                     unsigned long eaddr = phdr->p_vaddr + phdr->p_memsz;
> +                     unsigned long size;
> +             /* Look for kernel text mapping header. */

It seems like there is one tab too few for the indentation 
of the line above.

> +                     if ((saddr >= __START_KERNEL_map) &&
> +                     (eaddr <= __START_KERNEL_map + KERNEL_TEXT_SIZE)) {

I think that the above lines should be indented as follows

                        if ((saddr >= __START_KERNEL_map) &&
                            (eaddr <= __START_KERNEL_map + KERNEL_TEXT_SIZE)) {
                            ^ 2nd line is indented to start inside the
                              relevant ( of the 1st line

> +                             saddr = (saddr) & (~(KERN_VADDR_ALIGN - 1));
> +                             info->kern_vaddr_start = saddr;
> +                             size = eaddr - saddr;
> +                             /* Align size to page size boundary. */
> +                             size = (size + align - 1) & (~(align - 1));
> +                             info->kern_size = size;
> +#if 0
> +                     printf("kernel vaddr = 0x%lx size = 0x%lx\n",
> +                                     saddr, size);
> +#endif
> +                             return 0;
> +                     }

I'm not sure that I understand the alingment that the above
if statment is adding, but I guess its fine :-)

> +             }
> +     }
> +     fprintf(stderr, "Warning: Can't find kernel text map area from 
> kcore\n");
> +
> +backward_compatible:
> +     fprintf(stderr, "Warning: Hardcoding kernel virtual addr and size\n");
> +     /* Hardcoding kernel virtual address and size. While writting
> +      * this patch vanilla kernel is compiled for addr 2MB. Anybody
> +      * using kernel older than that which was compiled for 1MB
> +      * physical addr, use older version of kexec-tools */
> +
> +     info->kern_vaddr_start = __START_KERNEL_map + 0x200000;
> +     info->kern_size = KERNEL_TEXT_SIZE - 0x200000;
> +     fprintf(stderr, "Warning: virtual addr = 0x%lx size = 0x%lx\n",
> +             info->kern_vaddr_start, info->kern_size);
> +     return 0;
> +}
> +
>  /* Stores a sorted list of RAM memory ranges for which to create elf headers.
>   * A separate program header is created for backup region */
>  static struct memory_range crash_memory_range[CRASH_MAX_MEMORY_RANGES];
> @@ -81,6 +223,7 @@ static int get_crash_memory_ranges(struc
>       while(fgets(line, sizeof(line), fp) != 0) {
>               char *str;
>               int type, consumed, count;
> +
>               if (memory_ranges >= CRASH_MAX_MEMORY_RANGES)
>                       break;
>               count = sscanf(line, "%Lx-%Lx : %n",
> @@ -125,17 +268,6 @@ #endif
>               crash_memory_range[memory_ranges].end = end;
>               crash_memory_range[memory_ranges].type = type;
>               memory_ranges++;
> -
> -             /* Segregate linearly mapped region. */
> -             if ((MAXMEM - 1) >= start && (MAXMEM - 1) <= end) {
> -                     crash_memory_range[memory_ranges-1].end = MAXMEM -1;
> -
> -                     /* Add segregated region. */
> -                     crash_memory_range[memory_ranges].start = MAXMEM;
> -                     crash_memory_range[memory_ranges].end = end;
> -                     crash_memory_range[memory_ranges].type = type;
> -                     memory_ranges++;
> -             }
>       }
>       fclose(fp);
>       if (exclude_crash_reserve_region(&memory_ranges) < 0)
> @@ -532,8 +664,35 @@ #define MAX_NOTE_BYTES   1024
>  
>               /* Increment number of program headers. */
>               (elf->e_phnum)++;
> +#if 0
> +             printf("Elf header: p_type = %d, p_offset = 0x%lx "
> +                     "p_paddr = 0x%lx p_vaddr = 0x%lx "
> +                     "p_filesz = 0x%lx p_memsz = 0x%lx\n",
> +                     phdr->p_type, phdr->p_offset, phdr->p_paddr,
> +                     phdr->p_vaddr, phdr->p_filesz, phdr->p_memsz);
> +#endif
>       }
>  
> +     /* Setup an PT_LOAD type program header for the region where
> +      * Kernel is mapped.
> +      */
> +     phdr = (Elf64_Phdr *) bufp;
> +     bufp += sizeof(Elf64_Phdr);
> +     phdr->p_type    = PT_LOAD;
> +     phdr->p_flags   = PF_R|PF_W|PF_X;
> +     phdr->p_offset  = phdr->p_paddr = info->kern_paddr_start;
> +     phdr->p_vaddr   = info->kern_vaddr_start;
> +     phdr->p_filesz  = phdr->p_memsz = info->kern_size;
> +     phdr->p_align   = 0;
> +     (elf->e_phnum)++;
> +#if 0
> +             printf("Kernel text Elf header: p_type = %d, p_offset = 0x%lx "
> +                     "p_paddr = 0x%lx p_vaddr = 0x%lx "
> +                     "p_filesz = 0x%lx p_memsz = 0x%lx\n",
> +                     phdr->p_type, phdr->p_offset, phdr->p_paddr,
> +                     phdr->p_vaddr, phdr->p_filesz, phdr->p_memsz);
> +#endif
> +
>       /* Setup PT_LOAD type program header for every system RAM chunk.
>        * A seprate program header for Backup Region*/
>       for (i = 0; i < CRASH_MAX_MEMORY_RANGES; i++) {
> @@ -553,27 +712,25 @@ #define MAX_NOTE_BYTES  1024
>               else
>                       phdr->p_offset  = mstart;
>  
> -             /* Handle linearly mapped region.*/
> -
> -             /* Filling the vaddr conditionally as we have two linearly
> -              * mapped regions here. One is __START_KERNEL_map 0 to 40 MB
> -              * other one is PAGE_OFFSET */
> -
> -             if ((mend <= (MAXMEM - 1)) && mstart < KERNEL_TEXT_SIZE)
> -                     phdr->p_vaddr = mstart + __START_KERNEL_map;
> -             else {
> -                     if (mend <= (MAXMEM - 1))
> -                             phdr->p_vaddr = mstart + PAGE_OFFSET;
> -                     else
> -                             phdr->p_vaddr = -1ULL;
> -             }
> +             /* We already prepared the header for kernel text. Map
> +              * rest of the memory segments to kernel linearly mapped
> +              * memory region.
> +              */
>               phdr->p_paddr = mstart;
> +             phdr->p_vaddr = mstart + PAGE_OFFSET;
>               phdr->p_filesz  = phdr->p_memsz = mend - mstart + 1;
>               /* Do we need any alignment of segments? */
>               phdr->p_align   = 0;
>  
>               /* Increment number of program headers. */
>               (elf->e_phnum)++;
> +#if 0
> +             printf("Elf header: p_type = %d, p_offset = 0x%lx "
> +                     "p_paddr = 0x%lx p_vaddr = 0x%lx "
> +                     "p_filesz = 0x%lx p_memsz = 0x%lx\n",
> +                     phdr->p_type, phdr->p_offset, phdr->p_paddr,
> +                     phdr->p_vaddr, phdr->p_filesz, phdr->p_memsz);
> +#endif
>       }
>       return 0;
>  }
> @@ -591,6 +748,12 @@ int load_crashdump_segments(struct kexec
>       long int nr_cpus = 0;
>       struct memory_range *mem_range, *memmap_p;
>  
> +     if (get_kernel_paddr(info))
> +             return -1;
> +
> +     if (get_kernel_vaddr_and_size(info))
> +             return -1;
> +
>       if (get_crash_memory_ranges(&mem_range, &nr_ranges) < 0)
>               return -1;
>  
> diff --git a/kexec/kexec-elf-core.c b/kexec/kexec-elf-core.c
> new file mode 100644
> index 0000000..6a61911
> --- /dev/null
> +++ b/kexec/kexec-elf-core.c
> @@ -0,0 +1,28 @@
> +#include <stdio.h>
> +#include <stdint.h>
> +#include <errno.h>
> +#include <stdlib.h>
> +#include "elf.h"
> +#include "kexec-elf.h"
> +
> +
> +int build_elf_core_info(const char *buf, off_t len, struct mem_ehdr *ehdr)
> +{
> +     int result;
> +     result = build_elf_info(buf, len, ehdr);
> +     if (result < 0) {
> +             return result;
> +     }
> +     if ((ehdr->e_type != ET_CORE)) {
> +             /* not an ELF Core */
> +             fprintf(stderr, "Not ELF type ET_CORE\n");
> +             return -1;
> +     }
> +     if (!ehdr->e_phdr) {
> +             /* No program header */
> +             fprintf(stderr, "No ELF program header\n");
> +             return -1;
> +     }
> +
> +     return 0;
> +}
> diff --git a/kexec/kexec-elf.c b/kexec/kexec-elf.c
> index da2c788..650289f 100644
> --- a/kexec/kexec-elf.c
> +++ b/kexec/kexec-elf.c
> @@ -11,6 +11,17 @@ #include "kexec-elf.h"
>  
>  static const int probe_debug = 0;
>  
> +/* This parameter can be set to force ELF phdr and shdr building routines
> + * to skip the check on file length which make sure that data pointed by
> + * phdrs and shdrs is actually present in the file.  This has been introduced
> + * to handle the corner case of reading /proc/kcore. /proc/kcore can be
> + * of huge size (GB, TB), depending on RAM size. It does not make sense
> + * to read that whole core file just to extract some info from kcore ELF
> + * headers.
> + * This is hackish. Please suggest a better way to handle it.
> + */
> +int ignore_elf_len_check = 0;
> +

Is there a reason this is global, rather than being passed
as an argument to the relevant functions?

>  uint16_t elf16_to_cpu(const struct mem_ehdr *ehdr, uint16_t value)
>  {
>       if (ehdr->ei_data == ELFDATA2LSB) {
> @@ -420,12 +431,14 @@ static int build_mem_phdrs(const char *b
>                * they are safe to use.
>                */
>               phdr = &ehdr->e_phdr[i];
> -             if ((phdr->p_offset + phdr->p_filesz) > len) {
> -                     /* The segment does not fit in the buffer */
> -                     if (probe_debug) {
> -                             fprintf(stderr, "ELF segment not in file\n");
> +             if (!ignore_elf_len_check) {
> +                     if ((phdr->p_offset + phdr->p_filesz) > len) {
> +                             /* The segment does not fit in the buffer */
> +                             if (probe_debug) {
> +                                     fprintf(stderr, "ELF segment not in 
> file\n");
> +                             }
> +                             return -1;
>                       }
> -                     return -1;
>               }
>               if ((phdr->p_paddr + phdr->p_memsz) < phdr->p_paddr) {
>                       /* The memory address wraps */

I think the above (mostly whitespace) change could be simplified to
the following, which has the advantage of not being so heavily
nested.

diff --git a/kexec/kexec-elf.c b/kexec/kexec-elf.c
index da2c788..8b8b613 100644
--- a/kexec/kexec-elf.c
+++ b/kexec/kexec-elf.c
@@ -389,7 +389,7 @@ static int build_mem_phdrs(const char *b
                return -1;
        }
        phdr_size *= ehdr->e_phnum;
-       if (ehdr->e_phoff + phdr_size > len) {
+       if (!ignore_elf_len_check && ehdr->e_phoff + phdr_size > len) {
                /* The program header did not fit in the file buffer */
                if (probe_debug) {
                        fprintf(stderr, "ELF program segment truncated\n");

> @@ -630,15 +643,15 @@ static int build_mem_shdrs(const char *b
>                * they are safe to use.
>                */
>               shdr = &ehdr->e_shdr[i];
> -             if ((shdr->sh_type != SHT_NOBITS) &&
> -                     ((shdr->sh_offset + shdr->sh_size) > len))
> -             {
> -                     /* The section does not fit in the buffer */
> -                     if (probe_debug) {
> -                             fprintf(stderr, "ELF section %d not in file\n",
> -                                     i);
> +             if (!ignore_elf_len_check) {
> +                     if ((shdr->sh_type != SHT_NOBITS) &&
> +                             ((shdr->sh_offset + shdr->sh_size) > len)) {
> +                             /* The section does not fit in the buffer */
> +                             if (probe_debug)
> +                                     fprintf(stderr, "ELF section %d not in "
> +                                                     "file\n", i);
> +                             return -1;
>                       }
> -                     return -1;
>               }
>               if ((shdr->sh_addr + shdr->sh_size) < shdr->sh_addr) {
>                       /* The memory address wraps */

Again, I think the following is a bit simpler.

diff --git a/kexec/kexec-elf.c b/kexec/kexec-elf.c
index da2c788..e045ef6 100644
--- a/kexec/kexec-elf.c
+++ b/kexec/kexec-elf.c
@@ -630,7 +630,7 @@ static int build_mem_shdrs(const char *b
                 * they are safe to use.
                 */
                shdr = &ehdr->e_shdr[i];
-               if ((shdr->sh_type != SHT_NOBITS) &&
+               if (!ignore_elf_len_check && (shdr->sh_type != SHT_NOBITS) &&
                        ((shdr->sh_offset + shdr->sh_size) > len))
                {
                        /* The section does not fit in the buffer */

> @@ -710,7 +723,8 @@ static int build_mem_notes(const char *b
>               note_size += (hdr.n_descsz + 3) & ~3;
>  
>               if ((hdr.n_namesz != 0) && (name[hdr.n_namesz -1] != '\0')) {
> -                     die("Note name is not null termiated");
> +                     fprintf(stderr, "Note name is not null termiated\n");
> +                     return -1;
>               }
>               ehdr->e_note[i].n_type = hdr.n_type;
>               ehdr->e_note[i].n_name = (char *)name;
> diff --git a/kexec/kexec-elf.h b/kexec/kexec-elf.h
> index b7332de..a784855 100644
> --- a/kexec/kexec-elf.h
> +++ b/kexec/kexec-elf.h
> @@ -89,6 +89,7 @@ extern int build_elf_info(const char *bu
>  extern int build_elf_exec_info(const char *buf, off_t len, struct mem_ehdr 
> *ehdr);
>  extern int build_elf_rel_info(const char *buf, off_t len, struct mem_ehdr 
> *ehdr);
>  
> +extern int build_elf_core_info(const char *buf, off_t len, struct mem_ehdr 
> *ehdr);
>  extern int elf_exec_load(struct mem_ehdr *ehdr, struct kexec_info *info);
>  extern int elf_rel_load(struct mem_ehdr *ehdr, struct kexec_info *info,
>       unsigned long min, unsigned long max, int end);
> @@ -107,6 +108,8 @@ extern void elf_rel_set_symbol(struct me
>  extern void elf_rel_get_symbol(struct mem_ehdr *ehdr,
>       const char *name, void *buf, size_t size);
>  
> +extern int ignore_elf_len_check;
> +
>  uint16_t elf16_to_cpu(const struct mem_ehdr *ehdr, uint16_t value);
>  uint32_t elf32_to_cpu(const struct mem_ehdr *ehdr, uint32_t value);
>  uint64_t elf64_to_cpu(const struct mem_ehdr *ehdr, uint64_t value);
> diff --git a/kexec/kexec.h b/kexec/kexec.h
> index 57fca7d..64bed68 100644
> --- a/kexec/kexec.h
> +++ b/kexec/kexec.h
> @@ -116,6 +116,9 @@ struct kexec_info {
>       struct mem_ehdr rhdr;
>       unsigned long backup_start;
>       unsigned long kexec_flags;
> +     unsigned long kern_vaddr_start;
> +     unsigned long kern_paddr_start;
> +     unsigned long kern_size;
>  };
>  
>  void usage(void);

_______________________________________________
fastboot mailing list
[email protected]
https://lists.osdl.org/mailman/listinfo/fastboot

Reply via email to