Hello Cliff,

>>From: Cliff Wickman <[email protected]>
>>
>>Applies to the development branch as of 10/13/2015.
>>Incorporates review 10/22 by kumagai-atsushi.
>>Incorporates review 10/26 by kumagai-atsushi. (arch x86_64-only 
>>implementation)
>
>Thanks for your great work.
>I don't want to delay v1.5.9 release anymore, I'll merge this patch
>into v1.6.0.

Now, while testing for the v1.6.0 release, I found that this feature
will be disabled in some environments.

[skip to find_vmemmap_x86_64()]

>>+/*
>>+ * Scan the kernel page table for the pfn's of the page structs
>>+ * Place them in array gvmem_pfns[nr_gvmem_pfns]
>>+ */
>>+int
>>+find_vmemmap_x86_64()
>>+{
>>+     int i;
>>+     int pgd_index, pud_index;
>>+     int start_range = 1;
>>+     int num_pmds=0, num_pmds_valid=0;
>>+     int break_in_valids, break_after_invalids;
>>+     int do_break, done = 0;
>>+     int last_valid=0, last_invalid=0;
>>+     int pagestructsize, structsperhpage, hugepagesize;
>>+     long page_structs_per_pud;
>>+     long num_puds, groups = 0;
>>+     long pgdindex, pudindex, pmdindex;
>>+     long vaddr, vaddr_base;
>>+     long rep_pfn_start = 0, rep_pfn_end = 0;
>>+     unsigned long init_level4_pgt;
>>+     unsigned long max_paddr, high_pfn;
>>+     unsigned long pgd_addr, pud_addr, pmd_addr;
>>+     unsigned long *pgdp, *pudp, *pmdp;
>>+     unsigned long pud_page[PTRS_PER_PUD];
>>+     unsigned long pmd_page[PTRS_PER_PMD];
>>+     unsigned long vmap_offset_start = 0, vmap_offset_end = 0;
>>+     unsigned long pmd, tpfn;
>>+     unsigned long pvaddr = 0;
>>+     unsigned long data_addr = 0, last_data_addr = 0, start_data_addr = 0;
>>+     /*
>>+      * data_addr is the paddr of the page holding the page structs.
>>+      * We keep lists of contiguous pages and the pfn's that their
>>+      * page structs represent.
>>+      *  start_data_addr and last_data_addr mark start/end of those
>>+      *  contiguous areas.
>>+      * An area descriptor is vmap start/end pfn and rep start/end
>>+      *  of the pfn's represented by the vmap start/end.
>>+      */
>>+     struct vmap_pfns *vmapp, *vmaphead = NULL, *cur, *tail;
>>+
>>+     init_level4_pgt = SYMBOL(init_level4_pgt);
>>+     if (init_level4_pgt == NOT_FOUND_SYMBOL) {
>>+             ERRMSG("init_level4_pgt not found\n");
>>+             return FAILED;
>>+     }
>>+     pagestructsize = size_table.page;
>>+     hugepagesize = PTRS_PER_PMD * info->page_size;
>>+     vaddr_base = info->vmemmap_start;
>>+     vaddr = vaddr_base;
>>+     max_paddr = get_max_paddr();
>>+     /*
>>+      * the page structures are mapped at VMEMMAP_START (info->vmemmap_start)
>>+      * for max_paddr >> 12 page structures
>>+      */
>>+     high_pfn = max_paddr >> 12;
>>+     pgd_index = pgd4_index(vaddr_base);
>>+     pud_index = pud_index(vaddr_base);
>>+     pgd_addr = vaddr_to_paddr(init_level4_pgt); /* address of pgd */
>>+     pgd_addr += pgd_index * sizeof(unsigned long);
>>+     page_structs_per_pud = (PTRS_PER_PUD * PTRS_PER_PMD * info->page_size) /
>>+                                                                     
>>pagestructsize;
>>+     num_puds = (high_pfn + page_structs_per_pud - 1) / page_structs_per_pud;
>>+     pvaddr = VMEMMAP_START;
>>+     structsperhpage = hugepagesize / pagestructsize;
>>+
>>+     /* outer loop is for pud entries in the pgd */
>>+     for (pgdindex = 0, pgdp = (unsigned long *)pgd_addr; pgdindex < 
>>num_puds;
>>+                                                             pgdindex++, 
>>pgdp++) {
>>+             /* read the pgd one word at a time, into pud_addr */
>>+             if (!readmem(PADDR, (unsigned long long)pgdp, (void *)&pud_addr,
>>+                                                             sizeof(unsigned 
>>long))) {
>>+                     ERRMSG("Can't get pgd entry for slot %d.\n", pgd_index);
>>+                     return FAILED;
>>+             }
>>+             /* mask the pgd entry for the address of the pud page */
>>+             pud_addr &= PMASK;

If the pgd entry is null, the code below will still try to read from
physical address 0x0. This causes unexpected results, especially when
pfn 0 falls in a memory hole.
I think null pgd (and pud) entries should be skipped — do you agree?
If you have no objection, I would like you to post the bug fix.


Thanks,
Atsushi Kumagai

>>+             /* read the entire pud page */
>>+             if (!readmem(PADDR, (unsigned long long)pud_addr, (void 
>>*)pud_page,
>>+                                     PTRS_PER_PUD * sizeof(unsigned long))) {
>>+                     ERRMSG("Can't get pud entry for pgd slot %ld.\n", 
>>pgdindex);
>>+                     return FAILED;
>>+             }
>>+             /* step thru each pmd address in the pud page */
>>+             /* pudp points to an entry in the pud page */
>>+             for (pudp = (unsigned long *)pud_page, pudindex = 0;
>>+                                     pudindex < PTRS_PER_PUD; pudindex++, 
>>pudp++) {
>>+                     pmd_addr = *pudp & PMASK;
>>+                     /* read the entire pmd page */
>>+                     if (!readmem(PADDR, pmd_addr, (void *)pmd_page,
>>+                                     PTRS_PER_PMD * sizeof(unsigned long))) {
>>+                             ERRMSG("Can't get pud entry for slot %ld.\n", 
>>pudindex);
>>+                             return FAILED;
>>+                     }
>>+                     /* pmdp points to an entry in the pmd */
>>+                     for (pmdp = (unsigned long *)pmd_page, pmdindex = 0;
>>+                                     pmdindex < PTRS_PER_PMD; pmdindex++, 
>>pmdp++) {
>>+                             /* linear page position in this page table: */
>>+                             pmd = *pmdp;
>>+                             num_pmds++;
>>+                             tpfn = (pvaddr - VMEMMAP_START) /
>>+                                                     pagestructsize;
>>+                             if (tpfn >= high_pfn) {
>>+                                     done = 1;
>>+                                     break;
>>+                             }
>>+                             /*
>>+                              * vmap_offset_start:
>>+                              * Starting logical position in the
>>+                              * vmemmap array for the group stays
>>+                              * constant until a hole in the table
>>+                              * or a break in contiguousness.
>>+                              */
>>+
>>+                             /*
>>+                              * Ending logical position in the
>>+                              * vmemmap array:
>>+                              */
>>+                             vmap_offset_end += hugepagesize;
>>+                             do_break = 0;
>>+                             break_in_valids = 0;
>>+                             break_after_invalids = 0;
>>+                             /*
>>+                              * We want breaks either when:
>>+                              * - we hit a hole (invalid)
>>+                              * - we discontiguous page is a string of valids
>>+                              */
>>+                             if (pmd) {
>>+                                     data_addr = (pmd & PMASK);
>>+                                     if (start_range) {
>>+                                             /* first-time kludge */
>>+                                             start_data_addr = data_addr;
>>+                                             last_data_addr = start_data_addr
>>+                                                      - hugepagesize;
>>+                                             start_range = 0;
>>+                                     }
>>+                                     if (last_invalid) {
>>+                                             /* end of a hole */
>>+                                             start_data_addr = data_addr;
>>+                                             last_data_addr = start_data_addr
>>+                                                      - hugepagesize;
>>+                                             /* trigger update of offset */
>>+                                             do_break = 1;
>>+                                     }
>>+                                     last_valid = 1;
>>+                                     last_invalid = 0;
>>+                                     /*
>>+                                      * we have a gap in physical
>>+                                      * contiguousness in the table.
>>+                                      */
>>+                                     /* ?? consecutive holes will have
>>+                                        same data_addr */
>>+                                     if (data_addr !=
>>+                                             last_data_addr + hugepagesize) {
>>+                                             do_break = 1;
>>+                                             break_in_valids = 1;
>>+                                     }
>>+                                     DEBUG_MSG("valid: pud %ld pmd %ld pfn 
>>%#lx"
>>+                                             " pvaddr %#lx pfns %#lx-%lx"
>>+                                             " start %#lx end %#lx\n",
>>+                                             pudindex, pmdindex,
>>+                                             data_addr >> 12,
>>+                                             pvaddr, tpfn,
>>+                                             tpfn + structsperhpage - 1,
>>+                                             vmap_offset_start,
>>+                                             vmap_offset_end);
>>+                                     num_pmds_valid++;
>>+                                     if (!(pmd & _PAGE_PSE)) {
>>+                                             printf("vmemmap pmd not huge, 
>>abort\n");
>>+                                             return FAILED;
>>+                                     }
>>+                             } else {
>>+                                     if (last_valid) {
>>+                                             /* this a hole after some 
>>valids */
>>+                                             do_break = 1;
>>+                                             break_in_valids = 1;
>>+                                             break_after_invalids = 0;
>>+                                     }
>>+                                     last_valid = 0;
>>+                                     last_invalid = 1;
>>+                                     /*
>>+                                      * There are holes in this sparsely
>>+                                      * populated table; they are 2MB gaps
>>+                                      * represented by null pmd entries.
>>+                                      */
>>+                                     DEBUG_MSG("invalid: pud %ld pmd %ld 
>>%#lx"
>>+                                             " pfns %#lx-%lx start %#lx end"
>>+                                             " %#lx\n", pudindex, pmdindex,
>>+                                             pvaddr, tpfn,
>>+                                             tpfn + structsperhpage - 1,
>>+                                             vmap_offset_start,
>>+                                             vmap_offset_end);
>>+                             }
>>+                             if (do_break) {
>>+                                     /* The end of a hole is not summarized.
>>+                                      * It must be the start of a hole or
>>+                                      * hitting a discontiguous series.
>>+                                      */
>>+                                     if (break_in_valids || 
>>break_after_invalids) {
>>+                                             /*
>>+                                              * calculate that pfns
>>+                                              * represented by the current
>>+                                              * offset in the vmemmap.
>>+                                              */
>>+                                             /* page struct even partly on 
>>this page */
>>+                                             rep_pfn_start = 
>>vmap_offset_start /
>>+                                                     pagestructsize;
>>+                                             /* ending page struct entirely 
>>on
>>+                                                this page */
>>+                                             rep_pfn_end = ((vmap_offset_end 
>>-
>>+                                                     hugepagesize) / 
>>pagestructsize);
>>+                                             DEBUG_MSG("vmap pfns %#lx-%lx "
>>+                                                     "represent pfns 
>>%#lx-%lx\n\n",
>>+                                                     start_data_addr >> 
>>PAGESHIFT(),
>>+                                                     last_data_addr >> 
>>PAGESHIFT(),
>>+                                                     rep_pfn_start, 
>>rep_pfn_end);
>>+                                             groups++;
>>+                                             vmapp = (struct vmap_pfns 
>>*)malloc(
>>+                                                             sizeof(struct 
>>vmap_pfns));
>>+                                             /* pfn of this 2MB page of page 
>>structs */
>>+                                             vmapp->vmap_pfn_start = 
>>start_data_addr
>>+                                                                     >> 
>>PTE_SHIFT;
>>+                                             vmapp->vmap_pfn_end = 
>>last_data_addr
>>+                                                                     >> 
>>PTE_SHIFT;
>>+                                             /* these (start/end) are 
>>literal pfns
>>+                                              * on this page, not start and 
>>end+1 */
>>+                                             vmapp->rep_pfn_start = 
>>rep_pfn_start;
>>+                                             vmapp->rep_pfn_end = 
>>rep_pfn_end;
>>+
>>+                                             if (!vmaphead) {
>>+                                                     vmaphead = vmapp;
>>+                                                     vmapp->next = vmapp;
>>+                                                     vmapp->prev = vmapp;
>>+                                             } else {
>>+                                                     tail = vmaphead->prev;
>>+                                                     vmaphead->prev = vmapp;
>>+                                                     tail->next = vmapp;
>>+                                                     vmapp->next = vmaphead;
>>+                                                     vmapp->prev = tail;
>>+                                             }
>>+                                     }
>>+
>>+                                     /* update logical position at every 
>>break */
>>+                                     vmap_offset_start =
>>+                                             vmap_offset_end - hugepagesize;
>>+                                     start_data_addr = data_addr;
>>+                             }
>>+
>>+                             last_data_addr = data_addr;
>>+                             pvaddr += hugepagesize;
>>+                             /*
>>+                              * pvaddr is current virtual address
>>+                              *   eg 0xffffea0004200000 if
>>+                              *    vmap_offset_start is 4200000
>>+                              */
>>+                     }
>>+             }
>>+             tpfn = (pvaddr - VMEMMAP_START) / pagestructsize;
>>+             if (tpfn >= high_pfn) {
>>+                     done = 1;
>>+                     break;
>>+             }
>>+     }
>>+     rep_pfn_start = vmap_offset_start / pagestructsize;
>>+     rep_pfn_end = (vmap_offset_end - hugepagesize) / pagestructsize;
>>+     DEBUG_MSG("vmap pfns %#lx-%lx represent pfns %#lx-%lx\n\n",
>>+             start_data_addr >> PAGESHIFT(), last_data_addr >> PAGESHIFT(),
>>+             rep_pfn_start, rep_pfn_end);
>>+     groups++;
>>+     vmapp = (struct vmap_pfns *)malloc(sizeof(struct vmap_pfns));
>>+     vmapp->vmap_pfn_start = start_data_addr >> PTE_SHIFT;
>>+     vmapp->vmap_pfn_end = last_data_addr >> PTE_SHIFT;
>>+     vmapp->rep_pfn_start = rep_pfn_start;
>>+     vmapp->rep_pfn_end = rep_pfn_end;
>>+     if (!vmaphead) {
>>+             vmaphead = vmapp;
>>+             vmapp->next = vmapp;
>>+             vmapp->prev = vmapp;
>>+     } else {
>>+             tail = vmaphead->prev;
>>+             vmaphead->prev = vmapp;
>>+             tail->next = vmapp;
>>+             vmapp->next = vmaphead;
>>+             vmapp->prev = tail;
>>+     }
>>+     DEBUG_MSG("num_pmds: %d num_pmds_valid %d\n", num_pmds, num_pmds_valid);
>>+
>>+     /* transfer the linked list to an array */
>>+     cur = vmaphead;
>>+     gvmem_pfns = (struct vmap_pfns *)malloc(sizeof(struct vmap_pfns) * 
>>groups);
>>+     i = 0;
>>+     do {
>>+             vmapp = gvmem_pfns + i;
>>+             vmapp->vmap_pfn_start = cur->vmap_pfn_start;
>>+             vmapp->vmap_pfn_end = cur->vmap_pfn_end;
>>+             vmapp->rep_pfn_start = cur->rep_pfn_start;
>>+             vmapp->rep_pfn_end = cur->rep_pfn_end;
>>+             cur = cur->next;
>>+             free(cur->prev);
>>+             i++;
>>+     } while (cur != vmaphead);
>>+     nr_gvmem_pfns = i;
>>+     return COMPLETED;
>>+}
>>+
>> #endif /* x86_64 */
>>
>
>_______________________________________________
>kexec mailing list
>[email protected]
>http://lists.infradead.org/mailman/listinfo/kexec

_______________________________________________
kexec mailing list
[email protected]
http://lists.infradead.org/mailman/listinfo/kexec

Reply via email to