On Wed, Sep 27, 2023 at 06:32:44AM +0000, HAGIO KAZUHITO(萩尾 一仁) wrote:
> On 2023/09/14 18:22, Aditya Gupta wrote:
> > Currently 'makedumpfile' fails to collect vmcore on upstream kernel,
> > with the errors:
> > 
> >      readpage_elf: Attempt to read non-existent page at 0x4000000000000000.
> >      readmem: type_addr: 0, addr:0, size:8
> >      get_vmemmap_list_info: Can't get vmemmap region addresses
> >      get_machdep_info_ppc64: Can't get vmemmap list info.
> > 
> > This occurs since makedumpfile depends on 'vmemmap_list' for translating
> > vmemmap addresses. But with below commit in Linux, vmemmap_list can be
> > empty, in case of Radix MMU on PowerPC64
> > 
> >      368a0590d954: (powerpc/book3s64/vmemmap: switch radix to use a
> >      different vmemmap handling function)
> > 
> > In case vmemmap_list is empty, then it's head is NULL, which causes
> > makedumpfile to fail with above error.
> > 
> > Since with above commit, 'vmemmap_list' is not populated (when MMU is
> > Radix MMU), kernel populates corresponding page table entries in kernel
> > page table. Hence, instead of depending on 'vmemmap_list' for address
> > translation for vmemmap addresses, do a kernel pagetable walk.
> > 
> > And since the pte can also be introduced at higher levels in the page
> > table, such as at PMD level, add hugepage support, by checking for
> > PAGE_PTE flag
> > 
> > Reported-by: Sachin Sant <sach...@linux.ibm.com>
> > Signed-off-by: Aditya Gupta <adit...@linux.ibm.com>
> 
> Thank you for the patch, applied.
> 
> https://github.com/makedumpfile/makedumpfile/commit/a34f017965583e89c4cb0b00117c200a6c191e54

Thanks for the update.

> 
> Sorry for the delay, exceptionally busy this month..

No issues :)

Thanks,
- Aditya Gupta

> 
> Thanks,
> Kazu
> 
> > ---
> >   arch/ppc64.c   | 111 ++++++++++++++++++++++++++++++++++---------------
> >   makedumpfile.h |   6 +++
> >   2 files changed, 84 insertions(+), 33 deletions(-)
> > 
> > diff --git a/arch/ppc64.c b/arch/ppc64.c
> > index 5e70acb51aba..9456b8b570c5 100644
> > --- a/arch/ppc64.c
> > +++ b/arch/ppc64.c
> > @@ -196,6 +196,10 @@ ppc64_vmemmap_init(void)
> >     int psize, shift;
> >     ulong head;
> >   
> > +   /* initialise vmemmap_list in case SYMBOL(vmemmap_list) is not found */
> > +   info->vmemmap_list = NULL;
> > +   info->vmemmap_cnt = 0;
> > +   
> >     if ((SYMBOL(vmemmap_list) == NOT_FOUND_SYMBOL)
> >         || (SYMBOL(mmu_psize_defs) == NOT_FOUND_SYMBOL)
> >         || (SYMBOL(mmu_vmemmap_psize) == NOT_FOUND_SYMBOL)
> > @@ -216,15 +220,24 @@ ppc64_vmemmap_init(void)
> >             return FALSE;
> >     info->vmemmap_psize = 1 << shift;
> >   
> > -   if (!readmem(VADDR, SYMBOL(vmemmap_list), &head, sizeof(unsigned long)))
> > -           return FALSE;
> > -
> >     /*
> > -    * Get vmemmap list count and populate vmemmap regions info
> > -    */
> > -   info->vmemmap_cnt = get_vmemmap_list_info(head);
> > -   if (info->vmemmap_cnt == 0)
> > -           return FALSE;
> > +    * vmemmap_list symbol can be missing or set to 0 in the kernel.
> > +    * This would imply vmemmap region is mapped in the kernel pagetable.
> > +    *
> > +    * So, read vmemmap_list anyway, and use 'vmemmap_list' if it's not 
> > empty
> > +    * (head != NULL), or we will do a kernel pagetable walk for vmemmap 
> > address
> > +    * translation later
> > +    **/
> > +   readmem(VADDR, SYMBOL(vmemmap_list), &head, sizeof(unsigned long));
> > +
> > +   if (head) {
> > +           /*
> > +            * Get vmemmap list count and populate vmemmap regions info
> > +            */
> > +           info->vmemmap_cnt = get_vmemmap_list_info(head);
> > +           if (info->vmemmap_cnt == 0)
> > +                   return FALSE;
> > +   }
> >   
> >     info->flag_vmemmap = TRUE;
> >     return TRUE;
> > @@ -347,29 +360,6 @@ ppc64_vmalloc_init(void)
> >     return TRUE;
> >   }
> >   
> > -/*
> > - *  If the vmemmap address translation information is stored in the kernel,
> > - *  make the translation.
> > - */
> > -static unsigned long long
> > -ppc64_vmemmap_to_phys(unsigned long vaddr)
> > -{
> > -   int     i;
> > -   ulong   offset;
> > -   unsigned long long paddr = NOT_PADDR;
> > -
> > -   for (i = 0; i < info->vmemmap_cnt; i++) {
> > -           if ((vaddr >= info->vmemmap_list[i].virt) && (vaddr <
> > -               (info->vmemmap_list[i].virt + info->vmemmap_psize))) {
> > -                   offset = vaddr - info->vmemmap_list[i].virt;
> > -                   paddr = info->vmemmap_list[i].phys + offset;
> > -                   break;
> > -           }
> > -   }
> > -
> > -   return paddr;
> > -}
> > -
> >   static unsigned long long
> >   ppc64_vtop_level4(unsigned long vaddr)
> >   {
> > @@ -379,6 +369,8 @@ ppc64_vtop_level4(unsigned long vaddr)
> >     unsigned long long pgd_pte, pud_pte;
> >     unsigned long long pmd_pte, pte;
> >     unsigned long long paddr = NOT_PADDR;
> > +   uint is_hugepage = 0;
> > +   uint pdshift;
> >     uint swap = 0;
> >   
> >     if (info->page_buf == NULL) {
> > @@ -413,6 +405,13 @@ ppc64_vtop_level4(unsigned long vaddr)
> >     if (!pgd_pte)
> >             return NOT_PADDR;
> >   
> > +   if (IS_HUGEPAGE(pgd_pte)) {
> > +           is_hugepage = 1;
> > +           pte = pgd_pte;
> > +           pdshift = info->l4_shift;
> > +           goto out;
> > +   }
> > +
> >     /*
> >      * Sometimes we don't have level3 pagetable entries
> >      */
> > @@ -426,6 +425,13 @@ ppc64_vtop_level4(unsigned long vaddr)
> >             pud_pte = swap64(ULONG((info->page_buf + 
> > PAGEOFFSET(page_upper))), swap);
> >             if (!pud_pte)
> >                     return NOT_PADDR;
> > +
> > +           if (IS_HUGEPAGE(pud_pte)) {
> > +                   is_hugepage = 1;
> > +                   pte = pud_pte;
> > +                   pdshift = info->l3_shift;
> > +                   goto out;
> > +           }
> >     } else {
> >             pud_pte = pgd_pte;
> >     }
> > @@ -440,6 +446,13 @@ ppc64_vtop_level4(unsigned long vaddr)
> >     if (!(pmd_pte))
> >             return NOT_PADDR;
> >   
> > +   if (IS_HUGEPAGE(pmd_pte)) {
> > +           is_hugepage = 1;
> > +           pte = pmd_pte;
> > +           pdshift = info->l2_shift;
> > +           goto out;
> > +   }
> > +
> >     pmd_pte = pmd_page_vaddr_l4(pmd_pte);
> >     page_table = (ulong *)(pmd_pte)
> >                     + (BTOP(vaddr) & (info->ptrs_per_l1 - 1));
> > @@ -456,8 +469,40 @@ ppc64_vtop_level4(unsigned long vaddr)
> >     if (!pte)
> >             return NOT_PADDR;
> >   
> > -   paddr = PAGEBASE(PTOB((pte & info->pte_rpn_mask) >> 
> > info->pte_rpn_shift))
> > +out:
> > +   if (is_hugepage) {
> > +           paddr = PAGEBASE(PTOB((pte & info->pte_rpn_mask) >> 
> > info->pte_rpn_shift))
> > +                   + (vaddr & ((1UL << pdshift) - 1));
> > +   } else {
> > +           paddr = PAGEBASE(PTOB((pte & info->pte_rpn_mask) >> 
> > info->pte_rpn_shift))
> >                     + PAGEOFFSET(vaddr);
> > +   }
> > +
> > +   return paddr;
> > +}
> > +
> > +/*
> > + *  If the vmemmap address translation information is stored in the kernel,
> > + *  make the translation.
> > + */
> > +static unsigned long long
> > +ppc64_vmemmap_to_phys(unsigned long vaddr)
> > +{
> > +   int     i;
> > +   ulong   offset;
> > +   unsigned long long paddr = NOT_PADDR;
> > +
> > +   if (!info->vmemmap_list)
> > +           return ppc64_vtop_level4(vaddr);
> > +
> > +   for (i = 0; i < info->vmemmap_cnt; i++) {
> > +           if ((vaddr >= info->vmemmap_list[i].virt) && (vaddr <
> > +               (info->vmemmap_list[i].virt + info->vmemmap_psize))) {
> > +                   offset = vaddr - info->vmemmap_list[i].virt;
> > +                   paddr = info->vmemmap_list[i].phys + offset;
> > +                   break;
> > +           }
> > +   }
> >   
> >     return paddr;
> >   }
> > @@ -567,8 +612,8 @@ get_machdep_info_ppc64(void)
> >             return FALSE;
> >     }
> >   
> > +   info->vmemmap_start = VMEMMAP_REGION_ID << REGION_SHIFT;
> >     if (SYMBOL(vmemmap_list) != NOT_FOUND_SYMBOL) {
> > -           info->vmemmap_start = VMEMMAP_REGION_ID << REGION_SHIFT;
> >             info->vmemmap_end = info->vmemmap_start;
> >             if (ppc64_vmemmap_init() == FALSE) {
> >                     ERRMSG("Can't get vmemmap list info.\n");
> > diff --git a/makedumpfile.h b/makedumpfile.h
> > index 85e5a4932983..056aee191519 100644
> > --- a/makedumpfile.h
> > +++ b/makedumpfile.h
> > @@ -678,6 +678,12 @@ unsigned long get_kvbase_arm64(void);
> >   #define REGION_SHIFT            (60UL)
> >   #define VMEMMAP_REGION_ID       (0xfUL)
> >   
> > +/*
> > + * If PAGE_PTE is set, then it's a leaf PTE for hugepage
> > + */
> > +#define PAGE_PTE (1UL << 62)
> > +#define IS_HUGEPAGE(pte) (!!((pte) & PAGE_PTE))
> > +
> >   /* 4-level page table support */
> >   
> >   /* 4K pagesize */

_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

Reply via email to