Re: [PATCH v2 08/13] mm/gup: Handle hugetlb for no_page_table()

2024-01-15 Thread Jason Gunthorpe
On Wed, Jan 03, 2024 at 05:14:18PM +0800, pet...@redhat.com wrote:
> From: Peter Xu 
> 
> no_page_table() is not yet used for hugetlb code paths. Make it prepared.
> 
> The major difference here is hugetlb will return -EFAULT as long as page
> cache does not exist, even if VM_SHARED.  See hugetlb_follow_page_mask().
> 
> Pass "address" into no_page_table() too, as hugetlb will need it.
> 
> Reviewed-by: Christoph Hellwig 
> Signed-off-by: Peter Xu 
> ---
>  mm/gup.c | 44 ++++++++++++++++++++++++++------------------
>  1 file changed, 26 insertions(+), 18 deletions(-)

Reviewed-by: Jason Gunthorpe 

Jason


[PATCH v2 08/13] mm/gup: Handle hugetlb for no_page_table()

2024-01-03 Thread peterx
From: Peter Xu 

no_page_table() is not yet used for hugetlb code paths. Make it prepared.

The major difference here is hugetlb will return -EFAULT as long as page
cache does not exist, even if VM_SHARED.  See hugetlb_follow_page_mask().

Pass "address" into no_page_table() too, as hugetlb will need it.

Reviewed-by: Christoph Hellwig 
Signed-off-by: Peter Xu 
---
 mm/gup.c | 44 ++++++++++++++++++++++++++------------------
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 3813aad79c4a..b8a80e2bfe08 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -501,19 +501,27 @@ static inline void mm_set_has_pinned_flag(unsigned long *mm_flags)
 
 #ifdef CONFIG_MMU
 static struct page *no_page_table(struct vm_area_struct *vma,
-   unsigned int flags)
+ unsigned int flags, unsigned long address)
 {
+   if (!(flags & FOLL_DUMP))
+   return NULL;
+
/*
-* When core dumping an enormous anonymous area that nobody
-* has touched so far, we don't want to allocate unnecessary pages or
+* When core dumping, we don't want to allocate unnecessary pages or
 * page tables.  Return error instead of NULL to skip handle_mm_fault,
 * then get_dump_page() will return NULL to leave a hole in the dump.
 * But we can only make this optimization where a hole would surely
 * be zero-filled if handle_mm_fault() actually did handle it.
 */
-   if ((flags & FOLL_DUMP) &&
-   (vma_is_anonymous(vma) || !vma->vm_ops->fault))
+   if (is_vm_hugetlb_page(vma)) {
+   struct hstate *h = hstate_vma(vma);
+
+   if (!hugetlbfs_pagecache_present(h, vma, address))
+   return ERR_PTR(-EFAULT);
+   } else if ((vma_is_anonymous(vma) || !vma->vm_ops->fault)) {
return ERR_PTR(-EFAULT);
+   }
+
return NULL;
 }
 
@@ -593,7 +601,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 
ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
if (!ptep)
-   return no_page_table(vma, flags);
+   return no_page_table(vma, flags, address);
pte = ptep_get(ptep);
if (!pte_present(pte))
goto no_page;
@@ -685,7 +693,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
pte_unmap_unlock(ptep, ptl);
if (!pte_none(pte))
return NULL;
-   return no_page_table(vma, flags);
+   return no_page_table(vma, flags, address);
 }
 
 static struct page *follow_pmd_mask(struct vm_area_struct *vma,
@@ -701,27 +709,27 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
pmd = pmd_offset(pudp, address);
pmdval = pmdp_get_lockless(pmd);
if (pmd_none(pmdval))
-   return no_page_table(vma, flags);
+   return no_page_table(vma, flags, address);
if (!pmd_present(pmdval))
-   return no_page_table(vma, flags);
+   return no_page_table(vma, flags, address);
if (pmd_devmap(pmdval)) {
ptl = pmd_lock(mm, pmd);
page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap);
spin_unlock(ptl);
if (page)
return page;
-   return no_page_table(vma, flags);
+   return no_page_table(vma, flags, address);
}
if (likely(!pmd_trans_huge(pmdval)))
return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
 
if (pmd_protnone(pmdval) && !gup_can_follow_protnone(vma, flags))
-   return no_page_table(vma, flags);
+   return no_page_table(vma, flags, address);
 
ptl = pmd_lock(mm, pmd);
if (unlikely(!pmd_present(*pmd))) {
spin_unlock(ptl);
-   return no_page_table(vma, flags);
+   return no_page_table(vma, flags, address);
}
if (unlikely(!pmd_trans_huge(*pmd))) {
spin_unlock(ptl);
@@ -752,17 +760,17 @@ static struct page *follow_pud_mask(struct vm_area_struct *vma,
 
pud = pud_offset(p4dp, address);
if (pud_none(*pud))
-   return no_page_table(vma, flags);
+   return no_page_table(vma, flags, address);
if (pud_devmap(*pud)) {
ptl = pud_lock(mm, pud);
page = follow_devmap_pud(vma, address, pud, flags, &ctx->pgmap);
spin_unlock(ptl);
if (page)
return page;
-   return no_page_table(vma, flags);
+   return no_page_table(vma, flags, address);
}
if (unlikely(pud_bad(*pud)))
-   return no_page_table(vma, flags);
+   return no_page_table(vma, flags, address);
 
return follow_pmd_mask(vma, address, pud, flags, ctx);
 }
@@ -776,10 +784,10 @@ static struct page *f