From: Hugh Dickins <[email protected]>

commit 119a5fc16105b2b9383a6e2a7800b2ef861b2975 upstream.

When retract_page_tables() removes a page table to make way for a huge
pmd, it holds huge page lock, i_mmap_lock_write, mmap_write_trylock and
pmd lock; but when collapse_pte_mapped_thp() does the same (to handle the
case when the original mmap_write_trylock had failed), only
mmap_write_trylock and pmd lock are held.
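
In outline, the locks held by each path look like this (a condensed
sketch based on the description above, not verbatim kernel code):

	/* retract_page_tables(): page table freed under full protection */
	lock_page(hpage);			/* huge page lock */
	i_mmap_lock_write(mapping);
	mmap_write_trylock(mm);			/* per vma */
	ptl = pmd_lock(mm, pmd);
	/* ... pmdp_collapse_flush(), pte_free() ... */

	/* collapse_pte_mapped_thp(): the same, but before this fix */
	mmap_write_trylock(mm);			/* taken by the caller */
	ptl = pmd_lock(mm, pmd);
	/* ... pmdp_collapse_flush(), pte_free() ... */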

That's not enough.  One machine has twice crashed under load, with "BUG:
spinlock bad magic" and GPF on 6b6b6b6b6b6b6b6b.  Examining the second
crash, page_vma_mapped_walk_done()'s spin_unlock of pvmw->ptl (serving
page_referenced() on a file THP, that had found a page table at *pmd)
discovers that the page table page and its lock have already been freed by
the time it comes to unlock.
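
The racing reader does roughly the following (a sketch of the
page_vma_mapped_walk() pattern in mm/page_vma_mapped.c, abbreviated):

	pvmw->ptl = pte_lockptr(mm, pvmw->pmd);	/* lock of the pte page table */
	spin_lock(pvmw->ptl);
	/* ... examine the ptes mapping the THP ... */
	spin_unlock(pvmw->ptl);	/* BUG if the page table was freed meanwhile */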

Follow the example of retract_page_tables(), though only one of huge
page lock or i_mmap_lock_write is needed to secure against this.  Choose
huge page lock here: it is the narrower lock, and knowing the hpage
earlier simplifies collapse_pte_mapped_thp().
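
Concretely (as the patch below does), take the huge page lock up front
and drop it on every exit path, roughly:

	hpage = find_lock_page(vma->vm_file->f_mapping,
			       linear_page_index(vma, haddr));
	if (!hpage)
		return;
	/* ... collapse the pte-mapped THP ... */
	unlock_page(hpage);
	put_page(hpage);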

Fixes: 27e1f8273113 ("khugepaged: enable collapse pmd for pte-mapped THP")
Signed-off-by: Hugh Dickins <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Acked-by: Kirill A. Shutemov <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Mike Kravetz <[email protected]>
Cc: Song Liu <[email protected]>
Cc: <[email protected]>    [5.4+]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Linus Torvalds <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>

---
 mm/khugepaged.c |   44 +++++++++++++++++++-------------------------
 1 file changed, 19 insertions(+), 25 deletions(-)

--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1412,7 +1412,7 @@ void collapse_pte_mapped_thp(struct mm_s
 {
        unsigned long haddr = addr & HPAGE_PMD_MASK;
        struct vm_area_struct *vma = find_vma(mm, haddr);
-       struct page *hpage = NULL;
+       struct page *hpage;
        pte_t *start_pte, *pte;
        pmd_t *pmd, _pmd;
        spinlock_t *ptl;
@@ -1432,9 +1432,17 @@ void collapse_pte_mapped_thp(struct mm_s
        if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE))
                return;
 
+       hpage = find_lock_page(vma->vm_file->f_mapping,
+                              linear_page_index(vma, haddr));
+       if (!hpage)
+               return;
+
+       if (!PageHead(hpage))
+               goto drop_hpage;
+
        pmd = mm_find_pmd(mm, haddr);
        if (!pmd)
-               return;
+               goto drop_hpage;
 
        start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
 
@@ -1453,30 +1461,11 @@ void collapse_pte_mapped_thp(struct mm_s
 
                page = vm_normal_page(vma, addr, *pte);
 
-               if (!page || !PageCompound(page))
-                       goto abort;
-
-               if (!hpage) {
-                       hpage = compound_head(page);
-                       /*
-                        * The mapping of the THP should not change.
-                        *
-                        * Note that uprobe, debugger, or MAP_PRIVATE may
-                        * change the page table, but the new page will
-                        * not pass PageCompound() check.
-                        */
-                       if (WARN_ON(hpage->mapping != vma->vm_file->f_mapping))
-                               goto abort;
-               }
-
                /*
-                * Confirm the page maps to the correct subpage.
-                *
-                * Note that uprobe, debugger, or MAP_PRIVATE may change
-                * the page table, but the new page will not pass
-                * PageCompound() check.
+                * Note that uprobe, debugger, or MAP_PRIVATE may change the
+                * page table, but the new page will not be a subpage of hpage.
                 */
-               if (WARN_ON(hpage + i != page))
+               if (hpage + i != page)
                        goto abort;
                count++;
        }
@@ -1495,7 +1484,7 @@ void collapse_pte_mapped_thp(struct mm_s
        pte_unmap_unlock(start_pte, ptl);
 
        /* step 3: set proper refcount and mm_counters. */
-       if (hpage) {
+       if (count) {
                page_ref_sub(hpage, count);
                add_mm_counter(vma->vm_mm, mm_counter_file(hpage), -count);
        }
@@ -1506,10 +1495,15 @@ void collapse_pte_mapped_thp(struct mm_s
        spin_unlock(ptl);
        mm_dec_nr_ptes(mm);
        pte_free(mm, pmd_pgtable(_pmd));
+
+drop_hpage:
+       unlock_page(hpage);
+       put_page(hpage);
        return;
 
 abort:
        pte_unmap_unlock(start_pte, ptl);
+       goto drop_hpage;
 }
 
 static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)

