From: Huang Ying
A huge PMD needs to be split when, for example, a part of the PMD mapping
is zapped. If the PMD mapping is a swap mapping, it needs to be split too.
This patch implements the support for this. It is similar to splitting
the PMD page mapping, except that we also need to decrease the PMD swap
mapping count for the huge swap cluster. If the PMD swap mapping count
becomes 0, the huge swap cluster will be split.
Note: is_huge_zero_pmd() and pmd_page() do not work well with a swap
PMD, so pmd_present() is checked before they are called.
Signed-off-by: "Huang, Ying"
Cc: "Kirill A. Shutemov"
Cc: Andrea Arcangeli
Cc: Michal Hocko
Cc: Johannes Weiner
Cc: Shaohua Li
Cc: Hugh Dickins
Cc: Minchan Kim
Cc: Rik van Riel
Cc: Dave Hansen
Cc: Naoya Horiguchi
Cc: Zi Yan
---
include/linux/swap.h | 6 +
mm/huge_memory.c | 54
mm/swapfile.c| 28 +++
3 files changed, 83 insertions(+), 5 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 89f34ebfd318..b5762b526719 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -618,11 +618,17 @@ static inline swp_entry_t get_swap_page(struct page *page)
#ifdef CONFIG_THP_SWAP
extern int split_swap_cluster(swp_entry_t entry);
+extern int split_swap_cluster_map(swp_entry_t entry);
#else
static inline int split_swap_cluster(swp_entry_t entry)
{
return 0;
}
+
+static inline int split_swap_cluster_map(swp_entry_t entry)
+{
+ return 0;
+}
#endif
#ifdef CONFIG_MEMCG
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a3a1815f8e11..7342ad88de5d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1605,6 +1605,47 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t
pmd)
return 0;
}
+#ifdef CONFIG_THP_SWAP
+static void __split_huge_swap_pmd(struct vm_area_struct *vma,
+ unsigned long haddr,
+ pmd_t *pmd)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ pgtable_t pgtable;
+ pmd_t _pmd;
+ swp_entry_t entry;
+ int i, soft_dirty;
+
+ entry = pmd_to_swp_entry(*pmd);
+ soft_dirty = pmd_soft_dirty(*pmd);
+
+ split_swap_cluster_map(entry);
+
+ pgtable = pgtable_trans_huge_withdraw(mm, pmd);
+ pmd_populate(mm, &_pmd, pgtable);
+
+ for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE, entry.val++) {
+ pte_t *pte, ptent;
+
+ pte = pte_offset_map(&_pmd, haddr);
+ VM_BUG_ON(!pte_none(*pte));
+ ptent = swp_entry_to_pte(entry);
+ if (soft_dirty)
+ ptent = pte_swp_mksoft_dirty(ptent);
+ set_pte_at(mm, haddr, pte, ptent);
+ pte_unmap(pte);
+ }
+ smp_wmb(); /* make pte visible before pmd */
+ pmd_populate(mm, pmd, pgtable);
+}
+#else
+static inline void __split_huge_swap_pmd(struct vm_area_struct *vma,
+unsigned long haddr,
+pmd_t *pmd)
+{
+}
+#endif
+
/*
* Return true if we do MADV_FREE successfully on entire pmd page.
* Otherwise, return false.
@@ -2071,7 +2112,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct
*vma, pmd_t *pmd,
VM_BUG_ON(haddr & ~HPAGE_PMD_MASK);
VM_BUG_ON_VMA(vma->vm_start > haddr, vma);
VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma);
- VM_BUG_ON(!is_pmd_migration_entry(*pmd) && !pmd_trans_huge(*pmd)
+ VM_BUG_ON(!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd)
&& !pmd_devmap(*pmd));
count_vm_event(THP_SPLIT_PMD);
@@ -2093,7 +2134,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct
*vma, pmd_t *pmd,
put_page(page);
add_mm_counter(mm, MM_FILEPAGES, -HPAGE_PMD_NR);
return;
- } else if (is_huge_zero_pmd(*pmd)) {
+ } else if (pmd_present(*pmd) && is_huge_zero_pmd(*pmd)) {
/*
* FIXME: Do we want to invalidate secondary mmu by calling
* mmu_notifier_invalidate_range() see comments below inside
@@ -2137,6 +2178,9 @@ static void __split_huge_pmd_locked(struct vm_area_struct
*vma, pmd_t *pmd,
page = pfn_to_page(swp_offset(entry));
} else
#endif
+ if (thp_swap_supported() && is_swap_pmd(old_pmd))
+ return __split_huge_swap_pmd(vma, haddr, pmd);
+ else
page = pmd_page(old_pmd);
VM_BUG_ON_PAGE(!page_count(page), page);
page_ref_add(page, HPAGE_PMD_NR - 1);
@@ -2228,14 +2272,14 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t
*pmd,
* pmd against. Otherwise we can end up replacing wrong page.
*/
VM_BUG_ON(freeze && !page);
- if (page && page != pmd_page(*pmd))
- goto out;
+ if (page && (!pmd_present(*pmd) || page != pmd_page(*pmd)))
+