Re: [PATCH -mm 06/21] mm, THP, swap: Support PMD swap mapping when splitting huge PMD

2018-04-17 Thread Huang, Ying
Randy Dunlap writes:

> On 04/16/18 19:02, Huang, Ying wrote:
>> From: Huang Ying 
>> 
>> A huge PMD needs to be split when zapping part of the PMD mapping,
>> etc.  If the PMD mapping is a swap mapping, we need to split it too.
>> This patch implements support for this.  This is similar to splitting
>> the PMD page mapping, except that we also need to decrease the PMD
>> swap mapping count for the huge swap cluster.  If the PMD swap mapping
>> count becomes 0, the huge swap cluster will be split.
>> 
>> Notice: is_huge_zero_pmd() and pmd_page() don't work well with a swap
>> PMD, so a pmd_present() check is done before calling them.
>
> FWIW, I would prefer to see that comment in the source code, not just
> in the commit description.

Sure.  I will add the comment in the source code too.
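
Something like the following, at the is_huge_zero_pmd() call site in
__split_huge_pmd_locked() (just a sketch, exact wording to be
finalized):

	} else if (pmd_present(*pmd) && is_huge_zero_pmd(*pmd)) {
		/*
		 * is_huge_zero_pmd() and pmd_page() don't work well with a
		 * non-present (swap) PMD, so pmd_present() must be checked
		 * first.
		 */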

Best Regards,
Huang, Ying

>> 
>> Signed-off-by: "Huang, Ying" 
>> Cc: "Kirill A. Shutemov" 
>> Cc: Andrea Arcangeli 
>> Cc: Michal Hocko 
>> Cc: Johannes Weiner 
>> Cc: Shaohua Li 
>> Cc: Hugh Dickins 
>> Cc: Minchan Kim 
>> Cc: Rik van Riel 
>> Cc: Dave Hansen 
>> Cc: Naoya Horiguchi 
>> Cc: Zi Yan 
>> ---
>>  include/linux/swap.h |  6 +
>>  mm/huge_memory.c     | 54 
>>  mm/swapfile.c        | 28 +++
>>  3 files changed, 83 insertions(+), 5 deletions(-)


Re: [PATCH -mm 06/21] mm, THP, swap: Support PMD swap mapping when splitting huge PMD

2018-04-17 Thread Randy Dunlap
On 04/16/18 19:02, Huang, Ying wrote:
> From: Huang Ying 
> 
> A huge PMD needs to be split when zapping part of the PMD mapping,
> etc.  If the PMD mapping is a swap mapping, we need to split it too.
> This patch implements support for this.  This is similar to splitting
> the PMD page mapping, except that we also need to decrease the PMD
> swap mapping count for the huge swap cluster.  If the PMD swap mapping
> count becomes 0, the huge swap cluster will be split.
> 
> Notice: is_huge_zero_pmd() and pmd_page() don't work well with a swap
> PMD, so a pmd_present() check is done before calling them.

FWIW, I would prefer to see that comment in the source code, not just
in the commit description.

> 
> Signed-off-by: "Huang, Ying" 
> Cc: "Kirill A. Shutemov" 
> Cc: Andrea Arcangeli 
> Cc: Michal Hocko 
> Cc: Johannes Weiner 
> Cc: Shaohua Li 
> Cc: Hugh Dickins 
> Cc: Minchan Kim 
> Cc: Rik van Riel 
> Cc: Dave Hansen 
> Cc: Naoya Horiguchi 
> Cc: Zi Yan 
> ---
>  include/linux/swap.h |  6 +
>  mm/huge_memory.c     | 54 
>  mm/swapfile.c        | 28 +++
>  3 files changed, 83 insertions(+), 5 deletions(-)


-- 
~Randy


[PATCH -mm 06/21] mm, THP, swap: Support PMD swap mapping when splitting huge PMD

2018-04-16 Thread Huang, Ying
From: Huang Ying 

A huge PMD needs to be split when zapping part of the PMD mapping,
etc.  If the PMD mapping is a swap mapping, we need to split it too.
This patch implements support for this.  This is similar to splitting
the PMD page mapping, except that we also need to decrease the PMD
swap mapping count for the huge swap cluster.  If the PMD swap mapping
count becomes 0, the huge swap cluster will be split.

Notice: is_huge_zero_pmd() and pmd_page() don't work well with a swap
PMD, so a pmd_present() check is done before calling them.
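
In outline, the new code path works like this (a condensed sketch of
the diff below, not additional behavior):

	__split_huge_swap_pmd(vma, haddr, pmd)
		split_swap_cluster_map(entry);	/* drop one PMD swap mapping count */
		pgtable = pgtable_trans_huge_withdraw(mm, pmd);
		for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE, entry.val++)
			set_pte_at(mm, haddr, pte, ptent);	/* swap PTE for entry */
		pmd_populate(mm, pmd, pgtable);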

Signed-off-by: "Huang, Ying" 
Cc: "Kirill A. Shutemov" 
Cc: Andrea Arcangeli 
Cc: Michal Hocko 
Cc: Johannes Weiner 
Cc: Shaohua Li 
Cc: Hugh Dickins 
Cc: Minchan Kim 
Cc: Rik van Riel 
Cc: Dave Hansen 
Cc: Naoya Horiguchi 
Cc: Zi Yan 
---
 include/linux/swap.h |  6 +
 mm/huge_memory.c     | 54 
 mm/swapfile.c        | 28 +++
 3 files changed, 83 insertions(+), 5 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 89f34ebfd318..b5762b526719 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -618,11 +618,17 @@ static inline swp_entry_t get_swap_page(struct page *page)
 
 #ifdef CONFIG_THP_SWAP
 extern int split_swap_cluster(swp_entry_t entry);
+extern int split_swap_cluster_map(swp_entry_t entry);
 #else
 static inline int split_swap_cluster(swp_entry_t entry)
 {
return 0;
 }
+
+static inline int split_swap_cluster_map(swp_entry_t entry)
+{
+   return 0;
+}
 #endif
 
 #ifdef CONFIG_MEMCG
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a3a1815f8e11..7342ad88de5d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1605,6 +1605,47 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
return 0;
 }
 
+#ifdef CONFIG_THP_SWAP
+static void __split_huge_swap_pmd(struct vm_area_struct *vma,
+ unsigned long haddr,
+ pmd_t *pmd)
+{
+   struct mm_struct *mm = vma->vm_mm;
+   pgtable_t pgtable;
+   pmd_t _pmd;
+   swp_entry_t entry;
+   int i, soft_dirty;
+
+   entry = pmd_to_swp_entry(*pmd);
+   soft_dirty = pmd_soft_dirty(*pmd);
+
+   split_swap_cluster_map(entry);
+
+   pgtable = pgtable_trans_huge_withdraw(mm, pmd);
+   pmd_populate(mm, &_pmd, pgtable);
+
+   for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE, entry.val++) {
+   pte_t *pte, ptent;
+
+   pte = pte_offset_map(&_pmd, haddr);
+   VM_BUG_ON(!pte_none(*pte));
+   ptent = swp_entry_to_pte(entry);
+   if (soft_dirty)
+   ptent = pte_swp_mksoft_dirty(ptent);
+   set_pte_at(mm, haddr, pte, ptent);
+   pte_unmap(pte);
+   }
+   smp_wmb(); /* make pte visible before pmd */
+   pmd_populate(mm, pmd, pgtable);
+}
+#else
+static inline void __split_huge_swap_pmd(struct vm_area_struct *vma,
+unsigned long haddr,
+pmd_t *pmd)
+{
+}
+#endif
+
 /*
  * Return true if we do MADV_FREE successfully on entire pmd page.
  * Otherwise, return false.
@@ -2071,7 +2112,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
VM_BUG_ON(haddr & ~HPAGE_PMD_MASK);
VM_BUG_ON_VMA(vma->vm_start > haddr, vma);
VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma);
-   VM_BUG_ON(!is_pmd_migration_entry(*pmd) && !pmd_trans_huge(*pmd)
+   VM_BUG_ON(!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd)
&& !pmd_devmap(*pmd));
 
count_vm_event(THP_SPLIT_PMD);
@@ -2093,7 +2134,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
put_page(page);
add_mm_counter(mm, MM_FILEPAGES, -HPAGE_PMD_NR);
return;
-   } else if (is_huge_zero_pmd(*pmd)) {
+   } else if (pmd_present(*pmd) && is_huge_zero_pmd(*pmd)) {
/*
 * FIXME: Do we want to invalidate secondary mmu by calling
 * mmu_notifier_invalidate_range() see comments below inside
@@ -2137,6 +2178,9 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
page = pfn_to_page(swp_offset(entry));
} else
 #endif
+   if (thp_swap_supported() && is_swap_pmd(old_pmd))
+   return __split_huge_swap_pmd(vma, haddr, pmd);
+   else
page = pmd_page(old_pmd);
VM_BUG_ON_PAGE(!page_count(page), page);
page_ref_add(page, HPAGE_PMD_NR - 1);
@@ -2228,14 +2272,14 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 * pmd against. Otherwise we can end up replacing wrong page.
 */
VM_BUG_ON(freeze && !page);
-   if (page && page != pmd_page(*pmd))
-   goto out;
+   if (page && (!pmd_present(*pmd) || page != pmd_page(*pmd)))
+   goto out;