From: Zi Yan <[email protected]>

Add PUD-level TLB flush ops and teach page_vma_mapped_walk about 1GB
THPs.

Signed-off-by: Zi Yan <[email protected]>
---
 arch/x86/include/asm/pgtable.h |  3 +++
 arch/x86/mm/pgtable.c          | 13 +++++++++++++
 include/linux/mmu_notifier.h   | 13 +++++++++++++
 include/linux/pgtable.h        | 14 ++++++++++++++
 include/linux/rmap.h           |  1 +
 mm/page_vma_mapped.c           | 33 +++++++++++++++++++++++++++++----
 mm/rmap.c                      | 12 +++++++++---
 7 files changed, 82 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 26255cac78c0..15334f5ba172 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1127,6 +1127,9 @@ extern int pudp_test_and_clear_young(struct vm_area_struct *vma,
 extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
                                  unsigned long address, pmd_t *pmdp);
 
+#define __HAVE_ARCH_PUDP_CLEAR_YOUNG_FLUSH /* skip the generic fallback in linux/pgtable.h */
+extern int pudp_clear_flush_young(struct vm_area_struct *vma,
+                                 unsigned long address, pud_t *pudp);
 
 #define pmd_write pmd_write
 static inline int pmd_write(pmd_t pmd)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 7be73aee6183..e4a2dffcc418 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -633,6 +633,19 @@ int pmdp_clear_flush_young(struct vm_area_struct *vma,
 
        return young;
 }
+int pudp_clear_flush_young(struct vm_area_struct *vma,
+                          unsigned long address, pud_t *pudp)
+{
+       int referenced;
+
+       VM_BUG_ON(address & ~HPAGE_PUD_MASK);   /* must be PUD-aligned */
+       referenced = pudp_test_and_clear_young(vma, address, pudp);
+       if (!referenced)
+               return 0;
+       /* Entry was young: flush the whole PUD-sized range from the TLB. */
+       flush_tlb_range(vma, address, address + HPAGE_PUD_SIZE);
+       return referenced;
+}
 #endif
 
 /**
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index b8200782dede..4ffa179e654f 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -557,6 +557,19 @@ static inline void mmu_notifier_range_init_migrate(
        __young;                                                        \
 })
 
+#define pudp_clear_flush_young_notify(__vma, __address, __pudp)                \
+({                                                                     \
+       int __young;    /* OR of PUD and notifier results */            \
+       struct vm_area_struct *___vma = __vma;                          \
+       unsigned long ___address = __address;                           \
+       __young = pudp_clear_flush_young(___vma, ___address, __pudp);   \
+       __young |= mmu_notifier_clear_flush_young(___vma->vm_mm,        \
+                                                 ___address,           \
+                                                 ___address +          \
+                                                       PUD_SIZE);      \
+       __young;                                                        \
+})
+
 #define ptep_clear_young_notify(__vma, __address, __ptep)              \
 ({                                                                     \
        int __young;                                                    \
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 255275d5b73e..8ef358c386af 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -240,6 +240,20 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma,
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif
 
+#ifndef __HAVE_ARCH_PUDP_CLEAR_YOUNG_FLUSH
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+extern int pudp_clear_flush_young(struct vm_area_struct *vma,
+                                 unsigned long address, pud_t *pudp);
+#else
+static inline int pudp_clear_flush_young(struct vm_area_struct *vma,
+                                         unsigned long address, pud_t *pudp)
+{
+       BUILD_BUG();    /* no PUD THP support: any surviving call is a bug */
+       return 0;
+}
+#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
+#endif /* __HAVE_ARCH_PUDP_CLEAR_YOUNG_FLUSH */
+
 #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
                                       unsigned long address,
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 3a6adfa70fb0..0af61dd193d2 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -206,6 +206,7 @@ struct page_vma_mapped_walk {
        struct page *page;
        struct vm_area_struct *vma;
        unsigned long address;
+       pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        spinlock_t *ptl;
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 5e77b269c330..d9d39ec06e21 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -145,9 +145,12 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
        struct page *page = pvmw->page;
        pgd_t *pgd;
        p4d_t *p4d;
-       pud_t *pud;
+       pud_t pude;
        pmd_t pmde;
 
+       if (!pvmw->pte && !pvmw->pmd && pvmw->pud)
+               return not_found(pvmw);
+
        /* The only possible pmd mapping has been handled on last iteration */
        if (pvmw->pmd && !pvmw->pte)
                return not_found(pvmw);
@@ -174,10 +177,31 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
        p4d = p4d_offset(pgd, pvmw->address);
        if (!p4d_present(*p4d))
                return false;
-       pud = pud_offset(p4d, pvmw->address);
-       if (!pud_present(*pud))
+       pvmw->pud = pud_offset(p4d, pvmw->address);
+
+       /*
+        * Make sure the pud value isn't cached in a register by the
+        * compiler and used as a stale value after we've observed a
+        * subsequent update.
+        */
+       pude = READ_ONCE(*pvmw->pud);
+       if (pud_trans_huge(pude)) {
+               pvmw->ptl = pud_lock(mm, pvmw->pud);
+               if (likely(pud_trans_huge(*pvmw->pud))) {
+                       if (pvmw->flags & PVMW_MIGRATION)
+                               return not_found(pvmw);
+                       if (pud_page(*pvmw->pud) != page)
+                               return not_found(pvmw);
+                       return true;
+               } else {
+                       /* THP pud was split under us: handle on pmd level */
+                       spin_unlock(pvmw->ptl);
+                       pvmw->ptl = NULL;
+               }
+       } else if (!pud_present(pude))
                return false;
-       pvmw->pmd = pmd_offset(pud, pvmw->address);
+
+       pvmw->pmd = pmd_offset(pvmw->pud, pvmw->address);
        /*
         * Make sure the pmd value isn't cached in a register by the
         * compiler and used as a stale value after we've observed a
@@ -213,6 +237,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
        } else if (!pmd_present(pmde)) {
                return false;
        }
+
        if (!map_pte(pvmw))
                goto next_pte;
        while (1) {
diff --git a/mm/rmap.c b/mm/rmap.c
index 10195a2421cf..77cec0658b76 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -803,9 +803,15 @@ static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
                                        referenced++;
                        }
                } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
-                       if (pmdp_clear_flush_young_notify(vma, address,
-                                               pvmw.pmd))
-                               referenced++;
+                       if (pvmw.pmd) {
+                               if (pmdp_clear_flush_young_notify(vma, address,
+                                                       pvmw.pmd))
+                                       referenced++;
+                       } else if (pvmw.pud) {
+                               if (pudp_clear_flush_young_notify(vma, address,
+                                                       pvmw.pud))
+                                       referenced++;
+                       }
                } else {
                        /* unexpected pmd-mapped page? */
                        WARN_ON_ONCE(1);
-- 
2.28.0

Reply via email to