Rework __split_huge_pmd_locked() to classify huge PMDs by the PMD entry
itself instead of starting from vma_is_anonymous().

Present PMDs are classified with vm_normal_folio_pmd(): file/shmem THPs
are dropped and refaulted later, anonymous THPs are split into PTEs, and
PMDs without a normal folio are either the huge zero PMD, which is split
via __split_huge_zero_page_pmd(), or special PMDs, which are dropped.

Non-present PMDs are classified with pmd_to_softleaf_folio(): file/shmem
migration entries are dropped, while anonymous migration/device-private
entries are split into PTEs.

This also makes the anon-vs-file decision folio-based rather than
VMA-based.  A private file mapping that has CoW'ed to an anonymous THP
now follows the anonymous path even though the VMA is file-backed.

No intended behavioural change.

Signed-off-by: Yin Tirui <[email protected]>
---
 mm/huge_memory.c | 197 +++++++++++++++++++++++++++--------------------
 1 file changed, 114 insertions(+), 83 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3964258ff91d..8cd77389d52f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3136,25 +3136,38 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 
        count_vm_event(THP_SPLIT_PMD);
 
-       if (!vma_is_anonymous(vma)) {
-               old_pmd = pmdp_huge_clear_flush(vma, haddr, pmd);
-               /*
-                * We are going to unmap this huge page. So
-                * just go ahead and zap it
-                */
-               if (arch_needs_pgtable_deposit())
-                       zap_deposited_table(mm, pmd);
-               if (vma_is_special_huge(vma))
-                       return;
-               if (unlikely(pmd_is_migration_entry(old_pmd))) {
-                       const softleaf_t old_entry = softleaf_from_pmd(old_pmd);
+       if (pmd_present(*pmd)) {
+               folio = vm_normal_folio_pmd(vma, haddr, *pmd);
+
+               if (unlikely(!folio)) {
+                       if (is_huge_zero_pmd(*pmd)) {
+                               /*
+                                * FIXME: Do we want to invalidate secondary mmu by calling
+                                * mmu_notifier_arch_invalidate_secondary_tlbs() see comments below
+                                * inside __split_huge_pmd() ?
+                                *
+                                * We are going from a zero huge page write protected to zero
+                                * small page also write protected so it does not seems useful
+                                * to invalidate secondary mmu at this time.
+                                */
+                               return __split_huge_zero_page_pmd(vma, haddr, 
pmd);
+                       }
 
-                       folio = softleaf_to_folio(old_entry);
-               } else if (is_huge_zero_pmd(old_pmd)) {
+                       /* Present but not a normal folio: drop the PMD. */
+                       old_pmd = pmdp_huge_clear_flush(vma, haddr, pmd);
+                       if (arch_needs_pgtable_deposit())
+                               zap_deposited_table(mm, pmd);
                        return;
-               } else {
+               }
+
+               if (unlikely(!folio_test_anon(folio))) {
+                       old_pmd = pmdp_huge_clear_flush(vma, haddr, pmd);
+                       if (arch_needs_pgtable_deposit())
+                               zap_deposited_table(mm, pmd);
+                       if (vma_is_special_huge(vma))
+                               return;
+
                        page = pmd_page(old_pmd);
-                       folio = page_folio(page);
                        if (!folio_test_dirty(folio) && pmd_dirty(old_pmd))
                                folio_mark_dirty(folio);
                        if (!folio_test_referenced(folio) && pmd_young(old_pmd))
@@ -3164,72 +3177,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                        folio_put(folio);
                        return;
                }
-               add_mm_counter(mm, mm_counter_file(folio), -HPAGE_PMD_NR);
-               return;
-       }
-
-       if (is_huge_zero_pmd(*pmd)) {
-               /*
-                * FIXME: Do we want to invalidate secondary mmu by calling
-                * mmu_notifier_arch_invalidate_secondary_tlbs() see comments below
-                * inside __split_huge_pmd() ?
-                *
-                * We are going from a zero huge page write protected to zero
-                * small page also write protected so it does not seems useful
-                * to invalidate secondary mmu at this time.
-                */
-               return __split_huge_zero_page_pmd(vma, haddr, pmd);
-       }
-
-       if (pmd_is_migration_entry(*pmd)) {
-               softleaf_t entry;
-
-               old_pmd = *pmd;
-               entry = softleaf_from_pmd(old_pmd);
-               page = softleaf_to_page(entry);
-               folio = page_folio(page);
-
-               soft_dirty = pmd_swp_soft_dirty(old_pmd);
-               uffd_wp = pmd_swp_uffd_wp(old_pmd);
-
-               write = softleaf_is_migration_write(entry);
-               if (PageAnon(page))
-                       anon_exclusive = 
softleaf_is_migration_read_exclusive(entry);
-               young = softleaf_is_migration_young(entry);
-               dirty = softleaf_is_migration_dirty(entry);
-       } else if (pmd_is_device_private_entry(*pmd)) {
-               softleaf_t entry;
-
-               old_pmd = *pmd;
-               entry = softleaf_from_pmd(old_pmd);
-               page = softleaf_to_page(entry);
-               folio = page_folio(page);
-
-               soft_dirty = pmd_swp_soft_dirty(old_pmd);
-               uffd_wp = pmd_swp_uffd_wp(old_pmd);
-
-               write = softleaf_is_device_private_write(entry);
-               anon_exclusive = PageAnonExclusive(page);
-
-               /*
-                * Device private THP should be treated the same as regular
-                * folios w.r.t anon exclusive handling. See the comments for
-                * folio handling and anon_exclusive below.
-                */
-               if (freeze && anon_exclusive &&
-                   folio_try_share_anon_rmap_pmd(folio, page))
-                       freeze = false;
-               if (!freeze) {
-                       rmap_t rmap_flags = RMAP_NONE;
-
-                       folio_ref_add(folio, HPAGE_PMD_NR - 1);
-                       if (anon_exclusive)
-                               rmap_flags |= RMAP_EXCLUSIVE;
 
-                       folio_add_anon_rmap_ptes(folio, page, HPAGE_PMD_NR,
-                                                vma, haddr, rmap_flags);
-               }
-       } else {
                /*
                 * Up to this point the pmd is present and huge and userland has
                 * the whole access to the hugepage during the split (which
@@ -3255,7 +3203,6 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                 */
                old_pmd = pmdp_invalidate(vma, haddr, pmd);
                page = pmd_page(old_pmd);
-               folio = page_folio(page);
                if (pmd_dirty(old_pmd)) {
                        dirty = true;
                        folio_set_dirty(folio);
@@ -3266,7 +3213,6 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                uffd_wp = pmd_uffd_wp(old_pmd);
 
                VM_WARN_ON_FOLIO(!folio_ref_count(folio), folio);
-               VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
 
                /*
                 * Without "freeze", we'll simply split the PMD, propagating the
@@ -3296,6 +3242,85 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                        folio_add_anon_rmap_ptes(folio, page, HPAGE_PMD_NR,
                                                 vma, haddr, rmap_flags);
                }
+       } else {
+               /*
+                * Non-present PMD: a softleaf-encoded migration or
+                * device-private entry. pmd_to_softleaf_folio() warns and
+                * returns NULL for any other encoding.
+                */
+               folio = pmd_to_softleaf_folio(*pmd);
+               if (unlikely(!folio))
+                       return;
+
+               if (unlikely(!folio_test_anon(folio))) {
+                       /*
+                        * File/shmem migration entry: drop the PMD without
+                        * splitting. Unlike the present case the entry holds
+                        * neither a folio reference nor an rmap to release,
+                        * so just adjust the RSS counter.
+                        */
+                       pmdp_huge_clear_flush(vma, haddr, pmd);
+                       if (arch_needs_pgtable_deposit())
+                               zap_deposited_table(mm, pmd);
+                       if (unlikely(vma_is_special_huge(vma))) {
+                               VM_WARN_ONCE(1,
+                                            "unexpected special huge PMD 
migration entry\n");
+                               return;
+                       }
+                       add_mm_counter(mm, mm_counter_file(folio), 
-HPAGE_PMD_NR);
+                       return;
+               }
+
+               if (pmd_is_migration_entry(*pmd)) {
+                       softleaf_t entry;
+
+                       old_pmd = *pmd;
+                       entry = softleaf_from_pmd(old_pmd);
+                       page = softleaf_to_page(entry);
+
+                       soft_dirty = pmd_swp_soft_dirty(old_pmd);
+                       uffd_wp = pmd_swp_uffd_wp(old_pmd);
+
+                       write = softleaf_is_migration_write(entry);
+                       if (PageAnon(page))
+                               anon_exclusive = 
softleaf_is_migration_read_exclusive(entry);
+                       young = softleaf_is_migration_young(entry);
+                       dirty = softleaf_is_migration_dirty(entry);
+               } else if (pmd_is_device_private_entry(*pmd)) {
+                       softleaf_t entry;
+
+                       old_pmd = *pmd;
+                       entry = softleaf_from_pmd(old_pmd);
+                       page = softleaf_to_page(entry);
+
+                       soft_dirty = pmd_swp_soft_dirty(old_pmd);
+                       uffd_wp = pmd_swp_uffd_wp(old_pmd);
+
+                       write = softleaf_is_device_private_write(entry);
+                       anon_exclusive = PageAnonExclusive(page);
+
+                       /*
+                        * Device-private THP should be treated the same as
+                        * regular folios w.r.t. anon-exclusive handling. See
+                        * the matching code for present anon folios above.
+                        */
+                       if (freeze && anon_exclusive &&
+                           folio_try_share_anon_rmap_pmd(folio, page))
+                               freeze = false;
+                       if (!freeze) {
+                               rmap_t rmap_flags = RMAP_NONE;
+
+                               folio_ref_add(folio, HPAGE_PMD_NR - 1);
+                               if (anon_exclusive)
+                                       rmap_flags |= RMAP_EXCLUSIVE;
+
+                               folio_add_anon_rmap_ptes(folio, page, 
HPAGE_PMD_NR,
+                                                        vma, haddr, 
rmap_flags);
+                       }
+               } else {
+                       VM_WARN_ON_ONCE(1);
+                       return;
+               }
        }
 
        /*
-- 
2.43.0


Reply via email to