On 16 Sep 2025, at 8:21, Balbir Singh wrote:

> Extend migrate_vma_collect_pmd() to handle partially mapped large folios
> that require splitting before migration can proceed.
>
> During PTE walk in the collection phase, if a large folio is only
> partially mapped in the migration range, it must be split to ensure the
> folio is correctly migrated.
>
> Signed-off-by: Balbir Singh <[email protected]>
> Cc: David Hildenbrand <[email protected]>
> Cc: Zi Yan <[email protected]>
> Cc: Joshua Hahn <[email protected]>
> Cc: Rakie Kim <[email protected]>
> Cc: Byungchul Park <[email protected]>
> Cc: Gregory Price <[email protected]>
> Cc: Ying Huang <[email protected]>
> Cc: Alistair Popple <[email protected]>
> Cc: Oscar Salvador <[email protected]>
> Cc: Lorenzo Stoakes <[email protected]>
> Cc: Baolin Wang <[email protected]>
> Cc: "Liam R. Howlett" <[email protected]>
> Cc: Nico Pache <[email protected]>
> Cc: Ryan Roberts <[email protected]>
> Cc: Dev Jain <[email protected]>
> Cc: Barry Song <[email protected]>
> Cc: Lyude Paul <[email protected]>
> Cc: Danilo Krummrich <[email protected]>
> Cc: David Airlie <[email protected]>
> Cc: Simona Vetter <[email protected]>
> Cc: Ralph Campbell <[email protected]>
> Cc: Mika Penttilä <[email protected]>
> Cc: Matthew Brost <[email protected]>
> Cc: Francois Dugast <[email protected]>
> ---
>  mm/migrate_device.c | 82 +++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 82 insertions(+)
>
> diff --git a/mm/migrate_device.c b/mm/migrate_device.c
> index abd9f6850db6..70c0601f70ea 100644
> --- a/mm/migrate_device.c
> +++ b/mm/migrate_device.c
> @@ -54,6 +54,53 @@ static int migrate_vma_collect_hole(unsigned long start,
>       return 0;
>  }
>
> +/**
> + * migrate_vma_split_folio() - Helper function to split a THP folio
> + * @folio: the folio to split
> + * @fault_page: struct page associated with the fault if any
> + *
> + * Returns 0 on success
> + */
> +static int migrate_vma_split_folio(struct folio *folio,
> +                                struct page *fault_page)
> +{
> +     int ret;
> +     struct folio *fault_folio = fault_page ? page_folio(fault_page) : NULL;
> +     struct folio *new_fault_folio = NULL;
> +
> +     if (folio != fault_folio) {
> +             folio_get(folio);
> +             folio_lock(folio);
> +     }
> +
> +     ret = split_folio(folio);
> +     if (ret) {
> +             if (folio != fault_folio) {
> +                     folio_unlock(folio);
> +                     folio_put(folio);
> +             }
> +             return ret;
> +     }
> +
> +     new_fault_folio = fault_page ? page_folio(fault_page) : NULL;
> +
> +     /*
> +      * Ensure the lock is held on the correct
> +      * folio after the split
> +      */
> +     if (!new_fault_folio) {
> +             folio_unlock(folio);
> +             folio_put(folio);
> +     } else if (folio != new_fault_folio) {
> +             folio_get(new_fault_folio);
> +             folio_lock(new_fault_folio);
> +             folio_unlock(folio);
> +             folio_put(folio);
> +     }
> +
> +     return 0;
> +}
> +
>  static int migrate_vma_collect_pmd(pmd_t *pmdp,
>                                  unsigned long start,
>                                  unsigned long end,
> @@ -136,6 +183,8 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
>                        * page table entry. Other special swap entries are not
>                        * migratable, and we ignore regular swapped page.
>                        */
> +                     struct folio *folio;
> +
>                       entry = pte_to_swp_entry(pte);
>                       if (!is_device_private_entry(entry))
>                               goto next;
> @@ -147,6 +196,23 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
>                           pgmap->owner != migrate->pgmap_owner)
>                               goto next;
>
> +                     folio = page_folio(page);
> +                     if (folio_test_large(folio)) {
> +                             int ret;
> +
> +                             pte_unmap_unlock(ptep, ptl);
> +                             ret = migrate_vma_split_folio(folio,
> +                                                       migrate->fault_page);
> +
> +                             if (ret) {
> +                                     ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
> +                                     goto next;
> +                             }
> +
> +                             addr = start;
> +                             goto again;
> +                     }

This does not look right to me.

The folio here is device private, but migrate_vma_split_folio()
calls split_folio(), which cannot handle device private folios yet.
Your change that teaches split_folio() to handle them is in Patch 10,
so that patch should be reordered to come before this one.

> +
>                       mpfn = migrate_pfn(page_to_pfn(page)) |
>                                       MIGRATE_PFN_MIGRATE;
>                       if (is_writable_device_private_entry(entry))
> @@ -171,6 +237,22 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
>                                       pgmap->owner != migrate->pgmap_owner)
>                                       goto next;
>                       }
> +                     folio = page ? page_folio(page) : NULL;
> +                     if (folio && folio_test_large(folio)) {
> +                             int ret;
> +
> +                             pte_unmap_unlock(ptep, ptl);
> +                             ret = migrate_vma_split_folio(folio,
> +                                                       migrate->fault_page);
> +
> +                             if (ret) {
> +                                     ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
> +                                     goto next;
> +                             }
> +
> +                             addr = start;
> +                             goto again;
> +                     }
>                       mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
>                       mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
>               }
> -- 
> 2.50.1


--
Best Regards,
Yan, Zi

Reply via email to