Extend migrate_vma_collect_pmd() to handle partially mapped large folios that require splitting before migration can proceed.
During the PTE walk in the collection phase, if a large folio is only partially mapped in the migration range, it must be split to ensure the folio is migrated correctly. Cc: Andrew Morton <a...@linux-foundation.org> Cc: David Hildenbrand <da...@redhat.com> Cc: Zi Yan <z...@nvidia.com> Cc: Joshua Hahn <joshua.hah...@gmail.com> Cc: Rakie Kim <rakie....@sk.com> Cc: Byungchul Park <byungc...@sk.com> Cc: Gregory Price <gou...@gourry.net> Cc: Ying Huang <ying.hu...@linux.alibaba.com> Cc: Alistair Popple <apop...@nvidia.com> Cc: Oscar Salvador <osalva...@suse.de> Cc: Lorenzo Stoakes <lorenzo.stoa...@oracle.com> Cc: Baolin Wang <baolin.w...@linux.alibaba.com> Cc: "Liam R. Howlett" <liam.howl...@oracle.com> Cc: Nico Pache <npa...@redhat.com> Cc: Ryan Roberts <ryan.robe...@arm.com> Cc: Dev Jain <dev.j...@arm.com> Cc: Barry Song <bao...@kernel.org> Cc: Lyude Paul <ly...@redhat.com> Cc: Danilo Krummrich <d...@kernel.org> Cc: David Airlie <airl...@gmail.com> Cc: Simona Vetter <sim...@ffwll.ch> Cc: Ralph Campbell <rcampb...@nvidia.com> Cc: Mika Penttilä <mpent...@redhat.com> Cc: Matthew Brost <matthew.br...@intel.com> Cc: Francois Dugast <francois.dug...@intel.com> Signed-off-by: Balbir Singh <balb...@nvidia.com> --- mm/migrate_device.c | 94 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/mm/migrate_device.c b/mm/migrate_device.c index abd9f6850db6..f45ef182287d 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -54,6 +54,53 @@ static int migrate_vma_collect_hole(unsigned long start, return 0; } +/** + * migrate_vma_split_folio() - Helper function to split a THP folio + * @folio: the folio to split + * @fault_page: struct page associated with the fault if any + * + * Returns 0 on success + */ +static int migrate_vma_split_folio(struct folio *folio, + struct page *fault_page) +{ + int ret; + struct folio *fault_folio = fault_page ? 
page_folio(fault_page) : NULL; + struct folio *new_fault_folio = NULL; + + if (folio != fault_folio) { + folio_get(folio); + folio_lock(folio); + } + + ret = split_folio(folio); + if (ret) { + if (folio != fault_folio) { + folio_unlock(folio); + folio_put(folio); + } + return ret; + } + + new_fault_folio = fault_page ? page_folio(fault_page) : NULL; + + /* + * Ensure the lock is held on the correct + * folio after the split + */ + if (!new_fault_folio) { + folio_unlock(folio); + folio_put(folio); + } else if (folio != new_fault_folio) { + folio_get(new_fault_folio); + folio_lock(new_fault_folio); + folio_unlock(folio); + folio_put(folio); + } + + return 0; +} + static int migrate_vma_collect_pmd(pmd_t *pmdp, unsigned long start, unsigned long end, @@ -136,6 +183,8 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, * page table entry. Other special swap entries are not * migratable, and we ignore regular swapped page. */ + struct folio *folio; + entry = pte_to_swp_entry(pte); if (!is_device_private_entry(entry)) goto next; @@ -147,6 +196,29 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, pgmap->owner != migrate->pgmap_owner) goto next; + folio = page_folio(page); + if (folio_test_large(folio)) { + int ret; + + /* + * The reason for finding pmd present with a + * large folio for the pte is partial unmaps. 
+ * Split the folio now for the migration to be + * handled correctly + */ + pte_unmap_unlock(ptep, ptl); + ret = migrate_vma_split_folio(folio, + migrate->fault_page); + + if (ret) { + ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); + goto next; + } + + addr = start; + goto again; + } + mpfn = migrate_pfn(page_to_pfn(page)) | MIGRATE_PFN_MIGRATE; if (is_writable_device_private_entry(entry)) @@ -171,6 +243,28 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, pgmap->owner != migrate->pgmap_owner) goto next; } + folio = page_folio(page); + if (folio_test_large(folio)) { + int ret; + + /* + * The reason for finding pmd present with a + * large folio for the pte is partial unmaps. + * Split the folio now for the migration to be + * handled correctly + */ + pte_unmap_unlock(ptep, ptl); + ret = migrate_vma_split_folio(folio, + migrate->fault_page); + + if (ret) { + ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); + goto next; + } + + addr = start; + goto again; + } mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE; mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0; } -- 2.50.1