On Fri, Nov 28, 2025 at 03:41:44PM +1100, Jordan Niethe wrote:
> A future change will remove device private pages from the physical
> address space. This will mean that device private pages no longer have
> normal PFN and must be handled separately.
> 
> When migrating a device private page a migration entry is created for
> that page - this includes the PFN for that page. Once device private
> PFNs exist in a different address space to regular PFNs we need to be
> able to determine which kind of PFN is in the entry so we can associate
> it with the correct page.
> 
> Introduce new swap types:
> 
>   - SWP_MIGRATION_DEVICE_READ
>   - SWP_MIGRATION_DEVICE_WRITE
>   - SWP_MIGRATION_DEVICE_READ_EXCLUSIVE
> 
> These correspond to
> 
>   - SWP_MIGRATION_READ
>   - SWP_MIGRATION_WRITE
>   - SWP_MIGRATION_READ_EXCLUSIVE
> 
> except the swap entry contains a device private PFN.
> 
> The existing helpers such as is_writable_migration_entry() will still
> return true for a SWP_MIGRATION_DEVICE_WRITE entry.
> 
> Introduce new helpers such as
> is_writable_device_migration_private_entry() to disambiguate between a
> SWP_MIGRATION_WRITE and a SWP_MIGRATION_DEVICE_WRITE entry.
> 
> Signed-off-by: Jordan Niethe <[email protected]>
> Signed-off-by: Alistair Popple <[email protected]>
> ---
>  include/linux/swap.h    |  8 +++-
>  include/linux/swapops.h | 87 ++++++++++++++++++++++++++++++++++++++---
>  mm/memory.c             |  9 ++++-
>  mm/migrate.c            |  2 +-
>  mm/migrate_device.c     | 31 ++++++++++-----
>  mm/mprotect.c           | 21 +++++++---
>  mm/page_vma_mapped.c    |  2 +-
>  mm/pagewalk.c           |  3 +-
>  mm/rmap.c               | 32 ++++++++++-----
>  9 files changed, 161 insertions(+), 34 deletions(-)
> 
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index e818fbade1e2..87f14d673979 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -74,12 +74,18 @@ static inline int current_is_kswapd(void)
>   *
>   * When a page is mapped by the device for exclusive access we set the CPU 
> page
>   * table entries to a special SWP_DEVICE_EXCLUSIVE entry.
> + *
> + * Because device private pages do not use regular PFNs, special migration
> + * entries are also needed.
>   */
>  #ifdef CONFIG_DEVICE_PRIVATE
> -#define SWP_DEVICE_NUM 3
> +#define SWP_DEVICE_NUM 6
>  #define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM)
>  #define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1)
>  #define SWP_DEVICE_EXCLUSIVE 
> (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+2)
> +#define SWP_MIGRATION_DEVICE_READ 
> (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+3)
> +#define SWP_MIGRATION_DEVICE_READ_EXCLUSIVE 
> (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+4)
> +#define SWP_MIGRATION_DEVICE_WRITE 
> (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+5)
>  #else
>  #define SWP_DEVICE_NUM 0
>  #endif
> diff --git a/include/linux/swapops.h b/include/linux/swapops.h
> index 64ea151a7ae3..7aa3f00e304a 100644
> --- a/include/linux/swapops.h
> +++ b/include/linux/swapops.h
> @@ -196,6 +196,43 @@ static inline bool is_device_exclusive_entry(swp_entry_t 
> entry)
>       return swp_type(entry) == SWP_DEVICE_EXCLUSIVE;
>  }
>  
> +static inline swp_entry_t 
> make_readable_migration_device_private_entry(pgoff_t offset)
> +{
> +     return swp_entry(SWP_MIGRATION_DEVICE_READ, offset);
> +}
> +
> +static inline swp_entry_t 
> make_writable_migration_device_private_entry(pgoff_t offset)
> +{
> +     return swp_entry(SWP_MIGRATION_DEVICE_WRITE, offset);
> +}
> +
> +static inline bool is_device_private_migration_entry(swp_entry_t entry)
> +{
> +     return unlikely(swp_type(entry) == SWP_MIGRATION_DEVICE_READ ||
> +                     swp_type(entry) == SWP_MIGRATION_DEVICE_READ_EXCLUSIVE 
> ||
> +                     swp_type(entry) == SWP_MIGRATION_DEVICE_WRITE);
> +}
> +
> +static inline bool is_readable_device_migration_private_entry(swp_entry_t 
> entry)
> +{
> +     return unlikely(swp_type(entry) == SWP_MIGRATION_DEVICE_READ);
> +}
> +
> +static inline bool is_writable_device_migration_private_entry(swp_entry_t 
> entry)
> +{
> +     return unlikely(swp_type(entry) == SWP_MIGRATION_DEVICE_WRITE);
> +}
> +
> +static inline swp_entry_t 
> make_device_migration_readable_exclusive_migration_entry(pgoff_t offset)
> +{
> +     return swp_entry(SWP_MIGRATION_DEVICE_READ_EXCLUSIVE, offset);
> +}
> +
> +static inline bool is_device_migration_readable_exclusive_entry(swp_entry_t 
> entry)
> +{
> +     return swp_type(entry) == SWP_MIGRATION_DEVICE_READ_EXCLUSIVE;
> +}

The names of these new helpers are inconsistent with each other.

Maybe rename make_device_migration_readable_exclusive_migration_entry to
make_readable_exclusive_migration_device_private_entry, and
is_device_migration_readable_exclusive_entry to
is_readable_exclusive_device_private_migration_entry?


>  #else /* CONFIG_DEVICE_PRIVATE */
>  static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset)
>  {
> @@ -217,6 +254,11 @@ static inline bool 
> is_writable_device_private_entry(swp_entry_t entry)
>       return false;
>  }
>  
> +static inline bool is_readable_device_migration_private_entry(swp_entry_t 
> entry)
> +{
> +     return false;
> +}
> +
>  static inline swp_entry_t make_device_exclusive_entry(pgoff_t offset)
>  {
>       return swp_entry(0, 0);
> @@ -227,6 +269,36 @@ static inline bool is_device_exclusive_entry(swp_entry_t 
> entry)
>       return false;
>  }
>  
> +static inline swp_entry_t 
> make_readable_migration_device_private_entry(pgoff_t offset)
> +{
> +     return swp_entry(0, 0);
> +}
> +
> +static inline swp_entry_t 
> make_writable_migration_device_private_entry(pgoff_t offset)
> +{
> +     return swp_entry(0, 0);
> +}
> +
> +static inline bool is_device_private_migration_entry(swp_entry_t entry)
> +{
> +     return false;
> +}
> +
> +static inline bool is_writable_device_migration_private_entry(swp_entry_t 
> entry)
> +{
> +     return false;
> +}
> +
> +static inline swp_entry_t 
> make_device_migration_readable_exclusive_migration_entry(pgoff_t offset)
> +{
> +     return swp_entry(0, 0);
> +}
> +
> +static inline bool is_device_migration_readable_exclusive_entry(swp_entry_t 
> entry)
> +{
> +     return false;
> +}
> +
>  #endif /* CONFIG_DEVICE_PRIVATE */
>  
>  #ifdef CONFIG_MIGRATION
> @@ -234,22 +306,26 @@ static inline int is_migration_entry(swp_entry_t entry)
>  {
>       return unlikely(swp_type(entry) == SWP_MIGRATION_READ ||
>                       swp_type(entry) == SWP_MIGRATION_READ_EXCLUSIVE ||
> -                     swp_type(entry) == SWP_MIGRATION_WRITE);
> +                     swp_type(entry) == SWP_MIGRATION_WRITE ||
> +                     is_device_private_migration_entry(entry));
>  }
>  
>  static inline int is_writable_migration_entry(swp_entry_t entry)
>  {
> -     return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE);
> +     return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE ||
> +                     is_writable_device_migration_private_entry(entry));
>  }
>  
>  static inline int is_readable_migration_entry(swp_entry_t entry)
>  {
> -     return unlikely(swp_type(entry) == SWP_MIGRATION_READ);
> +     return unlikely(swp_type(entry) == SWP_MIGRATION_READ ||
> +                     is_readable_device_migration_private_entry(entry));
>  }
>  
>  static inline int is_readable_exclusive_migration_entry(swp_entry_t entry)
>  {
> -     return unlikely(swp_type(entry) == SWP_MIGRATION_READ_EXCLUSIVE);
> +     return unlikely(swp_type(entry) == SWP_MIGRATION_READ_EXCLUSIVE ||
> +                     is_device_migration_readable_exclusive_entry(entry));
>  }
>  
>  static inline swp_entry_t make_readable_migration_entry(pgoff_t offset)
> @@ -525,7 +601,8 @@ static inline bool is_pfn_swap_entry(swp_entry_t entry)
>       BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS);
>  
>       return is_migration_entry(entry) || is_device_private_entry(entry) ||
> -            is_device_exclusive_entry(entry) || is_hwpoison_entry(entry);
> +            is_device_exclusive_entry(entry) || is_hwpoison_entry(entry) ||
> +            is_device_private_migration_entry(entry);
>  }
>  
>  struct page_vma_mapped_walk;
> diff --git a/mm/memory.c b/mm/memory.c
> index b59ae7ce42eb..f1ed361434ff 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -962,8 +962,13 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct 
> mm_struct *src_mm,
>                        * to be set to read. A previously exclusive entry is
>                        * now shared.
>                        */
> -                     entry = make_readable_migration_entry(
> -                                                     swp_offset(entry));
> +                     if (is_device_private_migration_entry(entry))
> +                             entry = 
> make_readable_migration_device_private_entry(
> +                                                             
> swp_offset(entry));
> +                     else
> +                             entry = make_readable_migration_entry(
> +                                                             
> swp_offset(entry));
> +
>                       pte = swp_entry_to_pte(entry);
>                       if (pte_swp_soft_dirty(orig_pte))
>                               pte = pte_swp_mksoft_dirty(pte);
> diff --git a/mm/migrate.c b/mm/migrate.c
> index c0e9f15be2a2..3c561d61afba 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -495,7 +495,7 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t 
> *pmd,
>               goto out;
>  
>       entry = pte_to_swp_entry(pte);
> -     if (!is_migration_entry(entry))
> +     if (!(is_migration_entry(entry)))
>               goto out;
>  
>       migration_entry_wait_on_locked(entry, ptl);
> diff --git a/mm/migrate_device.c b/mm/migrate_device.c
> index 82f09b24d913..458b5114bb2b 100644
> --- a/mm/migrate_device.c
> +++ b/mm/migrate_device.c
> @@ -235,15 +235,28 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
>                               folio_mark_dirty(folio);
>  
>                       /* Setup special migration page table entry */
> -                     if (mpfn & MIGRATE_PFN_WRITE)
> -                             entry = make_writable_migration_entry(
> -                                                     page_to_pfn(page));
> -                     else if (anon_exclusive)
> -                             entry = make_readable_exclusive_migration_entry(
> -                                                     page_to_pfn(page));
> -                     else
> -                             entry = make_readable_migration_entry(
> -                                                     page_to_pfn(page));
> +                     if (mpfn & MIGRATE_PFN_WRITE) {
> +                             if (is_device_private_page(page))
> +                                     entry = 
> make_writable_migration_device_private_entry(
> +                                                             
> page_to_pfn(page));
> +                             else
> +                                     entry = make_writable_migration_entry(
> +                                                             
> page_to_pfn(page));
> +                     } else if (anon_exclusive) {
> +                             if (is_device_private_page(page))
> +                                     entry = 
> make_device_migration_readable_exclusive_migration_entry(
> +                                                             
> page_to_pfn(page));
> +                             else
> +                                     entry = 
> make_readable_exclusive_migration_entry(
> +                                                             
> page_to_pfn(page));
> +                     } else {
> +                             if (is_device_private_page(page))
> +                                     entry = 
> make_readable_migration_device_private_entry(
> +                                                             
> page_to_pfn(page));
> +                             else
> +                                     entry = make_readable_migration_entry(
> +                                                             
> page_to_pfn(page));
> +                     }
>                       if (pte_present(pte)) {
>                               if (pte_young(pte))
>                                       entry = 
> make_migration_entry_young(entry);
> diff --git a/mm/mprotect.c b/mm/mprotect.c
> index 113b48985834..7d79a0f53bf5 100644
> --- a/mm/mprotect.c
> +++ b/mm/mprotect.c
> @@ -365,11 +365,22 @@ static long change_pte_range(struct mmu_gather *tlb,
>                                * A protection check is difficult so
>                                * just be safe and disable write
>                                */
> -                             if (folio_test_anon(folio))
> -                                     entry = 
> make_readable_exclusive_migration_entry(
> -                                                          swp_offset(entry));
> -                             else
> -                                     entry = 
> make_readable_migration_entry(swp_offset(entry));
> +                             if 
> (!is_writable_device_migration_private_entry(entry)) {
> +                                     if (folio_test_anon(folio))
> +                                             entry = 
> make_readable_exclusive_migration_entry(
> +                                                             
> swp_offset(entry));
> +                                     else
> +                                             entry = 
> make_readable_migration_entry(
> +                                                             
> swp_offset(entry));
> +                             } else {
> +                                     if (folio_test_anon(folio))
> +                                             entry = 
> make_device_migration_readable_exclusive_migration_entry(
> +                                                             
> swp_offset(entry));
> +                                     else
> +                                             entry = 
> make_readable_migration_device_private_entry(
> +                                                             
> swp_offset(entry));
> +                             }
> +
>                               newpte = swp_entry_to_pte(entry);
>                               if (pte_swp_soft_dirty(oldpte))
>                                       newpte = pte_swp_mksoft_dirty(newpte);
> diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
> index 9146bd084435..e9fe747d3df3 100644
> --- a/mm/page_vma_mapped.c
> +++ b/mm/page_vma_mapped.c
> @@ -112,7 +112,7 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw, 
> unsigned long pte_nr)
>                       return false;
>               entry = pte_to_swp_entry(ptent);
>  
> -             if (!is_migration_entry(entry))
> +             if (!(is_migration_entry(entry)))
>                       return false;
>  
>               pfn = swp_offset_pfn(entry);
> diff --git a/mm/pagewalk.c b/mm/pagewalk.c
> index 9f91cf85a5be..f5c77dda3359 100644
> --- a/mm/pagewalk.c
> +++ b/mm/pagewalk.c
> @@ -1003,7 +1003,8 @@ struct folio *folio_walk_start(struct folio_walk *fw,
>               swp_entry_t entry = pte_to_swp_entry(pte);
>  
>               if ((flags & FW_MIGRATION) &&
> -                 is_migration_entry(entry)) {
> +                 (is_migration_entry(entry) ||
> +                  is_device_private_migration_entry(entry))) {
>                       page = pfn_swap_entry_to_page(entry);
>                       expose_page = false;
>                       goto found;
> diff --git a/mm/rmap.c b/mm/rmap.c
> index e94500318f92..9642a79cbdb4 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -2535,15 +2535,29 @@ static bool try_to_migrate_one(struct folio *folio, 
> struct vm_area_struct *vma,
>                        * pte. do_swap_page() will wait until the migration
>                        * pte is removed and then restart fault handling.
>                        */
> -                     if (writable)
> -                             entry = make_writable_migration_entry(
> -                                                     page_to_pfn(subpage));
> -                     else if (anon_exclusive)
> -                             entry = make_readable_exclusive_migration_entry(
> -                                                     page_to_pfn(subpage));
> -                     else
> -                             entry = make_readable_migration_entry(
> -                                                     page_to_pfn(subpage));
> +                     if (writable) {
> +                             if (is_device_private_page(subpage))
> +                                     entry = 
> make_writable_migration_device_private_entry(
> +                                                             
> page_to_pfn(subpage));
> +                             else
> +                                     entry = make_writable_migration_entry(
> +                                                             
> page_to_pfn(subpage));
> +                     } else if (anon_exclusive) {
> +                             if (is_device_private_page(subpage))
> +                                     entry = 
> make_device_migration_readable_exclusive_migration_entry(
> +                                                             
> page_to_pfn(subpage));
> +                             else
> +                                     entry = 
> make_readable_exclusive_migration_entry(
> +                                                             
> page_to_pfn(subpage));
> +                     } else {
> +                             if (is_device_private_page(subpage))
> +                                     entry = 
> make_readable_migration_device_private_entry(
> +                                                             
> page_to_pfn(subpage));
> +                             else
> +                                     entry = make_readable_migration_entry(
> +                                                             
> page_to_pfn(subpage));
> +                     }
> +
>                       if (likely(pte_present(pteval))) {
>                               if (pte_young(pteval))
>                                       entry = 
> make_migration_entry_young(entry);
> -- 
> 2.34.1
> 

Thanks,
Chih-En Lin

Reply via email to