Hi,
On 5/1/26 12:17, Alistair Popple wrote:
On 2025-12-31 at 15:31 +1100, Jordan Niethe <[email protected]> wrote...
A future change will remove device private pages from the physical
address space. This will mean that device private pages no longer have
pfns and must be handled separately.
When migrating a device private page a migration entry is created for
that page - this includes the pfn for that page. Once device private
pages begin using device memory offsets instead of pfns we will need to
be able to determine which kind of value is in the entry so we can
associate it with the correct page.
Introduce new swap types:
- SWP_MIGRATION_DEVICE_READ
- SWP_MIGRATION_DEVICE_WRITE
- SWP_MIGRATION_DEVICE_READ_EXCLUSIVE
These correspond to
- SWP_MIGRATION_READ
- SWP_MIGRATION_WRITE
- SWP_MIGRATION_READ_EXCLUSIVE
except the swap entry contains a device private offset.
The existing helpers such as is_writable_migration_entry() will still
return true for a SWP_MIGRATION_DEVICE_WRITE entry.
Introduce new helpers such as
is_writable_device_migration_private_entry() to disambiguate between a
SWP_MIGRATION_WRITE and a SWP_MIGRATION_DEVICE_WRITE entry.
Introduce corresponding softleaf types and helpers.
Signed-off-by: Jordan Niethe <[email protected]>
Signed-off-by: Alistair Popple <[email protected]>
---
v1:
- Update for softleaf infrastructure
- Handle make_readable_migration_entry_from_page() and friends
- s/make_device_migration_readable_exclusive_migration_entry/make_readable_exclusive_migration_device_private_entry/
- s/is_device_migration_readable_exclusive_entry/is_readable_exclusive_device_private_migration_entry/
---
include/linux/leafops.h | 70 +++++++++++++++++++++++++++++++++----
include/linux/swap.h | 8 ++++-
include/linux/swapops.h | 76 +++++++++++++++++++++++++++++++++++++++++
mm/huge_memory.c | 21 +++++++++---
mm/memory.c | 10 ++++--
mm/mprotect.c | 21 +++++++++---
mm/page_vma_mapped.c | 3 +-
7 files changed, 188 insertions(+), 21 deletions(-)
diff --git a/include/linux/leafops.h b/include/linux/leafops.h
index cfafe7a5e7b1..a4a5c3ad647b 100644
--- a/include/linux/leafops.h
+++ b/include/linux/leafops.h
@@ -28,6 +28,9 @@ enum softleaf_type {
SOFTLEAF_DEVICE_PRIVATE_READ,
SOFTLEAF_DEVICE_PRIVATE_WRITE,
SOFTLEAF_DEVICE_EXCLUSIVE,
+ SOFTLEAF_MIGRATION_DEVICE_READ,
+ SOFTLEAF_MIGRATION_DEVICE_READ_EXCLUSIVE,
+ SOFTLEAF_MIGRATION_DEVICE_WRITE,
/* H/W posion types. */
SOFTLEAF_HWPOISON,
/* Marker types. */
@@ -165,6 +168,12 @@ static inline enum softleaf_type softleaf_type(softleaf_t
entry)
return SOFTLEAF_DEVICE_PRIVATE_READ;
case SWP_DEVICE_EXCLUSIVE:
return SOFTLEAF_DEVICE_EXCLUSIVE;
+ case SWP_MIGRATION_DEVICE_READ:
+ return SOFTLEAF_MIGRATION_DEVICE_READ;
+ case SWP_MIGRATION_DEVICE_WRITE:
+ return SOFTLEAF_MIGRATION_DEVICE_WRITE;
+ case SWP_MIGRATION_DEVICE_READ_EXCLUSIVE:
+ return SOFTLEAF_MIGRATION_DEVICE_READ_EXCLUSIVE;
#endif
#ifdef CONFIG_MEMORY_FAILURE
case SWP_HWPOISON:
@@ -190,16 +199,62 @@ static inline bool softleaf_is_swap(softleaf_t entry)
return softleaf_type(entry) == SOFTLEAF_SWAP;
}
+/**
+ * softleaf_is_migration_device_private() - Is this leaf entry a migration
+ * device private entry?
+ * @entry: Leaf entry.
+ *
+ * Matches the readable, writable and exclusive readable device private
+ * migration entry types.
+ *
+ * Returns: true if the leaf entry is a device private migration entry,
+ * otherwise false.
+ */
+static inline bool softleaf_is_migration_device_private(softleaf_t entry)
+{
+ switch (softleaf_type(entry)) {
+ case SOFTLEAF_MIGRATION_DEVICE_READ:
+ case SOFTLEAF_MIGRATION_DEVICE_WRITE:
+ case SOFTLEAF_MIGRATION_DEVICE_READ_EXCLUSIVE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/**
+ * softleaf_is_migration_device_private_write() - Is this leaf entry a writable
+ * device private migration entry?
+ * @entry: Leaf entry.
+ *
+ * Returns: true if the leaf entry is a writable device private migration
+ * entry, otherwise false.
+ */
+static inline bool softleaf_is_migration_device_private_write(softleaf_t entry)
+{
+ return softleaf_type(entry) == SOFTLEAF_MIGRATION_DEVICE_WRITE;
+}
+
+/**
+ * softleaf_is_migration_device_private_read_exclusive() - Is this leaf entry
+ * an exclusive readable device private migration entry?
+ * @entry: Leaf entry.
+ *
+ * Returns: true if the leaf entry is an exclusive readable device private
+ * migration entry, otherwise false.
+ */
+static inline bool
softleaf_is_migration_device_private_read_exclusive(softleaf_t entry)
+{
+ return softleaf_type(entry) == SOFTLEAF_MIGRATION_DEVICE_READ_EXCLUSIVE;
+}
+
/**
* softleaf_is_migration_write() - Is this leaf entry a writable migration
entry?
* @entry: Leaf entry.
*
- * Returns: true if the leaf entry is a writable migration entry, otherwise
- * false.
+ * Returns: true if the leaf entry is a writable migration entry or a writable
+ * device private migration entry, otherwise false.
*/
static inline bool softleaf_is_migration_write(softleaf_t entry)
{
- return softleaf_type(entry) == SOFTLEAF_MIGRATION_WRITE;
+ return softleaf_type(entry) == SOFTLEAF_MIGRATION_WRITE ||
+ softleaf_is_migration_device_private_write(entry);
}
/**
@@ -219,12 +274,13 @@ static inline bool softleaf_is_migration_read(softleaf_t
entry)
* readable migration entry?
* @entry: Leaf entry.
*
- * Returns: true if the leaf entry is an exclusive readable migration entry,
- * otherwise false.
+ * Returns: true if the leaf entry is an exclusive readable migration entry or
+ * exclusive readable device private migration entry, otherwise false.
*/
static inline bool softleaf_is_migration_read_exclusive(softleaf_t entry)
{
- return softleaf_type(entry) == SOFTLEAF_MIGRATION_READ_EXCLUSIVE;
+ return softleaf_type(entry) == SOFTLEAF_MIGRATION_READ_EXCLUSIVE ||
+ softleaf_is_migration_device_private_read_exclusive(entry);
}
/**
@@ -241,7 +297,7 @@ static inline bool softleaf_is_migration(softleaf_t entry)
case SOFTLEAF_MIGRATION_WRITE:
return true;
default:
- return false;
+ return softleaf_is_migration_device_private(entry);
}
}
See below for a bit more context but I'm not convinced we want to include device
private entries in the definition for softleaf_is_migration(), etc. It seems
to me like it would be better to make callers explicitly deal with the device
private cases separately which most seem to do anyway.
That is not generally the case - the only instances where the device
private cases are dealt with separately are when we are creating new
entries.
The advantage to treating the device private migration leaf entry as a
specialization of the migration leaf entry is that because most callers
are not creating new entries, they do not need to be updated to
accommodate the new entry type.
As an example - take hmm_vma_handle_pte() -
if (softleaf_is_migration(entry)) {
pte_unmap(ptep);
hmm_vma_walk->last = addr;
migration_entry_wait(walk->mm, pmdp, addr);
return -EBUSY;
}
We'd need to update this to be
if (softleaf_is_migration(entry) ||
softleaf_is_migration_device_private(entry))
There is no distinction between these cases so this is just noise.
That would be the case for the majority of the callers of
softleaf_is_migration(), softleaf_is_migration_read(),
softleaf_is_migration_read_exclusive(), softleaf_is_migration_write().
This specialization treatment is further convenient as it allows
make_readable_migration_entry_from_page() to create a device private
migration entry or a migration entry as required.
The code in change_pte_range() called out below as difficult to read
would actually be simplified by using the
make_.*_migration_entry_from_page() helpers, as that would remove the
conditional. The only extra consideration is making sure the
SWP_MIG_YOUNG_BIT and SWP_MIG_DIRTY_BIT bits are copied to the new swap
entry.
We also need to be consistent in the definitions - softleaf_is_migration(),
softleaf_is_migration_read_exclusive() and softleaf_is_migration_write()
all return true when encountering a device private entry but
softleaf_is_migration_read() does not.
Thanks - this is a mistake.
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 38ca3df68716..c15e3b3067cd 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -74,12 +74,18 @@ static inline int current_is_kswapd(void)
*
* When a page is mapped by the device for exclusive access we set the CPU
page
* table entries to a special SWP_DEVICE_EXCLUSIVE entry.
+ *
+ * Because device private pages do not use regular PFNs, special migration
+ * entries are also needed.
*/
#ifdef CONFIG_DEVICE_PRIVATE
-#define SWP_DEVICE_NUM 3
+#define SWP_DEVICE_NUM 6
#define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM)
#define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1)
#define SWP_DEVICE_EXCLUSIVE
(MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+2)
+#define SWP_MIGRATION_DEVICE_READ
(MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+3)
+#define SWP_MIGRATION_DEVICE_READ_EXCLUSIVE
(MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+4)
+#define SWP_MIGRATION_DEVICE_WRITE
(MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+5)
#else
#define SWP_DEVICE_NUM 0
#endif
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 72aa636fdb48..2bd01f97b4f0 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -148,6 +148,43 @@ static inline swp_entry_t
make_device_exclusive_entry(pgoff_t offset)
return swp_entry(SWP_DEVICE_EXCLUSIVE, offset);
}
+static inline swp_entry_t make_readable_migration_device_private_entry(pgoff_t offset)
+{
+ return swp_entry(SWP_MIGRATION_DEVICE_READ, offset);
+}
+
+static inline swp_entry_t make_writable_migration_device_private_entry(pgoff_t
offset)
+{
+ return swp_entry(SWP_MIGRATION_DEVICE_WRITE, offset);
+}
+
+static inline bool is_device_private_migration_entry(swp_entry_t entry)
+{
+ return unlikely(swp_type(entry) == SWP_MIGRATION_DEVICE_READ ||
+ swp_type(entry) == SWP_MIGRATION_DEVICE_READ_EXCLUSIVE
||
+ swp_type(entry) == SWP_MIGRATION_DEVICE_WRITE);
+}
+
+static inline bool is_readable_device_migration_private_entry(swp_entry_t
entry)
+{
+ return unlikely(swp_type(entry) == SWP_MIGRATION_DEVICE_READ);
+}
+
+static inline bool is_writable_device_migration_private_entry(swp_entry_t
entry)
+{
+ return unlikely(swp_type(entry) == SWP_MIGRATION_DEVICE_WRITE);
+}
+
+static inline swp_entry_t
make_readable_exclusive_migration_device_private_entry(pgoff_t offset)
+{
+ return swp_entry(SWP_MIGRATION_DEVICE_READ_EXCLUSIVE, offset);
+}
+
+static inline bool
is_readable_exclusive_device_private_migration_entry(swp_entry_t entry)
+{
+ return swp_type(entry) == SWP_MIGRATION_DEVICE_READ_EXCLUSIVE;
+}
+
#else /* CONFIG_DEVICE_PRIVATE */
static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset)
{
@@ -164,6 +201,36 @@ static inline swp_entry_t
make_device_exclusive_entry(pgoff_t offset)
return swp_entry(0, 0);
}
+static inline swp_entry_t make_readable_migration_device_private_entry(pgoff_t offset)
+{
+ return swp_entry(0, 0);
+}
+
+static inline swp_entry_t make_writable_migration_device_private_entry(pgoff_t
offset)
+{
+ return swp_entry(0, 0);
+}
+
+static inline bool is_device_private_migration_entry(swp_entry_t entry)
+{
+ return false;
+}
+
+static inline bool is_writable_device_migration_private_entry(swp_entry_t
entry)
+{
+ return false;
+}
+
+static inline swp_entry_t
make_readable_exclusive_migration_device_private_entry(pgoff_t offset)
+{
+ return swp_entry(0, 0);
+}
+
+static inline bool
is_readable_exclusive_device_private_migration_entry(swp_entry_t entry)
+{
+ return false;
+}
+
#endif /* CONFIG_DEVICE_PRIVATE */
#ifdef CONFIG_MIGRATION
@@ -175,6 +242,9 @@ static inline swp_entry_t
make_readable_migration_entry(pgoff_t offset)
static inline swp_entry_t make_readable_migration_entry_from_page(struct page *page)
{
+ if (is_device_private_page(page))
+ return
make_readable_migration_device_private_entry(page_to_pfn(page));
+
return swp_entry(SWP_MIGRATION_READ, page_to_pfn(page));
}
@@ -185,6 +255,9 @@ static inline swp_entry_t make_readable_exclusive_migration_entry(pgoff_t offset
static inline swp_entry_t make_readable_exclusive_migration_entry_from_page(struct page *page)
{
+ if (is_device_private_page(page))
+ return
make_readable_exclusive_migration_device_private_entry(page_to_pfn(page));
+
return swp_entry(SWP_MIGRATION_READ_EXCLUSIVE, page_to_pfn(page));
}
@@ -195,6 +268,9 @@ static inline swp_entry_t make_writable_migration_entry(pgoff_t offset)
static inline swp_entry_t make_writable_migration_entry_from_page(struct page *page)
{
+ if (is_device_private_page(page))
+ return
make_writable_migration_device_private_entry(page_to_pfn(page));
+
return swp_entry(SWP_MIGRATION_WRITE, page_to_pfn(page));
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 08c68e2e3f06..bbfe5e87884a 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1800,7 +1800,11 @@ static void copy_huge_non_present_pmd(
if (softleaf_is_migration_write(entry) ||
softleaf_is_migration_read_exclusive(entry)) {
- entry = make_readable_migration_entry(swp_offset(entry));
+ if (softleaf_is_migration_device_private_write(entry) ||
+ softleaf_is_migration_device_private_read_exclusive(entry))
+ entry =
make_readable_migration_device_private_entry(swp_offset(entry));
+ else
+ entry =
make_readable_migration_entry(swp_offset(entry));
pmd = swp_entry_to_pmd(entry);
if (pmd_swp_soft_dirty(*src_pmd))
pmd = pmd_swp_mksoft_dirty(pmd);
@@ -2523,10 +2527,17 @@ static void change_non_present_huge_pmd(struct
mm_struct *mm,
* A protection check is difficult so
* just be safe and disable write
*/
- if (folio_test_anon(folio))
- entry =
make_readable_exclusive_migration_entry(swp_offset(entry));
- else
- entry =
make_readable_migration_entry(swp_offset(entry));
+ if (folio_test_anon(folio)) {
+ if (folio_is_device_private(folio))
+ entry =
make_readable_exclusive_migration_device_private_entry(swp_offset(entry));
+ else
+ entry =
make_readable_exclusive_migration_entry(swp_offset(entry));
+ } else {
+ if (folio_is_device_private(folio))
+ entry =
make_readable_migration_device_private_entry(swp_offset(entry));
Note that device private folios are always anon, so we should never hit this
path. We have enough other warnings scattered around for creating device private
entries for non-anon folios so I'd just remove the else clause entirely and
leave a comment saying they must be anon.
Good idea.
+ else
+ entry =
make_readable_migration_entry(swp_offset(entry));
+ }
newpmd = swp_entry_to_pmd(entry);
if (pmd_swp_soft_dirty(*pmd))
newpmd = pmd_swp_mksoft_dirty(newpmd);
diff --git a/mm/memory.c b/mm/memory.c
index 2a55edc48a65..8e5e305bc2dc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -963,8 +963,14 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct
mm_struct *src_mm,
* to be set to read. A previously exclusive entry is
* now shared.
*/
- entry = make_readable_migration_entry(
- swp_offset(entry));
+
+ if (softleaf_is_migration_device_private(entry))
+ entry =
make_readable_migration_device_private_entry(
+
swp_offset(entry));
+ else
+ entry = make_readable_migration_entry(
+
swp_offset(entry));
+
pte = softleaf_to_pte(entry);
if (pte_swp_soft_dirty(orig_pte))
pte = pte_swp_mksoft_dirty(pte);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 283889e4f1ce..61542a80074b 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -327,11 +327,22 @@ static long change_pte_range(struct mmu_gather *tlb,
* A protection check is difficult so
* just be safe and disable write
*/
- if (folio_test_anon(folio))
- entry =
make_readable_exclusive_migration_entry(
- swp_offset(entry));
- else
- entry =
make_readable_migration_entry(swp_offset(entry));
+ if
(!is_writable_device_migration_private_entry(entry)) {
I had to read this code a few times to convince myself it was correct.
I think it would be better to not make softleaf_is_migration_write()
return true for a device private migration entry and to instead deal with
writable device private entries one level up, same as how we deal with
softleaf_is_device_private_write().
As mentioned above, as an alternative, I think using the
make_.*_migration_entry_from_page() helpers might make this easier to follow.
+ if (folio_test_anon(folio))
+ entry =
make_readable_exclusive_migration_entry(
+
swp_offset(entry));
+ else
+ entry =
make_readable_migration_entry(
+
swp_offset(entry));
+ } else {
+ if (folio_test_anon(folio))
+ entry =
make_readable_exclusive_migration_device_private_entry(
+
swp_offset(entry));
+ else
+ entry =
make_readable_migration_device_private_entry(
+
swp_offset(entry));
Same comment as above for non-anon device private folios.
Ack.
Thanks,
Jordan.
+ }
+
newpte = swp_entry_to_pte(entry);
if (pte_swp_soft_dirty(oldpte))
newpte = pte_swp_mksoft_dirty(newpte);
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 7fddafed3ebb..b19820a51e95 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -255,7 +255,8 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
if (!softleaf_is_migration(entry) ||
!check_pmd(softleaf_to_pfn(entry),
-
softleaf_is_device_private(entry),
+
softleaf_is_device_private(entry) ||
+
softleaf_is_migration_device_private(entry),
pvmw))
return not_found(pvmw);
return true;
--
2.34.1