On 1/14/26 09:51, Gregory Price wrote:
Add new memory hotplug APIs that allow callers to explicitly control
the online type when adding or managing memory:
- Extend add_memory_driver_managed() with an online_type parameter:
Callers can now specify MMOP_ONLINE, MMOP_ONLINE_KERNEL, or
MMOP_ONLINE_MOVABLE to online with that type, MMOP_OFFLINE to leave
memory offline, or MMOP_SYSTEM_DEFAULT to use the system default
policy. Update virtio_mem to pass MMOP_SYSTEM_DEFAULT to maintain
existing behavior.
I wonder if we should instead add a new interface
(add_and_online_memory_driver_managed()) where we can restrict it to
known kernel modules that do not violate user-space onlining policies.
For dax we know that user space will define the policy.
- online_memory_range(): online a previously-added memory range with
a specified online type (MMOP_ONLINE, MMOP_ONLINE_KERNEL, or
MMOP_ONLINE_MOVABLE). Validates that the type is valid for onlining.
Why not simply online_memory() and offline_memory()?
- offline_memory(): offline a memory range without removing it. This
is a wrapper around the internal __offline_memory() that handles
locking. Useful for drivers that want to offline memory blocks
before performing other operations.
These two should not be exported to arbitrary kernel modules. Use
EXPORT_SYMBOL_FOR_MODULES() if required, or do not export them at all.
These APIs enable drivers like dax_kmem to implement sophisticated
memory management policies, such as adding memory offline and deferring
the online decision to userspace.
Signed-off-by: Gregory Price <[email protected]>
---
drivers/dax/kmem.c | 3 +-
drivers/virtio/virtio_mem.c | 3 +-
include/linux/memory_hotplug.h | 4 ++-
mm/memory_hotplug.c | 63 ++++++++++++++++++++++++++++++++--
4 files changed, 68 insertions(+), 5 deletions(-)
diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c
index c036e4d0b610..5e0cf94a9620 100644
--- a/drivers/dax/kmem.c
+++ b/drivers/dax/kmem.c
@@ -175,7 +175,8 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
* this as RAM automatically.
*/
rc = add_memory_driver_managed(data->mgid, range.start,
- range_len(&range), kmem_name, mhp_flags);
+ range_len(&range), kmem_name, mhp_flags,
+ MMOP_SYSTEM_DEFAULT);
if (rc) {
dev_warn(dev, "mapping%d: %#llx-%#llx memory add
failed\n",
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index 1688ecd69a04..b1ec8f2b9e31 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -654,7 +654,8 @@ static int virtio_mem_add_memory(struct virtio_mem *vm,
uint64_t addr,
/* Memory might get onlined immediately. */
atomic64_add(size, &vm->offline_size);
rc = add_memory_driver_managed(vm->mgid, addr, size, vm->resource_name,
- MHP_MERGE_RESOURCE | MHP_NID_IS_MGID);
+ MHP_MERGE_RESOURCE | MHP_NID_IS_MGID,
+ MMOP_SYSTEM_DEFAULT);
if (rc) {
atomic64_sub(size, &vm->offline_size);
dev_warn(&vm->vdev->dev, "adding memory failed: %d\n", rc);
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index d5407264d72a..0f98bea6da65 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -265,6 +265,7 @@ static inline void pgdat_resize_init(struct pglist_data
*pgdat) {}
extern void try_offline_node(int nid);
extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
struct zone *zone, struct memory_group *group);
+extern int offline_memory(u64 start, u64 size);
No new "extern" for functions.
extern int remove_memory(u64 start, u64 size);
extern void __remove_memory(u64 start, u64 size);
extern int offline_and_remove_memory(u64 start, u64 size);
@@ -297,7 +298,8 @@ extern int add_memory_resource(int nid, struct resource
*resource,
mhp_t mhp_flags);
extern int add_memory_driver_managed(int nid, u64 start, u64 size,
const char *resource_name,
- mhp_t mhp_flags);
+ mhp_t mhp_flags, int online_type);
+extern int online_memory_range(u64 start, u64 size, int online_type);
extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages,
struct vmem_altmap *altmap, int migratetype,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index ab73c8fcc0f1..515ff9d18039 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1343,6 +1343,34 @@ static int online_memory_block(struct memory_block *mem,
void *arg)
return device_online(&mem->dev);
}
+/**
+ * online_memory_range - online memory blocks in a range
+ * @start: physical start address of memory region
+ * @size: size of memory region
+ * @online_type: MMOP_ONLINE, MMOP_ONLINE_KERNEL, or MMOP_ONLINE_MOVABLE
I wonder if we instead want something that consumes all parameters like
int online_or_offline_memory(int online_type)
Then it's easier to use and we don't really have to document the
"online_type" that much to hand-select some values.
(I'm sure there are better naming suggestions :) )
Should we document what happens if the memory is already online, but was
onlined to a different zone?
+ *
+ * Online all memory blocks in the specified range with the given online type.
+ * The memory must have already been added to the system.
+ *
+ * Returns 0 on success, negative error code on failure.
+ */
+int online_memory_range(u64 start, u64 size, int online_type)
+{
+ int rc;
+
+ if (online_type == MMOP_OFFLINE ||
+ online_type > MMOP_ONLINE_MOVABLE)
+ return -EINVAL;
+
+ lock_device_hotplug();
+ rc = walk_memory_blocks(start, size, &online_type,
+ online_memory_block);
+ unlock_device_hotplug();
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(online_memory_range);
+
#ifndef arch_supports_memmap_on_memory
static inline bool arch_supports_memmap_on_memory(unsigned long vmemmap_size)
{
@@ -1656,9 +1684,16 @@ EXPORT_SYMBOL_GPL(add_memory);
*
* The resource_name (visible via /proc/iomem) has to have the format
* "System RAM ($DRIVER)".
+ *
+ * @online_type specifies the online behavior: MMOP_ONLINE, MMOP_ONLINE_KERNEL,
+ * MMOP_ONLINE_MOVABLE to online with that type, MMOP_OFFLINE to leave offline,
+ * or MMOP_SYSTEM_DEFAULT to use the system default policy.
+ *
I think we can simplify this documentation, especially once
MMOP_SYSTEM_DEFAULT is gone.
+ * Returns 0 on success, negative error code on failure.
*/
int add_memory_driver_managed(int nid, u64 start, u64 size,
- const char *resource_name, mhp_t mhp_flags)
+ const char *resource_name, mhp_t mhp_flags,
+ int online_type)
{
struct resource *res;
int rc;
@@ -1668,6 +1703,13 @@ int add_memory_driver_managed(int nid, u64 start, u64
size,
resource_name[strlen(resource_name) - 1] != ')')
return -EINVAL;
+ /* Convert system default to actual online type */
+ if (online_type == MMOP_SYSTEM_DEFAULT)
+ online_type = mhp_get_default_online_type();
+
+ if (online_type < 0 || online_type > MMOP_ONLINE_MOVABLE)
+ return -EINVAL;
+
lock_device_hotplug();
res = register_memory_resource(start, size, resource_name);
@@ -1676,7 +1718,7 @@ int add_memory_driver_managed(int nid, u64 start, u64
size,
goto out_unlock;
}
- rc = add_memory_resource(nid, res, mhp_flags);
+ rc = __add_memory_resource(nid, res, mhp_flags, online_type);
if (rc < 0)
release_memory_resource(res);
@@ -2412,6 +2454,23 @@ static int __offline_memory(u64 start, u64 size)
return rc;
}
+/*
+ * Try to offline a memory range. Might take a long time to finish in case
+ * memory is still in use. In case of failure, already offlined memory blocks
+ * will be re-onlined.
+ */
Proper kerneldoc? :)
+int offline_memory(u64 start, u64 size)
+{
+ int rc;
+
+ lock_device_hotplug();
+ rc = __offline_memory(start, size);
+ unlock_device_hotplug();
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(offline_memory);
+
/*
* Try to offline and remove memory. Might take a long time to finish in case
* memory is still in use. Primarily useful for memory devices that logically
--
Cheers
David