On 1/14/26 09:51, Gregory Price wrote:
Add new memory hotplug APIs that allow callers to explicitly control
the online type when adding or managing memory:

   - Extend add_memory_driver_managed() with an online_type parameter:
     Callers can now specify MMOP_ONLINE, MMOP_ONLINE_KERNEL, or
     MMOP_ONLINE_MOVABLE to online with that type, MMOP_OFFLINE to leave
     memory offline, or MMOP_SYSTEM_DEFAULT to use the system default
     policy. Update virtio_mem to pass MMOP_SYSTEM_DEFAULT to maintain
     existing behavior.

I wonder whether we would rather add a new interface (add_and_online_memory_driver_managed()) that we can restrict to known kernel modules that do not violate user-space onlining policies.

For dax we know that user space will define the policy.


   - online_memory_range(): online a previously-added memory range with
     a specified online type (MMOP_ONLINE, MMOP_ONLINE_KERNEL, or
     MMOP_ONLINE_MOVABLE). Validates that the type is valid for onlining.

Why not simply online_memory() and offline_memory()?


   - offline_memory(): offline a memory range without removing it. This
     is a wrapper around the internal __offline_memory() that handles
     locking. Useful for drivers that want to offline memory blocks
     before performing other operations.


These two should not be exported to arbitrary kernel modules. Use EXPORT_SYMBOL_FOR_MODULES() if required, or do not export them at all.

These APIs enable drivers like dax_kmem to implement sophisticated
memory management policies, such as adding memory offline and deferring
the online decision to userspace.

Signed-off-by: Gregory Price <[email protected]>
---
  drivers/dax/kmem.c             |  3 +-
  drivers/virtio/virtio_mem.c    |  3 +-
  include/linux/memory_hotplug.h |  4 ++-
  mm/memory_hotplug.c            | 63 ++++++++++++++++++++++++++++++++--
  4 files changed, 68 insertions(+), 5 deletions(-)

diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c
index c036e4d0b610..5e0cf94a9620 100644
--- a/drivers/dax/kmem.c
+++ b/drivers/dax/kmem.c
@@ -175,7 +175,8 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
                 * this as RAM automatically.
                 */
                rc = add_memory_driver_managed(data->mgid, range.start,
-                               range_len(&range), kmem_name, mhp_flags);
+                               range_len(&range), kmem_name, mhp_flags,
+                               MMOP_SYSTEM_DEFAULT);
if (rc) {
                        dev_warn(dev, "mapping%d: %#llx-%#llx memory add 
failed\n",
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index 1688ecd69a04..b1ec8f2b9e31 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -654,7 +654,8 @@ static int virtio_mem_add_memory(struct virtio_mem *vm, 
uint64_t addr,
        /* Memory might get onlined immediately. */
        atomic64_add(size, &vm->offline_size);
        rc = add_memory_driver_managed(vm->mgid, addr, size, vm->resource_name,
-                                      MHP_MERGE_RESOURCE | MHP_NID_IS_MGID);
+                                      MHP_MERGE_RESOURCE | MHP_NID_IS_MGID,
+                                      MMOP_SYSTEM_DEFAULT);
        if (rc) {
                atomic64_sub(size, &vm->offline_size);
                dev_warn(&vm->vdev->dev, "adding memory failed: %d\n", rc);
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index d5407264d72a..0f98bea6da65 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -265,6 +265,7 @@ static inline void pgdat_resize_init(struct pglist_data 
*pgdat) {}
  extern void try_offline_node(int nid);
  extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
                         struct zone *zone, struct memory_group *group);
+extern int offline_memory(u64 start, u64 size);

No new "extern" for functions.

  extern int remove_memory(u64 start, u64 size);
  extern void __remove_memory(u64 start, u64 size);
  extern int offline_and_remove_memory(u64 start, u64 size);
@@ -297,7 +298,8 @@ extern int add_memory_resource(int nid, struct resource 
*resource,
                               mhp_t mhp_flags);
  extern int add_memory_driver_managed(int nid, u64 start, u64 size,
                                     const char *resource_name,
-                                    mhp_t mhp_flags);
+                                    mhp_t mhp_flags, int online_type);
+extern int online_memory_range(u64 start, u64 size, int online_type);
  extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
                                   unsigned long nr_pages,
                                   struct vmem_altmap *altmap, int migratetype,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index ab73c8fcc0f1..515ff9d18039 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1343,6 +1343,34 @@ static int online_memory_block(struct memory_block *mem, 
void *arg)
        return device_online(&mem->dev);
  }
+/**
+ * online_memory_range - online memory blocks in a range
+ * @start: physical start address of memory region
+ * @size: size of memory region
+ * @online_type: MMOP_ONLINE, MMOP_ONLINE_KERNEL, or MMOP_ONLINE_MOVABLE

I wonder if we instead want something that consumes all parameters like

int online_or_offline_memory(int online_type)

Then it's easier to use and we don't really have to document the "online_type" that much to hand-select some values.

(I'm sure there are better naming suggestions :) )


Should we document what happens if the memory is already online, but was onlined to a different zone?

+ *
+ * Online all memory blocks in the specified range with the given online type.
+ * The memory must have already been added to the system.
+ *
+ * Returns 0 on success, negative error code on failure.
+ */
+int online_memory_range(u64 start, u64 size, int online_type)
+{
+       int rc;
+
+       if (online_type == MMOP_OFFLINE ||
+           online_type > MMOP_ONLINE_MOVABLE)
+               return -EINVAL;
+
+       lock_device_hotplug();
+       rc = walk_memory_blocks(start, size, &online_type,
+                               online_memory_block);
+       unlock_device_hotplug();
+
+       return rc;
+}
+EXPORT_SYMBOL_GPL(online_memory_range);
+
  #ifndef arch_supports_memmap_on_memory
  static inline bool arch_supports_memmap_on_memory(unsigned long vmemmap_size)
  {
@@ -1656,9 +1684,16 @@ EXPORT_SYMBOL_GPL(add_memory);
   *
   * The resource_name (visible via /proc/iomem) has to have the format
   * "System RAM ($DRIVER)".
+ *
+ * @online_type specifies the online behavior: MMOP_ONLINE, MMOP_ONLINE_KERNEL,
+ * MMOP_ONLINE_MOVABLE to online with that type, MMOP_OFFLINE to leave offline,
+ * or MMOP_SYSTEM_DEFAULT to use the system default policy.
+ *

I think we can simplify this documentation, especially once MMOP_SYSTEM_DEFAULT is gone.

+ * Returns 0 on success, negative error code on failure.
   */
  int add_memory_driver_managed(int nid, u64 start, u64 size,
-                             const char *resource_name, mhp_t mhp_flags)
+                             const char *resource_name, mhp_t mhp_flags,
+                             int online_type)
  {
        struct resource *res;
        int rc;
@@ -1668,6 +1703,13 @@ int add_memory_driver_managed(int nid, u64 start, u64 
size,
            resource_name[strlen(resource_name) - 1] != ')')
                return -EINVAL;
+ /* Convert system default to actual online type */
+       if (online_type == MMOP_SYSTEM_DEFAULT)
+               online_type = mhp_get_default_online_type();
+
+       if (online_type < 0 || online_type > MMOP_ONLINE_MOVABLE)
+               return -EINVAL;
+
        lock_device_hotplug();
res = register_memory_resource(start, size, resource_name);
@@ -1676,7 +1718,7 @@ int add_memory_driver_managed(int nid, u64 start, u64 
size,
                goto out_unlock;
        }
- rc = add_memory_resource(nid, res, mhp_flags);
+       rc = __add_memory_resource(nid, res, mhp_flags, online_type);
        if (rc < 0)
                release_memory_resource(res);
@@ -2412,6 +2454,23 @@ static int __offline_memory(u64 start, u64 size)
        return rc;
  }
+/*
+ * Try to offline a memory range. Might take a long time to finish in case
+ * memory is still in use. In case of failure, already offlined memory blocks
+ * will be re-onlined.
+ */

Proper kerneldoc? :)

+int offline_memory(u64 start, u64 size)
+{
+       int rc;
+
+       lock_device_hotplug();
+       rc = __offline_memory(start, size);
+       unlock_device_hotplug();
+
+       return rc;
+}
+EXPORT_SYMBOL_GPL(offline_memory);
+
  /*
   * Try to offline and remove memory. Might take a long time to finish in case
   * memory is still in use. Primarily useful for memory devices that logically


--
Cheers

David

Reply via email to