In the current kmem driver binding process, the only way for users
to define hotplug policy is via a build-time option, or by not
onlining memory by default and setting each individual memory block
online after hotplug occurs.  We can solve this with a configuration
step between region-probe and dax-probe.

Add the infrastructure for a two-stage driver binding for kmem-mode
dax regions. The cxl_dax_kmem_region driver probes cxl_sysram_region
devices and creates cxl_dax_region with dax_driver=kmem.

This creates an interposition step where users can configure policy.

Device hierarchy:
  region0 -> sysram_region0 -> dax_region0 -> dax0.0

The sysram_region device exposes a sysfs 'online_type' attribute
that allows users to configure the memory online type before the
underlying dax_region is created and memory is hotplugged.

  sysram_region0/online_type:
      invalid:        not configured, blocks probe
      offline:        memory will not be onlined automatically
      online:         memory will be onlined in ZONE_NORMAL
      online_movable: memory will be onlined in ZONE_MMOVABLE

The device initializes with online_type=invalid which prevents the
cxl_dax_kmem_region driver from binding until the user explicitly
configures a valid online_type.

This enables a two-step binding process:
  echo region0 > cxl_sysram_region/bind
  echo online_movable > sysram_region0/online_type
  echo sysram_region0 > cxl_dax_kmem_region/bind

Signed-off-by: Gregory Price <[email protected]>
---
 Documentation/ABI/testing/sysfs-bus-cxl |  21 +++
 drivers/cxl/core/Makefile               |   1 +
 drivers/cxl/core/core.h                 |   6 +
 drivers/cxl/core/dax_region.c           |  50 +++++++
 drivers/cxl/core/port.c                 |   2 +
 drivers/cxl/core/region.c               |  14 ++
 drivers/cxl/core/sysram_region.c        | 180 ++++++++++++++++++++++++
 drivers/cxl/cxl.h                       |  25 ++++
 8 files changed, 299 insertions(+)
 create mode 100644 drivers/cxl/core/sysram_region.c

diff --git a/Documentation/ABI/testing/sysfs-bus-cxl 
b/Documentation/ABI/testing/sysfs-bus-cxl
index c80a1b5a03db..a051cb86bdfc 100644
--- a/Documentation/ABI/testing/sysfs-bus-cxl
+++ b/Documentation/ABI/testing/sysfs-bus-cxl
@@ -624,3 +624,24 @@ Description:
                The count is persistent across power loss and wraps back to 0
                upon overflow. If this file is not present, the device does not
                have the necessary support for dirty tracking.
+
+
+What:          /sys/bus/cxl/devices/sysram_regionZ/online_type
+Date:          January, 2026
+KernelVersion: v7.1
+Contact:       [email protected]
+Description:
+               (RW) This attribute allows users to configure the memory online
+               type before the underlying dax_region engages in hotplug.
+
+               Valid values:
+                 'invalid': Not configured (default). Blocks probe.
+                 'offline': Memory will not be onlined automatically.
+                 'online' : Memory will be onlined in ZONE_NORMAL.
+                 'online_movable': Memory will be onlined in ZONE_MOVABLE.
+
+               The device initializes with online_type='invalid' which prevents
+               the cxl_dax_kmem_region driver from binding until the user
+               explicitly configures a valid online_type. This enables a
+               two-step binding process that gives users control over memory
+               hotplug policy before memory is added to the system.
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index 36f284d7c500..faf662c7d88b 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -18,6 +18,7 @@ cxl_core-y += ras.o
 cxl_core-$(CONFIG_TRACING) += trace.o
 cxl_core-$(CONFIG_CXL_REGION) += region.o
 cxl_core-$(CONFIG_CXL_REGION) += dax_region.o
+cxl_core-$(CONFIG_CXL_REGION) += sysram_region.o
 cxl_core-$(CONFIG_CXL_REGION) += pmem_region.o
 cxl_core-$(CONFIG_CXL_MCE) += mce.o
 cxl_core-$(CONFIG_CXL_FEATURES) += features.o
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index ea4df8abc2ad..04b32015e9b1 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -26,6 +26,7 @@ extern struct device_attribute dev_attr_delete_region;
 extern struct device_attribute dev_attr_region;
 extern const struct device_type cxl_pmem_region_type;
 extern const struct device_type cxl_dax_region_type;
+extern const struct device_type cxl_sysram_region_type;
 extern const struct device_type cxl_region_type;
 
 int cxl_decoder_detach(struct cxl_region *cxlr,
@@ -37,6 +38,7 @@ int cxl_decoder_detach(struct cxl_region *cxlr,
 #define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr),
 #define CXL_PMEM_REGION_TYPE(x) (&cxl_pmem_region_type)
 #define CXL_DAX_REGION_TYPE(x) (&cxl_dax_region_type)
+#define CXL_SYSRAM_REGION_TYPE(x) (&cxl_sysram_region_type)
 int cxl_region_init(void);
 void cxl_region_exit(void);
 int cxl_get_poison_by_endpoint(struct cxl_port *port);
@@ -44,9 +46,12 @@ struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev 
*cxlmd, u64 dpa);
 u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
                   u64 dpa);
 int devm_cxl_add_dax_region(struct cxl_region *cxlr, enum dax_driver_type);
+int devm_cxl_add_sysram_region(struct cxl_region *cxlr);
 int devm_cxl_add_pmem_region(struct cxl_region *cxlr);
 
 extern struct cxl_driver cxl_devdax_region_driver;
+extern struct cxl_driver cxl_dax_kmem_region_driver;
+extern struct cxl_driver cxl_sysram_region_driver;
 
 #else
 static inline u64 cxl_dpa_to_hpa(struct cxl_region *cxlr,
@@ -81,6 +86,7 @@ static inline void cxl_region_exit(void)
 #define SET_CXL_REGION_ATTR(x)
 #define CXL_PMEM_REGION_TYPE(x) NULL
 #define CXL_DAX_REGION_TYPE(x) NULL
+#define CXL_SYSRAM_REGION_TYPE(x) NULL
 #endif
 
 struct cxl_send_command;
diff --git a/drivers/cxl/core/dax_region.c b/drivers/cxl/core/dax_region.c
index 391d51e5ec37..a379f5b85e3d 100644
--- a/drivers/cxl/core/dax_region.c
+++ b/drivers/cxl/core/dax_region.c
@@ -127,3 +127,53 @@ struct cxl_driver cxl_devdax_region_driver = {
        .probe = cxl_devdax_region_driver_probe,
        .id = CXL_DEVICE_REGION,
 };
+
+static int cxl_dax_kmem_region_driver_probe(struct device *dev)
+{
+       struct cxl_sysram_region *cxlr_sysram = to_cxl_sysram_region(dev);
+       struct cxl_dax_region *cxlr_dax;
+       struct cxl_region *cxlr;
+       int rc;
+
+       if (!cxlr_sysram)
+               return -ENODEV;
+
+       /* Require explicit online_type configuration before binding */
+       if (cxlr_sysram->online_type == -1)
+               return -ENODEV;
+
+       cxlr = cxlr_sysram->cxlr;
+
+       cxlr_dax = cxl_dax_region_alloc(cxlr);
+       if (IS_ERR(cxlr_dax))
+               return PTR_ERR(cxlr_dax);
+
+       /* Inherit online_type from parent sysram_region */
+       cxlr_dax->online_type = cxlr_sysram->online_type;
+       cxlr_dax->dax_driver = DAXDRV_KMEM_TYPE;
+
+       /* Parent is the sysram_region device */
+       cxlr_dax->dev.parent = dev;
+
+       rc = dev_set_name(&cxlr_dax->dev, "dax_region%d", cxlr->id);
+       if (rc)
+               goto err;
+
+       rc = device_add(&cxlr_dax->dev);
+       if (rc)
+               goto err;
+
+       dev_dbg(dev, "%s: register %s\n", dev_name(dev),
+               dev_name(&cxlr_dax->dev));
+
+       return devm_add_action_or_reset(dev, cxlr_dax_unregister, cxlr_dax);
+err:
+       put_device(&cxlr_dax->dev);
+       return rc;
+}
+
+struct cxl_driver cxl_dax_kmem_region_driver = {
+       .name = "cxl_dax_kmem_region",
+       .probe = cxl_dax_kmem_region_driver_probe,
+       .id = CXL_DEVICE_SYSRAM_REGION,
+};
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 3310dbfae9d6..dc7262a5efd6 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -66,6 +66,8 @@ static int cxl_device_id(const struct device *dev)
                return CXL_DEVICE_PMEM_REGION;
        if (dev->type == CXL_DAX_REGION_TYPE())
                return CXL_DEVICE_DAX_REGION;
+       if (dev->type == CXL_SYSRAM_REGION_TYPE())
+               return CXL_DEVICE_SYSRAM_REGION;
        if (is_cxl_port(dev)) {
                if (is_cxl_root(to_cxl_port(dev)))
                        return CXL_DEVICE_ROOT;
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 6200ca1cc2dd..8bef91dc726c 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -3734,8 +3734,20 @@ int cxl_region_init(void)
        if (rc)
                goto err_dax;
 
+       rc = cxl_driver_register(&cxl_sysram_region_driver);
+       if (rc)
+               goto err_sysram;
+
+       rc = cxl_driver_register(&cxl_dax_kmem_region_driver);
+       if (rc)
+               goto err_dax_kmem;
+
        return 0;
 
+err_dax_kmem:
+       cxl_driver_unregister(&cxl_sysram_region_driver);
+err_sysram:
+       cxl_driver_unregister(&cxl_devdax_region_driver);
 err_dax:
        cxl_driver_unregister(&cxl_region_driver);
        return rc;
@@ -3743,6 +3755,8 @@ int cxl_region_init(void)
 
 void cxl_region_exit(void)
 {
+       cxl_driver_unregister(&cxl_dax_kmem_region_driver);
+       cxl_driver_unregister(&cxl_sysram_region_driver);
        cxl_driver_unregister(&cxl_devdax_region_driver);
        cxl_driver_unregister(&cxl_region_driver);
 }
diff --git a/drivers/cxl/core/sysram_region.c b/drivers/cxl/core/sysram_region.c
new file mode 100644
index 000000000000..5665db238d0f
--- /dev/null
+++ b/drivers/cxl/core/sysram_region.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2026 Meta Platforms, Inc. All rights reserved. */
+/*
+ * CXL Sysram Region - Intermediate device for kmem hotplug configuration
+ *
+ * This provides an intermediate device between cxl_region and cxl_dax_region
+ * that allows users to configure memory hotplug parameters (like online_type)
+ * before the underlying dax_region is created and memory is hotplugged.
+ */
+
+#include <linux/memory_hotplug.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <cxlmem.h>
+#include <cxl.h>
+#include "core.h"
+
+static void cxl_sysram_region_release(struct device *dev)
+{
+       struct cxl_sysram_region *cxlr_sysram = to_cxl_sysram_region(dev);
+
+       kfree(cxlr_sysram);
+}
+
+static ssize_t online_type_show(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       struct cxl_sysram_region *cxlr_sysram = to_cxl_sysram_region(dev);
+
+       switch (cxlr_sysram->online_type) {
+       case MMOP_OFFLINE:
+               return sysfs_emit(buf, "offline\n");
+       case MMOP_ONLINE:
+               return sysfs_emit(buf, "online\n");
+       case MMOP_ONLINE_MOVABLE:
+               return sysfs_emit(buf, "online_movable\n");
+       default:
+               return sysfs_emit(buf, "invalid\n");
+       }
+}
+
+static ssize_t online_type_store(struct device *dev,
+                                struct device_attribute *attr,
+                                const char *buf, size_t len)
+{
+       struct cxl_sysram_region *cxlr_sysram = to_cxl_sysram_region(dev);
+
+       if (sysfs_streq(buf, "offline"))
+               cxlr_sysram->online_type = MMOP_OFFLINE;
+       else if (sysfs_streq(buf, "online"))
+               cxlr_sysram->online_type = MMOP_ONLINE;
+       else if (sysfs_streq(buf, "online_movable"))
+               cxlr_sysram->online_type = MMOP_ONLINE_MOVABLE;
+       else
+               return -EINVAL;
+
+       return len;
+}
+
+static DEVICE_ATTR_RW(online_type);
+
+static struct attribute *cxl_sysram_region_attrs[] = {
+       &dev_attr_online_type.attr,
+       NULL,
+};
+
+static const struct attribute_group cxl_sysram_region_attribute_group = {
+       .attrs = cxl_sysram_region_attrs,
+};
+
+static const struct attribute_group *cxl_sysram_region_attribute_groups[] = {
+       &cxl_base_attribute_group,
+       &cxl_sysram_region_attribute_group,
+       NULL,
+};
+
+const struct device_type cxl_sysram_region_type = {
+       .name = "cxl_sysram_region",
+       .release = cxl_sysram_region_release,
+       .groups = cxl_sysram_region_attribute_groups,
+};
+
+static bool is_cxl_sysram_region(struct device *dev)
+{
+       return dev->type == &cxl_sysram_region_type;
+}
+
+struct cxl_sysram_region *to_cxl_sysram_region(struct device *dev)
+{
+       if (dev_WARN_ONCE(dev, !is_cxl_sysram_region(dev),
+                         "not a cxl_sysram_region device\n"))
+               return NULL;
+       return container_of(dev, struct cxl_sysram_region, dev);
+}
+EXPORT_SYMBOL_NS_GPL(to_cxl_sysram_region, "CXL");
+
+static struct lock_class_key cxl_sysram_region_key;
+
+static struct cxl_sysram_region *cxl_sysram_region_alloc(struct cxl_region 
*cxlr)
+{
+       struct cxl_region_params *p = &cxlr->params;
+       struct cxl_sysram_region *cxlr_sysram;
+       struct device *dev;
+
+       guard(rwsem_read)(&cxl_rwsem.region);
+       if (p->state != CXL_CONFIG_COMMIT)
+               return ERR_PTR(-ENXIO);
+
+       cxlr_sysram = kzalloc(sizeof(*cxlr_sysram), GFP_KERNEL);
+       if (!cxlr_sysram)
+               return ERR_PTR(-ENOMEM);
+
+       cxlr_sysram->hpa_range.start = p->res->start;
+       cxlr_sysram->hpa_range.end = p->res->end;
+       cxlr_sysram->online_type = -1;  /* Require explicit configuration */
+
+       dev = &cxlr_sysram->dev;
+       cxlr_sysram->cxlr = cxlr;
+       device_initialize(dev);
+       lockdep_set_class(&dev->mutex, &cxl_sysram_region_key);
+       device_set_pm_not_required(dev);
+       dev->parent = &cxlr->dev;
+       dev->bus = &cxl_bus_type;
+       dev->type = &cxl_sysram_region_type;
+
+       return cxlr_sysram;
+}
+
+static void cxlr_sysram_unregister(void *_cxlr_sysram)
+{
+       struct cxl_sysram_region *cxlr_sysram = _cxlr_sysram;
+
+       device_unregister(&cxlr_sysram->dev);
+}
+
+int devm_cxl_add_sysram_region(struct cxl_region *cxlr)
+{
+       struct cxl_sysram_region *cxlr_sysram;
+       struct device *dev;
+       int rc;
+
+       cxlr_sysram = cxl_sysram_region_alloc(cxlr);
+       if (IS_ERR(cxlr_sysram))
+               return PTR_ERR(cxlr_sysram);
+
+       dev = &cxlr_sysram->dev;
+       rc = dev_set_name(dev, "sysram_region%d", cxlr->id);
+       if (rc)
+               goto err;
+
+       rc = device_add(dev);
+       if (rc)
+               goto err;
+
+       dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
+               dev_name(dev));
+
+       return devm_add_action_or_reset(&cxlr->dev, cxlr_sysram_unregister,
+                                       cxlr_sysram);
+err:
+       put_device(dev);
+       return rc;
+}
+
+static int cxl_sysram_region_driver_probe(struct device *dev)
+{
+       struct cxl_region *cxlr = to_cxl_region(dev);
+
+       /* Only handle RAM regions */
+       if (cxlr->mode != CXL_PARTMODE_RAM)
+               return -ENODEV;
+
+       return devm_cxl_add_sysram_region(cxlr);
+}
+
+struct cxl_driver cxl_sysram_region_driver = {
+       .name = "cxl_sysram_region",
+       .probe = cxl_sysram_region_driver_probe,
+       .id = CXL_DEVICE_REGION,
+};
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 674d5f870c70..1544c27e9c89 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -596,6 +596,25 @@ struct cxl_dax_region {
        enum dax_driver_type dax_driver;
 };
 
+/**
+ * struct cxl_sysram_region - CXL RAM region for system memory hotplug
+ * @dev: device for this sysram_region
+ * @cxlr: parent cxl_region
+ * @hpa_range: Host physical address range for the region
+ * @online_type: Memory online type (MMOP_* 0-3, or -1 if not configured)
+ *
+ * Intermediate device that allows configuration of memory hotplug
+ * parameters before the underlying dax_region is created. The device
+ * starts with online_type=-1 which prevents the cxl_dax_kmem_region
+ * driver from binding until the user explicitly sets online_type.
+ */
+struct cxl_sysram_region {
+       struct device dev;
+       struct cxl_region *cxlr;
+       struct range hpa_range;
+       int online_type;
+};
+
 /**
  * struct cxl_port - logical collection of upstream port devices and
  *                  downstream port devices to construct a CXL memory
@@ -890,6 +909,7 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv);
 #define CXL_DEVICE_PMEM_REGION         7
 #define CXL_DEVICE_DAX_REGION          8
 #define CXL_DEVICE_PMU                 9
+#define CXL_DEVICE_SYSRAM_REGION       10
 
 #define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*")
 #define CXL_MODALIAS_FMT "cxl:t%d"
@@ -907,6 +927,7 @@ bool is_cxl_pmem_region(struct device *dev);
 struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev);
 int cxl_add_to_region(struct cxl_endpoint_decoder *cxled);
 struct cxl_dax_region *to_cxl_dax_region(struct device *dev);
+struct cxl_sysram_region *to_cxl_sysram_region(struct device *dev);
 u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa);
 #else
 static inline bool is_cxl_pmem_region(struct device *dev)
@@ -925,6 +946,10 @@ static inline struct cxl_dax_region 
*to_cxl_dax_region(struct device *dev)
 {
        return NULL;
 }
+static inline struct cxl_sysram_region *to_cxl_sysram_region(struct device 
*dev)
+{
+       return NULL;
+}
 static inline u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint,
                                               u64 spa)
 {
-- 
2.52.0


Reply via email to