Previously, dax_hmem deferred to CXL only when it detected an immediate
resource intersection with a CXL window. That left a gap: if cxl_acpi
or cxl_pci had not yet probed, or region assembly had not yet started,
hmem could claim ranges prematurely.

Fix this by introducing a dax_cxl_mode state machine and a deferred
work mechanism.

The deferred work delays consideration of Soft Reserved overlaps until
the CXL subsystem has had a chance to complete its discovery and region
assembly. This avoids premature iomem claims, eliminates races with
asynchronous cxl_pci probing, and provides a cleaner handoff of resource
management between hmem and CXL.
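
As a reading aid, below is a minimal, compilable user-space model of
the mode handling this patch introduces. It is illustration only, not
part of the patch: cxl_overlap(), register_range(), drop_range(), and
schedule_recheck() are hypothetical stand-ins for region_intersects(),
the dax device registration path, and schedule_work() respectively.

	#include <stdbool.h>
	#include <stdio.h>

	enum dax_cxl_mode { DEFER, REGISTER, DROP };
	static enum dax_cxl_mode mode = DEFER;	/* boot-time default */

	static bool cxl_overlap(void)      { return true; }	/* stub */
	static void register_range(void)   { puts("register range"); }
	static void drop_range(void)       { puts("drop range"); }
	static void schedule_recheck(void) { puts("schedule work"); }

	static void consider_range(void)
	{
		if (!cxl_overlap()) {
			register_range();	/* no CXL claim: hmem owns it */
			return;
		}
		switch (mode) {
		case DEFER:
			schedule_recheck();	/* revisit after CXL probes */
			break;
		case REGISTER:
			register_range();	/* CXL bowed out of the range */
			break;
		case DROP:
			drop_range();		/* leave range to CXL */
			break;
		}
	}

	int main(void)
	{
		consider_range();	/* DEFER: range parked */
		mode = DROP;		/* as after wait_for_device_probe() */
		consider_range();	/* DROP: range left to CXL */
		return 0;
	}

In the patch itself the DEFER -> DROP transition happens in
process_defer_work() after wait_for_device_probe() returns.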

Signed-off-by: Smita Koralahalli <smita.koralahallichannabasa...@amd.com>
Signed-off-by: Dan Williams <dan.j.willi...@intel.com>
---
 drivers/dax/hmem/hmem.c | 72 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 70 insertions(+), 2 deletions(-)

diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
index 7ada820cb177..90978518e5f4 100644
--- a/drivers/dax/hmem/hmem.c
+++ b/drivers/dax/hmem/hmem.c
@@ -58,9 +58,45 @@ static void release_hmem(void *pdev)
        platform_device_unregister(pdev);
 }
 
+static enum dax_cxl_mode {
+       DAX_CXL_MODE_DEFER,
+       DAX_CXL_MODE_REGISTER,
+       DAX_CXL_MODE_DROP,
+} dax_cxl_mode;
+
+static int handle_deferred_cxl(struct device *host, int target_nid,
+                               const struct resource *res)
+{
+       if (region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
+                             IORES_DESC_CXL) != REGION_DISJOINT) {
+               if (dax_cxl_mode == DAX_CXL_MODE_DROP)
+                       dev_dbg(host, "dropping CXL range: %pr\n", res);
+       }
+       return 0;
+}
+
+struct dax_defer_work {
+       struct platform_device *pdev;
+       struct work_struct work;
+};
+
+static void process_defer_work(struct work_struct *_work)
+{
+       struct dax_defer_work *work = container_of(_work, typeof(*work), work);
+       struct platform_device *pdev = work->pdev;
+
+       /* relies on cxl_acpi and cxl_pci having had a chance to load */
+       wait_for_device_probe();
+
+       dax_cxl_mode = DAX_CXL_MODE_DROP;
+
+       walk_hmem_resources(&pdev->dev, handle_deferred_cxl);
+}
+
 static int hmem_register_device(struct device *host, int target_nid,
                                const struct resource *res)
 {
+       struct dax_defer_work *work = dev_get_drvdata(host);
        struct platform_device *pdev;
        struct memregion_info info;
        long id;
@@ -69,8 +105,18 @@ static int hmem_register_device(struct device *host, int target_nid,
        if (IS_ENABLED(CONFIG_DEV_DAX_CXL) &&
            region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
                              IORES_DESC_CXL) != REGION_DISJOINT) {
-               dev_dbg(host, "deferring range to CXL: %pr\n", res);
-               return 0;
+               switch (dax_cxl_mode) {
+               case DAX_CXL_MODE_DEFER:
+                       dev_dbg(host, "deferring range to CXL: %pr\n", res);
+                       schedule_work(&work->work);
+                       return 0;
+               case DAX_CXL_MODE_REGISTER:
+                       dev_dbg(host, "registering CXL range: %pr\n", res);
+                       break;
+               case DAX_CXL_MODE_DROP:
+                       dev_dbg(host, "dropping CXL range: %pr\n", res);
+                       return 0;
+               }
        }
 
 #ifdef CONFIG_EFI_SOFT_RESERVE
@@ -130,8 +176,30 @@ static int hmem_register_device(struct device *host, int target_nid,
        return rc;
 }
 
+static void kill_defer_work(void *_work)
+{
+       struct dax_defer_work *work = container_of(_work, typeof(*work), work);
+
+       cancel_work_sync(&work->work);
+       kfree(work);
+}
+
 static int dax_hmem_platform_probe(struct platform_device *pdev)
 {
+       struct dax_defer_work *work = kzalloc(sizeof(*work), GFP_KERNEL);
+       int rc;
+
+       if (!work)
+               return -ENOMEM;
+
+       work->pdev = pdev;
+       INIT_WORK(&work->work, process_defer_work);
+
+       rc = devm_add_action_or_reset(&pdev->dev, kill_defer_work, work);
+       if (rc)
+               return rc;
+
+       platform_set_drvdata(pdev, work);
        return walk_hmem_resources(&pdev->dev, hmem_register_device);
 }
 
-- 
2.17.1

