Previously, dax_hmem deferred to CXL only when it detected an immediate resource intersection with a CXL window. This left a gap: if cxl_acpi or cxl_pci had not yet probed, or region assembly had not yet started, hmem could claim ranges prematurely.
Fix this by introducing a dax_cxl_mode state machine and a deferred work
mechanism. The deferred work item delays consideration of Soft Reserved
overlaps until the CXL subsystem has had a chance to complete discovery
and region assembly. This avoids premature iomem claims, eliminates races
with asynchronous cxl_pci probing, and provides a cleaner handoff of
resource management between hmem and CXL.

Signed-off-by: Smita Koralahalli <smita.koralahallichannabasa...@amd.com>
Signed-off-by: Dan Williams <dan.j.willi...@intel.com>
---
 drivers/dax/hmem/hmem.c | 72 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 70 insertions(+), 2 deletions(-)

diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
index 7ada820cb177..90978518e5f4 100644
--- a/drivers/dax/hmem/hmem.c
+++ b/drivers/dax/hmem/hmem.c
@@ -58,9 +58,45 @@ static void release_hmem(void *pdev)
 	platform_device_unregister(pdev);
 }
 
+static enum dax_cxl_mode {
+	DAX_CXL_MODE_DEFER,
+	DAX_CXL_MODE_REGISTER,
+	DAX_CXL_MODE_DROP,
+} dax_cxl_mode;
+
+static int handle_deferred_cxl(struct device *host, int target_nid,
+			       const struct resource *res)
+{
+	if (region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
+			      IORES_DESC_CXL) != REGION_DISJOINT) {
+		if (dax_cxl_mode == DAX_CXL_MODE_DROP)
+			dev_dbg(host, "dropping CXL range: %pr\n", res);
+	}
+	return 0;
+}
+
+struct dax_defer_work {
+	struct platform_device *pdev;
+	struct work_struct work;
+};
+
+static void process_defer_work(struct work_struct *_work)
+{
+	struct dax_defer_work *work = container_of(_work, typeof(*work), work);
+	struct platform_device *pdev = work->pdev;
+
+	/* relies on cxl_acpi and cxl_pci having had a chance to load */
+	wait_for_device_probe();
+
+	dax_cxl_mode = DAX_CXL_MODE_DROP;
+
+	walk_hmem_resources(&pdev->dev, handle_deferred_cxl);
+}
+
 static int hmem_register_device(struct device *host, int target_nid,
 				const struct resource *res)
 {
+	struct dax_defer_work *work = dev_get_drvdata(host);
 	struct platform_device *pdev;
 	struct memregion_info info;
 	long id;
@@ -69,8 +105,18 @@ static int hmem_register_device(struct device *host, int target_nid,
 	if (IS_ENABLED(CONFIG_DEV_DAX_CXL) &&
 	    region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
 			      IORES_DESC_CXL) != REGION_DISJOINT) {
-		dev_dbg(host, "deferring range to CXL: %pr\n", res);
-		return 0;
+		switch (dax_cxl_mode) {
+		case DAX_CXL_MODE_DEFER:
+			dev_dbg(host, "deferring range to CXL: %pr\n", res);
+			schedule_work(&work->work);
+			return 0;
+		case DAX_CXL_MODE_REGISTER:
+			dev_dbg(host, "registering CXL range: %pr\n", res);
+			break;
+		case DAX_CXL_MODE_DROP:
+			dev_dbg(host, "dropping CXL range: %pr\n", res);
+			return 0;
+		}
 	}
 
 #ifdef CONFIG_EFI_SOFT_RESERVE
@@ -130,8 +176,30 @@ static int hmem_register_device(struct device *host, int target_nid,
 	return rc;
 }
 
+static void kill_defer_work(void *_work)
+{
+	struct dax_defer_work *work = container_of(_work, typeof(*work), work);
+
+	cancel_work_sync(&work->work);
+	kfree(work);
+}
+
 static int dax_hmem_platform_probe(struct platform_device *pdev)
 {
+	struct dax_defer_work *work = kzalloc(sizeof(*work), GFP_KERNEL);
+	int rc;
+
+	if (!work)
+		return -ENOMEM;
+
+	work->pdev = pdev;
+	INIT_WORK(&work->work, process_defer_work);
+
+	rc = devm_add_action_or_reset(&pdev->dev, kill_defer_work, work);
+	if (rc)
+		return rc;
+
+	platform_set_drvdata(pdev, work);
 	return walk_hmem_resources(&pdev->dev, hmem_register_device);
 }
-- 
2.17.1
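For reviewers who want the intended ordering in isolation, below is a minimal
userspace sketch of the dax_cxl_mode handoff. It only models the decision
logic from the patch: claim_range() and the intersects_cxl flag are
hypothetical stand-ins for hmem_register_device() and the
region_intersects()/IORES_DESC_CXL check, and flipping the mode variable
stands in for process_defer_work() running after wait_for_device_probe().
This is an illustration, not kernel code.

/*
 * Illustrative model of the DEFER -> DROP handoff; names mirror the patch,
 * but claim_range()/intersects_cxl are invented for this sketch.
 */
#include <stdbool.h>
#include <stdio.h>

enum dax_cxl_mode { DAX_CXL_MODE_DEFER, DAX_CXL_MODE_REGISTER, DAX_CXL_MODE_DROP };

static enum dax_cxl_mode mode = DAX_CXL_MODE_DEFER;

/* Returns true when hmem should register a dax device for the range now. */
static bool claim_range(const char *range, bool intersects_cxl)
{
	if (!intersects_cxl)
		return true;		/* plain hmem range, claim immediately */

	switch (mode) {
	case DAX_CXL_MODE_DEFER:
		printf("deferring %s to CXL\n", range);
		return false;		/* revisit once CXL probing settles */
	case DAX_CXL_MODE_REGISTER:
		printf("registering %s despite CXL overlap\n", range);
		return true;
	case DAX_CXL_MODE_DROP:
		printf("dropping %s, CXL owns it\n", range);
		return false;
	}
	return false;
}

int main(void)
{
	/* First walk: the overlapping Soft Reserved range is deferred. */
	claim_range("soft-reserved A", true);

	/*
	 * Models process_defer_work(): after wait_for_device_probe() the
	 * overlap is left to the CXL subsystem for good.
	 */
	mode = DAX_CXL_MODE_DROP;

	/* Second walk: the same range is now dropped, not claimed by hmem. */
	claim_range("soft-reserved A", true);
	return 0;
}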