Implement the release path that mirrors the add path: when the device asks for capacity back, the dax layer tears down the per-extent resources for the whole tag group atomically.
If any extent in the group is still mapped by a dev_dax, the release is refused with -EBUSY and no state changes; the cxl side then leaves the tag group intact and the device retries. Also add a rollback to the add path: if any per-extent registration fails midway through a group, undo the ones already added so a partial group never leaks into the dax region. Based on an original patch by Navneet Singh. Signed-off-by: Ira Weiny <[email protected]> Signed-off-by: Anisa Su <[email protected]> --- Changes: [anisa: split out from the original "Surface dc_extents" commit; fills in the RELEASE half of the bridge, moves the cxl-side RELEASE notify into this commit, and adds the rollback path to ADD.] --- drivers/cxl/core/extent.c | 13 +++++++++ drivers/dax/bus.c | 59 +++++++++++++++++++++++++++++++++++++++ drivers/dax/cxl.c | 54 +++++++++++++++++++++++++++-------- drivers/dax/dax-private.h | 8 ++++-- 4 files changed, 120 insertions(+), 14 deletions(-) diff --git a/drivers/cxl/core/extent.c b/drivers/cxl/core/extent.c index 3fc4b7292664..2c8edfe53c0a 100644 --- a/drivers/cxl/core/extent.c +++ b/drivers/cxl/core/extent.c @@ -532,6 +532,7 @@ int cxl_rm_extent(struct cxl_memdev_state *mds, struct cxl_extent *extent) struct range dpa_range; unsigned long idx; uuid_t tag; + int rc; dpa_range = (struct range) { .start = start_dpa, @@ -588,6 +589,18 @@ int cxl_rm_extent(struct cxl_memdev_state *mds, struct cxl_extent *extent) return -EINVAL; } + rc = cxlr_notify_extent(cxlr, DCD_RELEASE_CAPACITY, group); + if (rc) { + /* + * dax layer refused (-EBUSY) or failed (-ENOMEM, etc.). Do + * not proceed to tear down the tag group — leave its + * dax_resources alive so we do not free them out from under + * live dev_dax ranges. The device will retry the release. 
+ */ + return 0; + } + + /* Release the entire tag group */ rm_tag_group(group); return 0; } diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index a6ee59f2d8a1..6368bdfdf93a 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -253,6 +253,65 @@ int dax_region_add_resource(struct dax_region *dax_region, } EXPORT_SYMBOL_GPL(dax_region_add_resource); +int dax_region_rm_resource(struct dax_region *dax_region, + struct device *dev) +{ + struct dax_resource *dax_resource; + + guard(rwsem_write)(&dax_region_rwsem); + + dax_resource = dev_get_drvdata(dev); + if (!dax_resource) + return 0; + + if (dax_resource->use_cnt) + return -EBUSY; + + /* + * release the resource under dax_region_rwsem to avoid races with + * users trying to use the extent + */ + __dax_release_resource(dax_resource); + dev_set_drvdata(dev, NULL); + return 0; +} +EXPORT_SYMBOL_GPL(dax_region_rm_resource); + +/** + * dax_region_rm_resources - atomically remove a set of dax_resources. + * + * Walk @devs twice under dax_region_rwsem. First pass refuses the + * operation if any member's use_cnt is non-zero; second pass releases + * each. This gives refuse-all-or-none semantics across the set, which + * a tag group's atomic release relies on. Devices with no + * dax_resource attached are silently skipped. 
+ */ +int dax_region_rm_resources(struct dax_region *dax_region, + struct device * const *devs, unsigned int n) +{ + unsigned int i; + + guard(rwsem_write)(&dax_region_rwsem); + + for (i = 0; i < n; i++) { + struct dax_resource *r = dev_get_drvdata(devs[i]); + + if (r && r->use_cnt) + return -EBUSY; + } + + for (i = 0; i < n; i++) { + struct dax_resource *r = dev_get_drvdata(devs[i]); + + if (!r) + continue; + __dax_release_resource(r); + dev_set_drvdata(devs[i], NULL); + } + return 0; +} +EXPORT_SYMBOL_GPL(dax_region_rm_resources); + bool static_dev_dax(struct dev_dax *dev_dax) { return is_static(dev_dax->region); diff --git a/drivers/dax/cxl.c b/drivers/dax/cxl.c index 690cf625e052..04b73315a8f2 100644 --- a/drivers/dax/cxl.c +++ b/drivers/dax/cxl.c @@ -44,19 +44,52 @@ static int cxl_dax_group_add(struct dax_region *dax_region, xa_for_each(&group->dc_extents, index, dc_extent) { rc = __cxl_dax_add_resource(dax_region, dc_extent); - if (rc) + if (rc) { + /* + * Unwind every dax_resource already added for this + * group; one rm per owner suffices. + */ + struct dc_extent *u; + unsigned long uidx; + + xa_for_each(&group->dc_extents, uidx, u) { + if (u == dc_extent) + break; + dax_region_rm_resource(dax_region, &u->dev); + } return rc; + } } return 0; } -/* - * RELEASE is still a stub here — the atomic dax_region_rm_resources API - * and its wire-up land in the next commit. An incoming RELEASE returns - * success and the cxl side proceeds to rm_tag_group(), which device- - * unregisters each dc_extent; the devm action armed by - * dax_region_add_resource() then tears down each dax_resource. 
- */ +static int cxl_dax_group_rm(struct dax_region *dax_region, + struct cxl_dc_tag_group *group) +{ + struct dc_extent *dc_extent; + struct device **devs; + unsigned long index; + unsigned int n = 0; + int rc; + + if (!group->nr_extents) + return 0; + + devs = kmalloc_array(group->nr_extents, sizeof(*devs), GFP_KERNEL); + if (!devs) + return -ENOMEM; + + xa_for_each(&group->dc_extents, index, dc_extent) { + if (n == group->nr_extents) + break; + devs[n++] = &dc_extent->dev; + } + + rc = dax_region_rm_resources(dax_region, devs, n); + kfree(devs); + return rc; +} + static int cxl_dax_region_notify(struct device *dev, struct cxl_notify_data *notify_data) { @@ -68,10 +101,7 @@ static int cxl_dax_region_notify(struct device *dev, case DCD_ADD_CAPACITY: return cxl_dax_group_add(dax_region, group); case DCD_RELEASE_CAPACITY: - dev_dbg(&cxlr_dax->dev, - "DCD RELEASE notify (tag %pUb): no-op (stub)\n", - &group->uuid); - return 0; + return cxl_dax_group_rm(dax_region, group); case DCD_FORCED_CAPACITY_RELEASE: default: dev_err(&cxlr_dax->dev, "Unknown DC event %d\n", diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h index f2ae5918f94d..414813a6137f 100644 --- a/drivers/dax/dax-private.h +++ b/drivers/dax/dax-private.h @@ -146,13 +146,17 @@ struct dax_resource { }; /* - * Similar to run_dax() dax_region_add_resource() is exported but is not - * intended to be a generic operation outside the dax subsystem. It is only + * Similar to run_dax() dax_region_{add,rm}_resource() are exported but are not + * intended to be generic operations outside the dax subsystem. They are only * generic between the dax layer and the dax drivers. 
*/ int dax_region_add_resource(struct dax_region *dax_region, struct device *dev, resource_size_t start, resource_size_t length, const uuid_t *tag, u16 seq_num); +int dax_region_rm_resource(struct dax_region *dax_region, + struct device *dev); +int dax_region_rm_resources(struct dax_region *dax_region, + struct device * const *devs, unsigned int n); static inline struct dev_dax *to_dev_dax(struct device *dev) { -- 2.43.0

