Implement the release path that mirrors the add path: when the
device asks for capacity back, the dax layer tears down the
per-extent resources for the whole tag group atomically.

If any extent in the group is still in use by a dev_dax (non-zero
use_cnt), the release is refused with -EBUSY and no state is changed;
the cxl side then leaves the tag group intact and the device retries.

Also add a rollback to the add path: if any per-extent registration
fails midway through a group, undo the ones already added so a
partial group never leaks into the dax region.

Based on an original patch by Navneet Singh.

Signed-off-by: Ira Weiny <[email protected]>
Signed-off-by: Anisa Su <[email protected]>

---
Changes:
[anisa: split out from the original "Surface dc_extents" commit;
 fills in the RELEASE half of the bridge, moves the cxl-side RELEASE
 notify into this commit, and adds the rollback path to ADD.]
---
 drivers/cxl/core/extent.c | 13 +++++++++
 drivers/dax/bus.c         | 59 +++++++++++++++++++++++++++++++++++++++
 drivers/dax/cxl.c         | 54 +++++++++++++++++++++++++++--------
 drivers/dax/dax-private.h |  8 ++++--
 4 files changed, 120 insertions(+), 14 deletions(-)

diff --git a/drivers/cxl/core/extent.c b/drivers/cxl/core/extent.c
index 3fc4b7292664..2c8edfe53c0a 100644
--- a/drivers/cxl/core/extent.c
+++ b/drivers/cxl/core/extent.c
@@ -532,6 +532,7 @@ int cxl_rm_extent(struct cxl_memdev_state *mds, struct cxl_extent *extent)
        struct range dpa_range;
        unsigned long idx;
        uuid_t tag;
+       int rc;
 
        dpa_range = (struct range) {
                .start = start_dpa,
@@ -588,6 +589,18 @@ int cxl_rm_extent(struct cxl_memdev_state *mds, struct cxl_extent *extent)
                return -EINVAL;
        }
 
+       rc = cxlr_notify_extent(cxlr, DCD_RELEASE_CAPACITY, group);
+       if (rc) {
+               /*
+                * dax layer refused (-EBUSY) or failed (-ENOMEM, etc.).  Do
+                * not proceed to tear down the tag group — leave its
+                * dax_resources alive so we do not free them out from under
+                * live dev_dax ranges.  The device will retry the release.
+                */
+               return 0;
+       }
+
+       /* Release the entire tag group */
        rm_tag_group(group);
        return 0;
 }
diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index a6ee59f2d8a1..6368bdfdf93a 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -253,6 +253,65 @@ int dax_region_add_resource(struct dax_region *dax_region,
 }
 EXPORT_SYMBOL_GPL(dax_region_add_resource);
 
+int dax_region_rm_resource(struct dax_region *dax_region,
+                          struct device *dev)
+{
+       struct dax_resource *dax_resource;
+
+       guard(rwsem_write)(&dax_region_rwsem);
+
+       dax_resource = dev_get_drvdata(dev);
+       if (!dax_resource)      /* nothing attached to @dev: treat as done */
+               return 0;
+
+       if (dax_resource->use_cnt)      /* still in use: refuse, change nothing */
+               return -EBUSY;
+
+       /*
+        * Release with dax_region_rwsem still held so no user can race to use
+        * the extent; clearing drvdata below makes a repeated call return 0.
+        */
+       __dax_release_resource(dax_resource);
+       dev_set_drvdata(dev, NULL);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(dax_region_rm_resource);
+
+/**
+ * dax_region_rm_resources - atomically remove a set of dax_resources
+ * @dax_region: dax region the resources belong to (not used in the body)
+ * @devs: array of devices whose drvdata is the dax_resource to remove
+ * @n: number of entries in @devs
+ *
+ * Under dax_region_rwsem: -EBUSY and no change if any use_cnt is non-zero,
+ * else release all (NULL drvdata skipped); tag-group release relies on this.
+ */
+int dax_region_rm_resources(struct dax_region *dax_region,
+                           struct device * const *devs, unsigned int n)
+{
+       unsigned int i;
+
+       guard(rwsem_write)(&dax_region_rwsem);
+
+       for (i = 0; i < n; i++) {       /* pass 1: check only */
+               struct dax_resource *r = dev_get_drvdata(devs[i]);
+
+               if (r && r->use_cnt)
+                       return -EBUSY;
+       }
+
+       for (i = 0; i < n; i++) {       /* pass 2: release every member */
+               struct dax_resource *r = dev_get_drvdata(devs[i]);
+
+               if (!r)         /* no dax_resource attached */
+                       continue;
+               __dax_release_resource(r);
+               dev_set_drvdata(devs[i], NULL);
+       }
+       return 0;
+}
+EXPORT_SYMBOL_GPL(dax_region_rm_resources);
+
 bool static_dev_dax(struct dev_dax *dev_dax)
 {
        return is_static(dev_dax->region);
diff --git a/drivers/dax/cxl.c b/drivers/dax/cxl.c
index 690cf625e052..04b73315a8f2 100644
--- a/drivers/dax/cxl.c
+++ b/drivers/dax/cxl.c
@@ -44,19 +44,52 @@ static int cxl_dax_group_add(struct dax_region *dax_region,
 
        xa_for_each(&group->dc_extents, index, dc_extent) {
                rc = __cxl_dax_add_resource(dax_region, dc_extent);
-               if (rc)
+               if (rc) {
+                       /*
+                        * Unwind every dax_resource already added for this
+                        * group; one rm per owner suffices.
+                        */
+                       struct dc_extent *u;
+                       unsigned long uidx;
+
+                       xa_for_each(&group->dc_extents, uidx, u) {
+                               if (u == dc_extent)
+                                       break;
+                               dax_region_rm_resource(dax_region, &u->dev);
+                       }
                        return rc;
+               }
        }
        return 0;
 }
 
-/*
- * RELEASE is still a stub here — the atomic dax_region_rm_resources API
- * and its wire-up land in the next commit.  An incoming RELEASE returns
- * success and the cxl side proceeds to rm_tag_group(), which device-
- * unregisters each dc_extent; the devm action armed by
- * dax_region_add_resource() then tears down each dax_resource.
- */
+static int cxl_dax_group_rm(struct dax_region *dax_region,
+                           struct cxl_dc_tag_group *group)
+{
+       struct dc_extent *dc_extent;
+       struct device **devs;
+       unsigned long index;
+       unsigned int n = 0;
+       int rc;
+
+       if (!group->nr_extents) /* empty group: nothing to release */
+               return 0;
+       /* Gather the group's devices so they can be removed as one set. */
+       devs = kmalloc_array(group->nr_extents, sizeof(*devs), GFP_KERNEL);
+       if (!devs)
+               return -ENOMEM;
+
+       xa_for_each(&group->dc_extents, index, dc_extent) {
+               if (n == group->nr_extents)     /* devs[] full: do not overrun */
+                       break;
+               devs[n++] = &dc_extent->dev;
+       }
+       /* Returns -EBUSY, releasing nothing, if any member is in use. */
+       rc = dax_region_rm_resources(dax_region, devs, n);
+       kfree(devs);    /* frees only the temporary pointer array */
+       return rc;
+}
+
 static int cxl_dax_region_notify(struct device *dev,
                                 struct cxl_notify_data *notify_data)
 {
@@ -68,10 +101,7 @@ static int cxl_dax_region_notify(struct device *dev,
        case DCD_ADD_CAPACITY:
                return cxl_dax_group_add(dax_region, group);
        case DCD_RELEASE_CAPACITY:
-               dev_dbg(&cxlr_dax->dev,
-                       "DCD RELEASE notify (tag %pUb): no-op (stub)\n",
-                       &group->uuid);
-               return 0;
+               return cxl_dax_group_rm(dax_region, group);
        case DCD_FORCED_CAPACITY_RELEASE:
        default:
                dev_err(&cxlr_dax->dev, "Unknown DC event %d\n",
diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
index f2ae5918f94d..414813a6137f 100644
--- a/drivers/dax/dax-private.h
+++ b/drivers/dax/dax-private.h
@@ -146,13 +146,17 @@ struct dax_resource {
 };
 
 /*
- * Similar to run_dax() dax_region_add_resource() is exported but is not
- * intended to be a generic operation outside the dax subsystem.  It is only
+ * Similar to run_dax() dax_region_{add,rm}_resource() are exported but are not
+ * intended to be generic operations outside the dax subsystem.  They are only
  * generic between the dax layer and the dax drivers.
  */
 int dax_region_add_resource(struct dax_region *dax_region, struct device *dev,
                            resource_size_t start, resource_size_t length,
                            const uuid_t *tag, u16 seq_num);
+int dax_region_rm_resource(struct dax_region *dax_region,
+                          struct device *dev);
+int dax_region_rm_resources(struct dax_region *dax_region,
+                           struct device * const *devs, unsigned int n);
 
 static inline struct dev_dax *to_dev_dax(struct device *dev)
 {
-- 
2.43.0


Reply via email to