mem: Add 20 second timeout for stalled DC_ADD_CAPACITY chains

Anisa Su Tue, 09 Jun 2026 10:49:52 -0700

On Thu, May 28, 2026 at 09:57:25AM -0700, Dave Jiang wrote:
> 
> 
> On 5/23/26 2:43 AM, Anisa Su wrote:
> > A DC_ADD_CAPACITY event can span multiple event records grouped together
> > by the CXL_DCD_EVENT_MORE flag. Extents are staged in the pending list until
> > the last event record ('More'=0) is received, at which point the pending
> > list is processed. If the device opens such a chain (More=1) but never
> > sends the closing record, the staged list sits indefinitely.
> > 
> > Add a delayed-work watchdog that, on expiry, refuses the chain with an
> > empty ADD_DC_RESPONSE and drops the staged list.
> > 
> > The 20s timeout is a conservative upper bound and may be tightened
> > later. The timeout is purely defensive — the spec does not require it,
> > but it prevents issues from a lost mailbox response or a crashed fabric
> > manager.
> > 
> > Signed-off-by: Anisa Su <[email protected]>
> > ---
> >  drivers/cxl/core/mbox.c | 73 ++++++++++++++++++++++++++++++++++++++++-
> >  drivers/cxl/cxlmem.h    | 23 ++++++++++---
> >  2 files changed, 91 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> > index 1b38f34538f3..c376492fa166 100644
> > --- a/drivers/cxl/core/mbox.c
> > +++ b/drivers/cxl/core/mbox.c
> > @@ -1219,6 +1219,48 @@ static void clear_pending_extents(void *_mds)
> >     mds->add_ctx.group = NULL;
> >  }
> >  
> > +/*
> > + * Bound on how long the host will wait for a device to finish a
> > + * multi-record DC_ADD_CAPACITY chain (More=1 ... More=0) before
> > + * refusing the chain.
> > + * The timeout is not defined in the spec, but added for defensive purposes.
> > + * Since there is no spec-defined timeout, 20s is chosen as a generous
> > + * upper bound and matches the GPF timeout.
> > + */
> > +#define CXL_DC_ADD_TIMEOUT (20 * HZ)
> > +
> > +static void cxl_dc_add_timeout(struct work_struct *work)
> > +{
> > +   struct pending_add_ctx *ctx = container_of(to_delayed_work(work),
> > +                                              struct pending_add_ctx,
> > +                                              timeout_work);
> > +   struct cxl_memdev_state *mds = container_of(ctx,
> > +                                               struct cxl_memdev_state,
> > +                                               add_ctx);
> > +   struct device *dev = mds->cxlds.dev;
> > +
> > +   guard(mutex)(&ctx->lock);
> > +
> > +   if (!ctx->armed)
> > +           return;
> > +
> > +   dev_warn(dev, "DC add chain timed out; refusing staged extents\n");
> > +
> > +   if (cxl_send_dc_response(mds, CXL_MBOX_OP_ADD_DC_RESPONSE,
> > +                            &ctx->pending_extents, 0))
> > +           dev_dbg(dev, "Failed to send empty ADD_DC_RESPONSE on timeout\n");
> > +
> > +   clear_pending_extents(mds);
> > +   ctx->armed = false;
> > +}
> > +
> > +static void cxl_cancel_dcd_add_chain_work(void *_mds)
> > +{
> > +   struct cxl_memdev_state *mds = _mds;
> > +
> > +   cancel_delayed_work_sync(&mds->add_ctx.timeout_work);
> > +}
> > +
> >  static int add_to_pending_list(struct list_head *pending_list,
> >                            struct cxl_extent *to_add)
> >  {
> > @@ -1246,18 +1288,34 @@ static int add_to_pending_list(struct list_head *pending_list,
> >  static int handle_add_event(struct cxl_memdev_state *mds,
> >                         struct cxl_event_dcd *event)
> >  {
> > +   struct pending_add_ctx *ctx = &mds->add_ctx;
> >     struct device *dev = mds->cxlds.dev;
> >     int rc;
> >  
> > -   rc = add_to_pending_list(&mds->add_ctx.pending_extents, &event->extent);
> > +   guard(mutex)(&ctx->lock);
> > +
> > +   rc = add_to_pending_list(&ctx->pending_extents, &event->extent);
> >     if (rc)
> >             return rc;
> >  
> >     if (event->flags & CXL_DCD_EVENT_MORE) {
> >             dev_dbg(dev, "more bit set; delay the surfacing of extent\n");
> > +           mod_delayed_work(system_wq, &ctx->timeout_work,
> > +                                            CXL_DC_ADD_TIMEOUT);
> > +           ctx->armed = true;
> >             return 0;
> >     }
> >  
> > +   /*
> > +    * Chain is closing.  Disarm before flushing so a pending watchdog
> > +    * (queued but blocked on @ctx->lock) sees !armed and bails out.
> > +    * cancel_delayed_work() — not _sync — because handle_add_event()
> > +    * itself runs on system_wq and a sync cancel of same-wq work can
> > +    * deadlock.
> > +    */
> 
> I don't think this comment is correct. handle_add_event() is launched from a
> threaded IRQ and does not run on system_wq. Just drop the second part of the
> comment.
> 
Oh I see. Done!
> > +   ctx->armed = false;
> > +   cancel_delayed_work(&ctx->timeout_work);
> > +
> >     rc = cxl_send_dc_response(mds, CXL_MBOX_OP_ADD_DC_RESPONSE,
> >                               &mds->add_ctx.pending_extents, 0);
> >     clear_pending_extents(mds);
> > @@ -2009,11 +2067,24 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev, u64 serial,
> >  
> >     mutex_init(&mds->event.log_lock);
> >     INIT_LIST_HEAD(&mds->add_ctx.pending_extents);
> > +   mutex_init(&mds->add_ctx.lock);
> > +   INIT_DELAYED_WORK(&mds->add_ctx.timeout_work,
> > +                     cxl_dc_add_timeout);
> > +   mds->add_ctx.armed = false;
> 
> Not needed. The allocated memory is already zeroed.
> 
Dropped


> DJ
> 
Thanks,
Anisa
> >  
> >     rc = devm_add_action_or_reset(dev, clear_pending_extents, mds);
> >     if (rc)
> >             return ERR_PTR(rc);
> >  
> > +   /*
> > +    * Registered after clear_pending_extents so devm's reverse-order
> > +    * unwind cancels (and waits for) the watchdog first, then the list
> > +    * cleanup runs with the watchdog guaranteed not to refire.
> > +    */
> > +   rc = devm_add_action_or_reset(dev, cxl_cancel_dcd_add_chain_work, mds);
> > +   if (rc)
> > +           return ERR_PTR(rc);
> > +
> >     rc = devm_cxl_register_mce_notifier(dev, &mds->mce_notifier);
> >     if (rc == -EOPNOTSUPP)
> >             dev_warn(dev, "CXL MCE unsupported\n");
> > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> > index 592c8e3b611c..d992cc9b7811 100644
> > --- a/drivers/cxl/cxlmem.h
> > +++ b/drivers/cxl/cxlmem.h
> > @@ -8,6 +8,8 @@
> >  #include <linux/uuid.h>
> >  #include <linux/node.h>
> >  #include <linux/list.h>
> > +#include <linux/mutex.h>
> > +#include <linux/workqueue.h>
> >  #include <cxl/event.h>
> >  #include <cxl/mailbox.h>
> >  #include "cxl.h"
> > @@ -402,19 +404,32 @@ static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox)
> >  
> >  /**
> >   * struct pending_add_ctx - Staging state for an in-progress
> > - *                     DCD_ADD_CAPACITY event chain
> > + *                                                 DCD_ADD_CAPACITY event chain
> >   * @pending_extents: extents received so far in the chain; flushed when
> > - *              the chain closes (More=0)
> > + *                                  the chain closes (More=0)
> >   * @group: tag group being assembled from the chain
> > + * @timeout_work: watchdog that fires if a chain is opened with
> > + *                           CXL_DCD_EVENT_MORE but the closing record never arrives
> > + * @lock: serialises updates to the chain state against the watchdog
> > + * @armed: set when a More=1 chain opens; cleared when the chain closes,
> > + *            either by a More=0 event record or by the watchdog firing.
> >   *
> >   * A DCD_ADD_CAPACITY notification can span multiple event records
> >   * stitched together by the CXL_DCD_EVENT_MORE flag.  Records are staged
> > - * here until the device clears More, at which point the staged batch is
> > - * processed and responded to as a single Add_DC_Response.
> > + * here until an event record with 'More'=0 is received, at which point the
> > + * staged batch is processed and responded to as a single Add_DC_Response.
> > + *
> > + * If a chain is opened (More=1) but the device never sends the closing
> > + * record, the staged list would otherwise sit indefinitely.  @timeout_work
> > + * is a defensive watchdog that refuses such a chain with an empty response
> > + * and drops the staged list.
> >   */
> >  struct pending_add_ctx {
> >     struct list_head pending_extents;
> >     struct cxl_dc_tag_group *group;
> > +   struct delayed_work timeout_work;
> > +   struct mutex lock;
> > +   bool armed;
> >  };
> >  
> >  /**
>

Re: [PATCH v10 13/31] cxl/mem: Add 20 second timeout for stalled DC_ADD_CAPACITY chains

Reply via email to