On Wed, Jun 10, 2026 at 08:52:32PM -0700, Rosen Penev wrote: > fsldma_run_tx_complete_actions() calls dmaengine_desc_get_callback_invoke() > while still holding chan->desc_lock. If the client submits a new > transaction from their completion callback, fsl_dma_tx_submit() > tries to acquire the same non-recursive spinlock, causing a > self-deadlock. > > Fix by extracting the callback info under the lock, removing the > descriptor from ld_running, dropping the lock, then invoking the > callback and running dependencies outside the lock. > > Assisted-by: opencode:big-pickle > Signed-off-by: Rosen Penev <[email protected]> > --- > drivers/dma/fsldma.c | 108 ++++++++++++++++++++++--------------------- > 1 file changed, 55 insertions(+), 53 deletions(-) > > diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c > index 0e2f84862261..455d21d738de 100644 > --- a/drivers/dma/fsldma.c > +++ b/drivers/dma/fsldma.c > @@ -496,16 +496,19 @@ static void fsldma_clean_completed_descriptor(struct > fsldma_chan *chan) > } > > /** > - * fsldma_run_tx_complete_actions - cleanup a single link descriptor > + * fsldma_run_tx_complete_actions - unmap and extract callback from a > descriptor > * @chan: Freescale DMA channel > - * @desc: descriptor to cleanup and free > + * @desc: descriptor to process > * @cookie: Freescale DMA transaction identifier > + * @cb: returned callback information > * > - * This function is used on a descriptor which has been executed by the DMA > - * controller. It will run any callbacks, submit any dependencies. > + * Unmap the descriptor if it has been submitted and extract its callback > + * into @cb. The caller must invoke the callback and run dependencies > + * after releasing chan->desc_lock. > */ > static dma_cookie_t fsldma_run_tx_complete_actions(struct fsldma_chan *chan, > - struct fsl_desc_sw *desc, dma_cookie_t cookie) > + struct fsl_desc_sw *desc, dma_cookie_t cookie, > + struct dmaengine_desc_callback *cb) > { > struct dma_async_tx_descriptor *txd = &desc->async_tx; > dma_cookie_t ret = cookie; > @@ -514,49 +517,14 @@ static dma_cookie_t > fsldma_run_tx_complete_actions(struct fsldma_chan *chan, > > if (txd->cookie > 0) { > ret = txd->cookie; > - > dma_descriptor_unmap(txd); > - /* Run the link descriptor callback function */ > - dmaengine_desc_get_callback_invoke(txd, NULL); > } > > - /* Run any dependencies */ > - dma_run_dependencies(txd); > + dmaengine_desc_get_callback(txd, cb); > > return ret; > } > > -/** > - * fsldma_clean_running_descriptor - move the completed descriptor from > - * ld_running to ld_completed > - * @chan: Freescale DMA channel > - * @desc: the descriptor which is completed > - * > - * Free the descriptor directly if acked by async_tx api, or move it to > - * queue ld_completed. > - */ > -static void fsldma_clean_running_descriptor(struct fsldma_chan *chan, > - struct fsl_desc_sw *desc) > -{ > - /* Remove from the list of transactions */ > - list_del(&desc->node); > - > - /* > - * the client is allowed to attach dependent operations > - * until 'ack' is set > - */ > - if (!async_tx_test_ack(&desc->async_tx)) { > - /* > - * Move this descriptor to the list of descriptors which is > - * completed, but still awaiting the 'ack' bit to be set. > - */ > - list_add_tail(&desc->node, &chan->ld_completed); > - return; > - } > - > - dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys); > -} > - > /** > * fsl_chan_xfer_ld_queue - transfer any pending transactions > * @chan : Freescale DMA channel > @@ -635,22 +603,23 @@ static void fsl_chan_xfer_ld_queue(struct fsldma_chan > *chan) > */ > static void fsldma_cleanup_descriptors(struct fsldma_chan *chan) > { > - struct fsl_desc_sw *desc, *_desc; > + struct fsl_desc_sw *desc; > dma_cookie_t cookie = 0; > dma_addr_t curr_phys = get_cdar(chan); > int seen_current = 0; > > fsldma_clean_completed_descriptor(chan); > > - /* Run the callback for each descriptor, in order */ > - list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) { > - /* > - * do not advance past the current descriptor loaded into the > - * hardware channel, subsequent descriptors are either in > - * process or have not been submitted > - */ > - if (seen_current) > - break; > + /* > + * Take descriptors one at a time from the front of the running > + * queue. We re-read the list each iteration so that we don't > + * chase a stale next pointer across the lock-drop below. > + */ > + while (!seen_current && !list_empty(&chan->ld_running)) { > + struct dmaengine_desc_callback cb; > + > + desc = list_first_entry(&chan->ld_running, > + struct fsl_desc_sw, node); > > /* > * stop the search if we reach the current descriptor and the > @@ -662,9 +631,42 @@ static void fsldma_cleanup_descriptors(struct > fsldma_chan *chan) > break; > } > > - cookie = fsldma_run_tx_complete_actions(chan, desc, cookie); > + cookie = fsldma_run_tx_complete_actions(chan, desc, cookie, > &cb); > > - fsldma_clean_running_descriptor(chan, desc); > + /* > + * Remove from the running list before dropping the lock so > + * that terminate_all cannot free this descriptor while we > + * call into the client below. > + */ > + list_del(&desc->node); > + > + /* > + * Prevent dma_run_dependencies() from calling > + * fsl_chan_xfer_ld_queue() while we are not holding the > + * lock. That would splice pending descriptors into > + * ld_running before they have been completed by hardware. > + * fsl_chan_xfer_ld_queue at the end of this function will > + * re-evaluate the situation. > + */ > + chan->idle = false; > + > + /* > + * Drop the lock before invoking the client callback, since > + * the DMAengine API explicitly allows clients to submit new > + * transactions from their completion callback. Otherwise > + * we self-deadlock on chan->desc_lock. > + */ > + spin_unlock(&chan->desc_lock); > + dmaengine_desc_callback_invoke(&cb, NULL); > + dma_run_dependencies(&desc->async_tx); > + spin_lock(&chan->desc_lock);
Not sure if you have hardware to test it. This change is quite big. Generally, keep desc_lock and move these complete queue, defer to tasklet or workqueue run callback in in complete queue by hold complete queue's lock. > + > + chan->idle = true; > + > + if (!async_tx_test_ack(&desc->async_tx)) > + list_add_tail(&desc->node, &chan->ld_completed); > + else > + dma_pool_free(chan->desc_pool, desc, > desc->async_tx.phys); desc already removed from list, needn't hold desc_lock, you can move it just before spin_lock() You should use difference lock for ld_completed. Frank > } > > /* > -- > 2.54.0 >
