Hi Philipp,

see below...

On Thu, 2019-09-19 at 16:29 +0200, Philipp Puschmann wrote:
> For some years and since many kernel versions there are reports that the
> RX UART SDMA channel stops working at some point. The workaround was to
> disable DMA for RX. This commit tries to fix the problem itself.
> 
> Due to its license i wasn't able to debug the sdma script itself but it
> somehow leads to blocking the scheduling of the channel script when a
> running sdma script does not find any free descriptor in the ring to put
> its data into.
> 
> If we detect such a potential case we manually restart the channel.
> 
> As sdmac->desc is constant we can move desc out of the loop.
> 
> Fixes: 1ec1e82f2510 ("dmaengine: Add Freescale i.MX SDMA support")
> Signed-off-by: Philipp Puschmann <philipp.puschm...@emlix.com>
> Reviewed-by: Lucas Stach <l.st...@pengutronix.de>
> ---
> 
> Changelog v4:
>  - fixed the fixes tag
>  
> Changelog v3:
>  - use correct dma_wmb() instead of dma_wb()
>  - add fixes tag
>  
> Changelog v2:
>  - clarify comment and commit description
> 
>  drivers/dma/imx-sdma.c | 21 +++++++++++++++++----
>  1 file changed, 17 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
> index e029a2443cfc..a32b5962630e 100644
> --- a/drivers/dma/imx-sdma.c
> +++ b/drivers/dma/imx-sdma.c
> @@ -775,21 +775,23 @@ static void sdma_start_desc(struct sdma_channel *sdmac)
>  static void sdma_update_channel_loop(struct sdma_channel *sdmac)
>  {
>       struct sdma_buffer_descriptor *bd;
> -     int error = 0;
> -     enum dma_status old_status = sdmac->status;
> +     struct sdma_desc *desc = sdmac->desc;
> +     int error = 0, cnt = 0;
> +     enum dma_status old_status = sdmac->status;
>  
>       /*
>        * loop mode. Iterate over descriptors, re-setup them and
>        * call callback function.
>        */
> -     while (sdmac->desc) {
> -             struct sdma_desc *desc = sdmac->desc;
> +     while (desc) {
>  
>               bd = &desc->bd[desc->buf_tail];
>  
>               if (bd->mode.status & BD_DONE)
>                       break;
>  
> +             cnt++;
> +
>               if (bd->mode.status & BD_RROR) {
>                       bd->mode.status &= ~BD_RROR;
>                       sdmac->status = DMA_ERROR;
> @@ -822,6 +824,17 @@ static void sdma_update_channel_loop(struct sdma_channel 
> *sdmac)
>               if (error)
>                       sdmac->status = old_status;
>       }
> +
> +     /* In some situations it may happen that the sdma does not found any
                                                          ^ hasn't
> +      * usable descriptor in the ring to put data into. The channel is
> +      * stopped then. While there is no specific error condition we can
> +      * check for, a necessary condition is that all available buffers for
> +      * the current channel have been written to by the sdma script. In
> +      * this case and after we have made the buffers available again,
> +      * we restart the channel.
> +      */

Are you sure we can't miss cases where we only had to make some buffers
available again, but the SDMA already ran out of buffers before?

A while ago, I was debugging a similar issue triggered by receiving
data with a wrong baud rate, which leads to all descriptors being
marked with the error flag very quickly (and the SDMA stalling).
I noticed that you can check if the channel is still running by
checking the SDMA_H_STATSTOP register & BIT(sdmac->channel).

I also added a flag for the sdmac->flags field to allow stopping the
channel from the callback (otherwise it would enable the channel
again).

Attached is my current version of that patch for reference.

> +     if (cnt >= desc->num_bd)
> +             sdma_enable_channel(sdmac->sdma, sdmac->channel);
>  }
>  
>  static void mxc_sdma_handle_channel_normal(struct sdma_channel *data)
From 73d7dcf84dac5512c50448ff6adf084f1a9bd6f9 Mon Sep 17 00:00:00 2001
From: Jan Luebbe <j...@pengutronix.de>
Date: Tue, 16 Apr 2019 18:35:04 +0200
Subject: [PATCH] dmaengine: imx-sdma: restart stopped cyclic transfers

For cyclic DMA transfers, we have at least two cases where we can run
out descriptors available to the engine:
- Interrups are disabled for too long and all buffers a filled with
  data.
- DMA errors (such as generated by baud rate mismatch with imx-uart) use
  up all descriptors before we can react.

In this case, SDMA stops the channel and no further transfers are done
until the respective channel is disabled and re-enabled.

The best we can do in this case is to check if the transfer should still
be enabled (it could have been disabled during
sdma_update_channel_loop), but the SDMA channel is stopped. In this
case, we re-start the channel.

To avoid racing with changes to the sdmac->status field (which is
written and restored in sdma_update_channel_loop), we add a new flag
(IMX_DMA_ACTIVE) to indicate that the channel is currently active.

Signed-off-by: Jan Luebbe <j...@pengutronix.de>
---
 drivers/dma/imx-sdma.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 58fa8520892b..8774259af24c 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -383,6 +383,7 @@ struct sdma_channel {
 };
 
 #define IMX_DMA_SG_LOOP		BIT(0)
+#define IMX_DMA_ACTIVE		BIT(1)
 
 #define MAX_DMA_CHANNELS 32
 #define MXC_SDMA_DEFAULT_PRIORITY 1
@@ -658,6 +659,9 @@ static int sdma_config_ownership(struct sdma_channel *sdmac,
 
 static void sdma_enable_channel(struct sdma_engine *sdma, int channel)
 {
+	struct sdma_channel *sdmac = &sdma->channel[channel];
+
+	sdmac->flags |= IMX_DMA_ACTIVE;
 	writel(BIT(channel), sdma->regs + SDMA_H_START);
 }
 
@@ -774,6 +778,7 @@ static void sdma_start_desc(struct sdma_channel *sdmac)
 
 static void sdma_update_channel_loop(struct sdma_channel *sdmac)
 {
+	struct sdma_engine *sdma = sdmac->sdma;
 	struct sdma_buffer_descriptor *bd;
 	int error = 0;
 	enum dma_status	old_status = sdmac->status;
@@ -820,6 +825,13 @@ static void sdma_update_channel_loop(struct sdma_channel *sdmac)
 		if (error)
 			sdmac->status = old_status;
 	}
+
+	if ((sdmac->flags & IMX_DMA_ACTIVE) &&
+	    !(readl_relaxed(sdma->regs + SDMA_H_STATSTOP) & BIT(sdmac->channel))) {
+		dev_err_ratelimited(sdma->dev, "SDMA channel %d: cyclic transfer disabled by HW, reenabling\n",
+				sdmac->channel);
+		writel(BIT(sdmac->channel), sdma->regs + SDMA_H_START);
+	};
 }
 
 static void mxc_sdma_handle_channel_normal(struct sdma_channel *data)
@@ -1049,6 +1061,7 @@ static int sdma_disable_channel(struct dma_chan *chan)
 	struct sdma_engine *sdma = sdmac->sdma;
 	int channel = sdmac->channel;
 
+	sdmac->flags &= ~IMX_DMA_ACTIVE;
 	writel_relaxed(BIT(channel), sdma->regs + SDMA_H_STATSTOP);
 	sdmac->status = DMA_ERROR;
 
-- 
2.23.0

Reply via email to