This is an automated email from the ASF dual-hosted git repository.
xiaoxiang781216 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nuttx.git
The following commit(s) were added to refs/heads/master by this push:
new 8302295dd01 arch/arm/src/stm32/stm32_spi: chunk DMA exchanges to honor
16-bit NDTR
8302295dd01 is described below
commit 8302295dd01850bfdb19648466e507ca4784eede
Author: Jinji Cui <[email protected]>
AuthorDate: Thu May 14 09:57:56 2026 +0800
arch/arm/src/stm32/stm32_spi: chunk DMA exchanges to honor 16-bit NDTR
The STM32 DMA NDTR/CNDTR transfer-count register is 16 bits wide on
every STM32 series the in-tree driver supports (IPv1 CNDTR, IPv2
SxNDTR). spi_exchange()'s DMA path forwarded the caller's full
nwords to stm32_dmasetup(), so a single SPI_EXCHANGE() of >= 65536
words silently programmed NDTR to (nwords & 0xffff). When the
truncated count was zero - the typical case for an exact 64 KiB
transfer (flash erase block, FAT cluster, common DMA staging
buffer) - the stream completed instantly with no transfer-complete
interrupt and the caller deadlocked in spi_dmarxwait().
Walk the request in chunks of at most 65535 words inside the
existing DMA branch, reusing the same spi_dma{rx,tx}{setup,start,
wait}() sequence per chunk. Single-descriptor transfers (every
in-tree caller today) are byte-for-byte identical. CONFIG_SPI_TRIGGER
is honored for the first chunk only; subsequent chunks must run
unconditionally because re-arming between chunks of one logical
exchange was never part of the SPI_TRIGGER contract.
Drive-by: rescale priv->buflen-clamped nwords so the DMA
descriptor matches the actually-copied byte count, promote the
spiinfo() format specifier from %d to %zu, and fix two adjacent
comment typos.
See the PR description for reproduction, NSH log and benchmark
numbers (5.12 MB/s, 97.5% of SCK/8 @ 42 MHz on STM32F407 + W25Q128).
Signed-off-by: Jinji Cui <[email protected]>
---
arch/arm/src/stm32/stm32_spi.c | 110 ++++++++++++++++++++++++++++++++---------
1 file changed, 86 insertions(+), 24 deletions(-)
diff --git a/arch/arm/src/stm32/stm32_spi.c b/arch/arm/src/stm32/stm32_spi.c
index abb18e3db78..62b019ce07e 100644
--- a/arch/arm/src/stm32/stm32_spi.c
+++ b/arch/arm/src/stm32/stm32_spi.c
@@ -148,6 +148,19 @@
# error "Unknown STM32 DMA"
#endif
+/* Maximum number of data items per single DMA descriptor.
+ *
+ * Both the STM32 DMA IPv1 (CNDTR on F0/F1/F3/G4/L0/L1/L4) and IPv2 (SxNDTR
+ * on F2/F4/F7/H7) transfer-count registers are 16 bits wide, so each call
+ * to stm32_dmasetup() can program at most 65535 transfers. spi_exchange()
+ * below chunks larger requests to stay within this limit; without it a
+ * single SPI_EXCHANGE() of >= 64 KiB silently programs NDTR to 0 (low 16
+ * bits of nwords) and the driver blocks forever waiting on a DMA-complete
+ * IRQ that never fires.
+ */
+
+# define STM32_SPI_DMA_MAX_XFER 65535u
+
# define SPIDMA_BUFFER_MASK (4 - 1)
# define SPIDMA_SIZE(b) (((b) + SPIDMA_BUFFER_MASK) & ~SPIDMA_BUFFER_MASK)
# define SPIDMA_BUF_ALIGN aligned_data(4)
@@ -1871,14 +1884,17 @@ static void spi_exchange(struct spi_dev_s *dev, const
void *txbuffer,
{
static uint16_t rxdummy = 0xffff;
static const uint16_t txdummy = 0xffff;
+ const size_t word_size = (priv->nbits > 8) ? 2u : 1u;
+ const uint8_t *txp;
+ uint8_t *rxp;
- spiinfo("txbuffer=%p rxbuffer=%p nwords=%d\n",
+ spiinfo("txbuffer=%p rxbuffer=%p nwords=%zu\n",
txbuffer, rxbuffer, nwords);
DEBUGASSERT(priv && priv->spibase);
/* Setup DMAs */
- /* If this bus uses a in driver buffers we will incur 2 copies,
+ /* If this bus uses an in-driver buffer we will incur 2 copies,
* The copy cost is << less the non DMA transfer time and having
* the buffer in the driver ensures DMA can be used. This is because
* the API does not support passing the buffer extent so the only
@@ -1897,40 +1913,86 @@ static void spi_exchange(struct spi_dev_s *dev, const
void *txbuffer,
memcpy(priv->txbuf, txbuffer, nbytes);
txbuffer = priv->txbuf;
rxbuffer = rxbuffer ? priv->rxbuf : rxbuffer;
+
+ /* Rescale nwords to match the (possibly clamped) nbytes. */
+
+ nwords = (priv->nbits > 8) ? nbytes >> 1 : nbytes;
}
- spi_dmarxsetup(priv, rxbuffer, &rxdummy, nwords);
- spi_dmatxsetup(priv, txbuffer, &txdummy, nwords);
+ txp = (const uint8_t *)txbuffer;
+ rxp = (uint8_t *)rxbuffer;
+ ret = OK;
+
+ /* Walk the request in chunks of at most STM32_SPI_DMA_MAX_XFER words.
+ * The STM32 DMA NDTR/CNDTR transfer-count register is only 16 bits
+ * wide; submitting more than 65535 transfers in a single descriptor
+ * silently programs NDTR to (nwords & 0xffff) and on most paths
+ * results in a stream that never raises transfer-complete, causing
+ * the SPI driver to block forever in spi_dmarxwait(). Splitting the
+ * request keeps each descriptor within the hardware limit and lets
+ * the W25/SD/etc. driver remain agnostic of this constraint.
+ */
+
+ while (nwords > 0)
+ {
+ size_t chunk = (nwords > STM32_SPI_DMA_MAX_XFER)
+ ? STM32_SPI_DMA_MAX_XFER
+ : nwords;
+
+ spi_dmarxsetup(priv, rxp, &rxdummy, chunk);
+ spi_dmatxsetup(priv, txp, &txdummy, chunk);
#ifdef CONFIG_SPI_TRIGGER
- /* Is deferred triggering in effect? */
+ /* Is deferred triggering in effect? */
- if (!priv->defertrig)
- {
- /* No.. Start the DMAs */
+ if (!priv->defertrig)
+ {
+ /* No.. Start the DMAs */
- spi_dmarxstart(priv);
- spi_dmatxstart(priv);
- }
- else
- {
- /* Yes.. indicated that we are ready to be started */
+ spi_dmarxstart(priv);
+ spi_dmatxstart(priv);
+ }
+ else
+ {
+ /* Yes.. indicate that we are ready to be started. Deferred
+ * triggering is only meaningful for the first (often only)
+ * chunk; subsequent chunks must run unconditionally or the
+ * caller would have to re-arm between chunks.
+ */
- priv->trigarmed = true;
- }
+ priv->trigarmed = true;
+ }
#else
- /* Start the DMAs */
+ /* Start the DMAs */
- spi_dmarxstart(priv);
- spi_dmatxstart(priv);
+ spi_dmarxstart(priv);
+ spi_dmatxstart(priv);
#endif
- /* Then wait for each to complete */
+ /* Then wait for each to complete */
- ret = spi_dmarxwait(priv);
- if (ret < 0)
- {
- ret = spi_dmatxwait(priv);
+ ret = spi_dmarxwait(priv);
+ if (ret < 0)
+ {
+ ret = spi_dmatxwait(priv);
+ }
+
+ if (ret < 0)
+ {
+ break;
+ }
+
+ if (txp != NULL)
+ {
+ txp += chunk * word_size;
+ }
+
+ if (rxp != NULL)
+ {
+ rxp += chunk * word_size;
+ }
+
+ nwords -= chunk;
}
if (rxbuffer != NULL && priv->rxbuf != NULL && ret >= 0)