Re: svn commit: r366106 - head/sys/arm64/arm64

2020-11-01 Thread Marcin Wojtas
Hi,

With this commit SDHCI fails to allocate a bounce buffer for SDMA
(sdhci_dma_alloc() routine). The same behavior was observed on LS1046A
and Armada 7k8k. Example log:

sdhci_xenon0:  mem 0x78-0x7802ff
irq 38 on simplebus3
getaddr: error 27
sdhci_xenon0-slot0: Can't load DMA memory for SDMA
device_attach: sdhci_xenon0 attach returned 6

I debugged it a bit:
* bus_dmamap_load returns EFBIG (error = 27)
* The tag is created with an alignment to 128k
(https://github.com/freebsd/freebsd/blob/master/sys/dev/sdhci/sdhci.c#L752)
* When I set the alignment to anything <= PAGE_SIZE it works again:

--- a/sys/dev/sdhci/sdhci.c
+++ b/sys/dev/sdhci/sdhci.c
@@ -749,7 +749,7 @@ sdhci_dma_alloc(struct sdhci_slot *slot)
 * forming the actual address of data, requiring the SDMA buffer to
 * be aligned to the SDMA boundary.
 */
-   err = bus_dma_tag_create(bus_get_dma_tag(slot->bus), slot->sdma_bbufsz,
+   err = bus_dma_tag_create(bus_get_dma_tag(slot->bus), PAGE_SIZE,
0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
slot->sdma_bbufsz, 1, slot->sdma_bbufsz, BUS_DMA_ALLOCNOW,
NULL, NULL, &slot->dmatag);

I don't know whether this is a valid fix, though, given the comment in the
code above (Linux, however, aligns the DMA buffer to 512). Comments appreciated.

Is there any reason why the huge alignment value worked before r366106
but is problematic now?

Best regards,
Marcin


czw., 24 wrz 2020 o 09:17 Andrew Turner  napisał(a):
>
> Author: andrew
> Date: Thu Sep 24 07:17:05 2020
> New Revision: 366106
> URL: https://svnweb.freebsd.org/changeset/base/366106
>
> Log:
>   Bounce in more cases in the arm64 busdma
>
>   We need to use a bounce buffer when the memory we are operating on is not
>   aligned to a cacheline, or when it is not aligned to the map's alignment.
>
>   The former is to stop other threads from dirtying the cacheline while we
>   are performing DMA operations with it. The latter is to check memory
>   passed in by a driver is correctly aligned for the device.
>
>   Reviewed by:  mmel
>   Sponsored by: Innovate UK
>   Differential Revision:https://reviews.freebsd.org/D26496
>
> Modified:
>   head/sys/arm64/arm64/busdma_bounce.c
>
> Modified: head/sys/arm64/arm64/busdma_bounce.c
> ==
> --- head/sys/arm64/arm64/busdma_bounce.c	Thu Sep 24 07:13:13 2020	(r366105)
> +++ head/sys/arm64/arm64/busdma_bounce.c	Thu Sep 24 07:17:05 2020	(r366106)
> @@ -139,6 +139,7 @@ struct bus_dmamap {
> u_int   flags;
>  #define DMAMAP_COHERENT (1 << 0)
>  #define DMAMAP_FROM_DMAMEM  (1 << 1)
> +#define DMAMAP_MBUF (1 << 2)
> int sync_count;
> struct sync_list slist[];
>  };
> @@ -155,8 +156,8 @@ static bus_addr_t add_bounce_page(bus_dma_tag_t dmat,
>  vm_offset_t vaddr, bus_addr_t addr, bus_size_t size);
>  static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage);
>  int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr);
> -static bool _bus_dmamap_pagesneeded(bus_dma_tag_t dmat, vm_paddr_t buf,
> -bus_size_t buflen, int *pagesneeded);
> +static bool _bus_dmamap_pagesneeded(bus_dma_tag_t dmat, bus_dmamap_t map,
> +vm_paddr_t buf, bus_size_t buflen, int *pagesneeded);
>  static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
>  pmap_t pmap, void *buf, bus_size_t buflen, int flags);
>  static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
> @@ -164,20 +165,70 @@ static void _bus_dmamap_count_phys(bus_dma_tag_t dmat,
>  static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
>  int flags);
>
> +/*
> + * Return true if the DMA should bounce because the start or end does not fall
> + * on a cacheline boundary (which would require a partial cacheline flush).
> + * COHERENT memory doesn't trigger cacheline flushes.  Memory allocated by
> + * bus_dmamem_alloc() is always aligned to cacheline boundaries, and there's a
> + * strict rule that such memory cannot be accessed by the CPU while DMA is in
> + * progress (or by multiple DMA engines at once), so that it's always safe to do
> + * full cacheline flushes even if that affects memory outside the range of a
> + * given DMA operation that doesn't involve the full allocated buffer.  If we're
> + * mapping an mbuf, that follows the same rules as a buffer we allocated.
> + */
>  static bool
> -might_bounce(bus_dma_tag_t dmat)
> +cacheline_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t paddr,
> +bus_size_t size)
>  {
>
> +#define DMAMAP_CACHELINE_FLAGS \
> +    (DMAMAP_FROM_DMAMEM | DMAMAP_COHERENT | DMAMAP_MBUF)
> +   if ((dmat->bounce_flags & BF_COHERENT) != 0)
> +   return (false);
> +   if (map != NULL && (map->flags & 

svn commit: r366106 - head/sys/arm64/arm64

2020-09-24 Thread Andrew Turner
Author: andrew
Date: Thu Sep 24 07:17:05 2020
New Revision: 366106
URL: https://svnweb.freebsd.org/changeset/base/366106

Log:
  Bounce in more cases in the arm64 busdma
  
  We need to use a bounce buffer when the memory we are operating on is not
  aligned to a cacheline, or when it is not aligned to the map's alignment.
  
  The former is to stop other threads from dirtying the cacheline while we
  are performing DMA operations with it. The latter is to check memory
  passed in by a driver is correctly aligned for the device.
  
  Reviewed by:  mmel
  Sponsored by: Innovate UK
  Differential Revision:https://reviews.freebsd.org/D26496

Modified:
  head/sys/arm64/arm64/busdma_bounce.c

Modified: head/sys/arm64/arm64/busdma_bounce.c
==
--- head/sys/arm64/arm64/busdma_bounce.c	Thu Sep 24 07:13:13 2020	(r366105)
+++ head/sys/arm64/arm64/busdma_bounce.c	Thu Sep 24 07:17:05 2020	(r366106)
@@ -139,6 +139,7 @@ struct bus_dmamap {
u_int   flags;
 #define DMAMAP_COHERENT (1 << 0)
 #define DMAMAP_FROM_DMAMEM  (1 << 1)
+#define DMAMAP_MBUF (1 << 2)
int sync_count;
struct sync_list slist[];
 };
@@ -155,8 +156,8 @@ static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, 
 vm_offset_t vaddr, bus_addr_t addr, bus_size_t size);
 static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage);
 int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr);
-static bool _bus_dmamap_pagesneeded(bus_dma_tag_t dmat, vm_paddr_t buf,
-bus_size_t buflen, int *pagesneeded);
+static bool _bus_dmamap_pagesneeded(bus_dma_tag_t dmat, bus_dmamap_t map,
+vm_paddr_t buf, bus_size_t buflen, int *pagesneeded);
 static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
 pmap_t pmap, void *buf, bus_size_t buflen, int flags);
 static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map,
@@ -164,20 +165,70 @@ static void _bus_dmamap_count_phys(bus_dma_tag_t dmat,
 static int _bus_dmamap_reserve_pages(bus_dma_tag_t dmat, bus_dmamap_t map,
 int flags);
 
+/*
+ * Return true if the DMA should bounce because the start or end does not fall
+ * on a cacheline boundary (which would require a partial cacheline flush).
+ * COHERENT memory doesn't trigger cacheline flushes.  Memory allocated by
+ * bus_dmamem_alloc() is always aligned to cacheline boundaries, and there's a
+ * strict rule that such memory cannot be accessed by the CPU while DMA is in
+ * progress (or by multiple DMA engines at once), so that it's always safe to do
+ * full cacheline flushes even if that affects memory outside the range of a
+ * given DMA operation that doesn't involve the full allocated buffer.  If we're
+ * mapping an mbuf, that follows the same rules as a buffer we allocated.
+ */
 static bool
-might_bounce(bus_dma_tag_t dmat)
+cacheline_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t paddr,
+bus_size_t size)
 {
 
+#define DMAMAP_CACHELINE_FLAGS \
+    (DMAMAP_FROM_DMAMEM | DMAMAP_COHERENT | DMAMAP_MBUF)
+   if ((dmat->bounce_flags & BF_COHERENT) != 0)
+   return (false);
+   if (map != NULL && (map->flags & DMAMAP_CACHELINE_FLAGS) != 0)
+   return (false);
+   return (((paddr | size) & (dcache_line_size - 1)) != 0);
+#undef DMAMAP_CACHELINE_FLAGS
+}
+
+/*
+ * Return true if the given address does not fall on the alignment boundary.
+ */
+static bool
+alignment_bounce(bus_dma_tag_t dmat, bus_addr_t addr)
+{
+
+   return ((addr & (dmat->common.alignment - 1)) != 0);
+}
+
+static bool
+might_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t paddr,
+bus_size_t size)
+{
+
if ((dmat->bounce_flags & BF_COULD_BOUNCE) != 0)
return (true);
 
+   if (cacheline_bounce(dmat, map, paddr, size))
+   return (true);
+
+   if (alignment_bounce(dmat, paddr))
+   return (true);
+
return (false);
 }
 
 static bool
-must_bounce(bus_dma_tag_t dmat, bus_addr_t paddr)
+must_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t paddr,
+bus_size_t size)
 {
 
+   if (cacheline_bounce(dmat, map, paddr, size))
+   return (true);
+
+   if (alignment_bounce(dmat, paddr))
+   return (true);
+
if ((dmat->bounce_flags & BF_COULD_BOUNCE) != 0 &&
bus_dma_run_filter(&dmat->common, paddr))
return (true);
@@ -240,8 +291,7 @@ bounce_bus_dma_tag_create(bus_dma_tag_t parent, bus_si
newtag->common.alignment > 1)
newtag->bounce_flags |= BF_COULD_BOUNCE;
 
-   if (((newtag->bounce_flags & BF_COULD_BOUNCE) != 0) &&
-   (flags & BUS_DMA_ALLOCNOW) != 0) {
+   if ((flags & BUS_DMA_ALLOCNOW) != 0) {
struct bounce_zone *bz;
 
/* Must bounce