DPAA2 triggers, [PATCH] dma debug: report -EEXIST errors in add_dma_entry
+DPAA2, netdev maintainers Hi, On 5/18/21 7:54 AM, Hamza Mahfooz wrote: Since, overlapping mappings are not supported by the DMA API we should report an error if active_cacheline_insert returns -EEXIST. It seems this patch found a victim. I was trying to run iperf3 on a honeycomb (5.14.0, fedora 35) and the console is blasting this error message at 100% cpu. So, I changed it to a WARN_ONCE() to get the call trace, which is attached below. [ 151.839693] cacheline tracking EEXIST, overlapping mappings aren't supported ... [ 151.924397] Hardware name: SolidRun Ltd. SolidRun CEX7 Platform, BIOS EDK II Aug 9 2021 [ 151.932481] pstate: 4045 (nZcv daif +PAN -UAO -TCO BTYPE=--) [ 151.938483] pc : add_dma_entry+0x218/0x240 [ 151.942575] lr : add_dma_entry+0x218/0x240 [ 151.94] sp : 8000101e2f20 [ 151.949975] x29: 8000101e2f20 x28: af317ac85000 x27: 3d0366ecb3a0 [ 151.957116] x26: 0400 x25: 0001 x24: af317bbe8908 [ 151.964257] x23: 0001 x22: af317bbe8810 x21: [ 151.971397] x20: 82e48000 x19: af317be6e000 x18: [ 151.978537] x17: 646574726f707075 x16: 732074276e657261 x15: 8000901e2c2f [ 151.985676] x14: x13: x12: [ 151.992816] x11: af317bb4c4c0 x10: e000 x9 : af3179708060 [ 151.56] x8 : dfff x7 : af317bb4c4c0 x6 : 0001 [ 152.007096] x5 : 3d0a9af66e30 x4 : x3 : 0027 [ 152.014236] x2 : 0023 x1 : 3d0360aac000 x0 : 0040 [ 152.021376] Call trace: [ 152.023816] add_dma_entry+0x218/0x240 [ 152.027561] debug_dma_map_sg+0x118/0x17c [ 152.031566] dma_map_sg_attrs+0x70/0xb0 [ 152.035397] dpaa2_eth_build_sg_fd+0xac/0x2f0 [fsl_dpaa2_eth] [ 152.041150] __dpaa2_eth_tx+0x3ec/0x570 [fsl_dpaa2_eth] [ 152.046377] dpaa2_eth_tx+0x74/0x110 [fsl_dpaa2_eth] [ 152.051342] dev_hard_start_xmit+0xe8/0x1a4 [ 152.055523] sch_direct_xmit+0x8c/0x1e0 [ 152.059355] __dev_xmit_skb+0x484/0x6a0 [ 152.063186] __dev_queue_xmit+0x380/0x744 [ 152.067190] dev_queue_xmit+0x20/0x2c [ 152.070848] neigh_hh_output+0xb4/0x130 [ 152.074679] ip_finish_output2+0x494/0x8f0 [ 152.078770] __ip_finish_output+0x12c/0x230 [ 152.082948] ip_finish_output+0x40/0xe0 [ 152.086778] ip_output+0xe4/0x2d4 [ 152.090088] __ip_queue_xmit+0x1b4/0x5c0 [ 152.094006] ip_queue_xmit+0x20/0x30 [ 152.097576] __tcp_transmit_skb+0x3b8/0x7b4 [ 152.101755] tcp_write_xmit+0x350/0x8e0 [ 152.105586] __tcp_push_pending_frames+0x48/0x110 [ 152.110286] tcp_rcv_established+0x338/0x690 [ 152.114550] tcp_v4_do_rcv+0x1c0/0x29c [ 152.118294] tcp_v4_rcv+0xd14/0xe3c [ 152.121777] ip_protocol_deliver_rcu+0x88/0x340 [ 152.126302] ip_local_deliver_finish+0xc0/0x184 [ 152.130827] ip_local_deliver+0x7c/0x23c [ 152.134744] ip_rcv_finish+0xb4/0x100 [ 152.138400] ip_rcv+0x54/0x210 [ 152.141449] deliver_skb+0x74/0xdc [ 152.144846] __netif_receive_skb_core.constprop.0+0x250/0x81c [ 152.150588] __netif_receive_skb_list_core+0x94/0x264 [ 152.155635] netif_receive_skb_list_internal+0x1d0/0x3bc [ 152.160942] netif_receive_skb_list+0x38/0x70 [ 152.165295] dpaa2_eth_poll+0x168/0x350 [fsl_dpaa2_eth] [ 152.170521] __napi_poll.constprop.0+0x40/0x19c [ 152.175047] net_rx_action+0x2c4/0x360 [ 152.178792] __do_softirq+0x1b0/0x394 [ 152.182450] run_ksoftirqd+0x68/0xa0 [ 152.186023] smpboot_thread_fn+0x13c/0x270 [ 152.190115] kthread+0x138/0x140 PS, it might not hurt to rate limit/_once this somehow to avoid a runtime problem if it starts to trigger. Thanks, Suggested-by: Dan Williams Signed-off-by: Hamza Mahfooz --- kernel/dma/debug.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 14de1271463f..dadae6255d05 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -566,11 +566,9 @@ static void add_dma_entry(struct dma_debug_entry *entry) if (rc == -ENOMEM) { pr_err("cacheline tracking ENOMEM, dma-debug disabled\n"); global_disable = true; + } else if (rc == -EEXIST) { + pr_err("cacheline tracking EEXIST, overlapping mappings aren't supported\n"); } - - /* TODO: report -EEXIST errors here as overlapping mappings are -* not supported by the DMA API -*/ } static int dma_debug_create_entries(gfp_t gfp) ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v4 0/7] arm64: Default to 32-bit wide ZONE_DMA
Hi, On 10/21/20 7:34 AM, Nicolas Saenz Julienne wrote: Using two distinct DMA zones turned out to be problematic. Here's an attempt go back to a saner default. I tested this on both a RPi4 and QEMU. I've tested this in ACPI mode on the rpi4 (4+8G with/without the 3G limiter) as well, with Ard's IORT patch. Nothing seems to have regressed. Thanks, Tested-by: Jeremy Linton --- Changes since v3: - Drop patch adding define in dma-mapping - Address small review changes - Update Ard's patch - Add new patch removing examples from mmzone.h Changes since v2: - Introduce Ard's patch - Improve OF dma-ranges parsing function - Add unit test for OF function - Address small changes - Move crashkernel reservation later in boot process Changes since v1: - Parse dma-ranges instead of using machine compatible string Ard Biesheuvel (1): arm64: mm: Set ZONE_DMA size based on early IORT scan Nicolas Saenz Julienne (6): arm64: mm: Move reserve_crashkernel() into mem_init() arm64: mm: Move zone_dma_bits initialization into zone_sizes_init() of/address: Introduce of_dma_get_max_cpu_address() of: unittest: Add test for of_dma_get_max_cpu_address() arm64: mm: Set ZONE_DMA size based on devicetree's dma-ranges mm: Remove examples from enum zone_type comment arch/arm64/mm/init.c | 16 ++-- drivers/acpi/arm64/iort.c | 52 +++ drivers/of/address.c | 42 +++ drivers/of/unittest.c | 18 ++ include/linux/acpi_iort.h | 4 +++ include/linux/mmzone.h| 20 --- include/linux/of.h| 7 ++ 7 files changed, 130 insertions(+), 29 deletions(-) ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/4] of/fdt: Update zone_dma_bits when running in bcm2711
On 10/8/20 2:43 PM, Ard Biesheuvel wrote: (+ Lorenzo) On Thu, 8 Oct 2020 at 12:14, Catalin Marinas wrote: On Thu, Oct 08, 2020 at 12:05:25PM +0200, Nicolas Saenz Julienne wrote: On Fri, 2020-10-02 at 12:55 +0100, Catalin Marinas wrote: On Thu, Oct 01, 2020 at 07:31:19PM +0200, Nicolas Saenz Julienne wrote: On Thu, 2020-10-01 at 18:23 +0100, Catalin Marinas wrote: On Thu, Oct 01, 2020 at 06:15:01PM +0100, Catalin Marinas wrote: On Thu, Oct 01, 2020 at 06:17:37PM +0200, Nicolas Saenz Julienne wrote: diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 4602e467ca8b..cd0d115ef329 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -25,6 +25,7 @@ #include #include #include +#include /* for zone_dma_bits */ #include /* for COMMAND_LINE_SIZE */ #include @@ -1198,6 +1199,14 @@ void __init early_init_dt_scan_nodes(void) of_scan_flat_dt(early_init_dt_scan_memory, NULL); } +void __init early_init_dt_update_zone_dma_bits(void) +{ + unsigned long dt_root = of_get_flat_dt_root(); + + if (of_flat_dt_is_compatible(dt_root, "brcm,bcm2711")) + zone_dma_bits = 30; +} I think we could keep this entirely in the arm64 setup_machine_fdt() and not pollute the core code with RPi4-specific code. Actually, even better, could we not move the check to arm64_memblock_init() when we initialise zone_dma_bits? I did it this way as I vaguely remembered Rob saying he wanted to centralise all early boot fdt code in one place. But I'll be happy to move it there. I can see Rob replied and I'm fine if that's his preference. However, what I don't particularly like is that in the arm64 code, if zone_dma_bits == 24, we set it to 32 assuming that it wasn't touched by the early_init_dt_update_zone_dma_bits(). What if at some point we'll get a platform that actually needs 24 here (I truly hope not, but just the principle of relying on magic values)? So rather than guessing, I'd prefer if the arch code can override ZONE_DMA_BITS_DEFAULT. Then, in arm64, we'll just set it to 32 and no need to explicitly touch the zone_dma_bits variable. Yes, sonds like the way to go. TBH I wasn't happy with that solution either, but couldn't think of a nicer alternative. Sadly I just realised that the series is incomplete, we have RPi4 users that want to boot unsing ACPI, and this series would break things for them. I'll have a word with them to see what we can do for their use-case. Is there a way to get some SoC information from ACPI? This is unfortunate. We used ACPI _DMA methods as they were designed to communicate the DMA limit of the XHCI controller to the OS. It shouldn't be too hard to match the OEM id field in the DSDT, and switch to the smaller mask. But it sucks to have to add a quirk like that. It also requires delaying setting the arm64_dma_phy_limit a bit, but that doesn't appear to be causing a problem. I've been boot/compiling with a patch like: diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index cada0b816c8a..9dfe776c1c75 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -168,6 +169,11 @@ static int __init acpi_fadt_sanity_check(void) ret = -EINVAL; } + if (!strncmp(table->oem_id, "RPIFDN", ACPI_OEM_ID_SIZE) && + !strncmp(table->oem_table_id, "RPI4", ACPI_OEM_TABLE_ID_SIZE) && + table->oem_revision <= 0x200) { + zone_dma_bits = 30; + } out: /* * acpi_get_table() creates FADT table mapping that diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index cd5caca8a929..6c8aaf1570ce 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -195,6 +195,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; #ifdef CONFIG_ZONE_DMA + arm64_dma_phys_limit = max_zone_phys(zone_dma_bits); max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit); #endif #ifdef CONFIG_ZONE_DMA32 @@ -393,7 +394,6 @@ void __init arm64_memblock_init(void) */ if (zone_dma_bits == ZONE_DMA_BITS_DEFAULT) zone_dma_bits = 32; - arm64_dma_phys_limit = max_zone_phys(zone_dma_bits); } if (IS_ENABLED(CONFIG_ZONE_DMA32)) ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 0/4] dma-pool: Fix atomic pool selection
Hi, I have merged this to a 5.8 tree along with "dma-pool: Only allocate from CMA when in the same memory zone" and tested it in various ACPI/DT combinations, particularly on the RPI4. It seems to be working fine. So thanks for your time and effort clearing this up! Tested-by: Jeremy Linton On 7/9/20 11:19 AM, Nicolas Saenz Julienne wrote: This is my attempt at fixing one of the regressions we've seen[1] after the introduction of per-zone atomic pools. This combined with "dma-pool: Do not allocate pool memory from CMA"[2] should fix the boot issues on Jeremy's RPi4 setup. [1] https://lkml.org/lkml/2020/7/2/974 [2] https://lkml.org/lkml/2020/7/8/1108 --- Nicolas Saenz Julienne (4): dma-direct: Provide function to check physical memory area validity dma-pool: Get rid of dma_in_atomic_pool() dma-pool: Introduce dma_guess_pool() dma-pool: Make sure atomic pool suits device include/linux/dma-direct.h | 1 + kernel/dma/direct.c| 2 +- kernel/dma/pool.c | 76 +++--- 3 files changed, 56 insertions(+), 23 deletions(-) ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] dma-pool: Only allocate from CMA when in same memory zone
Hi, I have merged this to a 5.8 tree along with "dma-pool: Fix atomic pool selection" and tested it in various ACPI/DT combinations, particularly on the RPI4. It seems to be working fine. So thanks for your time and effort clearing this up! tested-by: Jeremy Linton On 7/10/20 9:10 AM, Nicolas Saenz Julienne wrote: There is no guarantee to CMA's placement, so allocating a zone specific atomic pool from CMA might return memory from a completely different memory zone. To get around this double check CMA's placement before allocating from it. Fixes: c84dc6e68a1d ("dma-pool: add additional coherent pools to map to gfp mask") Reported-by: Jeremy Linton Signed-off-by: Nicolas Saenz Julienne --- This is a code intensive alternative to "dma-pool: Do not allocate pool memory from CMA"[1]. [1] https://lkml.org/lkml/2020/7/8/1108 kernel/dma/pool.c | 36 +++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index 8cfa01243ed2..ccf3eeb77e00 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -3,6 +3,7 @@ * Copyright (C) 2012 ARM Ltd. * Copyright (C) 2020 Google LLC */ +#include #include #include #include @@ -56,6 +57,39 @@ static void dma_atomic_pool_size_add(gfp_t gfp, size_t size) pool_size_kernel += size; } +static bool cma_in_zone(gfp_t gfp) +{ + u64 zone_dma_end, zone_dma32_end; + phys_addr_t base, end; + unsigned long size; + struct cma *cma; + + cma = dev_get_cma_area(NULL); + if (!cma) + return false; + + size = cma_get_size(cma); + if (!size) + return false; + base = cma_get_base(cma) - memblock_start_of_DRAM(); + end = base + size - 1; + + zone_dma_end = IS_ENABLED(CONFIG_ZONE_DMA) ? DMA_BIT_MASK(zone_dma_bits) : 0; + zone_dma32_end = IS_ENABLED(CONFIG_ZONE_DMA32) ? DMA_BIT_MASK(32) : 0; + + /* CMA can't cross zone boundaries, see cma_activate_area() */ + if (IS_ENABLED(CONFIG_ZONE_DMA) && gfp & GFP_DMA && + end <= zone_dma_end) + return true; + else if (IS_ENABLED(CONFIG_ZONE_DMA32) && gfp & GFP_DMA32 && + base > zone_dma_end && end <= zone_dma32_end) + return true; + else if (base > zone_dma32_end) + return true; + + return false; +} + static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size, gfp_t gfp) { @@ -70,7 +104,7 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size, do { pool_size = 1 << (PAGE_SHIFT + order); - if (dev_get_cma_area(NULL)) + if (cma_in_zone(gfp)) page = dma_alloc_from_contiguous(NULL, 1 << order, order, false); else ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] dma-pool: use single atomic pool for both DMA zones
Hi, On 7/7/20 7:28 AM, Nicolas Saenz Julienne wrote: When allocating atomic DMA memory for a device, the dma-pool core queries __dma_direct_optimal_gfp_mask() to check which atomic pool to use. It turns out the GFP flag returned is only an optimistic guess. The pool selected might sometimes live in a zone higher than the device's view of memory. As there isn't a way to grantee a mapping between a device's DMA constraints and correct GFP flags this unifies both DMA atomic pools. The resulting pool is allocated in the lower DMA zone available, if any, so as for devices to always get accessible memory while having the flexibility of using dma_pool_kernel for the non constrained ones. With the follow-on patch "dma-pool: Do not allocate pool memory from CMA" everything seems to be working fine now. tested-by: Jeremy Linton Thanks for fixing this! Fixes: c84dc6e68a1d ("dma-pool: add additional coherent pools to map to gfp mask") Reported-by: Jeremy Linton Suggested-by: Robin Murphy Signed-off-by: Nicolas Saenz Julienne --- kernel/dma/pool.c | 47 +++ 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index 8cfa01243ed2..883f7a583969 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -13,10 +13,11 @@ #include #include +#define GFP_ATOMIC_POOL_DMA (IS_ENABLED(CONFIG_ZONE_DMA) ? GFP_DMA : \ +IS_ENABLED(CONFIG_ZONE_DMA32) ? GFP_DMA32 : 0) + static struct gen_pool *atomic_pool_dma __ro_after_init; static unsigned long pool_size_dma; -static struct gen_pool *atomic_pool_dma32 __ro_after_init; -static unsigned long pool_size_dma32; static struct gen_pool *atomic_pool_kernel __ro_after_init; static unsigned long pool_size_kernel; @@ -42,16 +43,13 @@ static void __init dma_atomic_pool_debugfs_init(void) return; debugfs_create_ulong("pool_size_dma", 0400, root, _size_dma); - debugfs_create_ulong("pool_size_dma32", 0400, root, _size_dma32); debugfs_create_ulong("pool_size_kernel", 0400, root, _size_kernel); } static void dma_atomic_pool_size_add(gfp_t gfp, size_t size) { - if (gfp & __GFP_DMA) + if (gfp & GFP_ATOMIC_POOL_DMA) pool_size_dma += size; - else if (gfp & __GFP_DMA32) - pool_size_dma32 += size; else pool_size_kernel += size; } @@ -132,12 +130,11 @@ static void atomic_pool_resize(struct gen_pool *pool, gfp_t gfp) static void atomic_pool_work_fn(struct work_struct *work) { - if (IS_ENABLED(CONFIG_ZONE_DMA)) - atomic_pool_resize(atomic_pool_dma, - GFP_KERNEL | GFP_DMA); - if (IS_ENABLED(CONFIG_ZONE_DMA32)) - atomic_pool_resize(atomic_pool_dma32, - GFP_KERNEL | GFP_DMA32); + gfp_t dma_gfp = GFP_ATOMIC_POOL_DMA; + + if (dma_gfp) + atomic_pool_resize(atomic_pool_dma, GFP_KERNEL | dma_gfp); + atomic_pool_resize(atomic_pool_kernel, GFP_KERNEL); } @@ -168,6 +165,7 @@ static __init struct gen_pool *__dma_atomic_pool_init(size_t pool_size, static int __init dma_atomic_pool_init(void) { + gfp_t dma_gfp = GFP_ATOMIC_POOL_DMA; int ret = 0; /* @@ -185,18 +183,13 @@ static int __init dma_atomic_pool_init(void) GFP_KERNEL); if (!atomic_pool_kernel) ret = -ENOMEM; - if (IS_ENABLED(CONFIG_ZONE_DMA)) { + + if (dma_gfp) { atomic_pool_dma = __dma_atomic_pool_init(atomic_pool_size, - GFP_KERNEL | GFP_DMA); +GFP_KERNEL | dma_gfp); if (!atomic_pool_dma) ret = -ENOMEM; } - if (IS_ENABLED(CONFIG_ZONE_DMA32)) { - atomic_pool_dma32 = __dma_atomic_pool_init(atomic_pool_size, - GFP_KERNEL | GFP_DMA32); - if (!atomic_pool_dma32) - ret = -ENOMEM; - } dma_atomic_pool_debugfs_init(); return ret; @@ -206,14 +199,12 @@ postcore_initcall(dma_atomic_pool_init); static inline struct gen_pool *dev_to_pool(struct device *dev) { u64 phys_mask; - gfp_t gfp; - - gfp = dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, - _mask); - if (IS_ENABLED(CONFIG_ZONE_DMA) && gfp == GFP_DMA) - return atomic_pool_dma; - if (IS_ENABLED(CONFIG_ZONE_DMA32) && gfp == GFP_DMA32) - return atomic_pool_dma32; + + if (atomic_pool_dma && + dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, +
Re: [PATCH] dma-pool: Do not allocate pool memory from CMA
Hi, On 7/8/20 11:49 AM, Nicolas Saenz Julienne wrote: There is no guarantee to CMA's placement, so allocating a zone specific atomic pool from CMA might return memory from a completely different memory zone. So stop using it. Fixes: c84dc6e68a1d ("dma-pool: add additional coherent pools to map to gfp mask") Reported-by: Jeremy Linton Signed-off-by: Nicolas Saenz Julienne --- With this patch and https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg2226971.html the machine appears to be working fine with/without CMA and in both DT/ACPI mode. The JBOD/etc I was having problems with are working fine, and the rtlsdr seems to be working better now too (its still not perfect, but that is likely another issue). so: tested-by: Jeremy Linton thanks! An more costly alternative would be adding an option to dma_alloc_from_contiguous() so it fails when the allocation doesn't fall in a specific zone. kernel/dma/pool.c | 11 ++- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index 8cfa01243ed2..4bc1c46ae6ef 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -69,12 +68,7 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size, do { pool_size = 1 << (PAGE_SHIFT + order); - - if (dev_get_cma_area(NULL)) - page = dma_alloc_from_contiguous(NULL, 1 << order, -order, false); - else - page = alloc_pages(gfp, order); + page = alloc_pages(gfp, order); } while (!page && order-- > 0); if (!page) goto out; @@ -118,8 +112,7 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size, dma_common_free_remap(addr, pool_size); #endif free_page: __maybe_unused - if (!dma_release_from_contiguous(NULL, page, 1 << order)) - __free_pages(page, order); + __free_pages(page, order); out: return ret; } ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] dma-pool: use single atomic pool for both DMA zones
Hi, On 7/8/20 5:35 AM, Nicolas Saenz Julienne wrote: Hi Jim, On Tue, 2020-07-07 at 17:08 -0500, Jeremy Linton wrote: Hi, I spun this up on my 8G model using the PFTF firmware from: https://github.com/pftf/RPi4/releases Which allows me to switch between ACPI/DT on the machine. In DT mode it works fine now, Nice, would that count as a Tested-by from you? If it worked... :) but with ACPI I continue to have failures unless I disable CMA via cma=0 on the kernel command line. Yes, I see why, in atomic_pool_expand() memory is allocated from CMA without checking its correctness. That calls for a separate fix. I'll try to think of something. It think that is because using DT: [0.00] Reserved memory: created CMA memory pool at 0x3740, size 64 MiB using ACPI: [0.00] cma: Reserved 64 MiB at 0xf800 Which is AFAIK because the default arm64 CMA allocation is just below the arm64_dma32_phys_limit. As I'm sure you know, we fix the CMA address trough DT, isn't that possible trough ACPI? Well there isn't a linux specific cma location property in ACPI. There are various ways to infer the information, like looking for the lowest _DMA() range and using that to lower the arm64_dma32_phys_limit. OTOH, as it stands I don't think that information is available early enough to setup the cma pool. But as you mention the atomic pool code is allocating from CMA under the assumption that its going to be below the GFP_DMA range, which might not be generally true (due to lack of DT cma properties too?). ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] dma-pool: use single atomic pool for both DMA zones
Hi, I spun this up on my 8G model using the PFTF firmware from: https://github.com/pftf/RPi4/releases Which allows me to switch between ACPI/DT on the machine. In DT mode it works fine now, but with ACPI I continue to have failures unless I disable CMA via cma=0 on the kernel command line. It think that is because using DT: [0.00] Reserved memory: created CMA memory pool at 0x3740, size 64 MiB using ACPI: [0.00] cma: Reserved 64 MiB at 0xf800 Which is AFAIK because the default arm64 CMA allocation is just below the arm64_dma32_phys_limit. Thanks, On 7/7/20 7:28 AM, Nicolas Saenz Julienne wrote: When allocating atomic DMA memory for a device, the dma-pool core queries __dma_direct_optimal_gfp_mask() to check which atomic pool to use. It turns out the GFP flag returned is only an optimistic guess. The pool selected might sometimes live in a zone higher than the device's view of memory. As there isn't a way to grantee a mapping between a device's DMA constraints and correct GFP flags this unifies both DMA atomic pools. The resulting pool is allocated in the lower DMA zone available, if any, so as for devices to always get accessible memory while having the flexibility of using dma_pool_kernel for the non constrained ones. Fixes: c84dc6e68a1d ("dma-pool: add additional coherent pools to map to gfp mask") Reported-by: Jeremy Linton Suggested-by: Robin Murphy Signed-off-by: Nicolas Saenz Julienne --- kernel/dma/pool.c | 47 +++ 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index 8cfa01243ed2..883f7a583969 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -13,10 +13,11 @@ #include #include +#define GFP_ATOMIC_POOL_DMA (IS_ENABLED(CONFIG_ZONE_DMA) ? GFP_DMA : \ +IS_ENABLED(CONFIG_ZONE_DMA32) ? GFP_DMA32 : 0) + static struct gen_pool *atomic_pool_dma __ro_after_init; static unsigned long pool_size_dma; -static struct gen_pool *atomic_pool_dma32 __ro_after_init; -static unsigned long pool_size_dma32; static struct gen_pool *atomic_pool_kernel __ro_after_init; static unsigned long pool_size_kernel; @@ -42,16 +43,13 @@ static void __init dma_atomic_pool_debugfs_init(void) return; debugfs_create_ulong("pool_size_dma", 0400, root, _size_dma); - debugfs_create_ulong("pool_size_dma32", 0400, root, _size_dma32); debugfs_create_ulong("pool_size_kernel", 0400, root, _size_kernel); } static void dma_atomic_pool_size_add(gfp_t gfp, size_t size) { - if (gfp & __GFP_DMA) + if (gfp & GFP_ATOMIC_POOL_DMA) pool_size_dma += size; - else if (gfp & __GFP_DMA32) - pool_size_dma32 += size; else pool_size_kernel += size; } @@ -132,12 +130,11 @@ static void atomic_pool_resize(struct gen_pool *pool, gfp_t gfp) static void atomic_pool_work_fn(struct work_struct *work) { - if (IS_ENABLED(CONFIG_ZONE_DMA)) - atomic_pool_resize(atomic_pool_dma, - GFP_KERNEL | GFP_DMA); - if (IS_ENABLED(CONFIG_ZONE_DMA32)) - atomic_pool_resize(atomic_pool_dma32, - GFP_KERNEL | GFP_DMA32); + gfp_t dma_gfp = GFP_ATOMIC_POOL_DMA; + + if (dma_gfp) + atomic_pool_resize(atomic_pool_dma, GFP_KERNEL | dma_gfp); + atomic_pool_resize(atomic_pool_kernel, GFP_KERNEL); } @@ -168,6 +165,7 @@ static __init struct gen_pool *__dma_atomic_pool_init(size_t pool_size, static int __init dma_atomic_pool_init(void) { + gfp_t dma_gfp = GFP_ATOMIC_POOL_DMA; int ret = 0; /* @@ -185,18 +183,13 @@ static int __init dma_atomic_pool_init(void) GFP_KERNEL); if (!atomic_pool_kernel) ret = -ENOMEM; - if (IS_ENABLED(CONFIG_ZONE_DMA)) { + + if (dma_gfp) { atomic_pool_dma = __dma_atomic_pool_init(atomic_pool_size, - GFP_KERNEL | GFP_DMA); +GFP_KERNEL | dma_gfp); if (!atomic_pool_dma) ret = -ENOMEM; } - if (IS_ENABLED(CONFIG_ZONE_DMA32)) { - atomic_pool_dma32 = __dma_atomic_pool_init(atomic_pool_size, - GFP_KERNEL | GFP_DMA32); - if (!atomic_pool_dma32) - ret = -ENOMEM; - } dma_atomic_pool_debugfs_init(); return ret; @@ -206,14 +199,12 @@ postcore_initcall(dma_atomic_pool_init); static inline struct gen_pool *dev_to_pool(struct device *dev) { u64 phys_mask; - gfp_t gfp; - - gfp = dma