Re: [RFC PATCH v3 2/6] swiotlb: Add restricted DMA pool

2021-01-13 Thread Nicolas Saenz Julienne
Hi All,

On Tue, 2021-01-12 at 16:03 -0800, Florian Fainelli wrote:
> On 1/5/21 7:41 PM, Claire Chang wrote:
> > Add the initialization function to create restricted DMA pools from
> > matching reserved-memory nodes in the device tree.
> > 
> > Signed-off-by: Claire Chang 
> > ---
> >  include/linux/device.h  |   4 ++
> >  include/linux/swiotlb.h |   7 +-
> >  kernel/dma/Kconfig  |   1 +
> >  kernel/dma/swiotlb.c| 144 ++--
> >  4 files changed, 131 insertions(+), 25 deletions(-)
> > 
> > diff --git a/include/linux/device.h b/include/linux/device.h
> > index 89bb8b84173e..ca6f71ec8871 100644
> > --- a/include/linux/device.h
> > +++ b/include/linux/device.h
> > @@ -413,6 +413,7 @@ struct dev_links_info {
> >   * @dma_pools: Dma pools (if dma'ble device).
> >   * @dma_mem:   Internal for coherent mem override.
> >   * @cma_area:  Contiguous memory area for dma allocations
> > + * @dma_io_tlb_mem: Internal for swiotlb io_tlb_mem override.
> >   * @archdata:  For arch-specific additions.
> >   * @of_node:   Associated device tree node.
> >   * @fwnode:Associated device node supplied by platform firmware.
> > @@ -515,6 +516,9 @@ struct device {
> >  #ifdef CONFIG_DMA_CMA
> >     struct cma *cma_area;   /* contiguous memory area for dma
> >    allocations */
> > +#endif
> > +#ifdef CONFIG_SWIOTLB
> > +   struct io_tlb_mem   *dma_io_tlb_mem;
> >  #endif
> >     /* arch specific additions */
> >     struct dev_archdata archdata;
> > diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> > index dd8eb57cbb8f..a1bbd775 100644
> > --- a/include/linux/swiotlb.h
> > +++ b/include/linux/swiotlb.h
> > @@ -76,12 +76,13 @@ extern enum swiotlb_force swiotlb_force;
> >   *
> >   * @start: The start address of the swiotlb memory pool. Used to do a quick
> >   * range check to see if the memory was in fact allocated by this
> > - * API.
> > + * API. For restricted DMA pool, this is device tree adjustable.
> 
> Maybe write it as this is "firmware adjustable" such that when/if ACPI
> needs something like this, the description does not need updating.
> 
> [snip]
> 
> > +static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
> > +   struct device *dev)
> > +{
> > +   struct io_tlb_mem *mem = rmem->priv;
> > +   int ret;
> > +
> > +   if (dev->dma_io_tlb_mem)
> > +   return -EBUSY;
> > +
> > +   if (!mem) {
> > +   mem = kzalloc(sizeof(*mem), GFP_KERNEL);
> > +   if (!mem)
> > +   return -ENOMEM;
> > +
> > +   if (!memremap(rmem->base, rmem->size, MEMREMAP_WB)) {
> 
> MEMREMAP_WB sounds appropriate as a default.

As per the binding 'no-map' has to be disabled here. So AFAIU, this memory will
be part of the linear mapping. Is this really needed then?

> Documentation/devicetree/bindings/reserved-memory/ramoops.txt does
> define an "unbuffered" property which in premise could be applied to the
> generic reserved memory binding as well and that we may have to be
> honoring here, if we were to make it more generic. Oh well, this does
> not need to be addressed right now I guess.





signature.asc
Description: This is a digitally signed message part


[PATCH 1/2] kbuild: Add config fragment merge functionality

2020-02-03 Thread Nicolas Saenz Julienne
So far this function was only used locally in powerpc, some other
architectures might benefit from it. Move it into
scripts/Makefile.defconf.

Signed-off-by: Nicolas Saenz Julienne 
---
 arch/powerpc/Makefile| 12 +---
 scripts/Makefile.defconf | 15 +++
 2 files changed, 16 insertions(+), 11 deletions(-)
 create mode 100644 scripts/Makefile.defconf

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index f35730548e42..517ef8ec774b 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -301,17 +301,7 @@ $(BOOT_TARGETS2): vmlinux
 bootwrapper_install:
$(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
 
-# Used to create 'merged defconfigs'
-# To use it $(call) it with the first argument as the base defconfig
-# and the second argument as a space separated list of .config files to merge,
-# without the .config suffix.
-define merge_into_defconfig
-   $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \
-   -m -O $(objtree) $(srctree)/arch/$(ARCH)/configs/$(1) \
-   $(foreach 
config,$(2),$(srctree)/arch/$(ARCH)/configs/$(config).config)
-   +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
-endef
-
+include $(srctree)/scripts/Makefile.defconf
 PHONY += pseries_le_defconfig
 pseries_le_defconfig:
$(call merge_into_defconfig,pseries_defconfig,le)
diff --git a/scripts/Makefile.defconf b/scripts/Makefile.defconf
new file mode 100644
index ..ab332f7534f5
--- /dev/null
+++ b/scripts/Makefile.defconf
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+# Configuration heplers
+
+# Creates 'merged defconfigs'
+# ---
+# Usage:
+#   $(call merge_into_defconfig,base_config,config_fragment1 config_fragment2 
...)
+#
+# Input config fragments without '.config' suffix
+define merge_into_defconfig
+   $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \
+   -m -O $(objtree) $(srctree)/arch/$(ARCH)/configs/$(1) \
+   $(foreach 
config,$(2),$(srctree)/arch/$(ARCH)/configs/$(config).config)
+   +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
+endef
-- 
2.25.0



[PATCH 0/2] ARM: Introduce multi_v7_lpae_defconfig

2020-02-03 Thread Nicolas Saenz Julienne
This series introduces a new configuration target,
multi_v7_lpae_defconfig, built by merging the config fragment
lpae.config with mult_v7_defconfig. Ultimately needed in order for
Raspberry Pi 4's PCIe bus to work on arm builds, but which may benefit
other boards out there.

---

Changes since RFC:
 - Move merge function into the scripts directory.

Nicolas Saenz Julienne (2):
  kbuild: Add config fragment merge functionality
  ARM: add multi_v7_lpae_defconfig

 arch/arm/Makefile|  4 
 arch/arm/configs/lpae.config |  1 +
 arch/powerpc/Makefile| 12 +---
 scripts/Makefile.defconf | 15 +++
 4 files changed, 21 insertions(+), 11 deletions(-)
 create mode 100644 arch/arm/configs/lpae.config
 create mode 100644 scripts/Makefile.defconf

-- 
2.25.0



[PATCH 2/2] ARM: add multi_v7_lpae_defconfig

2020-02-03 Thread Nicolas Saenz Julienne
The only missing configuration option preventing us from using
multi_v7_defconfig with the Raspberry Pi 4 is ARM_LPAE. It's needed as
the PCIe controller found on the SoC depends on 64bit addressing, yet
can't be included as not all v7 boards support LPAE.

Introduce multi_v7_lpae_defconfig, built off multi_v7_defconfig, which will
avoid us having to duplicate and maintain multiple similar configurations.

Needless to say the Raspberry Pi 4 is not the only platform that can
benefit from this new configuration.

Signed-off-by: Nicolas Saenz Julienne 

---

Changes since RFC:
 - Move config merge function into scripts folder
---
 arch/arm/Makefile| 4 
 arch/arm/configs/lpae.config | 1 +
 2 files changed, 5 insertions(+)
 create mode 100644 arch/arm/configs/lpae.config

diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 16d41efea7f2..1f4f9a90561d 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -359,6 +359,10 @@ archclean:
 # My testing targets (bypasses dependencies)
 bp:;   $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/bootpImage
 
+include $(srctree)/scripts/Makefile.defconf
+PHONY += multi_v7_lpae_defconfig
+multi_v7_lpae_defconfig:
+   $(call merge_into_defconfig,multi_v7_defconfig,lpae)
 
 define archhelp
   echo  '* zImage- Compressed kernel image (arch/$(ARCH)/boot/zImage)'
diff --git a/arch/arm/configs/lpae.config b/arch/arm/configs/lpae.config
new file mode 100644
index ..19bab134e014
--- /dev/null
+++ b/arch/arm/configs/lpae.config
@@ -0,0 +1 @@
+CONFIG_ARM_LPAE=y
-- 
2.25.0



Re: [PATCH v2] dma-mapping: treat dev->bus_dma_mask as a DMA limit

2019-11-26 Thread Nicolas Saenz Julienne
On Mon, 2019-11-25 at 16:33 +, Robin Murphy wrote:
> On 25/11/2019 7:44 am, Christoph Hellwig wrote:
> > On Sat, Nov 23, 2019 at 09:51:08AM -0700, Nathan Chancellor wrote:
> > > Just as an FYI, this introduces a warning on arm32 allyesconfig for me:
> > 
> > I think the dma_limit argument to iommu_dma_alloc_iova should be a u64
> > and/or we need to use min_t and open code the zero exception.
> > 
> > Robin, Nicolas - any opinions?
> 
> Yeah, given that it's always held a mask I'm not entirely sure why it 
> was ever a dma_addr_t rather than a u64. Unless anyone else is desperate 
> to do it I'll get a cleanup patch ready for rc1.

Sounds good to me too

Robin, since I started the mess, I'll be happy to do it if it helps offloading
some work from you.

Regards,
Nicolas



signature.asc
Description: This is a digitally signed message part


Re: [PATCH v2] dma-mapping: treat dev->bus_dma_mask as a DMA limit

2019-11-21 Thread Nicolas Saenz Julienne
On Thu, 2019-11-21 at 16:24 +0100, Christoph Hellwig wrote:
> On Thu, Nov 21, 2019 at 10:26:44AM +0100, Nicolas Saenz Julienne wrote:
> > Using a mask to represent bus DMA constraints has a set of limitations.
> > The biggest one being it can only hold a power of two (minus one). The
> > DMA mapping code is already aware of this and treats dev->bus_dma_mask
> > as a limit. This quirk is already used by some architectures although
> > still rare.
> > 
> > With the introduction of the Raspberry Pi 4 we've found a new contender
> > for the use of bus DMA limits, as its PCIe bus can only address the
> > lower 3GB of memory (of a total of 4GB). This is impossible to represent
> > with a mask. To make things worse the device-tree code rounds non power
> > of two bus DMA limits to the next power of two, which is unacceptable in
> > this case.
> > 
> > In the light of this, rename dev->bus_dma_mask to dev->bus_dma_limit all
> > over the tree and treat it as such. Note that dev->bus_dma_limit should
> > contain the higher accesible DMA address.
> > 
> > Signed-off-by: Nicolas Saenz Julienne 
> 
> I've tentatively added this patch to the dma-mapping tree based on
> Robins principal approval of the last version.  That way tomorrows
> linux-next run should still pick it up.

Thanks!



signature.asc
Description: This is a digitally signed message part


[PATCH v2] dma-mapping: treat dev->bus_dma_mask as a DMA limit

2019-11-21 Thread Nicolas Saenz Julienne
Using a mask to represent bus DMA constraints has a set of limitations.
The biggest one being it can only hold a power of two (minus one). The
DMA mapping code is already aware of this and treats dev->bus_dma_mask
as a limit. This quirk is already used by some architectures although
still rare.

With the introduction of the Raspberry Pi 4 we've found a new contender
for the use of bus DMA limits, as its PCIe bus can only address the
lower 3GB of memory (of a total of 4GB). This is impossible to represent
with a mask. To make things worse the device-tree code rounds non power
of two bus DMA limits to the next power of two, which is unacceptable in
this case.

In the light of this, rename dev->bus_dma_mask to dev->bus_dma_limit all
over the tree and treat it as such. Note that dev->bus_dma_limit should
contain the higher accesible DMA address.

Signed-off-by: Nicolas Saenz Julienne 

---

Changes since v1:
  - rework ACPI code to avoid divergence with OF's

 arch/mips/pci/fixup-sb1250.c  | 16 
 arch/powerpc/sysdev/fsl_pci.c |  6 +++---
 arch/x86/kernel/pci-dma.c |  2 +-
 arch/x86/mm/mem_encrypt.c |  2 +-
 arch/x86/pci/sta2x11-fixup.c  |  2 +-
 drivers/acpi/arm64/iort.c | 20 +++-
 drivers/ata/ahci.c|  2 +-
 drivers/iommu/dma-iommu.c |  3 +--
 drivers/of/device.c   |  9 +
 include/linux/device.h|  6 +++---
 include/linux/dma-direct.h|  2 +-
 include/linux/dma-mapping.h   |  2 +-
 kernel/dma/direct.c   | 27 +--
 13 files changed, 46 insertions(+), 53 deletions(-)

diff --git a/arch/mips/pci/fixup-sb1250.c b/arch/mips/pci/fixup-sb1250.c
index 8a41b359cf90..40efc990cdce 100644
--- a/arch/mips/pci/fixup-sb1250.c
+++ b/arch/mips/pci/fixup-sb1250.c
@@ -21,22 +21,22 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIBYTE, 
PCI_DEVICE_ID_BCM1250_PCI,
 
 /*
  * The BCM1250, etc. PCI host bridge does not support DAC on its 32-bit
- * bus, so we set the bus's DMA mask accordingly.  However the HT link
+ * bus, so we set the bus's DMA limit accordingly.  However the HT link
  * down the artificial PCI-HT bridge supports 40-bit addressing and the
  * SP1011 HT-PCI bridge downstream supports both DAC and a 64-bit bus
  * width, so we record the PCI-HT bridge's secondary and subordinate bus
- * numbers and do not set the mask for devices present in the inclusive
+ * numbers and do not set the limit for devices present in the inclusive
  * range of those.
  */
-struct sb1250_bus_dma_mask_exclude {
+struct sb1250_bus_dma_limit_exclude {
bool set;
unsigned char start;
unsigned char end;
 };
 
-static int sb1250_bus_dma_mask(struct pci_dev *dev, void *data)
+static int sb1250_bus_dma_limit(struct pci_dev *dev, void *data)
 {
-   struct sb1250_bus_dma_mask_exclude *exclude = data;
+   struct sb1250_bus_dma_limit_exclude *exclude = data;
bool exclude_this;
bool ht_bridge;
 
@@ -55,7 +55,7 @@ static int sb1250_bus_dma_mask(struct pci_dev *dev, void 
*data)
exclude->start, exclude->end);
} else {
dev_dbg(>dev, "disabling DAC for device");
-   dev->dev.bus_dma_mask = DMA_BIT_MASK(32);
+   dev->dev.bus_dma_limit = DMA_BIT_MASK(32);
}
 
return 0;
@@ -63,9 +63,9 @@ static int sb1250_bus_dma_mask(struct pci_dev *dev, void 
*data)
 
 static void quirk_sb1250_pci_dac(struct pci_dev *dev)
 {
-   struct sb1250_bus_dma_mask_exclude exclude = { .set = false };
+   struct sb1250_bus_dma_limit_exclude exclude = { .set = false };
 
-   pci_walk_bus(dev->bus, sb1250_bus_dma_mask, );
+   pci_walk_bus(dev->bus, sb1250_bus_dma_limit, );
 }
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SIBYTE, PCI_DEVICE_ID_BCM1250_PCI,
quirk_sb1250_pci_dac);
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index ff0e2b156cb5..617a443d673d 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -115,8 +115,8 @@ static void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev)
 {
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
 
-   pdev->dev.bus_dma_mask =
-   hose->dma_window_base_cur + hose->dma_window_size;
+   pdev->dev.bus_dma_limit =
+   hose->dma_window_base_cur + hose->dma_window_size - 1;
 }
 
 static void setup_swiotlb_ops(struct pci_controller *hose)
@@ -135,7 +135,7 @@ static void fsl_pci_dma_set_mask(struct device *dev, u64 
dma_mask)
 * mapping that allows addressing any RAM address from across PCI.
 */
if (dev_is_pci(dev) && dma_mask >= pci64_dma_offset * 2 - 1) {
-   dev->bus_dma_mask = 0;
+   dev->bus_dma_limit = 0;
dev->archdata.dma_offset = pci64_dma_offset;
}
 }
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci

Re: [PATCH] dma-mapping: treat dev->bus_dma_mask as a DMA limit

2019-11-21 Thread Nicolas Saenz Julienne
On Thu, 2019-11-21 at 08:31 +0100, Christoph Hellwig wrote:
> On Tue, Nov 19, 2019 at 05:17:03PM +, Robin Murphy wrote:
> > TBH I can't see it being a massive problem even if the DMA patch, driver 
> > and DTS patch went entirely separately via the respective DMA, PCI, and 
> > arm-soc trees in the same cycle. Bisecting over a merge window is a big 
> > enough pain in the bum as it is, and if the worst case is that someone 
> > trying to do that on a Pi4 has a wonky PCI controller appear for a couple 
> > of commits, they may as well just disable that driver for their bisection, 
> > because it wasn't there at the start so can't possibly be the thing they're 
> > looking for regressions in ;)
> 
> Agreed.
> 
> Nicolas, can you send a respin?  That way I can still queue it up
> for 5.5.

Oh, I thought it was too late for that already. I'll send it in a minute.

Regards,
Nicolas



signature.asc
Description: This is a digitally signed message part


Re: [PATCH] dma-mapping: treat dev->bus_dma_mask as a DMA limit

2019-11-19 Thread Nicolas Saenz Julienne
On Wed, 2019-11-13 at 17:13 +0100, Nicolas Saenz Julienne wrote:
> Using a mask to represent bus DMA constraints has a set of limitations.
> The biggest one being it can only hold a power of two (minus one). The
> DMA mapping code is already aware of this and treats dev->bus_dma_mask
> as a limit. This quirk is already used by some architectures although
> still rare.
> 
> With the introduction of the Raspberry Pi 4 we've found a new contender
> for the use of bus DMA limits, as its PCIe bus can only address the
> lower 3GB of memory (of a total of 4GB). This is impossible to represent
> with a mask. To make things worse the device-tree code rounds non power
> of two bus DMA limits to the next power of two, which is unacceptable in
> this case.
> 
> In the light of this, rename dev->bus_dma_mask to dev->bus_dma_limit all
> over the tree and treat it as such. Note that dev->bus_dma_limit is
> meant to contain the higher accesible DMA address.
> 
> Signed-off-by: Nicolas Saenz Julienne 
> 

Hi Rob & Christoph,
do you mind if I append v2 of this into my upcoming v3 RPi4 PCIe support
series, I didn't do it initially as I thought this was going to be a
contentious patch.  But as it turned out better than expected, I think it
should go into the PCIe series. In the end it's the first explicit user of the
bus DMA limit.

Here's v2 in case you don't know what I'm talking about:
https://www.spinics.net/lists/arm-kernel/msg768459.html

Regards,
Nicolas



signature.asc
Description: This is a digitally signed message part


Re: [PATCH 1/3] dma-direct: unify the dma_capable definitions

2019-11-19 Thread Nicolas Saenz Julienne
On Tue, 2019-11-19 at 10:27 +0100, Marek Szyprowski wrote:
> Hi Christoph,
> 
> On 13.11.2019 08:35, Christoph Hellwig wrote:
> > Currently each architectures that wants to override dma_to_phys and
> > phys_to_dma also has to provide dma_capable.  But there isn't really
> > any good reason for that.  powerpc and mips just have copies of the
> > generic one minus the latests fix, and the arm one was the inspiration
> > for said fix, but misses the bus_dma_mask handling.
> > Make all architectures use the generic version instead.
> > 
> > Signed-off-by: Christoph Hellwig 
> 
> This patch breaks DMAengine PL330 driver on Samsung Exynos SoCs:
> 
> [3.602338] dma-pl330 1269.pdma: overflow 0x13810020+1 of DMA 
> mask  bus mask 0
> [3.605113] [ cut here ]
> [3.609719] WARNING: CPU: 2 PID: 1 at kernel/dma/direct.c:36 
> report_addr+0xc0/0xfc
> [3.617226] Modules linked in:
> [3.620271] CPU: 2 PID: 1 Comm: init Not tainted 
> 5.4.0-rc5-00056-gb037b220e71d #6911
> [3.627986] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree)
> [3.634094] [] (unwind_backtrace) from [] 
> (show_stack+0x10/0x14)
> [3.641803] [] (show_stack) from [] 
> (dump_stack+0xa8/0xd4)
> [3.649002] [] (dump_stack) from [] 
> (__warn+0xf4/0x10c)
> [3.655940] [] (__warn) from [] 
> (warn_slowpath_fmt+0xb0/0xb8)
> [3.663404] [] (warn_slowpath_fmt) from [] 
> (report_addr+0xc0/0xfc)
> [3.671303] [] (report_addr) from [] 
> (dma_direct_map_resource+0x98/0xa0)
> [3.679739] [] (dma_direct_map_resource) from [] 
> (pl330_prep_slave_fifo+0xe4/0x128)
> [3.689099] [] (pl330_prep_slave_fifo) from [] 
> (pl330_prep_slave_sg+0x54/0x1cc)
> [3.698129] [] (pl330_prep_slave_sg) from [] 
> (s3c24xx_serial_start_tx_dma+0x118/0x1e8)
> [3.707764] [] (s3c24xx_serial_start_tx_dma) from 
> [] (uart_write+0xe4/0x1e0)
> [3.716532] [] (uart_write) from [] 
> (n_tty_write+0x1c8/0x474)
> [3.723992] [] (n_tty_write) from [] 
> (tty_write+0x154/0x314)
> [3.731380] [] (tty_write) from [] 
> (__vfs_write+0x30/0x1d0)
> [3.738662] [] (__vfs_write) from [] 
> (vfs_write+0xa4/0x180)
> [3.745949] [] (vfs_write) from [] 
> (ksys_write+0x60/0xd8)
> [3.753069] [] (ksys_write) from [] 
> (ret_fast_syscall+0x0/0x28)
> [3.760705] Exception stack(0xee8dffa8 to 0xee8dfff0)
> [3.765739] ffa0:   004a beca876c 0002 
> beca876c 004a 
> [3.773900] ffc0: 004a beca876c b6f02cf0 0004 beca876c 
> 004a  b6f99010
> [3.782056] ffe0: 006c beca8628 b6e1d000 b6e7a634
> [3.787092] irq event stamp: 289740
> [3.790571] hardirqs last  enabled at (289739): [] 
> _raw_spin_unlock_irqrestore+0x6c/0x74
> [3.799503] hardirqs last disabled at (289740): [] 
> _raw_spin_lock_irqsave+0x1c/0x58
> [3.808011] softirqs last  enabled at (289576): [] 
> __do_softirq+0x4fc/0x5fc
> [3.815830] softirqs last disabled at (289567): [] 
> irq_exit+0x16c/0x170
> [3.823286] ---[ end trace b1432c658797b861 ]---
> [3.827898] samsung-uart 1381.serial: Unable to get desc for Tx
> [3.834624] samsung-uart 1381.serial: Unable to get desc for Tx
> [3.840487] samsung-uart 1381.serial: Unable to get desc for Tx
> 
> I'm checking now which part of it causes the issue.

Hi Marek,
is this a on a non LPAE ARM32 device?

Regards,
Nicolas

> > ---
> >   arch/arm/include/asm/dma-direct.h | 19 ---
> >   arch/mips/include/asm/dma-direct.h|  8 
> >   arch/powerpc/include/asm/dma-direct.h |  9 -
> >   include/linux/dma-direct.h|  2 +-
> >   4 files changed, 1 insertion(+), 37 deletions(-)
> > 
> > diff --git a/arch/arm/include/asm/dma-direct.h b/arch/arm/include/asm/dma-
> > direct.h
> > index b67e5fc1fe43..7c3001a6a775 100644
> > --- a/arch/arm/include/asm/dma-direct.h
> > +++ b/arch/arm/include/asm/dma-direct.h
> > @@ -14,23 +14,4 @@ static inline phys_addr_t __dma_to_phys(struct device
> > *dev, dma_addr_t dev_addr)
> > return __pfn_to_phys(dma_to_pfn(dev, dev_addr)) + offset;
> >   }
> >   
> > -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t
> > size)
> > -{
> > -   u64 limit, mask;
> > -
> > -   if (!dev->dma_mask)
> > -   return 0;
> > -
> > -   mask = *dev->dma_mask;
> > -
> > -   limit = (mask + 1) & ~mask;
> > -   if (limit && size > limit)
> > -   return 0;
> > -
> > -   if ((addr | (addr + size - 1)) & ~mask)
> > -   return 0;
> > -
> > -   return 1;
> > -}
> > -
> >   #endif /* ASM_ARM_DMA_DIRECT_H */
> > diff --git a/arch/mips/include/asm/dma-direct.h b/arch/mips/include/asm/dma-
> > direct.h
> > index b5c240806e1b..14e352651ce9 100644
> > --- a/arch/mips/include/asm/dma-direct.h
> > +++ b/arch/mips/include/asm/dma-direct.h
> > @@ -2,14 +2,6 @@
> >   #ifndef _MIPS_DMA_DIRECT_H
> >   #define _MIPS_DMA_DIRECT_H 1
> >   
> > -static inline bool dma_capable(struct device *dev, dma_addr_t addr, 

Re: [PATCH] dma-mapping: treat dev->bus_dma_mask as a DMA limit

2019-11-14 Thread Nicolas Saenz Julienne
On Wed, 2019-11-13 at 20:34 +, Robin Murphy wrote:
> On 13/11/2019 4:13 pm, Nicolas Saenz Julienne wrote:
> > Using a mask to represent bus DMA constraints has a set of limitations.
> > The biggest one being it can only hold a power of two (minus one). The
> > DMA mapping code is already aware of this and treats dev->bus_dma_mask
> > as a limit. This quirk is already used by some architectures although
> > still rare.
> > 
> > With the introduction of the Raspberry Pi 4 we've found a new contender
> > for the use of bus DMA limits, as its PCIe bus can only address the
> > lower 3GB of memory (of a total of 4GB). This is impossible to represent
> > with a mask. To make things worse the device-tree code rounds non power
> > of two bus DMA limits to the next power of two, which is unacceptable in
> > this case.
> > 
> > In the light of this, rename dev->bus_dma_mask to dev->bus_dma_limit all
> > over the tree and treat it as such. Note that dev->bus_dma_limit is
> > meant to contain the higher accesible DMA address.
> 
> Neat, you win a "why didn't I do it that way in the first place?" :)

:)

> Looking at it without all the history of previous attempts, this looks 
> entirely reasonable, and definitely a step in the right direction.
> 
> [...]
> > diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
> > index 5a7551d060f2..f18827cf96df 100644
> > --- a/drivers/acpi/arm64/iort.c
> > +++ b/drivers/acpi/arm64/iort.c
> > @@ -1097,7 +1097,7 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr,
> > u64 *dma_size)
> >  * Limit coherent and dma mask based on size
> >  * retrieved from firmware.
> >  */
> > -   dev->bus_dma_mask = mask;
> > +   dev->bus_dma_limit = mask;
> 
> Although this preserves the existing behaviour, as in of_dma_configure() 
> we can do better here since we have the original address range to hand. 
> I think it's worth keeping the ACPI and OF paths in sync for minor 
> tweaks like this, rather than letting them diverge unnecessarily.

I figure you mean something like this:

@@ -1085,19 +1085,15 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr,
u64 *dma_size)
}

if (!ret) {
-   msb = fls64(dmaaddr + size - 1);
-   /*
-* Round-up to the power-of-two mask or set
-* the mask to the whole 64-bit address space
-* in case the DMA region covers the full
-* memory window.
-*/
-   mask = msb == 64 ? U64_MAX : (1ULL << msb) - 1;
+   /* Round-up to the power-of-two */
+   end = dmaddr + size - 1;
+   mask = DMA_BIT_MASK(ilog2(end) + 1);
+
/*
 * Limit coherent and dma mask based on size
 * retrieved from firmware.
 */
-   dev->bus_dma_limit = mask;
+   dev->bus_dma_limit = end;
dev->coherent_dma_mask = mask;
*dev->dma_mask = mask;
}

> Otherwise, the rest looks OK to me - in principle we could store it as 
> an exclusive limit such that we could then streamline the min_not_zero() 
> tests to just min(mask, limit - 1), but that's probably too clever for 
> its own good.

Yes, that was my first intuition and in a perfect world I'd prefer it like
that. But as you say, it's probably going to cause more trouble than anything.

Regards,
Nicolas



signature.asc
Description: This is a digitally signed message part


[PATCH] dma-mapping: treat dev->bus_dma_mask as a DMA limit

2019-11-13 Thread Nicolas Saenz Julienne
Using a mask to represent bus DMA constraints has a set of limitations.
The biggest one being it can only hold a power of two (minus one). The
DMA mapping code is already aware of this and treats dev->bus_dma_mask
as a limit. This quirk is already used by some architectures although
still rare.

With the introduction of the Raspberry Pi 4 we've found a new contender
for the use of bus DMA limits, as its PCIe bus can only address the
lower 3GB of memory (of a total of 4GB). This is impossible to represent
with a mask. To make things worse the device-tree code rounds non power
of two bus DMA limits to the next power of two, which is unacceptable in
this case.

In the light of this, rename dev->bus_dma_mask to dev->bus_dma_limit all
over the tree and treat it as such. Note that dev->bus_dma_limit is
meant to contain the higher accesible DMA address.

Signed-off-by: Nicolas Saenz Julienne 

---

Note this is rebased on top of Christoph's latest DMA series:
https://www.spinics.net/lists/arm-kernel/msg768600.html

 arch/mips/pci/fixup-sb1250.c  | 16 
 arch/powerpc/sysdev/fsl_pci.c |  6 +++---
 arch/x86/kernel/pci-dma.c |  2 +-
 arch/x86/mm/mem_encrypt.c |  2 +-
 arch/x86/pci/sta2x11-fixup.c  |  2 +-
 drivers/acpi/arm64/iort.c |  2 +-
 drivers/ata/ahci.c|  2 +-
 drivers/iommu/dma-iommu.c |  3 +--
 drivers/of/device.c   |  9 +
 include/linux/device.h|  6 +++---
 include/linux/dma-direct.h|  2 +-
 include/linux/dma-mapping.h   |  2 +-
 kernel/dma/direct.c   | 27 +--
 13 files changed, 40 insertions(+), 41 deletions(-)

diff --git a/arch/mips/pci/fixup-sb1250.c b/arch/mips/pci/fixup-sb1250.c
index 8a41b359cf90..40efc990cdce 100644
--- a/arch/mips/pci/fixup-sb1250.c
+++ b/arch/mips/pci/fixup-sb1250.c
@@ -21,22 +21,22 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIBYTE, 
PCI_DEVICE_ID_BCM1250_PCI,
 
 /*
  * The BCM1250, etc. PCI host bridge does not support DAC on its 32-bit
- * bus, so we set the bus's DMA mask accordingly.  However the HT link
+ * bus, so we set the bus's DMA limit accordingly.  However the HT link
  * down the artificial PCI-HT bridge supports 40-bit addressing and the
  * SP1011 HT-PCI bridge downstream supports both DAC and a 64-bit bus
  * width, so we record the PCI-HT bridge's secondary and subordinate bus
- * numbers and do not set the mask for devices present in the inclusive
+ * numbers and do not set the limit for devices present in the inclusive
  * range of those.
  */
-struct sb1250_bus_dma_mask_exclude {
+struct sb1250_bus_dma_limit_exclude {
bool set;
unsigned char start;
unsigned char end;
 };
 
-static int sb1250_bus_dma_mask(struct pci_dev *dev, void *data)
+static int sb1250_bus_dma_limit(struct pci_dev *dev, void *data)
 {
-   struct sb1250_bus_dma_mask_exclude *exclude = data;
+   struct sb1250_bus_dma_limit_exclude *exclude = data;
bool exclude_this;
bool ht_bridge;
 
@@ -55,7 +55,7 @@ static int sb1250_bus_dma_mask(struct pci_dev *dev, void 
*data)
exclude->start, exclude->end);
} else {
dev_dbg(>dev, "disabling DAC for device");
-   dev->dev.bus_dma_mask = DMA_BIT_MASK(32);
+   dev->dev.bus_dma_limit = DMA_BIT_MASK(32);
}
 
return 0;
@@ -63,9 +63,9 @@ static int sb1250_bus_dma_mask(struct pci_dev *dev, void 
*data)
 
 static void quirk_sb1250_pci_dac(struct pci_dev *dev)
 {
-   struct sb1250_bus_dma_mask_exclude exclude = { .set = false };
+   struct sb1250_bus_dma_limit_exclude exclude = { .set = false };
 
-   pci_walk_bus(dev->bus, sb1250_bus_dma_mask, );
+   pci_walk_bus(dev->bus, sb1250_bus_dma_limit, );
 }
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SIBYTE, PCI_DEVICE_ID_BCM1250_PCI,
quirk_sb1250_pci_dac);
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index ff0e2b156cb5..617a443d673d 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -115,8 +115,8 @@ static void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev)
 {
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
 
-   pdev->dev.bus_dma_mask =
-   hose->dma_window_base_cur + hose->dma_window_size;
+   pdev->dev.bus_dma_limit =
+   hose->dma_window_base_cur + hose->dma_window_size - 1;
 }
 
 static void setup_swiotlb_ops(struct pci_controller *hose)
@@ -135,7 +135,7 @@ static void fsl_pci_dma_set_mask(struct device *dev, u64 
dma_mask)
 * mapping that allows addressing any RAM address from across PCI.
 */
if (dev_is_pci(dev) && dma_mask >= pci64_dma_offset * 2 - 1) {
-   dev->bus_dma_mask = 0;
+   dev->bus_dma_limit = 0;
dev->archdata.dma_offset = pci64_dma_offset;
}
 }
diff --git a/arch/x86/

Re: unify the dma_capable definition

2019-11-13 Thread Nicolas Saenz Julienne
On Wed, 2019-11-13 at 08:35 +0100, Christoph Hellwig wrote:
> Hi all,
> 
> there is no good reason to have different version of dma_capable.
> This series removes the arch overrides and also adds a few cleanups
> in the same area.

Reviewed-by: Nicolas Saenz Julienne 

for the whole series,
Thanks!



signature.asc
Description: This is a digitally signed message part


Re: [PATCH RFC 1/5] dma/direct: turn ARCH_ZONE_DMA_BITS into a variable

2019-10-31 Thread Nicolas Saenz Julienne
On Thu, 2019-10-31 at 06:38 -0700, Christoph Hellwig wrote:
> On Thu, Oct 31, 2019 at 11:30:36AM +0100, Nicolas Saenz Julienne wrote:
> > On Wed, 2019-10-30 at 14:49 -0700, Christoph Hellwig wrote:
> > > On Mon, Oct 14, 2019 at 08:31:03PM +0200, Nicolas Saenz Julienne wrote:
> > > > Some architectures, notably ARM, are interested in tweaking this
> > > > depending on their runtime DMA addressing limitations.
> > > > 
> > > > Signed-off-by: Nicolas Saenz Julienne 
> > > 
> > > Do you want me to pick this up for the 5.5 dma-mapping tree, or do you
> > > want me to wait for the rest to settle?
> > 
> > I'd say take it, this will be ultimately needed once we push forward with
> > ARM.
> 
> Can you resend a version that applies against 5.4-rc?  The current one
> has conflicts in the arm64 code.

Hi Catalin,
would you mind taking this patch on top of the arm64 ZONE_DMA series?

I tried to go though Christoph's tree but it ultimately clashes with the
ZONE_DMA series. It's simpler to apply it on top of your tree.

Regrads,
Nicolas



signature.asc
Description: This is a digitally signed message part


Re: [PATCH] dma/direct: turn ARCH_ZONE_DMA_BITS into a variable

2019-10-31 Thread Nicolas Saenz Julienne
On Thu, 2019-10-31 at 16:57 +0100, Christoph Hellwig wrote:
> On Thu, Oct 31, 2019 at 04:53:13PM +0100, Nicolas Saenz Julienne wrote:
> > > > +#define ARM64_ZONE_DMA_BITS30
> > > > +
> > > >  /*
> > > >   * We need to be able to catch inadvertent references to memstart_addr
> > > >   * that occur (potentially in generic code) before
> > > > arm64_memblock_init()
> > > > @@ -424,6 +427,8 @@ void __init arm64_memblock_init(void)
> > > > else
> > > > arm64_dma_phys_limit = PHYS_MASK + 1;
> > > >  
> > > > +   zone_dma_bits = ARM64_ZONE_DMA_BITS;
> > > > +
> > > > reserve_crashkernel();
> > > 
> > > This actually adds a new limit, as there wasn't one before for arm64.
> > 
> > Well, as zone_dma_bits is only relevant in dma/direct when ZONE_DMA is
> > defined
> > I figured it doesn't matter if the variable is set conditionally to ZONE_DMA
> > or
> > not.
> 
> I'd much prefer that to do separately.

OK, I see what you mean now. It's wrong indeed.

The trouble is the ZONE_DMA series[1] in arm64, also due for v5.5, will be
affected by this patch. I don't know the right way to approach this problem
since depending on the merge order, this patch should be updated or the arm64
ZONE_DMA series fixed.

Maybe it's easier to just wait for v5.6.

Regards,
Nicolas

[1] https://lkml.org/lkml/2019/9/11/734



signature.asc
Description: This is a digitally signed message part


Re: [PATCH] dma/direct: turn ARCH_ZONE_DMA_BITS into a variable

2019-10-31 Thread Nicolas Saenz Julienne
On Thu, 2019-10-31 at 16:47 +0100, Christoph Hellwig wrote:
> On Thu, Oct 31, 2019 at 04:28:37PM +0100, Nicolas Saenz Julienne wrote:
> > Some architectures, notably ARM, are interested in tweaking this
> > depending on their runtime DMA addressing limitations.
> > 
> > Signed-off-by: Nicolas Saenz Julienne 
> > ---
> > 
> > Changes since RFC:
> >  - Rebased to v5.4-rc6, fixed arm64 code.
> > 
> > NOTE: This will only apply to linux-next, where 
> 
> missing end of the sentence.  But only applying to linux-next isn't
> going to help anyone..

Arrgh, excuse me, I meant to delete that line.

> > diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> > index 45c00a54909c..f716ea634804 100644
> > --- a/arch/arm64/mm/init.c
> > +++ b/arch/arm64/mm/init.c
> > @@ -20,6 +20,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >  #include 
> > @@ -41,6 +42,8 @@
> >  #include 
> >  #include 
> >  
> > +#define ARM64_ZONE_DMA_BITS30
> > +
> >  /*
> >   * We need to be able to catch inadvertent references to memstart_addr
> >   * that occur (potentially in generic code) before arm64_memblock_init()
> > @@ -424,6 +427,8 @@ void __init arm64_memblock_init(void)
> > else
> > arm64_dma_phys_limit = PHYS_MASK + 1;
> >  
> > +   zone_dma_bits = ARM64_ZONE_DMA_BITS;
> > +
> > reserve_crashkernel();
> 
> This actually adds a new limit, as there wasn't one before for arm64.

Well, as zone_dma_bits is only relevant in dma/direct when ZONE_DMA is defined
I figured it doesn't matter if the variable is set conditionally to ZONE_DMA or
not.



signature.asc
Description: This is a digitally signed message part


[PATCH] dma/direct: turn ARCH_ZONE_DMA_BITS into a variable

2019-10-31 Thread Nicolas Saenz Julienne
Some architectures, notably ARM, are interested in tweaking this
depending on their runtime DMA addressing limitations.

Signed-off-by: Nicolas Saenz Julienne 
---

Changes since RFC:
 - Rebased to v5.4-rc6, fixed arm64 code.

NOTE: This will only apply to linux-next, where 
 arch/arm64/mm/init.c|  5 +
 arch/powerpc/include/asm/page.h |  9 -
 arch/powerpc/mm/mem.c   | 20 +++-
 arch/s390/include/asm/page.h|  2 --
 arch/s390/mm/init.c |  1 +
 include/linux/dma-direct.h  |  2 ++
 kernel/dma/direct.c | 13 ++---
 7 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 45c00a54909c..f716ea634804 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -41,6 +42,8 @@
 #include 
 #include 
 
+#define ARM64_ZONE_DMA_BITS30
+
 /*
  * We need to be able to catch inadvertent references to memstart_addr
  * that occur (potentially in generic code) before arm64_memblock_init()
@@ -424,6 +427,8 @@ void __init arm64_memblock_init(void)
else
arm64_dma_phys_limit = PHYS_MASK + 1;
 
+   zone_dma_bits = ARM64_ZONE_DMA_BITS;
+
reserve_crashkernel();
 
reserve_elfcorehdr();
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index c8bb14ff4713..f6c562acc3f8 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -329,13 +329,4 @@ struct vm_area_struct;
 #endif /* __ASSEMBLY__ */
 #include 
 
-/*
- * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks.
- */
-#ifdef CONFIG_PPC32
-#define ARCH_ZONE_DMA_BITS 30
-#else
-#define ARCH_ZONE_DMA_BITS 31
-#endif
-
 #endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index be941d382c8d..c95b7fe9f298 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -201,10 +202,10 @@ static int __init mark_nonram_nosave(void)
  * everything else. GFP_DMA32 page allocations automatically fall back to
  * ZONE_DMA.
  *
- * By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to
- * inform the generic DMA mapping code.  32-bit only devices (if not handled
- * by an IOMMU anyway) will take a first dip into ZONE_NORMAL and get
- * otherwise served by ZONE_DMA.
+ * By using 31-bit unconditionally, we can exploit zone_dma_bits to inform the
+ * generic DMA mapping code.  32-bit only devices (if not handled by an IOMMU
+ * anyway) will take a first dip into ZONE_NORMAL and get otherwise served by
+ * ZONE_DMA.
  */
 static unsigned long max_zone_pfns[MAX_NR_ZONES];
 
@@ -237,9 +238,18 @@ void __init paging_init(void)
printk(KERN_DEBUG "Memory hole size: %ldMB\n",
   (long int)((top_of_ram - total_ram) >> 20));
 
+   /*
+* Allow 30-bit DMA for very limited Broadcom wifi chips on many
+* powerbooks.
+*/
+   if (IS_ENABLED(CONFIG_PPC32))
+   zone_dma_bits = 30;
+   else
+   zone_dma_bits = 31;
+
 #ifdef CONFIG_ZONE_DMA
max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
- 1UL << (ARCH_ZONE_DMA_BITS - PAGE_SHIFT));
+ 1UL << (zone_dma_bits - PAGE_SHIFT));
 #endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 #ifdef CONFIG_HIGHMEM
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 823578c6b9e2..a4d38092530a 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -177,8 +177,6 @@ static inline int devmem_is_allowed(unsigned long pfn)
 #define VM_DATA_DEFAULT_FLAGS  (VM_READ | VM_WRITE | \
 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-#define ARCH_ZONE_DMA_BITS 31
-
 #include 
 #include 
 
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index a124f19f7b3c..f0ce0565 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -118,6 +118,7 @@ void __init paging_init(void)
 
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
+   zone_dma_bits = 31;
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index adf993a3bd58..d03af3605460 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -5,6 +5,8 @@
 #include 
 #include 
 
+extern unsigned int zone_dma_bits;
+
 #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA
 #include 
 #else
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 8402b29c280f..0b67c04e531b 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -16,12 +16,11 

Re: [PATCH RFC 1/5] dma/direct: turn ARCH_ZONE_DMA_BITS into a variable

2019-10-31 Thread Nicolas Saenz Julienne
On Wed, 2019-10-30 at 14:49 -0700, Christoph Hellwig wrote:
> On Mon, Oct 14, 2019 at 08:31:03PM +0200, Nicolas Saenz Julienne wrote:
> > Some architectures, notably ARM, are interested in tweaking this
> > depending on their runtime DMA addressing limitations.
> > 
> > Signed-off-by: Nicolas Saenz Julienne 
> 
> Do you want me to pick this up for the 5.5 dma-mapping tree, or do you
> want me to wait for the rest to settle?

I'd say take it, this will be ultimately needed once we push forward with ARM.



signature.asc
Description: This is a digitally signed message part


Re: [PATCH RFC 0/5] ARM: Raspberry Pi 4 DMA support

2019-10-15 Thread Nicolas Saenz Julienne
On Mon, 2019-10-14 at 21:59 +0100, Catalin Marinas wrote:
> On Mon, Oct 14, 2019 at 08:31:02PM +0200, Nicolas Saenz Julienne wrote:
> > the Raspberry Pi 4 offers up to 4GB of memory, of which only the first
> > is DMA capable device wide. This forces us to use of bounce buffers,
> > which are currently not very well supported by ARM's custom DMA ops.
> > Among other things the current mechanism (see dmabounce.c) isn't
> > suitable for high memory. Instead of fixing it, this series introduces a
> > way of selecting dma-direct as the default DMA ops provider which allows
> > for the Raspberry Pi to make use of swiotlb.
> 
> I presume these patches go on top of this series:
> 
> http://lkml.kernel.org/r/20190911182546.17094-1-nsaenzjulie...@suse.de

Yes, forgot to mention it. It's relevant for the first patch.

> 
> which I queued here:
> 
> 
https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git/log/?h=for-next/zone-dma

Thanks!

A little off topic but I was wondering if you have a preferred way to refer to
the arm architecture in a way that it unambiguously excludes arm64 (for example
arm32 would work).

Regards,
Nicolas



signature.asc
Description: This is a digitally signed message part


[PATCH RFC 1/5] dma/direct: turn ARCH_ZONE_DMA_BITS into a variable

2019-10-14 Thread Nicolas Saenz Julienne
Some architectures, notably ARM, are interested in tweaking this
depending on their runtime DMA addressing limitations.

Signed-off-by: Nicolas Saenz Julienne 
---
 arch/arm64/include/asm/page.h   |  2 --
 arch/arm64/mm/init.c|  9 +++--
 arch/powerpc/include/asm/page.h |  9 -
 arch/powerpc/mm/mem.c   | 20 +++-
 arch/s390/include/asm/page.h|  2 --
 arch/s390/mm/init.c |  1 +
 include/linux/dma-direct.h  |  2 ++
 kernel/dma/direct.c | 13 ++---
 8 files changed, 31 insertions(+), 27 deletions(-)

diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 7b8c98830101..d39ddb258a04 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -38,6 +38,4 @@ extern int pfn_valid(unsigned long);
 
 #include 
 
-#define ARCH_ZONE_DMA_BITS 30
-
 #endif
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 44f07fdf7a59..ddd6a6ce158e 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -41,6 +42,8 @@
 #include 
 #include 
 
+#define ARM64_ZONE_DMA_BITS30
+
 /*
  * We need to be able to catch inadvertent references to memstart_addr
  * that occur (potentially in generic code) before arm64_memblock_init()
@@ -440,8 +443,10 @@ void __init arm64_memblock_init(void)
 
early_init_fdt_scan_reserved_mem();
 
-   if (IS_ENABLED(CONFIG_ZONE_DMA))
-   arm64_dma_phys_limit = max_zone_phys(ARCH_ZONE_DMA_BITS);
+   if (IS_ENABLED(CONFIG_ZONE_DMA)) {
+   zone_dma_bits = ARM64_ZONE_DMA_BITS;
+   arm64_dma_phys_limit = max_zone_phys(ARM64_ZONE_DMA_BITS);
+   }
 
if (IS_ENABLED(CONFIG_ZONE_DMA32))
arm64_dma32_phys_limit = max_zone_phys(32);
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index c8bb14ff4713..f6c562acc3f8 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -329,13 +329,4 @@ struct vm_area_struct;
 #endif /* __ASSEMBLY__ */
 #include 
 
-/*
- * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks.
- */
-#ifdef CONFIG_PPC32
-#define ARCH_ZONE_DMA_BITS 30
-#else
-#define ARCH_ZONE_DMA_BITS 31
-#endif
-
 #endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 97e5922cb52e..8bab4e8b6bae 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -200,10 +201,10 @@ static int __init mark_nonram_nosave(void)
  * everything else. GFP_DMA32 page allocations automatically fall back to
  * ZONE_DMA.
  *
- * By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to
- * inform the generic DMA mapping code.  32-bit only devices (if not handled
- * by an IOMMU anyway) will take a first dip into ZONE_NORMAL and get
- * otherwise served by ZONE_DMA.
+ * By using 31-bit unconditionally, we can exploit zone_dma_bits to inform the
+ * generic DMA mapping code.  32-bit only devices (if not handled by an IOMMU
+ * anyway) will take a first dip into ZONE_NORMAL and get otherwise served by
+ * ZONE_DMA.
  */
 static unsigned long max_zone_pfns[MAX_NR_ZONES];
 
@@ -236,9 +237,18 @@ void __init paging_init(void)
printk(KERN_DEBUG "Memory hole size: %ldMB\n",
   (long int)((top_of_ram - total_ram) >> 20));
 
+   /*
+* Allow 30-bit DMA for very limited Broadcom wifi chips on many
+* powerbooks.
+*/
+   if (IS_ENABLED(CONFIG_PPC32))
+   zone_dma_bits = 30;
+   else
+   zone_dma_bits = 31;
+
 #ifdef CONFIG_ZONE_DMA
max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
- 1UL << (ARCH_ZONE_DMA_BITS - PAGE_SHIFT));
+ 1UL << (zone_dma_bits - PAGE_SHIFT));
 #endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 #ifdef CONFIG_HIGHMEM
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 823578c6b9e2..a4d38092530a 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -177,8 +177,6 @@ static inline int devmem_is_allowed(unsigned long pfn)
 #define VM_DATA_DEFAULT_FLAGS  (VM_READ | VM_WRITE | \
 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-#define ARCH_ZONE_DMA_BITS 31
-
 #include 
 #include 
 
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index c1d96e588152..ac44bd76db4b 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -118,6 +118,7 @@ void __init paging_init(void)
 
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
+   zone_dma_bits = 31;
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
max_zone_pfns[ZONE

[PATCH RFC 0/5] ARM: Raspberry Pi 4 DMA support

2019-10-14 Thread Nicolas Saenz Julienne
Hi all,
the Raspberry Pi 4 offers up to 4GB of memory, of which only the first
is DMA capable device wide. This forces us to use of bounce buffers,
which are currently not very well supported by ARM's custom DMA ops.
Among other things the current mechanism (see dmabounce.c) isn't
suitable for high memory. Instead of fixing it, this series introduces a
way of selecting dma-direct as the default DMA ops provider which allows
for the Raspberry Pi to make use of swiotlb.

Regards,
Nicolas

---

Nicolas Saenz Julienne (5):
  dma/direct: turn ARCH_ZONE_DMA_BITS into a variable
  ARM: introduce arm_dma_direct
  ARM: let machines select dma-direct over arch's DMA implementation
  dma/direct: check for overflows in ARM's dma_capable()
  ARM: bcm2711: use dma-direct

 arch/arm/include/asm/dma-direct.h  |  6 ++
 arch/arm/include/asm/dma-mapping.h |  3 ++-
 arch/arm/include/asm/dma.h |  2 ++
 arch/arm/include/asm/mach/arch.h   |  1 +
 arch/arm/mach-bcm/Kconfig  |  1 +
 arch/arm/mach-bcm/bcm2711.c|  1 +
 arch/arm/mm/dma-mapping.c  | 10 ++
 arch/arm/mm/init.c | 21 -
 arch/arm64/include/asm/page.h  |  2 --
 arch/arm64/mm/init.c   |  9 +++--
 arch/powerpc/include/asm/page.h|  9 -
 arch/powerpc/mm/mem.c  | 20 +++-
 arch/s390/include/asm/page.h   |  2 --
 arch/s390/mm/init.c|  1 +
 include/linux/dma-direct.h |  2 ++
 kernel/dma/direct.c| 13 ++---
 16 files changed, 66 insertions(+), 37 deletions(-)

-- 
2.23.0



[PATCH v2 09/11] dma-direct: turn ARCH_ZONE_DMA_BITS into a variable

2019-08-20 Thread Nicolas Saenz Julienne
Some architectures, notably arm64, are interested in tweaking this
depending on their runtime dma addressing limitations.

Signed-off-by: Nicolas Saenz Julienne 
---

Changes in v2:
- Rename new variable to zone_dma_bits
- Update comment with Christoph's suggestion
- Remove old powerpc comment

 arch/powerpc/include/asm/page.h |  9 -
 arch/powerpc/mm/mem.c   | 16 +++-
 arch/s390/include/asm/page.h|  2 --
 arch/s390/mm/init.c |  1 +
 include/linux/dma-direct.h  |  2 ++
 kernel/dma/direct.c | 13 ++---
 6 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 0d52f57fca04..73668a21ae78 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -319,13 +319,4 @@ struct vm_area_struct;
 #endif /* __ASSEMBLY__ */
 #include 
 
-/*
- * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks.
- */
-#ifdef CONFIG_PPC32
-#define ARCH_ZONE_DMA_BITS 30
-#else
-#define ARCH_ZONE_DMA_BITS 31
-#endif
-
 #endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 9191a66b3bc5..2a69f87585df 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -201,10 +202,10 @@ static int __init mark_nonram_nosave(void)
  * everything else. GFP_DMA32 page allocations automatically fall back to
  * ZONE_DMA.
  *
- * By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to
- * inform the generic DMA mapping code.  32-bit only devices (if not handled
- * by an IOMMU anyway) will take a first dip into ZONE_NORMAL and get
- * otherwise served by ZONE_DMA.
+ * By using 31-bit unconditionally, we can exploit zone_dma_bits to inform the
+ * generic DMA mapping code.  32-bit only devices (if not handled by an IOMMU
+ * anyway) will take a first dip into ZONE_NORMAL and get otherwise served by
+ * ZONE_DMA.
  */
 static unsigned long max_zone_pfns[MAX_NR_ZONES];
 
@@ -237,9 +238,14 @@ void __init paging_init(void)
printk(KERN_DEBUG "Memory hole size: %ldMB\n",
   (long int)((top_of_ram - total_ram) >> 20));
 
+   if (IS_ENABLED(CONFIG_PPC32))
+   zone_dma_bits = 30;
+   else
+   zone_dma_bits = 31;
+
 #ifdef CONFIG_ZONE_DMA
max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
- 1UL << (ARCH_ZONE_DMA_BITS - PAGE_SHIFT));
+ 1UL << (zone_dma_bits - PAGE_SHIFT));
 #endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 #ifdef CONFIG_HIGHMEM
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 823578c6b9e2..a4d38092530a 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -177,8 +177,6 @@ static inline int devmem_is_allowed(unsigned long pfn)
 #define VM_DATA_DEFAULT_FLAGS  (VM_READ | VM_WRITE | \
 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-#define ARCH_ZONE_DMA_BITS 31
-
 #include 
 #include 
 
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 20340a03ad90..bd98465b8b9f 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -118,6 +118,7 @@ void __init paging_init(void)
 
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
+   zone_dma_bits = 31;
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index adf993a3bd58..d03af3605460 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -5,6 +5,8 @@
 #include 
 #include 
 
+extern unsigned int zone_dma_bits;
+
 #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA
 #include 
 #else
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 795c9b095d75..b23cd65f26e0 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -16,12 +16,11 @@
 #include 
 
 /*
- * Most architectures use ZONE_DMA for the first 16 Megabytes, but
- * some use it for entirely different regions:
+ * Most architectures use ZONE_DMA for the first 16 Megabytes, but some use it
+ * it for entirely different regions. In that case the arch code needs to
+ * override the variable below for dma-direct to work properly.
  */
-#ifndef ARCH_ZONE_DMA_BITS
-#define ARCH_ZONE_DMA_BITS 24
-#endif
+unsigned int zone_dma_bits __ro_after_init = 24;
 
 static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size)
 {
@@ -69,7 +68,7 @@ static gfp_t __dma_direct_optimal_gfp_mask(struct device 
*dev, u64 dma_mask,
 * Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding
 * zones.
 */
-   if (*phys_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
+   if (*phys_mas

[PATCH v2 00/11] Raspberry Pi 4 DMA addressing support

2019-08-20 Thread Nicolas Saenz Julienne
Hi all,
this series attempts to address some issues we found while bringing up
the new Raspberry Pi 4 in arm64 and it's intended to serve as a follow
up of these discussions:
v1: https://lkml.org/lkml/2019/7/31/922
RFC: https://lkml.org/lkml/2019/7/17/476

The new Raspberry Pi 4 has up to 4GB of memory but most peripherals can
only address the first GB: their DMA address range is
0xc000-0xfc00 which is aliased to the first GB of physical
memory 0x-0x3c00. Note that only some peripherals have these
limitations: the PCIe, V3D, GENET, and 40-bit DMA channels have a wider
view of the address space by virtue of being hooked up trough a second
interconnect.

Part of this is solved in arm32 by setting up the machine specific
'.dma_zone_size = SZ_1G', which takes care of reserving the coherent
memory area at the right spot. That said no buffer bouncing (needed for
dma streaming) is available at the moment, but that's a story for
another series.

Unfortunately there is no such thing as 'dma_zone_size' in arm64. Only
ZONE_DMA32 is created which is interpreted by dma-direct and the arm64
arch code as if all peripherals where be able to address the first 4GB
of memory.

In the light of this, the series implements the following changes:

- Create generic 'dma_zone_size' in order for hardware description code
  to set it up when needed.

- Add a function in early_init_dt_scan() to setup 'dma_zone_size' for
  the RPi4.

- Create both DMA zones in arm64, ZONE_DMA will contain the area
  addressable by all peripherals and ZONE_DMA32 the rest of the 32 bit
  addressable memory. ZONE_DMA32 might be left empty.

- Reserve the CMA area in a place suitable for all peripherals.

- Inform dma-direct of the new runtime calculated min_mask.

This series has been tested on multiple devices both by checking the
zones setup matches the expectations and by double-checking physical
addresses on pages allocated on the three relevant areas GFP_DMA,
GFP_DMA32, GFP_KERNEL:

- On an RPi4 with variations on the ram memory size. But also forcing
  the situation where all three memory zones are nonempty by setting a 3G
  ZONE_DMA32 ceiling on a 4G setup. Both with and without NUMA support.

- On a Synquacer box[1] with 32G of memory.

- On an ACPI based Huawei TaiShan server[2] with 256G of memory.

- On a QEMU virtual machine running arm64's OpenSUSE Tumbleweed.

That's all.

Regards,
Nicolas

[1] https://www.96boards.org/product/developerbox/
[2] 
https://e.huawei.com/en/products/cloud-computing-dc/servers/taishan-server/taishan-2280-v2

---

Changes in v2:
- More in depth testing.
- Create new global 'dma_zone_size'.
- New approach to getting the dma_zone_size, instead of parsing the dts
  we hardcode it conditionally to the machine compatible name.
- Fix ZONE_DMA and ZONE_DMA32 split, now ZONE_DMA32 remains empty if
  ZONE_DMA fits the whole 32 bit addressable space.
- Take into account devices with DMA offset.
- Rename new dma-direct variable to zone_dma_bits.
- Try new approach by merging both ZONE_DMA and ZONE_DMA32 comments
  in mmzone.h, add new up to date examples.

Nicolas Saenz Julienne (11):
  asm-generic: add dma_zone_size
  arm: use generic dma_zone_size
  of/fdt: add of_fdt_machine_is_compatible function
  of/fdt: add early_init_dt_get_dma_zone_size()
  arm64: mm: use arm64_dma_phys_limit instead of calling
max_zone_dma_phys()
  arm64: rename variables used to calculate ZONE_DMA32's size
  arm64: re-introduce max_zone_dma_phys()
  arm64: use both ZONE_DMA and ZONE_DMA32
  dma-direct: turn ARCH_ZONE_DMA_BITS into a variable
  arm64: edit zone_dma_bits to fine tune dma-direct min mask
  mm: refresh ZONE_DMA and ZONE_DMA32 comments in 'enum zone_type'

 arch/arm/include/asm/dma.h  |  8 ++--
 arch/arm/mm/init.c  | 12 ++
 arch/arm64/Kconfig  |  4 ++
 arch/arm64/mm/init.c| 73 +
 arch/powerpc/include/asm/page.h |  9 
 arch/powerpc/mm/mem.c   | 16 +---
 arch/s390/include/asm/page.h|  2 -
 arch/s390/mm/init.c |  1 +
 drivers/of/fdt.c| 15 +++
 include/asm-generic/dma.h   |  8 +++-
 include/linux/dma-direct.h  |  2 +
 include/linux/mmzone.h  | 46 -
 kernel/dma/direct.c | 13 +++---
 mm/page_alloc.c |  3 ++
 14 files changed, 140 insertions(+), 72 deletions(-)

-- 
2.22.0



Re: [PATCH 6/8] dma-direct: turn ARCH_ZONE_DMA_BITS into a variable

2019-08-01 Thread Nicolas Saenz Julienne
Hi Christoph, thanks for the review.

On Thu, 2019-08-01 at 16:04 +0200, Christoph Hellwig wrote:
> A few nitpicks, otherwise this looks great:
> 
> > @@ -201,7 +202,7 @@ static int __init mark_nonram_nosave(void)
> >   * everything else. GFP_DMA32 page allocations automatically fall back to
> >   * ZONE_DMA.
> >   *
> > - * By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to
> > + * By using 31-bit unconditionally, we can exploit arch_zone_dma_bits to
> >   * inform the generic DMA mapping code.  32-bit only devices (if not
> > handled
> >   * by an IOMMU anyway) will take a first dip into ZONE_NORMAL and get
> >   * otherwise served by ZONE_DMA.
> > @@ -237,9 +238,18 @@ void __init paging_init(void)
> > printk(KERN_DEBUG "Memory hole size: %ldMB\n",
> >(long int)((top_of_ram - total_ram) >> 20));
> >  
> > +   /*
> > +* Allow 30-bit DMA for very limited Broadcom wifi chips on many
> > +* powerbooks.
> > +*/
> > +   if (IS_ENABLED(CONFIG_PPC32))
> > +   arch_zone_dma_bits = 30;
> > +   else
> > +   arch_zone_dma_bits = 31;
> > +
> 
> So the above unconditionally comment obviously isn't true any more, and
> Ben also said for the recent ppc32 hack he'd prefer dynamic detection.
> 
> Maybe Ben and or other ppc folks can chime in an add a patch to the series
> to sort this out now that we have a dynamic ZONE_DMA threshold?

Noted, for now I'll remove the comment.

> > diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> > index 59bdceea3737..40dfc9b4ee4c 100644
> > --- a/kernel/dma/direct.c
> > +++ b/kernel/dma/direct.c
> > @@ -19,9 +19,7 @@
> >   * Most architectures use ZONE_DMA for the first 16 Megabytes, but
> >   * some use it for entirely different regions:
> >   */
> > -#ifndef ARCH_ZONE_DMA_BITS
> > -#define ARCH_ZONE_DMA_BITS 24
> > -#endif
> > +unsigned int arch_zone_dma_bits __ro_after_init = 24;
> 
> I'd prefer to drop the arch_ prefix and just calls this zone_dma_bits.
> In the long run we really need to find a way to just automatically set
> this from the meminit code, but that is out of scope for this series.
> For now can you please just update the comment above to say something
> like:
> 
> /*
>  * Most architectures use ZONE_DMA for the first 16 Megabytes, but some use it
>  * it for entirely different regions.  In that case the arch code needs to
>  * override the variable below for dma-direct to work properly.
>  */

Ok perfect.



signature.asc
Description: This is a digitally signed message part


[PATCH 0/8] Raspberry Pi 4 DMA addressing support

2019-07-31 Thread Nicolas Saenz Julienne
Hi all,
this series attempts to address some issues we found while bringing up
the new Raspberry Pi 4 in arm64 and it's intended to serve as a follow
up of this discussion:
https://lkml.org/lkml/2019/7/17/476

The new Raspberry Pi 4 has up to 4GB of memory but most peripherals can
only address the first GB: their DMA address range is
0xc000-0xfc00 which is aliased to the first GB of physical
memory 0x-0x3c00. Note that only some peripherals have these
limitations: the ARM cores, PCIe, V3D, GENET, and 40-bit DMA channels
have a wider view of the address space.

Part of this is solved in arm32 by setting up the machine specific
'.dma_zone_size = SZ_1G', which takes care of the allocating the
coherent memory area at the right spot. Yet no buffer bouncing (needed
for dma streaming) is available at the moment, but that's a story for
another series.

Unfortunately there is no such thing as '.dma_zone_size' in arm64 also
only ZONE_DMA32 is created which is interpreted by dma-direct and the
arm64 code as if all peripherals where be able to address the first 4GB
of memory.

In the light of this, the series implements the following changes:

- Add code that parses the device-tree in oder to find the SoC's common
  DMA area.

- Create a ZONE_DMA whenever that area is needed and add the rest of the
  lower 4 GB of memory to ZONE_DMA32*.

- Create the CMA area in a place suitable for all peripherals.

- Inform dma-direct of the new runtime calculated min_mask*.

That's all.

Regards,
Nicolas

* These solutions where already discussed on the previous RFC (see link
above).

---

Nicolas Saenz Julienne (8):
  arm64: mm: use arm64_dma_phys_limit instead of calling
max_zone_dma_phys()
  arm64: rename variables used to calculate ZONE_DMA32's size
  of/fdt: add function to get the SoC wide DMA addressable memory size
  arm64: re-introduce max_zone_dma_phys()
  arm64: use ZONE_DMA on DMA addressing limited devices
  dma-direct: turn ARCH_ZONE_DMA_BITS into a variable
  arm64: update arch_zone_dma_bits to fine tune dma-direct min mask
  mm: comment arm64's usage of 'enum zone_type'

 arch/arm64/Kconfig  |  4 ++
 arch/arm64/mm/init.c| 78 ++---
 arch/powerpc/include/asm/page.h |  9 
 arch/powerpc/mm/mem.c   | 14 +-
 arch/s390/include/asm/page.h|  2 -
 arch/s390/mm/init.c |  1 +
 drivers/of/fdt.c| 72 ++
 include/linux/dma-direct.h  |  2 +
 include/linux/mmzone.h  | 21 -
 include/linux/of_fdt.h  |  2 +
 kernel/dma/direct.c |  8 ++--
 11 files changed, 168 insertions(+), 45 deletions(-)

-- 
2.22.0



[PATCH 6/8] dma-direct: turn ARCH_ZONE_DMA_BITS into a variable

2019-07-31 Thread Nicolas Saenz Julienne
Some architectures, notably arm64, are interested in tweaking this
depending on their runtime dma addressing limitations.

Signed-off-by: Nicolas Saenz Julienne 
---

 arch/powerpc/include/asm/page.h |  9 -
 arch/powerpc/mm/mem.c   | 14 --
 arch/s390/include/asm/page.h|  2 --
 arch/s390/mm/init.c |  1 +
 include/linux/dma-direct.h  |  2 ++
 kernel/dma/direct.c |  8 +++-
 6 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 0d52f57fca04..73668a21ae78 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -319,13 +319,4 @@ struct vm_area_struct;
 #endif /* __ASSEMBLY__ */
 #include 
 
-/*
- * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks.
- */
-#ifdef CONFIG_PPC32
-#define ARCH_ZONE_DMA_BITS 30
-#else
-#define ARCH_ZONE_DMA_BITS 31
-#endif
-
 #endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 9191a66b3bc5..3792a998ca02 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -201,7 +202,7 @@ static int __init mark_nonram_nosave(void)
  * everything else. GFP_DMA32 page allocations automatically fall back to
  * ZONE_DMA.
  *
- * By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to
+ * By using 31-bit unconditionally, we can exploit arch_zone_dma_bits to
  * inform the generic DMA mapping code.  32-bit only devices (if not handled
  * by an IOMMU anyway) will take a first dip into ZONE_NORMAL and get
  * otherwise served by ZONE_DMA.
@@ -237,9 +238,18 @@ void __init paging_init(void)
printk(KERN_DEBUG "Memory hole size: %ldMB\n",
   (long int)((top_of_ram - total_ram) >> 20));
 
+   /*
+* Allow 30-bit DMA for very limited Broadcom wifi chips on many
+* powerbooks.
+*/
+   if (IS_ENABLED(CONFIG_PPC32))
+   arch_zone_dma_bits = 30;
+   else
+   arch_zone_dma_bits = 31;
+
 #ifdef CONFIG_ZONE_DMA
max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
- 1UL << (ARCH_ZONE_DMA_BITS - PAGE_SHIFT));
+ 1UL << (arch_zone_dma_bits - PAGE_SHIFT));
 #endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
 #ifdef CONFIG_HIGHMEM
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 823578c6b9e2..a4d38092530a 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -177,8 +177,6 @@ static inline int devmem_is_allowed(unsigned long pfn)
 #define VM_DATA_DEFAULT_FLAGS  (VM_READ | VM_WRITE | \
 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-#define ARCH_ZONE_DMA_BITS 31
-
 #include 
 #include 
 
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 20340a03ad90..07d93955d3e4 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -118,6 +118,7 @@ void __init paging_init(void)
 
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
+   arch_zone_dma_bits = 31;
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index adf993a3bd58..a1b353b77858 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -5,6 +5,8 @@
 #include 
 #include 
 
+extern unsigned int arch_zone_dma_bits;
+
 #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA
 #include 
 #else
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 59bdceea3737..40dfc9b4ee4c 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -19,9 +19,7 @@
  * Most architectures use ZONE_DMA for the first 16 Megabytes, but
  * some use it for entirely different regions:
  */
-#ifndef ARCH_ZONE_DMA_BITS
-#define ARCH_ZONE_DMA_BITS 24
-#endif
+unsigned int arch_zone_dma_bits __ro_after_init = 24;
 
 static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size)
 {
@@ -72,7 +70,7 @@ static gfp_t __dma_direct_optimal_gfp_mask(struct device 
*dev, u64 dma_mask,
 * Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding
 * zones.
 */
-   if (*phys_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
+   if (*phys_mask <= DMA_BIT_MASK(arch_zone_dma_bits))
return GFP_DMA;
if (*phys_mask <= DMA_BIT_MASK(32))
return GFP_DMA32;
@@ -387,7 +385,7 @@ int dma_direct_supported(struct device *dev, u64 mask)
u64 min_mask;
 
if (IS_ENABLED(CONFIG_ZONE_DMA))
-   min_mask = DMA_BIT_MASK(ARCH_ZONE_DMA_BITS);
+   min_mask = DMA_BIT_MASK(arch_zone_dma_bits);
else
min_mask = DMA_BIT_MASK(32);
 
-- 
2.22.0