Leverage the ARCH_HAS_DMA_MAP_DIRECT config option for coherent allocations as well. This bypasses DMA ops for memory allocations that have been pre-mapped.
Always set the device bus_dma_limit when memory is pre-mapped. On some architectures, such as PowerPC, persistent memory (pmem) can be converted to regular memory via the daxctl command. The limit gates coherent allocations to pre-mapped RAM only, via dma_coherent_ok(). Signed-off-by: Gaurav Batra <[email protected]> --- arch/powerpc/kernel/dma-iommu.c | 30 +++++++++++++++++--------- arch/powerpc/platforms/pseries/iommu.c | 6 ++---- include/linux/dma-map-ops.h | 4 ++++ kernel/dma/mapping.c | 4 ++-- 4 files changed, 28 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 0359ab72cd3b..a8742afb0d2e 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -65,6 +65,21 @@ bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg, return true; } +bool arch_dma_alloc_direct(struct device *dev) +{ + if (dev->dma_ops_bypass) + return true; + + return false; +} + +bool arch_dma_free_direct(struct device *dev, dma_addr_t dma_handle) +{ + if (!dev->dma_ops_bypass) + return false; + + return is_direct_handle(dev, dma_handle); +} #endif /* CONFIG_ARCH_HAS_DMA_MAP_DIRECT */ /* @@ -148,17 +163,12 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask) if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) { /* - * dma_iommu_bypass_supported() sets dma_max when there is - * 1:1 mapping but it is somehow limited. - * ibm,pmemory is one example. + * fixed ops will be used for RAM. This is limited by + * bus_dma_limit which is set when RAM is pre-mapped. 
*/ - dev->dma_ops_bypass = dev->bus_dma_limit == 0; - if (!dev->dma_ops_bypass) - dev_warn(dev, - "iommu: 64-bit OK but direct DMA is limited by %llx\n", - dev->bus_dma_limit); - else - dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n"); + dev->dma_ops_bypass = true; + dev_info(dev, "iommu: 64-bit OK but direct DMA is limited by %llx\n", + dev->bus_dma_limit); return 1; } diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index eec333dd2e59..5497b130e026 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1769,10 +1769,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn, u64 dma_mas out_unlock: mutex_unlock(&dma_win_init_mutex); - /* If we have persistent memory and the window size is not big enough - * to directly map both RAM and vPMEM, then we need to set DMA limit. - */ - if (pmem_present && direct_mapping && len != MAX_PHYSMEM_BITS) + /* For pre-mapped memory, set bus_dma_limit to the max RAM */ + if (direct_mapping) dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << max_ram_len); diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 10882d00cb17..0b5bb6fea90f 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -401,11 +401,15 @@ bool arch_dma_map_sg_direct(struct device *dev, struct scatterlist *sg, int nents); bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg, int nents); +bool arch_dma_alloc_direct(struct device *dev); +bool arch_dma_free_direct(struct device *dev, dma_addr_t dma_handle); #else #define arch_dma_map_phys_direct(d, a) (false) #define arch_dma_unmap_phys_direct(d, a) (false) #define arch_dma_map_sg_direct(d, s, n) (false) #define arch_dma_unmap_sg_direct(d, s, n) (false) +#define arch_dma_alloc_direct(d) (false) +#define arch_dma_free_direct(d, a) (false) #endif #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS diff --git a/kernel/dma/mapping.c 
b/kernel/dma/mapping.c index fe7472f13b10..d5743b3c3ab3 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -654,7 +654,7 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, /* let the implementation decide on the zone to allocate from: */ flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); - if (dma_alloc_direct(dev, ops)) { + if (dma_alloc_direct(dev, ops) || arch_dma_alloc_direct(dev)) { cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs); } else if (use_dma_iommu(dev)) { cpu_addr = iommu_dma_alloc(dev, size, dma_handle, flag, attrs); @@ -695,7 +695,7 @@ void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, return; debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); - if (dma_alloc_direct(dev, ops)) + if (dma_alloc_direct(dev, ops) || arch_dma_free_direct(dev, dma_handle)) dma_direct_free(dev, size, cpu_addr, dma_handle, attrs); else if (use_dma_iommu(dev)) iommu_dma_free(dev, size, cpu_addr, dma_handle, attrs); base-commit: c2c2ccfd4ba72718266a56f3ecc34c989cb5b7a0 -- 2.39.3
