The iovad rbtree spinlock contention is very significant. In a workload with netperf and a 10Gb NIC, a multithreaded workload shows 100% cpu utilization (99% of cpu time spent on the lock) and the total throughput is less than 1Gbps.
This patch introduces a bitmap based allocator. We allocate a big chunk of DMA range and divide it into 8k chunks. Each bit in the bitmap represents an 8k chunk. For any allocation with size less than 8k, we allocate 8k. The DMA address allocation then becomes allocating one bit from a bitmap. We use a percpu bitmap to speed up the bit allocation further. With the bitmap allocation, the lock contention is completely avoided. In the workload above, the throughput is around 9.28Gbps and cpu utilization drops to 1%. This only works for DMA allocations of 8k or less, but that is the case with the heaviest lock contention. If DAC is enabled by default in the future, we can allocate a bigger DMA range and a bigger chunk size. Cc: Joerg Roedel <[email protected]> Cc: David Woodhouse <[email protected]> Signed-off-by: Shaohua Li <[email protected]> --- drivers/iommu/intel-iommu.c | 23 +++++++---- drivers/iommu/iova.c | 93 +++++++++++++++++++++++++++++++++++++++------ include/linux/iova.h | 13 +++++++ 3 files changed, 109 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 5c57b9a..6412297 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3595,7 +3595,7 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr) { struct dmar_domain *domain; unsigned long start_pfn, last_pfn; - struct iova *iova; + struct iova *iova = NULL; struct intel_iommu *iommu; struct page *freelist; @@ -3607,13 +3607,17 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr) iommu = domain_get_iommu(domain); - iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); - if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n", + if (iova_pfn_in_bitmap(&domain->iovad, IOVA_PFN(dev_addr))) { + start_pfn = IOVA_PFN(dev_addr); + last_pfn = start_pfn + IOVA_BITMAP_UNIT - 1; + } else { + iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); + if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n", (unsigned long 
long)dev_addr)) - return; - - start_pfn = mm_to_dma_pfn(iova->pfn_lo); - last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; + return; + start_pfn = mm_to_dma_pfn(iova->pfn_lo); + last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; + } pr_debug("Device %s unmapping: pfn %lx-%lx\n", dev_name(dev), start_pfn, last_pfn); @@ -3624,7 +3628,10 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr) iommu_flush_iotlb_psi(iommu, domain, start_pfn, last_pfn - start_pfn + 1, !freelist, 0); /* free iova */ - __free_iova(&domain->iovad, iova); + if (iova) + __free_iova(&domain->iovad, iova); + else + free_iova(&domain->iovad, start_pfn); dma_free_pagelist(freelist); } else { add_unmap(domain, IOVA_PFN(dev_addr), last_pfn - start_pfn + 1, diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 2c5e197..6d11caf 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -15,6 +15,7 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. * * Author: Anil S Keshavamurthy <[email protected]> + * Bitmap based allocation: Shaohua Li <[email protected]> */ #include <linux/iova.h> @@ -40,6 +41,9 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, start_pfn = 1; iovad->start_pfn = start_pfn; iovad->dma_32bit_pfn = pfn_32bit; + percpu_ida_init(&iovad->bitmap, IOVA_BITMAP_SIZE / IOVA_BITMAP_UNIT); + iovad->bitmap_iova = NULL; + iovad->disable_bitmap = true; } EXPORT_SYMBOL_GPL(init_iova_domain); @@ -258,6 +262,54 @@ void iova_cache_put(void) } EXPORT_SYMBOL_GPL(iova_cache_put); +static struct iova *__alloc_iova(struct iova_domain *iovad, + unsigned long size, unsigned long limit_pfn, bool size_aligned) +{ + struct iova *new_iova; + int ret; + + new_iova = alloc_iova_mem(); + if (!new_iova) + return NULL; + + ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn, + new_iova, size_aligned); + + if (ret) { + free_iova_mem(new_iova); + return NULL; + } + return new_iova; +} + +static int __init_iova_bitmap(struct iova_domain *iovad) +{ + struct iova *new_iova; 
+ unsigned long flags; + + new_iova = __alloc_iova(iovad, IOVA_BITMAP_SIZE, + iovad->dma_32bit_pfn, false); + + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + if (!new_iova) { + if (!iovad->bitmap_iova) + iovad->disable_bitmap = true; + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + return iovad->bitmap_iova ? 0 : -ENOMEM; + } + + if (!iovad->bitmap_iova) + iovad->bitmap_iova = new_iova; + else { + __cached_rbnode_delete_update(iovad, new_iova); + rb_erase(&new_iova->node, &iovad->rbroot); + } + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + if (iovad->bitmap_iova != new_iova) + free_iova_mem(new_iova); + return 0; +} + /** * alloc_iova - allocates an iova * @iovad: - iova domain in question @@ -275,20 +327,23 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, bool size_aligned) { struct iova *new_iova; - int ret; - - new_iova = alloc_iova_mem(); + int tag; + + if (size <= IOVA_BITMAP_UNIT && !iovad->disable_bitmap) { + if (!(iovad->bitmap_iova) && __init_iova_bitmap(iovad)) + goto fallback; + if (limit_pfn < iovad->bitmap_iova->pfn_hi) + goto fallback; + tag = percpu_ida_alloc(&iovad->bitmap, TASK_RUNNING); + if (tag < 0) + goto fallback; + return iovad->bitmap_iova->pfn_lo + tag * IOVA_BITMAP_UNIT; + } +fallback: + new_iova = __alloc_iova(iovad, size, limit_pfn, size_aligned); if (!new_iova) return 0; - ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn, - new_iova, size_aligned); - - if (ret) { - free_iova_mem(new_iova); - return 0; - } - return new_iova->pfn_lo; } EXPORT_SYMBOL_GPL(alloc_iova); @@ -345,6 +400,8 @@ __free_iova(struct iova_domain *iovad, struct iova *iova) { unsigned long flags; + BUG_ON(iova_pfn_in_bitmap(iovad, iova->pfn_lo)); + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); __cached_rbnode_delete_update(iovad, iova); rb_erase(&iova->node, &iovad->rbroot); @@ -363,8 +420,18 @@ EXPORT_SYMBOL_GPL(__free_iova); void free_iova(struct iova_domain *iovad, unsigned long pfn) { - struct iova *iova = 
find_iova(iovad, pfn); + struct iova *iova; + + if (iova_pfn_in_bitmap(iovad, pfn)) { + int tag; + + tag = (pfn - iovad->bitmap_iova->pfn_lo) >> + IOVA_BITMAP_UNIT_LOG; + percpu_ida_free(&iovad->bitmap, tag); + return; + } + iova = find_iova(iovad, pfn); if (iova) __free_iova(iovad, iova); } @@ -381,6 +448,7 @@ void put_iova_domain(struct iova_domain *iovad) unsigned long flags; spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + iovad->bitmap_iova = NULL; node = rb_first(&iovad->rbroot); while (node) { struct iova *iova = container_of(node, struct iova, node); @@ -390,6 +458,7 @@ void put_iova_domain(struct iova_domain *iovad) node = rb_first(&iovad->rbroot); } spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + percpu_ida_destroy(&iovad->bitmap); } EXPORT_SYMBOL_GPL(put_iova_domain); diff --git a/include/linux/iova.h b/include/linux/iova.h index cfe5ee9..63a81ef 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -15,6 +15,7 @@ #include <linux/kernel.h> #include <linux/rbtree.h> #include <linux/dma-mapping.h> +#include <linux/percpu_ida.h> /* iova structure */ struct iova { @@ -23,6 +24,9 @@ struct iova { unsigned long pfn_lo; /* IOMMU dish out addr lo */ }; +#define IOVA_BITMAP_UNIT ((8 * 1024) >> PAGE_SHIFT) +#define IOVA_BITMAP_UNIT_LOG (ilog2(IOVA_BITMAP_UNIT)) +#define IOVA_BITMAP_SIZE ((1L * 1024 * 1024 * 1024) >> PAGE_SHIFT) /* holds all the iova translations for a domain */ struct iova_domain { spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ @@ -31,6 +35,9 @@ struct iova_domain { unsigned long granule; /* pfn granularity for this domain */ unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; + struct percpu_ida bitmap; + struct iova *bitmap_iova; + bool disable_bitmap; }; static inline unsigned long iova_size(struct iova *iova) @@ -68,6 +75,12 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) return iova >> iova_shift(iovad); } +static inline bool 
iova_pfn_in_bitmap(struct iova_domain *iovad, unsigned long pfn) +{ + return iovad->bitmap_iova && pfn >= iovad->bitmap_iova->pfn_lo && + pfn <= iovad->bitmap_iova->pfn_hi; +} + int iova_cache_get(void); void iova_cache_put(void); -- 2.4.6 _______________________________________________ iommu mailing list [email protected] https://lists.linuxfoundation.org/mailman/listinfo/iommu
