This adds a percpu cache for small iova allocations. For allocations bigger than 64k, the cache is bypassed. The cache allows iova allocations to be batched, dramatically reducing iovad lock contention. In testing, the lock contention becomes very tiny. The cache has 5 size classes (4k, 8k, 16k, 32k, 64k), each of which will cache up to 512K of dma address space. Each cpu will therefore cache at most 512K * 5 of dma address space. For an IOMMU system, the cached dma address space is quite tiny, so we don't bother draining the cache on CPU hotplug.
Cc: Joerg Roedel <[email protected]> Cc: David Woodhouse <[email protected]> Signed-off-by: Shaohua Li <[email protected]> --- drivers/iommu/iova.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++----- include/linux/iova.h | 9 ++++++ 2 files changed, 89 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index fa0adef..5c86c5c 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -25,6 +25,7 @@ void init_iova_domain(struct iova_domain *iovad, unsigned long granule, unsigned long start_pfn, unsigned long pfn_32bit) { + int cpu; /* * IOVA granularity will normally be equal to the smallest * supported IOMMU page size; both *must* be capable of @@ -38,6 +39,13 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, iovad->granule = granule; iovad->start_pfn = start_pfn; iovad->dma_32bit_pfn = pfn_32bit; + iovad->percpu_cache = alloc_percpu(struct iova_cache); + for_each_possible_cpu(cpu) { + struct iova_cache *cache = per_cpu_ptr(iovad->percpu_cache, cpu); + int i; + for (i = 0; i <= ilog2(MAX_CACHE_SIZE); i++) + INIT_LIST_HEAD(&cache->cache_lists[i]); + } } EXPORT_SYMBOL_GPL(init_iova_domain); @@ -103,12 +111,10 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, struct iova *new, bool size_aligned) { struct rb_node *prev, *curr = NULL; - unsigned long flags; unsigned long saved_pfn; unsigned int pad_size = 0; /* Walk the tree backwards */ - spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); saved_pfn = limit_pfn; curr = __get_cached_rbnode(iovad, &limit_pfn); prev = curr; @@ -135,10 +141,8 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, if (!curr) { if (size_aligned) pad_size = iova_get_pad_size(size, limit_pfn); - if ((iovad->start_pfn + size + pad_size) > limit_pfn) { - spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + if ((iovad->start_pfn + size + pad_size) > limit_pfn) return -ENOMEM; - } } /* pfn_lo will point to size aligned address if 
size_aligned is set */ @@ -177,9 +181,6 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, } __cached_rbnode_insert_update(iovad, saved_pfn, new); - spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); - - return 0; } @@ -256,6 +257,69 @@ void iova_cache_put(void) } EXPORT_SYMBOL_GPL(iova_cache_put); +static void refill_percpu_iova(struct iova_domain *iovad, int index) +{ + struct iova_cache *cache; + struct iova *new_iova; + unsigned long flags; + int ret; + int count = 0; + + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + cache = this_cpu_ptr(iovad->percpu_cache); + + if (!list_empty(&cache->cache_lists[index])) + goto out; + + while (count < (MAX_CACHE_UNIT_SIZE >> index)) { + new_iova = alloc_iova_mem(); + if (!new_iova) + goto out; + ret = __alloc_and_insert_iova_range(iovad, 1 << index, + iovad->dma_32bit_pfn, new_iova, true); + if (ret) { + free_iova_mem(new_iova); + goto out; + } + count++; + list_add(&new_iova->sibling, &cache->cache_lists[index]); + } +out: + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); +} + +static struct iova *alloc_percpu_iova(struct iova_domain *iovad, + unsigned long size, unsigned long limit_pfn) +{ + int cache_index; + struct iova_cache *cache; + struct iova *iova; + bool refilled = false; + + if (size > MAX_CACHE_SIZE || limit_pfn < iovad->dma_32bit_pfn) + return NULL; + cache_index = order_base_2(size); + +again: + preempt_disable(); + cache = this_cpu_ptr(iovad->percpu_cache); + iova = list_first_entry_or_null(&cache->cache_lists[cache_index], + struct iova, sibling); + if (iova) + list_del(&iova->sibling); + preempt_enable(); + if (iova) + return iova; + + if (!refilled) { + refill_percpu_iova(iovad, cache_index); + refilled = true; + goto again; + } + + return NULL; +} + /** * alloc_iova - allocates an iova * @iovad: - iova domain in question @@ -274,13 +338,20 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, { struct iova *new_iova; int ret; + unsigned long flags; + + 
new_iova = alloc_percpu_iova(iovad, size, limit_pfn); + if (new_iova) + return new_iova; new_iova = alloc_iova_mem(); if (!new_iova) return NULL; + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn, new_iova, size_aligned); + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); if (ret) { free_iova_mem(new_iova); @@ -389,6 +460,7 @@ void put_iova_domain(struct iova_domain *iovad) node = rb_first(&iovad->rbroot); } spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + free_percpu(iovad->percpu_cache); } EXPORT_SYMBOL_GPL(put_iova_domain); diff --git a/include/linux/iova.h b/include/linux/iova.h index 92f7177..1386a7b 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -21,6 +21,14 @@ struct iova { struct rb_node node; unsigned long pfn_hi; /* IOMMU dish out addr hi */ unsigned long pfn_lo; /* IOMMU dish out addr lo */ + struct list_head sibling; +}; + +#define MAX_CACHE_SIZE 16 /* 16 * 4k, only cache <= 64K allocation */ +#define MAX_CACHE_UNIT_SIZE (512 / 4) /* 512k total cache size */ + +struct iova_cache { + struct list_head cache_lists[ilog2(MAX_CACHE_SIZE) + 1]; }; /* holds all the iova translations for a domain */ @@ -31,6 +39,7 @@ struct iova_domain { unsigned long granule; /* pfn granularity for this domain */ unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; + struct iova_cache __percpu *percpu_cache; }; static inline unsigned long iova_size(struct iova *iova) -- 2.4.6 _______________________________________________ iommu mailing list [email protected] https://lists.linuxfoundation.org/mailman/listinfo/iommu
