On 2022/4/4 19:27, John Garry wrote:
> Some low-level drivers may request DMA mappings whose IOVA length exceeds
> that of the current rcache upper limit.
>
> This means that allocations for those IOVAs will never be cached, and
> always must be allocated and freed from the RB tree per DMA mapping cycle.
> This has a significant effect on performance, more so since commit
> 4e89dce72521 ("iommu/iova: Retry from last rb tree node if iova search
> fails"), as discussed at [0].
>
> As a first step towards allowing the rcache range upper limit to be
> configured, hold this value in the IOVA rcache structure, and allocate
> the rcaches separately.
>
> Delete macro IOVA_RANGE_CACHE_MAX_SIZE in case it's reused by mistake.
>
> [0]
> https://lore.kernel.org/linux-iommu/20210129092120.1482-1-thunder.leizhen@huawei.com/
>
> Signed-off-by: John Garry
> ---
> drivers/iommu/iova.c | 20 ++++++++++----------
> include/linux/iova.h | 3 +++
> 2 files changed, 13 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
> index db77aa675145..5c22b9187b79 100644
> --- a/drivers/iommu/iova.c
> +++ b/drivers/iommu/iova.c
> @@ -15,8 +15,6 @@
> /* The anchor node sits above the top of the usable address space */
> #define IOVA_ANCHOR ~0UL
>
> -#define IOVA_RANGE_CACHE_MAX_SIZE 6 /* log of max cached IOVA range size
> (in pages) */
> -
> static bool iova_rcache_insert(struct iova_domain *iovad,
> unsigned long pfn,
> unsigned long size);
> @@ -443,7 +441,7 @@ alloc_iova_fast(struct iova_domain *iovad, unsigned long
> size,
>* rounding up anything cacheable to make sure that can't happen. The
>* order of the unadjusted size will still match upon freeing.
>*/
> - if (size < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
> + if (size < (1 << (iovad->rcache_max_size - 1)))
> size = roundup_pow_of_two(size);
>
> iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
> @@ -713,13 +711,15 @@ int iova_domain_init_rcaches(struct iova_domain *iovad)
> unsigned int cpu;
> int i, ret;
>
> - iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE,
> + iovad->rcache_max_size = 6; /* Arbitrarily high default */
It would be better to assign this constant value to iovad->rcache_max_size in
init_iova_domain().
> +
> + iovad->rcaches = kcalloc(iovad->rcache_max_size,
>sizeof(struct iova_rcache),
>GFP_KERNEL);
> if (!iovad->rcaches)
> return -ENOMEM;
>
> - for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
> + for (i = 0; i < iovad->rcache_max_size; ++i) {
> struct iova_cpu_rcache *cpu_rcache;
> struct iova_rcache *rcache;
>
> @@ -816,7 +816,7 @@ static bool iova_rcache_insert(struct iova_domain *iovad,
> unsigned long pfn,
> {
> unsigned int log_size = order_base_2(size);
>
> - if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
> + if (log_size >= iovad->rcache_max_size)
> return false;
>
> return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
> @@ -872,7 +872,7 @@ static unsigned long iova_rcache_get(struct iova_domain
> *iovad,
> {
> unsigned int log_size = order_base_2(size);
>
> - if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE || !iovad->rcaches)
> + if (log_size >= iovad->rcache_max_size || !iovad->rcaches)
> return 0;
>
> return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
> @@ -888,7 +888,7 @@ static void free_iova_rcaches(struct iova_domain *iovad)
> unsigned int cpu;
> int i, j;
>
> - for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
> + for (i = 0; i < iovad->rcache_max_size; ++i) {
> rcache = &iovad->rcaches[i];
> if (!rcache->cpu_rcaches)
> break;
> @@ -916,7 +916,7 @@ static void free_cpu_cached_iovas(unsigned int cpu,
> struct iova_domain *iovad)
> unsigned long flags;
> int i;
>
> - for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
> + for (i = 0; i < iovad->rcache_max_size; ++i) {
> rcache = &iovad->rcaches[i];
> cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
> spin_lock_irqsave(&cpu_rcache->lock, flags);
> @@ -935,7 +935,7 @@ static void free_global_cached_iovas(struct iova_domain
> *iovad)
> unsigned long flags;
> int i, j;
>
> - for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
> + for (i = 0; i < iovad->rcache_max_size; ++i) {
> rcache = &iovad->rcaches[i];
> spin_lock_irqsave(&rcache->lock, flags);
> for (j = 0; j < rcache->depot_size; ++j) {
> diff --git a/include/linux/iova.h b/include/linux/iova.h
> index 320a70e40233..02f7222fa85a 100644
> --- a/include/linux/iova.h
> +++ b/include/linux/iova.h
> @@ -38,6 +38,9 @@ struct iova_domain {
>
> struct iova_rcache