>-----Original Message-----
>From: Lendacky, Thomas
>Sent: Tuesday, June 06, 2017 1:23 AM
>To: [email protected]
>Cc: Nath, Arindam <[email protected]>; Joerg Roedel <[email protected]>; Duran, Leo <[email protected]>; Suthikulpanit, Suravee <[email protected]>
>Subject: [PATCH v1 3/3] iommu/amd: Optimize the IOMMU queue flush
>
>After reducing the amount of MMIO performed by the IOMMU during operation,
>perf data shows that flushing the TLB for all protection domains during
>DMA unmapping is a performance issue. It is not necessary to flush the
>TLBs for all protection domains, only the protection domains associated
>with iova's on the flush queue.
>
>Create a separate queue that tracks the protection domains associated with
>the iova's on the flush queue. This new queue optimizes the flushing of
>TLBs to the required protection domains.
>
>Reviewed-by: Arindam Nath <[email protected]>
>Signed-off-by: Tom Lendacky <[email protected]>
>---
> drivers/iommu/amd_iommu.c | 56 ++++++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 50 insertions(+), 6 deletions(-)
>
>diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
>index 856103b..a5e77f0 100644
>--- a/drivers/iommu/amd_iommu.c
>+++ b/drivers/iommu/amd_iommu.c
>@@ -103,7 +103,18 @@ struct flush_queue {
> 	struct flush_queue_entry *entries;
> };
> 
>+struct flush_pd_queue_entry {
>+	struct protection_domain *pd;
>+};
>+
>+struct flush_pd_queue {
>+	/* No lock needed, protected by flush_queue lock */
>+	unsigned next;
>+	struct flush_pd_queue_entry *entries;
>+};
>+
> static DEFINE_PER_CPU(struct flush_queue, flush_queue);
>+static DEFINE_PER_CPU(struct flush_pd_queue, flush_pd_queue);
> 
> static atomic_t queue_timer_on;
> static struct timer_list queue_timer;
>@@ -2227,16 +2238,20 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev)
>  *
>  *****************************************************************************/
> 
>-static void __queue_flush(struct flush_queue *queue)
>+static void __queue_flush(struct flush_queue *queue,
>+			  struct flush_pd_queue *pd_queue)
> {
>-	struct protection_domain *domain;
> 	unsigned long flags;
> 	int idx;
> 
> 	/* First flush TLB of all known domains */
> 	spin_lock_irqsave(&amd_iommu_pd_lock, flags);
>-	list_for_each_entry(domain, &amd_iommu_pd_list, list)
>-		domain_flush_tlb(domain);
>+	for (idx = 0; idx < pd_queue->next; ++idx) {
>+		struct flush_pd_queue_entry *entry;
>+
>+		entry = pd_queue->entries + idx;
>+		domain_flush_tlb(entry->pd);
>+	}
> 	spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
> 
> 	/* Wait until flushes have completed */
>@@ -2255,6 +2270,7 @@ static void __queue_flush(struct flush_queue *queue)
> 		entry->dma_dom = NULL;
> 	}
> 
>+	pd_queue->next = 0;
> 	queue->next = 0;
> }
> 
>@@ -2263,13 +2279,15 @@ static void queue_flush_all(void)
> 	int cpu;
> 
> 	for_each_possible_cpu(cpu) {
>+		struct flush_pd_queue *pd_queue;
> 		struct flush_queue *queue;
> 		unsigned long flags;
> 
> 		queue = per_cpu_ptr(&flush_queue, cpu);
>+		pd_queue = per_cpu_ptr(&flush_pd_queue, cpu);
> 		spin_lock_irqsave(&queue->lock, flags);
> 		if (queue->next > 0)
>-			__queue_flush(queue);
>+			__queue_flush(queue, pd_queue);
> 		spin_unlock_irqrestore(&queue->lock, flags);
> 	}
> }
>@@ -2283,6 +2301,8 @@ static void queue_flush_timeout(unsigned long unsused)
> static void queue_add(struct dma_ops_domain *dma_dom,
> 		      unsigned long address, unsigned long pages)
> {
>+	struct flush_pd_queue_entry *pd_entry;
>+	struct flush_pd_queue *pd_queue;
> 	struct flush_queue_entry *entry;
> 	struct flush_queue *queue;
> 	unsigned long flags;
>@@ -2292,10 +2312,22 @@ static void queue_add(struct dma_ops_domain *dma_dom,
> 	address >>= PAGE_SHIFT;
> 
> 	queue = get_cpu_ptr(&flush_queue);
>+	pd_queue = get_cpu_ptr(&flush_pd_queue);
> 	spin_lock_irqsave(&queue->lock, flags);
> 
> 	if (queue->next == FLUSH_QUEUE_SIZE)
>-		__queue_flush(queue);
>+		__queue_flush(queue, pd_queue);
>+
>+	for (idx = 0; idx < pd_queue->next; ++idx) {
>+		pd_entry = pd_queue->entries + idx;
>+		if (pd_entry->pd == &dma_dom->domain)
>+			break;
>+	}
>+	if (idx == pd_queue->next) {
>+		/* New protection domain, add it to the list */
>+		pd_entry = pd_queue->entries + pd_queue->next++;
>+		pd_entry->pd = &dma_dom->domain;
>+	}
> 
> 	idx = queue->next++;
> 	entry = queue->entries + idx;
>@@ -2309,6 +2341,7 @@ static void queue_add(struct dma_ops_domain *dma_dom,
> 	if (atomic_cmpxchg(&queue_timer_on, 0, 1) == 0)
> 		mod_timer(&queue_timer, jiffies + msecs_to_jiffies(10));
> 
>+	put_cpu_ptr(&flush_pd_queue);
> 	put_cpu_ptr(&flush_queue);
> }
> 
>@@ -2810,6 +2843,8 @@ int __init amd_iommu_init_api(void)
> 		return ret;
> 
> 	for_each_possible_cpu(cpu) {
>+		struct flush_pd_queue *pd_queue = per_cpu_ptr(&flush_pd_queue,
>+							      cpu);
> 		struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu);
> 
> 		queue->entries = kzalloc(FLUSH_QUEUE_SIZE *
>@@ -2819,6 +2854,12 @@ int __init amd_iommu_init_api(void)
> 			goto out_put_iova;
> 
> 		spin_lock_init(&queue->lock);
>+
>+		pd_queue->entries = kzalloc(FLUSH_QUEUE_SIZE *
>+					    sizeof(*pd_queue->entries),
>+					    GFP_KERNEL);
>+		if (!pd_queue->entries)
>+			goto out_put_iova;
> 	}
> 
> 	err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
>@@ -2836,9 +2877,12 @@ int __init amd_iommu_init_api(void)
> 
> out_put_iova:
> 	for_each_possible_cpu(cpu) {
>+		struct flush_pd_queue *pd_queue = per_cpu_ptr(&flush_pd_queue,
>+							      cpu);
> 		struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu);
> 
> 		kfree(queue->entries);
>+		kfree(pd_queue->entries);
> 	}
> 
> 	return -ENOMEM;
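For anyone reading along without the tree handy: the heart of the patch is pairing the existing per-CPU flush queue with a second per-CPU queue that records each protection domain at most once per batch, so __queue_flush() walks only those domains instead of the global amd_iommu_pd_list. Below is a minimal, self-contained user-space C sketch of just that bookkeeping. All names and the queue size here are hypothetical stand-ins, and the kernel's locking, per-CPU accessors, timer, and IOVA handling are deliberately omitted.

/* flush_pd_sketch.c - illustrative only, not kernel code. */
#include <stdio.h>

#define FLUSH_QUEUE_SIZE 4	/* tiny so the demo flushes early */

struct protection_domain { int id; };

struct flush_queue_entry {
	struct protection_domain *pd;
	unsigned long iova;
};

struct flush_queue {
	unsigned next;
	struct flush_queue_entry entries[FLUSH_QUEUE_SIZE];
};

/* Parallel queue of *unique* domains seen since the last flush. */
struct flush_pd_queue {
	unsigned next;
	struct protection_domain *pds[FLUSH_QUEUE_SIZE];
};

static void domain_flush_tlb(struct protection_domain *pd)
{
	printf("  flush TLB for domain %d\n", pd->id);
}

static void queue_flush(struct flush_queue *q, struct flush_pd_queue *pdq)
{
	unsigned i;

	/* Flush only the domains that actually own queued IOVAs. */
	for (i = 0; i < pdq->next; i++)
		domain_flush_tlb(pdq->pds[i]);

	pdq->next = 0;
	q->next = 0;
}

static void queue_add(struct flush_queue *q, struct flush_pd_queue *pdq,
		      struct protection_domain *pd, unsigned long iova)
{
	unsigned i;

	if (q->next == FLUSH_QUEUE_SIZE) {
		printf("queue full:\n");
		queue_flush(q, pdq);
	}

	/* Linear-scan dedup: record each domain at most once per batch. */
	for (i = 0; i < pdq->next; i++)
		if (pdq->pds[i] == pd)
			break;
	if (i == pdq->next)
		pdq->pds[pdq->next++] = pd;

	q->entries[q->next].pd = pd;
	q->entries[q->next].iova = iova;
	q->next++;
}

int main(void)
{
	struct flush_queue q = { 0 };
	struct flush_pd_queue pdq = { 0 };
	struct protection_domain a = { .id = 1 }, b = { .id = 2 };

	/* Five unmaps from two domains: the first flush covers only
	 * domains 1 and 2, however many other domains exist system-wide. */
	queue_add(&q, &pdq, &a, 0x1000);
	queue_add(&q, &pdq, &a, 0x2000);
	queue_add(&q, &pdq, &b, 0x3000);
	queue_add(&q, &pdq, &a, 0x4000);
	queue_add(&q, &pdq, &b, 0x5000);

	printf("final drain:\n");
	queue_flush(&q, &pdq);
	return 0;
}

The linear scan in queue_add() stays cheap because the dedup list can never hold more entries than FLUSH_QUEUE_SIZE, and it is emptied on every flush, which is why the patch gets away without any hashing or sorting.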
Craig and Jan, can you please confirm whether this patch fixes the IOMMU
timeout errors you encountered before? If it does, then this is a better
implementation of the fix I provided a few weeks back.

Thanks,
Arindam

_______________________________________________
iommu mailing list
[email protected]
https://lists.linuxfoundation.org/mailman/listinfo/iommu
