[PATCH v3] vfio/type1: Adopt fast IOTLB flush interface when unmap IOVAs

2018-01-21 Thread Suravee Suthikulpanit
VFIO IOMMU type1 currently unmaps IOVA pages synchronously, which requires
IOTLB flushing for every unmapping. This results in a large IOTLB flushing
overhead when handling pass-through devices that have a large number of
mapped IOVAs (e.g. GPUs). It can also cause an IOTLB invalidation time-out
issue on AMD IOMMU with certain dGPUs.

This can be avoided by using the new IOTLB flushing interface.
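As background, the deferred-flush interface (added to the IOMMU API in 4.14)
splits unmapping from IOTLB invalidation. Below is a minimal sketch of the two
flows, purely illustrative and not part of this patch; the helper names are
invented, and the real logic lives in vfio_unmap_unpin() further down.

#include <linux/iommu.h>

/* Old flow: every iommu_unmap() call flushes the IOTLB synchronously. */
static void unmap_range_slow(struct iommu_domain *dom, unsigned long iova,
			     size_t size, size_t pgsize)
{
	while (size) {
		size_t unmapped = iommu_unmap(dom, iova, pgsize);

		if (!unmapped)
			break;
		iova += unmapped;
		size -= unmapped;
	}
}

/* New flow: queue the unmapped ranges, then issue one flush at the end. */
static void unmap_range_fast(struct iommu_domain *dom, unsigned long iova,
			     size_t size, size_t pgsize)
{
	while (size) {
		size_t unmapped = iommu_unmap_fast(dom, iova, pgsize);

		if (!unmapped)
			break;
		iommu_tlb_range_add(dom, iova, unmapped);	/* remember range */
		iova += unmapped;
		size -= unmapped;
	}
	iommu_tlb_sync(dom);	/* single IOTLB flush for the whole batch */
}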

Cc: Alex Williamson 
Cc: Joerg Roedel 
Signed-off-by: Suravee Suthikulpanit 
---
Changes from V2: (https://lkml.org/lkml/2017/12/27/43)

  * In vfio_unmap_unpin(), fall back to the slow IOTLB flush
when the fast IOTLB flush fails (per Alex).

  * Do not adopt fast IOTLB flush in map_try_harder().

  * Submit VFIO and AMD IOMMU patches separately.

 drivers/vfio/vfio_iommu_type1.c | 98 +
 1 file changed, 98 insertions(+)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index e30e29a..5c40b00 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -102,6 +102,13 @@ struct vfio_pfn {
	atomic_t		ref_count;
 };
 
+struct vfio_regions {
+   struct list_head list;
+   dma_addr_t iova;
+   phys_addr_t phys;
+   size_t len;
+};
+
 #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)\
	(!list_empty(&iommu->domain_list))
 
@@ -479,6 +486,36 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, 
dma_addr_t iova,
return unlocked;
 }
 
+/*
+ * Generally, VFIO needs to unpin remote pages after each IOTLB flush.
+ * Therefore, when using the IOTLB flush sync interface, VFIO needs to keep
+ * track of these regions (currently using a list).
+ *
+ * This value specifies the maximum number of regions for each IOTLB flush
+ * sync.
+ */
+#define VFIO_IOMMU_TLB_SYNC_MAX		512
+
+static long vfio_sync_unpin(struct vfio_dma *dma, struct vfio_domain *domain,
+   struct list_head *regions)
+{
+   long unlocked = 0;
+   struct vfio_regions *entry, *next;
+
+   iommu_tlb_sync(domain->domain);
+
+   list_for_each_entry_safe(entry, next, regions, list) {
+   unlocked += vfio_unpin_pages_remote(dma,
+   entry->iova,
+   entry->phys >> PAGE_SHIFT,
+   entry->len >> PAGE_SHIFT,
+   false);
+   list_del(&entry->list);
+   kfree(entry);
+   }
+   cond_resched();
+   return unlocked;
+}
+
 static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
  unsigned long *pfn_base, bool do_accounting)
 {
@@ -648,12 +685,40 @@ static int vfio_iommu_type1_unpin_pages(void *iommu_data,
return i > npage ? npage : (i > 0 ? i : -EINVAL);
 }
 
+static size_t try_unmap_unpin_fast(struct vfio_domain *domain, dma_addr_t iova,
+  size_t len, phys_addr_t phys,
+  struct list_head *unmapped_regions)
+{
+   struct vfio_regions *entry;
+   size_t unmapped;
+
+   entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+   if (!entry)
+   return -ENOMEM;
+
+   unmapped = iommu_unmap_fast(domain->domain, iova, len);
+   if (WARN_ON(!unmapped)) {
+   kfree(entry);
+   return -EINVAL;
+   }
+
+   iommu_tlb_range_add(domain->domain, iova, unmapped);
+   entry->iova = iova;
+   entry->phys = phys;
+   entry->len  = unmapped;
+   list_add_tail(&entry->list, unmapped_regions);
+   return unmapped;
+}
+
 static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
 bool do_accounting)
 {
dma_addr_t iova = dma->iova, end = dma->iova + dma->size;
struct vfio_domain *domain, *d;
+   struct list_head unmapped_regions;
+   bool use_fastpath = true;
long unlocked = 0;
+   int cnt = 0;
 
if (!dma->size)
return 0;
@@ -661,6 +726,8 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, 
struct vfio_dma *dma,
if (!IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu))
return 0;
 
+   INIT_LIST_HEAD(&unmapped_regions);
+
/*
 * We use the IOMMU to track the physical addresses, otherwise we'd
 * need a much more complicated tracking system.  Unfortunately that
@@ -698,6 +765,33 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, 
struct vfio_dma *dma,
break;
}
 
+   /*
+* First, try to use fast unmap/unpin. In case of failure,
+* sync up to the current point, and continue the slow
+* unmap/unpin path.
+*/
+   

Re: [PATCH v4 08/13] iommu/rockchip: Control clocks needed to access the IOMMU

2018-01-21 Thread JeffyChen

Hi Randy,

On 01/22/2018 10:15 AM, JeffyChen wrote:

Hi Randy,

On 01/22/2018 09:18 AM, Randy Li wrote:



Also, the power domain driver could manage the clocks as well; I would
suggest using pm_runtime_*.


actually the clocks required by the pm domain may not be the same as what we
want to control here; there might be some clocks that are only needed when
accessing mmu registers.

but i'm not very sure about that, will confirm it with Simon Xue.


confirmed with Simon, there might be some iommus that don't have a pd, and
CONFIG_PM could be disabled.

so it might be better to control the clocks in the iommu driver itself.
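A rough sketch of what controlling the clocks inside the IOMMU driver could
look like, using the clk_bulk_*() helpers that come up later in this thread.
This is illustrative only; the structure and clock names below are invented,
not taken from the rockchip-iommu driver.

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/kernel.h>

struct demo_iommu {
	struct device *dev;
	struct clk_bulk_data clocks[2];		/* e.g. "aclk" and "iface" */
	int num_clocks;
};

static int demo_iommu_get_clocks(struct demo_iommu *im)
{
	im->clocks[0].id = "aclk";
	im->clocks[1].id = "iface";
	im->num_clocks = ARRAY_SIZE(im->clocks);

	/* looks up all clocks from the device's DT node in one call */
	return devm_clk_bulk_get(im->dev, im->num_clocks, im->clocks);
}

static int demo_iommu_clocks_on(struct demo_iommu *im)
{
	/* enable every clock before touching the MMU registers ... */
	return clk_bulk_prepare_enable(im->num_clocks, im->clocks);
}

static void demo_iommu_clocks_off(struct demo_iommu *im)
{
	/* ... and release them again once register access is done */
	clk_bulk_disable_unprepare(im->num_clocks, im->clocks);
}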



Re: [RFC PATCH v2 2/2] iommu/amd: Add support for fast IOTLB flushing

2018-01-21 Thread Suravee Suthikulpanit

Hi Joerg,

Do you have any feedback regarding this patch for AMD IOMMU? I'm re-sending
patch 1/2 separately per Alex's suggestion.

Thanks,
Suravee

On 12/27/17 4:20 PM, Suravee Suthikulpanit wrote:

Implement the newly added IOTLB flushing interface for AMD IOMMU.

Signed-off-by: Suravee Suthikulpanit 
---
  drivers/iommu/amd_iommu.c   | 73 -
  drivers/iommu/amd_iommu_init.c  |  7 
  drivers/iommu/amd_iommu_types.h |  7 
  3 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 7d5eb00..42fe365 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -129,6 +129,12 @@ struct dma_ops_domain {
  static struct iova_domain reserved_iova_ranges;
  static struct lock_class_key reserved_rbtree_key;
  
+struct amd_iommu_flush_entries {

+   struct list_head list;
+   unsigned long iova;
+   size_t size;
+};
+
  /
   *
   * Helper functions
@@ -3043,7 +3049,6 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, 
unsigned long iova,
unmap_size = iommu_unmap_page(domain, iova, page_size);
	mutex_unlock(&domain->api_lock);
  
-	domain_flush_tlb_pde(domain);

domain_flush_complete(domain);
  
  	return unmap_size;

@@ -3163,6 +3168,69 @@ static bool amd_iommu_is_attach_deferred(struct 
iommu_domain *domain,
return dev_data->defer_attach;
  }
  
+static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)

+{
+   struct protection_domain *dom = to_pdomain(domain);
+
+   domain_flush_tlb_pde(dom);
+}
+
+static void amd_iommu_iotlb_range_add(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
+{
+   struct amd_iommu_flush_entries *entry, *p;
+   unsigned long flags;
+   bool found = false;
+
+   spin_lock_irqsave(&amd_iommu_flush_list_lock, flags);
+   list_for_each_entry(p, &amd_iommu_flush_list, list) {
+   if (iova != p->iova)
+   continue;
+
+   if (size > p->size) {
+   p->size = size;
+   pr_debug("%s: update range: iova=%#lx, size = %#lx\n",
+__func__, p->iova, p->size);
+   }
+   found = true;
+   break;
+   }
+
+   if (!found) {
+   entry = kzalloc(sizeof(struct amd_iommu_flush_entries),
+   GFP_KERNEL);
+   if (entry) {
+   pr_debug("%s: new range: iova=%lx, size=%#lx\n",
+__func__, iova, size);
+
+   entry->iova = iova;
+   entry->size = size;
+   list_add(&entry->list, &amd_iommu_flush_list);
+   }
+   }
+   spin_unlock_irqrestore(&amd_iommu_flush_list_lock, flags);
+}
+
+static void amd_iommu_iotlb_sync(struct iommu_domain *domain)
+{
+   struct protection_domain *pdom = to_pdomain(domain);
+   struct amd_iommu_flush_entries *entry, *next;
+   unsigned long flags;
+
+   /* Note:
+* Currently, IOMMU driver just flushes the whole IO/TLB for
+* a given domain. So, just remove entries from the list here.
+*/
+   spin_lock_irqsave(&amd_iommu_flush_list_lock, flags);
+   list_for_each_entry_safe(entry, next, &amd_iommu_flush_list, list) {
+   list_del(&entry->list);
+   kfree(entry);
+   }
+   spin_unlock_irqrestore(&amd_iommu_flush_list_lock, flags);
+
+   domain_flush_tlb_pde(pdom);
+}
+
  const struct iommu_ops amd_iommu_ops = {
.capable = amd_iommu_capable,
.domain_alloc = amd_iommu_domain_alloc,
@@ -3181,6 +3249,9 @@ static bool amd_iommu_is_attach_deferred(struct 
iommu_domain *domain,
.apply_resv_region = amd_iommu_apply_resv_region,
.is_attach_deferred = amd_iommu_is_attach_deferred,
.pgsize_bitmap  = AMD_IOMMU_PGSIZES,
+   .flush_iotlb_all = amd_iommu_flush_iotlb_all,
+   .iotlb_range_add = amd_iommu_iotlb_range_add,
+   .iotlb_sync = amd_iommu_iotlb_sync,
  };
  
  /*

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 6fe2d03..e8f8cee 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -185,6 +185,12 @@ struct ivmd_header {
  bool amd_iommu_force_isolation __read_mostly;
  
  /*

+ * IOTLB flush list
+ */
+LIST_HEAD(amd_iommu_flush_list);
+spinlock_t amd_iommu_flush_list_lock;
+
+/*
   * List of protection domains - used during resume
   */
  LIST_HEAD(amd_iommu_pd_list);
@@ -2490,6 +2496,7 @@ static int __init early_amd_iommu_init(void)
__set_bit(0, amd_iommu_pd_alloc_bitmap);
  
	spin_lock_init(&amd_iommu_pd_lock);

+   

Re: [PATCH v4 08/13] iommu/rockchip: Control clocks needed to access the IOMMU

2018-01-21 Thread Randy Li



On 01/18/2018 10:25 PM, JeffyChen wrote:

Hi Robin,

On 01/18/2018 08:27 PM, Robin Murphy wrote:




Is it worth using the clk_bulk_*() APIs for this? At a glance, most of
the code being added here appears to duplicate what those functions
already do (but I'm no clk API expert, for sure).
right, i think it's doable, the clk_bulk APIs are very helpful. i think 
we didn't use that is because this patch were wrote for the chromeos 4.4 
kernel, which doesn't have clk_bulk yet:)


will do it in the next version, thanks.
Also, the power domain driver could manage the clocks as well; I would
suggest using pm_runtime_*.


Robin.







--
Randy Li



Re: [PATCH v4 08/13] iommu/rockchip: Control clocks needed to access the IOMMU

2018-01-21 Thread JeffyChen

Hi Randy,

On 01/22/2018 09:18 AM, Randy Li wrote:



Also, the power domain driver could manage the clocks as well; I would
suggest using pm_runtime_*.


actually the clocks required by the pm domain may not be the same as what we
want to control here; there might be some clocks that are only needed when
accessing mmu registers.


but i'm not very sure about that, will confirm it with Simon Xue.



Re: [PATCH] Intel-IOMMU: Delete an error message for a failed memory allocation in init_dmars()

2018-01-21 Thread Joe Perches
On Sun, 2018-01-21 at 08:19 +0100, Jörg Rödel wrote:
> On Sat, Jan 20, 2018 at 05:37:52PM -0800, Joe Perches wrote:
> > While Markus' commit messages are nearly universally terrible,
> > is there really any significant value in knowing when any
> > particular OOM condition occurs other than the subsystem that
> > became OOM?
> > 
> > You're going to be hosed in any case.
> > 
> > Why isn't the generic OOM stack dump good enough?
> 
> Because if we know the exact allocation attempt that failed right away,
> we can more easily check if we can rewrite it so that it is more likely
> to succeed, e.g. rewriting one higher-order allocation to multiple
> order-0 allocations.

Up to you but I think that's pretty dubious
as this is an init function and if it fails
the system really is stuffed.

Unrelated, there are some unnecessary casts
of pointers to void * that could be removed
to help make the code a bit more regular as
some callers use the cast and some do not.
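A tiny standalone illustration of why those casts are redundant: in C a
void * converts implicitly to any object pointer type, so assigning the
result of an allocator that returns void * needs no cast. (Generic C below,
not the kernel code itself.)

#include <stdlib.h>

struct root_entry { unsigned long lo, hi; };

static struct root_entry *alloc_entry(void)
{
	void *page = calloc(1, sizeof(struct root_entry));

	return page;	/* implicit void * -> struct root_entry * conversion */
}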

---
 drivers/iommu/intel-iommu.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 4a2de34895ec..8d7ea76345ae 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -845,8 +845,8 @@ static inline struct context_entry 
*iommu_context_addr(struct intel_iommu *iommu
if (!context)
return NULL;
 
-   __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
-   phy_addr = virt_to_phys((void *)context);
+   __iommu_flush_cache(iommu, context, CONTEXT_SIZE);
+   phy_addr = virt_to_phys(context);
*entry = phy_addr | 1;
__iommu_flush_cache(iommu, entry, sizeof(*entry));
}
@@ -1298,7 +1298,7 @@ static int iommu_alloc_root_entry(struct intel_iommu 
*iommu)
struct root_entry *root;
unsigned long flags;
 
-   root = (struct root_entry *)alloc_pgtable_page(iommu->node);
+   root = alloc_pgtable_page(iommu->node);
if (!root) {
pr_err("Allocating root entry for %s failed\n",
iommu->name);
@@ -1978,7 +1978,7 @@ static int domain_init(struct dmar_domain *domain, struct 
intel_iommu *iommu,
domain->nid = iommu->node;
 
/* always allocate the top pgd */
-   domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
+   domain->pgd = alloc_pgtable_page(domain->nid);
if (!domain->pgd)
return -ENOMEM;
__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
@@ -4168,7 +4168,7 @@ int __init dmar_parse_one_rmrr(struct acpi_dmar_header 
*header, void *arg)
if (!rmrru->resv)
goto free_rmrru;
 
-   rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
+   rmrru->devices = dmar_alloc_dev_scope(rmrr + 1,
((void *)rmrr) + rmrr->header.length,
&rmrru->devices_cnt);
if (rmrru->devices_cnt && rmrru->devices == NULL)
@@ -4229,7 +4229,7 @@ int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, 
void *arg)
memcpy(atsru->hdr, hdr, hdr->length);
atsru->include_all = atsr->flags & 0x1;
if (!atsru->include_all) {
-   atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
+   atsru->devices = dmar_alloc_dev_scope(atsr + 1,
(void *)atsr + atsr->header.length,
&atsru->devices_cnt);
if (atsru->devices_cnt && atsru->devices == NULL) {
@@ -4465,7 +4465,7 @@ int dmar_iommu_notify_scope_dev(struct 
dmar_pci_notify_info *info)
rmrr = container_of(rmrru->hdr,
struct acpi_dmar_reserved_memory, header);
if (info->event == BUS_NOTIFY_ADD_DEVICE) {
-   ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
+   ret = dmar_insert_dev_scope(info, rmrr + 1,
((void *)rmrr) + rmrr->header.length,
rmrr->segment, rmrru->devices,
rmrru->devices_cnt);
@@ -4483,7 +4483,7 @@ int dmar_iommu_notify_scope_dev(struct 
dmar_pci_notify_info *info)
 
atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
if (info->event == BUS_NOTIFY_ADD_DEVICE) {
-   ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
+   ret = dmar_insert_dev_scope(info, atsr + 1,
(void *)atsr + atsr->header.length,
atsr->segment, atsru->devices,
atsru->devices_cnt);
@@ -4920,7 +4920,7 @@ static int md_domain_init(struct dmar_domain *domain, int 
guest_width)
domain->max_addr = 0;
 
/* always allocate the top pgd */
-   domain->pgd = (struct 

Re: [PATCH] Intel-IOMMU: Delete an error message for a failed memory allocation in init_dmars()

2018-01-21 Thread Joe Perches
On Sat, 2018-01-20 at 20:40 +0100, Jörg Rödel wrote:
> On Sat, Jan 20, 2018 at 03:55:37PM +0100, SF Markus Elfring wrote:
> > Do you need any more background information for this general
> > transformation pattern?
> 
> No.
> 
> > Do you find the Linux allocation failure report insufficient for this
> > use case?
> 
> Yes, because it can't tell me what the code was trying to allocate.

While Markus' commit messages are nearly universally terrible,
is there really any significant value in knowing when any
particular OOM condition occurs other than the subsystem that
became OOM?

You're going to be hosed in any case.

Why isn't the generic OOM stack dump good enough?


[PATCH 2/2] amd/iommu: Use raw locks on atomic context paths

2018-01-21 Thread Scott Wood
Several functions in this driver are called from atomic context,
and thus raw locks must be used in order to be safe on PREEMPT_RT.

This includes paths that must wait for command completion, which is
a potential PREEMPT_RT latency concern but not easily avoidable.
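
A generic illustration of the conversion pattern applied below (names are
invented; on PREEMPT_RT a spinlock_t becomes a sleeping lock, so locks taken
from atomic context must be declared raw):

#include <linux/spinlock.h>

struct demo_state {
	raw_spinlock_t lock;			/* was: spinlock_t lock; */
	u64 cmd_sem;
};

static void demo_init(struct demo_state *s)
{
	raw_spin_lock_init(&s->lock);		/* was: spin_lock_init() */
}

static void demo_touch(struct demo_state *s)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&s->lock, flags);	/* was: spin_lock_irqsave() */
	s->cmd_sem = 0;
	raw_spin_unlock_irqrestore(&s->lock, flags);
}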

Signed-off-by: Scott Wood 
---
 drivers/iommu/amd_iommu.c   | 30 +++---
 drivers/iommu/amd_iommu_init.c  |  2 +-
 drivers/iommu/amd_iommu_types.h |  4 ++--
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 8ead1b296d09..213f5a796ae5 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1055,9 +1055,9 @@ static int iommu_queue_command_sync(struct amd_iommu 
*iommu,
unsigned long flags;
int ret;
 
-   spin_lock_irqsave(&iommu->lock, flags);
+   raw_spin_lock_irqsave(&iommu->lock, flags);
ret = __iommu_queue_command_sync(iommu, cmd, sync);
-   spin_unlock_irqrestore(&iommu->lock, flags);
+   raw_spin_unlock_irqrestore(&iommu->lock, flags);
 
return ret;
 }
@@ -1083,7 +1083,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 
build_completion_wait(&cmd, (u64)&iommu->cmd_sem);
 
-   spin_lock_irqsave(&iommu->lock, flags);
+   raw_spin_lock_irqsave(&iommu->lock, flags);
 
iommu->cmd_sem = 0;
 
@@ -1094,7 +1094,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
ret = wait_on_sem(&iommu->cmd_sem);
 
 out_unlock:
-   spin_unlock_irqrestore(&iommu->lock, flags);
+   raw_spin_unlock_irqrestore(&iommu->lock, flags);
 
return ret;
 }
@@ -3626,7 +3626,7 @@ static struct irq_remap_table *get_irq_table(u16 devid, 
bool ioapic)
goto out_unlock;
 
/* Initialize table spin-lock */
-   spin_lock_init(&table->lock);
+   raw_spin_lock_init(&table->lock);
 
if (ioapic)
/* Keep the first 32 indexes free for IOAPIC interrupts */
@@ -3688,7 +3688,7 @@ static int alloc_irq_index(u16 devid, int count, bool 
align)
if (align)
alignment = roundup_pow_of_two(count);
 
-   spin_lock_irqsave(&table->lock, flags);
+   raw_spin_lock_irqsave(&table->lock, flags);
 
/* Scan table for free entries */
for (index = ALIGN(table->min_index, alignment), c = 0;
@@ -3715,7 +3715,7 @@ static int alloc_irq_index(u16 devid, int count, bool 
align)
index = -ENOSPC;
 
 out:
-   spin_unlock_irqrestore(&table->lock, flags);
+   raw_spin_unlock_irqrestore(&table->lock, flags);
 
return index;
 }
@@ -3736,7 +3736,7 @@ static int modify_irte_ga(u16 devid, int index, struct 
irte_ga *irte,
if (!table)
return -ENOMEM;
 
-   spin_lock_irqsave(&table->lock, flags);
+   raw_spin_lock_irqsave(&table->lock, flags);
 
entry = (struct irte_ga *)table->table;
entry = &entry[index];
@@ -3747,7 +3747,7 @@ static int modify_irte_ga(u16 devid, int index, struct 
irte_ga *irte,
if (data)
data->ref = entry;
 
-   spin_unlock_irqrestore(&table->lock, flags);
+   raw_spin_unlock_irqrestore(&table->lock, flags);
 
iommu_flush_irt(iommu, devid);
iommu_completion_wait(iommu);
@@ -3769,9 +3769,9 @@ static int modify_irte(u16 devid, int index, union irte 
*irte)
if (!table)
return -ENOMEM;
 
-   spin_lock_irqsave(&table->lock, flags);
+   raw_spin_lock_irqsave(&table->lock, flags);
table->table[index] = irte->val;
-   spin_unlock_irqrestore(&table->lock, flags);
+   raw_spin_unlock_irqrestore(&table->lock, flags);
 
iommu_flush_irt(iommu, devid);
iommu_completion_wait(iommu);
@@ -3793,9 +3793,9 @@ static void free_irte(u16 devid, int index)
if (!table)
return;
 
-   spin_lock_irqsave(&table->lock, flags);
+   raw_spin_lock_irqsave(&table->lock, flags);
iommu->irte_ops->clear_allocated(table, index);
-   spin_unlock_irqrestore(&table->lock, flags);
+   raw_spin_unlock_irqrestore(&table->lock, flags);
 
iommu_flush_irt(iommu, devid);
iommu_completion_wait(iommu);
@@ -4396,7 +4396,7 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
if (!irt)
return -ENODEV;
 
-   spin_lock_irqsave(&irt->lock, flags);
+   raw_spin_lock_irqsave(&irt->lock, flags);
 
if (ref->lo.fields_vapic.guest_mode) {
if (cpu >= 0)
@@ -4405,7 +4405,7 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
barrier();
}
 
-   spin_unlock_irqrestore(&irt->lock, flags);
+   raw_spin_unlock_irqrestore(&irt->lock, flags);
 
iommu_flush_irt(iommu, devid);
iommu_completion_wait(iommu);
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 6fe2d0346073..e3cd81b32a33 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -1474,7 +1474,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, 
struct ivhd_header *h)
 {
int ret;
 
-   spin_lock_init(&iommu->lock);
+   raw_spin_lock_init(&iommu->lock);
 
 

[PATCH 1/2] iommu/amd: Avoid get_irq_table() from atomic context

2018-01-21 Thread Scott Wood
get_irq_table() acquires amd_iommu_devtable_lock which is not a raw lock,
and thus cannot be acquired from atomic context on PREEMPT_RT.  Many
calls to modify_irte*() come from atomic context due to the IRQ
desc->lock, as does amd_iommu_update_ga() due to the preemption disabling
in vcpu_load/put().

The only difference between calling get_irq_table() and reading from
irq_lookup_table[] directly, other than the lock acquisition and
amd_iommu_rlookup_table[] check, is if the table entry is unpopulated,
which should never happen when looking up a devid that came from an
irq_2_irte struct, as get_irq_table() would have already been called on
that devid during irq_remapping_alloc().

The lock acquisition is not needed in these cases because entries in
irq_lookup_table[] never change once non-NULL -- nor would the
amd_iommu_devtable_lock usage in get_irq_table() provide meaningful
protection if they did, since it's released before using the looked up
table in the get_irq_table() caller.

The amd_iommu_rlookup_table[] check is not needed because
irq_lookup_table[devid] should never be non-NULL if
amd_iommu_rlookup_table[devid] is NULL.
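
A simplified sketch of the "publish once, then read locklessly" pattern this
reasoning relies on (generic code, not the driver itself):

#include <linux/slab.h>
#include <linux/spinlock.h>

struct demo_table { u32 *entries; };

#define DEMO_MAX_DEVID 0x10000

static struct demo_table *demo_lookup[DEMO_MAX_DEVID];
static DEFINE_SPINLOCK(demo_setup_lock);

/* Setup path: allocate and publish the entry exactly once. */
static struct demo_table *demo_get_table(u16 devid)
{
	struct demo_table *table;
	unsigned long flags;

	spin_lock_irqsave(&demo_setup_lock, flags);
	table = demo_lookup[devid];
	if (!table) {
		table = kzalloc(sizeof(*table), GFP_ATOMIC);
		demo_lookup[devid] = table;	/* never changes once non-NULL */
	}
	spin_unlock_irqrestore(&demo_setup_lock, flags);
	return table;
}

/* Hot path (atomic context): the entry was published during setup, so a
 * plain read without taking the lock is sufficient. */
static struct demo_table *demo_find_table(u16 devid)
{
	return demo_lookup[devid];
}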

Signed-off-by: Scott Wood 
---
 drivers/iommu/amd_iommu.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index dc4b73833419..8ead1b296d09 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3732,7 +3732,7 @@ static int modify_irte_ga(u16 devid, int index, struct 
irte_ga *irte,
if (iommu == NULL)
return -EINVAL;
 
-   table = get_irq_table(devid, false);
+   table = irq_lookup_table[devid];
if (!table)
return -ENOMEM;
 
@@ -3765,7 +3765,7 @@ static int modify_irte(u16 devid, int index, union irte 
*irte)
if (iommu == NULL)
return -EINVAL;
 
-   table = get_irq_table(devid, false);
+   table = irq_lookup_table[devid];
if (!table)
return -ENOMEM;
 
@@ -3789,7 +3789,7 @@ static void free_irte(u16 devid, int index)
if (iommu == NULL)
return;
 
-   table = get_irq_table(devid, false);
+   table = irq_lookup_table[devid];
if (!table)
return;
 
@@ -4392,7 +4392,7 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data)
if (!iommu)
return -ENODEV;
 
-   irt = get_irq_table(devid, false);
+   irt = irq_lookup_table[devid];
if (!irt)
return -ENODEV;
 
-- 
1.8.3.1
