Re: [PATCH v3 6/8] genirq: Add and use an irq_data_update_affinity helper

2022-07-07 Thread Marc Zyngier
On Sun, 03 Jul 2022 16:22:03 +0100,
Oleksandr  wrote:
> 
> 
> On 01.07.22 23:00, Samuel Holland wrote:
> 
> 
> Hello Samuel
> 
> > Some architectures and irqchip drivers modify the cpumask returned by
> > irq_data_get_affinity_mask, usually by copying in to it. This is
> > problematic for uniprocessor configurations, where the affinity mask
> > should be constant, as it is known at compile time.
> > 
> > Add and use a setter for the affinity mask, following the pattern of
> > irq_data_update_effective_affinity. This allows the getter function to
> > return a const cpumask pointer.
> > 
> > Signed-off-by: Samuel Holland 
> > ---
> > 
> > Changes in v3:
> >   - New patch to introduce irq_data_update_affinity
> > 
> >   arch/alpha/kernel/irq.c  | 2 +-
> >   arch/ia64/kernel/iosapic.c   | 2 +-
> >   arch/ia64/kernel/irq.c   | 4 ++--
> >   arch/ia64/kernel/msi_ia64.c  | 4 ++--
> >   arch/parisc/kernel/irq.c | 2 +-
> >   drivers/irqchip/irq-bcm6345-l1.c | 4 ++--
> >   drivers/parisc/iosapic.c | 2 +-
> >   drivers/sh/intc/chip.c   | 2 +-
> >   drivers/xen/events/events_base.c | 7 ---
> >   include/linux/irq.h  | 6 ++
> >   10 files changed, 21 insertions(+), 14 deletions(-)
> > 
> > diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
> > index f6d2946edbd2..15f2effd6baf 100644
> > --- a/arch/alpha/kernel/irq.c
> > +++ b/arch/alpha/kernel/irq.c
> > @@ -60,7 +60,7 @@ int irq_select_affinity(unsigned int irq)
> > cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0);
> > last_cpu = cpu;
> >   - cpumask_copy(irq_data_get_affinity_mask(data),
> > cpumask_of(cpu));
> > +   irq_data_update_affinity(data, cpumask_of(cpu));
> > chip->irq_set_affinity(data, cpumask_of(cpu), false);
> > return 0;
> >   }
> > diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
> > index 35adcf89035a..99300850abc1 100644
> > --- a/arch/ia64/kernel/iosapic.c
> > +++ b/arch/ia64/kernel/iosapic.c
> > @@ -834,7 +834,7 @@ iosapic_unregister_intr (unsigned int gsi)
> > if (iosapic_intr_info[irq].count == 0) {
> >   #ifdef CONFIG_SMP
> > /* Clear affinity */
> > -   cpumask_setall(irq_get_affinity_mask(irq));
> > +   irq_data_update_affinity(irq_get_irq_data(irq), cpu_all_mask);
> >   #endif
> > /* Clear the interrupt information */
> > iosapic_intr_info[irq].dest = 0;
> > diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
> > index ecef17c7c35b..275b9ea58c64 100644
> > --- a/arch/ia64/kernel/irq.c
> > +++ b/arch/ia64/kernel/irq.c
> > @@ -57,8 +57,8 @@ static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] 
> > = 1 };
> >   void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
> >   {
> > if (irq < NR_IRQS) {
> > -   cpumask_copy(irq_get_affinity_mask(irq),
> > -cpumask_of(cpu_logical_id(hwid)));
> > +   irq_data_update_affinity(irq_get_irq_data(irq),
> > +cpumask_of(cpu_logical_id(hwid)));
> > irq_redir[irq] = (char) (redir & 0xff);
> > }
> >   }
> > diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
> > index df5c28f252e3..025e5133c860 100644
> > --- a/arch/ia64/kernel/msi_ia64.c
> > +++ b/arch/ia64/kernel/msi_ia64.c
> > @@ -37,7 +37,7 @@ static int ia64_set_msi_irq_affinity(struct irq_data 
> > *idata,
> > msg.data = data;
> > pci_write_msi_msg(irq, );
> > -   cpumask_copy(irq_data_get_affinity_mask(idata), cpumask_of(cpu));
> > +   irq_data_update_affinity(idata, cpumask_of(cpu));
> > return 0;
> >   }
> > @@ -132,7 +132,7 @@ static int dmar_msi_set_affinity(struct irq_data *data,
> > msg.address_lo |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu));
> > dmar_msi_write(irq, );
> > -   cpumask_copy(irq_data_get_affinity_mask(data), mask);
> > +   irq_data_update_affinity(data, mask);
> > return 0;
> >   }
> > diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c
> > index 0fe2d79fb123..5ebb1771b4ab 100644
> > --- a/arch/parisc/kernel/irq.c
> > +++ b/arch/parisc/kernel/irq.c
> > @@ -315,7 +315,7 @@ unsigned long txn_affinity_addr(unsigned int irq, int 
> > cpu)
> >   {
> >   #ifdef CONFIG_SMP
> > struct irq_data *d = irq_get_irq_data(irq);
> > -   cpumask_copy(irq_data_get_affinity_mask(d), cpumask_of(cpu));
> > +   irq_data_update_affinity(d, cpumask_of(cpu));
> >   #endif
> > return per_cpu(cpu_data, cpu).txn_addr;
> > diff --git a/drivers/irqchip/irq-bcm6345-l1.c 
> > b/drivers/irqchip/irq-bcm6345-l1.c
> > index 142a7431745f..6899e37810a8 100644
> > --- a/drivers/irqchip/irq-bcm6345-l1.c
> > +++ b/drivers/irqchip/irq-bcm6345-l1.c
> > @@ -216,11 +216,11 @@ static int bcm6345_l1_set_affinity(struct irq_data *d,
> > enabled = intc->cpus[old_cpu]->enable_cache[word] & mask;
> > if (enabled)
> > __bcm6345_l1_mask(d);
> > - 

Re: [PATCH v3 1/8] irqchip/mips-gic: Only register IPI domain when SMP is enabled

2022-07-07 Thread Marc Zyngier
On Tue, 05 Jul 2022 14:52:43 +0100,
Serge Semin  wrote:
> 
> Hi Samuel
> 
> On Fri, Jul 01, 2022 at 03:00:49PM -0500, Samuel Holland wrote:
> > The MIPS GIC irqchip driver may be selected in a uniprocessor
> > configuration, but it unconditionally registers an IPI domain.
> > 
> > Limit the part of the driver dealing with IPIs to only be compiled when
> > GENERIC_IRQ_IPI is enabled, which corresponds to an SMP configuration.
> 
> Thanks for the patch. Some comment is below.
> 
> > 
> > Reported-by: kernel test robot 
> > Signed-off-by: Samuel Holland 
> > ---
> > 
> > Changes in v3:
> >  - New patch to fix build errors in uniprocessor MIPS configs
> > 
> >  drivers/irqchip/Kconfig|  3 +-
> >  drivers/irqchip/irq-mips-gic.c | 80 +++---
> >  2 files changed, 56 insertions(+), 27 deletions(-)
> > 
> > diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
> > index 1f23a6be7d88..d26a4ff7c99f 100644
> > --- a/drivers/irqchip/Kconfig
> > +++ b/drivers/irqchip/Kconfig
> > @@ -322,7 +322,8 @@ config KEYSTONE_IRQ
> >  
> >  config MIPS_GIC
> > bool
> > -   select GENERIC_IRQ_IPI
> > +   select GENERIC_IRQ_IPI if SMP
> 
> > +   select IRQ_DOMAIN_HIERARCHY
> 
> It seems to me that the IRQ domains hierarchy is supposed to be
> created only if IPI is required. At least that's what the MIPS GIC
> driver implies. Thus we can go further and CONFIG_IRQ_DOMAIN_HIERARCHY
> ifdef-out the gic_irq_domain_alloc() and gic_irq_domain_free()
> methods definition together with the initialization:
> 
>  static const struct irq_domain_ops gic_irq_domain_ops = {
>   .xlate = gic_irq_domain_xlate,
> +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
>   .alloc = gic_irq_domain_alloc,
>   .free = gic_irq_domain_free,
> +#endif
>   .map = gic_irq_domain_map,
> };
> 
> If the GENERIC_IRQ_IPI config is enabled, CONFIG_IRQ_DOMAIN_HIERARCHY
> will be automatically selected (see the config definition in
> kernel/irq/Kconfig). If the IRQs hierarchy is needed for some another
> functionality like GENERIC_MSI_IRQ_DOMAIN or GPIOs then they will
> explicitly enable the IRQ_DOMAIN_HIERARCHY config thus activating the
> denoted .alloc and .free methods definitions.
> 
> To sum up you can get rid of the IRQ_DOMAIN_HIERARCHY config
> force-select from this patch and make the MIPS GIC driver code a bit
> more coherent.
> 
> @Marc, please correct me if were wrong.

Either way probably works correctly, but Samuel's approach is more
readable IMO. It is far easier to reason about a high-level feature
(GENERIC_IRQ_IPI) than an implementation detail (IRQ_DOMAIN_HIERARCHY).

If you really want to save a handful of bytes, you can make the
callbacks conditional on GENERIC_IRQ_IPI, and be done with it. But
this can come as an additional patch.

Thanks,

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: fully convert arm to use dma-direct

2022-05-06 Thread Marc Zyngier
On Fri, 22 Apr 2022 22:17:20 +0100,
Linus Walleij  wrote:
> 
> On Thu, Apr 21, 2022 at 9:42 AM Christoph Hellwig  wrote:
> 
> > arm is the last platform not using the dma-direct code for directly
> > mapped DMA.  With the dmaboune removal from Arnd we can easily switch
> > arm to always use dma-direct now (it already does for LPAE configs
> > and nommu).  I'd love to merge this series through the dma-mapping tree
> > as it gives us the opportunity for additional core dma-mapping
> > improvements.
> (...)
> 
> >  b/arch/arm/mach-footbridge/Kconfig   |1
> >  b/arch/arm/mach-footbridge/common.c  |   19
> >  b/arch/arm/mach-footbridge/include/mach/dma-direct.h |8
> >  b/arch/arm/mach-footbridge/include/mach/memory.h |4
> 
> I think Marc Z has a Netwinder that he can test this on. Marc?
> I have one too, just not much in my office because of parental leave.

Finally found some time to hook the machine up and throw a new kernel
at it. Booted at its usual glacial speed, so FWIW:

Tested-by: Marc Zyngier 

Thanks,

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: fully convert arm to use dma-direct

2022-04-23 Thread Marc Zyngier

On 2022-04-22 22:17, Linus Walleij wrote:

On Thu, Apr 21, 2022 at 9:42 AM Christoph Hellwig  wrote:


arm is the last platform not using the dma-direct code for directly
mapped DMA.  With the dmaboune removal from Arnd we can easily switch
arm to always use dma-direct now (it already does for LPAE configs
and nommu).  I'd love to merge this series through the dma-mapping 
tree

as it gives us the opportunity for additional core dma-mapping
improvements.

(...)


 b/arch/arm/mach-footbridge/Kconfig   |1
 b/arch/arm/mach-footbridge/common.c  |   19
 b/arch/arm/mach-footbridge/include/mach/dma-direct.h |8
 b/arch/arm/mach-footbridge/include/mach/memory.h |4


I think Marc Z has a Netwinder that he can test this on. Marc?
I have one too, just not much in my office because of parental leave.


I'm about to travel for a week. Can this wait until I'm back?
This is one of the few boxes that isn't hooked up to the PDU,
so I can't operate it remotely.

M.
--
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/dma: Explicitly sort PCI DMA windows

2022-03-23 Thread Marc Zyngier
On Tue, 22 Mar 2022 17:27:36 +,
Robin Murphy  wrote:
> 
> Originally, creating the dma_ranges resource list in pre-sorted fashion
> was the simplest and most efficient way to enforce the order required by
> iova_reserve_pci_windows(). However since then at least one PCI host
> driver is now re-sorting the list for its own probe-time processing,
> which doesn't seem entirely unreasonable, so that basic assumption no
> longer holds. Make iommu-dma robust and get the sort order it needs by
> explicitly sorting, which means we can also save the effort at creation
> time and just build the list in whatever natural order the DT had.
> 
> Signed-off-by: Robin Murphy 
> ---
> 
> Looking at this area off the back of the XGene thread[1] made me realise
> that we need to do it anyway, regardless of whether it might also happen
> to restore the previous XGene behaviour or not. Presumably nobody's
> tried to use pcie-cadence-host behind an IOMMU yet...

This definitely restores PCIe functionality on my Mustang (XGene-1).
Hopefully dann can comment on whether this addresses his own issue, as
his firmware is significantly different.

Thanks,

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [patch 29/37] PCI/MSI: Use __msi_get_virq() in pci_get_vector()

2021-11-28 Thread Marc Zyngier
On Sat, 27 Nov 2021 01:22:03 +,
Thomas Gleixner  wrote:
> 
> Use __msi_get_vector() and handle the return values to be compatible.
> 
> No functional change intended.

You wish ;-).

[   15.779540] pcieport 0001:00:01.0: AER: request AER IRQ -22 failed

Notice how amusing the IRQ number is?

> 
> Signed-off-by: Thomas Gleixner 
> ---
>  drivers/pci/msi/msi.c |   25 +
>  1 file changed, 5 insertions(+), 20 deletions(-)
> 
> --- a/drivers/pci/msi/msi.c
> +++ b/drivers/pci/msi/msi.c
> @@ -1023,28 +1023,13 @@ EXPORT_SYMBOL(pci_free_irq_vectors);
>   */
>  int pci_irq_vector(struct pci_dev *dev, unsigned int nr)
>  {
> - if (dev->msix_enabled) {
> - struct msi_desc *entry;
> + int irq = __msi_get_virq(>dev, nr);
>  
> - for_each_pci_msi_entry(entry, dev) {
> - if (entry->msi_index == nr)
> - return entry->irq;
> - }
> - WARN_ON_ONCE(1);
> - return -EINVAL;
> + switch (irq) {
> + case -ENODEV: return !nr ? dev->irq : -EINVAL;
> + case -ENOENT: return -EINVAL;
>   }
> -
> - if (dev->msi_enabled) {
> - struct msi_desc *entry = first_pci_msi_entry(dev);
> -
> - if (WARN_ON_ONCE(nr >= entry->nvec_used))
> - return -EINVAL;
> - } else {
> - if (WARN_ON_ONCE(nr > 0))
> - return -EINVAL;
> - }
> -
> - return dev->irq + nr;
> + return irq;
>  }
>  EXPORT_SYMBOL(pci_irq_vector);

I worked around it with the hack below, but I doubt this is the real
thing. portdrv_core.c does complicated things, and I don't completely
understand its logic.

M.

diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c
index 1f72bc734226..b15278a5fb4b 100644
--- a/drivers/pci/msi/msi.c
+++ b/drivers/pci/msi/msi.c
@@ -1092,8 +1092,9 @@ int pci_irq_vector(struct pci_dev *dev, unsigned int nr)
int irq = __msi_get_virq(>dev, nr);
 
switch (irq) {
-   case -ENODEV: return !nr ? dev->irq : -EINVAL;
-   case -ENOENT: return -EINVAL;
+   case -ENOENT:
+   case -ENODEV:
+   return !nr ? dev->irq : -EINVAL;
}
return irq;
 }

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 4/6] iommu: Move IOMMU pagesize check to attach_device

2021-10-22 Thread Marc Zyngier
On Fri, 22 Oct 2021 03:52:38 +0100,
Lu Baolu  wrote:
> 
> On 10/21/21 4:10 PM, Marc Zyngier wrote:
> > On Thu, 21 Oct 2021 03:22:30 +0100,
> > Lu Baolu  wrote:
> >> 
> >> On 10/20/21 10:22 PM, Marc Zyngier wrote:
> >>> On Wed, 20 Oct 2021 06:21:44 +0100,
> >>> Lu Baolu  wrote:
> >>>> 
> >>>> On 2021/10/20 0:37, Sven Peter via iommu wrote:
> >>>>> +   /*
> >>>>> +* Check that CPU pages can be represented by the IOVA 
> >>>>> granularity.
> >>>>> +* This has to be done after ops->attach_dev since many IOMMU 
> >>>>> drivers
> >>>>> +* only limit domain->pgsize_bitmap after having attached the 
> >>>>> first
> >>>>> +* device.
> >>>>> +*/
> >>>>> +   ret = iommu_check_page_size(domain);
> >>>>> +   if (ret) {
> >>>>> +   __iommu_detach_device(domain, dev);
> >>>>> +   return ret;
> >>>>> +   }
> >>>> 
> >>>> It looks odd. __iommu_attach_device() attaches an I/O page table for a
> >>>> device. How does it relate to CPU pages? Why is it a failure case if CPU
> >>>> page size is not covered?
> >>> 
> >>> If you allocate a CPU PAGE_SIZE'd region, and point it at a device
> >>> that now can DMA to more than what you have allocated because the
> >>> IOMMU's own page size is larger, the device has now access to data it
> >>> shouldn't see. In my book, that's a pretty bad thing.
> >> 
> >> But even you enforce the CPU page size check here, this problem still
> >> exists unless all DMA buffers are PAGE_SIZE aligned and sized, right?
> > 
> > Let me take a CPU analogy: you have a page that contains some user
> > data *and* a kernel secret. How do you map this page into userspace
> > without leaking the kernel secret?
> > 
> > PAGE_SIZE allocations are the unit of isolation, and this applies to
> > both CPU and IOMMU. If you have allocated a DMA buffer that is less
> > than a page, you then have to resort to bounce buffering, or accept
> > that your data isn't safe.
> 
> I can understand the problems when IOMMU page sizes is larger than CPU
> page size. But the code itself is not clean. The vendor iommu drivers
> know more hardware details than the iommu core. It looks odd that the
> vendor iommu says "okay, I can attach this I/O page table to the
> device", but the iommu core says "no, you can't" and rolls everything
> back.

If your IOMMU driver can do things behind the core's back and
contradict the view that the core has, then it is probably time to fix
your IOMMU driver and make the core aware of what is going on.
Supported page sizes is one of these things.

In general, keeping the IOMMU driver as dumb as possible is a worthy
design goal, and this is why we have these abstractions.

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 4/6] iommu: Move IOMMU pagesize check to attach_device

2021-10-21 Thread Marc Zyngier
On Thu, 21 Oct 2021 03:22:30 +0100,
Lu Baolu  wrote:
> 
> On 10/20/21 10:22 PM, Marc Zyngier wrote:
> > On Wed, 20 Oct 2021 06:21:44 +0100,
> > Lu Baolu  wrote:
> >> 
> >> On 2021/10/20 0:37, Sven Peter via iommu wrote:
> >>> + /*
> >>> +  * Check that CPU pages can be represented by the IOVA granularity.
> >>> +  * This has to be done after ops->attach_dev since many IOMMU drivers
> >>> +  * only limit domain->pgsize_bitmap after having attached the first
> >>> +  * device.
> >>> +  */
> >>> + ret = iommu_check_page_size(domain);
> >>> + if (ret) {
> >>> + __iommu_detach_device(domain, dev);
> >>> + return ret;
> >>> + }
> >> 
> >> It looks odd. __iommu_attach_device() attaches an I/O page table for a
> >> device. How does it relate to CPU pages? Why is it a failure case if CPU
> >> page size is not covered?
> > 
> > If you allocate a CPU PAGE_SIZE'd region, and point it at a device
> > that now can DMA to more than what you have allocated because the
> > IOMMU's own page size is larger, the device has now access to data it
> > shouldn't see. In my book, that's a pretty bad thing.
> 
> But even you enforce the CPU page size check here, this problem still
> exists unless all DMA buffers are PAGE_SIZE aligned and sized, right?

Let me take a CPU analogy: you have a page that contains some user
data *and* a kernel secret. How do you map this page into userspace
without leaking the kernel secret?

PAGE_SIZE allocations are the unit of isolation, and this applies to
both CPU and IOMMU. If you have allocated a DMA buffer that is less
than a page, you then have to resort to bounce buffering, or accept
that your data isn't safe.

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 4/6] iommu: Move IOMMU pagesize check to attach_device

2021-10-20 Thread Marc Zyngier
On Wed, 20 Oct 2021 06:21:44 +0100,
Lu Baolu  wrote:
> 
> On 2021/10/20 0:37, Sven Peter via iommu wrote:
> > The iova allocator is capable of handling any granularity which is a power
> > of two. Remove the much stronger condition that the granularity must be
> > smaller or equal to the CPU page size from a BUG_ON there.
> > Instead, check this condition during __iommu_attach_device and fail
> > gracefully.
> > 
> > Signed-off-by: Sven Peter
> > ---
> >   drivers/iommu/iommu.c | 35 ---
> >   drivers/iommu/iova.c  |  7 ---
> >   include/linux/iommu.h |  5 +
> >   3 files changed, 41 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > index dd7863e453a5..28896739964b 100644
> > --- a/drivers/iommu/iommu.c
> > +++ b/drivers/iommu/iommu.c
> > @@ -80,6 +80,8 @@ static struct iommu_domain *__iommu_domain_alloc(struct 
> > bus_type *bus,
> >  unsigned type);
> >   static int __iommu_attach_device(struct iommu_domain *domain,
> >  struct device *dev);
> > +static void __iommu_detach_device(struct iommu_domain *domain,
> > + struct device *dev);
> >   static int __iommu_attach_group(struct iommu_domain *domain,
> > struct iommu_group *group);
> >   static void __iommu_detach_group(struct iommu_domain *domain,
> > @@ -1974,6 +1976,19 @@ void iommu_domain_free(struct iommu_domain *domain)
> >   }
> >   EXPORT_SYMBOL_GPL(iommu_domain_free);
> >   +static int iommu_check_page_size(struct iommu_domain *domain)
> > +{
> > +   if (!iommu_is_paging_domain(domain))
> > +   return 0;
> > +
> > +   if (!(domain->pgsize_bitmap & (PAGE_SIZE | (PAGE_SIZE - 1 {
> > +   pr_warn("IOMMU pages cannot exactly represent CPU pages.\n");
> > +   return -EFAULT;
> > +   }
> > +
> > +   return 0;
> > +}
> > +
> >   static int __iommu_attach_device(struct iommu_domain *domain,
> >  struct device *dev)
> >   {
> > @@ -1983,9 +1998,23 @@ static int __iommu_attach_device(struct iommu_domain 
> > *domain,
> > return -ENODEV;
> > ret = domain->ops->attach_dev(domain, dev);
> > -   if (!ret)
> > -   trace_attach_device_to_domain(dev);
> > -   return ret;
> > +   if (ret)
> > +   return ret;
> > +
> > +   /*
> > +* Check that CPU pages can be represented by the IOVA granularity.
> > +* This has to be done after ops->attach_dev since many IOMMU drivers
> > +* only limit domain->pgsize_bitmap after having attached the first
> > +* device.
> > +*/
> > +   ret = iommu_check_page_size(domain);
> > +   if (ret) {
> > +   __iommu_detach_device(domain, dev);
> > +   return ret;
> > +   }
> 
> It looks odd. __iommu_attach_device() attaches an I/O page table for a
> device. How does it relate to CPU pages? Why is it a failure case if CPU
> page size is not covered?

If you allocate a CPU PAGE_SIZE'd region, and point it at a device
that now can DMA to more than what you have allocated because the
IOMMU's own page size is larger, the device has now access to data it
shouldn't see. In my book, that's a pretty bad thing.

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/dart: Clear sid2group entry when a group is freed

2021-09-24 Thread Marc Zyngier
On Fri, 24 Sep 2021 14:45:02 +0100,
Sven Peter  wrote:
> 
> sid2groups keeps track of which stream id combinations belong to a
> iommu_group to assign those correctly to devices.
> When a iommu_group is freed a stale pointer will however remain in
> sid2groups. This prevents devices with the same stream id combination
> to ever be attached again (see below).
> Fix that by creating a shadow copy of the stream id configuration
> when a group is allocated for the first time and clear the sid2group
> entry when that group is freed.
> 
>   # echo 1 >/sys/bus/pci/devices/\:03\:00.0/remove
>   pci :03:00.0: Removing from iommu group 1
>   # echo 1 >/sys/bus/pci/rescan
>   [...]
>   pci :03:00.0: BAR 0: assigned [mem 0x6a000-0x6a000 64bit pref]
>   pci :03:00.0: BAR 2: assigned [mem 0x6a001-0x6a001 64bit pref]
>   pci :03:00.0: BAR 6: assigned [mem 0x6c010-0x6c01007ff pref]
>   tg3 :03:00.0: Failed to add to iommu group 1: -2
>   [...]
> 
> Fixes: 46d1fb072e76b161 ("iommu/dart: Add DART iommu driver")
> Reported-by: Marc Zyngier 
> Signed-off-by: Sven Peter 

Thanks for posting this.

Tested-by: Marc Zyngier 

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/dart: Remove iommu_flush_ops

2021-09-21 Thread Marc Zyngier
On Tue, 21 Sep 2021 16:39:34 +0100,
Sven Peter  wrote:
> 
> apple_dart_tlb_flush_{all,walk} expect to get a struct apple_dart_domain
> but instead get a struct iommu_domain right now. This breaks those two
> functions and can lead to kernel panics like the one below.
> DART can only invalidate the entire TLB and apple_dart_iotlb_sync will
> already flush everything. There's no need to do that again inside those
> two functions. Let's just drop them.
> 
>   pci :03:00.0: Removing from iommu group 1
>   Unable to handle kernel paging request at virtual address 00010023
>   [...]
>   Call trace:
>_raw_spin_lock_irqsave+0x54/0xbc
>apple_dart_hw_stream_command.constprop.0+0x2c/0x130
>apple_dart_tlb_flush_all+0x48/0x90
>free_io_pgtable_ops+0x40/0x70
>apple_dart_domain_free+0x2c/0x44
>iommu_group_release+0x68/0xac
>kobject_cleanup+0x4c/0x1fc
>kobject_cleanup+0x14c/0x1fc
>kobject_put+0x64/0x84
>iommu_group_remove_device+0x110/0x180
>iommu_release_device+0x50/0xa0
>   [...]
> 
> Fixes: 46d1fb072e76b161 ("iommu/dart: Add DART iommu driver")
> Reported-by: Marc Zyngier 
> Signed-off-by: Sven Peter 

Thanks for addressing this so quickly.

Acked-by: Marc Zyngier 
Tested-by: Marc Zyngier 

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH -next v2] iommu/arm-smmu-v3: Add suspend and resume support

2021-07-27 Thread Marc Zyngier
On Tue, 27 Jul 2021 13:14:08 +0100,
Bixuan Cui  wrote:
> 
> Add suspend and resume support for arm-smmu-v3 by low-power mode.
> 
> When the smmu is suspended, it is powered off and the registers are
> cleared. So saves the msi_msg context during msi interrupt initialization
> of smmu. When resume happens it calls arm_smmu_device_reset() to restore
> the registers.
> 
> Signed-off-by: Bixuan Cui 
> Reviewed-by: Wei Yongjun 
> Reviewed-by: Zhen Lei 
> Reviewed-by: Ding Tianhong 
> Reviewed-by: Hanjun Guo 
> ---
> Changes in v2:
> * Using get_cached_msi_msg() instead of the descriptor to resume msi_msg
>   in arm_smmu_resume_msis();
> 
> * Move arm_smmu_resume_msis() from arm_smmu_setup_unique_irqs() into
>   arm_smmu_setup_irqs() and rename it to arm_smmu_resume_unique_irqs();
> 
>   Call arm_smmu_setup_unique_irqs() to configure the IRQ during probe and
>   call arm_smmu_resume_unique_irqs() in resume mode to restore the IRQ
>   registers to make the code more reasonable.
> 
> * Call arm_smmu_device_disable() to disable smmu and clear CR0_SMMUEN on
>   suspend. Then the warning about CR0_SMMUEN being enabled can be cleared
>   on resume.
> 
> * Using SET_SYSTEM_SLEEP_PM_OPS();
> 
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 69 ++---
>  1 file changed, 62 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 235f9bdaeaf2..66f35d5c7a70 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -40,6 +40,7 @@ MODULE_PARM_DESC(disable_bypass,
>  
>  static bool disable_msipolling;
>  module_param(disable_msipolling, bool, 0444);
> +static bool bypass;

As outlined before, this is likely to be wrong if you can have
per-SMMU bypass control.

>  MODULE_PARM_DESC(disable_msipolling,
>   "Disable MSI-based polling for CMD_SYNC completion.");
>  
> @@ -3129,11 +3130,38 @@ static void arm_smmu_write_msi_msg(struct msi_desc 
> *desc, struct msi_msg *msg)
>   doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
>   doorbell &= MSI_CFG0_ADDR_MASK;
>  
> + /* Saves the msg context for resume if desc->msg is empty */
> + if (desc->msg.address_lo == 0x0 && desc->msg.address_hi == 0x0) {
> + desc->msg.address_lo = msg->address_lo;
> + desc->msg.address_hi = msg->address_hi;
> + desc->msg.data = msg->data;
> + }

I thought I had made it clear that this approach is not acceptable.
Please fix the generic code to keep track of the latest message.

Thanks,

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [bug report] iommu_dma_unmap_sg() is very slow then running IO from remote numa node

2021-07-22 Thread Marc Zyngier

On 2021-07-22 12:12, John Garry wrote:

Hi John,

[...]


Your kernel log should show:
[0.00] GICv3: Pseudo-NMIs enabled using forced ICC_PMR_EL1
synchronisation


Unrelated, but you seem to be running with ICC_CTLR_EL3.PMHE set,
which makes the overhead of pseudo-NMIs much higher than it should be
(you take a DSB SY on each interrupt unmasking).

If you are not using 1:N distribution of SPIs on the secure side,
consider turning that off in your firmware. This should make NMIs
slightly more pleasant to use.

M.
--
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH -next] iommu/arm-smmu-v3: Add suspend and resume support

2021-07-21 Thread Marc Zyngier
On Wed, 21 Jul 2021 14:59:47 +0100,
Robin Murphy  wrote:
> 
> On 2021-07-21 14:12, Marc Zyngier wrote:
> > On Wed, 21 Jul 2021 12:42:14 +0100,
> > Robin Murphy  wrote:
> >> 
> >> [ +Marc for MSI bits ]
> >> 
> >> On 2021-07-21 02:33, Bixuan Cui wrote:
> >>> Add suspend and resume support for arm-smmu-v3 by low-power mode.
> >>> 
> >>> When the smmu is suspended, it is powered off and the registers are
> >>> cleared. So saves the msi_msg context during msi interrupt initialization
> >>> of smmu. When resume happens it calls arm_smmu_device_reset() to restore
> >>> the registers.
> >>> 
> >>> Signed-off-by: Bixuan Cui 
> >>> Reviewed-by: Wei Yongjun 
> >>> Reviewed-by: Zhen Lei 
> >>> Reviewed-by: Ding Tianhong 
> >>> Reviewed-by: Hanjun Guo 
> >>> ---
> >>> 
> >>>drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 72 ++---
> >>>1 file changed, 64 insertions(+), 8 deletions(-)
> >>> 
> >>> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
> >>> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> >>> index 235f9bdaeaf2..bf1163acbcb1 100644
> >>> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> >>> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> >>> @@ -40,6 +40,7 @@ MODULE_PARM_DESC(disable_bypass,
> >>>  static bool disable_msipolling;
> >>>module_param(disable_msipolling, bool, 0444);
> >>> +static bool bypass;
> >>>MODULE_PARM_DESC(disable_msipolling,
> >>>   "Disable MSI-based polling for CMD_SYNC completion.");
> >>>@@ -3129,11 +3130,37 @@ static void arm_smmu_write_msi_msg(struct
> >>> msi_desc *desc, struct msi_msg *msg)
> >>>   doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
> >>>   doorbell &= MSI_CFG0_ADDR_MASK;
> >>>+  /* Saves the msg context for resume if desc->msg is empty */
> >>> + if (desc->msg.address_lo == 0 && desc->msg.address_hi == 0) {
> >>> + desc->msg.address_lo = msg->address_lo;
> >>> + desc->msg.address_hi = msg->address_hi;
> >>> + desc->msg.data = msg->data;
> >>> + }
> >> 
> >> My gut feeling is that this is something a device driver maybe
> >> shouldn't be poking into, but I'm not entirely familiar with the area
> >> :/
> > 
> > Certainly not. If you rely on the message being stored into the
> > descriptors, then implement this in the core code, like we do for PCI.
> 
> Ah, so it would be an acceptable compromise to *read* desc->msg (and
> thus avoid having to store our own copy of the message) if the core
> was guaranteed to cache it? That's good to know, thanks.

Yeah, vfio, a couple of other weird drivers and (*surprise!*) ia64 are
using this kind of trick. I don't see a reason not to implement that
for platform-MSI (although level signalling may be interesting...), or
even to move it into the core MSI code.

> 
> >>> +
> >>>   writeq_relaxed(doorbell, smmu->base + cfg[0]);
> >>>   writel_relaxed(msg->data, smmu->base + cfg[1]);
> >>>   writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + 
> >>> cfg[2]);
> >>>}
> >>>+static void arm_smmu_resume_msis(struct arm_smmu_device *smmu)
> >>> +{
> >>> + struct msi_desc *desc;
> >>> + struct device *dev = smmu->dev;
> >>> +
> >>> + for_each_msi_entry(desc, dev) {
> >>> + switch (desc->platform.msi_index) {
> >>> + case EVTQ_MSI_INDEX:
> >>> + case GERROR_MSI_INDEX:
> >>> + case PRIQ_MSI_INDEX:
> >>> + arm_smmu_write_msi_msg(desc, &(desc->msg));
> > 
> > Consider using get_cached_msi_msg() instead of using the internals of
> > the descriptor.
> 
> Oh, there's even a proper API for it, marvellous! I hadn't managed to
> dig that far myself :)

It is a bit odd in the sense that it takes a copy of the message
instead of returning a pointer, but at least this solves lifetime
issues.

Thanks,

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH -next] iommu/arm-smmu-v3: Add suspend and resume support

2021-07-21 Thread Marc Zyngier
On Wed, 21 Jul 2021 12:42:14 +0100,
Robin Murphy  wrote:
> 
> [ +Marc for MSI bits ]
> 
> On 2021-07-21 02:33, Bixuan Cui wrote:
> > Add suspend and resume support for arm-smmu-v3 by low-power mode.
> > 
> > When the smmu is suspended, it is powered off and the registers are
> > cleared. So saves the msi_msg context during msi interrupt initialization
> > of smmu. When resume happens it calls arm_smmu_device_reset() to restore
> > the registers.
> > 
> > Signed-off-by: Bixuan Cui 
> > Reviewed-by: Wei Yongjun 
> > Reviewed-by: Zhen Lei 
> > Reviewed-by: Ding Tianhong 
> > Reviewed-by: Hanjun Guo 
> > ---
> > 
> >   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 72 ++---
> >   1 file changed, 64 insertions(+), 8 deletions(-)
> > 
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
> > b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > index 235f9bdaeaf2..bf1163acbcb1 100644
> > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > @@ -40,6 +40,7 @@ MODULE_PARM_DESC(disable_bypass,
> > static bool disable_msipolling;
> >   module_param(disable_msipolling, bool, 0444);
> > +static bool bypass;
> >   MODULE_PARM_DESC(disable_msipolling,
> > "Disable MSI-based polling for CMD_SYNC completion.");
> >   @@ -3129,11 +3130,37 @@ static void arm_smmu_write_msi_msg(struct
> > msi_desc *desc, struct msi_msg *msg)
> > doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
> > doorbell &= MSI_CFG0_ADDR_MASK;
> >   + /* Saves the msg context for resume if desc->msg is empty */
> > +   if (desc->msg.address_lo == 0 && desc->msg.address_hi == 0) {
> > +   desc->msg.address_lo = msg->address_lo;
> > +   desc->msg.address_hi = msg->address_hi;
> > +   desc->msg.data = msg->data;
> > +   }
> 
> My gut feeling is that this is something a device driver maybe
> shouldn't be poking into, but I'm not entirely familiar with the area
> :/

Certainly not. If you rely on the message being stored into the
descriptors, then implement this in the core code, like we do for PCI.

> 
> > +
> > writeq_relaxed(doorbell, smmu->base + cfg[0]);
> > writel_relaxed(msg->data, smmu->base + cfg[1]);
> > writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
> >   }
> >   +static void arm_smmu_resume_msis(struct arm_smmu_device *smmu)
> > +{
> > +   struct msi_desc *desc;
> > +   struct device *dev = smmu->dev;
> > +
> > +   for_each_msi_entry(desc, dev) {
> > +   switch (desc->platform.msi_index) {
> > +   case EVTQ_MSI_INDEX:
> > +   case GERROR_MSI_INDEX:
> > +   case PRIQ_MSI_INDEX:
> > +   arm_smmu_write_msi_msg(desc, &(desc->msg));

Consider using get_cached_msi_msg() instead of using the internals of
the descriptor.

> > +   break;
> > +   default:
> > +   continue;
> > +
> > +   }
> > +   }
> > +}
> > +
> >   static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
> >   {
> > struct msi_desc *desc;
> > @@ -3184,11 +3211,17 @@ static void arm_smmu_setup_msis(struct 
> > arm_smmu_device *smmu)
> > devm_add_action(dev, arm_smmu_free_msis, dev);
> >   }
> >   -static void arm_smmu_setup_unique_irqs(struct arm_smmu_device
> > *smmu)
> > +static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu, bool 
> > resume_mode)
> >   {
> > int irq, ret;
> >   - arm_smmu_setup_msis(smmu);
> > +   if (!resume_mode)
> > +   arm_smmu_setup_msis(smmu);
> > +   else {
> > +   /* The irq doesn't need to be re-requested during resume */
> > +   arm_smmu_resume_msis(smmu);
> > +   return;
> 
> What about wired IRQs?

Yeah. I assume the SMMU needs to be told which event gets signalled
using MSIs or IRQs? Or is that implied by the MSI being configured or
not (I used to know the answer to that, but I have long paged it out).

> 
> > +   }
> > /* Request interrupt lines */
> > irq = smmu->evtq.q.irq;
> > @@ -3230,7 +3263,7 @@ static void arm_smmu_setup_unique_irqs(struct 
> > arm_smmu_device *smmu)
> > }
> >   }
> >   -static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
> > +static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu, bool 
> > resume_mode)
> >   {
> > int ret, irq;
> > u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
> > @@ -3257,7 +3290,7 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device 
> > *smmu)
> > if (ret < 0)
> > dev_warn(smmu->dev, "failed to enable combined irq\n");
> > } else
> > -   arm_smmu_setup_unique_irqs(smmu);
> > +   arm_smmu_setup_unique_irqs(smmu, resume_mode);
> > if (smmu->features & ARM_SMMU_FEAT_PRI)
> > irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
> > @@ -3282,7 +3315,7 @@ static int arm_smmu_device_disable(struct 
> > arm_smmu_device *smmu)
> > return ret;
> >   }
> >   -static int 

Re: [PATCH] iommu/arm-smmu-v3: Add default domain quirk for Arm Fast Models

2021-06-30 Thread Marc Zyngier
On Tue, 29 Jun 2021 18:34:40 +0100,
Will Deacon  wrote:
> 
> On Fri, Jun 18, 2021 at 05:24:49PM +0100, Robin Murphy wrote:
> > Arm Fast Models are still implementing legacy virtio-pci devices behind
> > the SMMU, which continue to be problematic as "real hardware" devices
> > (from the point of view of the simulated system) without the mitigating
> > VIRTIO_F_ACCESS_PLATFORM feature.
> > 
> > Since we now have the ability to force passthrough on a device-specific
> > basis, let's use it to work around this particular oddity so that people
> > who just want to boot Linux on a model don't have to fiddle around with
> > things to avoid the SMMU getting in the way by default (and I don't have
> > to keep telling them which things...)
> > 
> > Signed-off-by: Robin Murphy 
> > ---
> >  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 15 +++
> >  1 file changed, 15 insertions(+)
> 
> Any chance of getting the fastmodels updated instead? It feels like it
> has to happen *eventually*, and then there would be no need for this bodge.

That'd be ideal. What are the chances of that happening before the Sun
turns into a black hole?

> Will
> 
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
> > b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > index 54b2f27b81d4..13cf16e8f45b 100644
> > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > @@ -2649,6 +2649,20 @@ static int arm_smmu_dev_disable_feature(struct 
> > device *dev,
> > }
> >  }
> >  
> > +static int arm_smmu_def_domain_type(struct device *dev)
> > +{
> > +   if (dev_is_pci(dev)) {
> > +   struct pci_dev *pdev = to_pci_dev(dev);
> > +
> > +   /* Legacy virtio-block devices on Arm Fast Models */
> > +   if (pdev->vendor == 0x1af4 && pdev->device == 0x1001 &&
> > +   pdev->subsystem_vendor == 0x00ff && pdev->subsystem_device 
> > == 0x0002)
> > +   return IOMMU_DOMAIN_IDENTITY;
> > +   }
> > +
> > +   return 0;
> > +}

Could this be expressed as a PCI quirk instead? It would at least keep
the ID matching out of the SMMU driver...

Thanks,

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v14 00/13] SMMUv3 Nested Stage Setup (IOMMU part)

2021-04-24 Thread Marc Zyngier
On Fri, 23 Apr 2021 18:58:23 +0100,
Krishna Reddy  wrote:
> 
> >> Did that patch cause any issue, or is it just not needed on your system?
> >> It fixes an hypothetical problem with the way ATS is implemented. 
> >> Maybe I actually observed it on an old software model, I don't 
> >> remember. Either way it's unlikely to go upstream but I'd like to know 
> >> if I should drop it from my tree.
> 
> > Had to revert same patch "mm: notify remote TLBs when dirtying a PTE" to
> > avoid below crash[1]. I am not sure about the cause yet.
> 
> I have noticed this issue earlier with patch pointed here and root
> caused the issue as below.  It happens after vfio_mmap request from
> QEMU for the PCIe device and during the access of VA when PTE access
> flags are updated.
> 
> kvm_mmu_notifier_change_pte() --> kvm_set_spte_hve() -->
> kvm_set_spte_hva() --> clean_dcache_guest_page()
> 
> The validation model doesn't have FWB capability supported.
> __clean_dcache_guest_page() attempts to perform dcache flush on pcie
> bar address(not a valid_pfn()) through page_address(), which doesn't
> have page table mapping and leads to exception.
> 
> I have worked around the issue by filtering out the request if the
> pfn is not valid in __clean_dcache_guest_page().  As the patch
> wasn't posted in the community, reverted it as well.

That's papering over the real issue, and this mapping path needs
fixing as it was only ever expected to be called for CoW.

Can you please try the following patch and let me know if that fixes
the issue for good?

Thanks,

M.

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 77cb2d28f2a4..b62dd40a4083 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1147,7 +1147,8 @@ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, 
pte_t pte)
 * We've moved a page around, probably through CoW, so let's treat it
 * just like a translation fault and clean the cache to the PoC.
 */
-   clean_dcache_guest_page(pfn, PAGE_SIZE);
+   if (!kvm_is_device_pfn(pfn))
+   clean_dcache_guest_page(pfn, PAGE_SIZE);
handle_hva_to_gpa(kvm, hva, end, _set_spte_handler, );
return 0;
 }


-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [Patch V2 13/13] genirq/msi: Provide helpers to return Linux IRQ/dev_msi hw IRQ number

2021-03-26 Thread Marc Zyngier
On Fri, 26 Mar 2021 01:02:43 +,
"Dey, Megha"  wrote:
> 
> Hi Marc,
> 
> On 3/25/2021 10:53 AM, Marc Zyngier wrote:
> > On Fri, 26 Feb 2021 20:11:17 +,
> > Megha Dey  wrote:
> >> From: Dave Jiang 
> >> 
> >> Add new helpers to get the Linux IRQ number and device specific index
> >> for given device-relative vector so that the drivers don't need to
> >> allocate their own arrays to keep track of the vectors and hwirq for
> >> the multi vector device MSI case.
> >> 
> >> Reviewed-by: Tony Luck 
> >> Signed-off-by: Dave Jiang 
> >> Signed-off-by: Megha Dey 
> >> ---
> >>   include/linux/msi.h |  2 ++
> >>   kernel/irq/msi.c| 44 
> >>   2 files changed, 46 insertions(+)
> >> 
> >> diff --git a/include/linux/msi.h b/include/linux/msi.h
> >> index 24abec0..d60a6ba 100644
> >> --- a/include/linux/msi.h
> >> +++ b/include/linux/msi.h
> >> @@ -451,6 +451,8 @@ struct irq_domain 
> >> *platform_msi_create_irq_domain(struct fwnode_handle *fwnode,
> >>   int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec,
> >>   irq_write_msi_msg_t write_msi_msg);
> >>   void platform_msi_domain_free_irqs(struct device *dev);
> >> +int msi_irq_vector(struct device *dev, unsigned int nr);
> >> +int dev_msi_hwirq(struct device *dev, unsigned int nr);
> >> /* When an MSI domain is used as an intermediate domain */
> >>   int msi_domain_prepare_irqs(struct irq_domain *domain, struct device 
> >> *dev,
> >> diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
> >> index 047b59d..f2a8f55 100644
> >> --- a/kernel/irq/msi.c
> >> +++ b/kernel/irq/msi.c
> >> @@ -581,4 +581,48 @@ struct msi_domain_info *msi_get_domain_info(struct 
> >> irq_domain *domain)
> >>return (struct msi_domain_info *)domain->host_data;
> >>   }
> >>   +/**
> >> + * msi_irq_vector - Get the Linux IRQ number of a device vector
> >> + * @dev: device to operate on
> >> + * @nr: device-relative interrupt vector index (0-based).
> >> + *
> >> + * Returns the Linux IRQ number of a device vector.
> >> + */
> >> +int msi_irq_vector(struct device *dev, unsigned int nr)
> >> +{
> >> +  struct msi_desc *entry;
> >> +  int i = 0;
> >> +
> >> +  for_each_msi_entry(entry, dev) {
> >> +  if (i == nr)
> >> +  return entry->irq;
> >> +  i++;
> > This obviously doesn't work with Multi-MSI, does it?
> 
> This API is only for devices that support device MSI interrupts. They
> follow MSI-x format and don't support multi MSI (part of MSI).
> 
> Not sure if I am missing something here, can you please let me know?

Nothing in the prototype of the function indicates this limitation,
nor does the documentation. And I'm not sure why you should exclude
part of the MSI functionality here. It can't be for performance
reason, so you might as well make sure this works for all the MSI
variants:

int msi_irq_vector(struct device *dev, unsigned int nr)
{
struct msi_desc *entry;
int irq, index = 0;

for_each_msi_vector(entry, irq, dev) {
if (index == nr}
return irq;
index++;
}

return WARN_ON_ONCE(-EINVAL);
}

>
> > 
> >> +  }
> >> +  WARN_ON_ONCE(1);
> >> +  return -EINVAL;
> >> +}
> >> +EXPORT_SYMBOL_GPL(msi_irq_vector);
> >> +
> >> +/**
> >> + * dev_msi_hwirq - Get the device MSI hw IRQ number of a device vector
> >> + * @dev: device to operate on
> >> + * @nr: device-relative interrupt vector index (0-based).
> >> + *
> >> + * Return the dev_msi hw IRQ number of a device vector.
> >> + */
> >> +int dev_msi_hwirq(struct device *dev, unsigned int nr)
> >> +{
> >> +  struct msi_desc *entry;
> >> +  int i = 0;
> >> +
> >> +  for_each_msi_entry(entry, dev) {
> >> +  if (i == nr)
> >> +  return entry->device_msi.hwirq;
> >> +  i++;
> >> +  }
> >> +  WARN_ON_ONCE(1);
> >> +  return -EINVAL;
> >> +}

And this helper would be more generally useful if it returned the n-th
msi_desc entry rather than some obscure field in a substructure.

struct msi_desc *msi_get_nth_desc(struct device *dev, unsigned int nth)
{
struct msi_desc *entry = NULL;
unsigned in

Re: [Patch V2 08/13] genirq: Set auxiliary data for an interrupt

2021-03-26 Thread Marc Zyngier
On Thu, 25 Mar 2021 18:59:48 +,
Thomas Gleixner  wrote:
> 
> On Thu, Mar 25 2021 at 17:23, Marc Zyngier wrote:
> >> +{
> >> +  struct irq_desc *desc;
> >> +  struct irq_data *data;
> >> +  unsigned long flags;
> >> +  int res = -ENODEV;
> >> +
> >> +  desc = irq_get_desc_buslock(irq, , 0);
> >> +  if (!desc)
> >> +  return -EINVAL;
> >> +
> >> +  for (data = >irq_data; data; data = irqd_get_parent_data(data)) {
> >> +  if (data->chip->irq_set_auxdata) {
> >> +  res = data->chip->irq_set_auxdata(data, which, val);
> >
> > And this is where things can break: because you don't define what
> > 'which' is, you can end-up with two stacked layers clashing in their
> > interpretation of 'which', potentially doing the wrong thing.
> >
> > Short of having a global, cross architecture definition of all the
> > possible states, this is frankly dodgy.
> 
> My bad, I suggested this in the first place.
> 
> So what you suggest is to make 'which' an enum and have that in
> include/linux/whatever.h so we end up with unique identifiers accross
> architectures, irqdomains and whatever, right?

Exactly. As long as 'which' is unique and unambiguous, we can solve
the stacking issue (which is oddly starting to smell like the ghost of
the SVR3 STREAMS... /me runs ;-).

Once we have that, I can start killing the irq_set_vcpu_affinity()
abuse I have been guilty of over the past years. Even more, we could
kill irq_set_vcpu_affinity() altogether, because we have a generic way
of passing side-band information from a driver down to the IRQ stack.

> That makes a lot of sense.
> 
> Though that leaves the question of the data type for 'val'. While u64 is
> probably good enough for most stuff, anything which needs more than that
> is left out (again). union as arguments are horrible especially if you
> need the counterpart irq_get_auxdata() for which you need a pointer and
> then you can't do a forward declaration. Something like this might work
> though and avoid to make the pointer business unconditional:
> 
> struct irq_auxdata {
>union {
>u64val;
>  struct foo *foo;
>};
> };

I guess that at some point, irq_get_auxdata() will become a
requirement so we might as well bite the bullet now, and the above
looks like a good start to me.

Thanks,

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [Patch V2 07/13] irqdomain/msi: Provide msi_alloc/free_store() callbacks

2021-03-26 Thread Marc Zyngier
On Thu, 25 Mar 2021 18:44:48 +,
Thomas Gleixner  wrote:
> 
> On Thu, Mar 25 2021 at 17:08, Marc Zyngier wrote:
> > Megha Dey  wrote:
> >> @@ -434,6 +434,12 @@ int __msi_domain_alloc_irqs(struct irq_domain 
> >> *domain, struct device *dev,
> >>if (ret)
> >>return ret;
> >>  
> >> +  if (ops->msi_alloc_store) {
> >> +  ret = ops->msi_alloc_store(domain, dev, nvec);
> >
> > What is supposed to happen if we get aliasing devices (similar to what
> > we have with devices behind a PCI bridge)?
> >
> > The ITS code goes through all kind of hoops to try and detect this
> > case when sizing the translation tables (in the .prepare callback),
> > and I have the feeling that sizing the message store is analogous.
> 
> No. The message store itself is sized upfront by the underlying 'master'
> device. Each 'master' device has it's own irqdomain.
> 
> This is the allocation for the subdevice and this is not part of PCI and
> therefore not subject to PCI aliasing.

Fair enough. If we are guaranteed that there is no aliasing, then this
point is moot.

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [Patch V2 13/13] genirq/msi: Provide helpers to return Linux IRQ/dev_msi hw IRQ number

2021-03-25 Thread Marc Zyngier
On Fri, 26 Feb 2021 20:11:17 +,
Megha Dey  wrote:
> 
> From: Dave Jiang 
> 
> Add new helpers to get the Linux IRQ number and device specific index
> for given device-relative vector so that the drivers don't need to
> allocate their own arrays to keep track of the vectors and hwirq for
> the multi vector device MSI case.
> 
> Reviewed-by: Tony Luck 
> Signed-off-by: Dave Jiang 
> Signed-off-by: Megha Dey 
> ---
>  include/linux/msi.h |  2 ++
>  kernel/irq/msi.c| 44 
>  2 files changed, 46 insertions(+)
> 
> diff --git a/include/linux/msi.h b/include/linux/msi.h
> index 24abec0..d60a6ba 100644
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -451,6 +451,8 @@ struct irq_domain *platform_msi_create_irq_domain(struct 
> fwnode_handle *fwnode,
>  int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec,
>  irq_write_msi_msg_t write_msi_msg);
>  void platform_msi_domain_free_irqs(struct device *dev);
> +int msi_irq_vector(struct device *dev, unsigned int nr);
> +int dev_msi_hwirq(struct device *dev, unsigned int nr);
>  
>  /* When an MSI domain is used as an intermediate domain */
>  int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
> diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
> index 047b59d..f2a8f55 100644
> --- a/kernel/irq/msi.c
> +++ b/kernel/irq/msi.c
> @@ -581,4 +581,48 @@ struct msi_domain_info *msi_get_domain_info(struct 
> irq_domain *domain)
>   return (struct msi_domain_info *)domain->host_data;
>  }
>  
> +/**
> + * msi_irq_vector - Get the Linux IRQ number of a device vector
> + * @dev: device to operate on
> + * @nr: device-relative interrupt vector index (0-based).
> + *
> + * Returns the Linux IRQ number of a device vector.
> + */
> +int msi_irq_vector(struct device *dev, unsigned int nr)
> +{
> + struct msi_desc *entry;
> + int i = 0;
> +
> + for_each_msi_entry(entry, dev) {
> + if (i == nr)
> + return entry->irq;
> + i++;

This obviously doesn't work with Multi-MSI, does it?

> + }
> + WARN_ON_ONCE(1);
> + return -EINVAL;
> +}
> +EXPORT_SYMBOL_GPL(msi_irq_vector);
> +
> +/**
> + * dev_msi_hwirq - Get the device MSI hw IRQ number of a device vector
> + * @dev: device to operate on
> + * @nr: device-relative interrupt vector index (0-based).
> + *
> + * Return the dev_msi hw IRQ number of a device vector.
> + */
> +int dev_msi_hwirq(struct device *dev, unsigned int nr)
> +{
> + struct msi_desc *entry;
> + int i = 0;
> +
> + for_each_msi_entry(entry, dev) {
> + if (i == nr)
> + return entry->device_msi.hwirq;
> + i++;
> + }
> + WARN_ON_ONCE(1);
> + return -EINVAL;
> +}
> +EXPORT_SYMBOL_GPL(dev_msi_hwirq);
> +
>  #endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */

And what uses these helpers?

Thanks,

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [Patch V2 12/13] irqchip: Add IMS (Interrupt Message Store) driver

2021-03-25 Thread Marc Zyngier
On Fri, 26 Feb 2021 20:11:16 +,
Megha Dey  wrote:
> 
> Generic IMS(Interrupt Message Store) irq chips and irq domain
> implementations for IMS based devices which store the interrupt messages
> in an array in device memory.
> 
> Allocation and freeing of interrupts happens via the generic
> msi_domain_alloc/free_irqs() interface. No special purpose IMS magic
> required as long as the interrupt domain is stored in the underlying
> device struct. The irq_set_auxdata() is used to program the pasid into
> the IMS entry.
> 
> [Megha: Fixed compile time errors
> Added necessary dependencies to IMS_MSI_ARRAY config
> Fixed polarity of IMS_VECTOR_CTRL
> Added reads after writes to flush writes to device
> Added set_desc ops to IMS msi domain ops
> Tested the IMS infrastructure with the IDXD driver]
> 
> Reviewed-by: Tony Luck 
> Signed-off-by: Thomas Gleixner 
> Signed-off-by: Megha Dey 
> ---
>  drivers/irqchip/Kconfig |  14 +++
>  drivers/irqchip/Makefile|   1 +
>  drivers/irqchip/irq-ims-msi.c   | 211 
> 
>  include/linux/irqchip/irq-ims-msi.h |  68 
>  4 files changed, 294 insertions(+)
>  create mode 100644 drivers/irqchip/irq-ims-msi.c
>  create mode 100644 include/linux/irqchip/irq-ims-msi.h
> 
> diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
> index e74fa20..2fb0c24 100644
> --- a/drivers/irqchip/Kconfig
> +++ b/drivers/irqchip/Kconfig
> @@ -586,4 +586,18 @@ config MST_IRQ
>   help
> Support MStar Interrupt Controller.
>  
> +config IMS_MSI
> + depends on PCI
> + select DEVICE_MSI
> + bool
> +
> +config IMS_MSI_ARRAY
> + bool "IMS Interrupt Message Store MSI controller for device memory 
> storage arrays"
> + depends on PCI
> + select IMS_MSI
> + select GENERIC_MSI_IRQ_DOMAIN
> + help
> +   Support for IMS Interrupt Message Store MSI controller
> +   with IMS slot storage in a slot array in device memory
> +
>  endmenu
> diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
> index c59b95a..e903201 100644
> --- a/drivers/irqchip/Makefile
> +++ b/drivers/irqchip/Makefile
> @@ -113,3 +113,4 @@ obj-$(CONFIG_LOONGSON_PCH_MSI)+= 
> irq-loongson-pch-msi.o
>  obj-$(CONFIG_MST_IRQ)+= irq-mst-intc.o
>  obj-$(CONFIG_SL28CPLD_INTC)  += irq-sl28cpld.o
>  obj-$(CONFIG_MACH_REALTEK_RTL)   += irq-realtek-rtl.o
> +obj-$(CONFIG_IMS_MSI)+= irq-ims-msi.o
> diff --git a/drivers/irqchip/irq-ims-msi.c b/drivers/irqchip/irq-ims-msi.c
> new file mode 100644
> index 000..fa23207
> --- /dev/null
> +++ b/drivers/irqchip/irq-ims-msi.c
> @@ -0,0 +1,211 @@
> +// SPDX-License-Identifier: GPL-2.0
> +// (C) Copyright 2021 Thomas Gleixner 
> +/*
> + * Shared interrupt chips and irq domains for IMS devices
> + */
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +
> +#ifdef CONFIG_IMS_MSI_ARRAY

Given that this covers the whole driver, what is this #defined used
for? You might as well make the driver depend on this config option.

Thanks,

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [Patch V2 08/13] genirq: Set auxiliary data for an interrupt

2021-03-25 Thread Marc Zyngier
On Fri, 26 Feb 2021 20:11:12 +,
Megha Dey  wrote:
> 
> Introduce a new function pointer in the irq_chip structure(irq_set_auxdata)
> which is responsible for updating data which is stored in a shared register
> or data storage. For example, the idxd driver uses the auxiliary data API
> to enable/set and disable PASID field that is in the IMS entry (introduced
> in a later patch) and that data are not typically present in MSI entry.
> 
> Reviewed-by: Tony Luck 
> Signed-off-by: Megha Dey 
> ---
>  include/linux/interrupt.h |  2 ++
>  include/linux/irq.h   |  4 
>  kernel/irq/manage.c   | 32 
>  3 files changed, 38 insertions(+)
> 
> diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
> index 967e257..461ed1c 100644
> --- a/include/linux/interrupt.h
> +++ b/include/linux/interrupt.h
> @@ -496,6 +496,8 @@ extern int irq_get_irqchip_state(unsigned int irq, enum 
> irqchip_irq_state which,
>  extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state 
> which,
>bool state);
>  
> +int irq_set_auxdata(unsigned int irq, unsigned int which, u64 val);
> +
>  #ifdef CONFIG_IRQ_FORCED_THREADING
>  # ifdef CONFIG_PREEMPT_RT
>  #  define force_irqthreads   (true)
> diff --git a/include/linux/irq.h b/include/linux/irq.h
> index 2efde6a..fc19f32 100644
> --- a/include/linux/irq.h
> +++ b/include/linux/irq.h
> @@ -491,6 +491,8 @@ static inline irq_hw_number_t irqd_to_hwirq(struct 
> irq_data *d)
>   *   irq_request_resources
>   * @irq_compose_msi_msg: optional to compose message content for MSI
>   * @irq_write_msi_msg:   optional to write message content for MSI
> + * @irq_set_auxdata: Optional function to update auxiliary data e.g. in
> + *   shared registers
>   * @irq_get_irqchip_state:   return the internal state of an interrupt
>   * @irq_set_irqchip_state:   set the internal state of a interrupt
>   * @irq_set_vcpu_affinity:   optional to target a vCPU in a virtual machine
> @@ -538,6 +540,8 @@ struct irq_chip {
>   void(*irq_compose_msi_msg)(struct irq_data *data, struct 
> msi_msg *msg);
>   void(*irq_write_msi_msg)(struct irq_data *data, struct 
> msi_msg *msg);
>  
> + int (*irq_set_auxdata)(struct irq_data *data, unsigned int 
> which, u64 auxval);
> +
>   int (*irq_get_irqchip_state)(struct irq_data *data, enum 
> irqchip_irq_state which, bool *state);
>   int (*irq_set_irqchip_state)(struct irq_data *data, enum 
> irqchip_irq_state which, bool state);
>  
> diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
> index 85ede4e..68ff559 100644
> --- a/kernel/irq/manage.c
> +++ b/kernel/irq/manage.c
> @@ -2860,3 +2860,35 @@ bool irq_check_status_bit(unsigned int irq, unsigned 
> int bitmask)
>   return res;
>  }
>  EXPORT_SYMBOL_GPL(irq_check_status_bit);
> +
> +/**
> + * irq_set_auxdata - Set auxiliary data
> + * @irq: Interrupt to update
> + * @which:   Selector which data to update
> + * @auxval:  Auxiliary data value
> + *
> + * Function to update auxiliary data for an interrupt, e.g. to update data
> + * which is stored in a shared register or data storage (e.g. IMS).
> + */
> +int irq_set_auxdata(unsigned int irq, unsigned int which, u64 val)

This looks to me like a massively generalised version of
irq_set_irqchip_state(), only without any defined semantics when it
comes to the 'which' state, making it look like the irqchip version of
an ioctl.

We also have the irq_set_vcpu_affinity() callback that is used to
perpetrate all sort of sins (and I have abused this interface more
than I should admit it).

Can we try and converge on a single interface that allows for
"side-band state" to be communicated, with documented state?

> +{
> + struct irq_desc *desc;
> + struct irq_data *data;
> + unsigned long flags;
> + int res = -ENODEV;
> +
> + desc = irq_get_desc_buslock(irq, , 0);
> + if (!desc)
> + return -EINVAL;
> +
> + for (data = >irq_data; data; data = irqd_get_parent_data(data)) {
> + if (data->chip->irq_set_auxdata) {
> + res = data->chip->irq_set_auxdata(data, which, val);

And this is where things can break: because you don't define what
'which' is, you can end-up with two stacked layers clashing in their
interpretation of 'which', potentially doing the wrong thing.

Short of having a global, cross architecture definition of all the
possible states, this is frankly dodgy.

Thanks,

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [Patch V2 07/13] irqdomain/msi: Provide msi_alloc/free_store() callbacks

2021-03-25 Thread Marc Zyngier
On Fri, 26 Feb 2021 20:11:11 +,
Megha Dey  wrote:
> 
> From: Thomas Gleixner 
> 
> For devices which don't have a standard storage for MSI messages like the
> upcoming IMS (Interrupt Message Store) it's required to allocate storage
> space before allocating interrupts and after freeing them.
> 
> This could be achieved with the existing callbacks, but that would be
> awkward because they operate on msi_alloc_info_t which is not uniform
> across architectures. Also these callbacks are invoked per interrupt but
> the allocation might have bulk requirements depending on the device.
> 
> As such devices can operate on different architectures it is simpler to
> have separate callbacks which operate on struct device. The resulting
> storage information has to be stored in struct msi_desc so the underlying
> irq chip implementation can retrieve it for the relevant operations.
> 
> Reviewed-by: Tony Luck 
> Signed-off-by: Thomas Gleixner 
> Signed-off-by: Megha Dey 
> ---
>  include/linux/msi.h |  8 
>  kernel/irq/msi.c| 11 +++
>  2 files changed, 19 insertions(+)
> 
> diff --git a/include/linux/msi.h b/include/linux/msi.h
> index 46e879c..e915932 100644
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -323,6 +323,10 @@ struct msi_domain_info;
>   *   function.
>   * @domain_free_irqs:Optional function to override the default free
>   *   function.
> + * @msi_alloc_store: Optional callback to allocate storage in a device
> + *   specific non-standard MSI store
> + * @msi_alloc_free:  Optional callback to free storage in a device
> + *   specific non-standard MSI store
>   *
>   * @get_hwirq, @msi_init and @msi_free are callbacks used by
>   * msi_create_irq_domain() and related interfaces
> @@ -372,6 +376,10 @@ struct msi_domain_ops {
>struct device *dev, int nvec);
>   void(*domain_free_irqs)(struct irq_domain *domain,
>   struct device *dev);
> + int (*msi_alloc_store)(struct irq_domain *domain,
> +struct device *dev, int nvec);
> + void(*msi_free_store)(struct irq_domain *domain,
> +   struct device *dev);
>  };
>  
>  /**
> diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
> index c54316d..047b59d 100644
> --- a/kernel/irq/msi.c
> +++ b/kernel/irq/msi.c
> @@ -434,6 +434,12 @@ int __msi_domain_alloc_irqs(struct irq_domain *domain, 
> struct device *dev,
>   if (ret)
>   return ret;
>  
> + if (ops->msi_alloc_store) {
> + ret = ops->msi_alloc_store(domain, dev, nvec);

What is supposed to happen if we get aliasing devices (similar to what
we have with devices behind a PCI bridge)?

The ITS code goes through all kind of hoops to try and detect this
case when sizing the translation tables (in the .prepare callback),
and I have the feeling that sizing the message store is analogous.

Or do we all have the warm fuzzy feeling that aliasing is a thing of
the past and that we can ignore this potential problem?

Thanks,

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC PATCH 3/5] KVM: ARM64: Add support for pinned VMIDs

2021-03-09 Thread Marc Zyngier
Hi Shameer,

[+Will]

On Mon, 22 Feb 2021 15:53:36 +,
Shameer Kolothum  wrote:
> 
> On an ARM64 system with a SMMUv3 implementation that fully supports
> Broadcast TLB Maintenance(BTM) feature, the CPU TLB invalidate
> instructions are received by SMMU. This is very useful when the
> SMMU shares the page tables with the CPU(eg: Guest SVA use case).
> For this to work, the SMMU must use the same VMID that is allocated
> by KVM to configure the stage 2 translations.
> 
> At present KVM VMID allocations are recycled on rollover and may
> change as a result. This will create issues if we have to share
> the KVM VMID with SMMU. Hence, we spilt the KVM VMID space into
> two, the first half follows the normal recycle on rollover policy
> while the second half of the VMID pace is used to allocate pinned
> VMIDs. This feature is enabled based on a command line option
> "kvm-arm.pinned_vmid_enable".

I think this is the wrong approach. Instead of shoving the notion of
pinned VMID into the current allocator, which really isn't designed
for this, it'd be a lot better if we aligned the KVM VMID allocator
with the ASID allocator, which already has support for pinning and is
in general much more efficient.

Julien Grall worked on such a series[1] a long while ago, which got
stalled because of the 32bit KVM port. Since we don't have this burden
anymore, I'd rather you look in that direction instead of wasting half
of the VMID space on potentially pinned VMIDs.

Thanks,

M.

[1] 
https://patchwork.kernel.org/project/linux-arm-kernel/cover/20190724162534.7390-1-julien.gr...@arm.com/


-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: iommu/vt-d: Cure VF irqdomain hickup

2020-11-13 Thread Marc Zyngier

On 2020-11-12 21:34, Thomas Gleixner wrote:

On Thu, Nov 12 2020 at 20:15, Thomas Gleixner wrote:

The recent changes to store the MSI irqdomain pointer in struct device
missed that Intel DMAR does not register virtual function devices.  
Due to
that a VF device gets the plain PCI-MSI domain assigned and then 
issues

compat MSI messages which get caught by the interrupt remapping unit.

Cure that by inheriting the irq domain from the physical function
device.

That's a temporary workaround. The correct fix is to inherit the irq 
domain

from the bus, but that's a larger effort which needs quite some other
changes to the way how x86 manages PCI and MSI domains.


Bah, that's not really going to work with the way how irq remapping
works on x86 because at least Intel/DMAR can have more than one DMAR
unit on a bus.

So the alternative solution would be to assign the domain per device,
but the current ordering creates a hen and egg problem. Looking the
domain up in pci_set_msi_domain() does not work because at that point
the device is not registered in the IOMMU. That happens from
device_add().

Marc, is there any problem to reorder the calls in pci_device_add():

  device_add();
  pci_set_msi_domain();


I *think* it works as long as we keep the "match_driver = false" hack.
Otherwise, we risk binding to a driver early, and game over.


That would allow to add a irq_find_matching_fwspec() based lookup to
pci_msi_get_device_domain().


Just so that I understand the issue: is the core of the problem that
there is no 1:1 mapping between a PCI bus and a DMAR unit, and no
firmware topology information to indicate which one to pick?


Though I'm not yet convinced that the outcome would be less horrible
than the hack in the DMAR driver when I'm taking all the other horrors
of x86 (including XEN) into account :)


I tried to follow the notifier into the DMAR driver, ended up in the
IRQ remapping code, and lost the will to live. I have a question though:

In the bus notifier callback, you end-up in dmar_pci_bus_add_dev(),
which calls intel_irq_remap_add_device(), which tries to set the
MSI domain. Why isn't that enough? Are we still missing any information
at that stage?

Thanks,

M.
--
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 22/35] genirq/irqdomain: Implement get_name() method on irqchip fwnodes

2020-10-25 Thread Marc Zyngier
Hi David,

nit: please use my kernel.org address for kernel related stuff.

On Sat, 24 Oct 2020 22:35:22 +0100,
David Woodhouse  wrote:
> 
> From: David Woodhouse 
> 
> Prerequesite to make x86 more irqdomain compliant.

Prerequisite?

> 
> Signed-off-by: David Woodhouse 
> Signed-off-by: Thomas Gleixner 
> ---
>  kernel/irq/irqdomain.c | 11 ++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
> index cf8b374b892d..673fa64c1c44 100644
> --- a/kernel/irq/irqdomain.c
> +++ b/kernel/irq/irqdomain.c
> @@ -42,7 +42,16 @@ static inline void debugfs_add_domain_dir(struct 
> irq_domain *d) { }
>  static inline void debugfs_remove_domain_dir(struct irq_domain *d) { }
>  #endif
>  
> -const struct fwnode_operations irqchip_fwnode_ops;
> +static const char *irqchip_fwnode_get_name(const struct fwnode_handle 
> *fwnode)
> +{
> + struct irqchip_fwid *fwid = container_of(fwnode, struct irqchip_fwid, 
> fwnode);
> +
> + return fwid->name;
> +}
> +
> +const struct fwnode_operations irqchip_fwnode_ops = {
> + .get_name = irqchip_fwnode_get_name,
> +};
>  EXPORT_SYMBOL_GPL(irqchip_fwnode_ops);
>  
>  /**

Acked-by: Marc Zyngier 

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [patch V2 34/46] PCI/MSI: Make arch_.*_msi_irq[s] fallbacks selectable

2020-08-28 Thread Marc Zyngier

On 2020-08-28 13:54, Jason Gunthorpe wrote:

On Fri, Aug 28, 2020 at 01:47:59PM +0100, Marc Zyngier wrote:


> So the arch_setup_msi_irq/etc is not really an arch hook, but some
> infrastructure to support those 4 PCI root port drivers.

I happen to have a *really old* patch addressing Tegra [1], which
I was never able to test (no HW). Rebasing it shouldn't be too hard,
and maybe you can find someone internally willing to give it a spin?


Sure, that helps a bunch, I will ask internally if someone in that BU
can take a look.


I just rebased[1] the patch onto -rc2, and provided a TODO list in the
commit message.

Thanks,

M.

[1] 
https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/log/?h=irq/tegra-msi

--
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [patch V2 34/46] PCI/MSI: Make arch_.*_msi_irq[s] fallbacks selectable

2020-08-28 Thread Marc Zyngier

Hi Jason,

On 2020-08-28 13:19, Jason Gunthorpe wrote:

On Fri, Aug 28, 2020 at 12:21:42PM +0100, Lorenzo Pieralisi wrote:

On Thu, Aug 27, 2020 at 01:20:40PM -0500, Bjorn Helgaas wrote:

[...]

> And I can't figure out what's special about tegra, rcar, and xilinx
> that makes them need it as well.  Is there something I could grep for
> to identify them?  Is there a way to convert them so they don't need
> it?

I think DT binding and related firmware support are needed to setup 
the

MSI IRQ domains correctly, there is nothing special about tegra, rcar
and xilinx AFAIK (well, all native host controllers MSI handling is
*special* just to be polite but let's gloss over this for the time
being).

struct msi_controller, to answer the first question.

I have doubts about pci_mvebu too, they do allocate an msi_controller
but without methods so it looks pretty much useless.


Oh, I did once know things about mvebu..

I suspect the msi controller pointer assignment is dead code at this
point. The only implementation of MSI with that PCI root port is
drivers/irqchip/irq-armada-370-xp.c which looks like it uses
irq_domain.

Actually looks like things are very close to eliminating
msi_controller.

This is dead code, can't find a setter for hw_pci->msi_ctrl:

arch/arm/include/asm/mach/pci.h:struct msi_controller 
*msi_ctrl;

arch/arm/kernel/bios32.c:   bridge->msi =
hw->msi_ctrl;

This is probably just copying NULL from one place to another:

drivers/pci/controller/pci-mvebu.c: struct msi_controller *msi;

These need conversion to irq_domain (right?):

drivers/pci/controller/pci-hyperv.c:struct msi_controller msi_chip;
drivers/pci/controller/pci-tegra.c: struct msi_controller chip;
drivers/pci/controller/pcie-rcar-host.c:struct msi_controller 
chip;

drivers/pci/controller/pcie-xilinx.c:static struct msi_controller
xilinx_pcie_msi_chip = {

Then the stuff in drivers/pci/msi.c can go away.

So the arch_setup_msi_irq/etc is not really an arch hook, but some
infrastructure to support those 4 PCI root port drivers.


I happen to have a *really old* patch addressing Tegra [1], which
I was never able to test (no HW). Rebasing it shouldn't be too hard,
and maybe you can find someone internally willing to give it a spin?

That'd be a good start towards the removal of this cruft.

Thanks,

M.

[1] 
https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/commit/?h=irq/kill-msi-controller=83b3602fcee7972b9d549ed729b56ec28de16081

--
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [patch V2 43/46] genirq/msi: Provide and use msi_domain_set_default_info_flags()

2020-08-27 Thread Marc Zyngier

On 2020-08-26 12:17, Thomas Gleixner wrote:

MSI interrupts have some common flags which should be set not only for
PCI/MSI interrupts.

Move the PCI/MSI flag setting into a common function so it can be 
reused.


Signed-off-by: Thomas Gleixner 
---
V2: New patch
---
 drivers/pci/msi.c   |7 +--
 include/linux/msi.h |1 +
 kernel/irq/msi.c|   24 
 3 files changed, 26 insertions(+), 6 deletions(-)

--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -1469,12 +1469,7 @@ struct irq_domain *pci_msi_create_irq_do
if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
pci_msi_domain_update_chip_ops(info);

-   info->flags |= MSI_FLAG_ACTIVATE_EARLY;
-   if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE))
-   info->flags |= MSI_FLAG_MUST_REACTIVATE;
-
-   /* PCI-MSI is oneshot-safe */
-   info->chip->flags |= IRQCHIP_ONESHOT_SAFE;
+   msi_domain_set_default_info_flags(info);

domain = msi_create_irq_domain(fwnode, info, parent);
if (!domain)
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -410,6 +410,7 @@ int platform_msi_domain_alloc(struct irq
 void platform_msi_domain_free(struct irq_domain *domain, unsigned int 
virq,

  unsigned int nvec);
 void *platform_msi_get_host_data(struct irq_domain *domain);
+void msi_domain_set_default_info_flags(struct msi_domain_info *info);
 #endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */

 #ifdef CONFIG_PCI_MSI_IRQ_DOMAIN
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -70,6 +70,30 @@ void get_cached_msi_msg(unsigned int irq
 EXPORT_SYMBOL_GPL(get_cached_msi_msg);

 #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
+void msi_domain_set_default_info_flags(struct msi_domain_info *info)
+{
+	/* Required so that a device latches a valid MSI message on startup 
*/

+   info->flags |= MSI_FLAG_ACTIVATE_EARLY;


As far as I remember the story behind this flag (it's been a while),
it was working around a PCI-specific issue, hence being located in
the PCI code.

Now, the "program the MSI before enabling it" concept makes sense no 
matter
what bus this is on, and I wonder why we are even keeping this flag 
around.
Can't we just drop it together with the check in 
msi_domain_alloc_irqs()?


Thanks,

M.
--
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [patch V2 29/46] irqdomain/msi: Allow to override msi_domain_alloc/free_irqs()

2020-08-26 Thread Marc Zyngier
On Wed, 26 Aug 2020 20:47:38 +0100,
Thomas Gleixner  wrote:
> 
> On Wed, Aug 26 2020 at 20:06, Marc Zyngier wrote:
> > On Wed, 26 Aug 2020 12:16:57 +0100,
> > Thomas Gleixner  wrote:
> >>  /**
> >> - * msi_domain_free_irqs - Free interrupts from a MSI interrupt @domain 
> >> associated tp @dev
> >> - * @domain:   The domain to managing the interrupts
> >> + * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain
> >> + * @domain:   The domain to allocate from
> >>   * @dev:  Pointer to device struct of the device for which the interrupts
> >> - *are free
> >> + *are allocated
> >> + * @nvec: The number of interrupts to allocate
> >> + *
> >> + * Returns 0 on success or an error code.
> >>   */
> >> -void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev)
> >> +int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
> >> +int nvec)
> >> +{
> >> +  struct msi_domain_info *info = domain->host_data;
> >> +  struct msi_domain_ops *ops = info->ops;
> >
> > Rework leftovers, I imagine.
> 
> Hmm, no. How would it call ops->domain_alloc_irqs() without getting the
> ops. I know, that the diff is horrible, but don't blame me for it. diff
> sucks at times.

I can't read. Time to put the laptop away!

Thanks,

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [patch V2 04/46] genirq/chip: Use the first chip in irq_chip_compose_msi_msg()

2020-08-26 Thread Marc Zyngier
On Wed, 26 Aug 2020 22:19:56 +0100,
Thomas Gleixner  wrote:
> 
> On Wed, Aug 26 2020 at 20:50, Marc Zyngier wrote:
> > On Wed, 26 Aug 2020 12:16:32 +0100,
> > Thomas Gleixner  wrote:
> >> ---
> >> V2: New patch. Note, that this might break other stuff which relies on the
> >> current behaviour, but the hierarchy composition of DT based chips is
> >> really hard to follow.
> >

[...]

> What about the below?
> 
> Thanks,
> 
> tglx
> ---
> --- a/kernel/irq/internals.h
> +++ b/kernel/irq/internals.h
> @@ -473,6 +473,15 @@ static inline void irq_domain_deactivate
>  }
>  #endif
>  
> +static inline struct irq_data *irqd_get_parent_data(struct irq_data *irqd)
> +{
> +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
> + return irqd->parent_data;
> +#else
> + return NULL;
> +#endif
> +}
> +

We obviously should have had this forever.

>  #ifdef CONFIG_GENERIC_IRQ_DEBUGFS
>  #include 
>  
> --- a/kernel/irq/chip.c
> +++ b/kernel/irq/chip.c
> @@ -1541,18 +1541,17 @@ EXPORT_SYMBOL_GPL(irq_chip_release_resou
>   */
>  int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
>  {
> - struct irq_data *pos = NULL;
> + struct irq_data *pos;
>  
> -#ifdef   CONFIG_IRQ_DOMAIN_HIERARCHY
> - for (; data; data = data->parent_data)
> -#endif
> + for (pos = NULL; !pos && data; data = irqd_get_parent_data(data)) {
>   if (data->chip && data->chip->irq_compose_msi_msg)
>   pos = data;
> + }
> +
>   if (!pos)
>   return -ENOSYS;
>  
>   pos->chip->irq_compose_msi_msg(pos, msg);
> -
>   return 0;
>  }

Perfect, ship it! ;-)

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [patch V2 41/46] platform-msi: Provide default irq_chip:: Ack

2020-08-26 Thread Marc Zyngier
On Wed, 26 Aug 2020 12:17:09 +0100,
Thomas Gleixner  wrote:
> 
> From: Thomas Gleixner 
> 
> For the upcoming device MSI support it's required to have a default
> irq_chip::ack implementation (irq_chip_ack_parent) so the drivers do not
> need to care.
> 
> Signed-off-by: Thomas Gleixner 
> 
> ---
>  drivers/base/platform-msi.c |2 ++
>  1 file changed, 2 insertions(+)
> 
> --- a/drivers/base/platform-msi.c
> +++ b/drivers/base/platform-msi.c
> @@ -95,6 +95,8 @@ static void platform_msi_update_chip_ops
>   chip->irq_mask = irq_chip_mask_parent;
>   if (!chip->irq_unmask)
>   chip->irq_unmask = irq_chip_unmask_parent;
> + if (!chip->irq_ack)
> + chip->irq_ack = irq_chip_ack_parent;
>   if (!chip->irq_eoi)
>   chip->irq_eoi = irq_chip_eoi_parent;
>   if (!chip->irq_set_affinity)
> 
> 

Acked-by: Marc Zyngier 

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [patch V2 34/46] PCI/MSI: Make arch_.*_msi_irq[s] fallbacks selectable

2020-08-26 Thread Marc Zyngier
On Wed, 26 Aug 2020 12:17:02 +0100,
Thomas Gleixner  wrote:
> 
> From: Thomas Gleixner 
> 
> The arch_.*_msi_irq[s] fallbacks are compiled in whether an architecture
> requires them or not. Architectures which are fully utilizing hierarchical
> irq domains should never call into that code.
> 
> It's not only architectures which depend on that by implementing one or
> more of the weak functions, there is also a bunch of drivers which relies
> on the weak functions which invoke msi_controller::setup_irq[s] and
> msi_controller::teardown_irq.
> 
> Make the architectures and drivers which rely on them select them in Kconfig
> and if not selected replace them by stub functions which emit a warning and
> fail the PCI/MSI interrupt allocation.
> 
> Signed-off-by: Thomas Gleixner 
> ---
> V2: Make the architectures (and drivers) which need the fallbacks select them
> and not the other way round (Bjorn).
> ---
>  arch/ia64/Kconfig  |1 +
>  arch/mips/Kconfig  |1 +
>  arch/powerpc/Kconfig   |1 +
>  arch/s390/Kconfig  |1 +
>  arch/sparc/Kconfig |1 +
>  arch/x86/Kconfig   |1 +
>  drivers/pci/Kconfig|3 +++
>  drivers/pci/controller/Kconfig |3 +++
>  drivers/pci/msi.c  |3 ++-
>  include/linux/msi.h|   31 ++-
>  10 files changed, 40 insertions(+), 6 deletions(-)
> 

[...]

> --- a/drivers/pci/controller/Kconfig
> +++ b/drivers/pci/controller/Kconfig
> @@ -41,6 +41,7 @@ config PCI_TEGRA
>   bool "NVIDIA Tegra PCIe controller"
>   depends on ARCH_TEGRA || COMPILE_TEST
>   depends on PCI_MSI_IRQ_DOMAIN
> + select PCI_MSI_ARCH_FALLBACKS
>   help
> Say Y here if you want support for the PCIe host controller found
> on NVIDIA Tegra SoCs.
> @@ -67,6 +68,7 @@ config PCIE_RCAR_HOST
>   bool "Renesas R-Car PCIe host controller"
>   depends on ARCH_RENESAS || COMPILE_TEST
>   depends on PCI_MSI_IRQ_DOMAIN
> + select PCI_MSI_ARCH_FALLBACKS
>   help
> Say Y here if you want PCIe controller support on R-Car SoCs in host
> mode.
> @@ -103,6 +105,7 @@ config PCIE_XILINX_CPM
>   bool "Xilinx Versal CPM host bridge support"
>   depends on ARCH_ZYNQMP || COMPILE_TEST
>   select PCI_HOST_COMMON
> + select PCI_MSI_ARCH_FALLBACKS

This guy actually doesn't implement MSIs at all (it seems to delegate
them to an ITS present in the system, if I read the DT binding
correctly). However its older brother from the same silicon dealer
seems to need it. The patchlet below should fix it.

diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig
index 9ad13919bcaa..f56ff049d469 100644
--- a/drivers/pci/controller/Kconfig
+++ b/drivers/pci/controller/Kconfig
@@ -96,6 +96,7 @@ config PCI_HOST_GENERIC
 
 config PCIE_XILINX
bool "Xilinx AXI PCIe host bridge support"
+   select PCI_MSI_ARCH_FALLBACKS
depends on OF || COMPILE_TEST
help
  Say 'Y' here if you want kernel to support the Xilinx AXI PCIe
@@ -105,7 +106,6 @@ config PCIE_XILINX_CPM
bool "Xilinx Versal CPM host bridge support"
depends on ARCH_ZYNQMP || COMPILE_TEST
    select PCI_HOST_COMMON
-   select PCI_MSI_ARCH_FALLBACKS
help
  Say 'Y' here if you want kernel support for the
  Xilinx Versal CPM host bridge.


With that fixed,

Acked-by: Marc Zyngier 

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [patch V2 24/46] PCI: vmd: Mark VMD irqdomain with DOMAIN_BUS_VMD_MSI

2020-08-26 Thread Marc Zyngier
On Wed, 26 Aug 2020 12:16:52 +0100,
Thomas Gleixner  wrote:
> 
> From: Thomas Gleixner 
> 
> Devices on the VMD bus use their own MSI irq domain, but it is not
> distinguishable from regular PCI/MSI irq domains. This is required
> to exclude VMD devices from getting the irq domain pointer set by
> interrupt remapping.
> 
> Override the default bus token.
> 
> Signed-off-by: Thomas Gleixner 
> Acked-by: Bjorn Helgaas 
> ---
>  drivers/pci/controller/vmd.c |6 ++
>  1 file changed, 6 insertions(+)
> 
> --- a/drivers/pci/controller/vmd.c
> +++ b/drivers/pci/controller/vmd.c
> @@ -579,6 +579,12 @@ static int vmd_enable_domain(struct vmd_
>   return -ENODEV;
>   }
>  
> + /*
> +  * Override the irq domain bus token so the domain can be distinguished
> +  * from a regular PCI/MSI domain.
> +  */
> + irq_domain_update_bus_token(vmd->irq_domain, DOMAIN_BUS_VMD_MSI);
> +

One day, we'll be able to set the token at domain creation time. In
the meantime,

Acked-by: Marc Zyngier 

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [patch V2 23/46] irqdomain/msi: Provide DOMAIN_BUS_VMD_MSI

2020-08-26 Thread Marc Zyngier
On Wed, 26 Aug 2020 12:16:51 +0100,
Thomas Gleixner  wrote:
> 
> From: Thomas Gleixner 
> 
> PCI devices behind a VMD bus are not subject to interrupt remapping, but
> the irq domain for VMD MSI cannot be distinguished from a regular PCI/MSI
> irq domain.
> 
> Add a new domain bus token and allow it in the bus token check in
> msi_check_reservation_mode() to keep the functionality the same once VMD
> uses this token.
> 
> Signed-off-by: Thomas Gleixner 
> 
> ---
>  include/linux/irqdomain.h |1 +
>  kernel/irq/msi.c  |7 ++-
>  2 files changed, 7 insertions(+), 1 deletion(-)
> 
> --- a/include/linux/irqdomain.h
> +++ b/include/linux/irqdomain.h
> @@ -84,6 +84,7 @@ enum irq_domain_bus_token {
>   DOMAIN_BUS_FSL_MC_MSI,
>   DOMAIN_BUS_TI_SCI_INTA_MSI,
>   DOMAIN_BUS_WAKEUP,
> + DOMAIN_BUS_VMD_MSI,
>  };
>  
>  /**
> --- a/kernel/irq/msi.c
> +++ b/kernel/irq/msi.c
> @@ -370,8 +370,13 @@ static bool msi_check_reservation_mode(s
>  {
>   struct msi_desc *desc;
>  
> - if (domain->bus_token != DOMAIN_BUS_PCI_MSI)
> + switch(domain->bus_token) {
> + case DOMAIN_BUS_PCI_MSI:
> + case DOMAIN_BUS_VMD_MSI:
> + break;
> + default:
>   return false;
> + }
>  
>   if (!(info->flags & MSI_FLAG_MUST_REACTIVATE))
>   return false;

Acked-by: Marc Zyngier 

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [patch V2 17/46] PCI/MSI: Rework pci_msi_domain_calc_hwirq()

2020-08-26 Thread Marc Zyngier
On Wed, 26 Aug 2020 12:16:45 +0100,
Thomas Gleixner  wrote:
> 
> From: Thomas Gleixner 
> 
> Retrieve the PCI device from the msi descriptor instead of doing so at the
> call sites.
> 
> Signed-off-by: Thomas Gleixner 
> Acked-by: Bjorn Helgaas 

Acked-by: Marc Zyngier 

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [patch V2 19/46] x86/msi: Use generic MSI domain ops

2020-08-26 Thread Marc Zyngier
On Wed, 26 Aug 2020 12:16:47 +0100,
Thomas Gleixner  wrote:
> 
> From: Thomas Gleixner 
> 
> pci_msi_get_hwirq() and pci_msi_set_desc are not longer special. Enable the
> generic MSI domain ops in the core and PCI MSI code unconditionally and get
> rid of the x86 specific implementations in the X86 MSI code and in the
> hyperv PCI driver.
> 
> Signed-off-by: Thomas Gleixner 
> 
> ---
>  arch/x86/include/asm/msi.h  |2 --
>  arch/x86/kernel/apic/msi.c  |   15 ---
>  drivers/pci/controller/pci-hyperv.c |8 
>  drivers/pci/msi.c   |4 
>  kernel/irq/msi.c|6 --
>  5 files changed, 35 deletions(-)
> 
> --- a/arch/x86/include/asm/msi.h
> +++ b/arch/x86/include/asm/msi.h
> @@ -9,6 +9,4 @@ typedef struct irq_alloc_info msi_alloc_
>  int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
>   msi_alloc_info_t *arg);
>  
> -void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc);
> -
>  #endif /* _ASM_X86_MSI_H */
> --- a/arch/x86/kernel/apic/msi.c
> +++ b/arch/x86/kernel/apic/msi.c
> @@ -204,12 +204,6 @@ void native_teardown_msi_irq(unsigned in
>   irq_domain_free_irqs(irq, 1);
>  }
>  
> -static irq_hw_number_t pci_msi_get_hwirq(struct msi_domain_info *info,
> -  msi_alloc_info_t *arg)
> -{
> - return arg->hwirq;
> -}
> -
>  int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
>   msi_alloc_info_t *arg)
>  {
> @@ -228,17 +222,8 @@ int pci_msi_prepare(struct irq_domain *d
>  }
>  EXPORT_SYMBOL_GPL(pci_msi_prepare);
>  
> -void pci_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
> -{
> - arg->desc = desc;
> - arg->hwirq = pci_msi_domain_calc_hwirq(desc);
> -}
> -EXPORT_SYMBOL_GPL(pci_msi_set_desc);

I think that at this stage, pci_msi_domain_calc_hwirq() can be made
static, as it was only ever exported for this call site. Nice cleanup!

Reviewed-by: Marc Zyngier 

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [patch V2 04/46] genirq/chip: Use the first chip in irq_chip_compose_msi_msg()

2020-08-26 Thread Marc Zyngier
On Wed, 26 Aug 2020 12:16:32 +0100,
Thomas Gleixner  wrote:
> 
> The documentation of irq_chip_compose_msi_msg() claims that with
> hierarchical irq domains the first chip in the hierarchy which has an
> irq_compose_msi_msg() callback is chosen. But the code just keeps
> iterating after it finds a chip with a compose callback.
> 
> The x86 HPET MSI implementation relies on that behaviour, but that does not
> make it more correct.
> 
> The message should always be composed at the domain which manages the
> underlying resource (e.g. APIC or remap table) because that domain knows
> about the required layout of the message.
> 
> On X86 the following hierarchies exist:
> 
> 1)   vector  PCI/MSI
> 2)   vector -- IR -- PCI/MSI
> 
> The vector domain has a different message format than the IR (remapping)
> domain. So obviously the PCI/MSI domain can't compose the message without
> having knowledge about the parent domain, which is exactly the opposite of
> what hierarchical domains want to achieve.
> 
> X86 actually has two different PCI/MSI chips where #1 has a compose
> callback and #2 does not. #2 delegates the composition to the remap domain
> where it belongs, but #1 does it at the PCI/MSI level.
> 
> For the upcoming device MSI support it's necessary to change this and just
> let the first domain which can compose the message take care of it. That
> way the top level chip does not have to worry about it and the device MSI
> code does not need special knowledge about topologies. It just sets the
> compose callback to NULL and lets the hierarchy pick the first chip which
> has one.
> 
> Due to that the attempt to move the compose callback from the direct
> delivery PCI/MSI domain to the vector domain made the system fail to boot
> with interrupt remapping enabled because in the remapping case
> irq_chip_compose_msi_msg() keeps iterating and choses the compose callback
> of the vector domain which obviously creates the wrong format for the remap
> table.
> 
> Break out of the loop when the first irq chip with a compose callback is
> found and fixup the HPET code temporarily. That workaround will be removed
> once the direct delivery compose callback is moved to the place where it
> belongs in the vector domain.
> 
> Signed-off-by: Thomas Gleixner 
> ---
> V2: New patch. Note, that this might break other stuff which relies on the
> current behaviour, but the hierarchy composition of DT based chips is
> really hard to follow.

Grepping around, I don't think there is any occurrence of two irqchips
providing irq_compose_msi() that can share a hierarchy on any real
system, so we should be fine. Famous last words.

> ---
>  arch/x86/kernel/apic/msi.c |7 +--
>  kernel/irq/chip.c  |   12 +---
>  2 files changed, 14 insertions(+), 5 deletions(-)
> 
> --- a/arch/x86/kernel/apic/msi.c
> +++ b/arch/x86/kernel/apic/msi.c
> @@ -479,10 +479,13 @@ struct irq_domain *hpet_create_irq_domai
>   info.type = X86_IRQ_ALLOC_TYPE_HPET;
>   info.hpet_id = hpet_id;
>   parent = irq_remapping_get_ir_irq_domain();
> - if (parent == NULL)
> + if (parent == NULL) {
>   parent = x86_vector_domain;
> - else
> + } else {
>   hpet_msi_controller.name = "IR-HPET-MSI";
> + /* Temporary fix: Will go away */
> + hpet_msi_controller.irq_compose_msi_msg = NULL;
> + }
>  
>   fn = irq_domain_alloc_named_id_fwnode(hpet_msi_controller.name,
> hpet_id);
> --- a/kernel/irq/chip.c
> +++ b/kernel/irq/chip.c
> @@ -1544,10 +1544,16 @@ int irq_chip_compose_msi_msg(struct irq_
>   struct irq_data *pos = NULL;
>  
>  #ifdef   CONFIG_IRQ_DOMAIN_HIERARCHY
> - for (; data; data = data->parent_data)
> -#endif
> - if (data->chip && data->chip->irq_compose_msi_msg)
> + for (; data; data = data->parent_data) {
> + if (data->chip && data->chip->irq_compose_msi_msg) {
>   pos = data;
> + break;
> + }
> + }
> +#else
> + if (data->chip && data->chip->irq_compose_msi_msg)
> + pos = data;
> +#endif
>   if (!pos)
>   return -ENOSYS;
>  
> 
> 

Is it just me, or is this last change more complex than it ought to be?

diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 857f5f4c8098..25e18b73699c 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -1544,7 +1544,7 @@ int irq_chip_compose_msi_msg(struct irq_data *data, 
struct msi_msg *msg)
struct irq_data *pos = NULL;
 
 #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
- 

Re: [patch V2 29/46] irqdomain/msi: Allow to override msi_domain_alloc/free_irqs()

2020-08-26 Thread Marc Zyngier
 = msi_domain_ops_get_hwirq,
> + .msi_init   = msi_domain_ops_init,
> + .msi_check  = msi_domain_ops_check,
> + .msi_prepare= msi_domain_ops_prepare,
> + .set_desc   = msi_domain_ops_set_desc,
> + .domain_alloc_irqs  = __msi_domain_alloc_irqs,
> + .domain_free_irqs   = __msi_domain_free_irqs,
>  };
>  
>  static void msi_domain_update_dom_ops(struct msi_domain_info *info)
> @@ -245,6 +247,14 @@ static void msi_domain_update_dom_ops(st
>   return;
>   }
>  
> + if (ops->domain_alloc_irqs == NULL)
> + ops->domain_alloc_irqs = 
> msi_domain_ops_default.domain_alloc_irqs;
> + if (ops->domain_free_irqs == NULL)
> + ops->domain_free_irqs = msi_domain_ops_default.domain_free_irqs;
> +
> + if (!(info->flags & MSI_FLAG_USE_DEF_DOM_OPS))
> + return;
> +
>   if (ops->get_hwirq == NULL)
>   ops->get_hwirq = msi_domain_ops_default.get_hwirq;
>   if (ops->msi_init == NULL)
> @@ -278,8 +288,7 @@ struct irq_domain *msi_create_irq_domain
>  {
>   struct irq_domain *domain;
>  
> - if (info->flags & MSI_FLAG_USE_DEF_DOM_OPS)
> - msi_domain_update_dom_ops(info);
> + msi_domain_update_dom_ops(info);
>   if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
>   msi_domain_update_chip_ops(info);
>  
> @@ -386,17 +395,8 @@ static bool msi_check_reservation_mode(s
>   return desc->msi_attrib.is_msix || desc->msi_attrib.maskbit;
>  }
>  
> -/**
> - * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain
> - * @domain:  The domain to allocate from
> - * @dev: Pointer to device struct of the device for which the interrupts
> - *   are allocated
> - * @nvec:The number of interrupts to allocate
> - *
> - * Returns 0 on success or an error code.
> - */
> -int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
> -   int nvec)
> +int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
> + int nvec)
>  {
>   struct msi_domain_info *info = domain->host_data;
>   struct msi_domain_ops *ops = info->ops;
> @@ -490,12 +490,24 @@ int msi_domain_alloc_irqs(struct irq_dom
>  }
>  
>  /**
> - * msi_domain_free_irqs - Free interrupts from a MSI interrupt @domain 
> associated tp @dev
> - * @domain:  The domain to managing the interrupts
> + * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain
> + * @domain:  The domain to allocate from
>   * @dev: Pointer to device struct of the device for which the interrupts
> - *   are free
> + *   are allocated
> + * @nvec:The number of interrupts to allocate
> + *
> + * Returns 0 on success or an error code.
>   */
> -void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev)
> +int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
> +   int nvec)
> +{
> + struct msi_domain_info *info = domain->host_data;
> + struct msi_domain_ops *ops = info->ops;

Rework leftovers, I imagine.

> +
> + return ops->domain_alloc_irqs(domain, dev, nvec);
> +}
> +
> +void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev)
>  {
>   struct msi_desc *desc;
>  
> @@ -513,6 +525,20 @@ void msi_domain_free_irqs(struct irq_dom
>  }
>  
>  /**
> + * __msi_domain_free_irqs - Free interrupts from a MSI interrupt @domain 
> associated tp @dev

Spurious __.

> + * @domain:  The domain to managing the interrupts
> + * @dev: Pointer to device struct of the device for which the interrupts
> + *   are free
> + */
> +void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev)
> +{
> + struct msi_domain_info *info = domain->host_data;
> + struct msi_domain_ops *ops = info->ops;

Same thing?

> +
> + return ops->domain_free_irqs(domain, dev);
> +}
> +
> +/**
>   * msi_get_domain_info - Get the MSI interrupt domain info for @domain
>   * @domain:  The interrupt domain to retrieve data from
>   *

Otherwise looks good to me:

Reviewed-by: Marc Zyngier 

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [EXT] Re: [PATCH v2 12/12] bus: fsl-mc: Add ACPI support for fsl-mc

2020-07-16 Thread Marc Zyngier

On 2020-07-16 04:23, Makarand Pawagi wrote:

-Original Message-
From: Lorenzo Pieralisi 


[...]


Anyway - you need to seek feedback from Marc on whether patches
11 and 12 are OK from an irqchip perspective, it is possible we can 
take the rest
of the series independently if everyone agrees but I don't necessarily 
see a

reason for that.

Long story short: you need Marc's ACK on [11-12], it is your code.


Hi Marc, can you please review/ack this patch?


https://lore.kernel.org/linux-acpi/bd07f44dad1d029e0d023202cbf5f...@kernel.org/

M.
--
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 11/12] bus/fsl-mc: Refactor the MSI domain creation in the DPRC driver

2020-07-15 Thread Marc Zyngier

On 2020-06-19 09:20, Lorenzo Pieralisi wrote:

From: Diana Craciun 

The DPRC driver is not taking into account the msi-map property
and assumes that the icid is the same as the stream ID. Although
this assumption is correct, generalize the code to include a
translation between icid and streamID.

Furthermore do not just copy the MSI domain from parent (for child
containers), but use the information provided by the msi-map property.

If the msi-map property is missing from the device tree retain the old
behaviour for backward compatibility ie the child DPRC objects
inherit the MSI domain from the parent.

Signed-off-by: Diana Craciun 
---
 drivers/bus/fsl-mc/dprc-driver.c| 31 ++---
 drivers/bus/fsl-mc/fsl-mc-bus.c |  4 +--
 drivers/bus/fsl-mc/fsl-mc-msi.c | 31 +
 drivers/bus/fsl-mc/fsl-mc-private.h |  6 ++--
 drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c | 15 +-
 5 files changed, 47 insertions(+), 40 deletions(-)


For this patch and the following one:

Acked-by: Marc Zyngier 

M.
--
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 3/5] irqchip: Allow QCOM_PDC to be loadable as a permanent module

2020-07-12 Thread Marc Zyngier
On Sat, 11 Jul 2020 00:27:45 +0100,
Stephen Boyd  wrote:
> 
> Quoting John Stultz (2020-07-10 15:44:18)
> > On Thu, Jul 9, 2020 at 11:02 PM Stephen Boyd  wrote:
> > >
> > > Does it work? I haven't looked in detail but I worry that the child
> > > irqdomain (i.e. pinctrl-msm) would need to delay probing until this
> > > parent irqdomain is registered. Or has the hierarchical irqdomain code
> > > been updated to handle the parent child relationship and wait for things
> > > to probe or be loaded?
> > 
> > So I can't say I know the underlying hardware particularly well, but
> > I've been using this successfully on the Dragonboard 845c with both
> > static builds as well as module enabled builds.
> > And the same patch has been in the android-mainline and android-5.4
> > kernels for a while without objections from QCOM.
> > 
> > As to the probe ordering question, Saravana can maybe speak in more
> > detail if it's involved in this case but the fw_devlink code has
> > addressed many of these sorts of ordering issues.
> > However, I'm not sure if I'm lucking into the right probe order, as we
> > have been able to boot android-mainline w/ both fw_devlink=on and
> > fw_devlink=off (though in the =off case, we need
> > deferred_probe_timeout=30 to give us a bit more time for modules to
> > load after init starts).
> > 
> 
> Ok I looked at the code (sorry for not checking earlier) and I see this in
> msm_gpio_init()
> 
> np = of_parse_phandle(pctrl->dev->of_node, "wakeup-parent", 0);
> if (np) {
> chip->irq.parent_domain = irq_find_matching_host(np,
>  DOMAIN_BUS_WAKEUP);
> of_node_put(np);
> if (!chip->irq.parent_domain)
> return -EPROBE_DEFER;
> 
> so it looks like we'll probe defer the pinctrl driver until the pdc module
> loads. Meaning it should work to have pinctrl builtin and pdc as a module.

What I hope is that eventually fw_devlink will become the norm (on by
default), and that probe deferral will become a thing of the past.

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 3/5] irqchip: Allow QCOM_PDC to be loadable as a permanent module

2020-06-27 Thread Marc Zyngier
On Sat, 27 Jun 2020 02:34:25 +0100,
John Stultz  wrote:
> 
> On Fri, Jun 26, 2020 at 12:42 AM Stephen Boyd  wrote:
> >
> > Quoting John Stultz (2020-06-24 17:10:37)
> > > diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
> > > index 6ae9e1f0819d..3fee8b655da1 100644
> > > --- a/drivers/irqchip/qcom-pdc.c
> > > +++ b/drivers/irqchip/qcom-pdc.c
> > > @@ -430,4 +432,33 @@ static int qcom_pdc_init(struct device_node *node, 
> > > struct device_node *parent)
> > > return ret;
> > >  }
> > >
> > > +#ifdef MODULE
> > > +static int qcom_pdc_probe(struct platform_device *pdev)
> > > +{
> > > +   struct device_node *np = pdev->dev.of_node;
> > > +   struct device_node *parent = of_irq_find_parent(np);
> > > +
> > > +   return qcom_pdc_init(np, parent);
> > > +}
> > > +
> > > +static const struct of_device_id qcom_pdc_match_table[] = {
> > > +   { .compatible = "qcom,pdc" },
> > > +   {}
> > > +};
> > > +MODULE_DEVICE_TABLE(of, qcom_pdc_match_table);
> > > +
> > > +static struct platform_driver qcom_pdc_driver = {
> > > +   .probe = qcom_pdc_probe,
> > > +   .driver = {
> > > +   .name = "qcom-pdc",
> > > +   .of_match_table = qcom_pdc_match_table,
> > > +   .suppress_bind_attrs = true,
> > > +   },
> > > +};
> > > +module_platform_driver(qcom_pdc_driver);
> > > +#else
> > >  IRQCHIP_DECLARE(qcom_pdc, "qcom,pdc", qcom_pdc_init);
> >
> > Is there any reason to use IRQCHIP_DECLARE if this can work as a
> > platform device driver?
> >
> 
> Hey! Thanks so much for the review!
> 
> Mostly it was done this way to minimize the change in the non-module
> case. But if you'd rather avoid the #ifdefery I'll respin it without.

That would certainly be my own preference. In general, IRQCHIP_DECLARE
and platform drivers should be mutually exclusive in the same driver:
if you can delay the probing and have it as a proper platform device,
then this should be the one true way.

Thanks,

M.

-- 
Without deviation from the norm, progress is not possible.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC][PATCH 5/5] firmware: QCOM_SCM: Allow qcom_scm driver to be loadable as a permenent module

2020-06-16 Thread Marc Zyngier

Hi John,

+Will for the SMMU part.

On 2020-06-16 07:13, John Stultz wrote:

Allow the qcom_scm driver to be loadable as a
permenent module.

Cc: Andy Gross 
Cc: Bjorn Andersson 
Cc: Joerg Roedel 
Cc: Thomas Gleixner 
Cc: Jason Cooper 
Cc: Marc Zyngier 
Cc: Linus Walleij 
Cc: Lina Iyer 
Cc: Saravana Kannan 
Cc: Todd Kjos 
Cc: Greg Kroah-Hartman 
Cc: linux-arm-...@vger.kernel.org
Cc: iommu@lists.linux-foundation.org
Cc: linux-g...@vger.kernel.org
Signed-off-by: John Stultz 
---
 drivers/firmware/Kconfig| 2 +-
 drivers/firmware/Makefile   | 3 ++-
 drivers/firmware/qcom_scm.c | 4 
 drivers/iommu/Kconfig   | 2 ++
 4 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index fbd785dd0513..9e533a462bf4 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -236,7 +236,7 @@ config INTEL_STRATIX10_RSU
  Say Y here if you want Intel RSU support.

 config QCOM_SCM
-   bool
+   tristate "Qcom SCM driver"
depends on ARM || ARM64
select RESET_CONTROLLER

diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
index 99510be9f5ed..cf24d674216b 100644
--- a/drivers/firmware/Makefile
+++ b/drivers/firmware/Makefile
@@ -17,7 +17,8 @@ obj-$(CONFIG_ISCSI_IBFT)  += iscsi_ibft.o
 obj-$(CONFIG_FIRMWARE_MEMMAP)  += memmap.o
 obj-$(CONFIG_RASPBERRYPI_FIRMWARE) += raspberrypi.o
 obj-$(CONFIG_FW_CFG_SYSFS) += qemu_fw_cfg.o
-obj-$(CONFIG_QCOM_SCM) += qcom_scm.o qcom_scm-smc.o qcom_scm-legacy.o
+obj-$(CONFIG_QCOM_SCM) += qcom-scm.o
+qcom-scm-objs += qcom_scm.o qcom_scm-smc.o qcom_scm-legacy.o
 obj-$(CONFIG_TI_SCI_PROTOCOL)  += ti_sci.o
 obj-$(CONFIG_TRUSTED_FOUNDATIONS) += trusted_foundations.o
 obj-$(CONFIG_TURRIS_MOX_RWTM)  += turris-mox-rwtm.o
diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c
index 0e7233a20f34..b5e88bf66975 100644
--- a/drivers/firmware/qcom_scm.c
+++ b/drivers/firmware/qcom_scm.c
@@ -1155,6 +1155,7 @@ static const struct of_device_id 
qcom_scm_dt_match[] = {

{ .compatible = "qcom,scm" },
{}
 };
+MODULE_DEVICE_TABLE(of, qcom_scm_dt_match);

 static struct platform_driver qcom_scm_driver = {
.driver = {
@@ -1170,3 +1171,6 @@ static int __init qcom_scm_init(void)
return platform_driver_register(_scm_driver);
 }
 subsys_initcall(qcom_scm_init);
+
+MODULE_DESCRIPTION("Qualcomm Technologies, Inc. SCM driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index b510f67dfa49..714893535dd2 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -381,6 +381,7 @@ config SPAPR_TCE_IOMMU
 config ARM_SMMU
tristate "ARM Ltd. System MMU (SMMU) Support"
 	depends on (ARM64 || ARM || (COMPILE_TEST && !GENERIC_ATOMIC64)) && 
MMU

+   depends on QCOM_SCM || !QCOM_SCM #if QCOM_SCM=m this can't be =y


This looks a bit ugly. Could you explain why we need this at the SMMU 
level? I'd have expected the dependency to flow the other way around...



select IOMMU_API
select IOMMU_IO_PGTABLE_LPAE
select ARM_DMA_USE_IOMMU if ARM
@@ -500,6 +501,7 @@ config QCOM_IOMMU
# Note: iommu drivers cannot (yet?) be built as modules
bool "Qualcomm IOMMU Support"
depends on ARCH_QCOM || (COMPILE_TEST && !GENERIC_ATOMIC64)
+   depends on QCOM_SCM=y
select IOMMU_API
select IOMMU_IO_PGTABLE_LPAE
select ARM_DMA_USE_IOMMU


Thanks,

M.
--
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 13/24] iommu/arm-smmu-v3: Enable broadcast TLB maintenance

2020-05-21 Thread Marc Zyngier

On 2020-05-21 15:17, Will Deacon wrote:

[+Marc]

On Tue, May 19, 2020 at 07:54:51PM +0200, Jean-Philippe Brucker wrote:

The SMMUv3 can handle invalidation targeted at TLB entries with shared
ASIDs. If the implementation supports broadcast TLB maintenance, 
enable it
and keep track of it in a feature bit. The SMMU will then be affected 
by

inner-shareable TLB invalidations from other agents.

A major side-effect of this change is that stage-2 translation 
contexts
are now affected by all invalidations by VMID. VMIDs are all shared 
and
the only ways to prevent over-invalidation, since the stage-2 page 
tables
are not shared between CPU and SMMU, are to either disable BTM or 
allocate

different VMIDs. This patch does not address the problem.


This sounds like a potential performance issue, particularly as we 
expose
stage-2 contexts via VFIO directly. Maybe we could reserve some portion 
of

VMID space for the SMMU? Marc, what do you reckon?


Certainly doable when we have 16bits VMIDs. With smaller VMID spaces 
(like on
v8.0), this is a bit more difficult (we do have pretty large v8.0 
systems

around). How many VMID bits are we talking about?

M.
--
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: arm-smmu-v3 high cpu usage for NVMe

2020-03-24 Thread Marc Zyngier
On Tue, 24 Mar 2020 09:18:10 +
John Garry  wrote:

> On 23/03/2020 09:16, Marc Zyngier wrote:
> 
> + Julien, Mark
> 
> Hi Marc,
> 
> >>> Time to enable pseudo-NMIs in the PMUv3 driver...
> >>>
> >>
> >> Do you know if there is any plan for this?
> > 
> > There was. Julien Thierry has a bunch of patches for that [1], but they > 
> > needs
> > reviving.
> > 
> 
> So those patches still apply cleanly (apart from the kvm patch, which
> I can skip, I suppose) and build, so I can try this I figure. Is
> there anything else which I should ensure or know about? Apart from
> enable CONFIG_ARM64_PSUEDO_NMI.

You need to make sure that your firmware sets SCR_EL3.FIQ to 1. My D05
has it set to 0, preventing me from being able to use the feature
(hint, nudge... ;-).

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: arm-smmu-v3 high cpu usage for NVMe

2020-03-23 Thread Marc Zyngier

On 2020-03-23 09:03, John Garry wrote:

On 20/03/2020 16:33, Marc Zyngier wrote:

JFYI, I've been playing for "perf annotate" today and it's giving
strange results for my NVMe testing. So "report" looks somewhat sane,
if not a worryingly high % for arm_smmu_cmdq_issue_cmdlist():


    55.39%  irq/342-nvme0q1  [kernel.kallsyms]  [k] 
arm_smmu_cmdq_issue_cmdlist
 9.74%  irq/342-nvme0q1  [kernel.kallsyms]  [k] 
_raw_spin_unlock_irqrestore

 2.02%  irq/342-nvme0q1  [kernel.kallsyms]  [k] nvme_irq
 1.86%  irq/342-nvme0q1  [kernel.kallsyms]  [k] fput_many
 1.73%  irq/342-nvme0q1  [kernel.kallsyms]  [k]
arm_smmu_atc_inv_domain.constprop.42
 1.67%  irq/342-nvme0q1  [kernel.kallsyms]  [k] __arm_lpae_unmap
 1.49%  irq/342-nvme0q1  [kernel.kallsyms]  [k] aio_complete_rw

But "annotate" consistently tells me that a specific instruction
consumes ~99% of the load for the enqueue function:

 :  /* 5. If we are inserting a CMD_SYNC,
we must wait for it to complete */
 :  if (sync) {
    0.00 :   80001071c948:   ldr w0, [x29, #108]
 :  int ret = 0;
    0.00 :   80001071c94c:   mov w24, #0x0  // #0
 :  if (sync) {
    0.00 :   80001071c950:   cbnz    w0, 80001071c990

 :  arch_local_irq_restore():
    0.00 :   80001071c954:   msr daif, x21
 :  arm_smmu_cmdq_issue_cmdlist():
 :  }
 :  }
 :
 :  local_irq_restore(flags);
 :  return ret;
 :  }
   99.51 :   80001071c958:   adrp    x0, 800011909000





Hi Marc,

This is likely the side effect of the re-enabling of interrupts (msr 
daif, x21)
on the previous instruction which causes the perf interrupt to fire 
right after.


ok, makes sense.



Time to enable pseudo-NMIs in the PMUv3 driver...



Do you know if there is any plan for this?


There was. Julien Thierry has a bunch of patches for that [1], but they 
needs

reviving.



In the meantime, maybe I can do some trickery by putting the
local_irq_restore() in a separate function, outside
arm_smmu_cmdq_issue_cmdlist(), to get a fair profile for that same
function.


I don't see how you can improve the profiling without compromising
the locking in this case...

Thanks,

M.

[1] https://patchwork.kernel.org/cover/11047407/
--
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: arm-smmu-v3 high cpu usage for NVMe

2020-03-20 Thread Marc Zyngier

Hi John,

On 2020-03-20 16:20, John Garry wrote:




I've run a bunch of netperf instances on multiple cores and 
collecting

SMMU usage (on TaiShan 2280). I'm getting the following ratio pretty
consistently.

- 6.07% arm_smmu_iotlb_sync
 - 5.74% arm_smmu_tlb_inv_range
  5.09% arm_smmu_cmdq_issue_cmdlist
  0.28% __pi_memset
  0.08% __pi_memcpy
  0.08% arm_smmu_atc_inv_domain.constprop.37
  0.07% arm_smmu_cmdq_build_cmd
  0.01% arm_smmu_cmdq_batch_add
   0.31% __pi_memset

So arm_smmu_atc_inv_domain() takes about 1.4% of 
arm_smmu_iotlb_sync(),
when ATS is not used. According to the annotations, the load from 
the
atomic_read(), that checks whether the domain uses ATS, is 77% of 
the
samples in arm_smmu_atc_inv_domain() (265 of 345 samples), so I'm 
not sure

there is much room for optimization there.


Well I did originally suggest using RCU protection to scan the list 
of
devices, instead of reading an atomic and checking for non-zero 
value. But
that would be an optimsation for ATS also, and there was no ATS 
devices at

the time (to verify performance).


Heh, I have yet to get my hands on one. Currently I can't evaluate ATS
performance, but I agree that using RCU to scan the list should get 
better

results when using ATS.

When ATS isn't in use however, I suspect reading nr_ats_masters should 
be
more efficient than taking the RCU lock + reading an "ats_devices" 
list

(since the smmu_domain->devices list also serves context descriptor
invalidation, even when ATS isn't in use). I'll run some tests 
however, to

see if I can micro-optimize this case, but I don't expect noticeable
improvements.


ok, cheers. I, too, would not expect a significant improvement there.

JFYI, I've been playing for "perf annotate" today and it's giving
strange results for my NVMe testing. So "report" looks somewhat sane,
if not a worryingly high % for arm_smmu_cmdq_issue_cmdlist():


55.39%  irq/342-nvme0q1  [kernel.kallsyms]  [k] 
arm_smmu_cmdq_issue_cmdlist
 9.74%  irq/342-nvme0q1  [kernel.kallsyms]  [k] 
_raw_spin_unlock_irqrestore

 2.02%  irq/342-nvme0q1  [kernel.kallsyms]  [k] nvme_irq
 1.86%  irq/342-nvme0q1  [kernel.kallsyms]  [k] fput_many
 1.73%  irq/342-nvme0q1  [kernel.kallsyms]  [k]
arm_smmu_atc_inv_domain.constprop.42
 1.67%  irq/342-nvme0q1  [kernel.kallsyms]  [k] __arm_lpae_unmap
 1.49%  irq/342-nvme0q1  [kernel.kallsyms]  [k] aio_complete_rw

But "annotate" consistently tells me that a specific instruction
consumes ~99% of the load for the enqueue function:

 :  /* 5. If we are inserting a CMD_SYNC,
we must wait for it to complete */
 :  if (sync) {
0.00 :   80001071c948:   ldr w0, [x29, #108]
 :  int ret = 0;
0.00 :   80001071c94c:   mov w24, #0x0  // #0
 :  if (sync) {
0.00 :   80001071c950:   cbnzw0, 80001071c990

 :  arch_local_irq_restore():
0.00 :   80001071c954:   msr daif, x21
 :  arm_smmu_cmdq_issue_cmdlist():
 :  }
 :  }
 :
 :  local_irq_restore(flags);
 :  return ret;
 :  }
   99.51 :   80001071c958:   adrpx0, 800011909000



This is likely the side effect of the re-enabling of interrupts (msr 
daif, x21)
on the previous instruction which causes the perf interrupt to fire 
right after.


Time to enable pseudo-NMIs in the PMUv3 driver...

 M.
--
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/rockchip: Don't provoke WARN for harmless IRQs

2019-11-12 Thread Marc Zyngier

On 2019-11-11 20:04, Robin Murphy wrote:
Although we don't generally expect IRQs to fire for a suspended 
IOMMU,

there are certain situations (particularly with debug options) where
we might legitimately end up with the pm_runtime_get_if_in_use() call
from rk_iommu_irq() returning 0. Since this doesn't represent an 
actual

error, follow the other parts of the driver and save the WARN_ON()
condition for a genuine negative value. Even if we do have spurious
IRQs due to a wedged VOP asserting the shared line, it's not this
driver's job to try to second-guess the IRQ core to warn about that.

Reported-by: Vasily Khoruzhick 
Signed-off-by: Robin Murphy 
---
 drivers/iommu/rockchip-iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/rockchip-iommu.c 
b/drivers/iommu/rockchip-iommu.c

index 4dcbf68dfda4..bd7e9b1e40ac 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -527,7 +527,7 @@ static irqreturn_t rk_iommu_irq(int irq, void 
*dev_id)

int i, err;

err = pm_runtime_get_if_in_use(iommu->dev);
-   if (WARN_ON_ONCE(err <= 0))
+   if (!err || WARN_ON_ONCE(err < 0))
return ret;

if (WARN_ON(clk_bulk_enable(iommu->num_clocks, iommu->clocks)))


Acked-by: Marc Zyngier 

M.
--
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/dma: Handle MSI mappings separately

2019-07-29 Thread Marc Zyngier

On 2019-07-29 16:32, Robin Murphy wrote:
MSI pages must always be mapped into a device's *current* domain, 
which

*might* be the default DMA domain, but might instead be a VFIO domain
with its own MSI cookie. This subtlety got accidentally lost in the
streamlining of __iommu_dma_map(), but rather than reintroduce more
complexity and/or special-casing, it turns out neater to just split 
this

path out entirely.

Since iommu_dma_get_msi_page() already duplicates much of what
__iommu_dma_map() does, it can easily just make the allocation and
mapping calls directly as well. That way we can further streamline 
the

helper back to exclusively operating on DMA domains.

Fixes: b61d271e59d7 ("iommu/dma: Move domain lookup into
__iommu_dma_{map,unmap}")
Reported-by: Shameer Kolothum 
Reported-by: Andre Przywara 
Signed-off-by: Robin Murphy 


With this patch, my TX2 is back in business with a bnx2x device passed
to a guest. FWIW:

Tested-by: Marc Zyngier 

Thanks,

M.
--
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 6/7] irqchip/gic-v3-mbi: Don't map the MSI page in mbi_compose_m{b, s}i_msg()

2019-05-03 Thread Marc Zyngier
On 01/05/2019 14:58, Julien Grall wrote:
> The functions mbi_compose_m{b, s}i_msg may be called from non-preemptible
> context. However, on RT, iommu_dma_map_msi_msg() requires to be called
> from a preemptible context.
> 
> A recent patch split iommu_dma_map_msi_msg in two new functions:
> one that should be called in preemptible context, the other does
> not have any requirement.
> 
> The GICv3 MSI driver is reworked to avoid executing preemptible code in
> non-preemptible context. This can be achieved by preparing the two MSI
> mappings when allocating the MSI interrupt.
> 
> Signed-off-by: Julien Grall 
> 
> ---
> Changes in v2:
> - Rework the commit message to use imperative mood
> ---
>  drivers/irqchip/irq-gic-v3-mbi.c | 15 +--
>  1 file changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/irqchip/irq-gic-v3-mbi.c 
> b/drivers/irqchip/irq-gic-v3-mbi.c
> index fbfa7ff6deb1..d50f6cdf043c 100644
> --- a/drivers/irqchip/irq-gic-v3-mbi.c
> +++ b/drivers/irqchip/irq-gic-v3-mbi.c
> @@ -84,6 +84,7 @@ static void mbi_free_msi(struct mbi_range *mbi, unsigned 
> int hwirq,
>  static int mbi_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
>  unsigned int nr_irqs, void *args)
>  {
> + msi_alloc_info_t *info = args;
>   struct mbi_range *mbi = NULL;
>   int hwirq, offset, i, err = 0;
>  
> @@ -104,6 +105,16 @@ static int mbi_irq_domain_alloc(struct irq_domain 
> *domain, unsigned int virq,
>  
>   hwirq = mbi->spi_start + offset;
>  
> + err = iommu_dma_prepare_msi(info->desc,
> + mbi_phys_base + GICD_CLRSPI_NSR);
> + if (err)
> + return err;
> +
> + err = iommu_dma_prepare_msi(info->desc,
> + mbi_phys_base + GICD_SETSPI_NSR);
> + if (err)
> + return err;

Nit here: The two registers are guaranteed to be in the same 4k page
(respectively offsets 0x48 and 0x40), so the second call is at best
useless. I'll fix it when applying it, no need to resend.

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 4/7] irqchip/gic-v3-its: Don't map the MSI page in its_irq_compose_msi_msg()

2019-05-01 Thread Marc Zyngier
On 01/05/2019 12:14, Julien Grall wrote:
> On 30/04/2019 13:34, Auger Eric wrote:
>> Hi Julien,
> 
> Hi Eric,
> 
> Thank you for the review!
> 
>>
>> On 4/29/19 4:44 PM, Julien Grall wrote:
>>> its_irq_compose_msi_msg() may be called from non-preemptible context.
>>> However, on RT, iommu_dma_map_msi_msg requires to be called from a
>>> preemptible context.
>>>
>>> A recent change split iommu_dma_map_msi_msg() in two new functions:
>>> one that should be called in preemptible context, the other does
>>> not have any requirement.
>>>
>>> The GICv3 ITS driver is reworked to avoid executing preemptible code in
>>> non-preemptible context. This can be achieved by preparing the MSI
>>> maping when allocating the MSI interrupt.
>> mapping
>>>
>>> Signed-off-by: Julien Grall 
>>>
>>> ---
>>>  Changes in v2:
>>>  - Rework the commit message to use imperative mood
>>> ---
>>>   drivers/irqchip/irq-gic-v3-its.c | 5 -
>>>   1 file changed, 4 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/irqchip/irq-gic-v3-its.c 
>>> b/drivers/irqchip/irq-gic-v3-its.c
>>> index 7577755bdcf4..12ddbcfe1b1e 100644
>>> --- a/drivers/irqchip/irq-gic-v3-its.c
>>> +++ b/drivers/irqchip/irq-gic-v3-its.c
>>> @@ -1179,7 +1179,7 @@ static void its_irq_compose_msi_msg(struct irq_data 
>>> *d, struct msi_msg *msg)
>>> msg->address_hi = upper_32_bits(addr);
>>> msg->data   = its_get_event_id(d);
>>>   
>>> -   iommu_dma_map_msi_msg(d->irq, msg);
>>> +   iommu_dma_compose_msi_msg(irq_data_get_msi_desc(d), msg);
>>>   }
>>>   
>>>   static int its_irq_set_irqchip_state(struct irq_data *d,
>>> @@ -2566,6 +2566,7 @@ static int its_irq_domain_alloc(struct irq_domain 
>>> *domain, unsigned int virq,
>>>   {
>>> msi_alloc_info_t *info = args;
>>> struct its_device *its_dev = info->scratchpad[0].ptr;
>>> +   struct its_node *its = its_dev->its;
>>> irq_hw_number_t hwirq;
>>> int err;
>>> int i;
>>> @@ -2574,6 +2575,8 @@ static int its_irq_domain_alloc(struct irq_domain 
>>> *domain, unsigned int virq,
>>> if (err)
>>> return err;
>>>   
>>> +   err = iommu_dma_prepare_msi(info->desc, its->get_msi_base(its_dev));
>> Test err as in gicv2m driver?
> 
> Hmmm yes. Marc, do you want me to respin the patch?

Sure, feel free to if you can. But what I really need is an Ack from
Jorg on the first few patches.

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 0/7] iommu/dma-iommu: Split iommu_dma_map_msi_msg in two parts

2019-04-29 Thread Marc Zyngier
Hi Julien,

On 29/04/2019 15:44, Julien Grall wrote:
> Hi all,
> 
> On RT, the function iommu_dma_map_msi_msg expects to be called from 
> preemptible
> context. However, this is not always the case resulting a splat with
> !CONFIG_DEBUG_ATOMIC_SLEEP:
> 
> [   48.875777] BUG: sleeping function called from invalid context at 
> kernel/locking/rtmutex.c:974
> [   48.875779] in_atomic(): 1, irqs_disabled(): 128, pid: 2103, name: ip
> [   48.875782] INFO: lockdep is turned off.
> [   48.875784] irq event stamp: 10684
> [   48.875786] hardirqs last  enabled at (10683): [] 
> _raw_spin_unlock_irqrestore+0x88/0x90
> [   48.875791] hardirqs last disabled at (10684): [] 
> _raw_spin_lock_irqsave+0x24/0x68
> [   48.875796] softirqs last  enabled at (0): [] 
> copy_process.isra.1.part.2+0x8d8/0x1970
> [   48.875801] softirqs last disabled at (0): [<>]   
> (null)
> [   48.875805] Preemption disabled at:
> [   48.875805] [] __setup_irq+0xd8/0x6c0
> [   48.875811] CPU: 2 PID: 2103 Comm: ip Not tainted 
> 5.0.3-rt1-7-g42ede9a0fed6 #45
> [   48.875815] Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno 
> Development Platform, BIOS EDK II Jan 23 2017
> [   48.875817] Call trace:
> [   48.875818]  dump_backtrace+0x0/0x140
> [   48.875821]  show_stack+0x14/0x20
> [   48.875823]  dump_stack+0xa0/0xd4
> [   48.875827]  ___might_sleep+0x16c/0x1f8
> [   48.875831]  rt_spin_lock+0x5c/0x70
> [   48.875835]  iommu_dma_map_msi_msg+0x5c/0x1d8
> [   48.875839]  gicv2m_compose_msi_msg+0x3c/0x48
> [   48.875843]  irq_chip_compose_msi_msg+0x40/0x58
> [   48.875846]  msi_domain_activate+0x38/0x98
> [   48.875849]  __irq_domain_activate_irq+0x58/0xa0
> [   48.875852]  irq_domain_activate_irq+0x34/0x58
> [   48.875855]  irq_activate+0x28/0x30
> [   48.875858]  __setup_irq+0x2b0/0x6c0
> [   48.875861]  request_threaded_irq+0xdc/0x188
> [   48.875865]  sky2_setup_irq+0x44/0xf8
> [   48.875868]  sky2_open+0x1a4/0x240
> [   48.875871]  __dev_open+0xd8/0x188
> [   48.875874]  __dev_change_flags+0x164/0x1f0
> [   48.875877]  dev_change_flags+0x20/0x60
> [   48.875879]  do_setlink+0x2a0/0xd30
> [   48.875882]  __rtnl_newlink+0x5b4/0x6d8
> [   48.875885]  rtnl_newlink+0x50/0x78
> [   48.875888]  rtnetlink_rcv_msg+0x178/0x640
> [   48.875891]  netlink_rcv_skb+0x58/0x118
> [   48.875893]  rtnetlink_rcv+0x14/0x20
> [   48.875896]  netlink_unicast+0x188/0x200
> [   48.875898]  netlink_sendmsg+0x248/0x3d8
> [   48.875900]  sock_sendmsg+0x18/0x40
> [   48.875904]  ___sys_sendmsg+0x294/0x2d0
> [   48.875908]  __sys_sendmsg+0x68/0xb8
> [   48.875911]  __arm64_sys_sendmsg+0x20/0x28
> [   48.875914]  el0_svc_common+0x90/0x118
> [   48.875918]  el0_svc_handler+0x2c/0x80
> [   48.875922]  el0_svc+0x8/0xc
> 
> This series is a first attempt to rework how MSI are mapped and composed
> when an IOMMU is present.
> 
> I was able to test the changes in GICv2m and GICv3 ITS. I don't have
> hardware for the other interrupt controllers.
> 
> Cheers,
> 
> Julien Grall (7):
>   genirq/msi: Add a new field in msi_desc to store an IOMMU cookie
>   iommu/dma-iommu: Split iommu_dma_map_msi_msg() in two parts
>   irqchip/gicv2m: Don't map the MSI page in gicv2m_compose_msi_msg()
>   irqchip/gic-v3-its: Don't map the MSI page in
> its_irq_compose_msi_msg()
>   irqchip/ls-scfg-msi: Don't map the MSI page in
> ls_scfg_msi_compose_msg()
>   irqchip/gic-v3-mbi: Don't map the MSI page in mbi_compose_m{b,
> s}i_msg()
>   iommu/dma-iommu: Remove iommu_dma_map_msi_msg()
> 
>  drivers/iommu/Kconfig |  1 +
>  drivers/iommu/dma-iommu.c | 49 
> +++
>  drivers/irqchip/irq-gic-v2m.c |  8 ++-
>  drivers/irqchip/irq-gic-v3-its.c  |  5 +++-
>  drivers/irqchip/irq-gic-v3-mbi.c  | 15 ++--
>  drivers/irqchip/irq-ls-scfg-msi.c |  7 +-
>  include/linux/dma-iommu.h | 22 --
>  include/linux/msi.h   | 26 +
>  kernel/irq/Kconfig|  3 +++
>  9 files changed, 109 insertions(+), 27 deletions(-)

Thanks for having reworked this. I'm quite happy with the way this looks
now (modulo the couple of nits Robin and I mentioned, which I'm to
address myself).

Jorg: are you OK with this going via the irq tree?

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 2/7] iommu/dma-iommu: Split iommu_dma_map_msi_msg() in two parts

2019-04-29 Thread Marc Zyngier
On 29/04/2019 15:44, Julien Grall wrote:
> On RT, iommu_dma_map_msi_msg() may be called from non-preemptible
> context. This will lead to a splat with CONFIG_DEBUG_ATOMIC_SLEEP as
> the function is using spin_lock (they can sleep on RT).
> 
> iommu_dma_map_msi_msg() is used to map the MSI page in the IOMMU PT
> and update the MSI message with the IOVA.
> 
> Only the part to lookup for the MSI page requires to be called in
> preemptible context. As the MSI page cannot change over the lifecycle
> of the MSI interrupt, the lookup can be cached and re-used later on.
> 
> iomma_dma_map_msi_msg() is now split in two functions:
> - iommu_dma_prepare_msi(): This function will prepare the mapping
> in the IOMMU and store the cookie in the structure msi_desc. This
> function should be called in preemptible context.
> - iommu_dma_compose_msi_msg(): This function will update the MSI
> message with the IOVA when the device is behind an IOMMU.
> 
> Signed-off-by: Julien Grall 
> 
> ---
> Changes in v2:
> - Rework the commit message to use imperative mood
> - Use the MSI accessor to get/set the iommu cookie
> - Don't use ternary on return
> - Select CONFIG_IRQ_MSI_IOMMU
> - Pass an msi_desc rather than the irq number
> ---
>  drivers/iommu/Kconfig |  1 +
>  drivers/iommu/dma-iommu.c | 47 
> ++-
>  include/linux/dma-iommu.h | 23 +++
>  3 files changed, 62 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index 6f07f3b21816..eb1c8cd243f9 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -94,6 +94,7 @@ config IOMMU_DMA
>   bool
>   select IOMMU_API
>   select IOMMU_IOVA
> + select IRQ_MSI_IOMMU
>   select NEED_SG_DMA_LENGTH
>  
>  config FSL_PAMU
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index 77aabe637a60..2309f59cefa4 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -888,17 +888,18 @@ static struct iommu_dma_msi_page 
> *iommu_dma_get_msi_page(struct device *dev,
>   return NULL;
>  }
>  
> -void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg)
> +int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
>  {
> - struct device *dev = msi_desc_to_dev(irq_get_msi_desc(irq));
> + struct device *dev = msi_desc_to_dev(desc);
>   struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
>   struct iommu_dma_cookie *cookie;
>   struct iommu_dma_msi_page *msi_page;
> - phys_addr_t msi_addr = (u64)msg->address_hi << 32 | msg->address_lo;
>   unsigned long flags;
>  
> - if (!domain || !domain->iova_cookie)
> - return;
> + if (!domain || !domain->iova_cookie) {
> + desc->iommu_cookie = NULL;

nit: This could be

msi_desc_set_iommu_cookie(desc, NULL);

now that you have introduced the relevant accessors.

> + return 0;
> + }
>  
>   cookie = domain->iova_cookie;
>  
> @@ -911,7 +912,37 @@ void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg)
>   msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain);
>   spin_unlock_irqrestore(>msi_lock, flags);
>  
> - if (WARN_ON(!msi_page)) {
> + msi_desc_set_iommu_cookie(desc, msi_page);
> +
> + if (!msi_page)
> + return -ENOMEM;
> + else
> + return 0;
> +}
> +
> +void iommu_dma_compose_msi_msg(struct msi_desc *desc,
> +struct msi_msg *msg)
> +{
> + struct device *dev = msi_desc_to_dev(desc);
> + const struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
> + const struct iommu_dma_msi_page *msi_page;
> +
> + msi_page = msi_desc_get_iommu_cookie(desc);
> +
> + if (!domain || !domain->iova_cookie || WARN_ON(!msi_page))
> + return;
> +
> + msg->address_hi = upper_32_bits(msi_page->iova);
> + msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1;
> + msg->address_lo += lower_32_bits(msi_page->iova);
> +}
> +
> +void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg)
> +{
> + struct msi_desc *desc = irq_get_msi_desc(irq);
> + phys_addr_t msi_addr = (u64)msg->address_hi << 32 | msg->address_lo;
> +
> + if (WARN_ON(iommu_dma_prepare_msi(desc, msi_addr))) {
>   /*
>* We're called from a void callback, so the best we can do is
>* 'fail' by filling the message with obviously bogus values.
> @@ -922,8 +953,6 @@ void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg)
>   msg->address_lo = ~0U;
>   msg->data = ~0U;
>   } else {
> - msg->address_hi = upper_32_bits(msi_page->iova);
> - msg->address_lo &= cookie_msi_granule(cookie) - 1;
> - msg->address_lo += lower_32_bits(msi_page->iova);
> + iommu_dma_compose_msi_msg(desc, msg);
>   }
>  }
> 

Re: [PATCH 1/7] genirq/msi: Add a new field in msi_desc to store an IOMMU cookie

2019-04-23 Thread Marc Zyngier
On 23/04/2019 14:19, Robin Murphy wrote:
> On 23/04/2019 12:46, Marc Zyngier wrote:
>> On 23/04/2019 11:51, Julien Grall wrote:
>>> On 4/23/19 11:23 AM, Marc Zyngier wrote:
>>>> Hi Julien,
>>>
>>> Hi Marc,
>>>
>>>> On 18/04/2019 18:26, Julien Grall wrote:
>>>>> When an MSI doorbell is located downstream of an IOMMU, it is required
>>>>> to swizzle the physical address with an appropriately-mapped IOVA for any
>>>>> device attached to one of our DMA ops domain.
>>>>>
>>>>> At the moment, the allocation of the mapping may be done when composing
>>>>> the message. However, the composing may be done in non-preemtible
>>>>> context while the allocation requires to be called from preemptible
>>>>> context.
>>>>>
>>>>> A follow-up patch will split the current logic in two functions
>>>>> requiring to keep an IOMMU cookie per MSI.
>>>>>
>>>>> This patch introduces a new field in msi_desc to store an IOMMU cookie
>>>>> when CONFIG_IOMMU_DMA is selected.
>>>>>
>>>>> Signed-off-by: Julien Grall 
>>>>> ---
>>>>>include/linux/msi.h | 3 +++
>>>>>1 file changed, 3 insertions(+)
>>>>>
>>>>> diff --git a/include/linux/msi.h b/include/linux/msi.h
>>>>> index 7e9b81c3b50d..d7907feef1bb 100644
>>>>> --- a/include/linux/msi.h
>>>>> +++ b/include/linux/msi.h
>>>>> @@ -77,6 +77,9 @@ struct msi_desc {
>>>>>   struct device   *dev;
>>>>>   struct msi_msg  msg;
>>>>>   struct irq_affinity_desc*affinity;
>>>>> +#ifdef CONFIG_IOMMU_DMA
>>>>> + const void  *iommu_cookie;
>>>>> +#endif
>>>>>
>>>>>   union {
>>>>>   /* PCI MSI/X specific data */
>>>>>
>>>>
>>>> Given that this is the only member in this structure that is dependent
>>>> on a config option, you could also add a couple of accessors that would
>>>> do nothing when IOMMU_DMA is not selected (and use that in the DMA code).
>>>
>>> I haven't seen any use of the helpers so far because the DMA code is
>>> also protected by IOMMU_DMA.
>>>
>>> I can add the helpers in the next version if you see any use outside of
>>> the DMA code.
>>
>> There may not be any user user yet, but I'd surely like to see the
>> accessors. This isn't very different from the stub functions you add in
>> patch #2.
> 
> If you foresee this being useful in general, do you reckon it would be 
> worth decoupling it under its own irqchip-layer Kconfig which can then 
> be selected by IOMMU_DMA?

I think that'd be a useful thing to do, as most architectures do not
require this dynamic mapping of MSIs.

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/7] genirq/msi: Add a new field in msi_desc to store an IOMMU cookie

2019-04-23 Thread Marc Zyngier
On 23/04/2019 11:51, Julien Grall wrote:
> On 4/23/19 11:23 AM, Marc Zyngier wrote:
>> Hi Julien,
> 
> Hi Marc,
> 
>> On 18/04/2019 18:26, Julien Grall wrote:
>>> When an MSI doorbell is located downstream of an IOMMU, it is required
>>> to swizzle the physical address with an appropriately-mapped IOVA for any
>>> device attached to one of our DMA ops domain.
>>>
>>> At the moment, the allocation of the mapping may be done when composing
>>> the message. However, the composing may be done in non-preemtible
>>> context while the allocation requires to be called from preemptible
>>> context.
>>>
>>> A follow-up patch will split the current logic in two functions
>>> requiring to keep an IOMMU cookie per MSI.
>>>
>>> This patch introduces a new field in msi_desc to store an IOMMU cookie
>>> when CONFIG_IOMMU_DMA is selected.
>>>
>>> Signed-off-by: Julien Grall 
>>> ---
>>>   include/linux/msi.h | 3 +++
>>>   1 file changed, 3 insertions(+)
>>>
>>> diff --git a/include/linux/msi.h b/include/linux/msi.h
>>> index 7e9b81c3b50d..d7907feef1bb 100644
>>> --- a/include/linux/msi.h
>>> +++ b/include/linux/msi.h
>>> @@ -77,6 +77,9 @@ struct msi_desc {
>>> struct device   *dev;
>>> struct msi_msg  msg;
>>> struct irq_affinity_desc*affinity;
>>> +#ifdef CONFIG_IOMMU_DMA
>>> +   const void  *iommu_cookie;
>>> +#endif
>>>   
>>> union {
>>> /* PCI MSI/X specific data */
>>>
>>
>> Given that this is the only member in this structure that is dependent
>> on a config option, you could also add a couple of accessors that would
>> do nothing when IOMMU_DMA is not selected (and use that in the DMA code).
> 
> I haven't seen any use of the helpers so far because the DMA code is 
> also protected by IOMMU_DMA.
> 
> I can add the helpers in the next version if you see any use outside of 
> the DMA code.

There may not be any user user yet, but I'd surely like to see the
accessors. This isn't very different from the stub functions you add in
patch #2.

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/7] genirq/msi: Add a new field in msi_desc to store an IOMMU cookie

2019-04-23 Thread Marc Zyngier
Hi Julien,

On 18/04/2019 18:26, Julien Grall wrote:
> When an MSI doorbell is located downstream of an IOMMU, it is required
> to swizzle the physical address with an appropriately-mapped IOVA for any
> device attached to one of our DMA ops domain.
> 
> At the moment, the allocation of the mapping may be done when composing
> the message. However, the composing may be done in non-preemtible
> context while the allocation requires to be called from preemptible
> context.
> 
> A follow-up patch will split the current logic in two functions
> requiring to keep an IOMMU cookie per MSI.
> 
> This patch introduces a new field in msi_desc to store an IOMMU cookie
> when CONFIG_IOMMU_DMA is selected.
> 
> Signed-off-by: Julien Grall 
> ---
>  include/linux/msi.h | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/include/linux/msi.h b/include/linux/msi.h
> index 7e9b81c3b50d..d7907feef1bb 100644
> --- a/include/linux/msi.h
> +++ b/include/linux/msi.h
> @@ -77,6 +77,9 @@ struct msi_desc {
>   struct device   *dev;
>   struct msi_msg  msg;
>   struct irq_affinity_desc*affinity;
> +#ifdef CONFIG_IOMMU_DMA
> + const void  *iommu_cookie;
> +#endif
>  
>   union {
>   /* PCI MSI/X specific data */
> 

Given that this is the only member in this structure that is dependent
on a config option, you could also add a couple of accessors that would
do nothing when IOMMU_DMA is not selected (and use that in the DMA code).

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v6 09/22] vfio: VFIO_IOMMU_BIND/UNBIND_MSI

2019-04-10 Thread Marc Zyngier
Hi Vincent,

On 10/04/2019 13:35, Vincent Stehlé wrote:
> On Thu, Apr 04, 2019 at 08:55:25AM +0200, Auger Eric wrote:
>> Hi Marc, Robin, Alex,
> (..)
>> Do you think this is a reasonable assumption to consider devices within
>> the same host iommu group share the same MSI doorbell?
> 
> Hi Eric,
> 
> I am not sure this assumption always hold.
> 
> Marc, Robin and Alex can correct me, but for example I think the following
> topology is valid for Arm systems:
> 
>  ++  ++
>  | Endpoint A |  | Endpoint B |
>  ++  ++
> v v
>   /-\
>  |  Non-ACS  |
>  |  Switch   |
>   \-/
>v
>+---+
>| PCIe  |
>| Root Complex  |
>+---+
>v
>  +---+
>  |   SMMU|
>  +---+
>v
>   +--+
>   |   System interconnect|
>   +--+
> v  v
>   +---+  +---+
>   |   ITS A   |  |   ITS B   |
>   +---+  +---+
> 
> All PCIe Endpoints and ITS could be in the same ITS Group 0, meaning
> devices could send their MSI at any ITS in hardware.
> 
> For Linux the two PCIe Endpoints would be in the same iommu group, because
> the switch in this example does not support ACS.
> 
> I think the devicetree msi-map property could be used to "map" the RID of
> Endpoint A to ITS A and the RID of Endpoint B to ITS B, which would violate
> the assumption.
> 
> See the monolithic example in [1], the example system in [2], appendices
> D, E and F in [3] and the msi-map property in [4].

I think we are all in agreement that this is a possible topology. It is
just that it doesn't exist in any real-life implementation we know of
(the ITS tends to be close to the RC and not downstream of the
interconnect).

Given the complexity of what we're trying to put together, I'd rather
start with a small step which supports commonly implemented topology,
and later address the odd ones if they actually crop up.

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH] iommu: rockchip: Drop verbose prints in the interrupt handler

2018-09-27 Thread Marc Zyngier

On 26/09/18 18:31, Ezequiel Garcia wrote:

On Tue, 2018-09-25 at 13:29 +0200, Joerg Roedel wrote:

On Thu, Aug 30, 2018 at 07:28:32PM -0300, Ezequiel Garcia wrote:

Printing verbosely via WARN macros and friends in interrupt handlers
is strongly discouraged. Drop them and use proper ratelimited
prints.

Signed-off-by: Ezequiel Garcia 
---
  drivers/iommu/rockchip-iommu.c | 10 ++
  1 file changed, 6 insertions(+), 4 deletions(-)


This doesn't apply cleanly to v4.19-rc5, can you please base the patch
on that tag, add the Reviewed-by and re-send?



Well, after looking at the latest code, now I'm wondering why Marc
changed the WARN_ON with a WARN_ON_ONCE, instead of a less verbose
ratelimited print.


WARN_ONCE does it exactly once. A rate-limited printk will still spit as 
many messages as you want.


Thanks,

M.
--
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/rockchip: Free irqs in shutdown handler

2018-09-20 Thread Marc Zyngier
On Mon, 27 Aug 2018 11:56:24 +0100,
Heiko Stuebner  wrote:
> 
> In the iommu's shutdown handler we disable runtime-pm which could
> result in the irq-handler running unclocked and since commit
> 3fc7c5c0cff3 ("iommu/rockchip: Handle errors returned from PM framework")
> we warn about that fact.
> 
> This can cause warnings on shutdown on some Rockchip machines, so
> free the irqs in the shutdown handler before we disable runtime-pm.
> 
> Reported-by: Enric Balletbo i Serra 
> Fixes: 3fc7c5c0cff3 ("iommu/rockchip: Handle errors returned from PM 
> framework")
> Signed-off-by: Heiko Stuebner 
> ---
>  drivers/iommu/rockchip-iommu.c | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
> index 258115b10fa9..ad3e2b97469e 100644
> --- a/drivers/iommu/rockchip-iommu.c
> +++ b/drivers/iommu/rockchip-iommu.c
> @@ -1241,6 +1241,12 @@ static int rk_iommu_probe(struct platform_device *pdev)
>  
>  static void rk_iommu_shutdown(struct platform_device *pdev)
>  {
> + struct rk_iommu *iommu = platform_get_drvdata(pdev);
> + int i = 0, irq;
> +
> + while ((irq = platform_get_irq(pdev, i++)) != -ENXIO)
> + devm_free_irq(iommu->dev, irq, iommu);
> +
>   pm_runtime_force_suspend(>dev);
>  }
>  
> -- 
> 2.17.0
> 

Acked-by: Marc Zyngier 

M.

-- 
Jazz is not dead, it just smell funny.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/rockchip: Free irqs in shutdown handler

2018-09-11 Thread Marc Zyngier

Hi Heiko,

On 27/08/18 11:56, Heiko Stuebner wrote:

In the iommu's shutdown handler we disable runtime-pm which could
result in the irq-handler running unclocked and since commit
3fc7c5c0cff3 ("iommu/rockchip: Handle errors returned from PM framework")
we warn about that fact.

This can cause warnings on shutdown on some Rockchip machines, so
free the irqs in the shutdown handler before we disable runtime-pm.

Reported-by: Enric Balletbo i Serra 
Fixes: 3fc7c5c0cff3 ("iommu/rockchip: Handle errors returned from PM framework")
Signed-off-by: Heiko Stuebner 
---
 drivers/iommu/rockchip-iommu.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 258115b10fa9..ad3e2b97469e 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1241,6 +1241,12 @@ static int rk_iommu_probe(struct platform_device *pdev)
 
 static void rk_iommu_shutdown(struct platform_device *pdev)

 {
+   struct rk_iommu *iommu = platform_get_drvdata(pdev);
+   int i = 0, irq;
+
+   while ((irq = platform_get_irq(pdev, i++)) != -ENXIO)
+   devm_free_irq(iommu->dev, irq, iommu);
+
pm_runtime_force_suspend(>dev);
 }
 



Looks OK to me. I don't think there is a point in using devm_irq* 
anymore in this driver, but that's a separate issue.


Acked-by: Marc Zyngier 

M.
--
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 4/4] iommu/rockchip: Move irq request past pm_runtime_enable

2018-08-24 Thread Marc Zyngier
Enabling the interrupt early, before power has been applied to the
device, can result in an interrupt being delivered too early if:

- the IOMMU shares an interrupt with a VOP
- the VOP has a pending interrupt (after a kexec, for example)

In these conditions, we end-up taking the interrupt without
the IOMMU being ready to handle the interrupt (not powered on).

Moving the interrupt request past the pm_runtime_enable() call
makes sure we can at least access the IOMMU registers. Note that
this is only a partial fix, and that the VOP interrupt will still
be screaming until the VOP driver kicks in, which advocates for
a more synchronized interrupt enabling/disabling approach.

Fixes: 0f181d3cf7d98 ("iommu/rockchip: Add runtime PM support")
Reviewed-by: Heiko Stuebner 
Signed-off-by: Marc Zyngier 
---
 drivers/iommu/rockchip-iommu.c | 24 +---
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 4e0f9b61cd7f..2b1724e8d307 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1161,17 +1161,6 @@ static int rk_iommu_probe(struct platform_device *pdev)
if (iommu->num_mmu == 0)
return PTR_ERR(iommu->bases[0]);
 
-   i = 0;
-   while ((irq = platform_get_irq(pdev, i++)) != -ENXIO) {
-   if (irq < 0)
-   return irq;
-
-   err = devm_request_irq(iommu->dev, irq, rk_iommu_irq,
-  IRQF_SHARED, dev_name(dev), iommu);
-   if (err)
-   return err;
-   }
-
iommu->reset_disabled = device_property_read_bool(dev,
"rockchip,disable-mmu-reset");
 
@@ -1228,6 +1217,19 @@ static int rk_iommu_probe(struct platform_device *pdev)
 
pm_runtime_enable(dev);
 
+   i = 0;
+   while ((irq = platform_get_irq(pdev, i++)) != -ENXIO) {
+   if (irq < 0)
+   return irq;
+
+   err = devm_request_irq(iommu->dev, irq, rk_iommu_irq,
+  IRQF_SHARED, dev_name(dev), iommu);
+   if (err) {
+   pm_runtime_disable(dev);
+   goto err_remove_sysfs;
+   }
+   }
+
return 0;
 err_remove_sysfs:
iommu_device_sysfs_remove(>iommu);
-- 
2.18.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 2/4] arm64: rockchip: Force CONFIG_PM on Rockchip systems

2018-08-24 Thread Marc Zyngier
A number of the Rockchip-specific drivers (IOMMU, display controllers)
are now assuming that CONFIG_PM is set, and may completely misbehave
if that's not the case.

Since there is hardly any reason for this configuration option not
to be selected anyway, let's require it (in the same way Tegra already
does).

Signed-off-by: Marc Zyngier 
---
 arch/arm64/Kconfig.platforms | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index d5aeac351fc3..21a715ad8222 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -151,6 +151,7 @@ config ARCH_ROCKCHIP
select GPIOLIB
select PINCTRL
select PINCTRL_ROCKCHIP
+   select PM
select ROCKCHIP_TIMER
help
  This enables support for the ARMv8 based Rockchip chipsets,
-- 
2.18.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 0/4] iommu/rockchip: Runtime PM fixes

2018-08-24 Thread Marc Zyngier
This small series addresses a couple of runtime PM issues I've spotted
while running 4.18 on a Chromebook Plus (kevin, rk3399) platform, and
specifically doing kexec.

In order to avoid making a complete mess of the IOMMU code, Heiko has
requested that all RK platforms would select CONFIG_PM, which
simplifies a lot of things. I've kept 32 and 64bit patches separate,
but feel free to squash them into on if that's more convenient.

Note that even with these patches, kexec is still fairly broken on
rk3399, as the VOP is never turned off (see [1] for a fix).

[1] https://www.spinics.net/lists/arm-kernel/msg670229.html

* From v1:
  - Collected RBs from Heiko
  - Added two patches forcing CONFIG_PM on all Rockchip platforms at
Heiko's request, following the example set by Tegra platforms.

Marc Zyngier (4):
  ARM: rockchip: Force CONFIG_PM on Rockchip systems
  arm64: rockchip: Force CONFIG_PM on Rockchip systems
  iommu/rockchip: Handle errors returned from PM framework
  iommu/rockchip: Move irq request past pm_runtime_enable

 arch/arm/mach-rockchip/Kconfig |  1 +
 arch/arm64/Kconfig.platforms   |  1 +
 drivers/iommu/rockchip-iommu.c | 45 +-
 3 files changed, 30 insertions(+), 17 deletions(-)

-- 
2.18.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 3/4] iommu/rockchip: Handle errors returned from PM framework

2018-08-24 Thread Marc Zyngier
pm_runtime_get_if_in_use can fail: either PM has been disabled
altogether (-EINVAL), or the device hasn't been enabled yet (0).
Sadly, the Rockchip IOMMU driver tends to conflate the two things
by considering a non-zero return value as successful.

This has the consequence of hiding other bugs, so let's handle this
case throughout the driver, with a WARN_ON_ONCE so that we can try
and work out what happened.

Fixes: 0f181d3cf7d98 ("iommu/rockchip: Add runtime PM support")
Reviewed-by: Heiko Stuebner 
Signed-off-by: Marc Zyngier 
---
 drivers/iommu/rockchip-iommu.c | 21 +++--
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 054cd2c8e9c8..4e0f9b61cd7f 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -521,10 +521,11 @@ static irqreturn_t rk_iommu_irq(int irq, void *dev_id)
u32 int_status;
dma_addr_t iova;
irqreturn_t ret = IRQ_NONE;
-   int i;
+   int i, err;
 
-   if (WARN_ON(!pm_runtime_get_if_in_use(iommu->dev)))
-   return 0;
+   err = pm_runtime_get_if_in_use(iommu->dev);
+   if (WARN_ON_ONCE(err <= 0))
+   return ret;
 
if (WARN_ON(clk_bulk_enable(iommu->num_clocks, iommu->clocks)))
goto out;
@@ -620,11 +621,15 @@ static void rk_iommu_zap_iova(struct rk_iommu_domain 
*rk_domain,
spin_lock_irqsave(_domain->iommus_lock, flags);
list_for_each(pos, _domain->iommus) {
struct rk_iommu *iommu;
+   int ret;
 
iommu = list_entry(pos, struct rk_iommu, node);
 
/* Only zap TLBs of IOMMUs that are powered on. */
-   if (pm_runtime_get_if_in_use(iommu->dev)) {
+   ret = pm_runtime_get_if_in_use(iommu->dev);
+   if (WARN_ON_ONCE(ret < 0))
+   continue;
+   if (ret) {
WARN_ON(clk_bulk_enable(iommu->num_clocks,
iommu->clocks));
rk_iommu_zap_lines(iommu, iova, size);
@@ -891,6 +896,7 @@ static void rk_iommu_detach_device(struct iommu_domain 
*domain,
struct rk_iommu *iommu;
struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
unsigned long flags;
+   int ret;
 
/* Allow 'virtual devices' (eg drm) to detach from domain */
iommu = rk_iommu_from_dev(dev);
@@ -909,7 +915,9 @@ static void rk_iommu_detach_device(struct iommu_domain 
*domain,
list_del_init(>node);
spin_unlock_irqrestore(_domain->iommus_lock, flags);
 
-   if (pm_runtime_get_if_in_use(iommu->dev)) {
+   ret = pm_runtime_get_if_in_use(iommu->dev);
+   WARN_ON_ONCE(ret < 0);
+   if (ret > 0) {
rk_iommu_disable(iommu);
pm_runtime_put(iommu->dev);
}
@@ -946,7 +954,8 @@ static int rk_iommu_attach_device(struct iommu_domain 
*domain,
list_add_tail(>node, _domain->iommus);
spin_unlock_irqrestore(_domain->iommus_lock, flags);
 
-   if (!pm_runtime_get_if_in_use(iommu->dev))
+   ret = pm_runtime_get_if_in_use(iommu->dev);
+   if (!ret || WARN_ON_ONCE(ret < 0))
return 0;
 
ret = rk_iommu_enable(iommu);
-- 
2.18.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v2 1/4] ARM: rockchip: Force CONFIG_PM on Rockchip systems

2018-08-24 Thread Marc Zyngier
A number of the Rockchip-specific drivers (IOMMU, display controllers)
are now assuming that CONFIG_PM is set, and may completely misbehave
if that's not the case.

Since there is hardly any reason for this configuration option not
to be selected anyway, let's require it (in the same way Tegra already
does).

Signed-off-by: Marc Zyngier 
---
 arch/arm/mach-rockchip/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mach-rockchip/Kconfig b/arch/arm/mach-rockchip/Kconfig
index fafd3d7f9f8c..8ca926522026 100644
--- a/arch/arm/mach-rockchip/Kconfig
+++ b/arch/arm/mach-rockchip/Kconfig
@@ -17,6 +17,7 @@ config ARCH_ROCKCHIP
select ARM_GLOBAL_TIMER
select CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK
select ZONE_DMA if ARM_LPAE
+   select PM
help
  Support for Rockchip's Cortex-A9 Single-to-Quad-Core-SoCs
  containing the RK2928, RK30xx and RK31xx series.
-- 
2.18.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/2] iommu/rockchip: Handle errors returned from PM framework

2018-08-07 Thread Marc Zyngier
On 07/08/18 14:15, Heiko Stuebner wrote:
> Am Dienstag, 7. August 2018, 14:31:49 CEST schrieb Marc Zyngier:
>> On 07/08/18 13:09, Heiko Stuebner wrote:
>>> Hi Marc,
>>>
>>> Am Dienstag, 7. August 2018, 10:54:05 CEST schrieb Marc Zyngier:
>>>> pm_runtime_get_if_in_use can fail: either PM has been disabled
>>>> altogether (-EINVAL), or the device hasn't been enabled yet (0).
>>>> Sadly, the Rockchip IOMMU driver tends to conflate the two things
>>>> by considering a non-zero return value as successful.
>>>>
>>>> This has the consequence of hiding other bugs, so let's handle this
>>>> case throughout the driver, with a WARN_ON_ONCE so that we can try
>>>> and work out what happened.
>>>>
>>>> Fixes: 0f181d3cf7d98 ("iommu/rockchip: Add runtime PM support")
>>>> Signed-off-by: Marc Zyngier 
>>>
>>> I'm still not sure about the !CONFIG_PM case, as it was probably silently
>>> working in that case before
>>
>> Do we agree that this is an orthogonal problem though?
> 
> Nope ;-)  I.e. right now the code ignores the -EINVAL from disabled PM
> and continues, possibly even handling the irq correctly.

Ah, I now see what you mean. Yeah, this is a bit rubbish. It would have
been better if the API returned something more sensible in that case,
but that's a bit late...

> If it actually worked is a different matter, as I guess nobody really tried
> with !PM in the past.

I don't think anyone noticed. !CONFIG_PM on something like rk3399
probably isn't very popular, and certainly comes for free on a
multiplatform kernel.

> Now with error-handling we always return IRQ_NONE for !PM.

Yup.

>>> But on the other hand we're also already running over it in other places
>>> like in the iommu-shutdown and I guess if someone _really_ disabled
>>> CONFIG_PM, a lot of additional stuff would fail anyway.
>>>
>>> So should we wrap that in some #ifdef magic, just ignore it or simply
>>> select PM similar to what Tegra, Renesas and Vexpress seem to do?
>>>
>>> I guess I like the 3rd option best ;-)
>>
>> It probably doesn't hurt. At what level do you want it? As a dependency
>> to the IOMMU? or to the platform?
> 
> I guess it might be best to go the Tegra, etc way. Whoever in their right
> mind would want to drive a mobile platform without any form for power
> management ;-) .
> 
> I can do these patches for arm32+arm64 myself ... I just wanted to put
> that thought out there - in case that was just a stupid idea of mine :-D .

Not stupid at all. Regarding this very patch: where do you want me to
take it?

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/2] iommu/rockchip: Handle errors returned from PM framework

2018-08-07 Thread Marc Zyngier
On 07/08/18 13:09, Heiko Stuebner wrote:
> Hi Marc,
> 
> Am Dienstag, 7. August 2018, 10:54:05 CEST schrieb Marc Zyngier:
>> pm_runtime_get_if_in_use can fail: either PM has been disabled
>> altogether (-EINVAL), or the device hasn't been enabled yet (0).
>> Sadly, the Rockchip IOMMU driver tends to conflate the two things
>> by considering a non-zero return value as successful.
>>
>> This has the consequence of hiding other bugs, so let's handle this
>> case throughout the driver, with a WARN_ON_ONCE so that we can try
>> and work out what happened.
>>
>> Fixes: 0f181d3cf7d98 ("iommu/rockchip: Add runtime PM support")
>> Signed-off-by: Marc Zyngier 
> 
> I'm still not sure about the !CONFIG_PM case, as it was probably silently
> working in that case before

Do we agree that this is an orthogonal problem though?

> 
> But on the other hand we're also already running over it in other places
> like in the iommu-shutdown and I guess if someone _really_ disabled
> CONFIG_PM, a lot of additional stuff would fail anyway.
> 
> So should we wrap that in some #ifdef magic, just ignore it or simply
> select PM similar to what Tegra, Renesas and Vexpress seem to do?
> 
> I guess I like the 3rd option best ;-)

It probably doesn't hurt. At what level do you want it? As a dependency
to the IOMMU? or to the platform?

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/2] iommu/rockchip: Move irq request past pm_runtime_enable

2018-08-07 Thread Marc Zyngier
Enabling the interrupt early, before power has been applied to the
device, can result in an interrupt being delivered too early if:

- the IOMMU shares an interrupt with a VOP
- the VOP has a pending interrupt (after a kexec, for example)

In these conditions, we end-up taking the interrupt without
the IOMMU being ready to handle the interrupt (not powered on).

Moving the interrupt request past the pm_runtime_enable() call
makes sure we can at least access the IOMMU registers. Note that
this is only a partial fix, and that the VOP interrupt will still
be screaming until the VOP driver kicks in, which advocates for
a more synchronized interrupt enabling/disabling approach.

Fixes: 0f181d3cf7d98 ("iommu/rockchip: Add runtime PM support")
Signed-off-by: Marc Zyngier 
---
 drivers/iommu/rockchip-iommu.c | 24 +---
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 4e0f9b61cd7f..2b1724e8d307 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1161,17 +1161,6 @@ static int rk_iommu_probe(struct platform_device *pdev)
if (iommu->num_mmu == 0)
return PTR_ERR(iommu->bases[0]);
 
-   i = 0;
-   while ((irq = platform_get_irq(pdev, i++)) != -ENXIO) {
-   if (irq < 0)
-   return irq;
-
-   err = devm_request_irq(iommu->dev, irq, rk_iommu_irq,
-  IRQF_SHARED, dev_name(dev), iommu);
-   if (err)
-   return err;
-   }
-
iommu->reset_disabled = device_property_read_bool(dev,
"rockchip,disable-mmu-reset");
 
@@ -1228,6 +1217,19 @@ static int rk_iommu_probe(struct platform_device *pdev)
 
pm_runtime_enable(dev);
 
+   i = 0;
+   while ((irq = platform_get_irq(pdev, i++)) != -ENXIO) {
+   if (irq < 0)
+   return irq;
+
+   err = devm_request_irq(iommu->dev, irq, rk_iommu_irq,
+  IRQF_SHARED, dev_name(dev), iommu);
+   if (err) {
+   pm_runtime_disable(dev);
+   goto err_remove_sysfs;
+   }
+   }
+
return 0;
 err_remove_sysfs:
iommu_device_sysfs_remove(>iommu);
-- 
2.18.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 0/2] iommu/rockchip: Runtime PM fixes

2018-08-07 Thread Marc Zyngier
This small series addresses a couple of runtime PM issues I've spotted
while running 4.18 on a Chromebook Plus (kevin, rk3399) platform, and
specifically doing kexec.

Note that even with these two patches, kexec is still fairly broken on
rk3399, as the VOP is never turned off (see [1] for a fix).

[1] https://www.spinics.net/lists/arm-kernel/msg670229.html

Marc Zyngier (2):
  iommu/rockchip: Handle errors returned from PM framework
  iommu/rockchip: Move irq request past pm_runtime_enable

 drivers/iommu/rockchip-iommu.c | 45 +-
 1 file changed, 28 insertions(+), 17 deletions(-)

-- 
2.18.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/2] iommu/rockchip: Handle errors returned from PM framework

2018-08-07 Thread Marc Zyngier
pm_runtime_get_if_in_use can fail: either PM has been disabled
altogether (-EINVAL), or the device hasn't been enabled yet (0).
Sadly, the Rockchip IOMMU driver tends to conflate the two things
by considering a non-zero return value as successful.

This has the consequence of hiding other bugs, so let's handle this
case throughout the driver, with a WARN_ON_ONCE so that we can try
and work out what happened.

Fixes: 0f181d3cf7d98 ("iommu/rockchip: Add runtime PM support")
Signed-off-by: Marc Zyngier 
---
 drivers/iommu/rockchip-iommu.c | 21 +++--
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 054cd2c8e9c8..4e0f9b61cd7f 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -521,10 +521,11 @@ static irqreturn_t rk_iommu_irq(int irq, void *dev_id)
u32 int_status;
dma_addr_t iova;
irqreturn_t ret = IRQ_NONE;
-   int i;
+   int i, err;
 
-   if (WARN_ON(!pm_runtime_get_if_in_use(iommu->dev)))
-   return 0;
+   err = pm_runtime_get_if_in_use(iommu->dev);
+   if (WARN_ON_ONCE(err <= 0))
+   return ret;
 
if (WARN_ON(clk_bulk_enable(iommu->num_clocks, iommu->clocks)))
goto out;
@@ -620,11 +621,15 @@ static void rk_iommu_zap_iova(struct rk_iommu_domain 
*rk_domain,
spin_lock_irqsave(_domain->iommus_lock, flags);
list_for_each(pos, _domain->iommus) {
struct rk_iommu *iommu;
+   int ret;
 
iommu = list_entry(pos, struct rk_iommu, node);
 
/* Only zap TLBs of IOMMUs that are powered on. */
-   if (pm_runtime_get_if_in_use(iommu->dev)) {
+   ret = pm_runtime_get_if_in_use(iommu->dev);
+   if (WARN_ON_ONCE(ret < 0))
+   continue;
+   if (ret) {
WARN_ON(clk_bulk_enable(iommu->num_clocks,
iommu->clocks));
rk_iommu_zap_lines(iommu, iova, size);
@@ -891,6 +896,7 @@ static void rk_iommu_detach_device(struct iommu_domain 
*domain,
struct rk_iommu *iommu;
struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
unsigned long flags;
+   int ret;
 
/* Allow 'virtual devices' (eg drm) to detach from domain */
iommu = rk_iommu_from_dev(dev);
@@ -909,7 +915,9 @@ static void rk_iommu_detach_device(struct iommu_domain 
*domain,
list_del_init(>node);
spin_unlock_irqrestore(_domain->iommus_lock, flags);
 
-   if (pm_runtime_get_if_in_use(iommu->dev)) {
+   ret = pm_runtime_get_if_in_use(iommu->dev);
+   WARN_ON_ONCE(ret < 0);
+   if (ret > 0) {
rk_iommu_disable(iommu);
pm_runtime_put(iommu->dev);
}
@@ -946,7 +954,8 @@ static int rk_iommu_attach_device(struct iommu_domain 
*domain,
list_add_tail(>node, _domain->iommus);
spin_unlock_irqrestore(_domain->iommus_lock, flags);
 
-   if (!pm_runtime_get_if_in_use(iommu->dev))
+   ret = pm_runtime_get_if_in_use(iommu->dev);
+   if (!ret || WARN_ON_ONCE(ret < 0))
return 0;
 
ret = rk_iommu_enable(iommu);
-- 
2.18.0

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/rockchip: Handle errors returned from PM framework

2018-08-06 Thread Marc Zyngier
Hi Heiko,

On 06/08/18 13:09, Heiko Stuebner wrote:
> Hi Marc,
> 
> Am Sonntag, 5. August 2018, 14:46:16 CEST schrieb Marc Zyngier:
>> pm_runtime_get_if_in_use can fail: either PM has been disabled
>> altogether (-EINVAL), or the device hasn't been enabled yet (0).
>> Sadly, we ignore the first case at the moment, and end up treating
>> it as if we had received a valid interrupt.
>>
>> The first case could happen because the interrupt line is shared
>> (with the VOP for example), and although that device hasn't been
>> programmed yet, an interrupt may be pending (think kexec/kdump).
>>
>> The second case means that we've enabled the IOMMU, but it is
>> not powered-on just yet. This could be another instance of the
>> above, but as it deserves investigation, let's output a single
>> warning (instead of flodding the console).
>>
>> In both cases, bail with an IRQ_NONE.
>>
>> Fixes: 0f181d3cf7d98 ("iommu/rockchip: Add runtime PM support")
>> Signed-off-by: Marc Zyngier 
> 
> Hmm, maybe my thinking is flawed, but to me it looks a bit different.
> 
> I.e. the iommu, as well as the vop have the capability to check if the irq
> is for them via their status registers (INT_STATUS and MMU_STATUS).
> 
> For this to happen the power-domain must be active and the device clocked.
> Clock handling is done on both the vop and iommu and in the !CONFIG_PM
> case all power-domains are left on.
> 
> Right now a !CONFIG_PM just passes through the driver unnoticed but with
> this change, we would actually make it depend on PM?
> 
> What am I missing?

I'm not sure you're missing anything, except that what I'm reporting 
here has nothing to do with !CONFIG_PM, but with the state of runtime PM 
when an interrupt gets delivered.

> Also there are quite a bit more instances of pm_runtime_get_if_in_use
> present in the iommu driver.

Indeed. Most of the are mishandling the return value by ignoring the 
error cases. And the more I look at it, the more I think I'm just fixing 
an effect, and not the root cause. See when the IOMMU interrupts get 
requested, long before pm_runtime_enable() is called.

If you get an interrupt at that point (which is perfectly likely given 
that the VOP shares the IRQ line), you handle it in an unexpected 
context. I should have spotted that yesterday...

I've quickly hacked the (untested) following patch, which should do the
trick (I'll test it tonight).

Thanks,

M.

diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 054cd2c8e9c8..94942712959d 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1152,17 +1152,6 @@ static int rk_iommu_probe(struct platform_device *pdev)
if (iommu->num_mmu == 0)
return PTR_ERR(iommu->bases[0]);
 
-   i = 0;
-   while ((irq = platform_get_irq(pdev, i++)) != -ENXIO) {
-   if (irq < 0)
-   return irq;
-
-   err = devm_request_irq(iommu->dev, irq, rk_iommu_irq,
-  IRQF_SHARED, dev_name(dev), iommu);
-   if (err)
-   return err;
-   }
-
iommu->reset_disabled = device_property_read_bool(dev,
"rockchip,disable-mmu-reset");
 
@@ -1219,6 +1208,19 @@ static int rk_iommu_probe(struct platform_device *pdev)
 
pm_runtime_enable(dev);
 
+   i = 0;
+   while ((irq = platform_get_irq(pdev, i++)) != -ENXIO) {
+   if (irq < 0)
+   return irq;
+
+   err = devm_request_irq(iommu->dev, irq, rk_iommu_irq,
+  IRQF_SHARED, dev_name(dev), iommu);
+   if (err) {
+   pm_runtime_disable(dev);
+   goto err_remove_sysfs;
+   }
+   }
+
return 0;
 err_remove_sysfs:
iommu_device_sysfs_remove(>iommu);

-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] iommu/rockchip: Handle errors returned from PM framework

2018-08-05 Thread Marc Zyngier
pm_runtime_get_if_in_use can fail: either PM has been disabled
altogether (-EINVAL), or the device hasn't been enabled yet (0).
Sadly, we ignore the first case at the moment, and end up treating
it as if we had received a valid interrupt.

The first case could happen because the interrupt line is shared
(with the VOP for example), and although that device hasn't been
programmed yet, an interrupt may be pending (think kexec/kdump).

The second case means that we've enabled the IOMMU, but it is
not powered-on just yet. This could be another instance of the
above, but as it deserves investigation, let's output a single
warning (instead of flodding the console).

In both cases, bail with an IRQ_NONE.

Fixes: 0f181d3cf7d98 ("iommu/rockchip: Add runtime PM support")
Signed-off-by: Marc Zyngier 
---
 drivers/iommu/rockchip-iommu.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 054cd2c8e9c8..9d21495a8433 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -521,10 +521,11 @@ static irqreturn_t rk_iommu_irq(int irq, void *dev_id)
u32 int_status;
dma_addr_t iova;
irqreturn_t ret = IRQ_NONE;
-   int i;
+   int i, err;
 
-   if (WARN_ON(!pm_runtime_get_if_in_use(iommu->dev)))
-   return 0;
+   err = pm_runtime_get_if_in_use(iommu->dev);
+   if ((err < 0) || WARN_ON_ONCE(!err))
+   return ret;
 
if (WARN_ON(clk_bulk_enable(iommu->num_clocks, iommu->clocks)))
goto out;
-- 
2.14.2

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC PATCH 03/23] genirq: Introduce IRQF_DELIVER_AS_NMI

2018-06-13 Thread Marc Zyngier
On 13/06/18 10:20, Thomas Gleixner wrote:
> On Wed, 13 Jun 2018, Julien Thierry wrote:
>> On 13/06/18 09:34, Peter Zijlstra wrote:
>>> On Tue, Jun 12, 2018 at 05:57:23PM -0700, Ricardo Neri wrote:
 diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
 index 5426627..dbc5e02 100644
 --- a/include/linux/interrupt.h
 +++ b/include/linux/interrupt.h
 @@ -61,6 +61,8 @@
*interrupt handler after suspending interrupts. For
 system
*wakeup devices users need to implement wakeup
 detection in
*their interrupt handlers.
 + * IRQF_DELIVER_AS_NMI - Configure interrupt to be delivered as
 non-maskable, if
 + *supported by the chip.
*/
>>>
>>> NAK on the first 6 patches. You really _REALLY_ don't want to expose
>>> NMIs to this level.
>>>
>>
>> I've been working on something similar on arm64 side, and effectively the one
>> thing that might be common to arm64 and intel is the interface to set an
>> interrupt as NMI. So I guess it would be nice to agree on the right approach
>> for this.
>>
>> The way I did it was by introducing a new irq_state and let the irqchip 
>> driver
>> handle most of the work (if it supports that state):
>>
>> https://lkml.org/lkml/2018/5/25/181
>>
>> This has not been ACKed nor NAKed. So I am just asking whether this is a more
>> suitable approach, and if not, is there any suggestions on how to do this?
> 
> I really didn't pay attention to that as it's burried in the GIC/ARM series
> which is usually Marc's playground.

I'm working my way through it ATM now that I have some brain cycles back.

> Adding NMI delivery support at low level architecture irq chip level is
> perfectly fine, but the exposure of that needs to be restricted very
> much. Adding it to the generic interrupt control interfaces is not going to
> happen. That's doomed to begin with and a complete abuse of the interface
> as the handler can not ever be used for that.

I can only agree with that. Allowing random driver to use request_irq()
to make anything an NMI ultimately turns it into a complete mess ("hey,
NMI is *faster*, let's use that"), and a potential source of horrible
deadlocks.

What I'd find more palatable is a way for an irqchip to be able to
prioritize some interrupts based on a set of architecturally-defined
requirements, and a separate NMI requesting/handling framework that is
separate from the IRQ API, as the overall requirements are likely to
completely different.

It shouldn't have to be nearly as complex as the IRQ API, and require
much stricter requirements in terms of what you can do there (flow
handling should definitely be different).

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] drm/rockchip: vop: fix irq disabled after vop driver probed

2018-05-25 Thread Marc Zyngier
[Thanks Robin for pointing me to that patch.]

Hi Heiko,

On 24/05/18 23:06, Heiko Stübner wrote:
> From: Sandy Huang 
> 
> The vop irq is shared between vop and iommu and irq probing in the
> iommu driver moved to the probe function recently. This can in some
> cases lead to a stall if the irq is triggered while the vop driver
> still has it disabled.
> 
> But there is no real need to disable the irq, as the vop can simply
> also track its enabled state and ignore irqs in that case.
> 
> So remove the enable/disable handling and add appropriate condition
> to the irq handler.
> 
> Signed-off-by: Sandy Huang 
> [added an actual commit message]
> Signed-off-by: Heiko Stuebner 
> ---
> Hi Ezequiel,
> 
> this patch came from a discussion I had with Rockchip people over the
> iommu changes and resulting issues back in april, but somehow was
> forgotten and not posted to the lists. Correcting that now.
> 
> So removing the enable/disable voodoo on the shared interrupt is
> the preferred way.
> 
>  drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 13 ++---
>  1 file changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c 
> b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
> index 510cdf0..61493d4 100644
> --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
> +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
> @@ -549,8 +549,6 @@ static int vop_enable(struct drm_crtc *crtc)
>  
>   spin_unlock(>reg_lock);
>  
> - enable_irq(vop->irq);
> -
>   drm_crtc_vblank_on(crtc);
>  
>   return 0;
> @@ -596,8 +594,6 @@ static void vop_crtc_atomic_disable(struct drm_crtc *crtc,
>  
>   vop_dsp_hold_valid_irq_disable(vop);
>  
> - disable_irq(vop->irq);
> -
>   vop->is_enabled = false;
>  
>   /*
> @@ -1168,6 +1164,13 @@ static irqreturn_t vop_isr(int irq, void *data)
>   int ret = IRQ_NONE;
>  
>   /*
> +  * since the irq is shared with iommu, iommu irq is enabled before vop
> +  * enable, so before vop enable we do nothing.
> +  */
> + if (!vop->is_enabled)
> + return IRQ_NONE;
> +

What guarantee do we have that the IOMMU will actually service that
interrupt? Can't we get into a situation where the interrupt gets
ignored by both drivers and subsequently disabled by the core irq code
as being spurious?

From what I understood of the way things are plugged together on the
rockchip platforms, each individual VOP is behind a single IOMMU, hence
there there is hardly any point in handling IOMMU interrupts if the VOP
is off.

But I'm missing the actual reason behind this patch. Could you enlighten me?

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH] iommu/arm-smmu-v3: Set GBPA to abort all transactions

2018-04-12 Thread Marc Zyngier
On 12/04/18 11:17, Robin Murphy wrote:
> On 11/04/18 17:54, Marc Zyngier wrote:
>> Hi Sammer,
>>
>> On 11/04/18 16:58, Goel, Sameer wrote:
>>>
>>>
>>> On 3/28/2018 9:00 AM, Marc Zyngier wrote:
>>>> On 2018-03-28 15:39, Timur Tabi wrote:
>>>>> From: Sameer Goel <sg...@codeaurora.org>
>>>>>
>>>>> Set SMMU_GBPA to abort all incoming translations during the SMMU reset
>>>>> when SMMUEN==0.
>>>>>
>>>>> This prevents a race condition where a stray DMA from the crashed primary
>>>>> kernel can try to access an IOVA address as an invalid PA when SMMU is
>>>>> disabled during reset in the crash kernel.
>>>>>
>>>>> Signed-off-by: Sameer Goel <sg...@codeaurora.org>
>>>>> ---
>>>>>   drivers/iommu/arm-smmu-v3.c | 12 
>>>>>   1 file changed, 12 insertions(+)
>>>>>
>>>>> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
>>>>> index 3f2f1fc68b52..c04a89310c59 100644
>>>>> --- a/drivers/iommu/arm-smmu-v3.c
>>>>> +++ b/drivers/iommu/arm-smmu-v3.c
>>>>> @@ -2458,6 +2458,18 @@ static int arm_smmu_device_reset(struct
>>>>> arm_smmu_device *smmu, bool bypass)
>>>>>   if (reg & CR0_SMMUEN)
>>>>>   dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
>>>>>
>>>>> +    /*
>>>>> + * Abort all incoming translations. This can happen in a kdump case
>>>>> + * where SMMU is initialized when a prior DMA is pending. Just
>>>>> + * disabling the SMMU in this case might result in writes to invalid
>>>>> + * PAs.
>>>>> + */
>>>>> +    ret = arm_smmu_update_gbpa(smmu, 1, GBPA_ABORT);
>>>>> +    if (ret) {
>>>>> +    dev_err(smmu->dev, "GBPA not responding to update\n");
>>>>> +    return ret;
>>>>> +    }
>>>>> +
>>>>>   ret = arm_smmu_device_disable(smmu);
>>>>>   if (ret)
>>>>>   return ret;
>>>>
>>>> A tangential question: can we reliably detect that the SMMU already
>>>> has valid mappings, which would indicate that we're in a pretty bad
>>>> shape already by the time we set that bit? For all we know, memory
>>>> could have been corrupted long before we hit this point, and this
>>>> patch barely narrows the window of opportunity.
>>>
>>> :) Yes that is correct. This only covers the kdump scenario. Trying
>>> to get some reliability when booting up the crash kernel. The system
>>> is already in a bad state. I don't think that this will happen in a
>>> normal scenario. But please point me to the GICv3 change and I'll
>>> have a look.
>>
>> See this:
>> https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/tree/drivers/irqchip/irq-gic-v3-its.c?h=irq/irqchip-4.17=6eb486b66a3094cdcd68dc39c9df3a29d6a51dd5#n3407
> 
> The nearest equivalent to that is probably the top-level SMMUEN check 
> that we already have (see the diff context above). To go beyond that 
> you'd have to chase the old stream table pointer and scan the whole 
> thing looking for valid contexts, then potentially walk page tables 
> within those contexts to check for live translations if you really 
> wanted to be sure. That would be a hell of a lot of work to do in the 
> boot path.
Yeah, feels a bit too involved for sanity. I'd simply suggest you taint
the kernel if you find the SMMU enabled, as you're already on shaky ground.

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH] iommu/arm-smmu-v3: Set GBPA to abort all transactions

2018-04-11 Thread Marc Zyngier
Hi Sammer,

On 11/04/18 16:58, Goel, Sameer wrote:
> 
> 
> On 3/28/2018 9:00 AM, Marc Zyngier wrote:
>> On 2018-03-28 15:39, Timur Tabi wrote:
>>> From: Sameer Goel <sg...@codeaurora.org>
>>>
>>> Set SMMU_GBPA to abort all incoming translations during the SMMU reset
>>> when SMMUEN==0.
>>>
>>> This prevents a race condition where a stray DMA from the crashed primary
>>> kernel can try to access an IOVA address as an invalid PA when SMMU is
>>> disabled during reset in the crash kernel.
>>>
>>> Signed-off-by: Sameer Goel <sg...@codeaurora.org>
>>> ---
>>>  drivers/iommu/arm-smmu-v3.c | 12 
>>>  1 file changed, 12 insertions(+)
>>>
>>> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
>>> index 3f2f1fc68b52..c04a89310c59 100644
>>> --- a/drivers/iommu/arm-smmu-v3.c
>>> +++ b/drivers/iommu/arm-smmu-v3.c
>>> @@ -2458,6 +2458,18 @@ static int arm_smmu_device_reset(struct
>>> arm_smmu_device *smmu, bool bypass)
>>>  if (reg & CR0_SMMUEN)
>>>  dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
>>>
>>> +    /*
>>> + * Abort all incoming translations. This can happen in a kdump case
>>> + * where SMMU is initialized when a prior DMA is pending. Just
>>> + * disabling the SMMU in this case might result in writes to invalid
>>> + * PAs.
>>> + */
>>> +    ret = arm_smmu_update_gbpa(smmu, 1, GBPA_ABORT);
>>> +    if (ret) {
>>> +    dev_err(smmu->dev, "GBPA not responding to update\n");
>>> +    return ret;
>>> +    }
>>> +
>>>  ret = arm_smmu_device_disable(smmu);
>>>  if (ret)
>>>  return ret;
>>
>> A tangential question: can we reliably detect that the SMMU already
>> has valid mappings, which would indicate that we're in a pretty bad
>> shape already by the time we set that bit? For all we know, memory
>> could have been corrupted long before we hit this point, and this
>> patch barely narrows the window of opportunity.
>
> :) Yes that is correct. This only covers the kdump scenario. Trying
> to get some reliability when booting up the crash kernel. The system
> is already in a bad state. I don't think that this will happen in a
> normal scenario. But please point me to the GICv3 change and I'll
> have a look.

See this:
https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/tree/drivers/irqchip/irq-gic-v3-its.c?h=irq/irqchip-4.17=6eb486b66a3094cdcd68dc39c9df3a29d6a51dd5#n3407

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH] iommu/arm-smmu-v3: Set GBPA to abort all transactions

2018-03-28 Thread Marc Zyngier

On 2018-03-28 15:39, Timur Tabi wrote:

From: Sameer Goel 

Set SMMU_GBPA to abort all incoming translations during the SMMU 
reset

when SMMUEN==0.

This prevents a race condition where a stray DMA from the crashed 
primary
kernel can try to access an IOVA address as an invalid PA when SMMU 
is

disabled during reset in the crash kernel.

Signed-off-by: Sameer Goel 
---
 drivers/iommu/arm-smmu-v3.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/iommu/arm-smmu-v3.c 
b/drivers/iommu/arm-smmu-v3.c

index 3f2f1fc68b52..c04a89310c59 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2458,6 +2458,18 @@ static int arm_smmu_device_reset(struct
arm_smmu_device *smmu, bool bypass)
if (reg & CR0_SMMUEN)
dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");

+   /*
+* Abort all incoming translations. This can happen in a kdump case
+* where SMMU is initialized when a prior DMA is pending. Just
+	 * disabling the SMMU in this case might result in writes to 
invalid

+* PAs.
+*/
+   ret = arm_smmu_update_gbpa(smmu, 1, GBPA_ABORT);
+   if (ret) {
+   dev_err(smmu->dev, "GBPA not responding to update\n");
+   return ret;
+   }
+
ret = arm_smmu_device_disable(smmu);
if (ret)
return ret;


A tangential question: can we reliably detect that the SMMU already has 
valid mappings, which would indicate that we're in a pretty bad shape 
already by the time we set that bit? For all we know, memory could have 
been corrupted long before we hit this point, and this patch barely 
narrows the window of opportunity.


At the very least, we should emit a warning and taint the kernel (we've 
recently added such measures to the GICv3 driver).


Thanks,

M.
--
Fast, cheap, reliable. Pick two.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] iommu/rockchip: Perform a reset on shutdown

2018-02-20 Thread Marc Zyngier
Trying to do a kexec whilst the iommus are still on is proving to be
a challenging exercise. It is terribly unsafe, as we're reusing the
memory allocated for the page tables, leading to a likely crash.

Let's implement a shutdown method that will at least try to stop
DMA from going crazy behind our back. Note that we need to be
extra cautious when doing so, as the IOMMU may not be clocked
if controlled by a another master, as typical on Rockchip system.

Suggested-by: Robin Murphy <robin.mur...@arm.com>
Signed-off-by: Marc Zyngier <marc.zyng...@arm.com>
---
 drivers/iommu/rockchip-iommu.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 9d991c2d8767..6a3719e118da 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -1209,6 +1209,23 @@ static int rk_iommu_remove(struct platform_device *pdev)
return 0;
 }
 
+static void rk_iommu_shutdown(struct platform_device *pdev)
+{
+   struct rk_iommu *iommu = platform_get_drvdata(pdev);
+
+   /*
+* Be careful not to try to shutdown an otherwise unused
+* IOMMU, as it is likely not to be clocked, and accessing it
+* would just block. An IOMMU without a domain is likely to be
+* unused, so let's use this as a (weak) guard.
+*/
+   if (iommu && iommu->domain) {
+   rk_iommu_enable_stall(iommu);
+   rk_iommu_disable_paging(iommu);
+   rk_iommu_force_reset(iommu);
+   }
+}
+
 static const struct of_device_id rk_iommu_dt_ids[] = {
{ .compatible = "rockchip,iommu" },
{ /* sentinel */ }
@@ -1218,6 +1235,7 @@ MODULE_DEVICE_TABLE(of, rk_iommu_dt_ids);
 static struct platform_driver rk_iommu_driver = {
.probe = rk_iommu_probe,
.remove = rk_iommu_remove,
+   .shutdown = rk_iommu_shutdown,
.driver = {
   .name = "rk_iommu",
   .of_match_table = rk_iommu_dt_ids,
-- 
2.14.2

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2] iommu/arm-smmu-v3: limit reporting of MSI allocation failures

2018-01-22 Thread Marc Zyngier
On 20/01/18 18:08, Nate Watterson wrote:
> Currently, the arm-smmu-v3 driver expects to allocate MSIs for all SMMUs
> with FEAT_MSI set. This results in unwarranted "failed to allocate MSIs"
> warnings being printed on systems where FW was either deliberately
> configured to force the use of SMMU wired interrupts -or- is altogether
> incapable of describing SMMU MSI topology (ACPI IORT prior to rev.C).
> 
> Remedy this by checking msi_domain before attempting to allocate SMMU
> MSIs.
> 
> Signed-off-by: Nate Watterson <nwatt...@codeaurora.org>
> Signed-off-by: Sinan Kaya <ok...@codeaurora.org>
> ---
>  drivers/iommu/arm-smmu-v3.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 744592d..00de028 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -2328,10 +2328,15 @@ static void arm_smmu_setup_msis(struct 
> arm_smmu_device *smmu)
>   if (!(smmu->features & ARM_SMMU_FEAT_MSI))
>   return;
>  
> + if (!dev->msi_domain) {
> + dev_info(smmu->dev, "msi_domain absent - falling back to wired 
> irqs\n");
> + return;
> + }
> +
>   /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
>   ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
>   if (ret) {
> - dev_warn(dev, "failed to allocate MSIs\n");
> + dev_warn(dev, "failed to allocate MSIs - falling back to wired 
> irqs\n");
>   return;
>   }
>  
> 

Acked-by: Marc Zyngier <marc.zyng...@arm.com>

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/arm-smmu-v3: suppress MSI allocation failure message

2018-01-17 Thread Marc Zyngier
On 17/01/18 18:54, Robin Murphy wrote:
> [ +Marc just in case ]
> 
> On 17/01/18 18:39, Nate Watterson wrote:
>> From: Sinan Kaya 
>>
>> Even though QDF2400 supports MSI interrupts with SMMUv3, it is not enabled
>> in ACPI FW to preserve compatibility with older kernel versions. Code is
>> emitting warning message during boot.
>>
>> This is causing some test tools to record a false warning and is causing
>> support issues.
>>
>> Better reduce the message level.
> 
> Ugh, that's unfortunate, since there are also plenty of genuine error 
> conditions encapsulated in there which we *would* want to report as such 
> (but still then fall back to wired IRQs if possible). Is the return 
> value sufficient to differentiate the "there is no MSI parent" and 
> "there are MSIs but something went wrong" cases, or is it more 
> complicated than that?

Indeed. How about checking dev->msi_domain first, which should tell you
whether it is even possible to allocate MSIs, and fallback to wired IRQs
instead. That way, we keep the warning on genuine failures to allocate
MSIs, and you get to add a nice "Falling back to wired interrupts"
message when msi_domain is NULL.

Thoughts?

M.

> 
> Robin.
> 
>> Signed-off-by: Sinan Kaya 
>> Signed-off-by: Nate Watterson 
>> ---
>>   drivers/iommu/arm-smmu-v3.c | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
>> index 744592d..2118fda 100644
>> --- a/drivers/iommu/arm-smmu-v3.c
>> +++ b/drivers/iommu/arm-smmu-v3.c
>> @@ -2331,7 +2331,7 @@ static void arm_smmu_setup_msis(struct arm_smmu_device 
>> *smmu)
>>  /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
>>  ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
>>  if (ret) {
>> -dev_warn(dev, "failed to allocate MSIs\n");
>> +dev_info(dev, "failed to allocate MSIs\n");
>>  return;
>>  }
>>   
>>


-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v12 1/3] ACPI/IORT: Add msi address regions reservation helper

2017-12-15 Thread Marc Zyngier
On Thu, 14 Dec 2017 16:09:55 +,
Shameer Kolothum wrote:
> 
> On some platforms msi parent address regions have to be excluded from
> normal IOVA allocation in that they are detected and decoded in a HW
> specific way by system components and so they cannot be considered normal
> IOVA address space.
> 
> Add a helper function that retrieves ITS address regions - the msi
> parent - through IORT device <-> ITS mappings and reserves it so that
> these regions will not be translated by IOMMU and will be excluded from
> IOVA allocations. The function checks for the smmu model number and
> only applies the msi reservation if the platform requires it.
> 
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.th...@huawei.com>
> ---
>  drivers/acpi/arm64/iort.c| 111 
> +--
>  drivers/irqchip/irq-gic-v3-its.c |   3 +-
>  include/linux/acpi_iort.h|   7 ++-
>  3 files changed, 116 insertions(+), 5 deletions(-)

For the ITS part:

Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>

M.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v8 2/5] ACPI/IORT: Add msi address regions reservation helper

2017-10-04 Thread Marc Zyngier
On 27/09/17 14:32, Shameer Kolothum wrote:
> On some platforms msi parent address regions have to be excluded from
> normal IOVA allocation in that they are detected and decoded in a HW
> specific way by system components and so they cannot be considered normal
> IOVA address space.
> 
> Add a helper function that retrieves ITS address regions - the msi
> parent - through IORT device <-> ITS mappings and reserves it so that
> these regions will not be translated by IOMMU and will be excluded from
> IOVA allocations.
> 
> Signed-off-by: Shameer Kolothum 
> [lorenzo.pieral...@arm.com: updated commit log/added comments]
> Signed-off-by: Lorenzo Pieralisi 
> ---
>  drivers/acpi/arm64/iort.c| 96 
> ++--
>  drivers/irqchip/irq-gic-v3-its.c |  3 +-
>  include/linux/acpi_iort.h|  7 ++-
>  3 files changed, 101 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
> index 9565d57..14efa9d 100644
> --- a/drivers/acpi/arm64/iort.c
> +++ b/drivers/acpi/arm64/iort.c
> @@ -39,6 +39,7 @@
>  struct iort_its_msi_chip {
>   struct list_headlist;
>   struct fwnode_handle*fw_node;
> + phys_addr_t base_addr;
>   u32 translation_id;
>  };
>  
> @@ -136,14 +137,16 @@ typedef acpi_status (*iort_find_node_callback)
>  static DEFINE_SPINLOCK(iort_msi_chip_lock);
>  
>  /**
> - * iort_register_domain_token() - register domain token and related ITS ID
> - * to the list from where we can get it back later on.
> + * iort_register_domain_token() - register domain token along with related
> + * ITS ID and base address to the list from where we can get it back later 
> on.
>   * @trans_id: ITS ID.
> + * @base: ITS base address.
>   * @fw_node: Domain token.
>   *
>   * Returns: 0 on success, -ENOMEM if no memory when allocating list element
>   */
> -int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node)
> +int iort_register_domain_token(int trans_id, phys_addr_t base,
> +struct fwnode_handle *fw_node)
>  {
>   struct iort_its_msi_chip *its_msi_chip;
>  
> @@ -153,6 +156,7 @@ int iort_register_domain_token(int trans_id, struct 
> fwnode_handle *fw_node)
>  
>   its_msi_chip->fw_node = fw_node;
>   its_msi_chip->translation_id = trans_id;
> + its_msi_chip->base_addr = base;
>  
>   spin_lock(_msi_chip_lock);
>   list_add(_msi_chip->list, _msi_chip_list);
> @@ -481,6 +485,24 @@ int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id)
>   return -ENODEV;
>  }
>  
> +static int __maybe_unused iort_find_its_base(u32 its_id, phys_addr_t *base)
> +{
> + struct iort_its_msi_chip *its_msi_chip;
> + bool match = false;
> +
> + spin_lock(_msi_chip_lock);
> + list_for_each_entry(its_msi_chip, _msi_chip_list, list) {
> + if (its_msi_chip->translation_id == its_id) {
> + *base = its_msi_chip->base_addr;
> + match = true;
> + break;
> + }
> + }
> + spin_unlock(_msi_chip_lock);
> +
> + return match ? 0 : -ENODEV;
> +}
> +
>  /**
>   * iort_dev_find_its_id() - Find the ITS identifier for a device
>   * @dev: The device.
> @@ -639,6 +661,72 @@ int iort_add_device_replay(const struct iommu_ops *ops, 
> struct device *dev)
>  
>   return err;
>  }
> +
> +/**
> + * iort_iommu_msi_get_resv_regions - Reserved region driver helper
> + * @dev: Device from iommu_get_resv_regions()
> + * @head: Reserved region list from iommu_get_resv_regions()
> + *
> + * Returns: Number of reserved regions on success (0 if no associated msi
> + *  regions), appropriate error value otherwise. The ITS regions
> + *  associated with the device are the msi reserved regions.
> + */
> +int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head 
> *head)
> +{
> + struct acpi_iort_its_group *its;
> + struct acpi_iort_node *node, *its_node = NULL;
> + int i, resv = 0;
> +
> + node = iort_find_dev_node(dev);
> + if (!node)
> + return -ENODEV;
> +
> + /*
> +  * Current logic to reserve ITS regions relies on HW topologies
> +  * where a given PCI or named component maps its IDs to only one
> +  * ITS group; if a PCI or named component can map its IDs to
> +  * different ITS groups through IORT mappings this function has
> +  * to be reworked to ensure we reserve regions for all ITS groups
> +  * a given PCI or named component may map IDs to.
> +  */
> + if (dev_is_pci(dev)) {
> + u32 rid;
> +
> + pci_for_each_dma_alias(to_pci_dev(dev), __get_pci_rid, );
> + its_node = iort_node_map_id(node, rid, NULL, IORT_MSI_TYPE);
> + } else {
> + for (i = 0; i < node->mapping_count; i++) {
> + its_node = 

Re: [PATCH v8 3/5] iommu/of: Add msi address regions reservation helper

2017-10-04 Thread Marc Zyngier
On 27/09/17 14:32, Shameer Kolothum wrote:
> From: John Garry 
> 
> On some platforms msi-controller address regions have to be excluded
> from normal IOVA allocation in that they are detected and decoded in
> a HW specific way by system components and so they cannot be considered
> normal IOVA address space.
> 
> Add a helper function that retrieves msi address regions through device
> tree msi mapping, so that these regions will not be translated by IOMMU
> and will be excluded from IOVA allocations.
> 
> Signed-off-by: John Garry 
> [Shameer: Modified msi-parent retrieval logic]
> Signed-off-by: Shameer Kolothum 
> ---
>  drivers/iommu/of_iommu.c | 95 
> 
>  include/linux/of_iommu.h | 10 +
>  2 files changed, 105 insertions(+)
> 
> diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
> index e60e3db..ffd7fb7 100644
> --- a/drivers/iommu/of_iommu.c
> +++ b/drivers/iommu/of_iommu.c
> @@ -21,6 +21,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -138,6 +139,14 @@ static int of_iommu_xlate(struct device *dev,
>   return ops->of_xlate(dev, iommu_spec);
>  }
>  
> +static int __get_pci_rid(struct pci_dev *pdev, u16 alias, void *data)
> +{
> + u32 *rid = data;
> +
> + *rid = alias;
> + return 0;
> +}
> +
>  struct of_pci_iommu_alias_info {
>   struct device *dev;
>   struct device_node *np;
> @@ -163,6 +172,73 @@ static int of_pci_iommu_init(struct pci_dev *pdev, u16 
> alias, void *data)
>   return info->np == pdev->bus->dev.of_node;
>  }
>  
> +static int of_iommu_alloc_resv_region(struct device_node *np,
> +   struct list_head *head)
> +{
> + int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
> + struct iommu_resv_region *region;
> + struct resource res;
> + int err;
> +
> + err = of_address_to_resource(np, 0, );

What is the rational for registering the first region only? Surely you
can have more than one region in an MSI controller (yes, your particular
case is the ITS which has only one, but we're trying to do something
generic here).

Another issue I have with this code is that it inserts all of the ITS
MMIO in the RESV_MSI range. As long as we don't generate any page tables
for this, we're fine. But if we ever change this, we'll end-up with the
ITS programming interface being exposed to a device, which wouldn't be
acceptable.

Why can't we have some generic property instead, that would describe the
actual ranges that cannot be translated? That way, no random insertion
of a random range, and relatively future proof.

I'm not sure where to declare it (PCIe node? IOMMU node?), but it'd feel
like a much nicer and simpler solution to this problem.

Thoughts?

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu: Avoid NULL group dereference

2017-08-17 Thread Marc Zyngier
On 17/08/17 11:40, Robin Murphy wrote:
> The recently-removed FIXME in iommu_get_domain_for_dev() turns out to
> have been a little misleading, since that check is still worthwhile even
> when groups *are* universal. We have a few IOMMU-aware drivers which
> only care whether their device is already attached to an existing domain
> or not, for which the previous behaviour of iommu_get_domain_for_dev()
> was ideal, and who now crash if their device does not have an IOMMU.
> 
> With IOMMU groups now serving as a reliable indicator of whether a
> device has an IOMMU or not (barring false-positives from VFIO no-IOMMU
> mode), drivers could arguably do this:
> 
>   group = iommu_group_get(dev);
>   if (group) {
>   domain = iommu_get_domain_for_dev(dev);
>   iommu_group_put(group);
>   }
> 
> However, rather than duplicate that code across multiple callsites,
> particularly when it's still only the domain they care about, let's skip
> straight to the next step and factor out the check into the common place
> it applies - in iommu_get_domain_for_dev() itself. Sure, it ends up
> looking rather familiar, but now it's backed by the reasoning of having
> a robust API able to do the expected thing for all devices regardless.
> 
> Fixes: 05f80300dc8b ("iommu: Finish making iommu_group support mandatory")
> Reported-by: Shawn Lin <shawn@rock-chips.com>
> Signed-off-by: Robin Murphy <robin.mur...@arm.com>

Acked-by: Marc Zyngier <marc.zyng...@arm.com>

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] irqchip/gic-{v2m, v3-its}: check iommu capable before doing iommu map

2017-08-17 Thread Marc Zyngier
On 17/08/17 10:01, Shawn Lin wrote:
> Hi Marc
> 
> On 2017/8/17 16:52, Marc Zyngier wrote:
>> On 17/08/17 09:28, Shawn Lin wrote:
>>> If a PCIe RC use gic-v2m or gic-v3-its as a msi domain but doesn't
>>> have iommu support, we don't need to do iommu_dma_map_msi_msg to
>>> get mapped iommu address as all we need is the physical address.
>>> Otherwise we see the oops shown below as we can't get a iommu_group
>>> for a device without iommu capable.
>>>
>>> Unable to handle kernel NULL pointer dereference at virtual address
>>> 00d0
>>> [00d0] user address but active_mm is swapper
>>> Internal error: Oops: 9604 [#1] PREEMPT SMP
>>> Modules linked in:
>>> CPU: 3 PID: 1 Comm: swapper/0 Not tainted
>>> 4.13.0-rc5-next-20170815-1-g0744890-dirty #53
>>> Hardware name: Firefly-RK3399 Board (DT)
>>> task: 80007bc7 task.stack: 80007bc6c000
>>> PC is at iommu_get_domain_for_dev+0x38/0x50
>>> LR is at iommu_dma_map_msi_msg+0x3c/0x1b8
>>> pc : [] lr : [] pstate:
>>> a045
>>>
>>> ...
>>>
>>> [] iommu_get_domain_for_dev+0x38/0x50
>>> [] iommu_dma_map_msi_msg+0x3c/0x1b8
>>> [] its_irq_compose_msi_msg+0x44/0x50
>>> [] irq_chip_compose_msi_msg+0x40/0x58
>>> [] msi_domain_activate+0x1c/0x48
>>> [] __irq_domain_activate_irq+0x40/0x58
>>> [] irq_domain_activate_irq+0x24/0x40
>>> [] msi_domain_alloc_irqs+0x104/0x190
>>> [] pci_msi_setup_msi_irqs+0x3c/0x48
>>> [] __pci_enable_msi_range+0x21c/0x408
>>> [] pci_alloc_irq_vectors_affinity+0x104/0x168
>>> [] pcie_port_device_register+0x200/0x488
>>> [] pcie_portdrv_probe+0x34/0xd0
>>> [] local_pci_probe+0x3c/0xb8
>>> [] pci_device_probe+0x138/0x170
>>> [] driver_probe_device+0x21c/0x2d8
>>> [] __device_attach_driver+0x9c/0xf8
>>> [] bus_for_each_drv+0x5c/0x98
>>> [] __device_attach+0xc4/0x138
>>> [] device_attach+0x10/0x18
>>> [] pci_bus_add_device+0x4c/0xa8
>>> [] pci_bus_add_devices+0x44/0x90
>>> [] rockchip_pcie_probe+0xc70/0xcc8
>>> [] platform_drv_probe+0x58/0xc0
>>> [] driver_probe_device+0x21c/0x2d8
>>> [] __driver_attach+0xac/0xb0
>>> [] bus_for_each_dev+0x64/0xa0
>>> [] driver_attach+0x20/0x28
>>> [] bus_add_driver+0x110/0x230
>>> [] driver_register+0x60/0xf8
>>> [] __platform_driver_register+0x40/0x48
>>> [] rockchip_pcie_driver_init+0x18/0x20
>>> [] do_one_initcall+0xb0/0x120
>>> [] kernel_init_freeable+0x184/0x224
>>> [] kernel_init+0x10/0x100
>>> [] ret_from_fork+0x10/0x18
>>>
>>> This patch is to fix the problem exposed by the commit mentioned below.
>>> Before this commit, iommu has a work around method to fix this but now
>>> it doesn't. So we could fix this in gic code but maybe still need a fixes
>>> tag here.
>>>
>>> Fixes: 05f80300dc8b ("iommu: Finish making iommu_group support mandatory")
>>> Signed-off-by: Shawn Lin <shawn@rock-chips.com>
>>
>> That looks pretty horrible. Please see the patch I posted in response to
>> your initial report:
>>
>> http://marc.info/?l=linux-pci=150295809430903=2
> 
> Aha, I saw it but I didn't get your point of "I'm not convinced  that
> playing with the group refcount in an irqchip is the right approach..."
> 
> So, I'm not sure whether you prefer your patch?
I really dislike both:

- yours: because it reinvent the wheel (parsing DT one more time on each
MSI message creation), and is DT specific while the rest of the code is
completely firmware agnostic (what about ACPI?).

- mine: because it relies on an intimate knowledge of the internals of
the iommu stuff, which is not what was originally intended.

My comment was an invitation to Joerg to speak his mind.

The original intent of iommu_dma_map_msi_msg() was that it could silently
fail without exploding in your face. We can change that, but a minimum of
coordination wouldn't hurt.

My personal preference would be to address this in the iommu layer,
restoring a non-exploding iommu_dma_map_msi_msg():

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 44eca1e3243f..5440eae21bea 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -883,12 +883,18 @@ static struct iommu_dma_msi_page 
*iommu_dma_get_msi_page(struct device *dev,
 void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg)
 {
struct device *dev = msi_desc_to_dev(irq_get_msi_desc(irq));
-   struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

Re: [PATCH] irqchip/gic-{v2m, v3-its}: check iommu capable before doing iommu map

2017-08-17 Thread Marc Zyngier
On 17/08/17 09:28, Shawn Lin wrote:
> If a PCIe RC use gic-v2m or gic-v3-its as a msi domain but doesn't
> have iommu support, we don't need to do iommu_dma_map_msi_msg to
> get mapped iommu address as all we need is the physical address.
> Otherwise we see the oops shown below as we can't get a iommu_group
> for a device without iommu capable.
> 
> Unable to handle kernel NULL pointer dereference at virtual address
> 00d0
> [00d0] user address but active_mm is swapper
> Internal error: Oops: 9604 [#1] PREEMPT SMP
> Modules linked in:
> CPU: 3 PID: 1 Comm: swapper/0 Not tainted
> 4.13.0-rc5-next-20170815-1-g0744890-dirty #53
> Hardware name: Firefly-RK3399 Board (DT)
> task: 80007bc7 task.stack: 80007bc6c000
> PC is at iommu_get_domain_for_dev+0x38/0x50
> LR is at iommu_dma_map_msi_msg+0x3c/0x1b8
> pc : [] lr : [] pstate:
> a045
> 
> ...
> 
> [] iommu_get_domain_for_dev+0x38/0x50
> [] iommu_dma_map_msi_msg+0x3c/0x1b8
> [] its_irq_compose_msi_msg+0x44/0x50
> [] irq_chip_compose_msi_msg+0x40/0x58
> [] msi_domain_activate+0x1c/0x48
> [] __irq_domain_activate_irq+0x40/0x58
> [] irq_domain_activate_irq+0x24/0x40
> [] msi_domain_alloc_irqs+0x104/0x190
> [] pci_msi_setup_msi_irqs+0x3c/0x48
> [] __pci_enable_msi_range+0x21c/0x408
> [] pci_alloc_irq_vectors_affinity+0x104/0x168
> [] pcie_port_device_register+0x200/0x488
> [] pcie_portdrv_probe+0x34/0xd0
> [] local_pci_probe+0x3c/0xb8
> [] pci_device_probe+0x138/0x170
> [] driver_probe_device+0x21c/0x2d8
> [] __device_attach_driver+0x9c/0xf8
> [] bus_for_each_drv+0x5c/0x98
> [] __device_attach+0xc4/0x138
> [] device_attach+0x10/0x18
> [] pci_bus_add_device+0x4c/0xa8
> [] pci_bus_add_devices+0x44/0x90
> [] rockchip_pcie_probe+0xc70/0xcc8
> [] platform_drv_probe+0x58/0xc0
> [] driver_probe_device+0x21c/0x2d8
> [] __driver_attach+0xac/0xb0
> [] bus_for_each_dev+0x64/0xa0
> [] driver_attach+0x20/0x28
> [] bus_add_driver+0x110/0x230
> [] driver_register+0x60/0xf8
> [] __platform_driver_register+0x40/0x48
> [] rockchip_pcie_driver_init+0x18/0x20
> [] do_one_initcall+0xb0/0x120
> [] kernel_init_freeable+0x184/0x224
> [] kernel_init+0x10/0x100
> [] ret_from_fork+0x10/0x18
> 
> This patch is to fix the problem exposed by the commit mentioned below.
> Before this commit, iommu has a work around method to fix this but now
> it doesn't. So we could fix this in gic code but maybe still need a fixes
> tag here.
> 
> Fixes: 05f80300dc8b ("iommu: Finish making iommu_group support mandatory")
> Signed-off-by: Shawn Lin 

That looks pretty horrible. Please see the patch I posted in response to
your initial report:

http://marc.info/?l=linux-pci=150295809430903=2

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: PCIe oops for NULL pointer dereference during next-20170807 and next-20170815

2017-08-17 Thread Marc Zyngier
On 17/08/17 08:58, Joerg Roedel wrote:
> On Thu, Aug 17, 2017 at 03:02:31PM +0800, Shawn Lin wrote:
>> So should we revert this commit or maybe we could add some checking
>> into gic-v2m and gic-v3-its to see if the dev is iommu-capable? If not,
>> we should create another routine to map MSI msg.
> 
> Yes, fixing this in gic code is the right approach. I usually don't
> revert patches to hide problems somewhere else.
Here's what it would look like:

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index b47097a3e4b4..5e81ad62c801 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1125,6 +1125,8 @@ static void its_irq_compose_msi_msg(struct irq_data *d, 
struct msi_msg *msg)
 {
struct its_device *its_dev = irq_data_get_irq_chip_data(d);
struct its_node *its;
+   struct iommu_group *group;
+   struct device *dev;
u64 addr;
 
its = its_dev->its;
@@ -1134,7 +1136,12 @@ static void its_irq_compose_msi_msg(struct irq_data *d, 
struct msi_msg *msg)
msg->address_hi = upper_32_bits(addr);
msg->data   = its_get_event_id(d);
 
-   iommu_dma_map_msi_msg(d->irq, msg);
+   dev = msi_desc_to_dev(irq_get_msi_desc(d->irq));
+   group = iommu_group_get(dev);
+   if (group) {
+   iommu_dma_map_msi_msg(d->irq, msg);
+   iommu_group_put(group);
+   }
 }
 
 static int its_irq_set_irqchip_state(struct irq_data *d,

I'm not convinced  that playing with the group refcount in an irqchip
is the right approach...

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v8 3/3] iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2 erratum #126

2017-06-21 Thread Marc Zyngier
On 21/06/17 10:08, Will Deacon wrote:
> Hi Geetha,
> 
> On Wed, Jun 21, 2017 at 12:09:45PM +0530, Geetha Akula wrote:
>> On Tue, Jun 20, 2017 at 11:30 PM, Will Deacon  wrote:
>>> On Tue, Jun 20, 2017 at 07:47:39PM +0530, Geetha sowjanya wrote:
 From: Geetha Sowjanya 

 Cavium ThunderX2 SMMU doesn't support MSI and also doesn't have unique irq
 lines for gerror, eventq and cmdq-sync.

 SHARED_IRQ option is set as a errata workaround, which allows to share the 
 irq
 line by register single irq handler for all the interrupts.

 Signed-off-by: Geetha sowjanya 
 ---
  .../devicetree/bindings/iommu/arm,smmu-v3.txt  |5 ++
  drivers/iommu/arm-smmu-v3.c|   73 
 
  2 files changed, 64 insertions(+), 14 deletions(-)

 diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt 
 b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
 index 6ecc48c..44b40e0 100644
 --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
 +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
 @@ -55,6 +55,11 @@ the PCIe specification.
 Set for Caviun ThunderX2 silicon that doesn't support
 SMMU page1 register space.

 +- cavium,cn9900-broken-unique-irqline
 +: Use single irq line for all the SMMUv3 interrupts.
 +   Set for Caviun ThunderX2 silicon that doesn't support
 +   MSI and also doesn't have unique irq lines for gerror,
 +   eventq and cmdq-sync.
>>>
>>> I think we're better off just supporting a new (optional) named interrupt
>>> as "combined", and then allowing that to be used instead of the others.
>>
>> Are you suggesting to have new name irq "combined" like gerror ?
>> If yes, then this won't be possible with apci. We need to update iort spec to
>> add new name irq.
> 
> I'm mainly talking about the DT binding here, but I don't see why you
> can't hack drivers/acpi/arm64/iort.c like you did for the other erratum and
> have it register a single interrupt called "combined" based on the model
> number.
> 
 + arm_smmu_shared_irq_thread,
 + IRQF_ONESHOT | IRQF_SHARED,
>>>
>>> Why do you need IRQF_SHARED here?
>>
>>
>> +devm_request_threaded_irq(smmu->dev, irq,
>> +   arm_smmu_combined_irq_handler,
>> +   arm_smmu_combined_irq_thread,
>> +   IRQF_SHARED,
>> +   "arm-smmu-v3-combined-irq", smmu);
>>
>> On multi-node system, node1 SMMU's share irq lines with node0 SMMU's.
> 
> How does that work? Are these really MSIs under the hood? If so, why didn't
> you just build them as... MSIs?

More specifically, I suspect that they are made out of message-signalled
SPIs, targeting the GIC distributor directly... That's the only way I
can imagine it has been built... If I'm right, we probably have the
firmware programming the same SPI number in both nodes.

But of course, that's pure speculation.

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: Device address specific mapping of arm,mmu-500

2017-05-30 Thread Marc Zyngier
On 30/05/17 18:16, Ray Jui wrote:
> Hi Marc,
> 
> On 5/30/17 9:59 AM, Marc Zyngier wrote:
>> On 30/05/17 17:49, Ray Jui wrote:
>>> Hi Will,
>>>
>>> On 5/30/17 8:14 AM, Will Deacon wrote:
>>>> On Mon, May 29, 2017 at 06:18:45PM -0700, Ray Jui wrote:
>>>>> I'm writing to check with you to see if the latest arm-smmu.c driver in
>>>>> v4.12-rc Linux for smmu-500 can support mapping that is only specific to
>>>>> a particular physical address range while leave the rest still to be
>>>>> handled by the client device. I believe this can already be supported by
>>>>> the device tree binding of the generic IOMMU framework; however, it is
>>>>> not clear to me whether or not the arm-smmu.c driver can support it.
>>>>>
>>>>> To give you some background information:
>>>>>
>>>>> We have a SoC that has PCIe root complex that has a build-in logic block
>>>>> to forward MSI writes to ARM GICv3 ITS. Unfortunately, this logic block
>>>>> has a HW bug that causes the MSI writes not parsed properly and can
>>>>> potentially corrupt data in the internal FIFO. A workaround is to have
>>>>> ARM MMU-500 takes care of all inbound transactions. I found that is
>>>>> working after hooking up our PCIe root complex to MMU-500; however, even
>>>>> with this optimized arm-smmu driver in v4.12, I'm still seeing a
>>>>> significant Ethernet throughput drop in both the TX and RX directions.
>>>>> The throughput drop is very significant at around 50% (but is already
>>>>> much improved compared to other prior kernel versions at 70~90%).
>>>>
>>>> Did Robin's experiments help at all with this?
>>>>
>>>> http://www.linux-arm.org/git?p=linux-rm.git;a=shortlog;h=refs/heads/iommu/perf
>>>>
>>>
>>> It looks like these are new optimizations that have not yet been merged
>>> in v4.12? I'm going to give it a try.
>>>
>>>>> One alternative is to only use MMU-500 for MSI writes towards
>>>>> GITS_TRANSLATER register in the GICv3, i.e., if I can define a specific
>>>>> region of physical address that I want MMU-500 to act on and leave the
>>>>> rest of inbound transactions to be handled directly by our PCIe
>>>>> controller, it can potentially work around the HW bug we have and at the
>>>>> same time achieve optimal throughput.
>>>>
>>>> I don't think you can bypass the SMMU for MSIs unless you give them their
>>>> own StreamIDs, which is likely to break things horribly in the kernel. You
>>>> could try to create an identity mapping, but you'll still have the
>>>> translation overhead and you'd probably end up having to supply your own 
>>>> DMA
>>>> ops to manage the address space. I'm assuming that you need to prevent the
>>>> physical address of the ITS from being allocated as an IOVA?
>>>
>>> Will, is that a HW limitation that the SMMU cannot be used, only for MSI
>>> writes, in which case, the physical address range is very specific in
>>> our ASIC that falls in the device memory region (e.g., below 0x8000)?
>>>
>>> In fact, what I need in this case is a static mapping from IOMMU on the
>>> physical address of the GITS_TRANSLATER of the GICv3 ITS, which is the
>>> address that MSI writes go to. This is to bypass the MSI forwarding
>>> logic in our PCIe controller. At the same time, I can leave the rest of
>>> inbound transactions to be handled by our PCIe controller without going
>>> through the MMU.
>>
>> How is that going to work for DMA? I imagine your network interfaces do
>> have to access memory, don't they? How can the transactions be
>> terminated in the PCIe controller?
> 
> Sorry, I may not phrase this properly. These inbound transactions (DMA
> write to DDR, from endpoint) do not terminate in the PCIe controller.
> They are taken by the PCIe controller as PCIe transactions and will be
> carried towards the designated memory on the host.

So what is the StreamID used for these transactions? Is that a different
StreamID from that of the DMAing device? If you want to avoid the SMMU
effect on the transaction, you must make sure if doesn't match anything
there.

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: Device address specific mapping of arm,mmu-500

2017-05-30 Thread Marc Zyngier
On 30/05/17 17:49, Ray Jui wrote:
> Hi Will,
> 
> On 5/30/17 8:14 AM, Will Deacon wrote:
>> On Mon, May 29, 2017 at 06:18:45PM -0700, Ray Jui wrote:
>>> I'm writing to check with you to see if the latest arm-smmu.c driver in
>>> v4.12-rc Linux for smmu-500 can support mapping that is only specific to
>>> a particular physical address range while leave the rest still to be
>>> handled by the client device. I believe this can already be supported by
>>> the device tree binding of the generic IOMMU framework; however, it is
>>> not clear to me whether or not the arm-smmu.c driver can support it.
>>>
>>> To give you some background information:
>>>
>>> We have a SoC that has PCIe root complex that has a build-in logic block
>>> to forward MSI writes to ARM GICv3 ITS. Unfortunately, this logic block
>>> has a HW bug that causes the MSI writes not parsed properly and can
>>> potentially corrupt data in the internal FIFO. A workaround is to have
>>> ARM MMU-500 takes care of all inbound transactions. I found that is
>>> working after hooking up our PCIe root complex to MMU-500; however, even
>>> with this optimized arm-smmu driver in v4.12, I'm still seeing a
>>> significant Ethernet throughput drop in both the TX and RX directions.
>>> The throughput drop is very significant at around 50% (but is already
>>> much improved compared to other prior kernel versions at 70~90%).
>>
>> Did Robin's experiments help at all with this?
>>
>> http://www.linux-arm.org/git?p=linux-rm.git;a=shortlog;h=refs/heads/iommu/perf
>>
> 
> It looks like these are new optimizations that have not yet been merged
> in v4.12? I'm going to give it a try.
> 
>>> One alternative is to only use MMU-500 for MSI writes towards
>>> GITS_TRANSLATER register in the GICv3, i.e., if I can define a specific
>>> region of physical address that I want MMU-500 to act on and leave the
>>> rest of inbound transactions to be handled directly by our PCIe
>>> controller, it can potentially work around the HW bug we have and at the
>>> same time achieve optimal throughput.
>>
>> I don't think you can bypass the SMMU for MSIs unless you give them their
>> own StreamIDs, which is likely to break things horribly in the kernel. You
>> could try to create an identity mapping, but you'll still have the
>> translation overhead and you'd probably end up having to supply your own DMA
>> ops to manage the address space. I'm assuming that you need to prevent the
>> physical address of the ITS from being allocated as an IOVA?
> 
> Will, is that a HW limitation that the SMMU cannot be used, only for MSI
> writes, in which case, the physical address range is very specific in
> our ASIC that falls in the device memory region (e.g., below 0x8000)?
> 
> In fact, what I need in this case is a static mapping from IOMMU on the
> physical address of the GITS_TRANSLATER of the GICv3 ITS, which is the
> address that MSI writes go to. This is to bypass the MSI forwarding
> logic in our PCIe controller. At the same time, I can leave the rest of
> inbound transactions to be handled by our PCIe controller without going
> through the MMU.

How is that going to work for DMA? I imagine your network interfaces do
have to access memory, don't they? How can the transactions be
terminated in the PCIe controller?

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 16/19] irqchip/gicv3-its: Sets IRQ_DOMAIN_FLAG_MSI_REMAP

2017-01-10 Thread Marc Zyngier
Hi Eric,

On 09/01/17 13:46, Eric Auger wrote:
> The GICv3 ITS is MSI remapping capable. Let's advertise
> this property so that VFIO passthrough can assess IRQ safety.
> 
> Signed-off-by: Eric Auger <eric.au...@redhat.com>
> ---
>  drivers/irqchip/irq-gic-v3-its.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/irqchip/irq-gic-v3-its.c 
> b/drivers/irqchip/irq-gic-v3-its.c
> index 69b040f..9d4fefc 100644
> --- a/drivers/irqchip/irq-gic-v3-its.c
> +++ b/drivers/irqchip/irq-gic-v3-its.c
> @@ -1642,6 +1642,7 @@ static int its_init_domain(struct fwnode_handle 
> *handle, struct its_node *its)
>  
>   inner_domain->parent = its_parent;
>   inner_domain->bus_token = DOMAIN_BUS_NEXUS;
> + inner_domain->flags |= IRQ_DOMAIN_FLAG_MSI_REMAP;
>   info->ops = _msi_domain_ops;
>   info->data = its;
>   inner_domain->host_data = info;
> 

For patches 13 to 16, and provided that you address the couple of nits I
mentioned in reply to patches 13 and 15:

Reviewed-by: Marc Zyngier <marc.zyng...@arm.com>

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 15/19] irqdomain: irq_domain_check_msi_remap

2017-01-10 Thread Marc Zyngier
On 09/01/17 13:46, Eric Auger wrote:
> This new function checks whether all MSI irq domains
> implement IRQ remapping. This is useful to understand
> whether VFIO passthrough is safe with respect to interrupts.
> 
> On ARM typically an MSI controller can sit downstream
> to the IOMMU without preventing VFIO passthrough.
> As such any assigned device can write into the MSI doorbell.
> In case the MSI controller implements IRQ remapping, assigned
> devices will not be able to trigger interrupts towards the
> host. On the contrary, the assignment must be emphasized as
> unsafe with respect to interrupts.
> 
> Signed-off-by: Eric Auger 
> 
> ---
> v5 -> v6:
> - use irq_domain_hierarchical_is_msi_remap()
> - comment rewording
> 
> v4 -> v5:
> - Handle DOMAIN_BUS_FSL_MC_MSI domains
> - Check parents
> ---
>  include/linux/irqdomain.h |  1 +
>  kernel/irq/irqdomain.c| 23 +++
>  2 files changed, 24 insertions(+)
> 
> diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
> index bc2f571..188eced 100644
> --- a/include/linux/irqdomain.h
> +++ b/include/linux/irqdomain.h
> @@ -222,6 +222,7 @@ struct irq_domain *irq_domain_add_legacy(struct 
> device_node *of_node,
>void *host_data);
>  extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
>  enum irq_domain_bus_token 
> bus_token);
> +extern bool irq_domain_check_msi_remap(void);
>  extern void irq_set_default_host(struct irq_domain *host);
>  extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
> irq_hw_number_t hwirq, int node,
> diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
> index 101ee8f..fff30cb 100644
> --- a/kernel/irq/irqdomain.c
> +++ b/kernel/irq/irqdomain.c
> @@ -278,6 +278,29 @@ struct irq_domain *irq_find_matching_fwspec(struct 
> irq_fwspec *fwspec,
>  EXPORT_SYMBOL_GPL(irq_find_matching_fwspec);
>  
>  /**
> + * irq_domain_check_msi_remap - Check whether all MSI
> + * irq domains implement IRQ remapping
> + */
> +bool irq_domain_check_msi_remap(void)
> +{
> + struct irq_domain *h;
> + bool ret = true;
> +
> + mutex_lock(_domain_mutex);
> + list_for_each_entry(h, _domain_list, link) {
> + if (irq_domain_is_msi(h) &&
> + !irq_domain_hierarchical_is_msi_remap(h)) {
> + ret = false;
> + goto out;

Let's avoid gotos if we can. a break statement will have the same effect
here, and we can drop the label.

> + }
> + }
> +out:
> + mutex_unlock(_domain_mutex);
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(irq_domain_check_msi_remap);
> +
> +/**
>   * irq_set_default_host() - Set a "default" irq domain
>   * @domain: default domain pointer
>   *
> 

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 13/19] irqdomain: Add irq domain MSI and MSI_REMAP flags

2017-01-10 Thread Marc Zyngier
Hi Eric,

On 09/01/17 13:46, Eric Auger wrote:
> We introduce two new enum values for the irq domain flag:
> - IRQ_DOMAIN_FLAG_MSI indicates the irq domain corresponds to
>   an MSI domain
> - IRQ_DOMAIN_FLAG_MSI_REMAP indicates the irq domain has MSI
>   remapping capabilities.
> 
> Those values will be useful to check all MSI irq domains have
> MSI remapping support when assessing the safety of IRQ assignment
> to a guest.
> 
> irq_domain_hierarchical_is_msi_remap() allows to check if an
> irq domain or any parent implements MSI remapping.
> 
> Signed-off-by: Eric Auger 
> 
> ---
> 
> v6:
> - add IRQ_DOMAIN_FLAG_MSI as suggested by Marc
> - add irq_domain_is_msi, irq_domain_is_msi_remap and
>   irq_domain_hierarchical_is_msi_remap
> ---
>  include/linux/irqdomain.h | 35 +++
>  kernel/irq/irqdomain.c| 16 
>  2 files changed, 51 insertions(+)
> 
> diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
> index ffb8460..bc2f571 100644
> --- a/include/linux/irqdomain.h
> +++ b/include/linux/irqdomain.h
> @@ -183,6 +183,12 @@ enum {
>   /* Irq domain is an IPI domain with single virq */
>   IRQ_DOMAIN_FLAG_IPI_SINGLE  = (1 << 3),
>  
> + /* Irq domain implements MSIs */
> + IRQ_DOMAIN_FLAG_MSI = (1 << 4),
> +
> + /* Irq domain implements MSI remapping */
> + IRQ_DOMAIN_FLAG_MSI_REMAP   = (1 << 5),
> +
>   /*
>* Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved
>* for implementation specific purposes and ignored by the
> @@ -446,6 +452,19 @@ static inline bool irq_domain_is_ipi_single(struct 
> irq_domain *domain)
>  {
>   return domain->flags & IRQ_DOMAIN_FLAG_IPI_SINGLE;
>  }
> +
> +static inline bool irq_domain_is_msi(struct irq_domain *domain)
> +{
> + return domain->flags & IRQ_DOMAIN_FLAG_MSI;
> +}
> +
> +static inline bool irq_domain_is_msi_remap(struct irq_domain *domain)
> +{
> + return domain->flags & IRQ_DOMAIN_FLAG_MSI_REMAP;
> +}
> +
> +extern bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain);
> +
>  #else/* CONFIG_IRQ_DOMAIN_HIERARCHY */
>  static inline void irq_domain_activate_irq(struct irq_data *data) { }
>  static inline void irq_domain_deactivate_irq(struct irq_data *data) { }
> @@ -477,6 +496,22 @@ static inline bool irq_domain_is_ipi_single(struct 
> irq_domain *domain)
>  {
>   return false;
>  }
> +
> +static inline bool irq_domain_is_msi(struct irq_domain *domain)
> +{
> + return false;
> +}
> +
> +static inline bool irq_domain_is_msi_remap(struct irq_domain *domain)
> +{
> + return false;
> +}
> +
> +static inline bool
> +irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain)
> +{
> + return false;
> +}
>  #endif   /* CONFIG_IRQ_DOMAIN_HIERARCHY */
>  
>  #else /* CONFIG_IRQ_DOMAIN */
> diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
> index 8c0a0ae..101ee8f 100644
> --- a/kernel/irq/irqdomain.c
> +++ b/kernel/irq/irqdomain.c
> @@ -1392,6 +1392,22 @@ static void irq_domain_check_hierarchy(struct 
> irq_domain *domain)
>   if (domain->ops->alloc)
>   domain->flags |= IRQ_DOMAIN_FLAG_HIERARCHY;
>  }
> +
> +/**
> + * irq_domain_hierarchical_is_msi_remap - Check if the domain or any
> + * parent has MSI remapping support
> + * @domain: domain pointer
> + */
> +bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain)
> +{
> + struct irq_domain *h = domain;
> +
> + for (; h; h = h->parent) {
> + if (irq_domain_is_msi_remap(h))

nit: Why do you need to go via this extra 'h' variable?

for (; domain; domain = domain->parent) {
if (irq_domain_is_msi_remap(domain))

should work just as well.

> + return true;
> + }
> + return false;
> +}
>  #else/* CONFIG_IRQ_DOMAIN_HIERARCHY */
>  /**
>   * irq_domain_get_irq_data - Get irq_data associated with @virq and @domain
> 

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v5 13/17] irqdomain: irq_domain_check_msi_remap

2017-01-06 Thread Marc Zyngier
On 06/01/17 04:27, Bharat Bhushan wrote:
> Hi Mark,
> 
>> -Original Message-
>> From: Auger Eric [mailto:eric.au...@redhat.com]
>> Sent: Thursday, January 05, 2017 5:39 PM
>> To: Marc Zyngier <marc.zyng...@arm.com>; eric.auger@gmail.com;
>> christoffer.d...@linaro.org; robin.mur...@arm.com;
>> alex.william...@redhat.com; will.dea...@arm.com; j...@8bytes.org;
>> t...@linutronix.de; ja...@lakedaemon.net; linux-arm-
>> ker...@lists.infradead.org
>> Cc: drjo...@redhat.com; k...@vger.kernel.org; punit.agra...@arm.com;
>> linux-ker...@vger.kernel.org; geethasowjanya.ak...@gmail.com; Diana
>> Madalina Craciun <diana.crac...@nxp.com>; iommu@lists.linux-
>> foundation.org; pranav.sawargaon...@gmail.com; Bharat Bhushan
>> <bharat.bhus...@nxp.com>; shank...@codeaurora.org;
>> gpkulka...@gmail.com
>> Subject: Re: [PATCH v5 13/17] irqdomain: irq_domain_check_msi_remap
>>
>> Hi Marc,
>>
>> On 05/01/2017 12:57, Marc Zyngier wrote:
>>> On 05/01/17 11:29, Auger Eric wrote:
>>>> Hi Marc,
>>>>
>>>> On 05/01/2017 12:25, Marc Zyngier wrote:
>>>>> On 05/01/17 10:45, Auger Eric wrote:
>>>>>> Hi Marc,
>>>>>>
>>>>>> On 04/01/2017 16:27, Marc Zyngier wrote:
>>>>>>> On 04/01/17 14:11, Auger Eric wrote:
>>>>>>>> Hi Marc,
>>>>>>>>
>>>>>>>> On 04/01/2017 14:46, Marc Zyngier wrote:
>>>>>>>>> Hi Eric,
>>>>>>>>>
>>>>>>>>> On 04/01/17 13:32, Eric Auger wrote:
>>>>>>>>>> This new function checks whether all platform and PCI MSI
>>>>>>>>>> domains implement IRQ remapping. This is useful to understand
>>>>>>>>>> whether VFIO passthrough is safe with respect to interrupts.
>>>>>>>>>>
>>>>>>>>>> On ARM typically an MSI controller can sit downstream to the
>>>>>>>>>> IOMMU without preventing VFIO passthrough.
>>>>>>>>>> As such any assigned device can write into the MSI doorbell.
>>>>>>>>>> In case the MSI controller implements IRQ remapping, assigned
>>>>>>>>>> devices will not be able to trigger interrupts towards the
>>>>>>>>>> host. On the contrary, the assignment must be emphasized as
>>>>>>>>>> unsafe with respect to interrupts.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Eric Auger <eric.au...@redhat.com>
>>>>>>>>>>
>>>>>>>>>> ---
>>>>>>>>>>
>>>>>>>>>> v4 -> v5:
>>>>>>>>>> - Handle DOMAIN_BUS_FSL_MC_MSI domains
>>>>>>>>>> - Check parents
>>>>>>>>>> ---
>>>>>>>>>>  include/linux/irqdomain.h |  1 +
>>>>>>>>>>  kernel/irq/irqdomain.c| 41
>> +
>>>>>>>>>>  2 files changed, 42 insertions(+)
>>>>>>>>>>
>>>>>>>>>> diff --git a/include/linux/irqdomain.h
>>>>>>>>>> b/include/linux/irqdomain.h index ab017b2..281a40f 100644
>>>>>>>>>> --- a/include/linux/irqdomain.h
>>>>>>>>>> +++ b/include/linux/irqdomain.h
>>>>>>>>>> @@ -219,6 +219,7 @@ struct irq_domain
>> *irq_domain_add_legacy(struct device_node *of_node,
>>>>>>>>>>   void *host_data);
>>>>>>>>>>  extern struct irq_domain *irq_find_matching_fwspec(struct
>> irq_fwspec *fwspec,
>>>>>>>>>> enum
>> irq_domain_bus_token bus_token);
>>>>>>>>>> +extern bool irq_domain_check_msi_remap(void);
>>>>>>>>>>  extern void irq_set_default_host(struct irq_domain *host);
>>>>>>>>>> extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
>>>>>>>>>>irq_hw_number_t hwirq, int node,
>> diff --git
>>>>>>>>>> a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index
>>>>>>>

Re: [PATCH v6 17/18] vfio/type1: Check MSI remapping at irq domain level

2017-01-06 Thread Marc Zyngier
On 06/01/17 09:08, Auger Eric wrote:
> Hi Bharat
> 
> On 06/01/2017 09:50, Bharat Bhushan wrote:
>> Hi Eric,
>>
>>> -Original Message-
>>> From: Eric Auger [mailto:eric.au...@redhat.com]
>>> Sent: Friday, January 06, 2017 12:35 AM
>>> To: eric.au...@redhat.com; eric.auger@gmail.com;
>>> christoffer.d...@linaro.org; marc.zyng...@arm.com;
>>> robin.mur...@arm.com; alex.william...@redhat.com;
>>> will.dea...@arm.com; j...@8bytes.org; t...@linutronix.de;
>>> ja...@lakedaemon.net; linux-arm-ker...@lists.infradead.org
>>> Cc: k...@vger.kernel.org; drjo...@redhat.com; linux-
>>> ker...@vger.kernel.org; pranav.sawargaon...@gmail.com;
>>> iommu@lists.linux-foundation.org; punit.agra...@arm.com; Diana Madalina
>>> Craciun ; gpkulka...@gmail.com;
>>> shank...@codeaurora.org; Bharat Bhushan ;
>>> geethasowjanya.ak...@gmail.com
>>> Subject: [PATCH v6 17/18] vfio/type1: Check MSI remapping at irq domain
>>> level
>>>
>>> In case the IOMMU translates MSI transactions (typical case on ARM), we
>>> check MSI remapping capability at IRQ domain level. Otherwise it is checked
>>> at IOMMU level.
>>>
>>> At this stage the arm-smmu-(v3) still advertise the
>>> IOMMU_CAP_INTR_REMAP capability at IOMMU level. This will be removed
>>> in subsequent patches.
>>>
>>> Signed-off-by: Eric Auger 
>>>
>>> ---
>>>
>>> v6: rewrite test
>>> ---
>>>  drivers/vfio/vfio_iommu_type1.c | 9 ++---
>>>  1 file changed, 6 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/vfio/vfio_iommu_type1.c
>>> b/drivers/vfio/vfio_iommu_type1.c index b473ef80..fa0b5c4 100644
>>> --- a/drivers/vfio/vfio_iommu_type1.c
>>> +++ b/drivers/vfio/vfio_iommu_type1.c
>>> @@ -40,6 +40,7 @@
>>>  #include 
>>>  #include 
>>>  #include 
>>> +#include 
>>>
>>>  #define DRIVER_VERSION  "0.2"
>>>  #define DRIVER_AUTHOR   "Alex Williamson
>>> "
>>> @@ -1208,7 +1209,7 @@ static int vfio_iommu_type1_attach_group(void
>>> *iommu_data,
>>> struct vfio_domain *domain, *d;
>>> struct bus_type *bus = NULL, *mdev_bus;
>>> int ret;
>>> -   bool resv_msi;
>>> +   bool resv_msi, msi_remap;
>>> phys_addr_t resv_msi_base;
>>>
>>> mutex_lock(>lock);
>>> @@ -1284,8 +1285,10 @@ static int vfio_iommu_type1_attach_group(void
>>> *iommu_data,
>>> INIT_LIST_HEAD(>group_list);
>>> list_add(>next, >group_list);
>>>
>>> -   if (!allow_unsafe_interrupts &&
>>> -   !iommu_capable(bus, IOMMU_CAP_INTR_REMAP)) {
>>> +   msi_remap = resv_msi ? irq_domain_check_msi_remap() :
>>
>> There can be multiple interrupt-controller, at-least theoretically it is 
>> possible and not sure practically it exists and supported, where not all may 
>> support IRQ_REMAP. If that is the case be then should we check for IRQ-REMAP 
>> for that device-bus irq-domain?
>>
> I mentioned in the cover letter that the approach was defensive and
> rough today. As soon as we detect an MSI controller in the platform that
> has no support for MSI remapping we flag the assignment as unsafe. I
> think this approach was agreed on the ML. Such rough assessment was used
> in the past on x86.
> 
> I am reluctant to add more complexity at that stage. This can be
> improved latter I think when such platforms show up.

I don't think this is worth it. If the system is so broken that the
designer cannot make up their mind about device isolation, too bad.
People will either disable the non-isolating MSI controller altogether,
or force the unsafe flag.

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v5 13/17] irqdomain: irq_domain_check_msi_remap

2017-01-05 Thread Marc Zyngier
On 05/01/17 11:29, Auger Eric wrote:
> Hi Marc,
> 
> On 05/01/2017 12:25, Marc Zyngier wrote:
>> On 05/01/17 10:45, Auger Eric wrote:
>>> Hi Marc,
>>>
>>> On 04/01/2017 16:27, Marc Zyngier wrote:
>>>> On 04/01/17 14:11, Auger Eric wrote:
>>>>> Hi Marc,
>>>>>
>>>>> On 04/01/2017 14:46, Marc Zyngier wrote:
>>>>>> Hi Eric,
>>>>>>
>>>>>> On 04/01/17 13:32, Eric Auger wrote:
>>>>>>> This new function checks whether all platform and PCI
>>>>>>> MSI domains implement IRQ remapping. This is useful to
>>>>>>> understand whether VFIO passthrough is safe with respect
>>>>>>> to interrupts.
>>>>>>>
>>>>>>> On ARM typically an MSI controller can sit downstream
>>>>>>> to the IOMMU without preventing VFIO passthrough.
>>>>>>> As such any assigned device can write into the MSI doorbell.
>>>>>>> In case the MSI controller implements IRQ remapping, assigned
>>>>>>> devices will not be able to trigger interrupts towards the
>>>>>>> host. On the contrary, the assignment must be emphasized as
>>>>>>> unsafe with respect to interrupts.
>>>>>>>
>>>>>>> Signed-off-by: Eric Auger <eric.au...@redhat.com>
>>>>>>>
>>>>>>> ---
>>>>>>>
>>>>>>> v4 -> v5:
>>>>>>> - Handle DOMAIN_BUS_FSL_MC_MSI domains
>>>>>>> - Check parents
>>>>>>> ---
>>>>>>>  include/linux/irqdomain.h |  1 +
>>>>>>>  kernel/irq/irqdomain.c| 41 
>>>>>>> +
>>>>>>>  2 files changed, 42 insertions(+)
>>>>>>>
>>>>>>> diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
>>>>>>> index ab017b2..281a40f 100644
>>>>>>> --- a/include/linux/irqdomain.h
>>>>>>> +++ b/include/linux/irqdomain.h
>>>>>>> @@ -219,6 +219,7 @@ struct irq_domain *irq_domain_add_legacy(struct 
>>>>>>> device_node *of_node,
>>>>>>>  void *host_data);
>>>>>>>  extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec 
>>>>>>> *fwspec,
>>>>>>>enum 
>>>>>>> irq_domain_bus_token bus_token);
>>>>>>> +extern bool irq_domain_check_msi_remap(void);
>>>>>>>  extern void irq_set_default_host(struct irq_domain *host);
>>>>>>>  extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
>>>>>>>   irq_hw_number_t hwirq, int node,
>>>>>>> diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
>>>>>>> index 8c0a0ae..700caea 100644
>>>>>>> --- a/kernel/irq/irqdomain.c
>>>>>>> +++ b/kernel/irq/irqdomain.c
>>>>>>> @@ -278,6 +278,47 @@ struct irq_domain *irq_find_matching_fwspec(struct 
>>>>>>> irq_fwspec *fwspec,
>>>>>>>  EXPORT_SYMBOL_GPL(irq_find_matching_fwspec);
>>>>>>>  
>>>>>>>  /**
>>>>>>> + * irq_domain_is_msi_remap - Check if @domain or any parent
>>>>>>> + * has MSI remapping support
>>>>>>> + * @domain: domain pointer
>>>>>>> + */
>>>>>>> +static bool irq_domain_is_msi_remap(struct irq_domain *domain)
>>>>>>> +{
>>>>>>> +   struct irq_domain *h = domain;
>>>>>>> +
>>>>>>> +   for (; h; h = h->parent) {
>>>>>>> +   if (h->flags & IRQ_DOMAIN_FLAG_MSI_REMAP)
>>>>>>> +   return true;
>>>>>>> +   }
>>>>>>> +   return false;
>>>>>>> +}
>>>>>>> +
>>>>>>> +/**
>>>>>>> + * irq_domain_check_msi_remap() - Checks whether all MSI
>>>>>>> + * irq domains implement IRQ remapping
>>>>>>> + */
>>>>>>> +bool irq_domain_check_msi_remap(void)
>>>>>>

Re: [PATCH v5 13/17] irqdomain: irq_domain_check_msi_remap

2017-01-05 Thread Marc Zyngier
On 05/01/17 10:45, Auger Eric wrote:
> Hi Marc,
> 
> On 04/01/2017 16:27, Marc Zyngier wrote:
>> On 04/01/17 14:11, Auger Eric wrote:
>>> Hi Marc,
>>>
>>> On 04/01/2017 14:46, Marc Zyngier wrote:
>>>> Hi Eric,
>>>>
>>>> On 04/01/17 13:32, Eric Auger wrote:
>>>>> This new function checks whether all platform and PCI
>>>>> MSI domains implement IRQ remapping. This is useful to
>>>>> understand whether VFIO passthrough is safe with respect
>>>>> to interrupts.
>>>>>
>>>>> On ARM typically an MSI controller can sit downstream
>>>>> to the IOMMU without preventing VFIO passthrough.
>>>>> As such any assigned device can write into the MSI doorbell.
>>>>> In case the MSI controller implements IRQ remapping, assigned
>>>>> devices will not be able to trigger interrupts towards the
>>>>> host. On the contrary, the assignment must be emphasized as
>>>>> unsafe with respect to interrupts.
>>>>>
>>>>> Signed-off-by: Eric Auger <eric.au...@redhat.com>
>>>>>
>>>>> ---
>>>>>
>>>>> v4 -> v5:
>>>>> - Handle DOMAIN_BUS_FSL_MC_MSI domains
>>>>> - Check parents
>>>>> ---
>>>>>  include/linux/irqdomain.h |  1 +
>>>>>  kernel/irq/irqdomain.c| 41 +
>>>>>  2 files changed, 42 insertions(+)
>>>>>
>>>>> diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
>>>>> index ab017b2..281a40f 100644
>>>>> --- a/include/linux/irqdomain.h
>>>>> +++ b/include/linux/irqdomain.h
>>>>> @@ -219,6 +219,7 @@ struct irq_domain *irq_domain_add_legacy(struct 
>>>>> device_node *of_node,
>>>>>void *host_data);
>>>>>  extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec 
>>>>> *fwspec,
>>>>>  enum irq_domain_bus_token 
>>>>> bus_token);
>>>>> +extern bool irq_domain_check_msi_remap(void);
>>>>>  extern void irq_set_default_host(struct irq_domain *host);
>>>>>  extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
>>>>> irq_hw_number_t hwirq, int node,
>>>>> diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
>>>>> index 8c0a0ae..700caea 100644
>>>>> --- a/kernel/irq/irqdomain.c
>>>>> +++ b/kernel/irq/irqdomain.c
>>>>> @@ -278,6 +278,47 @@ struct irq_domain *irq_find_matching_fwspec(struct 
>>>>> irq_fwspec *fwspec,
>>>>>  EXPORT_SYMBOL_GPL(irq_find_matching_fwspec);
>>>>>  
>>>>>  /**
>>>>> + * irq_domain_is_msi_remap - Check if @domain or any parent
>>>>> + * has MSI remapping support
>>>>> + * @domain: domain pointer
>>>>> + */
>>>>> +static bool irq_domain_is_msi_remap(struct irq_domain *domain)
>>>>> +{
>>>>> + struct irq_domain *h = domain;
>>>>> +
>>>>> + for (; h; h = h->parent) {
>>>>> + if (h->flags & IRQ_DOMAIN_FLAG_MSI_REMAP)
>>>>> + return true;
>>>>> + }
>>>>> + return false;
>>>>> +}
>>>>> +
>>>>> +/**
>>>>> + * irq_domain_check_msi_remap() - Checks whether all MSI
>>>>> + * irq domains implement IRQ remapping
>>>>> + */
>>>>> +bool irq_domain_check_msi_remap(void)
>>>>> +{
>>>>> + struct irq_domain *h;
>>>>> + bool ret = true;
>>>>> +
>>>>> + mutex_lock(_domain_mutex);
>>>>> + list_for_each_entry(h, _domain_list, link) {
>>>>> + if (((h->bus_token & DOMAIN_BUS_PCI_MSI) ||
>>>>> +  (h->bus_token & DOMAIN_BUS_PLATFORM_MSI) ||
>>>>> +  (h->bus_token & DOMAIN_BUS_FSL_MC_MSI)) &&
>>>>> +  !irq_domain_is_msi_remap(h)) {
>>>>
>>>> (h->bus_token & DOMAIN_BUS_PCI_MSI) and co looks quite wrong. bus_token
>>>> is not a bitmap, and DOMAIN_BUS_* not a single bit value (see enum
>>>> irq_domain_bus_token). Surely this should read
>>>

Re: [PATCH v5 13/17] irqdomain: irq_domain_check_msi_remap

2017-01-04 Thread Marc Zyngier
On 04/01/17 14:11, Auger Eric wrote:
> Hi Marc,
> 
> On 04/01/2017 14:46, Marc Zyngier wrote:
>> Hi Eric,
>>
>> On 04/01/17 13:32, Eric Auger wrote:
>>> This new function checks whether all platform and PCI
>>> MSI domains implement IRQ remapping. This is useful to
>>> understand whether VFIO passthrough is safe with respect
>>> to interrupts.
>>>
>>> On ARM typically an MSI controller can sit downstream
>>> to the IOMMU without preventing VFIO passthrough.
>>> As such any assigned device can write into the MSI doorbell.
>>> In case the MSI controller implements IRQ remapping, assigned
>>> devices will not be able to trigger interrupts towards the
>>> host. On the contrary, the assignment must be emphasized as
>>> unsafe with respect to interrupts.
>>>
>>> Signed-off-by: Eric Auger <eric.au...@redhat.com>
>>>
>>> ---
>>>
>>> v4 -> v5:
>>> - Handle DOMAIN_BUS_FSL_MC_MSI domains
>>> - Check parents
>>> ---
>>>  include/linux/irqdomain.h |  1 +
>>>  kernel/irq/irqdomain.c| 41 +
>>>  2 files changed, 42 insertions(+)
>>>
>>> diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
>>> index ab017b2..281a40f 100644
>>> --- a/include/linux/irqdomain.h
>>> +++ b/include/linux/irqdomain.h
>>> @@ -219,6 +219,7 @@ struct irq_domain *irq_domain_add_legacy(struct 
>>> device_node *of_node,
>>>  void *host_data);
>>>  extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec 
>>> *fwspec,
>>>enum irq_domain_bus_token 
>>> bus_token);
>>> +extern bool irq_domain_check_msi_remap(void);
>>>  extern void irq_set_default_host(struct irq_domain *host);
>>>  extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
>>>   irq_hw_number_t hwirq, int node,
>>> diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
>>> index 8c0a0ae..700caea 100644
>>> --- a/kernel/irq/irqdomain.c
>>> +++ b/kernel/irq/irqdomain.c
>>> @@ -278,6 +278,47 @@ struct irq_domain *irq_find_matching_fwspec(struct 
>>> irq_fwspec *fwspec,
>>>  EXPORT_SYMBOL_GPL(irq_find_matching_fwspec);
>>>  
>>>  /**
>>> + * irq_domain_is_msi_remap - Check if @domain or any parent
>>> + * has MSI remapping support
>>> + * @domain: domain pointer
>>> + */
>>> +static bool irq_domain_is_msi_remap(struct irq_domain *domain)
>>> +{
>>> +   struct irq_domain *h = domain;
>>> +
>>> +   for (; h; h = h->parent) {
>>> +   if (h->flags & IRQ_DOMAIN_FLAG_MSI_REMAP)
>>> +   return true;
>>> +   }
>>> +   return false;
>>> +}
>>> +
>>> +/**
>>> + * irq_domain_check_msi_remap() - Checks whether all MSI
>>> + * irq domains implement IRQ remapping
>>> + */
>>> +bool irq_domain_check_msi_remap(void)
>>> +{
>>> +   struct irq_domain *h;
>>> +   bool ret = true;
>>> +
>>> +   mutex_lock(_domain_mutex);
>>> +   list_for_each_entry(h, _domain_list, link) {
>>> +   if (((h->bus_token & DOMAIN_BUS_PCI_MSI) ||
>>> +(h->bus_token & DOMAIN_BUS_PLATFORM_MSI) ||
>>> +(h->bus_token & DOMAIN_BUS_FSL_MC_MSI)) &&
>>> +!irq_domain_is_msi_remap(h)) {
>>
>> (h->bus_token & DOMAIN_BUS_PCI_MSI) and co looks quite wrong. bus_token
>> is not a bitmap, and DOMAIN_BUS_* not a single bit value (see enum
>> irq_domain_bus_token). Surely this should read
>> (h->bus_token == DOMAIN_BUS_PCI_MSI).
> Oh I did not notice that. Thanks.
> 
> Any other comments on the irqdomain side? Do you think the current
> approach consisting in looking at those bus tokens and their parents
> looks good?

To be completely honest, I don't like it much, as having to enumerate
all the bus types can come up with could become quite a burden in the
long run. I'd rather be able to identify MSI capable domains by
construction. I came up with the following approach (fully untested):

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 281a40f..7779796 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -183,8 +183,11 @@ enum {
/* Irq domain is an IPI domain with single virq */
IRQ_DOMAIN_FL

  1   2   >