Re: [RFC v3 15/15] irqchip/gicv2m/v3-its-pci-msi: IOMMU map the MSI frame when needed

2016-02-18 Thread Eric Auger
Hi Marc,
On 02/18/2016 04:47 PM, Marc Zyngier wrote:
> On 18/02/16 15:33, Eric Auger wrote:
>> Hi Marc,
>> On 02/18/2016 12:33 PM, Marc Zyngier wrote:
>>> On Fri, 12 Feb 2016 08:13:17 +
>>> Eric Auger  wrote:
>>>
 In case the msi_desc references a device attached to an iommu
 domain, the msi address needs to be mapped in the IOMMU. Else any
 MSI write transaction will cause a fault.

 gic_set_msi_addr detects that case and allocates an iova bound
 to the physical address page comprising the MSI frame. This iova
 then is used as the msi_msg address. Unset operation decrements the
 reference on the binding.

 The functions are called in the irq_write_msi_msg ops implementation.
 At that time we can recognize whether the msi is set up or torn down
 looking at the msi_msg content. Indeed msi_domain_deactivate zeroes all
 the fields.

 Signed-off-by: Eric Auger 

 ---

 v2 -> v3:
 - protect iova/addr manipulation with CONFIG_ARCH_DMA_ADDR_T_64BIT and
   CONFIG_PHYS_ADDR_T_64BIT
 - only expose gic_pci_msi_domain_write_msg in case CONFIG_IOMMU_API &
   CONFIG_PCI_MSI_IRQ_DOMAIN are set.
 - gic_set/unset_msi_addr duly become static
 ---
  drivers/irqchip/irq-gic-common.c | 69 
 
  drivers/irqchip/irq-gic-common.h |  5 +++
  drivers/irqchip/irq-gic-v2m.c|  7 +++-
  drivers/irqchip/irq-gic-v3-its-pci-msi.c |  5 +++
  4 files changed, 85 insertions(+), 1 deletion(-)

 diff --git a/drivers/irqchip/irq-gic-common.c 
 b/drivers/irqchip/irq-gic-common.c
 index f174ce0..46cd06c 100644
 --- a/drivers/irqchip/irq-gic-common.c
 +++ b/drivers/irqchip/irq-gic-common.c
 @@ -18,6 +18,8 @@
  #include 
  #include 
  #include 
 +#include 
 +#include 
  
  #include "irq-gic-common.h"
  
 @@ -121,3 +123,70 @@ void gic_cpu_config(void __iomem *base, void 
 (*sync_access)(void))
if (sync_access)
sync_access();
  }
 +
 +#if defined(CONFIG_IOMMU_API) && defined(CONFIG_PCI_MSI_IRQ_DOMAIN)
 +static int gic_set_msi_addr(struct irq_data *data, struct msi_msg *msg)
 +{
 +  struct msi_desc *desc = irq_data_get_msi_desc(data);
 +  struct device *dev = msi_desc_to_dev(desc);
 +  struct iommu_domain *d;
 +  phys_addr_t addr;
 +  dma_addr_t iova;
 +  int ret;
 +
 +  d = iommu_get_domain_for_dev(dev);
 +  if (!d)
 +  return 0;
 +#ifdef CONFIG_PHYS_ADDR_T_64BIT
 +  addr = ((phys_addr_t)(msg->address_hi) << 32) | msg->address_lo;
 +#else
 +  addr = msg->address_lo;
 +#endif
 +
 +  ret = iommu_get_single_reserved(d, addr, IOMMU_WRITE, &iova);
 +
 +  if (!ret) {
 +  msg->address_lo = lower_32_bits(iova);
 +  msg->address_hi = upper_32_bits(iova);
 +  }
 +  return ret;
 +}
 +
 +
 +static void gic_unset_msi_addr(struct irq_data *data)
 +{
 +  struct msi_desc *desc = irq_data_get_msi_desc(data);
 +  struct device *dev;
 +  struct iommu_domain *d;
 +  dma_addr_t iova;
 +
 +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
 +  iova = ((dma_addr_t)(desc->msg.address_hi) << 32) |
 +  desc->msg.address_lo;
 +#else
 +  iova = desc->msg.address_lo;
 +#endif
 +
 +  dev = msi_desc_to_dev(desc);
 +  if (!dev)
 +  return;
 +
 +  d = iommu_get_domain_for_dev(dev);
 +  if (!d)
 +  return;
 +
 +  iommu_put_single_reserved(d, iova);
 +}
 +
 +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data,
 +struct msi_msg *msg)
 +{
 +  if (!msg->address_hi && !msg->address_lo && !msg->data)
 +  gic_unset_msi_addr(irq_data); /* deactivate */
 +  else
 +  gic_set_msi_addr(irq_data, msg); /* activate, set_affinity */
 +
 +  pci_msi_domain_write_msg(irq_data, msg);
 +}
>>>
>>> So by doing that, you are specializing this infrastructure to PCI.
>>> If you hijacked irq_compose_msi_msg() instead, you'd have both
>>> platform and PCI MSI for the same price.
>>>
>>> I can see a potential problem with the teardown of an MSI (I don't
>>> think the compose method is called on teardown), but I think this could
>>> be easily addressed.
>> Yes effectively this is the reason why I moved it from
>> irq_compose_msi_msg (my original implementation) to irq_write_msi_msg. I
>> noticed I had no way to detect the teardown whereas the
>> msi_domain_deactivate also calls irq_write_msi_msg which is quite
>> practical ;-) To be honest I need to further look at the non PCI
>> implementation.
> 
> Another thing to consider is that MSI controllers may use different
> doorbells for different CPU affinities.

OK thanks for pointing this out.

This is 

Re: [RFC v3 15/15] irqchip/gicv2m/v3-its-pci-msi: IOMMU map the MSI frame when needed

2016-02-18 Thread Eric Auger
Hi Marc,
On 02/18/2016 12:33 PM, Marc Zyngier wrote:
> On Fri, 12 Feb 2016 08:13:17 +
> Eric Auger  wrote:
> 
>> In case the msi_desc references a device attached to an iommu
>> domain, the msi address needs to be mapped in the IOMMU. Else any
>> MSI write transaction will cause a fault.
>>
>> gic_set_msi_addr detects that case and allocates an iova bound
>> to the physical address page comprising the MSI frame. This iova
>> then is used as the msi_msg address. Unset operation decrements the
>> reference on the binding.
>>
>> The functions are called in the irq_write_msi_msg ops implementation.
>> At that time we can recognize whether the msi is set up or torn down
>> looking at the msi_msg content. Indeed msi_domain_deactivate zeroes all
>> the fields.
>>
>> Signed-off-by: Eric Auger 
>>
>> ---
>>
>> v2 -> v3:
>> - protect iova/addr manipulation with CONFIG_ARCH_DMA_ADDR_T_64BIT and
>>   CONFIG_PHYS_ADDR_T_64BIT
>> - only expose gic_pci_msi_domain_write_msg in case CONFIG_IOMMU_API &
>>   CONFIG_PCI_MSI_IRQ_DOMAIN are set.
>> - gic_set/unset_msi_addr duly become static
>> ---
>>  drivers/irqchip/irq-gic-common.c | 69 
>> 
>>  drivers/irqchip/irq-gic-common.h |  5 +++
>>  drivers/irqchip/irq-gic-v2m.c|  7 +++-
>>  drivers/irqchip/irq-gic-v3-its-pci-msi.c |  5 +++
>>  4 files changed, 85 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/irqchip/irq-gic-common.c 
>> b/drivers/irqchip/irq-gic-common.c
>> index f174ce0..46cd06c 100644
>> --- a/drivers/irqchip/irq-gic-common.c
>> +++ b/drivers/irqchip/irq-gic-common.c
>> @@ -18,6 +18,8 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>> +#include 
>>  
>>  #include "irq-gic-common.h"
>>  
>> @@ -121,3 +123,70 @@ void gic_cpu_config(void __iomem *base, void 
>> (*sync_access)(void))
>>  if (sync_access)
>>  sync_access();
>>  }
>> +
>> +#if defined(CONFIG_IOMMU_API) && defined(CONFIG_PCI_MSI_IRQ_DOMAIN)
>> +static int gic_set_msi_addr(struct irq_data *data, struct msi_msg *msg)
>> +{
>> +struct msi_desc *desc = irq_data_get_msi_desc(data);
>> +struct device *dev = msi_desc_to_dev(desc);
>> +struct iommu_domain *d;
>> +phys_addr_t addr;
>> +dma_addr_t iova;
>> +int ret;
>> +
>> +d = iommu_get_domain_for_dev(dev);
>> +if (!d)
>> +return 0;
>> +#ifdef CONFIG_PHYS_ADDR_T_64BIT
>> +addr = ((phys_addr_t)(msg->address_hi) << 32) | msg->address_lo;
>> +#else
>> +addr = msg->address_lo;
>> +#endif
>> +
>> +ret = iommu_get_single_reserved(d, addr, IOMMU_WRITE, &iova);
>> +
>> +if (!ret) {
>> +msg->address_lo = lower_32_bits(iova);
>> +msg->address_hi = upper_32_bits(iova);
>> +}
>> +return ret;
>> +}
>> +
>> +
>> +static void gic_unset_msi_addr(struct irq_data *data)
>> +{
>> +struct msi_desc *desc = irq_data_get_msi_desc(data);
>> +struct device *dev;
>> +struct iommu_domain *d;
>> +dma_addr_t iova;
>> +
>> +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
>> +iova = ((dma_addr_t)(desc->msg.address_hi) << 32) |
>> +desc->msg.address_lo;
>> +#else
>> +iova = desc->msg.address_lo;
>> +#endif
>> +
>> +dev = msi_desc_to_dev(desc);
>> +if (!dev)
>> +return;
>> +
>> +d = iommu_get_domain_for_dev(dev);
>> +if (!d)
>> +return;
>> +
>> +iommu_put_single_reserved(d, iova);
>> +}
>> +
>> +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data,
>> +  struct msi_msg *msg)
>> +{
>> +if (!msg->address_hi && !msg->address_lo && !msg->data)
>> +gic_unset_msi_addr(irq_data); /* deactivate */
>> +else
>> +gic_set_msi_addr(irq_data, msg); /* activate, set_affinity */
>> +
>> +pci_msi_domain_write_msg(irq_data, msg);
>> +}
> 
> So by doing that, you are specializing this infrastructure to PCI.
> If you hijacked irq_compose_msi_msg() instead, you'd have both
> platform and PCI MSI for the same price.
> 
> I can see a potential problem with the teardown of an MSI (I don't
> think the compose method is called on teardown), but I think this could
> be easily addressed.
Yes effectively this is the reason why I moved it from
irq_compose_msi_msg (my original implementation) to irq_write_msi_msg. I
noticed I had no way to detect the teardown whereas the
msi_domain_deactivate also calls irq_write_msi_msg which is quite
practical ;-) To be honest I need to further look at the non PCI
implementation.


> 
>> +#endif
>> +
>> diff --git a/drivers/irqchip/irq-gic-common.h 
>> b/drivers/irqchip/irq-gic-common.h
>> index fff697d..98681fd 100644
>> --- a/drivers/irqchip/irq-gic-common.h
>> +++ b/drivers/irqchip/irq-gic-common.h
>> @@ -35,4 +35,9 @@ void gic_cpu_config(void __iomem *base, void 
>> (*sync_access)(void));
>>  void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
>>  void *data);
>>  
>> +#if 

Re: [RFC v3 15/15] irqchip/gicv2m/v3-its-pci-msi: IOMMU map the MSI frame when needed

2016-02-18 Thread Marc Zyngier
On Fri, 12 Feb 2016 08:13:17 +
Eric Auger  wrote:

> In case the msi_desc references a device attached to an iommu
> domain, the msi address needs to be mapped in the IOMMU. Else any
> MSI write transaction will cause a fault.
> 
> gic_set_msi_addr detects that case and allocates an iova bound
> to the physical address page comprising the MSI frame. This iova
> then is used as the msi_msg address. Unset operation decrements the
> reference on the binding.
> 
> The functions are called in the irq_write_msi_msg ops implementation.
> At that time we can recognize whether the msi is set up or torn down
> looking at the msi_msg content. Indeed msi_domain_deactivate zeroes all
> the fields.
> 
> Signed-off-by: Eric Auger 
> 
> ---
> 
> v2 -> v3:
> - protect iova/addr manipulation with CONFIG_ARCH_DMA_ADDR_T_64BIT and
>   CONFIG_PHYS_ADDR_T_64BIT
> - only expose gic_pci_msi_domain_write_msg in case CONFIG_IOMMU_API &
>   CONFIG_PCI_MSI_IRQ_DOMAIN are set.
> - gic_set/unset_msi_addr duly become static
> ---
>  drivers/irqchip/irq-gic-common.c | 69 
> 
>  drivers/irqchip/irq-gic-common.h |  5 +++
>  drivers/irqchip/irq-gic-v2m.c|  7 +++-
>  drivers/irqchip/irq-gic-v3-its-pci-msi.c |  5 +++
>  4 files changed, 85 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/irqchip/irq-gic-common.c 
> b/drivers/irqchip/irq-gic-common.c
> index f174ce0..46cd06c 100644
> --- a/drivers/irqchip/irq-gic-common.c
> +++ b/drivers/irqchip/irq-gic-common.c
> @@ -18,6 +18,8 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
>  
>  #include "irq-gic-common.h"
>  
> @@ -121,3 +123,70 @@ void gic_cpu_config(void __iomem *base, void 
> (*sync_access)(void))
>   if (sync_access)
>   sync_access();
>  }
> +
> +#if defined(CONFIG_IOMMU_API) && defined(CONFIG_PCI_MSI_IRQ_DOMAIN)
> +static int gic_set_msi_addr(struct irq_data *data, struct msi_msg *msg)
> +{
> + struct msi_desc *desc = irq_data_get_msi_desc(data);
> + struct device *dev = msi_desc_to_dev(desc);
> + struct iommu_domain *d;
> + phys_addr_t addr;
> + dma_addr_t iova;
> + int ret;
> +
> + d = iommu_get_domain_for_dev(dev);
> + if (!d)
> + return 0;
> +#ifdef CONFIG_PHYS_ADDR_T_64BIT
> + addr = ((phys_addr_t)(msg->address_hi) << 32) | msg->address_lo;
> +#else
> + addr = msg->address_lo;
> +#endif
> +
> + ret = iommu_get_single_reserved(d, addr, IOMMU_WRITE, &iova);
> +
> + if (!ret) {
> + msg->address_lo = lower_32_bits(iova);
> + msg->address_hi = upper_32_bits(iova);
> + }
> + return ret;
> +}
> +
> +
> +static void gic_unset_msi_addr(struct irq_data *data)
> +{
> + struct msi_desc *desc = irq_data_get_msi_desc(data);
> + struct device *dev;
> + struct iommu_domain *d;
> + dma_addr_t iova;
> +
> +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> + iova = ((dma_addr_t)(desc->msg.address_hi) << 32) |
> + desc->msg.address_lo;
> +#else
> + iova = desc->msg.address_lo;
> +#endif
> +
> + dev = msi_desc_to_dev(desc);
> + if (!dev)
> + return;
> +
> + d = iommu_get_domain_for_dev(dev);
> + if (!d)
> + return;
> +
> + iommu_put_single_reserved(d, iova);
> +}
> +
> +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data,
> +   struct msi_msg *msg)
> +{
> + if (!msg->address_hi && !msg->address_lo && !msg->data)
> + gic_unset_msi_addr(irq_data); /* deactivate */
> + else
> + gic_set_msi_addr(irq_data, msg); /* activate, set_affinity */
> +
> + pci_msi_domain_write_msg(irq_data, msg);
> +}

So by doing that, you are specializing this infrastructure to PCI.
If you hijacked irq_compose_msi_msg() instead, you'd have both
platform and PCI MSI for the same price.

I can see a potential problem with the teardown of an MSI (I don't
think the compose method is called on teardown), but I think this could
be easily addressed.

> +#endif
> +
> diff --git a/drivers/irqchip/irq-gic-common.h 
> b/drivers/irqchip/irq-gic-common.h
> index fff697d..98681fd 100644
> --- a/drivers/irqchip/irq-gic-common.h
> +++ b/drivers/irqchip/irq-gic-common.h
> @@ -35,4 +35,9 @@ void gic_cpu_config(void __iomem *base, void 
> (*sync_access)(void));
>  void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
>   void *data);
>  
> +#if defined(CONFIG_PCI_MSI_IRQ_DOMAIN) && defined(CONFIG_IOMMU_API)
> +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data,
> +   struct msi_msg *msg);
> +#endif
> +
>  #endif /* _IRQ_GIC_COMMON_H */
> diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
> index c779f83..692d809 100644
> --- a/drivers/irqchip/irq-gic-v2m.c
> +++ b/drivers/irqchip/irq-gic-v2m.c
> @@ -24,6 +24,7 @@
>  #include 
>  #include 
>  #include 
> +#include "irq-gic-common.h"
>  
>