Re: [RFC v3 18/21] vfio-pci: Add a new VFIO_REGION_TYPE_NESTED region type

2019-01-11 Thread Alex Williamson
On Tue,  8 Jan 2019 11:26:30 +0100
Eric Auger  wrote:

> This patch adds a new 64kB region aiming to report nested mode
> translation faults.
> 
> The region contains a header with the size of the queue,
> the producer and consumer indices and then the actual
> fault queue data. The producer is updated by the kernel while
> the consumer is updated by the userspace.
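For reference, the layout described above corresponds to something like
the following sketch (exact field order and the reserved field are
assumptions inferred from the code below; only size/prod/cons are named
in the text):

	/* sketch only, not the actual uapi */
	struct vfio_fault_region_header {
		__u32	size;		/* queue capacity, in struct iommu_fault entries */
		__u32	prod;		/* producer index, updated by the kernel */
		__u32	cons;		/* consumer index, updated by userspace */
		__u32	reserved;
	};

	struct vfio_fault_region {
		struct vfio_fault_region_header header;
		struct iommu_fault queue[];	/* fills the rest of the 64kB region */
	};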
> 
> Signed-off-by: Eric Auger 
> 
> ---
> ---
>  drivers/vfio/pci/vfio_pci.c | 102 +++-
>  drivers/vfio/pci/vfio_pci_private.h |   2 +
>  include/uapi/linux/vfio.h   |  15 
>  3 files changed, 118 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index ff60bd1ea587..2ba181ab2edd 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -56,6 +56,11 @@ module_param(disable_idle_d3, bool, S_IRUGO | S_IWUSR);
>  MODULE_PARM_DESC(disable_idle_d3,
>"Disable using the PCI D3 low power state for idle, unused 
> devices");
>  
> +#define VFIO_FAULT_REGION_SIZE 0x10000

Why 64K?

> +#define VFIO_FAULT_QUEUE_SIZE\
> + ((VFIO_FAULT_REGION_SIZE - sizeof(struct vfio_fault_region_header)) / \
> + sizeof(struct iommu_fault))
> +
>  static inline bool vfio_vga_disabled(void)
>  {
>  #ifdef CONFIG_VFIO_PCI_VGA
> @@ -1226,6 +1231,100 @@ static const struct vfio_device_ops vfio_pci_ops = {
>  static int vfio_pci_reflck_attach(struct vfio_pci_device *vdev);
>  static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck);
>  
> +static size_t
> +vfio_pci_dma_fault_rw(struct vfio_pci_device *vdev, char __user *buf,
> +   size_t count, loff_t *ppos, bool iswrite)
> +{
> + unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
> + void *base = vdev->region[i].data;
> + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
> +
> + if (pos >= vdev->region[i].size)
> + return -EINVAL;
> +
> + count = min(count, (size_t)(vdev->region[i].size - pos));
> +
> + if (copy_to_user(buf, base + pos, count))
> + return -EFAULT;
> +
> + *ppos += count;
> +
> + return count;
> +}
> +
> +static int vfio_pci_dma_fault_mmap(struct vfio_pci_device *vdev,
> +struct vfio_pci_region *region,
> +struct vm_area_struct *vma)
> +{
> + u64 phys_len, req_len, pgoff, req_start;
> + unsigned long long addr;
> + unsigned int index;
> +
> + index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
> +
> + if (vma->vm_end < vma->vm_start)
> + return -EINVAL;
> + if ((vma->vm_flags & VM_SHARED) == 0)
> + return -EINVAL;
> +
> + phys_len = VFIO_FAULT_REGION_SIZE;
> +
> + req_len = vma->vm_end - vma->vm_start;
> + pgoff = vma->vm_pgoff &
> + ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
> + req_start = pgoff << PAGE_SHIFT;
> +
> + if (req_start + req_len > phys_len)
> + return -EINVAL;
> +
> + addr = virt_to_phys(vdev->fault_region);
> + vma->vm_private_data = vdev;
> + vma->vm_pgoff = (addr >> PAGE_SHIFT) + pgoff;
> +
> + return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
> +req_len, vma->vm_page_prot);
> +}
> +
> +void vfio_pci_dma_fault_release(struct vfio_pci_device *vdev,
> + struct vfio_pci_region *region)
> +{
> +}
> +
> +static const struct vfio_pci_regops vfio_pci_dma_fault_regops = {
> + .rw = vfio_pci_dma_fault_rw,
> + .mmap   = vfio_pci_dma_fault_mmap,
> + .release= vfio_pci_dma_fault_release,
> +};
> +
> +static int vfio_pci_init_dma_fault_region(struct vfio_pci_device *vdev)
> +{
> + u32 flags = VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE |
> + VFIO_REGION_INFO_FLAG_MMAP;
> + int ret;
> +
> + spin_lock_init(&vdev->fault_queue_lock);
> +
> + vdev->fault_region = kmalloc(VFIO_FAULT_REGION_SIZE, GFP_KERNEL);
> + if (!vdev->fault_region)
> + return -ENOMEM;
> +
> + ret = vfio_pci_register_dev_region(vdev,
> + VFIO_REGION_TYPE_NESTED,
> + VFIO_REGION_SUBTYPE_NESTED_FAULT_REGION,
> + &vfio_pci_dma_fault_regops, VFIO_FAULT_REGION_SIZE,
> + flags, vdev->fault_region);
> + if (ret) {
> + kfree(vdev->fault_region);
> + return ret;
> + }
> +
> + vdev->fault_region->header.prod = 0;
> + vdev->fault_region->header.cons = 0;
> + vdev->fault_region->header.reserved = 0;

Use kzalloc above or else we're leaking kernel memory to userspace
anyway.
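Something like this (sketch of the suggested fix):

	/* zero the region so no stale kernel memory is exposed via mmap */
	vdev->fault_region = kzalloc(VFIO_FAULT_REGION_SIZE, GFP_KERNEL);
	if (!vdev->fault_region)
		return -ENOMEM;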

> + vdev->fault_region->header.size = VFIO_FAULT_QUEUE_SIZE;
> + return 0;
> +}
> +
>  static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  {
>   struct vfio_pci_device *vdev;
> @@ -1300,7 +1399,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const 
> 

Re: [RFC v3 06/21] vfio: VFIO_IOMMU_BIND_MSI

2019-01-11 Thread Alex Williamson
On Fri, 11 Jan 2019 16:02:44 -0700
Alex Williamson  wrote:

> On Tue,  8 Jan 2019 11:26:18 +0100
> Eric Auger  wrote:
> 
> > This patch adds the VFIO_IOMMU_BIND_MSI ioctl which aims at
> > passing the guest MSI binding to the host.
> > 
> > Signed-off-by: Eric Auger 
> > 
> > ---
> > 
> > v2 -> v3:
> > - adapt to new proto of bind_guest_msi
> > - directly use vfio_iommu_for_each_dev
> > 
> > v1 -> v2:
> > - s/vfio_iommu_type1_guest_msi_binding/vfio_iommu_type1_bind_guest_msi
> > ---
> >  drivers/vfio/vfio_iommu_type1.c | 27 +++
> >  include/uapi/linux/vfio.h   |  7 +++
> >  2 files changed, 34 insertions(+)
> > 
> > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> > index c3ba3f249438..59229f6e2d84 100644
> > --- a/drivers/vfio/vfio_iommu_type1.c
> > +++ b/drivers/vfio/vfio_iommu_type1.c
> > @@ -1673,6 +1673,15 @@ static int vfio_cache_inv_fn(struct device *dev, void *data)
> > return iommu_cache_invalidate(d, dev, &ustruct->info);
> >  }
> >  
> > +static int vfio_bind_guest_msi_fn(struct device *dev, void *data)
> > +{
> > +   struct vfio_iommu_type1_bind_guest_msi *ustruct =
> > +   (struct vfio_iommu_type1_bind_guest_msi *)data;
> > +   struct iommu_domain *d = iommu_get_domain_for_dev(dev);
> > +
> > +   return iommu_bind_guest_msi(d, dev, &ustruct->binding);
> > +}
> > +
> >  static int
> >  vfio_set_pasid_table(struct vfio_iommu *iommu,
> >   struct vfio_iommu_type1_set_pasid_table *ustruct)
> > @@ -1792,6 +1801,24 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
> >   vfio_cache_inv_fn);
> > mutex_unlock(&iommu->lock);
> > return ret;
> > +   } else if (cmd == VFIO_IOMMU_BIND_MSI) {
> > +   struct vfio_iommu_type1_bind_guest_msi ustruct;
> > +   int ret;
> > +
> > +   minsz = offsetofend(struct vfio_iommu_type1_bind_guest_msi,
> > +   binding);
> > +
> > +   if (copy_from_user(&ustruct, (void __user *)arg, minsz))
> > +   return -EFAULT;
> > +
> > +   if (ustruct.argsz < minsz || ustruct.flags)
> > +   return -EINVAL;
> > +
> > +   mutex_lock(&iommu->lock);
> > +   ret = vfio_iommu_for_each_dev(iommu, &ustruct,
> > + vfio_bind_guest_msi_fn);  
> 
> The vfio_iommu_for_each_dev() interface is fine for invalidation, where
> a partial failure requires no unwind, but it's not sufficiently robust
> here.

Additionally, what happens as devices are added and removed from the
guest?  Are we designing an interface that specifically precludes
hotplug?  Thanks,

Alex
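For illustration, a more robust sequence would have to undo partial
bindings on failure, roughly like this sketch (vfio_unbind_guest_msi_fn
is hypothetical):

	mutex_lock(&iommu->lock);
	ret = vfio_iommu_for_each_dev(iommu, &ustruct, vfio_bind_guest_msi_fn);
	if (ret)
		/* unwind bindings applied before the failure */
		vfio_iommu_for_each_dev(iommu, &ustruct,
					vfio_unbind_guest_msi_fn);
	mutex_unlock(&iommu->lock);

and devices hot-added later would still need the binding replayed when
they attach.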
 
> > +   mutex_unlock(&iommu->lock);
> > +   return ret;
> > }
> >  
> > return -ENOTTY;
> > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> > index 11a07165e7e1..352e795a93c8 100644
> > --- a/include/uapi/linux/vfio.h
> > +++ b/include/uapi/linux/vfio.h
> > @@ -774,6 +774,13 @@ struct vfio_iommu_type1_cache_invalidate {
> >  };
> >  #define VFIO_IOMMU_CACHE_INVALIDATE  _IO(VFIO_TYPE, VFIO_BASE + 23)
> >  
> > +struct vfio_iommu_type1_bind_guest_msi {
> > +   __u32   argsz;
> > +   __u32   flags;
> > +   struct iommu_guest_msi_binding binding;
> > +};
> > +#define VFIO_IOMMU_BIND_MSI  _IO(VFIO_TYPE, VFIO_BASE + 24)  
> 
> -ENOCOMMENTS  MSIs are setup and torn down, is this only a machine init
> sort of interface?  How does the user un-bind?  Thanks,
> 
> Alex
> 
> > +
> >  /*  Additional API for SPAPR TCE (Server POWERPC) IOMMU  */
> >  
> >  /*  
> 



Re: [RFC v3 04/21] vfio: VFIO_IOMMU_SET_PASID_TABLE

2019-01-11 Thread Alex Williamson
On Tue,  8 Jan 2019 11:26:16 +0100
Eric Auger  wrote:

> From: "Liu, Yi L" 
> 
> This patch adds VFIO_IOMMU_SET_PASID_TABLE ioctl which aims at
> passing the virtual iommu guest configuration to the VFIO driver
> down to the iommu subsystem.
> 
> Signed-off-by: Jacob Pan 
> Signed-off-by: Liu, Yi L 
> Signed-off-by: Eric Auger 
> 
> ---
> v2 -> v3:
> - s/BIND_PASID_TABLE/SET_PASID_TABLE
> 
> v1 -> v2:
> - s/BIND_GUEST_STAGE/BIND_PASID_TABLE
> - remove the struct device arg
> ---
>  drivers/vfio/vfio_iommu_type1.c | 31 +++
>  include/uapi/linux/vfio.h   |  8 
>  2 files changed, 39 insertions(+)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 7651cfb14836..d9dd23f64f00 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -1644,6 +1644,24 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
>   return ret;
>  }
>  
> +static int
> +vfio_set_pasid_table(struct vfio_iommu *iommu,
> +   struct vfio_iommu_type1_set_pasid_table *ustruct)
> +{
> + struct vfio_domain *d;
> + int ret = 0;
> +
> + mutex_lock(&iommu->lock);
> +
> + list_for_each_entry(d, &iommu->domain_list, next) {
> + ret = iommu_set_pasid_table(d->domain, &ustruct->config);
> + if (ret)
> + break;
> + }

There's no unwind on failure here, leaves us in an inconsistent state
should something go wrong or domains don't have homogeneous PASID
support.  What's expected to happen if a PASID table is already set for
a domain, does it replace the old one or return -EBUSY?
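A sketch of what unwinding could look like, assuming some way to restore
the previous configuration exists (the unset_cfg below is hypothetical):

	list_for_each_entry(d, &iommu->domain_list, next) {
		ret = iommu_set_pasid_table(d->domain, &ustruct->config);
		if (ret) {
			/* roll back the domains already updated */
			list_for_each_entry_continue_reverse(d,
					&iommu->domain_list, next)
				iommu_set_pasid_table(d->domain, &unset_cfg);
			break;
		}
	}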

> + mutex_unlock(&iommu->lock);
> + return ret;
> +}
> +
>  static long vfio_iommu_type1_ioctl(void *iommu_data,
>  unsigned int cmd, unsigned long arg)
>  {
> @@ -1714,6 +1732,19 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
>  
> return copy_to_user((void __user *)arg, &info, minsz) ?
>   -EFAULT : 0;
> + } else if (cmd == VFIO_IOMMU_SET_PASID_TABLE) {
> + struct vfio_iommu_type1_set_pasid_table ustruct;
> +
> + minsz = offsetofend(struct vfio_iommu_type1_set_pasid_table,
> + config);
> +
> + if (copy_from_user(&ustruct, (void __user *)arg, minsz))
> + return -EFAULT;
> +
> + if (ustruct.argsz < minsz || ustruct.flags)
> + return -EINVAL;
> +
> + return vfio_set_pasid_table(iommu, &ustruct);
>   }
>  
>   return -ENOTTY;
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 02bb7ad6e986..0d9f4090c95d 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -14,6 +14,7 @@
>  
>  #include 
>  #include 
> +#include 
>  
>  #define VFIO_API_VERSION 0
>  
> @@ -759,6 +760,13 @@ struct vfio_iommu_type1_dma_unmap {
>  #define VFIO_IOMMU_ENABLE_IO(VFIO_TYPE, VFIO_BASE + 15)
>  #define VFIO_IOMMU_DISABLE   _IO(VFIO_TYPE, VFIO_BASE + 16)
>  
> +struct vfio_iommu_type1_set_pasid_table {
> + __u32   argsz;
> + __u32   flags;
> + struct iommu_pasid_table_config config;
> +};
> +#define VFIO_IOMMU_SET_PASID_TABLE   _IO(VFIO_TYPE, VFIO_BASE + 22)

-ENOCOMMENTS  Thanks,

Alex

> +
>  /*  Additional API for SPAPR TCE (Server POWERPC) IOMMU  */
>  
>  /*



Re: [RFC v3 03/21] iommu: Introduce bind_guest_msi

2019-01-11 Thread Alex Williamson
On Tue,  8 Jan 2019 11:26:15 +0100
Eric Auger  wrote:

> On ARM, MSIs are translated by the SMMU. An IOVA is allocated
> for each MSI doorbell. If both the host and the guest are exposed
> with SMMUs, we end up with 2 different IOVAs, one allocated by each:
> the guest allocates an IOVA (gIOVA) to map onto the guest MSI
> doorbell (gDB), and the host allocates another IOVA (hIOVA) to map
> onto the physical doorbell (hDB).
> 
> So we end up with 2 untied mappings:
>          S1             S2
> gIOVA    ->    gDB
>                hIOVA    ->    gDB
                                 ^^^ hDB

> Currently the PCI device is programmed by the host with hIOVA
> as MSI doorbell. So this does not work.
> 
> This patch introduces an API to pass gIOVA/gDB to the host so
> that gIOVA can be reused by the host instead of re-allocating
> a new IOVA. So the goal is to create the following nested mapping:
> 
>          S1             S2
> gIOVA    ->    gDB    ->    hDB
> 
> and program the PCI device with gIOVA MSI doorbell.
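As a usage sketch, a caller such as VFIO would hand the guest binding
down roughly as follows (giova/gdb are placeholder variables, and the
granule semantics are exactly what is questioned below):

	struct iommu_guest_msi_binding binding = {
		.iova    = giova,	/* gIOVA the guest programmed */
		.gpa     = gdb,		/* guest doorbell address */
		.granule = PAGE_SHIFT,	/* assumed: mapping size as a shift */
	};
	int ret = iommu_bind_guest_msi(domain, dev, &binding);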
> 
> Signed-off-by: Eric Auger 
> 
> ---
> 
> v2 -> v3:
> - add a struct device handle
> ---
>  drivers/iommu/iommu.c  | 10 ++
>  include/linux/iommu.h  | 13 +
>  include/uapi/linux/iommu.h |  6 ++
>  3 files changed, 29 insertions(+)
> 
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index b2e248770508..ea11442e7054 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -1431,6 +1431,16 @@ static void __iommu_detach_device(struct iommu_domain *domain,
>   trace_detach_device_from_domain(dev);
>  }
>  
> +int iommu_bind_guest_msi(struct iommu_domain *domain, struct device *dev,
> +  struct iommu_guest_msi_binding *binding)
> +{
> + if (unlikely(!domain->ops->bind_guest_msi))
> + return -ENODEV;
> +
> + return domain->ops->bind_guest_msi(domain, dev, binding);
> +}
> +EXPORT_SYMBOL_GPL(iommu_bind_guest_msi);
> +
>  void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
>  {
>   struct iommu_group *group;
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 96d59886f230..244c1a3d5989 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -235,6 +235,9 @@ struct iommu_ops {
>   int (*cache_invalidate)(struct iommu_domain *domain, struct device *dev,
>   struct iommu_cache_invalidate_info *inv_info);
>  
> + int (*bind_guest_msi)(struct iommu_domain *domain, struct device *dev,
> +   struct iommu_guest_msi_binding *binding);
> +
>   unsigned long pgsize_bitmap;
>  };
>  
> @@ -301,6 +304,9 @@ extern int iommu_set_pasid_table(struct iommu_domain *domain,
>  extern int iommu_cache_invalidate(struct iommu_domain *domain,
>   struct device *dev,
>   struct iommu_cache_invalidate_info *inv_info);
> +extern int iommu_bind_guest_msi(struct iommu_domain *domain, struct device *dev,
> + struct iommu_guest_msi_binding *binding);
> +
>  extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
>  extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
>  extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
> @@ -724,6 +730,13 @@ iommu_cache_invalidate(struct iommu_domain *domain,
>   return -ENODEV;
>  }
>  
> +static inline
> +int iommu_bind_guest_msi(struct iommu_domain *domain, struct device *dev,
> +  struct iommu_guest_msi_binding *binding)
> +{
> + return -ENODEV;
> +}
> +
>  #endif /* CONFIG_IOMMU_API */
>  
>  #ifdef CONFIG_IOMMU_DEBUGFS
> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
> index 4605f5cfac84..f28cd9a1aa96 100644
> --- a/include/uapi/linux/iommu.h
> +++ b/include/uapi/linux/iommu.h
> @@ -142,4 +142,10 @@ struct iommu_cache_invalidate_info {
>   __u64   arch_id;
>   __u64   addr;
>  };
> +
> +struct iommu_guest_msi_binding {
> + __u64   iova;
> + __u64   gpa;
> + __u32   granule;

What's granule?  The size?  This looks a lot like just a stage 1
mapping interface, I can't really figure out from the description how
this matches to any specific MSI mapping.  Zero comments in the code
or headers here about how this is supposed to work.  Thanks,

Alex


Re: [RFC v3 02/21] iommu: Introduce cache_invalidate API

2019-01-11 Thread Alex Williamson
On Tue,  8 Jan 2019 11:26:14 +0100
Eric Auger  wrote:

> From: "Liu, Yi L" 
> 
> In any virtualization use case, when the first translation stage
> is "owned" by the guest OS, the host IOMMU driver has no knowledge
> of caching structure updates unless the guest invalidation activities
> are trapped by the virtualizer and passed down to the host.
> 
> Since the invalidation data are obtained from user space and will be
> written into the physical IOMMU, we must allow security checks at
> various layers. Therefore, a generic invalidation data format is
> proposed here; model-specific IOMMU drivers need to convert it into
> their own format.
> 
> Signed-off-by: Liu, Yi L 
> Signed-off-by: Jean-Philippe Brucker 
> Signed-off-by: Jacob Pan 
> Signed-off-by: Ashok Raj 
> Signed-off-by: Eric Auger 
> 
> ---
> v1 -> v2:
> - add arch_id field
> - renamed tlb_invalidate into cache_invalidate as this API allows
>   to invalidate context caches on top of IOTLBs
> 
> v1:
> renamed sva_invalidate into tlb_invalidate and add iommu_ prefix in
> header. Commit message reworded.
> ---
>  drivers/iommu/iommu.c  | 14 ++
>  include/linux/iommu.h  | 14 ++
>  include/uapi/linux/iommu.h | 95 ++
>  3 files changed, 123 insertions(+)
> 
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 0f2b7f1fc7c8..b2e248770508 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -1403,6 +1403,20 @@ int iommu_set_pasid_table(struct iommu_domain *domain,
>  }
>  EXPORT_SYMBOL_GPL(iommu_set_pasid_table);
>  
> +int iommu_cache_invalidate(struct iommu_domain *domain, struct device *dev,
> +struct iommu_cache_invalidate_info *inv_info)
> +{
> + int ret = 0;
> +
> + if (unlikely(!domain->ops->cache_invalidate))
> + return -ENODEV;
> +
> + ret = domain->ops->cache_invalidate(domain, dev, inv_info);
> +
> + return ret;
> +}
> +EXPORT_SYMBOL_GPL(iommu_cache_invalidate);
> +
>  static void __iommu_detach_device(struct iommu_domain *domain,
> struct device *dev)
>  {
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 1da2a2357ea4..96d59886f230 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -186,6 +186,7 @@ struct iommu_resv_region {
>   * @of_xlate: add OF master IDs to iommu grouping
>   * @pgsize_bitmap: bitmap of all possible supported page sizes
>   * @set_pasid_table: set pasid table
> + * @cache_invalidate: invalidate translation caches
>   */
>  struct iommu_ops {
>   bool (*capable)(enum iommu_cap);
> @@ -231,6 +232,9 @@ struct iommu_ops {
>   int (*set_pasid_table)(struct iommu_domain *domain,
>  struct iommu_pasid_table_config *cfg);
>  
> + int (*cache_invalidate)(struct iommu_domain *domain, struct device *dev,
> + struct iommu_cache_invalidate_info *inv_info);
> +
>   unsigned long pgsize_bitmap;
>  };
>  
> @@ -294,6 +298,9 @@ extern void iommu_detach_device(struct iommu_domain *domain,
>   struct device *dev);
>  extern int iommu_set_pasid_table(struct iommu_domain *domain,
>struct iommu_pasid_table_config *cfg);
> +extern int iommu_cache_invalidate(struct iommu_domain *domain,
> + struct device *dev,
> + struct iommu_cache_invalidate_info *inv_info);
>  extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
>  extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
>  extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
> @@ -709,6 +716,13 @@ int iommu_set_pasid_table(struct iommu_domain *domain,
>  {
>   return -ENODEV;
>  }
> +static inline int
> +iommu_cache_invalidate(struct iommu_domain *domain,
> +struct device *dev,
> +struct iommu_cache_invalidate_info *inv_info)
> +{
> + return -ENODEV;
> +}
>  
>  #endif /* CONFIG_IOMMU_API */
>  
> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
> index 7a7cf7a3de7c..4605f5cfac84 100644
> --- a/include/uapi/linux/iommu.h
> +++ b/include/uapi/linux/iommu.h
> @@ -47,4 +47,99 @@ struct iommu_pasid_table_config {
>   };
>  };
>  
> +/**
> + * enum iommu_inv_granularity - Generic invalidation granularity
> + * @IOMMU_INV_GRANU_DOMAIN_ALL_PASID: TLB entries or PASID caches of all
> + *   PASIDs associated with a domain ID
> + * @IOMMU_INV_GRANU_PASID_SEL: TLB entries or PASID cache associated
> + *   with a PASID and a domain
> + * @IOMMU_INV_GRANU_PAGE_PASID: TLB entries of selected page range
> + *   within a PASID
> + *
> + * When an invalidation request is passed down to IOMMU to flush translation
> + * caches, it may carry different granularity 

Re: [PATCH 3/3] videobuf2: replace a layering violation with dma_map_resource

2019-01-11 Thread Mauro Carvalho Chehab
Em Fri, 11 Jan 2019 19:17:31 +0100
Christoph Hellwig  escreveu:

> vb2_dc_get_userptr pokes into arm direct mapping details to get the
> resemblance of a dma address for a physical address that is not
> backed by a page struct.  Not only is this not portable to other
> architectures with dma direct mapping offsets, it also does not work
> with IOMMUs of any kind.  Switch to the proper dma_map_resource /
> dma_unmap_resource interface instead.
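For reference, the portable pattern is (a sketch matching the hunk
below; dma_dir/size are whatever the driver already tracks):

	dma_addr_t dma_addr = dma_map_resource(dev, phys_addr, size,
					       dma_dir, 0);
	if (dma_mapping_error(dev, dma_addr))
		return -ENOMEM;
	/* ... device performs DMA to/from the resource ... */
	dma_unmap_resource(dev, dma_addr, size, dma_dir, 0);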

Makes sense to me. I'm assuming that you'll be pushing it together
with other mm patches, so:

Acked-by: Mauro Carvalho Chehab 

> 
> Signed-off-by: Christoph Hellwig 
> ---
>  .../common/videobuf2/videobuf2-dma-contig.c   | 41 ---
>  1 file changed, 9 insertions(+), 32 deletions(-)
> 
> diff --git a/drivers/media/common/videobuf2/videobuf2-dma-contig.c b/drivers/media/common/videobuf2/videobuf2-dma-contig.c
> index aff0ab7bf83d..82389aead6ed 100644
> --- a/drivers/media/common/videobuf2/videobuf2-dma-contig.c
> +++ b/drivers/media/common/videobuf2/videobuf2-dma-contig.c
> @@ -439,42 +439,14 @@ static void vb2_dc_put_userptr(void *buf_priv)
>   set_page_dirty_lock(pages[i]);
>   sg_free_table(sgt);
>   kfree(sgt);
> + } else {
> + dma_unmap_resource(buf->dev, buf->dma_addr, buf->size,
> +buf->dma_dir, 0);
>   }
>   vb2_destroy_framevec(buf->vec);
>   kfree(buf);
>  }
>  
> -/*
> - * For some kind of reserved memory there might be no struct page available,
> - * so all that can be done to support such 'pages' is to try to convert
> - * pfn to dma address or at the last resort just assume that
> - * dma address == physical address (like it has been assumed in earlier 
> version
> - * of videobuf2-dma-contig
> - */
> -
> -#ifdef __arch_pfn_to_dma
> -static inline dma_addr_t vb2_dc_pfn_to_dma(struct device *dev, unsigned long 
> pfn)
> -{
> - return (dma_addr_t)__arch_pfn_to_dma(dev, pfn);
> -}
> -#elif defined(__pfn_to_bus)
> -static inline dma_addr_t vb2_dc_pfn_to_dma(struct device *dev, unsigned long 
> pfn)
> -{
> - return (dma_addr_t)__pfn_to_bus(pfn);
> -}
> -#elif defined(__pfn_to_phys)
> -static inline dma_addr_t vb2_dc_pfn_to_dma(struct device *dev, unsigned long 
> pfn)
> -{
> - return (dma_addr_t)__pfn_to_phys(pfn);
> -}
> -#else
> -static inline dma_addr_t vb2_dc_pfn_to_dma(struct device *dev, unsigned long 
> pfn)
> -{
> - /* really, we cannot do anything better at this point */
> - return (dma_addr_t)(pfn) << PAGE_SHIFT;
> -}
> -#endif
> -
>  static void *vb2_dc_get_userptr(struct device *dev, unsigned long vaddr,
>   unsigned long size, enum dma_data_direction dma_dir)
>  {
> @@ -528,7 +500,12 @@ static void *vb2_dc_get_userptr(struct device *dev, unsigned long vaddr,
>   for (i = 1; i < n_pages; i++)
>   if (nums[i-1] + 1 != nums[i])
>   goto fail_pfnvec;
> - buf->dma_addr = vb2_dc_pfn_to_dma(buf->dev, nums[0]);
> + buf->dma_addr = dma_map_resource(buf->dev,
> + __pfn_to_phys(nums[0]), size, buf->dma_dir, 0);
> + if (dma_mapping_error(buf->dev, buf->dma_addr)) {
> + ret = -ENOMEM;
> + goto fail_pfnvec;
> + }
>   goto out;
>   }
>  



Thanks,
Mauro


Re: [RFC v3 01/21] iommu: Introduce set_pasid_table API

2019-01-11 Thread Alex Williamson
On Tue,  8 Jan 2019 11:26:13 +0100
Eric Auger  wrote:

> From: Jacob Pan 
> 
> In a virtualization use case, when a guest is assigned
> a PCI host device protected by a virtual IOMMU on the guest,
> the physical IOMMU must be programmed to be consistent with
> the guest mappings. If the physical IOMMU supports two
> translation stages it makes sense to program guest mappings
> onto the first stage/level (ARM/VTD terminology) while the host
> owns the stage/level 2.
> 
> In that case, it is mandated to trap on guest configuration
> settings and pass those to the physical iommu driver.
> 
> This patch adds a new API to the iommu subsystem that allows
> setting the pasid table information.
> 
> A generic iommu_pasid_table_config struct is introduced in
> a new iommu.h uapi header. This is going to be used by the VFIO
> user API. We foresee at least two specializations of this struct,
> for PASID table passing and ARM SMMUv3.
> 
> Signed-off-by: Jean-Philippe Brucker 
> Signed-off-by: Liu, Yi L 
> Signed-off-by: Ashok Raj 
> Signed-off-by: Jacob Pan 
> Signed-off-by: Eric Auger 
> 
> ---
> 
> This patch generalizes the API introduced by Jacob & co-authors in
> https://lwn.net/Articles/754331/
> 
> v2 -> v3:
> - replace unbind/bind by set_pasid_table
> - move table pointer and pasid bits in the generic part of the struct
> 
> v1 -> v2:
> - restore the original pasid table name
> - remove the struct device * parameter in the API
> - reworked iommu_pasid_smmuv3
> ---
>  drivers/iommu/iommu.c  | 10 
>  include/linux/iommu.h  | 14 +++
>  include/uapi/linux/iommu.h | 50 ++
>  3 files changed, 74 insertions(+)
>  create mode 100644 include/uapi/linux/iommu.h
> 
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 3ed4db334341..0f2b7f1fc7c8 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -1393,6 +1393,16 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
>  }
>  EXPORT_SYMBOL_GPL(iommu_attach_device);
>  
> +int iommu_set_pasid_table(struct iommu_domain *domain,
> +   struct iommu_pasid_table_config *cfg)
> +{
> + if (unlikely(!domain->ops->set_pasid_table))
> + return -ENODEV;
> +
> + return domain->ops->set_pasid_table(domain, cfg);
> +}
> +EXPORT_SYMBOL_GPL(iommu_set_pasid_table);
> +
>  static void __iommu_detach_device(struct iommu_domain *domain,
> struct device *dev)
>  {
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index e90da6b6f3d1..1da2a2357ea4 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -25,6 +25,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #define IOMMU_READ   (1 << 0)
>  #define IOMMU_WRITE  (1 << 1)
> @@ -184,6 +185,7 @@ struct iommu_resv_region {
>   * @domain_window_disable: Disable a particular window for a domain
>   * @of_xlate: add OF master IDs to iommu grouping
>   * @pgsize_bitmap: bitmap of all possible supported page sizes
> + * @set_pasid_table: set pasid table
>   */
>  struct iommu_ops {
>   bool (*capable)(enum iommu_cap);
> @@ -226,6 +228,9 @@ struct iommu_ops {
>   int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
>   bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev);
>  
> + int (*set_pasid_table)(struct iommu_domain *domain,
> +struct iommu_pasid_table_config *cfg);
> +
>   unsigned long pgsize_bitmap;
>  };
>  
> @@ -287,6 +292,8 @@ extern int iommu_attach_device(struct iommu_domain *domain,
>  struct device *dev);
>  extern void iommu_detach_device(struct iommu_domain *domain,
>   struct device *dev);
> +extern int iommu_set_pasid_table(struct iommu_domain *domain,
> +  struct iommu_pasid_table_config *cfg);
>  extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
>  extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
>  extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
> @@ -696,6 +703,13 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
>   return NULL;
>  }
>  
> +static inline
> +int iommu_set_pasid_table(struct iommu_domain *domain,
> +   struct iommu_pasid_table_config *cfg)
> +{
> + return -ENODEV;
> +}
> +
>  #endif /* CONFIG_IOMMU_API */
>  
>  #ifdef CONFIG_IOMMU_DEBUGFS
> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
> new file mode 100644
> index ..7a7cf7a3de7c
> --- /dev/null
> +++ b/include/uapi/linux/iommu.h
> @@ -0,0 +1,50 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +/*
> + * IOMMU user API definitions
> + *
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 

[PATCH 2/3] dma-mapping: don't BUG when calling dma_map_resource on RAM

2019-01-11 Thread Christoph Hellwig
Use WARN_ON_ONCE to print a stack trace and return a proper error
code instead.

Signed-off-by: Christoph Hellwig 
---
 include/linux/dma-mapping.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index d3087829a6df..91add0751aa5 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -353,7 +353,8 @@ static inline dma_addr_t dma_map_resource(struct device *dev,
BUG_ON(!valid_dma_direction(dir));
 
/* Don't allow RAM to be mapped */
-   BUG_ON(pfn_valid(PHYS_PFN(phys_addr)));
+   if (WARN_ON_ONCE(pfn_valid(PHYS_PFN(phys_addr))))
+   return DMA_MAPPING_ERROR;
 
if (dma_is_direct(ops))
addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
-- 
2.20.1



fix a layering violation in videobuf2 and improve dma_map_resource

2019-01-11 Thread Christoph Hellwig
Hi all,

this series fixes a rather gross layering violation in videobuf2, which
pokes into arm DMA mapping internals to get a DMA address for memory that
does not have a page structure, and to do so fixes up the dma_map_resource
implementation to be practically useful.


Re: [RFC v3 01/21] iommu: Introduce set_pasid_table API

2019-01-11 Thread Jean-Philippe Brucker
On 08/01/2019 10:26, Eric Auger wrote:
> From: Jacob Pan 
> 
> In a virtualization use case, when a guest is assigned
> a PCI host device protected by a virtual IOMMU on the guest,
> the physical IOMMU must be programmed to be consistent with
> the guest mappings. If the physical IOMMU supports two
> translation stages it makes sense to program guest mappings
> onto the first stage/level (ARM/VTD terminology) while the host
> owns the stage/level 2.
> 
> In that case, it is mandated to trap on guest configuration
> settings and pass those to the physical iommu driver.
> 
> This patch adds a new API to the iommu subsystem that allows
> setting the pasid table information.
> 
> A generic iommu_pasid_table_config struct is introduced in
> a new iommu.h uapi header. This is going to be used by the VFIO
> user API. We foresee at least two specializations of this struct,
> for PASID table passing and ARM SMMUv3.

Last sentence is a bit confusing. With SMMUv3 it is also used for the
PASID table, even when it only has one entry and PASID is disabled.

> Signed-off-by: Jean-Philippe Brucker 
> Signed-off-by: Liu, Yi L 
> Signed-off-by: Ashok Raj 
> Signed-off-by: Jacob Pan 
> Signed-off-by: Eric Auger 
> 
> ---
> 
> This patch generalizes the API introduced by Jacob & co-authors in
> https://lwn.net/Articles/754331/
> 
> v2 -> v3:
> - replace unbind/bind by set_pasid_table
> - move table pointer and pasid bits in the generic part of the struct
> 
> v1 -> v2:
> - restore the original pasid table name
> - remove the struct device * parameter in the API
> - reworked iommu_pasid_smmuv3
> ---
>  drivers/iommu/iommu.c  | 10 
>  include/linux/iommu.h  | 14 +++
>  include/uapi/linux/iommu.h | 50 ++
>  3 files changed, 74 insertions(+)
>  create mode 100644 include/uapi/linux/iommu.h
> 
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 3ed4db334341..0f2b7f1fc7c8 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -1393,6 +1393,16 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
>  }
>  EXPORT_SYMBOL_GPL(iommu_attach_device);
>  
> +int iommu_set_pasid_table(struct iommu_domain *domain,
> +   struct iommu_pasid_table_config *cfg)
> +{
> + if (unlikely(!domain->ops->set_pasid_table))
> + return -ENODEV;
> +
> + return domain->ops->set_pasid_table(domain, cfg);
> +}
> +EXPORT_SYMBOL_GPL(iommu_set_pasid_table);
> +
>  static void __iommu_detach_device(struct iommu_domain *domain,
> struct device *dev)
>  {
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index e90da6b6f3d1..1da2a2357ea4 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -25,6 +25,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #define IOMMU_READ   (1 << 0)
>  #define IOMMU_WRITE  (1 << 1)
> @@ -184,6 +185,7 @@ struct iommu_resv_region {
>   * @domain_window_disable: Disable a particular window for a domain
>   * @of_xlate: add OF master IDs to iommu grouping
>   * @pgsize_bitmap: bitmap of all possible supported page sizes
> + * @set_pasid_table: set pasid table
>   */
>  struct iommu_ops {
>   bool (*capable)(enum iommu_cap);
> @@ -226,6 +228,9 @@ struct iommu_ops {
>   int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
>   bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev);
>  
> + int (*set_pasid_table)(struct iommu_domain *domain,
> +struct iommu_pasid_table_config *cfg);
> +
>   unsigned long pgsize_bitmap;
>  };
>  
> @@ -287,6 +292,8 @@ extern int iommu_attach_device(struct iommu_domain *domain,
>  struct device *dev);
>  extern void iommu_detach_device(struct iommu_domain *domain,
>   struct device *dev);
> +extern int iommu_set_pasid_table(struct iommu_domain *domain,
> +  struct iommu_pasid_table_config *cfg);
>  extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
>  extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
>  extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
> @@ -696,6 +703,13 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
>   return NULL;
>  }
>  
> +static inline
> +int iommu_set_pasid_table(struct iommu_domain *domain,
> +   struct iommu_pasid_table_config *cfg)
> +{
> + return -ENODEV;
> +}
> +
>  #endif /* CONFIG_IOMMU_API */
>  
>  #ifdef CONFIG_IOMMU_DEBUGFS
> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
> new file mode 100644
> index ..7a7cf7a3de7c
> --- /dev/null
> +++ b/include/uapi/linux/iommu.h
> @@ -0,0 +1,50 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +/*
> + * IOMMU user API definitions
> + *
> + *
> + * This program is 

Re: remove dma_zalloc_coherent

2019-01-11 Thread Christoph Hellwig
Linus,

any chance you could take this before -rc2?  That should avoid a lot
of churn going forward.  Any fine tuning of the memset-removal
coccinelle scripts can be queued up with normal updates.

On Tue, Jan 08, 2019 at 08:06:58AM -0500, Christoph Hellwig wrote:
> Hi Linus and world,
> 
> We've always had a weird situation around dma_zalloc_coherent.  To
> safely support mapping the allocations to userspace, major architectures
> like x86 and arm have always zeroed allocations from dma_alloc_coherent,
> but a couple other architectures were missing that zeroing either always
> or in corner cases.  Then later we grew another dma_zalloc_coherent
> interface to explicitly request zeroing, but that just added __GFP_ZERO
> to the allocation flags, which for some allocators that didn't end
> up using the page allocator ended up being a no-op and still not
> zeroing the allocations.
> 
> So for this merge window I fixed up all remaining architectures to zero
> the memory in dma_alloc_coherent, and made dma_zalloc_coherent a no-op
> wrapper around dma_alloc_coherent, which fixes all of the above issues.
> 
> dma_zalloc_coherent is now pointless and can go away, and Luis helped
> me write a coccinelle script and patch series to kill it, which I
> think we should apply now just after -rc1 to finally settle these
> issue.
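For context, this is the no-op wrapper the interface was reduced to
before its removal, now that dma_alloc_coherent zeroes memory on all
architectures:

	static inline void *dma_zalloc_coherent(struct device *dev, size_t size,
						dma_addr_t *dma_handle, gfp_t gfp)
	{
		return dma_alloc_coherent(dev, size, dma_handle, gfp);
	}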
> 
> The script that generated the first patch is included here:
> 
> @ replace_dma_zalloc_coherent @
> expression dev, size, data, handle, flags;
> @@
> 
> -dma_zalloc_coherent(dev, size, handle, flags)
> +dma_alloc_coherent(dev, size, handle, flags)
---end quoted text---


[PATCH 1/3] dma-mapping: remove the default map_resource implementation

2019-01-11 Thread Christoph Hellwig
Just returning the physical address when no map_resource method is
present is highly dangerous as it doesn't take any offset in the
direct mapping into account and does the completely wrong thing for
IOMMUs.  Instead provide a proper implementation in the direct mapping
code, and also wire it up for arm and powerpc.
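The direct-mapping implementation amounts to roughly the following
sketch (the actual hunk is truncated below; the capability-check details
are approximate):

	dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
			size_t size, enum dma_data_direction dir,
			unsigned long attrs)
	{
		/* apply the direct-mapping offset instead of returning paddr */
		dma_addr_t dma_addr = phys_to_dma(dev, paddr);

		if (!dma_capable(dev, dma_addr, size))
			return DMA_MAPPING_ERROR;
		return dma_addr;
	}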

Signed-off-by: Christoph Hellwig 
---
 arch/arm/mm/dma-mapping.c |  2 ++
 arch/powerpc/kernel/dma-swiotlb.c |  1 +
 arch/powerpc/kernel/dma.c |  1 +
 include/linux/dma-mapping.h   | 12 +++-
 kernel/dma/direct.c   | 14 ++
 5 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index f1e2922e447c..3c8534904209 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -188,6 +188,7 @@ const struct dma_map_ops arm_dma_ops = {
.unmap_page = arm_dma_unmap_page,
.map_sg = arm_dma_map_sg,
.unmap_sg   = arm_dma_unmap_sg,
+   .map_resource   = dma_direct_map_resource,
.sync_single_for_cpu= arm_dma_sync_single_for_cpu,
.sync_single_for_device = arm_dma_sync_single_for_device,
.sync_sg_for_cpu= arm_dma_sync_sg_for_cpu,
@@ -211,6 +212,7 @@ const struct dma_map_ops arm_coherent_dma_ops = {
.get_sgtable= arm_dma_get_sgtable,
.map_page   = arm_coherent_dma_map_page,
.map_sg = arm_dma_map_sg,
+   .map_resource   = dma_direct_map_resource,
.dma_supported  = arm_dma_supported,
 };
 EXPORT_SYMBOL(arm_coherent_dma_ops);
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index 7d5fc9751622..fbb2506a414e 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -55,6 +55,7 @@ const struct dma_map_ops powerpc_swiotlb_dma_ops = {
.dma_supported = swiotlb_dma_supported,
.map_page = dma_direct_map_page,
.unmap_page = dma_direct_unmap_page,
+   .map_resource = dma_direct_map_resource,
.sync_single_for_cpu = dma_direct_sync_single_for_cpu,
.sync_single_for_device = dma_direct_sync_single_for_device,
.sync_sg_for_cpu = dma_direct_sync_sg_for_cpu,
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index b1903ebb2e9c..258b9e8ebb99 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -273,6 +273,7 @@ const struct dma_map_ops dma_nommu_ops = {
.dma_supported  = dma_nommu_dma_supported,
.map_page   = dma_nommu_map_page,
.unmap_page = dma_nommu_unmap_page,
+   .map_resource   = dma_direct_map_resource,
.get_required_mask  = dma_nommu_get_required_mask,
 #ifdef CONFIG_NOT_COHERENT_CACHE
.sync_single_for_cpu= dma_nommu_sync_single,
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index cef2127e1d70..d3087829a6df 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -208,6 +208,8 @@ dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
unsigned long attrs);
 int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
enum dma_data_direction dir, unsigned long attrs);
+dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
+   size_t size, enum dma_data_direction dir, unsigned long attrs);
 
 #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
 defined(CONFIG_SWIOTLB)
@@ -346,19 +348,19 @@ static inline dma_addr_t dma_map_resource(struct device *dev,
  unsigned long attrs)
 {
const struct dma_map_ops *ops = get_dma_ops(dev);
-   dma_addr_t addr;
+   dma_addr_t addr = DMA_MAPPING_ERROR;
 
BUG_ON(!valid_dma_direction(dir));
 
/* Don't allow RAM to be mapped */
BUG_ON(pfn_valid(PHYS_PFN(phys_addr)));
 
-   addr = phys_addr;
-   if (ops && ops->map_resource)
+   if (dma_is_direct(ops))
+   addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
+   else if (ops->map_resource)
addr = ops->map_resource(dev, phys_addr, size, dir, attrs);
 
debug_dma_map_resource(dev, phys_addr, size, dir, addr);
-
return addr;
 }
 
@@ -369,7 +371,7 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr,
const struct dma_map_ops *ops = get_dma_ops(dev);
 
BUG_ON(!valid_dma_direction(dir));
-   if (ops && ops->unmap_resource)
+   if (!dma_is_direct(ops) && ops->unmap_resource)
ops->unmap_resource(dev, addr, size, dir, attrs);
debug_dma_unmap_resource(dev, addr, size, dir);
 }
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 355d16acee6d..8e0359b04957 100644
--- 

Re: [RFC v3 17/21] iommu/smmuv3: Report non recoverable faults

2019-01-11 Thread Jean-Philippe Brucker
On 08/01/2019 10:26, Eric Auger wrote:
> When a stage 1 related fault event is read from the event queue,
> let's propagate it to potential external fault listeners, ie. users
> who registered a fault handler.
> 
> Signed-off-by: Eric Auger 
> ---
>  drivers/iommu/arm-smmu-v3.c | 124 
>  1 file changed, 113 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 999ee470a2ae..6a711cbbb228 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -168,6 +168,26 @@
>  #define ARM_SMMU_PRIQ_IRQ_CFG1   0xd8
>  #define ARM_SMMU_PRIQ_IRQ_CFG2   0xdc
>  
> +/* Events */
> +#define ARM_SMMU_EVT_F_UUT   0x01
> +#define ARM_SMMU_EVT_C_BAD_STREAMID  0x02
> +#define ARM_SMMU_EVT_F_STE_FETCH 0x03
> +#define ARM_SMMU_EVT_C_BAD_STE   0x04
> +#define ARM_SMMU_EVT_F_BAD_ATS_TREQ  0x05
> +#define ARM_SMMU_EVT_F_STREAM_DISABLED   0x06
> +#define ARM_SMMU_EVT_F_TRANSL_FORBIDDEN  0x07
> +#define ARM_SMMU_EVT_C_BAD_SUBSTREAMID   0x08
> +#define ARM_SMMU_EVT_F_CD_FETCH  0x09
> +#define ARM_SMMU_EVT_C_BAD_CD		0x0a
> +#define ARM_SMMU_EVT_F_WALK_EABT	0x0b
> +#define ARM_SMMU_EVT_F_TRANSLATION	0x10
> +#define ARM_SMMU_EVT_F_ADDR_SIZE	0x11
> +#define ARM_SMMU_EVT_F_ACCESS		0x12
> +#define ARM_SMMU_EVT_F_PERMISSION	0x13
> +#define ARM_SMMU_EVT_F_TLB_CONFLICT  0x20
> +#define ARM_SMMU_EVT_F_CFG_CONFLICT  0x21
> +#define ARM_SMMU_EVT_E_PAGE_REQUEST  0x24
> +
>  /* Common MSI config fields */
>  #define MSI_CFG0_ADDR_MASK   GENMASK_ULL(51, 2)
>  #define MSI_CFG2_SH  GENMASK(5, 4)
> @@ -333,6 +353,11 @@
>  #define EVTQ_MAX_SZ_SHIFT	7
>  
>  #define EVTQ_0_ID		GENMASK_ULL(7, 0)
> +#define EVTQ_0_SUBSTREAMID	GENMASK_ULL(31, 12)
> +#define EVTQ_0_STREAMID	GENMASK_ULL(63, 32)
> +#define EVTQ_1_S2		GENMASK_ULL(39, 39)
> +#define EVTQ_1_CLASS		GENMASK_ULL(40, 41)
> +#define EVTQ_3_FETCH_ADDR	GENMASK_ULL(51, 3)
>  
>  /* PRI queue */
>  #define PRIQ_ENT_DWORDS  2
> @@ -1270,7 +1295,6 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
>   return 0;
>  }
>  
> -__maybe_unused
>  static struct arm_smmu_master_data *
>  arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
>  {
> @@ -1296,24 +1320,102 @@ arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
>   return master;
>  }
>  
> +static void arm_smmu_report_event(struct arm_smmu_device *smmu, u64 *evt)
> +{
> + u64 fetch_addr = FIELD_GET(EVTQ_3_FETCH_ADDR, evt[3]);
> + u32 sid = FIELD_GET(EVTQ_0_STREAMID, evt[0]);
> + bool s1 = !FIELD_GET(EVTQ_1_S2, evt[1]);
> + u8 type = FIELD_GET(EVTQ_0_ID, evt[0]);
> + struct arm_smmu_master_data *master;
> + struct iommu_fault_event event;
> + bool propagate = true;
> + u64 addr = evt[2];
> + int i;
> +
> + master = arm_smmu_find_master(smmu, sid);
> + if (WARN_ON(!master))
> + return;
> +
> + event.fault.type = IOMMU_FAULT_DMA_UNRECOV;
> +
> + switch (type) {
> + case ARM_SMMU_EVT_C_BAD_STREAMID:
> + event.fault.reason = IOMMU_FAULT_REASON_SOURCEID_INVALID;
> + break;
> + case ARM_SMMU_EVT_F_STREAM_DISABLED:
> + case ARM_SMMU_EVT_C_BAD_SUBSTREAMID:
> + event.fault.reason = IOMMU_FAULT_REASON_PASID_INVALID;
> + break;
> + case ARM_SMMU_EVT_F_CD_FETCH:
> + event.fault.reason = IOMMU_FAULT_REASON_PASID_FETCH;
> + break;
> + case ARM_SMMU_EVT_F_WALK_EABT:
> + event.fault.reason = IOMMU_FAULT_REASON_WALK_EABT;
> + event.fault.addr = addr;
> + event.fault.fetch_addr = fetch_addr;
> + propagate = s1;
> + break;
> + case ARM_SMMU_EVT_F_TRANSLATION:
> + event.fault.reason = IOMMU_FAULT_REASON_PTE_FETCH;
> + event.fault.addr = addr;
> + event.fault.fetch_addr = fetch_addr;
> + propagate = s1;
> + break;
> + case ARM_SMMU_EVT_F_PERMISSION:
> + event.fault.reason = IOMMU_FAULT_REASON_PERMISSION;
> + event.fault.addr = addr;
> + propagate = s1;
> + break;
> + case ARM_SMMU_EVT_F_ACCESS:
> + event.fault.reason = IOMMU_FAULT_REASON_ACCESS;
> + event.fault.addr = addr;
> + propagate = s1;
> + break;
> + case ARM_SMMU_EVT_C_BAD_STE:
> + event.fault.reason = IOMMU_FAULT_REASON_BAD_DEVICE_CONTEXT_ENTRY;
> + break;
> + case ARM_SMMU_EVT_C_BAD_CD:
> + event.fault.reason = IOMMU_FAULT_REASON_BAD_PASID_ENTRY;
> + break;
> + case ARM_SMMU_EVT_F_ADDR_SIZE:
> + event.fault.reason = 

Re: [RFC v3 11/21] iommu/smmuv3: Implement cache_invalidate

2019-01-11 Thread Jean-Philippe Brucker
On 08/01/2019 10:26, Eric Auger wrote:
> Implement IOMMU_INV_TYPE_TLB invalidations. When
> nr_pages is null we interpret this as a context
> invalidation.
> 
> Signed-off-by: Eric Auger 
> 
> ---
> 
> The user API needs to be refined to discriminate context
> invalidations from NH_VA invalidations. Also the leaf attribute
> is not yet properly handled.
> 
> v2 -> v3:
> - replace __arm_smmu_tlb_sync by arm_smmu_cmdq_issue_sync
> 
> v1 -> v2:
> - properly pass the asid
> ---
>  drivers/iommu/arm-smmu-v3.c | 40 +
>  1 file changed, 40 insertions(+)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 0e006babc8a6..ca72e0ce92f6 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -2293,6 +2293,45 @@ static int arm_smmu_set_pasid_table(struct iommu_domain *domain,
>   return ret;
>  }
>  
> +static int
> +arm_smmu_cache_invalidate(struct iommu_domain *domain, struct device *dev,
> +   struct iommu_cache_invalidate_info *inv_info)
> +{
> + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> + struct arm_smmu_device *smmu = smmu_domain->smmu;
> +
> + if (smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED)
> + return -EINVAL;
> +
> + if (!smmu)
> + return -EINVAL;
> +
> + switch (inv_info->hdr.type) {
> + case IOMMU_INV_TYPE_TLB:
> + /*
> +  * TODO: On context invalidation, the userspace sets nr_pages
> +  * to 0. Refine the API to add a dedicated flags and also
> +  * properly handle the leaf parameter.
> +  */

That's what inv->granularity is for: if inv->granularity is PASID_SEL,
then the invalidation is for the whole context (and nr_pages, size,
addr, etc. should be ignored). If inv->granularity is PAGE_PASID, then
it's a range. The names could probably be improved, but it's already in
the API.

Thanks,
Jean
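In other words, the dispatch would look something like this sketch,
using the enum names from patch 02/21 (field placement inside inv_info
is approximate):

	switch (inv_info->granularity) {
	case IOMMU_INV_GRANU_PASID_SEL:
		/* whole-context invalidation: addr/nr_pages/size ignored */
		smmu_domain->s1_cfg.cd.asid = inv_info->arch_id;
		arm_smmu_tlb_inv_context(smmu_domain);
		break;
	case IOMMU_INV_GRANU_PAGE_PASID: {
		/* range invalidation */
		size_t granule = 1 << (inv_info->size + 12);
		size_t size = inv_info->nr_pages * granule;

		smmu_domain->s1_cfg.cd.asid = inv_info->arch_id;
		arm_smmu_tlb_inv_range_nosync(inv_info->addr, size, granule,
					      false, smmu_domain);
		arm_smmu_cmdq_issue_sync(smmu);
		break;
	}
	default:
		return -EINVAL;
	}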

> + if (!inv_info->nr_pages) {
> + smmu_domain->s1_cfg.cd.asid = inv_info->arch_id;
> + arm_smmu_tlb_inv_context(smmu_domain);
> + } else {
> + size_t granule = 1 << (inv_info->size + 12);
> + size_t size = inv_info->nr_pages * granule;
> +
> + smmu_domain->s1_cfg.cd.asid = inv_info->arch_id;
> + arm_smmu_tlb_inv_range_nosync(inv_info->addr, size,
> +   granule, false,
> +   smmu_domain);
> + arm_smmu_cmdq_issue_sync(smmu);
> + }
> + return 0;
> + default:
> + return -EINVAL;
> + }
> +}
> +
>  static struct iommu_ops arm_smmu_ops = {
>   .capable= arm_smmu_capable,
>   .domain_alloc   = arm_smmu_domain_alloc,
> @@ -2312,6 +2351,7 @@ static struct iommu_ops arm_smmu_ops = {
>   .get_resv_regions   = arm_smmu_get_resv_regions,
>   .put_resv_regions   = arm_smmu_put_resv_regions,
>   .set_pasid_table= arm_smmu_set_pasid_table,
> + .cache_invalidate   = arm_smmu_cache_invalidate,
>   .pgsize_bitmap  = -1UL, /* Restricted during device attach */
>  };
>  
> 



Re: [RFC v3 09/21] iommu/smmuv3: Get prepared for nested stage support

2019-01-11 Thread Jean-Philippe Brucker
Hi Eric,

On 08/01/2019 10:26, Eric Auger wrote:
> To allow nested stage support, we need to store both
> stage 1 and stage 2 configurations (and remove the former
> union).
> 
> arm_smmu_write_strtab_ent() is modified to write both stage
> fields in the STE.
> 
> We add a nested_bypass field to the S1 configuration as the first
> stage can be bypassed. Also the guest may force the STE to abort:
> this information gets stored into the nested_abort field.
> 
> Only the S2 stage is "finalized", as the host does not configure
> the S1 CD; the guest does.
> 
> Signed-off-by: Eric Auger 
> 
> ---
> 
> v1 -> v2:
> - invalidate the STE before moving from a live STE config to another
> - add the nested_abort and nested_bypass fields
> ---
>  drivers/iommu/arm-smmu-v3.c | 43 -
>  1 file changed, 33 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 9af68266bbb1..9716a301d9ae 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -212,6 +212,7 @@
>  #define STRTAB_STE_0_CFG_BYPASS  4
>  #define STRTAB_STE_0_CFG_S1_TRANS	5
>  #define STRTAB_STE_0_CFG_S2_TRANS	6
> +#define STRTAB_STE_0_CFG_NESTED  7
>  
>  #define STRTAB_STE_0_S1FMT   GENMASK_ULL(5, 4)
>  #define STRTAB_STE_0_S1FMT_LINEAR	0
> @@ -491,6 +492,10 @@ struct arm_smmu_strtab_l1_desc {
>  struct arm_smmu_s1_cfg {
>   __le64  *cdptr;
>   dma_addr_t  cdptr_dma;
> + /* in nested mode, tells s1 must be bypassed */
> + boolnested_bypass;
> + /* in nested mode, abort is forced by guest */
> + boolnested_abort;
>  
>   struct arm_smmu_ctx_desc {
>   u16 asid;
> @@ -515,6 +520,7 @@ struct arm_smmu_strtab_ent {
>* configured according to the domain type.
>*/
>   boolassigned;
> + boolnested;
>   struct arm_smmu_s1_cfg  *s1_cfg;
>   struct arm_smmu_s2_cfg  *s2_cfg;
>  };
> @@ -629,10 +635,8 @@ struct arm_smmu_domain {
>   boolnon_strict;
>  
>   enum arm_smmu_domain_stage  stage;
> - union {
> - struct arm_smmu_s1_cfg  s1_cfg;
> - struct arm_smmu_s2_cfg  s2_cfg;
> - };
> + struct arm_smmu_s1_cfg  s1_cfg;
> + struct arm_smmu_s2_cfg  s2_cfg;
>  
>   struct iommu_domain domain;
>  
> @@ -1139,10 +1143,11 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,

Could you also update the "This is hideously complicated..." comment
with the nested case? This function was complicated before, but it
becomes hell when adding nested and SVA support, so we really need the
comments :)

>   break;
>   case STRTAB_STE_0_CFG_S1_TRANS:
>   case STRTAB_STE_0_CFG_S2_TRANS:
> + case STRTAB_STE_0_CFG_NESTED:
>   ste_live = true;
>   break;
>   case STRTAB_STE_0_CFG_ABORT:
> - if (disable_bypass)
> + if (disable_bypass || ste->nested)
>   break;
>   default:
>   BUG(); /* STE corruption */
> @@ -1154,7 +1159,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
>  
>   /* Bypass/fault */
>   if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
> - if (!ste->assigned && disable_bypass)
> + if ((!ste->assigned && disable_bypass) ||
> + (ste->s1_cfg && ste->s1_cfg->nested_abort))

I don't think we're ever reaching this, given that ste->assigned is true
and ste->s2_cfg is set.

Something I find noteworthy is that with STRTAB_STE_0_CFG_ABORT, no
event is recorded in case of DMA fault. For vSMMU you'd want to emulate
the SMMU behavior closely, so you don't want to inject faults if the
guest sets CFG_ABORT, but this way you also can't report errors to the
VMM. If we did want to notify the VMM of faults, we'd need to implement
nested_abort differently, for example by installing an empty context
descriptor with Config=s1translate-s2translate.
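One possible shape for that alternative, as a rough sketch (abort_cd_dma
is an assumed per-device DMA address of a zeroed, i.e. invalid, CD, so
the S1 walk raises a recordable C_BAD_CD event instead of silently
aborting):

	if (ste->s1_cfg && ste->s1_cfg->nested_abort) {
		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_NESTED);
		/* assumed field: DMA address of the empty CD table */
		val |= abort_cd_dma & STRTAB_STE_0_S1CTXPTR_MASK;
	}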

>   val |= FIELD_PREP(STRTAB_STE_0_CFG, 
> STRTAB_STE_0_CFG_ABORT);
>   else
>   val |= FIELD_PREP(STRTAB_STE_0_CFG, 
> STRTAB_STE_0_CFG_BYPASS);
> @@ -1172,8 +1178,17 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
>   return;
>   }
>  
> + if (ste->nested && ste_live) {
> + /*
> +  * When enabling nested, the STE may be transitionning from

transitioning (my bad)

> +  * s2 to nested and back. Invalidate the STE before changing it.
> +  */
> + dst[0] = cpu_to_le64(0);
> + 
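For completeness, the invalidation has to be made visible before the new
configuration is written; roughly (a sketch reusing the driver's existing
sync helper):

	dst[0] = cpu_to_le64(0);
	arm_smmu_sync_ste_for_sid(smmu, sid);	/* CFGI_STE + CMD_SYNC */
	/* ... then write the new S1/S2 fields and re-set the valid bit ... */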

[PATCH 6/9] iommu/dma-iommu.c: Convert to use vm_insert_range

2019-01-11 Thread Souptick Joarder
Convert to use vm_insert_range() to map range of kernel
memory to user vma.

Signed-off-by: Souptick Joarder 
---
 drivers/iommu/dma-iommu.c | 12 +---
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index d1b0475..802de67 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -622,17 +622,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
 
 int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma)
 {
-   unsigned long uaddr = vma->vm_start;
-   unsigned int i, count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-   int ret = -ENXIO;
-
-   for (i = vma->vm_pgoff; i < count && uaddr < vma->vm_end; i++) {
-   ret = vm_insert_page(vma, uaddr, pages[i]);
-   if (ret)
-   break;
-   uaddr += PAGE_SIZE;
-   }
-   return ret;
+   return vm_insert_range(vma, pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
 }
 
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
-- 
1.9.1



[PATCH 1/9] mm: Introduce new vm_insert_range and vm_insert_range_buggy API

2019-01-11 Thread Souptick Joarder
Previously drivers had their own way of mapping a range of
kernel pages/memory into a user vma, done by invoking
vm_insert_page() within a loop.

As this pattern is common across different drivers, it can
be generalized by creating new functions and using them across
the drivers.

vm_insert_range() is the API which can be used to map a range of
kernel memory/pages in drivers which have considered vm_pgoff.

vm_insert_range_buggy() is the API which can be used to map a
range of kernel memory/pages in drivers which have not considered
vm_pgoff; vm_pgoff is passed as 0 by default for those drivers.

We _could_ then at a later point "fix" these drivers which are using
vm_insert_range_buggy() to behave according to the normal vm_pgoff
offsetting, simply by removing the _buggy suffix on the function
name; if that causes regressions, it gives us an easy way to revert.

Signed-off-by: Souptick Joarder 
Suggested-by: Russell King 
Suggested-by: Matthew Wilcox 
---
 include/linux/mm.h |  4 +++
 mm/memory.c| 81 ++
 mm/nommu.c | 14 ++
 3 files changed, 99 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5411de9..9d1dff6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2514,6 +2514,10 @@ unsigned long change_prot_numa(struct vm_area_struct *vma,
 int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t);
 int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
+int vm_insert_range(struct vm_area_struct *vma, struct page **pages,
+   unsigned long num);
+int vm_insert_range_buggy(struct vm_area_struct *vma, struct page **pages,
+   unsigned long num);
 vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn);
 vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
diff --git a/mm/memory.c b/mm/memory.c
index 4ad2d29..00e66df 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1520,6 +1520,87 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
 }
 EXPORT_SYMBOL(vm_insert_page);
 
+/**
+ * __vm_insert_range - insert range of kernel pages into user vma
+ * @vma: user vma to map to
+ * @pages: pointer to array of source kernel pages
+ * @num: number of pages in page array
+ * @offset: user's requested vm_pgoff
+ *
+ * This allows drivers to insert range of kernel pages they've allocated
+ * into a user vma.
+ *
+ * If we fail to insert any page into the vma, the function will return
+ * immediately leaving any previously inserted pages present.  Callers
+ * from the mmap handler may immediately return the error as their caller
+ * will destroy the vma, removing any successfully inserted pages. Other
+ * callers should make their own arrangements for calling unmap_region().
+ *
+ * Context: Process context.
+ * Return: 0 on success and error code otherwise.
+ */
+static int __vm_insert_range(struct vm_area_struct *vma, struct page **pages,
+   unsigned long num, unsigned long offset)
+{
+   unsigned long count = vma_pages(vma);
+   unsigned long uaddr = vma->vm_start;
+   int ret, i;
+
+   /* Fail if the user requested offset is beyond the end of the object */
+   if (offset > num)
+   return -ENXIO;
+
+   /* Fail if the user requested size exceeds available object size */
+   if (count > num - offset)
+   return -ENXIO;
+
+   for (i = 0; i < count; i++) {
+   ret = vm_insert_page(vma, uaddr, pages[offset + i]);
+   if (ret < 0)
+   return ret;
+   uaddr += PAGE_SIZE;
+   }
+
+   return 0;
+}
+
+/**
+ * vm_insert_range - insert range of kernel pages starts with non zero offset
+ * @vma: user vma to map to
+ * @pages: pointer to array of source kernel pages
+ * @num: number of pages in page array
+ *
+ * Maps an object consisting of `num' `pages', catering for the user's
+ * requested vm_pgoff
+ *
+ * Context: Process context. Called by mmap handlers.
+ * Return: 0 on success and error code otherwise.
+ */
+int vm_insert_range(struct vm_area_struct *vma, struct page **pages,
+   unsigned long num)
+{
+   return __vm_insert_range(vma, pages, num, vma->vm_pgoff);
+}
+EXPORT_SYMBOL(vm_insert_range);
+
+/**
+ * vm_insert_range_buggy - insert range of kernel pages starts with zero offset
+ * @vma: user vma to map to
+ * @pages: pointer to array of source kernel pages
+ * @num: number of pages in page array
+ *
+ * Maps a set of pages, always starting at page[0]
+ *
+ * Context: Process context. Called by mmap handlers.
+ * Return: 0 on success and error code otherwise.
+ */
+int vm_insert_range_buggy(struct vm_area_struct *vma, struct page **pages,
+   unsigned long num)
+{
+   return __vm_insert_range(vma, pages, num, 0);
+}
+EXPORT_SYMBOL(vm_insert_range_buggy);

[PATCH 0/9] Use vm_insert_range and vm_insert_range_buggy

2019-01-11 Thread Souptick Joarder
Previously drivers had their own way of mapping a range of
kernel pages/memory into a user vma, done by invoking
vm_insert_page() within a loop.

As this pattern is common across different drivers, it can
be generalized by creating new functions and using them across
the drivers.

vm_insert_range() is the API which can be used to map a range of
kernel memory/pages in drivers which have considered vm_pgoff.

vm_insert_range_buggy() is the API which can be used to map a
range of kernel memory/pages in drivers which have not considered
vm_pgoff; vm_pgoff is passed as 0 by default for those drivers.

We _could_ then at a later point "fix" these drivers which are using
vm_insert_range_buggy() to behave according to the normal vm_pgoff
offsetting, simply by removing the _buggy suffix on the function
name; if that causes regressions, it gives us an easy way to revert.

There is an existing bug in [7/9], where the user-passed length is not
verified against object_count; any value of length > object_count will
overrun the page array. This is fixed as part of this conversion.
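
To make the intended usage concrete, a converted mmap handler would look
roughly like this (a sketch; the mydrv names are hypothetical):

static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct mydrv_buf *buf = file->private_data;

	/* pages[] was filled at allocation time; vm_insert_range()
	 * honours vma->vm_pgoff and returns -ENXIO if the requested
	 * offset or size exceeds buf->nr_pages */
	return vm_insert_range(vma, buf->pages, buf->nr_pages);
}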

Souptick Joarder (9):
  mm: Introduce new vm_insert_range and vm_insert_range_buggy API
  arch/arm/mm/dma-mapping.c: Convert to use vm_insert_range
  drivers/firewire/core-iso.c: Convert to use vm_insert_range_buggy
  drm/rockchip/rockchip_drm_gem.c: Convert to use vm_insert_range
  drm/xen/xen_drm_front_gem.c: Convert to use vm_insert_range
  iommu/dma-iommu.c: Convert to use vm_insert_range
  videobuf2/videobuf2-dma-sg.c: Convert to use vm_insert_range_buggy
  xen/gntdev.c: Convert to use vm_insert_range
  xen/privcmd-buf.c: Convert to use vm_insert_range_buggy

 arch/arm/mm/dma-mapping.c | 22 ++
 drivers/firewire/core-iso.c   | 15 +
 drivers/gpu/drm/rockchip/rockchip_drm_gem.c   | 17 +
 drivers/gpu/drm/xen/xen_drm_front_gem.c   | 18 ++---
 drivers/iommu/dma-iommu.c | 12 +---
 drivers/media/common/videobuf2/videobuf2-dma-sg.c | 22 ++
 drivers/xen/gntdev.c  | 16 ++---
 drivers/xen/privcmd-buf.c |  8 +--
 include/linux/mm.h|  4 ++
 mm/memory.c   | 81 +++
 mm/nommu.c| 14 
 11 files changed, 129 insertions(+), 100 deletions(-)

-- 
1.9.1



Re: [PATCH 01/13] dt-binding: mediatek: Get rid of mediatek, larb for multimedia HW

2019-01-11 Thread Rob Herring
On Tue, 1 Jan 2019 12:51:04 +0800, Yong Wu wrote:
> After adding a device_link between the consumer and the smi-larbs,
> if the consumer calls its own pm_runtime_get(_sync), the
> pm_runtime_get(_sync) of smi-larb and smi-common will be called
> automatically. Thus, the consumer doesn't need the property.
> 
> And the IOMMU also knows which larb this consumer connects to from
> the iommu id in the "iommus=" property.
> 
> Signed-off-by: Yong Wu 
> ---
> I guess it should go through Matthias's tree if the patch is ok,
> thus I didn't split it into separate patches. If that's really needed,
> please feel free to tell me.
> ---
>  .../devicetree/bindings/display/mediatek/mediatek,disp.txt   | 9 -
>  .../devicetree/bindings/media/mediatek-jpeg-decoder.txt  | 4 
>  Documentation/devicetree/bindings/media/mediatek-mdp.txt | 8 
>  Documentation/devicetree/bindings/media/mediatek-vcodec.txt  | 4 
>  4 files changed, 25 deletions(-)
> 

Reviewed-by: Rob Herring 
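
For reference, the device_link described in the quoted commit message can
be created with something along these lines (a sketch; the flags used by
the actual series may differ):

	link = device_link_add(consumer_dev, larb_dev,
			       DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_CONSUMER);
	if (!link)
		dev_err(consumer_dev, "unable to link smi-larb\n");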


Re: [PATCH v6 0/7] Add virtio-iommu driver

2019-01-11 Thread Jean-Philippe Brucker
On 11/01/2019 12:28, Joerg Roedel wrote:
> Hi Jean-Philippe,
> 
> On Thu, Dec 13, 2018 at 12:50:29PM +, Jean-Philippe Brucker wrote:
>> We already do deferred flush: UNMAP requests are added to the queue by
>> iommu_unmap(), and then flushed out by iotlb_sync(). So we switch to the
>> host only on iotlb_sync(), or when the request queue is full.
> 
> So the mappings can stay in place until iotlb_sync() returns? What
> happens when the guest sends a map-request for a region it sent and
> unmap request before, but did not call iotlb_sync inbetween?

At that point the unmap is still in the request queue, and the host will
handle it before getting to the map request. For correctness, requests
are necessarily handled in order by the host. So if the map and unmap
refer to the same domain and IOVA, the host will remove the old mapping
before creating the new one.
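
In other words (a conceptual sketch, not the actual driver code):

	/* guest side: both requests are appended to the same queue */
	viommu_add_req(viommu, &unmap, sizeof(unmap));	/* not flushed yet */
	viommu_add_req(viommu, &map, sizeof(map));
	viommu_sync_req(viommu);  /* host dequeues in order: unmap, then map */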

Thanks,
Jean


Re: [PATCH v6 0/7] Add virtio-iommu driver

2019-01-11 Thread Joerg Roedel
Hi Jean-Philippe,

On Thu, Dec 13, 2018 at 12:50:29PM +, Jean-Philippe Brucker wrote:
> We already do deferred flush: UNMAP requests are added to the queue by
> iommu_unmap(), and then flushed out by iotlb_sync(). So we switch to the
> host only on iotlb_sync(), or when the request queue is full.

So the mappings can stay in place until iotlb_sync() returns? What
happens when the guest sends a map-request for a region it sent and
unmap request before, but did not call iotlb_sync inbetween?

Regards,

Joerg


Re: [PATCH 1/1] iommu/vt-d: Support page request in scalable mode

2019-01-11 Thread Joerg Roedel
On Fri, Jan 11, 2019 at 01:04:57PM +0800, Lu Baolu wrote:
> From: Jacob Pan 
> 
> VT-d Rev3.0 has made a few changes to the page request interface,
> 
> 1. widened PRQ descriptor from 128 bits to 256 bits;
> 2. removed streaming response type;
> 3. introduced private data that requires a page response even when the
>    request is not the last request in the group (LPIG).
> 
> This is a supplement to commit 1c4f88b7f1f92 ("iommu/vt-d: Shared
> virtual address in scalable mode") and makes the svm code compliant
> with VT-d Rev3.0.
> 
> Cc: Ashok Raj 
> Cc: Liu Yi L 
> Cc: Kevin Tian 
> Signed-off-by: Jacob Pan 
> Fixes: 1c4f88b7f1f92 ("iommu/vt-d: Shared virtual address in scalable mode")
> Signed-off-by: Lu Baolu 

Applied for v5.1, thanks.
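
The response rule in point 3 boils down to roughly this (a sketch; the
field and helper names are assumed from the widened descriptor):

	/* rev3: a response is owed when the request is last in its group
	 * OR carries private data, not only on LPIG */
	if (req->lpig || req->priv_data_present)
		send_page_response(req);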


Re: [PATCH 1/1] iova: Allow compiling the library without IOMMU support

2019-01-11 Thread Joerg Roedel
On Wed, Jan 02, 2019 at 11:16:57PM +0200, Sakari Ailus wrote:
> Drivers such as the Intel IPU3 ImgU driver use the IOVA library to manage
> the device's own virtual address space while not implementing the IOMMU
> API. Currently the IOVA library is only compiled if the IOMMU support is
> enabled, resulting in a link failure due to missing symbols.
> 
> Fix this by defining IOVA library Kconfig bits independently of IOMMU
> support configuration, and descending to the iommu directory
> unconditionally during the build.
> 
> Signed-off-by: Sakari Ailus 

Applied, thanks.
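
The shape of the fix, roughly (a sketch of the Kconfig/Makefile idea; the
exact hunks may differ):

	# drivers/iommu/Kconfig: define IOMMU_IOVA outside "if IOMMU_SUPPORT"
	config IOMMU_IOVA
		tristate

	# drivers/Makefile: descend into iommu/ unconditionally
	obj-y += iommu/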


Re: [PATCH] iommu/of: Fix probe-deferral

2019-01-11 Thread Joerg Roedel
On Mon, Jan 07, 2019 at 05:04:50PM +, Robin Murphy wrote:
> Whilst iommu_probe_device() does check for non-NULL ops as the previous
> code did, it does not do so in the same order relative to the other
> checks, and as a result means that -EPROBE_DEFER returned by of_xlate()
> (plus any real error condition too) gets overwritten with -EINVAL and
> leads to various misbehaviour.
> 
> Reinstate the original logic, but without implicitly relying on ops
> being set to infer !err as the initial condition (now that the validity
> of ops for its own sake is checked elsewhere).
> 
> Fixes: 641fb0efbff0 ("iommu/of: Don't call iommu_ops->add_device directly")
> Signed-off-by: Robin Murphy 

Applied to iommu/fixes, thanks.


Re: [PATCH 8/8] iommu/msm: reduce indentation

2019-01-11 Thread Joerg Roedel
On Sun, Dec 30, 2018 at 04:53:15PM +0100, Julia Lawall wrote:
> Delete tab aligning a statement with the right hand side of a
> preceding assignment rather than the left hand side.
> 
> Found with the help of Coccinelle.
> 
> Signed-off-by: Julia Lawall 

Applied, thanks.


Re: [PATCH v5 2/8] iommu/vt-d: Add per-device IOMMU feature ops entries

2019-01-11 Thread Joerg Roedel
Hi,

this looks a bit confusing to me because I can see no checking whether
the device actually supports scalable mode. More below:

On Thu, Jan 10, 2019 at 11:00:21AM +0800, Lu Baolu wrote:
> +static int intel_iommu_enable_auxd(struct device *dev)
> +{
> + struct device_domain_info *info;
> + struct dmar_domain *domain;
> + unsigned long flags;
> +
> + if (!scalable_mode_support())
> + return -ENODEV;
> +
> + domain = get_valid_domain_for_dev(dev);
> + if (!domain)
> + return -ENODEV;
> +
> + spin_lock_irqsave(&device_domain_lock, flags);
> + info = dev->archdata.iommu;
> + info->auxd_enabled = 1;
> + spin_unlock_irqrestore(&device_domain_lock, flags);
> +
> + return 0;
> +}

This code sets a flag to mark scalable mode enabled. Doesn't the device
need some handling too, like enabling the PASID capability and all?

> +
> +static bool
> +intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
> +{
> + struct device_domain_info *info = dev->archdata.iommu;
> +
> + if (feat == IOMMU_DEV_FEAT_AUX)
> + return scalable_mode_support() && info && info->auxd_enabled;
> +
> + return false;
> +}

Why is this checking the auxd_enabled flag? The function should just
return whether the device _supports_ scalable mode, not whether it is
enabled.
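
A sketch of what that check could look like (assumed helpers; the point
is to test the static PASID capability rather than the enabled flag):

static bool
intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
{
	if (feat != IOMMU_DEV_FEAT_AUX)
		return false;

	/* capability, not state: does the device expose PASID support? */
	return scalable_mode_support() && dev_is_pci(dev) &&
	       pci_find_ext_capability(to_pci_dev(dev), PCI_EXT_CAP_ID_PASID);
}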

Regards,

Joerg


Re: [RFC v3 14/21] iommu: introduce device fault data

2019-01-11 Thread Jean-Philippe Brucker
On 10/01/2019 18:45, Jacob Pan wrote:
> On Tue,  8 Jan 2019 11:26:26 +0100
> Eric Auger  wrote:
> 
>> From: Jacob Pan 
>>
>> Device faults detected by the IOMMU can be reported outside the IOMMU
>> subsystem for further processing. This patch intends to provide
>> generic device fault data such that device drivers can be informed
>> of IOMMU faults without model-specific knowledge.
>>
>> The proposed format is the result of discussion at:
>> https://lkml.org/lkml/2017/11/10/291
>> Part of the code is based on Jean-Philippe Brucker's patchset
>> (https://patchwork.kernel.org/patch/9989315/).
>>
>> The assumption is that a model-specific IOMMU driver can filter and
>> handle most of the internal faults if the cause is within the IOMMU
>> driver's control. Therefore, the fault reasons that can be reported
>> are grouped and generalized based on common specifications such as
>> PCI ATS.
>>
>> Signed-off-by: Jacob Pan 
>> Signed-off-by: Jean-Philippe Brucker 
>> Signed-off-by: Liu, Yi L 
>> Signed-off-by: Ashok Raj 
>> Signed-off-by: Eric Auger 
>> [moved part of the iommu_fault_event struct in the uapi, enriched
>>  the fault reasons to be able to map unrecoverable SMMUv3 errors]
>> ---
>>  include/linux/iommu.h  | 55 -
>>  include/uapi/linux/iommu.h | 83 ++
>>  2 files changed, 136 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
>> index 244c1a3d5989..1dedc2d247c2 100644
>> --- a/include/linux/iommu.h
>> +++ b/include/linux/iommu.h
>> @@ -49,13 +49,17 @@ struct bus_type;
>>  struct device;
>>  struct iommu_domain;
>>  struct notifier_block;
>> +struct iommu_fault_event;
>>  
>>  /* iommu fault flags */
>> -#define IOMMU_FAULT_READ0x0
>> -#define IOMMU_FAULT_WRITE   0x1
>> +#define IOMMU_FAULT_READ(1 << 0)
>> +#define IOMMU_FAULT_WRITE   (1 << 1)
>> +#define IOMMU_FAULT_EXEC(1 << 2)
>> +#define IOMMU_FAULT_PRIV(1 << 3)
>>  
>>  typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
>>  struct device *, unsigned long, int, void *);
>> +typedef int (*iommu_dev_fault_handler_t)(struct iommu_fault_event *, void *);
>>  struct iommu_domain_geometry {
>>  dma_addr_t aperture_start; /* First address that can be mapped */
>> @@ -255,6 +259,52 @@ struct iommu_device {
>>  struct device *dev;
>>  };
>>  
>> +/**
>> + * struct iommu_fault_event - Generic per device fault data
>> + *
>> + * - PCI and non-PCI devices
>> + * - Recoverable faults (e.g. page request), information based on
>> PCI ATS
>> + * and PASID spec.
>> + * - Un-recoverable faults of device interest
>> + * - DMA remapping and IRQ remapping faults
>> + *
>> + * @fault: fault descriptor
>> + * @device_private: if present, uniquely identify device-specific
>> + *  private data for an individual page request.
>> + * @iommu_private: used by the IOMMU driver for storing
>> fault-specific
>> + * data. Users should not modify this field before
>> + * sending the fault response.
>> + */
>> +struct iommu_fault_event {
>> +struct iommu_fault fault;
>> +u64 device_private;
> I think we want to move device_private to uapi since it gets injected
> into the guest, then returned by the guest in case of page response. For
> VT-d we also need 128 bits of private data; see VT-d spec 7.7.1.

Ah, I didn't notice the format changed in VT-d rev3. On that topic, how
do we manage future extensions to the iommu_fault struct? Should we add
~48 bytes of padding after device_private, along with some flags telling
which field is valid, or deal with it using a structure version like we
do for the invalidate and bind structs? In the first case, iommu_fault
wouldn't fit in a 64-byte cacheline anymore, but I'm not sure we care.
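
The first option would look something like this (a sketch only, not a
proposal for the final uapi layout):

struct iommu_fault {
	__u32	type;
	__u32	reason;
	__u64	addr;
	/* ... remaining existing fields ... */
	__u32	flags;		/* says which optional fields below are valid */
	__u8	padding[48];	/* reserved for future extensions */
};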

> For exception tracking (e.g. unanswered page request), I can add timer
> and list info later when I include PRQ. sounds ok?
>> +u64 iommu_private;
[...]
>> +/**
>> + * struct iommu_fault - Generic fault data
>> + *
>> + * @type contains fault type
>> + * @reason fault reasons if relevant outside IOMMU driver.
>> + * IOMMU driver internal faults are not reported.
>> + * @addr: tells the offending page address
>> + * @fetch_addr: tells the address that caused an abort, if any
>> + * @pasid: contains process address space ID, used in shared virtual
>> memory
>> + * @page_req_group_id: page request group index
>> + * @last_req: last request in a page request group
>> + * @pasid_valid: indicates if the PRQ has a valid PASID
>> + * @prot: page access protection flag:
>> + *  IOMMU_FAULT_READ, IOMMU_FAULT_WRITE
>> + */
>> +
>> +struct iommu_fault {
>> +__u32   type;   /* enum iommu_fault_type */
>> +__u32   reason; /* enum iommu_fault_reason */
>> +__u64   addr;
>> +__u64   fetch_addr;
>> +__u32   pasid;
>> +__u32   page_req_group_id;
>> +__u32   last_req;
>> +__u32   pasid_valid;
>> +__u32   prot;
>> 

Re: [PATCH 3/4] iommu/of: Don't call iommu_ops->add_device directly

2019-01-11 Thread Joerg Roedel


Hi Geert,

On Thu, Dec 20, 2018 at 04:42:17PM +0100, Geert Uytterhoeven wrote:
> > -   return ops->add_device(dev);
> > +   if (ops)
> 
> Is this sufficient? The old code checked for ops->add_device != NULL,
> too.

Robin brought up that all iommu-ops implementations support the
add_device call-back, so we can remove that check and make the call-back
mandatory for new implementations too.

Regards,

Joerg


Re: [PATCH v6 0/3] iommu/io-pgtable-arm-v7s: Use DMA32 zone for page tables

2019-01-11 Thread Joerg Roedel
On Wed, Jan 02, 2019 at 01:51:45PM +0800, Nicolas Boichat wrote:
> Does anyone have any further comment on this series? If not, which
> maintainer is going to pick this up? I assume Andrew Morton?

Probably, yes. I don't like to carry the mm-changes in iommu-tree, so
this should go through mm.

Regards,

Joerg


Re: [PATCH] Fix typo. Change tlb_range_add to iotlb_range_add and tlb_sync to iotlb_sync

2019-01-11 Thread Joerg Roedel
On Thu, Dec 20, 2018 at 03:47:28PM +, Tom Murphy wrote:
> Ah shoot, it looks like I forgot to change flush_tlb_all -> flush_iotlb_all
> 
> Should I submit another patch?

Yes, please.



Re: [PATCH 0/3] Fix virtio-blk issue with SWIOTLB

2019-01-11 Thread Joerg Roedel
On Fri, Jan 11, 2019 at 11:29:31AM +0800, Jason Wang wrote:
> Just wondering if my understanding is correct: IOMMU_PLATFORM must be set
> for all virtio devices under AMD-SEV guests?

Yes, that is correct. Emulated DMA can only happen on the SWIOTLB
aperture, because that memory is not encrypted. The guest then bounces
the data to its encrypted memory.
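
Conceptually (a sketch, not the real swiotlb internals):

/* the device can only DMA into the shared, unencrypted aperture; the
 * CPU copy into guest memory re-encrypts the data transparently */
static void bounce_from_device(void *enc_buf, void *aperture_va, size_t len)
{
	memcpy(enc_buf, aperture_va, len);
}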

Regards,

Joerg


Re: [PATCH 1/3] swiotlb: Export maximum allocation size

2019-01-11 Thread Joerg Roedel
On Thu, Jan 10, 2019 at 12:02:05PM -0500, Konrad Rzeszutek Wilk wrote:
> Why not use swiotlb_nr_tbl()? That is how drivers/gpu/drm figures out
> whether it needs to limit the size of pages.

That function just exports the overall size of the swiotlb aperture, no?
What I need here is the maximum size for a single mapping.
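
A sketch of such an export (name and constants assumed): a single bounce
is limited to one slot segment, not the whole aperture.

size_t swiotlb_max_mapping_size(struct device *dev)
{
	return (size_t)IO_TLB_SEGSIZE << IO_TLB_SHIFT;
}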

Regards,

Joerg