Re: [PATCH 37/37] vfio: Add support for Shared Virtual Addressing

2018-03-21 Thread Jean-Philippe Brucker
On 19/03/18 09:47, Yisheng Xie wrote:
> Hi Jean,
> 
> vfio can be compiled as a module; however, you use some functions that are
> not exported.

Oh right. I remember the kbuild test robot warning about this once; I
wonder why it didn't catch this one.

> comment inline:
> 
> [...]
>> Add two new ioctls for VFIO containers. VFIO_IOMMU_BIND_PROCESS creates a
>> bond between a container and a process address space, identified by a
>> device-specific ID named PASID. This allows the device to target DMA
>> transactions at the process virtual addresses without a need for mapping
>> and unmapping buffers explicitly in the IOMMU. The process page tables are
>> shared with the IOMMU, and mechanisms such as PCI ATS/PRI are used to
>> handle faults. VFIO_IOMMU_UNBIND_PROCESS removes a bond created with
>> VFIO_IOMMU_BIND_PROCESS.
>>
>> Signed-off-by: Jean-Philippe Brucker 
>> ---
> [...]
>> +static struct mm_struct *vfio_iommu_get_mm_by_vpid(pid_t vpid)
>> +{
>> +	struct mm_struct *mm;
>> +	struct task_struct *task;
>> +
>> +	rcu_read_lock();
>> +	task = find_task_by_vpid(vpid);
> 
> Maybe you can use:
>   task = pid_task(find_vpid(params.vpid), PIDTYPE_PID)

I'd rather submit a patch requesting to export the symbol, especially
since this function can be further simplified by using the brand-new
find_get_task_by_vpid() helper introduced by commit 2ee0826085d1.

>> +	if (task)
>> +		get_task_struct(task);
>> +	rcu_read_unlock();
>> +	if (!task)
>> +		return ERR_PTR(-ESRCH);
>> +
>> +	/* Ensure that current has RW access on the mm */
>> +	mm = mm_access(task, PTRACE_MODE_ATTACH_REALCREDS);
> 
> You will need to export mm_access too. I found that Felix tried to, but it
> seems he gave up:
> 
>  https://patchwork.kernel.org/patch/9744281/

Thanks for the pointer, I'll try to revive this.
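
If it still applies, the export itself is just a one-liner next to the
definition in kernel/fork.c, something like (sketch):

	/* kernel/fork.c, after mm_access() -- mirroring Felix's patch */
	EXPORT_SYMBOL_GPL(mm_access);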

Thanks,
Jean



Re: [PATCH 37/37] vfio: Add support for Shared Virtual Addressing

2018-03-19 Thread Yisheng Xie
Hi Jean,

vfio can be compiled as a module; however, you use some functions that are
not exported.

comment inline:

[...]
> Add two new ioctls for VFIO containers. VFIO_IOMMU_BIND_PROCESS creates a
> bond between a container and a process address space, identified by a
> device-specific ID named PASID. This allows the device to target DMA
> transactions at the process virtual addresses without a need for mapping
> and unmapping buffers explicitly in the IOMMU. The process page tables are
> shared with the IOMMU, and mechanisms such as PCI ATS/PRI are used to
> handle faults. VFIO_IOMMU_UNBIND_PROCESS removes a bond created with
> VFIO_IOMMU_BIND_PROCESS.
>
> Signed-off-by: Jean-Philippe Brucker 
> ---
[...]
> +static struct mm_struct *vfio_iommu_get_mm_by_vpid(pid_t vpid)
> +{
> +	struct mm_struct *mm;
> +	struct task_struct *task;
> +
> +	rcu_read_lock();
> +	task = find_task_by_vpid(vpid);

Maybe you can use:
task = pid_task(find_vpid(params.vpid), PIDTYPE_PID)

> +	if (task)
> +		get_task_struct(task);
> +	rcu_read_unlock();
> +	if (!task)
> +		return ERR_PTR(-ESRCH);
> +
> +	/* Ensure that current has RW access on the mm */
> +	mm = mm_access(task, PTRACE_MODE_ATTACH_REALCREDS);

You will need to export mm_access too. I found that Felix tried to, but it seems he gave up:

 https://patchwork.kernel.org/patch/9744281/

Thanks
Yisheng



Re: [PATCH 37/37] vfio: Add support for Shared Virtual Addressing

2018-02-28 Thread Jean-Philippe Brucker
On 28/02/18 01:26, Sinan Kaya wrote:
[...]
>> +static int vfio_iommu_sva_init(struct device *dev, void *data)
>> +{
> 
> data is not getting used.

That's the pointer passed to iommu_group_for_each_dev(), NULL at the
moment. The next version of this patch will keep some state in data to
ensure there is only one device per group.

>> +
>> +	int ret;
>> +
>> +	ret = iommu_sva_device_init(dev, IOMMU_SVA_FEAT_PASID |
>> +				    IOMMU_SVA_FEAT_IOPF, 0);
>> +	if (ret)
>> +		return ret;
>> +
>> +	return iommu_register_mm_exit_handler(dev, vfio_iommu_mm_exit);
>> +}
>> +
>> +static int vfio_iommu_sva_shutdown(struct device *dev, void *data)
>> +{
>> +	iommu_sva_device_shutdown(dev);
>> +	iommu_unregister_mm_exit_handler(dev);
>> +
>> +	return 0;
>> +}
>> +
>> +static int vfio_iommu_bind_group(struct vfio_iommu *iommu,
>> +				 struct vfio_group *group,
>> +				 struct vfio_mm *vfio_mm)
>> +{
>> +	int ret;
>> +	int pasid;
>> +
>> +	if (!group->sva_enabled) {
>> +		ret = iommu_group_for_each_dev(group->iommu_group, NULL,
>> +					       vfio_iommu_sva_init);
>> +		if (ret)
>> +			return ret;
>> +
>> +		group->sva_enabled = true;
>> +	}
>> +
>> +	ret = iommu_sva_bind_group(group->iommu_group, vfio_mm->mm, &pasid,
>> +				   IOMMU_SVA_FEAT_PASID | IOMMU_SVA_FEAT_IOPF,
>> +				   vfio_mm);
>> +	if (ret)
>> +		return ret;
> 
> Don't you need to clean up the work done by vfio_iommu_sva_init() here?

Yes, I suppose we can, if we enabled SVA during this bind.
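
A rough sketch (untested; "enabled_here" is a local I'm introducing to
track whether this bind did the enabling):

	bool enabled_here = false;

	if (!group->sva_enabled) {
		ret = iommu_group_for_each_dev(group->iommu_group, NULL,
					       vfio_iommu_sva_init);
		if (ret)
			return ret;

		group->sva_enabled = enabled_here = true;
	}

	ret = iommu_sva_bind_group(group->iommu_group, vfio_mm->mm, &pasid,
				   IOMMU_SVA_FEAT_PASID | IOMMU_SVA_FEAT_IOPF,
				   vfio_mm);
	if (ret) {
		if (enabled_here) {
			iommu_group_for_each_dev(group->iommu_group, NULL,
						 vfio_iommu_sva_shutdown);
			group->sva_enabled = false;
		}
		return ret;
	}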

[...]
>> +static long vfio_iommu_type1_bind_process(struct vfio_iommu *iommu,
>> +					  void __user *arg,
>> +					  struct vfio_iommu_type1_bind *bind)
>> +{
>> +	struct vfio_iommu_type1_bind_process params;
>> +	struct vfio_domain *domain;
>> +	struct vfio_group *group;
>> +	struct vfio_mm *vfio_mm;
>> +	struct mm_struct *mm;
>> +	unsigned long minsz;
>> +	int ret = 0;
>> +
>> +	minsz = sizeof(*bind) + sizeof(params);
>> +	if (bind->argsz < minsz)
>> +		return -EINVAL;
>> +
>> +	arg += sizeof(*bind);
>> +	if (copy_from_user(&params, arg, sizeof(params)))
>> +		return -EFAULT;
>> +
>> +	if (params.flags & ~VFIO_IOMMU_BIND_PID)
>> +		return -EINVAL;
>> +
>> +	if (params.flags & VFIO_IOMMU_BIND_PID) {
>> +		mm = vfio_iommu_get_mm_by_vpid(params.pid);
>> +		if (IS_ERR(mm))
>> +			return PTR_ERR(mm);
>> +	} else {
>> +		mm = get_task_mm(current);
>> +		if (!mm)
>> +			return -EINVAL;
>> +	}
>> +}
> 
> I think you can merge the mm failure handling of both cases.

Yes, I think vfio_iommu_get_mm_by_vpid could return NULL instead of an
error pointer, and we can throw -ESRCH in all cases (the existing
get_task_mm() failure in this driver does return -ESRCH, so it would be
consistent).
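
i.e. something like this (sketch, assuming the NULL-returning variant of
vfio_iommu_get_mm_by_vpid):

	if (params.flags & VFIO_IOMMU_BIND_PID)
		mm = vfio_iommu_get_mm_by_vpid(params.pid);
	else
		mm = get_task_mm(current);
	if (!mm)
		return -ESRCH;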

[...]
>> +	/*
>> +	 * We can't simply unbind a foreign process by PASID, because the
>> +	 * process might have died and the PASID might have been reallocated to
>> +	 * another process. Instead we need to fetch that process mm by PID
>> +	 * again to make sure we remove the right vfio_mm. In addition, holding
>> +	 * the mm guarantees that mm_users isn't dropped while we unbind and the
>> +	 * exit_mm handler doesn't fire. While not strictly necessary, not
>> +	 * having to care about that race simplifies everyone's life.
>> +	 */
>> +	if (params.flags & VFIO_IOMMU_BIND_PID) {
>> +		mm = vfio_iommu_get_mm_by_vpid(params.pid);
>> +		if (IS_ERR(mm))
>> +			return PTR_ERR(mm);
>> +	} else {
>> +		mm = get_task_mm(current);
>> +		if (!mm)
>> +			return -EINVAL;
>> +	}
>> +
> 
> I think you can merge the mm failure handling of both cases.

ok

>> +	ret = -ESRCH;
>> +	mutex_lock(&iommu->lock);
>> +	list_for_each_entry(vfio_mm, &iommu->mm_list, next) {
>> +		if (vfio_mm->mm != mm)
>> +			continue;
>> +
> 
> these loops look weird:
> 1. for loop + break
> 2. for loop + goto
> 
> How about closing the for loop here, and then returning if the vfio_mm
> was not found?

ok
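
Something like this, I suppose (sketch, untested):

	struct vfio_mm *vfio_mm = NULL, *tmp;

	mutex_lock(&iommu->lock);
	list_for_each_entry(tmp, &iommu->mm_list, next) {
		if (tmp->mm == mm) {
			vfio_mm = tmp;
			break;
		}
	}

	if (vfio_mm) {
		vfio_iommu_unbind(iommu, vfio_mm);
		list_del(&vfio_mm->next);
		kfree(vfio_mm);
	}
	mutex_unlock(&iommu->lock);
	mmput(mm);

	return vfio_mm ? 0 : -ESRCH;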

>> +		vfio_iommu_unbind(iommu, vfio_mm);
>> +		list_del(&vfio_mm->next);
>> +		kfree(vfio_mm);
>> +		ret = 0;
>> +		break;
>> +	}
>> +	mutex_unlock(&iommu->lock);
>> +	mmput(mm);
>> +
>> +	return ret;
>> +}
>> +
> 

Thanks,
Jean


Re: [PATCH 37/37] vfio: Add support for Shared Virtual Addressing

2018-02-27 Thread Sinan Kaya
On 2/12/2018 1:33 PM, Jean-Philippe Brucker wrote:
> Add two new ioctls for VFIO containers. VFIO_IOMMU_BIND_PROCESS creates a
> bond between a container and a process address space, identified by a
> device-specific ID named PASID. This allows the device to target DMA
> transactions at the process virtual addresses without a need for mapping
> and unmapping buffers explicitly in the IOMMU. The process page tables are
> shared with the IOMMU, and mechanisms such as PCI ATS/PRI are used to
> handle faults. VFIO_IOMMU_UNBIND_PROCESS removes a bond created with
> VFIO_IOMMU_BIND_PROCESS.
> 
> Signed-off-by: Jean-Philippe Brucker 
> ---
>  drivers/vfio/vfio_iommu_type1.c | 399 
>  include/uapi/linux/vfio.h       |  76 
>  2 files changed, 475 insertions(+)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index e30e29ae4819..cac066f0026b 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -30,6 +30,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -60,6 +61,7 @@ MODULE_PARM_DESC(disable_hugepages,
>  
>  struct vfio_iommu {
>  	struct list_head	domain_list;
> +	struct list_head	mm_list;
>  	struct vfio_domain	*external_domain; /* domain for external user */
>  	struct mutex		lock;
>  	struct rb_root		dma_list;
> @@ -90,6 +92,15 @@ struct vfio_dma {
>  struct vfio_group {
>  	struct iommu_group	*iommu_group;
>  	struct list_head	next;
> +	bool			sva_enabled;
> +};
> +
> +struct vfio_mm {
> +#define VFIO_PASID_INVALID	(-1)
> +	spinlock_t		lock;
> +	int			pasid;
> +	struct mm_struct	*mm;
> +	struct list_head	next;
>  };
>  
>  /*
> @@ -1117,6 +1128,157 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
>   return 0;
>  }
>  
> +static int vfio_iommu_mm_exit(struct device *dev, int pasid, void *data)
> +{
> +	struct vfio_mm *vfio_mm = data;
> +
> +	/*
> +	 * The mm_exit callback cannot block, so we can't take the iommu mutex
> +	 * and remove this vfio_mm from the list. Hopefully the SVA code will
> +	 * relax its locking requirement in the future.
> +	 *
> +	 * We mostly care about attach_group, which will attempt to replay all
> +	 * binds in this container. Ensure that it doesn't touch this defunct mm
> +	 * struct, by clearing the pointer. The structure will be freed when the
> +	 * group is removed from the container.
> +	 */
> +	spin_lock(&vfio_mm->lock);
> +	vfio_mm->mm = NULL;
> +	spin_unlock(&vfio_mm->lock);
> +
> +	return 0;
> +}
> +
> +static int vfio_iommu_sva_init(struct device *dev, void *data)
> +{

data is not getting used.

> +
> +	int ret;
> +
> +	ret = iommu_sva_device_init(dev, IOMMU_SVA_FEAT_PASID |
> +				    IOMMU_SVA_FEAT_IOPF, 0);
> +	if (ret)
> +		return ret;
> +
> +	return iommu_register_mm_exit_handler(dev, vfio_iommu_mm_exit);
> +}
> +
> +static int vfio_iommu_sva_shutdown(struct device *dev, void *data)
> +{
> +	iommu_sva_device_shutdown(dev);
> +	iommu_unregister_mm_exit_handler(dev);
> +
> +	return 0;
> +}
> +
> +static int vfio_iommu_bind_group(struct vfio_iommu *iommu,
> +				 struct vfio_group *group,
> +				 struct vfio_mm *vfio_mm)
> +{
> +	int ret;
> +	int pasid;
> +
> +	if (!group->sva_enabled) {
> +		ret = iommu_group_for_each_dev(group->iommu_group, NULL,
> +					       vfio_iommu_sva_init);
> +		if (ret)
> +			return ret;
> +
> +		group->sva_enabled = true;
> +	}
> +
> +	ret = iommu_sva_bind_group(group->iommu_group, vfio_mm->mm, &pasid,
> +				   IOMMU_SVA_FEAT_PASID | IOMMU_SVA_FEAT_IOPF,
> +				   vfio_mm);
> +	if (ret)
> +		return ret;

Don't you need to clean up the work done by vfio_iommu_sva_init() here?

> +
> +	if (WARN_ON(vfio_mm->pasid != VFIO_PASID_INVALID && pasid !=
> +		    vfio_mm->pasid))
> +		return -EFAULT;
> +
> +	vfio_mm->pasid = pasid;
> +
> +	return 0;
> +}
> +
> +static void vfio_iommu_unbind_group(struct vfio_group *group,
> +				    struct vfio_mm *vfio_mm)
> +{
> +	iommu_sva_unbind_group(group->iommu_group, vfio_mm->pasid);
> +}
> +
> +static void vfio_iommu_unbind(struct vfio_iommu *iommu,
> +			      struct vfio_mm *vfio_mm)
> +{
> +	struct vfio_group *group;
> +	struct vfio_domain *domain;
> +
> +	list_for_each_entry(domain, &iommu->domain_list, next)
> +		list_for_each_entry(group, &domain->group_list, next)
> +			vfio_iommu_unbind_group(group, vfio_mm);

Re: [PATCH 37/37] vfio: Add support for Shared Virtual Addressing

2018-02-20 Thread Jean-Philippe Brucker
On 16/02/18 19:33, Alex Williamson wrote:
[...]
>> +static int vfio_iommu_sva_init(struct device *dev, void *data)
>> +{
>> +
>> +	int ret;
>> +
>> +	ret = iommu_sva_device_init(dev, IOMMU_SVA_FEAT_PASID |
>> +				    IOMMU_SVA_FEAT_IOPF, 0);
>> +	if (ret)
>> +		return ret;
>> +
>> +	return iommu_register_mm_exit_handler(dev, vfio_iommu_mm_exit);
>> +}
>> +
>> +static int vfio_iommu_sva_shutdown(struct device *dev, void *data)
>> +{
>> +	iommu_sva_device_shutdown(dev);
>> +	iommu_unregister_mm_exit_handler(dev);
> 
> Typically the order would be the reverse of the setup; is it correct this
> way?

I don't think it matters either way, but ABBA order would be nicer.
Registering the mm_exit handler before sva_device_init is probably best.
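
i.e. (sketch, untested -- init registers the handler first and unwinds
on failure, shutdown keeps the reverse order):

static int vfio_iommu_sva_init(struct device *dev, void *data)
{
	int ret;

	ret = iommu_register_mm_exit_handler(dev, vfio_iommu_mm_exit);
	if (ret)
		return ret;

	ret = iommu_sva_device_init(dev, IOMMU_SVA_FEAT_PASID |
				    IOMMU_SVA_FEAT_IOPF, 0);
	if (ret)
		iommu_unregister_mm_exit_handler(dev);

	return ret;
}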

>> +
>> +	return 0;
>> +}
>> +
>> +static int vfio_iommu_bind_group(struct vfio_iommu *iommu,
>> +				 struct vfio_group *group,
>> +				 struct vfio_mm *vfio_mm)
>> +{
>> +	int ret;
>> +	int pasid;
>> +
>> +	if (!group->sva_enabled) {
>> +		ret = iommu_group_for_each_dev(group->iommu_group, NULL,
>> +					       vfio_iommu_sva_init);
>> +		if (ret)
>> +			return ret;
> 
> Seems we're in an unknown state here; do we need to undo any that
> succeeded?

I think we do. However following the discussion on patch 2/37 it seems
we should limit SVA to singular groups for the moment, disallowing it if
the group has more than one device. Handling compound groups is
complicated and hopefully not needed by SVA systems. So I'd like to
change the logic here and ensure group_for_each_dev only calls sva_init
once.
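
One way to do that is to pass a counter through the data argument, so
the walk fails on any second device (sketch, untested):

static int vfio_iommu_sva_init(struct device *dev, void *data)
{
	int *count = data;
	int ret;

	/* Refuse compound groups for now: only one device may use SVA */
	if ((*count)++)
		return -EINVAL;

	ret = iommu_sva_device_init(dev, IOMMU_SVA_FEAT_PASID |
				    IOMMU_SVA_FEAT_IOPF, 0);
	if (ret)
		return ret;

	return iommu_register_mm_exit_handler(dev, vfio_iommu_mm_exit);
}

	/* ... and in vfio_iommu_bind_group(): */
	int count = 0;

	ret = iommu_group_for_each_dev(group->iommu_group, &count,
				       vfio_iommu_sva_init);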

[...]
>> +	/*
>> +	 * We can't simply unbind a foreign process by PASID, because the
>> +	 * process might have died and the PASID might have been reallocated to
>> +	 * another process. Instead we need to fetch that process mm by PID
>> +	 * again to make sure we remove the right vfio_mm. In addition, holding
>> +	 * the mm guarantees that mm_users isn't dropped while we unbind and the
>> +	 * exit_mm handler doesn't fire. While not strictly necessary, not
>> +	 * having to care about that race simplifies everyone's life.
>> +	 */
>> +	if (params.flags & VFIO_IOMMU_BIND_PID) {
>> +		mm = vfio_iommu_get_mm_by_vpid(params.pid);
>> +		if (IS_ERR(mm))
>> +			return PTR_ERR(mm);
> 
> I don't understand how this works for a process that has exited, the
> mm_exit function gets called to clear vfio_mm.mm, the above may or may
> not work (could be new ptrace'able process with same pid), but it won't
> match the mm below, so is the vfio_mm that mm_exit zapped forever stuck
> in this list until the container is destroyed?

Yes, it's not nice. mm_exit() is called with a spinlock held, so it
can't take the iommu->lock and modify mm_list.
vfio_iommu_type1_unbind_process() could do a bit of garbage collection
and remove all defunct vfio_mm, if they're not held by any iommu_bond
anymore.
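
The garbage collection could look something like this (sketch, untested,
and racy against a concurrent mm_exit() unless ->mm is re-checked under
vfio_mm->lock):

	struct vfio_mm *vfio_mm, *tmp;

	mutex_lock(&iommu->lock);
	list_for_each_entry_safe(vfio_mm, tmp, &iommu->mm_list, next) {
		/* mm_exit cleared ->mm; nothing can rebind this entry */
		if (!vfio_mm->mm) {
			list_del(&vfio_mm->next);
			kfree(vfio_mm);
		}
	}
	mutex_unlock(&iommu->lock);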

But I think iommu_notifier_release (patch 5/37) can actually release the
lock temporarily if it's careful about concurrent list modifications
(and takes a ref to the given bond), in which case we can remove this
mm_exit() constraint and simplify the VFIO patch.

[...]
>> +/*
>> + * Only mode supported at the moment is VFIO_IOMMU_BIND_PROCESS, which takes
>> + * vfio_iommu_type1_bind_process in data.
>> + */
>> +struct vfio_iommu_type1_bind {
>> +	__u32	argsz;
>> +	__u32	mode;
> 
> s/mode/flags/
> 
>> +#define VFIO_IOMMU_BIND_PROCESS (1 << 0)
>> +	__u8	data[];
>> +};
> 
> I'm not convinced having a separate vfio_iommu_type1_bind_process
> struct is necessary.  It seems like we always expect to return a pasid,
> only the pid is optional, but that could be handled by a single
> structure with a flag bit to indicate a pid bind is requested.

We were planning to reuse VFIO_IOMMU_BIND for PASID table binding as
well. So vfio_iommu_type1_bind::flags would either be
VFIO_IOMMU_BIND_PROCESS or VFIO_IOMMU_BIND_PASID_TABLE, and
vfio_iommu_type1_bind::data is a union of vfio_iommu_type1_bind_process
and vfio_iommu_type1_bind_pasid_table.

https://patchwork.kernel.org/patch/9701025/
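
So the combined uAPI would look roughly like this (sketch, with your
s/mode/flags/ applied; the pasid_table structure name comes from the
series linked above):

struct vfio_iommu_type1_bind {
	__u32	argsz;
	__u32	flags;
#define VFIO_IOMMU_BIND_PROCESS		(1 << 0)
#define VFIO_IOMMU_BIND_PASID_TABLE	(1 << 1)
	union {
		struct vfio_iommu_type1_bind_process		process;
		struct vfio_iommu_type1_bind_pasid_table	pasid_table;
	} data;
};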

> 
>> +
>> +/*
>> + * VFIO_IOMMU_BIND - _IOWR(VFIO_TYPE, VFIO_BASE + 22, struct vfio_iommu_bind)
> 
> vfio_iommu_type1_bind

Thanks,
Jean



Re: [PATCH 37/37] vfio: Add support for Shared Virtual Addressing

2018-02-16 Thread Alex Williamson
On Mon, 12 Feb 2018 18:33:52 +0000
Jean-Philippe Brucker  wrote:

> Add two new ioctls for VFIO containers. VFIO_IOMMU_BIND_PROCESS creates a
> bond between a container and a process address space, identified by a
> device-specific ID named PASID. This allows the device to target DMA
> transactions at the process virtual addresses without a need for mapping
> and unmapping buffers explicitly in the IOMMU. The process page tables are
> shared with the IOMMU, and mechanisms such as PCI ATS/PRI are used to
> handle faults. VFIO_IOMMU_UNBIND_PROCESS removes a bond created with
> VFIO_IOMMU_BIND_PROCESS.
> 
> Signed-off-by: Jean-Philippe Brucker 
> ---
>  drivers/vfio/vfio_iommu_type1.c | 399 
>  include/uapi/linux/vfio.h       |  76 
>  2 files changed, 475 insertions(+)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index e30e29ae4819..cac066f0026b 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -30,6 +30,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -60,6 +61,7 @@ MODULE_PARM_DESC(disable_hugepages,
>  
>  struct vfio_iommu {
>  	struct list_head	domain_list;
> +	struct list_head	mm_list;
>  	struct vfio_domain	*external_domain; /* domain for external user */
>  	struct mutex		lock;
>  	struct rb_root		dma_list;
> @@ -90,6 +92,15 @@ struct vfio_dma {
>  struct vfio_group {
>  	struct iommu_group	*iommu_group;
>  	struct list_head	next;
> +	bool			sva_enabled;
> +};
> +
> +struct vfio_mm {
> +#define VFIO_PASID_INVALID	(-1)
> +	spinlock_t		lock;
> +	int			pasid;
> +	struct mm_struct	*mm;
> +	struct list_head	next;
>  };
>  
>  /*
> @@ -1117,6 +1128,157 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
>   return 0;
>  }
>  
> +static int vfio_iommu_mm_exit(struct device *dev, int pasid, void *data)
> +{
> +	struct vfio_mm *vfio_mm = data;
> +
> +	/*
> +	 * The mm_exit callback cannot block, so we can't take the iommu mutex
> +	 * and remove this vfio_mm from the list. Hopefully the SVA code will
> +	 * relax its locking requirement in the future.
> +	 *
> +	 * We mostly care about attach_group, which will attempt to replay all
> +	 * binds in this container. Ensure that it doesn't touch this defunct mm
> +	 * struct, by clearing the pointer. The structure will be freed when the
> +	 * group is removed from the container.
> +	 */
> +	spin_lock(&vfio_mm->lock);
> +	vfio_mm->mm = NULL;
> +	spin_unlock(&vfio_mm->lock);
> +
> +	return 0;
> +}
> +
> +static int vfio_iommu_sva_init(struct device *dev, void *data)
> +{
> +
> +	int ret;
> +
> +	ret = iommu_sva_device_init(dev, IOMMU_SVA_FEAT_PASID |
> +				    IOMMU_SVA_FEAT_IOPF, 0);
> +	if (ret)
> +		return ret;
> +
> +	return iommu_register_mm_exit_handler(dev, vfio_iommu_mm_exit);
> +}
> +
> +static int vfio_iommu_sva_shutdown(struct device *dev, void *data)
> +{
> +	iommu_sva_device_shutdown(dev);
> +	iommu_unregister_mm_exit_handler(dev);

Typically the order would be the reverse of the setup; is it correct this
way?

> +
> +	return 0;
> +}
> +
> +static int vfio_iommu_bind_group(struct vfio_iommu *iommu,
> +				 struct vfio_group *group,
> +				 struct vfio_mm *vfio_mm)
> +{
> +	int ret;
> +	int pasid;
> +
> +	if (!group->sva_enabled) {
> +		ret = iommu_group_for_each_dev(group->iommu_group, NULL,
> +					       vfio_iommu_sva_init);
> +		if (ret)
> +			return ret;

Seems we're in an unknown state here; do we need to undo any that
succeeded?

> +
> +		group->sva_enabled = true;
> +	}
> +
> +	ret = iommu_sva_bind_group(group->iommu_group, vfio_mm->mm, &pasid,
> +				   IOMMU_SVA_FEAT_PASID | IOMMU_SVA_FEAT_IOPF,
> +				   vfio_mm);
> +	if (ret)
> +		return ret;
> +
> +	if (WARN_ON(vfio_mm->pasid != VFIO_PASID_INVALID && pasid !=
> +		    vfio_mm->pasid))
> +		return -EFAULT;
> +
> +	vfio_mm->pasid = pasid;
> +
> +	return 0;
> +}
> +
> +static void vfio_iommu_unbind_group(struct vfio_group *group,
> +				    struct vfio_mm *vfio_mm)
> +{
> +	iommu_sva_unbind_group(group->iommu_group, vfio_mm->pasid);
> +}
> +
> +static void vfio_iommu_unbind(struct vfio_iommu *iommu,
> +			      struct vfio_mm *vfio_mm)
> +{
> +	struct vfio_group *group;
> +	struct vfio_domain *domain;
> +
> +	list_for_each_entry(domain, &iommu->domain_list, next)
> +		list_for_each_entry(group, &domain->group_list, next)
> +			vfio_iommu_unbind_group(group, vfio_mm);

[PATCH 37/37] vfio: Add support for Shared Virtual Addressing

2018-02-12 Thread Jean-Philippe Brucker
Add two new ioctls for VFIO containers. VFIO_IOMMU_BIND_PROCESS creates a
bond between a container and a process address space, identified by a
device-specific ID named PASID. This allows the device to target DMA
transactions at the process virtual addresses without a need for mapping
and unmapping buffers explicitly in the IOMMU. The process page tables are
shared with the IOMMU, and mechanisms such as PCI ATS/PRI are used to
handle faults. VFIO_IOMMU_UNBIND_PROCESS removes a bond created with
VFIO_IOMMU_BIND_PROCESS.

Signed-off-by: Jean-Philippe Brucker 
---
 drivers/vfio/vfio_iommu_type1.c | 399 
 include/uapi/linux/vfio.h   |  76 
 2 files changed, 475 insertions(+)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index e30e29ae4819..cac066f0026b 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -30,6 +30,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -60,6 +61,7 @@ MODULE_PARM_DESC(disable_hugepages,
 
 struct vfio_iommu {
 	struct list_head	domain_list;
+	struct list_head	mm_list;
 	struct vfio_domain	*external_domain; /* domain for external user */
 	struct mutex		lock;
 	struct rb_root		dma_list;
@@ -90,6 +92,15 @@ struct vfio_dma {
 struct vfio_group {
 	struct iommu_group	*iommu_group;
 	struct list_head	next;
+	bool			sva_enabled;
+};
+
+struct vfio_mm {
+#define VFIO_PASID_INVALID	(-1)
+	spinlock_t		lock;
+	int			pasid;
+	struct mm_struct	*mm;
+	struct list_head	next;
 };
 
 /*
@@ -1117,6 +1128,157 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
return 0;
 }
 
+static int vfio_iommu_mm_exit(struct device *dev, int pasid, void *data)
+{
+	struct vfio_mm *vfio_mm = data;
+
+	/*
+	 * The mm_exit callback cannot block, so we can't take the iommu mutex
+	 * and remove this vfio_mm from the list. Hopefully the SVA code will
+	 * relax its locking requirement in the future.
+	 *
+	 * We mostly care about attach_group, which will attempt to replay all
+	 * binds in this container. Ensure that it doesn't touch this defunct mm
+	 * struct, by clearing the pointer. The structure will be freed when the
+	 * group is removed from the container.
+	 */
+	spin_lock(&vfio_mm->lock);
+	vfio_mm->mm = NULL;
+	spin_unlock(&vfio_mm->lock);
+
+	return 0;
+}
+
+static int vfio_iommu_sva_init(struct device *dev, void *data)
+{
+
+	int ret;
+
+	ret = iommu_sva_device_init(dev, IOMMU_SVA_FEAT_PASID |
+				    IOMMU_SVA_FEAT_IOPF, 0);
+	if (ret)
+		return ret;
+
+	return iommu_register_mm_exit_handler(dev, vfio_iommu_mm_exit);
+}
+
+static int vfio_iommu_sva_shutdown(struct device *dev, void *data)
+{
+	iommu_sva_device_shutdown(dev);
+	iommu_unregister_mm_exit_handler(dev);
+
+	return 0;
+}
+
+static int vfio_iommu_bind_group(struct vfio_iommu *iommu,
+				 struct vfio_group *group,
+				 struct vfio_mm *vfio_mm)
+{
+	int ret;
+	int pasid;
+
+	if (!group->sva_enabled) {
+		ret = iommu_group_for_each_dev(group->iommu_group, NULL,
+					       vfio_iommu_sva_init);
+		if (ret)
+			return ret;
+
+		group->sva_enabled = true;
+	}
+
+	ret = iommu_sva_bind_group(group->iommu_group, vfio_mm->mm, &pasid,
+				   IOMMU_SVA_FEAT_PASID | IOMMU_SVA_FEAT_IOPF,
+				   vfio_mm);
+	if (ret)
+		return ret;
+
+	if (WARN_ON(vfio_mm->pasid != VFIO_PASID_INVALID && pasid !=
+		    vfio_mm->pasid))
+		return -EFAULT;
+
+	vfio_mm->pasid = pasid;
+
+	return 0;
+}
+
+static void vfio_iommu_unbind_group(struct vfio_group *group,
+				    struct vfio_mm *vfio_mm)
+{
+	iommu_sva_unbind_group(group->iommu_group, vfio_mm->pasid);
+}
+
+static void vfio_iommu_unbind(struct vfio_iommu *iommu,
+			      struct vfio_mm *vfio_mm)
+{
+	struct vfio_group *group;
+	struct vfio_domain *domain;
+
+	list_for_each_entry(domain, &iommu->domain_list, next)
+		list_for_each_entry(group, &domain->group_list, next)
+			vfio_iommu_unbind_group(group, vfio_mm);
+}
+
+static bool vfio_mm_get(struct vfio_mm *vfio_mm)
+{
+	bool ret;
+
+	spin_lock(&vfio_mm->lock);
+	ret = vfio_mm->mm && mmget_not_zero(vfio_mm->mm);
+	spin_unlock(&vfio_mm->lock);
+
+	return ret;
+}
+
+static void vfio_mm_put(struct vfio_mm *vfio_mm)
+{
+	mmput(vfio_mm->mm);
+}