Re: [PATCH 37/37] vfio: Add support for Shared Virtual Addressing
On 19/03/18 09:47, Yisheng Xie wrote: > Hi Jean, > > vfio can be compiled as module, however you use some functions which are not > exported. Oh right. I remember the kbuild test robot warning about this once, I wonder why it didn't find this one. > comment inline: > > [...] >> Add two new ioctl for VFIO containers. VFIO_IOMMU_BIND_PROCESS creates a >> bond between a container and a process address space, identified by a >> device-specific ID named PASID. This allows the device to target DMA >> transactions at the process virtual addresses without a need for mapping >> and unmapping buffers explicitly in the IOMMU. The process page tables are >> shared with the IOMMU, and mechanisms such as PCI ATS/PRI are used to >> handle faults. VFIO_IOMMU_UNBIND_PROCESS removes a bond created with >> VFIO_IOMMU_BIND_PROCESS. >> >> Signed-off-by: Jean-Philippe Brucker >> --- > [...] >> +static struct mm_struct *vfio_iommu_get_mm_by_vpid(pid_t vpid) >> +{ >> +struct mm_struct *mm; >> +struct task_struct *task; >> + >> +rcu_read_lock(); >> +task = find_task_by_vpid(vpid); > > Maybe can use? > task = pid_task(find_vpid(params.vpid), PIDTYPE_PID) I'd rather submit a patch requesting to export the symbol. Especially since this function can be further simplified by using the brand new find_get_task_by_vpid() helper, introduced by 2ee0826085d1. >> +if (task) >> +get_task_struct(task); >> +rcu_read_unlock(); >> +if (!task) >> +return ERR_PTR(-ESRCH); >> + >> +/* Ensure that current has RW access on the mm */ >> +mm = mm_access(task, PTRACE_MODE_ATTACH_REALCREDS); > > You will try to export mm_access, I find Felix have tried to, but seems give > up: > > https://patchwork.kernel.org/patch/9744281/ Thanks for the pointer, I'll try to revive this. Thanks, Jean ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 37/37] vfio: Add support for Shared Virtual Addressing
Hi Jean, vfio can be compiled as module, however you use some functions which are not exported. comment inline: [...] > Add two new ioctl for VFIO containers. VFIO_IOMMU_BIND_PROCESS creates a > bond between a container and a process address space, identified by a > device-specific ID named PASID. This allows the device to target DMA > transactions at the process virtual addresses without a need for mapping > and unmapping buffers explicitly in the IOMMU. The process page tables are > shared with the IOMMU, and mechanisms such as PCI ATS/PRI are used to > handle faults. VFIO_IOMMU_UNBIND_PROCESS removes a bond created with > VFIO_IOMMU_BIND_PROCESS. > > Signed-off-by: Jean-Philippe Brucker > --- [...] > +static struct mm_struct *vfio_iommu_get_mm_by_vpid(pid_t vpid) > +{ > + struct mm_struct *mm; > + struct task_struct *task; > + > + rcu_read_lock(); > + task = find_task_by_vpid(vpid); Maybe can use? task = pid_task(find_vpid(params.vpid), PIDTYPE_PID) > + if (task) > + get_task_struct(task); > + rcu_read_unlock(); > + if (!task) > + return ERR_PTR(-ESRCH); > + > + /* Ensure that current has RW access on the mm */ > + mm = mm_access(task, PTRACE_MODE_ATTACH_REALCREDS); You will try to export mm_access, I find Felix have tried to, but seems give up: https://patchwork.kernel.org/patch/9744281/ Thanks Yisheng ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 37/37] vfio: Add support for Shared Virtual Addressing
On 28/02/18 01:26, Sinan Kaya wrote: [...] >> +static int vfio_iommu_sva_init(struct device *dev, void *data) >> +{ > > data is not getting used. That's the pointer passed to "iommu_group_for_each_dev", NULL at the moment. Next version of this patch will keep some state in data to ensure one device per group. >> + >> +int ret; >> + >> +ret = iommu_sva_device_init(dev, IOMMU_SVA_FEAT_PASID | >> +IOMMU_SVA_FEAT_IOPF, 0); >> +if (ret) >> +return ret; >> + >> +return iommu_register_mm_exit_handler(dev, vfio_iommu_mm_exit); >> +} >> + >> +static int vfio_iommu_sva_shutdown(struct device *dev, void *data) >> +{ >> +iommu_sva_device_shutdown(dev); >> +iommu_unregister_mm_exit_handler(dev); >> + >> +return 0; >> +} >> + >> +static int vfio_iommu_bind_group(struct vfio_iommu *iommu, >> + struct vfio_group *group, >> + struct vfio_mm *vfio_mm) >> +{ >> +int ret; >> +int pasid; >> + >> +if (!group->sva_enabled) { >> +ret = iommu_group_for_each_dev(group->iommu_group, NULL, >> + vfio_iommu_sva_init); >> +if (ret) >> +return ret; >> + >> +group->sva_enabled = true; >> +} >> + >> +ret = iommu_sva_bind_group(group->iommu_group, vfio_mm->mm, &pasid, >> + IOMMU_SVA_FEAT_PASID | IOMMU_SVA_FEAT_IOPF, >> + vfio_mm); >> +if (ret) >> +return ret; > > don't you need to clean up the work done by vfio_iommu_sva_init() here. Yes I suppose we can, if we enabled during this bind [...] 
>> +static long vfio_iommu_type1_bind_process(struct vfio_iommu *iommu, >> + void __user *arg, >> + struct vfio_iommu_type1_bind *bind) >> +{ >> +struct vfio_iommu_type1_bind_process params; >> +struct vfio_domain *domain; >> +struct vfio_group *group; >> +struct vfio_mm *vfio_mm; >> +struct mm_struct *mm; >> +unsigned long minsz; >> +int ret = 0; >> + >> +minsz = sizeof(*bind) + sizeof(params); >> +if (bind->argsz < minsz) >> +return -EINVAL; >> + >> +arg += sizeof(*bind); >> +if (copy_from_user(¶ms, arg, sizeof(params))) >> +return -EFAULT; >> + >> +if (params.flags & ~VFIO_IOMMU_BIND_PID) >> +return -EINVAL; >> + >> +if (params.flags & VFIO_IOMMU_BIND_PID) { >> +mm = vfio_iommu_get_mm_by_vpid(params.pid); >> +if (IS_ERR(mm)) >> +return PTR_ERR(mm); >> +} else { >> +mm = get_task_mm(current); >> +if (!mm) >> +return -EINVAL; >> +} > > I think you can merge mm failure in both states. Yes, I think vfio_iommu_get_mm_by_vpid could return NULL instead of an error pointer, and we can throw -ESRCH in all cases (the existing get_task_mm() failure in this driver does return -ESRCH, so it would be consistent.) [...] >> +/* >> + * We can't simply unbind a foreign process by PASID, because the >> + * process might have died and the PASID might have been reallocated to >> + * another process. Instead we need to fetch that process mm by PID >> + * again to make sure we remove the right vfio_mm. In addition, holding >> + * the mm guarantees that mm_users isn't dropped while we unbind and the >> + * exit_mm handler doesn't fire. While not strictly necessary, not >> + * having to care about that race simplifies everyone's life. >> + */ >> +if (params.flags & VFIO_IOMMU_BIND_PID) { >> +mm = vfio_iommu_get_mm_by_vpid(params.pid); >> +if (IS_ERR(mm)) >> +return PTR_ERR(mm); >> +} else { >> +mm = get_task_mm(current); >> +if (!mm) >> +return -EINVAL; >> +} >> + > > I think you can merge mm failure in both states. 
ok >> +ret = -ESRCH; >> +mutex_lock(&iommu->lock); >> +list_for_each_entry(vfio_mm, &iommu->mm_list, next) { >> +if (vfio_mm->mm != mm) >> +continue; >> + > > these loops look weird > 1. for loops + break > 2. for loops + goto > > how about closing the for loop here. and then return here if not vfio_mm > not found. ok >> +vfio_iommu_unbind(iommu, vfio_mm); >> +list_del(&vfio_mm->next); >> +kfree(vfio_mm); >> +ret = 0; >> +break; >> +} >> +mutex_unlock(&iommu->lock); >> +mmput(mm); >> + >> +return ret; >> +} >> + > Thanks, Jean ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 37/37] vfio: Add support for Shared Virtual Addressing
On 2/12/2018 1:33 PM, Jean-Philippe Brucker wrote: > Add two new ioctl for VFIO containers. VFIO_IOMMU_BIND_PROCESS creates a > bond between a container and a process address space, identified by a > device-specific ID named PASID. This allows the device to target DMA > transactions at the process virtual addresses without a need for mapping > and unmapping buffers explicitly in the IOMMU. The process page tables are > shared with the IOMMU, and mechanisms such as PCI ATS/PRI are used to > handle faults. VFIO_IOMMU_UNBIND_PROCESS removes a bond created with > VFIO_IOMMU_BIND_PROCESS. > > Signed-off-by: Jean-Philippe Brucker > --- > drivers/vfio/vfio_iommu_type1.c | 399 > > include/uapi/linux/vfio.h | 76 > 2 files changed, 475 insertions(+) > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index e30e29ae4819..cac066f0026b 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -30,6 +30,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -60,6 +61,7 @@ MODULE_PARM_DESC(disable_hugepages, > > struct vfio_iommu { > struct list_headdomain_list; > + struct list_headmm_list; > struct vfio_domain *external_domain; /* domain for external user */ > struct mutexlock; > struct rb_root dma_list; > @@ -90,6 +92,15 @@ struct vfio_dma { > struct vfio_group { > struct iommu_group *iommu_group; > struct list_headnext; > + boolsva_enabled; > +}; > + > +struct vfio_mm { > +#define VFIO_PASID_INVALID (-1) > + spinlock_t lock; > + int pasid; > + struct mm_struct*mm; > + struct list_headnext; > }; > > /* > @@ -1117,6 +1128,157 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, > return 0; > } > > +static int vfio_iommu_mm_exit(struct device *dev, int pasid, void *data) > +{ > + struct vfio_mm *vfio_mm = data; > + > + /* > + * The mm_exit callback cannot block, so we can't take the iommu mutex > + * and remove this vfio_mm from the list. 
Hopefully the SVA code will > + * relax its locking requirement in the future. > + * > + * We mostly care about attach_group, which will attempt to replay all > + * binds in this container. Ensure that it doesn't touch this defunct mm > + * struct, by clearing the pointer. The structure will be freed when the > + * group is removed from the container. > + */ > + spin_lock(&vfio_mm->lock); > + vfio_mm->mm = NULL; > + spin_unlock(&vfio_mm->lock); > + > + return 0; > +} > + > +static int vfio_iommu_sva_init(struct device *dev, void *data) > +{ data is not getting used. > + > + int ret; > + > + ret = iommu_sva_device_init(dev, IOMMU_SVA_FEAT_PASID | > + IOMMU_SVA_FEAT_IOPF, 0); > + if (ret) > + return ret; > + > + return iommu_register_mm_exit_handler(dev, vfio_iommu_mm_exit); > +} > + > +static int vfio_iommu_sva_shutdown(struct device *dev, void *data) > +{ > + iommu_sva_device_shutdown(dev); > + iommu_unregister_mm_exit_handler(dev); > + > + return 0; > +} > + > +static int vfio_iommu_bind_group(struct vfio_iommu *iommu, > + struct vfio_group *group, > + struct vfio_mm *vfio_mm) > +{ > + int ret; > + int pasid; > + > + if (!group->sva_enabled) { > + ret = iommu_group_for_each_dev(group->iommu_group, NULL, > +vfio_iommu_sva_init); > + if (ret) > + return ret; > + > + group->sva_enabled = true; > + } > + > + ret = iommu_sva_bind_group(group->iommu_group, vfio_mm->mm, &pasid, > +IOMMU_SVA_FEAT_PASID | IOMMU_SVA_FEAT_IOPF, > +vfio_mm); > + if (ret) > + return ret; don't you need to clean up the work done by vfio_iommu_sva_init() here. 
> + > + if (WARN_ON(vfio_mm->pasid != VFIO_PASID_INVALID && pasid != > + vfio_mm->pasid)) > + return -EFAULT; > + > + vfio_mm->pasid = pasid; > + > + return 0; > +} > + > +static void vfio_iommu_unbind_group(struct vfio_group *group, > + struct vfio_mm *vfio_mm) > +{ > + iommu_sva_unbind_group(group->iommu_group, vfio_mm->pasid); > +} > + > +static void vfio_iommu_unbind(struct vfio_iommu *iommu, > + struct vfio_mm *vfio_mm) > +{ > + struct vfio_group *group; > + struct vfio_domain *domain; > + > + list_for_each_entry(domain, &iommu->domain_list, next) > + list_for_each_entry(group, &domain->group_list, next) > + vfio_iommu_unbind_group(group, v
Re: [PATCH 37/37] vfio: Add support for Shared Virtual Addressing
On 16/02/18 19:33, Alex Williamson wrote: [...] >> +static int vfio_iommu_sva_init(struct device *dev, void *data) >> +{ >> + >> +int ret; >> + >> +ret = iommu_sva_device_init(dev, IOMMU_SVA_FEAT_PASID | >> +IOMMU_SVA_FEAT_IOPF, 0); >> +if (ret) >> +return ret; >> + >> +return iommu_register_mm_exit_handler(dev, vfio_iommu_mm_exit); >> +} >> + >> +static int vfio_iommu_sva_shutdown(struct device *dev, void *data) >> +{ >> +iommu_sva_device_shutdown(dev); >> +iommu_unregister_mm_exit_handler(dev); > > Typically the order would be reverse of the setup, is it correct this > way? I don't think it matters either way, but ABBA order would be nicer. Registering mm_exit handler before sva_device_init is probably best. >> + >> +return 0; >> +} >> + >> +static int vfio_iommu_bind_group(struct vfio_iommu *iommu, >> + struct vfio_group *group, >> + struct vfio_mm *vfio_mm) >> +{ >> +int ret; >> +int pasid; >> + >> +if (!group->sva_enabled) { >> +ret = iommu_group_for_each_dev(group->iommu_group, NULL, >> + vfio_iommu_sva_init); >> +if (ret) >> +return ret; > > Seems we're at an unknown state here, do we need to undo any that > succeeded? I think we do. However following the discussion on patch 2/37 it seems we should limit SVA to singular groups for the moment, disallowing it if the group has more than one device. Handling compound groups is complicated and hopefully not needed by SVA systems. So I'd like to change the logic here and ensure group_for_each_dev only calls sva_init once. [...] >> +/* >> + * We can't simply unbind a foreign process by PASID, because the >> + * process might have died and the PASID might have been reallocated to >> + * another process. Instead we need to fetch that process mm by PID >> + * again to make sure we remove the right vfio_mm. In addition, holding >> + * the mm guarantees that mm_users isn't dropped while we unbind and the >> + * exit_mm handler doesn't fire. 
While not strictly necessary, not >> + * having to care about that race simplifies everyone's life. >> + */ >> +if (params.flags & VFIO_IOMMU_BIND_PID) { >> +mm = vfio_iommu_get_mm_by_vpid(params.pid); >> +if (IS_ERR(mm)) >> +return PTR_ERR(mm); > > I don't understand how this works for a process that has exited, the > mm_exit function gets called to clear vfio_mm.mm, the above may or may > not work (could be new ptrace'able process with same pid), but it won't > match the mm below, so is the vfio_mm that mm_exit zapped forever stuck > in this list until the container is destroyed? Yes, it's not nice. mm_exit() is called with a spinlock held, so it can't take the iommu->lock and modify mm_list. vfio_iommu_type1_unbind_process() could do a bit of garbage collection and remove all defunct vfio_mm, if they're not held by any iommu_bond anymore. But I think iommu_notifier_release (patch 5/37) can actually release the lock temporarily if it's careful about concurrent list modifications (and takes a ref to the given bond), in which case we can remove this mm_exit() constraint and simplify the VFIO patch. [...] >> +/* >> + * Only mode supported at the moment is VFIO_IOMMU_BIND_PROCESS, which takes >> + * vfio_iommu_type1_bind_process in data. >> + */ >> +struct vfio_iommu_type1_bind { >> +__u32 argsz; >> +__u32 mode; > > s/mode/flags/ > >> +#define VFIO_IOMMU_BIND_PROCESS (1 << 0) >> +__u8data[]; >> +}; > > I'm not convinced having a separate vfio_iommu_type1_bind_process > struct is necessary. It seems like we always expect to return a pasid, > only the pid is optional, but that could be handled by a single > structure with a flag bit to indicate a pid bind is requested. We were planning to reuse VFIO_IOMMU_BIND for PASID table binding as well. 
So vfio_iommu_type1_bind::flags would either be VFIO_IOMMU_BIND_PROCESS or VFIO_IOMMU_BIND_PASID_TABLE, and vfio_iommu_type1_bind::data is an union of vfio_iommu_type1_bind_process and vfio_iommu_type1_bind_pasid_table https://patchwork.kernel.org/patch/9701025/ > >> + >> +/* >> + * VFIO_IOMMU_BIND - _IOWR(VFIO_TYPE, VFIO_BASE + 22, struct >> vfio_iommu_bind) > > vfio_iommu_type1_bind Thanks, Jean ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 37/37] vfio: Add support for Shared Virtual Addressing
On Mon, 12 Feb 2018 18:33:52 + Jean-Philippe Brucker wrote: > Add two new ioctl for VFIO containers. VFIO_IOMMU_BIND_PROCESS creates a > bond between a container and a process address space, identified by a > device-specific ID named PASID. This allows the device to target DMA > transactions at the process virtual addresses without a need for mapping > and unmapping buffers explicitly in the IOMMU. The process page tables are > shared with the IOMMU, and mechanisms such as PCI ATS/PRI are used to > handle faults. VFIO_IOMMU_UNBIND_PROCESS removes a bond created with > VFIO_IOMMU_BIND_PROCESS. > > Signed-off-by: Jean-Philippe Brucker > --- > drivers/vfio/vfio_iommu_type1.c | 399 > > include/uapi/linux/vfio.h | 76 > 2 files changed, 475 insertions(+) > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index e30e29ae4819..cac066f0026b 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -30,6 +30,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -60,6 +61,7 @@ MODULE_PARM_DESC(disable_hugepages, > > struct vfio_iommu { > struct list_headdomain_list; > + struct list_headmm_list; > struct vfio_domain *external_domain; /* domain for external user */ > struct mutexlock; > struct rb_root dma_list; > @@ -90,6 +92,15 @@ struct vfio_dma { > struct vfio_group { > struct iommu_group *iommu_group; > struct list_headnext; > + boolsva_enabled; > +}; > + > +struct vfio_mm { > +#define VFIO_PASID_INVALID (-1) > + spinlock_t lock; > + int pasid; > + struct mm_struct*mm; > + struct list_headnext; > }; > > /* > @@ -1117,6 +1128,157 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, > return 0; > } > > +static int vfio_iommu_mm_exit(struct device *dev, int pasid, void *data) > +{ > + struct vfio_mm *vfio_mm = data; > + > + /* > + * The mm_exit callback cannot block, so we can't take the iommu mutex > + * and remove this vfio_mm from the list. 
Hopefully the SVA code will > + * relax its locking requirement in the future. > + * > + * We mostly care about attach_group, which will attempt to replay all > + * binds in this container. Ensure that it doesn't touch this defunct mm > + * struct, by clearing the pointer. The structure will be freed when the > + * group is removed from the container. > + */ > + spin_lock(&vfio_mm->lock); > + vfio_mm->mm = NULL; > + spin_unlock(&vfio_mm->lock); > + > + return 0; > +} > + > +static int vfio_iommu_sva_init(struct device *dev, void *data) > +{ > + > + int ret; > + > + ret = iommu_sva_device_init(dev, IOMMU_SVA_FEAT_PASID | > + IOMMU_SVA_FEAT_IOPF, 0); > + if (ret) > + return ret; > + > + return iommu_register_mm_exit_handler(dev, vfio_iommu_mm_exit); > +} > + > +static int vfio_iommu_sva_shutdown(struct device *dev, void *data) > +{ > + iommu_sva_device_shutdown(dev); > + iommu_unregister_mm_exit_handler(dev); Typically the order would be reverse of the setup, is it correct this way? > + > + return 0; > +} > + > +static int vfio_iommu_bind_group(struct vfio_iommu *iommu, > + struct vfio_group *group, > + struct vfio_mm *vfio_mm) > +{ > + int ret; > + int pasid; > + > + if (!group->sva_enabled) { > + ret = iommu_group_for_each_dev(group->iommu_group, NULL, > +vfio_iommu_sva_init); > + if (ret) > + return ret; Seems were at an unknown state here, do we need to undo any that succeeded? 
> + > + group->sva_enabled = true; > + } > + > + ret = iommu_sva_bind_group(group->iommu_group, vfio_mm->mm, &pasid, > +IOMMU_SVA_FEAT_PASID | IOMMU_SVA_FEAT_IOPF, > +vfio_mm); > + if (ret) > + return ret; > + > + if (WARN_ON(vfio_mm->pasid != VFIO_PASID_INVALID && pasid != > + vfio_mm->pasid)) > + return -EFAULT; > + > + vfio_mm->pasid = pasid; > + > + return 0; > +} > + > +static void vfio_iommu_unbind_group(struct vfio_group *group, > + struct vfio_mm *vfio_mm) > +{ > + iommu_sva_unbind_group(group->iommu_group, vfio_mm->pasid); > +} > + > +static void vfio_iommu_unbind(struct vfio_iommu *iommu, > + struct vfio_mm *vfio_mm) > +{ > + struct vfio_group *group; > + struct vfio_domain *domain; > + > + list_for_each_entry(domain, &iommu->domain_list, next) > + list_for_each_entry(group, &domain->group_
[PATCH 37/37] vfio: Add support for Shared Virtual Addressing
Add two new ioctl for VFIO containers. VFIO_IOMMU_BIND_PROCESS creates a bond between a container and a process address space, identified by a device-specific ID named PASID. This allows the device to target DMA transactions at the process virtual addresses without a need for mapping and unmapping buffers explicitly in the IOMMU. The process page tables are shared with the IOMMU, and mechanisms such as PCI ATS/PRI are used to handle faults. VFIO_IOMMU_UNBIND_PROCESS removes a bond created with VFIO_IOMMU_BIND_PROCESS. Signed-off-by: Jean-Philippe Brucker --- drivers/vfio/vfio_iommu_type1.c | 399 include/uapi/linux/vfio.h | 76 2 files changed, 475 insertions(+) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index e30e29ae4819..cac066f0026b 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -60,6 +61,7 @@ MODULE_PARM_DESC(disable_hugepages, struct vfio_iommu { struct list_headdomain_list; + struct list_headmm_list; struct vfio_domain *external_domain; /* domain for external user */ struct mutexlock; struct rb_root dma_list; @@ -90,6 +92,15 @@ struct vfio_dma { struct vfio_group { struct iommu_group *iommu_group; struct list_headnext; + boolsva_enabled; +}; + +struct vfio_mm { +#define VFIO_PASID_INVALID (-1) + spinlock_t lock; + int pasid; + struct mm_struct*mm; + struct list_headnext; }; /* @@ -1117,6 +1128,157 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, return 0; } +static int vfio_iommu_mm_exit(struct device *dev, int pasid, void *data) +{ + struct vfio_mm *vfio_mm = data; + + /* +* The mm_exit callback cannot block, so we can't take the iommu mutex +* and remove this vfio_mm from the list. Hopefully the SVA code will +* relax its locking requirement in the future. +* +* We mostly care about attach_group, which will attempt to replay all +* binds in this container. 
Ensure that it doesn't touch this defunct mm +* struct, by clearing the pointer. The structure will be freed when the +* group is removed from the container. +*/ + spin_lock(&vfio_mm->lock); + vfio_mm->mm = NULL; + spin_unlock(&vfio_mm->lock); + + return 0; +} + +static int vfio_iommu_sva_init(struct device *dev, void *data) +{ + + int ret; + + ret = iommu_sva_device_init(dev, IOMMU_SVA_FEAT_PASID | + IOMMU_SVA_FEAT_IOPF, 0); + if (ret) + return ret; + + return iommu_register_mm_exit_handler(dev, vfio_iommu_mm_exit); +} + +static int vfio_iommu_sva_shutdown(struct device *dev, void *data) +{ + iommu_sva_device_shutdown(dev); + iommu_unregister_mm_exit_handler(dev); + + return 0; +} + +static int vfio_iommu_bind_group(struct vfio_iommu *iommu, +struct vfio_group *group, +struct vfio_mm *vfio_mm) +{ + int ret; + int pasid; + + if (!group->sva_enabled) { + ret = iommu_group_for_each_dev(group->iommu_group, NULL, + vfio_iommu_sva_init); + if (ret) + return ret; + + group->sva_enabled = true; + } + + ret = iommu_sva_bind_group(group->iommu_group, vfio_mm->mm, &pasid, + IOMMU_SVA_FEAT_PASID | IOMMU_SVA_FEAT_IOPF, + vfio_mm); + if (ret) + return ret; + + if (WARN_ON(vfio_mm->pasid != VFIO_PASID_INVALID && pasid != + vfio_mm->pasid)) + return -EFAULT; + + vfio_mm->pasid = pasid; + + return 0; +} + +static void vfio_iommu_unbind_group(struct vfio_group *group, + struct vfio_mm *vfio_mm) +{ + iommu_sva_unbind_group(group->iommu_group, vfio_mm->pasid); +} + +static void vfio_iommu_unbind(struct vfio_iommu *iommu, + struct vfio_mm *vfio_mm) +{ + struct vfio_group *group; + struct vfio_domain *domain; + + list_for_each_entry(domain, &iommu->domain_list, next) + list_for_each_entry(group, &domain->group_list, next) + vfio_iommu_unbind_group(group, vfio_mm); +} + +static bool vfio_mm_get(struct vfio_mm *vfio_mm) +{ + bool ret; + + spin_lock(&vfio_mm->lock); + ret = vfio_mm->mm && mmget_not_zero(vfio_mm->mm); + spin_unlock(&vfio_mm->lock); + + return ret; +} + +static void 
vfio_mm_put(struct vfio_mm *vfio_mm) +{ + mmp