RE: [PATCH v5 10/15] vfio/type1: Support binding guest page tables to PASID

2020-07-20 Thread Liu, Yi L
Hi Eric,

> From: Auger Eric 
> Sent: Monday, July 20, 2020 5:37 PM
> 
> Yi,
> 
> On 7/12/20 1:21 PM, Liu Yi L wrote:
> > Nesting translation allows two-levels/stages page tables, with 1st level
> > for guest translations (e.g. GVA->GPA), 2nd level for host translations
> > (e.g. GPA->HPA). This patch adds interface for binding guest page tables
> > to a PASID. This PASID must have been allocated to user space before the
> by the userspace?

yes, it is. will modify it.

> > binding request.
> >
> > Cc: Kevin Tian 
> > CC: Jacob Pan 
> > Cc: Alex Williamson 
> > Cc: Eric Auger 
> > Cc: Jean-Philippe Brucker 
> > Cc: Joerg Roedel 
> > Cc: Lu Baolu 
> > Signed-off-by: Jean-Philippe Brucker 
> > Signed-off-by: Liu Yi L 
> > Signed-off-by: Jacob Pan 
> > ---
> > v3 -> v4:
> > *) address comments from Alex on v3
> >
> > v2 -> v3:
> > *) use __iommu_sva_unbind_gpasid() for unbind call issued by VFIO
> >https://lore.kernel.org/linux-iommu/1592931837-58223-6-git-send-email-
> jacob.jun@linux.intel.com/
> >
> > v1 -> v2:
> > *) rename subject from "vfio/type1: Bind guest page tables to host"
> > *) remove VFIO_IOMMU_BIND, introduce VFIO_IOMMU_NESTING_OP to support
> bind/
> >unbind guet page table
> > *) replaced vfio_iommu_for_each_dev() with a group level loop since this
> >series enforces one group per container w/ nesting type as start.
> > *) rename vfio_bind/unbind_gpasid_fn() to vfio_dev_bind/unbind_gpasid_fn()
> > *) vfio_dev_unbind_gpasid() always successful
> > *) use vfio_mm->pasid_lock to avoid race between PASID free and page table
> >bind/unbind
> > ---
> >  drivers/vfio/vfio_iommu_type1.c | 166
> 
> >  drivers/vfio/vfio_pasid.c   |  26 +++
> >  include/linux/vfio.h|  20 +
> >  include/uapi/linux/vfio.h   |  31 
> >  4 files changed, 243 insertions(+)
> >
> > diff --git a/drivers/vfio/vfio_iommu_type1.c 
> > b/drivers/vfio/vfio_iommu_type1.c
> > index 55b4065..f0f21ff 100644
> > --- a/drivers/vfio/vfio_iommu_type1.c
> > +++ b/drivers/vfio/vfio_iommu_type1.c
> > @@ -149,6 +149,30 @@ struct vfio_regions {
> >  #define DIRTY_BITMAP_PAGES_MAX  ((u64)INT_MAX)
> >  #define DIRTY_BITMAP_SIZE_MAX
> DIRTY_BITMAP_BYTES(DIRTY_BITMAP_PAGES_MAX)
> >
> > +struct domain_capsule {
> > +   struct vfio_group *group;
> > +   struct iommu_domain *domain;
> > +   void *data;
> > +};
> > +
> > +/* iommu->lock must be held */
> > +static struct vfio_group *vfio_find_nesting_group(struct vfio_iommu *iommu)
> > +{
> > +   struct vfio_domain *d;
> > +   struct vfio_group *group = NULL;
> > +
> > +   if (!iommu->nesting_info)
> > +   return NULL;
> > +
> > +   /* only support singleton container with nesting type */
> > +   list_for_each_entry(d, >domain_list, next) {
> > +   list_for_each_entry(group, >group_list, next) {
> > +   break;
> use list_first_entry?

yeah, based on the discussion in below link, we only support singleton
container with nesting type, also if no group in container, the nesting_info
will be cleared. so yes, list_first_entry will work as well.

https://lkml.org/lkml/2020/5/15/1028

> > +   }
> > +   }
> > +   return group;
> > +}
> > +
> >  static int put_pfn(unsigned long pfn, int prot);
> >
> >  static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu
> *iommu,
> > @@ -2349,6 +2373,48 @@ static int vfio_iommu_resv_refresh(struct vfio_iommu
> *iommu,
> > return ret;
> >  }
> >
> > +static int vfio_dev_bind_gpasid_fn(struct device *dev, void *data)
> > +{
> > +   struct domain_capsule *dc = (struct domain_capsule *)data;
> > +   unsigned long arg = *(unsigned long *)dc->data;
> > +
> > +   return iommu_sva_bind_gpasid(dc->domain, dev, (void __user *)arg);
> > +}
> > +
> > +static int vfio_dev_unbind_gpasid_fn(struct device *dev, void *data)
> > +{
> > +   struct domain_capsule *dc = (struct domain_capsule *)data;
> > +   unsigned long arg = *(unsigned long *)dc->data;
> > +
> > +   iommu_sva_unbind_gpasid(dc->domain, dev, (void __user *)arg);
> > +   return 0;
> > +}
> > +
> > +static int __vfio_dev_unbind_gpasid_fn(struct device *dev, void *data)
> > +{
> > +   struct domain_capsule *dc = (struct domain_capsule *)data;
> > +   struct iommu_gpasid_bind_data *unbind_data =
> > +   (struct iommu_gpasid_bind_data *)dc->data;
> > +
> > +   __iommu_sva_unbind_gpasid(dc->domain, dev, unbind_data);
> > +   return 0;
> > +}
> > +
> > +static void vfio_group_unbind_gpasid_fn(ioasid_t pasid, void *data)
> > +{
> > +   struct domain_capsule *dc = (struct domain_capsule *)data;
> > +   struct iommu_gpasid_bind_data unbind_data;
> > +
> > +   unbind_data.argsz = offsetof(struct iommu_gpasid_bind_data, vendor);
> > +   unbind_data.flags = 0;
> > +   unbind_data.hpasid = pasid;
> > +
> > +   dc->data = _data;
> > +
> > +   iommu_group_for_each_dev(dc->group->iommu_group,
> > +dc, __vfio_dev_unbind_gpasid_fn);
> 

Re: [PATCH v5 10/15] vfio/type1: Support binding guest page tables to PASID

2020-07-20 Thread Auger Eric
Yi,

On 7/12/20 1:21 PM, Liu Yi L wrote:
> Nesting translation allows two-levels/stages page tables, with 1st level
> for guest translations (e.g. GVA->GPA), 2nd level for host translations
> (e.g. GPA->HPA). This patch adds interface for binding guest page tables
> to a PASID. This PASID must have been allocated to user space before the
by the userspace?
> binding request.
> 
> Cc: Kevin Tian 
> CC: Jacob Pan 
> Cc: Alex Williamson 
> Cc: Eric Auger 
> Cc: Jean-Philippe Brucker 
> Cc: Joerg Roedel 
> Cc: Lu Baolu 
> Signed-off-by: Jean-Philippe Brucker 
> Signed-off-by: Liu Yi L 
> Signed-off-by: Jacob Pan 
> ---
> v3 -> v4:
> *) address comments from Alex on v3
> 
> v2 -> v3:
> *) use __iommu_sva_unbind_gpasid() for unbind call issued by VFIO
>
> https://lore.kernel.org/linux-iommu/1592931837-58223-6-git-send-email-jacob.jun@linux.intel.com/
> 
> v1 -> v2:
> *) rename subject from "vfio/type1: Bind guest page tables to host"
> *) remove VFIO_IOMMU_BIND, introduce VFIO_IOMMU_NESTING_OP to support bind/
>unbind guet page table
> *) replaced vfio_iommu_for_each_dev() with a group level loop since this
>series enforces one group per container w/ nesting type as start.
> *) rename vfio_bind/unbind_gpasid_fn() to vfio_dev_bind/unbind_gpasid_fn()
> *) vfio_dev_unbind_gpasid() always successful
> *) use vfio_mm->pasid_lock to avoid race between PASID free and page table
>bind/unbind
> ---
>  drivers/vfio/vfio_iommu_type1.c | 166 
> 
>  drivers/vfio/vfio_pasid.c   |  26 +++
>  include/linux/vfio.h|  20 +
>  include/uapi/linux/vfio.h   |  31 
>  4 files changed, 243 insertions(+)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 55b4065..f0f21ff 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -149,6 +149,30 @@ struct vfio_regions {
>  #define DIRTY_BITMAP_PAGES_MAX((u64)INT_MAX)
>  #define DIRTY_BITMAP_SIZE_MAX 
> DIRTY_BITMAP_BYTES(DIRTY_BITMAP_PAGES_MAX)
>  
> +struct domain_capsule {
> + struct vfio_group *group;
> + struct iommu_domain *domain;
> + void *data;
> +};
> +
> +/* iommu->lock must be held */
> +static struct vfio_group *vfio_find_nesting_group(struct vfio_iommu *iommu)
> +{
> + struct vfio_domain *d;
> + struct vfio_group *group = NULL;
> +
> + if (!iommu->nesting_info)
> + return NULL;
> +
> + /* only support singleton container with nesting type */
> + list_for_each_entry(d, >domain_list, next) {
> + list_for_each_entry(group, >group_list, next) {
> + break;
use list_first_entry?
> + }
> + }
> + return group;
> +}
> +
>  static int put_pfn(unsigned long pfn, int prot);
>  
>  static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu 
> *iommu,
> @@ -2349,6 +2373,48 @@ static int vfio_iommu_resv_refresh(struct vfio_iommu 
> *iommu,
>   return ret;
>  }
>  
> +static int vfio_dev_bind_gpasid_fn(struct device *dev, void *data)
> +{
> + struct domain_capsule *dc = (struct domain_capsule *)data;
> + unsigned long arg = *(unsigned long *)dc->data;
> +
> + return iommu_sva_bind_gpasid(dc->domain, dev, (void __user *)arg);
> +}
> +
> +static int vfio_dev_unbind_gpasid_fn(struct device *dev, void *data)
> +{
> + struct domain_capsule *dc = (struct domain_capsule *)data;
> + unsigned long arg = *(unsigned long *)dc->data;
> +
> + iommu_sva_unbind_gpasid(dc->domain, dev, (void __user *)arg);
> + return 0;
> +}
> +
> +static int __vfio_dev_unbind_gpasid_fn(struct device *dev, void *data)
> +{
> + struct domain_capsule *dc = (struct domain_capsule *)data;
> + struct iommu_gpasid_bind_data *unbind_data =
> + (struct iommu_gpasid_bind_data *)dc->data;
> +
> + __iommu_sva_unbind_gpasid(dc->domain, dev, unbind_data);
> + return 0;
> +}
> +
> +static void vfio_group_unbind_gpasid_fn(ioasid_t pasid, void *data)
> +{
> + struct domain_capsule *dc = (struct domain_capsule *)data;
> + struct iommu_gpasid_bind_data unbind_data;
> +
> + unbind_data.argsz = offsetof(struct iommu_gpasid_bind_data, vendor);
> + unbind_data.flags = 0;
> + unbind_data.hpasid = pasid;
> +
> + dc->data = _data;
> +
> + iommu_group_for_each_dev(dc->group->iommu_group,
> +  dc, __vfio_dev_unbind_gpasid_fn);
> +}
> +
>  static void vfio_iommu_type1_detach_group(void *iommu_data,
> struct iommu_group *iommu_group)
>  {
> @@ -2392,6 +2458,21 @@ static void vfio_iommu_type1_detach_group(void 
> *iommu_data,
>   if (!group)
>   continue;
>  
> + if (iommu->nesting_info && iommu->vmm &&
> + (iommu->nesting_info->features &
> + IOMMU_NESTING_FEAT_BIND_PGTBL)) {
> + 

[PATCH v5 10/15] vfio/type1: Support binding guest page tables to PASID

2020-07-12 Thread Liu Yi L
Nesting translation allows two-levels/stages page tables, with 1st level
for guest translations (e.g. GVA->GPA), 2nd level for host translations
(e.g. GPA->HPA). This patch adds interface for binding guest page tables
to a PASID. This PASID must have been allocated to user space before the
binding request.

Cc: Kevin Tian 
CC: Jacob Pan 
Cc: Alex Williamson 
Cc: Eric Auger 
Cc: Jean-Philippe Brucker 
Cc: Joerg Roedel 
Cc: Lu Baolu 
Signed-off-by: Jean-Philippe Brucker 
Signed-off-by: Liu Yi L 
Signed-off-by: Jacob Pan 
---
v3 -> v4:
*) address comments from Alex on v3

v2 -> v3:
*) use __iommu_sva_unbind_gpasid() for unbind call issued by VFIO
   
https://lore.kernel.org/linux-iommu/1592931837-58223-6-git-send-email-jacob.jun@linux.intel.com/

v1 -> v2:
*) rename subject from "vfio/type1: Bind guest page tables to host"
*) remove VFIO_IOMMU_BIND, introduce VFIO_IOMMU_NESTING_OP to support bind/
   unbind guet page table
*) replaced vfio_iommu_for_each_dev() with a group level loop since this
   series enforces one group per container w/ nesting type as start.
*) rename vfio_bind/unbind_gpasid_fn() to vfio_dev_bind/unbind_gpasid_fn()
*) vfio_dev_unbind_gpasid() always successful
*) use vfio_mm->pasid_lock to avoid race between PASID free and page table
   bind/unbind
---
 drivers/vfio/vfio_iommu_type1.c | 166 
 drivers/vfio/vfio_pasid.c   |  26 +++
 include/linux/vfio.h|  20 +
 include/uapi/linux/vfio.h   |  31 
 4 files changed, 243 insertions(+)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 55b4065..f0f21ff 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -149,6 +149,30 @@ struct vfio_regions {
 #define DIRTY_BITMAP_PAGES_MAX  ((u64)INT_MAX)
 #define DIRTY_BITMAP_SIZE_MAX   DIRTY_BITMAP_BYTES(DIRTY_BITMAP_PAGES_MAX)
 
+struct domain_capsule {
+   struct vfio_group *group;
+   struct iommu_domain *domain;
+   void *data;
+};
+
+/* iommu->lock must be held */
+static struct vfio_group *vfio_find_nesting_group(struct vfio_iommu *iommu)
+{
+   struct vfio_domain *d;
+   struct vfio_group *group = NULL;
+
+   if (!iommu->nesting_info)
+   return NULL;
+
+   /* only support singleton container with nesting type */
+   list_for_each_entry(d, >domain_list, next) {
+   list_for_each_entry(group, >group_list, next) {
+   break;
+   }
+   }
+   return group;
+}
+
 static int put_pfn(unsigned long pfn, int prot);
 
 static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
@@ -2349,6 +2373,48 @@ static int vfio_iommu_resv_refresh(struct vfio_iommu 
*iommu,
return ret;
 }
 
+static int vfio_dev_bind_gpasid_fn(struct device *dev, void *data)
+{
+   struct domain_capsule *dc = (struct domain_capsule *)data;
+   unsigned long arg = *(unsigned long *)dc->data;
+
+   return iommu_sva_bind_gpasid(dc->domain, dev, (void __user *)arg);
+}
+
+static int vfio_dev_unbind_gpasid_fn(struct device *dev, void *data)
+{
+   struct domain_capsule *dc = (struct domain_capsule *)data;
+   unsigned long arg = *(unsigned long *)dc->data;
+
+   iommu_sva_unbind_gpasid(dc->domain, dev, (void __user *)arg);
+   return 0;
+}
+
+static int __vfio_dev_unbind_gpasid_fn(struct device *dev, void *data)
+{
+   struct domain_capsule *dc = (struct domain_capsule *)data;
+   struct iommu_gpasid_bind_data *unbind_data =
+   (struct iommu_gpasid_bind_data *)dc->data;
+
+   __iommu_sva_unbind_gpasid(dc->domain, dev, unbind_data);
+   return 0;
+}
+
+static void vfio_group_unbind_gpasid_fn(ioasid_t pasid, void *data)
+{
+   struct domain_capsule *dc = (struct domain_capsule *)data;
+   struct iommu_gpasid_bind_data unbind_data;
+
+   unbind_data.argsz = offsetof(struct iommu_gpasid_bind_data, vendor);
+   unbind_data.flags = 0;
+   unbind_data.hpasid = pasid;
+
+   dc->data = _data;
+
+   iommu_group_for_each_dev(dc->group->iommu_group,
+dc, __vfio_dev_unbind_gpasid_fn);
+}
+
 static void vfio_iommu_type1_detach_group(void *iommu_data,
  struct iommu_group *iommu_group)
 {
@@ -2392,6 +2458,21 @@ static void vfio_iommu_type1_detach_group(void 
*iommu_data,
if (!group)
continue;
 
+   if (iommu->nesting_info && iommu->vmm &&
+   (iommu->nesting_info->features &
+   IOMMU_NESTING_FEAT_BIND_PGTBL)) {
+   struct domain_capsule dc = { .group = group,
+.domain = domain->domain,
+.data = NULL };
+
+   /*
+* Unbind page tables bound with system wide