RE: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace

2020-07-08 Thread Liu, Yi L
Hi Alex,

> From: Alex Williamson 
> Sent: Thursday, July 9, 2020 3:30 AM
> 
> On Wed, 8 Jul 2020 08:08:40 +
> "Liu, Yi L"  wrote:
> 
> > Hi Alex,
> >
> > Eric asked if we will have a data struct other than struct
> > iommu_nesting_info in the struct vfio_iommu_type1_info_cap_nesting
> > @info[] field. I'm not quite sure about it. I guess the answer is no,
> > as VFIO's nesting support should be based on the IOMMU uAPI. What is
> > your opinion?
> >
> > +#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING  3
> > +
> > +/*
> > + * Reporting nesting info to user space.
> > + *
> > + * @info:  the nesting info provided by IOMMU driver. Today
> > + * it is expected to be a struct iommu_nesting_info
> > + * data.
> > + */
> > +struct vfio_iommu_type1_info_cap_nesting {
> > +   struct  vfio_info_cap_header header;
> > +   __u32   flags;
> > +   __u32   padding;
> > +   __u8info[];
> > +};
> 
> It's not a very useful uAPI if the user can't be sure what they're
> getting out of it.  Info capabilities are "cheap", they don't need to
> be as extensible as an ioctl.  It's not clear that we really even need
> the flags (and therefore the padding), just define it to return the
> IOMMU uAPI structure with no extensibility.  If we need to expose
> something else, create a new capability.  Thanks,

Thanks for the guidance. I will embed struct iommu_nesting_info here
then. :-)
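
For example, a rough sketch of the embedded form (assuming the struct
iommu_nesting_info definition from the IOMMU uAPI series; this is
illustrative, not a final definition):

#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING	3

/*
 * Reporting nesting info to user space, with the IOMMU uAPI struct
 * embedded directly so userspace knows exactly what it gets. No
 * flags/padding; anything else would get its own capability ID.
 */
struct vfio_iommu_type1_info_cap_nesting {
	struct vfio_info_cap_header	header;
	struct iommu_nesting_info	info;
};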

Regards,
Yi Liu

> Alex
> 
> >
> > https://lore.kernel.org/linux-iommu/dm5pr11mb1435290b6cd561ec61027892c3...@dm5pr11mb1435.namprd11.prod.outlook.com/
> >
> > Regards,
> > Yi Liu
> >
> > > From: Liu, Yi L
> > > Sent: Tuesday, July 7, 2020 5:32 PM
> > >
> > [...]
> > > > >
> > > > >>> +
> > > > >>> +/*
> > > > >>> + * Reporting nesting info to user space.
> > > > >>> + *
> > > > >>> + * @info:  the nesting info provided by IOMMU driver. Today
> > > > >>> + * it is expected to be a struct iommu_nesting_info
> > > > >>> + * data.
> > > > >> Is it expected to change?
> > > > >
> > > > > Honestly, I'm not quite sure about it. I did consider embedding struct
> > > > > iommu_nesting_info here instead of using info[], but I hesitated as
> > > > > using info[] may leave more flexibility in this struct. What is
> > > > > your opinion? Perhaps it's fine to embed the struct
> > > > > iommu_nesting_info here as long as VFIO sets up nesting based on
> > > > > the IOMMU uAPI.
> > > > >
> > > > >>> + */
> > > > >>> +struct vfio_iommu_type1_info_cap_nesting {
> > > > >>> +   struct  vfio_info_cap_header header;
> > > > >>> +   __u32   flags;
> > > > >> You may document flags.
> > > > >
> > > > > Sure. It's reserved for the future.
> > > > >
> > > > > Regards,
> > > > > Yi Liu
> > > > >
> > > > >>> +   __u32   padding;
> > > > >>> +   __u8info[];
> > > > >>> +};
> > > > >>> +
> > > > >>>  #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
> > > > >>>
> > > > >>>  /**
> > > > >>>
> > > > >> Thanks
> > > > >>
> > > > >> Eric
> > > > >
> >



Re: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace

2020-07-08 Thread Alex Williamson
On Wed, 8 Jul 2020 08:08:40 +
"Liu, Yi L"  wrote:

> Hi Alex,
> 
> Eric asked if we will have a data struct other than struct
> iommu_nesting_info in the struct vfio_iommu_type1_info_cap_nesting
> @info[] field. I'm not quite sure about it. I guess the answer is no,
> as VFIO's nesting support should be based on the IOMMU uAPI. What is
> your opinion?
> 
> +#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING  3
> +
> +/*
> + * Reporting nesting info to user space.
> + *
> + * @info:the nesting info provided by IOMMU driver. Today
> + *   it is expected to be a struct iommu_nesting_info
> + *   data.
> + */
> +struct vfio_iommu_type1_info_cap_nesting {
> + struct  vfio_info_cap_header header;
> + __u32   flags;
> + __u32   padding;
> + __u8info[];
> +};

It's not a very useful uAPI if the user can't be sure what they're
getting out of it.  Info capabilities are "cheap", they don't need to
be as extensible as an ioctl.  It's not clear that we really even need
the flags (and therefore the padding), just define it to return the
IOMMU uAPI structure with no extensibility.  If we need to expose
something else, create a new capability.  Thanks,

Alex
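
For reference, a minimal userspace sketch of how such an info capability
would be consumed - query VFIO_IOMMU_GET_INFO once to learn the required
argsz, then again to get the capability chain and walk it. The nesting
capability ID comes from this series, not from mainline headers, and
error handling is omitted:

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* From this series; not in mainline linux/vfio.h. */
#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING	3

static struct vfio_info_cap_header *
find_nesting_cap(int container_fd, struct vfio_iommu_type1_info **out)
{
	struct vfio_iommu_type1_info *info;
	struct vfio_info_cap_header *hdr;

	/* First call just learns the required argsz. */
	info = calloc(1, sizeof(*info));
	info->argsz = sizeof(*info);
	ioctl(container_fd, VFIO_IOMMU_GET_INFO, info);

	/* Second call with room for the capability chain. */
	info = realloc(info, info->argsz);
	ioctl(container_fd, VFIO_IOMMU_GET_INFO, info);
	*out = info;

	if (!(info->flags & VFIO_IOMMU_INFO_CAPS) || !info->cap_offset)
		return NULL;

	/* Capabilities chain via byte offsets from the start of info. */
	for (hdr = (void *)info + info->cap_offset; ;
	     hdr = (void *)info + hdr->next) {
		if (hdr->id == VFIO_IOMMU_TYPE1_INFO_CAP_NESTING)
			return hdr;
		if (!hdr->next)
			return NULL;
	}
}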

> 
> https://lore.kernel.org/linux-iommu/dm5pr11mb1435290b6cd561ec61027892c3...@dm5pr11mb1435.namprd11.prod.outlook.com/
> 
> Regards,
> Yi Liu
> 
> > From: Liu, Yi L
> > Sent: Tuesday, July 7, 2020 5:32 PM
> >   
> [...]
> > > >  
> > > >>> +
> > > >>> +/*
> > > >>> + * Reporting nesting info to user space.
> > > >>> + *
> > > >>> + * @info:the nesting info provided by IOMMU driver. Today
> > > >>> + *   it is expected to be a struct iommu_nesting_info
> > > >>> + *   data.  
> > > >> Is it expected to change?  
> > > >
> > > > Honestly, I'm not quite sure about it. I did consider embedding struct
> > > > iommu_nesting_info here instead of using info[], but I hesitated as
> > > > using info[] may leave more flexibility in this struct. What is
> > > > your opinion? Perhaps it's fine to embed the struct
> > > > iommu_nesting_info here as long as VFIO sets up nesting based on
> > > > the IOMMU uAPI.
> > > >  
> > > >>> + */
> > > >>> +struct vfio_iommu_type1_info_cap_nesting {
> > > >>> + struct  vfio_info_cap_header header;
> > > >>> + __u32   flags;  
> > > >> You may document flags.  
> > > >
> > > > Sure. It's reserved for the future.
> > > >
> > > > Regards,
> > > > Yi Liu
> > > >  
> > > >>> + __u32   padding;
> > > >>> + __u8info[];
> > > >>> +};
> > > >>> +
> > > >>>  #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
> > > >>>
> > > >>>  /**
> > > >>>  
> > > >> Thanks
> > > >>
> > > >> Eric  
> > > >  
> 



RE: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace

2020-07-08 Thread Liu, Yi L
Hi Alex,

Eric asked if we will have a data struct other than struct iommu_nesting_info
in the struct vfio_iommu_type1_info_cap_nesting @info[] field. I'm not
quite sure about it. I guess the answer is no, as VFIO's nesting support
should be based on the IOMMU uAPI. What is your opinion?

+#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING  3
+
+/*
+ * Reporting nesting info to user space.
+ *
+ * @info:  the nesting info provided by IOMMU driver. Today
+ * it is expected to be a struct iommu_nesting_info
+ * data.
+ */
+struct vfio_iommu_type1_info_cap_nesting {
+   struct  vfio_info_cap_header header;
+   __u32   flags;
+   __u32   padding;
+   __u8info[];
+};

https://lore.kernel.org/linux-iommu/dm5pr11mb1435290b6cd561ec61027892c3...@dm5pr11mb1435.namprd11.prod.outlook.com/

Regards,
Yi Liu

> From: Liu, Yi L
> Sent: Tuesday, July 7, 2020 5:32 PM
> 
[...]
> > >
> > >>> +
> > >>> +/*
> > >>> + * Reporting nesting info to user space.
> > >>> + *
> > >>> + * @info:  the nesting info provided by IOMMU driver. Today
> > >>> + * it is expected to be a struct iommu_nesting_info
> > >>> + * data.
> > >> Is it expected to change?
> > >
> > > Honestly, I'm not quite sure about it. I did consider embedding struct
> > > iommu_nesting_info here instead of using info[], but I hesitated as
> > > using info[] may leave more flexibility in this struct. What is
> > > your opinion? Perhaps it's fine to embed the struct
> > > iommu_nesting_info here as long as VFIO sets up nesting based on
> > > the IOMMU uAPI.
> > >
> > >>> + */
> > >>> +struct vfio_iommu_type1_info_cap_nesting {
> > >>> +   struct  vfio_info_cap_header header;
> > >>> +   __u32   flags;
> > >> You may document flags.
> > >
> > > Sure. It's reserved for the future.
> > >
> > > Regards,
> > > Yi Liu
> > >
> > >>> +   __u32   padding;
> > >>> +   __u8info[];
> > >>> +};
> > >>> +
> > >>>  #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
> > >>>
> > >>>  /**
> > >>>
> > >> Thanks
> > >>
> > >> Eric
> > >



RE: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace

2020-07-07 Thread Liu, Yi L
Hi Eric,

> From: Auger Eric 
> Sent: Monday, July 6, 2020 10:07 PM
> 
> Hi Yi,
> On 7/4/20 1:26 PM, Liu Yi L wrote:
> > This patch exports iommu nesting capability info to user space through
> > VFIO. User space is expected to check this info for supported uAPIs (e.g.
> > PASID alloc/free, bind page table, and cache invalidation) and the vendor
> > specific format information for first level/stage page table that will be
> > bound to.
> >
> > The nesting info is available only after the nesting iommu type is set
> > for a container. Current implementation imposes one limitation - one
> > nesting container should include at most one group. The philosophy of
> > vfio container is having all groups/devices within the container share
> > the same IOMMU context. When vSVA is enabled, one IOMMU context could
> > include one 2nd-level address space and multiple 1st-level address spaces.
> > While the 2nd-level address space is reasonably sharable by multiple groups,
> > blindly sharing 1st-level address spaces across all groups within the
> > container might instead break the guest expectation. In the future sub/
> > super container concept might be introduced to allow partial address space
> > sharing within an IOMMU context. But for now let's go with this restriction
> > by requiring singleton container for using nesting iommu features. Below
> > link has the related discussion about this decision.
> >
> > https://lkml.org/lkml/2020/5/15/1028
> >
> > Cc: Kevin Tian 
> > CC: Jacob Pan 
> > Cc: Alex Williamson 
> > Cc: Eric Auger 
> > Cc: Jean-Philippe Brucker 
> > Cc: Joerg Roedel 
> > Cc: Lu Baolu 
> > Signed-off-by: Liu Yi L 
> > ---
> > v3 -> v4:
> > *) address comments against v3.
> >
> > v1 -> v2:
> > *) added in v2
> > ---
> >
> >  drivers/vfio/vfio_iommu_type1.c | 105
> +++-
> >  include/uapi/linux/vfio.h   |  16 ++
> >  2 files changed, 109 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/vfio/vfio_iommu_type1.c 
> > b/drivers/vfio/vfio_iommu_type1.c
> > index 7accb59..80623b8 100644
> > --- a/drivers/vfio/vfio_iommu_type1.c
> > +++ b/drivers/vfio/vfio_iommu_type1.c
> > @@ -62,18 +62,20 @@ MODULE_PARM_DESC(dma_entry_limit,
> >  "Maximum number of user DMA mappings per container (65535).");
> >
> >  struct vfio_iommu {
> > -   struct list_headdomain_list;
> > -   struct list_headiova_list;
> > -   struct vfio_domain  *external_domain; /* domain for external user */
> > -   struct mutexlock;
> > -   struct rb_root  dma_list;
> > -   struct blocking_notifier_head notifier;
> > -   unsigned intdma_avail;
> > -   uint64_tpgsize_bitmap;
> > -   boolv2;
> > -   boolnesting;
> > -   booldirty_page_tracking;
> > -   boolpinned_page_dirty_scope;
> > +   struct list_headdomain_list;
> > +   struct list_headiova_list;
> > +   struct vfio_domain  *external_domain; /* domain for
> > +external user */
> > +   struct mutexlock;
> > +   struct rb_root  dma_list;
> > +   struct blocking_notifier_head   notifier;
> > +   unsigned intdma_avail;
> > +   uint64_tpgsize_bitmap;
> > +   boolv2;
> > +   boolnesting;
> > +   booldirty_page_tracking;
> > +   boolpinned_page_dirty_scope;
> > +   struct iommu_nesting_info   *nesting_info;
> >  };
> >
> >  struct vfio_domain {
> > @@ -130,6 +132,9 @@ struct vfio_regions {
> >  #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)\
> > (!list_empty(>domain_list))
> >
> > +#define IS_DOMAIN_IN_CONTAINER(iommu)  ((iommu->external_domain) || \
> > +(!list_empty(>domain_list)))
> > +
> >  #define DIRTY_BITMAP_BYTES(n)  (ALIGN(n, BITS_PER_TYPE(u64)) /
> BITS_PER_BYTE)
> >
> >  /*
> > @@ -1929,6 +1934,13 @@ static void vfio_iommu_iova_insert_copy(struct
> vfio_iommu *iommu,
> >
> > list_splice_tail(iova_copy, iova);
> >  }
> > +
> > +static void vfio_iommu_release_nesting_info(struct vfio_iommu *iommu)
> > +{
> > +   kfree(iommu->nesting_info);
> > +   iommu->nesting_info = NULL;
> > +}
> > +
> >  static int vfio_iommu_type1_attach_group(void *iommu_data,
> >  struct iommu_group *iommu_group)
> >  {
> > @@ -1959,6 +1971,12 @@ static int vfio_iommu_type1_attach_group(void
> *iommu_data,
> > }
> > }
> >
> > +   /* Nesting type container can include only one group */
> > +   if (iommu->nesting && IS_DOMAIN_IN_CONTAINER(iommu)) {
> > +   mutex_unlock(>lock);
> > +   return -EINVAL;
> > +   }
> > +
> > group = kzalloc(sizeof(*group), 

RE: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace

2020-07-07 Thread Liu, Yi L
Hi Eric,

> From: Auger Eric < eric.au...@redhat.com >
> Sent: Monday, July 6, 2020 9:45 PM
> 
> Hi Yi,
> 
> On 7/6/20 3:10 PM, Liu, Yi L wrote:
> > Hi Eric,
> >
> >> From: Auger Eric 
> >> Sent: Monday, July 6, 2020 6:37 PM
> >>
> >> Yi,
> >>
> >> On 7/4/20 1:26 PM, Liu Yi L wrote:
> >>> This patch exports iommu nesting capability info to user space through
> >>> VFIO. User space is expected to check this info for supported uAPIs (e.g.
> >>> PASID alloc/free, bind page table, and cache invalidation) and the vendor
> >>> specific format information for first level/stage page table that will be
> >>> bound to.
> >>>
> >>> The nesting info is available only after the nesting iommu type is set
> >>> for a container. Current implementation imposes one limitation - one
> >>> nesting container should include at most one group. The philosophy of
> >>> vfio container is having all groups/devices within the container share
> >>> the same IOMMU context. When vSVA is enabled, one IOMMU context could
> >>> include one 2nd-level address space and multiple 1st-level address spaces.
> >>> While the 2nd-leve address space is reasonably sharable by multiple groups
> >> level
> >
> > oh, yes.
> >
> >>> , blindly sharing 1st-level address spaces across all groups within the
> >>> container might instead break the guest expectation. In the future sub/
> >>> super container concept might be introduced to allow partial address space
> >>> sharing within an IOMMU context. But for now let's go with this 
> >>> restriction
> >>> by requiring singleton container for using nesting iommu features. Below
> >>> link has the related discussion about this decision.
> >>>
> >>> https://lkml.org/lkml/2020/5/15/1028
> >>>
> >>> Cc: Kevin Tian 
> >>> CC: Jacob Pan 
> >>> Cc: Alex Williamson 
> >>> Cc: Eric Auger 
> >>> Cc: Jean-Philippe Brucker 
> >>> Cc: Joerg Roedel 
> >>> Cc: Lu Baolu 
> >>> Signed-off-by: Liu Yi L 
> >>> ---
> >>> v3 -> v4:
> >>> *) address comments against v3.
> >>>
> >>> v1 -> v2:
> >>> *) added in v2
> >>> ---
> >>>
> >>>  drivers/vfio/vfio_iommu_type1.c | 105
> >> +++-
> >>>  include/uapi/linux/vfio.h   |  16 ++
> >>>  2 files changed, 109 insertions(+), 12 deletions(-)
> >>>
> >>> diff --git a/drivers/vfio/vfio_iommu_type1.c 
> >>> b/drivers/vfio/vfio_iommu_type1.c
> >>> index 7accb59..80623b8 100644
> >>> --- a/drivers/vfio/vfio_iommu_type1.c
> >>> +++ b/drivers/vfio/vfio_iommu_type1.c
> >>> @@ -62,18 +62,20 @@ MODULE_PARM_DESC(dma_entry_limit,
> >>>"Maximum number of user DMA mappings per container (65535).");
> >>>
> >>>  struct vfio_iommu {
> >>> - struct list_headdomain_list;
> >>> - struct list_headiova_list;
> >>> - struct vfio_domain  *external_domain; /* domain for external user */
> >>> - struct mutexlock;
> >>> - struct rb_root  dma_list;
> >>> - struct blocking_notifier_head notifier;
> >>> - unsigned intdma_avail;
> >>> - uint64_tpgsize_bitmap;
> >>> - boolv2;
> >>> - boolnesting;
> >>> - booldirty_page_tracking;
> >>> - boolpinned_page_dirty_scope;
> >>> + struct list_headdomain_list;
> >>> + struct list_headiova_list;
> >>> + struct vfio_domain  *external_domain; /* domain for
> >>> +  external user */
> >> nit: put the comment before the field?
> >
> > do you mean below?
> >
> > +   /* domain for external user */
> > +   struct vfio_domain  *external_domain;
> yes that's what I meant

got you. :-)

> >
> >>> + struct mutexlock;
> >>> + struct rb_root  dma_list;
> >>> + struct blocking_notifier_head   notifier;
> >>> + unsigned intdma_avail;
> >>> + uint64_tpgsize_bitmap;
> >>> + boolv2;
> >>> + boolnesting;
> >>> + booldirty_page_tracking;
> >>> + boolpinned_page_dirty_scope;
> >>> + struct iommu_nesting_info   *nesting_info;
> >>>  };
> >>>
> >>>  struct vfio_domain {
> >>> @@ -130,6 +132,9 @@ struct vfio_regions {
> >>>  #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)  \
> >>>   (!list_empty(>domain_list))
> >>>
> >>> +#define IS_DOMAIN_IN_CONTAINER(iommu)	((iommu->external_domain) || \
> >>> +		(!list_empty(&iommu->domain_list)))
> >> rename into something like CONTAINER_HAS_DOMAIN()?
> >
> > got it.
> >
> >>> +
> >>>  #define DIRTY_BITMAP_BYTES(n)(ALIGN(n, BITS_PER_TYPE(u64)) /
> >> BITS_PER_BYTE)
> >>>
> >>>  /*
> >>> @@ -1929,6 +1934,13 @@ static void vfio_iommu_iova_insert_copy(struct
> >> vfio_iommu *iommu,
> >>>
> >>>   list_splice_tail(iova_copy, iova);
> >>>  }
> >>> +
> >>> +static void 

Re: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace

2020-07-06 Thread Auger Eric
Hi Yi,
On 7/4/20 1:26 PM, Liu Yi L wrote:
> This patch exports iommu nesting capability info to user space through
> VFIO. User space is expected to check this info for supported uAPIs (e.g.
> PASID alloc/free, bind page table, and cache invalidation) and the vendor
> specific format information for first level/stage page table that will be
> bound to.
> 
> The nesting info is available only after the nesting iommu type is set
> for a container. Current implementation imposes one limitation - one
> nesting container should include at most one group. The philosophy of
> vfio container is having all groups/devices within the container share
> the same IOMMU context. When vSVA is enabled, one IOMMU context could
> include one 2nd-level address space and multiple 1st-level address spaces.
> While the 2nd-level address space is reasonably sharable by multiple groups,
> blindly sharing 1st-level address spaces across all groups within the
> container might instead break the guest expectation. In the future sub/
> super container concept might be introduced to allow partial address space
> sharing within an IOMMU context. But for now let's go with this restriction
> by requiring singleton container for using nesting iommu features. Below
> link has the related discussion about this decision.
> 
> https://lkml.org/lkml/2020/5/15/1028
> 
> Cc: Kevin Tian 
> CC: Jacob Pan 
> Cc: Alex Williamson 
> Cc: Eric Auger 
> Cc: Jean-Philippe Brucker 
> Cc: Joerg Roedel 
> Cc: Lu Baolu 
> Signed-off-by: Liu Yi L 
> ---
> v3 -> v4:
> *) address comments against v3.
> 
> v1 -> v2:
> *) added in v2
> ---
> 
>  drivers/vfio/vfio_iommu_type1.c | 105 
> +++-
>  include/uapi/linux/vfio.h   |  16 ++
>  2 files changed, 109 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 7accb59..80623b8 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -62,18 +62,20 @@ MODULE_PARM_DESC(dma_entry_limit,
>"Maximum number of user DMA mappings per container (65535).");
>  
>  struct vfio_iommu {
> - struct list_headdomain_list;
> - struct list_headiova_list;
> - struct vfio_domain  *external_domain; /* domain for external user */
> - struct mutexlock;
> - struct rb_root  dma_list;
> - struct blocking_notifier_head notifier;
> - unsigned intdma_avail;
> - uint64_tpgsize_bitmap;
> - boolv2;
> - boolnesting;
> - booldirty_page_tracking;
> - boolpinned_page_dirty_scope;
> + struct list_headdomain_list;
> + struct list_headiova_list;
> + struct vfio_domain  *external_domain; /* domain for
> +  external user */
> + struct mutexlock;
> + struct rb_root  dma_list;
> + struct blocking_notifier_head   notifier;
> + unsigned intdma_avail;
> + uint64_tpgsize_bitmap;
> + boolv2;
> + boolnesting;
> + booldirty_page_tracking;
> + boolpinned_page_dirty_scope;
> + struct iommu_nesting_info   *nesting_info;
>  };
>  
>  struct vfio_domain {
> @@ -130,6 +132,9 @@ struct vfio_regions {
>  #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)  \
>   (!list_empty(>domain_list))
>  
> +#define IS_DOMAIN_IN_CONTAINER(iommu)((iommu->external_domain) || \
> +  (!list_empty(>domain_list)))
> +
>  #define DIRTY_BITMAP_BYTES(n)(ALIGN(n, BITS_PER_TYPE(u64)) / 
> BITS_PER_BYTE)
>  
>  /*
> @@ -1929,6 +1934,13 @@ static void vfio_iommu_iova_insert_copy(struct 
> vfio_iommu *iommu,
>  
>   list_splice_tail(iova_copy, iova);
>  }
> +
> +static void vfio_iommu_release_nesting_info(struct vfio_iommu *iommu)
> +{
> + kfree(iommu->nesting_info);
> + iommu->nesting_info = NULL;
> +}
> +
>  static int vfio_iommu_type1_attach_group(void *iommu_data,
>struct iommu_group *iommu_group)
>  {
> @@ -1959,6 +1971,12 @@ static int vfio_iommu_type1_attach_group(void 
> *iommu_data,
>   }
>   }
>  
> + /* Nesting type container can include only one group */
> + if (iommu->nesting && IS_DOMAIN_IN_CONTAINER(iommu)) {
> + mutex_unlock(>lock);
> + return -EINVAL;
> + }
> +
>   group = kzalloc(sizeof(*group), GFP_KERNEL);
>   domain = kzalloc(sizeof(*domain), GFP_KERNEL);
>   if (!group || !domain) {
> @@ -2029,6 +2047,36 @@ static int vfio_iommu_type1_attach_group(void 
> *iommu_data,
>   if (ret)

Re: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace

2020-07-06 Thread Auger Eric
Hi Yi,

On 7/6/20 3:10 PM, Liu, Yi L wrote:
> Hi Eric,
> 
>> From: Auger Eric 
>> Sent: Monday, July 6, 2020 6:37 PM
>>
>> Yi,
>>
>> On 7/4/20 1:26 PM, Liu Yi L wrote:
>>> This patch exports iommu nesting capability info to user space through
>>> VFIO. User space is expected to check this info for supported uAPIs (e.g.
>>> PASID alloc/free, bind page table, and cache invalidation) and the vendor
>>> specific format information for first level/stage page table that will be
>>> bound to.
>>>
>>> The nesting info is available only after the nesting iommu type is set
>>> for a container. Current implementation imposes one limitation - one
>>> nesting container should include at most one group. The philosophy of
>>> vfio container is having all groups/devices within the container share
>>> the same IOMMU context. When vSVA is enabled, one IOMMU context could
>>> include one 2nd-level address space and multiple 1st-level address spaces.
>>> While the 2nd-leve address space is reasonably sharable by multiple groups
>> level
> 
> oh, yes.
> 
>>> , blindly sharing 1st-level address spaces across all groups within the
>>> container might instead break the guest expectation. In the future sub/
>>> super container concept might be introduced to allow partial address space
>>> sharing within an IOMMU context. But for now let's go with this restriction
>>> by requiring singleton container for using nesting iommu features. Below
>>> link has the related discussion about this decision.
>>>
>>> https://lkml.org/lkml/2020/5/15/1028
>>>
>>> Cc: Kevin Tian 
>>> CC: Jacob Pan 
>>> Cc: Alex Williamson 
>>> Cc: Eric Auger 
>>> Cc: Jean-Philippe Brucker 
>>> Cc: Joerg Roedel 
>>> Cc: Lu Baolu 
>>> Signed-off-by: Liu Yi L 
>>> ---
>>> v3 -> v4:
>>> *) address comments against v3.
>>>
>>> v1 -> v2:
>>> *) added in v2
>>> ---
>>>
>>>  drivers/vfio/vfio_iommu_type1.c | 105
>> +++-
>>>  include/uapi/linux/vfio.h   |  16 ++
>>>  2 files changed, 109 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/vfio/vfio_iommu_type1.c 
>>> b/drivers/vfio/vfio_iommu_type1.c
>>> index 7accb59..80623b8 100644
>>> --- a/drivers/vfio/vfio_iommu_type1.c
>>> +++ b/drivers/vfio/vfio_iommu_type1.c
>>> @@ -62,18 +62,20 @@ MODULE_PARM_DESC(dma_entry_limit,
>>>  "Maximum number of user DMA mappings per container (65535).");
>>>
>>>  struct vfio_iommu {
>>> -   struct list_headdomain_list;
>>> -   struct list_headiova_list;
>>> -   struct vfio_domain  *external_domain; /* domain for external user */
>>> -   struct mutexlock;
>>> -   struct rb_root  dma_list;
>>> -   struct blocking_notifier_head notifier;
>>> -   unsigned intdma_avail;
>>> -   uint64_tpgsize_bitmap;
>>> -   boolv2;
>>> -   boolnesting;
>>> -   booldirty_page_tracking;
>>> -   boolpinned_page_dirty_scope;
>>> +   struct list_headdomain_list;
>>> +   struct list_headiova_list;
>>> +   struct vfio_domain  *external_domain; /* domain for
>>> +external user */
>> nit: put the comment before the field?
> 
> do you mean below?
> 
> + /* domain for external user */
> + struct vfio_domain  *external_domain;
yes that's what I meant
> 
>>> +   struct mutexlock;
>>> +   struct rb_root  dma_list;
>>> +   struct blocking_notifier_head   notifier;
>>> +   unsigned intdma_avail;
>>> +   uint64_tpgsize_bitmap;
>>> +   boolv2;
>>> +   boolnesting;
>>> +   booldirty_page_tracking;
>>> +   boolpinned_page_dirty_scope;
>>> +   struct iommu_nesting_info   *nesting_info;
>>>  };
>>>
>>>  struct vfio_domain {
>>> @@ -130,6 +132,9 @@ struct vfio_regions {
>>>  #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)\
>>> (!list_empty(>domain_list))
>>>
>>> +#define IS_DOMAIN_IN_CONTAINER(iommu)  ((iommu->external_domain) || \
>>> +(!list_empty(>domain_list)))
>> rename into something like CONTAINER_HAS_DOMAIN()?
> 
> got it.
> 
>>> +
>>>  #define DIRTY_BITMAP_BYTES(n)  (ALIGN(n, BITS_PER_TYPE(u64)) /
>> BITS_PER_BYTE)
>>>
>>>  /*
>>> @@ -1929,6 +1934,13 @@ static void vfio_iommu_iova_insert_copy(struct
>> vfio_iommu *iommu,
>>>
>>> list_splice_tail(iova_copy, iova);
>>>  }
>>> +
>>> +static void vfio_iommu_release_nesting_info(struct vfio_iommu *iommu)
>>> +{
>>> +   kfree(iommu->nesting_info);
>>> +   iommu->nesting_info = NULL;
>>> +}
>>> +
>>>  static int vfio_iommu_type1_attach_group(void *iommu_data,
>>>  struct iommu_group *iommu_group)
>>>  {
>>> @@ -1959,6 +1971,12 @@ static 

RE: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace

2020-07-06 Thread Liu, Yi L
Hi Eric,

> From: Auger Eric 
> Sent: Monday, July 6, 2020 6:37 PM
> 
> Yi,
> 
> On 7/4/20 1:26 PM, Liu Yi L wrote:
> > This patch exports iommu nesting capability info to user space through
> > VFIO. User space is expected to check this info for supported uAPIs (e.g.
> > PASID alloc/free, bind page table, and cache invalidation) and the vendor
> > specific format information for first level/stage page table that will be
> > bound to.
> >
> > The nesting info is available only after the nesting iommu type is set
> > for a container. Current implementation imposes one limitation - one
> > nesting container should include at most one group. The philosophy of
> > vfio container is having all groups/devices within the container share
> > the same IOMMU context. When vSVA is enabled, one IOMMU context could
> > include one 2nd-level address space and multiple 1st-level address spaces.
> > While the 2nd-leve address space is reasonably sharable by multiple groups
> level

oh, yes.

> > , blindly sharing 1st-level address spaces across all groups within the
> > container might instead break the guest expectation. In the future sub/
> > super container concept might be introduced to allow partial address space
> > sharing within an IOMMU context. But for now let's go with this restriction
> > by requiring singleton container for using nesting iommu features. Below
> > link has the related discussion about this decision.
> >
> > https://lkml.org/lkml/2020/5/15/1028
> >
> > Cc: Kevin Tian 
> > CC: Jacob Pan 
> > Cc: Alex Williamson 
> > Cc: Eric Auger 
> > Cc: Jean-Philippe Brucker 
> > Cc: Joerg Roedel 
> > Cc: Lu Baolu 
> > Signed-off-by: Liu Yi L 
> > ---
> > v3 -> v4:
> > *) address comments against v3.
> >
> > v1 -> v2:
> > *) added in v2
> > ---
> >
> >  drivers/vfio/vfio_iommu_type1.c | 105
> +++-
> >  include/uapi/linux/vfio.h   |  16 ++
> >  2 files changed, 109 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/vfio/vfio_iommu_type1.c 
> > b/drivers/vfio/vfio_iommu_type1.c
> > index 7accb59..80623b8 100644
> > --- a/drivers/vfio/vfio_iommu_type1.c
> > +++ b/drivers/vfio/vfio_iommu_type1.c
> > @@ -62,18 +62,20 @@ MODULE_PARM_DESC(dma_entry_limit,
> >  "Maximum number of user DMA mappings per container (65535).");
> >
> >  struct vfio_iommu {
> > -   struct list_headdomain_list;
> > -   struct list_headiova_list;
> > -   struct vfio_domain  *external_domain; /* domain for external user */
> > -   struct mutexlock;
> > -   struct rb_root  dma_list;
> > -   struct blocking_notifier_head notifier;
> > -   unsigned intdma_avail;
> > -   uint64_tpgsize_bitmap;
> > -   boolv2;
> > -   boolnesting;
> > -   booldirty_page_tracking;
> > -   boolpinned_page_dirty_scope;
> > +   struct list_headdomain_list;
> > +   struct list_headiova_list;
> > +   struct vfio_domain  *external_domain; /* domain for
> > +external user */
> nit: put the comment before the field?

do you mean below?

+   /* domain for external user */
+   struct vfio_domain  *external_domain;

> > +   struct mutexlock;
> > +   struct rb_root  dma_list;
> > +   struct blocking_notifier_head   notifier;
> > +   unsigned intdma_avail;
> > +   uint64_tpgsize_bitmap;
> > +   boolv2;
> > +   boolnesting;
> > +   booldirty_page_tracking;
> > +   boolpinned_page_dirty_scope;
> > +   struct iommu_nesting_info   *nesting_info;
> >  };
> >
> >  struct vfio_domain {
> > @@ -130,6 +132,9 @@ struct vfio_regions {
> >  #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)\
> > (!list_empty(>domain_list))
> >
> > +#define IS_DOMAIN_IN_CONTAINER(iommu)  ((iommu->external_domain) || \
> > +(!list_empty(>domain_list)))
> rename into something like CONTAINER_HAS_DOMAIN()?

got it.

> > +
> >  #define DIRTY_BITMAP_BYTES(n)  (ALIGN(n, BITS_PER_TYPE(u64)) /
> BITS_PER_BYTE)
> >
> >  /*
> > @@ -1929,6 +1934,13 @@ static void vfio_iommu_iova_insert_copy(struct
> vfio_iommu *iommu,
> >
> > list_splice_tail(iova_copy, iova);
> >  }
> > +
> > +static void vfio_iommu_release_nesting_info(struct vfio_iommu *iommu)
> > +{
> > +   kfree(iommu->nesting_info);
> > +   iommu->nesting_info = NULL;
> > +}
> > +
> >  static int vfio_iommu_type1_attach_group(void *iommu_data,
> >  struct iommu_group *iommu_group)
> >  {
> > @@ -1959,6 +1971,12 @@ static int vfio_iommu_type1_attach_group(void
> *iommu_data,
> > }
> > }
> >
> > +   /* Nesting 

Re: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace

2020-07-06 Thread Auger Eric
Yi,

On 7/4/20 1:26 PM, Liu Yi L wrote:
> This patch exports iommu nesting capability info to user space through
> VFIO. User space is expected to check this info for supported uAPIs (e.g.
> PASID alloc/free, bind page table, and cache invalidation) and the vendor
> specific format information for first level/stage page table that will be
> bound to.
> 
> The nesting info is available only after the nesting iommu type is set
> for a container. Current implementation imposes one limitation - one
> nesting container should include at most one group. The philosophy of
> vfio container is having all groups/devices within the container share
> the same IOMMU context. When vSVA is enabled, one IOMMU context could
> include one 2nd-level address space and multiple 1st-level address spaces.
> While the 2nd-leve address space is reasonably sharable by multiple groups
level
> , blindly sharing 1st-level address spaces across all groups within the
> container might instead break the guest expectation. In the future sub/
> super container concept might be introduced to allow partial address space
> sharing within an IOMMU context. But for now let's go with this restriction
> by requiring singleton container for using nesting iommu features. Below
> link has the related discussion about this decision.
> 
> https://lkml.org/lkml/2020/5/15/1028
> 
> Cc: Kevin Tian 
> CC: Jacob Pan 
> Cc: Alex Williamson 
> Cc: Eric Auger 
> Cc: Jean-Philippe Brucker 
> Cc: Joerg Roedel 
> Cc: Lu Baolu 
> Signed-off-by: Liu Yi L 
> ---
> v3 -> v4:
> *) address comments against v3.
> 
> v1 -> v2:
> *) added in v2
> ---
> 
>  drivers/vfio/vfio_iommu_type1.c | 105 
> +++-
>  include/uapi/linux/vfio.h   |  16 ++
>  2 files changed, 109 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 7accb59..80623b8 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -62,18 +62,20 @@ MODULE_PARM_DESC(dma_entry_limit,
>"Maximum number of user DMA mappings per container (65535).");
>  
>  struct vfio_iommu {
> - struct list_headdomain_list;
> - struct list_headiova_list;
> - struct vfio_domain  *external_domain; /* domain for external user */
> - struct mutexlock;
> - struct rb_root  dma_list;
> - struct blocking_notifier_head notifier;
> - unsigned intdma_avail;
> - uint64_tpgsize_bitmap;
> - boolv2;
> - boolnesting;
> - booldirty_page_tracking;
> - boolpinned_page_dirty_scope;
> + struct list_headdomain_list;
> + struct list_headiova_list;
> + struct vfio_domain  *external_domain; /* domain for
> +  external user */
nit: put the comment before the field?
> + struct mutexlock;
> + struct rb_root  dma_list;
> + struct blocking_notifier_head   notifier;
> + unsigned intdma_avail;
> + uint64_tpgsize_bitmap;
> + boolv2;
> + boolnesting;
> + booldirty_page_tracking;
> + boolpinned_page_dirty_scope;
> + struct iommu_nesting_info   *nesting_info;
>  };
>  
>  struct vfio_domain {
> @@ -130,6 +132,9 @@ struct vfio_regions {
>  #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)  \
>   (!list_empty(>domain_list))
>  
> +#define IS_DOMAIN_IN_CONTAINER(iommu)((iommu->external_domain) || \
> +  (!list_empty(>domain_list)))
rename into something like CONTAINER_HAS_DOMAIN()?
> +
>  #define DIRTY_BITMAP_BYTES(n)(ALIGN(n, BITS_PER_TYPE(u64)) / 
> BITS_PER_BYTE)
>  
>  /*
> @@ -1929,6 +1934,13 @@ static void vfio_iommu_iova_insert_copy(struct 
> vfio_iommu *iommu,
>  
>   list_splice_tail(iova_copy, iova);
>  }
> +
> +static void vfio_iommu_release_nesting_info(struct vfio_iommu *iommu)
> +{
> + kfree(iommu->nesting_info);
> + iommu->nesting_info = NULL;
> +}
> +
>  static int vfio_iommu_type1_attach_group(void *iommu_data,
>struct iommu_group *iommu_group)
>  {
> @@ -1959,6 +1971,12 @@ static int vfio_iommu_type1_attach_group(void 
> *iommu_data,
>   }
>   }
>  
> + /* Nesting type container can include only one group */
> + if (iommu->nesting && IS_DOMAIN_IN_CONTAINER(iommu)) {
> + mutex_unlock(>lock);
> + return -EINVAL;
> + }
> +
>   group = kzalloc(sizeof(*group), GFP_KERNEL);
>   domain = kzalloc(sizeof(*domain), GFP_KERNEL);
>   if (!group || !domain) {
> @@ 

[PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace

2020-07-04 Thread Liu Yi L
This patch exports iommu nesting capability info to user space through
VFIO. User space is expected to check this info for supported uAPIs (e.g.
PASID alloc/free, bind page table, and cache invalidation) and the vendor
specific format information for first level/stage page table that will be
bound to.

The nesting info is available only after the nesting iommu type is set
for a container. The current implementation imposes one limitation - a
nesting container should include at most one group. The philosophy of
the vfio container is having all groups/devices within the container share
the same IOMMU context. When vSVA is enabled, one IOMMU context could
include one 2nd-level address space and multiple 1st-level address spaces.
While the 2nd-level address space is reasonably sharable by multiple groups,
blindly sharing 1st-level address spaces across all groups within the
container might instead break the guest expectation. In the future a sub/
super container concept might be introduced to allow partial address space
sharing within an IOMMU context. But for now let's go with this restriction
by requiring a singleton container for using nesting iommu features. The
link below has the related discussion about this decision.

https://lkml.org/lkml/2020/5/15/1028

Cc: Kevin Tian 
CC: Jacob Pan 
Cc: Alex Williamson 
Cc: Eric Auger 
Cc: Jean-Philippe Brucker 
Cc: Joerg Roedel 
Cc: Lu Baolu 
Signed-off-by: Liu Yi L 
---
v3 -> v4:
*) address comments against v3.

v1 -> v2:
*) added in v2
---

 drivers/vfio/vfio_iommu_type1.c | 105 +++-
 include/uapi/linux/vfio.h   |  16 ++
 2 files changed, 109 insertions(+), 12 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 7accb59..80623b8 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -62,18 +62,20 @@ MODULE_PARM_DESC(dma_entry_limit,
 "Maximum number of user DMA mappings per container (65535).");
 
 struct vfio_iommu {
-   struct list_headdomain_list;
-   struct list_headiova_list;
-   struct vfio_domain  *external_domain; /* domain for external user */
-   struct mutexlock;
-   struct rb_root  dma_list;
-   struct blocking_notifier_head notifier;
-   unsigned intdma_avail;
-   uint64_tpgsize_bitmap;
-   boolv2;
-   boolnesting;
-   booldirty_page_tracking;
-   boolpinned_page_dirty_scope;
+   struct list_headdomain_list;
+   struct list_headiova_list;
+   struct vfio_domain  *external_domain; /* domain for
+external user */
+   struct mutexlock;
+   struct rb_root  dma_list;
+   struct blocking_notifier_head   notifier;
+   unsigned intdma_avail;
+   uint64_tpgsize_bitmap;
+   boolv2;
+   boolnesting;
+   booldirty_page_tracking;
+   boolpinned_page_dirty_scope;
+   struct iommu_nesting_info   *nesting_info;
 };
 
 struct vfio_domain {
@@ -130,6 +132,9 @@ struct vfio_regions {
 #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)	\
	(!list_empty(&iommu->domain_list))
 
+#define IS_DOMAIN_IN_CONTAINER(iommu)	((iommu->external_domain) || \
+	(!list_empty(&iommu->domain_list)))
+
 #define DIRTY_BITMAP_BYTES(n)  (ALIGN(n, BITS_PER_TYPE(u64)) / BITS_PER_BYTE)
 
 /*
@@ -1929,6 +1934,13 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
 
list_splice_tail(iova_copy, iova);
 }
+
+static void vfio_iommu_release_nesting_info(struct vfio_iommu *iommu)
+{
+   kfree(iommu->nesting_info);
+   iommu->nesting_info = NULL;
+}
+
 static int vfio_iommu_type1_attach_group(void *iommu_data,
 struct iommu_group *iommu_group)
 {
@@ -1959,6 +1971,12 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
}
}
 
+   /* Nesting type container can include only one group */
+   if (iommu->nesting && IS_DOMAIN_IN_CONTAINER(iommu)) {
+	mutex_unlock(&iommu->lock);
+   return -EINVAL;
+   }
+
group = kzalloc(sizeof(*group), GFP_KERNEL);
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!group || !domain) {
@@ -2029,6 +2047,36 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
if (ret)
goto out_domain;
 
+   /* Nesting cap info is available only after attaching */
+   if (iommu->nesting) {
+   struct iommu_nesting_info tmp;
+   struct
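
The final hunk is cut off in the archive. For context, a sketch of how an
info capability of this kind is typically built and appended with
vfio_info_cap_add() in the VFIO_IOMMU_GET_INFO path; the helper name here
and the assumption that struct iommu_nesting_info carries its own size are
illustrative, not taken from this patch:

static int vfio_iommu_add_nesting_cap(struct vfio_iommu *iommu,
				      struct vfio_info_cap *caps)
{
	struct vfio_iommu_type1_info_cap_nesting *nesting_cap;
	struct vfio_info_cap_header *header;
	size_t size;

	/* Header (and flags/padding) plus the variable-size nesting info. */
	size = offsetof(struct vfio_iommu_type1_info_cap_nesting, info) +
	       iommu->nesting_info->size;

	header = vfio_info_cap_add(caps, size,
				   VFIO_IOMMU_TYPE1_INFO_CAP_NESTING, 1);
	if (IS_ERR(header))
		return PTR_ERR(header);

	nesting_cap = container_of(header,
				   struct vfio_iommu_type1_info_cap_nesting,
				   header);
	/* Copy the IOMMU driver's nesting info into the info[] payload. */
	memcpy(nesting_cap->info, iommu->nesting_info,
	       iommu->nesting_info->size);
	return 0;
}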