RE: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace
Hi Alex, > From: Alex Williamson > Sent: Thursday, July 9, 2020 3:30 AM > > On Wed, 8 Jul 2020 08:08:40 + > "Liu, Yi L" wrote: > > > Hi Alex, > > > > Eric asked if we will to have data strcut other than struct > > iommu_nesting_info > > type in the struct vfio_iommu_type1_info_cap_nesting @info[] field. I'm not > > quit sure on it. I guess the answer may be not as VFIO's nesting support > > should > > based on IOMMU UAPI. how about your opinion? > > > > +#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING 3 > > + > > +/* > > + * Reporting nesting info to user space. > > + * > > + * @info: the nesting info provided by IOMMU driver. Today > > + * it is expected to be a struct iommu_nesting_info > > + * data. > > + */ > > +struct vfio_iommu_type1_info_cap_nesting { > > + struct vfio_info_cap_header header; > > + __u32 flags; > > + __u32 padding; > > + __u8info[]; > > +}; > > It's not a very useful uAPI if the user can't be sure what they're > getting out of it. Info capabilities are "cheap", they don't need to > be as extensible as an ioctl. It's not clear that we really even need > the flags (and therefore the padding), just define it to return the > IOMMU uAPI structure with no extensibility. If we need to expose > something else, create a new capability. Thanks, thanks for the guiding, then I may embed the struct iommu_nesting_info here. :-) Regards, Yi Liu > Alex > > > > > https://lore.kernel.org/linux- > iommu/dm5pr11mb1435290b6cd561ec61027892c3...@dm5pr11mb1435.nam > prd11.prod.outlook.com/ > > > > Regards, > > Yi Liu > > > > > From: Liu, Yi L > > > Sent: Tuesday, July 7, 2020 5:32 PM > > > > > [...] > > > > > > > > > >>> + > > > > >>> +/* > > > > >>> + * Reporting nesting info to user space. > > > > >>> + * > > > > >>> + * @info: the nesting info provided by IOMMU driver. Today > > > > >>> + * it is expected to be a struct iommu_nesting_info > > > > >>> + * data. > > > > >> Is it expected to change? > > > > > > > > > > honestly, I'm not quite sure on it. 
I did considered to embed struct > > > > > iommu_nesting_info here instead of using info[]. but I hesitated as > > > > > using info[] may leave more flexibility on this struct. how about > > > > > your opinion? perhaps it's fine to embed the struct > > > > > iommu_nesting_info here as long as VFIO is setup nesting based on > > > > > IOMMU UAPI. > > > > > > > > > >>> + */ > > > > >>> +struct vfio_iommu_type1_info_cap_nesting { > > > > >>> + struct vfio_info_cap_header header; > > > > >>> + __u32 flags; > > > > >> You may document flags. > > > > > > > > > > sure. it's reserved for future. > > > > > > > > > > Regards, > > > > > Yi Liu > > > > > > > > > >>> + __u32 padding; > > > > >>> + __u8info[]; > > > > >>> +}; > > > > >>> + > > > > >>> #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) > > > > >>> > > > > >>> /** > > > > >>> > > > > >> Thanks > > > > >> > > > > >> Eric > > > > > > > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace
On Wed, 8 Jul 2020 08:08:40 + "Liu, Yi L" wrote: > Hi Alex, > > Eric asked if we will to have data strcut other than struct iommu_nesting_info > type in the struct vfio_iommu_type1_info_cap_nesting @info[] field. I'm not > quit sure on it. I guess the answer may be not as VFIO's nesting support > should > based on IOMMU UAPI. how about your opinion? > > +#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING 3 > + > +/* > + * Reporting nesting info to user space. > + * > + * @info:the nesting info provided by IOMMU driver. Today > + * it is expected to be a struct iommu_nesting_info > + * data. > + */ > +struct vfio_iommu_type1_info_cap_nesting { > + struct vfio_info_cap_header header; > + __u32 flags; > + __u32 padding; > + __u8info[]; > +}; It's not a very useful uAPI if the user can't be sure what they're getting out of it. Info capabilities are "cheap", they don't need to be as extensible as an ioctl. It's not clear that we really even need the flags (and therefore the padding), just define it to return the IOMMU uAPI structure with no extensibility. If we need to expose something else, create a new capability. Thanks, Alex > > https://lore.kernel.org/linux-iommu/dm5pr11mb1435290b6cd561ec61027892c3...@dm5pr11mb1435.namprd11.prod.outlook.com/ > > Regards, > Yi Liu > > > From: Liu, Yi L > > Sent: Tuesday, July 7, 2020 5:32 PM > > > [...] > > > > > > > >>> + > > > >>> +/* > > > >>> + * Reporting nesting info to user space. > > > >>> + * > > > >>> + * @info:the nesting info provided by IOMMU driver. Today > > > >>> + * it is expected to be a struct iommu_nesting_info > > > >>> + * data. > > > >> Is it expected to change? > > > > > > > > honestly, I'm not quite sure on it. I did considered to embed struct > > > > iommu_nesting_info here instead of using info[]. but I hesitated as > > > > using info[] may leave more flexibility on this struct. how about > > > > your opinion? 
perhaps it's fine to embed the struct > > > > iommu_nesting_info here as long as VFIO is setup nesting based on > > > > IOMMU UAPI. > > > > > > > >>> + */ > > > >>> +struct vfio_iommu_type1_info_cap_nesting { > > > >>> + struct vfio_info_cap_header header; > > > >>> + __u32 flags; > > > >> You may document flags. > > > > > > > > sure. it's reserved for future. > > > > > > > > Regards, > > > > Yi Liu > > > > > > > >>> + __u32 padding; > > > >>> + __u8info[]; > > > >>> +}; > > > >>> + > > > >>> #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) > > > >>> > > > >>> /** > > > >>> > > > >> Thanks > > > >> > > > >> Eric > > > > > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
RE: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace
Hi Alex, Eric asked if we will to have data strcut other than struct iommu_nesting_info type in the struct vfio_iommu_type1_info_cap_nesting @info[] field. I'm not quit sure on it. I guess the answer may be not as VFIO's nesting support should based on IOMMU UAPI. how about your opinion? +#define VFIO_IOMMU_TYPE1_INFO_CAP_NESTING 3 + +/* + * Reporting nesting info to user space. + * + * @info: the nesting info provided by IOMMU driver. Today + * it is expected to be a struct iommu_nesting_info + * data. + */ +struct vfio_iommu_type1_info_cap_nesting { + struct vfio_info_cap_header header; + __u32 flags; + __u32 padding; + __u8info[]; +}; https://lore.kernel.org/linux-iommu/dm5pr11mb1435290b6cd561ec61027892c3...@dm5pr11mb1435.namprd11.prod.outlook.com/ Regards, Yi Liu > From: Liu, Yi L > Sent: Tuesday, July 7, 2020 5:32 PM > [...] > > > > > >>> + > > >>> +/* > > >>> + * Reporting nesting info to user space. > > >>> + * > > >>> + * @info: the nesting info provided by IOMMU driver. Today > > >>> + * it is expected to be a struct iommu_nesting_info > > >>> + * data. > > >> Is it expected to change? > > > > > > honestly, I'm not quite sure on it. I did considered to embed struct > > > iommu_nesting_info here instead of using info[]. but I hesitated as > > > using info[] may leave more flexibility on this struct. how about > > > your opinion? perhaps it's fine to embed the struct > > > iommu_nesting_info here as long as VFIO is setup nesting based on > > > IOMMU UAPI. > > > > > >>> + */ > > >>> +struct vfio_iommu_type1_info_cap_nesting { > > >>> + struct vfio_info_cap_header header; > > >>> + __u32 flags; > > >> You may document flags. > > > > > > sure. it's reserved for future. 
> > > > > > Regards, > > > Yi Liu > > > > > >>> + __u32 padding; > > >>> + __u8info[]; > > >>> +}; > > >>> + > > >>> #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) > > >>> > > >>> /** > > >>> > > >> Thanks > > >> > > >> Eric > > > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
RE: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace
Hi Eric, > From: Auger Eric > Sent: Monday, July 6, 2020 10:07 PM > > Hi Yi, > On 7/4/20 1:26 PM, Liu Yi L wrote: > > This patch exports iommu nesting capability info to user space through > > VFIO. User space is expected to check this info for supported uAPIs (e.g. > > PASID alloc/free, bind page table, and cache invalidation) and the vendor > > specific format information for first level/stage page table that will be > > bound to. > > > > The nesting info is available only after the nesting iommu type is set > > for a container. Current implementation imposes one limitation - one > > nesting container should include at most one group. The philosophy of > > vfio container is having all groups/devices within the container share > > the same IOMMU context. When vSVA is enabled, one IOMMU context could > > include one 2nd-level address space and multiple 1st-level address spaces. > > While the 2nd-leve address space is reasonably sharable by multiple groups > > , blindly sharing 1st-level address spaces across all groups within the > > container might instead break the guest expectation. In the future sub/ > > super container concept might be introduced to allow partial address space > > sharing within an IOMMU context. But for now let's go with this restriction > > by requiring singleton container for using nesting iommu features. Below > > link has the related discussion about this decision. > > > > https://lkml.org/lkml/2020/5/15/1028 > > > > Cc: Kevin Tian > > CC: Jacob Pan > > Cc: Alex Williamson > > Cc: Eric Auger > > Cc: Jean-Philippe Brucker > > Cc: Joerg Roedel > > Cc: Lu Baolu > > Signed-off-by: Liu Yi L > > --- > > v3 -> v4: > > *) address comments against v3. 
> > > > v1 -> v2: > > *) added in v2 > > --- > > > > drivers/vfio/vfio_iommu_type1.c | 105 > +++- > > include/uapi/linux/vfio.h | 16 ++ > > 2 files changed, 109 insertions(+), 12 deletions(-) > > > > diff --git a/drivers/vfio/vfio_iommu_type1.c > > b/drivers/vfio/vfio_iommu_type1.c > > index 7accb59..80623b8 100644 > > --- a/drivers/vfio/vfio_iommu_type1.c > > +++ b/drivers/vfio/vfio_iommu_type1.c > > @@ -62,18 +62,20 @@ MODULE_PARM_DESC(dma_entry_limit, > > "Maximum number of user DMA mappings per container (65535)."); > > > > struct vfio_iommu { > > - struct list_headdomain_list; > > - struct list_headiova_list; > > - struct vfio_domain *external_domain; /* domain for external user */ > > - struct mutexlock; > > - struct rb_root dma_list; > > - struct blocking_notifier_head notifier; > > - unsigned intdma_avail; > > - uint64_tpgsize_bitmap; > > - boolv2; > > - boolnesting; > > - booldirty_page_tracking; > > - boolpinned_page_dirty_scope; > > + struct list_headdomain_list; > > + struct list_headiova_list; > > + struct vfio_domain *external_domain; /* domain for > > +external user */ > > + struct mutexlock; > > + struct rb_root dma_list; > > + struct blocking_notifier_head notifier; > > + unsigned intdma_avail; > > + uint64_tpgsize_bitmap; > > + boolv2; > > + boolnesting; > > + booldirty_page_tracking; > > + boolpinned_page_dirty_scope; > > + struct iommu_nesting_info *nesting_info; > > }; > > > > struct vfio_domain { > > @@ -130,6 +132,9 @@ struct vfio_regions { > > #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)\ > > (!list_empty(>domain_list)) > > > > +#define IS_DOMAIN_IN_CONTAINER(iommu) ((iommu->external_domain) || \ > > +(!list_empty(>domain_list))) > > + > > #define DIRTY_BITMAP_BYTES(n) (ALIGN(n, BITS_PER_TYPE(u64)) / > BITS_PER_BYTE) > > > > /* > > @@ -1929,6 +1934,13 @@ static void vfio_iommu_iova_insert_copy(struct > vfio_iommu *iommu, > > > > list_splice_tail(iova_copy, iova); > > } > > + > > +static void vfio_iommu_release_nesting_info(struct 
vfio_iommu *iommu) > > +{ > > + kfree(iommu->nesting_info); > > + iommu->nesting_info = NULL; > > +} > > + > > static int vfio_iommu_type1_attach_group(void *iommu_data, > > struct iommu_group *iommu_group) > > { > > @@ -1959,6 +1971,12 @@ static int vfio_iommu_type1_attach_group(void > *iommu_data, > > } > > } > > > > + /* Nesting type container can include only one group */ > > + if (iommu->nesting && IS_DOMAIN_IN_CONTAINER(iommu)) { > > + mutex_unlock(>lock); > > + return -EINVAL; > > + } > > + > > group = kzalloc(sizeof(*group),
RE: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace
Hi Eric, > From: Auger Eric < eric.au...@redhat.com > > Sent: Monday, July 6, 2020 9:45 PM > > Hi Yi, > > On 7/6/20 3:10 PM, Liu, Yi L wrote: > > Hi Eric, > > > >> From: Auger Eric > >> Sent: Monday, July 6, 2020 6:37 PM > >> > >> Yi, > >> > >> On 7/4/20 1:26 PM, Liu Yi L wrote: > >>> This patch exports iommu nesting capability info to user space through > >>> VFIO. User space is expected to check this info for supported uAPIs (e.g. > >>> PASID alloc/free, bind page table, and cache invalidation) and the vendor > >>> specific format information for first level/stage page table that will be > >>> bound to. > >>> > >>> The nesting info is available only after the nesting iommu type is set > >>> for a container. Current implementation imposes one limitation - one > >>> nesting container should include at most one group. The philosophy of > >>> vfio container is having all groups/devices within the container share > >>> the same IOMMU context. When vSVA is enabled, one IOMMU context could > >>> include one 2nd-level address space and multiple 1st-level address spaces. > >>> While the 2nd-leve address space is reasonably sharable by multiple groups > >> level > > > > oh, yes. > > > >>> , blindly sharing 1st-level address spaces across all groups within the > >>> container might instead break the guest expectation. In the future sub/ > >>> super container concept might be introduced to allow partial address space > >>> sharing within an IOMMU context. But for now let's go with this > >>> restriction > >>> by requiring singleton container for using nesting iommu features. Below > >>> link has the related discussion about this decision. > >>> > >>> https://lkml.org/lkml/2020/5/15/1028 > >>> > >>> Cc: Kevin Tian > >>> CC: Jacob Pan > >>> Cc: Alex Williamson > >>> Cc: Eric Auger > >>> Cc: Jean-Philippe Brucker > >>> Cc: Joerg Roedel > >>> Cc: Lu Baolu > >>> Signed-off-by: Liu Yi L > >>> --- > >>> v3 -> v4: > >>> *) address comments against v3. 
> >>> > >>> v1 -> v2: > >>> *) added in v2 > >>> --- > >>> > >>> drivers/vfio/vfio_iommu_type1.c | 105 > >> +++- > >>> include/uapi/linux/vfio.h | 16 ++ > >>> 2 files changed, 109 insertions(+), 12 deletions(-) > >>> > >>> diff --git a/drivers/vfio/vfio_iommu_type1.c > >>> b/drivers/vfio/vfio_iommu_type1.c > >>> index 7accb59..80623b8 100644 > >>> --- a/drivers/vfio/vfio_iommu_type1.c > >>> +++ b/drivers/vfio/vfio_iommu_type1.c > >>> @@ -62,18 +62,20 @@ MODULE_PARM_DESC(dma_entry_limit, > >>>"Maximum number of user DMA mappings per container (65535)."); > >>> > >>> struct vfio_iommu { > >>> - struct list_headdomain_list; > >>> - struct list_headiova_list; > >>> - struct vfio_domain *external_domain; /* domain for external user */ > >>> - struct mutexlock; > >>> - struct rb_root dma_list; > >>> - struct blocking_notifier_head notifier; > >>> - unsigned intdma_avail; > >>> - uint64_tpgsize_bitmap; > >>> - boolv2; > >>> - boolnesting; > >>> - booldirty_page_tracking; > >>> - boolpinned_page_dirty_scope; > >>> + struct list_headdomain_list; > >>> + struct list_headiova_list; > >>> + struct vfio_domain *external_domain; /* domain for > >>> + external user */ > >> nit: put the comment before the field? > > > > do you mean below? > > > > + /* domain for external user */ > > + struct vfio_domain *external_domain; > yes that's what I meant got you. 
:-) > > > >>> + struct mutexlock; > >>> + struct rb_root dma_list; > >>> + struct blocking_notifier_head notifier; > >>> + unsigned intdma_avail; > >>> + uint64_tpgsize_bitmap; > >>> + boolv2; > >>> + boolnesting; > >>> + booldirty_page_tracking; > >>> + boolpinned_page_dirty_scope; > >>> + struct iommu_nesting_info *nesting_info; > >>> }; > >>> > >>> struct vfio_domain { > >>> @@ -130,6 +132,9 @@ struct vfio_regions { > >>> #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu) \ > >>> (!list_empty(>domain_list)) > >>> > >>> +#define IS_DOMAIN_IN_CONTAINER(iommu)((iommu- > >external_domain) || \ > >>> + (!list_empty(>domain_list))) > >> rename into something like CONTAINER_HAS_DOMAIN()? > > > > got it. > > > >>> + > >>> #define DIRTY_BITMAP_BYTES(n)(ALIGN(n, BITS_PER_TYPE(u64)) / > >> BITS_PER_BYTE) > >>> > >>> /* > >>> @@ -1929,6 +1934,13 @@ static void vfio_iommu_iova_insert_copy(struct > >> vfio_iommu *iommu, > >>> > >>> list_splice_tail(iova_copy, iova); > >>> } > >>> + > >>> +static void
Re: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace
Hi Yi, On 7/4/20 1:26 PM, Liu Yi L wrote: > This patch exports iommu nesting capability info to user space through > VFIO. User space is expected to check this info for supported uAPIs (e.g. > PASID alloc/free, bind page table, and cache invalidation) and the vendor > specific format information for first level/stage page table that will be > bound to. > > The nesting info is available only after the nesting iommu type is set > for a container. Current implementation imposes one limitation - one > nesting container should include at most one group. The philosophy of > vfio container is having all groups/devices within the container share > the same IOMMU context. When vSVA is enabled, one IOMMU context could > include one 2nd-level address space and multiple 1st-level address spaces. > While the 2nd-leve address space is reasonably sharable by multiple groups > , blindly sharing 1st-level address spaces across all groups within the > container might instead break the guest expectation. In the future sub/ > super container concept might be introduced to allow partial address space > sharing within an IOMMU context. But for now let's go with this restriction > by requiring singleton container for using nesting iommu features. Below > link has the related discussion about this decision. > > https://lkml.org/lkml/2020/5/15/1028 > > Cc: Kevin Tian > CC: Jacob Pan > Cc: Alex Williamson > Cc: Eric Auger > Cc: Jean-Philippe Brucker > Cc: Joerg Roedel > Cc: Lu Baolu > Signed-off-by: Liu Yi L > --- > v3 -> v4: > *) address comments against v3. 
> > v1 -> v2: > *) added in v2 > --- > > drivers/vfio/vfio_iommu_type1.c | 105 > +++- > include/uapi/linux/vfio.h | 16 ++ > 2 files changed, 109 insertions(+), 12 deletions(-) > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index 7accb59..80623b8 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -62,18 +62,20 @@ MODULE_PARM_DESC(dma_entry_limit, >"Maximum number of user DMA mappings per container (65535)."); > > struct vfio_iommu { > - struct list_headdomain_list; > - struct list_headiova_list; > - struct vfio_domain *external_domain; /* domain for external user */ > - struct mutexlock; > - struct rb_root dma_list; > - struct blocking_notifier_head notifier; > - unsigned intdma_avail; > - uint64_tpgsize_bitmap; > - boolv2; > - boolnesting; > - booldirty_page_tracking; > - boolpinned_page_dirty_scope; > + struct list_headdomain_list; > + struct list_headiova_list; > + struct vfio_domain *external_domain; /* domain for > + external user */ > + struct mutexlock; > + struct rb_root dma_list; > + struct blocking_notifier_head notifier; > + unsigned intdma_avail; > + uint64_tpgsize_bitmap; > + boolv2; > + boolnesting; > + booldirty_page_tracking; > + boolpinned_page_dirty_scope; > + struct iommu_nesting_info *nesting_info; > }; > > struct vfio_domain { > @@ -130,6 +132,9 @@ struct vfio_regions { > #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu) \ > (!list_empty(>domain_list)) > > +#define IS_DOMAIN_IN_CONTAINER(iommu)((iommu->external_domain) || \ > + (!list_empty(>domain_list))) > + > #define DIRTY_BITMAP_BYTES(n)(ALIGN(n, BITS_PER_TYPE(u64)) / > BITS_PER_BYTE) > > /* > @@ -1929,6 +1934,13 @@ static void vfio_iommu_iova_insert_copy(struct > vfio_iommu *iommu, > > list_splice_tail(iova_copy, iova); > } > + > +static void vfio_iommu_release_nesting_info(struct vfio_iommu *iommu) > +{ > + kfree(iommu->nesting_info); > + iommu->nesting_info = NULL; > +} > + > static int 
vfio_iommu_type1_attach_group(void *iommu_data, >struct iommu_group *iommu_group) > { > @@ -1959,6 +1971,12 @@ static int vfio_iommu_type1_attach_group(void > *iommu_data, > } > } > > + /* Nesting type container can include only one group */ > + if (iommu->nesting && IS_DOMAIN_IN_CONTAINER(iommu)) { > + mutex_unlock(>lock); > + return -EINVAL; > + } > + > group = kzalloc(sizeof(*group), GFP_KERNEL); > domain = kzalloc(sizeof(*domain), GFP_KERNEL); > if (!group || !domain) { > @@ -2029,6 +2047,36 @@ static int vfio_iommu_type1_attach_group(void > *iommu_data, > if (ret)
Re: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace
Hi Yi, On 7/6/20 3:10 PM, Liu, Yi L wrote: > Hi Eric, > >> From: Auger Eric >> Sent: Monday, July 6, 2020 6:37 PM >> >> Yi, >> >> On 7/4/20 1:26 PM, Liu Yi L wrote: >>> This patch exports iommu nesting capability info to user space through >>> VFIO. User space is expected to check this info for supported uAPIs (e.g. >>> PASID alloc/free, bind page table, and cache invalidation) and the vendor >>> specific format information for first level/stage page table that will be >>> bound to. >>> >>> The nesting info is available only after the nesting iommu type is set >>> for a container. Current implementation imposes one limitation - one >>> nesting container should include at most one group. The philosophy of >>> vfio container is having all groups/devices within the container share >>> the same IOMMU context. When vSVA is enabled, one IOMMU context could >>> include one 2nd-level address space and multiple 1st-level address spaces. >>> While the 2nd-leve address space is reasonably sharable by multiple groups >> level > > oh, yes. > >>> , blindly sharing 1st-level address spaces across all groups within the >>> container might instead break the guest expectation. In the future sub/ >>> super container concept might be introduced to allow partial address space >>> sharing within an IOMMU context. But for now let's go with this restriction >>> by requiring singleton container for using nesting iommu features. Below >>> link has the related discussion about this decision. >>> >>> https://lkml.org/lkml/2020/5/15/1028 >>> >>> Cc: Kevin Tian >>> CC: Jacob Pan >>> Cc: Alex Williamson >>> Cc: Eric Auger >>> Cc: Jean-Philippe Brucker >>> Cc: Joerg Roedel >>> Cc: Lu Baolu >>> Signed-off-by: Liu Yi L >>> --- >>> v3 -> v4: >>> *) address comments against v3. 
>>> >>> v1 -> v2: >>> *) added in v2 >>> --- >>> >>> drivers/vfio/vfio_iommu_type1.c | 105 >> +++- >>> include/uapi/linux/vfio.h | 16 ++ >>> 2 files changed, 109 insertions(+), 12 deletions(-) >>> >>> diff --git a/drivers/vfio/vfio_iommu_type1.c >>> b/drivers/vfio/vfio_iommu_type1.c >>> index 7accb59..80623b8 100644 >>> --- a/drivers/vfio/vfio_iommu_type1.c >>> +++ b/drivers/vfio/vfio_iommu_type1.c >>> @@ -62,18 +62,20 @@ MODULE_PARM_DESC(dma_entry_limit, >>> "Maximum number of user DMA mappings per container (65535)."); >>> >>> struct vfio_iommu { >>> - struct list_headdomain_list; >>> - struct list_headiova_list; >>> - struct vfio_domain *external_domain; /* domain for external user */ >>> - struct mutexlock; >>> - struct rb_root dma_list; >>> - struct blocking_notifier_head notifier; >>> - unsigned intdma_avail; >>> - uint64_tpgsize_bitmap; >>> - boolv2; >>> - boolnesting; >>> - booldirty_page_tracking; >>> - boolpinned_page_dirty_scope; >>> + struct list_headdomain_list; >>> + struct list_headiova_list; >>> + struct vfio_domain *external_domain; /* domain for >>> +external user */ >> nit: put the comment before the field? > > do you mean below? > > + /* domain for external user */ > + struct vfio_domain *external_domain; yes that's what I meant > >>> + struct mutexlock; >>> + struct rb_root dma_list; >>> + struct blocking_notifier_head notifier; >>> + unsigned intdma_avail; >>> + uint64_tpgsize_bitmap; >>> + boolv2; >>> + boolnesting; >>> + booldirty_page_tracking; >>> + boolpinned_page_dirty_scope; >>> + struct iommu_nesting_info *nesting_info; >>> }; >>> >>> struct vfio_domain { >>> @@ -130,6 +132,9 @@ struct vfio_regions { >>> #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)\ >>> (!list_empty(>domain_list)) >>> >>> +#define IS_DOMAIN_IN_CONTAINER(iommu) ((iommu->external_domain) || \ >>> +(!list_empty(>domain_list))) >> rename into something like CONTAINER_HAS_DOMAIN()? > > got it. 
> >>> + >>> #define DIRTY_BITMAP_BYTES(n) (ALIGN(n, BITS_PER_TYPE(u64)) / >> BITS_PER_BYTE) >>> >>> /* >>> @@ -1929,6 +1934,13 @@ static void vfio_iommu_iova_insert_copy(struct >> vfio_iommu *iommu, >>> >>> list_splice_tail(iova_copy, iova); >>> } >>> + >>> +static void vfio_iommu_release_nesting_info(struct vfio_iommu *iommu) >>> +{ >>> + kfree(iommu->nesting_info); >>> + iommu->nesting_info = NULL; >>> +} >>> + >>> static int vfio_iommu_type1_attach_group(void *iommu_data, >>> struct iommu_group *iommu_group) >>> { >>> @@ -1959,6 +1971,12 @@ static
RE: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace
Hi Eric, > From: Auger Eric > Sent: Monday, July 6, 2020 6:37 PM > > Yi, > > On 7/4/20 1:26 PM, Liu Yi L wrote: > > This patch exports iommu nesting capability info to user space through > > VFIO. User space is expected to check this info for supported uAPIs (e.g. > > PASID alloc/free, bind page table, and cache invalidation) and the vendor > > specific format information for first level/stage page table that will be > > bound to. > > > > The nesting info is available only after the nesting iommu type is set > > for a container. Current implementation imposes one limitation - one > > nesting container should include at most one group. The philosophy of > > vfio container is having all groups/devices within the container share > > the same IOMMU context. When vSVA is enabled, one IOMMU context could > > include one 2nd-level address space and multiple 1st-level address spaces. > > While the 2nd-leve address space is reasonably sharable by multiple groups > level oh, yes. > > , blindly sharing 1st-level address spaces across all groups within the > > container might instead break the guest expectation. In the future sub/ > > super container concept might be introduced to allow partial address space > > sharing within an IOMMU context. But for now let's go with this restriction > > by requiring singleton container for using nesting iommu features. Below > > link has the related discussion about this decision. > > > > https://lkml.org/lkml/2020/5/15/1028 > > > > Cc: Kevin Tian > > CC: Jacob Pan > > Cc: Alex Williamson > > Cc: Eric Auger > > Cc: Jean-Philippe Brucker > > Cc: Joerg Roedel > > Cc: Lu Baolu > > Signed-off-by: Liu Yi L > > --- > > v3 -> v4: > > *) address comments against v3. 
> > > > v1 -> v2: > > *) added in v2 > > --- > > > > drivers/vfio/vfio_iommu_type1.c | 105 > +++- > > include/uapi/linux/vfio.h | 16 ++ > > 2 files changed, 109 insertions(+), 12 deletions(-) > > > > diff --git a/drivers/vfio/vfio_iommu_type1.c > > b/drivers/vfio/vfio_iommu_type1.c > > index 7accb59..80623b8 100644 > > --- a/drivers/vfio/vfio_iommu_type1.c > > +++ b/drivers/vfio/vfio_iommu_type1.c > > @@ -62,18 +62,20 @@ MODULE_PARM_DESC(dma_entry_limit, > > "Maximum number of user DMA mappings per container (65535)."); > > > > struct vfio_iommu { > > - struct list_headdomain_list; > > - struct list_headiova_list; > > - struct vfio_domain *external_domain; /* domain for external user */ > > - struct mutexlock; > > - struct rb_root dma_list; > > - struct blocking_notifier_head notifier; > > - unsigned intdma_avail; > > - uint64_tpgsize_bitmap; > > - boolv2; > > - boolnesting; > > - booldirty_page_tracking; > > - boolpinned_page_dirty_scope; > > + struct list_headdomain_list; > > + struct list_headiova_list; > > + struct vfio_domain *external_domain; /* domain for > > +external user */ > nit: put the comment before the field? do you mean below? + /* domain for external user */ + struct vfio_domain *external_domain; > > + struct mutexlock; > > + struct rb_root dma_list; > > + struct blocking_notifier_head notifier; > > + unsigned intdma_avail; > > + uint64_tpgsize_bitmap; > > + boolv2; > > + boolnesting; > > + booldirty_page_tracking; > > + boolpinned_page_dirty_scope; > > + struct iommu_nesting_info *nesting_info; > > }; > > > > struct vfio_domain { > > @@ -130,6 +132,9 @@ struct vfio_regions { > > #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)\ > > (!list_empty(>domain_list)) > > > > +#define IS_DOMAIN_IN_CONTAINER(iommu) ((iommu->external_domain) || \ > > +(!list_empty(>domain_list))) > rename into something like CONTAINER_HAS_DOMAIN()? got it. 
> > + > > #define DIRTY_BITMAP_BYTES(n) (ALIGN(n, BITS_PER_TYPE(u64)) / > BITS_PER_BYTE) > > > > /* > > @@ -1929,6 +1934,13 @@ static void vfio_iommu_iova_insert_copy(struct > vfio_iommu *iommu, > > > > list_splice_tail(iova_copy, iova); > > } > > + > > +static void vfio_iommu_release_nesting_info(struct vfio_iommu *iommu) > > +{ > > + kfree(iommu->nesting_info); > > + iommu->nesting_info = NULL; > > +} > > + > > static int vfio_iommu_type1_attach_group(void *iommu_data, > > struct iommu_group *iommu_group) > > { > > @@ -1959,6 +1971,12 @@ static int vfio_iommu_type1_attach_group(void > *iommu_data, > > } > > } > > > > + /* Nesting
Re: [PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace
Yi, On 7/4/20 1:26 PM, Liu Yi L wrote: > This patch exports iommu nesting capability info to user space through > VFIO. User space is expected to check this info for supported uAPIs (e.g. > PASID alloc/free, bind page table, and cache invalidation) and the vendor > specific format information for first level/stage page table that will be > bound to. > > The nesting info is available only after the nesting iommu type is set > for a container. Current implementation imposes one limitation - one > nesting container should include at most one group. The philosophy of > vfio container is having all groups/devices within the container share > the same IOMMU context. When vSVA is enabled, one IOMMU context could > include one 2nd-level address space and multiple 1st-level address spaces. > While the 2nd-leve address space is reasonably sharable by multiple groups level > , blindly sharing 1st-level address spaces across all groups within the > container might instead break the guest expectation. In the future sub/ > super container concept might be introduced to allow partial address space > sharing within an IOMMU context. But for now let's go with this restriction > by requiring singleton container for using nesting iommu features. Below > link has the related discussion about this decision. > > https://lkml.org/lkml/2020/5/15/1028 > > Cc: Kevin Tian > CC: Jacob Pan > Cc: Alex Williamson > Cc: Eric Auger > Cc: Jean-Philippe Brucker > Cc: Joerg Roedel > Cc: Lu Baolu > Signed-off-by: Liu Yi L > --- > v3 -> v4: > *) address comments against v3. 
> > v1 -> v2: > *) added in v2 > --- > > drivers/vfio/vfio_iommu_type1.c | 105 > +++- > include/uapi/linux/vfio.h | 16 ++ > 2 files changed, 109 insertions(+), 12 deletions(-) > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index 7accb59..80623b8 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -62,18 +62,20 @@ MODULE_PARM_DESC(dma_entry_limit, >"Maximum number of user DMA mappings per container (65535)."); > > struct vfio_iommu { > - struct list_headdomain_list; > - struct list_headiova_list; > - struct vfio_domain *external_domain; /* domain for external user */ > - struct mutexlock; > - struct rb_root dma_list; > - struct blocking_notifier_head notifier; > - unsigned intdma_avail; > - uint64_tpgsize_bitmap; > - boolv2; > - boolnesting; > - booldirty_page_tracking; > - boolpinned_page_dirty_scope; > + struct list_headdomain_list; > + struct list_headiova_list; > + struct vfio_domain *external_domain; /* domain for > + external user */ nit: put the comment before the field? > + struct mutexlock; > + struct rb_root dma_list; > + struct blocking_notifier_head notifier; > + unsigned intdma_avail; > + uint64_tpgsize_bitmap; > + boolv2; > + boolnesting; > + booldirty_page_tracking; > + boolpinned_page_dirty_scope; > + struct iommu_nesting_info *nesting_info; > }; > > struct vfio_domain { > @@ -130,6 +132,9 @@ struct vfio_regions { > #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu) \ > (!list_empty(>domain_list)) > > +#define IS_DOMAIN_IN_CONTAINER(iommu)((iommu->external_domain) || \ > + (!list_empty(>domain_list))) rename into something like CONTAINER_HAS_DOMAIN()? 
> + > #define DIRTY_BITMAP_BYTES(n)(ALIGN(n, BITS_PER_TYPE(u64)) / > BITS_PER_BYTE) > > /* > @@ -1929,6 +1934,13 @@ static void vfio_iommu_iova_insert_copy(struct > vfio_iommu *iommu, > > list_splice_tail(iova_copy, iova); > } > + > +static void vfio_iommu_release_nesting_info(struct vfio_iommu *iommu) > +{ > + kfree(iommu->nesting_info); > + iommu->nesting_info = NULL; > +} > + > static int vfio_iommu_type1_attach_group(void *iommu_data, >struct iommu_group *iommu_group) > { > @@ -1959,6 +1971,12 @@ static int vfio_iommu_type1_attach_group(void > *iommu_data, > } > } > > + /* Nesting type container can include only one group */ > + if (iommu->nesting && IS_DOMAIN_IN_CONTAINER(iommu)) { > + mutex_unlock(>lock); > + return -EINVAL; > + } > + > group = kzalloc(sizeof(*group), GFP_KERNEL); > domain = kzalloc(sizeof(*domain), GFP_KERNEL); > if (!group || !domain) { > @@
[PATCH v4 04/15] vfio/type1: Report iommu nesting info to userspace
This patch exports iommu nesting capability info to user space through VFIO. User space is expected to check this info for supported uAPIs (e.g. PASID alloc/free, bind page table, and cache invalidation) and the vendor specific format information for first level/stage page table that will be bound to. The nesting info is available only after the nesting iommu type is set for a container. Current implementation imposes one limitation - one nesting container should include at most one group. The philosophy of vfio container is having all groups/devices within the container share the same IOMMU context. When vSVA is enabled, one IOMMU context could include one 2nd-level address space and multiple 1st-level address spaces. While the 2nd-level address space is reasonably sharable by multiple groups, blindly sharing 1st-level address spaces across all groups within the container might instead break the guest expectation. In the future sub/super container concept might be introduced to allow partial address space sharing within an IOMMU context. But for now let's go with this restriction by requiring singleton container for using nesting iommu features. Below link has the related discussion about this decision. https://lkml.org/lkml/2020/5/15/1028 Cc: Kevin Tian CC: Jacob Pan Cc: Alex Williamson Cc: Eric Auger Cc: Jean-Philippe Brucker Cc: Joerg Roedel Cc: Lu Baolu Signed-off-by: Liu Yi L --- v3 -> v4: *) address comments against v3. 
v1 -> v2: *) added in v2 --- drivers/vfio/vfio_iommu_type1.c | 105 +++- include/uapi/linux/vfio.h | 16 ++ 2 files changed, 109 insertions(+), 12 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 7accb59..80623b8 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -62,18 +62,20 @@ MODULE_PARM_DESC(dma_entry_limit, "Maximum number of user DMA mappings per container (65535)."); struct vfio_iommu { - struct list_headdomain_list; - struct list_headiova_list; - struct vfio_domain *external_domain; /* domain for external user */ - struct mutexlock; - struct rb_root dma_list; - struct blocking_notifier_head notifier; - unsigned intdma_avail; - uint64_tpgsize_bitmap; - boolv2; - boolnesting; - booldirty_page_tracking; - boolpinned_page_dirty_scope; + struct list_headdomain_list; + struct list_headiova_list; + struct vfio_domain *external_domain; /* domain for +external user */ + struct mutexlock; + struct rb_root dma_list; + struct blocking_notifier_head notifier; + unsigned intdma_avail; + uint64_tpgsize_bitmap; + boolv2; + boolnesting; + booldirty_page_tracking; + boolpinned_page_dirty_scope; + struct iommu_nesting_info *nesting_info; }; struct vfio_domain { @@ -130,6 +132,9 @@ struct vfio_regions { #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)\ (!list_empty(>domain_list)) +#define IS_DOMAIN_IN_CONTAINER(iommu) ((iommu->external_domain) || \ +(!list_empty(>domain_list))) + #define DIRTY_BITMAP_BYTES(n) (ALIGN(n, BITS_PER_TYPE(u64)) / BITS_PER_BYTE) /* @@ -1929,6 +1934,13 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu, list_splice_tail(iova_copy, iova); } + +static void vfio_iommu_release_nesting_info(struct vfio_iommu *iommu) +{ + kfree(iommu->nesting_info); + iommu->nesting_info = NULL; +} + static int vfio_iommu_type1_attach_group(void *iommu_data, struct iommu_group *iommu_group) { @@ -1959,6 +1971,12 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, } } 
+ /* Nesting type container can include only one group */ + if (iommu->nesting && IS_DOMAIN_IN_CONTAINER(iommu)) { + mutex_unlock(&iommu->lock); + return -EINVAL; + } + group = kzalloc(sizeof(*group), GFP_KERNEL); domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!group || !domain) { @@ -2029,6 +2047,36 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, if (ret) goto out_domain; + /* Nesting cap info is available only after attaching */ + if (iommu->nesting) { + struct iommu_nesting_info tmp; + struct