Hi Yi,

On 4/1/23 16:44, Yi Liu wrote:
> as an alternative method for ownership check when iommufd is used. In
I don't understand the 1st sentence.
> this case all opened devices in the affected dev_set are verified to
> be bound to a same valid iommufd value to allow reset. It's simpler
> and faster as user does not need to pass a set of fds and kernel no
s/kernel no need to search/the kernel does not need to search/
> need to search the device within the given fds.
>
> a device in noiommu mode doesn't have a valid iommufd, so this method
> should not be used in a dev_set which contains multiple devices and one
> of them is in noiommu. The only allowed noiommu scenario is that the
> calling device is noiommu and it's in a singleton dev_set.
>
> Suggested-by: Jason Gunthorpe <j...@nvidia.com>
> Signed-off-by: Jason Gunthorpe <j...@nvidia.com>
> Reviewed-by: Jason Gunthorpe <j...@nvidia.com>
> Tested-by: Yanting Jiang <yanting.ji...@intel.com>
> Signed-off-by: Yi Liu <yi.l....@intel.com>
> ---
>  drivers/vfio/pci/vfio_pci_core.c | 42 +++++++++++++++++++++++++++-----
>  include/uapi/linux/vfio.h        |  9 ++++++-
>  2 files changed, 44 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/vfio/pci/vfio_pci_core.c 
> b/drivers/vfio/pci/vfio_pci_core.c
> index 3696b8e58445..b68fcba67a4b 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -180,7 +180,8 @@ static void vfio_pci_probe_mmaps(struct 
> vfio_pci_core_device *vdev)
>  struct vfio_pci_group_info;
>  static void vfio_pci_dev_set_try_reset(struct vfio_device_set *dev_set);
>  static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
> -                                   struct vfio_pci_group_info *groups);
> +                                   struct vfio_pci_group_info *groups,
> +                                   struct iommufd_ctx *iommufd_ctx);
>  
>  /*
>   * INTx masking requires the ability to disable INTx signaling via 
> PCI_COMMAND
> @@ -1277,7 +1278,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct 
> vfio_pci_core_device *vdev,
>               return ret;
>  
>       /* Somewhere between 1 and count is OK */
> -     if (!hdr->count || hdr->count > count)
> +     if (hdr->count > count)
Then I would simply remove the comment above, since the !count check is
now done by the caller.
>               return -EINVAL;
>  
>       group_fds = kcalloc(hdr->count, sizeof(*group_fds), GFP_KERNEL);
> @@ -1326,7 +1327,7 @@ vfio_pci_ioctl_pci_hot_reset_groups(struct 
> vfio_pci_core_device *vdev,
>       info.count = hdr->count;
>       info.files = files;
>  
> -     ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info);
> +     ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info, NULL);
>  
>  hot_reset_release:
>       for (file_idx--; file_idx >= 0; file_idx--)
> @@ -1341,6 +1342,7 @@ static int vfio_pci_ioctl_pci_hot_reset(struct 
> vfio_pci_core_device *vdev,
>  {
>       unsigned long minsz = offsetofend(struct vfio_pci_hot_reset, count);
>       struct vfio_pci_hot_reset hdr;
> +     struct iommufd_ctx *iommufd;
>       bool slot = false;
>  
>       if (copy_from_user(&hdr, arg, minsz))
> @@ -1355,7 +1357,12 @@ static int vfio_pci_ioctl_pci_hot_reset(struct 
> vfio_pci_core_device *vdev,
>       else if (pci_probe_reset_bus(vdev->pdev->bus))
>               return -ENODEV;
>  
> -     return vfio_pci_ioctl_pci_hot_reset_groups(vdev, &hdr, slot, arg);
> +     if (hdr.count)
> +             return vfio_pci_ioctl_pci_hot_reset_groups(vdev, &hdr, slot, 
> arg);
> +
> +     iommufd = vfio_iommufd_physical_ictx(&vdev->vdev);
> +
> +     return vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, NULL, iommufd);
>  }
>  
>  static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev,
> @@ -2327,6 +2334,9 @@ static bool vfio_dev_in_groups(struct 
> vfio_pci_core_device *vdev,
>  {
>       unsigned int i;
>  
> +     if (!groups)
> +             return false;
> +
>       for (i = 0; i < groups->count; i++)
>               if (vfio_file_has_dev(groups->files[i], &vdev->vdev))
>                       return true;
> @@ -2402,13 +2412,25 @@ static int vfio_pci_dev_set_pm_runtime_get(struct 
> vfio_device_set *dev_set)
>       return ret;
>  }
>  
> +static bool vfio_dev_in_iommufd_ctx(struct vfio_pci_core_device *vdev,
> +                                 struct iommufd_ctx *iommufd_ctx)
> +{
> +     struct iommufd_ctx *iommufd = vfio_iommufd_physical_ictx(&vdev->vdev);
> +
> +     if (!iommufd)
> +             return false;
> +
> +     return iommufd == iommufd_ctx;
> +}
> +
>  /*
>   * We need to get memory_lock for each device, but devices can share 
> mmap_lock,
>   * therefore we need to zap and hold the vma_lock for each device, and only 
> then
>   * get each memory_lock.
>   */
>  static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
> -                                   struct vfio_pci_group_info *groups)
> +                                   struct vfio_pci_group_info *groups,
> +                                   struct iommufd_ctx *iommufd_ctx)
>  {
>       struct vfio_pci_core_device *cur_mem;
>       struct vfio_pci_core_device *cur_vma;
> @@ -2448,9 +2470,17 @@ static int vfio_pci_dev_set_hot_reset(struct 
> vfio_device_set *dev_set,
>                *
>                * Otherwise all opened devices in the dev_set must be
>                * contained by the set of groups provided by the user.
> +              *
> +              * If user provides a zero-length array, then all the
> +              * opened devices must be bound to a same iommufd_ctx.
> +              *
> +              * If all above checks are failed, reset is allowed only if
> +              * the calling device is in a singleton dev_set.
>                */
>               if (cur_vma->vdev.open_count &&
> -                 !vfio_dev_in_groups(cur_vma, groups)) {
> +                 !vfio_dev_in_groups(cur_vma, groups) &&
> +                 !vfio_dev_in_iommufd_ctx(cur_vma, iommufd_ctx) &&
> +                 (dev_set->device_count > 1)) {
>                       ret = -EINVAL;
>                       goto err_undo;
>               }
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index f96e5689cffc..17aa5d09db41 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -679,7 +679,14 @@ struct vfio_pci_hot_reset_info {
>   * the calling user must ensure all affected devices, if opened, are
>   * owned by itself.
>   *
> - * The ownership is proved by an array of group fds.
> + * The ownership can be proved by:
> + *   - An array of group fds
> + *   - A zero-length array

I would suggest something like:
If a non-empty group fd array is passed, the devices affected by the
reset must belong to those opened VFIO groups.
If a zero-length array is passed, the other devices affected by the
reset, if any, must be bound to the same iommufd as this VFIO device.
One of these two methods is applied to check whether the reset is feasible.
> + *
> + * In the last case all affected devices which are opened by this user
> + * must have been bound to a same iommufd. If the calling device is in
> + * noiommu mode (no valid iommufd) then it can be reset only if the reset
> + * doesn't affect other devices.
and keep that too
>   *
>   * Return: 0 on success, -errno on failure.
>   */
Thanks

Eric

Reply via email to