On Wed, 27 May 2026 03:23:06 -0700
Matt Evans <[email protected]> wrote:

> This helper, vfio_pci_core_mmap_prep_dmabuf(), creates a single-range
> DMABUF for the purpose of mapping a PCI BAR.  This is used in a future
> commit by VFIO's ordinary mmap() path.
> 
> This function transfers ownership of the VFIO device file reference
> to the DMABUF, which fput()s it when the DMABUF is released.
> 
> Refactor the existing vfio_pci_core_feature_dma_buf() to split out
> export code common to the two paths, VFIO_DEVICE_FEATURE_DMA_BUF and
> this new VFIO_BAR mmap().
> 
> Signed-off-by: Matt Evans <[email protected]>
> ---
>  drivers/vfio/pci/vfio_pci_dmabuf.c | 140 ++++++++++++++++++++++-------
>  drivers/vfio/pci/vfio_pci_priv.h   |   5 ++
>  2 files changed, 115 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
> index 0d132c4ca95f..782408c08a5e 100644
> --- a/drivers/vfio/pci/vfio_pci_dmabuf.c
> +++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
> @@ -82,6 +82,8 @@ static void vfio_pci_dma_buf_release(struct dma_buf *dmabuf)
>               up_write(&priv->vdev->memory_lock);
>               vfio_device_put_registration(&priv->vdev->vdev);
>       }
> +     if (priv->vfile)
> +             fput(priv->vfile);
>       kfree(priv->phys_vec);
>       kfree(priv);
>  }
> @@ -222,6 +224,45 @@ int vfio_pci_dma_buf_find_pfn(struct vfio_pci_dma_buf *vpdmabuf,
>       return -EFAULT;
>  }
>  
> +/*
> + * Create a DMABUF corresponding to priv, add it to vdev->dmabufs list
> + * for tracking (meaning cleanup or revocation will zap it), and take
> + * a vfio_device registration.
> + */
> +static int vfio_pci_dmabuf_export(struct vfio_pci_core_device *vdev,
> +                               struct vfio_pci_dma_buf *priv, uint32_t flags)

Nit: s/uint32_t/u32/ here, to match the u32 flags parameter of
vfio_pci_core_feature_dma_buf()?

> +{
> +     DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
> +
> +     if (!vfio_device_try_get_registration(&vdev->vdev))
> +             return -ENODEV;
> +
> +     exp_info.ops = &vfio_pci_dmabuf_ops;
> +     exp_info.size = priv->size;
> +     exp_info.flags = flags;
> +     exp_info.priv = priv;
> +
> +     priv->dmabuf = dma_buf_export(&exp_info);
> +     if (IS_ERR(priv->dmabuf)) {
> +             vfio_device_put_registration(&vdev->vdev);
> +             return PTR_ERR(priv->dmabuf);
> +     }
> +
> +     kref_init(&priv->kref);
> +     init_completion(&priv->comp);
> +
> +     /* dma_buf_put() now frees priv */
> +     INIT_LIST_HEAD(&priv->dmabufs_elm);
> +     down_write(&vdev->memory_lock);
> +     dma_resv_lock(priv->dmabuf->resv, NULL);
> +     priv->revoked = !__vfio_pci_memory_enabled(vdev);
> +     list_add_tail(&priv->dmabufs_elm, &vdev->dmabufs);
> +     dma_resv_unlock(priv->dmabuf->resv);
> +     up_write(&vdev->memory_lock);
> +
> +     return 0;
> +}
> +
>  /*
>   * This is a temporary "private interconnect" between VFIO DMABUF and iommufd.
>   * It allows the two co-operating drivers to exchange the physical address of
> @@ -340,7 +381,6 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
>  {
>       struct vfio_device_feature_dma_buf get_dma_buf = {};
>       struct vfio_region_dma_range *dma_ranges;
> -     DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
>       struct vfio_pci_dma_buf *priv;
>       size_t length;
>       int ret;
> @@ -400,34 +440,9 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
>       kfree(dma_ranges);
>       dma_ranges = NULL;
>  
> -     if (!vfio_device_try_get_registration(&vdev->vdev)) {
> -             ret = -ENODEV;
> +     ret = vfio_pci_dmabuf_export(vdev, priv, get_dma_buf.open_flags);
> +     if (ret)
>               goto err_free_phys;
> -     }
> -
> -     exp_info.ops = &vfio_pci_dmabuf_ops;
> -     exp_info.size = priv->size;
> -     exp_info.flags = get_dma_buf.open_flags;
> -     exp_info.priv = priv;
> -
> -     priv->dmabuf = dma_buf_export(&exp_info);
> -     if (IS_ERR(priv->dmabuf)) {
> -             ret = PTR_ERR(priv->dmabuf);
> -             goto err_dev_put;
> -     }
> -
> -     kref_init(&priv->kref);
> -     init_completion(&priv->comp);
> -
> -     /* dma_buf_put() now frees priv */
> -     INIT_LIST_HEAD(&priv->dmabufs_elm);
> -     down_write(&vdev->memory_lock);
> -     dma_resv_lock(priv->dmabuf->resv, NULL);
> -     priv->revoked = !__vfio_pci_memory_enabled(vdev);
> -     list_add_tail(&priv->dmabufs_elm, &vdev->dmabufs);
> -     dma_resv_unlock(priv->dmabuf->resv);
> -     up_write(&vdev->memory_lock);
> -
>       /*
>        * dma_buf_fd() consumes the reference, when the file closes the dmabuf
>        * will be released.
> @@ -438,8 +453,6 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
>  
>       return ret;
>  
> -err_dev_put:
> -     vfio_device_put_registration(&vdev->vdev);
>  err_free_phys:
>       kfree(priv->phys_vec);
>  err_free_priv:
> @@ -449,6 +462,73 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
>       return ret;
>  }
>  
> +int vfio_pci_core_mmap_prep_dmabuf(struct vfio_pci_core_device *vdev,
> +                                struct vm_area_struct *vma,
> +                                u64 phys_start, u64 req_len,
> +                                unsigned int res_index)
> +{
> +     struct vfio_pci_dma_buf *priv;
> +     const unsigned int nr_ranges = 1;

Why a const local for this, versus simply assigning priv->nr_ranges = 1;
below?  Thanks,

Alex

> +     unsigned long vma_pgoff = vma->vm_pgoff & (VFIO_PCI_OFFSET_MASK >> PAGE_SHIFT);
> +     int ret;
> +
> +     priv = kzalloc_obj(*priv);
> +     if (!priv)
> +             return -ENOMEM;
> +
> +     priv->phys_vec = kzalloc_obj(*priv->phys_vec);
> +     if (!priv->phys_vec) {
> +             ret = -ENOMEM;
> +             goto err_free_priv;
> +     }
> +
> +     /*
> +      * The DMABUF begins from the mmap()'s BAR offset, i.e. the
> +      * start of the VMA corresponds to byte 0 of the DMABUF and
> +      * byte (vma_pgoff << PAGE_SHIFT) of the BAR.
> +      *
> +      * vfio_pci_dma_buf_find_pfn() reverses this offset using
> +      * vma_pgoff_adjust, so that ultimately a fault's offset from
> +      * the start of the _VMA_ has a consistent usage whether the
> +      * VMA originates from an mmap() of the VFIO device here or a
> +      * direct DMABUF mmap().
> +      */
> +     priv->vdev = vdev;
> +     priv->size = req_len;
> +     priv->nr_ranges = nr_ranges;
> +     priv->vma_pgoff_adjust = vma_pgoff;
> +     priv->provider = pcim_p2pdma_provider(vdev->pdev, res_index);
> +     if (!priv->provider) {
> +             ret = -EINVAL;
> +             goto err_free_phys;
> +     }
> +
> +     priv->phys_vec[0].paddr = phys_start + ((u64)vma_pgoff << PAGE_SHIFT);
> +     priv->phys_vec[0].len = priv->size;
> +
> +     ret = vfio_pci_dmabuf_export(vdev, priv, O_CLOEXEC | O_RDWR);
> +     if (ret)
> +             goto err_free_phys;
> +
> +     /*
> +      * The VMA gets the DMABUF file so that other users can locate
> +      * the DMABUF via a VA.  Ownership of the original VFIO device
> +      * file being mmap()ed transfers to priv, and is put when the
> +      * DMABUF is released.
> +      */
> +     priv->vfile = vma->vm_file;
> +     vma->vm_file = priv->dmabuf->file;
> +     vma->vm_private_data = priv;
> +
> +     return 0;
> +
> +err_free_phys:
> +     kfree(priv->phys_vec);
> +err_free_priv:
> +     kfree(priv);
> +     return ret;
> +}
> +
>  void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked)
>  {
>       struct vfio_pci_dma_buf *priv;
> diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
> index c8f6f959056a..06dc0fd3e230 100644
> --- a/drivers/vfio/pci/vfio_pci_priv.h
> +++ b/drivers/vfio/pci/vfio_pci_priv.h
> @@ -30,6 +30,7 @@ struct vfio_pci_dma_buf {
>       size_t size;
>       struct phys_vec *phys_vec;
>       struct p2pdma_provider *provider;
> +     struct file *vfile;
>       u32 nr_ranges;
>       struct kref kref;
>       struct completion comp;
> @@ -133,6 +134,10 @@ int vfio_pci_dma_buf_find_pfn(struct vfio_pci_dma_buf *vpdmabuf,
>                             unsigned long address,
>                             unsigned int order,
>                             unsigned long *out_pfn);
> +int vfio_pci_core_mmap_prep_dmabuf(struct vfio_pci_core_device *vdev,
> +                                struct vm_area_struct *vma,
> +                                u64 phys_start, u64 req_len,
> +                                unsigned int res_index);
>  
>  #ifdef CONFIG_VFIO_PCI_DMABUF
>  int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,

Reply via email to