> From: Leon Romanovsky <[email protected]>
> Sent: Tuesday, November 11, 2025 5:58 PM
>
> - if (!new_mem)
> + if (!new_mem) {
> vfio_pci_zap_and_down_write_memory_lock(vdev);
> - else
> + vfio_pci_dma_buf_move(vdev, true);
> + } else {
> down_write(&vdev->memory_lock);
> + }
Shouldn't we send the move notification before zapping the BARs? Otherwise
there is still a small window in between where the exporter has already
cleared the mapping while the importer still holds it...
> +static void vfio_pci_dma_buf_release(struct dma_buf *dmabuf)
> +{
> + struct vfio_pci_dma_buf *priv = dmabuf->priv;
> +
> + /*
> + * Either this or vfio_pci_dma_buf_cleanup() will remove from the list.
> + * The refcount prevents both.
Which refcount? I thought it's vdev->memory_lock that prevents the race...
> + */
> + if (priv->vdev) {
> + down_write(&priv->vdev->memory_lock);
> + list_del_init(&priv->dmabufs_elm);
> + up_write(&priv->vdev->memory_lock);
> + vfio_device_put_registration(&priv->vdev->vdev);
> + }
> + kfree(priv->phys_vec);
> + kfree(priv);
> +}
[...]
> +int vfio_pci_core_fill_phys_vec(struct dma_buf_phys_vec *phys_vec,
> + struct vfio_region_dma_range *dma_ranges,
> + size_t nr_ranges, phys_addr_t start,
> + phys_addr_t len)
> +{
> + phys_addr_t max_addr;
> + unsigned int i;
> +
> + max_addr = start + len;
> + for (i = 0; i < nr_ranges; i++) {
> + phys_addr_t end;
> +
> + if (!dma_ranges[i].length)
> + return -EINVAL;
Looks redundant as there is already a check in validate_dmabuf_input().
> +
> +int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32
> flags,
> + struct vfio_device_feature_dma_buf __user
> *arg,
> + size_t argsz)
> +{
> + struct vfio_device_feature_dma_buf get_dma_buf = {};
> + struct vfio_region_dma_range *dma_ranges;
> + DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
> + struct vfio_pci_dma_buf *priv;
> + size_t length;
> + int ret;
> +
> + if (!vdev->pci_ops || !vdev->pci_ops->get_dmabuf_phys)
> + return -EOPNOTSUPP;
> +
> + ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
> + sizeof(get_dma_buf));
> + if (ret != 1)
> + return ret;
> +
> + if (copy_from_user(&get_dma_buf, arg, sizeof(get_dma_buf)))
> + return -EFAULT;
> +
> + if (!get_dma_buf.nr_ranges || get_dma_buf.flags)
> + return -EINVAL;
Unknown flag bits should return -EOPNOTSUPP rather than -EINVAL.
> +
> +void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev)
> +{
> + struct vfio_pci_dma_buf *priv;
> + struct vfio_pci_dma_buf *tmp;
> +
> + down_write(&vdev->memory_lock);
> + list_for_each_entry_safe(priv, tmp, &vdev->dmabufs, dmabufs_elm)
> {
> + if (!get_file_active(&priv->dmabuf->file))
> + continue;
> +
> + dma_resv_lock(priv->dmabuf->resv, NULL);
> + list_del_init(&priv->dmabufs_elm);
> + priv->vdev = NULL;
> + priv->revoked = true;
> + dma_buf_move_notify(priv->dmabuf);
> + dma_resv_unlock(priv->dmabuf->resv);
> + vfio_device_put_registration(&vdev->vdev);
> + fput(priv->dmabuf->file);
Use dma_buf_put(priv->dmabuf) here, consistent with the other places.
> +/**
> + * Upon VFIO_DEVICE_FEATURE_GET create a dma_buf fd for the
> + * regions selected.
s/regions/region/
> + *
> + * open_flags are the typical flags passed to open(2), eg O_RDWR,
> O_CLOEXEC,
> + * etc. offset/length specify a slice of the region to create the dmabuf
> from.
> + * nr_ranges is the total number of (P2P DMA) ranges that comprise the
> dmabuf.
> + *
> + * flags should be 0.
> + *
> + * Return: The fd number on success, -1 and errno is set on failure.
> + */
> +#define VFIO_DEVICE_FEATURE_DMA_BUF 11
> +
> +struct vfio_region_dma_range {
> + __u64 offset;
> + __u64 length;
> +};
> +
> +struct vfio_device_feature_dma_buf {
> + __u32 region_index;
> + __u32 open_flags;
> + __u32 flags;
Usually the 'flags' field is placed at the start of the structure
(following argsz, if present).

No big issues, so:
Reviewed-by: Kevin Tian <[email protected]>