On Wed, 27 May 2026 03:23:12 -0700
Matt Evans <[email protected]> wrote:

> A new VFIO feature, VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR, is added to
> set (and get) CPU-facing memory type attributes for a DMABUF exported
> from vfio-pci.  These are used for subsequent mmap()s of the buffer.
> 
> There are two attributes supported:
>  - The default, VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_NC
>  - VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_WC, which results in WC
>    PTEs for the DMABUF's BAR region.
> 
> Signed-off-by: Matt Evans <[email protected]>
> ---
>  drivers/vfio/pci/vfio_pci_core.c   |  2 +
>  drivers/vfio/pci/vfio_pci_dmabuf.c | 70 +++++++++++++++++++++++++++++-
>  drivers/vfio/pci/vfio_pci_priv.h   | 12 +++++
>  include/uapi/linux/vfio.h          | 27 ++++++++++++
>  4 files changed, 110 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> index 5184b3cac160..e256a925e7ce 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -1590,6 +1590,8 @@ int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags,
>               return vfio_pci_core_feature_token(vdev, flags, arg, argsz);
>       case VFIO_DEVICE_FEATURE_DMA_BUF:
>               return vfio_pci_core_feature_dma_buf(vdev, flags, arg, argsz);
> +     case VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR:
> +             return vfio_pci_core_feature_dma_buf_memattr(vdev, flags, arg, argsz);
>       default:
>               return -ENOTTY;
>       }
> diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
> index 3fa14760898f..db8b95ddbe18 100644
> --- a/drivers/vfio/pci/vfio_pci_dmabuf.c
> +++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
> @@ -42,7 +42,10 @@ static int vfio_pci_dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *
>        * contained within the DMABUF size before calling this.
>        */
>  
> -     vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
> +     if (READ_ONCE(priv->memattr) == VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_WC)
> +             vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
> +     else
> +             vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
>       vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
>  
>       /* See comments in vfio_pci_core_mmap() re VM_ALLOW_ANY_UNCACHED. */
> @@ -464,6 +467,7 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
>       priv->vdev = vdev;
>       priv->nr_ranges = get_dma_buf.nr_ranges;
>       priv->size = length;
> +     priv->memattr = VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_NC;
>       ret = vdev->pci_ops->get_dmabuf_phys(vdev, &priv->provider,
>                                            get_dma_buf.region_index,
>                                            priv->phys_vec, dma_ranges,
> @@ -731,4 +735,68 @@ int vfio_pci_dma_buf_revoke(struct vfio_pci_core_device *vdev, int dmabuf_fd)
>  
>       return ret;
>  }
> +
> +int vfio_pci_core_feature_dma_buf_memattr(
> +     struct vfio_pci_core_device *vdev, u32 flags,
> +     struct vfio_device_feature_dma_buf_memattr __user *arg,
> +     size_t argsz)
> +{
> +     struct vfio_device_feature_dma_buf_memattr db_attr;
> +     struct vfio_pci_dma_buf *priv;
> +     struct dma_buf *dmabuf;
> +     int ret;
> +
> +     if (!vdev->pci_ops || !vdev->pci_ops->get_dmabuf_phys)
> +             return -EOPNOTSUPP;
> +
> +     ret = vfio_check_feature(flags, argsz,
> +                              VFIO_DEVICE_FEATURE_GET |
> +                              VFIO_DEVICE_FEATURE_SET,
> +                              sizeof(db_attr));

I don't see why this needs to support GET.  Are we solving a userspace
problem that doesn't exist?

> +     if (ret != 1)
> +             return ret;
> +
> +     if (copy_from_user(&db_attr, arg, sizeof(db_attr)))
> +             return -EFAULT;
> +
> +     dmabuf = dma_buf_get(db_attr.dmabuf_fd);
> +     if (IS_ERR(dmabuf))
> +             return PTR_ERR(dmabuf);
> +
> +     /* Verify DMABUF: see comments in vfio_pci_dma_buf_revoke() */
> +     priv = dmabuf->priv;
> +     if (dmabuf->ops != &vfio_pci_dmabuf_ops || priv->vdev != vdev) {
> +             ret = -ENODEV;
> +             goto out_put_buf;
> +     }
> +
> +     ret = 0;
> +     scoped_guard(rwsem_write, &vdev->memory_lock) {

Why take memory_lock here?  It doesn't serialize against mmap.  Just use
a WRITE_ONCE() to match the READ_ONCE() in the mmap path?

> +             uint32_t old_attr = priv->memattr;
> +
> +             if (flags & VFIO_DEVICE_FEATURE_SET) {
> +                     switch(db_attr.memattr) {
> +                     case VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_NC:
> +                     case VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_WC:
> +                             priv->memattr = db_attr.memattr;
> +                             break;
> +
> +                     default:
> +                             ret = -ENOTSUPP;

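Please return -EINVAL for an unrecognized memattr value; the ENOTSUPP
entry in the uAPI comment below would change to match.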

> +                     }
> +             }
> +             db_attr.memattr = old_attr;
> +     }
> +
> +     if (!ret && (flags & VFIO_DEVICE_FEATURE_GET)) {
> +             if (copy_to_user(arg, &db_attr, sizeof(db_attr)))
> +                     ret = -EFAULT;
> +     }
> +
> + out_put_buf:
> +     dma_buf_put(dmabuf);
> +
> +     return ret;
> +
> +}
>  #endif /* CONFIG_VFIO_PCI_DMABUF */
> diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
> index a1e0f4fcb1dc..8067be45beb0 100644
> --- a/drivers/vfio/pci/vfio_pci_priv.h
> +++ b/drivers/vfio/pci/vfio_pci_priv.h
> @@ -41,6 +41,7 @@ struct vfio_pci_dma_buf {
>       struct kref kref;
>       struct completion comp;
>       unsigned long vma_pgoff_adjust;
> +     u32 memattr;
>       enum vfio_pci_dma_buf_status status;
>  };
>  
> @@ -154,6 +155,10 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked);
>  int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
>                                 struct vfio_device_feature_dma_buf __user *arg,
>                                 size_t argsz);
> +int vfio_pci_core_feature_dma_buf_memattr(
> +     struct vfio_pci_core_device *vdev, u32 flags,
> +     struct vfio_device_feature_dma_buf_memattr __user *arg,
> +     size_t argsz);
>  int vfio_pci_dma_buf_revoke(struct vfio_pci_core_device *vdev, int dmabuf_fd);
>  #else
>  static inline int
> @@ -163,6 +168,13 @@ vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
>  {
>       return -ENOTTY;
>  }
> +static inline int vfio_pci_core_feature_dma_buf_memattr(
> +     struct vfio_pci_core_device *vdev, u32 flags,
> +     struct vfio_device_feature_dma_buf_memattr __user *arg,
> +     size_t argsz)
> +{
> +     return -ENODEV;

This should be -ENOTTY, to match the vfio_pci_core_feature_dma_buf()
stub just above.

Thanks,
Alex

> +}
>  static inline int vfio_pci_dma_buf_revoke(struct vfio_pci_core_device *vdev,
>                                         int dmabuf_fd)
>  {
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 02366e9f8e16..9b0b68f8a1ef 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -1564,6 +1564,33 @@ struct vfio_device_feature_dma_buf {
>   */
>  #define VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2  12
>  
> +/**
> + * Given a dma_buf fd previously created by
> + * VFIO_DEVICE_FEATURE_DMA_BUF, GET or SET the memory attribute that
> + * will be used by future mmap()s of that fd.  SETting a new attribute
> + * does not affect existing VMAs.
> + *
> + * The default, if no previous SET has been performed, is NC.
> + *
> + * Return: 0 on success, -1 and errno is set on failure:
> + *
> + *  ENOTSUPP: The given memattr is not supported.
> + *  EBADF, EINVAL: dmabuf_fd is not a DMABUF fd.
> + *  ENODEV: The dmabuf_fd does not match this VFIO device.
> + */
> +#define VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR 13
> +
> +/* Valid memory attributes for the memattr field */
> +enum vfio_device_dma_buf_memattr {
> +     VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_NC = 0, /* pgprot_noncached */
> +     VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_WC = 1, /* pgprot_writecombine */
> +};
> +
> +struct vfio_device_feature_dma_buf_memattr {
> +     __s32   dmabuf_fd;
> +     __u32   memattr;
> +};
> +
>  /* -------- API for Type1 VFIO IOMMU -------- */
>  
>  /**

Reply via email to