On Wed, 27 May 2026 03:23:12 -0700 Matt Evans <[email protected]> wrote:
> A new VFIO feature, VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR, is added to > set (and get) CPU-facing memory type attributes for a DMABUF exported > from vfio-pci. These are used for subsequent mmap()s of the buffer. > > There are two attributes supported: > - The default, VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_UC > - VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_WC, which results in WC > PTEs for the DMABUF's BAR region. > > Signed-off-by: Matt Evans <[email protected]> > --- > drivers/vfio/pci/vfio_pci_core.c | 2 + > drivers/vfio/pci/vfio_pci_dmabuf.c | 70 +++++++++++++++++++++++++++++- > drivers/vfio/pci/vfio_pci_priv.h | 12 +++++ > include/uapi/linux/vfio.h | 27 ++++++++++++ > 4 files changed, 110 insertions(+), 1 deletion(-) > > diff --git a/drivers/vfio/pci/vfio_pci_core.c > b/drivers/vfio/pci/vfio_pci_core.c > index 5184b3cac160..e256a925e7ce 100644 > --- a/drivers/vfio/pci/vfio_pci_core.c > +++ b/drivers/vfio/pci/vfio_pci_core.c > @@ -1590,6 +1590,8 @@ int vfio_pci_core_ioctl_feature(struct vfio_device > *device, u32 flags, > return vfio_pci_core_feature_token(vdev, flags, arg, argsz); > case VFIO_DEVICE_FEATURE_DMA_BUF: > return vfio_pci_core_feature_dma_buf(vdev, flags, arg, argsz); > + case VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR: > + return vfio_pci_core_feature_dma_buf_memattr(vdev, flags, arg, > argsz); > default: > return -ENOTTY; > } > diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c > b/drivers/vfio/pci/vfio_pci_dmabuf.c > index 3fa14760898f..db8b95ddbe18 100644 > --- a/drivers/vfio/pci/vfio_pci_dmabuf.c > +++ b/drivers/vfio/pci/vfio_pci_dmabuf.c > @@ -42,7 +42,10 @@ static int vfio_pci_dma_buf_mmap(struct dma_buf *dmabuf, > struct vm_area_struct * > * contained within the DMABUF size before calling this. 
> */ > > - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); > + if (READ_ONCE(priv->memattr) == VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_WC) > + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); > + else > + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); > vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); > > /* See comments in vfio_pci_core_mmap() re VM_ALLOW_ANY_UNCACHED. */ > @@ -464,6 +467,7 @@ int vfio_pci_core_feature_dma_buf(struct > vfio_pci_core_device *vdev, u32 flags, > priv->vdev = vdev; > priv->nr_ranges = get_dma_buf.nr_ranges; > priv->size = length; > + priv->memattr = VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_NC; > ret = vdev->pci_ops->get_dmabuf_phys(vdev, &priv->provider, > get_dma_buf.region_index, > priv->phys_vec, dma_ranges, > @@ -731,4 +735,68 @@ int vfio_pci_dma_buf_revoke(struct vfio_pci_core_device > *vdev, int dmabuf_fd) > > return ret; > } > + > +int vfio_pci_core_feature_dma_buf_memattr( > + struct vfio_pci_core_device *vdev, u32 flags, > + struct vfio_device_feature_dma_buf_memattr __user *arg, > + size_t argsz) > +{ > + struct vfio_device_feature_dma_buf_memattr db_attr; > + struct vfio_pci_dma_buf *priv; > + struct dma_buf *dmabuf; > + int ret; > + > + if (!vdev->pci_ops || !vdev->pci_ops->get_dmabuf_phys) > + return -EOPNOTSUPP; > + > + ret = vfio_check_feature(flags, argsz, > + VFIO_DEVICE_FEATURE_GET | > + VFIO_DEVICE_FEATURE_SET, > + sizeof(db_attr));

I don't see why this needs to support GET. Are we solving a userspace problem that doesn't exist?
> + if (ret != 1) > + return ret; > + > + if (copy_from_user(&db_attr, arg, sizeof(db_attr))) > + return -EFAULT; > + > + dmabuf = dma_buf_get(db_attr.dmabuf_fd); > + if (IS_ERR(dmabuf)) > + return PTR_ERR(dmabuf); > + > + /* Verify DMABUF: see comments in vfio_pci_dma_buf_revoke() */ > + priv = dmabuf->priv; > + if (dmabuf->ops != &vfio_pci_dmabuf_ops || priv->vdev != vdev) { > + ret = -ENODEV; > + goto out_put_buf; > + } > + > + ret = 0; > + scoped_guard(rwsem_write, &vdev->memory_lock) {

Why? This doesn't serialize against mmap. Just use a WRITE_ONCE() to match the READ_ONCE() on mmap?

> + uint32_t old_attr = priv->memattr; > + > + if (flags & VFIO_DEVICE_FEATURE_SET) { > + switch(db_attr.memattr) { > + case VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_NC: > + case VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_WC: > + priv->memattr = db_attr.memattr; > + break; > + > + default: > + ret = -ENOTSUPP;

-EINVAL

> + } > + } > + db_attr.memattr = old_attr; > + } > + > + if (!ret && (flags & VFIO_DEVICE_FEATURE_GET)) { > + if (copy_to_user(arg, &db_attr, sizeof(db_attr))) > + ret = -EFAULT; > + } > + > + out_put_buf: > + dma_buf_put(dmabuf); > + > + return ret; > + > +} > #endif /* CONFIG_VFIO_PCI_DMABUF */ > diff --git a/drivers/vfio/pci/vfio_pci_priv.h > b/drivers/vfio/pci/vfio_pci_priv.h > index a1e0f4fcb1dc..8067be45beb0 100644 > --- a/drivers/vfio/pci/vfio_pci_priv.h > +++ b/drivers/vfio/pci/vfio_pci_priv.h > @@ -41,6 +41,7 @@ struct vfio_pci_dma_buf { > struct kref kref; > struct completion comp; > unsigned long vma_pgoff_adjust; > + u32 memattr; > enum vfio_pci_dma_buf_status status; > }; > > @@ -154,6 +155,10 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device > *vdev, bool revoked); > int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 > flags, > struct vfio_device_feature_dma_buf __user > *arg, > size_t argsz); > +int vfio_pci_core_feature_dma_buf_memattr( > + struct vfio_pci_core_device *vdev, u32 flags, > + struct vfio_device_feature_dma_buf_memattr 
__user *arg, > + size_t argsz); > int vfio_pci_dma_buf_revoke(struct vfio_pci_core_device *vdev, int > dmabuf_fd); > #else > static inline int > @@ -163,6 +168,13 @@ vfio_pci_core_feature_dma_buf(struct > vfio_pci_core_device *vdev, u32 flags, > { > return -ENOTTY; > } > +static inline int vfio_pci_core_feature_dma_buf_memattr( > + struct vfio_pci_core_device *vdev, u32 flags, > + struct vfio_device_feature_dma_buf_memattr __user *arg, > + size_t argsz) > +{ > + return -ENODEV;

-ENOTTY

Thanks,
Alex

> +} > static inline int vfio_pci_dma_buf_revoke(struct vfio_pci_core_device *vdev, > int dmabuf_fd) > { > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h > index 02366e9f8e16..9b0b68f8a1ef 100644 > --- a/include/uapi/linux/vfio.h > +++ b/include/uapi/linux/vfio.h > @@ -1564,6 +1564,33 @@ struct vfio_device_feature_dma_buf { > */ > #define VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2 12 > > +/** > + * Given a dma_buf fd previously created by > + * VFIO_DEVICE_FEATURE_DMA_BUF, GET or SET the memory attribute that > + * will be used by future mmap()s of that fd. SETting a new attribute > + * does not affect existing VMAs. > + * > + * The default, if no previous SET has been performed, is NC. > + * > + * Return: 0 on success, -1 and errno is set on failure: > + * > + * ENOTSUPP: The given memattr is not supported. > + * EBADF, EINVAL: dmabuf_fd is not a DMABUF fd. > + * ENODEV: The dmabuf_fd does not match this VFIO device. > + */ > +#define VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR 13 > + > +/* Valid memory attributes for the memattr field */ > +enum vfio_device_dma_buf_memattr { > + VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_NC = 0, /* pgprot_noncached */ > + VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_WC = 1, /* pgprot_writecombine */ > +}; > + > +struct vfio_device_feature_dma_buf_memattr { > + __s32 dmabuf_fd; > + __u32 memattr; > +}; > + > /* -------- API for Type1 VFIO IOMMU -------- */ > > /**
