Hi Alex,

On 29/05/2026 00:14, Alex Williamson wrote:

On Wed, 27 May 2026 03:23:12 -0700
Matt Evans <[email protected]> wrote:

A new VFIO feature, VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR, is added to
set (and get) CPU-facing memory type attributes for a DMABUF exported
from vfio-pci.  These are used for subsequent mmap()s of the buffer.

There are two attributes supported:
  - The default, VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_NC
  - VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_WC, which results in WC
    PTEs for the DMABUF's BAR region.

Signed-off-by: Matt Evans <[email protected]>
---
  drivers/vfio/pci/vfio_pci_core.c   |  2 +
  drivers/vfio/pci/vfio_pci_dmabuf.c | 70 +++++++++++++++++++++++++++++-
  drivers/vfio/pci/vfio_pci_priv.h   | 12 +++++
  include/uapi/linux/vfio.h          | 27 ++++++++++++
  4 files changed, 110 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 5184b3cac160..e256a925e7ce 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1590,6 +1590,8 @@ int vfio_pci_core_ioctl_feature(struct vfio_device 
*device, u32 flags,
                return vfio_pci_core_feature_token(vdev, flags, arg, argsz);
        case VFIO_DEVICE_FEATURE_DMA_BUF:
                return vfio_pci_core_feature_dma_buf(vdev, flags, arg, argsz);
+       case VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR:
+               return vfio_pci_core_feature_dma_buf_memattr(vdev, flags, arg, 
argsz);
        default:
                return -ENOTTY;
        }
diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c 
b/drivers/vfio/pci/vfio_pci_dmabuf.c
index 3fa14760898f..db8b95ddbe18 100644
--- a/drivers/vfio/pci/vfio_pci_dmabuf.c
+++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
@@ -42,7 +42,10 @@ static int vfio_pci_dma_buf_mmap(struct dma_buf *dmabuf, 
struct vm_area_struct *
         * contained within the DMABUF size before calling this.
         */
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+       if (READ_ONCE(priv->memattr) == VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_WC)
+               vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+       else
+               vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
        vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
/* See comments in vfio_pci_core_mmap() re VM_ALLOW_ANY_UNCACHED. */
@@ -464,6 +467,7 @@ int vfio_pci_core_feature_dma_buf(struct 
vfio_pci_core_device *vdev, u32 flags,
        priv->vdev = vdev;
        priv->nr_ranges = get_dma_buf.nr_ranges;
        priv->size = length;
+       priv->memattr = VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_NC;
        ret = vdev->pci_ops->get_dmabuf_phys(vdev, &priv->provider,
                                             get_dma_buf.region_index,
                                             priv->phys_vec, dma_ranges,
@@ -731,4 +735,68 @@ int vfio_pci_dma_buf_revoke(struct vfio_pci_core_device 
*vdev, int dmabuf_fd)
return ret;
  }
+
+int vfio_pci_core_feature_dma_buf_memattr(
+       struct vfio_pci_core_device *vdev, u32 flags,
+       struct vfio_device_feature_dma_buf_memattr __user *arg,
+       size_t argsz)
+{
+       struct vfio_device_feature_dma_buf_memattr db_attr;
+       struct vfio_pci_dma_buf *priv;
+       struct dma_buf *dmabuf;
+       int ret;
+
+       if (!vdev->pci_ops || !vdev->pci_ops->get_dmabuf_phys)
+               return -EOPNOTSUPP;
+
+       ret = vfio_check_feature(flags, argsz,
+                                VFIO_DEVICE_FEATURE_GET |
+                                VFIO_DEVICE_FEATURE_SET,
+                                sizeof(db_attr));

I don't see why this needs to support GET.  Are we solving a userspace
problem that doesn't exist?

Possibly; I'm a bit twitchy about unobservable/write-only internal state, so I added GET out of habit. No worries, though: I've removed it, and this function is now much lighter.

+       if (ret != 1)
+               return ret;
+
+       if (copy_from_user(&db_attr, arg, sizeof(db_attr)))
+               return -EFAULT;
+
+       dmabuf = dma_buf_get(db_attr.dmabuf_fd);
+       if (IS_ERR(dmabuf))
+               return PTR_ERR(dmabuf);
+
+       /* Verify DMABUF: see comments in vfio_pci_dma_buf_revoke() */
+       priv = dmabuf->priv;
+       if (dmabuf->ops != &vfio_pci_dmabuf_ops || priv->vdev != vdev) {
+               ret = -ENODEV;
+               goto out_put_buf;
+       }
+
+       ret = 0;
+       scoped_guard(rwsem_write, &vdev->memory_lock) {

Why?  This doesn't serialize against mmap.  Just use a WRITE_ONCE() to
match the READ_ONCE() on mmap?

Oops, good point.  Fixed.

+               uint32_t old_attr = priv->memattr;
+
+               if (flags & VFIO_DEVICE_FEATURE_SET) {
+                       switch(db_attr.memattr) {
+                       case VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_NC:
+                       case VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_WC:
+                               priv->memattr = db_attr.memattr;
+                               break;
+
+                       default:
+                               ret = -ENOTSUPP;

-EINVAL

I'd like to push back on this one: ENOTSUPP distinguishes trying to use a wild attribute value from all manner of other screwups, such as trying to GET (now that it is no longer supported), passing an fd that isn't a DMABUF, etc.

I'm not wedded to ENOTSUPP specifically; I just want something distinct from the other errors userspace might see, and that value seems appropriate.

+                       }
+               }
+               db_attr.memattr = old_attr;
+       }
+
+       if (!ret && (flags & VFIO_DEVICE_FEATURE_GET)) {
+               if (copy_to_user(arg, &db_attr, sizeof(db_attr)))
+                       ret = -EFAULT;
+       }
+
+ out_put_buf:
+       dma_buf_put(dmabuf);
+
+       return ret;
+
+}
  #endif /* CONFIG_VFIO_PCI_DMABUF */
diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
index a1e0f4fcb1dc..8067be45beb0 100644
--- a/drivers/vfio/pci/vfio_pci_priv.h
+++ b/drivers/vfio/pci/vfio_pci_priv.h
@@ -41,6 +41,7 @@ struct vfio_pci_dma_buf {
        struct kref kref;
        struct completion comp;
        unsigned long vma_pgoff_adjust;
+       u32 memattr;
        enum vfio_pci_dma_buf_status status;
  };
@@ -154,6 +155,10 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked);
  int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 
flags,
                                  struct vfio_device_feature_dma_buf __user 
*arg,
                                  size_t argsz);
+int vfio_pci_core_feature_dma_buf_memattr(
+       struct vfio_pci_core_device *vdev, u32 flags,
+       struct vfio_device_feature_dma_buf_memattr __user *arg,
+       size_t argsz);
  int vfio_pci_dma_buf_revoke(struct vfio_pci_core_device *vdev, int dmabuf_fd);
  #else
  static inline int
@@ -163,6 +168,13 @@ vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device 
*vdev, u32 flags,
  {
        return -ENOTTY;
  }
+static inline int vfio_pci_core_feature_dma_buf_memattr(
+       struct vfio_pci_core_device *vdev, u32 flags,
+       struct vfio_device_feature_dma_buf_memattr __user *arg,
+       size_t argsz)
+{
+       return -ENODEV;

-ENOTTY

Gotcha, fixed. Yes, that's quite conspicuous versus the chunk above; I'm unsure where I got that from.


Thank you,


Matt



Thanks,
Alex

+}
  static inline int vfio_pci_dma_buf_revoke(struct vfio_pci_core_device *vdev,
                                          int dmabuf_fd)
  {
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 02366e9f8e16..9b0b68f8a1ef 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -1564,6 +1564,33 @@ struct vfio_device_feature_dma_buf {
   */
  #define VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2  12
+/**
+ * Given a dma_buf fd previously created by
+ * VFIO_DEVICE_FEATURE_DMA_BUF, GET or SET the memory attribute that
+ * will be used by future mmap()s of that fd.  SETting a new attribute
+ * does not affect existing VMAs.
+ *
+ * The default, if no previous SET has been performed, is NC.
+ *
+ * Return: 0 on success, -1 and errno is set on failure:
+ *
+ *  ENOTSUPP: The given memattr is not supported.
+ *  EBADF, EINVAL: dmabuf_fd is not a DMABUF fd.
+ *  ENODEV: The dmabuf_fd does not match this VFIO device.
+ */
+#define VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR 13
+
+/* Valid memory attributes for the memattr field */
+enum vfio_device_dma_buf_memattr {
+       VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_NC = 0, /* pgprot_noncached */
+       VFIO_DEVICE_FEATURE_DMA_BUF_MEMATTR_WC = 1, /* pgprot_writecombine */
+};
+
+struct vfio_device_feature_dma_buf_memattr {
+       __s32   dmabuf_fd;
+       __u32   memattr;
+};
+
  /* -------- API for Type1 VFIO IOMMU -------- */
/**


Reply via email to