Expand the VFIO DMABUF revocation state to three states:
Not revoked, temporarily revoked, and permanently revoked.

The first two are for existing transient revocation, e.g. across a
function reset, and the DMABUF is put into the last in response to a
new VFIO feature, VFIO_DEVICE_FEATURE_DMA_BUF_REVOKE.

VFIO_DEVICE_FEATURE_DMA_BUF_REVOKE passes a DMABUF by fd and requests
that the DMABUF is permanently revoked.  On success, it's guaranteed
that the buffer can never be imported/attached/mmap()ed in future, that
dynamic imports have been cleanly detached, and that all mappings have
been made inaccessible/PTEs zapped.

This is useful for lifecycle management, to reclaim VFIO PCI BAR
ranges previously delegated to a subordinate client process: The
driver process can ensure that the loaned resources are revoked when
the client is deemed "done", and exported ranges can be safely re-used
elsewhere.

Refactor the revocation code out of vfio_pci_dma_buf_move() to a
function common to move and the new feature request path.

Signed-off-by: Matt Evans <[email protected]>
---
 drivers/vfio/pci/vfio_pci_core.c   |   6 +-
 drivers/vfio/pci/vfio_pci_dmabuf.c | 169 ++++++++++++++++++++++-------
 drivers/vfio/pci/vfio_pci_priv.h   |  19 +++-
 include/uapi/linux/vfio.h          |  20 ++++
 4 files changed, 173 insertions(+), 41 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 508a5eca910a..064906b25467 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1573,6 +1573,8 @@ int vfio_pci_core_ioctl_feature(struct vfio_device 
*device, u32 flags,
                return vfio_pci_core_feature_token(vdev, flags, arg, argsz);
        case VFIO_DEVICE_FEATURE_DMA_BUF:
                return vfio_pci_core_feature_dma_buf(vdev, flags, arg, argsz);
+       case VFIO_DEVICE_FEATURE_DMA_BUF_REVOKE:
+               return vfio_pci_core_feature_dma_buf_revoke(vdev, flags, arg, 
argsz);
        default:
                return -ENOTTY;
        }
@@ -1784,7 +1786,7 @@ static vm_fault_t vfio_pci_mmap_huge_fault(struct 
vm_fault *vmf,
 
        dma_resv_lock(priv->dmabuf->resv, NULL);
 
-       if (priv->revoked) {
+       if (priv->status != VFIO_PCI_DMABUF_OK) {
                pr_debug_ratelimited("%s VA 0x%lx, pgoff 0x%lx: DMABUF 
revoked/cleaned up\n",
                                     __func__, vmf->address, vma->vm_pgoff);
                dma_resv_unlock(priv->dmabuf->resv);
@@ -1809,7 +1811,7 @@ static vm_fault_t vfio_pci_mmap_huge_fault(struct 
vm_fault *vmf,
 
        scoped_guard(rwsem_read, &vdev->memory_lock) {
                /* Revocation status must be re-read, under memory_lock */
-               if (!priv->revoked) {
+               if (priv->status == VFIO_PCI_DMABUF_OK) {
                        int pres = vfio_pci_dma_buf_find_pfn(priv, vma,
                                                             vmf->address,
                                                             order, &pfn);
diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c 
b/drivers/vfio/pci/vfio_pci_dmabuf.c
index 2fb09a2c0f6b..b47411992ab6 100644
--- a/drivers/vfio/pci/vfio_pci_dmabuf.c
+++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
@@ -19,7 +19,7 @@ static int vfio_pci_dma_buf_attach(struct dma_buf *dmabuf,
        if (!attachment->peer2peer)
                return -EOPNOTSUPP;
 
-       if (priv->revoked)
+       if (priv->status != VFIO_PCI_DMABUF_OK)
                return -ENODEV;
 
        if (!dma_buf_attach_revocable(attachment))
@@ -41,7 +41,7 @@ static int vfio_pci_dma_buf_mmap(struct dma_buf *dmabuf, 
struct vm_area_struct *
         * still safe because the fault handler ultimately prevents
         * access to a revoked buffer if it isn't caught here.
         */
-       if (READ_ONCE(priv->revoked))
+       if (READ_ONCE(priv->status) != VFIO_PCI_DMABUF_OK)
                return -ENODEV;
        if ((vma->vm_flags & VM_SHARED) == 0)
                return -EINVAL;
@@ -81,7 +81,7 @@ vfio_pci_dma_buf_map(struct dma_buf_attachment *attachment,
 
        dma_resv_assert_held(priv->dmabuf->resv);
 
-       if (priv->revoked)
+       if (priv->status != VFIO_PCI_DMABUF_OK)
                return ERR_PTR(-ENODEV);
 
        ret = dma_buf_phys_vec_to_sgt(attachment, priv->provider,
@@ -291,7 +291,8 @@ static int vfio_pci_dmabuf_export(struct 
vfio_pci_core_device *vdev,
        INIT_LIST_HEAD(&priv->dmabufs_elm);
        down_write(&vdev->memory_lock);
        dma_resv_lock(priv->dmabuf->resv, NULL);
-       priv->revoked = !__vfio_pci_memory_enabled(vdev);
+       priv->status = __vfio_pci_memory_enabled(vdev) ? VFIO_PCI_DMABUF_OK :
+               VFIO_PCI_DMABUF_TEMP_REVOKED;
        list_add_tail(&priv->dmabufs_elm, &vdev->dmabufs);
        dma_resv_unlock(priv->dmabuf->resv);
        up_write(&vdev->memory_lock);
@@ -322,7 +323,7 @@ int vfio_pci_dma_buf_iommufd_map(struct dma_buf_attachment 
*attachment,
                return -EOPNOTSUPP;
 
        priv = attachment->dmabuf->priv;
-       if (priv->revoked)
+       if (priv->status != VFIO_PCI_DMABUF_OK)
                return -ENODEV;
 
        /* More than one range to iommufd will require proper DMABUF support */
@@ -591,6 +592,64 @@ int vfio_pci_core_mmap_prep_dmabuf(struct 
vfio_pci_core_device *vdev,
        return ret;
 }
 
+/* Set the DMABUF's revocation status (OK or temporarily/permanently revoked) 
*/
+static void vfio_pci_dma_buf_set_status(struct vfio_pci_dma_buf *priv,
+                                       enum vfio_pci_dma_buf_status new_status)
+{
+       bool was_revoked;
+
+       lockdep_assert_held_write(&priv->vdev->memory_lock);
+
+       if (priv->status == VFIO_PCI_DMABUF_PERM_REVOKED ||
+           priv->status == new_status) {
+               return;
+       }
+
+       dma_resv_lock(priv->dmabuf->resv, NULL);
+       was_revoked = (priv->status == VFIO_PCI_DMABUF_TEMP_REVOKED);
+
+       if (new_status != VFIO_PCI_DMABUF_OK) {
+               priv->status = new_status; /* Temp or permanently revoked */
+
+               if (was_revoked) {
+                       /*
+                        * TEMP_REVOKED is being upgraded to
+                        * PERM_REVOKED.  The buffer is already gone,
+                        * don't wait on it again.
+                        */
+                       dma_resv_unlock(priv->dmabuf->resv);
+                       return;
+               }
+       }
+
+       dma_buf_invalidate_mappings(priv->dmabuf);
+       dma_resv_wait_timeout(priv->dmabuf->resv,
+                             DMA_RESV_USAGE_BOOKKEEP, false,
+                             MAX_SCHEDULE_TIMEOUT);
+       dma_resv_unlock(priv->dmabuf->resv);
+       if (new_status != VFIO_PCI_DMABUF_OK) {
+               kref_put(&priv->kref, vfio_pci_dma_buf_done);
+               wait_for_completion(&priv->comp);
+               unmap_mapping_range(priv->dmabuf->file->f_mapping,
+                                   0, priv->size, 1);
+               /*
+                * Re-arm the registered kref reference and the
+                * completion so the post-revoke state matches the
+                * post-creation state.  An un-revoke followed by a
+                * new mapping needs the kref to be non-zero before
+                * kref_get(), and vfio_pci_dma_buf_cleanup()
+                * delegates its drain back through this revoke
+                * path on a possibly-already-revoked dma-buf.
+                */
+               kref_init(&priv->kref);
+               reinit_completion(&priv->comp);
+       } else {
+               dma_resv_lock(priv->dmabuf->resv, NULL);
+               priv->status = VFIO_PCI_DMABUF_OK;
+               dma_resv_unlock(priv->dmabuf->resv);
+       }
+}
+
 void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked)
 {
        struct vfio_pci_dma_buf *priv;
@@ -599,44 +658,15 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device 
*vdev, bool revoked)
        lockdep_assert_held_write(&vdev->memory_lock);
        /*
         * Holding memory_lock ensures a racing VMA fault observes
-        * priv->revoked properly.
+        * priv->status properly.
         */
 
        list_for_each_entry_safe(priv, tmp, &vdev->dmabufs, dmabufs_elm) {
                if (!get_file_active(&priv->dmabuf->file))
                        continue;
-
-               if (priv->revoked != revoked) {
-                       dma_resv_lock(priv->dmabuf->resv, NULL);
-                       if (revoked)
-                               priv->revoked = true;
-                       dma_buf_invalidate_mappings(priv->dmabuf);
-                       dma_resv_wait_timeout(priv->dmabuf->resv,
-                                             DMA_RESV_USAGE_BOOKKEEP, false,
-                                             MAX_SCHEDULE_TIMEOUT);
-                       dma_resv_unlock(priv->dmabuf->resv);
-                       if (revoked) {
-                               kref_put(&priv->kref, vfio_pci_dma_buf_done);
-                               wait_for_completion(&priv->comp);
-                               
unmap_mapping_range(priv->dmabuf->file->f_mapping,
-                                                   0, priv->size, 1);
-                               /*
-                                * Re-arm the registered kref reference and the
-                                * completion so the post-revoke state matches 
the
-                                * post-creation state.  An un-revoke followed 
by a
-                                * new mapping needs the kref to be non-zero 
before
-                                * kref_get(), and vfio_pci_dma_buf_cleanup()
-                                * delegates its drain back through this revoke
-                                * path on a possibly-already-revoked dma-buf.
-                                */
-                               kref_init(&priv->kref);
-                               reinit_completion(&priv->comp);
-                       } else {
-                               dma_resv_lock(priv->dmabuf->resv, NULL);
-                               priv->revoked = false;
-                               dma_resv_unlock(priv->dmabuf->resv);
-                       }
-               }
+               vfio_pci_dma_buf_set_status(priv, revoked ?
+                                           VFIO_PCI_DMABUF_TEMP_REVOKED :
+                                           VFIO_PCI_DMABUF_OK);
                fput(priv->dmabuf->file);
        }
 }
@@ -668,3 +698,66 @@ void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device 
*vdev)
        }
        up_write(&vdev->memory_lock);
 }
+
+#ifdef CONFIG_VFIO_PCI_DMABUF
+int vfio_pci_core_feature_dma_buf_revoke(
+       struct vfio_pci_core_device *vdev, u32 flags,
+       struct vfio_device_feature_dma_buf_revoke __user *arg,
+       size_t argsz)
+{
+       struct vfio_device_feature_dma_buf_revoke db_revoke;
+       struct vfio_pci_dma_buf *priv;
+       struct dma_buf *dmabuf;
+       int ret;
+
+       if (!vdev->pci_ops || !vdev->pci_ops->get_dmabuf_phys)
+               return -EOPNOTSUPP;
+
+       ret = vfio_check_feature(flags, argsz,
+                                VFIO_DEVICE_FEATURE_SET,
+                                sizeof(db_revoke));
+       if (ret != 1)
+               return ret;
+
+       if (copy_from_user(&db_revoke, arg, sizeof(db_revoke)))
+               return -EFAULT;
+
+       dmabuf = dma_buf_get(db_revoke.dmabuf_fd);
+       if (IS_ERR(dmabuf))
+               return PTR_ERR(dmabuf);
+
+       priv = dmabuf->priv;
+       /*
+        * Sanity-check the DMABUF is really a vfio_pci_dma_buf _and_
+        * relates to the VFIO device it was provided with.
+        *
+        * If the DMABUF relates to this vdev then priv->vdev is
+        * stable because this open fd prevents cleanup.
+        *
+        * If it relates to a different vdev, reading priv->vdev might
+        * race with a concurrent cleanup on that device.  But if so,
+        * it points to a non-matching vdev or NULL and is unusable
+        * either way.
+        */
+       if (dmabuf->ops != &vfio_pci_dmabuf_ops ||
+           READ_ONCE(priv->vdev) != vdev) {
+               ret = -ENODEV;
+               goto out_put_buf;
+       }
+
+       scoped_guard(rwsem_write, &vdev->memory_lock) {
+               if (priv->status == VFIO_PCI_DMABUF_PERM_REVOKED) {
+                       ret = -EBADFD;
+               } else {
+                       vfio_pci_dma_buf_set_status(priv,
+                                                   
VFIO_PCI_DMABUF_PERM_REVOKED);
+                       ret = 0;
+               }
+       }
+
+out_put_buf:
+       dma_buf_put(dmabuf);
+
+       return ret;
+}
+#endif /* CONFIG_VFIO_PCI_DMABUF */
diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
index db2e2aeae88f..3c2f2575b670 100644
--- a/drivers/vfio/pci/vfio_pci_priv.h
+++ b/drivers/vfio/pci/vfio_pci_priv.h
@@ -23,6 +23,12 @@ struct vfio_pci_ioeventfd {
        bool                    test_mem;
 };
 
+enum vfio_pci_dma_buf_status {
+       VFIO_PCI_DMABUF_OK = 0,
+       VFIO_PCI_DMABUF_TEMP_REVOKED = 1,
+       VFIO_PCI_DMABUF_PERM_REVOKED = 2,
+};
+
 struct vfio_pci_dma_buf {
        struct dma_buf *dmabuf;
        struct vfio_pci_core_device *vdev;
@@ -35,7 +41,7 @@ struct vfio_pci_dma_buf {
        struct kref kref;
        struct completion comp;
        unsigned long vma_pgoff_adjust;
-       u8 revoked : 1;
+       enum vfio_pci_dma_buf_status status;
 };
 
 extern const struct vm_operations_struct vfio_pci_mmap_ops;
@@ -148,6 +154,10 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device 
*vdev, bool revoked);
 int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
                                  struct vfio_device_feature_dma_buf __user 
*arg,
                                  size_t argsz);
+int vfio_pci_core_feature_dma_buf_revoke(
+       struct vfio_pci_core_device *vdev, u32 flags,
+       struct vfio_device_feature_dma_buf_revoke __user *arg,
+       size_t argsz);
 #else
 static inline int
 vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
@@ -156,6 +166,13 @@ vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device 
*vdev, u32 flags,
 {
        return -ENOTTY;
 }
+static inline int vfio_pci_core_feature_dma_buf_revoke(
+       struct vfio_pci_core_device *vdev, u32 flags,
+       struct vfio_device_feature_dma_buf_revoke __user *arg,
+       size_t argsz)
+{
+       return -ENOTTY;
+}
 #endif
 
 #endif
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 5de618a3a5ee..697c0bb4b9bc 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -1534,6 +1534,26 @@ struct vfio_device_feature_dma_buf {
  */
 #define VFIO_DEVICE_FEATURE_MIG_PRECOPY_INFOv2  12
 
+/**
+ * Given a dma_buf fd previously created by
+ * VFIO_DEVICE_FEATURE_DMA_BUF, a SET of this feature requests that
+ * access to the corresponding DMABUF is immediately and permanently
+ * revoked.  On successful return, the buffer is not accessible
+ * through any mmap() or dma-buf import.  The buffer is permanently
+ * disabled, and VFIO refuses all map, mmap, attach, etc. requests.
+ *
+ * Return: 0 on success, -1 and errno is set on failure:
+ *
+ *  EBADF, EINVAL: dmabuf_fd is not a DMABUF fd.
+ *  ENODEV: The dmabuf_fd does not match this VFIO device.
+ *  EBADFD: The DMABUF is already permanently revoked.
+ */
+#define VFIO_DEVICE_FEATURE_DMA_BUF_REVOKE 13
+
+struct vfio_device_feature_dma_buf_revoke {
+       __s32   dmabuf_fd;
+};
+
 /* -------- API for Type1 VFIO IOMMU -------- */
 
 /**
-- 
2.50.1 (Apple Git-155)

Reply via email to