On Fri, Jan 30, 2026 at 09:30:59AM +0100, Christian König wrote:
> On 1/24/26 20:14, Leon Romanovsky wrote:
> > From: Leon Romanovsky <[email protected]>
> > 
> > dma-buf invalidation is handled asynchronously by the hardware, so VFIO
> > must wait until all affected objects have been fully invalidated.
> > 
> > In addition, the dma-buf exporter is expecting that all importers unmap any
> > buffers they previously mapped.
> > 
> > Fixes: 5d74781ebc86 ("vfio/pci: Add dma-buf export support for MMIO 
> > regions")
> > Signed-off-by: Leon Romanovsky <[email protected]>
> > ---
> >  drivers/vfio/pci/vfio_pci_dmabuf.c | 71 
> > ++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 68 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c 
> > b/drivers/vfio/pci/vfio_pci_dmabuf.c
> > index d8ceafabef48..485515629fe4 100644
> > --- a/drivers/vfio/pci/vfio_pci_dmabuf.c
> > +++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
> > @@ -17,6 +17,8 @@ struct vfio_pci_dma_buf {
> >     struct dma_buf_phys_vec *phys_vec;
> >     struct p2pdma_provider *provider;
> >     u32 nr_ranges;
> > +   struct kref kref;
> > +   struct completion comp;
> >     u8 revoked : 1;
> >  };
> >  
> > @@ -44,27 +46,46 @@ static int vfio_pci_dma_buf_attach(struct dma_buf 
> > *dmabuf,
> >     return 0;
> >  }
> >  
> > +static void vfio_pci_dma_buf_done(struct kref *kref)
> > +{
> > +   struct vfio_pci_dma_buf *priv =
> > +           container_of(kref, struct vfio_pci_dma_buf, kref);
> > +
> > +   complete(&priv->comp);
> > +}
> > +
> >  static struct sg_table *
> >  vfio_pci_dma_buf_map(struct dma_buf_attachment *attachment,
> >                  enum dma_data_direction dir)
> >  {
> >     struct vfio_pci_dma_buf *priv = attachment->dmabuf->priv;
> > +   struct sg_table *ret;
> >  
> >     dma_resv_assert_held(priv->dmabuf->resv);
> >  
> >     if (priv->revoked)
> >             return ERR_PTR(-ENODEV);
> >  
> > -   return dma_buf_phys_vec_to_sgt(attachment, priv->provider,
> > -                                  priv->phys_vec, priv->nr_ranges,
> > -                                  priv->size, dir);
> > +   ret = dma_buf_phys_vec_to_sgt(attachment, priv->provider,
> > +                                 priv->phys_vec, priv->nr_ranges,
> > +                                 priv->size, dir);
> > +   if (IS_ERR(ret))
> > +           return ret;
> > +
> > +   kref_get(&priv->kref);
> > +   return ret;
> >  }
> >  
> >  static void vfio_pci_dma_buf_unmap(struct dma_buf_attachment *attachment,
> >                                struct sg_table *sgt,
> >                                enum dma_data_direction dir)
> >  {
> > +   struct vfio_pci_dma_buf *priv = attachment->dmabuf->priv;
> > +
> > +   dma_resv_assert_held(priv->dmabuf->resv);
> > +
> >     dma_buf_free_sgt(attachment, sgt, dir);
> > +   kref_put(&priv->kref, vfio_pci_dma_buf_done);
> >  }
> >  
> >  static void vfio_pci_dma_buf_release(struct dma_buf *dmabuf)
> > @@ -287,6 +308,9 @@ int vfio_pci_core_feature_dma_buf(struct 
> > vfio_pci_core_device *vdev, u32 flags,
> >             goto err_dev_put;
> >     }
> >  
> > +   kref_init(&priv->kref);
> > +   init_completion(&priv->comp);
> > +
> >     /* dma_buf_put() now frees priv */
> >     INIT_LIST_HEAD(&priv->dmabufs_elm);
> >     down_write(&vdev->memory_lock);
> > @@ -326,6 +350,8 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device 
> > *vdev, bool revoked)
> >     lockdep_assert_held_write(&vdev->memory_lock);
> >  
> >     list_for_each_entry_safe(priv, tmp, &vdev->dmabufs, dmabufs_elm) {
> > +           unsigned long wait;
> > +
> >             if (!get_file_active(&priv->dmabuf->file))
> >                     continue;
> >  
> > @@ -333,7 +359,37 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device 
> > *vdev, bool revoked)
> >                     dma_resv_lock(priv->dmabuf->resv, NULL);
> >                     priv->revoked = revoked;
> >                     dma_buf_invalidate_mappings(priv->dmabuf);
> > +                   dma_resv_wait_timeout(priv->dmabuf->resv,
> > +                                         DMA_RESV_USAGE_BOOKKEEP, false,
> > +                                         MAX_SCHEDULE_TIMEOUT);
> >                     dma_resv_unlock(priv->dmabuf->resv);
> > +                   if (revoked) {
> > +                           kref_put(&priv->kref, vfio_pci_dma_buf_done);
> > +                           /* Let's wait till all DMA unmap are completed. 
> > */
> > +                           wait = wait_for_completion_timeout(
> > +                                   &priv->comp, secs_to_jiffies(1));
> > +                           /*
> > +                            * If you see this WARN_ON, it means that
> > +                            * importer didn't call unmap in response to
> > +                            * dma_buf_invalidate_mappings() which is not
> > +                            * allowed.
> > +                            */
> > +                           WARN(!wait,
> > +                                "Timed out waiting for DMABUF unmap, 
> > importer has a broken invalidate_mapping()");
> 
> You can do the revoke to do your resource management, for example re-use the 
> backing store for something else.
> 
> But it is mandatory that you keep the mapping around indefinitely until the 
> importer closes it.
> 
> Before that you can't do things like runtime PM or remove or anything which 
> would make the DMA addresses invalid.
> 
> As far as I can see vfio_pci_dma_buf_move() is used exactly for that use case 
> so this here is an absolutely clear NAK from my side for this approach.
> 
> You can either split up the functionality of vfio_pci_dma_buf_move() into 
> vfio_pci_dma_buf_invalidate_mappings() and vfio_pci_dma_buf_flush() and then 
> call the later whenever necessary or you keep it in one function and block 
> everybody until the importer has dropped all mappings.

No problem, I can change it to be:

diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c 
b/drivers/vfio/pci/vfio_pci_dmabuf.c
index d087d018d547..53772a84c93b 100644
--- a/drivers/vfio/pci/vfio_pci_dmabuf.c
+++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
@@ -357,23 +357,7 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device 
*vdev, bool revoked)
                        dma_resv_unlock(priv->dmabuf->resv);
                        if (revoked) {
                                kref_put(&priv->kref, vfio_pci_dma_buf_done);
-                               /*
-                                * Let's wait for 1 second till all DMA unmap
-                                * are completed. It is supposed to catch 
dma-buf
-                                * importers which lied about their support
-                                * of dmabuf revoke. See 
dma_buf_invalidate_mappings()
-                                * for the expected behaviour.
-                                */
-                               wait = wait_for_completion_timeout(
-                                       &priv->comp, secs_to_jiffies(1));
-                               /*
-                                * If you see this WARN_ON, it means that
-                                * importer didn't call unmap in response to
-                                * dma_buf_invalidate_mappings() which is not
-                                * allowed.
-                                */
-                               WARN(!wait,
-                                    "Timed out waiting for DMABUF unmap, 
importer has a broken invalidate_mapping()");
+                               wait_for_completion(&priv->comp);
                        } else {
                                /*
                                 * Kref is initialize again, because when revoke

Do you want me to send v6?

Thanks

> 
> > +                   } else {
> > +                           /*
> > +                            * Kref is initialize again, because when revoke
> > +                            * was performed the reference counter was 
> > decreased
> > +                            * to zero to trigger completion.
> > +                            */
> > +                           kref_init(&priv->kref);
> > +                           /*
> > +                            * There is no need to wait as no mapping was
> > +                            * performed when the previous status was
> > +                            * priv->revoked == true.
> > +                            */
> > +                           reinit_completion(&priv->comp);
> > +                   }
> >             }
> >             fput(priv->dmabuf->file);
> 
> This is also extremely questionable. Why doesn't the dmabuf have a reference 
> while on the linked list?
> 
> Regards,
> Christian.
> 
> >     }
> > @@ -346,6 +402,8 @@ void vfio_pci_dma_buf_cleanup(struct 
> > vfio_pci_core_device *vdev)
> >  
> >     down_write(&vdev->memory_lock);
> >     list_for_each_entry_safe(priv, tmp, &vdev->dmabufs, dmabufs_elm) {
> > +           unsigned long wait;
> > +
> >             if (!get_file_active(&priv->dmabuf->file))
> >                     continue;
> >  
> > @@ -354,7 +412,14 @@ void vfio_pci_dma_buf_cleanup(struct 
> > vfio_pci_core_device *vdev)
> >             priv->vdev = NULL;
> >             priv->revoked = true;
> >             dma_buf_invalidate_mappings(priv->dmabuf);
> > +           dma_resv_wait_timeout(priv->dmabuf->resv,
> > +                                 DMA_RESV_USAGE_BOOKKEEP, false,
> > +                                 MAX_SCHEDULE_TIMEOUT);
> >             dma_resv_unlock(priv->dmabuf->resv);
> > +           kref_put(&priv->kref, vfio_pci_dma_buf_done);
> > +           wait = wait_for_completion_timeout(&priv->comp,
> > +                                              secs_to_jiffies(1));
> > +           WARN_ON(!wait);
> >             vfio_device_put_registration(&vdev->vdev);
> >             fput(priv->dmabuf->file);
> >     }
> > 
> 
> 

Reply via email to