On Thu, 3 Nov 2022 18:16:10 +0200
Avihai Horon <avih...@nvidia.com> wrote:

> Currently, if IOMMU of a VFIO container doesn't support dirty page
> tracking, migration is blocked. This is because a DMA-able VFIO device
> can dirty RAM pages without updating QEMU about it, thus breaking the
> migration.
> 
> However, this doesn't mean that migration can't be done at all.
> In such case, allow migration and let QEMU VFIO code mark the entire
> bitmap dirty.
> 
> This guarantees that all pages that might have gotten dirty are reported
> back, and thus guarantees a valid migration even without VFIO IOMMU
> dirty tracking support.
> 
> The motivation for this patch is the future introduction of iommufd [1].
> iommufd will directly implement the /dev/vfio/vfio container IOCTLs by
> mapping them into its internal ops, allowing the usage of these IOCTLs
> over iommufd. However, VFIO IOMMU dirty tracking will not be supported
> by this VFIO compatibility API.
> 
> This patch will allow migration by hosts that use the VFIO compatibility
> API and prevent migration regressions caused by the lack of VFIO IOMMU
> dirty tracking support.
> 
> [1] https://lore.kernel.org/kvm/0-v2-f9436d0bde78+4bb-iommufd_...@nvidia.com/
> 
> Signed-off-by: Avihai Horon <avih...@nvidia.com>
> ---
>  hw/vfio/common.c    | 84 +++++++++++++++++++++++++++++++++++++--------
>  hw/vfio/migration.c |  3 +-
>  2 files changed, 70 insertions(+), 17 deletions(-)

This duplicates quite a bit of code, I think we can integrate this into
a common flow quite a bit more.  See below, only compile tested. Thanks,

Alex

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 6b5d8c0bf694..4117b40fd9b0 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -397,17 +397,33 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container,
                                  IOMMUTLBEntry *iotlb)
 {
     struct vfio_iommu_type1_dma_unmap *unmap;
-    struct vfio_bitmap *bitmap;
+    struct vfio_bitmap *vbitmap;
+    unsigned long *bitmap;
+    uint64_t bitmap_size;
     uint64_t pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size();
     int ret;
 
-    unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap));
+    unmap = g_malloc0(sizeof(*unmap) + sizeof(*vbitmap));
 
-    unmap->argsz = sizeof(*unmap) + sizeof(*bitmap);
+    unmap->argsz = sizeof(*unmap);
     unmap->iova = iova;
     unmap->size = size;
-    unmap->flags |= VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP;
-    bitmap = (struct vfio_bitmap *)&unmap->data;
+
+    bitmap_size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
+                  BITS_PER_BYTE;
+    bitmap = g_try_malloc0(bitmap_size);
+    if (!bitmap) {
+        ret = -ENOMEM;
+        goto unmap_exit;
+    }
+
+    if (!container->dirty_pages_supported) {
+        bitmap_set(bitmap, 0, pages);
+        goto do_unmap;
+    }
+
+    unmap->argsz += sizeof(*vbitmap);
+    unmap->flags = VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP;
 
     /*
      * cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of
@@ -415,33 +431,28 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container,
      * to qemu_real_host_page_size.
      */
 
-    bitmap->pgsize = qemu_real_host_page_size();
-    bitmap->size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
-                   BITS_PER_BYTE;
+    vbitmap = (struct vfio_bitmap *)&unmap->data;
+    vbitmap->data = (__u64 *)bitmap;
+    vbitmap->pgsize = qemu_real_host_page_size();
+    vbitmap->size = bitmap_size;
 
-    if (bitmap->size > container->max_dirty_bitmap_size) {
-        error_report("UNMAP: Size of bitmap too big 0x%"PRIx64,
-                     (uint64_t)bitmap->size);
+    if (bitmap_size > container->max_dirty_bitmap_size) {
+        error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, bitmap_size);
         ret = -E2BIG;
         goto unmap_exit;
     }
 
-    bitmap->data = g_try_malloc0(bitmap->size);
-    if (!bitmap->data) {
-        ret = -ENOMEM;
-        goto unmap_exit;
-    }
-
+do_unmap:
     ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
     if (!ret) {
-        cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data,
-                iotlb->translated_addr, pages);
+        cpu_physical_memory_set_dirty_lebitmap(bitmap, iotlb->translated_addr,
+                                               pages);
     } else {
         error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m");
     }
 
-    g_free(bitmap->data);
 unmap_exit:
+    g_free(bitmap);
     g_free(unmap);
     return ret;
 }
@@ -460,8 +471,7 @@ static int vfio_dma_unmap(VFIOContainer *container,
         .size = size,
     };
 
-    if (iotlb && container->dirty_pages_supported &&
-        vfio_devices_all_running_and_saving(container)) {
+    if (iotlb && vfio_devices_all_running_and_saving(container)) {
         return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
     }
 
@@ -1257,6 +1267,10 @@ static void vfio_set_dirty_page_tracking(VFIOContainer 
*container, bool start)
         .argsz = sizeof(dirty),
     };
 
+    if (!container->dirty_pages_supported) {
+        return;
+    }
+
     if (start) {
         dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
     } else {
@@ -1287,11 +1301,26 @@ static void 
vfio_listener_log_global_stop(MemoryListener *listener)
 static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
                                  uint64_t size, ram_addr_t ram_addr)
 {
-    struct vfio_iommu_type1_dirty_bitmap *dbitmap;
+    struct vfio_iommu_type1_dirty_bitmap *dbitmap = NULL;
     struct vfio_iommu_type1_dirty_bitmap_get *range;
+    unsigned long *bitmap;
+    uint64_t bitmap_size;
     uint64_t pages;
     int ret;
 
+    pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size();
+    bitmap_size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
+                           BITS_PER_BYTE;
+    bitmap = g_try_malloc0(bitmap_size);
+    if (!bitmap) {
+        return -ENOMEM;
+    }
+
+    if (!container->dirty_pages_supported) {
+        bitmap_set(bitmap, 0, pages);
+        goto set_dirty;
+    }
+
     dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));
 
     dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
@@ -1306,15 +1335,8 @@ static int vfio_get_dirty_bitmap(VFIOContainer 
*container, uint64_t iova,
      * to qemu_real_host_page_size.
      */
     range->bitmap.pgsize = qemu_real_host_page_size();
-
-    pages = REAL_HOST_PAGE_ALIGN(range->size) / qemu_real_host_page_size();
-    range->bitmap.size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
-                                         BITS_PER_BYTE;
-    range->bitmap.data = g_try_malloc0(range->bitmap.size);
-    if (!range->bitmap.data) {
-        ret = -ENOMEM;
-        goto err_out;
-    }
+    range->bitmap.size = bitmap_size;
+    range->bitmap.data = (__u64 *)bitmap;
 
     ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
     if (ret) {
@@ -1324,13 +1346,13 @@ static int vfio_get_dirty_bitmap(VFIOContainer 
*container, uint64_t iova,
         goto err_out;
     }
 
-    cpu_physical_memory_set_dirty_lebitmap((unsigned long *)range->bitmap.data,
-                                            ram_addr, pages);
+set_dirty:
+    cpu_physical_memory_set_dirty_lebitmap(bitmap, ram_addr, pages);
 
-    trace_vfio_get_dirty_bitmap(container->fd, range->iova, range->size,
-                                range->bitmap.size, ram_addr);
+    trace_vfio_get_dirty_bitmap(container->fd, iova, size,
+                                bitmap_size, ram_addr);
 err_out:
-    g_free(range->bitmap.data);
+    g_free(bitmap);
     g_free(dbitmap);
 
     return ret;
@@ -1465,8 +1487,7 @@ static void vfio_listener_log_sync(MemoryListener 
*listener,
 {
     VFIOContainer *container = container_of(listener, VFIOContainer, listener);
 
-    if (vfio_listener_skipped_section(section) ||
-        !container->dirty_pages_supported) {
+    if (vfio_listener_skipped_section(section)) {
         return;
     }
 
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index f5e72c7ac198..99ffb7578290 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -857,11 +857,10 @@ int64_t vfio_mig_bytes_transferred(void)
 
 int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
 {
-    VFIOContainer *container = vbasedev->group->container;
     struct vfio_region_info *info = NULL;
     int ret = -ENOTSUP;
 
-    if (!vbasedev->enable_migration || !container->dirty_pages_supported) {
+    if (!vbasedev->enable_migration) {
         goto add_blocker;
     }
 


Reply via email to