Re: [PATCH v8 04/13] vfio/migration: Allow migration without VFIO IOMMU dirty tracking support

2023-01-22 Thread Avihai Horon



On 21/01/2023 1:06, Alex Williamson wrote:

External email: Use caution opening links or attachments


On Mon, 16 Jan 2023 16:11:26 +0200
Avihai Horon  wrote:


Currently, if IOMMU of a VFIO container doesn't support dirty page
tracking, migration is blocked. This is because a DMA-able VFIO device
can dirty RAM pages without updating QEMU about it, thus breaking the
migration.

However, this doesn't mean that migration can't be done at all.
In such case, allow migration and let QEMU VFIO code mark all pages
dirty.

This guarantees that all pages that might have gotten dirty are reported
back, and thus guarantees a valid migration even without VFIO IOMMU
dirty tracking support.

The motivation for this patch is the introduction of iommufd [1].
iommufd can directly implement the /dev/vfio/vfio container IOCTLs by
mapping them into its internal ops, allowing the usage of these IOCTLs
over iommufd. However, VFIO IOMMU dirty tracking is not supported by
this VFIO compatibility API.

This patch will allow migration by hosts that use the VFIO compatibility
API and prevent migration regressions caused by the lack of VFIO IOMMU
dirty tracking support.

[1]
https://lore.kernel.org/kvm/0-v6-a196d26f289e+11787-iommufd_...@nvidia.com/

Signed-off-by: Avihai Horon 
---
  hw/vfio/common.c| 20 ++--
  hw/vfio/migration.c |  3 +--
  2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 130e5d1dc7..f6dd571549 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -488,6 +488,12 @@ static int vfio_dma_unmap(VFIOContainer *container,
  return -errno;
  }

+if (iotlb && vfio_devices_all_running_and_saving(container)) {
+cpu_physical_memory_set_dirty_range(iotlb->translated_addr, size,
+tcg_enabled() ? DIRTY_CLIENTS_ALL :
+DIRTY_CLIENTS_NOCODE);

I take it this is an attempt to decipher the mask arg based on its use
in cpu_physical_memory_set_dirty_lebitmap().


Right.


I'm attempting to do the
same.  It seems like it must logically be the case that
global_dirty_tracking is set to pass the running-and-saving test, but I
can't connect the pieces.  Is this your understanding as well and the
reason we don't also need to optionally exclude DIRTY_MEMORY_MIGRATION?


Yes, this is how I understood it.
Running-and-saving test is passed only if migration has started, and if 
migration has started, global_dirty_tracking is set.


So global_dirty_tracking should be set and we don't need to check for 
DIRTY_MEMORY_MIGRATION exclusion.



Thanks,

Alex


+}
+
  return 0;
  }

@@ -1201,6 +1207,10 @@ static void vfio_set_dirty_page_tracking(VFIOContainer 
*container, bool start)
  .argsz = sizeof(dirty),
  };

+if (!container->dirty_pages_supported) {
+return;
+}
+
  if (start) {
  dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
  } else {
@@ -1236,6 +1246,13 @@ static int vfio_get_dirty_bitmap(VFIOContainer 
*container, uint64_t iova,
  uint64_t pages;
  int ret;

+if (!container->dirty_pages_supported) {
+cpu_physical_memory_set_dirty_range(ram_addr, size,
+tcg_enabled() ? DIRTY_CLIENTS_ALL :
+DIRTY_CLIENTS_NOCODE);
+return 0;
+}
+
  dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));

  dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
@@ -1409,8 +1426,7 @@ static void vfio_listener_log_sync(MemoryListener 
*listener,
  {
  VFIOContainer *container = container_of(listener, VFIOContainer, 
listener);

-if (vfio_listener_skipped_section(section) ||
-!container->dirty_pages_supported) {
+if (vfio_listener_skipped_section(section)) {
  return;
  }

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 09fe7c1de2..552c2313b2 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -860,11 +860,10 @@ int64_t vfio_mig_bytes_transferred(void)

  int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
  {
-VFIOContainer *container = vbasedev->group->container;
  struct vfio_region_info *info = NULL;
  int ret = -ENOTSUP;

-if (!vbasedev->enable_migration || !container->dirty_pages_supported) {
+if (!vbasedev->enable_migration) {
  goto add_blocker;
  }





Re: [PATCH v8 04/13] vfio/migration: Allow migration without VFIO IOMMU dirty tracking support

2023-01-20 Thread Alex Williamson
On Mon, 16 Jan 2023 16:11:26 +0200
Avihai Horon  wrote:

> Currently, if IOMMU of a VFIO container doesn't support dirty page
> tracking, migration is blocked. This is because a DMA-able VFIO device
> can dirty RAM pages without updating QEMU about it, thus breaking the
> migration.
> 
> However, this doesn't mean that migration can't be done at all.
> In such case, allow migration and let QEMU VFIO code mark all pages
> dirty.
> 
> This guarantees that all pages that might have gotten dirty are reported
> back, and thus guarantees a valid migration even without VFIO IOMMU
> dirty tracking support.
> 
> The motivation for this patch is the introduction of iommufd [1].
> iommufd can directly implement the /dev/vfio/vfio container IOCTLs by
> mapping them into its internal ops, allowing the usage of these IOCTLs
> over iommufd. However, VFIO IOMMU dirty tracking is not supported by
> this VFIO compatibility API.
> 
> This patch will allow migration by hosts that use the VFIO compatibility
> API and prevent migration regressions caused by the lack of VFIO IOMMU
> dirty tracking support.
> 
> [1]
> https://lore.kernel.org/kvm/0-v6-a196d26f289e+11787-iommufd_...@nvidia.com/
> 
> Signed-off-by: Avihai Horon 
> ---
>  hw/vfio/common.c| 20 ++--
>  hw/vfio/migration.c |  3 +--
>  2 files changed, 19 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 130e5d1dc7..f6dd571549 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -488,6 +488,12 @@ static int vfio_dma_unmap(VFIOContainer *container,
>  return -errno;
>  }
>  
> +if (iotlb && vfio_devices_all_running_and_saving(container)) {
> +cpu_physical_memory_set_dirty_range(iotlb->translated_addr, size,
> +tcg_enabled() ? 
> DIRTY_CLIENTS_ALL :
> +DIRTY_CLIENTS_NOCODE);

I take it this is an attempt to decipher the mask arg based on its use
in cpu_physical_memory_set_dirty_lebitmap().  I'm attempting to do the
same.  It seems like it must logically be the case that
global_dirty_tracking is set to pass the running-and-saving test, but I
can't connect the pieces.  Is this your understanding as well and the
reason we don't also need to optionally exclude DIRTY_MEMORY_MIGRATION?
Thanks,

Alex

> +}
> +
>  return 0;
>  }
>  
> @@ -1201,6 +1207,10 @@ static void vfio_set_dirty_page_tracking(VFIOContainer 
> *container, bool start)
>  .argsz = sizeof(dirty),
>  };
>  
> +if (!container->dirty_pages_supported) {
> +return;
> +}
> +
>  if (start) {
>  dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
>  } else {
> @@ -1236,6 +1246,13 @@ static int vfio_get_dirty_bitmap(VFIOContainer 
> *container, uint64_t iova,
>  uint64_t pages;
>  int ret;
>  
> +if (!container->dirty_pages_supported) {
> +cpu_physical_memory_set_dirty_range(ram_addr, size,
> +tcg_enabled() ? 
> DIRTY_CLIENTS_ALL :
> +DIRTY_CLIENTS_NOCODE);
> +return 0;
> +}
> +
>  dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));
>  
>  dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
> @@ -1409,8 +1426,7 @@ static void vfio_listener_log_sync(MemoryListener 
> *listener,
>  {
>  VFIOContainer *container = container_of(listener, VFIOContainer, 
> listener);
>  
> -if (vfio_listener_skipped_section(section) ||
> -!container->dirty_pages_supported) {
> +if (vfio_listener_skipped_section(section)) {
>  return;
>  }
>  
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index 09fe7c1de2..552c2313b2 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -860,11 +860,10 @@ int64_t vfio_mig_bytes_transferred(void)
>  
>  int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
>  {
> -VFIOContainer *container = vbasedev->group->container;
>  struct vfio_region_info *info = NULL;
>  int ret = -ENOTSUP;
>  
> -if (!vbasedev->enable_migration || !container->dirty_pages_supported) {
> +if (!vbasedev->enable_migration) {
>  goto add_blocker;
>  }
>  




Re: [PATCH v8 04/13] vfio/migration: Allow migration without VFIO IOMMU dirty tracking support

2023-01-16 Thread Cédric Le Goater

On 1/16/23 15:11, Avihai Horon wrote:

Currently, if IOMMU of a VFIO container doesn't support dirty page
tracking, migration is blocked. This is because a DMA-able VFIO device
can dirty RAM pages without updating QEMU about it, thus breaking the
migration.

However, this doesn't mean that migration can't be done at all.
In such case, allow migration and let QEMU VFIO code mark all pages
dirty.

This guarantees that all pages that might have gotten dirty are reported
back, and thus guarantees a valid migration even without VFIO IOMMU
dirty tracking support.

The motivation for this patch is the introduction of iommufd [1].
iommufd can directly implement the /dev/vfio/vfio container IOCTLs by
mapping them into its internal ops, allowing the usage of these IOCTLs
over iommufd. However, VFIO IOMMU dirty tracking is not supported by
this VFIO compatibility API.

This patch will allow migration by hosts that use the VFIO compatibility
API and prevent migration regressions caused by the lack of VFIO IOMMU
dirty tracking support.

[1]
https://lore.kernel.org/kvm/0-v6-a196d26f289e+11787-iommufd_...@nvidia.com/

Signed-off-by: Avihai Horon 


Reviewed-by: Cédric Le Goater 

Thanks,

C.


---
  hw/vfio/common.c| 20 ++--
  hw/vfio/migration.c |  3 +--
  2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 130e5d1dc7..f6dd571549 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -488,6 +488,12 @@ static int vfio_dma_unmap(VFIOContainer *container,
  return -errno;
  }
  
+if (iotlb && vfio_devices_all_running_and_saving(container)) {

+cpu_physical_memory_set_dirty_range(iotlb->translated_addr, size,
+tcg_enabled() ? DIRTY_CLIENTS_ALL :
+DIRTY_CLIENTS_NOCODE);
+}
+
  return 0;
  }
  
@@ -1201,6 +1207,10 @@ static void vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)

  .argsz = sizeof(dirty),
  };
  
+if (!container->dirty_pages_supported) {

+return;
+}
+
  if (start) {
  dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
  } else {
@@ -1236,6 +1246,13 @@ static int vfio_get_dirty_bitmap(VFIOContainer 
*container, uint64_t iova,
  uint64_t pages;
  int ret;
  
+if (!container->dirty_pages_supported) {

+cpu_physical_memory_set_dirty_range(ram_addr, size,
+tcg_enabled() ? DIRTY_CLIENTS_ALL :
+DIRTY_CLIENTS_NOCODE);
+return 0;
+}
+
  dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));
  
  dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);

@@ -1409,8 +1426,7 @@ static void vfio_listener_log_sync(MemoryListener 
*listener,
  {
  VFIOContainer *container = container_of(listener, VFIOContainer, 
listener);
  
-if (vfio_listener_skipped_section(section) ||

-!container->dirty_pages_supported) {
+if (vfio_listener_skipped_section(section)) {
  return;
  }
  
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c

index 09fe7c1de2..552c2313b2 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -860,11 +860,10 @@ int64_t vfio_mig_bytes_transferred(void)
  
  int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)

  {
-VFIOContainer *container = vbasedev->group->container;
  struct vfio_region_info *info = NULL;
  int ret = -ENOTSUP;
  
-if (!vbasedev->enable_migration || !container->dirty_pages_supported) {

+if (!vbasedev->enable_migration) {
  goto add_blocker;
  }
  





[PATCH v8 04/13] vfio/migration: Allow migration without VFIO IOMMU dirty tracking support

2023-01-16 Thread Avihai Horon
Currently, if IOMMU of a VFIO container doesn't support dirty page
tracking, migration is blocked. This is because a DMA-able VFIO device
can dirty RAM pages without updating QEMU about it, thus breaking the
migration.

However, this doesn't mean that migration can't be done at all.
In such case, allow migration and let QEMU VFIO code mark all pages
dirty.

This guarantees that all pages that might have gotten dirty are reported
back, and thus guarantees a valid migration even without VFIO IOMMU
dirty tracking support.

The motivation for this patch is the introduction of iommufd [1].
iommufd can directly implement the /dev/vfio/vfio container IOCTLs by
mapping them into its internal ops, allowing the usage of these IOCTLs
over iommufd. However, VFIO IOMMU dirty tracking is not supported by
this VFIO compatibility API.

This patch will allow migration by hosts that use the VFIO compatibility
API and prevent migration regressions caused by the lack of VFIO IOMMU
dirty tracking support.

[1]
https://lore.kernel.org/kvm/0-v6-a196d26f289e+11787-iommufd_...@nvidia.com/

Signed-off-by: Avihai Horon 
---
 hw/vfio/common.c| 20 ++--
 hw/vfio/migration.c |  3 +--
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 130e5d1dc7..f6dd571549 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -488,6 +488,12 @@ static int vfio_dma_unmap(VFIOContainer *container,
 return -errno;
 }
 
+if (iotlb && vfio_devices_all_running_and_saving(container)) {
+cpu_physical_memory_set_dirty_range(iotlb->translated_addr, size,
+tcg_enabled() ? DIRTY_CLIENTS_ALL :
+DIRTY_CLIENTS_NOCODE);
+}
+
 return 0;
 }
 
@@ -1201,6 +1207,10 @@ static void vfio_set_dirty_page_tracking(VFIOContainer 
*container, bool start)
 .argsz = sizeof(dirty),
 };
 
+if (!container->dirty_pages_supported) {
+return;
+}
+
 if (start) {
 dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
 } else {
@@ -1236,6 +1246,13 @@ static int vfio_get_dirty_bitmap(VFIOContainer 
*container, uint64_t iova,
 uint64_t pages;
 int ret;
 
+if (!container->dirty_pages_supported) {
+cpu_physical_memory_set_dirty_range(ram_addr, size,
+tcg_enabled() ? DIRTY_CLIENTS_ALL :
+DIRTY_CLIENTS_NOCODE);
+return 0;
+}
+
 dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));
 
 dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
@@ -1409,8 +1426,7 @@ static void vfio_listener_log_sync(MemoryListener 
*listener,
 {
 VFIOContainer *container = container_of(listener, VFIOContainer, listener);
 
-if (vfio_listener_skipped_section(section) ||
-!container->dirty_pages_supported) {
+if (vfio_listener_skipped_section(section)) {
 return;
 }
 
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 09fe7c1de2..552c2313b2 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -860,11 +860,10 @@ int64_t vfio_mig_bytes_transferred(void)
 
 int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
 {
-VFIOContainer *container = vbasedev->group->container;
 struct vfio_region_info *info = NULL;
 int ret = -ENOTSUP;
 
-if (!vbasedev->enable_migration || !container->dirty_pages_supported) {
+if (!vbasedev->enable_migration) {
 goto add_blocker;
 }
 
-- 
2.26.3