Re: [PATCH V6 19/27] vfio-pci: cpr part 1 (fd and dma)

2021-11-30 Thread Steven Sistare
On 11/10/2021 2:48 AM, Zheng Chuan wrote:
> 
> Hi, steve
> 
> On 2021/8/11 1:06, Alex Williamson wrote:
>> On Fri,  6 Aug 2021 14:43:53 -0700
>> Steve Sistare  wrote:
>> [...]
>>> +static int
>>> +vfio_region_remap(MemoryRegionSection *section, void *handle, Error **errp)
>>> +{
>>> +MemoryRegion *mr = section->mr;
>>> +VFIOContainer *container = handle;
>>> +const char *name = memory_region_name(mr);
>>> +ram_addr_t size = int128_get64(section->size);
>>> +hwaddr offset, iova, roundup;
>>> +void *vaddr;
>>> +
>>> +if (vfio_listener_skipped_section(section) || 
>>> memory_region_is_iommu(mr)) {
>>> +return 0;
>>> +}
>>> +
>>> +offset = section->offset_within_address_space;
>>> +iova = REAL_HOST_PAGE_ALIGN(offset);
> We should not remap the section if it shares a host page with other structures.
> I think a check like int128_ge(int128_make64(iova), llend), as done in
> vfio_listener_region_add(), should also be added here;
> otherwise it will remap a non-existent DMA mapping, which causes the live update to fail.
> diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c
> index 0981d31..d231841 100644
> --- a/hw/vfio/cpr.c
> +++ b/hw/vfio/cpr.c
> @@ -58,13 +58,21 @@ vfio_region_remap(MemoryRegionSection *section, void 
> *handle, Error **errp)
>  ram_addr_t size = int128_get64(section->size);
>  hwaddr offset, iova, roundup;
>  void *vaddr;
> -
> +Int128 llend;
> +
>  if (vfio_listener_skipped_section(section) || 
> memory_region_is_iommu(mr)) {
>  return 0;
>  }
> 
>  offset = section->offset_within_address_space;
>  iova = REAL_HOST_PAGE_ALIGN(offset);
> +llend = int128_make64(section->offset_within_address_space);
> +llend = int128_add(llend, section->size);
> +llend = int128_and(llend, int128_exts64(qemu_real_host_page_mask));
> +if (int128_ge(int128_make64(iova), llend)) {
> +return 0;
> +}
> +
>  roundup = iova - offset;
>  size -= roundup;
>  size = REAL_HOST_PAGE_ALIGN(size);
> 
>>> +roundup = iova - offset;
>>> +size -= roundup;
>>> +size = REAL_HOST_PAGE_ALIGN(size);
>>> +vaddr = memory_region_get_ram_ptr(mr) +
>>> +section->offset_within_region + roundup;
>>> +
>>> +trace_vfio_region_remap(name, container->fd, iova, iova + size - 1, 
>>> vaddr);
>>> +return vfio_dma_map_vaddr(container, iova, size, vaddr, errp);
>>> +}

Thank you Zheng.  I intended to implement the logic you suggest, using 64-bit
arithmetic, but I botched it.  This should do the trick:

diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c
index df334d9..bbdeaea 100644
--- a/hw/vfio/cpr.c
+++ b/hw/vfio/cpr.c
@@ -66,8 +66,8 @@ vfio_region_remap(MemoryRegionSection *section, void *handle,
 offset = section->offset_within_address_space;
 iova = REAL_HOST_PAGE_ALIGN(offset);
 roundup = iova - offset;
-size -= roundup;
-size = REAL_HOST_PAGE_ALIGN(size);
+size -= roundup;/* adjust for starting alignment */
+size &= qemu_real_host_page_mask;   /* adjust for ending alignment */
 end = iova + size;
 if (iova >= end) {
 return 0;

- Steve



Re: [PATCH V6 19/27] vfio-pci: cpr part 1 (fd and dma)

2021-11-09 Thread Zheng Chuan


Hi, steve

On 2021/8/11 1:06, Alex Williamson wrote:
> On Fri,  6 Aug 2021 14:43:53 -0700
> Steve Sistare  wrote:
> 
>> Enable vfio-pci devices to be saved and restored across an exec restart
>> of qemu.
>>
>> At vfio creation time, save the value of vfio container, group, and device
>> descriptors in cpr state.
>>
>> In cpr-save and cpr-exec, suspend the use of virtual addresses in DMA
>> mappings with VFIO_DMA_UNMAP_FLAG_VADDR, because guest ram will be remapped
>> at a different VA after exec.  DMA to already-mapped pages continues.  Save
>> the msi message area as part of vfio-pci vmstate, save the interrupt and
>> notifier eventfd's in cpr state, and clear the close-on-exec flag for the
>> vfio descriptors.  The flag is not cleared earlier because the descriptors
>> should not persist across miscellaneous fork and exec calls that may be
>> performed during normal operation.
>>
>> On qemu restart, vfio_realize() finds the descriptor env vars, uses
>> the descriptors, and notes that the device is being reused.  Device and
>> iommu state is already configured, so operations in vfio_realize that
>> would modify the configuration are skipped for a reused device, including
>> vfio ioctl's and writes to PCI configuration space.  The result is that
>> vfio_realize constructs qemu data structures that reflect the current
>> state of the device.  However, the reconstruction is not complete until
>> cpr-load is called. cpr-load loads the msi data and finds eventfds in cpr
>> state.  It rebuilds vector data structures and attaches the interrupts to
>> the new KVM instance.  cpr-load then walks the flattened ranges of the
>> vfio_address_spaces and calls VFIO_DMA_MAP_FLAG_VADDR to inform the kernel
>> of the new VA's.  Lastly, it starts the VM and suppresses vfio device reset.
>>
>> This functionality is delivered by 3 patches for clarity.  Part 1 handles
>> device file descriptors and DMA.  Part 2 adds eventfd and MSI/MSI-X vector
>> support.  Part 3 adds INTX support.
>>
>> Signed-off-by: Steve Sistare 
>> ---
>>  MAINTAINERS   |   1 +
>>  hw/pci/pci.c  |   4 ++
>>  hw/vfio/common.c  |  69 --
>>  hw/vfio/cpr.c | 160 
>> ++
>>  hw/vfio/meson.build   |   1 +
>>  hw/vfio/pci.c |  57 +++
>>  hw/vfio/trace-events  |   1 +
>>  include/hw/pci/pci.h  |   1 +
>>  include/hw/vfio/vfio-common.h |   5 ++
>>  include/migration/cpr.h   |   3 +
>>  linux-headers/linux/vfio.h|   6 ++
>>  migration/cpr.c   |  10 ++-
>>  migration/target.c|  14 
>>  13 files changed, 325 insertions(+), 7 deletions(-)
>>  create mode 100644 hw/vfio/cpr.c
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index a9d2ed8..3132965 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -2904,6 +2904,7 @@ CPR
>>  M: Steve Sistare 
>>  M: Mark Kanda 
>>  S: Maintained
>> +F: hw/vfio/cpr.c
>>  F: include/migration/cpr.h
>>  F: migration/cpr.c
>>  F: qapi/cpr.json
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index 59408a3..b9c6ca1 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -307,6 +307,10 @@ static void pci_do_device_reset(PCIDevice *dev)
>>  {
>>  int r;
>>  
>> +if (dev->reused) {
>> +return;
>> +}
>> +
>>  pci_device_deassert_intx(dev);
>>  assert(dev->irq_state == 0);
>>  
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 7918c0d..872a1ac 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -31,6 +31,7 @@
>>  #include "exec/memory.h"
>>  #include "exec/ram_addr.h"
>>  #include "hw/hw.h"
>> +#include "migration/cpr.h"
>>  #include "qemu/error-report.h"
>>  #include "qemu/main-loop.h"
>>  #include "qemu/range.h"
>> @@ -464,6 +465,10 @@ static int vfio_dma_unmap(VFIOContainer *container,
>>  return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
>>  }
>>  
>> +if (container->reused) {
>> +return 0;
>> +}
>> +
>>  while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
>>  /*
>>   * The type1 backend has an off-by-one bug in the kernel 
>> (71a7d3d78e3c
>> @@ -501,6 +506,10 @@ static int vfio_dma_map(VFIOContainer *container, 
>> hwaddr iova,
>>  .size = size,
>>  };
>>  
>> +if (container->reused) {
>> +return 0;
>> +}
>> +
>>  if (!readonly) {
>>  map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
>>  }
>> @@ -1872,6 +1881,10 @@ static int vfio_init_container(VFIOContainer 
>> *container, int group_fd,
>>  if (iommu_type < 0) {
>>  return iommu_type;
>>  }
>> +if (container->reused) {
>> +container->iommu_type = iommu_type;
>> +return 0;
>> +}
>>  
> 
> I'd like to see more comments throughout, but particularly where we're
> dumping out of functions for reused containers, groups, and devices.
> For instance map/unmap we're assuming we'll reach the same IOMMU
> mapping 

Re: [PATCH V6 19/27] vfio-pci: cpr part 1 (fd and dma)

2021-08-23 Thread Steven Sistare
On 8/10/2021 1:06 PM, Alex Williamson wrote:
> On Fri,  6 Aug 2021 14:43:53 -0700
> Steve Sistare  wrote:
> 
>> Enable vfio-pci devices to be saved and restored across an exec restart
>> of qemu.
>>
>> At vfio creation time, save the value of vfio container, group, and device
>> descriptors in cpr state.
>>
>> In cpr-save and cpr-exec, suspend the use of virtual addresses in DMA
>> mappings with VFIO_DMA_UNMAP_FLAG_VADDR, because guest ram will be remapped
>> at a different VA after exec.  DMA to already-mapped pages continues.  Save
>> the msi message area as part of vfio-pci vmstate, save the interrupt and
>> notifier eventfd's in cpr state, and clear the close-on-exec flag for the
>> vfio descriptors.  The flag is not cleared earlier because the descriptors
>> should not persist across miscellaneous fork and exec calls that may be
>> performed during normal operation.
>>
>> On qemu restart, vfio_realize() finds the descriptor env vars, uses
>> the descriptors, and notes that the device is being reused.  Device and
>> iommu state is already configured, so operations in vfio_realize that
>> would modify the configuration are skipped for a reused device, including
>> vfio ioctl's and writes to PCI configuration space.  The result is that
>> vfio_realize constructs qemu data structures that reflect the current
>> state of the device.  However, the reconstruction is not complete until
>> cpr-load is called. cpr-load loads the msi data and finds eventfds in cpr
>> state.  It rebuilds vector data structures and attaches the interrupts to
>> the new KVM instance.  cpr-load then walks the flattened ranges of the
>> vfio_address_spaces and calls VFIO_DMA_MAP_FLAG_VADDR to inform the kernel
>> of the new VA's.  Lastly, it starts the VM and suppresses vfio device reset.
>>
>> This functionality is delivered by 3 patches for clarity.  Part 1 handles
>> device file descriptors and DMA.  Part 2 adds eventfd and MSI/MSI-X vector
>> support.  Part 3 adds INTX support.
>>
>> Signed-off-by: Steve Sistare 
>> ---
>>  MAINTAINERS   |   1 +
>>  hw/pci/pci.c  |   4 ++
>>  hw/vfio/common.c  |  69 --
>>  hw/vfio/cpr.c | 160 
>> ++
>>  hw/vfio/meson.build   |   1 +
>>  hw/vfio/pci.c |  57 +++
>>  hw/vfio/trace-events  |   1 +
>>  include/hw/pci/pci.h  |   1 +
>>  include/hw/vfio/vfio-common.h |   5 ++
>>  include/migration/cpr.h   |   3 +
>>  linux-headers/linux/vfio.h|   6 ++
>>  migration/cpr.c   |  10 ++-
>>  migration/target.c|  14 
>>  13 files changed, 325 insertions(+), 7 deletions(-)
>>  create mode 100644 hw/vfio/cpr.c
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index a9d2ed8..3132965 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -2904,6 +2904,7 @@ CPR
>>  M: Steve Sistare 
>>  M: Mark Kanda 
>>  S: Maintained
>> +F: hw/vfio/cpr.c
>>  F: include/migration/cpr.h
>>  F: migration/cpr.c
>>  F: qapi/cpr.json
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index 59408a3..b9c6ca1 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -307,6 +307,10 @@ static void pci_do_device_reset(PCIDevice *dev)
>>  {
>>  int r;
>>  
>> +if (dev->reused) {
>> +return;
>> +}
>> +
>>  pci_device_deassert_intx(dev);
>>  assert(dev->irq_state == 0);
>>  
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 7918c0d..872a1ac 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -31,6 +31,7 @@
>>  #include "exec/memory.h"
>>  #include "exec/ram_addr.h"
>>  #include "hw/hw.h"
>> +#include "migration/cpr.h"
>>  #include "qemu/error-report.h"
>>  #include "qemu/main-loop.h"
>>  #include "qemu/range.h"
>> @@ -464,6 +465,10 @@ static int vfio_dma_unmap(VFIOContainer *container,
>>  return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
>>  }
>>  
>> +if (container->reused) {
>> +return 0;
>> +}
>> +
>>  while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
>>  /*
>>   * The type1 backend has an off-by-one bug in the kernel 
>> (71a7d3d78e3c
>> @@ -501,6 +506,10 @@ static int vfio_dma_map(VFIOContainer *container, 
>> hwaddr iova,
>>  .size = size,
>>  };
>>  
>> +if (container->reused) {
>> +return 0;
>> +}
>> +
>>  if (!readonly) {
>>  map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
>>  }
>> @@ -1872,6 +1881,10 @@ static int vfio_init_container(VFIOContainer 
>> *container, int group_fd,
>>  if (iommu_type < 0) {
>>  return iommu_type;
>>  }
>> +if (container->reused) {
>> +container->iommu_type = iommu_type;
>> +return 0;
>> +}
>>  
> 
> I'd like to see more comments throughout, but particularly where we're
> dumping out of functions for reused containers, groups, and devices.
> For instance map/unmap we're assuming we'll reach the same IOMMU
> mapping state we 

Re: [PATCH V6 19/27] vfio-pci: cpr part 1 (fd and dma)

2021-08-10 Thread Alex Williamson
On Fri,  6 Aug 2021 14:43:53 -0700
Steve Sistare  wrote:

> Enable vfio-pci devices to be saved and restored across an exec restart
> of qemu.
> 
> At vfio creation time, save the value of vfio container, group, and device
> descriptors in cpr state.
> 
> In cpr-save and cpr-exec, suspend the use of virtual addresses in DMA
> mappings with VFIO_DMA_UNMAP_FLAG_VADDR, because guest ram will be remapped
> at a different VA after exec.  DMA to already-mapped pages continues.  Save
> the msi message area as part of vfio-pci vmstate, save the interrupt and
> notifier eventfd's in cpr state, and clear the close-on-exec flag for the
> vfio descriptors.  The flag is not cleared earlier because the descriptors
> should not persist across miscellaneous fork and exec calls that may be
> performed during normal operation.
> 
> On qemu restart, vfio_realize() finds the descriptor env vars, uses
> the descriptors, and notes that the device is being reused.  Device and
> iommu state is already configured, so operations in vfio_realize that
> would modify the configuration are skipped for a reused device, including
> vfio ioctl's and writes to PCI configuration space.  The result is that
> vfio_realize constructs qemu data structures that reflect the current
> state of the device.  However, the reconstruction is not complete until
> cpr-load is called. cpr-load loads the msi data and finds eventfds in cpr
> state.  It rebuilds vector data structures and attaches the interrupts to
> the new KVM instance.  cpr-load then walks the flattened ranges of the
> vfio_address_spaces and calls VFIO_DMA_MAP_FLAG_VADDR to inform the kernel
> of the new VA's.  Lastly, it starts the VM and suppresses vfio device reset.
> 
> This functionality is delivered by 3 patches for clarity.  Part 1 handles
> device file descriptors and DMA.  Part 2 adds eventfd and MSI/MSI-X vector
> support.  Part 3 adds INTX support.
> 
> Signed-off-by: Steve Sistare 
> ---
>  MAINTAINERS   |   1 +
>  hw/pci/pci.c  |   4 ++
>  hw/vfio/common.c  |  69 --
>  hw/vfio/cpr.c | 160 
> ++
>  hw/vfio/meson.build   |   1 +
>  hw/vfio/pci.c |  57 +++
>  hw/vfio/trace-events  |   1 +
>  include/hw/pci/pci.h  |   1 +
>  include/hw/vfio/vfio-common.h |   5 ++
>  include/migration/cpr.h   |   3 +
>  linux-headers/linux/vfio.h|   6 ++
>  migration/cpr.c   |  10 ++-
>  migration/target.c|  14 
>  13 files changed, 325 insertions(+), 7 deletions(-)
>  create mode 100644 hw/vfio/cpr.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index a9d2ed8..3132965 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -2904,6 +2904,7 @@ CPR
>  M: Steve Sistare 
>  M: Mark Kanda 
>  S: Maintained
> +F: hw/vfio/cpr.c
>  F: include/migration/cpr.h
>  F: migration/cpr.c
>  F: qapi/cpr.json
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 59408a3..b9c6ca1 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -307,6 +307,10 @@ static void pci_do_device_reset(PCIDevice *dev)
>  {
>  int r;
>  
> +if (dev->reused) {
> +return;
> +}
> +
>  pci_device_deassert_intx(dev);
>  assert(dev->irq_state == 0);
>  
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 7918c0d..872a1ac 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -31,6 +31,7 @@
>  #include "exec/memory.h"
>  #include "exec/ram_addr.h"
>  #include "hw/hw.h"
> +#include "migration/cpr.h"
>  #include "qemu/error-report.h"
>  #include "qemu/main-loop.h"
>  #include "qemu/range.h"
> @@ -464,6 +465,10 @@ static int vfio_dma_unmap(VFIOContainer *container,
>  return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
>  }
>  
> +if (container->reused) {
> +return 0;
> +}
> +
>  while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
>  /*
>   * The type1 backend has an off-by-one bug in the kernel 
> (71a7d3d78e3c
> @@ -501,6 +506,10 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr 
> iova,
>  .size = size,
>  };
>  
> +if (container->reused) {
> +return 0;
> +}
> +
>  if (!readonly) {
>  map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
>  }
> @@ -1872,6 +1881,10 @@ static int vfio_init_container(VFIOContainer 
> *container, int group_fd,
>  if (iommu_type < 0) {
>  return iommu_type;
>  }
> +if (container->reused) {
> +container->iommu_type = iommu_type;
> +return 0;
> +}
>  

I'd like to see more comments throughout, but particularly where we're
dumping out of functions for reused containers, groups, and devices.
For instance map/unmap we're assuming we'll reach the same IOMMU
mapping state we had previously, how do we validate that, why can't we
only set vaddr in the mapping path rather than skipping it for a later
pass at the flatmap, do we actually see unmaps, is 

[PATCH V6 19/27] vfio-pci: cpr part 1 (fd and dma)

2021-08-06 Thread Steve Sistare
Enable vfio-pci devices to be saved and restored across an exec restart
of qemu.

At vfio creation time, save the value of vfio container, group, and device
descriptors in cpr state.

In cpr-save and cpr-exec, suspend the use of virtual addresses in DMA
mappings with VFIO_DMA_UNMAP_FLAG_VADDR, because guest ram will be remapped
at a different VA after exec.  DMA to already-mapped pages continues.  Save
the msi message area as part of vfio-pci vmstate, save the interrupt and
notifier eventfd's in cpr state, and clear the close-on-exec flag for the
vfio descriptors.  The flag is not cleared earlier because the descriptors
should not persist across miscellaneous fork and exec calls that may be
performed during normal operation.

On qemu restart, vfio_realize() finds the descriptor env vars, uses
the descriptors, and notes that the device is being reused.  Device and
iommu state is already configured, so operations in vfio_realize that
would modify the configuration are skipped for a reused device, including
vfio ioctl's and writes to PCI configuration space.  The result is that
vfio_realize constructs qemu data structures that reflect the current
state of the device.  However, the reconstruction is not complete until
cpr-load is called. cpr-load loads the msi data and finds eventfds in cpr
state.  It rebuilds vector data structures and attaches the interrupts to
the new KVM instance.  cpr-load then walks the flattened ranges of the
vfio_address_spaces and calls VFIO_DMA_MAP_FLAG_VADDR to inform the kernel
of the new VA's.  Lastly, it starts the VM and suppresses vfio device reset.

This functionality is delivered by 3 patches for clarity.  Part 1 handles
device file descriptors and DMA.  Part 2 adds eventfd and MSI/MSI-X vector
support.  Part 3 adds INTX support.

Signed-off-by: Steve Sistare 
---
 MAINTAINERS   |   1 +
 hw/pci/pci.c  |   4 ++
 hw/vfio/common.c  |  69 --
 hw/vfio/cpr.c | 160 ++
 hw/vfio/meson.build   |   1 +
 hw/vfio/pci.c |  57 +++
 hw/vfio/trace-events  |   1 +
 include/hw/pci/pci.h  |   1 +
 include/hw/vfio/vfio-common.h |   5 ++
 include/migration/cpr.h   |   3 +
 linux-headers/linux/vfio.h|   6 ++
 migration/cpr.c   |  10 ++-
 migration/target.c|  14 
 13 files changed, 325 insertions(+), 7 deletions(-)
 create mode 100644 hw/vfio/cpr.c

diff --git a/MAINTAINERS b/MAINTAINERS
index a9d2ed8..3132965 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2904,6 +2904,7 @@ CPR
 M: Steve Sistare 
 M: Mark Kanda 
 S: Maintained
+F: hw/vfio/cpr.c
 F: include/migration/cpr.h
 F: migration/cpr.c
 F: qapi/cpr.json
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 59408a3..b9c6ca1 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -307,6 +307,10 @@ static void pci_do_device_reset(PCIDevice *dev)
 {
 int r;
 
+if (dev->reused) {
+return;
+}
+
 pci_device_deassert_intx(dev);
 assert(dev->irq_state == 0);
 
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 7918c0d..872a1ac 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -31,6 +31,7 @@
 #include "exec/memory.h"
 #include "exec/ram_addr.h"
 #include "hw/hw.h"
+#include "migration/cpr.h"
 #include "qemu/error-report.h"
 #include "qemu/main-loop.h"
 #include "qemu/range.h"
@@ -464,6 +465,10 @@ static int vfio_dma_unmap(VFIOContainer *container,
 return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
 }
 
+if (container->reused) {
+return 0;
+}
+
 while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
 /*
  * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
@@ -501,6 +506,10 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr 
iova,
 .size = size,
 };
 
+if (container->reused) {
+return 0;
+}
+
 if (!readonly) {
 map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
 }
@@ -1872,6 +1881,10 @@ static int vfio_init_container(VFIOContainer *container, 
int group_fd,
 if (iommu_type < 0) {
 return iommu_type;
 }
+if (container->reused) {
+container->iommu_type = iommu_type;
+return 0;
+}
 
 ret = ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container->fd);
 if (ret) {
@@ -1972,6 +1985,7 @@ static int vfio_connect_container(VFIOGroup *group, 
AddressSpace *as,
 {
 VFIOContainer *container;
 int ret, fd;
+bool reused;
 VFIOAddressSpace *space;
 
 space = vfio_get_address_space(as);
@@ -2007,7 +2021,13 @@ static int vfio_connect_container(VFIOGroup *group, 
AddressSpace *as,
  * details once we know which type of IOMMU we are using.
  */
 
+fd = cpr_find_fd("vfio_container_for_group", group->groupid);
+reused = (fd >= 0);
+
 QLIST_FOREACH(container, &space->containers, next) {
+if (container->fd == fd) {
+break;
+}
 if