I withdraw this patch.  Most of it is not needed if I save ioas_id
in cpr-state.  I will move a tiny bit that remains to another patch.

- Steve

On 5/12/2025 11:32 AM, Steve Sistare wrote:
Reconstruct userland device state after CPR.  During vfio_realize, skip
all ioctls that configure the device, as it was already configured in old
QEMU.

Save the ioas_id in vmstate, and skip its allocation in vfio_realize.
Because we skip ioctls, the ioas_id is not needed at realize time.  However, we do
need the range info, so defer the call to iommufd_cdev_get_info_iova_range
to a post_load handler, at which time the ioas_id is known.

This reconstruction is not complete.  hwpt_id and devid need special
treatment, handled in subsequent patches.

Signed-off-by: Steve Sistare <steven.sist...@oracle.com>
---
  hw/vfio/cpr-iommufd.c |  8 ++++++++
  hw/vfio/iommufd.c     | 17 +++++++++++++++++
  2 files changed, 25 insertions(+)

diff --git a/hw/vfio/cpr-iommufd.c b/hw/vfio/cpr-iommufd.c
index b760bd3..3d430f0 100644
--- a/hw/vfio/cpr-iommufd.c
+++ b/hw/vfio/cpr-iommufd.c
@@ -31,6 +31,13 @@ static int vfio_container_post_load(void *opaque, int version_id)
      VFIOIOMMUFDContainer *container = opaque;
      VFIOContainerBase *bcontainer = &container->bcontainer;
      VFIODevice *vbasedev;
+    Error *err = NULL;
+    uint32_t ioas_id = container->ioas_id;
+
+    if (!iommufd_cdev_get_info_iova_range(container, ioas_id, &err)) {
+        error_report_err(err);
+        return -1;
+    }

      QLIST_FOREACH(vbasedev, &bcontainer->device_list, container_next) {
          vbasedev->cpr.reused = false;
@@ -47,6 +54,7 @@ static const VMStateDescription vfio_container_vmstate = {
      .post_load = vfio_container_post_load,
      .needed = cpr_needed_for_reuse,
      .fields = (VMStateField[]) {
+        VMSTATE_UINT32(ioas_id, VFIOIOMMUFDContainer),
          VMSTATE_END_OF_LIST()
      }
  };
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 046f601..c49a7e7 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -122,6 +122,10 @@ static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp)
          goto err_kvm_device_add;
      }

+    if (vbasedev->cpr.reused) {
+        goto skip_bind;
+    }
+
      /* Bind device to iommufd */
      bind.iommufd = iommufd->fd;
      if (ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind)) {
@@ -133,6 +137,8 @@ static bool iommufd_cdev_connect_and_bind(VFIODevice *vbasedev, Error **errp)
      vbasedev->devid = bind.out_devid;
      trace_iommufd_cdev_connect_and_bind(bind.iommufd, vbasedev->name,
                                          vbasedev->fd, vbasedev->devid);
+
+skip_bind:
      return true;
  err_bind:
      iommufd_cdev_kvm_device_del(vbasedev);
@@ -580,6 +586,11 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
          }
      }

+    if (vbasedev->cpr.reused) {
+        ioas_id = -1;           /* ioas_id will be received from vmstate */
+        goto skip_ioas_alloc;
+    }
+
      /* Need to allocate a new dedicated container */
      if (!iommufd_backend_alloc_ioas(vbasedev->iommufd, &ioas_id, errp)) {
          goto err_alloc_ioas;
@@ -587,6 +598,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,

      trace_iommufd_cdev_alloc_ioas(vbasedev->iommufd->fd, ioas_id);

+skip_ioas_alloc:
      container = VFIO_IOMMU_IOMMUFD(object_new(TYPE_VFIO_IOMMU_IOMMUFD));
      container->be = vbasedev->iommufd;
      container->ioas_id = ioas_id;
@@ -605,6 +617,10 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
          goto err_discard_disable;
      }

+    if (vbasedev->cpr.reused) {
+        goto skip_info;
+    }
+
      if (!iommufd_cdev_get_info_iova_range(container, ioas_id, &err)) {
          error_append_hint(&err,
                     "Fallback to default 64bit IOVA range and 4K page size\n");
@@ -613,6 +629,7 @@ static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
          bcontainer->pgsizes = qemu_real_host_page_size();
      }

+skip_info:
      if (!vfio_listener_register(bcontainer, errp)) {
          goto err_listener_register;
      }


Reply via email to