On 5/20/2025 9:55 AM, Cédric Le Goater wrote:
On 5/12/25 17:32, Steve Sistare wrote:
cpr-transfer will use the device name as a key to find the value
of the device descriptor in new QEMU.  However, if the descriptor
number is specified by a command-line fd parameter, then
vfio_device_get_name creates a name that includes the fd number.
This causes a chicken-and-egg problem: new QEMU must know the fd
number to construct a name to find the fd number.

To fix, create an invariant name based on the id command-line
parameter.  If id is not defined, add a CPR blocker.

Signed-off-by: Steve Sistare <steven.sist...@oracle.com>
---
  hw/vfio/cpr.c              | 21 +++++++++++++++++++++
  hw/vfio/device.c           | 10 ++++------
  hw/vfio/iommufd.c          |  2 ++
  include/hw/vfio/vfio-cpr.h |  4 ++++
  4 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c
index 6081a89..7609c62 100644
--- a/hw/vfio/cpr.c
+++ b/hw/vfio/cpr.c
@@ -11,6 +11,7 @@
  #include "hw/vfio/pci.h"
  #include "hw/pci/msix.h"
  #include "hw/pci/msi.h"
+#include "migration/blocker.h"
  #include "migration/cpr.h"
  #include "qapi/error.h"
  #include "system/runstate.h"
@@ -184,3 +185,23 @@ const VMStateDescription vfio_cpr_pci_vmstate = {
          VMSTATE_END_OF_LIST()
      }
  };
+
+bool vfio_cpr_set_device_name(VFIODevice *vbasedev, Error **errp)
+{
+    if (vbasedev->dev->id) {
+        vbasedev->name = g_strdup(vbasedev->dev->id);
+        return true;
+    } else {
+        /*
+         * Assign a name so any function printing it will not break, but the
+         * fd number changes across processes, so this cannot be used as an
+         * invariant name for CPR.
+         */
+        vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);

The code above should be in vfio_device_get_name(), proposed in its own patch.

I understand, "in its own patch".  Will do.

+        error_setg(&vbasedev->cpr.id_blocker,
+                   "vfio device with fd=%d needs an id property",
+                   vbasedev->fd);
+        return migrate_add_blocker_modes(&vbasedev->cpr.id_blocker, errp,
+                                         MIG_MODE_CPR_TRANSFER, -1) == 0;

The cpr blocker should be proposed in a second patch, maybe with a small
wrapper to set the 'Error *'.

will do.

- Steve

+    }
+}
diff --git a/hw/vfio/device.c b/hw/vfio/device.c
index 9fba2c7..8e9de68 100644
--- a/hw/vfio/device.c
+++ b/hw/vfio/device.c
@@ -28,6 +28,7 @@
  #include "qapi/error.h"
  #include "qemu/error-report.h"
  #include "qemu/units.h"
+#include "migration/cpr.h"
  #include "monitor/monitor.h"
  #include "vfio-helpers.h"
@@ -284,6 +285,7 @@ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
  {
      ERRP_GUARD();
      struct stat st;
+    bool ret = true;
      if (vbasedev->fd < 0) {
          if (stat(vbasedev->sysfsdev, &st) < 0) {
@@ -300,16 +302,12 @@ bool vfio_device_get_name(VFIODevice *vbasedev, Error **errp)
              error_setg(errp, "Use FD passing only with iommufd backend");
              return false;
          }
-        /*
-         * Give a name with fd so any function printing out vbasedev->name
-         * will not break.
-         */
          if (!vbasedev->name) {
-            vbasedev->name = g_strdup_printf("VFIO_FD%d", vbasedev->fd);
+            ret = vfio_cpr_set_device_name(vbasedev, errp);
          }
      }
-    return true;
+    return ret;
  }
  void vfio_device_set_fd(VFIODevice *vbasedev, const char *str, Error **errp)
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 8661947..ea99b8d 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -25,6 +25,7 @@
  #include "system/reset.h"
  #include "qemu/cutils.h"
  #include "qemu/chardev_open.h"
+#include "migration/blocker.h"
  #include "pci.h"
  #include "vfio-iommufd.h"
  #include "vfio-helpers.h"
@@ -669,6 +670,7 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev)
      iommufd_cdev_container_destroy(container);
      vfio_address_space_put(space);
+    migrate_del_blocker(&vbasedev->cpr.id_blocker);
      iommufd_cdev_unbind_and_disconnect(vbasedev);
      close(vbasedev->fd);
  }
diff --git a/include/hw/vfio/vfio-cpr.h b/include/hw/vfio/vfio-cpr.h
index 765e334..d06d117 100644
--- a/include/hw/vfio/vfio-cpr.h
+++ b/include/hw/vfio/vfio-cpr.h
@@ -23,12 +23,14 @@ typedef struct VFIOContainerCPR {
  typedef struct VFIODeviceCPR {
      bool reused;
      Error *mdev_blocker;
+    Error *id_blocker;
  } VFIODeviceCPR;
  struct VFIOContainer;
  struct VFIOContainerBase;
  struct VFIOGroup;
  struct VFIOPCIDevice;
+struct VFIODevice;
  bool vfio_legacy_cpr_register_container(struct VFIOContainer *container,
                                          Error **errp);
@@ -59,4 +61,6 @@ void vfio_cpr_delete_vector_fd(struct VFIOPCIDevice *vdev, const char *name,
  extern const VMStateDescription vfio_cpr_pci_vmstate;
+bool vfio_cpr_set_device_name(struct VFIODevice *vbasedev, Error **errp);
+
  #endif /* HW_VFIO_VFIO_CPR_H */



Reply via email to