On 2.03.2025 14:12, Avihai Horon wrote:

On 19/02/2025 22:34, Maciej S. Szmigiero wrote:
External email: Use caution opening links or attachments


From: "Maciej S. Szmigiero" <maciej.szmigi...@oracle.com>

The multifd received data needs to be reassembled since device state
packets sent via different multifd channels can arrive out-of-order.

Therefore, each VFIO device state packet carries a header indicating its
position in the stream.
The raw device state data is saved into a VFIOStateBuffer for later
in-order loading into the device.

The last such VFIO device state packet should have
VFIO_DEVICE_STATE_CONFIG_STATE flag set and carry the device config state.

Signed-off-by: Maciej S. Szmigiero <maciej.szmigi...@oracle.com>
---
  hw/vfio/migration-multifd.c | 103 ++++++++++++++++++++++++++++++++++++
  hw/vfio/migration-multifd.h |   3 ++
  hw/vfio/migration.c         |   1 +
  hw/vfio/trace-events        |   1 +
  4 files changed, 108 insertions(+)

diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
index c2defc0efef0..5d5ee1393674 100644
--- a/hw/vfio/migration-multifd.c
+++ b/hw/vfio/migration-multifd.c
@@ -42,6 +42,11 @@ typedef struct VFIOStateBuffer {
  } VFIOStateBuffer;

  typedef struct VFIOMultifd {
+    VFIOStateBuffers load_bufs;
+    QemuCond load_bufs_buffer_ready_cond;
+    QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */
+    uint32_t load_buf_idx;
+    uint32_t load_buf_idx_last;
  } VFIOMultifd;

  static void vfio_state_buffer_clear(gpointer data)
@@ -87,15 +92,113 @@ static VFIOStateBuffer 
*vfio_state_buffers_at(VFIOStateBuffers *bufs, guint idx)
      return &g_array_index(bufs->array, VFIOStateBuffer, idx);
  }

+static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev,
+                                          VFIODeviceStatePacket *packet,
+                                          size_t packet_total_size,
+                                          Error **errp)
+{
+    VFIOMigration *migration = vbasedev->migration;
+    VFIOMultifd *multifd = migration->multifd;
+    VFIOStateBuffer *lb;
+
+    vfio_state_buffers_assert_init(&multifd->load_bufs);
+    if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) {
+        vfio_state_buffers_size_set(&multifd->load_bufs, packet->idx + 1);
+    }
+
+    lb = vfio_state_buffers_at(&multifd->load_bufs, packet->idx);
+    if (lb->is_present) {
+        error_setg(errp, "state buffer %" PRIu32 " already filled",
+                   packet->idx);

Let's add vbasedev->name to the error message so we know which device caused 
the error.

Done.

+        return false;
+    }
+
+    assert(packet->idx >= multifd->load_buf_idx);
+
+    lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet));
+    lb->len = packet_total_size - sizeof(*packet);
+    lb->is_present = true;
+
+    return true;
+}
+
+bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size,
+                            Error **errp)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOMigration *migration = vbasedev->migration;
+    VFIOMultifd *multifd = migration->multifd;
+    VFIODeviceStatePacket *packet = (VFIODeviceStatePacket *)data;
+
+    /*
+     * Holding BQL here would violate the lock order and can cause
+     * a deadlock once we attempt to lock load_bufs_mutex below.
+     */
+    assert(!bql_locked());

To be clearer, I'd move the assert down to be just above 
"QEMU_LOCK_GUARD(&multifd->load_bufs_mutex);".

Moved there.
+
+    if (!vfio_multifd_transfer_enabled(vbasedev)) {
+        error_setg(errp,
+                   "got device state packet but not doing multifd transfer");

Let's add vbasedev->name to the error message so we know which device caused 
the error.

Done.

+        return false;
+    }
+
+    assert(multifd);
+
+    if (data_size < sizeof(*packet)) {
+        error_setg(errp, "packet too short at %zu (min is %zu)",
+                   data_size, sizeof(*packet));

Ditto.

Done.

+        return false;
+    }
+
+    if (packet->version != VFIO_DEVICE_STATE_PACKET_VER_CURRENT) {
+        error_setg(errp, "packet has unknown version %" PRIu32,
+                   packet->version);

Ditto.

Done.

+        return false;
+    }
+
+    if (packet->idx == UINT32_MAX) {
+        error_setg(errp, "packet has too high idx");

Ditto.

Done.

+        return false;
+    }
+
+    trace_vfio_load_state_device_buffer_incoming(vbasedev->name, packet->idx);
+
+    QEMU_LOCK_GUARD(&multifd->load_bufs_mutex);
+
+    /* config state packet should be the last one in the stream */
+    if (packet->flags & VFIO_DEVICE_STATE_CONFIG_STATE) {
+        multifd->load_buf_idx_last = packet->idx;
+    }
+
+    if (!vfio_load_state_buffer_insert(vbasedev, packet, data_size, errp)) {
+        return false;
+    }
+
+    qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond);
+
+    return true;
+}
+
  VFIOMultifd *vfio_multifd_new(void)
  {
      VFIOMultifd *multifd = g_new(VFIOMultifd, 1);

+    vfio_state_buffers_init(&multifd->load_bufs);
+
+    qemu_mutex_init(&multifd->load_bufs_mutex);

Nit: move qemu_mutex_init() just above qemu_cond_init()?

It's in a separate "block" because it is common for all 3
conditions in the ultimate form of code (and most of the
later variables too), rather just this single condition.

 > Thanks.

Thanks,
Maciej

+
+    multifd->load_buf_idx = 0;
+    multifd->load_buf_idx_last = UINT32_MAX;
+    qemu_cond_init(&multifd->load_bufs_buffer_ready_cond);
+
      return multifd;
  }




Reply via email to