A few suggestions here. Overall, it looks sane to me.
On Wed, Aug 13, 2025 at 12:56 PM Vladimir Sementsov-Ogievskiy
<[email protected]> wrote:
>
> Normally on migration we stop and destroy connection with
> vhost (vhost-user-blk server, or kernel vhost) on source
> and reinitialize it on target.
>
> With this commit we start to implement vhost backend migration,
> i.e. we don't stop the connection and operation of vhost. Instead,
> we pass backend-related state, including open file descriptors
> to target process. Of course, it's possible only for local
> migration, and migration channel should be a unix socket.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <[email protected]>
> ---
> hw/virtio/vhost.c | 184 +++++++++++++++++++++++++-----
> include/hw/virtio/vhost-backend.h | 5 +
> include/hw/virtio/vhost.h | 6 +
> 3 files changed, 167 insertions(+), 28 deletions(-)
>
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index 0427fc29b2..80371a2653 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -26,8 +26,10 @@
> #include "hw/mem/memory-device.h"
> #include "migration/blocker.h"
> #include "migration/qemu-file-types.h"
> +#include "migration/qemu-file.h"
> #include "system/dma.h"
> #include "trace.h"
> +#include <stdint.h>
>
> /* enabled until disconnected backend stabilizes */
> #define _VHOST_DEBUG 1
> @@ -1321,6 +1323,8 @@ out:
> return ret;
> }
>
> +static void vhost_virtqueue_error_notifier(EventNotifier *n);
> +
> int vhost_virtqueue_start(struct vhost_dev *dev,
> struct VirtIODevice *vdev,
> struct vhost_virtqueue *vq,
> @@ -1346,7 +1350,17 @@ int vhost_virtqueue_start(struct vhost_dev *dev,
> return r;
> }
>
> - vq->num = state.num = virtio_queue_get_num(vdev, idx);
> + vq->num = virtio_queue_get_num(vdev, idx);
> +
> + if (dev->migrating_backend) {
> + if (dev->vhost_ops->vhost_set_vring_err) {
> + event_notifier_set_handler(&vq->error_notifier,
> + vhost_virtqueue_error_notifier);
> + }
> + return 0;
> + }
> +
> + state.num = vq->num;
> r = dev->vhost_ops->vhost_set_vring_num(dev, &state);
> if (r) {
> VHOST_OPS_DEBUG(r, "vhost_set_vring_num failed");
> @@ -1424,6 +1438,10 @@ static int do_vhost_virtqueue_stop(struct vhost_dev
> *dev,
>
> trace_vhost_virtque_stop(vdev->name, idx);
>
> + if (dev->migrating_backend) {
> + return 0;
> + }
> +
> if (virtio_queue_get_desc_addr(vdev, idx) == 0) {
> /* Don't stop the virtqueue which might have not been started */
> return 0;
> @@ -1514,7 +1532,15 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
> struct vhost_vring_file file = {
> .index = vhost_vq_index,
> };
> - int r = event_notifier_init(&vq->masked_notifier, 0);
> + int r;
> +
> + vq->dev = dev;
> +
> + if (dev->migrating_backend) {
> + return 0;
> + }
> +
> + r = event_notifier_init(&vq->masked_notifier, 0);
> if (r < 0) {
> return r;
> }
> @@ -1526,8 +1552,6 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
> goto fail_call;
> }
>
> - vq->dev = dev;
> -
> if (dev->vhost_ops->vhost_set_vring_err) {
> r = event_notifier_init(&vq->error_notifier, 0);
> if (r < 0) {
> @@ -1564,10 +1588,14 @@ fail_call:
>
> static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
> {
> - event_notifier_cleanup(&vq->masked_notifier);
> + if (!vq->dev->migrating_backend) {
> + event_notifier_cleanup(&vq->masked_notifier);
> + }
> if (vq->dev->vhost_ops->vhost_set_vring_err) {
> event_notifier_set_handler(&vq->error_notifier, NULL);
> - event_notifier_cleanup(&vq->error_notifier);
> + if (!vq->dev->migrating_backend) {
> + event_notifier_cleanup(&vq->error_notifier);
> + }
> }
> }
>
> @@ -1624,21 +1652,30 @@ int vhost_dev_init(struct vhost_dev *hdev, void
> *opaque,
> r = vhost_set_backend_type(hdev, backend_type);
> assert(r >= 0);
>
> - r = hdev->vhost_ops->vhost_backend_init(hdev, opaque, errp);
> - if (r < 0) {
> - goto fail;
> + if (hdev->migrating_backend) {
> + /* backend must support detached state */
It would probably be better to use error_report() (or fail in some other graceful way) rather than a raw assert() here?
> + assert(hdev->vhost_ops->vhost_save_backend);
> + assert(hdev->vhost_ops->vhost_load_backend);
> + hdev->_features_wait_incoming = true;
> }
>
> - r = hdev->vhost_ops->vhost_set_owner(hdev);
> + r = hdev->vhost_ops->vhost_backend_init(hdev, opaque, errp);
> if (r < 0) {
> - error_setg_errno(errp, -r, "vhost_set_owner failed");
> goto fail;
> }
>
> - r = hdev->vhost_ops->vhost_get_features(hdev, &hdev->_features);
> - if (r < 0) {
> - error_setg_errno(errp, -r, "vhost_get_features failed");
> - goto fail;
> + if (!hdev->migrating_backend) {
> + r = hdev->vhost_ops->vhost_set_owner(hdev);
> + if (r < 0) {
> + error_setg_errno(errp, -r, "vhost_set_owner failed");
> + goto fail;
> + }
> +
> + r = hdev->vhost_ops->vhost_get_features(hdev, &hdev->_features);
> + if (r < 0) {
> + error_setg_errno(errp, -r, "vhost_get_features failed");
> + goto fail;
> + }
> }
>
> for (i = 0; i < hdev->nvqs; ++i, ++n_initialized_vqs) {
> @@ -1670,7 +1707,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
> .region_del = vhost_iommu_region_del,
> };
>
> - if (hdev->migration_blocker == NULL) {
> + if (!hdev->migrating_backend && hdev->migration_blocker == NULL) {
> if (!vhost_dev_has_feature(hdev, VHOST_F_LOG_ALL)) {
> error_setg(&hdev->migration_blocker,
> "Migration disabled: vhost lacks VHOST_F_LOG_ALL
> feature.");
> @@ -1697,7 +1734,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
> memory_listener_register(&hdev->memory_listener, &address_space_memory);
> QLIST_INSERT_HEAD(&vhost_devices, hdev, entry);
>
> - if (!check_memslots(hdev, errp)) {
> + if (!hdev->migrating_backend && !check_memslots(hdev, errp)) {
> r = -EINVAL;
> goto fail;
> }
> @@ -1765,8 +1802,11 @@ void vhost_dev_disable_notifiers_nvqs(struct vhost_dev
> *hdev,
> */
> memory_region_transaction_commit();
>
> - for (i = 0; i < nvqs; ++i) {
> - virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index +
> i);
> + if (!hdev->migrating_backend) {
> + for (i = 0; i < nvqs; ++i) {
> + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus),
> + hdev->vq_index + i);
> + }
> }
> virtio_device_release_ioeventfd(vdev);
> }
> @@ -1920,6 +1960,12 @@ uint64_t vhost_get_features(struct vhost_dev *hdev,
> const int *feature_bits,
> uint64_t features)
> {
> const int *bit = feature_bits;
> +
Should this be
if (hdev->_features_wait_incoming && hdev->migrating_backend) {
so as not to impact existing flows?
> + if (hdev->_features_wait_incoming) {
> + /* Excessive set is enough for early initialization. */
> + return features;
> + }
> +
> while (*bit != VHOST_INVALID_FEATURE_BIT) {
> uint64_t bit_mask = (1ULL << *bit);
> if (!vhost_dev_has_feature(hdev, *bit)) {
> @@ -1930,6 +1976,66 @@ uint64_t vhost_get_features(struct vhost_dev *hdev,
> const int *feature_bits,
> return features;
> }
>
> +void vhost_save_backend(struct vhost_dev *hdev, QEMUFile *f)
> +{
> + int i;
> +
> + assert(hdev->migrating_backend);
> +
> + if (hdev->vhost_ops->vhost_save_backend) {
> + hdev->vhost_ops->vhost_save_backend(hdev, f);
> + }
> +
> + qemu_put_be64(f, hdev->_features);
> + qemu_put_be64(f, hdev->max_queues);
> + qemu_put_be64(f, hdev->nvqs);
> +
> + for (i = 0; i < hdev->nvqs; i++) {
> + qemu_file_put_fd(f,
> +
> event_notifier_get_fd(&hdev->vqs[i].error_notifier));
> + qemu_file_put_fd(f,
> +
> event_notifier_get_fd(&hdev->vqs[i].masked_notifier));
> + }
> +}
> +
> +int vhost_load_backend(struct vhost_dev *hdev, QEMUFile *f)
> +{
> + int i;
> + Error *err = NULL;
> + uint64_t nvqs;
> +
> + assert(hdev->migrating_backend);
> +
> + if (hdev->vhost_ops->vhost_load_backend) {
> + hdev->vhost_ops->vhost_load_backend(hdev, f);
> + }
> +
> + qemu_get_be64s(f, &hdev->_features);
> + qemu_get_be64s(f, &hdev->max_queues);
> + qemu_get_be64s(f, &nvqs);
> +
> + if (nvqs != hdev->nvqs) {
> + error_report("%s: number of virt queues mismatch", __func__);
> + return -EINVAL;
> + }
> +
> + for (i = 0; i < hdev->nvqs; i++) {
> + event_notifier_init_fd(&hdev->vqs[i].error_notifier,
> + qemu_file_get_fd(f));
> + event_notifier_init_fd(&hdev->vqs[i].masked_notifier,
> + qemu_file_get_fd(f));
> + }
> +
> + if (!check_memslots(hdev, &err)) {
> + error_report_err(err);
> + return -EINVAL;
> + }
> +
> + hdev->_features_wait_incoming = false;
> +
> + return 0;
> +}
> +
> void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits,
> uint64_t features)
> {
> @@ -2075,19 +2181,24 @@ int vhost_dev_start(struct vhost_dev *hdev,
> VirtIODevice *vdev, bool vrings)
> hdev->started = true;
> hdev->vdev = vdev;
>
> - r = vhost_dev_set_features(hdev, hdev->log_enabled);
> - if (r < 0) {
> - goto fail_features;
> + if (!hdev->migrating_backend) {
> + r = vhost_dev_set_features(hdev, hdev->log_enabled);
> + if (r < 0) {
> + warn_report("%s %d", __func__, __LINE__);
> + goto fail_features;
> + }
> }
>
> if (vhost_dev_has_iommu(hdev)) {
> memory_listener_register(&hdev->iommu_listener, vdev->dma_as);
> }
>
> - r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem);
> - if (r < 0) {
> - VHOST_OPS_DEBUG(r, "vhost_set_mem_table failed");
> - goto fail_mem;
> + if (!hdev->migrating_backend) {
> + r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem);
> + if (r < 0) {
> + VHOST_OPS_DEBUG(r, "vhost_set_mem_table failed");
> + goto fail_mem;
> + }
> }
> for (i = 0; i < hdev->nvqs; ++i) {
> r = vhost_virtqueue_start(hdev,
> @@ -2127,7 +2238,7 @@ int vhost_dev_start(struct vhost_dev *hdev,
> VirtIODevice *vdev, bool vrings)
> }
> vhost_dev_elect_mem_logger(hdev, true);
> }
> - if (vrings) {
> + if (vrings && !hdev->migrating_backend) {
> r = vhost_dev_set_vring_enable(hdev, true);
> if (r) {
> goto fail_log;
> @@ -2155,6 +2266,8 @@ int vhost_dev_start(struct vhost_dev *hdev,
> VirtIODevice *vdev, bool vrings)
> }
> vhost_start_config_intr(hdev);
>
> + hdev->migrating_backend = false;
> +
> trace_vhost_dev_start_finish(vdev->name);
> return 0;
> fail_iotlb:
> @@ -2204,14 +2317,29 @@ static int do_vhost_dev_stop(struct vhost_dev *hdev,
> VirtIODevice *vdev,
> event_notifier_cleanup(
> &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier);
>
> + if (hdev->migrating_backend) {
Ditto - can we avoid the raw assert()s here as well?
> + /* backend must support detached state */
> + assert(hdev->vhost_ops->vhost_save_backend);
> + assert(hdev->vhost_ops->vhost_load_backend);
> + }
> +
> trace_vhost_dev_stop(hdev, vdev->name, vrings);
>
> if (hdev->vhost_ops->vhost_dev_start) {
> hdev->vhost_ops->vhost_dev_start(hdev, false);
> }
> - if (vrings) {
> + if (vrings && !hdev->migrating_backend) {
> vhost_dev_set_vring_enable(hdev, false);
> }
> +
> + if (hdev->migrating_backend) {
> + for (i = 0; i < hdev->nvqs; ++i) {
> + struct vhost_virtqueue *vq = hdev->vqs + i;
> +
> + event_notifier_set_handler(&vq->error_notifier, NULL);
> + }
> + }
> +
> for (i = 0; i < hdev->nvqs; ++i) {
> rc |= do_vhost_virtqueue_stop(hdev,
> vdev,
> diff --git a/include/hw/virtio/vhost-backend.h
> b/include/hw/virtio/vhost-backend.h
> index 0785fc764d..66627c6a56 100644
> --- a/include/hw/virtio/vhost-backend.h
> +++ b/include/hw/virtio/vhost-backend.h
> @@ -163,6 +163,9 @@ typedef int (*vhost_set_device_state_fd_op)(struct
> vhost_dev *dev,
> typedef int (*vhost_check_device_state_op)(struct vhost_dev *dev, Error
> **errp);
> typedef void (*vhost_qmp_status_op)(struct vhost_dev *dev, VhostStatus
> *status);
>
> +typedef void (*vhost_detached_save_op)(struct vhost_dev *dev, QEMUFile *f);
> +typedef int (*vhost_detached_load_op)(struct vhost_dev *dev, QEMUFile *f);
> +
> typedef struct VhostOps {
> VhostBackendType backend_type;
> vhost_backend_init vhost_backend_init;
> @@ -219,6 +222,8 @@ typedef struct VhostOps {
> vhost_set_device_state_fd_op vhost_set_device_state_fd;
> vhost_check_device_state_op vhost_check_device_state;
> vhost_qmp_status_op vhost_qmp_status;
> + vhost_detached_save_op vhost_save_backend;
> + vhost_detached_load_op vhost_load_backend;
> } VhostOps;
>
> int vhost_backend_update_device_iotlb(struct vhost_dev *dev,
> diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
> index 8a4c8c3502..330374aca2 100644
> --- a/include/hw/virtio/vhost.h
> +++ b/include/hw/virtio/vhost.h
> @@ -103,6 +103,10 @@ struct vhost_dev {
> * @acked_features: final negotiated features with front-end driver
> */
> uint64_t _features;
> + bool _features_wait_incoming;
> +
> + bool migrating_backend;
> +
> uint64_t acked_features;
>
> uint64_t max_queues;
> @@ -318,6 +322,8 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev,
> VirtIODevice *vdev, int n,
> */
> uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
> uint64_t features);
> +void vhost_save_backend(struct vhost_dev *hdev, QEMUFile *f);
> +int vhost_load_backend(struct vhost_dev *hdev, QEMUFile *f);
>
> /**
> * vhost_ack_features() - set vhost acked_features
> --
> 2.48.1
>
>