> 2025年3月19日 23:11,Stefano Garzarella <sgarz...@redhat.com> 写道:
>
> On Fri, Mar 14, 2025 at 06:15:33AM -0400, Haoqian He wrote:
>> The backend maybe crash when vhost_dev_stop and GET_VRING_BASE
>> would fail, we can return failure to indicate the connection
>> with the backend is broken.
>>
>> Signed-off-by: Haoqian He <haoqian...@smartx.com>
>> ---
>> hw/virtio/vhost.c | 27 +++++++++++++++------------
>> include/hw/virtio/vhost.h | 8 +++++---
>> 2 files changed, 20 insertions(+), 15 deletions(-)
>>
>> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
>> index 6aa72fd434..c82bbbe4cc 100644
>> --- a/hw/virtio/vhost.c
>> +++ b/hw/virtio/vhost.c
>> @@ -1368,23 +1368,23 @@ fail_alloc_desc:
>> return r;
>> }
>>
>> -void vhost_virtqueue_stop(struct vhost_dev *dev,
>> - struct VirtIODevice *vdev,
>> - struct vhost_virtqueue *vq,
>> - unsigned idx)
>> +int vhost_virtqueue_stop(struct vhost_dev *dev,
>> + struct VirtIODevice *vdev,
>> + struct vhost_virtqueue *vq,
>> + unsigned idx)
>> {
>> int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
>> struct vhost_vring_state state = {
>> .index = vhost_vq_index,
>> };
>> - int r;
>> + int r = 0;
>>
>> if (virtio_queue_get_desc_addr(vdev, idx) == 0) {
>> /* Don't stop the virtqueue which might have not been started */
>> - return;
>> + return 0;
>> }
>>
>> - r = dev->vhost_ops->vhost_get_vring_base(dev, &state);
>> + r |= dev->vhost_ops->vhost_get_vring_base(dev, &state);
>
> We can avoid this and also initialize r to 0.
Here we need to do `vhost_virtqueue_stop` for each vq.
>
>> if (r < 0) {
>> VHOST_OPS_DEBUG(r, "vhost VQ %u ring restore failed: %d", idx, r);
>> /* Connection to the backend is broken, so let's sync internal
>> @@ -1412,6 +1412,7 @@ void vhost_virtqueue_stop(struct vhost_dev *dev,
>> 0, virtio_queue_get_avail_size(vdev, idx));
>> vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
>> 0, virtio_queue_get_desc_size(vdev, idx));
>> + return r;
>> }
>>
>> static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev,
>> @@ -2136,9 +2137,10 @@ fail_features:
>> }
>>
>> /* Host notifiers must be enabled at this point. */
>> -void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
>> +int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
>> {
>> int i;
>> + int rc = 0;
>>
>> /* should only be called after backend is connected */
>> assert(hdev->vhost_ops);
>> @@ -2157,10 +2159,10 @@ void vhost_dev_stop(struct vhost_dev *hdev,
>> VirtIODevice *vdev, bool vrings)
>> vhost_dev_set_vring_enable(hdev, false);
>> }
>> for (i = 0; i < hdev->nvqs; ++i) {
>> - vhost_virtqueue_stop(hdev,
>> - vdev,
>> - hdev->vqs + i,
>> - hdev->vq_index + i);
>> + rc |= vhost_virtqueue_stop(hdev,
>> + vdev,
>> + hdev->vqs + i,
>> + hdev->vq_index + i);
>
> Also other function can fails, should we consider also them?
> (e.g. , vhost_dev_set_vring_enable, etc.)
>
> If not, why?
Since we only want to know the return value of callback when the stopping device
live migration, there is no need to catch the failure of `vhost_dev_start`.
We can also catch the failure of `vhost_dev_set_vring_enable`, because
`vhost_dev_set_vring_enable` will also fail if qemu lost connection with the
the backend, but I need to test it.
>
>> }
>> if (hdev->vhost_ops->vhost_reset_status) {
>> hdev->vhost_ops->vhost_reset_status(hdev);
>> @@ -2177,6 +2179,7 @@ void vhost_dev_stop(struct vhost_dev *hdev,
>> VirtIODevice *vdev, bool vrings)
>> hdev->started = false;
>> vdev->vhost_started = false;
>> hdev->vdev = NULL;
>> + return rc;
>> }
>>
>> int vhost_net_set_backend(struct vhost_dev *hdev,
>> diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
>> index a9469d50bc..fd96ec9c39 100644
>> --- a/include/hw/virtio/vhost.h
>> +++ b/include/hw/virtio/vhost.h
>> @@ -232,8 +232,10 @@ int vhost_dev_start(struct vhost_dev *hdev,
>> VirtIODevice *vdev, bool vrings);
>> * Stop the vhost device. After the device is stopped the notifiers
>> * can be disabled (@vhost_dev_disable_notifiers) and the device can
>> * be torn down (@vhost_dev_cleanup).
>> + *
>> + * Return: 0 on success, != 0 on error when stopping dev.
>> */
>> -void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool
>> vrings);
>> +int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings);
>>
>> /**
>> * DOC: vhost device configuration handling
>> @@ -333,8 +335,8 @@ int vhost_device_iotlb_miss(struct vhost_dev *dev,
>> uint64_t iova, int write);
>>
>> int vhost_virtqueue_start(struct vhost_dev *dev, struct VirtIODevice *vdev,
>> struct vhost_virtqueue *vq, unsigned idx);
>> -void vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev,
>> - struct vhost_virtqueue *vq, unsigned idx);
>> +int vhost_virtqueue_stop(struct vhost_dev *dev, struct VirtIODevice *vdev,
>> + struct vhost_virtqueue *vq, unsigned idx);
>>
>> void vhost_dev_reset_inflight(struct vhost_inflight *inflight);
>> void vhost_dev_free_inflight(struct vhost_inflight *inflight);
>> --
>> 2.48.1