A socket write during vhost-user communication may trigger a disconnect event, calling vhost_user_blk_disconnect() and clearing all the vhost_dev structures holding data that vhost-user functions expect to remain valid to roll back initialization correctly. Delay the cleanup to keep vhost_dev structure valid. There are two possible states to handle: 1. RUN_STATE_PRELAUNCH: skip bh oneshot call and perform disconnect in the caller routine. 2. RUN_STATE_RUNNING: delay by using bh
BH changes are based on the similar changes for the vhost-user-net device: commit e7c83a885f865128ae3cf1946f8cb538b63cbfba "vhost-user: delay vhost_user_stop" Signed-off-by: Dima Stepanov <[email protected]> --- hw/block/vhost-user-blk.c | 49 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 9d8c0b3..447fc9c 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -337,11 +337,6 @@ static void vhost_user_blk_disconnect(DeviceState *dev) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserBlk *s = VHOST_USER_BLK(vdev); - if (!s->connected) { - return; - } - s->connected = false; - if (s->dev.started) { vhost_user_blk_stop(vdev); } @@ -349,6 +344,19 @@ static void vhost_user_blk_disconnect(DeviceState *dev) vhost_dev_cleanup(&s->dev); } +static void vhost_user_blk_event(void *opaque, QEMUChrEvent event); + +static void vhost_user_blk_chr_closed_bh(void *opaque) +{ + DeviceState *dev = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBlk *s = VHOST_USER_BLK(vdev); + + vhost_user_blk_disconnect(dev); + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, vhost_user_blk_event, + NULL, opaque, NULL, true); +} + static void vhost_user_blk_event(void *opaque, QEMUChrEvent event) { DeviceState *dev = opaque; @@ -363,7 +371,28 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event) } break; case CHR_EVENT_CLOSED: - vhost_user_blk_disconnect(dev); + /* + * A close event may happen during a read/write, but vhost + * code assumes the vhost_dev remains setup, so delay the + * stop & clear. There are two possible paths to hit this + * disconnect event: + * 1. When VM is in the RUN_STATE_PRELAUNCH state. The + * vhost_user_blk_device_realize() is a caller. + * 2. In tha main loop phase after VM start. + * + * For p2 the disconnect event will be delayed. We can't + * do the same for p1, because we are not running the loop + * at this moment. So just skip this step and perform + * disconnect in the caller function. + */ + if (s->connected && runstate_is_running()) { + AioContext *ctx = qemu_get_current_aio_context(); + + qemu_chr_fe_set_handlers(&s->chardev, NULL, NULL, NULL, NULL, + NULL, NULL, false); + aio_bh_schedule_oneshot(ctx, vhost_user_blk_chr_closed_bh, opaque); + } + s->connected = false; break; case CHR_EVENT_BREAK: case CHR_EVENT_MUX_IN: @@ -428,6 +457,14 @@ reconnect: ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, sizeof(struct virtio_blk_config)); + if (!s->connected) { + /* + * Perform disconnect before making reconnect. More detailed + * comment why it was delayed is in the vhost_user_blk_event() + * routine. + */ + vhost_user_blk_disconnect(dev); + } if (ret < 0) { error_report("vhost-user-blk: get block config failed"); goto reconnect; -- 2.7.4
