Before processing a request, virtio-scsi dataplane checks whether the backend runs in the same AioContext as the dataplane itself. If not, the backend has to be moved to that context with bdrv_set_aio_context.
However this function is unsafe to be called from IOThread outside BQL. The reason is that it calls bdrv_drain_all(), to acquire and drain all existing BDS. Therefore there is a deadlock problem. Fix it by offloading the bdrv_set_aio_context to main loop thread, through a BH (#1). This main loop BH will set the context, then notify the calling thread with another BH (#2). In BH (#2), we can continue the virtio-scsi request processing. A queue is added to VirtIOSCSI for tracking the pending requests, so in virtio_scsi_dataplane_stop, we have to drain them for migration. Signed-off-by: Fam Zheng <f...@redhat.com> --- hw/scsi/virtio-scsi-dataplane.c | 11 ++++ hw/scsi/virtio-scsi.c | 132 +++++++++++++++++++++++++++++++++++----- include/hw/virtio/virtio-scsi.h | 3 +- 3 files changed, 131 insertions(+), 15 deletions(-) diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index 4a9b9bd..3bc7e82 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -283,6 +283,17 @@ void virtio_scsi_dataplane_stop(VirtIOSCSI *s) aio_set_event_notifier(s->ctx, &s->cmd_vrings[i]->host_notifier, NULL); } + while (true) { + if (QTAILQ_EMPTY(&s->pending_cmd_reqs)) { + break; + } + aio_context_release(s->ctx); + if (!aio_poll(qemu_get_aio_context(), false)) { + usleep(500000); + } + aio_context_acquire(s->ctx); + } + blk_drain_all(); /* ensure there are no in-flight requests */ aio_context_release(s->ctx); diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index ae8b79a..a842862 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -243,6 +243,54 @@ static void virtio_scsi_cancel_notify(Notifier *notifier, void *data) g_slice_free(VirtIOSCSICancelNotifier, n); } +typedef struct { + QEMUBH *bh; + BlockBackend *blk; + AioContext *new_context; + QEMUBHFunc *cb; + void *data; +} PullAioContextInfo; + +static void set_aio_context_cb(void *opaque) +{ + PullAioContextInfo *info = opaque; + + 
aio_context_acquire(info->new_context); + blk_set_aio_context(info->blk, info->new_context); + aio_context_release(info->new_context); + qemu_bh_delete(info->bh); + info->bh = aio_bh_new(info->new_context, info->cb, info); + qemu_bh_schedule(info->bh); +} + +/* Schedule a BH on main thread, and let it handle the context movement. Once + * that is done, we are notified by @cb with a second BH on current thread. + */ +static void pull_aio_context(BlockBackend *blk, AioContext *ctx, + void (*cb)(void *), void *data) +{ + PullAioContextInfo *info = g_new(PullAioContextInfo, 1); + + info->blk = blk; + info->new_context = ctx; + info->cb = cb; + info->data = data; + info->bh = aio_bh_new(qemu_get_aio_context(), set_aio_context_cb, info); + qemu_bh_schedule(info->bh); +} + +static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req); +static void virtio_scsi_tmf_continue(void *opaque) +{ + PullAioContextInfo *info = opaque; + VirtIOSCSIReq *req = info->data; + VirtIOSCSI *s = req->dev; + + qemu_bh_delete(info->bh); + g_free(info); + virtio_scsi_do_tmf(s, req); +} + /* Return 0 if the request is ready to be completed and return to guest; * -EINPROGRESS if the request is submitted and will be completed later, in the * case of async cancellation. */ @@ -255,9 +303,8 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) int ret = 0; if (s->dataplane_started && blk_get_aio_context(d->conf.blk) != s->ctx) { - aio_context_acquire(s->ctx); - blk_set_aio_context(d->conf.blk, s->ctx); - aio_context_release(s->ctx); + pull_aio_context(d->conf.blk, s->ctx, virtio_scsi_tmf_continue, req); + return -EINPROGRESS; } /* Here VIRTIO_SCSI_S_OK means "FUNCTION COMPLETE". 
*/ req->resp.tmf.response = VIRTIO_SCSI_S_OK; @@ -517,8 +564,38 @@ static void virtio_scsi_fail_cmd_req(VirtIOSCSIReq *req) virtio_scsi_complete_cmd_req(req); } -static bool virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, - VirtIOSCSIReq *req) +static void virtio_scsi_cmd_continue(void *opaque) +{ + PullAioContextInfo *info = opaque; + VirtIOSCSIReq *req = info->data; + VirtIOSCSI *s = req->dev; + + qemu_bh_delete(info->bh); + g_free(info); + if (QTAILQ_FIRST(&s->pending_cmd_reqs)) { + if (req->vring) { + virtio_scsi_handle_cmd_do(s, + (VirtIOSCSIPopFunc)virtio_scsi_pop_req_vring, + req->vring); + } else { + virtio_scsi_handle_cmd_do(s, + (VirtIOSCSIPopFunc)virtio_scsi_pop_req, + req->vq); + } + } +} + +/* + * virtio_scsi_handle_cmd_req_prepare: + * + * Prepare the command request before enqueueing it, or complete it if nothing + * to do. + * + * Returns: 0 if the command is completed. + * -EINPROGRESS if the command should be enqueued to scsi bus. + * -EAGAIN if the command is not ready to be processed. 
+ */ +static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req) { VirtIOSCSICommon *vs = &s->parent_obj; SCSIDevice *d; @@ -532,19 +609,18 @@ static bool virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, } else { virtio_scsi_bad_req(); } - return false; + return 0; } d = virtio_scsi_device_find(s, req->req.cmd.lun); if (!d) { req->resp.cmd.response = VIRTIO_SCSI_S_BAD_TARGET; virtio_scsi_complete_cmd_req(req); - return false; + return 0; } if (s->dataplane_started && blk_get_aio_context(d->conf.blk) != s->ctx) { - aio_context_acquire(s->ctx); - blk_set_aio_context(d->conf.blk, s->ctx); - aio_context_release(s->ctx); + pull_aio_context(d->conf.blk, s->ctx, virtio_scsi_cmd_continue, req); + return -EAGAIN; } req->sreq = scsi_req_new(d, req->req.cmd.tag, virtio_scsi_get_lun(req->req.cmd.lun), @@ -555,11 +631,11 @@ static bool virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, req->sreq->cmd.xfer > req->qsgl.size)) { req->resp.cmd.response = VIRTIO_SCSI_S_OVERRUN; virtio_scsi_complete_cmd_req(req); - return false; + return 0; } scsi_req_ref(req->sreq); blk_io_plug(d->conf.blk); - return true; + return -EINPROGRESS; } static void virtio_scsi_handle_cmd_req_submit(VirtIOSCSI *s, VirtIOSCSIReq *req) @@ -580,12 +656,38 @@ void virtio_scsi_handle_cmd_do(VirtIOSCSI *s, VirtIOSCSIReq *req, *next; QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); - while ((req = pop_req(s, opaque))) { - if (virtio_scsi_handle_cmd_req_prepare(s, req)) { + while (true) { + int r; + + req = QTAILQ_FIRST(&s->pending_cmd_reqs); + if (req) { + QTAILQ_REMOVE(&s->pending_cmd_reqs, req, next); + } else { + req = pop_req(s, opaque); + } + if (!req) { + break; + } + r = virtio_scsi_handle_cmd_req_prepare(s, req); + switch (r) { + case 0: + break; + case -EINPROGRESS: QTAILQ_INSERT_TAIL(&reqs, req, next); + break; + case -EAGAIN: + /* Only used for aio context switching, impossible for + * non-dataplane code */ + assert(req->vring); + 
QTAILQ_INSERT_TAIL(&s->pending_cmd_reqs, req, next); + goto out; + break; + default: + abort(); } } +out: QTAILQ_FOREACH_SAFE(req, &reqs, next, next) { virtio_scsi_handle_cmd_req_submit(s, req); } @@ -918,7 +1020,9 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) static void virtio_scsi_instance_init(Object *obj) { VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(obj); + VirtIOSCSI *s = VIRTIO_SCSI(obj); + QTAILQ_INIT(&s->pending_cmd_reqs); object_property_add_link(obj, "iothread", TYPE_IOTHREAD, (Object **)&vs->conf.iothread, qdev_prop_allow_set_link_before_realize, diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h index cba82ea..bc0fead 100644 --- a/include/hw/virtio/virtio-scsi.h +++ b/include/hw/virtio/virtio-scsi.h @@ -185,6 +185,7 @@ typedef struct VirtIOSCSI { /* Fields for dataplane below */ AioContext *ctx; /* one iothread per virtio-scsi-pci for now */ + QTAILQ_HEAD(, VirtIOSCSIReq) pending_cmd_reqs; /* Vring is used instead of vq in dataplane code, because of the underlying * memory layer thread safety */ @@ -218,7 +219,7 @@ typedef struct VirtIOSCSIReq { VirtIOSCSIVring *vring; union { - /* Used for two-stage request submission */ + /* Used for two-stage request submission or suspension */ QTAILQ_ENTRY(VirtIOSCSIReq) next; /* Used for cancellation of request during TMFs */ -- 1.9.3