Before processing a request, virtio-scsi dataplane checks whether the backend runs in the same AioContext as the dataplane itself. If not, the backend has to be moved to that context with bdrv_set_aio_context.
However this function is unsafe to be called from IOThread outside BQL. The reason is that it calls bdrv_drain_all(), to acquire and drain all existing BDS. Therefore there is a deadlock problem. Fix it by offloading the bdrv_set_aio_context to main loop thread, through a BH (#1). This main loop BH will set the context, then notify the calling thread with another BH (#2). In BH (#2), we can continue the virtio-scsi request processing. A queue is added to VirtIOSCSI for tracking the pending requests, so in virtio_scsi_dataplane_stop, we have to drain them for migration. Signed-off-by: Fam Zheng <f...@redhat.com> --- hw/scsi/virtio-scsi-dataplane.c | 11 ++++ hw/scsi/virtio-scsi.c | 132 +++++++++++++++++++++++++++++++++++----- include/hw/virtio/virtio-scsi.h | 3 +- 3 files changed, 131 insertions(+), 15 deletions(-) diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index 4a9b9bd..3bc7e82 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -283,6 +283,17 @@ void virtio_scsi_dataplane_stop(VirtIOSCSI *s) aio_set_event_notifier(s->ctx, &s->cmd_vrings[i]->host_notifier, NULL); } + while (true) { + if (QTAILQ_EMPTY(&s->pending_cmd_reqs)) { + break; + } + aio_context_release(s->ctx); + if (!aio_poll(qemu_get_aio_context(), false)) { + usleep(500000); + } + aio_context_acquire(s->ctx); + } + blk_drain_all(); /* ensure there are no in-flight requests */ aio_context_release(s->ctx); diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c index ae8b79a..a842862 100644 --- a/hw/scsi/virtio-scsi.c +++ b/hw/scsi/virtio-scsi.c @@ -243,6 +243,54 @@ static void virtio_scsi_cancel_notify(Notifier *notifier, void *data) g_slice_free(VirtIOSCSICancelNotifier, n); } +typedef struct { + QEMUBH *bh; + BlockBackend *blk; + AioContext *new_context; + QEMUBHFunc *cb; + void *data; +} PullAioContextInfo; + +static void set_aio_context_cb(void *opaque) +{ + PullAioContextInfo *info = opaque; + + 
aio_context_acquire(info->new_context); + blk_set_aio_context(info->blk, info->new_context); + aio_context_release(info->new_context); + qemu_bh_delete(info->bh); + info->bh = aio_bh_new(info->new_context, info->cb, info); + qemu_bh_schedule(info->bh); +} + +/* Schedule a BH on main thread, and let it handle the context movement. Once + * that is done, we are notified by @cb with a second BH on current thread. + */ +static void pull_aio_context(BlockBackend *blk, AioContext *ctx, + void (*cb)(void *), void *data) +{ + PullAioContextInfo *info = g_new(PullAioContextInfo, 1); + + info->blk = blk; + info->new_context = ctx; + info->cb = cb; + info->data = data; + info->bh = aio_bh_new(qemu_get_aio_context(), set_aio_context_cb, info); + qemu_bh_schedule(info->bh); +} + +static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req); +static void virtio_scsi_tmf_continue(void *opaque) +{ + PullAioContextInfo *info = opaque; + VirtIOSCSIReq *req = info->data; + VirtIOSCSI *s = req->dev; + + qemu_bh_delete(info->bh); + g_free(info); + virtio_scsi_do_tmf(s, req); +} + /* Return 0 if the request is ready to be completed and return to guest; * -EINPROGRESS if the request is submitted and will be completed later, in the * case of async cancellation. */ @@ -255,9 +303,8 @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req) int ret = 0; if (s->dataplane_started && blk_get_aio_context(d->conf.blk) != s->ctx) { - aio_context_acquire(s->ctx); - blk_set_aio_context(d->conf.blk, s->ctx); - aio_context_release(s->ctx); + pull_aio_context(d->conf.blk, s->ctx, virtio_scsi_tmf_continue, req); + return -EINPROGRESS; } /* Here VIRTIO_SCSI_S_OK means "FUNCTION COMPLETE". 
*/ req->resp.tmf.response = VIRTIO_SCSI_S_OK; @@ -517,8 +564,38 @@ static void virtio_scsi_fail_cmd_req(VirtIOSCSIReq *req) virtio_scsi_complete_cmd_req(req); } -static bool virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, - VirtIOSCSIReq *req) +static void virtio_scsi_cmd_continue(void *opaque) +{ + PullAioContextInfo *info = opaque; + VirtIOSCSIReq *req = info->data; + VirtIOSCSI *s = req->dev; + + qemu_bh_delete(info->bh); + g_free(info); + if (QTAILQ_FIRST(&s->pending_cmd_reqs)) { + if (req->vring) { + virtio_scsi_handle_cmd_do(s, + (VirtIOSCSIPopFunc)virtio_scsi_pop_req_vring, + req->vring); + } else { + virtio_scsi_handle_cmd_do(s, + (VirtIOSCSIPopFunc)virtio_scsi_pop_req, + req->vq); + } + } +} + +/* + * virtio_scsi_handle_cmd_req_prepare: + * + * Prepare the command request before enqueueing it, or complete it if nothing + * to do. + * + * Returns: 0 if the command is completed. + * -EINPROGRESS if the command should be enqueued to scsi bus. + * -EAGAIN if the command is not ready to be processed. 
+ */ +static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req) { VirtIOSCSICommon *vs = &s->parent_obj; SCSIDevice *d; @@ -532,19 +609,18 @@ static bool virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, } else { virtio_scsi_bad_req(); } - return false; + return 0; } d = virtio_scsi_device_find(s, req->req.cmd.lun); if (!d) { req->resp.cmd.response = VIRTIO_SCSI_S_BAD_TARGET; virtio_scsi_complete_cmd_req(req); - return false; + return 0; } if (s->dataplane_started && blk_get_aio_context(d->conf.blk) != s->ctx) { - aio_context_acquire(s->ctx); - blk_set_aio_context(d->conf.blk, s->ctx); - aio_context_release(s->ctx); + pull_aio_context(d->conf.blk, s->ctx, virtio_scsi_cmd_continue, req); + return -EAGAIN; } req->sreq = scsi_req_new(d, req->req.cmd.tag, virtio_scsi_get_lun(req->req.cmd.lun), @@ -555,11 +631,11 @@ static bool virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, req->sreq->cmd.xfer > req->qsgl.size)) { req->resp.cmd.response = VIRTIO_SCSI_S_OVERRUN; virtio_scsi_complete_cmd_req(req); - return false; + return 0; } scsi_req_ref(req->sreq); blk_io_plug(d->conf.blk); - return true; + return -EINPROGRESS; } static void virtio_scsi_handle_cmd_req_submit(VirtIOSCSI *s, VirtIOSCSIReq *req) @@ -580,12 +656,38 @@ void virtio_scsi_handle_cmd_do(VirtIOSCSI *s, VirtIOSCSIReq *req, *next; QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs); - while ((req = pop_req(s, opaque))) { - if (virtio_scsi_handle_cmd_req_prepare(s, req)) { + while (true) { + int r; + + req = QTAILQ_FIRST(&s->pending_cmd_reqs); + if (req) { + QTAILQ_REMOVE(&s->pending_cmd_reqs, req, next); + } else { + req = pop_req(s, opaque); + } + if (!req) { + break; + } + r = virtio_scsi_handle_cmd_req_prepare(s, req); + switch (r) { + case 0: + break; + case -EINPROGRESS: QTAILQ_INSERT_TAIL(&reqs, req, next); + break; + case -EAGAIN: + /* Only used for aio context switching, impossible for + * non-dataplane code */ + assert(req->vring); + 
QTAILQ_INSERT_TAIL(&s->pending_cmd_reqs, req, next); + goto out; + break; + default: + abort(); } } +out: QTAILQ_FOREACH_SAFE(req, &reqs, next, next) { virtio_scsi_handle_cmd_req_submit(s, req); } @@ -918,7 +1020,9 @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp) static void virtio_scsi_instance_init(Object *obj) { VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(obj); + VirtIOSCSI *s = VIRTIO_SCSI(obj); + QTAILQ_INIT(&s->pending_cmd_reqs); object_property_add_link(obj, "iothread", TYPE_IOTHREAD, (Object **)&vs->conf.iothread, qdev_prop_allow_set_link_before_realize, diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h index cba82ea..bc0fead 100644 --- a/include/hw/virtio/virtio-scsi.h +++ b/include/hw/virtio/virtio-scsi.h @@ -185,6 +185,7 @@ typedef struct VirtIOSCSI { /* Fields for dataplane below */ AioContext *ctx; /* one iothread per virtio-scsi-pci for now */ + QTAILQ_HEAD(, VirtIOSCSIReq) pending_cmd_reqs; /* Vring is used instead of vq in dataplane code, because of the underlying * memory layer thread safety */ @@ -218,7 +219,7 @@ typedef struct VirtIOSCSIReq { VirtIOSCSIVring *vring; union { - /* Used for two-stage request submission */ + /* Used for two-stage request submission or suspension */ QTAILQ_ENTRY(VirtIOSCSIReq) next; /* Used for cancellation of request during TMFs */ -- 1.9.3