Marcelo Tosatti wrote:
> Use asynchronous IO in the virtio block QEMU driver.
>
> virtio_blk_handle_output should not block for long periods, since it
> holds the mutex lock prohibiting other vcpus from doing IO to QEMU
> devices. Without AIO, write-intensive benchmarks make guests hang for
> several seconds. Write performance also increases significantly.
>
> Also report errors properly.
>
> To take full advantage of parallel IO we need to allow more than one
> AIO thread per fd, or use direct IO (-nocache), which uses kernel AIO.
>
> A separate patch allows the virtio-block guest driver to queue more
> than one element in the virtio ring.
>
> Anthony, this patch abuses the virtqueue_push() interface by passing a
> VirtQueueElement with only the "index" member valid, since we know that
> is all it uses. Doing so avoids allocation, zeroing and copying of an
> entire VirtQueueElement structure. What do you say?
>   

I've got a virtio patch series that changes the virtqueue_pop() 
interface to return a pointer to a VirtQueueElement, which I believe 
addresses your use case.
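
For illustration only (this is a rough sketch, not the actual series;
it assumes virtqueue_pop() would hand back a heap-allocated element
that the caller later frees), the request could then simply keep the
popped element around until completion instead of faking one up:

typedef struct VirtIOBlockReq
{
    VirtIODevice *vdev;
    VirtQueue *vq;
    VirtQueueElement *elem;     /* as returned by the new virtqueue_pop() */
    unsigned int pending;
    unsigned int len;
    int status;
} VirtIOBlockReq;

static void virtio_blk_rw_complete(void *opaque, int ret)
{
    VirtIOBlockReq *req = opaque;
    struct virtio_blk_inhdr *in;

    req->status |= ret;
    if (--req->pending > 0)
        return;

    /* the status byte lives in the last in_sg entry, as in your patch */
    in = (void *)req->elem->in_sg[req->elem->in_num - 1].iov_base;
    in->status = req->status ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK;

    virtqueue_push(req->vq, req->elem, req->len);
    virtio_notify(req->vdev, req->vq);
    qemu_free(req->elem);       /* assuming the caller owns the element */
    qemu_free(req);
}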

I can just fold your patch into my series (I'll be sending it out this 
afternoon).

Regards,

Anthony Liguori

> Index: kvm-userspace.io/qemu/hw/virtio-blk.c
> ===================================================================
> --- kvm-userspace.io.orig/qemu/hw/virtio-blk.c
> +++ kvm-userspace.io/qemu/hw/virtio-blk.c
> @@ -77,53 +77,100 @@ static VirtIOBlock *to_virtio_blk(VirtIO
>      return (VirtIOBlock *)vdev;
>  }
>
> +typedef struct VirtIOBlockReq
> +{
> +    VirtIODevice *vdev;
> +    VirtQueue *vq;
> +    struct iovec in_sg_status;
> +    unsigned int pending;
> +    unsigned int len;
> +    unsigned int elem_idx;
> +    int status;
> +} VirtIOBlockReq;
> +
> +static void virtio_blk_rw_complete(void *opaque, int ret)
> +{
> +    VirtIOBlockReq *req = opaque;
> +    struct virtio_blk_inhdr *in;
> +    VirtQueueElement elem;
> +
> +    req->status |= ret;
> +    if (--req->pending > 0)
> +        return;
> +
> +    elem.index = req->elem_idx;
> +    in = (void *)req->in_sg_status.iov_base;
> +
> +    in->status = req->status ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK;
> +    virtqueue_push(req->vq, &elem, req->len);
> +    virtio_notify(req->vdev, req->vq);
> +    qemu_free(req);
> +}
> +
>  static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>  {
>      VirtIOBlock *s = to_virtio_blk(vdev);
>      VirtQueueElement elem;
> +    VirtIOBlockReq *req;
>      unsigned int count;
>
>      while ((count = virtqueue_pop(vq, &elem)) != 0) {
>       struct virtio_blk_inhdr *in;
>       struct virtio_blk_outhdr *out;
> -     unsigned int wlen;
>       off_t off;
>       int i;
>
> +     /*
> +      * FIXME: limit the number of in-flight requests
> +      */
> +     req = qemu_malloc(sizeof(VirtIOBlockReq));
> +     if (!req)
> +         return;
> +     memset(req, 0, sizeof(*req));
> +     memcpy(&req->in_sg_status, &elem.in_sg[elem.in_num - 1],
> +            sizeof(req->in_sg_status));
> +     req->vdev = vdev;
> +     req->vq = vq;
> +     req->elem_idx = elem.index;
> +
>       out = (void *)elem.out_sg[0].iov_base;
>       in = (void *)elem.in_sg[elem.in_num - 1].iov_base;
>       off = out->sector;
>
>       if (out->type & VIRTIO_BLK_T_SCSI_CMD) {
> -         wlen = sizeof(*in);
> +         unsigned int len = sizeof(*in);
> +
>           in->status = VIRTIO_BLK_S_UNSUPP;
> +         virtqueue_push(vq, &elem, len);
> +         virtio_notify(vdev, vq);
> +         qemu_free(req);
> +
>       } else if (out->type & VIRTIO_BLK_T_OUT) {
> -         wlen = sizeof(*in);
> +         req->pending = elem.out_num - 1;
>
>           for (i = 1; i < elem.out_num; i++) {
> -             bdrv_write(s->bs, off,
> +             bdrv_aio_write(s->bs, off,
>                          elem.out_sg[i].iov_base,
> -                        elem.out_sg[i].iov_len / 512);
> +                        elem.out_sg[i].iov_len / 512,
> +                        virtio_blk_rw_complete,
> +                        req);
>               off += elem.out_sg[i].iov_len / 512;
> +             req->len += elem.out_sg[i].iov_len;
>           }
>
> -         in->status = VIRTIO_BLK_S_OK;
>       } else {
> -         wlen = sizeof(*in);
> +         req->pending = elem.in_num - 1;
>
>           for (i = 0; i < elem.in_num - 1; i++) {
> -             bdrv_read(s->bs, off,
> +             bdrv_aio_read(s->bs, off,
>                         elem.in_sg[i].iov_base,
> -                       elem.in_sg[i].iov_len / 512);
> +                       elem.in_sg[i].iov_len / 512,
> +                       virtio_blk_rw_complete,
> +                       req);
>               off += elem.in_sg[i].iov_len / 512;
> -             wlen += elem.in_sg[i].iov_len;
> +             req->len += elem.in_sg[i].iov_len;
>           }
> -
> -         in->status = VIRTIO_BLK_S_OK;
>       }
> -
> -     virtqueue_push(vq, &elem, wlen);
> -     virtio_notify(vdev, vq);
>      }
>  }
>
>   

