Re: [RFC v3 1/8] blkio: add io_uring block driver using libblkio

2022-08-11 Thread Stefan Hajnoczi
On Wed, Jul 27, 2022 at 09:33:40PM +0200, Kevin Wolf wrote:
> Am 08.07.2022 um 06:17 hat Stefan Hajnoczi geschrieben:
> > libblkio (https://gitlab.com/libblkio/libblkio/) is a library for
> > high-performance disk I/O. It currently supports io_uring and
> > virtio-blk-vhost-vdpa with additional drivers under development.
> > 
> > One of the reasons for developing libblkio is that other applications
> > besides QEMU can use it. This will be particularly useful for
> > vhost-user-blk which applications may wish to use for connecting to
> > qemu-storage-daemon.
> > 
> > libblkio also gives us an opportunity to develop in Rust behind a C API
> > that is easy to consume from QEMU.
> > 
> > This commit adds io_uring and virtio-blk-vhost-vdpa BlockDrivers to QEMU
> > using libblkio. It will be easy to add other libblkio drivers since they
> > will share the majority of code.
> > 
> > For now I/O buffers are copied through bounce buffers if the libblkio
> > driver requires it. Later commits add an optimization for
> > pre-registering guest RAM to avoid bounce buffers.
> > 
> > The syntax is:
> > 
> >   --blockdev 
> > io_uring,node-name=drive0,filename=test.img,readonly=on|off,cache.direct=on|off
> > 
> > and:
> > 
> >   --blockdev 
> > virtio-blk-vhost-vdpa,node-name=drive0,path=/dev/vdpa...,readonly=on|off
> > 
> > Signed-off-by: Stefan Hajnoczi 
> 
> The subject line implies only io_uring, but you actually add vhost-vdpa
> support, too. I think the subject line should be changed.
> 
> I think it would also make sense to already implement support for
> vhost-user-blk on the QEMU side even if support isn't compiled in
> libblkio by default and opening vhost-user-blk images would therefore
> always fail with a default build.
> 
> But then you could run QEMU with a custom build of libblkio to make use
> of it without patching QEMU. This is probably useful for getting libvirt
> support for using a storage daemon implemented without having to wait
> for another QEMU release. (Peter, do you have any opinion on this?)

vhost-user-blk is now supported in all builds of libblkio. I'll add it.

Stefan


signature.asc
Description: PGP signature


Re: [RFC v3 1/8] blkio: add io_uring block driver using libblkio

2022-08-11 Thread Stefan Hajnoczi
On Wed, Jul 13, 2022 at 02:05:18PM +0200, Hanna Reitz wrote:
> On 08.07.22 06:17, Stefan Hajnoczi wrote:
> > libblkio (https://gitlab.com/libblkio/libblkio/) is a library for
> > high-performance disk I/O. It currently supports io_uring and
> > virtio-blk-vhost-vdpa with additional drivers under development.
> > 
> > One of the reasons for developing libblkio is that other applications
> > besides QEMU can use it. This will be particularly useful for
> > vhost-user-blk which applications may wish to use for connecting to
> > qemu-storage-daemon.
> > 
> > libblkio also gives us an opportunity to develop in Rust behind a C API
> > that is easy to consume from QEMU.
> > 
> > This commit adds io_uring and virtio-blk-vhost-vdpa BlockDrivers to QEMU
> > using libblkio. It will be easy to add other libblkio drivers since they
> > will share the majority of code.
> > 
> > For now I/O buffers are copied through bounce buffers if the libblkio
> > driver requires it. Later commits add an optimization for
> > pre-registering guest RAM to avoid bounce buffers.
> > 
> > The syntax is:
> > 
> >--blockdev 
> > io_uring,node-name=drive0,filename=test.img,readonly=on|off,cache.direct=on|off
> > 
> > and:
> > 
> >--blockdev 
> > virtio-blk-vhost-vdpa,node-name=drive0,path=/dev/vdpa...,readonly=on|off
> > 
> > Signed-off-by: Stefan Hajnoczi 
> > ---
> >   MAINTAINERS   |   6 +
> >   meson_options.txt |   2 +
> >   qapi/block-core.json  |  37 +-
> >   meson.build   |   9 +
> >   block/blkio.c | 659 ++
> >   tests/qtest/modules-test.c|   3 +
> >   block/meson.build |   1 +
> >   scripts/meson-buildoptions.sh |   3 +
> >   8 files changed, 718 insertions(+), 2 deletions(-)
> >   create mode 100644 block/blkio.c
> 
> [...]
> 
> > diff --git a/block/blkio.c b/block/blkio.c
> > new file mode 100644
> > index 00..7fbdbd7fae
> > --- /dev/null
> > +++ b/block/blkio.c
> > @@ -0,0 +1,659 @@
> 
> Not sure whether it’s necessary, but I would have expected a copyright
> header here.

Thanks for reminding me, I will add a header.

> 
> > +#include "qemu/osdep.h"
> > +#include <blkio.h>
> > +#include "block/block_int.h"
> > +#include "qapi/error.h"
> > +#include "qapi/qmp/qdict.h"
> > +#include "qemu/module.h"
> > +
> > +typedef struct BlkAIOCB {
> > +BlockAIOCB common;
> > +struct blkio_mem_region mem_region;
> > +QEMUIOVector qiov;
> > +struct iovec bounce_iov;
> > +} BlkioAIOCB;
> > +
> > +typedef struct {
> > +/* Protects ->blkio and request submission on ->blkioq */
> > +QemuMutex lock;
> > +
> > +struct blkio *blkio;
> > +struct blkioq *blkioq; /* this could be multi-queue in the future */
> > +int completion_fd;
> > +
> > +/* Polling fetches the next completion into this field */
> > +struct blkio_completion poll_completion;
> > +
> > +/* The value of the "mem-region-alignment" property */
> > +size_t mem_region_alignment;
> > +
> > +/* Can we skip adding/deleting blkio_mem_regions? */
> > +bool needs_mem_regions;
> > +} BDRVBlkioState;
> > +
> > +static void blkio_aiocb_complete(BlkioAIOCB *acb, int ret)
> > +{
> > +/* Copy bounce buffer back to qiov */
> > +if (acb->qiov.niov > 0) {
> > +qemu_iovec_from_buf(&acb->qiov, 0,
> > +acb->bounce_iov.iov_base,
> > +acb->bounce_iov.iov_len);
> > +qemu_iovec_destroy(&acb->qiov);
> > +}
> > +
> > +acb->common.cb(acb->common.opaque, ret);
> > +
> > +if (acb->mem_region.len > 0) {
> > +BDRVBlkioState *s = acb->common.bs->opaque;
> > +
> > +WITH_QEMU_LOCK_GUARD(&s->lock) {
> > +blkio_free_mem_region(s->blkio, &acb->mem_region);
> > +}
> > +}
> > +
> > +qemu_aio_unref(&acb->common);
> > +}
> > +
> > +/*
> > + * Only the thread that calls aio_poll() invokes fd and poll handlers.
> > + * Therefore locks are not necessary except when accessing s->blkio.
> > + *
> > + * No locking is performed around blkioq_get_completions() although other
> > + * threads may submit I/O requests on s->blkioq. We're assuming there is no
> > + * interference between blkioq_get_completions() and other s->blkioq APIs.
> > + */
> > +
> > +static void blkio_completion_fd_read(void *opaque)
> > +{
> > +BlockDriverState *bs = opaque;
> > +BDRVBlkioState *s = bs->opaque;
> > +struct blkio_completion completion;
> > +uint64_t val;
> > +ssize_t ret __attribute__((unused));
> 
> I’d prefer a `(void)ret;` over this attribute, not least because that line
> would give a nice opportunity to explain in a short comment why we ignore
> this return value that the compiler tells us not to ignore, but if you
> don’t, then this’ll be fine.

Okay, I'll use (void)ret; and add a comment.

> 
> > +
> > +/* Polling may have already fetched a completion */
> > +if (s->poll_completion.user_data != NULL) {
> > +completion = s->poll_completion;
> 

Re: [RFC v3 1/8] blkio: add io_uring block driver using libblkio

2022-08-11 Thread Stefan Hajnoczi
On Tue, Jul 12, 2022 at 04:23:32PM +0200, Stefano Garzarella wrote:
> On Fri, Jul 08, 2022 at 05:17:30AM +0100, Stefan Hajnoczi wrote:
> > libblkio (https://gitlab.com/libblkio/libblkio/) is a library for
> > high-performance disk I/O. It currently supports io_uring and
> > virtio-blk-vhost-vdpa with additional drivers under development.
> > 
> > One of the reasons for developing libblkio is that other applications
> > besides QEMU can use it. This will be particularly useful for
> > vhost-user-blk which applications may wish to use for connecting to
> > qemu-storage-daemon.
> > 
> > libblkio also gives us an opportunity to develop in Rust behind a C API
> > that is easy to consume from QEMU.
> > 
> > This commit adds io_uring and virtio-blk-vhost-vdpa BlockDrivers to QEMU
> > using libblkio. It will be easy to add other libblkio drivers since they
> > will share the majority of code.
> > 
> > For now I/O buffers are copied through bounce buffers if the libblkio
> > driver requires it. Later commits add an optimization for
> > pre-registering guest RAM to avoid bounce buffers.
> > 
> > The syntax is:
> > 
> >  --blockdev 
> > io_uring,node-name=drive0,filename=test.img,readonly=on|off,cache.direct=on|off
> > 
> > and:
> > 
> >  --blockdev 
> > virtio-blk-vhost-vdpa,node-name=drive0,path=/dev/vdpa...,readonly=on|off
> > 
> > Signed-off-by: Stefan Hajnoczi 
> > ---
> > MAINTAINERS   |   6 +
> > meson_options.txt |   2 +
> > qapi/block-core.json  |  37 +-
> > meson.build   |   9 +
> > block/blkio.c | 659 ++
> > tests/qtest/modules-test.c|   3 +
> > block/meson.build |   1 +
> > scripts/meson-buildoptions.sh |   3 +
> > 8 files changed, 718 insertions(+), 2 deletions(-)
> > create mode 100644 block/blkio.c
> > 
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 450abd0252..50f340d9ee 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -3395,6 +3395,12 @@ L: qemu-block@nongnu.org
> > S: Maintained
> > F: block/vdi.c
> > 
> > +blkio
> > +M: Stefan Hajnoczi 
> > +L: qemu-block@nongnu.org
> > +S: Maintained
> > +F: block/blkio.c
> > +
> > iSCSI
> > M: Ronnie Sahlberg 
> > M: Paolo Bonzini 
> > diff --git a/meson_options.txt b/meson_options.txt
> > index 97c38109b1..b0b2e0c9b5 100644
> > --- a/meson_options.txt
> > +++ b/meson_options.txt
> > @@ -117,6 +117,8 @@ option('bzip2', type : 'feature', value : 'auto',
> >description: 'bzip2 support for DMG images')
> > option('cap_ng', type : 'feature', value : 'auto',
> >description: 'cap_ng support')
> > +option('blkio', type : 'feature', value : 'auto',
> > +   description: 'libblkio block device driver')
> > option('bpf', type : 'feature', value : 'auto',
> > description: 'eBPF support')
> > option('cocoa', type : 'feature', value : 'auto',
> > diff --git a/qapi/block-core.json b/qapi/block-core.json
> > index 2173e7734a..aa63d5e9bd 100644
> > --- a/qapi/block-core.json
> > +++ b/qapi/block-core.json
> > @@ -2951,11 +2951,15 @@
> > 'file', 'snapshot-access', 'ftp', 'ftps', 'gluster',
> > {'name': 'host_cdrom', 'if': 'HAVE_HOST_BLOCK_DEVICE' },
> > {'name': 'host_device', 'if': 'HAVE_HOST_BLOCK_DEVICE' },
> > -'http', 'https', 'iscsi',
> > +'http', 'https',
> > +{ 'name': 'io_uring', 'if': 'CONFIG_BLKIO' },
> > +'iscsi',
> > 'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 
> > 'parallels',
> > 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
> > { 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
> > -'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
> > +'ssh', 'throttle', 'vdi', 'vhdx',
> > +{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
> > +'vmdk', 'vpc', 'vvfat' ] }
> > 
> > ##
> > # @BlockdevOptionsFile:
> > @@ -3678,6 +3682,30 @@
> > '*debug': 'int',
> > '*logfile': 'str' } }
> > 
> > +##
> > +# @BlockdevOptionsIoUring:
> > +#
> > +# Driver specific block device options for the io_uring backend.
> > +#
> > +# @filename: path to the image file
> > +#
> > +# Since: 7.1
> > +##
> > +{ 'struct': 'BlockdevOptionsIoUring',
> > +  'data': { 'filename': 'str' } }
> > +
> > +##
> > +# @BlockdevOptionsVirtioBlkVhostVdpa:
> > +#
> > +# Driver specific block device options for the virtio-blk-vhost-vdpa 
> > backend.
> > +#
> > +# @path: path to the vhost-vdpa character device.
> > +#
> > +# Since: 7.1
> > +##
> > +{ 'struct': 'BlockdevOptionsVirtioBlkVhostVdpa',
> > +  'data': { 'path': 'str' } }
> > +
> > ##
> > # @IscsiTransport:
> > #
> > @@ -4305,6 +4333,8 @@
> >'if': 'HAVE_HOST_BLOCK_DEVICE' },
> >   'http':   'BlockdevOptionsCurlHttp',
> >   'https':  'BlockdevOptionsCurlHttps',
> > +  'io_uring':   { 'type': 'BlockdevOptionsIoUring',
> > +   

Re: [RFC v3 1/8] blkio: add io_uring block driver using libblkio

2022-08-03 Thread Kevin Wolf
Am 03.08.2022 um 14:25 hat Peter Krempa geschrieben:
> On Wed, Jul 27, 2022 at 21:33:40 +0200, Kevin Wolf wrote:
> > Am 08.07.2022 um 06:17 hat Stefan Hajnoczi geschrieben:
> > > libblkio (https://gitlab.com/libblkio/libblkio/) is a library for
> > > high-performance disk I/O. It currently supports io_uring and
> > > virtio-blk-vhost-vdpa with additional drivers under development.
> > > 
> > > One of the reasons for developing libblkio is that other applications
> > > besides QEMU can use it. This will be particularly useful for
> > > vhost-user-blk which applications may wish to use for connecting to
> > > qemu-storage-daemon.
> > > 
> > > libblkio also gives us an opportunity to develop in Rust behind a C API
> > > that is easy to consume from QEMU.
> > > 
> > > This commit adds io_uring and virtio-blk-vhost-vdpa BlockDrivers to QEMU
> > > using libblkio. It will be easy to add other libblkio drivers since they
> > > will share the majority of code.
> > > 
> > > For now I/O buffers are copied through bounce buffers if the libblkio
> > > driver requires it. Later commits add an optimization for
> > > pre-registering guest RAM to avoid bounce buffers.
> > > 
> > > The syntax is:
> > > 
> > >   --blockdev 
> > > io_uring,node-name=drive0,filename=test.img,readonly=on|off,cache.direct=on|off
> > > 
> > > and:
> > > 
> > >   --blockdev 
> > > virtio-blk-vhost-vdpa,node-name=drive0,path=/dev/vdpa...,readonly=on|off
> > > 
> > > Signed-off-by: Stefan Hajnoczi 
> > 
> > The subject line implies only io_uring, but you actually add vhost-vdpa
> > support, too. I think the subject line should be changed.
> > 
> > I think it would also make sense to already implement support for
> > vhost-user-blk on the QEMU side even if support isn't compiled in
> > libblkio by default and opening vhost-user-blk images would therefore
> > always fail with a default build.
> > 
> > But then you could run QEMU with a custom build of libblkio to make use
> > of it without patching QEMU. This is probably useful for getting libvirt
> > support for using a storage daemon implemented without having to wait
> > for another QEMU release. (Peter, do you have any opinion on this?)
> 
> How will this work in terms of detecting whether that feature is
> present?
> 
> The issue is that libvirt caches capabilities of qemu and the cache is
> invalidated based on the timestamp of the qemu binary (and few other
> mostly host kernel and cpu properties). In case when a backend library
> is updated/changed this probably means that libvirt will not be able to
> detect that qemu gained support.

How is this done with other libraries? We use a few more storage
libraries and depending on their version, we may or may not be able to
provide some feature. I assume we always just ignored this and if you
don't have the right version, you get runtime errors.

> In case when qemu lies about the support even if the backend library
> doesn't support it then we have a problem in not being even able to see
> whether we can use it.

I'm not sure if I would call it "lying", it's just that we have a static
QAPI schema that can only represent what the QEMU binary could
theoretically handle, but not dynamically what is actually available at
runtime.

Another option would be to either add an API to libblkio that returns a
list of supported drivers or probe it with a pair of blkio_create() and
blkio_destroy() before registering the QEMU drivers. QEMU and qemu-img
can print a list of registered read-write and read-only block drivers
and I think libvirt has been using that?

Of course, it doesn't change anything about the fact that this list
can change between two QEMU runs if you replace the library, but don't
touch QEMU.

Kevin




Re: [RFC v3 1/8] blkio: add io_uring block driver using libblkio

2022-08-03 Thread Peter Krempa
On Wed, Jul 27, 2022 at 21:33:40 +0200, Kevin Wolf wrote:
> Am 08.07.2022 um 06:17 hat Stefan Hajnoczi geschrieben:
> > libblkio (https://gitlab.com/libblkio/libblkio/) is a library for
> > high-performance disk I/O. It currently supports io_uring and
> > virtio-blk-vhost-vdpa with additional drivers under development.
> > 
> > One of the reasons for developing libblkio is that other applications
> > besides QEMU can use it. This will be particularly useful for
> > vhost-user-blk which applications may wish to use for connecting to
> > qemu-storage-daemon.
> > 
> > libblkio also gives us an opportunity to develop in Rust behind a C API
> > that is easy to consume from QEMU.
> > 
> > This commit adds io_uring and virtio-blk-vhost-vdpa BlockDrivers to QEMU
> > using libblkio. It will be easy to add other libblkio drivers since they
> > will share the majority of code.
> > 
> > For now I/O buffers are copied through bounce buffers if the libblkio
> > driver requires it. Later commits add an optimization for
> > pre-registering guest RAM to avoid bounce buffers.
> > 
> > The syntax is:
> > 
> >   --blockdev 
> > io_uring,node-name=drive0,filename=test.img,readonly=on|off,cache.direct=on|off
> > 
> > and:
> > 
> >   --blockdev 
> > virtio-blk-vhost-vdpa,node-name=drive0,path=/dev/vdpa...,readonly=on|off
> > 
> > Signed-off-by: Stefan Hajnoczi 
> 
> The subject line implies only io_uring, but you actually add vhost-vdpa
> support, too. I think the subject line should be changed.
> 
> I think it would also make sense to already implement support for
> vhost-user-blk on the QEMU side even if support isn't compiled in
> libblkio by default and opening vhost-user-blk images would therefore
> always fail with a default build.
> 
> But then you could run QEMU with a custom build of libblkio to make use
> of it without patching QEMU. This is probably useful for getting libvirt
> support for using a storage daemon implemented without having to wait
> for another QEMU release. (Peter, do you have any opinion on this?)

How will this work in terms of detecting whether that feature is
present?

The issue is that libvirt caches capabilities of qemu and the cache is
invalidated based on the timestamp of the qemu binary (and few other
mostly host kernel and cpu properties). In case when a backend library
is updated/changed this probably means that libvirt will not be able to
detect that qemu gained support.

In case when qemu lies about the support even if the backend library
doesn't support it then we have a problem in not being even able to see
whether we can use it.




Re: [RFC v3 1/8] blkio: add io_uring block driver using libblkio

2022-07-27 Thread Kevin Wolf
Am 08.07.2022 um 06:17 hat Stefan Hajnoczi geschrieben:
> libblkio (https://gitlab.com/libblkio/libblkio/) is a library for
> high-performance disk I/O. It currently supports io_uring and
> virtio-blk-vhost-vdpa with additional drivers under development.
> 
> One of the reasons for developing libblkio is that other applications
> besides QEMU can use it. This will be particularly useful for
> vhost-user-blk which applications may wish to use for connecting to
> qemu-storage-daemon.
> 
> libblkio also gives us an opportunity to develop in Rust behind a C API
> that is easy to consume from QEMU.
> 
> This commit adds io_uring and virtio-blk-vhost-vdpa BlockDrivers to QEMU
> using libblkio. It will be easy to add other libblkio drivers since they
> will share the majority of code.
> 
> For now I/O buffers are copied through bounce buffers if the libblkio
> driver requires it. Later commits add an optimization for
> pre-registering guest RAM to avoid bounce buffers.
> 
> The syntax is:
> 
>   --blockdev 
> io_uring,node-name=drive0,filename=test.img,readonly=on|off,cache.direct=on|off
> 
> and:
> 
>   --blockdev 
> virtio-blk-vhost-vdpa,node-name=drive0,path=/dev/vdpa...,readonly=on|off
> 
> Signed-off-by: Stefan Hajnoczi 

The subject line implies only io_uring, but you actually add vhost-vdpa
support, too. I think the subject line should be changed.

I think it would also make sense to already implement support for
vhost-user-blk on the QEMU side even if support isn't compiled in
libblkio by default and opening vhost-user-blk images would therefore
always fail with a default build.

But then you could run QEMU with a custom build of libblkio to make use
of it without patching QEMU. This is probably useful for getting libvirt
support for using a storage daemon implemented without having to wait
for another QEMU release. (Peter, do you have any opinion on this?)

Kevin




Re: [RFC v3 1/8] blkio: add io_uring block driver using libblkio

2022-07-13 Thread Hanna Reitz

On 08.07.22 06:17, Stefan Hajnoczi wrote:

libblkio (https://gitlab.com/libblkio/libblkio/) is a library for
high-performance disk I/O. It currently supports io_uring and
virtio-blk-vhost-vdpa with additional drivers under development.

One of the reasons for developing libblkio is that other applications
besides QEMU can use it. This will be particularly useful for
vhost-user-blk which applications may wish to use for connecting to
qemu-storage-daemon.

libblkio also gives us an opportunity to develop in Rust behind a C API
that is easy to consume from QEMU.

This commit adds io_uring and virtio-blk-vhost-vdpa BlockDrivers to QEMU
using libblkio. It will be easy to add other libblkio drivers since they
will share the majority of code.

For now I/O buffers are copied through bounce buffers if the libblkio
driver requires it. Later commits add an optimization for
pre-registering guest RAM to avoid bounce buffers.

The syntax is:

   --blockdev 
io_uring,node-name=drive0,filename=test.img,readonly=on|off,cache.direct=on|off

and:

   --blockdev 
virtio-blk-vhost-vdpa,node-name=drive0,path=/dev/vdpa...,readonly=on|off

Signed-off-by: Stefan Hajnoczi 
---
  MAINTAINERS   |   6 +
  meson_options.txt |   2 +
  qapi/block-core.json  |  37 +-
  meson.build   |   9 +
  block/blkio.c | 659 ++
  tests/qtest/modules-test.c|   3 +
  block/meson.build |   1 +
  scripts/meson-buildoptions.sh |   3 +
  8 files changed, 718 insertions(+), 2 deletions(-)
  create mode 100644 block/blkio.c


[...]


diff --git a/block/blkio.c b/block/blkio.c
new file mode 100644
index 00..7fbdbd7fae
--- /dev/null
+++ b/block/blkio.c
@@ -0,0 +1,659 @@


Not sure whether it’s necessary, but I would have expected a copyright 
header here.



+#include "qemu/osdep.h"
+#include <blkio.h>
+#include "block/block_int.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "qemu/module.h"
+
+typedef struct BlkAIOCB {
+BlockAIOCB common;
+struct blkio_mem_region mem_region;
+QEMUIOVector qiov;
+struct iovec bounce_iov;
+} BlkioAIOCB;
+
+typedef struct {
+/* Protects ->blkio and request submission on ->blkioq */
+QemuMutex lock;
+
+struct blkio *blkio;
+struct blkioq *blkioq; /* this could be multi-queue in the future */
+int completion_fd;
+
+/* Polling fetches the next completion into this field */
+struct blkio_completion poll_completion;
+
+/* The value of the "mem-region-alignment" property */
+size_t mem_region_alignment;
+
+/* Can we skip adding/deleting blkio_mem_regions? */
+bool needs_mem_regions;
+} BDRVBlkioState;
+
+static void blkio_aiocb_complete(BlkioAIOCB *acb, int ret)
+{
+/* Copy bounce buffer back to qiov */
+if (acb->qiov.niov > 0) {
+qemu_iovec_from_buf(&acb->qiov, 0,
+acb->bounce_iov.iov_base,
+acb->bounce_iov.iov_len);
+qemu_iovec_destroy(&acb->qiov);
+}
+
+acb->common.cb(acb->common.opaque, ret);
+
+if (acb->mem_region.len > 0) {
+BDRVBlkioState *s = acb->common.bs->opaque;
+
+WITH_QEMU_LOCK_GUARD(&s->lock) {
+blkio_free_mem_region(s->blkio, &acb->mem_region);
+}
+}
+
+qemu_aio_unref(&acb->common);
+}
+
+/*
+ * Only the thread that calls aio_poll() invokes fd and poll handlers.
+ * Therefore locks are not necessary except when accessing s->blkio.
+ *
+ * No locking is performed around blkioq_get_completions() although other
+ * threads may submit I/O requests on s->blkioq. We're assuming there is no
+ * interference between blkioq_get_completions() and other s->blkioq APIs.
+ */
+
+static void blkio_completion_fd_read(void *opaque)
+{
+BlockDriverState *bs = opaque;
+BDRVBlkioState *s = bs->opaque;
+struct blkio_completion completion;
+uint64_t val;
+ssize_t ret __attribute__((unused));


I’d prefer a `(void)ret;` over this attribute, not least because that 
line would give a nice opportunity to explain in a short comment why we 
ignore this return value that the compiler tells us not to ignore, but 
if you don’t, then this’ll be fine.



+
+/* Polling may have already fetched a completion */
+if (s->poll_completion.user_data != NULL) {
+completion = s->poll_completion;
+
+/* Clear it in case blkio_aiocb_complete() has a nested event loop */
+s->poll_completion.user_data = NULL;
+
+blkio_aiocb_complete(completion.user_data, completion.ret);
+}
+
+/* Reset completion fd status */
+ret = read(s->completion_fd, &val, sizeof(val));
+
+/*
+ * Reading one completion at a time makes nested event loop re-entrancy
+ * simple. Change this loop to get multiple completions in one go if it
+ * becomes a performance bottleneck.
+ */
+while (blkioq_do_io(s->blkioq, &completion, 0, 1, NULL) == 1) {
+blkio_aiocb_complete(completion.user_data, completion.ret);
+}
+}
+
+static bool 

Re: [RFC v3 1/8] blkio: add io_uring block driver using libblkio

2022-07-12 Thread Stefano Garzarella

On Fri, Jul 08, 2022 at 05:17:30AM +0100, Stefan Hajnoczi wrote:

libblkio (https://gitlab.com/libblkio/libblkio/) is a library for
high-performance disk I/O. It currently supports io_uring and
virtio-blk-vhost-vdpa with additional drivers under development.

One of the reasons for developing libblkio is that other applications
besides QEMU can use it. This will be particularly useful for
vhost-user-blk which applications may wish to use for connecting to
qemu-storage-daemon.

libblkio also gives us an opportunity to develop in Rust behind a C API
that is easy to consume from QEMU.

This commit adds io_uring and virtio-blk-vhost-vdpa BlockDrivers to QEMU
using libblkio. It will be easy to add other libblkio drivers since they
will share the majority of code.

For now I/O buffers are copied through bounce buffers if the libblkio
driver requires it. Later commits add an optimization for
pre-registering guest RAM to avoid bounce buffers.

The syntax is:

 --blockdev 
io_uring,node-name=drive0,filename=test.img,readonly=on|off,cache.direct=on|off

and:

 --blockdev 
virtio-blk-vhost-vdpa,node-name=drive0,path=/dev/vdpa...,readonly=on|off

Signed-off-by: Stefan Hajnoczi 
---
MAINTAINERS   |   6 +
meson_options.txt |   2 +
qapi/block-core.json  |  37 +-
meson.build   |   9 +
block/blkio.c | 659 ++
tests/qtest/modules-test.c|   3 +
block/meson.build |   1 +
scripts/meson-buildoptions.sh |   3 +
8 files changed, 718 insertions(+), 2 deletions(-)
create mode 100644 block/blkio.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 450abd0252..50f340d9ee 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3395,6 +3395,12 @@ L: qemu-block@nongnu.org
S: Maintained
F: block/vdi.c

+blkio
+M: Stefan Hajnoczi 
+L: qemu-block@nongnu.org
+S: Maintained
+F: block/blkio.c
+
iSCSI
M: Ronnie Sahlberg 
M: Paolo Bonzini 
diff --git a/meson_options.txt b/meson_options.txt
index 97c38109b1..b0b2e0c9b5 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -117,6 +117,8 @@ option('bzip2', type : 'feature', value : 'auto',
   description: 'bzip2 support for DMG images')
option('cap_ng', type : 'feature', value : 'auto',
   description: 'cap_ng support')
+option('blkio', type : 'feature', value : 'auto',
+   description: 'libblkio block device driver')
option('bpf', type : 'feature', value : 'auto',
description: 'eBPF support')
option('cocoa', type : 'feature', value : 'auto',
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 2173e7734a..aa63d5e9bd 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2951,11 +2951,15 @@
'file', 'snapshot-access', 'ftp', 'ftps', 'gluster',
{'name': 'host_cdrom', 'if': 'HAVE_HOST_BLOCK_DEVICE' },
{'name': 'host_device', 'if': 'HAVE_HOST_BLOCK_DEVICE' },
-'http', 'https', 'iscsi',
+'http', 'https',
+{ 'name': 'io_uring', 'if': 'CONFIG_BLKIO' },
+'iscsi',
'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
-'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+'ssh', 'throttle', 'vdi', 'vhdx',
+{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
+'vmdk', 'vpc', 'vvfat' ] }

##
# @BlockdevOptionsFile:
@@ -3678,6 +3682,30 @@
'*debug': 'int',
'*logfile': 'str' } }

+##
+# @BlockdevOptionsIoUring:
+#
+# Driver specific block device options for the io_uring backend.
+#
+# @filename: path to the image file
+#
+# Since: 7.1
+##
+{ 'struct': 'BlockdevOptionsIoUring',
+  'data': { 'filename': 'str' } }
+
+##
+# @BlockdevOptionsVirtioBlkVhostVdpa:
+#
+# Driver specific block device options for the virtio-blk-vhost-vdpa backend.
+#
+# @path: path to the vhost-vdpa character device.
+#
+# Since: 7.1
+##
+{ 'struct': 'BlockdevOptionsVirtioBlkVhostVdpa',
+  'data': { 'path': 'str' } }
+
##
# @IscsiTransport:
#
@@ -4305,6 +4333,8 @@
   'if': 'HAVE_HOST_BLOCK_DEVICE' },
  'http':   'BlockdevOptionsCurlHttp',
  'https':  'BlockdevOptionsCurlHttps',
+  'io_uring':   { 'type': 'BlockdevOptionsIoUring',
+  'if': 'CONFIG_BLKIO' },
  'iscsi':  'BlockdevOptionsIscsi',
  'luks':   'BlockdevOptionsLUKS',
  'nbd':'BlockdevOptionsNbd',
@@ -4327,6 +4357,9 @@
  'throttle':   'BlockdevOptionsThrottle',
  'vdi':'BlockdevOptionsGenericFormat',
  'vhdx':   'BlockdevOptionsGenericFormat',
+  'virtio-blk-vhost-vdpa':
+{ 'type': 'BlockdevOptionsVirtioBlkVhostVdpa',
+  'if': 'CONFIG_BLKIO' },
  'vmdk':   'BlockdevOptionsGenericCOWFormat',
  'vpc':'BlockdevOptionsGenericFormat',
  

[RFC v3 1/8] blkio: add io_uring block driver using libblkio

2022-07-07 Thread Stefan Hajnoczi
libblkio (https://gitlab.com/libblkio/libblkio/) is a library for
high-performance disk I/O. It currently supports io_uring and
virtio-blk-vhost-vdpa with additional drivers under development.

One of the reasons for developing libblkio is that other applications
besides QEMU can use it. This will be particularly useful for
vhost-user-blk which applications may wish to use for connecting to
qemu-storage-daemon.

libblkio also gives us an opportunity to develop in Rust behind a C API
that is easy to consume from QEMU.

This commit adds io_uring and virtio-blk-vhost-vdpa BlockDrivers to QEMU
using libblkio. It will be easy to add other libblkio drivers since they
will share the majority of code.

For now I/O buffers are copied through bounce buffers if the libblkio
driver requires it. Later commits add an optimization for
pre-registering guest RAM to avoid bounce buffers.

The syntax is:

  --blockdev 
io_uring,node-name=drive0,filename=test.img,readonly=on|off,cache.direct=on|off

and:

  --blockdev 
virtio-blk-vhost-vdpa,node-name=drive0,path=/dev/vdpa...,readonly=on|off

Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS   |   6 +
 meson_options.txt |   2 +
 qapi/block-core.json  |  37 +-
 meson.build   |   9 +
 block/blkio.c | 659 ++
 tests/qtest/modules-test.c|   3 +
 block/meson.build |   1 +
 scripts/meson-buildoptions.sh |   3 +
 8 files changed, 718 insertions(+), 2 deletions(-)
 create mode 100644 block/blkio.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 450abd0252..50f340d9ee 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3395,6 +3395,12 @@ L: qemu-block@nongnu.org
 S: Maintained
 F: block/vdi.c
 
+blkio
+M: Stefan Hajnoczi 
+L: qemu-block@nongnu.org
+S: Maintained
+F: block/blkio.c
+
 iSCSI
 M: Ronnie Sahlberg 
 M: Paolo Bonzini 
diff --git a/meson_options.txt b/meson_options.txt
index 97c38109b1..b0b2e0c9b5 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -117,6 +117,8 @@ option('bzip2', type : 'feature', value : 'auto',
description: 'bzip2 support for DMG images')
 option('cap_ng', type : 'feature', value : 'auto',
description: 'cap_ng support')
+option('blkio', type : 'feature', value : 'auto',
+   description: 'libblkio block device driver')
 option('bpf', type : 'feature', value : 'auto',
 description: 'eBPF support')
 option('cocoa', type : 'feature', value : 'auto',
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 2173e7734a..aa63d5e9bd 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2951,11 +2951,15 @@
 'file', 'snapshot-access', 'ftp', 'ftps', 'gluster',
 {'name': 'host_cdrom', 'if': 'HAVE_HOST_BLOCK_DEVICE' },
 {'name': 'host_device', 'if': 'HAVE_HOST_BLOCK_DEVICE' },
-'http', 'https', 'iscsi',
+'http', 'https',
+{ 'name': 'io_uring', 'if': 'CONFIG_BLKIO' },
+'iscsi',
 'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
 { 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
-'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+'ssh', 'throttle', 'vdi', 'vhdx',
+{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
+'vmdk', 'vpc', 'vvfat' ] }
 
 ##
 # @BlockdevOptionsFile:
@@ -3678,6 +3682,30 @@
 '*debug': 'int',
 '*logfile': 'str' } }
 
+##
+# @BlockdevOptionsIoUring:
+#
+# Driver specific block device options for the io_uring backend.
+#
+# @filename: path to the image file
+#
+# Since: 7.1
+##
+{ 'struct': 'BlockdevOptionsIoUring',
+  'data': { 'filename': 'str' } }
+
+##
+# @BlockdevOptionsVirtioBlkVhostVdpa:
+#
+# Driver specific block device options for the virtio-blk-vhost-vdpa backend.
+#
+# @path: path to the vhost-vdpa character device.
+#
+# Since: 7.1
+##
+{ 'struct': 'BlockdevOptionsVirtioBlkVhostVdpa',
+  'data': { 'path': 'str' } }
+
 ##
 # @IscsiTransport:
 #
@@ -4305,6 +4333,8 @@
'if': 'HAVE_HOST_BLOCK_DEVICE' },
   'http':   'BlockdevOptionsCurlHttp',
   'https':  'BlockdevOptionsCurlHttps',
+  'io_uring':   { 'type': 'BlockdevOptionsIoUring',
+  'if': 'CONFIG_BLKIO' },
   'iscsi':  'BlockdevOptionsIscsi',
   'luks':   'BlockdevOptionsLUKS',
   'nbd':'BlockdevOptionsNbd',
@@ -4327,6 +4357,9 @@
   'throttle':   'BlockdevOptionsThrottle',
   'vdi':'BlockdevOptionsGenericFormat',
   'vhdx':   'BlockdevOptionsGenericFormat',
+  'virtio-blk-vhost-vdpa':
+{ 'type': 'BlockdevOptionsVirtioBlkVhostVdpa',
+  'if': 'CONFIG_BLKIO' },
   'vmdk':   'BlockdevOptionsGenericCOWFormat',
   'vpc':'BlockdevOptionsGenericFormat',
   'vvfat':