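The io_uring_prep_readv2() and io_uring_prep_writev2() man pages recommend using the non-vectored read/write operations when possible for performance reasons. Use io_uring_prep_read() and io_uring_prep_write() for requests that do not span multiple iovecs (and, for writes, do not need RWF_DSYNC to implement FUA).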
I didn't measure a significant difference but it doesn't hurt to have this optimization in place. Suggested-by: Eric Blake <[email protected]> Signed-off-by: Stefan Hajnoczi <[email protected]> --- v5: - Reduce #ifdef HAVE_IO_URING_PREP_WRITEV2 code duplication [Kevin] --- block/io_uring.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/block/io_uring.c b/block/io_uring.c index dd930ee57e..f1514cf024 100644 --- a/block/io_uring.c +++ b/block/io_uring.c @@ -46,17 +46,28 @@ static void luring_prep_sqe(struct io_uring_sqe *sqe, void *opaque) switch (req->type) { case QEMU_AIO_WRITE: -#ifdef HAVE_IO_URING_PREP_WRITEV2 { int luring_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0; - io_uring_prep_writev2(sqe, fd, qiov->iov, - qiov->niov, offset, luring_flags); - } + if (luring_flags != 0 || qiov->niov > 1) { +#ifdef HAVE_IO_URING_PREP_WRITEV2 + io_uring_prep_writev2(sqe, fd, qiov->iov, + qiov->niov, offset, luring_flags); #else - assert(flags == 0); - io_uring_prep_writev(sqe, fd, qiov->iov, qiov->niov, offset); + /* + * FUA should only be enabled with HAVE_IO_URING_PREP_WRITEV2, see + * luring_has_fua(). 
+ */ + assert(luring_flags == 0); + + io_uring_prep_writev(sqe, fd, qiov->iov, qiov->niov, offset); #endif + } else { + /* The man page says non-vectored is faster than vectored */ + struct iovec *iov = qiov->iov; + io_uring_prep_write(sqe, fd, iov->iov_base, iov->iov_len, offset); + } break; + } case QEMU_AIO_ZONE_APPEND: io_uring_prep_writev(sqe, fd, qiov->iov, qiov->niov, offset); break; @@ -65,8 +76,15 @@ static void luring_prep_sqe(struct io_uring_sqe *sqe, void *opaque) if (req->resubmit_qiov.iov != NULL) { qiov = &req->resubmit_qiov; } - io_uring_prep_readv(sqe, fd, qiov->iov, qiov->niov, - offset + req->total_read); + if (qiov->niov > 1) { + io_uring_prep_readv(sqe, fd, qiov->iov, qiov->niov, + offset + req->total_read); + } else { + /* The man page says non-vectored is faster than vectored */ + struct iovec *iov = qiov->iov; + io_uring_prep_read(sqe, fd, iov->iov_base, iov->iov_len, + offset + req->total_read); + } break; } case QEMU_AIO_FLUSH: -- 2.51.1
