This patch introduces a new flag to indicate that we are going to read sequentially from a file and do not plan to reread/reuse the data after it has been read.
The current use of this flag is to open the source(s) of a qemu-img convert process. If a protocol from block/raw-posix.c is used, posix_fadvise is utilized to advise the kernel that we are going to read sequentially from the file, and a POSIX_FADV_DONTNEED advice is issued after each write to indicate that there is no advantage in keeping the blocks in the buffers. While the first seems to offer a slight performance benefit, the latter prevents older data from being swapped out merely to keep this data unnecessarily buffered. Signed-off-by: Peter Lieven <p...@kamp.de> --- block/raw-posix.c | 14 ++++++++++++++ include/block/block.h | 1 + qemu-img.c | 3 ++- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/block/raw-posix.c b/block/raw-posix.c index 161ea14..fa6d9d2 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -433,6 +433,13 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, } #endif +#ifdef POSIX_FADV_SEQUENTIAL + if (bs->open_flags & BDRV_O_SEQUENTIAL && + !(bs->open_flags & BDRV_O_NOCACHE)) { + posix_fadvise(s->fd, 0, 0, POSIX_FADV_SEQUENTIAL); + } +#endif + ret = 0; fail: qemu_opts_del(opts); @@ -902,6 +909,13 @@ static int aio_worker(void *arg) ret = aiocb->aio_nbytes; } if (ret == aiocb->aio_nbytes) { +#ifdef POSIX_FADV_DONTNEED + if (aiocb->bs->open_flags & BDRV_O_SEQUENTIAL && + !(aiocb->bs->open_flags & BDRV_O_NOCACHE)) { + posix_fadvise(aiocb->aio_fildes, aiocb->aio_offset, + aiocb->aio_nbytes, POSIX_FADV_DONTNEED); + } +#endif ret = 0; } else if (ret >= 0 && ret < aiocb->aio_nbytes) { ret = -EINVAL; diff --git a/include/block/block.h b/include/block/block.h index 780f48b..502982f 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -105,6 +105,7 @@ typedef enum { #define BDRV_O_PROTOCOL 0x8000 /* if no block driver is explicitly given: select an appropriate protocol driver, ignoring the format layer */ +#define BDRV_O_SEQUENTIAL 0x10000 /* open device for sequential read/write */ #define BDRV_O_CACHE_MASK 
(BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH) diff --git a/qemu-img.c b/qemu-img.c index 78fc868..e7a5721 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -1298,7 +1298,8 @@ static int img_convert(int argc, char **argv) total_sectors = 0; for (bs_i = 0; bs_i < bs_n; bs_i++) { - bs[bs_i] = bdrv_new_open(argv[optind + bs_i], fmt, BDRV_O_FLAGS, true, + bs[bs_i] = bdrv_new_open(argv[optind + bs_i], fmt, + BDRV_O_FLAGS | BDRV_O_SEQUENTIAL, true, quiet); if (!bs[bs_i]) { error_report("Could not open '%s'", argv[optind + bs_i]); -- 1.7.9.5