On 2019/4/12 9:52, Xiang Zheng wrote: > On 2019/4/11 20:22, Kevin Wolf wrote: >> Okay, so your problem is that blk_pread() writes to the whole buffer, >> writing explicit zeroes for unallocated parts of the image, while you >> would like to leave those parts of the buffer untouched so that we don't >> actually allocate the memory, but can just use the shared zero page. >> >> If you just want to read the non-zero parts of the image, that can be >> done by using a loop that calls bdrv_block_status() and only reads from >> the image if the BDRV_BLOCK_ZERO bit is clear. >> >> Would this solve your problem? > > Sounds good! What if guest tried to read/write the zero parts? >
I wrote the patch below (written with reference to bdrv_make_zero()) as a test. Everything seems to work, and the memory is now allocated only on demand. This requires the pflash devices to be backed by sparse files, so I have to create the images like this: dd of="QEMU_EFI-pflash.raw" if="/dev/zero" bs=1M seek=64 count=0 dd of="QEMU_EFI-pflash.raw" if="QEMU_EFI.fd" conv=notrunc dd of="empty_VARS.fd" if="/dev/zero" bs=1M seek=64 count=0 ---8>--- diff --git a/block/block-backend.c b/block/block-backend.c index f78e82a..ed8ca87 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1379,6 +1379,12 @@ BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset, flags | BDRV_REQ_ZERO_WRITE, cb, opaque); } +int blk_pread_nonzeroes(BlockBackend *blk, void *buf) +{ + int ret = bdrv_pread_nonzeroes(blk->root, buf); + return ret; +} + int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count) { int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0); diff --git a/block/io.c b/block/io.c index dfc153b..83e5ea7 100644 --- a/block/io.c +++ b/block/io.c @@ -882,6 +882,38 @@ int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, BDRV_REQ_ZERO_WRITE | flags); } +int bdrv_pread_nonzeroes(BdrvChild *child, void *buf) +{ + int ret; + int64_t target_size, bytes, offset = 0; + BlockDriverState *bs = child->bs; + + target_size = bdrv_getlength(bs); + if (target_size < 0) { + return target_size; + } + + for (;;) { + bytes = MIN(target_size - offset, BDRV_REQUEST_MAX_BYTES); + if (bytes <= 0) { + return 0; + } + ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL, NULL); + if (ret < 0) { + return ret; + } + if (ret & BDRV_BLOCK_ZERO) { + offset += bytes; + continue; + } + ret = bdrv_pread(child, offset, buf, bytes); + if (ret < 0) { + return ret; + } + offset += bytes; + } +} + /* * Completely zero out a block device with the help of bdrv_pwrite_zeroes. 
* The operation is sped up by checking the block status and only writing diff --git a/hw/block/block.c b/hw/block/block.c index bf56c76..e3c67f8 100644 --- a/hw/block/block.c +++ b/hw/block/block.c @@ -53,7 +53,7 @@ bool blk_check_size_and_read_all(BlockBackend *blk, void *buf, hwaddr size, * block device and read only on demand. */ assert(size <= BDRV_REQUEST_MAX_BYTES); - ret = blk_pread(blk, 0, buf, size); + ret = blk_pread_nonzeroes(blk, buf); if (ret < 0) { error_setg_errno(errp, -ret, "can't read block backend"); return false; diff --git a/include/block/block.h b/include/block/block.h index c7a2619..d0e06cf 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -322,6 +322,7 @@ int bdrv_write(BdrvChild *child, int64_t sector_num, const uint8_t *buf, int nb_sectors); int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, int bytes, BdrvRequestFlags flags); +int bdrv_pread_nonzeroes(BdrvChild *child, void *buf); int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags); int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes); int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov); diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 3be05c2..5d349d2 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -129,6 +129,7 @@ int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset, BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset, int bytes, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque); +int blk_pread_nonzeroes(BlockBackend *blk, void *buf); int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags); int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes); int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes, -- 1.8.3.1 -- Thanks, Xiang