HBitmaps allow us to search set bits pretty fast. On the contrary, when searching zeroes, we may be forced to fully traverse the lower level. When we run blockdev-backup with mode=full on top of snapshot filter + cbw filter, the job fills copy bitmap by calling block_status() with range (X, virtual_size). The problem is that we check for zeroes in this whole range. We also hit the worst case here, as access bitmap is fully set and we need to scan the entire lowest level. After scanning the full bitmap we actually ask the block status of original image, which may return significantly lower amount of empty clusters. Beacuse of this, the backup job 'hangs' on block copy initializaiton for a long time with 100% CPU.
Example copy bitmap buildup time for image with clu_size=65536 and preallocated metadata size 10T 11T blockdev-backup 52s 57s cbw + snap 325s 413s cbw + snap + patch 55s 61s To fix it, reverse the access bitmap in cbw filter: rather set it when the user is not allowed to read the cluster. Update qemu-iotest 257: now access bitmap have count 0 instead of the image size 67108864 Signed-off-by: Andrey Zhadchenko <andrey.zhadche...@virtuozzo.com> --- block/copy-before-write.c | 17 ++++++++++------- tests/qemu-iotests/257.out | 28 ++++++++++++++-------------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/block/copy-before-write.c b/block/copy-before-write.c index fd470f5f92..5f5b3e7515 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -53,7 +53,7 @@ typedef struct BDRVCopyBeforeWriteState { CoMutex lock; /* - * @access_bitmap: represents areas allowed for reading by fleecing user. + * @access_bitmap: represents areas disallowed for reading by fleecing user. * Reading from non-dirty areas leads to -EACCES. */ BdrvDirtyBitmap *access_bitmap; @@ -220,7 +220,7 @@ cbw_snapshot_read_lock(BlockDriverState *bs, int64_t offset, int64_t bytes, return NULL; } - if (bdrv_dirty_bitmap_next_zero(s->access_bitmap, offset, bytes) != -1) { + if (bdrv_dirty_bitmap_next_dirty(s->access_bitmap, offset, bytes) != -1) { g_free(req); return NULL; } @@ -338,8 +338,8 @@ cbw_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes) aligned_bytes = aligned_end - aligned_offset; WITH_QEMU_LOCK_GUARD(&s->lock) { - bdrv_reset_dirty_bitmap(s->access_bitmap, aligned_offset, - aligned_bytes); + bdrv_set_dirty_bitmap(s->access_bitmap, aligned_offset, + aligned_bytes); } block_copy_reset(s->bcs, aligned_offset, aligned_bytes); @@ -501,9 +501,12 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags, return -EINVAL; } bdrv_disable_dirty_bitmap(s->access_bitmap); - bdrv_dirty_bitmap_merge_internal(s->access_bitmap, - block_copy_dirty_bitmap(s->bcs), NULL, - true); + if (bitmap) { + bdrv_dirty_bitmap_merge_internal(s->access_bitmap, + block_copy_dirty_bitmap(s->bcs), NULL, + true); + bdrv_dirty_bitmap_reverse(s->access_bitmap); + } qemu_co_mutex_init(&s->lock); QLIST_INIT(&s->frozen_read_reqs); diff --git a/tests/qemu-iotests/257.out b/tests/qemu-iotests/257.out index c33dd7f3a9..55efb418e6 100644 --- a/tests/qemu-iotests/257.out +++ b/tests/qemu-iotests/257.out @@ -109,7 +109,7 @@ write -P0x67 0x3fe0000 0x20000 "backup-top": [ { "busy": false, - "count": 67108864, + "count": 0, "granularity": 65536, "persistent": false, "recording": false @@ -585,7 +585,7 @@ write -P0x67 0x3fe0000 0x20000 "backup-top": [ { "busy": false, - "count": 67108864, + "count": 0, "granularity": 65536, "persistent": false, "recording": false @@ -854,7 +854,7 @@ write -P0x67 0x3fe0000 0x20000 "backup-top": [ { "busy": false, - "count": 67108864, + "count": 0, "granularity": 65536, "persistent": false, "recording": false @@ -1330,7 +1330,7 @@ write -P0x67 0x3fe0000 0x20000 "backup-top": [ { "busy": false, - "count": 67108864, + "count": 0, "granularity": 65536, "persistent": false, "recording": false @@ -1599,7 +1599,7 @@ write -P0x67 0x3fe0000 0x20000 "backup-top": [ { "busy": false, - "count": 67108864, + "count": 0, "granularity": 65536, "persistent": false, "recording": false @@ -2075,7 +2075,7 @@ write -P0x67 0x3fe0000 0x20000 "backup-top": [ { "busy": false, - "count": 67108864, + "count": 0, "granularity": 65536, "persistent": false, "recording": false @@ -2344,7 +2344,7 @@ write -P0x67 0x3fe0000 0x20000 "backup-top": [ { "busy": false, - "count": 67108864, + "count": 0, "granularity": 65536, "persistent": false, "recording": false @@ -2820,7 +2820,7 @@ write -P0x67 0x3fe0000 0x20000 "backup-top": [ { "busy": false, - "count": 67108864, + "count": 0, "granularity": 65536, "persistent": false, "recording": false @@ -3089,7 +3089,7 @@ write -P0x67 0x3fe0000 0x20000 "backup-top": [ { "busy": false, - "count": 67108864, + "count": 0, "granularity": 65536, "persistent": false, "recording": false @@ -3565,7 +3565,7 @@ write -P0x67 0x3fe0000 0x20000 "backup-top": [ { "busy": false, - "count": 67108864, + "count": 0, "granularity": 65536, "persistent": false, "recording": false @@ -3834,7 +3834,7 @@ write -P0x67 0x3fe0000 0x20000 "backup-top": [ { "busy": false, - "count": 67108864, + "count": 0, "granularity": 65536, "persistent": false, "recording": false @@ -4310,7 +4310,7 @@ write -P0x67 0x3fe0000 0x20000 "backup-top": [ { "busy": false, - "count": 67108864, + "count": 0, "granularity": 65536, "persistent": false, "recording": false @@ -4579,7 +4579,7 @@ write -P0x67 0x3fe0000 0x20000 "backup-top": [ { "busy": false, - "count": 67108864, + "count": 0, "granularity": 65536, "persistent": false, "recording": false @@ -5055,7 +5055,7 @@ write -P0x67 0x3fe0000 0x20000 "backup-top": [ { "busy": false, - "count": 67108864, + "count": 0, "granularity": 65536, "persistent": false, "recording": false -- 2.43.0