The "pnum < nb_sectors" condition in deciding whether to actually copy data is unnecessarily strict, and the qiov initialization is unnecessarily too, for both bdrv_aio_write_zeroes and bdrv_aio_discard branches.
Reorganize the mirror_iteration flow so that we:

1) Find the contiguous zero/discarded sectors with
   bdrv_get_block_status_above() before deciding what to do. We query
   s->buf_size sized blocks at a time.

2) If the sectors in question are zeroed/discarded and aligned to the
   target cluster, issue a zero write or discard accordingly. This is
   done in mirror_do_zero_or_discard, where we don't add a buffer to
   the qiov.

3) Otherwise, do the same loop as before, in mirror_do_read.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
v2: Address Max's comments:
    Don't do a superfluous assignment for next_sector and next_chunk;
    Move the assignment to hbitmap_next_sector above sector_num to
    reduce conflicts.
---
 block/mirror.c | 154 +++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 122 insertions(+), 32 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index b1252a1..7c8d090 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -157,23 +157,13 @@ static void mirror_read_complete(void *opaque, int ret)
                     mirror_write_complete, op);
 }
 
-static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+static uint64_t mirror_do_read(MirrorBlockJob *s)
 {
     BlockDriverState *source = s->common.bs;
-    int nb_sectors, sectors_per_chunk, nb_chunks;
-    int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
+    int sectors_per_chunk, nb_sectors, nb_chunks;
+    int64_t end, next_chunk, next_sector, hbitmap_next_sector, sector_num;
     uint64_t delay_ns = 0;
     MirrorOp *op;
-    int pnum;
-    int64_t ret;
-
-    s->sector_num = hbitmap_iter_next(&s->hbi);
-    if (s->sector_num < 0) {
-        bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
-        s->sector_num = hbitmap_iter_next(&s->hbi);
-        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
-        assert(s->sector_num >= 0);
-    }
 
     hbitmap_next_sector = s->sector_num;
     sector_num = s->sector_num;
@@ -198,14 +188,6 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
     next_sector = sector_num;
     next_chunk = sector_num / sectors_per_chunk;
 
-    /* Wait for I/O to this cluster (from a previous iteration) to be done. */
-    while (test_bit(next_chunk, s->in_flight_bitmap)) {
-        trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
-        s->waiting_for_io = true;
-        qemu_coroutine_yield();
-        s->waiting_for_io = false;
-    }
-
     do {
         int added_sectors, added_chunks;
 
@@ -301,24 +283,132 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
     s->sectors_in_flight += nb_sectors;
     trace_mirror_one_iteration(s, sector_num, nb_sectors);
 
-    ret = bdrv_get_block_status_above(source, NULL, sector_num,
-                                      nb_sectors, &pnum);
-    if (ret < 0 || pnum < nb_sectors ||
-            (ret & BDRV_BLOCK_DATA && !(ret & BDRV_BLOCK_ZERO))) {
-        bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
-                       mirror_read_complete, op);
-    } else if (ret & BDRV_BLOCK_ZERO) {
+    bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
+                   mirror_read_complete, op);
+    return delay_ns;
+}
+
+static uint64_t mirror_do_zero_or_discard(MirrorBlockJob *s,
+                                          int64_t sector_num,
+                                          int nb_sectors,
+                                          bool is_discard)
+{
+    int sectors_per_chunk, nb_chunks;
+    int64_t next_chunk, next_sector, hbitmap_next_sector;
+    uint64_t delay_ns = 0;
+    MirrorOp *op;
+
+    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+    assert(nb_sectors >= sectors_per_chunk);
+    next_chunk = sector_num / sectors_per_chunk;
+    nb_chunks = DIV_ROUND_UP(nb_sectors, sectors_per_chunk);
+    bitmap_set(s->in_flight_bitmap, next_chunk, nb_chunks);
+    delay_ns = ratelimit_calculate_delay(&s->limit, nb_sectors);
+
+    /* Allocate a MirrorOp that is used as an AIO callback. The qiov is zeroed
+     * out so the freeing in iteration is nop. */
+    op = g_new0(MirrorOp, 1);
+    op->s = s;
+    op->sector_num = sector_num;
+    op->nb_sectors = nb_sectors;
+
+    /* Advance the HBitmapIter in parallel, so that we do not examine
+     * the same sector twice.
+     */
+    hbitmap_next_sector = sector_num;
+    next_sector = sector_num + nb_sectors;
+    while (next_sector > hbitmap_next_sector) {
+        hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
+        if (hbitmap_next_sector < 0) {
+            break;
+        }
+    }
+
+    bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num, nb_sectors);
+    s->in_flight++;
+    s->sectors_in_flight += nb_sectors;
+    if (is_discard) {
+        bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
+                         mirror_write_complete, op);
+    } else {
         bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors,
                               s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
                               mirror_write_complete, op);
-    } else {
-        assert(!(ret & BDRV_BLOCK_DATA));
-        bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
-                         mirror_write_complete, op);
     }
+
     return delay_ns;
 }
 
+static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+{
+    BlockDriverState *source = s->common.bs;
+    int sectors_per_chunk;
+    int64_t sector_num, next_chunk;
+    int ret;
+    int contiguous_sectors = s->buf_size >> BDRV_SECTOR_BITS;
+    enum MirrorMethod {
+        MIRROR_METHOD_COPY,
+        MIRROR_METHOD_ZERO,
+        MIRROR_METHOD_DISCARD
+    } mirror_method;
+
+    s->sector_num = hbitmap_iter_next(&s->hbi);
+    if (s->sector_num < 0) {
+        bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
+        s->sector_num = hbitmap_iter_next(&s->hbi);
+        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
+        assert(s->sector_num >= 0);
+    }
+
+    sector_num = s->sector_num;
+    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+    next_chunk = sector_num / sectors_per_chunk;
+
+    /* Wait for I/O to this cluster (from a previous iteration) to be done. */
+    while (test_bit(next_chunk, s->in_flight_bitmap)) {
+        trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
+        s->waiting_for_io = true;
+        qemu_coroutine_yield();
+        s->waiting_for_io = false;
+    }
+
+    mirror_method = MIRROR_METHOD_COPY;
+    ret = bdrv_get_block_status_above(source, NULL, sector_num,
+                                      contiguous_sectors,
+                                      &contiguous_sectors);
+
+    contiguous_sectors -= contiguous_sectors % sectors_per_chunk;
+    if (ret < 0 || contiguous_sectors < sectors_per_chunk) {
+        contiguous_sectors = sectors_per_chunk;
+    } else if (!(ret & BDRV_BLOCK_DATA)) {
+        int64_t target_sector_num;
+        int target_nb_sectors;
+        bdrv_round_to_clusters(s->target, sector_num, contiguous_sectors,
+                               &target_sector_num, &target_nb_sectors);
+        if (target_sector_num == sector_num &&
+            target_nb_sectors == contiguous_sectors) {
+            mirror_method = ret & BDRV_BLOCK_ZERO ?
+                                MIRROR_METHOD_ZERO :
+                                MIRROR_METHOD_DISCARD;
+        }
+    }
+
+    switch (mirror_method) {
+    case MIRROR_METHOD_COPY:
+        return mirror_do_read(s);
+    case MIRROR_METHOD_ZERO:
+        return mirror_do_zero_or_discard(s, sector_num,
+                                         contiguous_sectors,
+                                         false);
+    case MIRROR_METHOD_DISCARD:
+        return mirror_do_zero_or_discard(s, sector_num,
+                                         contiguous_sectors,
+                                         true);
+    default:
+        abort();
+    }
+}
+
 static void mirror_free_init(MirrorBlockJob *s)
 {
     int granularity = s->granularity;
-- 
2.4.3
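
A note on step 2 above, for readers skimming the patch: the zero/discard
path is taken only when rounding the range to target clusters with
bdrv_round_to_clusters() leaves it unchanged, i.e. the zeroed/discarded
sectors already cover whole clusters. The snippet below is a minimal
standalone sketch of that alignment test, not QEMU code: CLUSTER_SECTORS
is a made-up constant standing in for the target's real cluster size,
and round_to_clusters()/zero_or_discard_ok() are hypothetical stand-ins
for bdrv_round_to_clusters() and the check in mirror_iteration.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Made-up cluster size in sectors; the real code derives this from the
 * target's block driver info inside bdrv_round_to_clusters(). */
enum { CLUSTER_SECTORS = 128 };

/* Round [sector_num, sector_num + nb_sectors) outward to cluster
 * boundaries: start rounded down, end rounded up. */
static void round_to_clusters(int64_t sector_num, int nb_sectors,
                              int64_t *rounded_num, int *rounded_sectors)
{
    int64_t start = sector_num / CLUSTER_SECTORS * CLUSTER_SECTORS;
    int64_t end = (sector_num + nb_sectors + CLUSTER_SECTORS - 1)
                  / CLUSTER_SECTORS * CLUSTER_SECTORS;

    *rounded_num = start;
    *rounded_sectors = (int)(end - start);
}

/* Zero/discard is issued only when rounding is a no-op, i.e. the range
 * already covers whole clusters; anything else keeps the copy path. */
static bool zero_or_discard_ok(int64_t sector_num, int nb_sectors)
{
    int64_t rounded_num;
    int rounded_sectors;

    round_to_clusters(sector_num, nb_sectors, &rounded_num, &rounded_sectors);
    return rounded_num == sector_num && rounded_sectors == nb_sectors;
}

int main(void)
{
    printf("%d\n", zero_or_discard_ok(0, 256));  /* 1: two whole clusters */
    printf("%d\n", zero_or_discard_ok(64, 128)); /* 0: straddles a boundary */
    printf("%d\n", zero_or_discard_ok(0, 100));  /* 0: partial cluster */
    return 0;
}

If rounding would grow the range, the range only partially covers a
target cluster, and mirror_iteration stays with MIRROR_METHOD_COPY.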