On Tue, 07/30 14:53, Stefan Hajnoczi wrote: > On Tue, Jul 30, 2013 at 03:17:47PM +0800, Fam Zheng wrote: > > for (sector_num = 0; sector_num < end; sector_num += n) { > > - uint64_t delay_ns = 0; > > - bool copy; > > > > -wait: > > - /* Note that even when no rate limit is applied we need to yield > > - * with no pending I/O here so that bdrv_drain_all() returns. > > - */ > > - block_job_sleep_ns(&s->common, rt_clock, delay_ns); > > - if (block_job_is_cancelled(&s->common)) { > > - break; > > - } > > /* Copy if allocated above the base */ > > ret = bdrv_co_is_allocated_above(top, base, sector_num, > > - COMMIT_BUFFER_SIZE / > > BDRV_SECTOR_SIZE, > > + COMMIT_BUFFER_SECTORS, > > &n); > > - copy = (ret == 1); > > - trace_commit_one_iteration(s, sector_num, n, ret); > > - if (copy) { > > - if (s->common.speed) { > > - delay_ns = ratelimit_calculate_delay(&s->limit, n); > > - if (delay_ns > 0) { > > - goto wait; > > - } > > + if (ret) { > > + bdrv_set_dirty(top, sector_num, n); > > + } > > This could take a while on a big image. You need sleep/cancel here like > the other blockjob loops have. > > I think error handling isn't sufficient here. If > bdrv_co_is_allocated_above() fails you need to exit (if n becomes 0 on > error, then this is an infinite loop!). > Yes, you are right.
> > + bdrv_dirty_iter_init(s->top, &hbi); > > + for (next_dirty = hbitmap_iter_next(&hbi); > > + next_dirty >= 0; > > + next_dirty = hbitmap_iter_next(&hbi)) { > > + sector_num = next_dirty; > > + if (block_job_is_cancelled(&s->common)) { > > + goto exit; > > + } > > + delay_ns = ratelimit_calculate_delay(&s->limit, > > + COMMIT_BUFFER_SECTORS); > > + /* Note that even when no rate limit is applied we need to > > yield > > + * with no pending I/O here so that bdrv_drain_all() returns. > > + */ > > + block_job_sleep_ns(&s->common, rt_clock, delay_ns); > > + trace_commit_one_iteration(s, sector_num, > > + COMMIT_BUFFER_SECTORS, ret); > > + ret = commit_populate(top, base, sector_num, > > + COMMIT_BUFFER_SECTORS, buf); > > Can we be sure that a guest write during commit_populate()... > > > + if (ret < 0) { > > + if (s->on_error == BLOCKDEV_ON_ERROR_STOP || > > + s->on_error == BLOCKDEV_ON_ERROR_REPORT || > > + (s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && > > + ret == -ENOSPC)) { > > + goto exit; > > + } else { > > + continue; > > + } > > } > > + /* Publish progress */ > > + s->common.offset += COMMIT_BUFFER_BYTES; > > + bdrv_reset_dirty(top, sector_num, COMMIT_BUFFER_SECTORS); > > ...sets the dirty but *after* us? Otherwise there is a race condition > where guest writes fail to be copied into the base image. > > I think the answer is "no" since commit_populate() performs two separate > blocking operations: bdrv_read(top) followed by bdrv_write(base). Now > imagine the guest does bdrv_aio_writev(top) after we complete > bdrv_read(top) but before we do bdrv_reset_dirty(). > I see, good explaination, thanks and will fix. > > } > > - /* Publish progress */ > > - s->common.offset += n * BDRV_SECTOR_SIZE; > > } > > + s->common.offset = end; > > > > - ret = 0; > > - > > - if (!block_job_is_cancelled(&s->common) && sector_num == end) { > > - /* success */ > > - ret = bdrv_drop_intermediate(active, top, base); > > + bdrv_flush(base); > > bdrv_co_flush() is clearer since we're in a coroutine. > OK > > + if (!block_job_is_cancelled(&s->common)) { > > + /* Drop intermediate: [top, base) */ > > + ret = bdrv_drop_intermediate(s->overlay, &top, &base); > > + s->common.offset = s->common.len; > > } > > > > -exit_free_buf: > > - qemu_vfree(buf); > > + ret = 0; > > + > > +exit: > > + bdrv_set_dirty_tracking(active, 0); > > > > -exit_restore_reopen: > > /* restore base open flags here if appropriate (e.g., change the base > > back > > * to r/o). These reopens do not need to be atomic, since we won't > > abort > > * even on failure here */ > > - if (s->base_flags != bdrv_get_flags(base)) { > > + if (s->overlay && s->base_flags != bdrv_get_flags(base)) { > > Why check s->overlay, this only concerns base? > With s->overlay, we are committing nonactive, the usual case. s->overlay == NULL means we are committing active layer, so base will be dropped, don't reopen it. Will add a comment here. > > @@ -212,23 +246,20 @@ void commit_start(BlockDriverState *bs, > > BlockDriverState *base, > > > > overlay_bs = bdrv_find_overlay(bs, top); > > > > - if (overlay_bs == NULL) { > > - error_setg(errp, "Could not find overlay image for %s:", > > top->filename); > > - return; > > - } > > - > > orig_base_flags = bdrv_get_flags(base); > > - orig_overlay_flags = bdrv_get_flags(overlay_bs); > > + if (overlay_bs) { > > + orig_overlay_flags = bdrv_get_flags(overlay_bs); > > + if (!(orig_overlay_flags & BDRV_O_RDWR)) { > > + reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, > > + orig_overlay_flags | BDRV_O_RDWR); > > + } > > + } > > > > /* convert base & overlay_bs to r/w, if necessary */ > > if (!(orig_base_flags & BDRV_O_RDWR)) { > > reopen_queue = bdrv_reopen_queue(reopen_queue, base, > > orig_base_flags | BDRV_O_RDWR); > > } > > - if (!(orig_overlay_flags & BDRV_O_RDWR)) { > > - reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, > > - orig_overlay_flags | BDRV_O_RDWR); > > - } > > IMO it is clearer to put the two orig_base_flags stanzas together rather > than interleaving them: > > /* convert base & overlay_bs to r/w, if necessary */ > orig_base_flags = bdrv_get_flags(base); > if (!(orig_base_flags & BDRV_O_RDWR)) { > reopen_queue = bdrv_reopen_queue(reopen_queue, base, > orig_base_flags | > BDRV_O_RDWR); > } > > overlay_bs = bdrv_find_overlay(bs, top); > if (overlay_bs) { > orig_overlay_flags = bdrv_get_flags(overlay_bs); > if (!(orig_overlay_flags & BDRV_O_RDWR)) { > reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, > orig_overlay_flags | BDRV_O_RDWR); > } > } OK Fam