On 07/04/2016 10:38 AM, Denis V. Lunev wrote: > From: Evgeny Yakovlev <eyakov...@virtuozzo.com> > > Some guests (win2008 server for example) do a lot of unnecessary > flushing when underlying media has not changed. This adds additional > overhead on host when calling fsync/fdatasync. > > This change introduces a write generation scheme in BlockDriverState. > Current write generation is checked against last flushed generation to > avoid unnessesary flushes. > > The problem with excessive flushing was found by a performance test > which does parallel directory tree creation (from 2 processes). > Results improved from 0.424 loops/sec to 0.432 loops/sec. > Each loop creates 10^3 directories with 10 files in each. > > Signed-off-by: Evgeny Yakovlev <eyakov...@virtuozzo.com> > Signed-off-by: Denis V. Lunev <d...@openvz.org> > CC: Kevin Wolf <kw...@redhat.com> > CC: Max Reitz <mre...@redhat.com> > CC: Stefan Hajnoczi <stefa...@redhat.com> > CC: Fam Zheng <f...@redhat.com> > CC: John Snow <js...@redhat.com> > --- > block.c | 3 +++ > block/io.c | 18 ++++++++++++++++++ > include/block/block_int.h | 5 +++++ > 3 files changed, 26 insertions(+) > > diff --git a/block.c b/block.c > index f4648e9..366fad6 100644 > --- a/block.c > +++ b/block.c > @@ -234,6 +234,8 @@ BlockDriverState *bdrv_new(void) > bs->refcnt = 1; > bs->aio_context = qemu_get_aio_context(); > > + qemu_co_queue_init(&bs->flush_queue); > + > QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list); > > return bs; > @@ -2582,6 +2584,7 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset) > ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); > bdrv_dirty_bitmap_truncate(bs); > bdrv_parent_cb_resize(bs); > + ++bs->write_gen; > } > return ret; > } > diff --git a/block/io.c b/block/io.c > index 7cf3645..a5451b6 100644 > --- a/block/io.c > +++ b/block/io.c > @@ -1294,6 +1294,7 @@ static int coroutine_fn > bdrv_aligned_pwritev(BlockDriverState *bs, > } > bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE); > > + 
++bs->write_gen; > bdrv_set_dirty(bs, start_sector, end_sector - start_sector); > > if (bs->wr_highest_offset < offset + bytes) { > @@ -2211,6 +2212,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) > { > int ret; > BdrvTrackedRequest req; > + int current_gen = bs->write_gen; > > if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) || > bdrv_is_sg(bs)) { > @@ -2219,6 +2221,12 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) > > tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH); > > + /* Wait until any previous flushes are completed */ > + while (bs->flush_started_gen != bs->flushed_gen) { > + qemu_co_queue_wait(&bs->flush_queue); > + } > + bs->flush_started_gen = current_gen; > + > /* Write back all layers by calling one driver function */ > if (bs->drv->bdrv_co_flush) { > ret = bs->drv->bdrv_co_flush(bs); > @@ -2239,6 +2247,11 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) > goto flush_parent; > } > > + /* Check if we really need to flush anything */ > + if (bs->flushed_gen == current_gen) { > + goto flush_parent; > + } > + > BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK); > if (bs->drv->bdrv_co_flush_to_disk) { > ret = bs->drv->bdrv_co_flush_to_disk(bs); > @@ -2279,6 +2292,10 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) > flush_parent: > ret = bs->file ? 
bdrv_co_flush(bs->file->bs) : 0; > out: > + /* Notify any pending flushes that we have completed */ > + bs->flushed_gen = current_gen; > + qemu_co_queue_restart_all(&bs->flush_queue); > + > tracked_request_end(&req); > return ret; > } > @@ -2402,6 +2419,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, > int64_t sector_num, > } > ret = 0; > out: > + ++bs->write_gen; > bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS, > req.bytes >> BDRV_SECTOR_BITS); > tracked_request_end(&req); > diff --git a/include/block/block_int.h b/include/block/block_int.h > index 2057156..8543daf 100644 > --- a/include/block/block_int.h > +++ b/include/block/block_int.h > @@ -420,6 +420,11 @@ struct BlockDriverState { > note this is a reference count */ > bool probed; > > + CoQueue flush_queue; /* Serializing flush queue */ > + unsigned int write_gen; /* Current data generation */ > + unsigned int flush_started_gen; /* Generation for which flush has > started */ > + unsigned int flushed_gen; /* Flushed write generation */ > + > BlockDriver *drv; /* NULL means no media */ > void *opaque; > >
This patch breaks qcow2 iotests 026, 089, 141, and 144. --js