Migrate only the parts of the dirty bitmaps that correspond to the block currently being migrated. Also, skip migrating the block data itself when block migration is disabled (the 'blk' parameter of the migrate command is false).

Skipping of shared sectors: bitmaps are migrated independently of this; in that case the blk is simply sent without its block data.
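For clarity, the resulting on-the-wire layout of one migrated chunk can be sketched as a small stand-alone C program. This is an illustration only, not QEMU code: 'struct sink' and the put_* helpers stand in for QEMUFile and qemu_put_be64()/qemu_put_byte()/qemu_put_buffer(), the device and bitmap names are hypothetical, and the field order is taken from blk_send()/block_load() in the diff below (the be64 sector|flags header follows the existing blk_send() format).

#include <stdint.h>
#include <stddef.h>
#include <string.h>

#define BLK_MIG_FLAG_DEVICE_BLOCK 0x01
#define BLK_MIG_FLAG_HAS_BLOCK    0x10
#define BLK_MIG_FLAG_HAS_BITMAPS  0x20
#define BDRV_SECTOR_BITS          9

struct sink { uint8_t *p; };    /* stand-in for QEMUFile */

static void put_byte(struct sink *s, uint8_t b) { *s->p++ = b; }
static void put_buffer(struct sink *s, const uint8_t *buf, size_t len)
{
    memcpy(s->p, buf, len);
    s->p += len;
}
static void put_be32(struct sink *s, uint32_t v)
{
    for (int i = 3; i >= 0; i--) put_byte(s, (uint8_t)(v >> (i * 8)));
}
static void put_be64(struct sink *s, uint64_t v)
{
    for (int i = 7; i >= 0; i--) put_byte(s, (uint8_t)(v >> (i * 8)));
}

/* One chunk: sector+flags header, device name, optional bitmap
 * payloads, and block data only when HAS_BLOCK is set. */
static void put_chunk(struct sink *s, uint64_t sector, uint64_t flags,
                      const char *dev_name, int nr_bitmaps,
                      const char **bm_names, const uint8_t **bm_data,
                      const size_t *bm_sizes,
                      const uint8_t *block, size_t block_size)
{
    put_be64(s, (sector << BDRV_SECTOR_BITS) | flags);
    put_byte(s, (uint8_t)strlen(dev_name));
    put_buffer(s, (const uint8_t *)dev_name, strlen(dev_name));

    if (flags & BLK_MIG_FLAG_HAS_BITMAPS) {
        put_be32(s, (uint32_t)nr_bitmaps);
        for (int i = 0; i < nr_bitmaps; i++) {
            put_byte(s, (uint8_t)strlen(bm_names[i]));
            put_buffer(s, (const uint8_t *)bm_names[i], strlen(bm_names[i]));
            put_buffer(s, bm_data[i], bm_sizes[i]); /* bdrv_dbm_data_size() bytes */
        }
    }
    if (flags & BLK_MIG_FLAG_HAS_BLOCK) {
        put_buffer(s, block, block_size);           /* BLOCK_SIZE bytes */
    }
}

int main(void)
{
    uint8_t out[256], bm[8] = { 0 };
    const char *bm_name = "bitmap0";                /* hypothetical name */
    const uint8_t *bm_data[] = { bm };
    const size_t bm_sizes[] = { sizeof(bm) };
    struct sink s = { out };

    /* A bitmap-only chunk, as sent when blk=false or for a skipped
     * shared chunk: HAS_BITMAPS set, HAS_BLOCK clear, no block data. */
    put_chunk(&s, 0x100, BLK_MIG_FLAG_DEVICE_BLOCK | BLK_MIG_FLAG_HAS_BITMAPS,
              "drive0", 1, &bm_name, bm_data, bm_sizes, NULL, 0);
    return 0;
}

On the receiving side, block_load() consumes the same layout: it restores each named bitmap via bdrv_dbm_restore_data() and only then reads block data if BLK_MIG_FLAG_HAS_BLOCK is set.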
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@parallels.com>
---
 block-migration.c | 173 +++++++++++++++++++++++++++++++++++++++++++++++-------
 savevm.c          |   1 +
 2 files changed, 154 insertions(+), 20 deletions(-)

diff --git a/block-migration.c b/block-migration.c
index 908a66d..95d54a1 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -32,6 +32,8 @@
 #define BLK_MIG_FLAG_EOS                0x02
 #define BLK_MIG_FLAG_PROGRESS           0x04
 #define BLK_MIG_FLAG_ZERO_BLOCK         0x08
+#define BLK_MIG_FLAG_HAS_BLOCK          0x10
+#define BLK_MIG_FLAG_HAS_BITMAPS        0x20
 
 #define MAX_IS_ALLOCATED_SEARCH 65536
 
@@ -51,6 +53,8 @@ typedef struct BlkMigDevState {
     int shared_base;
     int64_t total_sectors;
     QSIMPLEQ_ENTRY(BlkMigDevState) entry;
+    int nr_bitmaps;
+    BdrvDirtyBitmap **dirty_bitmaps;
 
     /* Only used by migration thread.  Does not need a lock. */
     int bulk_completed;
@@ -64,6 +68,11 @@ typedef struct BlkMigDevState {
     Error *blocker;
 } BlkMigDevState;
 
+typedef struct BlkMigDirtyBitmap {
+    BdrvDirtyBitmap *bitmap;
+    uint8_t *buf;
+} BlkMigDirtyBitmap;
+
 typedef struct BlkMigBlock {
     /* Only used by migration thread. */
     uint8_t *buf;
@@ -74,6 +83,9 @@ typedef struct BlkMigBlock {
     QEMUIOVector qiov;
     BlockAIOCB *aiocb;
 
+    int nr_bitmaps;
+    BlkMigDirtyBitmap *dirty_bitmaps;
+
     /* Protected by block migration lock. */
     int ret;
     QSIMPLEQ_ENTRY(BlkMigBlock) entry;
@@ -83,6 +95,7 @@ typedef struct BlkMigState {
     /* Written during setup phase.  Can be read without a lock. */
     int blk_enable;
     int shared_base;
+    int dbm_enable;
     QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
     int64_t total_sector_sum;
     bool zero_blocks;
@@ -116,27 +129,63 @@ static void blk_mig_unlock(void)
 /* Only allocating and initializing structure fields, not copying any data. */
 static BlkMigBlock *blk_create(BlkMigDevState *bmds, int64_t sector,
-                               int nr_sectors)
+                               int nr_sectors, bool only_bitmaps)
 {
+    int i;
     BlkMigBlock *blk = g_new(BlkMigBlock, 1);
-    blk->buf = g_malloc(BLOCK_SIZE);
+    blk->buf = only_bitmaps ? NULL : g_malloc(BLOCK_SIZE);
     blk->bmds = bmds;
     blk->sector = sector;
     blk->nr_sectors = nr_sectors;
+    blk->dirty_bitmaps = NULL;
+    blk->nr_bitmaps = 0;
 
     blk->iov.iov_base = blk->buf;
     blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
     qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
 
+    if (block_mig_state.dbm_enable) {
+        BlkMigDirtyBitmap *mig_bm;
+
+        blk->nr_bitmaps = bmds->nr_bitmaps;
+        mig_bm = blk->dirty_bitmaps = g_new(BlkMigDirtyBitmap,
+                                            bmds->nr_bitmaps);
+        for (i = 0; i < bmds->nr_bitmaps; ++i) {
+            BdrvDirtyBitmap *bm = bmds->dirty_bitmaps[i];
+            mig_bm->buf = g_malloc(bdrv_dbm_data_size(bm, nr_sectors));
+            mig_bm->bitmap = bm;
+            mig_bm++;
+        }
+    }
+
     return blk;
 }
 
 static void blk_free(BlkMigBlock *blk)
 {
+    int i;
     g_free(blk->buf);
+
+    if (blk->dirty_bitmaps) {
+        for (i = 0; i < blk->nr_bitmaps; ++i) {
+            g_free(blk->dirty_bitmaps[i].buf);
+        }
+        g_free(blk->dirty_bitmaps);
+    }
+
     g_free(blk);
 }
 
+static void blk_store_bitmaps(BlkMigBlock *blk)
+{
+    int i;
+    for (i = 0; i < blk->nr_bitmaps; ++i) {
+        bdrv_dbm_store_data(blk->dirty_bitmaps[i].bitmap,
+                            blk->dirty_bitmaps[i].buf,
+                            blk->sector, blk->nr_sectors);
+    }
+}
+
 /* Must run outside of the iothread lock during the bulk phase,
  * or the VM will stall.
  */
@@ -146,9 +195,15 @@ static void blk_send(QEMUFile *f, BlkMigBlock * blk)
     int len;
     uint64_t flags = BLK_MIG_FLAG_DEVICE_BLOCK;
 
-    if (block_mig_state.zero_blocks &&
+    if (block_mig_state.zero_blocks && blk->buf &&
         buffer_is_zero(blk->buf, BLOCK_SIZE)) {
         flags |= BLK_MIG_FLAG_ZERO_BLOCK;
+    } else if (blk->buf) {
+        flags |= BLK_MIG_FLAG_HAS_BLOCK;
+    }
+
+    if (block_mig_state.dbm_enable) {
+        flags |= BLK_MIG_FLAG_HAS_BITMAPS;
     }
 
     /* sector number and flags */
@@ -160,10 +215,27 @@ static void blk_send(QEMUFile *f, BlkMigBlock * blk)
     qemu_put_byte(f, len);
     qemu_put_buffer(f, (uint8_t *)bdrv_get_device_name(blk->bmds->bs), len);
 
+    /* dirty bitmaps */
+    if (flags & BLK_MIG_FLAG_HAS_BITMAPS) {
+        int i;
+        qemu_put_be32(f, blk->nr_bitmaps);
+        for (i = 0; i < blk->nr_bitmaps; ++i) {
+            BdrvDirtyBitmap *bm = blk->dirty_bitmaps[i].bitmap;
+            int buf_size = bdrv_dbm_data_size(bm, blk->nr_sectors);
+            const char *name = bdrv_dirty_bitmap_name(bm);
+            int len = strlen(name);
+
+            qemu_put_byte(f, len);
+            qemu_put_buffer(f, (const uint8_t *)name, len);
+            qemu_put_buffer(f, blk->dirty_bitmaps[i].buf, buf_size);
+        }
+    }
+
     /* if a block is zero we need to flush here since the network
      * bandwidth is now a lot higher than the storage device bandwidth.
-     * thus if we queue zero blocks we slow down the migration */
-    if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
+     * thus if we queue zero blocks we slow down the migration.
+     * also, skip writing the block when migrating only dirty bitmaps. */
+    if (!(flags & BLK_MIG_FLAG_HAS_BLOCK)) {
         qemu_fflush(f);
         return;
     }
@@ -282,13 +354,20 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
     BlockDriverState *bs = bmds->bs;
     BlkMigBlock *blk;
     int nr_sectors;
+    bool skip_chunk = false;
 
     if (bmds->shared_base) {
         qemu_mutex_lock_iothread();
-        while (cur_sector < total_sectors &&
-               !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
-                                  &nr_sectors)) {
-            cur_sector += nr_sectors;
+        if (block_mig_state.dbm_enable) {
+            bdrv_is_allocated(bs, cur_sector, BDRV_SECTORS_PER_DIRTY_CHUNK,
+                              &nr_sectors);
+            skip_chunk = nr_sectors >= BDRV_SECTORS_PER_DIRTY_CHUNK;
+        } else {
+            while (cur_sector < total_sectors &&
+                   !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
+                                      &nr_sectors)) {
+                cur_sector += nr_sectors;
+            }
         }
         qemu_mutex_unlock_iothread();
     }
@@ -309,7 +388,8 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
         nr_sectors = total_sectors - cur_sector;
     }
 
-    blk = blk_create(bmds, cur_sector, nr_sectors);
+    blk = blk_create(bmds, cur_sector, nr_sectors,
+                     skip_chunk || !block_mig_state.blk_enable);
 
     blk_mig_lock();
     block_mig_state.submitted++;
@@ -317,8 +397,16 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
     bdrv_reset_dirty_bitmap(bs, bmds->dirty_bitmap, cur_sector, nr_sectors);
 
-    blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
-                                nr_sectors, blk_mig_read_cb, blk);
+    if (block_mig_state.dbm_enable) {
+        blk_store_bitmaps(blk);
+    }
+
+    if (block_mig_state.blk_enable && !skip_chunk) {
+        blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
+                                    nr_sectors, blk_mig_read_cb, blk);
+    } else if (block_mig_state.dbm_enable) {
+        blk_mig_read_cb(blk, 0);
+    }
 
     bmds->cur_sector = cur_sector + nr_sectors;
     return (bmds->cur_sector >= total_sectors);
@@ -403,6 +491,8 @@ static void init_blk_migration(QEMUFile *f)
             DPRINTF("Start full migration for %s\n", bdrv_get_device_name(bs));
         }
 
+        bmds->dirty_bitmaps = bdrv_dbm_find_all_named(bs, &bmds->nr_bitmaps);
+
         QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
     }
 }
@@ -481,20 +571,32 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
             } else {
                 nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
             }
-            blk = blk_create(bmds, sector, nr_sectors);
+            blk = blk_create(bmds, sector, nr_sectors,
+                             !block_mig_state.blk_enable);
+
+            if (block_mig_state.dbm_enable) {
+                blk_store_bitmaps(blk);
+            }
 
             if (is_async) {
-                blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
-                                            nr_sectors, blk_mig_read_cb, blk);
+                if (block_mig_state.blk_enable) {
+                    blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
+                                                nr_sectors, blk_mig_read_cb,
+                                                blk);
+                } else {
+                    blk_mig_read_cb(blk, 0);
+                }
 
                 blk_mig_lock();
                 block_mig_state.submitted++;
                 bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
                 blk_mig_unlock();
             } else {
-                ret = bdrv_read(bmds->bs, sector, blk->buf, nr_sectors);
-                if (ret < 0) {
-                    goto error;
+                if (block_mig_state.blk_enable) {
+                    ret = bdrv_read(bmds->bs, sector, blk->buf, nr_sectors);
+                    if (ret < 0) {
+                        goto error;
+                    }
                 }
                 blk_send(f, blk);
 
@@ -817,10 +919,39 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
                 nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
             }
 
+            /* load dirty bitmaps */
+            if (flags & BLK_MIG_FLAG_HAS_BITMAPS) {
+                int i, nr_bitmaps = qemu_get_be32(f);
+
+                for (i = 0; i < nr_bitmaps; ++i) {
+                    char bitmap_name[256];
+                    BdrvDirtyBitmap *bitmap;
+                    int buf_size, len;
+
+                    len = qemu_get_byte(f);
+                    qemu_get_buffer(f, (uint8_t *)bitmap_name, len);
+                    bitmap_name[len] = '\0';
+
+                    bitmap = bdrv_find_dirty_bitmap(bs, bitmap_name);
+                    if (!bitmap) {
+                        fprintf(stderr, "Error unknown dirty bitmap "
+                                "%s for block device %s\n",
+                                bitmap_name, device_name);
+                        return -EINVAL;
+                    }
+
+                    buf_size = bdrv_dbm_data_size(bitmap, nr_sectors);
+                    buf = g_malloc(buf_size);
+                    qemu_get_buffer(f, buf, buf_size);
+                    bdrv_dbm_restore_data(bitmap, buf, addr, nr_sectors);
+                    g_free(buf);
+                }
+            }
+
             if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
                 ret = bdrv_write_zeroes(bs, addr, nr_sectors,
                                         BDRV_REQ_MAY_UNMAP);
-            } else {
+            } else if (flags & BLK_MIG_FLAG_HAS_BLOCK) {
                 buf = g_malloc(BLOCK_SIZE);
                 qemu_get_buffer(f, buf, BLOCK_SIZE);
                 ret = bdrv_write(bs, addr, buf, nr_sectors);
@@ -855,6 +986,7 @@ static void block_set_params(const MigrationParams *params, void *opaque)
 {
     block_mig_state.blk_enable = params->blk;
     block_mig_state.shared_base = params->shared;
+    block_mig_state.dbm_enable = params->dirty;
 
     /* shared base means that blk_enable = 1 */
     block_mig_state.blk_enable |= params->shared;
@@ -862,7 +994,8 @@ static void block_set_params(const MigrationParams *params, void *opaque)
 
 static bool block_is_active(void *opaque)
 {
-    return block_mig_state.blk_enable == 1;
+    return block_mig_state.blk_enable == 1 ||
+           block_mig_state.dbm_enable == 1;
 }
 
 static SaveVMHandlers savevm_block_handlers = {
diff --git a/savevm.c b/savevm.c
index a598d1d..1299faa 100644
--- a/savevm.c
+++ b/savevm.c
@@ -983,6 +983,7 @@ int qemu_loadvm_state(QEMUFile *f)
         }
     }
 
+    bdrv_dbm_restore_finish();
     cpu_synchronize_all_post_init();
 
     ret = 0;
-- 
1.9.1