From: "Denis V. Lunev"
This patch adds image preallocation on expand to improve the locality
of the QCOW2 image file and to speed up image expansion on some distributed
storage systems where that operation is slow.
Image expand requests have to be suspended until the allocation has been
performed; this is done via a special QCowL2Meta.
This meta is invisible to the handle_dependencies() code.
This is the main reason for also calling preallocation before a metadata
write: the write might intersect with a preallocation triggered by another
I/O request, and then has to yield.
Signed-off-by: Denis V. Lunev
Signed-off-by: Anton Nefedov
---
block/qcow2-cache.c| 3 +
block/qcow2-cluster.c | 5 ++
block/qcow2-refcount.c | 14 +
block/qcow2.c | 151 +
block/qcow2.h | 5 ++
5 files changed, 178 insertions(+)
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 1d25147..aa9da5f 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -204,6 +204,9 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs,
Qcow2Cache *c, int i)
return ret;
}
+/* check and preallocate extra space if touching a fresh metadata cluster
*/
+qcow2_handle_prealloc(bs, c->entries[i].offset, s->cluster_size);
+
if (c == s->refcount_block_cache) {
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
} else if (c == s->l2_table_cache) {
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index cf18dee..a4b6d40 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -108,6 +108,9 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t
min_size,
goto fail;
}
+qcow2_handle_prealloc(bs, new_l1_table_offset,
+ QEMU_ALIGN_UP(new_l1_size2, s->cluster_size));
+
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
for(i = 0; i < s->l1_size; i++)
new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
@@ -1820,6 +1823,8 @@ static int expand_zero_clusters_in_l1(BlockDriverState
*bs, uint64_t *l1_table,
goto fail;
}
+qcow2_handle_prealloc(bs, offset, s->cluster_size);
+
ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0);
if (ret < 0) {
if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 7c06061..873a1d2 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -547,6 +547,8 @@ static int alloc_refcount_block(BlockDriverState *bs,
}
/* Write refcount blocks to disk */
+qcow2_handle_prealloc(bs, meta_offset, blocks_clusters * s->cluster_size);
+
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
blocks_clusters * s->cluster_size);
@@ -561,6 +563,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
cpu_to_be64s(&new_table[i]);
}
+qcow2_handle_prealloc(bs, table_offset,
+ QEMU_ALIGN_UP(table_size * sizeof(uint64_t),
+s->cluster_size));
+
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
table_size * sizeof(uint64_t));
@@ -2104,6 +2110,8 @@ write_refblocks:
goto fail;
}
+qcow2_handle_prealloc(bs, refblock_offset, s->cluster_size);
+
/* The size of *refcount_table is always cluster-aligned, therefore the
* write operation will not overflow */
on_disk_refblock = (void *)((char *) *refcount_table +
@@ -2158,6 +2166,8 @@ write_refblocks:
}
assert(reftable_size < INT_MAX / sizeof(uint64_t));
+qcow2_handle_prealloc(bs, reftable_offset,
+ reftable_size * sizeof(uint64_t));
ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable,
reftable_size * sizeof(uint64_t));
if (ret < 0) {
@@ -2845,6 +2855,10 @@ int qcow2_change_refcount_order(BlockDriverState *bs,
int refcount_order,
cpu_to_be64s(&new_reftable[i]);
}
+qcow2_handle_prealloc(bs, new_reftable_offset,
+ QEMU_ALIGN_UP(new_reftable_size * sizeof(uint64_t),
+s->cluster_size));
+
ret = bdrv_pwrite(bs->file, new_reftable_offset, new_reftable,
new_reftable_size * sizeof(uint64_t));
diff --git a/block/qcow2.c b/block/qcow2.c
index b438f22..6e7ce96 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -464,6 +464,11 @@ static QemuOptsList qcow2_runtime_opts = {
.type = QEMU_OPT_NUMBER,
.help = "Clean unused cache entries after this time (in seconds)",
},
+{
+.name = QCOW2_OPT_PREALLOC_SIZE,
+.type = QEMU_OPT_SIZE,
+.help =