[Qemu-devel] [PATCHv2] block: add the optional file entry to query-block
This patch adds the optional file entry to the query-block output. The value is a json-object representing the information about the underlying file or device (when present). Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- block/qapi.c |7 +++ qapi-schema.json |4 +++- qmp-commands.hx|8 tests/qemu-iotests/043.out | 15 +++ 4 files changed, 33 insertions(+), 1 deletions(-) diff --git a/block/qapi.c b/block/qapi.c index a4bc411..40db983 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -171,6 +171,13 @@ void bdrv_query_image_info(BlockDriverState *bs, return; } +if (bs-file) { +bdrv_query_image_info(bs-file, info-file, err); +if (info-file) { +info-has_file = true; +} +} + *p_info = info; } diff --git a/qapi-schema.json b/qapi-schema.json index 5c32528..0cb872c 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -238,6 +238,8 @@ # # @backing-image: #optional info of the backing image (since 1.6) # +# @file: #optional info of the underlying file (since 1.6) +# # Since: 1.3 # ## @@ -248,7 +250,7 @@ '*cluster-size': 'int', '*encrypted': 'bool', '*backing-filename': 'str', '*full-backing-filename': 'str', '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'], - '*backing-image': 'ImageInfo' } } + '*backing-image': 'ImageInfo', '*file': 'ImageInfo' } } ## # @ImageCheck: diff --git a/qmp-commands.hx b/qmp-commands.hx index 362f0e1..bb8a931 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -1791,6 +1791,8 @@ Each json-object contain the following: - backing-image: the detail of the backing image, it is an optional json-object only present when a backing image present for this image + - file: an optional json-object representing the information + about the underlying file or device (when present) - io-status: I/O operation status, only present if the device supports it and the VM is configured to stop on errors. It's always reset @@ -1842,6 +1844,12 @@ Example: format:qcow2, virtual-size:2048000 } + file:{ + filename:disks/test.qcow2, + format: file, + virtual-size: 262144, + actual-size: 139264 + } } }, type:unknown diff --git a/tests/qemu-iotests/043.out b/tests/qemu-iotests/043.out index ad23337..6aea37b 100644 --- a/tests/qemu-iotests/043.out +++ b/tests/qemu-iotests/043.out @@ -44,6 +44,11 @@ cluster_size: 65536 filename: TEST_DIR/t.IMGFMT, cluster-size: 65536, format: IMGFMT, +file: { +virtual-size: 327680, +filename: TEST_DIR/t.IMGFMT, +format: file, +}, backing-filename: TEST_DIR/t.IMGFMT.2.base, dirty-flag: false }, @@ -52,6 +57,11 @@ cluster_size: 65536 filename: TEST_DIR/t.IMGFMT.2.base, cluster-size: 65536, format: IMGFMT, +file: { +virtual-size: 327680, +filename: TEST_DIR/t.IMGFMT.2.base, +format: file, +}, backing-filename: TEST_DIR/t.IMGFMT.1.base, dirty-flag: false }, @@ -60,6 +70,11 @@ cluster_size: 65536 filename: TEST_DIR/t.IMGFMT.1.base, cluster-size: 65536, format: IMGFMT, +file: { +virtual-size: 327680, +filename: TEST_DIR/t.IMGFMT.1.base, +format: file, +}, dirty-flag: false } ] -- 1.7.1
[Qemu-devel] [PATCH] block: add the optional file entry to query-block
This patch adds the optional file entry to the query-block output. The value is a json-object representing the information about the underlying file or device (when present). Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- block/qapi.c |9 - qapi-schema.json |4 +++- qmp-commands.hx|8 tests/qemu-iotests/043.out | 15 +++ 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/block/qapi.c b/block/qapi.c index a4bc411..03cd222 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -119,7 +119,7 @@ void bdrv_query_image_info(BlockDriverState *bs, info-filename= g_strdup(bs-filename); info-format = g_strdup(bdrv_get_format_name(bs)); -info-virtual_size= total_sectors * 512; +info-virtual_size= bdrv_getlength(bs); info-actual_size = bdrv_get_allocated_file_size(bs); info-has_actual_size = info-actual_size = 0; if (bdrv_is_encrypted(bs)) { @@ -171,6 +171,13 @@ void bdrv_query_image_info(BlockDriverState *bs, return; } +if (bs-file) { +bdrv_query_image_info(bs-file, info-file, err); +if (info-file) { +info-has_file = true; +} +} + *p_info = info; } diff --git a/qapi-schema.json b/qapi-schema.json index a30a728..524a4a0 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -238,6 +238,8 @@ # # @backing-image: #optional info of the backing image (since 1.6) # +# @file: #optional info of the underlying file (since 1.6) +# # Since: 1.3 # ## @@ -248,7 +250,7 @@ '*cluster-size': 'int', '*encrypted': 'bool', '*backing-filename': 'str', '*full-backing-filename': 'str', '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'], - '*backing-image': 'ImageInfo' } } + '*backing-image': 'ImageInfo', '*file': 'ImageInfo' } } ## # @ImageCheck: diff --git a/qmp-commands.hx b/qmp-commands.hx index 8cea5e5..c25ac77 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -1745,6 +1745,8 @@ Each json-object contain the following: - backing-image: the detail of the backing image, it is an optional json-object only present when a backing image present for this image + - file: an optional json-object representing the information + about the underlying file or device (when present) - io-status: I/O operation status, only present if the device supports it and the VM is configured to stop on errors. It's always reset @@ -1796,6 +1798,12 @@ Example: format:qcow2, virtual-size:2048000 } + file:{ + filename:disks/test.qcow2, + format: file, + virtual-size: 262144, + actual-size: 139264 + } } }, type:unknown diff --git a/tests/qemu-iotests/043.out b/tests/qemu-iotests/043.out index ad23337..6aea37b 100644 --- a/tests/qemu-iotests/043.out +++ b/tests/qemu-iotests/043.out @@ -44,6 +44,11 @@ cluster_size: 65536 filename: TEST_DIR/t.IMGFMT, cluster-size: 65536, format: IMGFMT, +file: { +virtual-size: 327680, +filename: TEST_DIR/t.IMGFMT, +format: file, +}, backing-filename: TEST_DIR/t.IMGFMT.2.base, dirty-flag: false }, @@ -52,6 +57,11 @@ cluster_size: 65536 filename: TEST_DIR/t.IMGFMT.2.base, cluster-size: 65536, format: IMGFMT, +file: { +virtual-size: 327680, +filename: TEST_DIR/t.IMGFMT.2.base, +format: file, +}, backing-filename: TEST_DIR/t.IMGFMT.1.base, dirty-flag: false }, @@ -60,6 +70,11 @@ cluster_size: 65536 filename: TEST_DIR/t.IMGFMT.1.base, cluster-size: 65536, format: IMGFMT, +file: { +virtual-size: 327680, +filename: TEST_DIR/t.IMGFMT.1.base, +format: file, +}, dirty-flag: false } ] -- 1.7.1
[Qemu-devel] [PATCH] block: add apparent-size to query-block
This patch adds the apparent-size entry to the query-block output. The value represents the apparent size in bytes of the image, e.g. file size (including the blocks not yet allocated) or block device size. Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- block/qapi.c |3 ++- qapi-schema.json |4 +++- qmp-commands.hx |1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/block/qapi.c b/block/qapi.c index a4bc411..ccaba67 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -119,8 +119,9 @@ void bdrv_query_image_info(BlockDriverState *bs, info-filename= g_strdup(bs-filename); info-format = g_strdup(bdrv_get_format_name(bs)); -info-virtual_size= total_sectors * 512; +info-virtual_size= bdrv_getlength(bs); info-actual_size = bdrv_get_allocated_file_size(bs); +info-apparent_size = bdrv_getlength(bs-file); info-has_actual_size = info-actual_size = 0; if (bdrv_is_encrypted(bs)) { info-encrypted = true; diff --git a/qapi-schema.json b/qapi-schema.json index a80ee40..c7403ed 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -222,6 +222,8 @@ # # @actual-size: #optional actual size on disk in bytes of the image # +# @apparent-size: apparent size in bytes of the image +# # @dirty-flag: #optional true if image is not cleanly closed # # @cluster-size: #optional size of a cluster in bytes @@ -244,7 +246,7 @@ { 'type': 'ImageInfo', 'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool', - '*actual-size': 'int', 'virtual-size': 'int', + '*actual-size': 'int', 'virtual-size': 'int', 'apparent-size': 'int', '*cluster-size': 'int', '*encrypted': 'bool', '*backing-filename': 'str', '*full-backing-filename': 'str', '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'], diff --git a/qmp-commands.hx b/qmp-commands.hx index 8cea5e5..3f3aded 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -1714,6 +1714,7 @@ Each json-object contain the following: - actual-size: actual size on disk in bytes of the image, not present when image does not support thin provision (json-int, optional) + - apparent-size: apparent size in bytes of the image - cluster-size: size of a cluster in bytes, not present if image format does not support it (json-int, optional) - encrypted: true if the image is encrypted, not present means -- 1.7.1
[Qemu-devel] [PATCHv4 2/2] qemu-img: add json output option to the check command
This option --output=[human|json] makes qemu-img check output a human or JSON representation at the choice of the user. Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- qapi-schema.json | 46 +++ qemu-img-cmds.hx |4 +- qemu-img.c | 232 +++--- qemu-img.texi|5 +- 4 files changed, 220 insertions(+), 67 deletions(-) diff --git a/qapi-schema.json b/qapi-schema.json index 6d7252b..dc99c28 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -245,6 +245,52 @@ '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'] } } ## +# @ImageCheck: +# +# Information about a QEMU image file check +# +# @filename: name of the image file checked +# +# @format: format of the image file checked +# +# @check-errors: number of unexpected errors occurred during check +# +# @image-end-offset: #optional offset (in bytes) where the image ends, this +#field is present if the driver for the image format +#supports it +# +# @corruptions: #optional number of corruptions found during the check if any +# +# @leaks: #optional number of leaks found during the check if any +# +# @corruptions-fixed: #optional number of corruptions fixed during the check +# if any +# +# @leaks-fixed: #optional number of leaks fixed during the check if any +# +# @total-clusters: #optional total number of clusters, this field is present +# if the driver for the image format supports it +# +# @allocated-clusters: #optional total number of allocated clusters, this +# field is present if the driver for the image format +# supports it +# +# @fragmented-clusters: #optional total number of fragmented clusters, this +# field is present if the driver for the image format +# supports it +# +# Since: 1.4 +# +## + +{ 'type': 'ImageCheck', + 'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int', + '*image-end-offset': 'int', '*corruptions': 'int', '*leaks': 'int', + '*corruptions-fixed': 'int', '*leaks-fixed': 'int', + '*total-clusters': 'int', '*allocated-clusters': 'int', + '*fragmented-clusters': 'int' } } + +## # @StatusInfo: # # Information about VCPU run state diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index a181363..259fc14 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -10,9 +10,9 @@ STEXI ETEXI DEF(check, img_check, -check [-f fmt] [-r [leaks | all]] filename) +check [-f fmt] [--output=ofmt] [-r [leaks | all]] filename) STEXI -@item check [-f @var{fmt}] [-r [leaks | all]] @var{filename} +@item check [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] @var{filename} ETEXI DEF(create, img_create, diff --git a/qemu-img.c b/qemu-img.c index e80c1c5..34249fe 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -42,6 +42,16 @@ typedef struct img_cmd_t { int (*handler)(int argc, char **argv); } img_cmd_t; +enum { +OPTION_OUTPUT = 256, +OPTION_BACKING_CHAIN = 257, +}; + +typedef enum OutputFormat { +OFORMAT_JSON, +OFORMAT_HUMAN, +} OutputFormat; + /* Default to cache=writeback as data integrity is not important for qemu-tcg. */ #define BDRV_O_FLAGS BDRV_O_CACHE_WB #define BDRV_DEFAULT_CACHE writeback @@ -375,6 +385,96 @@ static int img_create(int argc, char **argv) return 0; } +static void dump_json_image_check(ImageCheck *check) +{ +Error *errp = NULL; +QString *str; +QmpOutputVisitor *ov = qmp_output_visitor_new(); +QObject *obj; +visit_type_ImageCheck(qmp_output_get_visitor(ov), + check, NULL, errp); +obj = qmp_output_get_qobject(ov); +str = qobject_to_json_pretty(obj); +assert(str != NULL); +printf(%s\n, qstring_get_str(str)); +qobject_decref(obj); +qmp_output_visitor_cleanup(ov); +QDECREF(str); +} + +static void dump_human_image_check(ImageCheck *check) +{ +if (!(check-corruptions || check-leaks || check-check_errors)) { +printf(No errors were found on the image.\n); +} else { +if (check-corruptions) { +printf(\n% PRId64 errors were found on the image.\n +Data may be corrupted, or further writes to the image +may corrupt it.\n, +check-corruptions); +} + +if (check-leaks) { +printf(\n% PRId64 leaked clusters were found on the image.\n +This means waste of disk space, but no harm to data.\n, +check-leaks); +} + +if (check-check_errors) { +printf(\n% PRId64 internal errors have occurred during the check.\n, +check-check_errors); +} +} + +if (check-total_clusters != 0 check-allocated_clusters != 0) { +printf(% PRId64 /% PRId64 = %0.2f%% allocated, %0.2f%% fragmented\n, +check
[Qemu-devel] [PATCHv4 1/2] qemu-img: find the image end offset during check
This patch adds the support for reporting the image end offset (in bytes). This is particularly useful after a conversion (or a rebase) where the destination is a block device in order to find the first unused byte at the end of the image. Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- block/qcow2-refcount.c | 10 -- include/block/block.h|1 + qemu-img.c |4 tests/qemu-iotests/026 |6 +++--- tests/qemu-iotests/036 |3 ++- tests/qemu-iotests/039 |2 +- tests/qemu-iotests/044.out |1 + tests/qemu-iotests/common.rc |5 +++-- 8 files changed, 23 insertions(+), 9 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 6a95aa6..45ad043 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1116,7 +1116,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) { BDRVQcowState *s = bs-opaque; -int64_t size, i; +int64_t size, i, highest_cluster; int nb_clusters, refcount1, refcount2; QCowSnapshot *sn; uint16_t *refcount_table; @@ -1187,7 +1187,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, } /* compare ref counts */ -for(i = 0; i nb_clusters; i++) { +for(i = 0, highest_cluster = 0; i nb_clusters; i++) { refcount1 = get_refcount(bs, i); if (refcount1 0) { fprintf(stderr, Can't get refcount for cluster % PRId64 : %s\n, @@ -1197,6 +1197,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, } refcount2 = refcount_table[i]; + +if (refcount1 0 || refcount2 0) { +highest_cluster = i; +} + if (refcount1 != refcount2) { /* Check if we're allowed to fix the mismatch */ @@ -1231,6 +1236,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, } } +res-image_end_offset = (highest_cluster + 1) * s-cluster_size; ret = 0; fail: diff --git a/include/block/block.h b/include/block/block.h index ffd1936..85eb3d9 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -213,6 +213,7 @@ typedef struct BdrvCheckResult { int check_errors; int corruptions_fixed; int leaks_fixed; +int64_t image_end_offset; BlockFragInfo bfi; } BdrvCheckResult; diff --git a/qemu-img.c b/qemu-img.c index 85d3740..e80c1c5 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -475,6 +475,10 @@ static int img_check(int argc, char **argv) result.bfi.fragmented_clusters * 100.0 / result.bfi.allocated_clusters); } +if (result.image_end_offset 0) { +printf(Image end offset: % PRId64 \n, result.image_end_offset); +} + bdrv_delete(bs); if (ret 0 || result.check_errors) { diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026 index 1602ccd..107a3ff 100755 --- a/tests/qemu-iotests/026 +++ b/tests/qemu-iotests/026 @@ -102,7 +102,7 @@ if [ $event == l2_load ]; then $QEMU_IO -c read $vmstate 0 128k $BLKDBG_TEST_IMG | _filter_qemu_io fi -$QEMU_IMG check $TEST_IMG 21 | grep -v refcount=1 reference=0 +_check_test_img 21 | grep -v refcount=1 reference=0 done done @@ -147,7 +147,7 @@ echo echo Event: $event; errno: $errno; imm: $imm; once: $once; write $vmstate $QEMU_IO -c write $vmstate 0 64M $BLKDBG_TEST_IMG | _filter_qemu_io -$QEMU_IMG check $TEST_IMG 21 | grep -v refcount=1 reference=0 +_check_test_img 21 | grep -v refcount=1 reference=0 done done @@ -186,7 +186,7 @@ echo echo Event: $event; errno: $errno; imm: $imm; once: $once $QEMU_IO -c write -b 0 64k $BLKDBG_TEST_IMG | _filter_qemu_io -$QEMU_IMG check $TEST_IMG 21 | grep -v refcount=1 reference=0 +_check_test_img 21 | grep -v refcount=1 reference=0 done done diff --git a/tests/qemu-iotests/036 b/tests/qemu-iotests/036 index 329533e..4dbfc57 100755 --- a/tests/qemu-iotests/036 +++ b/tests/qemu-iotests/036 @@ -59,7 +59,8 @@ _make_test_img 64M echo echo === Repair image === echo -$QEMU_IMG check -r all $TEST_IMG +_check_test_img -r all + ./qcow2.py $TEST_IMG dump-header # success, all done diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039 index c5ae806..ae35175 100755 --- a/tests/qemu-iotests/039 +++ b/tests/qemu-iotests/039 @@ -86,7 +86,7 @@ $QEMU_IO -r -c read -P 0x5a 0 512 $TEST_IMG | _filter_qemu_io echo echo == Repairing the image file must succeed == -$QEMU_IMG check -r all $TEST_IMG +_check_test_img -r all # The dirty bit must not be set ./qcow2.py $TEST_IMG dump-header | grep incompatible_features diff --git a/tests/qemu-iotests/044.out b/tests/qemu-iotests/044.out index 7a40071..9c48673 100644 --- a/tests/qemu-iotests/044.out +++ b/tests/qemu-iotests/044.out @@ -1,4 +1,5 @@ No errors were found on the image. +Image end offset: 4296447488 . -- Ran 1
[Qemu-devel] [PATCHv3 1/2] qemu-img: find the highest offset in use during check
This patch adds the support for reporting the highest offset in use by an image. This is particularly useful after a conversion (or a rebase) where the destination is a block device in order to find the actual amount of space in use. Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- block/qcow2-refcount.c | 10 -- include/block/block.h|1 + qemu-img.c |4 tests/qemu-iotests/026 |6 +++--- tests/qemu-iotests/036 |3 ++- tests/qemu-iotests/039 |2 +- tests/qemu-iotests/044.out |1 + tests/qemu-iotests/common.rc |5 +++-- 8 files changed, 23 insertions(+), 9 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 6a95aa6..3444f85 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1116,7 +1116,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) { BDRVQcowState *s = bs-opaque; -int64_t size, i; +int64_t size, i, highest_cluster; int nb_clusters, refcount1, refcount2; QCowSnapshot *sn; uint16_t *refcount_table; @@ -1154,7 +1154,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, s-refcount_table_offset, s-refcount_table_size * sizeof(uint64_t)); -for(i = 0; i s-refcount_table_size; i++) { +for(i = 0, highest_cluster = 0; i s-refcount_table_size; i++) { uint64_t offset, cluster; offset = s-refcount_table[i]; cluster = offset s-cluster_bits; @@ -1197,6 +1197,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, } refcount2 = refcount_table[i]; + +if (refcount1 0 || refcount2 0) { +highest_cluster = i; +} + if (refcount1 != refcount2) { /* Check if we're allowed to fix the mismatch */ @@ -1231,6 +1236,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, } } +res-highest_offset = (highest_cluster + 1) * s-cluster_size; ret = 0; fail: diff --git a/include/block/block.h b/include/block/block.h index ffd1936..b5cf6ac 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -213,6 +213,7 @@ typedef struct BdrvCheckResult { int check_errors; int corruptions_fixed; int leaks_fixed; +int64_t highest_offset; BlockFragInfo bfi; } BdrvCheckResult; diff --git a/qemu-img.c b/qemu-img.c index 85d3740..7b977cd 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -475,6 +475,10 @@ static int img_check(int argc, char **argv) result.bfi.fragmented_clusters * 100.0 / result.bfi.allocated_clusters); } +if (result.highest_offset 0) { +printf(Highest offset in use: % PRId64 \n, result.highest_offset); +} + bdrv_delete(bs); if (ret 0 || result.check_errors) { diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026 index 1602ccd..107a3ff 100755 --- a/tests/qemu-iotests/026 +++ b/tests/qemu-iotests/026 @@ -102,7 +102,7 @@ if [ $event == l2_load ]; then $QEMU_IO -c read $vmstate 0 128k $BLKDBG_TEST_IMG | _filter_qemu_io fi -$QEMU_IMG check $TEST_IMG 21 | grep -v refcount=1 reference=0 +_check_test_img 21 | grep -v refcount=1 reference=0 done done @@ -147,7 +147,7 @@ echo echo Event: $event; errno: $errno; imm: $imm; once: $once; write $vmstate $QEMU_IO -c write $vmstate 0 64M $BLKDBG_TEST_IMG | _filter_qemu_io -$QEMU_IMG check $TEST_IMG 21 | grep -v refcount=1 reference=0 +_check_test_img 21 | grep -v refcount=1 reference=0 done done @@ -186,7 +186,7 @@ echo echo Event: $event; errno: $errno; imm: $imm; once: $once $QEMU_IO -c write -b 0 64k $BLKDBG_TEST_IMG | _filter_qemu_io -$QEMU_IMG check $TEST_IMG 21 | grep -v refcount=1 reference=0 +_check_test_img 21 | grep -v refcount=1 reference=0 done done diff --git a/tests/qemu-iotests/036 b/tests/qemu-iotests/036 index 329533e..4dbfc57 100755 --- a/tests/qemu-iotests/036 +++ b/tests/qemu-iotests/036 @@ -59,7 +59,8 @@ _make_test_img 64M echo echo === Repair image === echo -$QEMU_IMG check -r all $TEST_IMG +_check_test_img -r all + ./qcow2.py $TEST_IMG dump-header # success, all done diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039 index c5ae806..ae35175 100755 --- a/tests/qemu-iotests/039 +++ b/tests/qemu-iotests/039 @@ -86,7 +86,7 @@ $QEMU_IO -r -c read -P 0x5a 0 512 $TEST_IMG | _filter_qemu_io echo echo == Repairing the image file must succeed == -$QEMU_IMG check -r all $TEST_IMG +_check_test_img -r all # The dirty bit must not be set ./qcow2.py $TEST_IMG dump-header | grep incompatible_features diff --git a/tests/qemu-iotests/044.out b/tests/qemu-iotests/044.out index 7a40071..6851878 100644 --- a/tests/qemu-iotests/044.out +++ b/tests/qemu-iotests/044.out @@ -1,4 +1,5 @@ No errors were found on the image. +Highest offset in use: 4296447488
[Qemu-devel] [PATCHv3 2/2] qemu-img: add json output option to the check command
This option --output=[human|json] make qemu-img check output an human or JSON representation at the choice of the user. Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- qapi-schema.json | 46 +++ qemu-img-cmds.hx |4 +- qemu-img.c | 232 +++-- qemu-img.texi|5 +- 4 files changed, 221 insertions(+), 66 deletions(-) diff --git a/qapi-schema.json b/qapi-schema.json index 6d7252b..d576728 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -245,6 +245,52 @@ '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'] } } ## +# @ImageCheck: +# +# Information about a QEMU image file check +# +# @filename: name of the image file checked +# +# @format: format of the image file checked +# +# @check-errors: number of unexpected errors occurred during check +# +# @highest-offset: #optional highest offset (in bytes) in use by the image, +# this field is present if the driver for the image format +# supports it +# +# @corruptions: #optional number of corruptions found during the check if any +# +# @leaks: #optional number of leaks found during the check if any +# +# @corruptions-fixed: #optional number of corruptions fixed during the check +# if any +# +# @leaks-fixed: #optional number of leaks fixed during the check if any +# +# @total-clusters: #optional total number of clusters, this field is present +# if the driver for the image format supports it +# +# @allocated-clusters: #optional total number of allocated clusters, this +# field is present if the driver for the image format +# supports it +# +# @fragmented-clusters: #optional total number of fragmented clusters, this +# field is present if the driver for the image format +# supports it +# +# Since: 1.4 +# +## + +{ 'type': 'ImageCheck', + 'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int', + '*highest-offset': 'int', '*corruptions': 'int', '*leaks': 'int', + '*corruptions-fixed': 'int', '*leaks-fixed': 'int', + '*total-clusters': 'int', '*allocated-clusters': 'int', + '*fragmented-clusters': 'int' } } + +## # @StatusInfo: # # Information about VCPU run state diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index a181363..259fc14 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -10,9 +10,9 @@ STEXI ETEXI DEF(check, img_check, -check [-f fmt] [-r [leaks | all]] filename) +check [-f fmt] [--output=ofmt] [-r [leaks | all]] filename) STEXI -@item check [-f @var{fmt}] [-r [leaks | all]] @var{filename} +@item check [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] @var{filename} ETEXI DEF(create, img_create, diff --git a/qemu-img.c b/qemu-img.c index 7b977cd..bcb21d5 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -42,6 +42,16 @@ typedef struct img_cmd_t { int (*handler)(int argc, char **argv); } img_cmd_t; +enum { +OPTION_OUTPUT = 256, +OPTION_BACKING_CHAIN = 257, +}; + +typedef enum OutputFormat { +OFORMAT_JSON, +OFORMAT_HUMAN, +} OutputFormat; + /* Default to cache=writeback as data integrity is not important for qemu-tcg. */ #define BDRV_O_FLAGS BDRV_O_CACHE_WB #define BDRV_DEFAULT_CACHE writeback @@ -375,6 +385,96 @@ static int img_create(int argc, char **argv) return 0; } +static void dump_json_image_check(ImageCheck *check) +{ +Error *errp = NULL; +QString *str; +QmpOutputVisitor *ov = qmp_output_visitor_new(); +QObject *obj; +visit_type_ImageCheck(qmp_output_get_visitor(ov), + check, NULL, errp); +obj = qmp_output_get_qobject(ov); +str = qobject_to_json_pretty(obj); +assert(str != NULL); +printf(%s\n, qstring_get_str(str)); +qobject_decref(obj); +qmp_output_visitor_cleanup(ov); +QDECREF(str); +} + +static void dump_human_image_check(ImageCheck *check) +{ +if (!(check-corruptions || check-leaks || check-check_errors)) { +printf(No errors were found on the image.\n); +} else { +if (check-corruptions) { +printf(\n% PRId64 errors were found on the image.\n +Data may be corrupted, or further writes to the image +may corrupt it.\n, +check-corruptions); +} + +if (check-leaks) { +printf(\n% PRId64 leaked clusters were found on the image.\n +This means waste of disk space, but no harm to data.\n, +check-leaks); +} + +if (check-check_errors) { +printf(\n% PRId64 internal errors have occurred during the check.\n, +check-check_errors); +} +} + +if (check-total_clusters != 0 check-allocated_clusters != 0) { +printf(% PRId64 /% PRId64 = %0.2f%% allocated, %0.2f%% fragmented\n, +check
[Qemu-devel] [PATCH] qemu-img: find the highest offset in use during check
This patch adds the support for reporting the highest offset in use by an image. This is particularly useful after a conversion (or a rebase) where the destination is a block device in order to find the actual amount of space in use. Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- block.h|1 + block/qcow2-refcount.c | 10 -- qemu-img.c |4 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/block.h b/block.h index 722c620..de42e8c 100644 --- a/block.h +++ b/block.h @@ -213,6 +213,7 @@ typedef struct BdrvCheckResult { int check_errors; int corruptions_fixed; int leaks_fixed; +int64_t highest_offset; BlockFragInfo bfi; } BdrvCheckResult; diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 96224d1..017439d 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1116,7 +1116,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) { BDRVQcowState *s = bs-opaque; -int64_t size, i; +int64_t size, i, highest_cluster; int nb_clusters, refcount1, refcount2; QCowSnapshot *sn; uint16_t *refcount_table; @@ -1154,7 +1154,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, s-refcount_table_offset, s-refcount_table_size * sizeof(uint64_t)); -for(i = 0; i s-refcount_table_size; i++) { +for(i = 0, highest_cluster = 0; i s-refcount_table_size; i++) { uint64_t offset, cluster; offset = s-refcount_table[i]; cluster = offset s-cluster_bits; @@ -1197,6 +1197,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, } refcount2 = refcount_table[i]; + +if (refcount1 0 || refcount2 0) { +highest_cluster = i; +} + if (refcount1 != refcount2) { /* Check if we're allowed to fix the mismatch */ @@ -1231,6 +1236,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, } } +res-highest_offset = (highest_cluster + 1) * s-cluster_size; ret = 0; fail: diff --git a/qemu-img.c b/qemu-img.c index e29e01b..3a8090b 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -470,6 +470,10 @@ static int img_check(int argc, char **argv) result.bfi.fragmented_clusters * 100.0 / result.bfi.allocated_clusters); } +if (result.highest_offset 0) { +printf(Highest offset in use: %lu\n, result.highest_offset); +} + bdrv_delete(bs); if (ret 0 || result.check_errors) { -- 1.7.1
[Qemu-devel] [PATCHv2 1/2] qemu-img: find the highest offset in use during check
This patch adds the support for reporting the highest offset in use by an image. This is particularly useful after a conversion (or a rebase) where the destination is a block device in order to find the actual amount of space in use. Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- block.h |1 + block/qcow2-refcount.c | 10 -- qemu-img.c |4 tests/qemu-iotests/026 |6 +++--- tests/qemu-iotests/036 |3 ++- tests/qemu-iotests/039 |2 +- tests/qemu-iotests/common.rc |5 +++-- 7 files changed, 22 insertions(+), 9 deletions(-) diff --git a/block.h b/block.h index 722c620..de42e8c 100644 --- a/block.h +++ b/block.h @@ -213,6 +213,7 @@ typedef struct BdrvCheckResult { int check_errors; int corruptions_fixed; int leaks_fixed; +int64_t highest_offset; BlockFragInfo bfi; } BdrvCheckResult; diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 96224d1..017439d 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1116,7 +1116,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) { BDRVQcowState *s = bs-opaque; -int64_t size, i; +int64_t size, i, highest_cluster; int nb_clusters, refcount1, refcount2; QCowSnapshot *sn; uint16_t *refcount_table; @@ -1154,7 +1154,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, s-refcount_table_offset, s-refcount_table_size * sizeof(uint64_t)); -for(i = 0; i s-refcount_table_size; i++) { +for(i = 0, highest_cluster = 0; i s-refcount_table_size; i++) { uint64_t offset, cluster; offset = s-refcount_table[i]; cluster = offset s-cluster_bits; @@ -1197,6 +1197,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, } refcount2 = refcount_table[i]; + +if (refcount1 0 || refcount2 0) { +highest_cluster = i; +} + if (refcount1 != refcount2) { /* Check if we're allowed to fix the mismatch */ @@ -1231,6 +1236,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, } } +res-highest_offset = (highest_cluster + 1) * s-cluster_size; ret = 0; fail: diff --git a/qemu-img.c b/qemu-img.c index e29e01b..45c1ec1 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -470,6 +470,10 @@ static int img_check(int argc, char **argv) result.bfi.fragmented_clusters * 100.0 / result.bfi.allocated_clusters); } +if (result.highest_offset 0) { +printf(Highest offset in use: % PRId64 \n, result.highest_offset); +} + bdrv_delete(bs); if (ret 0 || result.check_errors) { diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026 index 1602ccd..107a3ff 100755 --- a/tests/qemu-iotests/026 +++ b/tests/qemu-iotests/026 @@ -102,7 +102,7 @@ if [ $event == l2_load ]; then $QEMU_IO -c read $vmstate 0 128k $BLKDBG_TEST_IMG | _filter_qemu_io fi -$QEMU_IMG check $TEST_IMG 21 | grep -v refcount=1 reference=0 +_check_test_img 21 | grep -v refcount=1 reference=0 done done @@ -147,7 +147,7 @@ echo echo Event: $event; errno: $errno; imm: $imm; once: $once; write $vmstate $QEMU_IO -c write $vmstate 0 64M $BLKDBG_TEST_IMG | _filter_qemu_io -$QEMU_IMG check $TEST_IMG 21 | grep -v refcount=1 reference=0 +_check_test_img 21 | grep -v refcount=1 reference=0 done done @@ -186,7 +186,7 @@ echo echo Event: $event; errno: $errno; imm: $imm; once: $once $QEMU_IO -c write -b 0 64k $BLKDBG_TEST_IMG | _filter_qemu_io -$QEMU_IMG check $TEST_IMG 21 | grep -v refcount=1 reference=0 +_check_test_img 21 | grep -v refcount=1 reference=0 done done diff --git a/tests/qemu-iotests/036 b/tests/qemu-iotests/036 index 329533e..4dbfc57 100755 --- a/tests/qemu-iotests/036 +++ b/tests/qemu-iotests/036 @@ -59,7 +59,8 @@ _make_test_img 64M echo echo === Repair image === echo -$QEMU_IMG check -r all $TEST_IMG +_check_test_img -r all + ./qcow2.py $TEST_IMG dump-header # success, all done diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039 index c5ae806..ae35175 100755 --- a/tests/qemu-iotests/039 +++ b/tests/qemu-iotests/039 @@ -86,7 +86,7 @@ $QEMU_IO -r -c read -P 0x5a 0 512 $TEST_IMG | _filter_qemu_io echo echo == Repairing the image file must succeed == -$QEMU_IMG check -r all $TEST_IMG +_check_test_img -r all # The dirty bit must not be set ./qcow2.py $TEST_IMG dump-header | grep incompatible_features diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index aef5f52..22c0186 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -161,9 +161,10 @@ _cleanup_test_img() _check_test_img() { -$QEMU_IMG check -f $IMGFMT $TEST_IMG 21 | \ +$QEMU_IMG check $@ -f $IMGFMT $TEST_IMG 21 | \ grep -v fragmented$ | \ - sed -e 's/qemu
[Qemu-devel] [PATCH 2/2] qemu-img: add json output option to the check command
This option --output=[human|json] make qemu-img check output an human or JSON representation at the choice of the user. Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- qapi-schema.json | 38 + qemu-img-cmds.hx |4 +- qemu-img.c | 246 +++--- qemu-img.texi|5 +- 4 files changed, 221 insertions(+), 72 deletions(-) diff --git a/qapi-schema.json b/qapi-schema.json index 5dfa052..8877285 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -245,6 +245,44 @@ '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'] } } ## +# @ImageCheck: +# +# Information about a QEMU image file check +# +# @filename: name of the image file checked +# +# @format: format of the image file checked +# +# @check-errors: number of unexpected errors occurred during check +# +# @highest-offset: #optional highest offset (in bytes) in use by the image +# +# @corruptions: #optional number of corruptions found during the check +# +# @leaks: #optional number of leaks found during the check +# +# @corruptions-fixed: #optional number of corruptions fixed during the check +# +# @leaks-fixed: #optional number of leaks fixed during the check +# +# @total-clusters: #optional total number of clusters +# +# @allocated-clusters: #optional total number of allocated clusters +# +# @fragmented-clusters: #optional total number of fragmented clusters +# +# Since: 1.4 +# +## + +{ 'type': 'ImageCheck', + 'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int', + '*highest-offset': 'int', '*corruptions': 'int', '*leaks': 'int', + '*corruptions-fixed': 'int', '*leaks-fixed': 'int', + '*total-clusters': 'int', '*allocated-clusters': 'int', + '*fragmented-clusters': 'int' } } + +## # @StatusInfo: # # Information about VCPU run state diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index a181363..259fc14 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -10,9 +10,9 @@ STEXI ETEXI DEF(check, img_check, -check [-f fmt] [-r [leaks | all]] filename) +check [-f fmt] [--output=ofmt] [-r [leaks | all]] filename) STEXI -@item check [-f @var{fmt}] [-r [leaks | all]] @var{filename} +@item check [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] @var{filename} ETEXI DEF(create, img_create, diff --git a/qemu-img.c b/qemu-img.c index 45c1ec1..18ba5c2 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -42,6 +42,16 @@ typedef struct img_cmd_t { int (*handler)(int argc, char **argv); } img_cmd_t; +enum { +OPTION_OUTPUT = 256, +OPTION_BACKING_CHAIN = 257, +}; + +typedef enum OutputFormat { +OFORMAT_JSON, +OFORMAT_HUMAN, +} OutputFormat; + /* Default to cache=writeback as data integrity is not important for qemu-tcg. */ #define BDRV_O_FLAGS BDRV_O_CACHE_WB #define BDRV_DEFAULT_CACHE writeback @@ -370,6 +380,122 @@ out: return 0; } +static void dump_json_image_check(ImageCheck *check) +{ +Error *errp = NULL; +QString *str; +QmpOutputVisitor *ov = qmp_output_visitor_new(); +QObject *obj; +visit_type_ImageCheck(qmp_output_get_visitor(ov), + check, NULL, errp); +obj = qmp_output_get_qobject(ov); +str = qobject_to_json_pretty(obj); +assert(str != NULL); +printf(%s\n, qstring_get_str(str)); +qobject_decref(obj); +qmp_output_visitor_cleanup(ov); +QDECREF(str); +} + +static void dump_human_image_check(ImageCheck *check) +{ +if (check-corruptions_fixed || check-leaks_fixed) { + printf(The following inconsistencies were found and repaired:\n\n +% PRId64 leaked clusters\n +% PRId64 corruptions\n\n +Double checking the fixed image now...\n, +check-leaks_fixed, +check-corruptions_fixed); +} + +if (!(check-corruptions || check-leaks || check-check_errors)) { +printf(No errors were found on the image.\n); +} else { +if (check-corruptions) { +printf(\n% PRId64 errors were found on the image.\n +Data may be corrupted, or further writes to the image +may corrupt it.\n, +check-corruptions); +} + +if (check-leaks) { +printf(\n% PRId64 leaked clusters were found on the image.\n +This means waste of disk space, but no harm to data.\n, +check-leaks); +} + +if (check-check_errors) { +printf(\n% PRId64 internal errors have occurred during the check.\n, +check-check_errors); +} +} + +if (check-total_clusters != 0 check-allocated_clusters != 0) { +printf(% PRId64 /% PRId64 = %0.2f%% allocated, %0.2f%% fragmented\n, +check-allocated_clusters, check-total_clusters, +check-allocated_clusters * 100.0 / check-total_clusters, +check-fragmented_clusters * 100.0 / check
Re: [Qemu-devel] [PATCH 4/7] block: close unused image files at the end of streaming
- Original Message - From: Paolo Bonzini pbonz...@redhat.com To: qemu-devel@nongnu.org Cc: Marcelo Tosatti mtosa...@redhat.com, Federico Simoncelli fsimo...@redhat.com Sent: Thursday, April 5, 2012 5:42:58 PM Subject: [PATCH 4/7] block: close unused image files at the end of streaming From: Marcelo Tosatti mtosa...@redhat.com Close the now unused images that were part of the previous backing file chain and adjust -backing_hd properly. Note that this only works with relative paths. s/relative/absolute/ Given the images: /tmp/a/base.raw /tmp/a/snap1.qcow2 /tmp/b/snap2.qcow2 chained as: base(bak:) - snap1(bak:base.raw) - snap2(bak:../a/snap1.qcow2) merging snap1 and snap2 we will obtain: base(bak:) - snap2(bak:base.raw) However this should be maintained by the user/admin: one can also screw up relative paths using qemu-img manually. The patch is fine but I disagree with this comment. The user/admin didn't make any mistake and he shouldn't be in charge of additional maintenance (which is also tricky since the VM is running). -- Federico
[Qemu-devel] [PATCH v3] qapi: add c_fun to escape function names
Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- scripts/qapi-commands.py | 14 +++--- scripts/qapi-types.py|4 ++-- scripts/qapi-visit.py|4 ++-- scripts/qapi.py |5 - 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/scripts/qapi-commands.py b/scripts/qapi-commands.py index 3aabf61..30a24d2 100644 --- a/scripts/qapi-commands.py +++ b/scripts/qapi-commands.py @@ -42,7 +42,7 @@ def generate_command_decl(name, args, ret_type): return mcgen(''' %(ret_type)s qmp_%(name)s(%(args)sError **errp); ''', - ret_type=c_type(ret_type), name=c_var(name), args=arglist).strip() + ret_type=c_type(ret_type), name=c_fun(name), args=arglist).strip() def gen_sync_call(name, args, ret_type, indent=0): ret = @@ -59,7 +59,7 @@ def gen_sync_call(name, args, ret_type, indent=0): %(retval)sqmp_%(name)s(%(args)serrp); ''', -name=c_var(name), args=arglist, retval=retval).rstrip() +name=c_fun(name), args=arglist, retval=retval).rstrip() if ret_type: ret += \n + mcgen( if (!error_is_set(errp)) { @@ -74,7 +74,7 @@ if (!error_is_set(errp)) { def gen_marshal_output_call(name, ret_type): if not ret_type: return -return qmp_marshal_output_%s(retval, ret, errp); % c_var(name) +return qmp_marshal_output_%s(retval, ret, errp); % c_fun(name) def gen_visitor_output_containers_decl(ret_type): ret = @@ -198,16 +198,16 @@ static void qmp_marshal_output_%(c_name)s(%(c_ret_type)s ret_in, QObject **ret_o qapi_dealloc_visitor_cleanup(md); } ''', -c_ret_type=c_type(ret_type), c_name=c_var(name), +c_ret_type=c_type(ret_type), c_name=c_fun(name), visitor=type_visitor(ret_type)) return ret def gen_marshal_input_decl(name, args, ret_type, middle_mode): if middle_mode: -return 'int qmp_marshal_input_%s(Monitor *mon, const QDict *qdict, QObject **ret)' % c_var(name) +return 'int qmp_marshal_input_%s(Monitor *mon, const QDict *qdict, QObject **ret)' % c_fun(name) else: -return 'static void qmp_marshal_input_%s(QDict *args, QObject **ret, Error **errp)' % c_var(name) +return 'static void qmp_marshal_input_%s(QDict *args, QObject **ret, Error **errp)' % c_fun(name) @@ -298,7 +298,7 @@ def gen_registry(commands): registry += mcgen(''' qmp_register_command(%(name)s, qmp_marshal_input_%(c_name)s); ''', - name=cmd['command'], c_name=c_var(cmd['command'])) + name=cmd['command'], c_name=c_fun(cmd['command'])) pop_indent() ret = mcgen(''' static void qmp_init_marshal(void) diff --git a/scripts/qapi-types.py b/scripts/qapi-types.py index 727fb77..4a734f5 100644 --- a/scripts/qapi-types.py +++ b/scripts/qapi-types.py @@ -100,7 +100,7 @@ typedef enum %(name)s %(abbrev)s_%(value)s = %(i)d, ''', abbrev=de_camel_case(name).upper(), - value=c_var(value).upper(), + value=c_fun(value).upper(), i=i) i += 1 @@ -126,7 +126,7 @@ struct %(name)s %(c_type)s %(c_name)s; ''', c_type=c_type(typeinfo[key]), - c_name=c_var(key)) + c_name=c_fun(key)) ret += mcgen(''' }; diff --git a/scripts/qapi-visit.py b/scripts/qapi-visit.py index 54117d4..78c947c 100644 --- a/scripts/qapi-visit.py +++ b/scripts/qapi-visit.py @@ -129,9 +129,9 @@ void visit_type_%(name)s(Visitor *m, %(name)s ** obj, const char *name, Error ** break; ''', abbrev = de_camel_case(name).upper(), -enum = de_camel_case(key).upper(), +enum = c_fun(de_camel_case(key)).upper(), c_type=members[key], -c_name=c_var(key)) +c_name=c_fun(key)) ret += mcgen(''' default: diff --git a/scripts/qapi.py b/scripts/qapi.py index 6e05469..e062336 100644 --- a/scripts/qapi.py +++ b/scripts/qapi.py @@ -131,7 +131,10 @@ def camel_case(name): return new_name def c_var(name): -return '_'.join(name.split('-')).lstrip(*) +return name.replace('-', '_').lstrip(*) + +def c_fun(name): +return c_var(name).replace('.', '_') def c_list_type(name): return '%sList' % name -- 1.7.1
[Qemu-devel] [PATCH] qapi: escaping the dots in c_var
This allows qapi commands and types with dots. Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- scripts/qapi.py |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/scripts/qapi.py b/scripts/qapi.py index 6e05469..4090c55 100644 --- a/scripts/qapi.py +++ b/scripts/qapi.py @@ -131,7 +131,7 @@ def camel_case(name): return new_name def c_var(name): -return '_'.join(name.split('-')).lstrip(*) +return name.replace('-', '_').replace('.', '_').lstrip('*') def c_list_type(name): return '%sList' % name -- 1.7.1
Re: [Qemu-devel] [PATCH v4 10/10] Add the drive-reopen command
- Original Message - From: Kevin Wolf kw...@redhat.com To: Federico Simoncelli fsimo...@redhat.com Cc: Eric Blake ebl...@redhat.com, qemu-devel@nongnu.org, stefa...@linux.vnet.ibm.com, lcapitul...@redhat.com, Paolo Bonzini pbonz...@redhat.com, Markus Armbruster arm...@redhat.com Sent: Wednesday, March 14, 2012 10:34:08 AM Subject: Re: [Qemu-devel] [PATCH v4 10/10] Add the drive-reopen command * We could leave it as it is, a distinct command that is not part of the transaction and that it's closing the old image before opening the new one. Yes, this would be the short-term preliminary solution. I would tend to leave it to downstreams to implement it as an extension, though. This is not completely correct, the main intent was to not spread one image chain across two storage domains (making it incomplete if one of them was missing). In the next oVirt release a VM can have different disks on different storage domains, so this wouldn't be a special case but just a normal situation. The problem with this kind of argument is that we're not developing only for oVirt, but need to look for what makes sense for any management tool (or even just direct users of qemu). That is a general rule, and it's perfectly agreeable, but it has nothing to do with what I was saying. Do you know any management tool or any reason to forbid to a VM to have different disks on different storages? In fact qemu-kvm doesn't even know where the images are coming from (what particular iscsi connection or what particular NFS server). What I was asking was a tool (mirroring) to avoid the split of an image chain between multiple storages, and not a tool to avoid the ability to have full image chains on different storages. There is nothing oVirt specific here, it came into play only when I said that for us is not a problem (for once :-). -- Federico
Re: [Qemu-devel] [PATCH v4 10/10] Add the drive-reopen command
- Original Message - From: Eric Blake ebl...@redhat.com To: Paolo Bonzini pbonz...@redhat.com Cc: kw...@redhat.com, fsimo...@redhat.com, qemu-devel@nongnu.org, stefa...@linux.vnet.ibm.com, lcapitul...@redhat.com Sent: Tuesday, March 13, 2012 9:48:10 PM Subject: Re: [Qemu-devel] [PATCH v4 10/10] Add the drive-reopen command On 03/06/2012 10:56 AM, Paolo Bonzini wrote: From: Federico Simoncelli fsimo...@redhat.com Signed-off-by: Federico Simoncelli fsimo...@redhat.com Signed-off-by: Paolo Bonzini pbonz...@redhat.com ## +# @drive-reopen +# +# Assigns a new image file to a device. +# +# @device: the name of the device for which we are changing the image file. +# +# @new-image-file: the target of the new image. If the file doesn't exists the +# command will fail. +# +# @format: #optional the format of the new image, default is 'qcow2'. +# +# Returns: nothing on success +# If @device is not a valid block device, DeviceNotFound +# If @new-image-file can't be opened, OpenFileFailed +# If @format is invalid, InvalidBlockFormat +# +# Since 1.1 +## +{ 'command': 'drive-reopen', + 'data': { 'device': 'str', 'new-image-file': 'str', '*format': 'str' } } I still think we need a 'drive-reopen' action included in 'transaction', as an 11/10 on this series. For disk migration, it is true that you can migrate one disk at a time, and therefore only need to reopen one disk at a time, to get the guarantee that for a single disk image, the current state of that image will be guaranteed to be consistent using only one storage domain. I'm not sure if this was already addressed on this mailing list but the main problem is that as general rule a qcow file cannot be opened in r/w mode twice. I believe there was only one exception to that with the live migration and it generated several issues. That said, reopen is really hard to be implemented as a transaction without breaking that rule. For example in the blkmirror case you'd need to open the destination image in r/w while the mirroring is in action (already having the same image in r/w mode). There are several solutions here but they are either really hard to implement or non definitive. For example: * We could try to implement the reopen command for each special case, eg: blkmirror, reopening the same image, etc... and in such cases reusing the same bs that we already have. The downside is that this command will be coupled with all this special cases. * We could use the transaction APIs without actually making it transaction (if we fail in the middle we can't rollback). The only advantage of this is that we'd provide a consistent API to libvirt and we would postpone the problem to the future. Anyway I strongly discourage this as it's completely unsafe and it's going to break the transaction semantic. Moreover it's a solution that relies too much on the hope of finding something appropriate in the future. * We could leave it as it is, a distinct command that is not part of the transaction and that it's closing the old image before opening the new one. But since the API allows the creation of two mirrors in one command, I'm worried that someone will try to start a mirror on two disks at once, but then be stuck doing two separate 'drive-reopen' commands. If the first succeeds but the second fails, you have now stranded the qemu process across two storage domains, which is exactly what we were trying to avoid in the first place by inventing transactions. That is, even if all disks are individually consistent in a single domain, the act of migrating then reopening one disk at a time means you will have a window where disk 1 and disk 2 are opened on different storage domains. This is not completely correct, the main intent was to not spread one image chain across two storage domains (making it incomplete if one of them was missing). In the next oVirt release a VM can have different disks on different storage domains, so this wouldn't be a special case but just a normal situation. -- Federico
[Qemu-devel] [PATCH] Add the drive-reopen command
Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- blockdev.c | 40 +++- hmp-commands.hx | 16 hmp.c| 11 +++ hmp.h|1 + qapi-schema.json | 22 ++ 5 files changed, 77 insertions(+), 13 deletions(-) diff --git a/blockdev.c b/blockdev.c index 058b6d5..56da5c9 100644 --- a/blockdev.c +++ b/blockdev.c @@ -646,9 +646,8 @@ void do_commit(Monitor *mon, const QDict *qdict) } } -void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, -bool has_format, const char *format, -Error **errp) +static void change_blockdev_image(const char *device, const char *new_image_file, + const char *format, bool create, Error **errp) { BlockDriverState *bs; BlockDriver *drv, *old_drv, *proto_drv; @@ -671,7 +670,7 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, old_drv = bs-drv; flags = bs-open_flags; -if (!has_format) { +if (!format) { format = qcow2; } @@ -681,24 +680,26 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, return; } -proto_drv = bdrv_find_protocol(snapshot_file); +proto_drv = bdrv_find_protocol(new_image_file); if (!proto_drv) { error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); return; } -ret = bdrv_img_create(snapshot_file, format, bs-filename, - bs-drv-format_name, NULL, -1, flags); -if (ret) { -error_set(errp, QERR_UNDEFINED_ERROR); -return; +if (create) { +ret = bdrv_img_create(new_image_file, format, bs-filename, + bs-drv-format_name, NULL, -1, flags); +if (ret) { +error_set(errp, QERR_UNDEFINED_ERROR); +return; +} } bdrv_drain_all(); bdrv_flush(bs); bdrv_close(bs); -ret = bdrv_open(bs, snapshot_file, flags, drv); +ret = bdrv_open(bs, new_image_file, flags, drv); /* * If reopening the image file we just created fails, fall back * and try to re-open the original image. If that fails too, we @@ -709,11 +710,25 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, if (ret != 0) { error_set(errp, QERR_OPEN_FILE_FAILED, old_filename); } else { -error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file); +error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file); } } } +void qmp_drive_reopen(const char *device, const char *new_image_file, + bool has_format, const char *format, Error **errp) +{ +change_blockdev_image(device, new_image_file, + has_format ? format : NULL, false, errp); +} + +void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, +bool has_format, const char *format, +Error **errp) +{ +change_blockdev_image(device, snapshot_file, + has_format ? format : NULL, true, errp); +} /* New and old BlockDriverState structs for group snapshots */ typedef struct BlkTransactionStates { @@ -877,7 +892,6 @@ exit: return; } - static void eject_device(BlockDriverState *bs, int force, Error **errp) { if (bdrv_in_use(bs)) { diff --git a/hmp-commands.hx b/hmp-commands.hx index 64b3656..9e08123 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -900,6 +900,22 @@ Snapshot device, using snapshot file as target if provided ETEXI { +.name = drive-reopen, +.args_type = device:B,new-image-file:s,format:s?, +.params = device new-image-file [format], +.help = Assigns a new image file to a device.\n\t\t\t + The image will be opened using the format\n\t\t\t + specified or 'qcow2' by default.\n\t\t\t, +.mhandler.cmd = hmp_drive_reopen, +}, + +STEXI +@item drive-reopen +@findex drive-reopen +Assigns a new image file to a device. +ETEXI + +{ .name = drive_add, .args_type = pci_addr:s,opts:s, .params = [[domain:]bus:]slot\n diff --git a/hmp.c b/hmp.c index 3a54455..ab069ad 100644 --- a/hmp.c +++ b/hmp.c @@ -706,6 +706,17 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, errp); } +void hmp_drive_reopen(Monitor *mon, const QDict *qdict) +{ +const char *device = qdict_get_str(qdict, device); +const char *filename = qdict_get_str(qdict, new-image-file); +const char *format = qdict_get_try_str(qdict, format); +Error *errp = NULL; + +qmp_drive_reopen(device, filename, !!format, format, errp); +hmp_handle_error(mon, errp); +} + void hmp_migrate_cancel
[Qemu-devel] [PATCH] add reopen to blockdev-transaction
Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- blockdev.c |8 qapi-schema.json | 12 qmp-commands.hx |6 +- 3 files changed, 25 insertions(+), 1 deletions(-) diff --git a/blockdev.c b/blockdev.c index 56da5c9..36fe07c 100644 --- a/blockdev.c +++ b/blockdev.c @@ -798,6 +798,14 @@ void qmp_blockdev_transaction(BlockdevActionList *dev_list, dev_info-mirror-target); break; +case BLOCKDEV_ACTION_KIND_REOPEN: +device = dev_info-reopen-device; +if (dev_info-format-has_format) { +format = dev_info-reopen-format; +} +new_source = g_strdup(dev_info-reopen-target); +break; + default: abort(); } diff --git a/qapi-schema.json b/qapi-schema.json index b33875d..17f7548 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1145,6 +1145,17 @@ { 'type': 'BlockdevMirror', 'data': { 'device': 'str', 'target': 'str', '*format': 'str', '*reuse': 'bool' } } +## +# @BlockdevReopen +# +# @device: the name of the device to reopen. +# +# @target: the target of the new image. +# +# @format: #optional the format of the new image, default is 'qcow2'. +## +{ 'type': 'BlockdevReopen', + 'data': { 'device': 'str', 'target': 'str', '*format': 'str' } } ## # @BlockdevAction @@ -1156,6 +1167,7 @@ 'data': { 'snapshot': 'BlockdevSnapshot', 'mirror': 'BlockdevMirror', + 'reopen': 'BlockdevReopen', } } ## diff --git a/qmp-commands.hx b/qmp-commands.hx index 50ac5a0..317c448 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -720,7 +720,7 @@ Arguments: actions array: - type: the operation to perform. The only supported - values are snapshot and mirror. (json-string) + values are snapshot, mirror and reopen. (json-string) - data: a dictionary. The contents depend on the value of type. When type is snapshot: - device: device name to snapshot (json-string) @@ -734,6 +734,10 @@ actions array: - format: format of new image (json-string, optional) - reuse: whether QEMU should look for an existing image file (json-bool, optional, default false) + When type is reopen: + - device: device name to reopen (json-string) + - target: name of destination image file (json-string) + - format: format of new image (json-string, optional) Example: -- 1.7.1
[Qemu-devel] [PATCHv3] Add blkmirror block driver
Mirrored writes are used by live block copy. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- Makefile.objs |2 +- block/blkmirror.c | 255 cutils.c | 30 ++ docs/blkmirror.txt | 15 +++ qemu-common.h |1 + 5 files changed, 302 insertions(+), 1 deletions(-) create mode 100644 block/blkmirror.c create mode 100644 docs/blkmirror.txt diff --git a/Makefile.objs b/Makefile.objs index 808de6a..2302c96 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -34,7 +34,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vv block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-nested-y += qed-check.o -block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o +block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o blkmirror.o block-nested-y += stream.o block-nested-$(CONFIG_WIN32) += raw-win32.o block-nested-$(CONFIG_POSIX) += raw-posix.o diff --git a/block/blkmirror.c b/block/blkmirror.c new file mode 100644 index 000..c98b162 --- /dev/null +++ b/block/blkmirror.c @@ -0,0 +1,255 @@ +/* + * Block driver for mirrored writes. + * + * Copyright (C) 2011-2012 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include stdarg.h +#include block_int.h + +typedef struct { +BlockDriverState *bs[2]; +} BdrvMirrorState; + +typedef struct DupAIOCB DupAIOCB; + +typedef struct SingleAIOCB { +BlockDriverAIOCB *aiocb; +int finished; +DupAIOCB *parent; +} SingleAIOCB; + +struct DupAIOCB { +BlockDriverAIOCB common; +int count; + +BlockDriverCompletionFunc *cb; +SingleAIOCB aios[2]; +int ret; +}; + +/* Valid blkmirror filenames look like + * blkmirror:fmt1:path/to/image1:fmt2:path/to/image2 */ +static int blkmirror_open(BlockDriverState *bs, const char *filename, int flags) +{ +int ret = 0, i; +char *tmpbuf, *tok[4], *next; +BlockDriver *drv1, *drv2; +BdrvMirrorState *m = bs-opaque; + +m-bs[0] = bdrv_new(); +if (m-bs[0] == NULL) { +return -ENOMEM; +} + +m-bs[1] = bdrv_new(); +if (m-bs[1] == NULL) { +bdrv_delete(m-bs[0]); +return -ENOMEM; +} + +tmpbuf = g_malloc(strlen(filename) + 1); +pstrcpy(tmpbuf, strlen(filename) + 1, filename); + +/* Parse the blkmirror: prefix */ +if (!strstart(tmpbuf, blkmirror:, (const char **) next)) { +next = tmpbuf; +} + +for (i = 0; i 4; i++) { +if (!next) { +ret = -EINVAL; +goto out; +} +tok[i] = pstrtok_r(NULL, :, '\\', next); +} + +drv1 = bdrv_find_whitelisted_format(tok[0]); +drv2 = bdrv_find_whitelisted_format(tok[2]); + +if (!drv1 || !drv2) { +ret = -EINVAL; +goto out; +} + +ret = bdrv_open(m-bs[0], tok[1], flags, drv1); +if (ret 0) { +goto out; +} + +ret = bdrv_open(m-bs[0], tok[3], flags, drv2); +if (ret 0) { +goto out; +} + + out: +g_free(tmpbuf); + +if (ret 0) { +for (i = 0; i 2; i++) { +bdrv_delete(m-bs[i]); +m-bs[i] = NULL; +} +} + +return ret; +} + +static void blkmirror_close(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs-opaque; +int i; + +for (i = 0; i 2; i++) { +bdrv_delete(m-bs[i]); +m-bs[i] = NULL; +} +} + +static coroutine_fn int blkmirror_co_flush(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs-opaque; +int ret; + +ret = bdrv_co_flush(m-bs[0]); +if (ret 0) { +return ret; +} + +return bdrv_co_flush(m-bs[1]); +} + +static int64_t blkmirror_getlength(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs-opaque; + +return bdrv_getlength(m-bs[0]); +} + +static BlockDriverAIOCB *blkmirror_aio_readv(BlockDriverState *bs, + int64_t sector_num, + QEMUIOVector *qiov, + int nb_sectors, + BlockDriverCompletionFunc *cb, + void *opaque) +{ +BdrvMirrorState *m = bs-opaque; +return bdrv_aio_readv(m-bs[0], sector_num, qiov, nb_sectors, cb, opaque); +} + +static void dup_aio_cancel(BlockDriverAIOCB *blockacb) +{ +DupAIOCB *acb = container_of(blockacb, DupAIOCB, common); +int i; + +for (i = 0 ; i 2; i++) { +if (!acb-aios[i].finished) { +bdrv_aio_cancel(acb-aios[i].aiocb); +} +} +qemu_aio_release(acb); +} + +static AIOPool dup_aio_pool = { +.aiocb_size = sizeof(DupAIOCB), +.cancel
Re: [Qemu-devel] [PATCHv3] Add blkmirror block driver
- Original Message - From: Federico Simoncelli fsimo...@redhat.com To: qemu-devel@nongnu.org Cc: mtosa...@redhat.com, kw...@redhat.com, pbonz...@redhat.com, stefa...@gmail.com, Federico Simoncelli fsimo...@redhat.com Sent: Wednesday, February 29, 2012 1:28:21 PM Subject: [PATCHv3] Add blkmirror block driver Mirrored writes are used by live block copy. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- Makefile.objs |2 +- block/blkmirror.c | 255 cutils.c | 30 ++ docs/blkmirror.txt | 15 +++ qemu-common.h |1 + 5 files changed, 302 insertions(+), 1 deletions(-) create mode 100644 block/blkmirror.c create mode 100644 docs/blkmirror.txt diff --git a/block/blkmirror.c b/block/blkmirror.c new file mode 100644 index 000..c98b162 --- /dev/null +++ b/block/blkmirror.c [...] +if (!drv1 || !drv2) { +ret = -EINVAL; +goto out; +} + +ret = bdrv_open(m-bs[0], tok[1], flags, drv1); +if (ret 0) { +goto out; +} + +ret = bdrv_open(m-bs[0], tok[3], flags, drv2); +if (ret 0) { +goto out; +} There's a small mistake here, the second one is m-bs[1]. -- Federico
[Qemu-devel] [PATCHv4] Add blkmirror block driver
Mirrored writes are used by live block copy. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com Signed-off-by: Federico Simoncelli fsimo...@redhat.com Signed-off-by: Paolo Bonzini pbonz...@redhat.com --- Makefile.objs |2 +- block/blkmirror.c | 278 cutils.c | 56 +++ docs/blkmirror.txt | 15 +++ qemu-common.h |2 + 5 files changed, 352 insertions(+), 1 deletions(-) create mode 100644 block/blkmirror.c create mode 100644 docs/blkmirror.txt diff --git a/Makefile.objs b/Makefile.objs index 808de6a..2302c96 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -34,7 +34,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vv block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-nested-y += qed-check.o -block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o +block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o blkmirror.o block-nested-y += stream.o block-nested-$(CONFIG_WIN32) += raw-win32.o block-nested-$(CONFIG_POSIX) += raw-posix.o diff --git a/block/blkmirror.c b/block/blkmirror.c new file mode 100644 index 000..d894ca8 --- /dev/null +++ b/block/blkmirror.c @@ -0,0 +1,278 @@ +/* + * Block driver for mirrored writes. + * + * Copyright (C) 2011-2012 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include stdarg.h +#include block_int.h + +typedef struct { +BlockDriverState *bs[2]; +} BdrvMirrorState; + +typedef struct DupAIOCB DupAIOCB; + +typedef struct SingleAIOCB { +BlockDriverAIOCB *aiocb; +int finished; +DupAIOCB *parent; +} SingleAIOCB; + +struct DupAIOCB { +BlockDriverAIOCB common; +int count; + +BlockDriverCompletionFunc *cb; +SingleAIOCB aios[2]; +int ret; +}; + +/* Valid blkmirror filenames look like + * blkmirror:fmt1:path/to/image1:fmt2:path/to/image2 */ +static int blkmirror_open(BlockDriverState *bs, const char *filename, int flags) +{ +int ret = 0, i; +char *tmpbuf, *tok[4], *next; +BlockDriver *drv1, *drv2; +BdrvMirrorState *m = bs-opaque; +BlockDriverState *bk; + +m-bs[0] = bdrv_new(); +if (m-bs[0] == NULL) { +return -ENOMEM; +} + +m-bs[1] = bdrv_new(); +if (m-bs[1] == NULL) { +bdrv_delete(m-bs[0]); +return -ENOMEM; +} + +tmpbuf = g_malloc(strlen(filename) + 1); +pstrcpy(tmpbuf, strlen(filename) + 1, filename); + +/* Parse the blkmirror: prefix */ +if (!strstart(tmpbuf, blkmirror:, (const char **) next)) { +next = tmpbuf; +} + +for (i = 0; i 4; i++) { +if (!next) { +ret = -EINVAL; +goto out; +} +tok[i] = estrtok_r(NULL, :, '\\', next); +} + +drv1 = bdrv_find_whitelisted_format(tok[0]); +drv2 = bdrv_find_whitelisted_format(tok[2]); +if (!drv1 || !drv2) { +ret = -EINVAL; +goto out; +} + +ret = bdrv_open(m-bs[0], tok[1], flags, drv1); +if (ret 0) { +goto out; +} + +/* If we crash, we cannot assume that the destination is a + * valid mirror and we have to start over. So speed up things + * by effectively operating on the destination in cache=unsafe + * mode. + */ +ret = bdrv_open(m-bs[1], tok[3], flags | BDRV_O_NO_BACKING +| BDRV_O_NO_FLUSH | BDRV_O_CACHE_WB, drv2); +if (ret 0) { +goto out; +} + +if (m-bs[0]-backing_hd) { +bk = m-bs[0]-backing_hd; + +m-bs[1]-backing_hd = bdrv_new(); +if (!m-bs[1]-backing_hd) { +ret = -ENOMEM; +goto out; +} + +/* opening the same backing file of the source */ +ret = bdrv_open(m-bs[1]-backing_hd, +bk-filename, bk-open_flags, bk-drv); +if (ret 0) { +goto out; +} +} + + out: +g_free(tmpbuf); + +if (ret 0) { +for (i = 0; i 2; i++) { +bdrv_delete(m-bs[i]); +m-bs[i] = NULL; +} +} + +return ret; +} + +static void blkmirror_close(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs-opaque; +int i; + +for (i = 0; i 2; i++) { +bdrv_delete(m-bs[i]); +m-bs[i] = NULL; +} +} + +static coroutine_fn int blkmirror_co_flush(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs-opaque; +int ret; + +ret = bdrv_co_flush(m-bs[0]); +if (ret 0) { +return ret; +} + +return bdrv_co_flush(m-bs[1]); +} + +static int64_t blkmirror_getlength(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs-opaque; + +return bdrv_getlength(m-bs[0]); +} + +static BlockDriverAIOCB *blkmirror_aio_readv(BlockDriverState *bs
Re: [Qemu-devel] Live Block Migration using Mirroring
- Original Message - From: Stefan Hajnoczi stefa...@gmail.com To: Federico Simoncelli fsimo...@redhat.com Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com Sent: Tuesday, February 28, 2012 4:47:48 PM Subject: Re: [Qemu-devel] Live Block Migration using Mirroring On Wed, Feb 22, 2012 at 5:13 PM, Federico Simoncelli fsimo...@redhat.com wrote: Step 3 - Mirrored Live Snapshot === A mirrored live snapshot is issued using src/hd0snap1 and dst/hd0snap1 as image files. (Where - stands for has backing file) [src/hd0base] - [src/hd0snap1] = VM1(read-write) ... - [dst/hd0snap1] = VM1(write-only) $ qemu-img create -f qcow2 \ -b /tmp/src/hd0base.qcow2 /tmp/src/hd0snap1.qcow2 20G Formatting '/tmp/src/hd0snap1.qcow2', fmt=qcow2 size=21474836480 backing_file='/tmp/src/hd0base.qcow2' encryption=off cluster_size=65536 $ qemu-img create -f qcow2 \ -b /tmp/dst/hd0base.qcow2 /tmp/dst/hd0snap1.qcow2 20G Formatting '/tmp/dst/hd0snap1.qcow2', fmt=qcow2 size=21474836480 backing_file='/tmp/src/hd0base.qcow2' encryption=off cluster_size=65536 (qemu) snapshot_blkdev -n ide0-hd0 \ blkmirror:/tmp/src/hd0snap1.qcow2:/tmp/dst/hd0snap1.qcow2 blkmirror Step 4 - Backing File Copy == An external manager copies src/hd0base to the destination dst/hd0base. [src/hd0base] - [src/hd0snap1] = VM1(read-write) [dst/hd0base] - [dst/hd0snap1] = VM1(write-only) At this stage we have dst/hd0snap1 opened with BDRV_O_NO_BACKING. If it has no backing file and the guest issues a write request that is smaller than a cluster in the image file, the untouched areas of that cluster will be populated with zeroes. Once dst/hd0snap1 is reopened with dst/hd0base in place there will be zeros in clusters where the guest wrote only a few sectors. We will not see the backing file data in those clusters. Have you hit this problem or did I miss something? Thank you for getting this. Being able to have a bogus backing file was a bonus but it's not really required for the mirrored live block migration. We can add the support for switching the backing file in the drive-reopen part. I'll remove the BDRV_O_NO_BACKING flag from the blkmirror patch. -- Federico
Re: [Qemu-devel] Live Block Migration using Mirroring
- Original Message - From: Paolo Bonzini pbonz...@redhat.com To: Federico Simoncelli fsimo...@redhat.com Cc: Stefan Hajnoczi stefa...@gmail.com, qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com Sent: Tuesday, February 28, 2012 6:36:57 PM Subject: Re: [Qemu-devel] Live Block Migration using Mirroring Il 28/02/2012 18:15, Federico Simoncelli ha scritto: Thank you for getting this. Being able to have a bogus backing file was a bonus but it's not really required for the mirrored live block migration. We can add the support for switching the backing file in the drive-reopen part. Wait, it's not really required for oVirt because it creates the snapshot outside QEMU. What about everyone else? They'll have (as oVirt) two mirrored snapshot pointing at the same base. The only difference is that the image is created internally, but that's not hard. -- Federico
Re: [Qemu-devel] Live Block Migration using Mirroring
- Original Message - From: Paolo Bonzini pbonz...@redhat.com To: qemu-devel@nongnu.org Sent: Tuesday, February 28, 2012 7:02:40 PM Subject: Re: [Qemu-devel] Live Block Migration using Mirroring Il 28/02/2012 18:46, Federico Simoncelli ha scritto: Thank you for getting this. Being able to have a bogus backing file was a bonus but it's not really required for the mirrored live block migration. We can add the support for switching the backing file in the drive-reopen part. Wait, it's not really required for oVirt because it creates the snapshot outside QEMU. What about everyone else? They'll have (as oVirt) two mirrored snapshot pointing at the same base. The only difference is that the image is created internally, but that's not hard. Can you detail how you are switching the backing file in the drive-reopen? Either the BlockDriverState opened by blkmirror, or the one opened at the end, will have to use the wrong backing_file. How do you arrange for that? Step 1 - Initital Scenario == VM1 is running on the src/hd0base. [src/hd0base] = VM1(read-write) Step 3 - Mirrored Live Snapshot === A mirrored live snapshot is issued using src/hd0snap1 and dst/hd0snap1 as image files (both having src/hd0base as backing file). [src/hd0base] - [src/hd0snap1] = VM1(read-write) ^-- [dst/hd0snap1] = VM1(read-write) Step 4 - Backing File Copy == An external manager copies src/hd0base to the destination (dst/hd0base). [src/hd0base] - [src/hd0snap1] = VM1(read-write) [dst/hd0base]^-- [dst/hd0snap1] = VM1(read-write) Step 5 - Final Switch to Destination VM1 is now able to switch to the destination for both read and write operations fixing the backing file path in dst/hd0snap1. [src/hd0base] - [src/hd0snap1] [dst/hd0base] - [dst/hd0snap1] = VM1(read-write) -- Federico
Re: [Qemu-devel] [PATCH 1/2 v2] Add blkmirror block driver
- Original Message - From: Luiz Capitulino lcapitul...@redhat.com To: Federico Simoncelli fsimo...@redhat.com Cc: qemu-devel@nongnu.org, mtosa...@redhat.com, pbonz...@redhat.com, kw...@redhat.com, arm...@redhat.com Sent: Friday, February 24, 2012 7:17:22 PM Subject: Re: [PATCH 1/2 v2] Add blkmirror block driver On Fri, 24 Feb 2012 16:49:03 + Federico Simoncelli fsimo...@redhat.com wrote: +/* Parse the raw image filename */ +filename2 = g_malloc(strlen(filename)+1); +escape = 0; +for (i = n = 0; i strlen(filename); i++) { +if (!escape filename[i] == ':') { +break; +} +if (!escape filename[i] == '\\') { +escape = 1; +} else { +escape = 0; +} + +if (!escape) { +filename2[n++] = filename[i]; +} +} +filename2[n] = '\0'; You're escaping only the first image name string, is that intentional? Yes, it was also documented here by Marcelo: diff --git a/docs/blkmirror.txt b/docs/blkmirror.txt new file mode 100644 index 000..cf73f3f --- /dev/null +++ b/docs/blkmirror.txt @@ -0,0 +1,16 @@ +Block mirror driver +--- + +This driver will mirror writes to two distinct images. +It's used internally by live block copy. + +Format +-- + +blkmirror:/image1.img:/image2.img + +'\' (backslash) can be used to escape colon processing +as a separator, in the first image filename. +Backslashes themselves also can be escaped as '\\'. + + Anyway this format is encapsulated by blockdev-migrate. -- Federico
Re: [Qemu-devel] [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate commands
- Original Message - From: Luiz Capitulino lcapitul...@redhat.com To: Federico Simoncelli fsimo...@redhat.com Cc: qemu-devel@nongnu.org, mtosa...@redhat.com, pbonz...@redhat.com, kw...@redhat.com, arm...@redhat.com Sent: Friday, February 24, 2012 8:01:43 PM Subject: Re: [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate commands On Fri, 24 Feb 2012 16:49:04 + Federico Simoncelli fsimo...@redhat.com wrote: Signed-off-by: Federico Simoncelli fsimo...@redhat.com Btw, would be nice to have a 0/2 intro email describing the feature and changelog info. Yes the 0/2 (actually 0/3) was sent at the beginning of the thread so you might have missed it because you were added later on but I'm sure you can still find it in the archives. +BlockDriver *drv; +int i, j, escape; +char new_filename[2048], *filename; I'd use PATH_MAX for new_filename's size. Maybe PATH_MAX * 2 + 1? (To handle the case where all the characters should be escaped). +escape = 0; +for (i = 0, j = 0; j strlen(new_image_file); j++) { + loop: +if (!(i sizeof(new_filename) - 2)) { +error_set(errp, QERR_INVALID_PARAMETER_VALUE, + new-image-file, shorter filename); +return; +} + +if (new_image_file[j] == ':' || new_image_file[j] == '\\') { +if (!escape) { +new_filename[i++] = '\\', escape = 1; +goto loop; +} else { +escape = 0; +} +} + +new_filename[i++] = new_image_file[j]; +} IMO, you could require the filename arguments to be escaped already. Can you be more explicit, who should escape it? The only thing that comes to my mind right now is requiring the input of blockdev-migrate already escaped but that would expose an internal format. (I'd not recommend it). +void qmp_blockdev_migrate(const char *device, bool incremental, + const char *destination, bool has_new_image_file, + const char *new_image_file, Error **errp) +{ +BlockDriverState *bs; + +bs = bdrv_find(device); +if (!bs) { +error_set(errp, QERR_DEVICE_NOT_FOUND, device); +return; +} +if (bdrv_in_use(bs)) { +error_set(errp, QERR_DEVICE_IN_USE, device); +return; +} + +if (incremental) { +if (!has_new_image_file) { +error_set(errp, QERR_INVALID_PARAMETER_VALUE, + incremental, a new image file); +} else { +qmp_blockdev_mirror(device, destination, new_image_file, errp); +} +} else { +error_set(errp, QERR_NOT_SUPPORTED); +} The command documentation says that 'incremental' and 'new_image_file' are optionals, but the code makes them required. Why? If I didn't make any mistake in the code I'm just enforcing that when you specify incremental you also need a new image. There are still other valid cases where they are optional. -- Federico
Re: [Qemu-devel] drive transactions (was Re: [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate commands)
- Original Message - From: Paolo Bonzini pbonz...@redhat.com To: Luiz Capitulino lcapitul...@redhat.com Cc: Federico Simoncelli fsimo...@redhat.com, kw...@redhat.com, mtosa...@redhat.com, qemu-devel@nongnu.org, arm...@redhat.com, Jeff Cody jc...@redhat.com Sent: Monday, February 27, 2012 3:39:33 PM Subject: drive transactions (was Re: [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate commands) 4) incremental=true, new_image_file passed: - no images will be created - writes will be mirrored to new_image_file and dest No need to provide this from within QEMU, because libvirt/oVirt can do the dance using elementary operations: blockdev-begin-transaction drive-reopen device new-image-file drive-mirror streaming=false device dest blockdev-commit-transaction No strange optional arguments, no proliferation of commands, etc. The only downside is that if someone tries to do (4) without transactions (or without stopping the VM) they'll get corruption because atomicity is required. I'm all for the modularity of the commands (I suggested it since the beginning), but all this infrastructure goes way beyond what I'd need for oVirt now. When I submitted my patches we knew that my work wasn't the definitive solution (eg: the future implementation of -blockdev). So I'd suggest to try and settle with something that is good enough and that is not preventing a future improvement. What about having a (temporary) flag in drive-mirror to accept also a new-image-file until we will have the optimal solution? -- Federico
Re: [Qemu-devel] drive transactions (was Re: [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate commands)
- Original Message - From: Anthony Liguori anth...@codemonkey.ws To: Federico Simoncelli fsimo...@redhat.com Cc: Paolo Bonzini pbonz...@redhat.com, kw...@redhat.com, arm...@redhat.com, Jeff Cody jc...@redhat.com, mtosa...@redhat.com, qemu-devel@nongnu.org, Luiz Capitulino lcapitul...@redhat.com Sent: Monday, February 27, 2012 5:42:31 PM Subject: Re: [Qemu-devel] drive transactions (was Re: [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate commands) On 02/27/2012 10:33 AM, Federico Simoncelli wrote: I'm all for the modularity of the commands (I suggested it since the beginning), but all this infrastructure goes way beyond what I'd need for oVirt now. When I submitted my patches we knew that my work wasn't the definitive solution (eg: the future implementation of -blockdev). So I'd suggest to try and settle with something that is good enough and that is not preventing a future improvement. What about having a (temporary) flag in drive-mirror to accept also a new-image-file until we will have the optimal solution? Unless there are extenuating circumstances (like the absence of core infrastructure in QEMU), then we should not add commands that we know are not the right command. So are you in favor or against my suggestion? It looks like this is exactly the case where the core infrastructure (transactions) is missing. -- Federico
[Qemu-devel] [PATCH 1/2] Add blkmirror block driver
From: Marcelo Tosatti mtosa...@redhat.com Mirrored writes are used by live block copy. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- Makefile.objs |2 +- block/blkmirror.c | 247 docs/blkmirror.txt | 16 3 files changed, 264 insertions(+), 1 deletions(-) create mode 100644 block/blkmirror.c create mode 100644 docs/blkmirror.txt diff --git a/Makefile.objs b/Makefile.objs index 67ee3df..6020308 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -34,7 +34,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vv block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-nested-y += qed-check.o -block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o +block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o blkmirror.o block-nested-y += stream.o block-nested-$(CONFIG_WIN32) += raw-win32.o block-nested-$(CONFIG_POSIX) += raw-posix.o diff --git a/block/blkmirror.c b/block/blkmirror.c new file mode 100644 index 000..39927c8 --- /dev/null +++ b/block/blkmirror.c @@ -0,0 +1,247 @@ +/* + * Block driver for mirrored writes. + * + * Copyright (C) 2011 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include stdarg.h +#include block_int.h + +typedef struct { +BlockDriverState *bs[2]; +} BdrvMirrorState; + +typedef struct DupAIOCB DupAIOCB; + +typedef struct SingleAIOCB { +BlockDriverAIOCB *aiocb; +int finished; +DupAIOCB *parent; +} SingleAIOCB; + +struct DupAIOCB { +BlockDriverAIOCB common; +int count; + +BlockDriverCompletionFunc *cb; +SingleAIOCB aios[2]; +int ret; +}; + +/* Valid blkmirror filenames look like + * blkmirror:path/to/image1:path/to/image2 */ +static int blkmirror_open(BlockDriverState *bs, const char *filename, int flags) +{ +BdrvMirrorState *m = bs-opaque; +int ret, escape, i, n; +char *filename2; + +/* Parse the blkmirror: prefix */ +if (strncmp(filename, blkmirror:, strlen(blkmirror:))) { +return -EINVAL; +} +filename += strlen(blkmirror:); + +/* Parse the raw image filename */ +filename2 = g_malloc(strlen(filename)+1); +escape = 0; +for (i = n = 0; i strlen(filename); i++) { +if (!escape filename[i] == ':') { +break; +} +if (!escape filename[i] == '\\') { +escape = 1; +} else { +escape = 0; +} + +if (!escape) { +filename2[n++] = filename[i]; +} +} +filename2[n] = '\0'; + +m-bs[0] = bdrv_new(); +if (m-bs[0] == NULL) { +g_free(filename2); +return -ENOMEM; +} +ret = bdrv_open(m-bs[0], filename2, flags, NULL); +g_free(filename2); +if (ret 0) { +return ret; +} +filename += i + 1; + +m-bs[1] = bdrv_new(); +if (m-bs[1] == NULL) { +bdrv_delete(m-bs[0]); +return -ENOMEM; +} +ret = bdrv_open(m-bs[1], filename, flags, NULL); +if (ret 0) { +bdrv_delete(m-bs[0]); +return ret; +} + +return 0; +} + +static void blkmirror_close(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs-opaque; +int i; + +for (i = 0; i 2; i++) { +bdrv_delete(m-bs[i]); +m-bs[i] = NULL; +} +} + +static coroutine_fn int blkmirror_co_flush(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs-opaque; +int ret; + +ret = bdrv_co_flush(m-bs[0]); +if (ret 0) { +return ret; +} + +return bdrv_co_flush(m-bs[1]); +} + +static int64_t blkmirror_getlength(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs-opaque; + +return bdrv_getlength(m-bs[0]); +} + +static BlockDriverAIOCB *blkmirror_aio_readv(BlockDriverState *bs, + int64_t sector_num, + QEMUIOVector *qiov, + int nb_sectors, + BlockDriverCompletionFunc *cb, + void *opaque) +{ +BdrvMirrorState *m = bs-opaque; +return bdrv_aio_readv(m-bs[0], sector_num, qiov, nb_sectors, cb, opaque); +} + +static void dup_aio_cancel(BlockDriverAIOCB *blockacb) +{ +DupAIOCB *acb = container_of(blockacb, DupAIOCB, common); +int i; + +for (i = 0 ; i 2; i++) { +if (!acb-aios[i].finished) { +bdrv_aio_cancel(acb-aios[i].aiocb); +} +} +qemu_aio_release(acb); +} + +static AIOPool dup_aio_pool = { +.aiocb_size = sizeof(DupAIOCB), +.cancel = dup_aio_cancel, +}; + +static void blkmirror_aio_cb(void *opaque, int ret
[Qemu-devel] [PATCH 2/2] Add the blockdev-reopen and blockdev-migrate commands
Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- block/blkmirror.c |2 +- blockdev.c| 109 +++-- hmp-commands.hx | 36 + hmp.c | 30 ++ hmp.h |2 + qapi-schema.json | 63 ++ 6 files changed, 229 insertions(+), 13 deletions(-) diff --git a/block/blkmirror.c b/block/blkmirror.c index 39927c8..49e3381 100644 --- a/block/blkmirror.c +++ b/block/blkmirror.c @@ -81,7 +81,7 @@ static int blkmirror_open(BlockDriverState *bs, const char *filename, int flags) bdrv_delete(m-bs[0]); return -ENOMEM; } -ret = bdrv_open(m-bs[1], filename, flags, NULL); +ret = bdrv_open(m-bs[1], filename, flags | BDRV_O_NO_BACKING, NULL); if (ret 0) { bdrv_delete(m-bs[0]); return ret; diff --git a/blockdev.c b/blockdev.c index 2c132a3..1df2542 100644 --- a/blockdev.c +++ b/blockdev.c @@ -646,9 +646,8 @@ void do_commit(Monitor *mon, const QDict *qdict) } } -void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, -bool has_format, const char *format, -Error **errp) +static void change_blockdev_image(const char *device, const char *new_image_file, + const char *format, bool create, Error **errp) { BlockDriverState *bs; BlockDriver *drv, *old_drv, *proto_drv; @@ -671,7 +670,7 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, old_drv = bs-drv; flags = bs-open_flags; -if (!has_format) { +if (!format) { format = qcow2; } @@ -681,24 +680,26 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, return; } -proto_drv = bdrv_find_protocol(snapshot_file); +proto_drv = bdrv_find_protocol(new_image_file); if (!proto_drv) { error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); return; } -ret = bdrv_img_create(snapshot_file, format, bs-filename, - bs-drv-format_name, NULL, -1, flags); -if (ret) { -error_set(errp, QERR_UNDEFINED_ERROR); -return; +if (create) { +ret = bdrv_img_create(new_image_file, format, bs-filename, + bs-drv-format_name, NULL, -1, flags); +if (ret) { +error_set(errp, QERR_UNDEFINED_ERROR); +return; +} } bdrv_drain_all(); bdrv_flush(bs); bdrv_close(bs); -ret = bdrv_open(bs, snapshot_file, flags, drv); +ret = bdrv_open(bs, new_image_file, flags, drv); /* * If reopening the image file we just created fails, fall back * and try to re-open the original image. If that fails too, we @@ -709,11 +710,95 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, if (ret != 0) { error_set(errp, QERR_OPEN_FILE_FAILED, old_filename); } else { -error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file); +error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file); } } } +void qmp_blockdev_reopen(const char *device, const char *new_image_file, + bool has_format, const char *format, Error **errp) +{ +change_blockdev_image(device, new_image_file, + has_format ? format : NULL, false, errp); +} + +void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, +bool has_format, const char *format, +Error **errp) +{ +change_blockdev_image(device, snapshot_file, + has_format ? format : NULL, true, errp); +} + +void qmp_blockdev_migrate(const char *device, BlockMigrateOp mode, + const char *destination, bool has_new_image_file, + const char *new_image_file, Error **errp) +{ +BlockDriverState *bs; +BlockDriver *drv; +int i, j, escape; +char filename[2048]; + +bs = bdrv_find(device); +if (!bs) { +error_set(errp, QERR_DEVICE_NOT_FOUND, device); +return; +} +if (bdrv_in_use(bs)) { +error_set(errp, QERR_DEVICE_IN_USE, device); +return; +} + +if (mode == BLOCK_MIGRATE_OP_MIRROR) { +drv = bdrv_find_format(blkmirror); +if (!drv) { +error_set(errp, QERR_INVALID_BLOCK_FORMAT, blkmirror); +return; +} + +if (!has_new_image_file) { +new_image_file = bs-filename; +} + +pstrcpy(filename, sizeof(filename), blkmirror:); +i = strlen(blkmirror:); + +escape = 0; +for (j = 0; j strlen(new_image_file); j++) { + loop: +if (!(i sizeof(filename) - 2)) { +error_set(errp, QERR_INVALID_PARAMETER_VALUE
Re: [Qemu-devel] [PATCH 2/2] Add the blockdev-reopen and blockdev-migrate commands
- Original Message - From: Kevin Wolf kw...@redhat.com To: Federico Simoncelli fsimo...@redhat.com Cc: qemu-devel@nongnu.org, Marcelo Tosatti mtosa...@redhat.com, lcapitul...@redhat.com, Paolo Bonzini pbonz...@redhat.com, Markus Armbruster arm...@redhat.com Sent: Friday, February 24, 2012 1:03:08 PM Subject: Re: [PATCH 2/2] Add the blockdev-reopen and blockdev-migrate commands Am 24.02.2012 12:37, schrieb Federico Simoncelli: Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- block/blkmirror.c |2 +- blockdev.c| 109 +++-- hmp-commands.hx | 36 + hmp.c | 30 ++ hmp.h |2 + qapi-schema.json | 63 ++ 6 files changed, 229 insertions(+), 13 deletions(-) diff --git a/block/blkmirror.c b/block/blkmirror.c index 39927c8..49e3381 100644 --- a/block/blkmirror.c +++ b/block/blkmirror.c @@ -81,7 +81,7 @@ static int blkmirror_open(BlockDriverState *bs, const char *filename, int flags) bdrv_delete(m-bs[0]); return -ENOMEM; } -ret = bdrv_open(m-bs[1], filename, flags, NULL); +ret = bdrv_open(m-bs[1], filename, flags | BDRV_O_NO_BACKING, NULL); if (ret 0) { bdrv_delete(m-bs[0]); return ret; Was this hunk meant to be in patch 1? Not necessarily, I thought a lot about it. I didn't want to modify Marcelo's patch too much. This flag is actually mandatory only to make blockdev-migrate work with a destination that doesn't have a backing file yet, so I thought it was more appropriate to squash it here. -- Federico
[Qemu-devel] [PATCH 1/2 v2] Add blkmirror block driver
From: Marcelo Tosatti mtosa...@redhat.com Mirrored writes are used by live block copy. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- Makefile.objs |2 +- block/blkmirror.c | 247 docs/blkmirror.txt | 16 3 files changed, 264 insertions(+), 1 deletions(-) create mode 100644 block/blkmirror.c create mode 100644 docs/blkmirror.txt diff --git a/Makefile.objs b/Makefile.objs index 67ee3df..6020308 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -34,7 +34,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vv block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-nested-y += qed-check.o -block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o +block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o blkmirror.o block-nested-y += stream.o block-nested-$(CONFIG_WIN32) += raw-win32.o block-nested-$(CONFIG_POSIX) += raw-posix.o diff --git a/block/blkmirror.c b/block/blkmirror.c new file mode 100644 index 000..49e3381 --- /dev/null +++ b/block/blkmirror.c @@ -0,0 +1,247 @@ +/* + * Block driver for mirrored writes. + * + * Copyright (C) 2011 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include stdarg.h +#include block_int.h + +typedef struct { +BlockDriverState *bs[2]; +} BdrvMirrorState; + +typedef struct DupAIOCB DupAIOCB; + +typedef struct SingleAIOCB { +BlockDriverAIOCB *aiocb; +int finished; +DupAIOCB *parent; +} SingleAIOCB; + +struct DupAIOCB { +BlockDriverAIOCB common; +int count; + +BlockDriverCompletionFunc *cb; +SingleAIOCB aios[2]; +int ret; +}; + +/* Valid blkmirror filenames look like + * blkmirror:path/to/image1:path/to/image2 */ +static int blkmirror_open(BlockDriverState *bs, const char *filename, int flags) +{ +BdrvMirrorState *m = bs-opaque; +int ret, escape, i, n; +char *filename2; + +/* Parse the blkmirror: prefix */ +if (strncmp(filename, blkmirror:, strlen(blkmirror:))) { +return -EINVAL; +} +filename += strlen(blkmirror:); + +/* Parse the raw image filename */ +filename2 = g_malloc(strlen(filename)+1); +escape = 0; +for (i = n = 0; i strlen(filename); i++) { +if (!escape filename[i] == ':') { +break; +} +if (!escape filename[i] == '\\') { +escape = 1; +} else { +escape = 0; +} + +if (!escape) { +filename2[n++] = filename[i]; +} +} +filename2[n] = '\0'; + +m-bs[0] = bdrv_new(); +if (m-bs[0] == NULL) { +g_free(filename2); +return -ENOMEM; +} +ret = bdrv_open(m-bs[0], filename2, flags, NULL); +g_free(filename2); +if (ret 0) { +return ret; +} +filename += i + 1; + +m-bs[1] = bdrv_new(); +if (m-bs[1] == NULL) { +bdrv_delete(m-bs[0]); +return -ENOMEM; +} +ret = bdrv_open(m-bs[1], filename, flags | BDRV_O_NO_BACKING, NULL); +if (ret 0) { +bdrv_delete(m-bs[0]); +return ret; +} + +return 0; +} + +static void blkmirror_close(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs-opaque; +int i; + +for (i = 0; i 2; i++) { +bdrv_delete(m-bs[i]); +m-bs[i] = NULL; +} +} + +static coroutine_fn int blkmirror_co_flush(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs-opaque; +int ret; + +ret = bdrv_co_flush(m-bs[0]); +if (ret 0) { +return ret; +} + +return bdrv_co_flush(m-bs[1]); +} + +static int64_t blkmirror_getlength(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs-opaque; + +return bdrv_getlength(m-bs[0]); +} + +static BlockDriverAIOCB *blkmirror_aio_readv(BlockDriverState *bs, + int64_t sector_num, + QEMUIOVector *qiov, + int nb_sectors, + BlockDriverCompletionFunc *cb, + void *opaque) +{ +BdrvMirrorState *m = bs-opaque; +return bdrv_aio_readv(m-bs[0], sector_num, qiov, nb_sectors, cb, opaque); +} + +static void dup_aio_cancel(BlockDriverAIOCB *blockacb) +{ +DupAIOCB *acb = container_of(blockacb, DupAIOCB, common); +int i; + +for (i = 0 ; i 2; i++) { +if (!acb-aios[i].finished) { +bdrv_aio_cancel(acb-aios[i].aiocb); +} +} +qemu_aio_release(acb); +} + +static AIOPool dup_aio_pool = { +.aiocb_size = sizeof(DupAIOCB), +.cancel = dup_aio_cancel, +}; + +static void blkmirror_aio_cb(void
[Qemu-devel] [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate commands
Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- blockdev.c | 107 -- hmp-commands.hx | 38 +++ hmp.c| 24 hmp.h|2 + qapi-schema.json | 54 +++ 5 files changed, 213 insertions(+), 12 deletions(-) diff --git a/blockdev.c b/blockdev.c index 2c132a3..f51bd6f 100644 --- a/blockdev.c +++ b/blockdev.c @@ -646,9 +646,8 @@ void do_commit(Monitor *mon, const QDict *qdict) } } -void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, -bool has_format, const char *format, -Error **errp) +static void change_blockdev_image(const char *device, const char *new_image_file, + const char *format, bool create, Error **errp) { BlockDriverState *bs; BlockDriver *drv, *old_drv, *proto_drv; @@ -671,7 +670,7 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, old_drv = bs-drv; flags = bs-open_flags; -if (!has_format) { +if (!format) { format = qcow2; } @@ -681,24 +680,26 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, return; } -proto_drv = bdrv_find_protocol(snapshot_file); +proto_drv = bdrv_find_protocol(new_image_file); if (!proto_drv) { error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); return; } -ret = bdrv_img_create(snapshot_file, format, bs-filename, - bs-drv-format_name, NULL, -1, flags); -if (ret) { -error_set(errp, QERR_UNDEFINED_ERROR); -return; +if (create) { +ret = bdrv_img_create(new_image_file, format, bs-filename, + bs-drv-format_name, NULL, -1, flags); +if (ret) { +error_set(errp, QERR_UNDEFINED_ERROR); +return; +} } bdrv_drain_all(); bdrv_flush(bs); bdrv_close(bs); -ret = bdrv_open(bs, snapshot_file, flags, drv); +ret = bdrv_open(bs, new_image_file, flags, drv); /* * If reopening the image file we just created fails, fall back * and try to re-open the original image. If that fails too, we @@ -709,11 +710,93 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, if (ret != 0) { error_set(errp, QERR_OPEN_FILE_FAILED, old_filename); } else { -error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file); +error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file); } } } +void qmp_drive_reopen(const char *device, const char *new_image_file, + bool has_format, const char *format, Error **errp) +{ +change_blockdev_image(device, new_image_file, + has_format ? format : NULL, false, errp); +} + +void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, +bool has_format, const char *format, +Error **errp) +{ +change_blockdev_image(device, snapshot_file, + has_format ? format : NULL, true, errp); +} + +static void qmp_blockdev_mirror(const char *device, const char *destination, +const char *new_image_file, Error **errp) +{ +BlockDriver *drv; +int i, j, escape; +char new_filename[2048], *filename; + +drv = bdrv_find_format(blkmirror); +if (!drv) { +error_set(errp, QERR_INVALID_BLOCK_FORMAT, blkmirror); +return; +} + +escape = 0; +for (i = 0, j = 0; j strlen(new_image_file); j++) { + loop: +if (!(i sizeof(new_filename) - 2)) { +error_set(errp, QERR_INVALID_PARAMETER_VALUE, + new-image-file, shorter filename); +return; +} + +if (new_image_file[j] == ':' || new_image_file[j] == '\\') { +if (!escape) { +new_filename[i++] = '\\', escape = 1; +goto loop; +} else { +escape = 0; +} +} + +new_filename[i++] = new_image_file[j]; +} + +filename = g_strdup_printf(blkmirror:%s:%s, new_filename, destination); +change_blockdev_image(device, filename, blkmirror, false, errp); +g_free(filename); +} + +void qmp_blockdev_migrate(const char *device, bool incremental, + const char *destination, bool has_new_image_file, + const char *new_image_file, Error **errp) +{ +BlockDriverState *bs; + +bs = bdrv_find(device); +if (!bs) { +error_set(errp, QERR_DEVICE_NOT_FOUND, device); +return; +} +if (bdrv_in_use(bs)) { +error_set(errp, QERR_DEVICE_IN_USE, device); +return; +} + +if (incremental
Re: [Qemu-devel] [PATCH 1/2 v2] Add blkmirror block driver
- Original Message - From: Eric Blake ebl...@redhat.com To: Federico Simoncelli fsimo...@redhat.com Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com, arm...@redhat.com, lcapitul...@redhat.com, pbonz...@redhat.com Sent: Friday, February 24, 2012 6:02:36 PM Subject: Re: [Qemu-devel] [PATCH 1/2 v2] Add blkmirror block driver +++ b/docs/blkmirror.txt @@ -0,0 +1,16 @@ +Block mirror driver +--- + +This driver will mirror writes to two distinct images. +It's used internally by live block copy. + +Format +-- + +blkmirror:/image1.img:/image2.img + +'\' (backslash) can be used to escape colon processing +as a separator, in the first image filename. +Backslashes themselves also can be escaped as '\\'. Is the escaping of : and \ only necessary for image1.img, or for both image1 and image2? I need to know if the parser is consistent for both arguments, but this wording makes it sound like it is only for the first argument. Yes it is only for the first argument. -- Federico
Re: [Qemu-devel] [PATCH 3/3] Add nocreate option to snapshot_blkdev
- Original Message - From: Paolo Bonzini pbonz...@redhat.com Cc: Federico Simoncelli fsimo...@redhat.com, kw...@redhat.com, mtosa...@redhat.com, qemu-devel@nongnu.org Sent: Thursday, February 23, 2012 8:38:55 AM Subject: Re: [PATCH 3/3] Add nocreate option to snapshot_blkdev On 02/23/2012 08:19 AM, Paolo Bonzini wrote: Signed-off-by: Federico Simoncelli fsimo...@redhat.com What is the usecase, and how can this be tested? Oops, you explained it in part 0. Step 5 - Final Switch to Destination VM1 is now able to switch to the destination for both read and write operations. [src/hd0base] - [src/hd0snap1] = VM1(read-write) (qemu) snapshot_blkdev -n ide0-hd0 /tmp/dst/hd0snap1.qcow2 It seems to me that reusing the snapshot_blkdev command is a bit of a misnomer. It also forces you to use format autodetection for the destination in the blkmirror phase. I see two possibilities: 1) you can make a new command block_reopen; this fixes only the naming problem. 2) you can hide the new format behind a new command to be used like this: block_migrate --mirror ide-hd0 /tmp/dst/hd0snap1.qcow2 ... block_migrate --switch ide-hd0 This leave the possibility to specify the format in the future: block_migrate --mirror ide-hd0 /tmp/dst/hd0snap1.qcow2 qcow2 And we could have another sub-command block_mirror --stream ide-hd0 /tmp/dst/hd0.raw to migrate block devices without shared storage and without an intermediate snapshot. I agree on the fact that using snapshot_blkdev is a misnomer but at the same time I like very much how blkmirror is modular and it can be used as a regular image filename. I'm worried that making it explicit with a --mirror option would take away its flexibility. What about adding block_reopen and extending the blkmirror string: (qemu) block_reopen ide-hd0 \ blkmirror:qcow2:/tmp/src/hd0snap1.qcow2:/tmp/dst/hd0snap1.qcow2 so that the format is passed as first argument (and if it's empty we would use the auto-detection). -- Federico
Re: [Qemu-devel] [PATCH 2/3] Update the blkmirror block driver
- Original Message - From: Paolo Bonzini pbonz...@redhat.com To: Federico Simoncelli fsimo...@redhat.com Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com Sent: Thursday, February 23, 2012 8:18:28 AM Subject: Re: [PATCH 2/3] Update the blkmirror block driver On 02/22/2012 06:13 PM, Federico Simoncelli wrote: @@ -46,7 +46,7 @@ static int blkmirror_open(BlockDriverState *bs, const char *filename, int flags) filename += strlen(blkmirror:); /* Parse the raw image filename */ -filename2 = qemu_malloc(strlen(filename)+1); +filename2 = qemu_vmalloc(strlen(filename)+1); escape = 0; for (i = n = 0; i strlen(filename); i++) { if (!escape filename[i] == ':') { @@ -66,11 +66,11 @@ static int blkmirror_open(BlockDriverState *bs, const char *filename, int flags) m-bs[0] = bdrv_new(); if (m-bs[0] == NULL) { -free(filename2); +qemu_vfree(filename2); return -ENOMEM; } ret = bdrv_open(m-bs[0], filename2, flags, NULL); -free(filename2); +qemu_vfree(filename2); if (ret 0) { return ret; } These should be g_malloc and g_free. Thanks! Also, please squash this patch in part 1. Yes, I sent it as a separate patch to make it easier to review my changes (if someone already reviewed Marcelo's patch). Any comment on the BDRV_O_NO_BACKING flag I added? That's the real trick I'm pulling here. It basically allows to open the second image only for writing and it doesn't complain if the backing file is not there yet (it will be copied during step 4). -- Federico
Re: [Qemu-devel] [PATCH 3/3] Add nocreate option to snapshot_blkdev
- Original Message - From: Paolo Bonzini pbonz...@redhat.com To: Federico Simoncelli fsimo...@redhat.com Cc: kw...@redhat.com, mtosa...@redhat.com, qemu-devel@nongnu.org Sent: Thursday, February 23, 2012 10:48:59 AM Subject: Re: [PATCH 3/3] Add nocreate option to snapshot_blkdev On 02/23/2012 10:39 AM, Federico Simoncelli wrote: I agree on the fact that using snapshot_blkdev is a misnomer but at the same time I like very much how blkmirror is modular and it can be used as a regular image filename. True, on the other hand if we add this we will have to keep it forever. I'm worried that blkmirror does not satisfy _all_ mirroring needs, for example you cannot use block_stream to copy from the source to the destination, so perhaps in the future we want to change it to something else. Are you talking about a mirroring where you block_stream the missing clusters in the destination from the source? I believe that it could be done without losing the blkmirror modularity probably extending the BlockDriver structure with some additional concepts. So while I appreciate the easier debugging, I'm afraid that we'll regret adding a command-line-visible feature. I'm worried that making it explicit with a --mirror option would take away its flexibility. What about adding block_reopen and extending the blkmirror string: (qemu) block_reopen ide-hd0 \ blkmirror:qcow2:/tmp/src/hd0snap1.qcow2:/tmp/dst/hd0snap1.qcow2 so that the format is passed as first argument (and if it's empty we would use the auto-detection). No, the format of the source and destination should be independent. It would be nice to have something like this: blkmirror:src=...,dst=...,srcformat=...,dstformat=... but commas are a pain because they need escaping. I like that, one limitation we need to keep in mind is that it should fit into the hard-coded filename limit of 1024 characters that (sadly) we have in multiple places. Another thing is that at this stage the mirroring is more an original/copy concept rather than source/destination. What about: blkmirror:...,copy=...,fmt=... (both images uses the same fmt) blkmirror:...,copy=...,fmt=...,copyfmt=... Or, eventually if you feel like that source/destination is more appropriate for the future, then: blkmirror:...,dst=...,fmt=...,dstfmt=... -- Federico
Re: [Qemu-devel] Live Block Migration using Mirroring
- Original Message - From: Stefan Hajnoczi stefa...@gmail.com To: Federico Simoncelli fsimo...@redhat.com Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com Sent: Thursday, February 23, 2012 4:47:38 PM Subject: Re: [Qemu-devel] Live Block Migration using Mirroring On Wed, Feb 22, 2012 at 5:13 PM, Federico Simoncelli fsimo...@redhat.com wrote: Step 3 - Mirrored Live Snapshot === A mirrored live snapshot is issued using src/hd0snap1 and dst/hd0snap1 as image files. (Where - stands for has backing file) [src/hd0base] - [src/hd0snap1] = VM1(read-write) ... - [dst/hd0snap1] = VM1(write-only) $ qemu-img create -f qcow2 \ -b /tmp/src/hd0base.qcow2 /tmp/src/hd0snap1.qcow2 20G Formatting '/tmp/src/hd0snap1.qcow2', fmt=qcow2 size=21474836480 backing_file='/tmp/src/hd0base.qcow2' encryption=off cluster_size=65536 $ qemu-img create -f qcow2 \ -b /tmp/dst/hd0base.qcow2 /tmp/dst/hd0snap1.qcow2 20G Formatting '/tmp/dst/hd0snap1.qcow2', fmt=qcow2 size=21474836480 backing_file='/tmp/src/hd0base.qcow2' encryption=off cluster_size=65536 At this stage /tmp/dst/hd0base.qcow2 does not exist yet. The qemu-img output you pasted shows /tmp/src/hd0base.qcow2 was actually used. Typo? No that's part of the flag used in [PATCH 2/3] (Update the blkmirror block driver): BDRV_O_NO_BACKING It's also documented in the design: http://www.ovirt.org/wiki/File:StorageLiveMigration2.png (qemu) snapshot_blkdev -n ide0-hd0 \ blkmirror:/tmp/src/hd0snap1.qcow2:/tmp/dst/hd0snap1.qcow2 blkmirror Step 4 - Backing File Copy == An external manager copies src/hd0base to the destination dst/hd0base. [src/hd0base] - [src/hd0snap1] = VM1(read-write) [dst/hd0base] - [dst/hd0snap1] = VM1(write-only) $ cp -a /tmp/src/hd0base.qcow2 /tmp/dst/hd0base.qcow2 Are we missing a fixup step that changes backing_file in dst/hd0snap1.qcow2 to point at dst/hd0base.qcow2? See above. -- Federico
Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver
- Original Message - From: Stefan Hajnoczi stefa...@gmail.com To: Federico Simoncelli fsimo...@redhat.com Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com Sent: Thursday, February 23, 2012 5:14:09 PM Subject: Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver On Wed, Feb 22, 2012 at 5:13 PM, Federico Simoncelli fsimo...@redhat.com wrote: From: Marcelo Tosatti mtosa...@redhat.com Mirrored writes are used by live block copy. I think the right approach is to create a single blkmirror driver that also includes blkverify functionality. The code is basically the same except blkverify also compares reads - just use a flag to enable/disable that behavior. Feel free to rename the blkverify driver to blkmirror if you wish. By the way, this code does not build against qemu.git/master. It uses interfaces which have been dropped. Yes you also need: [PATCH 2/3] Update the blkmirror block driver Which was sent separately to facilitate the review for people who already reviewed this. -- Federico
Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver
- Original Message - From: Stefan Hajnoczi stefa...@gmail.com To: Federico Simoncelli fsimo...@redhat.com Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com Sent: Thursday, February 23, 2012 5:18:41 PM Subject: Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver On Thu, Feb 23, 2012 at 4:14 PM, Stefan Hajnoczi stefa...@gmail.com wrote: By the way, this code does not build against qemu.git/master. It uses interfaces which have been dropped. Ah, I see you changed that in Patch 2/3. Please don't do that, it's hard to review and breaks git-bisect. Squashing is easy (anyone could do it at the commit phase), reviewing is hard (if you hide your changes in someone else's code). -- Federico
Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver
- Original Message - From: Stefan Hajnoczi stefa...@gmail.com To: Federico Simoncelli fsimo...@redhat.com Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com Sent: Thursday, February 23, 2012 5:28:23 PM Subject: Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver On Thu, Feb 23, 2012 at 4:20 PM, Federico Simoncelli fsimo...@redhat.com wrote: - Original Message - From: Stefan Hajnoczi stefa...@gmail.com To: Federico Simoncelli fsimo...@redhat.com Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com Sent: Thursday, February 23, 2012 5:18:41 PM Subject: Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver On Thu, Feb 23, 2012 at 4:14 PM, Stefan Hajnoczi stefa...@gmail.com wrote: By the way, this code does not build against qemu.git/master. It uses interfaces which have been dropped. Ah, I see you changed that in Patch 2/3. Please don't do that, it's hard to review and breaks git-bisect. Squashing is easy (anyone could do it at the commit phase), reviewing is hard (if you hide your changes in someone else's code). I don't care if you or Marcelo wrote it, I want to see a coherent piece of code. The way to do it is to merge the g_malloc() and BlockDriver interface changes into this path. Then you have not snuck any significant changes into Marcelo's code. Well for example I'd have snuck the qemu_vmalloc/qemu_vfree mistake. Keep the BDRV_O_NO_BACKING separate. Ok, thanks. -- Federico
Re: [Qemu-devel] Live Block Migration using Mirroring
- Original Message - From: Stefan Hajnoczi stefa...@gmail.com To: Federico Simoncelli fsimo...@redhat.com Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com Sent: Thursday, February 23, 2012 5:35:23 PM Subject: Re: [Qemu-devel] Live Block Migration using Mirroring On Wed, Feb 22, 2012 at 5:13 PM, Federico Simoncelli fsimo...@redhat.com wrote: recently I've been working on live block migration combining the live snapshots and the blkmirror patch sent by Marcelo Tosatti few months ago. The design is summarized at this url as Mirrored-Snapshot: http://www.ovirt.org/wiki/Features/Design/StorageLiveMigration After mirrored-snapshot completes we're left with the base and the snapshot. Is the idea to implement live snapshot merge next? Or do you have something else planned to avoid growing the backing file chain each time mirrored-snapshot is used? The general idea is that I don't expect the need to migrate to a new storage to be very frequent. Being able to live merge the new snapshot would be great. -- Federico
[Qemu-devel] Live Block Migration using Mirroring
Hi, recently I've been working on live block migration combining the live snapshots and the blkmirror patch sent by Marcelo Tosatti few months ago. The design is summarized at this url as Mirrored-Snapshot: http://www.ovirt.org/wiki/Features/Design/StorageLiveMigration The design assumes that the qemu process can reach both the source and destination storages and no real VM migration between hosts is involved. The principal problem that it tries to solve is moving a VM to a new reachable storage (more space, faster) without temporarily disrupting its services. The following set of patches are implementing the required changes in QEMU. Here it is a quick example of the use case (for consistency with the design at the url above I will use the same step numbers): Preparation === $ mkdir /tmp/{src/dst} $ qemu-img create -f qcow2 /tmp/src/hd0base.qcow2 20G Formatting '/tmp/src/hd0base.qcow2', fmt=qcow2 size=21474836480 encryption=off cluster_size=65536 Step 1 - Initital Scenario == VM1 is running on the src/hd0base. (Where = stands for uses) [src/hd0base] = VM1(read-write) $ qemu-system-x86_64 -hda /tmp/src/hd0base.qcow2 -monitor stdio QEMU 1.0.50 monitor - type 'help' for more information (qemu) Step 3 - Mirrored Live Snapshot === A mirrored live snapshot is issued using src/hd0snap1 and dst/hd0snap1 as image files. (Where - stands for has backing file) [src/hd0base] - [src/hd0snap1] = VM1(read-write) ... - [dst/hd0snap1] = VM1(write-only) $ qemu-img create -f qcow2 \ -b /tmp/src/hd0base.qcow2 /tmp/src/hd0snap1.qcow2 20G Formatting '/tmp/src/hd0snap1.qcow2', fmt=qcow2 size=21474836480 backing_file='/tmp/src/hd0base.qcow2' encryption=off cluster_size=65536 $ qemu-img create -f qcow2 \ -b /tmp/dst/hd0base.qcow2 /tmp/dst/hd0snap1.qcow2 20G Formatting '/tmp/dst/hd0snap1.qcow2', fmt=qcow2 size=21474836480 backing_file='/tmp/src/hd0base.qcow2' encryption=off cluster_size=65536 (qemu) snapshot_blkdev -n ide0-hd0 \ blkmirror:/tmp/src/hd0snap1.qcow2:/tmp/dst/hd0snap1.qcow2 blkmirror Step 4 - Backing File Copy == An external manager copies src/hd0base to the destination dst/hd0base. [src/hd0base] - [src/hd0snap1] = VM1(read-write) [dst/hd0base] - [dst/hd0snap1] = VM1(write-only) $ cp -a /tmp/src/hd0base.qcow2 /tmp/dst/hd0base.qcow2 Step 5 - Final Switch to Destination VM1 is now able to switch to the destination for both read and write operations. [src/hd0base] - [src/hd0snap1] = VM1(read-write) (qemu) snapshot_blkdev -n ide0-hd0 /tmp/dst/hd0snap1.qcow2 -- Federico
[Qemu-devel] [PATCH 2/3] Update the blkmirror block driver
Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- block/blkmirror.c | 26 +- 1 files changed, 13 insertions(+), 13 deletions(-) diff --git a/block/blkmirror.c b/block/blkmirror.c index 1c02710..1cfd2fb 100644 --- a/block/blkmirror.c +++ b/block/blkmirror.c @@ -46,7 +46,7 @@ static int blkmirror_open(BlockDriverState *bs, const char *filename, int flags) filename += strlen(blkmirror:); /* Parse the raw image filename */ -filename2 = qemu_malloc(strlen(filename)+1); +filename2 = qemu_vmalloc(strlen(filename)+1); escape = 0; for (i = n = 0; i strlen(filename); i++) { if (!escape filename[i] == ':') { @@ -66,11 +66,11 @@ static int blkmirror_open(BlockDriverState *bs, const char *filename, int flags) m-bs[0] = bdrv_new(); if (m-bs[0] == NULL) { -free(filename2); +qemu_vfree(filename2); return -ENOMEM; } ret = bdrv_open(m-bs[0], filename2, flags, NULL); -free(filename2); +qemu_vfree(filename2); if (ret 0) { return ret; } @@ -81,7 +81,7 @@ static int blkmirror_open(BlockDriverState *bs, const char *filename, int flags) bdrv_delete(m-bs[0]); return -ENOMEM; } -ret = bdrv_open(m-bs[1], filename, flags, NULL); +ret = bdrv_open(m-bs[1], filename, flags | BDRV_O_NO_BACKING, NULL); if (ret 0) { bdrv_delete(m-bs[0]); return ret; @@ -101,17 +101,17 @@ static void blkmirror_close(BlockDriverState *bs) } } -static int blkmirror_flush(BlockDriverState *bs) +static coroutine_fn int blkmirror_co_flush(BlockDriverState *bs) { BdrvMirrorState *m = bs-opaque; int ret; -ret = bdrv_flush(m-bs[0]); +ret = bdrv_co_flush(m-bs[0]); if (ret 0) { return ret; } -return bdrv_flush(m-bs[1]); +return bdrv_co_flush(m-bs[1]); } static int64_t blkmirror_getlength(BlockDriverState *bs) @@ -242,18 +242,18 @@ static BlockDriverAIOCB *blkmirror_aio_flush(BlockDriverState *bs, return dcb-common; } -static int blkmirror_discard(BlockDriverState *bs, int64_t sector_num, - int nb_sectors) +static coroutine_fn int blkmirror_co_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors) { BdrvMirrorState *m = bs-opaque; int ret; -ret = bdrv_discard(m-bs[0], sector_num, nb_sectors); +ret = bdrv_co_discard(m-bs[0], sector_num, nb_sectors); if (ret 0) { return ret; } -return bdrv_discard(m-bs[1], sector_num, nb_sectors); +return bdrv_co_discard(m-bs[1], sector_num, nb_sectors); } @@ -266,8 +266,8 @@ static BlockDriver bdrv_blkmirror = { .bdrv_file_open = blkmirror_open, .bdrv_close = blkmirror_close, -.bdrv_flush = blkmirror_flush, -.bdrv_discard = blkmirror_discard, +.bdrv_co_flush_to_disk = blkmirror_co_flush, +.bdrv_co_discard= blkmirror_co_discard, .bdrv_aio_readv = blkmirror_aio_readv, .bdrv_aio_writev= blkmirror_aio_writev, -- 1.7.1
[Qemu-devel] [PATCH 3/3] Add nocreate option to snapshot_blkdev
Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- blockdev.c | 14 -- hmp-commands.hx | 16 ++-- hmp.c|4 +++- qapi-schema.json |8 +++- qmp-commands.hx |2 +- 5 files changed, 29 insertions(+), 15 deletions(-) diff --git a/blockdev.c b/blockdev.c index 7a6613a..2c3e10a 100644 --- a/blockdev.c +++ b/blockdev.c @@ -647,7 +647,7 @@ void do_commit(Monitor *mon, const QDict *qdict) void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, bool has_format, const char *format, -Error **errp) +bool has_nocreate, bool nocreate, Error **errp) { BlockDriverState *bs; BlockDriver *drv, *old_drv, *proto_drv; @@ -686,11 +686,13 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, return; } -ret = bdrv_img_create(snapshot_file, format, bs-filename, - bs-drv-format_name, NULL, -1, flags); -if (ret) { -error_set(errp, QERR_UNDEFINED_ERROR); -return; +if (!(has_nocreate nocreate)) { +ret = bdrv_img_create(snapshot_file, format, bs-filename, + bs-drv-format_name, NULL, -1, flags); +if (ret) { +error_set(errp, QERR_UNDEFINED_ERROR); +return; +} } bdrv_drain_all(); diff --git a/hmp-commands.hx b/hmp-commands.hx index 573b823..ff49412 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -868,14 +868,18 @@ ETEXI { .name = snapshot_blkdev, -.args_type = device:B,snapshot-file:s?,format:s?, -.params = device [new-image-file] [format], +.args_type = nocreate:-n,device:B,snapshot-file:s?,format:s?, +.params = [-n] device [new-image-file] [format], .help = initiates a live snapshot\n\t\t\t of device. If a new image file is specified, the\n\t\t\t - new image file will become the new root image.\n\t\t\t - If format is specified, the snapshot file will\n\t\t\t - be created in that format. Otherwise the\n\t\t\t - snapshot will be internal! (currently unsupported), + new image file will be created (unless -n is\n\t\t\t + specified) and will become the new root image.\n\t\t\t + The -n (nocreate) option is potentially dangerous\n\t\t\t + if the image wasn't prepared in the correct way\n\t\t\t + by an external manager. If format is specified,\n\t\t\t + the snapshot file will be created in that format.\n\t\t\t + Otherwise the snapshot will be internal!\n\t\t\t + (currently unsupported), .mhandler.cmd = hmp_snapshot_blkdev, }, diff --git a/hmp.c b/hmp.c index 8ff8c94..44868c7 100644 --- a/hmp.c +++ b/hmp.c @@ -684,6 +684,7 @@ void hmp_block_resize(Monitor *mon, const QDict *qdict) void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict) { +int nocreate = qdict_get_try_bool(qdict, nocreate, 0); const char *device = qdict_get_str(qdict, device); const char *filename = qdict_get_try_str(qdict, snapshot-file); const char *format = qdict_get_try_str(qdict, format); @@ -697,7 +698,8 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict) return; } -qmp_blockdev_snapshot_sync(device, filename, !!format, format, errp); +qmp_blockdev_snapshot_sync(device, filename, + !!format, format, true, nocreate, errp); hmp_handle_error(mon, errp); } diff --git a/qapi-schema.json b/qapi-schema.json index d02ee86..ac652c2 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1116,9 +1116,14 @@ # @snapshot-file: the target of the new image. If the file exists, or if it # is a device, the snapshot will be created in the existing # file/device. If does not exist, a new file will be created. +# This behavior can be overridden using the nocreate option. # # @format: #optional the format of the snapshot image, default is 'qcow2'. # +# @nocreate: #optional use the target image avoiding the creation. This is +#a potentially dangerous option if the image wasn't prepared +#in the correct way by an external manager. +# # Returns: nothing on success # If @device is not a valid block device, DeviceNotFound # If @snapshot-file can't be opened, OpenFileFailed @@ -1133,7 +1138,8 @@ # Since 0.14.0 ## { 'command': 'blockdev-snapshot-sync', - 'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str' } } + 'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str', +'*nocreate': 'bool
[Qemu-devel] [PATCH 1/3] Add blkmirror block driver
From: Marcelo Tosatti mtosa...@redhat.com Mirrored writes are used by live block copy. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com --- Makefile.objs |2 +- block/blkmirror.c | 282 docs/blkmirror.txt | 15 +++ 3 files changed, 298 insertions(+), 1 deletions(-) create mode 100644 block/blkmirror.c create mode 100644 docs/blkmirror.txt diff --git a/Makefile.objs b/Makefile.objs index 391e524..cd65e1b 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -34,7 +34,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vv block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-nested-y += qed-check.o -block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o +block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o blkmirror.o block-nested-y += stream.o block-nested-$(CONFIG_WIN32) += raw-win32.o block-nested-$(CONFIG_POSIX) += raw-posix.o diff --git a/block/blkmirror.c b/block/blkmirror.c new file mode 100644 index 000..1c02710 --- /dev/null +++ b/block/blkmirror.c @@ -0,0 +1,282 @@ +/* + * Block driver for mirrored writes. + * + * Copyright (C) 2011 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include stdarg.h +#include block_int.h + +typedef struct { +BlockDriverState *bs[2]; +} BdrvMirrorState; + +typedef struct DupAIOCB DupAIOCB; + +typedef struct SingleAIOCB { +BlockDriverAIOCB *aiocb; +int finished; +DupAIOCB *parent; +} SingleAIOCB; + +struct DupAIOCB { +BlockDriverAIOCB common; +int count; + +BlockDriverCompletionFunc *cb; +SingleAIOCB aios[2]; +int ret; +}; + +/* Valid blkmirror filenames look like + * blkmirror:path/to/image1:path/to/image2 */ +static int blkmirror_open(BlockDriverState *bs, const char *filename, int flags) +{ +BdrvMirrorState *m = bs-opaque; +int ret, escape, i, n; +char *filename2; + +/* Parse the blkmirror: prefix */ +if (strncmp(filename, blkmirror:, strlen(blkmirror:))) { +return -EINVAL; +} +filename += strlen(blkmirror:); + +/* Parse the raw image filename */ +filename2 = qemu_malloc(strlen(filename)+1); +escape = 0; +for (i = n = 0; i strlen(filename); i++) { +if (!escape filename[i] == ':') { +break; +} +if (!escape filename[i] == '\\') { +escape = 1; +} else { +escape = 0; +} + +if (!escape) { +filename2[n++] = filename[i]; +} +} +filename2[n] = '\0'; + +m-bs[0] = bdrv_new(); +if (m-bs[0] == NULL) { +free(filename2); +return -ENOMEM; +} +ret = bdrv_open(m-bs[0], filename2, flags, NULL); +free(filename2); +if (ret 0) { +return ret; +} +filename += i + 1; + +m-bs[1] = bdrv_new(); +if (m-bs[1] == NULL) { +bdrv_delete(m-bs[0]); +return -ENOMEM; +} +ret = bdrv_open(m-bs[1], filename, flags, NULL); +if (ret 0) { +bdrv_delete(m-bs[0]); +return ret; +} + +return 0; +} + +static void blkmirror_close(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs-opaque; +int i; + +for (i = 0; i 2; i++) { +bdrv_delete(m-bs[i]); +m-bs[i] = NULL; +} +} + +static int blkmirror_flush(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs-opaque; +int ret; + +ret = bdrv_flush(m-bs[0]); +if (ret 0) { +return ret; +} + +return bdrv_flush(m-bs[1]); +} + +static int64_t blkmirror_getlength(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs-opaque; + +return bdrv_getlength(m-bs[0]); +} + +static BlockDriverAIOCB *blkmirror_aio_readv(BlockDriverState *bs, + int64_t sector_num, + QEMUIOVector *qiov, + int nb_sectors, + BlockDriverCompletionFunc *cb, + void *opaque) +{ +BdrvMirrorState *m = bs-opaque; +return bdrv_aio_readv(m-bs[0], sector_num, qiov, nb_sectors, cb, opaque); +} + +static void dup_aio_cancel(BlockDriverAIOCB *blockacb) +{ +DupAIOCB *acb = container_of(blockacb, DupAIOCB, common); +int i; + +for (i = 0 ; i 2; i++) { +if (!acb-aios[i].finished) { +bdrv_aio_cancel(acb-aios[i].aiocb); +} +} +qemu_aio_release(acb); +} + +static AIOPool dup_aio_pool = { +.aiocb_size = sizeof(DupAIOCB), +.cancel = dup_aio_cancel, +}; + +static void blkmirror_aio_cb(void *opaque, int ret) +{ +SingleAIOCB *scb = opaque; +DupAIOCB *dcb = scb-parent; + +
Re: [Qemu-devel] [vdsm] oVirt Live Snapshots
- Original Message - From: Shu Ming shum...@linux.vnet.ibm.com To: Federico Simoncelli fsimo...@redhat.com Cc: qemu-devel@nongnu.org, libvir-l...@redhat.com, VDSM Project Development vdsm-de...@lists.fedorahosted.org, Dave Allan dal...@redhat.com, Eric Blake ebl...@redhat.com Sent: Thursday, February 2, 2012 1:59:01 PM Subject: Re: [vdsm] oVirt Live Snapshots Can someone explain what is DB in this wiki page? It is the ovirt-engine database, where the VMs/images information and status is stored. That part of the wiki should be improved. See, Live snapshots operation extend regular snapshots as follow: • Create a locked snapshot in DB -- Federico
[Qemu-devel] oVirt Live Snapshots
Hi, oVirt, and more specifically VDSM, is currently implementing the live snapshot feature using the API/commands provided by libvirt and qemu. It would be great if you could review the design and the current open issues at: http://ovirt.org/wiki/Live_Snapshots Thank you, -- Federico
Re: [Qemu-devel] [PATCH] qemu: new option for snapshot_blkdev to avoid image creation
- Original Message - From: Dor Laor dl...@redhat.com To: Federico Simoncelli fsimo...@redhat.com Cc: qemu-devel@nongnu.org, aba...@redhat.com, Kevin Wolf kw...@redhat.com Sent: Friday, October 7, 2011 12:45:06 AM Subject: Re: [Qemu-devel] [PATCH] qemu: new option for snapshot_blkdev to avoid image creation On 10/03/2011 06:09 PM, Federico Simoncelli wrote: Add the new option [-n] for snapshot_blkdev to avoid the image creation. The file provided as [new-image-file] is considered as already initialized and will be used after passing a check for the backing file. Seems ok to me as a way to go around fdget and still have selinux gain. Worth to get Kevin's view too. Hi Kevin, any news on this? Federico, would you like to ack or extend the design: http://wiki.qemu.org/Features/Snapshots Dor, should I do it now or only after my patch is acked? -- Federico
Re: [Qemu-devel] New option for snapshot_blkdev to avoid image creation
- Original Message - From: Stefan Hajnoczi stefa...@gmail.com To: Federico Simoncelli fsimo...@redhat.com Cc: qemu-devel@nongnu.org, aba...@redhat.com, dl...@redhat.com Sent: Tuesday, October 4, 2011 9:33:48 AM Subject: Re: [Qemu-devel] New option for snapshot_blkdev to avoid image creation On Mon, Oct 03, 2011 at 04:09:01PM +, Federico Simoncelli wrote: In some situations might be useful to let qemu use an image that was prepared for a live snapshot. The advantage is that creating the snapshot file outside of the qemu process we can use the whole range of options provided by the format (eg for qcow2: encryption, cluster_size and preallocation). It is also possible to pre-set a relative path to the backing file (now it is created by default as absolute path). In the long run it can also avoid the danger of reimplementing qemu-img inside qemu (if we wanted to expose such options when a snapshot is requested). When the image file is created based on the backing file size: $ qemu-img create -f qcow2 -o backing_file=master.img vm001.qcow2 It turns out that bdrv_img_create() opens the backing file with read/write permissions. This is generally a bad idea but especially dangerous when the VM currently has the image file open already since image formats are not designed for multiple initiators (clustering). Hi Stefan, are you sure that bdrv_img_create opens the backing file with read/write permissions? After a quick check I can't see that happening: $ ./qemu-img create -f qcow2 /tmp/master.img 1G Formatting '/tmp/master.img', fmt=qcow2 size=1073741824 encryption=off cluster_size=65536 $ strace -e trace=open ./qemu-img create -f qcow2 -o backing_file=/tmp/master.img /tmp/vm001.qcow2 [...] open(/tmp/vm001.qcow2, O_RDONLY|O_NONBLOCK) = -1 ENOENT (No such file or directory) open(/tmp/vm001.qcow2, O_RDONLY|O_NONBLOCK) = -1 ENOENT (No such file or directory) open(/tmp/master.img, O_RDONLY|O_NONBLOCK) = 3 open(/tmp/master.img, O_RDONLY|O_NONBLOCK) = 3 open(/tmp/master.img, O_RDONLY|O_DSYNC|O_CLOEXEC) = 3 open(/tmp/master.img, O_RDONLY|O_NONBLOCK) = 3 open(/tmp/master.img, O_RDONLY|O_NONBLOCK) = 3 open(/tmp/master.img, O_RDONLY|O_CLOEXEC) = 3 Formatting '/tmp/vm001.qcow2', fmt=qcow2 size=1073741824 backing_file='/tmp/master.img' encryption=off cluster_size=65536 open(/tmp/vm001.qcow2, O_RDONLY|O_NONBLOCK) = -1 ENOENT (No such file or directory) open(/tmp/vm001.qcow2, O_RDONLY|O_NONBLOCK) = -1 ENOENT (No such file or directory) open(/tmp/vm001.qcow2, O_WRONLY|O_CREAT|O_TRUNC, 0644) = 6 open(/tmp/vm001.qcow2, O_RDONLY|O_NONBLOCK) = 6 open(/tmp/vm001.qcow2, O_RDONLY|O_NONBLOCK) = 6 open(/tmp/vm001.qcow2, O_RDWR|O_DSYNC|O_CLOEXEC) = 6 open(/tmp/vm001.qcow2, O_RDONLY|O_NONBLOCK) = 6 open(/tmp/vm001.qcow2, O_RDONLY|O_NONBLOCK) = 6 open(/tmp/vm001.qcow2, O_RDWR|O_CLOEXEC) = 6 -- Federico
[Qemu-devel] New option for snapshot_blkdev to avoid image creation
In some situations might be useful to let qemu use an image that was prepared for a live snapshot. The advantage is that creating the snapshot file outside of the qemu process we can use the whole range of options provided by the format (eg for qcow2: encryption, cluster_size and preallocation). It is also possible to pre-set a relative path to the backing file (now it is created by default as absolute path). In the long run it can also avoid the danger of reimplementing qemu-img inside qemu (if we wanted to expose such options when a snapshot is requested). -- Federico.
[Qemu-devel] [PATCH] qemu: new option for snapshot_blkdev to avoid image creation
Add the new option [-n] for snapshot_blkdev to avoid the image creation. The file provided as [new-image-file] is considered as already initialized and will be used after passing a check for the backing file. Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- blockdev.c | 54 -- hmp-commands.hx |7 --- qmp-commands.hx |4 ++-- 3 files changed, 58 insertions(+), 7 deletions(-) diff --git a/blockdev.c b/blockdev.c index 0827bf7..bd46808 100644 --- a/blockdev.c +++ b/blockdev.c @@ -550,8 +550,53 @@ void do_commit(Monitor *mon, const QDict *qdict) } } +static int check_snapshot_file(const char *filename, const char *oldfilename, + int flags, BlockDriver *drv) +{ +BlockDriverState *bs; +char bak_filename[1024], *abs_filename; +int ret = 0; + +bs = bdrv_new(); +if (!bs) { +return -1; +} + +ret = bdrv_open(bs, filename, flags, drv); +if (ret) { +qerror_report(QERR_OPEN_FILE_FAILED, filename); +goto err0; +} + +if (bs-backing_file) { +path_combine(bak_filename, sizeof(bak_filename), + filename, bs-backing_file); + +abs_filename = realpath(bak_filename, NULL); +if (!abs_filename) { +ret = -1; +goto err1; +} + +if (strcmp(abs_filename, oldfilename)) { +qerror_report(QERR_OPEN_FILE_FAILED, filename); +ret = -1; +} + +free(abs_filename); +} + +err1: +bdrv_close(bs); + +err0: +bdrv_delete(bs); +return ret; +} + int do_snapshot_blkdev(Monitor *mon, const QDict *qdict, QObject **ret_data) { +const int nocreate = qdict_get_try_bool(qdict, nocreate, 0); const char *device = qdict_get_str(qdict, device); const char *filename = qdict_get_try_str(qdict, snapshot-file); const char *format = qdict_get_try_str(qdict, format); @@ -597,8 +642,13 @@ int do_snapshot_blkdev(Monitor *mon, const QDict *qdict, QObject **ret_data) goto out; } -ret = bdrv_img_create(filename, format, bs-filename, - bs-drv-format_name, NULL, -1, flags); +if (nocreate) { +ret = check_snapshot_file(filename, bs-filename, flags, drv); +} else { +ret = bdrv_img_create(filename, format, bs-filename, + bs-drv-format_name, NULL, -1, flags); +} + if (ret) { goto out; } diff --git a/hmp-commands.hx b/hmp-commands.hx index 9e1cca8..eb9fcd4 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -840,11 +840,12 @@ ETEXI { .name = snapshot_blkdev, -.args_type = device:B,snapshot-file:s?,format:s?, -.params = device [new-image-file] [format], +.args_type = nocreate:-n,device:B,snapshot-file:s?,format:s?, +.params = [-n] device [new-image-file] [format], .help = initiates a live snapshot\n\t\t\t of device. If a new image file is specified, the\n\t\t\t - new image file will become the new root image.\n\t\t\t + new image file will be created (unless -n is\n\t\t\t + specified) and will become the new root image.\n\t\t\t If format is specified, the snapshot file will\n\t\t\t be created in that format. Otherwise the\n\t\t\t snapshot will be internal! (currently unsupported), diff --git a/qmp-commands.hx b/qmp-commands.hx index d83bce5..7af36d8 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -695,8 +695,8 @@ EQMP { .name = blockdev-snapshot-sync, -.args_type = device:B,snapshot-file:s?,format:s?, -.params = device [new-image-file] [format], +.args_type = nocreate:-n,device:B,snapshot-file:s?,format:s?, +.params = [-n] device [new-image-file] [format], .user_print = monitor_user_noop, .mhandler.cmd_new = do_snapshot_blkdev, }, -- 1.7.1
[Qemu-devel] [PATCHv4] qemu-img: Add cache command line option
qemu-img currently writes disk images using writeback and filling up the cache buffers which are then flushed by the kernel preventing other processes from accessing the storage. This is particularly bad in cluster environments where time-based algorithms might be in place and accessing the storage within certain timeouts is critical. This patch adds the option to choose a cache method when writing disk images. Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- qemu-img-cmds.hx |6 ++-- qemu-img.c | 80 +- 2 files changed, 70 insertions(+), 16 deletions(-) diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index 3072d38..2b70618 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -22,13 +22,13 @@ STEXI ETEXI DEF(commit, img_commit, -commit [-f fmt] filename) +commit [-f fmt] [-t cache] filename) STEXI @item commit [-f @var{fmt}] @var{filename} ETEXI DEF(convert, img_convert, -convert [-c] [-p] [-f fmt] [-O output_fmt] [-o options] [-s snapshot_name] filename [filename2 [...]] output_filename) +convert [-c] [-p] [-f fmt] [-t cache] [-O output_fmt] [-o options] [-s snapshot_name] filename [filename2 [...]] output_filename) STEXI @item convert [-c] [-f @var{fmt}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] @var{filename} [@var{filename2} [...]] @var{output_filename} ETEXI @@ -46,7 +46,7 @@ STEXI ETEXI DEF(rebase, img_rebase, -rebase [-f fmt] [-p] [-u] -b backing_file [-F backing_fmt] filename) +rebase [-f fmt] [-t cache] [-p] [-u] -b backing_file [-F backing_fmt] filename) STEXI @item rebase [-f @var{fmt}] [-u] -b @var{backing_file} [-F @var{backing_fmt}] @var{filename} ETEXI diff --git a/qemu-img.c b/qemu-img.c index 4f162d1..f904e32 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -40,6 +40,7 @@ typedef struct img_cmd_t { /* Default to cache=writeback as data integrity is not important for qemu-tcg. */ #define BDRV_O_FLAGS BDRV_O_CACHE_WB +#define BDRV_DEFAULT_CACHE writeback static void format_print(void *opaque, const char *name) { @@ -64,6 +65,8 @@ static void help(void) Command parameters:\n 'filename' is a disk image filename\n 'fmt' is the disk image format. It is guessed automatically in most cases\n + 'cache' is the cache mode used to write the output disk image, the valid\n + options are: 'none', 'writeback' (default), 'writethrough' and 'unsafe'\n 'size' is the disk image size in bytes. Optional suffixes\n 'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M)\n and T (terabyte, 1024G) are supported. 'b' is ignored.\n @@ -180,6 +183,27 @@ static int read_password(char *buf, int buf_size) } #endif +static int set_cache_flag(const char *mode, int *flags) +{ +*flags = ~BDRV_O_CACHE_MASK; + +if (!strcmp(mode, none) || !strcmp(mode, off)) { +*flags |= BDRV_O_CACHE_WB; +*flags |= BDRV_O_NOCACHE; +} else if (!strcmp(mode, writeback)) { +*flags |= BDRV_O_CACHE_WB; +} else if (!strcmp(mode, unsafe)) { +*flags |= BDRV_O_CACHE_WB; +*flags |= BDRV_O_NO_FLUSH; +} else if (!strcmp(mode, writethrough)) { +/* this is the default */ +} else { +return -1; +} + +return 0; +} + static int print_block_option_help(const char *filename, const char *fmt) { BlockDriver *drv, *proto_drv; @@ -441,13 +465,14 @@ static int img_check(int argc, char **argv) static int img_commit(int argc, char **argv) { -int c, ret; -const char *filename, *fmt; +int c, ret, flags; +const char *filename, *fmt, *cache; BlockDriverState *bs; fmt = NULL; +cache = BDRV_DEFAULT_CACHE; for(;;) { -c = getopt(argc, argv, f:h); +c = getopt(argc, argv, f:ht:); if (c == -1) { break; } @@ -459,6 +484,9 @@ static int img_commit(int argc, char **argv) case 'f': fmt = optarg; break; +case 't': +cache = optarg; +break; } } if (optind = argc) { @@ -466,7 +494,14 @@ static int img_commit(int argc, char **argv) } filename = argv[optind++]; -bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR); +flags = BDRV_O_RDWR; +ret = set_cache_flag(cache, flags); +if (ret 0) { +error_report(Invalid cache option: %s\n, cache); +return -1; +} + +bs = bdrv_new_open(filename, fmt, flags); if (!bs) { return 1; } @@ -591,8 +626,8 @@ static int compare_sectors(const uint8_t *buf1, const uint8_t *buf2, int n, static int img_convert(int argc, char **argv) { int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size, cluster_sectors; -int progress = 0; -const char *fmt, *out_fmt, *out_baseimg, *out_filename; +int progress = 0, flags
[Qemu-devel] [PATCHv3] qemu-img: Add cache command line option
qemu-img currently writes disk images using writeback and filling up the cache buffers which are then flushed by the kernel preventing other processes from accessing the storage. This is particularly bad in cluster environments where time-based algorithms might be in place and accessing the storage within certain timeouts is critical. This patch adds the option to choose a cache method when writing disk images. Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- qemu-img-cmds.hx |6 ++-- qemu-img.c | 80 +- 2 files changed, 70 insertions(+), 16 deletions(-) diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index 3072d38..2b70618 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -22,13 +22,13 @@ STEXI ETEXI DEF(commit, img_commit, -commit [-f fmt] filename) +commit [-f fmt] [-t cache] filename) STEXI @item commit [-f @var{fmt}] @var{filename} ETEXI DEF(convert, img_convert, -convert [-c] [-p] [-f fmt] [-O output_fmt] [-o options] [-s snapshot_name] filename [filename2 [...]] output_filename) +convert [-c] [-p] [-f fmt] [-t cache] [-O output_fmt] [-o options] [-s snapshot_name] filename [filename2 [...]] output_filename) STEXI @item convert [-c] [-f @var{fmt}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] @var{filename} [@var{filename2} [...]] @var{output_filename} ETEXI @@ -46,7 +46,7 @@ STEXI ETEXI DEF(rebase, img_rebase, -rebase [-f fmt] [-p] [-u] -b backing_file [-F backing_fmt] filename) +rebase [-f fmt] [-t cache] [-p] [-u] -b backing_file [-F backing_fmt] filename) STEXI @item rebase [-f @var{fmt}] [-u] -b @var{backing_file} [-F @var{backing_fmt}] @var{filename} ETEXI diff --git a/qemu-img.c b/qemu-img.c index 4f162d1..c2a106e 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -40,6 +40,7 @@ typedef struct img_cmd_t { /* Default to cache=writeback as data integrity is not important for qemu-tcg. */ #define BDRV_O_FLAGS BDRV_O_CACHE_WB +#define BDRV_DEFAULT_CACHE writeback static void format_print(void *opaque, const char *name) { @@ -64,6 +65,8 @@ static void help(void) Command parameters:\n 'filename' is a disk image filename\n 'fmt' is the disk image format. It is guessed automatically in most cases\n + 'cache' is the cache mode used to write the output disk image, the valid\n + options are: 'none', 'writeback' (default), 'writethrough' and 'unsafe'\n 'size' is the disk image size in bytes. Optional suffixes\n 'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M)\n and T (terabyte, 1024G) are supported. 'b' is ignored.\n @@ -180,6 +183,27 @@ static int read_password(char *buf, int buf_size) } #endif +static int set_cache_flag(const char *mode, int *flags) +{ +*flags = ~BDRV_O_CACHE_MASK; + +if (!strcmp(mode, none) || !strcmp(mode, off)) { +*flags |= BDRV_O_CACHE_WB; +*flags |= BDRV_O_NOCACHE; +} else if (!strcmp(mode, writeback)) { +*flags |= BDRV_O_CACHE_WB; +} else if (!strcmp(mode, unsafe)) { +*flags |= BDRV_O_CACHE_WB; +*flags |= BDRV_O_NO_FLUSH; +} else if (!strcmp(mode, writethrough)) { +/* this is the default */ +} else { +return -1; +} + +return 0; +} + static int print_block_option_help(const char *filename, const char *fmt) { BlockDriver *drv, *proto_drv; @@ -441,13 +465,14 @@ static int img_check(int argc, char **argv) static int img_commit(int argc, char **argv) { -int c, ret; -const char *filename, *fmt; +int c, ret, flags; +const char *filename, *fmt, *cache; BlockDriverState *bs; fmt = NULL; +cache = BDRV_DEFAULT_CACHE; for(;;) { -c = getopt(argc, argv, f:h); +c = getopt(argc, argv, f:ht:); if (c == -1) { break; } @@ -459,6 +484,9 @@ static int img_commit(int argc, char **argv) case 'f': fmt = optarg; break; +case 't': +cache = optarg; +break; } } if (optind = argc) { @@ -466,7 +494,14 @@ static int img_commit(int argc, char **argv) } filename = argv[optind++]; -bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR); +flags = BDRV_O_RDWR; +ret = set_cache_flag(cache, flags); +if (ret 0) { +error_report(Invalid cache option: %s\n, cache); +return -1; +} + +bs = bdrv_new_open(filename, fmt, flags); if (!bs) { return 1; } @@ -591,8 +626,8 @@ static int compare_sectors(const uint8_t *buf1, const uint8_t *buf2, int n, static int img_convert(int argc, char **argv) { int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size, cluster_sectors; -int progress = 0; -const char *fmt, *out_fmt, *out_baseimg, *out_filename; +int progress = 0, flags
Re: [Qemu-devel] [PATCH] qemu-img: Add cache command line option
- Original Message - From: Christoph Hellwig h...@lst.de To: Federico Simoncelli fsimo...@redhat.com Cc: k...@vger.kernel.org, kw...@redhat.com, qemu-devel@nongnu.org, a...@redhat.com Sent: Thursday, June 16, 2011 4:28:09 PM Subject: Re: [Qemu-devel] [PATCH] qemu-img: Add cache command line option On Wed, Jun 15, 2011 at 09:46:10AM -0400, Federico Simoncelli wrote: qemu-img currently writes disk images using writeback and filling up the cache buffers which are then flushed by the kernel preventing other processes from accessing the storage. This is particularly bad in cluster environments where time-based algorithms might be in place and accessing the storage within certain timeouts is critical. This patch adds the option to choose a cache method when writing disk images. Allowing to chose the mode is of course fine, but what about also choosing a good default? writethrough doesn't really make any sense for qemu-img, given that we can trivially flush the cache at the end of the operations. I'd also say that using the buffer cache doesn't make sense either, as there is little point in caching these operations. I totally agree with you, I didn't change the default to increase the chances for my patch to be accepted. If we want cache=none as default we can easily change: -#define BDRV_DEFAULT_CACHE writeback +#define BDRV_DEFAULT_CACHE none -- Federico
[Qemu-devel] [PATCH] qemu-img: Add cache command line option
qemu-img currently writes disk images using writeback and filling up the cache buffers which are then flushed by the kernel preventing other processes from accessing the storage. This is particularly bad in cluster environments where time-based algorithms might be in place and accessing the storage within certain timeouts is critical. This patch adds the option to choose a cache method when writing disk images. Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- qemu-img.c | 81 ++- 1 files changed, 68 insertions(+), 13 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index 4f162d1..7609db5 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -40,6 +40,7 @@ typedef struct img_cmd_t { /* Default to cache=writeback as data integrity is not important for qemu-tcg. */ #define BDRV_O_FLAGS BDRV_O_CACHE_WB +#define BDRV_DEFAULT_CACHE writeback static void format_print(void *opaque, const char *name) { @@ -64,6 +65,8 @@ static void help(void) Command parameters:\n 'filename' is a disk image filename\n 'fmt' is the disk image format. It is guessed automatically in most cases\n + 'cache' is the cache mode used to write the output disk image, the valid\n + options are: 'none', 'writeback' (default), 'writethrough' and 'unsafe'\n 'size' is the disk image size in bytes. Optional suffixes\n 'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M)\n and T (terabyte, 1024G) are supported. 'b' is ignored.\n @@ -180,6 +183,27 @@ static int read_password(char *buf, int buf_size) } #endif +static int set_cache_flag(const char *mode, int *flags) +{ +*flags = ~BDRV_O_CACHE_MASK; + +if (!strcmp(mode, none) || !strcmp(mode, off)) { +*flags |= BDRV_O_CACHE_WB; +*flags |= BDRV_O_NOCACHE; +} else if (!strcmp(mode, writeback)) { +*flags |= BDRV_O_CACHE_WB; +} else if (!strcmp(mode, unsafe)) { +*flags |= BDRV_O_CACHE_WB; +*flags |= BDRV_O_NO_FLUSH; +} else if (!strcmp(mode, writethrough)) { +/* this is the default */ +} else { +return -1; +} + +return 0; +} + static int print_block_option_help(const char *filename, const char *fmt) { BlockDriver *drv, *proto_drv; @@ -441,13 +465,14 @@ static int img_check(int argc, char **argv) static int img_commit(int argc, char **argv) { -int c, ret; -const char *filename, *fmt; +int c, ret, flags; +const char *filename, *fmt, *cache; BlockDriverState *bs; fmt = NULL; +cache = BDRV_DEFAULT_CACHE; for(;;) { -c = getopt(argc, argv, f:h); +c = getopt(argc, argv, f:ht:); if (c == -1) { break; } @@ -459,6 +484,9 @@ static int img_commit(int argc, char **argv) case 'f': fmt = optarg; break; +case 't': +cache = optarg; +break; } } if (optind = argc) { @@ -466,7 +494,14 @@ static int img_commit(int argc, char **argv) } filename = argv[optind++]; -bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR); +flags = BDRV_O_RDWR; +ret = set_cache_flag(cache, flags); +if (ret 0) { +error_report(Invalid cache option: %s\n, cache); +return -1; +} + +bs = bdrv_new_open(filename, fmt, flags); if (!bs) { return 1; } @@ -591,8 +626,8 @@ static int compare_sectors(const uint8_t *buf1, const uint8_t *buf2, int n, static int img_convert(int argc, char **argv) { int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size, cluster_sectors; -int progress = 0; -const char *fmt, *out_fmt, *out_baseimg, *out_filename; +int progress = 0, flags; +const char *fmt, *out_fmt, *cache, *out_baseimg, *out_filename; BlockDriver *drv, *proto_drv; BlockDriverState **bs = NULL, *out_bs = NULL; int64_t total_sectors, nb_sectors, sector_num, bs_offset; @@ -608,10 +643,11 @@ static int img_convert(int argc, char **argv) fmt = NULL; out_fmt = raw; +cache = BDRV_DEFAULT_CACHE; out_baseimg = NULL; compress = 0; for(;;) { -c = getopt(argc, argv, f:O:B:s:hce6o:p); +c = getopt(argc, argv, f:O:B:s:hce6o:pt:); if (c == -1) { break; } @@ -649,6 +685,9 @@ static int img_convert(int argc, char **argv) case 'p': progress = 1; break; +case 't': +cache = optarg; +break; } } @@ -779,8 +818,14 @@ static int img_convert(int argc, char **argv) goto out; } -out_bs = bdrv_new_open(out_filename, out_fmt, -BDRV_O_FLAGS | BDRV_O_RDWR | BDRV_O_NO_FLUSH); +flags = BDRV_O_RDWR; +ret = set_cache_flag(cache, flags); +if (ret 0) { +error_report(Invalid cache option: %s\n, cache
[Qemu-devel] [PATCHv2] qemu-img: Add cache command line option
qemu-img currently writes disk images using writeback and filling up the cache buffers which are then flushed by the kernel preventing other processes from accessing the storage. This is particularly bad in cluster environments where time-based algorithms might be in place and accessing the storage within certain timeouts is critical. This patch adds the option to choose a cache method when writing disk images. Signed-off-by: Federico Simoncelli fsimo...@redhat.com --- qemu-img.c | 80 ++- 1 files changed, 67 insertions(+), 13 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index 4f162d1..c2a106e 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -40,6 +40,7 @@ typedef struct img_cmd_t { /* Default to cache=writeback as data integrity is not important for qemu-tcg. */ #define BDRV_O_FLAGS BDRV_O_CACHE_WB +#define BDRV_DEFAULT_CACHE writeback static void format_print(void *opaque, const char *name) { @@ -64,6 +65,8 @@ static void help(void) Command parameters:\n 'filename' is a disk image filename\n 'fmt' is the disk image format. It is guessed automatically in most cases\n + 'cache' is the cache mode used to write the output disk image, the valid\n + options are: 'none', 'writeback' (default), 'writethrough' and 'unsafe'\n 'size' is the disk image size in bytes. Optional suffixes\n 'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M)\n and T (terabyte, 1024G) are supported. 'b' is ignored.\n @@ -180,6 +183,27 @@ static int read_password(char *buf, int buf_size) } #endif +static int set_cache_flag(const char *mode, int *flags) +{ +*flags = ~BDRV_O_CACHE_MASK; + +if (!strcmp(mode, none) || !strcmp(mode, off)) { +*flags |= BDRV_O_CACHE_WB; +*flags |= BDRV_O_NOCACHE; +} else if (!strcmp(mode, writeback)) { +*flags |= BDRV_O_CACHE_WB; +} else if (!strcmp(mode, unsafe)) { +*flags |= BDRV_O_CACHE_WB; +*flags |= BDRV_O_NO_FLUSH; +} else if (!strcmp(mode, writethrough)) { +/* this is the default */ +} else { +return -1; +} + +return 0; +} + static int print_block_option_help(const char *filename, const char *fmt) { BlockDriver *drv, *proto_drv; @@ -441,13 +465,14 @@ static int img_check(int argc, char **argv) static int img_commit(int argc, char **argv) { -int c, ret; -const char *filename, *fmt; +int c, ret, flags; +const char *filename, *fmt, *cache; BlockDriverState *bs; fmt = NULL; +cache = BDRV_DEFAULT_CACHE; for(;;) { -c = getopt(argc, argv, f:h); +c = getopt(argc, argv, f:ht:); if (c == -1) { break; } @@ -459,6 +484,9 @@ static int img_commit(int argc, char **argv) case 'f': fmt = optarg; break; +case 't': +cache = optarg; +break; } } if (optind = argc) { @@ -466,7 +494,14 @@ static int img_commit(int argc, char **argv) } filename = argv[optind++]; -bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR); +flags = BDRV_O_RDWR; +ret = set_cache_flag(cache, flags); +if (ret 0) { +error_report(Invalid cache option: %s\n, cache); +return -1; +} + +bs = bdrv_new_open(filename, fmt, flags); if (!bs) { return 1; } @@ -591,8 +626,8 @@ static int compare_sectors(const uint8_t *buf1, const uint8_t *buf2, int n, static int img_convert(int argc, char **argv) { int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size, cluster_sectors; -int progress = 0; -const char *fmt, *out_fmt, *out_baseimg, *out_filename; +int progress = 0, flags; +const char *fmt, *out_fmt, *cache, *out_baseimg, *out_filename; BlockDriver *drv, *proto_drv; BlockDriverState **bs = NULL, *out_bs = NULL; int64_t total_sectors, nb_sectors, sector_num, bs_offset; @@ -608,10 +643,11 @@ static int img_convert(int argc, char **argv) fmt = NULL; out_fmt = raw; +cache = BDRV_DEFAULT_CACHE; out_baseimg = NULL; compress = 0; for(;;) { -c = getopt(argc, argv, f:O:B:s:hce6o:p); +c = getopt(argc, argv, f:O:B:s:hce6o:pt:); if (c == -1) { break; } @@ -649,6 +685,9 @@ static int img_convert(int argc, char **argv) case 'p': progress = 1; break; +case 't': +cache = optarg; +break; } } @@ -779,8 +818,14 @@ static int img_convert(int argc, char **argv) goto out; } -out_bs = bdrv_new_open(out_filename, out_fmt, -BDRV_O_FLAGS | BDRV_O_RDWR | BDRV_O_NO_FLUSH); +flags = BDRV_O_RDWR; +ret = set_cache_flag(cache, flags); +if (ret 0) { +error_report(Invalid cache option: %s\n, cache