[Qemu-devel] [PATCHv2] block: add the optional file entry to query-block

2013-07-05 Thread Federico Simoncelli
This patch adds the optional file entry to the query-block output.
The value is a json-object representing the information about the
underlying file or device (when present).

Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 block/qapi.c   |7 +++
 qapi-schema.json   |4 +++-
 qmp-commands.hx|8 
 tests/qemu-iotests/043.out |   15 +++
 4 files changed, 33 insertions(+), 1 deletions(-)

diff --git a/block/qapi.c b/block/qapi.c
index a4bc411..40db983 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -171,6 +171,13 @@ void bdrv_query_image_info(BlockDriverState *bs,
 return;
 }
 
+if (bs-file) {
+bdrv_query_image_info(bs-file, info-file, err);
+if (info-file) {
+info-has_file = true;
+}
+}
+
 *p_info = info;
 }
 
diff --git a/qapi-schema.json b/qapi-schema.json
index 5c32528..0cb872c 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -238,6 +238,8 @@
 #
 # @backing-image: #optional info of the backing image (since 1.6)
 #
+# @file: #optional info of the underlying file (since 1.6)
+#
 # Since: 1.3
 #
 ##
@@ -248,7 +250,7 @@
'*cluster-size': 'int', '*encrypted': 'bool',
'*backing-filename': 'str', '*full-backing-filename': 'str',
'*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'],
-   '*backing-image': 'ImageInfo' } }
+   '*backing-image': 'ImageInfo', '*file': 'ImageInfo' } }
 
 ##
 # @ImageCheck:
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 362f0e1..bb8a931 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1791,6 +1791,8 @@ Each json-object contain the following:
  - backing-image: the detail of the backing image, it is an
 optional json-object only present when a
 backing image present for this image
+ - file: an optional json-object representing the information
+   about the underlying file or device (when present)
 
 - io-status: I/O operation status, only present if the device supports it
and the VM is configured to stop on errors. It's always reset
@@ -1842,6 +1844,12 @@ Example:
   format:qcow2,
   virtual-size:2048000
   }
+  file:{
+  filename:disks/test.qcow2,
+  format: file,
+  virtual-size: 262144,
+  actual-size: 139264
+  }
}
 },
 type:unknown
diff --git a/tests/qemu-iotests/043.out b/tests/qemu-iotests/043.out
index ad23337..6aea37b 100644
--- a/tests/qemu-iotests/043.out
+++ b/tests/qemu-iotests/043.out
@@ -44,6 +44,11 @@ cluster_size: 65536
 filename: TEST_DIR/t.IMGFMT, 
 cluster-size: 65536, 
 format: IMGFMT, 
+file: {
+virtual-size: 327680, 
+filename: TEST_DIR/t.IMGFMT, 
+format: file, 
+}, 
 backing-filename: TEST_DIR/t.IMGFMT.2.base, 
 dirty-flag: false
 }, 
@@ -52,6 +57,11 @@ cluster_size: 65536
 filename: TEST_DIR/t.IMGFMT.2.base, 
 cluster-size: 65536, 
 format: IMGFMT, 
+file: {
+virtual-size: 327680, 
+filename: TEST_DIR/t.IMGFMT.2.base, 
+format: file, 
+}, 
 backing-filename: TEST_DIR/t.IMGFMT.1.base, 
 dirty-flag: false
 }, 
@@ -60,6 +70,11 @@ cluster_size: 65536
 filename: TEST_DIR/t.IMGFMT.1.base, 
 cluster-size: 65536, 
 format: IMGFMT, 
+file: {
+virtual-size: 327680, 
+filename: TEST_DIR/t.IMGFMT.1.base, 
+format: file, 
+}, 
 dirty-flag: false
 }
 ]
-- 
1.7.1




[Qemu-devel] [PATCH] block: add the optional file entry to query-block

2013-06-28 Thread Federico Simoncelli
This patch adds the optional file entry to the query-block output.
The value is a json-object representing the information about the
underlying file or device (when present).

Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 block/qapi.c   |9 -
 qapi-schema.json   |4 +++-
 qmp-commands.hx|8 
 tests/qemu-iotests/043.out |   15 +++
 4 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/block/qapi.c b/block/qapi.c
index a4bc411..03cd222 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -119,7 +119,7 @@ void bdrv_query_image_info(BlockDriverState *bs,
 
 info-filename= g_strdup(bs-filename);
 info-format  = g_strdup(bdrv_get_format_name(bs));
-info-virtual_size= total_sectors * 512;
+info-virtual_size= bdrv_getlength(bs);
 info-actual_size = bdrv_get_allocated_file_size(bs);
 info-has_actual_size = info-actual_size = 0;
 if (bdrv_is_encrypted(bs)) {
@@ -171,6 +171,13 @@ void bdrv_query_image_info(BlockDriverState *bs,
 return;
 }
 
+if (bs-file) {
+bdrv_query_image_info(bs-file, info-file, err);
+if (info-file) {
+info-has_file = true;
+}
+}
+
 *p_info = info;
 }
 
diff --git a/qapi-schema.json b/qapi-schema.json
index a30a728..524a4a0 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -238,6 +238,8 @@
 #
 # @backing-image: #optional info of the backing image (since 1.6)
 #
+# @file: #optional info of the underlying file (since 1.6)
+#
 # Since: 1.3
 #
 ##
@@ -248,7 +250,7 @@
'*cluster-size': 'int', '*encrypted': 'bool',
'*backing-filename': 'str', '*full-backing-filename': 'str',
'*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'],
-   '*backing-image': 'ImageInfo' } }
+   '*backing-image': 'ImageInfo', '*file': 'ImageInfo' } }
 
 ##
 # @ImageCheck:
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 8cea5e5..c25ac77 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1745,6 +1745,8 @@ Each json-object contain the following:
  - backing-image: the detail of the backing image, it is an
 optional json-object only present when a
 backing image present for this image
+ - file: an optional json-object representing the information
+   about the underlying file or device (when present)
 
 - io-status: I/O operation status, only present if the device supports it
and the VM is configured to stop on errors. It's always reset
@@ -1796,6 +1798,12 @@ Example:
   format:qcow2,
   virtual-size:2048000
   }
+  file:{
+  filename:disks/test.qcow2,
+  format: file,
+  virtual-size: 262144,
+  actual-size: 139264
+  }
}
 },
 type:unknown
diff --git a/tests/qemu-iotests/043.out b/tests/qemu-iotests/043.out
index ad23337..6aea37b 100644
--- a/tests/qemu-iotests/043.out
+++ b/tests/qemu-iotests/043.out
@@ -44,6 +44,11 @@ cluster_size: 65536
 filename: TEST_DIR/t.IMGFMT, 
 cluster-size: 65536, 
 format: IMGFMT, 
+file: {
+virtual-size: 327680, 
+filename: TEST_DIR/t.IMGFMT, 
+format: file, 
+}, 
 backing-filename: TEST_DIR/t.IMGFMT.2.base, 
 dirty-flag: false
 }, 
@@ -52,6 +57,11 @@ cluster_size: 65536
 filename: TEST_DIR/t.IMGFMT.2.base, 
 cluster-size: 65536, 
 format: IMGFMT, 
+file: {
+virtual-size: 327680, 
+filename: TEST_DIR/t.IMGFMT.2.base, 
+format: file, 
+}, 
 backing-filename: TEST_DIR/t.IMGFMT.1.base, 
 dirty-flag: false
 }, 
@@ -60,6 +70,11 @@ cluster_size: 65536
 filename: TEST_DIR/t.IMGFMT.1.base, 
 cluster-size: 65536, 
 format: IMGFMT, 
+file: {
+virtual-size: 327680, 
+filename: TEST_DIR/t.IMGFMT.1.base, 
+format: file, 
+}, 
 dirty-flag: false
 }
 ]
-- 
1.7.1




[Qemu-devel] [PATCH] block: add apparent-size to query-block

2013-06-21 Thread Federico Simoncelli
This patch adds the apparent-size entry to the query-block output.
The value represents the apparent size in bytes of the image, e.g.
file size (including the blocks not yet allocated) or block device
size.

Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 block/qapi.c |3 ++-
 qapi-schema.json |4 +++-
 qmp-commands.hx  |1 +
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/block/qapi.c b/block/qapi.c
index a4bc411..ccaba67 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -119,8 +119,9 @@ void bdrv_query_image_info(BlockDriverState *bs,
 
 info-filename= g_strdup(bs-filename);
 info-format  = g_strdup(bdrv_get_format_name(bs));
-info-virtual_size= total_sectors * 512;
+info-virtual_size= bdrv_getlength(bs);
 info-actual_size = bdrv_get_allocated_file_size(bs);
+info-apparent_size   = bdrv_getlength(bs-file);
 info-has_actual_size = info-actual_size = 0;
 if (bdrv_is_encrypted(bs)) {
 info-encrypted = true;
diff --git a/qapi-schema.json b/qapi-schema.json
index a80ee40..c7403ed 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -222,6 +222,8 @@
 #
 # @actual-size: #optional actual size on disk in bytes of the image
 #
+# @apparent-size: apparent size in bytes of the image
+#
 # @dirty-flag: #optional true if image is not cleanly closed
 #
 # @cluster-size: #optional size of a cluster in bytes
@@ -244,7 +246,7 @@
 
 { 'type': 'ImageInfo',
   'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool',
-   '*actual-size': 'int', 'virtual-size': 'int',
+   '*actual-size': 'int', 'virtual-size': 'int', 'apparent-size': 
'int',
'*cluster-size': 'int', '*encrypted': 'bool',
'*backing-filename': 'str', '*full-backing-filename': 'str',
'*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'],
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 8cea5e5..3f3aded 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1714,6 +1714,7 @@ Each json-object contain the following:
  - actual-size: actual size on disk in bytes of the image, not
   present when image does not support thin
   provision (json-int, optional)
+ - apparent-size: apparent size in bytes of the image
  - cluster-size: size of a cluster in bytes, not present if image
format does not support it (json-int, optional)
  - encrypted: true if the image is encrypted, not present means
-- 
1.7.1




[Qemu-devel] [PATCHv4 2/2] qemu-img: add json output option to the check command

2013-01-28 Thread Federico Simoncelli
This option --output=[human|json] makes qemu-img check output a human
or JSON representation at the choice of the user.

Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 qapi-schema.json |   46 +++
 qemu-img-cmds.hx |4 +-
 qemu-img.c   |  232 +++---
 qemu-img.texi|5 +-
 4 files changed, 220 insertions(+), 67 deletions(-)

diff --git a/qapi-schema.json b/qapi-schema.json
index 6d7252b..dc99c28 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -245,6 +245,52 @@
'*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'] } 
}
 
 ##
+# @ImageCheck:
+#
+# Information about a QEMU image file check
+#
+# @filename: name of the image file checked
+#
+# @format: format of the image file checked
+#
+# @check-errors: number of unexpected errors occurred during check
+#
+# @image-end-offset: #optional offset (in bytes) where the image ends, this
+#field is present if the driver for the image format
+#supports it
+#
+# @corruptions: #optional number of corruptions found during the check if any
+#
+# @leaks: #optional number of leaks found during the check if any
+#
+# @corruptions-fixed: #optional number of corruptions fixed during the check
+# if any
+#
+# @leaks-fixed: #optional number of leaks fixed during the check if any
+#
+# @total-clusters: #optional total number of clusters, this field is present
+#  if the driver for the image format supports it
+#
+# @allocated-clusters: #optional total number of allocated clusters, this
+#  field is present if the driver for the image format
+#  supports it
+#
+# @fragmented-clusters: #optional total number of fragmented clusters, this
+#   field is present if the driver for the image format
+#   supports it
+#
+# Since: 1.4
+#
+##
+
+{ 'type': 'ImageCheck',
+  'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int',
+   '*image-end-offset': 'int', '*corruptions': 'int', '*leaks': 'int',
+   '*corruptions-fixed': 'int', '*leaks-fixed': 'int',
+   '*total-clusters': 'int', '*allocated-clusters': 'int',
+   '*fragmented-clusters': 'int' } }
+
+##
 # @StatusInfo:
 #
 # Information about VCPU run state
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index a181363..259fc14 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -10,9 +10,9 @@ STEXI
 ETEXI
 
 DEF(check, img_check,
-check [-f fmt] [-r [leaks | all]] filename)
+check [-f fmt] [--output=ofmt] [-r [leaks | all]] filename)
 STEXI
-@item check [-f @var{fmt}] [-r [leaks | all]] @var{filename}
+@item check [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] 
@var{filename}
 ETEXI
 
 DEF(create, img_create,
diff --git a/qemu-img.c b/qemu-img.c
index e80c1c5..34249fe 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -42,6 +42,16 @@ typedef struct img_cmd_t {
 int (*handler)(int argc, char **argv);
 } img_cmd_t;
 
+enum {
+OPTION_OUTPUT = 256,
+OPTION_BACKING_CHAIN = 257,
+};
+
+typedef enum OutputFormat {
+OFORMAT_JSON,
+OFORMAT_HUMAN,
+} OutputFormat;
+
 /* Default to cache=writeback as data integrity is not important for qemu-tcg. 
*/
 #define BDRV_O_FLAGS BDRV_O_CACHE_WB
 #define BDRV_DEFAULT_CACHE writeback
@@ -375,6 +385,96 @@ static int img_create(int argc, char **argv)
 return 0;
 }
 
+static void dump_json_image_check(ImageCheck *check)
+{
+Error *errp = NULL;
+QString *str;
+QmpOutputVisitor *ov = qmp_output_visitor_new();
+QObject *obj;
+visit_type_ImageCheck(qmp_output_get_visitor(ov),
+  check, NULL, errp);
+obj = qmp_output_get_qobject(ov);
+str = qobject_to_json_pretty(obj);
+assert(str != NULL);
+printf(%s\n, qstring_get_str(str));
+qobject_decref(obj);
+qmp_output_visitor_cleanup(ov);
+QDECREF(str);
+}
+
+static void dump_human_image_check(ImageCheck *check)
+{
+if (!(check-corruptions || check-leaks || check-check_errors)) {
+printf(No errors were found on the image.\n);
+} else {
+if (check-corruptions) {
+printf(\n% PRId64  errors were found on the image.\n
+Data may be corrupted, or further writes to the image 
+may corrupt it.\n,
+check-corruptions);
+}
+
+if (check-leaks) {
+printf(\n% PRId64  leaked clusters were found on the image.\n
+This means waste of disk space, but no harm to data.\n,
+check-leaks);
+}
+
+if (check-check_errors) {
+printf(\n% PRId64  internal errors have occurred during the 
check.\n,
+check-check_errors);
+}
+}
+
+if (check-total_clusters != 0  check-allocated_clusters != 0) {
+printf(% PRId64 /% PRId64 = %0.2f%% allocated, %0.2f%% 
fragmented\n,
+check

[Qemu-devel] [PATCHv4 1/2] qemu-img: find the image end offset during check

2013-01-28 Thread Federico Simoncelli
This patch adds the support for reporting the image end offset (in
bytes). This is particularly useful after a conversion (or a rebase)
where the destination is a block device in order to find the first
unused byte at the end of the image.

Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 block/qcow2-refcount.c   |   10 --
 include/block/block.h|1 +
 qemu-img.c   |4 
 tests/qemu-iotests/026   |6 +++---
 tests/qemu-iotests/036   |3 ++-
 tests/qemu-iotests/039   |2 +-
 tests/qemu-iotests/044.out   |1 +
 tests/qemu-iotests/common.rc |5 +++--
 8 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 6a95aa6..45ad043 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1116,7 +1116,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
   BdrvCheckMode fix)
 {
 BDRVQcowState *s = bs-opaque;
-int64_t size, i;
+int64_t size, i, highest_cluster;
 int nb_clusters, refcount1, refcount2;
 QCowSnapshot *sn;
 uint16_t *refcount_table;
@@ -1187,7 +1187,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 
 /* compare ref counts */
-for(i = 0; i  nb_clusters; i++) {
+for(i = 0, highest_cluster = 0; i  nb_clusters; i++) {
 refcount1 = get_refcount(bs, i);
 if (refcount1  0) {
 fprintf(stderr, Can't get refcount for cluster % PRId64 : %s\n,
@@ -1197,6 +1197,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 
 refcount2 = refcount_table[i];
+
+if (refcount1  0 || refcount2  0) {
+highest_cluster = i;
+}
+
 if (refcount1 != refcount2) {
 
 /* Check if we're allowed to fix the mismatch */
@@ -1231,6 +1236,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 }
 
+res-image_end_offset = (highest_cluster + 1) * s-cluster_size;
 ret = 0;
 
 fail:
diff --git a/include/block/block.h b/include/block/block.h
index ffd1936..85eb3d9 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -213,6 +213,7 @@ typedef struct BdrvCheckResult {
 int check_errors;
 int corruptions_fixed;
 int leaks_fixed;
+int64_t image_end_offset;
 BlockFragInfo bfi;
 } BdrvCheckResult;
 
diff --git a/qemu-img.c b/qemu-img.c
index 85d3740..e80c1c5 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -475,6 +475,10 @@ static int img_check(int argc, char **argv)
 result.bfi.fragmented_clusters * 100.0 / 
result.bfi.allocated_clusters);
 }
 
+if (result.image_end_offset  0) {
+printf(Image end offset: % PRId64 \n, result.image_end_offset);
+}
+
 bdrv_delete(bs);
 
 if (ret  0 || result.check_errors) {
diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026
index 1602ccd..107a3ff 100755
--- a/tests/qemu-iotests/026
+++ b/tests/qemu-iotests/026
@@ -102,7 +102,7 @@ if [ $event == l2_load ]; then
 $QEMU_IO -c read $vmstate 0 128k  $BLKDBG_TEST_IMG | _filter_qemu_io
 fi
 
-$QEMU_IMG check $TEST_IMG 21 | grep -v refcount=1 reference=0
+_check_test_img 21 | grep -v refcount=1 reference=0
 
 done
 done
@@ -147,7 +147,7 @@ echo
 echo Event: $event; errno: $errno; imm: $imm; once: $once; write $vmstate
 $QEMU_IO -c write $vmstate 0 64M $BLKDBG_TEST_IMG | _filter_qemu_io
 
-$QEMU_IMG check $TEST_IMG 21 | grep -v refcount=1 reference=0
+_check_test_img 21 | grep -v refcount=1 reference=0
 
 done
 done
@@ -186,7 +186,7 @@ echo
 echo Event: $event; errno: $errno; imm: $imm; once: $once
 $QEMU_IO -c write -b 0 64k $BLKDBG_TEST_IMG | _filter_qemu_io
 
-$QEMU_IMG check $TEST_IMG 21 | grep -v refcount=1 reference=0
+_check_test_img 21 | grep -v refcount=1 reference=0
 
 done
 done
diff --git a/tests/qemu-iotests/036 b/tests/qemu-iotests/036
index 329533e..4dbfc57 100755
--- a/tests/qemu-iotests/036
+++ b/tests/qemu-iotests/036
@@ -59,7 +59,8 @@ _make_test_img 64M
 echo
 echo === Repair image ===
 echo
-$QEMU_IMG check -r all $TEST_IMG
+_check_test_img -r all
+
 ./qcow2.py $TEST_IMG dump-header
 
 # success, all done
diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039
index c5ae806..ae35175 100755
--- a/tests/qemu-iotests/039
+++ b/tests/qemu-iotests/039
@@ -86,7 +86,7 @@ $QEMU_IO -r -c read -P 0x5a 0 512 $TEST_IMG | 
_filter_qemu_io
 echo
 echo == Repairing the image file must succeed ==
 
-$QEMU_IMG check -r all $TEST_IMG
+_check_test_img -r all
 
 # The dirty bit must not be set
 ./qcow2.py $TEST_IMG dump-header | grep incompatible_features
diff --git a/tests/qemu-iotests/044.out b/tests/qemu-iotests/044.out
index 7a40071..9c48673 100644
--- a/tests/qemu-iotests/044.out
+++ b/tests/qemu-iotests/044.out
@@ -1,4 +1,5 @@
 No errors were found on the image.
+Image end offset: 4296447488
 .
 --
 Ran 1

[Qemu-devel] [PATCHv3 1/2] qemu-img: find the highest offset in use during check

2013-01-21 Thread Federico Simoncelli
This patch adds the support for reporting the highest offset in use by
an image. This is particularly useful after a conversion (or a rebase)
where the destination is a block device in order to find the actual
amount of space in use.

Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 block/qcow2-refcount.c   |   10 --
 include/block/block.h|1 +
 qemu-img.c   |4 
 tests/qemu-iotests/026   |6 +++---
 tests/qemu-iotests/036   |3 ++-
 tests/qemu-iotests/039   |2 +-
 tests/qemu-iotests/044.out   |1 +
 tests/qemu-iotests/common.rc |5 +++--
 8 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 6a95aa6..3444f85 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1116,7 +1116,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
   BdrvCheckMode fix)
 {
 BDRVQcowState *s = bs-opaque;
-int64_t size, i;
+int64_t size, i, highest_cluster;
 int nb_clusters, refcount1, refcount2;
 QCowSnapshot *sn;
 uint16_t *refcount_table;
@@ -1154,7 +1154,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 s-refcount_table_offset,
 s-refcount_table_size * sizeof(uint64_t));
 
-for(i = 0; i  s-refcount_table_size; i++) {
+for(i = 0, highest_cluster = 0; i  s-refcount_table_size; i++) {
 uint64_t offset, cluster;
 offset = s-refcount_table[i];
 cluster = offset  s-cluster_bits;
@@ -1197,6 +1197,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 
 refcount2 = refcount_table[i];
+
+if (refcount1  0 || refcount2  0) {
+highest_cluster = i;
+}
+
 if (refcount1 != refcount2) {
 
 /* Check if we're allowed to fix the mismatch */
@@ -1231,6 +1236,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 }
 
+res-highest_offset = (highest_cluster + 1) * s-cluster_size;
 ret = 0;
 
 fail:
diff --git a/include/block/block.h b/include/block/block.h
index ffd1936..b5cf6ac 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -213,6 +213,7 @@ typedef struct BdrvCheckResult {
 int check_errors;
 int corruptions_fixed;
 int leaks_fixed;
+int64_t highest_offset;
 BlockFragInfo bfi;
 } BdrvCheckResult;
 
diff --git a/qemu-img.c b/qemu-img.c
index 85d3740..7b977cd 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -475,6 +475,10 @@ static int img_check(int argc, char **argv)
 result.bfi.fragmented_clusters * 100.0 / 
result.bfi.allocated_clusters);
 }
 
+if (result.highest_offset  0) {
+printf(Highest offset in use: % PRId64 \n, result.highest_offset);
+}
+
 bdrv_delete(bs);
 
 if (ret  0 || result.check_errors) {
diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026
index 1602ccd..107a3ff 100755
--- a/tests/qemu-iotests/026
+++ b/tests/qemu-iotests/026
@@ -102,7 +102,7 @@ if [ $event == l2_load ]; then
 $QEMU_IO -c read $vmstate 0 128k  $BLKDBG_TEST_IMG | _filter_qemu_io
 fi
 
-$QEMU_IMG check $TEST_IMG 21 | grep -v refcount=1 reference=0
+_check_test_img 21 | grep -v refcount=1 reference=0
 
 done
 done
@@ -147,7 +147,7 @@ echo
 echo Event: $event; errno: $errno; imm: $imm; once: $once; write $vmstate
 $QEMU_IO -c write $vmstate 0 64M $BLKDBG_TEST_IMG | _filter_qemu_io
 
-$QEMU_IMG check $TEST_IMG 21 | grep -v refcount=1 reference=0
+_check_test_img 21 | grep -v refcount=1 reference=0
 
 done
 done
@@ -186,7 +186,7 @@ echo
 echo Event: $event; errno: $errno; imm: $imm; once: $once
 $QEMU_IO -c write -b 0 64k $BLKDBG_TEST_IMG | _filter_qemu_io
 
-$QEMU_IMG check $TEST_IMG 21 | grep -v refcount=1 reference=0
+_check_test_img 21 | grep -v refcount=1 reference=0
 
 done
 done
diff --git a/tests/qemu-iotests/036 b/tests/qemu-iotests/036
index 329533e..4dbfc57 100755
--- a/tests/qemu-iotests/036
+++ b/tests/qemu-iotests/036
@@ -59,7 +59,8 @@ _make_test_img 64M
 echo
 echo === Repair image ===
 echo
-$QEMU_IMG check -r all $TEST_IMG
+_check_test_img -r all
+
 ./qcow2.py $TEST_IMG dump-header
 
 # success, all done
diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039
index c5ae806..ae35175 100755
--- a/tests/qemu-iotests/039
+++ b/tests/qemu-iotests/039
@@ -86,7 +86,7 @@ $QEMU_IO -r -c read -P 0x5a 0 512 $TEST_IMG | 
_filter_qemu_io
 echo
 echo == Repairing the image file must succeed ==
 
-$QEMU_IMG check -r all $TEST_IMG
+_check_test_img -r all
 
 # The dirty bit must not be set
 ./qcow2.py $TEST_IMG dump-header | grep incompatible_features
diff --git a/tests/qemu-iotests/044.out b/tests/qemu-iotests/044.out
index 7a40071..6851878 100644
--- a/tests/qemu-iotests/044.out
+++ b/tests/qemu-iotests/044.out
@@ -1,4 +1,5 @@
 No errors were found on the image.
+Highest offset in use: 4296447488

[Qemu-devel] [PATCHv3 2/2] qemu-img: add json output option to the check command

2013-01-21 Thread Federico Simoncelli
This option --output=[human|json] make qemu-img check output an human
or JSON representation at the choice of the user.

Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 qapi-schema.json |   46 +++
 qemu-img-cmds.hx |4 +-
 qemu-img.c   |  232 +++--
 qemu-img.texi|5 +-
 4 files changed, 221 insertions(+), 66 deletions(-)

diff --git a/qapi-schema.json b/qapi-schema.json
index 6d7252b..d576728 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -245,6 +245,52 @@
'*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'] } 
}
 
 ##
+# @ImageCheck:
+#
+# Information about a QEMU image file check
+#
+# @filename: name of the image file checked
+#
+# @format: format of the image file checked
+#
+# @check-errors: number of unexpected errors occurred during check
+#
+# @highest-offset: #optional highest offset (in bytes) in use by the image,
+#  this field is present if the driver for the image format
+#  supports it
+#
+# @corruptions: #optional number of corruptions found during the check if any
+#
+# @leaks: #optional number of leaks found during the check if any
+#
+# @corruptions-fixed: #optional number of corruptions fixed during the check
+# if any
+#
+# @leaks-fixed: #optional number of leaks fixed during the check if any
+#
+# @total-clusters: #optional total number of clusters, this field is present
+#  if the driver for the image format supports it
+#
+# @allocated-clusters: #optional total number of allocated clusters, this
+#  field is present if the driver for the image format
+#  supports it
+#
+# @fragmented-clusters: #optional total number of fragmented clusters, this
+#   field is present if the driver for the image format
+#   supports it
+#
+# Since: 1.4
+#
+##
+
+{ 'type': 'ImageCheck',
+  'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int',
+   '*highest-offset': 'int', '*corruptions': 'int', '*leaks': 'int',
+   '*corruptions-fixed': 'int', '*leaks-fixed': 'int',
+   '*total-clusters': 'int', '*allocated-clusters': 'int',
+   '*fragmented-clusters': 'int' } }
+
+##
 # @StatusInfo:
 #
 # Information about VCPU run state
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index a181363..259fc14 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -10,9 +10,9 @@ STEXI
 ETEXI
 
 DEF(check, img_check,
-check [-f fmt] [-r [leaks | all]] filename)
+check [-f fmt] [--output=ofmt] [-r [leaks | all]] filename)
 STEXI
-@item check [-f @var{fmt}] [-r [leaks | all]] @var{filename}
+@item check [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] 
@var{filename}
 ETEXI
 
 DEF(create, img_create,
diff --git a/qemu-img.c b/qemu-img.c
index 7b977cd..bcb21d5 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -42,6 +42,16 @@ typedef struct img_cmd_t {
 int (*handler)(int argc, char **argv);
 } img_cmd_t;
 
+enum {
+OPTION_OUTPUT = 256,
+OPTION_BACKING_CHAIN = 257,
+};
+
+typedef enum OutputFormat {
+OFORMAT_JSON,
+OFORMAT_HUMAN,
+} OutputFormat;
+
 /* Default to cache=writeback as data integrity is not important for qemu-tcg. 
*/
 #define BDRV_O_FLAGS BDRV_O_CACHE_WB
 #define BDRV_DEFAULT_CACHE writeback
@@ -375,6 +385,96 @@ static int img_create(int argc, char **argv)
 return 0;
 }
 
+static void dump_json_image_check(ImageCheck *check)
+{
+Error *errp = NULL;
+QString *str;
+QmpOutputVisitor *ov = qmp_output_visitor_new();
+QObject *obj;
+visit_type_ImageCheck(qmp_output_get_visitor(ov),
+  check, NULL, errp);
+obj = qmp_output_get_qobject(ov);
+str = qobject_to_json_pretty(obj);
+assert(str != NULL);
+printf(%s\n, qstring_get_str(str));
+qobject_decref(obj);
+qmp_output_visitor_cleanup(ov);
+QDECREF(str);
+}
+
+static void dump_human_image_check(ImageCheck *check)
+{
+if (!(check-corruptions || check-leaks || check-check_errors)) {
+printf(No errors were found on the image.\n);
+} else {
+if (check-corruptions) {
+printf(\n% PRId64  errors were found on the image.\n
+Data may be corrupted, or further writes to the image 
+may corrupt it.\n,
+check-corruptions);
+}
+
+if (check-leaks) {
+printf(\n% PRId64  leaked clusters were found on the image.\n
+This means waste of disk space, but no harm to data.\n,
+check-leaks);
+}
+
+if (check-check_errors) {
+printf(\n% PRId64  internal errors have occurred during the 
check.\n,
+check-check_errors);
+}
+}
+
+if (check-total_clusters != 0  check-allocated_clusters != 0) {
+printf(% PRId64 /% PRId64 = %0.2f%% allocated, %0.2f%% 
fragmented\n,
+check

[Qemu-devel] [PATCH] qemu-img: find the highest offset in use during check

2012-12-10 Thread Federico Simoncelli
This patch adds the support for reporting the highest offset in use by
an image. This is particularly useful after a conversion (or a rebase)
where the destination is a block device in order to find the actual
amount of space in use.

Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 block.h|1 +
 block/qcow2-refcount.c |   10 --
 qemu-img.c |4 
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/block.h b/block.h
index 722c620..de42e8c 100644
--- a/block.h
+++ b/block.h
@@ -213,6 +213,7 @@ typedef struct BdrvCheckResult {
 int check_errors;
 int corruptions_fixed;
 int leaks_fixed;
+int64_t highest_offset;
 BlockFragInfo bfi;
 } BdrvCheckResult;
 
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 96224d1..017439d 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1116,7 +1116,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
   BdrvCheckMode fix)
 {
 BDRVQcowState *s = bs-opaque;
-int64_t size, i;
+int64_t size, i, highest_cluster;
 int nb_clusters, refcount1, refcount2;
 QCowSnapshot *sn;
 uint16_t *refcount_table;
@@ -1154,7 +1154,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 s-refcount_table_offset,
 s-refcount_table_size * sizeof(uint64_t));
 
-for(i = 0; i  s-refcount_table_size; i++) {
+for(i = 0, highest_cluster = 0; i  s-refcount_table_size; i++) {
 uint64_t offset, cluster;
 offset = s-refcount_table[i];
 cluster = offset  s-cluster_bits;
@@ -1197,6 +1197,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 
 refcount2 = refcount_table[i];
+
+if (refcount1  0 || refcount2  0) {
+highest_cluster = i;
+}
+
 if (refcount1 != refcount2) {
 
 /* Check if we're allowed to fix the mismatch */
@@ -1231,6 +1236,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 }
 
+res-highest_offset = (highest_cluster + 1) * s-cluster_size;
 ret = 0;
 
 fail:
diff --git a/qemu-img.c b/qemu-img.c
index e29e01b..3a8090b 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -470,6 +470,10 @@ static int img_check(int argc, char **argv)
 result.bfi.fragmented_clusters * 100.0 / 
result.bfi.allocated_clusters);
 }
 
+if (result.highest_offset  0) {
+printf(Highest offset in use: %lu\n, result.highest_offset);
+}
+
 bdrv_delete(bs);
 
 if (ret  0 || result.check_errors) {
-- 
1.7.1




[Qemu-devel] [PATCHv2 1/2] qemu-img: find the highest offset in use during check

2012-12-10 Thread Federico Simoncelli
This patch adds the support for reporting the highest offset in use by
an image. This is particularly useful after a conversion (or a rebase)
where the destination is a block device in order to find the actual
amount of space in use.

Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 block.h  |1 +
 block/qcow2-refcount.c   |   10 --
 qemu-img.c   |4 
 tests/qemu-iotests/026   |6 +++---
 tests/qemu-iotests/036   |3 ++-
 tests/qemu-iotests/039   |2 +-
 tests/qemu-iotests/common.rc |5 +++--
 7 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/block.h b/block.h
index 722c620..de42e8c 100644
--- a/block.h
+++ b/block.h
@@ -213,6 +213,7 @@ typedef struct BdrvCheckResult {
 int check_errors;
 int corruptions_fixed;
 int leaks_fixed;
+int64_t highest_offset;
 BlockFragInfo bfi;
 } BdrvCheckResult;
 
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 96224d1..017439d 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1116,7 +1116,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
   BdrvCheckMode fix)
 {
 BDRVQcowState *s = bs-opaque;
-int64_t size, i;
+int64_t size, i, highest_cluster;
 int nb_clusters, refcount1, refcount2;
 QCowSnapshot *sn;
 uint16_t *refcount_table;
@@ -1154,7 +1154,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 s-refcount_table_offset,
 s-refcount_table_size * sizeof(uint64_t));
 
-for(i = 0; i  s-refcount_table_size; i++) {
+for(i = 0, highest_cluster = 0; i  s-refcount_table_size; i++) {
 uint64_t offset, cluster;
 offset = s-refcount_table[i];
 cluster = offset  s-cluster_bits;
@@ -1197,6 +1197,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 
 refcount2 = refcount_table[i];
+
+if (refcount1  0 || refcount2  0) {
+highest_cluster = i;
+}
+
 if (refcount1 != refcount2) {
 
 /* Check if we're allowed to fix the mismatch */
@@ -1231,6 +1236,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 }
 
+res-highest_offset = (highest_cluster + 1) * s-cluster_size;
 ret = 0;
 
 fail:
diff --git a/qemu-img.c b/qemu-img.c
index e29e01b..45c1ec1 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -470,6 +470,10 @@ static int img_check(int argc, char **argv)
 result.bfi.fragmented_clusters * 100.0 / 
result.bfi.allocated_clusters);
 }
 
+if (result.highest_offset  0) {
+printf(Highest offset in use: % PRId64 \n, result.highest_offset);
+}
+
 bdrv_delete(bs);
 
 if (ret  0 || result.check_errors) {
diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026
index 1602ccd..107a3ff 100755
--- a/tests/qemu-iotests/026
+++ b/tests/qemu-iotests/026
@@ -102,7 +102,7 @@ if [ $event == l2_load ]; then
 $QEMU_IO -c read $vmstate 0 128k  $BLKDBG_TEST_IMG | _filter_qemu_io
 fi
 
-$QEMU_IMG check $TEST_IMG 21 | grep -v refcount=1 reference=0
+_check_test_img 21 | grep -v refcount=1 reference=0
 
 done
 done
@@ -147,7 +147,7 @@ echo
 echo Event: $event; errno: $errno; imm: $imm; once: $once; write $vmstate
 $QEMU_IO -c write $vmstate 0 64M $BLKDBG_TEST_IMG | _filter_qemu_io
 
-$QEMU_IMG check $TEST_IMG 21 | grep -v refcount=1 reference=0
+_check_test_img 21 | grep -v refcount=1 reference=0
 
 done
 done
@@ -186,7 +186,7 @@ echo
 echo Event: $event; errno: $errno; imm: $imm; once: $once
 $QEMU_IO -c write -b 0 64k $BLKDBG_TEST_IMG | _filter_qemu_io
 
-$QEMU_IMG check $TEST_IMG 21 | grep -v refcount=1 reference=0
+_check_test_img 21 | grep -v refcount=1 reference=0
 
 done
 done
diff --git a/tests/qemu-iotests/036 b/tests/qemu-iotests/036
index 329533e..4dbfc57 100755
--- a/tests/qemu-iotests/036
+++ b/tests/qemu-iotests/036
@@ -59,7 +59,8 @@ _make_test_img 64M
 echo
 echo === Repair image ===
 echo
-$QEMU_IMG check -r all $TEST_IMG
+_check_test_img -r all
+
 ./qcow2.py $TEST_IMG dump-header
 
 # success, all done
diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039
index c5ae806..ae35175 100755
--- a/tests/qemu-iotests/039
+++ b/tests/qemu-iotests/039
@@ -86,7 +86,7 @@ $QEMU_IO -r -c read -P 0x5a 0 512 $TEST_IMG | 
_filter_qemu_io
 echo
 echo == Repairing the image file must succeed ==
 
-$QEMU_IMG check -r all $TEST_IMG
+_check_test_img -r all
 
 # The dirty bit must not be set
 ./qcow2.py $TEST_IMG dump-header | grep incompatible_features
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index aef5f52..22c0186 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -161,9 +161,10 @@ _cleanup_test_img()
 
 _check_test_img()
 {
-$QEMU_IMG check -f $IMGFMT $TEST_IMG 21 | \
+$QEMU_IMG check $@ -f $IMGFMT $TEST_IMG 21 | \
 grep -v fragmented$ | \
-   sed -e 's/qemu

[Qemu-devel] [PATCH 2/2] qemu-img: add json output option to the check command

2012-12-10 Thread Federico Simoncelli
This option --output=[human|json] make qemu-img check output an human
or JSON representation at the choice of the user.

Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 qapi-schema.json |   38 +
 qemu-img-cmds.hx |4 +-
 qemu-img.c   |  246 +++---
 qemu-img.texi|5 +-
 4 files changed, 221 insertions(+), 72 deletions(-)

diff --git a/qapi-schema.json b/qapi-schema.json
index 5dfa052..8877285 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -245,6 +245,44 @@
'*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'] } 
}
 
 ##
+# @ImageCheck:
+#
+# Information about a QEMU image file check
+#
+# @filename: name of the image file checked
+#
+# @format: format of the image file checked
+#
+# @check-errors: number of unexpected errors occurred during check
+#
+# @highest-offset: #optional highest offset (in bytes) in use by the image
+#
+# @corruptions: #optional number of corruptions found during the check
+#
+# @leaks: #optional number of leaks found during the check
+#
+# @corruptions-fixed: #optional number of corruptions fixed during the check
+#
+# @leaks-fixed: #optional number of leaks fixed during the check
+#
+# @total-clusters: #optional total number of clusters
+#
+# @allocated-clusters: #optional total number of allocated clusters
+#
+# @fragmented-clusters: #optional total number of fragmented clusters
+#
+# Since: 1.4
+#
+##
+
+{ 'type': 'ImageCheck',
+  'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int',
+   '*highest-offset': 'int', '*corruptions': 'int', '*leaks': 'int',
+   '*corruptions-fixed': 'int', '*leaks-fixed': 'int',
+   '*total-clusters': 'int', '*allocated-clusters': 'int',
+   '*fragmented-clusters': 'int' } }
+
+##
 # @StatusInfo:
 #
 # Information about VCPU run state
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index a181363..259fc14 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -10,9 +10,9 @@ STEXI
 ETEXI
 
 DEF(check, img_check,
-check [-f fmt] [-r [leaks | all]] filename)
+check [-f fmt] [--output=ofmt] [-r [leaks | all]] filename)
 STEXI
-@item check [-f @var{fmt}] [-r [leaks | all]] @var{filename}
+@item check [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] 
@var{filename}
 ETEXI
 
 DEF(create, img_create,
diff --git a/qemu-img.c b/qemu-img.c
index 45c1ec1..18ba5c2 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -42,6 +42,16 @@ typedef struct img_cmd_t {
 int (*handler)(int argc, char **argv);
 } img_cmd_t;
 
+enum {
+OPTION_OUTPUT = 256,
+OPTION_BACKING_CHAIN = 257,
+};
+
+typedef enum OutputFormat {
+OFORMAT_JSON,
+OFORMAT_HUMAN,
+} OutputFormat;
+
 /* Default to cache=writeback as data integrity is not important for qemu-tcg. 
*/
 #define BDRV_O_FLAGS BDRV_O_CACHE_WB
 #define BDRV_DEFAULT_CACHE writeback
@@ -370,6 +380,122 @@ out:
 return 0;
 }
 
+static void dump_json_image_check(ImageCheck *check)
+{
+Error *errp = NULL;
+QString *str;
+QmpOutputVisitor *ov = qmp_output_visitor_new();
+QObject *obj;
+visit_type_ImageCheck(qmp_output_get_visitor(ov),
+  check, NULL, errp);
+obj = qmp_output_get_qobject(ov);
+str = qobject_to_json_pretty(obj);
+assert(str != NULL);
+printf(%s\n, qstring_get_str(str));
+qobject_decref(obj);
+qmp_output_visitor_cleanup(ov);
+QDECREF(str);
+}
+
+static void dump_human_image_check(ImageCheck *check)
+{
+if (check-corruptions_fixed || check-leaks_fixed) {
+ printf(The following inconsistencies were found and repaired:\n\n
+% PRId64  leaked clusters\n
+% PRId64  corruptions\n\n
+Double checking the fixed image now...\n,
+check-leaks_fixed,
+check-corruptions_fixed);
+}
+
+if (!(check-corruptions || check-leaks || check-check_errors)) {
+printf(No errors were found on the image.\n);
+} else {
+if (check-corruptions) {
+printf(\n% PRId64  errors were found on the image.\n
+Data may be corrupted, or further writes to the image 
+may corrupt it.\n,
+check-corruptions);
+}
+
+if (check-leaks) {
+printf(\n% PRId64  leaked clusters were found on the image.\n
+This means waste of disk space, but no harm to data.\n,
+check-leaks);
+}
+
+if (check-check_errors) {
+printf(\n% PRId64  internal errors have occurred during the 
check.\n,
+check-check_errors);
+}
+}
+
+if (check-total_clusters != 0  check-allocated_clusters != 0) {
+printf(% PRId64 /% PRId64 = %0.2f%% allocated, %0.2f%% 
fragmented\n,
+check-allocated_clusters, check-total_clusters,
+check-allocated_clusters * 100.0 / check-total_clusters,
+check-fragmented_clusters * 100.0 / check

Re: [Qemu-devel] [PATCH 4/7] block: close unused image files at the end of streaming

2012-04-06 Thread Federico Simoncelli
- Original Message -
 From: Paolo Bonzini pbonz...@redhat.com
 To: qemu-devel@nongnu.org
 Cc: Marcelo Tosatti mtosa...@redhat.com, Federico Simoncelli 
 fsimo...@redhat.com
 Sent: Thursday, April 5, 2012 5:42:58 PM
 Subject: [PATCH 4/7] block: close unused image files at the end of streaming
 
 From: Marcelo Tosatti mtosa...@redhat.com
 
 Close the now unused images that were part of the previous backing
 file
 chain and adjust -backing_hd properly.
 
 Note that this only works with relative paths.

s/relative/absolute/

 Given the images:
 
  /tmp/a/base.raw
  /tmp/a/snap1.qcow2
  /tmp/b/snap2.qcow2
 
 chained as:
 
  base(bak:) - snap1(bak:base.raw) -
  snap2(bak:../a/snap1.qcow2)
 
 merging snap1 and snap2 we will obtain:
 
  base(bak:) - snap2(bak:base.raw)
 
 However this should be maintained by the user/admin: one can also
 screw up relative paths using qemu-img manually.

The patch is fine but I disagree with this comment. The user/admin didn't make
any mistake and he shouldn't be in charge of additional maintenance (which is
also tricky since the VM is running).

-- 
Federico



[Qemu-devel] [PATCH v3] qapi: add c_fun to escape function names

2012-03-20 Thread Federico Simoncelli
Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 scripts/qapi-commands.py |   14 +++---
 scripts/qapi-types.py|4 ++--
 scripts/qapi-visit.py|4 ++--
 scripts/qapi.py  |5 -
 4 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/scripts/qapi-commands.py b/scripts/qapi-commands.py
index 3aabf61..30a24d2 100644
--- a/scripts/qapi-commands.py
+++ b/scripts/qapi-commands.py
@@ -42,7 +42,7 @@ def generate_command_decl(name, args, ret_type):
 return mcgen('''
 %(ret_type)s qmp_%(name)s(%(args)sError **errp);
 ''',
- ret_type=c_type(ret_type), name=c_var(name), 
args=arglist).strip()
+ ret_type=c_type(ret_type), name=c_fun(name), 
args=arglist).strip()
 
 def gen_sync_call(name, args, ret_type, indent=0):
 ret = 
@@ -59,7 +59,7 @@ def gen_sync_call(name, args, ret_type, indent=0):
 %(retval)sqmp_%(name)s(%(args)serrp);
 
 ''',
-name=c_var(name), args=arglist, retval=retval).rstrip()
+name=c_fun(name), args=arglist, retval=retval).rstrip()
 if ret_type:
 ret += \n + mcgen(
 if (!error_is_set(errp)) {
@@ -74,7 +74,7 @@ if (!error_is_set(errp)) {
 def gen_marshal_output_call(name, ret_type):
 if not ret_type:
 return 
-return qmp_marshal_output_%s(retval, ret, errp); % c_var(name)
+return qmp_marshal_output_%s(retval, ret, errp); % c_fun(name)
 
 def gen_visitor_output_containers_decl(ret_type):
 ret = 
@@ -198,16 +198,16 @@ static void qmp_marshal_output_%(c_name)s(%(c_ret_type)s 
ret_in, QObject **ret_o
 qapi_dealloc_visitor_cleanup(md);
 }
 ''',
-c_ret_type=c_type(ret_type), c_name=c_var(name),
+c_ret_type=c_type(ret_type), c_name=c_fun(name),
 visitor=type_visitor(ret_type))
 
 return ret
 
 def gen_marshal_input_decl(name, args, ret_type, middle_mode):
 if middle_mode:
-return 'int qmp_marshal_input_%s(Monitor *mon, const QDict *qdict, 
QObject **ret)' % c_var(name)
+return 'int qmp_marshal_input_%s(Monitor *mon, const QDict *qdict, 
QObject **ret)' % c_fun(name)
 else:
-return 'static void qmp_marshal_input_%s(QDict *args, QObject **ret, 
Error **errp)' % c_var(name)
+return 'static void qmp_marshal_input_%s(QDict *args, QObject **ret, 
Error **errp)' % c_fun(name)
 
 
 
@@ -298,7 +298,7 @@ def gen_registry(commands):
 registry += mcgen('''
 qmp_register_command(%(name)s, qmp_marshal_input_%(c_name)s);
 ''',
- name=cmd['command'], c_name=c_var(cmd['command']))
+ name=cmd['command'], c_name=c_fun(cmd['command']))
 pop_indent()
 ret = mcgen('''
 static void qmp_init_marshal(void)
diff --git a/scripts/qapi-types.py b/scripts/qapi-types.py
index 727fb77..4a734f5 100644
--- a/scripts/qapi-types.py
+++ b/scripts/qapi-types.py
@@ -100,7 +100,7 @@ typedef enum %(name)s
 %(abbrev)s_%(value)s = %(i)d,
 ''',
  abbrev=de_camel_case(name).upper(),
- value=c_var(value).upper(),
+ value=c_fun(value).upper(),
  i=i)
 i += 1
 
@@ -126,7 +126,7 @@ struct %(name)s
 %(c_type)s %(c_name)s;
 ''',
  c_type=c_type(typeinfo[key]),
- c_name=c_var(key))
+ c_name=c_fun(key))
 
 ret += mcgen('''
 };
diff --git a/scripts/qapi-visit.py b/scripts/qapi-visit.py
index 54117d4..78c947c 100644
--- a/scripts/qapi-visit.py
+++ b/scripts/qapi-visit.py
@@ -129,9 +129,9 @@ void visit_type_%(name)s(Visitor *m, %(name)s ** obj, const 
char *name, Error **
 break;
 ''',
 abbrev = de_camel_case(name).upper(),
-enum = de_camel_case(key).upper(),
+enum = c_fun(de_camel_case(key)).upper(),
 c_type=members[key],
-c_name=c_var(key))
+c_name=c_fun(key))
 
 ret += mcgen('''
 default:
diff --git a/scripts/qapi.py b/scripts/qapi.py
index 6e05469..e062336 100644
--- a/scripts/qapi.py
+++ b/scripts/qapi.py
@@ -131,7 +131,10 @@ def camel_case(name):
 return new_name
 
 def c_var(name):
-return '_'.join(name.split('-')).lstrip(*)
+return name.replace('-', '_').lstrip(*)
+
+def c_fun(name):
+return c_var(name).replace('.', '_')
 
 def c_list_type(name):
 return '%sList' % name
-- 
1.7.1




[Qemu-devel] [PATCH] qapi: escaping the dots in c_var

2012-03-15 Thread Federico Simoncelli
This allows qapi commands and types with dots.

Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 scripts/qapi.py |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/scripts/qapi.py b/scripts/qapi.py
index 6e05469..4090c55 100644
--- a/scripts/qapi.py
+++ b/scripts/qapi.py
@@ -131,7 +131,7 @@ def camel_case(name):
 return new_name
 
 def c_var(name):
-return '_'.join(name.split('-')).lstrip(*)
+return name.replace('-', '_').replace('.', '_').lstrip('*')
 
 def c_list_type(name):
 return '%sList' % name
-- 
1.7.1




Re: [Qemu-devel] [PATCH v4 10/10] Add the drive-reopen command

2012-03-14 Thread Federico Simoncelli
- Original Message -
 From: Kevin Wolf kw...@redhat.com
 To: Federico Simoncelli fsimo...@redhat.com
 Cc: Eric Blake ebl...@redhat.com, qemu-devel@nongnu.org, 
 stefa...@linux.vnet.ibm.com, lcapitul...@redhat.com,
 Paolo Bonzini pbonz...@redhat.com, Markus Armbruster arm...@redhat.com
 Sent: Wednesday, March 14, 2012 10:34:08 AM
 Subject: Re: [Qemu-devel] [PATCH v4 10/10] Add the drive-reopen command
 
  * We could leave it as it is, a distinct command that is not part
  of
the transaction and that it's closing the old image before
opening
the new one.
 
 Yes, this would be the short-term preliminary solution. I would tend
 to
 leave it to downstreams to implement it as an extension, though.
 
  This is not completely correct, the main intent was to not spread
  one image chain across two storage domains (making it incomplete if one
  of them was missing). In the next oVirt release a VM can have
  different disks on different storage domains, so this wouldn't be a special
  case but just a normal situation.
 
 The problem with this kind of argument is that we're not developing
 only for oVirt, but need to look for what makes sense for any management
 tool (or even just direct users of qemu).

That is a general rule, and it's perfectly agreeable, but it has nothing to
do with what I was saying. Do you know any management tool or any reason to
forbid to a VM to have different disks on different storages?
In fact qemu-kvm doesn't even know where the images are coming from (what
particular iscsi connection or what particular NFS server).

What I was asking was a tool (mirroring) to avoid the split of an image
chain between multiple storages, and not a tool to avoid the ability to
have full image chains on different storages.

There is nothing oVirt specific here, it came into play only when I said that
for us is not a problem (for once :-).

-- 
Federico



Re: [Qemu-devel] [PATCH v4 10/10] Add the drive-reopen command

2012-03-13 Thread Federico Simoncelli
- Original Message -
 From: Eric Blake ebl...@redhat.com
 To: Paolo Bonzini pbonz...@redhat.com
 Cc: kw...@redhat.com, fsimo...@redhat.com, qemu-devel@nongnu.org, 
 stefa...@linux.vnet.ibm.com, lcapitul...@redhat.com
 Sent: Tuesday, March 13, 2012 9:48:10 PM
 Subject: Re: [Qemu-devel] [PATCH v4 10/10] Add the drive-reopen command
 
 On 03/06/2012 10:56 AM, Paolo Bonzini wrote:
  From: Federico Simoncelli fsimo...@redhat.com
  
  Signed-off-by: Federico Simoncelli fsimo...@redhat.com
  Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 
   ##
  +# @drive-reopen
  +#
  +# Assigns a new image file to a device.
  +#
  +# @device: the name of the device for which we are changing the
  image file.
  +#
  +# @new-image-file: the target of the new image. If the file
  doesn't exists the
  +#  command will fail.
  +#
  +# @format: #optional the format of the new image, default is
  'qcow2'.
  +#
  +# Returns: nothing on success
  +#  If @device is not a valid block device, DeviceNotFound
  +#  If @new-image-file can't be opened, OpenFileFailed
  +#  If @format is invalid, InvalidBlockFormat
  +#
  +# Since 1.1
  +##
  +{ 'command': 'drive-reopen',
  +  'data': { 'device': 'str', 'new-image-file': 'str', '*format':
  'str' } }
 
 I still think we need a 'drive-reopen' action included in
 'transaction',
 as an 11/10 on this series.  For disk migration, it is true that you
 can
 migrate one disk at a time, and therefore only need to reopen one
 disk
 at a time, to get the guarantee that for a single disk image, the
 current state of that image will be guaranteed to be consistent using
 only one storage domain.

I'm not sure if this was already addressed on this mailing list but
the main problem is that as general rule a qcow file cannot be opened
in r/w mode twice. I believe there was only one exception to that
with the live migration and it generated several issues.

That said, reopen is really hard to be implemented as a transaction
without breaking that rule. For example in the blkmirror case you'd
need to open the destination image in r/w while the mirroring is in
action (already having the same image in r/w mode).

There are several solutions here but they are either really hard to
implement or non definitive. For example:

* We could try to implement the reopen command for each special case,
  eg: blkmirror, reopening the same image, etc... and in such cases
  reusing the same bs that we already have. The downside is that this
  command will be coupled with all this special cases.

* We could use the transaction APIs without actually making it
  transaction (if we fail in the middle we can't rollback). The only
  advantage of this is that we'd provide a consistent API to libvirt
  and we would postpone the problem to the future. Anyway I strongly
  discourage this as it's completely unsafe and it's going to break
  the transaction semantic. Moreover it's a solution that relies too
  much on the hope of finding something appropriate in the future.

* We could leave it as it is, a distinct command that is not part of
  the transaction and that it's closing the old image before opening
  the new one.

 But since the API allows the creation of two mirrors in one command,
 I'm
 worried that someone will try to start a mirror on two disks at once,
 but then be stuck doing two separate 'drive-reopen' commands.  If the
 first succeeds but the second fails, you have now stranded the qemu
 process across two storage domains, which is exactly what we were
 trying
 to avoid in the first place by inventing transactions.  That is, even
 if
 all disks are individually consistent in a single domain, the act of
 migrating then reopening one disk at a time means you will have a
 window
 where disk 1 and disk 2 are opened on different storage domains.

This is not completely correct, the main intent was to not spread one
image chain across two storage domains (making it incomplete if one of
them was missing). In the next oVirt release a VM can have different
disks on different storage domains, so this wouldn't be a special case
but just a normal situation.

-- 
Federico



[Qemu-devel] [PATCH] Add the drive-reopen command

2012-03-05 Thread Federico Simoncelli
Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 blockdev.c   |   40 +++-
 hmp-commands.hx  |   16 
 hmp.c|   11 +++
 hmp.h|1 +
 qapi-schema.json |   22 ++
 5 files changed, 77 insertions(+), 13 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 058b6d5..56da5c9 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -646,9 +646,8 @@ void do_commit(Monitor *mon, const QDict *qdict)
 }
 }
 
-void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
-bool has_format, const char *format,
-Error **errp)
+static void change_blockdev_image(const char *device, const char 
*new_image_file,
+  const char *format, bool create, Error 
**errp)
 {
 BlockDriverState *bs;
 BlockDriver *drv, *old_drv, *proto_drv;
@@ -671,7 +670,7 @@ void qmp_blockdev_snapshot_sync(const char *device, const 
char *snapshot_file,
 old_drv = bs-drv;
 flags = bs-open_flags;
 
-if (!has_format) {
+if (!format) {
 format = qcow2;
 }
 
@@ -681,24 +680,26 @@ void qmp_blockdev_snapshot_sync(const char *device, const 
char *snapshot_file,
 return;
 }
 
-proto_drv = bdrv_find_protocol(snapshot_file);
+proto_drv = bdrv_find_protocol(new_image_file);
 if (!proto_drv) {
 error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
 return;
 }
 
-ret = bdrv_img_create(snapshot_file, format, bs-filename,
-  bs-drv-format_name, NULL, -1, flags);
-if (ret) {
-error_set(errp, QERR_UNDEFINED_ERROR);
-return;
+if (create) {
+ret = bdrv_img_create(new_image_file, format, bs-filename,
+  bs-drv-format_name, NULL, -1, flags);
+if (ret) {
+error_set(errp, QERR_UNDEFINED_ERROR);
+return;
+}
 }
 
 bdrv_drain_all();
 bdrv_flush(bs);
 
 bdrv_close(bs);
-ret = bdrv_open(bs, snapshot_file, flags, drv);
+ret = bdrv_open(bs, new_image_file, flags, drv);
 /*
  * If reopening the image file we just created fails, fall back
  * and try to re-open the original image. If that fails too, we
@@ -709,11 +710,25 @@ void qmp_blockdev_snapshot_sync(const char *device, const 
char *snapshot_file,
 if (ret != 0) {
 error_set(errp, QERR_OPEN_FILE_FAILED, old_filename);
 } else {
-error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file);
+error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
 }
 }
 }
 
+void qmp_drive_reopen(const char *device, const char *new_image_file,
+  bool has_format, const char *format, Error **errp)
+{
+change_blockdev_image(device, new_image_file,
+  has_format ? format : NULL, false, errp);
+}
+
+void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
+bool has_format, const char *format,
+Error **errp)
+{
+change_blockdev_image(device, snapshot_file,
+  has_format ? format : NULL, true, errp);
+}
 
 /* New and old BlockDriverState structs for group snapshots */
 typedef struct BlkTransactionStates {
@@ -877,7 +892,6 @@ exit:
 return;
 }
 
-
 static void eject_device(BlockDriverState *bs, int force, Error **errp)
 {
 if (bdrv_in_use(bs)) {
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 64b3656..9e08123 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -900,6 +900,22 @@ Snapshot device, using snapshot file as target if provided
 ETEXI
 
 {
+.name   = drive-reopen,
+.args_type  = device:B,new-image-file:s,format:s?,
+.params = device new-image-file [format],
+.help   = Assigns a new image file to a device.\n\t\t\t
+  The image will be opened using the format\n\t\t\t
+  specified or 'qcow2' by default.\n\t\t\t,
+.mhandler.cmd = hmp_drive_reopen,
+},
+
+STEXI
+@item drive-reopen
+@findex drive-reopen
+Assigns a new image file to a device.
+ETEXI
+
+{
 .name   = drive_add,
 .args_type  = pci_addr:s,opts:s,
 .params = [[domain:]bus:]slot\n
diff --git a/hmp.c b/hmp.c
index 3a54455..ab069ad 100644
--- a/hmp.c
+++ b/hmp.c
@@ -706,6 +706,17 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict)
 hmp_handle_error(mon, errp);
 }
 
+void hmp_drive_reopen(Monitor *mon, const QDict *qdict)
+{
+const char *device = qdict_get_str(qdict, device);
+const char *filename = qdict_get_str(qdict, new-image-file);
+const char *format = qdict_get_try_str(qdict, format);
+Error *errp = NULL;
+
+qmp_drive_reopen(device, filename, !!format, format, errp);
+hmp_handle_error(mon, errp);
+}
+
 void hmp_migrate_cancel

[Qemu-devel] [PATCH] add reopen to blockdev-transaction

2012-03-01 Thread Federico Simoncelli
Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 blockdev.c   |8 
 qapi-schema.json |   12 
 qmp-commands.hx  |6 +-
 3 files changed, 25 insertions(+), 1 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 56da5c9..36fe07c 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -798,6 +798,14 @@ void qmp_blockdev_transaction(BlockdevActionList *dev_list,
  dev_info-mirror-target);
 break;
 
+case BLOCKDEV_ACTION_KIND_REOPEN:
+device = dev_info-reopen-device;
+if (dev_info-format-has_format) {
+format = dev_info-reopen-format;
+}
+new_source = g_strdup(dev_info-reopen-target);
+break;
+
 default:
 abort();
 }
diff --git a/qapi-schema.json b/qapi-schema.json
index b33875d..17f7548 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1145,6 +1145,17 @@
 { 'type': 'BlockdevMirror',
   'data': { 'device': 'str', 'target': 'str', '*format': 'str',
 '*reuse': 'bool' } }
+##
+# @BlockdevReopen
+#
+# @device: the name of the device to reopen.
+#
+# @target: the target of the new image.
+#
+# @format: #optional the format of the new image, default is 'qcow2'.
+##
+{ 'type': 'BlockdevReopen',
+  'data': { 'device': 'str', 'target': 'str', '*format': 'str' } }
 
 ##
 # @BlockdevAction
@@ -1156,6 +1167,7 @@
   'data': {
'snapshot': 'BlockdevSnapshot',
'mirror': 'BlockdevMirror',
+   'reopen': 'BlockdevReopen',
} }
 
 ##
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 50ac5a0..317c448 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -720,7 +720,7 @@ Arguments:
 
 actions array:
 - type: the operation to perform.  The only supported
-  values are snapshot and mirror. (json-string)
+  values are snapshot, mirror and reopen. (json-string)
 - data: a dictionary.  The contents depend on the value
   of type.  When type is snapshot:
   - device: device name to snapshot (json-string)
@@ -734,6 +734,10 @@ actions array:
   - format: format of new image (json-string, optional)
   - reuse: whether QEMU should look for an existing image file
 (json-bool, optional, default false)
+  When type is reopen:
+  - device: device name to reopen (json-string)
+  - target: name of destination image file (json-string)
+  - format: format of new image (json-string, optional)
 
 Example:
 
-- 
1.7.1




[Qemu-devel] [PATCHv3] Add blkmirror block driver

2012-02-29 Thread Federico Simoncelli
Mirrored writes are used by live block copy.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 Makefile.objs  |2 +-
 block/blkmirror.c  |  255 
 cutils.c   |   30 ++
 docs/blkmirror.txt |   15 +++
 qemu-common.h  |1 +
 5 files changed, 302 insertions(+), 1 deletions(-)
 create mode 100644 block/blkmirror.c
 create mode 100644 docs/blkmirror.txt

diff --git a/Makefile.objs b/Makefile.objs
index 808de6a..2302c96 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -34,7 +34,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o 
dmg.o bochs.o vpc.o vv
 block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o
 block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-nested-y += qed-check.o
-block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
+block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o 
blkmirror.o
 block-nested-y += stream.o
 block-nested-$(CONFIG_WIN32) += raw-win32.o
 block-nested-$(CONFIG_POSIX) += raw-posix.o
diff --git a/block/blkmirror.c b/block/blkmirror.c
new file mode 100644
index 000..c98b162
--- /dev/null
+++ b/block/blkmirror.c
@@ -0,0 +1,255 @@
+/*
+ * Block driver for mirrored writes.
+ *
+ * Copyright (C) 2011-2012 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include stdarg.h
+#include block_int.h
+
+typedef struct {
+BlockDriverState *bs[2];
+} BdrvMirrorState;
+
+typedef struct DupAIOCB DupAIOCB;
+
+typedef struct SingleAIOCB {
+BlockDriverAIOCB *aiocb;
+int finished;
+DupAIOCB *parent;
+} SingleAIOCB;
+
+struct DupAIOCB {
+BlockDriverAIOCB common;
+int count;
+
+BlockDriverCompletionFunc *cb;
+SingleAIOCB aios[2];
+int ret;
+};
+
+/* Valid blkmirror filenames look like
+ * blkmirror:fmt1:path/to/image1:fmt2:path/to/image2 */
+static int blkmirror_open(BlockDriverState *bs, const char *filename, int 
flags)
+{
+int ret = 0, i;
+char *tmpbuf, *tok[4], *next;
+BlockDriver *drv1, *drv2;
+BdrvMirrorState *m = bs-opaque;
+
+m-bs[0] = bdrv_new();
+if (m-bs[0] == NULL) {
+return -ENOMEM;
+}
+
+m-bs[1] = bdrv_new();
+if (m-bs[1] == NULL) {
+bdrv_delete(m-bs[0]);
+return -ENOMEM;
+}
+
+tmpbuf = g_malloc(strlen(filename) + 1);
+pstrcpy(tmpbuf, strlen(filename) + 1, filename);
+
+/* Parse the blkmirror: prefix */
+if (!strstart(tmpbuf, blkmirror:, (const char **) next)) {
+next = tmpbuf;
+}
+
+for (i = 0; i  4; i++) {
+if (!next) {
+ret = -EINVAL;
+goto out;
+}
+tok[i] = pstrtok_r(NULL, :, '\\', next);
+}
+
+drv1 = bdrv_find_whitelisted_format(tok[0]);
+drv2 = bdrv_find_whitelisted_format(tok[2]);
+
+if (!drv1 || !drv2) {
+ret = -EINVAL;
+goto out;
+}
+
+ret = bdrv_open(m-bs[0], tok[1], flags, drv1);
+if (ret  0) {
+goto out;
+}
+
+ret = bdrv_open(m-bs[0], tok[3], flags, drv2);
+if (ret  0) {
+goto out;
+}
+
+ out:
+g_free(tmpbuf);
+
+if (ret  0) {
+for (i = 0; i  2; i++) {
+bdrv_delete(m-bs[i]);
+m-bs[i] = NULL;
+}
+}
+
+return ret;
+}
+
+static void blkmirror_close(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs-opaque;
+int i;
+
+for (i = 0; i  2; i++) {
+bdrv_delete(m-bs[i]);
+m-bs[i] = NULL;
+}
+}
+
+static coroutine_fn int blkmirror_co_flush(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs-opaque;
+int ret;
+
+ret = bdrv_co_flush(m-bs[0]);
+if (ret  0) {
+return ret;
+}
+
+return bdrv_co_flush(m-bs[1]);
+}
+
+static int64_t blkmirror_getlength(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs-opaque;
+
+return bdrv_getlength(m-bs[0]);
+}
+
+static BlockDriverAIOCB *blkmirror_aio_readv(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+BdrvMirrorState *m = bs-opaque;
+return bdrv_aio_readv(m-bs[0], sector_num, qiov, nb_sectors, cb, opaque);
+}
+
+static void dup_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+DupAIOCB *acb = container_of(blockacb, DupAIOCB, common);
+int i;
+
+for (i = 0 ; i  2; i++) {
+if (!acb-aios[i].finished) {
+bdrv_aio_cancel(acb-aios[i].aiocb);
+}
+}
+qemu_aio_release(acb);
+}
+
+static AIOPool dup_aio_pool = {
+.aiocb_size = sizeof(DupAIOCB),
+.cancel

Re: [Qemu-devel] [PATCHv3] Add blkmirror block driver

2012-02-29 Thread Federico Simoncelli
- Original Message -
 From: Federico Simoncelli fsimo...@redhat.com
 To: qemu-devel@nongnu.org
 Cc: mtosa...@redhat.com, kw...@redhat.com, pbonz...@redhat.com, 
 stefa...@gmail.com, Federico Simoncelli
 fsimo...@redhat.com
 Sent: Wednesday, February 29, 2012 1:28:21 PM
 Subject: [PATCHv3] Add blkmirror block driver
 
 Mirrored writes are used by live block copy.
 
 Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
 Signed-off-by: Federico Simoncelli fsimo...@redhat.com
 ---
  Makefile.objs  |2 +-
  block/blkmirror.c  |  255
  
  cutils.c   |   30 ++
  docs/blkmirror.txt |   15 +++
  qemu-common.h  |1 +
  5 files changed, 302 insertions(+), 1 deletions(-)
  create mode 100644 block/blkmirror.c
  create mode 100644 docs/blkmirror.txt
 
 diff --git a/block/blkmirror.c b/block/blkmirror.c
 new file mode 100644
 index 000..c98b162
 --- /dev/null
 +++ b/block/blkmirror.c
[...]
 +if (!drv1 || !drv2) {
 +ret = -EINVAL;
 +goto out;
 +}
 +
 +ret = bdrv_open(m-bs[0], tok[1], flags, drv1);
 +if (ret  0) {
 +goto out;
 +}
 +
 +ret = bdrv_open(m-bs[0], tok[3], flags, drv2);
 +if (ret  0) {
 +goto out;
 +}

There's a small mistake here, the second one is m-bs[1].

-- 
Federico



[Qemu-devel] [PATCHv4] Add blkmirror block driver

2012-02-29 Thread Federico Simoncelli
Mirrored writes are used by live block copy.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Federico Simoncelli fsimo...@redhat.com
Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 Makefile.objs  |2 +-
 block/blkmirror.c  |  278 
 cutils.c   |   56 +++
 docs/blkmirror.txt |   15 +++
 qemu-common.h  |2 +
 5 files changed, 352 insertions(+), 1 deletions(-)
 create mode 100644 block/blkmirror.c
 create mode 100644 docs/blkmirror.txt

diff --git a/Makefile.objs b/Makefile.objs
index 808de6a..2302c96 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -34,7 +34,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o 
dmg.o bochs.o vpc.o vv
 block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o
 block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-nested-y += qed-check.o
-block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
+block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o 
blkmirror.o
 block-nested-y += stream.o
 block-nested-$(CONFIG_WIN32) += raw-win32.o
 block-nested-$(CONFIG_POSIX) += raw-posix.o
diff --git a/block/blkmirror.c b/block/blkmirror.c
new file mode 100644
index 000..d894ca8
--- /dev/null
+++ b/block/blkmirror.c
@@ -0,0 +1,278 @@
+/*
+ * Block driver for mirrored writes.
+ *
+ * Copyright (C) 2011-2012 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include stdarg.h
+#include block_int.h
+
+typedef struct {
+BlockDriverState *bs[2];
+} BdrvMirrorState;
+
+typedef struct DupAIOCB DupAIOCB;
+
+typedef struct SingleAIOCB {
+BlockDriverAIOCB *aiocb;
+int finished;
+DupAIOCB *parent;
+} SingleAIOCB;
+
+struct DupAIOCB {
+BlockDriverAIOCB common;
+int count;
+
+BlockDriverCompletionFunc *cb;
+SingleAIOCB aios[2];
+int ret;
+};
+
+/* Valid blkmirror filenames look like
+ * blkmirror:fmt1:path/to/image1:fmt2:path/to/image2 */
+static int blkmirror_open(BlockDriverState *bs, const char *filename, int 
flags)
+{
+int ret = 0, i;
+char *tmpbuf, *tok[4], *next;
+BlockDriver *drv1, *drv2;
+BdrvMirrorState *m = bs-opaque;
+BlockDriverState *bk;
+
+m-bs[0] = bdrv_new();
+if (m-bs[0] == NULL) {
+return -ENOMEM;
+}
+
+m-bs[1] = bdrv_new();
+if (m-bs[1] == NULL) {
+bdrv_delete(m-bs[0]);
+return -ENOMEM;
+}
+
+tmpbuf = g_malloc(strlen(filename) + 1);
+pstrcpy(tmpbuf, strlen(filename) + 1, filename);
+
+/* Parse the blkmirror: prefix */
+if (!strstart(tmpbuf, blkmirror:, (const char **) next)) {
+next = tmpbuf;
+}
+
+for (i = 0; i  4; i++) {
+if (!next) {
+ret = -EINVAL;
+goto out;
+}
+tok[i] = estrtok_r(NULL, :, '\\', next);
+}
+
+drv1 = bdrv_find_whitelisted_format(tok[0]);
+drv2 = bdrv_find_whitelisted_format(tok[2]);
+if (!drv1 || !drv2) {
+ret = -EINVAL;
+goto out;
+}
+
+ret = bdrv_open(m-bs[0], tok[1], flags, drv1);
+if (ret  0) {
+goto out;
+}
+
+/* If we crash, we cannot assume that the destination is a
+ * valid mirror and we have to start over.  So speed up things
+ * by effectively operating on the destination in cache=unsafe
+ * mode.
+ */
+ret = bdrv_open(m-bs[1], tok[3], flags | BDRV_O_NO_BACKING
+| BDRV_O_NO_FLUSH | BDRV_O_CACHE_WB, drv2);
+if (ret  0) {
+goto out;
+}
+
+if (m-bs[0]-backing_hd) {
+bk = m-bs[0]-backing_hd;
+
+m-bs[1]-backing_hd = bdrv_new();
+if (!m-bs[1]-backing_hd) {
+ret = -ENOMEM;
+goto out;
+}
+
+/* opening the same backing file of the source */
+ret = bdrv_open(m-bs[1]-backing_hd,
+bk-filename, bk-open_flags, bk-drv);
+if (ret  0) {
+goto out;
+}
+}
+
+ out:
+g_free(tmpbuf);
+
+if (ret  0) {
+for (i = 0; i  2; i++) {
+bdrv_delete(m-bs[i]);
+m-bs[i] = NULL;
+}
+}
+
+return ret;
+}
+
+static void blkmirror_close(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs-opaque;
+int i;
+
+for (i = 0; i  2; i++) {
+bdrv_delete(m-bs[i]);
+m-bs[i] = NULL;
+}
+}
+
+static coroutine_fn int blkmirror_co_flush(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs-opaque;
+int ret;
+
+ret = bdrv_co_flush(m-bs[0]);
+if (ret  0) {
+return ret;
+}
+
+return bdrv_co_flush(m-bs[1]);
+}
+
+static int64_t blkmirror_getlength(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs-opaque;
+
+return bdrv_getlength(m-bs[0]);
+}
+
+static BlockDriverAIOCB *blkmirror_aio_readv(BlockDriverState *bs

Re: [Qemu-devel] Live Block Migration using Mirroring

2012-02-28 Thread Federico Simoncelli
- Original Message -
 From: Stefan Hajnoczi stefa...@gmail.com
 To: Federico Simoncelli fsimo...@redhat.com
 Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com
 Sent: Tuesday, February 28, 2012 4:47:48 PM
 Subject: Re: [Qemu-devel] Live Block Migration using Mirroring

 On Wed, Feb 22, 2012 at 5:13 PM, Federico Simoncelli
 fsimo...@redhat.com wrote:
  Step 3 - Mirrored Live Snapshot
  ===
  A mirrored live snapshot is issued using src/hd0snap1 and
  dst/hd0snap1 as
  image files. (Where - stands for has backing file)
 
  [src/hd0base] - [src/hd0snap1] = VM1(read-write)
      ...      - [dst/hd0snap1] = VM1(write-only)
 
  $ qemu-img create -f qcow2 \
            -b /tmp/src/hd0base.qcow2 /tmp/src/hd0snap1.qcow2 20G
  Formatting '/tmp/src/hd0snap1.qcow2', fmt=qcow2 size=21474836480
  backing_file='/tmp/src/hd0base.qcow2' encryption=off
  cluster_size=65536
 
  $ qemu-img create -f qcow2 \
            -b /tmp/dst/hd0base.qcow2 /tmp/dst/hd0snap1.qcow2 20G
  Formatting '/tmp/dst/hd0snap1.qcow2', fmt=qcow2 size=21474836480
  backing_file='/tmp/src/hd0base.qcow2' encryption=off
  cluster_size=65536
 
  (qemu) snapshot_blkdev -n ide0-hd0 \
          blkmirror:/tmp/src/hd0snap1.qcow2:/tmp/dst/hd0snap1.qcow2
          blkmirror
 
  Step 4 - Backing File Copy
  ==
  An external manager copies src/hd0base to the destination
  dst/hd0base.
 
  [src/hd0base] - [src/hd0snap1] = VM1(read-write)
  [dst/hd0base] - [dst/hd0snap1] = VM1(write-only)

 At this stage we have dst/hd0snap1 opened with BDRV_O_NO_BACKING.  If
 it has no backing file and the guest issues a write request that is
 smaller than a cluster in the image file, the untouched areas of that
 cluster will be populated with zeroes.

 Once dst/hd0snap1 is reopened with dst/hd0base in place there will be
 zeros in clusters where the guest wrote only a few sectors.  We will
 not see the backing file data in those clusters.

 Have you hit this problem or did I miss something?

Thank you for getting this. Being able to have a bogus backing file was
a bonus but it's not really required for the mirrored live block migration.
We can add the support for switching the backing file in the drive-reopen
part.
I'll remove the BDRV_O_NO_BACKING flag from the blkmirror patch.

--
Federico



Re: [Qemu-devel] Live Block Migration using Mirroring

2012-02-28 Thread Federico Simoncelli
- Original Message -
 From: Paolo Bonzini pbonz...@redhat.com
 To: Federico Simoncelli fsimo...@redhat.com
 Cc: Stefan Hajnoczi stefa...@gmail.com, qemu-devel@nongnu.org, 
 kw...@redhat.com, mtosa...@redhat.com
 Sent: Tuesday, February 28, 2012 6:36:57 PM
 Subject: Re: [Qemu-devel] Live Block Migration using Mirroring
 
 Il 28/02/2012 18:15, Federico Simoncelli ha scritto:
  Thank you for getting this. Being able to have a bogus backing file
  was
  a bonus but it's not really required for the mirrored live block
  migration.
  We can add the support for switching the backing file in the
  drive-reopen
  part.
 
 Wait, it's not really required for oVirt because it creates the
 snapshot
 outside QEMU.  What about everyone else?

They'll have (as oVirt) two mirrored snapshot pointing at the same
base. The only difference is that the image is created internally, but
that's not hard.

-- 
Federico



Re: [Qemu-devel] Live Block Migration using Mirroring

2012-02-28 Thread Federico Simoncelli
- Original Message -
 From: Paolo Bonzini pbonz...@redhat.com
 To: qemu-devel@nongnu.org
 Sent: Tuesday, February 28, 2012 7:02:40 PM
 Subject: Re: [Qemu-devel] Live Block Migration using Mirroring
 
 Il 28/02/2012 18:46, Federico Simoncelli ha scritto:
Thank you for getting this. Being able to have a bogus backing
file
was a bonus but it's not really required for the mirrored live
block
migration. We can add the support for switching the backing
file in the
drive-reopen part.
   
   Wait, it's not really required for oVirt because it creates the
   snapshot outside QEMU.  What about everyone else?
  
  They'll have (as oVirt) two mirrored snapshot pointing at the same
  base. The only difference is that the image is created internally,
  but
  that's not hard.
 
 Can you detail how you are switching the backing file in the
 drive-reopen?  Either the BlockDriverState opened by blkmirror, or
 the
 one opened at the end, will have to use the wrong backing_file.
  How
 do you arrange for that?

Step 1 - Initital Scenario
==
VM1 is running on the src/hd0base.

[src/hd0base] = VM1(read-write)

Step 3 - Mirrored Live Snapshot
===
A mirrored live snapshot is issued using src/hd0snap1 and dst/hd0snap1 as image
files (both having src/hd0base as backing file).

[src/hd0base] - [src/hd0snap1] = VM1(read-write)
 ^-- [dst/hd0snap1] = VM1(read-write)

Step 4 - Backing File Copy
==
An external manager copies src/hd0base to the destination (dst/hd0base).

[src/hd0base] - [src/hd0snap1] = VM1(read-write)
[dst/hd0base]^-- [dst/hd0snap1] = VM1(read-write)


Step 5 - Final Switch to Destination

VM1 is now able to switch to the destination for both read and write operations
fixing the backing file path in dst/hd0snap1.

[src/hd0base] - [src/hd0snap1]
[dst/hd0base] - [dst/hd0snap1] = VM1(read-write)

-- 
Federico



Re: [Qemu-devel] [PATCH 1/2 v2] Add blkmirror block driver

2012-02-27 Thread Federico Simoncelli
- Original Message -
 From: Luiz Capitulino lcapitul...@redhat.com
 To: Federico Simoncelli fsimo...@redhat.com
 Cc: qemu-devel@nongnu.org, mtosa...@redhat.com, pbonz...@redhat.com, 
 kw...@redhat.com, arm...@redhat.com
 Sent: Friday, February 24, 2012 7:17:22 PM
 Subject: Re: [PATCH 1/2 v2] Add blkmirror block driver
 
 On Fri, 24 Feb 2012 16:49:03 +
 Federico Simoncelli fsimo...@redhat.com wrote:
 
  +/* Parse the raw image filename */
  +filename2 = g_malloc(strlen(filename)+1);
  +escape = 0;
  +for (i = n = 0; i  strlen(filename); i++) {
  +if (!escape  filename[i] == ':') {
  +break;
  +}
  +if (!escape  filename[i] == '\\') {
  +escape = 1;
  +} else {
  +escape = 0;
  +}
  +
  +if (!escape) {
  +filename2[n++] = filename[i];
  +}
  +}
  +filename2[n] = '\0';
 
 You're escaping only the first image name string, is that
 intentional?

Yes, it was also documented here by Marcelo:

  diff --git a/docs/blkmirror.txt b/docs/blkmirror.txt
  new file mode 100644
  index 000..cf73f3f
  --- /dev/null
  +++ b/docs/blkmirror.txt
  @@ -0,0 +1,16 @@
  +Block mirror driver
  +---
  +
  +This driver will mirror writes to two distinct images.
  +It's used internally by live block copy.
  +
  +Format
  +--
  +
  +blkmirror:/image1.img:/image2.img
  +
  +'\' (backslash) can be used to escape colon processing
  +as a separator, in the first image filename.
  +Backslashes themselves also can be escaped as '\\'.
  +
  +

Anyway this format is encapsulated by blockdev-migrate.

-- 
Federico



Re: [Qemu-devel] [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate commands

2012-02-27 Thread Federico Simoncelli
- Original Message -
 From: Luiz Capitulino lcapitul...@redhat.com
 To: Federico Simoncelli fsimo...@redhat.com
 Cc: qemu-devel@nongnu.org, mtosa...@redhat.com, pbonz...@redhat.com, 
 kw...@redhat.com, arm...@redhat.com
 Sent: Friday, February 24, 2012 8:01:43 PM
 Subject: Re: [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate 
 commands
 
 On Fri, 24 Feb 2012 16:49:04 +
 Federico Simoncelli fsimo...@redhat.com wrote:
 
  Signed-off-by: Federico Simoncelli fsimo...@redhat.com
 
 Btw, would be nice to have a 0/2 intro email describing the feature
 and changelog
 info.

Yes the 0/2 (actually 0/3) was sent at the beginning of the thread so you might
have missed it because you were added later on but I'm sure you can still find
it in the archives.

 
  +BlockDriver *drv;
  +int i, j, escape;
  +char new_filename[2048], *filename;
 
 I'd use PATH_MAX for new_filename's size.

Maybe PATH_MAX * 2 + 1? (To handle the case where all the characters should be
escaped).

  +escape = 0;
  +for (i = 0, j = 0; j  strlen(new_image_file); j++) {
  + loop:
  +if (!(i  sizeof(new_filename) - 2)) {
  +error_set(errp, QERR_INVALID_PARAMETER_VALUE,
  +  new-image-file, shorter filename);
  +return;
  +}
  +
  +if (new_image_file[j] == ':' || new_image_file[j] == '\\')
  {
  +if (!escape) {
  +new_filename[i++] = '\\', escape = 1;
  +goto loop;
  +} else {
  +escape = 0;
  +}
  +}
  +
  +new_filename[i++] = new_image_file[j];
  +}
 
 IMO, you could require the filename arguments to be escaped already.

Can you be more explicit, who should escape it?
The only thing that comes to my mind right now is requiring the input of
blockdev-migrate already escaped but that would expose an internal format.
(I'd not recommend it).

  +void qmp_blockdev_migrate(const char *device, bool incremental,
  +  const char *destination, bool
  has_new_image_file,
  +  const char *new_image_file, Error
  **errp)
  +{
  +BlockDriverState *bs;
  +
  +bs = bdrv_find(device);
  +if (!bs) {
  +error_set(errp, QERR_DEVICE_NOT_FOUND, device);
  +return;
  +}
  +if (bdrv_in_use(bs)) {
  +error_set(errp, QERR_DEVICE_IN_USE, device);
  +return;
  +}
  +
  +if (incremental) {
  +if (!has_new_image_file) {
  +error_set(errp, QERR_INVALID_PARAMETER_VALUE,
  +  incremental, a new image file);
  +} else {
  +qmp_blockdev_mirror(device, destination,
  new_image_file, errp);
  +}
  +} else {
  +error_set(errp, QERR_NOT_SUPPORTED);
  +}
 
 The command documentation says that 'incremental' and
 'new_image_file' are
 optionals, but the code makes them required. Why?

If I didn't make any mistake in the code I'm just enforcing that when
you specify incremental you also need a new image.
There are still other valid cases where they are optional.

-- 
Federico



Re: [Qemu-devel] drive transactions (was Re: [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate commands)

2012-02-27 Thread Federico Simoncelli
- Original Message -
 From: Paolo Bonzini pbonz...@redhat.com
 To: Luiz Capitulino lcapitul...@redhat.com
 Cc: Federico Simoncelli fsimo...@redhat.com, kw...@redhat.com, 
 mtosa...@redhat.com, qemu-devel@nongnu.org,
 arm...@redhat.com, Jeff Cody jc...@redhat.com
 Sent: Monday, February 27, 2012 3:39:33 PM
 Subject: drive transactions (was Re: [PATCH 2/2 v2] Add the blockdev-reopen 
 and blockdev-migrate commands)
 
   4) incremental=true, new_image_file passed:
  - no images will be created
  - writes will be mirrored to new_image_file and dest
 
 No need to provide this from within QEMU, because libvirt/oVirt can
 do
 the dance using elementary operations:
 
 blockdev-begin-transaction
 drive-reopen device new-image-file
 drive-mirror streaming=false device dest
 blockdev-commit-transaction
 
 No strange optional arguments, no proliferation of commands, etc.
  The
 only downside is that if someone tries to do (4) without transactions
 (or without stopping the VM) they'll get corruption because atomicity
 is
 required.

I'm all for the modularity of the commands (I suggested it since the beginning),
but all this infrastructure goes way beyond what I'd need for oVirt now.

When I submitted my patches we knew that my work wasn't the definitive solution
(eg: the future implementation of -blockdev). So I'd suggest to try and settle
with something that is good enough and that is not preventing a future 
improvement.

What about having a (temporary) flag in drive-mirror to accept also a 
new-image-file
until we will have the optimal solution?

-- 
Federico



Re: [Qemu-devel] drive transactions (was Re: [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate commands)

2012-02-27 Thread Federico Simoncelli
- Original Message -
 From: Anthony Liguori anth...@codemonkey.ws
 To: Federico Simoncelli fsimo...@redhat.com
 Cc: Paolo Bonzini pbonz...@redhat.com, kw...@redhat.com, 
 arm...@redhat.com, Jeff Cody jc...@redhat.com,
 mtosa...@redhat.com, qemu-devel@nongnu.org, Luiz Capitulino 
 lcapitul...@redhat.com
 Sent: Monday, February 27, 2012 5:42:31 PM
 Subject: Re: [Qemu-devel] drive transactions (was Re: [PATCH 2/2 v2] Add the 
 blockdev-reopen and blockdev-migrate
 commands)
 
 On 02/27/2012 10:33 AM, Federico Simoncelli wrote:
  I'm all for the modularity of the commands (I suggested it since
  the beginning),
  but all this infrastructure goes way beyond what I'd need for oVirt
  now.
 
  When I submitted my patches we knew that my work wasn't the
  definitive solution
  (eg: the future implementation of -blockdev). So I'd suggest to try
  and settle
  with something that is good enough and that is not preventing a
  future improvement.
 
  What about having a (temporary) flag in drive-mirror to accept also
  a new-image-file
  until we will have the optimal solution?
 
 Unless there are extenuating circumstances (like the absence of core
 infrastructure in QEMU), then we should not add commands that we know
 are not
 the right command.

So are you in favor or against my suggestion? It looks like this is exactly
the case where the core infrastructure (transactions) is missing.

-- 
Federico



[Qemu-devel] [PATCH 1/2] Add blkmirror block driver

2012-02-24 Thread Federico Simoncelli
From: Marcelo Tosatti mtosa...@redhat.com

Mirrored writes are used by live block copy.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 Makefile.objs  |2 +-
 block/blkmirror.c  |  247 
 docs/blkmirror.txt |   16 
 3 files changed, 264 insertions(+), 1 deletions(-)
 create mode 100644 block/blkmirror.c
 create mode 100644 docs/blkmirror.txt

diff --git a/Makefile.objs b/Makefile.objs
index 67ee3df..6020308 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -34,7 +34,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o 
dmg.o bochs.o vpc.o vv
 block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o
 block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-nested-y += qed-check.o
-block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
+block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o 
blkmirror.o
 block-nested-y += stream.o
 block-nested-$(CONFIG_WIN32) += raw-win32.o
 block-nested-$(CONFIG_POSIX) += raw-posix.o
diff --git a/block/blkmirror.c b/block/blkmirror.c
new file mode 100644
index 000..39927c8
--- /dev/null
+++ b/block/blkmirror.c
@@ -0,0 +1,247 @@
+/*
+ * Block driver for mirrored writes.
+ *
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include stdarg.h
+#include block_int.h
+
+typedef struct {
+BlockDriverState *bs[2];
+} BdrvMirrorState;
+
+typedef struct DupAIOCB DupAIOCB;
+
+typedef struct SingleAIOCB {
+BlockDriverAIOCB *aiocb;
+int finished;
+DupAIOCB *parent;
+} SingleAIOCB;
+
+struct DupAIOCB {
+BlockDriverAIOCB common;
+int count;
+
+BlockDriverCompletionFunc *cb;
+SingleAIOCB aios[2];
+int ret;
+};
+
+/* Valid blkmirror filenames look like
+ * blkmirror:path/to/image1:path/to/image2 */
+static int blkmirror_open(BlockDriverState *bs, const char *filename, int 
flags)
+{
+BdrvMirrorState *m = bs-opaque;
+int ret, escape, i, n;
+char *filename2;
+
+/* Parse the blkmirror: prefix */
+if (strncmp(filename, blkmirror:, strlen(blkmirror:))) {
+return -EINVAL;
+}
+filename += strlen(blkmirror:);
+
+/* Parse the raw image filename */
+filename2 = g_malloc(strlen(filename)+1);
+escape = 0;
+for (i = n = 0; i  strlen(filename); i++) {
+if (!escape  filename[i] == ':') {
+break;
+}
+if (!escape  filename[i] == '\\') {
+escape = 1;
+} else {
+escape = 0;
+}
+
+if (!escape) {
+filename2[n++] = filename[i];
+}
+}
+filename2[n] = '\0';
+
+m-bs[0] = bdrv_new();
+if (m-bs[0] == NULL) {
+g_free(filename2);
+return -ENOMEM;
+}
+ret = bdrv_open(m-bs[0], filename2, flags, NULL);
+g_free(filename2);
+if (ret  0) {
+return ret;
+}
+filename += i + 1;
+
+m-bs[1] = bdrv_new();
+if (m-bs[1] == NULL) {
+bdrv_delete(m-bs[0]);
+return -ENOMEM;
+}
+ret = bdrv_open(m-bs[1], filename, flags, NULL);
+if (ret  0) {
+bdrv_delete(m-bs[0]);
+return ret;
+}
+
+return 0;
+}
+
+static void blkmirror_close(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs-opaque;
+int i;
+
+for (i = 0; i  2; i++) {
+bdrv_delete(m-bs[i]);
+m-bs[i] = NULL;
+}
+}
+
+static coroutine_fn int blkmirror_co_flush(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs-opaque;
+int ret;
+
+ret = bdrv_co_flush(m-bs[0]);
+if (ret  0) {
+return ret;
+}
+
+return bdrv_co_flush(m-bs[1]);
+}
+
+static int64_t blkmirror_getlength(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs-opaque;
+
+return bdrv_getlength(m-bs[0]);
+}
+
+static BlockDriverAIOCB *blkmirror_aio_readv(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+BdrvMirrorState *m = bs-opaque;
+return bdrv_aio_readv(m-bs[0], sector_num, qiov, nb_sectors, cb, opaque);
+}
+
+static void dup_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+DupAIOCB *acb = container_of(blockacb, DupAIOCB, common);
+int i;
+
+for (i = 0 ; i  2; i++) {
+if (!acb-aios[i].finished) {
+bdrv_aio_cancel(acb-aios[i].aiocb);
+}
+}
+qemu_aio_release(acb);
+}
+
+static AIOPool dup_aio_pool = {
+.aiocb_size = sizeof(DupAIOCB),
+.cancel = dup_aio_cancel,
+};
+
+static void blkmirror_aio_cb(void *opaque, int ret

[Qemu-devel] [PATCH 2/2] Add the blockdev-reopen and blockdev-migrate commands

2012-02-24 Thread Federico Simoncelli
Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 block/blkmirror.c |2 +-
 blockdev.c|  109 +++--
 hmp-commands.hx   |   36 +
 hmp.c |   30 ++
 hmp.h |2 +
 qapi-schema.json  |   63 ++
 6 files changed, 229 insertions(+), 13 deletions(-)

diff --git a/block/blkmirror.c b/block/blkmirror.c
index 39927c8..49e3381 100644
--- a/block/blkmirror.c
+++ b/block/blkmirror.c
@@ -81,7 +81,7 @@ static int blkmirror_open(BlockDriverState *bs, const char 
*filename, int flags)
 bdrv_delete(m-bs[0]);
 return -ENOMEM;
 }
-ret = bdrv_open(m-bs[1], filename, flags, NULL);
+ret = bdrv_open(m-bs[1], filename, flags | BDRV_O_NO_BACKING, NULL);
 if (ret  0) {
 bdrv_delete(m-bs[0]);
 return ret;
diff --git a/blockdev.c b/blockdev.c
index 2c132a3..1df2542 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -646,9 +646,8 @@ void do_commit(Monitor *mon, const QDict *qdict)
 }
 }
 
-void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
-bool has_format, const char *format,
-Error **errp)
+static void change_blockdev_image(const char *device, const char 
*new_image_file,
+  const char *format, bool create, Error 
**errp)
 {
 BlockDriverState *bs;
 BlockDriver *drv, *old_drv, *proto_drv;
@@ -671,7 +670,7 @@ void qmp_blockdev_snapshot_sync(const char *device, const 
char *snapshot_file,
 old_drv = bs-drv;
 flags = bs-open_flags;
 
-if (!has_format) {
+if (!format) {
 format = qcow2;
 }
 
@@ -681,24 +680,26 @@ void qmp_blockdev_snapshot_sync(const char *device, const 
char *snapshot_file,
 return;
 }
 
-proto_drv = bdrv_find_protocol(snapshot_file);
+proto_drv = bdrv_find_protocol(new_image_file);
 if (!proto_drv) {
 error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
 return;
 }
 
-ret = bdrv_img_create(snapshot_file, format, bs-filename,
-  bs-drv-format_name, NULL, -1, flags);
-if (ret) {
-error_set(errp, QERR_UNDEFINED_ERROR);
-return;
+if (create) {
+ret = bdrv_img_create(new_image_file, format, bs-filename,
+  bs-drv-format_name, NULL, -1, flags);
+if (ret) {
+error_set(errp, QERR_UNDEFINED_ERROR);
+return;
+}
 }
 
 bdrv_drain_all();
 bdrv_flush(bs);
 
 bdrv_close(bs);
-ret = bdrv_open(bs, snapshot_file, flags, drv);
+ret = bdrv_open(bs, new_image_file, flags, drv);
 /*
  * If reopening the image file we just created fails, fall back
  * and try to re-open the original image. If that fails too, we
@@ -709,11 +710,95 @@ void qmp_blockdev_snapshot_sync(const char *device, const 
char *snapshot_file,
 if (ret != 0) {
 error_set(errp, QERR_OPEN_FILE_FAILED, old_filename);
 } else {
-error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file);
+error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
 }
 }
 }
 
+void qmp_blockdev_reopen(const char *device, const char *new_image_file,
+ bool has_format, const char *format, Error **errp)
+{
+change_blockdev_image(device, new_image_file,
+  has_format ? format : NULL, false, errp);
+}
+
+void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
+bool has_format, const char *format,
+Error **errp)
+{
+change_blockdev_image(device, snapshot_file,
+  has_format ? format : NULL, true, errp);
+}
+
+void qmp_blockdev_migrate(const char *device, BlockMigrateOp mode,
+  const char *destination, bool has_new_image_file,
+  const char *new_image_file, Error **errp)
+{
+BlockDriverState *bs;
+BlockDriver *drv;
+int i, j, escape;
+char filename[2048];
+
+bs = bdrv_find(device);
+if (!bs) {
+error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+return;
+}
+if (bdrv_in_use(bs)) {
+error_set(errp, QERR_DEVICE_IN_USE, device);
+return;
+}
+
+if (mode == BLOCK_MIGRATE_OP_MIRROR) {
+drv = bdrv_find_format(blkmirror);
+if (!drv) {
+error_set(errp, QERR_INVALID_BLOCK_FORMAT, blkmirror);
+return;
+}
+
+if (!has_new_image_file) {
+new_image_file = bs-filename;
+}
+
+pstrcpy(filename, sizeof(filename), blkmirror:);
+i = strlen(blkmirror:);
+
+escape = 0;
+for (j = 0; j  strlen(new_image_file); j++) {
+ loop:
+if (!(i  sizeof(filename) - 2)) {
+error_set(errp, QERR_INVALID_PARAMETER_VALUE

Re: [Qemu-devel] [PATCH 2/2] Add the blockdev-reopen and blockdev-migrate commands

2012-02-24 Thread Federico Simoncelli
- Original Message -
 From: Kevin Wolf kw...@redhat.com
 To: Federico Simoncelli fsimo...@redhat.com
 Cc: qemu-devel@nongnu.org, Marcelo Tosatti mtosa...@redhat.com, 
 lcapitul...@redhat.com, Paolo Bonzini
 pbonz...@redhat.com, Markus Armbruster arm...@redhat.com
 Sent: Friday, February 24, 2012 1:03:08 PM
 Subject: Re: [PATCH 2/2] Add the blockdev-reopen and blockdev-migrate commands
 
 Am 24.02.2012 12:37, schrieb Federico Simoncelli:
  Signed-off-by: Federico Simoncelli fsimo...@redhat.com
  ---
   block/blkmirror.c |2 +-
   blockdev.c|  109
   +++--
   hmp-commands.hx   |   36 +
   hmp.c |   30 ++
   hmp.h |2 +
   qapi-schema.json  |   63 ++
   6 files changed, 229 insertions(+), 13 deletions(-)
  
  diff --git a/block/blkmirror.c b/block/blkmirror.c
  index 39927c8..49e3381 100644
  --- a/block/blkmirror.c
  +++ b/block/blkmirror.c
  @@ -81,7 +81,7 @@ static int blkmirror_open(BlockDriverState *bs,
  const char *filename, int flags)
   bdrv_delete(m-bs[0]);
   return -ENOMEM;
   }
  -ret = bdrv_open(m-bs[1], filename, flags, NULL);
  +ret = bdrv_open(m-bs[1], filename, flags | BDRV_O_NO_BACKING,
  NULL);
   if (ret  0) {
   bdrv_delete(m-bs[0]);
   return ret;
 
 Was this hunk meant to be in patch 1?

Not necessarily, I thought a lot about it. I didn't want to modify Marcelo's
patch too much. This flag is actually mandatory only to make blockdev-migrate
work with a destination that doesn't have a backing file yet, so I thought it
was more appropriate to squash it here.

-- 
Federico



[Qemu-devel] [PATCH 1/2 v2] Add blkmirror block driver

2012-02-24 Thread Federico Simoncelli
From: Marcelo Tosatti mtosa...@redhat.com

Mirrored writes are used by live block copy.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 Makefile.objs  |2 +-
 block/blkmirror.c  |  247 
 docs/blkmirror.txt |   16 
 3 files changed, 264 insertions(+), 1 deletions(-)
 create mode 100644 block/blkmirror.c
 create mode 100644 docs/blkmirror.txt

diff --git a/Makefile.objs b/Makefile.objs
index 67ee3df..6020308 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -34,7 +34,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o 
dmg.o bochs.o vpc.o vv
 block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o
 block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-nested-y += qed-check.o
-block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
+block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o 
blkmirror.o
 block-nested-y += stream.o
 block-nested-$(CONFIG_WIN32) += raw-win32.o
 block-nested-$(CONFIG_POSIX) += raw-posix.o
diff --git a/block/blkmirror.c b/block/blkmirror.c
new file mode 100644
index 000..49e3381
--- /dev/null
+++ b/block/blkmirror.c
@@ -0,0 +1,247 @@
+/*
+ * Block driver for mirrored writes.
+ *
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include stdarg.h
+#include block_int.h
+
+typedef struct {
+BlockDriverState *bs[2];
+} BdrvMirrorState;
+
+typedef struct DupAIOCB DupAIOCB;
+
+typedef struct SingleAIOCB {
+BlockDriverAIOCB *aiocb;
+int finished;
+DupAIOCB *parent;
+} SingleAIOCB;
+
+struct DupAIOCB {
+BlockDriverAIOCB common;
+int count;
+
+BlockDriverCompletionFunc *cb;
+SingleAIOCB aios[2];
+int ret;
+};
+
+/* Valid blkmirror filenames look like
+ * blkmirror:path/to/image1:path/to/image2 */
+static int blkmirror_open(BlockDriverState *bs, const char *filename, int 
flags)
+{
+BdrvMirrorState *m = bs-opaque;
+int ret, escape, i, n;
+char *filename2;
+
+/* Parse the blkmirror: prefix */
+if (strncmp(filename, blkmirror:, strlen(blkmirror:))) {
+return -EINVAL;
+}
+filename += strlen(blkmirror:);
+
+/* Parse the raw image filename */
+filename2 = g_malloc(strlen(filename)+1);
+escape = 0;
+for (i = n = 0; i  strlen(filename); i++) {
+if (!escape  filename[i] == ':') {
+break;
+}
+if (!escape  filename[i] == '\\') {
+escape = 1;
+} else {
+escape = 0;
+}
+
+if (!escape) {
+filename2[n++] = filename[i];
+}
+}
+filename2[n] = '\0';
+
+m-bs[0] = bdrv_new();
+if (m-bs[0] == NULL) {
+g_free(filename2);
+return -ENOMEM;
+}
+ret = bdrv_open(m-bs[0], filename2, flags, NULL);
+g_free(filename2);
+if (ret  0) {
+return ret;
+}
+filename += i + 1;
+
+m-bs[1] = bdrv_new();
+if (m-bs[1] == NULL) {
+bdrv_delete(m-bs[0]);
+return -ENOMEM;
+}
+ret = bdrv_open(m-bs[1], filename, flags | BDRV_O_NO_BACKING, NULL);
+if (ret  0) {
+bdrv_delete(m-bs[0]);
+return ret;
+}
+
+return 0;
+}
+
+static void blkmirror_close(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs-opaque;
+int i;
+
+for (i = 0; i  2; i++) {
+bdrv_delete(m-bs[i]);
+m-bs[i] = NULL;
+}
+}
+
+static coroutine_fn int blkmirror_co_flush(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs-opaque;
+int ret;
+
+ret = bdrv_co_flush(m-bs[0]);
+if (ret  0) {
+return ret;
+}
+
+return bdrv_co_flush(m-bs[1]);
+}
+
+static int64_t blkmirror_getlength(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs-opaque;
+
+return bdrv_getlength(m-bs[0]);
+}
+
+static BlockDriverAIOCB *blkmirror_aio_readv(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+BdrvMirrorState *m = bs-opaque;
+return bdrv_aio_readv(m-bs[0], sector_num, qiov, nb_sectors, cb, opaque);
+}
+
+static void dup_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+DupAIOCB *acb = container_of(blockacb, DupAIOCB, common);
+int i;
+
+for (i = 0 ; i  2; i++) {
+if (!acb-aios[i].finished) {
+bdrv_aio_cancel(acb-aios[i].aiocb);
+}
+}
+qemu_aio_release(acb);
+}
+
+static AIOPool dup_aio_pool = {
+.aiocb_size = sizeof(DupAIOCB),
+.cancel = dup_aio_cancel,
+};
+
+static void blkmirror_aio_cb(void

[Qemu-devel] [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate commands

2012-02-24 Thread Federico Simoncelli
Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 blockdev.c   |  107 --
 hmp-commands.hx  |   38 +++
 hmp.c|   24 
 hmp.h|2 +
 qapi-schema.json |   54 +++
 5 files changed, 213 insertions(+), 12 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 2c132a3..f51bd6f 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -646,9 +646,8 @@ void do_commit(Monitor *mon, const QDict *qdict)
 }
 }
 
-void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
-bool has_format, const char *format,
-Error **errp)
+static void change_blockdev_image(const char *device, const char 
*new_image_file,
+  const char *format, bool create, Error 
**errp)
 {
 BlockDriverState *bs;
 BlockDriver *drv, *old_drv, *proto_drv;
@@ -671,7 +670,7 @@ void qmp_blockdev_snapshot_sync(const char *device, const 
char *snapshot_file,
 old_drv = bs-drv;
 flags = bs-open_flags;
 
-if (!has_format) {
+if (!format) {
 format = qcow2;
 }
 
@@ -681,24 +680,26 @@ void qmp_blockdev_snapshot_sync(const char *device, const 
char *snapshot_file,
 return;
 }
 
-proto_drv = bdrv_find_protocol(snapshot_file);
+proto_drv = bdrv_find_protocol(new_image_file);
 if (!proto_drv) {
 error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
 return;
 }
 
-ret = bdrv_img_create(snapshot_file, format, bs-filename,
-  bs-drv-format_name, NULL, -1, flags);
-if (ret) {
-error_set(errp, QERR_UNDEFINED_ERROR);
-return;
+if (create) {
+ret = bdrv_img_create(new_image_file, format, bs-filename,
+  bs-drv-format_name, NULL, -1, flags);
+if (ret) {
+error_set(errp, QERR_UNDEFINED_ERROR);
+return;
+}
 }
 
 bdrv_drain_all();
 bdrv_flush(bs);
 
 bdrv_close(bs);
-ret = bdrv_open(bs, snapshot_file, flags, drv);
+ret = bdrv_open(bs, new_image_file, flags, drv);
 /*
  * If reopening the image file we just created fails, fall back
  * and try to re-open the original image. If that fails too, we
@@ -709,11 +710,93 @@ void qmp_blockdev_snapshot_sync(const char *device, const 
char *snapshot_file,
 if (ret != 0) {
 error_set(errp, QERR_OPEN_FILE_FAILED, old_filename);
 } else {
-error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file);
+error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
 }
 }
 }
 
+void qmp_drive_reopen(const char *device, const char *new_image_file,
+ bool has_format, const char *format, Error **errp)
+{
+change_blockdev_image(device, new_image_file,
+  has_format ? format : NULL, false, errp);
+}
+
+void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
+bool has_format, const char *format,
+Error **errp)
+{
+change_blockdev_image(device, snapshot_file,
+  has_format ? format : NULL, true, errp);
+}
+
+static void qmp_blockdev_mirror(const char *device, const char *destination,
+const char *new_image_file, Error **errp)
+{
+BlockDriver *drv;
+int i, j, escape;
+char new_filename[2048], *filename;
+
+drv = bdrv_find_format(blkmirror);
+if (!drv) {
+error_set(errp, QERR_INVALID_BLOCK_FORMAT, blkmirror);
+return;
+}
+
+escape = 0;
+for (i = 0, j = 0; j  strlen(new_image_file); j++) {
+ loop:
+if (!(i  sizeof(new_filename) - 2)) {
+error_set(errp, QERR_INVALID_PARAMETER_VALUE,
+  new-image-file, shorter filename);
+return;
+}
+
+if (new_image_file[j] == ':' || new_image_file[j] == '\\') {
+if (!escape) {
+new_filename[i++] = '\\', escape = 1;
+goto loop;
+} else {
+escape = 0;
+}
+}
+
+new_filename[i++] = new_image_file[j];
+}
+
+filename = g_strdup_printf(blkmirror:%s:%s, new_filename, destination);
+change_blockdev_image(device, filename, blkmirror, false, errp);
+g_free(filename);
+}
+
+void qmp_blockdev_migrate(const char *device, bool incremental,
+  const char *destination, bool has_new_image_file,
+  const char *new_image_file, Error **errp)
+{
+BlockDriverState *bs;
+
+bs = bdrv_find(device);
+if (!bs) {
+error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+return;
+}
+if (bdrv_in_use(bs)) {
+error_set(errp, QERR_DEVICE_IN_USE, device);
+return;
+}
+
+if (incremental

Re: [Qemu-devel] [PATCH 1/2 v2] Add blkmirror block driver

2012-02-24 Thread Federico Simoncelli
- Original Message -
 From: Eric Blake ebl...@redhat.com
 To: Federico Simoncelli fsimo...@redhat.com
 Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com, 
 arm...@redhat.com, lcapitul...@redhat.com,
 pbonz...@redhat.com
 Sent: Friday, February 24, 2012 6:02:36 PM
 Subject: Re: [Qemu-devel] [PATCH 1/2 v2] Add blkmirror block driver
  +++ b/docs/blkmirror.txt
  @@ -0,0 +1,16 @@
  +Block mirror driver
  +---
  +
  +This driver will mirror writes to two distinct images.
  +It's used internally by live block copy.
  +
  +Format
  +--
  +
  +blkmirror:/image1.img:/image2.img
  +
  +'\' (backslash) can be used to escape colon processing
  +as a separator, in the first image filename.
  +Backslashes themselves also can be escaped as '\\'.
 
 Is the escaping of : and \ only necessary for image1.img, or for both
 image1 and image2?  I need to know if the parser is consistent for
 both
 arguments, but this wording makes it sound like it is only for the
 first
 argument.

Yes it is only for the first argument.

-- 
Federico



Re: [Qemu-devel] [PATCH 3/3] Add nocreate option to snapshot_blkdev

2012-02-23 Thread Federico Simoncelli
- Original Message -
 From: Paolo Bonzini pbonz...@redhat.com
 Cc: Federico Simoncelli fsimo...@redhat.com, kw...@redhat.com, 
 mtosa...@redhat.com, qemu-devel@nongnu.org
 Sent: Thursday, February 23, 2012 8:38:55 AM
 Subject: Re: [PATCH 3/3] Add nocreate option to snapshot_blkdev
 
 On 02/23/2012 08:19 AM, Paolo Bonzini wrote:
   Signed-off-by: Federico Simoncelli fsimo...@redhat.com
  What is the usecase, and how can this be tested?
 
 Oops, you explained it in part 0.
 
  Step 5 - Final Switch to Destination
  
  VM1 is now able to switch to the destination for both read and
  write
  operations.
  
  [src/hd0base] - [src/hd0snap1] = VM1(read-write)
  
  (qemu) snapshot_blkdev -n ide0-hd0 /tmp/dst/hd0snap1.qcow2
 
 It seems to me that reusing the snapshot_blkdev command is a bit of a
 misnomer.  It also forces you to use format autodetection for the
 destination in the blkmirror phase.
 
 I see two possibilities:
 
 1) you can make a new command block_reopen; this fixes only the
 naming
 problem.
 
 2) you can hide the new format behind a new command to be used like
 this:
 
block_migrate --mirror ide-hd0 /tmp/dst/hd0snap1.qcow2
...
block_migrate --switch ide-hd0
 
 This leave the possibility to specify the format in the future:
 
block_migrate --mirror ide-hd0 /tmp/dst/hd0snap1.qcow2 qcow2
 
 And we could have another sub-command
 
block_mirror --stream ide-hd0 /tmp/dst/hd0.raw
 
 to migrate block devices without shared storage and without an
 intermediate snapshot.

I agree on the fact that using snapshot_blkdev is a misnomer but at
the same time I like very much how blkmirror is modular and it can
be used as a regular image filename.
I'm worried that making it explicit with a --mirror option would
take away its flexibility.
What about adding block_reopen and extending the blkmirror string:

 (qemu) block_reopen ide-hd0 \
 blkmirror:qcow2:/tmp/src/hd0snap1.qcow2:/tmp/dst/hd0snap1.qcow2

so that the format is passed as first argument (and if it's empty we
would use the auto-detection).

-- 
Federico



Re: [Qemu-devel] [PATCH 2/3] Update the blkmirror block driver

2012-02-23 Thread Federico Simoncelli
- Original Message -
 From: Paolo Bonzini pbonz...@redhat.com
 To: Federico Simoncelli fsimo...@redhat.com
 Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com
 Sent: Thursday, February 23, 2012 8:18:28 AM
 Subject: Re: [PATCH 2/3] Update the blkmirror block driver
 
 On 02/22/2012 06:13 PM, Federico Simoncelli wrote:
  @@ -46,7 +46,7 @@ static int blkmirror_open(BlockDriverState *bs,
  const char *filename, int flags)
   filename += strlen(blkmirror:);
   
   /* Parse the raw image filename */
  -filename2 = qemu_malloc(strlen(filename)+1);
  +filename2 = qemu_vmalloc(strlen(filename)+1);
   escape = 0;
   for (i = n = 0; i  strlen(filename); i++) {
   if (!escape  filename[i] == ':') {
  @@ -66,11 +66,11 @@ static int blkmirror_open(BlockDriverState *bs,
  const char *filename, int flags)
   
   m-bs[0] = bdrv_new();
   if (m-bs[0] == NULL) {
  -free(filename2);
  +qemu_vfree(filename2);
   return -ENOMEM;
   }
   ret = bdrv_open(m-bs[0], filename2, flags, NULL);
  -free(filename2);
  +qemu_vfree(filename2);
   if (ret  0) {
   return ret;
   }
 
 These should be g_malloc and g_free.

Thanks!

 Also, please squash this patch in part 1.

Yes, I sent it as a separate patch to make it easier to review my
changes (if someone already reviewed Marcelo's patch).
Any comment on the BDRV_O_NO_BACKING flag I added? That's the real
trick I'm pulling here. It basically allows to open the second image
only for writing and it doesn't complain if the backing file is not
there yet (it will be copied during step 4).

-- 
Federico



Re: [Qemu-devel] [PATCH 3/3] Add nocreate option to snapshot_blkdev

2012-02-23 Thread Federico Simoncelli
- Original Message -
 From: Paolo Bonzini pbonz...@redhat.com
 To: Federico Simoncelli fsimo...@redhat.com
 Cc: kw...@redhat.com, mtosa...@redhat.com, qemu-devel@nongnu.org
 Sent: Thursday, February 23, 2012 10:48:59 AM
 Subject: Re: [PATCH 3/3] Add nocreate option to snapshot_blkdev
 
 On 02/23/2012 10:39 AM, Federico Simoncelli wrote:
  I agree on the fact that using snapshot_blkdev is a misnomer but at
  the same time I like very much how blkmirror is modular and it can
  be used as a regular image filename.
 
 True, on the other hand if we add this we will have to keep it
 forever.
 
 I'm worried that blkmirror does not satisfy _all_ mirroring needs,
 for
 example you cannot use block_stream to copy from the source to the
 destination, so perhaps in the future we want to change it to
 something
 else.

Are you talking about a mirroring where you block_stream the missing
clusters in the destination from the source?
I believe that it could be done without losing the blkmirror
modularity probably extending the BlockDriver structure with some
additional concepts.

 So while I appreciate the easier debugging, I'm afraid that we'll
 regret
 adding a command-line-visible feature.
 
  I'm worried that making it explicit with a --mirror option would
  take away its flexibility.
  What about adding block_reopen and extending the blkmirror string:
  
   (qemu) block_reopen ide-hd0 \
   blkmirror:qcow2:/tmp/src/hd0snap1.qcow2:/tmp/dst/hd0snap1.qcow2
  
  so that the format is passed as first argument (and if it's empty
  we
  would use the auto-detection).
 
 No, the format of the source and destination should be independent.
  It
 would be nice to have something like this:
 
 blkmirror:src=...,dst=...,srcformat=...,dstformat=...
 
 but commas are a pain because they need escaping.

I like that, one limitation we need to keep in mind is that it should
fit into the hard-coded filename limit of 1024 characters that (sadly) we
have in multiple places. Another thing is that at this stage the mirroring
is more an original/copy concept rather than source/destination.
What about:

blkmirror:...,copy=...,fmt=... (both images uses the same fmt)
blkmirror:...,copy=...,fmt=...,copyfmt=...

Or, eventually if you feel like that source/destination is more appropriate
for the future, then:

blkmirror:...,dst=...,fmt=...,dstfmt=...

-- 
Federico



Re: [Qemu-devel] Live Block Migration using Mirroring

2012-02-23 Thread Federico Simoncelli
- Original Message -
 From: Stefan Hajnoczi stefa...@gmail.com
 To: Federico Simoncelli fsimo...@redhat.com
 Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com
 Sent: Thursday, February 23, 2012 4:47:38 PM
 Subject: Re: [Qemu-devel] Live Block Migration using Mirroring

 On Wed, Feb 22, 2012 at 5:13 PM, Federico Simoncelli
 fsimo...@redhat.com wrote:
  Step 3 - Mirrored Live Snapshot
  ===
  A mirrored live snapshot is issued using src/hd0snap1 and
  dst/hd0snap1 as
  image files. (Where - stands for has backing file)
 
  [src/hd0base] - [src/hd0snap1] = VM1(read-write)
      ...      - [dst/hd0snap1] = VM1(write-only)
 
  $ qemu-img create -f qcow2 \
            -b /tmp/src/hd0base.qcow2 /tmp/src/hd0snap1.qcow2 20G
  Formatting '/tmp/src/hd0snap1.qcow2', fmt=qcow2 size=21474836480
  backing_file='/tmp/src/hd0base.qcow2' encryption=off
  cluster_size=65536
 
  $ qemu-img create -f qcow2 \
            -b /tmp/dst/hd0base.qcow2 /tmp/dst/hd0snap1.qcow2 20G
  Formatting '/tmp/dst/hd0snap1.qcow2', fmt=qcow2 size=21474836480
  backing_file='/tmp/src/hd0base.qcow2' encryption=off
  cluster_size=65536

 At this stage /tmp/dst/hd0base.qcow2 does not exist yet.  The
 qemu-img
 output you pasted shows /tmp/src/hd0base.qcow2 was actually used.
 Typo?

No that's part of the flag used in [PATCH 2/3] (Update the blkmirror
block driver): BDRV_O_NO_BACKING

It's also documented in the design:

http://www.ovirt.org/wiki/File:StorageLiveMigration2.png

  (qemu) snapshot_blkdev -n ide0-hd0 \
          blkmirror:/tmp/src/hd0snap1.qcow2:/tmp/dst/hd0snap1.qcow2
          blkmirror
 
  Step 4 - Backing File Copy
  ==
  An external manager copies src/hd0base to the destination
  dst/hd0base.
 
  [src/hd0base] - [src/hd0snap1] = VM1(read-write)
  [dst/hd0base] - [dst/hd0snap1] = VM1(write-only)
 
  $ cp -a /tmp/src/hd0base.qcow2 /tmp/dst/hd0base.qcow2

 Are we missing a fixup step that changes backing_file in
 dst/hd0snap1.qcow2 to point at dst/hd0base.qcow2?

See above.

--
Federico



Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver

2012-02-23 Thread Federico Simoncelli
- Original Message -
 From: Stefan Hajnoczi stefa...@gmail.com
 To: Federico Simoncelli fsimo...@redhat.com
 Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com
 Sent: Thursday, February 23, 2012 5:14:09 PM
 Subject: Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver
 
 On Wed, Feb 22, 2012 at 5:13 PM, Federico Simoncelli
 fsimo...@redhat.com wrote:
  From: Marcelo Tosatti mtosa...@redhat.com
 
  Mirrored writes are used by live block copy.
 
 I think the right approach is to create a single blkmirror driver
 that
 also includes blkverify functionality.  The code is basically the
 same
 except blkverify also compares reads - just use a flag to
 enable/disable that behavior.
 
 Feel free to rename the blkverify driver to blkmirror if you wish.
 
 By the way, this code does not build against qemu.git/master.  It
 uses
 interfaces which have been dropped.

Yes you also need:

[PATCH 2/3] Update the blkmirror block driver

Which was sent separately to facilitate the review for people who
already reviewed this.

-- 
Federico



Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver

2012-02-23 Thread Federico Simoncelli
- Original Message -
 From: Stefan Hajnoczi stefa...@gmail.com
 To: Federico Simoncelli fsimo...@redhat.com
 Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com
 Sent: Thursday, February 23, 2012 5:18:41 PM
 Subject: Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver

 On Thu, Feb 23, 2012 at 4:14 PM, Stefan Hajnoczi stefa...@gmail.com
 wrote:
  By the way, this code does not build against qemu.git/master.  It
  uses
  interfaces which have been dropped.

 Ah, I see you changed that in Patch 2/3.  Please don't do that, it's
 hard to review and breaks git-bisect.

Squashing is easy (anyone could do it at the commit phase), reviewing
is hard (if you hide your changes in someone else's code).

--
Federico



Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver

2012-02-23 Thread Federico Simoncelli
- Original Message -
 From: Stefan Hajnoczi stefa...@gmail.com
 To: Federico Simoncelli fsimo...@redhat.com
 Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com
 Sent: Thursday, February 23, 2012 5:28:23 PM
 Subject: Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver

 On Thu, Feb 23, 2012 at 4:20 PM, Federico Simoncelli
 fsimo...@redhat.com wrote:
  - Original Message -
  From: Stefan Hajnoczi stefa...@gmail.com
  To: Federico Simoncelli fsimo...@redhat.com
  Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com
  Sent: Thursday, February 23, 2012 5:18:41 PM
  Subject: Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver
 
  On Thu, Feb 23, 2012 at 4:14 PM, Stefan Hajnoczi
  stefa...@gmail.com
  wrote:
   By the way, this code does not build against qemu.git/master.
    It
   uses
   interfaces which have been dropped.
 
  Ah, I see you changed that in Patch 2/3.  Please don't do that,
  it's
  hard to review and breaks git-bisect.
 
  Squashing is easy (anyone could do it at the commit phase),
  reviewing
  is hard (if you hide your changes in someone else's code).

 I don't care if you or Marcelo wrote it, I want to see a coherent
 piece of code.

 The way to do it is to merge the g_malloc() and BlockDriver interface
 changes into this path.  Then you have not snuck any significant
 changes into Marcelo's code.

Well for example I'd have snuck the qemu_vmalloc/qemu_vfree mistake.

 Keep the BDRV_O_NO_BACKING separate.

Ok, thanks.

--
Federico



Re: [Qemu-devel] Live Block Migration using Mirroring

2012-02-23 Thread Federico Simoncelli
- Original Message -
 From: Stefan Hajnoczi stefa...@gmail.com
 To: Federico Simoncelli fsimo...@redhat.com
 Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com
 Sent: Thursday, February 23, 2012 5:35:23 PM
 Subject: Re: [Qemu-devel] Live Block Migration using Mirroring

 On Wed, Feb 22, 2012 at 5:13 PM, Federico Simoncelli
 fsimo...@redhat.com wrote:
   recently I've been working on live block migration combining the
   live
  snapshots and the blkmirror patch sent by Marcelo Tosatti few
  months ago.
 
  The design is summarized at this url as Mirrored-Snapshot:
 
  http://www.ovirt.org/wiki/Features/Design/StorageLiveMigration

 After mirrored-snapshot completes we're left with the base and the
 snapshot.  Is the idea to implement live snapshot merge next?  Or do
 you have something else planned to avoid growing the backing file
 chain each time mirrored-snapshot is used?

The general idea is that I don't expect the need to migrate to a new
storage to be very frequent. Being able to live merge the new snapshot
would be great.

--
Federico



[Qemu-devel] Live Block Migration using Mirroring

2012-02-22 Thread Federico Simoncelli
Hi,
 recently I've been working on live block migration combining the live
snapshots and the blkmirror patch sent by Marcelo Tosatti few months ago.

The design is summarized at this url as Mirrored-Snapshot: 

http://www.ovirt.org/wiki/Features/Design/StorageLiveMigration

The design assumes that the qemu process can reach both the source and
destination storages and no real VM migration between hosts is involved.
The principal problem that it tries to solve is moving a VM to a new
reachable storage (more space, faster) without temporarily disrupting its
services.

The following set of patches are implementing the required changes in
QEMU.

Here it is a quick example of the use case (for consistency with the
design at the url above I will use the same step numbers):

Preparation
===

$ mkdir /tmp/{src/dst}
$ qemu-img create -f qcow2 /tmp/src/hd0base.qcow2 20G
Formatting '/tmp/src/hd0base.qcow2', fmt=qcow2 size=21474836480
encryption=off cluster_size=65536

Step 1 - Initital Scenario
==
VM1 is running on the src/hd0base. (Where = stands for uses)

[src/hd0base] = VM1(read-write)

$ qemu-system-x86_64 -hda /tmp/src/hd0base.qcow2 -monitor stdio
QEMU 1.0.50 monitor - type 'help' for more information
(qemu)

Step 3 - Mirrored Live Snapshot
===
A mirrored live snapshot is issued using src/hd0snap1 and dst/hd0snap1 as
image files. (Where - stands for has backing file)

[src/hd0base] - [src/hd0snap1] = VM1(read-write)
 ...  - [dst/hd0snap1] = VM1(write-only)

$ qemu-img create -f qcow2 \
   -b /tmp/src/hd0base.qcow2 /tmp/src/hd0snap1.qcow2 20G
Formatting '/tmp/src/hd0snap1.qcow2', fmt=qcow2 size=21474836480
backing_file='/tmp/src/hd0base.qcow2' encryption=off cluster_size=65536

$ qemu-img create -f qcow2 \
   -b /tmp/dst/hd0base.qcow2 /tmp/dst/hd0snap1.qcow2 20G
Formatting '/tmp/dst/hd0snap1.qcow2', fmt=qcow2 size=21474836480
backing_file='/tmp/src/hd0base.qcow2' encryption=off cluster_size=65536

(qemu) snapshot_blkdev -n ide0-hd0 \
 blkmirror:/tmp/src/hd0snap1.qcow2:/tmp/dst/hd0snap1.qcow2 blkmirror

Step 4 - Backing File Copy
==
An external manager copies src/hd0base to the destination dst/hd0base.

[src/hd0base] - [src/hd0snap1] = VM1(read-write)
[dst/hd0base] - [dst/hd0snap1] = VM1(write-only)

$ cp -a /tmp/src/hd0base.qcow2 /tmp/dst/hd0base.qcow2

Step 5 - Final Switch to Destination

VM1 is now able to switch to the destination for both read and write
operations.

[src/hd0base] - [src/hd0snap1] = VM1(read-write)

(qemu) snapshot_blkdev -n ide0-hd0 /tmp/dst/hd0snap1.qcow2

-- 
Federico




[Qemu-devel] [PATCH 2/3] Update the blkmirror block driver

2012-02-22 Thread Federico Simoncelli
Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 block/blkmirror.c |   26 +-
 1 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/block/blkmirror.c b/block/blkmirror.c
index 1c02710..1cfd2fb 100644
--- a/block/blkmirror.c
+++ b/block/blkmirror.c
@@ -46,7 +46,7 @@ static int blkmirror_open(BlockDriverState *bs, const char 
*filename, int flags)
 filename += strlen(blkmirror:);
 
 /* Parse the raw image filename */
-filename2 = qemu_malloc(strlen(filename)+1);
+filename2 = qemu_vmalloc(strlen(filename)+1);
 escape = 0;
 for (i = n = 0; i  strlen(filename); i++) {
 if (!escape  filename[i] == ':') {
@@ -66,11 +66,11 @@ static int blkmirror_open(BlockDriverState *bs, const char 
*filename, int flags)
 
 m-bs[0] = bdrv_new();
 if (m-bs[0] == NULL) {
-free(filename2);
+qemu_vfree(filename2);
 return -ENOMEM;
 }
 ret = bdrv_open(m-bs[0], filename2, flags, NULL);
-free(filename2);
+qemu_vfree(filename2);
 if (ret  0) {
 return ret;
 }
@@ -81,7 +81,7 @@ static int blkmirror_open(BlockDriverState *bs, const char 
*filename, int flags)
 bdrv_delete(m-bs[0]);
 return -ENOMEM;
 }
-ret = bdrv_open(m-bs[1], filename, flags, NULL);
+ret = bdrv_open(m-bs[1], filename, flags | BDRV_O_NO_BACKING, NULL);
 if (ret  0) {
 bdrv_delete(m-bs[0]);
 return ret;
@@ -101,17 +101,17 @@ static void blkmirror_close(BlockDriverState *bs)
 }
 }
 
-static int blkmirror_flush(BlockDriverState *bs)
+static coroutine_fn int blkmirror_co_flush(BlockDriverState *bs)
 {
 BdrvMirrorState *m = bs-opaque;
 int ret;
 
-ret = bdrv_flush(m-bs[0]);
+ret = bdrv_co_flush(m-bs[0]);
 if (ret  0) {
 return ret;
 }
 
-return bdrv_flush(m-bs[1]);
+return bdrv_co_flush(m-bs[1]);
 }
 
 static int64_t blkmirror_getlength(BlockDriverState *bs)
@@ -242,18 +242,18 @@ static BlockDriverAIOCB 
*blkmirror_aio_flush(BlockDriverState *bs,
 return dcb-common;
 }
 
-static int blkmirror_discard(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors)
+static coroutine_fn int blkmirror_co_discard(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors)
 {
 BdrvMirrorState *m = bs-opaque;
 int ret;
 
-ret = bdrv_discard(m-bs[0], sector_num, nb_sectors);
+ret = bdrv_co_discard(m-bs[0], sector_num, nb_sectors);
 if (ret  0) {
 return ret;
 }
 
-return bdrv_discard(m-bs[1], sector_num, nb_sectors);
+return bdrv_co_discard(m-bs[1], sector_num, nb_sectors);
 }
 
 
@@ -266,8 +266,8 @@ static BlockDriver bdrv_blkmirror = {
 
 .bdrv_file_open = blkmirror_open,
 .bdrv_close = blkmirror_close,
-.bdrv_flush = blkmirror_flush,
-.bdrv_discard   = blkmirror_discard,
+.bdrv_co_flush_to_disk = blkmirror_co_flush,
+.bdrv_co_discard= blkmirror_co_discard,
 
 .bdrv_aio_readv = blkmirror_aio_readv,
 .bdrv_aio_writev= blkmirror_aio_writev,
-- 
1.7.1




[Qemu-devel] [PATCH 3/3] Add nocreate option to snapshot_blkdev

2012-02-22 Thread Federico Simoncelli
Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 blockdev.c   |   14 --
 hmp-commands.hx  |   16 ++--
 hmp.c|4 +++-
 qapi-schema.json |8 +++-
 qmp-commands.hx  |2 +-
 5 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 7a6613a..2c3e10a 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -647,7 +647,7 @@ void do_commit(Monitor *mon, const QDict *qdict)
 
 void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
 bool has_format, const char *format,
-Error **errp)
+bool has_nocreate, bool nocreate, Error **errp)
 {
 BlockDriverState *bs;
 BlockDriver *drv, *old_drv, *proto_drv;
@@ -686,11 +686,13 @@ void qmp_blockdev_snapshot_sync(const char *device, const 
char *snapshot_file,
 return;
 }
 
-ret = bdrv_img_create(snapshot_file, format, bs-filename,
-  bs-drv-format_name, NULL, -1, flags);
-if (ret) {
-error_set(errp, QERR_UNDEFINED_ERROR);
-return;
+if (!(has_nocreate  nocreate)) {
+ret = bdrv_img_create(snapshot_file, format, bs-filename,
+  bs-drv-format_name, NULL, -1, flags);
+if (ret) {
+error_set(errp, QERR_UNDEFINED_ERROR);
+return;
+}
 }
 
 bdrv_drain_all();
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 573b823..ff49412 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -868,14 +868,18 @@ ETEXI
 
 {
 .name   = snapshot_blkdev,
-.args_type  = device:B,snapshot-file:s?,format:s?,
-.params = device [new-image-file] [format],
+.args_type  = nocreate:-n,device:B,snapshot-file:s?,format:s?,
+.params = [-n] device [new-image-file] [format],
 .help   = initiates a live snapshot\n\t\t\t
   of device. If a new image file is specified, 
the\n\t\t\t
-  new image file will become the new root image.\n\t\t\t
-  If format is specified, the snapshot file will\n\t\t\t
-  be created in that format. Otherwise the\n\t\t\t
-  snapshot will be internal! (currently unsupported),
+  new image file will be created (unless -n is\n\t\t\t
+  specified) and will become the new root image.\n\t\t\t
+  The -n (nocreate) option is potentially 
dangerous\n\t\t\t
+  if the image wasn't prepared in the correct way\n\t\t\t
+  by an external manager. If format is specified,\n\t\t\t
+  the snapshot file will be created in that 
format.\n\t\t\t
+  Otherwise the snapshot will be internal!\n\t\t\t
+  (currently unsupported),
 .mhandler.cmd = hmp_snapshot_blkdev,
 },
 
diff --git a/hmp.c b/hmp.c
index 8ff8c94..44868c7 100644
--- a/hmp.c
+++ b/hmp.c
@@ -684,6 +684,7 @@ void hmp_block_resize(Monitor *mon, const QDict *qdict)
 
 void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict)
 {
+int nocreate = qdict_get_try_bool(qdict, nocreate, 0);
 const char *device = qdict_get_str(qdict, device);
 const char *filename = qdict_get_try_str(qdict, snapshot-file);
 const char *format = qdict_get_try_str(qdict, format);
@@ -697,7 +698,8 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict)
 return;
 }
 
-qmp_blockdev_snapshot_sync(device, filename, !!format, format, errp);
+qmp_blockdev_snapshot_sync(device, filename,
+   !!format, format, true, nocreate, errp);
 hmp_handle_error(mon, errp);
 }
 
diff --git a/qapi-schema.json b/qapi-schema.json
index d02ee86..ac652c2 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1116,9 +1116,14 @@
 # @snapshot-file: the target of the new image. If the file exists, or if it
 # is a device, the snapshot will be created in the existing
 # file/device. If does not exist, a new file will be created.
+# This behavior can be overridden using the nocreate option.
 #
 # @format: #optional the format of the snapshot image, default is 'qcow2'.
 #
+# @nocreate: #optional use the target image avoiding the creation. This is
+#a potentially dangerous option if the image wasn't prepared
+#in the correct way by an external manager.
+#
 # Returns: nothing on success
 #  If @device is not a valid block device, DeviceNotFound
 #  If @snapshot-file can't be opened, OpenFileFailed
@@ -1133,7 +1138,8 @@
 # Since 0.14.0
 ##
 { 'command': 'blockdev-snapshot-sync',
-  'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str' } }
+  'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str',
+'*nocreate': 'bool

[Qemu-devel] [PATCH 1/3] Add blkmirror block driver

2012-02-22 Thread Federico Simoncelli
From: Marcelo Tosatti mtosa...@redhat.com

Mirrored writes are used by live block copy.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
---
 Makefile.objs  |2 +-
 block/blkmirror.c  |  282 
 docs/blkmirror.txt |   15 +++
 3 files changed, 298 insertions(+), 1 deletions(-)
 create mode 100644 block/blkmirror.c
 create mode 100644 docs/blkmirror.txt

diff --git a/Makefile.objs b/Makefile.objs
index 391e524..cd65e1b 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -34,7 +34,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o 
dmg.o bochs.o vpc.o vv
 block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o
 block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-nested-y += qed-check.o
-block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
+block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o 
blkmirror.o
 block-nested-y += stream.o
 block-nested-$(CONFIG_WIN32) += raw-win32.o
 block-nested-$(CONFIG_POSIX) += raw-posix.o
diff --git a/block/blkmirror.c b/block/blkmirror.c
new file mode 100644
index 000..1c02710
--- /dev/null
+++ b/block/blkmirror.c
@@ -0,0 +1,282 @@
+/*
+ * Block driver for mirrored writes.
+ *
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include stdarg.h
+#include block_int.h
+
+typedef struct {
+BlockDriverState *bs[2];
+} BdrvMirrorState;
+
+typedef struct DupAIOCB DupAIOCB;
+
+typedef struct SingleAIOCB {
+BlockDriverAIOCB *aiocb;
+int finished;
+DupAIOCB *parent;
+} SingleAIOCB;
+
+struct DupAIOCB {
+BlockDriverAIOCB common;
+int count;
+
+BlockDriverCompletionFunc *cb;
+SingleAIOCB aios[2];
+int ret;
+};
+
+/* Valid blkmirror filenames look like
+ * blkmirror:path/to/image1:path/to/image2 */
+static int blkmirror_open(BlockDriverState *bs, const char *filename, int 
flags)
+{
+BdrvMirrorState *m = bs-opaque;
+int ret, escape, i, n;
+char *filename2;
+
+/* Parse the blkmirror: prefix */
+if (strncmp(filename, blkmirror:, strlen(blkmirror:))) {
+return -EINVAL;
+}
+filename += strlen(blkmirror:);
+
+/* Parse the raw image filename */
+filename2 = qemu_malloc(strlen(filename)+1);
+escape = 0;
+for (i = n = 0; i  strlen(filename); i++) {
+if (!escape  filename[i] == ':') {
+break;
+}
+if (!escape  filename[i] == '\\') {
+escape = 1;
+} else {
+escape = 0;
+}
+
+if (!escape) {
+filename2[n++] = filename[i];
+}
+}
+filename2[n] = '\0';
+
+m-bs[0] = bdrv_new();
+if (m-bs[0] == NULL) {
+free(filename2);
+return -ENOMEM;
+}
+ret = bdrv_open(m-bs[0], filename2, flags, NULL);
+free(filename2);
+if (ret  0) {
+return ret;
+}
+filename += i + 1;
+
+m-bs[1] = bdrv_new();
+if (m-bs[1] == NULL) {
+bdrv_delete(m-bs[0]);
+return -ENOMEM;
+}
+ret = bdrv_open(m-bs[1], filename, flags, NULL);
+if (ret  0) {
+bdrv_delete(m-bs[0]);
+return ret;
+}
+
+return 0;
+}
+
+static void blkmirror_close(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs-opaque;
+int i;
+
+for (i = 0; i  2; i++) {
+bdrv_delete(m-bs[i]);
+m-bs[i] = NULL;
+}
+}
+
+static int blkmirror_flush(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs-opaque;
+int ret;
+
+ret = bdrv_flush(m-bs[0]);
+if (ret  0) {
+return ret;
+}
+
+return bdrv_flush(m-bs[1]);
+}
+
+static int64_t blkmirror_getlength(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs-opaque;
+
+return bdrv_getlength(m-bs[0]);
+}
+
+static BlockDriverAIOCB *blkmirror_aio_readv(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+BdrvMirrorState *m = bs-opaque;
+return bdrv_aio_readv(m-bs[0], sector_num, qiov, nb_sectors, cb, opaque);
+}
+
+static void dup_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+DupAIOCB *acb = container_of(blockacb, DupAIOCB, common);
+int i;
+
+for (i = 0 ; i  2; i++) {
+if (!acb-aios[i].finished) {
+bdrv_aio_cancel(acb-aios[i].aiocb);
+}
+}
+qemu_aio_release(acb);
+}
+
+static AIOPool dup_aio_pool = {
+.aiocb_size = sizeof(DupAIOCB),
+.cancel = dup_aio_cancel,
+};
+
+static void blkmirror_aio_cb(void *opaque, int ret)
+{
+SingleAIOCB *scb = opaque;
+DupAIOCB *dcb = scb-parent;
+
+

Re: [Qemu-devel] [vdsm] oVirt Live Snapshots

2012-02-02 Thread Federico Simoncelli
- Original Message -
 From: Shu Ming shum...@linux.vnet.ibm.com
 To: Federico Simoncelli fsimo...@redhat.com
 Cc: qemu-devel@nongnu.org, libvir-l...@redhat.com, VDSM Project Development 
 vdsm-de...@lists.fedorahosted.org,
 Dave Allan dal...@redhat.com, Eric Blake ebl...@redhat.com
 Sent: Thursday, February 2, 2012 1:59:01 PM
 Subject: Re: [vdsm] oVirt Live Snapshots

 Can someone explain what is DB in this wiki page?

It is the ovirt-engine database, where the VMs/images information
and status is stored.
That part of the wiki should be improved.

 See,


 Live snapshots operation extend regular snapshots as follow:

 • Create a locked snapshot in DB


--
Federico



[Qemu-devel] oVirt Live Snapshots

2012-01-30 Thread Federico Simoncelli
Hi,
  oVirt, and more specifically VDSM, is currently implementing the live
snapshot feature using the API/commands provided by libvirt and qemu.
It would be great if you could review the design and the current open
issues at:

  http://ovirt.org/wiki/Live_Snapshots

Thank you,
-- 
Federico



Re: [Qemu-devel] [PATCH] qemu: new option for snapshot_blkdev to avoid image creation

2011-10-11 Thread Federico Simoncelli
- Original Message -
 From: Dor Laor dl...@redhat.com
 To: Federico Simoncelli fsimo...@redhat.com
 Cc: qemu-devel@nongnu.org, aba...@redhat.com, Kevin Wolf kw...@redhat.com
 Sent: Friday, October 7, 2011 12:45:06 AM
 Subject: Re: [Qemu-devel] [PATCH] qemu: new option for snapshot_blkdev to 
 avoid image creation
 
 On 10/03/2011 06:09 PM, Federico Simoncelli wrote:
  Add the new option [-n] for snapshot_blkdev to avoid the image
  creation.
  The file provided as [new-image-file] is considered as already
  initialized
  and will be used after passing a check for the backing file.
 
 Seems ok to me as a way to go around fdget and still have selinux
 gain.
 Worth to get Kevin's view too.

Hi Kevin, any news on this?

 Federico, would you like to ack or extend the design:
 http://wiki.qemu.org/Features/Snapshots

Dor, should I do it now or only after my patch is acked?

-- 
Federico



Re: [Qemu-devel] New option for snapshot_blkdev to avoid image creation

2011-10-04 Thread Federico Simoncelli
- Original Message -
 From: Stefan Hajnoczi stefa...@gmail.com
 To: Federico Simoncelli fsimo...@redhat.com
 Cc: qemu-devel@nongnu.org, aba...@redhat.com, dl...@redhat.com
 Sent: Tuesday, October 4, 2011 9:33:48 AM
 Subject: Re: [Qemu-devel] New option for snapshot_blkdev to avoid image 
 creation
 
 On Mon, Oct 03, 2011 at 04:09:01PM +, Federico Simoncelli wrote:
  In some situations might be useful to let qemu use an image that
  was
  prepared for a live snapshot.
  The advantage is that creating the snapshot file outside of the
  qemu
  process we can use the whole range of options provided by the
  format
  (eg for qcow2: encryption, cluster_size and preallocation).
  It is also possible to pre-set a relative path to the backing file
  (now it is created by default as absolute path).
  In the long run it can also avoid the danger of reimplementing
  qemu-img
  inside qemu (if we wanted to expose such options when a snapshot is
  requested).
 
 When the image file is created based on the backing file size:
 
 $ qemu-img create -f qcow2 -o backing_file=master.img vm001.qcow2
 
 It turns out that bdrv_img_create() opens the backing file with
 read/write permissions.  This is generally a bad idea but especially
 dangerous when the VM currently has the image file open already since
 image formats are not designed for multiple initiators (clustering).

Hi Stefan, are you sure that bdrv_img_create opens the backing file
with read/write permissions?
After a quick check I can't see that happening:

$ ./qemu-img create -f qcow2 /tmp/master.img 1G
Formatting '/tmp/master.img', fmt=qcow2 size=1073741824 encryption=off 
cluster_size=65536

$ strace -e trace=open ./qemu-img create -f qcow2 -o 
backing_file=/tmp/master.img /tmp/vm001.qcow2
[...]
open(/tmp/vm001.qcow2, O_RDONLY|O_NONBLOCK) = -1 ENOENT (No such file or 
directory)
open(/tmp/vm001.qcow2, O_RDONLY|O_NONBLOCK) = -1 ENOENT (No such file or 
directory)
open(/tmp/master.img, O_RDONLY|O_NONBLOCK) = 3
open(/tmp/master.img, O_RDONLY|O_NONBLOCK) = 3
open(/tmp/master.img, O_RDONLY|O_DSYNC|O_CLOEXEC) = 3
open(/tmp/master.img, O_RDONLY|O_NONBLOCK) = 3
open(/tmp/master.img, O_RDONLY|O_NONBLOCK) = 3
open(/tmp/master.img, O_RDONLY|O_CLOEXEC) = 3
Formatting '/tmp/vm001.qcow2', fmt=qcow2 size=1073741824 
backing_file='/tmp/master.img' encryption=off cluster_size=65536 
open(/tmp/vm001.qcow2, O_RDONLY|O_NONBLOCK) = -1 ENOENT (No such file or 
directory)
open(/tmp/vm001.qcow2, O_RDONLY|O_NONBLOCK) = -1 ENOENT (No such file or 
directory)
open(/tmp/vm001.qcow2, O_WRONLY|O_CREAT|O_TRUNC, 0644) = 6
open(/tmp/vm001.qcow2, O_RDONLY|O_NONBLOCK) = 6
open(/tmp/vm001.qcow2, O_RDONLY|O_NONBLOCK) = 6
open(/tmp/vm001.qcow2, O_RDWR|O_DSYNC|O_CLOEXEC) = 6
open(/tmp/vm001.qcow2, O_RDONLY|O_NONBLOCK) = 6
open(/tmp/vm001.qcow2, O_RDONLY|O_NONBLOCK) = 6
open(/tmp/vm001.qcow2, O_RDWR|O_CLOEXEC) = 6

-- 
Federico



[Qemu-devel] New option for snapshot_blkdev to avoid image creation

2011-10-03 Thread Federico Simoncelli
In some situations might be useful to let qemu use an image that was
prepared for a live snapshot.
The advantage is that creating the snapshot file outside of the qemu
process we can use the whole range of options provided by the format
(eg for qcow2: encryption, cluster_size and preallocation).
It is also possible to pre-set a relative path to the backing file
(now it is created by default as absolute path).
In the long run it can also avoid the danger of reimplementing qemu-img
inside qemu (if we wanted to expose such options when a snapshot is
requested).

-- 
Federico.




[Qemu-devel] [PATCH] qemu: new option for snapshot_blkdev to avoid image creation

2011-10-03 Thread Federico Simoncelli
Add the new option [-n] for snapshot_blkdev to avoid the image creation.
The file provided as [new-image-file] is considered as already initialized
and will be used after passing a check for the backing file.

Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 blockdev.c  |   54 --
 hmp-commands.hx |7 ---
 qmp-commands.hx |4 ++--
 3 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 0827bf7..bd46808 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -550,8 +550,53 @@ void do_commit(Monitor *mon, const QDict *qdict)
 }
 }
 
+static int check_snapshot_file(const char *filename, const char *oldfilename,
+   int flags, BlockDriver *drv)
+{
+BlockDriverState *bs;
+char bak_filename[1024], *abs_filename;
+int ret = 0;
+
+bs = bdrv_new();
+if (!bs) {
+return -1;
+}
+
+ret = bdrv_open(bs, filename, flags, drv);
+if (ret) {
+qerror_report(QERR_OPEN_FILE_FAILED, filename);
+goto err0;
+}
+
+if (bs-backing_file) {
+path_combine(bak_filename, sizeof(bak_filename),
+ filename, bs-backing_file);
+
+abs_filename = realpath(bak_filename, NULL);
+if (!abs_filename) {
+ret = -1;
+goto err1;
+}
+
+if (strcmp(abs_filename, oldfilename)) {
+qerror_report(QERR_OPEN_FILE_FAILED, filename);
+ret = -1;
+}
+
+free(abs_filename);
+}
+
+err1:
+bdrv_close(bs);
+
+err0:
+bdrv_delete(bs);
+return ret;
+}
+
 int do_snapshot_blkdev(Monitor *mon, const QDict *qdict, QObject **ret_data)
 {
+const int nocreate = qdict_get_try_bool(qdict, nocreate, 0);
 const char *device = qdict_get_str(qdict, device);
 const char *filename = qdict_get_try_str(qdict, snapshot-file);
 const char *format = qdict_get_try_str(qdict, format);
@@ -597,8 +642,13 @@ int do_snapshot_blkdev(Monitor *mon, const QDict *qdict, 
QObject **ret_data)
 goto out;
 }
 
-ret = bdrv_img_create(filename, format, bs-filename,
-  bs-drv-format_name, NULL, -1, flags);
+if (nocreate) {
+ret = check_snapshot_file(filename, bs-filename, flags, drv);
+} else {
+ret = bdrv_img_create(filename, format, bs-filename,
+  bs-drv-format_name, NULL, -1, flags);
+}
+
 if (ret) {
 goto out;
 }
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 9e1cca8..eb9fcd4 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -840,11 +840,12 @@ ETEXI
 
 {
 .name   = snapshot_blkdev,
-.args_type  = device:B,snapshot-file:s?,format:s?,
-.params = device [new-image-file] [format],
+.args_type  = nocreate:-n,device:B,snapshot-file:s?,format:s?,
+.params = [-n] device [new-image-file] [format],
 .help   = initiates a live snapshot\n\t\t\t
   of device. If a new image file is specified, 
the\n\t\t\t
-  new image file will become the new root image.\n\t\t\t
+  new image file will be created (unless -n is\n\t\t\t
+  specified) and will become the new root image.\n\t\t\t
   If format is specified, the snapshot file will\n\t\t\t
   be created in that format. Otherwise the\n\t\t\t
   snapshot will be internal! (currently unsupported),
diff --git a/qmp-commands.hx b/qmp-commands.hx
index d83bce5..7af36d8 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -695,8 +695,8 @@ EQMP
 
 {
 .name   = blockdev-snapshot-sync,
-.args_type  = device:B,snapshot-file:s?,format:s?,
-.params = device [new-image-file] [format],
+.args_type  = nocreate:-n,device:B,snapshot-file:s?,format:s?,
+.params = [-n] device [new-image-file] [format],
 .user_print = monitor_user_noop,
 .mhandler.cmd_new = do_snapshot_blkdev,
 },
-- 
1.7.1




[Qemu-devel] [PATCHv4] qemu-img: Add cache command line option

2011-06-20 Thread Federico Simoncelli
qemu-img currently writes disk images using writeback and filling
up the cache buffers which are then flushed by the kernel preventing
other processes from accessing the storage.
This is particularly bad in cluster environments where time-based
algorithms might be in place and accessing the storage within
certain timeouts is critical.
This patch adds the option to choose a cache method when writing
disk images.

Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 qemu-img-cmds.hx |6 ++--
 qemu-img.c   |   80 +-
 2 files changed, 70 insertions(+), 16 deletions(-)

diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 3072d38..2b70618 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -22,13 +22,13 @@ STEXI
 ETEXI
 
 DEF(commit, img_commit,
-commit [-f fmt] filename)
+commit [-f fmt] [-t cache] filename)
 STEXI
 @item commit [-f @var{fmt}] @var{filename}
 ETEXI
 
 DEF(convert, img_convert,
-convert [-c] [-p] [-f fmt] [-O output_fmt] [-o options] [-s 
snapshot_name] filename [filename2 [...]] output_filename)
+convert [-c] [-p] [-f fmt] [-t cache] [-O output_fmt] [-o options] [-s 
snapshot_name] filename [filename2 [...]] output_filename)
 STEXI
 @item convert [-c] [-f @var{fmt}] [-O @var{output_fmt}] [-o @var{options}] [-s 
@var{snapshot_name}] @var{filename} [@var{filename2} [...]] 
@var{output_filename}
 ETEXI
@@ -46,7 +46,7 @@ STEXI
 ETEXI
 
 DEF(rebase, img_rebase,
-rebase [-f fmt] [-p] [-u] -b backing_file [-F backing_fmt] filename)
+rebase [-f fmt] [-t cache] [-p] [-u] -b backing_file [-F backing_fmt] 
filename)
 STEXI
 @item rebase [-f @var{fmt}] [-u] -b @var{backing_file} [-F @var{backing_fmt}] 
@var{filename}
 ETEXI
diff --git a/qemu-img.c b/qemu-img.c
index 4f162d1..f904e32 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -40,6 +40,7 @@ typedef struct img_cmd_t {
 
 /* Default to cache=writeback as data integrity is not important for qemu-tcg. 
*/
 #define BDRV_O_FLAGS BDRV_O_CACHE_WB
+#define BDRV_DEFAULT_CACHE writeback
 
 static void format_print(void *opaque, const char *name)
 {
@@ -64,6 +65,8 @@ static void help(void)
Command parameters:\n
  'filename' is a disk image filename\n
  'fmt' is the disk image format. It is guessed automatically in 
most cases\n
+ 'cache' is the cache mode used to write the output disk image, 
the valid\n
+   options are: 'none', 'writeback' (default), 'writethrough' and 
'unsafe'\n
  'size' is the disk image size in bytes. Optional suffixes\n
'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' 
(gigabyte, 1024M)\n
and T (terabyte, 1024G) are supported. 'b' is ignored.\n
@@ -180,6 +183,27 @@ static int read_password(char *buf, int buf_size)
 }
 #endif
 
+static int set_cache_flag(const char *mode, int *flags)
+{
+*flags = ~BDRV_O_CACHE_MASK;
+
+if (!strcmp(mode, none) || !strcmp(mode, off)) {
+*flags |= BDRV_O_CACHE_WB;
+*flags |= BDRV_O_NOCACHE;
+} else if (!strcmp(mode, writeback)) {
+*flags |= BDRV_O_CACHE_WB;
+} else if (!strcmp(mode, unsafe)) {
+*flags |= BDRV_O_CACHE_WB;
+*flags |= BDRV_O_NO_FLUSH;
+} else if (!strcmp(mode, writethrough)) {
+/* this is the default */
+} else {
+return -1;
+}
+
+return 0;
+}
+
 static int print_block_option_help(const char *filename, const char *fmt)
 {
 BlockDriver *drv, *proto_drv;
@@ -441,13 +465,14 @@ static int img_check(int argc, char **argv)
 
 static int img_commit(int argc, char **argv)
 {
-int c, ret;
-const char *filename, *fmt;
+int c, ret, flags;
+const char *filename, *fmt, *cache;
 BlockDriverState *bs;
 
 fmt = NULL;
+cache = BDRV_DEFAULT_CACHE;
 for(;;) {
-c = getopt(argc, argv, f:h);
+c = getopt(argc, argv, f:ht:);
 if (c == -1) {
 break;
 }
@@ -459,6 +484,9 @@ static int img_commit(int argc, char **argv)
 case 'f':
 fmt = optarg;
 break;
+case 't':
+cache = optarg;
+break;
 }
 }
 if (optind = argc) {
@@ -466,7 +494,14 @@ static int img_commit(int argc, char **argv)
 }
 filename = argv[optind++];
 
-bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR);
+flags = BDRV_O_RDWR;
+ret = set_cache_flag(cache, flags);
+if (ret  0) {
+error_report(Invalid cache option: %s\n, cache);
+return -1;
+}
+
+bs = bdrv_new_open(filename, fmt, flags);
 if (!bs) {
 return 1;
 }
@@ -591,8 +626,8 @@ static int compare_sectors(const uint8_t *buf1, const 
uint8_t *buf2, int n,
 static int img_convert(int argc, char **argv)
 {
 int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size, cluster_sectors;
-int progress = 0;
-const char *fmt, *out_fmt, *out_baseimg, *out_filename;
+int progress = 0, flags

[Qemu-devel] [PATCHv3] qemu-img: Add cache command line option

2011-06-16 Thread Federico Simoncelli
qemu-img currently writes disk images using writeback and filling
up the cache buffers which are then flushed by the kernel preventing
other processes from accessing the storage.
This is particularly bad in cluster environments where time-based
algorithms might be in place and accessing the storage within
certain timeouts is critical.
This patch adds the option to choose a cache method when writing
disk images.

Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 qemu-img-cmds.hx |6 ++--
 qemu-img.c   |   80 +-
 2 files changed, 70 insertions(+), 16 deletions(-)

diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 3072d38..2b70618 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -22,13 +22,13 @@ STEXI
 ETEXI
 
 DEF(commit, img_commit,
-commit [-f fmt] filename)
+commit [-f fmt] [-t cache] filename)
 STEXI
 @item commit [-f @var{fmt}] @var{filename}
 ETEXI
 
 DEF(convert, img_convert,
-convert [-c] [-p] [-f fmt] [-O output_fmt] [-o options] [-s 
snapshot_name] filename [filename2 [...]] output_filename)
+convert [-c] [-p] [-f fmt] [-t cache] [-O output_fmt] [-o options] [-s 
snapshot_name] filename [filename2 [...]] output_filename)
 STEXI
 @item convert [-c] [-f @var{fmt}] [-O @var{output_fmt}] [-o @var{options}] [-s 
@var{snapshot_name}] @var{filename} [@var{filename2} [...]] 
@var{output_filename}
 ETEXI
@@ -46,7 +46,7 @@ STEXI
 ETEXI
 
 DEF(rebase, img_rebase,
-rebase [-f fmt] [-p] [-u] -b backing_file [-F backing_fmt] filename)
+rebase [-f fmt] [-t cache] [-p] [-u] -b backing_file [-F backing_fmt] 
filename)
 STEXI
 @item rebase [-f @var{fmt}] [-u] -b @var{backing_file} [-F @var{backing_fmt}] 
@var{filename}
 ETEXI
diff --git a/qemu-img.c b/qemu-img.c
index 4f162d1..c2a106e 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -40,6 +40,7 @@ typedef struct img_cmd_t {
 
 /* Default to cache=writeback as data integrity is not important for qemu-tcg. 
*/
 #define BDRV_O_FLAGS BDRV_O_CACHE_WB
+#define BDRV_DEFAULT_CACHE writeback
 
 static void format_print(void *opaque, const char *name)
 {
@@ -64,6 +65,8 @@ static void help(void)
Command parameters:\n
  'filename' is a disk image filename\n
  'fmt' is the disk image format. It is guessed automatically in 
most cases\n
+ 'cache' is the cache mode used to write the output disk image, 
the valid\n
+   options are: 'none', 'writeback' (default), 'writethrough' and 
'unsafe'\n
  'size' is the disk image size in bytes. Optional suffixes\n
'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' 
(gigabyte, 1024M)\n
and T (terabyte, 1024G) are supported. 'b' is ignored.\n
@@ -180,6 +183,27 @@ static int read_password(char *buf, int buf_size)
 }
 #endif
 
+static int set_cache_flag(const char *mode, int *flags)
+{
+*flags = ~BDRV_O_CACHE_MASK;
+
+if (!strcmp(mode, none) || !strcmp(mode, off)) {
+*flags |= BDRV_O_CACHE_WB;
+*flags |= BDRV_O_NOCACHE;
+} else if (!strcmp(mode, writeback)) {
+*flags |= BDRV_O_CACHE_WB;
+} else if (!strcmp(mode, unsafe)) {
+*flags |= BDRV_O_CACHE_WB;
+*flags |= BDRV_O_NO_FLUSH;
+} else if (!strcmp(mode, writethrough)) {
+/* this is the default */
+} else {
+return -1;
+}
+
+return 0;
+}
+
 static int print_block_option_help(const char *filename, const char *fmt)
 {
 BlockDriver *drv, *proto_drv;
@@ -441,13 +465,14 @@ static int img_check(int argc, char **argv)
 
 static int img_commit(int argc, char **argv)
 {
-int c, ret;
-const char *filename, *fmt;
+int c, ret, flags;
+const char *filename, *fmt, *cache;
 BlockDriverState *bs;
 
 fmt = NULL;
+cache = BDRV_DEFAULT_CACHE;
 for(;;) {
-c = getopt(argc, argv, f:h);
+c = getopt(argc, argv, f:ht:);
 if (c == -1) {
 break;
 }
@@ -459,6 +484,9 @@ static int img_commit(int argc, char **argv)
 case 'f':
 fmt = optarg;
 break;
+case 't':
+cache = optarg;
+break;
 }
 }
 if (optind = argc) {
@@ -466,7 +494,14 @@ static int img_commit(int argc, char **argv)
 }
 filename = argv[optind++];
 
-bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR);
+flags = BDRV_O_RDWR;
+ret = set_cache_flag(cache, flags);
+if (ret  0) {
+error_report(Invalid cache option: %s\n, cache);
+return -1;
+}
+
+bs = bdrv_new_open(filename, fmt, flags);
 if (!bs) {
 return 1;
 }
@@ -591,8 +626,8 @@ static int compare_sectors(const uint8_t *buf1, const 
uint8_t *buf2, int n,
 static int img_convert(int argc, char **argv)
 {
 int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size, cluster_sectors;
-int progress = 0;
-const char *fmt, *out_fmt, *out_baseimg, *out_filename;
+int progress = 0, flags

Re: [Qemu-devel] [PATCH] qemu-img: Add cache command line option

2011-06-16 Thread Federico Simoncelli
- Original Message -
 From: Christoph Hellwig h...@lst.de
 To: Federico Simoncelli fsimo...@redhat.com
 Cc: k...@vger.kernel.org, kw...@redhat.com, qemu-devel@nongnu.org, 
 a...@redhat.com
 Sent: Thursday, June 16, 2011 4:28:09 PM
 Subject: Re: [Qemu-devel] [PATCH] qemu-img: Add cache command line option
 On Wed, Jun 15, 2011 at 09:46:10AM -0400, Federico Simoncelli wrote:
  qemu-img currently writes disk images using writeback and filling
  up the cache buffers which are then flushed by the kernel preventing
  other processes from accessing the storage.
  This is particularly bad in cluster environments where time-based
  algorithms might be in place and accessing the storage within
  certain timeouts is critical.
  This patch adds the option to choose a cache method when writing
  disk images.
 
 Allowing to chose the mode is of course fine, but what about also
 choosing a good default? writethrough doesn't really make any sense
 for qemu-img, given that we can trivially flush the cache at the end
 of the operations. I'd also say that using the buffer cache doesn't
 make sense either, as there is little point in caching these
 operations.

I totally agree with you, I didn't change the default to increase the
chances for my patch to be accepted.
If we want cache=none as default we can easily change:

-#define BDRV_DEFAULT_CACHE writeback
+#define BDRV_DEFAULT_CACHE none

-- 
Federico



[Qemu-devel] [PATCH] qemu-img: Add cache command line option

2011-06-15 Thread Federico Simoncelli
qemu-img currently writes disk images using writeback and filling
up the cache buffers which are then flushed by the kernel preventing
other processes from accessing the storage.
This is particularly bad in cluster environments where time-based
algorithms might be in place and accessing the storage within
certain timeouts is critical.
This patch adds the option to choose a cache method when writing
disk images.

Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 qemu-img.c |   81 ++-
 1 files changed, 68 insertions(+), 13 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index 4f162d1..7609db5 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -40,6 +40,7 @@ typedef struct img_cmd_t {
 
 /* Default to cache=writeback as data integrity is not important for qemu-tcg. 
*/
 #define BDRV_O_FLAGS BDRV_O_CACHE_WB
+#define BDRV_DEFAULT_CACHE writeback
 
 static void format_print(void *opaque, const char *name)
 {
@@ -64,6 +65,8 @@ static void help(void)
Command parameters:\n
  'filename' is a disk image filename\n
  'fmt' is the disk image format. It is guessed automatically in 
most cases\n
+ 'cache' is the cache mode used to write the output disk image, 
the valid\n
+   options are: 'none', 'writeback' (default), 'writethrough' and 
'unsafe'\n
  'size' is the disk image size in bytes. Optional suffixes\n
'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' 
(gigabyte, 1024M)\n
and T (terabyte, 1024G) are supported. 'b' is ignored.\n
@@ -180,6 +183,27 @@ static int read_password(char *buf, int buf_size)
 }
 #endif
 
+static int set_cache_flag(const char *mode, int *flags)
+{
+*flags = ~BDRV_O_CACHE_MASK;
+
+if (!strcmp(mode, none) || !strcmp(mode, off)) {
+*flags |= BDRV_O_CACHE_WB;
+*flags |= BDRV_O_NOCACHE;
+} else if (!strcmp(mode, writeback)) {
+*flags |= BDRV_O_CACHE_WB;
+} else if (!strcmp(mode, unsafe)) {
+*flags |= BDRV_O_CACHE_WB;
+*flags |= BDRV_O_NO_FLUSH;
+} else if (!strcmp(mode, writethrough)) {
+/* this is the default */
+} else {
+return -1;
+}
+
+return 0;
+}
+
 static int print_block_option_help(const char *filename, const char *fmt)
 {
 BlockDriver *drv, *proto_drv;
@@ -441,13 +465,14 @@ static int img_check(int argc, char **argv)
 
 static int img_commit(int argc, char **argv)
 {
-int c, ret;
-const char *filename, *fmt;
+int c, ret, flags;
+const char *filename, *fmt, *cache;
 BlockDriverState *bs;
 
 fmt = NULL;
+cache = BDRV_DEFAULT_CACHE;
 for(;;) {
-c = getopt(argc, argv, f:h);
+c = getopt(argc, argv, f:ht:);
 if (c == -1) {
 break;
 }
@@ -459,6 +484,9 @@ static int img_commit(int argc, char **argv)
 case 'f':
 fmt = optarg;
 break;
+case 't':
+cache = optarg;
+break;
 }
 }
 if (optind = argc) {
@@ -466,7 +494,14 @@ static int img_commit(int argc, char **argv)
 }
 filename = argv[optind++];
 
-bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR);
+flags = BDRV_O_RDWR;
+ret = set_cache_flag(cache, flags);
+if (ret  0) {
+error_report(Invalid cache option: %s\n, cache);
+return -1;
+}
+
+bs = bdrv_new_open(filename, fmt, flags);
 if (!bs) {
 return 1;
 }
@@ -591,8 +626,8 @@ static int compare_sectors(const uint8_t *buf1, const 
uint8_t *buf2, int n,
 static int img_convert(int argc, char **argv)
 {
 int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size, cluster_sectors;
-int progress = 0;
-const char *fmt, *out_fmt, *out_baseimg, *out_filename;
+int progress = 0, flags;
+const char *fmt, *out_fmt, *cache, *out_baseimg, *out_filename;
 BlockDriver *drv, *proto_drv;
 BlockDriverState **bs = NULL, *out_bs = NULL;
 int64_t total_sectors, nb_sectors, sector_num, bs_offset;
@@ -608,10 +643,11 @@ static int img_convert(int argc, char **argv)
 
 fmt = NULL;
 out_fmt = raw;
+cache = BDRV_DEFAULT_CACHE;
 out_baseimg = NULL;
 compress = 0;
 for(;;) {
-c = getopt(argc, argv, f:O:B:s:hce6o:p);
+c = getopt(argc, argv, f:O:B:s:hce6o:pt:);
 if (c == -1) {
 break;
 }
@@ -649,6 +685,9 @@ static int img_convert(int argc, char **argv)
 case 'p':
 progress = 1;
 break;
+case 't':
+cache = optarg;
+break;
 }
 }
 
@@ -779,8 +818,14 @@ static int img_convert(int argc, char **argv)
 goto out;
 }
 
-out_bs = bdrv_new_open(out_filename, out_fmt,
-BDRV_O_FLAGS | BDRV_O_RDWR | BDRV_O_NO_FLUSH);
+flags = BDRV_O_RDWR;
+ret = set_cache_flag(cache, flags);
+if (ret  0) {
+error_report(Invalid cache option: %s\n, cache

[Qemu-devel] [PATCHv2] qemu-img: Add cache command line option

2011-06-15 Thread Federico Simoncelli
qemu-img currently writes disk images using writeback and filling
up the cache buffers which are then flushed by the kernel preventing
other processes from accessing the storage.
This is particularly bad in cluster environments where time-based
algorithms might be in place and accessing the storage within
certain timeouts is critical.
This patch adds the option to choose a cache method when writing
disk images.

Signed-off-by: Federico Simoncelli fsimo...@redhat.com
---
 qemu-img.c |   80 ++-
 1 files changed, 67 insertions(+), 13 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index 4f162d1..c2a106e 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -40,6 +40,7 @@ typedef struct img_cmd_t {
 
 /* Default to cache=writeback as data integrity is not important for qemu-tcg. 
*/
 #define BDRV_O_FLAGS BDRV_O_CACHE_WB
+#define BDRV_DEFAULT_CACHE writeback
 
 static void format_print(void *opaque, const char *name)
 {
@@ -64,6 +65,8 @@ static void help(void)
Command parameters:\n
  'filename' is a disk image filename\n
  'fmt' is the disk image format. It is guessed automatically in 
most cases\n
+ 'cache' is the cache mode used to write the output disk image, 
the valid\n
+   options are: 'none', 'writeback' (default), 'writethrough' and 
'unsafe'\n
  'size' is the disk image size in bytes. Optional suffixes\n
'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' 
(gigabyte, 1024M)\n
and T (terabyte, 1024G) are supported. 'b' is ignored.\n
@@ -180,6 +183,27 @@ static int read_password(char *buf, int buf_size)
 }
 #endif
 
+static int set_cache_flag(const char *mode, int *flags)
+{
+*flags = ~BDRV_O_CACHE_MASK;
+
+if (!strcmp(mode, none) || !strcmp(mode, off)) {
+*flags |= BDRV_O_CACHE_WB;
+*flags |= BDRV_O_NOCACHE;
+} else if (!strcmp(mode, writeback)) {
+*flags |= BDRV_O_CACHE_WB;
+} else if (!strcmp(mode, unsafe)) {
+*flags |= BDRV_O_CACHE_WB;
+*flags |= BDRV_O_NO_FLUSH;
+} else if (!strcmp(mode, writethrough)) {
+/* this is the default */
+} else {
+return -1;
+}
+
+return 0;
+}
+
 static int print_block_option_help(const char *filename, const char *fmt)
 {
 BlockDriver *drv, *proto_drv;
@@ -441,13 +465,14 @@ static int img_check(int argc, char **argv)
 
 static int img_commit(int argc, char **argv)
 {
-int c, ret;
-const char *filename, *fmt;
+int c, ret, flags;
+const char *filename, *fmt, *cache;
 BlockDriverState *bs;
 
 fmt = NULL;
+cache = BDRV_DEFAULT_CACHE;
 for(;;) {
-c = getopt(argc, argv, f:h);
+c = getopt(argc, argv, f:ht:);
 if (c == -1) {
 break;
 }
@@ -459,6 +484,9 @@ static int img_commit(int argc, char **argv)
 case 'f':
 fmt = optarg;
 break;
+case 't':
+cache = optarg;
+break;
 }
 }
 if (optind = argc) {
@@ -466,7 +494,14 @@ static int img_commit(int argc, char **argv)
 }
 filename = argv[optind++];
 
-bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR);
+flags = BDRV_O_RDWR;
+ret = set_cache_flag(cache, flags);
+if (ret  0) {
+error_report(Invalid cache option: %s\n, cache);
+return -1;
+}
+
+bs = bdrv_new_open(filename, fmt, flags);
 if (!bs) {
 return 1;
 }
@@ -591,8 +626,8 @@ static int compare_sectors(const uint8_t *buf1, const 
uint8_t *buf2, int n,
 static int img_convert(int argc, char **argv)
 {
 int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size, cluster_sectors;
-int progress = 0;
-const char *fmt, *out_fmt, *out_baseimg, *out_filename;
+int progress = 0, flags;
+const char *fmt, *out_fmt, *cache, *out_baseimg, *out_filename;
 BlockDriver *drv, *proto_drv;
 BlockDriverState **bs = NULL, *out_bs = NULL;
 int64_t total_sectors, nb_sectors, sector_num, bs_offset;
@@ -608,10 +643,11 @@ static int img_convert(int argc, char **argv)
 
 fmt = NULL;
 out_fmt = raw;
+cache = BDRV_DEFAULT_CACHE;
 out_baseimg = NULL;
 compress = 0;
 for(;;) {
-c = getopt(argc, argv, f:O:B:s:hce6o:p);
+c = getopt(argc, argv, f:O:B:s:hce6o:pt:);
 if (c == -1) {
 break;
 }
@@ -649,6 +685,9 @@ static int img_convert(int argc, char **argv)
 case 'p':
 progress = 1;
 break;
+case 't':
+cache = optarg;
+break;
 }
 }
 
@@ -779,8 +818,14 @@ static int img_convert(int argc, char **argv)
 goto out;
 }
 
-out_bs = bdrv_new_open(out_filename, out_fmt,
-BDRV_O_FLAGS | BDRV_O_RDWR | BDRV_O_NO_FLUSH);
+flags = BDRV_O_RDWR;
+ret = set_cache_flag(cache, flags);
+if (ret  0) {
+error_report(Invalid cache option: %s\n, cache