from:"Vladimir Sementsov\-Ogievskiy"

[Qemu-block] [PATCH v22 18/30] block/dirty-bitmap: add bdrv_dirty_bitmap_next()

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 block/dirty-bitmap.c | 7 +++
 include/block/dirty-bitmap.h | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 3c17c452ae..d1469418e6 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -718,3 +718,10 @@ bool bdrv_has_changed_persistent_bitmaps(BlockDriverState 
*bs)
 
 return false;
 }
+
+BdrvDirtyBitmap *bdrv_dirty_bitmap_next(BlockDriverState *bs,
+BdrvDirtyBitmap *bitmap)
+{
+return bitmap == NULL ? QLIST_FIRST(&bs->dirty_bitmaps) :
+QLIST_NEXT(bitmap, list);
+}
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 3995789218..ccf2f81640 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -95,5 +95,7 @@ bool bdrv_has_readonly_bitmaps(BlockDriverState *bs);
 bool bdrv_dirty_bitmap_get_autoload(const BdrvDirtyBitmap *bitmap);
 bool bdrv_dirty_bitmap_get_persistance(BdrvDirtyBitmap *bitmap);
 bool bdrv_has_changed_persistent_bitmaps(BlockDriverState *bs);
+BdrvDirtyBitmap *bdrv_dirty_bitmap_next(BlockDriverState *bs,
+BdrvDirtyBitmap *bitmap);
 
 #endif
-- 
2.11.1

[Qemu-block] [PATCH v22 19/30] qcow2: add persistent dirty bitmaps support

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

Store persistent dirty bitmaps in qcow2 image.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
---
 block/qcow2-bitmap.c | 475 +++
 block/qcow2.c|   9 +
 block/qcow2.h|   1 +
 3 files changed, 485 insertions(+)

diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 52e4616b8c..5f53486b22 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -27,6 +27,7 @@
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
+#include "qemu/cutils.h"
 
 #include "block/block_int.h"
 #include "block/qcow2.h"
@@ -42,6 +43,10 @@
 #define BME_MIN_GRANULARITY_BITS 9
 #define BME_MAX_NAME_SIZE 1023
 
+#if BME_MAX_TABLE_SIZE * 8ULL > INT_MAX
+#error In the code bitmap table physical size assumed to fit into int
+#endif
+
 /* Bitmap directory entry flags */
 #define BME_RESERVED_FLAGS 0xfffffffcU
 #define BME_FLAG_IN_USE (1U << 0)
@@ -72,6 +77,8 @@ typedef struct Qcow2BitmapTable {
 uint32_t size; /* number of 64bit entries */
 QSIMPLEQ_ENTRY(Qcow2BitmapTable) entry;
 } Qcow2BitmapTable;
+typedef QSIMPLEQ_HEAD(Qcow2BitmapTableList, Qcow2BitmapTable)
+Qcow2BitmapTableList;
 
 typedef struct Qcow2Bitmap {
 Qcow2BitmapTable table;
@@ -79,6 +86,8 @@ typedef struct Qcow2Bitmap {
 uint8_t granularity_bits;
 char *name;
 
+BdrvDirtyBitmap *dirty_bitmap;
+
 QSIMPLEQ_ENTRY(Qcow2Bitmap) entry;
 } Qcow2Bitmap;
 typedef QSIMPLEQ_HEAD(Qcow2BitmapList, Qcow2Bitmap) Qcow2BitmapList;
@@ -104,6 +113,15 @@ static int update_header_sync(BlockDriverState *bs)
 return bdrv_flush(bs);
 }
 
+static inline void bitmap_table_to_be(uint64_t *bitmap_table, size_t size)
+{
+size_t i;
+
+for (i = 0; i < size; ++i) {
+cpu_to_be64s(&bitmap_table[i]);
+}
+}
+
 static int check_table_entry(uint64_t entry, int cluster_size)
 {
 uint64_t offset;
@@ -127,6 +145,70 @@ static int check_table_entry(uint64_t entry, int 
cluster_size)
 return 0;
 }
 
+static int check_constraints_on_bitmap(BlockDriverState *bs,
+   const char *name,
+   uint32_t granularity,
+   Error **errp)
+{
+BDRVQcow2State *s = bs->opaque;
+int granularity_bits = ctz32(granularity);
+int64_t len = bdrv_getlength(bs);
+
+assert(granularity > 0);
+assert((granularity & (granularity - 1)) == 0);
+
+if (len < 0) {
+error_setg_errno(errp, -len, "Failed to get size of '%s'",
+ bdrv_get_device_or_node_name(bs));
+return len;
+}
+
+if (granularity_bits > BME_MAX_GRANULARITY_BITS) {
+error_setg(errp, "Granularity exceeds maximum (%llu bytes)",
+   1ULL << BME_MAX_GRANULARITY_BITS);
+return -EINVAL;
+}
+if (granularity_bits < BME_MIN_GRANULARITY_BITS) {
+error_setg(errp, "Granularity is under minimum (%llu bytes)",
+   1ULL << BME_MIN_GRANULARITY_BITS);
+return -EINVAL;
+}
+
+if ((len > (uint64_t)BME_MAX_PHYS_SIZE << granularity_bits) ||
+(len > (uint64_t)BME_MAX_TABLE_SIZE * s->cluster_size <<
+   granularity_bits))
+{
+error_setg(errp, "Too much space will be occupied by the bitmap. "
+   "Use larger granularity");
+return -EINVAL;
+}
+
+if (strlen(name) > BME_MAX_NAME_SIZE) {
+error_setg(errp, "Name length exceeds maximum (%u characters)",
+   BME_MAX_NAME_SIZE);
+return -EINVAL;
+}
+
+return 0;
+}
+
+static void clear_bitmap_table(BlockDriverState *bs, uint64_t *bitmap_table,
+   uint32_t bitmap_table_size)
+{
+BDRVQcow2State *s = bs->opaque;
+int i;
+
+for (i = 0; i < bitmap_table_size; ++i) {
+uint64_t addr = bitmap_table[i] & BME_TABLE_ENTRY_OFFSET_MASK;
+if (!addr) {
+continue;
+}
+
+qcow2_free_clusters(bs, addr, s->cluster_size, QCOW2_DISCARD_OTHER);
+bitmap_table[i] = 0;
+}
+}
+
 static int bitmap_table_load(BlockDriverState *bs, Qcow2BitmapTable *tb,
  uint64_t **bitmap_table)
 {
@@ -165,6 +247,28 @@ fail:
 return ret;
 }
 
+static int free_bitmap_clusters(BlockDriverState *bs, Qcow2BitmapTable *tb)
+{
+int ret;
+uint64_t *bitmap_table;
+
+ret = bitmap_table_load(bs, tb, &bitmap_table);
+if (ret < 0) {
+assert(bitmap_table == NULL);
+return ret;
+}
+
+clear_bitmap_table(bs, bitmap_table, tb->size);
+qcow2_free_clusters(bs, tb->offset, tb->size * sizeof(uint64_t),
+QCOW2_DISCARD_OTHER);
+g_free(bitmap_table);
+
+tb->offset = 0;
+

[Qemu-block] [PATCH v22 27/30] block/dirty-bitmap: add bdrv_remove_persistent_dirty_bitmap

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

Interface for removing persistent bitmap from its storage.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 block/dirty-bitmap.c | 18 ++
 include/block/block_int.h|  3 +++
 include/block/dirty-bitmap.h |  3 +++
 3 files changed, 24 insertions(+)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 5fcf917707..b2ca78b4d0 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -395,6 +395,7 @@ void bdrv_release_dirty_bitmap(BlockDriverState *bs, 
BdrvDirtyBitmap *bitmap)
 /**
  * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
  * There must not be any frozen bitmaps attached.
+ * This function does not remove persistent bitmaps from the storage.
  * Called with BQL taken.
  */
 void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
@@ -402,6 +403,23 @@ void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
 bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
 }
 
+/**
+ * Remove persistent dirty bitmap from the storage if it exists.
+ * Absence of bitmap is not an error, because we have the following scenario:
+ * BdrvDirtyBitmap can have .persistent = true but not yet saved and have no
+ * stored version. For such bitmap bdrv_remove_persistent_dirty_bitmap() should
+ * not fail.
+ * This function doesn't release corresponding BdrvDirtyBitmap.
+ */
+void bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs,
+ const char *name,
+ Error **errp)
+{
+if (bs->drv && bs->drv->bdrv_remove_persistent_dirty_bitmap) {
+bs->drv->bdrv_remove_persistent_dirty_bitmap(bs, name, errp);
+}
+}
+
 /* Called with BQL taken.  */
 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
 {
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 009b4d41df..b3be797a96 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -391,6 +391,9 @@ struct BlockDriver {
 const char *name,
 uint32_t granularity,
 Error **errp);
+void (*bdrv_remove_persistent_dirty_bitmap)(BlockDriverState *bs,
+const char *name,
+Error **errp);
 
 QLIST_ENTRY(BlockDriver) list;
 };
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 744479bc76..d38233efd8 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -25,6 +25,9 @@ BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs,
 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap);
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
 void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);
+void bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs,
+ const char *name,
+ Error **errp);
 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs);
-- 
2.11.1

[Qemu-block] [PATCH v22 11/30] qcow2: autoloading dirty bitmaps

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

Auto loading bitmaps are bitmaps in Qcow2, with the AUTO flag set. They
are loaded when the image is opened and become BdrvDirtyBitmaps for the
corresponding drive.

Extra data in bitmaps is not supported for now.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 block/qcow2-bitmap.c | 389 +++
 block/qcow2.c|  17 ++-
 block/qcow2.h|   2 +
 3 files changed, 406 insertions(+), 2 deletions(-)

diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index b8e472b3e8..2c7b057e21 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -44,6 +44,8 @@
 
 /* Bitmap directory entry flags */
 #define BME_RESERVED_FLAGS 0xfffffffcU
+#define BME_FLAG_IN_USE (1U << 0)
+#define BME_FLAG_AUTO   (1U << 1)
 
 /* bits [1, 8] U [56, 63] are reserved */
 #define BME_TABLE_ENTRY_RESERVED_MASK 0xff000000000001feULL
@@ -85,6 +87,23 @@ typedef enum BitmapType {
 BT_DIRTY_TRACKING_BITMAP = 1
 } BitmapType;
 
+static inline bool can_write(BlockDriverState *bs)
+{
+return !bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE);
+}
+
+static int update_header_sync(BlockDriverState *bs)
+{
+int ret;
+
+ret = qcow2_update_header(bs);
+if (ret < 0) {
+return ret;
+}
+
+return bdrv_flush(bs);
+}
+
 static int check_table_entry(uint64_t entry, int cluster_size)
 {
 uint64_t offset;
@@ -146,6 +165,120 @@ fail:
 return ret;
 }
 
+/* This function returns the number of disk sectors covered by a single qcow2
+ * cluster of bitmap data. */
+static uint64_t sectors_covered_by_bitmap_cluster(const BDRVQcow2State *s,
+  const BdrvDirtyBitmap 
*bitmap)
+{
+uint32_t sector_granularity =
+bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS;
+
+return (uint64_t)sector_granularity * (s->cluster_size << 3);
+}
+
+/* load_bitmap_data
+ * @bitmap_table entries must satisfy specification constraints.
+ * @bitmap must be cleared */
+static int load_bitmap_data(BlockDriverState *bs,
+const uint64_t *bitmap_table,
+uint32_t bitmap_table_size,
+BdrvDirtyBitmap *bitmap)
+{
+int ret = 0;
+BDRVQcow2State *s = bs->opaque;
+uint64_t sector, sbc;
+uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap);
+uint8_t *buf = NULL;
+uint64_t i, tab_size =
+size_to_clusters(s,
+bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_size));
+
+if (tab_size != bitmap_table_size || tab_size > BME_MAX_TABLE_SIZE) {
+return -EINVAL;
+}
+
+buf = g_malloc(s->cluster_size);
+sbc = sectors_covered_by_bitmap_cluster(s, bitmap);
+for (i = 0, sector = 0; i < tab_size; ++i, sector += sbc) {
+uint64_t count = MIN(bm_size - sector, sbc);
+uint64_t entry = bitmap_table[i];
+uint64_t offset = entry & BME_TABLE_ENTRY_OFFSET_MASK;
+
+assert(check_table_entry(entry, s->cluster_size) == 0);
+
+if (offset == 0) {
+if (entry & BME_TABLE_ENTRY_FLAG_ALL_ONES) {
+bdrv_dirty_bitmap_deserialize_ones(bitmap, sector, count,
+   false);
+} else {
+/* No need to deserialize zeros because the dirty bitmap is
+ * already cleared */
+}
+} else {
+ret = bdrv_pread(bs->file, offset, buf, s->cluster_size);
+if (ret < 0) {
+goto finish;
+}
+bdrv_dirty_bitmap_deserialize_part(bitmap, buf, sector, count,
+   false);
+}
+}
+ret = 0;
+
+bdrv_dirty_bitmap_deserialize_finish(bitmap);
+
+finish:
+g_free(buf);
+
+return ret;
+}
+
+static BdrvDirtyBitmap *load_bitmap(BlockDriverState *bs,
+Qcow2Bitmap *bm, Error **errp)
+{
+int ret;
+uint64_t *bitmap_table = NULL;
+uint32_t granularity;
+BdrvDirtyBitmap *bitmap = NULL;
+
+if (bm->flags & BME_FLAG_IN_USE) {
+error_setg(errp, "Bitmap '%s' is in use", bm->name);
+goto fail;
+}
+
+ret = bitmap_table_load(bs, &bm->table, &bitmap_table);
+if (ret < 0) {
+error_setg_errno(errp, -ret,
+ "Could not read bitmap_table table from image for "
+ "bitmap '%s'", bm->name);
+goto fail;
+}
+
+granularity = 1U << bm->granularity_bits;
+bitmap = bdrv_create_dirty_bitmap(bs, granularity, bm->name, errp);
+if (bitmap == NULL) {
+goto fail;
+}
+
+ret = load_bitmap_data(bs, bitmap_table, bm->table.size, bi

[Qemu-block] [PATCH v22 06/30] block/dirty-bitmap: add deserialize_ones func

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

Add bdrv_dirty_bitmap_deserialize_ones() function, which is needed for
qcow2 bitmap loading, to handle unallocated bitmap parts, marked as
all-ones.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Kevin Wolf <kw...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 block/dirty-bitmap.c |  7 +++
 include/block/dirty-bitmap.h |  3 +++
 include/qemu/hbitmap.h   | 15 +++
 util/hbitmap.c   | 17 +
 4 files changed, 42 insertions(+)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index df0110cf9f..f502c45a70 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -586,6 +586,13 @@ void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap 
*bitmap,
 hbitmap_deserialize_zeroes(bitmap->bitmap, start, count, finish);
 }
 
+void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap,
+uint64_t start, uint64_t count,
+bool finish)
+{
+hbitmap_deserialize_ones(bitmap->bitmap, start, count, finish);
+}
+
 void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap)
 {
 hbitmap_deserialize_finish(bitmap->bitmap);
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index ab89f08e3d..05451c727d 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -66,6 +66,9 @@ void bdrv_dirty_bitmap_deserialize_part(BdrvDirtyBitmap 
*bitmap,
 void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap,
   uint64_t start, uint64_t count,
   bool finish);
+void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap,
+uint64_t start, uint64_t count,
+bool finish);
 void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap);
 
 /* Functions that require manual locking.  */
diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index 6b04391266..b52304ac29 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -229,6 +229,21 @@ void hbitmap_deserialize_zeroes(HBitmap *hb, uint64_t 
start, uint64_t count,
 bool finish);
 
 /**
+ * hbitmap_deserialize_ones
+ * @hb: HBitmap to operate on.
+ * @start: First bit to restore.
+ * @count: Number of bits to restore.
+ * @finish: Whether to call hbitmap_deserialize_finish automatically.
+ *
+ * Fills the bitmap with ones.
+ *
+ * If @finish is false, caller must call hbitmap_serialize_finish before using
+ * the bitmap.
+ */
+void hbitmap_deserialize_ones(HBitmap *hb, uint64_t start, uint64_t count,
+  bool finish);
+
+/**
  * hbitmap_deserialize_finish
  * @hb: HBitmap to operate on.
  *
diff --git a/util/hbitmap.c b/util/hbitmap.c
index 0b38817505..0c1591a594 100644
--- a/util/hbitmap.c
+++ b/util/hbitmap.c
@@ -551,6 +551,23 @@ void hbitmap_deserialize_zeroes(HBitmap *hb, uint64_t 
start, uint64_t count,
 }
 }
 
+void hbitmap_deserialize_ones(HBitmap *hb, uint64_t start, uint64_t count,
+  bool finish)
+{
+uint64_t el_count;
+unsigned long *first;
+
+if (!count) {
+return;
+}
+serialization_chunk(hb, start, count, &first, &el_count);
+
+memset(first, 0xff, el_count * sizeof(unsigned long));
+if (finish) {
+hbitmap_deserialize_finish(hb);
+}
+}
+
 void hbitmap_deserialize_finish(HBitmap *bitmap)
 {
 int64_t i, size, prev_size;
-- 
2.11.1

[Qemu-block] [PATCH v22 12/30] block: refactor bdrv_reopen_commit

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

Add bs local variable to simplify code.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 block.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/block.c b/block.c
index 694396281b..37d68e3276 100644
--- a/block.c
+++ b/block.c
@@ -2989,9 +2989,11 @@ error:
 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
 {
 BlockDriver *drv;
+BlockDriverState *bs;
 
 assert(reopen_state != NULL);
-drv = reopen_state->bs->drv;
+bs = reopen_state->bs;
+drv = bs->drv;
 assert(drv != NULL);
 
 /* If there are any driver level actions to take */
@@ -3000,13 +3002,13 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state)
 }
 
 /* set BDS specific flags now */
-QDECREF(reopen_state->bs->explicit_options);
+QDECREF(bs->explicit_options);
 
-reopen_state->bs->explicit_options   = reopen_state->explicit_options;
-reopen_state->bs->open_flags = reopen_state->flags;
-reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
+bs->explicit_options   = reopen_state->explicit_options;
+bs->open_flags = reopen_state->flags;
+bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
 
-bdrv_refresh_limits(reopen_state->bs, NULL);
+bdrv_refresh_limits(bs, NULL);
 }
 
 /*
-- 
2.11.1

[Qemu-block] [PATCH v22 28/30] qcow2: add .bdrv_remove_persistent_dirty_bitmap

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

Realize .bdrv_remove_persistent_dirty_bitmap interface.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 block/qcow2-bitmap.c | 41 +
 block/qcow2.c|  1 +
 block/qcow2.h|  3 +++
 3 files changed, 45 insertions(+)

diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index f45324e584..8448bec46d 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -1236,6 +1236,47 @@ static Qcow2Bitmap *find_bitmap_by_name(Qcow2BitmapList 
*bm_list,
 return NULL;
 }
 
+void qcow2_remove_persistent_dirty_bitmap(BlockDriverState *bs,
+  const char *name,
+  Error **errp)
+{
+int ret;
+BDRVQcow2State *s = bs->opaque;
+Qcow2Bitmap *bm;
+Qcow2BitmapList *bm_list;
+
+if (s->nb_bitmaps == 0) {
+/* Absence of the bitmap is not an error: see explanation above
+ * bdrv_remove_persistent_dirty_bitmap() definition. */
+return;
+}
+
+bm_list = bitmap_list_load(bs, s->bitmap_directory_offset,
+   s->bitmap_directory_size, errp);
+if (bm_list == NULL) {
+return;
+}
+
+bm = find_bitmap_by_name(bm_list, name);
+if (bm == NULL) {
+goto fail;
+}
+
+QSIMPLEQ_REMOVE(bm_list, bm, Qcow2Bitmap, entry);
+
+ret = update_ext_header_and_dir(bs, bm_list);
+if (ret < 0) {
+error_setg_errno(errp, -ret, "Failed to update bitmap extension");
+goto fail;
+}
+
+free_bitmap_clusters(bs, &bm->table);
+
+fail:
+bitmap_free(bm);
+bitmap_list_free(bm_list);
+}
+
 void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, Error **errp)
 {
 BdrvDirtyBitmap *bitmap;
diff --git a/block/qcow2.c b/block/qcow2.c
index fc1f69cead..b836b8c831 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3612,6 +3612,7 @@ BlockDriver bdrv_qcow2 = {
 
 .bdrv_reopen_bitmaps_rw = qcow2_reopen_bitmaps_rw,
 .bdrv_can_store_new_dirty_bitmap = qcow2_can_store_new_dirty_bitmap,
+.bdrv_remove_persistent_dirty_bitmap = qcow2_remove_persistent_dirty_bitmap,
 };
 
 static void bdrv_qcow2_init(void)
diff --git a/block/qcow2.h b/block/qcow2.h
index 8b2f66f8b6..ffb951df52 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -637,5 +637,8 @@ bool qcow2_can_store_new_dirty_bitmap(BlockDriverState *bs,
   const char *name,
   uint32_t granularity,
   Error **errp);
+void qcow2_remove_persistent_dirty_bitmap(BlockDriverState *bs,
+  const char *name,
+  Error **errp);
 
 #endif
-- 
2.11.1

[Qemu-block] [PATCH v22 29/30] qmp: block-dirty-bitmap-remove: remove persistent

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

Remove persistent bitmap from the storage on block-dirty-bitmap-remove.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 blockdev.c   | 10 ++
 qapi/block-core.json |  3 ++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/blockdev.c b/blockdev.c
index 3c8fb75208..122a936719 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2783,6 +2783,7 @@ void qmp_block_dirty_bitmap_remove(const char *node, 
const char *name,
 {
 BlockDriverState *bs;
 BdrvDirtyBitmap *bitmap;
+Error *local_err = NULL;
 
 bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
 if (!bitmap || !bs) {
@@ -2795,6 +2796,15 @@ void qmp_block_dirty_bitmap_remove(const char *node, 
const char *name,
name);
 return;
 }
+
+if (bdrv_dirty_bitmap_get_persistance(bitmap)) {
+bdrv_remove_persistent_dirty_bitmap(bs, name, &local_err);
+if (local_err != NULL) {
+error_propagate(errp, local_err);
+return;
+}
+}
+
 bdrv_dirty_bitmap_make_anon(bitmap);
 bdrv_release_dirty_bitmap(bs, bitmap);
 }
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 6ad8585400..e471efa1b4 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1601,7 +1601,8 @@
 # @block-dirty-bitmap-remove:
 #
 # Stop write tracking and remove the dirty bitmap that was created
-# with block-dirty-bitmap-add.
+# with block-dirty-bitmap-add. If the bitmap is persistent, remove it from its
+# storage too.
 #
 # Returns: nothing on success
 #  If @node is not a valid block device or node, DeviceNotFound
-- 
2.11.1

[Qemu-block] [PATCH v22 24/30] qmp: add autoload parameter to block-dirty-bitmap-add

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

Optional. Default is false.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Signed-off-by: Denis V. Lunev <d...@openvz.org>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 blockdev.c   | 18 --
 qapi/block-core.json |  6 +-
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 125acabc07..4bb7033994 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1974,6 +1974,7 @@ static void block_dirty_bitmap_add_prepare(BlkActionState 
*common,
 qmp_block_dirty_bitmap_add(action->node, action->name,
action->has_granularity, action->granularity,
action->has_persistent, action->persistent,
+   action->has_autoload, action->autoload,
 &local_err);
 
 if (!local_err) {
@@ -2722,6 +2723,7 @@ out:
 void qmp_block_dirty_bitmap_add(const char *node, const char *name,
 bool has_granularity, uint32_t granularity,
 bool has_persistent, bool persistent,
+bool has_autoload, bool autoload,
 Error **errp)
 {
 BlockDriverState *bs;
@@ -2751,6 +2753,15 @@ void qmp_block_dirty_bitmap_add(const char *node, const 
char *name,
 if (!has_persistent) {
 persistent = false;
 }
+if (!has_autoload) {
+autoload = false;
+}
+
+if (has_autoload && !persistent) {
+error_setg(errp, "Autoload flag must be used only for persistent "
+ "bitmaps");
+return;
+}
 
 if (persistent &&
 !bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp))
@@ -2759,9 +2770,12 @@ void qmp_block_dirty_bitmap_add(const char *node, const 
char *name,
 }
 
 bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp);
-if (bitmap != NULL) {
-bdrv_dirty_bitmap_set_persistance(bitmap, persistent);
+if (bitmap == NULL) {
+return;
 }
+
+bdrv_dirty_bitmap_set_persistance(bitmap, persistent);
+bdrv_dirty_bitmap_set_autoload(bitmap, autoload);
 }
 
 void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 13f98ec146..5c42cc7790 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1566,11 +1566,15 @@
 #  Qcow2 disks support persistent bitmaps. Default is false for
 #  block-dirty-bitmap-add. (Since: 2.10)
 #
+# @autoload: the bitmap will be automatically loaded when the image it is 
stored
+#in is opened. This flag may only be specified for persistent
+#bitmaps. Default is false for block-dirty-bitmap-add. (Since: 
2.10)
+#
 # Since: 2.4
 ##
 { 'struct': 'BlockDirtyBitmapAdd',
   'data': { 'node': 'str', 'name': 'str', '*granularity': 'uint32',
-'*persistent': 'bool' } }
+'*persistent': 'bool', '*autoload': 'bool' } }
 
 ##
 # @block-dirty-bitmap-add:
-- 
2.11.1

[Qemu-block] [PATCH v22 02/30] specs/qcow2: do not use wording 'bitmap header'

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

A bitmap directory entry is sometimes called a 'bitmap header'. This
patch leaves only one name - 'bitmap directory entry'. The name 'bitmap
header' creates misunderstandings with 'qcow2 header' and 'qcow2 bitmap
header extension' (which is extension of qcow2 header)

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Eric Blake <ebl...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 docs/interop/qcow2.txt | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
index dda53dd2a3..8874e8c774 100644
--- a/docs/interop/qcow2.txt
+++ b/docs/interop/qcow2.txt
@@ -201,7 +201,7 @@ The fields of the bitmaps extension are:
 
   8 - 15:  bitmap_directory_size
Size of the bitmap directory in bytes. It is the cumulative
-   size of all (nb_bitmaps) bitmap headers.
+   size of all (nb_bitmaps) bitmap directory entries.
 
  16 - 23:  bitmap_directory_offset
Offset into the image file at which the bitmap directory
@@ -426,8 +426,7 @@ Each bitmap saved in the image is described in a bitmap 
directory entry. The
 bitmap directory is a contiguous area in the image file, whose starting offset
 and length are given by the header extension fields bitmap_directory_offset and
 bitmap_directory_size. The entries of the bitmap directory have variable
-length, depending on the lengths of the bitmap name and extra data. These
-entries are also called bitmap headers.
+length, depending on the lengths of the bitmap name and extra data.
 
 Structure of a bitmap directory entry:
 
-- 
2.11.1

[Qemu-block] [PATCH v22 21/30] block: add bdrv_can_store_new_dirty_bitmap

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

This will be needed to check some restrictions before making bitmap
persistent in qmp-block-dirty-bitmap-add (this functionality will be
added by future patch)

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 block.c   | 22 ++
 include/block/block.h |  3 +++
 include/block/block_int.h |  4 
 3 files changed, 29 insertions(+)

diff --git a/block.c b/block.c
index c649afec91..b2719bceff 100644
--- a/block.c
+++ b/block.c
@@ -4954,3 +4954,25 @@ void bdrv_del_child(BlockDriverState *parent_bs, 
BdrvChild *child, Error **errp)
 
 parent_bs->drv->bdrv_del_child(parent_bs, child, errp);
 }
+
+bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
+ uint32_t granularity, Error **errp)
+{
+BlockDriver *drv = bs->drv;
+
+if (!drv) {
+error_setg_errno(errp, ENOMEDIUM,
+ "Can't store persistent bitmaps to %s",
+ bdrv_get_device_or_node_name(bs));
+return false;
+}
+
+if (!drv->bdrv_can_store_new_dirty_bitmap) {
+error_setg_errno(errp, ENOTSUP,
+ "Can't store persistent bitmaps to %s",
+ bdrv_get_device_or_node_name(bs));
+return false;
+}
+
+return drv->bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp);
+}
diff --git a/include/block/block.h b/include/block/block.h
index a4f09df95a..1daf9a0882 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -630,4 +630,7 @@ void bdrv_add_child(BlockDriverState *parent, 
BlockDriverState *child,
 Error **errp);
 void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
 
+bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
+ uint32_t granularity, Error **errp);
+
 #endif
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 4ad8eec2dd..009b4d41df 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -387,6 +387,10 @@ struct BlockDriver {
  * field of BlockDirtyBitmap's in case of success.
  */
 int (*bdrv_reopen_bitmaps_rw)(BlockDriverState *bs, Error **errp);
+bool (*bdrv_can_store_new_dirty_bitmap)(BlockDriverState *bs,
+const char *name,
+uint32_t granularity,
+Error **errp);
 
 QLIST_ENTRY(BlockDriver) list;
 };
-- 
2.11.1

[Qemu-block] [PATCH v22 08/30] qcow2: add bitmaps extension

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

Add bitmap extension as specified in docs/specs/qcow2.txt.
For now, just mirror extension header into Qcow2 state and check
constraints. Also, calculate refcounts for qcow2 bitmaps, to not break
qemu-img check.

For now, disable image resize if it has bitmaps. It will be fixed later.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 block/Makefile.objs|   2 +-
 block/qcow2-bitmap.c   | 439 +
 block/qcow2-refcount.c |   6 +
 block/qcow2.c  | 124 +-
 block/qcow2.h  |  27 +++
 5 files changed, 592 insertions(+), 6 deletions(-)
 create mode 100644 block/qcow2-bitmap.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index ea955302c8..9efc6c49ea 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,5 +1,5 @@
 block-obj-y += raw-format.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o 
dmg.o
-block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o
+block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o qcow2-bitmap.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
new file mode 100644
index 0000000000..b8e472b3e8
--- /dev/null
+++ b/block/qcow2-bitmap.c
@@ -0,0 +1,439 @@
+/*
+ * Bitmaps for the QCOW version 2 format
+ *
+ * Copyright (c) 2014-2017 Vladimir Sementsov-Ogievskiy
+ *
+ * This file is derived from qcow2-snapshot.c, original copyright:
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+
+#include "block/block_int.h"
+#include "block/qcow2.h"
+
+/* NOTICE: BME here means Bitmaps Extension and used as a namespace for
+ * _internal_ constants. Please do not use this _internal_ abbreviation for
+ * other needs and/or outside of this file. */
+
+/* Bitmap directory entry constraints */
+#define BME_MAX_TABLE_SIZE 0x800
+#define BME_MAX_PHYS_SIZE 0x2000 /* restrict BdrvDirtyBitmap size in RAM */
+#define BME_MAX_GRANULARITY_BITS 31
+#define BME_MIN_GRANULARITY_BITS 9
+#define BME_MAX_NAME_SIZE 1023
+
+/* Bitmap directory entry flags */
+#define BME_RESERVED_FLAGS 0xfffcU
+
+/* bits [1, 8] U [56, 63] are reserved */
+#define BME_TABLE_ENTRY_RESERVED_MASK 0xff0001feULL
+#define BME_TABLE_ENTRY_OFFSET_MASK 0x00fffe00ULL
+#define BME_TABLE_ENTRY_FLAG_ALL_ONES (1ULL << 0)
+
+typedef struct QEMU_PACKED Qcow2BitmapDirEntry {
+/* header is 8 byte aligned */
+uint64_t bitmap_table_offset;
+
+uint32_t bitmap_table_size;
+uint32_t flags;
+
+uint8_t type;
+uint8_t granularity_bits;
+uint16_t name_size;
+uint32_t extra_data_size;
+/* extra data follows  */
+/* name follows  */
+} Qcow2BitmapDirEntry;
+
+typedef struct Qcow2BitmapTable {
+uint64_t offset;
+uint32_t size; /* number of 64bit entries */
+QSIMPLEQ_ENTRY(Qcow2BitmapTable) entry;
+} Qcow2BitmapTable;
+
+typedef struct Qcow2Bitmap {
+Qcow2BitmapTable table;
+uint32_t flags;
+uint8_t granularity_bits;
+char *name;
+
+QSIMPLEQ_ENTRY(Qcow2Bitmap) entry;
+} Qcow2Bitmap;
+typedef QSIMPLEQ_HEAD(Qcow2BitmapList, Qcow2Bitmap) Qcow2BitmapList;
+
+typedef enum BitmapType {
+BT_DIRTY_TRACKING_BITMAP = 1
+} BitmapType;
+
+static int check_table_entry(uint64_t entry, int cluster_size)
+{
+uint64_t offset;
+
+if (entry & BME_TABLE_ENTRY_RESERVED_MASK) {
+return -EINVAL;
+}
+
+offset = entry & BME_TABLE_ENTRY_OFFSET_MASK;
+if (offset != 0) {
+/* if offset specified, bit 0 is r

[Qemu-block] [PATCH v22 17/30] block: introduce persistent dirty bitmaps

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

The new field BdrvDirtyBitmap.persistent means that the bitmap should be
saved by the format driver in .bdrv_close and .bdrv_inactivate. No format
driver supports it for now.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 block/dirty-bitmap.c | 29 +
 block/qcow2-bitmap.c |  1 +
 include/block/dirty-bitmap.h |  4 
 3 files changed, 34 insertions(+)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 06dc7a3ac9..3c17c452ae 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -54,6 +54,7 @@ struct BdrvDirtyBitmap {
this flag is set. */
 bool autoload;  /* For persistent bitmaps: bitmap must be
autoloaded on image opening */
+bool persistent;/* bitmap must be saved to owner disk image */
 QLIST_ENTRY(BdrvDirtyBitmap) list;
 };
 
@@ -102,6 +103,7 @@ void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
 assert(!bdrv_dirty_bitmap_frozen(bitmap));
 g_free(bitmap->name);
 bitmap->name = NULL;
+bitmap->persistent = false;
 bitmap->autoload = false;
 }
 
@@ -299,6 +301,8 @@ BdrvDirtyBitmap 
*bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
 bitmap->name = NULL;
 successor->name = name;
 bitmap->successor = NULL;
+successor->persistent = bitmap->persistent;
+bitmap->persistent = false;
 successor->autoload = bitmap->autoload;
 bitmap->autoload = false;
 bdrv_release_dirty_bitmap(bs, bitmap);
@@ -689,3 +693,28 @@ bool bdrv_dirty_bitmap_get_autoload(const BdrvDirtyBitmap 
*bitmap)
 {
 return bitmap->autoload;
 }
+
+/* Called with BQL taken. */
+void bdrv_dirty_bitmap_set_persistance(BdrvDirtyBitmap *bitmap, bool 
persistent)
+{
+qemu_mutex_lock(bitmap->mutex);
+bitmap->persistent = persistent;
+qemu_mutex_unlock(bitmap->mutex);
+}
+
+bool bdrv_dirty_bitmap_get_persistance(BdrvDirtyBitmap *bitmap)
+{
+return bitmap->persistent;
+}
+
+bool bdrv_has_changed_persistent_bitmaps(BlockDriverState *bs)
+{
+BdrvDirtyBitmap *bm;
+QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
+if (bm->persistent && !bm->readonly) {
+return true;
+}
+}
+
+return false;
+}
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index ee6d8f75a9..52e4616b8c 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -794,6 +794,7 @@ bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState 
*bs, Error **errp)
 goto fail;
 }
 
+bdrv_dirty_bitmap_set_persistance(bitmap, true);
 bdrv_dirty_bitmap_set_autoload(bitmap, true);
 bm->flags |= BME_FLAG_IN_USE;
 created_dirty_bitmaps =
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index e2fea12b94..3995789218 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -73,6 +73,8 @@ void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap 
*bitmap);
 
 void bdrv_dirty_bitmap_set_readonly(BdrvDirtyBitmap *bitmap, bool value);
 void bdrv_dirty_bitmap_set_autoload(BdrvDirtyBitmap *bitmap, bool autoload);
+void bdrv_dirty_bitmap_set_persistance(BdrvDirtyBitmap *bitmap,
+bool persistent);
 
 /* Functions that require manual locking.  */
 void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap);
@@ -91,5 +93,7 @@ void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
 bool bdrv_dirty_bitmap_readonly(const BdrvDirtyBitmap *bitmap);
 bool bdrv_has_readonly_bitmaps(BlockDriverState *bs);
 bool bdrv_dirty_bitmap_get_autoload(const BdrvDirtyBitmap *bitmap);
+bool bdrv_dirty_bitmap_get_persistance(BdrvDirtyBitmap *bitmap);
+bool bdrv_has_changed_persistent_bitmaps(BlockDriverState *bs);
 
 #endif
-- 
2.11.1

[Qemu-block] [PATCH v22 09/30] block/dirty-bitmap: fix comment for BlockDirtyBitmap.disabled field

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: John Snow <js...@redhat.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
---
 block/dirty-bitmap.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index f502c45a70..a8fe149c4a 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -43,7 +43,8 @@ struct BdrvDirtyBitmap {
 BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
 char *name; /* Optional non-empty unique ID */
 int64_t size;   /* Size of the bitmap (Number of sectors) */
-bool disabled;  /* Bitmap is read-only */
+bool disabled;  /* Bitmap is disabled. It ignores all writes to
+   the device */
 int active_iterators;   /* How many iterators are active */
 QLIST_ENTRY(BdrvDirtyBitmap) list;
 };
-- 
2.11.1

[Qemu-block] [PATCH v22 20/30] qcow2: store bitmaps on reopening image as read-only

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

Store bitmaps and mark them read-only when the image is reopened read-only.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
---
 block/qcow2-bitmap.c | 22 ++
 block/qcow2.c|  5 +
 block/qcow2.h|  1 +
 3 files changed, 28 insertions(+)

diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 5f53486b22..7912a82c8c 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -1365,3 +1365,25 @@ fail:
 
 bitmap_list_free(bm_list);
 }
+
+int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp)
+{
+BdrvDirtyBitmap *bitmap;
+Error *local_err = NULL;
+
+qcow2_store_persistent_dirty_bitmaps(bs, &local_err);
+if (local_err != NULL) {
+error_propagate(errp, local_err);
+return -EINVAL;
+}
+
+for (bitmap = bdrv_dirty_bitmap_next(bs, NULL); bitmap != NULL;
+ bitmap = bdrv_dirty_bitmap_next(bs, bitmap))
+{
+if (bdrv_dirty_bitmap_get_persistance(bitmap)) {
+bdrv_dirty_bitmap_set_readonly(bitmap, true);
+}
+}
+
+return 0;
+}
diff --git a/block/qcow2.c b/block/qcow2.c
index 365298d4ce..b68e04766f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1382,6 +1382,11 @@ static int qcow2_reopen_prepare(BDRVReopenState *state,
 
 /* We need to write out any unwritten data if we reopen read-only. */
 if ((state->flags & BDRV_O_RDWR) == 0) {
+ret = qcow2_reopen_bitmaps_ro(state->bs, errp);
+if (ret < 0) {
+goto fail;
+}
+
 ret = bdrv_flush(state->bs);
 if (ret < 0) {
 goto fail;
diff --git a/block/qcow2.h b/block/qcow2.h
index 0594551237..7d0a20c053 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -632,5 +632,6 @@ int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState *bs, Error **errp);
 int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp);
 void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, Error **errp);
+int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp);
 
 #endif
-- 
2.11.1

[Qemu-block] [PATCH v22 15/30] block/dirty-bitmap: add autoload field to BdrvDirtyBitmap

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

Mirror the AUTO flag from the Qcow2 bitmap in BdrvDirtyBitmap. This will be
needed in the future to save this flag back to Qcow2 for persistent
bitmaps.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 block/dirty-bitmap.c | 18 ++
 block/qcow2-bitmap.c |  2 ++
 include/block/dirty-bitmap.h |  2 ++
 3 files changed, 22 insertions(+)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 17d3068336..06dc7a3ac9 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -52,6 +52,8 @@ struct BdrvDirtyBitmap {
Such operations must fail and both the image
and this bitmap must remain unchanged while
this flag is set. */
+bool autoload;  /* For persistent bitmaps: bitmap must be
+   autoloaded on image opening */
 QLIST_ENTRY(BdrvDirtyBitmap) list;
 };
 
@@ -100,6 +102,7 @@ void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
 assert(!bdrv_dirty_bitmap_frozen(bitmap));
 g_free(bitmap->name);
 bitmap->name = NULL;
+bitmap->autoload = false;
 }
 
 /* Called with BQL taken.  */
@@ -296,6 +299,8 @@ BdrvDirtyBitmap 
*bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
 bitmap->name = NULL;
 successor->name = name;
 bitmap->successor = NULL;
+successor->autoload = bitmap->autoload;
+bitmap->autoload = false;
 bdrv_release_dirty_bitmap(bs, bitmap);
 
 return successor;
@@ -671,3 +676,16 @@ bool bdrv_has_readonly_bitmaps(BlockDriverState *bs)
 
 return false;
 }
+
+/* Called with BQL taken. */
+void bdrv_dirty_bitmap_set_autoload(BdrvDirtyBitmap *bitmap, bool autoload)
+{
+qemu_mutex_lock(bitmap->mutex);
+bitmap->autoload = autoload;
+qemu_mutex_unlock(bitmap->mutex);
+}
+
+bool bdrv_dirty_bitmap_get_autoload(const BdrvDirtyBitmap *bitmap)
+{
+return bitmap->autoload;
+}
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index a21fab8ce8..ee6d8f75a9 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -793,6 +793,8 @@ bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState 
*bs, Error **errp)
 if (bitmap == NULL) {
 goto fail;
 }
+
+bdrv_dirty_bitmap_set_autoload(bitmap, true);
 bm->flags |= BME_FLAG_IN_USE;
 created_dirty_bitmaps =
 g_slist_append(created_dirty_bitmaps, bitmap);
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index cb43fa37e2..e2fea12b94 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -72,6 +72,7 @@ void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap 
*bitmap,
 void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap);
 
 void bdrv_dirty_bitmap_set_readonly(BdrvDirtyBitmap *bitmap, bool value);
+void bdrv_dirty_bitmap_set_autoload(BdrvDirtyBitmap *bitmap, bool autoload);
 
 /* Functions that require manual locking.  */
 void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap);
@@ -89,5 +90,6 @@ int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap);
 void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
 bool bdrv_dirty_bitmap_readonly(const BdrvDirtyBitmap *bitmap);
 bool bdrv_has_readonly_bitmaps(BlockDriverState *bs);
+bool bdrv_dirty_bitmap_get_autoload(const BdrvDirtyBitmap *bitmap);
 
 #endif
-- 
2.11.1

[Qemu-block] [PATCH v22 22/30] qcow2: add .bdrv_can_store_new_dirty_bitmap

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

Implement the .bdrv_can_store_new_dirty_bitmap interface.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: John Snow <js...@redhat.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
---
 block/qcow2-bitmap.c | 51 +++
 block/qcow2.c|  1 +
 block/qcow2.h|  4 
 3 files changed, 56 insertions(+)

diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 7912a82c8c..f45324e584 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -1387,3 +1387,54 @@ int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error 
**errp)
 
 return 0;
 }
+
+bool qcow2_can_store_new_dirty_bitmap(BlockDriverState *bs,
+  const char *name,
+  uint32_t granularity,
+  Error **errp)
+{
+BDRVQcow2State *s = bs->opaque;
+bool found;
+Qcow2BitmapList *bm_list;
+
+if (check_constraints_on_bitmap(bs, name, granularity, errp) != 0) {
+goto fail;
+}
+
+if (s->nb_bitmaps == 0) {
+return true;
+}
+
+if (s->nb_bitmaps >= QCOW2_MAX_BITMAPS) {
+error_setg(errp,
+   "Maximum number of persistent bitmaps is already reached");
+goto fail;
+}
+
+if (s->bitmap_directory_size + calc_dir_entry_size(strlen(name), 0) >
+QCOW2_MAX_BITMAP_DIRECTORY_SIZE)
+{
+error_setg(errp, "Not enough space in the bitmap directory");
+goto fail;
+}
+
+bm_list = bitmap_list_load(bs, s->bitmap_directory_offset,
+   s->bitmap_directory_size, errp);
+if (bm_list == NULL) {
+goto fail;
+}
+
+found = find_bitmap_by_name(bm_list, name);
+bitmap_list_free(bm_list);
+if (found) {
+error_setg(errp, "Bitmap with the same name is already stored");
+goto fail;
+}
+
+return true;
+
+fail:
+error_prepend(errp, "Can't make bitmap '%s' persistent in '%s': ",
+  name, bdrv_get_device_or_node_name(bs));
+return false;
+}
diff --git a/block/qcow2.c b/block/qcow2.c
index b68e04766f..fc1f69cead 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3611,6 +3611,7 @@ BlockDriver bdrv_qcow2 = {
 .bdrv_attach_aio_context  = qcow2_attach_aio_context,
 
 .bdrv_reopen_bitmaps_rw = qcow2_reopen_bitmaps_rw,
+.bdrv_can_store_new_dirty_bitmap = qcow2_can_store_new_dirty_bitmap,
 };
 
 static void bdrv_qcow2_init(void)
diff --git a/block/qcow2.h b/block/qcow2.h
index 7d0a20c053..8b2f66f8b6 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -633,5 +633,9 @@ bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState 
*bs, Error **errp);
 int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp);
 void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, Error **errp);
 int qcow2_reopen_bitmaps_ro(BlockDriverState *bs, Error **errp);
+bool qcow2_can_store_new_dirty_bitmap(BlockDriverState *bs,
+  const char *name,
+  uint32_t granularity,
+  Error **errp);
 
 #endif
-- 
2.11.1

[Qemu-block] [PATCH v22 13/30] block: new bdrv_reopen_bitmaps_rw interface

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

Add format driver handler, which should mark loaded read-only
bitmaps as 'IN_USE' in the image and unset read_only field in
corresponding BdrvDirtyBitmap's.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 block.c   | 19 +++
 include/block/block_int.h |  7 +++
 2 files changed, 26 insertions(+)

diff --git a/block.c b/block.c
index 37d68e3276..3f83da178d 100644
--- a/block.c
+++ b/block.c
@@ -2990,12 +2990,16 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state)
 {
 BlockDriver *drv;
 BlockDriverState *bs;
+bool old_can_write, new_can_write;
 
 assert(reopen_state != NULL);
 bs = reopen_state->bs;
 drv = bs->drv;
 assert(drv != NULL);
 
+old_can_write =
+!bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE);
+
 /* If there are any driver level actions to take */
 if (drv->bdrv_reopen_commit) {
 drv->bdrv_reopen_commit(reopen_state);
@@ -3009,6 +3013,21 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state)
 bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
 
 bdrv_refresh_limits(bs, NULL);
+
+new_can_write =
+!bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE);
+if (!old_can_write && new_can_write && drv->bdrv_reopen_bitmaps_rw) {
+Error *local_err = NULL;
+if (drv->bdrv_reopen_bitmaps_rw(bs, &local_err) < 0) {
+/* This is not fatal, bitmaps just left read-only, so all following
+ * writes will fail. User can remove read-only bitmaps to unblock
+ * writes.
+ */
+error_reportf_err(local_err,
+  "%s: Failed to make dirty bitmaps writable: ",
+  bdrv_get_node_name(bs));
+}
+}
 }
 
 /*
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 748970055e..4ad8eec2dd 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -381,6 +381,13 @@ struct BlockDriver {
  uint64_t parent_perm, uint64_t parent_shared,
  uint64_t *nperm, uint64_t *nshared);
 
+/**
+ * Bitmaps should be marked as 'IN_USE' in the image on reopening image
+ * as rw. This handler should realize it. It also should unset readonly
+ * field of BlockDirtyBitmap's in case of success.
+ */
+int (*bdrv_reopen_bitmaps_rw)(BlockDriverState *bs, Error **errp);
+
 QLIST_ENTRY(BlockDriver) list;
 };
 
-- 
2.11.1

[Qemu-block] [PATCH v22 14/30] qcow2: support .bdrv_reopen_bitmaps_rw

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

Implement the bdrv_reopen_bitmaps_rw interface.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: John Snow <js...@redhat.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
---
 block/qcow2-bitmap.c | 61 
 block/qcow2.c|  2 ++
 block/qcow2.h|  1 +
 3 files changed, 64 insertions(+)

diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 2c7b057e21..a21fab8ce8 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -826,3 +826,64 @@ fail:
 
 return false;
 }
+
+int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp)
+{
+BDRVQcow2State *s = bs->opaque;
+Qcow2BitmapList *bm_list;
+Qcow2Bitmap *bm;
+GSList *ro_dirty_bitmaps = NULL;
+int ret = 0;
+
+if (s->nb_bitmaps == 0) {
+/* No bitmaps - nothing to do */
+return 0;
+}
+
+if (!can_write(bs)) {
+error_setg(errp, "Can't write to the image on reopening bitmaps rw");
+return -EINVAL;
+}
+
+bm_list = bitmap_list_load(bs, s->bitmap_directory_offset,
+   s->bitmap_directory_size, errp);
+if (bm_list == NULL) {
+return -EINVAL;
+}
+
+QSIMPLEQ_FOREACH(bm, bm_list, entry) {
+if (!(bm->flags & BME_FLAG_IN_USE)) {
+BdrvDirtyBitmap *bitmap = bdrv_find_dirty_bitmap(bs, bm->name);
+if (bitmap == NULL) {
+continue;
+}
+
+if (!bdrv_dirty_bitmap_readonly(bitmap)) {
+error_setg(errp, "Bitmap %s is not readonly but not marked"
+ "'IN_USE' in the image. Something went wrong,"
+ "all the bitmaps may be corrupted", bm->name);
+ret = -EINVAL;
+goto out;
+}
+
+bm->flags |= BME_FLAG_IN_USE;
+ro_dirty_bitmaps = g_slist_append(ro_dirty_bitmaps, bitmap);
+}
+}
+
+if (ro_dirty_bitmaps != NULL) {
+/* in_use flags must be updated */
+ret = update_ext_header_and_dir_in_place(bs, bm_list);
+if (ret < 0) {
+error_setg_errno(errp, -ret, "Can't update bitmap directory");
+goto out;
+}
+g_slist_foreach(ro_dirty_bitmaps, set_readonly_helper, false);
+}
+
+out:
+g_slist_free(ro_dirty_bitmaps);
+bitmap_list_free(bm_list);
+
+return ret;
+}
diff --git a/block/qcow2.c b/block/qcow2.c
index 92e8ff064d..8f070b12a2 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3595,6 +3595,8 @@ BlockDriver bdrv_qcow2 = {
 
 .bdrv_detach_aio_context  = qcow2_detach_aio_context,
 .bdrv_attach_aio_context  = qcow2_attach_aio_context,
+
+.bdrv_reopen_bitmaps_rw = qcow2_reopen_bitmaps_rw,
 };
 
 static void bdrv_qcow2_init(void)
diff --git a/block/qcow2.h b/block/qcow2.h
index 67c61de008..3e23bb7361 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -630,5 +630,6 @@ int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
   void **refcount_table,
   int64_t *refcount_table_size);
 bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState *bs, Error **errp);
+int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp);
 
 #endif
-- 
2.11.1

[Qemu-block] [PATCH v22 00/30] qcow2: persistent dirty bitmaps

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

 possibility of creating persistent bitmap 
with
  specified name and granularity in specified BDS

16-17: spelling, rewording, indenting, tiny code simplifying, check can_store 
(by 14-15),
s/if (autoload && !persistent)/if (has_autoload && !persistent)/,
rebase (deleted qmp-commands.hx)

18: alternative to md5 in query-block:
  - separated qmp command -x-debug-block-dirty-bitmap-sha256
  - as advised by Daniel P. Berrange in my parallel thread:
- sha256 instead of md5
- use qcrypto_hash_* instead of GChecksum
  - fix bug =) (size was wrong in hbitmap_md5)

19: s/3999/3fff/, use x-debug-block-dirty-bitmap-sha256

20: new patch to rename and publish inc_refcounts

21: some fixes and refactoring, mostly based on Max's comments.

Max, Eric, many thanks for your review!
I hope I've covered most of your comments with this update.

TODO:
- handle reopening image RO->RW and incoming migration, set IN_USE for existing 
loaded bitmaps
  in these cases.
- reuse old, already allocated data clusters for bitmaps storing
- truncate bitmaps in the image on truncate


v7:

https://src.openvz.org/users/vsementsov/repos/qemu/browse?at=refs%2Ftags%2Fqcow2-bitmap-v7
based on block-next (https://github.com/XanClic/qemu/commits/block-next)

- a lot of refactoring and reordering of patches.
- dead code removed (bdrv_dirty_bitmap_load, etc.)
- do not maintain extra data for now
- do not store dirty bitmap directory in memory
  (as we use it seldom, we can just reread if needed)

By Kevin's review:
01 - commit message changed: fix->improvement (as it was not a bug)
03 - r-b
04 - r-b
05 - add 21 patch to fix spec, also, removed all (I hope) mentions of
 "Bitmap Header", switch to one unified name for it - "Bitmap
 Directory Entry", to avoid misunderstanding with Qcow2 header.
 (also, add patch 22, to fix it in spec)
v6.06 - improve for_each_dir_entry loop, reorder patches, other small fixes
v6.07 ~> v7.09 - dead code removed, I've moved to one function 
.bdrv_store_persistent_bitmaps and have the wrapper and callback in one
patch (along with some other stuff; if that is not ok, I can split them)
v6.08 - about keeping bitmap directory instead of bitmap list: no I don't keep
it at all.
v6.16 - old bdrv_ bitmap-storing related functions are removed. The new one is
bdrv_store_persistent_bitmaps.


v6:
https://src.openvz.org/users/vsementsov/repos/qemu/browse?at=refs%2Ftags%2Fqcow2-bitmap-v6
based on block-next (https://github.com/XanClic/qemu/commits/block-next)

There are a lot of changes, reorderings and additions in comparison with v5.
One principal thing: bitmaps are now removed from the image after loading
instead of being marked in_use. It is simpler and we do not need to store
superfluous data.
Also, we are no longer interested in a command line interface to dirty bitmaps,
so it is dropped.  If someone needs it I can add it later.

Vladimir Sementsov-Ogievskiy (30):
  specs/qcow2: fix bitmap granularity qemu-specific note
  specs/qcow2: do not use wording 'bitmap header'
  hbitmap: improve dirty iter
  tests: add hbitmap iter test
  block: fix bdrv_dirty_bitmap_granularity signature
  block/dirty-bitmap: add deserialize_ones func
  qcow2-refcount: rename inc_refcounts() and make it public
  qcow2: add bitmaps extension
  block/dirty-bitmap: fix comment for BlockDirtyBitmap.disabled field
  block/dirty-bitmap: add readonly field to BdrvDirtyBitmap
  qcow2: autoloading dirty bitmaps
  block: refactor bdrv_reopen_commit
  block: new bdrv_reopen_bitmaps_rw interface
  qcow2: support .bdrv_reopen_bitmaps_rw
  block/dirty-bitmap: add autoload field to BdrvDirtyBitmap
  block: bdrv_close: release bitmaps after drv->bdrv_close
  block: introduce persistent dirty bitmaps
  block/dirty-bitmap: add bdrv_dirty_bitmap_next()
  qcow2: add persistent dirty bitmaps support
  qcow2: store bitmaps on reopening image as read-only
  block: add bdrv_can_store_new_dirty_bitmap
  qcow2: add .bdrv_can_store_new_dirty_bitmap
  qmp: add persistent flag to block-dirty-bitmap-add
  qmp: add autoload parameter to block-dirty-bitmap-add
  qmp: add x-debug-block-dirty-bitmap-sha256
  iotests: test qcow2 persistent dirty bitmap
  block/dirty-bitmap: add bdrv_remove_persistent_dirty_bitmap
  qcow2: add .bdrv_remove_persistent_dirty_bitmap
  qmp: block-dirty-bitmap-remove: remove persistent
  block: release persistent bitmaps on inactivate

 block.c  |   65 +-
 block/Makefile.objs  |2 +-
 block/dirty-bitmap.c |  154 -
 block/io.c   |8 +
 block/qcow2-bitmap.c | 1481 ++
 block/qcow2-refcount.c   |   59 +-
 block/qcow2.c|  155 -
 block/qcow2.h|   43 ++
 blockdev.c   |   73 ++-
 docs/interop/qcow2.txt   |8 +-
 include/block/block.h|3 +
 include/block/block_int.h|   14 +
 include/block/dirty-bitmap.h

[Qemu-block] [PATCH v22 10/30] block/dirty-bitmap: add readonly field to BdrvDirtyBitmap

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

It will be needed in the following commits for persistent bitmaps.
If a bitmap is loaded from read-only storage (and we can't mark it
"in use" in this storage), the corresponding BdrvDirtyBitmap should be
read-only.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 block/dirty-bitmap.c | 36 
 block/io.c   |  8 
 blockdev.c   |  6 ++
 include/block/dirty-bitmap.h |  4 
 4 files changed, 54 insertions(+)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index a8fe149c4a..17d3068336 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -46,6 +46,12 @@ struct BdrvDirtyBitmap {
 bool disabled;  /* Bitmap is disabled. It ignores all writes to
the device */
 int active_iterators;   /* How many iterators are active */
+bool readonly;  /* Bitmap is read-only. This field also
+   prevents the respective image from being
+   modified (i.e. blocks writes and discards).
+   Such operations must fail and both the image
+   and this bitmap must remain unchanged while
+   this flag is set. */
 QLIST_ENTRY(BdrvDirtyBitmap) list;
 };
 
@@ -505,6 +511,7 @@ void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
   int64_t cur_sector, int64_t nr_sectors)
 {
 assert(bdrv_dirty_bitmap_enabled(bitmap));
+assert(!bdrv_dirty_bitmap_readonly(bitmap));
 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
 }
 
@@ -521,6 +528,7 @@ void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
 int64_t cur_sector, int64_t nr_sectors)
 {
 assert(bdrv_dirty_bitmap_enabled(bitmap));
+assert(!bdrv_dirty_bitmap_readonly(bitmap));
 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
 }
 
@@ -535,6 +543,7 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
 {
 assert(bdrv_dirty_bitmap_enabled(bitmap));
+assert(!bdrv_dirty_bitmap_readonly(bitmap));
 bdrv_dirty_bitmap_lock(bitmap);
 if (!out) {
 hbitmap_reset_all(bitmap->bitmap);
@@ -551,6 +560,7 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, 
HBitmap *in)
 {
 HBitmap *tmp = bitmap->bitmap;
 assert(bdrv_dirty_bitmap_enabled(bitmap));
+assert(!bdrv_dirty_bitmap_readonly(bitmap));
 bitmap->bitmap = in;
 hbitmap_free(tmp);
 }
@@ -613,6 +623,7 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t 
cur_sector,
 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
 continue;
 }
+assert(!bdrv_dirty_bitmap_readonly(bitmap));
 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
 }
 bdrv_dirty_bitmaps_unlock(bs);
@@ -635,3 +646,28 @@ int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap)
 {
 return hbitmap_count(bitmap->meta);
 }
+
+bool bdrv_dirty_bitmap_readonly(const BdrvDirtyBitmap *bitmap)
+{
+return bitmap->readonly;
+}
+
+/* Called with BQL taken. */
+void bdrv_dirty_bitmap_set_readonly(BdrvDirtyBitmap *bitmap, bool value)
+{
+qemu_mutex_lock(bitmap->mutex);
+bitmap->readonly = value;
+qemu_mutex_unlock(bitmap->mutex);
+}
+
+bool bdrv_has_readonly_bitmaps(BlockDriverState *bs)
+{
+BdrvDirtyBitmap *bm;
+QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
+if (bm->readonly) {
+return true;
+}
+}
+
+return false;
+}
diff --git a/block/io.c b/block/io.c
index 91611ffb2a..49057f19af 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1343,6 +1343,10 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild 
*child,
 uint64_t bytes_remaining = bytes;
 int max_transfer;
 
+if (bdrv_has_readonly_bitmaps(bs)) {
+return -EPERM;
+}
+
 assert(is_power_of_2(align));
 assert((offset & (align - 1)) == 0);
 assert((bytes & (align - 1)) == 0);
@@ -2435,6 +2439,10 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, 
int64_t offset,
 return -ENOMEDIUM;
 }
 
+if (bdrv_has_readonly_bitmaps(bs)) {
+return -EPERM;
+}
+
 ret = bdrv_check_byte_request(bs, offset, count);
 if (ret < 0) {
 return ret;
diff --git a/blockdev.c b/blockdev.c
index f92dcf24bf..64e03c0caf 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2023,6 +2023,9 @@ static void 
block_dirty_bitmap_clear_prepare(BlkActionState *common,
 } else if (!bdrv_dirty_bitmap_enabled(state->bitmap)) {
 error_setg(errp, "Cannot clear a disabled bitmap");
 return;
+} else if (bdrv_dirty_bitmap_readonly(state->bitmap)) {
+error_setg(errp, "Cannot clear a readonly bitmap");
+retur

[Qemu-block] [PATCH v22 23/30] qmp: add persistent flag to block-dirty-bitmap-add

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

Add optional 'persistent' flag to qmp command block-dirty-bitmap-add.
Default is false.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Signed-off-by: Denis V. Lunev <d...@openvz.org>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 blockdev.c   | 18 +-
 qapi/block-core.json |  8 +++-
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 64e03c0caf..125acabc07 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1973,6 +1973,7 @@ static void block_dirty_bitmap_add_prepare(BlkActionState 
*common,
 /* AIO context taken and released within qmp_block_dirty_bitmap_add */
 qmp_block_dirty_bitmap_add(action->node, action->name,
action->has_granularity, action->granularity,
+   action->has_persistent, action->persistent,
&local_err);
 
 if (!local_err) {
@@ -2720,9 +2721,11 @@ out:
 
 void qmp_block_dirty_bitmap_add(const char *node, const char *name,
 bool has_granularity, uint32_t granularity,
+bool has_persistent, bool persistent,
 Error **errp)
 {
 BlockDriverState *bs;
+BdrvDirtyBitmap *bitmap;
 
 if (!name || name[0] == '\0') {
 error_setg(errp, "Bitmap name cannot be empty");
@@ -2745,7 +2748,20 @@ void qmp_block_dirty_bitmap_add(const char *node, const 
char *name,
 granularity = bdrv_get_default_bitmap_granularity(bs);
 }
 
-bdrv_create_dirty_bitmap(bs, granularity, name, errp);
+if (!has_persistent) {
+persistent = false;
+}
+
+if (persistent &&
+!bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp))
+{
+return;
+}
+
+bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp);
+if (bitmap != NULL) {
+bdrv_dirty_bitmap_set_persistance(bitmap, persistent);
+}
 }
 
 void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index f85c2235c7..13f98ec146 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1561,10 +1561,16 @@
 # @granularity: the bitmap granularity, default is 64k for
 #   block-dirty-bitmap-add
 #
+# @persistent: the bitmap is persistent, i.e. it will be saved to the
+#  corresponding block device image file on its close. For now only
+#  Qcow2 disks support persistent bitmaps. Default is false for
+#  block-dirty-bitmap-add. (Since: 2.10)
+#
 # Since: 2.4
 ##
 { 'struct': 'BlockDirtyBitmapAdd',
-  'data': { 'node': 'str', 'name': 'str', '*granularity': 'uint32' } }
+  'data': { 'node': 'str', 'name': 'str', '*granularity': 'uint32',
+'*persistent': 'bool' } }
 
 ##
 # @block-dirty-bitmap-add:
-- 
2.11.1

Re: [Qemu-block] [PATCH v22 13/30] block: new bdrv_reopen_bitmaps_rw interface

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

It is interesting, but I see this problem only in your answers, in my 
letters I see this white-space on its place.


28.06.2017 15:36, Eric Blake wrote:

[meta-comment]

On 06/28/2017 07:10 AM, Vladimir Sementsov-Ogievskiy wrote:

28.06.2017 15:05, Vladimir Sementsov-Ogievskiy wrote:

Add format driver handler, which should mark loaded read-only
bitmaps as 'IN_USE' in the image and unset read_only field in
corresponding BdrvDirtyBitmap's.

Signed-off-by: Vladimir Sementsov-Ogievskiy<vsement...@virtuozzo.com>
Reviewed-by: John Snow<js...@redhat.com>

Your original message had spaces before '<' in the email addresses, but
it got lost here...



Forgot to add:

Reviewed-by: Max Reitz<mre...@redhat.com>

...this one also lacks the space.  I'm not sure if git cares, but it may
be worth investigating why your mailer eats the space when you reply
manually rather than sending via git; and for consistency, it is worth
keeping the space (for example, we like to grep 'git log' for learning
how active various contributors are, and having a consistent usage of
space before < in an email address can make the task easier).



--
Best regards,
Vladimir

Re: [Qemu-block] [PATCH v22 13/30] block: new bdrv_reopen_bitmaps_rw interface

2017-06-28 Thread Vladimir Sementsov-Ogievskiy


28.06.2017 16:02, Vladimir Sementsov-Ogievskiy wrote:
It is interesting, but I see this problem only in your answers, in my 
letters I see this white-space on its place.


In outgoing letter I see this white-space, but in letter from 
mailing-list it is absent.




28.06.2017 15:36, Eric Blake wrote:

[meta-comment]

On 06/28/2017 07:10 AM, Vladimir Sementsov-Ogievskiy wrote:

28.06.2017 15:05, Vladimir Sementsov-Ogievskiy wrote:

Add format driver handler, which should mark loaded read-only
bitmaps as 'IN_USE' in the image and unset read_only field in
corresponding BdrvDirtyBitmap's.

Signed-off-by: Vladimir Sementsov-Ogievskiy<vsement...@virtuozzo.com>
Reviewed-by: John Snow<js...@redhat.com>

Your original message had spaces before '<' in the email addresses, but
it got lost here...



Forgot to add:

Reviewed-by: Max Reitz<mre...@redhat.com>

...this one also lacks the space.  I'm not sure if git cares, but it may
be worth investigating why your mailer eats the space when you reply
manually rather than sending via git; and for consistency, it is worth
keeping the space (for example, we like to grep 'git log' for learning
how active various contributors are, and having a consistent usage of
space before < in an email address can make the task easier).





--
Best regards,
Vladimir

Re: [Qemu-block] [PATCH v22 13/30] block: new bdrv_reopen_bitmaps_rw interface

2017-06-28 Thread Vladimir Sementsov-Ogievskiy


Finally, it looks like a Thunderbird bug:
https://bugzilla.mozilla.org/show_bug.cgi?id=1160880

[sorry for so much offtopic]

28.06.2017 16:13, Vladimir Sementsov-Ogievskiy wrote:

28.06.2017 16:02, Vladimir Sementsov-Ogievskiy wrote:
It is interesting, but I see this problem only in your answers, in my 
letters I see this white-space on its place.


In outgoing letter I see this white-space, but in letter from 
mailing-list it is absent.




28.06.2017 15:36, Eric Blake wrote:

[meta-comment]

On 06/28/2017 07:10 AM, Vladimir Sementsov-Ogievskiy wrote:

28.06.2017 15:05, Vladimir Sementsov-Ogievskiy wrote:

Add format driver handler, which should mark loaded read-only
bitmaps as 'IN_USE' in the image and unset read_only field in
corresponding BdrvDirtyBitmap's.

Signed-off-by: Vladimir Sementsov-Ogievskiy<vsement...@virtuozzo.com>
Reviewed-by: John Snow<js...@redhat.com>

Your original message had spaces before '<' in the email addresses, but
it got lost here...



Forgot to add:

Reviewed-by: Max Reitz<mre...@redhat.com>

...this one also lacks the space.  I'm not sure if git cares, but it may
be worth investigating why your mailer eats the space when you reply
manually rather than sending via git; and for consistency, it is worth
keeping the space (for example, we like to grep 'git log' for learning
how active various contributors are, and having a consistent usage of
space before < in an email address can make the task easier).








--
Best regards,
Vladimir

[Qemu-block] [PATCH v22 30/30] block: release persistent bitmaps on inactivate

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

We should release them here to reload on invalidate cache.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
---
 block.c  |  4 
 block/dirty-bitmap.c | 29 +++--
 include/block/dirty-bitmap.h |  1 +
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/block.c b/block.c
index b2719bceff..acc6e4de1c 100644
--- a/block.c
+++ b/block.c
@@ -4156,6 +4156,10 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs,
 }
 }
 
+/* At this point persistent bitmaps should be already stored by the format
+ * driver */
+bdrv_release_persistent_dirty_bitmaps(bs);
+
 return 0;
 }
 
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index b2ca78b4d0..543bddb9b5 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -356,15 +356,20 @@ void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
 bdrv_dirty_bitmaps_unlock(bs);
 }
 
+static bool bdrv_dirty_bitmap_has_name(BdrvDirtyBitmap *bitmap)
+{
+return !!bdrv_dirty_bitmap_name(bitmap);
+}
+
 /* Called with BQL taken.  */
-static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
-  BdrvDirtyBitmap *bitmap,
-  bool only_named)
+static void bdrv_do_release_matching_dirty_bitmap(
+BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+bool (*cond)(BdrvDirtyBitmap *bitmap))
 {
 BdrvDirtyBitmap *bm, *next;
 bdrv_dirty_bitmaps_lock(bs);
 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
-if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
+if ((!bitmap || bm == bitmap) && (!cond || cond(bm))) {
 assert(!bm->active_iterators);
 assert(!bdrv_dirty_bitmap_frozen(bm));
 assert(!bm->meta);
@@ -389,7 +394,7 @@ out:
 /* Called with BQL taken.  */
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
 {
-bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
+bdrv_do_release_matching_dirty_bitmap(bs, bitmap, NULL);
 }
 
 /**
@@ -400,7 +405,19 @@ void bdrv_release_dirty_bitmap(BlockDriverState *bs, 
BdrvDirtyBitmap *bitmap)
  */
 void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
 {
-bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
+bdrv_do_release_matching_dirty_bitmap(bs, NULL, 
bdrv_dirty_bitmap_has_name);
+}
+
+/**
+ * Release all persistent dirty bitmaps attached to a BDS (for use in
+ * bdrv_inactivate_recurse()).
+ * There must not be any frozen bitmaps attached.
+ * This function does not remove persistent bitmaps from the storage.
+ */
+void bdrv_release_persistent_dirty_bitmaps(BlockDriverState *bs)
+{
+bdrv_do_release_matching_dirty_bitmap(bs, NULL,
+  bdrv_dirty_bitmap_get_persistance);
 }
 
 /**
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index d38233efd8..cbd9704e6a 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -25,6 +25,7 @@ BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs,
 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap);
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
 void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);
+void bdrv_release_persistent_dirty_bitmaps(BlockDriverState *bs);
 void bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs,
  const char *name,
  Error **errp);
-- 
2.11.1

[Qemu-block] [PATCH v22 04/30] tests: add hbitmap iter test

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

Test that hbitmap iter is resistant to bitmap resetting.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Signed-off-by: Denis V. Lunev <d...@openvz.org>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 tests/test-hbitmap.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c
index 23773d2051..1acb353889 100644
--- a/tests/test-hbitmap.c
+++ b/tests/test-hbitmap.c
@@ -909,6 +909,22 @@ static void hbitmap_test_add(const char *testpath,
hbitmap_test_teardown);
 }
 
+static void test_hbitmap_iter_and_reset(TestHBitmapData *data,
+const void *unused)
+{
+HBitmapIter hbi;
+
+hbitmap_test_init(data, L1 * 2, 0);
+hbitmap_set(data->hb, 0, data->size);
+
+hbitmap_iter_init(&hbi, data->hb, BITS_PER_LONG - 1);
+
+hbitmap_iter_next(&hbi);
+
+hbitmap_reset_all(data->hb);
+hbitmap_iter_next(&hbi);
+}
+
 int main(int argc, char **argv)
 {
 g_test_init(&argc, &argv, NULL);
@@ -966,6 +982,9 @@ int main(int argc, char **argv)
  test_hbitmap_serialize_part);
 hbitmap_test_add("/hbitmap/serialize/zeroes",
  test_hbitmap_serialize_zeroes);
+
+hbitmap_test_add("/hbitmap/iter/iter_and_reset",
+ test_hbitmap_iter_and_reset);
 g_test_run();
 
 return 0;
-- 
2.11.1

[Qemu-block] [PATCH v22 16/30] block: bdrv_close: release bitmaps after drv->bdrv_close

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

Release bitmaps after 'if (bs->drv) { ... }' block. This will allow
format driver to save persistent bitmaps, which will appear in following
commits.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
---
 block.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/block.c b/block.c
index 3f83da178d..c649afec91 100644
--- a/block.c
+++ b/block.c
@@ -3061,9 +3061,6 @@ static void bdrv_close(BlockDriverState *bs)
 bdrv_flush(bs);
 bdrv_drain(bs); /* in case flush left pending I/O */
 
-bdrv_release_named_dirty_bitmaps(bs);
-assert(QLIST_EMPTY(&bs->dirty_bitmaps));
-
 if (bs->drv) {
 BdrvChild *child, *next;
 
@@ -3102,6 +3099,9 @@ static void bdrv_close(BlockDriverState *bs)
 bs->full_open_options = NULL;
 }
 
+bdrv_release_named_dirty_bitmaps(bs);
+assert(QLIST_EMPTY(&bs->dirty_bitmaps));
+
 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
 g_free(ban);
 }
-- 
2.11.1

[Qemu-block] [PATCH v22 03/30] hbitmap: improve dirty iter

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

Make dirty iter resistant to resetting bits in corresponding HBitmap.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 include/qemu/hbitmap.h | 26 --
 util/hbitmap.c | 23 ++-
 2 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index 9239fe515e..6b04391266 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -256,10 +256,9 @@ void hbitmap_free(HBitmap *hb);
  * the lowest-numbered bit that is set in @hb, starting at @first.
  *
  * Concurrent setting of bits is acceptable, and will at worst cause the
- * iteration to miss some of those bits.  Resetting bits before the current
- * position of the iterator is also okay.  However, concurrent resetting of
- * bits can lead to unexpected behavior if the iterator has not yet reached
- * those bits.
+ * iteration to miss some of those bits.
+ *
+ * The concurrent resetting of bits is OK.
  */
 void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first);
 
@@ -298,24 +297,7 @@ void hbitmap_free_meta(HBitmap *hb);
  * Return the next bit that is set in @hbi's associated HBitmap,
  * or -1 if all remaining bits are zero.
  */
-static inline int64_t hbitmap_iter_next(HBitmapIter *hbi)
-{
-unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1];
-int64_t item;
-
-if (cur == 0) {
-cur = hbitmap_iter_skip_words(hbi);
-if (cur == 0) {
-return -1;
-}
-}
-
-/* The next call will resume work from the next bit.  */
-hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1);
-item = ((uint64_t)hbi->pos << BITS_PER_LEVEL) + ctzl(cur);
-
-return item << hbi->granularity;
-}
+int64_t hbitmap_iter_next(HBitmapIter *hbi);
 
 /**
  * hbitmap_iter_next_word:
diff --git a/util/hbitmap.c b/util/hbitmap.c
index 35088e19c4..0b38817505 100644
--- a/util/hbitmap.c
+++ b/util/hbitmap.c
@@ -106,8 +106,9 @@ unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi)
 
 unsigned long cur;
 do {
-cur = hbi->cur[--i];
+i--;
 pos >>= BITS_PER_LEVEL;
+cur = hbi->cur[i] & hb->levels[i][pos];
 } while (cur == 0);
 
 /* Check for end of iteration.  We always use fewer than BITS_PER_LONG
@@ -139,6 +140,26 @@ unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi)
 return cur;
 }
 
+int64_t hbitmap_iter_next(HBitmapIter *hbi)
+{
+unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1] &
+hbi->hb->levels[HBITMAP_LEVELS - 1][hbi->pos];
+int64_t item;
+
+if (cur == 0) {
+cur = hbitmap_iter_skip_words(hbi);
+if (cur == 0) {
+return -1;
+}
+}
+
+/* The next call will resume work from the next bit.  */
+hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1);
+item = ((uint64_t)hbi->pos << BITS_PER_LEVEL) + ctzl(cur);
+
+return item << hbi->granularity;
+}
+
 void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first)
 {
 unsigned i, bit;
-- 
2.11.1

[Qemu-block] [PATCH v22 25/30] qmp: add x-debug-block-dirty-bitmap-sha256

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 block/dirty-bitmap.c |  5 +
 blockdev.c   | 25 +
 include/block/dirty-bitmap.h |  1 +
 include/qemu/hbitmap.h   |  8 
 qapi/block-core.json | 27 +++
 tests/Makefile.include   |  2 +-
 util/hbitmap.c   | 11 +++
 7 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index d1469418e6..5fcf917707 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -725,3 +725,8 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_next(BlockDriverState 
*bs,
 return bitmap == NULL ? QLIST_FIRST(&bs->dirty_bitmaps) :
 QLIST_NEXT(bitmap, list);
 }
+
+char *bdrv_dirty_bitmap_sha256(const BdrvDirtyBitmap *bitmap, Error **errp)
+{
+return hbitmap_sha256(bitmap->bitmap, errp);
+}
diff --git a/blockdev.c b/blockdev.c
index 4bb7033994..3c8fb75208 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2832,6 +2832,31 @@ void qmp_block_dirty_bitmap_clear(const char *node, 
const char *name,
 bdrv_clear_dirty_bitmap(bitmap, NULL);
 }
 
+BlockDirtyBitmapSha256 *qmp_x_debug_block_dirty_bitmap_sha256(const char *node,
+  const char *name,
+  Error **errp)
+{
+BdrvDirtyBitmap *bitmap;
+BlockDriverState *bs;
+BlockDirtyBitmapSha256 *ret = NULL;
+char *sha256;
+
+bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
+if (!bitmap || !bs) {
+return NULL;
+}
+
+sha256 = bdrv_dirty_bitmap_sha256(bitmap, errp);
+if (sha256 == NULL) {
+return NULL;
+}
+
+ret = g_new(BlockDirtyBitmapSha256, 1);
+ret->sha256 = sha256;
+
+return ret;
+}
+
 void hmp_drive_del(Monitor *mon, const QDict *qdict)
 {
 const char *id = qdict_get_str(qdict, "id");
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index ccf2f81640..744479bc76 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -97,5 +97,6 @@ bool bdrv_dirty_bitmap_get_persistance(BdrvDirtyBitmap 
*bitmap);
 bool bdrv_has_changed_persistent_bitmaps(BlockDriverState *bs);
 BdrvDirtyBitmap *bdrv_dirty_bitmap_next(BlockDriverState *bs,
 BdrvDirtyBitmap *bitmap);
+char *bdrv_dirty_bitmap_sha256(const BdrvDirtyBitmap *bitmap, Error **errp);
 
 #endif
diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index b52304ac29..d3a74a21fc 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -253,6 +253,14 @@ void hbitmap_deserialize_ones(HBitmap *hb, uint64_t start, 
uint64_t count,
 void hbitmap_deserialize_finish(HBitmap *hb);
 
 /**
+ * hbitmap_sha256:
+ * @bitmap: HBitmap to operate on.
+ *
+ * Returns SHA256 hash of the last level.
+ */
+char *hbitmap_sha256(const HBitmap *bitmap, Error **errp);
+
+/**
  * hbitmap_free:
  * @hb: HBitmap to operate on.
  *
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 5c42cc7790..6ad8585400 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1644,6 +1644,33 @@
   'data': 'BlockDirtyBitmap' }
 
 ##
+# @BlockDirtyBitmapSha256:
+#
+# SHA256 hash of dirty bitmap data
+#
+# @sha256: ASCII representation of SHA256 bitmap hash
+#
+# Since: 2.10
+##
+  { 'struct': 'BlockDirtyBitmapSha256',
+'data': {'sha256': 'str'} }
+
+##
+# @x-debug-block-dirty-bitmap-sha256:
+#
+# Get bitmap SHA256
+#
+# Returns: BlockDirtyBitmapSha256 on success
+#  If @node is not a valid block device, DeviceNotFound
+#  If @name is not found or if hashing has failed, GenericError with an
+#  explanation
+#
+# Since: 2.10
+##
+  { 'command': 'x-debug-block-dirty-bitmap-sha256',
+'data': 'BlockDirtyBitmap', 'returns': 'BlockDirtyBitmapSha256' }
+
+##
 # @blockdev-mirror:
 #
 # Start mirroring a block device's writes to a new destination.
diff --git a/tests/Makefile.include b/tests/Makefile.include
index ae889cae02..c738e92673 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -553,7 +553,7 @@ tests/test-blockjob$(EXESUF): tests/test-blockjob.o 
$(test-block-obj-y) $(test-u
 tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o 
$(test-block-obj-y) $(test-util-obj-y)
 tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y)
 tests/test-iov$(EXESUF): tests/test-iov.o $(test-util-obj-y)
-tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o $(test-util-obj-y)
+tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o $(test-util-obj-y) 
$(test-crypto-obj-y)
 tests/test-x86-cpuid$(EXESUF): tests/test-x86-cpuid.o
 tests/test-xbzrle$(EXESUF): tests/test-xbzrle.o migration/xbzrle.o 
migration/page_cache.o $(test-util-obj-y)
 tests/test-cutils$(EXESUF): tests/test-cutils.o util/cutils.o
diff --git a/util/hbitmap.c b/util/hbitmap.c
index 0c15

[Qemu-block] [PATCH v22 07/30] qcow2-refcount: rename inc_refcounts() and make it public

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

This is needed for the following patch, which will introduce refcounts
checking for qcow2 bitmaps.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 block/qcow2-refcount.c | 53 ++
 block/qcow2.h  |  4 
 2 files changed, 32 insertions(+), 25 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 7c06061aae..d7066c875b 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1323,11 +1323,10 @@ static int realloc_refcount_array(BDRVQcow2State *s, 
void **array,
  *
  * Modifies the number of errors in res.
  */
-static int inc_refcounts(BlockDriverState *bs,
- BdrvCheckResult *res,
- void **refcount_table,
- int64_t *refcount_table_size,
- int64_t offset, int64_t size)
+int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
+ void **refcount_table,
+ int64_t *refcount_table_size,
+ int64_t offset, int64_t size)
 {
 BDRVQcow2State *s = bs->opaque;
 uint64_t start, last, cluster_offset, k, refcount;
@@ -1420,8 +1419,9 @@ static int check_refcounts_l2(BlockDriverState *bs, 
BdrvCheckResult *res,
 nb_csectors = ((l2_entry >> s->csize_shift) &
s->csize_mask) + 1;
 l2_entry &= s->cluster_offset_mask;
-ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
-l2_entry & ~511, nb_csectors * 512);
+ret = qcow2_inc_refcounts_imrt(bs, res,
+   refcount_table, refcount_table_size,
+   l2_entry & ~511, nb_csectors * 512);
 if (ret < 0) {
 goto fail;
 }
@@ -1454,8 +1454,9 @@ static int check_refcounts_l2(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 
 /* Mark cluster as used */
-ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
-offset, s->cluster_size);
+ret = qcow2_inc_refcounts_imrt(bs, res,
+   refcount_table, refcount_table_size,
+   offset, s->cluster_size);
 if (ret < 0) {
 goto fail;
 }
@@ -1508,8 +1509,8 @@ static int check_refcounts_l1(BlockDriverState *bs,
 l1_size2 = l1_size * sizeof(uint64_t);
 
 /* Mark L1 table as used */
-ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
-l1_table_offset, l1_size2);
+ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, 
refcount_table_size,
+   l1_table_offset, l1_size2);
 if (ret < 0) {
 goto fail;
 }
@@ -1538,8 +1539,9 @@ static int check_refcounts_l1(BlockDriverState *bs,
 if (l2_offset) {
 /* Mark L2 table as used */
 l2_offset &= L1E_OFFSET_MASK;
-ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
-l2_offset, s->cluster_size);
+ret = qcow2_inc_refcounts_imrt(bs, res,
+   refcount_table, refcount_table_size,
+   l2_offset, s->cluster_size);
 if (ret < 0) {
 goto fail;
 }
@@ -1757,14 +1759,15 @@ static int check_refblocks(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 
 res->corruptions_fixed++;
-ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
-offset, s->cluster_size);
+ret = qcow2_inc_refcounts_imrt(bs, res,
+   refcount_table, nb_clusters,
+   offset, s->cluster_size);
 if (ret < 0) {
 return ret;
 }
 /* No need to check whether the refcount is now greater than 1:
  * This area was just allocated and zeroed, so it can only be
- * exactly 1 after inc_refcounts() */
+ * exactly 1 after qcow2_inc_refcounts_imrt() */
 continue;
 
 resize_fail:
@@ -1779,8 +1782,8 @@ resize_fail:
 }
 
 if (offset != 0) {
-ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
-offset, s->cluster_size);
+ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, 
nb_clusters,
+

[Qemu-block] [PATCH v22 01/30] specs/qcow2: fix bitmap granularity qemu-specific note

2017-06-28 Thread Vladimir Sementsov-Ogievskiy

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 docs/interop/qcow2.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
index 80cdfd0e91..dda53dd2a3 100644
--- a/docs/interop/qcow2.txt
+++ b/docs/interop/qcow2.txt
@@ -472,8 +472,7 @@ Structure of a bitmap directory entry:
  17:granularity_bits
 Granularity bits. Valid values: 0 - 63.
 
-Note: Qemu currently doesn't support granularity_bits
-greater than 31.
+Note: Qemu currently supports only values 9 - 31.
 
 Granularity is calculated as
 granularity = 1 << granularity_bits
-- 
2.11.1

Re: [Qemu-block] [PATCH v22 13/30] block: new bdrv_reopen_bitmaps_rw interface

2017-06-28 Thread Vladimir Sementsov-Ogievskiy


28.06.2017 15:05, Vladimir Sementsov-Ogievskiy wrote:

Add format driver handler, which should mark loaded read-only
bitmaps as 'IN_USE' in the image and unset read_only field in
corresponding BdrvDirtyBitmap's.

Signed-off-by: Vladimir Sementsov-Ogievskiy<vsement...@virtuozzo.com>
Reviewed-by: John Snow<js...@redhat.com>



Forgot to add:

Reviewed-by: Max Reitz<mre...@redhat.com>


--
Best regards,
Vladimir

Re: [Qemu-block] [PATCH v3 1/3] block: add bdrv_get_format_alloc_stat format interface

2017-06-29 Thread Vladimir Sementsov-Ogievskiy


29.06.2017 03:15, John Snow wrote:


On 06/28/2017 11:59 AM, Vladimir Sementsov-Ogievskiy wrote:

27.06.2017 02:19, John Snow wrote:

On 06/06/2017 12:26 PM, Vladimir Sementsov-Ogievskiy wrote:

The function should collect statistics, about used/unused by top-level
format driver space (in its .file) and allocation status
(data/zero/discarded/after-eof) of corresponding areas in this .file.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
   block.c   | 16 ++
   include/block/block.h |  3 +++
   include/block/block_int.h |  2 ++
   qapi/block-core.json  | 55
+++
   4 files changed, 76 insertions(+)

diff --git a/block.c b/block.c
index 50ba264143..7d720ae0c2 100644
--- a/block.c
+++ b/block.c
@@ -3407,6 +3407,22 @@ int64_t
bdrv_get_allocated_file_size(BlockDriverState *bs)
   }
 /**
+ * Collect format allocation info. See BlockFormatAllocInfo
definition in
+ * qapi/block-core.json.
+ */
+int bdrv_get_format_alloc_stat(BlockDriverState *bs,
BlockFormatAllocInfo *bfai)
+{
+BlockDriver *drv = bs->drv;
+if (!drv) {
+return -ENOMEDIUM;
+}
+if (drv->bdrv_get_format_alloc_stat) {
+return drv->bdrv_get_format_alloc_stat(bs, bfai);
+}
+return -ENOTSUP;
+}
+
+/**
* Return number of sectors on success, -errno on error.
*/
   int64_t bdrv_nb_sectors(BlockDriverState *bs)
diff --git a/include/block/block.h b/include/block/block.h
index 9b355e92d8..646376a772 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -335,6 +335,9 @@ typedef enum {
 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix);
   +int bdrv_get_format_alloc_stat(BlockDriverState *bs,
+   BlockFormatAllocInfo *bfai);
+
   /* The units of offset and total_work_size may be chosen
arbitrarily by the
* block driver; total_work_size may change during the course of
the amendment
* operation */
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 8d3724cce6..458c715e99 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -208,6 +208,8 @@ struct BlockDriver {
   int64_t (*bdrv_getlength)(BlockDriverState *bs);
   bool has_variable_length;
   int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
+int (*bdrv_get_format_alloc_stat)(BlockDriverState *bs,
+  BlockFormatAllocInfo *bfai);
 int coroutine_fn
(*bdrv_co_pwritev_compressed)(BlockDriverState *bs,
   uint64_t offset, uint64_t bytes, QEMUIOVector *qiov);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index ea0b3e8b13..fd7b52bd69 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -139,6 +139,61 @@
  '*format-specific': 'ImageInfoSpecific' } }
 ##
+# @BlockFormatAllocInfo:
+#

I apologize in advance, but I don't understand this patch very well. Let
me ask some questions to get patch review rolling again, since you've
been waiting a bit.


+#
+# Allocation relations between format file and underlying protocol
file.
+# All fields are in bytes.
+#

The format file in this case would be ... what, the virtual file
represented by the qcow2? and the underlying protocol file is the raw
file that is the qcow2 itself?

yes


+# There are two types of the format file portions: 'used' and
'unused'. It's up
+# to the format how to interpret these types. For now the only
format supporting
+# the feature is Qcow2 and for this case 'used' are clusters with
positive
+# refcount and unused are clusters with zero refcount. Described
portions include
+# all format file allocations, not only virtual disk data (metadata,
internal
+# snapshots, etc. are included).

I guess the semantic differentiation between "used" and "unused" is left
to the individual fields, below.

hmm, I don't understand. differentiation is up to the format, and for
qcow2 it is described above


+#
+# For the underlying file there are native block-status types of the
portions:
+#  - data: allocated data
+#  - zero: read-as-zero holes
+#  - discarded: not allocated
+# 4th additional type is 'overrun', which is for the format file
portions beyond
+# the end of the underlying file.
+#
+# So, the fields are:
+#
+# @used-data: used by the format file and backed by data in the
underlying file
+#

I assume this is "defined and addressable data".


+# @used-zero: used by the format file and backed by a hole in the
underlying
+# file
+#

By a hole? Can you give me an example? Do you mean like a filesystem
hole ala falloc()?

-zero, -data and -discarded are the block status of corresponding area
in underlying file.

so, if underlying file is raw, yes, it should be a filesystem hole.

example:
-
# ./qemu-img create -f qcow2 x 1G
Formatting 'x', fmt=qcow2 size=1073741824 encryption=off
cluster_size=65536

Re: [Qemu-block] [PATCH v22 00/30] qcow2: persistent dirty bitmaps

2017-06-28 Thread Vladimir Sementsov-Ogievskiy


28.06.2017 16:01, Paolo Bonzini wrote:



On 28/06/2017 14:05, Vladimir Sementsov-Ogievskiy wrote:

Rebase on master, so changes, mostly related to new dirty bitmaps mutex:

10: - asserts now in bdrv_{re,}set_dirty_bitmap_locked functions.
 - also add assert into bdrv_undo_clear_dirty_bitmap (the only change, not 
related to rebase)
 - add mutex lock into bdrv_dirty_bitmap_set_readonly (as it changes bitmap 
list,
   so the lock should be taken)
 - return instead of go-to in qmp_block_dirty_bitmap_clear
 - in dirty-bitmaps.h, move bdrv_dirty_bitmap_set_readonly before block
   "Functions that require manual locking", move
   bdrv_dirty_bitmap_readonly and bdrv_has_readonly_bitmaps into this block
15: - add mutex lock/unlock into bdrv_dirty_bitmap_set_autoload
 - in dirty-bitmaps.h, move bdrv_dirty_bitmap_set_autoload before block
   "Functions that require manual locking", move
   bdrv_dirty_bitmap_get_autoload into this block
17: - add mutex lock/unlock into bdrv_dirty_bitmap_set_persistance
 - in dirty-bitmaps.h, move bdrv_dirty_bitmap_set_persistance before block
   "Functions that require manual locking", move
   bdrv_dirty_bitmap_get_persistance and
   bdrv_has_changed_persistent_bitmaps into this block
18: in dirty-bitmaps.h, move bdrv_dirty_bitmap_next into block
 "Functions that require manual locking". (do not remove r-b, as it is
 just one empty line removed before function declaration)
23: return instead of go-to in qmp_block_dirty_bitmap_add
24: return instead of go-to in qmp_block_dirty_bitmap_add
25: - return instead of go-to
 - remove aio_context_acquire/release calls
 - no aio_context parameter for block_dirty_bitmap_lookup
 - in dirty-bitmaps.h, move bdrv_dirty_bitmap_sha256 into block
 "Functions that require manual locking".
29: - return instead of go-to in qmp_block_dirty_bitmap_remove


All looks good, thanks.  I'll rebase my own fixes on top of these
patches, no need to have you respin them.

Paolo



Thank you! And for thunderbird-work-around too!

--
Best regards,
Vladimir

Re: [Qemu-block] [PATCH v22 13/30] block: new bdrv_reopen_bitmaps_rw interface

2017-06-28 Thread Vladimir Sementsov-Ogievskiy


28.06.2017 16:31, Paolo Bonzini wrote:


On 28/06/2017 15:02, Vladimir Sementsov-Ogievskiy wrote:

It is interesting, but I see this problem only in your answers, in my
letters I see this white-space on its place.

That's the good old Thunderbird "format=flowed" bug.

Vladimir, download
http://people.redhat.com/pbonzini/format-flawed.tar.gz and place it into
~/.thunderbird//extensions.  It works around the bug.


Unfortunately, with this extension installed the 'reply' and 'reply to all' buttons do nothing.



Paolo


28.06.2017 15:36, Eric Blake wrote:

[meta-comment]

On 06/28/2017 07:10 AM, Vladimir Sementsov-Ogievskiy wrote:

28.06.2017 15:05, Vladimir Sementsov-Ogievskiy wrote:

Add format driver handler, which should mark loaded read-only
bitmaps as 'IN_USE' in the image and unset read_only field in
corresponding BdrvDirtyBitmap's.

Signed-off-by: Vladimir Sementsov-Ogievskiy<vsement...@virtuozzo.com>
Reviewed-by: John Snow<js...@redhat.com>

Your original message had spaces before '<' in the email addresses, but
it got lost here...



--
Best regards,
Vladimir

Re: [Qemu-block] [PATCH v3 1/3] block: add bdrv_get_format_alloc_stat format interface

2017-06-28 Thread Vladimir Sementsov-Ogievskiy


27.06.2017 02:19, John Snow wrote:


On 06/06/2017 12:26 PM, Vladimir Sementsov-Ogievskiy wrote:

The function should collect statistics, about used/unused by top-level
format driver space (in its .file) and allocation status
(data/zero/discarded/after-eof) of corresponding areas in this .file.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
  block.c   | 16 ++
  include/block/block.h |  3 +++
  include/block/block_int.h |  2 ++
  qapi/block-core.json  | 55 +++
  4 files changed, 76 insertions(+)

diff --git a/block.c b/block.c
index 50ba264143..7d720ae0c2 100644
--- a/block.c
+++ b/block.c
@@ -3407,6 +3407,22 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState 
*bs)
  }
  
  /**

+ * Collect format allocation info. See BlockFormatAllocInfo definition in
+ * qapi/block-core.json.
+ */
+int bdrv_get_format_alloc_stat(BlockDriverState *bs, BlockFormatAllocInfo 
*bfai)
+{
+BlockDriver *drv = bs->drv;
+if (!drv) {
+return -ENOMEDIUM;
+}
+if (drv->bdrv_get_format_alloc_stat) {
+return drv->bdrv_get_format_alloc_stat(bs, bfai);
+}
+return -ENOTSUP;
+}
+
+/**
   * Return number of sectors on success, -errno on error.
   */
  int64_t bdrv_nb_sectors(BlockDriverState *bs)
diff --git a/include/block/block.h b/include/block/block.h
index 9b355e92d8..646376a772 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -335,6 +335,9 @@ typedef enum {
  
  int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);
  
+int bdrv_get_format_alloc_stat(BlockDriverState *bs,

+   BlockFormatAllocInfo *bfai);
+
  /* The units of offset and total_work_size may be chosen arbitrarily by the
   * block driver; total_work_size may change during the course of the amendment
   * operation */
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 8d3724cce6..458c715e99 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -208,6 +208,8 @@ struct BlockDriver {
  int64_t (*bdrv_getlength)(BlockDriverState *bs);
  bool has_variable_length;
  int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
+int (*bdrv_get_format_alloc_stat)(BlockDriverState *bs,
+  BlockFormatAllocInfo *bfai);
  
  int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs,

  uint64_t offset, uint64_t bytes, QEMUIOVector *qiov);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index ea0b3e8b13..fd7b52bd69 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -139,6 +139,61 @@
 '*format-specific': 'ImageInfoSpecific' } }
  
  ##

+# @BlockFormatAllocInfo:
+#

I apologize in advance, but I don't understand this patch very well. Let
me ask some questions to get patch review rolling again, since you've
been waiting a bit.


+#
+# Allocation relations between format file and underlying protocol file.
+# All fields are in bytes.
+#

The format file in this case would be ... what, the virtual file
represented by the qcow2? and the underlying protocol file is the raw
file that is the qcow2 itself?


yes




+# There are two types of the format file portions: 'used' and 'unused'. It's up
+# to the format how to interpret these types. For now the only format 
supporting
+# the feature is Qcow2 and for this case 'used' are clusters with positive
+# refcount and unused are clusters with zero refcount. Described portions include
+# all format file allocations, not only virtual disk data (metadata, internal
+# snapshots, etc. are included).

I guess the semantic differentiation between "used" and "unused" is left
to the individual fields, below.


hmm, I don't understand. differentiation is up to the format, and for 
qcow2 it is described above





+#
+# For the underlying file there are native block-status types of the portions:
+#  - data: allocated data
+#  - zero: read-as-zero holes
+#  - discarded: not allocated
+# 4th additional type is 'overrun', which is for the format file portions 
beyond
+# the end of the underlying file.
+#
+# So, the fields are:
+#
+# @used-data: used by the format file and backed by data in the underlying file
+#

I assume this is "defined and addressable data".


+# @used-zero: used by the format file and backed by a hole in the underlying
+# file
+#

By a hole? Can you give me an example? Do you mean like a filesystem
hole ala falloc()?


-zero, -data and -discarded are the block status of corresponding area 
in underlying file.


so, if underlying file is raw, yes, it should be a filesystem hole.

example:
-
# ./qemu-img create -f qcow2 x 1G
Formatting 'x', fmt=qcow2 size=1073741824 encryption=off 
cluster_size=65536 lazy_refcounts=off refcount_bits=16

# ./qemu-img check x
No errors were found on the image.
Im

Re: [Qemu-block] [Qemu-devel] [PATCH v22 23/30] qmp: add persistent flag to block-dirty-bitmap-add

2017-07-07 Thread Vladimir Sementsov-Ogievskiy


07.07.2017 10:54, Markus Armbruster wrote:

QAPI schema review only...  I apologize for its lateness.

Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com> writes:


Add optional 'persistent' flag to qmp command block-dirty-bitmap-add.
Default is false.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Signed-off-by: Denis V. Lunev <d...@openvz.org>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---

[...]

diff --git a/qapi/block-core.json b/qapi/block-core.json
index f85c2235c7..13f98ec146 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1561,10 +1561,16 @@
  # @granularity: the bitmap granularity, default is 64k for
  #   block-dirty-bitmap-add
  #
+# @persistent: the bitmap is persistent, i.e. it will be saved to the
+#  corresponding block device image file on its close. For now only
+#  Qcow2 disks support persistent bitmaps. Default is false for
+#  block-dirty-bitmap-add. (Since: 2.10)

"for block-dirty-bitmap-add" suggests there could be other users, with
different (but unspecified) defaults.  What about replacing the sentence
by "(default: false)"?


then, this should be done in other places, @granularity is an example.



Please wrap your comment lines around column 70.


+#
  # Since: 2.4
  ##
  { 'struct': 'BlockDirtyBitmapAdd',
-  'data': { 'node': 'str', 'name': 'str', '*granularity': 'uint32' } }
+  'data': { 'node': 'str', 'name': 'str', '*granularity': 'uint32',
+'*persistent': 'bool' } }
  
  ##

  # @block-dirty-bitmap-add:



--
Best regards,
Vladimir

[Qemu-block] [PATCH 15/24] block: add bdrv_can_store_new_dirty_bitmap

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

This will be needed to check some restrictions before making bitmap
persistent in qmp-block-dirty-bitmap-add (this functionality will be
added by future patch)

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 block.c   | 22 ++
 include/block/block.h |  3 +++
 include/block/block_int.h |  5 +
 3 files changed, 30 insertions(+)

diff --git a/block.c b/block.c
index 16cf522219..cf9919a5e0 100644
--- a/block.c
+++ b/block.c
@@ -4093,3 +4093,25 @@ void bdrv_del_child(BlockDriverState *parent_bs, 
BdrvChild *child, Error **errp)
 
 parent_bs->drv->bdrv_del_child(parent_bs, child, errp);
 }
+
+bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
+ uint32_t granularity, Error **errp)
+{
+BlockDriver *drv = bs->drv;
+
+if (!drv) {
+error_setg_errno(errp, ENOMEDIUM,
+ "Can't store persistent bitmaps to %s",
+ bdrv_get_device_or_node_name(bs));
+return false;
+}
+
+if (!drv->bdrv_can_store_new_dirty_bitmap) {
+error_setg_errno(errp, ENOTSUP,
+ "Can't store persistent bitmaps to %s",
+ bdrv_get_device_or_node_name(bs));
+return false;
+}
+
+return drv->bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp);
+}
diff --git a/include/block/block.h b/include/block/block.h
index 4e81f2069b..93718ab245 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -551,4 +551,7 @@ void bdrv_add_child(BlockDriverState *parent, 
BlockDriverState *child,
 Error **errp);
 void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
 
+bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
+ uint32_t granularity, Error **errp);
+
 #endif
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 2d92d7edfe..f943a7c24e 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -320,6 +320,11 @@ struct BlockDriver {
 void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child,
Error **errp);
 
+bool (*bdrv_can_store_new_dirty_bitmap)(BlockDriverState *bs,
+const char *name,
+uint32_t granularity,
+Error **errp);
+
 QLIST_ENTRY(BlockDriver) list;
 };
 
-- 
2.11.1

[Qemu-block] [PATCH v17 00/24] qcow2: persistent dirty bitmaps

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

ature. see 07-09 description
- for now I just free old clusters and allocate new. This will be improved
  with a separate patch.

patch about "delete bitmaps on truncate" is removed. This case will be handled 
later.
IN_USE, autoclear, check-constraints things are merged into other patches.

14-15 are new patches, to early check possibility of creating persistent bitmap 
with
  specified name and granularity in specified BDS

16-17: spelling, rewording, indenting, tiny code simplifying, check can_store 
(by 14-15),
s/if (autoload && !persistent)/if (has_autoload && !persistent)/,
rebase (deleted qmp-commands.hx)

18: alternative to md5 in query-block:
  - separated qmp command -x-debug-block-dirty-bitmap-sha256
  - as advised by Daniel P. Berrange in my parallel thread:
- sha256 instead of md5
- use qcrypto_hash_* instead of GChecksum
  - fix bug =) (size was wrong in hbitmap_md5)

19: s/3999/3fff/, use x-debug-block-dirty-bitmap-sha256

20: new patch to rename and publish inc_refcounts

21: some fixes and refactoring mostly by Max's comments.

Max, Eric, great thanks for your review!
I hope, I've covered most of your comments by this update.

TODO:
- handle reopening image RO->RW and incoming migration, set IN_USE for existing 
loaded bitmaps
  in these cases.
- reuse old, already allocated data clusters for bitmaps storing
- truncate bitmaps in the image on truncate


v7:

https://src.openvz.org/users/vsementsov/repos/qemu/browse?at=refs%2Ftags%2Fqcow2-bitmap-v7
based on block-next (https://github.com/XanClic/qemu/commits/block-next)

- a lot of refactoring and reordering of patches.
- dead code removed (bdrv_dirty_bitmap_load, etc.)
- do not maintain extra data for now
- do not store dirty bitmap directory in memory
  (as we use it seldom, we can just reread if needed)

By Kevin's review:
01 - commit message changed: fix->improvement (as it was not a bug)
03 - r-b
04 - r-b
05 - add 21 patch to fix spec, also, removed all (I hope) mentions of
 "Bitmap Header", switch to one unified name for it - "Bitmap
 Directory Entry", to avoid misunderstanding with Qcow2 header.
 (also, add patch 22, to fix it in spec)
v6.06 - improve for_each_dir_entry loop, reorder patches, other small fixes
v6.07 ~> v7.09 - dead code removed, I've moved to one function 
.bdrv_store_persistent_bitmaps and have wrapper and callback in one
patch (with also some other stuff. If it is not ok, I can split them)
v6.08 - about keeping bitmap directory instead of bitmap list: no I don't keep
it at all.
v6.16 - old bdrv_ bitmap-storing related functions are removed. The new one is
bdrv_store_persistent_bitmaps.


v6:
https://src.openvz.org/users/vsementsov/repos/qemu/browse?at=refs%2Ftags%2Fqcow2-bitmap-v6
based on block-next (https://github.com/XanClic/qemu/commits/block-next)

There are a lot of changes, reorderings and additions in comparison with v5.
One principal thing: now bitmaps are removed from image after loading instead
of marking them in_use. It is simpler and we do not need to store superfluous 
data.
Also, we are no more interested in command line interface to dirty bitmaps.
So it is dropped.  If someone needs it I can add it later.

Vladimir Sementsov-Ogievskiy (24):
  specs/qcow2: fix bitmap granularity qemu-specific note
  specs/qcow2: do not use wording 'bitmap header'
  hbitmap: improve dirty iter
  tests: add hbitmap iter test
  block: fix bdrv_dirty_bitmap_granularity signature
  block/dirty-bitmap: add deserialize_ones func
  qcow2-refcount: rename inc_refcounts() and make it public
  qcow2: add bitmaps extension
  qcow2: autoloading dirty bitmaps
  block/dirty-bitmap: add autoload field to BdrvDirtyBitmap
  block: bdrv_close: release bitmaps after drv->bdrv_close
  block: introduce persistent dirty bitmaps
  block/dirty-bitmap: add bdrv_dirty_bitmap_next()
  qcow2: add persistent dirty bitmaps support
  block: add bdrv_can_store_new_dirty_bitmap
  qcow2: add .bdrv_can_store_new_dirty_bitmap
  qmp: add persistent flag to block-dirty-bitmap-add
  qmp: add autoload parameter to block-dirty-bitmap-add
  qmp: add x-debug-block-dirty-bitmap-sha256
  iotests: test qcow2 persistent dirty bitmap
  block/dirty-bitmap: add bdrv_remove_persistent_dirty_bitmap
  qcow2: add .bdrv_remove_persistent_dirty_bitmap
  qmp: block-dirty-bitmap-remove: remove persistent
  block: release persistent bitmaps on inactivate

 block.c  |   32 +-
 block/Makefile.objs  |2 +-
 block/dirty-bitmap.c |  109 +++-
 block/qcow2-bitmap.c | 1384 ++
 block/qcow2-refcount.c   |   59 +-
 block/qcow2.c|  149 -
 block/qcow2.h|   41 ++
 blockdev.c   |   71 ++-
 docs/specs/qcow2.txt |8 +-
 include/block/block.h|3 +
 include/block/block_int.h|8 +
 include/block/dirty-bitma

[Qemu-block] [PATCH 07/24] qcow2-refcount: rename inc_refcounts() and make it public

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

This is needed for the following patch, which will introduce refcounts
checking for qcow2 bitmaps.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 block/qcow2-refcount.c | 53 ++
 block/qcow2.h  |  4 
 2 files changed, 32 insertions(+), 25 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index cbfb3fe064..14a736d245 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1309,11 +1309,10 @@ static int realloc_refcount_array(BDRVQcow2State *s, 
void **array,
  *
  * Modifies the number of errors in res.
  */
-static int inc_refcounts(BlockDriverState *bs,
- BdrvCheckResult *res,
- void **refcount_table,
- int64_t *refcount_table_size,
- int64_t offset, int64_t size)
+int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
+ void **refcount_table,
+ int64_t *refcount_table_size,
+ int64_t offset, int64_t size)
 {
 BDRVQcow2State *s = bs->opaque;
 uint64_t start, last, cluster_offset, k, refcount;
@@ -1406,8 +1405,9 @@ static int check_refcounts_l2(BlockDriverState *bs, 
BdrvCheckResult *res,
 nb_csectors = ((l2_entry >> s->csize_shift) &
s->csize_mask) + 1;
 l2_entry &= s->cluster_offset_mask;
-ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
-l2_entry & ~511, nb_csectors * 512);
+ret = qcow2_inc_refcounts_imrt(bs, res,
+   refcount_table, refcount_table_size,
+   l2_entry & ~511, nb_csectors * 512);
 if (ret < 0) {
 goto fail;
 }
@@ -1445,8 +1445,9 @@ static int check_refcounts_l2(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 
 /* Mark cluster as used */
-ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
-offset, s->cluster_size);
+ret = qcow2_inc_refcounts_imrt(bs, res,
+   refcount_table, refcount_table_size,
+   offset, s->cluster_size);
 if (ret < 0) {
 goto fail;
 }
@@ -1498,8 +1499,8 @@ static int check_refcounts_l1(BlockDriverState *bs,
 l1_size2 = l1_size * sizeof(uint64_t);
 
 /* Mark L1 table as used */
-ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
-l1_table_offset, l1_size2);
+ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, 
refcount_table_size,
+   l1_table_offset, l1_size2);
 if (ret < 0) {
 goto fail;
 }
@@ -1528,8 +1529,9 @@ static int check_refcounts_l1(BlockDriverState *bs,
 if (l2_offset) {
 /* Mark L2 table as used */
 l2_offset &= L1E_OFFSET_MASK;
-ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
-l2_offset, s->cluster_size);
+ret = qcow2_inc_refcounts_imrt(bs, res,
+   refcount_table, refcount_table_size,
+   l2_offset, s->cluster_size);
 if (ret < 0) {
 goto fail;
 }
@@ -1744,14 +1746,15 @@ static int check_refblocks(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 
 res->corruptions_fixed++;
-ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
-offset, s->cluster_size);
+ret = qcow2_inc_refcounts_imrt(bs, res,
+   refcount_table, nb_clusters,
+   offset, s->cluster_size);
 if (ret < 0) {
 return ret;
 }
 /* No need to check whether the refcount is now greater than 1:
  * This area was just allocated and zeroed, so it can only be
- * exactly 1 after inc_refcounts() */
+ * exactly 1 after qcow2_inc_refcounts_imrt() */
 continue;
 
 resize_fail:
@@ -1766,8 +1769,8 @@ resize_fail:
 }
 
 if (offset != 0) {
-ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
-offset, s->cluster_size);
+ret = qcow2_inc_refcounts_imrt(bs, res, refcount_table, 
nb_clusters,
+

[Qemu-block] [PATCH 22/24] qcow2: add .bdrv_remove_persistent_dirty_bitmap

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

Realize .bdrv_remove_persistent_dirty_bitmap interface.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 block/qcow2-bitmap.c | 41 +
 block/qcow2.c|  1 +
 block/qcow2.h|  3 +++
 3 files changed, 45 insertions(+)

diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index c911a08d1b..b9b5ab56a8 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -1163,6 +1163,47 @@ static Qcow2Bitmap *find_bitmap_by_name(Qcow2BitmapList 
*bm_list,
 return NULL;
 }
 
+void qcow2_remove_persistent_dirty_bitmap(BlockDriverState *bs,
+  const char *name,
+  Error **errp)
+{
+int ret;
+BDRVQcow2State *s = bs->opaque;
+Qcow2Bitmap *bm;
+Qcow2BitmapList *bm_list;
+
+if (s->nb_bitmaps == 0) {
+/* Absence of the bitmap is not an error: see explanation above
+ * bdrv_remove_persistent_dirty_bitmap() definition. */
+return;
+}
+
+bm_list = bitmap_list_load(bs, s->bitmap_directory_offset,
+   s->bitmap_directory_size, errp);
+if (bm_list == NULL) {
+return;
+}
+
+bm = find_bitmap_by_name(bm_list, name);
+if (bm == NULL) {
+goto fail;
+}
+
+QSIMPLEQ_REMOVE(bm_list, bm, Qcow2Bitmap, entry);
+
+ret = update_ext_header_and_dir(bs, bm_list);
+if (ret < 0) {
+error_setg_errno(errp, -ret, "Failed to update bitmap extension");
+goto fail;
+}
+
+free_bitmap_clusters(bs, &bm->table);
+
+fail:
+bitmap_free(bm);
+bitmap_list_free(bm_list);
+}
+
 void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, Error **errp)
 {
 BdrvDirtyBitmap *bitmap;
diff --git a/block/qcow2.c b/block/qcow2.c
index fde4769e68..43bac94f08 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3563,6 +3563,7 @@ BlockDriver bdrv_qcow2 = {
 .bdrv_attach_aio_context  = qcow2_attach_aio_context,
 
 .bdrv_can_store_new_dirty_bitmap = qcow2_can_store_new_dirty_bitmap,
+.bdrv_remove_persistent_dirty_bitmap = 
qcow2_remove_persistent_dirty_bitmap,
 };
 
 static void bdrv_qcow2_init(void)
diff --git a/block/qcow2.h b/block/qcow2.h
index fd311f383d..93536d7a3f 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -626,5 +626,8 @@ bool qcow2_can_store_new_dirty_bitmap(BlockDriverState *bs,
   const char *name,
   uint32_t granularity,
   Error **errp);
+void qcow2_remove_persistent_dirty_bitmap(BlockDriverState *bs,
+  const char *name,
+  Error **errp);
 
 #endif
-- 
2.11.1

[Qemu-block] [PATCH 05/24] block: fix bdrv_dirty_bitmap_granularity signature

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

Make getter signature const-correct. This allows other functions with
const dirty bitmap parameter use bdrv_dirty_bitmap_granularity().

Reviewed-by: Eric Blake <ebl...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
Reviewed-by: Kevin Wolf <kw...@redhat.com>
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 block/dirty-bitmap.c | 2 +-
 include/block/dirty-bitmap.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 519737c8d3..186941cfc3 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -388,7 +388,7 @@ uint32_t 
bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
 return granularity;
 }
 
-uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
+uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap)
 {
 return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
 }
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 9dea14ba03..7cbe623ba7 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -29,7 +29,7 @@ void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs);
 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs);
-uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap);
+uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap);
 uint32_t bdrv_dirty_bitmap_meta_granularity(BdrvDirtyBitmap *bitmap);
 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap);
 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap);
-- 
2.11.1

[Qemu-block] [PATCH 23/24] qmp: block-dirty-bitmap-remove: remove persistent

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

Remove persistent bitmap from the storage on block-dirty-bitmap-remove.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 blockdev.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/blockdev.c b/blockdev.c
index c41b791289..a365cdf3ed 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2767,6 +2767,7 @@ void qmp_block_dirty_bitmap_remove(const char *node, 
const char *name,
 AioContext *aio_context;
 BlockDriverState *bs;
 BdrvDirtyBitmap *bitmap;
+Error *local_err = NULL;
 
 bitmap = block_dirty_bitmap_lookup(node, name, &bs, &aio_context, errp);
 if (!bitmap || !bs) {
@@ -2779,6 +2780,15 @@ void qmp_block_dirty_bitmap_remove(const char *node, 
const char *name,
name);
 goto out;
 }
+
+if (bdrv_dirty_bitmap_get_persistance(bitmap)) {
+bdrv_remove_persistent_dirty_bitmap(bs, name, &local_err);
+if (local_err != NULL) {
+error_propagate(errp, local_err);
+goto out;
+}
+}
+
 bdrv_dirty_bitmap_make_anon(bitmap);
 bdrv_release_dirty_bitmap(bs, bitmap);
 
-- 
2.11.1

[Qemu-block] [PATCH 16/24] qcow2: add .bdrv_can_store_new_dirty_bitmap

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

Realize .bdrv_can_store_new_dirty_bitmap interface.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: John Snow <js...@redhat.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
---
 block/qcow2-bitmap.c | 51 +++
 block/qcow2.c|  2 ++
 block/qcow2.h|  4 
 3 files changed, 57 insertions(+)

diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 94bb3d9132..c911a08d1b 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -1290,3 +1290,54 @@ fail:
 
 bitmap_list_free(bm_list);
 }
+
+bool qcow2_can_store_new_dirty_bitmap(BlockDriverState *bs,
+  const char *name,
+  uint32_t granularity,
+  Error **errp)
+{
+BDRVQcow2State *s = bs->opaque;
+bool found;
+Qcow2BitmapList *bm_list;
+
+if (check_constraints_on_bitmap(bs, name, granularity, errp) != 0) {
+goto fail;
+}
+
+if (s->nb_bitmaps == 0) {
+return true;
+}
+
+if (s->nb_bitmaps >= QCOW2_MAX_BITMAPS) {
+error_setg(errp,
+   "Maximum number of persistent bitmaps is already reached");
+goto fail;
+}
+
+if (s->bitmap_directory_size + calc_dir_entry_size(strlen(name), 0) >
+QCOW2_MAX_BITMAP_DIRECTORY_SIZE)
+{
+error_setg(errp, "Not enough space in the bitmap directory");
+goto fail;
+}
+
+bm_list = bitmap_list_load(bs, s->bitmap_directory_offset,
+   s->bitmap_directory_size, errp);
+if (bm_list == NULL) {
+goto fail;
+}
+
+found = find_bitmap_by_name(bm_list, name);
+bitmap_list_free(bm_list);
+if (found) {
+error_setg(errp, "Bitmap with the same name is already stored");
+goto fail;
+}
+
+return true;
+
+fail:
+error_prepend(errp, "Can't make bitmap '%s' persistent in '%s': ",
+  name, bdrv_get_device_or_node_name(bs));
+return false;
+}
diff --git a/block/qcow2.c b/block/qcow2.c
index a6f8a191ad..fde4769e68 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3561,6 +3561,8 @@ BlockDriver bdrv_qcow2 = {
 
 .bdrv_detach_aio_context  = qcow2_detach_aio_context,
 .bdrv_attach_aio_context  = qcow2_attach_aio_context,
+
+.bdrv_can_store_new_dirty_bitmap = qcow2_can_store_new_dirty_bitmap,
 };
 
 static void bdrv_qcow2_init(void)
diff --git a/block/qcow2.h b/block/qcow2.h
index f8ac670f4a..fd311f383d 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -622,5 +622,9 @@ int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
   int64_t *refcount_table_size);
 bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState *bs, Error **errp);
 void qcow2_store_persistent_dirty_bitmaps(BlockDriverState *bs, Error **errp);
+bool qcow2_can_store_new_dirty_bitmap(BlockDriverState *bs,
+  const char *name,
+  uint32_t granularity,
+  Error **errp);
 
 #endif
-- 
2.11.1

[Qemu-block] [PATCH 09/24] qcow2: autoloading dirty bitmaps

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

Auto loading bitmaps are bitmaps in Qcow2, with the AUTO flag set. They
are loaded when the image is opened and become BdrvDirtyBitmaps for the
corresponding drive.

Extra data in bitmaps is not supported for now.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 block/qcow2-bitmap.c | 377 +++
 block/qcow2.c|  23 +++-
 block/qcow2.h|   2 +
 3 files changed, 397 insertions(+), 5 deletions(-)

diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index b8e472b3e8..95e102e71f 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -44,6 +44,8 @@
 
 /* Bitmap directory entry flags */
 #define BME_RESERVED_FLAGS 0xfffcU
+#define BME_FLAG_IN_USE (1U << 0)
+#define BME_FLAG_AUTO   (1U << 1)
 
 /* bits [1, 8] U [56, 63] are reserved */
 #define BME_TABLE_ENTRY_RESERVED_MASK 0xff0001feULL
@@ -85,6 +87,23 @@ typedef enum BitmapType {
 BT_DIRTY_TRACKING_BITMAP = 1
 } BitmapType;
 
+static inline bool can_write(BlockDriverState *bs)
+{
+return !bdrv_is_read_only(bs) && !(bdrv_get_flags(bs) & BDRV_O_INACTIVE);
+}
+
+static int update_header_sync(BlockDriverState *bs)
+{
+int ret;
+
+ret = qcow2_update_header(bs);
+if (ret < 0) {
+return ret;
+}
+
+return bdrv_flush(bs);
+}
+
 static int check_table_entry(uint64_t entry, int cluster_size)
 {
 uint64_t offset;
@@ -146,6 +165,120 @@ fail:
 return ret;
 }
 
+/* This function returns the number of disk sectors covered by a single qcow2
+ * cluster of bitmap data. */
+static uint64_t sectors_covered_by_bitmap_cluster(const BDRVQcow2State *s,
+  const BdrvDirtyBitmap 
*bitmap)
+{
+uint32_t sector_granularity =
+bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS;
+
+return (uint64_t)sector_granularity * (s->cluster_size << 3);
+}
+
+/* bitmap table entries must satisfy specification constraints */
+static int load_bitmap_data(BlockDriverState *bs,
+const uint64_t *bitmap_table,
+uint32_t bitmap_table_size,
+BdrvDirtyBitmap *bitmap)
+{
+int ret = 0;
+BDRVQcow2State *s = bs->opaque;
+uint64_t sector, sbc;
+uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap);
+uint8_t *buf = NULL;
+uint64_t i, tab_size =
+size_to_clusters(s,
+bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_size));
+
+if (tab_size != bitmap_table_size || tab_size > BME_MAX_TABLE_SIZE) {
+return -EINVAL;
+}
+
+bdrv_clear_dirty_bitmap(bitmap, NULL);
+
+buf = g_malloc(s->cluster_size);
+sbc = sectors_covered_by_bitmap_cluster(s, bitmap);
+for (i = 0, sector = 0; i < tab_size; ++i, sector += sbc) {
+uint64_t count = MIN(bm_size - sector, sbc);
+uint64_t entry = bitmap_table[i];
+uint64_t offset = entry & BME_TABLE_ENTRY_OFFSET_MASK;
+
+assert(check_table_entry(entry, s->cluster_size) == 0);
+
+if (offset == 0) {
+if (entry & BME_TABLE_ENTRY_FLAG_ALL_ONES) {
+bdrv_dirty_bitmap_deserialize_ones(bitmap, sector, count,
+   false);
+} else {
+/* No need to deserialize zeros because the dirty bitmap is
+ * already cleared */
+}
+} else {
+ret = bdrv_pread(bs->file, offset, buf, s->cluster_size);
+if (ret < 0) {
+goto finish;
+}
+bdrv_dirty_bitmap_deserialize_part(bitmap, buf, sector, count,
+   false);
+}
+}
+ret = 0;
+
+bdrv_dirty_bitmap_deserialize_finish(bitmap);
+
+finish:
+g_free(buf);
+
+return ret;
+}
+
+static BdrvDirtyBitmap *load_bitmap(BlockDriverState *bs,
+Qcow2Bitmap *bm, Error **errp)
+{
+int ret;
+uint64_t *bitmap_table = NULL;
+uint32_t granularity;
+BdrvDirtyBitmap *bitmap = NULL;
+
+if (bm->flags & BME_FLAG_IN_USE) {
+error_setg(errp, "Bitmap '%s' is in use", bm->name);
+goto fail;
+}
+
+ret = bitmap_table_load(bs, &bm->table, &bitmap_table);
+if (ret < 0) {
+error_setg_errno(errp, -ret,
+ "Could not read bitmap_table table from image for "
+ "bitmap '%s'", bm->name);
+goto fail;
+}
+
+granularity = 1U << bm->granularity_bits;
+bitmap = bdrv_create_dirty_bitmap(bs, granularity, bm->name, errp);
+if (bitmap == NULL) {
+goto fail;
+}
+
+ret = load_bitmap_data(bs, bitmap_table, bm->table.size, bitmap);
+if (ret < 0) {
+error_setg_errno(errp, -ret, "Could not read bitmap

[Qemu-block] [PATCH 19/24] qmp: add x-debug-block-dirty-bitmap-sha256

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 block/dirty-bitmap.c |  5 +
 blockdev.c   | 29 +
 include/block/dirty-bitmap.h |  2 ++
 include/qemu/hbitmap.h   |  8 
 qapi/block-core.json | 27 +++
 tests/Makefile.include   |  2 +-
 util/hbitmap.c   | 11 +++
 7 files changed, 83 insertions(+), 1 deletion(-)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 9208844a7d..b684d8b00e 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -588,3 +588,8 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_next(BlockDriverState 
*bs,
 return bitmap == NULL ? QLIST_FIRST(&bs->dirty_bitmaps) :
 QLIST_NEXT(bitmap, list);
 }
+
+char *bdrv_dirty_bitmap_sha256(const BdrvDirtyBitmap *bitmap, Error **errp)
+{
+return hbitmap_sha256(bitmap->bitmap, errp);
+}
diff --git a/blockdev.c b/blockdev.c
index e32ac69e4b..c41b791289 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2820,6 +2820,35 @@ void qmp_block_dirty_bitmap_clear(const char *node, 
const char *name,
 aio_context_release(aio_context);
 }
 
+BlockDirtyBitmapSha256 *qmp_x_debug_block_dirty_bitmap_sha256(const char *node,
+  const char *name,
+  Error **errp)
+{
+AioContext *aio_context;
+BdrvDirtyBitmap *bitmap;
+BlockDriverState *bs;
+BlockDirtyBitmapSha256 *ret = NULL;
+char *sha256;
+
+bitmap = block_dirty_bitmap_lookup(node, name, &bs, &aio_context, errp);
+if (!bitmap || !bs) {
+return NULL;
+}
+
+sha256 = bdrv_dirty_bitmap_sha256(bitmap, errp);
+if (sha256 == NULL) {
+goto out;
+}
+
+ret = g_new(BlockDirtyBitmapSha256, 1);
+ret->sha256 = sha256;
+
+out:
+aio_context_release(aio_context);
+
+return ret;
+}
+
 void hmp_drive_del(Monitor *mon, const QDict *qdict)
 {
 const char *id = qdict_get_str(qdict, "id");
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index d71edc4d13..b022b34964 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -86,4 +86,6 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_next(BlockDriverState *bs,
 
 bool bdrv_has_persistent_bitmaps(BlockDriverState *bs);
 
+char *bdrv_dirty_bitmap_sha256(const BdrvDirtyBitmap *bitmap, Error **errp);
+
 #endif
diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index b52304ac29..d3a74a21fc 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -253,6 +253,14 @@ void hbitmap_deserialize_ones(HBitmap *hb, uint64_t start, 
uint64_t count,
 void hbitmap_deserialize_finish(HBitmap *hb);
 
 /**
+ * hbitmap_sha256:
+ * @bitmap: HBitmap to operate on.
+ *
+ * Returns SHA256 hash of the last level.
+ */
+char *hbitmap_sha256(const HBitmap *bitmap, Error **errp);
+
+/**
  * hbitmap_free:
  * @hb: HBitmap to operate on.
  *
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 09dcf4e6ae..d6f173448a 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1642,6 +1642,33 @@
   'data': 'BlockDirtyBitmap' }
 
 ##
+# @BlockDirtyBitmapSha256:
+#
+# SHA256 hash of dirty bitmap data
+#
+# @sha256: ASCII representation of SHA256 bitmap hash
+#
+# Since: 2.9
+##
+  { 'struct': 'BlockDirtyBitmapSha256',
+'data': {'sha256': 'str'} }
+
+##
+# @x-debug-block-dirty-bitmap-sha256:
+#
+# Get bitmap SHA256
+#
+# Returns: BlockDirtyBitmapSha256 on success
+#  If @node is not a valid block device, DeviceNotFound
+#  If @name is not found or if hashing has failed, GenericError with an
+#  explanation
+#
+# Since: 2.9
+##
+  { 'command': 'x-debug-block-dirty-bitmap-sha256',
+'data': 'BlockDirtyBitmap', 'returns': 'BlockDirtyBitmapSha256' }
+
+##
 # @blockdev-mirror:
 #
 # Start mirroring a block device's writes to a new destination.
diff --git a/tests/Makefile.include b/tests/Makefile.include
index ad35a6494b..d0809fefb2 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -519,7 +519,7 @@ tests/test-blockjob$(EXESUF): tests/test-blockjob.o 
$(test-block-obj-y) $(test-u
 tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o 
$(test-block-obj-y) $(test-util-obj-y)
 tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y)
 tests/test-iov$(EXESUF): tests/test-iov.o $(test-util-obj-y)
-tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o $(test-util-obj-y)
+tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o $(test-util-obj-y) 
$(test-crypto-obj-y)
 tests/test-x86-cpuid$(EXESUF): tests/test-x86-cpuid.o
 tests/test-xbzrle$(EXESUF): tests/test-xbzrle.o migration/xbzrle.o 
page_cache.o $(test-util-obj-y)
 tests/test-cutils$(EXESUF): tests/test-cutils.o util/cutils.o
diff --git a/util/hbitma

[Qemu-block] [PATCH 01/24] specs/qcow2: fix bitmap granularity qemu-specific note

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 docs/specs/qcow2.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/specs/qcow2.txt b/docs/specs/qcow2.txt
index 80cdfd0e91..dda53dd2a3 100644
--- a/docs/specs/qcow2.txt
+++ b/docs/specs/qcow2.txt
@@ -472,8 +472,7 @@ Structure of a bitmap directory entry:
  17:granularity_bits
 Granularity bits. Valid values: 0 - 63.
 
-Note: Qemu currently doesn't support granularity_bits
-greater than 31.
+Note: Qemu currently supports only values 9 - 31.
 
 Granularity is calculated as
 granularity = 1 << granularity_bits
-- 
2.11.1

[Qemu-block] [PATCH 13/24] block/dirty-bitmap: add bdrv_dirty_bitmap_next()

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 block/dirty-bitmap.c | 7 +++
 include/block/dirty-bitmap.h | 3 +++
 2 files changed, 10 insertions(+)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index d2fbf55964..9208844a7d 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -581,3 +581,10 @@ bool bdrv_has_persistent_bitmaps(BlockDriverState *bs)
 
 return false;
 }
+
+BdrvDirtyBitmap *bdrv_dirty_bitmap_next(BlockDriverState *bs,
+BdrvDirtyBitmap *bitmap)
+{
+return bitmap == NULL ? QLIST_FIRST(&bs->dirty_bitmaps) :
+QLIST_NEXT(bitmap, list);
+}
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 8dbd16b040..d71edc4d13 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -81,6 +81,9 @@ void bdrv_dirty_bitmap_set_persistance(BdrvDirtyBitmap 
*bitmap,
 bool persistent);
 bool bdrv_dirty_bitmap_get_persistance(BdrvDirtyBitmap *bitmap);
 
+BdrvDirtyBitmap *bdrv_dirty_bitmap_next(BlockDriverState *bs,
+BdrvDirtyBitmap *bitmap);
+
 bool bdrv_has_persistent_bitmaps(BlockDriverState *bs);
 
 #endif
-- 
2.11.1

[Qemu-block] [PATCH 14/24] qcow2: add persistent dirty bitmaps support

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

Store persistent dirty bitmaps in qcow2 image.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 block/qcow2-bitmap.c | 473 +++
 block/qcow2.c|   9 +
 block/qcow2.h|   1 +
 3 files changed, 483 insertions(+)

diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 928a1b2e67..94bb3d9132 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -27,6 +27,7 @@
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
+#include "qemu/cutils.h"
 
 #include "block/block_int.h"
 #include "block/qcow2.h"
@@ -42,6 +43,10 @@
 #define BME_MIN_GRANULARITY_BITS 9
 #define BME_MAX_NAME_SIZE 1023
 
+#if BME_MAX_TABLE_SIZE * 8ULL > INT_MAX
+#error In the code bitmap table physical size assumed to fit into int
+#endif
+
 /* Bitmap directory entry flags */
 #define BME_RESERVED_FLAGS 0xfffcU
 #define BME_FLAG_IN_USE (1U << 0)
@@ -72,6 +77,8 @@ typedef struct Qcow2BitmapTable {
 uint32_t size; /* number of 64bit entries */
 QSIMPLEQ_ENTRY(Qcow2BitmapTable) entry;
 } Qcow2BitmapTable;
+typedef QSIMPLEQ_HEAD(Qcow2BitmapTableList, Qcow2BitmapTable)
+Qcow2BitmapTableList;
 
 typedef struct Qcow2Bitmap {
 Qcow2BitmapTable table;
@@ -79,6 +86,8 @@ typedef struct Qcow2Bitmap {
 uint8_t granularity_bits;
 char *name;
 
+BdrvDirtyBitmap *dirty_bitmap;
+
 QSIMPLEQ_ENTRY(Qcow2Bitmap) entry;
 } Qcow2Bitmap;
 typedef QSIMPLEQ_HEAD(Qcow2BitmapList, Qcow2Bitmap) Qcow2BitmapList;
@@ -104,6 +113,15 @@ static int update_header_sync(BlockDriverState *bs)
 return bdrv_flush(bs);
 }
 
+static inline void bitmap_table_to_be(uint64_t *bitmap_table, size_t size)
+{
+size_t i;
+
+for (i = 0; i < size; ++i) {
+cpu_to_be64s(&bitmap_table[i]);
+}
+}
+
 static int check_table_entry(uint64_t entry, int cluster_size)
 {
 uint64_t offset;
@@ -127,6 +145,70 @@ static int check_table_entry(uint64_t entry, int 
cluster_size)
 return 0;
 }
 
+static int check_constraints_on_bitmap(BlockDriverState *bs,
+   const char *name,
+   uint32_t granularity,
+   Error **errp)
+{
+BDRVQcow2State *s = bs->opaque;
+int granularity_bits = ctz32(granularity);
+int64_t len = bdrv_getlength(bs);
+
+assert(granularity > 0);
+assert((granularity & (granularity - 1)) == 0);
+
+if (len < 0) {
+error_setg_errno(errp, -len, "Failed to get size of '%s'",
+ bdrv_get_device_or_node_name(bs));
+return len;
+}
+
+if (granularity_bits > BME_MAX_GRANULARITY_BITS) {
+error_setg(errp, "Granularity exceeds maximum (%llu bytes)",
+   1ULL << BME_MAX_GRANULARITY_BITS);
+return -EINVAL;
+}
+if (granularity_bits < BME_MIN_GRANULARITY_BITS) {
+error_setg(errp, "Granularity is under minimum (%llu bytes)",
+   1ULL << BME_MIN_GRANULARITY_BITS);
+return -EINVAL;
+}
+
+if ((len > (uint64_t)BME_MAX_PHYS_SIZE << granularity_bits) ||
+(len > (uint64_t)BME_MAX_TABLE_SIZE * s->cluster_size <<
+   granularity_bits))
+{
+error_setg(errp, "Too much space will be occupied by the bitmap. "
+   "Use larger granularity");
+return -EINVAL;
+}
+
+if (strlen(name) > BME_MAX_NAME_SIZE) {
+error_setg(errp, "Name length exceeds maximum (%u characters)",
+   BME_MAX_NAME_SIZE);
+return -EINVAL;
+}
+
+return 0;
+}
+
+static void clear_bitmap_table(BlockDriverState *bs, uint64_t *bitmap_table,
+   uint32_t bitmap_table_size)
+{
+BDRVQcow2State *s = bs->opaque;
+int i;
+
+for (i = 0; i < bitmap_table_size; ++i) {
+uint64_t addr = bitmap_table[i] & BME_TABLE_ENTRY_OFFSET_MASK;
+if (!addr) {
+continue;
+}
+
+qcow2_free_clusters(bs, addr, s->cluster_size, QCOW2_DISCARD_OTHER);
+bitmap_table[i] = 0;
+}
+}
+
 static int bitmap_table_load(BlockDriverState *bs, Qcow2BitmapTable *tb,
  uint64_t **bitmap_table)
 {
@@ -165,6 +247,28 @@ fail:
 return ret;
 }
 
+static int free_bitmap_clusters(BlockDriverState *bs, Qcow2BitmapTable *tb)
+{
+int ret;
+uint64_t *bitmap_table;
+
+ret = bitmap_table_load(bs, tb, &bitmap_table);
+if (ret < 0) {
+assert(bitmap_table == NULL);
+return ret;
+}
+
+clear_bitmap_table(bs, bitmap_table, tb->size);
+qcow2_free_clusters(bs, tb->offset, tb->size * sizeof(uint64_t),
+QCOW2_DISCARD_OTHER);
+g_free(bitmap_table);
+
+tb->offset = 0;
+tb->size = 0;
+
+return 0;
+}
+
 /*

[Qemu-block] [PATCH 17/24] qmp: add persistent flag to block-dirty-bitmap-add

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

Add optional 'persistent' flag to qmp command block-dirty-bitmap-add.
Default is false.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Signed-off-by: Denis V. Lunev <d...@openvz.org>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 blockdev.c   | 18 +-
 qapi/block-core.json |  8 +++-
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 245e1e1d17..40605faccc 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1967,6 +1967,7 @@ static void block_dirty_bitmap_add_prepare(BlkActionState 
*common,
 /* AIO context taken and released within qmp_block_dirty_bitmap_add */
 qmp_block_dirty_bitmap_add(action->node, action->name,
action->has_granularity, action->granularity,
+   action->has_persistent, action->persistent,
 &local_err);
 
 if (!local_err) {
@@ -2696,10 +2697,12 @@ out:
 
 void qmp_block_dirty_bitmap_add(const char *node, const char *name,
 bool has_granularity, uint32_t granularity,
+bool has_persistent, bool persistent,
 Error **errp)
 {
 AioContext *aio_context;
 BlockDriverState *bs;
+BdrvDirtyBitmap *bitmap;
 
 if (!name || name[0] == '\0') {
 error_setg(errp, "Bitmap name cannot be empty");
@@ -2725,7 +2728,20 @@ void qmp_block_dirty_bitmap_add(const char *node, const 
char *name,
 granularity = bdrv_get_default_bitmap_granularity(bs);
 }
 
-bdrv_create_dirty_bitmap(bs, granularity, name, errp);
+if (!has_persistent) {
+persistent = false;
+}
+
+if (persistent &&
+!bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp))
+{
+goto out;
+}
+
+bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp);
+if (bitmap != NULL) {
+bdrv_dirty_bitmap_set_persistance(bitmap, persistent);
+}
 
  out:
 aio_context_release(aio_context);
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 932f5bb3b4..535df20212 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1559,10 +1559,16 @@
 # @granularity: #optional the bitmap granularity, default is 64k for
 #   block-dirty-bitmap-add
 #
+# @persistent: #optional the bitmap is persistent, i.e. it will be saved to the
+#  corresponding block device image file on its close. For now only
+#  Qcow2 disks support persistent bitmaps. Default is false.
+#  (Since 2.9)
+#
 # Since: 2.4
 ##
 { 'struct': 'BlockDirtyBitmapAdd',
-  'data': { 'node': 'str', 'name': 'str', '*granularity': 'uint32' } }
+  'data': { 'node': 'str', 'name': 'str', '*granularity': 'uint32',
+'*persistent': 'bool' } }
 
 ##
 # @block-dirty-bitmap-add:
-- 
2.11.1

[Qemu-block] [PATCH 08/24] qcow2: add bitmaps extension

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

Add bitmap extension as specified in docs/specs/qcow2.txt.
For now, just mirror extension header into Qcow2 state and check
constraints. Also, calculate refcounts for qcow2 bitmaps, to not break
qemu-img check.

For now, disable image resize if it has bitmaps. It will be fixed later.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 block/Makefile.objs|   2 +-
 block/qcow2-bitmap.c   | 439 +
 block/qcow2-refcount.c |   6 +
 block/qcow2.c  | 124 +-
 block/qcow2.h  |  27 +++
 5 files changed, 592 insertions(+), 6 deletions(-)
 create mode 100644 block/qcow2-bitmap.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index c6bd14e883..ff8d18511b 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,5 +1,5 @@
 block-obj-y += raw-format.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o 
dmg.o
-block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o
+block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o qcow2-bitmap.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
new file mode 100644
index 00..b8e472b3e8
--- /dev/null
+++ b/block/qcow2-bitmap.c
@@ -0,0 +1,439 @@
+/*
+ * Bitmaps for the QCOW version 2 format
+ *
+ * Copyright (c) 2014-2017 Vladimir Sementsov-Ogievskiy
+ *
+ * This file is derived from qcow2-snapshot.c, original copyright:
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+
+#include "block/block_int.h"
+#include "block/qcow2.h"
+
+/* NOTICE: BME here means Bitmaps Extension and used as a namespace for
+ * _internal_ constants. Please do not use this _internal_ abbreviation for
+ * other needs and/or outside of this file. */
+
+/* Bitmap directory entry constraints */
+#define BME_MAX_TABLE_SIZE 0x800
+#define BME_MAX_PHYS_SIZE 0x2000 /* restrict BdrvDirtyBitmap size in RAM */
+#define BME_MAX_GRANULARITY_BITS 31
+#define BME_MIN_GRANULARITY_BITS 9
+#define BME_MAX_NAME_SIZE 1023
+
+/* Bitmap directory entry flags */
+#define BME_RESERVED_FLAGS 0xfffcU
+
+/* bits [1, 8] U [56, 63] are reserved */
+#define BME_TABLE_ENTRY_RESERVED_MASK 0xff000000000001feULL
+#define BME_TABLE_ENTRY_OFFSET_MASK 0x00fffffffffffe00ULL
+#define BME_TABLE_ENTRY_FLAG_ALL_ONES (1ULL << 0)
+
+typedef struct QEMU_PACKED Qcow2BitmapDirEntry {
+/* header is 8 byte aligned */
+uint64_t bitmap_table_offset;
+
+uint32_t bitmap_table_size;
+uint32_t flags;
+
+uint8_t type;
+uint8_t granularity_bits;
+uint16_t name_size;
+uint32_t extra_data_size;
+/* extra data follows  */
+/* name follows  */
+} Qcow2BitmapDirEntry;
+
+typedef struct Qcow2BitmapTable {
+uint64_t offset;
+uint32_t size; /* number of 64bit entries */
+QSIMPLEQ_ENTRY(Qcow2BitmapTable) entry;
+} Qcow2BitmapTable;
+
+typedef struct Qcow2Bitmap {
+Qcow2BitmapTable table;
+uint32_t flags;
+uint8_t granularity_bits;
+char *name;
+
+QSIMPLEQ_ENTRY(Qcow2Bitmap) entry;
+} Qcow2Bitmap;
+typedef QSIMPLEQ_HEAD(Qcow2BitmapList, Qcow2Bitmap) Qcow2BitmapList;
+
+typedef enum BitmapType {
+BT_DIRTY_TRACKING_BITMAP = 1
+} BitmapType;
+
+static int check_table_entry(uint64_t entry, int cluster_size)
+{
+uint64_t offset;
+
+if (entry & BME_TABLE_ENTRY_RESERVED_MASK) {
+return -EINVAL;
+}
+
+offset = entry & BME_TABLE_ENTRY_OFFSET_MASK;
+if (offset != 0) {
+/* if offset specified, bit 0 is r

[Qemu-block] [PATCH 11/24] block: bdrv_close: release bitmaps after drv->bdrv_close

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

Release bitmaps after 'if (bs->drv) { ... }' block. This will allow
format driver to save persistent bitmaps, which will appear in following
commits.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 block.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/block.c b/block.c
index a0346c80c6..16cf522219 100644
--- a/block.c
+++ b/block.c
@@ -2322,9 +2322,6 @@ static void bdrv_close(BlockDriverState *bs)
 bdrv_flush(bs);
 bdrv_drain(bs); /* in case flush left pending I/O */
 
-bdrv_release_named_dirty_bitmaps(bs);
-assert(QLIST_EMPTY(&bs->dirty_bitmaps));
-
 if (bs->drv) {
 BdrvChild *child, *next;
 
@@ -2363,6 +2360,9 @@ static void bdrv_close(BlockDriverState *bs)
 bs->full_open_options = NULL;
 }
 
+bdrv_release_named_dirty_bitmaps(bs);
+assert(QLIST_EMPTY(&bs->dirty_bitmaps));
+
 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
 g_free(ban);
 }
-- 
2.11.1

[Qemu-block] [PATCH 18/24] qmp: add autoload parameter to block-dirty-bitmap-add

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

Optional. Default is false.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Signed-off-by: Denis V. Lunev <d...@openvz.org>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 blockdev.c   | 18 --
 qapi/block-core.json |  6 +-
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 40605faccc..e32ac69e4b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1968,6 +1968,7 @@ static void block_dirty_bitmap_add_prepare(BlkActionState 
*common,
 qmp_block_dirty_bitmap_add(action->node, action->name,
action->has_granularity, action->granularity,
action->has_persistent, action->persistent,
+   action->has_autoload, action->autoload,
 &local_err);
 
 if (!local_err) {
@@ -2698,6 +2699,7 @@ out:
 void qmp_block_dirty_bitmap_add(const char *node, const char *name,
 bool has_granularity, uint32_t granularity,
 bool has_persistent, bool persistent,
+bool has_autoload, bool autoload,
 Error **errp)
 {
 AioContext *aio_context;
@@ -2731,6 +2733,15 @@ void qmp_block_dirty_bitmap_add(const char *node, const 
char *name,
 if (!has_persistent) {
 persistent = false;
 }
+if (!has_autoload) {
+autoload = false;
+}
+
+if (has_autoload && !persistent) {
+error_setg(errp, "Autoload flag must be used only for persistent "
+ "bitmaps");
+goto out;
+}
 
 if (persistent &&
 !bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp))
@@ -2739,10 +2750,13 @@ void qmp_block_dirty_bitmap_add(const char *node, const 
char *name,
 }
 
 bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp);
-if (bitmap != NULL) {
-bdrv_dirty_bitmap_set_persistance(bitmap, persistent);
+if (bitmap == NULL) {
+goto out;
 }
 
+bdrv_dirty_bitmap_set_persistance(bitmap, persistent);
+bdrv_dirty_bitmap_set_autoload(bitmap, autoload);
+
  out:
 aio_context_release(aio_context);
 }
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 535df20212..09dcf4e6ae 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1564,11 +1564,15 @@
 #  Qcow2 disks support persistent bitmaps. Default is false.
 #  (Since 2.9)
 #
+# @autoload: #optional the bitmap will be automatically loaded when the image
+#it is stored in is opened. This flag may only be specified for
+#persistent bitmaps. Default is false. (Since 2.9)
+#
 # Since: 2.4
 ##
 { 'struct': 'BlockDirtyBitmapAdd',
   'data': { 'node': 'str', 'name': 'str', '*granularity': 'uint32',
-'*persistent': 'bool' } }
+'*persistent': 'bool', '*autoload': 'bool' } }
 
 ##
 # @block-dirty-bitmap-add:
-- 
2.11.1

[Qemu-block] [PATCH 20/24] iotests: test qcow2 persistent dirty bitmap

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 tests/qemu-iotests/165 | 89 ++
 tests/qemu-iotests/165.out |  5 +++
 tests/qemu-iotests/group   |  1 +
 3 files changed, 95 insertions(+)
 create mode 100755 tests/qemu-iotests/165
 create mode 100644 tests/qemu-iotests/165.out

diff --git a/tests/qemu-iotests/165 b/tests/qemu-iotests/165
new file mode 100755
index 00..d583b33733
--- /dev/null
+++ b/tests/qemu-iotests/165
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+#
+# Tests for persistent dirty bitmaps.
+#
+# Copyright: Vladimir Sementsov-Ogievskiy 2015-2017
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import iotests
+from iotests import qemu_img
+
+disk = os.path.join(iotests.test_dir, 'disk')
+disk_size = 0x40000000 # 1G
+
+# regions for qemu_io: (start, count) in bytes
+regions1 = ((0,0x10),
+(0x20, 0x10))
+
+regions2 = ((0x1000, 0x2),
+(0x3fff, 0x1))
+
+class TestPersistentDirtyBitmap(iotests.QMPTestCase):
+
+def setUp(self):
+qemu_img('create', '-f', iotests.imgfmt, disk, str(disk_size))
+
+def tearDown(self):
+os.remove(disk)
+
+def mkVm(self):
+return iotests.VM().add_drive(disk)
+
+def getSha256(self):
+result = self.vm.qmp('x-debug-block-dirty-bitmap-sha256',
+ node='drive0', name='bitmap0')
+return result['return']['sha256']
+
+def checkBitmap(self, sha256):
+result = self.vm.qmp('x-debug-block-dirty-bitmap-sha256',
+ node='drive0', name='bitmap0')
+self.assert_qmp(result, 'return/sha256', sha256);
+
+def writeRegions(self, regions):
+for r in regions:
+self.vm.hmp_qemu_io('drive0',
+'write %d %d' % r)
+
+def qmpAddBitmap(self):
+self.vm.qmp('block-dirty-bitmap-add', node='drive0',
+name='bitmap0', persistent=True, autoload=True)
+
+def test_persistent(self):
+self.vm = self.mkVm()
+self.vm.launch()
+self.qmpAddBitmap()
+
+self.writeRegions(regions1)
+sha256 = self.getSha256()
+
+self.vm.shutdown()
+self.vm = self.mkVm()
+self.vm.launch()
+
+self.checkBitmap(sha256)
+self.writeRegions(regions2)
+sha256 = self.getSha256()
+
+self.vm.shutdown()
+self.vm.launch()
+
+self.checkBitmap(sha256)
+
+self.vm.shutdown()
+
+if __name__ == '__main__':
+iotests.main(supported_fmts=['qcow2'])
diff --git a/tests/qemu-iotests/165.out b/tests/qemu-iotests/165.out
new file mode 100644
index 00..ae1213e6f8
--- /dev/null
+++ b/tests/qemu-iotests/165.out
@@ -0,0 +1,5 @@
+.
+--
+Ran 1 tests
+
+OK
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 866c1a032d..8bd7a84f5a 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -162,6 +162,7 @@
 159 rw auto quick
 160 rw auto quick
 162 auto quick
+165 rw auto quick
 170 rw auto quick
 171 rw auto quick
 172 auto
-- 
2.11.1

[Qemu-block] [PATCH 06/24] block/dirty-bitmap: add deserialize_ones func

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

Add bdrv_dirty_bitmap_deserialize_ones() function, which is needed for
qcow2 bitmap loading, to handle unallocated bitmap parts, marked as
all-ones.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Kevin Wolf <kw...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 block/dirty-bitmap.c |  7 +++
 include/block/dirty-bitmap.h |  3 +++
 include/qemu/hbitmap.h   | 15 +++
 util/hbitmap.c   | 17 +
 4 files changed, 42 insertions(+)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 186941cfc3..90af37287f 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -499,6 +499,13 @@ void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap 
*bitmap,
 hbitmap_deserialize_zeroes(bitmap->bitmap, start, count, finish);
 }
 
+void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap,
+uint64_t start, uint64_t count,
+bool finish)
+{
+hbitmap_deserialize_ones(bitmap->bitmap, start, count, finish);
+}
+
 void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap)
 {
 hbitmap_deserialize_finish(bitmap->bitmap);
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 7cbe623ba7..1e17729ac2 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -70,6 +70,9 @@ void bdrv_dirty_bitmap_deserialize_part(BdrvDirtyBitmap 
*bitmap,
 void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap,
   uint64_t start, uint64_t count,
   bool finish);
+void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap,
+uint64_t start, uint64_t count,
+bool finish);
 void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap);
 
 #endif
diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index 6b04391266..b52304ac29 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -229,6 +229,21 @@ void hbitmap_deserialize_zeroes(HBitmap *hb, uint64_t 
start, uint64_t count,
 bool finish);
 
 /**
+ * hbitmap_deserialize_ones
+ * @hb: HBitmap to operate on.
+ * @start: First bit to restore.
+ * @count: Number of bits to restore.
+ * @finish: Whether to call hbitmap_deserialize_finish automatically.
+ *
+ * Fills the bitmap with ones.
+ *
+ * If @finish is false, caller must call hbitmap_deserialize_finish before
+ * using the bitmap.
+ */
+void hbitmap_deserialize_ones(HBitmap *hb, uint64_t start, uint64_t count,
+  bool finish);
+
+/**
  * hbitmap_deserialize_finish
  * @hb: HBitmap to operate on.
  *
diff --git a/util/hbitmap.c b/util/hbitmap.c
index 0b38817505..0c1591a594 100644
--- a/util/hbitmap.c
+++ b/util/hbitmap.c
@@ -551,6 +551,23 @@ void hbitmap_deserialize_zeroes(HBitmap *hb, uint64_t 
start, uint64_t count,
 }
 }
 
+void hbitmap_deserialize_ones(HBitmap *hb, uint64_t start, uint64_t count,
+  bool finish)
+{
+uint64_t el_count;
+unsigned long *first;
+
+if (!count) {
+return;
+}
+serialization_chunk(hb, start, count, &first, &el_count);
+
+memset(first, 0xff, el_count * sizeof(unsigned long));
+if (finish) {
+hbitmap_deserialize_finish(hb);
+}
+}
+
 void hbitmap_deserialize_finish(HBitmap *bitmap)
 {
 int64_t i, size, prev_size;
-- 
2.11.1

[Qemu-block] [PATCH 24/24] block: release persistent bitmaps on inactivate

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

We should release them here to reload on invalidate cache.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---

Alternative is to release bitmaps from format driver, immediately after
storing them - I'm not sure what is better.

If this is OK, I can merge this to some point earlier in the series
 (if needed)

 block.c  |  4 
 block/dirty-bitmap.c | 29 +++--
 include/block/dirty-bitmap.h |  1 +
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/block.c b/block.c
index cf9919a5e0..90f4a2f91d 100644
--- a/block.c
+++ b/block.c
@@ -3305,6 +3305,10 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs,
 if (setting_flag) {
 bs->open_flags |= BDRV_O_INACTIVE;
 }
+
+/* At this point persistent bitmaps should be stored by format driver */
+bdrv_release_persistent_dirty_bitmaps(bs);
+
 return 0;
 }
 
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 05e0aaef90..9b83170319 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -294,13 +294,18 @@ void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
 }
 }
 
-static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
-  BdrvDirtyBitmap *bitmap,
-  bool only_named)
+static bool bdrv_dirty_bitmap_has_name(BdrvDirtyBitmap *bitmap)
+{
+return !!bdrv_dirty_bitmap_name(bitmap);
+}
+
+static void bdrv_do_release_matching_dirty_bitmap(
+BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+bool (*cond)(BdrvDirtyBitmap *bitmap))
 {
 BdrvDirtyBitmap *bm, *next;
 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
-if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
+if ((!bitmap || bm == bitmap) && (!cond || cond(bm))) {
 assert(!bm->active_iterators);
 assert(!bdrv_dirty_bitmap_frozen(bm));
 assert(!bm->meta);
@@ -321,7 +326,7 @@ static void 
bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
 
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
 {
-bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
+bdrv_do_release_matching_dirty_bitmap(bs, bitmap, NULL);
 }
 
 /**
@@ -331,7 +336,19 @@ void bdrv_release_dirty_bitmap(BlockDriverState *bs, 
BdrvDirtyBitmap *bitmap)
  */
 void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
 {
-bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
+bdrv_do_release_matching_dirty_bitmap(bs, NULL, 
bdrv_dirty_bitmap_has_name);
+}
+
+/**
+ * Release all persistent dirty bitmaps attached to a BDS (for use in
+ * bdrv_inactivate_recurse()).
+ * There must not be any frozen bitmaps attached.
+ * This function does not remove persistent bitmaps from the storage.
+ */
+void bdrv_release_persistent_dirty_bitmaps(BlockDriverState *bs)
+{
+bdrv_do_release_matching_dirty_bitmap(bs, NULL,
+  bdrv_dirty_bitmap_get_persistance);
 }
 
 /**
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index ce126109e0..2113633519 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -25,6 +25,7 @@ BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs,
 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap);
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
 void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);
+void bdrv_release_persistent_dirty_bitmaps(BlockDriverState *bs);
 void bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs,
  const char *name,
  Error **errp);
-- 
2.11.1

[Qemu-block] [PATCH 04/24] tests: add hbitmap iter test

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

Test that hbitmap iter is resistant to bitmap resetting.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Signed-off-by: Denis V. Lunev <d...@openvz.org>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 tests/test-hbitmap.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c
index 23773d2051..1acb353889 100644
--- a/tests/test-hbitmap.c
+++ b/tests/test-hbitmap.c
@@ -909,6 +909,22 @@ static void hbitmap_test_add(const char *testpath,
hbitmap_test_teardown);
 }
 
+static void test_hbitmap_iter_and_reset(TestHBitmapData *data,
+const void *unused)
+{
+HBitmapIter hbi;
+
+hbitmap_test_init(data, L1 * 2, 0);
+hbitmap_set(data->hb, 0, data->size);
+
+hbitmap_iter_init(&hbi, data->hb, BITS_PER_LONG - 1);
+
+hbitmap_iter_next(&hbi);
+
+hbitmap_reset_all(data->hb);
+hbitmap_iter_next(&hbi);
+}
+
 int main(int argc, char **argv)
 {
 g_test_init(&argc, &argv, NULL);
@@ -966,6 +982,9 @@ int main(int argc, char **argv)
  test_hbitmap_serialize_part);
 hbitmap_test_add("/hbitmap/serialize/zeroes",
  test_hbitmap_serialize_zeroes);
+
+hbitmap_test_add("/hbitmap/iter/iter_and_reset",
+ test_hbitmap_iter_and_reset);
 g_test_run();
 
 return 0;
-- 
2.11.1

[Qemu-block] [PATCH 03/24] hbitmap: improve dirty iter

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

Make dirty iter resistant to resetting bits in corresponding HBitmap.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 include/qemu/hbitmap.h | 26 --
 util/hbitmap.c | 23 ++-
 2 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index 9239fe515e..6b04391266 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -256,10 +256,9 @@ void hbitmap_free(HBitmap *hb);
  * the lowest-numbered bit that is set in @hb, starting at @first.
  *
  * Concurrent setting of bits is acceptable, and will at worst cause the
- * iteration to miss some of those bits.  Resetting bits before the current
- * position of the iterator is also okay.  However, concurrent resetting of
- * bits can lead to unexpected behavior if the iterator has not yet reached
- * those bits.
+ * iteration to miss some of those bits.
+ *
+ * The concurrent resetting of bits is OK.
  */
 void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first);
 
@@ -298,24 +297,7 @@ void hbitmap_free_meta(HBitmap *hb);
  * Return the next bit that is set in @hbi's associated HBitmap,
  * or -1 if all remaining bits are zero.
  */
-static inline int64_t hbitmap_iter_next(HBitmapIter *hbi)
-{
-unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1];
-int64_t item;
-
-if (cur == 0) {
-cur = hbitmap_iter_skip_words(hbi);
-if (cur == 0) {
-return -1;
-}
-}
-
-/* The next call will resume work from the next bit.  */
-hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1);
-item = ((uint64_t)hbi->pos << BITS_PER_LEVEL) + ctzl(cur);
-
-return item << hbi->granularity;
-}
+int64_t hbitmap_iter_next(HBitmapIter *hbi);
 
 /**
  * hbitmap_iter_next_word:
diff --git a/util/hbitmap.c b/util/hbitmap.c
index 35088e19c4..0b38817505 100644
--- a/util/hbitmap.c
+++ b/util/hbitmap.c
@@ -106,8 +106,9 @@ unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi)
 
 unsigned long cur;
 do {
-cur = hbi->cur[--i];
+i--;
 pos >>= BITS_PER_LEVEL;
+cur = hbi->cur[i] & hb->levels[i][pos];
 } while (cur == 0);
 
 /* Check for end of iteration.  We always use fewer than BITS_PER_LONG
@@ -139,6 +140,26 @@ unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi)
 return cur;
 }
 
+int64_t hbitmap_iter_next(HBitmapIter *hbi)
+{
+unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1] &
+hbi->hb->levels[HBITMAP_LEVELS - 1][hbi->pos];
+int64_t item;
+
+if (cur == 0) {
+cur = hbitmap_iter_skip_words(hbi);
+if (cur == 0) {
+return -1;
+}
+}
+
+/* The next call will resume work from the next bit.  */
+hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1);
+item = ((uint64_t)hbi->pos << BITS_PER_LEVEL) + ctzl(cur);
+
+return item << hbi->granularity;
+}
+
 void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first)
 {
 unsigned i, bit;
-- 
2.11.1

[Qemu-block] [PATCH 21/24] block/dirty-bitmap: add bdrv_remove_persistent_dirty_bitmap

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

Interface for removing persistent bitmap from its storage.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 block/dirty-bitmap.c | 18 ++
 include/block/block_int.h|  3 +++
 include/block/dirty-bitmap.h |  3 +++
 3 files changed, 24 insertions(+)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index b684d8b00e..05e0aaef90 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -327,12 +327,30 @@ void bdrv_release_dirty_bitmap(BlockDriverState *bs, 
BdrvDirtyBitmap *bitmap)
 /**
  * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
  * There must not be any frozen bitmaps attached.
+ * This function does not remove persistent bitmaps from the storage.
  */
 void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
 {
 bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
 }
 
+/**
+ * Remove persistent dirty bitmap from the storage if it exists.
+ * Absence of bitmap is not an error, because we have the following scenario:
+ * BdrvDirtyBitmap can have .persistent = true but not yet saved and have no
+ * stored version. For such bitmap bdrv_remove_persistent_dirty_bitmap() should
+ * not fail.
+ * This function doesn't release corresponding BdrvDirtyBitmap.
+ */
+void bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs,
+ const char *name,
+ Error **errp)
+{
+if (bs->drv && bs->drv->bdrv_remove_persistent_dirty_bitmap) {
+bs->drv->bdrv_remove_persistent_dirty_bitmap(bs, name, errp);
+}
+}
+
 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
 {
 assert(!bdrv_dirty_bitmap_frozen(bitmap));
diff --git a/include/block/block_int.h b/include/block/block_int.h
index f943a7c24e..f7f7dd7717 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -324,6 +324,9 @@ struct BlockDriver {
 const char *name,
 uint32_t granularity,
 Error **errp);
+void (*bdrv_remove_persistent_dirty_bitmap)(BlockDriverState *bs,
+const char *name,
+Error **errp);
 
 QLIST_ENTRY(BlockDriver) list;
 };
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index b022b34964..ce126109e0 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -25,6 +25,9 @@ BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs,
 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap);
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
 void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);
+void bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs,
+ const char *name,
+ Error **errp);
 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs);
-- 
2.11.1

[Qemu-block] [PATCH 02/24] specs/qcow2: do not use wording 'bitmap header'

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

A bitmap directory entry is sometimes called a 'bitmap header'. This
patch leaves only one name - 'bitmap directory entry'. The name 'bitmap
header' creates misunderstandings with 'qcow2 header' and 'qcow2 bitmap
header extension' (which is extension of qcow2 header)

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Eric Blake <ebl...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 docs/specs/qcow2.txt | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/docs/specs/qcow2.txt b/docs/specs/qcow2.txt
index dda53dd2a3..8874e8c774 100644
--- a/docs/specs/qcow2.txt
+++ b/docs/specs/qcow2.txt
@@ -201,7 +201,7 @@ The fields of the bitmaps extension are:
 
   8 - 15:  bitmap_directory_size
Size of the bitmap directory in bytes. It is the cumulative
-   size of all (nb_bitmaps) bitmap headers.
+   size of all (nb_bitmaps) bitmap directory entries.
 
  16 - 23:  bitmap_directory_offset
Offset into the image file at which the bitmap directory
@@ -426,8 +426,7 @@ Each bitmap saved in the image is described in a bitmap 
directory entry. The
 bitmap directory is a contiguous area in the image file, whose starting offset
 and length are given by the header extension fields bitmap_directory_offset and
 bitmap_directory_size. The entries of the bitmap directory have variable
-length, depending on the lengths of the bitmap name and extra data. These
-entries are also called bitmap headers.
+length, depending on the lengths of the bitmap name and extra data.
 
 Structure of a bitmap directory entry:
 
-- 
2.11.1

[Qemu-block] [PATCH 10/24] block/dirty-bitmap: add autoload field to BdrvDirtyBitmap

2017-04-26 Thread Vladimir Sementsov-Ogievskiy

Mirror AUTO flag from Qcow2 bitmap in BdrvDirtyBitmap. This will be
needed in future, to save this flag back to Qcow2 for persistent
bitmaps.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Max Reitz <mre...@redhat.com>
Reviewed-by: John Snow <js...@redhat.com>
---
 block/dirty-bitmap.c | 15 +++
 block/qcow2-bitmap.c |  2 ++
 include/block/dirty-bitmap.h |  2 ++
 3 files changed, 19 insertions(+)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 90af37287f..a9dfce8d00 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -44,6 +44,8 @@ struct BdrvDirtyBitmap {
 int64_t size;   /* Size of the bitmap (Number of sectors) */
 bool disabled;  /* Bitmap is read-only */
 int active_iterators;   /* How many iterators are active */
+bool autoload;  /* For persistent bitmaps: bitmap must be
+   autoloaded on image opening */
 QLIST_ENTRY(BdrvDirtyBitmap) list;
 };
 
@@ -70,6 +72,7 @@ void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
 assert(!bdrv_dirty_bitmap_frozen(bitmap));
 g_free(bitmap->name);
 bitmap->name = NULL;
+bitmap->autoload = false;
 }
 
 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
@@ -238,6 +241,8 @@ BdrvDirtyBitmap 
*bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
 bitmap->name = NULL;
 successor->name = name;
 bitmap->successor = NULL;
+successor->autoload = bitmap->autoload;
+bitmap->autoload = false;
 bdrv_release_dirty_bitmap(bs, bitmap);
 
 return successor;
@@ -540,3 +545,13 @@ int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap)
 {
 return hbitmap_count(bitmap->meta);
 }
+
+void bdrv_dirty_bitmap_set_autoload(BdrvDirtyBitmap *bitmap, bool autoload)
+{
+bitmap->autoload = autoload;
+}
+
+bool bdrv_dirty_bitmap_get_autoload(const BdrvDirtyBitmap *bitmap)
+{
+return bitmap->autoload;
+}
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 95e102e71f..5718567b21 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -786,6 +786,8 @@ bool qcow2_load_autoloading_dirty_bitmaps(BlockDriverState 
*bs, Error **errp)
 if (bitmap == NULL) {
 goto fail;
 }
+
+bdrv_dirty_bitmap_set_autoload(bitmap, true);
 bm->flags |= BME_FLAG_IN_USE;
 created_dirty_bitmaps =
 g_slist_append(created_dirty_bitmaps, bitmap);
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 1e17729ac2..45a389a20a 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -75,4 +75,6 @@ void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap 
*bitmap,
 bool finish);
 void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap);
 
+void bdrv_dirty_bitmap_set_autoload(BdrvDirtyBitmap *bitmap, bool autoload);
+bool bdrv_dirty_bitmap_get_autoload(const BdrvDirtyBitmap *bitmap);
 #endif
-- 
2.11.1

Re: [Qemu-block] [PATCH 12/17] migration: add postcopy migration of dirty bitmaps

2017-04-26 Thread Vladimir Sementsov-Ogievskiy


25.02.2017 20:56, Vladimir Sementsov-Ogievskiy wrote:

16.02.2017 16:04, Fam Zheng wrote:

On Mon, 02/13 12:54, Vladimir Sementsov-Ogievskiy wrote:

Postcopy migration of dirty bitmaps. Only named dirty bitmaps,
associated with root nodes and non-root named nodes are migrated.

If destination qemu is already containing a dirty bitmap with the 
same name
as a migrated bitmap (for the same node), than, if their 
granularities are


[...]


+
+#define CHUNK_SIZE (1 << 10)
+
+/* Flags occupy from one to four bytes. In all but one the 7-th 
(EXTRA_FLAGS)

+ * bit should be set. */
+#define DIRTY_BITMAP_MIG_FLAG_EOS   0x01
+#define DIRTY_BITMAP_MIG_FLAG_ZEROES 0x02
+#define DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME   0x04
+#define DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME   0x08
+#define DIRTY_BITMAP_MIG_FLAG_START 0x10
+#define DIRTY_BITMAP_MIG_FLAG_COMPLETE  0x20
+#define DIRTY_BITMAP_MIG_FLAG_BITS  0x40
+
+#define DIRTY_BITMAP_MIG_EXTRA_FLAGS 0x80
+#define DIRTY_BITMAP_MIG_FLAGS_SIZE_16  0x8000
This flag means two bytes, right? But your above comment says "7-th 
bit should

be set". This doesn't make sense. Should this be "0x80" too?


Hmm, good catch, you are right. Also, the comment should be fixed to say
that there may be 1, 2 or 4 bytes, and of course the EXTRA_FLAGS bit
may be set only in the first and second bytes (the code does so).


Aha, now I understand that 0x8000 is ok for big endian






+#define DIRTY_BITMAP_MIG_FLAGS_SIZE_32  0x8080


and this is not ok)

I think I'll just drop this. Anyway, it is dead code: we do not send
flags longer than 1 byte in the code. On the other hand, the receive-flags
path is absolutely OK, and it is there for backward compatibility - we just
ignore unknown flags.



+
+#define DEBUG_DIRTY_BITMAP_MIGRATION 0


[...]


+
+static void send_bitmap_bits(QEMUFile *f, DirtyBitmapMigBitmapState 
*dbms,
+ uint64_t start_sector, uint32_t 
nr_sectors)

+{
+/* align for buffer_is_zero() */
+uint64_t align = 4 * sizeof(long);
+uint64_t unaligned_size =
+bdrv_dirty_bitmap_serialization_size(dbms->bitmap,
+ start_sector, nr_sectors);
+uint64_t buf_size = (unaligned_size + align - 1) & ~(align - 1);
+uint8_t *buf = g_malloc0(buf_size);
+uint32_t flags = DIRTY_BITMAP_MIG_FLAG_BITS;
+
+bdrv_dirty_bitmap_serialize_part(dbms->bitmap, buf,
+ start_sector, nr_sectors);
While these bdrv_dirty_bitmap_* calls here seem fine, BdrvDirtyBitmap 
API is not
in general thread-safe, while this function is called without any 
lock. This
feels dangerous, as noted below, I'm most concerned about 
use-after-free.


This should be safe as it is a postcopy migration - source should be 
already inactive.





+
+if (buffer_is_zero(buf, buf_size)) {
+g_free(buf);
+buf = NULL;
+flags |= DIRTY_BITMAP_MIG_FLAG_ZEROES;
+}
+
+trace_send_bitmap_bits(flags, start_sector, nr_sectors, buf_size);
+
+send_bitmap_header(f, dbms, flags);
+
+qemu_put_be64(f, start_sector);
+qemu_put_be32(f, nr_sectors);
+
+/* if a block is zero we need to flush here since the network
+ * bandwidth is now a lot higher than the storage device 
bandwidth.

+ * thus if we queue zero blocks we slow down the migration. */
+if (flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) {
+qemu_fflush(f);
+} else {
+qemu_put_be64(f, buf_size);
+qemu_put_buffer(f, buf, buf_size);
+}
+
+g_free(buf);
+}
+
+


[...]


+
+static void dirty_bitmap_save_pending(QEMUFile *f, void *opaque,
+  uint64_t max_size,
+  uint64_t *res_precopy_only,
+  uint64_t *res_compatible,
+  uint64_t *res_postcopy_only)
+{
+DirtyBitmapMigBitmapState *dbms;
+uint64_t pending = 0;
+
+qemu_mutex_lock_iothread();

Why do you need the BQL here but not in bulk_phase()?


bulk_phase is in postcopy, source is inactive





--
Best regards,
Vladimir

Re: [Qemu-block] [Qemu-devel] [PATCH 15/17] iotests: add default node-name

2017-04-26 Thread Vladimir Sementsov-Ogievskiy


11.04.2017 06:37, Vladimir Sementsov-Ogievskiy wrote:

11.04.2017 00:49, John Snow wrote:


On 02/17/2017 02:51 PM, Dr. David Alan Gilbert wrote:

* Fam Zheng (f...@redhat.com) wrote:

On Fri, 02/17 16:36, Vladimir Sementsov-Ogievskiy wrote:

17.02.2017 15:21, Fam Zheng wrote:

On Fri, 02/17 13:20, Vladimir Sementsov-Ogievskiy wrote:

16.02.2017 16:48, Fam Zheng wrote:

On Mon, 02/13 12:54, Vladimir Sementsov-Ogievskiy wrote:
When testing migration, auto-generated by qemu node-names 
differs in
source and destination qemu and migration fails. After this 
patch,

auto-generated by iotest nodenames will be the same.
What should be done in libvirt to make sure the node-names are 
matching

correctly at both sides?

Hmm, just set node names appropriately?
But I think the problem is that node names are not configurable 
from libvirt
today, and then the migration will fail. Should the device name 
take precedence

in the code, to make it easier?

libvirt can use same parameters as I in this patch..
If I'm not mistaken, libvirt can be patched to explicitly set the 
same node
names in the QEMU command line, but that is some extra work to do 
there. My
point is if device names are used during migration, when available, 
this patch

and the libvirt change is not necessary.
Always best to check with libvirt guys to see what makes sense for 
them;

ccing in jdenemar.

Dave


Fam

--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK


Sorry for the necromancy, looks like discussion died here. Vladimir,
what's plans?

--js


Looks like libvirt guys are not interested ;)

I'm very busy now with other tasks, I think I'll continue work on my 
series in the list in about 1-2 weeks





It is better to push the "qcow2: persistent dirty bitmaps" series first, as
after Kevin's request about storing bitmaps in inactivate, we need some
additional mechanism here to prevent this storing (some flag
somewhere). So, I'll wait for the persistent series to be accepted and then
update this series.



--
Best regards,
Vladimir

Re: [Qemu-block] [PATCH 2/4] qmp-cont: invalidate on RUN_STATE_PRELAUNCH

2017-04-26 Thread Vladimir Sementsov-Ogievskiy


26.04.2017 15:22, Kashyap Chamarthy wrote:

On Tue, Mar 07, 2017 at 01:11:23PM +0300, Vladimir Sementsov-Ogievskiy wrote:

07.03.2017 13:02, Kevin Wolf wrote:

Am 25.02.2017 um 20:31 hat Vladimir Sementsov-Ogievskiy geschrieben:

We must invalidate on RUN_STATE_PRELAUNCH too, as it is available
through qmp_system_reset from RUN_STATE_POSTMIGRATE. Otherwise, we will
come to

qemu-kvm: block/io.c:1406: bdrv_co_do_pwritev:
 Assertion `!(bs->open_flags & 0x0800)' failed.

on the first write after vm start.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>

Wouldn't it make more sense to invalidate in qmp_system_reset() where
the migration states are left?

Or maybe BDRV_O_INACTIVE could even be tied directly to runstates? Not
sure how realistic this one is, but if we start adding invalidate_cache
calls all over the place, maybe that's a sign that we need to look for a
more central place.

I've proposed it in cover letter) These bugs and my fixes are just show that
something should be rethought.. I don't claim that these fixes are true way,
they are just the simplest.

Hi Vladimir,

I wonder if you have a new version of your patch ("qmp-cont: invalidate
on RUN_STATE_PRELAUNCH").

Hailiang Zhang tells me on this the below thread that your patch fixes
the issue:

   http://lists.nongnu.org/archive/html/qemu-devel/2017-04/msg03925.html
   -- [QEMU-2.8] Source QEMU crashes with: "bdrv_co_pwritev: Assertion
   `!(bs->open_flags & BDRV_O_INACTIVE)' failed"



No, I don't have a new version; the discussion about the right way of
doing it is unfinished. This version is OK for the case it fixes.



--
Best regards,
Vladimir

Re: [Qemu-block] [Qemu-devel] [PATCH v17 00/24] qcow2: persistent dirty bitmaps

2017-04-28 Thread Vladimir Sementsov-Ogievskiy


27.04.2017 19:43, John Snow wrote:


On 04/26/2017 07:30 AM, Vladimir Sementsov-Ogievskiy wrote:

Hi all!

There is a new update of qcow2-bitmap series - v17.

web: 
https://src.openvz.org/users/vsementsov/repos/qemu/browse?at=qcow2-bitmap-v17
git: https://src.openvz.org/scm/~vsementsov/qemu.git (tag qcow2-bitmap-v17)


Hm, what is this branch based on? My patches tools are having trouble
finding common ancestors -- 2d6752d38d8acda6aae674a72b72be05482a58eb I
guess, but that's pre-rc0, and the histories don't quite match up.

I did a rebase myself, but I notice that patch 14/24; "qcow2: add
persistent dirty bitmaps support" causes a large number of iotest
regressions:

Failures: 007 009 010 011 012 013 014 015 017 018 019 020 022 023 024
025 026 028 029 030 031 032 034 035 037 038 039 040 041 042 043 044 046
047 048 050 051 052 053 055 056 058 060 061 062 063 065 066 073 074 080
082 086 089 090 095 097 098 102 104 110 112 114 115 117 121 122 124 129
130 132 133 138 140 141 142 150 152 154 155 156 158 159 170 176

Can you give that a look and if you respin, rebase on top of origin/master?


ohh, sorry, I've not rebased. will do



Thanks,
-John


v17:

08: add r-b's by Max and John
09: clear unknown autoclear features from BDRVQcow2State before calling
 qcow2_load_autoloading_dirty_bitmaps(), and also do not extra update
 header if it is updated by qcow2_load_autoloading_dirty_bitmaps().
11: new patch, split out from 16
12: rewrite commit message
14: changing bdrv_close moved to separate new patch 11
 s/1/1ULL/ ; s/%u/%llu/ for two errors
16: s/No/Not/
 add Max's r-b
24: new patch


v16:

mostly by Kevin's comments:
07: just moved here, as preparation for merging refcounts to 08
08: move "qcow2-bitmap: refcounts" staff to this patch, to not break qcow2-img 
check
 drop r-b's
 move necessary supporting static functions to this patch too (to not break 
compilation)
 fprintf -> error_report
 other small changes with error messages and comments
 code style
 for qcow2-bitmap.c:
   'include "exec/log.h"' was dropped
   s/1/(1U << 0) for BME_FLAG_IN_USE
   add BME_TABLE_ENTRY_FLAG_ALL_ONES to replace magic 1
   don't check 'cluster_size <= 0' in check_table_entry
old "[PATCH v15 08/25] block: introduce auto-loading bitmaps" was dropped
09: was "[PATCH v15 09/25] qcow2: add .bdrv_load_autoloading_dirty_bitmaps"
 drop r-b's
 some staff was moved to 08
 update_header_sync - error on bdrv_flush fail
 rename disk_sectors_in_bitmap_cluster to sectors_covered_by_bitmap_cluster
  and adjust comment.
  so, variable for storing this function result: s/dsc/sbc/
 s/1/BME_TABLE_ENTRY_FLAG_ALL_ONES/
 also, as Qcow2BitmapTable already introduced in 08,
s/table_offset/table.offset/ and s/table_size/table.size, etc..
 update_ext_header_and_dir_in_place: add comments, add additional
   update_header_sync, to reduce indeterminacy in case of error.
 call qcow2_load_autoloading_dirty_bitmaps directly from qcow2_open
 no .bdrv_load_autoloading_dirty_bitmaps
11: drop bdrv_store_persistent_dirty_bitmaps at all.
 drop r-b's
13: rename patch, rewrite commit msg
 drop r-b's
 move bdrv_release_named_dirty_bitmaps in bdrv_close() after 
drv->bdrv_close()
 Qcow2BitmapTable is already introduced, so no
   s/table_offset/table.offset/ and s/table_size/table.size, etc.. here
 old 25 with check_constraints_on_bitmap() improvements merged here (Eric)
 like in 09, s/dsc/sbc/ and 
s/disk_sectors_in_bitmap_cluster/sectors_covered_by_bitmap_cluster/
 no .bdrv_store_persistent_dirty_bitmaps
 call qcow2_store_persistent_dirty_bitmaps directly from qcow2_inactivate
15: corresponding part of 25 merged here. Add John's r-b, as 25 was also 
reviewed by John.
16: add John's r-b


v15:
13,14: add John's r-b
15: qcow2_can_store_new_dirty_bitmap:
   - check max dirty bitmaps and bitmap directory overhead
   - switch to error_prepend
 rm Max's r-b
 not add John's r-b
17-24: add John's r-b
25: changed because 15 changed,
 not add John's r-b


v14:

07: use '|=' to update need_update_header
 add John's r-b
 add Max's r-b
09: remove unused bitmap_table_to_cpu()
 left Max's r-b, hope it's ok
 add John's r-b
10: remove extra new line
 add John's r-b
11: add John's r-b
12: add John's r-b
13: small fixes by John's review:
- remove weird g_free of NULL pointer from
if (tb == NULL) {
g_free(tb);
return -EINVAL;
}
- remove extra comment "/* errp is already set */"
- s/"Too large bitmap directory"/"Bitmap directory is too large"/
 left Max's r-b, hope you don't mind
22: add Max's r-b
23: add Max's r-b
24: add Max's r-b
25: new patch to improve error message on check_constraints_on_bitmap fail
 


v13: Ju

[Qemu-block] ping Re: [PATCH 3/4] savevm: fix savevm after migration

2017-04-25 Thread Vladimir Sementsov-Ogievskiy


29.03.2017 18:53, Paolo Bonzini wrote:


On 29/03/2017 17:29, Dr. David Alan Gilbert wrote:

'abort' is not very good too I think. migration is completed, nothing to
abort.. (may be successful migration to file for suspend, some kind of
vm cloning, etc)

There is already migrate_cancel.  Does it make sense to make it
reactivate fds if migration is completed?

It's potentially racy to do that.
Imagine if your migration is almost finished and you issue a migrate_cancel,
what happens?
Maybe it cancelled it.
Maybe it just completed in time - and you really better not be accessing
the disks on the source unless you're sure the destination isn't running.

If you want to avoid races, you probably have to use -S anyway on the
destination and do more handshaking between source and destination.  But
yes, just to be safe it'd be better to add a new flag (-f for HMP,
'cancel-completed-migration':true for QMP or something like that).

Paolo


Looks like we haven't reached a final conclusion on this (solving the problems
noted in patches 2 and 3). Is someone working on this now?


--
Best regards,
Vladimir

Re: [Qemu-block] [PATCH] iotests: fix 185

2017-08-08 Thread Vladimir Sementsov-Ogievskiy


08.08.2017 11:53, Kevin Wolf wrote:

Am 08.08.2017 um 10:42 hat Vladimir Sementsov-Ogievskiy geschrieben:

07.08.2017 18:57, Kevin Wolf wrote:

Am 07.08.2017 um 16:16 hat Vladimir Sementsov-Ogievskiy geschrieben:

185 iotest is broken.

How to test:

i=0; while ./check -qcow2 -nocache 185; do ((i+=1)); echo N = $i; \

done; echo N = $i

finished for me like this:

185 2s ... - output mismatch (see 185.out.bad)
--- /work/src/qemu/master/tests/qemu-iotests/185.out 2017-07-14 \
  15:14:29.520343805 +0300
+++ 185.out.bad 2017-08-07 16:51:02.231922900 +0300
@@ -37,7 +37,7 @@
   {"return": {}}
   {"return": {}}
   {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, \
   "event": "SHUTDOWN", "data": {"guest": false}}
-{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, \
  "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", \
  "len": 4194304, "offset": 4194304, "speed": 65536, "type": \
  "mirror"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, \
  "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", \
  "len": 0, "offset": 0, "speed": 65536, "type": "mirror"}}

   === Start backup job and exit qemu ===

Failures: 185
Failed 1 of 1 tests
N = 8

It doesn't seems logical to expect the constant offset on cancel,
so let filter it out.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>

I'm quoting 185:

# Note that the reference output intentionally includes the 'offset' field in
# BLOCK_JOB_CANCELLED events for all of the following block jobs. They are
# predictable and any change in the offsets would hint at a bug in the job
# throttling code.
#
# In order to achieve these predictable offsets, all of the following tests
# use speed=65536. Each job will perform exactly one iteration before it has
# to sleep at least for a second, which is plenty of time for the 'quit' QMP
# command to be received (after receiving the command, the rest runs
# synchronously, so jobs can arbitrarily continue or complete).
#
# The buffer size for commit and streaming is 512k (waiting for 8 seconds after
# the first request), for active commit and mirror it's large enough to cover
# the full 4M, and for backup it's the qcow2 cluster size, which we know is
# 64k. As all of these are at least as large as the speed, we are sure that the
# offset doesn't advance after the first iteration before qemu exits.

So before we change the expected output, can we explain why the offsets
aren't predictable, even if throttling is used and contrary to what the
comment says? Should we sleep a little before issuing 'quit'?

Throttling "guaranties" that there will not be more than one request. But
what prevent less than one, i.e. zero, like in my reproduction?

Yes, I understand. Can we somehow make sure that at least one iteration
is made? I'd really like to keep the functional test for block job
throttling. I suppose a simple 'sleep 0.1' would do the trick, though
it's not very clean.

Kevin



I've tried it with 'sleep 0.5'; so far there are >100 successful
iterations... The other way is to check in the test that there were 0 or 1
requests, but for this it looks better to rewrite it in Python.



--
Best regards,
Vladimir

Re: [Qemu-block] [PATCH] iotests: fix 185

2017-08-08 Thread Vladimir Sementsov-Ogievskiy


08.08.2017 12:04, Vladimir Sementsov-Ogievskiy wrote:

08.08.2017 11:53, Kevin Wolf wrote:

Am 08.08.2017 um 10:42 hat Vladimir Sementsov-Ogievskiy geschrieben:

07.08.2017 18:57, Kevin Wolf wrote:

Am 07.08.2017 um 16:16 hat Vladimir Sementsov-Ogievskiy geschrieben:

185 iotest is broken.

How to test:

i=0; while ./check -qcow2 -nocache 185; do ((i+=1)); echo N = $i; \

done; echo N = $i

finished for me like this:

185 2s ... - output mismatch (see 185.out.bad)
--- /work/src/qemu/master/tests/qemu-iotests/185.out 2017-07-14 \
  15:14:29.520343805 +0300
+++ 185.out.bad 2017-08-07 16:51:02.231922900 +0300
@@ -37,7 +37,7 @@
   {"return": {}}
   {"return": {}}
   {"timestamp": {"seconds":  TIMESTAMP, "microseconds": 
TIMESTAMP}, \

   "event": "SHUTDOWN", "data": {"guest": false}}
-{"timestamp": {"seconds":  TIMESTAMP, "microseconds": TIMESTAMP}, \
  "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", \
  "len": 4194304, "offset": 4194304, "speed": 65536, 
"type": \

  "mirror"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds": TIMESTAMP}, \
  "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", \
  "len": 0, "offset": 0, "speed": 65536, "type": "mirror"}}

   === Start backup job and exit qemu ===

Failures: 185
Failed 1 of 1 tests
N = 8

It doesn't seems logical to expect the constant offset on cancel,
so let filter it out.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
<vsement...@virtuozzo.com>

I'm quoting 185:

# Note that the reference output intentionally includes the 
'offset' field in
# BLOCK_JOB_CANCELLED events for all of the following block jobs. 
They are
# predictable and any change in the offsets would hint at a bug in 
the job

# throttling code.
#
# In order to achieve these predictable offsets, all of the 
following tests
# use speed=65536. Each job will perform exactly one iteration 
before it has
# to sleep at least for a second, which is plenty of time for the 
'quit' QMP

# command to be received (after receiving the command, the rest runs
# synchronously, so jobs can arbitrarily continue or complete).
#
# The buffer size for commit and streaming is 512k (waiting for 8 
seconds after
# the first request), for active commit and mirror it's large 
enough to cover
# the full 4M, and for backup it's the qcow2 cluster size, which we 
know is
# 64k. As all of these are at least as large as the speed, we are 
sure that the

# offset doesn't advance after the first iteration before qemu exits.

So before we change the expected output, can we explain why the 
offsets
aren't predictable, even if throttling is used and contrary to what 
the

comment says? Should we sleep a little before issuing 'quit'?
Throttling "guaranties" that there will not be more than one 
request. But

what prevent less than one, i.e. zero, like in my reproduction?

Yes, I understand. Can we somehow make sure that at least one iteration
is made? I'd really like to keep the functional test for block job
throttling. I suppose a simple 'sleep 0.1' would do the trick, though
it's not very clean.

Kevin



I've started with 'sleep 0.5', now there are >100 successful 
iterations... The other way is to check in test that there was 0 or 1 
requests, but for this it looks better to rewrite it in python.





is sleep for ms portable?



--
Best regards,
Vladimir

Re: [Qemu-block] [PATCH] iotests: fix 185

2017-08-08 Thread Vladimir Sementsov-Ogievskiy


07.08.2017 18:57, Kevin Wolf wrote:

Am 07.08.2017 um 16:16 hat Vladimir Sementsov-Ogievskiy geschrieben:

185 iotest is broken.

How to test:

i=0; while ./check -qcow2 -nocache 185; do ((i+=1)); echo N = $i; \

   done; echo N = $i

finished for me like this:

185 2s ... - output mismatch (see 185.out.bad)
--- /work/src/qemu/master/tests/qemu-iotests/185.out 2017-07-14 \
 15:14:29.520343805 +0300
+++ 185.out.bad 2017-08-07 16:51:02.231922900 +0300
@@ -37,7 +37,7 @@
  {"return": {}}
  {"return": {}}
  {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, \
  "event": "SHUTDOWN", "data": {"guest": false}}
-{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, \
 "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", \
 "len": 4194304, "offset": 4194304, "speed": 65536, "type": \
 "mirror"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, \
 "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", \
 "len": 0, "offset": 0, "speed": 65536, "type": "mirror"}}

  === Start backup job and exit qemu ===

Failures: 185
Failed 1 of 1 tests
N = 8

It doesn't seem logical to expect a constant offset on cancel,
so let's filter it out.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>

I'm quoting 185:

# Note that the reference output intentionally includes the 'offset' field in
# BLOCK_JOB_CANCELLED events for all of the following block jobs. They are
# predictable and any change in the offsets would hint at a bug in the job
# throttling code.
#
# In order to achieve these predictable offsets, all of the following tests
# use speed=65536. Each job will perform exactly one iteration before it has
# to sleep at least for a second, which is plenty of time for the 'quit' QMP
# command to be received (after receiving the command, the rest runs
# synchronously, so jobs can arbitrarily continue or complete).
#
# The buffer size for commit and streaming is 512k (waiting for 8 seconds after
# the first request), for active commit and mirror it's large enough to cover
# the full 4M, and for backup it's the qcow2 cluster size, which we know is
# 64k. As all of these are at least as large as the speed, we are sure that the
# offset doesn't advance after the first iteration before qemu exits.

So before we change the expected output, can we explain why the offsets
aren't predictable, even if throttling is used and contrary to what the
comment says? Should we sleep a little before issuing 'quit'?


Throttling "guarantees" that there will not be more than one request. 
But what prevents less than one, i.e. zero, like in my reproduction?




(By the way, I couldn't reproduce in N = 128 attempts, so it doesn't
look like I can look into what's happening in detail, except with code
review.)

Kevin



--
Best regards,
Vladimir

Re: [Qemu-block] [PATCH 07/18] nbd: Minimal structured read for client

2017-08-01 Thread Vladimir Sementsov-Ogievskiy


01.08.2017 18:41, Vladimir Sementsov-Ogievskiy wrote:

07.02.2017 23:14, Eric Blake wrote:

On 02/03/2017 09:47 AM, Vladimir Sementsov-Ogievskiy wrote:
Minimal implementation: always send DF flag, to not deal with fragmented
replies.

This works well with your minimal server implementation, but I worry
that it will cause us to fall over when talking to a fully-compliant
server that chooses to send EOVERFLOW errors for any request larger than
64k when DF is set; it also makes it impossible to benefit from sparse
reads.  I guess that means we need to start thinking about followup
patches to flush out our implementation.  But maybe I can live with this
patch as is, since the goal of your series was not so much the full
power of structured reads, but getting to a point where we could use
structured reply for block status, even if it means your client can only
communicate with qemu-nbd as server for now, as long as we do get to the
rest of the patches for a full-blown structured read.


Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
  block/nbd-client.c  |  47 +++
  block/nbd-client.h  |   2 +
  include/block/nbd.h |  15 +++--
  nbd/client.c| 170 
++--

  qemu-nbd.c  |   2 +-
  5 files changed, 203 insertions(+), 33 deletions(-)

Hmm - no change to the testsuite. Structured reads seems like the sort
of thing that it would be nice to test with some canned server replies,
particularly with server behavior that is permitted by the NBD protocol
but does not happen by default in qemu-nbd.


diff --git a/block/nbd-client.c b/block/nbd-client.c
index 3779c6c999..ff96bd1635 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -180,13 +180,20 @@ static void 
nbd_co_receive_reply(NBDClientSession *s,

  *reply = s->reply;
  if (reply->handle != request->handle ||
  !s->ioc) {
+reply->simple = true;
  reply->error = EIO;

I don't think this is quite right - by setting reply->simple to true,
you are forcing the caller to treat this as the final packet related to
this request->handle, even though that might not be the case.

As it is, I wonder if this code is correct, even before your patch - the
server is allowed to give responses out-of-order (if we request multiple
reads without waiting for the first response) - I don't see how setting
reply->error to EIO is correct if the request->handle indicates that we are
receiving an out-of-order response to some other packet, while our
request is still awaiting traffic.


Hmm, it looks like it should initiate a disconnect instead of just 
reporting an error to the I/O operation's caller.





Also, nbd_co_send_request errors are not handled but just returned to
the caller. Shouldn't the first error on socket I/O initiate a disconnect?
I think only the I/O errors transferred from the nbd-export block device
should be just returned to the bdrv_{io} caller. NBD-protocol related errors
(invalid handles, etc.) should initiate a disconnect, so that the current and
all future bdrv_{io}'s from the client return -EIO.



--
Best regards,
Vladimir

Re: [Qemu-block] [PATCH 07/18] nbd: Minimal structured read for client

2017-08-01 Thread Vladimir Sementsov-Ogievskiy


07.02.2017 23:14, Eric Blake wrote:

On 02/03/2017 09:47 AM, Vladimir Sementsov-Ogievskiy wrote:

Minimal implementation: always send DF flag, to not deal with fragmented
replies.

This works well with your minimal server implementation, but I worry
that it will cause us to fall over when talking to a fully-compliant
server that chooses to send EOVERFLOW errors for any request larger than
64k when DF is set; it also makes it impossible to benefit from sparse
reads.  I guess that means we need to start thinking about followup
patches to flush out our implementation.  But maybe I can live with this
patch as is, since the goal of your series was not so much the full
power of structured reads, but getting to a point where we could use
structured reply for block status, even if it means your client can only
communicate with qemu-nbd as server for now, as long as we do get to the
rest of the patches for a full-blown structured read.


Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
  block/nbd-client.c  |  47 +++
  block/nbd-client.h  |   2 +
  include/block/nbd.h |  15 +++--
  nbd/client.c| 170 ++--
  qemu-nbd.c  |   2 +-
  5 files changed, 203 insertions(+), 33 deletions(-)

Hmm - no change to the testsuite. Structured reads seems like the sort
of thing that it would be nice to test with some canned server replies,
particularly with server behavior that is permitted by the NBD protocol
but does not happen by default in qemu-nbd.


diff --git a/block/nbd-client.c b/block/nbd-client.c
index 3779c6c999..ff96bd1635 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -180,13 +180,20 @@ static void nbd_co_receive_reply(NBDClientSession *s,
  *reply = s->reply;
  if (reply->handle != request->handle ||
  !s->ioc) {
+reply->simple = true;
  reply->error = EIO;

I don't think this is quite right - by setting reply->simple to true,
you are forcing the caller to treat this as the final packet related to
this request->handle, even though that might not be the case.

As it is, I wonder if this code is correct, even before your patch - the
server is allowed to give responses out-of-order (if we request multiple
reads without waiting for the first response) - I don't see how setting
reply->error to EIO is correct if the request->handle indicates that we are
receiving an out-of-order response to some other packet, while our
request is still awaiting traffic.


Hmm, it looks like it should initiate a disconnect instead of just reporting 
an error to the I/O operation's caller.



--
Best regards,
Vladimir

Re: [Qemu-block] [PATCH] iotests: fix 185

2017-08-08 Thread Vladimir Sementsov-Ogievskiy


08.08.2017 12:04, Vladimir Sementsov-Ogievskiy wrote:

08.08.2017 12:04, Vladimir Sementsov-Ogievskiy wrote:

08.08.2017 11:53, Kevin Wolf wrote:

Am 08.08.2017 um 10:42 hat Vladimir Sementsov-Ogievskiy geschrieben:

07.08.2017 18:57, Kevin Wolf wrote:

Am 07.08.2017 um 16:16 hat Vladimir Sementsov-Ogievskiy geschrieben:

185 iotest is broken.

How to test:

i=0; while ./check -qcow2 -nocache 185; do ((i+=1)); echo N = $i; \

done; echo N = $i

finished for me like this:

185 2s ... - output mismatch (see 185.out.bad)
--- /work/src/qemu/master/tests/qemu-iotests/185.out 2017-07-14 \
  15:14:29.520343805 +0300
+++ 185.out.bad 2017-08-07 16:51:02.231922900 +0300
@@ -37,7 +37,7 @@
   {"return": {}}
   {"return": {}}
   {"timestamp": {"seconds":  TIMESTAMP, "microseconds": 
TIMESTAMP}, \

   "event": "SHUTDOWN", "data": {"guest": false}}
-{"timestamp": {"seconds":  TIMESTAMP, "microseconds": TIMESTAMP}, \
  "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", \
  "len": 4194304, "offset": 4194304, "speed": 65536, 
"type": \

  "mirror"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds": TIMESTAMP}, \
  "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", \
  "len": 0, "offset": 0, "speed": 65536, "type": "mirror"}}

   === Start backup job and exit qemu ===

Failures: 185
Failed 1 of 1 tests
N = 8

It doesn't seem logical to expect a constant offset on cancel,
so let's filter it out.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
<vsement...@virtuozzo.com>

I'm quoting 185:

# Note that the reference output intentionally includes the 
'offset' field in
# BLOCK_JOB_CANCELLED events for all of the following block jobs. 
They are
# predictable and any change in the offsets would hint at a bug in 
the job

# throttling code.
#
# In order to achieve these predictable offsets, all of the 
following tests
# use speed=65536. Each job will perform exactly one iteration 
before it has
# to sleep at least for a second, which is plenty of time for the 
'quit' QMP

# command to be received (after receiving the command, the rest runs
# synchronously, so jobs can arbitrarily continue or complete).
#
# The buffer size for commit and streaming is 512k (waiting for 8 
seconds after
# the first request), for active commit and mirror it's large 
enough to cover
# the full 4M, and for backup it's the qcow2 cluster size, which 
we know is
# 64k. As all of these are at least as large as the speed, we are 
sure that the

# offset doesn't advance after the first iteration before qemu exits.

So before we change the expected output, can we explain why the 
offsets
aren't predictable, even if throttling is used and contrary to 
what the

comment says? Should we sleep a little before issuing 'quit'?
Throttling "guarantees" that there will not be more than one 
request. But

what prevents less than one, i.e. zero, like in my reproduction?

Yes, I understand. Can we somehow make sure that at least one iteration
is made? I'd really like to keep the functional test for block job
throttling. I suppose a simple 'sleep 0.1' would do the trick, though
it's not very clean.

Kevin



I've started with 'sleep 0.5'; now there are >100 successful 
iterations... The other way is to check in the test that there were 0 or 1 
requests, but for this it looks better to rewrite it in Python.





Is sub-second sleep portable?




>1500 successful iterations, so, 'sleep 0.5' is OK for me.


--
Best regards,
Vladimir

Re: [Qemu-block] [PATCH v2 for 2.10] iotests: fix 185

2017-08-09 Thread Vladimir Sementsov-Ogievskiy


09.08.2017 18:41, Eric Blake wrote:

On 08/09/2017 10:19 AM, Vladimir Sementsov-Ogievskiy wrote:

09.08.2017 18:17, Vladimir Sementsov-Ogievskiy wrote:

185 can sometimes produce wrong output like this:

This is because quite happens before first mirror request is done

s/quite/quit/


(and, in specified case, even before block-job len field is set).
To prevent it let's just add a sleep for 0.3 seconds before quite.

Here, as well.

Maybe:

This is because, under heavy load, the quit can happen before the first
iteration of the mirror request has occurred.  To make sure we've had
time to iterate, let's just add a sleep for 0.3 seconds before quitting.



   "return"
   +# If we don't sleep here 'quit' command may be handled before
+# the first mirror iteration is done
+sleep 0.5

The commit message disagrees with the code (.3 vs. .5) - which one is
correct?


Let it be .5, as I've tested it.


--
Best regards,
Vladimir

[Qemu-block] [PATCH v2 for 2.10] block/nbd-client: always return EIO on and after the first io channel error

2017-08-08 Thread Vladimir Sementsov-Ogievskiy

Do not communicate after the first error, to avoid communicating through
a broken channel. The only exception is that we still try to send
NBD_CMD_DISC in nbd_client_close.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---

Hi all. Here is a patch, fixing a problem noted in
[PATCH 06/17] block/nbd-client: fix nbd_read_reply_entry
and
[PATCH 17/17] block/nbd-client: always return EIO on and after the first io 
channel error
and discussed on list.

If it is applied to 2.10, then I'll rebase my 'nbd client refactoring and
fixing' series on it (for 2.11). If not, I'd prefer not to rebase the series,
so please do not apply this patch for 2.11.

v2: set eio_to_all in nbd_co_send_request and nbd_co_receive_reply in case of 
error

 block/nbd-client.h |  1 +
 block/nbd-client.c | 65 +++---
 2 files changed, 48 insertions(+), 18 deletions(-)

diff --git a/block/nbd-client.h b/block/nbd-client.h
index df80771357..28db9922c8 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -29,6 +29,7 @@ typedef struct NBDClientSession {
 
 Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
 NBDReply reply;
+bool eio_to_all;
 } NBDClientSession;
 
 NBDClientSession *nbd_get_client_session(BlockDriverState *bs);
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 25dd28406b..a72cb7690a 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -49,6 +49,8 @@ static void nbd_teardown_connection(BlockDriverState *bs)
 {
 NBDClientSession *client = nbd_get_client_session(bs);
 
+client->eio_to_all = true;
+
 if (!client->ioc) { /* Already closed */
 return;
 }
@@ -74,12 +76,16 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
 Error *local_err = NULL;
 
 for (;;) {
+if (s->eio_to_all) {
+break;
+}
+
 assert(s->reply.handle == 0);
 ret = nbd_receive_reply(s->ioc, >reply, _err);
 if (ret < 0) {
 error_report_err(local_err);
 }
-if (ret <= 0) {
+if (ret <= 0 || s->eio_to_all) {
 break;
 }
 
@@ -107,6 +113,7 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
 qemu_coroutine_yield();
 }
 
+s->eio_to_all = true;
 nbd_recv_coroutines_enter_all(s);
 s->read_reply_co = NULL;
 }
@@ -118,6 +125,10 @@ static int nbd_co_send_request(BlockDriverState *bs,
 NBDClientSession *s = nbd_get_client_session(bs);
 int rc, ret, i;
 
+if (s->eio_to_all) {
+return -EIO;
+}
+
 qemu_co_mutex_lock(>send_mutex);
 while (s->in_flight == MAX_NBD_REQUESTS) {
 qemu_co_queue_wait(>free_sema, >send_mutex);
@@ -135,15 +146,15 @@ static int nbd_co_send_request(BlockDriverState *bs,
 assert(i < MAX_NBD_REQUESTS);
 request->handle = INDEX_TO_HANDLE(s, i);
 
-if (!s->ioc) {
+if (s->eio_to_all) {
 qemu_co_mutex_unlock(>send_mutex);
-return -EPIPE;
+return -EIO;
 }
 
 if (qiov) {
 qio_channel_set_cork(s->ioc, true);
 rc = nbd_send_request(s->ioc, request);
-if (rc >= 0) {
+if (rc >= 0 && !s->eio_to_all) {
 ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, false,
   NULL);
 if (ret != request->len) {
@@ -155,7 +166,13 @@ static int nbd_co_send_request(BlockDriverState *bs,
 rc = nbd_send_request(s->ioc, request);
 }
 qemu_co_mutex_unlock(>send_mutex);
-return rc;
+
+if (rc < 0 || s->eio_to_all) {
+s->eio_to_all = true;
+return -EIO;
+}
+
+return 0;
 }
 
 static void nbd_co_receive_reply(NBDClientSession *s,
@@ -169,14 +186,16 @@ static void nbd_co_receive_reply(NBDClientSession *s,
 qemu_coroutine_yield();
 *reply = s->reply;
 if (reply->handle != request->handle ||
-!s->ioc) {
+!s->ioc || s->eio_to_all) {
 reply->error = EIO;
+s->eio_to_all = true;
 } else {
 if (qiov && reply->error == 0) {
 ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, true,
   NULL);
-if (ret != request->len) {
+if (ret != request->len || s->eio_to_all) {
 reply->error = EIO;
+s->eio_to_all = true;
 }
 }
 
@@ -225,8 +244,10 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t 
offset,
 } else {
 nbd_co_receive_reply(client, , , qiov);
 }
-nbd_coroutine_end(bs, );
-return -reply.error;
+if (request.handle != 0) {
+nbd_coroutine_end(bs, );
+}
+return client->eio_to_all ? -EIO : -reply.error;
 }
 
 int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
@@ -254,8 +275,10 @@ int nbd_client_co_pwritev

Re: [Qemu-block] [Qemu-devel] [PATCH] iotests: fix 185

2017-08-08 Thread Vladimir Sementsov-Ogievskiy


08.08.2017 18:07, Eric Blake wrote:

On 08/08/2017 04:04 AM, Vladimir Sementsov-Ogievskiy wrote:


Throttling "guaranties" that there will not be more than one
request. But
what prevent less than one, i.e. zero, like in my reproduction?

Yes, I understand. Can we somehow make sure that at least one iteration
is made? I'd really like to keep the functional test for block job
throttling. I suppose a simple 'sleep 0.1' would do the trick, though
it's not very clean.

Kevin


I've started with 'sleep 0.5', now there are >100 successful
iterations... The other way is to check in test that there was 0 or 1
requests, but for this it looks better to rewrite it in python.



Is sub-second sleep portable?

Sadly, sub-second sleep is a GNU coreutils feature; I suspect the BSD
machines may fail to parse it.  (Of course, we could do some sort of
'sleep $SMALL', where $SMALL is 0.5 if sleep supports it, and 1 otherwise).


Sleeping for 1 second may lead to more than one request being done before qemu quits.


--
Best regards,
Vladimir

[Qemu-block] [PATCH for 2.10] block/nbd-client: always return EIO on and after the first io channel error

2017-08-08 Thread Vladimir Sementsov-Ogievskiy

Do not communicate after the first error, to avoid communicating through
a broken channel. The only exception is that we still try to send
NBD_CMD_DISC in nbd_client_close.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---

Hi all. Here is a patch, fixing a problem noted in
[PATCH 06/17] block/nbd-client: fix nbd_read_reply_entry
and
[PATCH 17/17] block/nbd-client: always return EIO on and after the first io 
channel error
and discussed on list.

If it is applied to 2.10, then I'll rebase my 'nbd client refactoring and
fixing' series on it (for 2.11). If not, I'd prefer not to rebase the series,
so please do not apply this patch for 2.11.

 block/nbd-client.h |  1 +
 block/nbd-client.c | 58 +-
 2 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/block/nbd-client.h b/block/nbd-client.h
index df80771357..28db9922c8 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -29,6 +29,7 @@ typedef struct NBDClientSession {
 
 Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
 NBDReply reply;
+bool eio_to_all;
 } NBDClientSession;
 
 NBDClientSession *nbd_get_client_session(BlockDriverState *bs);
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 25dd28406b..1282b2484e 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -49,6 +49,8 @@ static void nbd_teardown_connection(BlockDriverState *bs)
 {
 NBDClientSession *client = nbd_get_client_session(bs);
 
+client->eio_to_all = true;
+
 if (!client->ioc) { /* Already closed */
 return;
 }
@@ -74,12 +76,16 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
 Error *local_err = NULL;
 
 for (;;) {
+if (s->eio_to_all) {
+break;
+}
+
 assert(s->reply.handle == 0);
 ret = nbd_receive_reply(s->ioc, >reply, _err);
 if (ret < 0) {
 error_report_err(local_err);
 }
-if (ret <= 0) {
+if (ret <= 0 || s->eio_to_all) {
 break;
 }
 
@@ -107,6 +113,7 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
 qemu_coroutine_yield();
 }
 
+s->eio_to_all = true;
 nbd_recv_coroutines_enter_all(s);
 s->read_reply_co = NULL;
 }
@@ -118,6 +125,10 @@ static int nbd_co_send_request(BlockDriverState *bs,
 NBDClientSession *s = nbd_get_client_session(bs);
 int rc, ret, i;
 
+if (s->eio_to_all) {
+return -EIO;
+}
+
 qemu_co_mutex_lock(>send_mutex);
 while (s->in_flight == MAX_NBD_REQUESTS) {
 qemu_co_queue_wait(>free_sema, >send_mutex);
@@ -135,15 +146,15 @@ static int nbd_co_send_request(BlockDriverState *bs,
 assert(i < MAX_NBD_REQUESTS);
 request->handle = INDEX_TO_HANDLE(s, i);
 
-if (!s->ioc) {
+if (s->eio_to_all) {
 qemu_co_mutex_unlock(>send_mutex);
-return -EPIPE;
+return -EIO;
 }
 
 if (qiov) {
 qio_channel_set_cork(s->ioc, true);
 rc = nbd_send_request(s->ioc, request);
-if (rc >= 0) {
+if (rc >= 0 && !s->eio_to_all) {
 ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, false,
   NULL);
 if (ret != request->len) {
@@ -155,7 +166,8 @@ static int nbd_co_send_request(BlockDriverState *bs,
 rc = nbd_send_request(s->ioc, request);
 }
 qemu_co_mutex_unlock(>send_mutex);
-return rc;
+
+return s->eio_to_all ? -EIO : rc;
 }
 
 static void nbd_co_receive_reply(NBDClientSession *s,
@@ -169,13 +181,13 @@ static void nbd_co_receive_reply(NBDClientSession *s,
 qemu_coroutine_yield();
 *reply = s->reply;
 if (reply->handle != request->handle ||
-!s->ioc) {
+!s->ioc || s->eio_to_all) {
 reply->error = EIO;
 } else {
 if (qiov && reply->error == 0) {
 ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, true,
   NULL);
-if (ret != request->len) {
+if (ret != request->len || s->eio_to_all) {
 reply->error = EIO;
 }
 }
@@ -225,8 +237,10 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t 
offset,
 } else {
 nbd_co_receive_reply(client, , , qiov);
 }
-nbd_coroutine_end(bs, );
-return -reply.error;
+if (request.handle != 0) {
+nbd_coroutine_end(bs, );
+}
+return client->eio_to_all ? -EIO : -reply.error;
 }
 
 int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
@@ -254,8 +268,10 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t 
offset,
 } else {
 nbd_co_receive_reply(client, , , NULL);
 }
-nbd_coroutine_end(bs, );
-return -reply.error;
+if (request.handle != 0) {
+nbd_coroutine_end(bs, );
+}
+

Re: [Qemu-block] [PATCH for 2.10] block/nbd-client: always return EIO on and after the first io channel error

2017-08-08 Thread Vladimir Sementsov-Ogievskiy


08.08.2017 17:29, Vladimir Sementsov-Ogievskiy wrote:

Do not communicate after the first error, to avoid communicating through
a broken channel. The only exception is that we still try to send
NBD_CMD_DISC in nbd_client_close.


Worth adding: to simplify things, return -EIO in case of disconnect too.

(refers to:

-if (!s->ioc) {
+if (s->eio_to_all) {
 qemu_co_mutex_unlock(>send_mutex);
-return -EPIPE;
+return -EIO;
 }

and to:

@@ -49,6 +49,8 @@ static void nbd_teardown_connection(BlockDriverState *bs)
 {
 NBDClientSession *client = nbd_get_client_session(bs);
 
+client->eio_to_all = true;

+

However, I'm unsure how s->ioc can be NULL in the first chunk and, if it is
possible, why it is not checked everywhere. Read/write after bdrv_close?
Should it be handled at a higher level?)




Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---

Hi all. Here is a patch, fixing a problem noted in
[PATCH 06/17] block/nbd-client: fix nbd_read_reply_entry
and
[PATCH 17/17] block/nbd-client: always return EIO on and after the first io 
channel error
and discussed on list.

If it will be applied to 2.10, then I'll rebase my 'nbd client refactoring and 
fixing'
on it (for 2.11). If not - I'll prefer not rebase the series, so, do not apply 
this
patch for 2.11.

  block/nbd-client.h |  1 +
  block/nbd-client.c | 58 +-
  2 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/block/nbd-client.h b/block/nbd-client.h
index df80771357..28db9922c8 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -29,6 +29,7 @@ typedef struct NBDClientSession {
  
  Coroutine *recv_coroutine[MAX_NBD_REQUESTS];

  NBDReply reply;
+bool eio_to_all;
  } NBDClientSession;
  
  NBDClientSession *nbd_get_client_session(BlockDriverState *bs);

diff --git a/block/nbd-client.c b/block/nbd-client.c
index 25dd28406b..1282b2484e 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -49,6 +49,8 @@ static void nbd_teardown_connection(BlockDriverState *bs)
  {
  NBDClientSession *client = nbd_get_client_session(bs);
  
+client->eio_to_all = true;

+
  if (!client->ioc) { /* Already closed */
  return;
  }
@@ -74,12 +76,16 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
  Error *local_err = NULL;
  
  for (;;) {

+if (s->eio_to_all) {
+break;
+}
+
  assert(s->reply.handle == 0);
  ret = nbd_receive_reply(s->ioc, >reply, _err);
  if (ret < 0) {
  error_report_err(local_err);
  }
-if (ret <= 0) {
+if (ret <= 0 || s->eio_to_all) {
  break;
  }
  
@@ -107,6 +113,7 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)

  qemu_coroutine_yield();
  }
  
+s->eio_to_all = true;

  nbd_recv_coroutines_enter_all(s);
  s->read_reply_co = NULL;
  }
@@ -118,6 +125,10 @@ static int nbd_co_send_request(BlockDriverState *bs,
  NBDClientSession *s = nbd_get_client_session(bs);
  int rc, ret, i;
  
+if (s->eio_to_all) {

+return -EIO;
+}
+
  qemu_co_mutex_lock(>send_mutex);
  while (s->in_flight == MAX_NBD_REQUESTS) {
  qemu_co_queue_wait(>free_sema, >send_mutex);
@@ -135,15 +146,15 @@ static int nbd_co_send_request(BlockDriverState *bs,
  assert(i < MAX_NBD_REQUESTS);
  request->handle = INDEX_TO_HANDLE(s, i);
  
-if (!s->ioc) {

+if (s->eio_to_all) {
  qemu_co_mutex_unlock(>send_mutex);
-return -EPIPE;
+return -EIO;
  }
  
  if (qiov) {

  qio_channel_set_cork(s->ioc, true);
  rc = nbd_send_request(s->ioc, request);
-if (rc >= 0) {
+if (rc >= 0 && !s->eio_to_all) {
  ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, false,
NULL);
  if (ret != request->len) {
@@ -155,7 +166,8 @@ static int nbd_co_send_request(BlockDriverState *bs,
  rc = nbd_send_request(s->ioc, request);
  }
  qemu_co_mutex_unlock(>send_mutex);
-return rc;
+
+return s->eio_to_all ? -EIO : rc;
  }
  
  static void nbd_co_receive_reply(NBDClientSession *s,

@@ -169,13 +181,13 @@ static void nbd_co_receive_reply(NBDClientSession *s,
  qemu_coroutine_yield();
  *reply = s->reply;
  if (reply->handle != request->handle ||
-!s->ioc) {
+!s->ioc || s->eio_to_all) {
  reply->error = EIO;
  } else {
  if (qiov && reply->error == 0) {
  ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, true,
NULL);
-if (ret != request->len) {
+if (ret != request->len || s->eio_to_all) {

Re: [Qemu-block] [PATCH for 2.10] block/nbd-client: always return EIO on and after the first io channel error

2017-08-08 Thread Vladimir Sementsov-Ogievskiy


08.08.2017 17:44, Eric Blake wrote:

On 08/08/2017 09:29 AM, Vladimir Sementsov-Ogievskiy wrote:

Do not communicate after the first error, to avoid communicating through
a broken channel. The only exception is that we still try to send
NBD_CMD_DISC in nbd_client_close.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---

Hi all. Here is a patch, fixing a problem noted in
[PATCH 06/17] block/nbd-client: fix nbd_read_reply_entry
and
[PATCH 17/17] block/nbd-client: always return EIO on and after the first io 
channel error
and discussed on list.

If it will be applied to 2.10, then I'll rebase my 'nbd client refactoring and 
fixing'
on it (for 2.11). If not - I'll prefer not rebase the series, so, do not apply 
this
patch for 2.11.

It may be possible to do something even smaller:


  block/nbd-client.h |  1 +
  block/nbd-client.c | 58 +-
  2 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/block/nbd-client.h b/block/nbd-client.h
index df80771357..28db9922c8 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -29,6 +29,7 @@ typedef struct NBDClientSession {
  
  Coroutine *recv_coroutine[MAX_NBD_REQUESTS];

  NBDReply reply;
+bool eio_to_all;
  } NBDClientSession;
  
  NBDClientSession *nbd_get_client_session(BlockDriverState *bs);

diff --git a/block/nbd-client.c b/block/nbd-client.c
index 25dd28406b..1282b2484e 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -49,6 +49,8 @@ static void nbd_teardown_connection(BlockDriverState *bs)
  {
  NBDClientSession *client = nbd_get_client_session(bs);
  
+client->eio_to_all = true;

+
  if (!client->ioc) { /* Already closed */
  return;
  }
@@ -74,12 +76,16 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
  Error *local_err = NULL;
  
  for (;;) {

+if (s->eio_to_all) {
+break;
+}
+

How is this conditional ever reached? nbd_read_reply_entry() is our
workhorse, that is supposed to run until the client is ready to disconnect.


I forgot to set eio_to_all on an error in sending a request.




  assert(s->reply.handle == 0);
  ret = nbd_receive_reply(s->ioc, >reply, _err);
  if (ret < 0) {
  error_report_err(local_err);
  }
-if (ret <= 0) {
+if (ret <= 0 || s->eio_to_all) {
  break;
  }

This says we're now supposed to break out of the while loop.  So unless
something can set s->eio_to_all during
aio_co_wake(s->recv_coroutine[i]), the next iteration of the loop won't
hit the first conditional.

Meanwhile, even if we skip the first conditional, this second
conditional will still end the loop prior to acting on anything received
from the server (the difference being whether we called
nbd_receive_reply() one additional time, but I don't see that as getting
in the way of a clean exit).

But my question is whether we can just go with a simpler fix: if we ever
break out of the workhorse loop of nbd_read_reply_entry(), then (and
only then) is when we call nbd_recv_coroutines_enter_all() to mop up any
pending transactions before tearing down the coroutines.  Is there
something we can do in nbd_recv_coroutines_enter_all() that will
guarantee that our final entry into each coroutine will fail?


Anyway, you should prevent any subsequent new requests, using some 
additional variable or a disconnect.


Also, I think an error in sending a request should terminate 
this loop in nbd_read_reply_entry, so the error condition should be 
checked after each yield.





I'm still playing with the idea locally.



--
Best regards,
Vladimir

Re: [Qemu-block] [PATCH v2 for 2.10] iotests: fix 185

2017-08-09 Thread Vladimir Sementsov-Ogievskiy


09.08.2017 18:17, Vladimir Sementsov-Ogievskiy wrote:

185 can sometimes produce wrong output like this:

=
185 2s ... - output mismatch (see 185.out.bad)
--- /work/src/qemu/master/tests/qemu-iotests/185.out2017-07-14 \
 15:14:29.520343805 +0300
+++ 185.out.bad 2017-08-07 16:51:02.231922900 +0300
@@ -37,7 +37,7 @@
  {"return": {}}
  {"return": {}}
  {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, \
  "event": "SHUTDOWN", "data": {"guest": false}}
-{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, \
 "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", \
 "len": 4194304, "offset": 4194304, "speed": 65536, "type": \
 "mirror"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, \
 "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", \
 "len": 0, "offset": 0, "speed": 65536, "type": "mirror"}}

  === Start backup job and exit qemu ===

Failures: 185
Failed 1 of 1 tests
=

This is because quite happens before first mirror request is done


s/quite/quit/


(and, in specified case, even before block-job len field is set).
To prevent it let's just add a sleep for 0.3 seconds before quite.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---

v2: just add a sleep instead of filtering output (proposed by Kevin)

  tests/qemu-iotests/185 | 4 
  1 file changed, 4 insertions(+)

diff --git a/tests/qemu-iotests/185 b/tests/qemu-iotests/185
index 0eda371f27..f5b47e4c1a 100755
--- a/tests/qemu-iotests/185
+++ b/tests/qemu-iotests/185
@@ -156,6 +156,10 @@ _send_qemu_cmd $h \
'speed': 65536 } }" \
  "return"
  
+# If we don't sleep here 'quit' command may be handled before

+# the first mirror iteration is done
+sleep 0.5
+
  _send_qemu_cmd $h "{ 'execute': 'quit' }" "return"
  wait=1 _cleanup_qemu
  



--
Best regards,
Vladimir

[Qemu-block] [PATCH v2 for 2.10] iotests: fix 185

2017-08-09 Thread Vladimir Sementsov-Ogievskiy

185 can sometimes produce wrong output like this:

=
185 2s ... - output mismatch (see 185.out.bad)
--- /work/src/qemu/master/tests/qemu-iotests/185.out2017-07-14 \
15:14:29.520343805 +0300
+++ 185.out.bad 2017-08-07 16:51:02.231922900 +0300
@@ -37,7 +37,7 @@
 {"return": {}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, \
 "event": "SHUTDOWN", "data": {"guest": false}}
-{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, \
"event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", \
"len": 4194304, "offset": 4194304, "speed": 65536, "type": \
"mirror"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, \
"event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", \
"len": 0, "offset": 0, "speed": 65536, "type": "mirror"}}

 === Start backup job and exit qemu ===

Failures: 185
Failed 1 of 1 tests
=

This is because quit happens before the first mirror request is done
(and, in the specified case, even before the block-job len field is set).
To prevent it, let's just add a sleep for 0.5 seconds before quit.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---

v2: just add a sleep instead of filtering output (proposed by Kevin)

 tests/qemu-iotests/185 | 4 
 1 file changed, 4 insertions(+)

diff --git a/tests/qemu-iotests/185 b/tests/qemu-iotests/185
index 0eda371f27..f5b47e4c1a 100755
--- a/tests/qemu-iotests/185
+++ b/tests/qemu-iotests/185
@@ -156,6 +156,10 @@ _send_qemu_cmd $h \
   'speed': 65536 } }" \
 "return"
 
+# If we don't sleep here 'quit' command may be handled before
+# the first mirror iteration is done
+sleep 0.5
+
 _send_qemu_cmd $h "{ 'execute': 'quit' }" "return"
 wait=1 _cleanup_qemu
 
-- 
2.11.1

Re: [Qemu-block] [PATCH 02/17] nbd/client: refactor nbd_read_eof

2017-08-07 Thread Vladimir Sementsov-Ogievskiy


07.08.2017 14:42, Eric Blake wrote:

On 08/04/2017 10:14 AM, Vladimir Sementsov-Ogievskiy wrote:

Refactor nbd_read_eof to return 1 on success, 0 on eof, when no
data was read and <0 for other cases, because returned size of
read data is not actually used.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
  nbd/nbd-internal.h | 34 +-
  nbd/client.c   |  5 -
  tests/qemu-iotests/083.out |  4 ++--
  3 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index 396ddb4d3e..3fb0b6098a 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -77,21 +77,37 @@
  #define NBD_ESHUTDOWN  108
  
  /* nbd_read_eof

- * Tries to read @size bytes from @ioc. Returns number of bytes actually read.
- * May return a value >= 0 and < size only on EOF, i.e. when iteratively called
- * qio_channel_readv() returns 0. So, there is no need to call nbd_read_eof
- * iteratively.
+ * Tries to read @size bytes from @ioc.
+ * Returns 1 on success
+ * 0 on eof, when no data was read (errp is not set)
+ * -EINVAL on eof, when some data < @size was read until eof
+ * < 0 on read fail

In general, mixing negative errno value and generic < 0 in the same
function is most likely ambiguous.


Hmm, but this is entirely what we do so often:

if (,,) return -EINVAL;

return some_other_func().

last two lines may be rewritten like this:
+ * < 0 on fail




   */
-static inline ssize_t nbd_read_eof(QIOChannel *ioc, void *buffer, size_t size,
-   Error **errp)
+static inline int nbd_read_eof(QIOChannel *ioc, void *buffer, size_t size,
+   Error **errp)
  {
  struct iovec iov = { .iov_base = buffer, .iov_len = size };
+ssize_t ret;
+
  /* Sockets are kept in blocking mode in the negotiation phase.  After
   * that, a non-readable socket simply means that another thread stole
   * our request/reply.  Synchronization is done with recv_coroutine, so
   * that this is coroutine-safe.
   */
-return nbd_rwv(ioc, , 1, size, true, errp);
+
+assert(size > 0);

Effectively the same as assert(size != 0).


+
+ret = nbd_rwv(ioc, , 1, size, true, errp);
+if (ret <= 0) {
+return ret;
+}

So this is a negative errno (or 0 on EOF),


If it is < 0, it can only be -EIO, which is set in nbd_rwv "by hand". It is 
unrelated to real read/write errnos.





+
+if (ret != size) {
+error_setg(errp, "End of file");
+return -EINVAL;

and so is this. Which makes the function documentation not quite
accurate; you aren't mixing a generic < 0.


hmm.. my wordings are weird sometimes, sorry for that :(. and thank you 
for your patience.





+}
+
+return 1;
  }
  
  /* nbd_read

@@ -100,9 +116,9 @@ static inline ssize_t nbd_read_eof(QIOChannel *ioc, void 
*buffer, size_t size,
  static inline int nbd_read(QIOChannel *ioc, void *buffer, size_t size,
 Error **errp)
  {
-ssize_t ret = nbd_read_eof(ioc, buffer, size, errp);
+int ret = nbd_read_eof(ioc, buffer, size, errp);
  
-if (ret >= 0 && ret != size) {

+if (ret == 0) {
  ret = -EINVAL;
  error_setg(errp, "End of file");

Why do we have to set errp here instead of in nbd_read_eof()? Is there
ever any case where hitting early EOF is not something that should be
treated as an error?


Yes. It is the only usage of nbd_read_eof - in nbd_receive_reply. This is 
used to understand that there are no more replies to read.





  }
diff --git a/nbd/client.c b/nbd/client.c
index f1c16b588f..4556056daa 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -925,11 +925,6 @@ ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply 
*reply, Error **errp)
  return ret;
  }
  
-if (ret != sizeof(buf)) {

-error_setg(errp, "read failed");
-return -EINVAL;
-}
-
  /* Reply
 [ 0 ..  3]magic   (NBD_REPLY_MAGIC)
 [ 4 ..  7]error   (0 == no error)
diff --git a/tests/qemu-iotests/083.out b/tests/qemu-iotests/083.out
index a24c6bfece..d3bea1b2f5 100644
--- a/tests/qemu-iotests/083.out
+++ b/tests/qemu-iotests/083.out
@@ -69,12 +69,12 @@ read failed: Input/output error
  
  === Check disconnect 4 reply ===
  
-read failed

+End of file
  read failed: Input/output error

At least you tracked that your changes tweak the error message.  But I'm
not yet convinced whether this change simplifies anything.  Is there a
later patch that is easier to write with the new semantics which was not
possible with the pre-patch semantics?


This patch just moves error handling one level down (so that unused 
information is not propagated). Together with the following patch, it also 
removes the last remains of ssize_t and of returning the number of bytes in 
nbd/client.c - for consistency.

Let nbd_rwv be the only function returning the number of bytes.





--
Best regards,
Vladimir

[Qemu-block] [PATCH] iotests: fix 185

2017-08-07 Thread Vladimir Sementsov-Ogievskiy

185 iotest is broken.

How to test:
> i=0; while ./check -qcow2 -nocache 185; do ((i+=1)); echo N = $i; \
  done; echo N = $i

finished for me like this:

185 2s ... - output mismatch (see 185.out.bad)
--- /work/src/qemu/master/tests/qemu-iotests/185.out2017-07-14 \
15:14:29.520343805 +0300
+++ 185.out.bad 2017-08-07 16:51:02.231922900 +0300
@@ -37,7 +37,7 @@
 {"return": {}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, \
 "event": "SHUTDOWN", "data": {"guest": false}}
-{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, \
"event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", \
"len": 4194304, "offset": 4194304, "speed": 65536, "type": \
"mirror"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, \
"event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", \
"len": 0, "offset": 0, "speed": 65536, "type": "mirror"}}

 === Start backup job and exit qemu ===

Failures: 185
Failed 1 of 1 tests
N = 8

It doesn't seem logical to expect a constant offset on cancel,
so let's filter it out.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 tests/qemu-iotests/185 | 2 +-
 tests/qemu-iotests/185.out | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/185 b/tests/qemu-iotests/185
index 0eda371f27..446d78447e 100755
--- a/tests/qemu-iotests/185
+++ b/tests/qemu-iotests/185
@@ -157,7 +157,7 @@ _send_qemu_cmd $h \
 "return"
 
 _send_qemu_cmd $h "{ 'execute': 'quit' }" "return"
-wait=1 _cleanup_qemu
+wait=1 _cleanup_qemu | _filter_block_job_offset
 
 echo
 echo === Start backup job and exit qemu ===
diff --git a/tests/qemu-iotests/185.out b/tests/qemu-iotests/185.out
index 57eaf8d699..f51af627fe 100644
--- a/tests/qemu-iotests/185.out
+++ b/tests/qemu-iotests/185.out
@@ -37,7 +37,7 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=67108864 
cluster_size=65536 l
 {"return": {}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"SHUTDOWN", "data": {"guest": false}}
-{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 4194304, "offset": 
4194304, "speed": 65536, "type": "mirror"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 4194304, "offset": 
OFFSET, "speed": 65536, "type": "mirror"}}
 
 === Start backup job and exit qemu ===
 
-- 
2.11.1

Re: [Qemu-block] [PATCH 11/17] block/nbd-client: fix nbd_co_request: set s->reply.handle to 0 on error

2017-08-07 Thread Vladimir Sementsov-Ogievskiy


07.08.2017 14:55, Eric Blake wrote:

On 08/04/2017 10:14 AM, Vladimir Sementsov-Ogievskiy wrote:

We set s->reply.handle to 0 on one error path and don't set on another.
For consistancy and to avoid assert in nbd_read_reply_entry let's
set s->reply.handle to 0 in case of wrong handle too.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
  block/nbd-client.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

Can this assertion be triggered now (presumably, with a broken server)?
I'm trying to figure out if this is 2.10 material.

[Urgh. If a broken server is able to cause an assertion failure that
causes a client to abort on an assertion failure, that probably deserves
a CVE]

Hmm, looks like I was mistaken: if the handle is wrong, then read_reply_co 
should have already finished, so it's impossible.



--
Best regards,
Vladimir

Re: [Qemu-block] [PATCH 06/17] block/nbd-client: fix nbd_read_reply_entry

2017-08-07 Thread Vladimir Sementsov-Ogievskiy


07.08.2017 14:52, Eric Blake wrote:

On 08/04/2017 10:14 AM, Vladimir Sementsov-Ogievskiy wrote:

Set reply.handle to 0 on error path to prevent normal path of
nbd_co_receive_reply.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
  block/nbd-client.c | 1 +
  1 file changed, 1 insertion(+)

Can you document a case where not fixing this would be an observable bug
(even if it requires using gdb and single-stepping between client and
server to make what is otherwise a racy situation easy to see)?  I'm
trying to figure out if this is 2.10 material.


it is simple enough:

Run qemu-nbd in gdb, set a breakpoint on nbd_send_reply, and when it is hit,
step with next up to "stl_be_p(buf, NBD_REPLY_MAGIC);" and after it do "call 
stl_be_p(buf, 1000)".


run qemu-io with some read in gdb, set break on
br block/nbd-client.c:83

( it is break; after failed nbd_receive_reply call)

and on
br block/nbd-client.c:170

(it is in nbd_co_receive_reply after yield)

on first break we will be sure that  nbd_receive_reply failed,
on second we will be sure by
(gdb) p s->reply
$1 = {handle = 93825000680144, error = 0}
(gdb) p request->handle
$2 = 93825000680144

that we are on normal receiving path.




diff --git a/block/nbd-client.c b/block/nbd-client.c
index dc19894a7c..0c88d84de6 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -107,6 +107,7 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
  qemu_coroutine_yield();
  }
  
+s->reply.handle = 0;

  nbd_recv_coroutines_enter_all(s);
  s->read_reply_co = NULL;
  }



--
Best regards,
Vladimir

[Qemu-block] [PATCH 11/17] block/nbd-client: fix nbd_co_request: set s->reply.handle to 0 on error

2017-08-04 Thread Vladimir Sementsov-Ogievskiy

We set s->reply.handle to 0 on one error path and don't set it on another.
For consistency and to avoid an assert in nbd_read_reply_entry, let's
set s->reply.handle to 0 in case of a wrong handle too.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 block/nbd-client.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index d6965d24db..b84cab4079 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -183,13 +183,13 @@ static int nbd_co_request(BlockDriverState *bs,
 reply.error = EIO;
 }
 }
-
-/* Tell the read handler to read another header.  */
-s->reply.handle = 0;
 }
 rc = -reply.error;
 
 out:
+/* Tell the read handler to read another header.  */
+s->reply.handle = 0;
+
 s->recv_coroutine[i] = NULL;
 
 /* Kick the read_reply_co to get the next reply.  */
-- 
2.11.1

[Qemu-block] [PATCH 02/17] nbd/client: refactor nbd_read_eof

2017-08-04 Thread Vladimir Sementsov-Ogievskiy

Refactor nbd_read_eof to return 1 on success, 0 on eof when no
data was read, and < 0 in other cases, because the returned size of
the read data is not actually used.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 nbd/nbd-internal.h | 34 +-
 nbd/client.c   |  5 -
 tests/qemu-iotests/083.out |  4 ++--
 3 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index 396ddb4d3e..3fb0b6098a 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -77,21 +77,37 @@
 #define NBD_ESHUTDOWN  108
 
 /* nbd_read_eof
- * Tries to read @size bytes from @ioc. Returns number of bytes actually read.
- * May return a value >= 0 and < size only on EOF, i.e. when iteratively called
- * qio_channel_readv() returns 0. So, there is no need to call nbd_read_eof
- * iteratively.
+ * Tries to read @size bytes from @ioc.
+ * Returns 1 on success
+ * 0 on eof, when no data was read (errp is not set)
+ * -EINVAL on eof, when some data < @size was read until eof
+ * < 0 on read fail
  */
-static inline ssize_t nbd_read_eof(QIOChannel *ioc, void *buffer, size_t size,
-   Error **errp)
+static inline int nbd_read_eof(QIOChannel *ioc, void *buffer, size_t size,
+   Error **errp)
 {
 struct iovec iov = { .iov_base = buffer, .iov_len = size };
+ssize_t ret;
+
 /* Sockets are kept in blocking mode in the negotiation phase.  After
  * that, a non-readable socket simply means that another thread stole
  * our request/reply.  Synchronization is done with recv_coroutine, so
  * that this is coroutine-safe.
  */
-return nbd_rwv(ioc, , 1, size, true, errp);
+
+assert(size > 0);
+
+ret = nbd_rwv(ioc, , 1, size, true, errp);
+if (ret <= 0) {
+return ret;
+}
+
+if (ret != size) {
+error_setg(errp, "End of file");
+return -EINVAL;
+}
+
+return 1;
 }
 
 /* nbd_read
@@ -100,9 +116,9 @@ static inline ssize_t nbd_read_eof(QIOChannel *ioc, void 
*buffer, size_t size,
 static inline int nbd_read(QIOChannel *ioc, void *buffer, size_t size,
Error **errp)
 {
-ssize_t ret = nbd_read_eof(ioc, buffer, size, errp);
+int ret = nbd_read_eof(ioc, buffer, size, errp);
 
-if (ret >= 0 && ret != size) {
+if (ret == 0) {
 ret = -EINVAL;
 error_setg(errp, "End of file");
 }
diff --git a/nbd/client.c b/nbd/client.c
index f1c16b588f..4556056daa 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -925,11 +925,6 @@ ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply 
*reply, Error **errp)
 return ret;
 }
 
-if (ret != sizeof(buf)) {
-error_setg(errp, "read failed");
-return -EINVAL;
-}
-
 /* Reply
[ 0 ..  3]magic   (NBD_REPLY_MAGIC)
[ 4 ..  7]error   (0 == no error)
diff --git a/tests/qemu-iotests/083.out b/tests/qemu-iotests/083.out
index a24c6bfece..d3bea1b2f5 100644
--- a/tests/qemu-iotests/083.out
+++ b/tests/qemu-iotests/083.out
@@ -69,12 +69,12 @@ read failed: Input/output error
 
 === Check disconnect 4 reply ===
 
-read failed
+End of file
 read failed: Input/output error
 
 === Check disconnect 8 reply ===
 
-read failed
+End of file
 read failed: Input/output error
 
 === Check disconnect before data ===
-- 
2.11.1

[Qemu-block] [PATCH 13/17] block/nbd-client: refactor NBDClientSession.recv_coroutine

2017-08-04 Thread Vladimir Sementsov-Ogievskiy

Move from recv_coroutine[i] to requests[i].co. This is needed for
further refactoring: new fields will be added to the created structure.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 block/nbd-client.h |  4 +++-
 block/nbd-client.c | 20 ++--
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/block/nbd-client.h b/block/nbd-client.h
index df80771357..48e2559df6 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -27,7 +27,9 @@ typedef struct NBDClientSession {
 Coroutine *read_reply_co;
 int in_flight;
 
-Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
+struct {
+Coroutine *co;
+} requests[MAX_NBD_REQUESTS];
 NBDReply reply;
 } NBDClientSession;
 
diff --git a/block/nbd-client.c b/block/nbd-client.c
index d6145c7db0..5eb126c399 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -39,8 +39,8 @@ static void nbd_recv_coroutines_wake_all(NBDClientSession *s)
 int i;
 
 for (i = 0; i < MAX_NBD_REQUESTS; i++) {
-if (s->recv_coroutine[i]) {
-aio_co_wake(s->recv_coroutine[i]);
+if (s->requests[i].co) {
+aio_co_wake(s->requests[i].co);
 }
 }
 }
@@ -88,22 +88,22 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
  * one coroutine is called until the reply finishes.
  */
 i = HANDLE_TO_INDEX(s, s->reply.handle);
-if (i >= MAX_NBD_REQUESTS || !s->recv_coroutine[i]) {
+if (i >= MAX_NBD_REQUESTS || !s->requests[i].co) {
 break;
 }
 
-/* We're woken up by the recv_coroutine itself.  Note that there
+/* We're woken up by the receiving coroutine itself.  Note that there
  * is no race between yielding and reentering read_reply_co.  This
  * is because:
  *
- * - if recv_coroutine[i] runs on the same AioContext, it is only
+ * - if requests[i].co runs on the same AioContext, it is only
  *   entered after we yield
  *
- * - if recv_coroutine[i] runs on a different AioContext, reentering
+ * - if requests[i].co runs on a different AioContext, reentering
  *   read_reply_co happens through a bottom half, which can only
  *   run after we yield.
  */
-aio_co_wake(s->recv_coroutine[i]);
+aio_co_wake(s->requests[i].co);
 qemu_coroutine_yield();
 }
 
@@ -126,8 +126,8 @@ static int nbd_co_request(BlockDriverState *bs,
 s->in_flight++;
 
 for (i = 0; i < MAX_NBD_REQUESTS; i++) {
-if (s->recv_coroutine[i] == NULL) {
-s->recv_coroutine[i] = qemu_coroutine_self();
+if (s->requests[i].co == NULL) {
+s->requests[i].co = qemu_coroutine_self();
 break;
 }
 }
@@ -189,7 +189,7 @@ out:
 /* Tell the read handler to read another header.  */
 s->reply.handle = 0;
 
-s->recv_coroutine[i] = NULL;
+s->requests[i].co = NULL;
 
 /* Kick the read_reply_co to get the next reply.  */
 if (s->read_reply_co) {
-- 
2.11.1

[Qemu-block] [PATCH 06/17] block/nbd-client: fix nbd_read_reply_entry

2017-08-04 Thread Vladimir Sementsov-Ogievskiy

Set reply.handle to 0 on error path to prevent normal path of
nbd_co_receive_reply.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 block/nbd-client.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index dc19894a7c..0c88d84de6 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -107,6 +107,7 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
 qemu_coroutine_yield();
 }
 
+s->reply.handle = 0;
 nbd_recv_coroutines_enter_all(s);
 s->read_reply_co = NULL;
 }
-- 
2.11.1

[Qemu-block] [PATCH 00/17] nbd client refactoring and fixing

2017-08-04 Thread Vladimir Sementsov-Ogievskiy

A bit more refactoring and fixing before BLOCK_STATUS series.
I've tried to make individual patches simple enough, so there are
a lot of them.

Vladimir Sementsov-Ogievskiy (17):
  nbd/client: fix nbd_opt_go
  nbd/client: refactor nbd_read_eof
  nbd/client: refactor nbd_receive_reply
  nbd/client: fix nbd_send_request to return int
  block/nbd-client: get rid of ssize_t
  block/nbd-client: fix nbd_read_reply_entry
  block/nbd-client: refactor request send/receive
  block/nbd-client: rename nbd_recv_coroutines_enter_all
  block/nbd-client: move nbd_co_receive_reply content into
nbd_co_request
  block/nbd-client: move nbd_coroutine_end content into nbd_co_request
  block/nbd-client: fix nbd_co_request: set s->reply.handle to 0 on
error
  block/nbd-client: refactor nbd_co_request
  block/nbd-client: refactor NBDClientSession.recv_coroutine
  block/nbd-client: exit reply-reading coroutine on incorrect handle
  block/nbd-client: refactor reading reply
  block/nbd-client: drop reply field from NBDClientSession
  block/nbd-client: always return EIO on and after the first io channel
error

 block/nbd-client.h |   9 ++-
 include/block/nbd.h|   4 +-
 nbd/nbd-internal.h |  34 ++---
 block/nbd-client.c | 173 ++---
 nbd/client.c   |  21 +++---
 tests/qemu-iotests/083.out |   4 +-
 6 files changed, 115 insertions(+), 130 deletions(-)

-- 
2.11.1

[Qemu-block] [PATCH 08/17] block/nbd-client: rename nbd_recv_coroutines_enter_all

2017-08-04 Thread Vladimir Sementsov-Ogievskiy

Rename nbd_recv_coroutines_enter_all to nbd_recv_coroutines_wake_all,
as it most probably just adds all recv coroutines into co_queue_wakeup,
rather than directly entering them.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 block/nbd-client.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index c9ade9b517..8ad2264a40 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -34,7 +34,7 @@
 #define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
 #define INDEX_TO_HANDLE(bs, index)  ((index)  ^ ((uint64_t)(intptr_t)bs))
 
-static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
+static void nbd_recv_coroutines_wake_all(NBDClientSession *s)
 {
 int i;
 
@@ -108,7 +108,7 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
 }
 
 s->reply.handle = 0;
-nbd_recv_coroutines_enter_all(s);
+nbd_recv_coroutines_wake_all(s);
 s->read_reply_co = NULL;
 }
 
-- 
2.11.1

[Qemu-block] [PATCH 15/17] block/nbd-client: refactor reading reply

2017-08-04 Thread Vladimir Sementsov-Ogievskiy

Read the whole reply in one place - in nbd_read_reply_entry.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 block/nbd-client.h |  1 +
 block/nbd-client.c | 27 +--
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/block/nbd-client.h b/block/nbd-client.h
index aa36be8950..0f84ccc073 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -30,6 +30,7 @@ typedef struct NBDClientSession {
 struct {
 Coroutine *co;
 NBDRequest *request;
+QEMUIOVector *qiov;
 } requests[MAX_NBD_REQUESTS];
 NBDReply reply;
 } NBDClientSession;
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 0e12db4be3..61780c5df9 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -94,6 +94,18 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
 break;
 }
 
+if (s->reply.error == 0 &&
+s->requests[i].request->type == NBD_CMD_READ)
+{
+assert(s->requests[i].qiov != NULL);
+ret = nbd_rwv(s->ioc, s->requests[i].qiov->iov,
+  s->requests[i].qiov->niov,
+  s->requests[i].request->len, true, NULL);
+if (ret != s->requests[i].request->len) {
+break;
+}
+}
+
 /* We're woken up by the receiving coroutine itself.  Note that there
  * is no race between yielding and reentering read_reply_co.  This
  * is because:
@@ -138,6 +150,7 @@ static int nbd_co_request(BlockDriverState *bs,
 assert(i < MAX_NBD_REQUESTS);
 request->handle = INDEX_TO_HANDLE(s, i);
 s->requests[i].request = request;
+s->requests[i].qiov = qiov;
 
 if (!s->ioc) {
 qemu_co_mutex_unlock(>send_mutex);
@@ -165,12 +178,6 @@ static int nbd_co_request(BlockDriverState *bs,
 goto out;
 }
 
-if (request->type == NBD_CMD_READ) {
-assert(qiov != NULL);
-} else {
-qiov = NULL;
-}
-
 /* Wait until we're woken up by nbd_read_reply_entry.  */
 qemu_coroutine_yield();
 if (!s->ioc || s->reply.handle == 0) {
@@ -180,14 +187,6 @@ static int nbd_co_request(BlockDriverState *bs,
 
 assert(s->reply.handle == request->handle);
 
-if (qiov && s->reply.error == 0) {
-ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, true, NULL);
-if (ret != request->len) {
-rc = -EIO;
-goto out;
-}
-}
-
 rc = -s->reply.error;
 
 out:
-- 
2.11.1

[Qemu-block] [PATCH 04/17] nbd/client: fix nbd_send_request to return int

2017-08-04 Thread Vladimir Sementsov-Ogievskiy

Fix nbd_send_request to return int, as it returns the return value
of nbd_write (which is int), and the only user of nbd_send_request's
return value (nbd_co_send_request) considers it an int too.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 include/block/nbd.h | 2 +-
 nbd/client.c| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index f7450608b4..040cdd2e60 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -163,7 +163,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name,
   Error **errp);
 int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info,
  Error **errp);
-ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request);
+int nbd_send_request(QIOChannel *ioc, NBDRequest *request);
 int nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp);
 int nbd_client(int fd);
 int nbd_disconnect(int fd);
diff --git a/nbd/client.c b/nbd/client.c
index a1758a1931..00cba45853 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -896,7 +896,7 @@ int nbd_disconnect(int fd)
 }
 #endif
 
-ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request)
+int nbd_send_request(QIOChannel *ioc, NBDRequest *request)
 {
 uint8_t buf[NBD_REQUEST_SIZE];
 
-- 
2.11.1

[Qemu-block] [PATCH 07/17] block/nbd-client: refactor request send/receive

2017-08-04 Thread Vladimir Sementsov-Ogievskiy

Move nbd_co_receive_reply and nbd_coroutine_end calls into
nbd_co_send_request and rename the latter to just nbd_co_request.

This removes code duplications in nbd_client_co_{pwrite,pread,...}
functions. Also this is needed for further refactoring.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 block/nbd-client.c | 89 --
 1 file changed, 33 insertions(+), 56 deletions(-)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index 0c88d84de6..c9ade9b517 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -112,12 +112,20 @@ static coroutine_fn void nbd_read_reply_entry(void 
*opaque)
 s->read_reply_co = NULL;
 }
 
-static int nbd_co_send_request(BlockDriverState *bs,
-   NBDRequest *request,
-   QEMUIOVector *qiov)
+static void nbd_co_receive_reply(NBDClientSession *s,
+ NBDRequest *request,
+ NBDReply *reply,
+ QEMUIOVector *qiov);
+static void nbd_coroutine_end(BlockDriverState *bs,
+  NBDRequest *request);
+
+static int nbd_co_request(BlockDriverState *bs,
+  NBDRequest *request,
+  QEMUIOVector *qiov)
 {
 NBDClientSession *s = nbd_get_client_session(bs);
 int rc, ret, i;
+NBDReply reply;
 
 qemu_co_mutex_lock(>send_mutex);
 while (s->in_flight == MAX_NBD_REQUESTS) {
@@ -141,7 +149,8 @@ static int nbd_co_send_request(BlockDriverState *bs,
 return -EPIPE;
 }
 
-if (qiov) {
+if (request->type == NBD_CMD_WRITE) {
+assert(qiov != NULL);
 qio_channel_set_cork(s->ioc, true);
 rc = nbd_send_request(s->ioc, request);
 if (rc >= 0) {
@@ -156,6 +165,21 @@ static int nbd_co_send_request(BlockDriverState *bs,
 rc = nbd_send_request(s->ioc, request);
 }
 qemu_co_mutex_unlock(>send_mutex);
+
+if (rc < 0) {
+goto out;
+}
+
+if (request->type == NBD_CMD_READ) {
+assert(qiov != NULL);
+} else {
+qiov = NULL;
+}
+nbd_co_receive_reply(s, request, , qiov);
+rc = -reply.error;
+
+out:
+nbd_coroutine_end(bs, request);
 return rc;
 }
 
@@ -208,26 +232,16 @@ static void nbd_coroutine_end(BlockDriverState *bs,
 int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
  uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
-NBDClientSession *client = nbd_get_client_session(bs);
 NBDRequest request = {
 .type = NBD_CMD_READ,
 .from = offset,
 .len = bytes,
 };
-NBDReply reply;
-int ret;
 
 assert(bytes <= NBD_MAX_BUFFER_SIZE);
 assert(!flags);
 
-ret = nbd_co_send_request(bs, , NULL);
-if (ret < 0) {
-reply.error = -ret;
-} else {
-nbd_co_receive_reply(client, , , qiov);
-}
-nbd_coroutine_end(bs, );
-return -reply.error;
+return nbd_co_request(bs, , qiov);
 }
 
 int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
@@ -239,8 +253,6 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t 
offset,
 .from = offset,
 .len = bytes,
 };
-NBDReply reply;
-int ret;
 
 if (flags & BDRV_REQ_FUA) {
 assert(client->info.flags & NBD_FLAG_SEND_FUA);
@@ -249,27 +261,18 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t 
offset,
 
 assert(bytes <= NBD_MAX_BUFFER_SIZE);
 
-ret = nbd_co_send_request(bs, , qiov);
-if (ret < 0) {
-reply.error = -ret;
-} else {
-nbd_co_receive_reply(client, , , NULL);
-}
-nbd_coroutine_end(bs, );
-return -reply.error;
+return nbd_co_request(bs, , qiov);
 }
 
 int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
 int bytes, BdrvRequestFlags flags)
 {
-int ret;
 NBDClientSession *client = nbd_get_client_session(bs);
 NBDRequest request = {
 .type = NBD_CMD_WRITE_ZEROES,
 .from = offset,
 .len = bytes,
 };
-NBDReply reply;
 
 if (!(client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES)) {
 return -ENOTSUP;
@@ -283,22 +286,13 @@ int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, 
int64_t offset,
 request.flags |= NBD_CMD_FLAG_NO_HOLE;
 }
 
-ret = nbd_co_send_request(bs, , NULL);
-if (ret < 0) {
-reply.error = -ret;
-} else {
-nbd_co_receive_reply(client, , , NULL);
-}
-nbd_coroutine_end(bs, );
-return -reply.error;
+return nbd_co_request(bs, , NULL);
 }
 
 int nbd_client_co_flush(BlockDriverState *bs)
 {
 NBDClientSession *client = nbd_get_client_session(bs);
 NBDRequest request = { .type = NBD_CMD_FLUSH };
-NBDReply reply;
-int ret;
 
 if (!(client->info.flags & NBD_FLAG

[Qemu-block] [PATCH 16/17] block/nbd-client: drop reply field from NBDClientSession

2017-08-04 Thread Vladimir Sementsov-Ogievskiy

Drop 'reply' from NBDClientSession. Its usage is not very transparent:

1. it is used to deliver error to receiving coroutine, and receiving
   coroutine must save or handle it somehow and then zero out
   it in NBDClientSession.
2. it is used to inform receiving coroutines that nbd_read_reply_entry
   is out for some reason (error or disconnect)

To simplify this scheme:
- drop NBDClientSession.reply
- introduce NBDClientSession.requests[...].ret for (1)
- introduce NBDClientSession.eio_to_all for (2)

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 block/nbd-client.h |  3 ++-
 block/nbd-client.c | 25 ++---
 2 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/block/nbd-client.h b/block/nbd-client.h
index 0f84ccc073..0b0aa67342 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -31,8 +31,9 @@ typedef struct NBDClientSession {
 Coroutine *co;
 NBDRequest *request;
 QEMUIOVector *qiov;
+int ret;
 } requests[MAX_NBD_REQUESTS];
-NBDReply reply;
+bool eio_to_all;
 } NBDClientSession;
 
 NBDClientSession *nbd_get_client_session(BlockDriverState *bs);
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 61780c5df9..7c151b3dd3 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -72,10 +72,10 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
 uint64_t i;
 int ret;
 Error *local_err = NULL;
+NBDReply reply;
 
 for (;;) {
-assert(s->reply.handle == 0);
-ret = nbd_receive_reply(s->ioc, >reply, _err);
+ret = nbd_receive_reply(s->ioc, , _err);
 if (ret < 0) {
 error_report_err(local_err);
 }
@@ -87,16 +87,14 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
  * handler acts as a synchronization point and ensures that only
  * one coroutine is called until the reply finishes.
  */
-i = HANDLE_TO_INDEX(s, s->reply.handle);
+i = HANDLE_TO_INDEX(s, reply.handle);
 if (i >= MAX_NBD_REQUESTS || !s->requests[i].co ||
-s->reply.handle != s->requests[i].request->handle)
+reply.handle != s->requests[i].request->handle)
 {
 break;
 }
 
-if (s->reply.error == 0 &&
-s->requests[i].request->type == NBD_CMD_READ)
-{
+if (reply.error == 0 && s->requests[i].request->type == NBD_CMD_READ) {
 assert(s->requests[i].qiov != NULL);
 ret = nbd_rwv(s->ioc, s->requests[i].qiov->iov,
   s->requests[i].qiov->niov,
@@ -106,6 +104,8 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
 }
 }
 
+s->requests[i].ret = -reply.error;
+
 /* We're woken up by the receiving coroutine itself.  Note that there
  * is no race between yielding and reentering read_reply_co.  This
  * is because:
@@ -121,7 +121,7 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
 qemu_coroutine_yield();
 }
 
-s->reply.handle = 0;
+s->eio_to_all = true;
 nbd_recv_coroutines_wake_all(s);
 s->read_reply_co = NULL;
 }
@@ -180,19 +180,14 @@ static int nbd_co_request(BlockDriverState *bs,
 
 /* Wait until we're woken up by nbd_read_reply_entry.  */
 qemu_coroutine_yield();
-if (!s->ioc || s->reply.handle == 0) {
+if (!s->ioc || s->eio_to_all) {
 rc = -EIO;
 goto out;
 }
 
-assert(s->reply.handle == request->handle);
-
-rc = -s->reply.error;
+rc = s->requests[i].ret;
 
 out:
-/* Tell the read handler to read another header.  */
-s->reply.handle = 0;
-
 s->requests[i].co = NULL;
 
 /* Kick the read_reply_co to get the next reply.  */
-- 
2.11.1

[Qemu-block] [PATCH 05/17] block/nbd-client: get rid of ssize_t

2017-08-04 Thread Vladimir Sementsov-Ogievskiy

Use int variable for nbd_co_send_request return value (as
nbd_co_send_request returns int).

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 block/nbd-client.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index 25dd28406b..dc19894a7c 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -214,7 +214,7 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t 
offset,
 .len = bytes,
 };
 NBDReply reply;
-ssize_t ret;
+int ret;
 
 assert(bytes <= NBD_MAX_BUFFER_SIZE);
 assert(!flags);
@@ -239,7 +239,7 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t 
offset,
 .len = bytes,
 };
 NBDReply reply;
-ssize_t ret;
+int ret;
 
 if (flags & BDRV_REQ_FUA) {
 assert(client->info.flags & NBD_FLAG_SEND_FUA);
@@ -261,7 +261,7 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t 
offset,
 int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
 int bytes, BdrvRequestFlags flags)
 {
-ssize_t ret;
+int ret;
 NBDClientSession *client = nbd_get_client_session(bs);
 NBDRequest request = {
 .type = NBD_CMD_WRITE_ZEROES,
@@ -297,7 +297,7 @@ int nbd_client_co_flush(BlockDriverState *bs)
 NBDClientSession *client = nbd_get_client_session(bs);
 NBDRequest request = { .type = NBD_CMD_FLUSH };
 NBDReply reply;
-ssize_t ret;
+int ret;
 
 if (!(client->info.flags & NBD_FLAG_SEND_FLUSH)) {
 return 0;
@@ -325,7 +325,7 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t 
offset, int bytes)
 .len = bytes,
 };
 NBDReply reply;
-ssize_t ret;
+int ret;
 
 if (!(client->info.flags & NBD_FLAG_SEND_TRIM)) {
 return 0;
-- 
2.11.1

[Qemu-block] [PATCH 14/17] block/nbd-client: exit reply-reading coroutine on incorrect handle

2017-08-04 Thread Vladimir Sementsov-Ogievskiy

Check reply-handle == request-handle in the same place where the
recv coroutine number is calculated from reply->handle and its
correctness is checked - in nbd_read_reply_entry.

Also finish nbd_read_reply_entry in case of reply-handle !=
request-handle in the same way as in case of incorrect reply-handle.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 block/nbd-client.h | 1 +
 block/nbd-client.c | 9 +++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/block/nbd-client.h b/block/nbd-client.h
index 48e2559df6..aa36be8950 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -29,6 +29,7 @@ typedef struct NBDClientSession {
 
 struct {
 Coroutine *co;
+NBDRequest *request;
 } requests[MAX_NBD_REQUESTS];
 NBDReply reply;
 } NBDClientSession;
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 5eb126c399..0e12db4be3 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -88,7 +88,9 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
  * one coroutine is called until the reply finishes.
  */
 i = HANDLE_TO_INDEX(s, s->reply.handle);
-if (i >= MAX_NBD_REQUESTS || !s->requests[i].co) {
+if (i >= MAX_NBD_REQUESTS || !s->requests[i].co ||
+s->reply.handle != s->requests[i].request->handle)
+{
 break;
 }
 
@@ -135,6 +137,7 @@ static int nbd_co_request(BlockDriverState *bs,
 g_assert(qemu_in_coroutine());
 assert(i < MAX_NBD_REQUESTS);
 request->handle = INDEX_TO_HANDLE(s, i);
+s->requests[i].request = request;
 
 if (!s->ioc) {
 qemu_co_mutex_unlock(>send_mutex);
@@ -170,11 +173,13 @@ static int nbd_co_request(BlockDriverState *bs,
 
 /* Wait until we're woken up by nbd_read_reply_entry.  */
 qemu_coroutine_yield();
-if (s->reply.handle != request->handle || !s->ioc) {
+if (!s->ioc || s->reply.handle == 0) {
 rc = -EIO;
 goto out;
 }
 
+assert(s->reply.handle == request->handle);
+
 if (qiov && s->reply.error == 0) {
 ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, true, NULL);
 if (ret != request->len) {
-- 
2.11.1

[Qemu-block] [PATCH 03/17] nbd/client: refactor nbd_receive_reply

2017-08-04 Thread Vladimir Sementsov-Ogievskiy

Refactor nbd_receive_reply to return 1 on success, 0 on eof (when no
data was read) and <0 in all other cases, because the returned size of
the read data is not actually used.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 include/block/nbd.h |  2 +-
 nbd/client.c| 12 +---
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index 9c3d0a5868..f7450608b4 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -164,7 +164,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name,
 int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info,
  Error **errp);
 ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request);
-ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp);
+int nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp);
 int nbd_client(int fd);
 int nbd_disconnect(int fd);
 
diff --git a/nbd/client.c b/nbd/client.c
index 4556056daa..a1758a1931 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -914,11 +914,16 @@ ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest 
*request)
 return nbd_write(ioc, buf, sizeof(buf), NULL);
 }
 
-ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
+/* nbd_receive_reply
+ * Returns 1 on success
+ * 0 on eof, when no data was read from @ioc (errp is not set)
+ * < 0 on fail
+ */
+int nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
 {
 uint8_t buf[NBD_REPLY_SIZE];
 uint32_t magic;
-ssize_t ret;
+int ret;
 
 ret = nbd_read_eof(ioc, buf, sizeof(buf), errp);
 if (ret <= 0) {
@@ -948,6 +953,7 @@ ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, 
Error **errp)
 error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
 return -EINVAL;
 }
-return sizeof(buf);
+
+return 1;
 }
 
-- 
2.11.1

[Qemu-block] [PATCH 12/17] block/nbd-client: refactor nbd_co_request

2017-08-04 Thread Vladimir Sementsov-Ogievskiy

Reduce nesting, get rid of extra variable.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 block/nbd-client.c | 25 -
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index b84cab4079..d6145c7db0 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -118,7 +118,6 @@ static int nbd_co_request(BlockDriverState *bs,
 {
 NBDClientSession *s = nbd_get_client_session(bs);
 int rc, ret, i;
-NBDReply reply;
 
 qemu_co_mutex_lock(>send_mutex);
 while (s->in_flight == MAX_NBD_REQUESTS) {
@@ -171,20 +170,20 @@ static int nbd_co_request(BlockDriverState *bs,
 
 /* Wait until we're woken up by nbd_read_reply_entry.  */
 qemu_coroutine_yield();
-reply = s->reply;
-if (reply.handle != request->handle ||
-!s->ioc) {
-reply.error = EIO;
-} else {
-if (qiov && reply.error == 0) {
-ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, true,
-  NULL);
-if (ret != request->len) {
-reply.error = EIO;
-}
+if (s->reply.handle != request->handle || !s->ioc) {
+rc = -EIO;
+goto out;
+}
+
+if (qiov && s->reply.error == 0) {
+ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, true, NULL);
+if (ret != request->len) {
+rc = -EIO;
+goto out;
 }
 }
-rc = -reply.error;
+
+rc = -s->reply.error;
 
 out:
 /* Tell the read handler to read another header.  */
-- 
2.11.1

[Qemu-block] [PATCH 01/17] nbd/client: fix nbd_opt_go

2017-08-04 Thread Vladimir Sementsov-Ogievskiy

Do not send NBD_OPT_ABORT to a broken server. After sending
NBD_REP_ACK in reply to NBD_OPT_GO, the server is most probably already
in the transmission phase, where option sending is finished.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 nbd/client.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/nbd/client.c b/nbd/client.c
index 0a17de80b5..f1c16b588f 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -399,12 +399,10 @@ static int nbd_opt_go(QIOChannel *ioc, const char 
*wantname,
phase, but make sure it sent flags */
 if (len) {
 error_setg(errp, "server sent invalid NBD_REP_ACK");
-nbd_send_opt_abort(ioc);
 return -1;
 }
 if (!info->flags) {
 error_setg(errp, "broken server omitted NBD_INFO_EXPORT");
-nbd_send_opt_abort(ioc);
 return -1;
 }
 trace_nbd_opt_go_success();
-- 
2.11.1

< 3 4 5 6 7 8 9 10 11 12 >

701 - 800 of 9422 matches

Mail list logo