[Qemu-block] [PATCH 0/2] block/rbd: enable filename parsing on open

2018-09-10 Thread Jeff Cody
This series enables filename parsing on open for the old key/value pair
format.  Recent changes to the new option format for rbd broke some
images; see patch 2 for more details.

Jeff Cody (2):
  block/rbd: pull out qemu_rbd_convert_options
  block/rbd: Attempt to parse legacy filenames

 block/rbd.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 51 insertions(+), 15 deletions(-)

-- 
2.17.1




[Qemu-block] [PATCH 1/2] block/rbd: pull out qemu_rbd_convert_options

2018-09-10 Thread Jeff Cody
Code movement to pull the conversion from Qdict to BlockdevOptionsRbd
into a helper function.

Signed-off-by: Jeff Cody 
---
 block/rbd.c | 36 ++++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index ca8e5bbace..a8e79d01d2 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -655,12 +655,34 @@ failed_opts:
 return r;
 }
 
+static int qemu_rbd_convert_options(BlockDriverState *bs, QDict *options,
+BlockdevOptionsRbd **opts, Error **errp)
+{
+Visitor *v;
+Error *local_err = NULL;
+
+/* Convert the remaining options into a QAPI object */
+v = qobject_input_visitor_new_flat_confused(options, errp);
+if (!v) {
+return -EINVAL;
+}
+
+visit_type_BlockdevOptionsRbd(v, NULL, opts, &local_err);
+visit_free(v);
+
+if (local_err) {
+error_propagate(errp, local_err);
+return -EINVAL;
+}
+
+return 0;
+}
+
 static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
  Error **errp)
 {
 BDRVRBDState *s = bs->opaque;
 BlockdevOptionsRbd *opts = NULL;
-Visitor *v;
 const QDictEntry *e;
 Error *local_err = NULL;
 char *keypairs, *secretid;
@@ -676,19 +698,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
 qdict_del(options, "password-secret");
 }
 
-/* Convert the remaining options into a QAPI object */
-v = qobject_input_visitor_new_flat_confused(options, errp);
-if (!v) {
-r = -EINVAL;
-goto out;
-}
-
-visit_type_BlockdevOptionsRbd(v, NULL, &opts, &local_err);
-visit_free(v);
-
+r = qemu_rbd_convert_options(bs, options, &opts, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
-r = -EINVAL;
 goto out;
 }
 
-- 
2.17.1




[Qemu-block] [PATCH 2/2] block/rbd: Attempt to parse legacy filenames

2018-09-10 Thread Jeff Cody
When we converted rbd to get rid of the older key/value-centric
encoding format, we broke compatibility with image files with backing
file strings encoded in the old format.

This leaves a bit of an ugly conundrum, and a hacky solution.

If the initial attempt to parse the "proper" options fails, assume that
we may have an older key/value encoded filename.  Fall back to parsing
the filename and extracting the required options from it.  If that also
fails, pass along the original error message.

This approach has a potential drawback: if for some reason there are
some options supplied the new way, and some the old way, we may not
catch all the old options if they are not required options (since it
won't cause the initial failure).
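
(For illustration: a backing file string in the legacy encoding that
this fallback is meant to catch looks something like

    rbd:pool/image@snap:conf=/etc/ceph/ceph.conf:id=admin

with every option packed into the filename rather than split out into
discrete options; the pool/image/snapshot names here are made up.)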

Signed-off-by: Jeff Cody 
---
 block/rbd.c | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index a8e79d01d2..bce86b8bde 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -685,7 +685,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
 BlockdevOptionsRbd *opts = NULL;
 const QDictEntry *e;
 Error *local_err = NULL;
-char *keypairs, *secretid;
+char *keypairs, *secretid, *filename;
 int r;
 
 keypairs = g_strdup(qdict_get_try_str(options, "=keyvalue-pairs"));
@@ -700,8 +700,32 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
 
r = qemu_rbd_convert_options(bs, options, &opts, &local_err);
 if (local_err) {
-error_propagate(errp, local_err);
-goto out;
+/* If the initial attempt to convert and process the options failed,
+ * we may be attempting to open an image file that has the rbd options
+ * specified in the older format consisting of all key/value pairs
+ * encoded in the filename.  Go ahead and attempt to parse the
+ * filename, and see if we can pull out the required options */
+Error *parse_err = NULL;
+
+filename = g_strdup(qdict_get_try_str(options, "filename"));
+qdict_del(options, "filename");
+
+qemu_rbd_parse_filename(filename, options, NULL);
+
+g_free(keypairs);
+keypairs = g_strdup(qdict_get_try_str(options, "=keyvalue-pairs"));
+if (keypairs) {
+qdict_del(options, "=keyvalue-pairs");
+}
+
+r = qemu_rbd_convert_options(bs, options, &opts, &parse_err);
+if (parse_err) {
+/* if the second attempt failed, pass along the original error
+ * message for the current format */
+error_propagate(errp, local_err);
+error_free(parse_err);
+goto out;
+}
 }
 
 /* Remove the processed options from the QDict (the visitor processes
-- 
2.17.1




Re: [Qemu-block] [Qemu-devel] [PATCH v2 3/4] blockdev: rename block-dirty-bitmap-clear transaction handlers

2018-09-10 Thread John Snow



On 07/06/2018 07:36 AM, Vladimir Sementsov-Ogievskiy wrote:
> Rename block-dirty-bitmap-clear transaction handlers to reuse them for
> x-block-dirty-bitmap-merge transaction in the following patch.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  blockdev.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/blockdev.c b/blockdev.c
> index 9cb29ca63e..5348e8ba9b 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -2026,7 +2026,7 @@ static void 
> block_dirty_bitmap_clear_prepare(BlkActionState *common,
>  bdrv_clear_dirty_bitmap(state->bitmap, &state->backup);
>  }
>  
> -static void block_dirty_bitmap_clear_abort(BlkActionState *common)
> +static void block_dirty_bitmap_restore(BlkActionState *common)
>  {
>  BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
>   common, common);
> @@ -2036,7 +2036,7 @@ static void 
> block_dirty_bitmap_clear_abort(BlkActionState *common)
>  }
>  }
>  
> -static void block_dirty_bitmap_clear_commit(BlkActionState *common)
> +static void block_dirty_bitmap_free_backup(BlkActionState *common)
>  {
>  BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
>   common, common);
> @@ -2170,8 +2170,8 @@ static const BlkActionOps actions[] = {
>  [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR] = {
>  .instance_size = sizeof(BlockDirtyBitmapState),
>  .prepare = block_dirty_bitmap_clear_prepare,
> -.commit = block_dirty_bitmap_clear_commit,
> -.abort = block_dirty_bitmap_clear_abort,
> +.commit = block_dirty_bitmap_free_backup,
> +.abort = block_dirty_bitmap_restore,
>  },
>  [TRANSACTION_ACTION_KIND_X_BLOCK_DIRTY_BITMAP_ENABLE] = {
>  .instance_size = sizeof(BlockDirtyBitmapState),
> 

Reviewed-by: John Snow 



Re: [Qemu-block] [Qemu-devel] [PATCH v2 2/4] dirty-bitmap: restore bitmap after merge

2018-09-10 Thread John Snow
We actually don't restore anything just yet, so this commit message is a
little off I think.

On 07/06/2018 07:36 AM, Vladimir Sementsov-Ogievskiy wrote:
> Add backup parameter to bdrv_merge_dirty_bitmap() and refactor
> bdrv_undo_clear_dirty_bitmap() to use it both for undo clean and merge.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  include/block/block_int.h|  2 +-
>  include/block/dirty-bitmap.h |  2 +-
>  include/qemu/hbitmap.h   | 25 -
>  block/dirty-bitmap.c | 21 -
>  blockdev.c   |  4 ++--
>  util/hbitmap.c   | 11 ---
>  6 files changed, 44 insertions(+), 21 deletions(-)
> 
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index af71b414be..64e38b4fae 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -1144,7 +1144,7 @@ bool blk_dev_is_medium_locked(BlockBackend *blk);
>  void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);
>  
>  void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
> -void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in);
> +void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup);
>  
>  void bdrv_inc_in_flight(BlockDriverState *bs);
>  void bdrv_dec_in_flight(BlockDriverState *bs);
> diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
> index 259bd27c40..201ff7f20b 100644
> --- a/include/block/dirty-bitmap.h
> +++ b/include/block/dirty-bitmap.h
> @@ -71,7 +71,7 @@ void bdrv_dirty_bitmap_set_persistance(BdrvDirtyBitmap 
> *bitmap,
> bool persistent);
>  void bdrv_dirty_bitmap_set_qmp_locked(BdrvDirtyBitmap *bitmap, bool 
> qmp_locked);
>  void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap 
> *src,
> - Error **errp);
> + HBitmap **backup, Error **errp);
>  
>  /* Functions that require manual locking.  */
>  void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap);
> diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
> index ddca52c48e..a7cb780592 100644
> --- a/include/qemu/hbitmap.h
> +++ b/include/qemu/hbitmap.h
> @@ -73,16 +73,23 @@ void hbitmap_truncate(HBitmap *hb, uint64_t size);
>  
>  /**
>   * hbitmap_merge:
> - * @a: The bitmap to store the result in.
> - * @b: The bitmap to merge into @a.
> - * @return true if the merge was successful,
> - * false if it was not attempted.
> - *
> - * Merge two bitmaps together.
> - * A := A (BITOR) B.
> - * B is left unmodified.
> + *
> + * Store result of merging @a and @b into @result.
> + * @result is allowed to be equal to @a or @b.
> + *
> + * Return true if the merge was successful,
> + *false if it was not attempted.
> + */
> +bool hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result);

I wonder if this confuses any optimization or analysis tools. I'll
assume it's safe.

> +
> +/**
> + * hbitmap_can_merge:
> + *
> + * hbitmap_can_merge(a, b) && hbitmap_can_merge(a, result) is sufficient and
> + * necessary for hbitmap_merge() not to fail.
> + *
>   */
> -bool hbitmap_merge(HBitmap *a, const HBitmap *b);
> +bool hbitmap_can_merge(const HBitmap *a, const HBitmap *b);
>  
>  /**
>   * hbitmap_empty:
> diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
> index 6c8761e027..8ac933cf1c 100644
> --- a/block/dirty-bitmap.c
> +++ b/block/dirty-bitmap.c
> @@ -314,7 +314,7 @@ BdrvDirtyBitmap 
> *bdrv_reclaim_dirty_bitmap_locked(BlockDriverState *bs,
>  return NULL;
>  }
>  
> -if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
> +if (!hbitmap_merge(parent->bitmap, successor->bitmap, parent->bitmap)) {
>  error_setg(errp, "Merging of parent and successor bitmap failed");
>  return NULL;
>  }
> @@ -633,12 +633,12 @@ void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, 
> HBitmap **out)
>  bdrv_dirty_bitmap_unlock(bitmap);
>  }
>  
> -void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
> +void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup)
>  {
>  HBitmap *tmp = bitmap->bitmap;
>  assert(bdrv_dirty_bitmap_enabled(bitmap));
>  assert(!bdrv_dirty_bitmap_readonly(bitmap));
> -bitmap->bitmap = in;
> +bitmap->bitmap = backup;
>  hbitmap_free(tmp);
>  }
>  
> @@ -791,8 +791,10 @@ int64_t bdrv_dirty_bitmap_next_zero(BdrvDirtyBitmap 
> *bitmap, uint64_t offset)
>  }
>  
>  void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap 
> *src,
> - Error **errp)
> + HBitmap **backup, Error **errp)
>  {
> +bool ret;
> +
>  /* only bitmaps from one bds are supported */
>  assert(dest->mutex == src->mutex);
>  
> @@ -810,11 +812,20 @@ void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, 
> const BdrvDirtyBitmap *src,
>  goto out;
>  }
>  
> -

Re: [Qemu-block] [Qemu-devel] [PATCH v3 8/8] Revert "hbitmap: Add @advance param to hbitmap_iter_next()"

2018-09-10 Thread John Snow



On 08/14/2018 08:14 AM, Vladimir Sementsov-Ogievskiy wrote:
> This reverts commit a33fbb4f8b64226becf502a123733776ce319b24.
> 
> The functionality is unused.
> 
> Note: in addition to automatic revert, drop second parameter in
> hbitmap_iter_next() call from hbitmap_next_dirty_area() too.
> 

Hm, it's not really a "revert" in the revision control sense then, but I
don't think I care about that kind of pedantic distinction personally.

6,7,8: Reviewed-by: John Snow 

> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  include/qemu/hbitmap.h |  5 +
>  block/backup.c |  2 +-
>  block/dirty-bitmap.c   |  2 +-
>  tests/test-hbitmap.c   | 26 +-
>  util/hbitmap.c | 12 
>  5 files changed, 20 insertions(+), 27 deletions(-)
> 
> diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
> index 7800317bf3..6d205167ce 100644
> --- a/include/qemu/hbitmap.h
> +++ b/include/qemu/hbitmap.h
> @@ -345,14 +345,11 @@ void hbitmap_free_meta(HBitmap *hb);
>  /**
>   * hbitmap_iter_next:
>   * @hbi: HBitmapIter to operate on.
> - * @advance: If true, advance the iterator.  Otherwise, the next call
> - *   of this function will return the same result (if that
> - *   position is still dirty).
>   *
>   * Return the next bit that is set in @hbi's associated HBitmap,
>   * or -1 if all remaining bits are zero.
>   */
> -int64_t hbitmap_iter_next(HBitmapIter *hbi, bool advance);
> +int64_t hbitmap_iter_next(HBitmapIter *hbi);
>  
>  /**
>   * hbitmap_iter_next_word:
> diff --git a/block/backup.c b/block/backup.c
> index 9bfd3f7189..f033148f21 100644
> --- a/block/backup.c
> +++ b/block/backup.c
> @@ -421,7 +421,7 @@ static int coroutine_fn 
> backup_run_incremental(BackupBlockJob *job)
>  HBitmapIter hbi;
>  
>  hbitmap_iter_init(&hbi, job->copy_bitmap, 0);
> -while ((cluster = hbitmap_iter_next(&hbi, true)) != -1) {
> +while ((cluster = hbitmap_iter_next(&hbi)) != -1) {
>  do {
>  if (yield_and_check(job)) {
>  return 0;
> diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
> index d9333175b3..d0d602ff52 100644
> --- a/block/dirty-bitmap.c
> +++ b/block/dirty-bitmap.c
> @@ -525,7 +525,7 @@ void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter)
>  
>  int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter)
>  {
> -return hbitmap_iter_next(&iter->hbi, true);
> +return hbitmap_iter_next(&iter->hbi);
>  }
>  
>  /* Called within bdrv_dirty_bitmap_lock..unlock */
> diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c
> index 5e43056970..17a3c807de 100644
> --- a/tests/test-hbitmap.c
> +++ b/tests/test-hbitmap.c
> @@ -46,7 +46,7 @@ static void hbitmap_test_check(TestHBitmapData *data,
>  
>  i = first;
>  for (;;) {
> -next = hbitmap_iter_next(&hbi, true);
> +next = hbitmap_iter_next(&hbi);
>  if (next < 0) {
>  next = data->size;
>  }
> @@ -435,25 +435,25 @@ static void 
> test_hbitmap_iter_granularity(TestHBitmapData *data,
>  /* Note that hbitmap_test_check has to be invoked manually in this test. 
>  */
>  hbitmap_test_init(data, 131072 << 7, 7);
>  hbitmap_iter_init(&hbi, data->hb, 0);
> -g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0);
> +g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
>  
>  hbitmap_test_set(data, ((L2 + L1 + 1) << 7) + 8, 8);
>  hbitmap_iter_init(&hbi, data->hb, 0);
> -g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, (L2 + L1 + 1) << 7);
> -g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0);
> +g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7);
> +g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
>  
>  hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7);
> -g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0);
> +g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
>  
>  hbitmap_test_set(data, (131072 << 7) - 8, 8);
>  hbitmap_iter_init(&hbi, data->hb, 0);
> -g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, (L2 + L1 + 1) << 7);
> -g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, 131071 << 7);
> -g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0);
> +g_assert_cmpint(hbitmap_iter_next(&hbi), ==, (L2 + L1 + 1) << 7);
> +g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7);
> +g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
>  
>  hbitmap_iter_init(&hbi, data->hb, (L2 + L1 + 2) << 7);
> -g_assert_cmpint(hbitmap_iter_next(&hbi, true), ==, 131071 << 7);
> -g_assert_cmpint(hbitmap_iter_next(&hbi, true), <, 0);
> +g_assert_cmpint(hbitmap_iter_next(&hbi), ==, 131071 << 7);
> +g_assert_cmpint(hbitmap_iter_next(&hbi), <, 0);
>  }
>  
>  static void hbitmap_test_set_boundary_bits(TestHBitmapData *data, ssize_t 
> diff)
> @@ -893,7 +893,7 @@ static void test_hbitmap_serialize_zeroes(TestHBitmapData 
> *data,
>  for (i = 0; i < num_positions; i++) {
>  hbitmap_deserialize_zeroes(data->hb, positions[i], min_l1, true);
>  hbitmap_iter_init(&hbi, data->hb, 0);
> -next = hbitmap_iter_next(&hbi, 

Re: [Qemu-block] [Qemu-devel] [PATCH v3 5/8] block/mirror: fix and improve do_sync_target_write

2018-09-10 Thread John Snow



On 08/14/2018 08:14 AM, Vladimir Sementsov-Ogievskiy wrote:
> Use bdrv_dirty_bitmap_next_dirty_area() instead of
> bdrv_dirty_iter_next_area(), because of the following problems of
> bdrv_dirty_iter_next_area():
> 
> 1. Using HBitmap iterators we should carefully handle unaligned offset,
> as first call to hbitmap_iter_next() may return a value less than
> original offset (actually, it will be original offset rounded down to
> bitmap granularity). This handling is not done in
> do_sync_target_write().
> 
> 2. bdrv_dirty_iter_next_area() handles unaligned max_offset
> incorrectly:
> 
> look at the code:
> if (max_offset == iter->bitmap->size) {
> /* If max_offset points to the image end, round it up by the
>  * bitmap granularity */
> gran_max_offset = ROUND_UP(max_offset, granularity);
> } else {
> gran_max_offset = max_offset;
> }
> 
> ret = hbitmap_iter_next(&iter->hbi, false);
> if (ret < 0 || ret + granularity > gran_max_offset) {
> return false;
> }
> 
> and assume that max_offset != iter->bitmap->size but still unaligned.
> if 0 < ret < max_offset we found dirty area, but the function can
> return false in this case (if ret + granularity > max_offset).
> 
> 3. bdrv_dirty_iter_next_area() uses inefficient loop to find the end of
> the dirty area. Let's use more efficient hbitmap_next_zero instead
> (bdrv_dirty_bitmap_next_dirty_area() do so)
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  block/mirror.c | 16 +++-
>  1 file changed, 7 insertions(+), 9 deletions(-)
> 
> diff --git a/block/mirror.c b/block/mirror.c
> index b48c3f8cf5..d2806812c8 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -1167,25 +1167,22 @@ static void do_sync_target_write(MirrorBlockJob *job, 
> MirrorMethod method,
>   uint64_t offset, uint64_t bytes,
>   QEMUIOVector *qiov, int flags)
>  {
> -BdrvDirtyBitmapIter *iter;
>  QEMUIOVector target_qiov;
> -uint64_t dirty_offset;
> -int dirty_bytes;
> +uint64_t dirty_offset = offset;
> +uint64_t dirty_bytes;
>  
>  if (qiov) {
>  qemu_iovec_init(&target_qiov, qiov->niov);
>  }
>  
> -iter = bdrv_dirty_iter_new(job->dirty_bitmap);
> -bdrv_set_dirty_iter(iter, offset);
> -
>  while (true) {
>  bool valid_area;
>  int ret;
>  
>  bdrv_dirty_bitmap_lock(job->dirty_bitmap);
> -valid_area = bdrv_dirty_iter_next_area(iter, offset + bytes,
> -   &dirty_offset, &dirty_bytes);
> +valid_area = bdrv_dirty_bitmap_next_dirty_area(
> +job->dirty_bitmap, &dirty_offset,
> +MIN(offset + bytes, dirty_offset + INT_MAX), &dirty_bytes);

Looks correct, though the invocation looks harder to read now. Might be
a sign that changing the signature is a good idea which I think
you're planning to do.

ACK.

>  if (!valid_area) {
>  bdrv_dirty_bitmap_unlock(job->dirty_bitmap);
>  break;
> @@ -1241,9 +1238,10 @@ static void do_sync_target_write(MirrorBlockJob *job, 
> MirrorMethod method,
>  break;
>  }
>  }
> +
> +dirty_offset += dirty_bytes;
>  }
>  
> -bdrv_dirty_iter_free(iter);
>  if (qiov) {
>  qemu_iovec_destroy(&target_qiov);
>  }
> 



Re: [Qemu-block] [Qemu-devel] [PATCH v2 1/4] drity-bitmap: switch assert-fails to errors in bdrv_merge_dirty_bitmap

2018-09-10 Thread John Snow
s/drity-bitmap/dirty-bitmap/g

On 07/06/2018 07:36 AM, Vladimir Sementsov-Ogievskiy wrote:
> Move checks from qmp_x_block_dirty_bitmap_merge() to
> bdrv_merge_dirty_bitmap(), to share them with dirty bitmap merge
> transaction action in future commit.
> 
> Note: for now, only qmp_x_block_dirty_bitmap_merge() calls
> bdrv_merge_dirty_bitmap().
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  block/dirty-bitmap.c | 15 +--
>  blockdev.c   | 10 --
>  2 files changed, 13 insertions(+), 12 deletions(-)
> 
> diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
> index c9b8a6fd52..6c8761e027 100644
> --- a/block/dirty-bitmap.c
> +++ b/block/dirty-bitmap.c
> @@ -798,12 +798,23 @@ void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, 
> const BdrvDirtyBitmap *src,
>  
>  qemu_mutex_lock(dest->mutex);
>  
> -assert(bdrv_dirty_bitmap_enabled(dest));
> -assert(!bdrv_dirty_bitmap_readonly(dest));
> +if (bdrv_dirty_bitmap_frozen(dest)) {
> +error_setg(errp, "Bitmap '%s' is frozen and cannot be modified",
> +   dest->name);
> +goto out;
> +}
> +
> +if (bdrv_dirty_bitmap_readonly(dest)) {
> +error_setg(errp, "Bitmap '%s' is readonly and cannot be modified",
> +   dest->name);
> +goto out;
> +}
>  
>  if (!hbitmap_merge(dest->bitmap, src->bitmap)) {
>  error_setg(errp, "Bitmaps are incompatible and can't be merged");
> +goto out;

This is unusual right before the out: segment, but harmless.

>  }
>  
> +out:
>  qemu_mutex_unlock(dest->mutex);
>  }
> diff --git a/blockdev.c b/blockdev.c
> index 72f5347df5..902338e815 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -2955,16 +2955,6 @@ void qmp_x_block_dirty_bitmap_merge(const char *node, 
> const char *dst_name,
>  return;
>  }
>  
> -if (bdrv_dirty_bitmap_frozen(dst)) {
> -error_setg(errp, "Bitmap '%s' is frozen and cannot be modified",
> -   dst_name);
> -return;
> -} else if (bdrv_dirty_bitmap_readonly(dst)) {
> -error_setg(errp, "Bitmap '%s' is readonly and cannot be modified",
> -   dst_name);
> -return;
> -}
> -
>  src = bdrv_find_dirty_bitmap(bs, src_name);
>  if (!src) {
>  error_setg(errp, "Dirty bitmap '%s' not found", src_name);
> 

When fixing the commit title, we can add:

Reviewed-by: John Snow 



Re: [Qemu-block] [Qemu-devel] [PATCH v3 4/8] tests: add tests for hbitmap_next_dirty_area

2018-09-10 Thread John Snow



On 08/14/2018 08:14 AM, Vladimir Sementsov-Ogievskiy wrote:
> Signed-off-by: Vladimir Sementsov-Ogievskiy 

ACK, and
Tested-by: John Snow 

> ---
>  tests/test-hbitmap.c | 106 +++
>  1 file changed, 106 insertions(+)
> 
> diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c
> index dddb67c3c5..af5142b481 100644
> --- a/tests/test-hbitmap.c
> +++ b/tests/test-hbitmap.c
> @@ -1016,6 +1016,105 @@ static void test_hbitmap_next_zero_4(TestHBitmapData 
> *data, const void *unused)
>  test_hbitmap_next_zero_do(data, 4);
>  }
>  
> +static void test_hbitmap_next_dirty_area_check(TestHBitmapData *data,
> +   uint64_t offset,
> +   uint64_t count)
> +{
> +uint64_t off1, off2;
> +uint64_t len1 = 0, len2 = 0;
> +bool ret1, ret2;
> +int64_t end;
> +
> +off1 = offset;
> +end = count ? offset + count : 0;
> +ret1 = hbitmap_next_dirty_area(data->hb, &off1, end, &len1);
> +
> +end = count ? offset + count : data->size;
> +
> +for (off2 = offset; off2 < end && !hbitmap_get(data->hb, off2); off2++) {
> +;
> +}
> +
> +for (len2 = 1; off2 + len2 < end && hbitmap_get(data->hb, off2 + len2);
> + len2++)
> +{
> +}
> +
> +ret2 = off2 < end;
> +if (!ret2) {
> +/* leave unchanged */
> +off2 = offset;
> +len2 = 0;
> +}
> +
> +g_assert_cmpint(ret1, ==, ret2);
> +g_assert_cmpint(off1, ==, off2);
> +g_assert_cmpint(len1, ==, len2);
> +}
> +
> +static void test_hbitmap_next_dirty_area_do(TestHBitmapData *data,
> +int granularity)
> +{
> +hbitmap_test_init(data, L3, granularity);
> +test_hbitmap_next_dirty_area_check(data, 0, 0);
> +test_hbitmap_next_dirty_area_check(data, 0, 1);
> +test_hbitmap_next_dirty_area_check(data, L3 - 1, 1);
> +
> +hbitmap_set(data->hb, L2, 1);
> +test_hbitmap_next_dirty_area_check(data, 0, 1);
> +test_hbitmap_next_dirty_area_check(data, 0, L2);
> +test_hbitmap_next_dirty_area_check(data, 0, 0);
> +test_hbitmap_next_dirty_area_check(data, L2 - 1, 0);
> +test_hbitmap_next_dirty_area_check(data, L2 - 1, 1);
> +test_hbitmap_next_dirty_area_check(data, L2 - 1, 2);
> +test_hbitmap_next_dirty_area_check(data, L2 - 1, 3);
> +test_hbitmap_next_dirty_area_check(data, L2, 0);
> +test_hbitmap_next_dirty_area_check(data, L2, 1);
> +test_hbitmap_next_dirty_area_check(data, L2 + 1, 1);
> +
> +hbitmap_set(data->hb, L2 + 5, L1);
> +test_hbitmap_next_dirty_area_check(data, 0, 0);
> +test_hbitmap_next_dirty_area_check(data, L2 - 2, 8);
> +test_hbitmap_next_dirty_area_check(data, L2 + 1, 5);
> +test_hbitmap_next_dirty_area_check(data, L2 + 1, 3);
> +test_hbitmap_next_dirty_area_check(data, L2 + 4, L1);
> +test_hbitmap_next_dirty_area_check(data, L2 + 5, L1);
> +test_hbitmap_next_dirty_area_check(data, L2 + 7, L1);
> +test_hbitmap_next_dirty_area_check(data, L2 + L1, L1);
> +test_hbitmap_next_dirty_area_check(data, L2, 0);
> +test_hbitmap_next_dirty_area_check(data, L2 + 1, 0);
> +
> +hbitmap_set(data->hb, L2 * 2, L3 - L2 * 2);
> +test_hbitmap_next_dirty_area_check(data, 0, 0);
> +test_hbitmap_next_dirty_area_check(data, L2, 0);
> +test_hbitmap_next_dirty_area_check(data, L2 + 1, 0);
> +test_hbitmap_next_dirty_area_check(data, L2 + 5 + L1 - 1, 0);
> +test_hbitmap_next_dirty_area_check(data, L2 + 5 + L1, 5);
> +test_hbitmap_next_dirty_area_check(data, L2 * 2 - L1, L1 + 1);
> +test_hbitmap_next_dirty_area_check(data, L2 * 2, L2);
> +
> +hbitmap_set(data->hb, 0, L3);
> +test_hbitmap_next_dirty_area_check(data, 0, 0);
> +}
> +
> +static void test_hbitmap_next_dirty_area_0(TestHBitmapData *data,
> +   const void *unused)
> +{
> +test_hbitmap_next_dirty_area_do(data, 0);
> +}
> +
> +static void test_hbitmap_next_dirty_area_1(TestHBitmapData *data,
> +   const void *unused)
> +{
> +test_hbitmap_next_dirty_area_do(data, 1);
> +}
> +
> +static void test_hbitmap_next_dirty_area_4(TestHBitmapData *data,
> +   const void *unused)
> +{
> +test_hbitmap_next_dirty_area_do(data, 4);
> +}
> +
>  int main(int argc, char **argv)
>  {
>  g_test_init(&argc, &argv, NULL);
> @@ -1082,6 +1181,13 @@ int main(int argc, char **argv)
>  hbitmap_test_add("/hbitmap/next_zero/next_zero_4",
>   test_hbitmap_next_zero_4);
>  
> +hbitmap_test_add("/hbitmap/next_dirty_area/next_dirty_area_0",
> + test_hbitmap_next_dirty_area_0);
> +hbitmap_test_add("/hbitmap/next_dirty_area/next_dirty_area_1",
> + test_hbitmap_next_dirty_area_1);
> +hbitmap_test_add("/hbitmap/next_dirty_area/next_dirty_area_4",
> + 

Re: [Qemu-block] [Qemu-devel] [PATCH v3 3/8] dirty-bitmap: add bdrv_dirty_bitmap_next_dirty_area

2018-09-10 Thread John Snow



On 08/14/2018 08:14 AM, Vladimir Sementsov-Ogievskiy wrote:
> The function alters bdrv_dirty_iter_next_area(), which is wrong and
> less efficient (see further commit
> "block/mirror: fix and improve do_sync_target_write" for description).
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  include/block/dirty-bitmap.h |  3 +++
>  include/qemu/hbitmap.h   | 15 +++
>  block/dirty-bitmap.c |  7 +++
>  util/hbitmap.c   | 38 ++
>  4 files changed, 63 insertions(+)
> 
> diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
> index 27f5299c4e..cb9162fa7e 100644
> --- a/include/block/dirty-bitmap.h
> +++ b/include/block/dirty-bitmap.h
> @@ -100,6 +100,9 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_next(BlockDriverState 
> *bs,
>  char *bdrv_dirty_bitmap_sha256(const BdrvDirtyBitmap *bitmap, Error **errp);
>  int64_t bdrv_dirty_bitmap_next_zero(BdrvDirtyBitmap *bitmap, uint64_t start,
>  int64_t end);
> +bool bdrv_dirty_bitmap_next_dirty_area(BdrvDirtyBitmap *bitmap,
> +   uint64_t *offset, uint64_t end,
> +   uint64_t *length);
>  BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap_locked(BlockDriverState *bs,
>BdrvDirtyBitmap *bitmap,
>Error **errp);
> diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
> index fe4dfde27a..7800317bf3 100644
> --- a/include/qemu/hbitmap.h
> +++ b/include/qemu/hbitmap.h
> @@ -306,6 +306,21 @@ unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi);
>   */
>  int64_t hbitmap_next_zero(const HBitmap *hb, uint64_t start, int64_t end);
>  
> +/* hbitmap_next_dirty_area:
> + * @hb: The HBitmap to operate on
> + * @offset: in-out parameter.
> + *  in: the offset to start from
> + *  out: (if area found) start of found area
> + * @end: end of requested region. (*@offset + *@length) will be <= @end
> + * @length: length of found area
> + *
> + * If dirty area found within [@offset, @end), returns true and sets @offset
> + * and @length appropriately. Otherwise returns true and leaves @offset and
> + * @length unchanged.

It returns true in both cases? I think you mean 'false' the second time.

> + */
> +bool hbitmap_next_dirty_area(const HBitmap *hb, uint64_t *offset,
> + uint64_t end, uint64_t *length);
> +
>  /* hbitmap_create_meta:
>   * Create a "meta" hbitmap to track dirtiness of the bits in this HBitmap.
>   * The caller owns the created bitmap and must call hbitmap_free_meta(hb) to
> diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
> index 037ae62726..4af20a1beb 100644
> --- a/block/dirty-bitmap.c
> +++ b/block/dirty-bitmap.c
> @@ -791,6 +791,13 @@ int64_t bdrv_dirty_bitmap_next_zero(BdrvDirtyBitmap 
> *bitmap, uint64_t offset,
>  return hbitmap_next_zero(bitmap->bitmap, offset, end);
>  }
>  
> +bool bdrv_dirty_bitmap_next_dirty_area(BdrvDirtyBitmap *bitmap,
> +   uint64_t *offset, uint64_t end,
> +   uint64_t *length)
> +{
> +return hbitmap_next_dirty_area(bitmap->bitmap, offset, end, length);
> +}
> +
>  void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap 
> *src,
>   Error **errp)
>  {
> diff --git a/util/hbitmap.c b/util/hbitmap.c
> index 1687372504..bf88c1223e 100644
> --- a/util/hbitmap.c
> +++ b/util/hbitmap.c
> @@ -244,6 +244,44 @@ int64_t hbitmap_next_zero(const HBitmap *hb, uint64_t 
> start, int64_t end)
>  return res;
>  }
>  
> +bool hbitmap_next_dirty_area(const HBitmap *hb, uint64_t *offset,
> + uint64_t end, uint64_t *length)
> +{
> +HBitmapIter hbi;
> +int64_t off1, off0;

Can we have more descriptive names for these?

> +uint32_t granularity = 1UL << hb->granularity;
> +
> +if (end == 0) {
> +end = hb->orig_size;
> +}
> +
> +hbitmap_iter_init(&hbi, hb, *offset);

Even with a new iterator in every call, this is still faster than the
old one? ... I guess the old one uses a new iterator for every call, too.

Well, this does look more straightforward and we don't loop as much, so
it seems right.

> +off1 = hbitmap_iter_next(&hbi, true);
> +
> +if (off1 < 0 || off1 >= end) {
> +return false;
> +}
> +
> +if (off1 + granularity >= end) {
> +if (off1 > *offset) {
> +*offset = off1;
> +}
> +*length = end - *offset;
> +return true;
> +}
> +
> +off0 = hbitmap_next_zero(hb, off1 + granularity, end);
> +if (off0 < 0) {
> +off0 = end;
> +}
> +
> +if (off1 > *offset) {
> +*offset = off1;
> +}
> +*length = off0 - *offset;
> +return true;
> +}
> +

I might have consolidated setting the output parameters to one block
instead of 

Re: [Qemu-block] [PATCH 0/4 for-3.0?] NBD fixes for unaligned images

2018-09-10 Thread Eric Blake

On 9/10/18 2:25 PM, John Snow wrote:

Hi, has this series been superseded or do we need a respin for 3.1?


Needs a respin.

There are two issues still to be solved:

1. Asking the top-most block layer for its alignment isn't necessarily 
right for qemu as server.  If we have:


backing (512) <- active (4k)

and tell the client that they can't access anything smaller than 4k 
granularity, but then read through to the backing layer which does just 
that, then we're wrong.  Either the block layer has to be beefed up to 
make bdrv_block_status() round backing file's smaller granularity into 
the size we advertised (perhaps by adding a flag to bdrv_block_status() 
to declare whether we prefer the most accurate information vs. the 
rounded information) - or else qemu as NBD server should ALWAYS 
advertise a blocksize of 1-512 (1 because we can, even if by 
read-modify-write; or 512 because except for EOF issues on a 
non-sector-aligned file, we don't encounter mid-sector transitions 
anywhere else, and EOF is easier to special case than everywhere).
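
(In NBD protocol terms, that second choice amounts to always replying to 
NBD_INFO_BLOCK_SIZE with a minimum block size of 1 or 512, no matter 
what the top-most driver reports; which of the two constants to pick is 
the open question above.)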


2. Patch 5 fixes qemu as client to not round valid requests past EOF, 
but does not fix the case of a request that intentionally exceeds EOF 
but fits within the rounded file size from reaching the server. Either 
the NBD client code should itself perform EOF validation (reads from the 
EOF hole see zero, writes of anything other than zero fail with ENOSPC), 
or the NBD client code should round the server's file size down instead 
of up.  I haven't decided which approach is better.
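
(A concrete sketch with made-up numbers: for a 1000-byte image the 
server advertises 1024 bytes.  A client request covering bytes 1000-1023 
then fits within the advertised size -- so the rounding fix in patch 5 
does not catch it -- yet it lies entirely beyond the real EOF.  With the 
first approach the client itself answers such reads with zeroes and 
fails such non-zero writes with ENOSPC; with the second it rounds the 
size down to 1000 and rejects the request outright.)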


But right now, these fixes are taking a back seat to me trying to get a 
libvirt incremental backup demo working.


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



Re: [Qemu-block] [PATCH 0/4 for-3.0?] NBD fixes for unaligned images

2018-09-10 Thread John Snow
Hi, has this series been superseded or do we need a respin for 3.1?

(It's not quite been a month, but it caught my eye.)

--js

On 08/02/2018 10:48 AM, Eric Blake wrote:
> Rich reported a bug when using qemu as client to nbdkit serving
> a non-sector-aligned image; in turn, I found a second bug with
> qemu as server of such an image.
> 
> Both bugs were present in 2.12, and thus are not new regressions
> in 3.0. If there is a reason to spin -rc4, then these could be
> included; but this series alone is not a driving reason to cause
> -rc4.
> 
> Eric Blake (4):
>   block: Add bdrv_get_request_alignment()
>   nbd/server: Advertise actual minimum block size
>   iotests: Add 228 to test NBD on unaligned images
>   nbd/client: Deal with unaligned size from server
> 
>  include/sysemu/block-backend.h |  1 +
>  block/block-backend.c  |  7 +++
>  block/nbd.c| 11 -
>  nbd/server.c   | 10 +++--
>  tests/qemu-iotests/228 | 96 ++
>  tests/qemu-iotests/228.out |  8 
>  tests/qemu-iotests/group   |  1 +
>  7 files changed, 129 insertions(+), 5 deletions(-)
>  create mode 100755 tests/qemu-iotests/228
>  create mode 100644 tests/qemu-iotests/228.out
> 



Re: [Qemu-block] [PATCH v2] qcow2: Release dirty entries with cache-clean-interval

2018-09-10 Thread John Snow



On 08/09/2018 09:44 AM, Alberto Garcia wrote:
> The cache-clean-interval option is used to periodically release unused
> entries from the L2 and refcount caches. Dirty cache entries are left
> untouched, even if they are otherwise valid candidates for removal.
> 
> This patch allows releasing those entries by flushing them to disk
> first. This is a blocking operation, so we need to do it in coroutine
> context.
> 
> Signed-off-by: Alberto Garcia 

This hit the one-month mark today; do we still want it for 3.1?
--js

> ---
>  block/qcow2-cache.c | 26 --
>  block/qcow2.c   | 15 ---
>  block/qcow2.h   |  2 +-
>  3 files changed, 33 insertions(+), 10 deletions(-)
> 
> diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
> index d9dafa31e5..33207eca9b 100644
> --- a/block/qcow2-cache.c
> +++ b/block/qcow2-cache.c
> @@ -46,6 +46,8 @@ struct Qcow2Cache {
>  uint64_tcache_clean_lru_counter;
>  };
>  
> +static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int 
> i);
> +
>  static inline void *qcow2_cache_get_table_addr(Qcow2Cache *c, int table)
>  {
>  return (uint8_t *) c->table_array + (size_t) table * c->table_size;
> @@ -86,26 +88,36 @@ static void qcow2_cache_table_release(Qcow2Cache *c, int 
> i, int num_tables)
>  #endif
>  }
>  
> -static inline bool can_clean_entry(Qcow2Cache *c, int i)
> +static inline bool can_clean_entry(BlockDriverState *bs, Qcow2Cache *c, int 
> i)
>  {
> +Qcow2CachedTable *t = &c->entries[i];
> -return t->ref == 0 && !t->dirty && t->offset != 0 &&
> -t->lru_counter <= c->cache_clean_lru_counter;
> +if (t->ref || !t->offset || t->lru_counter > c->cache_clean_lru_counter) 
> {
> +return false;
> +}
> +
> +if (qcow2_cache_entry_flush(bs, c, i) < 0) {
> +return false;
> +}
> +
> +return true;
>  }
>  
> -void qcow2_cache_clean_unused(Qcow2Cache *c)
> +void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c)
>  {
>  int i = 0;
> +
> +bdrv_inc_in_flight(bs);
> +
>  while (i < c->size) {
>  int to_clean = 0;
>  
>  /* Skip the entries that we don't need to clean */
> -while (i < c->size && !can_clean_entry(c, i)) {
> +while (i < c->size && !can_clean_entry(bs, c, i)) {
>  i++;
>  }
>  
>  /* And count how many we can clean in a row */
> -while (i < c->size && can_clean_entry(c, i)) {
> +while (i < c->size && can_clean_entry(bs, c, i)) {
>  c->entries[i].offset = 0;
>  c->entries[i].lru_counter = 0;
>  i++;
> @@ -118,6 +130,8 @@ void qcow2_cache_clean_unused(Qcow2Cache *c)
>  }
>  
>  c->cache_clean_lru_counter = c->lru_counter;
> +
> +bdrv_dec_in_flight(bs);
>  }
>  
>  Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables,
> diff --git a/block/qcow2.c b/block/qcow2.c
> index ec9e6238a0..2fcb41b12f 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -728,16 +728,25 @@ static const char 
> *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = {
>  [QCOW2_OL_BITMAP_DIRECTORY_BITNR] = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY,
>  };
>  
> -static void cache_clean_timer_cb(void *opaque)
> +static void coroutine_fn cache_co_clean_unused(void *opaque)
>  {
>  BlockDriverState *bs = opaque;
>  BDRVQcow2State *s = bs->opaque;
> -qcow2_cache_clean_unused(s->l2_table_cache);
> -qcow2_cache_clean_unused(s->refcount_block_cache);
> +
> +qemu_co_mutex_lock(&s->lock);
> +qcow2_cache_clean_unused(bs, s->l2_table_cache);
> +qcow2_cache_clean_unused(bs, s->refcount_block_cache);
> +qemu_co_mutex_unlock(&s->lock);
> +
>  timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
>(int64_t) s->cache_clean_interval * 1000);
>  }
>  
> +static void cache_clean_timer_cb(void *opaque)
> +{
> +qemu_coroutine_enter(qemu_coroutine_create(cache_co_clean_unused, 
> opaque));
> +}
> +
>  static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context)
>  {
>  BDRVQcow2State *s = bs->opaque;
> diff --git a/block/qcow2.h b/block/qcow2.h
> index 81b844e936..e8b390ba4b 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -659,7 +659,7 @@ int qcow2_cache_set_dependency(BlockDriverState *bs, 
> Qcow2Cache *c,
>  Qcow2Cache *dependency);
>  void qcow2_cache_depends_on_flush(Qcow2Cache *c);
>  
> -void qcow2_cache_clean_unused(Qcow2Cache *c);
> +void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c);
>  int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c);
>  
>  int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
> 




Re: [Qemu-block] [Qemu-devel] [PATCH v3 1/8] dirty-bitmap: improve bdrv_dirty_bitmap_next_zero

2018-09-10 Thread Eric Blake

On 9/10/18 11:49 AM, Vladimir Sementsov-Ogievskiy wrote:


-int64_t hbitmap_next_zero(const HBitmap *hb, uint64_t start);
+int64_t hbitmap_next_zero(const HBitmap *hb, uint64_t start, int64_t 
end);

The interface looks weird because we can define a 'start' that's beyond
the 'end'.

I realize that you need a signed integer for 'end' to signify EOF...
should we do a 'bytes' parameter instead? (Did you already do that in an
earlier version and we changed it?)

Well, it's not a big deal to me personally.


interface with constant end parameter is more comfortable for loop: we 
don't need to update 'bytes' parameter on each iteration


But there's still the question of WHO should be calculating end. Your 
interface argues for the caller:


hbitmap_next_zero(start, start + bytes)

int64_t hbitmap_next_zero(...)
{
while (offset != end) ...
}

while we're asking about a consistent interface for the caller (if most 
callers already have a 'bytes' rather than an 'end' computed):


hbitmap_next_zero(start, bytes)

int64_t hbitmap_next_zero(...)
{
int64_t end = start + bytes;
while (offset != end) ...
}

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



Re: [Qemu-block] [Qemu-devel] [PATCH v3 1/8] dirty-bitmap: improve bdrv_dirty_bitmap_next_zero

2018-09-10 Thread Vladimir Sementsov-Ogievskiy

10.09.2018 19:55, Eric Blake wrote:

On 9/10/18 11:49 AM, Vladimir Sementsov-Ogievskiy wrote:


-int64_t hbitmap_next_zero(const HBitmap *hb, uint64_t start);
+int64_t hbitmap_next_zero(const HBitmap *hb, uint64_t start, 
int64_t end);

The interface looks weird because we can define a 'start' that's beyond
the 'end'.

I realize that you need a signed integer for 'end' to signify EOF...
should we do a 'bytes' parameter instead? (Did you already do that 
in an

earlier version and we changed it?)

Well, it's not a big deal to me personally.


interface with constant end parameter is more comfortable for loop: 
we don't need to update 'bytes' parameter on each iteration


But there's still the question of WHO should be calculating end. Your 
interface argues for the caller:


hbitmap_next_zero(start, start + bytes)

int64_t hbitmap_next_zero(...)
{
    while (offset != end) ...
}

while we're asking about a consistent interface for the caller (if 
most callers already have a 'bytes' rather than an 'end' computed):


hbitmap_next_zero(start, bytes)

int64_t hbitmap_next_zero(...)
{
    int64_t end = start + bytes;
    while (offset != end) ...
}



Yes, that's an issue. Ok, if you are not comfortable with start,end, I 
can switch to start,bytes.


--
Best regards,
Vladimir




Re: [Qemu-block] [PATCH for-3.1 v10 00/31] block: Fix some filename generation issues

2018-09-10 Thread Max Reitz
On 10.09.18 17:18, Kevin Wolf wrote:
> Am 09.08.2018 um 23:34 hat Max Reitz geschrieben:
>> Once more, I’ll spare both me and you another iteration of the cover
>> letter, so see here:
>>
>> http://lists.nongnu.org/archive/html/qemu-block/2017-09/msg01030.html
>>
>> (Although this series no longer includes a @base-directory option.)
>>
>> In regards to the last version, the biggest change is that I dropped
>> backing_overridden and instead try to compare the filename from the
>> image header with the filename of the actual backing BDS to find out
>> whether the backing file has been overridden.
>>
>> In order that this doesn’t break whenever the header contains a slightly
>> unusual (“non-canonical”) backing filename (e.g. “file:foo.qcow2” or
>> “nbd:localhost:10809” instead of “nbd://localhost:10809”, i.e. something
>> different from what bdrv_refresh_filename() would generate), when the
>> reference filename in the BDS (auto_backing_file) is used to open the
>> backing file, it is updated from the backing BDS's resulting filename.
> 
> This doesn't seem to apply cleanly any more.

I know, but is that a "Please send your current v11 to the list"?

Max



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [Qemu-devel] [PATCH v3 1/8] dirty-bitmap: improve bdrv_dirty_bitmap_next_zero

2018-09-10 Thread Vladimir Sementsov-Ogievskiy

08.09.2018 00:49, John Snow wrote:


On 08/14/2018 08:14 AM, Vladimir Sementsov-Ogievskiy wrote:

Add bytes parameter to the function, to limit searched range.


I'm going to assume that Eric Blake has been through here and commented
on the interface itself.


Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  include/block/dirty-bitmap.h |  3 ++-
  include/qemu/hbitmap.h   | 10 --
  block/backup.c   |  2 +-
  block/dirty-bitmap.c |  5 +++--
  nbd/server.c |  2 +-
  tests/test-hbitmap.c |  2 +-
  util/hbitmap.c   | 25 -
  7 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 259bd27c40..27f5299c4e 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -98,7 +98,8 @@ bool bdrv_has_changed_persistent_bitmaps(BlockDriverState 
*bs);
  BdrvDirtyBitmap *bdrv_dirty_bitmap_next(BlockDriverState *bs,
  BdrvDirtyBitmap *bitmap);
  char *bdrv_dirty_bitmap_sha256(const BdrvDirtyBitmap *bitmap, Error **errp);
-int64_t bdrv_dirty_bitmap_next_zero(BdrvDirtyBitmap *bitmap, uint64_t start);
+int64_t bdrv_dirty_bitmap_next_zero(BdrvDirtyBitmap *bitmap, uint64_t start,
+int64_t end);
  BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap_locked(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap,
Error **errp);
diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index ddca52c48e..fe4dfde27a 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -295,10 +295,16 @@ unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi);
  /* hbitmap_next_zero:
   * @hb: The HBitmap to operate on
   * @start: The bit to start from.
+ * @end: End of range to search in. If @end is -1, search up to the bitmap
+ *   end.
   *
- * Find next not dirty bit.
+ * Find next not dirty bit within range [@start, @end), or from
+ * @start to the bitmap end if @end is -1. If not found, return -1.
+ *
+ * @end may be greater than original bitmap size, in this case, search up to

"original" bitmap size? I think that's just an implementation detail, we
can drop 'original' here, yes?


hm, no. we have field "size", which is not "original". But it all means 
that this place needs a bit more refactoring.





+ * the bitmap end.
   */
-int64_t hbitmap_next_zero(const HBitmap *hb, uint64_t start);
+int64_t hbitmap_next_zero(const HBitmap *hb, uint64_t start, int64_t end);
  

The interface looks weird because we can define a 'start' that's beyond
the 'end'.

I realize that you need a signed integer for 'end' to signify EOF...
should we do a 'bytes' parameter instead? (Did you already do that in an
earlier version and we changed it?)

Well, it's not a big deal to me personally.


interface with constant end parameter is more comfortable for loop: we 
don't need to update 'bytes' parameter on each iteration
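
A minimal self-contained sketch of that caller pattern (a toy 
next_zero() over a plain array stands in for hbitmap_next_zero(); this 
is not code from the series):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for hbitmap_next_zero(hb, start, end): return the next
 * clear bit in [start, end), or -1 if there is none. */
static int64_t next_zero(const int *bits, int64_t start, int64_t end)
{
    for (int64_t i = start; i < end; i++) {
        if (!bits[i]) {
            return i;
        }
    }
    return -1;
}

int main(void)
{
    int bits[] = { 1, 1, 0, 1, 0, 0, 1, 1 };
    int64_t end = 8;   /* computed once, never updated in the loop */
    int64_t offset;

    for (offset = 0; offset < end; offset++) {
        offset = next_zero(bits, offset, end);   /* @end stays constant */
        if (offset < 0) {
            break;
        }
        printf("zero bit at %" PRId64 "\n", offset);
    }
    /* With a 'bytes' parameter, the caller would instead have to
     * recompute bytes = end - offset before every call. */
    return 0;
}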





  /* hbitmap_create_meta:
   * Create a "meta" hbitmap to track dirtiness of the bits in this HBitmap.
diff --git a/block/backup.c b/block/backup.c
index 8630d32926..9bfd3f7189 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -458,7 +458,7 @@ static void 
backup_incremental_init_copy_bitmap(BackupBlockJob *job)
  break;
  }
  
-offset = bdrv_dirty_bitmap_next_zero(job->sync_bitmap, offset);

+offset = bdrv_dirty_bitmap_next_zero(job->sync_bitmap, offset, -1);
  if (offset == -1) {
  hbitmap_set(job->copy_bitmap, cluster, end - cluster);
  break;
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index c9b8a6fd52..037ae62726 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -785,9 +785,10 @@ char *bdrv_dirty_bitmap_sha256(const BdrvDirtyBitmap 
*bitmap, Error **errp)
  return hbitmap_sha256(bitmap->bitmap, errp);
  }
  
-int64_t bdrv_dirty_bitmap_next_zero(BdrvDirtyBitmap *bitmap, uint64_t offset)

+int64_t bdrv_dirty_bitmap_next_zero(BdrvDirtyBitmap *bitmap, uint64_t offset,
+int64_t end)
  {
-return hbitmap_next_zero(bitmap->bitmap, offset);
+return hbitmap_next_zero(bitmap->bitmap, offset, end);
  }
  
  void bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const BdrvDirtyBitmap *src,

diff --git a/nbd/server.c b/nbd/server.c
index ea5fe0eb33..07920d123b 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1952,7 +1952,7 @@ static unsigned int bitmap_to_extents(BdrvDirtyBitmap 
*bitmap, uint64_t offset,
  assert(begin < overall_end && nb_extents);
  while (begin < overall_end && i < nb_extents) {
  if (dirty) {
-end = bdrv_dirty_bitmap_next_zero(bitmap, begin);
+end = bdrv_dirty_bitmap_next_zero(bitmap, begin, -1);
  } else {
  bdrv_set_dirty_iter(it, begin);
  end = 

Re: [Qemu-block] [Qemu-devel] [PATCH] qemu-iotests: Test snapshot=on with nonexistent TMPDIR

2018-09-10 Thread Alberto Garcia
On Mon 10 Sep 2018 05:55:15 PM CEST, Eric Blake wrote:
>> Hm, it actually doesn't crash for me even without the fix. Anyway, I
>> don't have a good idea to make it more likely to crash and it's
>> certainly better than nothing.
>
> Does running the test under valgrind reliably see the use-after-free?

Good question! :-)

Unfortunately valgrind also needs a valid TMPDIR, so if you change it in
order to reproduce the bug then valgrind won't run.

I don't know if there's a way to tell valgrind to run the specified
program with its own environment variables, but you can simply edit
QEMU's get_tmp_filename() to always return an invalid directory, and
then you get the expected result:

 Invalid read of size 8 
at 0x859914: qobject_unref_impl (qobject.h:98)
by 0x85F8EA: bdrv_open_inherit (block.c:2831)
by 0x85F963: bdrv_open (block.c:2839)
by 0x8BDD19: blk_new_open (block-backend.c:375)
by 0x58A88A: blockdev_init (blockdev.c:599)
by 0x58B6C4: drive_new (blockdev.c:990)
by 0x59C004: drive_init_func (vl.c:1143)
by 0x9A0CE3: qemu_opts_foreach (qemu-option.c:1106)
by 0x5A4692: main (vl.c:4454)
  Address 0x1df67458 is 8 bytes inside a block of size 4,120 free'd
at 0x4C2CDDB: free (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
by 0x97AD5F: qdict_destroy_obj (qdict.c:459)
by 0x97CC04: qobject_destroy (qobject.c:41)
by 0x85996F: qobject_unref_impl (qobject.h:100)
by 0x85F6D4: bdrv_open_inherit (block.c:2794)
by 0x85F963: bdrv_open (block.c:2839)
  [...]
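
For reference, a minimal sketch of such an edit, assuming block.c's
get_tmp_filename() and meant purely for reproducing the bug (the
directory name below is made up):

int get_tmp_filename(char *filename, int size)
{
    /* sketch: force the temporary snapshot into a directory that cannot
     * exist, so creating the snapshot overlay fails */
    pstrcpy(filename, size, "/nonexistent-tmpdir/vl.XXXXXX");
    return 0;
}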

Berto



Re: [Qemu-block] [PATCH for-3.1 v10 04/31] block: Add BDS.auto_backing_file

2018-09-10 Thread Alberto Garcia
On Fri 07 Sep 2018 02:42:53 PM CEST, Max Reitz wrote:
> The whole purpose of bs->auto_backing_file is so you can compare it
> with bs->backing->bs->filename, see when it differs, and then you know
> that the user has changed the backing file from what it would be based
> on the image header alone (that is, if the user hadn't specified any
> @backing option at all).
  [...]
> (Now you could ask yourself, if we know the user hasn't specified any
> options to override the backing file -- why don't we just store a
> flag?  I tried that, Kevin wasn't really happy about it because it's
> hard to catch all corner cases.  There are many ways to change a
> backing file, and if you want to find out when a backing file has been
> changed to exactly what the image header says (this is the ideal
> outcome of the commit block job, for instance), you basically get back
> to the same problem of comparing filenames.)

Ok, I think this clarifies a lot. Thanks!

Berto



Re: [Qemu-block] [Qemu-devel] [PATCH v3 1/8] dirty-bitmap: improve bdrv_dirty_bitmap_next_zero

2018-09-10 Thread John Snow



On 09/10/2018 10:59 AM, Eric Blake wrote:
> On 9/7/18 4:49 PM, John Snow wrote:
>>
>>
>> On 08/14/2018 08:14 AM, Vladimir Sementsov-Ogievskiy wrote:
>>> Add bytes parameter to the function, to limit searched range.
>>>
>>
>> I'm going to assume that Eric Blake has been through here and commented
>> on the interface itself.
> 
> Actually, I haven't had time to look at this series in depth. Do you
> need me to?
> 

Not necessarily, it's just that I didn't read v1 or v2 so I was just
being cautious against recommending changes that maybe we already
recommended against in a different direction.

Historically you've cared the most about start/end/offset/bytes naming
and conventions, so I just made an assumption.

--js

>>
>>> Signed-off-by: Vladimir Sementsov-Ogievskiy 
>>> ---
>>>   include/block/dirty-bitmap.h |  3 ++-
>>>   include/qemu/hbitmap.h   | 10 --
>>>   block/backup.c   |  2 +-
>>>   block/dirty-bitmap.c |  5 +++--
>>>   nbd/server.c |  2 +-
>>>   tests/test-hbitmap.c |  2 +-
>>>   util/hbitmap.c   | 25 -
>>>   7 files changed, 36 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
>>> index 259bd27c40..27f5299c4e 100644
>>> --- a/include/block/dirty-bitmap.h
>>> +++ b/include/block/dirty-bitmap.h
>>> @@ -98,7 +98,8 @@ bool
>>> bdrv_has_changed_persistent_bitmaps(BlockDriverState *bs);
>>>   BdrvDirtyBitmap *bdrv_dirty_bitmap_next(BlockDriverState *bs,
>>>   BdrvDirtyBitmap *bitmap);
>>>   char *bdrv_dirty_bitmap_sha256(const BdrvDirtyBitmap *bitmap, Error
>>> **errp);
>>> -int64_t bdrv_dirty_bitmap_next_zero(BdrvDirtyBitmap *bitmap,
>>> uint64_t start);
>>> +int64_t bdrv_dirty_bitmap_next_zero(BdrvDirtyBitmap *bitmap,
>>> uint64_t start,
>>> +    int64_t end);
> 
> It's already seeming a bit odd to mix uint64_t AND int64_t for the two
> parameters.  Is the intent to allow -1 to mean "end of the bitmap
> instead of a specific end range"? But you can get that with UINT64_MAX
> just as easily, and still get away with spelling it -1 in the source.
> 
> 
>>> + * the bitmap end.
>>>    */
>>> -int64_t hbitmap_next_zero(const HBitmap *hb, uint64_t start);
>>> +int64_t hbitmap_next_zero(const HBitmap *hb, uint64_t start, int64_t
>>> end);
>>>   
>>
>> The interface looks weird because we can define a 'start' that's beyond
>> the 'end'.
>>
>> I realize that you need a signed integer for 'end' to signify EOF...
>> should we do a 'bytes' parameter instead? (Did you already do that in an
>> earlier version and we changed it?)
>>
>> Well, it's not a big deal to me personally.
> 
> It should always be possible to convert in either direction between
> [start,end) and [start,start+bytes); it boils down to a question of
> convenience (which form is easier for the majority of callers) and
> consistency (which form do we use more frequently in the block layer). I
> haven't checked closely, but I think start+bytes is more common than end
> in our public block layer APIs.
> 



Re: [Qemu-block] [Qemu-devel] [PATCH] qemu-iotests: Test snapshot=on with nonexistent TMPDIR

2018-09-10 Thread Eric Blake

On 9/10/18 4:57 AM, Kevin Wolf wrote:

Am 10.09.2018 um 11:29 hat Alberto Garcia geschrieben:

We just fixed a bug that was causing a use-after-free when QEMU was
unable to create a temporary snapshot. This is a test case for this
scenario.

Signed-off-by: Alberto Garcia 


Hm, it actually doesn't crash for me even without the fix. Anyway, I
don't have a good idea to make it more likely to crash and it's
certainly better than nothing.


Does running the test under valgrind reliably see the use-after-free?

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



Re: [Qemu-block] [PATCH for-3.1 v10 00/31] block: Fix some filename generation issues

2018-09-10 Thread Kevin Wolf
Am 09.08.2018 um 23:34 hat Max Reitz geschrieben:
> Once more, I’ll spare both me and you another iteration of the cover
> letter, so see here:
> 
> http://lists.nongnu.org/archive/html/qemu-block/2017-09/msg01030.html
> 
> (Although this series no longer includes a @base-directory option.)
> 
> In regards to the last version, the biggest change is that I dropped
> backing_overridden and instead try to compare the filename from the
> image header with the filename of the actual backing BDS to find out
> whether the backing file has been overridden.
> 
> In order that this doesn’t break whenever the header contains a slightly
> unusual (“non-canonical”) backing filename (e.g. “file:foo.qcow2” or
> “nbd:localhost:10809” instead of “nbd://localhost:10809”, i.e. something
> different from what bdrv_refresh_filename() would generate), when the
> reference filename in the BDS (auto_backing_file) is used to open the
> backing file, it is updated from the backing BDS's resulting filename.

This doesn't seem to apply cleanly any more.

Kevin



Re: [Qemu-block] [PATCH v2] job: Fix nested aio_poll() hanging in job_txn_apply

2018-09-10 Thread Kevin Wolf
Am 24.08.2018 um 04:43 hat Fam Zheng geschrieben:
> All callers have acquired ctx already. Doing that again results in
> aio_poll() hang. This fixes the problem that a BDRV_POLL_WHILE() in the
> callback cannot make progress because ctx is recursively locked, for
> example, when drive-backup finishes.
> 
> There are two callers of job_finalize():
> 
> fam@lemon:~/work/qemu [master]$ git grep -w -A1 '^\s*job_finalize'
> blockdev.c:job_finalize(&job->job, errp);
> blockdev.c-aio_context_release(aio_context);
> --
> job-qmp.c:job_finalize(job, errp);
> job-qmp.c-aio_context_release(aio_context);
> --
> tests/test-blockjob.c:job_finalize(&job->job, &error_abort);
> tests/test-blockjob.c-assert(job->job.status == JOB_STATUS_CONCLUDED);
> 
> Ignoring the test, it's easy to see both callers to job_finalize (and
> job_do_finalize) have acquired the context.
> 
> Cc: qemu-sta...@nongnu.org
> Reported-by: Gu Nini 
> Reviewed-by: Eric Blake 
> Signed-off-by: Fam Zheng 

Thanks, applied to the block branch.

Kevin



Re: [Qemu-block] backend for blk or fs with guaranteed blocking/synchronous I/O

2018-09-10 Thread Artem Pisarenko
It looks like things are even worse. The guest demonstrates strange timings
even without access to anything external to the machine. I've added Paolo
Bonzini to CC, because the issue looks related to cpu/tcg/memory stuff.

I've written a simple test script running parallel 'dd' utility processes
operating on files located in RAM. The QEMU machine has multiple vCPUs and,
moreover, a separate NUMA node for each vCPU.
Script in brief: it accepts an argument with the desired process count; for
each process it mounts a tmpfs, bound to the node's memory, and runs 'dd',
bound both to the node's cpu and memory, which copies files located on that
tmpfs.
It's expected that the overall execution time of N parallel processes (or
the speed of copying) should always be the same, regardless of the value of
N (of course, provided that N <= nodes_count and 'dd' is single-threaded),
because it's just simple loops of instructions loading and storing values
in memory local to each CPU. No common resources should be involved -
neither software (such as some target OS lock/mutex), nor hardware (such as
the memory bus). It should be almost ideal parallelization.
But it not only degrades as N increases, it even does so proportionally!
The same test run on the host machine (just multicore, no NUMA) shows the
expected results: there is degradation (because of the common memory bus),
but with a non-linear dependency on N.

Script ("test.sh"):
#!/bin/bash
N=$1
# Preparation...
if command -v numactl >/dev/null; then
  USE_NUMA_BIND=1
else
  USE_NUMA_BIND=0
fi
for i in $(seq 0 $((N - 1)));
do
  mkdir -p /mnt/testmnt_$i
  if [[ "$USE_NUMA_BIND" == 1 ]] ; then TMPFS_EXTRA_OPT=",mpol=bind:$i"; fi
  mount -t tmpfs -o size=25M,noatime,nodiratime,norelatime$TMPFS_EXTRA_OPT tmpfs /mnt/testmnt_$i
  dd if=/dev/zero of=/mnt/testmnt_$i/testfile_r bs=10M count=1 >/dev/null 2>&1
done
# Running...
for i in $(seq 0 $((N - 1)));
do
  if [[ "$USE_NUMA_BIND" == 1 ]] ; then PREFIX_RUN="numactl --cpunodebind=$i --membind=$i"; fi
  $PREFIX_RUN dd if=/mnt/testmnt_$i/testfile_r of=/mnt/testmnt_$i/testfile_w bs=100 count=10 2>&1 | sed -n 's/^.*, \(.*\)$/\1/p' &
done
# Cleanup...
wait
for i in $(seq 0 $((N - 1))); do umount /mnt/testmnt_$i; done
rm -rf /mnt/testmnt_*

Corresponding QEMU command line fragment:
"-machine accel=tcg -m 2048 -icount 1,sleep=off -rtc clock=vm -smp 10
-cpu qemu64 -numa node -numa node -numa node -numa node -numa node -numa
node -numa node -numa node -numa node -numa node"
(Removing -icount or the numa nodes doesn't change the results.)

Example runs on my Intel Core i7-7700 host (adequate results):
  artem@host:~$ sudo ./test.sh 1
  117 MB/s
  artem@host:~$ sudo ./test.sh 10
  91,1 MB/s
  89,3 MB/s
  90,4 MB/s
  85,0 MB/s
  68,7 MB/s
  63,1 MB/s
  62,0 MB/s
  55,9 MB/s
  54,1 MB/s
  56,0 MB/s

Example runs on my tiny linux x86_64 guest (strange results):
  root@guest:~# ./test.sh 1
  17.5 MB/s
  root@guest:~# ./test.sh 10
  3.2 MB/s
  2.7 MB/s
  2.6 MB/s
  2.0 MB/s
  2.0 MB/s
  1.9 MB/s
  1.8 MB/s
  1.8 MB/s
  1.8 MB/s
  1.8 MB/s

Please explain these results. Or maybe I'm wrong and this is normal?


Thu, 6 Sep 2018 at 16:24, Artem Pisarenko :

> Hi all,
>
> I'm developing a paravirtualized target linux system which runs multiple
> linux containers (LXC) inside itself. (For those unfamiliar with LXC,
> simply put, it's an isolated group of userspace processes with their own
> rootfs.) Each container should be provided access to its rootfs located on
> the host, and execution of a container should be deterministic.
> Particularly, it means that container I/O operations must be synchronized
> within some predefined quantum of guest _virtual_ time, i.e. its I/O
> activity shouldn't be delayed by host performance or by activities on the
> host and in other containers. In other words, the guest should see it as
> either infinite throughput and zero latency, or some predefined
> throughput/latency characteristics guaranteed per each rootfs.
>
> While other sources of non-determinism seem to be eliminated (using
> TCG, -icount, etc.), asynchronous I/O still introduces it.
>
> What is the scope of the "(asynchronous) I/O" term within qemu? Is it
> something related to the block device layer only, or a generic term
> covering the whole datapath between vCPU and backend?
> If it relates to block devices only, does usage of VirtFS guarantee
> deterministic access, or does it still involve some asynchrony relative to
> the guest virtual clock?
> Is it possible to force asynchronous I/O within qemu to be blocking by
> some external means (host OS configuration, hooks, etc.)? I know it may
> greatly slow down guest performance, but it's still better than nothing.
> Maybe some trivial patch can be made to qemu code at the virtio, block
> backend or platform syscalls level?
> Maybe I/O automatically (and reliably) falls back to synchronous mode in
> some particular configurations, such as using a block device with an image
> 

Re: [Qemu-block] [Qemu-devel] [PATCH v3 1/8] dirty-bitmap: improve bdrv_dirty_bitmap_next_zero

2018-09-10 Thread Eric Blake

On 9/7/18 4:49 PM, John Snow wrote:



On 08/14/2018 08:14 AM, Vladimir Sementsov-Ogievskiy wrote:

Add bytes parameter to the function, to limit searched range.



I'm going to assume that Eric Blake has been through here and commented
on the interface itself.


Actually, I haven't had time to look at this series in depth. Do you 
need me to?





Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  include/block/dirty-bitmap.h |  3 ++-
  include/qemu/hbitmap.h   | 10 --
  block/backup.c   |  2 +-
  block/dirty-bitmap.c |  5 +++--
  nbd/server.c |  2 +-
  tests/test-hbitmap.c |  2 +-
  util/hbitmap.c   | 25 -
  7 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 259bd27c40..27f5299c4e 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -98,7 +98,8 @@ bool bdrv_has_changed_persistent_bitmaps(BlockDriverState 
*bs);
  BdrvDirtyBitmap *bdrv_dirty_bitmap_next(BlockDriverState *bs,
  BdrvDirtyBitmap *bitmap);
  char *bdrv_dirty_bitmap_sha256(const BdrvDirtyBitmap *bitmap, Error **errp);
-int64_t bdrv_dirty_bitmap_next_zero(BdrvDirtyBitmap *bitmap, uint64_t start);
+int64_t bdrv_dirty_bitmap_next_zero(BdrvDirtyBitmap *bitmap, uint64_t start,
+int64_t end);


It's already seeming a bit odd to mix uint64_t AND int64_t for the two 
parameters.  Is the intent to allow -1 to mean "end of the bitmap 
instead of a specific end range"? But you can get that with UINT64_MAX 
just as easily, and still get away with spelling it -1 in the source.




+ * the bitmap end.
   */
-int64_t hbitmap_next_zero(const HBitmap *hb, uint64_t start);
+int64_t hbitmap_next_zero(const HBitmap *hb, uint64_t start, int64_t end);
  


The interface looks weird because we can define a 'start' that's beyond
the 'end'.

I realize that you need a signed integer for 'end' to signify EOF...
should we do a 'bytes' parameter instead? (Did you already do that in an
earlier version and we changed it?)

Well, it's not a big deal to me personally.


It should always be possible to convert in either direction between 
[start,end) and [start,start+bytes); it boils down to a question of 
convenience (which form is easier for the majority of callers) and 
consistency (which form do we use more frequently in the block layer). I 
haven't checked closely, but I think start+bytes is more common than end 
in our public block layer APIs.
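
As a side note, converting between the two forms is mechanical in either
direction; a minimal sketch (the function names are mine, not from the
patch):

    #include <stdint.h>

    /* the form in this patch: [start, end), with end == -1 meaning
     * "up to the end of the bitmap" */
    extern int64_t next_zero_end(uint64_t start, int64_t end);

    /* the [start, start + bytes) form that is more common in the block
     * layer; end = start + bytes and bytes = end - start convert between
     * the two (overflow checks omitted) */
    static inline int64_t next_zero_bytes(uint64_t start, uint64_t bytes)
    {
        return next_zero_end(start, start + bytes);
    }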


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



[Qemu-block] [PATCH 1/2] virtio: Return true from virtio_queue_empty if broken

2018-09-10 Thread Fam Zheng
Both virtio-blk and virtio-scsi use virtio_queue_empty() as the
loop condition in VQ handlers (virtio_blk_handle_vq,
virtio_scsi_handle_cmd_vq). When a device is marked broken in
virtqueue_pop, for example if a vIOMMU address translation failed, we
want to break out of the loop.

This fixes a hanging problem when booting a CentOS 3.10.0-862.el7.x86_64
kernel with ATS enabled:

  $ qemu-system-x86_64 \
... \
-device intel-iommu,intremap=on,caching-mode=on,eim=on,device-iotlb=on \
-device virtio-scsi-pci,iommu_platform=on,ats=on,id=scsi0,bus=pci.4,addr=0x0

The dead loop happens immediately when the kernel boots and initializes
the device, where virtio_scsi_data_plane_handle_cmd will not return:

> ...
> #13 0x5586602b7793 in virtio_scsi_handle_cmd_vq
> #14 0x5586602b8d66 in virtio_scsi_data_plane_handle_cmd
> #15 0x5586602ddab7 in virtio_queue_notify_aio_vq
> #16 0x5586602dfc9f in virtio_queue_host_notifier_aio_poll
> #17 0x5586607885da in run_poll_handlers_once
> #18 0x55866078880e in try_poll_mode
> #19 0x5586607888eb in aio_poll
> #20 0x558660784561 in aio_wait_bh_oneshot
> #21 0x5586602b9582 in virtio_scsi_dataplane_stop
> #22 0x5586605a7110 in virtio_bus_stop_ioeventfd
> #23 0x5586605a9426 in virtio_pci_stop_ioeventfd
> #24 0x5586605ab808 in virtio_pci_common_write
> #25 0x558660242396 in memory_region_write_accessor
> #26 0x5586602425ab in access_with_adjusted_size
> #27 0x558660245281 in memory_region_dispatch_write
> #28 0x5586601e008e in flatview_write_continue
> #29 0x5586601e01d8 in flatview_write
> #30 0x5586601e04de in address_space_write
> #31 0x5586601e052f in address_space_rw
> #32 0x5586602607f2 in kvm_cpu_exec
> #33 0x558660227148 in qemu_kvm_cpu_thread_fn
> #34 0x55866078bde7 in qemu_thread_start
> #35 0x7f5784906594 in start_thread
> #36 0x7f5784639e6f in clone

With this patch, virtio_queue_empty will now return 1 as soon as the
vdev is marked as broken, after a "virtio: zero sized buffers are not
allowed" error.

To be consistent, update virtio_queue_empty_rcu as well.

Signed-off-by: Fam Zheng 
---
 hw/virtio/virtio.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index d4e4d98b59..7a05c9e52c 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -358,6 +358,10 @@ int virtio_queue_ready(VirtQueue *vq)
  * Called within rcu_read_lock().  */
 static int virtio_queue_empty_rcu(VirtQueue *vq)
 {
+if (unlikely(vq->vdev->broken)) {
+return 1;
+}
+
 if (unlikely(!vq->vring.avail)) {
 return 1;
 }
@@ -373,6 +377,10 @@ int virtio_queue_empty(VirtQueue *vq)
 {
 bool empty;
 
+if (unlikely(vq->vdev->broken)) {
+return 1;
+}
+
 if (unlikely(!vq->vring.avail)) {
 return 1;
 }
-- 
2.17.1




[Qemu-block] [PATCH 2/2] virtio-scsi/virtio-blk: Disable poll handlers when stopping vq handler

2018-09-10 Thread Fam Zheng
We have this unwanted call stack:

  > ...
  > #13 0x5586602b7793 in virtio_scsi_handle_cmd_vq
  > #14 0x5586602b8d66 in virtio_scsi_data_plane_handle_cmd
  > #15 0x5586602ddab7 in virtio_queue_notify_aio_vq
  > #16 0x5586602dfc9f in virtio_queue_host_notifier_aio_poll
  > #17 0x5586607885da in run_poll_handlers_once
  > #18 0x55866078880e in try_poll_mode
  > #19 0x5586607888eb in aio_poll
  > #20 0x558660784561 in aio_wait_bh_oneshot
  > #21 0x5586602b9582 in virtio_scsi_dataplane_stop
  > #22 0x5586605a7110 in virtio_bus_stop_ioeventfd
  > #23 0x5586605a9426 in virtio_pci_stop_ioeventfd
  > #24 0x5586605ab808 in virtio_pci_common_write
  > #25 0x558660242396 in memory_region_write_accessor
  > #26 0x5586602425ab in access_with_adjusted_size
  > #27 0x558660245281 in memory_region_dispatch_write
  > #28 0x5586601e008e in flatview_write_continue
  > #29 0x5586601e01d8 in flatview_write
  > #30 0x5586601e04de in address_space_write
  > #31 0x5586601e052f in address_space_rw
  > #32 0x5586602607f2 in kvm_cpu_exec
  > #33 0x558660227148 in qemu_kvm_cpu_thread_fn
  > #34 0x55866078bde7 in qemu_thread_start
  > #35 0x7f5784906594 in start_thread
  > #36 0x7f5784639e6f in clone

Avoid it with the aio_disable_external/aio_enable_external pair, so that
no vq poll handlers can be called in aio_wait_bh_oneshot.

Signed-off-by: Fam Zheng 
---
 hw/block/dataplane/virtio-blk.c | 2 ++
 hw/scsi/virtio-scsi-dataplane.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 8c37bd314a..8ab54218c1 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -279,7 +279,9 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
 trace_virtio_blk_data_plane_stop(s);
 
 aio_context_acquire(s->ctx);
+aio_disable_external(s->ctx);
 aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
+aio_enable_external(s->ctx);
 
 /* Drain and switch bs back to the QEMU main loop */
 blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context());
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index b995bab3a2..1452398491 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -208,7 +208,9 @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev)
 s->dataplane_stopping = true;
 
 aio_context_acquire(s->ctx);
+aio_disable_external(s->ctx);
 aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s);
+aio_enable_external(s->ctx);
 aio_context_release(s->ctx);
 
 blk_drain_all(); /* ensure there are no in-flight requests */
-- 
2.17.1




[Qemu-block] [PATCH 0/2] virtio-scsi: Fix QEMU hang with vIOMMU and ATS

2018-09-10 Thread Fam Zheng
The first patch describes the bug and the reproducer.

The VQ poll handler that is called by mistake within virtio_scsi_dataplane_stop
enters a dead loop because it fails to detect an error state. Fix both sides of
the problem: the handler should break out from the loop if no progress can be
made due to virtio_error; the handler shouldn't be called in that situation in
the first place.
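
For reference, a condensed sketch of the loop shape both handlers share
(simplified for illustration, not the verbatim QEMU code;
get_next_request()/handle_request() stand in for the real helpers):

    do {
        virtio_queue_set_notification(vq, 0);
        while ((req = get_next_request(vq))) {  /* wraps virtqueue_pop() */
            handle_request(req);
        }
        virtio_queue_set_notification(vq, 1);
    } while (!virtio_queue_empty(vq));

Once the device is marked broken, virtqueue_pop() keeps failing, but
virtio_queue_empty() previously still returned false, so the outer loop
spun forever; patch 1 makes it return true in that state.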

Fam Zheng (2):
  virtio: Return true from virtio_queue_empty if broken
  virtio-scsi/virtio-blk: Disable poll handlers when stopping vq handler

 hw/block/dataplane/virtio-blk.c | 2 ++
 hw/scsi/virtio-scsi-dataplane.c | 2 ++
 hw/virtio/virtio.c  | 8 
 3 files changed, 12 insertions(+)

-- 
2.17.1




Re: [Qemu-block] [PATCH v8 0/8] Take the image size into account when allocating the L2 cache

2018-09-10 Thread Kevin Wolf
Hi Leonid,

On 13.08.2018 at 03:07, Leonid Bloch wrote:
> This series makes the qcow2 L2 cache assignment aware of the image size,
> with the intention for it to cover the entire image. The importance of
> this change is in noticeable performance improvement, especially with
> heavy random I/O. The memory overhead is not big in most cases, as only
> 1 MB of cache for every 8 GB of image size is used. For cases with very
> large images and/or small cluster sizes, or systems with limited RAM
> resources, there is an upper limit on the default L2 cache: 32 MB. To
> modify this limit one can use the already existing 'l2-cache-size' and
> 'cache-size' options. Moreover, this fixes the behavior of 'l2-cache-size',
> as it was documented as the *maximum* L2 cache size, but in practice
> behaved as the absolute size.
> 
> To compensate the memory overhead which may be increased following this
> behavior, the default cache-clean-interval is set to 10 minutes by default
> (was disabled by default before).
> 
> The L2 cache is also resized accordingly, by default, if the image is
> resized.
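
As a back-of-the-envelope check of the "1 MB per 8 GB" figure above,
assuming the qcow2 defaults of 64 KiB clusters and 8-byte L2 entries (a
standalone toy program, not code from the series):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t image_size   = 8ULL << 30;   /* 8 GiB image        */
        uint64_t cluster_size = 64 << 10;     /* 64 KiB clusters    */
        uint64_t entry_size   = 8;            /* bytes per L2 entry */

        /* one L2 entry maps one cluster, so full coverage needs: */
        uint64_t l2 = image_size / cluster_size * entry_size;
        printf("%" PRIu64 " KiB\n", l2 >> 10);  /* prints 1024, i.e. 1 MiB */
        return 0;
    }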

after digging through the most urgent part of my post-vacation backlog,
I saw this series again. It's been a while, but I don't think much is
missing, so let's get this finished now. :-)

We had some discussion in the v7 thread after you posted v8, and you
mentioned there that you'd do some changes in a v9 (at first sight, just
some test case stuff). Are you still planning to send that v9?

Kevin



Re: [Qemu-block] [Qemu-devel] [PATCH] qemu-img.c: add help for each command

2018-09-10 Thread Programmingkid


> On Sep 10, 2018, at 4:16 AM, Kevin Wolf  wrote:
> 
> On 08.09.2018 at 05:16, Programmingkid wrote:
>> 
>>> On Sep 7, 2018, at 11:13 PM, Peter Maydell  wrote:
>>> 
>>> On 8 September 2018 at 04:01, John Arbuckle  
>>> wrote:
>>> 
 +/* print the help for this command */
 +if (strcmp("--help", argv[optind + 1]) == 0) {
 +if (strcmp("amend", cmdname) == 0) {
 +help_amend();
 +} else if (strcmp("bench", cmdname) == 0) {
 +help_bench();
 +} else if (strcmp("check", cmdname) == 0) {
 +help_check();
 +} else if (strcmp("commit", cmdname) == 0) {
 +help_commit();
 +} else if (strcmp("compare", cmdname) == 0) {
 +help_compare();
 +} else if (strcmp("convert", cmdname) == 0) {
 +help_convert();
 +} else if (strcmp("create", cmdname) == 0) {
 +help_create();
 +} else if (strcmp("dd", cmdname) == 0) {
 +help_dd();
 +} else if (strcmp("info", cmdname) == 0) {
 +help_info();
 +} else if (strcmp("map", cmdname) == 0) {
 +help_map();
 +} else if (strcmp("measure", cmdname) == 0) {
 +help_measure();
 +} else if (strcmp("snapshot", cmdname) == 0) {
 +help_snapshot();
 +} else if (strcmp("rebase", cmdname) == 0) {
 +help_rebase();
 +} else if (strcmp("resize", cmdname) == 0) {
 +help_resize();
>>> 
>>> Any time you find yourself writing very repetitive code like
>>> this, it's a good idea to ask yourself "is there a way to make
>>> this data-driven?".
>>> 
>>> thanks
>>> -- PMM
>> 
>> Did you want me to loop thru an array of strings instead?
> 
> There is already an array of all subcommands, img_cmds. You should
> probably add another field there for the help text.

Even though I would prefer to leave the img_cmds array alone, I do
see the advantages of your suggestion. All the code that checks which
command is being used could go. I will add a help_text field to this
array, as sketched below.
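
Something along these lines, perhaps (a rough sketch only: the img_cmds
table and the handler names exist in qemu-img.c, but the help field, the
help strings and the exact dispatch wiring here are hypothetical):

    typedef struct img_cmd_t {
        const char *name;
        int (*handler)(int argc, char **argv);
        const char *help;                       /* new field */
    } img_cmd_t;

    static const img_cmd_t img_cmds[] = {
        { "create", img_create, "create [-f fmt] [-o options] filename [size]" },
        { "resize", img_resize, "resize [--shrink] filename [+|-]size" },
        /* ... one entry per subcommand ... */
        { NULL, NULL, NULL },
    };

    /* in main(), replacing the strcmp chain: */
    for (const img_cmd_t *cmd = img_cmds; cmd->name != NULL; cmd++) {
        if (!strcmp(cmdname, cmd->name)) {
            if (optind + 1 < argc && !strcmp(argv[optind + 1], "--help")) {
                printf("%s\n", cmd->help);
                exit(EXIT_SUCCESS);
            }
            return cmd->handler(argc, argv);
        }
    }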

> Also, your line wrapping (even mid-word!) is awful. I'm not sure we
> really have to fill 80 characters in the output and can't simply keep
> the lines a bit shorter so that 80 characters in the source are enough.

Yeah it does look very unreadable like that.

> But if we do want to use the full 80 characters in the output, I'd
> rather break the limit from the coding style and use longer lines in the
> source code than doing what you did.

Using lines longer than 80 would make the patch a lot easier to read. I will do 
this.

Thank you.


Re: [Qemu-block] [PATCH v0 2/2] block: postpone the coroutine executing if the BDS's is drained

2018-09-10 Thread Kevin Wolf
On 29.06.2018 at 14:40, Denis Plotnikov wrote:
> Fixes the problem of ide request appearing when the BDS is in
> the "drained section".
> 
> Without the patch the request can come and be processed by the main
> event loop, as the ide requests are processed by the main event loop
> and the main event loop doesn't stop when its context is in the
> "drained section".
> The request execution is postponed until the end of "drained section".
> 
> The patch doesn't modify ide specific code, as well as any other
> device code. Instead, it modifies the infrastructure of asynchronous
> Block Backend requests, in favor of postponing the requests arisen
> when in "drained section" to remove the possibility of request appearing
> for all the infrastructure clients.
> 
> This approach doesn't make the vCPU processing the request wait until
> the end of request processing.
> 
> Signed-off-by: Denis Plotnikov 

I generally agree with the idea that requests should be queued during a
drained section. However, I think there are a few fundamental problems
with the implementation in this series:

1) aio_disable_external() is already a layering violation and we'd like
   to get rid of it (by replacing it with a BlockDevOps callback from
   BlockBackend to the devices), so adding more functionality there
   feels like a step in the wrong direction.

2) Only blk_aio_* are fixed, while we also have synchronous public
   interfaces (blk_pread/pwrite) as well as coroutine-based ones
   (blk_co_*). They need to be postponed as well.

   blk_co_preadv/pwritev() are the common point in the call chain for
   all of these variants, so this is where the fix needs to live.

3) Within a drained section, you want requests from other users to be
   blocked, but not your own ones (essentially you want exclusive
   access). We don't have blk_drained_begin/end() yet, so this is not
   something to implement right now, but let's keep this requirement in
   mind and choose a design that allows this.

I believe the whole logic should be kept local to BlockBackend, and
blk_root_drained_begin/end() should be the functions that start queuing
requests or let queued requests resume.

As we are already in coroutine context in blk_co_preadv/pwritev(), after
checking that blk->quiesce_counter > 0, we can enter the coroutine
object into a list and yield. blk_root_drained_end() calls aio_co_wake()
for each of the queued coroutines. This should be all that we need to
manage.
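
A minimal sketch of that shape, using the existing CoQueue helpers rather
than an open-coded list (queued_requests is a hypothetical new BlockBackend
field, initialized with qemu_co_queue_init() in blk_new()):

    /* in blk_co_preadv()/blk_co_pwritev(), before issuing the request: */
    while (blk->quiesce_counter) {
        qemu_co_queue_wait(&blk->queued_requests, NULL);  /* park + yield */
    }

    /* in blk_root_drained_end(), when the last drained section ends: */
    if (--blk->quiesce_counter == 0) {
        while (qemu_co_enter_next(&blk->queued_requests, NULL)) {
            /* each queued request coroutine is woken in turn */
        }
    }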

Kevin



Re: [Qemu-block] forward of qemu bug regarding random qemu hangs at shutdown

2018-09-10 Thread Marc Hartmayer
On Fri, Sep 07, 2018 at 06:21 PM +0200, Marc Hartmayer  
wrote:
> On Fri, Sep 07, 2018 at 11:34 AM +0200, Kevin Wolf  wrote:
>> On 06.09.2018 at 21:29, Christian Borntraeger wrote:

[…snip…]

>>
>> I think we need more information there. Can you set a breakpoint on
>> bdrv_drain_all_begin() to see where it's even called? When I start a
>> qemu instance without a block device, the first time this is called is
>> during shutdown after the mainloop (i.e. after the place where you're
>> seeing a hang).
>
> I can try that.

Okay, I’ve instrumented that function using SystemTap.

The first (and actually the only) call of bdrv_drain_all_begin of a
hanging QEMU is

161.311 bdrv_drain_all_begin 19105
 0x559bdc933045 : _nocheck__trace_bdrv_drain_all_begin+0x4/0x8 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x559bdc933052 : trace_bdrv_drain_all_begin+0x9/0xc 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x559bdc93472d : bdrv_drain_all_begin+0x16/0x21e 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x559bdc9349e5 : bdrv_drain_all+0x9/0x11 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x559bdc49beba : do_vm_stop+0x5a/0x6c 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x559bdc49db56 : vm_stop+0x3e/0x40 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x559bdc61a77a : main_loop_should_exit+0xae/0x191 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x559bdc61a872 : main_loop+0x15/0x1f 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x559bdc6221ac : main+0x3f1c/0x3f7b 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x7f41160a688a [/usr/lib64/libc-2.25.so+0x2088a/0x3d5000]


The calls of bdrv_drain_all_begin of a non-hanging QEMU are:

160.651 bdrv_drain_all_begin 19044
 0x55f558a0d045 : _nocheck__trace_bdrv_drain_all_begin+0x4/0x8 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x55f558a0d052 : trace_bdrv_drain_all_begin+0x9/0xc 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x55f558a0e72d : bdrv_drain_all_begin+0x16/0x21e 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x55f558a0e9e5 : bdrv_drain_all+0x9/0x11 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x55f558575eba : do_vm_stop+0x5a/0x6c 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x55f558577b56 : vm_stop+0x3e/0x40 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x55f5586f477a : main_loop_should_exit+0xae/0x191 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x55f5586f4872 : main_loop+0x15/0x1f 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x55f5586fc1ac : main+0x3f1c/0x3f7b 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x7f48c929f88a [/usr/lib64/libc-2.25.so+0x2088a/0x3d5000]
160.652 bdrv_drain_all_begin 19044
 0x55f558a0d045 : _nocheck__trace_bdrv_drain_all_begin+0x4/0x8 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x55f558a0d052 : trace_bdrv_drain_all_begin+0x9/0xc 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x55f558a0e72d : bdrv_drain_all_begin+0x16/0x21e 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x55f558a0e9e5 : bdrv_drain_all+0x9/0x11 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x55f558575eba : do_vm_stop+0x5a/0x6c 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x55f558575edf : vm_shutdown+0x13/0x15 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x55f5586fc1b6 : main+0x3f26/0x3f7b 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x7f48c929f88a [/usr/lib64/libc-2.25.so+0x2088a/0x3d5000]
160.652 bdrv_drain_all_begin 19044
 0x55f558a0d045 : _nocheck__trace_bdrv_drain_all_begin+0x4/0x8 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x55f558a0d052 : trace_bdrv_drain_all_begin+0x9/0xc 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x55f558a0e72d : bdrv_drain_all_begin+0x16/0x21e 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x55f558a0e9e5 : bdrv_drain_all+0x9/0x11 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x55f55899e4a4 : bdrv_close_all+0x3c/0x74 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x55f5586fc1c0 : main+0x3f30/0x3f7b 
[/usr/local/qemu/master/bin/qemu-system-x86_64]
 0x7f48c929f88a [/usr/lib64/libc-2.25.so+0x2088a/0x3d5000]

Hope this helps.

[…snip]

--
Kind regards / Beste Grüße
   Marc Hartmayer

IBM Deutschland Research & Development GmbH
Vorsitzende des Aufsichtsrats: Martina Koederitz
Geschäftsführung: Dirk Wittkopp
Sitz der Gesellschaft: Böblingen
Registergericht: Amtsgericht Stuttgart, HRB 243294




Re: [Qemu-block] forward of qemu bug regarding random qemu hangs at shutdown

2018-09-10 Thread Marc Hartmayer
On Mon, Sep 10, 2018 at 10:21 AM +0200, Kevin Wolf  wrote:
> On 07.09.2018 at 18:21, Marc Hartmayer wrote:
>> On Fri, Sep 07, 2018 at 11:34 AM +0200, Kevin Wolf  wrote:
>> > On 06.09.2018 at 21:29, Christian Borntraeger wrote:
>> >> Kevin,
>> >>
>> >> for reference, it seems that his bug report somehow got lost.
>> >> https://bugs.launchpad.net/qemu/+bug/1788582
>> >
>> > That looks... interesting. The reproducer doesn't even seem to use a
>> > block device, and the backtrace shows a QEMU that is just sitting in the
>> > main loop waiting for events, not somewhere in the shutdown process
>> > after exiting the main loop where bdrv_drain_all() would be called. I
>> > fail to even come up with a theory about any connection between this and
>> > commit 0f12264e7.
>> >
>> > I think we need more information there. Can you set a breakpoint on
>> > bdrv_drain_all_begin() to see where it's even called? When I start a
>> > qemu instance without a block device, the first time this is called is
>> > during shutdown after the mainloop (i.e. after the place where you're
>> > seeing a hang).
>>
>> I can try that.
>>
>> >
>> > Maybe bisect within the commit that seems to cause the bug, by
>> > selectively disabling some hunks in it?
>>
>> If I remove the line(s)
>>
>> /* Execute pending BHs first (may modify the graph) and check everything
>>  * else only after the BHs have executed. */
>> while (aio_poll(qemu_get_aio_context(), false));
>>
>> in the function 'bdrv_drain_all_poll', then it works.
>
> It still doesn't make sense to me why this would make any difference
> without a block device (and without iothreads), but you could give this
> patch series of mine a try:
>
> [PATCH 00/14] Fix some jobs/drain/aio_poll related hangs
>
> Amongst others, it does remove the line you quoted.

With the patches applied, I can no longer reproduce the bug.

>
> Kevin
>
--
Kind regards / Beste Grüße
   Marc Hartmayer

IBM Deutschland Research & Development GmbH
Vorsitzende des Aufsichtsrats: Martina Koederitz
Geschäftsführung: Dirk Wittkopp
Sitz der Gesellschaft: Böblingen
Registergericht: Amtsgericht Stuttgart, HRB 243294




Re: [Qemu-block] [PATCH] qemu-iotests: Test snapshot=on with nonexistent TMPDIR

2018-09-10 Thread Alberto Garcia
On Mon 10 Sep 2018 11:57:48 AM CEST, Kevin Wolf wrote:
> On 10.09.2018 at 11:29, Alberto Garcia wrote:
>> We just fixed a bug that was causing a use-after-free when QEMU was
>> unable to create a temporary snapshot. This is a test case for this
>> scenario.
>> 
>> Signed-off-by: Alberto Garcia 
>
> Hm, it actually doesn't crash for me even without the fix. Anyway, I
> don't have a good idea to make it more likely to crash and it's
> certainly better than nothing.

Yeah, I had the same problem: I could make it crash very easily last
week, and I can still make it crash with the QEMU package shipped with my
distro, but I tried now with master and it doesn't crash. Well, it's
undefined behavior after all :-)

Berto



Re: [Qemu-block] [Qemu-devel] [PATCH v0 0/2] Postponed actions

2018-09-10 Thread Denis Plotnikov

PING PING!

On 28.08.2018 13:23, Denis Plotnikov wrote:



On 27.08.2018 19:05, John Snow wrote:



On 08/27/2018 03:05 AM, Denis Plotnikov wrote:

PING! PING!



Sorry, Kevin and Stefan are both on PTO right now, I think. I can't
promise I have the time to look soon, but you at least deserve an answer
for the radio silence the last week.

--js

Thanks for the response!
I'll be waiting for some comments!

Denis



On 14.08.2018 10:08, Denis Plotnikov wrote:



On 13.08.2018 19:30, Kevin Wolf wrote:

On 13.08.2018 at 10:32, Denis Plotnikov wrote:

Ping ping!

On 16.07.2018 21:59, John Snow wrote:



On 07/16/2018 11:01 AM, Denis Plotnikov wrote:

Ping!



I never saw a reply to Stefan's question on July 2nd, did you reply
off-list?

--js

Yes, I did. I talked to Stefan about why the patch set appeared.


The rest of us still don't know the answer. I had the same question.

Kevin

Yes, that's my fault. I should have posted it earlier.

I reviewed the problem once again and came up with the following
explanation.
Indeed, if the global lock has been taken by the main thread, the vCPU
threads won't be able to execute mmio ide.
But if the main thread releases the lock, then nothing prevents the
vCPU threads from executing what they want, e.g. writing to the block
device.

This is possible while the mirroring runs. Let's take a look at the
following snippet of mirror_run; this is part of the mirroring
completion path.

    bdrv_drained_begin(bs);
    cnt = bdrv_get_dirty_count(s->dirty_bitmap);
>>  if (cnt > 0 || mirror_flush(s) < 0) {
        bdrv_drained_end(bs);
        continue;
    }

(X) assert(QLIST_EMPTY(&bs->tracked_requests));

mirror_flush here can yield the current coroutine, so nothing more can
be executed.
We could end up in a situation where the main loop has to revolve
to poll for another timer/bh to process. While revolving, it releases
the global lock. If the global lock is being waited for by a vCPU (or
any other) thread, the waiting thread gets the lock and does what it
intends.

This is something that I can observe:

mirror_flush yields the coroutine, and the main thread revolves and locks
because a vCPU was waiting for the lock. Now the vCPU thread owns the
lock and the main thread waits for it to be released.
The vCPU thread does cmd_write_dma and releases the lock. Then the main
thread gets the lock and continues to run, eventually proceeding with
the yielded coroutine.
If the vCPU requests aren't completed by that moment, we will assert at
(X). If the vCPU requests are completed, we won't even notice that we
had some writes while in the drained section.

Denis



On 29.06.2018 15:40, Denis Plotnikov wrote:

There are cases when a request to a block driver state shouldn't have
appeared, producing dangerous race conditions.
This misbehaviour usually happens with storage devices emulated
without eventfd for guest-to-host notifications, like IDE.

The issue arises when the context is in the "drained" section
and doesn't expect the request to come, but the request comes from a
device not using an iothread, whose context is processed by the main
loop.

The main loop, unlike the iothread event loop, isn't blocked by the
"drained" section.
A request coming in and being processed while in the "drained" section
can spoil the block driver state consistency.

This behavior can be observed in the following KVM-based case:

1. Setup a VM with an IDE disk.
2. Inside the VM start a disk writing load for the IDE device
      e.g: dd if= of= bs=X count=Y oflag=direct
3. On the host create a mirroring block job for the IDE device
      e.g: drive_mirror  
4. On the host finish the block job
      e.g: block_job_complete 
     Having done the 4th action, you could get an assert:
     assert(QLIST_EMPTY(&bs->tracked_requests)) from mirror_run.
     On my setup, the assert is 1/3 reproducible.

The patch series introduces the mechanism to postpone the requests
until the BDS leaves the "drained" section, for the devices not using
iothreads.
Also, it modifies the asynchronous block backend infrastructure to use
that mechanism to release the assert bug for IDE devices.

Denis Plotnikov (2):
      async: add infrastructure for postponed actions
      block: postpone the coroutine executing if the BDS's is drained

     block/block-backend.c | 58 ++-
     include/block/aio.h   | 63 +++
     util/async.c          | 33 +++
     3 files changed, 142 insertions(+), 12 deletions(-)





--
Best,
Denis










--
Best,
Denis



Re: [Qemu-block] [PATCH] qemu-iotests: Test snapshot=on with nonexistent TMPDIR

2018-09-10 Thread Kevin Wolf
On 10.09.2018 at 11:29, Alberto Garcia wrote:
> We just fixed a bug that was causing a use-after-free when QEMU was
> unable to create a temporary snapshot. This is a test case for this
> scenario.
> 
> Signed-off-by: Alberto Garcia 

Hm, it actually doesn't crash for me even without the fix. Anyway, I
don't have a good idea to make it more likely to crash and it's
certainly better than nothing.

Thanks, applied to the block branch.

Kevin



Re: [Qemu-block] [PATCH] block: Fix use after free error in bdrv_open_inherit()

2018-09-10 Thread Alberto Garcia
On Mon 10 Sep 2018 10:34:20 AM CEST, Kevin Wolf  wrote:
> On 06.09.2018 at 16:25, Alberto Garcia wrote:
>> When a block device is opened with BDRV_O_SNAPSHOT and the
>> bdrv_append_temp_snapshot() call fails then the error code path tries
>> to unref the already destroyed 'options' QDict.
>> 
>> This can be reproduced easily by setting TMPDIR to a location where
>> the QEMU process can't write:
>> 
>>$ TMPDIR=/nonexistent $QEMU -drive driver=null-co,snapshot=on
>> 
>> Signed-off-by: Alberto Garcia 
>
> Thanks, applied to the block branch.
>
> But can we add the reproducer to some iotests case?

Yup, I just sent it.

Berto



[Qemu-block] [PATCH] qemu-iotests: Test snapshot=on with nonexistent TMPDIR

2018-09-10 Thread Alberto Garcia
We just fixed a bug that was causing a use-after-free when QEMU was
unable to create a temporary snapshot. This is a test case for this
scenario.

Signed-off-by: Alberto Garcia 
---
 tests/qemu-iotests/051        | 3 +++
 tests/qemu-iotests/051.out    | 3 +++
 tests/qemu-iotests/051.pc.out | 3 +++
 3 files changed, 9 insertions(+)

diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051
index ee9c820d0f..25d3b2d478 100755
--- a/tests/qemu-iotests/051
+++ b/tests/qemu-iotests/051
@@ -354,6 +354,9 @@ printf %b "qemu-io $device_id \"write -P 0x33 0 4k\"\ncommit $device_id\n" |
 
 $QEMU_IO -c "read -P 0x33 0 4k" "$TEST_IMG" | _filter_qemu_io
 
+# Using snapshot=on with a non-existent TMPDIR
+TMPDIR=/nonexistent run_qemu -drive driver=null-co,snapshot=on
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out
index b7273505c7..793af2ab96 100644
--- a/tests/qemu-iotests/051.out
+++ b/tests/qemu-iotests/051.out
@@ -455,4 +455,7 @@ wrote 4096/4096 bytes at offset 0
 
 read 4096/4096 bytes at offset 0
 4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Testing: -drive driver=null-co,snapshot=on
+QEMU_PROG: -drive driver=null-co,snapshot=on: Could not get temporary filename: No such file or directory
+
 *** done
diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out
index e9257fe318..ca64edae6a 100644
--- a/tests/qemu-iotests/051.pc.out
+++ b/tests/qemu-iotests/051.pc.out
@@ -527,4 +527,7 @@ wrote 4096/4096 bytes at offset 0
 
 read 4096/4096 bytes at offset 0
 4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+Testing: -drive driver=null-co,snapshot=on
+QEMU_PROG: -drive driver=null-co,snapshot=on: Could not get temporary filename: No such file or directory
+
 *** done
-- 
2.11.0




Re: [Qemu-block] [PATCH] block: Fix use after free error in bdrv_open_inherit()

2018-09-10 Thread Kevin Wolf
On 06.09.2018 at 16:25, Alberto Garcia wrote:
> When a block device is opened with BDRV_O_SNAPSHOT and the
> bdrv_append_temp_snapshot() call fails then the error code path tries
> to unref the already destroyed 'options' QDict.
> 
> This can be reproduced easily by setting TMPDIR to a location where
> the QEMU process can't write:
> 
>$ TMPDIR=/nonexistent $QEMU -drive driver=null-co,snapshot=on
> 
> Signed-off-by: Alberto Garcia 

Thanks, applied to the block branch.

But can we add the reproducer to some iotests case?

Kevin



Re: [Qemu-block] forward of qemu bug regarding random qemu hangs at shutdown

2018-09-10 Thread Kevin Wolf
On 07.09.2018 at 18:21, Marc Hartmayer wrote:
> On Fri, Sep 07, 2018 at 11:34 AM +0200, Kevin Wolf  wrote:
> > On 06.09.2018 at 21:29, Christian Borntraeger wrote:
> >> Kevin,
> >>
> >> for reference, it seems that his bug report somehow got lost.
> >> https://bugs.launchpad.net/qemu/+bug/1788582
> >
> > That looks... interesting. The reproducer doesn't even seem to use a
> > block device, and the backtrace shows a QEMU that is just sitting in the
> > main loop waiting for events, not somewhere in the shutdown process
> > after exiting the main loop where bdrv_drain_all() would be called. I
> > fail to even come up with a theory about any connection between this and
> > commit 0f12264e7.
> >
> > I think we need more information there. Can you set a breakpoint on
> > bdrv_drain_all_begin() to see where it's even called? When I start a
> > qemu instance without a block device, the first time this is called is
> > during shutdown after the mainloop (i.e. after the place where you're
> > seeing a hang).
> 
> I can try that.
> 
> >
> > Maybe bisect within the commit that seems to cause the bug, by
> > selectively disabling some hunks in it?
> 
> If I remove the line(s)
> 
> /* Execute pending BHs first (may modify the graph) and check everything
>  * else only after the BHs have executed. */
> while (aio_poll(qemu_get_aio_context(), false));
> 
> in the function 'bdrv_drain_all_poll', then it works.

It still doesn't make sense to me why this would make any difference
without a block device (and without iothreads), but you could give this
patch series of mine a try:

[PATCH 00/14] Fix some jobs/drain/aio_poll related hangs

Amongst others, it does remove the line you quoted.

Kevin



Re: [Qemu-block] [Qemu-devel] [PATCH] qemu-img.c: add help for each command

2018-09-10 Thread Kevin Wolf
On 08.09.2018 at 05:16, Programmingkid wrote:
> 
> > On Sep 7, 2018, at 11:13 PM, Peter Maydell  wrote:
> > 
> > On 8 September 2018 at 04:01, John Arbuckle  
> > wrote:
> > 
> >> +/* print the help for this command */
> >> +if (strcmp("--help", argv[optind + 1]) == 0) {
> >> +if (strcmp("amend", cmdname) == 0) {
> >> +help_amend();
> >> +} else if (strcmp("bench", cmdname) == 0) {
> >> +help_bench();
> >> +} else if (strcmp("check", cmdname) == 0) {
> >> +help_check();
> >> +} else if (strcmp("commit", cmdname) == 0) {
> >> +help_commit();
> >> +} else if (strcmp("compare", cmdname) == 0) {
> >> +help_compare();
> >> +} else if (strcmp("convert", cmdname) == 0) {
> >> +help_convert();
> >> +} else if (strcmp("create", cmdname) == 0) {
> >> +help_create();
> >> +} else if (strcmp("dd", cmdname) == 0) {
> >> +help_dd();
> >> +} else if (strcmp("info", cmdname) == 0) {
> >> +help_info();
> >> +} else if (strcmp("map", cmdname) == 0) {
> >> +help_map();
> >> +} else if (strcmp("measure", cmdname) == 0) {
> >> +help_measure();
> >> +} else if (strcmp("snapshot", cmdname) == 0) {
> >> +help_snapshot();
> >> +} else if (strcmp("rebase", cmdname) == 0) {
> >> +help_rebase();
> >> +} else if (strcmp("resize", cmdname) == 0) {
> >> +help_resize();
> > 
> > Any time you find yourself writing very repetitive code like
> > this, it's a good idea to ask yourself "is there a way to make
> > this data-driven?".
> > 
> > thanks
> > -- PMM
> 
> Did you want me to loop thru an array of strings instead?

There is already an array of all subcommands, img_cmds. You should
probably add another field there for the help text.

Also, your line wrapping (even mid-word!) is awful. I'm not sure we
really have to fill 80 characters in the output and can't simply keep
the lines a bit shorter so that 80 characters in the source are enough.

But if we do want to use the full 80 characters in the output, I'd
rather break the limit from the coding style and use longer lines in the
source code than doing what you did.

Kevin