Re: [Qemu-block] [PATCH 71/88] block: avoid use of g_new0()

2017-10-06 Thread Philippe Mathieu-Daudé
On 10/06/2017 08:50 PM, Philippe Mathieu-Daudé wrote:
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  block/qcow2.c | 2 +-
>  block/vhdx.c  | 9 +
>  2 files changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/block/qcow2.c b/block/qcow2.c
> index f63d1831f8..3e7d6c81be 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -2738,7 +2738,7 @@ static int qcow2_create2(const char *filename, int64_t 
> total_size,
>  
>  /* Write the header */
>  QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
> -header = g_malloc0(cluster_size);
> +header = g_malloc(cluster_size);

self-NACK since this is wrong.

>  *header = (QCowHeader) {
>  .magic  = cpu_to_be32(QCOW_MAGIC),
>  .version= cpu_to_be32(version),
> diff --git a/block/vhdx.c b/block/vhdx.c
> index 8260fb46cd..91e532df8a 100644
> --- a/block/vhdx.c
> +++ b/block/vhdx.c
> @@ -244,10 +244,11 @@ static void vhdx_region_register(BDRVVHDXState *s,
>  {
>  VHDXRegionEntry *r;
>  
> -r = g_new0(VHDXRegionEntry, 1);
> -
> -r->start = start;
> -r->end = start + length;
> +r = g_new(VHDXRegionEntry, 1);
> +*r = (VHDXRegionEntry) {
> +.start = start,
> +.end = start + length,

this is not wrong since all members are initialized, but it is not good
code practice (if the VHDXRegionEntry structure is expanded with another
member).

> +};
>  
>  QLIST_INSERT_HEAD(>regions, r, entries);
>  }
> 



[Qemu-block] [PATCH 73/88] hw/block/xen_disk: avoid use of g_new0()

2017-10-06 Thread Philippe Mathieu-Daudé
From: Jan Beulich 

Prefer g_new() / g_new0() to be farther backwards compatible with older
glib versions. As there's no point in zeroing the allocation here (the
loop right afterwards fully initializes the memory), use the former.

Signed-off-by: Jan Beulich 
Reviewed-by: Paul Durrant 
Acked-by: Anthony Perard 
Signed-off-by: Philippe Mathieu-Daudé 
[PMD: rebase & subject tweak]
---
 hw/block/xen_disk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 536e2ee735..6d2fd4d284 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -1232,7 +1232,7 @@ static int blk_connect(struct XenDevice *xendev)
 return -1;
 }
 
-domids = g_new0(uint32_t, blkdev->nr_ring_ref);
+domids = g_new(uint32_t, blkdev->nr_ring_ref);
 for (i = 0; i < blkdev->nr_ring_ref; i++) {
 domids[i] = blkdev->xendev.dom;
 }
-- 
2.14.2




[Qemu-block] [PATCH 71/88] block: avoid use of g_new0()

2017-10-06 Thread Philippe Mathieu-Daudé
Signed-off-by: Philippe Mathieu-Daudé 
---
 block/qcow2.c | 2 +-
 block/vhdx.c  | 9 +
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index f63d1831f8..3e7d6c81be 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2738,7 +2738,7 @@ static int qcow2_create2(const char *filename, int64_t 
total_size,
 
 /* Write the header */
 QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
-header = g_malloc0(cluster_size);
+header = g_malloc(cluster_size);
 *header = (QCowHeader) {
 .magic  = cpu_to_be32(QCOW_MAGIC),
 .version= cpu_to_be32(version),
diff --git a/block/vhdx.c b/block/vhdx.c
index 8260fb46cd..91e532df8a 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -244,10 +244,11 @@ static void vhdx_region_register(BDRVVHDXState *s,
 {
 VHDXRegionEntry *r;
 
-r = g_new0(VHDXRegionEntry, 1);
-
-r->start = start;
-r->end = start + length;
+r = g_new(VHDXRegionEntry, 1);
+*r = (VHDXRegionEntry) {
+.start = start,
+.end = start + length,
+};
 
 QLIST_INSERT_HEAD(>regions, r, entries);
 }
-- 
2.14.2




[Qemu-block] [PATCH 72/88] hw/block/nvme: use g_new() family of functions

2017-10-06 Thread Philippe Mathieu-Daudé
Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/block/nvme.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 9aa32692a3..ff712fa8cc 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -454,7 +454,7 @@ static uint16_t nvme_create_sq(NvmeCtrl *n, NvmeCmd *cmd)
 if (!(NVME_SQ_FLAGS_PC(qflags))) {
 return NVME_INVALID_FIELD | NVME_DNR;
 }
-sq = g_malloc0(sizeof(*sq));
+sq = g_new0(NvmeSQueue, 1);
 nvme_init_sq(sq, n, prp1, sqid, cqid, qsize + 1);
 return NVME_SUCCESS;
 }
@@ -532,7 +532,7 @@ static uint16_t nvme_create_cq(NvmeCtrl *n, NvmeCmd *cmd)
 return NVME_INVALID_FIELD | NVME_DNR;
 }
 
-cq = g_malloc0(sizeof(*cq));
+cq = g_new0(NvmeCQueue, 1);
 nvme_init_cq(cq, n, prp1, cqid, vector, qsize + 1,
 NVME_CQ_FLAGS_IEN(qflags));
 return NVME_SUCCESS;
-- 
2.14.2




[Qemu-block] [PATCH 70/88] block: use g_new() family of functions

2017-10-06 Thread Philippe Mathieu-Daudé
Signed-off-by: Philippe Mathieu-Daudé 
---
 block/backup.c |  2 +-
 block/blkdebug.c   |  4 ++--
 block/commit.c |  2 +-
 block/linux-aio.c  |  2 +-
 block/mirror.c |  2 +-
 block/qapi.c   |  4 ++--
 block/qcow2-refcount.c | 12 +---
 block/qed-l2-cache.c   |  2 +-
 block/sheepdog.c   |  2 +-
 block/stream.c |  2 +-
 block/vhdx.c   |  2 +-
 block/win32-aio.c  |  2 +-
 12 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 06ddbfd03d..cd73e7d905 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -514,7 +514,7 @@ static void coroutine_fn backup_run(void *opaque)
 qemu_co_rwlock_unlock(>flush_rwlock);
 g_free(job->done_bitmap);
 
-data = g_malloc(sizeof(*data));
+data = g_new(BackupCompleteData, 1);
 data->ret = ret;
 block_job_defer_to_main_loop(>common, backup_complete, data);
 }
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 46e53f2f09..4d05bcb1d9 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -175,7 +175,7 @@ static int add_rule(void *opaque, QemuOpts *opts, Error 
**errp)
 }
 
 /* Set attributes common for all actions */
-rule = g_malloc0(sizeof(*rule));
+rule = g_new(struct BlkdebugRule, 1);
 *rule = (struct BlkdebugRule) {
 .event  = event,
 .action = d->action,
@@ -727,7 +727,7 @@ static int blkdebug_debug_breakpoint(BlockDriverState *bs, 
const char *event,
 return -ENOENT;
 }
 
-rule = g_malloc(sizeof(*rule));
+rule = g_new(struct BlkdebugRule, 1);
 *rule = (struct BlkdebugRule) {
 .event  = blkdebug_event,
 .action = ACTION_SUSPEND,
diff --git a/block/commit.c b/block/commit.c
index 5036eec434..e698311d5f 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -210,7 +210,7 @@ static void coroutine_fn commit_run(void *opaque)
 out:
 qemu_vfree(buf);
 
-data = g_malloc(sizeof(*data));
+data = g_new(CommitCompleteData, 1);
 data->ret = ret;
 block_job_defer_to_main_loop(>common, commit_complete, data);
 }
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 88b8d55ec7..e5a9d7fdeb 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -474,7 +474,7 @@ LinuxAioState *laio_init(void)
 {
 LinuxAioState *s;
 
-s = g_malloc0(sizeof(*s));
+s = g_new0(LinuxAioState, 1);
 if (event_notifier_init(>e, false) < 0) {
 goto out_free_state;
 }
diff --git a/block/mirror.c b/block/mirror.c
index 153758ca9f..948e5321a7 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -904,7 +904,7 @@ immediate_exit:
 g_free(s->in_flight_bitmap);
 bdrv_dirty_iter_free(s->dbi);
 
-data = g_malloc(sizeof(*data));
+data = g_new(MirrorExitData, 1);
 data->ret = ret;
 
 if (need_drain) {
diff --git a/block/qapi.c b/block/qapi.c
index 7fa2437923..808246bfc4 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -455,7 +455,7 @@ static BlockStats *bdrv_query_bds_stats(BlockDriverState 
*bs,
 {
 BlockStats *s = NULL;
 
-s = g_malloc0(sizeof(*s));
+s = g_new0(BlockStats, 1);
 s->stats = g_malloc0(sizeof(*s->stats));
 
 if (!bs) {
@@ -503,7 +503,7 @@ BlockInfoList *qmp_query_block(Error **errp)
 continue;
 }
 
-info = g_malloc0(sizeof(*info));
+info = g_new0(BlockInfoList, 1);
 bdrv_query_info(blk, >value, _err);
 if (local_err) {
 error_propagate(errp, local_err);
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index aa3fd6cf17..dd2387d2bb 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -755,7 +755,7 @@ static void update_refcount_discard(BlockDriverState *bs,
 }
 }
 
-d = g_malloc(sizeof(*d));
+d = g_new(Qcow2DiscardRegion, 1);
 *d = (Qcow2DiscardRegion) {
 .bs = bs,
 .offset = offset,
@@ -2189,9 +2189,8 @@ write_refblocks:
 
 reftable_size = ROUND_UP((refblock_index + 1) * sizeof(uint64_t),
  s->cluster_size) / sizeof(uint64_t);
-new_on_disk_reftable = g_try_realloc(on_disk_reftable,
- reftable_size *
- sizeof(uint64_t));
+new_on_disk_reftable = g_try_renew(uint64_t, on_disk_reftable,
+   reftable_size);
 if (!new_on_disk_reftable) {
 res->check_errors++;
 ret = -ENOMEM;
@@ -2656,8 +2655,7 @@ static int alloc_refblock(BlockDriverState *bs, uint64_t 
**reftable,
 return -ENOTSUP;
 }
 
-new_reftable = g_try_realloc(*reftable, new_reftable_size *
-sizeof(uint64_t));
+new_reftable = g_try_renew(uint64_t, *reftable, new_reftable_size);
 if (!new_reftable) {
 error_setg(errp, "Failed to increase reftable buffer 

[Qemu-block] [PATCH 61/88] tests: use g_new() family of functions

2017-10-06 Thread Philippe Mathieu-Daudé
From: Marc-André Lureau 

Signed-off-by: Marc-André Lureau 
Signed-off-by: Philippe Mathieu-Daudé 
[PMD: split of some files in other commits of the same series, add libqtest.c]
---
 tests/ahci-test.c | 4 ++--
 tests/fw_cfg-test.c   | 4 ++--
 tests/libqos/ahci.c   | 2 +-
 tests/libqos/libqos.c | 2 +-
 tests/libqos/malloc.c | 6 +++---
 tests/libqtest.c  | 2 +-
 tests/pc-cpu-test.c   | 2 +-
 7 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tests/ahci-test.c b/tests/ahci-test.c
index 999121bb7c..cb84edc8fb 100644
--- a/tests/ahci-test.c
+++ b/tests/ahci-test.c
@@ -155,7 +155,7 @@ static AHCIQState *ahci_vboot(const char *cli, va_list ap)
 {
 AHCIQState *s;
 
-s = g_malloc0(sizeof(AHCIQState));
+s = g_new0(AHCIQState, 1);
 s->parent = qtest_pc_vboot(cli, ap);
 alloc_set_flags(s->parent->alloc, ALLOC_LEAK_ASSERT);
 
@@ -1806,7 +1806,7 @@ static void create_ahci_io_test(enum IOMode type, enum 
AddrMode addr,
 char *name;
 AHCIIOTestOptions *opts;
 
-opts = g_malloc(sizeof(AHCIIOTestOptions));
+opts = g_new(AHCIIOTestOptions, 1);
 opts->length = len;
 opts->address_type = addr;
 opts->io_type = type;
diff --git a/tests/fw_cfg-test.c b/tests/fw_cfg-test.c
index 688342bed5..81f45bdfc8 100644
--- a/tests/fw_cfg-test.c
+++ b/tests/fw_cfg-test.c
@@ -79,8 +79,8 @@ static void test_fw_cfg_numa(void)
 
 g_assert_cmpint(qfw_cfg_get_u64(fw_cfg, FW_CFG_NUMA), ==, nb_nodes);
 
-cpu_mask = g_malloc0(sizeof(uint64_t) * max_cpus);
-node_mask = g_malloc0(sizeof(uint64_t) * nb_nodes);
+cpu_mask = g_new0(uint64_t, max_cpus);
+node_mask = g_new0(uint64_t, nb_nodes);
 
 qfw_cfg_read_data(fw_cfg, cpu_mask, sizeof(uint64_t) * max_cpus);
 qfw_cfg_read_data(fw_cfg, node_mask, sizeof(uint64_t) * nb_nodes);
diff --git a/tests/libqos/ahci.c b/tests/libqos/ahci.c
index 1ca7f456b5..13c0749582 100644
--- a/tests/libqos/ahci.c
+++ b/tests/libqos/ahci.c
@@ -843,7 +843,7 @@ AHCICommand *ahci_command_create(uint8_t command_name)
 AHCICommand *cmd;
 
 g_assert(props);
-cmd = g_malloc0(sizeof(AHCICommand));
+cmd = g_new0(AHCICommand, 1);
 g_assert(!(props->dma && props->pio));
 g_assert(!(props->lba28 && props->lba48));
 g_assert(!(props->read && props->write));
diff --git a/tests/libqos/libqos.c b/tests/libqos/libqos.c
index 6226546c28..991bc1aec2 100644
--- a/tests/libqos/libqos.c
+++ b/tests/libqos/libqos.c
@@ -17,7 +17,7 @@ QOSState *qtest_vboot(QOSOps *ops, const char *cmdline_fmt, 
va_list ap)
 {
 char *cmdline;
 
-struct QOSState *qs = g_malloc(sizeof(QOSState));
+struct QOSState *qs = g_new(QOSState, 1);
 
 cmdline = g_strdup_vprintf(cmdline_fmt, ap);
 qs->qts = qtest_start(cmdline);
diff --git a/tests/libqos/malloc.c b/tests/libqos/malloc.c
index b8eff5f495..ac05874b0a 100644
--- a/tests/libqos/malloc.c
+++ b/tests/libqos/malloc.c
@@ -129,7 +129,7 @@ static MemBlock *mlist_new(uint64_t addr, uint64_t size)
 if (!size) {
 return NULL;
 }
-block = g_malloc0(sizeof(MemBlock));
+block = g_new0(MemBlock, 1);
 
 block->addr = addr;
 block->size = size;
@@ -305,8 +305,8 @@ QGuestAllocator *alloc_init(uint64_t start, uint64_t end)
 s->start = start;
 s->end = end;
 
-s->used = g_malloc(sizeof(MemList));
-s->free = g_malloc(sizeof(MemList));
+s->used = g_new(MemList, 1);
+s->free = g_new(MemList, 1);
 QTAILQ_INIT(s->used);
 QTAILQ_INIT(s->free);
 
diff --git a/tests/libqtest.c b/tests/libqtest.c
index cbd709470b..adf71188b6 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -171,7 +171,7 @@ QTestState *qtest_init_without_qmp_handshake(const char 
*extra_args)
 gchar *command;
 const char *qemu_binary = qtest_qemu_binary();
 
-s = g_malloc(sizeof(*s));
+s = g_new(QTestState, 1);
 
 socket_path = g_strdup_printf("/tmp/qtest-%d.sock", getpid());
 qmp_socket_path = g_strdup_printf("/tmp/qtest-%d.qmp", getpid());
diff --git a/tests/pc-cpu-test.c b/tests/pc-cpu-test.c
index c4211a4e85..11d3e810ef 100644
--- a/tests/pc-cpu-test.c
+++ b/tests/pc-cpu-test.c
@@ -87,7 +87,7 @@ static void add_pc_test_case(const char *mname)
 if (!g_str_has_prefix(mname, "pc-")) {
 return;
 }
-data = g_malloc(sizeof(PCTestData));
+data = g_new(PCTestData, 1);
 data->machine = g_strdup(mname);
 data->cpu_model = "Haswell"; /* 1.3+ theoretically */
 data->sockets = 1;
-- 
2.14.2




[Qemu-block] [PATCH 53/88] iSCSI: use g_new() family of functions

2017-10-06 Thread Philippe Mathieu-Daudé
From: Marc-André Lureau 

Signed-off-by: Marc-André Lureau 
---
 block/iscsi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/iscsi.c b/block/iscsi.c
index 4683f3b244..f9f910168d 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1001,7 +1001,7 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
 return NULL;
 }
 
-acb->task = malloc(sizeof(struct scsi_task));
+acb->task = g_new(struct scsi_task, 1);
 if (acb->task == NULL) {
 error_report("iSCSI: Failed to allocate task for scsi command. %s",
  iscsi_get_error(iscsi));
-- 
2.14.2




[Qemu-block] [PATCH 13/88] Dirty Bitmaps: use g_new() family of functions

2017-10-06 Thread Philippe Mathieu-Daudé
From: Marc-André Lureau 

Signed-off-by: Marc-André Lureau 
Signed-off-by: Philippe Mathieu-Daudé 
[PMD: squashed tests/test-hbitmap.c changes]
---
 tests/test-hbitmap.c | 2 +-
 util/hbitmap.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c
index af41642346..fea3a64712 100644
--- a/tests/test-hbitmap.c
+++ b/tests/test-hbitmap.c
@@ -122,7 +122,7 @@ static void hbitmap_test_truncate_impl(TestHBitmapData 
*data,
 
 n = hbitmap_test_array_size(size);
 m = hbitmap_test_array_size(data->old_size);
-data->bits = g_realloc(data->bits, sizeof(unsigned long) * n);
+data->bits = g_renew(unsigned long, data->bits, n);
 if (n > m) {
 memset(>bits[m], 0x00, sizeof(unsigned long) * (n - m));
 }
diff --git a/util/hbitmap.c b/util/hbitmap.c
index 2f9d0fdbd0..4eb0188836 100644
--- a/util/hbitmap.c
+++ b/util/hbitmap.c
@@ -668,7 +668,7 @@ void hbitmap_truncate(HBitmap *hb, uint64_t size)
 }
 old = hb->sizes[i];
 hb->sizes[i] = size;
-hb->levels[i] = g_realloc(hb->levels[i], size * sizeof(unsigned long));
+hb->levels[i] = g_renew(unsigned long, hb->levels[i], size);
 if (!shrink) {
 memset(>levels[i][old], 0x00,
(size - old) * sizeof(*hb->levels[i]));
-- 
2.14.2




Re: [Qemu-block] [Qemu-devel] [PULL 00/54] Block layer patches

2017-10-06 Thread Peter Maydell
On 6 October 2017 at 16:53, Kevin Wolf <kw...@redhat.com> wrote:
> The following changes since commit a26a98dfb9d448d7234d931ae3720feddf6f0651:
>
>   Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20171006' into 
> staging (2017-10-06 13:19:03 +0100)
>
> are available in the git repository at:
>
>   git://repo.or.cz/qemu/kevin.git tags/for-upstream
>
> for you to fetch changes up to fc3fd63fc0573ffd2ee569591a2e7f6c7310fd18:
>
>   Merge remote-tracking branch 'mreitz/tags/pull-block-2017-10-06' into 
> queue-block (2017-10-06 16:32:08 +0200)
>
> 
> Block layer patches
>
> 

Applied, thanks.

-- PMM



[Qemu-block] [PULL 53/54] qcow2: truncate the tail of the image file after shrinking the image

2017-10-06 Thread Kevin Wolf
From: Pavel Butsykin 

Now after shrinking the image, at the end of the image file, there might be a
tail that probably will never be used. So we can find the last used cluster and
cut the tail.

Signed-off-by: Pavel Butsykin 
Reviewed-by: John Snow 
Message-id: 20170929121613.25997-3-pbutsy...@virtuozzo.com
Signed-off-by: Max Reitz 
---
 block/qcow2.h  |  1 +
 block/qcow2-refcount.c | 22 ++
 block/qcow2.c  | 23 +++
 3 files changed, 46 insertions(+)

diff --git a/block/qcow2.h b/block/qcow2.h
index 5a289a81e2..782a206ecb 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -597,6 +597,7 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int 
refcount_order,
 BlockDriverAmendStatusCB *status_cb,
 void *cb_opaque, Error **errp);
 int qcow2_shrink_reftable(BlockDriverState *bs);
+int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size);
 
 /* qcow2-cluster.c functions */
 int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 88d5a3f1ad..aa3fd6cf17 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -3181,3 +3181,25 @@ out:
 g_free(reftable_tmp);
 return ret;
 }
+
+int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
+{
+BDRVQcow2State *s = bs->opaque;
+int64_t i;
+
+for (i = size_to_clusters(s, size) - 1; i >= 0; i--) {
+uint64_t refcount;
+int ret = qcow2_get_refcount(bs, i, );
+if (ret < 0) {
+fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
+i, strerror(-ret));
+return ret;
+}
+if (refcount > 0) {
+return i;
+}
+}
+qcow2_signal_corruption(bs, true, -1, -1,
+"There are no references in the refcount table.");
+return -EIO;
+}
diff --git a/block/qcow2.c b/block/qcow2.c
index 960b3ab977..f63d1831f8 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3107,6 +3107,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t 
offset,
 new_l1_size = size_to_l1(s, offset);
 
 if (offset < old_length) {
+int64_t last_cluster, old_file_size;
 if (prealloc != PREALLOC_MODE_OFF) {
 error_setg(errp,
"Preallocation can't be used for shrinking an image");
@@ -3135,6 +3136,28 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t 
offset,
  "Failed to discard unused refblocks");
 return ret;
 }
+
+old_file_size = bdrv_getlength(bs->file->bs);
+if (old_file_size < 0) {
+error_setg_errno(errp, -old_file_size,
+ "Failed to inquire current file length");
+return old_file_size;
+}
+last_cluster = qcow2_get_last_cluster(bs, old_file_size);
+if (last_cluster < 0) {
+error_setg_errno(errp, -last_cluster,
+ "Failed to find the last cluster");
+return last_cluster;
+}
+if ((last_cluster + 1) * s->cluster_size < old_file_size) {
+ret = bdrv_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
+PREALLOC_MODE_OFF, NULL);
+if (ret < 0) {
+warn_report("Failed to truncate the tail of the image: %s",
+strerror(-ret));
+ret = 0;
+}
+}
 } else {
 ret = qcow2_grow_l1_table(bs, new_l1_size, true);
 if (ret < 0) {
-- 
2.13.6




[Qemu-block] [PULL 54/54] block/mirror: check backing in bdrv_mirror_top_flush

2017-10-06 Thread Kevin Wolf
From: Vladimir Sementsov-Ogievskiy 

Backing may be zero after failed bdrv_append in mirror_start_job,
which leads to SIGSEGV.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-id: 20170929152255.5431-1-vsement...@virtuozzo.com
Signed-off-by: Max Reitz 
---
 block/mirror.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/block/mirror.c b/block/mirror.c
index 3b6f0c5772..153758ca9f 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1041,6 +1041,10 @@ static int coroutine_fn 
bdrv_mirror_top_pwritev(BlockDriverState *bs,
 
 static int coroutine_fn bdrv_mirror_top_flush(BlockDriverState *bs)
 {
+if (bs->backing == NULL) {
+/* we can be here after failed bdrv_append in mirror_start_job */
+return 0;
+}
 return bdrv_co_flush(bs->backing->bs);
 }
 
-- 
2.13.6




[Qemu-block] [PULL 49/54] block: support passthrough of BDRV_REQ_FUA in crypto driver

2017-10-06 Thread Kevin Wolf
From: "Daniel P. Berrange" 

The BDRV_REQ_FUA flag can trivially be allowed in the crypt driver
as a passthrough to the underlying block driver.

Reviewed-by: Max Reitz 
Reviewed-by: Eric Blake 
Signed-off-by: Daniel P. Berrange 
Message-id: 20170927125340.12360-7-berra...@redhat.com
Signed-off-by: Max Reitz 
---
 block/crypto.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/block/crypto.c b/block/crypto.c
index edf53d49d1..60ddf8623e 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -279,6 +279,9 @@ static int block_crypto_open_generic(QCryptoBlockFormat 
format,
 return -EINVAL;
 }
 
+bs->supported_write_flags = BDRV_REQ_FUA &
+bs->file->bs->supported_write_flags;
+
 opts = qemu_opts_create(opts_spec, NULL, 0, _abort);
 qemu_opts_absorb_qdict(opts, options, _err);
 if (local_err) {
@@ -462,7 +465,7 @@ block_crypto_co_pwritev(BlockDriverState *bs, uint64_t 
offset, uint64_t bytes,
 uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
 uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block);
 
-assert(!flags);
+assert(!(flags & ~BDRV_REQ_FUA));
 assert(payload_offset < INT64_MAX);
 assert(QEMU_IS_ALIGNED(offset, sector_size));
 assert(QEMU_IS_ALIGNED(bytes, sector_size));
@@ -495,7 +498,7 @@ block_crypto_co_pwritev(BlockDriverState *bs, uint64_t 
offset, uint64_t bytes,
 qemu_iovec_add(_qiov, cipher_data, cur_bytes);
 
 ret = bdrv_co_pwritev(bs->file, payload_offset + offset + bytes_done,
-  cur_bytes, _qiov, 0);
+  cur_bytes, _qiov, flags);
 if (ret < 0) {
 goto cleanup;
 }
-- 
2.13.6




[Qemu-block] [PULL 52/54] qcow2: fix return error code in qcow2_truncate()

2017-10-06 Thread Kevin Wolf
From: Pavel Butsykin 

Signed-off-by: Pavel Butsykin 
Reviewed-by: Eric Blake 
Reviewed-by: John Snow 
Reviewed-by: Max Reitz 
Message-id: 20170929121613.25997-2-pbutsy...@virtuozzo.com
Signed-off-by: Max Reitz 
---
 block/qcow2.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 33597394b5..960b3ab977 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3167,7 +3167,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t 
offset,
 if (old_file_size < 0) {
 error_setg_errno(errp, -old_file_size,
  "Failed to inquire current file length");
-return ret;
+return old_file_size;
 }
 
 nb_new_data_clusters = DIV_ROUND_UP(offset - old_length,
@@ -3196,7 +3196,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t 
offset,
 if (allocation_start < 0) {
 error_setg_errno(errp, -allocation_start,
  "Failed to resize refcount structures");
-return -allocation_start;
+return allocation_start;
 }
 
 clusters_allocated = qcow2_alloc_clusters_at(bs, allocation_start,
-- 
2.13.6




[Qemu-block] [PULL 48/54] block: convert qcrypto_block_encrypt|decrypt to take bytes offset

2017-10-06 Thread Kevin Wolf
From: "Daniel P. Berrange" 

Instead of sector offset, take the bytes offset when encrypting
or decrypting data.

Signed-off-by: Daniel P. Berrange 
Message-id: 20170927125340.12360-6-berra...@redhat.com
Reviewed-by: Eric Blake 
Reviewed-by: Max Reitz 
Signed-off-by: Max Reitz 
---
 crypto/blockpriv.h |  4 ++--
 include/crypto/block.h | 14 --
 block/crypto.c | 12 
 block/qcow.c   | 11 +++
 block/qcow2-cluster.c  |  8 +++-
 block/qcow2.c  |  4 ++--
 crypto/block-luks.c| 12 
 crypto/block-qcow.c| 12 
 crypto/block.c | 20 ++--
 9 files changed, 56 insertions(+), 41 deletions(-)

diff --git a/crypto/blockpriv.h b/crypto/blockpriv.h
index d227522d88..41840abcec 100644
--- a/crypto/blockpriv.h
+++ b/crypto/blockpriv.h
@@ -82,7 +82,7 @@ int qcrypto_block_decrypt_helper(QCryptoCipher *cipher,
  size_t niv,
  QCryptoIVGen *ivgen,
  int sectorsize,
- uint64_t startsector,
+ uint64_t offset,
  uint8_t *buf,
  size_t len,
  Error **errp);
@@ -91,7 +91,7 @@ int qcrypto_block_encrypt_helper(QCryptoCipher *cipher,
  size_t niv,
  QCryptoIVGen *ivgen,
  int sectorsize,
- uint64_t startsector,
+ uint64_t offset,
  uint8_t *buf,
  size_t len,
  Error **errp);
diff --git a/include/crypto/block.h b/include/crypto/block.h
index 13232b2472..cd18f46d56 100644
--- a/include/crypto/block.h
+++ b/include/crypto/block.h
@@ -161,18 +161,19 @@ QCryptoBlockInfo *qcrypto_block_get_info(QCryptoBlock 
*block,
 /**
  * @qcrypto_block_decrypt:
  * @block: the block encryption object
- * @startsector: the sector from which @buf was read
+ * @offset: the position at which @iov was read
  * @buf: the buffer to decrypt
  * @len: the length of @buf in bytes
  * @errp: pointer to a NULL-initialized error object
  *
  * Decrypt @len bytes of cipher text in @buf, writing
- * plain text back into @buf
+ * plain text back into @buf. @len and @offset must be
+ * a multiple of the encryption format sector size.
  *
  * Returns 0 on success, -1 on failure
  */
 int qcrypto_block_decrypt(QCryptoBlock *block,
-  uint64_t startsector,
+  uint64_t offset,
   uint8_t *buf,
   size_t len,
   Error **errp);
@@ -180,18 +181,19 @@ int qcrypto_block_decrypt(QCryptoBlock *block,
 /**
  * @qcrypto_block_encrypt:
  * @block: the block encryption object
- * @startsector: the sector to which @buf will be written
+ * @offset: the position at which @iov will be written
  * @buf: the buffer to decrypt
  * @len: the length of @buf in bytes
  * @errp: pointer to a NULL-initialized error object
  *
  * Encrypt @len bytes of plain text in @buf, writing
- * cipher text back into @buf
+ * cipher text back into @buf. @len and @offset must be
+ * a multiple of the encryption format sector size.
  *
  * Returns 0 on success, -1 on failure
  */
 int qcrypto_block_encrypt(QCryptoBlock *block,
-  uint64_t startsector,
+  uint64_t offset,
   uint8_t *buf,
   size_t len,
   Error **errp);
diff --git a/block/crypto.c b/block/crypto.c
index 965c173b01..edf53d49d1 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -398,7 +398,6 @@ block_crypto_co_preadv(BlockDriverState *bs, uint64_t 
offset, uint64_t bytes,
 int ret = 0;
 uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
 uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block);
-uint64_t sector_num = offset / sector_size;
 
 assert(!flags);
 assert(payload_offset < INT64_MAX);
@@ -430,15 +429,14 @@ block_crypto_co_preadv(BlockDriverState *bs, uint64_t 
offset, uint64_t bytes,
 goto cleanup;
 }
 
-if (qcrypto_block_decrypt(crypto->block, sector_num, cipher_data,
-  cur_bytes, NULL) < 0) {
+if (qcrypto_block_decrypt(crypto->block, offset + bytes_done,
+  cipher_data, cur_bytes, NULL) < 0) {
 ret = -EIO;
 goto cleanup;
 }
 
 qemu_iovec_from_buf(qiov, bytes_done, cipher_data, cur_bytes);
 
-sector_num += cur_bytes / sector_size;
 bytes -= cur_bytes;
 bytes_done += 

[Qemu-block] [PULL 50/54] block/mirror: check backing in bdrv_mirror_top_refresh_filename

2017-10-06 Thread Kevin Wolf
From: Vladimir Sementsov-Ogievskiy 

Backing may be zero after failed bdrv_attach_child in
bdrv_set_backing_hd, which leads to SIGSEGV.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-id: 20170928120300.58164-1-vsement...@virtuozzo.com
Reviewed-by: John Snow 
Signed-off-by: Max Reitz 
---
 block/mirror.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/block/mirror.c b/block/mirror.c
index 459b80f8f3..3b6f0c5772 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1058,6 +1058,11 @@ static int coroutine_fn 
bdrv_mirror_top_pdiscard(BlockDriverState *bs,
 
 static void bdrv_mirror_top_refresh_filename(BlockDriverState *bs, QDict *opts)
 {
+if (bs->backing == NULL) {
+/* we can be here after failed bdrv_attach_child in
+ * bdrv_set_backing_hd */
+return;
+}
 bdrv_refresh_filename(bs->backing->bs);
 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
 bs->backing->bs->filename);
-- 
2.13.6




[Qemu-block] [PULL 46/54] block: fix data type casting for crypto payload offset

2017-10-06 Thread Kevin Wolf
From: "Daniel P. Berrange" 

The crypto APIs report the offset of the data payload as an uint64_t
type, but the block driver is casting to size_t or ssize_t which will
potentially truncate.

Most of the block APIs use int64_t for offsets meanwhile, so even if
using uint64_t in the crypto block driver we are still at risk of
truncation.

Change the block crypto driver to use uint64_t, but add asserts that
the value is less than INT64_MAX.

Reviewed-by: Max Reitz 
Reviewed-by: Eric Blake 
Signed-off-by: Daniel P. Berrange 
Message-id: 20170927125340.12360-4-berra...@redhat.com
Signed-off-by: Max Reitz 
---
 block/crypto.c | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/block/crypto.c b/block/crypto.c
index 684cabeaf8..61f5d77bc0 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -364,8 +364,9 @@ static int block_crypto_truncate(BlockDriverState *bs, 
int64_t offset,
  PreallocMode prealloc, Error **errp)
 {
 BlockCrypto *crypto = bs->opaque;
-size_t payload_offset =
+uint64_t payload_offset =
 qcrypto_block_get_payload_offset(crypto->block);
+assert(payload_offset < (INT64_MAX - offset));
 
 offset += payload_offset;
 
@@ -395,8 +396,9 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t 
sector_num,
 uint8_t *cipher_data = NULL;
 QEMUIOVector hd_qiov;
 int ret = 0;
-size_t payload_offset =
+uint64_t payload_offset =
 qcrypto_block_get_payload_offset(crypto->block) / 512;
+assert(payload_offset < (INT64_MAX / 512));
 
 qemu_iovec_init(_qiov, qiov->niov);
 
@@ -462,8 +464,9 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t 
sector_num,
 uint8_t *cipher_data = NULL;
 QEMUIOVector hd_qiov;
 int ret = 0;
-size_t payload_offset =
+uint64_t payload_offset =
 qcrypto_block_get_payload_offset(crypto->block) / 512;
+assert(payload_offset < (INT64_MAX / 512));
 
 qemu_iovec_init(_qiov, qiov->niov);
 
@@ -524,7 +527,9 @@ static int64_t block_crypto_getlength(BlockDriverState *bs)
 BlockCrypto *crypto = bs->opaque;
 int64_t len = bdrv_getlength(bs->file->bs);
 
-ssize_t offset = qcrypto_block_get_payload_offset(crypto->block);
+uint64_t offset = qcrypto_block_get_payload_offset(crypto->block);
+assert(offset < INT64_MAX);
+assert(offset < len);
 
 len -= offset;
 
-- 
2.13.6




[Qemu-block] [PULL 43/54] iotests: Add test 197 for covering copy-on-read

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

Add a test for qcow2 copy-on-read behavior, including exposure
for the just-fixed bugs.

The copy-on-read behavior is always to a qcow2 image, but the
test is careful to allow running with most image protocol/format
combos as the backing file being copied from (luks being the
exception, as it is harder to pass the right secret to all the
right places).  In fact, for './check nbd', this appears to be
the first time we've had a qcow2 image wrapping NBD, requiring
an additional line in _filter_img_create to match the similar
line in _filter_img_info.

Invoking blkdebug to prove we don't write too much took some
effort to get working; and it requires that $TEST_WRAP (based
on $TEST_DIR) not be subject to word splitting.  We may decide
later to have the entire iotests suite use relative rather than
absolute names, to avoid problems inherited by the absolute
name of $PWD or $TEST_DIR, at which point the sanity check in
this commit could be simplified.

This test requires at least 2G of consecutive memory to succeed;
as such, it is prone to spurious failures, particularly on
32-bit machines under load.  This situation is detected and
triggers an early exit to skip the test, rather than a failure.
To manually provoke this setup on a beefier machine, I used:
  $ (ulimit -S -v 100; ./check -qcow2 197)

Signed-off-by: Eric Blake 
Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/197   | 109 +++
 tests/qemu-iotests/197.out   |  26 ++
 tests/qemu-iotests/common.filter |   1 +
 tests/qemu-iotests/group |   1 +
 4 files changed, 137 insertions(+)
 create mode 100755 tests/qemu-iotests/197
 create mode 100644 tests/qemu-iotests/197.out

diff --git a/tests/qemu-iotests/197 b/tests/qemu-iotests/197
new file mode 100755
index 00..887eb4f496
--- /dev/null
+++ b/tests/qemu-iotests/197
@@ -0,0 +1,109 @@
+#!/bin/bash
+#
+# Test case for copy-on-read into qcow2
+#
+# Copyright (C) 2017 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+# creator
+owner=ebl...@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+status=1 # failure is the default!
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+TEST_WRAP="$TEST_DIR/t.wrap.qcow2"
+BLKDBG_CONF="$TEST_DIR/blkdebug.conf"
+
+# Sanity check: our use of blkdebug fails if $TEST_DIR contains spaces
+# or other problems
+case "$TEST_DIR" in
+*[^-_a-zA-Z0-9/]*)
+_notrun "Suspicious TEST_DIR='$TEST_DIR', cowardly refusing to run" ;;
+esac
+
+_cleanup()
+{
+_cleanup_test_img
+rm -f "$BLKDBG_CONF"
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# Test is supported for any backing file; but we force qcow2 for our wrapper.
+_supported_fmt generic
+_supported_proto generic
+_supported_os Linux
+# LUKS support may be possible, but it complicates things.
+_unsupported_fmt luks
+
+echo
+echo '=== Copy-on-read ==='
+echo
+
+# Prep the images
+_make_test_img 4G
+$QEMU_IO -c "write -P 55 3G 1k" "$TEST_IMG" | _filter_qemu_io
+IMGPROTO=file IMGFMT=qcow2 IMGOPTS= TEST_IMG_FILE="$TEST_WRAP" \
+_make_test_img -F "$IMGFMT" -b "$TEST_IMG" | _filter_img_create
+$QEMU_IO -f qcow2 -c "write -z -u 1M 64k" "$TEST_WRAP" | _filter_qemu_io
+
+# Ensure that a read of two clusters, but where one is already allocated,
+# does not re-write the allocated cluster
+cat > "$BLKDBG_CONF" <&1 | _filter_qemu_io)
+case $output in
+*allocate*)
+_notrun "Insufficent memory to run test" ;;
+*) printf '%s\n' "$output" ;;
+esac
+$QEMU_IO -f qcow2 -C -c "read -P 0 $((3*1024*1024*1024 + 1024)) 1k" \
+"$TEST_WRAP" | _filter_qemu_io
+
+# Copy-on-read is incompatible with read-only
+$QEMU_IO -f qcow2 -C -r "$TEST_WRAP" 2>&1 | _filter_testdir
+
+# Break the backing chain, and show that images are identical, and that
+# we properly copied over explicit zeros.
+$QEMU_IMG rebase -u -b "" -f qcow2 "$TEST_WRAP"
+$QEMU_IO -f qcow2 -c map "$TEST_WRAP"
+_check_test_img
+$QEMU_IMG compare -f $IMGFMT -F qcow2 "$TEST_IMG" "$TEST_WRAP"
+
+# success, all done
+echo '*** done'
+status=0
diff --git a/tests/qemu-iotests/197.out b/tests/qemu-iotests/197.out
new file mode 100644
index 00..52b4137d7b
--- /dev/null
+++ b/tests/qemu-iotests/197.out
@@ -0,0 +1,26 @@
+QA output created 

[Qemu-block] [PULL 42/54] block: Perform copy-on-read in loop

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

Improve our braindead copy-on-read implementation.  Pre-patch,
we have multiple issues:
- we create a bounce buffer and perform a write for the entire
request, even if the active image already has 99% of the
clusters occupied, and really only needs to copy-on-read the
remaining 1% of the clusters
- our bounce buffer was as large as the read request, and can
needlessly exhaust our memory by using double the memory of
the request size (the original request plus our bounce buffer),
rather than a capped maximum overhead beyond the original
- if a driver has a max_transfer limit, we are bypassing the
normal code in bdrv_aligned_preadv() that fragments to that
limit, and instead attempt to read the entire buffer from the
driver in one go, which some drivers may assert on
- a client can request a large request of nearly 2G such that
rounding the request out to cluster boundaries results in a
byte count larger than 2G.  While this cannot exceed 32 bits,
it DOES have some follow-on problems:
-- the call to bdrv_driver_pread() can assert for exceeding
BDRV_REQUEST_MAX_BYTES, if the driver is old and lacks
.bdrv_co_preadv
-- if the buffer is all zeroes, the subsequent call to
bdrv_co_do_pwrite_zeroes is a no-op due to a negative size,
which means we did not actually copy on read

Fix all of these issues by breaking up the action into a loop,
where each iteration is capped to sane limits.  Also, querying
the allocation status allows us to optimize: when data is
already present in the active layer, we don't need to bounce.

Note that the code has a telling comment that copy-on-read
should probably be a filter driver rather than a bolt-on hack
in io.c; but that remains a task for another day.

CC: qemu-sta...@nongnu.org
Signed-off-by: Eric Blake 
Reviewed-by: Kevin Wolf 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 block/io.c | 120 +
 1 file changed, 82 insertions(+), 38 deletions(-)

diff --git a/block/io.c b/block/io.c
index a5598ed869..8e419070b5 100644
--- a/block/io.c
+++ b/block/io.c
@@ -34,6 +34,9 @@
 
 #define NOT_DONE 0x7fff /* used while emulated sync operation in progress 
*/
 
+/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
+#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
+
 static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
 int64_t offset, int bytes, BdrvRequestFlags flags);
 
@@ -945,11 +948,14 @@ static int coroutine_fn 
bdrv_co_do_copy_on_readv(BdrvChild *child,
 
 BlockDriver *drv = bs->drv;
 struct iovec iov;
-QEMUIOVector bounce_qiov;
+QEMUIOVector local_qiov;
 int64_t cluster_offset;
 unsigned int cluster_bytes;
 size_t skip_bytes;
 int ret;
+int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
+BDRV_REQUEST_MAX_BYTES);
+unsigned int progress = 0;
 
 /* FIXME We cannot require callers to have write permissions when all they
  * are doing is a read request. If we did things right, write permissions
@@ -961,53 +967,95 @@ static int coroutine_fn 
bdrv_co_do_copy_on_readv(BdrvChild *child,
 // assert(child->perm & (BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE));
 
 /* Cover entire cluster so no additional backing file I/O is required when
- * allocating cluster in the image file.
+ * allocating cluster in the image file.  Note that this value may exceed
+ * BDRV_REQUEST_MAX_BYTES (even when the original read did not), which
+ * is one reason we loop rather than doing it all at once.
  */
 bdrv_round_to_clusters(bs, offset, bytes, _offset, _bytes);
+skip_bytes = offset - cluster_offset;
 
 trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
cluster_offset, cluster_bytes);
 
-iov.iov_len = cluster_bytes;
-iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
+bounce_buffer = qemu_try_blockalign(bs,
+MIN(MIN(max_transfer, cluster_bytes),
+MAX_BOUNCE_BUFFER));
 if (bounce_buffer == NULL) {
 ret = -ENOMEM;
 goto err;
 }
 
-qemu_iovec_init_external(_qiov, , 1);
+while (cluster_bytes) {
+int64_t pnum;
 
-ret = bdrv_driver_preadv(bs, cluster_offset, cluster_bytes,
- _qiov, 0);
-if (ret < 0) {
-goto err;
-}
+ret = bdrv_is_allocated(bs, cluster_offset,
+MIN(cluster_bytes, max_transfer), );
+if (ret < 0) {
+/* Safe to treat errors in querying allocation as if
+ * unallocated; we'll probably fail again soon on the
+ * read, but at least that will set a decent errno.
+ */
+pnum = MIN(cluster_bytes, max_transfer);
+   

[Qemu-block] [PULL 37/54] commit: Remove overlay_bs

2017-10-06 Thread Kevin Wolf
We don't need to make any assumptions about the graph layout above the
top node of the commit operation any more. Remove the use of
bdrv_find_overlay() and related variables from the commit job code.

bdrv_drop_intermediate() doesn't use the 'active' parameter any more, so
we can just drop it.

The overlay node was previously added to the block job to get a
BLK_PERM_GRAPH_MOD. We really need to respect those permissions in
bdrv_drop_intermediate() now, but as long as we haven't figured out yet
how BLK_PERM_GRAPH_MOD is actually supposed to work, just leave a TODO
comment there.

With this change, it is now possible to perform another block job on an
overlay node without conflicts. qemu-iotests 030 is changed accordingly.

Signed-off-by: Kevin Wolf 
Reviewed-by: Eric Blake 
---
 include/block/block.h  |  3 +--
 block.c|  6 +++--
 block/commit.c | 62 --
 tests/qemu-iotests/030 |  4 
 4 files changed, 20 insertions(+), 55 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index 3c3af462e4..d5c2731a03 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -315,8 +315,7 @@ int bdrv_commit(BlockDriverState *bs);
 int bdrv_change_backing_file(BlockDriverState *bs,
 const char *backing_file, const char *backing_fmt);
 void bdrv_register(BlockDriver *bdrv);
-int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
-   BlockDriverState *base,
+int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
const char *backing_file_str);
 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
 BlockDriverState *bs);
diff --git a/block.c b/block.c
index 1b098d4d09..46eb1728da 100644
--- a/block.c
+++ b/block.c
@@ -3491,8 +3491,8 @@ BlockDriverState *bdrv_find_base(BlockDriverState *bs)
  *  if active == top, that is considered an error
  *
  */
-int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
-   BlockDriverState *base, const char 
*backing_file_str)
+int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
+   const char *backing_file_str)
 {
 BdrvChild *c, *next;
 Error *local_err = NULL;
@@ -3510,6 +3510,8 @@ int bdrv_drop_intermediate(BlockDriverState *active, 
BlockDriverState *top,
 }
 
 /* success - we can delete the intermediate states, and link top->base */
+/* TODO Check graph modification op blockers (BLK_PERM_GRAPH_MOD) once
+ * we've figured out how they should work. */
 backing_file_str = backing_file_str ? backing_file_str : base->filename;
 
 QLIST_FOREACH_SAFE(c, >parents, next_parent, next) {
diff --git a/block/commit.c b/block/commit.c
index 610e1cd8f5..5036eec434 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -36,13 +36,11 @@ enum {
 typedef struct CommitBlockJob {
 BlockJob common;
 RateLimit limit;
-BlockDriverState *active;
 BlockDriverState *commit_top_bs;
 BlockBackend *top;
 BlockBackend *base;
 BlockdevOnError on_error;
 int base_flags;
-int orig_overlay_flags;
 char *backing_file_str;
 } CommitBlockJob;
 
@@ -81,18 +79,15 @@ static void commit_complete(BlockJob *job, void *opaque)
 {
 CommitBlockJob *s = container_of(job, CommitBlockJob, common);
 CommitCompleteData *data = opaque;
-BlockDriverState *active = s->active;
 BlockDriverState *top = blk_bs(s->top);
 BlockDriverState *base = blk_bs(s->base);
-BlockDriverState *overlay_bs = bdrv_find_overlay(active, s->commit_top_bs);
+BlockDriverState *commit_top_bs = s->commit_top_bs;
 int ret = data->ret;
 bool remove_commit_top_bs = false;
 
-/* Make sure overlay_bs and top stay around until bdrv_set_backing_hd() */
+/* Make sure commit_top_bs and top stay around until bdrv_replace_node() */
 bdrv_ref(top);
-if (overlay_bs) {
-bdrv_ref(overlay_bs);
-}
+bdrv_ref(commit_top_bs);
 
 /* Remove base node parent that still uses BLK_PERM_WRITE/RESIZE before
  * the normal backing chain can be restored. */
@@ -100,9 +95,9 @@ static void commit_complete(BlockJob *job, void *opaque)
 
 if (!block_job_is_cancelled(>common) && ret == 0) {
 /* success */
-ret = bdrv_drop_intermediate(active, s->commit_top_bs, base,
+ret = bdrv_drop_intermediate(s->commit_top_bs, base,
  s->backing_file_str);
-} else if (overlay_bs) {
+} else {
 /* XXX Can (or should) we somehow keep 'consistent read' blocked even
  * after the failed/cancelled commit job is gone? If we already wrote
  * something to base, the intermediate images aren't valid any more. */
@@ -115,9 +110,6 @@ static void commit_complete(BlockJob *job, void *opaque)
 if (s->base_flags != 

[Qemu-block] [PULL 39/54] block: Uniform handling of 0-length bdrv_get_block_status()

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

Handle a 0-length block status request up front, with a uniform
return value claiming the area is not allocated.

Most callers don't pass a length of 0 to bdrv_get_block_status()
and friends; but it definitely happens with a 0-length read when
copy-on-read is enabled.  While we could audit all callers to
ensure that they never make a 0-length request, and then assert
that fact, it was just as easy to fix things to always report
success (as long as the callers are careful to not go into an
infinite loop).  However, we had inconsistent behavior on whether
the status is reported as allocated or defers to the backing
layer, depending on what callbacks the driver implements, and
possibly wasting quite a few CPU cycles to get to that answer.
Consistently reporting unallocated up front doesn't really hurt
anything, and makes it easier both for callers (0-length requests
now have well-defined behavior) and for drivers (drivers don't
have to deal with 0-length requests).

Signed-off-by: Eric Blake 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 block/io.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/block/io.c b/block/io.c
index e0f904583f..94f74703b7 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1777,6 +1777,10 @@ static int64_t coroutine_fn 
bdrv_co_get_block_status(BlockDriverState *bs,
 *pnum = 0;
 return BDRV_BLOCK_EOF;
 }
+if (!nb_sectors) {
+*pnum = 0;
+return 0;
+}
 
 n = total_sectors - sector_num;
 if (n < nb_sectors) {
-- 
2.13.6




[Qemu-block] [PULL 41/54] block: Add blkdebug hook for copy-on-read

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

Make it possible to inject errors on writes performed during a
read operation due to copy-on-read semantics.

Signed-off-by: Eric Blake 
Reviewed-by: Jeff Cody 
Reviewed-by: Kevin Wolf 
Reviewed-by: John Snow 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 qapi/block-core.json | 5 -
 block/io.c   | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 750bb0c77c..ab96e348e6 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2538,6 +2538,8 @@
 #
 # @l1_shrink_free_l2_clusters: discard the l2 tables. (since 2.11)
 #
+# @cor_write: a write due to copy-on-read (since 2.11)
+#
 # Since: 2.9
 ##
 { 'enum': 'BlkdebugEvent', 'prefix': 'BLKDBG',
@@ -2555,7 +2557,8 @@
 'flush_to_disk', 'pwritev_rmw_head', 'pwritev_rmw_after_head',
 'pwritev_rmw_tail', 'pwritev_rmw_after_tail', 'pwritev',
 'pwritev_zero', 'pwritev_done', 'empty_image_prepare',
-'l1_shrink_write_table', 'l1_shrink_free_l2_clusters' ] }
+'l1_shrink_write_table', 'l1_shrink_free_l2_clusters',
+'cor_write'] }
 
 ##
 # @BlkdebugInjectErrorOptions:
diff --git a/block/io.c b/block/io.c
index 94f74703b7..a5598ed869 100644
--- a/block/io.c
+++ b/block/io.c
@@ -983,6 +983,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild 
*child,
 goto err;
 }
 
+bdrv_debug_event(bs, BLKDBG_COR_WRITE);
 if (drv->bdrv_co_pwrite_zeroes &&
 buffer_is_zero(bounce_buffer, iov.iov_len)) {
 /* FIXME: Should we (perhaps conditionally) be setting
-- 
2.13.6




[Qemu-block] [PULL 47/54] block: convert crypto driver to bdrv_co_preadv|pwritev

2017-10-06 Thread Kevin Wolf
From: "Daniel P. Berrange" 

Make the crypto driver implement the bdrv_co_preadv|pwritev
callbacks, and also use bdrv_co_preadv|pwritev for I/O
with the protocol driver beneath. This replaces sector based
I/O with byte based I/O, and allows us to stop assuming the
physical sector size matches the encryption sector size.

Signed-off-by: Daniel P. Berrange 
Message-id: 20170927125340.12360-5-berra...@redhat.com
Reviewed-by: Eric Blake 
Reviewed-by: Max Reitz 
Signed-off-by: Max Reitz 
---
 block/crypto.c | 106 +
 1 file changed, 54 insertions(+), 52 deletions(-)

diff --git a/block/crypto.c b/block/crypto.c
index 61f5d77bc0..965c173b01 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -387,18 +387,23 @@ static void block_crypto_close(BlockDriverState *bs)
 #define BLOCK_CRYPTO_MAX_IO_SIZE (1024 * 1024)
 
 static coroutine_fn int
-block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num,
-  int remaining_sectors, QEMUIOVector *qiov)
+block_crypto_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+   QEMUIOVector *qiov, int flags)
 {
 BlockCrypto *crypto = bs->opaque;
-int cur_nr_sectors; /* number of sectors in current iteration */
+uint64_t cur_bytes; /* number of bytes in current iteration */
 uint64_t bytes_done = 0;
 uint8_t *cipher_data = NULL;
 QEMUIOVector hd_qiov;
 int ret = 0;
-uint64_t payload_offset =
-qcrypto_block_get_payload_offset(crypto->block) / 512;
-assert(payload_offset < (INT64_MAX / 512));
+uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
+uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block);
+uint64_t sector_num = offset / sector_size;
+
+assert(!flags);
+assert(payload_offset < INT64_MAX);
+assert(QEMU_IS_ALIGNED(offset, sector_size));
+assert(QEMU_IS_ALIGNED(bytes, sector_size));
 
 qemu_iovec_init(_qiov, qiov->niov);
 
@@ -413,37 +418,29 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t 
sector_num,
 goto cleanup;
 }
 
-while (remaining_sectors) {
-cur_nr_sectors = remaining_sectors;
-
-if (cur_nr_sectors > (BLOCK_CRYPTO_MAX_IO_SIZE / 512)) {
-cur_nr_sectors = (BLOCK_CRYPTO_MAX_IO_SIZE / 512);
-}
+while (bytes) {
+cur_bytes = MIN(bytes, BLOCK_CRYPTO_MAX_IO_SIZE);
 
 qemu_iovec_reset(_qiov);
-qemu_iovec_add(_qiov, cipher_data, cur_nr_sectors * 512);
+qemu_iovec_add(_qiov, cipher_data, cur_bytes);
 
-ret = bdrv_co_readv(bs->file,
-payload_offset + sector_num,
-cur_nr_sectors, _qiov);
+ret = bdrv_co_preadv(bs->file, payload_offset + offset + bytes_done,
+ cur_bytes, _qiov, 0);
 if (ret < 0) {
 goto cleanup;
 }
 
-if (qcrypto_block_decrypt(crypto->block,
-  sector_num,
-  cipher_data, cur_nr_sectors * 512,
-  NULL) < 0) {
+if (qcrypto_block_decrypt(crypto->block, sector_num, cipher_data,
+  cur_bytes, NULL) < 0) {
 ret = -EIO;
 goto cleanup;
 }
 
-qemu_iovec_from_buf(qiov, bytes_done,
-cipher_data, cur_nr_sectors * 512);
+qemu_iovec_from_buf(qiov, bytes_done, cipher_data, cur_bytes);
 
-remaining_sectors -= cur_nr_sectors;
-sector_num += cur_nr_sectors;
-bytes_done += cur_nr_sectors * 512;
+sector_num += cur_bytes / sector_size;
+bytes -= cur_bytes;
+bytes_done += cur_bytes;
 }
 
  cleanup:
@@ -455,18 +452,23 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t 
sector_num,
 
 
 static coroutine_fn int
-block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num,
-   int remaining_sectors, QEMUIOVector *qiov)
+block_crypto_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+QEMUIOVector *qiov, int flags)
 {
 BlockCrypto *crypto = bs->opaque;
-int cur_nr_sectors; /* number of sectors in current iteration */
+uint64_t cur_bytes; /* number of bytes in current iteration */
 uint64_t bytes_done = 0;
 uint8_t *cipher_data = NULL;
 QEMUIOVector hd_qiov;
 int ret = 0;
-uint64_t payload_offset =
-qcrypto_block_get_payload_offset(crypto->block) / 512;
-assert(payload_offset < (INT64_MAX / 512));
+uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
+uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block);
+uint64_t sector_num = offset / sector_size;
+
+assert(!flags);
+assert(payload_offset < INT64_MAX);
+

[Qemu-block] [PULL 51/54] iotests: Fix 195 if IMGFMT is part of TEST_DIR

2017-10-06 Thread Kevin Wolf
From: Max Reitz 

do_run_qemu() in iotest 195 first applies _filter_imgfmt when printing
qemu's command line and _filter_testdir only afterwards.  Therefore, if
the image format is part of the test directory path, _filter_testdir
will no longer apply and the actual output will differ from the
reference output even in case of success.

For example, TEST_DIR might be "/tmp/test-qcow2", in which case
_filter_imgfmt first transforms this to "/tmp/test-IMGFMT" which is no
longer recognized as the TEST_DIR by _filter_testdir.

Fix this by not applying _filter_imgfmt in do_run_qemu() but in
run_qemu() instead, and only after _filter_testdir.

Signed-off-by: Max Reitz 
Message-id: 20170927211334.3988-1-mre...@redhat.com
Reviewed-by: Eric Blake 
Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/195 | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/qemu-iotests/195 b/tests/qemu-iotests/195
index 05a239cbf5..e7a403ded2 100755
--- a/tests/qemu-iotests/195
+++ b/tests/qemu-iotests/195
@@ -44,15 +44,16 @@ _supported_os Linux
 
 function do_run_qemu()
 {
-echo Testing: "$@" | _filter_imgfmt
+echo Testing: "$@"
 $QEMU -nographic -qmp-pretty stdio -serial none "$@"
 echo
 }
 
 function run_qemu()
 {
-do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qemu | _filter_qmp \
-  | _filter_qemu_io | _filter_generated_node_ids
+do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_imgfmt | _filter_qemu \
+  | _filter_qmp | _filter_qemu_io \
+  | _filter_generated_node_ids
 }
 
 size=64M
-- 
2.13.6




[Qemu-block] [PULL 44/54] block: use 1 MB bounce buffers for crypto instead of 16KB

2017-10-06 Thread Kevin Wolf
From: "Daniel P. Berrange" 

Using 16KB bounce buffers creates a significant performance
penalty for I/O to encrypted volumes on storage which high
I/O latency (rotating rust & network drives), because it
triggers lots of fairly small I/O operations.

On tests with rotating rust, and cache=none|directsync,
write speed increased from 2MiB/s to 32MiB/s, on a par
with that achieved by the in-kernel luks driver. With
other cache modes the in-kernel driver is still notably
faster because it is able to report completion of the
I/O request before any encryption is done, while the
in-QEMU driver must encrypt the data before completion.

Signed-off-by: Daniel P. Berrange 
Message-id: 20170927125340.12360-2-berra...@redhat.com
Reviewed-by: Eric Blake 
Reviewed-by: Max Reitz 
Signed-off-by: Max Reitz 
---
 block/crypto.c | 28 +++-
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/block/crypto.c b/block/crypto.c
index 58ef6f2f52..684cabeaf8 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -379,7 +379,11 @@ static void block_crypto_close(BlockDriverState *bs)
 }
 
 
-#define BLOCK_CRYPTO_MAX_SECTORS 32
+/*
+ * 1 MB bounce buffer gives good performance / memory tradeoff
+ * when using cache=none|directsync.
+ */
+#define BLOCK_CRYPTO_MAX_IO_SIZE (1024 * 1024)
 
 static coroutine_fn int
 block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num,
@@ -396,12 +400,11 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t 
sector_num,
 
 qemu_iovec_init(_qiov, qiov->niov);
 
-/* Bounce buffer so we have a linear mem region for
- * entire sector. XXX optimize so we avoid bounce
- * buffer in case that qiov->niov == 1
+/* Bounce buffer because we don't wish to expose cipher text
+ * in qiov which points to guest memory.
  */
 cipher_data =
-qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512,
+qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_IO_SIZE,
   qiov->size));
 if (cipher_data == NULL) {
 ret = -ENOMEM;
@@ -411,8 +414,8 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t 
sector_num,
 while (remaining_sectors) {
 cur_nr_sectors = remaining_sectors;
 
-if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) {
-cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS;
+if (cur_nr_sectors > (BLOCK_CRYPTO_MAX_IO_SIZE / 512)) {
+cur_nr_sectors = (BLOCK_CRYPTO_MAX_IO_SIZE / 512);
 }
 
 qemu_iovec_reset(_qiov);
@@ -464,12 +467,11 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t 
sector_num,
 
 qemu_iovec_init(_qiov, qiov->niov);
 
-/* Bounce buffer so we have a linear mem region for
- * entire sector. XXX optimize so we avoid bounce
- * buffer in case that qiov->niov == 1
+/* Bounce buffer because we're not permitted to touch
+ * contents of qiov - it points to guest memory.
  */
 cipher_data =
-qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512,
+qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_IO_SIZE,
   qiov->size));
 if (cipher_data == NULL) {
 ret = -ENOMEM;
@@ -479,8 +481,8 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t 
sector_num,
 while (remaining_sectors) {
 cur_nr_sectors = remaining_sectors;
 
-if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) {
-cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS;
+if (cur_nr_sectors > (BLOCK_CRYPTO_MAX_IO_SIZE / 512)) {
+cur_nr_sectors = (BLOCK_CRYPTO_MAX_IO_SIZE / 512);
 }
 
 qemu_iovec_to_buf(qiov, bytes_done,
-- 
2.13.6




[Qemu-block] [PULL 31/54] qemu-iotests: get rid of $iam

2017-10-06 Thread Kevin Wolf
From: Paolo Bonzini 

The variable is almost unused, and one of the two uses is actually
uninitialized.

Signed-off-by: Paolo Bonzini 
Reviewed-by: Eric Blake 
Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/check | 5 +
 tests/qemu-iotests/common.rc | 2 +-
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index 2d2ef687ad..010bf48e92 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -30,12 +30,9 @@ interrupt=true
 # by default don't output timestamps
 timestamp=${TIMESTAMP:=false}
 
-# generic initialization
-iam=check
-
 _init_error()
 {
-echo "$iam: $1" >&2
+echo "check: $1" >&2
 exit 1
 }
 
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index f4dc0104e6..0e8a33c696 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -43,7 +43,7 @@ poke_file()
 
 if ! . ./common.config
 then
-echo "$iam: failed to source common.config"
+echo "$0: failed to source common.config"
 exit 1
 fi
 
-- 
2.13.6




[Qemu-block] [PULL 27/54] qemu-iotests: limit non-_PROG-suffixed variables to common.rc

2017-10-06 Thread Kevin Wolf
From: Paolo Bonzini 

These are never used by "check", with one exception that does not need
$QEMU_OPTIONS.  Keep them in common.rc, which will be soon included only
by the tests.

Signed-off-by: Paolo Bonzini 
Reviewed-by: Eric Blake 
Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/039.out   | 10 +++---
 tests/qemu-iotests/061.out   |  4 +--
 tests/qemu-iotests/137.out   |  2 +-
 tests/qemu-iotests/common.config | 69 ++--
 tests/qemu-iotests/common.rc | 65 +
 5 files changed, 75 insertions(+), 75 deletions(-)

diff --git a/tests/qemu-iotests/039.out b/tests/qemu-iotests/039.out
index c6e0ac2da3..724d7b2508 100644
--- a/tests/qemu-iotests/039.out
+++ b/tests/qemu-iotests/039.out
@@ -11,7 +11,7 @@ No errors were found on the image.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
 wrote 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-./common.config: Killed  ( if [ "${VALGRIND_QEMU}" == "y" ]; 
then
+./common.rc: Killed  ( if [ "${VALGRIND_QEMU}" == "y" ]; then
 exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 
"$QEMU_IO_PROG" $QEMU_IO_ARGS "$@";
 else
 exec "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@";
@@ -50,7 +50,7 @@ read 512/512 bytes at offset 0
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
 wrote 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-./common.config: Killed  ( if [ "${VALGRIND_QEMU}" == "y" ]; 
then
+./common.rc: Killed  ( if [ "${VALGRIND_QEMU}" == "y" ]; then
 exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 
"$QEMU_IO_PROG" $QEMU_IO_ARGS "$@";
 else
 exec "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@";
@@ -68,7 +68,7 @@ incompatible_features 0x0
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
 wrote 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-./common.config: Killed  ( if [ "${VALGRIND_QEMU}" == "y" ]; 
then
+./common.rc: Killed  ( if [ "${VALGRIND_QEMU}" == "y" ]; then
 exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 
"$QEMU_IO_PROG" $QEMU_IO_ARGS "$@";
 else
 exec "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@";
@@ -91,7 +91,7 @@ No errors were found on the image.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
 wrote 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-./common.config: Killed  ( if [ "${VALGRIND_QEMU}" == "y" ]; 
then
+./common.rc: Killed  ( if [ "${VALGRIND_QEMU}" == "y" ]; then
 exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 
"$QEMU_IO_PROG" $QEMU_IO_ARGS "$@";
 else
 exec "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@";
@@ -105,7 +105,7 @@ Data may be corrupted, or further writes to the image may 
corrupt it.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728
 wrote 512/512 bytes at offset 0
 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-./common.config: Killed  ( if [ "${VALGRIND_QEMU}" == "y" ]; 
then
+./common.rc: Killed  ( if [ "${VALGRIND_QEMU}" == "y" ]; then
 exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 
"$QEMU_IO_PROG" $QEMU_IO_ARGS "$@";
 else
 exec "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@";
diff --git a/tests/qemu-iotests/061.out b/tests/qemu-iotests/061.out
index a431b7f305..942485de99 100644
--- a/tests/qemu-iotests/061.out
+++ b/tests/qemu-iotests/061.out
@@ -57,7 +57,7 @@ No errors were found on the image.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
 wrote 131072/131072 bytes at offset 0
 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-./common.config: Killed  ( if [ "${VALGRIND_QEMU}" == "y" ]; 
then
+./common.rc: Killed  ( if [ "${VALGRIND_QEMU}" == "y" ]; then
 exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 
"$QEMU_IO_PROG" $QEMU_IO_ARGS "$@";
 else
 exec "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@";
@@ -219,7 +219,7 @@ No errors were found on the image.
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
 wrote 131072/131072 bytes at offset 0
 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-./common.config: Killed  ( if [ "${VALGRIND_QEMU}" == "y" ]; 
then
+./common.rc: Killed  ( if [ "${VALGRIND_QEMU}" == "y" ]; then
 exec valgrind --log-file="${VALGRIND_LOGFILE}" --error-exitcode=99 
"$QEMU_IO_PROG" $QEMU_IO_ARGS "$@";
 else
 exec "$QEMU_IO_PROG" $QEMU_IO_ARGS "$@";
diff --git a/tests/qemu-iotests/137.out b/tests/qemu-iotests/137.out
index c0e753483b..05efd74d17 100644
--- a/tests/qemu-iotests/137.out
+++ b/tests/qemu-iotests/137.out
@@ -31,7 +31,7 @@ Cache clean interval too big
 

[Qemu-block] [PULL 30/54] qemu-iotests: fix uninitialized variable

2017-10-06 Thread Kevin Wolf
From: Paolo Bonzini 

The variable is used in "common" but defined only after the file
is sourced.

Signed-off-by: Paolo Bonzini 
Reviewed-by: Eric Blake 
Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/check  | 2 --
 tests/qemu-iotests/common | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index e39680ade2..2d2ef687ad 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -77,8 +77,6 @@ fi
 
 TIMESTAMP_FILE=check.time-$IMGPROTO-$IMGFMT
 
-tmp="${TEST_DIR}"/$$
-
 _wallclock()
 {
 date "+%H %M %S" | awk '{ print $1*3600 + $2*60 + $3 }'
diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common
index ee313af92f..365d3c4349 100644
--- a/tests/qemu-iotests/common
+++ b/tests/qemu-iotests/common
@@ -67,6 +67,8 @@ sortme=false
 expunge=true
 have_test_arg=false
 cachemode=false
+
+tmp="${TEST_DIR}"/$$
 rm -f $tmp.list $tmp.tmp $tmp.sed
 
 export IMGFMT=raw
-- 
2.13.6




[Qemu-block] [PULL 40/54] iotests: Restore stty settings on completion

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

Executing qemu with a terminal as stdin will temporarily alter stty
settings on that terminal (for example, disabling echo), because of
how we run both the monitor and any multiplexing with guest input.
Normally, qemu restores the original settings on exit; but if an
iotest triggers qemu to abort in the middle, we can be left with
the altered terminal setup.  This can make life very annoying when
debugging an iotest failure (not everyone remembers the trick of
blind-typing 'stty sane' without echo, and some people prefer
terminal settings that are slightly different than the defaults
picked by 'stty sane').

It is possible to avoid qemu corrupting the terminal by not passing
a terminal to qemu's stdin in the first place (as in, use
'./check ... 
Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/check | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index 176cb8e937..e6b6ff7a04 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -134,6 +134,13 @@ export VALGRIND_QEMU=
 export IMGKEYSECRET=
 export IMGOPTSSYNTAX=false
 
+# Save current tty settings, since an aborting qemu call may leave things
+# screwed up
+STTY_RESTORE=
+if test -t 0; then
+STTY_RESTORE=$(stty -g)
+fi
+
 for r
 do
 
@@ -664,6 +671,9 @@ END{ if (NR > 0) {
 needwrap=false
 fi
 
+if test -n "$STTY_RESTORE"; then
+stty $STTY_RESTORE
+fi
 rm -f "${TEST_DIR}"/*.out "${TEST_DIR}"/*.err "${TEST_DIR}"/*.time
 rm -f "${TEST_DIR}"/check.pid "${TEST_DIR}"/check.sts
 rm -f $tmp.*
-- 
2.13.6




[Qemu-block] [PULL 36/54] qemu-iotests: Test commit block job where top has two parents

2017-10-06 Thread Kevin Wolf
Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/191 | 153 +
 tests/qemu-iotests/191.out | 827 +
 tests/qemu-iotests/group   |   1 +
 3 files changed, 981 insertions(+)
 create mode 100755 tests/qemu-iotests/191
 create mode 100644 tests/qemu-iotests/191.out

diff --git a/tests/qemu-iotests/191 b/tests/qemu-iotests/191
new file mode 100755
index 00..ac2b88fd78
--- /dev/null
+++ b/tests/qemu-iotests/191
@@ -0,0 +1,153 @@
+#!/bin/bash
+#
+# Test commit block job where top has two parents
+#
+# Copyright (C) 2017 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+# creator
+owner=kw...@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+status=1 # failure is the default!
+
+MIG_SOCKET="${TEST_DIR}/migrate"
+
+_cleanup()
+{
+rm -f "${TEST_IMG}.mid"
+rm -f "${TEST_IMG}.ovl2"
+rm -f "${TEST_IMG}.ovl3"
+_cleanup_test_img
+_cleanup_qemu
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+. ./common.qemu
+
+_supported_fmt qcow2
+_unsupported_imgopts compat=0.10
+_supported_proto file
+_supported_os Linux
+
+size=64M
+
+echo
+echo === Preparing and starting VM ===
+echo
+
+TEST_IMG="${TEST_IMG}.base" _make_test_img $size
+TEST_IMG="${TEST_IMG}.mid" _make_test_img -b "${TEST_IMG}.base"
+_make_test_img -b "${TEST_IMG}.mid"
+TEST_IMG="${TEST_IMG}.ovl2" _make_test_img -b "${TEST_IMG}.mid"
+
+$QEMU_IO -c 'write -P 0x55 1M 64k' "${TEST_IMG}.mid" | _filter_qemu_io
+
+qemu_comm_method="qmp"
+qmp_pretty="y"
+
+_launch_qemu \
+-blockdev 
"driver=${IMGFMT},file.driver=file,file.filename=${TEST_IMG}.base,node-name=base"
 \
+-blockdev 
"driver=${IMGFMT},file.driver=file,file.filename=${TEST_IMG}.mid,node-name=mid,backing=base"
 \
+-blockdev 
"driver=${IMGFMT},file.driver=file,file.filename=${TEST_IMG},node-name=top,backing=mid"
 \
+-blockdev 
"driver=${IMGFMT},file.driver=file,file.filename=${TEST_IMG}.ovl2,node-name=top2,backing=mid"
+h=$QEMU_HANDLE
+_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" '^}'
+
+echo
+echo === Perform commit job ===
+echo
+
+_send_qemu_cmd $h \
+"{ 'execute': 'block-commit',
+   'arguments': { 'job-id': 'commit0',
+  'device': 'top',
+  'base':'$TEST_IMG.base',
+  'top': '$TEST_IMG.mid' } }" \
+"BLOCK_JOB_COMPLETED"
+_send_qemu_cmd $h "" "^}"
+
+echo
+echo === Check that both top and top2 point to base now ===
+echo
+
+_send_qemu_cmd $h "{ 'execute': 'query-named-block-nodes' }" "^}" |
+_filter_generated_node_ids
+
+_send_qemu_cmd $h "{ 'execute': 'quit' }" "^}"
+wait=1 _cleanup_qemu
+
+_img_info
+TEST_IMG="$TEST_IMG.ovl2" _img_info
+
+
+echo
+echo === Preparing and starting VM with -drive ===
+echo
+
+TEST_IMG="${TEST_IMG}.base" _make_test_img $size
+TEST_IMG="${TEST_IMG}.mid" _make_test_img -b "${TEST_IMG}.base"
+_make_test_img -b "${TEST_IMG}.mid"
+TEST_IMG="${TEST_IMG}.ovl2" _make_test_img -b "${TEST_IMG}.mid"
+TEST_IMG="${TEST_IMG}.ovl3" _make_test_img -b "${TEST_IMG}.ovl2"
+
+$QEMU_IO -c 'write -P 0x55 1M 64k' "${TEST_IMG}.mid" | _filter_qemu_io
+
+qemu_comm_method="qmp"
+qmp_pretty="y"
+
+_launch_qemu \
+-drive 
"driver=${IMGFMT},file=${TEST_IMG},node-name=top,backing.node-name=mid" \
+-drive 
"driver=${IMGFMT},file=${TEST_IMG}.ovl3,node-name=top2,backing.backing=mid"
+h=$QEMU_HANDLE
+_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" '^}'
+
+echo
+echo === Perform commit job ===
+echo
+
+_send_qemu_cmd $h \
+"{ 'execute': 'block-commit',
+   'arguments': { 'job-id': 'commit0',
+  'device': 'top',
+  'base':'$TEST_IMG.base',
+  'top': '$TEST_IMG.mid' } }" \
+"BLOCK_JOB_COMPLETED"
+_send_qemu_cmd $h "" "^}"
+
+echo
+echo === Check that both top and top2 point to base now ===
+echo
+
+_send_qemu_cmd $h "{ 'execute': 'query-named-block-nodes' }" "^}" |
+_filter_generated_node_ids
+
+_send_qemu_cmd $h "{ 'execute': 'quit' }" "^}"
+wait=1 _cleanup_qemu
+
+_img_info
+TEST_IMG="$TEST_IMG.ovl2" _img_info
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/191.out b/tests/qemu-iotests/191.out
new file mode 100644
index 00..7bfcd2d5d8
--- /dev/null
+++ 

[Qemu-block] [PULL 35/54] qemu-iotests: Allow QMP pretty printing in common.qemu

2017-10-06 Thread Kevin Wolf
QMP responses to certain commands can become quite long, which doesn't
only make reading them hard, but also means that the maximum line length
in patch emails can be exceeded. Allow tests to switch to QMP pretty
printing, which results in more, but shorter lines.

We also need to make sure to keep indentation in the response for this
to work as expected.

Signed-off-by: Kevin Wolf 
Reviewed-by: Eric Blake 
---
 tests/qemu-iotests/common.qemu | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/tests/qemu-iotests/common.qemu b/tests/qemu-iotests/common.qemu
index 9f9aecc9df..7b3052dc79 100644
--- a/tests/qemu-iotests/common.qemu
+++ b/tests/qemu-iotests/common.qemu
@@ -56,13 +56,13 @@ function _timed_wait_for()
 shift
 
 QEMU_STATUS[$h]=0
-while read -t ${QEMU_COMM_TIMEOUT} resp <&${QEMU_OUT[$h]}
+while IFS= read -t ${QEMU_COMM_TIMEOUT} resp <&${QEMU_OUT[$h]}
 do
 if [ -z "${silent}" ]; then
 echo "${resp}" | _filter_testdir | _filter_qemu \
| _filter_qemu_io | _filter_qmp | _filter_hmp
 fi
-grep -q "${*}" < <(echo ${resp})
+grep -q "${*}" < <(echo "${resp}")
 if [ $? -eq 0 ]; then
 return
 fi
@@ -130,6 +130,7 @@ function _send_qemu_cmd()
 # $qemu_comm_method: set this variable to 'monitor' (case insensitive)
 #to use the QEMU HMP monitor for communication.
 #Otherwise, the default of QMP is used.
+# $qmp_pretty: Set this variable to 'y' to enable QMP pretty printing.
 # $keep_stderr: Set this variable to 'y' to keep QEMU's stderr output on 
stderr.
 #   If this variable is empty, stderr will be redirected to stdout.
 # Returns:
@@ -146,7 +147,11 @@ function _launch_qemu()
 comm="-monitor stdio"
 else
 local qemu_comm_method="qmp"
-comm="-monitor none -qmp stdio"
+if [ "$qmp_pretty" = "y" ]; then
+comm="-monitor none -qmp-pretty stdio"
+else
+comm="-monitor none -qmp stdio"
+fi
 fi
 
 fifo_out=${QEMU_FIFO_OUT}_${_QEMU_HANDLE}
@@ -193,6 +198,9 @@ function _launch_qemu()
 then
 # Don't print response, since it has version information in it
 silent=yes _timed_wait_for ${_QEMU_HANDLE} "capabilities"
+if [ "$qmp_pretty" = "y" ]; then
+silent=yes _timed_wait_for ${_QEMU_HANDLE} "^}"
+fi
 fi
 QEMU_HANDLE=${_QEMU_HANDLE}
 let _QEMU_HANDLE++
-- 
2.13.6




[Qemu-block] [PULL 24/54] qemu-iotests: get rid of AWK_PROG

2017-10-06 Thread Kevin Wolf
From: Paolo Bonzini 

Signed-off-by: Paolo Bonzini 
Reviewed-by: Eric Blake 
Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/check | 4 ++--
 tests/qemu-iotests/common| 2 +-
 tests/qemu-iotests/common.config | 3 ---
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index 041780b09f..67da67bd54 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -128,7 +128,7 @@ tmp="${TEST_DIR}"/$$
 
 _wallclock()
 {
-date "+%H %M %S" | $AWK_PROG '{ print $1*3600 + $2*60 + $3 }'
+date "+%H %M %S" | awk '{ print $1*3600 + $2*60 + $3 }'
 }
 
 _timestamp()
@@ -147,7 +147,7 @@ _wrapup()
 if [ -f $TIMESTAMP_FILE -a -f $tmp.time ]
 then
 cat $TIMESTAMP_FILE $tmp.time \
-| $AWK_PROG '
+| awk '
 { t[$1] = $2 }
 END{ if (NR > 0) {
 for (i in t) print i " " t[i]
diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common
index 867918895b..130f647a4d 100644
--- a/tests/qemu-iotests/common
+++ b/tests/qemu-iotests/common
@@ -366,7 +366,7 @@ testlist options
 if $xpand
 then
 have_test_arg=true
-$AWK_PROG 

[Qemu-block] [PULL 45/54] crypto: expose encryption sector size in APIs

2017-10-06 Thread Kevin Wolf
From: "Daniel P. Berrange" 

While current encryption schemes all have a fixed sector size of
512 bytes, this is not guaranteed to be the case in future. Expose
the sector size in the APIs so the block layer can remove assumptions
about fixed 512 byte sectors.

Reviewed-by: Max Reitz 
Reviewed-by: Eric Blake 
Signed-off-by: Daniel P. Berrange 
Message-id: 20170927125340.12360-3-berra...@redhat.com
Signed-off-by: Max Reitz 
---
 crypto/blockpriv.h |  1 +
 include/crypto/block.h | 15 +++
 crypto/block-luks.c|  6 --
 crypto/block-qcow.c|  1 +
 crypto/block.c |  6 ++
 5 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/crypto/blockpriv.h b/crypto/blockpriv.h
index 0edb810e22..d227522d88 100644
--- a/crypto/blockpriv.h
+++ b/crypto/blockpriv.h
@@ -36,6 +36,7 @@ struct QCryptoBlock {
 QCryptoHashAlgorithm kdfhash;
 size_t niv;
 uint64_t payload_offset; /* In bytes */
+uint64_t sector_size; /* In bytes */
 };
 
 struct QCryptoBlockDriver {
diff --git a/include/crypto/block.h b/include/crypto/block.h
index f0e543bee1..13232b2472 100644
--- a/include/crypto/block.h
+++ b/include/crypto/block.h
@@ -241,6 +241,21 @@ QCryptoHashAlgorithm 
qcrypto_block_get_kdf_hash(QCryptoBlock *block);
 uint64_t qcrypto_block_get_payload_offset(QCryptoBlock *block);
 
 /**
+ * qcrypto_block_get_sector_size:
+ * @block: the block encryption object
+ *
+ * Get the size of sectors used for payload encryption. A new
+ * IV is used at the start of each sector. The encryption
+ * sector size is not required to match the sector size of the
+ * underlying storage. For example LUKS will always use a 512
+ * byte sector size, even if the volume is on a disk with 4k
+ * sectors.
+ *
+ * Returns: the sector in bytes
+ */
+uint64_t qcrypto_block_get_sector_size(QCryptoBlock *block);
+
+/**
  * qcrypto_block_free:
  * @block: the block encryption object
  *
diff --git a/crypto/block-luks.c b/crypto/block-luks.c
index 36bc856084..a9062bb0f2 100644
--- a/crypto/block-luks.c
+++ b/crypto/block-luks.c
@@ -846,8 +846,9 @@ qcrypto_block_luks_open(QCryptoBlock *block,
 }
 }
 
+block->sector_size = QCRYPTO_BLOCK_LUKS_SECTOR_SIZE;
 block->payload_offset = luks->header.payload_offset *
-QCRYPTO_BLOCK_LUKS_SECTOR_SIZE;
+block->sector_size;
 
 luks->cipher_alg = cipheralg;
 luks->cipher_mode = ciphermode;
@@ -1240,8 +1241,9 @@ qcrypto_block_luks_create(QCryptoBlock *block,
QCRYPTO_BLOCK_LUKS_SECTOR_SIZE)) *
  QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS);
 
+block->sector_size = QCRYPTO_BLOCK_LUKS_SECTOR_SIZE;
 block->payload_offset = luks->header.payload_offset *
-QCRYPTO_BLOCK_LUKS_SECTOR_SIZE;
+block->sector_size;
 
 /* Reserve header space to match payload offset */
 initfunc(block, block->payload_offset, opaque, _err);
diff --git a/crypto/block-qcow.c b/crypto/block-qcow.c
index a456fe338b..4dd594a9ba 100644
--- a/crypto/block-qcow.c
+++ b/crypto/block-qcow.c
@@ -80,6 +80,7 @@ qcrypto_block_qcow_init(QCryptoBlock *block,
 goto fail;
 }
 
+block->sector_size = QCRYPTO_BLOCK_QCOW_SECTOR_SIZE;
 block->payload_offset = 0;
 
 return 0;
diff --git a/crypto/block.c b/crypto/block.c
index c382393d9a..a7a9ad240e 100644
--- a/crypto/block.c
+++ b/crypto/block.c
@@ -170,6 +170,12 @@ uint64_t qcrypto_block_get_payload_offset(QCryptoBlock 
*block)
 }
 
 
+uint64_t qcrypto_block_get_sector_size(QCryptoBlock *block)
+{
+return block->sector_size;
+}
+
+
 void qcrypto_block_free(QCryptoBlock *block)
 {
 if (!block) {
-- 
2.13.6




[Qemu-block] [PULL 29/54] qemu-iotests: disintegrate more parts of common.config

2017-10-06 Thread Kevin Wolf
From: Paolo Bonzini 

Split "check" parts from tests part.

For the directory setup, the actual computation of directories goes
in "check", while the sanity checks go in the tests.

Signed-off-by: Paolo Bonzini 
Reviewed-by: Eric Blake 
Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/common| 24 
 tests/qemu-iotests/common.config | 49 
 tests/qemu-iotests/common.qemu   |  1 +
 tests/qemu-iotests/common.rc | 25 
 4 files changed, 50 insertions(+), 49 deletions(-)

diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common
index abacc24114..ee313af92f 100644
--- a/tests/qemu-iotests/common
+++ b/tests/qemu-iotests/common
@@ -48,6 +48,14 @@ set_prog_path()
 fi
 }
 
+if [ -z "$TEST_DIR" ]; then
+TEST_DIR=`pwd`/scratch
+fi
+
+if [ ! -e "$TEST_DIR" ]; then
+mkdir "$TEST_DIR"
+fi
+
 diff="diff -u"
 verbose=false
 debug=false
@@ -440,6 +448,13 @@ if [ "$IMGFMT" == "luks" ] && ! (echo "$IMGOPTS" | grep 
"iter-time=" > /dev/null
 IMGOPTS=$(_optstr_add "$IMGOPTS" "iter-time=10")
 fi
 
+if [ -z "$SAMPLE_IMG_DIR" ]; then
+SAMPLE_IMG_DIR="$source_iotests/sample_images"
+fi
+
+export TEST_DIR
+export SAMPLE_IMG_DIR
+
 if [ -s $tmp.list ]
 then
 # found some valid test numbers ... this is good
@@ -524,3 +539,12 @@ if [ -x "$build_iotests/socket_scm_helper" ]
 then
 export SOCKET_SCM_HELPER="$build_iotests/socket_scm_helper"
 fi
+
+default_machine=$($QEMU_PROG -machine help | sed -n '/(default)/ s/ .*//p')
+default_alias_machine=$($QEMU_PROG -machine help | \
+   sed -n "/(alias of $default_machine)/ { s/ .*//p; q; }")
+if [[ "$default_alias_machine" ]]; then
+default_machine="$default_alias_machine"
+fi
+
+export QEMU_DEFAULT_MACHINE="$default_machine"
diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config
index c7f0a7a7e0..cdcda54546 100644
--- a/tests/qemu-iotests/common.config
+++ b/tests/qemu-iotests/common.config
@@ -26,8 +26,6 @@ arch=`uname -m`
 
 export PWD=`pwd`
 
-export _QEMU_HANDLE=0
-
 # make sure we have a standard umask
 umask 022
 
@@ -40,52 +38,5 @@ _optstr_add()
 fi
 }
 
-QEMU_IMG_EXTRA_ARGS=
-if [ "$IMGOPTSSYNTAX" = "true" ]; then
-QEMU_IMG_EXTRA_ARGS="--image-opts $QEMU_IMG_EXTRA_ARGS"
-if [ -n "$IMGKEYSECRET" ]; then
-QEMU_IMG_EXTRA_ARGS="--object secret,id=keysec0,data=$IMGKEYSECRET 
$QEMU_IMG_EXTRA_ARGS"
-fi
-fi
-export QEMU_IMG_EXTRA_ARGS
-
-
-default_machine=$($QEMU_PROG -machine help | sed -n '/(default)/ s/ .*//p')
-default_alias_machine=$($QEMU_PROG -machine help | \
-   sed -n "/(alias of $default_machine)/ { s/ .*//p; q; }")
-if [[ "$default_alias_machine" ]]; then
-default_machine="$default_alias_machine"
-fi
-
-export QEMU_DEFAULT_MACHINE="$default_machine"
-
-if [ -z "$TEST_DIR" ]; then
-TEST_DIR=`pwd`/scratch
-fi
-
-QEMU_TEST_DIR="${TEST_DIR}"
-
-if [ ! -e "$TEST_DIR" ]; then
-mkdir "$TEST_DIR"
-fi
-
-if [ ! -d "$TEST_DIR" ]; then
-echo "common.config: Error: \$TEST_DIR ($TEST_DIR) is not a directory"
-exit 1
-fi
-
-export TEST_DIR
-
-if [ -z "$SAMPLE_IMG_DIR" ]; then
-SAMPLE_IMG_DIR="$source_iotests/sample_images"
-fi
-
-if [ ! -d "$SAMPLE_IMG_DIR" ]; then
-echo "common.config: Error: \$SAMPLE_IMG_DIR ($SAMPLE_IMG_DIR) is not a 
directory"
-exit 1
-fi
-
-export SAMPLE_IMG_DIR
-
 # make sure this script returns success
 true
diff --git a/tests/qemu-iotests/common.qemu b/tests/qemu-iotests/common.qemu
index 7645f1dc72..9f9aecc9df 100644
--- a/tests/qemu-iotests/common.qemu
+++ b/tests/qemu-iotests/common.qemu
@@ -31,6 +31,7 @@ QEMU_FIFO_IN="${QEMU_TEST_DIR}/qmp-in-$$"
 QEMU_FIFO_OUT="${QEMU_TEST_DIR}/qmp-out-$$"
 
 QEMU_HANDLE=0
+export _QEMU_HANDLE=0
 
 # If bash version is >= 4.1, these will be overwritten and dynamic
 # file descriptor values assigned.
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 20f6821a69..f4dc0104e6 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -114,6 +114,10 @@ export QEMU_VXHS=_qemu_vxhs_wrapper
 
 if [ "$IMGOPTSSYNTAX" = "true" ]; then
 DRIVER="driver=$IMGFMT"
+QEMU_IMG_EXTRA_ARGS="--image-opts $QEMU_IMG_EXTRA_ARGS"
+if [ -n "$IMGKEYSECRET" ]; then
+QEMU_IMG_EXTRA_ARGS="--object secret,id=keysec0,data=$IMGKEYSECRET 
$QEMU_IMG_EXTRA_ARGS"
+fi
 if [ "$IMGFMT" = "luks" ]; then
 DRIVER="$DRIVER,key-secret=keysec0"
 fi
@@ -133,6 +137,7 @@ if [ "$IMGOPTSSYNTAX" = "true" ]; then
 
TEST_IMG="$DRIVER,file.driver=$IMGPROTO,file.filename=$TEST_DIR/t.$IMGFMT"
 fi
 else
+QEMU_IMG_EXTRA_ARGS=
 if [ "$IMGPROTO" = "file" ]; then
 TEST_IMG=$TEST_DIR/t.$IMGFMT
 elif [ "$IMGPROTO" = "nbd" ]; then
@@ -153,6 +158,26 @@ else
 fi
 ORIG_TEST_IMG="$TEST_IMG"
 
+if [ -z "$TEST_DIR" ]; then
+TEST_DIR=`pwd`/scratch
+fi
+

[Qemu-block] [PULL 21/54] dirty-bitmap: Convert internal hbitmap size/granularity

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

Now that all callers are using byte-based interfaces, there's no
reason for our internal hbitmap to remain with sector-based
granularity.  It also simplifies our internal scaling, since we
already know that hbitmap widens requests out to granularity
boundaries.

Signed-off-by: Eric Blake 
Reviewed-by: John Snow 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block/dirty-bitmap.c | 62 +++-
 1 file changed, 18 insertions(+), 44 deletions(-)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 58a3f330a9..bd04e991b1 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -38,7 +38,7 @@
  */
 struct BdrvDirtyBitmap {
 QemuMutex *mutex;
-HBitmap *bitmap;/* Dirty sector bitmap implementation */
+HBitmap *bitmap;/* Dirty bitmap implementation */
 HBitmap *meta;  /* Meta dirty bitmap */
 BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
 char *name; /* Optional non-empty unique ID */
@@ -130,12 +130,7 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState 
*bs,
 }
 bitmap = g_new0(BdrvDirtyBitmap, 1);
 bitmap->mutex = >dirty_bitmap_mutex;
-/*
- * TODO - let hbitmap track full granularity. For now, it is tracking
- * only sector granularity, as a shortcut for our iterators.
- */
-bitmap->bitmap = hbitmap_alloc(DIV_ROUND_UP(bitmap_size, BDRV_SECTOR_SIZE),
-   ctz32(granularity) - BDRV_SECTOR_BITS);
+bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(granularity));
 bitmap->size = bitmap_size;
 bitmap->name = g_strdup(name);
 bitmap->disabled = false;
@@ -312,7 +307,7 @@ void bdrv_dirty_bitmap_truncate(BlockDriverState *bs, 
int64_t bytes)
 QLIST_FOREACH(bitmap, >dirty_bitmaps, list) {
 assert(!bdrv_dirty_bitmap_frozen(bitmap));
 assert(!bitmap->active_iterators);
-hbitmap_truncate(bitmap->bitmap, DIV_ROUND_UP(bytes, 
BDRV_SECTOR_SIZE));
+hbitmap_truncate(bitmap->bitmap, bytes);
 bitmap->size = bytes;
 }
 bdrv_dirty_bitmaps_unlock(bs);
@@ -442,7 +437,7 @@ bool bdrv_get_dirty_locked(BlockDriverState *bs, 
BdrvDirtyBitmap *bitmap,
int64_t offset)
 {
 if (bitmap) {
-return hbitmap_get(bitmap->bitmap, offset >> BDRV_SECTOR_BITS);
+return hbitmap_get(bitmap->bitmap, offset);
 } else {
 return false;
 }
@@ -470,7 +465,7 @@ uint32_t 
bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
 
 uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap)
 {
-return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
+return 1U << hbitmap_granularity(bitmap->bitmap);
 }
 
 BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap)
@@ -503,20 +498,16 @@ void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter)
 
 int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter)
 {
-int64_t ret = hbitmap_iter_next(>hbi);
-return ret < 0 ? -1 : ret * BDRV_SECTOR_SIZE;
+return hbitmap_iter_next(>hbi);
 }
 
 /* Called within bdrv_dirty_bitmap_lock..unlock */
 void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
   int64_t offset, int64_t bytes)
 {
-int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
-
 assert(bdrv_dirty_bitmap_enabled(bitmap));
 assert(!bdrv_dirty_bitmap_readonly(bitmap));
-hbitmap_set(bitmap->bitmap, offset >> BDRV_SECTOR_BITS,
-end_sector - (offset >> BDRV_SECTOR_BITS));
+hbitmap_set(bitmap->bitmap, offset, bytes);
 }
 
 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
@@ -531,12 +522,9 @@ void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
 void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
 int64_t offset, int64_t bytes)
 {
-int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
-
 assert(bdrv_dirty_bitmap_enabled(bitmap));
 assert(!bdrv_dirty_bitmap_readonly(bitmap));
-hbitmap_reset(bitmap->bitmap, offset >> BDRV_SECTOR_BITS,
-  end_sector - (offset >> BDRV_SECTOR_BITS));
+hbitmap_reset(bitmap->bitmap, offset, bytes);
 }
 
 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
@@ -556,8 +544,7 @@ void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, 
HBitmap **out)
 hbitmap_reset_all(bitmap->bitmap);
 } else {
 HBitmap *backup = bitmap->bitmap;
-bitmap->bitmap = hbitmap_alloc(DIV_ROUND_UP(bitmap->size,
-BDRV_SECTOR_SIZE),
+bitmap->bitmap = hbitmap_alloc(bitmap->size,
hbitmap_granularity(backup));
 *out = backup;
 }
@@ -576,51 +563,40 @@ 

[Qemu-block] [PULL 22/54] hw/block/onenand: Remove dead code block

2017-10-06 Thread Kevin Wolf
From: Thomas Huth 

The condition of the for-loop makes sure that b is always smaller
than s->blocks, so the "if (b >= s->blocks)" statement is completely
superfluous here.

Buglink: https://bugs.launchpad.net/qemu/+bug/1715007
Signed-off-by: Thomas Huth 
Reviewed-by: Laurent Vivier 
Signed-off-by: Kevin Wolf 
---
 hw/block/onenand.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/hw/block/onenand.c b/hw/block/onenand.c
index 30e40f3914..de65c9ebb9 100644
--- a/hw/block/onenand.c
+++ b/hw/block/onenand.c
@@ -520,10 +520,6 @@ static void onenand_command(OneNANDState *s)
 s->intstatus |= ONEN_INT;
 
 for (b = 0; b < s->blocks; b ++) {
-if (b >= s->blocks) {
-s->status |= ONEN_ERR_CMD;
-break;
-}
 if (s->blockwp[b] == ONEN_LOCK_LOCKTIGHTEN)
 break;
 
-- 
2.13.6




[Qemu-block] [PULL 20/54] dirty-bitmap: Switch bdrv_set_dirty() to bytes

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

Both callers already had bytes available, but were scaling to
sectors.  Move the scaling to internal code.  In the case of
bdrv_aligned_pwritev(), we are now passing the exact offset
rather than a rounded sector-aligned value, but that's okay
as long as dirty bitmap widens start/bytes to granularity
boundaries.

Signed-off-by: Eric Blake 
Reviewed-by: John Snow 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 include/block/block_int.h | 2 +-
 block/dirty-bitmap.c  | 7 ---
 block/io.c| 6 ++
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index 99abe2ce74..79366b94b5 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1028,7 +1028,7 @@ void blk_dev_eject_request(BlockBackend *blk, bool force);
 bool blk_dev_is_tray_open(BlockBackend *blk);
 bool blk_dev_is_medium_locked(BlockBackend *blk);
 
-void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int64_t nr_sect);
+void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);
 bool bdrv_requests_pending(BlockDriverState *bs);
 
 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 117837b3cc..58a3f330a9 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -628,10 +628,10 @@ void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap 
*bitmap)
 hbitmap_deserialize_finish(bitmap->bitmap);
 }
 
-void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
-int64_t nr_sectors)
+void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
 BdrvDirtyBitmap *bitmap;
+int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
 
 if (QLIST_EMPTY(>dirty_bitmaps)) {
 return;
@@ -643,7 +643,8 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t 
cur_sector,
 continue;
 }
 assert(!bdrv_dirty_bitmap_readonly(bitmap));
-hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+hbitmap_set(bitmap->bitmap, offset >> BDRV_SECTOR_BITS,
+end_sector - (offset >> BDRV_SECTOR_BITS));
 }
 bdrv_dirty_bitmaps_unlock(bs);
 }
diff --git a/block/io.c b/block/io.c
index d633b0f851..e0f904583f 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1334,7 +1334,6 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild 
*child,
 bool waited;
 int ret;
 
-int64_t start_sector = offset >> BDRV_SECTOR_BITS;
 int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
 uint64_t bytes_remaining = bytes;
 int max_transfer;
@@ -1409,7 +1408,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild 
*child,
 bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
 
 atomic_inc(>write_gen);
-bdrv_set_dirty(bs, start_sector, end_sector - start_sector);
+bdrv_set_dirty(bs, offset, bytes);
 
 stat64_max(>wr_highest_offset, offset + bytes);
 
@@ -2438,8 +2437,7 @@ int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, 
int64_t offset,
 ret = 0;
 out:
 atomic_inc(>write_gen);
-bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS,
-   req.bytes >> BDRV_SECTOR_BITS);
+bdrv_set_dirty(bs, req.offset, req.bytes);
 tracked_request_end();
 bdrv_dec_in_flight(bs);
 return ret;
-- 
2.13.6




[Qemu-block] [PULL 23/54] qemu-iotests: remove dead code

2017-10-06 Thread Kevin Wolf
From: Paolo Bonzini 

This includes shell function, shell variables and command line options
(randomize.awk does not exist).

Signed-off-by: Paolo Bonzini 
Reviewed-by: Eric Blake 
Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/check | 28 -
 tests/qemu-iotests/common| 23 --
 tests/qemu-iotests/common.config | 26 ---
 tests/qemu-iotests/common.rc | 68 
 4 files changed, 145 deletions(-)

diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index 4583a0c269..041780b09f 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -65,7 +65,6 @@ then
 export SOCKET_SCM_HELPER="$build_iotests/socket_scm_helper"
 fi
 
-# if ./qemu exists, it should be prioritized and will be chosen by 
common.config
 if [[ -z "$QEMU_PROG" && ! -x './qemu' ]]
 then
 arch=$(uname -m 2> /dev/null)
@@ -140,12 +139,6 @@ _timestamp()
 
 _wrapup()
 {
-# for hangcheck ...
-# remove files that were used by hangcheck
-#
-[ -f "${TEST_DIR}"/check.pid ] && rm -rf "${TEST_DIR}"/check.pid
-[ -f "${TEST_DIR}"/check.sts ] && rm -rf "${TEST_DIR}"/check.sts
-
 if $showme
 then
 :
@@ -201,24 +194,6 @@ END{ if (NR > 0) {
 
 trap "_wrapup; exit \$status" 0 1 2 3 15
 
-# for hangcheck ...
-# Save pid of check in a well known place, so that hangcheck can be sure it
-# has the right pid (getting the pid from ps output is not reliable enough).
-#
-rm -rf "${TEST_DIR}"/check.pid
-echo $$ > "${TEST_DIR}"/check.pid
-
-# for hangcheck ...
-# Save the status of check in a well known place, so that hangcheck can be
-# sure to know where check is up to (getting test number from ps output is
-# not reliable enough since the trace stuff has been introduced).
-#
-rm -rf "${TEST_DIR}"/check.sts
-echo "preamble" > "${TEST_DIR}"/check.sts
-
-# don't leave old full output behind on a clean run
-rm -f check.full
-
 [ -f $TIMESTAMP_FILE ] || touch $TIMESTAMP_FILE
 
 FULL_IMGFMT_DETAILS=`_full_imgfmt_details`
@@ -276,9 +251,6 @@ do
 fi
 rm -f core $seq.notrun
 
-# for hangcheck ...
-echo "$seq" > "${TEST_DIR}"/check.sts
-
 start=`_wallclock`
 $timestamp && printf %s "[$(date "+%T")]"
 
diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common
index d34c11c056..867918895b 100644
--- a/tests/qemu-iotests/common
+++ b/tests/qemu-iotests/common
@@ -19,17 +19,6 @@
 # common procedures for QA scripts
 #
 
-_setenvironment()
-{
-MSGVERB="text:action"
-export MSGVERB
-}
-
-rm -f "$OUTPUT_DIR/$iam.out"
-_setenvironment
-
-check=${check-true}
-
 diff="diff -u"
 verbose=false
 debug=false
@@ -40,7 +29,6 @@ showme=false
 sortme=false
 expunge=true
 have_test_arg=false
-randomize=false
 cachemode=false
 rm -f $tmp.list $tmp.tmp $tmp.sed
 
@@ -170,7 +158,6 @@ other options
 -n  show me, do not run tests
 -o options  -o options to pass to qemu-img create/convert
 -T  output timestamps
--r  randomize test order
 -c mode cache mode
 
 testlist options
@@ -327,11 +314,6 @@ testlist options
 cachemode=true
 xpand=false
 ;;
--r)# randomize test order
-randomize=true
-xpand=false
-;;
-
 -T)# turn on timestamp output
 timestamp=true
 xpand=false
@@ -445,11 +427,6 @@ fi
 list=`sort $tmp.list`
 rm -f $tmp.list $tmp.tmp $tmp.sed
 
-if $randomize
-then
-list=`echo $list | awk -f randomize.awk`
-fi
-
 [ "$QEMU" = "" ] && _fatal "qemu not found"
 [ "$QEMU_IMG" = "" ] && _fatal "qemu-img not found"
 [ "$QEMU_IO" = "" ] && _fatal "qemu-io not found"
diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config
index e0883a0c65..b599c72211 100644
--- a/tests/qemu-iotests/common.config
+++ b/tests/qemu-iotests/common.config
@@ -15,33 +15,14 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see .
 #
-#
-# setup and check for config parameters, and in particular
-#
-# EMAIL -   email of the script runner.
-# TEST_DIR -scratch test directory
-#
-# - These can be added to $HOST_CONFIG_DIR (witch default to ./config)
-#   below or a separate local configuration file can be used (using
-#   the HOST_OPTIONS variable).
-# - This script is shared by the stress test system and the auto-qa
-#   system (includes both regression test and benchmark components).
-# - this script shouldn't make any assertions about filesystem
-#   validity or mountedness.
-#
-
 # all tests should use a common language setting to prevent golden
 # output mismatches.
 export LANG=C
 
 PATH=".:$PATH"
 
-HOST=`hostname -s 2> /dev/null`
 HOSTOS=`uname -s`
 

[Qemu-block] [PULL 38/54] qemu-io: Add -C for opening with copy-on-read

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

Make it easier to enable copy-on-read during iotests, by
exposing a new bool option to main and open.

Signed-off-by: Eric Blake 
Reviewed-by: Jeff Cody 
Reviewed-by: Kevin Wolf 
Reviewed-by: John Snow 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 qemu-io.c | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/qemu-io.c b/qemu-io.c
index 265445ad89..c70bde3eb1 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -102,6 +102,7 @@ static void open_help(void)
 " Opens a file for subsequent use by all of the other qemu-io commands.\n"
 " -r, -- open file read-only\n"
 " -s, -- use snapshot file\n"
+" -C, -- use copy-on-read\n"
 " -n, -- disable host cache, short for -t none\n"
 " -U, -- force shared permissions\n"
 " -k, -- use kernel AIO implementation (on Linux only)\n"
@@ -120,7 +121,7 @@ static const cmdinfo_t open_cmd = {
 .argmin = 1,
 .argmax = -1,
 .flags  = CMD_NOFILE_OK,
-.args   = "[-rsnkU] [-t cache] [-d discard] [-o options] [path]",
+.args   = "[-rsCnkU] [-t cache] [-d discard] [-o options] [path]",
 .oneline= "open the file specified by path",
 .help   = open_help,
 };
@@ -145,7 +146,7 @@ static int open_f(BlockBackend *blk, int argc, char **argv)
 QDict *opts;
 bool force_share = false;
 
-while ((c = getopt(argc, argv, "snro:kt:d:U")) != -1) {
+while ((c = getopt(argc, argv, "snCro:kt:d:U")) != -1) {
 switch (c) {
 case 's':
 flags |= BDRV_O_SNAPSHOT;
@@ -154,6 +155,9 @@ static int open_f(BlockBackend *blk, int argc, char **argv)
 flags |= BDRV_O_NOCACHE;
 writethrough = false;
 break;
+case 'C':
+flags |= BDRV_O_COPY_ON_READ;
+break;
 case 'r':
 readonly = 1;
 break;
@@ -251,6 +255,7 @@ static void usage(const char *name)
 "  -r, --read-only  export read-only\n"
 "  -s, --snapshot   use snapshot file\n"
 "  -n, --nocachedisable host cache, short for -t none\n"
+"  -C, --copy-on-read   enable copy-on-read\n"
 "  -m, --misalign   misalign allocations for O_DIRECT\n"
 "  -k, --native-aio use kernel AIO implementation (on Linux only)\n"
 "  -t, --cache=MODE use the given cache mode for the image\n"
@@ -439,7 +444,7 @@ static QemuOptsList file_opts = {
 int main(int argc, char **argv)
 {
 int readonly = 0;
-const char *sopt = "hVc:d:f:rsnmkt:T:U";
+const char *sopt = "hVc:d:f:rsnCmkt:T:U";
 const struct option lopt[] = {
 { "help", no_argument, NULL, 'h' },
 { "version", no_argument, NULL, 'V' },
@@ -448,6 +453,7 @@ int main(int argc, char **argv)
 { "read-only", no_argument, NULL, 'r' },
 { "snapshot", no_argument, NULL, 's' },
 { "nocache", no_argument, NULL, 'n' },
+{ "copy-on-read", no_argument, NULL, 'C' },
 { "misalign", no_argument, NULL, 'm' },
 { "native-aio", no_argument, NULL, 'k' },
 { "discard", required_argument, NULL, 'd' },
@@ -492,6 +498,9 @@ int main(int argc, char **argv)
 flags |= BDRV_O_NOCACHE;
 writethrough = false;
 break;
+case 'C':
+flags |= BDRV_O_COPY_ON_READ;
+break;
 case 'd':
 if (bdrv_parse_discard_flags(optarg, ) < 0) {
 error_report("Invalid discard option: %s", optarg);
-- 
2.13.6




[Qemu-block] [PULL 14/54] dirty-bitmap: Change bdrv_get_dirty_locked() to take bytes

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

Half the callers were already scaling bytes to sectors; the other
half can eventually be simplified to use byte iteration.  Both
callers were already using the result as a bool, so make that
explicit.  Making the change also makes it easier for a future
dirty-bitmap patch to offload scaling over to the internal hbitmap.

Remember, asking whether a byte is dirty is effectively asking
whether the entire granularity containing the byte is dirty, since
we only track dirtiness by granularity.

Signed-off-by: Eric Blake 
Reviewed-by: John Snow 
Reviewed-by: Juan Quintela 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 include/block/dirty-bitmap.h | 4 ++--
 block/dirty-bitmap.c | 8 
 block/mirror.c   | 3 +--
 migration/block.c| 3 ++-
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 757fc4c5b8..9e39537e4b 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -72,8 +72,8 @@ void bdrv_dirty_bitmap_set_persistance(BdrvDirtyBitmap 
*bitmap,
 /* Functions that require manual locking.  */
 void bdrv_dirty_bitmap_lock(BdrvDirtyBitmap *bitmap);
 void bdrv_dirty_bitmap_unlock(BdrvDirtyBitmap *bitmap);
-int bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
-  int64_t sector);
+bool bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+   int64_t offset);
 void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
   int64_t cur_sector, int64_t nr_sectors);
 void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 8322e23f0d..ad559c62b1 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -438,13 +438,13 @@ BlockDirtyInfoList 
*bdrv_query_dirty_bitmaps(BlockDriverState *bs)
 }
 
 /* Called within bdrv_dirty_bitmap_lock..unlock */
-int bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
-  int64_t sector)
+bool bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+   int64_t offset)
 {
 if (bitmap) {
-return hbitmap_get(bitmap->bitmap, sector);
+return hbitmap_get(bitmap->bitmap, offset >> BDRV_SECTOR_BITS);
 } else {
-return 0;
+return false;
 }
 }
 
diff --git a/block/mirror.c b/block/mirror.c
index c5212d2c8d..545dfc50aa 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -361,8 +361,7 @@ static uint64_t coroutine_fn 
mirror_iteration(MirrorBlockJob *s)
 int64_t next_offset = offset + nb_chunks * s->granularity;
 int64_t next_chunk = next_offset / s->granularity;
 if (next_offset >= s->bdev_length ||
-!bdrv_get_dirty_locked(source, s->dirty_bitmap,
-   next_offset >> BDRV_SECTOR_BITS)) {
+!bdrv_get_dirty_locked(source, s->dirty_bitmap, next_offset)) {
 break;
 }
 if (test_bit(next_chunk, s->in_flight_bitmap)) {
diff --git a/migration/block.c b/migration/block.c
index cbcadce168..f5bcf5505c 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -535,7 +535,8 @@ static int mig_save_device_dirty(QEMUFile *f, 
BlkMigDevState *bmds,
 blk_mig_unlock();
 }
 bdrv_dirty_bitmap_lock(bmds->dirty_bitmap);
-if (bdrv_get_dirty_locked(bs, bmds->dirty_bitmap, sector)) {
+if (bdrv_get_dirty_locked(bs, bmds->dirty_bitmap,
+  sector * BDRV_SECTOR_SIZE)) {
 if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
 nr_sectors = total_sectors - sector;
 } else {
-- 
2.13.6




[Qemu-block] [PULL 13/54] dirty-bitmap: Change bdrv_get_dirty_count() to report bytes

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

Thanks to recent cleanups, all callers were scaling a return value
of sectors into bytes; do the scaling internally instead.

Signed-off-by: Eric Blake 
Reviewed-by: John Snow 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block/dirty-bitmap.c |  4 ++--
 block/mirror.c   | 16 ++--
 migration/block.c|  2 +-
 3 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index e451916187..8322e23f0d 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -423,7 +423,7 @@ BlockDirtyInfoList 
*bdrv_query_dirty_bitmaps(BlockDriverState *bs)
 QLIST_FOREACH(bm, >dirty_bitmaps, list) {
 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
-info->count = bdrv_get_dirty_count(bm) << BDRV_SECTOR_BITS;
+info->count = bdrv_get_dirty_count(bm);
 info->granularity = bdrv_dirty_bitmap_granularity(bm);
 info->has_name = !!bm->name;
 info->name = g_strdup(bm->name);
@@ -652,7 +652,7 @@ void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *iter, int64_t 
offset)
 
 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
 {
-return hbitmap_count(bitmap->bitmap);
+return hbitmap_count(bitmap->bitmap) << BDRV_SECTOR_BITS;
 }
 
 int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap)
diff --git a/block/mirror.c b/block/mirror.c
index de0a02778c..c5212d2c8d 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -340,8 +340,7 @@ static uint64_t coroutine_fn 
mirror_iteration(MirrorBlockJob *s)
 if (offset < 0) {
 bdrv_set_dirty_iter(s->dbi, 0);
 offset = bdrv_dirty_iter_next(s->dbi);
-trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap) *
-  BDRV_SECTOR_SIZE);
+trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
 assert(offset >= 0);
 }
 bdrv_dirty_bitmap_unlock(s->dirty_bitmap);
@@ -811,11 +810,10 @@ static void coroutine_fn mirror_run(void *opaque)
 
 cnt = bdrv_get_dirty_count(s->dirty_bitmap);
 /* s->common.offset contains the number of bytes already processed so
- * far, cnt is the number of dirty sectors remaining and
+ * far, cnt is the number of dirty bytes remaining and
  * s->bytes_in_flight is the number of bytes currently being
  * processed; together those are the current total operation length */
-s->common.len = s->common.offset + s->bytes_in_flight +
-cnt * BDRV_SECTOR_SIZE;
+s->common.len = s->common.offset + s->bytes_in_flight + cnt;
 
 /* Note that even when no rate limit is applied we need to yield
  * periodically with no pending I/O so that bdrv_drain_all() returns.
@@ -827,8 +825,7 @@ static void coroutine_fn mirror_run(void *opaque)
 s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
 if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
 (cnt == 0 && s->in_flight > 0)) {
-trace_mirror_yield(s, cnt * BDRV_SECTOR_SIZE,
-   s->buf_free_count, s->in_flight);
+trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight);
 mirror_wait_for_io(s);
 continue;
 } else if (cnt != 0) {
@@ -869,7 +866,7 @@ static void coroutine_fn mirror_run(void *opaque)
  * whether to switch to target check one last time if I/O has
  * come in the meanwhile, and if not flush the data to disk.
  */
-trace_mirror_before_drain(s, cnt * BDRV_SECTOR_SIZE);
+trace_mirror_before_drain(s, cnt);
 
 bdrv_drained_begin(bs);
 cnt = bdrv_get_dirty_count(s->dirty_bitmap);
@@ -888,8 +885,7 @@ static void coroutine_fn mirror_run(void *opaque)
 }
 
 ret = 0;
-trace_mirror_before_sleep(s, cnt * BDRV_SECTOR_SIZE,
-  s->synced, delay_ns);
+trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
 if (!s->synced) {
 block_job_sleep_ns(>common, QEMU_CLOCK_REALTIME, delay_ns);
 if (block_job_is_cancelled(>common)) {
diff --git a/migration/block.c b/migration/block.c
index 606ad4db92..cbcadce168 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -672,7 +672,7 @@ static int64_t get_remaining_dirty(void)
 aio_context_release(blk_get_aio_context(bmds->blk));
 }
 
-return dirty << BDRV_SECTOR_BITS;
+return dirty;
 }
 
 
-- 
2.13.6




[Qemu-block] [PULL 34/54] commit: Support multiple roots above top node

2017-10-06 Thread Kevin Wolf
This changes the commit block job to support operation in a graph where
there is more than a single active layer that references the top node.

This involves inserting the commit filter node not only on the path
between the given active node and the top node, but between the top node
and all of its parents.

On completion, bdrv_drop_intermediate() must consider all parents for
updating the backing file link. These parents may be backing files
themselves and as such read-only; reopen them temporarily if necessary.
Previously this was achieved by the bdrv_reopen() calls in the commit
block job that made overlay_bs read-write for the whole duration of the
block job, even though write access is only needed on completion.

Now that we consider all parents, overlay_bs is meaningless. It is left
in place in this commit, but we'll remove it soon.

Signed-off-by: Kevin Wolf 
---
 block.c| 68 ++
 block/commit.c |  2 +-
 2 files changed, 41 insertions(+), 29 deletions(-)

diff --git a/block.c b/block.c
index ab354ba82a..1b098d4d09 100644
--- a/block.c
+++ b/block.c
@@ -985,14 +985,26 @@ static int bdrv_backing_update_filename(BdrvChild *c, 
BlockDriverState *base,
 const char *filename, Error **errp)
 {
 BlockDriverState *parent = c->opaque;
+int orig_flags = bdrv_get_flags(parent);
 int ret;
 
+if (!(orig_flags & BDRV_O_RDWR)) {
+ret = bdrv_reopen(parent, orig_flags | BDRV_O_RDWR, errp);
+if (ret < 0) {
+return ret;
+}
+}
+
 ret = bdrv_change_backing_file(parent, filename,
base->drv ? base->drv->format_name : "");
 if (ret < 0) {
 error_setg_errno(errp, ret, "Could not update backing file link");
 }
 
+if (!(orig_flags & BDRV_O_RDWR)) {
+bdrv_reopen(parent, orig_flags, NULL);
+}
+
 return ret;
 }
 
@@ -3482,7 +3494,7 @@ BlockDriverState *bdrv_find_base(BlockDriverState *bs)
 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
BlockDriverState *base, const char 
*backing_file_str)
 {
-BlockDriverState *new_top_bs = NULL;
+BdrvChild *c, *next;
 Error *local_err = NULL;
 int ret = -EIO;
 
@@ -3492,42 +3504,42 @@ int bdrv_drop_intermediate(BlockDriverState *active, 
BlockDriverState *top,
 goto exit;
 }
 
-new_top_bs = bdrv_find_overlay(active, top);
-
-if (new_top_bs == NULL) {
-/* we could not find the image above 'top', this is an error */
-goto exit;
-}
-
-/* special case of new_top_bs->backing->bs already pointing to base - 
nothing
- * to do, no intermediate images */
-if (backing_bs(new_top_bs) == base) {
-ret = 0;
-goto exit;
-}
-
 /* Make sure that base is in the backing chain of top */
 if (!bdrv_chain_contains(top, base)) {
 goto exit;
 }
 
 /* success - we can delete the intermediate states, and link top->base */
-if (new_top_bs->backing->role->update_filename) {
-backing_file_str = backing_file_str ? backing_file_str : 
base->filename;
-ret = new_top_bs->backing->role->update_filename(new_top_bs->backing,
- base, 
backing_file_str,
- _err);
-if (ret < 0) {
-bdrv_set_backing_hd(new_top_bs, top, _abort);
+backing_file_str = backing_file_str ? backing_file_str : base->filename;
+
+QLIST_FOREACH_SAFE(c, >parents, next_parent, next) {
+/* Check whether we are allowed to switch c from top to base */
+GSList *ignore_children = g_slist_prepend(NULL, c);
+bdrv_check_update_perm(base, NULL, c->perm, c->shared_perm,
+   ignore_children, _err);
+if (local_err) {
+ret = -EPERM;
+error_report_err(local_err);
 goto exit;
 }
-}
+g_slist_free(ignore_children);
 
-bdrv_set_backing_hd(new_top_bs, base, _err);
-if (local_err) {
-ret = -EPERM;
-error_report_err(local_err);
-goto exit;
+/* If so, update the backing file path in the image file */
+if (c->role->update_filename) {
+ret = c->role->update_filename(c, base, backing_file_str,
+   _err);
+if (ret < 0) {
+bdrv_abort_perm_update(base);
+error_report_err(local_err);
+goto exit;
+}
+}
+
+/* Do the actual switch in the in-memory graph.
+ * Completes bdrv_check_update_perm() transaction internally. */
+bdrv_ref(base);
+bdrv_replace_child(c, base);
+bdrv_unref(top);
 }
 
 ret = 0;
diff --git a/block/commit.c b/block/commit.c
index 8f0e83578a..610e1cd8f5 100644
--- 

[Qemu-block] [PULL 12/54] dirty-bitmap: Change bdrv_dirty_iter_next() to report byte offset

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

Thanks to recent cleanups, most callers were scaling a return value
of sectors into bytes (the exception, in qcow2-bitmap, will be
converted to byte-based iteration later).  Update the interface to
do the scaling internally instead.

In qcow2-bitmap, the code was specifically checking for an error
return of -1.  To avoid a regression, we either have to make sure
we continue to return -1 (rather than a scaled -512) on error, or
we have to fix the caller to treat all negative values as error
rather than just one magic value.  It's easy enough to make both
changes at the same time, even though either one in isolation
would work.

Signed-off-by: Eric Blake 
Reviewed-by: John Snow 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block/backup.c   | 2 +-
 block/dirty-bitmap.c | 3 ++-
 block/mirror.c   | 8 
 block/qcow2-bitmap.c | 2 +-
 4 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index ac9c018717..06ddbfd03d 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -375,7 +375,7 @@ static int coroutine_fn 
backup_run_incremental(BackupBlockJob *job)
 dbi = bdrv_dirty_iter_new(job->sync_bitmap);
 
 /* Find the next dirty sector(s) */
-while ((offset = bdrv_dirty_iter_next(dbi) * BDRV_SECTOR_SIZE) >= 0) {
+while ((offset = bdrv_dirty_iter_next(dbi)) >= 0) {
 cluster = offset / job->cluster_size;
 
 /* Fake progress updates for any clusters we skipped */
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 84509476ba..e451916187 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -503,7 +503,8 @@ void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter)
 
 int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter)
 {
-return hbitmap_iter_next(>hbi);
+int64_t ret = hbitmap_iter_next(>hbi);
+return ret < 0 ? -1 : ret * BDRV_SECTOR_SIZE;
 }
 
 /* Called within bdrv_dirty_bitmap_lock..unlock */
diff --git a/block/mirror.c b/block/mirror.c
index 0c705e0b4f..de0a02778c 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -336,10 +336,10 @@ static uint64_t coroutine_fn 
mirror_iteration(MirrorBlockJob *s)
 int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES);
 
 bdrv_dirty_bitmap_lock(s->dirty_bitmap);
-offset = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
+offset = bdrv_dirty_iter_next(s->dbi);
 if (offset < 0) {
 bdrv_set_dirty_iter(s->dbi, 0);
-offset = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
+offset = bdrv_dirty_iter_next(s->dbi);
 trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap) *
   BDRV_SECTOR_SIZE);
 assert(offset >= 0);
@@ -370,11 +370,11 @@ static uint64_t coroutine_fn 
mirror_iteration(MirrorBlockJob *s)
 break;
 }
 
-next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
+next_dirty = bdrv_dirty_iter_next(s->dbi);
 if (next_dirty > next_offset || next_dirty < 0) {
 /* The bitmap iterator's cache is stale, refresh it */
 bdrv_set_dirty_iter(s->dbi, next_offset);
-next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
+next_dirty = bdrv_dirty_iter_next(s->dbi);
 }
 assert(next_dirty == next_offset);
 nb_chunks++;
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 90756cf561..2d8dcba3e8 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -1109,7 +1109,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
 sbc = limit >> BDRV_SECTOR_BITS;
 assert(DIV_ROUND_UP(bm_size, limit) == tb_size);
 
-while ((sector = bdrv_dirty_iter_next(dbi)) != -1) {
+while ((sector = bdrv_dirty_iter_next(dbi) >> BDRV_SECTOR_BITS) >= 0) {
 uint64_t cluster = sector / sbc;
 uint64_t end, write_size;
 int64_t off;
-- 
2.13.6




[Qemu-block] [PULL 26/54] qemu-iotests: cleanup and fix search for programs

2017-10-06 Thread Kevin Wolf
From: Paolo Bonzini 

Instead of ./check failing when a binary is missing, we try each test
case now and each one fails with tons of test case diffs.  Also, all the
variables were initialized by "check" prior to "common" being sourced,
and then (uselessly) checked for emptiness again in "check".

Centralize the search for programs in "common" (which will soon be
one with "check"), including the "realpath" invocation which can be done
just once in "check" rather than in the tests.

For qnio_server, move the detection to "common", simplifying
set_prog_path to stop handling the unused second argument, and
embedding the "realpath" pass.

Signed-off-by: Paolo Bonzini 
Reviewed-by: Eric Blake 
Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/check | 41 -
 tests/qemu-iotests/common| 77 +---
 tests/qemu-iotests/common.config | 61 +--
 3 files changed, 73 insertions(+), 106 deletions(-)

diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index 67da67bd54..03871d0681 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -60,47 +60,6 @@ fi
 
 build_root="$build_iotests/../.."
 
-if [ -x "$build_iotests/socket_scm_helper" ]
-then
-export SOCKET_SCM_HELPER="$build_iotests/socket_scm_helper"
-fi
-
-if [[ -z "$QEMU_PROG" && ! -x './qemu' ]]
-then
-arch=$(uname -m 2> /dev/null)
-
-if [[ -n $arch && -x "$build_root/$arch-softmmu/qemu-system-$arch" ]]
-then
-export QEMU_PROG="$build_root/$arch-softmmu/qemu-system-$arch"
-else
-pushd "$build_root" > /dev/null
-for binary in *-softmmu/qemu-system-*
-do
-if [ -x "$binary" ]
-then
-export QEMU_PROG="$build_root/$binary"
-break
-fi
-done
-popd > /dev/null
-fi
-fi
-
-if [[ -z $QEMU_IMG_PROG && -x "$build_root/qemu-img" && ! -x './qemu-img' ]]
-then
-export QEMU_IMG_PROG="$build_root/qemu-img"
-fi
-
-if [[ -z $QEMU_IO_PROG && -x "$build_root/qemu-io" && ! -x './qemu-io' ]]
-then
-export QEMU_IO_PROG="$build_root/qemu-io"
-fi
-
-if [[ -z $QEMU_NBD_PROG && -x "$build_root/qemu-nbd" && ! -x './qemu-nbd' ]]
-then
-export QEMU_NBD_PROG="$build_root/qemu-nbd"
-fi
-
 # we need common.env
 if ! . "$build_iotests/common.env"
 then
diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common
index 2e98e64d5c..abacc24114 100644
--- a/tests/qemu-iotests/common
+++ b/tests/qemu-iotests/common
@@ -37,6 +37,17 @@ _full_platform_details()
 echo "$os/$platform $host $kernel"
 }
 
+# $1 = prog to look for
+set_prog_path()
+{
+p=`command -v $1 2> /dev/null`
+if [ -n "$p" -a -x "$p" ]; then
+realpath -- "$(type -p "$p")"
+else
+return 1
+fi
+}
+
 diff="diff -u"
 verbose=false
 debug=false
@@ -450,10 +461,66 @@ fi
 list=`sort $tmp.list`
 rm -f $tmp.list $tmp.tmp $tmp.sed
 
-[ "$QEMU" = "" ] && _fatal "qemu not found"
-[ "$QEMU_IMG" = "" ] && _fatal "qemu-img not found"
-[ "$QEMU_IO" = "" ] && _fatal "qemu-io not found"
+if [ -z "$QEMU_PROG" ]
+then
+if [ -x "$build_iotests/qemu" ]; then
+export QEMU_PROG="$build_iotests/qemu"
+elif [ -x "$build_root/$arch-softmmu/qemu-system-$arch" ]; then
+export QEMU_PROG="$build_root/$arch-softmmu/qemu-system-$arch"
+else
+pushd "$build_root" > /dev/null
+for binary in *-softmmu/qemu-system-*
+do
+if [ -x "$binary" ]
+then
+export QEMU_PROG="$build_root/$binary"
+break
+fi
+done
+popd > /dev/null
+[ "$QEMU_PROG" = "" ] && _init_error "qemu not found"
+fi
+fi
+export QEMU_PROG=$(realpath -- "$(type -p "$QEMU_PROG")")
+
+if [ -z "$QEMU_IMG_PROG" ]; then
+if [ -x "$build_iotests/qemu-img" ]; then
+export QEMU_IMG_PROG="$build_iotests/qemu-img"
+elif [ -x "$build_root/qemu-img" ]; then
+export QEMU_IMG_PROG="$build_root/qemu-img"
+else
+_init_error "qemu-img not found"
+fi
+fi
+export QEMU_IMG_PROG=$(realpath -- "$(type -p "$QEMU_IMG_PROG")")
+
+if [ -z "$QEMU_IO_PROG" ]; then
+if [ -x "$build_iotests/qemu-io" ]; then
+export QEMU_IO_PROG="$build_iotests/qemu-io"
+elif [ -x "$build_root/qemu-io" ]; then
+export QEMU_IO_PROG="$build_root/qemu-io"
+else
+_init_error "qemu-io not found"
+fi
+fi
+export QEMU_IO_PROG=$(realpath -- "$(type -p "$QEMU_IO_PROG")")
 
-if [ "$IMGPROTO" = "nbd" ] ; then
-[ "$QEMU_NBD" = "" ] && _fatal "qemu-nbd not found"
+if [ -z $QEMU_NBD_PROG ]; then
+if [ -x "$build_iotests/qemu-nbd" ]; then
+export QEMU_NBD_PROG="$build_iotests/qemu-nbd"
+elif [ -x "$build_root/qemu-nbd" ]; then
+export QEMU_NBD_PROG="$build_root/qemu-nbd"
+else
+_init_error "qemu-nbd not found"
+fi
+fi

[Qemu-block] [PULL 32/54] qemu-iotests: merge "check" and "common"

2017-10-06 Thread Kevin Wolf
From: Paolo Bonzini 

"check" is full of qemu-iotests--specific details.  Separating it
from "common" does not make much sense anymore.

Signed-off-by: Paolo Bonzini 
Reviewed-by: Eric Blake 
Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/check  | 533 +++-
 tests/qemu-iotests/common | 552 --
 2 files changed, 531 insertions(+), 554 deletions(-)
 delete mode 100644 tests/qemu-iotests/common

diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index 010bf48e92..176cb8e937 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -69,8 +69,537 @@ then
 _init_error "failed to source common.config"
 fi
 
-# we need common
-. "$source_iotests/common"
+_full_imgfmt_details()
+{
+if [ -n "$IMGOPTS" ]; then
+echo "$IMGFMT ($IMGOPTS)"
+else
+echo "$IMGFMT"
+fi
+}
+
+_full_platform_details()
+{
+os=`uname -s`
+host=`hostname -s`
+kernel=`uname -r`
+platform=`uname -m`
+echo "$os/$platform $host $kernel"
+}
+
+# $1 = prog to look for
+set_prog_path()
+{
+p=`command -v $1 2> /dev/null`
+if [ -n "$p" -a -x "$p" ]; then
+realpath -- "$(type -p "$p")"
+else
+return 1
+fi
+}
+
+if [ -z "$TEST_DIR" ]; then
+TEST_DIR=`pwd`/scratch
+fi
+
+if [ ! -e "$TEST_DIR" ]; then
+mkdir "$TEST_DIR"
+fi
+
+diff="diff -u"
+verbose=false
+debug=false
+group=false
+xgroup=false
+imgopts=false
+showme=false
+sortme=false
+expunge=true
+have_test_arg=false
+cachemode=false
+
+tmp="${TEST_DIR}"/$$
+rm -f $tmp.list $tmp.tmp $tmp.sed
+
+export IMGFMT=raw
+export IMGFMT_GENERIC=true
+export IMGPROTO=file
+export IMGOPTS=""
+export CACHEMODE="writeback"
+export QEMU_IO_OPTIONS=""
+export QEMU_IO_OPTIONS_NO_FMT=""
+export CACHEMODE_IS_DEFAULT=true
+export QEMU_OPTIONS="-nodefaults -machine accel=qtest"
+export VALGRIND_QEMU=
+export IMGKEYSECRET=
+export IMGOPTSSYNTAX=false
+
+for r
+do
+
+if $group
+then
+# arg after -g
+group_list=`sed -n <"$source_iotests/group" -e 's/$/ /' -e 
"/^[0-9][0-9][0-9].* $r /"'{
+s/ .*//p
+}'`
+if [ -z "$group_list" ]
+then
+echo "Group \"$r\" is empty or not defined?"
+exit 1
+fi
+[ ! -s $tmp.list ] && touch $tmp.list
+for t in $group_list
+do
+if grep -s "^$t\$" $tmp.list >/dev/null
+then
+:
+else
+echo "$t" >>$tmp.list
+fi
+done
+group=false
+continue
+
+elif $xgroup
+then
+# arg after -x
+# Populate $tmp.list with all tests
+awk '/^[0-9]{3,}/ {print $1}' "${source_iotests}/group" > $tmp.list 
2>/dev/null
+group_list=`sed -n <"$source_iotests/group" -e 's/$/ /' -e 
"/^[0-9][0-9][0-9].* $r /"'{
+s/ .*//p
+}'`
+if [ -z "$group_list" ]
+then
+echo "Group \"$r\" is empty or not defined?"
+exit 1
+fi
+numsed=0
+rm -f $tmp.sed
+for t in $group_list
+do
+if [ $numsed -gt 100 ]
+then
+sed -f $tmp.sed <$tmp.list >$tmp.tmp
+mv $tmp.tmp $tmp.list
+numsed=0
+rm -f $tmp.sed
+fi
+echo "/^$t\$/d" >>$tmp.sed
+numsed=`expr $numsed + 1`
+done
+sed -f $tmp.sed <$tmp.list >$tmp.tmp
+mv $tmp.tmp $tmp.list
+xgroup=false
+continue
+
+elif $imgopts
+then
+IMGOPTS="$r"
+imgopts=false
+continue
+elif $cachemode
+then
+CACHEMODE="$r"
+CACHEMODE_IS_DEFAULT=false
+cachemode=false
+continue
+fi
+
+xpand=true
+case "$r"
+in
+
+-\? | -h | --help)# usage
+echo "Usage: $0 [options] [testlist]"'
+
+common options
+-v  verbose
+-d  debug
+
+image format options
+-rawtest raw (default)
+-bochs  test bochs
+-cloop  test cloop
+-parallels  test parallels
+-qcow   test qcow
+-qcow2  test qcow2
+-qedtest qed
+-vditest vdi
+-vpctest vpc
+-vhdx   test vhdx
+-vmdk   test vmdk
+-luks   test luks
+
+image protocol options
+-file   test file (default)
+-rbdtest rbd
+-sheepdog   test sheepdog
+-nbdtest nbd
+-sshtest ssh
+-nfstest nfs
+-vxhs   test vxhs
+
+other options
+-xdiff  graphical mode diff
+-nocacheuse O_DIRECT on backing file
+-misalign   misalign memory allocations
+

[Qemu-block] [PULL 28/54] qemu-iotests: do not include common.rc in "check"

2017-10-06 Thread Kevin Wolf
From: Paolo Bonzini 

It only provides functions used by the test programs.

Signed-off-by: Paolo Bonzini 
Reviewed-by: Eric Blake 
Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/check |  6 --
 tests/qemu-iotests/common.rc | 13 +
 2 files changed, 5 insertions(+), 14 deletions(-)

diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
index 03871d0681..e39680ade2 100755
--- a/tests/qemu-iotests/check
+++ b/tests/qemu-iotests/check
@@ -72,12 +72,6 @@ then
 _init_error "failed to source common.config"
 fi
 
-# we need common.rc
-if ! . "$source_iotests/common.rc"
-then
-_init_error "failed to source common.rc"
-fi
-
 # we need common
 . "$source_iotests/common"
 
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index e7f74b4dbd..20f6821a69 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -40,14 +40,11 @@ poke_file()
 printf "$3" | dd "of=$1" bs=1 "seek=$2" conv=notrunc &>/dev/null
 }
 
-# we need common.config
-if [ "$iam" != "check" ]
-then
-if ! . ./common.config
-then
-echo "$iam: failed to source common.config"
-exit 1
-fi
+
+if ! . ./common.config
+then
+echo "$iam: failed to source common.config"
+exit 1
 fi
 
 _qemu_wrapper()
-- 
2.13.6




[Qemu-block] [PULL 33/54] block: Introduce BdrvChildRole.update_filename

2017-10-06 Thread Kevin Wolf
There is no good reason for bdrv_drop_intermediate() to know the active
layer above the subchain it is operating on - even more so, because
the assumption that there is a single active layer above it is not
generally true.

In order to prepare removal of the active parameter, use a BdrvChildRole
callback to update the backing file string in the overlay image instead
of directly calling bdrv_change_backing_file().

Signed-off-by: Kevin Wolf 
Reviewed-by: Eric Blake 
---
 include/block/block_int.h |  6 ++
 block.c   | 33 -
 2 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index 79366b94b5..7e8a206239 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -544,6 +544,12 @@ struct BdrvChildRole {
 
 void (*attach)(BdrvChild *child);
 void (*detach)(BdrvChild *child);
+
+/* Notifies the parent that the filename of its child has changed (e.g.
+ * because the direct child was removed from the backing chain), so that it
+ * can update its reference. */
+int (*update_filename)(BdrvChild *child, BlockDriverState *new_base,
+   const char *filename, Error **errp);
 };
 
 extern const BdrvChildRole child_file;
diff --git a/block.c b/block.c
index ef5af81f66..ab354ba82a 100644
--- a/block.c
+++ b/block.c
@@ -981,6 +981,21 @@ static void bdrv_backing_options(int *child_flags, QDict 
*child_options,
 *child_flags = flags;
 }
 
+static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
+const char *filename, Error **errp)
+{
+BlockDriverState *parent = c->opaque;
+int ret;
+
+ret = bdrv_change_backing_file(parent, filename,
+   base->drv ? base->drv->format_name : "");
+if (ret < 0) {
+error_setg_errno(errp, ret, "Could not update backing file link");
+}
+
+return ret;
+}
+
 const BdrvChildRole child_backing = {
 .get_parent_desc = bdrv_child_get_parent_desc,
 .attach  = bdrv_backing_attach,
@@ -989,6 +1004,7 @@ const BdrvChildRole child_backing = {
 .drained_begin   = bdrv_child_cb_drained_begin,
 .drained_end = bdrv_child_cb_drained_end,
 .inactivate  = bdrv_child_cb_inactivate,
+.update_filename = bdrv_backing_update_filename,
 };
 
 static int bdrv_open_flags(BlockDriverState *bs, int flags)
@@ -3470,6 +3486,8 @@ int bdrv_drop_intermediate(BlockDriverState *active, 
BlockDriverState *top,
 Error *local_err = NULL;
 int ret = -EIO;
 
+bdrv_ref(top);
+
 if (!top->drv || !base->drv) {
 goto exit;
 }
@@ -3494,11 +3512,15 @@ int bdrv_drop_intermediate(BlockDriverState *active, 
BlockDriverState *top,
 }
 
 /* success - we can delete the intermediate states, and link top->base */
-backing_file_str = backing_file_str ? backing_file_str : base->filename;
-ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
-   base->drv ? base->drv->format_name : "");
-if (ret) {
-goto exit;
+if (new_top_bs->backing->role->update_filename) {
+backing_file_str = backing_file_str ? backing_file_str : 
base->filename;
+ret = new_top_bs->backing->role->update_filename(new_top_bs->backing,
+ base, 
backing_file_str,
+ _err);
+if (ret < 0) {
+bdrv_set_backing_hd(new_top_bs, top, _abort);
+goto exit;
+}
 }
 
 bdrv_set_backing_hd(new_top_bs, base, _err);
@@ -3510,6 +3532,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, 
BlockDriverState *top,
 
 ret = 0;
 exit:
+bdrv_unref(top);
 return ret;
 }
 
-- 
2.13.6




[Qemu-block] [PULL 25/54] qemu-iotests: move "check" code out of common.rc

2017-10-06 Thread Kevin Wolf
From: Paolo Bonzini 

Some functions in common.rc are never used by the tests.  Move
them out of that file and into common, which is already included
only by "check".

Code that actually *is* common to "check" and tests can be placed in
common.config.

Signed-off-by: Paolo Bonzini 
Reviewed-by: Eric Blake 
Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/common| 25 -
 tests/qemu-iotests/common.config | 12 
 tests/qemu-iotests/common.rc | 40 
 3 files changed, 36 insertions(+), 41 deletions(-)

diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common
index 130f647a4d..2e98e64d5c 100644
--- a/tests/qemu-iotests/common
+++ b/tests/qemu-iotests/common
@@ -19,6 +19,24 @@
 # common procedures for QA scripts
 #
 
+_full_imgfmt_details()
+{
+if [ -n "$IMGOPTS" ]; then
+echo "$IMGFMT ($IMGOPTS)"
+else
+echo "$IMGFMT"
+fi
+}
+
+_full_platform_details()
+{
+os=`uname -s`
+host=`hostname -s`
+kernel=`uname -r`
+platform=`uname -m`
+echo "$os/$platform $host $kernel"
+}
+
 diff="diff -u"
 verbose=false
 debug=false
@@ -404,7 +422,12 @@ if [ "$IMGOPTSSYNTAX" != "true" ]; then
 fi
 
 # Set default options for qemu-img create -o if they were not specified
-_set_default_imgopts
+if [ "$IMGFMT" == "qcow2" ] && ! (echo "$IMGOPTS" | grep "compat=" > 
/dev/null); then
+IMGOPTS=$(_optstr_add "$IMGOPTS" "compat=1.1")
+fi
+if [ "$IMGFMT" == "luks" ] && ! (echo "$IMGOPTS" | grep "iter-time=" > 
/dev/null); then
+IMGOPTS=$(_optstr_add "$IMGOPTS" "iter-time=10")
+fi
 
 if [ -s $tmp.list ]
 then
diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config
index 0f571d46eb..91da65f3dc 100644
--- a/tests/qemu-iotests/common.config
+++ b/tests/qemu-iotests/common.config
@@ -27,6 +27,9 @@ export PWD=`pwd`
 
 export _QEMU_HANDLE=0
 
+# make sure we have a standard umask
+umask 022
+
 # $1 = prog to look for, $2* = default pathnames if not found in $PATH
 set_prog_path()
 {
@@ -49,6 +52,15 @@ set_prog_path()
 return 1
 }
 
+_optstr_add()
+{
+if [ -n "$1" ]; then
+echo "$1,$2"
+else
+echo "$2"
+fi
+}
+
 _fatal()
 {
 echo "$*"
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 5938d5145f..6f6e03366f 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -50,9 +50,6 @@ then
 fi
 fi
 
-# make sure we have a standard umask
-umask 022
-
 if [ "$IMGOPTSSYNTAX" = "true" ]; then
 DRIVER="driver=$IMGFMT"
 if [ "$IMGFMT" = "luks" ]; then
@@ -94,25 +91,6 @@ else
 fi
 ORIG_TEST_IMG="$TEST_IMG"
 
-_optstr_add()
-{
-if [ -n "$1" ]; then
-echo "$1,$2"
-else
-echo "$2"
-fi
-}
-
-_set_default_imgopts()
-{
-if [ "$IMGFMT" == "qcow2" ] && ! (echo "$IMGOPTS" | grep "compat=" > 
/dev/null); then
-IMGOPTS=$(_optstr_add "$IMGOPTS" "compat=1.1")
-fi
-if [ "$IMGFMT" == "luks" ] && ! (echo "$IMGOPTS" | grep "iter-time=" > 
/dev/null); then
-IMGOPTS=$(_optstr_add "$IMGOPTS" "iter-time=10")
-fi
-}
-
 _use_sample_img()
 {
 SAMPLE_IMG_FILE="${1%\.bz2}"
@@ -428,23 +406,5 @@ _require_command()
 [ -x "$c" ] || _notrun "$1 utility required, skipped this test"
 }
 
-_full_imgfmt_details()
-{
-if [ -n "$IMGOPTS" ]; then
-echo "$IMGFMT ($IMGOPTS)"
-else
-echo "$IMGFMT"
-fi
-}
-
-_full_platform_details()
-{
-os=`uname -s`
-host=`hostname -s`
-kernel=`uname -r`
-platform=`uname -m`
-echo "$os/$platform $host $kernel"
-}
-
 # make sure this script returns success
 true
-- 
2.13.6




[Qemu-block] [PULL 09/54] dirty-bitmap: Change bdrv_dirty_bitmap_*serialize*() to take bytes

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

Right now, the dirty-bitmap code exposes the fact that we use
a scale of sector granularity in the underlying hbitmap to anything
that wants to serialize a dirty bitmap.  It's nicer to uniformly
expose bytes as our dirty-bitmap interface, matching the previous
change to bitmap size.  The only caller to serialization is currently
qcow2-cluster.c, which becomes a bit more verbose because it is still
tracking sectors for other reasons, but a later patch will fix that
to more uniformly use byte offsets everywhere.  Likewise, within
dirty-bitmap, we have to add more assertions that we are not
truncating incorrectly, which can go away once the internal hbitmap
is byte-based rather than sector-based.

Signed-off-by: Eric Blake 
Reviewed-by: John Snow 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 include/block/dirty-bitmap.h | 14 +++---
 block/dirty-bitmap.c | 37 -
 block/qcow2-bitmap.c | 22 ++
 3 files changed, 45 insertions(+), 28 deletions(-)

diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 7a27590047..5f34a1a3c7 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -49,19 +49,19 @@ BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap 
*bitmap,
 void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter);
 
 uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap,
-  uint64_t start, uint64_t count);
+  uint64_t offset, uint64_t bytes);
 uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap);
 void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap,
-  uint8_t *buf, uint64_t start,
-  uint64_t count);
+  uint8_t *buf, uint64_t offset,
+  uint64_t bytes);
 void bdrv_dirty_bitmap_deserialize_part(BdrvDirtyBitmap *bitmap,
-uint8_t *buf, uint64_t start,
-uint64_t count, bool finish);
+uint8_t *buf, uint64_t offset,
+uint64_t bytes, bool finish);
 void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap,
-  uint64_t start, uint64_t count,
+  uint64_t offset, uint64_t bytes,
   bool finish);
 void bdrv_dirty_bitmap_deserialize_ones(BdrvDirtyBitmap *bitmap,
-uint64_t start, uint64_t count,
+uint64_t offset, uint64_t bytes,
 bool finish);
 void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap);
 
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 6d32554b4e..555c736024 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -568,42 +568,53 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap 
*bitmap, HBitmap *in)
 }
 
 uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap,
-  uint64_t start, uint64_t count)
+  uint64_t offset, uint64_t bytes)
 {
-return hbitmap_serialization_size(bitmap->bitmap, start, count);
+assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE));
+return hbitmap_serialization_size(bitmap->bitmap,
+  offset >> BDRV_SECTOR_BITS,
+  bytes >> BDRV_SECTOR_BITS);
 }
 
 uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap)
 {
-return hbitmap_serialization_align(bitmap->bitmap);
+return hbitmap_serialization_align(bitmap->bitmap) * BDRV_SECTOR_SIZE;
 }
 
 void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap,
-  uint8_t *buf, uint64_t start,
-  uint64_t count)
+  uint8_t *buf, uint64_t offset,
+  uint64_t bytes)
 {
-hbitmap_serialize_part(bitmap->bitmap, buf, start, count);
+assert(QEMU_IS_ALIGNED(offset | bytes, BDRV_SECTOR_SIZE));
+hbitmap_serialize_part(bitmap->bitmap, buf, offset >> BDRV_SECTOR_BITS,
+   bytes >> BDRV_SECTOR_BITS);
 }
 
 void bdrv_dirty_bitmap_deserialize_part(BdrvDirtyBitmap *bitmap,
-uint8_t *buf, uint64_t start,
-uint64_t count, bool finish)
+

[Qemu-block] [PULL 18/54] qcow2: Switch load_bitmap_data() to byte-based iteration

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

Now that we have adjusted the majority of the calls this function
makes to be byte-based, it is easier to read the code if it makes
passes over the image using bytes rather than sectors.

Signed-off-by: Eric Blake 
Reviewed-by: John Snow 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block/qcow2-bitmap.c | 22 --
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 2d8dcba3e8..02512a21f2 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -291,9 +291,8 @@ static int load_bitmap_data(BlockDriverState *bs,
 {
 int ret = 0;
 BDRVQcow2State *s = bs->opaque;
-uint64_t sector, limit, sbc;
+uint64_t offset, limit;
 uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap);
-uint64_t bm_sectors = DIV_ROUND_UP(bm_size, BDRV_SECTOR_SIZE);
 uint8_t *buf = NULL;
 uint64_t i, tab_size =
 size_to_clusters(s,
@@ -305,32 +304,27 @@ static int load_bitmap_data(BlockDriverState *bs,
 
 buf = g_malloc(s->cluster_size);
 limit = bytes_covered_by_bitmap_cluster(s, bitmap);
-sbc = limit >> BDRV_SECTOR_BITS;
-for (i = 0, sector = 0; i < tab_size; ++i, sector += sbc) {
-uint64_t count = MIN(bm_sectors - sector, sbc);
+for (i = 0, offset = 0; i < tab_size; ++i, offset += limit) {
+uint64_t count = MIN(bm_size - offset, limit);
 uint64_t entry = bitmap_table[i];
-uint64_t offset = entry & BME_TABLE_ENTRY_OFFSET_MASK;
+uint64_t data_offset = entry & BME_TABLE_ENTRY_OFFSET_MASK;
 
 assert(check_table_entry(entry, s->cluster_size) == 0);
 
-if (offset == 0) {
+if (data_offset == 0) {
 if (entry & BME_TABLE_ENTRY_FLAG_ALL_ONES) {
-bdrv_dirty_bitmap_deserialize_ones(bitmap,
-   sector * BDRV_SECTOR_SIZE,
-   count * BDRV_SECTOR_SIZE,
+bdrv_dirty_bitmap_deserialize_ones(bitmap, offset, count,
false);
 } else {
 /* No need to deserialize zeros because the dirty bitmap is
  * already cleared */
 }
 } else {
-ret = bdrv_pread(bs->file, offset, buf, s->cluster_size);
+ret = bdrv_pread(bs->file, data_offset, buf, s->cluster_size);
 if (ret < 0) {
 goto finish;
 }
-bdrv_dirty_bitmap_deserialize_part(bitmap, buf,
-   sector * BDRV_SECTOR_SIZE,
-   count * BDRV_SECTOR_SIZE,
+bdrv_dirty_bitmap_deserialize_part(bitmap, buf, offset, count,
false);
 }
 }
-- 
2.13.6




[Qemu-block] [PULL 07/54] dirty-bitmap: Change bdrv_dirty_bitmap_size() to report bytes

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

We're already reporting bytes for bdrv_dirty_bitmap_granularity();
mixing bytes and sectors in our return values is a recipe for
confusion.  A later cleanup will convert dirty bitmap internals
to be entirely byte-based, but in the meantime, we should report
the bitmap size in bytes.

The only external caller in qcow2-bitmap.c is temporarily more verbose
(because it is still using sector-based math), but will later be
switched to track progress by bytes instead of sectors.

Signed-off-by: Eric Blake 
Reviewed-by: John Snow 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block/dirty-bitmap.c |  2 +-
 block/qcow2-bitmap.c | 14 --
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index ee164fb518..75af32 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -175,7 +175,7 @@ void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap)
 
 int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap)
 {
-return bitmap->size;
+return bitmap->size * BDRV_SECTOR_SIZE;
 }
 
 const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap)
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 1fc375b40a..9c185b5202 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -295,10 +295,11 @@ static int load_bitmap_data(BlockDriverState *bs,
 BDRVQcow2State *s = bs->opaque;
 uint64_t sector, sbc;
 uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap);
+uint64_t bm_sectors = DIV_ROUND_UP(bm_size, BDRV_SECTOR_SIZE);
 uint8_t *buf = NULL;
 uint64_t i, tab_size =
 size_to_clusters(s,
-bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_size));
+bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_sectors));
 
 if (tab_size != bitmap_table_size || tab_size > BME_MAX_TABLE_SIZE) {
 return -EINVAL;
@@ -307,7 +308,7 @@ static int load_bitmap_data(BlockDriverState *bs,
 buf = g_malloc(s->cluster_size);
 sbc = sectors_covered_by_bitmap_cluster(s, bitmap);
 for (i = 0, sector = 0; i < tab_size; ++i, sector += sbc) {
-uint64_t count = MIN(bm_size - sector, sbc);
+uint64_t count = MIN(bm_sectors - sector, sbc);
 uint64_t entry = bitmap_table[i];
 uint64_t offset = entry & BME_TABLE_ENTRY_OFFSET_MASK;
 
@@ -1077,13 +1078,14 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
 int64_t sector;
 uint64_t sbc;
 uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap);
+uint64_t bm_sectors = DIV_ROUND_UP(bm_size, BDRV_SECTOR_SIZE);
 const char *bm_name = bdrv_dirty_bitmap_name(bitmap);
 uint8_t *buf = NULL;
 BdrvDirtyBitmapIter *dbi;
 uint64_t *tb;
 uint64_t tb_size =
 size_to_clusters(s,
-bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_size));
+bdrv_dirty_bitmap_serialization_size(bitmap, 0, bm_sectors));
 
 if (tb_size > BME_MAX_TABLE_SIZE ||
 tb_size * s->cluster_size > BME_MAX_PHYS_SIZE)
@@ -1101,7 +1103,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
 dbi = bdrv_dirty_iter_new(bitmap, 0);
 buf = g_malloc(s->cluster_size);
 sbc = sectors_covered_by_bitmap_cluster(s, bitmap);
-assert(DIV_ROUND_UP(bm_size, sbc) == tb_size);
+assert(DIV_ROUND_UP(bm_sectors, sbc) == tb_size);
 
 while ((sector = bdrv_dirty_iter_next(dbi)) != -1) {
 uint64_t cluster = sector / sbc;
@@ -1109,7 +,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
 int64_t off;
 
 sector = cluster * sbc;
-end = MIN(bm_size, sector + sbc);
+end = MIN(bm_sectors, sector + sbc);
 write_size =
 bdrv_dirty_bitmap_serialization_size(bitmap, sector, end - sector);
 assert(write_size <= s->cluster_size);
@@ -1141,7 +1143,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
 goto fail;
 }
 
-if (end >= bm_size) {
+if (end >= bm_sectors) {
 break;
 }
 
-- 
2.13.6




[Qemu-block] [PULL 16/54] mirror: Switch mirror_dirty_init() to byte-based iteration

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

Now that we have adjusted the majority of the calls this function
makes to be byte-based, it is easier to read the code if it makes
passes over the image using bytes rather than sectors.

Signed-off-by: Eric Blake 
Reviewed-by: John Snow 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block/mirror.c | 38 ++
 1 file changed, 14 insertions(+), 24 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 5c0f79f56a..459b80f8f3 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -612,15 +612,13 @@ static void mirror_throttle(MirrorBlockJob *s)
 
 static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
 {
-int64_t sector_num, end;
+int64_t offset;
 BlockDriverState *base = s->base;
 BlockDriverState *bs = s->source;
 BlockDriverState *target_bs = blk_bs(s->target);
-int ret, n;
+int ret;
 int64_t count;
 
-end = s->bdev_length / BDRV_SECTOR_SIZE;
-
 if (base == NULL && !bdrv_has_zero_init(target_bs)) {
 if (!bdrv_can_write_zeroes_with_unmap(target_bs)) {
 bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, s->bdev_length);
@@ -628,9 +626,9 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
 }
 
 s->initial_zeroing_ongoing = true;
-for (sector_num = 0; sector_num < end; ) {
-int nb_sectors = MIN(end - sector_num,
-QEMU_ALIGN_DOWN(INT_MAX, s->granularity) >> BDRV_SECTOR_BITS);
+for (offset = 0; offset < s->bdev_length; ) {
+int bytes = MIN(s->bdev_length - offset,
+QEMU_ALIGN_DOWN(INT_MAX, s->granularity));
 
 mirror_throttle(s);
 
@@ -646,9 +644,8 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
 continue;
 }
 
-mirror_do_zero_or_discard(s, sector_num * BDRV_SECTOR_SIZE,
-  nb_sectors * BDRV_SECTOR_SIZE, false);
-sector_num += nb_sectors;
+mirror_do_zero_or_discard(s, offset, bytes, false);
+offset += bytes;
 }
 
 mirror_wait_for_all_io(s);
@@ -656,10 +653,10 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob 
*s)
 }
 
 /* First part, loop on the sectors and initialize the dirty bitmap.  */
-for (sector_num = 0; sector_num < end; ) {
+for (offset = 0; offset < s->bdev_length; ) {
 /* Just to make sure we are not exceeding int limit. */
-int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS,
- end - sector_num);
+int bytes = MIN(s->bdev_length - offset,
+QEMU_ALIGN_DOWN(INT_MAX, s->granularity));
 
 mirror_throttle(s);
 
@@ -667,23 +664,16 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob 
*s)
 return 0;
 }
 
-ret = bdrv_is_allocated_above(bs, base, sector_num * BDRV_SECTOR_SIZE,
-  nb_sectors * BDRV_SECTOR_SIZE, );
+ret = bdrv_is_allocated_above(bs, base, offset, bytes, );
 if (ret < 0) {
 return ret;
 }
 
-/* TODO: Relax this once bdrv_is_allocated_above and dirty
- * bitmaps no longer require sector alignment. */
-assert(QEMU_IS_ALIGNED(count, BDRV_SECTOR_SIZE));
-n = count >> BDRV_SECTOR_BITS;
-assert(n > 0);
+assert(count);
 if (ret == 1) {
-bdrv_set_dirty_bitmap(s->dirty_bitmap,
-  sector_num * BDRV_SECTOR_SIZE,
-  n * BDRV_SECTOR_SIZE);
+bdrv_set_dirty_bitmap(s->dirty_bitmap, offset, count);
 }
-sector_num += n;
+offset += count;
 }
 return 0;
 }
-- 
2.13.6




[Qemu-block] [PULL 10/54] qcow2: Switch sectors_covered_by_bitmap_cluster() to byte-based

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

We are gradually converting to byte-based interfaces, as they are
easier to reason about than sector-based.  Change the qcow2 bitmap
helper function sectors_covered_by_bitmap_cluster(), renaming it
to bytes_covered_by_bitmap_cluster() in the process.

Signed-off-by: Eric Blake 
Reviewed-by: John Snow 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block/qcow2-bitmap.c | 28 ++--
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 9e76e6..8324468b16 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -269,18 +269,16 @@ static int free_bitmap_clusters(BlockDriverState *bs, 
Qcow2BitmapTable *tb)
 return 0;
 }
 
-/* This function returns the number of disk sectors covered by a single qcow2
- * cluster of bitmap data. */
-static uint64_t sectors_covered_by_bitmap_cluster(const BDRVQcow2State *s,
-  const BdrvDirtyBitmap 
*bitmap)
+/* Return the disk size covered by a single qcow2 cluster of bitmap data. */
+static uint64_t bytes_covered_by_bitmap_cluster(const BDRVQcow2State *s,
+const BdrvDirtyBitmap *bitmap)
 {
-uint64_t sector_granularity =
-bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS;
-uint64_t sbc = sector_granularity * (s->cluster_size << 3);
+uint64_t granularity = bdrv_dirty_bitmap_granularity(bitmap);
+uint64_t limit = granularity * (s->cluster_size << 3);
 
-assert(QEMU_IS_ALIGNED(sbc << BDRV_SECTOR_BITS,
+assert(QEMU_IS_ALIGNED(limit,
bdrv_dirty_bitmap_serialization_align(bitmap)));
-return sbc;
+return limit;
 }
 
 /* load_bitmap_data
@@ -293,7 +291,7 @@ static int load_bitmap_data(BlockDriverState *bs,
 {
 int ret = 0;
 BDRVQcow2State *s = bs->opaque;
-uint64_t sector, sbc;
+uint64_t sector, limit, sbc;
 uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap);
 uint64_t bm_sectors = DIV_ROUND_UP(bm_size, BDRV_SECTOR_SIZE);
 uint8_t *buf = NULL;
@@ -306,7 +304,8 @@ static int load_bitmap_data(BlockDriverState *bs,
 }
 
 buf = g_malloc(s->cluster_size);
-sbc = sectors_covered_by_bitmap_cluster(s, bitmap);
+limit = bytes_covered_by_bitmap_cluster(s, bitmap);
+sbc = limit >> BDRV_SECTOR_BITS;
 for (i = 0, sector = 0; i < tab_size; ++i, sector += sbc) {
 uint64_t count = MIN(bm_sectors - sector, sbc);
 uint64_t entry = bitmap_table[i];
@@ -1080,7 +1079,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
 int ret;
 BDRVQcow2State *s = bs->opaque;
 int64_t sector;
-uint64_t sbc;
+uint64_t limit, sbc;
 uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap);
 uint64_t bm_sectors = DIV_ROUND_UP(bm_size, BDRV_SECTOR_SIZE);
 const char *bm_name = bdrv_dirty_bitmap_name(bitmap);
@@ -1106,8 +1105,9 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
 
 dbi = bdrv_dirty_iter_new(bitmap, 0);
 buf = g_malloc(s->cluster_size);
-sbc = sectors_covered_by_bitmap_cluster(s, bitmap);
-assert(DIV_ROUND_UP(bm_sectors, sbc) == tb_size);
+limit = bytes_covered_by_bitmap_cluster(s, bitmap);
+sbc = limit >> BDRV_SECTOR_BITS;
+assert(DIV_ROUND_UP(bm_size, limit) == tb_size);
 
 while ((sector = bdrv_dirty_iter_next(dbi)) != -1) {
 uint64_t cluster = sector / sbc;
-- 
2.13.6




[Qemu-block] [PULL 06/54] dirty-bitmap: Avoid size query failure during truncate

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

We've previously fixed several places where we failed to account
for possible errors from bdrv_nb_sectors().  Fix another one by
making bdrv_dirty_bitmap_truncate() take the new size from the
caller instead of querying itself; then adjust the sole caller
bdrv_truncate() to pass the size just determined by a successful
resize, or to reuse the size given to the original truncate
operation when refresh_total_sectors() was not able to confirm the
actual size (the two sizes can potentially differ according to
rounding constraints), thus avoiding sizing the bitmaps to -1.
This also fixes a bug where not all failure paths in
bdrv_truncate() would set errp.

Note that bdrv_truncate() is still a bit awkward.  We may want
to revisit it later and clean up things to better guarantee that
a resize attempt either fails cleanly up front, or cannot fail
after guest-visible changes have been made (if temporary changes
are made, then they need to be cleanly rolled back).  But that
is a task for another day; for now, the goal is the bare minimum
fix to ensure that just bdrv_dirty_bitmap_truncate() cannot fail.

Signed-off-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: John Snow 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 include/block/dirty-bitmap.h |  2 +-
 block.c  | 16 +++-
 block/dirty-bitmap.c |  6 +++---
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 8fd842eac9..7a27590047 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -83,7 +83,7 @@ int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter);
 void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num);
 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
 int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap);
-void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
+void bdrv_dirty_bitmap_truncate(BlockDriverState *bs, int64_t bytes);
 bool bdrv_dirty_bitmap_readonly(const BdrvDirtyBitmap *bitmap);
 bool bdrv_has_readonly_bitmaps(BlockDriverState *bs);
 bool bdrv_dirty_bitmap_get_autoload(const BdrvDirtyBitmap *bitmap);
diff --git a/block.c b/block.c
index 528cda7b2c..ef5af81f66 100644
--- a/block.c
+++ b/block.c
@@ -3545,12 +3545,18 @@ int bdrv_truncate(BdrvChild *child, int64_t offset, 
PreallocMode prealloc,
 assert(!(bs->open_flags & BDRV_O_INACTIVE));
 
 ret = drv->bdrv_truncate(bs, offset, prealloc, errp);
-if (ret == 0) {
-ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
-bdrv_dirty_bitmap_truncate(bs);
-bdrv_parent_cb_resize(bs);
-atomic_inc(>write_gen);
+if (ret < 0) {
+return ret;
+}
+ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
+if (ret < 0) {
+error_setg_errno(errp, -ret, "Could not refresh total sector count");
+} else {
+offset = bs->total_sectors * BDRV_SECTOR_SIZE;
 }
+bdrv_dirty_bitmap_truncate(bs, offset);
+bdrv_parent_cb_resize(bs);
+atomic_inc(>write_gen);
 return ret;
 }
 
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 42a55e4a4b..ee164fb518 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -1,7 +1,7 @@
 /*
  * Block Dirty Bitmap
  *
- * Copyright (c) 2016 Red Hat. Inc
+ * Copyright (c) 2016-2017 Red Hat. Inc
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to 
deal
@@ -302,10 +302,10 @@ BdrvDirtyBitmap 
*bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
  * Truncates _all_ bitmaps attached to a BDS.
  * Called with BQL taken.
  */
-void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
+void bdrv_dirty_bitmap_truncate(BlockDriverState *bs, int64_t bytes)
 {
 BdrvDirtyBitmap *bitmap;
-uint64_t size = bdrv_nb_sectors(bs);
+int64_t size = DIV_ROUND_UP(bytes, BDRV_SECTOR_SIZE);
 
 bdrv_dirty_bitmaps_lock(bs);
 QLIST_FOREACH(bitmap, >dirty_bitmaps, list) {
-- 
2.13.6




[Qemu-block] [PULL 08/54] dirty-bitmap: Track bitmap size by bytes

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

We are still using an internal hbitmap that tracks a size in sectors,
with the granularity scaled down accordingly, because it lets us
use a shortcut for our iterators which are currently sector-based.
But there's no reason we can't track the dirty bitmap size in bytes,
since it is (mostly) an internal-only variable (remember, the size
is how many bytes are covered by the bitmap, not how many bytes the
bitmap occupies).  A later cleanup will convert dirty bitmap
internals to be entirely byte-based, eliminating the intermediate
sector rounding added here; and technically, since bdrv_getlength()
already rounds up to sectors, our use of DIV_ROUND_UP is more for
theoretical completeness than for any actual rounding.

Use is_power_of_2() while at it, instead of open-coding that.

Signed-off-by: Eric Blake 
Reviewed-by: John Snow 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block/dirty-bitmap.c | 26 ++
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 75af32..6d32554b4e 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -42,7 +42,7 @@ struct BdrvDirtyBitmap {
 HBitmap *meta;  /* Meta dirty bitmap */
 BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
 char *name; /* Optional non-empty unique ID */
-int64_t size;   /* Size of the bitmap (Number of sectors) */
+int64_t size;   /* Size of the bitmap, in bytes */
 bool disabled;  /* Bitmap is disabled. It ignores all writes to
the device */
 int active_iterators;   /* How many iterators are active */
@@ -115,17 +115,14 @@ BdrvDirtyBitmap 
*bdrv_create_dirty_bitmap(BlockDriverState *bs,
 {
 int64_t bitmap_size;
 BdrvDirtyBitmap *bitmap;
-uint32_t sector_granularity;
 
-assert((granularity & (granularity - 1)) == 0);
+assert(is_power_of_2(granularity) && granularity >= BDRV_SECTOR_SIZE);
 
 if (name && bdrv_find_dirty_bitmap(bs, name)) {
 error_setg(errp, "Bitmap already exists: %s", name);
 return NULL;
 }
-sector_granularity = granularity >> BDRV_SECTOR_BITS;
-assert(sector_granularity);
-bitmap_size = bdrv_nb_sectors(bs);
+bitmap_size = bdrv_getlength(bs);
 if (bitmap_size < 0) {
 error_setg_errno(errp, -bitmap_size, "could not get length of device");
 errno = -bitmap_size;
@@ -133,7 +130,12 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState 
*bs,
 }
 bitmap = g_new0(BdrvDirtyBitmap, 1);
 bitmap->mutex = >dirty_bitmap_mutex;
-bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
+/*
+ * TODO - let hbitmap track full granularity. For now, it is tracking
+ * only sector granularity, as a shortcut for our iterators.
+ */
+bitmap->bitmap = hbitmap_alloc(DIV_ROUND_UP(bitmap_size, BDRV_SECTOR_SIZE),
+   ctz32(granularity) - BDRV_SECTOR_BITS);
 bitmap->size = bitmap_size;
 bitmap->name = g_strdup(name);
 bitmap->disabled = false;
@@ -175,7 +177,7 @@ void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap *bitmap)
 
 int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap)
 {
-return bitmap->size * BDRV_SECTOR_SIZE;
+return bitmap->size;
 }
 
 const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap)
@@ -305,14 +307,13 @@ BdrvDirtyBitmap 
*bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
 void bdrv_dirty_bitmap_truncate(BlockDriverState *bs, int64_t bytes)
 {
 BdrvDirtyBitmap *bitmap;
-int64_t size = DIV_ROUND_UP(bytes, BDRV_SECTOR_SIZE);
 
 bdrv_dirty_bitmaps_lock(bs);
 QLIST_FOREACH(bitmap, >dirty_bitmaps, list) {
 assert(!bdrv_dirty_bitmap_frozen(bitmap));
 assert(!bitmap->active_iterators);
-hbitmap_truncate(bitmap->bitmap, size);
-bitmap->size = size;
+hbitmap_truncate(bitmap->bitmap, DIV_ROUND_UP(bytes, 
BDRV_SECTOR_SIZE));
+bitmap->size = bytes;
 }
 bdrv_dirty_bitmaps_unlock(bs);
 }
@@ -549,7 +550,8 @@ void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, 
HBitmap **out)
 hbitmap_reset_all(bitmap->bitmap);
 } else {
 HBitmap *backup = bitmap->bitmap;
-bitmap->bitmap = hbitmap_alloc(bitmap->size,
+bitmap->bitmap = hbitmap_alloc(DIV_ROUND_UP(bitmap->size,
+BDRV_SECTOR_SIZE),
hbitmap_granularity(backup));
 *out = backup;
 }
-- 
2.13.6




[Qemu-block] [PULL 05/54] dirty-bitmap: Drop unused functions

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

We had several functions that no one is currently using, and which
use sector-based interfaces.  I'm trying to convert towards byte-based
interfaces, so it's easier to just drop the unused functions:

bdrv_dirty_bitmap_get_meta
bdrv_dirty_bitmap_get_meta_locked
bdrv_dirty_bitmap_reset_meta
bdrv_dirty_bitmap_meta_granularity

Signed-off-by: Eric Blake 
Reviewed-by: John Snow 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 include/block/dirty-bitmap.h | 10 --
 block/dirty-bitmap.c | 44 
 2 files changed, 54 deletions(-)

diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index a79a58d2c3..8fd842eac9 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -34,7 +34,6 @@ void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs);
 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs);
 uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap);
-uint32_t bdrv_dirty_bitmap_meta_granularity(BdrvDirtyBitmap *bitmap);
 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap);
 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap);
 const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap);
@@ -44,15 +43,6 @@ void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
int64_t cur_sector, int64_t nr_sectors);
 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
  int64_t cur_sector, int64_t nr_sectors);
-int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
-   BdrvDirtyBitmap *bitmap, int64_t sector,
-   int nb_sectors);
-int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs,
-  BdrvDirtyBitmap *bitmap, int64_t sector,
-  int nb_sectors);
-void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs,
-  BdrvDirtyBitmap *bitmap, int64_t sector,
-  int nb_sectors);
 BdrvDirtyBitmapIter *bdrv_dirty_meta_iter_new(BdrvDirtyBitmap *bitmap);
 BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap,
  uint64_t first_sector);
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 0490ca3aff..42a55e4a4b 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -173,45 +173,6 @@ void bdrv_release_meta_dirty_bitmap(BdrvDirtyBitmap 
*bitmap)
 qemu_mutex_unlock(bitmap->mutex);
 }
 
-int bdrv_dirty_bitmap_get_meta_locked(BlockDriverState *bs,
-  BdrvDirtyBitmap *bitmap, int64_t sector,
-  int nb_sectors)
-{
-uint64_t i;
-int sectors_per_bit = 1 << hbitmap_granularity(bitmap->meta);
-
-/* To optimize: we can make hbitmap to internally check the range in a
- * coarse level, or at least do it word by word. */
-for (i = sector; i < sector + nb_sectors; i += sectors_per_bit) {
-if (hbitmap_get(bitmap->meta, i)) {
-return true;
-}
-}
-return false;
-}
-
-int bdrv_dirty_bitmap_get_meta(BlockDriverState *bs,
-   BdrvDirtyBitmap *bitmap, int64_t sector,
-   int nb_sectors)
-{
-bool dirty;
-
-qemu_mutex_lock(bitmap->mutex);
-dirty = bdrv_dirty_bitmap_get_meta_locked(bs, bitmap, sector, nb_sectors);
-qemu_mutex_unlock(bitmap->mutex);
-
-return dirty;
-}
-
-void bdrv_dirty_bitmap_reset_meta(BlockDriverState *bs,
-  BdrvDirtyBitmap *bitmap, int64_t sector,
-  int nb_sectors)
-{
-qemu_mutex_lock(bitmap->mutex);
-hbitmap_reset(bitmap->meta, sector, nb_sectors);
-qemu_mutex_unlock(bitmap->mutex);
-}
-
 int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap)
 {
 return bitmap->size;
@@ -511,11 +472,6 @@ uint32_t bdrv_dirty_bitmap_granularity(const 
BdrvDirtyBitmap *bitmap)
 return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
 }
 
-uint32_t bdrv_dirty_bitmap_meta_granularity(BdrvDirtyBitmap *bitmap)
-{
-return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->meta);
-}
-
 BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap,
  uint64_t first_sector)
 {
-- 
2.13.6




[Qemu-block] [PULL 15/54] dirty-bitmap: Change bdrv_[re]set_dirty_bitmap() to use bytes

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

Some of the callers were already scaling bytes to sectors; others
can be easily converted to pass byte offsets, all in our shift
towards a consistent byte interface everywhere.  Making the change
will also make it easier to write the hold-out callers to use byte
rather than sectors for their iterations; it also makes it easier
for a future dirty-bitmap patch to offload scaling over to the
internal hbitmap.  Although all callers happen to pass
sector-aligned values, make the internal scaling robust to any
sub-sector requests.

Signed-off-by: Eric Blake 
Reviewed-by: John Snow 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 include/block/dirty-bitmap.h |  8 
 block/dirty-bitmap.c | 22 ++
 block/mirror.c   | 16 
 migration/block.c|  7 +--
 4 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 9e39537e4b..3579a7597c 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -40,9 +40,9 @@ const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap 
*bitmap);
 int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap);
 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap);
 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
-   int64_t cur_sector, int64_t nr_sectors);
+   int64_t offset, int64_t bytes);
 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
- int64_t cur_sector, int64_t nr_sectors);
+ int64_t offset, int64_t bytes);
 BdrvDirtyBitmapIter *bdrv_dirty_meta_iter_new(BdrvDirtyBitmap *bitmap);
 BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap);
 void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter);
@@ -75,9 +75,9 @@ void bdrv_dirty_bitmap_unlock(BdrvDirtyBitmap *bitmap);
 bool bdrv_get_dirty_locked(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
int64_t offset);
 void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
-  int64_t cur_sector, int64_t nr_sectors);
+  int64_t offset, int64_t bytes);
 void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
-int64_t cur_sector, int64_t nr_sectors);
+int64_t offset, int64_t bytes);
 int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter);
 void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t offset);
 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index ad559c62b1..117837b3cc 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -509,35 +509,41 @@ int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter)
 
 /* Called within bdrv_dirty_bitmap_lock..unlock */
 void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
-  int64_t cur_sector, int64_t nr_sectors)
+  int64_t offset, int64_t bytes)
 {
+int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
+
 assert(bdrv_dirty_bitmap_enabled(bitmap));
 assert(!bdrv_dirty_bitmap_readonly(bitmap));
-hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+hbitmap_set(bitmap->bitmap, offset >> BDRV_SECTOR_BITS,
+end_sector - (offset >> BDRV_SECTOR_BITS));
 }
 
 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
-   int64_t cur_sector, int64_t nr_sectors)
+   int64_t offset, int64_t bytes)
 {
 bdrv_dirty_bitmap_lock(bitmap);
-bdrv_set_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors);
+bdrv_set_dirty_bitmap_locked(bitmap, offset, bytes);
 bdrv_dirty_bitmap_unlock(bitmap);
 }
 
 /* Called within bdrv_dirty_bitmap_lock..unlock */
 void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
-int64_t cur_sector, int64_t nr_sectors)
+int64_t offset, int64_t bytes)
 {
+int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
+
 assert(bdrv_dirty_bitmap_enabled(bitmap));
 assert(!bdrv_dirty_bitmap_readonly(bitmap));
-hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
+hbitmap_reset(bitmap->bitmap, offset >> BDRV_SECTOR_BITS,
+  end_sector - (offset >> BDRV_SECTOR_BITS));
 }
 
 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
- int64_t cur_sector, int64_t nr_sectors)
+ int64_t offset, int64_t bytes)
 {
 bdrv_dirty_bitmap_lock(bitmap);
-bdrv_reset_dirty_bitmap_locked(bitmap, cur_sector, nr_sectors);
+bdrv_reset_dirty_bitmap_locked(bitmap, 

[Qemu-block] [PULL 19/54] qcow2: Switch store_bitmap_data() to byte-based iteration

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

Now that we have adjusted the majority of the calls this function
makes to be byte-based, it is easier to read the code if it makes
passes over the image using bytes rather than sectors.

iotests 165 was rather weak - on a default 64k-cluster image, where
bitmap granularity also defaults to 64k bytes, a single cluster of
the bitmap table thus covers (64*1024*8) bits which each cover 64k
bytes, or 32G of image space.  But the test only uses a 1G image,
so it cannot trigger any more than one loop of the code in
store_bitmap_data(); and it was writing to the first cluster.  In
order to test that we are properly aligning which portions of the
bitmap are being written to the file, we really want to test a case
where the first dirty bit returned by bdrv_dirty_iter_next() is not
aligned to the start of a cluster, which we can do by modifying the
test to write data that doesn't happen to fall in the first cluster
of the image.

Signed-off-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: John Snow 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block/qcow2-bitmap.c   | 31 ---
 tests/qemu-iotests/165 |  2 +-
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 02512a21f2..f45e46cfbd 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -1072,10 +1072,9 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
 {
 int ret;
 BDRVQcow2State *s = bs->opaque;
-int64_t sector;
-uint64_t limit, sbc;
+int64_t offset;
+uint64_t limit;
 uint64_t bm_size = bdrv_dirty_bitmap_size(bitmap);
-uint64_t bm_sectors = DIV_ROUND_UP(bm_size, BDRV_SECTOR_SIZE);
 const char *bm_name = bdrv_dirty_bitmap_name(bitmap);
 uint8_t *buf = NULL;
 BdrvDirtyBitmapIter *dbi;
@@ -1100,18 +1099,22 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
 dbi = bdrv_dirty_iter_new(bitmap);
 buf = g_malloc(s->cluster_size);
 limit = bytes_covered_by_bitmap_cluster(s, bitmap);
-sbc = limit >> BDRV_SECTOR_BITS;
 assert(DIV_ROUND_UP(bm_size, limit) == tb_size);
 
-while ((sector = bdrv_dirty_iter_next(dbi) >> BDRV_SECTOR_BITS) >= 0) {
-uint64_t cluster = sector / sbc;
+while ((offset = bdrv_dirty_iter_next(dbi)) >= 0) {
+uint64_t cluster = offset / limit;
 uint64_t end, write_size;
 int64_t off;
 
-sector = cluster * sbc;
-end = MIN(bm_sectors, sector + sbc);
-write_size = bdrv_dirty_bitmap_serialization_size(bitmap,
-sector * BDRV_SECTOR_SIZE, (end - sector) * BDRV_SECTOR_SIZE);
+/*
+ * We found the first dirty offset, but want to write out the
+ * entire cluster of the bitmap that includes that offset,
+ * including any leading zero bits.
+ */
+offset = QEMU_ALIGN_DOWN(offset, limit);
+end = MIN(bm_size, offset + limit);
+write_size = bdrv_dirty_bitmap_serialization_size(bitmap, offset,
+  end - offset);
 assert(write_size <= s->cluster_size);
 
 off = qcow2_alloc_clusters(bs, s->cluster_size);
@@ -1123,9 +1126,7 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
 }
 tb[cluster] = off;
 
-bdrv_dirty_bitmap_serialize_part(bitmap, buf,
- sector * BDRV_SECTOR_SIZE,
- (end - sector) * BDRV_SECTOR_SIZE);
+bdrv_dirty_bitmap_serialize_part(bitmap, buf, offset, end - offset);
 if (write_size < s->cluster_size) {
 memset(buf + write_size, 0, s->cluster_size - write_size);
 }
@@ -1143,11 +1144,11 @@ static uint64_t *store_bitmap_data(BlockDriverState *bs,
 goto fail;
 }
 
-if (end >= bm_sectors) {
+if (end >= bm_size) {
 break;
 }
 
-bdrv_set_dirty_iter(dbi, end * BDRV_SECTOR_SIZE);
+bdrv_set_dirty_iter(dbi, end);
 }
 
 *bitmap_table_size = tb_size;
diff --git a/tests/qemu-iotests/165 b/tests/qemu-iotests/165
index 74d7b79a0b..a3932db3de 100755
--- a/tests/qemu-iotests/165
+++ b/tests/qemu-iotests/165
@@ -27,7 +27,7 @@ disk = os.path.join(iotests.test_dir, 'disk')
 disk_size = 0x4000 # 1G
 
 # regions for qemu_io: (start, count) in bytes
-regions1 = ((0,0x10),
+regions1 = ((0x0fff00, 0x1),
 (0x20, 0x10))
 
 regions2 = ((0x1000, 0x2),
-- 
2.13.6




[Qemu-block] [PULL 11/54] dirty-bitmap: Set iterator start by offset, not sector

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

All callers to bdrv_dirty_iter_new() passed 0 for their initial
starting point, drop that parameter.

Most callers to bdrv_set_dirty_iter() were scaling a byte offset to
a sector number; the exception qcow2-bitmap will be converted later
to use byte rather than sector iteration.  Move the scaling to occur
internally to dirty bitmap code instead, so that callers now pass
in bytes.

Signed-off-by: Eric Blake 
Reviewed-by: John Snow 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 include/block/dirty-bitmap.h | 5 ++---
 block/backup.c   | 5 ++---
 block/dirty-bitmap.c | 9 -
 block/mirror.c   | 4 ++--
 block/qcow2-bitmap.c | 4 ++--
 5 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 5f34a1a3c7..757fc4c5b8 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -44,8 +44,7 @@ void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
  int64_t cur_sector, int64_t nr_sectors);
 BdrvDirtyBitmapIter *bdrv_dirty_meta_iter_new(BdrvDirtyBitmap *bitmap);
-BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap,
- uint64_t first_sector);
+BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap);
 void bdrv_dirty_iter_free(BdrvDirtyBitmapIter *iter);
 
 uint64_t bdrv_dirty_bitmap_serialization_size(const BdrvDirtyBitmap *bitmap,
@@ -80,7 +79,7 @@ void bdrv_set_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
 void bdrv_reset_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap,
 int64_t cur_sector, int64_t nr_sectors);
 int64_t bdrv_dirty_iter_next(BdrvDirtyBitmapIter *iter);
-void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t sector_num);
+void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *hbi, int64_t offset);
 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
 int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap);
 void bdrv_dirty_bitmap_truncate(BlockDriverState *bs, int64_t bytes);
diff --git a/block/backup.c b/block/backup.c
index 517c300a49..ac9c018717 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -372,7 +372,7 @@ static int coroutine_fn 
backup_run_incremental(BackupBlockJob *job)
 
 granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
 clusters_per_iter = MAX((granularity / job->cluster_size), 1);
-dbi = bdrv_dirty_iter_new(job->sync_bitmap, 0);
+dbi = bdrv_dirty_iter_new(job->sync_bitmap);
 
 /* Find the next dirty sector(s) */
 while ((offset = bdrv_dirty_iter_next(dbi) * BDRV_SECTOR_SIZE) >= 0) {
@@ -403,8 +403,7 @@ static int coroutine_fn 
backup_run_incremental(BackupBlockJob *job)
 /* If the bitmap granularity is smaller than the backup granularity,
  * we need to advance the iterator pointer to the next cluster. */
 if (granularity < job->cluster_size) {
-bdrv_set_dirty_iter(dbi,
-cluster * job->cluster_size / 
BDRV_SECTOR_SIZE);
+bdrv_set_dirty_iter(dbi, cluster * job->cluster_size);
 }
 
 last_cluster = cluster - 1;
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 555c736024..84509476ba 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -473,11 +473,10 @@ uint32_t bdrv_dirty_bitmap_granularity(const 
BdrvDirtyBitmap *bitmap)
 return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
 }
 
-BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap,
- uint64_t first_sector)
+BdrvDirtyBitmapIter *bdrv_dirty_iter_new(BdrvDirtyBitmap *bitmap)
 {
 BdrvDirtyBitmapIter *iter = g_new(BdrvDirtyBitmapIter, 1);
-hbitmap_iter_init(>hbi, bitmap->bitmap, first_sector);
+hbitmap_iter_init(>hbi, bitmap->bitmap, 0);
 iter->bitmap = bitmap;
 bitmap->active_iterators++;
 return iter;
@@ -645,9 +644,9 @@ void bdrv_set_dirty(BlockDriverState *bs, int64_t 
cur_sector,
 /**
  * Advance a BdrvDirtyBitmapIter to an arbitrary offset.
  */
-void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *iter, int64_t sector_num)
+void bdrv_set_dirty_iter(BdrvDirtyBitmapIter *iter, int64_t offset)
 {
-hbitmap_iter_init(>hbi, iter->hbi.hb, sector_num);
+hbitmap_iter_init(>hbi, iter->hbi.hb, offset >> BDRV_SECTOR_BITS);
 }
 
 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
diff --git a/block/mirror.c b/block/mirror.c
index 6f5cb9f26c..0c705e0b4f 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -373,7 +373,7 @@ static uint64_t coroutine_fn 
mirror_iteration(MirrorBlockJob *s)
 next_dirty = bdrv_dirty_iter_next(s->dbi) * BDRV_SECTOR_SIZE;
 if (next_dirty > next_offset || next_dirty < 0) {
 /* 

[Qemu-block] [PULL 17/54] qcow2: Switch qcow2_measure() to byte-based iteration

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

This is new code, but it is easier to read if it makes passes over
the image using bytes rather than sectors (and will get easier in
the future when bdrv_get_block_status is converted to byte-based).

Signed-off-by: Eric Blake 
Reviewed-by: John Snow 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block/qcow2.c | 22 ++
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 970006fc1d..b8da8ca105 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3673,20 +3673,19 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, 
BlockDriverState *in_bs,
  */
 required = virtual_size;
 } else {
-int cluster_sectors = cluster_size / BDRV_SECTOR_SIZE;
-int64_t sector_num;
+int64_t offset;
 int pnum = 0;
 
-for (sector_num = 0;
- sector_num < ssize / BDRV_SECTOR_SIZE;
- sector_num += pnum) {
-int nb_sectors = MIN(ssize / BDRV_SECTOR_SIZE - sector_num,
- BDRV_REQUEST_MAX_SECTORS);
+for (offset = 0; offset < ssize;
+ offset += pnum * BDRV_SECTOR_SIZE) {
+int nb_sectors = MIN(ssize - offset,
+ BDRV_REQUEST_MAX_BYTES) / 
BDRV_SECTOR_SIZE;
 BlockDriverState *file;
 int64_t ret;
 
 ret = bdrv_get_block_status_above(in_bs, NULL,
-  sector_num, nb_sectors,
+  offset >> BDRV_SECTOR_BITS,
+  nb_sectors,
   , );
 if (ret < 0) {
 error_setg_errno(_err, -ret,
@@ -3699,12 +3698,11 @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, 
BlockDriverState *in_bs,
 } else if ((ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) ==
(BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED)) {
 /* Extend pnum to end of cluster for next iteration */
-pnum = ROUND_UP(sector_num + pnum, cluster_sectors) -
-   sector_num;
+pnum = (ROUND_UP(offset + pnum * BDRV_SECTOR_SIZE,
+ cluster_size) - offset) >> BDRV_SECTOR_BITS;
 
 /* Count clusters we've seen */
-required += (sector_num % cluster_sectors + pnum) *
-BDRV_SECTOR_SIZE;
+required += offset % cluster_size + pnum * 
BDRV_SECTOR_SIZE;
 }
 }
 }
-- 
2.13.6




[Qemu-block] [PULL 03/54] hbitmap: Rename serialization_granularity to serialization_align

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

The only client of hbitmap_serialization_granularity() is dirty-bitmap's
bdrv_dirty_bitmap_serialization_align().  Keeping the two names consistent
is worthwhile, and the shorter name is more representative of what the
function returns (the required alignment to be used for start/count of
other serialization functions, where violating the alignment causes
assertion failures).

Signed-off-by: Eric Blake 
Reviewed-by: John Snow 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 include/qemu/hbitmap.h |  8 
 block/dirty-bitmap.c   |  2 +-
 tests/test-hbitmap.c   | 10 +-
 util/hbitmap.c |  8 
 4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index d3a74a21fc..81e78043d1 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -159,16 +159,16 @@ bool hbitmap_get(const HBitmap *hb, uint64_t item);
 bool hbitmap_is_serializable(const HBitmap *hb);
 
 /**
- * hbitmap_serialization_granularity:
+ * hbitmap_serialization_align:
  * @hb: HBitmap to operate on.
  *
- * Granularity of serialization chunks, used by other serialization functions.
- * For every chunk:
+ * Required alignment of serialization chunks, used by other serialization
+ * functions. For every chunk:
  * 1. Chunk start should be aligned to this granularity.
  * 2. Chunk size should be aligned too, except for last chunk (for which
  *  start + count == hb->size)
  */
-uint64_t hbitmap_serialization_granularity(const HBitmap *hb);
+uint64_t hbitmap_serialization_align(const HBitmap *hb);
 
 /**
  * hbitmap_serialization_size:
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 30462d4f9a..0490ca3aff 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -617,7 +617,7 @@ uint64_t bdrv_dirty_bitmap_serialization_size(const 
BdrvDirtyBitmap *bitmap,
 
 uint64_t bdrv_dirty_bitmap_serialization_align(const BdrvDirtyBitmap *bitmap)
 {
-return hbitmap_serialization_granularity(bitmap->bitmap);
+return hbitmap_serialization_align(bitmap->bitmap);
 }
 
 void bdrv_dirty_bitmap_serialize_part(const BdrvDirtyBitmap *bitmap,
diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c
index 1acb353889..af41642346 100644
--- a/tests/test-hbitmap.c
+++ b/tests/test-hbitmap.c
@@ -738,15 +738,15 @@ static void test_hbitmap_meta_one(TestHBitmapData *data, 
const void *unused)
 }
 }
 
-static void test_hbitmap_serialize_granularity(TestHBitmapData *data,
-   const void *unused)
+static void test_hbitmap_serialize_align(TestHBitmapData *data,
+ const void *unused)
 {
 int r;
 
 hbitmap_test_init(data, L3 * 2, 3);
 g_assert(hbitmap_is_serializable(data->hb));
 
-r = hbitmap_serialization_granularity(data->hb);
+r = hbitmap_serialization_align(data->hb);
 g_assert_cmpint(r, ==, 64 << 3);
 }
 
@@ -974,8 +974,8 @@ int main(int argc, char **argv)
 hbitmap_test_add("/hbitmap/meta/word", test_hbitmap_meta_word);
 hbitmap_test_add("/hbitmap/meta/sector", test_hbitmap_meta_sector);
 
-hbitmap_test_add("/hbitmap/serialize/granularity",
- test_hbitmap_serialize_granularity);
+hbitmap_test_add("/hbitmap/serialize/align",
+ test_hbitmap_serialize_align);
 hbitmap_test_add("/hbitmap/serialize/basic",
  test_hbitmap_serialize_basic);
 hbitmap_test_add("/hbitmap/serialize/part",
diff --git a/util/hbitmap.c b/util/hbitmap.c
index 21535cc90b..2f9d0fdbd0 100644
--- a/util/hbitmap.c
+++ b/util/hbitmap.c
@@ -413,14 +413,14 @@ bool hbitmap_is_serializable(const HBitmap *hb)
 {
 /* Every serialized chunk must be aligned to 64 bits so that endianness
  * requirements can be fulfilled on both 64 bit and 32 bit hosts.
- * We have hbitmap_serialization_granularity() which converts this
+ * We have hbitmap_serialization_align() which converts this
  * alignment requirement from bitmap bits to items covered (e.g. sectors).
  * That value is:
  *64 << hb->granularity
  * Since this value must not exceed UINT64_MAX, hb->granularity must be
  * less than 58 (== 64 - 6, where 6 is ld(64), i.e. 1 << 6 == 64).
  *
- * In order for hbitmap_serialization_granularity() to always return a
+ * In order for hbitmap_serialization_align() to always return a
  * meaningful value, bitmaps that are to be serialized must have a
  * granularity of less than 58. */
 
@@ -437,7 +437,7 @@ bool hbitmap_get(const HBitmap *hb, uint64_t item)
 return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] & bit) != 0;
 }
 
-uint64_t hbitmap_serialization_granularity(const HBitmap *hb)
+uint64_t hbitmap_serialization_align(const HBitmap *hb)
 {
 

[Qemu-block] [PULL 00/54] Block layer patches

2017-10-06 Thread Kevin Wolf
The following changes since commit a26a98dfb9d448d7234d931ae3720feddf6f0651:

  Merge remote-tracking branch 'remotes/cohuck/tags/s390x-20171006' into 
staging (2017-10-06 13:19:03 +0100)

are available in the git repository at:

  git://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to fc3fd63fc0573ffd2ee569591a2e7f6c7310fd18:

  Merge remote-tracking branch 'mreitz/tags/pull-block-2017-10-06' into 
queue-block (2017-10-06 16:32:08 +0200)


Block layer patches


Daniel P. Berrange (6):
  block: use 1 MB bounce buffers for crypto instead of 16KB
  crypto: expose encryption sector size in APIs
  block: fix data type casting for crypto payload offset
  block: convert crypto driver to bdrv_co_preadv|pwritev
  block: convert qcrypto_block_encrypt|decrypt to take bytes offset
  block: support passthrough of BDRV_REQ_FUA in crypto driver

Eric Blake (27):
  block: Typo fix in copy_on_readv()
  block: Make bdrv_img_create() size selection easier to read
  hbitmap: Rename serialization_granularity to serialization_align
  qcow2: Ensure bitmap serialization is aligned
  dirty-bitmap: Drop unused functions
  dirty-bitmap: Avoid size query failure during truncate
  dirty-bitmap: Change bdrv_dirty_bitmap_size() to report bytes
  dirty-bitmap: Track bitmap size by bytes
  dirty-bitmap: Change bdrv_dirty_bitmap_*serialize*() to take bytes
  qcow2: Switch sectors_covered_by_bitmap_cluster() to byte-based
  dirty-bitmap: Set iterator start by offset, not sector
  dirty-bitmap: Change bdrv_dirty_iter_next() to report byte offset
  dirty-bitmap: Change bdrv_get_dirty_count() to report bytes
  dirty-bitmap: Change bdrv_get_dirty_locked() to take bytes
  dirty-bitmap: Change bdrv_[re]set_dirty_bitmap() to use bytes
  mirror: Switch mirror_dirty_init() to byte-based iteration
  qcow2: Switch qcow2_measure() to byte-based iteration
  qcow2: Switch load_bitmap_data() to byte-based iteration
  qcow2: Switch store_bitmap_data() to byte-based iteration
  dirty-bitmap: Switch bdrv_set_dirty() to bytes
  dirty-bitmap: Convert internal hbitmap size/granularity
  qemu-io: Add -C for opening with copy-on-read
  block: Uniform handling of 0-length bdrv_get_block_status()
  iotests: Restore stty settings on completion
  block: Add blkdebug hook for copy-on-read
  block: Perform copy-on-read in loop
  iotests: Add test 197 for covering copy-on-read

Kevin Wolf (6):
  block: Introduce BdrvChildRole.update_filename
  commit: Support multiple roots above top node
  qemu-iotests: Allow QMP pretty printing in common.qemu
  qemu-iotests: Test commit block job where top has two parents
  commit: Remove overlay_bs
  Merge remote-tracking branch 'mreitz/tags/pull-block-2017-10-06' into 
queue-block

Max Reitz (1):
  iotests: Fix 195 if IMGFMT is part of TEST_DIR

Paolo Bonzini (10):
  qemu-iotests: remove dead code
  qemu-iotests: get rid of AWK_PROG
  qemu-iotests: move "check" code out of common.rc
  qemu-iotests: cleanup and fix search for programs
  qemu-iotests: limit non-_PROG-suffixed variables to common.rc
  qemu-iotests: do not include common.rc in "check"
  qemu-iotests: disintegrate more parts of common.config
  qemu-iotests: fix uninitialized variable
  qemu-iotests: get rid of $iam
  qemu-iotests: merge "check" and "common"

Pavel Butsykin (2):
  qcow2: fix return error code in qcow2_truncate()
  qcow2: truncate the tail of the image file after shrinking the image

Thomas Huth (1):
  hw/block/onenand: Remove dead code block

Vladimir Sementsov-Ogievskiy (2):
  block/mirror: check backing in bdrv_mirror_top_refresh_filename
  block/mirror: check backing in bdrv_mirror_top_flush

 qapi/block-core.json |   5 +-
 block/qcow2.h|   1 +
 crypto/blockpriv.h   |   5 +-
 include/block/block.h|   3 +-
 include/block/block_int.h|   8 +-
 include/block/dirty-bitmap.h |  43 +-
 include/crypto/block.h   |  29 +-
 include/qemu/hbitmap.h   |   8 +-
 block.c  | 109 --
 block/backup.c   |   7 +-
 block/commit.c   |  64 +--
 block/crypto.c   | 130 +++---
 block/dirty-bitmap.c | 134 ++-
 block/io.c   | 131 +--
 block/mirror.c   |  88 ++---
 block/qcow.c |  11 +-
 block/qcow2-bitmap.c |  62 +--
 block/qcow2-cluster.c|   8 +-
 block/qcow2-refcount.c   |  22 ++
 block/qcow2.c|  53 ++-
 crypto/block-luks.c  |  18 +-
 crypto/block-qcow.c  |  13 +-

[Qemu-block] [PULL 04/54] qcow2: Ensure bitmap serialization is aligned

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

When subdividing a bitmap serialization, the code in hbitmap.c
enforces that start/count parameters are aligned (except that
count can end early at end-of-bitmap).  We exposed this required
alignment through bdrv_dirty_bitmap_serialization_align(), but
forgot to actually check that we comply with it.

Fortunately, qcow2 is never dividing bitmap serialization smaller
than one cluster (which is a minimum of 512 bytes); so we are
always compliant with the serialization alignment (which insists
that we partition at least 64 bits per chunk) because we are doing
at least 4k bits per chunk.

Still, it's safer to add an assertion (for the unlikely case that
we'd ever support a cluster smaller than 512 bytes, or if the
hbitmap implementation changes what it considers to be aligned),
rather than leaving bdrv_dirty_bitmap_serialization_align()
without a caller.

Signed-off-by: Eric Blake 
Reviewed-by: John Snow 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block/qcow2-bitmap.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 14f41d0427..1fc375b40a 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -274,10 +274,13 @@ static int free_bitmap_clusters(BlockDriverState *bs, 
Qcow2BitmapTable *tb)
 static uint64_t sectors_covered_by_bitmap_cluster(const BDRVQcow2State *s,
   const BdrvDirtyBitmap 
*bitmap)
 {
-uint32_t sector_granularity =
+uint64_t sector_granularity =
 bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS;
+uint64_t sbc = sector_granularity * (s->cluster_size << 3);
 
-return (uint64_t)sector_granularity * (s->cluster_size << 3);
+assert(QEMU_IS_ALIGNED(sbc,
+   bdrv_dirty_bitmap_serialization_align(bitmap)));
+return sbc;
 }
 
 /* load_bitmap_data
-- 
2.13.6




[Qemu-block] [PULL 01/54] block: Typo fix in copy_on_readv()

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

Signed-off-by: Eric Blake 
Signed-off-by: Kevin Wolf 
---
 block/io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/io.c b/block/io.c
index 4378ae4c7d..d633b0f851 100644
--- a/block/io.c
+++ b/block/io.c
@@ -954,7 +954,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild 
*child,
 /* FIXME We cannot require callers to have write permissions when all they
  * are doing is a read request. If we did things right, write permissions
  * would be obtained anyway, but internally by the copy-on-read code. As
- * long as it is implemented here rather than in a separat filter driver,
+ * long as it is implemented here rather than in a separate filter driver,
  * the copy-on-read code doesn't have its own BdrvChild, however, for which
  * it could request permissions. Therefore we have to bypass the permission
  * system for the moment. */
-- 
2.13.6




[Qemu-block] [PULL 02/54] block: Make bdrv_img_create() size selection easier to read

2017-10-06 Thread Kevin Wolf
From: Eric Blake 

All callers of bdrv_img_create() pass in a size, or -1 to read the
size from the backing file.  We then set that size as the QemuOpt
default, which means we will reuse that default rather than the
final parameter to qemu_opt_get_size() several lines later.  But
it is rather confusing to read subsequent checks of 'size == -1'
when it looks (without seeing the full context) like size defaults
to 0; it also doesn't help that a size of 0 is valid (for some
formats).

Rework the logic to make things more legible.

Signed-off-by: Eric Blake 
Reviewed-by: John Snow 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block.c b/block.c
index 5c65fac672..528cda7b2c 100644
--- a/block.c
+++ b/block.c
@@ -4488,7 +4488,7 @@ void bdrv_img_create(const char *filename, const char 
*fmt,
 
 /* The size for the image must always be specified, unless we have a 
backing
  * file and we have not been forbidden from opening it. */
-size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
+size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, img_size);
 if (backing_file && !(flags & BDRV_O_NO_BACKING)) {
 BlockDriverState *bs;
 char *full_backing = g_new0(char, PATH_MAX);
-- 
2.13.6




Re: [Qemu-block] [Qemu-devel] Patch to add helpful tracing output for driver authors in NVMe emulation

2017-10-06 Thread Daniel P. Berrange
On Fri, Oct 06, 2017 at 09:58:33AM -0400, Doug Gale wrote:
> I tried to get tracing to work before and I have never gotten it
> working. I'll give it another try and redo the patch with tracing
> infrastructure if necessary. The printf are nice because the dev can
> just look at the terminal where qemu is running. Can you view the
> trace output in realtime? When their code is sitting at a breakpoint
> in their driver, can they see the last thing that was traced right
> there? Or do they have to go run some cumbersome command and wade
> through it after an entire test run after the vm shut down?

There's a variety of trace backends available. By default QEMU enables
the 'log' backend these days. So with a default build of QEMU you can
use the '-d' arg to turn on printing: eg.:

   qemu-system-x86_64 ...args...-d trace:qio*

and that'll turn on every tracepoint with 'qio' as a prefix. The
trace messages will be printed straight to the console. eg:

$ qemu-system-x86_64 -d trace:qio* -serial unix:/tmp/foo,server
15774@1507298448.842745:qio_channel_socket_new Socket new ioc=0x5598b0565d60
15774@1507298448.842757:qio_channel_socket_listen_sync Socket listen sync 
ioc=0x5598b0565d60 addr=0x5598b0565c50
15774@1507298448.842803:qio_channel_socket_listen_complete Socket listen 
complete ioc=0x5598b0565d60 fd=16
qemu-system-x86_64: -serial unix:/tmp/foo,server: info: QEMU waiting for 
connection on: disconnected:unix:/tmp/foo,server
15774@1507298448.842818:qio_channel_socket_new Socket new ioc=0x5598b0564060
15774@1507298448.842820:qio_channel_socket_accept Socket accept start 
ioc=0x5598b0565d60


Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|



Re: [Qemu-block] [Qemu-devel] Patch to add helpful tracing output for driver authors in NVMe emulation

2017-10-06 Thread Doug Gale
I tried to get tracing to work before and I have never gotten it
working. I'll give it another try and redo the patch with tracing
infrastructure if necessary. The printf are nice because the dev can
just look at the terminal where qemu is running. Can you view the
trace output in realtime? When their code is sitting at a breakpoint
in their driver, can they see the last thing that was traced right
there? Or do they have to go run some cumbersome command and wade
through it after an entire test run after the vm shut down?


On Fri, Oct 6, 2017 at 9:54 AM, Daniel P. Berrange  wrote:
> On Fri, Oct 06, 2017 at 08:50:31AM -0500, Eric Blake wrote:
>> On 10/05/2017 06:18 PM, Doug Gale wrote:
>> > I added the tracing output in this patch to assist me in implementing
>> > an NVMe driver. It helped tremendously.
>> >
>> >>From 1d19086cdef8d492929852d582cb41dcc5026f71 Mon Sep 17 00:00:00 2001
>> > From: Doug Gale 
>> > Date: Thu, 5 Oct 2017 19:02:03 -0400
>> > Subject: [PATCH] Add tracing output to NVMe emulation to help driver 
>> > authors.
>> >
>> > It is off by default, enable it by uncommenting #define DEBUG_NVME
>> > or through CFLAGS
>> >
>> > Signed-off-by: Doug Gale 
>> > ---
>> >  hw/block/nvme.c | 191 
>> > +++-
>> >  1 file changed, 177 insertions(+), 14 deletions(-)
>> >
>> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
>> > index 9aa32692a3..74220c0171 100644
>> > --- a/hw/block/nvme.c
>> > +++ b/hw/block/nvme.c
>> > @@ -36,6 +36,14 @@
>> >
>> >  #include "nvme.h"
>> >
>> > +//#define DEBUG_NVME
>> > +
>> > +#ifdef DEBUG_NVME
>> > +#define DPRINTF(fmt, ...) fprintf(stderr, "nvme: " fmt "\n", ## 
>> > __VA_ARGS__)
>> > +#else
>> > +#define DPRINTF(fmt, ...) ((void)0)
>> > +#endif
>>
>> As Kevin said, using trace-events is nicer than fprintf().  But if you
>> absolutely insist on fprintf(), then do NOT do it like this, because
>> this leads to bit-rot.  Instead, do it in a form that lets -Wformat
>> checking work even when tracing is disabled:
>
> [snip]
>
> IMHO using of trace() instead of fprintf() is pretty much mandatory
> at this point. We've been making a concious effort to replace fprintfs
> with trace across code, so shouldn't add more fprintfs. It is just so
> much more useful to be able to enable the debugging without having to
> recompile the binary.
>
> Regards,
> Daniel
> --
> |: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|



Re: [Qemu-block] Patch to add helpful tracing output for driver authors in NVMe emulation

2017-10-06 Thread Stefan Hajnoczi
On Fri, Oct 06, 2017 at 11:49:56AM +0200, Kevin Wolf wrote:
> Am 06.10.2017 um 01:18 hat Doug Gale geschrieben:
> > I added the tracing output in this patch to assist me in implementing
> > an NVMe driver. It helped tremendously.
> > 
> > From 1d19086cdef8d492929852d582cb41dcc5026f71 Mon Sep 17 00:00:00 2001
> > From: Doug Gale 
> > Date: Thu, 5 Oct 2017 19:02:03 -0400
> > Subject: [PATCH] Add tracing output to NVMe emulation to help driver 
> > authors.
> > 
> > It is off by default, enable it by uncommenting #define DEBUG_NVME
> > or through CFLAGS
> > 
> > Signed-off-by: Doug Gale 
> 
> I think it's generally preferable to use the actual tracing
> infrastructure instead of fprintf. This would allow to enable the trace
> points with the command line options instead of only at compile time and
> also to get machine-readable output if necessary.
> 
> The tracing infrastructure is documented in docs/devel/tracing.txt.

Here is a quick example of enabling trace events whose name starts with
"cpu_":

  $ x86_64-softmmu/qemu-system-x86_64 -trace enable=cpu_\*
  7996@1507297762.170096:cpu_get_apic_base 0xfee00900

The default trace backend prints to QEMU's log (stderr).  This makes it
easy to get at tracing output during development.

In order to add trace events to nvme.c you need to edit
hw/block/trace-events.  I suggest naming them with a "nvme_" prefix.

Stefan



Re: [Qemu-block] [Qemu-devel] Patch to add helpful tracing output for driver authors in NVMe emulation

2017-10-06 Thread Daniel P. Berrange
On Fri, Oct 06, 2017 at 08:50:31AM -0500, Eric Blake wrote:
> On 10/05/2017 06:18 PM, Doug Gale wrote:
> > I added the tracing output in this patch to assist me in implementing
> > an NVMe driver. It helped tremendously.
> > 
> >>From 1d19086cdef8d492929852d582cb41dcc5026f71 Mon Sep 17 00:00:00 2001
> > From: Doug Gale 
> > Date: Thu, 5 Oct 2017 19:02:03 -0400
> > Subject: [PATCH] Add tracing output to NVMe emulation to help driver 
> > authors.
> > 
> > It is off by default, enable it by uncommenting #define DEBUG_NVME
> > or through CFLAGS
> > 
> > Signed-off-by: Doug Gale 
> > ---
> >  hw/block/nvme.c | 191 
> > +++-
> >  1 file changed, 177 insertions(+), 14 deletions(-)
> > 
> > diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> > index 9aa32692a3..74220c0171 100644
> > --- a/hw/block/nvme.c
> > +++ b/hw/block/nvme.c
> > @@ -36,6 +36,14 @@
> > 
> >  #include "nvme.h"
> > 
> > +//#define DEBUG_NVME
> > +
> > +#ifdef DEBUG_NVME
> > +#define DPRINTF(fmt, ...) fprintf(stderr, "nvme: " fmt "\n", ## 
> > __VA_ARGS__)
> > +#else
> > +#define DPRINTF(fmt, ...) ((void)0)
> > +#endif
> 
> As Kevin said, using trace-events is nicer than fprintf().  But if you
> absolutely insist on fprintf(), then do NOT do it like this, because
> this leads to bit-rot.  Instead, do it in a form that lets -Wformat
> checking work even when tracing is disabled:

[snip]

IMHO using of trace() instead of fprintf() is pretty much mandatory
at this point. We've been making a concious effort to replace fprintfs
with trace across code, so shouldn't add more fprintfs. It is just so
much more useful to be able to enable the debugging without having to
recompile the binary.

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|



[Qemu-block] Patch to add helpful tracing output for driver authors in NVMe emulation

2017-10-06 Thread Doug Gale
I added the tracing output in this patch to assist me in implementing
an NVMe driver. It helped tremendously.

>From 1d19086cdef8d492929852d582cb41dcc5026f71 Mon Sep 17 00:00:00 2001
From: Doug Gale 
Date: Thu, 5 Oct 2017 19:02:03 -0400
Subject: [PATCH] Add tracing output to NVMe emulation to help driver authors.

It is off by default, enable it by uncommenting #define DEBUG_NVME
or through CFLAGS

Signed-off-by: Doug Gale 
---
 hw/block/nvme.c | 191 +++-
 1 file changed, 177 insertions(+), 14 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 9aa32692a3..74220c0171 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -36,6 +36,14 @@

 #include "nvme.h"

+//#define DEBUG_NVME
+
+#ifdef DEBUG_NVME
+#define DPRINTF(fmt, ...) fprintf(stderr, "nvme: " fmt "\n", ## __VA_ARGS__)
+#else
+#define DPRINTF(fmt, ...) ((void)0)
+#endif
+
 static void nvme_process_sq(void *opaque);

 static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
@@ -86,10 +94,14 @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq)
 {
 if (cq->irq_enabled) {
 if (msix_enabled(&(n->parent_obj))) {
+DPRINTF("raising MSI-X IRQ vector %u", cq->vector);
 msix_notify(&(n->parent_obj), cq->vector);
 } else {
+DPRINTF("pulsing IRQ pin");
 pci_irq_pulse(>parent_obj);
 }
+} else {
+DPRINTF("IRQ is masked");
 }
 }

@@ -101,9 +113,11 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg,
QEMUIOVector *iov, uint64_t prp1,
 int num_prps = (len >> n->page_bits) + 1;

 if (!prp1) {
+DPRINTF("Invalid PRP!");
 return NVME_INVALID_FIELD | NVME_DNR;
 } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
+DPRINTF("PRP in controller memory");
 qsg->nsg = 0;
 qemu_iovec_init(iov, num_prps);
 qemu_iovec_add(iov, (void *)>cmbuf[prp1 -
n->ctrl_mem.addr], trans_len);
@@ -168,6 +182,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg,
QEMUIOVector *iov, uint64_t prp1,

  unmap:
 qemu_sglist_destroy(qsg);
+DPRINTF("invalid SGL!");
 return NVME_INVALID_FIELD | NVME_DNR;
 }

@@ -178,16 +193,22 @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n,
uint8_t *ptr, uint32_t len,
 QEMUIOVector iov;
 uint16_t status = NVME_SUCCESS;

+DPRINTF("DMA read, prp1=0x%"PRIx64" prp2=0x%"PRIx64,
+prp1, prp2);
+
 if (nvme_map_prp(, , prp1, prp2, len, n)) {
+DPRINTF("DMA read invalid PRP field!");
 return NVME_INVALID_FIELD | NVME_DNR;
 }
 if (qsg.nsg > 0) {
 if (dma_buf_read(ptr, len, )) {
+DPRINTF("DMA read invalid SGL field!");
 status = NVME_INVALID_FIELD | NVME_DNR;
 }
 qemu_sglist_destroy();
 } else {
 if (qemu_iovec_to_buf(, 0, ptr, len) != len) {
+DPRINTF("invalid field!");
 status = NVME_INVALID_FIELD | NVME_DNR;
 }
 qemu_iovec_destroy();
@@ -274,6 +295,7 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n,
NvmeNamespace *ns, NvmeCmd *cmd,
 uint32_t aio_nlb = nlb << (data_shift - BDRV_SECTOR_BITS);

 if (slba + nlb > ns->id_ns.nsze) {
+DPRINTF("Invalid LBA!");
 return NVME_LBA_RANGE | NVME_DNR;
 }

@@ -301,13 +323,19 @@ static uint16_t nvme_rw(NvmeCtrl *n,
NvmeNamespace *ns, NvmeCmd *cmd,
 int is_write = rw->opcode == NVME_CMD_WRITE ? 1 : 0;
 enum BlockAcctType acct = is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;

+DPRINTF("%s %"PRIu32" blocks (%"PRIu64" bytes) from LBA %"PRIu64,
+is_write ? "write" : "read",
+nlb, data_size, slba);
+
 if ((slba + nlb) > ns->id_ns.nsze) {
 block_acct_invalid(blk_get_stats(n->conf.blk), acct);
+DPRINTF("Invalid LBA!");
 return NVME_LBA_RANGE | NVME_DNR;
 }

 if (nvme_map_prp(>qsg, >iov, prp1, prp2, data_size, n)) {
 block_acct_invalid(blk_get_stats(n->conf.blk), acct);
+DPRINTF("Invalid PRP!");
 return NVME_INVALID_FIELD | NVME_DNR;
 }

@@ -337,6 +365,7 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd
*cmd, NvmeRequest *req)
 uint32_t nsid = le32_to_cpu(cmd->nsid);

 if (nsid == 0 || nsid > n->num_namespaces) {
+DPRINTF("Invalid namespace!");
 return NVME_INVALID_NSID | NVME_DNR;
 }

@@ -350,6 +379,7 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeCmd
*cmd, NvmeRequest *req)
 case NVME_CMD_READ:
 return nvme_rw(n, ns, cmd, req);
 default:
+DPRINTF("Invalid opcode!");
 return NVME_INVALID_OPCODE | NVME_DNR;
 }
 }
@@ -374,9 +404,12 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeCmd *cmd)
 uint16_t qid = le16_to_cpu(c->qid);

 if (!qid || nvme_check_sqid(n, qid)) {
+DPRINTF("invalid submission queue deletion! qid=%u", qid);
 return 

Re: [Qemu-block] [Qemu-devel] Patch to add helpful tracing output for driver authors in NVMe emulation

2017-10-06 Thread Eric Blake
On 10/05/2017 06:18 PM, Doug Gale wrote:
> I added the tracing output in this patch to assist me in implementing
> an NVMe driver. It helped tremendously.
> 
>>From 1d19086cdef8d492929852d582cb41dcc5026f71 Mon Sep 17 00:00:00 2001
> From: Doug Gale 
> Date: Thu, 5 Oct 2017 19:02:03 -0400
> Subject: [PATCH] Add tracing output to NVMe emulation to help driver authors.
> 
> It is off by default, enable it by uncommenting #define DEBUG_NVME
> or through CFLAGS
> 
> Signed-off-by: Doug Gale 
> ---
>  hw/block/nvme.c | 191 
> +++-
>  1 file changed, 177 insertions(+), 14 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 9aa32692a3..74220c0171 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -36,6 +36,14 @@
> 
>  #include "nvme.h"
> 
> +//#define DEBUG_NVME
> +
> +#ifdef DEBUG_NVME
> +#define DPRINTF(fmt, ...) fprintf(stderr, "nvme: " fmt "\n", ## __VA_ARGS__)
> +#else
> +#define DPRINTF(fmt, ...) ((void)0)
> +#endif

As Kevin said, using trace-events is nicer than fprintf().  But if you
absolutely insist on fprintf(), then do NOT do it like this, because
this leads to bit-rot.  Instead, do it in a form that lets -Wformat
checking work even when tracing is disabled:

#ifdef DEBUG_NVME
# define DEBUG_NVME_PRINT 1
#else
# define DEBUG_NVME_PRINT 0
#endif
#define DPRINTF(fmt, ...) \
do { \
if (DEBUG_NVME_PRINT) { \
fprintf(stderr, "nvme: " fmt "\n", ## __VA_ARGS__); \
} \
while (0)

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [PATCH RFC] block: add block-insert-node QMP command

2017-10-06 Thread Manos Pitsidianakis

On Fri, Oct 06, 2017 at 02:59:55PM +0200, Max Reitz wrote:

On 2017-10-04 23:04, Manos Pitsidianakis wrote:

On Wed, Oct 04, 2017 at 08:09:24PM +0200, Max Reitz wrote:

On 2017-10-04 19:05, Manos Pitsidianakis wrote:

On Wed, Oct 04, 2017 at 02:49:27PM +0200, Max Reitz wrote:

On 2017-08-15 09:45, Manos Pitsidianakis wrote:

block-insert-node and its pair command block-remove-node provide
runtime
insertion and removal of filter nodes.

block-insert-node takes a (parent, child) and (node, child) pair of
edges and unrefs the (parent, child) BdrvChild relationship and
creates
a new (parent, node) child with the same BdrvChildRole.

This is a different approach than x-blockdev-change which uses the
driver
methods bdrv_add_child() and bdrv_del_child(),


Why? :-)

Can't we reuse x-blockdev-change? As far as I'm concerned, this was one
of its roles, and at least I don't want to have both x-blockdev-change
and these new commands.

I think it would be a good idea just to change bdrv_add_child() and
bdrv_del_child(): If the driver has a bdrv_{add,del}_child() callback,
invoke that.  If it doesn't, then just attach the child.

Of course, it may turn out that x-blockdev-change is lacking something
(e.g. a way to specify as what kind of child a new node is to be
attached).  Then we should either extend it so that it covers what it's
lacking, or replace it completely by these new commands.  In the latter
case, however, they would need to cover the existing use case which is
reconfiguring the quorum driver.  (And that would mean it would have to
invoke bdrv_add_child()/bdrv_del_child() when the driver has them.)

Max



I think the two use cases are this:

a) Adding/removing a replication child to an existing quorum node
b) Insert a filter between two existing nodes.


For me both are the same: Adding/removing nodes into/from the graph.

The difference is how those children are added (or removed, but it's the
same in reverse): In case of quorum, you can simply attach a new child
because it supports a variable number of children.  Another case where
this would work are all block drivers which support backing files (you
can freely attach/detach those).


Doesn't blockdev-snapshot-sync cover this? (I may be missing something).


Not really, because it doesn't add a new child.  But it's still a good
point, because your block-insert-node command would make it obsolete,
actually.

blockdev-snapshot literally is a special-cased block-insert-node.  And
blockdev-snapshot-sync then is qemu-img create + blockdev-add +
blockdev-snapshot.


Now that we're on this topic, quorum might be a good candidate for
*_reopen when and if it lands on QMP: Reconfiguring the children could
be done by reopening the BDS with new options.


Hmmm...  I guess that would work, too.  But intuitively, that seems a
bit heavy-weight to me


In this series, it's not about adding or removing new children, but
instead "injecting" them into an edge: An existing child is replaced,
but it now serves as some child of the new one.

(I guess writing too much trying to get my point across, sorry...)

The gist is that for me it's not so much about "quorum" or "filter
nodes".  It's about adding a new child to a node vs. replacing an
existing one.


These are not directly compatible semantically, but as you said
x-blockdev-change can perform b) if bdrv_add_child()/bdrv_del_child()
are not implemented. Wouldn't that be unnecessary overloading?


Yes, I think it would be. :-)

So say we have these two trees in our graph:

[ Parent BDS -> Child BDS ]
[ Filter BDS -> Child BDS ]

So here's what you can do with x-blockdev-change (in theory; in practice
it can only do that for quorum):
- Remove a child, so
   [ Parent BDS -> Child BDS ]
 is split into two trees
   [ Parent BDS ] and
   [ Child BDS ]
- Add a child, so we can merge
   [ Parent BDS ] and
   [ Filter BDS -> Child BDS ]
 into
   [ Parent BDS -> Filter BDS -> Child BDS ]



Yes, of course this would have to be done in one transaction.


Would it?  If you want to put Filter BDS into the chain, you can just
create [ Filter BDS ] without any child, and then use a single
x-blockdev-change invocation to inject it.

The only thing that's necessary is that the filter BDS would have to be
able to handle having no child.


Yeah that was what I had in mind.


[...]


So after I've written all of this, I feel like the new insert-node and
remove-node commands are exactly what x-blockdev-change should do when
asked to replace a node.  The only difference is that x-blockdev-change
would allow you to replace any node with anything, without the
constraints that block-insert-node and block-remove-node exact.

(And node replacement with x-blockdev-change would work by specifying
all three parameters.)

Not sure if that makes sense, I hope it does. :-)



Hm, I can't think of a way to fit that into x-blockdev-change *and* keep
the bdrv_add_child/bdrv_del_child functionality into consideration
(since we'd have to keep both). This 

Re: [Qemu-block] [PATCH 8/8] nbd: Minimal structured read for client

2017-10-06 Thread Eric Blake
On 10/06/2017 02:34 AM, Vladimir Sementsov-Ogievskiy wrote:

>> - if the chunk is NBD_REPLY_TYPE_NONE, there is no payload, and this
>> should be the final chunk, so the return to the caller can be the same
>> as for old-style (return 1 if we had no earlier error packets, or the
>> saved negative value corresponding to the first error received)
>> - if the command is read, we can read the payload into qiov (saves
>> malloc'ing space for the reply only to copy it into the qiov), so we
> 
> but here you said "This is putting a LOT of smarts directly into the
> receive routine"

About the only reason to justify putting smarts into the receive routine
is if it is the most common case, where the overhead of not
special-casing will penalize us.  READ happens to be a frequent command,
all other commands, like BLOCK_STATUS, will probably be infrequent.
Making READ malloc the result, only to then copy it into the qiov, is a
waste of time; no other structured command will pass a qiov.  So yeah,
maybe I'm stating things a bit differently than in my earlier messages,
but that's because I've now stated reasonings for why it is okay to
special-case READ with a qiov differently from all other commands (none
of which will pass a qiov to the receive routine).

Thanks for persisting with this, and I'm looking forward to the next
revision that you post; hopefully, by taking this discussion, we are
making sure that the design is solid.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [PATCH RFC] block: add block-insert-node QMP command

2017-10-06 Thread Max Reitz
On 2017-10-04 23:04, Manos Pitsidianakis wrote:
> On Wed, Oct 04, 2017 at 08:09:24PM +0200, Max Reitz wrote:
>> On 2017-10-04 19:05, Manos Pitsidianakis wrote:
>>> On Wed, Oct 04, 2017 at 02:49:27PM +0200, Max Reitz wrote:
 On 2017-08-15 09:45, Manos Pitsidianakis wrote:
> block-insert-node and its pair command block-remove-node provide
> runtime
> insertion and removal of filter nodes.
>
> block-insert-node takes a (parent, child) and (node, child) pair of
> edges and unrefs the (parent, child) BdrvChild relationship and
> creates
> a new (parent, node) child with the same BdrvChildRole.
>
> This is a different approach than x-blockdev-change which uses the
> driver
> methods bdrv_add_child() and bdrv_del_child(),

 Why? :-)

 Can't we reuse x-blockdev-change? As far as I'm concerned, this was one
 of its roles, and at least I don't want to have both x-blockdev-change
 and these new commands.

 I think it would be a good idea just to change bdrv_add_child() and
 bdrv_del_child(): If the driver has a bdrv_{add,del}_child() callback,
 invoke that.  If it doesn't, then just attach the child.

 Of course, it may turn out that x-blockdev-change is lacking something
 (e.g. a way to specify as what kind of child a new node is to be
 attached).  Then we should either extend it so that it covers what it's
 lacking, or replace it completely by these new commands.  In the latter
 case, however, they would need to cover the existing use case which is
 reconfiguring the quorum driver.  (And that would mean it would have to
 invoke bdrv_add_child()/bdrv_del_child() when the driver has them.)

 Max

>>>
>>> I think the two use cases are this:
>>>
>>> a) Adding/removing a replication child to an existing quorum node
>>> b) Insert a filter between two existing nodes.
>>
>> For me both are the same: Adding/removing nodes into/from the graph.
>>
>> The difference is how those children are added (or removed, but it's the
>> same in reverse): In case of quorum, you can simply attach a new child
>> because it supports a variable number of children.  Another case where
>> this would work are all block drivers which support backing files (you
>> can freely attach/detach those).
> 
> Doesn't blockdev-snapshot-sync cover this? (I may be missing something).

Not really, because it doesn't add a new child.  But it's still a good
point, because your block-insert-node command would make it obsolete,
actually.

blockdev-snapshot literally is a special-cased block-insert-node.  And
blockdev-snapshot-sync then is qemu-img create + blockdev-add +
blockdev-snapshot.

> Now that we're on this topic, quorum might be a good candidate for
> *_reopen when and if it lands on QMP: Reconfiguring the children could
> be done by reopening the BDS with new options.

Hmmm...  I guess that would work, too.  But intuitively, that seems a
bit heavy-weight to me

>> In this series, it's not about adding or removing new children, but
>> instead "injecting" them into an edge: An existing child is replaced,
>> but it now serves as some child of the new one.
>>
>> (I guess writing too much trying to get my point across, sorry...)
>>
>> The gist is that for me it's not so much about "quorum" or "filter
>> nodes".  It's about adding a new child to a node vs. replacing an
>> existing one.
>>
>>> These are not directly compatible semantically, but as you said
>>> x-blockdev-change can perform b) if bdrv_add_child()/bdrv_del_child()
>>> are not implemented. Wouldn't that be unnecessary overloading?
>>
>> Yes, I think it would be. :-)
>>
>> So say we have these two trees in our graph:
>>
>> [ Parent BDS -> Child BDS ]
>> [ Filter BDS -> Child BDS ]
>>
>> So here's what you can do with x-blockdev-change (in theory; in practice
>> it can only do that for quorum):
>> - Remove a child, so
>>    [ Parent BDS -> Child BDS ]
>>  is split into two trees
>>    [ Parent BDS ] and
>>    [ Child BDS ]
>> - Add a child, so we can merge
>>    [ Parent BDS ] and
>>    [ Filter BDS -> Child BDS ]
>>  into
>>    [ Parent BDS -> Filter BDS -> Child BDS ]
>>
> 
> Yes, of course this would have to be done in one transaction.

Would it?  If you want to put Filter BDS into the chain, you can just
create [ Filter BDS ] without any child, and then use a single
x-blockdev-change invocation to inject it.

The only thing that's necessary is that the filter BDS would have to be
able to handle having no child.

[...]

>> So after I've written all of this, I feel like the new insert-node and
>> remove-node commands are exactly what x-blockdev-change should do when
>> asked to replace a node.  The only difference is that x-blockdev-change
>> would allow you to replace any node with anything, without the
>> constraints that block-insert-node and block-remove-node exact.
>>
>> (And node replacement with x-blockdev-change would work by specifying
>> all three 

Re: [Qemu-block] [PATCH v3 0/6] block: Avoid copy-on-read assertions

2017-10-06 Thread Kevin Wolf
Am 05.10.2017 um 21:02 hat Eric Blake geschrieben:
> During my quest to switch block status to be byte-based, John
> forced me to evaluate whether we have a situation during
> copy-on-read where we could exceed BDRV_REQUEST_MAX_BYTES [1].
> Sure enough, we have a number of pre-existing bugs in the
> copy-on-read code.  Fix those, along with adding a test.
> 
> Available as a tag at:
> git fetch git://repo.or.cz/qemu/ericb.git nbd-copy-on-read-v3
> 
> Since v2 (available at [2]):
> - add a new patch to fix an iotests wart
> - tweak patch 5 (now 6) to skip rather than fail on limited memory [patchew]
> - tweak patch 2 condition for legibility [Stefan]
> - add R-b

Thanks, applied to the block branch.

Kevin



Re: [Qemu-block] Patch to add helpful tracing output for driver authors in NVMe emulation

2017-10-06 Thread Kevin Wolf
Am 06.10.2017 um 01:18 hat Doug Gale geschrieben:
> I added the tracing output in this patch to assist me in implementing
> an NVMe driver. It helped tremendously.
> 
> From 1d19086cdef8d492929852d582cb41dcc5026f71 Mon Sep 17 00:00:00 2001
> From: Doug Gale 
> Date: Thu, 5 Oct 2017 19:02:03 -0400
> Subject: [PATCH] Add tracing output to NVMe emulation to help driver authors.
> 
> It is off by default, enable it by uncommenting #define DEBUG_NVME
> or through CFLAGS
> 
> Signed-off-by: Doug Gale 

I think it's generally preferable to use the actual tracing
infrastructure instead of fprintf. This would allow to enable the trace
points with the command line options instead of only at compile time and
also to get machine-readable output if necessary.

The tracing infrastructure is documented in docs/devel/tracing.txt.

Kevin



Re: [Qemu-block] [Qemu-devel] [PATCH v2 0/4] blockjobs: add explicit job reaping

2017-10-06 Thread Kevin Wolf
Am 06.10.2017 um 05:56 hat John Snow geschrieben:
> On 10/05/2017 07:38 AM, Kevin Wolf wrote:
> > Let me try to just consolidate all of the above into a single state
> > machine:
> > 
> > 1.  CREATED --> RUNNING
> > driver callback: .start
> > 2a. RUNNING --> READY | CANCELLED
> > via: auto transition (when bulk copy is finished) / block-job-cancel
> > event: BLOCK_JOB_READY
> > 2b. READY --> READY (COMPLETING) | READY (CANCELLING)
> > via: block-job-complete / block-job-cancel
> > event: none
> > driver callback: .complete / none
> > 3.  READY (CANCELLING | COMPLETING) --> DONE
> > via: auto transition
> >  (CANCELLING: after draining in-flight mirror requests;
> >   COMPLETING: when images are in sync)
> > event: BLOCK_JOB_DONE
> 
> I have some doubts about "DONE" necessarily coming prior to "PENDING" as
> this means that the transaction gives up control of the jobs at this
> point, and then "PENDING" jobs may complete one without the other,
> especially if we introduce a PREPARE() callback.
> 
> (At least, if I've understood you correctly that "DONE" is the phase
> where jobs queue up, ready to be dispatched by the transaction mechanism.)

Yes. This means that DONE is state where a job end up when it has
completed its work, which is generally a different point in time for
each job in the transaction. Something has to come there, and it can't
be PENDING yet because the transaction hasn't completed yet.

In other words, DONE is the inactive state that exists today, but is
externally exposed as RUNNING even though the job isn't actually doing
any work any more.

I don't see though why this means that the transaction has to give up
control?

> I think jobs needs to not clear that transactional hurdle until they've
> been allowed to call prepare so that we can be guaranteed that any
> changes that occur after that point in time will not fail (and cannot
> any longer affect the transactional group.)

The earliest point where the transaction can be removed from the picture
is the first call of block-job-finalize for any job in the transaction.
This is where all jobs of the transaction need to complete at least
their .prepare stage, otherwise this first job can't be finalised.

As we discussed yesterday, block-job-finalize is really an operation on
the whole transaction, but keeping it at the job level in the external
interface spares us managing transactions as named objects.

Kevin



Re: [Qemu-block] [Qemu-devel] [PATCH v4 1/2] virtio: introduce `query-virtio' QMP command

2017-10-06 Thread Dr. David Alan Gilbert
* Markus Armbruster (arm...@redhat.com) wrote:
> Jan Dakinevich  writes:
> 
> > On 10/03/2017 05:02 PM, Eric Blake wrote:
> >> On 10/03/2017 07:47 AM, Jan Dakinevich wrote:
> >>> The command is intended for gathering virtio information such as status,
> >>> feature bits, negotiation status. It is convenient and useful for debug
> >>> purpose.
> >>>
> >>> The commands returns generic virtio information for virtio such as
> >>> common feature names and status bits names and information for all
> >>> attached to current machine devices.
> >>>
> >>> To retrieve names of device-specific features `get_feature_name'
> >>> callback in VirtioDeviceClass also was introduced.
> >>>
> >>> Cc: Denis V. Lunev 
> >>> Signed-off-by: Jan Dakinevich 
> >>> ---
> >>>  hw/block/virtio-blk.c   |  21 +
> >>>  hw/char/virtio-serial-bus.c |  15 +++
> >>>  hw/display/virtio-gpu.c |  13 ++
> >>>  hw/net/virtio-net.c |  35 +++
> >>>  hw/scsi/virtio-scsi.c   |  16 +++
> >>>  hw/virtio/Makefile.objs |   2 +
> >>>  hw/virtio/virtio-balloon.c  |  15 +++
> >>>  hw/virtio/virtio-stub.c |   9 
> >>>  hw/virtio/virtio.c  | 101 
> >>> 
> >>>  include/hw/virtio/virtio.h  |   2 +
> >>>  qapi-schema.json|   1 +
> >>>  qapi/virtio.json|  94 
> >>> +
> >>>  12 files changed, 324 insertions(+)
> >>>  create mode 100644 hw/virtio/virtio-stub.c
> >>>  create mode 100644 qapi/virtio.json
> >> 
> >> This creates a new .json file, but does not touch MAINTAINERS.  Our idea
> >> in splitting the .json files was to make it easier for each sub-file
> >> that needs a specific maintainer in addition to the overall *.json line
> >> for QAPI maintainers, so this may deserve a MAINTAINERS entry.
> >> 
> >
> > Ok.
> >
> >>> +++ b/qapi/virtio.json
> >>> @@ -0,0 +1,94 @@
> >>> +# -*- Mode: Python -*-
> >>> +#
> >>> +
> >>> +##
> >>> +# = Virtio devices
> >>> +##
> >>> +
> >>> +{ 'include': 'common.json' }
> >>> +
> >>> +##
> >>> +# @VirtioInfoBit:
> >>> +#
> >>> +# Named virtio bit
> >>> +#
> >>> +# @bit: bit number
> >>> +#
> >>> +# @name: bit name
> >>> +#
> >>> +# Since: 2.11.0
> >>> +#
> >>> +##
> >>> +{
> >>> +'struct': 'VirtioInfoBit',
> >>> +'data': {
> >>> +'bit': 'uint64',
> >> 
> >> Why is this a 64-bit value? Are the values 0-63, or are they 1, 2, 4, 8,
> >> ...?  The documentation on 'bit number' is rather sparse.
> >
> > I would prefer `uint' here, but I don't see generic unsigned type (may
> > be, I am mistaken). I could use uint8 here, though.
> >
> >> 
> >>> +'name': 'str'
> >> 
> >> Wouldn't an enum type be better than an open-ended string?
> >> 
> >
> > Bit names are not known here, they are obtained from virtio device
> > implementations.
> 
> What exactly uses these bits?
> 
> Why do these uses justify pass-through?  By pass-through, I mean the
> messenger (QEMU) merely passes them along, without understanding them.
> Defeats introspection.

It should be noted originally it was HMP - this was just a debug command
and it's only getting the need to be introspectable because people
insisted it had a QMP version.

I think the intent is to print all flags, even ones we dont yet
understand.

Dave

> >>> +}
> >>> +}
> >>> +
> >>> +##
> >>> +# @VirtioInfoDevice:
> >>> +#
> >>> +# Information about specific virtio device
> >>> +#
> >>> +# @qom_path: QOM path of the device
> >> 
> >> Please make this 'qom-path' - new interfaces should prefer '-' over '_'.
> >
> > Ok.
> >
> >>> +#
> >>> +# @feature-names: names of device-specific features
> >>> +#
> >>> +# @host-features: bitmask of features, provided by devices
> >>> +#
> >>> +# @guest-features: bitmask of features, acknowledged by guest
> >>> +#
> >>> +# @status: virtio device status bitmask
> >>> +#
> >>> +# Since: 2.11.0
> >>> +#
> >>> +##
> >>> +{
> >>> +'struct': 'VirtioInfoDevice',
> >>> +'data': {
> >>> +'qom_path': 'str',
> >>> +'feature-names': ['VirtioInfoBit'],
> >>> +'host-features': 'uint64',
> >>> +'guest-features': 'uint64',
> >>> +'status': 'uint64'
> >> 
> >> I'm wondering if this is the best representation (where the caller has
> >> to parse the integer and then lookup in feature-names what each bit of
> >> the integer represents).  But I'm not sure I have anything better off
> >> the top of my head.
> >> 
> >
> > Consider it as way to tell caller about names of supported features.
> 
> "Unsigned integer interpreted as combination of well-known bit-valued
> symbols" is a fine C interface, but a pretty horrid QMP interface.
> What's wrong with doing a set the straightforward way as "array of
> enum"?
> 
> >>> +}
> >>> +}
> >>> +
> >>> +##
> >>> +# @VirtioInfo:
> >>> +#
> >>> +# Information about virtio devices
> >>> +#
> >>> +# @feature-names: names 

Re: [Qemu-block] [PATCH 8/8] nbd: Minimal structured read for client

2017-10-06 Thread Vladimir Sementsov-Ogievskiy

06.10.2017 01:12, Eric Blake wrote:

On 10/05/2017 05:36 AM, Paolo Bonzini wrote:

On 05/10/2017 12:02, Vladimir Sementsov-Ogievskiy wrote:

03.10.2017 17:06, Paolo Bonzini wrote:

On 03/10/2017 15:35, Vladimir Sementsov-Ogievskiy wrote:

In the end this probably means that you have a read_chunk_header
function and a read_chunk function.  READ has a loop that calls
read_chunk_header followed by direct reading into the QEMUIOVector,
while everyone else calls read_chunk.

accordingly to spec, we can receive several error reply chunks to any
request,
so loop, receiving them should be common for all requests I think

as well as handling error chunks should be common..

Yes, reading error chunks should be part of read_chunk_header.

Paolo

So, you want a loop in READ, and separate loop for other commands? Then
we will have separate loop for BLOCK_STATUS and for all future commands
with specific replies?

There should be a separate loop for each command.

The only difference between READ and other commands is that READ
receives directly in QEMUIOVector, while other commands can use a common
function to to receive each structured reply chunk into malloc-ed memory.

To make sure we're on the same page, here's how I see it.  At a high
level, we have:

Each command calls nbd_co_send_request once, then calls
nbd_co_receive_reply in a loop until there is an indication of the last
packet.  nbd_co_receive_reply waits for data to come from the server,
and parses the header.

If the packet is unrecognized, report failure and request a quit
(negative return value)

If it is old-style:
- if the command is read, and we did not negotiate structured read, then
we also read the payload into qiov
- if the command is read, but we negotiated structured read, the server
is buggy, so report the bug and request a quit
- for all other commands, there is no payload, return success or failure
to the caller based on the header payload
- at any rate, the reply to the caller is that this is the final packet,
and there is no payload returned (so we return negative or 1, but never 0)

Otherwise, it is new-style:
- if we did not negotiate structured reply, the server is buggy, so
report the bug and request a quit (negative return)
- if the chunk is an error, we process the entire packet and report the
error; if we have any commands that care about the error details, we
could return the error in a malloc'd discriminated union, but we can
probably get by without that. If callers don't care about details, but
the error chunk is not final, it may be easier to just store the fact
that an error occurred and return 0 to tell the caller to keep looping,
and return the negative value later when the final chunk is finally received
- if the chunk is NBD_REPLY_TYPE_NONE, there is no payload, and this
should be the final chunk, so the return to the caller can be the same
as for old-style (return 1 if we had no earlier error packets, or the
saved negative value corresponding to the first error received)
- if the command is read, we can read the payload into qiov (saves
malloc'ing space for the reply only to copy it into the qiov), so we


but here you said "This is putting a LOT of smarts directly into the 
receive routine"



don't have to return any data
- for any other command, we malloc space for the non-error payload, and
then it is up to the command's loop to process the payload

so the signature can be something like:

int nbd_co_receive_reply(NBDClientSession *s, QEMUIOVector *qiov,
  void **payload)

where it returns -errno on failure, 0 if the command is not complete,
and 1 if the command is done.  READ passes qiov, which is fully
populated when the function returns 1; all other commands pass NULL.
Commands pass NULL for payload if they don't expect a payload return
(this includes READ); but a command that expects a payload
(BLOCK_STATUS) passes a pointer in payload and gets malloc'd space
stored there if return is 0 or 1.

Does that sound like we're on the right design track?




--
Best regards,
Vladimir




Re: [Qemu-block] [PATCH 8/8] nbd: Minimal structured read for client

2017-10-06 Thread Vladimir Sementsov-Ogievskiy

06.10.2017 10:09, Vladimir Sementsov-Ogievskiy wrote:

06.10.2017 01:12, Eric Blake wrote:

On 10/05/2017 05:36 AM, Paolo Bonzini wrote:

On 05/10/2017 12:02, Vladimir Sementsov-Ogievskiy wrote:

03.10.2017 17:06, Paolo Bonzini wrote:

On 03/10/2017 15:35, Vladimir Sementsov-Ogievskiy wrote:

In the end this probably means that you have a read_chunk_header
function and a read_chunk function.  READ has a loop that calls
read_chunk_header followed by direct reading into the 
QEMUIOVector,

while everyone else calls read_chunk.
accordingly to spec, we can receive several error reply chunks 
to any

request,
so loop, receiving them should be common for all requests I think

as well as handling error chunks should be common..

Yes, reading error chunks should be part of read_chunk_header.

Paolo
So, you want a loop in READ, and separate loop for other commands? 
Then
we will have separate loop for BLOCK_STATUS and for all future 
commands

with specific replies?

There should be a separate loop for each command.

The only difference between READ and other commands is that READ
receives directly in QEMUIOVector, while other commands can use a 
common
function to to receive each structured reply chunk into malloc-ed 
memory.

To make sure we're on the same page, here's how I see it.  At a high
level, we have:

Each command calls nbd_co_send_request once, then calls
nbd_co_receive_reply in a loop until there is an indication of the last
packet.  nbd_co_receive_reply waits for data to come from the server,
and parses the header.

If the packet is unrecognized, report failure and request a quit
(negative return value)

If it is old-style:
- if the command is read, and we did not negotiate structured read, then
we also read the payload into qiov
- if the command is read, but we negotiated structured read, the server
is buggy, so report the bug and request a quit
- for all other commands, there is no payload, return success or failure
to the caller based on the header payload
- at any rate, the reply to the caller is that this is the final packet,
and there is no payload returned (so we return negative or 1, but 
never 0)


Otherwise, it is new-style:
- if we did not negotiate structured reply, the server is buggy, so
report the bug and request a quit (negative return)
- if the chunk is an error, we process the entire packet and report the
error; if we have any commands that care about the error details, we
could return the error in a malloc'd discriminated union, but we can
probably get by without that. If callers don't care about details, but
the error chunk is not final, it may be easier to just store the fact
that an error occurred and return 0 to tell the caller to keep looping,
and return the negative value later when the final chunk is finally 
received

- if the chunk is NBD_REPLY_TYPE_NONE, there is no payload, and this
should be the final chunk, so the return to the caller can be the same
as for old-style (return 1 if we had no earlier error packets, or the
saved negative value corresponding to the first error received)
- if the command is read, we can read the payload into qiov (saves
malloc'ing space for the reply only to copy it into the qiov), so we
don't have to return any data
- for any other command, we malloc space for the non-error payload, and
then it is up to the command's loop to process the payload

so the signature can be something like:

int nbd_co_receive_reply(NBDClientSession *s, QEMUIOVector *qiov,
  void **payload)

where it returns -errno on failure, 0 if the command is not complete,
and 1 if the command is done.  READ passes qiov, which is fully
populated when the function returns 1; all other commands pass NULL.
Commands pass NULL for payload if they don't expect a payload return
(this includes READ); but a command that expects a payload
(BLOCK_STATUS) passes a pointer in payload and gets malloc'd space
stored there if return is 0 or 1.

Does that sound like we're on the right design track?




Hmm. and what is the difference with my patch? Except payload? The 
only command with payload
now is READ (except errors), but you (and me in my patch) suggest to 
fill this qiov in nbd_co_receive_reply
(nbd_co_receive_1_reply_or_chunk in my patch), so payload will be 
unused for now, we can add it

later if it will be needed for BLOCK_STATUS.



Ahm, sorry, I've already forget my original patch, which reads most of 
payload in nbd/client.c code called from reply_entry.. So, ok for me, I 
think I'll post a new version today.


--
Best regards,
Vladimir




Re: [Qemu-block] [PATCH 8/8] nbd: Minimal structured read for client

2017-10-06 Thread Vladimir Sementsov-Ogievskiy

06.10.2017 01:12, Eric Blake wrote:

On 10/05/2017 05:36 AM, Paolo Bonzini wrote:

On 05/10/2017 12:02, Vladimir Sementsov-Ogievskiy wrote:

03.10.2017 17:06, Paolo Bonzini wrote:

On 03/10/2017 15:35, Vladimir Sementsov-Ogievskiy wrote:

In the end this probably means that you have a read_chunk_header
function and a read_chunk function.  READ has a loop that calls
read_chunk_header followed by direct reading into the QEMUIOVector,
while everyone else calls read_chunk.

accordingly to spec, we can receive several error reply chunks to any
request,
so loop, receiving them should be common for all requests I think

as well as handling error chunks should be common..

Yes, reading error chunks should be part of read_chunk_header.

Paolo

So, you want a loop in READ, and separate loop for other commands? Then
we will have separate loop for BLOCK_STATUS and for all future commands
with specific replies?

There should be a separate loop for each command.

The only difference between READ and other commands is that READ
receives directly in QEMUIOVector, while other commands can use a common
function to to receive each structured reply chunk into malloc-ed memory.

To make sure we're on the same page, here's how I see it.  At a high
level, we have:

Each command calls nbd_co_send_request once, then calls
nbd_co_receive_reply in a loop until there is an indication of the last
packet.  nbd_co_receive_reply waits for data to come from the server,
and parses the header.

If the packet is unrecognized, report failure and request a quit
(negative return value)

If it is old-style:
- if the command is read, and we did not negotiate structured read, then
we also read the payload into qiov
- if the command is read, but we negotiated structured read, the server
is buggy, so report the bug and request a quit
- for all other commands, there is no payload, return success or failure
to the caller based on the header payload
- at any rate, the reply to the caller is that this is the final packet,
and there is no payload returned (so we return negative or 1, but never 0)

Otherwise, it is new-style:
- if we did not negotiate structured reply, the server is buggy, so
report the bug and request a quit (negative return)
- if the chunk is an error, we process the entire packet and report the
error; if we have any commands that care about the error details, we
could return the error in a malloc'd discriminated union, but we can
probably get by without that. If callers don't care about details, but
the error chunk is not final, it may be easier to just store the fact
that an error occurred and return 0 to tell the caller to keep looping,
and return the negative value later when the final chunk is finally received
- if the chunk is NBD_REPLY_TYPE_NONE, there is no payload, and this
should be the final chunk, so the return to the caller can be the same
as for old-style (return 1 if we had no earlier error packets, or the
saved negative value corresponding to the first error received)
- if the command is read, we can read the payload into qiov (saves
malloc'ing space for the reply only to copy it into the qiov), so we
don't have to return any data
- for any other command, we malloc space for the non-error payload, and
then it is up to the command's loop to process the payload

so the signature can be something like:

int nbd_co_receive_reply(NBDClientSession *s, QEMUIOVector *qiov,
  void **payload)

where it returns -errno on failure, 0 if the command is not complete,
and 1 if the command is done.  READ passes qiov, which is fully
populated when the function returns 1; all other commands pass NULL.
Commands pass NULL for payload if they don't expect a payload return
(this includes READ); but a command that expects a payload
(BLOCK_STATUS) passes a pointer in payload and gets malloc'd space
stored there if return is 0 or 1.

Does that sound like we're on the right design track?




Hmm. and what is the difference with my patch? Except payload? The only 
command with payload
now is READ (except errors), but you (and me in my patch) suggest to 
fill this qiov in nbd_co_receive_reply
(nbd_co_receive_1_reply_or_chunk in my patch), so payload will be unused 
for now, we can add it

later if it will be needed for BLOCK_STATUS.

--
Best regards,
Vladimir