[PATCH v13 10/10] block: apply COR-filter to block-stream jobs

2020-12-02 Thread Andrey Shinkevich via
This patch completes the series with the COR-filter applied to
block-stream operations. Adding the filter makes it possible for copied
regions to be discarded in backing files during the block-stream job,
what will reduce the disk overuse.
The COR-filter insertion incurs changes in the test case
245:test_block_stream_4 that reopens the backing chain during a
block-stream job. There are changes in the test #030 as well.
The test case 030:test_stream_parallel was deleted due to multiple
conflicts between the concurrent job operations over the same backing
chain. All the nodes involved into one job are being frozen, including
the filter node. Operations over the mentioned nodes, including the
filter one, are being blocked for other jobs. So, the filter node gets
involved into two concurrent jobs with the adjacent data node. That is
not allowed. It is what the test cases with overlapping jobs are about.
The concept of the parallel jobs with common nodes is considered vital
no more.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 97 ++
 tests/qemu-iotests/030 | 51 +++-
 tests/qemu-iotests/030.out |  4 +-
 tests/qemu-iotests/141.out |  2 +-
 tests/qemu-iotests/245 | 22 +++
 5 files changed, 86 insertions(+), 90 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index 061268b..2f80fae 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -18,8 +18,10 @@
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/error-report.h"
+#include "qapi/qmp/qdict.h"
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
+#include "block/copy-on-read.h"
 
 enum {
 /*
@@ -34,6 +36,8 @@ typedef struct StreamBlockJob {
 BlockJob common;
 BlockDriverState *base_overlay; /* COW overlay (stream from this) */
 BlockDriverState *above_base;   /* Node directly above the base */
+BlockDriverState *cor_filter_bs;
+BlockDriverState *target_bs;
 BlockdevOnError on_error;
 char *backing_file_str;
 bool bs_read_only;
@@ -45,8 +49,7 @@ static int coroutine_fn stream_populate(BlockBackend *blk,
 {
 assert(bytes < SIZE_MAX);
 
-return blk_co_preadv(blk, offset, bytes, NULL,
- BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
+return blk_co_preadv(blk, offset, bytes, NULL, BDRV_REQ_PREFETCH);
 }
 
 static void stream_abort(Job *job)
@@ -54,24 +57,21 @@ static void stream_abort(Job *job)
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 
 if (s->chain_frozen) {
-BlockJob *bjob = >common;
-bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 }
 }
 
 static int stream_prepare(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
-BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
 BlockDriverState *base_unfiltered;
 BlockDriverState *backing_bs;
 Error *local_err = NULL;
 int ret = 0;
 
-bdrv_unfreeze_backing_chain(bs, s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 s->chain_frozen = false;
 
 if (bdrv_cow_child(unfiltered_bs)) {
@@ -79,7 +79,7 @@ static int stream_prepare(Job *job)
 if (base) {
 base_id = s->backing_file_str;
 if (base_id) {
-backing_bs = bdrv_find_backing_image(bs, base_id);
+backing_bs = bdrv_find_backing_image(unfiltered_bs, base_id);
 if (backing_bs && backing_bs->drv) {
 base_fmt = backing_bs->drv->format_name;
 } else {
@@ -111,15 +111,16 @@ static void stream_clean(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
 
 /* Reopen the image back in read-only mode if necessary */
 if (s->bs_read_only) {
 /* Give up write permissions before making it read-only */
 blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, _abort);
-bdrv_reopen_set_read_only(bs, true, NULL);
+bdrv_reopen_set_read_only(s->target_bs, true, NULL);
 }
 
+bdrv_cor_filter_drop(s->cor_filter_bs);
+
 g_free(s->backing_file_str);
 }
 
@@ -127,9 +128,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockBackend *blk = s->common.blk;
-BlockDriverState *bs = blk_bs(blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
-bool enable_cor = !bdrv_cow_child(s->base_overlay);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 

[PATCH v13 03/10] copy-on-read: add filter drop function

2020-12-02 Thread Andrey Shinkevich via
Provide API for the COR-filter removal. Also, drop the filter child
permissions for an inactive state when the filter node is being
removed.
To insert the filter, the block generic layer function
bdrv_insert_node() can be used.
The new function bdrv_cor_filter_drop() may be considered as an
intermediate solution before the QEMU permission update system has
overhauled. Then we are able to implement the API function
bdrv_remove_node() on the block generic layer.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/copy-on-read.c | 56 
 block/copy-on-read.h | 32 ++
 2 files changed, 88 insertions(+)
 create mode 100644 block/copy-on-read.h

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index cb03e0f..618c4c4 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -23,11 +23,20 @@
 #include "qemu/osdep.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
+#include "qapi/error.h"
+#include "block/copy-on-read.h"
+
+
+typedef struct BDRVStateCOR {
+bool active;
+} BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BDRVStateCOR *state = bs->opaque;
+
 bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
false, errp);
@@ -42,6 +51,13 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+state->active = true;
+
+/*
+ * We don't need to call bdrv_child_refresh_perms() now as the permissions
+ * will be updated later when the filter node gets its parent.
+ */
+
 return 0;
 }
 
@@ -57,6 +73,17 @@ static void cor_child_perm(BlockDriverState *bs, BdrvChild 
*c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
 {
+BDRVStateCOR *s = bs->opaque;
+
+if (!s->active) {
+/*
+ * While the filter is being removed
+ */
+*nperm = 0;
+*nshared = BLK_PERM_ALL;
+return;
+}
+
 *nperm = perm & PERM_PASSTHROUGH;
 *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
 
@@ -135,6 +162,7 @@ static void cor_lock_medium(BlockDriverState *bs, bool 
locked)
 
 static BlockDriver bdrv_copy_on_read = {
 .format_name= "copy-on-read",
+.instance_size  = sizeof(BDRVStateCOR),
 
 .bdrv_open  = cor_open,
 .bdrv_child_perm= cor_child_perm,
@@ -154,6 +182,34 @@ static BlockDriver bdrv_copy_on_read = {
 .is_filter  = true,
 };
 
+
+void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs)
+{
+BdrvChild *child;
+BlockDriverState *bs;
+BDRVStateCOR *s = cor_filter_bs->opaque;
+
+child = bdrv_filter_child(cor_filter_bs);
+if (!child) {
+return;
+}
+bs = child->bs;
+
+/* Retain the BDS until we complete the graph change. */
+bdrv_ref(bs);
+/* Hold a guest back from writing while permissions are being reset. */
+bdrv_drained_begin(bs);
+/* Drop permissions before the graph change. */
+s->active = false;
+bdrv_child_refresh_perms(cor_filter_bs, child, _abort);
+bdrv_replace_node(cor_filter_bs, bs, _abort);
+
+bdrv_drained_end(bs);
+bdrv_unref(bs);
+bdrv_unref(cor_filter_bs);
+}
+
+
 static void bdrv_copy_on_read_init(void)
 {
 bdrv_register(_copy_on_read);
diff --git a/block/copy-on-read.h b/block/copy-on-read.h
new file mode 100644
index 000..7bf405d
--- /dev/null
+++ b/block/copy-on-read.h
@@ -0,0 +1,32 @@
+/*
+ * Copy-on-read filter block driver
+ *
+ * The filter driver performs Copy-On-Read (COR) operations
+ *
+ * Copyright (c) 2018-2020 Virtuozzo International GmbH.
+ *
+ * Author:
+ *   Andrey Shinkevich 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+#ifndef BLOCK_COPY_ON_READ
+#define BLOCK_COPY_ON_READ
+
+#include "block/block_int.h"
+
+void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs);
+
+#endif /* BLOCK_COPY_ON_READ */
-- 
1.8.3.1




[PATCH v13 01/10] copy-on-read: support preadv/pwritev_part functions

2020-12-02 Thread Andrey Shinkevich via
Add support for the recently introduced functions
bdrv_co_preadv_part()
and
bdrv_co_pwritev_part()
to the COR-filter driver.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/copy-on-read.c | 28 
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 2816e61..cb03e0f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -74,21 +74,25 @@ static int64_t cor_getlength(BlockDriverState *bs)
 }
 
 
-static int coroutine_fn cor_co_preadv(BlockDriverState *bs,
-  uint64_t offset, uint64_t bytes,
-  QEMUIOVector *qiov, int flags)
+static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs,
+   uint64_t offset, uint64_t bytes,
+   QEMUIOVector *qiov,
+   size_t qiov_offset,
+   int flags)
 {
-return bdrv_co_preadv(bs->file, offset, bytes, qiov,
-  flags | BDRV_REQ_COPY_ON_READ);
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
 }
 
 
-static int coroutine_fn cor_co_pwritev(BlockDriverState *bs,
-   uint64_t offset, uint64_t bytes,
-   QEMUIOVector *qiov, int flags)
+static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs,
+uint64_t offset,
+uint64_t bytes,
+QEMUIOVector *qiov,
+size_t qiov_offset, int flags)
 {
-
-return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
+flags);
 }
 
 
@@ -137,8 +141,8 @@ static BlockDriver bdrv_copy_on_read = {
 
 .bdrv_getlength = cor_getlength,
 
-.bdrv_co_preadv = cor_co_preadv,
-.bdrv_co_pwritev= cor_co_pwritev,
+.bdrv_co_preadv_part= cor_co_preadv_part,
+.bdrv_co_pwritev_part   = cor_co_pwritev_part,
 .bdrv_co_pwrite_zeroes  = cor_co_pwrite_zeroes,
 .bdrv_co_pdiscard   = cor_co_pdiscard,
 .bdrv_co_pwritev_compressed = cor_co_pwritev_compressed,
-- 
1.8.3.1




[PATCH v13 02/10] block: add API function to insert a node

2020-12-02 Thread Andrey Shinkevich via
Provide API for insertion a node to backing chain.

Suggested-by: Max Reitz 
Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block.c   | 25 +
 include/block/block.h |  2 ++
 2 files changed, 27 insertions(+)

diff --git a/block.c b/block.c
index f1cedac..b71c39f 100644
--- a/block.c
+++ b/block.c
@@ -4698,6 +4698,31 @@ static void bdrv_delete(BlockDriverState *bs)
 g_free(bs);
 }
 
+BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
+   int flags, Error **errp)
+{
+BlockDriverState *new_node_bs;
+Error *local_err = NULL;
+
+new_node_bs =  bdrv_open(NULL, NULL, node_options, flags, errp);
+if (new_node_bs == NULL) {
+error_prepend(errp, "Could not create node: ");
+return NULL;
+}
+
+bdrv_drained_begin(bs);
+bdrv_replace_node(bs, new_node_bs, _err);
+bdrv_drained_end(bs);
+
+if (local_err) {
+bdrv_unref(new_node_bs);
+error_propagate(errp, local_err);
+return NULL;
+}
+
+return new_node_bs;
+}
+
 /*
  * Run consistency checks on an image
  *
diff --git a/include/block/block.h b/include/block/block.h
index c9d7c58..81a3894 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -350,6 +350,8 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState 
*bs_top,
  Error **errp);
 void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
Error **errp);
+BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
+   int flags, Error **errp);
 
 int bdrv_parse_aio(const char *mode, int *flags);
 int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
-- 
1.8.3.1




[PATCH v13 06/10] iotests: add #310 to test bottom node in COR driver

2020-12-02 Thread Andrey Shinkevich via
The test case #310 is similar to #216 by Max Reitz. The difference is
that the test #310 involves a bottom node to the COR filter driver.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/310 | 114 +
 tests/qemu-iotests/310.out |  15 ++
 tests/qemu-iotests/group   |   1 +
 3 files changed, 130 insertions(+)
 create mode 100755 tests/qemu-iotests/310
 create mode 100644 tests/qemu-iotests/310.out

diff --git a/tests/qemu-iotests/310 b/tests/qemu-iotests/310
new file mode 100755
index 000..c8b34cd
--- /dev/null
+++ b/tests/qemu-iotests/310
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+#
+# Copy-on-read tests using a COR filter with a bottom node
+#
+# Copyright (C) 2018 Red Hat, Inc.
+# Copyright (c) 2020 Virtuozzo International GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+import iotests
+from iotests import log, qemu_img, qemu_io_silent
+
+# Need backing file support
+iotests.script_initialize(supported_fmts=['qcow2', 'qcow', 'qed', 'vmdk'],
+  supported_platforms=['linux'])
+
+log('')
+log('=== Copy-on-read across nodes ===')
+log('')
+
+# This test is similar to the 216 one by Max Reitz 
+# The difference is that this test case involves a bottom node to the
+# COR filter driver.
+
+with iotests.FilePath('base.img') as base_img_path, \
+ iotests.FilePath('mid.img') as mid_img_path, \
+ iotests.FilePath('top.img') as top_img_path, \
+ iotests.VM() as vm:
+
+log('--- Setting up images ---')
+log('')
+
+assert qemu_img('create', '-f', iotests.imgfmt, base_img_path, '64M') == 0
+assert qemu_io_silent(base_img_path, '-c', 'write -P 1 0M 1M') == 0
+assert qemu_io_silent(base_img_path, '-c', 'write -P 1 3M 1M') == 0
+assert qemu_img('create', '-f', iotests.imgfmt, '-b', base_img_path,
+'-F', iotests.imgfmt, mid_img_path) == 0
+assert qemu_io_silent(mid_img_path,  '-c', 'write -P 3 2M 1M') == 0
+assert qemu_io_silent(mid_img_path,  '-c', 'write -P 3 4M 1M') == 0
+assert qemu_img('create', '-f', iotests.imgfmt, '-b', mid_img_path,
+'-F', iotests.imgfmt, top_img_path) == 0
+assert qemu_io_silent(top_img_path,  '-c', 'write -P 2 1M 1M') == 0
+
+#  0 1 2 3 4
+# top2
+# mid  3   3
+# base 1 1
+
+log('Done')
+
+log('')
+log('--- Doing COR ---')
+log('')
+
+vm.launch()
+
+log(vm.qmp('blockdev-add',
+   node_name='node0',
+   driver='copy-on-read',
+   bottom='node2',
+   file={
+   'driver': iotests.imgfmt,
+   'file': {
+   'driver': 'file',
+   'filename': top_img_path
+   },
+   'backing': {
+   'node-name': 'node2',
+   'driver': iotests.imgfmt,
+   'file': {
+   'driver': 'file',
+   'filename': mid_img_path
+   },
+   'backing': {
+   'driver': iotests.imgfmt,
+   'file': {
+   'driver': 'file',
+   'filename': base_img_path
+   }
+   },
+   }
+   }))
+
+# Trigger COR
+log(vm.qmp('human-monitor-command',
+   command_line='qemu-io node0 "read 0 5M"'))
+
+vm.shutdown()
+
+log('')
+log('--- Checking COR result ---')
+log('')
+
+assert qemu_io_silent(base_img_path, '-c', 'discard 0 4M') == 0
+assert qemu_io_silent(mid_img_path, '-c', 'discard 0M 5M') == 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 0 0 1M') == 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 2 1M 1M') == 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 3 2M 1M') == 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 0 3M 1M') == 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 3 4M 1M') == 0
+
+log('Done')
diff --git a/tests/qemu-iotests/310.out b/tests/qemu-iotests/310.out
new file mode 100644
index 000..a70aa5c
--- /dev/null
+++ b/tests/qemu-iotests/310.out
@@ -0,0 +1,15 @@
+
+=== Copy-on-read across nodes ===
+
+--- 

[PATCH v13 09/10] stream: skip filters when writing backing file name to QCOW2 header

2020-12-02 Thread Andrey Shinkevich via
Avoid writing a filter JSON file name and a filter format name to QCOW2
image when the backing file is being changed after the block stream
job. It can occur due to a concurrent commit job on the same backing
chain.
A user is still able to assign the 'backing-file' parameter for a
block-stream job keeping in mind the possible issue mentioned above.
If the user does not specify the 'backing-file' parameter, QEMU will
assign it automatically.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 21 +++--
 blockdev.c |  8 +---
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index 6e281c7..061268b 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -17,6 +17,7 @@
 #include "block/blockjob_int.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
+#include "qemu/error-report.h"
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
 
@@ -65,6 +66,8 @@ static int stream_prepare(Job *job)
 BlockDriverState *bs = blk_bs(bjob->blk);
 BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
 BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
+BlockDriverState *base_unfiltered;
+BlockDriverState *backing_bs;
 Error *local_err = NULL;
 int ret = 0;
 
@@ -75,8 +78,22 @@ static int stream_prepare(Job *job)
 const char *base_id = NULL, *base_fmt = NULL;
 if (base) {
 base_id = s->backing_file_str;
-if (base->drv) {
-base_fmt = base->drv->format_name;
+if (base_id) {
+backing_bs = bdrv_find_backing_image(bs, base_id);
+if (backing_bs && backing_bs->drv) {
+base_fmt = backing_bs->drv->format_name;
+} else {
+error_report("Format not found for backing file %s",
+ s->backing_file_str);
+}
+} else {
+base_unfiltered = bdrv_skip_filters(base);
+if (base_unfiltered) {
+base_id = base_unfiltered->filename;
+if (base_unfiltered->drv) {
+base_fmt = base_unfiltered->drv->format_name;
+}
+}
 }
 }
 bdrv_set_backing_hd(unfiltered_bs, base, _err);
diff --git a/blockdev.c b/blockdev.c
index c917625..70900f4 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2508,7 +2508,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 BlockDriverState *base_bs = NULL;
 AioContext *aio_context;
 Error *local_err = NULL;
-const char *base_name = NULL;
 int job_flags = JOB_DEFAULT;
 
 if (!has_on_error) {
@@ -2536,7 +2535,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 goto out;
 }
 assert(bdrv_get_aio_context(base_bs) == aio_context);
-base_name = base;
 }
 
 if (has_base_node) {
@@ -2551,7 +2549,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 assert(bdrv_get_aio_context(base_bs) == aio_context);
 bdrv_refresh_filename(base_bs);
-base_name = base_bs->filename;
 }
 
 /* Check for op blockers in the whole chain between bs and base */
@@ -2571,9 +2568,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 goto out;
 }
 
-/* backing_file string overrides base bs filename */
-base_name = has_backing_file ? backing_file : base_name;
-
 if (has_auto_finalize && !auto_finalize) {
 job_flags |= JOB_MANUAL_FINALIZE;
 }
@@ -2581,7 +2575,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 job_flags |= JOB_MANUAL_DISMISS;
 }
 
-stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
+stream_start(has_job_id ? job_id : NULL, bs, base_bs, backing_file,
  job_flags, has_speed ? speed : 0, on_error,
  filter_node_name, _err);
 if (local_err) {
-- 
1.8.3.1




[PATCH v13 07/10] block: include supported_read_flags into BDS structure

2020-12-02 Thread Andrey Shinkevich via
Add the new member supported_read_flags to the BlockDriverState
structure. It will control the flags set for copy-on-read operations.
Make the block generic layer evaluate supported read flags before they
go to a block driver.

Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/io.c| 12 ++--
 include/block/block_int.h |  4 
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/block/io.c b/block/io.c
index ec5e152..e28b11c 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1405,6 +1405,9 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
 if (flags & BDRV_REQ_COPY_ON_READ) {
 int64_t pnum;
 
+/* The flag BDRV_REQ_COPY_ON_READ has reached its addressee */
+flags &= ~BDRV_REQ_COPY_ON_READ;
+
 ret = bdrv_is_allocated(bs, offset, bytes, );
 if (ret < 0) {
 goto out;
@@ -1426,9 +1429,13 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
 goto out;
 }
 
+if (flags & ~bs->supported_read_flags) {
+abort();
+}
+
 max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
 if (bytes <= max_bytes && bytes <= max_transfer) {
-ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, 0);
+ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, flags);
 goto out;
 }
 
@@ -1441,7 +1448,8 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
 
 ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
  num, qiov,
- qiov_offset + bytes - bytes_remaining, 0);
+ qiov_offset + bytes - bytes_remaining,
+ flags);
 max_bytes -= num;
 } else {
 num = bytes_remaining;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index c05fa1e..247e166 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -873,6 +873,10 @@ struct BlockDriverState {
 /* I/O Limits */
 BlockLimits bl;
 
+/*
+ * Flags honored during pread
+ */
+unsigned int supported_read_flags;
 /* Flags honored during pwrite (so far: BDRV_REQ_FUA,
  * BDRV_REQ_WRITE_UNCHANGED).
  * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those
-- 
1.8.3.1




[PATCH v13 00/10] Apply COR-filter to the block-stream permanently

2020-12-02 Thread Andrey Shinkevich via
The previous version 12 was discussed in the email thread:
Message-Id: <1603390423-980205-1-git-send-email-andrey.shinkev...@virtuozzo.com>

v13:
  02: The bdrv_remove_node() was dropped.
  05: Three patches with fixes were merged into one.
  06: Minor changes based on Vladimir's suggestions.
  08: Three patches with fixes were merged into one.
  09: The search for format_name of backing file was added.
  10: The flag BLK_PERM_GRAPH_MOD was removed.

Andrey Shinkevich (10):
  copy-on-read: support preadv/pwritev_part functions
  block: add API function to insert a node
  copy-on-read: add filter drop function
  qapi: add filter-node-name to block-stream
  qapi: create BlockdevOptionsCor structure for COR driver
  iotests: add #310 to test bottom node in COR driver
  block: include supported_read_flags into BDS structure
  copy-on-read: skip non-guest reads if no copy needed
  stream: skip filters when writing backing file name to QCOW2 header
  block: apply COR-filter to block-stream jobs

 block.c|  25 +++
 block/copy-on-read.c   | 143 +
 block/copy-on-read.h   |  32 +
 block/io.c |  12 +++-
 block/monitor/block-hmp-cmds.c |   4 +-
 block/stream.c | 120 +++---
 blockdev.c |  12 ++--
 include/block/block.h  |  10 ++-
 include/block/block_int.h  |  11 +++-
 qapi/block-core.json   |  27 +++-
 tests/qemu-iotests/030 |  51 ++-
 tests/qemu-iotests/030.out |   4 +-
 tests/qemu-iotests/141.out |   2 +-
 tests/qemu-iotests/245 |  22 +--
 tests/qemu-iotests/310 | 114 
 tests/qemu-iotests/310.out |  15 +
 tests/qemu-iotests/group   |   1 +
 17 files changed, 484 insertions(+), 121 deletions(-)
 create mode 100644 block/copy-on-read.h
 create mode 100755 tests/qemu-iotests/310
 create mode 100644 tests/qemu-iotests/310.out

-- 
1.8.3.1




[PATCH v13 05/10] qapi: create BlockdevOptionsCor structure for COR driver

2020-12-02 Thread Andrey Shinkevich via
Create the BlockdevOptionsCor structure for COR driver specific options
splitting it off form the BlockdevOptionsGenericFormat. The only option
'bottom' node in the structure denotes an image file that limits the
COR operations in the backing chain.
We are going to use the COR-filter for a block-stream job and will pass
a bottom node name to the COR driver. The bottom node is the first
non-filter overlay of the base. It was introduced because the base node
itself may change due to possible concurrent jobs.

Suggested-by: Max Reitz 
Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/copy-on-read.c | 57 ++--
 qapi/block-core.json | 21 ++-
 2 files changed, 75 insertions(+), 3 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 618c4c4..2cddc96 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -24,18 +24,23 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
 #include "block/copy-on-read.h"
 
 
 typedef struct BDRVStateCOR {
 bool active;
+BlockDriverState *bottom_bs;
 } BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BlockDriverState *bottom_bs = NULL;
 BDRVStateCOR *state = bs->opaque;
+/* Find a bottom node name, if any */
+const char *bottom_node = qdict_get_try_str(options, "bottom");
 
 bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
@@ -51,7 +56,17 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+if (bottom_node) {
+bottom_bs = bdrv_lookup_bs(NULL, bottom_node, errp);
+if (!bottom_bs) {
+error_setg(errp, "Bottom node '%s' not found", bottom_node);
+qdict_del(options, "bottom");
+return -EINVAL;
+}
+qdict_del(options, "bottom");
+}
 state->active = true;
+state->bottom_bs = bottom_bs;
 
 /*
  * We don't need to call bdrv_child_refresh_perms() now as the permissions
@@ -107,8 +122,46 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
size_t qiov_offset,
int flags)
 {
-return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
-   flags | BDRV_REQ_COPY_ON_READ);
+int64_t n;
+int local_flags;
+int ret;
+BDRVStateCOR *state = bs->opaque;
+
+if (!state->bottom_bs) {
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
+}
+
+while (bytes) {
+local_flags = flags;
+
+/* In case of failure, try to copy-on-read anyway */
+ret = bdrv_is_allocated(bs->file->bs, offset, bytes, );
+if (ret <= 0) {
+ret = 
bdrv_is_allocated_above(bdrv_backing_chain_next(bs->file->bs),
+  state->bottom_bs, true, offset,
+  n, );
+if (ret == 1 || ret < 0) {
+local_flags |= BDRV_REQ_COPY_ON_READ;
+}
+/* Finish earlier if the end of a backing file has been reached */
+if (n == 0) {
+break;
+}
+}
+
+ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+  local_flags);
+if (ret < 0) {
+return ret;
+}
+
+offset += n;
+qiov_offset += n;
+bytes -= n;
+}
+
+return 0;
 }
 
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 8ef3df6..04055ef 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3942,6 +3942,25 @@
   'data': { 'throttle-group': 'str',
 'file' : 'BlockdevRef'
  } }
+
+##
+# @BlockdevOptionsCor:
+#
+# Driver specific block device options for the copy-on-read driver.
+#
+# @bottom: the name of a non-filter node (allocation-bearing layer) that limits
+#  the COR operations in the backing chain (inclusive).
+#  For the block-stream job, it will be the first non-filter overlay of
+#  the base node. We do not involve the base node into the COR
+#  operations because the base may change due to a concurrent
+#  block-commit job on the same backing chain.
+#
+# Since: 5.2
+##
+{ 'struct': 'BlockdevOptionsCor',
+  'base': 'BlockdevOptionsGenericFormat',
+  'data': { '*bottom': 'str' } }
+
 ##
 # @BlockdevOptions:
 #
@@ -3994,7 +4013,7 @@
   'bochs':  

[PATCH v13 08/10] copy-on-read: skip non-guest reads if no copy needed

2020-12-02 Thread Andrey Shinkevich via
If the flag BDRV_REQ_PREFETCH was set, skip idling read/write
operations in COR-driver. It can be taken into account for the
COR-algorithms optimization. That check is being made during the
block stream job by the moment.

Add the BDRV_REQ_PREFETCH flag to the supported_read_flags of the
COR-filter.

block: Modify the comment for the flag BDRV_REQ_PREFETCH as we are
going to use it alone and pass it to the COR-filter driver for further
processing.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c  | 14 ++
 include/block/block.h |  8 +---
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 2cddc96..123d197 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -49,6 +49,8 @@ static int cor_open(BlockDriverState *bs, QDict *options, int 
flags,
 return -EINVAL;
 }
 
+bs->supported_read_flags = BDRV_REQ_PREFETCH;
+
 bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
 (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
 
@@ -150,10 +152,14 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
 }
 }
 
-ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
-  local_flags);
-if (ret < 0) {
-return ret;
+/* Skip if neither read nor write are needed */
+if ((local_flags & (BDRV_REQ_PREFETCH | BDRV_REQ_COPY_ON_READ)) !=
+BDRV_REQ_PREFETCH) {
+ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+  local_flags);
+if (ret < 0) {
+return ret;
+}
 }
 
 offset += n;
diff --git a/include/block/block.h b/include/block/block.h
index 81a3894..3499554 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -81,9 +81,11 @@ typedef enum {
 BDRV_REQ_NO_FALLBACK= 0x100,
 
 /*
- * BDRV_REQ_PREFETCH may be used only together with BDRV_REQ_COPY_ON_READ
- * on read request and means that caller doesn't really need data to be
- * written to qiov parameter which may be NULL.
+ * BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read
+ * (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR
+ * filter is involved), in which case it signals that the COR operation
+ * need not read the data into memory (qiov) but only ensure they are
+ * copied to the top layer (i.e., that COR operation is done).
  */
 BDRV_REQ_PREFETCH  = 0x200,
 /* Mask of valid flags */
-- 
1.8.3.1




[PATCH v13 04/10] qapi: add filter-node-name to block-stream

2020-12-02 Thread Andrey Shinkevich via
Provide the possibility to pass the 'filter-node-name' parameter to the
block-stream job as it is done for the commit block job.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/monitor/block-hmp-cmds.c | 4 ++--
 block/stream.c | 4 +++-
 blockdev.c | 4 +++-
 include/block/block_int.h  | 7 ++-
 qapi/block-core.json   | 6 ++
 5 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index d15a2be..e8a58f3 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -508,8 +508,8 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
 
 qmp_block_stream(true, device, device, base != NULL, base, false, NULL,
  false, NULL, qdict_haskey(qdict, "speed"), speed, true,
- BLOCKDEV_ON_ERROR_REPORT, false, false, false, false,
- );
+ BLOCKDEV_ON_ERROR_REPORT, false, NULL, false, false, 
false,
+ false, );
 
 hmp_handle_error(mon, error);
 }
diff --git a/block/stream.c b/block/stream.c
index 236384f..6e281c7 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -221,7 +221,9 @@ static const BlockJobDriver stream_job_driver = {
 void stream_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *base, const char *backing_file_str,
   int creation_flags, int64_t speed,
-  BlockdevOnError on_error, Error **errp)
+  BlockdevOnError on_error,
+  const char *filter_node_name,
+  Error **errp)
 {
 StreamBlockJob *s;
 BlockDriverState *iter;
diff --git a/blockdev.c b/blockdev.c
index fe6fb5d..c917625 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2499,6 +2499,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
   bool has_backing_file, const char *backing_file,
   bool has_speed, int64_t speed,
   bool has_on_error, BlockdevOnError on_error,
+  bool has_filter_node_name, const char *filter_node_name,
   bool has_auto_finalize, bool auto_finalize,
   bool has_auto_dismiss, bool auto_dismiss,
   Error **errp)
@@ -2581,7 +2582,8 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 
 stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
- job_flags, has_speed ? speed : 0, on_error, _err);
+ job_flags, has_speed ? speed : 0, on_error,
+ filter_node_name, _err);
 if (local_err) {
 error_propagate(errp, local_err);
 goto out;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 95d9333..c05fa1e 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1134,6 +1134,9 @@ int is_windows_drive(const char *filename);
  *  See @BlockJobCreateFlags
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @on_error: The action to take upon error.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the commit job inserts into the graph above @bs. NULL means
+ * that a node name should be autogenerated.
  * @errp: Error object.
  *
  * Start a streaming operation on @bs.  Clusters that are unallocated
@@ -1146,7 +1149,9 @@ int is_windows_drive(const char *filename);
 void stream_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *base, const char *backing_file_str,
   int creation_flags, int64_t speed,
-  BlockdevOnError on_error, Error **errp);
+  BlockdevOnError on_error,
+  const char *filter_node_name,
+  Error **errp);
 
 /**
  * commit_start:
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 04ad80b..8ef3df6 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2543,6 +2543,11 @@
 #'stop' and 'enospc' can only be used if the block device
 #supports io-status (see BlockInfo).  Since 1.3.
 #
+# @filter-node-name: the node name that should be assigned to the
+#filter driver that the stream job inserts into the graph
+#above @device. If this option is not given, a node name is
+#autogenerated. (Since: 5.2)
+#
 # @auto-finalize: When false, this job will wait in a PENDING state after it 
has
 # finished its work, waiting for @block-job-finalize before
 # making any block graph changes.
@@ -2573,6 +2578,7 @@
   'data': { '*job-id': 'str', 'device': 'str', '*base': 'str',
 '*base-node': 'str', '*backing-file': 'str', '*speed': 'int',
 '*on-error': 'BlockdevOnError',
+ 

[PATCH v3 3/5] monitor: let QMP monitor track JSON message content

2020-11-27 Thread Andrey Shinkevich via
We are going to allow the QMP monitor reading data from input channel
more than one byte at once to increase the performance. With the OOB
compatibility disabled, the monitor queues one QMP command at most. It
was done for the backward compatibility as stated in the comment before
pushing a command into the queue. To keep that concept functional, the
monitor should track the end of a single QMP command. It allows the
dispatcher handling the command and send a response to client in time.

Signed-off-by: Andrey Shinkevich 
---
 include/qapi/qmp/json-parser.h |  5 +++--
 monitor/qmp.c  | 18 --
 qga/main.c |  2 +-
 qobject/json-lexer.c   | 30 +-
 qobject/json-parser-int.h  |  8 +---
 qobject/json-streamer.c| 15 ---
 qobject/qjson.c|  2 +-
 tests/qtest/libqtest.c |  2 +-
 8 files changed, 56 insertions(+), 26 deletions(-)

diff --git a/include/qapi/qmp/json-parser.h b/include/qapi/qmp/json-parser.h
index 7345a9b..039addb 100644
--- a/include/qapi/qmp/json-parser.h
+++ b/include/qapi/qmp/json-parser.h
@@ -36,8 +36,9 @@ void json_message_parser_init(JSONMessageParser *parser,
Error *err),
   void *opaque, va_list *ap);
 
-void json_message_parser_feed(JSONMessageParser *parser,
- const char *buffer, size_t size);
+size_t  json_message_parser_feed(JSONMessageParser *parser,
+ const char *buffer, size_t size,
+ bool track_qmp);
 
 void json_message_parser_flush(JSONMessageParser *parser);
 
diff --git a/monitor/qmp.c b/monitor/qmp.c
index a86ed35..0b39c62 100644
--- a/monitor/qmp.c
+++ b/monitor/qmp.c
@@ -367,8 +367,22 @@ static void handle_qmp_command(void *opaque, QObject *req, 
Error *err)
 static void monitor_qmp_read(void *opaque, const uint8_t *buf, int size)
 {
 MonitorQMP *mon = opaque;
-
-json_message_parser_feed(>parser, (const char *) buf, size);
+char *cursor = (char *) buf;
+size_t len;
+
+while (size > 0) {
+len = json_message_parser_feed(>parser, (const char *) cursor,
+   size, true);
+cursor += len;
+size -= len;
+
+if (size > 0) {
+/* Let the dispatcher process the QMP command */
+while (qatomic_mb_read(>common.suspend_cnt)) {
+g_usleep(20);
+}
+}
+}
 }
 
 static QDict *qmp_greeting(MonitorQMP *mon)
diff --git a/qga/main.c b/qga/main.c
index dea6a3a..16de642 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -605,7 +605,7 @@ static gboolean channel_event_cb(GIOCondition condition, 
gpointer data)
 case G_IO_STATUS_NORMAL:
 buf[count] = 0;
 g_debug("read data, count: %d, data: %s", (int)count, buf);
-json_message_parser_feed(>parser, (char *)buf, (int)count);
+json_message_parser_feed(>parser, (char *)buf, (int)count, false);
 break;
 case G_IO_STATUS_EOF:
 g_debug("received EOF");
diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c
index 632320d..1fefbae 100644
--- a/qobject/json-lexer.c
+++ b/qobject/json-lexer.c
@@ -280,10 +280,11 @@ void json_lexer_init(JSONLexer *lexer, bool 
enable_interpolation)
 lexer->x = lexer->y = 0;
 }
 
-static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
+static JSONTokenType json_lexer_feed_char(JSONLexer *lexer, char ch, bool 
flush)
 {
 int new_state;
 bool char_consumed = false;
+JSONTokenType ret;
 
 lexer->x++;
 if (ch == '\n') {
@@ -310,16 +311,16 @@ static void json_lexer_feed_char(JSONLexer *lexer, char 
ch, bool flush)
 case JSON_FLOAT:
 case JSON_KEYWORD:
 case JSON_STRING:
-json_message_process_token(lexer, lexer->token, new_state,
-   lexer->x, lexer->y);
+ret = json_message_process_token(lexer, lexer->token, new_state,
+ lexer->x, lexer->y);
 /* fall through */
 case IN_START:
 g_string_truncate(lexer->token, 0);
 new_state = lexer->start_state;
 break;
 case JSON_ERROR:
-json_message_process_token(lexer, lexer->token, JSON_ERROR,
-   lexer->x, lexer->y);
+ret = json_message_process_token(lexer, lexer->token, JSON_ERROR,
+ lexer->x, lexer->y);
 new_state = IN_RECOVERY;
 /* fall through */
 case IN_RECOVERY:
@@ -335,20 +336,31 @@ static void json_lexer_feed_char(JSONLexer *lexer, char 
ch, bool flush)
  * this is a security consideration.
  */
 if (lexer->token->len > MAX_TOKEN_SIZE) {
-json_message_process_token(lexer, lexer->token, lexer->state,
-  

[PATCH v3 5/5] monitor: increase amount of data for monitor to read

2020-11-27 Thread Andrey Shinkevich via
QMP and HMP monitors read one byte at a time from the socket or stdin,
which is very inefficient. With 100+ VMs on the host, this results in
multiple extra system calls and CPU overuse.
This patch increases the amount of read data up to 4096 bytes that fits
the buffer size on the channel level.

Suggested-by: Denis V. Lunev 
Signed-off-by: Andrey Shinkevich 
---
 monitor/monitor.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monitor/monitor.c b/monitor/monitor.c
index 84222cd..43d2d3b 100644
--- a/monitor/monitor.c
+++ b/monitor/monitor.c
@@ -566,7 +566,7 @@ int monitor_can_read(void *opaque)
 {
 Monitor *mon = opaque;
 
-return !qatomic_mb_read(>suspend_cnt);
+return !qatomic_mb_read(>suspend_cnt) ? CHR_READ_BUF_LEN : 0;
 }
 
 void monitor_list_append(Monitor *mon)
-- 
1.8.3.1




[PATCH v3 2/5] monitor: drain requests queue with 'channel closed' event

2020-11-27 Thread Andrey Shinkevich via
When CHR_EVENT_CLOSED comes, the QMP requests queue may still contain
unprocessed commands. It can happen with QMP capability OOB enabled.
Let the dispatcher complete handling requests rest in the monitor
queue.

Signed-off-by: Andrey Shinkevich 
---
 monitor/qmp.c | 46 +-
 1 file changed, 21 insertions(+), 25 deletions(-)

diff --git a/monitor/qmp.c b/monitor/qmp.c
index 7169366..a86ed35 100644
--- a/monitor/qmp.c
+++ b/monitor/qmp.c
@@ -75,36 +75,32 @@ static void monitor_qmp_cleanup_req_queue_locked(MonitorQMP 
*mon)
 }
 }
 
-static void monitor_qmp_cleanup_queue_and_resume(MonitorQMP *mon)
+/*
+ * Let unprocessed QMP commands be handled.
+ */
+static void monitor_qmp_drain_queue(MonitorQMP *mon)
 {
-qemu_mutex_lock(>qmp_queue_lock);
+bool q_is_empty = false;
 
-/*
- * Same condition as in monitor_qmp_dispatcher_co(), but before
- * removing an element from the queue (hence no `- 1`).
- * Also, the queue should not be empty either, otherwise the
- * monitor hasn't been suspended yet (or was already resumed).
- */
-bool need_resume = (!qmp_oob_enabled(mon) ||
-mon->qmp_requests->length == QMP_REQ_QUEUE_LEN_MAX)
-&& !g_queue_is_empty(mon->qmp_requests);
+while (!q_is_empty) {
+qemu_mutex_lock(>qmp_queue_lock);
+q_is_empty = g_queue_is_empty(mon->qmp_requests);
+qemu_mutex_unlock(>qmp_queue_lock);
 
-monitor_qmp_cleanup_req_queue_locked(mon);
+if (!q_is_empty) {
+if (!qatomic_xchg(_dispatcher_co_busy, true)) {
+/* Kick the dispatcher coroutine */
+aio_co_wake(qmp_dispatcher_co);
+} else {
+/* Let the dispatcher do its job for a while */
+g_usleep(40);
+}
+}
+}
 
-if (need_resume) {
-/*
- * handle_qmp_command() suspended the monitor because the
- * request queue filled up, to be resumed when the queue has
- * space again.  We just emptied it; resume the monitor.
- *
- * Without this, the monitor would remain suspended forever
- * when we get here while the monitor is suspended.  An
- * unfortunately timed CHR_EVENT_CLOSED can do the trick.
- */
+if (qatomic_mb_read(>common.suspend_cnt)) {
 monitor_resume(>common);
 }
-
-qemu_mutex_unlock(>qmp_queue_lock);
 }
 
 void qmp_send_response(MonitorQMP *mon, const QDict *rsp)
@@ -418,7 +414,7 @@ static void monitor_qmp_event(void *opaque, QEMUChrEvent 
event)
  * stdio, it's possible that stdout is still open when stdin
  * is closed.
  */
-monitor_qmp_cleanup_queue_and_resume(mon);
+monitor_qmp_drain_queue(mon);
 json_message_parser_destroy(>parser);
 json_message_parser_init(>parser, handle_qmp_command,
  mon, NULL);
-- 
1.8.3.1




[PATCH v3 0/5] Increase amount of data for monitor to read

2020-11-27 Thread Andrey Shinkevich via
The subject was discussed here:
https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg00206.html
https://patchew.org/QEMU/20190610105906.28524-1-dplotni...@virtuozzo.com/#
Message-ID: <31dd78ba-bd64-2ed6-3c8f-eed4e904d...@virtuozzo.com>
and v2:
Message-Id: <1606146274-246154-1-git-send-email-andrey.shinkev...@virtuozzo.com>

This series is a solution for the issue with overflow of the monitor queue
with QMP requests if we keep the maximum queue length unchanged (=8).

v3:
  01: New
  02: New
  03: The additional little JSON parser removed and the resources of the
  existing JSON parser were used to track the end of a QMP command.
  04: The amount of read input data increases only.

Andrey Shinkevich (4):
  monitor: change function obsolete name in comments
  monitor: drain requests queue with 'channel closed' event
  monitor: let QMP monitor track JSON message content
  monitor: increase amount of data for monitor to read

Vladimir Sementsov-Ogievskiy (1):
  iotests: 129 don't check backup "busy"

 include/qapi/qmp/json-parser.h |  5 ++--
 monitor/monitor.c  |  2 +-
 monitor/qmp.c  | 66 --
 qga/main.c |  2 +-
 qobject/json-lexer.c   | 30 +--
 qobject/json-parser-int.h  |  8 +++--
 qobject/json-streamer.c| 15 +-
 qobject/qjson.c|  2 +-
 tests/qemu-iotests/129 |  1 -
 tests/qtest/libqtest.c |  2 +-
 10 files changed, 79 insertions(+), 54 deletions(-)

-- 
1.8.3.1




[PATCH v3 4/5] iotests: 129 don't check backup "busy"

2020-11-27 Thread Andrey Shinkevich via
From: Vladimir Sementsov-Ogievskiy 

Busy is racy, job has it's "pause-points" when it's not busy. Drop this
check.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 tests/qemu-iotests/129 | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/qemu-iotests/129 b/tests/qemu-iotests/129
index 0e13244..3c22f64 100755
--- a/tests/qemu-iotests/129
+++ b/tests/qemu-iotests/129
@@ -67,7 +67,6 @@ class TestStopWithBlockJob(iotests.QMPTestCase):
 result = self.vm.qmp("stop")
 self.assert_qmp(result, 'return', {})
 result = self.vm.qmp("query-block-jobs")
-self.assert_qmp(result, 'return[0]/busy', True)
 self.assert_qmp(result, 'return[0]/ready', False)
 
 def test_drive_mirror(self):
-- 
1.8.3.1




[PATCH v3 1/5] monitor: change function obsolete name in comments

2020-11-27 Thread Andrey Shinkevich via
The function name monitor_qmp_bh_dispatcher() has been changed to
monitor_qmp_dispatcher_co() since the commit 9ce44e2c. Let's amend the
comments.

Signed-off-by: Andrey Shinkevich 
---
 monitor/qmp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/monitor/qmp.c b/monitor/qmp.c
index b42f8c6..7169366 100644
--- a/monitor/qmp.c
+++ b/monitor/qmp.c
@@ -80,7 +80,7 @@ static void monitor_qmp_cleanup_queue_and_resume(MonitorQMP 
*mon)
 qemu_mutex_lock(>qmp_queue_lock);
 
 /*
- * Same condition as in monitor_qmp_bh_dispatcher(), but before
+ * Same condition as in monitor_qmp_dispatcher_co(), but before
  * removing an element from the queue (hence no `- 1`).
  * Also, the queue should not be empty either, otherwise the
  * monitor hasn't been suspended yet (or was already resumed).
@@ -343,7 +343,7 @@ static void handle_qmp_command(void *opaque, QObject *req, 
Error *err)
 
 /*
  * Suspend the monitor when we can't queue more requests after
- * this one.  Dequeuing in monitor_qmp_bh_dispatcher() or
+ * this one.  Dequeuing in monitor_qmp_dispatcher_co() or
  * monitor_qmp_cleanup_queue_and_resume() will resume it.
  * Note that when OOB is disabled, we queue at most one command,
  * for backward compatibility.
-- 
1.8.3.1




[PATCH v2 1/2] iotests: add another bash sleep command to 247

2020-11-23 Thread Andrey Shinkevich via
This patch paves the way for the one that follows. The following patch
makes the QMP monitor to read up to 4K from stdin at once. That results
in running the bash 'sleep' command before the _qemu_proc_exec() starts
in subshell. Another 'sleep' command with an unobtrusive 'query-status'
plays as a workaround.

Signed-off-by: Andrey Shinkevich 
---
 tests/qemu-iotests/247 | 2 ++
 tests/qemu-iotests/247.out | 1 +
 2 files changed, 3 insertions(+)

diff --git a/tests/qemu-iotests/247 b/tests/qemu-iotests/247
index 87e37b3..7d316ec 100755
--- a/tests/qemu-iotests/247
+++ b/tests/qemu-iotests/247
@@ -59,6 +59,8 @@ TEST_IMG="$TEST_IMG.4" _make_test_img $size
 {"execute":"block-commit",
  "arguments":{"device":"format-4", "top-node": "format-2", 
"base-node":"format-0", "job-id":"job0"}}
 EOF
+sleep 1
+echo '{"execute":"query-status"}'
 if [ "${VALGRIND_QEMU}" == "y" ]; then
 sleep 10
 else
diff --git a/tests/qemu-iotests/247.out b/tests/qemu-iotests/247.out
index e909e83..13d9547 100644
--- a/tests/qemu-iotests/247.out
+++ b/tests/qemu-iotests/247.out
@@ -17,6 +17,7 @@ QMP_VERSION
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 134217728, "offset": 
134217728, "speed": 0, "type": "commit"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
+{"return": {"status": "running", "singlestep": false, "running": true}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 *** done
-- 
1.8.3.1




[PATCH v2 2/2] monitor: increase amount of data for monitor to read

2020-11-23 Thread Andrey Shinkevich via
QMP and HMP monitors read one byte at a time from the socket or stdin,
which is very inefficient. With 100+ VMs on the host, this results in
multiple extra system calls and CPU overuse.
This patch increases the amount of read data up to 4096 bytes that fits
the buffer size on the channel level.
A JSON little parser is introduced to throttle QMP commands read from
the buffer so that incoming requests do not overflow the monitor input
queue.

Suggested-by: Denis V. Lunev 
Signed-off-by: Andrey Shinkevich 
---
 chardev/char-fd.c  | 35 +--
 chardev/char-socket.c  | 42 +++---
 chardev/char.c | 41 +
 include/chardev/char.h | 15 +++
 monitor/monitor.c  |  2 +-
 5 files changed, 129 insertions(+), 6 deletions(-)

diff --git a/chardev/char-fd.c b/chardev/char-fd.c
index 1cd62f2..15bc8f4 100644
--- a/chardev/char-fd.c
+++ b/chardev/char-fd.c
@@ -33,6 +33,8 @@
 #include "chardev/char-fd.h"
 #include "chardev/char-io.h"
 
+#include "monitor/monitor-internal.h"
+
 /* Called with chr_write_lock held.  */
 static int fd_chr_write(Chardev *chr, const uint8_t *buf, int len)
 {
@@ -45,8 +47,12 @@ static gboolean fd_chr_read(QIOChannel *chan, GIOCondition 
cond, void *opaque)
 {
 Chardev *chr = CHARDEV(opaque);
 FDChardev *s = FD_CHARDEV(opaque);
+CharBackend *be = chr->be;
+Monitor *mon = (Monitor *)be->opaque;
 int len;
 uint8_t buf[CHR_READ_BUF_LEN];
+uint8_t *cursor;
+int load, size, pos;
 ssize_t ret;
 
 len = sizeof(buf);
@@ -62,10 +68,35 @@ static gboolean fd_chr_read(QIOChannel *chan, GIOCondition 
cond, void *opaque)
 if (ret == 0) {
 remove_fd_in_watch(chr);
 qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+chr->json_thl = (const JSONthrottle){0};
 return FALSE;
 }
-if (ret > 0) {
-qemu_chr_be_write(chr, buf, ret);
+if (ret < 0) {
+return TRUE;
+}
+load = ret;
+cursor = buf;
+
+while (load > 0) {
+size = load;
+if (monitor_is_qmp(mon)) {
+/* Find the end position of a JSON command in the input buffer */
+pos = qemu_chr_end_position((const char *) cursor, size,
+>json_thl);
+if (pos >= 0) {
+size = pos + 1;
+}
+}
+
+qemu_chr_be_write(chr, cursor, size);
+cursor += size;
+load -= size;
+
+if (load > 0) {
+while (qatomic_mb_read(>suspend_cnt)) {
+g_usleep(40);
+}
+}
 }
 
 return TRUE;
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index 213a4c8..30ad1d4 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -38,6 +38,8 @@
 #include "chardev/char-io.h"
 #include "qom/object.h"
 
+#include "monitor/monitor-internal.h"
+
 /***/
 /* TCP Net console */
 
@@ -522,7 +524,11 @@ static gboolean tcp_chr_read(QIOChannel *chan, 
GIOCondition cond, void *opaque)
 {
 Chardev *chr = CHARDEV(opaque);
 SocketChardev *s = SOCKET_CHARDEV(opaque);
+CharBackend *be = chr->be;
+Monitor *mon = (Monitor *)be->opaque;
 uint8_t buf[CHR_READ_BUF_LEN];
+uint8_t *cursor;
+int load, pos;
 int len, size;
 
 if ((s->state != TCP_CHARDEV_STATE_CONNECTED) ||
@@ -537,12 +543,42 @@ static gboolean tcp_chr_read(QIOChannel *chan, 
GIOCondition cond, void *opaque)
 if (size == 0 || (size == -1 && errno != EAGAIN)) {
 /* connection closed */
 tcp_chr_disconnect(chr);
-} else if (size > 0) {
+chr->json_thl = (const JSONthrottle){0};
+return TRUE;
+}
+if (size < 0) {
+return TRUE;
+}
+load = size;
+cursor = buf;
+
+while (load > 0) {
+size = load;
+if (monitor_is_qmp(mon)) {
+/* Find the end position of a JSON command in the input buffer */
+pos = qemu_chr_end_position((const char *) cursor, size,
+>json_thl);
+if (pos >= 0) {
+size = pos + 1;
+}
+}
+len = size;
+
 if (s->do_telnetopt) {
-tcp_chr_process_IAC_bytes(chr, s, buf, );
+tcp_chr_process_IAC_bytes(chr, s, cursor, );
 }
 if (size > 0) {
-qemu_chr_be_write(chr, buf, size);
+qemu_chr_be_write(chr, cursor, size);
+cursor += size;
+load -= size;
+} else {
+cursor += len;
+load -= len;
+}
+if (load > 0) {
+while (qatomic_mb_read(>suspend_cnt)) {
+g_usleep(40);
+}
 }
 }
 
diff --git a/chardev/char.c b/chardev/char.c
index aa42821..75c7bc7 100644
--- a/chardev/char.c
+++ b/chardev/char.c
@@ -1178,6 +1178,47 @@ GSource 

[PATCH v2 0/2] Increase amount of data for monitor to read

2020-11-23 Thread Andrey Shinkevich via
The subject was discussed here:
https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg00206.html

This series is a solution for the issue with QMP monitor buffered input.
A JSON little parser is introduced to separate QMP commands read from the
input buffer so that incoming requests do not overwhelm the monitor queue.
A side effect raised in the test #247 was managed in the first patch.
It may be considered as a workaround. Any sane fix suggested will be
appreciated.

Note:
This series goes after the Vladimir's one:
'[PATCH v3 00/25] backup performance: block_status + async"'
To make the test #129 passed, the following patch should be applied first:
'[PATCH v3 01/25] iotests: 129 don't check backup "busy"'.

v2:
  02: The static JSONthrottle object was made a member of the Chardev structure.
  The fd_chr_read functions were merged.
  The monitor thread synchronization was added to protect the input queue
  from overflow.

Andrey Shinkevich (2):
  iotests: add another bash sleep command to 247
  monitor: increase amount of data for monitor to read

 chardev/char-fd.c  | 35 +--
 chardev/char-socket.c  | 42 +++---
 chardev/char.c | 41 +
 include/chardev/char.h | 15 +++
 monitor/monitor.c  |  2 +-
 tests/qemu-iotests/247 |  2 ++
 tests/qemu-iotests/247.out |  1 +
 7 files changed, 132 insertions(+), 6 deletions(-)

-- 
1.8.3.1




[PATCH 2/2] monitor: increase amount of data for monitor to read

2020-11-06 Thread Andrey Shinkevich via
QMP and HMP monitors read one byte at a time from the socket or stdin,
which is very inefficient. With 100+ VMs on the host, this results in
multiple extra system calls and CPU overuse.
This patch increases the amount of read data up to 4096 bytes that fits
the buffer size on the channel level.

Suggested-by: Denis V. Lunev 
Signed-off-by: Andrey Shinkevich 
---
 chardev/char-fd.c  | 64 +-
 chardev/char-socket.c  | 54 +++---
 chardev/char.c | 40 +
 include/chardev/char.h | 15 +++
 monitor/monitor.c  |  2 +-
 tests/qemu-iotests/247.out |  2 +-
 6 files changed, 159 insertions(+), 18 deletions(-)

diff --git a/chardev/char-fd.c b/chardev/char-fd.c
index 1cd62f2..6194fe6 100644
--- a/chardev/char-fd.c
+++ b/chardev/char-fd.c
@@ -33,6 +33,8 @@
 #include "chardev/char-fd.h"
 #include "chardev/char-io.h"
 
+#include "monitor/monitor-internal.h"
+
 /* Called with chr_write_lock held.  */
 static int fd_chr_write(Chardev *chr, const uint8_t *buf, int len)
 {
@@ -41,7 +43,7 @@ static int fd_chr_write(Chardev *chr, const uint8_t *buf, int 
len)
 return io_channel_send(s->ioc_out, buf, len);
 }
 
-static gboolean fd_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
+static gboolean fd_chr_read_hmp(QIOChannel *chan, void *opaque)
 {
 Chardev *chr = CHARDEV(opaque);
 FDChardev *s = FD_CHARDEV(opaque);
@@ -71,6 +73,66 @@ static gboolean fd_chr_read(QIOChannel *chan, GIOCondition 
cond, void *opaque)
 return TRUE;
 }
 
+static gboolean fd_chr_read_qmp(QIOChannel *chan, void *opaque)
+{
+static JSONthrottle thl = {0};
+uint8_t *start;
+Chardev *chr = CHARDEV(opaque);
+FDChardev *s = FD_CHARDEV(opaque);
+int len, size, pos;
+ssize_t ret;
+
+if (!thl.load) {
+len = sizeof(thl.buf);
+if (len > s->max_size) {
+len = s->max_size;
+}
+if (len == 0) {
+return TRUE;
+}
+
+ret = qio_channel_read(
+chan, (gchar *)thl.buf, len, NULL);
+if (ret == 0) {
+remove_fd_in_watch(chr);
+qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+thl = (const JSONthrottle){0};
+return FALSE;
+}
+if (ret < 0) {
+return TRUE;
+}
+thl.load = ret;
+thl.cursor = 0;
+}
+
+size = thl.load;
+start = thl.buf + thl.cursor;
+pos = qemu_chr_end_position((const char *) start, size, );
+if (pos >= 0) {
+size = pos + 1;
+}
+
+qemu_chr_be_write(chr, start, size);
+thl.cursor += size;
+thl.load -= size;
+
+return TRUE;
+}
+
+static gboolean fd_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
+{
+Chardev *chr = CHARDEV(opaque);
+CharBackend *be = chr->be;
+Monitor *mon = (Monitor *)be->opaque;
+
+if (monitor_is_qmp(mon)) {
+return fd_chr_read_qmp(chan, opaque);
+}
+
+return fd_chr_read_hmp(chan, opaque);
+}
+
 static int fd_chr_read_poll(void *opaque)
 {
 Chardev *chr = CHARDEV(opaque);
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index 213a4c8..8335e8c 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -520,30 +520,54 @@ static void tcp_chr_disconnect(Chardev *chr)
 
 static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
 {
+static JSONthrottle thl = {0};
+uint8_t *start;
 Chardev *chr = CHARDEV(opaque);
 SocketChardev *s = SOCKET_CHARDEV(opaque);
-uint8_t buf[CHR_READ_BUF_LEN];
-int len, size;
+int len, size, pos;
 
 if ((s->state != TCP_CHARDEV_STATE_CONNECTED) ||
 s->max_size <= 0) {
 return TRUE;
 }
-len = sizeof(buf);
-if (len > s->max_size) {
-len = s->max_size;
-}
-size = tcp_chr_recv(chr, (void *)buf, len);
-if (size == 0 || (size == -1 && errno != EAGAIN)) {
-/* connection closed */
-tcp_chr_disconnect(chr);
-} else if (size > 0) {
-if (s->do_telnetopt) {
-tcp_chr_process_IAC_bytes(chr, s, buf, );
+
+if (!thl.load) {
+len = sizeof(thl.buf);
+if (len > s->max_size) {
+len = s->max_size;
+}
+size = tcp_chr_recv(chr, (void *)thl.buf, len);
+if (size == 0 || (size == -1 && errno != EAGAIN)) {
+/* connection closed */
+tcp_chr_disconnect(chr);
+thl = (const JSONthrottle){0};
+return TRUE;
 }
-if (size > 0) {
-qemu_chr_be_write(chr, buf, size);
+if (size < 0) {
+return TRUE;
 }
+thl.load = size;
+thl.cursor = 0;
+}
+
+size = thl.load;
+start = thl.buf + thl.cursor;
+pos = qemu_chr_end_position((const char *) start, size, );
+if (pos >= 0) {
+size = pos + 1;
+}
+len = size;
+
+if (s->do_telnetopt) {
+

[PATCH 1/2] iotests: add another bash sleep command to 247

2020-11-06 Thread Andrey Shinkevich via
This patch paves the way for the one that follows. The following patch
makes the QMP monitor to read up to 4K from stdin at once. That results
in running the bash 'sleep' command before the _qemu_proc_exec() starts
in subshell. Another 'sleep' command with an unobtrusive 'query-status'
plays as a workaround.

Signed-off-by: Andrey Shinkevich 
---
 tests/qemu-iotests/247 | 2 ++
 tests/qemu-iotests/247.out | 1 +
 2 files changed, 3 insertions(+)

diff --git a/tests/qemu-iotests/247 b/tests/qemu-iotests/247
index 87e37b3..7d316ec 100755
--- a/tests/qemu-iotests/247
+++ b/tests/qemu-iotests/247
@@ -59,6 +59,8 @@ TEST_IMG="$TEST_IMG.4" _make_test_img $size
 {"execute":"block-commit",
  "arguments":{"device":"format-4", "top-node": "format-2", 
"base-node":"format-0", "job-id":"job0"}}
 EOF
+sleep 1
+echo '{"execute":"query-status"}'
 if [ "${VALGRIND_QEMU}" == "y" ]; then
 sleep 10
 else
diff --git a/tests/qemu-iotests/247.out b/tests/qemu-iotests/247.out
index e909e83..13d9547 100644
--- a/tests/qemu-iotests/247.out
+++ b/tests/qemu-iotests/247.out
@@ -17,6 +17,7 @@ QMP_VERSION
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 134217728, "offset": 
134217728, "speed": 0, "type": "commit"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
+{"return": {"status": "running", "singlestep": false, "running": true}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 *** done
-- 
1.8.3.1




[PATCH 0/2] Increase amount of data for monitor to read

2020-11-06 Thread Andrey Shinkevich via
The subject was discussed here:
https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg00206.html

This series is a solution for the issue with QMP monitor buffered input.
A little parser is introduced to throttle JSON commands read from the
buffer so that QMP requests do not overwhelm the monitor input queue.
A side effect raised in the test #247 was managed in the first patch.
It may be considered as a workaround. Any sane fix suggested will be
appreciated.

Note:
This series goes after the Vladimir's one:
'[PATCH v3 00/25] backup performance: block_status + async"'
To make the test #129 passed, the following patch should be applied first:
'[PATCH v3 01/25] iotests: 129 don't check backup "busy"'.

Andrey Shinkevich (2):
  iotests: add another bash sleep command to 247
  monitor: increase amount of data for monitor to read

 chardev/char-fd.c  | 64 +-
 chardev/char-socket.c  | 54 +++---
 chardev/char.c | 40 +
 include/chardev/char.h | 15 +++
 monitor/monitor.c  |  2 +-
 tests/qemu-iotests/247 |  2 ++
 tests/qemu-iotests/247.out |  1 +
 7 files changed, 161 insertions(+), 17 deletions(-)

-- 
1.8.3.1




[PATCH v12 12/14] copy-on-read: skip non-guest reads if no copy needed

2020-10-22 Thread Andrey Shinkevich via
If the flag BDRV_REQ_PREFETCH was set, skip idling read/write
operations in COR-driver. It can be taken into account for the
COR-algorithms optimization. That check is being made during the
block stream job by the moment.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index a2b180a..081e661 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -153,10 +153,14 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
 }
 }
 
-ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
-  local_flags);
-if (ret < 0) {
-return ret;
+/* Skip if neither read nor write are needed */
+if ((local_flags & (BDRV_REQ_PREFETCH | BDRV_REQ_COPY_ON_READ)) !=
+BDRV_REQ_PREFETCH) {
+ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+  local_flags);
+if (ret < 0) {
+return ret;
+}
 }
 
 offset += n;
-- 
1.8.3.1




[PATCH v12 06/14] copy-on-read: pass bottom node name to COR driver

2020-10-22 Thread Andrey Shinkevich via
We are going to use the COR-filter for a block-stream job.
To limit COR operations by the base node in the backing chain during
stream job, pass the bottom node name, that is the first non-filter
overlay of the base, to the copy-on-read driver as the base node itself
may change due to possible concurrent jobs.
The rest of the functionality will be implemented in the patch that
follows.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 618c4c4..3d8e4db 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -24,18 +24,24 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
 #include "block/copy-on-read.h"
 
 
 typedef struct BDRVStateCOR {
 bool active;
+BlockDriverState *bottom_bs;
 } BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BlockDriverState *bottom_bs = NULL;
 BDRVStateCOR *state = bs->opaque;
+/* Find a bottom node name, if any */
+const char *bottom_node = qdict_get_try_str(options, "bottom");
 
 bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
@@ -51,7 +57,17 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+if (bottom_node) {
+bottom_bs = bdrv_lookup_bs(NULL, bottom_node, errp);
+if (!bottom_bs) {
+error_setg(errp, QERR_BASE_NOT_FOUND, bottom_node);
+qdict_del(options, "bottom");
+return -EINVAL;
+}
+qdict_del(options, "bottom");
+}
 state->active = true;
+state->bottom_bs = bottom_bs;
 
 /*
  * We don't need to call bdrv_child_refresh_perms() now as the permissions
-- 
1.8.3.1




[PATCH v12 09/14] block: modify the comment for BDRV_REQ_PREFETCH flag

2020-10-22 Thread Andrey Shinkevich via
Modify the comment for the flag BDRV_REQ_PREFETCH as we are going to
use it alone and pass it to the COR-filter driver for further
processing.

Signed-off-by: Andrey Shinkevich 
---
 include/block/block.h | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index ae7612f..1b6742f 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -81,9 +81,11 @@ typedef enum {
 BDRV_REQ_NO_FALLBACK= 0x100,
 
 /*
- * BDRV_REQ_PREFETCH may be used only together with BDRV_REQ_COPY_ON_READ
- * on read request and means that caller doesn't really need data to be
- * written to qiov parameter which may be NULL.
+ * BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read
+ * (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR
+ * filter is involved), in which case it signals that the COR operation
+ * need not read the data into memory (qiov) but only ensure they are
+ * copied to the top layer (i.e., that COR operation is done).
  */
 BDRV_REQ_PREFETCH  = 0x200,
 /* Mask of valid flags */
-- 
1.8.3.1




[PATCH v12 13/14] stream: skip filters when writing backing file name to QCOW2 header

2020-10-22 Thread Andrey Shinkevich via
Avoid writing a filter JSON file name and a filter format name to QCOW2
image when the backing file is changed after the block stream job.
A user is still able to assign the 'backing-file' parameter for a
block-stream job keeping in mind the possible issue mentioned above.
If the user does not specify the 'backing-file' parameter, QEMU will
assign it automatically.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 15 +--
 blockdev.c |  9 ++---
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index e0540ee..1ba74ab 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -65,6 +65,7 @@ static int stream_prepare(Job *job)
 BlockDriverState *bs = blk_bs(bjob->blk);
 BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
 BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
+BlockDriverState *base_unfiltered = NULL;
 Error *local_err = NULL;
 int ret = 0;
 
@@ -75,8 +76,18 @@ static int stream_prepare(Job *job)
 const char *base_id = NULL, *base_fmt = NULL;
 if (base) {
 base_id = s->backing_file_str;
-if (base->drv) {
-base_fmt = base->drv->format_name;
+if (base_id) {
+if (base->drv) {
+base_fmt = base->drv->format_name;
+}
+} else {
+base_unfiltered = bdrv_skip_filters(base);
+if (base_unfiltered) {
+base_id = base_unfiltered->filename;
+if (base_unfiltered->drv) {
+base_fmt = base_unfiltered->drv->format_name;
+}
+}
 }
 }
 bdrv_set_backing_hd(unfiltered_bs, base, _err);
diff --git a/blockdev.c b/blockdev.c
index c917625..0e9c783 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2508,7 +2508,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 BlockDriverState *base_bs = NULL;
 AioContext *aio_context;
 Error *local_err = NULL;
-const char *base_name = NULL;
 int job_flags = JOB_DEFAULT;
 
 if (!has_on_error) {
@@ -2536,7 +2535,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 goto out;
 }
 assert(bdrv_get_aio_context(base_bs) == aio_context);
-base_name = base;
 }
 
 if (has_base_node) {
@@ -2551,7 +2549,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 assert(bdrv_get_aio_context(base_bs) == aio_context);
 bdrv_refresh_filename(base_bs);
-base_name = base_bs->filename;
 }
 
 /* Check for op blockers in the whole chain between bs and base */
@@ -2571,9 +2568,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 goto out;
 }
 
-/* backing_file string overrides base bs filename */
-base_name = has_backing_file ? backing_file : base_name;
-
 if (has_auto_finalize && !auto_finalize) {
 job_flags |= JOB_MANUAL_FINALIZE;
 }
@@ -2581,7 +2575,8 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 job_flags |= JOB_MANUAL_DISMISS;
 }
 
-stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
+stream_start(has_job_id ? job_id : NULL, bs, base_bs,
+ has_backing_file ? backing_file : NULL,
  job_flags, has_speed ? speed : 0, on_error,
  filter_node_name, _err);
 if (local_err) {
-- 
1.8.3.1




[PATCH v12 00/14] Apply COR-filter to the block-stream permanently

2020-10-22 Thread Andrey Shinkevich via
The node insert/remove functions were added at the block generic layer.
COR-filter options structure was added to the QAPI.
The test case #310 was added to check the 'bottom' node limit for COR.
The 'supported_read_flags' member was added to the BDS structure
(with the flags check at the block generic layer for drivers).

v12:
  02: New.
  03: Only the temporary drop filter function left.
  05: New (suggested by Max)
  06: 'base' -> 'bottom' option.
  07: Fixes based on the review of the v11.
  08: New.
  09: The comment ext was modified.
  10: The read flags check at the block generic layer.
  11: COR flag was added.
  12: The condition was fixed.
  13: The 'backing-file' parameter returned. No deprecation.
  14: The COR-filter 'add' function replaced with the 'insert node' generic
  function. Fixes based on the review of the v11.

Andrey Shinkevich (14):
  copy-on-read: support preadv/pwritev_part functions
  block: add insert/remove node functions
  copy-on-read: add filter drop function
  qapi: add filter-node-name to block-stream
  qapi: create BlockdevOptionsCor structure for COR driver
  copy-on-read: pass bottom node name to COR driver
  copy-on-read: limit COR operations to bottom node
  iotests: add #310 to test bottom node in COR driver
  block: modify the comment for BDRV_REQ_PREFETCH flag
  block: include supported_read_flags into BDS structure
  copy-on-read: add support for read flags to COR-filter
  copy-on-read: skip non-guest reads if no copy needed
  stream: skip filters when writing backing file name to QCOW2 header
  block: apply COR-filter to block-stream jobs

 block.c|  49 ++
 block/copy-on-read.c   | 144 +
 block/copy-on-read.h   |  32 +
 block/io.c |  12 +++-
 block/monitor/block-hmp-cmds.c |   4 +-
 block/stream.c | 117 ++---
 blockdev.c |  13 ++--
 include/block/block.h  |  11 +++-
 include/block/block_int.h  |  11 +++-
 qapi/block-core.json   |  27 +++-
 tests/qemu-iotests/030 |  51 ++-
 tests/qemu-iotests/030.out |   4 +-
 tests/qemu-iotests/141.out |   2 +-
 tests/qemu-iotests/245 |  22 +--
 tests/qemu-iotests/310 | 109 +++
 tests/qemu-iotests/310.out |  15 +
 tests/qemu-iotests/group   |   3 +-
 17 files changed, 503 insertions(+), 123 deletions(-)
 create mode 100644 block/copy-on-read.h
 create mode 100755 tests/qemu-iotests/310
 create mode 100644 tests/qemu-iotests/310.out

-- 
1.8.3.1




[PATCH v12 11/14] copy-on-read: add support for read flags to COR-filter

2020-10-22 Thread Andrey Shinkevich via
Add the BDRV_REQ_COPY_ON_READ and BDRV_REQ_PREFETCH flags to the
supported_read_flags of the COR-filter.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 8178a91..a2b180a 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -50,6 +50,8 @@ static int cor_open(BlockDriverState *bs, QDict *options, int 
flags,
 return -EINVAL;
 }
 
+bs->supported_read_flags = BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH;
+
 bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
 (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
 
-- 
1.8.3.1




[PATCH v12 14/14] block: apply COR-filter to block-stream jobs

2020-10-22 Thread Andrey Shinkevich via
This patch completes the series with the COR-filter insertion for
block-stream operations. Adding the filter makes it possible for copied
regions to be discarded in backing files during the block-stream job,
what will reduce the disk overuse.
The COR-filter insertion incurs changes in the iotests case
245:test_block_stream_4 that reopens the backing chain during a
block-stream job. There are changes in the iotests #030 as well.
The iotests case 030:test_stream_parallel was deleted due to multiple
conflicts between the concurrent job operations over the same backing
chain. The base backing node for one job is the top node for another
job. It may change due to the filter node inserted into the backing
chain while both jobs are running. Another issue is that the parts of
the backing chain are being frozen by the running job and may not be
changed by the concurrent job when needed. The concept of the parallel
jobs with common nodes is considered vital no more.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 98 ++
 tests/qemu-iotests/030 | 51 +++-
 tests/qemu-iotests/030.out |  4 +-
 tests/qemu-iotests/141.out |  2 +-
 tests/qemu-iotests/245 | 22 +++
 5 files changed, 87 insertions(+), 90 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index 1ba74ab..f6ed315 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -17,8 +17,10 @@
 #include "block/blockjob_int.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
+#include "block/copy-on-read.h"
 
 enum {
 /*
@@ -33,6 +35,8 @@ typedef struct StreamBlockJob {
 BlockJob common;
 BlockDriverState *base_overlay; /* COW overlay (stream from this) */
 BlockDriverState *above_base;   /* Node directly above the base */
+BlockDriverState *cor_filter_bs;
+BlockDriverState *target_bs;
 BlockdevOnError on_error;
 char *backing_file_str;
 bool bs_read_only;
@@ -44,8 +48,7 @@ static int coroutine_fn stream_populate(BlockBackend *blk,
 {
 assert(bytes < SIZE_MAX);
 
-return blk_co_preadv(blk, offset, bytes, NULL,
- BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
+return blk_co_preadv(blk, offset, bytes, NULL, BDRV_REQ_PREFETCH);
 }
 
 static void stream_abort(Job *job)
@@ -53,23 +56,20 @@ static void stream_abort(Job *job)
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 
 if (s->chain_frozen) {
-BlockJob *bjob = >common;
-bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 }
 }
 
 static int stream_prepare(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
-BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
 BlockDriverState *base_unfiltered = NULL;
 Error *local_err = NULL;
 int ret = 0;
 
-bdrv_unfreeze_backing_chain(bs, s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 s->chain_frozen = false;
 
 if (bdrv_cow_child(unfiltered_bs)) {
@@ -105,15 +105,16 @@ static void stream_clean(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
 
 /* Reopen the image back in read-only mode if necessary */
 if (s->bs_read_only) {
 /* Give up write permissions before making it read-only */
 blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, _abort);
-bdrv_reopen_set_read_only(bs, true, NULL);
+bdrv_reopen_set_read_only(s->target_bs, true, NULL);
 }
 
+bdrv_cor_filter_drop(s->cor_filter_bs);
+
 g_free(s->backing_file_str);
 }
 
@@ -121,9 +122,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockBackend *blk = s->common.blk;
-BlockDriverState *bs = blk_bs(blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
-bool enable_cor = !bdrv_cow_child(s->base_overlay);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 int64_t len;
 int64_t offset = 0;
 uint64_t delay_ns = 0;
@@ -135,21 +134,12 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 return 0;
 }
 
-len = bdrv_getlength(bs);
+len = bdrv_getlength(s->target_bs);
 if (len < 0) {
 return len;
 }
 job_progress_set_remaining(>common.job, len);
 
-/* Turn on copy-on-read for the whole block device so that guest read
- * requests help us make progress.  Only do this when copying 

[PATCH v12 01/14] copy-on-read: support preadv/pwritev_part functions

2020-10-22 Thread Andrey Shinkevich via
Add support for the recently introduced functions
bdrv_co_preadv_part()
and
bdrv_co_pwritev_part()
to the COR-filter driver.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/copy-on-read.c | 28 
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 2816e61..cb03e0f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -74,21 +74,25 @@ static int64_t cor_getlength(BlockDriverState *bs)
 }
 
 
-static int coroutine_fn cor_co_preadv(BlockDriverState *bs,
-  uint64_t offset, uint64_t bytes,
-  QEMUIOVector *qiov, int flags)
+static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs,
+   uint64_t offset, uint64_t bytes,
+   QEMUIOVector *qiov,
+   size_t qiov_offset,
+   int flags)
 {
-return bdrv_co_preadv(bs->file, offset, bytes, qiov,
-  flags | BDRV_REQ_COPY_ON_READ);
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
 }
 
 
-static int coroutine_fn cor_co_pwritev(BlockDriverState *bs,
-   uint64_t offset, uint64_t bytes,
-   QEMUIOVector *qiov, int flags)
+static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs,
+uint64_t offset,
+uint64_t bytes,
+QEMUIOVector *qiov,
+size_t qiov_offset, int flags)
 {
-
-return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
+flags);
 }
 
 
@@ -137,8 +141,8 @@ static BlockDriver bdrv_copy_on_read = {
 
 .bdrv_getlength = cor_getlength,
 
-.bdrv_co_preadv = cor_co_preadv,
-.bdrv_co_pwritev= cor_co_pwritev,
+.bdrv_co_preadv_part= cor_co_preadv_part,
+.bdrv_co_pwritev_part   = cor_co_pwritev_part,
 .bdrv_co_pwrite_zeroes  = cor_co_pwrite_zeroes,
 .bdrv_co_pdiscard   = cor_co_pdiscard,
 .bdrv_co_pwritev_compressed = cor_co_pwritev_compressed,
-- 
1.8.3.1




[PATCH v12 05/14] qapi: create BlockdevOptionsCor structure for COR driver

2020-10-22 Thread Andrey Shinkevich via
Create the BlockdevOptionsCor structure for COR driver specific options
splitting it off form the BlockdevOptionsGenericFormat. The only option
'bottom' node in the structure denotes an image file that limits the
COR operations in the backing chain.

Suggested-by: Max Reitz 
Signed-off-by: Andrey Shinkevich 
---
 qapi/block-core.json | 21 -
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 0a64306..bf465f6 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3938,6 +3938,25 @@
   'data': { 'throttle-group': 'str',
 'file' : 'BlockdevRef'
  } }
+
+##
+# @BlockdevOptionsCor:
+#
+# Driver specific block device options for the copy-on-read driver.
+#
+# @bottom: the name of a non-filter node (allocation-bearing layer) that limits
+#  the COR operations in the backing chain (inclusive).
+#  For the block-stream job, it will be the first non-filter overlay of
+#  the base node. We do not involve the base node into the COR
+#  operations because the base may change due to a concurrent
+#  block-commit job on the same backing chain.
+#
+# Since: 5.2
+##
+{ 'struct': 'BlockdevOptionsCor',
+  'base': 'BlockdevOptionsGenericFormat',
+  'data': { '*bottom': 'str' } }
+
 ##
 # @BlockdevOptions:
 #
@@ -3990,7 +4009,7 @@
   'bochs':  'BlockdevOptionsGenericFormat',
   'cloop':  'BlockdevOptionsGenericFormat',
   'compress':   'BlockdevOptionsGenericFormat',
-  'copy-on-read':'BlockdevOptionsGenericFormat',
+  'copy-on-read':'BlockdevOptionsCor',
   'dmg':'BlockdevOptionsGenericFormat',
   'file':   'BlockdevOptionsFile',
   'ftp':'BlockdevOptionsCurlFtp',
-- 
1.8.3.1




[PATCH v12 10/14] block: include supported_read_flags into BDS structure

2020-10-22 Thread Andrey Shinkevich via
Add the new member supported_read_flags to the BlockDriverState
structure. It will control the flags set for copy-on-read operations.
Make the block generic layer evaluate supported read flags before they
go to a block driver.

Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Andrey Shinkevich 
---
 block/io.c| 12 ++--
 include/block/block_int.h |  4 
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/block/io.c b/block/io.c
index 54f0968..78ddf13 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1392,6 +1392,9 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
 if (flags & BDRV_REQ_COPY_ON_READ) {
 int64_t pnum;
 
+/* The flag BDRV_REQ_COPY_ON_READ has reached its addressee */
+flags &= ~BDRV_REQ_COPY_ON_READ;
+
 ret = bdrv_is_allocated(bs, offset, bytes, );
 if (ret < 0) {
 goto out;
@@ -1413,9 +1416,13 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
 goto out;
 }
 
+if (flags & ~bs->supported_read_flags) {
+abort();
+}
+
 max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
 if (bytes <= max_bytes && bytes <= max_transfer) {
-ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, 0);
+ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, flags);
 goto out;
 }
 
@@ -1428,7 +1435,8 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
 
 ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
  num, qiov,
- qiov_offset + bytes - bytes_remaining, 0);
+ qiov_offset + bytes - bytes_remaining,
+ flags);
 max_bytes -= num;
 } else {
 num = bytes_remaining;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index f782737..474174c 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -873,6 +873,10 @@ struct BlockDriverState {
 /* I/O Limits */
 BlockLimits bl;
 
+/*
+ * Flags honored during pread
+ */
+unsigned int supported_read_flags;
 /* Flags honored during pwrite (so far: BDRV_REQ_FUA,
  * BDRV_REQ_WRITE_UNCHANGED).
  * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those
-- 
1.8.3.1




[PATCH v12 02/14] block: add insert/remove node functions

2020-10-22 Thread Andrey Shinkevich via
Provide API for a node insertion to and removal from a backing chain.

Suggested-by: Max Reitz 
Signed-off-by: Andrey Shinkevich 
---
 block.c   | 49 +
 include/block/block.h |  3 +++
 2 files changed, 52 insertions(+)

diff --git a/block.c b/block.c
index 430edf7..502b483 100644
--- a/block.c
+++ b/block.c
@@ -4670,6 +4670,55 @@ static void bdrv_delete(BlockDriverState *bs)
 g_free(bs);
 }
 
+BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
+   int flags, Error **errp)
+{
+BlockDriverState *new_node_bs;
+Error *local_err = NULL;
+
+new_node_bs =  bdrv_open(NULL, NULL, node_options, flags, errp);
+if (new_node_bs == NULL) {
+error_prepend(errp, "Could not create node: ");
+return NULL;
+}
+
+bdrv_drained_begin(bs);
+bdrv_replace_node(bs, new_node_bs, _err);
+bdrv_drained_end(bs);
+
+if (local_err) {
+bdrv_unref(new_node_bs);
+error_propagate(errp, local_err);
+return NULL;
+}
+
+return new_node_bs;
+}
+
+void bdrv_remove_node(BlockDriverState *bs)
+{
+BdrvChild *child;
+BlockDriverState *inferior_bs;
+
+child = bdrv_filter_or_cow_child(bs);
+if (!child) {
+return;
+}
+inferior_bs = child->bs;
+
+/* Retain the BDS until we complete the graph change. */
+bdrv_ref(inferior_bs);
+/* Hold a guest back from writing while permissions are being reset. */
+bdrv_drained_begin(inferior_bs);
+/* Refresh permissions before the graph change. */
+bdrv_child_refresh_perms(bs, child, _abort);
+bdrv_replace_node(bs, inferior_bs, _abort);
+
+bdrv_drained_end(inferior_bs);
+bdrv_unref(inferior_bs);
+bdrv_unref(bs);
+}
+
 /*
  * Run consistency checks on an image
  *
diff --git a/include/block/block.h b/include/block/block.h
index d16c401..ae7612f 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -350,6 +350,9 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState 
*bs_top,
  Error **errp);
 void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
Error **errp);
+BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
+   int flags, Error **errp);
+void bdrv_remove_node(BlockDriverState *bs);
 
 int bdrv_parse_aio(const char *mode, int *flags);
 int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
-- 
1.8.3.1




[PATCH v12 08/14] iotests: add #310 to test bottom node in COR driver

2020-10-22 Thread Andrey Shinkevich via
The test case #310 is similar to #216 by Max Reitz. The difference is
that the test #310 involves a bottom node to the COR filter driver.

Signed-off-by: Andrey Shinkevich 
---
 tests/qemu-iotests/310 | 109 +
 tests/qemu-iotests/310.out |  15 +++
 tests/qemu-iotests/group   |   3 +-
 3 files changed, 126 insertions(+), 1 deletion(-)
 create mode 100755 tests/qemu-iotests/310
 create mode 100644 tests/qemu-iotests/310.out

diff --git a/tests/qemu-iotests/310 b/tests/qemu-iotests/310
new file mode 100755
index 000..5ad7ad2
--- /dev/null
+++ b/tests/qemu-iotests/310
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+#
+# Copy-on-read tests using a COR filter with a bottom node
+#
+# Copyright (c) 2020 Virtuozzo International GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+import iotests
+from iotests import log, qemu_img, qemu_io_silent
+
+# Need backing file support
+iotests.script_initialize(supported_fmts=['qcow2', 'qcow', 'qed', 'vmdk'],
+  supported_platforms=['linux'])
+
+log('')
+log('=== Copy-on-read across nodes ===')
+log('')
+
+# This test is similar to the 216 one by Max Reitz 
+# The difference is that this test case involves a bottom node to the
+# COR filter driver.
+
+with iotests.FilePath('base.img') as base_img_path, \
+ iotests.FilePath('mid.img') as mid_img_path, \
+ iotests.FilePath('top.img') as top_img_path, \
+ iotests.VM() as vm:
+
+log('--- Setting up images ---')
+log('')
+
+assert qemu_img('create', '-f', iotests.imgfmt, base_img_path, '64M') == 0
+assert qemu_io_silent(base_img_path, '-c', 'write -P 1 0M 1M') == 0
+assert qemu_io_silent(base_img_path, '-c', 'write -P 1 3M 1M') == 0
+assert qemu_img('create', '-f', iotests.imgfmt, '-b', base_img_path,
+'-F', iotests.imgfmt, mid_img_path) == 0
+assert qemu_io_silent(mid_img_path,  '-c', 'write -P 3 2M 1M') == 0
+assert qemu_io_silent(mid_img_path,  '-c', 'write -P 3 4M 1M') == 0
+assert qemu_img('create', '-f', iotests.imgfmt, '-b', mid_img_path,
+'-F', iotests.imgfmt, top_img_path) == 0
+assert qemu_io_silent(top_img_path,  '-c', 'write -P 2 1M 1M') == 0
+
+log('Done')
+
+log('')
+log('--- Doing COR ---')
+log('')
+
+vm.launch()
+
+log(vm.qmp('blockdev-add',
+node_name='node0',
+driver='copy-on-read',
+bottom='node2',
+file={
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': top_img_path
+},
+'backing': {
+'node-name': 'node2',
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': mid_img_path
+},
+'backing': {
+#'node-name': 'node2',
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': base_img_path
+}
+},
+}
+}))
+
+# Trigger COR
+log(vm.qmp('human-monitor-command',
+   command_line='qemu-io node0 "read 0 5M"'))
+
+vm.shutdown()
+
+log('')
+log('--- Checking COR result ---')
+log('')
+
+assert qemu_io_silent(base_img_path, '-c', 'discard 0 4M') == 0
+assert qemu_io_silent(mid_img_path, '-c', 'discard 0M 5M') == 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 1 0M 1M') != 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 2 1M 1M') == 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 3 2M 1M') == 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 1 3M 1M') != 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 3 4M 1M') == 0
+
+log('Done')
diff --git a/tests/qemu-iotests/310.out b/tests/qemu-iotests/310.out
new file mode 100644

[PATCH v12 07/14] copy-on-read: limit COR operations to bottom node

2020-10-22 Thread Andrey Shinkevich via
Limit COR operations to the bottom node (inclusively) in the backing
chain when the bottom node name is given. It will be useful for a block
stream job when the COR-filter is applied. The bottom node is passed as
the base itself may change due to concurrent commit jobs on the same
backing chain.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 42 --
 1 file changed, 40 insertions(+), 2 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 3d8e4db..8178a91 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -123,8 +123,46 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
size_t qiov_offset,
int flags)
 {
-return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
-   flags | BDRV_REQ_COPY_ON_READ);
+int64_t n = 0;
+int local_flags;
+int ret;
+BDRVStateCOR *state = bs->opaque;
+
+if (!state->bottom_bs) {
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
+}
+
+while (bytes) {
+local_flags = flags;
+
+/* In case of failure, try to copy-on-read anyway */
+ret = bdrv_is_allocated(bs->file->bs, offset, bytes, );
+if (!ret || ret < 0) {
+ret = 
bdrv_is_allocated_above(bdrv_backing_chain_next(bs->file->bs),
+  state->bottom_bs, true, offset,
+  n, );
+if (ret == 1 || ret < 0) {
+local_flags |= BDRV_REQ_COPY_ON_READ;
+}
+/* Finish earlier if the end of a backing file has been reached */
+if (ret == 0 && n == 0) {
+break;
+}
+}
+
+ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+  local_flags);
+if (ret < 0) {
+return ret;
+}
+
+offset += n;
+qiov_offset += n;
+bytes -= n;
+}
+
+return 0;
 }
 
 
-- 
1.8.3.1




[PATCH v12 03/14] copy-on-read: add filter drop function

2020-10-22 Thread Andrey Shinkevich via
Provide API for the COR-filter removal. Also, drop the filter child
permissions for an inactive state when the filter node is being
removed. This function may be considered as an intermediate solution
before we are able to use bdrv_remove_node(). It will be possible once
the QEMU permission update system has overhauled.
To insert the filter, the block generic layer function
bdrv_insert_node() can be used.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 56 
 block/copy-on-read.h | 32 ++
 2 files changed, 88 insertions(+)
 create mode 100644 block/copy-on-read.h

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index cb03e0f..618c4c4 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -23,11 +23,20 @@
 #include "qemu/osdep.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
+#include "qapi/error.h"
+#include "block/copy-on-read.h"
+
+
+typedef struct BDRVStateCOR {
+bool active;
+} BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BDRVStateCOR *state = bs->opaque;
+
 bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
false, errp);
@@ -42,6 +51,13 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+state->active = true;
+
+/*
+ * We don't need to call bdrv_child_refresh_perms() now as the permissions
+ * will be updated later when the filter node gets its parent.
+ */
+
 return 0;
 }
 
@@ -57,6 +73,17 @@ static void cor_child_perm(BlockDriverState *bs, BdrvChild 
*c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
 {
+BDRVStateCOR *s = bs->opaque;
+
+if (!s->active) {
+/*
+ * While the filter is being removed
+ */
+*nperm = 0;
+*nshared = BLK_PERM_ALL;
+return;
+}
+
 *nperm = perm & PERM_PASSTHROUGH;
 *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
 
@@ -135,6 +162,7 @@ static void cor_lock_medium(BlockDriverState *bs, bool 
locked)
 
 static BlockDriver bdrv_copy_on_read = {
 .format_name= "copy-on-read",
+.instance_size  = sizeof(BDRVStateCOR),
 
 .bdrv_open  = cor_open,
 .bdrv_child_perm= cor_child_perm,
@@ -154,6 +182,34 @@ static BlockDriver bdrv_copy_on_read = {
 .is_filter  = true,
 };
 
+
+void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs)
+{
+BdrvChild *child;
+BlockDriverState *bs;
+BDRVStateCOR *s = cor_filter_bs->opaque;
+
+child = bdrv_filter_child(cor_filter_bs);
+if (!child) {
+return;
+}
+bs = child->bs;
+
+/* Retain the BDS until we complete the graph change. */
+bdrv_ref(bs);
+/* Hold a guest back from writing while permissions are being reset. */
+bdrv_drained_begin(bs);
+/* Drop permissions before the graph change. */
+s->active = false;
+bdrv_child_refresh_perms(cor_filter_bs, child, _abort);
+bdrv_replace_node(cor_filter_bs, bs, _abort);
+
+bdrv_drained_end(bs);
+bdrv_unref(bs);
+bdrv_unref(cor_filter_bs);
+}
+
+
 static void bdrv_copy_on_read_init(void)
 {
 bdrv_register(_copy_on_read);
diff --git a/block/copy-on-read.h b/block/copy-on-read.h
new file mode 100644
index 000..7bf405d
--- /dev/null
+++ b/block/copy-on-read.h
@@ -0,0 +1,32 @@
+/*
+ * Copy-on-read filter block driver
+ *
+ * The filter driver performs Copy-On-Read (COR) operations
+ *
+ * Copyright (c) 2018-2020 Virtuozzo International GmbH.
+ *
+ * Author:
+ *   Andrey Shinkevich 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+#ifndef BLOCK_COPY_ON_READ
+#define BLOCK_COPY_ON_READ
+
+#include "block/block_int.h"
+
+void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs);
+
+#endif /* BLOCK_COPY_ON_READ */
-- 
1.8.3.1




[PATCH v12 04/14] qapi: add filter-node-name to block-stream

2020-10-22 Thread Andrey Shinkevich via
Provide the possibility to pass the 'filter-node-name' parameter to the
block-stream job as it is done for the commit block job.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/monitor/block-hmp-cmds.c | 4 ++--
 block/stream.c | 4 +++-
 blockdev.c | 4 +++-
 include/block/block_int.h  | 7 ++-
 qapi/block-core.json   | 6 ++
 5 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index d15a2be..e8a58f3 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -508,8 +508,8 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
 
 qmp_block_stream(true, device, device, base != NULL, base, false, NULL,
  false, NULL, qdict_haskey(qdict, "speed"), speed, true,
- BLOCKDEV_ON_ERROR_REPORT, false, false, false, false,
- );
+ BLOCKDEV_ON_ERROR_REPORT, false, NULL, false, false, 
false,
+ false, );
 
 hmp_handle_error(mon, error);
 }
diff --git a/block/stream.c b/block/stream.c
index 8ce6729..e0540ee 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -221,7 +221,9 @@ static const BlockJobDriver stream_job_driver = {
 void stream_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *base, const char *backing_file_str,
   int creation_flags, int64_t speed,
-  BlockdevOnError on_error, Error **errp)
+  BlockdevOnError on_error,
+  const char *filter_node_name,
+  Error **errp)
 {
 StreamBlockJob *s;
 BlockDriverState *iter;
diff --git a/blockdev.c b/blockdev.c
index fe6fb5d..c917625 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2499,6 +2499,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
   bool has_backing_file, const char *backing_file,
   bool has_speed, int64_t speed,
   bool has_on_error, BlockdevOnError on_error,
+  bool has_filter_node_name, const char *filter_node_name,
   bool has_auto_finalize, bool auto_finalize,
   bool has_auto_dismiss, bool auto_dismiss,
   Error **errp)
@@ -2581,7 +2582,8 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 
 stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
- job_flags, has_speed ? speed : 0, on_error, _err);
+ job_flags, has_speed ? speed : 0, on_error,
+ filter_node_name, _err);
 if (local_err) {
 error_propagate(errp, local_err);
 goto out;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 38cad9d..f782737 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1134,6 +1134,9 @@ int is_windows_drive(const char *filename);
  *  See @BlockJobCreateFlags
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @on_error: The action to take upon error.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the commit job inserts into the graph above @bs. NULL means
+ * that a node name should be autogenerated.
  * @errp: Error object.
  *
  * Start a streaming operation on @bs.  Clusters that are unallocated
@@ -1146,7 +1149,9 @@ int is_windows_drive(const char *filename);
 void stream_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *base, const char *backing_file_str,
   int creation_flags, int64_t speed,
-  BlockdevOnError on_error, Error **errp);
+  BlockdevOnError on_error,
+  const char *filter_node_name,
+  Error **errp);
 
 /**
  * commit_start:
diff --git a/qapi/block-core.json b/qapi/block-core.json
index ee5ebef..0a64306 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2542,6 +2542,11 @@
 #'stop' and 'enospc' can only be used if the block device
 #supports io-status (see BlockInfo).  Since 1.3.
 #
+# @filter-node-name: the node name that should be assigned to the
+#filter driver that the stream job inserts into the graph
+#above @device. If this option is not given, a node name is
+#autogenerated. (Since: 5.2)
+#
 # @auto-finalize: When false, this job will wait in a PENDING state after it 
has
 # finished its work, waiting for @block-job-finalize before
 # making any block graph changes.
@@ -2572,6 +2577,7 @@
   'data': { '*job-id': 'str', 'device': 'str', '*base': 'str',
 '*base-node': 'str', '*backing-file': 'str', '*speed': 'int',
 '*on-error': 'BlockdevOnError',
+ 

[PATCH v11 13/13] block: apply COR-filter to block-stream jobs

2020-10-12 Thread Andrey Shinkevich via
This patch completes the series with the COR-filter insertion for
block-stream operations. Adding the filter makes it possible for copied
regions to be discarded in backing files during the block-stream job,
what will reduce the disk overuse.
The COR-filter insertion incurs changes in the iotests case
245:test_block_stream_4 that reopens the backing chain during a
block-stream job. There are changes in the iotests #030 as well.
The iotests case 030:test_stream_parallel was deleted due to multiple
conflicts between the concurrent job operations over the same backing
chain. The base backing node for one job is the top node for another
job. It may change due to the filter node inserted into the backing
chain while both jobs are running. Another issue is that the parts of
the backing chain are being frozen by the running job and may not be
changed by the concurrent job when needed. The concept of the parallel
jobs with common nodes is considered vital no more.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 93 +-
 tests/qemu-iotests/030 | 51 +++--
 tests/qemu-iotests/030.out |  4 +-
 tests/qemu-iotests/141.out |  2 +-
 tests/qemu-iotests/245 | 19 +++---
 5 files changed, 81 insertions(+), 88 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index d3e1812..93564db 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -17,8 +17,10 @@
 #include "block/blockjob_int.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
+#include "block/copy-on-read.h"
 
 enum {
 /*
@@ -33,6 +35,8 @@ typedef struct StreamBlockJob {
 BlockJob common;
 BlockDriverState *base_overlay; /* COW overlay (stream from this) */
 BlockDriverState *above_base;   /* Node directly above the base */
+BlockDriverState *cor_filter_bs;
+BlockDriverState *target_bs;
 BlockdevOnError on_error;
 bool bs_read_only;
 bool chain_frozen;
@@ -43,8 +47,7 @@ static int coroutine_fn stream_populate(BlockBackend *blk,
 {
 assert(bytes < SIZE_MAX);
 
-return blk_co_preadv(blk, offset, bytes, NULL,
- BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
+return blk_co_preadv(blk, offset, bytes, NULL, BDRV_REQ_PREFETCH);
 }
 
 static void stream_abort(Job *job)
@@ -52,23 +55,20 @@ static void stream_abort(Job *job)
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 
 if (s->chain_frozen) {
-BlockJob *bjob = >common;
-bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 }
 }
 
 static int stream_prepare(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
-BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
 BlockDriverState *base_unfiltered = bdrv_skip_filters(base);
 Error *local_err = NULL;
 int ret = 0;
 
-bdrv_unfreeze_backing_chain(bs, s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 s->chain_frozen = false;
 
 if (bdrv_cow_child(unfiltered_bs)) {
@@ -94,13 +94,14 @@ static void stream_clean(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
+
+bdrv_cor_filter_drop(s->cor_filter_bs);
 
 /* Reopen the image back in read-only mode if necessary */
 if (s->bs_read_only) {
 /* Give up write permissions before making it read-only */
 blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, _abort);
-bdrv_reopen_set_read_only(bs, true, NULL);
+bdrv_reopen_set_read_only(s->target_bs, true, NULL);
 }
 }
 
@@ -108,9 +109,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockBackend *blk = s->common.blk;
-BlockDriverState *bs = blk_bs(blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
-bool enable_cor = !bdrv_cow_child(s->base_overlay);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 int64_t len;
 int64_t offset = 0;
 uint64_t delay_ns = 0;
@@ -122,21 +121,12 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 return 0;
 }
 
-len = bdrv_getlength(bs);
+len = bdrv_getlength(s->target_bs);
 if (len < 0) {
 return len;
 }
 job_progress_set_remaining(>common.job, len);
 
-/* Turn on copy-on-read for the whole block device so that guest read
- * requests help us make progress.  Only do this when copying the entire
- * 

[PATCH v11 12/13] stream: remove unused backing-file name parameter

2020-10-12 Thread Andrey Shinkevich via
The 'backing-file' argument is not used by the block-stream job. It
designates a backing file name to set in QCOW2 image header after the
block-stream job finished. A backing file name of the node above base
is used instead.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c|  6 +-
 blockdev.c| 21 ++---
 include/block/block_int.h |  2 +-
 3 files changed, 8 insertions(+), 21 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index 51462bd..d3e1812 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -34,7 +34,6 @@ typedef struct StreamBlockJob {
 BlockDriverState *base_overlay; /* COW overlay (stream from this) */
 BlockDriverState *above_base;   /* Node directly above the base */
 BlockdevOnError on_error;
-char *backing_file_str;
 bool bs_read_only;
 bool chain_frozen;
 } StreamBlockJob;
@@ -103,8 +102,6 @@ static void stream_clean(Job *job)
 blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, _abort);
 bdrv_reopen_set_read_only(bs, true, NULL);
 }
-
-g_free(s->backing_file_str);
 }
 
 static int coroutine_fn stream_run(Job *job, Error **errp)
@@ -220,7 +217,7 @@ static const BlockJobDriver stream_job_driver = {
 };
 
 void stream_start(const char *job_id, BlockDriverState *bs,
-  BlockDriverState *base, const char *backing_file_str,
+  BlockDriverState *base,
   int creation_flags, int64_t speed,
   BlockdevOnError on_error,
   const char *filter_node_name,
@@ -295,7 +292,6 @@ void stream_start(const char *job_id, BlockDriverState *bs,
 
 s->base_overlay = base_overlay;
 s->above_base = above_base;
-s->backing_file_str = g_strdup(backing_file_str);
 s->bs_read_only = bs_read_only;
 s->chain_frozen = true;
 
diff --git a/blockdev.c b/blockdev.c
index d719c47..019b6e0 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2498,7 +2498,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 BlockDriverState *base_bs = NULL;
 AioContext *aio_context;
 Error *local_err = NULL;
-const char *base_name = NULL;
 int job_flags = JOB_DEFAULT;
 
 if (!has_on_error) {
@@ -2526,7 +2525,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 goto out;
 }
 assert(bdrv_get_aio_context(base_bs) == aio_context);
-base_name = base;
 }
 
 if (has_base_node) {
@@ -2541,7 +2539,11 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 assert(bdrv_get_aio_context(base_bs) == aio_context);
 bdrv_refresh_filename(base_bs);
-base_name = base_bs->filename;
+}
+
+if (has_backing_file) {
+warn_report("Use of \"backing-file\" argument is deprecated; "
+"a backing file of the node above base is used instead");
 }
 
 /* Check for op blockers in the whole chain between bs and base */
@@ -2553,17 +2555,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 }
 
-/* if we are streaming the entire chain, the result will have no backing
- * file, and specifying one is therefore an error */
-if (base_bs == NULL && has_backing_file) {
-error_setg(errp, "backing file specified, but streaming the "
- "entire chain");
-goto out;
-}
-
-/* backing_file string overrides base bs filename */
-base_name = has_backing_file ? backing_file : base_name;
-
 if (has_auto_finalize && !auto_finalize) {
 job_flags |= JOB_MANUAL_FINALIZE;
 }
@@ -2571,7 +2562,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 job_flags |= JOB_MANUAL_DISMISS;
 }
 
-stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
+stream_start(has_job_id ? job_id : NULL, bs, base_bs,
  job_flags, has_speed ? speed : 0, on_error,
  filter_node_name, _err);
 if (local_err) {
diff --git a/include/block/block_int.h b/include/block/block_int.h
index a142867..4f523c3 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1151,7 +1151,7 @@ int is_windows_drive(const char *filename);
  * BlockDriverState.
  */
 void stream_start(const char *job_id, BlockDriverState *bs,
-  BlockDriverState *base, const char *backing_file_str,
+  BlockDriverState *base,
   int creation_flags, int64_t speed,
   BlockdevOnError on_error,
   const char *filter_node_name,
-- 
1.8.3.1




[PATCH v11 11/13] stream: mark backing-file argument as deprecated

2020-10-12 Thread Andrey Shinkevich via
Whereas the block-stream job starts using a backing file name of the
base node overlay after the block-stream job completes, mark the QMP
'backing-file' argument as deprecated.

Signed-off-by: Andrey Shinkevich 
---
 docs/system/deprecated.rst | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/docs/system/deprecated.rst b/docs/system/deprecated.rst
index 8b3ab5b..7491fcf 100644
--- a/docs/system/deprecated.rst
+++ b/docs/system/deprecated.rst
@@ -285,6 +285,12 @@ details.
 The ``query-events`` command has been superseded by the more powerful
 and accurate ``query-qmp-schema`` command.
 
+``block-stream`` argument ``backing-file`` (since 5.2)
+'
+
+The argument ``backing-file`` is deprecated. QEMU uses a backing file
+name of the base node overlay after the block-stream job completes.
+
 chardev client socket with ``wait`` option (since 4.0)
 ''
 
-- 
1.8.3.1




[PATCH v11 08/13] copy-on-read: add support for BDRV_REQ_PREFETCH to COR-filter

2020-10-12 Thread Andrey Shinkevich via
Add support for the BDRV_REQ_PREFETCH flag to the supported_write_flags
of the COR-filter.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index dfbd6ad..b136895 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -50,6 +50,7 @@ static int cor_open(BlockDriverState *bs, QDict *options, int 
flags,
 return -EINVAL;
 }
 
+bs->supported_read_flags = BDRV_REQ_PREFETCH;
 bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
 (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
 
-- 
1.8.3.1




[PATCH v11 09/13] copy-on-read: skip non-guest reads if no copy needed

2020-10-12 Thread Andrey Shinkevich via
If the flag BDRV_REQ_PREFETCH was set, pass it further to the
COR-driver to skip unneeded reading. It can be taken into account for
the COR-algorithms optimization. That check is being made during the
block stream job by the moment.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 13 +
 block/io.c   |  3 ++-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index b136895..278a11a 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -148,10 +148,15 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
 }
 }
 
-ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
-  local_flags);
-if (ret < 0) {
-return ret;
+if (!!(flags & BDRV_REQ_PREFETCH) &
+!(local_flags & BDRV_REQ_COPY_ON_READ)) {
+/* Skip non-guest reads if no copy needed */
+} else {
+ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+  local_flags);
+if (ret < 0) {
+return ret;
+}
 }
 
 offset += n;
diff --git a/block/io.c b/block/io.c
index 11df188..bff1808 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1512,7 +1512,8 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
 
 max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
 if (bytes <= max_bytes && bytes <= max_transfer) {
-ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, 0);
+ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset,
+ flags & bs->supported_read_flags);
 goto out;
 }
 
-- 
1.8.3.1




[PATCH v11 10/13] stream: skip filters when writing backing file name to QCOW2 header

2020-10-12 Thread Andrey Shinkevich via
Avoid writing a filter JSON-name to QCOW2 image when the backing file
is changed after the block stream job.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index e0540ee..51462bd 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -65,6 +65,7 @@ static int stream_prepare(Job *job)
 BlockDriverState *bs = blk_bs(bjob->blk);
 BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
 BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
+BlockDriverState *base_unfiltered = bdrv_skip_filters(base);
 Error *local_err = NULL;
 int ret = 0;
 
@@ -73,10 +74,10 @@ static int stream_prepare(Job *job)
 
 if (bdrv_cow_child(unfiltered_bs)) {
 const char *base_id = NULL, *base_fmt = NULL;
-if (base) {
-base_id = s->backing_file_str;
-if (base->drv) {
-base_fmt = base->drv->format_name;
+if (base_unfiltered) {
+base_id = base_unfiltered->filename;
+if (base_unfiltered->drv) {
+base_fmt = base_unfiltered->drv->format_name;
 }
 }
 bdrv_set_backing_hd(unfiltered_bs, base, _err);
-- 
1.8.3.1




[PATCH v11 07/13] block: include supported_read_flags into BDS structure

2020-10-12 Thread Andrey Shinkevich via
Add the new member supported_read_flags to BlockDriverState structure.
It will control the BDRV_REQ_PREFETCH flag set for copy-on-read
operations.

Signed-off-by: Andrey Shinkevich 
---
 include/block/block_int.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index f782737..a142867 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -873,6 +873,10 @@ struct BlockDriverState {
 /* I/O Limits */
 BlockLimits bl;
 
+/*
+ * Flags honored during pread (so far: BDRV_REQ_PREFETCH)
+ */
+unsigned int supported_read_flags;
 /* Flags honored during pwrite (so far: BDRV_REQ_FUA,
  * BDRV_REQ_WRITE_UNCHANGED).
  * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those
-- 
1.8.3.1




[PATCH v11 06/13] block: modify the comment for BDRV_REQ_PREFETCH flag

2020-10-12 Thread Andrey Shinkevich via
Modify the comment for the flag BDRV_REQ_PREFETCH as we are going to
use it alone and pass it to the COR-filter driver for further
processing.

Signed-off-by: Andrey Shinkevich 
---
 include/block/block.h | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index 981ab5b..2b7efd1 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -71,9 +71,10 @@ typedef enum {
 BDRV_REQ_NO_FALLBACK= 0x100,
 
 /*
- * BDRV_REQ_PREFETCH may be used only together with BDRV_REQ_COPY_ON_READ
- * on read request and means that caller doesn't really need data to be
- * written to qiov parameter which may be NULL.
+ * BDRV_REQ_PREFETCH may be used together with the BDRV_REQ_COPY_ON_READ
+ * flag or when the COR-filter applied to read operations and means that
+ * caller doesn't really need data to be written to qiov parameter which
+ * may be NULL.
  */
 BDRV_REQ_PREFETCH  = 0x200,
 /* Mask of valid flags */
-- 
1.8.3.1




[PATCH v11 05/13] copy-on-read: limit COR operations to base in COR driver

2020-10-12 Thread Andrey Shinkevich via
Limit COR operations by the base node in the backing chain when the
overlay base node name is given. It will be useful for a block stream
job when the COR-filter is applied. The overlay base node is passed as
the base itself may change due to concurrent commit jobs on the same
backing chain.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 39 +--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index c578b1b..dfbd6ad 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -122,8 +122,43 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
size_t qiov_offset,
int flags)
 {
-return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
-   flags | BDRV_REQ_COPY_ON_READ);
+int64_t n = 0;
+int64_t size = offset + bytes;
+int local_flags;
+int ret;
+BDRVStateCOR *state = bs->opaque;
+
+if (!state->base_overlay) {
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
+}
+
+while (offset < size) {
+local_flags = flags;
+
+/* In case of failure, try to copy-on-read anyway */
+ret = bdrv_is_allocated(bs->file->bs, offset, bytes, );
+if (!ret) {
+ret = bdrv_is_allocated_above(bdrv_cow_bs(bs->file->bs),
+  state->base_overlay, true, offset,
+  n, );
+if (ret) {
+local_flags |= BDRV_REQ_COPY_ON_READ;
+}
+}
+
+ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+  local_flags);
+if (ret < 0) {
+return ret;
+}
+
+offset += n;
+qiov_offset += n;
+bytes -= n;
+}
+
+return 0;
 }
 
 
-- 
1.8.3.1




[PATCH v11 02/13] copy-on-read: add filter append/drop functions

2020-10-12 Thread Andrey Shinkevich via
Provide API for the COR-filter insertion/removal.
Also, drop the filter child permissions for an inactive state when the
filter node is being removed.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/copy-on-read.c | 88 
 block/copy-on-read.h | 35 +
 2 files changed, 123 insertions(+)
 create mode 100644 block/copy-on-read.h

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index cb03e0f..bcccf0f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -23,11 +23,21 @@
 #include "qemu/osdep.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "block/copy-on-read.h"
+
+
+typedef struct BDRVStateCOR {
+bool active;
+} BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BDRVStateCOR *state = bs->opaque;
+
 bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
false, errp);
@@ -42,6 +52,13 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+state->active = true;
+
+/*
+ * We don't need to call bdrv_child_refresh_perms() now as the permissions
+ * will be updated later when the filter node gets its parent.
+ */
+
 return 0;
 }
 
@@ -57,6 +74,17 @@ static void cor_child_perm(BlockDriverState *bs, BdrvChild 
*c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
 {
+BDRVStateCOR *s = bs->opaque;
+
+if (!s->active) {
+/*
+ * While the filter is being removed
+ */
+*nperm = 0;
+*nshared = BLK_PERM_ALL;
+return;
+}
+
 *nperm = perm & PERM_PASSTHROUGH;
 *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
 
@@ -135,6 +163,7 @@ static void cor_lock_medium(BlockDriverState *bs, bool 
locked)
 
 static BlockDriver bdrv_copy_on_read = {
 .format_name= "copy-on-read",
+.instance_size  = sizeof(BDRVStateCOR),
 
 .bdrv_open  = cor_open,
 .bdrv_child_perm= cor_child_perm,
@@ -159,4 +188,63 @@ static void bdrv_copy_on_read_init(void)
 bdrv_register(_copy_on_read);
 }
 
+
+BlockDriverState *bdrv_cor_filter_append(BlockDriverState *bs,
+ QDict *node_options,
+ int flags, Error **errp)
+{
+BlockDriverState *cor_filter_bs;
+Error *local_err = NULL;
+
+cor_filter_bs = bdrv_open(NULL, NULL, node_options, flags, errp);
+if (cor_filter_bs == NULL) {
+error_prepend(errp, "Could not create COR-filter node: ");
+return NULL;
+}
+
+if (!qdict_get_try_str(node_options, "node-name")) {
+cor_filter_bs->implicit = true;
+}
+
+bdrv_drained_begin(bs);
+bdrv_replace_node(bs, cor_filter_bs, _err);
+bdrv_drained_end(bs);
+
+if (local_err) {
+bdrv_unref(cor_filter_bs);
+error_propagate(errp, local_err);
+return NULL;
+}
+
+return cor_filter_bs;
+}
+
+
+void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs)
+{
+BdrvChild *child;
+BlockDriverState *bs;
+BDRVStateCOR *s = cor_filter_bs->opaque;
+
+child = bdrv_filter_child(cor_filter_bs);
+if (!child) {
+return;
+}
+bs = child->bs;
+
+/* Retain the BDS until we complete the graph change. */
+bdrv_ref(bs);
+/* Hold a guest back from writing while permissions are being reset. */
+bdrv_drained_begin(bs);
+/* Drop permissions before the graph change. */
+s->active = false;
+bdrv_child_refresh_perms(cor_filter_bs, child, _abort);
+bdrv_replace_node(cor_filter_bs, bs, _abort);
+
+bdrv_drained_end(bs);
+bdrv_unref(bs);
+bdrv_unref(cor_filter_bs);
+}
+
+
 block_init(bdrv_copy_on_read_init);
diff --git a/block/copy-on-read.h b/block/copy-on-read.h
new file mode 100644
index 000..d6f2422
--- /dev/null
+++ b/block/copy-on-read.h
@@ -0,0 +1,35 @@
+/*
+ * Copy-on-read filter block driver
+ *
+ * The filter driver performs Copy-On-Read (COR) operations
+ *
+ * Copyright (c) 2018-2020 Virtuozzo International GmbH.
+ *
+ * Author:
+ *   Andrey Shinkevich 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of

[PATCH v11 04/13] copy-on-read: pass overlay base node name to COR driver

2020-10-12 Thread Andrey Shinkevich via
We are going to use the COR-filter for a block-stream job.
To limit COR operations by the base node in the backing chain during
stream job, pass the name of overlay base node to the copy-on-read
driver as base node itself may change due to possible concurrent jobs.
The rest of the functionality will be implemented in the patch that
follows.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index bcccf0f..c578b1b 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -24,19 +24,24 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qdict.h"
 #include "block/copy-on-read.h"
 
 
 typedef struct BDRVStateCOR {
 bool active;
+BlockDriverState *base_overlay;
 } BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BlockDriverState *base_overlay = NULL;
 BDRVStateCOR *state = bs->opaque;
+/* We need the base overlay node rather than the base itself */
+const char *base_overlay_node = qdict_get_try_str(options, "base");
 
 bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
@@ -52,7 +57,16 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+if (base_overlay_node) {
+qdict_del(options, "base");
+base_overlay = bdrv_lookup_bs(NULL, base_overlay_node, errp);
+if (!base_overlay) {
+error_setg(errp, QERR_BASE_NOT_FOUND, base_overlay_node);
+return -EINVAL;
+}
+}
 state->active = true;
+state->base_overlay = base_overlay;
 
 /*
  * We don't need to call bdrv_child_refresh_perms() now as the permissions
-- 
1.8.3.1




[PATCH v11 03/13] qapi: add filter-node-name to block-stream

2020-10-12 Thread Andrey Shinkevich via
Provide the possibility to pass the 'filter-node-name' parameter to the
block-stream job as it is done for the commit block job.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/monitor/block-hmp-cmds.c | 4 ++--
 block/stream.c | 4 +++-
 blockdev.c | 4 +++-
 include/block/block_int.h  | 7 ++-
 qapi/block-core.json   | 6 ++
 5 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 4d3db5e..4e66775 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -507,8 +507,8 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
 
 qmp_block_stream(true, device, device, base != NULL, base, false, NULL,
  false, NULL, qdict_haskey(qdict, "speed"), speed, true,
- BLOCKDEV_ON_ERROR_REPORT, false, false, false, false,
- );
+ BLOCKDEV_ON_ERROR_REPORT, false, NULL, false, false, 
false,
+ false, );
 
 hmp_handle_error(mon, error);
 }
diff --git a/block/stream.c b/block/stream.c
index 8ce6729..e0540ee 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -221,7 +221,9 @@ static const BlockJobDriver stream_job_driver = {
 void stream_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *base, const char *backing_file_str,
   int creation_flags, int64_t speed,
-  BlockdevOnError on_error, Error **errp)
+  BlockdevOnError on_error,
+  const char *filter_node_name,
+  Error **errp)
 {
 StreamBlockJob *s;
 BlockDriverState *iter;
diff --git a/blockdev.c b/blockdev.c
index bebd3ba..d719c47 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2489,6 +2489,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
   bool has_backing_file, const char *backing_file,
   bool has_speed, int64_t speed,
   bool has_on_error, BlockdevOnError on_error,
+  bool has_filter_node_name, const char *filter_node_name,
   bool has_auto_finalize, bool auto_finalize,
   bool has_auto_dismiss, bool auto_dismiss,
   Error **errp)
@@ -2571,7 +2572,8 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 
 stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
- job_flags, has_speed ? speed : 0, on_error, _err);
+ job_flags, has_speed ? speed : 0, on_error,
+ filter_node_name, _err);
 if (local_err) {
 error_propagate(errp, local_err);
 goto out;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 38cad9d..f782737 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1134,6 +1134,9 @@ int is_windows_drive(const char *filename);
  *  See @BlockJobCreateFlags
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @on_error: The action to take upon error.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the commit job inserts into the graph above @bs. NULL means
+ * that a node name should be autogenerated.
  * @errp: Error object.
  *
  * Start a streaming operation on @bs.  Clusters that are unallocated
@@ -1146,7 +1149,9 @@ int is_windows_drive(const char *filename);
 void stream_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *base, const char *backing_file_str,
   int creation_flags, int64_t speed,
-  BlockdevOnError on_error, Error **errp);
+  BlockdevOnError on_error,
+  const char *filter_node_name,
+  Error **errp);
 
 /**
  * commit_start:
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 3c16f1e..32fb097 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2533,6 +2533,11 @@
 #'stop' and 'enospc' can only be used if the block device
 #supports io-status (see BlockInfo).  Since 1.3.
 #
+# @filter-node-name: the node name that should be assigned to the
+#filter driver that the stream job inserts into the graph
+#above @device. If this option is not given, a node name is
+#autogenerated. (Since: 5.2)
+#
 # @auto-finalize: When false, this job will wait in a PENDING state after it 
has
 # finished its work, waiting for @block-job-finalize before
 # making any block graph changes.
@@ -2563,6 +2568,7 @@
   'data': { '*job-id': 'str', 'device': 'str', '*base': 'str',
 '*base-node': 'str', '*backing-file': 'str', '*speed': 'int',
 '*on-error': 'BlockdevOnError',
+ 

[PATCH v11 01/13] copy-on-read: Support preadv/pwritev_part functions

2020-10-12 Thread Andrey Shinkevich via
Add support for the recently introduced functions
bdrv_co_preadv_part()
and
bdrv_co_pwritev_part()
to the COR-filter driver.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/copy-on-read.c | 28 
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 2816e61..cb03e0f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -74,21 +74,25 @@ static int64_t cor_getlength(BlockDriverState *bs)
 }
 
 
-static int coroutine_fn cor_co_preadv(BlockDriverState *bs,
-  uint64_t offset, uint64_t bytes,
-  QEMUIOVector *qiov, int flags)
+static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs,
+   uint64_t offset, uint64_t bytes,
+   QEMUIOVector *qiov,
+   size_t qiov_offset,
+   int flags)
 {
-return bdrv_co_preadv(bs->file, offset, bytes, qiov,
-  flags | BDRV_REQ_COPY_ON_READ);
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
 }
 
 
-static int coroutine_fn cor_co_pwritev(BlockDriverState *bs,
-   uint64_t offset, uint64_t bytes,
-   QEMUIOVector *qiov, int flags)
+static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs,
+uint64_t offset,
+uint64_t bytes,
+QEMUIOVector *qiov,
+size_t qiov_offset, int flags)
 {
-
-return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
+flags);
 }
 
 
@@ -137,8 +141,8 @@ static BlockDriver bdrv_copy_on_read = {
 
 .bdrv_getlength = cor_getlength,
 
-.bdrv_co_preadv = cor_co_preadv,
-.bdrv_co_pwritev= cor_co_pwritev,
+.bdrv_co_preadv_part= cor_co_preadv_part,
+.bdrv_co_pwritev_part   = cor_co_pwritev_part,
 .bdrv_co_pwrite_zeroes  = cor_co_pwrite_zeroes,
 .bdrv_co_pdiscard   = cor_co_pdiscard,
 .bdrv_co_pwritev_compressed = cor_co_pwritev_compressed,
-- 
1.8.3.1




[PATCH v11 00/13] Apply COR-filter to the block-stream permanently

2020-10-12 Thread Andrey Shinkevich via
The iotest case test_stream_parallel still does not pass after the
COR-filter is inserted into the backing chain. As the test case may not
be initialized, it does not make a sense and was removed again.

v11:
  04: Base node overlay is used instead of base.
  05: Base node overlay is used instead of base.
  06: New.
  07: New.
  08: New.
  09: The new BDS-member 'supported_read_flags' is applied.
  10: The 'base_metadata' variable renamed to 'base_unfiltered'.
  11: New.
  12: The backing-file argument is left in the QMP interface. Warning added.
  13: The BDRV_REQ_COPY_ON_READ removed from the stream_populate();
  The 'implicit' initialization moved back to COR-filter driver.
  Base node overlay is used instead of base.

The v8 Message-Id:
<1601383109-110988-1-git-send-email-andrey.shinkev...@virtuozzo.com>

Andrey Shinkevich (13):
  copy-on-read: Support preadv/pwritev_part functions
  copy-on-read: add filter append/drop functions
  qapi: add filter-node-name to block-stream
  copy-on-read: pass overlay base node name to COR driver
  copy-on-read: limit COR operations to base in COR driver
  block: modify the comment for BDRV_REQ_PREFETCH flag
  block: include supported_read_flags into BDS structure
  copy-on-read: add support for BDRV_REQ_PREFETCH to COR-filter
  copy-on-read: skip non-guest reads if no copy needed
  stream: skip filters when writing backing file name to QCOW2 header
  stream: mark backing-file argument as deprecated
  stream: remove unused backing-file name parameter
  block: apply COR-filter to block-stream jobs

 block/copy-on-read.c   | 171 ++---
 block/copy-on-read.h   |  35 +
 block/io.c |   3 +-
 block/monitor/block-hmp-cmds.c |   4 +-
 block/stream.c | 112 ---
 blockdev.c |  25 +++---
 docs/system/deprecated.rst |   6 ++
 include/block/block.h  |   7 +-
 include/block/block_int.h  |  13 +++-
 qapi/block-core.json   |   6 ++
 tests/qemu-iotests/030 |  51 ++--
 tests/qemu-iotests/030.out |   4 +-
 tests/qemu-iotests/141.out |   2 +-
 tests/qemu-iotests/245 |  19 +++--
 14 files changed, 324 insertions(+), 134 deletions(-)
 create mode 100644 block/copy-on-read.h

-- 
1.8.3.1




[PATCH v10 7/9] stream: skip filters when writing backing file name to QCOW2 header

2020-09-29 Thread Andrey Shinkevich via
Avoid writing a filter JSON-name to QCOW2 image when the backing file
is changed after the block stream job.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index e0540ee..b0719e9 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -65,6 +65,7 @@ static int stream_prepare(Job *job)
 BlockDriverState *bs = blk_bs(bjob->blk);
 BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
 BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
+BlockDriverState *base_metadata = bdrv_skip_filters(base);
 Error *local_err = NULL;
 int ret = 0;
 
@@ -73,10 +74,10 @@ static int stream_prepare(Job *job)
 
 if (bdrv_cow_child(unfiltered_bs)) {
 const char *base_id = NULL, *base_fmt = NULL;
-if (base) {
-base_id = s->backing_file_str;
-if (base->drv) {
-base_fmt = base->drv->format_name;
+if (base_metadata) {
+base_id = base_metadata->filename;
+if (base_metadata->drv) {
+base_fmt = base_metadata->drv->format_name;
 }
 }
 bdrv_set_backing_hd(unfiltered_bs, base, _err);
-- 
1.8.3.1




[PATCH v10 9/9] block: apply COR-filter to block-stream jobs

2020-09-29 Thread Andrey Shinkevich via
This patch completes the series with the COR-filter insertion for
block-stream operations. Adding the filter makes it possible for copied
regions to be discarded in backing files during the block-stream job,
what will reduce the disk overuse.
The COR-filter insertion incurs changes in the iotests case
245:test_block_stream_4 that reopens the backing chain during a
block-stream job. There are changes in the iotests #030 as well.
The iotests case 030:test_stream_parallel was deleted due to multiple
conflicts between the concurrent job operations over the same backing
chain. The base backing node for one job is the top node for another
job. It may change due to the filter node inserted into the backing
chain while both jobs are running. Another issue is that the parts of
the backing chain are being frozen by the running job and may not be
changed by the concurrent job when needed. The concept of the parallel
jobs with common nodes is considered vital no more.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 93 ++
 tests/qemu-iotests/030 | 51 +++--
 tests/qemu-iotests/030.out |  4 +-
 tests/qemu-iotests/141.out |  2 +-
 tests/qemu-iotests/245 | 19 +++---
 5 files changed, 83 insertions(+), 86 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index fe2663f..240b3dc 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -17,8 +17,10 @@
 #include "block/blockjob_int.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
+#include "block/copy-on-read.h"
 
 enum {
 /*
@@ -33,6 +35,8 @@ typedef struct StreamBlockJob {
 BlockJob common;
 BlockDriverState *base_overlay; /* COW overlay (stream from this) */
 BlockDriverState *above_base;   /* Node directly above the base */
+BlockDriverState *cor_filter_bs;
+BlockDriverState *target_bs;
 BlockdevOnError on_error;
 bool bs_read_only;
 bool chain_frozen;
@@ -52,23 +56,20 @@ static void stream_abort(Job *job)
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 
 if (s->chain_frozen) {
-BlockJob *bjob = >common;
-bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 }
 }
 
 static int stream_prepare(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
-BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
 BlockDriverState *base_metadata = bdrv_skip_filters(base);
 Error *local_err = NULL;
 int ret = 0;
 
-bdrv_unfreeze_backing_chain(bs, s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 s->chain_frozen = false;
 
 if (bdrv_cow_child(unfiltered_bs)) {
@@ -94,13 +95,14 @@ static void stream_clean(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
+
+bdrv_cor_filter_drop(s->cor_filter_bs);
 
 /* Reopen the image back in read-only mode if necessary */
 if (s->bs_read_only) {
 /* Give up write permissions before making it read-only */
 blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, _abort);
-bdrv_reopen_set_read_only(bs, true, NULL);
+bdrv_reopen_set_read_only(s->target_bs, true, NULL);
 }
 }
 
@@ -108,9 +110,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockBackend *blk = s->common.blk;
-BlockDriverState *bs = blk_bs(blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
-bool enable_cor = !bdrv_cow_child(s->base_overlay);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 int64_t len;
 int64_t offset = 0;
 uint64_t delay_ns = 0;
@@ -122,21 +122,12 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 return 0;
 }
 
-len = bdrv_getlength(bs);
+len = bdrv_getlength(s->target_bs);
 if (len < 0) {
 return len;
 }
 job_progress_set_remaining(>common.job, len);
 
-/* Turn on copy-on-read for the whole block device so that guest read
- * requests help us make progress.  Only do this when copying the entire
- * backing chain since the copy-on-read operation does not take base into
- * account.
- */
-if (enable_cor) {
-bdrv_enable_copy_on_read(bs);
-}
-
 for ( ; offset < len; offset += n) {
 bool copy;
 int ret;
@@ -195,10 +186,6 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 }
 }
 
-

[PATCH v10 6/9] copy-on-read: skip non-guest reads if no copy needed

2020-09-29 Thread Andrey Shinkevich via
If the flag BDRV_REQ_PREFETCH was set, pass it further to the
COR-driver to skip unneeded reading. It can be taken into account for
the COR-algorithms optimization. That check is being made during the
block stream job by the moment.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 14 ++
 block/io.c   |  2 +-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index f53f7e0..5389dca 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -145,10 +145,16 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
 }
 }
 
-ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
-  local_flags);
-if (ret < 0) {
-return ret;
+if ((flags & BDRV_REQ_PREFETCH) &
+!(local_flags & BDRV_REQ_COPY_ON_READ)) {
+/* Skip non-guest reads if no copy needed */
+} else {
+
+ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+  local_flags);
+if (ret < 0) {
+return ret;
+}
 }
 
 offset += n;
diff --git a/block/io.c b/block/io.c
index 11df188..62b75a5 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1388,7 +1388,7 @@ static int coroutine_fn 
bdrv_co_do_copy_on_readv(BdrvChild *child,
 qemu_iovec_init_buf(_qiov, bounce_buffer, pnum);
 
 ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
- _qiov, 0, 0);
+ _qiov, 0, flags & 
BDRV_REQ_PREFETCH);
 if (ret < 0) {
 goto err;
 }
-- 
1.8.3.1




[PATCH v10 4/9] copy-on-read: pass base node name to COR driver

2020-09-29 Thread Andrey Shinkevich via
To limit the guest's COR operations by the base node in the backing
chain during stream job, pass the base node name to the copy-on-read
driver. The rest of the functionality will be implemented in the patch
that follows.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 3c8231f..e04092f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -24,19 +24,23 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qdict.h"
 #include "block/copy-on-read.h"
 
 
 typedef struct BDRVStateCOR {
 bool active;
+BlockDriverState *base_bs;
 } BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BlockDriverState *base_bs = NULL;
 BDRVStateCOR *state = bs->opaque;
+const char *base_node = qdict_get_try_str(options, "base");
 
 bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
@@ -52,7 +56,16 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+if (base_node) {
+qdict_del(options, "base");
+base_bs = bdrv_lookup_bs(NULL, base_node, errp);
+if (!base_bs) {
+error_setg(errp, QERR_BASE_NOT_FOUND, base_node);
+return -EINVAL;
+}
+}
 state->active = true;
+state->base_bs = base_bs;
 
 /*
  * We don't need to call bdrv_child_refresh_perms() now as the permissions
-- 
1.8.3.1




[PATCH v10 8/9] block: remove unused backing-file name parameter

2020-09-29 Thread Andrey Shinkevich via
The block stream QMP parameter backing-file is in use no more. It
designates a backing file name to set in QCOW2 image header after the
block stream job finished. The base file name is used instead.

Signed-off-by: Andrey Shinkevich 
---
 block/monitor/block-hmp-cmds.c |  2 +-
 block/stream.c |  6 +-
 blockdev.c | 17 +
 include/block/block_int.h  |  2 +-
 qapi/block-core.json   | 17 +
 5 files changed, 5 insertions(+), 39 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 4e66775..5f19499 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -506,7 +506,7 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
 int64_t speed = qdict_get_try_int(qdict, "speed", 0);
 
 qmp_block_stream(true, device, device, base != NULL, base, false, NULL,
- false, NULL, qdict_haskey(qdict, "speed"), speed, true,
+ qdict_haskey(qdict, "speed"), speed, true,
  BLOCKDEV_ON_ERROR_REPORT, false, NULL, false, false, 
false,
  false, );
 
diff --git a/block/stream.c b/block/stream.c
index b0719e9..fe2663f 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -34,7 +34,6 @@ typedef struct StreamBlockJob {
 BlockDriverState *base_overlay; /* COW overlay (stream from this) */
 BlockDriverState *above_base;   /* Node directly above the base */
 BlockdevOnError on_error;
-char *backing_file_str;
 bool bs_read_only;
 bool chain_frozen;
 } StreamBlockJob;
@@ -103,8 +102,6 @@ static void stream_clean(Job *job)
 blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, _abort);
 bdrv_reopen_set_read_only(bs, true, NULL);
 }
-
-g_free(s->backing_file_str);
 }
 
 static int coroutine_fn stream_run(Job *job, Error **errp)
@@ -220,7 +217,7 @@ static const BlockJobDriver stream_job_driver = {
 };
 
 void stream_start(const char *job_id, BlockDriverState *bs,
-  BlockDriverState *base, const char *backing_file_str,
+  BlockDriverState *base,
   int creation_flags, int64_t speed,
   BlockdevOnError on_error,
   const char *filter_node_name,
@@ -295,7 +292,6 @@ void stream_start(const char *job_id, BlockDriverState *bs,
 
 s->base_overlay = base_overlay;
 s->above_base = above_base;
-s->backing_file_str = g_strdup(backing_file_str);
 s->bs_read_only = bs_read_only;
 s->chain_frozen = true;
 
diff --git a/blockdev.c b/blockdev.c
index d719c47..b223601 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2486,7 +2486,6 @@ out:
 void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
   bool has_base, const char *base,
   bool has_base_node, const char *base_node,
-  bool has_backing_file, const char *backing_file,
   bool has_speed, int64_t speed,
   bool has_on_error, BlockdevOnError on_error,
   bool has_filter_node_name, const char *filter_node_name,
@@ -2498,7 +2497,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 BlockDriverState *base_bs = NULL;
 AioContext *aio_context;
 Error *local_err = NULL;
-const char *base_name = NULL;
 int job_flags = JOB_DEFAULT;
 
 if (!has_on_error) {
@@ -2526,7 +2524,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 goto out;
 }
 assert(bdrv_get_aio_context(base_bs) == aio_context);
-base_name = base;
 }
 
 if (has_base_node) {
@@ -2541,7 +2538,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 assert(bdrv_get_aio_context(base_bs) == aio_context);
 bdrv_refresh_filename(base_bs);
-base_name = base_bs->filename;
 }
 
 /* Check for op blockers in the whole chain between bs and base */
@@ -2553,17 +2549,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 }
 
-/* if we are streaming the entire chain, the result will have no backing
- * file, and specifying one is therefore an error */
-if (base_bs == NULL && has_backing_file) {
-error_setg(errp, "backing file specified, but streaming the "
- "entire chain");
-goto out;
-}
-
-/* backing_file string overrides base bs filename */
-base_name = has_backing_file ? backing_file : base_name;
-
 if (has_auto_finalize && !auto_finalize) {
 job_flags |= JOB_MANUAL_FINALIZE;
 }
@@ -2571,7 +2556,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 job_flags |= JOB_MANUAL_DISMISS;
 }
 
-stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
+stream_start(has_job_id ? 

[PATCH v10 3/9] qapi: add filter-node-name to block-stream

2020-09-29 Thread Andrey Shinkevich via
Provide the possibility to pass the 'filter-node-name' parameter to the
block-stream job as it is done for the commit block job.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/monitor/block-hmp-cmds.c | 4 ++--
 block/stream.c | 4 +++-
 blockdev.c | 4 +++-
 include/block/block_int.h  | 7 ++-
 qapi/block-core.json   | 6 ++
 5 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 4d3db5e..4e66775 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -507,8 +507,8 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
 
 qmp_block_stream(true, device, device, base != NULL, base, false, NULL,
  false, NULL, qdict_haskey(qdict, "speed"), speed, true,
- BLOCKDEV_ON_ERROR_REPORT, false, false, false, false,
- );
+ BLOCKDEV_ON_ERROR_REPORT, false, NULL, false, false, 
false,
+ false, );
 
 hmp_handle_error(mon, error);
 }
diff --git a/block/stream.c b/block/stream.c
index 8ce6729..e0540ee 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -221,7 +221,9 @@ static const BlockJobDriver stream_job_driver = {
 void stream_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *base, const char *backing_file_str,
   int creation_flags, int64_t speed,
-  BlockdevOnError on_error, Error **errp)
+  BlockdevOnError on_error,
+  const char *filter_node_name,
+  Error **errp)
 {
 StreamBlockJob *s;
 BlockDriverState *iter;
diff --git a/blockdev.c b/blockdev.c
index bebd3ba..d719c47 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2489,6 +2489,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
   bool has_backing_file, const char *backing_file,
   bool has_speed, int64_t speed,
   bool has_on_error, BlockdevOnError on_error,
+  bool has_filter_node_name, const char *filter_node_name,
   bool has_auto_finalize, bool auto_finalize,
   bool has_auto_dismiss, bool auto_dismiss,
   Error **errp)
@@ -2571,7 +2572,8 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 
 stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
- job_flags, has_speed ? speed : 0, on_error, _err);
+ job_flags, has_speed ? speed : 0, on_error,
+ filter_node_name, _err);
 if (local_err) {
 error_propagate(errp, local_err);
 goto out;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 38cad9d..f782737 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1134,6 +1134,9 @@ int is_windows_drive(const char *filename);
  *  See @BlockJobCreateFlags
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @on_error: The action to take upon error.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the commit job inserts into the graph above @bs. NULL means
+ * that a node name should be autogenerated.
  * @errp: Error object.
  *
  * Start a streaming operation on @bs.  Clusters that are unallocated
@@ -1146,7 +1149,9 @@ int is_windows_drive(const char *filename);
 void stream_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *base, const char *backing_file_str,
   int creation_flags, int64_t speed,
-  BlockdevOnError on_error, Error **errp);
+  BlockdevOnError on_error,
+  const char *filter_node_name,
+  Error **errp);
 
 /**
  * commit_start:
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 3c16f1e..32fb097 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2533,6 +2533,11 @@
 #'stop' and 'enospc' can only be used if the block device
 #supports io-status (see BlockInfo).  Since 1.3.
 #
+# @filter-node-name: the node name that should be assigned to the
+#filter driver that the stream job inserts into the graph
+#above @device. If this option is not given, a node name is
+#autogenerated. (Since: 5.2)
+#
 # @auto-finalize: When false, this job will wait in a PENDING state after it 
has
 # finished its work, waiting for @block-job-finalize before
 # making any block graph changes.
@@ -2563,6 +2568,7 @@
   'data': { '*job-id': 'str', 'device': 'str', '*base': 'str',
 '*base-node': 'str', '*backing-file': 'str', '*speed': 'int',
 '*on-error': 'BlockdevOnError',
+ 

[PATCH v10 2/9] copy-on-read: add filter append/drop functions

2020-09-29 Thread Andrey Shinkevich via
Provide API for the COR-filter insertion/removal.
Also, drop the filter child permissions for an inactive state when the
filter node is being removed.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 84 
 block/copy-on-read.h | 35 ++
 2 files changed, 119 insertions(+)
 create mode 100644 block/copy-on-read.h

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index cb03e0f..3c8231f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -23,11 +23,21 @@
 #include "qemu/osdep.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "block/copy-on-read.h"
+
+
+typedef struct BDRVStateCOR {
+bool active;
+} BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BDRVStateCOR *state = bs->opaque;
+
 bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
false, errp);
@@ -42,6 +52,13 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+state->active = true;
+
+/*
+ * We don't need to call bdrv_child_refresh_perms() now as the permissions
+ * will be updated later when the filter node gets its parent.
+ */
+
 return 0;
 }
 
@@ -57,6 +74,17 @@ static void cor_child_perm(BlockDriverState *bs, BdrvChild 
*c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
 {
+BDRVStateCOR *s = bs->opaque;
+
+if (!s->active) {
+/*
+ * While the filter is being removed
+ */
+*nperm = 0;
+*nshared = BLK_PERM_ALL;
+return;
+}
+
 *nperm = perm & PERM_PASSTHROUGH;
 *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
 
@@ -135,6 +163,7 @@ static void cor_lock_medium(BlockDriverState *bs, bool 
locked)
 
 static BlockDriver bdrv_copy_on_read = {
 .format_name= "copy-on-read",
+.instance_size  = sizeof(BDRVStateCOR),
 
 .bdrv_open  = cor_open,
 .bdrv_child_perm= cor_child_perm,
@@ -159,4 +188,59 @@ static void bdrv_copy_on_read_init(void)
 bdrv_register(_copy_on_read);
 }
 
+
+BlockDriverState *bdrv_cor_filter_append(BlockDriverState *bs,
+ QDict *node_options,
+ int flags, Error **errp)
+{
+BlockDriverState *cor_filter_bs;
+Error *local_err = NULL;
+
+cor_filter_bs = bdrv_open(NULL, NULL, node_options, flags, errp);
+if (cor_filter_bs == NULL) {
+error_prepend(errp, "Could not create COR-filter node: ");
+return NULL;
+}
+
+bdrv_drained_begin(bs);
+bdrv_replace_node(bs, cor_filter_bs, _err);
+bdrv_drained_end(bs);
+
+if (local_err) {
+bdrv_unref(cor_filter_bs);
+error_propagate(errp, local_err);
+return NULL;
+}
+
+return cor_filter_bs;
+}
+
+
+void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs)
+{
+BdrvChild *child;
+BlockDriverState *bs;
+BDRVStateCOR *s = cor_filter_bs->opaque;
+
+child = bdrv_filter_child(cor_filter_bs);
+if (!child) {
+return;
+}
+bs = child->bs;
+
+/* Retain the BDS until we complete the graph change. */
+bdrv_ref(bs);
+/* Hold a guest back from writing while permissions are being reset. */
+bdrv_drained_begin(bs);
+/* Drop permissions before the graph change. */
+s->active = false;
+bdrv_child_refresh_perms(cor_filter_bs, child, _abort);
+bdrv_replace_node(cor_filter_bs, bs, _abort);
+
+bdrv_drained_end(bs);
+bdrv_unref(bs);
+bdrv_unref(cor_filter_bs);
+}
+
+
 block_init(bdrv_copy_on_read_init);
diff --git a/block/copy-on-read.h b/block/copy-on-read.h
new file mode 100644
index 000..d6f2422
--- /dev/null
+++ b/block/copy-on-read.h
@@ -0,0 +1,35 @@
+/*
+ * Copy-on-read filter block driver
+ *
+ * The filter driver performs Copy-On-Read (COR) operations
+ *
+ * Copyright (c) 2018-2020 Virtuozzo International GmbH.
+ *
+ * Author:
+ *   Andrey Shinkevich 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a 

[PATCH v10 1/9] copy-on-read: Support preadv/pwritev_part functions

2020-09-29 Thread Andrey Shinkevich via
Add support for the recently introduced functions
bdrv_co_preadv_part()
and
bdrv_co_pwritev_part()
to the COR-filter driver.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/copy-on-read.c | 28 
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 2816e61..cb03e0f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -74,21 +74,25 @@ static int64_t cor_getlength(BlockDriverState *bs)
 }
 
 
-static int coroutine_fn cor_co_preadv(BlockDriverState *bs,
-  uint64_t offset, uint64_t bytes,
-  QEMUIOVector *qiov, int flags)
+static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs,
+   uint64_t offset, uint64_t bytes,
+   QEMUIOVector *qiov,
+   size_t qiov_offset,
+   int flags)
 {
-return bdrv_co_preadv(bs->file, offset, bytes, qiov,
-  flags | BDRV_REQ_COPY_ON_READ);
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
 }
 
 
-static int coroutine_fn cor_co_pwritev(BlockDriverState *bs,
-   uint64_t offset, uint64_t bytes,
-   QEMUIOVector *qiov, int flags)
+static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs,
+uint64_t offset,
+uint64_t bytes,
+QEMUIOVector *qiov,
+size_t qiov_offset, int flags)
 {
-
-return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
+flags);
 }
 
 
@@ -137,8 +141,8 @@ static BlockDriver bdrv_copy_on_read = {
 
 .bdrv_getlength = cor_getlength,
 
-.bdrv_co_preadv = cor_co_preadv,
-.bdrv_co_pwritev= cor_co_pwritev,
+.bdrv_co_preadv_part= cor_co_preadv_part,
+.bdrv_co_pwritev_part   = cor_co_pwritev_part,
 .bdrv_co_pwrite_zeroes  = cor_co_pwrite_zeroes,
 .bdrv_co_pdiscard   = cor_co_pdiscard,
 .bdrv_co_pwritev_compressed = cor_co_pwritev_compressed,
-- 
1.8.3.1




[PATCH v10 0/9] Apply COR-filter to the block-stream permanently

2020-09-29 Thread Andrey Shinkevich via
Despite the patch "freeze link to base node..." has been removed from the
series in the current version 9, the iotest case test_stream_parallel does
not pass after the COR-filter is inserted into the backing chain. As the
test case may not be initialized, it does not make a sense and was removed
again.
The check with bdrv_is_allocated_above() takes place in the COR-filter and
in the block-stream job both. An optimization of the block-stream job based
on the filter functionality may be made in a separate series.

v10:
  02: The missed new file block/copy-on-read.h added
v9:
  02: Refactored.
  04: Base node name is used instead of the file name.
  05: New implementation based on Max' review.
  06: New.
  07: New. The patch "freeze link to base node..." was deleted.
  08: New.
  09: The filter node options are initialized.

The v8 Message-Id:
<1598633579-221780-1-git-send-email-andrey.shinkev...@virtuozzo.com>

Andrey Shinkevich (9):
  copy-on-read: Support preadv/pwritev_part functions
  copy-on-read: add filter append/drop functions
  qapi: add filter-node-name to block-stream
  copy-on-read: pass base node name to COR driver
  copy-on-read: limit guest COR activity to base in COR driver
  copy-on-read: skip non-guest reads if no copy needed
  stream: skip filters when writing backing file name to QCOW2 header
  block: remove unused backing-file name parameter
  block: apply COR-filter to block-stream jobs

 block/copy-on-read.c   | 165 ++---
 block/copy-on-read.h   |  35 +
 block/io.c |   2 +-
 block/monitor/block-hmp-cmds.c |   6 +-
 block/stream.c | 112 +---
 blockdev.c |  21 +-
 include/block/block_int.h  |   9 ++-
 qapi/block-core.json   |  23 ++
 tests/qemu-iotests/030 |  51 ++---
 tests/qemu-iotests/030.out |   4 +-
 tests/qemu-iotests/141.out |   2 +-
 tests/qemu-iotests/245 |  19 +++--
 12 files changed, 302 insertions(+), 147 deletions(-)
 create mode 100644 block/copy-on-read.h

-- 
1.8.3.1




[PATCH v10 5/9] copy-on-read: limit guest COR activity to base in COR driver

2020-09-29 Thread Andrey Shinkevich via
Limit the guest's COR operations by the base node in the backing chain
when the base node name is given. It will be useful for a block stream
job when the COR-filter is applied.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 38 --
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index e04092f..f53f7e0 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -121,8 +121,42 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
size_t qiov_offset,
int flags)
 {
-return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
-   flags | BDRV_REQ_COPY_ON_READ);
+int64_t n = 0;
+int64_t size = offset + bytes;
+int local_flags;
+int ret;
+BDRVStateCOR *state = bs->opaque;
+
+if (!state->base_bs) {
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
+}
+
+while (offset < size) {
+local_flags = flags;
+
+/* In case of failure, try to copy-on-read anyway */
+ret = bdrv_is_allocated(bs->file->bs, offset, bytes, );
+if (!ret) {
+ret = bdrv_is_allocated_above(bdrv_cow_bs(bs->file->bs),
+  state->base_bs, false, offset, n, 
);
+if (ret > 0) {
+local_flags |= BDRV_REQ_COPY_ON_READ;
+}
+}
+
+ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+  local_flags);
+if (ret < 0) {
+return ret;
+}
+
+offset += n;
+qiov_offset += n;
+bytes -= n;
+}
+
+return 0;
 }
 
 
-- 
1.8.3.1




[PATCH v9 9/9] block: apply COR-filter to block-stream jobs

2020-09-28 Thread Andrey Shinkevich via
This patch completes the series with the COR-filter insertion for
block-stream operations. Adding the filter makes it possible for copied
regions to be discarded in backing files during the block-stream job,
what will reduce the disk overuse.
The COR-filter insertion incurs changes in the iotests case
245:test_block_stream_4 that reopens the backing chain during a
block-stream job. There are changes in the iotests #030 as well.
The iotests case 030:test_stream_parallel was deleted due to multiple
conflicts between the concurrent job operations over the same backing
chain. The base backing node for one job is the top node for another
job. It may change due to the filter node inserted into the backing
chain while both jobs are running. Another issue is that the parts of
the backing chain are being frozen by the running job and may not be
changed by the concurrent job when needed. The concept of the parallel
jobs with common nodes is considered vital no more.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 93 ++
 tests/qemu-iotests/030 | 51 +++--
 tests/qemu-iotests/030.out |  4 +-
 tests/qemu-iotests/141.out |  2 +-
 tests/qemu-iotests/245 | 19 +++---
 5 files changed, 83 insertions(+), 86 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index fe2663f..240b3dc 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -17,8 +17,10 @@
 #include "block/blockjob_int.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
+#include "block/copy-on-read.h"
 
 enum {
 /*
@@ -33,6 +35,8 @@ typedef struct StreamBlockJob {
 BlockJob common;
 BlockDriverState *base_overlay; /* COW overlay (stream from this) */
 BlockDriverState *above_base;   /* Node directly above the base */
+BlockDriverState *cor_filter_bs;
+BlockDriverState *target_bs;
 BlockdevOnError on_error;
 bool bs_read_only;
 bool chain_frozen;
@@ -52,23 +56,20 @@ static void stream_abort(Job *job)
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 
 if (s->chain_frozen) {
-BlockJob *bjob = >common;
-bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 }
 }
 
 static int stream_prepare(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
-BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
 BlockDriverState *base_metadata = bdrv_skip_filters(base);
 Error *local_err = NULL;
 int ret = 0;
 
-bdrv_unfreeze_backing_chain(bs, s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 s->chain_frozen = false;
 
 if (bdrv_cow_child(unfiltered_bs)) {
@@ -94,13 +95,14 @@ static void stream_clean(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
+
+bdrv_cor_filter_drop(s->cor_filter_bs);
 
 /* Reopen the image back in read-only mode if necessary */
 if (s->bs_read_only) {
 /* Give up write permissions before making it read-only */
 blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, _abort);
-bdrv_reopen_set_read_only(bs, true, NULL);
+bdrv_reopen_set_read_only(s->target_bs, true, NULL);
 }
 }
 
@@ -108,9 +110,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockBackend *blk = s->common.blk;
-BlockDriverState *bs = blk_bs(blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
-bool enable_cor = !bdrv_cow_child(s->base_overlay);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 int64_t len;
 int64_t offset = 0;
 uint64_t delay_ns = 0;
@@ -122,21 +122,12 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 return 0;
 }
 
-len = bdrv_getlength(bs);
+len = bdrv_getlength(s->target_bs);
 if (len < 0) {
 return len;
 }
 job_progress_set_remaining(>common.job, len);
 
-/* Turn on copy-on-read for the whole block device so that guest read
- * requests help us make progress.  Only do this when copying the entire
- * backing chain since the copy-on-read operation does not take base into
- * account.
- */
-if (enable_cor) {
-bdrv_enable_copy_on_read(bs);
-}
-
 for ( ; offset < len; offset += n) {
 bool copy;
 int ret;
@@ -195,10 +186,6 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 }
 }
 
-

[PATCH v9 8/9] block: remove unused backing-file name parameter

2020-09-28 Thread Andrey Shinkevich via
The block stream QMP parameter backing-file is in use no more. It
designates a backing file name to set in QCOW2 image header after the
block stream job finished. The base file name is used instead.

Signed-off-by: Andrey Shinkevich 
---
 block/monitor/block-hmp-cmds.c |  2 +-
 block/stream.c |  6 +-
 blockdev.c | 17 +
 include/block/block_int.h  |  2 +-
 qapi/block-core.json   | 17 +
 5 files changed, 5 insertions(+), 39 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 4e66775..5f19499 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -506,7 +506,7 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
 int64_t speed = qdict_get_try_int(qdict, "speed", 0);
 
 qmp_block_stream(true, device, device, base != NULL, base, false, NULL,
- false, NULL, qdict_haskey(qdict, "speed"), speed, true,
+ qdict_haskey(qdict, "speed"), speed, true,
  BLOCKDEV_ON_ERROR_REPORT, false, NULL, false, false, 
false,
  false, );
 
diff --git a/block/stream.c b/block/stream.c
index b0719e9..fe2663f 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -34,7 +34,6 @@ typedef struct StreamBlockJob {
 BlockDriverState *base_overlay; /* COW overlay (stream from this) */
 BlockDriverState *above_base;   /* Node directly above the base */
 BlockdevOnError on_error;
-char *backing_file_str;
 bool bs_read_only;
 bool chain_frozen;
 } StreamBlockJob;
@@ -103,8 +102,6 @@ static void stream_clean(Job *job)
 blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, _abort);
 bdrv_reopen_set_read_only(bs, true, NULL);
 }
-
-g_free(s->backing_file_str);
 }
 
 static int coroutine_fn stream_run(Job *job, Error **errp)
@@ -220,7 +217,7 @@ static const BlockJobDriver stream_job_driver = {
 };
 
 void stream_start(const char *job_id, BlockDriverState *bs,
-  BlockDriverState *base, const char *backing_file_str,
+  BlockDriverState *base,
   int creation_flags, int64_t speed,
   BlockdevOnError on_error,
   const char *filter_node_name,
@@ -295,7 +292,6 @@ void stream_start(const char *job_id, BlockDriverState *bs,
 
 s->base_overlay = base_overlay;
 s->above_base = above_base;
-s->backing_file_str = g_strdup(backing_file_str);
 s->bs_read_only = bs_read_only;
 s->chain_frozen = true;
 
diff --git a/blockdev.c b/blockdev.c
index d719c47..b223601 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2486,7 +2486,6 @@ out:
 void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
   bool has_base, const char *base,
   bool has_base_node, const char *base_node,
-  bool has_backing_file, const char *backing_file,
   bool has_speed, int64_t speed,
   bool has_on_error, BlockdevOnError on_error,
   bool has_filter_node_name, const char *filter_node_name,
@@ -2498,7 +2497,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 BlockDriverState *base_bs = NULL;
 AioContext *aio_context;
 Error *local_err = NULL;
-const char *base_name = NULL;
 int job_flags = JOB_DEFAULT;
 
 if (!has_on_error) {
@@ -2526,7 +2524,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 goto out;
 }
 assert(bdrv_get_aio_context(base_bs) == aio_context);
-base_name = base;
 }
 
 if (has_base_node) {
@@ -2541,7 +2538,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 assert(bdrv_get_aio_context(base_bs) == aio_context);
 bdrv_refresh_filename(base_bs);
-base_name = base_bs->filename;
 }
 
 /* Check for op blockers in the whole chain between bs and base */
@@ -2553,17 +2549,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 }
 
-/* if we are streaming the entire chain, the result will have no backing
- * file, and specifying one is therefore an error */
-if (base_bs == NULL && has_backing_file) {
-error_setg(errp, "backing file specified, but streaming the "
- "entire chain");
-goto out;
-}
-
-/* backing_file string overrides base bs filename */
-base_name = has_backing_file ? backing_file : base_name;
-
 if (has_auto_finalize && !auto_finalize) {
 job_flags |= JOB_MANUAL_FINALIZE;
 }
@@ -2571,7 +2556,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 job_flags |= JOB_MANUAL_DISMISS;
 }
 
-stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
+stream_start(has_job_id ? 

[PATCH v9 2/9] copy-on-read: add filter append/drop functions

2020-09-28 Thread Andrey Shinkevich via
Provide API for the COR-filter insertion/removal.
Also, drop the filter child permissions for an inactive state when the
filter node is being removed.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 84 
 1 file changed, 84 insertions(+)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index cb03e0f..3c8231f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -23,11 +23,21 @@
 #include "qemu/osdep.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "block/copy-on-read.h"
+
+
+typedef struct BDRVStateCOR {
+bool active;
+} BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BDRVStateCOR *state = bs->opaque;
+
 bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
false, errp);
@@ -42,6 +52,13 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+state->active = true;
+
+/*
+ * We don't need to call bdrv_child_refresh_perms() now as the permissions
+ * will be updated later when the filter node gets its parent.
+ */
+
 return 0;
 }
 
@@ -57,6 +74,17 @@ static void cor_child_perm(BlockDriverState *bs, BdrvChild 
*c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
 {
+BDRVStateCOR *s = bs->opaque;
+
+if (!s->active) {
+/*
+ * While the filter is being removed
+ */
+*nperm = 0;
+*nshared = BLK_PERM_ALL;
+return;
+}
+
 *nperm = perm & PERM_PASSTHROUGH;
 *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
 
@@ -135,6 +163,7 @@ static void cor_lock_medium(BlockDriverState *bs, bool 
locked)
 
 static BlockDriver bdrv_copy_on_read = {
 .format_name= "copy-on-read",
+.instance_size  = sizeof(BDRVStateCOR),
 
 .bdrv_open  = cor_open,
 .bdrv_child_perm= cor_child_perm,
@@ -159,4 +188,59 @@ static void bdrv_copy_on_read_init(void)
 bdrv_register(_copy_on_read);
 }
 
+
+BlockDriverState *bdrv_cor_filter_append(BlockDriverState *bs,
+ QDict *node_options,
+ int flags, Error **errp)
+{
+BlockDriverState *cor_filter_bs;
+Error *local_err = NULL;
+
+cor_filter_bs = bdrv_open(NULL, NULL, node_options, flags, errp);
+if (cor_filter_bs == NULL) {
+error_prepend(errp, "Could not create COR-filter node: ");
+return NULL;
+}
+
+bdrv_drained_begin(bs);
+bdrv_replace_node(bs, cor_filter_bs, _err);
+bdrv_drained_end(bs);
+
+if (local_err) {
+bdrv_unref(cor_filter_bs);
+error_propagate(errp, local_err);
+return NULL;
+}
+
+return cor_filter_bs;
+}
+
+
+void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs)
+{
+BdrvChild *child;
+BlockDriverState *bs;
+BDRVStateCOR *s = cor_filter_bs->opaque;
+
+child = bdrv_filter_child(cor_filter_bs);
+if (!child) {
+return;
+}
+bs = child->bs;
+
+/* Retain the BDS until we complete the graph change. */
+bdrv_ref(bs);
+/* Hold a guest back from writing while permissions are being reset. */
+bdrv_drained_begin(bs);
+/* Drop permissions before the graph change. */
+s->active = false;
+bdrv_child_refresh_perms(cor_filter_bs, child, _abort);
+bdrv_replace_node(cor_filter_bs, bs, _abort);
+
+bdrv_drained_end(bs);
+bdrv_unref(bs);
+bdrv_unref(cor_filter_bs);
+}
+
+
 block_init(bdrv_copy_on_read_init);
-- 
1.8.3.1




[PATCH v9 1/9] copy-on-read: Support preadv/pwritev_part functions

2020-09-28 Thread Andrey Shinkevich via
Add support for the recently introduced functions
bdrv_co_preadv_part()
and
bdrv_co_pwritev_part()
to the COR-filter driver.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/copy-on-read.c | 28 
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 2816e61..cb03e0f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -74,21 +74,25 @@ static int64_t cor_getlength(BlockDriverState *bs)
 }
 
 
-static int coroutine_fn cor_co_preadv(BlockDriverState *bs,
-  uint64_t offset, uint64_t bytes,
-  QEMUIOVector *qiov, int flags)
+static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs,
+   uint64_t offset, uint64_t bytes,
+   QEMUIOVector *qiov,
+   size_t qiov_offset,
+   int flags)
 {
-return bdrv_co_preadv(bs->file, offset, bytes, qiov,
-  flags | BDRV_REQ_COPY_ON_READ);
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
 }
 
 
-static int coroutine_fn cor_co_pwritev(BlockDriverState *bs,
-   uint64_t offset, uint64_t bytes,
-   QEMUIOVector *qiov, int flags)
+static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs,
+uint64_t offset,
+uint64_t bytes,
+QEMUIOVector *qiov,
+size_t qiov_offset, int flags)
 {
-
-return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
+flags);
 }
 
 
@@ -137,8 +141,8 @@ static BlockDriver bdrv_copy_on_read = {
 
 .bdrv_getlength = cor_getlength,
 
-.bdrv_co_preadv = cor_co_preadv,
-.bdrv_co_pwritev= cor_co_pwritev,
+.bdrv_co_preadv_part= cor_co_preadv_part,
+.bdrv_co_pwritev_part   = cor_co_pwritev_part,
 .bdrv_co_pwrite_zeroes  = cor_co_pwrite_zeroes,
 .bdrv_co_pdiscard   = cor_co_pdiscard,
 .bdrv_co_pwritev_compressed = cor_co_pwritev_compressed,
-- 
1.8.3.1




[PATCH v9 0/9] Apply COR-filter to the block-stream permanently

2020-09-28 Thread Andrey Shinkevich via
Despite the patch "freeze link to base node..." has been removed from the
series in the current version 9, the iotest case test_stream_parallel does
not pass after the COR-filter is inserted into the backing chain. As the
test case may not be initialized, it does not make a sense and was removed
again.
The check with bdrv_is_allocated_above() takes place in the COR-filter and
in the block-stream job both. An optimization of the block-stream job based
on the filter functionality may be made in a separate series.

v9:
  02: Refactored.
  04: Base node name is used instead of the file name.
  05: New implementation based on Max' review.
  06: New.
  07: New. The patch "freeze link to base node..." was deleted.
  08: New.
  09: The filter node options are initialized.

The v8 Message-Id:
<1598633579-221780-1-git-send-email-andrey.shinkev...@virtuozzo.com>

Andrey Shinkevich (9):
  copy-on-read: Support preadv/pwritev_part functions
  copy-on-read: add filter append/drop functions
  qapi: add filter-node-name to block-stream
  copy-on-read: pass base node name to COR driver
  copy-on-read: limit guest COR activity to base in COR driver
  copy-on-read: skip non-guest reads if no copy needed
  stream: skip filters when writing backing file name to QCOW2 header
  block: remove unused parameter backing-file name
  block: apply COR-filter to block-stream jobs

 block/copy-on-read.c   | 165 ++---
 block/io.c |   2 +-
 block/monitor/block-hmp-cmds.c |   6 +-
 block/stream.c | 112 +---
 blockdev.c |  21 +-
 include/block/block_int.h  |   9 ++-
 qapi/block-core.json   |  23 ++
 tests/qemu-iotests/030 |  51 ++---
 tests/qemu-iotests/030.out |   4 +-
 tests/qemu-iotests/141.out |   2 +-
 tests/qemu-iotests/245 |  19 +++--
 11 files changed, 267 insertions(+), 147 deletions(-)

-- 
1.8.3.1




[PATCH v9 7/9] stream: skip filters when writing backing file name to QCOW2 header

2020-09-28 Thread Andrey Shinkevich via
Avoid writing a filter JSON-name to QCOW2 image when the backing file
is changed after the block stream job.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index e0540ee..b0719e9 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -65,6 +65,7 @@ static int stream_prepare(Job *job)
 BlockDriverState *bs = blk_bs(bjob->blk);
 BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
 BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
+BlockDriverState *base_metadata = bdrv_skip_filters(base);
 Error *local_err = NULL;
 int ret = 0;
 
@@ -73,10 +74,10 @@ static int stream_prepare(Job *job)
 
 if (bdrv_cow_child(unfiltered_bs)) {
 const char *base_id = NULL, *base_fmt = NULL;
-if (base) {
-base_id = s->backing_file_str;
-if (base->drv) {
-base_fmt = base->drv->format_name;
+if (base_metadata) {
+base_id = base_metadata->filename;
+if (base_metadata->drv) {
+base_fmt = base_metadata->drv->format_name;
 }
 }
 bdrv_set_backing_hd(unfiltered_bs, base, _err);
-- 
1.8.3.1




[PATCH v9 4/9] copy-on-read: pass base node name to COR driver

2020-09-28 Thread Andrey Shinkevich via
To limit the guest's COR operations by the base node in the backing
chain during stream job, pass the base node name to the copy-on-read
driver. The rest of the functionality will be implemented in the patch
that follows.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 3c8231f..e04092f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -24,19 +24,23 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qdict.h"
 #include "block/copy-on-read.h"
 
 
 typedef struct BDRVStateCOR {
 bool active;
+BlockDriverState *base_bs;
 } BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BlockDriverState *base_bs = NULL;
 BDRVStateCOR *state = bs->opaque;
+const char *base_node = qdict_get_try_str(options, "base");
 
 bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
@@ -52,7 +56,16 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+if (base_node) {
+qdict_del(options, "base");
+base_bs = bdrv_lookup_bs(NULL, base_node, errp);
+if (!base_bs) {
+error_setg(errp, QERR_BASE_NOT_FOUND, base_node);
+return -EINVAL;
+}
+}
 state->active = true;
+state->base_bs = base_bs;
 
 /*
  * We don't need to call bdrv_child_refresh_perms() now as the permissions
-- 
1.8.3.1




[PATCH v9 5/9] copy-on-read: limit guest COR activity to base in COR driver

2020-09-28 Thread Andrey Shinkevich via
Limit the guest's COR operations by the base node in the backing chain
when the base node name is given. It will be useful for a block stream
job when the COR-filter is applied.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 38 --
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index e04092f..f53f7e0 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -121,8 +121,42 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
size_t qiov_offset,
int flags)
 {
-return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
-   flags | BDRV_REQ_COPY_ON_READ);
+int64_t n = 0;
+int64_t size = offset + bytes;
+int local_flags;
+int ret;
+BDRVStateCOR *state = bs->opaque;
+
+if (!state->base_bs) {
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
+}
+
+while (offset < size) {
+local_flags = flags;
+
+/* In case of failure, try to copy-on-read anyway */
+ret = bdrv_is_allocated(bs->file->bs, offset, bytes, );
+if (!ret) {
+ret = bdrv_is_allocated_above(bdrv_cow_bs(bs->file->bs),
+  state->base_bs, false, offset, n, 
);
+if (ret > 0) {
+local_flags |= BDRV_REQ_COPY_ON_READ;
+}
+}
+
+ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+  local_flags);
+if (ret < 0) {
+return ret;
+}
+
+offset += n;
+qiov_offset += n;
+bytes -= n;
+}
+
+return 0;
 }
 
 
-- 
1.8.3.1




[PATCH v9 3/9] qapi: add filter-node-name to block-stream

2020-09-28 Thread Andrey Shinkevich via
Provide the possibility to pass the 'filter-node-name' parameter to the
block-stream job as it is done for the commit block job.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/monitor/block-hmp-cmds.c | 4 ++--
 block/stream.c | 4 +++-
 blockdev.c | 4 +++-
 include/block/block_int.h  | 7 ++-
 qapi/block-core.json   | 6 ++
 5 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 4d3db5e..4e66775 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -507,8 +507,8 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
 
 qmp_block_stream(true, device, device, base != NULL, base, false, NULL,
  false, NULL, qdict_haskey(qdict, "speed"), speed, true,
- BLOCKDEV_ON_ERROR_REPORT, false, false, false, false,
- );
+ BLOCKDEV_ON_ERROR_REPORT, false, NULL, false, false, 
false,
+ false, );
 
 hmp_handle_error(mon, error);
 }
diff --git a/block/stream.c b/block/stream.c
index 8ce6729..e0540ee 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -221,7 +221,9 @@ static const BlockJobDriver stream_job_driver = {
 void stream_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *base, const char *backing_file_str,
   int creation_flags, int64_t speed,
-  BlockdevOnError on_error, Error **errp)
+  BlockdevOnError on_error,
+  const char *filter_node_name,
+  Error **errp)
 {
 StreamBlockJob *s;
 BlockDriverState *iter;
diff --git a/blockdev.c b/blockdev.c
index bebd3ba..d719c47 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2489,6 +2489,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
   bool has_backing_file, const char *backing_file,
   bool has_speed, int64_t speed,
   bool has_on_error, BlockdevOnError on_error,
+  bool has_filter_node_name, const char *filter_node_name,
   bool has_auto_finalize, bool auto_finalize,
   bool has_auto_dismiss, bool auto_dismiss,
   Error **errp)
@@ -2571,7 +2572,8 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 
 stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
- job_flags, has_speed ? speed : 0, on_error, _err);
+ job_flags, has_speed ? speed : 0, on_error,
+ filter_node_name, _err);
 if (local_err) {
 error_propagate(errp, local_err);
 goto out;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 38cad9d..f782737 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1134,6 +1134,9 @@ int is_windows_drive(const char *filename);
  *  See @BlockJobCreateFlags
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @on_error: The action to take upon error.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the commit job inserts into the graph above @bs. NULL means
+ * that a node name should be autogenerated.
  * @errp: Error object.
  *
  * Start a streaming operation on @bs.  Clusters that are unallocated
@@ -1146,7 +1149,9 @@ int is_windows_drive(const char *filename);
 void stream_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *base, const char *backing_file_str,
   int creation_flags, int64_t speed,
-  BlockdevOnError on_error, Error **errp);
+  BlockdevOnError on_error,
+  const char *filter_node_name,
+  Error **errp);
 
 /**
  * commit_start:
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 3c16f1e..32fb097 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2533,6 +2533,11 @@
 #'stop' and 'enospc' can only be used if the block device
 #supports io-status (see BlockInfo).  Since 1.3.
 #
+# @filter-node-name: the node name that should be assigned to the
+#filter driver that the stream job inserts into the graph
+#above @device. If this option is not given, a node name is
+#autogenerated. (Since: 5.2)
+#
 # @auto-finalize: When false, this job will wait in a PENDING state after it 
has
 # finished its work, waiting for @block-job-finalize before
 # making any block graph changes.
@@ -2563,6 +2568,7 @@
   'data': { '*job-id': 'str', 'device': 'str', '*base': 'str',
 '*base-node': 'str', '*backing-file': 'str', '*speed': 'int',
 '*on-error': 'BlockdevOnError',
+ 

[PATCH v9 6/9] copy-on-read: skip non-guest reads if no copy needed

2020-09-28 Thread Andrey Shinkevich via
If the flag BDRV_REQ_PREFETCH was set, pass it further to the
COR-driver to skip unneeded reading. It can be taken into account for
the COR-algorithms optimization. That check is being made during the
block stream job by the moment.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 14 ++
 block/io.c   |  2 +-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index f53f7e0..5389dca 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -145,10 +145,16 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
 }
 }
 
-ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
-  local_flags);
-if (ret < 0) {
-return ret;
+if ((flags & BDRV_REQ_PREFETCH) &
+!(local_flags & BDRV_REQ_COPY_ON_READ)) {
+/* Skip non-guest reads if no copy needed */
+} else {
+
+ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+  local_flags);
+if (ret < 0) {
+return ret;
+}
 }
 
 offset += n;
diff --git a/block/io.c b/block/io.c
index 11df188..62b75a5 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1388,7 +1388,7 @@ static int coroutine_fn 
bdrv_co_do_copy_on_readv(BdrvChild *child,
 qemu_iovec_init_buf(_qiov, bounce_buffer, pnum);
 
 ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
- _qiov, 0, 0);
+ _qiov, 0, flags & 
BDRV_REQ_PREFETCH);
 if (ret < 0) {
 goto err;
 }
-- 
1.8.3.1




[PATCH v8 7/7] block: apply COR-filter to block-stream jobs

2020-08-28 Thread Andrey Shinkevich via
This patch completes the series with the COR-filter insertion for
block-stream operations. Adding the filter makes it possible for copied
regions to be discarded in backing files during the block-stream job,
what will reduce the disk overuse.
The COR-filter insertion incurs changes in the iotests case
245:test_block_stream_4 that reopens the backing chain during a
block-stream job. There are changes in the iotests #030 as well.
The iotests case 030:test_stream_parallel was deleted due to multiple
conflicts between the concurrent job operations over the same backing
chain. The base backing node for one job is the top node for another
job. It may change due to the filter node inserted into the backing
chain while both jobs are running. Another issue is that the parts of
the backing chain are being frozen by the running job and may not be
changed by the concurrent job when needed. The concept of the parallel
jobs with common nodes is considered vital no more.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 78 +++---
 tests/qemu-iotests/030 | 50 +++--
 tests/qemu-iotests/030.out |  4 +--
 tests/qemu-iotests/245 | 19 ---
 4 files changed, 67 insertions(+), 84 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index fee4117..ab8ba39 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -19,6 +19,7 @@
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
+#include "block/copy-on-read.h"
 
 enum {
 /*
@@ -33,6 +34,8 @@ typedef struct StreamBlockJob {
 BlockJob common;
 BlockDriverState *base_overlay; /* COW overlay (stream from this) */
 BlockDriverState *base;   /* The base node */
+BlockDriverState *cor_filter_bs;
+BlockDriverState *target_bs;
 BlockdevOnError on_error;
 char *backing_file_str;
 bool bs_read_only;
@@ -53,22 +56,19 @@ static void stream_abort(Job *job)
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 
 if (s->chain_frozen) {
-BlockJob *bjob = >common;
-bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->base);
 }
 }
 
 static int stream_prepare(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
-BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 BlockDriverState *base = s->base;
 Error *local_err = NULL;
 int ret = 0;
 
-bdrv_unfreeze_backing_chain(bs, s->base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->base);
 s->chain_frozen = false;
 
 if (bdrv_cow_child(unfiltered_bs)) {
@@ -94,13 +94,14 @@ static void stream_clean(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
+
+bdrv_cor_filter_drop(s->cor_filter_bs);
 
 /* Reopen the image back in read-only mode if necessary */
 if (s->bs_read_only) {
 /* Give up write permissions before making it read-only */
 blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, _abort);
-bdrv_reopen_set_read_only(bs, true, NULL);
+bdrv_reopen_set_read_only(s->target_bs, true, NULL);
 }
 
 g_free(s->backing_file_str);
@@ -110,9 +111,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockBackend *blk = s->common.blk;
-BlockDriverState *bs = blk_bs(blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
-bool enable_cor = !bdrv_cow_child(s->base_overlay);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 int64_t len;
 int64_t offset = 0;
 uint64_t delay_ns = 0;
@@ -124,21 +123,12 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 return 0;
 }
 
-len = bdrv_getlength(bs);
+len = bdrv_getlength(s->target_bs);
 if (len < 0) {
 return len;
 }
 job_progress_set_remaining(>common.job, len);
 
-/* Turn on copy-on-read for the whole block device so that guest read
- * requests help us make progress.  Only do this when copying the entire
- * backing chain since the copy-on-read operation does not take base into
- * account.
- */
-if (enable_cor) {
-bdrv_enable_copy_on_read(bs);
-}
-
 for ( ; offset < len; offset += n) {
 bool copy;
 int ret;
@@ -197,10 +187,6 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 }
 }
 
-if (enable_cor) {
-bdrv_disable_copy_on_read(bs);
-}
-
 /* Do not remove the backing file if an error was there but ignored. */
 return error;
 }
@@ -230,6 +216,7 @@ void stream_start(const char 

[PATCH v8 5/7] copy-on-read: limit guest writes to base in COR driver

2020-08-28 Thread Andrey Shinkevich via
Limit the guest's COR operations by the base node in the backing chain
during a stream job.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 49 +
 1 file changed, 49 insertions(+)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 1f858bb..ecbd1f8 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -57,6 +57,37 @@ static BlockDriverState *get_base_by_name(BlockDriverState 
*bs,
 return base_bs;
 }
 
+/*
+ * Returns 1 if the block is allocated in a node between 
cor_filter_bs->file->bs
+ * and the base_bs in the backing chain. Otherwise, returns 0.
+ * The COR operation is allowed if the base_bs is not specified - return 1.
+ * Returns negative errno on failure.
+ */
+static int node_above_base(BlockDriverState *cor_filter_bs, uint64_t offset,
+   uint64_t bytes)
+{
+int ret;
+int64_t dummy;
+BlockDriverState *file = NULL;
+BDRVStateCOR *state = cor_filter_bs->opaque;
+
+if (!state->base_bs) {
+return 1;
+}
+
+ret = bdrv_block_status_above(cor_filter_bs->file->bs, state->base_bs,
+  offset, bytes, , NULL, );
+if (ret < 0) {
+return ret;
+}
+
+if (file) {
+return 1;
+}
+
+return 0;
+}
+
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
@@ -153,6 +184,12 @@ static int coroutine_fn 
cor_co_pwritev_part(BlockDriverState *bs,
 QEMUIOVector *qiov,
 size_t qiov_offset, int flags)
 {
+int ret = node_above_base(bs, offset, bytes);
+
+if (!ret || ret < 0) {
+return ret;
+}
+
 return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
 flags);
 }
@@ -162,6 +199,12 @@ static int coroutine_fn 
cor_co_pwrite_zeroes(BlockDriverState *bs,
  int64_t offset, int bytes,
  BdrvRequestFlags flags)
 {
+int ret = node_above_base(bs, offset, bytes);
+
+if (!ret || ret < 0) {
+return ret;
+}
+
 return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 }
 
@@ -178,6 +221,12 @@ static int coroutine_fn 
cor_co_pwritev_compressed(BlockDriverState *bs,
   uint64_t bytes,
   QEMUIOVector *qiov)
 {
+int ret = node_above_base(bs, offset, bytes);
+
+if (!ret || ret < 0) {
+return ret;
+}
+
 return bdrv_co_pwritev(bs->file, offset, bytes, qiov,
BDRV_REQ_WRITE_COMPRESSED);
 }
-- 
1.8.3.1




[PATCH v8 6/7] block-stream: freeze link to base node during stream job

2020-08-28 Thread Andrey Shinkevich via
To keep the base node unchanged during the block-stream operation,
freeze it as the other part of the backing chain with the intermediate
nodes related to the job.
This patch revers the change made with the commit c624b015bf as the
correct base file name and its format have to be written down to the
QCOW2 header on the disk when the backing file is being changed after
the stream job completes.
This reversion incurs changes in the tests 030, 245 and discards the
test 258 where concurrent stream/commit jobs are tested. When the link
to a base node is frozen, the concurrent job cannot change the common
backing chain.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c |  29 ++--
 tests/qemu-iotests/030 |  10 +--
 tests/qemu-iotests/245 |   2 +-
 tests/qemu-iotests/258 | 161 -
 tests/qemu-iotests/258.out |  33 --
 5 files changed, 14 insertions(+), 221 deletions(-)
 delete mode 100755 tests/qemu-iotests/258
 delete mode 100644 tests/qemu-iotests/258.out

diff --git a/block/stream.c b/block/stream.c
index 8bf6b6d..fee4117 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -32,7 +32,7 @@ enum {
 typedef struct StreamBlockJob {
 BlockJob common;
 BlockDriverState *base_overlay; /* COW overlay (stream from this) */
-BlockDriverState *above_base;   /* Node directly above the base */
+BlockDriverState *base;   /* The base node */
 BlockdevOnError on_error;
 char *backing_file_str;
 bool bs_read_only;
@@ -54,7 +54,7 @@ static void stream_abort(Job *job)
 
 if (s->chain_frozen) {
 BlockJob *bjob = >common;
-bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->above_base);
+bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->base);
 }
 }
 
@@ -64,11 +64,11 @@ static int stream_prepare(Job *job)
 BlockJob *bjob = >common;
 BlockDriverState *bs = blk_bs(bjob->blk);
 BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
-BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
+BlockDriverState *base = s->base;
 Error *local_err = NULL;
 int ret = 0;
 
-bdrv_unfreeze_backing_chain(bs, s->above_base);
+bdrv_unfreeze_backing_chain(bs, s->base);
 s->chain_frozen = false;
 
 if (bdrv_cow_child(unfiltered_bs)) {
@@ -230,7 +230,6 @@ void stream_start(const char *job_id, BlockDriverState *bs,
 bool bs_read_only;
 int basic_flags = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
 BlockDriverState *base_overlay = bdrv_find_overlay(bs, base);
-BlockDriverState *above_base;
 
 if (!base_overlay) {
 error_setg(errp, "'%s' is not in the backing chain of '%s'",
@@ -238,20 +237,7 @@ void stream_start(const char *job_id, BlockDriverState *bs,
 return;
 }
 
-/*
- * Find the node directly above @base.  @base_overlay is a COW overlay, so
- * it must have a bdrv_cow_child(), but it is the immediate overlay of
- * @base, so between the two there can only be filters.
- */
-above_base = base_overlay;
-if (bdrv_cow_bs(above_base) != base) {
-above_base = bdrv_cow_bs(above_base);
-while (bdrv_filter_bs(above_base) != base) {
-above_base = bdrv_filter_bs(above_base);
-}
-}
-
-if (bdrv_freeze_backing_chain(bs, above_base, errp) < 0) {
+if (bdrv_freeze_backing_chain(bs, base, errp) < 0) {
 return;
 }
 
@@ -284,7 +270,6 @@ void stream_start(const char *job_id, BlockDriverState *bs,
  * above_base node might change after the call to
  * bdrv_reopen_set_read_only() due to parallel block jobs running.
  */
-base = bdrv_filter_or_cow_bs(above_base);
 for (iter = bdrv_filter_or_cow_bs(bs); iter != base;
  iter = bdrv_filter_or_cow_bs(iter))
 {
@@ -293,7 +278,7 @@ void stream_start(const char *job_id, BlockDriverState *bs,
 }
 
 s->base_overlay = base_overlay;
-s->above_base = above_base;
+s->base = base;
 s->backing_file_str = g_strdup(backing_file_str);
 s->bs_read_only = bs_read_only;
 s->chain_frozen = true;
@@ -307,5 +292,5 @@ fail:
 if (bs_read_only) {
 bdrv_reopen_set_read_only(bs, true, NULL);
 }
-bdrv_unfreeze_backing_chain(bs, above_base);
+bdrv_unfreeze_backing_chain(bs, base);
 }
diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index 1cdd7e2..c565e76 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -382,7 +382,7 @@ class TestParallelOps(iotests.QMPTestCase):
 # Stream from node2 into node4
 result = self.vm.qmp('block-stream', device='node4', 
base_node='node2', job_id='node4')
 self.assert_qmp(result, 'error/desc',
-"Cannot freeze 'backing' link to 'commit-filter'")
+"Cannot change 'backing' link from 'commit-filter' to 'node2'")
 
 result = self.vm.qmp('block-job-set-speed', device='drive0', speed=0)
 self.assert_qmp(result, 'return', {})
@@ 

[PATCH v8 3/7] qapi: add filter-node-name to block-stream

2020-08-28 Thread Andrey Shinkevich via
Provide the possibility to pass the 'filter-node-name' parameter to the
block-stream job as it is done for the commit block job.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/monitor/block-hmp-cmds.c | 4 ++--
 block/stream.c | 4 +++-
 blockdev.c | 4 +++-
 include/block/block_int.h  | 7 ++-
 qapi/block-core.json   | 6 ++
 5 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 4d3db5e..4e66775 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -507,8 +507,8 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
 
 qmp_block_stream(true, device, device, base != NULL, base, false, NULL,
  false, NULL, qdict_haskey(qdict, "speed"), speed, true,
- BLOCKDEV_ON_ERROR_REPORT, false, false, false, false,
- );
+ BLOCKDEV_ON_ERROR_REPORT, false, NULL, false, false, 
false,
+ false, );
 
 hmp_handle_error(mon, error);
 }
diff --git a/block/stream.c b/block/stream.c
index b9c1141..8bf6b6d 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -221,7 +221,9 @@ static const BlockJobDriver stream_job_driver = {
 void stream_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *base, const char *backing_file_str,
   int creation_flags, int64_t speed,
-  BlockdevOnError on_error, Error **errp)
+  BlockdevOnError on_error,
+  const char *filter_node_name,
+  Error **errp)
 {
 StreamBlockJob *s;
 BlockDriverState *iter;
diff --git a/blockdev.c b/blockdev.c
index 237fffb..cc531cb 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2476,6 +2476,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
   bool has_backing_file, const char *backing_file,
   bool has_speed, int64_t speed,
   bool has_on_error, BlockdevOnError on_error,
+  bool has_filter_node_name, const char *filter_node_name,
   bool has_auto_finalize, bool auto_finalize,
   bool has_auto_dismiss, bool auto_dismiss,
   Error **errp)
@@ -2558,7 +2559,8 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 
 stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
- job_flags, has_speed ? speed : 0, on_error, _err);
+ job_flags, has_speed ? speed : 0, on_error,
+ filter_node_name, _err);
 if (local_err) {
 error_propagate(errp, local_err);
 goto out;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 465a601..3efde33 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1122,6 +1122,9 @@ int is_windows_drive(const char *filename);
  *  See @BlockJobCreateFlags
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @on_error: The action to take upon error.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the commit job inserts into the graph above @bs. NULL means
+ * that a node name should be autogenerated.
  * @errp: Error object.
  *
  * Start a streaming operation on @bs.  Clusters that are unallocated
@@ -1134,7 +1137,9 @@ int is_windows_drive(const char *filename);
 void stream_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *base, const char *backing_file_str,
   int creation_flags, int64_t speed,
-  BlockdevOnError on_error, Error **errp);
+  BlockdevOnError on_error,
+  const char *filter_node_name,
+  Error **errp);
 
 /**
  * commit_start:
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 0b8ccd3..e5ccf8a 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2524,6 +2524,11 @@
 #'stop' and 'enospc' can only be used if the block device
 #supports io-status (see BlockInfo).  Since 1.3.
 #
+# @filter-node-name: the node name that should be assigned to the
+#filter driver that the stream job inserts into the graph
+#above @device. If this option is not given, a node name is
+#autogenerated. (Since: 5.2)
+#
 # @auto-finalize: When false, this job will wait in a PENDING state after it 
has
 # finished its work, waiting for @block-job-finalize before
 # making any block graph changes.
@@ -2554,6 +2559,7 @@
   'data': { '*job-id': 'str', 'device': 'str', '*base': 'str',
 '*base-node': 'str', '*backing-file': 'str', '*speed': 'int',
 '*on-error': 'BlockdevOnError',
+ 

[PATCH v8 4/7] copy-on-read: pass base file name to COR driver

2020-08-28 Thread Andrey Shinkevich via
To limit the guest's COR operations by the base node in the backing
chain during stream job, pass the base file name to the copy-on-read
driver. The rest of the functionality will be implemented in the patch
that follows.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 41 -
 block/copy-on-read.h |  1 +
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 0ede7aa..1f858bb 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -24,19 +24,45 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qdict.h"
 #include "block/copy-on-read.h"
 
 
 typedef struct BDRVStateCOR {
 bool active;
+BlockDriverState *base_bs;
 } BDRVStateCOR;
 
+/*
+ * Non-zero pointers are the caller's responsibility.
+ */
+static BlockDriverState *get_base_by_name(BlockDriverState *bs,
+  const char *base_name, Error **errp)
+{
+BlockDriverState *base_bs = NULL;
+AioContext *aio_context;
+
+base_bs = bdrv_find_backing_image(bs, base_name);
+if (base_bs == NULL) {
+error_setg(errp, QERR_BASE_NOT_FOUND, base_name);
+return NULL;
+}
+
+aio_context = bdrv_get_aio_context(bs);
+aio_context_acquire(aio_context);
+assert(bdrv_get_aio_context(base_bs) == aio_context);
+aio_context_release(aio_context);
+
+return base_bs;
+}
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BlockDriverState *base_bs = NULL;
 BDRVStateCOR *state = bs->opaque;
+const char *base_name = qdict_get_try_str(options, "base");
 
 bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
@@ -52,7 +78,15 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+if (base_name) {
+qdict_del(options, "base");
+base_bs = get_base_by_name(bs, base_name, errp);
+if (!base_bs) {
+return -EINVAL;
+}
+}
 state->active = true;
+state->base_bs = base_bs;
 
 /*
  * We don't need to call bdrv_child_refresh_perms() now as the permissions
@@ -190,6 +224,7 @@ static void bdrv_copy_on_read_init(void)
 
 
 static BlockDriverState *create_filter_node(BlockDriverState *bs,
+const char *base_name,
 const char *filter_node_name,
 Error **errp)
 {
@@ -197,6 +232,9 @@ static BlockDriverState 
*create_filter_node(BlockDriverState *bs,
 
 qdict_put_str(opts, "driver", "copy-on-read");
 qdict_put_str(opts, "file", bdrv_get_node_name(bs));
+if (base_name) {
+qdict_put_str(opts, "base", base_name);
+}
 if (filter_node_name) {
 qdict_put_str(opts, "node-name", filter_node_name);
 }
@@ -206,13 +244,14 @@ static BlockDriverState 
*create_filter_node(BlockDriverState *bs,
 
 
 BlockDriverState *bdrv_cor_filter_append(BlockDriverState *bs,
+ const char *base_name,
  const char *filter_node_name,
  Error **errp)
 {
 BlockDriverState *cor_filter_bs;
 Error *local_err = NULL;
 
-cor_filter_bs = create_filter_node(bs, filter_node_name, errp);
+cor_filter_bs = create_filter_node(bs, base_name, filter_node_name, errp);
 if (cor_filter_bs == NULL) {
 error_prepend(errp, "Could not create filter node: ");
 return NULL;
diff --git a/block/copy-on-read.h b/block/copy-on-read.h
index 1686e4e..6a7c8bb 100644
--- a/block/copy-on-read.h
+++ b/block/copy-on-read.h
@@ -28,6 +28,7 @@
 #include "block/block_int.h"
 
 BlockDriverState *bdrv_cor_filter_append(BlockDriverState *bs,
+ const char *base_name,
  const char *filter_node_name,
  Error **errp);
 void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs);
-- 
1.8.3.1




[PATCH v8 1/7] copy-on-read: Support preadv/pwritev_part functions

2020-08-28 Thread Andrey Shinkevich via
Add support for the recently introduced functions
bdrv_co_preadv_part()
and
bdrv_co_pwritev_part()
to the COR-filter driver.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/copy-on-read.c | 28 
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 2816e61..cb03e0f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -74,21 +74,25 @@ static int64_t cor_getlength(BlockDriverState *bs)
 }
 
 
-static int coroutine_fn cor_co_preadv(BlockDriverState *bs,
-  uint64_t offset, uint64_t bytes,
-  QEMUIOVector *qiov, int flags)
+static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs,
+   uint64_t offset, uint64_t bytes,
+   QEMUIOVector *qiov,
+   size_t qiov_offset,
+   int flags)
 {
-return bdrv_co_preadv(bs->file, offset, bytes, qiov,
-  flags | BDRV_REQ_COPY_ON_READ);
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
 }
 
 
-static int coroutine_fn cor_co_pwritev(BlockDriverState *bs,
-   uint64_t offset, uint64_t bytes,
-   QEMUIOVector *qiov, int flags)
+static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs,
+uint64_t offset,
+uint64_t bytes,
+QEMUIOVector *qiov,
+size_t qiov_offset, int flags)
 {
-
-return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
+flags);
 }
 
 
@@ -137,8 +141,8 @@ static BlockDriver bdrv_copy_on_read = {
 
 .bdrv_getlength = cor_getlength,
 
-.bdrv_co_preadv = cor_co_preadv,
-.bdrv_co_pwritev= cor_co_pwritev,
+.bdrv_co_preadv_part= cor_co_preadv_part,
+.bdrv_co_pwritev_part   = cor_co_pwritev_part,
 .bdrv_co_pwrite_zeroes  = cor_co_pwrite_zeroes,
 .bdrv_co_pdiscard   = cor_co_pdiscard,
 .bdrv_co_pwritev_compressed = cor_co_pwritev_compressed,
-- 
1.8.3.1




[PATCH v8 2/7] copy-on-read: add filter append/drop functions

2020-08-28 Thread Andrey Shinkevich via
Provide API for the COR-filter insertion/removal.
Also, drop the filter child permissions for an inactive state when the
filter node is being removed.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/copy-on-read.c | 104 +++
 block/copy-on-read.h |  35 +
 2 files changed, 139 insertions(+)
 create mode 100644 block/copy-on-read.h

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index cb03e0f..0ede7aa 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -23,11 +23,21 @@
 #include "qemu/osdep.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "block/copy-on-read.h"
+
+
+typedef struct BDRVStateCOR {
+bool active;
+} BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BDRVStateCOR *state = bs->opaque;
+
 bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
false, errp);
@@ -42,6 +52,13 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+state->active = true;
+
+/*
+ * We don't need to call bdrv_child_refresh_perms() now as the permissions
+ * will be updated later when the filter node gets its parent.
+ */
+
 return 0;
 }
 
@@ -57,6 +74,17 @@ static void cor_child_perm(BlockDriverState *bs, BdrvChild 
*c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
 {
+BDRVStateCOR *s = bs->opaque;
+
+if (!s->active) {
+/*
+ * While the filter is being removed
+ */
+*nperm = 0;
+*nshared = BLK_PERM_ALL;
+return;
+}
+
 *nperm = perm & PERM_PASSTHROUGH;
 *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
 
@@ -135,6 +163,7 @@ static void cor_lock_medium(BlockDriverState *bs, bool 
locked)
 
 static BlockDriver bdrv_copy_on_read = {
 .format_name= "copy-on-read",
+.instance_size  = sizeof(BDRVStateCOR),
 
 .bdrv_open  = cor_open,
 .bdrv_child_perm= cor_child_perm,
@@ -159,4 +188,79 @@ static void bdrv_copy_on_read_init(void)
 bdrv_register(_copy_on_read);
 }
 
+
+static BlockDriverState *create_filter_node(BlockDriverState *bs,
+const char *filter_node_name,
+Error **errp)
+{
+QDict *opts = qdict_new();
+
+qdict_put_str(opts, "driver", "copy-on-read");
+qdict_put_str(opts, "file", bdrv_get_node_name(bs));
+if (filter_node_name) {
+qdict_put_str(opts, "node-name", filter_node_name);
+}
+
+return bdrv_open(NULL, NULL, opts, BDRV_O_RDWR, errp);
+}
+
+
+BlockDriverState *bdrv_cor_filter_append(BlockDriverState *bs,
+ const char *filter_node_name,
+ Error **errp)
+{
+BlockDriverState *cor_filter_bs;
+Error *local_err = NULL;
+
+cor_filter_bs = create_filter_node(bs, filter_node_name, errp);
+if (cor_filter_bs == NULL) {
+error_prepend(errp, "Could not create filter node: ");
+return NULL;
+}
+
+if (!filter_node_name) {
+cor_filter_bs->implicit = true;
+}
+
+bdrv_drained_begin(bs);
+bdrv_replace_node(bs, cor_filter_bs, _err);
+bdrv_drained_end(bs);
+
+if (local_err) {
+bdrv_unref(cor_filter_bs);
+error_propagate(errp, local_err);
+return NULL;
+}
+
+return cor_filter_bs;
+}
+
+
+void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs)
+{
+BdrvChild *child;
+BlockDriverState *bs;
+BDRVStateCOR *s = cor_filter_bs->opaque;
+
+child = bdrv_filter_child(cor_filter_bs);
+if (!child) {
+return;
+}
+bs = child->bs;
+
+/* Retain the BDS until we complete the graph change. */
+bdrv_ref(bs);
+/* Hold a guest back from writing while permissions are being reset. */
+bdrv_drained_begin(bs);
+/* Drop permissions before the graph change. */
+s->active = false;
+bdrv_child_refresh_perms(cor_filter_bs, child, _abort);
+bdrv_replace_node(cor_filter_bs, bs, _abort);
+
+bdrv_drained_end(bs);
+bdrv_unref(bs);
+bdrv_unref(cor_filter_bs);
+}
+
+
 block_init(bdrv_copy_on_read_init);
diff --git a/block/copy-on-read.h b/block/copy-on-read.h
new file mode 100644
index 000..1686e4e
--- /dev/null
+++ b/block/copy-on-read.h
@@ -0,0 +1,35 @@
+/*
+ * Copy-on-read filter block driver
+ *
+ * The filter driver performs Copy-On-Read (COR) operations
+ *
+ * Copyright (c) 

[PATCH v8 0/7] Apply COR-filter to the block-stream permanently

2020-08-28 Thread Andrey Shinkevich via
Note: this series is based on the another one "block: Deal with filters"
  by Max Reitz that could be found in the branches:
  https://git.xanclic.moe/XanClic/qemu child-access-functions-v6
  https://github.com/XanClic/qemu child-access-functions-v6

v8:
  03: qapi - version changed to 'Since: 5.2'.
  04: New.
  05: New.
  06: New.
  07: The extra bs variable and the enable_cor were dropped.

The v7 Message-Id:
<1598257914-887267-1-git-send-email-andrey.shinkev...@virtuozzo.com>

Andrey Shinkevich (7):
  copy-on-read: Support preadv/pwritev_part functions
  copy-on-read: add filter append/drop functions
  qapi: add filter-node-name to block-stream
  copy-on-read: pass base file name to COR driver
  copy-on-read: limit guest writes to base in COR driver
  block-stream: freeze link to base node during stream job
  block: apply COR-filter to block-stream jobs

 block/copy-on-read.c   | 218 ++---
 block/copy-on-read.h   |  36 +++
 block/monitor/block-hmp-cmds.c |   4 +-
 block/stream.c | 105 ++--
 blockdev.c |   4 +-
 include/block/block_int.h  |   7 +-
 qapi/block-core.json   |   6 ++
 tests/qemu-iotests/030 |  60 +++-
 tests/qemu-iotests/030.out |   4 +-
 tests/qemu-iotests/245 |  21 ++--
 tests/qemu-iotests/258 | 161 --
 tests/qemu-iotests/258.out |  33 ---
 12 files changed, 341 insertions(+), 318 deletions(-)
 create mode 100644 block/copy-on-read.h
 delete mode 100755 tests/qemu-iotests/258
 delete mode 100644 tests/qemu-iotests/258.out

-- 
1.8.3.1