[PATCH v13 10/10] block: apply COR-filter to block-stream jobs

2020-12-02 Thread Andrey Shinkevich via
This patch completes the series with the COR-filter applied to
block-stream operations. Adding the filter makes it possible for copied
regions to be discarded in backing files during the block-stream job,
which will reduce disk overuse.
The COR-filter insertion requires changes to the test case
245:test_block_stream_4, which reopens the backing chain during a
block-stream job. There are changes to the test #030 as well.
The test case 030:test_stream_parallel was deleted due to multiple
conflicts between the concurrent job operations over the same backing
chain. All the nodes involved in one job are frozen, including the
filter node. Operations on those nodes, including the filter one, are
blocked for other jobs. So, the filter node would get involved in two
concurrent jobs together with the adjacent data node, which is not
allowed. This is what the test cases with overlapping jobs are about.
The concept of parallel jobs with common nodes is no longer considered
vital.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 97 ++
 tests/qemu-iotests/030 | 51 +++-
 tests/qemu-iotests/030.out |  4 +-
 tests/qemu-iotests/141.out |  2 +-
 tests/qemu-iotests/245 | 22 +++
 5 files changed, 86 insertions(+), 90 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index 061268b..2f80fae 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -18,8 +18,10 @@
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/error-report.h"
+#include "qapi/qmp/qdict.h"
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
+#include "block/copy-on-read.h"
 
 enum {
 /*
@@ -34,6 +36,8 @@ typedef struct StreamBlockJob {
 BlockJob common;
 BlockDriverState *base_overlay; /* COW overlay (stream from this) */
 BlockDriverState *above_base;   /* Node directly above the base */
+BlockDriverState *cor_filter_bs;
+BlockDriverState *target_bs;
 BlockdevOnError on_error;
 char *backing_file_str;
 bool bs_read_only;
@@ -45,8 +49,7 @@ static int coroutine_fn stream_populate(BlockBackend *blk,
 {
 assert(bytes < SIZE_MAX);
 
-return blk_co_preadv(blk, offset, bytes, NULL,
- BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
+return blk_co_preadv(blk, offset, bytes, NULL, BDRV_REQ_PREFETCH);
 }
 
 static void stream_abort(Job *job)
@@ -54,24 +57,21 @@ static void stream_abort(Job *job)
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 
 if (s->chain_frozen) {
-BlockJob *bjob = >common;
-bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 }
 }
 
 static int stream_prepare(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
-BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
 BlockDriverState *base_unfiltered;
 BlockDriverState *backing_bs;
 Error *local_err = NULL;
 int ret = 0;
 
-bdrv_unfreeze_backing_chain(bs, s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 s->chain_frozen = false;
 
 if (bdrv_cow_child(unfiltered_bs)) {
@@ -79,7 +79,7 @@ static int stream_prepare(Job *job)
 if (base) {
 base_id = s->backing_file_str;
 if (base_id) {
-backing_bs = bdrv_find_backing_image(bs, base_id);
+backing_bs = bdrv_find_backing_image(unfiltered_bs, base_id);
 if (backing_bs && backing_bs->drv) {
 base_fmt = backing_bs->drv->format_name;
 } else {
@@ -111,15 +111,16 @@ static void stream_clean(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
 
 /* Reopen the image back in read-only mode if necessary */
 if (s->bs_read_only) {
 /* Give up write permissions before making it read-only */
 blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, _abort);
-bdrv_reopen_set_read_only(bs, true, NULL);
+bdrv_reopen_set_read_only(s->target_bs, true, NULL);
 }
 
+bdrv_cor_filter_drop(s->cor_filter_bs);
+
 g_free(s->backing_file_str);
 }
 
@@ -127,9 +128,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockBackend *blk = s->common.blk;
-BlockDriverState *bs = blk_bs(blk);
-BlockDriverState *unfilt

[PATCH v13 03/10] copy-on-read: add filter drop function

2020-12-02 Thread Andrey Shinkevich via
Provide an API for the COR-filter removal. Also, drop the filter child
permissions for an inactive state when the filter node is being
removed.
To insert the filter, the block generic layer function
bdrv_insert_node() can be used.
The new function bdrv_cor_filter_drop() may be considered an
intermediate solution until the QEMU permission update system has been
overhauled. After that, we will be able to implement the API function
bdrv_remove_node() on the block generic layer.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/copy-on-read.c | 56 
 block/copy-on-read.h | 32 ++
 2 files changed, 88 insertions(+)
 create mode 100644 block/copy-on-read.h

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index cb03e0f..618c4c4 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -23,11 +23,20 @@
 #include "qemu/osdep.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
+#include "qapi/error.h"
+#include "block/copy-on-read.h"
+
+
+typedef struct BDRVStateCOR {
+bool active;
+} BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BDRVStateCOR *state = bs->opaque;
+
 bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
false, errp);
@@ -42,6 +51,13 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+state->active = true;
+
+/*
+ * We don't need to call bdrv_child_refresh_perms() now as the permissions
+ * will be updated later when the filter node gets its parent.
+ */
+
 return 0;
 }
 
@@ -57,6 +73,17 @@ static void cor_child_perm(BlockDriverState *bs, BdrvChild 
*c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
 {
+BDRVStateCOR *s = bs->opaque;
+
+if (!s->active) {
+/*
+ * While the filter is being removed
+ */
+*nperm = 0;
+*nshared = BLK_PERM_ALL;
+return;
+}
+
 *nperm = perm & PERM_PASSTHROUGH;
 *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
 
@@ -135,6 +162,7 @@ static void cor_lock_medium(BlockDriverState *bs, bool 
locked)
 
 static BlockDriver bdrv_copy_on_read = {
 .format_name= "copy-on-read",
+.instance_size  = sizeof(BDRVStateCOR),
 
 .bdrv_open  = cor_open,
 .bdrv_child_perm= cor_child_perm,
@@ -154,6 +182,34 @@ static BlockDriver bdrv_copy_on_read = {
 .is_filter  = true,
 };
 
+
+void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs)
+{
+BdrvChild *child;
+BlockDriverState *bs;
+BDRVStateCOR *s = cor_filter_bs->opaque;
+
+child = bdrv_filter_child(cor_filter_bs);
+if (!child) {
+return;
+}
+bs = child->bs;
+
+/* Retain the BDS until we complete the graph change. */
+bdrv_ref(bs);
+/* Hold a guest back from writing while permissions are being reset. */
+bdrv_drained_begin(bs);
+/* Drop permissions before the graph change. */
+s->active = false;
+bdrv_child_refresh_perms(cor_filter_bs, child, _abort);
+bdrv_replace_node(cor_filter_bs, bs, _abort);
+
+bdrv_drained_end(bs);
+bdrv_unref(bs);
+bdrv_unref(cor_filter_bs);
+}
+
+
 static void bdrv_copy_on_read_init(void)
 {
 bdrv_register(_copy_on_read);
diff --git a/block/copy-on-read.h b/block/copy-on-read.h
new file mode 100644
index 000..7bf405d
--- /dev/null
+++ b/block/copy-on-read.h
@@ -0,0 +1,32 @@
+/*
+ * Copy-on-read filter block driver
+ *
+ * The filter driver performs Copy-On-Read (COR) operations
+ *
+ * Copyright (c) 2018-2020 Virtuozzo International GmbH.
+ *
+ * Author:
+ *   Andrey Shinkevich 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef BLOCK_COPY_ON_READ
+#define BLOCK_COPY_ON_READ
+
+#include "block/block_int.h"
+
+void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs);
+
+#endif /* BLOCK_COPY_ON_READ */
-- 
1.8.3.1




[PATCH v13 01/10] copy-on-read: support preadv/pwritev_part functions

2020-12-02 Thread Andrey Shinkevich via
Add support for the recently introduced functions
bdrv_co_preadv_part()
and
bdrv_co_pwritev_part()
to the COR-filter driver.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/copy-on-read.c | 28 
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 2816e61..cb03e0f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -74,21 +74,25 @@ static int64_t cor_getlength(BlockDriverState *bs)
 }
 
 
-static int coroutine_fn cor_co_preadv(BlockDriverState *bs,
-  uint64_t offset, uint64_t bytes,
-  QEMUIOVector *qiov, int flags)
+static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs,
+   uint64_t offset, uint64_t bytes,
+   QEMUIOVector *qiov,
+   size_t qiov_offset,
+   int flags)
 {
-return bdrv_co_preadv(bs->file, offset, bytes, qiov,
-  flags | BDRV_REQ_COPY_ON_READ);
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
 }
 
 
-static int coroutine_fn cor_co_pwritev(BlockDriverState *bs,
-   uint64_t offset, uint64_t bytes,
-   QEMUIOVector *qiov, int flags)
+static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs,
+uint64_t offset,
+uint64_t bytes,
+QEMUIOVector *qiov,
+size_t qiov_offset, int flags)
 {
-
-return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
+flags);
 }
 
 
@@ -137,8 +141,8 @@ static BlockDriver bdrv_copy_on_read = {
 
 .bdrv_getlength = cor_getlength,
 
-.bdrv_co_preadv = cor_co_preadv,
-.bdrv_co_pwritev= cor_co_pwritev,
+.bdrv_co_preadv_part= cor_co_preadv_part,
+.bdrv_co_pwritev_part   = cor_co_pwritev_part,
 .bdrv_co_pwrite_zeroes  = cor_co_pwrite_zeroes,
 .bdrv_co_pdiscard   = cor_co_pdiscard,
 .bdrv_co_pwritev_compressed = cor_co_pwritev_compressed,
-- 
1.8.3.1




[PATCH v13 02/10] block: add API function to insert a node

2020-12-02 Thread Andrey Shinkevich via
Provide an API for inserting a node into a backing chain.

Suggested-by: Max Reitz 
Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block.c   | 25 +
 include/block/block.h |  2 ++
 2 files changed, 27 insertions(+)

diff --git a/block.c b/block.c
index f1cedac..b71c39f 100644
--- a/block.c
+++ b/block.c
@@ -4698,6 +4698,31 @@ static void bdrv_delete(BlockDriverState *bs)
 g_free(bs);
 }
 
+BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
+   int flags, Error **errp)
+{
+BlockDriverState *new_node_bs;
+Error *local_err = NULL;
+
+new_node_bs =  bdrv_open(NULL, NULL, node_options, flags, errp);
+if (new_node_bs == NULL) {
+error_prepend(errp, "Could not create node: ");
+return NULL;
+}
+
+bdrv_drained_begin(bs);
+bdrv_replace_node(bs, new_node_bs, _err);
+bdrv_drained_end(bs);
+
+if (local_err) {
+bdrv_unref(new_node_bs);
+error_propagate(errp, local_err);
+return NULL;
+}
+
+return new_node_bs;
+}
+
 /*
  * Run consistency checks on an image
  *
diff --git a/include/block/block.h b/include/block/block.h
index c9d7c58..81a3894 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -350,6 +350,8 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState 
*bs_top,
  Error **errp);
 void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
Error **errp);
+BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
+   int flags, Error **errp);
 
 int bdrv_parse_aio(const char *mode, int *flags);
 int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
-- 
1.8.3.1




[PATCH v13 06/10] iotests: add #310 to test bottom node in COR driver

2020-12-02 Thread Andrey Shinkevich via
The test case #310 is similar to #216 by Max Reitz. The difference is
that the test #310 involves a bottom node to the COR filter driver.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/310 | 114 +
 tests/qemu-iotests/310.out |  15 ++
 tests/qemu-iotests/group   |   1 +
 3 files changed, 130 insertions(+)
 create mode 100755 tests/qemu-iotests/310
 create mode 100644 tests/qemu-iotests/310.out

diff --git a/tests/qemu-iotests/310 b/tests/qemu-iotests/310
new file mode 100755
index 000..c8b34cd
--- /dev/null
+++ b/tests/qemu-iotests/310
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+#
+# Copy-on-read tests using a COR filter with a bottom node
+#
+# Copyright (C) 2018 Red Hat, Inc.
+# Copyright (c) 2020 Virtuozzo International GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import iotests
+from iotests import log, qemu_img, qemu_io_silent
+
+# Need backing file support
+iotests.script_initialize(supported_fmts=['qcow2', 'qcow', 'qed', 'vmdk'],
+  supported_platforms=['linux'])
+
+log('')
+log('=== Copy-on-read across nodes ===')
+log('')
+
+# This test is similar to the 216 one by Max Reitz 
+# The difference is that this test case involves a bottom node to the
+# COR filter driver.
+
+with iotests.FilePath('base.img') as base_img_path, \
+ iotests.FilePath('mid.img') as mid_img_path, \
+ iotests.FilePath('top.img') as top_img_path, \
+ iotests.VM() as vm:
+
+log('--- Setting up images ---')
+log('')
+
+assert qemu_img('create', '-f', iotests.imgfmt, base_img_path, '64M') == 0
+assert qemu_io_silent(base_img_path, '-c', 'write -P 1 0M 1M') == 0
+assert qemu_io_silent(base_img_path, '-c', 'write -P 1 3M 1M') == 0
+assert qemu_img('create', '-f', iotests.imgfmt, '-b', base_img_path,
+'-F', iotests.imgfmt, mid_img_path) == 0
+assert qemu_io_silent(mid_img_path,  '-c', 'write -P 3 2M 1M') == 0
+assert qemu_io_silent(mid_img_path,  '-c', 'write -P 3 4M 1M') == 0
+assert qemu_img('create', '-f', iotests.imgfmt, '-b', mid_img_path,
+'-F', iotests.imgfmt, top_img_path) == 0
+assert qemu_io_silent(top_img_path,  '-c', 'write -P 2 1M 1M') == 0
+
+#  0 1 2 3 4
+# top2
+# mid  3   3
+# base 1 1
+
+log('Done')
+
+log('')
+log('--- Doing COR ---')
+log('')
+
+vm.launch()
+
+log(vm.qmp('blockdev-add',
+   node_name='node0',
+   driver='copy-on-read',
+   bottom='node2',
+   file={
+   'driver': iotests.imgfmt,
+   'file': {
+   'driver': 'file',
+   'filename': top_img_path
+   },
+   'backing': {
+   'node-name': 'node2',
+   'driver': iotests.imgfmt,
+   'file': {
+   'driver': 'file',
+   'filename': mid_img_path
+   },
+   'backing': {
+   'driver': iotests.imgfmt,
+   'file': {
+   'driver': 'file',
+   'filename': base_img_path
+   }
+   },
+   }
+   }))
+
+# Trigger COR
+log(vm.qmp('human-monitor-command',
+   command_line='qemu-io node0 "read 0 5M"'))
+
+vm.shutdown()
+
+log('')
+log('--- Checking COR result ---')
+log('')
+
+assert qemu_io_silent(base_img_path, '-c', 'discard 0 4M') == 0
+assert qemu_io_silent(mid_img_path, '-c', 'discard 0M 5M') == 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 0 0 1M') == 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 2 1M 1M') == 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 3 2M 1M') == 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 0 3M 1M') == 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 3 4M 1M') == 0
+
+log('Done')
diff --git a/tests/qemu-iotests/310.out b/tests/qemu-iotests/310.out
new file mode 100644
index 000..a70aa5c
--- /dev/null
+++ b/tests/qemu-iotests/310.out
@@ -0,0 +1,15 @@
+
+=== Copy-on-r

[PATCH v13 09/10] stream: skip filters when writing backing file name to QCOW2 header

2020-12-02 Thread Andrey Shinkevich via
Avoid writing a filter JSON file name and a filter format name to a
QCOW2 image when the backing file is being changed after the
block-stream job. This can occur due to a concurrent commit job on the
same backing chain.
A user is still able to assign the 'backing-file' parameter for a
block-stream job keeping in mind the possible issue mentioned above.
If the user does not specify the 'backing-file' parameter, QEMU will
assign it automatically.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 21 +++--
 blockdev.c |  8 +---
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index 6e281c7..061268b 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -17,6 +17,7 @@
 #include "block/blockjob_int.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
+#include "qemu/error-report.h"
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
 
@@ -65,6 +66,8 @@ static int stream_prepare(Job *job)
 BlockDriverState *bs = blk_bs(bjob->blk);
 BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
 BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
+BlockDriverState *base_unfiltered;
+BlockDriverState *backing_bs;
 Error *local_err = NULL;
 int ret = 0;
 
@@ -75,8 +78,22 @@ static int stream_prepare(Job *job)
 const char *base_id = NULL, *base_fmt = NULL;
 if (base) {
 base_id = s->backing_file_str;
-if (base->drv) {
-base_fmt = base->drv->format_name;
+if (base_id) {
+backing_bs = bdrv_find_backing_image(bs, base_id);
+if (backing_bs && backing_bs->drv) {
+base_fmt = backing_bs->drv->format_name;
+} else {
+error_report("Format not found for backing file %s",
+ s->backing_file_str);
+}
+} else {
+base_unfiltered = bdrv_skip_filters(base);
+if (base_unfiltered) {
+base_id = base_unfiltered->filename;
+if (base_unfiltered->drv) {
+base_fmt = base_unfiltered->drv->format_name;
+}
+}
 }
 }
 bdrv_set_backing_hd(unfiltered_bs, base, _err);
diff --git a/blockdev.c b/blockdev.c
index c917625..70900f4 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2508,7 +2508,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 BlockDriverState *base_bs = NULL;
 AioContext *aio_context;
 Error *local_err = NULL;
-const char *base_name = NULL;
 int job_flags = JOB_DEFAULT;
 
 if (!has_on_error) {
@@ -2536,7 +2535,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 goto out;
 }
 assert(bdrv_get_aio_context(base_bs) == aio_context);
-base_name = base;
 }
 
 if (has_base_node) {
@@ -2551,7 +2549,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 assert(bdrv_get_aio_context(base_bs) == aio_context);
 bdrv_refresh_filename(base_bs);
-base_name = base_bs->filename;
 }
 
 /* Check for op blockers in the whole chain between bs and base */
@@ -2571,9 +2568,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 goto out;
 }
 
-/* backing_file string overrides base bs filename */
-base_name = has_backing_file ? backing_file : base_name;
-
 if (has_auto_finalize && !auto_finalize) {
 job_flags |= JOB_MANUAL_FINALIZE;
 }
@@ -2581,7 +2575,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 job_flags |= JOB_MANUAL_DISMISS;
 }
 
-stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
+stream_start(has_job_id ? job_id : NULL, bs, base_bs, backing_file,
  job_flags, has_speed ? speed : 0, on_error,
  filter_node_name, _err);
 if (local_err) {
-- 
1.8.3.1




[PATCH v13 07/10] block: include supported_read_flags into BDS structure

2020-12-02 Thread Andrey Shinkevich via
Add the new member supported_read_flags to the BlockDriverState
structure. It will control the flags set for copy-on-read operations.
Make the block generic layer evaluate supported read flags before they
go to a block driver.

Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/io.c| 12 ++--
 include/block/block_int.h |  4 
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/block/io.c b/block/io.c
index ec5e152..e28b11c 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1405,6 +1405,9 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
 if (flags & BDRV_REQ_COPY_ON_READ) {
 int64_t pnum;
 
+/* The flag BDRV_REQ_COPY_ON_READ has reached its addressee */
+flags &= ~BDRV_REQ_COPY_ON_READ;
+
 ret = bdrv_is_allocated(bs, offset, bytes, );
 if (ret < 0) {
 goto out;
@@ -1426,9 +1429,13 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
 goto out;
 }
 
+if (flags & ~bs->supported_read_flags) {
+abort();
+}
+
 max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
 if (bytes <= max_bytes && bytes <= max_transfer) {
-ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, 0);
+ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, flags);
 goto out;
 }
 
@@ -1441,7 +1448,8 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
 
 ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
  num, qiov,
- qiov_offset + bytes - bytes_remaining, 0);
+ qiov_offset + bytes - bytes_remaining,
+ flags);
 max_bytes -= num;
 } else {
 num = bytes_remaining;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index c05fa1e..247e166 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -873,6 +873,10 @@ struct BlockDriverState {
 /* I/O Limits */
 BlockLimits bl;
 
+/*
+ * Flags honored during pread
+ */
+unsigned int supported_read_flags;
 /* Flags honored during pwrite (so far: BDRV_REQ_FUA,
  * BDRV_REQ_WRITE_UNCHANGED).
  * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those
-- 
1.8.3.1




[PATCH v13 00/10] Apply COR-filter to the block-stream permanently

2020-12-02 Thread Andrey Shinkevich via
The previous version 12 was discussed in the email thread:
Message-Id: <1603390423-980205-1-git-send-email-andrey.shinkev...@virtuozzo.com>

v13:
  02: The bdrv_remove_node() was dropped.
  05: Three patches with fixes were merged into one.
  06: Minor changes based on Vladimir's suggestions.
  08: Three patches with fixes were merged into one.
  09: The search for format_name of backing file was added.
  10: The flag BLK_PERM_GRAPH_MOD was removed.

Andrey Shinkevich (10):
  copy-on-read: support preadv/pwritev_part functions
  block: add API function to insert a node
  copy-on-read: add filter drop function
  qapi: add filter-node-name to block-stream
  qapi: create BlockdevOptionsCor structure for COR driver
  iotests: add #310 to test bottom node in COR driver
  block: include supported_read_flags into BDS structure
  copy-on-read: skip non-guest reads if no copy needed
  stream: skip filters when writing backing file name to QCOW2 header
  block: apply COR-filter to block-stream jobs

 block.c|  25 +++
 block/copy-on-read.c   | 143 +
 block/copy-on-read.h   |  32 +
 block/io.c |  12 +++-
 block/monitor/block-hmp-cmds.c |   4 +-
 block/stream.c | 120 +++---
 blockdev.c |  12 ++--
 include/block/block.h  |  10 ++-
 include/block/block_int.h  |  11 +++-
 qapi/block-core.json   |  27 +++-
 tests/qemu-iotests/030 |  51 ++-
 tests/qemu-iotests/030.out |   4 +-
 tests/qemu-iotests/141.out |   2 +-
 tests/qemu-iotests/245 |  22 +--
 tests/qemu-iotests/310 | 114 
 tests/qemu-iotests/310.out |  15 +
 tests/qemu-iotests/group   |   1 +
 17 files changed, 484 insertions(+), 121 deletions(-)
 create mode 100644 block/copy-on-read.h
 create mode 100755 tests/qemu-iotests/310
 create mode 100644 tests/qemu-iotests/310.out

-- 
1.8.3.1




[PATCH v13 05/10] qapi: create BlockdevOptionsCor structure for COR driver

2020-12-02 Thread Andrey Shinkevich via
Create the BlockdevOptionsCor structure for COR driver specific
options, splitting it off from the BlockdevOptionsGenericFormat. The
only option in the structure, the 'bottom' node, denotes an image file
that limits the COR operations in the backing chain.
We are going to use the COR-filter for a block-stream job and will pass
a bottom node name to the COR driver. The bottom node is the first
non-filter overlay of the base. It was introduced because the base node
itself may change due to possible concurrent jobs.

Suggested-by: Max Reitz 
Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/copy-on-read.c | 57 ++--
 qapi/block-core.json | 21 ++-
 2 files changed, 75 insertions(+), 3 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 618c4c4..2cddc96 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -24,18 +24,23 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
 #include "block/copy-on-read.h"
 
 
 typedef struct BDRVStateCOR {
 bool active;
+BlockDriverState *bottom_bs;
 } BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BlockDriverState *bottom_bs = NULL;
 BDRVStateCOR *state = bs->opaque;
+/* Find a bottom node name, if any */
+const char *bottom_node = qdict_get_try_str(options, "bottom");
 
 bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
@@ -51,7 +56,17 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+if (bottom_node) {
+bottom_bs = bdrv_lookup_bs(NULL, bottom_node, errp);
+if (!bottom_bs) {
+error_setg(errp, "Bottom node '%s' not found", bottom_node);
+qdict_del(options, "bottom");
+return -EINVAL;
+}
+qdict_del(options, "bottom");
+}
 state->active = true;
+state->bottom_bs = bottom_bs;
 
 /*
  * We don't need to call bdrv_child_refresh_perms() now as the permissions
@@ -107,8 +122,46 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
size_t qiov_offset,
int flags)
 {
-return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
-   flags | BDRV_REQ_COPY_ON_READ);
+int64_t n;
+int local_flags;
+int ret;
+BDRVStateCOR *state = bs->opaque;
+
+if (!state->bottom_bs) {
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
+}
+
+while (bytes) {
+local_flags = flags;
+
+/* In case of failure, try to copy-on-read anyway */
+ret = bdrv_is_allocated(bs->file->bs, offset, bytes, );
+if (ret <= 0) {
+ret = 
bdrv_is_allocated_above(bdrv_backing_chain_next(bs->file->bs),
+  state->bottom_bs, true, offset,
+  n, );
+if (ret == 1 || ret < 0) {
+local_flags |= BDRV_REQ_COPY_ON_READ;
+}
+/* Finish earlier if the end of a backing file has been reached */
+if (n == 0) {
+break;
+}
+}
+
+ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+  local_flags);
+if (ret < 0) {
+return ret;
+}
+
+offset += n;
+qiov_offset += n;
+bytes -= n;
+}
+
+return 0;
 }
 
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 8ef3df6..04055ef 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3942,6 +3942,25 @@
   'data': { 'throttle-group': 'str',
 'file' : 'BlockdevRef'
  } }
+
+##
+# @BlockdevOptionsCor:
+#
+# Driver specific block device options for the copy-on-read driver.
+#
+# @bottom: the name of a non-filter node (allocation-bearing layer) that limits
+#  the COR operations in the backing chain (inclusive).
+#  For the block-stream job, it will be the first non-filter overlay of
+#  the base node. We do not involve the base node into the COR
+#  operations because the base may change due to a concurrent
+#  block-commit job on the same backing chain.
+#
+# Since: 5.2
+##
+{ 'struct': 'BlockdevOptionsCor',
+  'base': 'Blo

[PATCH v13 08/10] copy-on-read: skip non-guest reads if no copy needed

2020-12-02 Thread Andrey Shinkevich via
If the flag BDRV_REQ_PREFETCH is set, skip idle read/write
operations in the COR driver. This can be taken into account for
optimizing the COR algorithms. Currently, that check is made during
the block-stream job.

Add the BDRV_REQ_PREFETCH flag to the supported_read_flags of the
COR-filter.

block: Modify the comment for the flag BDRV_REQ_PREFETCH as we are
going to use it alone and pass it to the COR-filter driver for further
processing.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c  | 14 ++
 include/block/block.h |  8 +---
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 2cddc96..123d197 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -49,6 +49,8 @@ static int cor_open(BlockDriverState *bs, QDict *options, int 
flags,
 return -EINVAL;
 }
 
+bs->supported_read_flags = BDRV_REQ_PREFETCH;
+
 bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
 (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
 
@@ -150,10 +152,14 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
 }
 }
 
-ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
-  local_flags);
-if (ret < 0) {
-return ret;
+/* Skip if neither read nor write are needed */
+if ((local_flags & (BDRV_REQ_PREFETCH | BDRV_REQ_COPY_ON_READ)) !=
+BDRV_REQ_PREFETCH) {
+ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+  local_flags);
+if (ret < 0) {
+return ret;
+}
 }
 
 offset += n;
diff --git a/include/block/block.h b/include/block/block.h
index 81a3894..3499554 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -81,9 +81,11 @@ typedef enum {
 BDRV_REQ_NO_FALLBACK= 0x100,
 
 /*
- * BDRV_REQ_PREFETCH may be used only together with BDRV_REQ_COPY_ON_READ
- * on read request and means that caller doesn't really need data to be
- * written to qiov parameter which may be NULL.
+ * BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read
+ * (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR
+ * filter is involved), in which case it signals that the COR operation
+ * need not read the data into memory (qiov) but only ensure they are
+ * copied to the top layer (i.e., that COR operation is done).
  */
 BDRV_REQ_PREFETCH  = 0x200,
 /* Mask of valid flags */
-- 
1.8.3.1




[PATCH v13 04/10] qapi: add filter-node-name to block-stream

2020-12-02 Thread Andrey Shinkevich via
Provide the possibility to pass the 'filter-node-name' parameter to the
block-stream job as it is done for the commit block job.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/monitor/block-hmp-cmds.c | 4 ++--
 block/stream.c | 4 +++-
 blockdev.c | 4 +++-
 include/block/block_int.h  | 7 ++-
 qapi/block-core.json   | 6 ++
 5 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index d15a2be..e8a58f3 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -508,8 +508,8 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
 
 qmp_block_stream(true, device, device, base != NULL, base, false, NULL,
  false, NULL, qdict_haskey(qdict, "speed"), speed, true,
- BLOCKDEV_ON_ERROR_REPORT, false, false, false, false,
- );
+ BLOCKDEV_ON_ERROR_REPORT, false, NULL, false, false, 
false,
+ false, );
 
 hmp_handle_error(mon, error);
 }
diff --git a/block/stream.c b/block/stream.c
index 236384f..6e281c7 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -221,7 +221,9 @@ static const BlockJobDriver stream_job_driver = {
 void stream_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *base, const char *backing_file_str,
   int creation_flags, int64_t speed,
-  BlockdevOnError on_error, Error **errp)
+  BlockdevOnError on_error,
+  const char *filter_node_name,
+  Error **errp)
 {
 StreamBlockJob *s;
 BlockDriverState *iter;
diff --git a/blockdev.c b/blockdev.c
index fe6fb5d..c917625 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2499,6 +2499,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
   bool has_backing_file, const char *backing_file,
   bool has_speed, int64_t speed,
   bool has_on_error, BlockdevOnError on_error,
+  bool has_filter_node_name, const char *filter_node_name,
   bool has_auto_finalize, bool auto_finalize,
   bool has_auto_dismiss, bool auto_dismiss,
   Error **errp)
@@ -2581,7 +2582,8 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 
 stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
- job_flags, has_speed ? speed : 0, on_error, _err);
+ job_flags, has_speed ? speed : 0, on_error,
+ filter_node_name, _err);
 if (local_err) {
 error_propagate(errp, local_err);
 goto out;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 95d9333..c05fa1e 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1134,6 +1134,9 @@ int is_windows_drive(const char *filename);
  *  See @BlockJobCreateFlags
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @on_error: The action to take upon error.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the stream job inserts into the graph above @bs. NULL means
+ * that a node name should be autogenerated.
  * @errp: Error object.
  *
  * Start a streaming operation on @bs.  Clusters that are unallocated
@@ -1146,7 +1149,9 @@ int is_windows_drive(const char *filename);
 void stream_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *base, const char *backing_file_str,
   int creation_flags, int64_t speed,
-  BlockdevOnError on_error, Error **errp);
+  BlockdevOnError on_error,
+  const char *filter_node_name,
+  Error **errp);
 
 /**
  * commit_start:
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 04ad80b..8ef3df6 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2543,6 +2543,11 @@
 #'stop' and 'enospc' can only be used if the block device
 #supports io-status (see BlockInfo).  Since 1.3.
 #
+# @filter-node-name: the node name that should be assigned to the
+#filter driver that the stream job inserts into the graph
+#above @device. If this option is not given, a node name is
+#autogenerated. (Since: 5.2)
+#
 # @auto-finalize: When false, this job will wait in a PENDING state after it 
has
 # finished its work, waiting for @block-job-finalize before
 # making any block graph changes.
@@ -2573,6 +2578,7 @@
   'data': { '*job-id': 'str', 'device': 'str', '*base': 'str',
 '*base-node': 'str', '*backing-file': 'str', '*speed': 'int',
 '*on-error': 'Block

Re: [PATCH v12 14/14] block: apply COR-filter to block-stream jobs

2020-12-02 Thread Andrey Shinkevich



On 27.10.2020 21:24, Andrey Shinkevich wrote:


On 27.10.2020 20:57, Vladimir Sementsov-Ogievskiy wrote:

27.10.2020 20:48, Andrey Shinkevich wrote:


On 27.10.2020 19:13, Vladimir Sementsov-Ogievskiy wrote:

22.10.2020 21:13, Andrey Shinkevich wrote:

This patch completes the series with the COR-filter insertion for
block-stream operations. Adding the filter makes it possible for 
copied

regions to be discarded in backing files during the block-stream job,
which will reduce disk overuse.
The COR-filter insertion incurs changes in the iotests case
245:test_block_stream_4 that reopens the backing chain during a
block-stream job. There are changes in the iotests #030 as well.
The iotests case 030:test_stream_parallel was deleted due to multiple
conflicts between the concurrent job operations over the same backing
chain. The base backing node for one job is the top node for another
job. It may change due to the filter node inserted into the backing
chain while both jobs are running. Another issue is that the parts of
the backing chain are being frozen by the running job and may not be
changed by the concurrent job when needed. The concept of parallel
jobs with common nodes is no longer considered vital.

Signed-off-by: Andrey Shinkevich 
---
  block/stream.c | 98 
++

  tests/qemu-iotests/030 | 51 +++-
  tests/qemu-iotests/030.out |  4 +-
  tests/qemu-iotests/141.out |  2 +-
  tests/qemu-iotests/245 | 22 +++
  5 files changed, 87 insertions(+), 90 deletions(-)

diff --git a/block/stream.c b/block/stream.c



[...]

+    s = block_job_create(job_id, _job_driver, NULL, 
cor_filter_bs,

+ BLK_PERM_CONSISTENT_READ,
+ basic_flags | BLK_PERM_WRITE | 
BLK_PERM_GRAPH_MOD,


I think that BLK_PERM_GRAPH_MOD is something outdated. We have 
chain-freeze; what does BLK_PERM_GRAPH_MOD add to it? I don't know, and 
I doubt that anybody knows.




That is true for the commit/mirror jobs also. If we agree to remove 
the flag BLK_PERM_GRAPH_MOD from all these jobs, it will be made in a 
separate series, won't it?


Hmm. At least, let's not implement new logic based on 
BLK_PERM_GRAPH_MOD. In original code it's only block_job_create's 
perm, not in shared_perm, not somewhere else.. So, if we keep it, 
let's keep it as is: only in perm in block_job_create, not 
implementing additional perm/shared_perm logic.




With @perm=0 in the block_job_add_bdrv(>common, "active node"...), it 
won't.





   speed, creation_flags, NULL, NULL, errp);
  if (!s) {
  goto fail;
  }
+    /*
+ * Prevent concurrent jobs trying to modify the graph 
structure here, we
+ * already have our own plans. Also don't allow resize as the 
image size is

+ * queried only at the job start and then cached.
+ */
+    if (block_job_add_bdrv(>common, "active node", bs,
+   basic_flags | BLK_PERM_GRAPH_MOD,


Why not 0, like for the other nodes? We don't use this BdrvChild at all, 
so why require permissions?




Yes, '0' is right.

+   basic_flags | BLK_PERM_WRITE, 
_abort)) {

+    goto fail;
+    }
+
  /* Block all intermediate nodes between bs and base, because 



[...]


diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index dcb4b5d..0064590 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -227,61 +227,20 @@ class TestParallelOps(iotests.QMPTestCase):
  for img in self.imgs:
  os.remove(img)
-    # Test that it's possible to run several block-stream operations
-    # in parallel in the same snapshot chain
-    @unittest.skipIf(os.environ.get('QEMU_CHECK_BLOCK_AUTO'), 
'disabled in CI')

-    def test_stream_parallel(self):


Didn't we agree to add a "bottom" parameter to QMP? Then this test case 
can be rewritten using

node-names and the new "bottom" stream argument.



The new QMP "bottom" option is passed to the COR driver. This is done 
within the stream-job code, so it works.




I guess it will not help for the whole test. In particular, there is 
an issue with freezing the child link to the COR-filter of the concurrent 
job; it then fails to finish first.


We should not have such frozen link, as our bottom node should be 
above COR-filter of concurrent job.





The bdrv_freeze_backing_chain(bs, above_base, errp) does that job. Max 
insisted on keeping it.


Andrey


I have kept the test_stream_parallel() deleted in the coming v13 because 
it was agreed to make the above_base node frozen. With this, the test 
case can not pass. It is also true because the operations over the 
COR-filter node are blocked for the parallel jobs.


Andrey



[PATCH v3 3/5] monitor: let QMP monitor track JSON message content

2020-11-27 Thread Andrey Shinkevich via
We are going to allow the QMP monitor to read more than one byte at a
time from the input channel to improve performance. With the OOB
capability disabled, the monitor queues at most one QMP command. This
was done for backward compatibility, as stated in the comment preceding
the code that pushes a command into the queue. To keep that concept
functional, the monitor should track the end of a single QMP command.
That allows the dispatcher to handle the command and send a response to
the client in time.

Signed-off-by: Andrey Shinkevich 
---
 include/qapi/qmp/json-parser.h |  5 +++--
 monitor/qmp.c  | 18 --
 qga/main.c |  2 +-
 qobject/json-lexer.c   | 30 +-
 qobject/json-parser-int.h  |  8 +---
 qobject/json-streamer.c| 15 ---
 qobject/qjson.c|  2 +-
 tests/qtest/libqtest.c |  2 +-
 8 files changed, 56 insertions(+), 26 deletions(-)

diff --git a/include/qapi/qmp/json-parser.h b/include/qapi/qmp/json-parser.h
index 7345a9b..039addb 100644
--- a/include/qapi/qmp/json-parser.h
+++ b/include/qapi/qmp/json-parser.h
@@ -36,8 +36,9 @@ void json_message_parser_init(JSONMessageParser *parser,
Error *err),
   void *opaque, va_list *ap);
 
-void json_message_parser_feed(JSONMessageParser *parser,
- const char *buffer, size_t size);
+size_t  json_message_parser_feed(JSONMessageParser *parser,
+ const char *buffer, size_t size,
+ bool track_qmp);
 
 void json_message_parser_flush(JSONMessageParser *parser);
 
diff --git a/monitor/qmp.c b/monitor/qmp.c
index a86ed35..0b39c62 100644
--- a/monitor/qmp.c
+++ b/monitor/qmp.c
@@ -367,8 +367,22 @@ static void handle_qmp_command(void *opaque, QObject *req, 
Error *err)
 static void monitor_qmp_read(void *opaque, const uint8_t *buf, int size)
 {
 MonitorQMP *mon = opaque;
-
-json_message_parser_feed(>parser, (const char *) buf, size);
+char *cursor = (char *) buf;
+size_t len;
+
+while (size > 0) {
+len = json_message_parser_feed(>parser, (const char *) cursor,
+   size, true);
+cursor += len;
+size -= len;
+
+if (size > 0) {
+/* Let the dispatcher process the QMP command */
+while (qatomic_mb_read(>common.suspend_cnt)) {
+g_usleep(20);
+}
+}
+}
 }
 
 static QDict *qmp_greeting(MonitorQMP *mon)
diff --git a/qga/main.c b/qga/main.c
index dea6a3a..16de642 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -605,7 +605,7 @@ static gboolean channel_event_cb(GIOCondition condition, 
gpointer data)
 case G_IO_STATUS_NORMAL:
 buf[count] = 0;
 g_debug("read data, count: %d, data: %s", (int)count, buf);
-json_message_parser_feed(>parser, (char *)buf, (int)count);
+json_message_parser_feed(>parser, (char *)buf, (int)count, false);
 break;
 case G_IO_STATUS_EOF:
 g_debug("received EOF");
diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c
index 632320d..1fefbae 100644
--- a/qobject/json-lexer.c
+++ b/qobject/json-lexer.c
@@ -280,10 +280,11 @@ void json_lexer_init(JSONLexer *lexer, bool 
enable_interpolation)
 lexer->x = lexer->y = 0;
 }
 
-static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
+static JSONTokenType json_lexer_feed_char(JSONLexer *lexer, char ch, bool 
flush)
 {
 int new_state;
 bool char_consumed = false;
+JSONTokenType ret;
 
 lexer->x++;
 if (ch == '\n') {
@@ -310,16 +311,16 @@ static void json_lexer_feed_char(JSONLexer *lexer, char 
ch, bool flush)
 case JSON_FLOAT:
 case JSON_KEYWORD:
 case JSON_STRING:
-json_message_process_token(lexer, lexer->token, new_state,
-   lexer->x, lexer->y);
+ret = json_message_process_token(lexer, lexer->token, new_state,
+ lexer->x, lexer->y);
 /* fall through */
 case IN_START:
 g_string_truncate(lexer->token, 0);
 new_state = lexer->start_state;
 break;
 case JSON_ERROR:
-json_message_process_token(lexer, lexer->token, JSON_ERROR,
-   lexer->x, lexer->y);
+ret = json_message_process_token(lexer, lexer->token, JSON_ERROR,
+ lexer->x, lexer->y);
 new_state = IN_RECOVERY;
 /* fall through */
 case IN_RECOVERY:
@@ -335,20 +336,31 @@ static void json_lexer_feed_char(JSONLexer *lexer, char 
ch, bool flush)
  * this is a security consideration.
  */
 if (lexer->token

[PATCH v3 5/5] monitor: increase amount of data for monitor to read

2020-11-27 Thread Andrey Shinkevich via
QMP and HMP monitors read one byte at a time from the socket or stdin,
which is very inefficient. With 100+ VMs on the host, this results in
multiple extra system calls and CPU overuse.
This patch increases the amount of read data up to 4096 bytes that fits
the buffer size on the channel level.

Suggested-by: Denis V. Lunev 
Signed-off-by: Andrey Shinkevich 
---
 monitor/monitor.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monitor/monitor.c b/monitor/monitor.c
index 84222cd..43d2d3b 100644
--- a/monitor/monitor.c
+++ b/monitor/monitor.c
@@ -566,7 +566,7 @@ int monitor_can_read(void *opaque)
 {
 Monitor *mon = opaque;
 
-return !qatomic_mb_read(>suspend_cnt);
+return !qatomic_mb_read(>suspend_cnt) ? CHR_READ_BUF_LEN : 0;
 }
 
 void monitor_list_append(Monitor *mon)
-- 
1.8.3.1




[PATCH v3 2/5] monitor: drain requests queue with 'channel closed' event

2020-11-27 Thread Andrey Shinkevich via
When CHR_EVENT_CLOSED arrives, the QMP request queue may still contain
unprocessed commands. This can happen with the QMP OOB capability
enabled. Let the dispatcher finish handling the requests that remain in
the monitor queue.

Signed-off-by: Andrey Shinkevich 
---
 monitor/qmp.c | 46 +-
 1 file changed, 21 insertions(+), 25 deletions(-)

diff --git a/monitor/qmp.c b/monitor/qmp.c
index 7169366..a86ed35 100644
--- a/monitor/qmp.c
+++ b/monitor/qmp.c
@@ -75,36 +75,32 @@ static void monitor_qmp_cleanup_req_queue_locked(MonitorQMP 
*mon)
 }
 }
 
-static void monitor_qmp_cleanup_queue_and_resume(MonitorQMP *mon)
+/*
+ * Let unprocessed QMP commands be handled.
+ */
+static void monitor_qmp_drain_queue(MonitorQMP *mon)
 {
-qemu_mutex_lock(>qmp_queue_lock);
+bool q_is_empty = false;
 
-/*
- * Same condition as in monitor_qmp_dispatcher_co(), but before
- * removing an element from the queue (hence no `- 1`).
- * Also, the queue should not be empty either, otherwise the
- * monitor hasn't been suspended yet (or was already resumed).
- */
-bool need_resume = (!qmp_oob_enabled(mon) ||
-mon->qmp_requests->length == QMP_REQ_QUEUE_LEN_MAX)
-&& !g_queue_is_empty(mon->qmp_requests);
+while (!q_is_empty) {
+qemu_mutex_lock(>qmp_queue_lock);
+q_is_empty = g_queue_is_empty(mon->qmp_requests);
+qemu_mutex_unlock(>qmp_queue_lock);
 
-monitor_qmp_cleanup_req_queue_locked(mon);
+if (!q_is_empty) {
+if (!qatomic_xchg(_dispatcher_co_busy, true)) {
+/* Kick the dispatcher coroutine */
+aio_co_wake(qmp_dispatcher_co);
+} else {
+/* Let the dispatcher do its job for a while */
+g_usleep(40);
+}
+}
+}
 
-if (need_resume) {
-/*
- * handle_qmp_command() suspended the monitor because the
- * request queue filled up, to be resumed when the queue has
- * space again.  We just emptied it; resume the monitor.
- *
- * Without this, the monitor would remain suspended forever
- * when we get here while the monitor is suspended.  An
- * unfortunately timed CHR_EVENT_CLOSED can do the trick.
- */
+if (qatomic_mb_read(>common.suspend_cnt)) {
 monitor_resume(>common);
 }
-
-qemu_mutex_unlock(>qmp_queue_lock);
 }
 
 void qmp_send_response(MonitorQMP *mon, const QDict *rsp)
@@ -418,7 +414,7 @@ static void monitor_qmp_event(void *opaque, QEMUChrEvent 
event)
  * stdio, it's possible that stdout is still open when stdin
  * is closed.
  */
-monitor_qmp_cleanup_queue_and_resume(mon);
+monitor_qmp_drain_queue(mon);
 json_message_parser_destroy(>parser);
 json_message_parser_init(>parser, handle_qmp_command,
  mon, NULL);
-- 
1.8.3.1




Re: [PATCH v2 2/2] monitor: increase amount of data for monitor to read

2020-11-27 Thread Andrey Shinkevich

On 24.11.2020 14:03, Vladimir Sementsov-Ogievskiy wrote:

23.11.2020 18:44, Andrey Shinkevich wrote:

QMP and HMP monitors read one byte at a time from the socket or stdin,
which is very inefficient. With 100+ VMs on the host, this results in
multiple extra system calls and CPU overuse.
This patch increases the amount of read data up to 4096 bytes that fits
the buffer size on the channel level.
A JSON little parser is introduced to throttle QMP commands read from
the buffer so that incoming requests do not overflow the monitor input
queue.

Suggested-by: Denis V. Lunev
Signed-off-by: Andrey Shinkevich



Can't we just increase qmp queue instead? It seems a lot simpler:



With the OOB capability disabled, the monitor queues at most one QMP 
command. This was done for backward compatibility, as stated in the 
comment preceding the code that pushes a command into the queue. To keep 
that concept functional, the monitor should track the end of a single QMP 
command. That allows the dispatcher to handle the command and send a 
response to the client in time.
With the patch below, the monitor queue will be filled with as many QMP 
commands as are found in the input buffer. Executing the first command, 
{"execute":"qmp_capabilities"}, takes more time, so the queue will 
fill up completely. Then the dispatcher starts executing the other 
commands in the monitor queue. The process becomes synchronous. In this 
case, we need neither the thread nor the queue.


Andrey



diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
index 348bfad3d5..7e721eee3f 100644
--- a/include/monitor/monitor.h
+++ b/include/monitor/monitor.h
@@ -8,7 +8,7 @@
  typedef struct MonitorHMP MonitorHMP;
  typedef struct MonitorOptions MonitorOptions;

-#define QMP_REQ_QUEUE_LEN_MAX 8
+#define QMP_REQ_QUEUE_LEN_MAX 4096

  extern QemuOptsList qemu_mon_opts;

diff --git a/monitor/monitor.c b/monitor/monitor.c
index 84222cd130..1588f00306 100644
--- a/monitor/monitor.c
+++ b/monitor/monitor.c
@@ -566,7 +566,7 @@ int monitor_can_read(void *opaque)
  {
  Monitor *mon = opaque;

-    return !qatomic_mb_read(>suspend_cnt);
+    return !qatomic_mb_read(>suspend_cnt) ? 4096 : 0;
  }


- with this patch tests pass and performance is even better.






[PATCH v3 0/5] Increase amount of data for monitor to read

2020-11-27 Thread Andrey Shinkevich via
The subject was discussed here:
https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg00206.html
https://patchew.org/QEMU/20190610105906.28524-1-dplotni...@virtuozzo.com/#
Message-ID: <31dd78ba-bd64-2ed6-3c8f-eed4e904d...@virtuozzo.com>
and v2:
Message-Id: <1606146274-246154-1-git-send-email-andrey.shinkev...@virtuozzo.com>

This series is a solution for the issue with overflow of the monitor queue
with QMP requests if we keep the maximum queue length unchanged (=8).

v3:
  01: New
  02: New
  03: The additional little JSON parser removed and the resources of the
  existing JSON parser were used to track the end of a QMP command.
  04: The amount of read input data increases only.

Andrey Shinkevich (4):
  monitor: change function obsolete name in comments
  monitor: drain requests queue with 'channel closed' event
  monitor: let QMP monitor track JSON message content
  monitor: increase amount of data for monitor to read

Vladimir Sementsov-Ogievskiy (1):
  iotests: 129 don't check backup "busy"

 include/qapi/qmp/json-parser.h |  5 ++--
 monitor/monitor.c  |  2 +-
 monitor/qmp.c  | 66 --
 qga/main.c |  2 +-
 qobject/json-lexer.c   | 30 +--
 qobject/json-parser-int.h  |  8 +++--
 qobject/json-streamer.c| 15 +-
 qobject/qjson.c|  2 +-
 tests/qemu-iotests/129 |  1 -
 tests/qtest/libqtest.c |  2 +-
 10 files changed, 79 insertions(+), 54 deletions(-)

-- 
1.8.3.1




[PATCH v3 4/5] iotests: 129 don't check backup "busy"

2020-11-27 Thread Andrey Shinkevich via
From: Vladimir Sementsov-Ogievskiy 

Busy is racy: the job has its "pause-points" when it's not busy. Drop this
check.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 tests/qemu-iotests/129 | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/qemu-iotests/129 b/tests/qemu-iotests/129
index 0e13244..3c22f64 100755
--- a/tests/qemu-iotests/129
+++ b/tests/qemu-iotests/129
@@ -67,7 +67,6 @@ class TestStopWithBlockJob(iotests.QMPTestCase):
 result = self.vm.qmp("stop")
 self.assert_qmp(result, 'return', {})
 result = self.vm.qmp("query-block-jobs")
-self.assert_qmp(result, 'return[0]/busy', True)
 self.assert_qmp(result, 'return[0]/ready', False)
 
 def test_drive_mirror(self):
-- 
1.8.3.1




[PATCH v3 1/5] monitor: change function obsolete name in comments

2020-11-27 Thread Andrey Shinkevich via
The function monitor_qmp_bh_dispatcher() was renamed to
monitor_qmp_dispatcher_co() in commit 9ce44e2c. Let's amend the
comments accordingly.

Signed-off-by: Andrey Shinkevich 
---
 monitor/qmp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/monitor/qmp.c b/monitor/qmp.c
index b42f8c6..7169366 100644
--- a/monitor/qmp.c
+++ b/monitor/qmp.c
@@ -80,7 +80,7 @@ static void monitor_qmp_cleanup_queue_and_resume(MonitorQMP 
*mon)
 qemu_mutex_lock(>qmp_queue_lock);
 
 /*
- * Same condition as in monitor_qmp_bh_dispatcher(), but before
+ * Same condition as in monitor_qmp_dispatcher_co(), but before
  * removing an element from the queue (hence no `- 1`).
  * Also, the queue should not be empty either, otherwise the
  * monitor hasn't been suspended yet (or was already resumed).
@@ -343,7 +343,7 @@ static void handle_qmp_command(void *opaque, QObject *req, 
Error *err)
 
 /*
  * Suspend the monitor when we can't queue more requests after
- * this one.  Dequeuing in monitor_qmp_bh_dispatcher() or
+ * this one.  Dequeuing in monitor_qmp_dispatcher_co() or
  * monitor_qmp_cleanup_queue_and_resume() will resume it.
  * Note that when OOB is disabled, we queue at most one command,
  * for backward compatibility.
-- 
1.8.3.1




Re: [PATCH v2 1/2] iotests: add another bash sleep command to 247

2020-11-24 Thread Andrey Shinkevich

On 23.11.2020 18:44, Andrey Shinkevich wrote:

This patch paves the way for the one that follows. The following patch
makes the QMP monitor read up to 4K from stdin at once. That results
in the bash 'sleep' command running before _qemu_proc_exec() starts
in the subshell. Another 'sleep' command with an unobtrusive 'query-status'
serves as a workaround.

Signed-off-by: Andrey Shinkevich 
---
  tests/qemu-iotests/247 | 2 ++
  tests/qemu-iotests/247.out | 1 +
  2 files changed, 3 insertions(+)



[...]

With the patch 2/2 of the current version 2, the test case #247 passes 
without this patch 1/2. So, it may be excluded from the series.

Thanks to Vladimir for the idea to check.

Andrey



Re: [PATCH v2 1/2] iotests: add another bash sleep command to 247

2020-11-24 Thread Andrey Shinkevich

On 24.11.2020 13:04, Vladimir Sementsov-Ogievskiy wrote:

23.11.2020 18:44, Andrey Shinkevich wrote:

This patch paves the way for the one that follows. The following patch
makes the QMP monitor to read up to 4K from stdin at once. That results
in running the bash 'sleep' command before the _qemu_proc_exec() starts


But how? Before _qemu_proc_exec() starts, the QEMU monitor is not running,
and its new behavior can't have any influence.



I am not a bash expert to explain 'how' but this workaround works. It's 
just a test. Maybe other colleagues can say.



If the bash subshell works in an unpredictable way, it may be better to
refactor the test to send commands one by one with the help of
_send_qemu_cmd. Then sleep will be natively executed between sending commands.



Or maybe write a similar test case in Python if Kevin agrees.


in subshell. Another 'sleep' command with an unobtrusive 'query-status'
plays as a workaround.

Signed-off-by: Andrey Shinkevich 
---
  tests/qemu-iotests/247 | 2 ++
  tests/qemu-iotests/247.out | 1 +
  2 files changed, 3 insertions(+)

diff --git a/tests/qemu-iotests/247 b/tests/qemu-iotests/247
index 87e37b3..7d316ec 100755
--- a/tests/qemu-iotests/247
+++ b/tests/qemu-iotests/247
@@ -59,6 +59,8 @@ TEST_IMG="$TEST_IMG.4" _make_test_img $size
  {"execute":"block-commit",
   "arguments":{"device":"format-4", "top-node": "format-2", 
"base-node":"format-0", "job-id":"job0"}}

  EOF
+sleep 1
+echo '{"execute":"query-status"}'
  if [ "${VALGRIND_QEMU}" == "y" ]; then
  sleep 10
  else
diff --git a/tests/qemu-iotests/247.out b/tests/qemu-iotests/247.out
index e909e83..13d9547 100644
--- a/tests/qemu-iotests/247.out
+++ b/tests/qemu-iotests/247.out
@@ -17,6 +17,7 @@ QMP_VERSION
  {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, 
"event": "BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 
134217728, "offset": 134217728, "speed": 0, "type": "commit"}}
  {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, 
"event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": 
"job0"}}
  {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, 
"event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}

+{"return": {"status": "running", "singlestep": false, "running": true}}
  {"return": {}}
  {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, 
"event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}

  *** done








Re: [PATCH v2 0/2] Increase amount of data for monitor to read

2020-11-23 Thread Andrey Shinkevich

On 23.11.2020 18:44, Andrey Shinkevich wrote:

The subject was discussed here:
https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg00206.html

This series is a solution for the issue with QMP monitor buffered input.
A JSON little parser is introduced to separate QMP commands read from the
input buffer so that incoming requests do not overwhelm the monitor queue.
A side effect raised in the test #247 was managed in the first patch.
It may be considered as a workaround. Any sane fix suggested will be
appreciated.

Note:
This series goes after the Vladimir's one:
'[PATCH v3 00/25] backup performance: block_status + async"'
To make the test #129 passed, the following patch should be applied first:
'[PATCH v3 01/25] iotests: 129 don't check backup "busy"'.

v2:
   02: The static JSONthrottle object was made a member of the Chardev 
structure.
   The fd_chr_read functions were merged.
   The monitor thread synchronization was added to protect the input queue
   from overflow.

Andrey Shinkevich (2):
   iotests: add another bash sleep command to 247
   monitor: increase amount of data for monitor to read

  chardev/char-fd.c  | 35 +--
  chardev/char-socket.c  | 42 +++---
  chardev/char.c | 41 +
  include/chardev/char.h | 15 +++
  monitor/monitor.c  |  2 +-
  tests/qemu-iotests/247 |  2 ++
  tests/qemu-iotests/247.out |  1 +
  7 files changed, 132 insertions(+), 6 deletions(-)



...and with the extended number of QMP commands

time (echo "{ 'execute': 'qmp_capabilities' }"; for i in {1..1}; do 
echo "{ 'execute': 'query-block-jobs' } {"execute":"query-status"} { 
'execute': 'query-block-jobs' } {"execute":"query-status"} { 'execute': 
'query-block-jobs' } {"execute":"query-status"} { 'execute': 
'query-block-jobs' } {"execute":"query-status"}"; done; echo "{ 
'execute': 'quit' }" ) | ./build/qemu-system-x86_64 -qmp stdio > /dev/null


on master:
real    0m10.112s
user    0m10.168s
sys     0m4.793s

after the patch applied:
real    0m4.140s
user    0m4.079s
sys     0m0.785s

Andrey



Re: [PATCH v2 0/2] Increase amount of data for monitor to read

2020-11-23 Thread Andrey Shinkevich

On 23.11.2020 18:44, Andrey Shinkevich wrote:

The subject was discussed here:
https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg00206.html

This series is a solution for the issue with QMP monitor buffered input.
A JSON little parser is introduced to separate QMP commands read from the
input buffer so that incoming requests do not overwhelm the monitor queue.
A side effect raised in the test #247 was managed in the first patch.
It may be considered as a workaround. Any sane fix suggested will be
appreciated.

Note:
This series goes after the Vladimir's one:
'[PATCH v3 00/25] backup performance: block_status + async"'
To make the test #129 passed, the following patch should be applied first:
'[PATCH v3 01/25] iotests: 129 don't check backup "busy"'.

v2:
   02: The static JSONthrottle object was made a member of the Chardev 
structure.
   The fd_chr_read functions were merged.
   The monitor thread synchronization was added to protect the input queue
   from overflow.

Andrey Shinkevich (2):
   iotests: add another bash sleep command to 247
   monitor: increase amount of data for monitor to read

  chardev/char-fd.c  | 35 +--
  chardev/char-socket.c  | 42 +++---
  chardev/char.c | 41 +
  include/chardev/char.h | 15 +++
  monitor/monitor.c  |  2 +-
  tests/qemu-iotests/247 |  2 ++
  tests/qemu-iotests/247.out |  1 +
  7 files changed, 132 insertions(+), 6 deletions(-)




The Vladimir's modified test case

$ time (echo "{ 'execute': 'qmp_capabilities' }"; for i in {1..1}; 
do echo "{ 'execute': 'query-block-jobs' } {"execute":"query-status"} { 
'execute': 'query-block-jobs' } {"execute":"query-status"}"; done; echo 
"{ 'execute': 'quit' }" ) | ./build/qemu-system-x86_64 -qmp stdio > 
/dev/null


shows the following performance

on master:
real    0m5.188s
user    0m5.310s
sys     0m2.539s

after the patch applied:
real    0m2.227s
user    0m2.483s
sys     0m0.480s

Andrey



[PATCH v2 1/2] iotests: add another bash sleep command to 247

2020-11-23 Thread Andrey Shinkevich via
This patch paves the way for the one that follows. The following patch
makes the QMP monitor read up to 4K from stdin at once. That results
in the bash 'sleep' command running before _qemu_proc_exec() starts
in the subshell. Another 'sleep' command with an unobtrusive 'query-status'
serves as a workaround.

Signed-off-by: Andrey Shinkevich 
---
 tests/qemu-iotests/247 | 2 ++
 tests/qemu-iotests/247.out | 1 +
 2 files changed, 3 insertions(+)

diff --git a/tests/qemu-iotests/247 b/tests/qemu-iotests/247
index 87e37b3..7d316ec 100755
--- a/tests/qemu-iotests/247
+++ b/tests/qemu-iotests/247
@@ -59,6 +59,8 @@ TEST_IMG="$TEST_IMG.4" _make_test_img $size
 {"execute":"block-commit",
  "arguments":{"device":"format-4", "top-node": "format-2", 
"base-node":"format-0", "job-id":"job0"}}
 EOF
+sleep 1
+echo '{"execute":"query-status"}'
 if [ "${VALGRIND_QEMU}" == "y" ]; then
 sleep 10
 else
diff --git a/tests/qemu-iotests/247.out b/tests/qemu-iotests/247.out
index e909e83..13d9547 100644
--- a/tests/qemu-iotests/247.out
+++ b/tests/qemu-iotests/247.out
@@ -17,6 +17,7 @@ QMP_VERSION
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 134217728, "offset": 
134217728, "speed": 0, "type": "commit"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
+{"return": {"status": "running", "singlestep": false, "running": true}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 *** done
-- 
1.8.3.1




[PATCH v2 2/2] monitor: increase amount of data for monitor to read

2020-11-23 Thread Andrey Shinkevich via
QMP and HMP monitors read one byte at a time from the socket or stdin,
which is very inefficient. With 100+ VMs on the host, this results in
multiple extra system calls and CPU overuse.
This patch increases the amount of read data up to 4096 bytes that fits
the buffer size on the channel level.
A JSON little parser is introduced to throttle QMP commands read from
the buffer so that incoming requests do not overflow the monitor input
queue.

Suggested-by: Denis V. Lunev 
Signed-off-by: Andrey Shinkevich 
---
 chardev/char-fd.c  | 35 +--
 chardev/char-socket.c  | 42 +++---
 chardev/char.c | 41 +
 include/chardev/char.h | 15 +++
 monitor/monitor.c  |  2 +-
 5 files changed, 129 insertions(+), 6 deletions(-)

diff --git a/chardev/char-fd.c b/chardev/char-fd.c
index 1cd62f2..15bc8f4 100644
--- a/chardev/char-fd.c
+++ b/chardev/char-fd.c
@@ -33,6 +33,8 @@
 #include "chardev/char-fd.h"
 #include "chardev/char-io.h"
 
+#include "monitor/monitor-internal.h"
+
 /* Called with chr_write_lock held.  */
 static int fd_chr_write(Chardev *chr, const uint8_t *buf, int len)
 {
@@ -45,8 +47,12 @@ static gboolean fd_chr_read(QIOChannel *chan, GIOCondition 
cond, void *opaque)
 {
 Chardev *chr = CHARDEV(opaque);
 FDChardev *s = FD_CHARDEV(opaque);
+CharBackend *be = chr->be;
+Monitor *mon = (Monitor *)be->opaque;
 int len;
 uint8_t buf[CHR_READ_BUF_LEN];
+uint8_t *cursor;
+int load, size, pos;
 ssize_t ret;
 
 len = sizeof(buf);
@@ -62,10 +68,35 @@ static gboolean fd_chr_read(QIOChannel *chan, GIOCondition 
cond, void *opaque)
 if (ret == 0) {
 remove_fd_in_watch(chr);
 qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+chr->json_thl = (const JSONthrottle){0};
 return FALSE;
 }
-if (ret > 0) {
-qemu_chr_be_write(chr, buf, ret);
+if (ret < 0) {
+return TRUE;
+}
+load = ret;
+cursor = buf;
+
+while (load > 0) {
+size = load;
+if (monitor_is_qmp(mon)) {
+/* Find the end position of a JSON command in the input buffer */
+pos = qemu_chr_end_position((const char *) cursor, size,
+&chr->json_thl);
+if (pos >= 0) {
+size = pos + 1;
+}
+}
+
+qemu_chr_be_write(chr, cursor, size);
+cursor += size;
+load -= size;
+
+if (load > 0) {
+while (qatomic_mb_read(&mon->suspend_cnt)) {
+g_usleep(40);
+}
+}
 }
 
 return TRUE;
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index 213a4c8..30ad1d4 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -38,6 +38,8 @@
 #include "chardev/char-io.h"
 #include "qom/object.h"
 
+#include "monitor/monitor-internal.h"
+
 /***/
 /* TCP Net console */
 
@@ -522,7 +524,11 @@ static gboolean tcp_chr_read(QIOChannel *chan, 
GIOCondition cond, void *opaque)
 {
 Chardev *chr = CHARDEV(opaque);
 SocketChardev *s = SOCKET_CHARDEV(opaque);
+CharBackend *be = chr->be;
+Monitor *mon = (Monitor *)be->opaque;
 uint8_t buf[CHR_READ_BUF_LEN];
+uint8_t *cursor;
+int load, pos;
 int len, size;
 
 if ((s->state != TCP_CHARDEV_STATE_CONNECTED) ||
@@ -537,12 +543,42 @@ static gboolean tcp_chr_read(QIOChannel *chan, 
GIOCondition cond, void *opaque)
 if (size == 0 || (size == -1 && errno != EAGAIN)) {
 /* connection closed */
 tcp_chr_disconnect(chr);
-} else if (size > 0) {
+chr->json_thl = (const JSONthrottle){0};
+return TRUE;
+}
+if (size < 0) {
+return TRUE;
+}
+load = size;
+cursor = buf;
+
+while (load > 0) {
+size = load;
+if (monitor_is_qmp(mon)) {
+/* Find the end position of a JSON command in the input buffer */
+pos = qemu_chr_end_position((const char *) cursor, size,
+&chr->json_thl);
+if (pos >= 0) {
+size = pos + 1;
+}
+}
+len = size;
+
 if (s->do_telnetopt) {
-tcp_chr_process_IAC_bytes(chr, s, buf, &size);
+tcp_chr_process_IAC_bytes(chr, s, cursor, &size);
 }
 if (size > 0) {
-qemu_chr_be_write(chr, buf, size);
+qemu_chr_be_write(chr, cursor, size);
+cursor += size;
+load -= size;
+} else {
+cursor += len;
+load -= len;
+}
+if (load > 0) {
+while (qatomic_mb_read(&mon->suspend_cnt)) {
+g_usleep(40);
+}
 }
 }
 
diff --git

[PATCH v2 0/2] Increase amount of data for monitor to read

2020-11-23 Thread Andrey Shinkevich via
The subject was discussed here:
https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg00206.html

This series is a solution for the issue with QMP monitor buffered input.
A little JSON parser is introduced to separate QMP commands read from the
input buffer so that incoming requests do not overwhelm the monitor queue.
A side effect raised in the test #247 was managed in the first patch.
It may be considered as a workaround. Any sane fix suggested will be
appreciated.

Note:
This series goes after Vladimir's one:
'[PATCH v3 00/25] backup performance: block_status + async'
To make test #129 pass, the following patch should be applied first:
'[PATCH v3 01/25] iotests: 129 don't check backup "busy"'.

v2:
  02: The static JSONthrottle object was made a member of the Chardev structure.
  The fd_chr_read functions were merged.
  The monitor thread synchronization was added to protect the input queue
  from overflow.

Andrey Shinkevich (2):
  iotests: add another bash sleep command to 247
  monitor: increase amount of data for monitor to read

 chardev/char-fd.c  | 35 +--
 chardev/char-socket.c  | 42 +++---
 chardev/char.c | 41 +
 include/chardev/char.h | 15 +++
 monitor/monitor.c  |  2 +-
 tests/qemu-iotests/247 |  2 ++
 tests/qemu-iotests/247.out |  1 +
 7 files changed, 132 insertions(+), 6 deletions(-)

-- 
1.8.3.1




Re: [PATCH 2/2] monitor: increase amount of data for monitor to read

2020-11-16 Thread Andrey Shinkevich

On 09.11.2020 12:55, Vladimir Sementsov-Ogievskiy wrote:

06.11.2020 15:42, Andrey Shinkevich wrote:

QMP and HMP monitors read one byte at a time from the socket or stdin,
which is very inefficient. With 100+ VMs on the host, this results in
multiple extra system calls and CPU overuse.
This patch increases the amount of read data up to 4096 bytes that fits
the buffer size on the channel level.

Suggested-by: Denis V. Lunev 
Signed-off-by: Andrey Shinkevich 
---
  chardev/char-fd.c  | 64 
+-

  chardev/char-socket.c  | 54 +++---
  chardev/char.c | 40 +
  include/chardev/char.h | 15 +++
  monitor/monitor.c  |  2 +-
  tests/qemu-iotests/247.out |  2 +-
  6 files changed, 159 insertions(+), 18 deletions(-)


[...]


+    ret = qio_channel_read(
+    chan, (gchar *)thl.buf, len, NULL);
+    if (ret == 0) {
+    remove_fd_in_watch(chr);
+    qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+    thl = (const JSONthrottle){0};
+    return FALSE;
+    }
+    if (ret < 0) {
+    return TRUE;
+    }


A large code chunk is shared with fd_chr_read_hmp(). It would be good to 
avoid the duplication.




There were two reasons to split the function:
1. Not to make the code complicated.
2. Avoid unused buffer of 4k on the stack:
   fd_chr_read_hmp() { uint8_t buf[CHR_READ_BUF_LEN];..


+    thl.load = ret;
+    thl.cursor = 0;
+    }
+
+    size = thl.load;
+    start = thl.buf + thl.cursor;


You may use a uint8_t * pointer type for thl.cursor and get rid of the size and 
start variables.




For the 'start', yes. And I will want the 'size' anyway.

[...]


+int qemu_chr_end_position(const char *buf, int size, JSONthrottle *thl)
+{
+    int i;
+
+    for (i = 0; i < size; i++) {
+    switch (buf[i]) {
+    case ' ':
+    case '\n':
+    case '\r':
+    continue;
+    case '{':
+    thl->brace_count++;
+    break;
+    case '}':
+    thl->brace_count--;
+    break;
+    case '[':
+    thl->bracket_count++;
+    break;
+    case ']':
+    thl->bracket_count--;


I don't think you need to care about square brackets, as QMP queries and 
answers are always json objects, i.e. in pair of '{' and '}'.




I've kept the brackets because it is another condition to put a command 
into the requests queue (see json_message_process_token()).



Andrey



Re: [PATCH 0/2] Increase amount of data for monitor to read

2020-11-09 Thread Andrey Shinkevich



On 09.11.2020 13:04, Vladimir Sementsov-Ogievskiy wrote:

09.11.2020 11:50, Vladimir Sementsov-Ogievskiy wrote:

06.11.2020 15:42, Andrey Shinkevich wrote:

The subject was discussed here:
https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg00206.html

This series is a solution for the issue with QMP monitor buffered input.
A little parser is introduced to throttle JSON commands read from the
buffer so that QMP requests do not overwhelm the monitor input queue.
A side effect raised in the test #247 was managed in the first patch.
It may be considered as a workaround. Any sane fix suggested will be
appreciated.

Note:
This series goes after the Vladimir's one:
'[PATCH v3 00/25] backup performance: block_status + async"'
To make the test #129 passed, the following patch should be applied 
first:

'[PATCH v3 01/25] iotests: 129 don't check backup "busy"'.



[...]




Positive thing: the patches do increase performance:

for me, the following command:

(echo "{ 'execute': 'qmp_capabilities' }"; for i in {1..1}; do echo 
"{ 'execute': 'query-block-jobs' }"; done; echo "{ 'execute': 'quit' }" 
) | time ./qemu-system-x86_64 -qmp stdio > /dev/null


shows 2.4s on master and 0.6s after patches




Thank you for testing it. I'd like to include the result in the patch 
description with "Tested-by: ..."


Andrey



Re: [PATCH 0/2] Increase amount of data for monitor to read

2020-11-06 Thread Andrey Shinkevich

Please exclude this address when replying:

jc...@redhat.com

Andrey



[PATCH 2/2] monitor: increase amount of data for monitor to read

2020-11-06 Thread Andrey Shinkevich via
QMP and HMP monitors read one byte at a time from the socket or stdin,
which is very inefficient. With 100+ VMs on the host, this results in
multiple extra system calls and CPU overuse.
This patch increases the amount of read data up to 4096 bytes that fits
the buffer size on the channel level.

Suggested-by: Denis V. Lunev 
Signed-off-by: Andrey Shinkevich 
---
 chardev/char-fd.c  | 64 +-
 chardev/char-socket.c  | 54 +++---
 chardev/char.c | 40 +
 include/chardev/char.h | 15 +++
 monitor/monitor.c  |  2 +-
 tests/qemu-iotests/247.out |  2 +-
 6 files changed, 159 insertions(+), 18 deletions(-)

diff --git a/chardev/char-fd.c b/chardev/char-fd.c
index 1cd62f2..6194fe6 100644
--- a/chardev/char-fd.c
+++ b/chardev/char-fd.c
@@ -33,6 +33,8 @@
 #include "chardev/char-fd.h"
 #include "chardev/char-io.h"
 
+#include "monitor/monitor-internal.h"
+
 /* Called with chr_write_lock held.  */
 static int fd_chr_write(Chardev *chr, const uint8_t *buf, int len)
 {
@@ -41,7 +43,7 @@ static int fd_chr_write(Chardev *chr, const uint8_t *buf, int 
len)
 return io_channel_send(s->ioc_out, buf, len);
 }
 
-static gboolean fd_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
+static gboolean fd_chr_read_hmp(QIOChannel *chan, void *opaque)
 {
 Chardev *chr = CHARDEV(opaque);
 FDChardev *s = FD_CHARDEV(opaque);
@@ -71,6 +73,66 @@ static gboolean fd_chr_read(QIOChannel *chan, GIOCondition 
cond, void *opaque)
 return TRUE;
 }
 
+static gboolean fd_chr_read_qmp(QIOChannel *chan, void *opaque)
+{
+static JSONthrottle thl = {0};
+uint8_t *start;
+Chardev *chr = CHARDEV(opaque);
+FDChardev *s = FD_CHARDEV(opaque);
+int len, size, pos;
+ssize_t ret;
+
+if (!thl.load) {
+len = sizeof(thl.buf);
+if (len > s->max_size) {
+len = s->max_size;
+}
+if (len == 0) {
+return TRUE;
+}
+
+ret = qio_channel_read(
+chan, (gchar *)thl.buf, len, NULL);
+if (ret == 0) {
+remove_fd_in_watch(chr);
+qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
+thl = (const JSONthrottle){0};
+return FALSE;
+}
+if (ret < 0) {
+return TRUE;
+}
+thl.load = ret;
+thl.cursor = 0;
+}
+
+size = thl.load;
+start = thl.buf + thl.cursor;
+pos = qemu_chr_end_position((const char *) start, size, &thl);
+if (pos >= 0) {
+size = pos + 1;
+}
+
+qemu_chr_be_write(chr, start, size);
+thl.cursor += size;
+thl.load -= size;
+
+return TRUE;
+}
+
+static gboolean fd_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
+{
+Chardev *chr = CHARDEV(opaque);
+CharBackend *be = chr->be;
+Monitor *mon = (Monitor *)be->opaque;
+
+if (monitor_is_qmp(mon)) {
+return fd_chr_read_qmp(chan, opaque);
+}
+
+return fd_chr_read_hmp(chan, opaque);
+}
+
 static int fd_chr_read_poll(void *opaque)
 {
 Chardev *chr = CHARDEV(opaque);
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index 213a4c8..8335e8c 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -520,30 +520,54 @@ static void tcp_chr_disconnect(Chardev *chr)
 
 static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
 {
+static JSONthrottle thl = {0};
+uint8_t *start;
 Chardev *chr = CHARDEV(opaque);
 SocketChardev *s = SOCKET_CHARDEV(opaque);
-uint8_t buf[CHR_READ_BUF_LEN];
-int len, size;
+int len, size, pos;
 
 if ((s->state != TCP_CHARDEV_STATE_CONNECTED) ||
 s->max_size <= 0) {
 return TRUE;
 }
-len = sizeof(buf);
-if (len > s->max_size) {
-len = s->max_size;
-}
-size = tcp_chr_recv(chr, (void *)buf, len);
-if (size == 0 || (size == -1 && errno != EAGAIN)) {
-/* connection closed */
-tcp_chr_disconnect(chr);
-} else if (size > 0) {
-if (s->do_telnetopt) {
-tcp_chr_process_IAC_bytes(chr, s, buf, &size);
+
+if (!thl.load) {
+len = sizeof(thl.buf);
+if (len > s->max_size) {
+len = s->max_size;
+}
+size = tcp_chr_recv(chr, (void *)thl.buf, len);
+if (size == 0 || (size == -1 && errno != EAGAIN)) {
+/* connection closed */
+tcp_chr_disconnect(chr);
+thl = (const JSONthrottle){0};
+return TRUE;
 }
-if (size > 0) {
-qemu_chr_be_write(chr, buf, size);
+if (size < 0) {
+return TRUE;
 }
+thl.load = size;
+thl.cursor = 0;
+}
+
+size = thl.load;
+start = thl.buf + thl.cursor;
+pos = qemu_chr_end_position((const char *) sta

[PATCH 1/2] iotests: add another bash sleep command to 247

2020-11-06 Thread Andrey Shinkevich via
This patch paves the way for the one that follows. The following patch
makes the QMP monitor read up to 4K from stdin at once. As a result, the
bash 'sleep' command runs before _qemu_proc_exec() starts in the
subshell. Another 'sleep' command with an unobtrusive 'query-status'
serves as a workaround.

Signed-off-by: Andrey Shinkevich 
---
 tests/qemu-iotests/247 | 2 ++
 tests/qemu-iotests/247.out | 1 +
 2 files changed, 3 insertions(+)

diff --git a/tests/qemu-iotests/247 b/tests/qemu-iotests/247
index 87e37b3..7d316ec 100755
--- a/tests/qemu-iotests/247
+++ b/tests/qemu-iotests/247
@@ -59,6 +59,8 @@ TEST_IMG="$TEST_IMG.4" _make_test_img $size
 {"execute":"block-commit",
  "arguments":{"device":"format-4", "top-node": "format-2", 
"base-node":"format-0", "job-id":"job0"}}
 EOF
+sleep 1
+echo '{"execute":"query-status"}'
 if [ "${VALGRIND_QEMU}" == "y" ]; then
 sleep 10
 else
diff --git a/tests/qemu-iotests/247.out b/tests/qemu-iotests/247.out
index e909e83..13d9547 100644
--- a/tests/qemu-iotests/247.out
+++ b/tests/qemu-iotests/247.out
@@ -17,6 +17,7 @@ QMP_VERSION
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"BLOCK_JOB_COMPLETED", "data": {"device": "job0", "len": 134217728, "offset": 
134217728, "speed": 0, "type": "commit"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "job0"}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
+{"return": {"status": "running", "singlestep": false, "running": true}}
 {"return": {}}
 {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
"SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
 *** done
-- 
1.8.3.1




[PATCH 0/2] Increase amount of data for monitor to read

2020-11-06 Thread Andrey Shinkevich via
The subject was discussed here:
https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg00206.html

This series is a solution for the issue with QMP monitor buffered input.
A little parser is introduced to throttle JSON commands read from the
buffer so that QMP requests do not overwhelm the monitor input queue.
A side effect raised in the test #247 was managed in the first patch.
It may be considered as a workaround. Any sane fix suggested will be
appreciated.

Note:
This series goes after Vladimir's one:
'[PATCH v3 00/25] backup performance: block_status + async'
To make test #129 pass, the following patch should be applied first:
'[PATCH v3 01/25] iotests: 129 don't check backup "busy"'.

Andrey Shinkevich (2):
  iotests: add another bash sleep command to 247
  monitor: increase amount of data for monitor to read

 chardev/char-fd.c  | 64 +-
 chardev/char-socket.c  | 54 +++---
 chardev/char.c | 40 +
 include/chardev/char.h | 15 +++
 monitor/monitor.c  |  2 +-
 tests/qemu-iotests/247 |  2 ++
 tests/qemu-iotests/247.out |  1 +
 7 files changed, 161 insertions(+), 17 deletions(-)

-- 
1.8.3.1




Re: [PATCH v12 14/14] block: apply COR-filter to block-stream jobs

2020-10-27 Thread Andrey Shinkevich



On 27.10.2020 20:57, Vladimir Sementsov-Ogievskiy wrote:

27.10.2020 20:48, Andrey Shinkevich wrote:


On 27.10.2020 19:13, Vladimir Sementsov-Ogievskiy wrote:

22.10.2020 21:13, Andrey Shinkevich wrote:

This patch completes the series with the COR-filter insertion for
block-stream operations. Adding the filter makes it possible for copied
regions to be discarded in backing files during the block-stream job,
which will reduce disk overuse.
The COR-filter insertion incurs changes in the iotests case
245:test_block_stream_4 that reopens the backing chain during a
block-stream job. There are changes in the iotests #030 as well.
The iotests case 030:test_stream_parallel was deleted due to multiple
conflicts between the concurrent job operations over the same backing
chain. The base backing node for one job is the top node for another
job. It may change due to the filter node inserted into the backing
chain while both jobs are running. Another issue is that the parts of
the backing chain are being frozen by the running job and may not be
changed by the concurrent job when needed. The concept of parallel
jobs with common nodes is no longer considered vital.

Signed-off-by: Andrey Shinkevich 
---
  block/stream.c | 98 
++

  tests/qemu-iotests/030 | 51 +++-
  tests/qemu-iotests/030.out |  4 +-
  tests/qemu-iotests/141.out |  2 +-
  tests/qemu-iotests/245 | 22 +++
  5 files changed, 87 insertions(+), 90 deletions(-)

diff --git a/block/stream.c b/block/stream.c



[...]

+    s = block_job_create(job_id, _job_driver, NULL, 
cor_filter_bs,

+ BLK_PERM_CONSISTENT_READ,
+ basic_flags | BLK_PERM_WRITE | 
BLK_PERM_GRAPH_MOD,


I think that BLK_PERM_GRAPH_MOD is something outdated. We have 
chain-freeze; what does BLK_PERM_GRAPH_MOD add to it? I don't know, and 
doubt that anybody knows.




That is true for the commit/mirror jobs also. If we agree to remove 
the flag BLK_PERM_GRAPH_MOD from all these jobs, it will be made in a 
separate series, won't it?


Hmm. At least, let's not implement new logic based on 
BLK_PERM_GRAPH_MOD. In original code it's only block_job_create's perm, 
not in shared_perm, not somewhere else.. So, if we keep it, let's keep 
it as is: only in perm in block_job_create, not implementing additional 
perm/shared_perm logic.




With @perm=0 in the block_job_add_bdrv(>common, "active node"...), it 
won't.





   speed, creation_flags, NULL, NULL, errp);
  if (!s) {
  goto fail;
  }
+    /*
+ * Prevent concurrent jobs trying to modify the graph structure 
here, we
+ * already have our own plans. Also don't allow resize as the 
image size is

+ * queried only at the job start and then cached.
+ */
+    if (block_job_add_bdrv(>common, "active node", bs,
+   basic_flags | BLK_PERM_GRAPH_MOD,


Why not 0, like for other nodes? We don't use this BdrvChild at all, so 
why require permissions?




Yes, '0' is right.

+   basic_flags | BLK_PERM_WRITE, 
_abort)) {

+    goto fail;
+    }
+
  /* Block all intermediate nodes between bs and base, because 



[...]


diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index dcb4b5d..0064590 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -227,61 +227,20 @@ class TestParallelOps(iotests.QMPTestCase):
  for img in self.imgs:
  os.remove(img)
-    # Test that it's possible to run several block-stream operations
-    # in parallel in the same snapshot chain
-    @unittest.skipIf(os.environ.get('QEMU_CHECK_BLOCK_AUTO'), 
'disabled in CI')

-    def test_stream_parallel(self):


Didn't we agree to add a "bottom" parameter to QMP? Then this test case 
can be rewritten using

node names and the new "bottom" stream argument.



I guess it will not help for the whole test. In particular, there is an 
issue with freezing the child link to the COR-filter of the concurrent 
job; then it fails to finish first.


We should not have such frozen link, as our bottom node should be above 
COR-filter of concurrent job.





The bdrv_freeze_backing_chain(bs, above_base, errp) does that job. Max 
insisted on keeping it.


Andrey



Re: [PATCH v12 14/14] block: apply COR-filter to block-stream jobs

2020-10-27 Thread Andrey Shinkevich



On 27.10.2020 19:13, Vladimir Sementsov-Ogievskiy wrote:

22.10.2020 21:13, Andrey Shinkevich wrote:

This patch completes the series with the COR-filter insertion for
block-stream operations. Adding the filter makes it possible for copied
regions to be discarded in backing files during the block-stream job,
which will reduce disk overuse.
The COR-filter insertion incurs changes in the iotests case
245:test_block_stream_4 that reopens the backing chain during a
block-stream job. There are changes in the iotests #030 as well.
The iotests case 030:test_stream_parallel was deleted due to multiple
conflicts between the concurrent job operations over the same backing
chain. The base backing node for one job is the top node for another
job. It may change due to the filter node inserted into the backing
chain while both jobs are running. Another issue is that the parts of
the backing chain are being frozen by the running job and may not be
changed by the concurrent job when needed. The concept of parallel
jobs with common nodes is no longer considered vital.

Signed-off-by: Andrey Shinkevich 
---
  block/stream.c | 98 
++

  tests/qemu-iotests/030 | 51 +++-
  tests/qemu-iotests/030.out |  4 +-
  tests/qemu-iotests/141.out |  2 +-
  tests/qemu-iotests/245 | 22 +++
  5 files changed, 87 insertions(+), 90 deletions(-)

diff --git a/block/stream.c b/block/stream.c



[...]

+    s = block_job_create(job_id, _job_driver, NULL, 
cor_filter_bs,

+ BLK_PERM_CONSISTENT_READ,
+ basic_flags | BLK_PERM_WRITE | 
BLK_PERM_GRAPH_MOD,


I think that BLK_PERM_GRAPH_MOD is something outdated. We have 
chain-freeze; what does BLK_PERM_GRAPH_MOD add to it? I don't know, and doubt 
that anybody knows.




That is true for the commit/mirror jobs also. If we agree to remove the 
flag BLK_PERM_GRAPH_MOD from all these jobs, it will be made in a 
separate series, won't it?



   speed, creation_flags, NULL, NULL, errp);
  if (!s) {
  goto fail;
  }
+    /*
+ * Prevent concurrent jobs trying to modify the graph structure 
here, we
+ * already have our own plans. Also don't allow resize as the 
image size is

+ * queried only at the job start and then cached.
+ */
+    if (block_job_add_bdrv(>common, "active node", bs,
+   basic_flags | BLK_PERM_GRAPH_MOD,


Why not 0, like for other nodes? We don't use this BdrvChild at all, so why 
require permissions?




Yes, '0' is right.

+   basic_flags | BLK_PERM_WRITE, 
_abort)) {

+    goto fail;
+    }
+
  /* Block all intermediate nodes between bs and base, because 



[...]


diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index dcb4b5d..0064590 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -227,61 +227,20 @@ class TestParallelOps(iotests.QMPTestCase):
  for img in self.imgs:
  os.remove(img)
-    # Test that it's possible to run several block-stream operations
-    # in parallel in the same snapshot chain
-    @unittest.skipIf(os.environ.get('QEMU_CHECK_BLOCK_AUTO'), 
'disabled in CI')

-    def test_stream_parallel(self):


Didn't we agree to add a "bottom" parameter to QMP? Then this test case can 
be rewritten using

node names and the new "bottom" stream argument.



I guess it will not help for the whole test. In particular, there is an 
issue with freezing the child link to the COR-filter of the concurrent job; 
then it fails to finish first.


Andrey



Re: [PATCH v12 13/14] stream: skip filters when writing backing file name to QCOW2 header

2020-10-27 Thread Andrey Shinkevich

On 27.10.2020 19:21, Vladimir Sementsov-Ogievskiy wrote:

27.10.2020 19:01, Andrey Shinkevich wrote:

On 27.10.2020 18:09, Vladimir Sementsov-Ogievskiy wrote:

22.10.2020 21:13, Andrey Shinkevich wrote:

Avoid writing a filter JSON file name and a filter format name to QCOW2
image when the backing file is changed after the block stream job.
A user is still able to assign the 'backing-file' parameter for a
block-stream job keeping in mind the possible issue mentioned above.
If the user does not specify the 'backing-file' parameter, QEMU will
assign it automatically.

Signed-off-by: Andrey Shinkevich 
---
  block/stream.c | 15 +--
  blockdev.c |  9 ++---
  2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index e0540ee..1ba74ab 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -65,6 +65,7 @@ static int stream_prepare(Job *job)
  BlockDriverState *bs = blk_bs(bjob->blk);
  BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
  BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
+    BlockDriverState *base_unfiltered = NULL;
  Error *local_err = NULL;
  int ret = 0;
@@ -75,8 +76,18 @@ static int stream_prepare(Job *job)
  const char *base_id = NULL, *base_fmt = NULL;
  if (base) {
  base_id = s->backing_file_str;
-    if (base->drv) {
-    base_fmt = base->drv->format_name;
+    if (base_id) {
+    if (base->drv) {
+    base_fmt = base->drv->format_name;


hmm. this doesn't make real sense: so, we assume that user specified 
backing_file_str, which may not relate to base, but we use 
base->drv->format_name? But it may be name of the filter driver, 
which would be wrong..


Any ideas?

1. we can use base_fmt=NULL, to provoke probing on next open of the 
qcow2 file..


I would choose item #1 but have to check the probing code 
logic... In particular, I do not remember now whether probing is able to 
recognize a protocol.
The logic for the format_name in the existing QEMU code (I have kept it 
here in the patch) is a slippery slope for an imprudent user. That's why 
I bet on deprecating backing_file_str in the previous version.



2. we can do probing now
3. we can at least check, if backing_file_str == 


Not bad for the sanity check but we will search a node by the file 
name again - not good ((


Not search, but only check one very likely option.


Yes, just strcmp(). And why can't a user merely specify the desired 
backing file as the base?




Additionally to 1. or 3. (or combined), or even keeping things as is 
(i.e. wrong, but it is preexisting), we can:


  - add backing-format argument to qapi as pair for backing-file
  - deprecate using backing-file without backing-format.

Then, after the deprecation period we'll have correct code. This may be done 
separately.




base_unfiltered->filename, in this case we can use 
base_unfiltered->drv->format_name




+    }
+    } else {
+    base_unfiltered = bdrv_skip_filters(base);
+    if (base_unfiltered) {
+    base_id = base_unfiltered->filename;
+    if (base_unfiltered->drv) {
+    base_fmt = base_unfiltered->drv->format_name;
+    }
+    }
  }
  }
  bdrv_set_backing_hd(unfiltered_bs, base, _err);
diff --git a/blockdev.c b/blockdev.c
index c917625..0e9c783 100644
--- a/blockdev.c
+++ b/blockdev.c


[...]


-    stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
+    stream_start(has_job_id ? job_id : NULL, bs, base_bs,
+ has_backing_file ? backing_file : NULL,


backing_file should be NULL if has_backing_file is false, so you can 
use just backing_file instead of ternary operator.




Yes, if that is reliable. I have kept it consistent with the ternary 
operator used for the first parameter above.


Andrey


   job_flags, has_speed ? speed : 0, on_error,
   filter_node_name, _err);
  if (local_err) {











Re: [PATCH v12 13/14] stream: skip filters when writing backing file name to QCOW2 header

2020-10-27 Thread Andrey Shinkevich

On 27.10.2020 18:09, Vladimir Sementsov-Ogievskiy wrote:

22.10.2020 21:13, Andrey Shinkevich wrote:

Avoid writing a filter JSON file name and a filter format name to QCOW2
image when the backing file is changed after the block stream job.
A user is still able to assign the 'backing-file' parameter for a
block-stream job keeping in mind the possible issue mentioned above.
If the user does not specify the 'backing-file' parameter, QEMU will
assign it automatically.

Signed-off-by: Andrey Shinkevich 
---
  block/stream.c | 15 +--
  blockdev.c |  9 ++---
  2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index e0540ee..1ba74ab 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -65,6 +65,7 @@ static int stream_prepare(Job *job)
  BlockDriverState *bs = blk_bs(bjob->blk);
  BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
  BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
+    BlockDriverState *base_unfiltered = NULL;
  Error *local_err = NULL;
  int ret = 0;
@@ -75,8 +76,18 @@ static int stream_prepare(Job *job)
  const char *base_id = NULL, *base_fmt = NULL;
  if (base) {
  base_id = s->backing_file_str;
-    if (base->drv) {
-    base_fmt = base->drv->format_name;
+    if (base_id) {
+    if (base->drv) {
+    base_fmt = base->drv->format_name;


hmm. this doesn't make real sense: so, we assume that user specified 
backing_file_str, which may not relate to base, but we use 
base->drv->format_name? But it may be name of the filter driver, which 
would be wrong..


Any ideas?

1. we can use base_fmt=NULL, to provoke probing on next open of the 
qcow2 file..


I would choose item #1 but have to check the probing code logic... 
In particular, I do not remember now whether probing is able to recognize 
a protocol.
The logic for the format_name in the existing QEMU code (I have kept it 
here in the patch) is a slippery slope for an imprudent user. That's why I 
bet on deprecating backing_file_str in the previous version.



2. we can do probing now
3. we can at least check, if backing_file_str == 


Not bad for the sanity check but we will search a node by the file name 
again - not good ((


base_unfiltered->filename, in this case we can use 
base_unfiltered->drv->format_name




+    }
+    } else {
+    base_unfiltered = bdrv_skip_filters(base);
+    if (base_unfiltered) {
+    base_id = base_unfiltered->filename;
+    if (base_unfiltered->drv) {
+    base_fmt = base_unfiltered->drv->format_name;
+    }
+    }
  }
  }
  bdrv_set_backing_hd(unfiltered_bs, base, _err);
diff --git a/blockdev.c b/blockdev.c
index c917625..0e9c783 100644
--- a/blockdev.c
+++ b/blockdev.c


[...]


-    stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
+    stream_start(has_job_id ? job_id : NULL, bs, base_bs,
+ has_backing_file ? backing_file : NULL,


backing_file should be NULL if has_backing_file is false, so you can use 
just backing_file instead of ternary operator.




Yes, if that is reliable. I have kept it consistent with the ternary operator 
used for the first parameter above.


Andrey


   job_flags, has_speed ? speed : 0, on_error,
   filter_node_name, _err);
  if (local_err) {








Re: [PATCH v12 06/14] copy-on-read: pass bottom node name to COR driver

2020-10-23 Thread Andrey Shinkevich



On 23.10.2020 17:45, Vladimir Sementsov-Ogievskiy wrote:

22.10.2020 21:13, Andrey Shinkevich wrote:

We are going to use the COR-filter for a block-stream job.
To limit COR operations by the base node in the backing chain during a
stream job, pass the bottom node name, which is the first non-filter
overlay of the base, to the copy-on-read driver, as the base node itself
may change due to possible concurrent jobs.
The rest of the functionality will be implemented in the patch that
follows.

Signed-off-by: Andrey Shinkevich 
---
  block/copy-on-read.c | 16 
  1 file changed, 16 insertions(+)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 618c4c4..3d8e4db 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -24,18 +24,24 @@
  #include "block/block_int.h"
  #include "qemu/module.h"
  #include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
  #include "block/copy-on-read.h"
  typedef struct BDRVStateCOR {
  bool active;
+    BlockDriverState *bottom_bs;
  } BDRVStateCOR;
  static int cor_open(BlockDriverState *bs, QDict *options, int flags,
  Error **errp)
  {
+    BlockDriverState *bottom_bs = NULL;
  BDRVStateCOR *state = bs->opaque;
+    /* Find a bottom node name, if any */
+    const char *bottom_node = qdict_get_try_str(options, "bottom");
  bs->file = bdrv_open_child(NULL, options, "file", bs, 
_of_bds,
 BDRV_CHILD_FILTERED | 
BDRV_CHILD_PRIMARY,
@@ -51,7 +57,17 @@ static int cor_open(BlockDriverState *bs, QDict 
*options, int flags,

  ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
  bs->file->bs->supported_zero_flags);
+    if (bottom_node) {
+    bottom_bs = bdrv_lookup_bs(NULL, bottom_node, errp);
+    if (!bottom_bs) {
+    error_setg(errp, QERR_BASE_NOT_FOUND, bottom_node);


QERR_BASE_NOT_FOUND is unrelated here. Also, I see a comment in qerror.h 
that such macros should not be used in new code. And don't forget to 
drop qerror.h include line.




I was surprised because I don't have it in my branch; instead I do:
error_setg(errp, "Bottom node '%s' not found", bottom_node);


+    qdict_del(options, "bottom");


this may be moved above "bottom_bs = ..", so that it need not be called 
separately both inside and after the "if".




Please, see the "Re: [PATCH v11 04/13] copy-on-read: pass overlay base 
node name to COR driver".



+    return -EINVAL;
+    }
+    qdict_del(options, "bottom");
+    }
  state->active = true;
+    state->bottom_bs = bottom_bs;
  /*
   * We don't need to call bdrv_child_refresh_perms() now as the 
permissions









[PATCH v12 12/14] copy-on-read: skip non-guest reads if no copy needed

2020-10-22 Thread Andrey Shinkevich via
If the BDRV_REQ_PREFETCH flag is set, skip idle read/write operations
in the COR driver. This can be taken into account when optimizing the
COR algorithms. At the moment, this check is made during the
block-stream job.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index a2b180a..081e661 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -153,10 +153,14 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
 }
 }
 
-ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
-  local_flags);
-if (ret < 0) {
-return ret;
+/* Skip if neither read nor write are needed */
+if ((local_flags & (BDRV_REQ_PREFETCH | BDRV_REQ_COPY_ON_READ)) !=
+BDRV_REQ_PREFETCH) {
+ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+  local_flags);
+if (ret < 0) {
+return ret;
+}
 }
 
 offset += n;
-- 
1.8.3.1




[PATCH v12 06/14] copy-on-read: pass bottom node name to COR driver

2020-10-22 Thread Andrey Shinkevich via
We are going to use the COR-filter for a block-stream job.
To limit COR operations by the base node in the backing chain during a
stream job, pass the bottom node name, i.e. the name of the first
non-filter overlay of the base, to the copy-on-read driver, because the
base node itself may change due to possible concurrent jobs.
The rest of the functionality will be implemented in the patch that
follows.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 618c4c4..3d8e4db 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -24,18 +24,24 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
 #include "block/copy-on-read.h"
 
 
 typedef struct BDRVStateCOR {
 bool active;
+BlockDriverState *bottom_bs;
 } BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BlockDriverState *bottom_bs = NULL;
 BDRVStateCOR *state = bs->opaque;
+/* Find a bottom node name, if any */
+const char *bottom_node = qdict_get_try_str(options, "bottom");
 
 bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
@@ -51,7 +57,17 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+if (bottom_node) {
+bottom_bs = bdrv_lookup_bs(NULL, bottom_node, errp);
+if (!bottom_bs) {
+error_setg(errp, QERR_BASE_NOT_FOUND, bottom_node);
+qdict_del(options, "bottom");
+return -EINVAL;
+}
+qdict_del(options, "bottom");
+}
 state->active = true;
+state->bottom_bs = bottom_bs;
 
 /*
  * We don't need to call bdrv_child_refresh_perms() now as the permissions
-- 
1.8.3.1




[PATCH v12 09/14] block: modify the comment for BDRV_REQ_PREFETCH flag

2020-10-22 Thread Andrey Shinkevich via
Modify the comment for the flag BDRV_REQ_PREFETCH as we are going to
use it alone and pass it to the COR-filter driver for further
processing.

Signed-off-by: Andrey Shinkevich 
---
 include/block/block.h | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index ae7612f..1b6742f 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -81,9 +81,11 @@ typedef enum {
 BDRV_REQ_NO_FALLBACK= 0x100,
 
 /*
- * BDRV_REQ_PREFETCH may be used only together with BDRV_REQ_COPY_ON_READ
- * on read request and means that caller doesn't really need data to be
- * written to qiov parameter which may be NULL.
+ * BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read
+ * (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR
+ * filter is involved), in which case it signals that the COR operation
+ * need not read the data into memory (qiov) but only ensure they are
+ * copied to the top layer (i.e., that COR operation is done).
  */
 BDRV_REQ_PREFETCH  = 0x200,
 /* Mask of valid flags */
-- 
1.8.3.1




[PATCH v12 13/14] stream: skip filters when writing backing file name to QCOW2 header

2020-10-22 Thread Andrey Shinkevich via
Avoid writing a filter JSON file name and a filter format name to a QCOW2
image when the backing file is changed after the block-stream job.
A user is still able to assign the 'backing-file' parameter for a
block-stream job keeping in mind the possible issue mentioned above.
If the user does not specify the 'backing-file' parameter, QEMU will
assign it automatically.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 15 +--
 blockdev.c |  9 ++---
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index e0540ee..1ba74ab 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -65,6 +65,7 @@ static int stream_prepare(Job *job)
 BlockDriverState *bs = blk_bs(bjob->blk);
 BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
 BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
+BlockDriverState *base_unfiltered = NULL;
 Error *local_err = NULL;
 int ret = 0;
 
@@ -75,8 +76,18 @@ static int stream_prepare(Job *job)
 const char *base_id = NULL, *base_fmt = NULL;
 if (base) {
 base_id = s->backing_file_str;
-if (base->drv) {
-base_fmt = base->drv->format_name;
+if (base_id) {
+if (base->drv) {
+base_fmt = base->drv->format_name;
+}
+} else {
+base_unfiltered = bdrv_skip_filters(base);
+if (base_unfiltered) {
+base_id = base_unfiltered->filename;
+if (base_unfiltered->drv) {
+base_fmt = base_unfiltered->drv->format_name;
+}
+}
 }
 }
 bdrv_set_backing_hd(unfiltered_bs, base, _err);
diff --git a/blockdev.c b/blockdev.c
index c917625..0e9c783 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2508,7 +2508,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 BlockDriverState *base_bs = NULL;
 AioContext *aio_context;
 Error *local_err = NULL;
-const char *base_name = NULL;
 int job_flags = JOB_DEFAULT;
 
 if (!has_on_error) {
@@ -2536,7 +2535,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 goto out;
 }
 assert(bdrv_get_aio_context(base_bs) == aio_context);
-base_name = base;
 }
 
 if (has_base_node) {
@@ -2551,7 +2549,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 assert(bdrv_get_aio_context(base_bs) == aio_context);
 bdrv_refresh_filename(base_bs);
-base_name = base_bs->filename;
 }
 
 /* Check for op blockers in the whole chain between bs and base */
@@ -2571,9 +2568,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 goto out;
 }
 
-/* backing_file string overrides base bs filename */
-base_name = has_backing_file ? backing_file : base_name;
-
 if (has_auto_finalize && !auto_finalize) {
 job_flags |= JOB_MANUAL_FINALIZE;
 }
@@ -2581,7 +2575,8 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 job_flags |= JOB_MANUAL_DISMISS;
 }
 
-stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
+stream_start(has_job_id ? job_id : NULL, bs, base_bs,
+ has_backing_file ? backing_file : NULL,
  job_flags, has_speed ? speed : 0, on_error,
  filter_node_name, _err);
 if (local_err) {
-- 
1.8.3.1




[PATCH v12 00/14] Apply COR-filter to the block-stream permanently

2020-10-22 Thread Andrey Shinkevich via
The node insert/remove functions were added at the block generic layer.
COR-filter options structure was added to the QAPI.
The test case #310 was added to check the 'bottom' node limit for COR.
The 'supported_read_flags' member was added to the BDS structure
(with the flags check at the block generic layer for drivers).

v12:
  02: New.
  03: Only the temporary drop filter function left.
  05: New (suggested by Max)
  06: 'base' -> 'bottom' option.
  07: Fixes based on the review of the v11.
  08: New.
  09: The comment text was modified.
  10: The read flags check at the block generic layer.
  11: COR flag was added.
  12: The condition was fixed.
  13: The 'backing-file' parameter returned. No deprecation.
  14: The COR-filter 'add' function replaced with the 'insert node' generic
  function. Fixes based on the review of the v11.

Andrey Shinkevich (14):
  copy-on-read: support preadv/pwritev_part functions
  block: add insert/remove node functions
  copy-on-read: add filter drop function
  qapi: add filter-node-name to block-stream
  qapi: create BlockdevOptionsCor structure for COR driver
  copy-on-read: pass bottom node name to COR driver
  copy-on-read: limit COR operations to bottom node
  iotests: add #310 to test bottom node in COR driver
  block: modify the comment for BDRV_REQ_PREFETCH flag
  block: include supported_read_flags into BDS structure
  copy-on-read: add support for read flags to COR-filter
  copy-on-read: skip non-guest reads if no copy needed
  stream: skip filters when writing backing file name to QCOW2 header
  block: apply COR-filter to block-stream jobs

 block.c|  49 ++
 block/copy-on-read.c   | 144 +
 block/copy-on-read.h   |  32 +
 block/io.c |  12 +++-
 block/monitor/block-hmp-cmds.c |   4 +-
 block/stream.c | 117 ++---
 blockdev.c |  13 ++--
 include/block/block.h  |  11 +++-
 include/block/block_int.h  |  11 +++-
 qapi/block-core.json   |  27 +++-
 tests/qemu-iotests/030 |  51 ++-
 tests/qemu-iotests/030.out |   4 +-
 tests/qemu-iotests/141.out |   2 +-
 tests/qemu-iotests/245 |  22 +--
 tests/qemu-iotests/310 | 109 +++
 tests/qemu-iotests/310.out |  15 +
 tests/qemu-iotests/group   |   3 +-
 17 files changed, 503 insertions(+), 123 deletions(-)
 create mode 100644 block/copy-on-read.h
 create mode 100755 tests/qemu-iotests/310
 create mode 100644 tests/qemu-iotests/310.out

-- 
1.8.3.1




[PATCH v12 11/14] copy-on-read: add support for read flags to COR-filter

2020-10-22 Thread Andrey Shinkevich via
Add the BDRV_REQ_COPY_ON_READ and BDRV_REQ_PREFETCH flags to the
supported_read_flags of the COR-filter.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 8178a91..a2b180a 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -50,6 +50,8 @@ static int cor_open(BlockDriverState *bs, QDict *options, int 
flags,
 return -EINVAL;
 }
 
+bs->supported_read_flags = BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH;
+
 bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
 (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
 
-- 
1.8.3.1




[PATCH v12 14/14] block: apply COR-filter to block-stream jobs

2020-10-22 Thread Andrey Shinkevich via
This patch completes the series with the COR-filter insertion for
block-stream operations. Adding the filter makes it possible for copied
regions to be discarded in backing files during the block-stream job,
which will reduce disk overuse.
The COR-filter insertion incurs changes in the iotests case
245:test_block_stream_4 that reopens the backing chain during a
block-stream job. There are changes in the iotests #030 as well.
The iotests case 030:test_stream_parallel was deleted due to multiple
conflicts between the concurrent job operations over the same backing
chain. The base backing node for one job is the top node for another
job. It may change due to the filter node inserted into the backing
chain while both jobs are running. Another issue is that the parts of
the backing chain are being frozen by the running job and may not be
changed by the concurrent job when needed. The concept of parallel
jobs with common nodes is no longer considered vital.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 98 ++
 tests/qemu-iotests/030 | 51 +++-
 tests/qemu-iotests/030.out |  4 +-
 tests/qemu-iotests/141.out |  2 +-
 tests/qemu-iotests/245 | 22 +++
 5 files changed, 87 insertions(+), 90 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index 1ba74ab..f6ed315 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -17,8 +17,10 @@
 #include "block/blockjob_int.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
+#include "block/copy-on-read.h"
 
 enum {
 /*
@@ -33,6 +35,8 @@ typedef struct StreamBlockJob {
 BlockJob common;
 BlockDriverState *base_overlay; /* COW overlay (stream from this) */
 BlockDriverState *above_base;   /* Node directly above the base */
+BlockDriverState *cor_filter_bs;
+BlockDriverState *target_bs;
 BlockdevOnError on_error;
 char *backing_file_str;
 bool bs_read_only;
@@ -44,8 +48,7 @@ static int coroutine_fn stream_populate(BlockBackend *blk,
 {
 assert(bytes < SIZE_MAX);
 
-return blk_co_preadv(blk, offset, bytes, NULL,
- BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
+return blk_co_preadv(blk, offset, bytes, NULL, BDRV_REQ_PREFETCH);
 }
 
 static void stream_abort(Job *job)
@@ -53,23 +56,20 @@ static void stream_abort(Job *job)
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 
 if (s->chain_frozen) {
-BlockJob *bjob = >common;
-bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 }
 }
 
 static int stream_prepare(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
-BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
 BlockDriverState *base_unfiltered = NULL;
 Error *local_err = NULL;
 int ret = 0;
 
-bdrv_unfreeze_backing_chain(bs, s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 s->chain_frozen = false;
 
 if (bdrv_cow_child(unfiltered_bs)) {
@@ -105,15 +105,16 @@ static void stream_clean(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
 
 /* Reopen the image back in read-only mode if necessary */
 if (s->bs_read_only) {
 /* Give up write permissions before making it read-only */
 blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, _abort);
-bdrv_reopen_set_read_only(bs, true, NULL);
+bdrv_reopen_set_read_only(s->target_bs, true, NULL);
 }
 
+bdrv_cor_filter_drop(s->cor_filter_bs);
+
 g_free(s->backing_file_str);
 }
 
@@ -121,9 +122,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockBackend *blk = s->common.blk;
-BlockDriverState *bs = blk_bs(blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
-bool enable_cor = !bdrv_cow_child(s->base_overlay);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 int64_t len;
 int64_t offset = 0;
 uint64_t delay_ns = 0;
@@ -135,21 +134,12 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 return 0;
 }
 
-len = bdrv_getlength(bs);
+len = bdrv_getlength(s->target_bs);
 if (len < 0) {
 return len;
 }
 job_progress_set_remaining(>common.

[PATCH v12 01/14] copy-on-read: support preadv/pwritev_part functions

2020-10-22 Thread Andrey Shinkevich via
Add support for the recently introduced functions
bdrv_co_preadv_part()
and
bdrv_co_pwritev_part()
to the COR-filter driver.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/copy-on-read.c | 28 
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 2816e61..cb03e0f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -74,21 +74,25 @@ static int64_t cor_getlength(BlockDriverState *bs)
 }
 
 
-static int coroutine_fn cor_co_preadv(BlockDriverState *bs,
-  uint64_t offset, uint64_t bytes,
-  QEMUIOVector *qiov, int flags)
+static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs,
+   uint64_t offset, uint64_t bytes,
+   QEMUIOVector *qiov,
+   size_t qiov_offset,
+   int flags)
 {
-return bdrv_co_preadv(bs->file, offset, bytes, qiov,
-  flags | BDRV_REQ_COPY_ON_READ);
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
 }
 
 
-static int coroutine_fn cor_co_pwritev(BlockDriverState *bs,
-   uint64_t offset, uint64_t bytes,
-   QEMUIOVector *qiov, int flags)
+static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs,
+uint64_t offset,
+uint64_t bytes,
+QEMUIOVector *qiov,
+size_t qiov_offset, int flags)
 {
-
-return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
+flags);
 }
 
 
@@ -137,8 +141,8 @@ static BlockDriver bdrv_copy_on_read = {
 
 .bdrv_getlength = cor_getlength,
 
-.bdrv_co_preadv = cor_co_preadv,
-.bdrv_co_pwritev= cor_co_pwritev,
+.bdrv_co_preadv_part= cor_co_preadv_part,
+.bdrv_co_pwritev_part   = cor_co_pwritev_part,
 .bdrv_co_pwrite_zeroes  = cor_co_pwrite_zeroes,
 .bdrv_co_pdiscard   = cor_co_pdiscard,
 .bdrv_co_pwritev_compressed = cor_co_pwritev_compressed,
-- 
1.8.3.1




[PATCH v12 05/14] qapi: create BlockdevOptionsCor structure for COR driver

2020-10-22 Thread Andrey Shinkevich via
Create the BlockdevOptionsCor structure for COR driver specific options,
splitting it off from BlockdevOptionsGenericFormat. The only option in
the structure, the 'bottom' node, denotes an image file that limits the
COR operations in the backing chain.

Suggested-by: Max Reitz 
Signed-off-by: Andrey Shinkevich 
---
 qapi/block-core.json | 21 -
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 0a64306..bf465f6 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3938,6 +3938,25 @@
   'data': { 'throttle-group': 'str',
 'file' : 'BlockdevRef'
  } }
+
+##
+# @BlockdevOptionsCor:
+#
+# Driver specific block device options for the copy-on-read driver.
+#
+# @bottom: the name of a non-filter node (allocation-bearing layer) that limits
+#  the COR operations in the backing chain (inclusive).
+#  For the block-stream job, it will be the first non-filter overlay of
+#  the base node. We do not involve the base node into the COR
+#  operations because the base may change due to a concurrent
+#  block-commit job on the same backing chain.
+#
+# Since: 5.2
+##
+{ 'struct': 'BlockdevOptionsCor',
+  'base': 'BlockdevOptionsGenericFormat',
+  'data': { '*bottom': 'str' } }
+
 ##
 # @BlockdevOptions:
 #
@@ -3990,7 +4009,7 @@
   'bochs':  'BlockdevOptionsGenericFormat',
   'cloop':  'BlockdevOptionsGenericFormat',
   'compress':   'BlockdevOptionsGenericFormat',
-  'copy-on-read':'BlockdevOptionsGenericFormat',
+  'copy-on-read':'BlockdevOptionsCor',
   'dmg':'BlockdevOptionsGenericFormat',
   'file':   'BlockdevOptionsFile',
   'ftp':'BlockdevOptionsCurlFtp',
-- 
1.8.3.1




[PATCH v12 10/14] block: include supported_read_flags into BDS structure

2020-10-22 Thread Andrey Shinkevich via
Add the new member supported_read_flags to the BlockDriverState
structure. It will control the flags set for copy-on-read operations.
Make the block generic layer evaluate supported read flags before they
go to a block driver.

Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Andrey Shinkevich 
---
 block/io.c| 12 ++--
 include/block/block_int.h |  4 
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/block/io.c b/block/io.c
index 54f0968..78ddf13 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1392,6 +1392,9 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
 if (flags & BDRV_REQ_COPY_ON_READ) {
 int64_t pnum;
 
+/* The flag BDRV_REQ_COPY_ON_READ has reached its addressee */
+flags &= ~BDRV_REQ_COPY_ON_READ;
+
 ret = bdrv_is_allocated(bs, offset, bytes, );
 if (ret < 0) {
 goto out;
@@ -1413,9 +1416,13 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
 goto out;
 }
 
+if (flags & ~bs->supported_read_flags) {
+abort();
+}
+
 max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
 if (bytes <= max_bytes && bytes <= max_transfer) {
-ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, 0);
+ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, flags);
 goto out;
 }
 
@@ -1428,7 +1435,8 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
 
 ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
  num, qiov,
- qiov_offset + bytes - bytes_remaining, 0);
+ qiov_offset + bytes - bytes_remaining,
+ flags);
 max_bytes -= num;
 } else {
 num = bytes_remaining;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index f782737..474174c 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -873,6 +873,10 @@ struct BlockDriverState {
 /* I/O Limits */
 BlockLimits bl;
 
+/*
+ * Flags honored during pread
+ */
+unsigned int supported_read_flags;
 /* Flags honored during pwrite (so far: BDRV_REQ_FUA,
  * BDRV_REQ_WRITE_UNCHANGED).
  * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those
-- 
1.8.3.1




[PATCH v12 02/14] block: add insert/remove node functions

2020-10-22 Thread Andrey Shinkevich via
Provide API for a node insertion to and removal from a backing chain.

Suggested-by: Max Reitz 
Signed-off-by: Andrey Shinkevich 
---
 block.c   | 49 +
 include/block/block.h |  3 +++
 2 files changed, 52 insertions(+)

diff --git a/block.c b/block.c
index 430edf7..502b483 100644
--- a/block.c
+++ b/block.c
@@ -4670,6 +4670,55 @@ static void bdrv_delete(BlockDriverState *bs)
 g_free(bs);
 }
 
+BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
+   int flags, Error **errp)
+{
+BlockDriverState *new_node_bs;
+Error *local_err = NULL;
+
+new_node_bs =  bdrv_open(NULL, NULL, node_options, flags, errp);
+if (new_node_bs == NULL) {
+error_prepend(errp, "Could not create node: ");
+return NULL;
+}
+
+bdrv_drained_begin(bs);
+bdrv_replace_node(bs, new_node_bs, _err);
+bdrv_drained_end(bs);
+
+if (local_err) {
+bdrv_unref(new_node_bs);
+error_propagate(errp, local_err);
+return NULL;
+}
+
+return new_node_bs;
+}
+
+void bdrv_remove_node(BlockDriverState *bs)
+{
+BdrvChild *child;
+BlockDriverState *inferior_bs;
+
+child = bdrv_filter_or_cow_child(bs);
+if (!child) {
+return;
+}
+inferior_bs = child->bs;
+
+/* Retain the BDS until we complete the graph change. */
+bdrv_ref(inferior_bs);
+/* Hold a guest back from writing while permissions are being reset. */
+bdrv_drained_begin(inferior_bs);
+/* Refresh permissions before the graph change. */
+bdrv_child_refresh_perms(bs, child, _abort);
+bdrv_replace_node(bs, inferior_bs, _abort);
+
+bdrv_drained_end(inferior_bs);
+bdrv_unref(inferior_bs);
+bdrv_unref(bs);
+}
+
 /*
  * Run consistency checks on an image
  *
diff --git a/include/block/block.h b/include/block/block.h
index d16c401..ae7612f 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -350,6 +350,9 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState 
*bs_top,
  Error **errp);
 void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
Error **errp);
+BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
+   int flags, Error **errp);
+void bdrv_remove_node(BlockDriverState *bs);
 
 int bdrv_parse_aio(const char *mode, int *flags);
 int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
-- 
1.8.3.1




[PATCH v12 08/14] iotests: add #310 to test bottom node in COR driver

2020-10-22 Thread Andrey Shinkevich via
The test case #310 is similar to #216 by Max Reitz. The difference is
that test #310 passes a bottom node to the COR filter driver.

Signed-off-by: Andrey Shinkevich 
---
 tests/qemu-iotests/310 | 109 +
 tests/qemu-iotests/310.out |  15 +++
 tests/qemu-iotests/group   |   3 +-
 3 files changed, 126 insertions(+), 1 deletion(-)
 create mode 100755 tests/qemu-iotests/310
 create mode 100644 tests/qemu-iotests/310.out

diff --git a/tests/qemu-iotests/310 b/tests/qemu-iotests/310
new file mode 100755
index 000..5ad7ad2
--- /dev/null
+++ b/tests/qemu-iotests/310
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+#
+# Copy-on-read tests using a COR filter with a bottom node
+#
+# Copyright (c) 2020 Virtuozzo International GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import iotests
+from iotests import log, qemu_img, qemu_io_silent
+
+# Need backing file support
+iotests.script_initialize(supported_fmts=['qcow2', 'qcow', 'qed', 'vmdk'],
+  supported_platforms=['linux'])
+
+log('')
+log('=== Copy-on-read across nodes ===')
+log('')
+
+# This test is similar to the 216 one by Max Reitz 
+# The difference is that this test case involves a bottom node to the
+# COR filter driver.
+
+with iotests.FilePath('base.img') as base_img_path, \
+ iotests.FilePath('mid.img') as mid_img_path, \
+ iotests.FilePath('top.img') as top_img_path, \
+ iotests.VM() as vm:
+
+log('--- Setting up images ---')
+log('')
+
+assert qemu_img('create', '-f', iotests.imgfmt, base_img_path, '64M') == 0
+assert qemu_io_silent(base_img_path, '-c', 'write -P 1 0M 1M') == 0
+assert qemu_io_silent(base_img_path, '-c', 'write -P 1 3M 1M') == 0
+assert qemu_img('create', '-f', iotests.imgfmt, '-b', base_img_path,
+'-F', iotests.imgfmt, mid_img_path) == 0
+assert qemu_io_silent(mid_img_path,  '-c', 'write -P 3 2M 1M') == 0
+assert qemu_io_silent(mid_img_path,  '-c', 'write -P 3 4M 1M') == 0
+assert qemu_img('create', '-f', iotests.imgfmt, '-b', mid_img_path,
+'-F', iotests.imgfmt, top_img_path) == 0
+assert qemu_io_silent(top_img_path,  '-c', 'write -P 2 1M 1M') == 0
+
+log('Done')
+
+log('')
+log('--- Doing COR ---')
+log('')
+
+vm.launch()
+
+log(vm.qmp('blockdev-add',
+node_name='node0',
+driver='copy-on-read',
+bottom='node2',
+file={
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': top_img_path
+},
+'backing': {
+'node-name': 'node2',
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': mid_img_path
+},
+'backing': {
+#'node-name': 'node2',
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': base_img_path
+}
+},
+}
+}))
+
+# Trigger COR
+log(vm.qmp('human-monitor-command',
+   command_line='qemu-io node0 "read 0 5M"'))
+
+vm.shutdown()
+
+log('')
+log('--- Checking COR result ---')
+log('')
+
+assert qemu_io_silent(base_img_path, '-c', 'discard 0 4M') == 0
+assert qemu_io_silent(mid_img_path, '-c', 'discard 0M 5M') == 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 1 0M 1M') != 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 2 1M 1M') == 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 3 2M 1M') == 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 1 3M 1M') != 0
+assert qemu_io_silent(top_img_path,  '-c', 'read -P 3 4M 1M') == 0
+
+log('Done')
diff --git a/tests/qemu-iotests/310.out b/tests/qemu-iotests/310.out
new 

[PATCH v12 07/14] copy-on-read: limit COR operations to bottom node

2020-10-22 Thread Andrey Shinkevich via
Limit COR operations to the bottom node (inclusively) in the backing
chain when the bottom node name is given. It will be useful for a block
stream job when the COR-filter is applied. The bottom node is passed as
the base itself may change due to concurrent commit jobs on the same
backing chain.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 42 --
 1 file changed, 40 insertions(+), 2 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 3d8e4db..8178a91 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -123,8 +123,46 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
size_t qiov_offset,
int flags)
 {
-return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
-   flags | BDRV_REQ_COPY_ON_READ);
+int64_t n = 0;
+int local_flags;
+int ret;
+BDRVStateCOR *state = bs->opaque;
+
+if (!state->bottom_bs) {
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
+}
+
+while (bytes) {
+local_flags = flags;
+
+/* In case of failure, try to copy-on-read anyway */
+ret = bdrv_is_allocated(bs->file->bs, offset, bytes, );
+if (!ret || ret < 0) {
+ret = 
bdrv_is_allocated_above(bdrv_backing_chain_next(bs->file->bs),
+  state->bottom_bs, true, offset,
+  n, );
+if (ret == 1 || ret < 0) {
+local_flags |= BDRV_REQ_COPY_ON_READ;
+}
+/* Finish earlier if the end of a backing file has been reached */
+if (ret == 0 && n == 0) {
+break;
+}
+}
+
+ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+  local_flags);
+if (ret < 0) {
+return ret;
+}
+
+offset += n;
+qiov_offset += n;
+bytes -= n;
+}
+
+return 0;
 }
 
 
-- 
1.8.3.1




[PATCH v12 03/14] copy-on-read: add filter drop function

2020-10-22 Thread Andrey Shinkevich via
Provide API for the COR-filter removal. Also, drop the filter child
permissions for an inactive state when the filter node is being
removed. This function may be considered as an intermediate solution
before we are able to use bdrv_remove_node(). It will be possible once
the QEMU permission update system has been overhauled.
To insert the filter, the block generic layer function
bdrv_insert_node() can be used.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 56 
 block/copy-on-read.h | 32 ++
 2 files changed, 88 insertions(+)
 create mode 100644 block/copy-on-read.h

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index cb03e0f..618c4c4 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -23,11 +23,20 @@
 #include "qemu/osdep.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
+#include "qapi/error.h"
+#include "block/copy-on-read.h"
+
+
+typedef struct BDRVStateCOR {
+bool active;
+} BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BDRVStateCOR *state = bs->opaque;
+
 bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
false, errp);
@@ -42,6 +51,13 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+state->active = true;
+
+/*
+ * We don't need to call bdrv_child_refresh_perms() now as the permissions
+ * will be updated later when the filter node gets its parent.
+ */
+
 return 0;
 }
 
@@ -57,6 +73,17 @@ static void cor_child_perm(BlockDriverState *bs, BdrvChild 
*c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
 {
+BDRVStateCOR *s = bs->opaque;
+
+if (!s->active) {
+/*
+ * While the filter is being removed
+ */
+*nperm = 0;
+*nshared = BLK_PERM_ALL;
+return;
+}
+
 *nperm = perm & PERM_PASSTHROUGH;
 *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
 
@@ -135,6 +162,7 @@ static void cor_lock_medium(BlockDriverState *bs, bool 
locked)
 
 static BlockDriver bdrv_copy_on_read = {
 .format_name= "copy-on-read",
+.instance_size  = sizeof(BDRVStateCOR),
 
 .bdrv_open  = cor_open,
 .bdrv_child_perm= cor_child_perm,
@@ -154,6 +182,34 @@ static BlockDriver bdrv_copy_on_read = {
 .is_filter  = true,
 };
 
+
+void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs)
+{
+BdrvChild *child;
+BlockDriverState *bs;
+BDRVStateCOR *s = cor_filter_bs->opaque;
+
+child = bdrv_filter_child(cor_filter_bs);
+if (!child) {
+return;
+}
+bs = child->bs;
+
+/* Retain the BDS until we complete the graph change. */
+bdrv_ref(bs);
+/* Hold a guest back from writing while permissions are being reset. */
+bdrv_drained_begin(bs);
+/* Drop permissions before the graph change. */
+s->active = false;
+bdrv_child_refresh_perms(cor_filter_bs, child, &error_abort);
+bdrv_replace_node(cor_filter_bs, bs, &error_abort);
+
+bdrv_drained_end(bs);
+bdrv_unref(bs);
+bdrv_unref(cor_filter_bs);
+}
+
+
 static void bdrv_copy_on_read_init(void)
 {
 bdrv_register(&bdrv_copy_on_read);
diff --git a/block/copy-on-read.h b/block/copy-on-read.h
new file mode 100644
index 000..7bf405d
--- /dev/null
+++ b/block/copy-on-read.h
@@ -0,0 +1,32 @@
+/*
+ * Copy-on-read filter block driver
+ *
+ * The filter driver performs Copy-On-Read (COR) operations
+ *
+ * Copyright (c) 2018-2020 Virtuozzo International GmbH.
+ *
+ * Author:
+ *   Andrey Shinkevich 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef BLOCK_COPY_ON_READ
+#define BLOCK_COPY_ON_READ
+
+#include "block/block_int.h"
+
+void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs);
+
+#endif /* BLOCK_COPY_ON_READ */
-- 
1.8.3.1




[PATCH v12 04/14] qapi: add filter-node-name to block-stream

2020-10-22 Thread Andrey Shinkevich via
Provide the possibility to pass the 'filter-node-name' parameter to the
block-stream job as it is done for the commit block job.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/monitor/block-hmp-cmds.c | 4 ++--
 block/stream.c | 4 +++-
 blockdev.c | 4 +++-
 include/block/block_int.h  | 7 ++-
 qapi/block-core.json   | 6 ++
 5 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index d15a2be..e8a58f3 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -508,8 +508,8 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
 
 qmp_block_stream(true, device, device, base != NULL, base, false, NULL,
  false, NULL, qdict_haskey(qdict, "speed"), speed, true,
- BLOCKDEV_ON_ERROR_REPORT, false, false, false, false,
- &error);
+ BLOCKDEV_ON_ERROR_REPORT, false, NULL, false, false, 
false,
+ false, &error);
 
 hmp_handle_error(mon, error);
 }
diff --git a/block/stream.c b/block/stream.c
index 8ce6729..e0540ee 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -221,7 +221,9 @@ static const BlockJobDriver stream_job_driver = {
 void stream_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *base, const char *backing_file_str,
   int creation_flags, int64_t speed,
-  BlockdevOnError on_error, Error **errp)
+  BlockdevOnError on_error,
+  const char *filter_node_name,
+  Error **errp)
 {
 StreamBlockJob *s;
 BlockDriverState *iter;
diff --git a/blockdev.c b/blockdev.c
index fe6fb5d..c917625 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2499,6 +2499,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
   bool has_backing_file, const char *backing_file,
   bool has_speed, int64_t speed,
   bool has_on_error, BlockdevOnError on_error,
+  bool has_filter_node_name, const char *filter_node_name,
   bool has_auto_finalize, bool auto_finalize,
   bool has_auto_dismiss, bool auto_dismiss,
   Error **errp)
@@ -2581,7 +2582,8 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 
 stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
- job_flags, has_speed ? speed : 0, on_error, &local_err);
+ job_flags, has_speed ? speed : 0, on_error,
+ filter_node_name, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
 goto out;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 38cad9d..f782737 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1134,6 +1134,9 @@ int is_windows_drive(const char *filename);
  *  See @BlockJobCreateFlags
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @on_error: The action to take upon error.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the stream job inserts into the graph above @bs. NULL means
+ * that a node name should be autogenerated.
  * @errp: Error object.
  *
  * Start a streaming operation on @bs.  Clusters that are unallocated
@@ -1146,7 +1149,9 @@ int is_windows_drive(const char *filename);
 void stream_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *base, const char *backing_file_str,
   int creation_flags, int64_t speed,
-  BlockdevOnError on_error, Error **errp);
+  BlockdevOnError on_error,
+  const char *filter_node_name,
+  Error **errp);
 
 /**
  * commit_start:
diff --git a/qapi/block-core.json b/qapi/block-core.json
index ee5ebef..0a64306 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2542,6 +2542,11 @@
 #'stop' and 'enospc' can only be used if the block device
 #supports io-status (see BlockInfo).  Since 1.3.
 #
+# @filter-node-name: the node name that should be assigned to the
+#filter driver that the stream job inserts into the graph
+#above @device. If this option is not given, a node name is
+#autogenerated. (Since: 5.2)
+#
 # @auto-finalize: When false, this job will wait in a PENDING state after it 
has
 # finished its work, waiting for @block-job-finalize before
 # making any block graph changes.
@@ -2572,6 +2577,7 @@
   'data': { '*job-id': 'str', 'device': 'str', '*base': 'str',
 '*base-node': 'str', '*backing-file': 'str', '*speed': 'int',
 '*on-error': 'Block

Re: [PATCH v11 09/13] copy-on-read: skip non-guest reads if no copy needed

2020-10-22 Thread Andrey Shinkevich



On 21.10.2020 23:43, Andrey Shinkevich wrote:

On 14.10.2020 18:22, Vladimir Sementsov-Ogievskiy wrote:

14.10.2020 15:51, Max Reitz wrote:

On 12.10.20 19:43, Andrey Shinkevich wrote:

If the flag BDRV_REQ_PREFETCH was set, pass it further to the
COR-driver to skip unneeded reading. It can be taken into account for
the COR-algorithms optimization. At the moment, that check is made
during the block-stream job.

Signed-off-by: Andrey Shinkevich 
---


[...]


diff --git a/block/io.c b/block/io.c
index 11df188..bff1808 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1512,7 +1512,8 @@ static int coroutine_fn 
bdrv_aligned_preadv(BdrvChild *child,

  max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
  if (bytes <= max_bytes && bytes <= max_transfer) {
-    ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 
qiov_offset, 0);

+    ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset,
+ flags & bs->supported_read_flags);



When BDRV_REQ_PREFETCH is passed, qiov may be (and generally should 
be) NULL. This means that we can't just drop the flag when calling the 
driver that doesn't support it.


Actually, if driver doesn't support the PREFETCH flag we should do 
nothing.





Ah, OK.  I see.  I expected this to be a separate patch.  I still wonder
why it isn’t.




Could it be part of patch 07? I mean introduce new field 
supported_read_flags and handle it in generic code in one patch, prior 
to implementing support for it in COR driver.





We have to add the supported flags for the COR driver in the same patch. 
Or before handling the supported_read_flags at the generic layer 
(handling zero does not make sense). Otherwise, the test #216 (where 
the COR-filter is applied) will not pass.


Andrey


I have found a workaround and am going to send all the related patches 
as a separate series.


Andrey



Re: [PATCH v11 09/13] copy-on-read: skip non-guest reads if no copy needed

2020-10-21 Thread Andrey Shinkevich

On 14.10.2020 18:22, Vladimir Sementsov-Ogievskiy wrote:

14.10.2020 15:51, Max Reitz wrote:

On 12.10.20 19:43, Andrey Shinkevich wrote:

If the flag BDRV_REQ_PREFETCH was set, pass it further to the
COR-driver to skip unneeded reading. It can be taken into account for
the COR-algorithms optimization. At the moment, that check is made
during the block-stream job.

Signed-off-by: Andrey Shinkevich 
---
  block/copy-on-read.c | 13 +
  block/io.c   |  3 ++-
  2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index b136895..278a11a 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -148,10 +148,15 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,

  }
  }
-    ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, 
qiov_offset,

-  local_flags);
-    if (ret < 0) {
-    return ret;
+    if (!!(flags & BDRV_REQ_PREFETCH) &


How about dropping the double negation and using a logical && instead of
the binary &?


+    !(local_flags & BDRV_REQ_COPY_ON_READ)) {
+    /* Skip non-guest reads if no copy needed */
+    } else {


Hm.  I would have just written the negated form

(!(flags & BDRV_REQ_PREFETCH) || (local_flags & BDRV_REQ_COPY_ON_READ))

and put the “skip” comment above that condition.

(Since local_flags is initialized to flags, it can be written as a
single comparison, but that’s a matter of taste and I’m not going to
recommend either over the other:

((local_flags & (BDRV_REQ_PREFETCH | BDRV_REQ_COPY_ON_READ)) !=
BDRV_REQ_PREFETCH)

)

+    ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, 
qiov_offset,

+  local_flags);
+    if (ret < 0) {
+    return ret;
+    }
  }
  offset += n;
diff --git a/block/io.c b/block/io.c
index 11df188..bff1808 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1512,7 +1512,8 @@ static int coroutine_fn 
bdrv_aligned_preadv(BdrvChild *child,

  max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
  if (bytes <= max_bytes && bytes <= max_transfer) {
-    ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 
qiov_offset, 0);

+    ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset,
+ flags & bs->supported_read_flags);



When BDRV_REQ_PREFETCH is passed, qiov may be (and generally should be) 
NULL. This means that we can't just drop the flag when calling the driver 
that doesn't support it.


Actually, if driver doesn't support the PREFETCH flag we should do nothing.




Ah, OK.  I see.  I expected this to be a separate patch.  I still wonder
why it isn’t.




Could it be part of patch 07? I mean introduce new field 
supported_read_flags and handle it in generic code in one patch, prior 
to implementing support for it in COR driver.





We have to add the supported flags for the COR driver in the same patch. 
Or before handling the supported_read_flags at the generic layer 
(handling zero does not make sense). Otherwise, the test #216 (where 
the COR-filter is applied) will not pass.


Andrey



Re: [PATCH v11 13/13] block: apply COR-filter to block-stream jobs

2020-10-20 Thread Andrey Shinkevich

On 16.10.2020 18:45, Vladimir Sementsov-Ogievskiy wrote:

15.10.2020 20:16, Andrey Shinkevich wrote:

On 14.10.2020 19:24, Max Reitz wrote:

On 12.10.20 19:43, Andrey Shinkevich wrote:


[...]


---
  block/stream.c | 93 
+-

  tests/qemu-iotests/030 | 51 +++--
  tests/qemu-iotests/030.out |  4 +-
  tests/qemu-iotests/141.out |  2 +-
  tests/qemu-iotests/245 | 19 +++---
  5 files changed, 81 insertions(+), 88 deletions(-)


Looks like stream_run() could be a bit streamlined now (the allocation
checking should be unnecessary, unconditionally calling
stream_populate() should be sufficient), but not necessary now.



That is what I had kept in my mind when I tackled this patch. But 
there is an underwater reef to streamline. Namely, how the 
block-stream job gets known about a long unallocated tail to exit the 
loop earlier in the stream_run(). Shall we return the '-EOF' or 
another error code from the cor_co_preadv_part() to be handled by the 
stream_run()? Any other suggestions, if any, will be appreciated.


Just calling read CHUNK by CHUNK may be less efficient than 
is_allocated()-driven loop: you may end up with splitting regions 
unaligned to CHUNK-granularity, which would not be split with 
is_allocated()-driven loop. Current loop allows chunks unaligned to CHUNK.


The cor_co_preadv_part() will check for the end of a file in the next 
version. So, the unalignment is not going to be the issue.


Andrey



So, I think, it's better to keep is_allocated() logic as is for now.







Re: [PATCH v11 13/13] block: apply COR-filter to block-stream jobs

2020-10-16 Thread Andrey Shinkevich

On 15.10.2020 20:16, Andrey Shinkevich wrote:

On 14.10.2020 19:24, Max Reitz wrote:

On 12.10.20 19:43, Andrey Shinkevich wrote:


[...]


---
  block/stream.c | 93 
+-

  tests/qemu-iotests/030 | 51 +++--
  tests/qemu-iotests/030.out |  4 +-
  tests/qemu-iotests/141.out |  2 +-
  tests/qemu-iotests/245 | 19 +++---
  5 files changed, 81 insertions(+), 88 deletions(-)


Looks like stream_run() could be a bit streamlined now (the allocation
checking should be unnecessary, unconditionally calling
stream_populate() should be sufficient), but not necessary now.



That is what I had kept in my mind when I tackled this patch. But there 
is an underwater reef to streamline. Namely, how the block-stream job 
gets known about a long unallocated tail to exit the loop earlier in the 
stream_run(). Shall we return the '-EOF' or another error code from the 
cor_co_preadv_part() to be handled by the stream_run()? Any other 
suggestions, if any, will be appreciated.



diff --git a/block/stream.c b/block/stream.c
index d3e1812..93564db 100644
--- a/block/stream.c
+++ b/block/stream.c


[...]



+
+    cor_filter_bs = bdrv_cor_filter_append(bs, opts, BDRV_O_RDWR, 
errp);

+    if (cor_filter_bs == NULL) {
+    goto fail;
+    }
+
+    if (bdrv_freeze_backing_chain(cor_filter_bs, bs, errp) < 0) {


Is there a reason why we can’t combine this with the
bdrv_freeze_backing_chain() from bs down to above_base?  I mean, the
effect should be the same, just asking.



The bdrv_freeze_backing_chain(bs, above_base, errp) is called before the 
bdrv_reopen_set_read_only() to keep the backing chain safe during the 
context switch. Then we will want to freeze the 'COR -> TOP BS' link as 
well. Freezing/unfreezing parts is simpler to manage than doing that 
with the whole chain.
If we decide to invoke the bdrv_reopen_set_read_only() after freezing 
the backing chain together with the COR-filter, we will not be able to 
get the 'write' permission on the read-only node.




+    bdrv_cor_filter_drop(cor_filter_bs);
+    cor_filter_bs = NULL;
+    goto fail;
+    }
+
+    s = block_job_create(job_id, _job_driver, NULL, 
cor_filter_bs,

+ BLK_PERM_CONSISTENT_READ,
+ basic_flags | BLK_PERM_WRITE | 
BLK_PERM_GRAPH_MOD,


Not that I’m an expert on the GRAPH_MOD permission, but why is this
shared here but not below?  Shouldn’t it be the same in both cases?
(Same for taking it as a permission.)



When we invoke the block_job_add_bdrv(&s->common, "active node", bs,..) 
below (particularly, we need it to block the operations on the top node, 
bdrv_op_block_all()), we ask for the GRAPH_MOD permission for the top 
node. To allow that, the parent filter node should share that permission 
for the underlying node. Otherwise, we get assertion failed in the 
bdrv_check_update_perm() called from bdrv_replace_node() when we remove 
the filter.




I will add my comments above to the code.

Andrey


[...]



Re: [PATCH v11 05/13] copy-on-read: limit COR operations to base in COR driver

2020-10-15 Thread Andrey Shinkevich

On 15.10.2020 18:56, Max Reitz wrote:

On 14.10.20 20:57, Andrey Shinkevich wrote:

On 14.10.2020 15:01, Max Reitz wrote:

On 12.10.20 19:43, Andrey Shinkevich wrote:

Limit COR operations by the base node in the backing chain when the
overlay base node name is given. It will be useful for a block stream
job when the COR-filter is applied. The overlay base node is passed as
the base itself may change due to concurrent commit jobs on the same
backing chain.

Signed-off-by: Andrey Shinkevich 
---
   block/copy-on-read.c | 39 +--
   1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index c578b1b..dfbd6ad 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -122,8 +122,43 @@ static int coroutine_fn
cor_co_preadv_part(BlockDriverState *bs,
  size_t qiov_offset,
  int flags)
   {


[...]


+    ret = bdrv_is_allocated_above(bdrv_cow_bs(bs->file->bs),
+  state->base_overlay, true,
offset,
+  n, &n);
+    if (ret) {
+    local_flags |= BDRV_REQ_COPY_ON_READ;
+    }
+    }


Furthermore, I just noticed – can the is_allocated functions not return
0 in @n, when @offset is at the EOF?  Is that something to look out for?
   (I’m not sure.)

Max



The check for EOF is managed earlier in the stream_run() for a
block-stream job. For other cases of using the COR-filter, the check for
EOF can be added to the cor_co_preadv_part().
I would be more than happy if we can escape the duplicated checking for
is_allocated in the block-stream. But how the stream_run() can stop
calling the blk_co_preadv() when EOF is reached if is_allocated removed
from it?


True.  Is it that bad to lose that optimization, though?  (And I would
expect the case of a short backing file to be rather rare, too.)


May the cor_co_preadv_part() return EOF (or other error code)
to be handled by a caller if (ret == 0 && n == 0 && (flags &
BDRV_REQ_PREFETCH)?


That sounds like a bad hack.  I’d rather keep the double is_allocated().

But what would be the problem with losing the short backing file
optimization?  Just performance?  Or would we end up writing actual
zeroes into the overlay past the end of the backing file?  Hm, probably
not, if the COR filter would detect that case and handle it like stream
does.

So it seems only a question of performance to me, and I don’t think it
would be that bad in this rather rare case to have a bunch of useless
is_allocated and is_allocated_above calls past the backing file’s EOF.
(Maybe I’m wrong, though.)

Max



Thank you, Max, for sharing your thoughts on this subject.
The double check for the is_allocated in the stream_run() is a 
performance degradation also.
And we will make a check for the EOF in the cor_co_preadv_part() in 
either case, won't we?


Andrey



Re: [PATCH v11 13/13] block: apply COR-filter to block-stream jobs

2020-10-15 Thread Andrey Shinkevich

On 14.10.2020 19:24, Max Reitz wrote:

On 12.10.20 19:43, Andrey Shinkevich wrote:


[...]


---
  block/stream.c | 93 +-
  tests/qemu-iotests/030 | 51 +++--
  tests/qemu-iotests/030.out |  4 +-
  tests/qemu-iotests/141.out |  2 +-
  tests/qemu-iotests/245 | 19 +++---
  5 files changed, 81 insertions(+), 88 deletions(-)


Looks like stream_run() could be a bit streamlined now (the allocation
checking should be unnecessary, unconditionally calling
stream_populate() should be sufficient), but not necessary now.



That is what I had kept in my mind when I tackled this patch. But there 
is an underwater reef to streamline. Namely, how the block-stream job 
gets known about a long unallocated tail to exit the loop earlier in the 
stream_run(). Shall we return the '-EOF' or another error code from the 
cor_co_preadv_part() to be handled by the stream_run()? Any other 
suggestions, if any, will be appreciated.



diff --git a/block/stream.c b/block/stream.c
index d3e1812..93564db 100644
--- a/block/stream.c
+++ b/block/stream.c


[...]



+
+cor_filter_bs = bdrv_cor_filter_append(bs, opts, BDRV_O_RDWR, errp);
+if (cor_filter_bs == NULL) {
+goto fail;
+}
+
+if (bdrv_freeze_backing_chain(cor_filter_bs, bs, errp) < 0) {


Is there a reason why we can’t combine this with the
bdrv_freeze_backing_chain() from bs down to above_base?  I mean, the
effect should be the same, just asking.



The bdrv_freeze_backing_chain(bs, above_base, errp) is called before the 
bdrv_reopen_set_read_only() to keep the backing chain safe during the 
context switch. Then we will want to freeze the 'COR -> TOP BS' link as 
well. Freezing/unfreezing parts is simpler to manage than doing that 
with the whole chain.
If we decide to invoke the bdrv_reopen_set_read_only() after freezing 
the backing chain together with the COR-filter, we will not be able to 
get the 'write' permission on the read-only node.




+bdrv_cor_filter_drop(cor_filter_bs);
+cor_filter_bs = NULL;
+goto fail;
+}
+
+s = block_job_create(job_id, &stream_job_driver, NULL, cor_filter_bs,
+ BLK_PERM_CONSISTENT_READ,
+ basic_flags | BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD,


Not that I’m an expert on the GRAPH_MOD permission, but why is this
shared here but not below?  Shouldn’t it be the same in both cases?
(Same for taking it as a permission.)



When we invoke the block_job_add_bdrv(&s->common, "active node", bs,..) 
below (particularly, we need it to block the operations on the top node, 
bdrv_op_block_all()), we ask for the GRAPH_MOD permission for the top 
node. To allow that, the parent filter node should share that permission 
for the underlying node. Otherwise, we get assertion failed in the 
bdrv_check_update_perm() called from bdrv_replace_node() when we remove 
the filter.



   speed, creation_flags, NULL, NULL, errp);
  if (!s) {
  goto fail;
  }
  
+/*

+ * Prevent concurrent jobs trying to modify the graph structure here, we
+ * already have our own plans. Also don't allow resize as the image size is
+ * queried only at the job start and then cached.
+ */
+if (block_job_add_bdrv(&s->common, "active node", bs,
+   basic_flags | BLK_PERM_GRAPH_MOD,
+   basic_flags | BLK_PERM_WRITE, &error_abort)) {
+goto fail;
+}
+
  /* Block all intermediate nodes between bs and base, because they will
   * disappear from the chain after this operation. The streaming job reads
   * every block only once, assuming that it doesn't change, so forbid 
writes


[...]


diff --git a/tests/qemu-iotests/245 b/tests/qemu-iotests/245
index e60c832..940e85a 100755
--- a/tests/qemu-iotests/245
+++ b/tests/qemu-iotests/245
@@ -899,17 +899,26 @@ class TestBlockdevReopen(iotests.QMPTestCase):
  # make hd1 read-only and block-stream requires it to be read-write
  # (Which error message appears depends on whether the stream job is
  # already done with copying at this point.)


Hm.  Let’s look at the set of messages below... [1]


-self.reopen(opts, {},
+# As the COR-filter node is inserted into the backing chain with the
+# 'block-stream' operation, we move the options to their proper nodes.
+opts = hd_opts(1)


Oh, so this patch changes it so that only the subtree below hd1 is
reopened, and we don’t have to deal with the filter options.  Got it.
(I think.)



Yes, that's right.


+opts['backing'] = hd_opts(2)
+opts['backing']['backing'] = None
+self.reopen(opts, {'read-only': True},
  ["Can't set node 'hd1' to r/o with copy-on-read enabled",


[1]

This isn’t done anymore as of this patch.  So I don’t think this error
message can still appear.  Will some other me

Re: [PATCH v11 11/13] stream: mark backing-file argument as deprecated

2020-10-15 Thread Andrey Shinkevich

On 14.10.2020 18:43, Vladimir Sementsov-Ogievskiy wrote:

14.10.2020 18:03, Max Reitz wrote:

On 12.10.20 19:43, Andrey Shinkevich wrote:

Whereas the block-stream job starts using a backing file name of the
base node overlay after the block-stream job completes, mark the QMP
'backing-file' argument as deprecated.

Signed-off-by: Andrey Shinkevich 
---
  docs/system/deprecated.rst | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/docs/system/deprecated.rst b/docs/system/deprecated.rst
index 8b3ab5b..7491fcf 100644
--- a/docs/system/deprecated.rst
+++ b/docs/system/deprecated.rst
@@ -285,6 +285,12 @@ details.
  The ``query-events`` command has been superseded by the more powerful
  and accurate ``query-qmp-schema`` command.
+``block-stream`` argument ``backing-file`` (since 5.2)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+The argument ``backing-file`` is deprecated. QEMU uses a backing file
+name of the base node overlay after the block-stream job completes.
+


Hm, why?  I don’t see the problem with it.



My wrong idea, sorry. I believed that the argument was unused when I was 
reviewing v10. But it actually became unused during the series, and that is 
wrong.




I missed searching for calls to the qmp_block_stream() in the QEMU 
dynamically generated code. Will roll back.


Andrey



Re: [PATCH v11 09/13] copy-on-read: skip non-guest reads if no copy needed

2020-10-14 Thread Andrey Shinkevich

On 14.10.2020 15:51, Max Reitz wrote:

On 12.10.20 19:43, Andrey Shinkevich wrote:

If the flag BDRV_REQ_PREFETCH was set, pass it further to the
COR-driver to skip unneeded reading. It can be taken into account for
the COR-algorithms optimization. At the moment, that check is made
during the block-stream job.

Signed-off-by: Andrey Shinkevich 
---
  block/copy-on-read.c | 13 +
  block/io.c   |  3 ++-
  2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index b136895..278a11a 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -148,10 +148,15 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
  }
  }
  
-ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,

-  local_flags);
-if (ret < 0) {
-return ret;
+if (!!(flags & BDRV_REQ_PREFETCH) &


How about dropping the double negation and using a logical && instead of
the binary &?



Yes, that's correct.


+!(local_flags & BDRV_REQ_COPY_ON_READ)) {
+/* Skip non-guest reads if no copy needed */
+} else {


Hm.  I would have just written the negated form

(!(flags & BDRV_REQ_PREFETCH) || (local_flags & BDRV_REQ_COPY_ON_READ))

and put the “skip” comment above that condition.

(Since local_flags is initialized to flags, it can be written as a
single comparison, but that’s a matter of taste and I’m not going to
recommend either over the other:



I played with the flags to make the idea obvious for the eye of a 
beholder: "we neither read nor write".
Comparing the BDRV_REQ_PREFETCH against the 'flags' means that the flag 
comes from outside of the function.

And the empty section means we do nothing in that case.
Eventually, I will pick up the brief expression below.
Thanks,

Andrey


((local_flags & (BDRV_REQ_PREFETCH | BDRV_REQ_COPY_ON_READ)) !=
BDRV_REQ_PREFETCH)

)


+ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+  local_flags);
+if (ret < 0) {
+return ret;
+}
  }
  
  offset += n;

diff --git a/block/io.c b/block/io.c
index 11df188..bff1808 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1512,7 +1512,8 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
  
  max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);

  if (bytes <= max_bytes && bytes <= max_transfer) {
-ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, 0);
+ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset,
+ flags & bs->supported_read_flags);


Ah, OK.  I see.  I expected this to be a separate patch.  I still wonder
why it isn’t.

Max


  goto out;
  }
  








Re: [PATCH v11 06/13] block: modify the comment for BDRV_REQ_PREFETCH flag

2020-10-14 Thread Andrey Shinkevich

On 14.10.2020 15:22, Max Reitz wrote:

On 12.10.20 19:43, Andrey Shinkevich wrote:

Modify the comment for the flag BDRV_REQ_PREFETCH as we are going to
use it alone and pass it to the COR-filter driver for further
processing.

Signed-off-by: Andrey Shinkevich 
---
  include/block/block.h | 7 ---
  1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index 981ab5b..2b7efd1 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -71,9 +71,10 @@ typedef enum {
  BDRV_REQ_NO_FALLBACK= 0x100,
  
  /*

- * BDRV_REQ_PREFETCH may be used only together with BDRV_REQ_COPY_ON_READ
- * on read request and means that caller doesn't really need data to be
- * written to qiov parameter which may be NULL.
+ * BDRV_REQ_PREFETCH may be used together with the BDRV_REQ_COPY_ON_READ
+ * flag or when the COR-filter applied to read operations and means that


There’s some word missing here, but I’m not sure what it is...  At least
an “is” before “applied”.  Perhaps something like ”or when a COR filter
is involved (in read operations)” would be better.


+ * caller doesn't really need data to be written to qiov parameter which


And this “written to” confused me for a second, because we’re reading
into qiov.  Technically, that means writing into the buffer, but, you know.

Could we rewrite the whole thing, perhaps?  Something like

“BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read
(i.e., together with the BDRV_REQ_COPY_ON_READ flag or when there is a
COR filter), in which case it signals that the COR operation need not
read the data into memory (qiov), but only ensure it is copied to the
top layer (i.e., that COR is done).”

I don’t know.

Max



I would modify a little:

“BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read
(i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR
filter is involved), in which case it signals that the COR operation
need not read the data into memory (qiov) but only ensure they are
copied to the top layer (i.e., that COR operation is done).”



+ * may be NULL.
   */
  BDRV_REQ_PREFETCH  = 0x200,
  /* Mask of valid flags */








Re: [PATCH v11 05/13] copy-on-read: limit COR operations to base in COR driver

2020-10-14 Thread Andrey Shinkevich

On 14.10.2020 15:01, Max Reitz wrote:

On 12.10.20 19:43, Andrey Shinkevich wrote:

Limit COR operations by the base node in the backing chain when the
overlay base node name is given. It will be useful for a block stream
job when the COR-filter is applied. The overlay base node is passed as
the base itself may change due to concurrent commit jobs on the same
backing chain.

Signed-off-by: Andrey Shinkevich 
---
  block/copy-on-read.c | 39 +--
  1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index c578b1b..dfbd6ad 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -122,8 +122,43 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
 size_t qiov_offset,
 int flags)
  {


[...]


+ret = bdrv_is_allocated_above(bdrv_cow_bs(bs->file->bs),
+  state->base_overlay, true, offset,
+  n, );
+if (ret) {
+local_flags |= BDRV_REQ_COPY_ON_READ;
+}
+}


Furthermore, I just noticed – can the is_allocated functions not return
0 in @n, when @offset is at the EOF?  Is that something to look out for?
  (I’m not sure.)

Max



The check for EOF is managed earlier in the stream_run() for a 
block-stream job. For other cases of using the COR-filter, the check for 
EOF can be added to the cor_co_preadv_part().
I would be more than happy if we could avoid the duplicated is_allocated
check in the block-stream job. But how can stream_run() stop calling
blk_co_preadv() when EOF is reached if the is_allocated check is removed
from it? May cor_co_preadv_part() return EOF (or another error code) to
be handled by the caller if (ret == 0 && n == 0 &&
(flags & BDRV_REQ_PREFETCH))?


Andrey



Re: [PATCH v11 05/13] copy-on-read: limit COR operations to base in COR driver

2020-10-14 Thread Andrey Shinkevich

On 14.10.2020 14:59, Max Reitz wrote:

On 12.10.20 19:43, Andrey Shinkevich wrote:

Limit COR operations by the base node in the backing chain when the
overlay base node name is given. It will be useful for a block stream
job when the COR-filter is applied. The overlay base node is passed as
the base itself may change due to concurrent commit jobs on the same
backing chain.

Signed-off-by: Andrey Shinkevich 
---
  block/copy-on-read.c | 39 +--
  1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index c578b1b..dfbd6ad 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -122,8 +122,43 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
 size_t qiov_offset,
 int flags)
  {
-return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
-   flags | BDRV_REQ_COPY_ON_READ);
+int64_t n = 0;
+int64_t size = offset + bytes;
+int local_flags;
+int ret;
+BDRVStateCOR *state = bs->opaque;
+
+if (!state->base_overlay) {
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
+}
+
+while (offset < size) {
+local_flags = flags;
+
+/* In case of failure, try to copy-on-read anyway */
+ret = bdrv_is_allocated(bs->file->bs, offset, bytes, );
+if (!ret) {


In case of failure, a negative value is going to be returned, we won’t
go into this conditional block, and local_flags isn’t going to contain
BDRV_REQ_COPY_ON_READ.

So the idea of CORing in case of failure sounds sound to me, but it
doesn’t look like that’s done.



Yes, it's obvious. It was my mistake to omit the additional
condition for "ret < 0". Thank you for noticing that.


Andrey


+ret = bdrv_is_allocated_above(bdrv_cow_bs(bs->file->bs),


I think this should either be bdrv_backing_chain_next() or we must rule
out the possibility of bs->file->bs being a filter somewhere.  I think
I’d prefer the former.


+  state->base_overlay, true, offset,
+  n, );
+if (ret) {


“ret == 1 || ret < 0” would be more explicit (and in line with the “!ret
|| ret < 0” probably needed above), but correct either way.

Max





Re: [PATCH v11 04/13] copy-on-read: pass overlay base node name to COR driver

2020-10-14 Thread Andrey Shinkevich

On 14.10.2020 14:09, Max Reitz wrote:

On 12.10.20 19:43, Andrey Shinkevich wrote:

We are going to use the COR-filter for a block-stream job.
To limit COR operations by the base node in the backing chain during
stream job, pass the name of overlay base node to the copy-on-read
driver as base node itself may change due to possible concurrent jobs.
The rest of the functionality will be implemented in the patch that
follows.

Signed-off-by: Andrey Shinkevich 
---
  block/copy-on-read.c | 14 ++
  1 file changed, 14 insertions(+)


Is there a reason why you didn’t add this option to QAPI (as part of a
yet-to-be-created BlockdevOptionsCor)?  Because I’d really like it there.



I agree that passing a base overlay under the base option looks clumsy. 
We could pass the base node name and find its overlay ourselves here in 
cor_open(). In that case, we can use the existing QAPI.
The reason I used the existing QAPI is to make it easier for a user to
work with the traditional options and to keep things simple. That way,
the user doesn't have to think about which overlay or above-base node to
pass. If we introduce a specific BlockdevOptionsCor, what other options
might come with it?



diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index bcccf0f..c578b1b 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -24,19 +24,24 @@
  #include "block/block_int.h"
  #include "qemu/module.h"
  #include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
  #include "qapi/qmp/qdict.h"
  #include "block/copy-on-read.h"
  
  
  typedef struct BDRVStateCOR {

  bool active;
+BlockDriverState *base_overlay;
  } BDRVStateCOR;
  
  
  static int cor_open(BlockDriverState *bs, QDict *options, int flags,

  Error **errp)
  {
+BlockDriverState *base_overlay = NULL;
  BDRVStateCOR *state = bs->opaque;
+/* We need the base overlay node rather than the base itself */
+const char *base_overlay_node = qdict_get_try_str(options, "base");


Shouldn’t it be called base-overlay or above-base then?



The base_overlay identifier is used below as the pointer to the BS. The
base_overlay_node stands for the name of the node. I used that
identifier to distinguish between the types. And above_base has another
meaning per block/stream.c - it can be a temporary filter with a JSON-name.


  
  bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,

 BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
@@ -52,7 +57,16 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
  ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
  bs->file->bs->supported_zero_flags);
  
+if (base_overlay_node) {

+qdict_del(options, "base");
+base_overlay = bdrv_lookup_bs(NULL, base_overlay_node, errp);


I think this is a use-after-free.  The storage @base_overlay_node points
to belongs to a QString, which is referenced only by @options; so
deleting that element of @options should free that string.

Max



I will swap those two function calls (bdrv_lookup_bs(); qdict_del();).
Thank you.

Andrey


+if (!base_overlay) {
+error_setg(errp, QERR_BASE_NOT_FOUND, base_overlay_node);
+return -EINVAL;
+}
+}
  state->active = true;
+state->base_overlay = base_overlay;
  
  /*

   * We don't need to call bdrv_child_refresh_perms() now as the permissions








Re: [PATCH v11 02/13] copy-on-read: add filter append/drop functions

2020-10-14 Thread Andrey Shinkevich

On 14.10.2020 13:44, Max Reitz wrote:

On 12.10.20 19:43, Andrey Shinkevich wrote:

Provide API for the COR-filter insertion/removal.
Also, drop the filter child permissions for an inactive state when the
filter node is being removed.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
  block/copy-on-read.c | 88 
  block/copy-on-read.h | 35 +
  2 files changed, 123 insertions(+)
  create mode 100644 block/copy-on-read.h

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index cb03e0f..bcccf0f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c


[...]


@@ -159,4 +188,63 @@ static void bdrv_copy_on_read_init(void)
  bdrv_register(_copy_on_read);
  }
  
+

+BlockDriverState *bdrv_cor_filter_append(BlockDriverState *bs,
+ QDict *node_options,
+ int flags, Error **errp)


I had hoped you could make this a generic block layer function. :(

(Because it really is rather generic)

*shrug*


Actually, I did (and still can do) that for the 'append node' function 
only but not for the 'drop node' one so far...


diff --git a/block.c b/block.c
index 11ab55f..f41e876 100644
--- a/block.c
+++ b/block.c
@@ -4669,6 +4669,55 @@ static void bdrv_delete(BlockDriverState *bs)
 g_free(bs);
 }

+BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict 
*node_options,

+   int flags, Error **errp)
+{
+BlockDriverState *new_node_bs;
+Error *local_err = NULL;
+
+new_node_bs =  bdrv_open(NULL, NULL, node_options, flags, errp);
+if (new_node_bs == NULL) {
+error_prepend(errp, "Could not create node: ");
+return NULL;
+}
+
+bdrv_drained_begin(bs);
+bdrv_replace_node(bs, new_node_bs, _err);
+bdrv_drained_end(bs);
+
+if (local_err) {
+bdrv_unref(new_node_bs);
+error_propagate(errp, local_err);
+return NULL;
+}
+
+return new_node_bs;
+}
+
+void bdrv_remove_node(BlockDriverState *bs)
+{
+BdrvChild *child;
+BlockDriverState *inferior_bs;
+
+child = bdrv_filter_or_cow_child(bs);
+if (!child) {
+return;
+}
+inferior_bs = child->bs;
+
+/* Retain the BDS until we complete the graph change. */
+bdrv_ref(inferior_bs);
+/* Hold a guest back from writing while permissions are being reset. */
+bdrv_drained_begin(inferior_bs);
+/* Refresh permissions before the graph change. */
+bdrv_child_refresh_perms(bs, child, _abort);
+bdrv_replace_node(bs, inferior_bs, _abort);
+
+bdrv_drained_end(inferior_bs);
+bdrv_unref(inferior_bs);
+bdrv_unref(bs);
+}

So, it is an intermediate solution in this patch of the series. I am 
going to make both functions generic once Vladimir overhauls the QEMU 
permission update system. Otherwise, the COR-filter node cannot be 
removed from the backing chain gracefully.


Thank you for your r-b. If the next version comes, I can move the 
'append node' function only to the generic layer.


Andrey



Reviewed-by: Max Reitz 


+{
+BlockDriverState *cor_filter_bs;
+Error *local_err = NULL;
+
+cor_filter_bs = bdrv_open(NULL, NULL, node_options, flags, errp);
+if (cor_filter_bs == NULL) {
+error_prepend(errp, "Could not create COR-filter node: ");
+return NULL;
+}
+
+if (!qdict_get_try_str(node_options, "node-name")) {
+cor_filter_bs->implicit = true;
+}
+
+bdrv_drained_begin(bs);
+bdrv_replace_node(bs, cor_filter_bs, _err);
+bdrv_drained_end(bs);
+
+if (local_err) {
+bdrv_unref(cor_filter_bs);
+error_propagate(errp, local_err);
+return NULL;
+}
+
+return cor_filter_bs;
+}






[PATCH v11 13/13] block: apply COR-filter to block-stream jobs

2020-10-12 Thread Andrey Shinkevich via
This patch completes the series with the COR-filter insertion for
block-stream operations. Adding the filter makes it possible for copied
regions to be discarded in backing files during the block-stream job,
which will reduce disk overuse.
The COR-filter insertion incurs changes in the iotests case
245:test_block_stream_4 that reopens the backing chain during a
block-stream job. There are changes in the iotests #030 as well.
The iotests case 030:test_stream_parallel was deleted due to multiple
conflicts between the concurrent job operations over the same backing
chain. The base backing node for one job is the top node for another
job. It may change due to the filter node inserted into the backing
chain while both jobs are running. Another issue is that the parts of
the backing chain are being frozen by the running job and may not be
changed by the concurrent job when needed. The concept of the parallel
jobs with common nodes is no longer considered vital.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 93 +-
 tests/qemu-iotests/030 | 51 +++--
 tests/qemu-iotests/030.out |  4 +-
 tests/qemu-iotests/141.out |  2 +-
 tests/qemu-iotests/245 | 19 +++---
 5 files changed, 81 insertions(+), 88 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index d3e1812..93564db 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -17,8 +17,10 @@
 #include "block/blockjob_int.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
+#include "block/copy-on-read.h"
 
 enum {
 /*
@@ -33,6 +35,8 @@ typedef struct StreamBlockJob {
 BlockJob common;
 BlockDriverState *base_overlay; /* COW overlay (stream from this) */
 BlockDriverState *above_base;   /* Node directly above the base */
+BlockDriverState *cor_filter_bs;
+BlockDriverState *target_bs;
 BlockdevOnError on_error;
 bool bs_read_only;
 bool chain_frozen;
@@ -43,8 +47,7 @@ static int coroutine_fn stream_populate(BlockBackend *blk,
 {
 assert(bytes < SIZE_MAX);
 
-return blk_co_preadv(blk, offset, bytes, NULL,
- BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
+return blk_co_preadv(blk, offset, bytes, NULL, BDRV_REQ_PREFETCH);
 }
 
 static void stream_abort(Job *job)
@@ -52,23 +55,20 @@ static void stream_abort(Job *job)
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 
 if (s->chain_frozen) {
-BlockJob *bjob = >common;
-bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 }
 }
 
 static int stream_prepare(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
-BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
 BlockDriverState *base_unfiltered = bdrv_skip_filters(base);
 Error *local_err = NULL;
 int ret = 0;
 
-bdrv_unfreeze_backing_chain(bs, s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 s->chain_frozen = false;
 
 if (bdrv_cow_child(unfiltered_bs)) {
@@ -94,13 +94,14 @@ static void stream_clean(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
+
+bdrv_cor_filter_drop(s->cor_filter_bs);
 
 /* Reopen the image back in read-only mode if necessary */
 if (s->bs_read_only) {
 /* Give up write permissions before making it read-only */
 blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, _abort);
-bdrv_reopen_set_read_only(bs, true, NULL);
+bdrv_reopen_set_read_only(s->target_bs, true, NULL);
 }
 }
 
@@ -108,9 +109,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockBackend *blk = s->common.blk;
-BlockDriverState *bs = blk_bs(blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
-bool enable_cor = !bdrv_cow_child(s->base_overlay);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 int64_t len;
 int64_t offset = 0;
 uint64_t delay_ns = 0;
@@ -122,21 +121,12 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 return 0;
 }
 
-len = bdrv_getlength(bs);
+len = bdrv_getlength(s->target_bs);
 if (len < 0) {
 return len;
 }
 job_progress_set_remaining(>common.job, len);
 
-

[PATCH v11 12/13] stream: remove unused backing-file name parameter

2020-10-12 Thread Andrey Shinkevich via
The 'backing-file' argument is not used by the block-stream job. It
designates a backing file name to set in QCOW2 image header after the
block-stream job finished. A backing file name of the node above base
is used instead.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c|  6 +-
 blockdev.c| 21 ++---
 include/block/block_int.h |  2 +-
 3 files changed, 8 insertions(+), 21 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index 51462bd..d3e1812 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -34,7 +34,6 @@ typedef struct StreamBlockJob {
 BlockDriverState *base_overlay; /* COW overlay (stream from this) */
 BlockDriverState *above_base;   /* Node directly above the base */
 BlockdevOnError on_error;
-char *backing_file_str;
 bool bs_read_only;
 bool chain_frozen;
 } StreamBlockJob;
@@ -103,8 +102,6 @@ static void stream_clean(Job *job)
 blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, _abort);
 bdrv_reopen_set_read_only(bs, true, NULL);
 }
-
-g_free(s->backing_file_str);
 }
 
 static int coroutine_fn stream_run(Job *job, Error **errp)
@@ -220,7 +217,7 @@ static const BlockJobDriver stream_job_driver = {
 };
 
 void stream_start(const char *job_id, BlockDriverState *bs,
-  BlockDriverState *base, const char *backing_file_str,
+  BlockDriverState *base,
   int creation_flags, int64_t speed,
   BlockdevOnError on_error,
   const char *filter_node_name,
@@ -295,7 +292,6 @@ void stream_start(const char *job_id, BlockDriverState *bs,
 
 s->base_overlay = base_overlay;
 s->above_base = above_base;
-s->backing_file_str = g_strdup(backing_file_str);
 s->bs_read_only = bs_read_only;
 s->chain_frozen = true;
 
diff --git a/blockdev.c b/blockdev.c
index d719c47..019b6e0 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2498,7 +2498,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 BlockDriverState *base_bs = NULL;
 AioContext *aio_context;
 Error *local_err = NULL;
-const char *base_name = NULL;
 int job_flags = JOB_DEFAULT;
 
 if (!has_on_error) {
@@ -2526,7 +2525,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 goto out;
 }
 assert(bdrv_get_aio_context(base_bs) == aio_context);
-base_name = base;
 }
 
 if (has_base_node) {
@@ -2541,7 +2539,11 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 assert(bdrv_get_aio_context(base_bs) == aio_context);
 bdrv_refresh_filename(base_bs);
-base_name = base_bs->filename;
+}
+
+if (has_backing_file) {
+warn_report("Use of \"backing-file\" argument is deprecated; "
+"a backing file of the node above base is used instead");
 }
 
 /* Check for op blockers in the whole chain between bs and base */
@@ -2553,17 +2555,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 }
 
-/* if we are streaming the entire chain, the result will have no backing
- * file, and specifying one is therefore an error */
-if (base_bs == NULL && has_backing_file) {
-error_setg(errp, "backing file specified, but streaming the "
- "entire chain");
-goto out;
-}
-
-/* backing_file string overrides base bs filename */
-base_name = has_backing_file ? backing_file : base_name;
-
 if (has_auto_finalize && !auto_finalize) {
 job_flags |= JOB_MANUAL_FINALIZE;
 }
@@ -2571,7 +2562,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 job_flags |= JOB_MANUAL_DISMISS;
 }
 
-stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
+stream_start(has_job_id ? job_id : NULL, bs, base_bs,
  job_flags, has_speed ? speed : 0, on_error,
  filter_node_name, _err);
 if (local_err) {
diff --git a/include/block/block_int.h b/include/block/block_int.h
index a142867..4f523c3 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1151,7 +1151,7 @@ int is_windows_drive(const char *filename);
  * BlockDriverState.
  */
 void stream_start(const char *job_id, BlockDriverState *bs,
-  BlockDriverState *base, const char *backing_file_str,
+  BlockDriverState *base,
   int creation_flags, int64_t speed,
   BlockdevOnError on_error,
   const char *filter_node_name,
-- 
1.8.3.1




[PATCH v11 11/13] stream: mark backing-file argument as deprecated

2020-10-12 Thread Andrey Shinkevich via
Since the block-stream job now uses the backing file name of the
base node overlay after the job completes, mark the QMP
'backing-file' argument as deprecated.

Signed-off-by: Andrey Shinkevich 
---
 docs/system/deprecated.rst | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/docs/system/deprecated.rst b/docs/system/deprecated.rst
index 8b3ab5b..7491fcf 100644
--- a/docs/system/deprecated.rst
+++ b/docs/system/deprecated.rst
@@ -285,6 +285,12 @@ details.
 The ``query-events`` command has been superseded by the more powerful
 and accurate ``query-qmp-schema`` command.
 
+``block-stream`` argument ``backing-file`` (since 5.2)
+''''''''''''''''''''''''''''''''''''''''''''''''''''''
+
+The argument ``backing-file`` is deprecated. QEMU uses a backing file
+name of the base node overlay after the block-stream job completes.
+
 chardev client socket with ``wait`` option (since 4.0)
 ''''''''''''''''''''''''''''''''''''''''''''''''''''''
 
-- 
1.8.3.1




[PATCH v11 08/13] copy-on-read: add support for BDRV_REQ_PREFETCH to COR-filter

2020-10-12 Thread Andrey Shinkevich via
Add support for the BDRV_REQ_PREFETCH flag to the supported_read_flags
of the COR-filter.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index dfbd6ad..b136895 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -50,6 +50,7 @@ static int cor_open(BlockDriverState *bs, QDict *options, int 
flags,
 return -EINVAL;
 }
 
+bs->supported_read_flags = BDRV_REQ_PREFETCH;
 bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
 (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
 
-- 
1.8.3.1




[PATCH v11 09/13] copy-on-read: skip non-guest reads if no copy needed

2020-10-12 Thread Andrey Shinkevich via
If the BDRV_REQ_PREFETCH flag is set, pass it on to the COR driver so
that unneeded reads can be skipped. This can be taken into account when
optimizing the COR algorithms. At the moment, that check is made during
the block-stream job.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 13 +
 block/io.c   |  3 ++-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index b136895..278a11a 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -148,10 +148,15 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
 }
 }
 
-ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
-  local_flags);
-if (ret < 0) {
-return ret;
+if (!!(flags & BDRV_REQ_PREFETCH) &
+!(local_flags & BDRV_REQ_COPY_ON_READ)) {
+/* Skip non-guest reads if no copy needed */
+} else {
+ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+  local_flags);
+if (ret < 0) {
+return ret;
+}
 }
 
 offset += n;
diff --git a/block/io.c b/block/io.c
index 11df188..bff1808 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1512,7 +1512,8 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
 
 max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
 if (bytes <= max_bytes && bytes <= max_transfer) {
-ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, 0);
+ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset,
+ flags & bs->supported_read_flags);
 goto out;
 }
 
-- 
1.8.3.1




[PATCH v11 10/13] stream: skip filters when writing backing file name to QCOW2 header

2020-10-12 Thread Andrey Shinkevich via
Avoid writing a filter JSON-name to QCOW2 image when the backing file
is changed after the block stream job.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index e0540ee..51462bd 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -65,6 +65,7 @@ static int stream_prepare(Job *job)
 BlockDriverState *bs = blk_bs(bjob->blk);
 BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
 BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
+BlockDriverState *base_unfiltered = bdrv_skip_filters(base);
 Error *local_err = NULL;
 int ret = 0;
 
@@ -73,10 +74,10 @@ static int stream_prepare(Job *job)
 
 if (bdrv_cow_child(unfiltered_bs)) {
 const char *base_id = NULL, *base_fmt = NULL;
-if (base) {
-base_id = s->backing_file_str;
-if (base->drv) {
-base_fmt = base->drv->format_name;
+if (base_unfiltered) {
+base_id = base_unfiltered->filename;
+if (base_unfiltered->drv) {
+base_fmt = base_unfiltered->drv->format_name;
 }
 }
 bdrv_set_backing_hd(unfiltered_bs, base, _err);
-- 
1.8.3.1




[PATCH v11 07/13] block: include supported_read_flags into BDS structure

2020-10-12 Thread Andrey Shinkevich via
Add the new member supported_read_flags to BlockDriverState structure.
It will control the BDRV_REQ_PREFETCH flag set for copy-on-read
operations.

Signed-off-by: Andrey Shinkevich 
---
 include/block/block_int.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index f782737..a142867 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -873,6 +873,10 @@ struct BlockDriverState {
 /* I/O Limits */
 BlockLimits bl;
 
+/*
+ * Flags honored during pread (so far: BDRV_REQ_PREFETCH)
+ */
+unsigned int supported_read_flags;
 /* Flags honored during pwrite (so far: BDRV_REQ_FUA,
  * BDRV_REQ_WRITE_UNCHANGED).
  * If a driver does not support BDRV_REQ_WRITE_UNCHANGED, those
-- 
1.8.3.1




[PATCH v11 06/13] block: modify the comment for BDRV_REQ_PREFETCH flag

2020-10-12 Thread Andrey Shinkevich via
Modify the comment for the flag BDRV_REQ_PREFETCH as we are going to
use it alone and pass it to the COR-filter driver for further
processing.

Signed-off-by: Andrey Shinkevich 
---
 include/block/block.h | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index 981ab5b..2b7efd1 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -71,9 +71,10 @@ typedef enum {
 BDRV_REQ_NO_FALLBACK= 0x100,
 
 /*
- * BDRV_REQ_PREFETCH may be used only together with BDRV_REQ_COPY_ON_READ
- * on read request and means that caller doesn't really need data to be
- * written to qiov parameter which may be NULL.
+ * BDRV_REQ_PREFETCH may be used together with the BDRV_REQ_COPY_ON_READ
+ * flag or when the COR-filter applied to read operations and means that
+ * caller doesn't really need data to be written to qiov parameter which
+ * may be NULL.
  */
 BDRV_REQ_PREFETCH  = 0x200,
 /* Mask of valid flags */
-- 
1.8.3.1




[PATCH v11 05/13] copy-on-read: limit COR operations to base in COR driver

2020-10-12 Thread Andrey Shinkevich via
Limit COR operations by the base node in the backing chain when the
overlay base node name is given. It will be useful for a block stream
job when the COR-filter is applied. The overlay base node is passed as
the base itself may change due to concurrent commit jobs on the same
backing chain.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 39 +--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index c578b1b..dfbd6ad 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -122,8 +122,43 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
size_t qiov_offset,
int flags)
 {
-return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
-   flags | BDRV_REQ_COPY_ON_READ);
+int64_t n = 0;
+int64_t size = offset + bytes;
+int local_flags;
+int ret;
+BDRVStateCOR *state = bs->opaque;
+
+if (!state->base_overlay) {
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
+}
+
+while (offset < size) {
+local_flags = flags;
+
+/* In case of failure, try to copy-on-read anyway */
+ret = bdrv_is_allocated(bs->file->bs, offset, bytes, );
+if (!ret) {
+ret = bdrv_is_allocated_above(bdrv_cow_bs(bs->file->bs),
+  state->base_overlay, true, offset,
+  n, );
+if (ret) {
+local_flags |= BDRV_REQ_COPY_ON_READ;
+}
+}
+
+ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+  local_flags);
+if (ret < 0) {
+return ret;
+}
+
+offset += n;
+qiov_offset += n;
+bytes -= n;
+}
+
+return 0;
 }
 
 
-- 
1.8.3.1




[PATCH v11 02/13] copy-on-read: add filter append/drop functions

2020-10-12 Thread Andrey Shinkevich via
Provide API for the COR-filter insertion/removal.
Also, drop the filter child permissions for an inactive state when the
filter node is being removed.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/copy-on-read.c | 88 
 block/copy-on-read.h | 35 +
 2 files changed, 123 insertions(+)
 create mode 100644 block/copy-on-read.h

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index cb03e0f..bcccf0f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -23,11 +23,21 @@
 #include "qemu/osdep.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "block/copy-on-read.h"
+
+
+typedef struct BDRVStateCOR {
+bool active;
+} BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BDRVStateCOR *state = bs->opaque;
+
 bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
false, errp);
@@ -42,6 +52,13 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+state->active = true;
+
+/*
+ * We don't need to call bdrv_child_refresh_perms() now as the permissions
+ * will be updated later when the filter node gets its parent.
+ */
+
 return 0;
 }
 
@@ -57,6 +74,17 @@ static void cor_child_perm(BlockDriverState *bs, BdrvChild 
*c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
 {
+BDRVStateCOR *s = bs->opaque;
+
+if (!s->active) {
+/*
+ * While the filter is being removed
+ */
+*nperm = 0;
+*nshared = BLK_PERM_ALL;
+return;
+}
+
 *nperm = perm & PERM_PASSTHROUGH;
 *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
 
@@ -135,6 +163,7 @@ static void cor_lock_medium(BlockDriverState *bs, bool 
locked)
 
 static BlockDriver bdrv_copy_on_read = {
 .format_name= "copy-on-read",
+.instance_size  = sizeof(BDRVStateCOR),
 
 .bdrv_open  = cor_open,
 .bdrv_child_perm= cor_child_perm,
@@ -159,4 +188,63 @@ static void bdrv_copy_on_read_init(void)
 bdrv_register(_copy_on_read);
 }
 
+
+BlockDriverState *bdrv_cor_filter_append(BlockDriverState *bs,
+ QDict *node_options,
+ int flags, Error **errp)
+{
+BlockDriverState *cor_filter_bs;
+Error *local_err = NULL;
+
+cor_filter_bs = bdrv_open(NULL, NULL, node_options, flags, errp);
+if (cor_filter_bs == NULL) {
+error_prepend(errp, "Could not create COR-filter node: ");
+return NULL;
+}
+
+if (!qdict_get_try_str(node_options, "node-name")) {
+cor_filter_bs->implicit = true;
+}
+
+bdrv_drained_begin(bs);
+bdrv_replace_node(bs, cor_filter_bs, _err);
+bdrv_drained_end(bs);
+
+if (local_err) {
+bdrv_unref(cor_filter_bs);
+error_propagate(errp, local_err);
+return NULL;
+}
+
+return cor_filter_bs;
+}
+
+
+void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs)
+{
+BdrvChild *child;
+BlockDriverState *bs;
+BDRVStateCOR *s = cor_filter_bs->opaque;
+
+child = bdrv_filter_child(cor_filter_bs);
+if (!child) {
+return;
+}
+bs = child->bs;
+
+/* Retain the BDS until we complete the graph change. */
+bdrv_ref(bs);
+/* Hold a guest back from writing while permissions are being reset. */
+bdrv_drained_begin(bs);
+/* Drop permissions before the graph change. */
+s->active = false;
+bdrv_child_refresh_perms(cor_filter_bs, child, _abort);
+bdrv_replace_node(cor_filter_bs, bs, _abort);
+
+bdrv_drained_end(bs);
+bdrv_unref(bs);
+bdrv_unref(cor_filter_bs);
+}
+
+
 block_init(bdrv_copy_on_read_init);
diff --git a/block/copy-on-read.h b/block/copy-on-read.h
new file mode 100644
index 000..d6f2422
--- /dev/null
+++ b/block/copy-on-read.h
@@ -0,0 +1,35 @@
+/*
+ * Copy-on-read filter block driver
+ *
+ * The filter driver performs Copy-On-Read (COR) operations
+ *
+ * Copyright (c) 2018-2020 Virtuozzo International GmbH.
+ *
+ * Author:
+ *   Andrey Shinkevich 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any la

[PATCH v11 04/13] copy-on-read: pass overlay base node name to COR driver

2020-10-12 Thread Andrey Shinkevich via
We are going to use the COR-filter for a block-stream job.
To limit COR operations by the base node in the backing chain during
stream job, pass the name of overlay base node to the copy-on-read
driver as base node itself may change due to possible concurrent jobs.
The rest of the functionality will be implemented in the patch that
follows.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index bcccf0f..c578b1b 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -24,19 +24,24 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qdict.h"
 #include "block/copy-on-read.h"
 
 
 typedef struct BDRVStateCOR {
 bool active;
+BlockDriverState *base_overlay;
 } BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BlockDriverState *base_overlay = NULL;
 BDRVStateCOR *state = bs->opaque;
+/* We need the base overlay node rather than the base itself */
+const char *base_overlay_node = qdict_get_try_str(options, "base");
 
 bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
@@ -52,7 +57,16 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+if (base_overlay_node) {
+qdict_del(options, "base");
+base_overlay = bdrv_lookup_bs(NULL, base_overlay_node, errp);
+if (!base_overlay) {
+error_setg(errp, QERR_BASE_NOT_FOUND, base_overlay_node);
+return -EINVAL;
+}
+}
 state->active = true;
+state->base_overlay = base_overlay;
 
 /*
  * We don't need to call bdrv_child_refresh_perms() now as the permissions
-- 
1.8.3.1




[PATCH v11 03/13] qapi: add filter-node-name to block-stream

2020-10-12 Thread Andrey Shinkevich via
Provide the possibility to pass the 'filter-node-name' parameter to the
block-stream job as it is done for the commit block job.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/monitor/block-hmp-cmds.c | 4 ++--
 block/stream.c | 4 +++-
 blockdev.c | 4 +++-
 include/block/block_int.h  | 7 ++-
 qapi/block-core.json   | 6 ++
 5 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 4d3db5e..4e66775 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -507,8 +507,8 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
 
 qmp_block_stream(true, device, device, base != NULL, base, false, NULL,
  false, NULL, qdict_haskey(qdict, "speed"), speed, true,
- BLOCKDEV_ON_ERROR_REPORT, false, false, false, false,
- );
+ BLOCKDEV_ON_ERROR_REPORT, false, NULL, false, false, 
false,
+ false, );
 
 hmp_handle_error(mon, error);
 }
diff --git a/block/stream.c b/block/stream.c
index 8ce6729..e0540ee 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -221,7 +221,9 @@ static const BlockJobDriver stream_job_driver = {
 void stream_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *base, const char *backing_file_str,
   int creation_flags, int64_t speed,
-  BlockdevOnError on_error, Error **errp)
+  BlockdevOnError on_error,
+  const char *filter_node_name,
+  Error **errp)
 {
 StreamBlockJob *s;
 BlockDriverState *iter;
diff --git a/blockdev.c b/blockdev.c
index bebd3ba..d719c47 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2489,6 +2489,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
   bool has_backing_file, const char *backing_file,
   bool has_speed, int64_t speed,
   bool has_on_error, BlockdevOnError on_error,
+  bool has_filter_node_name, const char *filter_node_name,
   bool has_auto_finalize, bool auto_finalize,
   bool has_auto_dismiss, bool auto_dismiss,
   Error **errp)
@@ -2571,7 +2572,8 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 
 stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
- job_flags, has_speed ? speed : 0, on_error, _err);
+ job_flags, has_speed ? speed : 0, on_error,
+ filter_node_name, _err);
 if (local_err) {
 error_propagate(errp, local_err);
 goto out;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 38cad9d..f782737 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1134,6 +1134,9 @@ int is_windows_drive(const char *filename);
  *  See @BlockJobCreateFlags
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @on_error: The action to take upon error.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the stream job inserts into the graph above @bs. NULL means
+ * that a node name should be autogenerated.
  * @errp: Error object.
  *
  * Start a streaming operation on @bs.  Clusters that are unallocated
@@ -1146,7 +1149,9 @@ int is_windows_drive(const char *filename);
 void stream_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *base, const char *backing_file_str,
   int creation_flags, int64_t speed,
-  BlockdevOnError on_error, Error **errp);
+  BlockdevOnError on_error,
+  const char *filter_node_name,
+  Error **errp);
 
 /**
  * commit_start:
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 3c16f1e..32fb097 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2533,6 +2533,11 @@
 #'stop' and 'enospc' can only be used if the block device
 #supports io-status (see BlockInfo).  Since 1.3.
 #
+# @filter-node-name: the node name that should be assigned to the
+#filter driver that the stream job inserts into the graph
+#above @device. If this option is not given, a node name is
+#autogenerated. (Since: 5.2)
+#
 # @auto-finalize: When false, this job will wait in a PENDING state after it 
has
 # finished its work, waiting for @block-job-finalize before
 # making any block graph changes.
@@ -2563,6 +2568,7 @@
   'data': { '*job-id': 'str', 'device': 'str', '*base': 'str',
 '*base-node': 'str', '*backing-file': 'str', '*speed': 'int',
 '*on-error': 'Block

[PATCH v11 01/13] copy-on-read: Support preadv/pwritev_part functions

2020-10-12 Thread Andrey Shinkevich via
Add support for the recently introduced functions
bdrv_co_preadv_part()
and
bdrv_co_pwritev_part()
to the COR-filter driver.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/copy-on-read.c | 28 
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 2816e61..cb03e0f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -74,21 +74,25 @@ static int64_t cor_getlength(BlockDriverState *bs)
 }
 
 
-static int coroutine_fn cor_co_preadv(BlockDriverState *bs,
-  uint64_t offset, uint64_t bytes,
-  QEMUIOVector *qiov, int flags)
+static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs,
+   uint64_t offset, uint64_t bytes,
+   QEMUIOVector *qiov,
+   size_t qiov_offset,
+   int flags)
 {
-return bdrv_co_preadv(bs->file, offset, bytes, qiov,
-  flags | BDRV_REQ_COPY_ON_READ);
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
 }
 
 
-static int coroutine_fn cor_co_pwritev(BlockDriverState *bs,
-   uint64_t offset, uint64_t bytes,
-   QEMUIOVector *qiov, int flags)
+static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs,
+uint64_t offset,
+uint64_t bytes,
+QEMUIOVector *qiov,
+size_t qiov_offset, int flags)
 {
-
-return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
+flags);
 }
 
 
@@ -137,8 +141,8 @@ static BlockDriver bdrv_copy_on_read = {
 
 .bdrv_getlength = cor_getlength,
 
-.bdrv_co_preadv = cor_co_preadv,
-.bdrv_co_pwritev= cor_co_pwritev,
+.bdrv_co_preadv_part= cor_co_preadv_part,
+.bdrv_co_pwritev_part   = cor_co_pwritev_part,
 .bdrv_co_pwrite_zeroes  = cor_co_pwrite_zeroes,
 .bdrv_co_pdiscard   = cor_co_pdiscard,
 .bdrv_co_pwritev_compressed = cor_co_pwritev_compressed,
-- 
1.8.3.1




[PATCH v11 00/13] Apply COR-filter to the block-stream permanently

2020-10-12 Thread Andrey Shinkevich via
The iotest case test_stream_parallel still does not pass after the
COR-filter is inserted into the backing chain. As the test case may not
be initialized, it does not make sense and was removed again.

v11:
  04: Base node overlay is used instead of base.
  05: Base node overlay is used instead of base.
  06: New.
  07: New.
  08: New.
  09: The new BDS-member 'supported_read_flags' is applied.
  10: The 'base_metadata' variable renamed to 'base_unfiltered'.
  11: New.
  12: The backing-file argument is left in the QMP interface. Warning added.
  13: The BDRV_REQ_COPY_ON_READ removed from the stream_populate();
  The 'implicit' initialization moved back to COR-filter driver.
  Base node overlay is used instead of base.

The v8 Message-Id:
<1601383109-110988-1-git-send-email-andrey.shinkev...@virtuozzo.com>

Andrey Shinkevich (13):
  copy-on-read: Support preadv/pwritev_part functions
  copy-on-read: add filter append/drop functions
  qapi: add filter-node-name to block-stream
  copy-on-read: pass overlay base node name to COR driver
  copy-on-read: limit COR operations to base in COR driver
  block: modify the comment for BDRV_REQ_PREFETCH flag
  block: include supported_read_flags into BDS structure
  copy-on-read: add support for BDRV_REQ_PREFETCH to COR-filter
  copy-on-read: skip non-guest reads if no copy needed
  stream: skip filters when writing backing file name to QCOW2 header
  stream: mark backing-file argument as deprecated
  stream: remove unused backing-file name parameter
  block: apply COR-filter to block-stream jobs

 block/copy-on-read.c   | 171 ++---
 block/copy-on-read.h   |  35 +
 block/io.c |   3 +-
 block/monitor/block-hmp-cmds.c |   4 +-
 block/stream.c | 112 ---
 blockdev.c |  25 +++---
 docs/system/deprecated.rst |   6 ++
 include/block/block.h  |   7 +-
 include/block/block_int.h  |  13 +++-
 qapi/block-core.json   |   6 ++
 tests/qemu-iotests/030 |  51 ++--
 tests/qemu-iotests/030.out |   4 +-
 tests/qemu-iotests/141.out |   2 +-
 tests/qemu-iotests/245 |  19 +++--
 14 files changed, 324 insertions(+), 134 deletions(-)
 create mode 100644 block/copy-on-read.h

-- 
1.8.3.1




Re: [PATCH v10 6/9] copy-on-read: skip non-guest reads if no copy needed

2020-10-09 Thread Andrey Shinkevich

On 07.10.2020 22:28, Vladimir Sementsov-Ogievskiy wrote:

07.10.2020 22:01, Andrey Shinkevich wrote:


On 07.10.2020 13:06, Vladimir Sementsov-Ogievskiy wrote:

29.09.2020 15:38, Andrey Shinkevich wrote:

If the flag BDRV_REQ_PREFETCH was set, pass it further to the
COR-driver to skip unneeded reading. It can be taken into account for
the COR-algorithms optimization. That check is being made during the
block stream job at the moment.

Signed-off-by: Andrey Shinkevich 
---
  block/copy-on-read.c | 14 ++
  block/io.c   |  2 +-
  2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index f53f7e0..5389dca 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -145,10 +145,16 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,

  }
  }
-    ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, 
qiov_offset,

-  local_flags);
-    if (ret < 0) {
-    return ret;
+    if ((flags & BDRV_REQ_PREFETCH) &


BDRV_REQ_PREFETCH is documented to be only used with 
BDRV_REQ_COPY_ON_READ. But here
BDRV_REQ_COPY_ON_READ appears intermediately. We should change 
documentation in block.h

in a separate patch (and probably code in bdrv_aligned_preadv())



OK, we will come here without the BDRV_REQ_PREFETCH flag set.


flag BDRV_REQ_PREFETCH should be set in stream job. Where should it be 
handled, I don't follow?




If we leave block/io.c unchanged in this patch, which I agree with,
we'll come to the COR-driver with the hardcoded flags = 0:


#4  0x55a22bb480cf in cor_co_preadv_part (bs=0x55a22d593710, 
offset=0, bytes=524288, qiov=0x0, qiov_offset=0, flags=0) at 
../block/copy-on-read.c:149
#5  0x55a22badcb1d in bdrv_driver_preadv (bs=0x55a22d593710, 
offset=0, bytes=524288, qiov=0x0, qiov_offset=0, flags=0) at 
../block/io.c:1129
#6  0x55a22baddc81 in bdrv_aligned_preadv (child=0x55a22d814780, 
req=0x7f8c1abffce0, offset=0, bytes=524288, align=1, qiov=0x0, 
qiov_offset=0, flags=512) at ../block/io.c:1515
#7  0x55a22bade59a in bdrv_co_preadv_part (child=0x55a22d814780, 
offset=0, bytes=524288, qiov=0x0, qiov_offset=0, 
flags=BDRV_REQ_PREFETCH) at ../block/io.c:1757
#8  0x55a22bade3d2 in bdrv_co_preadv (child=0x55a22d814780, 
offset=0, bytes=524288, qiov=0x0, flags=BDRV_REQ_PREFETCH) at 
../block/io.c:1715
#9  0x55a22baf5d09 in blk_do_preadv (blk=0x55a22d818c00, offset=0, 
bytes=524288, qiov=0x0, flags=BDRV_REQ_PREFETCH) at 
../block/block-backend.c:1211
#10 0x55a22baf5d61 in blk_co_preadv (blk=0x55a22d818c00, offset=0, 
bytes=524288, qiov=0x0, flags=BDRV_REQ_PREFETCH) at 
../block/block-backend.c:1223
#11 0x55a22bab4eba in stream_populate (blk=0x55a22d818c00, offset=0, 
bytes=524288) at ../block/stream.c:50
#12 0x55a22bab52c2 in stream_run (job=0x55a22d810a20, 
errp=0x55a22d810aa0) at ../block/stream.c:162
#13 0x55a22bab79f0 in job_co_entry (opaque=0x55a22d810a20) at 
../job.c:908


So, the only way for the COR-filter driver to distinguish guest reads
from stream-job reads is to check the qiov pointer for NULL and reset
the flags as appropriate. This is what I am going to do in the next version.


Andrey

To distinguish guest reads from the stream job ones, we would set the
BDRV_REQ_PREFETCH flag here by checking whether the qiov pointer is NULL:



diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 4e3b1c5..df2c2ab 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -144,6 +144,9 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,

    n, );
  if (ret) {
  local_flags |= BDRV_REQ_COPY_ON_READ;
+    if (!qiov) {
+    local_flags |= BDRV_REQ_PREFETCH;


if qiov is NULL, this means that flags must include BDRV_REQ_PREFETCH. 
local_flags should inherit flags I think.



+    }
  }
  }

Andrey


+    !(local_flags & BDRV_REQ_COPY_ON_READ)) {
+    /* Skip non-guest reads if no copy needed */
+    } else {
+


extra new-line ?

+    ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, 
qiov_offset,

+  local_flags);
+    if (ret < 0) {
+    return ret;
+    }
  }
  offset += n;
diff --git a/block/io.c b/block/io.c
index 11df188..62b75a5 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1388,7 +1388,7 @@ static int coroutine_fn 
bdrv_co_do_copy_on_readv(BdrvChild *child,

  qemu_iovec_init_buf(_qiov, bounce_buffer, pnum);
  ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
- _qiov, 0, 0);
+ _qiov, 0, flags & 
BDRV_REQ_PREFETCH);


Why? In this place we want to read. We'll write back the data a few 
lines below. What will we write,

if underlying

Re: [PATCH v10 8/9] block: remove unused backing-file name parameter

2020-10-07 Thread Andrey Shinkevich



On 07.10.2020 13:21, Vladimir Sementsov-Ogievskiy wrote:

29.09.2020 15:38, Andrey Shinkevich wrote:

The block stream QMP parameter backing-file is no longer in use. It
designates the backing file name to set in the QCOW2 image header after
the block stream job has finished. The base file name is used instead.

Signed-off-by: Andrey Shinkevich 


We can't just remove it without a deprecation period of three releases.


It has not been in use for a long time. It's time.



So actually, in a previous patch, we should implement new behavior for
automatic backing-file detection if this parameter is unspecified. And
keep old behavior for backing-file-name if it is given.

Hmm. Or, probably, we can use direct base for base-filename? And in cases
when we should skip filters (for example of parallel jobs) user should
specify backing-file explicitly?


The backing_file_str is always specified if the base is specified and is 
always equal to the base->filename. So, the user's backing file name is 
always NULL for the stream job. Furthermore, it is not checked for being 
the correct backing node and can lead to a wrong record in the QCOW2 header.


Andrey




---
  block/monitor/block-hmp-cmds.c |  2 +-
  block/stream.c |  6 +-
  blockdev.c | 17 +
  include/block/block_int.h  |  2 +-
  qapi/block-core.json   | 17 +
  5 files changed, 5 insertions(+), 39 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c 
b/block/monitor/block-hmp-cmds.c

index 4e66775..5f19499 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -506,7 +506,7 @@ void hmp_block_stream(Monitor *mon, const QDict 
*qdict)

  int64_t speed = qdict_get_try_int(qdict, "speed", 0);
  qmp_block_stream(true, device, device, base != NULL, base, 
false, NULL,
- false, NULL, qdict_haskey(qdict, "speed"), 
speed, true,

+ qdict_haskey(qdict, "speed"), speed, true,
   BLOCKDEV_ON_ERROR_REPORT, false, NULL, false, 
false, false,

   false, );
diff --git a/block/stream.c b/block/stream.c
index b0719e9..fe2663f 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -34,7 +34,6 @@ typedef struct StreamBlockJob {
  BlockDriverState *base_overlay; /* COW overlay (stream from 
this) */

  BlockDriverState *above_base;   /* Node directly above the base */
  BlockdevOnError on_error;
-    char *backing_file_str;
  bool bs_read_only;
  bool chain_frozen;
  } StreamBlockJob;
@@ -103,8 +102,6 @@ static void stream_clean(Job *job)
  blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, _abort);
  bdrv_reopen_set_read_only(bs, true, NULL);
  }
-
-    g_free(s->backing_file_str);
  }
  static int coroutine_fn stream_run(Job *job, Error **errp)
@@ -220,7 +217,7 @@ static const BlockJobDriver stream_job_driver = {
  };
  void stream_start(const char *job_id, BlockDriverState *bs,
-  BlockDriverState *base, const char *backing_file_str,
+  BlockDriverState *base,
    int creation_flags, int64_t speed,
    BlockdevOnError on_error,
    const char *filter_node_name,
@@ -295,7 +292,6 @@ void stream_start(const char *job_id, 
BlockDriverState *bs,

  s->base_overlay = base_overlay;
  s->above_base = above_base;
-    s->backing_file_str = g_strdup(backing_file_str);
  s->bs_read_only = bs_read_only;
  s->chain_frozen = true;
diff --git a/blockdev.c b/blockdev.c
index d719c47..b223601 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2486,7 +2486,6 @@ out:
  void qmp_block_stream(bool has_job_id, const char *job_id, const 
char *device,

    bool has_base, const char *base,
    bool has_base_node, const char *base_node,
-  bool has_backing_file, const char *backing_file,
    bool has_speed, int64_t speed,
    bool has_on_error, BlockdevOnError on_error,
    bool has_filter_node_name, const char 
*filter_node_name,
@@ -2498,7 +2497,6 @@ void qmp_block_stream(bool has_job_id, const 
char *job_id, const char *device,

  BlockDriverState *base_bs = NULL;
  AioContext *aio_context;
  Error *local_err = NULL;
-    const char *base_name = NULL;
  int job_flags = JOB_DEFAULT;
  if (!has_on_error) {
@@ -2526,7 +2524,6 @@ void qmp_block_stream(bool has_job_id, const 
char *job_id, const char *device,

  goto out;
  }
  assert(bdrv_get_aio_context(base_bs) == aio_context);
-    base_name = base;
  }
  if (has_base_node) {
@@ -2541,7 +2538,6 @@ void qmp_block_stream(bool has_job_id, const 
char *job_id, const char *device,

  }
  assert(bdrv_get_aio_context(base_bs) == aio_context);
  bdrv_refresh_filename(base_bs);
-    base_name = bas

Re: [PATCH v10 6/9] copy-on-read: skip non-guest reads if no copy needed

2020-10-07 Thread Andrey Shinkevich



On 07.10.2020 13:06, Vladimir Sementsov-Ogievskiy wrote:

29.09.2020 15:38, Andrey Shinkevich wrote:

If the flag BDRV_REQ_PREFETCH was set, pass it further to the
COR-driver to skip unneeded reading. It can be taken into account for
the COR-algorithms optimization. That check is being made during the
block stream job at the moment.

Signed-off-by: Andrey Shinkevich 
---
  block/copy-on-read.c | 14 ++
  block/io.c   |  2 +-
  2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index f53f7e0..5389dca 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -145,10 +145,16 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,

  }
  }
-    ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, 
qiov_offset,

-  local_flags);
-    if (ret < 0) {
-    return ret;
+    if ((flags & BDRV_REQ_PREFETCH) &


BDRV_REQ_PREFETCH is documented to be only used with 
BDRV_REQ_COPY_ON_READ. But here
BDRV_REQ_COPY_ON_READ appears intermediately. We should change 
documentation in block.h

in a separate patch (and probably code in bdrv_aligned_preadv())



OK, we will come here without the BDRV_REQ_PREFETCH flag set.
To distinguish guest reads from the stream job ones, we would set the
BDRV_REQ_PREFETCH flag here by checking whether the qiov pointer is NULL:



diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 4e3b1c5..df2c2ab 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -144,6 +144,9 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,

   n, );
 if (ret) {
 local_flags |= BDRV_REQ_COPY_ON_READ;
+if (!qiov) {
+local_flags |= BDRV_REQ_PREFETCH;
+}
 }
 }

Andrey


+    !(local_flags & BDRV_REQ_COPY_ON_READ)) {
+    /* Skip non-guest reads if no copy needed */
+    } else {
+


extra new-line ?

+    ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, 
qiov_offset,

+  local_flags);
+    if (ret < 0) {
+    return ret;
+    }
  }
  offset += n;
diff --git a/block/io.c b/block/io.c
index 11df188..62b75a5 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1388,7 +1388,7 @@ static int coroutine_fn jk(BdrvChild *child,
  qemu_iovec_init_buf(_qiov, bounce_buffer, pnum);
  ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
- _qiov, 0, 0);
+ _qiov, 0, flags & 
BDRV_REQ_PREFETCH);


Why? In this place we want to read. We'll write back the data a few 
lines below. What will we write,

if underlying driver decide to do nothing because of BDRV_REQ_PREFETCH?



See my comment above please.


  if (ret < 0) {
  goto err;
  }








Re: [PATCH v10 9/9] block: apply COR-filter to block-stream jobs

2020-10-07 Thread Andrey Shinkevich



On 07.10.2020 20:27, Andrey Shinkevich wrote:


On 29.09.2020 15:38, Andrey Shinkevich wrote:

This patch completes the series with the COR-filter insertion for
block-stream operations. Adding the filter makes it possible for copied
regions to be discarded in backing files during the block-stream job,
which will reduce the disk overuse.
The COR-filter insertion incurs changes in the iotests case
245:test_block_stream_4 that reopens the backing chain during a
block-stream job. There are changes in the iotests #030 as well.
The iotests case 030:test_stream_parallel was deleted due to multiple
conflicts between the concurrent job operations over the same backing
chain. The base backing node for one job is the top node for another
job. It may change due to the filter node inserted into the backing
chain while both jobs are running. Another issue is that the parts of
the backing chain are being frozen by the running job and may not be
changed by the concurrent job when needed. The concept of parallel
jobs with common nodes is no longer considered vital.

Signed-off-by: Andrey Shinkevich 
---
  block/stream.c | 93 
++

  tests/qemu-iotests/030 | 51 +++--
  tests/qemu-iotests/030.out |  4 +-
  tests/qemu-iotests/141.out |  2 +-
  tests/qemu-iotests/245 | 19 +++---
  5 files changed, 83 insertions(+), 86 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index fe2663f..240b3dc 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -17,8 +17,10 @@



One more change missed, as we use the COR-filter:

@@ -47,8 +47,7 @@ static int coroutine_fn stream_populate(BlockBackend 
*blk,

  {
  assert(bytes < SIZE_MAX);

-    return blk_co_preadv(blk, offset, bytes, NULL,
- BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);

  +return blk_co_preadv(blk, offset, bytes, NULL, BDRV_REQ_PREFETCH);

Sorry, with the only flag BDRV_REQ_PREFETCH set.
A change in the comment at the flag BDRV_REQ_PREFETCH is coming with a 
separate patch as Vladimir suggested.


Andrey


+    return blk_co_preadv(blk, offset, bytes, NULL, 0);
  }

Andrey




Re: [PATCH v10 9/9] block: apply COR-filter to block-stream jobs

2020-10-07 Thread Andrey Shinkevich



On 29.09.2020 15:38, Andrey Shinkevich wrote:

This patch completes the series with the COR-filter insertion for
block-stream operations. Adding the filter makes it possible for copied
regions to be discarded in backing files during the block-stream job,
which will reduce the disk overuse.
The COR-filter insertion incurs changes in the iotests case
245:test_block_stream_4 that reopens the backing chain during a
block-stream job. There are changes in the iotests #030 as well.
The iotests case 030:test_stream_parallel was deleted due to multiple
conflicts between the concurrent job operations over the same backing
chain. The base backing node for one job is the top node for another
job. It may change due to the filter node inserted into the backing
chain while both jobs are running. Another issue is that the parts of
the backing chain are being frozen by the running job and may not be
changed by the concurrent job when needed. The concept of parallel
jobs with common nodes is no longer considered vital.

Signed-off-by: Andrey Shinkevich 
---
  block/stream.c | 93 ++
  tests/qemu-iotests/030 | 51 +++--
  tests/qemu-iotests/030.out |  4 +-
  tests/qemu-iotests/141.out |  2 +-
  tests/qemu-iotests/245 | 19 +++---
  5 files changed, 83 insertions(+), 86 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index fe2663f..240b3dc 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -17,8 +17,10 @@



One more change missed, as we use the COR-filter:

@@ -47,8 +47,7 @@ static int coroutine_fn stream_populate(BlockBackend *blk,
 {
 assert(bytes < SIZE_MAX);

-return blk_co_preadv(blk, offset, bytes, NULL,
- BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
+return blk_co_preadv(blk, offset, bytes, NULL, 0);
 }

Andrey



Re: [PATCH v10 5/9] copy-on-read: limit guest COR activity to base in COR driver

2020-10-05 Thread Andrey Shinkevich

On 05.10.2020 17:58, Vladimir Sementsov-Ogievskiy wrote:

29.09.2020 15:38, Andrey Shinkevich wrote:

Limit the guest's COR operations by the base node in the backing chain
when the base node name is given. It will be useful for a block stream
job when the COR-filter is applied.

Signed-off-by: Andrey Shinkevich 
---
  block/copy-on-read.c | 38 --
  1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index e04092f..f53f7e0 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -121,8 +121,42 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,

 size_t qiov_offset,
 int flags)
  {
-    return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, 
qiov_offset,

-   flags | BDRV_REQ_COPY_ON_READ);
+    int64_t n = 0;
+    int64_t size = offset + bytes;
+    int local_flags;
+    int ret;
+    BDRVStateCOR *state = bs->opaque;
+
+    if (!state->base_bs) {
+    return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, 
qiov_offset,

+   flags | BDRV_REQ_COPY_ON_READ);
+    }
+
+    while (offset < size) {
+    local_flags = flags;
+
+    /* In case of failure, try to copy-on-read anyway */


But you add the flag only in case of success. On any failure of further
is*allocated calls we should set the flag.




Actually, I would prefer returning the error instead.

Andrey


+    ret = bdrv_is_allocated(bs->file->bs, offset, bytes, );
+    if (!ret) {
+    ret = bdrv_is_allocated_above(bdrv_cow_bs(bs->file->bs),
+  state->base_bs, false, 
offset, n, );

+    if (ret > 0) {
+    local_flags |= BDRV_REQ_COPY_ON_READ;
+    }
+    }
+
+    ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, 
qiov_offset,

+  local_flags);
+    if (ret < 0) {
+    return ret;
+    }
+
+    offset += n;
+    qiov_offset += n;
+    bytes -= n;
+    }
+
+    return 0;
  }








Re: [PATCH v10 5/9] copy-on-read: limit guest COR activity to base in COR driver

2020-10-05 Thread Andrey Shinkevich




On 05.10.2020 17:58, Vladimir Sementsov-Ogievskiy wrote:

29.09.2020 15:38, Andrey Shinkevich wrote:

Limit the guest's COR operations by the base node in the backing chain
when the base node name is given. It will be useful for a block stream
job when the COR-filter is applied.

Signed-off-by: Andrey Shinkevich 
---
  block/copy-on-read.c | 38 --
  1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index e04092f..f53f7e0 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -121,8 +121,42 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,

 size_t qiov_offset,
 int flags)
  {
-    return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, 
qiov_offset,

-   flags | BDRV_REQ_COPY_ON_READ);
+    int64_t n = 0;
+    int64_t size = offset + bytes;
+    int local_flags;
+    int ret;
+    BDRVStateCOR *state = bs->opaque;
+
+    if (!state->base_bs) {
+    return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, 
qiov_offset,

+   flags | BDRV_REQ_COPY_ON_READ);
+    }
+
+    while (offset < size) {
+    local_flags = flags;
+
+    /* In case of failure, try to copy-on-read anyway */


But you add the flag only in case of success. On any failure of further
is*allocated calls we should set the flag.


Yes, thanks.
Andrey




+    ret = bdrv_is_allocated(bs->file->bs, offset, bytes, );
+    if (!ret) {
+    ret = bdrv_is_allocated_above(bdrv_cow_bs(bs->file->bs),
+  state->base_bs, false, 
offset, n, );

+    if (ret > 0) {
+    local_flags |= BDRV_REQ_COPY_ON_READ;
+    }
+    }
+
+    ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, 
qiov_offset,

+  local_flags);
+    if (ret < 0) {
+    return ret;
+    }
+
+    offset += n;
+    qiov_offset += n;
+    bytes -= n;
+    }
+
+    return 0;
  }








Re: [PATCH v10 2/9] copy-on-read: add filter append/drop functions

2020-10-05 Thread Andrey Shinkevich

On 05.10.2020 16:34, Vladimir Sementsov-Ogievskiy wrote:

29.09.2020 15:38, Andrey Shinkevich wrote:

Provide API for the COR-filter insertion/removal.
Also, drop the filter child permissions for an inactive state when the
filter node is being removed.

Signed-off-by: Andrey Shinkevich 
---
  block/copy-on-read.c | 84 


  block/copy-on-read.h | 35 ++
  2 files changed, 119 insertions(+)
  create mode 100644 block/copy-on-read.h

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index cb03e0f..3c8231f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -23,11 +23,21 @@
  #include "qemu/osdep.h"
  #include "block/block_int.h"
  #include "qemu/module.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "block/copy-on-read.h"
+
+
+typedef struct BDRVStateCOR {
+    bool active;
+} BDRVStateCOR;
  static int cor_open(BlockDriverState *bs, QDict *options, int flags,
  Error **errp)
  {
+    BDRVStateCOR *state = bs->opaque;
+
  bs->file = bdrv_open_child(NULL, options, "file", bs, 
_of_bds,
 BDRV_CHILD_FILTERED | 
BDRV_CHILD_PRIMARY,

 false, errp);
@@ -42,6 +52,13 @@ static int cor_open(BlockDriverState *bs, QDict 
*options, int flags,

  ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
  bs->file->bs->supported_zero_flags);
+    state->active = true;
+
+    /*
+ * We don't need to call bdrv_child_refresh_perms() now as the 
permissions

+ * will be updated later when the filter node gets its parent.
+ */
+
  return 0;
  }
@@ -57,6 +74,17 @@ static void cor_child_perm(BlockDriverState *bs, 
BdrvChild *c,

 uint64_t perm, uint64_t shared,
 uint64_t *nperm, uint64_t *nshared)
  {
+    BDRVStateCOR *s = bs->opaque;
+
+    if (!s->active) {
+    /*
+ * While the filter is being removed
+ */
+    *nperm = 0;
+    *nshared = BLK_PERM_ALL;
+    return;
+    }
+
  *nperm = perm & PERM_PASSTHROUGH;
  *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
@@ -135,6 +163,7 @@ static void cor_lock_medium(BlockDriverState *bs, 
bool locked)

  static BlockDriver bdrv_copy_on_read = {
  .format_name    = "copy-on-read",
+    .instance_size  = sizeof(BDRVStateCOR),
  .bdrv_open  = cor_open,
  .bdrv_child_perm    = cor_child_perm,
@@ -159,4 +188,59 @@ static void bdrv_copy_on_read_init(void)
  bdrv_register(_copy_on_read);
  }
+
+BlockDriverState *bdrv_cor_filter_append(BlockDriverState *bs,
+ QDict *node_options,
+ int flags, Error **errp)



OK, now the function can add almost any filter, not only COR. But it is the
counterpart of bdrv_cor_filter_drop(), and with the "active" hack we don't
want to make the functions generic, I think. So it's OK for now to keep the
function here and named _cor_.



+{
+    BlockDriverState *cor_filter_bs;
+    Error *local_err = NULL;
+
+    cor_filter_bs = bdrv_open(NULL, NULL, node_options, flags, errp);
+    if (cor_filter_bs == NULL) {
+    error_prepend(errp, "Could not create COR-filter node: ");
+    return NULL;
+    }


You've dropped setting the ->implicit field when filter_node_name is not
specified. The caller can probably do it now. I don't really care about
the implicit case, so it's OK with me if it works with iotests.


Thank you for your R-B. The idea behind having the caller set the 'implicit'
member is to prepare the code for node replacement by a function at the
generic block layer in the future. Within the scope of this series, it may
be better to keep it here.


Andrey



So,

Reviewed-by: Vladimir Sementsov-Ogievskiy 






[PATCH v10 7/9] stream: skip filters when writing backing file name to QCOW2 header

2020-09-29 Thread Andrey Shinkevich via
Avoid writing a filter's JSON name to the QCOW2 image header when the
backing file is changed after the block-stream job.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index e0540ee..b0719e9 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -65,6 +65,7 @@ static int stream_prepare(Job *job)
 BlockDriverState *bs = blk_bs(bjob->blk);
 BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
 BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
+BlockDriverState *base_metadata = bdrv_skip_filters(base);
 Error *local_err = NULL;
 int ret = 0;
 
@@ -73,10 +74,10 @@ static int stream_prepare(Job *job)
 
 if (bdrv_cow_child(unfiltered_bs)) {
 const char *base_id = NULL, *base_fmt = NULL;
-if (base) {
-base_id = s->backing_file_str;
-if (base->drv) {
-base_fmt = base->drv->format_name;
+if (base_metadata) {
+base_id = base_metadata->filename;
+if (base_metadata->drv) {
+base_fmt = base_metadata->drv->format_name;
 }
 }
 bdrv_set_backing_hd(unfiltered_bs, base, _err);
-- 
1.8.3.1




[PATCH v10 9/9] block: apply COR-filter to block-stream jobs

2020-09-29 Thread Andrey Shinkevich via
This patch completes the series by inserting the COR-filter for
block-stream operations. Adding the filter makes it possible for copied
regions to be discarded in backing files during the block-stream job,
which will reduce disk overuse.
The COR-filter insertion requires changes in the iotests case
245:test_block_stream_4, which reopens the backing chain during a
block-stream job. There are changes in iotest #030 as well.
The iotests case 030:test_stream_parallel was deleted due to multiple
conflicts between concurrent job operations over the same backing
chain. The base backing node for one job is the top node for another
job. It may change due to the filter node inserted into the backing
chain while both jobs are running. Another issue is that parts of
the backing chain are frozen by the running job and cannot be
changed by the concurrent job when needed. The concept of parallel
jobs with common nodes is no longer considered vital.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 93 ++
 tests/qemu-iotests/030 | 51 +++--
 tests/qemu-iotests/030.out |  4 +-
 tests/qemu-iotests/141.out |  2 +-
 tests/qemu-iotests/245 | 19 +++---
 5 files changed, 83 insertions(+), 86 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index fe2663f..240b3dc 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -17,8 +17,10 @@
 #include "block/blockjob_int.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
+#include "block/copy-on-read.h"
 
 enum {
 /*
@@ -33,6 +35,8 @@ typedef struct StreamBlockJob {
 BlockJob common;
 BlockDriverState *base_overlay; /* COW overlay (stream from this) */
 BlockDriverState *above_base;   /* Node directly above the base */
+BlockDriverState *cor_filter_bs;
+BlockDriverState *target_bs;
 BlockdevOnError on_error;
 bool bs_read_only;
 bool chain_frozen;
@@ -52,23 +56,20 @@ static void stream_abort(Job *job)
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 
 if (s->chain_frozen) {
-BlockJob *bjob = >common;
-bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 }
 }
 
 static int stream_prepare(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
-BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
 BlockDriverState *base_metadata = bdrv_skip_filters(base);
 Error *local_err = NULL;
 int ret = 0;
 
-bdrv_unfreeze_backing_chain(bs, s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 s->chain_frozen = false;
 
 if (bdrv_cow_child(unfiltered_bs)) {
@@ -94,13 +95,14 @@ static void stream_clean(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
+
+bdrv_cor_filter_drop(s->cor_filter_bs);
 
 /* Reopen the image back in read-only mode if necessary */
 if (s->bs_read_only) {
 /* Give up write permissions before making it read-only */
 blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, _abort);
-bdrv_reopen_set_read_only(bs, true, NULL);
+bdrv_reopen_set_read_only(s->target_bs, true, NULL);
 }
 }
 
@@ -108,9 +110,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockBackend *blk = s->common.blk;
-BlockDriverState *bs = blk_bs(blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
-bool enable_cor = !bdrv_cow_child(s->base_overlay);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 int64_t len;
 int64_t offset = 0;
 uint64_t delay_ns = 0;
@@ -122,21 +122,12 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 return 0;
 }
 
-len = bdrv_getlength(bs);
+len = bdrv_getlength(s->target_bs);
 if (len < 0) {
 return len;
 }
 job_progress_set_remaining(>common.job, len);
 
-/* Turn on copy-on-read for the whole block device so that guest read
- * requests help us make progress.  Only do this when copying the entire
- * backing chain since the copy-on-read operation does not take base into
- * account.
- */
-if (enable_cor) {
-bdrv_enable_copy_on_read(bs);
-}
-
 for (

[PATCH v10 6/9] copy-on-read: skip non-guest reads if no copy needed

2020-09-29 Thread Andrey Shinkevich via
If the BDRV_REQ_PREFETCH flag is set, pass it on to the COR driver so
that it can skip unneeded reads. This can be used to optimize the COR
algorithms. At the moment, that check is made during the block-stream
job.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 14 ++
 block/io.c   |  2 +-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index f53f7e0..5389dca 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -145,10 +145,16 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
 }
 }
 
-ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
-  local_flags);
-if (ret < 0) {
-return ret;
+if ((flags & BDRV_REQ_PREFETCH) &
+!(local_flags & BDRV_REQ_COPY_ON_READ)) {
+/* Skip non-guest reads if no copy needed */
+} else {
+
+ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+  local_flags);
+if (ret < 0) {
+return ret;
+}
 }
 
 offset += n;
diff --git a/block/io.c b/block/io.c
index 11df188..62b75a5 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1388,7 +1388,7 @@ static int coroutine_fn 
bdrv_co_do_copy_on_readv(BdrvChild *child,
 qemu_iovec_init_buf(_qiov, bounce_buffer, pnum);
 
 ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
- _qiov, 0, 0);
+ _qiov, 0, flags & 
BDRV_REQ_PREFETCH);
 if (ret < 0) {
 goto err;
 }
-- 
1.8.3.1




[PATCH v10 4/9] copy-on-read: pass base node name to COR driver

2020-09-29 Thread Andrey Shinkevich via
To limit the guest's COR operations to the base node in the backing
chain during a stream job, pass the base node name to the copy-on-read
driver. The rest of the functionality will be implemented in the patch
that follows.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 3c8231f..e04092f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -24,19 +24,23 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qdict.h"
 #include "block/copy-on-read.h"
 
 
 typedef struct BDRVStateCOR {
 bool active;
+BlockDriverState *base_bs;
 } BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BlockDriverState *base_bs = NULL;
 BDRVStateCOR *state = bs->opaque;
+const char *base_node = qdict_get_try_str(options, "base");
 
 bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
@@ -52,7 +56,16 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+if (base_node) {
+qdict_del(options, "base");
+base_bs = bdrv_lookup_bs(NULL, base_node, errp);
+if (!base_bs) {
+error_setg(errp, QERR_BASE_NOT_FOUND, base_node);
+return -EINVAL;
+}
+}
 state->active = true;
+state->base_bs = base_bs;
 
 /*
  * We don't need to call bdrv_child_refresh_perms() now as the permissions
-- 
1.8.3.1




[PATCH v10 8/9] block: remove unused backing-file name parameter

2020-09-29 Thread Andrey Shinkevich via
The block-stream QMP parameter backing-file is no longer in use. It
designated a backing file name to set in the QCOW2 image header after the
block-stream job finished. The base file name is used instead.

Signed-off-by: Andrey Shinkevich 
---
 block/monitor/block-hmp-cmds.c |  2 +-
 block/stream.c |  6 +-
 blockdev.c | 17 +
 include/block/block_int.h  |  2 +-
 qapi/block-core.json   | 17 +
 5 files changed, 5 insertions(+), 39 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 4e66775..5f19499 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -506,7 +506,7 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
 int64_t speed = qdict_get_try_int(qdict, "speed", 0);
 
 qmp_block_stream(true, device, device, base != NULL, base, false, NULL,
- false, NULL, qdict_haskey(qdict, "speed"), speed, true,
+ qdict_haskey(qdict, "speed"), speed, true,
  BLOCKDEV_ON_ERROR_REPORT, false, NULL, false, false, 
false,
  false, );
 
diff --git a/block/stream.c b/block/stream.c
index b0719e9..fe2663f 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -34,7 +34,6 @@ typedef struct StreamBlockJob {
 BlockDriverState *base_overlay; /* COW overlay (stream from this) */
 BlockDriverState *above_base;   /* Node directly above the base */
 BlockdevOnError on_error;
-char *backing_file_str;
 bool bs_read_only;
 bool chain_frozen;
 } StreamBlockJob;
@@ -103,8 +102,6 @@ static void stream_clean(Job *job)
 blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, _abort);
 bdrv_reopen_set_read_only(bs, true, NULL);
 }
-
-g_free(s->backing_file_str);
 }
 
 static int coroutine_fn stream_run(Job *job, Error **errp)
@@ -220,7 +217,7 @@ static const BlockJobDriver stream_job_driver = {
 };
 
 void stream_start(const char *job_id, BlockDriverState *bs,
-  BlockDriverState *base, const char *backing_file_str,
+  BlockDriverState *base,
   int creation_flags, int64_t speed,
   BlockdevOnError on_error,
   const char *filter_node_name,
@@ -295,7 +292,6 @@ void stream_start(const char *job_id, BlockDriverState *bs,
 
 s->base_overlay = base_overlay;
 s->above_base = above_base;
-s->backing_file_str = g_strdup(backing_file_str);
 s->bs_read_only = bs_read_only;
 s->chain_frozen = true;
 
diff --git a/blockdev.c b/blockdev.c
index d719c47..b223601 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2486,7 +2486,6 @@ out:
 void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
   bool has_base, const char *base,
   bool has_base_node, const char *base_node,
-  bool has_backing_file, const char *backing_file,
   bool has_speed, int64_t speed,
   bool has_on_error, BlockdevOnError on_error,
   bool has_filter_node_name, const char *filter_node_name,
@@ -2498,7 +2497,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 BlockDriverState *base_bs = NULL;
 AioContext *aio_context;
 Error *local_err = NULL;
-const char *base_name = NULL;
 int job_flags = JOB_DEFAULT;
 
 if (!has_on_error) {
@@ -2526,7 +2524,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 goto out;
 }
 assert(bdrv_get_aio_context(base_bs) == aio_context);
-base_name = base;
 }
 
 if (has_base_node) {
@@ -2541,7 +2538,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 assert(bdrv_get_aio_context(base_bs) == aio_context);
 bdrv_refresh_filename(base_bs);
-base_name = base_bs->filename;
 }
 
 /* Check for op blockers in the whole chain between bs and base */
@@ -2553,17 +2549,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 }
 
-/* if we are streaming the entire chain, the result will have no backing
- * file, and specifying one is therefore an error */
-if (base_bs == NULL && has_backing_file) {
-error_setg(errp, "backing file specified, but streaming the "
- "entire chain");
-goto out;
-}
-
-/* backing_file string overrides base bs filename */
-base_name = has_backing_file ? backing_file : base_name;
-
 if (has_auto_finalize && !auto_finalize) {
 job_flags |= JOB_MANUAL_FINALIZE;
 }
@@ -2571,7 +2556,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 job_flags |= JOB_MANUAL_DISMISS;
 }
 
-str

[PATCH v10 3/9] qapi: add filter-node-name to block-stream

2020-09-29 Thread Andrey Shinkevich via
Provide the possibility to pass the 'filter-node-name' parameter to the
block-stream job as it is done for the commit block job.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/monitor/block-hmp-cmds.c | 4 ++--
 block/stream.c | 4 +++-
 blockdev.c | 4 +++-
 include/block/block_int.h  | 7 ++-
 qapi/block-core.json   | 6 ++
 5 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 4d3db5e..4e66775 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -507,8 +507,8 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
 
 qmp_block_stream(true, device, device, base != NULL, base, false, NULL,
  false, NULL, qdict_haskey(qdict, "speed"), speed, true,
- BLOCKDEV_ON_ERROR_REPORT, false, false, false, false,
- );
+ BLOCKDEV_ON_ERROR_REPORT, false, NULL, false, false, 
false,
+ false, );
 
 hmp_handle_error(mon, error);
 }
diff --git a/block/stream.c b/block/stream.c
index 8ce6729..e0540ee 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -221,7 +221,9 @@ static const BlockJobDriver stream_job_driver = {
 void stream_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *base, const char *backing_file_str,
   int creation_flags, int64_t speed,
-  BlockdevOnError on_error, Error **errp)
+  BlockdevOnError on_error,
+  const char *filter_node_name,
+  Error **errp)
 {
 StreamBlockJob *s;
 BlockDriverState *iter;
diff --git a/blockdev.c b/blockdev.c
index bebd3ba..d719c47 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2489,6 +2489,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
   bool has_backing_file, const char *backing_file,
   bool has_speed, int64_t speed,
   bool has_on_error, BlockdevOnError on_error,
+  bool has_filter_node_name, const char *filter_node_name,
   bool has_auto_finalize, bool auto_finalize,
   bool has_auto_dismiss, bool auto_dismiss,
   Error **errp)
@@ -2571,7 +2572,8 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 
 stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
- job_flags, has_speed ? speed : 0, on_error, _err);
+ job_flags, has_speed ? speed : 0, on_error,
+ filter_node_name, _err);
 if (local_err) {
 error_propagate(errp, local_err);
 goto out;
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 38cad9d..f782737 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1134,6 +1134,9 @@ int is_windows_drive(const char *filename);
  *  See @BlockJobCreateFlags
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @on_error: The action to take upon error.
+ * @filter_node_name: The node name that should be assigned to the filter
+ * driver that the stream job inserts into the graph above @bs. NULL means
+ * that a node name should be autogenerated.
  * @errp: Error object.
  *
  * Start a streaming operation on @bs.  Clusters that are unallocated
@@ -1146,7 +1149,9 @@ int is_windows_drive(const char *filename);
 void stream_start(const char *job_id, BlockDriverState *bs,
   BlockDriverState *base, const char *backing_file_str,
   int creation_flags, int64_t speed,
-  BlockdevOnError on_error, Error **errp);
+  BlockdevOnError on_error,
+  const char *filter_node_name,
+  Error **errp);
 
 /**
  * commit_start:
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 3c16f1e..32fb097 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2533,6 +2533,11 @@
 #'stop' and 'enospc' can only be used if the block device
 #supports io-status (see BlockInfo).  Since 1.3.
 #
+# @filter-node-name: the node name that should be assigned to the
+#filter driver that the stream job inserts into the graph
+#above @device. If this option is not given, a node name is
+#autogenerated. (Since: 5.2)
+#
 # @auto-finalize: When false, this job will wait in a PENDING state after it 
has
 # finished its work, waiting for @block-job-finalize before
 # making any block graph changes.
@@ -2563,6 +2568,7 @@
   'data': { '*job-id': 'str', 'device': 'str', '*base': 'str',
 '*base-node': 'str', '*backing-file': 'str', '*speed': 'int',
 '*on-error': 'Block

[PATCH v10 2/9] copy-on-read: add filter append/drop functions

2020-09-29 Thread Andrey Shinkevich via
Provide an API for COR-filter insertion/removal.
Also, drop the filter's child permissions in the inactive state while the
filter node is being removed.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 84 
 block/copy-on-read.h | 35 ++
 2 files changed, 119 insertions(+)
 create mode 100644 block/copy-on-read.h

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index cb03e0f..3c8231f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -23,11 +23,21 @@
 #include "qemu/osdep.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "block/copy-on-read.h"
+
+
+typedef struct BDRVStateCOR {
+bool active;
+} BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BDRVStateCOR *state = bs->opaque;
+
 bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
false, errp);
@@ -42,6 +52,13 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+state->active = true;
+
+/*
+ * We don't need to call bdrv_child_refresh_perms() now as the permissions
+ * will be updated later when the filter node gets its parent.
+ */
+
 return 0;
 }
 
@@ -57,6 +74,17 @@ static void cor_child_perm(BlockDriverState *bs, BdrvChild 
*c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
 {
+BDRVStateCOR *s = bs->opaque;
+
+if (!s->active) {
+/*
+ * While the filter is being removed
+ */
+*nperm = 0;
+*nshared = BLK_PERM_ALL;
+return;
+}
+
 *nperm = perm & PERM_PASSTHROUGH;
 *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
 
@@ -135,6 +163,7 @@ static void cor_lock_medium(BlockDriverState *bs, bool 
locked)
 
 static BlockDriver bdrv_copy_on_read = {
 .format_name= "copy-on-read",
+.instance_size  = sizeof(BDRVStateCOR),
 
 .bdrv_open  = cor_open,
 .bdrv_child_perm= cor_child_perm,
@@ -159,4 +188,59 @@ static void bdrv_copy_on_read_init(void)
 bdrv_register(_copy_on_read);
 }
 
+
+BlockDriverState *bdrv_cor_filter_append(BlockDriverState *bs,
+ QDict *node_options,
+ int flags, Error **errp)
+{
+BlockDriverState *cor_filter_bs;
+Error *local_err = NULL;
+
+cor_filter_bs = bdrv_open(NULL, NULL, node_options, flags, errp);
+if (cor_filter_bs == NULL) {
+error_prepend(errp, "Could not create COR-filter node: ");
+return NULL;
+}
+
+bdrv_drained_begin(bs);
+bdrv_replace_node(bs, cor_filter_bs, _err);
+bdrv_drained_end(bs);
+
+if (local_err) {
+bdrv_unref(cor_filter_bs);
+error_propagate(errp, local_err);
+return NULL;
+}
+
+return cor_filter_bs;
+}
+
+
+void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs)
+{
+BdrvChild *child;
+BlockDriverState *bs;
+BDRVStateCOR *s = cor_filter_bs->opaque;
+
+child = bdrv_filter_child(cor_filter_bs);
+if (!child) {
+return;
+}
+bs = child->bs;
+
+/* Retain the BDS until we complete the graph change. */
+bdrv_ref(bs);
+/* Hold a guest back from writing while permissions are being reset. */
+bdrv_drained_begin(bs);
+/* Drop permissions before the graph change. */
+s->active = false;
+bdrv_child_refresh_perms(cor_filter_bs, child, _abort);
+bdrv_replace_node(cor_filter_bs, bs, _abort);
+
+bdrv_drained_end(bs);
+bdrv_unref(bs);
+bdrv_unref(cor_filter_bs);
+}
+
+
 block_init(bdrv_copy_on_read_init);
diff --git a/block/copy-on-read.h b/block/copy-on-read.h
new file mode 100644
index 000..d6f2422
--- /dev/null
+++ b/block/copy-on-read.h
@@ -0,0 +1,35 @@
+/*
+ * Copy-on-read filter block driver
+ *
+ * The filter driver performs Copy-On-Read (COR) operations
+ *
+ * Copyright (c) 2018-2020 Virtuozzo International GmbH.
+ *
+ * Author:
+ *   Andrey Shinkevich 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANT

[PATCH v10 1/9] copy-on-read: Support preadv/pwritev_part functions

2020-09-29 Thread Andrey Shinkevich via
Add support for the recently introduced functions
bdrv_co_preadv_part()
and
bdrv_co_pwritev_part()
to the COR-filter driver.

Signed-off-by: Andrey Shinkevich 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/copy-on-read.c | 28 
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 2816e61..cb03e0f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -74,21 +74,25 @@ static int64_t cor_getlength(BlockDriverState *bs)
 }
 
 
-static int coroutine_fn cor_co_preadv(BlockDriverState *bs,
-  uint64_t offset, uint64_t bytes,
-  QEMUIOVector *qiov, int flags)
+static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs,
+   uint64_t offset, uint64_t bytes,
+   QEMUIOVector *qiov,
+   size_t qiov_offset,
+   int flags)
 {
-return bdrv_co_preadv(bs->file, offset, bytes, qiov,
-  flags | BDRV_REQ_COPY_ON_READ);
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
 }
 
 
-static int coroutine_fn cor_co_pwritev(BlockDriverState *bs,
-   uint64_t offset, uint64_t bytes,
-   QEMUIOVector *qiov, int flags)
+static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs,
+uint64_t offset,
+uint64_t bytes,
+QEMUIOVector *qiov,
+size_t qiov_offset, int flags)
 {
-
-return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
+flags);
 }
 
 
@@ -137,8 +141,8 @@ static BlockDriver bdrv_copy_on_read = {
 
 .bdrv_getlength = cor_getlength,
 
-.bdrv_co_preadv = cor_co_preadv,
-.bdrv_co_pwritev= cor_co_pwritev,
+.bdrv_co_preadv_part= cor_co_preadv_part,
+.bdrv_co_pwritev_part   = cor_co_pwritev_part,
 .bdrv_co_pwrite_zeroes  = cor_co_pwrite_zeroes,
 .bdrv_co_pdiscard   = cor_co_pdiscard,
 .bdrv_co_pwritev_compressed = cor_co_pwritev_compressed,
-- 
1.8.3.1




[PATCH v10 0/9] Apply COR-filter to the block-stream permanently

2020-09-29 Thread Andrey Shinkevich via
Although the patch "freeze link to base node..." has been removed from the
series in the current version 9, the iotest case test_stream_parallel does
not pass after the COR-filter is inserted into the backing chain. As the
test case may not be initialized, it does not make sense and was removed
again.
The check with bdrv_is_allocated_above() takes place both in the COR-filter
and in the block-stream job. An optimization of the block-stream job based
on the filter functionality may be made in a separate series.

v10:
  02: The missed new file block/copy-on-read.h added
v9:
  02: Refactored.
  04: Base node name is used instead of the file name.
  05: New implementation based on Max' review.
  06: New.
  07: New. The patch "freeze link to base node..." was deleted.
  08: New.
  09: The filter node options are initialized.

The v8 Message-Id:
<1598633579-221780-1-git-send-email-andrey.shinkev...@virtuozzo.com>

Andrey Shinkevich (9):
  copy-on-read: Support preadv/pwritev_part functions
  copy-on-read: add filter append/drop functions
  qapi: add filter-node-name to block-stream
  copy-on-read: pass base node name to COR driver
  copy-on-read: limit guest COR activity to base in COR driver
  copy-on-read: skip non-guest reads if no copy needed
  stream: skip filters when writing backing file name to QCOW2 header
  block: remove unused backing-file name parameter
  block: apply COR-filter to block-stream jobs

 block/copy-on-read.c   | 165 ++---
 block/copy-on-read.h   |  35 +
 block/io.c |   2 +-
 block/monitor/block-hmp-cmds.c |   6 +-
 block/stream.c | 112 +---
 blockdev.c |  21 +-
 include/block/block_int.h  |   9 ++-
 qapi/block-core.json   |  23 ++
 tests/qemu-iotests/030 |  51 ++---
 tests/qemu-iotests/030.out |   4 +-
 tests/qemu-iotests/141.out |   2 +-
 tests/qemu-iotests/245 |  19 +++--
 12 files changed, 302 insertions(+), 147 deletions(-)
 create mode 100644 block/copy-on-read.h

-- 
1.8.3.1




[PATCH v10 5/9] copy-on-read: limit guest COR activity to base in COR driver

2020-09-29 Thread Andrey Shinkevich via
Limit the guest's COR operations to the base node in the backing chain
when the base node name is given. This will be useful for a block-stream
job when the COR-filter is applied.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 38 --
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index e04092f..f53f7e0 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -121,8 +121,42 @@ static int coroutine_fn 
cor_co_preadv_part(BlockDriverState *bs,
size_t qiov_offset,
int flags)
 {
-return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
-   flags | BDRV_REQ_COPY_ON_READ);
+int64_t n = 0;
+int64_t size = offset + bytes;
+int local_flags;
+int ret;
+BDRVStateCOR *state = bs->opaque;
+
+if (!state->base_bs) {
+return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset,
+   flags | BDRV_REQ_COPY_ON_READ);
+}
+
+while (offset < size) {
+local_flags = flags;
+
+/* In case of failure, try to copy-on-read anyway */
+ret = bdrv_is_allocated(bs->file->bs, offset, bytes, );
+if (!ret) {
+ret = bdrv_is_allocated_above(bdrv_cow_bs(bs->file->bs),
+  state->base_bs, false, offset, n, 
);
+if (ret > 0) {
+local_flags |= BDRV_REQ_COPY_ON_READ;
+}
+}
+
+ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset,
+  local_flags);
+if (ret < 0) {
+return ret;
+}
+
+offset += n;
+qiov_offset += n;
+bytes -= n;
+}
+
+return 0;
 }
 
 
-- 
1.8.3.1




[PATCH v9 9/9] block: apply COR-filter to block-stream jobs

2020-09-28 Thread Andrey Shinkevich via
This patch completes the series by inserting the COR-filter for
block-stream operations. Adding the filter makes it possible for copied
regions to be discarded in backing files during the block-stream job,
which will reduce disk overuse.
The COR-filter insertion requires changes in the iotests case
245:test_block_stream_4, which reopens the backing chain during a
block-stream job. There are changes in iotest #030 as well.
The iotests case 030:test_stream_parallel was deleted due to multiple
conflicts between concurrent job operations over the same backing
chain. The base backing node for one job is the top node for another
job. It may change due to the filter node inserted into the backing
chain while both jobs are running. Another issue is that parts of
the backing chain are frozen by the running job and cannot be
changed by the concurrent job when needed. The concept of parallel
jobs with common nodes is no longer considered vital.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 93 ++
 tests/qemu-iotests/030 | 51 +++--
 tests/qemu-iotests/030.out |  4 +-
 tests/qemu-iotests/141.out |  2 +-
 tests/qemu-iotests/245 | 19 +++---
 5 files changed, 83 insertions(+), 86 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index fe2663f..240b3dc 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -17,8 +17,10 @@
 #include "block/blockjob_int.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
+#include "block/copy-on-read.h"
 
 enum {
 /*
@@ -33,6 +35,8 @@ typedef struct StreamBlockJob {
 BlockJob common;
 BlockDriverState *base_overlay; /* COW overlay (stream from this) */
 BlockDriverState *above_base;   /* Node directly above the base */
+BlockDriverState *cor_filter_bs;
+BlockDriverState *target_bs;
 BlockdevOnError on_error;
 bool bs_read_only;
 bool chain_frozen;
@@ -52,23 +56,20 @@ static void stream_abort(Job *job)
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 
 if (s->chain_frozen) {
-BlockJob *bjob = >common;
-bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 }
 }
 
 static int stream_prepare(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
-BlockJob *bjob = >common;
-BlockDriverState *bs = blk_bs(bjob->blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
 BlockDriverState *base_metadata = bdrv_skip_filters(base);
 Error *local_err = NULL;
 int ret = 0;
 
-bdrv_unfreeze_backing_chain(bs, s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 s->chain_frozen = false;
 
 if (bdrv_cow_child(unfiltered_bs)) {
@@ -94,13 +95,14 @@ static void stream_clean(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockJob *bjob = &s->common;
-BlockDriverState *bs = blk_bs(bjob->blk);
+
+bdrv_cor_filter_drop(s->cor_filter_bs);
 
 /* Reopen the image back in read-only mode if necessary */
 if (s->bs_read_only) {
 /* Give up write permissions before making it read-only */
 blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
-bdrv_reopen_set_read_only(bs, true, NULL);
+bdrv_reopen_set_read_only(s->target_bs, true, NULL);
 }
 }
 
@@ -108,9 +110,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockBackend *blk = s->common.blk;
-BlockDriverState *bs = blk_bs(blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
-bool enable_cor = !bdrv_cow_child(s->base_overlay);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 int64_t len;
 int64_t offset = 0;
 uint64_t delay_ns = 0;
@@ -122,21 +122,12 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 return 0;
 }
 
-len = bdrv_getlength(bs);
+len = bdrv_getlength(s->target_bs);
 if (len < 0) {
 return len;
 }
 job_progress_set_remaining(&s->common.job, len);
 
-/* Turn on copy-on-read for the whole block device so that guest read
- * requests help us make progress.  Only do this when copying the entire
- * backing chain since the copy-on-read operation does not take base into
- * account.
- */
-if (enable_cor) {
-bdrv_enable_copy_on_read(bs);
-}
-
 for (

[PATCH v9 8/9] block: remove unused backing-file name parameter

2020-09-28 Thread Andrey Shinkevich via
The block-stream QMP parameter backing-file is no longer in use. It
designated a backing file name to set in the QCOW2 image header after the
block-stream job finished. The base file name is used instead.

Signed-off-by: Andrey Shinkevich 
---
 block/monitor/block-hmp-cmds.c |  2 +-
 block/stream.c |  6 +-
 blockdev.c | 17 +
 include/block/block_int.h  |  2 +-
 qapi/block-core.json   | 17 +
 5 files changed, 5 insertions(+), 39 deletions(-)

diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index 4e66775..5f19499 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -506,7 +506,7 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
 int64_t speed = qdict_get_try_int(qdict, "speed", 0);
 
 qmp_block_stream(true, device, device, base != NULL, base, false, NULL,
- false, NULL, qdict_haskey(qdict, "speed"), speed, true,
+ qdict_haskey(qdict, "speed"), speed, true,
  BLOCKDEV_ON_ERROR_REPORT, false, NULL, false, false, 
false,
  false, &error);
 
diff --git a/block/stream.c b/block/stream.c
index b0719e9..fe2663f 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -34,7 +34,6 @@ typedef struct StreamBlockJob {
 BlockDriverState *base_overlay; /* COW overlay (stream from this) */
 BlockDriverState *above_base;   /* Node directly above the base */
 BlockdevOnError on_error;
-char *backing_file_str;
 bool bs_read_only;
 bool chain_frozen;
 } StreamBlockJob;
@@ -103,8 +102,6 @@ static void stream_clean(Job *job)
 blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
 bdrv_reopen_set_read_only(bs, true, NULL);
 }
-
-g_free(s->backing_file_str);
 }
 
 static int coroutine_fn stream_run(Job *job, Error **errp)
@@ -220,7 +217,7 @@ static const BlockJobDriver stream_job_driver = {
 };
 
 void stream_start(const char *job_id, BlockDriverState *bs,
-  BlockDriverState *base, const char *backing_file_str,
+  BlockDriverState *base,
   int creation_flags, int64_t speed,
   BlockdevOnError on_error,
   const char *filter_node_name,
@@ -295,7 +292,6 @@ void stream_start(const char *job_id, BlockDriverState *bs,
 
 s->base_overlay = base_overlay;
 s->above_base = above_base;
-s->backing_file_str = g_strdup(backing_file_str);
 s->bs_read_only = bs_read_only;
 s->chain_frozen = true;
 
diff --git a/blockdev.c b/blockdev.c
index d719c47..b223601 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2486,7 +2486,6 @@ out:
 void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
   bool has_base, const char *base,
   bool has_base_node, const char *base_node,
-  bool has_backing_file, const char *backing_file,
   bool has_speed, int64_t speed,
   bool has_on_error, BlockdevOnError on_error,
   bool has_filter_node_name, const char *filter_node_name,
@@ -2498,7 +2497,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 BlockDriverState *base_bs = NULL;
 AioContext *aio_context;
 Error *local_err = NULL;
-const char *base_name = NULL;
 int job_flags = JOB_DEFAULT;
 
 if (!has_on_error) {
@@ -2526,7 +2524,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 goto out;
 }
 assert(bdrv_get_aio_context(base_bs) == aio_context);
-base_name = base;
 }
 
 if (has_base_node) {
@@ -2541,7 +2538,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 assert(bdrv_get_aio_context(base_bs) == aio_context);
 bdrv_refresh_filename(base_bs);
-base_name = base_bs->filename;
 }
 
 /* Check for op blockers in the whole chain between bs and base */
@@ -2553,17 +2549,6 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 }
 }
 
-/* if we are streaming the entire chain, the result will have no backing
- * file, and specifying one is therefore an error */
-if (base_bs == NULL && has_backing_file) {
-error_setg(errp, "backing file specified, but streaming the "
- "entire chain");
-goto out;
-}
-
-/* backing_file string overrides base bs filename */
-base_name = has_backing_file ? backing_file : base_name;
-
 if (has_auto_finalize && !auto_finalize) {
 job_flags |= JOB_MANUAL_FINALIZE;
 }
@@ -2571,7 +2556,7 @@ void qmp_block_stream(bool has_job_id, const char 
*job_id, const char *device,
 job_flags |= JOB_MANUAL_DISMISS;
 }
 
-str

[PATCH v9 2/9] copy-on-read: add filter append/drop functions

2020-09-28 Thread Andrey Shinkevich via
Provide API for the COR-filter insertion/removal.
Also, drop the filter child permissions for an inactive state when the
filter node is being removed.

Signed-off-by: Andrey Shinkevich 
---
 block/copy-on-read.c | 84 
 1 file changed, 84 insertions(+)

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index cb03e0f..3c8231f 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -23,11 +23,21 @@
 #include "qemu/osdep.h"
 #include "block/block_int.h"
 #include "qemu/module.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "block/copy-on-read.h"
+
+
+typedef struct BDRVStateCOR {
+bool active;
+} BDRVStateCOR;
 
 
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
 Error **errp)
 {
+BDRVStateCOR *state = bs->opaque;
+
 bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
false, errp);
@@ -42,6 +52,13 @@ static int cor_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
 bs->file->bs->supported_zero_flags);
 
+state->active = true;
+
+/*
+ * We don't need to call bdrv_child_refresh_perms() now as the permissions
+ * will be updated later when the filter node gets its parent.
+ */
+
 return 0;
 }
 
@@ -57,6 +74,17 @@ static void cor_child_perm(BlockDriverState *bs, BdrvChild 
*c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
 {
+BDRVStateCOR *s = bs->opaque;
+
+if (!s->active) {
+/*
+ * While the filter is being removed
+ */
+*nperm = 0;
+*nshared = BLK_PERM_ALL;
+return;
+}
+
 *nperm = perm & PERM_PASSTHROUGH;
 *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED;
 
@@ -135,6 +163,7 @@ static void cor_lock_medium(BlockDriverState *bs, bool 
locked)
 
 static BlockDriver bdrv_copy_on_read = {
 .format_name= "copy-on-read",
+.instance_size  = sizeof(BDRVStateCOR),
 
 .bdrv_open  = cor_open,
 .bdrv_child_perm= cor_child_perm,
@@ -159,4 +188,59 @@ static void bdrv_copy_on_read_init(void)
 bdrv_register(&bdrv_copy_on_read);
 }
 
+
+BlockDriverState *bdrv_cor_filter_append(BlockDriverState *bs,
+ QDict *node_options,
+ int flags, Error **errp)
+{
+BlockDriverState *cor_filter_bs;
+Error *local_err = NULL;
+
+cor_filter_bs = bdrv_open(NULL, NULL, node_options, flags, errp);
+if (cor_filter_bs == NULL) {
+error_prepend(errp, "Could not create COR-filter node: ");
+return NULL;
+}
+
+bdrv_drained_begin(bs);
+bdrv_replace_node(bs, cor_filter_bs, &local_err);
+bdrv_drained_end(bs);
+
+if (local_err) {
+bdrv_unref(cor_filter_bs);
+error_propagate(errp, local_err);
+return NULL;
+}
+
+return cor_filter_bs;
+}
+
+
+void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs)
+{
+BdrvChild *child;
+BlockDriverState *bs;
+BDRVStateCOR *s = cor_filter_bs->opaque;
+
+child = bdrv_filter_child(cor_filter_bs);
+if (!child) {
+return;
+}
+bs = child->bs;
+
+/* Retain the BDS until we complete the graph change. */
+bdrv_ref(bs);
+/* Hold a guest back from writing while permissions are being reset. */
+bdrv_drained_begin(bs);
+/* Drop permissions before the graph change. */
+s->active = false;
+bdrv_child_refresh_perms(cor_filter_bs, child, &error_abort);
+bdrv_replace_node(cor_filter_bs, bs, &error_abort);
+
+bdrv_drained_end(bs);
+bdrv_unref(bs);
+bdrv_unref(cor_filter_bs);
+}
+
+
 block_init(bdrv_copy_on_read_init);
-- 
1.8.3.1




  1   2   3   4   5   6   7   8   9   >