date:20210824

[PATCH v4] block/file-win32: add reopen handlers

2021-08-24 Thread Viktor Prutyanov

Make 'qemu-img commit' work on Windows.

Command 'commit' requires reopening backing file in RW mode. So,
add reopen prepare/commit/abort handlers and change dwShareMode
for CreateFile call in order to allow further read/write reopening.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/418

Suggested-by: Hanna Reitz 
Signed-off-by: Viktor Prutyanov 
Tested-by: Helge Konetzka 
---
 v2:
- fix indentation in raw_reopen_prepare
- free rs if raw_reopen_prepare fails
 v3:
- restore suggested-by field missed in v2
 v4:
- add file type check
- add comment about options
- replace rs check with assert in raw_reopen_commit

 block/file-win32.c | 100 -
 1 file changed, 99 insertions(+), 1 deletion(-)

diff --git a/block/file-win32.c b/block/file-win32.c
index 2642088bd6..8320495f2b 100644
--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -58,6 +58,10 @@ typedef struct BDRVRawState {
 QEMUWin32AIOState *aio;
 } BDRVRawState;
 
+typedef struct BDRVRawReopenState {
+HANDLE hfile;
+} BDRVRawReopenState;
+
 /*
  * Read/writes the data to/from a given linear buffer.
  *
@@ -392,7 +396,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, 
int flags,
 }
 
 s->hfile = CreateFile(filename, access_flags,
-  FILE_SHARE_READ, NULL,
+  FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
   OPEN_EXISTING, overlapped, NULL);
 if (s->hfile == INVALID_HANDLE_VALUE) {
 int err = GetLastError();
@@ -634,6 +638,96 @@ static int coroutine_fn raw_co_create_opts(BlockDriver 
*drv,
 return raw_co_create(&options, errp);
 }
 
+static int raw_reopen_prepare(BDRVReopenState *state,
+  BlockReopenQueue *queue, Error **errp)
+{
+BDRVRawState *s = state->bs->opaque;
+BDRVRawReopenState *rs;
+int access_flags;
+DWORD overlapped;
+int ret = 0;
+
+if (s->type != FTYPE_FILE) {
+error_setg(errp, "Can only reopen files");
+return -EINVAL;
+}
+
+rs = g_new0(BDRVRawReopenState, 1);
+
+/*
+ * We do not support changing any options (only flags). By leaving
+ * all options in state->options, we tell the generic reopen code
+ * that we do not support changing any of them, so it will verify
+ * that their values did not change.
+ */
+
+raw_parse_flags(state->flags, s->aio != NULL, &access_flags, &overlapped);
+rs->hfile = CreateFile(state->bs->filename, access_flags,
+   FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
+   OPEN_EXISTING, overlapped, NULL);
+
+if (rs->hfile == INVALID_HANDLE_VALUE) {
+int err = GetLastError();
+
+error_setg_win32(errp, err, "Could not reopen '%s'",
+ state->bs->filename);
+if (err == ERROR_ACCESS_DENIED) {
+ret = -EACCES;
+} else {
+ret = -EINVAL;
+}
+goto fail;
+}
+
+if (s->aio) {
+ret = win32_aio_attach(s->aio, rs->hfile);
+if (ret < 0) {
+error_setg_errno(errp, -ret, "Could not enable AIO");
+goto fail;
+}
+}
+
+state->opaque = rs;
+
+return 0;
+
+fail:
+g_free(rs);
+state->opaque = NULL;
+
+return ret;
+}
+
+static void raw_reopen_commit(BDRVReopenState *state)
+{
+BDRVRawState *s = state->bs->opaque;
+BDRVRawReopenState *rs = state->opaque;
+
+assert(rs != NULL);
+
+CloseHandle(s->hfile);
+s->hfile = rs->hfile;
+
+g_free(rs);
+state->opaque = NULL;
+}
+
+static void raw_reopen_abort(BDRVReopenState *state)
+{
+BDRVRawReopenState *rs = state->opaque;
+
+if (!rs) {
+return;
+}
+
+if (rs->hfile != INVALID_HANDLE_VALUE) {
+CloseHandle(rs->hfile);
+}
+
+g_free(rs);
+state->opaque = NULL;
+}
+
 static QemuOptsList raw_create_opts = {
 .name = "raw-create-opts",
 .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
@@ -659,6 +753,10 @@ BlockDriver bdrv_file = {
 .bdrv_co_create_opts = raw_co_create_opts,
 .bdrv_has_zero_init = bdrv_has_zero_init_1,
 
+.bdrv_reopen_prepare = raw_reopen_prepare,
+.bdrv_reopen_commit  = raw_reopen_commit,
+.bdrv_reopen_abort   = raw_reopen_abort,
+
 .bdrv_aio_preadv= raw_aio_preadv,
 .bdrv_aio_pwritev   = raw_aio_pwritev,
 .bdrv_aio_flush = raw_aio_flush,
-- 
2.21.0

Re: [PATCH V2] block/rbd: implement bdrv_co_block_status

2021-08-24 Thread Ilya Dryomov

On Mon, Aug 23, 2021 at 11:38 AM Peter Lieven  wrote:
>
> Am 22.08.21 um 23:02 schrieb Ilya Dryomov:
> > On Tue, Aug 10, 2021 at 3:41 PM Peter Lieven  wrote:
> >> the qemu rbd driver currently lacks support for bdrv_co_block_status.
> >> This results mainly in incorrect progress during block operations (e.g.
> >> qemu-img convert with an rbd image as source).
> >>
> >> This patch utilizes the rbd_diff_iterate2 call from librbd to detect
> >> allocated and unallocated (all zero areas).
> >>
> >> To avoid querying the ceph OSDs for the answer this is only done if
> >> the image has the fast-diff features which depends on the object-map
> > Hi Peter,
> >
> > Nit: "has the fast-diff feature which depends on the object-map and
> > exclusive-lock features"
>
>
> will reword in V3.
>
>
> >
> >> and exclusive-lock. In this case it is guaranteed that the information
> >> is present in memory in the librbd client and thus very fast.
> >>
> >> If fast-diff is not available all areas are reported to be allocated
> >> which is the current behaviour if bdrv_co_block_status is not implemented.
> >>
> >> Signed-off-by: Peter Lieven 
> >> ---
> >> V1->V2:
> >> - add commit comment [Stefano]
> >> - use failed_post_open [Stefano]
> >> - remove redundant assert [Stefano]
> >> - add macro+comment for the magic -9000 value [Stefano]
> >> - always set *file if its non NULL [Stefano]
> >>
> >>   block/rbd.c | 125 
> >>   1 file changed, 125 insertions(+)
> >>
> >> diff --git a/block/rbd.c b/block/rbd.c
> >> index dcf82b15b8..8692e76f40 100644
> >> --- a/block/rbd.c
> >> +++ b/block/rbd.c
> >> @@ -88,6 +88,7 @@ typedef struct BDRVRBDState {
> >>   char *namespace;
> >>   uint64_t image_size;
> >>   uint64_t object_size;
> >> +uint64_t features;
> >>   } BDRVRBDState;
> >>
> >>   typedef struct RBDTask {
> >> @@ -983,6 +984,13 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict 
> >> *options, int flags,
> >>   s->image_size = info.size;
> >>   s->object_size = info.obj_size;
> >>
> >> +r = rbd_get_features(s->image, &s->features);
> >> +if (r < 0) {
> >> +error_setg_errno(errp, -r, "error getting image features from %s",
> >> + s->image_name);
> >> +goto failed_post_open;
> >> +}
> > The object-map and fast-diff features can be enabled/disabled while the
> > image is open so this should probably go to qemu_rbd_co_block_status().
> >
> >> +
> >>   /* If we are using an rbd snapshot, we must be r/o, otherwise
> >>* leave as-is */
> >>   if (s->snap != NULL) {
> >> @@ -1259,6 +1267,122 @@ static ImageInfoSpecific 
> >> *qemu_rbd_get_specific_info(BlockDriverState *bs,
> >>   return spec_info;
> >>   }
> >>
> >> +typedef struct rbd_diff_req {
> >> +uint64_t offs;
> >> +uint64_t bytes;
> >> +int exists;
> >> +} rbd_diff_req;
> >> +
> >> +/*
> >> + * rbd_diff_iterate2 allows to interrupt the exection by returning a 
> >> negative
> >> + * value in the callback routine. Choose a value that does not conflict 
> >> with
> >> + * an existing exitcode and return it if we want to prematurely stop the
> >> + * execution because we detected a change in the allocation status.
> >> + */
> >> +#define QEMU_RBD_EXIT_DIFF_ITERATE2 -9000
> >> +
> >> +static int qemu_rbd_co_block_status_cb(uint64_t offs, size_t len,
> >> +   int exists, void *opaque)
> >> +{
> >> +struct rbd_diff_req *req = opaque;
> >> +
> >> +assert(req->offs + req->bytes <= offs);
> >> +
> >> +if (req->exists && offs > req->offs + req->bytes) {
> >> +/*
> >> + * we started in an allocated area and jumped over an unallocated 
> >> area,
> >> + * req->bytes contains the length of the allocated area before the
> >> + * unallocated area. stop further processing.
> >> + */
> >> +return QEMU_RBD_EXIT_DIFF_ITERATE2;
> >> +}
> >> +if (req->exists && !exists) {
> >> +/*
> >> + * we started in an allocated area and reached a hole. req->bytes
> >> + * contains the length of the allocated area before the hole.
> >> + * stop further processing.
> >> + */
> >> +return QEMU_RBD_EXIT_DIFF_ITERATE2;
> >> +}
> >> +if (!req->exists && exists && offs > req->offs) {
> >> +/*
> >> + * we started in an unallocated area and hit the first allocated
> >> + * block. req->bytes must be set to the length of the unallocated 
> >> area
> >> + * before the allocated area. stop further processing.
> >> + */
> >> +req->bytes = offs - req->offs;
> >> +return QEMU_RBD_EXIT_DIFF_ITERATE2;
> >> +}
> >> +
> >> +/*
> >> + * assert that we catched all cases above and allocation state has not
> > catched -> caught
> >
> >> + * changed during callbacks.
> >> + */
> >> +assert(exists == req->exists || !req->bytes);
> >> +req

Re: [PATCH v8 00/34] block: publish backup-top filter

2021-08-24 Thread Vladimir Sementsov-Ogievskiy


24.08.2021 18:51, Hanna Reitz wrote:

On 24.08.21 10:38, Vladimir Sementsov-Ogievskiy wrote:

Hi all!

v8:

06: add Hanna's r-b
07: keep is_fleecing detection in _new() function
08,17,18: add Hanna's r-b
19: wording, s/6.1/6.2/, add Markus's a-b
25: new
29: add John's r-b
34: new

Patches without r-b: 07, 25, 34


Thanks!  I’ve applied the series to my block-next branch:

https://github.com/XanClic/qemu/commits/block-next

Hanna



Thanks for reviewing!

--
Best regards,
Vladimir

Re: [PATCH v8 00/34] block: publish backup-top filter

2021-08-24 Thread Hanna Reitz


On 24.08.21 10:38, Vladimir Sementsov-Ogievskiy wrote:

Hi all!

v8:

06: add Hanna's r-b
07: keep is_fleecing detection in _new() function
08,17,18: add Hanna's r-b
19: wording, s/6.1/6.2/, add Markus's a-b
25: new
29: add John's r-b
34: new

Patches without r-b: 07, 25, 34


Thanks!  I’ve applied the series to my block-next branch:

https://github.com/XanClic/qemu/commits/block-next

Hanna

Re: [PATCH 1/2] iotests: Fix unspecified-encoding pylint warnings

2021-08-24 Thread Philippe Mathieu-Daudé

On 8/24/21 5:35 PM, Hanna Reitz wrote:
> As of recently, pylint complains when `open()` calls are missing an
> `encoding=` specified.  Everything we have should be UTF-8 (and in fact,
> everything should be UTF-8, period (exceptions apply)), so use that.
> 
> Signed-off-by: Hanna Reitz 
> ---
>  tests/qemu-iotests/297| 2 +-
>  tests/qemu-iotests/iotests.py | 8 +---
>  2 files changed, 6 insertions(+), 4 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé

Re: qcow2 perfomance: read-only IO on the guest generates high write IO on the host

2021-08-24 Thread Kevin Wolf

[ Cc: qemu-block ]

Am 11.08.2021 um 13:36 hat Christopher Pereira geschrieben:
> Hi,
> 
> I'm reading a directory with 5.000.000 files (2,4 GB) inside a guest using
> "find | grep -c".
> 
> On the host I saw high write IO (40 MB/s !) during over 1 hour using
> virt-top.
> 
> I later repeated the read-only operation inside the guest and no additional
> data was written on the host. The operation took only some seconds.
> 
> I believe QEMU was creating some kind of cache or metadata map the first
> time I accessed the inodes.

No, at least in theory, QEMU shouldn't allocate anything when you're
just reading.

Are you sure that this isn't activity coming from your guest OS?

> But I wonder why the cache or metadata map wasn't available the first time
> and why QEMU had to recreate it?
> 
> The VM has "compressed base <- snap 1" and base was converted without
> prealloc.
> 
> Is it because we created the base using convert without metadata prealloc
> and so the metadata map got lost?
> 
> I will do some experiments soon using convert + metadata prealloc and
> probably find out myself, but I will happy to read your comments and gain
> some additional insights.
> If it the problem persists, I would try again without compression.

What were the results of your experiments? Is the behaviour related to
any of these options?

> Additional info:
> 
>  * Guest fs is xfs.
>  * (I believe the snapshot didn't significantly increase in size, but I
>would need to double check)
>  * This is a production host with old QEMU emulator version 2.3.0
>(qemu-kvm-ev-2.3.0-31.el7_2.10.1)

Discussing the most recent version is generally easier, but the expected
behaviour has always been the same, so it probably doesn't matter much
in this case.

Kevin

[PATCH 2/2] iotests: Fix use-{list,dict}-literal warnings

2021-08-24 Thread Hanna Reitz

pylint proposes using `[]` instead of `list()` and `{}` instead of
`dict()`, because it is faster.  That seems simple enough, so heed its
advice.

Signed-off-by: Hanna Reitz 
---
 tests/qemu-iotests/iotests.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index c05c16494b..8b44e6c437 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -702,7 +702,7 @@ def hmp_qemu_io(self, drive: str, cmd: str,
 
 def flatten_qmp_object(self, obj, output=None, basestr=''):
 if output is None:
-output = dict()
+output = {}
 if isinstance(obj, list):
 for i, item in enumerate(obj):
 self.flatten_qmp_object(item, output, basestr + str(i) + '.')
@@ -715,7 +715,7 @@ def flatten_qmp_object(self, obj, output=None, basestr=''):
 
 def qmp_to_opts(self, obj):
 obj = self.flatten_qmp_object(obj)
-output_list = list()
+output_list = []
 for key in obj:
 output_list += [key + '=' + obj[key]]
 return ','.join(output_list)
-- 
2.31.1

[PATCH 0/2] iotests: Fix pylint warnings

2021-08-24 Thread Hanna Reitz

Hi,

I’ve updated my pylint to 2.10.2 and was greeted with some new warnings.
Some are fixed by John’s “Run iotest linters during Python CI” series
(https://lists.nongnu.org/archive/html/qemu-block/2021-07/msg00611.html),
but some are not (namely unspecified-encoding, use-list-literal, and
use-dict-literal).  This series cleans up that rest.


Hanna Reitz (2):
  iotests: Fix unspecified-encoding pylint warnings
  iotests: Fix use-{list,dict}-literal warnings

 tests/qemu-iotests/297|  2 +-
 tests/qemu-iotests/iotests.py | 12 +++-
 2 files changed, 8 insertions(+), 6 deletions(-)

-- 
2.31.1

[PATCH 1/2] iotests: Fix unspecified-encoding pylint warnings

2021-08-24 Thread Hanna Reitz

As of recently, pylint complains when `open()` calls are missing an
`encoding=` specified.  Everything we have should be UTF-8 (and in fact,
everything should be UTF-8, period (exceptions apply)), so use that.

Signed-off-by: Hanna Reitz 
---
 tests/qemu-iotests/297| 2 +-
 tests/qemu-iotests/iotests.py | 8 +---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/tests/qemu-iotests/297 b/tests/qemu-iotests/297
index 433b732336..0a49953d27 100755
--- a/tests/qemu-iotests/297
+++ b/tests/qemu-iotests/297
@@ -46,7 +46,7 @@ def is_python_file(filename):
 if filename.endswith('.py'):
 return True
 
-with open(filename) as f:
+with open(filename, encoding='utf-8') as f:
 try:
 first_line = f.readline()
 return re.match('^#!.*python', first_line) is not None
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 4c8971d946..c05c16494b 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -610,7 +610,7 @@ def _post_shutdown(self) -> None:
 return
 valgrind_filename =  f"{test_dir}/{self._popen.pid}.valgrind"
 if self.exitcode() == 99:
-with open(valgrind_filename) as f:
+with open(valgrind_filename, encoding='utf-8') as f:
 print(f.read())
 else:
 os.remove(valgrind_filename)
@@ -1120,7 +1120,8 @@ def notrun(reason):
 # Each test in qemu-iotests has a number ("seq")
 seq = os.path.basename(sys.argv[0])
 
-with open('%s/%s.notrun' % (output_dir, seq), 'w') as outfile:
+with open('%s/%s.notrun' % (output_dir, seq), 'w', encoding='utf-8') \
+as outfile:
 outfile.write(reason + '\n')
 logger.warning("%s not run: %s", seq, reason)
 sys.exit(0)
@@ -1134,7 +1135,8 @@ def case_notrun(reason):
 # Each test in qemu-iotests has a number ("seq")
 seq = os.path.basename(sys.argv[0])
 
-with open('%s/%s.casenotrun' % (output_dir, seq), 'a') as outfile:
+with open('%s/%s.casenotrun' % (output_dir, seq), 'a', encoding='utf-8') \
+as outfile:
 outfile.write('[case not run] ' + reason + '\n')
 
 def _verify_image_format(supported_fmts: Sequence[str] = (),
-- 
2.31.1

Re: [PATCH v2 00/17] python/iotests: Run iotest linters during Python CI

2021-08-24 Thread Hanna Reitz


On 20.07.21 19:33, John Snow wrote:

GitLab: https://gitlab.com/jsnow/qemu/-/commits/python-package-iotest
CI: https://gitlab.com/jsnow/qemu/-/pipelines/340144191


I’ll take the liberty of applying patches 1 and 2 to my block-next 
branch, because, well, they fix some of the 297 errors I’m seeing with 
an updated pylint.


(There’s more still, namely some unspecified-encoding errors, one 
use-dict-literal, and one use-list-literal, but I don’t think there are 
patches for that in this series, so I guess I’ll have to have a go 
myself...)


To make this formal:

Thanks, I’ve applied patches 1 and 2 to my block-next branch:

https://github.com/XanClic/qemu/commits/block-next

Hanna

[PATCH 5/9] util/vfio-helpers: Remove unreachable code in qemu_vfio_dma_map()

2021-08-24 Thread Philippe Mathieu-Daudé

qemu_vfio_add_mapping() returns a pointer to an indexed entry
in pre-allocated QEMUVFIOState::mappings[], thus can not be NULL.
Remove the pointless check.

Signed-off-by: Philippe Mathieu-Daudé 
---
 util/vfio-helpers.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index d956866b4e9..e7909222cfd 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -751,10 +751,6 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t 
size,
 }
 
 mapping = qemu_vfio_add_mapping(s, host, size, index + 1, iova0);
-if (!mapping) {
-ret = -ENOMEM;
-goto out;
-}
 assert(qemu_vfio_verify_mappings(s));
 ret = qemu_vfio_do_mapping(s, host, size, iova0);
 if (ret) {
-- 
2.31.1

[PATCH 8/9] util/vfio-helpers: Let qemu_vfio_do_mapping() propagate Error

2021-08-24 Thread Philippe Mathieu-Daudé

Pass qemu_vfio_do_mapping() an Error* argument so it can propagate
any error to callers. Replace error_report() which only report
to the monitor by the more generic error_setg_errno().

Reviewed-by: Fam Zheng 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Philippe Mathieu-Daudé 
---
 util/vfio-helpers.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index f4c16e1dae5..613f3db3745 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -610,7 +610,7 @@ static IOVAMapping *qemu_vfio_add_mapping(QEMUVFIOState *s,
 
 /* Do the DMA mapping with VFIO. */
 static int qemu_vfio_do_mapping(QEMUVFIOState *s, void *host, size_t size,
-uint64_t iova)
+uint64_t iova, Error **errp)
 {
 struct vfio_iommu_type1_dma_map dma_map = {
 .argsz = sizeof(dma_map),
@@ -622,7 +622,7 @@ static int qemu_vfio_do_mapping(QEMUVFIOState *s, void 
*host, size_t size,
 trace_qemu_vfio_do_mapping(s, host, iova, size);
 
 if (ioctl(s->container, VFIO_IOMMU_MAP_DMA, &dma_map)) {
-error_report("VFIO_MAP_DMA failed: %s", strerror(errno));
+error_setg_errno(errp, errno, "VFIO_MAP_DMA failed");
 return -errno;
 }
 return 0;
@@ -755,7 +755,7 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t 
size,
 
 mapping = qemu_vfio_add_mapping(s, host, size, index + 1, iova0);
 assert(qemu_vfio_verify_mappings(s));
-ret = qemu_vfio_do_mapping(s, host, size, iova0);
+ret = qemu_vfio_do_mapping(s, host, size, iova0, errp);
 if (ret < 0) {
 qemu_vfio_undo_mapping(s, mapping, NULL);
 return ret;
@@ -766,7 +766,7 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t 
size,
 error_setg(errp, "iova range not found");
 return -ENOMEM;
 }
-ret = qemu_vfio_do_mapping(s, host, size, iova0);
+ret = qemu_vfio_do_mapping(s, host, size, iova0, errp);
 if (ret < 0) {
 return ret;
 }
-- 
2.31.1

[PATCH 4/9] util/vfio-helpers: Replace qemu_mutex_lock() calls with QEMU_LOCK_GUARD

2021-08-24 Thread Philippe Mathieu-Daudé

Simplify qemu_vfio_dma_[un]map() handlers by replacing a pair of
qemu_mutex_lock/qemu_mutex_unlock calls by the WITH_QEMU_LOCK_GUARD
macro.

Signed-off-by: Philippe Mathieu-Daudé 
---
 util/vfio-helpers.c | 9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index 1d149136299..d956866b4e9 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -735,7 +735,7 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t 
size,
 assert(QEMU_PTR_IS_ALIGNED(host, qemu_real_host_page_size));
 assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size));
 trace_qemu_vfio_dma_map(s, host, size, temporary, iova);
-qemu_mutex_lock(&s->lock);
+QEMU_LOCK_GUARD(&s->lock);
 mapping = qemu_vfio_find_mapping(s, host, &index);
 if (mapping) {
 iova0 = mapping->iova + ((uint8_t *)host - (uint8_t *)mapping->host);
@@ -778,7 +778,6 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t 
size,
 *iova = iova0;
 }
 out:
-qemu_mutex_unlock(&s->lock);
 return ret;
 }
 
@@ -813,14 +812,12 @@ void qemu_vfio_dma_unmap(QEMUVFIOState *s, void *host)
 }
 
 trace_qemu_vfio_dma_unmap(s, host);
-qemu_mutex_lock(&s->lock);
+QEMU_LOCK_GUARD(&s->lock);
 m = qemu_vfio_find_mapping(s, host, &index);
 if (!m) {
-goto out;
+return;
 }
 qemu_vfio_undo_mapping(s, m, NULL);
-out:
-qemu_mutex_unlock(&s->lock);
 }
 
 static void qemu_vfio_reset(QEMUVFIOState *s)
-- 
2.31.1

[PATCH 9/9] block/nvme: Only report VFIO error on failed retry

2021-08-24 Thread Philippe Mathieu-Daudé

We expect the first qemu_vfio_dma_map() to fail (indicating
DMA mappings exhaustion, see commit 15a730e7a3a). Do not
report the first failure as error, since we are going to
flush the mappings and retry.

This removes spurious error message displayed on the monitor:

  (qemu) c
  (qemu) qemu-kvm: VFIO_MAP_DMA failed: No space left on device
  (qemu) info status
  VM status: running

Reported-by: Tingting Mao 
Signed-off-by: Philippe Mathieu-Daudé 
---
 block/nvme.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/block/nvme.c b/block/nvme.c
index 80546b0babd..abfe305baf2 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -1019,6 +1019,7 @@ static coroutine_fn int 
nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd,
 uint64_t *pagelist = req->prp_list_page;
 int i, j, r;
 int entries = 0;
+Error *local_err = NULL, **errp = NULL;
 
 assert(qiov->size);
 assert(QEMU_IS_ALIGNED(qiov->size, s->page_size));
@@ -1031,7 +1032,7 @@ static coroutine_fn int 
nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd,
 try_map:
 r = qemu_vfio_dma_map(s->vfio,
   qiov->iov[i].iov_base,
-  len, true, &iova, NULL);
+  len, true, &iova, errp);
 if (r == -ENOSPC) {
 /*
  * In addition to the -ENOMEM error, the VFIO_IOMMU_MAP_DMA
@@ -1066,6 +1067,8 @@ try_map:
 goto fail;
 }
 }
+errp = &local_err;
+
 goto try_map;
 }
 if (r) {
@@ -1109,6 +1112,9 @@ fail:
  * because they are already mapped before calling this function; for
  * temporary mappings, a later nvme_cmd_(un)map_qiov will reclaim by
  * calling qemu_vfio_dma_reset_temporary when necessary. */
+if (local_err) {
+error_reportf_err(local_err, "Cannot map buffer for DMA: ");
+}
 return r;
 }
 
-- 
2.31.1

[PATCH 7/9] util/vfio-helpers: Have qemu_vfio_dma_map() propagate Error

2021-08-24 Thread Philippe Mathieu-Daudé

Now that all qemu_vfio_dma_map() callers provide an Error* argument,
fill it with relevant / more descriptive message. Reduce 'ret'
(returned value) scope by returning errno directly to simplify
(removing the goto / 'out' label).

Signed-off-by: Philippe Mathieu-Daudé 
---
 block/nvme.c|  1 +
 util/vfio-helpers.c | 31 ++-
 2 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/block/nvme.c b/block/nvme.c
index 663e5d918fa..80546b0babd 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -240,6 +240,7 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState 
*s,
 r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages, bytes,
   false, &prp_list_iova, errp);
 if (r) {
+error_prepend(errp, "Cannot map buffer for DMA: ");
 goto fail;
 }
 q->free_req_head = -1;
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index 3e1a49bea15..f4c16e1dae5 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -729,7 +729,6 @@ qemu_vfio_find_temp_iova(QEMUVFIOState *s, size_t size, 
uint64_t *iova)
 int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
   bool temporary, uint64_t *iova, Error **errp)
 {
-int ret = 0;
 int index;
 IOVAMapping *mapping;
 uint64_t iova0;
@@ -742,32 +741,34 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, 
size_t size,
 if (mapping) {
 iova0 = mapping->iova + ((uint8_t *)host - (uint8_t *)mapping->host);
 } else {
+int ret;
+
 if (s->high_water_mark - s->low_water_mark + 1 < size) {
-ret = -ENOMEM;
-goto out;
+error_setg(errp, "iova exhausted (water mark reached)");
+return -ENOMEM;
 }
 if (!temporary) {
-if (qemu_vfio_find_fixed_iova(s, size, &iova0)) {
-ret = -ENOMEM;
-goto out;
+if (qemu_vfio_find_fixed_iova(s, size, &iova0) < 0) {
+error_setg(errp, "iova range not found");
+return -ENOMEM;
 }
 
 mapping = qemu_vfio_add_mapping(s, host, size, index + 1, iova0);
 assert(qemu_vfio_verify_mappings(s));
 ret = qemu_vfio_do_mapping(s, host, size, iova0);
-if (ret) {
+if (ret < 0) {
 qemu_vfio_undo_mapping(s, mapping, NULL);
-goto out;
+return ret;
 }
 qemu_vfio_dump_mappings(s);
 } else {
 if (qemu_vfio_find_temp_iova(s, size, &iova0)) {
-ret = -ENOMEM;
-goto out;
+error_setg(errp, "iova range not found");
+return -ENOMEM;
 }
 ret = qemu_vfio_do_mapping(s, host, size, iova0);
-if (ret) {
-goto out;
+if (ret < 0) {
+return ret;
 }
 }
 }
@@ -775,11 +776,7 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t 
size,
 if (iova) {
 *iova = iova0;
 }
-out:
-if (ret) {
-error_setg_errno(errp, -ret, "Cannot map buffer for DMA");
-}
-return ret;
+return 0;
 }
 
 /* Reset the high watermark and free all "temporary" mappings. */
-- 
2.31.1

[PATCH 6/9] util/vfio-helpers: Pass Error handle to qemu_vfio_dma_map()

2021-08-24 Thread Philippe Mathieu-Daudé

Currently qemu_vfio_dma_map() displays errors on stderr.
When using management interface, this information is simply
lost. Pass qemu_vfio_dma_map() an Error** handle so it can
propagate the error to callers.

Reviewed-by: Fam Zheng 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Philippe Mathieu-Daudé 
---
Changed:
- Initialize *err before calling error_prepend (Eric Auger)
- use error_reportf_err() in nvme_register_buf()
---
 include/qemu/vfio-helpers.h |  2 +-
 block/nvme.c| 20 ++--
 util/vfio-helpers.c | 13 +
 3 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/include/qemu/vfio-helpers.h b/include/qemu/vfio-helpers.h
index 4491c8e1a6e..bde9495b254 100644
--- a/include/qemu/vfio-helpers.h
+++ b/include/qemu/vfio-helpers.h
@@ -18,7 +18,7 @@ typedef struct QEMUVFIOState QEMUVFIOState;
 QEMUVFIOState *qemu_vfio_open_pci(const char *device, Error **errp);
 void qemu_vfio_close(QEMUVFIOState *s);
 int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
-  bool temporary, uint64_t *iova_list);
+  bool temporary, uint64_t *iova_list, Error **errp);
 int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s);
 void qemu_vfio_dma_unmap(QEMUVFIOState *s, void *host);
 void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
diff --git a/block/nvme.c b/block/nvme.c
index 6642c104aa4..663e5d918fa 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -176,12 +176,11 @@ static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue 
*q,
 return false;
 }
 memset(q->queue, 0, bytes);
-r = qemu_vfio_dma_map(s->vfio, q->queue, bytes, false, &q->iova);
+r = qemu_vfio_dma_map(s->vfio, q->queue, bytes, false, &q->iova, errp);
 if (r) {
-error_setg(errp, "Cannot map queue");
-return false;
+error_prepend(errp, "Cannot map queue: ");
 }
-return true;
+return r == 0;
 }
 
 static void nvme_free_queue_pair(NVMeQueuePair *q)
@@ -239,7 +238,7 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState 
*s,
 qemu_co_queue_init(&q->free_req_queue);
 q->completion_bh = aio_bh_new(aio_context, nvme_process_completion_bh, q);
 r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages, bytes,
-  false, &prp_list_iova);
+  false, &prp_list_iova, errp);
 if (r) {
 goto fail;
 }
@@ -533,9 +532,9 @@ static bool nvme_identify(BlockDriverState *bs, int 
namespace, Error **errp)
 error_setg(errp, "Cannot allocate buffer for identify response");
 goto out;
 }
-r = qemu_vfio_dma_map(s->vfio, id, id_size, true, &iova);
+r = qemu_vfio_dma_map(s->vfio, id, id_size, true, &iova, errp);
 if (r) {
-error_setg(errp, "Cannot map buffer for DMA");
+error_prepend(errp, "Cannot map buffer for DMA: ");
 goto out;
 }
 
@@ -1031,7 +1030,7 @@ static coroutine_fn int 
nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd,
 try_map:
 r = qemu_vfio_dma_map(s->vfio,
   qiov->iov[i].iov_base,
-  len, true, &iova);
+  len, true, &iova, NULL);
 if (r == -ENOSPC) {
 /*
  * In addition to the -ENOMEM error, the VFIO_IOMMU_MAP_DMA
@@ -1523,14 +1522,15 @@ static void nvme_aio_unplug(BlockDriverState *bs)
 static void nvme_register_buf(BlockDriverState *bs, void *host, size_t size)
 {
 int ret;
+Error *local_err = NULL;
 BDRVNVMeState *s = bs->opaque;
 
-ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL);
+ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL, &local_err);
 if (ret) {
 /* FIXME: we may run out of IOVA addresses after repeated
  * bdrv_register_buf/bdrv_unregister_buf, because nvme_vfio_dma_unmap
  * doesn't reclaim addresses for fixed mappings. */
-error_report("nvme_register_buf failed: %s", strerror(-ret));
+error_reportf_err(local_err, "nvme_register_buf failed: ");
 }
 }
 
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index e7909222cfd..3e1a49bea15 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -463,13 +463,15 @@ static void qemu_vfio_ram_block_added(RAMBlockNotifier 
*n, void *host,
   size_t size, size_t max_size)
 {
 QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
+Error *local_err = NULL;
 int ret;
 
 trace_qemu_vfio_ram_block_added(s, host, max_size);
-ret = qemu_vfio_dma_map(s, host, max_size, false, NULL);
+ret = qemu_vfio_dma_map(s, host, max_size, false, NULL, &local_err);
 if (ret) {
-error_report("qemu_vfio_dma_map(%p, %zu) failed: %s", host, max_size,
- strerror(-ret));
+error_reportf_err(local_err,
+  "qemu_vfio_dma_map(%p, %zu) failed: ",
+  host, max_size);
 }
 }
 
@@

[PATCH 3/9] util/vfio-helpers: Let qemu_vfio_verify_mappings() use error_report()

2021-08-24 Thread Philippe Mathieu-Daudé

Instead of displaying the error on stderr, use error_report()
which also report to the monitor.

Reviewed-by: Fam Zheng 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Philippe Mathieu-Daudé 
---
 util/vfio-helpers.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index 95b86e6..1d149136299 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -660,13 +660,13 @@ static bool qemu_vfio_verify_mappings(QEMUVFIOState *s)
 if (QEMU_VFIO_DEBUG) {
 for (i = 0; i < s->nr_mappings - 1; ++i) {
 if (!(s->mappings[i].host < s->mappings[i + 1].host)) {
-fprintf(stderr, "item %d not sorted!\n", i);
+error_report("item %d not sorted!", i);
 qemu_vfio_dump_mappings(s);
 return false;
 }
 if (!(s->mappings[i].host + s->mappings[i].size <=
   s->mappings[i + 1].host)) {
-fprintf(stderr, "item %d overlap with next!\n", i);
+error_report("item %d overlap with next!", i);
 qemu_vfio_dump_mappings(s);
 return false;
 }
-- 
2.31.1

[PATCH 2/9] block/nvme: Have nvme_create_queue_pair() report errors consistently

2021-08-24 Thread Philippe Mathieu-Daudé

nvme_create_queue_pair() does not return a boolean value (indicating
eventual error) but a pointer, and is inconsistent in how it fills the
error handler. To fulfill callers expectations, always set an error
message on failure.

Reported-by: Auger Eric 
Signed-off-by: Philippe Mathieu-Daudé 
---
 block/nvme.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/block/nvme.c b/block/nvme.c
index e8dbbc23177..6642c104aa4 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -220,6 +220,7 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState 
*s,
 
 q = g_try_new0(NVMeQueuePair, 1);
 if (!q) {
+error_setg(errp, "Cannot allocate queue pair");
 return NULL;
 }
 trace_nvme_create_queue_pair(idx, q, size, aio_context,
@@ -228,6 +229,7 @@ static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState 
*s,
   qemu_real_host_page_size);
 q->prp_list_pages = qemu_try_memalign(qemu_real_host_page_size, bytes);
 if (!q->prp_list_pages) {
+error_setg(errp, "Cannot allocate PRP page list");
 goto fail;
 }
 memset(q->prp_list_pages, 0, bytes);
-- 
2.31.1

[PATCH 1/9] block/nvme: Use safer trace format string

2021-08-24 Thread Philippe Mathieu-Daudé

Fix when building with -Wshorten-64-to-32:

  warning: implicit conversion loses integer precision: 'unsigned long' to 
'int' [-Wshorten-64-to-32]

Signed-off-by: Philippe Mathieu-Daudé 
---
 block/trace-events | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/trace-events b/block/trace-events
index b3d2b1e62cb..f4f1267c8c0 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -156,7 +156,7 @@ nvme_dsm(void *s, uint64_t offset, uint64_t bytes) "s %p 
offset 0x%"PRIx64" byte
 nvme_dsm_done(void *s, uint64_t offset, uint64_t bytes, int ret) "s %p offset 
0x%"PRIx64" bytes %"PRId64" ret %d"
 nvme_dma_map_flush(void *s) "s %p"
 nvme_free_req_queue_wait(void *s, unsigned q_index) "s %p q #%u"
-nvme_create_queue_pair(unsigned q_index, void *q, unsigned size, void 
*aio_context, int fd) "index %u q %p size %u aioctx %p fd %d"
+nvme_create_queue_pair(unsigned q_index, void *q, size_t size, void 
*aio_context, int fd) "index %u q %p size %zu aioctx %p fd %d"
 nvme_free_queue_pair(unsigned q_index, void *q) "index %u q %p"
 nvme_cmd_map_qiov(void *s, void *cmd, void *req, void *qiov, int entries) "s 
%p cmd %p req %p qiov %p entries %d"
 nvme_cmd_map_qiov_pages(void *s, int i, uint64_t page) "s %p page[%d] 
0x%"PRIx64
-- 
2.31.1

[PATCH 0/9] block/nvme: Rework error reporting

2021-08-24 Thread Philippe Mathieu-Daudé

Hi,

This series contains various patches sent last year with
review comments addressed, few more cleanups, and a new
patch which remove the spurious "VFIO_MAP_DMA failed: No
space left on device" now poping up since commit 15a730e7a.
(it is the expected behavior, which is why we retry the
same call after flushing the DMA mappings).

Regards,

Phil.

Philippe Mathieu-Daudé (9):
  block/nvme: Use safer trace format string
  block/nvme: Have nvme_create_queue_pair() report errors consistently
  util/vfio-helpers: Let qemu_vfio_verify_mappings() use error_report()
  util/vfio-helpers: Replace qemu_mutex_lock() calls with
QEMU_LOCK_GUARD
  util/vfio-helpers: Remove unreachable code in qemu_vfio_dma_map()
  util/vfio-helpers: Pass Error handle to qemu_vfio_dma_map()
  util/vfio-helpers: Have qemu_vfio_dma_map() propagate Error
  util/vfio-helpers: Let qemu_vfio_do_mapping() propagate Error
  block/nvme: Only report VFIO error on failed retry

 include/qemu/vfio-helpers.h |  2 +-
 block/nvme.c| 29 +++--
 util/vfio-helpers.c | 63 +
 block/trace-events  |  2 +-
 4 files changed, 50 insertions(+), 46 deletions(-)

-- 
2.31.1

Re: [PATCH v8 34/34] block/block-copy: block_copy_state_new(): drop extra arguments

2021-08-24 Thread Hanna Reitz


On 24.08.21 10:38, Vladimir Sementsov-Ogievskiy wrote:

The only caller pass copy_range and compress both false. Let's just
drop these arguments.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  include/block/block-copy.h | 1 -
  block/block-copy.c | 5 ++---
  block/copy-before-write.c  | 2 +-
  3 files changed, 3 insertions(+), 5 deletions(-)


Reviewed-by: Hanna Reitz

Re: [PATCH v8 25/34] python:QEMUMachine: template typing for self returning methods

2021-08-24 Thread Hanna Reitz


On 24.08.21 10:38, Vladimir Sementsov-Ogievskiy wrote:

mypy thinks that return value of these methods in subclusses is
QEMUMachine, which is wrong. So, make typing smarter.

Suggested-by: John Snow 
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  python/qemu/machine/machine.py | 10 +++---
  1 file changed, 7 insertions(+), 3 deletions(-)


Reviewed-by: Hanna Reitz

Re: [PATCH v8 07/34] block/block-copy: introduce block_copy_set_copy_opts()

2021-08-24 Thread Hanna Reitz


On 24.08.21 10:38, Vladimir Sementsov-Ogievskiy wrote:

We'll need a possibility to set compress and use_copy_range options
after initialization of the state. So make corresponding part of
block_copy_state_new() separate and public.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  include/block/block-copy.h |  3 +++
  block/block-copy.c | 49 ++
  2 files changed, 32 insertions(+), 20 deletions(-)


Reviewed-by: Hanna Reitz

[PATCH] ide: Cap LBA28 capacity announcement to 2^28-1

2021-08-24 Thread Samuel Thibault

The LBA28 capacity (at offsets 60/61 of identification) is supposed to
express the maximum size supported by LBA28 commands. If the device is
larger than this, we have to cap it to 2^28-1.

At least NetBSD happens to be using this value to determine whether to use
LBA28 or LBA48 for its commands, using LBA28 for sectors that don't need
LBA48. This commit thus fixes NetBSD access to disks larger than 128GiB.

Signed-off-by: Samuel Thibault 
---
 hw/ide/core.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/hw/ide/core.c b/hw/ide/core.c
index fd69ca3167..e28f8aad61 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -98,8 +98,12 @@ static void put_le16(uint16_t *p, unsigned int v)
 static void ide_identify_size(IDEState *s)
 {
 uint16_t *p = (uint16_t *)s->identify_data;
-put_le16(p + 60, s->nb_sectors);
-put_le16(p + 61, s->nb_sectors >> 16);
+int64_t nb_sectors_lba28 = s->nb_sectors;
+if (nb_sectors_lba28 >= 1 << 28) {
+nb_sectors_lba28 = (1 << 28) - 1;
+}
+put_le16(p + 60, nb_sectors_lba28);
+put_le16(p + 61, nb_sectors_lba28 >> 16);
 put_le16(p + 100, s->nb_sectors);
 put_le16(p + 101, s->nb_sectors >> 16);
 put_le16(p + 102, s->nb_sectors >> 32);
-- 
2.32.0

[PATCH v2 3/3] qcow2: handle_dependencies(): relax conflict detection

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

There is no conflict and no dependency if we have parallel writes to
different subclusters of one cluster when the cluster itself is already
allocated. So, relax extra dependency.

Measure performance:
First, prepare build/qemu-img-old and build/qemu-img-new images.

cd scripts/simplebench
./img_bench_templater.py

Paste the following to stdin of running script:

qemu_img=../../build/qemu-img-{old|new}
$qemu_img create -f qcow2 -o extended_l2=on /ssd/x.qcow2 1G
$qemu_img bench -c 10 -d 8 [-s 2K|-s 2K -o 512|-s $((1024*2+512))] \
-w -t none -n /ssd/x.qcow2

The result:

All results are in seconds

--  -  -
oldnew
-s 2K   6.7 ± 15%  6.2 ± 12%
 -7%
-s 2K -o 51213 ± 3%11 ± 5%
 -16%
-s $((1024*2+512))  9.5 ± 4%   8.4
 -12%
--  -  -

So small writes are more independent now and that helps to keep deeper
io queue which improves performance.

271 iotest output becomes racy for three allocation in one cluster.
Second and third writes may finish in different order. Second and
third requests don't depend on each other any more. Still they both
depend on first request anyway. Filter out second and third write
offsets to cover both possible outputs.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/qcow2-cluster.c  | 11 +++
 tests/qemu-iotests/271 |  5 -
 tests/qemu-iotests/271.out |  4 ++--
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 9917e5c28c..c1c43a891b 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1403,6 +1403,17 @@ static int handle_dependencies(BlockDriverState *bs, 
uint64_t guest_offset,
 continue;
 }
 
+if (old_alloc->keep_old_clusters &&
+(end <= l2meta_cow_start(old_alloc) ||
+ start >= l2meta_cow_end(old_alloc)))
+{
+/*
+ * Clusters intersect but COW areas don't. And cluster itself is
+ * already allocated. So, there is no actual conflict.
+ */
+continue;
+}
+
 /* Conflict */
 
 if (start < old_start) {
diff --git a/tests/qemu-iotests/271 b/tests/qemu-iotests/271
index 599b849cc6..d9d391955e 100755
--- a/tests/qemu-iotests/271
+++ b/tests/qemu-iotests/271
@@ -893,7 +893,10 @@ EOF
 }
 
 _make_test_img -o extended_l2=on 1M
-_concurrent_io | $QEMU_IO | _filter_qemu_io
+# Second an third writes in _concurrent_io() are independent and may finish in
+# different order. So, filter offset out to match both possible variants.
+_concurrent_io | $QEMU_IO | _filter_qemu_io | \
+$SED -e 's/\(20480\|40960\)/OFFSET/'
 _concurrent_verify | $QEMU_IO | _filter_qemu_io
 
 # success, all done
diff --git a/tests/qemu-iotests/271.out b/tests/qemu-iotests/271.out
index 81043ba4d7..5be780de76 100644
--- a/tests/qemu-iotests/271.out
+++ b/tests/qemu-iotests/271.out
@@ -719,8 +719,8 @@ blkdebug: Suspended request 'A'
 blkdebug: Resuming request 'A'
 wrote 2048/2048 bytes at offset 30720
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-wrote 2048/2048 bytes at offset 20480
+wrote 2048/2048 bytes at offset OFFSET
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-wrote 2048/2048 bytes at offset 40960
+wrote 2048/2048 bytes at offset OFFSET
 2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 *** done
-- 
2.29.2

[PATCH v2 2/3] qcow2: refactor handle_dependencies() loop body

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

No logic change, just prepare for the following commit. While being
here do also small grammar fix in a comment.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Eric Blake 
Reviewed-by: Hanna Reitz 
---
 block/qcow2-cluster.c | 49 ---
 1 file changed, 28 insertions(+), 21 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index bd0597842f..9917e5c28c 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1400,29 +1400,36 @@ static int handle_dependencies(BlockDriverState *bs, 
uint64_t guest_offset,
 
 if (end <= old_start || start >= old_end) {
 /* No intersection */
-} else {
-if (start < old_start) {
-/* Stop at the start of a running allocation */
-bytes = old_start - start;
-} else {
-bytes = 0;
-}
+continue;
+}
 
-/* Stop if already an l2meta exists. After yielding, it wouldn't
- * be valid any more, so we'd have to clean up the old L2Metas
- * and deal with requests depending on them before starting to
- * gather new ones. Not worth the trouble. */
-if (bytes == 0 && *m) {
-*cur_bytes = 0;
-return 0;
-}
+/* Conflict */
 
-if (bytes == 0) {
-/* Wait for the dependency to complete. We need to recheck
- * the free/allocated clusters when we continue. */
-qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock);
-return -EAGAIN;
-}
+if (start < old_start) {
+/* Stop at the start of a running allocation */
+bytes = old_start - start;
+} else {
+bytes = 0;
+}
+
+/*
+ * Stop if an l2meta already exists. After yielding, it wouldn't
+ * be valid any more, so we'd have to clean up the old L2Metas
+ * and deal with requests depending on them before starting to
+ * gather new ones. Not worth the trouble.
+ */
+if (bytes == 0 && *m) {
+*cur_bytes = 0;
+return 0;
+}
+
+if (bytes == 0) {
+/*
+ * Wait for the dependency to complete. We need to recheck
+ * the free/allocated clusters when we continue.
+ */
+qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock);
+return -EAGAIN;
 }
 }
 
-- 
2.29.2

[PATCH v2 1/3] simplebench: add img_bench_templater.py

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

Add simple grammar-parsing template benchmark. New tool consume test
template written in bash with some special grammar injections and
produces multiple tests, run them and finally print a performance
comparison table of different tests produced from one template.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 scripts/simplebench/img_bench_templater.py | 95 ++
 scripts/simplebench/table_templater.py | 62 ++
 2 files changed, 157 insertions(+)
 create mode 100755 scripts/simplebench/img_bench_templater.py
 create mode 100644 scripts/simplebench/table_templater.py

diff --git a/scripts/simplebench/img_bench_templater.py 
b/scripts/simplebench/img_bench_templater.py
new file mode 100755
index 00..f8e1540ada
--- /dev/null
+++ b/scripts/simplebench/img_bench_templater.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+#
+# Process img-bench test templates
+#
+# Copyright (c) 2021 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+
+import sys
+import subprocess
+import re
+import json
+
+import simplebench
+from results_to_text import results_to_text
+from table_templater import Templater
+
+
+def bench_func(env, case):
+test = templater.gen(env['data'], case['data'])
+
+p = subprocess.run(test, shell=True, stdout=subprocess.PIPE,
+   stderr=subprocess.STDOUT, universal_newlines=True)
+
+if p.returncode == 0:
+try:
+m = re.search(r'Run completed in (\d+.\d+) seconds.', p.stdout)
+return {'seconds': float(m.group(1))}
+except Exception:
+return {'error': f'failed to parse qemu-img output: {p.stdout}'}
+else:
+return {'error': f'qemu-img failed: {p.returncode}: {p.stdout}'}
+
+
+if __name__ == '__main__':
+if len(sys.argv) > 1:
+print("""
+Usage: img_bench_templater.py < path/to/test-template.sh
+
+This script generates performance tests from a test template (example below),
+runs them, and displays the results in a table. The template is read from
+stdin.  It must be written in bash and end with a `qemu-img bench` invocation
+(whose result is parsed to get the test instance’s result).
+
+Use the following syntax in the template to create the various different test
+instances:
+
+  column templating: {var1|var2|...} - test will use different values in
+  different columns. You may use several {} constructions in the test, in this
+  case product of all choice-sets will be used.
+
+  row templating: [var1|var2|...] - similar thing to define rows (test-cases)
+
+Test template example:
+
+Assume you want to compare two qemu-img binaries, called qemu-img-old and
+qemu-img-new in your build directory in two test-cases with 4K writes and 64K
+writes. The template may look like this:
+
+qemu_img=/path/to/qemu/build/qemu-img-{old|new}
+$qemu_img create -f qcow2 /ssd/x.qcow2 1G
+$qemu_img bench -c 100 -d 8 [-s 4K|-s 64K] -w -t none -n /ssd/x.qcow2
+
+When passing this to stdin of img_bench_templater.py, the resulting comparison
+table will contain two columns (for two binaries) and two rows (for two
+test-cases).
+
+In addition to displaying the results, script also stores results in JSON
+format into results.json file in current directory.
+""")
+sys.exit()
+
+templater = Templater(sys.stdin.read())
+
+envs = [{'id': ' / '.join(x), 'data': x} for x in templater.columns]
+cases = [{'id': ' / '.join(x), 'data': x} for x in templater.rows]
+
+result = simplebench.bench(bench_func, envs, cases, count=5,
+   initial_run=False)
+print(results_to_text(result))
+with open('results.json', 'w') as f:
+json.dump(result, f, indent=4)
diff --git a/scripts/simplebench/table_templater.py 
b/scripts/simplebench/table_templater.py
new file mode 100644
index 00..950f3b3024
--- /dev/null
+++ b/scripts/simplebench/table_templater.py
@@ -0,0 +1,62 @@
+# Parser for test templates
+#
+# Copyright (c) 2021 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the i

[PATCH v2 0/3] qcow2: relax subclusters allocation dependencies

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

Hi all!

v2:
01: improve documentation
02: add Hanna's and Eric's r-bs, add tiny grammar fix
03: fix test by filtering instead of reducing number of writes

Parallel small writes to unallocated cluster works bad when subclusters
enabled.

Look, without subclusters, one of write requests will allocate the whole
cluster, and all other writes to this cluster will be independent of
each other, they depend only on the first one that does allocation.

With subclusters, each write to unallocated subcluster will block the
whole cluster for parallel writing.

So, assume we write 8 consecutive 4k chunks in parallel:

Without subclusters, one of the chunks will block all the cluster and
write L2 entry. The remaining 7 chunks are written in parallel.

With subclusters, each of the chunks will allocate new subcluster and
block the whole cluster. All the chunks are dependent on each other and
queue depth becomes 1. That's not good.

Let's improve the situation.

Vladimir Sementsov-Ogievskiy (3):
  simplebench: add img_bench_templater.py
  qcow2: refactor handle_dependencies() loop body
  qcow2: handle_dependencies(): relax conflict detection

 block/qcow2-cluster.c  | 60 +-
 scripts/simplebench/img_bench_templater.py | 95 ++
 scripts/simplebench/table_templater.py | 62 ++
 tests/qemu-iotests/271 |  5 +-
 tests/qemu-iotests/271.out |  4 +-
 5 files changed, 202 insertions(+), 24 deletions(-)
 create mode 100755 scripts/simplebench/img_bench_templater.py
 create mode 100644 scripts/simplebench/table_templater.py

-- 
2.29.2

Re: [PATCH 1/3] simplebench: add img_bench_templater.py

2021-08-24 Thread Vladimir Sementsov-Ogievskiy


24.08.2021 11:59, Hanna Reitz wrote:

On 24.08.21 10:53, Vladimir Sementsov-Ogievskiy wrote:

19.08.2021 19:37, Hanna Reitz wrote:

On 24.07.21 15:38, Vladimir Sementsov-Ogievskiy wrote:


[...]


+import itertools
+from lark import Lark
+
+grammar = """
+start: ( text | column_switch | row_switch )+
+
+column_switch: "{" text ["|" text]+ "}"
+row_switch: "[" text ["|" text]+ "]"
+text: /[^|{}\[\]]+/

So I have no idea how this really works, of course, but does this mean that the 
`text` pattern cannot contain pipe symbols? I.e. that you cannot use pipes in 
the test template?



Hmm. I didn't try. I hope lark is smart enough to keep pipes that are out of {} 
[] as is.. But of course, you can't hope that pipe inside {} or [] will work as 
bash-pipe.


Yep, sure.  It’s just that the `text` nonterminal symbol doesn’t look like it 
could match anything with a pipe in it.


Oops, right. To avoid it, we'll have to split text into "text_inside_brackets" and 
"text_outside_of_brackets".. As well, let's postpone this complication until we really 
need it.




Same thing with other special symbols ("{}" and "[]"). I don't want to care 
about this too much now. This simple grammar works good for test template in patch 03. If we need 
something more, we can add a kind of special symbols escaping later.


But yes, if someone trips over this (i.e. we ourselves), we can still fix it 
then.

Hanna




--
Best regards,
Vladimir

Re: [PATCH 1/3] simplebench: add img_bench_templater.py

2021-08-24 Thread Vladimir Sementsov-Ogievskiy


19.08.2021 19:37, Hanna Reitz wrote:

On 24.07.21 15:38, Vladimir Sementsov-Ogievskiy wrote:

Add simple grammar-parsing template benchmark.


This doesn’t really say much, and FWIW, for like ten minutes I thought this 
would do something completely different than it did (while I was trying to 
parse the help text).

(I thought this was about formatting an existing test’s output, and that 
“template” were kind of the wrong word, but then it turned out it’s exactly the 
right word, only that this is not about using a test’s output as a template, 
but actually using a template of a test (i.e. a test template, not a template 
test) to generate test instances to run...  Which of course is much cooler.)

Functionality-wise, as far as I understand (of course I have no knowledge of 
lark), this looks good to me.  And it’s really quite cool.

I just found the documentation confusing, so I have some suggestions for it 
below.


Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  scripts/simplebench/img_bench_templater.py | 85 ++
  scripts/simplebench/table_templater.py | 62 
  2 files changed, 147 insertions(+)
  create mode 100755 scripts/simplebench/img_bench_templater.py
  create mode 100644 scripts/simplebench/table_templater.py

diff --git a/scripts/simplebench/img_bench_templater.py 
b/scripts/simplebench/img_bench_templater.py
new file mode 100755
index 00..d18a243d35
--- /dev/null
+++ b/scripts/simplebench/img_bench_templater.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python3
+#
+# Run img-bench template tests
+#
+# Copyright (c) 2021 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+
+import sys
+import subprocess
+import re
+import json
+
+import simplebench
+from results_to_text import results_to_text
+from table_templater import Templater
+
+
+def bench_func(env, case):
+    test = templater.gen(env['data'], case['data'])
+
+    p = subprocess.run(test, shell=True, stdout=subprocess.PIPE,
+   stderr=subprocess.STDOUT, universal_newlines=True)
+
+    if p.returncode == 0:
+    try:
+    m = re.search(r'Run completed in (\d+.\d+) seconds.', p.stdout)
+    return {'seconds': float(m.group(1))}
+    except Exception:
+    return {'error': f'failed to parse qemu-img output: {p.stdout}'}
+    else:
+    return {'error': f'qemu-img failed: {p.returncode}: {p.stdout}'}
+
+
+if __name__ == '__main__':
+    if len(sys.argv) > 1:
+    print("""
+Usage: no arguments. Just pass template test to stdin. Template test is


FWIW, I completely misunderstood this.

At first, this sounded really ambiguous to me; then I thought that clearly this 
must mean that one should pipe the test’s output to this script, i.e.

$ path/to/test.sh | scripts/simplebench/img_bench_templater.py

But now after reading more, I finally understand that this is not what is 
meant, but actually literally passing some template of a test script to this 
script, i.e.

$ scripts/simplebench/img_bench_templater.py < path/to/test-template.sh

So, two things; first, I believe it should be a “test template”, not a 
“template test”, because this is about templates for a test, not about a test 
that has something to do with templates.

Second, perhaps we should start with what this does.

Perhaps:

“This script generates performance tests from a test template (example below), 
runs them, and displays the results in a table. The template is read from 
stdin.  It must be written in bash and end with a `qemu-img bench` invocation 
(whose result is parsed to get the test instance’s result).”?


Yes, that's correct, thanks for wording




+a bash script, last command should be qemu-img bench (it's output is parsed
+to get a result). For templating use the following synax:


“Use the following syntax in the template to create the various different test 
instances:”?


+
+  column templating: {var1|var2|...} - test will use different values in
+  different columns. You may use several {} constructions in the test, in this
+  case product of all choice-sets will be used.
+
+  row templating: [var1|var2|...] - similar thing to define rows (test-cases)
+
+Test tempalate example:


*template


+
+Assume you want to compare two qemu-img binaries, called qemu-img-old and
+qemu-img-new in your build directory in two test-cases with 4K writes and 64

[PATCH v8 34/34] block/block-copy: block_copy_state_new(): drop extra arguments

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

The only caller pass copy_range and compress both false. Let's just
drop these arguments.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/block/block-copy.h | 1 -
 block/block-copy.c | 5 ++---
 block/copy-before-write.c  | 2 +-
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/include/block/block-copy.h b/include/block/block-copy.h
index b8a2d63545..99370fa38b 100644
--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
@@ -25,7 +25,6 @@ typedef struct BlockCopyState BlockCopyState;
 typedef struct BlockCopyCallState BlockCopyCallState;
 
 BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
- bool use_copy_range, bool compress,
  Error **errp);
 
 /* Function should be called prior any actual copy request */
diff --git a/block/block-copy.c b/block/block-copy.c
index 975f9986a4..37d804ec42 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -383,8 +383,7 @@ static int64_t 
block_copy_calculate_cluster_size(BlockDriverState *target,
 }
 
 BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
- bool use_copy_range,
- bool compress, Error **errp)
+ Error **errp)
 {
 BlockCopyState *s;
 int64_t cluster_size;
@@ -433,7 +432,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, 
BdrvChild *target,
 cluster_size),
 };
 
-block_copy_set_copy_opts(s, use_copy_range, compress);
+block_copy_set_copy_opts(s, false, false);
 
 ratelimit_init(&s->rate_limit);
 qemu_co_mutex_init(&s->lock);
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 2cd68b480a..2a5e57deca 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -170,7 +170,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
  bs->file->bs->supported_zero_flags);
 
-s->bcs = block_copy_state_new(bs->file, s->target, false, false, errp);
+s->bcs = block_copy_state_new(bs->file, s->target, errp);
 if (!s->bcs) {
 error_prepend(errp, "Cannot create block-copy-state: ");
 return -EINVAL;
-- 
2.29.2

[PATCH v8 33/34] iotests/image-fleecing: add test-case for copy-before-write filter

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

New fleecing method becomes available: copy-before-write filter.

Actually we don't need backup job to setup image fleecing. Add test
for new recommended way of image fleecing.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 tests/qemu-iotests/tests/image-fleecing | 50 +-
 tests/qemu-iotests/tests/image-fleecing.out | 72 +
 2 files changed, 107 insertions(+), 15 deletions(-)

diff --git a/tests/qemu-iotests/tests/image-fleecing 
b/tests/qemu-iotests/tests/image-fleecing
index e210c00d28..f6318492c6 100755
--- a/tests/qemu-iotests/tests/image-fleecing
+++ b/tests/qemu-iotests/tests/image-fleecing
@@ -48,7 +48,7 @@ remainder = [('0xd5', '0x108000',  '32k'), # Right-end of 
partial-left [1]
  ('0xdc', '32M',   '32k'), # Left-end of partial-right [2]
  ('0xcd', '0x3ff', '64k')] # patterns[3]
 
-def do_test(base_img_path, fleece_img_path, nbd_sock_path, vm):
+def do_test(use_cbw, base_img_path, fleece_img_path, nbd_sock_path, vm):
 log('--- Setting up images ---')
 log('')
 
@@ -67,6 +67,7 @@ def do_test(base_img_path, fleece_img_path, nbd_sock_path, 
vm):
 
 src_node = 'source'
 tmp_node = 'temp'
+qom_path = '/machine/peripheral/sda'
 vm.add_blockdev(f'driver={iotests.imgfmt},file.driver=file,'
 f'file.filename={base_img_path},node-name={src_node}')
 vm.add_device('virtio-scsi')
@@ -90,12 +91,22 @@ def do_test(base_img_path, fleece_img_path, nbd_sock_path, 
vm):
 'backing': src_node,
 }))
 
-# Establish COW from source to fleecing node
-log(vm.qmp('blockdev-backup',
-   job_id='fleecing',
-   device=src_node,
-   target=tmp_node,
-   sync='none'))
+# Establish CBW from source to fleecing node
+if use_cbw:
+log(vm.qmp('blockdev-add', {
+'driver': 'copy-before-write',
+'node-name': 'fl-cbw',
+'file': src_node,
+'target': tmp_node
+}))
+
+log(vm.qmp('qom-set', path=qom_path, property='drive', value='fl-cbw'))
+else:
+log(vm.qmp('blockdev-backup',
+   job_id='fleecing',
+   device=src_node,
+   target=tmp_node,
+   sync='none'))
 
 log('')
 log('--- Setting up NBD Export ---')
@@ -124,7 +135,7 @@ def do_test(base_img_path, fleece_img_path, nbd_sock_path, 
vm):
 for p in overwrite:
 cmd = 'write -P%s %s %s' % p
 log(cmd)
-log(vm.hmp_qemu_io('/machine/peripheral/sda', cmd, qdev=True))
+log(vm.hmp_qemu_io(qom_path, cmd, qdev=True))
 
 log('')
 log('--- Verifying Data ---')
@@ -139,10 +150,15 @@ def do_test(base_img_path, fleece_img_path, 
nbd_sock_path, vm):
 log('--- Cleanup ---')
 log('')
 
-log(vm.qmp('block-job-cancel', device='fleecing'))
-e = vm.event_wait('BLOCK_JOB_CANCELLED')
-assert e is not None
-log(e, filters=[iotests.filter_qmp_event])
+if use_cbw:
+log(vm.qmp('qom-set', path=qom_path, property='drive', value=src_node))
+log(vm.qmp('blockdev-del', node_name='fl-cbw'))
+else:
+log(vm.qmp('block-job-cancel', device='fleecing'))
+e = vm.event_wait('BLOCK_JOB_CANCELLED')
+assert e is not None
+log(e, filters=[iotests.filter_qmp_event])
+
 log(vm.qmp('nbd-server-stop'))
 log(vm.qmp('blockdev-del', node_name=tmp_node))
 vm.shutdown()
@@ -160,13 +176,17 @@ def do_test(base_img_path, fleece_img_path, 
nbd_sock_path, vm):
 log('Done')
 
 
-def test():
+def test(use_cbw):
 with iotests.FilePath('base.img') as base_img_path, \
  iotests.FilePath('fleece.img') as fleece_img_path, \
  iotests.FilePath('nbd.sock',
   base_dir=iotests.sock_dir) as nbd_sock_path, \
  iotests.VM() as vm:
-do_test(base_img_path, fleece_img_path, nbd_sock_path, vm)
+do_test(use_cbw, base_img_path, fleece_img_path, nbd_sock_path, vm)
+
 
+log('=== Test backup(sync=none) based fleecing ===\n')
+test(False)
 
-test()
+log('=== Test filter based fleecing ===\n')
+test(True)
diff --git a/tests/qemu-iotests/tests/image-fleecing.out 
b/tests/qemu-iotests/tests/image-fleecing.out
index 314a1b54e9..e96d122a8b 100644
--- a/tests/qemu-iotests/tests/image-fleecing.out
+++ b/tests/qemu-iotests/tests/image-fleecing.out
@@ -1,3 +1,5 @@
+=== Test backup(sync=none) based fleecing ===
+
 --- Setting up images ---
 
 Done
@@ -65,3 +67,73 @@ read -P0xdc 32M 32k
 read -P0xcd 0x3ff 64k
 
 Done
+=== Test filter based fleecing ===
+
+--- Setting up images ---
+
+Done
+
+--- Launching VM ---
+
+Done
+
+--- Setting up Fleecing Graph ---
+
+{"return": {}}
+{"return": {}}
+{"return": {}}
+
+--- Setting up NBD Export ---
+
+{"return": {}}
+{"return": {}}
+
+--- Sanity Check ---
+
+read -P0x5d 0 64k
+read -P0xd5 1M 64k
+read -P0xdc 32M 64k
+read -P0xcd 0x3ff 64k
+read -P0 0x00f8000 32k
+r

[PATCH v8 30/34] iotests/image-fleecing: proper source device

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

Define scsi device to operate with it by qom-set in further patch.

Give a new node-name to source block node, to not look like device
name.

Job now don't want to work without giving explicit id, so, let's call
it "fleecing".

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 tests/qemu-iotests/tests/image-fleecing | 12 
 tests/qemu-iotests/tests/image-fleecing.out |  2 +-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/tests/qemu-iotests/tests/image-fleecing 
b/tests/qemu-iotests/tests/image-fleecing
index 799369e290..961941bb27 100755
--- a/tests/qemu-iotests/tests/image-fleecing
+++ b/tests/qemu-iotests/tests/image-fleecing
@@ -70,7 +70,11 @@ with iotests.FilePath('base.img') as base_img_path, \
 log('--- Launching VM ---')
 log('')
 
-vm.add_drive(base_img_path)
+src_node = 'source'
+vm.add_blockdev(f'driver={iotests.imgfmt},file.driver=file,'
+f'file.filename={base_img_path},node-name={src_node}')
+vm.add_device('virtio-scsi')
+vm.add_device(f'scsi-hd,id=sda,drive={src_node}')
 vm.launch()
 log('Done')
 
@@ -78,7 +82,6 @@ with iotests.FilePath('base.img') as base_img_path, \
 log('--- Setting up Fleecing Graph ---')
 log('')
 
-src_node = 'drive0'
 tgt_node = 'fleeceNode'
 
 # create tgt_node backed by src_node
@@ -94,6 +97,7 @@ with iotests.FilePath('base.img') as base_img_path, \
 
 # Establish COW from source to fleecing node
 log(vm.qmp('blockdev-backup',
+   job_id='fleecing',
device=src_node,
target=tgt_node,
sync='none'))
@@ -125,7 +129,7 @@ with iotests.FilePath('base.img') as base_img_path, \
 for p in overwrite:
 cmd = 'write -P%s %s %s' % p
 log(cmd)
-log(vm.hmp_qemu_io(src_node, cmd))
+log(vm.hmp_qemu_io('/machine/peripheral/sda', cmd, qdev=True))
 
 log('')
 log('--- Verifying Data ---')
@@ -140,7 +144,7 @@ with iotests.FilePath('base.img') as base_img_path, \
 log('--- Cleanup ---')
 log('')
 
-log(vm.qmp('block-job-cancel', device=src_node))
+log(vm.qmp('block-job-cancel', device='fleecing'))
 e = vm.event_wait('BLOCK_JOB_CANCELLED')
 assert e is not None
 log(e, filters=[iotests.filter_qmp_event])
diff --git a/tests/qemu-iotests/tests/image-fleecing.out 
b/tests/qemu-iotests/tests/image-fleecing.out
index 16643dde30..314a1b54e9 100644
--- a/tests/qemu-iotests/tests/image-fleecing.out
+++ b/tests/qemu-iotests/tests/image-fleecing.out
@@ -50,7 +50,7 @@ read -P0 0x3fe 64k
 --- Cleanup ---
 
 {"return": {}}
-{"data": {"device": "drive0", "len": 67108864, "offset": 393216, "speed": 0, 
"type": "backup"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": 
{"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"device": "fleecing", "len": 67108864, "offset": 393216, "speed": 0, 
"type": "backup"}, "event": "BLOCK_JOB_CANCELLED", "timestamp": 
{"microseconds": "USECS", "seconds": "SECS"}}
 {"return": {}}
 {"return": {}}
 
-- 
2.29.2

[PATCH v8 29/34] iotests.py: hmp_qemu_io: support qdev

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
Reviewed-by: John Snow 
---
 tests/qemu-iotests/iotests.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 89663dac06..77efcb0927 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -651,9 +651,10 @@ def resume_drive(self, drive: str) -> None:
 self.hmp(f'qemu-io {drive} "remove_break bp_{drive}"')
 
 def hmp_qemu_io(self, drive: str, cmd: str,
-use_log: bool = False) -> QMPMessage:
+use_log: bool = False, qdev: bool = False) -> QMPMessage:
 """Write to a given drive using an HMP command"""
-return self.hmp(f'qemu-io {drive} "{cmd}"', use_log=use_log)
+d = '-d ' if qdev else ''
+return self.hmp(f'qemu-io {d}{drive} "{cmd}"', use_log=use_log)
 
 def flatten_qmp_object(self, obj, output=None, basestr=''):
 if output is None:
-- 
2.29.2

[PATCH v8 26/34] iotests/222: fix pylint and mypy complains

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

Here:
 - long line
 - move to new interface of vm.qmp() (direct passing dict), to avoid
   mypy false-positive, as it thinks that unpacked dict is a positional
   argument.
 - extra parenthesis
 - handle event_wait possible None value

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 tests/qemu-iotests/222 | 20 +++-
 tests/qemu-iotests/297 |  2 +-
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/tests/qemu-iotests/222 b/tests/qemu-iotests/222
index b48afe623e..5e2556f8df 100755
--- a/tests/qemu-iotests/222
+++ b/tests/qemu-iotests/222
@@ -50,7 +50,8 @@ remainder = [("0xd5", "0x108000",  "32k"), # Right-end of 
partial-left [1]
 
 with iotests.FilePath('base.img') as base_img_path, \
  iotests.FilePath('fleece.img') as fleece_img_path, \
- iotests.FilePath('nbd.sock', base_dir=iotests.sock_dir) as nbd_sock_path, 
\
+ iotests.FilePath('nbd.sock',
+  base_dir=iotests.sock_dir) as nbd_sock_path, \
  iotests.VM() as vm:
 
 log('--- Setting up images ---')
@@ -81,7 +82,7 @@ with iotests.FilePath('base.img') as base_img_path, \
 tgt_node = "fleeceNode"
 
 # create tgt_node backed by src_node
-log(vm.qmp("blockdev-add", **{
+log(vm.qmp("blockdev-add", {
 "driver": "qcow2",
 "node-name": tgt_node,
 "file": {
@@ -103,8 +104,8 @@ with iotests.FilePath('base.img') as base_img_path, \
 
 nbd_uri = 'nbd+unix:///%s?socket=%s' % (tgt_node, nbd_sock_path)
 log(vm.qmp("nbd-server-start",
-   **{"addr": { "type": "unix",
-"data": { "path": nbd_sock_path } } }))
+   {"addr": { "type": "unix",
+  "data": { "path": nbd_sock_path } } }))
 
 log(vm.qmp("nbd-server-add", device=tgt_node))
 
@@ -112,7 +113,7 @@ with iotests.FilePath('base.img') as base_img_path, \
 log('--- Sanity Check ---')
 log('')
 
-for p in (patterns + zeroes):
+for p in patterns + zeroes:
 cmd = "read -P%s %s %s" % p
 log(cmd)
 assert qemu_io_silent('-r', '-f', 'raw', '-c', cmd, nbd_uri) == 0
@@ -130,7 +131,7 @@ with iotests.FilePath('base.img') as base_img_path, \
 log('--- Verifying Data ---')
 log('')
 
-for p in (patterns + zeroes):
+for p in patterns + zeroes:
 cmd = "read -P%s %s %s" % p
 log(cmd)
 assert qemu_io_silent('-r', '-f', 'raw', '-c', cmd, nbd_uri) == 0
@@ -140,8 +141,9 @@ with iotests.FilePath('base.img') as base_img_path, \
 log('')
 
 log(vm.qmp('block-job-cancel', device=src_node))
-log(vm.event_wait('BLOCK_JOB_CANCELLED'),
-filters=[iotests.filter_qmp_event])
+e = vm.event_wait('BLOCK_JOB_CANCELLED')
+assert e is not None
+log(e, filters=[iotests.filter_qmp_event])
 log(vm.qmp('nbd-server-stop'))
 log(vm.qmp('blockdev-del', node_name=tgt_node))
 vm.shutdown()
@@ -150,7 +152,7 @@ with iotests.FilePath('base.img') as base_img_path, \
 log('--- Confirming writes ---')
 log('')
 
-for p in (overwrite + remainder):
+for p in overwrite + remainder:
 cmd = "read -P%s %s %s" % p
 log(cmd)
 assert qemu_io_silent(base_img_path, '-c', cmd) == 0
diff --git a/tests/qemu-iotests/297 b/tests/qemu-iotests/297
index 433b732336..345b617b34 100755
--- a/tests/qemu-iotests/297
+++ b/tests/qemu-iotests/297
@@ -31,7 +31,7 @@ SKIP_FILES = (
 '096', '118', '124', '132', '136', '139', '147', '148', '149',
 '151', '152', '155', '163', '165', '169', '194', '196', '199', '202',
 '203', '205', '206', '207', '208', '210', '211', '212', '213', '216',
-'218', '219', '222', '224', '228', '234', '235', '236', '237', '238',
+'218', '219', '224', '228', '234', '235', '236', '237', '238',
 '240', '242', '245', '246', '248', '255', '256', '257', '258', '260',
 '262', '264', '266', '274', '277', '280', '281', '295', '296', '298',
 '299', '302', '303', '304', '307',
-- 
2.29.2

Re: [PATCH 1/3] simplebench: add img_bench_templater.py

2021-08-24 Thread Hanna Reitz


On 24.08.21 10:53, Vladimir Sementsov-Ogievskiy wrote:

19.08.2021 19:37, Hanna Reitz wrote:

On 24.07.21 15:38, Vladimir Sementsov-Ogievskiy wrote:


[...]


+import itertools
+from lark import Lark
+
+grammar = """
+start: ( text | column_switch | row_switch )+
+
+column_switch: "{" text ["|" text]+ "}"
+row_switch: "[" text ["|" text]+ "]"
+text: /[^|{}\[\]]+/

So I have no idea how this really works, of course, but does this 
mean that the `text` pattern cannot contain pipe symbols? I.e. that 
you cannot use pipes in the test template?




Hmm. I didn't try. I hope lark is smart enough to keep pipes that are 
out of {} [] as is.. But of course, you can't hope that pipe inside {} 
or [] will work as bash-pipe.


Yep, sure.  It’s just that the `text` nonterminal symbol doesn’t look 
like it could match anything with a pipe in it.


Same thing with other special symbols ("{}" and "[]"). I don't want to 
care about this too much now. This simple grammar works good for test 
template in patch 03. If we need something more, we can add a kind of 
special symbols escaping later.


But yes, if someone trips over this (i.e. we ourselves), we can still 
fix it then.


Hanna

[PATCH v8 22/34] qapi: publish copy-before-write filter

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
Acked-by: Markus Armbruster 
---
 qapi/block-core.json | 25 +++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 675d8265eb..6764d8b84f 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2825,13 +2825,14 @@
 # @blklogwrites: Since 3.0
 # @blkreplay: Since 4.2
 # @compress: Since 5.0
+# @copy-before-write: Since 6.2
 #
 # Since: 2.9
 ##
 { 'enum': 'BlockdevDriver',
   'data': [ 'blkdebug', 'blklogwrites', 'blkreplay', 'blkverify', 'bochs',
-'cloop', 'compress', 'copy-on-read', 'dmg', 'file', 'ftp', 'ftps',
-'gluster',
+'cloop', 'compress', 'copy-before-write', 'copy-on-read', 'dmg',
+'file', 'ftp', 'ftps', 'gluster',
 {'name': 'host_cdrom', 'if': 'defined(HAVE_HOST_BLOCK_DEVICE)' },
 {'name': 'host_device', 'if': 'defined(HAVE_HOST_BLOCK_DEVICE)' },
 'http', 'https', 'iscsi',
@@ -4049,6 +4050,25 @@
   'base': 'BlockdevOptionsGenericFormat',
   'data': { '*bottom': 'str' } }
 
+##
+# @BlockdevOptionsCbw:
+#
+# Driver specific block device options for the copy-before-write driver,
+# which does so called copy-before-write operations: when data is
+# written to the filter, the filter first reads corresponding blocks
+# from its file child and copies them to @target child. After successfully
+# copying, the write request is propagated to file child. If copying
+# fails, the original write request is failed too and no data is written
+# to file child.
+#
+# @target: The target for copy-before-write operations.
+#
+# Since: 6.2
+##
+{ 'struct': 'BlockdevOptionsCbw',
+  'base': 'BlockdevOptionsGenericFormat',
+  'data': { 'target': 'BlockdevRef' } }
+
 ##
 # @BlockdevOptions:
 #
@@ -4101,6 +4121,7 @@
   'bochs':  'BlockdevOptionsGenericFormat',
   'cloop':  'BlockdevOptionsGenericFormat',
   'compress':   'BlockdevOptionsGenericFormat',
+  'copy-before-write':'BlockdevOptionsCbw',
   'copy-on-read':'BlockdevOptionsCor',
   'dmg':'BlockdevOptionsGenericFormat',
   'file':   'BlockdevOptionsFile',
-- 
2.29.2

[PATCH v8 25/34] python:QEMUMachine: template typing for self returning methods

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

mypy thinks that return value of these methods in subclusses is
QEMUMachine, which is wrong. So, make typing smarter.

Suggested-by: John Snow 
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 python/qemu/machine/machine.py | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py
index ce1e130c13..1c347d2c30 100644
--- a/python/qemu/machine/machine.py
+++ b/python/qemu/machine/machine.py
@@ -36,6 +36,7 @@
 Sequence,
 Tuple,
 Type,
+TypeVar,
 )
 
 from qemu.qmp import (  # pylint: disable=import-error
@@ -73,6 +74,9 @@ class AbnormalShutdown(QEMUMachineError):
 """
 
 
+_T = TypeVar('_T', bound='QEMUMachine')
+
+
 class QEMUMachine:
 """
 A QEMU VM.
@@ -166,7 +170,7 @@ def __init__(self,
 self._remove_files: List[str] = []
 self._user_killed = False
 
-def __enter__(self) -> 'QEMUMachine':
+def __enter__(self: _T) -> _T:
 return self
 
 def __exit__(self,
@@ -182,8 +186,8 @@ def add_monitor_null(self) -> None:
 self._args.append('-monitor')
 self._args.append('null')
 
-def add_fd(self, fd: int, fdset: int,
-   opaque: str, opts: str = '') -> 'QEMUMachine':
+def add_fd(self: _T, fd: int, fdset: int,
+   opaque: str, opts: str = '') -> _T:
 """
 Pass a file descriptor to the VM
 """
-- 
2.29.2

[PATCH v8 19/34] block/copy-before-write: initialize block-copy bitmap

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

We are going to publish copy-before-write filter to be used in separate
of backup. Future step would support bitmap for the filter. But let's
start from full set bitmap.

We have to modify backup, as bitmap is first initialized by
copy-before-write filter, and then backup modifies it.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 block/backup.c| 16 +++-
 block/copy-before-write.c |  4 
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 4869f1e5da..687d2882bc 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -233,18 +233,16 @@ static void backup_init_bcs_bitmap(BackupBlockJob *job)
 BdrvDirtyBitmap *bcs_bitmap = block_copy_dirty_bitmap(job->bcs);
 
 if (job->sync_mode == MIRROR_SYNC_MODE_BITMAP) {
+bdrv_clear_dirty_bitmap(bcs_bitmap, NULL);
 ret = bdrv_dirty_bitmap_merge_internal(bcs_bitmap, job->sync_bitmap,
NULL, true);
 assert(ret);
-} else {
-if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
-/*
- * We can't hog the coroutine to initialize this thoroughly.
- * Set a flag and resume work when we are able to yield safely.
- */
-block_copy_set_skip_unallocated(job->bcs, true);
-}
-bdrv_set_dirty_bitmap(bcs_bitmap, 0, job->len);
+} else if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
+/*
+ * We can't hog the coroutine to initialize this thoroughly.
+ * Set a flag and resume work when we are able to yield safely.
+ */
+block_copy_set_skip_unallocated(job->bcs, true);
 }
 
 estimate = bdrv_get_dirty_count(bcs_bitmap);
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 1cefcade78..2efe098aae 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -147,6 +147,7 @@ static void cbw_child_perm(BlockDriverState *bs, BdrvChild 
*c,
 static int cbw_init(BlockDriverState *bs, QDict *options, Error **errp)
 {
 BDRVCopyBeforeWriteState *s = bs->opaque;
+BdrvDirtyBitmap *copy_bitmap;
 
 bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
@@ -174,6 +175,9 @@ static int cbw_init(BlockDriverState *bs, QDict *options, 
Error **errp)
 return -EINVAL;
 }
 
+copy_bitmap = block_copy_dirty_bitmap(s->bcs);
+bdrv_set_dirty_bitmap(copy_bitmap, 0, bdrv_dirty_bitmap_size(copy_bitmap));
+
 return 0;
 }
 
-- 
2.29.2

[PATCH v8 23/34] python/qemu/machine.py: refactor _qemu_args()

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

 - use shorter construction
 - don't create new dict if not needed
 - drop extra unpacking key-val arguments
 - drop extra default values

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
Reviewed-by: John Snow 
---
 python/qemu/machine/machine.py | 18 --
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py
index 971ed7e8c6..5eab31aeec 100644
--- a/python/qemu/machine/machine.py
+++ b/python/qemu/machine/machine.py
@@ -564,14 +564,12 @@ def _qmp(self) -> QEMUMonitorProtocol:
 return self._qmp_connection
 
 @classmethod
-def _qmp_args(cls, _conv_keys: bool = True, **args: Any) -> Dict[str, Any]:
-qmp_args = dict()
-for key, value in args.items():
-if _conv_keys:
-qmp_args[key.replace('_', '-')] = value
-else:
-qmp_args[key] = value
-return qmp_args
+def _qmp_args(cls, conv_keys: bool,
+  args: Dict[str, Any]) -> Dict[str, object]:
+if conv_keys:
+return {k.replace('_', '-'): v for k, v in args.items()}
+
+return args
 
 def qmp(self, cmd: str,
 conv_keys: bool = True,
@@ -579,7 +577,7 @@ def qmp(self, cmd: str,
 """
 Invoke a QMP command and return the response dict
 """
-qmp_args = self._qmp_args(conv_keys, **args)
+qmp_args = self._qmp_args(conv_keys, args)
 return self._qmp.cmd(cmd, args=qmp_args)
 
 def command(self, cmd: str,
@@ -590,7 +588,7 @@ def command(self, cmd: str,
 On success return the response dict.
 On failure raise an exception.
 """
-qmp_args = self._qmp_args(conv_keys, **args)
+qmp_args = self._qmp_args(conv_keys, args)
 return self._qmp.command(cmd, **qmp_args)
 
 def get_qmp_event(self, wait: bool = False) -> Optional[QMPMessage]:
-- 
2.29.2

[PATCH v8 28/34] iotests: move 222 to tests/image-fleecing

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

Give a good name to test file.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 tests/qemu-iotests/{222 => tests/image-fleecing} | 0
 tests/qemu-iotests/{222.out => tests/image-fleecing.out} | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename tests/qemu-iotests/{222 => tests/image-fleecing} (100%)
 rename tests/qemu-iotests/{222.out => tests/image-fleecing.out} (100%)

diff --git a/tests/qemu-iotests/222 b/tests/qemu-iotests/tests/image-fleecing
similarity index 100%
rename from tests/qemu-iotests/222
rename to tests/qemu-iotests/tests/image-fleecing
diff --git a/tests/qemu-iotests/222.out 
b/tests/qemu-iotests/tests/image-fleecing.out
similarity index 100%
rename from tests/qemu-iotests/222.out
rename to tests/qemu-iotests/tests/image-fleecing.out
-- 
2.29.2

[PATCH v8 18/34] block/copy-before-write: cbw_init(): use options

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

One more step closer to .bdrv_open(): use options instead of plain
arguments. Move to bdrv_open_child() calls, native for drive open
handlers.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Hanna Reitz 
---
 block/copy-before-write.c | 29 +++--
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 1e7180760a..1cefcade78 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -144,25 +144,20 @@ static void cbw_child_perm(BlockDriverState *bs, 
BdrvChild *c,
 }
 }
 
-static int cbw_init(BlockDriverState *bs, BlockDriverState *source,
-BlockDriverState *target, Error **errp)
+static int cbw_init(BlockDriverState *bs, QDict *options, Error **errp)
 {
 BDRVCopyBeforeWriteState *s = bs->opaque;
 
-bdrv_ref(target);
-s->target = bdrv_attach_child(bs, target, "target", &child_of_bds,
-  BDRV_CHILD_DATA, errp);
-if (!s->target) {
-error_prepend(errp, "Cannot attach target child: ");
+bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
+   BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
+   false, errp);
+if (!bs->file) {
 return -EINVAL;
 }
 
-bdrv_ref(source);
-bs->file = bdrv_attach_child(bs, source, "file", &child_of_bds,
- BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
- errp);
-if (!bs->file) {
-error_prepend(errp, "Cannot attach file child: ");
+s->target = bdrv_open_child(NULL, options, "target", bs, &child_of_bds,
+BDRV_CHILD_DATA, false, errp);
+if (!s->target) {
 return -EINVAL;
 }
 
@@ -209,6 +204,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
 int ret;
 BDRVCopyBeforeWriteState *state;
 BlockDriverState *top;
+QDict *opts;
 
 assert(source->total_sectors == target->total_sectors);
 
@@ -220,7 +216,12 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
 }
 state = top->opaque;
 
-ret = cbw_init(top, source, target, errp);
+opts = qdict_new();
+qdict_put_str(opts, "file", bdrv_get_node_name(source));
+qdict_put_str(opts, "target", bdrv_get_node_name(target));
+
+ret = cbw_init(top, opts, errp);
+qobject_unref(opts);
 if (ret < 0) {
 goto fail;
 }
-- 
2.29.2

[PATCH v8 21/34] block/copy-before-write: make public block driver

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

Finally, copy-before-write gets own .bdrv_open and .bdrv_close
handlers, block_init() call and becomes available through bdrv_open().

To achieve this:

 - cbw_init gets unused flags argument and becomes cbw_open
 - block_copy_state_free() call moved to new cbw_close()
 - in bdrv_cbw_append:
   - options are completed with driver and node-name, and we can simply
 use bdrv_insert_node() to do both open and drained replacing
 - in bdrv_cbw_drop:
   - cbw_close() is now responsible for freeing s->bcs, so don't do it
 here

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 block/copy-before-write.c | 60 ++-
 1 file changed, 28 insertions(+), 32 deletions(-)

diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 2efe098aae..2cd68b480a 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -144,7 +144,8 @@ static void cbw_child_perm(BlockDriverState *bs, BdrvChild 
*c,
 }
 }
 
-static int cbw_init(BlockDriverState *bs, QDict *options, Error **errp)
+static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
+Error **errp)
 {
 BDRVCopyBeforeWriteState *s = bs->opaque;
 BdrvDirtyBitmap *copy_bitmap;
@@ -181,10 +182,21 @@ static int cbw_init(BlockDriverState *bs, QDict *options, 
Error **errp)
 return 0;
 }
 
+static void cbw_close(BlockDriverState *bs)
+{
+BDRVCopyBeforeWriteState *s = bs->opaque;
+
+block_copy_state_free(s->bcs);
+s->bcs = NULL;
+}
+
 BlockDriver bdrv_cbw_filter = {
 .format_name = "copy-before-write",
 .instance_size = sizeof(BDRVCopyBeforeWriteState),
 
+.bdrv_open  = cbw_open,
+.bdrv_close = cbw_close,
+
 .bdrv_co_preadv = cbw_co_preadv,
 .bdrv_co_pwritev= cbw_co_pwritev,
 .bdrv_co_pwrite_zeroes  = cbw_co_pwrite_zeroes,
@@ -205,56 +217,40 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState 
*source,
   Error **errp)
 {
 ERRP_GUARD();
-int ret;
 BDRVCopyBeforeWriteState *state;
 BlockDriverState *top;
 QDict *opts;
 
 assert(source->total_sectors == target->total_sectors);
 
-top = bdrv_new_open_driver(&bdrv_cbw_filter, filter_node_name,
-   BDRV_O_RDWR, errp);
-if (!top) {
-error_prepend(errp, "Cannot open driver: ");
-return NULL;
-}
-state = top->opaque;
-
 opts = qdict_new();
+qdict_put_str(opts, "driver", "copy-before-write");
+if (filter_node_name) {
+qdict_put_str(opts, "node-name", filter_node_name);
+}
 qdict_put_str(opts, "file", bdrv_get_node_name(source));
 qdict_put_str(opts, "target", bdrv_get_node_name(target));
 
-ret = cbw_init(top, opts, errp);
-qobject_unref(opts);
-if (ret < 0) {
-goto fail;
-}
-
-bdrv_drained_begin(source);
-ret = bdrv_replace_node(source, top, errp);
-bdrv_drained_end(source);
-if (ret < 0) {
-error_prepend(errp, "Cannot append copy-before-write filter: ");
-goto fail;
+top = bdrv_insert_node(source, opts, BDRV_O_RDWR, errp);
+if (!top) {
+return NULL;
 }
 
+state = top->opaque;
 *bcs = state->bcs;
 
 return top;
-
-fail:
-block_copy_state_free(state->bcs);
-bdrv_unref(top);
-return NULL;
 }
 
 void bdrv_cbw_drop(BlockDriverState *bs)
 {
-BDRVCopyBeforeWriteState *s = bs->opaque;
-
 bdrv_drop_filter(bs, &error_abort);
-
-block_copy_state_free(s->bcs);
-
 bdrv_unref(bs);
 }
+
+static void cbw_init(void)
+{
+bdrv_register(&bdrv_cbw_filter);
+}
+
+block_init(cbw_init);
-- 
2.29.2

[PATCH v8 16/34] block/copy-before-write: cbw_init(): use file child after attaching

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

In the next commit we'll get rid of source argument of cbw_init().
Prepare to it now, to make next commit simpler: move the code block
that uses source below attaching the child and use bs->file->bs instead
of source variable.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 block/copy-before-write.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index d7f1833efa..4858dcf8ff 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -149,13 +149,6 @@ static int cbw_init(BlockDriverState *bs, BlockDriverState 
*source,
 {
 BDRVCopyBeforeWriteState *s = bs->opaque;
 
-bs->total_sectors = source->total_sectors;
-bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
-(BDRV_REQ_FUA & source->supported_write_flags);
-bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
-((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
- source->supported_zero_flags);
-
 bdrv_ref(target);
 s->target = bdrv_attach_child(bs, target, "target", &child_of_bds,
   BDRV_CHILD_DATA, errp);
@@ -173,6 +166,13 @@ static int cbw_init(BlockDriverState *bs, BlockDriverState 
*source,
 return -EINVAL;
 }
 
+bs->total_sectors = bs->file->bs->total_sectors;
+bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
+(BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
+bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
+((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
+ bs->file->bs->supported_zero_flags);
+
 s->bcs = block_copy_state_new(bs->file, s->target, false, compress, errp);
 if (!s->bcs) {
 error_prepend(errp, "Cannot create block-copy-state: ");
-- 
2.29.2

[PATCH v8 24/34] python/qemu/machine: QEMUMachine: improve qmp() method

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

We often call qmp() with unpacking dict, like qmp('foo', **{...}).
mypy don't really like it, it thinks that passed unpacked dict is a
positional argument and complains that it type should be bool (because
second argument of qmp() is conv_keys: bool).

Allow passing dict directly, simplifying interface, and giving a way to
satisfy mypy.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
Reviewed-by: John Snow 
---
 python/qemu/machine/machine.py | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/python/qemu/machine/machine.py b/python/qemu/machine/machine.py
index 5eab31aeec..ce1e130c13 100644
--- a/python/qemu/machine/machine.py
+++ b/python/qemu/machine/machine.py
@@ -572,11 +572,21 @@ def _qmp_args(cls, conv_keys: bool,
 return args
 
 def qmp(self, cmd: str,
-conv_keys: bool = True,
+args_dict: Optional[Dict[str, object]] = None,
+conv_keys: Optional[bool] = None,
 **args: Any) -> QMPMessage:
 """
 Invoke a QMP command and return the response dict
 """
+if args_dict is not None:
+assert not args
+assert conv_keys is None
+args = args_dict
+conv_keys = False
+
+if conv_keys is None:
+conv_keys = True
+
 qmp_args = self._qmp_args(conv_keys, args)
 return self._qmp.cmd(cmd, args=qmp_args)
 
-- 
2.29.2

[PATCH v8 32/34] iotests/image-fleecing: prepare for adding new test-case

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

We are going to add a test-case with some behavior modifications. So,
let's prepare a function to be reused.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 tests/qemu-iotests/tests/image-fleecing | 19 +--
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/tests/qemu-iotests/tests/image-fleecing 
b/tests/qemu-iotests/tests/image-fleecing
index ec4ef5f3f6..e210c00d28 100755
--- a/tests/qemu-iotests/tests/image-fleecing
+++ b/tests/qemu-iotests/tests/image-fleecing
@@ -48,12 +48,7 @@ remainder = [('0xd5', '0x108000',  '32k'), # Right-end of 
partial-left [1]
  ('0xdc', '32M',   '32k'), # Left-end of partial-right [2]
  ('0xcd', '0x3ff', '64k')] # patterns[3]
 
-with iotests.FilePath('base.img') as base_img_path, \
- iotests.FilePath('fleece.img') as fleece_img_path, \
- iotests.FilePath('nbd.sock',
-  base_dir=iotests.sock_dir) as nbd_sock_path, \
- iotests.VM() as vm:
-
+def do_test(base_img_path, fleece_img_path, nbd_sock_path, vm):
 log('--- Setting up images ---')
 log('')
 
@@ -163,3 +158,15 @@ with iotests.FilePath('base.img') as base_img_path, \
 
 log('')
 log('Done')
+
+
+def test():
+with iotests.FilePath('base.img') as base_img_path, \
+ iotests.FilePath('fleece.img') as fleece_img_path, \
+ iotests.FilePath('nbd.sock',
+  base_dir=iotests.sock_dir) as nbd_sock_path, \
+ iotests.VM() as vm:
+do_test(base_img_path, fleece_img_path, nbd_sock_path, vm)
+
+
+test()
-- 
2.29.2

[PATCH v8 15/34] block/copy-before-write: cbw_init(): rename variables

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

One more step closer to real .bdrv_open() handler: use more usual names
for bs being initialized and its state.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 block/copy-before-write.c | 29 ++---
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index a4fee645fd..d7f1833efa 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -144,38 +144,37 @@ static void cbw_child_perm(BlockDriverState *bs, 
BdrvChild *c,
 }
 }
 
-static int cbw_init(BlockDriverState *top, BlockDriverState *source,
+static int cbw_init(BlockDriverState *bs, BlockDriverState *source,
 BlockDriverState *target, bool compress, Error **errp)
 {
-BDRVCopyBeforeWriteState *state = top->opaque;
+BDRVCopyBeforeWriteState *s = bs->opaque;
 
-top->total_sectors = source->total_sectors;
-top->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
+bs->total_sectors = source->total_sectors;
+bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
 (BDRV_REQ_FUA & source->supported_write_flags);
-top->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
+bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
  source->supported_zero_flags);
 
 bdrv_ref(target);
-state->target = bdrv_attach_child(top, target, "target", &child_of_bds,
-  BDRV_CHILD_DATA, errp);
-if (!state->target) {
+s->target = bdrv_attach_child(bs, target, "target", &child_of_bds,
+  BDRV_CHILD_DATA, errp);
+if (!s->target) {
 error_prepend(errp, "Cannot attach target child: ");
 return -EINVAL;
 }
 
 bdrv_ref(source);
-top->file = bdrv_attach_child(top, source, "file", &child_of_bds,
-  BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
-  errp);
-if (!top->file) {
+bs->file = bdrv_attach_child(bs, source, "file", &child_of_bds,
+ BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
+ errp);
+if (!bs->file) {
 error_prepend(errp, "Cannot attach file child: ");
 return -EINVAL;
 }
 
-state->bcs = block_copy_state_new(top->file, state->target, false, 
compress,
-  errp);
-if (!state->bcs) {
+s->bcs = block_copy_state_new(bs->file, s->target, false, compress, errp);
+if (!s->bcs) {
 error_prepend(errp, "Cannot create block-copy-state: ");
 return -EINVAL;
 }
-- 
2.29.2

[PATCH v8 31/34] iotests/image-fleecing: rename tgt_node

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

Actually target of backup(sync=None) is not a final backup target:
image fleecing is intended to be used with external tool, which will
copy data from fleecing node to some real backup target.

Also, we are going to add a test case for "push backup with fleecing",
where instead of exporting fleecing node by NBD, we'll start a backup
job from fleecing node to real backup target.

To avoid confusion, let's rename temporary fleecing node now.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 tests/qemu-iotests/tests/image-fleecing | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/qemu-iotests/tests/image-fleecing 
b/tests/qemu-iotests/tests/image-fleecing
index 961941bb27..ec4ef5f3f6 100755
--- a/tests/qemu-iotests/tests/image-fleecing
+++ b/tests/qemu-iotests/tests/image-fleecing
@@ -71,6 +71,7 @@ with iotests.FilePath('base.img') as base_img_path, \
 log('')
 
 src_node = 'source'
+tmp_node = 'temp'
 vm.add_blockdev(f'driver={iotests.imgfmt},file.driver=file,'
 f'file.filename={base_img_path},node-name={src_node}')
 vm.add_device('virtio-scsi')
@@ -82,12 +83,11 @@ with iotests.FilePath('base.img') as base_img_path, \
 log('--- Setting up Fleecing Graph ---')
 log('')
 
-tgt_node = 'fleeceNode'
 
-# create tgt_node backed by src_node
+# create tmp_node backed by src_node
 log(vm.qmp('blockdev-add', {
 'driver': 'qcow2',
-'node-name': tgt_node,
+'node-name': tmp_node,
 'file': {
 'driver': 'file',
 'filename': fleece_img_path,
@@ -99,19 +99,19 @@ with iotests.FilePath('base.img') as base_img_path, \
 log(vm.qmp('blockdev-backup',
job_id='fleecing',
device=src_node,
-   target=tgt_node,
+   target=tmp_node,
sync='none'))
 
 log('')
 log('--- Setting up NBD Export ---')
 log('')
 
-nbd_uri = 'nbd+unix:///%s?socket=%s' % (tgt_node, nbd_sock_path)
+nbd_uri = 'nbd+unix:///%s?socket=%s' % (tmp_node, nbd_sock_path)
 log(vm.qmp('nbd-server-start',
{'addr': { 'type': 'unix',
   'data': { 'path': nbd_sock_path } } }))
 
-log(vm.qmp('nbd-server-add', device=tgt_node))
+log(vm.qmp('nbd-server-add', device=tmp_node))
 
 log('')
 log('--- Sanity Check ---')
@@ -149,7 +149,7 @@ with iotests.FilePath('base.img') as base_img_path, \
 assert e is not None
 log(e, filters=[iotests.filter_qmp_event])
 log(vm.qmp('nbd-server-stop'))
-log(vm.qmp('blockdev-del', node_name=tgt_node))
+log(vm.qmp('blockdev-del', node_name=tmp_node))
 vm.shutdown()
 
 log('')
-- 
2.29.2

[PATCH v8 17/34] block/copy-before-write: bdrv_cbw_append(): drop unused compress arg

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Hanna Reitz 
---
 block/copy-before-write.h | 1 -
 block/backup.c| 2 +-
 block/copy-before-write.c | 7 +++
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/block/copy-before-write.h b/block/copy-before-write.h
index b386fd8f01..51847e711a 100644
--- a/block/copy-before-write.h
+++ b/block/copy-before-write.h
@@ -32,7 +32,6 @@
 BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
   BlockDriverState *target,
   const char *filter_node_name,
-  bool compress,
   BlockCopyState **bcs,
   Error **errp);
 void bdrv_cbw_drop(BlockDriverState *bs);
diff --git a/block/backup.c b/block/backup.c
index 83516297cb..4869f1e5da 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -452,7 +452,7 @@ BlockJob *backup_job_create(const char *job_id, 
BlockDriverState *bs,
 goto error;
 }
 
-cbw = bdrv_cbw_append(bs, target, filter_node_name, false, &bcs, errp);
+cbw = bdrv_cbw_append(bs, target, filter_node_name, &bcs, errp);
 if (!cbw) {
 goto error;
 }
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 4858dcf8ff..1e7180760a 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -145,7 +145,7 @@ static void cbw_child_perm(BlockDriverState *bs, BdrvChild 
*c,
 }
 
 static int cbw_init(BlockDriverState *bs, BlockDriverState *source,
-BlockDriverState *target, bool compress, Error **errp)
+BlockDriverState *target, Error **errp)
 {
 BDRVCopyBeforeWriteState *s = bs->opaque;
 
@@ -173,7 +173,7 @@ static int cbw_init(BlockDriverState *bs, BlockDriverState 
*source,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
  bs->file->bs->supported_zero_flags);
 
-s->bcs = block_copy_state_new(bs->file, s->target, false, compress, errp);
+s->bcs = block_copy_state_new(bs->file, s->target, false, false, errp);
 if (!s->bcs) {
 error_prepend(errp, "Cannot create block-copy-state: ");
 return -EINVAL;
@@ -202,7 +202,6 @@ BlockDriver bdrv_cbw_filter = {
 BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
   BlockDriverState *target,
   const char *filter_node_name,
-  bool compress,
   BlockCopyState **bcs,
   Error **errp)
 {
@@ -221,7 +220,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
 }
 state = top->opaque;
 
-ret = cbw_init(top, source, target, compress, errp);
+ret = cbw_init(top, source, target, errp);
 if (ret < 0) {
 goto fail;
 }
-- 
2.29.2

[PATCH v8 14/34] block/copy-before-write: introduce cbw_init()

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

Move part of bdrv_cbw_append() to new function cbw_open(). It's an
intermediate step for adding normal .bdrv_open() handler to the
filter. With this commit no logic is changed, but we have a function
which will be turned into .bdrv_open() handler in future commit.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 block/copy-before-write.c | 69 +++
 1 file changed, 41 insertions(+), 28 deletions(-)

diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index adbbc64aa9..a4fee645fd 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -144,6 +144,45 @@ static void cbw_child_perm(BlockDriverState *bs, BdrvChild 
*c,
 }
 }
 
+static int cbw_init(BlockDriverState *top, BlockDriverState *source,
+BlockDriverState *target, bool compress, Error **errp)
+{
+BDRVCopyBeforeWriteState *state = top->opaque;
+
+top->total_sectors = source->total_sectors;
+top->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
+(BDRV_REQ_FUA & source->supported_write_flags);
+top->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
+((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
+ source->supported_zero_flags);
+
+bdrv_ref(target);
+state->target = bdrv_attach_child(top, target, "target", &child_of_bds,
+  BDRV_CHILD_DATA, errp);
+if (!state->target) {
+error_prepend(errp, "Cannot attach target child: ");
+return -EINVAL;
+}
+
+bdrv_ref(source);
+top->file = bdrv_attach_child(top, source, "file", &child_of_bds,
+  BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
+  errp);
+if (!top->file) {
+error_prepend(errp, "Cannot attach file child: ");
+return -EINVAL;
+}
+
+state->bcs = block_copy_state_new(top->file, state->target, false, 
compress,
+  errp);
+if (!state->bcs) {
+error_prepend(errp, "Cannot create block-copy-state: ");
+return -EINVAL;
+}
+
+return 0;
+}
+
 BlockDriver bdrv_cbw_filter = {
 .format_name = "copy-before-write",
 .instance_size = sizeof(BDRVCopyBeforeWriteState),
@@ -181,36 +220,10 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState 
*source,
 error_prepend(errp, "Cannot open driver: ");
 return NULL;
 }
-
 state = top->opaque;
-top->total_sectors = source->total_sectors;
-top->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
-(BDRV_REQ_FUA & source->supported_write_flags);
-top->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
-((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
- source->supported_zero_flags);
-
-bdrv_ref(target);
-state->target = bdrv_attach_child(top, target, "target", &child_of_bds,
-  BDRV_CHILD_DATA, errp);
-if (!state->target) {
-error_prepend(errp, "Cannot attach target child: ");
-goto fail;
-}
 
-bdrv_ref(source);
-top->file = bdrv_attach_child(top, source, "file", &child_of_bds,
-  BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
-  errp);
-if (!top->file) {
-error_prepend(errp, "Cannot attach file child: ");
-goto fail;
-}
-
-state->bcs = block_copy_state_new(top->file, state->target, false, 
compress,
-  errp);
-if (!state->bcs) {
-error_prepend(errp, "Cannot create block-copy-state: ");
+ret = cbw_init(top, source, target, compress, errp);
+if (ret < 0) {
 goto fail;
 }
 
-- 
2.29.2

[PATCH v8 10/34] block/copy-before-write: relax permission requirements when no parents

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

We are going to publish copy-before-write filter. So, user should be
able to create it with blockdev-add first, specifying both filtered and
target children. And then do blockdev-reopen, to actually insert the
filter where needed.

Currently, filter unshares write permission unconditionally on source
node. It's good, but it will not allow to do blockdev-add. So, let's
relax restrictions when filter doesn't have any parent.

Test output is modified, as now permission conflict happens only when
job creates a blk parent for filter node.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 block/copy-before-write.c  | 8 +---
 tests/qemu-iotests/283.out | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index a7996d54db..2a51cc64e4 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -142,10 +142,12 @@ static void cbw_child_perm(BlockDriverState *bs, 
BdrvChild *c,
 bdrv_default_perms(bs, c, role, reopen_queue,
perm, shared, nperm, nshared);
 
-if (perm & BLK_PERM_WRITE) {
-*nperm = *nperm | BLK_PERM_CONSISTENT_READ;
+if (!QLIST_EMPTY(&bs->parents)) {
+if (perm & BLK_PERM_WRITE) {
+*nperm = *nperm | BLK_PERM_CONSISTENT_READ;
+}
+*nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
 }
-*nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
 }
 }
 
diff --git a/tests/qemu-iotests/283.out b/tests/qemu-iotests/283.out
index f2b7219632..5bb75952ef 100644
--- a/tests/qemu-iotests/283.out
+++ b/tests/qemu-iotests/283.out
@@ -5,7 +5,7 @@
 {"execute": "blockdev-add", "arguments": {"driver": "blkdebug", "image": 
"base", "node-name": "other", "take-child-perms": ["write"]}}
 {"return": {}}
 {"execute": "blockdev-backup", "arguments": {"device": "source", "sync": 
"full", "target": "target"}}
-{"error": {"class": "GenericError", "desc": "Cannot append copy-before-write 
filter: Permission conflict on node 'base': permissions 'write' are both 
required by node 'other' (uses node 'base' as 'image' child) and unshared by 
node 'source' (uses node 'base' as 'image' child)."}}
+{"error": {"class": "GenericError", "desc": "Permission conflict on node 
'base': permissions 'write' are both required by node 'other' (uses node 'base' 
as 'image' child) and unshared by node 'source' (uses node 'base' as 'image' 
child)."}}
 
 === copy-before-write filter should be gone after job-finalize ===
 
-- 
2.29.2

[PATCH v8 27/34] iotests/222: constantly use single quotes for strings

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

The file use both single and double quotes for strings. Let's be
consistent.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 tests/qemu-iotests/222 | 68 +-
 1 file changed, 34 insertions(+), 34 deletions(-)

diff --git a/tests/qemu-iotests/222 b/tests/qemu-iotests/222
index 5e2556f8df..799369e290 100755
--- a/tests/qemu-iotests/222
+++ b/tests/qemu-iotests/222
@@ -30,23 +30,23 @@ iotests.script_initialize(
 supported_platforms=['linux'],
 )
 
-patterns = [("0x5d", "0", "64k"),
-("0xd5", "1M","64k"),
-("0xdc", "32M",   "64k"),
-("0xcd", "0x3ff", "64k")]  # 64M - 64K
+patterns = [('0x5d', '0', '64k'),
+('0xd5', '1M','64k'),
+('0xdc', '32M',   '64k'),
+('0xcd', '0x3ff', '64k')]  # 64M - 64K
 
-overwrite = [("0xab", "0", "64k"), # Full overwrite
- ("0xad", "0x00f8000", "64k"), # Partial-left (1M-32K)
- ("0x1d", "0x2008000", "64k"), # Partial-right (32M+32K)
- ("0xea", "0x3fe", "64k")] # Adjacent-left (64M - 128K)
+overwrite = [('0xab', '0', '64k'), # Full overwrite
+ ('0xad', '0x00f8000', '64k'), # Partial-left (1M-32K)
+ ('0x1d', '0x2008000', '64k'), # Partial-right (32M+32K)
+ ('0xea', '0x3fe', '64k')] # Adjacent-left (64M - 128K)
 
-zeroes = [("0", "0x00f8000", "32k"), # Left-end of partial-left (1M-32K)
-  ("0", "0x201", "32k"), # Right-end of partial-right (32M+64K)
-  ("0", "0x3fe", "64k")] # overwrite[3]
+zeroes = [('0', '0x00f8000', '32k'), # Left-end of partial-left (1M-32K)
+  ('0', '0x201', '32k'), # Right-end of partial-right (32M+64K)
+  ('0', '0x3fe', '64k')] # overwrite[3]
 
-remainder = [("0xd5", "0x108000",  "32k"), # Right-end of partial-left [1]
- ("0xdc", "32M",   "32k"), # Left-end of partial-right [2]
- ("0xcd", "0x3ff", "64k")] # patterns[3]
+remainder = [('0xd5', '0x108000',  '32k'), # Right-end of partial-left [1]
+ ('0xdc', '32M',   '32k'), # Left-end of partial-right [2]
+ ('0xcd', '0x3ff', '64k')] # patterns[3]
 
 with iotests.FilePath('base.img') as base_img_path, \
  iotests.FilePath('fleece.img') as fleece_img_path, \
@@ -58,7 +58,7 @@ with iotests.FilePath('base.img') as base_img_path, \
 log('')
 
 assert qemu_img('create', '-f', iotests.imgfmt, base_img_path, '64M') == 0
-assert qemu_img('create', '-f', "qcow2", fleece_img_path, '64M') == 0
+assert qemu_img('create', '-f', 'qcow2', fleece_img_path, '64M') == 0
 
 for p in patterns:
 qemu_io('-f', iotests.imgfmt,
@@ -78,43 +78,43 @@ with iotests.FilePath('base.img') as base_img_path, \
 log('--- Setting up Fleecing Graph ---')
 log('')
 
-src_node = "drive0"
-tgt_node = "fleeceNode"
+src_node = 'drive0'
+tgt_node = 'fleeceNode'
 
 # create tgt_node backed by src_node
-log(vm.qmp("blockdev-add", {
-"driver": "qcow2",
-"node-name": tgt_node,
-"file": {
-"driver": "file",
-"filename": fleece_img_path,
+log(vm.qmp('blockdev-add', {
+'driver': 'qcow2',
+'node-name': tgt_node,
+'file': {
+'driver': 'file',
+'filename': fleece_img_path,
 },
-"backing": src_node,
+'backing': src_node,
 }))
 
 # Establish COW from source to fleecing node
-log(vm.qmp("blockdev-backup",
+log(vm.qmp('blockdev-backup',
device=src_node,
target=tgt_node,
-   sync="none"))
+   sync='none'))
 
 log('')
 log('--- Setting up NBD Export ---')
 log('')
 
 nbd_uri = 'nbd+unix:///%s?socket=%s' % (tgt_node, nbd_sock_path)
-log(vm.qmp("nbd-server-start",
-   {"addr": { "type": "unix",
-  "data": { "path": nbd_sock_path } } }))
+log(vm.qmp('nbd-server-start',
+   {'addr': { 'type': 'unix',
+  'data': { 'path': nbd_sock_path } } }))
 
-log(vm.qmp("nbd-server-add", device=tgt_node))
+log(vm.qmp('nbd-server-add', device=tgt_node))
 
 log('')
 log('--- Sanity Check ---')
 log('')
 
 for p in patterns + zeroes:
-cmd = "read -P%s %s %s" % p
+cmd = 'read -P%s %s %s' % p
 log(cmd)
 assert qemu_io_silent('-r', '-f', 'raw', '-c', cmd, nbd_uri) == 0
 
@@ -123,7 +123,7 @@ with iotests.FilePath('base.img') as base_img_path, \
 log('')
 
 for p in overwrite:
-cmd = "write -P%s %s %s" % p
+cmd = 'write -P%s %s %s' % p
 log(cmd)
 log(vm.hmp_qemu_io(src_node, cmd))
 
@@ -132,7 +132,7 @@ with iotests.FilePath('base.img') as base_img_path, \
 log('')
 
 for p in patterns + zeroes:
-cmd = "read -P%s %s %s" % p
+cmd = 'read -P%s %s

[PATCH v8 04/34] qdev: allow setting drive property for realized device

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

We need an ability to insert filters above top block node, attached to
block device. It can't be achieved with blockdev-reopen command. So, we
want do it with help of qom-set.

Intended usage:

Assume there is a node A that is attached to some guest device.

1. blockdev-add to create a filter node B that has A as its child.

2. qom-set to change the node attached to the guest device’s
   BlockBackend from A to B.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 hw/core/qdev-properties-system.c | 43 +++-
 1 file changed, 31 insertions(+), 12 deletions(-)

diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 2760c21f11..e71f5d64d1 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -36,11 +36,11 @@
 
 static bool check_prop_still_unset(Object *obj, const char *name,
const void *old_val, const char *new_val,
-   Error **errp)
+   bool allow_override, Error **errp)
 {
 const GlobalProperty *prop = qdev_find_global_prop(obj, name);
 
-if (!old_val) {
+if (!old_val || (!prop && allow_override)) {
 return true;
 }
 
@@ -93,16 +93,34 @@ static void set_drive_helper(Object *obj, Visitor *v, const 
char *name,
 BlockBackend *blk;
 bool blk_created = false;
 int ret;
+BlockDriverState *bs;
+AioContext *ctx;
 
 if (!visit_type_str(v, name, &str, errp)) {
 return;
 }
 
-/*
- * TODO Should this really be an error?  If no, the old value
- * needs to be released before we store the new one.
- */
-if (!check_prop_still_unset(obj, name, *ptr, str, errp)) {
+if (!check_prop_still_unset(obj, name, *ptr, str, true, errp)) {
+return;
+}
+
+if (*ptr) {
+/* BlockBackend alread exists. So, we want to change attached node */
+blk = *ptr;
+ctx = blk_get_aio_context(blk);
+bs = bdrv_lookup_bs(NULL, str, errp);
+if (!bs) {
+return;
+}
+
+if (ctx != bdrv_get_aio_context(bs)) {
+error_setg(errp, "Different aio context is not supported for new "
+   "node");
+}
+
+aio_context_acquire(ctx);
+blk_replace_bs(blk, bs, errp);
+aio_context_release(ctx);
 return;
 }
 
@@ -114,7 +132,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const 
char *name,
 
 blk = blk_by_name(str);
 if (!blk) {
-BlockDriverState *bs = bdrv_lookup_bs(NULL, str, NULL);
+bs = bdrv_lookup_bs(NULL, str, NULL);
 if (bs) {
 /*
  * If the device supports iothreads, it will make sure to move the
@@ -123,8 +141,7 @@ static void set_drive_helper(Object *obj, Visitor *v, const 
char *name,
  * aware of iothreads require their BlockBackends to be in the main
  * AioContext.
  */
-AioContext *ctx = iothread ? bdrv_get_aio_context(bs) :
- qemu_get_aio_context();
+ctx = iothread ? bdrv_get_aio_context(bs) : qemu_get_aio_context();
 blk = blk_new(ctx, 0, BLK_PERM_ALL);
 blk_created = true;
 
@@ -196,6 +213,7 @@ static void release_drive(Object *obj, const char *name, 
void *opaque)
 const PropertyInfo qdev_prop_drive = {
 .name  = "str",
 .description = "Node name or ID of a block device to use as a backend",
+.realized_set_allowed = true,
 .get   = get_drive,
 .set   = set_drive,
 .release = release_drive,
@@ -204,6 +222,7 @@ const PropertyInfo qdev_prop_drive = {
 const PropertyInfo qdev_prop_drive_iothread = {
 .name  = "str",
 .description = "Node name or ID of a block device to use as a backend",
+.realized_set_allowed = true,
 .get   = get_drive,
 .set   = set_drive_iothread,
 .release = release_drive,
@@ -238,7 +257,7 @@ static void set_chr(Object *obj, Visitor *v, const char 
*name, void *opaque,
  * TODO Should this really be an error?  If no, the old value
  * needs to be released before we store the new one.
  */
-if (!check_prop_still_unset(obj, name, be->chr, str, errp)) {
+if (!check_prop_still_unset(obj, name, be->chr, str, false, errp)) {
 return;
 }
 
@@ -408,7 +427,7 @@ static void set_netdev(Object *obj, Visitor *v, const char 
*name,
  * TODO Should this really be an error?  If no, the old value
  * needs to be released before we store the new one.
  */
-if (!check_prop_still_unset(obj, name, ncs[i], str, errp)) {
+if (!check_prop_still_unset(obj, name, ncs[i], str, false, errp)) {
 goto out;
 }
 
-- 
2.29.2

[PATCH v8 08/34] block/backup: set copy_range and compress after filter insertion

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

We are going to publish copy-before-write filter, so it would be
initialized through options. Still we don't want to publish compress
and copy-range options, as

1. Modern way to enable compression is to use compress filter.

2. For copy-range it's unclean how to make proper interface:
 - it's has experimental prefix for backup job anyway
 - the whole BackupPerf structure doesn't make sense for the filter
 So, let's just add copy-range possibility to the filter later if
 needed.

Still, we are going to continue support for compression and
experimental copy-range in backup job. So, set these options after
filter creation.

Note, that we can drop "compress" argument of bdrv_cbw_append() now, as
well as "perf". The only reason not doing so is that now, when I
prepare this patch the big series around it is already reviewed and I
want to avoid extra rebase conflicts to simplify review of the
following version.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Hanna Reitz 
---
 block/copy-before-write.h | 1 -
 block/backup.c| 3 ++-
 block/copy-before-write.c | 4 +---
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/block/copy-before-write.h b/block/copy-before-write.h
index f37e2249ae..538aab8bdb 100644
--- a/block/copy-before-write.h
+++ b/block/copy-before-write.h
@@ -33,7 +33,6 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
   BlockDriverState *target,
   const char *filter_node_name,
   uint64_t cluster_size,
-  BackupPerf *perf,
   bool compress,
   BlockCopyState **bcs,
   Error **errp);
diff --git a/block/backup.c b/block/backup.c
index 84f9a5f02c..b31fd99ab3 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -504,7 +504,7 @@ BlockJob *backup_job_create(const char *job_id, 
BlockDriverState *bs,
 }
 
 cbw = bdrv_cbw_append(bs, target, filter_node_name,
-  cluster_size, perf, compress, &bcs, errp);
+  cluster_size, false, &bcs, errp);
 if (!cbw) {
 goto error;
 }
@@ -530,6 +530,7 @@ BlockJob *backup_job_create(const char *job_id, 
BlockDriverState *bs,
 job->len = len;
 job->perf = *perf;
 
+block_copy_set_copy_opts(bcs, perf->use_copy_range, compress);
 block_copy_set_progress_meter(bcs, &job->common.job.progress);
 block_copy_set_speed(bcs, speed);
 
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 4337076c1c..235251a640 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -170,7 +170,6 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
   BlockDriverState *target,
   const char *filter_node_name,
   uint64_t cluster_size,
-  BackupPerf *perf,
   bool compress,
   BlockCopyState **bcs,
   Error **errp)
@@ -217,8 +216,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
 
 state->cluster_size = cluster_size;
 state->bcs = block_copy_state_new(top->backing, state->target,
-  cluster_size, perf->use_copy_range,
-  compress, errp);
+  cluster_size, false, compress, errp);
 if (!state->bcs) {
 error_prepend(errp, "Cannot create block-copy-state: ");
 goto fail;
-- 
2.29.2

[PATCH v8 12/34] block/copy-before-write: use file child instead of backing

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

We are going to publish copy-before-write filter, and there no public
backing-child-based filter in Qemu. No reason to create a precedent, so
let's refactor copy-before-write filter instead.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 block/copy-before-write.c | 39 ++-
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 945d9340f4..7a6c15f141 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -43,7 +43,7 @@ static coroutine_fn int cbw_co_preadv(
 BlockDriverState *bs, uint64_t offset, uint64_t bytes,
 QEMUIOVector *qiov, int flags)
 {
-return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
+return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
 }
 
 static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
@@ -71,7 +71,7 @@ static int coroutine_fn cbw_co_pdiscard(BlockDriverState *bs,
 return ret;
 }
 
-return bdrv_co_pdiscard(bs->backing, offset, bytes);
+return bdrv_co_pdiscard(bs->file, offset, bytes);
 }
 
 static int coroutine_fn cbw_co_pwrite_zeroes(BlockDriverState *bs,
@@ -82,7 +82,7 @@ static int coroutine_fn cbw_co_pwrite_zeroes(BlockDriverState 
*bs,
 return ret;
 }
 
-return bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags);
+return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 }
 
 static coroutine_fn int cbw_co_pwritev(BlockDriverState *bs,
@@ -95,29 +95,22 @@ static coroutine_fn int cbw_co_pwritev(BlockDriverState *bs,
 return ret;
 }
 
-return bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags);
+return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
 }
 
 static int coroutine_fn cbw_co_flush(BlockDriverState *bs)
 {
-if (!bs->backing) {
+if (!bs->file) {
 return 0;
 }
 
-return bdrv_co_flush(bs->backing->bs);
+return bdrv_co_flush(bs->file->bs);
 }
 
 static void cbw_refresh_filename(BlockDriverState *bs)
 {
-if (bs->backing == NULL) {
-/*
- * we can be here after failed bdrv_attach_child in
- * bdrv_set_backing_hd
- */
-return;
-}
 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
-bs->backing->bs->filename);
+bs->file->bs->filename);
 }
 
 static void cbw_child_perm(BlockDriverState *bs, BdrvChild *c,
@@ -186,6 +179,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
 top = bdrv_new_open_driver(&bdrv_cbw_filter, filter_node_name,
BDRV_O_RDWR, errp);
 if (!top) {
+error_prepend(errp, "Cannot open driver: ");
 return NULL;
 }
 
@@ -201,21 +195,32 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState 
*source,
 state->target = bdrv_attach_child(top, target, "target", &child_of_bds,
   BDRV_CHILD_DATA, errp);
 if (!state->target) {
+error_prepend(errp, "Cannot attach target child: ");
+bdrv_unref(top);
+return NULL;
+}
+
+bdrv_ref(source);
+top->file = bdrv_attach_child(top, source, "file", &child_of_bds,
+  BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
+  errp);
+if (!top->file) {
+error_prepend(errp, "Cannot attach file child: ");
 bdrv_unref(top);
 return NULL;
 }
 
 bdrv_drained_begin(source);
 
-ret = bdrv_append(top, source, errp);
+ret = bdrv_replace_node(source, top, errp);
 if (ret < 0) {
 error_prepend(errp, "Cannot append copy-before-write filter: ");
 goto fail;
 }
 appended = true;
 
-state->bcs = block_copy_state_new(top->backing, state->target,
-  false, compress, errp);
+state->bcs = block_copy_state_new(top->file, state->target, false, 
compress,
+  errp);
 if (!state->bcs) {
 error_prepend(errp, "Cannot create block-copy-state: ");
 goto fail;
-- 
2.29.2

[PATCH v8 03/34] qdev-properties: PropertyInfo: add realized_set_allowed field

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

Add field, so property can declare support for setting the property
when device is realized. To be used in the following commit.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 include/hw/qdev-properties.h | 1 +
 hw/core/qdev-properties.c| 6 +++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/hw/qdev-properties.h b/include/hw/qdev-properties.h
index 0ef97d60ce..f7925f67d0 100644
--- a/include/hw/qdev-properties.h
+++ b/include/hw/qdev-properties.h
@@ -32,6 +32,7 @@ struct PropertyInfo {
 const char *name;
 const char *description;
 const QEnumLookup *enum_table;
+bool realized_set_allowed; /* allow setting property on realized device */
 int (*print)(Object *obj, Property *prop, char *dest, size_t len);
 void (*set_default_value)(ObjectProperty *op, const Property *prop);
 ObjectProperty *(*create)(ObjectClass *oc, const char *name,
diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c
index 50f40949f5..c34aac6ebc 100644
--- a/hw/core/qdev-properties.c
+++ b/hw/core/qdev-properties.c
@@ -26,11 +26,11 @@ void qdev_prop_set_after_realize(DeviceState *dev, const 
char *name,
 
 /* returns: true if property is allowed to be set, false otherwise */
 static bool qdev_prop_allow_set(Object *obj, const char *name,
-Error **errp)
+const PropertyInfo *info, Error **errp)
 {
 DeviceState *dev = DEVICE(obj);
 
-if (dev->realized) {
+if (dev->realized && !info->realized_set_allowed) {
 qdev_prop_set_after_realize(dev, name, errp);
 return false;
 }
@@ -79,7 +79,7 @@ static void field_prop_set(Object *obj, Visitor *v, const 
char *name,
 {
 Property *prop = opaque;
 
-if (!qdev_prop_allow_set(obj, name, errp)) {
+if (!qdev_prop_allow_set(obj, name, prop->info, errp)) {
 return;
 }
 
-- 
2.29.2

[PATCH v8 20/34] block/block-copy: make setting progress optional

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

Now block-copy will crash if user don't set progress meter by
block_copy_set_progress_meter(). copy-before-write filter will be used
in separate of backup job, and it doesn't want any progress meter (for
now). So, allow not setting it.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 block/block-copy.c | 18 +++---
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/block/block-copy.c b/block/block-copy.c
index b0e0a38330..975f9986a4 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -291,9 +291,11 @@ static void coroutine_fn block_copy_task_end(BlockCopyTask 
*task, int ret)
 bdrv_set_dirty_bitmap(task->s->copy_bitmap, task->offset, task->bytes);
 }
 QLIST_REMOVE(task, list);
-progress_set_remaining(task->s->progress,
-   bdrv_get_dirty_count(task->s->copy_bitmap) +
-   task->s->in_flight_bytes);
+if (task->s->progress) {
+progress_set_remaining(task->s->progress,
+   bdrv_get_dirty_count(task->s->copy_bitmap) +
+   task->s->in_flight_bytes);
+}
 qemu_co_queue_restart_all(&task->wait_queue);
 }
 
@@ -593,7 +595,7 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
 t->call_state->ret = ret;
 t->call_state->error_is_read = error_is_read;
 }
-} else {
+} else if (s->progress) {
 progress_work_done(s->progress, t->bytes);
 }
 }
@@ -699,9 +701,11 @@ int64_t block_copy_reset_unallocated(BlockCopyState *s,
 if (!ret) {
 qemu_co_mutex_lock(&s->lock);
 bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
-progress_set_remaining(s->progress,
-   bdrv_get_dirty_count(s->copy_bitmap) +
-   s->in_flight_bytes);
+if (s->progress) {
+progress_set_remaining(s->progress,
+   bdrv_get_dirty_count(s->copy_bitmap) +
+   s->in_flight_bytes);
+}
 qemu_co_mutex_unlock(&s->lock);
 }
 
-- 
2.29.2

[PATCH v8 11/34] block/copy-before-write: drop extra bdrv_unref on failure path

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

bdrv_attach_child() do bdrv_unref() on failure, so we shouldn't do it
by hand here.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 block/copy-before-write.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 2a51cc64e4..945d9340f4 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -201,7 +201,6 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
 state->target = bdrv_attach_child(top, target, "target", &child_of_bds,
   BDRV_CHILD_DATA, errp);
 if (!state->target) {
-bdrv_unref(target);
 bdrv_unref(top);
 return NULL;
 }
-- 
2.29.2

[PATCH v8 13/34] block/copy-before-write: bdrv_cbw_append(): replace child at last

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

Refactor the function to replace child at last. Thus we don't need to
revert it and code is simplified.

block-copy state initialization being done before replacing the child
doesn't need any drained section.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 block/copy-before-write.c | 33 +++--
 1 file changed, 11 insertions(+), 22 deletions(-)

diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 7a6c15f141..adbbc64aa9 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -172,7 +172,6 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
 int ret;
 BDRVCopyBeforeWriteState *state;
 BlockDriverState *top;
-bool appended = false;
 
 assert(source->total_sectors == target->total_sectors);
 
@@ -196,8 +195,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
   BDRV_CHILD_DATA, errp);
 if (!state->target) {
 error_prepend(errp, "Cannot attach target child: ");
-bdrv_unref(top);
-return NULL;
+goto fail;
 }
 
 bdrv_ref(source);
@@ -206,18 +204,8 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
   errp);
 if (!top->file) {
 error_prepend(errp, "Cannot attach file child: ");
-bdrv_unref(top);
-return NULL;
-}
-
-bdrv_drained_begin(source);
-
-ret = bdrv_replace_node(source, top, errp);
-if (ret < 0) {
-error_prepend(errp, "Cannot append copy-before-write filter: ");
 goto fail;
 }
-appended = true;
 
 state->bcs = block_copy_state_new(top->file, state->target, false, 
compress,
   errp);
@@ -225,21 +213,22 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState 
*source,
 error_prepend(errp, "Cannot create block-copy-state: ");
 goto fail;
 }
-*bcs = state->bcs;
 
+bdrv_drained_begin(source);
+ret = bdrv_replace_node(source, top, errp);
 bdrv_drained_end(source);
+if (ret < 0) {
+error_prepend(errp, "Cannot append copy-before-write filter: ");
+goto fail;
+}
+
+*bcs = state->bcs;
 
 return top;
 
 fail:
-if (appended) {
-bdrv_cbw_drop(top);
-} else {
-bdrv_unref(top);
-}
-
-bdrv_drained_end(source);
-
+block_copy_state_free(state->bcs);
+bdrv_unref(top);
 return NULL;
 }
 
-- 
2.29.2

[PATCH v8 05/34] block: rename backup-top to copy-before-write

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

We are going to convert backup_top to full featured public filter,
which can be used in separate of backup job. Start from renaming from
"how it used" to "what it does".

While updating comments in 283 iotest, drop and rephrase also things
about ".active", as this field is now dropped, and filter doesn't have
"inactive" mode.

Note that this change may be considered as incompatible interface
change, as backup-top filter format name was visible through
query-block and query-named-block-nodes.

Still, consider the following reasoning:

1. backup-top was never documented, so if someone depends on format
   name (for driver that can't be used other than it is automatically
   inserted on backup job start), it's a kind of "undocumented feature
   use". So I think we are free to change it.

2. There is a hope, that there is no such users: it's a lot more native
   to give a good node-name to backup-top filter if need to operate
   with it somehow, and don't touch format name.

3. Another "incompatible" change in further commit would be moving
   copy-before-write filter from using backing child to file child. And
   this is even more reasonable than renaming: for now all public
   filters are file-child based.

So, it's a risky change, but risk seems small and good interface worth
it.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 block/{backup-top.h => copy-before-write.h} |  28 +++---
 block/backup.c  |  22 ++---
 block/{backup-top.c => copy-before-write.c} | 100 ++--
 MAINTAINERS |   4 +-
 block/meson.build   |   2 +-
 tests/qemu-iotests/283  |  35 +++
 tests/qemu-iotests/283.out  |   4 +-
 7 files changed, 95 insertions(+), 100 deletions(-)
 rename block/{backup-top.h => copy-before-write.h} (56%)
 rename block/{backup-top.c => copy-before-write.c} (62%)

diff --git a/block/backup-top.h b/block/copy-before-write.h
similarity index 56%
rename from block/backup-top.h
rename to block/copy-before-write.h
index b28b0031c4..5977b7aa31 100644
--- a/block/backup-top.h
+++ b/block/copy-before-write.h
@@ -1,10 +1,10 @@
 /*
- * backup-top filter driver
+ * copy-before-write filter driver
  *
  * The driver performs Copy-Before-Write (CBW) operation: it is injected above
  * some node, and before each write it copies _old_ data to the target node.
  *
- * Copyright (c) 2018-2019 Virtuozzo International GmbH.
+ * Copyright (c) 2018-2021 Virtuozzo International GmbH.
  *
  * Author:
  *  Sementsov-Ogievskiy Vladimir 
@@ -23,20 +23,20 @@
  * along with this program. If not, see .
  */
 
-#ifndef BACKUP_TOP_H
-#define BACKUP_TOP_H
+#ifndef COPY_BEFORE_WRITE_H
+#define COPY_BEFORE_WRITE_H
 
 #include "block/block_int.h"
 #include "block/block-copy.h"
 
-BlockDriverState *bdrv_backup_top_append(BlockDriverState *source,
- BlockDriverState *target,
- const char *filter_node_name,
- uint64_t cluster_size,
- BackupPerf *perf,
- BdrvRequestFlags write_flags,
- BlockCopyState **bcs,
- Error **errp);
-void bdrv_backup_top_drop(BlockDriverState *bs);
+BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+  BlockDriverState *target,
+  const char *filter_node_name,
+  uint64_t cluster_size,
+  BackupPerf *perf,
+  BdrvRequestFlags write_flags,
+  BlockCopyState **bcs,
+  Error **errp);
+void bdrv_cbw_drop(BlockDriverState *bs);
 
-#endif /* BACKUP_TOP_H */
+#endif /* COPY_BEFORE_WRITE_H */
diff --git a/block/backup.c b/block/backup.c
index bd3614ce70..ac91821b08 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -27,13 +27,13 @@
 #include "qemu/bitmap.h"
 #include "qemu/error-report.h"
 
-#include "block/backup-top.h"
+#include "block/copy-before-write.h"
 
 #define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
 
 typedef struct BackupBlockJob {
 BlockJob common;
-BlockDriverState *backup_top;
+BlockDriverState *cbw;
 BlockDriverState *source_bs;
 BlockDriverState *target_bs;
 
@@ -104,7 +104,7 @@ static void backup_clean(Job *job)
 {
 BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
 block_job_remove_all_bdrv(&s->common);
-bdrv_backup_top_drop(s->backup_top);
+bdrv_cbw_drop(s->cbw);
 }
 
 void backup_do_checkpoint(BlockJob *job, Error **errp)
@@ -408,7 +408,7 @@ BlockJob *backup_job_create(const char *job_id, 
BlockDriverState *bs,
 BackupBlockJob *job = NULL;
 int64_t cluster_size;

[PATCH v8 09/34] block/backup: move cluster size calculation to block-copy

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

The main consumer of cluster-size is block-copy. Let's calculate it
here instead of passing through backup-top.

We are going to publish copy-before-write filter soon, so it will be
created through options. But we don't want for now to make explicit
option for cluster-size, let's continue to calculate it automatically.
So, now is the time to get rid of cluster_size argument for
bdrv_cbw_append().

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 block/copy-before-write.h  |  1 -
 include/block/block-copy.h |  5 +--
 block/backup.c | 62 ++
 block/block-copy.c | 51 ++-
 block/copy-before-write.c  | 10 +++---
 5 files changed, 66 insertions(+), 63 deletions(-)

diff --git a/block/copy-before-write.h b/block/copy-before-write.h
index 538aab8bdb..b386fd8f01 100644
--- a/block/copy-before-write.h
+++ b/block/copy-before-write.h
@@ -32,7 +32,6 @@
 BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
   BlockDriverState *target,
   const char *filter_node_name,
-  uint64_t cluster_size,
   bool compress,
   BlockCopyState **bcs,
   Error **errp);
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
index dca6c4ce36..b8a2d63545 100644
--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
@@ -25,8 +25,8 @@ typedef struct BlockCopyState BlockCopyState;
 typedef struct BlockCopyCallState BlockCopyCallState;
 
 BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
- int64_t cluster_size, bool use_copy_range,
- bool compress, Error **errp);
+ bool use_copy_range, bool compress,
+ Error **errp);
 
 /* Function should be called prior any actual copy request */
 void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
@@ -91,6 +91,7 @@ void block_copy_kick(BlockCopyCallState *call_state);
 void block_copy_call_cancel(BlockCopyCallState *call_state);
 
 BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s);
+int64_t block_copy_cluster_size(BlockCopyState *s);
 void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip);
 
 #endif /* BLOCK_COPY_H */
diff --git a/block/backup.c b/block/backup.c
index b31fd99ab3..83516297cb 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -29,8 +29,6 @@
 
 #include "block/copy-before-write.h"
 
-#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
-
 typedef struct BackupBlockJob {
 BlockJob common;
 BlockDriverState *cbw;
@@ -354,43 +352,6 @@ static const BlockJobDriver backup_job_driver = {
 .set_speed = backup_set_speed,
 };
 
-static int64_t backup_calculate_cluster_size(BlockDriverState *target,
- Error **errp)
-{
-int ret;
-BlockDriverInfo bdi;
-bool target_does_cow = bdrv_backing_chain_next(target);
-
-/*
- * If there is no backing file on the target, we cannot rely on COW if our
- * backup cluster size is smaller than the target cluster size. Even for
- * targets with a backing file, try to avoid COW if possible.
- */
-ret = bdrv_get_info(target, &bdi);
-if (ret == -ENOTSUP && !target_does_cow) {
-/* Cluster size is not defined */
-warn_report("The target block device doesn't provide "
-"information about the block size and it doesn't have a "
-"backing file. The default block size of %u bytes is "
-"used. If the actual block size of the target exceeds "
-"this default, the backup may be unusable",
-BACKUP_CLUSTER_SIZE_DEFAULT);
-return BACKUP_CLUSTER_SIZE_DEFAULT;
-} else if (ret < 0 && !target_does_cow) {
-error_setg_errno(errp, -ret,
-"Couldn't determine the cluster size of the target image, "
-"which has no backing file");
-error_append_hint(errp,
-"Aborting, since this may create an unusable destination image\n");
-return ret;
-} else if (ret < 0 && target_does_cow) {
-/* Not fatal; just trudge on ahead. */
-return BACKUP_CLUSTER_SIZE_DEFAULT;
-}
-
-return MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
-}
-
 BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
   BlockDriverState *target, int64_t speed,
   MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
@@ -448,11 +409,6 @@ BlockJob *backup_job_create(const char *job_id, 
BlockDriverState *bs,
 return NULL;
 }
 
-cluster_size = backup_calculate_cluster_size(target, errp);
-if (cluster_size < 0) {
-goto error;
-}
-

[PATCH v8 02/34] block: introduce blk_replace_bs

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

Add function to change bs inside blk.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 include/sysemu/block-backend.h | 1 +
 block/block-backend.c  | 8 
 2 files changed, 9 insertions(+)

diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 9ac5f7bbd3..29d4fdbf63 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -102,6 +102,7 @@ BlockBackend *blk_by_public(BlockBackendPublic *public);
 BlockDriverState *blk_bs(BlockBackend *blk);
 void blk_remove_bs(BlockBackend *blk);
 int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp);
+int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp);
 bool bdrv_has_blk(BlockDriverState *bs);
 bool bdrv_is_root_node(BlockDriverState *bs);
 int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
diff --git a/block/block-backend.c b/block/block-backend.c
index deb55c272e..6140d133e2 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -869,6 +869,14 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, 
Error **errp)
 return 0;
 }
 
+/*
+ * Change BlockDriverState associated with @blk.
+ */
+int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp)
+{
+return bdrv_replace_child_bs(blk->root, new_bs, errp);
+}
+
 /*
  * Sets the permission bitmasks that the user of the BlockBackend needs.
  */
-- 
2.29.2

[PATCH v8 07/34] block/block-copy: introduce block_copy_set_copy_opts()

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

We'll need a possibility to set compress and use_copy_range options
after initialization of the state. So make corresponding part of
block_copy_state_new() separate and public.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/block/block-copy.h |  3 +++
 block/block-copy.c | 49 ++
 2 files changed, 32 insertions(+), 20 deletions(-)

diff --git a/include/block/block-copy.h b/include/block/block-copy.h
index 734389d32a..dca6c4ce36 100644
--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
@@ -28,6 +28,9 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, 
BdrvChild *target,
  int64_t cluster_size, bool use_copy_range,
  bool compress, Error **errp);
 
+/* Function should be called prior any actual copy request */
+void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
+  bool compress);
 void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm);
 
 void block_copy_state_free(BlockCopyState *s);
diff --git a/block/block-copy.c b/block/block-copy.c
index 58b4345a5a..e83870ff81 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -315,6 +315,33 @@ static uint32_t block_copy_max_transfer(BdrvChild *source, 
BdrvChild *target)
  target->bs->bl.max_transfer));
 }
 
+void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
+  bool compress)
+{
+/* Keep BDRV_REQ_SERIALISING set (or not set) in block_copy_state_new() */
+s->write_flags = (s->write_flags & BDRV_REQ_SERIALISING) |
+(compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
+
+if (s->max_transfer < s->cluster_size) {
+/*
+ * copy_range does not respect max_transfer. We don't want to bother
+ * with requests smaller than block-copy cluster size, so fallback to
+ * buffered copying (read and write respect max_transfer on their
+ * behalf).
+ */
+s->method = COPY_READ_WRITE_CLUSTER;
+} else if (compress) {
+/* Compression supports only cluster-size writes and no copy-range. */
+s->method = COPY_READ_WRITE_CLUSTER;
+} else {
+/*
+ * If copy range enabled, start with COPY_RANGE_SMALL, until first
+ * successful copy_range (look at block_copy_do_copy).
+ */
+s->method = use_copy_range ? COPY_RANGE_SMALL : COPY_READ_WRITE;
+}
+}
+
 BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
  int64_t cluster_size, bool use_copy_range,
  bool compress, Error **errp)
@@ -353,32 +380,14 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, 
BdrvChild *target,
 .copy_bitmap = copy_bitmap,
 .cluster_size = cluster_size,
 .len = bdrv_dirty_bitmap_size(copy_bitmap),
-.write_flags = (is_fleecing ? BDRV_REQ_SERIALISING : 0) |
-(compress ? BDRV_REQ_WRITE_COMPRESSED : 0),
+.write_flags = (is_fleecing ? BDRV_REQ_SERIALISING : 0),
 .mem = shres_create(BLOCK_COPY_MAX_MEM),
 .max_transfer = QEMU_ALIGN_DOWN(
 block_copy_max_transfer(source, target),
 cluster_size),
 };
 
-if (s->max_transfer < cluster_size) {
-/*
- * copy_range does not respect max_transfer. We don't want to bother
- * with requests smaller than block-copy cluster size, so fallback to
- * buffered copying (read and write respect max_transfer on their
- * behalf).
- */
-s->method = COPY_READ_WRITE_CLUSTER;
-} else if (compress) {
-/* Compression supports only cluster-size writes and no copy-range. */
-s->method = COPY_READ_WRITE_CLUSTER;
-} else {
-/*
- * If copy range enabled, start with COPY_RANGE_SMALL, until first
- * successful copy_range (look at block_copy_do_copy).
- */
-s->method = use_copy_range ? COPY_RANGE_SMALL : COPY_READ_WRITE;
-}
+block_copy_set_copy_opts(s, use_copy_range, compress);
 
 ratelimit_init(&s->rate_limit);
 qemu_co_mutex_init(&s->lock);
-- 
2.29.2

[PATCH v8 01/34] block: introduce bdrv_replace_child_bs()

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

Add function to transactionally replace bs inside BdrvChild.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 include/block/block.h |  2 ++
 block.c   | 31 +++
 2 files changed, 33 insertions(+)

diff --git a/include/block/block.h b/include/block/block.h
index 3477290f9a..740038a892 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -361,6 +361,8 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState 
*bs_top,
 Error **errp);
 int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
   Error **errp);
+int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
+  Error **errp);
 BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
int flags, Error **errp);
 int bdrv_drop_filter(BlockDriverState *bs, Error **errp);
diff --git a/block.c b/block.c
index e97ce0b1c8..b2b66263f9 100644
--- a/block.c
+++ b/block.c
@@ -5048,6 +5048,37 @@ out:
 return ret;
 }
 
+/* Not for empty child */
+int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
+  Error **errp)
+{
+int ret;
+Transaction *tran = tran_new();
+g_autoptr(GHashTable) found = NULL;
+g_autoptr(GSList) refresh_list = NULL;
+BlockDriverState *old_bs = child->bs;
+
+bdrv_ref(old_bs);
+bdrv_drained_begin(old_bs);
+bdrv_drained_begin(new_bs);
+
+bdrv_replace_child_tran(child, new_bs, tran);
+
+found = g_hash_table_new(NULL, NULL);
+refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs);
+refresh_list = bdrv_topological_dfs(refresh_list, found, new_bs);
+
+ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
+
+tran_finalize(tran, ret);
+
+bdrv_drained_end(old_bs);
+bdrv_drained_end(new_bs);
+bdrv_unref(old_bs);
+
+return ret;
+}
+
 static void bdrv_delete(BlockDriverState *bs)
 {
 assert(bdrv_op_blocker_is_empty(bs));
-- 
2.29.2

[PATCH v8 06/34] block-copy: move detecting fleecing scheme to block-copy

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

We want to simplify initialization interface of copy-before-write
filter as we are going to make it public. So, let's detect fleecing
scheme exactly in block-copy code, to not pass this information through
extra levels.

Why not just set BDRV_REQ_SERIALISING unconditionally: because we are
going to implement new more efficient fleecing scheme which will not
rely on backing feature.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Hanna Reitz 
---
 block/copy-before-write.h  |  2 +-
 include/block/block-copy.h |  3 +--
 block/backup.c | 21 +
 block/block-copy.c | 24 +---
 block/copy-before-write.c  |  4 ++--
 5 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/block/copy-before-write.h b/block/copy-before-write.h
index 5977b7aa31..f37e2249ae 100644
--- a/block/copy-before-write.h
+++ b/block/copy-before-write.h
@@ -34,7 +34,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
   const char *filter_node_name,
   uint64_t cluster_size,
   BackupPerf *perf,
-  BdrvRequestFlags write_flags,
+  bool compress,
   BlockCopyState **bcs,
   Error **errp);
 void bdrv_cbw_drop(BlockDriverState *bs);
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
index 5c8278895c..734389d32a 100644
--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
@@ -26,8 +26,7 @@ typedef struct BlockCopyCallState BlockCopyCallState;
 
 BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
  int64_t cluster_size, bool use_copy_range,
- BdrvRequestFlags write_flags,
- Error **errp);
+ bool compress, Error **errp);
 
 void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm);
 
diff --git a/block/backup.c b/block/backup.c
index ac91821b08..84f9a5f02c 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -407,7 +407,6 @@ BlockJob *backup_job_create(const char *job_id, 
BlockDriverState *bs,
 int64_t len, target_len;
 BackupBlockJob *job = NULL;
 int64_t cluster_size;
-BdrvRequestFlags write_flags;
 BlockDriverState *cbw = NULL;
 BlockCopyState *bcs = NULL;
 
@@ -504,26 +503,8 @@ BlockJob *backup_job_create(const char *job_id, 
BlockDriverState *bs,
 goto error;
 }
 
-/*
- * If source is in backing chain of target assume that target is going to 
be
- * used for "image fleecing", i.e. it should represent a kind of snapshot 
of
- * source at backup-start point in time. And target is going to be read by
- * somebody (for example, used as NBD export) during backup job.
- *
- * In this case, we need to add BDRV_REQ_SERIALISING write flag to avoid
- * intersection of backup writes and third party reads from target,
- * otherwise reading from target we may occasionally read already updated 
by
- * guest data.
- *
- * For more information see commit f8d59dfb40bb and test
- * tests/qemu-iotests/222
- */
-write_flags = (bdrv_chain_contains(target, bs) ? BDRV_REQ_SERIALISING : 0) 
|
-  (compress ? BDRV_REQ_WRITE_COMPRESSED : 0),
-
 cbw = bdrv_cbw_append(bs, target, filter_node_name,
-cluster_size, perf,
-write_flags, &bcs, errp);
+  cluster_size, perf, compress, &bcs, errp);
 if (!cbw) {
 goto error;
 }
diff --git a/block/block-copy.c b/block/block-copy.c
index 0becad52da..58b4345a5a 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -317,10 +317,11 @@ static uint32_t block_copy_max_transfer(BdrvChild 
*source, BdrvChild *target)
 
 BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
  int64_t cluster_size, bool use_copy_range,
- BdrvRequestFlags write_flags, Error 
**errp)
+ bool compress, Error **errp)
 {
 BlockCopyState *s;
 BdrvDirtyBitmap *copy_bitmap;
+bool is_fleecing;
 
 copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
errp);
@@ -329,6 +330,22 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, 
BdrvChild *target,
 }
 bdrv_disable_dirty_bitmap(copy_bitmap);
 
+/*
+ * If source is in backing chain of target assume that target is going to 
be
+ * used for "image fleecing", i.e. it should represent a kind of snapshot 
of
+ * source at backup-start point in time. And target is going to be read by
+ * somebody (for example, used as NBD export) during back

[PATCH v8 00/34] block: publish backup-top filter

2021-08-24 Thread Vladimir Sementsov-Ogievskiy

Hi all!

v8:

06: add Hanna's r-b
07: keep is_fleecing detection in _new() function
08,17,18: add Hanna's r-b
19: wording, s/6.1/6.2/, add Markus's a-b
25: new
29: add John's r-b
34: new

Patches without r-b: 07, 25, 34

Vladimir Sementsov-Ogievskiy (34):
  block: introduce bdrv_replace_child_bs()
  block: introduce blk_replace_bs
  qdev-properties: PropertyInfo: add realized_set_allowed field
  qdev: allow setting drive property for realized device
  block: rename backup-top to copy-before-write
  block-copy: move detecting fleecing scheme to block-copy
  block/block-copy: introduce block_copy_set_copy_opts()
  block/backup: set copy_range and compress after filter insertion
  block/backup: move cluster size calculation to block-copy
  block/copy-before-write: relax permission requirements when no parents
  block/copy-before-write: drop extra bdrv_unref on failure path
  block/copy-before-write: use file child instead of backing
  block/copy-before-write: bdrv_cbw_append(): replace child at last
  block/copy-before-write: introduce cbw_init()
  block/copy-before-write: cbw_init(): rename variables
  block/copy-before-write: cbw_init(): use file child after attaching
  block/copy-before-write: bdrv_cbw_append(): drop unused compress arg
  block/copy-before-write: cbw_init(): use options
  block/copy-before-write: initialize block-copy bitmap
  block/block-copy: make setting progress optional
  block/copy-before-write: make public block driver
  qapi: publish copy-before-write filter
  python/qemu/machine.py: refactor _qemu_args()
  python/qemu/machine: QEMUMachine: improve qmp() method
  python:QEMUMachine: template typing for self returning methods
  iotests/222: fix pylint and mypy complains
  iotests/222: constantly use single quotes for strings
  iotests: move 222 to tests/image-fleecing
  iotests.py: hmp_qemu_io: support qdev
  iotests/image-fleecing: proper source device
  iotests/image-fleecing: rename tgt_node
  iotests/image-fleecing: prepare for adding new test-case
  iotests/image-fleecing: add test-case for copy-before-write filter
  block/block-copy: block_copy_state_new(): drop extra arguments

 qapi/block-core.json|  25 +-
 block/{backup-top.h => copy-before-write.h} |  25 +-
 include/block/block-copy.h  |   6 +-
 include/block/block.h   |   2 +
 include/hw/qdev-properties.h|   1 +
 include/sysemu/block-backend.h  |   1 +
 block.c |  31 +++
 block/backup-top.c  | 253 ---
 block/backup.c  | 116 ++---
 block/block-backend.c   |   8 +
 block/block-copy.c  | 135 ---
 block/copy-before-write.c   | 256 
 hw/core/qdev-properties-system.c|  43 +++-
 hw/core/qdev-properties.c   |   6 +-
 MAINTAINERS |   4 +-
 block/meson.build   |   2 +-
 python/qemu/machine/machine.py  |  40 +--
 tests/qemu-iotests/222  | 159 
 tests/qemu-iotests/222.out  |  67 -
 tests/qemu-iotests/283  |  35 ++-
 tests/qemu-iotests/283.out  |   4 +-
 tests/qemu-iotests/297  |   2 +-
 tests/qemu-iotests/iotests.py   |   5 +-
 tests/qemu-iotests/tests/image-fleecing | 192 +++
 tests/qemu-iotests/tests/image-fleecing.out | 139 +++
 25 files changed, 885 insertions(+), 672 deletions(-)
 rename block/{backup-top.h => copy-before-write.h} (56%)
 delete mode 100644 block/backup-top.c
 create mode 100644 block/copy-before-write.c
 delete mode 100755 tests/qemu-iotests/222
 delete mode 100644 tests/qemu-iotests/222.out
 create mode 100755 tests/qemu-iotests/tests/image-fleecing
 create mode 100644 tests/qemu-iotests/tests/image-fleecing.out

-- 
2.29.2

66 matches

Mail list logo