Re: [PATCH v2 12/22] qemu-iotests/199: fix style
On 17/02/2020 18:02, Vladimir Sementsov-Ogievskiy wrote: Mostly, satisfy pep8 complains. Signed-off-by: Vladimir Sementsov-Ogievskiy --- tests/qemu-iotests/199 | 13 +++-- 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/qemu-iotests/199 b/tests/qemu-iotests/199 index 40774eed74..de9ba8d94c 100755 --- a/tests/qemu-iotests/199 +++ b/tests/qemu-iotests/199 @@ -28,8 +28,8 @@ disk_b = os.path.join(iotests.test_dir, 'disk_b') size = '256G' fifo = os.path.join(iotests.test_dir, 'mig_fifo') -class TestDirtyBitmapPostcopyMigration(iotests.QMPTestCase): +class TestDirtyBitmapPostcopyMigration(iotests.QMPTestCase): def tearDown(self): self.vm_a.shutdown() self.vm_b.shutdown() @@ -54,7 +54,7 @@ class TestDirtyBitmapPostcopyMigration(iotests.QMPTestCase): result = self.vm_a.qmp('block-dirty-bitmap-add', node='drive0', name='bitmap', granularity=granularity) -self.assert_qmp(result, 'return', {}); +self.assert_qmp(result, 'return', {}) s = 0 while s < write_size: @@ -71,7 +71,7 @@ class TestDirtyBitmapPostcopyMigration(iotests.QMPTestCase): result = self.vm_a.qmp('block-dirty-bitmap-clear', node='drive0', name='bitmap') -self.assert_qmp(result, 'return', {}); +self.assert_qmp(result, 'return', {}) s = 0 while s < write_size: self.vm_a.hmp_qemu_io('drive0', 'write %d %d' % (s, chunk)) @@ -104,15 +104,16 @@ class TestDirtyBitmapPostcopyMigration(iotests.QMPTestCase): self.vm_b.hmp_qemu_io('drive0', 'write %d %d' % (s, chunk)) s += 0x1 -result = self.vm_b.qmp('query-block'); +result = self.vm_b.qmp('query-block') while len(result['return'][0]['dirty-bitmaps']) > 1: time.sleep(2) -result = self.vm_b.qmp('query-block'); +result = self.vm_b.qmp('query-block') result = self.vm_b.qmp('x-debug-block-dirty-bitmap-sha256', node='drive0', name='bitmap') -self.assert_qmp(result, 'return/sha256', sha256); +self.assert_qmp(result, 'return/sha256', sha256) + if __name__ == '__main__': iotests.main(supported_fmts=['qcow2'], supported_cache_modes=['none'], Reviewed-by: Andrey Shinkevich -- With the best regards, Andrey Shinkevich
Re: [PATCH 1/5] aio-posix: fix use after leaving scope in aio_poll()
On Fri, Feb 14, 2020 at 05:17:08PM +, Stefan Hajnoczi wrote: > epoll_handler is a stack variable and must not be accessed after it goes > out of scope: > > if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) { > AioHandler epoll_handler; > ... > add_pollfd(&epoll_handler); > ret = aio_epoll(ctx, pollfds, npfd, timeout); > } ... > > ... > > /* if we have any readable fds, dispatch event */ > if (ret > 0) { > for (i = 0; i < npfd; i++) { > nodes[i]->pfd.revents = pollfds[i].revents; > } > } > > nodes[0] is &epoll_handler, which has already gone out of scope. > > There is no need to use pollfds[] for epoll. We don't need an > AioHandler for the epoll fd. > > Signed-off-by: Stefan Hajnoczi > --- > util/aio-posix.c | 20 > 1 file changed, 8 insertions(+), 12 deletions(-) Reviewed-by: Sergio Lopez signature.asc Description: PGP signature
Re: [PATCH v1] block/nvme: introduce PMR support from NVMe 1.4 spec
Patchew URL: https://patchew.org/QEMU/20200218224811.30050-1-andrzej.jakow...@linux.intel.com/ Hi, This series failed the docker-mingw@fedora build test. Please find the testing commands and their output below. If you have Docker installed, you can probably reproduce it locally. === TEST SCRIPT BEGIN === #! /bin/bash export ARCH=x86_64 make docker-image-fedora V=1 NETWORK=1 time make docker-test-mingw@fedora J=14 NETWORK=1 === TEST SCRIPT END === CC hw/display/sii9022.o CC hw/display/ssd0303.o /tmp/qemu-test/src/hw/block/nvme.c: In function 'nvme_pmr_read': /tmp/qemu-test/src/hw/block/nvme.c:1342:15: error: implicit declaration of function 'msync'; did you mean 'fsync'? [-Werror=implicit-function-declaration] ret = msync(n->pmrbuf, n->f_pmr_size, MS_SYNC); ^ fsync /tmp/qemu-test/src/hw/block/nvme.c:1342:15: error: nested extern declaration of 'msync' [-Werror=nested-externs] /tmp/qemu-test/src/hw/block/nvme.c:1342:47: error: 'MS_SYNC' undeclared (first use in this function) ret = msync(n->pmrbuf, n->f_pmr_size, MS_SYNC); ^~~ /tmp/qemu-test/src/hw/block/nvme.c:1342:47: note: each undeclared identifier is reported only once for each function it appears in /tmp/qemu-test/src/hw/block/nvme.c: In function 'nvme_realize': /tmp/qemu-test/src/hw/block/nvme.c:1413:21: error: implicit declaration of function 'mmap'; did you mean 'max'? [-Werror=implicit-function-declaration] n->pmrbuf = mmap(NULL, n->f_pmr_size, ^~~~ max /tmp/qemu-test/src/hw/block/nvme.c:1413:21: error: nested extern declaration of 'mmap' [-Werror=nested-externs] /tmp/qemu-test/src/hw/block/nvme.c:1414:27: error: 'PROT_READ' undeclared (first use in this function); did you mean 'OF_READ'? (PROT_READ | PROT_WRITE), MAP_SHARED, fd, 0); ^ OF_READ /tmp/qemu-test/src/hw/block/nvme.c:1414:39: error: 'PROT_WRITE' undeclared (first use in this function); did you mean 'OF_WRITE'? (PROT_READ | PROT_WRITE), MAP_SHARED, fd, 0); ^~ OF_WRITE /tmp/qemu-test/src/hw/block/nvme.c:1414:52: error: 'MAP_SHARED' undeclared (first use in this function); did you mean 'RAM_SHARED'? (PROT_READ | PROT_WRITE), MAP_SHARED, fd, 0); ^~ RAM_SHARED /tmp/qemu-test/src/hw/block/nvme.c:1416:26: error: 'MAP_FAILED' undeclared (first use in this function); did you mean 'WAIT_FAILED'? if (n->pmrbuf == MAP_FAILED) { ^~ WAIT_FAILED /tmp/qemu-test/src/hw/block/nvme.c: In function 'nvme_exit': /tmp/qemu-test/src/hw/block/nvme.c:1583:13: error: implicit declaration of function 'munmap' [-Werror=implicit-function-declaration] munmap(n->pmrbuf, n->f_pmr_size); ^~ /tmp/qemu-test/src/hw/block/nvme.c:1583:13: error: nested extern declaration of 'munmap' [-Werror=nested-externs] cc1: all warnings being treated as errors make: *** [/tmp/qemu-test/src/rules.mak:69: hw/block/nvme.o] Error 1 make: *** Waiting for unfinished jobs Traceback (most recent call last): File "./tests/docker/docker.py", line 664, in --- raise CalledProcessError(retcode, cmd) subprocess.CalledProcessError: Command '['sudo', '-n', 'docker', 'run', '--label', 'com.qemu.instance.uuid=058833b8d96e4c67a646a099f4118351', '-u', '1003', '--security-opt', 'seccomp=unconfined', '--rm', '-e', 'TARGET_LIST=', '-e', 'EXTRA_CONFIGURE_OPTS=', '-e', 'V=', '-e', 'J=14', '-e', 'DEBUG=', '-e', 'SHOW_ENV=', '-e', 'CCACHE_DIR=/var/tmp/ccache', '-v', '/home/patchew2/.cache/qemu-docker-ccache:/var/tmp/ccache:z', '-v', '/var/tmp/patchew-tester-tmp-0kstbie3/src/docker-src.2020-02-18-20.04.28.2259:/var/tmp/qemu:z,ro', 'qemu:fedora', '/var/tmp/qemu/run', 'test-mingw']' returned non-zero exit status 2. filter=--filter=label=com.qemu.instance.uuid=058833b8d96e4c67a646a099f4118351 make[1]: *** [docker-run] Error 1 make[1]: Leaving directory `/var/tmp/patchew-tester-tmp-0kstbie3/src' make: *** [docker-run-test-mingw@fedora] Error 2 real2m39.403s user0m8.170s The full log is available at http://patchew.org/logs/20200218224811.30050-1-andrzej.jakow...@linux.intel.com/testing.docker-mingw@fedora/?type=message. --- Email generated automatically by Patchew [https://patchew.org/]. Please send your feedback to patchew-de...@redhat.com
[PATCH v1] block/nvme: introduce PMR support from NVMe 1.4 spec
This patch introduces support for PMR that has been defined as part of NVMe 1.4 spec. User can now specify a pmr_file which will be mmap'ed into qemu address space and subsequently in PCI BAR 2. Guest OS can perform mmio read and writes to the PMR region that will stay persistent accross system reboot. Signed-off-by: Andrzej Jakowski --- hw/block/nvme.c | 145 ++- hw/block/nvme.h | 5 ++ hw/block/trace-events | 5 ++ include/block/nvme.h | 172 ++ 4 files changed, 326 insertions(+), 1 deletion(-) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index d28335cbf3..836cf8d426 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -19,10 +19,14 @@ * -drive file=,if=none,id= * -device nvme,drive=,serial=,id=, \ * cmb_size_mb=, \ + * [pmr_file=,] \ * num_queues= * * Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at * offset 0 in BAR2 and supports only WDS, RDS and SQS for now. + * + * Either cmb or pmr - due to limitation in avaialbe BAR indexes. + * pmr_file file needs to be preallocated and be multiple of MiB in size. */ #include "qemu/osdep.h" @@ -1141,6 +1145,26 @@ static void nvme_write_bar(NvmeCtrl *n, hwaddr offset, uint64_t data, NVME_GUEST_ERR(nvme_ub_mmiowr_cmbsz_readonly, "invalid write to read only CMBSZ, ignored"); return; +case 0xE00: /* PMRCAP */ +NVME_GUEST_ERR(nvme_ub_mmiowr_pmrcap_readonly, + "invalid write to PMRCAP register, ignored"); +return; +case 0xE04: /* TODO PMRCTL */ +break; +case 0xE08: /* PMRSTS */ +NVME_GUEST_ERR(nvme_ub_mmiowr_pmrsts_readonly, + "invalid write to PMRSTS register, ignored"); +return; +case 0xE0C: /* PMREBS */ +NVME_GUEST_ERR(nvme_ub_mmiowr_pmrebs_readonly, + "invalid write to PMREBS register, ignored"); +return; +case 0xE10: /* PMRSWTP */ +NVME_GUEST_ERR(nvme_ub_mmiowr_pmrswtp_readonly, + "invalid write to PMRSWTP register, ignored"); +return; +case 0xE14: /* TODO PMRMSC */ + break; default: NVME_GUEST_ERR(nvme_ub_mmiowr_invalid, "invalid MMIO write," @@ -1303,6 +1327,38 @@ static const MemoryRegionOps nvme_cmb_ops = { }, }; +static void nvme_pmr_write(void *opaque, hwaddr addr, uint64_t data, +unsigned size) +{ +NvmeCtrl *n = (NvmeCtrl *)opaque; +stn_le_p(&n->pmrbuf[addr], size, data); +} + +static uint64_t nvme_pmr_read(void *opaque, hwaddr addr, unsigned size) +{ +NvmeCtrl *n = (NvmeCtrl *)opaque; +if (!NVME_PMRCAP_PMRWBM(n->bar.pmrcap)) { +int ret; +ret = msync(n->pmrbuf, n->f_pmr_size, MS_SYNC); +if (!ret) { +NVME_GUEST_ERR(nvme_ub_mmiowr_pmrread_barrier, + "error while persisting data"); +} +} +return ldn_le_p(&n->pmrbuf[addr], size); +} + +static const MemoryRegionOps nvme_pmr_ops = { +.read = nvme_pmr_read, +.write = nvme_pmr_write, +.endianness = DEVICE_LITTLE_ENDIAN, +.impl = { +.min_access_size = 1, +.max_access_size = 8, +}, +}; + + static void nvme_realize(PCIDevice *pci_dev, Error **errp) { NvmeCtrl *n = NVME(pci_dev); @@ -1332,6 +1388,37 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) error_setg(errp, "serial property not set"); return; } + +if (!n->cmb_size_mb && n->pmr_file) { +int fd; + +n->f_pmr = fopen(n->pmr_file, "r+b"); +if (!n->f_pmr) { +error_setg(errp, "pmr backend file open error"); +return; +} + +fseek(n->f_pmr, 0L, SEEK_END); +n->f_pmr_size = ftell(n->f_pmr); +fseek(n->f_pmr, 0L, SEEK_SET); + +/* pmr file size needs to be multiple of MiB in size */ +if (!n->f_pmr_size || n->f_pmr_size % (1 << 20)) { +error_setg(errp, "pmr backend file size needs to be greater than 0" + "and multiple of MiB in size"); +return; +} + +fd = fileno(n->f_pmr); +n->pmrbuf = mmap(NULL, n->f_pmr_size, + (PROT_READ | PROT_WRITE), MAP_SHARED, fd, 0); + +if (n->pmrbuf == MAP_FAILED) { +error_setg(errp, "pmr backend file mmap error"); +return; +} +} + blkconf_blocksizes(&n->conf); if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk), false, errp)) { @@ -1393,7 +1480,6 @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp) n->bar.intmc = n->bar.intms = 0; if (n->cmb_size_mb) { - NVME_CMBLOC_SET_BIR(n->bar.cmbloc, 2); NVME_CMBLOC_SET_OFST(n->bar.cmbloc, 0); @@ -1415,6 +1501,52 @@ static void nvme_rea
Re: [PATCH v2 00/22] Fix error handling during bitmap postcopy
On 2/18/20 2:02 PM, Andrey Shinkevich wrote: qemu-iotests:$ ./check -qcow2 PASSED (except always failed 261 and 272) Have you reported those failures on the threads that introduced those tests? -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Re: Cross-project NBD extension proposal: NBD_INFO_INIT_STATE
On 2/17/20 9:13 AM, Max Reitz wrote: Hi, It’s my understanding that without some is_zero infrastructure for QEMU, it’s impossible to implement this flag in qemu’s NBD server. You're right that we may need some more infrastructure before being able to decide when to report this bit in all cases. But for raw files, that infrastructure already exists: does block_status at offset 0 and the entire image as length return status that the entire file is a hole. And for qcow2 files, it would not be that hard to teach a similar block_status request to report the entire image as a hole based on my proposed qcow2 autoclear bit tracking that the image still reads as zero. At the same time, I still haven’t understood what we need the flag for. As far as I understood in our discussion on your qemu series, there is no case where anyone would need to know whether an image is zero. All > practical cases involve someone having to ensure that some image is zero. Knowing whether an image is zero can help with that, but that can be an implementation detail. For qcow2, the idea would be that there is some flag that remains true as long as the image is guaranteed to be zero. Then we’d have some bdrv_make_zero function, and qcow2’s implementation would use this information to gauge whether there’s something to do as all. For NBD, we cannot use this idea directly because to implement such a flag (as you’re describing in this mail), we’d need separate is_zero infrastructure, and that kind of makes the point of “drivers’ bdrv_make_zero() implementations do the right thing by themselves” moot. We don't necessarily need a separate is_zero infrastructure if we can instead teach the existing block_status infrastructure to report that the entire image reads as zero. You're right that clients that need to force an entire image to be zero won't need to directly call block_status (they can just call bdrv_make_zero, and let that worry about whether a block status call makes sense among its list of steps to try). But since block_status can report all-zero status for some cases, it's not hard to use that for feeding the NBD bit. However, there's a difference between qemu's block status (which is already typed correctly to return a 64-bit answer, even if it may need a few tweaks for clients that currently don't expect it to request more than 32 bits) and NBD's block status (which can only report 32 bits barring a new extension to the protocol), and where a single all-zero bit at NBD_OPT_GO is just as easy of an extension as a way to report a 64-bit all-zero response to NBD_CMD_BLOCK_STATUS. OTOH, we wouldn’t need such a flag for the implementation, because we could just send a 64-bit discard/make_zero over the whole block device length to the NBD server, and then the server internally does the right thing(TM). AFAIU discard and write_zeroes currently have only 32 bit length fields, but there were plans for adding support for 64 bit versions anyway. From my naïve outsider perspective, doing that doesn’t seem a more complicated protocol addition than adding some way to tell whether an NBD export is zero. Adding 64-bit commands to NBD is more invasive than adding a single startup status bit. Both ideas can be done - doing one does not preclude the other. But at the same time, not all servers will implement both ideas - if one is easy to implement while the other is hard, it is not unlikely that qemu will still encounter NBD servers that advertise startup state but not support 64-bit make_zero (even if qemu as NBD server starts supporting 64-bit make zero) or even 64-bit block status results. Another thing to think about here is timing. With the proposed NBD addition, it is the server telling the client that "the image you are connecting to started zero", prior to the point that the client even has a chance to request "can you make the image all zero in a quick manner (and if not, I'll fall back to writing zeroes as I go)". And even if NBD gains a 64-bit block status and/or make zero command, it is still less network traffic for the server to advertise up-front that the image is all zero than it is for the client to have to issue command requests of the server (network traffic is not always the bottleneck, but it can be a consideration). So I’m still wondering whether there are actually cases where we need to tell whether some image or NBD export is zero that do not involve making it zero if it isn’t. Just because we don't think that qemu-img has such a case does not mean that other NBD clients will not be able to come up with some use for knowing if an image starts all zero. (I keep asking because it seems to me that if all we ever really want to do is to ensure that some images/exports are zero, we should implement that.) The problem is WHERE do you implement it. Is it more efficient to implement make_zero in the NBD server (the client merely requests to make
Re: [PATCH v2 00/22] Fix error handling during bitmap postcopy
qemu-iotests:$ ./check -qcow2 PASSED (except always failed 261 and 272) Andrey On 17/02/2020 18:02, Vladimir Sementsov-Ogievskiy wrote: Original idea of bitmaps postcopy migration is that bitmaps are non critical data, and their loss is not serious problem. So, using postcopy method on any failure we should just drop unfinished bitmaps and continue guest execution. However, it doesn't work so. It crashes, fails, it goes to postcopy-recovery feature. It does anything except for behavior we want. These series fixes at least some problems with error handling during bitmaps migration postcopy. v1 was "[PATCH 0/7] Fix crashes on early shutdown during bitmaps postcopy" v2: Most of patches are new or changed a lot. Only patches 06,07 mostly unchanged, just rebased on refactorings. Vladimir Sementsov-Ogievskiy (22): migration/block-dirty-bitmap: fix dirty_bitmap_mig_before_vm_start migration/block-dirty-bitmap: rename state structure types migration/block-dirty-bitmap: rename dirty_bitmap_mig_cleanup migration/block-dirty-bitmap: move mutex init to dirty_bitmap_mig_init migration/block-dirty-bitmap: refactor state global variables migration/block-dirty-bitmap: rename finish_lock to just lock migration/block-dirty-bitmap: simplify dirty_bitmap_load_complete migration/block-dirty-bitmap: keep bitmap state for all bitmaps migration/block-dirty-bitmap: relax error handling in incoming part migration/block-dirty-bitmap: cancel migration on shutdown migration/savevm: don't worry if bitmap migration postcopy failed qemu-iotests/199: fix style qemu-iotests/199: drop extra constraints qemu-iotests/199: better catch postcopy time qemu-iotests/199: improve performance: set bitmap by discard qemu-iotests/199: change discard patterns qemu-iotests/199: increase postcopy period python/qemu/machine: add kill() method qemu-iotests/199: prepare for new test-cases addition qemu-iotests/199: check persistent bitmaps qemu-iotests/199: add early shutdown case to bitmaps postcopy qemu-iotests/199: add source-killed case to bitmaps postcopy Cc: John Snow Cc: Vladimir Sementsov-Ogievskiy Cc: Stefan Hajnoczi Cc: Fam Zheng Cc: Juan Quintela Cc: "Dr. David Alan Gilbert" Cc: Eduardo Habkost Cc: Cleber Rosa Cc: Kevin Wolf Cc: Max Reitz Cc: qemu-block@nongnu.org Cc: qemu-de...@nongnu.org Cc: qemu-sta...@nongnu.org # for patch 01 migration/migration.h | 3 +- migration/block-dirty-bitmap.c | 444 + migration/migration.c | 15 +- migration/savevm.c | 37 ++- python/qemu/machine.py | 12 +- tests/qemu-iotests/199 | 244 ++ tests/qemu-iotests/199.out | 4 +- 7 files changed, 529 insertions(+), 230 deletions(-) -- With the best regards, Andrey Shinkevich
Re: [PATCH v3] configure: Avoid compiling system tools on user build by default
Le 17/02/2020 à 14:33, Philippe Mathieu-Daudé a écrit : > User-mode does not need the system tools. Do not build them by > default if the user specifies --disable-system. > > This disables building the following binaries on a user-only build: > > - elf2dmp > - qemu-edid > - qemu-ga > - qemu-img > - qemu-io > - qemu-nbd > - ivshmem-client > - ivshmem-server > > The qemu-user binaries are not affected by this change. > > Signed-off-by: Philippe Mathieu-Daudé > --- > v3: > - fixed typos (Aleksandar) > v2: > - use simpler if/else statement (therefore not adding Richard R-b) > - improved description (Aleksandar) > --- > configure | 12 +++- > 1 file changed, 11 insertions(+), 1 deletion(-) > > diff --git a/configure b/configure > index 6f5d850949..efe00dd497 100755 > --- a/configure > +++ b/configure > @@ -455,7 +455,7 @@ guest_agent_ntddscsi="no" > guest_agent_msi="" > vss_win32_sdk="" > win_sdk="no" > -want_tools="yes" > +want_tools="" > libiscsi="" > libnfs="" > coroutine="" > @@ -2213,6 +2213,16 @@ else > echo big/little test failed > fi > > +## > +# system tools > +if test -z "$want_tools"; then > +if test "$softmmu" = "no"; then > +want_tools=no > +else > +want_tools=yes > +fi > +fi > + > ## > # cocoa implies not SDL or GTK > # (the cocoa UI code currently assumes it is always the active UI > Applied to my linux-user branch. Thanks, Laurent
Re: [PATCH 2/3] hw/display/qxl: Remove unneeded variable assignment
Le 15/02/2020 à 17:15, Philippe Mathieu-Daudé a écrit : > Fix warning reported by Clang static code analyzer: > > hw/display/qxl.c:1634:14: warning: Value stored to 'orig_io_port' during > its initialization is never read > uint32_t orig_io_port = io_port; >^~~~ ~~~ > > Reported-by: Clang Static Analyzer > Signed-off-by: Philippe Mathieu-Daudé > --- > hw/display/qxl.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/hw/display/qxl.c b/hw/display/qxl.c > index 64884da708..21a43a1d5e 100644 > --- a/hw/display/qxl.c > +++ b/hw/display/qxl.c > @@ -1631,7 +1631,7 @@ static void ioport_write(void *opaque, hwaddr addr, > PCIQXLDevice *d = opaque; > uint32_t io_port = addr; > qxl_async_io async = QXL_SYNC; > -uint32_t orig_io_port = io_port; > +uint32_t orig_io_port; > > if (d->guest_bug && io_port != QXL_IO_RESET) { > return; > Applied to my trivial-patches branch. Thanks, Laurent
Re: [PATCH 3/3] hw/block/pflash_cfi02: Remove unneeded variable assignment
Le 15/02/2020 à 17:15, Philippe Mathieu-Daudé a écrit : > Fix warning reported by Clang static code analyzer: > > CC hw/block/pflash_cfi02.o > hw/block/pflash_cfi02.c:311:5: warning: Value stored to 'ret' is never read > ret = -1; > ^ ~~ > > Reported-by: Clang Static Analyzer > Signed-off-by: Philippe Mathieu-Daudé > --- > hw/block/pflash_cfi02.c | 1 - > 1 file changed, 1 deletion(-) > > diff --git a/hw/block/pflash_cfi02.c b/hw/block/pflash_cfi02.c > index 7c4744c020..12f18d401a 100644 > --- a/hw/block/pflash_cfi02.c > +++ b/hw/block/pflash_cfi02.c > @@ -308,7 +308,6 @@ static uint64_t pflash_read(void *opaque, hwaddr offset, > unsigned int width) > hwaddr boff; > uint64_t ret; > > -ret = -1; > /* Lazy reset to ROMD mode after a certain amount of read accesses */ > if (!pfl->rom_mode && pfl->wcycle == 0 && > ++pfl->read_counter > PFLASH_LAZY_ROMD_THRESHOLD) { > Applied to my trivial-patches branch. Thanks, Laurent
Re: [PATCH v2 10/22] migration/block-dirty-bitmap: cancel migration on shutdown
On 17/02/2020 18:02, Vladimir Sementsov-Ogievskiy wrote: If target is turned of prior to postcopy finished, target crashes because busy bitmaps are found at shutdown. Canceling incoming migration helps, as it removes all unfinished (and therefore busy) bitmaps. Similarly on source we crash in bdrv_close_all which asserts that all bdrv states are removed, because bdrv states involved into dirty bitmap migration are referenced by it. So, we need to cancel outgoing migration as well. Signed-off-by: Vladimir Sementsov-Ogievskiy --- migration/migration.h | 2 ++ migration/block-dirty-bitmap.c | 16 migration/migration.c | 13 + 3 files changed, 31 insertions(+) diff --git a/migration/migration.h b/migration/migration.h index 2948f2387b..2de6b8bbe2 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -332,6 +332,8 @@ void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis, void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value); void dirty_bitmap_mig_before_vm_start(void); +void dirty_bitmap_mig_cancel_outgoing(void); +void dirty_bitmap_mig_cancel_incoming(void); void migrate_add_address(SocketAddress *address); int foreach_not_ignored_block(RAMBlockIterFunc func, void *opaque); diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c index aea5326804..3ca425d95e 100644 --- a/migration/block-dirty-bitmap.c +++ b/migration/block-dirty-bitmap.c @@ -585,6 +585,22 @@ static void cancel_incoming_locked(DBMLoadState *s) s->bitmaps = NULL; } +void dirty_bitmap_mig_cancel_outgoing(void) +{ +dirty_bitmap_do_save_cleanup(&dbm_state.save); The comment above the dirty_bitmap_do_save_cleanup() says: "Called with iothread lock taken" +} + +void dirty_bitmap_mig_cancel_incoming(void) +{ +DBMLoadState *s = &dbm_state.load; + +qemu_mutex_lock(&s->lock); + +cancel_incoming_locked(s); + +qemu_mutex_unlock(&s->lock); +} + static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s) { GSList *item; diff --git a/migration/migration.c b/migration/migration.c index 515047932c..7c605ba218 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -181,6 +181,19 @@ void migration_shutdown(void) */ migrate_fd_cancel(current_migration); object_unref(OBJECT(current_migration)); + +/* + * Cancel outgoing migration of dirty bitmaps. It should + * at least unref used block nodes. + */ +dirty_bitmap_mig_cancel_outgoing(); + +/* + * Cancel incoming migration of dirty bitmaps. Dirty bitmaps + * are non-critical data, and their loss never considered as + * something serious. + */ +dirty_bitmap_mig_cancel_incoming(); } /* For outgoing */ Reviewed-by: Andrey Shinkevich -- With the best regards, Andrey Shinkevich
Re: [PATCH RESEND 00/13] trivial: Detect and remove superfluous semicolons in C code
Le 18/02/2020 à 18:04, Paolo Bonzini a écrit : > On 18/02/20 10:43, Philippe Mathieu-Daudé wrote: >> Luc noticed a superfluous trailing semicolon: >> https://lists.gnu.org/archive/html/qemu-devel/2020-02/msg04593.html >> >> Prevent that by modifying checkpatch.pl and clean the codebase. >> >> Philippe Mathieu-Daudé (13): >> scripts/checkpatch.pl: Detect superfluous semicolon in C code >> audio/alsaaudio: Remove superfluous semicolons >> block: Remove superfluous semicolons >> block/io_uring: Remove superfluous semicolon >> hw/arm/xlnx-versal: Remove superfluous semicolon >> hw/m68k/next-cube: Remove superfluous semicolon >> hw/scsi/esp: Remove superfluous semicolon >> hw/vfio/display: Remove superfluous semicolon >> migration/multifd: Remove superfluous semicolon >> ui/input-barrier: Remove superfluous semicolon >> target/i386/whpx: Remove superfluous semicolon >> tests/qtest/libqos/qgraph: Remove superfluous semicolons >> contrib/rdmacm-mux: Remove superfluous semicolon >> >> audio/alsaaudio.c | 4 ++-- >> block.c | 4 ++-- >> block/io_uring.c| 2 +- >> contrib/rdmacm-mux/main.c | 2 +- >> hw/arm/xlnx-versal-virt.c | 2 +- >> hw/m68k/next-cube.c | 2 +- >> hw/scsi/esp.c | 2 +- >> hw/vfio/display.c | 2 +- >> migration/multifd.c | 2 +- >> target/i386/whpx-all.c | 2 +- >> tests/qtest/libqos/qgraph.c | 4 ++-- >> ui/input-barrier.c | 2 +- >> scripts/checkpatch.pl | 5 + >> 13 files changed, 20 insertions(+), 15 deletions(-) >> > > Acked-by: Paolo Bonzini > > Laurent, can you queue this in qemu-trivial? > Queued, except patches 3, 4 (taken by Kevin) and 9 (by Juan) Thanks, Laurent
Re: [PATCH RESEND 00/13] trivial: Detect and remove superfluous semicolons in C code
On 18/02/2020 18:04, Paolo Bonzini wrote: > On 18/02/20 10:43, Philippe Mathieu-Daudé wrote: >> Luc noticed a superfluous trailing semicolon: >> https://lists.gnu.org/archive/html/qemu-devel/2020-02/msg04593.html >> >> Prevent that by modifying checkpatch.pl and clean the codebase. >> >> Philippe Mathieu-Daudé (13): >> scripts/checkpatch.pl: Detect superfluous semicolon in C code >> audio/alsaaudio: Remove superfluous semicolons >> block: Remove superfluous semicolons >> block/io_uring: Remove superfluous semicolon >> hw/arm/xlnx-versal: Remove superfluous semicolon >> hw/m68k/next-cube: Remove superfluous semicolon >> hw/scsi/esp: Remove superfluous semicolon >> hw/vfio/display: Remove superfluous semicolon >> migration/multifd: Remove superfluous semicolon >> ui/input-barrier: Remove superfluous semicolon >> target/i386/whpx: Remove superfluous semicolon >> tests/qtest/libqos/qgraph: Remove superfluous semicolons >> contrib/rdmacm-mux: Remove superfluous semicolon >> >> audio/alsaaudio.c | 4 ++-- >> block.c | 4 ++-- >> block/io_uring.c| 2 +- >> contrib/rdmacm-mux/main.c | 2 +- >> hw/arm/xlnx-versal-virt.c | 2 +- >> hw/m68k/next-cube.c | 2 +- >> hw/scsi/esp.c | 2 +- >> hw/vfio/display.c | 2 +- >> migration/multifd.c | 2 +- >> target/i386/whpx-all.c | 2 +- >> tests/qtest/libqos/qgraph.c | 4 ++-- >> ui/input-barrier.c | 2 +- >> scripts/checkpatch.pl | 5 + >> 13 files changed, 20 insertions(+), 15 deletions(-) >> > > Acked-by: Paolo Bonzini > > Laurent, can you queue this in qemu-trivial? > Yes, I will. Thanks, Laurent
Re: [PATCH v2 09/22] migration/block-dirty-bitmap: relax error handling in incoming part
On 17/02/2020 18:02, Vladimir Sementsov-Ogievskiy wrote: Bitmaps data is not critical, and we should not fail the migration (or use postcopy recovering) because of dirty-bitmaps migration failure. Instead we should just lose unfinished bitmaps. Still we have to report io stream violation errors, as they affect the whole migration stream. Signed-off-by: Vladimir Sementsov-Ogievskiy --- migration/block-dirty-bitmap.c | 148 + 1 file changed, 113 insertions(+), 35 deletions(-) diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c index 1329db8d7d..aea5326804 100644 --- a/migration/block-dirty-bitmap.c +++ b/migration/block-dirty-bitmap.c @@ -145,6 +145,15 @@ typedef struct DBMLoadState { bool before_vm_start_handled; /* set in dirty_bitmap_mig_before_vm_start */ +/* + * cancelled + * Incoming migration is cancelled for some reason. That means that we + * still should read our chunks from migration stream, to not affect other + * migration objects (like RAM), but just ignore them and do not touch any + * bitmaps or nodes. + */ +bool cancelled; + GSList *bitmaps; QemuMutex lock; /* protect bitmaps */ } DBMLoadState; @@ -545,13 +554,47 @@ void dirty_bitmap_mig_before_vm_start(void) qemu_mutex_unlock(&s->lock); } +static void cancel_incoming_locked(DBMLoadState *s) +{ +GSList *item; + +if (s->cancelled) { +return; +} + +s->cancelled = true; +s->bs = NULL; +s->bitmap = NULL; + +/* Drop all unfinished bitmaps */ +for (item = s->bitmaps; item; item = g_slist_next(item)) { +LoadBitmapState *b = item->data; + +/* + * Bitmap must be unfinished, as finished bitmaps should already be + * removed from the list. + */ +assert(!s->before_vm_start_handled || !b->migrated); +if (bdrv_dirty_bitmap_has_successor(b->bitmap)) { +bdrv_reclaim_dirty_bitmap(b->bitmap, &error_abort); +} +bdrv_release_dirty_bitmap(b->bitmap); +} + +g_slist_free_full(s->bitmaps, g_free); +s->bitmaps = NULL; +} + static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s) { GSList *item; trace_dirty_bitmap_load_complete(); -bdrv_dirty_bitmap_deserialize_finish(s->bitmap); -qemu_mutex_lock(&s->lock); Why is it safe to remove the critical section? +if (s->cancelled) { +return; +} + +bdrv_dirty_bitmap_deserialize_finish(s->bitmap); if (bdrv_dirty_bitmap_has_successor(s->bitmap)) { bdrv_reclaim_dirty_bitmap(s->bitmap, &error_abort); @@ -569,8 +612,6 @@ static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s) break; } } - -qemu_mutex_unlock(&s->lock); } static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s) @@ -582,15 +623,32 @@ static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s) if (s->flags & DIRTY_BITMAP_MIG_FLAG_ZEROES) { trace_dirty_bitmap_load_bits_zeroes(); -bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_byte, nr_bytes, - false); +if (!s->cancelled) { +bdrv_dirty_bitmap_deserialize_zeroes(s->bitmap, first_byte, + nr_bytes, false); +} } else { size_t ret; uint8_t *buf; uint64_t buf_size = qemu_get_be64(f); -uint64_t needed_size = -bdrv_dirty_bitmap_serialization_size(s->bitmap, - first_byte, nr_bytes); +uint64_t needed_size; + +buf = g_malloc(buf_size); +ret = qemu_get_buffer(f, buf, buf_size); +if (ret != buf_size) { +error_report("Failed to read bitmap bits"); +g_free(buf); +return -EIO; +} + +if (s->cancelled) { +g_free(buf); +return 0; +} + +needed_size = bdrv_dirty_bitmap_serialization_size(s->bitmap, + first_byte, + nr_bytes); if (needed_size > buf_size || buf_size > QEMU_ALIGN_UP(needed_size, 4 * sizeof(long)) @@ -599,15 +657,8 @@ static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s) error_report("Migrated bitmap granularity doesn't " "match the destination bitmap '%s' granularity", bdrv_dirty_bitmap_name(s->bitmap)); -return -EINVAL; -} - -buf = g_malloc(buf_size); -ret = qemu_get_buffer(f, buf, buf_size); -if (ret != buf_size) { -error_report("Failed to read bitmap bits"); -g_free(buf); -return -EIO; +cancel_incoming_locked(s);
[PATCH] aio-posix: avoid reacquiring rcu_read_lock() when polling
The first rcu_read_lock/unlock() is expensive. Nested calls are cheap. This optimization increases IOPS from 73k to 162k with a Linux guest that has 2 virtio-blk,num-queues=1 and 99 virtio-blk,num-queues=32 devices. Signed-off-by: Stefan Hajnoczi --- util/aio-posix.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/util/aio-posix.c b/util/aio-posix.c index a4977f538e..f67f5b34e9 100644 --- a/util/aio-posix.c +++ b/util/aio-posix.c @@ -15,6 +15,7 @@ #include "qemu/osdep.h" #include "block/block.h" +#include "qemu/rcu.h" #include "qemu/rcu_queue.h" #include "qemu/sockets.h" #include "qemu/cutils.h" @@ -514,6 +515,16 @@ static bool run_poll_handlers_once(AioContext *ctx, int64_t *timeout) bool progress = false; AioHandler *node; +/* + * Optimization: ->io_poll() handlers often contain RCU read critical + * sections and we therefore see many rcu_read_lock() -> rcu_read_unlock() + * -> rcu_read_lock() -> ... sequences with expensive memory + * synchronization primitives. Make the entire polling loop an RCU + * critical section because nested rcu_read_lock()/rcu_read_unlock() calls + * are cheap. + */ +RCU_READ_LOCK_GUARD(); + QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) { if (!node->deleted && node->io_poll && aio_node_check(ctx, node->is_external) && -- 2.24.1
Re: [PATCH RESEND 00/13] trivial: Detect and remove superfluous semicolons in C code
On 18/02/20 10:43, Philippe Mathieu-Daudé wrote: > Luc noticed a superfluous trailing semicolon: > https://lists.gnu.org/archive/html/qemu-devel/2020-02/msg04593.html > > Prevent that by modifying checkpatch.pl and clean the codebase. > > Philippe Mathieu-Daudé (13): > scripts/checkpatch.pl: Detect superfluous semicolon in C code > audio/alsaaudio: Remove superfluous semicolons > block: Remove superfluous semicolons > block/io_uring: Remove superfluous semicolon > hw/arm/xlnx-versal: Remove superfluous semicolon > hw/m68k/next-cube: Remove superfluous semicolon > hw/scsi/esp: Remove superfluous semicolon > hw/vfio/display: Remove superfluous semicolon > migration/multifd: Remove superfluous semicolon > ui/input-barrier: Remove superfluous semicolon > target/i386/whpx: Remove superfluous semicolon > tests/qtest/libqos/qgraph: Remove superfluous semicolons > contrib/rdmacm-mux: Remove superfluous semicolon > > audio/alsaaudio.c | 4 ++-- > block.c | 4 ++-- > block/io_uring.c| 2 +- > contrib/rdmacm-mux/main.c | 2 +- > hw/arm/xlnx-versal-virt.c | 2 +- > hw/m68k/next-cube.c | 2 +- > hw/scsi/esp.c | 2 +- > hw/vfio/display.c | 2 +- > migration/multifd.c | 2 +- > target/i386/whpx-all.c | 2 +- > tests/qtest/libqos/qgraph.c | 4 ++-- > ui/input-barrier.c | 2 +- > scripts/checkpatch.pl | 5 + > 13 files changed, 20 insertions(+), 15 deletions(-) > Acked-by: Paolo Bonzini Laurent, can you queue this in qemu-trivial?
Re: [PATCH v2 08/22] migration/block-dirty-bitmap: keep bitmap state for all bitmaps
On 17/02/2020 18:02, Vladimir Sementsov-Ogievskiy wrote: Keep bitmap state for disabled bitmaps too. Keep the state until the end of the process. It's needed for the following commit to implement bitmap postcopy canceling. To clean-up the new list the following logic is used: We need two events to consider bitmap migration finished: 1. chunk with DIRTY_BITMAP_MIG_FLAG_COMPLETE flag should be received 2. dirty_bitmap_mig_before_vm_start should be called These two events may come in any order, so we understand which one is last, and on the last of them we remove bitmap migration state from the list. Signed-off-by: Vladimir Sementsov-Ogievskiy --- migration/block-dirty-bitmap.c | 64 +++--- 1 file changed, 43 insertions(+), 21 deletions(-) diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c index 9cc750d93b..1329db8d7d 100644 --- a/migration/block-dirty-bitmap.c +++ b/migration/block-dirty-bitmap.c @@ -132,6 +132,7 @@ typedef struct LoadBitmapState { BlockDriverState *bs; BdrvDirtyBitmap *bitmap; bool migrated; +bool enabled; } LoadBitmapState; /* State of the dirty bitmap migration (DBM) during load process */ @@ -142,8 +143,10 @@ typedef struct DBMLoadState { BlockDriverState *bs; BdrvDirtyBitmap *bitmap; -GSList *enabled_bitmaps; -QemuMutex lock; /* protect enabled_bitmaps */ +bool before_vm_start_handled; /* set in dirty_bitmap_mig_before_vm_start */ + +GSList *bitmaps; +QemuMutex lock; /* protect bitmaps */ } DBMLoadState; typedef struct DBMState { @@ -458,6 +461,7 @@ static int dirty_bitmap_load_start(QEMUFile *f, DBMLoadState *s) Error *local_err = NULL; uint32_t granularity = qemu_get_be32(f); uint8_t flags = qemu_get_byte(f); +LoadBitmapState *b; if (s->bitmap) { error_report("Bitmap with the same name ('%s') already exists on " @@ -484,45 +488,59 @@ static int dirty_bitmap_load_start(QEMUFile *f, DBMLoadState *s) bdrv_disable_dirty_bitmap(s->bitmap); if (flags & DIRTY_BITMAP_MIG_START_FLAG_ENABLED) { -LoadBitmapState *b; - bdrv_dirty_bitmap_create_successor(s->bitmap, &local_err); if (local_err) { error_report_err(local_err); return -EINVAL; } - -b = g_new(LoadBitmapState, 1); -b->bs = s->bs; -b->bitmap = s->bitmap; -b->migrated = false; -s->enabled_bitmaps = g_slist_prepend(s->enabled_bitmaps, b); } +b = g_new(LoadBitmapState, 1); +b->bs = s->bs; +b->bitmap = s->bitmap; +b->migrated = false; +b->enabled = flags & DIRTY_BITMAP_MIG_START_FLAG_ENABLED, + +s->bitmaps = g_slist_prepend(s->bitmaps, b); + return 0; } -void dirty_bitmap_mig_before_vm_start(void) +/* + * before_vm_start_handle_item + * + * g_slist_foreach helper + * + * item is LoadBitmapState* + * opaque is DBMLoadState* + */ +static void before_vm_start_handle_item(void *item, void *opaque) { -DBMLoadState *s = &dbm_state.load; -GSList *item; - -qemu_mutex_lock(&s->lock); - -for (item = s->enabled_bitmaps; item; item = g_slist_next(item)) { -LoadBitmapState *b = item->data; +DBMLoadState *s = opaque; +LoadBitmapState *b = item; +if (b->enabled) { if (b->migrated) { bdrv_enable_dirty_bitmap(b->bitmap); } else { bdrv_dirty_bitmap_enable_successor(b->bitmap); } +} +if (b->migrated) { +s->bitmaps = g_slist_remove(s->bitmaps, b); g_free(b); } +} -g_slist_free(s->enabled_bitmaps); -s->enabled_bitmaps = NULL; +void dirty_bitmap_mig_before_vm_start(void) +{ +DBMLoadState *s = &dbm_state.load; +qemu_mutex_lock(&s->lock); + +assert(!s->before_vm_start_handled); +g_slist_foreach(s->bitmaps, before_vm_start_handle_item, s); +s->before_vm_start_handled = true; qemu_mutex_unlock(&s->lock); } @@ -539,11 +557,15 @@ static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s) bdrv_reclaim_dirty_bitmap(s->bitmap, &error_abort); } -for (item = s->enabled_bitmaps; item; item = g_slist_next(item)) { +for (item = s->bitmaps; item; item = g_slist_next(item)) { LoadBitmapState *b = item->data; if (b->bitmap == s->bitmap) { b->migrated = true; +if (s->before_vm_start_handled) { +s->bitmaps = g_slist_remove(s->bitmaps, b); +g_free(b); +} break; } } Reviewed-by: Andrey Shinkevich -- With the best regards, Andrey Shinkevich
Re: x-blockdev-reopen & block-dirty-bitmaps
Am 18.02.2020 um 16:35 hat Peter Krempa geschrieben: > > This sounds like a case that blockdev-snapshot might be able to solve: > > After the offline copy has completed, you blockdev-add the whole backing > > chain for the target and then use blockdev-snapshot to add the active > > layer (that had 'backing': null) to it. > > Interresting idea! I'll give it a try. If you think that at least trying > blockdev-reopen in this case might be of some value I might want to give > it a try since I have some of the framework prepared now. Anything that tests blockdev-reopen in new cases has some value. :-) Kevin
[PATCH v5 1/4] qapi: Add a 'coroutine' flag for commands
This patch adds a new 'coroutine' flag to QMP command definitions that tells the QMP dispatcher that the command handler is safe to be run in a coroutine. The documentation of the new flag pretends that this flag is already used as intended, which it isn't yet after this patch. We'll implement this in another patch in this series. Signed-off-by: Kevin Wolf Reviewed-by: Marc-André Lureau Reviewed-by: Markus Armbruster --- docs/devel/qapi-code-gen.txt| 11 +++ include/qapi/qmp/dispatch.h | 1 + tests/test-qmp-cmds.c | 4 scripts/qapi/commands.py| 10 +++--- scripts/qapi/doc.py | 2 +- scripts/qapi/expr.py| 10 -- scripts/qapi/introspect.py | 2 +- scripts/qapi/schema.py | 9 ++--- tests/qapi-schema/test-qapi.py | 7 --- tests/Makefile.include | 1 + tests/qapi-schema/oob-coroutine.err | 2 ++ tests/qapi-schema/oob-coroutine.json| 2 ++ tests/qapi-schema/oob-coroutine.out | 0 tests/qapi-schema/qapi-schema-test.json | 1 + tests/qapi-schema/qapi-schema-test.out | 2 ++ 15 files changed, 51 insertions(+), 13 deletions(-) create mode 100644 tests/qapi-schema/oob-coroutine.err create mode 100644 tests/qapi-schema/oob-coroutine.json create mode 100644 tests/qapi-schema/oob-coroutine.out diff --git a/docs/devel/qapi-code-gen.txt b/docs/devel/qapi-code-gen.txt index 59d6973e1e..df01bd852b 100644 --- a/docs/devel/qapi-code-gen.txt +++ b/docs/devel/qapi-code-gen.txt @@ -457,6 +457,7 @@ Syntax: '*gen': false, '*allow-oob': true, '*allow-preconfig': true, +'*coroutine': true, '*if': COND, '*features': FEATURES } @@ -581,6 +582,16 @@ before the machine is built. It defaults to false. For example: QMP is available before the machine is built only when QEMU was started with --preconfig. +Member 'coroutine' tells the QMP dispatcher whether the command handler +is safe to be run in a coroutine. It defaults to false. If it is true, +the command handler is called from coroutine context and may yield while +waiting for an external event (such as I/O completion) in order to avoid +blocking the guest and other background operations. + +It is an error to specify both 'coroutine': true and 'allow-oob': true +for a command. (We don't currently have a use case for both together and +without a use case, it's not entirely clear what the semantics should be.) + The optional 'if' member specifies a conditional. See "Configuring the schema" below for more on this. diff --git a/include/qapi/qmp/dispatch.h b/include/qapi/qmp/dispatch.h index 9aa426a398..d6ce9efc8e 100644 --- a/include/qapi/qmp/dispatch.h +++ b/include/qapi/qmp/dispatch.h @@ -24,6 +24,7 @@ typedef enum QmpCommandOptions QCO_NO_SUCCESS_RESP = (1U << 0), QCO_ALLOW_OOB = (1U << 1), QCO_ALLOW_PRECONFIG = (1U << 2), +QCO_COROUTINE = (1U << 3), } QmpCommandOptions; typedef struct QmpCommand diff --git a/tests/test-qmp-cmds.c b/tests/test-qmp-cmds.c index 79507d9e54..6359cc28c7 100644 --- a/tests/test-qmp-cmds.c +++ b/tests/test-qmp-cmds.c @@ -35,6 +35,10 @@ void qmp_cmd_success_response(Error **errp) { } +void qmp_coroutine_cmd(Error **errp) +{ +} + Empty2 *qmp_user_def_cmd0(Error **errp) { return g_new0(Empty2, 1); diff --git a/scripts/qapi/commands.py b/scripts/qapi/commands.py index afa55b055c..f2f2f8948d 100644 --- a/scripts/qapi/commands.py +++ b/scripts/qapi/commands.py @@ -194,7 +194,8 @@ out: return ret -def gen_register_command(name, success_response, allow_oob, allow_preconfig): +def gen_register_command(name, success_response, allow_oob, allow_preconfig, + coroutine): options = [] if not success_response: @@ -203,6 +204,8 @@ def gen_register_command(name, success_response, allow_oob, allow_preconfig): options += ['QCO_ALLOW_OOB'] if allow_preconfig: options += ['QCO_ALLOW_PRECONFIG'] +if coroutine: +options += ['QCO_COROUTINE'] if not options: options = ['QCO_NO_OPTIONS'] @@ -285,7 +288,7 @@ void %(c_prefix)sqmp_init_marshal(QmpCommandList *cmds); def visit_command(self, name, info, ifcond, arg_type, ret_type, gen, success_response, boxed, allow_oob, allow_preconfig, - features): + coroutine, features): if not gen: return # FIXME: If T is a user-defined type, the user is responsible @@ -303,7 +306,8 @@ void %(c_prefix)sqmp_init_marshal(QmpCommandList *cmds); self._genh.add(gen_marshal_decl(name)) self._genc.add(gen_marshal(name, arg_type, boxed, ret_type)) self._regy.add(gen_register_command(name, success_response, -
[PATCH v5 0/4] qmp: Optionally run handlers in coroutines
Some QMP command handlers can block the main loop for a relatively long time, for example because they perform some I/O. This is quite nasty. Allowing such handlers to run in a coroutine where they can yield (and therefore release the BQL) while waiting for an event such as I/O completion solves the problem. This series adds the infrastructure to allow this and switches block_resize to run in a coroutine as a first example. This is an alternative solution to Marc-André's "monitor: add asynchronous command type" series. v5: - Improved comments and documentation [Markus] v4: - Forbid 'oob': true, 'coroutine': true [Markus] - Removed Python type hints [Markus] - Introduced separate bool qmp_dispatcher_co_shutdown to make it clearer how a shutdown request is signalled to the dispatcher [Markus] - Allow using aio_poll() with iohandler_ctx and use that instead of aio_bh_poll() [Markus] - Removed coroutine_fn from qmp_block_resize() again because at least one caller (HMP) calls it outside of coroutine context [Markus] - Tried to make the synchronisation between dispatcher and the monitor thread clearer, and fixed a race condition - Improved documentation and comments v3: - Fix race between monitor thread and dispatcher that could schedule the dispatcher coroutine twice if a second requests comes in before the dispatcher can wake up [Patchew] v2: - Fix typo in a commit message [Eric] - Use hyphen instead of underscore for the test command [Eric] - Mark qmp_block_resize() as coroutine_fn [Stefan] Kevin Wolf (4): qapi: Add a 'coroutine' flag for commands vl: Initialise main loop earlier qmp: Move dispatcher to a coroutine block: Mark 'block_resize' as coroutine qapi/block-core.json| 3 +- docs/devel/qapi-code-gen.txt| 11 +++ include/qapi/qmp/dispatch.h | 2 + monitor/monitor-internal.h | 6 +- monitor/monitor.c | 55 +-- monitor/qmp.c | 122 ++-- qapi/qmp-dispatch.c | 44 - qapi/qmp-registry.c | 3 + tests/test-qmp-cmds.c | 4 + util/aio-posix.c| 7 +- vl.c| 10 +- scripts/qapi/commands.py| 10 +- scripts/qapi/doc.py | 2 +- scripts/qapi/expr.py| 10 +- scripts/qapi/introspect.py | 2 +- scripts/qapi/schema.py | 9 +- tests/qapi-schema/test-qapi.py | 7 +- tests/Makefile.include | 1 + tests/qapi-schema/oob-coroutine.err | 2 + tests/qapi-schema/oob-coroutine.json| 2 + tests/qapi-schema/oob-coroutine.out | 0 tests/qapi-schema/qapi-schema-test.json | 1 + tests/qapi-schema/qapi-schema-test.out | 2 + 23 files changed, 254 insertions(+), 61 deletions(-) create mode 100644 tests/qapi-schema/oob-coroutine.err create mode 100644 tests/qapi-schema/oob-coroutine.json create mode 100644 tests/qapi-schema/oob-coroutine.out -- 2.20.1
[PATCH v5 3/4] qmp: Move dispatcher to a coroutine
This moves the QMP dispatcher to a coroutine and runs all QMP command handlers that declare 'coroutine': true in coroutine context so they can avoid blocking the main loop while doing I/O or waiting for other events. For commands that are not declared safe to run in a coroutine, the dispatcher drops out of coroutine context by calling the QMP command handler from a bottom half. Signed-off-by: Kevin Wolf --- include/qapi/qmp/dispatch.h | 1 + monitor/monitor-internal.h | 6 +- monitor/monitor.c | 55 +--- monitor/qmp.c | 122 +++- qapi/qmp-dispatch.c | 44 - qapi/qmp-registry.c | 3 + util/aio-posix.c| 7 ++- 7 files changed, 196 insertions(+), 42 deletions(-) diff --git a/include/qapi/qmp/dispatch.h b/include/qapi/qmp/dispatch.h index d6ce9efc8e..6812e49b5f 100644 --- a/include/qapi/qmp/dispatch.h +++ b/include/qapi/qmp/dispatch.h @@ -30,6 +30,7 @@ typedef enum QmpCommandOptions typedef struct QmpCommand { const char *name; +/* Runs in coroutine context if QCO_COROUTINE is set */ QmpCommandFunc *fn; QmpCommandOptions options; QTAILQ_ENTRY(QmpCommand) node; diff --git a/monitor/monitor-internal.h b/monitor/monitor-internal.h index 3e6baba88f..f8123b151a 100644 --- a/monitor/monitor-internal.h +++ b/monitor/monitor-internal.h @@ -155,7 +155,9 @@ static inline bool monitor_is_qmp(const Monitor *mon) typedef QTAILQ_HEAD(MonitorList, Monitor) MonitorList; extern IOThread *mon_iothread; -extern QEMUBH *qmp_dispatcher_bh; +extern Coroutine *qmp_dispatcher_co; +extern bool qmp_dispatcher_co_shutdown; +extern bool qmp_dispatcher_co_busy; extern QmpCommandList qmp_commands, qmp_cap_negotiation_commands; extern QemuMutex monitor_lock; extern MonitorList mon_list; @@ -173,7 +175,7 @@ void monitor_fdsets_cleanup(void); void qmp_send_response(MonitorQMP *mon, const QDict *rsp); void monitor_data_destroy_qmp(MonitorQMP *mon); -void monitor_qmp_bh_dispatcher(void *data); +void coroutine_fn monitor_qmp_dispatcher_co(void *data); int get_monitor_def(int64_t *pval, const char *name); void help_cmd(Monitor *mon, const char *name); diff --git a/monitor/monitor.c b/monitor/monitor.c index c1a6c4460f..72d57b5cd2 100644 --- a/monitor/monitor.c +++ b/monitor/monitor.c @@ -53,8 +53,32 @@ typedef struct { /* Shared monitor I/O thread */ IOThread *mon_iothread; -/* Bottom half to dispatch the requests received from I/O thread */ -QEMUBH *qmp_dispatcher_bh; +/* Coroutine to dispatch the requests received from I/O thread */ +Coroutine *qmp_dispatcher_co; + +/* Set to true when the dispatcher coroutine should terminate */ +bool qmp_dispatcher_co_shutdown; + +/* + * qmp_dispatcher_co_busy is used for synchronisation between the + * monitor thread and the main thread to ensure that the dispatcher + * coroutine never gets scheduled a second time when it's already + * scheduled (scheduling the same coroutine twice is forbidden). + * + * It is true if the coroutine is active and processing requests. + * Additional requests may then be pushed onto a mon->qmp_requests, + * and @qmp_dispatcher_co_shutdown may be set without further ado. + * @qmp_dispatcher_co_busy must not be woken up in this case. + * + * If false, you also have to set @qmp_dispatcher_co_busy to true and + * wake up @qmp_dispatcher_co after pushing the new requests. + * + * The coroutine will automatically change this variable back to false + * before it yields. Nobody else may set the variable to false. + * + * Access must be atomic for thread safety. + */ +bool qmp_dispatcher_co_busy; /* Protects mon_list, monitor_qapi_event_state, monitor_destroyed. */ QemuMutex monitor_lock; @@ -579,9 +603,24 @@ void monitor_cleanup(void) } qemu_mutex_unlock(&monitor_lock); -/* QEMUBHs needs to be deleted before destroying the I/O thread */ -qemu_bh_delete(qmp_dispatcher_bh); -qmp_dispatcher_bh = NULL; +/* + * The dispatcher needs to stop before destroying the I/O thread. + * + * We need to poll both qemu_aio_context and iohandler_ctx to make + * sure that the dispatcher coroutine keeps making progress and + * eventually terminates. qemu_aio_context is automatically + * polled by calling AIO_WAIT_WHILE on it, but we must poll + * iohandler_ctx manually. + */ +qmp_dispatcher_co_shutdown = true; +if (!atomic_xchg(&qmp_dispatcher_co_busy, true)) { +aio_co_wake(qmp_dispatcher_co); +} + +AIO_WAIT_WHILE(qemu_get_aio_context(), + (aio_poll(iohandler_get_aio_context(), false), +atomic_mb_read(&qmp_dispatcher_co_busy))); + if (mon_iothread) { iothread_destroy(mon_iothread); mon_iothread = NULL; @@ -604,9 +643,9 @@ void monitor_init_globals_core(void) * have commands assuming that context. It would be nice to get * rid of those assumptions. */ -qmp_dispatcher_bh
[PATCH v5 4/4] block: Mark 'block_resize' as coroutine
block_resize is safe to run in a coroutine, and it does some I/O that could potentially take quite some time, so use it as an example for the new 'coroutine': true annotation in the QAPI schema. Signed-off-by: Kevin Wolf Reviewed-by: Marc-André Lureau Reviewed-by: Markus Armbruster --- qapi/block-core.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/qapi/block-core.json b/qapi/block-core.json index 37d7ea7295..077b721d01 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1344,7 +1344,8 @@ { 'command': 'block_resize', 'data': { '*device': 'str', '*node-name': 'str', -'size': 'int' } } +'size': 'int' }, + 'coroutine': true } ## # @NewImageMode: -- 2.20.1
[PATCH v5 2/4] vl: Initialise main loop earlier
We want to be able to use qemu_aio_context in the monitor initialisation. Signed-off-by: Kevin Wolf Reviewed-by: Marc-André Lureau Reviewed-by: Markus Armbruster --- vl.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vl.c b/vl.c index 794f2e5733..98bc51e089 100644 --- a/vl.c +++ b/vl.c @@ -2894,6 +2894,11 @@ int main(int argc, char **argv, char **envp) runstate_init(); precopy_infrastructure_init(); postcopy_infrastructure_init(); + +if (qemu_init_main_loop(&main_loop_err)) { +error_report_err(main_loop_err); +exit(1); +} monitor_init_globals(); if (qcrypto_init(&err) < 0) { @@ -3811,11 +3816,6 @@ int main(int argc, char **argv, char **envp) qemu_unlink_pidfile_notifier.notify = qemu_unlink_pidfile; qemu_add_exit_notifier(&qemu_unlink_pidfile_notifier); -if (qemu_init_main_loop(&main_loop_err)) { -error_report_err(main_loop_err); -exit(1); -} - #ifdef CONFIG_SECCOMP olist = qemu_find_opts_err("sandbox", NULL); if (olist) { -- 2.20.1
Re: x-blockdev-reopen & block-dirty-bitmaps
On Tue, Feb 18, 2020 at 15:25:33 +0100, Kevin Wolf wrote: > Am 18.02.2020 um 13:58 hat Peter Krempa geschrieben: > > On Mon, Feb 17, 2020 at 10:52:31 +0100, Kevin Wolf wrote: > > > Am 14.02.2020 um 21:32 hat John Snow geschrieben: [...] > > Well, while we probably want it to be stable for upstream acceptance > > that didn't prevent me from actually trying to use reopening. It would > > be probably frowned upon if I tried to use it upstream. > > > > The problem is that we'd have to carry the compatibility code for at > > least the two possible names of the command if nothing else changes and > > also the fact that once the command is declared stable, some older > > libvirt versions might not know to use it. > > I think this is exactly the thing we need before we can mark it stable: > Some evidence that it actually provides the functionality that > management tools need. So thanks for giving it a try. Yes, this is the unfortunate circular dependency :). We want to use it only once it's stable and you want some testing for it. Finding a good use case for us is the hardest usually. > > The implementation was surprisingly easy though and works well to reopen > > the backing files in RW mode. The caveat was that the reopen somehow > > still didn't reopen the bitmaps and qemu ended up reporting: > > > > libvirt-1-format: Failed to make dirty bitmaps writable: Cannot update > > bitmap directory: Bad file descriptor > > > > So unfortunately it didn't work out for that scenario. > > I'm not completely sure, but this sounds a bit like a reopen bug in the > file-posix driver to me, where we keep using the old file descriptor > somewhere? > > Someone (TM) should turn this into a qemu-iotests case and then we can > debug it. I'm not sure I'd be very helpful in turning it into a test but I can provide the (rough) steps if that will be helpful: The images both had some bitmaps already present and active: "dirty-bitmaps": [ { "name": "b", "recording": true, "persistent": true, "busy": false, "status": "active", "granularity": 65536, "count": 0 } ], I've started qemu with: -blockdev '{"driver":"file","filename":"/tmp/copy4.qcow2","node-name":"libvirt-2-storage","auto-read-only":true,"discard":"unmap"}' \ -blockdev '{"node-name":"libvirt-2-format","read-only":true,"driver":"qcow2","file":"libvirt-2-storage","backing":null}' \ -blockdev '{"driver":"file","filename":"/tmp/copy4.1582023995","node-name":"libvirt-1-storage","auto-read-only":true,"discard":"unmap"}' \ -blockdev '{"node-name":"libvirt-1-format","read-only":false,"driver":"qcow2","file":"libvirt-1-storage","backing":"libvirt-2-format"}' \ -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0xa,drive=libvirt-1-format,id=virtio-disk0 \ Tried to reopen the backing image: {"execute":"x-blockdev-reopen","arguments":{"node-name":"libvirt-2-format","read-only":false,"driver":"qcow2","file":"libvirt- 2-storage","backing":null},"id":"libvirt-370"} I suspect that at that point this was printed to stderr (but I don't have timestamps in the log): libvirt-2-format: Failed to make dirty bitmaps writable: Cannot update bitmap directory: Bad file descriptor I then tried to remove the bitmaps but that failed: {"execute":"transaction","arguments":{"actions":[{"type":"block-dirty-bitmap-remove","data":{"node":"libvirt-1-format","name": "b"}},{"type":"block-dirty-bitmap-remove","data":{"node":"libvirt-2-format","name":"b"}}]},"id":"libvirt-373"} > > > > > > Also I'm afraid I have another use case for it: > > > > oVirt when doing their 'live storage migration' actually uses libvirt to > > mirror only the top layer in shallow mode and copies everything else > > while the mirror is running using qemu-img. > > > > Prior to libvirt's use of -blockdev this worked well, because qemu > > reopened the mirror destination (which caused to open the backing files) > > only at the end. With -blockdev we have to open the backing files right > > away so that they can be properly installed as backing of the image > > being mirrored and oVirt's qemu-img instance gets a locking error as the > > images are actually opened for reading already. > > > > I'm afraid that we won't be able to restore the previous semantics > > without actually opening the backing files after the copy is > > synchronized before completing the job and then installing them as the > > backing via blockdev-reopen. > > > > Libvirt's documentation was partially alibistic [1] and asked the user to > > actually provide a working image but oVirt actually exploited the qemu > > behaviour to allow folding the two operations together. > > > > [1] https://libvirt.org/html/libvirt-libvirt-domain.html#virDomainBlockCopy > > This sounds like a case that blockdev-snapshot might be able to solve: > After the offline copy has completed, you blockdev-add the whole backing > chain for the target and then use blockd
Re: [PATCH v4 3/4] qmp: Move dispatcher to a coroutine
Am 18.02.2020 um 15:12 hat Markus Armbruster geschrieben: > >> Regarding calling monitor_qmp_requests_pop_any_with_lock(): it needs > >> @monitor_lock and @mon_list to be valid. We just initialized > >> @monitor_lock, and @mon_list is empty. > >> monitor_qmp_requests_pop_any_with_lock() should be safe and return null. > >> monitor_qmp_dispatcher_co() should also be safe and yield without doing > >> work. > >> > >> Can we exploit that to make things a bit simpler? Separate patch would > >> be fine with me. > > > > If this is true, we could replace this line: > > > > aio_co_schedule(iohandler_get_aio_context(), qmp_dispatcher_co); > > > > with the following one: > > > > qemu_aio_coroutine_enter(iohandler_get_aio_context(), > > qmp_dispatcher_co); > > > > I'm not sure that this is any simpler. > > Naive question: what's the difference between "scheduling", "entering", > and "waking up" a coroutine? > > qemu_coroutine_enter() and qemu_aio_coroutine_enter() are in > coroutine.h. These are the low-level functions that just enter the coroutine (i.e. do a stack switch and jump to coroutine code) immediately when they are called. They must be called in the right thread with the right AioContext locks held. (What "right" means depends on the code run in the coroutine.) > aio_co_schedule(), aio_co_wake() and aio_co_enter() are in aio.h. aio_co_schedule() never enters the coroutine directly. It only adds it to the list of scheduled coroutines and schedules a BH in the target AioContext. This BH in turn will actually enter the coroutine. aio_co_enter() enters the coroutine immediately if it's being called outside of coroutine context and in the right thread for the given AioContext. If it's in the right thread, but in coroutine context then it will queue the given coroutine so that it runs whenever the current coroutine yields. If it's in the wrong thread, it uses aio_co_schedule() to get it run in the right thread. aio_co_wake() takes just the coroutine as a parameter and calls aio_co_enter() with the context in which the coroutine was last run. All of these functions make sure that the AioContext lock is taken when the coroutine is entered and that they run in the right thread. > qemu_coroutine_enter() calls qemu_aio_coroutine_enter(). > > aio_co_wake() calls aio_co_enter() calls qemu_aio_coroutine_enter(). > > aio_co_enter() seems to be independent. It's not. It calls either aio_co_schedule() or qemu_aio_coroutine_enter(), or inserts the coroutine ina queue that is processed in qemu_aio_coroutine_enter(). > aio.h seems to be layered on top of coroutine.h. Should I prefer using > aio.h to coroutine.h? In the common case, using the aio.h functions is preferable because they just do "the right thing" without requiring as much thinking as the low-level functions. > >> > } > >> > > >> > QemuOptsList qemu_mon_opts = { > >> > diff --git a/monitor/qmp.c b/monitor/qmp.c > >> > index 54c06ba824..9444de9fcf 100644 > >> > --- a/monitor/qmp.c > >> > +++ b/monitor/qmp.c > >> > @@ -133,6 +133,10 @@ static void monitor_qmp_respond(MonitorQMP *mon, > >> > QDict *rsp) > >> > } > >> > } > >> > > >> > +/* > >> > + * Runs outside of coroutine context for OOB commands, but in coroutine > >> > context > >> > + * for everything else. > >> > + */ > >> > >> Nitpick: wrap around column 70, please. > >> > >> Note to self: the precondition is asserted in do_qmp_dispatch() below. > >> Asserting here is impractical, because we don't know whether this is an > >> OOB command. > >> > >> > static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req) > >> > { > >> > Monitor *old_mon; > >> > @@ -211,43 +215,87 @@ static QMPRequest > >> > *monitor_qmp_requests_pop_any_with_lock(void) > >> > return req_obj; > >> > } > >> > > >> > -void monitor_qmp_bh_dispatcher(void *data) > >> > +void coroutine_fn monitor_qmp_dispatcher_co(void *data) > >> > { > >> > -QMPRequest *req_obj = monitor_qmp_requests_pop_any_with_lock(); > >> > +QMPRequest *req_obj = NULL; > >> > QDict *rsp; > >> > bool need_resume; > >> > MonitorQMP *mon; > >> > > >> > -if (!req_obj) { > >> > -return; > >> > -} > >> > +while (true) { > >> > +assert(atomic_mb_read(&qmp_dispatcher_co_busy) == true); > >> > >> Read and assert, then ... > >> > >> > + > >> > +/* Mark the dispatcher as not busy already here so that we > >> > don't miss > >> > + * any new requests coming in the middle of our processing. */ > >> > +atomic_mb_set(&qmp_dispatcher_co_busy, false); > >> > >> ... set. Would exchange, then assert be cleaner? > > > > Then you would ask me why the exchange has to be atomic. :-) > > Possibly :) > > > More practically, I would need a temporary variable so that I don't get > > code with side effects in assert() (which may be compiled out with > > NDEBUG). The temporary variable would never be read outside the assert > > and would be unused with NDE
Re: [PATCH RESEND 01/13] scripts/checkpatch.pl: Detect superfluous semicolon in C code
On 2/18/20 10:43 AM, Philippe Mathieu-Daudé wrote: > Display error when a commit contains superfluous semicolon: > > $ git show 6663a0a3376 | scripts/checkpatch.pl -q - > ERROR: superfluous trailing semicolon > #276: FILE: block/io_uring.c:186: > +ret = -ENOSPC;; > total: 1 errors, 1 warnings, 485 lines checked > > Reported-by: Luc Michel > Signed-off-by: Philippe Mathieu-Daudé Reviewed-by: Luc Michel > --- > Cc: Paolo Bonzini > --- > scripts/checkpatch.pl | 5 + > 1 file changed, 5 insertions(+) > > diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl > index ce43a306f8..11512a8a09 100755 > --- a/scripts/checkpatch.pl > +++ b/scripts/checkpatch.pl > @@ -1830,6 +1830,11 @@ sub process { > ERROR("suspicious ; after while (0)\n" . $herecurr); > } > > +# Check superfluous trailing ';' > + if ($line =~ /;;$/) { > + ERROR("superfluous trailing semicolon\n" . $herecurr); > + } > + > # Check relative indent for conditionals and blocks. > if ($line =~ /\b(?:(?:if|while|for)\s*\(|do\b)/ && $line !~ > /^.\s*#/ && $line !~ /\}\s*while\s*/) { > my ($s, $c) = ($stat, $cond); >
Re: x-blockdev-reopen & block-dirty-bitmaps
Am 18.02.2020 um 13:58 hat Peter Krempa geschrieben: > On Mon, Feb 17, 2020 at 10:52:31 +0100, Kevin Wolf wrote: > > Am 14.02.2020 um 21:32 hat John Snow geschrieben: > > > On 2/14/20 3:19 PM, Kevin Wolf wrote: > > > > Am 14.02.2020 um 19:54 hat John Snow geschrieben: > > > >> Hi, what work remains to make this a stable interface, is it known? > > > >> > > > >> We're having a problem with bitmaps where in some cases libvirt wants > > > >> to > > > >> commit an image back down to a base image but -- for various reasons -- > > > >> the bitmap was left enabled in the backing image, so it would accrue > > > >> new > > > >> writes during the commit. > > > >> > > > >> Normally, when creating a snapshot using blockdev-snapshot, the backing > > > >> file becomes RO and all of the bitmaps become RO too. > > > >> > > > >> The goal is to be able to disable (or enable) bitmaps from a backing > > > >> file before (or atomically just before) a commit operation to allow > > > >> libvirt greater control on snapshot commit. > > > >> > > > >> Now, in my own testing, we can reopen a backing file just fine, delete > > > >> or disable a bitmap and be done with it -- but the interface isn't > > > >> stable, so libvirt will be reluctant to use such tricks. > > Well, while we probably want it to be stable for upstream acceptance > that didn't prevent me from actually trying to use reopening. It would > be probably frowned upon if I tried to use it upstream. > > The problem is that we'd have to carry the compatibility code for at > least the two possible names of the command if nothing else changes and > also the fact that once the command is declared stable, some older > libvirt versions might not know to use it. I think this is exactly the thing we need before we can mark it stable: Some evidence that it actually provides the functionality that management tools need. So thanks for giving it a try. > The implementation was surprisingly easy though and works well to reopen > the backing files in RW mode. The caveat was that the reopen somehow > still didn't reopen the bitmaps and qemu ended up reporting: > > libvirt-1-format: Failed to make dirty bitmaps writable: Cannot update bitmap > directory: Bad file descriptor > > So unfortunately it didn't work out for that scenario. I'm not completely sure, but this sounds a bit like a reopen bug in the file-posix driver to me, where we keep using the old file descriptor somewhere? Someone (TM) should turn this into a qemu-iotests case and then we can debug it. > > > Also I'm afraid I have another use case for it: > > oVirt when doing their 'live storage migration' actually uses libvirt to > mirror only the top layer in shallow mode and copies everything else > while the mirror is running using qemu-img. > > Prior to libvirt's use of -blockdev this worked well, because qemu > reopened the mirror destination (which caused to open the backing files) > only at the end. With -blockdev we have to open the backing files right > away so that they can be properly installed as backing of the image > being mirrored and oVirt's qemu-img instance gets a locking error as the > images are actually opened for reading already. > > I'm afraid that we won't be able to restore the previous semantics > without actually opening the backing files after the copy is > synchronized before completing the job and then installing them as the > backing via blockdev-reopen. > > Libvirt's documentation was partially alibistic [1] and asked the user to > actually provide a working image but oVirt actually exploited the qemu > behaviour to allow folding the two operations together. > > [1] https://libvirt.org/html/libvirt-libvirt-domain.html#virDomainBlockCopy This sounds like a case that blockdev-snapshot might be able to solve: After the offline copy has completed, you blockdev-add the whole backing chain for the target and then use blockdev-snapshot to add the active layer (that had 'backing': null) to it. > > > >> > > > >> Probably a loaded question, but: > > > >> > > > >> - What's needed to make the interface stable? > > > >> - Are there known problem points? > > > >> - Any suggestions for workarounds in the meantime? > > > > > > > > I think I've asked this before, but I don't remember the answer... > > > > > > > > What would be the problem with letting the enable/disable command > > > > temporarily reopen the backing file read-write, like the commit job [1] > > > > does? > > > > > > > > > > I guess no problem? I wouldn't want it to do this automatically, but > > > perhaps we could add a "force=True" bool where it tries to do just this. > > > > > > (And once reopen works properly we could deprecate this workaround again.) > > > > I'm not sure if I would view this only as a workaround, but if you like > > it better with force=true, I won't object either. > > I'm wondering whether adding a feature deprecating it soon is even worth > doing. From libvirt's point of view it would be comparable
Re: [PATCH v2 07/22] migration/block-dirty-bitmap: simplify dirty_bitmap_load_complete
On 17/02/2020 18:02, Vladimir Sementsov-Ogievskiy wrote: bdrv_enable_dirty_bitmap_locked() call does nothing, as if we are in postcopy, bitmap successor must be enabled, and reclaim operation will enable the bitmap. So, actually we need just call _reclaim_ in both if branches, and making differences only to add an assertion seems not really good. The logic becomes simple: on load complete we do reclaim and that's all. Signed-off-by: Vladimir Sementsov-Ogievskiy --- migration/block-dirty-bitmap.c | 25 - 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c index 440c41cfca..9cc750d93b 100644 --- a/migration/block-dirty-bitmap.c +++ b/migration/block-dirty-bitmap.c @@ -535,6 +535,10 @@ static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s) qemu_mutex_lock(&s->lock); +if (bdrv_dirty_bitmap_has_successor(s->bitmap)) { What about making it sure? assert(!s->bitmap->successor->disabled); +bdrv_reclaim_dirty_bitmap(s->bitmap, &error_abort); +} + for (item = s->enabled_bitmaps; item; item = g_slist_next(item)) { LoadBitmapState *b = item->data; @@ -544,27 +548,6 @@ static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s) } } -if (bdrv_dirty_bitmap_has_successor(s->bitmap)) { -bdrv_dirty_bitmap_lock(s->bitmap); -if (s->enabled_bitmaps == NULL) { -/* in postcopy */ -bdrv_reclaim_dirty_bitmap_locked(s->bitmap, &error_abort); -bdrv_enable_dirty_bitmap_locked(s->bitmap); -} else { -/* target not started, successor must be empty */ -int64_t count = bdrv_get_dirty_count(s->bitmap); -BdrvDirtyBitmap *ret = bdrv_reclaim_dirty_bitmap_locked(s->bitmap, -NULL); -/* bdrv_reclaim_dirty_bitmap can fail only on no successor (it - * must be) or on merge fail, but merge can't fail when second - * bitmap is empty - */ -assert(ret == s->bitmap && - count == bdrv_get_dirty_count(s->bitmap)); -} -bdrv_dirty_bitmap_unlock(s->bitmap); -} - qemu_mutex_unlock(&s->lock); } Reviewed-by: Andrey Shinkevich -- With the best regards, Andrey Shinkevich
[PULL 35/36] iotests: Add tests for invalid Quorum @replaces
From: Max Reitz Add two tests to see that you cannot replace a Quorum child with the mirror job while the child is in use by a different parent. Signed-off-by: Max Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20200218103454.296704-19-mre...@redhat.com> Signed-off-by: Kevin Wolf --- tests/qemu-iotests/041 | 70 +- tests/qemu-iotests/041.out | 4 +-- 2 files changed, 71 insertions(+), 3 deletions(-) diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index 1d9e64ff6d..7af6de9124 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -20,6 +20,7 @@ import time import os +import re import iotests from iotests import qemu_img, qemu_io @@ -34,6 +35,8 @@ quorum_img3 = os.path.join(iotests.test_dir, 'quorum3.img') quorum_repair_img = os.path.join(iotests.test_dir, 'quorum_repair.img') quorum_snapshot_file = os.path.join(iotests.test_dir, 'quorum_snapshot.img') +nbd_sock_path = os.path.join(iotests.test_dir, 'nbd.sock') + class TestSingleDrive(iotests.QMPTestCase): image_len = 1 * 1024 * 1024 # MB qmp_cmd = 'drive-mirror' @@ -892,7 +895,8 @@ class TestRepairQuorum(iotests.QMPTestCase): def tearDown(self): self.vm.shutdown() -for i in self.IMAGES + [ quorum_repair_img, quorum_snapshot_file ]: +for i in self.IMAGES + [ quorum_repair_img, quorum_snapshot_file, + nbd_sock_path ]: # Do a try/except because the test may have deleted some images try: os.remove(i) @@ -1032,6 +1036,70 @@ class TestRepairQuorum(iotests.QMPTestCase): self.assert_has_block_node("repair0", quorum_repair_img) self.vm.assert_block_path('quorum0', '/children.1', 'repair0') +def test_with_other_parent(self): +""" +Check that we cannot replace a Quorum child when it has other +parents. +""" +result = self.vm.qmp('nbd-server-start', + addr={ + 'type': 'unix', + 'data': {'path': nbd_sock_path} + }) +self.assert_qmp(result, 'return', {}) + +result = self.vm.qmp('nbd-server-add', device='img1') +self.assert_qmp(result, 'return', {}) + +result = self.vm.qmp('drive-mirror', job_id='mirror', device='quorum0', + sync='full', node_name='repair0', replaces='img1', + target=quorum_repair_img, format=iotests.imgfmt) +self.assert_qmp(result, 'error/desc', +"Cannot replace 'img1' by a node mirrored from " +"'quorum0', because it cannot be guaranteed that doing " +"so would not lead to an abrupt change of visible data") + +def test_with_other_parents_after_mirror_start(self): +""" +The same as test_with_other_parent(), but add the NBD server +only when the mirror job is already running. +""" +result = self.vm.qmp('nbd-server-start', + addr={ + 'type': 'unix', + 'data': {'path': nbd_sock_path} + }) +self.assert_qmp(result, 'return', {}) + +result = self.vm.qmp('drive-mirror', job_id='mirror', device='quorum0', + sync='full', node_name='repair0', replaces='img1', + target=quorum_repair_img, format=iotests.imgfmt) +self.assert_qmp(result, 'return', {}) + +result = self.vm.qmp('nbd-server-add', device='img1') +self.assert_qmp(result, 'return', {}) + +# The full error message goes to stderr, we will check it later +self.complete_and_wait('mirror', + completion_error='Operation not permitted') + +# Should not have been replaced +self.vm.assert_block_path('quorum0', '/children.1', 'img1') + +# Check the full error message now +self.vm.shutdown() +log = self.vm.get_log() +log = re.sub(r'^\[I \d+\.\d+\] OPENED\n', '', log) +log = re.sub(r'^Formatting.*\n', '', log) +log = re.sub(r'\n\[I \+\d+\.\d+\] CLOSED\n?$', '', log) +log = re.sub(r'^%s: ' % os.path.basename(iotests.qemu_prog), '', log) + +self.assertEqual(log, + "Can no longer replace 'img1' by 'repair0', because " + + "it can no longer be guaranteed that doing so would " + + "not lead to an abrupt change of visible data") + + # Test mirroring with a source that does not have any parents (not even a # BlockBackend) class TestOrphanedSource(iotests.QMPTestCase): diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out index f496be9197..ffc779b4d1 100644 --- a/tests/qemu-iotests/041.out +++ b/t
Re: [PATCH v4 3/4] qmp: Move dispatcher to a coroutine
Never been closer... Kevin Wolf writes: > Am 17.02.2020 um 12:08 hat Markus Armbruster geschrieben: >> This is the hairy one. Please bear with me while I try to grok it. >> >> Kevin Wolf writes: >> >> > This moves the QMP dispatcher to a coroutine and runs all QMP command >> > handlers that declare 'coroutine': true in coroutine context so they >> > can avoid blocking the main loop while doing I/O or waiting for other >> > events. >> > >> > For commands that are not declared safe to run in a coroutine, the >> > dispatcher drops out of coroutine context by calling the QMP command >> > handler from a bottom half. >> > >> > Signed-off-by: Kevin Wolf >> > --- >> > include/qapi/qmp/dispatch.h | 1 + >> > monitor/monitor-internal.h | 6 +- >> > monitor/monitor.c | 33 --- >> > monitor/qmp.c | 110 ++-- >> > qapi/qmp-dispatch.c | 44 ++- >> > qapi/qmp-registry.c | 3 + >> > util/aio-posix.c| 7 ++- >> > 7 files changed, 162 insertions(+), 42 deletions(-) >> > >> > diff --git a/include/qapi/qmp/dispatch.h b/include/qapi/qmp/dispatch.h >> > index d6ce9efc8e..6812e49b5f 100644 >> > --- a/include/qapi/qmp/dispatch.h >> > +++ b/include/qapi/qmp/dispatch.h >> > @@ -30,6 +30,7 @@ typedef enum QmpCommandOptions >> > typedef struct QmpCommand >> > { >> > const char *name; >> > +/* Runs in coroutine context if QCO_COROUTINE is set */ >> > QmpCommandFunc *fn; >> > QmpCommandOptions options; >> > QTAILQ_ENTRY(QmpCommand) node; >> > diff --git a/monitor/monitor-internal.h b/monitor/monitor-internal.h >> > index d78f5ca190..f180d03368 100644 >> > --- a/monitor/monitor-internal.h >> > +++ b/monitor/monitor-internal.h >> > @@ -154,7 +154,9 @@ static inline bool monitor_is_qmp(const Monitor *mon) >> > >> > typedef QTAILQ_HEAD(MonitorList, Monitor) MonitorList; >> > extern IOThread *mon_iothread; >> > -extern QEMUBH *qmp_dispatcher_bh; >> > +extern Coroutine *qmp_dispatcher_co; >> > +extern bool qmp_dispatcher_co_shutdown; >> > +extern bool qmp_dispatcher_co_busy; >> > extern QmpCommandList qmp_commands, qmp_cap_negotiation_commands; >> > extern QemuMutex monitor_lock; >> > extern MonitorList mon_list; >> > @@ -172,7 +174,7 @@ void monitor_fdsets_cleanup(void); >> > >> > void qmp_send_response(MonitorQMP *mon, const QDict *rsp); >> > void monitor_data_destroy_qmp(MonitorQMP *mon); >> > -void monitor_qmp_bh_dispatcher(void *data); >> > +void coroutine_fn monitor_qmp_dispatcher_co(void *data); >> > >> > int get_monitor_def(int64_t *pval, const char *name); >> > void help_cmd(Monitor *mon, const char *name); >> > diff --git a/monitor/monitor.c b/monitor/monitor.c >> > index 12898b6448..e753fa435d 100644 >> > --- a/monitor/monitor.c >> > +++ b/monitor/monitor.c >> > @@ -53,8 +53,18 @@ typedef struct { >> > /* Shared monitor I/O thread */ >> > IOThread *mon_iothread; >> > >> > -/* Bottom half to dispatch the requests received from I/O thread */ >> > -QEMUBH *qmp_dispatcher_bh; >> > +/* Coroutine to dispatch the requests received from I/O thread */ >> > +Coroutine *qmp_dispatcher_co; >> > + >> > +/* Set to true when the dispatcher coroutine should terminate */ >> > +bool qmp_dispatcher_co_shutdown; >> > + >> > +/* >> > + * true if the coroutine is active and processing requests. The coroutine >> > may >> > + * only be woken up externally (e.g. from the monitor thread) after >> > changing >> > + * qmp_dispatcher_co_busy from false to true (e.g. using atomic_xchg). >> > + */ >> >> I'm not sure what you mean by "externally". > > By anyone outside the coroutine itself. Maybe just dropping the word > "externally" avoids the confusion because it's an implementation detail > that the coroutine can schedule itself while it is marked busy. Let's do that. For me, a coroutine scheduling itself is not covered by "woken up". >> Also mention how it changes from true to false? > > Somethin like: "The coroutine will automatically change it back to false > after processing all pending requests"? Works for me. More below. >> Note to self: monitor_qmp_dispatcher_co() checks busy is true on resume. >> >> Nitpick: wrap around column 70, two spaces between sentences for >> consistency with other comments in this file, please. > > Any specific reason why comments (but not code) in this file use a > different text width than everything else in QEMU? My editor is set to > use 80 characters to conform to CODING_STYLE.rst. Legibility. Humans tend to have trouble following long lines with their eyes (I sure do). Typographic manuals suggest to limit columns to roughly 60 characters for exactly that reason[*]. Code is special. It's typically indented, and long identifiers push it further to the right, function arguments in particular. We compromised at 80 columns. Block comments are not code. They are typically not indented much. This one isn't indented at all. Line length without
[PULL 34/36] iotests: Use self.image_len in TestRepairQuorum
From: Max Reitz 041's TestRepairQuorum has its own image_len, no need to refer to TestSingleDrive. (This patch allows commenting out TestSingleDrive to speed up 041 during test testing.) Signed-off-by: Max Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20200218103454.296704-18-mre...@redhat.com> Signed-off-by: Kevin Wolf --- tests/qemu-iotests/041 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index 084da6baf3..1d9e64ff6d 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -872,7 +872,7 @@ class TestRepairQuorum(iotests.QMPTestCase): # Add each individual quorum images for i in self.IMAGES: qemu_img('create', '-f', iotests.imgfmt, i, - str(TestSingleDrive.image_len)) + str(self.image_len)) # Assign a node name to each quorum image in order to manipulate # them opts = "node-name=img%i" % self.IMAGES.index(i) -- 2.20.1
[PULL 33/36] iotests: Resolve TODOs in 041
From: Max Reitz Signed-off-by: Max Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20200218103454.296704-17-mre...@redhat.com> Signed-off-by: Kevin Wolf --- tests/qemu-iotests/041 | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index 7b2cf5c2f8..084da6baf3 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -909,8 +909,7 @@ class TestRepairQuorum(iotests.QMPTestCase): self.complete_and_wait(drive="job0") self.assert_has_block_node("repair0", quorum_repair_img) -# TODO: a better test requiring some QEMU infrastructure will be added -# to check that this file is really driven by quorum +self.vm.assert_block_path('quorum0', '/children.1', 'repair0') self.vm.shutdown() self.assertTrue(iotests.compare_images(quorum_img2, quorum_repair_img), 'target image does not match source after mirroring') @@ -1031,8 +1030,7 @@ class TestRepairQuorum(iotests.QMPTestCase): self.complete_and_wait('job0') self.assert_has_block_node("repair0", quorum_repair_img) -# TODO: a better test requiring some QEMU infrastructure will be added -# to check that this file is really driven by quorum +self.vm.assert_block_path('quorum0', '/children.1', 'repair0') # Test mirroring with a source that does not have any parents (not even a # BlockBackend) -- 2.20.1
[PULL 36/36] iotests: Check that @replaces can replace filters
From: Max Reitz Signed-off-by: Max Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20200218103454.296704-20-mre...@redhat.com> Signed-off-by: Kevin Wolf --- tests/qemu-iotests/041 | 46 ++ tests/qemu-iotests/041.out | 4 ++-- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index 7af6de9124..5d67bf14bf 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -1190,6 +1190,52 @@ class TestOrphanedSource(iotests.QMPTestCase): self.assertFalse('mirror-filter' in nodes, 'Mirror filter node did not disappear') +# Test cases for @replaces that do not necessarily involve Quorum +class TestReplaces(iotests.QMPTestCase): +# Each of these test cases needs their own block graph, so do not +# create any nodes here +def setUp(self): +self.vm = iotests.VM() +self.vm.launch() + +def tearDown(self): +self.vm.shutdown() +for img in (test_img, target_img): +try: +os.remove(img) +except OSError: +pass + +@iotests.skip_if_unsupported(['copy-on-read']) +def test_replace_filter(self): +""" +Check that we can replace filter nodes. +""" +result = self.vm.qmp('blockdev-add', **{ + 'driver': 'copy-on-read', + 'node-name': 'filter0', + 'file': { + 'driver': 'copy-on-read', + 'node-name': 'filter1', + 'file': { + 'driver': 'null-co' + } + } + }) +self.assert_qmp(result, 'return', {}) + +result = self.vm.qmp('blockdev-add', + node_name='target', driver='null-co') +self.assert_qmp(result, 'return', {}) + +result = self.vm.qmp('blockdev-mirror', job_id='mirror', device='filter0', + target='target', sync='full', replaces='filter1') +self.assert_qmp(result, 'return', {}) + +self.complete_and_wait('mirror') + +self.vm.assert_block_path('filter0', '/file', 'target') + if __name__ == '__main__': iotests.main(supported_fmts=['qcow2', 'qed'], supported_protocols=['file'], diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out index ffc779b4d1..877b76fd31 100644 --- a/tests/qemu-iotests/041.out +++ b/tests/qemu-iotests/041.out @@ -1,5 +1,5 @@ -. +.. -- -Ran 93 tests +Ran 94 tests OK -- 2.20.1
[PULL 32/36] iotests/041: Drop superfluous shutdowns
From: Max Reitz All tearDowns in 041 shutdown the VM. Thus, test cases do not need to do it themselves (unless they need the VM to be down for some post-operation check). Signed-off-by: Max Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20200218103454.296704-16-mre...@redhat.com> Signed-off-by: Kevin Wolf --- tests/qemu-iotests/041 | 11 --- 1 file changed, 11 deletions(-) diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index aa7d54d968..7b2cf5c2f8 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -80,7 +80,6 @@ class TestSingleDrive(iotests.QMPTestCase): self.cancel_and_wait(force=True) result = self.vm.qmp('query-block') self.assert_qmp(result, 'return[0]/inserted/file', test_img) -self.vm.shutdown() def test_cancel_after_ready(self): self.assert_no_active_block_jobs() @@ -201,8 +200,6 @@ class TestSingleDrive(iotests.QMPTestCase): self.assert_qmp(result, 'return[0]/node-name', 'top') self.assert_qmp(result, 'return[0]/backing/node-name', 'base') -self.vm.shutdown() - def test_medium_not_found(self): if iotests.qemu_default_machine != 'pc': return @@ -455,7 +452,6 @@ new_state = "1" self.assert_qmp(event, 'data/id', 'drive0') self.assert_no_active_block_jobs() -self.vm.shutdown() def test_ignore_read(self): self.assert_no_active_block_jobs() @@ -475,7 +471,6 @@ new_state = "1" result = self.vm.qmp('query-block-jobs') self.assert_qmp(result, 'return[0]/paused', False) self.complete_and_wait() -self.vm.shutdown() def test_large_cluster(self): self.assert_no_active_block_jobs() @@ -540,7 +535,6 @@ new_state = "1" self.complete_and_wait(wait_ready=False) self.assert_no_active_block_jobs() -self.vm.shutdown() class TestWriteErrors(iotests.QMPTestCase): image_len = 2 * 1024 * 1024 # MB @@ -614,7 +608,6 @@ new_state = "1" completed = True self.assert_no_active_block_jobs() -self.vm.shutdown() def test_ignore_write(self): self.assert_no_active_block_jobs() @@ -631,7 +624,6 @@ new_state = "1" result = self.vm.qmp('query-block-jobs') self.assert_qmp(result, 'return[0]/paused', False) self.complete_and_wait() -self.vm.shutdown() def test_stop_write(self): self.assert_no_active_block_jobs() @@ -667,7 +659,6 @@ new_state = "1" self.complete_and_wait(wait_ready=False) self.assert_no_active_block_jobs() -self.vm.shutdown() class TestSetSpeed(iotests.QMPTestCase): image_len = 80 * 1024 * 1024 # MB @@ -936,7 +927,6 @@ class TestRepairQuorum(iotests.QMPTestCase): # here we check that the last registered quorum file has not been # swapped out and unref self.assert_has_block_node(None, quorum_img3) -self.vm.shutdown() def test_cancel_after_ready(self): self.assert_no_active_block_jobs() @@ -1043,7 +1033,6 @@ class TestRepairQuorum(iotests.QMPTestCase): self.assert_has_block_node("repair0", quorum_repair_img) # TODO: a better test requiring some QEMU infrastructure will be added # to check that this file is really driven by quorum -self.vm.shutdown() # Test mirroring with a source that does not have any parents (not even a # BlockBackend) -- 2.20.1
[PULL 31/36] iotests: Add VM.assert_block_path()
From: Max Reitz Signed-off-by: Max Reitz Message-Id: <20200218103454.296704-15-mre...@redhat.com> Signed-off-by: Kevin Wolf --- tests/qemu-iotests/iotests.py | 59 +++ 1 file changed, 59 insertions(+) diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 0473e824ed..8815052eb5 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -714,6 +714,65 @@ class VM(qtest.QEMUQtestMachine): return fields.items() <= ret.items() +def assert_block_path(self, root, path, expected_node, graph=None): +""" +Check whether the node under the given path in the block graph +is @expected_node. + +@root is the node name of the node where the @path is rooted. + +@path is a string that consists of child names separated by +slashes. It must begin with a slash. + +Examples for @root + @path: + - root="qcow2-node", path="/backing/file" + - root="quorum-node", path="/children.2/file" + +Hypothetically, @path could be empty, in which case it would +point to @root. However, in practice this case is not useful +and hence not allowed. + +@expected_node may be None. (All elements of the path but the +leaf must still exist.) + +@graph may be None or the result of an x-debug-query-block-graph +call that has already been performed. +""" +if graph is None: +graph = self.qmp('x-debug-query-block-graph')['return'] + +iter_path = iter(path.split('/')) + +# Must start with a / +assert next(iter_path) == '' + +node = next((node for node in graph['nodes'] if node['name'] == root), +None) + +# An empty @path is not allowed, so the root node must be present +assert node is not None, 'Root node %s not found' % root + +for child_name in iter_path: +assert node is not None, 'Cannot follow path %s%s' % (root, path) + +try: +node_id = next(edge['child'] for edge in graph['edges'] \ + if edge['parent'] == node['id'] and +edge['name'] == child_name) + +node = next(node for node in graph['nodes'] \ + if node['id'] == node_id) +except StopIteration: +node = None + +if node is None: +assert expected_node is None, \ + 'No node found under %s (but expected %s)' % \ + (path, expected_node) +else: +assert node['name'] == expected_node, \ + 'Found node %s under %s (but expected %s)' % \ + (node['name'], path, expected_node) index_re = re.compile(r'([^\[]+)\[([^\]]+)\]') -- 2.20.1
[PULL 30/36] iotests: Use complete_and_wait() in 155
From: Max Reitz This way, we get to see errors during the completion phase. Signed-off-by: Max Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20200218103454.296704-14-mre...@redhat.com> Signed-off-by: Kevin Wolf --- tests/qemu-iotests/155 | 7 +-- 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tests/qemu-iotests/155 b/tests/qemu-iotests/155 index e35b1d534b..f237868710 100755 --- a/tests/qemu-iotests/155 +++ b/tests/qemu-iotests/155 @@ -163,12 +163,7 @@ class MirrorBaseClass(BaseClass): self.assert_qmp(result, 'return', {}) -self.vm.event_wait('BLOCK_JOB_READY') - -result = self.vm.qmp('block-job-complete', device='mirror-job') -self.assert_qmp(result, 'return', {}) - -self.vm.event_wait('BLOCK_JOB_COMPLETED') +self.complete_and_wait('mirror-job') def testFull(self): self.runMirror('full') -- 2.20.1
[PULL 20/36] block: Drop bdrv_is_first_non_filter()
From: Max Reitz It is unused now. (And it was ugly because it needed to explore all BDS chains from the top.) Signed-off-by: Max Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20200218103454.296704-4-mre...@redhat.com> Signed-off-by: Kevin Wolf --- include/block/block.h | 1 - block.c | 26 -- 2 files changed, 27 deletions(-) diff --git a/include/block/block.h b/include/block/block.h index 6cd566324d..6a8462a5bc 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -394,7 +394,6 @@ int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts, /* external snapshots */ bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, BlockDriverState *candidate); -bool bdrv_is_first_non_filter(BlockDriverState *candidate); /* check if a named node can be replaced when doing drive-mirror */ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, diff --git a/block.c b/block.c index 9db0b973fe..145d0baf5e 100644 --- a/block.c +++ b/block.c @@ -6234,32 +6234,6 @@ bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, return false; } -/* This function checks if the candidate is the first non filter bs down it's - * bs chain. Since we don't have pointers to parents it explore all bs chains - * from the top. Some filters can choose not to pass down the recursion. - */ -bool bdrv_is_first_non_filter(BlockDriverState *candidate) -{ -BlockDriverState *bs; -BdrvNextIterator it; - -/* walk down the bs forest recursively */ -for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { -bool perm; - -/* try to recurse in this top level bs */ -perm = bdrv_recurse_is_first_non_filter(bs, candidate); - -/* candidate is the first non filter */ -if (perm) { -bdrv_next_cleanup(&it); -return true; -} -} - -return false; -} - BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, const char *node_name, Error **errp) { -- 2.20.1
[PULL 26/36] block: Use bdrv_recurse_can_replace()
From: Max Reitz Let check_to_replace_node() use the more specialized bdrv_recurse_can_replace() instead of bdrv_recurse_is_first_non_filter(), which is too restrictive (or, in the case of quorum, sometimes not restrictive enough). Signed-off-by: Max Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20200218103454.296704-10-mre...@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 18 -- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index e2fefe5883..80a0806eb0 100644 --- a/block.c +++ b/block.c @@ -6272,6 +6272,17 @@ bool bdrv_recurse_can_replace(BlockDriverState *bs, return false; } +/* + * Check whether the given @node_name can be replaced by a node that + * has the same data as @parent_bs. If so, return @node_name's BDS; + * NULL otherwise. + * + * @node_name must be a (recursive) *child of @parent_bs (or this + * function will return NULL). + * + * The result (whether the node can be replaced or not) is only valid + * for as long as no graph or permission changes occur. + */ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, const char *node_name, Error **errp) { @@ -6296,8 +6307,11 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, * Another benefit is that this tests exclude backing files which are * blocked by the backing blockers. */ -if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) { -error_setg(errp, "Only top most non filter can be replaced"); +if (!bdrv_recurse_can_replace(parent_bs, to_replace_bs)) { +error_setg(errp, "Cannot replace '%s' by a node mirrored from '%s', " + "because it cannot be guaranteed that doing so would not " + "lead to an abrupt change of visible data", + node_name, parent_bs->node_name); to_replace_bs = NULL; goto out; } -- 2.20.1
[PULL 27/36] block: Remove bdrv_recurse_is_first_non_filter()
From: Max Reitz It no longer has any users. Signed-off-by: Max Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20200218103454.296704-11-mre...@redhat.com> Signed-off-by: Kevin Wolf --- include/block/block.h | 4 include/block/block_int.h | 8 block.c | 33 - block/blkverify.c | 15 --- block/copy-on-read.c | 9 - block/filter-compress.c | 9 - block/quorum.c| 18 -- block/replication.c | 7 --- block/throttle.c | 8 9 files changed, 111 deletions(-) diff --git a/include/block/block.h b/include/block/block.h index 6a8462a5bc..314ce63ed9 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -391,10 +391,6 @@ int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts, BlockDriverAmendStatusCB *status_cb, void *cb_opaque, Error **errp); -/* external snapshots */ -bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, - BlockDriverState *candidate); - /* check if a named node can be replaced when doing drive-mirror */ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, const char *node_name, Error **errp); diff --git a/include/block/block_int.h b/include/block/block_int.h index eaefac210e..6f9fd5e20e 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -94,14 +94,6 @@ struct BlockDriver { * must implement them and return -ENOTSUP. */ bool is_filter; -/* for snapshots block filter like Quorum can implement the - * following recursive callback. - * It's purpose is to recurse on the filter children while calling - * bdrv_recurse_is_first_non_filter on them. - * For a sample implementation look in the future Quorum block filter. - */ -bool (*bdrv_recurse_is_first_non_filter)(BlockDriverState *bs, - BlockDriverState *candidate); /* * Return true if @to_replace can be replaced by a BDS with the * same data as @bs without it affecting @bs's behavior (that is, diff --git a/block.c b/block.c index 80a0806eb0..946e3c896e 100644 --- a/block.c +++ b/block.c @@ -6201,39 +6201,6 @@ int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts, return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque, errp); } -/* This function will be called by the bdrv_recurse_is_first_non_filter method - * of block filter and by bdrv_is_first_non_filter. - * It is used to test if the given bs is the candidate or recurse more in the - * node graph. - */ -bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, - BlockDriverState *candidate) -{ -/* return false if basic checks fails */ -if (!bs || !bs->drv) { -return false; -} - -/* the code reached a non block filter driver -> check if the bs is - * the same as the candidate. It's the recursion termination condition. - */ -if (!bs->drv->is_filter) { -return bs == candidate; -} -/* Down this path the driver is a block filter driver */ - -/* If the block filter recursion method is defined use it to recurse down - * the node graph. - */ -if (bs->drv->bdrv_recurse_is_first_non_filter) { -return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate); -} - -/* the driver is a block filter but don't allow to recurse -> return false - */ -return false; -} - /* * This function checks whether the given @to_replace is allowed to be * replaced by a node that always shows the same data as @bs. This is diff --git a/block/blkverify.c b/block/blkverify.c index 0add3ab483..ba6b1853ae 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -268,20 +268,6 @@ static int blkverify_co_flush(BlockDriverState *bs) return bdrv_co_flush(s->test_file->bs); } -static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs, - BlockDriverState *candidate) -{ -BDRVBlkverifyState *s = bs->opaque; - -bool perm = bdrv_recurse_is_first_non_filter(bs->file->bs, candidate); - -if (perm) { -return true; -} - -return bdrv_recurse_is_first_non_filter(s->test_file->bs, candidate); -} - static bool blkverify_recurse_can_replace(BlockDriverState *bs, BlockDriverState *to_replace) { @@ -341,7 +327,6 @@ static BlockDriver bdrv_blkverify = { .bdrv_co_flush= blkverify_co_flush, .is_filter= true, -.bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter, .bdrv_recurse_can_replace = blkverify_recurse_can_replace, }; diff --git a/block/copy-on-read.c b/block/copy-on-read.c ind
[PULL 22/36] quorum: Fix child permissions
From: Max Reitz Quorum cannot share WRITE or RESIZE on its children. Presumably, it only does so because as a filter, it seemed intuitively correct to point its .bdrv_child_perm to bdrv_filter_default_perm(). However, it is not really a filter, and bdrv_filter_default_perm() does not work for it, so we have to provide a custom .bdrv_child_perm implementation. Signed-off-by: Max Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20200218103454.296704-6-mre...@redhat.com> Signed-off-by: Kevin Wolf --- block/quorum.c | 19 ++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/block/quorum.c b/block/quorum.c index df68adcfaa..17b439056f 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -1114,6 +1114,23 @@ static char *quorum_dirname(BlockDriverState *bs, Error **errp) return NULL; } +static void quorum_child_perm(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ +*nperm = perm & DEFAULT_PERM_PASSTHROUGH; + +/* + * We cannot share RESIZE or WRITE, as this would make the + * children differ from each other. + */ +*nshared = (shared & (BLK_PERM_CONSISTENT_READ | + BLK_PERM_WRITE_UNCHANGED)) + | DEFAULT_PERM_UNCHANGED; +} + static const char *const quorum_strong_runtime_opts[] = { QUORUM_OPT_VOTE_THRESHOLD, QUORUM_OPT_BLKVERIFY, @@ -1143,7 +1160,7 @@ static BlockDriver bdrv_quorum = { .bdrv_add_child = quorum_add_child, .bdrv_del_child = quorum_del_child, -.bdrv_child_perm= bdrv_filter_default_perms, +.bdrv_child_perm= quorum_child_perm, .is_filter = true, .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter, -- 2.20.1
[PULL 21/36] iotests: Let 041 use -blockdev for quorum children
From: Max Reitz Using -drive with default options means that a virtio-blk drive will be created that has write access to the to-be quorum children. Quorum should have exclusive write access to them, so we should use -blockdev instead. Signed-off-by: Max Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20200218103454.296704-5-mre...@redhat.com> Signed-off-by: Kevin Wolf --- tests/qemu-iotests/041 | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index 43556b9727..aa7d54d968 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -885,7 +885,10 @@ class TestRepairQuorum(iotests.QMPTestCase): # Assign a node name to each quorum image in order to manipulate # them opts = "node-name=img%i" % self.IMAGES.index(i) -self.vm = self.vm.add_drive(i, opts) +opts += ',driver=%s' % iotests.imgfmt +opts += ',file.driver=file' +opts += ',file.filename=%s' % i +self.vm = self.vm.add_blockdev(opts) self.vm.launch() -- 2.20.1
[PULL 28/36] mirror: Double-check immediately before replacing
From: Max Reitz There is no guarantee that we can still replace the node we want to replace at the end of the mirror job. Double-check by calling bdrv_recurse_can_replace(). Signed-off-by: Max Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20200218103454.296704-12-mre...@redhat.com> Signed-off-by: Kevin Wolf --- block/mirror.c | 14 +- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/block/mirror.c b/block/mirror.c index cacbc70014..447051dbc6 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -700,7 +700,19 @@ static int mirror_exit_common(Job *job) * drain potential other users of the BDS before changing the graph. */ assert(s->in_drain); bdrv_drained_begin(target_bs); -bdrv_replace_node(to_replace, target_bs, &local_err); +/* + * Cannot use check_to_replace_node() here, because that would + * check for an op blocker on @to_replace, and we have our own + * there. + */ +if (bdrv_recurse_can_replace(src, to_replace)) { +bdrv_replace_node(to_replace, target_bs, &local_err); +} else { +error_setg(&local_err, "Can no longer replace '%s' by '%s', " + "because it can no longer be guaranteed that doing so " + "would not lead to an abrupt change of visible data", + to_replace->node_name, target_bs->node_name); +} bdrv_drained_end(target_bs); if (local_err) { error_report_err(local_err); -- 2.20.1
[PULL 29/36] quorum: Stop marking it as a filter
From: Max Reitz Quorum is not a filter, for example because it cannot guarantee which of its children will serve the next request. Thus, any of its children may differ from the data visible to quorum's parents. We have other filters with multiple children, but they differ in this aspect: - blkverify quits the whole qemu process if its children differ. As such, we can always skip it when we want to skip it (as a filter node) by going to any of its children. Both have the same data. - replication generally serves requests from bs->file, so this is its only actually filtered child. - Block job filters currently only have one child, but they will probably get more children in the future. Still, they will always have only one actually filtered child. Having "filters" as a dedicated node category only makes sense if you can skip them by going to a one fixed child that always shows the same data as the filter node. Quorum cannot fulfill this, so it is not a filter. Signed-off-by: Max Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20200218103454.296704-13-mre...@redhat.com> Signed-off-by: Kevin Wolf --- block/quorum.c | 1 - 1 file changed, 1 deletion(-) diff --git a/block/quorum.c b/block/quorum.c index f57b0402d9..6d7a56bd93 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -1198,7 +1198,6 @@ static BlockDriver bdrv_quorum = { .bdrv_child_perm= quorum_child_perm, -.is_filter = true, .bdrv_recurse_can_replace = quorum_recurse_can_replace, .strong_runtime_opts= quorum_strong_runtime_opts, -- 2.20.1
[PULL 15/36] iotests: Test error handling policies with block-commit
This tests both read failure (from the top node) and write failure (to the base node) for on-error=report/stop/ignore. As block-commit actually starts two different types of block jobs (mirror.c for committing the active later, commit.c for intermediate layers), all tests are run for both cases. Signed-off-by: Kevin Wolf Message-Id: <20200214200812.28180-8-kw...@redhat.com> Signed-off-by: Kevin Wolf --- tests/qemu-iotests/040 | 283 + tests/qemu-iotests/040.out | 4 +- 2 files changed, 285 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040 index 2e7ee0e84f..32c82b4ec6 100755 --- a/tests/qemu-iotests/040 +++ b/tests/qemu-iotests/040 @@ -430,6 +430,289 @@ class TestReopenOverlay(ImageCommitTestCase): def test_reopen_overlay(self): self.run_commit_test(self.img1, self.img0) +class TestErrorHandling(iotests.QMPTestCase): +image_len = 2 * 1024 * 1024 + +def setUp(self): +iotests.create_image(backing_img, self.image_len) +qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, mid_img) +qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img) + +qemu_io('-f', iotests.imgfmt, '-c', 'write -P 0x11 0 512k', mid_img) +qemu_io('-f', iotests.imgfmt, '-c', 'write -P 0x22 0 512k', test_img) + +self.vm = iotests.VM() +self.vm.launch() + +self.blkdebug_file = iotests.file_path("blkdebug.conf") + +def tearDown(self): +self.vm.shutdown() +os.remove(test_img) +os.remove(mid_img) +os.remove(backing_img) + +def blockdev_add(self, **kwargs): +result = self.vm.qmp('blockdev-add', **kwargs) +self.assert_qmp(result, 'return', {}) + +def add_block_nodes(self, base_debug=None, mid_debug=None, top_debug=None): +self.blockdev_add(node_name='base-file', driver='file', + filename=backing_img) +self.blockdev_add(node_name='mid-file', driver='file', + filename=mid_img) +self.blockdev_add(node_name='top-file', driver='file', + filename=test_img) + +if base_debug: +self.blockdev_add(node_name='base-dbg', driver='blkdebug', + image='base-file', inject_error=base_debug) +if mid_debug: +self.blockdev_add(node_name='mid-dbg', driver='blkdebug', + image='mid-file', inject_error=mid_debug) +if top_debug: +self.blockdev_add(node_name='top-dbg', driver='blkdebug', + image='top-file', inject_error=top_debug) + +self.blockdev_add(node_name='base-fmt', driver='raw', + file=('base-dbg' if base_debug else 'base-file')) +self.blockdev_add(node_name='mid-fmt', driver=iotests.imgfmt, + file=('mid-dbg' if mid_debug else 'mid-file'), + backing='base-fmt') +self.blockdev_add(node_name='top-fmt', driver=iotests.imgfmt, + file=('top-dbg' if top_debug else 'top-file'), + backing='mid-fmt') + +def run_job(self, expected_events, error_pauses_job=False): +match_device = {'data': {'device': 'job0'}} +events = [ +('BLOCK_JOB_COMPLETED', match_device), +('BLOCK_JOB_CANCELLED', match_device), +('BLOCK_JOB_ERROR', match_device), +('BLOCK_JOB_READY', match_device), +] + +completed = False +log = [] +while not completed: +ev = self.vm.events_wait(events, timeout=5.0) +if ev['event'] == 'BLOCK_JOB_COMPLETED': +completed = True +elif ev['event'] == 'BLOCK_JOB_ERROR': +if error_pauses_job: +result = self.vm.qmp('block-job-resume', device='job0') +self.assert_qmp(result, 'return', {}) +elif ev['event'] == 'BLOCK_JOB_READY': +result = self.vm.qmp('block-job-complete', device='job0') +self.assert_qmp(result, 'return', {}) +else: +self.fail("Unexpected event: %s" % ev) +log.append(iotests.filter_qmp_event(ev)) + +self.maxDiff = None +self.assertEqual(expected_events, log) + +def event_error(self, op, action): +return { +'event': 'BLOCK_JOB_ERROR', +'data': {'action': action, 'device': 'job0', 'operation': op}, +'timestamp': {'microseconds': 'USECS', 'seconds': 'SECS'} +} + +def event_ready(self): +return { +'event': 'BLOCK_JOB_READY', +'data': {'device': 'job0', + 'len': 524288, + 'offset': 524288, + 'speed': 0, + 'type': 'commit'}, +
[PULL 18/36] blockdev: Allow external snapshots everywhere
From: Max Reitz There is no good reason why we would allow external snapshots only on the first non-filter node in a chain. Parent BDSs should not care whether their child is replaced by a snapshot. (If they do care, they should announce that via freezing the chain, which is checked in bdrv_append() through bdrv_set_backing_hd().) Before we had bdrv_is_first_non_filter() here (since 212a5a8f095), there was a special function bdrv_check_ext_snapshot() that allowed snapshots by default, but block drivers could override this. Only blkverify did so, however. It is not clear to me why blkverify would do so; maybe just so that the testee block driver would not be replaced. The introducing commit f6186f49e2c does not explain why. Maybe because 08b24cfe376 would have been the correct solution? (Which adds a .supports_backing check.) Signed-off-by: Max Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20200218103454.296704-2-mre...@redhat.com> Signed-off-by: Kevin Wolf --- blockdev.c | 5 - 1 file changed, 5 deletions(-) diff --git a/blockdev.c b/blockdev.c index 374189a426..d83bb2beda 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1592,11 +1592,6 @@ static void external_snapshot_prepare(BlkActionState *common, } } -if (!bdrv_is_first_non_filter(state->old_bs)) { -error_setg(errp, QERR_FEATURE_DISABLED, "snapshot"); -goto out; -} - if (action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC) { BlockdevSnapshotSync *s = action->u.blockdev_snapshot_sync.data; const char *format = s->has_format ? s->format : "qcow2"; -- 2.20.1
[PULL 24/36] blkverify: Implement .bdrv_recurse_can_replace()
From: Max Reitz Signed-off-by: Max Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20200218103454.296704-8-mre...@redhat.com> Signed-off-by: Kevin Wolf --- block/blkverify.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/block/blkverify.c b/block/blkverify.c index 304b0a1368..0add3ab483 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -282,6 +282,20 @@ static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs, return bdrv_recurse_is_first_non_filter(s->test_file->bs, candidate); } +static bool blkverify_recurse_can_replace(BlockDriverState *bs, + BlockDriverState *to_replace) +{ +BDRVBlkverifyState *s = bs->opaque; + +/* + * blkverify quits the whole qemu process if there is a mismatch + * between bs->file->bs and s->test_file->bs. Therefore, we know + * know that both must match bs and we can recurse down to either. + */ +return bdrv_recurse_can_replace(bs->file->bs, to_replace) || + bdrv_recurse_can_replace(s->test_file->bs, to_replace); +} + static void blkverify_refresh_filename(BlockDriverState *bs) { BDRVBlkverifyState *s = bs->opaque; @@ -328,6 +342,7 @@ static BlockDriver bdrv_blkverify = { .is_filter= true, .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter, +.bdrv_recurse_can_replace = blkverify_recurse_can_replace, }; static void bdrv_blkverify_init(void) -- 2.20.1
[PULL 25/36] quorum: Implement .bdrv_recurse_can_replace()
From: Max Reitz Signed-off-by: Max Reitz Message-Id: <20200218103454.296704-9-mre...@redhat.com> Signed-off-by: Kevin Wolf --- block/quorum.c | 54 ++ 1 file changed, 54 insertions(+) diff --git a/block/quorum.c b/block/quorum.c index 17b439056f..3ece6e4382 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -813,6 +813,59 @@ static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs, return false; } +static bool quorum_recurse_can_replace(BlockDriverState *bs, + BlockDriverState *to_replace) +{ +BDRVQuorumState *s = bs->opaque; +int i; + +for (i = 0; i < s->num_children; i++) { +/* + * We have no idea whether our children show the same data as + * this node (@bs). It is actually highly likely that + * @to_replace does not, because replacing a broken child is + * one of the main use cases here. + * + * We do know that the new BDS will match @bs, so replacing + * any of our children by it will be safe. It cannot change + * the data this quorum node presents to its parents. + * + * However, replacing @to_replace by @bs in any of our + * children's chains may change visible data somewhere in + * there. We therefore cannot recurse down those chains with + * bdrv_recurse_can_replace(). + * (More formally, bdrv_recurse_can_replace() requires that + * @to_replace will be replaced by something matching the @bs + * passed to it. We cannot guarantee that.) + * + * Thus, we can only check whether any of our immediate + * children matches @to_replace. + * + * (In the future, we might add a function to recurse down a + * chain that checks that nothing there cares about a change + * in data from the respective child in question. For + * example, most filters do not care when their child's data + * suddenly changes, as long as their parents do not care.) + */ +if (s->children[i]->bs == to_replace) { +/* + * We now have to ensure that there is no other parent + * that cares about replacing this child by a node with + * potentially different data. + * We do so by checking whether there are any other parents + * at all, which is stricter than necessary, but also very + * simple. (We may decide to implement something more + * complex and permissive when there is an actual need for + * it.) + */ +return QLIST_FIRST(&to_replace->parents) == s->children[i] && +QLIST_NEXT(s->children[i], next_parent) == NULL; +} +} + +return false; +} + static int quorum_valid_threshold(int threshold, int num_children, Error **errp) { @@ -1164,6 +1217,7 @@ static BlockDriver bdrv_quorum = { .is_filter = true, .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter, +.bdrv_recurse_can_replace = quorum_recurse_can_replace, .strong_runtime_opts= quorum_strong_runtime_opts, }; -- 2.20.1
[PULL 17/36] block/io_uring: Remove superfluous semicolon
From: Philippe Mathieu-Daudé Fixes: 6663a0a3376 Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20200218094402.26625-5-phi...@redhat.com> Signed-off-by: Kevin Wolf --- block/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/io_uring.c b/block/io_uring.c index 56892fd1ab..a3142ca989 100644 --- a/block/io_uring.c +++ b/block/io_uring.c @@ -187,7 +187,7 @@ static void luring_process_completions(LuringState *s) ret = 0; } } else { -ret = -ENOSPC;; +ret = -ENOSPC; } } end: -- 2.20.1
[PULL 23/36] block: Add bdrv_recurse_can_replace()
From: Max Reitz After a couple of follow-up patches, this function will replace bdrv_recurse_is_first_non_filter() in check_to_replace_node(). bdrv_recurse_is_first_non_filter() is both not sufficiently specific for check_to_replace_node() (it allows cases that should not be allowed, like replacing child nodes of quorum with dissenting data that have more parents than just quorum), and it is too restrictive (it is perfectly fine to replace filters). Signed-off-by: Max Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20200218103454.296704-7-mre...@redhat.com> Signed-off-by: Kevin Wolf --- include/block/block_int.h | 10 ++ block.c | 38 ++ 2 files changed, 48 insertions(+) diff --git a/include/block/block_int.h b/include/block/block_int.h index 640fb82c78..eaefac210e 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -102,6 +102,13 @@ struct BlockDriver { */ bool (*bdrv_recurse_is_first_non_filter)(BlockDriverState *bs, BlockDriverState *candidate); +/* + * Return true if @to_replace can be replaced by a BDS with the + * same data as @bs without it affecting @bs's behavior (that is, + * without it being visible to @bs's parents). + */ +bool (*bdrv_recurse_can_replace)(BlockDriverState *bs, + BlockDriverState *to_replace); int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename); int (*bdrv_probe_device)(const char *filename); @@ -1263,6 +1270,9 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared); +bool bdrv_recurse_can_replace(BlockDriverState *bs, + BlockDriverState *to_replace); + /* * Default implementation for drivers to pass bdrv_co_block_status() to * their file. diff --git a/block.c b/block.c index 145d0baf5e..e2fefe5883 100644 --- a/block.c +++ b/block.c @@ -6234,6 +6234,44 @@ bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, return false; } +/* + * This function checks whether the given @to_replace is allowed to be + * replaced by a node that always shows the same data as @bs. This is + * used for example to verify whether the mirror job can replace + * @to_replace by the target mirrored from @bs. + * To be replaceable, @bs and @to_replace may either be guaranteed to + * always show the same data (because they are only connected through + * filters), or some driver may allow replacing one of its children + * because it can guarantee that this child's data is not visible at + * all (for example, for dissenting quorum children that have no other + * parents). + */ +bool bdrv_recurse_can_replace(BlockDriverState *bs, + BlockDriverState *to_replace) +{ +if (!bs || !bs->drv) { +return false; +} + +if (bs == to_replace) { +return true; +} + +/* See what the driver can do */ +if (bs->drv->bdrv_recurse_can_replace) { +return bs->drv->bdrv_recurse_can_replace(bs, to_replace); +} + +/* For filters without an own implementation, we can recurse on our own */ +if (bs->drv->is_filter) { +BdrvChild *child = bs->file ?: bs->backing; +return bdrv_recurse_can_replace(child->bs, to_replace); +} + +/* Safe default */ +return false; +} + BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, const char *node_name, Error **errp) { -- 2.20.1
[PULL 13/36] commit: Fix is_read for block_job_error_action()
block_job_error_action() needs to know if reading from the top node or writing to the base node failed so that it can set the right 'operation' in the BLOCK_JOB_ERROR QMP event. Signed-off-by: Kevin Wolf Message-Id: <20200214200812.28180-6-kw...@redhat.com> Reviewed-by: Ján Tomko Signed-off-by: Kevin Wolf --- block/commit.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/block/commit.c b/block/commit.c index 2992a1012f..8e672799af 100644 --- a/block/commit.c +++ b/block/commit.c @@ -143,6 +143,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp) for (offset = 0; offset < len; offset += n) { bool copy; +bool error_in_source = true; /* Note that even when no rate limit is applied we need to yield * with no pending I/O here so that bdrv_drain_all() returns. @@ -162,11 +163,15 @@ static int coroutine_fn commit_run(Job *job, Error **errp) ret = blk_co_pread(s->top, offset, n, buf, 0); if (ret >= 0) { ret = blk_co_pwrite(s->base, offset, n, buf, 0); +if (ret < 0) { +error_in_source = false; +} } } if (ret < 0) { BlockErrorAction action = -block_job_error_action(&s->common, s->on_error, false, -ret); +block_job_error_action(&s->common, s->on_error, + error_in_source, -ret); if (action == BLOCK_ERROR_ACTION_REPORT) { goto out; } else { -- 2.20.1
[PULL 19/36] blockdev: Allow resizing everywhere
From: Max Reitz Block nodes that do not allow resizing should not share BLK_PERM_RESIZE. It does not matter whether they are the first non-filter in their chain or not. Signed-off-by: Max Reitz Reviewed-by: Vladimir Sementsov-Ogievskiy Message-Id: <20200218103454.296704-3-mre...@redhat.com> Signed-off-by: Kevin Wolf --- blockdev.c | 5 - 1 file changed, 5 deletions(-) diff --git a/blockdev.c b/blockdev.c index d83bb2beda..45de0ba37e 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3331,11 +3331,6 @@ void qmp_block_resize(bool has_device, const char *device, aio_context = bdrv_get_aio_context(bs); aio_context_acquire(aio_context); -if (!bdrv_is_first_non_filter(bs)) { -error_setg(errp, QERR_FEATURE_DISABLED, "resize"); -goto out; -} - if (size < 0) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "size", "a >0 size"); goto out; -- 2.20.1
[PULL 14/36] commit: Expose on-error option in QMP
Now that the error handling in the common block job is fixed, we can expose the on-error option in QMP instead of hard-coding it as 'report' in qmp_block_commit(). This fulfills the promise that the old comment in that function made, even if a bit later than expected: "This will be part of the QMP command, if/when the BlockdevOnError change for blkmirror makes it in". Signed-off-by: Kevin Wolf Message-Id: <20200214200812.28180-7-kw...@redhat.com> Reviewed-by: Ján Tomko Signed-off-by: Kevin Wolf --- qapi/block-core.json | 4 blockdev.c | 8 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/qapi/block-core.json b/qapi/block-core.json index 1c32158d11..37d7ea7295 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1658,6 +1658,9 @@ # # @speed: the maximum speed, in bytes per second # +# @on-error: the action to take on an error. 'ignore' means that the request +#should be retried. (default: report; Since: 5.0) +# # @filter-node-name: the node name that should be assigned to the #filter driver that the commit job inserts into the graph #above @top. If this option is not given, a node name is @@ -1694,6 +1697,7 @@ 'data': { '*job-id': 'str', 'device': 'str', '*base-node': 'str', '*base': 'str', '*top-node': 'str', '*top': 'str', '*backing-file': 'str', '*speed': 'int', +'*on-error': 'BlockdevOnError', '*filter-node-name': 'str', '*auto-finalize': 'bool', '*auto-dismiss': 'bool' } } diff --git a/blockdev.c b/blockdev.c index c6a727cca9..374189a426 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3471,6 +3471,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, bool has_top, const char *top, bool has_backing_file, const char *backing_file, bool has_speed, int64_t speed, + bool has_on_error, BlockdevOnError on_error, bool has_filter_node_name, const char *filter_node_name, bool has_auto_finalize, bool auto_finalize, bool has_auto_dismiss, bool auto_dismiss, @@ -3481,15 +3482,14 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device, BlockDriverState *base_bs, *top_bs; AioContext *aio_context; Error *local_err = NULL; -/* This will be part of the QMP command, if/when the - * BlockdevOnError change for blkmirror makes it in - */ -BlockdevOnError on_error = BLOCKDEV_ON_ERROR_REPORT; int job_flags = JOB_DEFAULT; if (!has_speed) { speed = 0; } +if (!has_on_error) { +on_error = BLOCKDEV_ON_ERROR_REPORT; +} if (!has_filter_node_name) { filter_node_name = NULL; } -- 2.20.1
[PULL 10/36] commit: Remove unused bytes_written
The bytes_written variable is only ever written to, it serves no purpose. This has actually been the case since the commit job was first introduced in commit 747ff602636. Signed-off-by: Kevin Wolf Message-Id: <20200214200812.28180-3-kw...@redhat.com> Reviewed-by: Ján Tomko Signed-off-by: Kevin Wolf --- block/commit.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/block/commit.c b/block/commit.c index 23c90b3b91..cce898a4f3 100644 --- a/block/commit.c +++ b/block/commit.c @@ -140,7 +140,6 @@ static int coroutine_fn commit_run(Job *job, Error **errp) int ret = 0; int64_t n = 0; /* bytes */ void *buf = NULL; -int bytes_written = 0; int64_t len, base_len; ret = len = blk_getlength(s->top); @@ -180,7 +179,6 @@ static int coroutine_fn commit_run(Job *job, Error **errp) trace_commit_one_iteration(s, offset, n, ret); if (copy) { ret = commit_populate(s->top, s->base, offset, n, buf); -bytes_written += n; } if (ret < 0) { BlockErrorAction action = -- 2.20.1
[PULL 16/36] block: Remove superfluous semicolons
From: Philippe Mathieu-Daudé Fixes: 132ada80c4a Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20200218094402.26625-4-phi...@redhat.com> Signed-off-by: Kevin Wolf --- block.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 9c810534d6..9db0b973fe 100644 --- a/block.c +++ b/block.c @@ -2435,13 +2435,13 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, if (bdrv_get_aio_context(child_bs) != ctx) { ret = bdrv_try_set_aio_context(child_bs, ctx, &local_err); if (ret < 0 && child_role->can_set_aio_ctx) { -GSList *ignore = g_slist_prepend(NULL, child);; +GSList *ignore = g_slist_prepend(NULL, child); ctx = bdrv_get_aio_context(child_bs); if (child_role->can_set_aio_ctx(child, ctx, &ignore, NULL)) { error_free(local_err); ret = 0; g_slist_free(ignore); -ignore = g_slist_prepend(NULL, child);; +ignore = g_slist_prepend(NULL, child); child_role->set_aio_ctx(child, ctx, &ignore); } g_slist_free(ignore); -- 2.20.1
[PULL 12/36] commit: Inline commit_populate()
commit_populate() is a very short function and only called in a single place. Its return value doesn't tell us whether an error happened while reading or writing, which would be necessary for sending the right data in the BLOCK_JOB_ERROR QMP event. Signed-off-by: Kevin Wolf Message-Id: <20200214200812.28180-5-kw...@redhat.com> Reviewed-by: Ján Tomko Signed-off-by: Kevin Wolf --- block/commit.c | 28 ++-- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/block/commit.c b/block/commit.c index 8189f079d2..2992a1012f 100644 --- a/block/commit.c +++ b/block/commit.c @@ -43,27 +43,6 @@ typedef struct CommitBlockJob { char *backing_file_str; } CommitBlockJob; -static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base, -int64_t offset, uint64_t bytes, -void *buf) -{ -int ret = 0; - -assert(bytes < SIZE_MAX); - -ret = blk_co_pread(bs, offset, bytes, buf, 0); -if (ret < 0) { -return ret; -} - -ret = blk_co_pwrite(base, offset, bytes, buf, 0); -if (ret < 0) { -return ret; -} - -return 0; -} - static int commit_prepare(Job *job) { CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); @@ -178,7 +157,12 @@ static int coroutine_fn commit_run(Job *job, Error **errp) copy = (ret == 1); trace_commit_one_iteration(s, offset, n, ret); if (copy) { -ret = commit_populate(s->top, s->base, offset, n, buf); +assert(n < SIZE_MAX); + +ret = blk_co_pread(s->top, offset, n, buf, 0); +if (ret >= 0) { +ret = blk_co_pwrite(s->base, offset, n, buf, 0); +} } if (ret < 0) { BlockErrorAction action = -- 2.20.1
[PULL 11/36] commit: Fix argument order for block_job_error_action()
The block_job_error_action() error call in the commit job gives the on_err and is_read arguments in the wrong order. Fix this. (Of course, hard-coded is_read = false is wrong, too, but that's a separate problem for a separate patch.) Signed-off-by: Kevin Wolf Message-Id: <20200214200812.28180-4-kw...@redhat.com> Reviewed-by: Ján Tomko Signed-off-by: Kevin Wolf --- block/commit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/commit.c b/block/commit.c index cce898a4f3..8189f079d2 100644 --- a/block/commit.c +++ b/block/commit.c @@ -182,7 +182,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp) } if (ret < 0) { BlockErrorAction action = -block_job_error_action(&s->common, false, s->on_error, -ret); +block_job_error_action(&s->common, s->on_error, false, -ret); if (action == BLOCK_ERROR_ACTION_REPORT) { goto out; } else { -- 2.20.1
[PULL 07/36] iotests: Test copy offloading with external data file
This adds a test for 'qemu-img convert' with copy offloading where the target image has an external data file. If the test hosts supports it, it tests both the case where copy offloading is supported and the case where it isn't (otherwise we just test unsupported twice). More specifically, the case with unsupported copy offloading tests qcow2_alloc_cluster_abort() with external data files. Signed-off-by: Kevin Wolf Message-Id: <20200211094900.17315-4-kw...@redhat.com> Signed-off-by: Kevin Wolf --- tests/qemu-iotests/244 | 14 ++ tests/qemu-iotests/244.out | 6 ++ 2 files changed, 20 insertions(+) diff --git a/tests/qemu-iotests/244 b/tests/qemu-iotests/244 index 0d1efee6ef..2ec1815e6f 100755 --- a/tests/qemu-iotests/244 +++ b/tests/qemu-iotests/244 @@ -197,6 +197,20 @@ $QEMU_IO -c 'read -P 0x11 0 1M' -f $IMGFMT "$TEST_IMG" | _filter_qemu_io $QEMU_IMG map --output=human "$TEST_IMG" | _filter_testdir $QEMU_IMG map --output=json "$TEST_IMG" +echo +echo "=== Copy offloading ===" +echo + +# Make use of copy offloading if the test host can provide it +_make_test_img -o "data_file=$TEST_IMG.data" 64M +$QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n -C "$TEST_IMG.src" "$TEST_IMG" +$QEMU_IMG compare -f $IMGFMT -F $IMGFMT "$TEST_IMG.src" "$TEST_IMG" + +# blkdebug doesn't support copy offloading, so this tests the error path +$QEMU_IMG amend -f $IMGFMT -o "data_file=blkdebug::$TEST_IMG.data" "$TEST_IMG" +$QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n -C "$TEST_IMG.src" "$TEST_IMG" +$QEMU_IMG compare -f $IMGFMT -F $IMGFMT "$TEST_IMG.src" "$TEST_IMG" + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/244.out b/tests/qemu-iotests/244.out index 6a3d0067cc..e6f4dc7993 100644 --- a/tests/qemu-iotests/244.out +++ b/tests/qemu-iotests/244.out @@ -122,4 +122,10 @@ Offset Length Mapped to File 0 0x100 TEST_DIR/t.qcow2.data [{ "start": 0, "length": 1048576, "depth": 0, "zero": false, "data": true, "offset": 0}, { "start": 1048576, "length": 66060288, "depth": 0, "zero": true, "data": false}] + +=== Copy offloading === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 data_file=TEST_DIR/t.IMGFMT.data +Images are identical. +Images are identical. *** done -- 2.20.1
[PULL 09/36] qapi: Document meaning of 'ignore' BlockdevOnError for jobs
It is not obvious what 'ignore' actually means for block jobs: It could be continuing the job and returning success in the end despite the error (no block job does this). It could also mean continuing and returning failure in the end (this is what stream does). And it can mean retrying the failed request later (this is what backup, commit and mirror do). This (somewhat inconsistent) behaviour was introduced and described for stream and mirror in commit 32c81a4a6ec. backup and commit were introduced later and use the same model as mirror. Signed-off-by: Kevin Wolf Message-Id: <20200214200812.28180-2-kw...@redhat.com> Reviewed-by: Ján Tomko Signed-off-by: Kevin Wolf --- qapi/block-core.json | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/qapi/block-core.json b/qapi/block-core.json index 13dad62f44..1c32158d11 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1164,7 +1164,10 @@ # for jobs, cancel the job # # @ignore: ignore the error, only report a QMP event (BLOCK_IO_ERROR -# or BLOCK_JOB_ERROR) +# or BLOCK_JOB_ERROR). The backup, mirror and commit block jobs retry +# the failing request later and may still complete successfully. The +# stream block job continues to stream and will complete with an +# error. # # @enospc: same as @stop on ENOSPC, same as @report otherwise. # -- 2.20.1
[PULL 02/36] mirror: Don't let an operation wait for itself
mirror_wait_for_free_in_flight_slot() just picks a random operation to wait for. However, when mirror_co_read() waits for free slots, its MirrorOp is already in s->ops_in_flight, so if not enough slots are immediately available, an operation can end up waiting for itself to complete, which results in a hang. Fix this by passing the current MirrorOp and skipping this operation when picking an operation to wait for. Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1794692 Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block/mirror.c | 21 - 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index 8959e4255f..cacbc70014 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -283,11 +283,14 @@ static int mirror_cow_align(MirrorBlockJob *s, int64_t *offset, } static inline void coroutine_fn -mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) +mirror_wait_for_any_operation(MirrorBlockJob *s, MirrorOp *self, bool active) { MirrorOp *op; QTAILQ_FOREACH(op, &s->ops_in_flight, next) { +if (self == op) { +continue; +} /* Do not wait on pseudo ops, because it may in turn wait on * some other operation to start, which may in fact be the * caller of this function. Since there is only one pseudo op @@ -302,10 +305,10 @@ mirror_wait_for_any_operation(MirrorBlockJob *s, bool active) } static inline void coroutine_fn -mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s) +mirror_wait_for_free_in_flight_slot(MirrorBlockJob *s, MirrorOp *self) { /* Only non-active operations use up in-flight slots */ -mirror_wait_for_any_operation(s, false); +mirror_wait_for_any_operation(s, self, false); } /* Perform a mirror copy operation. @@ -348,7 +351,7 @@ static void coroutine_fn mirror_co_read(void *opaque) while (s->buf_free_count < nb_chunks) { trace_mirror_yield_in_flight(s, op->offset, s->in_flight); -mirror_wait_for_free_in_flight_slot(s); +mirror_wait_for_free_in_flight_slot(s, op); } /* Now make a QEMUIOVector taking enough granularity-sized chunks @@ -555,7 +558,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) while (s->in_flight >= MAX_IN_FLIGHT) { trace_mirror_yield_in_flight(s, offset, s->in_flight); -mirror_wait_for_free_in_flight_slot(s); +mirror_wait_for_free_in_flight_slot(s, pseudo_op); } if (s->ret < 0) { @@ -609,7 +612,7 @@ static void mirror_free_init(MirrorBlockJob *s) static void coroutine_fn mirror_wait_for_all_io(MirrorBlockJob *s) { while (s->in_flight > 0) { -mirror_wait_for_free_in_flight_slot(s); +mirror_wait_for_free_in_flight_slot(s, NULL); } } @@ -794,7 +797,7 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) if (s->in_flight >= MAX_IN_FLIGHT) { trace_mirror_yield(s, UINT64_MAX, s->buf_free_count, s->in_flight); -mirror_wait_for_free_in_flight_slot(s); +mirror_wait_for_free_in_flight_slot(s, NULL); continue; } @@ -947,7 +950,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) /* Do not start passive operations while there are active * writes in progress */ while (s->in_active_write_counter) { -mirror_wait_for_any_operation(s, true); +mirror_wait_for_any_operation(s, NULL, true); } if (s->ret < 0) { @@ -973,7 +976,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 || (cnt == 0 && s->in_flight > 0)) { trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight); -mirror_wait_for_free_in_flight_slot(s); +mirror_wait_for_free_in_flight_slot(s, NULL); continue; } else if (cnt != 0) { delay_ns = mirror_iteration(s); -- 2.20.1
[PULL 01/36] mirror: Store MirrorOp.co for debuggability
If a coroutine is launched, but the coroutine pointer isn't stored anywhere, debugging any problems inside the coroutine is quite hard. Let's store the coroutine pointer of a mirror operation in MirrorOp to have it available in the debugger. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block/mirror.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/mirror.c b/block/mirror.c index f0f2d9dff1..8959e4255f 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -103,6 +103,7 @@ struct MirrorOp { bool is_pseudo_op; bool is_active_write; CoQueue waiting_requests; +Coroutine *co; QTAILQ_ENTRY(MirrorOp) next; }; @@ -429,6 +430,7 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, default: abort(); } +op->co = co; QTAILQ_INSERT_TAIL(&s->ops_in_flight, op, next); qemu_coroutine_enter(co); -- 2.20.1
[PULL 06/36] qcow2: Fix qcow2_alloc_cluster_abort() for external data file
For external data file, cluster allocations return an offset in the data file and are not refcounted. In this case, there is nothing to do for qcow2_alloc_cluster_abort(). Freeing the same offset in the qcow2 file is wrong and causes crashes in the better case or image corruption in the worse case. Signed-off-by: Kevin Wolf Message-Id: <20200211094900.17315-3-kw...@redhat.com> Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 1947f13a2d..78c95dfa16 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1026,8 +1026,11 @@ err: void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m) { BDRVQcow2State *s = bs->opaque; -qcow2_free_clusters(bs, m->alloc_offset, m->nb_clusters << s->cluster_bits, -QCOW2_DISCARD_NEVER); +if (!has_data_file(bs)) { +qcow2_free_clusters(bs, m->alloc_offset, +m->nb_clusters << s->cluster_bits, +QCOW2_DISCARD_NEVER); +} } /* -- 2.20.1
[PULL 08/36] block/qcow2-bitmap: Remove unneeded variable assignment
From: Philippe Mathieu-Daudé Fix warning reported by Clang static code analyzer: CC block/qcow2-bitmap.o block/qcow2-bitmap.c:650:5: warning: Value stored to 'ret' is never read ret = -EINVAL; ^ ~~~ Fixes: 88ddffae8 Reported-by: Clang Static Analyzer Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20200215161557.4077-2-phi...@redhat.com> Reviewed-by: Richard Henderson Reviewed-by: Ján Tomko Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Kevin Wolf --- block/qcow2-bitmap.c | 1 - 1 file changed, 1 deletion(-) diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c index d41f5d049b..82c9f3 100644 --- a/block/qcow2-bitmap.c +++ b/block/qcow2-bitmap.c @@ -647,7 +647,6 @@ static Qcow2BitmapList *bitmap_list_load(BlockDriverState *bs, uint64_t offset, return bm_list; broken_dir: -ret = -EINVAL; error_setg(errp, "Broken bitmap directory"); fail: -- 2.20.1
[PULL 03/36] qcow2: Fix alignment checks in encrypted images
From: Alberto Garcia I/O requests to encrypted media should be aligned to the sector size used by the underlying encryption method, not to BDRV_SECTOR_SIZE. Fortunately this doesn't break anything at the moment because both existing QCRYPTO_BLOCK_*_SECTOR_SIZE have the same value as BDRV_SECTOR_SIZE. The checks in qcow2_co_preadv_encrypted() are also unnecessary because they are repeated immediately afterwards in qcow2_co_encdec(). Signed-off-by: Alberto Garcia Message-Id: <20200213171646.15876-1-be...@igalia.com> Reviewed-by: Daniel P. Berrangé Signed-off-by: Kevin Wolf --- block/qcow2-threads.c | 12 block/qcow2.c | 2 -- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c index 8f5a0d1ebe..77bb578cdf 100644 --- a/block/qcow2-threads.c +++ b/block/qcow2-threads.c @@ -246,12 +246,15 @@ qcow2_co_encdec(BlockDriverState *bs, uint64_t host_offset, .len = len, .func = func, }; +uint64_t sector_size; -assert(QEMU_IS_ALIGNED(guest_offset, BDRV_SECTOR_SIZE)); -assert(QEMU_IS_ALIGNED(host_offset, BDRV_SECTOR_SIZE)); -assert(QEMU_IS_ALIGNED(len, BDRV_SECTOR_SIZE)); assert(s->crypto); +sector_size = qcrypto_block_get_sector_size(s->crypto); +assert(QEMU_IS_ALIGNED(guest_offset, sector_size)); +assert(QEMU_IS_ALIGNED(host_offset, sector_size)); +assert(QEMU_IS_ALIGNED(len, sector_size)); + return len == 0 ? 0 : qcow2_co_process(bs, qcow2_encdec_pool_func, &arg); } @@ -270,7 +273,8 @@ qcow2_co_encdec(BlockDriverState *bs, uint64_t host_offset, *will be written to the underlying storage device at *@host_offset * - * @len - length of the buffer (must be a BDRV_SECTOR_SIZE multiple) + * @len - length of the buffer (must be a multiple of the encryption + *sector size) * * Depending on the encryption method, @host_offset and/or @guest_offset * may be used for generating the initialization vector for diff --git a/block/qcow2.c b/block/qcow2.c index ef96606f8d..8dcee5efec 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -2068,8 +2068,6 @@ qcow2_co_preadv_encrypted(BlockDriverState *bs, goto fail; } -assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE)); -assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE)); if (qcow2_co_decrypt(bs, file_cluster_offset + offset_into_cluster(s, offset), offset, buf, bytes) < 0) -- 2.20.1
[PULL 00/36] Block layer patches
The following changes since commit 6c599282f8ab382fe59f03a6cae755b89561a7b3: Merge remote-tracking branch 'remotes/armbru/tags/pull-monitor-2020-02-15-v2' into staging (2020-02-17 13:32:25 +) are available in the Git repository at: git://repo.or.cz/qemu/kevin.git tags/for-upstream for you to fetch changes up to c45a88f4429d7a8f384b75f3fd3fed5138a6edca: iotests: Check that @replaces can replace filters (2020-02-18 14:52:16 +0100) Block layer patches: - Fix check_to_replace_node() - commit: Expose on-error option in QMP - qcow2: Fix qcow2_alloc_cluster_abort() for external data file - mirror: Fix deadlock - vvfat: Fix segfault while closing read-write node - Code cleanups Alberto Garcia (1): qcow2: Fix alignment checks in encrypted images Hikaru Nishida (1): block/vvfat: Do not unref qcow on closing backing bdrv Kevin Wolf (12): mirror: Store MirrorOp.co for debuggability mirror: Don't let an operation wait for itself qcow2: update_refcount(): Reset old_table_index after qcow2_cache_put() qcow2: Fix qcow2_alloc_cluster_abort() for external data file iotests: Test copy offloading with external data file qapi: Document meaning of 'ignore' BlockdevOnError for jobs commit: Remove unused bytes_written commit: Fix argument order for block_job_error_action() commit: Inline commit_populate() commit: Fix is_read for block_job_error_action() commit: Expose on-error option in QMP iotests: Test error handling policies with block-commit Max Reitz (19): blockdev: Allow external snapshots everywhere blockdev: Allow resizing everywhere block: Drop bdrv_is_first_non_filter() iotests: Let 041 use -blockdev for quorum children quorum: Fix child permissions block: Add bdrv_recurse_can_replace() blkverify: Implement .bdrv_recurse_can_replace() quorum: Implement .bdrv_recurse_can_replace() block: Use bdrv_recurse_can_replace() block: Remove bdrv_recurse_is_first_non_filter() mirror: Double-check immediately before replacing quorum: Stop marking it as a filter iotests: Use complete_and_wait() in 155 iotests: Add VM.assert_block_path() iotests/041: Drop superfluous shutdowns iotests: Resolve TODOs in 041 iotests: Use self.image_len in TestRepairQuorum iotests: Add tests for invalid Quorum @replaces iotests: Check that @replaces can replace filters Philippe Mathieu-Daudé (3): block/qcow2-bitmap: Remove unneeded variable assignment block: Remove superfluous semicolons block/io_uring: Remove superfluous semicolon qapi/block-core.json | 9 +- include/block/block.h | 5 - include/block/block_int.h | 16 +-- block.c | 89 ++--- block/blkverify.c | 20 +-- block/commit.c| 37 ++ block/copy-on-read.c | 9 -- block/filter-compress.c | 9 -- block/io_uring.c | 2 +- block/mirror.c| 37 -- block/qcow2-bitmap.c | 1 - block/qcow2-cluster.c | 7 +- block/qcow2-refcount.c| 1 + block/qcow2-threads.c | 12 +- block/qcow2.c | 2 - block/quorum.c| 70 +-- block/replication.c | 7 -- block/throttle.c | 8 -- block/vvfat.c | 7 -- blockdev.c| 18 +-- tests/qemu-iotests/iotests.py | 59 + tests/qemu-iotests/040| 283 ++ tests/qemu-iotests/040.out| 4 +- tests/qemu-iotests/041| 138 +--- tests/qemu-iotests/041.out| 4 +- tests/qemu-iotests/155| 7 +- tests/qemu-iotests/244| 14 +++ tests/qemu-iotests/244.out| 6 + 28 files changed, 675 insertions(+), 206 deletions(-)
[PULL 04/36] block/vvfat: Do not unref qcow on closing backing bdrv
From: Hikaru Nishida Before this commit, BDRVVVFATState.qcow is unrefed in write_target_close on closing backing bdrv of vvfat. However, qcow bdrv is opend as a child of vvfat in enable_write_target() so it will be also unrefed on closing vvfat itself. This causes use-after-free of qcow on freeing vvfat which has backing bdrv and qcow bdrv as children in this order because bdrv_close(vvfat) tries to free qcow bdrv after freeing backing bdrv as QLIST_FOREACH_SAFE() loop keeps next pointer, but BdrvChild of qcow is already freed in bdrv_close(backing bdrv). Signed-off-by: Hikaru Nishida Message-Id: <20200209175156.85748-1-hikaru...@gmail.com> Signed-off-by: Kevin Wolf --- block/vvfat.c | 7 --- 1 file changed, 7 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index 019b8f1341..ab800c4887 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -3124,17 +3124,10 @@ write_target_commit(BlockDriverState *bs, uint64_t offset, uint64_t bytes, return ret; } -static void write_target_close(BlockDriverState *bs) { -BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque); -bdrv_unref_child(s->bs, s->qcow); -g_free(s->qcow_filename); -} - static BlockDriver vvfat_write_target = { .format_name= "vvfat_write_target", .instance_size = sizeof(void*), .bdrv_co_pwritev= write_target_commit, -.bdrv_close = write_target_close, }; static void vvfat_qcow_options(int *child_flags, QDict *child_options, -- 2.20.1
[PULL 05/36] qcow2: update_refcount(): Reset old_table_index after qcow2_cache_put()
In the case that update_refcount() frees a refcount block, it evicts it from the metadata cache. Before doing so, however, it returns the currently used refcount block to the cache because it might be the same. Returning the refcount block early means that we need to reset old_table_index so that we reload the refcount block in the next iteration if it is actually still in use. Fixes: f71c08ea8e60f035485a512fd2af8908567592f0 Signed-off-by: Kevin Wolf Message-Id: <20200211094900.17315-2-kw...@redhat.com> Signed-off-by: Kevin Wolf --- block/qcow2-refcount.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index c963bc8de1..7ef1c0e42a 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -889,6 +889,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, offset); if (table != NULL) { qcow2_cache_put(s->refcount_block_cache, &refcount_block); +old_table_index = -1; qcow2_cache_discard(s->refcount_block_cache, table); } -- 2.20.1
Re: [PATCH v2] virtio: increase virtuqueue size for virtio-scsi and virtio-blk
On 18.02.2020 16:59, Denis Plotnikov wrote: On 18.02.2020 16:53, Stefan Hajnoczi wrote: On Thu, Feb 13, 2020 at 05:59:27PM +0300, Denis Plotnikov wrote: v1: * seg_max default value changing removed --- The goal is to reduce the amount of requests issued by a guest on 1M reads/writes. This rises the performance up to 4% on that kind of disk access pattern. The maximum chunk size to be used for the guest disk accessing is limited with seg_max parameter, which represents the max amount of pices in the scatter-geather list in one guest disk request. Since seg_max is virqueue_size dependent, increasing the virtqueue size increases seg_max, which, in turn, increases the maximum size of data to be read/write from a guest disk. More details in the original problem statment: https://lists.gnu.org/archive/html/qemu-devel/2017-12/msg03721.html Suggested-by: Denis V. Lunev Signed-off-by: Denis Plotnikov --- hw/block/virtio-blk.c | 2 +- hw/core/machine.c | 2 ++ hw/scsi/virtio-scsi.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) I fixed up the "virtuqueue" typo in the commit message and the mis-formatted commit description (git-am(1) stops including lines after the first "---"). Actually, I sent the corrected version v3 of the patch last week. But it seems it got lost among that gigantic patch flow in the mailing list :) Thanks for applying! Denis Thanks, applied to my block tree: https://github.com/stefanha/qemu/commits/block Stefan I'm going to send the test checking the virtqueue-sizes for machine types a little bit later. Denis
Re: [PATCH v2] virtio: increase virtuqueue size for virtio-scsi and virtio-blk
On 18.02.2020 16:53, Stefan Hajnoczi wrote: On Thu, Feb 13, 2020 at 05:59:27PM +0300, Denis Plotnikov wrote: v1: * seg_max default value changing removed --- The goal is to reduce the amount of requests issued by a guest on 1M reads/writes. This rises the performance up to 4% on that kind of disk access pattern. The maximum chunk size to be used for the guest disk accessing is limited with seg_max parameter, which represents the max amount of pices in the scatter-geather list in one guest disk request. Since seg_max is virqueue_size dependent, increasing the virtqueue size increases seg_max, which, in turn, increases the maximum size of data to be read/write from a guest disk. More details in the original problem statment: https://lists.gnu.org/archive/html/qemu-devel/2017-12/msg03721.html Suggested-by: Denis V. Lunev Signed-off-by: Denis Plotnikov --- hw/block/virtio-blk.c | 2 +- hw/core/machine.c | 2 ++ hw/scsi/virtio-scsi.c | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) I fixed up the "virtuqueue" typo in the commit message and the mis-formatted commit description (git-am(1) stops including lines after the first "---"). Actually, I sent the corrected version v3 of the patch last week. But it seems it got lost among that gigantic patch flow in the mailing list :) Thanks for applying! Denis Thanks, applied to my block tree: https://github.com/stefanha/qemu/commits/block Stefan
Re: [PATCH v2] virtio: increase virtuqueue size for virtio-scsi and virtio-blk
On Thu, Feb 13, 2020 at 05:59:27PM +0300, Denis Plotnikov wrote: > v1: > * seg_max default value changing removed > > --- > The goal is to reduce the amount of requests issued by a guest on > 1M reads/writes. This rises the performance up to 4% on that kind of > disk access pattern. > > The maximum chunk size to be used for the guest disk accessing is > limited with seg_max parameter, which represents the max amount of > pices in the scatter-geather list in one guest disk request. > > Since seg_max is virqueue_size dependent, increasing the virtqueue > size increases seg_max, which, in turn, increases the maximum size > of data to be read/write from a guest disk. > > More details in the original problem statment: > https://lists.gnu.org/archive/html/qemu-devel/2017-12/msg03721.html > > Suggested-by: Denis V. Lunev > Signed-off-by: Denis Plotnikov > --- > hw/block/virtio-blk.c | 2 +- > hw/core/machine.c | 2 ++ > hw/scsi/virtio-scsi.c | 2 +- > 3 files changed, 4 insertions(+), 2 deletions(-) I fixed up the "virtuqueue" typo in the commit message and the mis-formatted commit description (git-am(1) stops including lines after the first "---"). Thanks, applied to my block tree: https://github.com/stefanha/qemu/commits/block Stefan signature.asc Description: PGP signature
Re: [PATCH v4 00/19] block: Fix check_to_replace_node()
Am 18.02.2020 um 11:34 hat Max Reitz geschrieben: > Branch: https://github.com/XanClic/qemu.git fix-can-replace-v4 > Branch: https://git.xanclic.moe/XanClic/qemu.git fix-can-replace-v4 > > v1: https://lists.nongnu.org/archive/html/qemu-block/2019-09/msg01027.html > v2: https://lists.nongnu.org/archive/html/qemu-block/2019-11/msg00370.html > v3: https://lists.nongnu.org/archive/html/qemu-block/2020-01/msg00922.html > > > Hi, > > For what this series does, see the cover letter of v1 (linked above). > > > Changes in v4 compared to v3: > - Following the discussion with Kevin, I changed Quorum’s > .bdrv_recurse_can_replace() implementation from unsharing the > CONSISTENT_READ permission and taking the WRITE permission to just > checking whether there are any other parents > - This made the old patches 8 and 9 unnecessary, so they’ve been > dropped > - And of course it requires some changes to patch 10 (now 8) > - (Patch 10 (was: 12) gets a rebase conflict that’s obvious to > resolve, so I kept Vladimir’s R-b) > > - As suggested by Vladimir, I added the root node name to the > cannot-follow-path error message in patch 14 (was: 16), and added an > assertion that the root node exists in the first place Thanks, applied to the block branch. Kevin
Re: [PATCH v4 18/19] iotests: Add tests for invalid Quorum @replaces
Am 18.02.2020 um 13:49 hat Max Reitz geschrieben: > On 18.02.20 13:38, Kevin Wolf wrote: > > Am 18.02.2020 um 11:34 hat Max Reitz geschrieben: > >> Add two tests to see that you cannot replace a Quorum child with the > >> mirror job while the child is in use by a different parent. > >> > >> Signed-off-by: Max Reitz > >> Reviewed-by: Vladimir Sementsov-Ogievskiy > >> --- > >> tests/qemu-iotests/041 | 70 +- > >> tests/qemu-iotests/041.out | 4 +-- > >> 2 files changed, 71 insertions(+), 3 deletions(-) > >> > >> diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 > >> index 1d9e64ff6d..53c8671969 100755 > >> --- a/tests/qemu-iotests/041 > >> +++ b/tests/qemu-iotests/041 > >> @@ -20,6 +20,7 @@ > >> > >> import time > >> import os > >> +import re > >> import iotests > >> from iotests import qemu_img, qemu_io > >> > >> @@ -34,6 +35,8 @@ quorum_img3 = os.path.join(iotests.test_dir, > >> 'quorum3.img') > >> quorum_repair_img = os.path.join(iotests.test_dir, 'quorum_repair.img') > >> quorum_snapshot_file = os.path.join(iotests.test_dir, > >> 'quorum_snapshot.img') > >> > >> +nbd_sock_path = os.path.join(iotests.test_dir, 'nbd.sock') > >> + > >> class TestSingleDrive(iotests.QMPTestCase): > >> image_len = 1 * 1024 * 1024 # MB > >> qmp_cmd = 'drive-mirror' > >> @@ -892,7 +895,8 @@ class TestRepairQuorum(iotests.QMPTestCase): > >> > >> def tearDown(self): > >> self.vm.shutdown() > >> -for i in self.IMAGES + [ quorum_repair_img, quorum_snapshot_file > >> ]: > >> +for i in self.IMAGES + [ quorum_repair_img, quorum_snapshot_file, > >> + nbd_sock_path ]: > >> # Do a try/except because the test may have deleted some > >> images > >> try: > >> os.remove(i) > >> @@ -1032,6 +1036,70 @@ class TestRepairQuorum(iotests.QMPTestCase): > >> self.assert_has_block_node("repair0", quorum_repair_img) > >> self.vm.assert_block_path('quorum0', '/children.1', 'repair0') > >> > >> +def test_with_other_parent(self): > >> +""" > >> +Check that we cannot replace a Quorum child when it has other > >> +parents. > >> +""" > >> +result = self.vm.qmp('nbd-server-start', > >> + addr={ > >> + 'type': 'unix', > >> + 'data': {'path': nbd_sock_path} > >> + }) > >> +self.assert_qmp(result, 'return', {}) > >> + > >> +result = self.vm.qmp('nbd-server-add', device='img1') > >> +self.assert_qmp(result, 'return', {}) > >> + > >> +result = self.vm.qmp('drive-mirror', job_id='mirror', > >> device='quorum0', > >> + sync='full', node_name='repair0', > >> replaces='img1', > >> + target=quorum_repair_img, > >> format=iotests.imgfmt) > >> +self.assert_qmp(result, 'error/desc', > >> +"Cannot replace 'img1' by a node mirrored from " > >> +"'quorum0', because it cannot be guaranteed that > >> doing " > >> +"so would not lead to an abrupt change of visible > >> data") > >> + > >> +def test_with_other_parents_after_mirror_start(self): > >> +""" > >> +The same as test_with_other_parent(), but add the NBD server > >> +only when the mirror job is already running. > >> +""" > >> +result = self.vm.qmp('nbd-server-start', > >> + addr={ > >> + 'type': 'unix', > >> + 'data': {'path': nbd_sock_path} > >> + }) > >> +self.assert_qmp(result, 'return', {}) > >> + > >> +result = self.vm.qmp('drive-mirror', job_id='mirror', > >> device='quorum0', > >> + sync='full', node_name='repair0', > >> replaces='img1', > >> + target=quorum_repair_img, > >> format=iotests.imgfmt) > >> +self.assert_qmp(result, 'return', {}) > >> + > >> +result = self.vm.qmp('nbd-server-add', device='img1') > >> +self.assert_qmp(result, 'return', {}) > >> + > >> +# The full error message goes to stderr, we will check it later > >> +self.complete_and_wait('mirror', > >> + completion_error='Operation not permitted') > >> + > >> +# Should not have been replaced > >> +self.vm.assert_block_path('quorum0', '/children.1', 'img1') > >> + > >> +# Check the full error message now > >> +self.vm.shutdown() > >> +log = self.vm.get_log() > >> +log = re.sub(r'^\[I \d+\.\d+\] OPENED\n', '', log) > >> +log = re.sub(r'^Formatting.*\n', '', log) > >> +log = re.sub(r'\n\[I \+\d+\.\d+\] CLOSED\n?$', '', log) > >> +log = re.sub(
Re: [PATCH v2 06/22] migration/block-dirty-bitmap: rename finish_lock to just lock
On 17/02/2020 18:02, Vladimir Sementsov-Ogievskiy wrote: finish_lock is bad name, as lock used not only on process end. Signed-off-by: Vladimir Sementsov-Ogievskiy --- migration/block-dirty-bitmap.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c index 7a82b76809..440c41cfca 100644 --- a/migration/block-dirty-bitmap.c +++ b/migration/block-dirty-bitmap.c @@ -143,7 +143,7 @@ typedef struct DBMLoadState { BdrvDirtyBitmap *bitmap; GSList *enabled_bitmaps; -QemuMutex finish_lock; +QemuMutex lock; /* protect enabled_bitmaps */ } DBMLoadState; typedef struct DBMState { @@ -507,7 +507,7 @@ void dirty_bitmap_mig_before_vm_start(void) DBMLoadState *s = &dbm_state.load; GSList *item; -qemu_mutex_lock(&s->finish_lock); +qemu_mutex_lock(&s->lock); for (item = s->enabled_bitmaps; item; item = g_slist_next(item)) { LoadBitmapState *b = item->data; @@ -524,7 +524,7 @@ void dirty_bitmap_mig_before_vm_start(void) g_slist_free(s->enabled_bitmaps); s->enabled_bitmaps = NULL; -qemu_mutex_unlock(&s->finish_lock); +qemu_mutex_unlock(&s->lock); } static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s) @@ -533,7 +533,7 @@ static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s) trace_dirty_bitmap_load_complete(); bdrv_dirty_bitmap_deserialize_finish(s->bitmap); -qemu_mutex_lock(&s->finish_lock); +qemu_mutex_lock(&s->lock); for (item = s->enabled_bitmaps; item; item = g_slist_next(item)) { LoadBitmapState *b = item->data; @@ -565,7 +565,7 @@ static void dirty_bitmap_load_complete(QEMUFile *f, DBMLoadState *s) bdrv_dirty_bitmap_unlock(s->bitmap); } -qemu_mutex_unlock(&s->finish_lock); +qemu_mutex_unlock(&s->lock); } static int dirty_bitmap_load_bits(QEMUFile *f, DBMLoadState *s) @@ -747,7 +747,7 @@ static SaveVMHandlers savevm_dirty_bitmap_handlers = { void dirty_bitmap_mig_init(void) { QSIMPLEQ_INIT(&dbm_state.save.dbms_list); -qemu_mutex_init(&dbm_state.load.finish_lock); +qemu_mutex_init(&dbm_state.load.lock); register_savevm_live("dirty-bitmap", 0, 1, &savevm_dirty_bitmap_handlers, Reviewed-by: Andrey Shinkevich -- With the best regards, Andrey Shinkevich
Re: [PATCH v3 03/33] block: Add BdrvChildRole and BdrvChildRoleBits
On 2/18/20 6:42 AM, Max Reitz wrote: This mask will supplement BdrvChildClass when it comes to what role (or combination of roles) a child takes for its parent. It consists of BdrvChildRoleBits values (which is an enum). Because empty enums are not allowed, let us just start with it filled. Signed-off-by: Max Reitz --- include/block/block.h | 38 ++ 1 file changed, 38 insertions(+) + +/* Mask of BdrvChildRoleBits values */ +typedef unsigned int BdrvChildRole; + I like how it turned out (both the additional comments earlier on, and the typedef for documentation purposes). Reviewed-by: Eric Blake -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Re: x-blockdev-reopen & block-dirty-bitmaps
On Mon, Feb 17, 2020 at 10:52:31 +0100, Kevin Wolf wrote: > Am 14.02.2020 um 21:32 hat John Snow geschrieben: > > On 2/14/20 3:19 PM, Kevin Wolf wrote: > > > Am 14.02.2020 um 19:54 hat John Snow geschrieben: > > >> Hi, what work remains to make this a stable interface, is it known? > > >> > > >> We're having a problem with bitmaps where in some cases libvirt wants to > > >> commit an image back down to a base image but -- for various reasons -- > > >> the bitmap was left enabled in the backing image, so it would accrue new > > >> writes during the commit. > > >> > > >> Normally, when creating a snapshot using blockdev-snapshot, the backing > > >> file becomes RO and all of the bitmaps become RO too. > > >> > > >> The goal is to be able to disable (or enable) bitmaps from a backing > > >> file before (or atomically just before) a commit operation to allow > > >> libvirt greater control on snapshot commit. > > >> > > >> Now, in my own testing, we can reopen a backing file just fine, delete > > >> or disable a bitmap and be done with it -- but the interface isn't > > >> stable, so libvirt will be reluctant to use such tricks. Well, while we probably want it to be stable for upstream acceptance that didn't prevent me from actually trying to use reopening. It would be probably frowned upon if I tried to use it upstream. The problem is that we'd have to carry the compatibility code for at least the two possible names of the command if nothing else changes and also the fact that once the command is declared stable, some older libvirt versions might not know to use it. The implementation was surprisingly easy though and works well to reopen the backing files in RW mode. The caveat was that the reopen somehow still didn't reopen the bitmaps and qemu ended up reporting: libvirt-1-format: Failed to make dirty bitmaps writable: Cannot update bitmap directory: Bad file descriptor So unfortunately it didn't work out for that scenario. Also I'm afraid I have another use case for it: oVirt when doing their 'live storage migration' actually uses libvirt to mirror only the top layer in shallow mode and copies everything else while the mirror is running using qemu-img. Prior to libvirt's use of -blockdev this worked well, because qemu reopened the mirror destination (which caused to open the backing files) only at the end. With -blockdev we have to open the backing files right away so that they can be properly installed as backing of the image being mirrored and oVirt's qemu-img instance gets a locking error as the images are actually opened for reading already. I'm afraid that we won't be able to restore the previous semantics without actually opening the backing files after the copy is synchronized before completing the job and then installing them as the backing via blockdev-reopen. Libvirt's documentation was partially alibistic [1] and asked the user to actually provide a working image but oVirt actually exploited the qemu behaviour to allow folding the two operations together. [1] https://libvirt.org/html/libvirt-libvirt-domain.html#virDomainBlockCopy > > >> > > >> Probably a loaded question, but: > > >> > > >> - What's needed to make the interface stable? > > >> - Are there known problem points? > > >> - Any suggestions for workarounds in the meantime? > > > > > > I think I've asked this before, but I don't remember the answer... > > > > > > What would be the problem with letting the enable/disable command > > > temporarily reopen the backing file read-write, like the commit job [1] > > > does? > > > > > > > I guess no problem? I wouldn't want it to do this automatically, but > > perhaps we could add a "force=True" bool where it tries to do just this. > > > > (And once reopen works properly we could deprecate this workaround again.) > > I'm not sure if I would view this only as a workaround, but if you like > it better with force=true, I won't object either. I'm wondering whether adding a feature deprecating it soon is even worth doing. From libvirt's point of view it would be comparable to using the x-prefixed command, with just slightly longer transition period. ( deleting the x-prefix is a very hard transition to cope with)
Re: [PATCH v3 04/33] block: Add BdrvChildRole to BdrvChild
On 2/18/20 6:42 AM, Max Reitz wrote: For now, it is always set to 0. Later patches in this series will ensure that all callers pass an appropriate combination of flags. Signed-off-by: Max Reitz --- 31 files changed, 62 insertions(+), 49 deletions(-) Mostly mechanical, and the differences from v2 stem from the type changes in patch 3. Reviewed-by: Eric Blake -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Re: [PATCH v2 32/33] block: Pass BdrvChildRole in remaining cases
On 2/18/20 6:01 AM, Max Reitz wrote: Is it worth an assert(role) somewhere now that you've converted all callers to pass at least one role? Well, as the commit message states, block_job_add_bdrv() in blockjob.c still passes BdrvChildRole=0 to bdrv_root_attach_child(). So it depends on what function we’re looking at. I suppose we could add such an assertion to bdrv_attach_child() because we could expect all BDSs to pass some role for their children. OTOH, maybe a BDS has a legitimate reason not to: Maybe it just wants to take some permissions on some BDS without having any real relationship to it. Right now, some block jobs do that, well, except they do so through the back door of adding the child BDS to the block job object (which then passes no child role). So maybe I’d actually rather not add such an assertion anywhere. Fair enough - you have more knowledge of which callers remain that still have a legitimate reason to not request a role. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Re: [PATCH v4 18/19] iotests: Add tests for invalid Quorum @replaces
On 18.02.20 13:38, Kevin Wolf wrote: > Am 18.02.2020 um 11:34 hat Max Reitz geschrieben: >> Add two tests to see that you cannot replace a Quorum child with the >> mirror job while the child is in use by a different parent. >> >> Signed-off-by: Max Reitz >> Reviewed-by: Vladimir Sementsov-Ogievskiy >> --- >> tests/qemu-iotests/041 | 70 +- >> tests/qemu-iotests/041.out | 4 +-- >> 2 files changed, 71 insertions(+), 3 deletions(-) >> >> diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 >> index 1d9e64ff6d..53c8671969 100755 >> --- a/tests/qemu-iotests/041 >> +++ b/tests/qemu-iotests/041 >> @@ -20,6 +20,7 @@ >> >> import time >> import os >> +import re >> import iotests >> from iotests import qemu_img, qemu_io >> >> @@ -34,6 +35,8 @@ quorum_img3 = os.path.join(iotests.test_dir, 'quorum3.img') >> quorum_repair_img = os.path.join(iotests.test_dir, 'quorum_repair.img') >> quorum_snapshot_file = os.path.join(iotests.test_dir, 'quorum_snapshot.img') >> >> +nbd_sock_path = os.path.join(iotests.test_dir, 'nbd.sock') >> + >> class TestSingleDrive(iotests.QMPTestCase): >> image_len = 1 * 1024 * 1024 # MB >> qmp_cmd = 'drive-mirror' >> @@ -892,7 +895,8 @@ class TestRepairQuorum(iotests.QMPTestCase): >> >> def tearDown(self): >> self.vm.shutdown() >> -for i in self.IMAGES + [ quorum_repair_img, quorum_snapshot_file ]: >> +for i in self.IMAGES + [ quorum_repair_img, quorum_snapshot_file, >> + nbd_sock_path ]: >> # Do a try/except because the test may have deleted some images >> try: >> os.remove(i) >> @@ -1032,6 +1036,70 @@ class TestRepairQuorum(iotests.QMPTestCase): >> self.assert_has_block_node("repair0", quorum_repair_img) >> self.vm.assert_block_path('quorum0', '/children.1', 'repair0') >> >> +def test_with_other_parent(self): >> +""" >> +Check that we cannot replace a Quorum child when it has other >> +parents. >> +""" >> +result = self.vm.qmp('nbd-server-start', >> + addr={ >> + 'type': 'unix', >> + 'data': {'path': nbd_sock_path} >> + }) >> +self.assert_qmp(result, 'return', {}) >> + >> +result = self.vm.qmp('nbd-server-add', device='img1') >> +self.assert_qmp(result, 'return', {}) >> + >> +result = self.vm.qmp('drive-mirror', job_id='mirror', >> device='quorum0', >> + sync='full', node_name='repair0', >> replaces='img1', >> + target=quorum_repair_img, >> format=iotests.imgfmt) >> +self.assert_qmp(result, 'error/desc', >> +"Cannot replace 'img1' by a node mirrored from " >> +"'quorum0', because it cannot be guaranteed that >> doing " >> +"so would not lead to an abrupt change of visible >> data") >> + >> +def test_with_other_parents_after_mirror_start(self): >> +""" >> +The same as test_with_other_parent(), but add the NBD server >> +only when the mirror job is already running. >> +""" >> +result = self.vm.qmp('nbd-server-start', >> + addr={ >> + 'type': 'unix', >> + 'data': {'path': nbd_sock_path} >> + }) >> +self.assert_qmp(result, 'return', {}) >> + >> +result = self.vm.qmp('drive-mirror', job_id='mirror', >> device='quorum0', >> + sync='full', node_name='repair0', >> replaces='img1', >> + target=quorum_repair_img, >> format=iotests.imgfmt) >> +self.assert_qmp(result, 'return', {}) >> + >> +result = self.vm.qmp('nbd-server-add', device='img1') >> +self.assert_qmp(result, 'return', {}) >> + >> +# The full error message goes to stderr, we will check it later >> +self.complete_and_wait('mirror', >> + completion_error='Operation not permitted') >> + >> +# Should not have been replaced >> +self.vm.assert_block_path('quorum0', '/children.1', 'img1') >> + >> +# Check the full error message now >> +self.vm.shutdown() >> +log = self.vm.get_log() >> +log = re.sub(r'^\[I \d+\.\d+\] OPENED\n', '', log) >> +log = re.sub(r'^Formatting.*\n', '', log) >> +log = re.sub(r'\n\[I \+\d+\.\d+\] CLOSED\n?$', '', log) >> +log = re.sub(r'^qemu-system-[^:]*: ', '', log) > > I would have applied the series, but: > > +.F > +== > +FAIL: test_with_other_parents_after_mirror_
Re: [PATCH v3 20/33] block: Switch child_format users to child_of_bds
On 2/18/20 6:42 AM, Max Reitz wrote: Both users (quorum and blkverify) use child_format for not-really-filtered children, so the appropriate BdrvChildRole in both cases is DATA. (Note that this will cause bdrv_inherited_options() to force-allow format probing.) Signed-off-by: Max Reitz --- block/blkverify.c | 4 ++-- block/quorum.c| 6 -- 2 files changed, 6 insertions(+), 4 deletions(-) Reviewed-by: Eric Blake -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Re: [PATCH v2 05/22] migration/block-dirty-bitmap: refactor state global variables
On 17/02/2020 18:02, Vladimir Sementsov-Ogievskiy wrote: Move all state variables into one global struct. Reduce global variable usage, utilizing opaque pointer where possible. Signed-off-by: Vladimir Sementsov-Ogievskiy --- migration/block-dirty-bitmap.c | 171 ++--- 1 file changed, 95 insertions(+), 76 deletions(-) diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c index 49d4cf8810..7a82b76809 100644 --- a/migration/block-dirty-bitmap.c +++ b/migration/block-dirty-bitmap.c @@ -128,6 +128,12 @@ typedef struct DBMSaveState { BdrvDirtyBitmap *prev_bitmap; } DBMSaveState; +typedef struct LoadBitmapState { +BlockDriverState *bs; +BdrvDirtyBitmap *bitmap; +bool migrated; +} LoadBitmapState; + /* State of the dirty bitmap migration (DBM) during load process */ typedef struct DBMLoadState { uint32_t flags; @@ -135,18 +141,17 @@ typedef struct DBMLoadState { char bitmap_name[256]; BlockDriverState *bs; BdrvDirtyBitmap *bitmap; + +GSList *enabled_bitmaps; +QemuMutex finish_lock; } DBMLoadState; -static DBMSaveState dirty_bitmap_mig_state; +typedef struct DBMState { +DBMSaveState save; +DBMLoadState load; +} DBMState; -/* State of one bitmap during load process */ -typedef struct LoadBitmapState { -BlockDriverState *bs; -BdrvDirtyBitmap *bitmap; -bool migrated; -} LoadBitmapState; -static GSList *enabled_bitmaps; -QemuMutex finish_lock; +static DBMState dbm_state; static uint32_t qemu_get_bitmap_flags(QEMUFile *f) { @@ -169,21 +174,21 @@ static void qemu_put_bitmap_flags(QEMUFile *f, uint32_t flags) qemu_put_byte(f, flags); } -static void send_bitmap_header(QEMUFile *f, SaveBitmapState *dbms, - uint32_t additional_flags) +static void send_bitmap_header(QEMUFile *f, DBMSaveState *s, + SaveBitmapState *dbms, uint32_t additional_flags) { BlockDriverState *bs = dbms->bs; BdrvDirtyBitmap *bitmap = dbms->bitmap; uint32_t flags = additional_flags; trace_send_bitmap_header_enter(); -if (bs != dirty_bitmap_mig_state.prev_bs) { -dirty_bitmap_mig_state.prev_bs = bs; +if (bs != s->prev_bs) { +s->prev_bs = bs; flags |= DIRTY_BITMAP_MIG_FLAG_DEVICE_NAME; } -if (bitmap != dirty_bitmap_mig_state.prev_bitmap) { -dirty_bitmap_mig_state.prev_bitmap = bitmap; +if (bitmap != s->prev_bitmap) { +s->prev_bitmap = bitmap; flags |= DIRTY_BITMAP_MIG_FLAG_BITMAP_NAME; } @@ -198,19 +203,22 @@ static void send_bitmap_header(QEMUFile *f, SaveBitmapState *dbms, } } -static void send_bitmap_start(QEMUFile *f, SaveBitmapState *dbms) +static void send_bitmap_start(QEMUFile *f, DBMSaveState *s, + SaveBitmapState *dbms) { -send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_START); +send_bitmap_header(f, s, dbms, DIRTY_BITMAP_MIG_FLAG_START); qemu_put_be32(f, bdrv_dirty_bitmap_granularity(dbms->bitmap)); qemu_put_byte(f, dbms->flags); } -static void send_bitmap_complete(QEMUFile *f, SaveBitmapState *dbms) +static void send_bitmap_complete(QEMUFile *f, DBMSaveState *s, + SaveBitmapState *dbms) { -send_bitmap_header(f, dbms, DIRTY_BITMAP_MIG_FLAG_COMPLETE); +send_bitmap_header(f, s, dbms, DIRTY_BITMAP_MIG_FLAG_COMPLETE); } -static void send_bitmap_bits(QEMUFile *f, SaveBitmapState *dbms, +static void send_bitmap_bits(QEMUFile *f, DBMSaveState *s, + SaveBitmapState *dbms, uint64_t start_sector, uint32_t nr_sectors) { /* align for buffer_is_zero() */ @@ -235,7 +243,7 @@ static void send_bitmap_bits(QEMUFile *f, SaveBitmapState *dbms, trace_send_bitmap_bits(flags, start_sector, nr_sectors, buf_size); -send_bitmap_header(f, dbms, flags); +send_bitmap_header(f, s, dbms, flags); qemu_put_be64(f, start_sector); qemu_put_be32(f, nr_sectors); @@ -254,12 +262,12 @@ static void send_bitmap_bits(QEMUFile *f, SaveBitmapState *dbms, } /* Called with iothread lock taken. */ -static void dirty_bitmap_do_save_cleanup(void) +static void dirty_bitmap_do_save_cleanup(DBMSaveState *s) { SaveBitmapState *dbms; -while ((dbms = QSIMPLEQ_FIRST(&dirty_bitmap_mig_state.dbms_list)) != NULL) { -QSIMPLEQ_REMOVE_HEAD(&dirty_bitmap_mig_state.dbms_list, entry); +while ((dbms = QSIMPLEQ_FIRST(&s->dbms_list)) != NULL) { +QSIMPLEQ_REMOVE_HEAD(&s->dbms_list, entry); bdrv_dirty_bitmap_set_busy(dbms->bitmap, false); bdrv_unref(dbms->bs); g_free(dbms); @@ -267,17 +275,17 @@ static void dirty_bitmap_do_save_cleanup(void) } /* Called with iothread lock taken. */ -static int init_dirty_bitmap_migration(void) +static int init_dirty_bitmap_migration(D
Re: [PATCH] migration/block: rename BLOCK_SIZE macro
Stefan Hajnoczi wrote: > Both and define BLOCK_SIZE macros. Avoiding > using that name in block/migration.c. > > I noticed this when including (Linux io_uring) from > "block/aio.h" and compilation failed. Although patches adding that > include haven't been sent yet, it makes sense to rename the macro now in > case someone else stumbles on it in the meantime. > > Signed-off-by: Stefan Hajnoczi queuing it. Thanks.
Re: [PATCH] migration/block: rename BLOCK_SIZE macro
Stefan Hajnoczi wrote: > Both and define BLOCK_SIZE macros. Avoiding > using that name in block/migration.c. > > I noticed this when including (Linux io_uring) from > "block/aio.h" and compilation failed. Although patches adding that > include haven't been sent yet, it makes sense to rename the macro now in > case someone else stumbles on it in the meantime. > > Signed-off-by: Stefan Hajnoczi Reviewed-by: Juan Quintela
[PATCH v3 27/33] tests: Use child_of_bds instead of child_file
Signed-off-by: Max Reitz Reviewed-by: Eric Blake --- tests/test-bdrv-drain.c | 29 + tests/test-bdrv-graph-mod.c | 6 -- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c index 15393a0140..91567ca97d 100644 --- a/tests/test-bdrv-drain.c +++ b/tests/test-bdrv-drain.c @@ -97,7 +97,7 @@ static void bdrv_test_child_perm(BlockDriverState *bs, BdrvChild *c, * detach_by_driver_cb_parent as one of them. */ if (child_class != &child_file && child_class != &child_of_bds) { -child_class = &child_file; +child_class = &child_of_bds; } bdrv_format_default_perms(bs, c, child_class, role, reopen_queue, @@ -1203,7 +1203,8 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, &error_abort); -bdrv_attach_child(bs, null_bs, "null-child", &child_file, 0, &error_abort); +bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds, + BDRV_CHILD_DATA, &error_abort); /* This child will be the one to pass to requests through to, and * it will stall until a drain occurs */ @@ -1211,14 +1212,17 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, &error_abort); child_bs->total_sectors = 65536 >> BDRV_SECTOR_BITS; /* Takes our reference to child_bs */ -tts->wait_child = bdrv_attach_child(bs, child_bs, "wait-child", &child_file, -0, &error_abort); +tts->wait_child = bdrv_attach_child(bs, child_bs, "wait-child", +&child_of_bds, +BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY, +&error_abort); /* This child is just there to be deleted * (for detach_instead_of_delete == true) */ null_bs = bdrv_open("null-co://", NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, &error_abort); -bdrv_attach_child(bs, null_bs, "null-child", &child_file, 0, &error_abort); +bdrv_attach_child(bs, null_bs, "null-child", &child_of_bds, BDRV_CHILD_DATA, + &error_abort); blk = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); blk_insert_bs(blk, bs, &error_abort); @@ -1315,7 +1319,8 @@ static void detach_indirect_bh(void *opaque) bdrv_ref(data->c); data->child_c = bdrv_attach_child(data->parent_b, data->c, "PB-C", - &child_file, 0, &error_abort); + &child_of_bds, BDRV_CHILD_DATA, + &error_abort); } static void detach_by_parent_aio_cb(void *opaque, int ret) @@ -1332,7 +1337,7 @@ static void detach_by_driver_cb_drained_begin(BdrvChild *child) { aio_bh_schedule_oneshot(qemu_get_current_aio_context(), detach_indirect_bh, &detach_by_parent_data); -child_file.drained_begin(child); +child_of_bds.drained_begin(child); } static BdrvChildClass detach_by_driver_cb_class; @@ -1367,7 +1372,7 @@ static void test_detach_indirect(bool by_parent_cb) QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, NULL, 0); if (!by_parent_cb) { -detach_by_driver_cb_class = child_file; +detach_by_driver_cb_class = child_of_bds; detach_by_driver_cb_class.drained_begin = detach_by_driver_cb_drained_begin; } @@ -1397,15 +1402,15 @@ static void test_detach_indirect(bool by_parent_cb) /* Set child relationships */ bdrv_ref(b); bdrv_ref(a); -child_b = bdrv_attach_child(parent_b, b, "PB-B", &child_file, 0, -&error_abort); +child_b = bdrv_attach_child(parent_b, b, "PB-B", &child_of_bds, +BDRV_CHILD_DATA, &error_abort); child_a = bdrv_attach_child(parent_b, a, "PB-A", &child_of_bds, BDRV_CHILD_COW, &error_abort); bdrv_ref(a); bdrv_attach_child(parent_a, a, "PA-A", - by_parent_cb ? &child_file : &detach_by_driver_cb_class, - 0, &error_abort); + by_parent_cb ? &child_of_bds : &detach_by_driver_cb_class, + BDRV_CHILD_DATA, &error_abort); g_assert_cmpint(parent_a->refcnt, ==, 1); g_assert_cmpint(parent_b->refcnt, ==, 1); diff --git a/tests/test-bdrv-graph-mod.c b/tests/test-bdrv-graph-mod.c index 3707e2533c..6ae91ff171 100644 --- a/tests/test-bdrv-graph-mod.c +++ b/tests/test-bdrv-graph-mod.c @@ -112,7 +112,8 @@ static void test_update_perm_tree(void) blk_insert_bs(root, bs, &error_abort); -bdrv_attach_child(filter, bs, "child", &child_file, 0, &error_abort); +bdrv_attach_child(filter, bs, "child", &child_of_bds, +
[PATCH v3 25/33] block: Make filter drivers use child_of_bds
Note that some filters have secondary children, namely blkverify (the image to be verified) and blklogwrites (the log). This patch does not touch those children. Note that for blkverify, the filtered child should not be format-probed. While there is nothing enforcing this here, in practice, it will not be: blkverify implements .bdrv_file_open. The block layer ensures (and in fact, asserts) that BDRV_O_PROTOCOL is set for every BDS whose driver implements .bdrv_file_open. This flag will then be bequeathed to blkverify's children, and they will thus (by default) not be probed either. ("By default" refers to the fact that blkverify's other child (the non-filtered one) will have BDRV_O_PROTOCOL force-unset, because that is what happens for all non-filtered children of non-format drivers.) Signed-off-by: Max Reitz Reviewed-by: Eric Blake --- block/blkdebug.c| 4 +++- block/blklogwrites.c| 3 ++- block/blkverify.c | 4 +++- block/copy-on-read.c| 5 +++-- block/filter-compress.c | 5 +++-- block/replication.c | 3 ++- block/throttle.c| 5 +++-- 7 files changed, 19 insertions(+), 10 deletions(-) diff --git a/block/blkdebug.c b/block/blkdebug.c index 8dd8ed6055..b31fa40b0e 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -497,7 +497,9 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, /* Open the image file */ bs->file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options, "image", - bs, &child_file, 0, false, &local_err); + bs, &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + false, &local_err); if (local_err) { ret = -EINVAL; error_propagate(errp, local_err); diff --git a/block/blklogwrites.c b/block/blklogwrites.c index 4faf912ef1..78b0c49460 100644 --- a/block/blklogwrites.c +++ b/block/blklogwrites.c @@ -157,7 +157,8 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, } /* Open the file */ -bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, 0, false, +bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, false, &local_err); if (local_err) { ret = -EINVAL; diff --git a/block/blkverify.c b/block/blkverify.c index 1684b7aa2e..5c3b29244a 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -125,7 +125,9 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags, /* Open the raw file */ bs->file = bdrv_open_child(qemu_opt_get(opts, "x-raw"), options, "raw", - bs, &child_file, 0, false, &local_err); + bs, &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + false, &local_err); if (local_err) { ret = -EINVAL; error_propagate(errp, local_err); diff --git a/block/copy-on-read.c b/block/copy-on-read.c index a2d92ac394..c857ea0da7 100644 --- a/block/copy-on-read.c +++ b/block/copy-on-read.c @@ -28,8 +28,9 @@ static int cor_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { -bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, 0, false, - errp); +bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + false, errp); if (!bs->file) { return -EINVAL; } diff --git a/block/filter-compress.c b/block/filter-compress.c index 4dc5f9fb8c..9edd937645 100644 --- a/block/filter-compress.c +++ b/block/filter-compress.c @@ -30,8 +30,9 @@ static int compress_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { -bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, 0, false, - errp); +bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + false, errp); if (!bs->file) { return -EINVAL; } diff --git a/block/replication.c b/block/replication.c index 9ca5c9368e..ec512ae1c3 100644 --- a/block/replication.c +++ b/block/replication.c @@ -90,7 +90,8 @@ static int replication_open(BlockDriverState *bs, QDict *options, const char *mode; const char *top_id; -bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, 0, +bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, false, errp); if (!bs->file) {
[PATCH v3 28/33] block: Use bdrv_default_perms()
bdrv_default_perms() can decide which permission profile to use based on the BdrvChildRole, so block drivers do not need to select it explicitly. The blkverify driver now no longer shares the WRITE permission for the image to verify. We thus have to adjust two places in test-block-iothread not to take it. (Note that in theory, blkverify should behave like quorum in this regard and share neither WRITE nor RESIZE for both of its children. In practice, it does not really matter, because blkverify is used only for debugging, so we might as well keep its permissions rather liberal.) Signed-off-by: Max Reitz Reviewed-by: Eric Blake --- block/backup-top.c | 4 ++-- block/blkdebug.c| 4 ++-- block/blklogwrites.c| 9 ++--- block/blkreplay.c | 2 +- block/blkverify.c | 2 +- block/bochs.c | 2 +- block/cloop.c | 2 +- block/crypto.c | 2 +- block/dmg.c | 2 +- block/filter-compress.c | 2 +- block/parallels.c | 2 +- block/qcow.c| 2 +- block/qcow2.c | 2 +- block/qed.c | 2 +- block/raw-format.c | 2 +- block/throttle.c| 2 +- block/vdi.c | 2 +- block/vhdx.c| 2 +- block/vmdk.c| 2 +- block/vpc.c | 2 +- tests/test-bdrv-drain.c | 10 +- tests/test-bdrv-graph-mod.c | 2 +- tests/test-block-iothread.c | 17 ++--- 23 files changed, 43 insertions(+), 37 deletions(-) diff --git a/block/backup-top.c b/block/backup-top.c index 3d6f6530a2..00dbad44b5 100644 --- a/block/backup-top.c +++ b/block/backup-top.c @@ -153,8 +153,8 @@ static void backup_top_child_perm(BlockDriverState *bs, BdrvChild *c, *nperm = BLK_PERM_WRITE; } else { /* Source child */ -bdrv_filter_default_perms(bs, c, child_class, role, reopen_queue, - perm, shared, nperm, nshared); +bdrv_default_perms(bs, c, child_class, role, reopen_queue, + perm, shared, nperm, nshared); if (perm & BLK_PERM_WRITE) { *nperm = *nperm | BLK_PERM_CONSISTENT_READ; diff --git a/block/blkdebug.c b/block/blkdebug.c index b31fa40b0e..a925d8295e 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -1003,8 +1003,8 @@ static void blkdebug_child_perm(BlockDriverState *bs, BdrvChild *c, { BDRVBlkdebugState *s = bs->opaque; -bdrv_filter_default_perms(bs, c, child_class, role, reopen_queue, - perm, shared, nperm, nshared); +bdrv_default_perms(bs, c, child_class, role, reopen_queue, + perm, shared, nperm, nshared); *nperm |= s->take_child_perms; *nshared &= ~s->unshare_child_perms; diff --git a/block/blklogwrites.c b/block/blklogwrites.c index 3a57b273fc..8684fb1c74 100644 --- a/block/blklogwrites.c +++ b/block/blklogwrites.c @@ -295,13 +295,8 @@ static void blk_log_writes_child_perm(BlockDriverState *bs, BdrvChild *c, return; } -if (!strcmp(c->name, "log")) { -bdrv_format_default_perms(bs, c, child_class, role, ro_q, perm, shrd, - nperm, nshrd); -} else { -bdrv_filter_default_perms(bs, c, child_class, role, ro_q, perm, shrd, - nperm, nshrd); -} +bdrv_default_perms(bs, c, child_class, role, ro_q, perm, shrd, + nperm, nshrd); } static void blk_log_writes_refresh_limits(BlockDriverState *bs, Error **errp) diff --git a/block/blkreplay.c b/block/blkreplay.c index 71628f4d56..be8cdb6b60 100644 --- a/block/blkreplay.c +++ b/block/blkreplay.c @@ -138,7 +138,7 @@ static BlockDriver bdrv_blkreplay = { .instance_size = 0, .bdrv_open = blkreplay_open, -.bdrv_child_perm= bdrv_filter_default_perms, +.bdrv_child_perm= bdrv_default_perms, .bdrv_getlength = blkreplay_getlength, .bdrv_co_preadv = blkreplay_co_preadv, diff --git a/block/blkverify.c b/block/blkverify.c index 5c3b29244a..2f261de24b 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -319,7 +319,7 @@ static BlockDriver bdrv_blkverify = { .bdrv_parse_filename = blkverify_parse_filename, .bdrv_file_open = blkverify_open, .bdrv_close = blkverify_close, -.bdrv_child_perm = bdrv_filter_default_perms, +.bdrv_child_perm = bdrv_default_perms, .bdrv_getlength = blkverify_getlength, .bdrv_refresh_filename= blkverify_refresh_filename, .bdrv_dirname = blkverify_dirname, diff --git a/block/bochs.c b/block/bochs.c index 62c3f42548..2f010ab40a 100644 --- a/block/bochs.c +++ b/block/bochs.c @@ -297,7 +297,7 @@ static BlockDriver bdrv_bochs = { .insta
[PATCH v3 31/33] block: Drop child_file
Signed-off-by: Max Reitz Reviewed-by: Eric Blake --- block.c | 30 +- include/block/block_int.h | 1 - tests/test-bdrv-drain.c | 8 +++- 3 files changed, 4 insertions(+), 35 deletions(-) diff --git a/block.c b/block.c index 16eb790ea6..986f99415c 100644 --- a/block.c +++ b/block.c @@ -1121,33 +1121,6 @@ const BdrvChildClass child_of_bds = { .update_filename = bdrv_child_cb_update_filename, }; -/* - * Returns the options and flags that bs->file should get if a protocol driver - * is expected, based on the given options and flags for the parent BDS - */ -static void bdrv_protocol_options(BdrvChildRole role, bool parent_is_format, - int *child_flags, QDict *child_options, - int parent_flags, QDict *parent_options) -{ -bdrv_inherited_options(BDRV_CHILD_IMAGE, true, - child_flags, child_options, - parent_flags, parent_options); -} - -const BdrvChildClass child_file = { -.parent_is_bds = true, -.get_parent_desc = bdrv_child_get_parent_desc, -.inherit_options = bdrv_protocol_options, -.drained_begin = bdrv_child_cb_drained_begin, -.drained_poll= bdrv_child_cb_drained_poll, -.drained_end = bdrv_child_cb_drained_end, -.attach = bdrv_child_cb_attach, -.detach = bdrv_child_cb_detach, -.inactivate = bdrv_child_cb_inactivate, -.can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, -.set_aio_ctx = bdrv_child_cb_set_aio_ctx, -}; - static void bdrv_backing_attach(BdrvChild *c) { BlockDriverState *parent = c->opaque; @@ -2278,8 +2251,7 @@ static void bdrv_default_perms_for_metadata(BlockDriverState *bs, BdrvChild *c, { int flags; -assert(child_class == &child_file || - (child_class == &child_of_bds && (role & BDRV_CHILD_METADATA))); +assert(child_class == &child_of_bds && (role & BDRV_CHILD_METADATA)); flags = bdrv_reopen_get_flags(reopen_queue, bs); diff --git a/include/block/block_int.h b/include/block/block_int.h index 28a35cd5d1..1609e40b58 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -741,7 +741,6 @@ struct BdrvChildClass { }; extern const BdrvChildClass child_of_bds; -extern const BdrvChildClass child_file; struct BdrvChild { BlockDriverState *bs; diff --git a/tests/test-bdrv-drain.c b/tests/test-bdrv-drain.c index 0da5a3a6a1..655fd0d085 100644 --- a/tests/test-bdrv-drain.c +++ b/tests/test-bdrv-drain.c @@ -93,12 +93,10 @@ static void bdrv_test_child_perm(BlockDriverState *bs, BdrvChild *c, uint64_t *nperm, uint64_t *nshared) { /* - * bdrv_default_perms() accepts only these two, so disguise - * detach_by_driver_cb_parent as one of them. + * bdrv_default_perms() accepts nothing else, so disguise + * detach_by_driver_cb_parent. */ -if (child_class != &child_file && child_class != &child_of_bds) { -child_class = &child_of_bds; -} +child_class = &child_of_bds; bdrv_default_perms(bs, c, child_class, role, reopen_queue, perm, shared, nperm, nshared); -- 2.24.1
[PATCH v3 14/33] block: Distinguish paths in *_format_default_perms
bdrv_format_default_perms() has one code path for backing files, and one for storage files. We want to pull them out into own functions, so make sure they are completely distinct before so the next patches will be a bit cleaner. Signed-off-by: Max Reitz Reviewed-by: Eric Blake --- block.c | 19 +-- 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/block.c b/block.c index 1d33f58ff8..982785b15a 100644 --- a/block.c +++ b/block.c @@ -2331,6 +2331,13 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, perm |= BLK_PERM_CONSISTENT_READ; } shared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE); + +if (bs->open_flags & BDRV_O_INACTIVE) { +shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE; +} + +*nperm = perm; +*nshared = shared; } else { /* We want consistent read from backing files if the parent needs it. * No other operations are performed on backing files. */ @@ -2347,14 +2354,14 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD | BLK_PERM_WRITE_UNCHANGED; -} -if (bs->open_flags & BDRV_O_INACTIVE) { -shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE; -} +if (bs->open_flags & BDRV_O_INACTIVE) { +shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE; +} -*nperm = perm; -*nshared = shared; +*nperm = perm; +*nshared = shared; +} } uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm) -- 2.24.1
[PATCH v3 15/33] block: Pull out bdrv_default_perms_for_backing()
Right now, bdrv_format_default_perms() is used by format parents (generally). We want to switch to a model where most parents use a single BdrvChildClass, which then decides the permissions based on the child role. To do so, we have to split bdrv_format_default_perms() into separate functions for each such role. Signed-off-by: Max Reitz Reviewed-by: Eric Blake --- block.c | 62 + 1 file changed, 40 insertions(+), 22 deletions(-) diff --git a/block.c b/block.c index 982785b15a..8b97412cbc 100644 --- a/block.c +++ b/block.c @@ -2302,6 +2302,44 @@ void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, *nshared = (shared & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; } +static void bdrv_default_perms_for_backing(BlockDriverState *bs, BdrvChild *c, + const BdrvChildClass *child_class, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ +assert(child_class == &child_backing || + (child_class == &child_of_bds && (role & BDRV_CHILD_COW))); + +/* + * We want consistent read from backing files if the parent needs it. + * No other operations are performed on backing files. + */ +perm &= BLK_PERM_CONSISTENT_READ; + +/* + * If the parent can deal with changing data, we're okay with a + * writable and resizable backing file. + * TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too? + */ +if (shared & BLK_PERM_WRITE) { +shared = BLK_PERM_WRITE | BLK_PERM_RESIZE; +} else { +shared = 0; +} + +shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD | + BLK_PERM_WRITE_UNCHANGED; + +if (bs->open_flags & BDRV_O_INACTIVE) { +shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE; +} + +*nperm = perm; +*nshared = shared; +} + void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, const BdrvChildClass *child_class, BdrvChildRole role, @@ -2339,28 +2377,8 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, *nperm = perm; *nshared = shared; } else { -/* We want consistent read from backing files if the parent needs it. - * No other operations are performed on backing files. */ -perm &= BLK_PERM_CONSISTENT_READ; - -/* If the parent can deal with changing data, we're okay with a - * writable and resizable backing file. */ -/* TODO Require !(perm & BLK_PERM_CONSISTENT_READ), too? */ -if (shared & BLK_PERM_WRITE) { -shared = BLK_PERM_WRITE | BLK_PERM_RESIZE; -} else { -shared = 0; -} - -shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD | - BLK_PERM_WRITE_UNCHANGED; - -if (bs->open_flags & BDRV_O_INACTIVE) { -shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE; -} - -*nperm = perm; -*nshared = shared; +bdrv_default_perms_for_backing(bs, c, child_class, role, reopen_queue, + perm, shared, nperm, nshared); } } -- 2.24.1
[PATCH v3 17/33] block: Split bdrv_default_perms_for_storage()
We can be less restrictive about pure data children than those with metadata on them. For bdrv_format_default_perms(), we keep the safe option of bdrv_default_perms_for_metadata() (until we drop bdrv_format_default_perms() altogether). That means that bdrv_default_perms_for_data() is unused at this point. We will use it for all children that have the DATA role, but not the METADATA role. So far, no child has any role, so we do not use it, but that will change. Signed-off-by: Max Reitz Reviewed-by: Eric Blake --- block.c | 53 +++-- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/block.c b/block.c index 64b5635122..c0ba274743 100644 --- a/block.c +++ b/block.c @@ -2340,18 +2340,17 @@ static void bdrv_default_perms_for_backing(BlockDriverState *bs, BdrvChild *c, *nshared = shared; } -static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c, - const BdrvChildClass *child_class, - BdrvChildRole role, - BlockReopenQueue *reopen_queue, - uint64_t perm, uint64_t shared, - uint64_t *nperm, uint64_t *nshared) +static void bdrv_default_perms_for_metadata(BlockDriverState *bs, BdrvChild *c, +const BdrvChildClass *child_class, +BdrvChildRole role, +BlockReopenQueue *reopen_queue, +uint64_t perm, uint64_t shared, +uint64_t *nperm, uint64_t *nshared) { int flags; assert(child_class == &child_file || - (child_class == &child_of_bds && -(role & (BDRV_CHILD_METADATA | BDRV_CHILD_DATA; + (child_class == &child_of_bds && (role & BDRV_CHILD_METADATA))); flags = bdrv_reopen_get_flags(reopen_queue, bs); @@ -2384,6 +2383,40 @@ static void bdrv_default_perms_for_storage(BlockDriverState *bs, BdrvChild *c, *nshared = shared; } +/* TODO: Use */ +static void __attribute__((unused)) +bdrv_default_perms_for_data(BlockDriverState *bs, BdrvChild *c, +const BdrvChildClass *child_class, +BdrvChildRole role, +BlockReopenQueue *reopen_queue, +uint64_t perm, uint64_t shared, +uint64_t *nperm, uint64_t *nshared) +{ +assert(child_class == &child_of_bds && (role & BDRV_CHILD_DATA)); + +/* + * Apart from the modifications below, the same permissions are + * forwarded and left alone as for filters + */ +bdrv_filter_default_perms(bs, c, child_class, role, reopen_queue, + perm, shared, &perm, &shared); + +/* + * We cannot allow other users to resize the file because the + * format driver might have some assumptions about the size + * (e.g. because it is stored in metadata, or because the file is + * split into fixed-size data files). + */ +shared &= ~BLK_PERM_RESIZE; + +if (bs->open_flags & BDRV_O_INACTIVE) { +shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE; +} + +*nperm = perm; +*nshared = shared; +} + void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, const BdrvChildClass *child_class, BdrvChildRole role, @@ -2395,8 +2428,8 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, assert(child_class == &child_backing || child_class == &child_file); if (!backing) { -bdrv_default_perms_for_storage(bs, c, child_class, role, reopen_queue, - perm, shared, nperm, nshared); +bdrv_default_perms_for_metadata(bs, c, child_class, role, reopen_queue, +perm, shared, nperm, nshared); } else { bdrv_default_perms_for_backing(bs, c, child_class, role, reopen_queue, perm, shared, nperm, nshared); -- 2.24.1
[PATCH v3 24/33] block: Make format drivers use child_of_bds
Commonly, they need to pass the BDRV_CHILD_IMAGE set as the BdrvChildRole; but there are exceptions for drivers with external data files (qcow2 and vmdk). Signed-off-by: Max Reitz Reviewed-by: Eric Blake --- block/bochs.c | 4 ++-- block/cloop.c | 4 ++-- block/crypto.c| 4 ++-- block/dmg.c | 4 ++-- block/parallels.c | 4 ++-- block/qcow.c | 4 ++-- block/qcow2.c | 19 +-- block/qed.c | 4 ++-- block/vdi.c | 4 ++-- block/vhdx.c | 4 ++-- block/vmdk.c | 20 +--- block/vpc.c | 4 ++-- 12 files changed, 50 insertions(+), 29 deletions(-) diff --git a/block/bochs.c b/block/bochs.c index b013e73063..62c3f42548 100644 --- a/block/bochs.c +++ b/block/bochs.c @@ -110,8 +110,8 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags, return ret; } -bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, 0, - false, errp); +bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_IMAGE, false, errp); if (!bs->file) { return -EINVAL; } diff --git a/block/cloop.c b/block/cloop.c index 3ed9fa63cc..d374a8427d 100644 --- a/block/cloop.c +++ b/block/cloop.c @@ -71,8 +71,8 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags, return ret; } -bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, 0, - false, errp); +bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_IMAGE, false, errp); if (!bs->file) { return -EINVAL; } diff --git a/block/crypto.c b/block/crypto.c index 4da74a7737..2d85d9e70a 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -200,8 +200,8 @@ static int block_crypto_open_generic(QCryptoBlockFormat format, unsigned int cflags = 0; QDict *cryptoopts = NULL; -bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, 0, - false, errp); +bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_IMAGE, false, errp); if (!bs->file) { return -EINVAL; } diff --git a/block/dmg.c b/block/dmg.c index af8188638c..bc64194577 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -439,8 +439,8 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags, return ret; } -bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, 0, - false, errp); +bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_IMAGE, false, errp); if (!bs->file) { return -EINVAL; } diff --git a/block/parallels.c b/block/parallels.c index 3d5b3b7c63..7dc9a1fa76 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -728,8 +728,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, Error *local_err = NULL; char *buf; -bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, 0, - false, errp); +bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_IMAGE, false, errp); if (!bs->file) { return -EINVAL; } diff --git a/block/qcow.c b/block/qcow.c index 2bf8e8eb36..bc7f7c1054 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -130,8 +130,8 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, qdict_extract_subqdict(options, &encryptopts, "encrypt."); encryptfmt = qdict_get_try_str(encryptopts, "format"); -bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, 0, - false, errp); +bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_IMAGE, false, errp); if (!bs->file) { ret = -EINVAL; goto fail; diff --git a/block/qcow2.c b/block/qcow2.c index 2ce974df4d..7e1532cc5e 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1537,8 +1537,9 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, } /* Open external data file */ -s->data_file = bdrv_open_child(NULL, options, "data-file", bs, &child_file, - 0, true, &local_err); +s->data_file = bdrv_open_child(NULL, options, "data-file", bs, + &child_of_bds, BDRV_CHILD_DATA, + true, &local_err); if (local_err) { error_propagate(errp, local_err); ret = -EINVAL; @@ -1548,8 +1549,8 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, if (s->incompatible_features & QCOW2_INCOMPAT_DATA_FILE) { if (!s->data_file &
[PATCH v3 23/33] block: Drop child_backing
Signed-off-by: Max Reitz Reviewed-by: Eric Blake --- block.c | 62 +++ include/block/block_int.h | 1 - 2 files changed, 4 insertions(+), 59 deletions(-) diff --git a/block.c b/block.c index 31affcb4ee..1f573f3815 100644 --- a/block.c +++ b/block.c @@ -1191,15 +1191,6 @@ static void bdrv_backing_attach(BdrvChild *c) parent->backing_blocker); } -/* XXX: Will be removed along with child_backing */ -static void bdrv_child_cb_attach_backing(BdrvChild *c) -{ -if (!(c->role & BDRV_CHILD_COW)) { -bdrv_backing_attach(c); -} -bdrv_child_cb_attach(c); -} - static void bdrv_backing_detach(BdrvChild *c) { BlockDriverState *parent = c->opaque; @@ -1210,28 +1201,6 @@ static void bdrv_backing_detach(BdrvChild *c) parent->backing_blocker = NULL; } -/* XXX: Will be removed along with child_backing */ -static void bdrv_child_cb_detach_backing(BdrvChild *c) -{ -if (!(c->role & BDRV_CHILD_COW)) { -bdrv_backing_detach(c); -} -bdrv_child_cb_detach(c); -} - -/* - * Returns the options and flags that bs->backing should get, based on the - * given options and flags for the parent BDS - */ -static void bdrv_backing_options(BdrvChildRole role, bool parent_is_format, - int *child_flags, QDict *child_options, - int parent_flags, QDict *parent_options) -{ -bdrv_inherited_options(BDRV_CHILD_COW, true, - child_flags, child_options, - parent_flags, parent_options); -} - static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, const char *filename, Error **errp) { @@ -1259,21 +1228,6 @@ static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, return ret; } -const BdrvChildClass child_backing = { -.parent_is_bds = true, -.get_parent_desc = bdrv_child_get_parent_desc, -.attach = bdrv_child_cb_attach_backing, -.detach = bdrv_child_cb_detach_backing, -.inherit_options = bdrv_backing_options, -.drained_begin = bdrv_child_cb_drained_begin, -.drained_poll= bdrv_child_cb_drained_poll, -.drained_end = bdrv_child_cb_drained_end, -.inactivate = bdrv_child_cb_inactivate, -.update_filename = bdrv_backing_update_filename, -.can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, -.set_aio_ctx = bdrv_child_cb_set_aio_ctx, -}; - static int bdrv_open_flags(BlockDriverState *bs, int flags) { int open_flags = flags; @@ -2280,8 +2234,7 @@ static void bdrv_default_perms_for_backing(BlockDriverState *bs, BdrvChild *c, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { -assert(child_class == &child_backing || - (child_class == &child_of_bds && (role & BDRV_CHILD_COW))); +assert(child_class == &child_of_bds && (role & BDRV_CHILD_COW)); /* * We want consistent read from backing files if the parent needs it. @@ -2393,23 +2346,16 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { -bool backing = (child_class == &child_backing); - if (child_class == &child_of_bds) { bdrv_default_perms(bs, c, child_class, role, reopen_queue, perm, shared, nperm, nshared); return; } -assert(child_class == &child_backing || child_class == &child_file); +assert(child_class == &child_file); -if (!backing) { -bdrv_default_perms_for_metadata(bs, c, child_class, role, reopen_queue, -perm, shared, nperm, nshared); -} else { -bdrv_default_perms_for_backing(bs, c, child_class, role, reopen_queue, - perm, shared, nperm, nshared); -} +bdrv_default_perms_for_metadata(bs, c, child_class, role, reopen_queue, +perm, shared, nperm, nshared); } void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c, diff --git a/include/block/block_int.h b/include/block/block_int.h index 75139a95ae..ab64a5ccea 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -742,7 +742,6 @@ struct BdrvChildClass { extern const BdrvChildClass child_of_bds; extern const BdrvChildClass child_file; -extern const BdrvChildClass child_backing; struct BdrvChild { BlockDriverState *bs; -- 2.24.1
[PATCH v3 13/33] block: Add child_of_bds
Any current user of child_file, child_format, and child_backing can and should use this generic BdrvChildClass instead, as it can handle all of these cases. However, to be able to do so, the users must pass the appropriate BdrvChildRole when the child is created/attached. (The following commits will take care of that.) Signed-off-by: Max Reitz Reviewed-by: Eric Blake --- block.c | 27 +++ include/block/block_int.h | 1 + 2 files changed, 28 insertions(+) diff --git a/block.c b/block.c index 0f24546863..1d33f58ff8 100644 --- a/block.c +++ b/block.c @@ -1094,6 +1094,33 @@ static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format, *child_flags = flags; } +static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, +const char *filename, Error **errp); + +static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base, + const char *filename, Error **errp) +{ +if (c->role & BDRV_CHILD_COW) { +return bdrv_backing_update_filename(c, base, filename, errp); +} +return 0; +} + +const BdrvChildClass child_of_bds = { +.parent_is_bds = true, +.get_parent_desc = bdrv_child_get_parent_desc, +.inherit_options = bdrv_inherited_options, +.drained_begin = bdrv_child_cb_drained_begin, +.drained_poll= bdrv_child_cb_drained_poll, +.drained_end = bdrv_child_cb_drained_end, +.attach = bdrv_child_cb_attach, +.detach = bdrv_child_cb_detach, +.inactivate = bdrv_child_cb_inactivate, +.can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx, +.set_aio_ctx = bdrv_child_cb_set_aio_ctx, +.update_filename = bdrv_child_cb_update_filename, +}; + /* * Returns the options and flags that bs->file should get if a protocol driver * is expected, based on the given options and flags for the parent BDS diff --git a/include/block/block_int.h b/include/block/block_int.h index 1f8a818f76..dd7ccea35e 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -740,6 +740,7 @@ struct BdrvChildClass { void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore); }; +extern const BdrvChildClass child_of_bds; extern const BdrvChildClass child_file; extern const BdrvChildClass child_format; extern const BdrvChildClass child_backing; -- 2.24.1
[PATCH v3 30/33] block: Drop bdrv_format_default_perms()
Signed-off-by: Max Reitz Reviewed-by: Eric Blake --- block.c | 19 --- include/block/block_int.h | 11 --- 2 files changed, 30 deletions(-) diff --git a/block.c b/block.c index a7c2ad6c5b..16eb790ea6 100644 --- a/block.c +++ b/block.c @@ -2344,25 +2344,6 @@ static void bdrv_default_perms_for_data(BlockDriverState *bs, BdrvChild *c, *nshared = shared; } -void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, - const BdrvChildClass *child_class, - BdrvChildRole role, - BlockReopenQueue *reopen_queue, - uint64_t perm, uint64_t shared, - uint64_t *nperm, uint64_t *nshared) -{ -if (child_class == &child_of_bds) { -bdrv_default_perms(bs, c, child_class, role, reopen_queue, - perm, shared, nperm, nshared); -return; -} - -assert(child_class == &child_file); - -bdrv_default_perms_for_metadata(bs, c, child_class, role, reopen_queue, -perm, shared, nperm, nshared); -} - void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c, const BdrvChildClass *child_class, BdrvChildRole role, BlockReopenQueue *reopen_queue, diff --git a/include/block/block_int.h b/include/block/block_int.h index c2fb6b8899..28a35cd5d1 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -1253,17 +1253,6 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, */ int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp); -/* Default implementation for BlockDriver.bdrv_child_perm() that can be used by - * (non-raw) image formats: Like above for bs->backing, but for bs->file it - * requires WRITE | RESIZE for read-write images, always requires - * CONSISTENT_READ and doesn't share WRITE. */ -void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, - const BdrvChildClass *child_class, - BdrvChildRole child_role, - BlockReopenQueue *reopen_queue, - uint64_t perm, uint64_t shared, - uint64_t *nperm, uint64_t *nshared); - bool bdrv_recurse_can_replace(BlockDriverState *bs, BlockDriverState *to_replace); -- 2.24.1
[PATCH v3 33/33] block: Drop @child_class from bdrv_child_perm()
Implementations should decide the necessary permissions based on @role. Signed-off-by: Max Reitz Reviewed-by: Eric Blake --- block.c | 45 - block/backup-top.c | 3 +-- block/blkdebug.c| 3 +-- block/blklogwrites.c| 3 +-- block/commit.c | 1 - block/copy-on-read.c| 1 - block/mirror.c | 1 - block/quorum.c | 1 - block/replication.c | 1 - block/vvfat.c | 4 +--- include/block/block_int.h | 4 +--- tests/test-bdrv-drain.c | 19 +--- tests/test-bdrv-graph-mod.c | 1 - 13 files changed, 25 insertions(+), 62 deletions(-) diff --git a/block.c b/block.c index 986f99415c..00c491dd91 100644 --- a/block.c +++ b/block.c @@ -1788,13 +1788,13 @@ bool bdrv_is_writable(BlockDriverState *bs) } static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, -BdrvChild *c, const BdrvChildClass *child_class, -BdrvChildRole role, BlockReopenQueue *reopen_queue, +BdrvChild *c, BdrvChildRole role, +BlockReopenQueue *reopen_queue, uint64_t parent_perm, uint64_t parent_shared, uint64_t *nperm, uint64_t *nshared) { assert(bs->drv && bs->drv->bdrv_child_perm); -bs->drv->bdrv_child_perm(bs, c, child_class, role, reopen_queue, +bs->drv->bdrv_child_perm(bs, c, role, reopen_queue, parent_perm, parent_shared, nperm, nshared); /* TODO Take force_share from reopen_queue */ @@ -1888,7 +1888,7 @@ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, uint64_t cur_perm, cur_shared; bool child_tighten_restr; -bdrv_child_perm(bs, c->bs, c, c->klass, c->role, q, +bdrv_child_perm(bs, c->bs, c, c->role, q, cumulative_perms, cumulative_shared_perms, &cur_perm, &cur_shared); ret = bdrv_child_check_perm(c, q, cur_perm, cur_shared, ignore_children, @@ -1955,7 +1955,7 @@ static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms, /* Update all children */ QLIST_FOREACH(c, &bs->children, next) { uint64_t cur_perm, cur_shared; -bdrv_child_perm(bs, c->bs, c, c->klass, c->role, NULL, +bdrv_child_perm(bs, c->bs, c, c->role, NULL, cumulative_perms, cumulative_shared_perms, &cur_perm, &cur_shared); bdrv_child_set_perm(c, cur_perm, cur_shared); @@ -2183,7 +2183,7 @@ int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp) uint64_t perms, shared; bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared); -bdrv_child_perm(bs, c->bs, c, c->klass, c->role, NULL, +bdrv_child_perm(bs, c->bs, c, c->role, NULL, parent_perms, parent_shared, &perms, &shared); return bdrv_child_try_set_perm(c, perms, shared, errp); @@ -2195,7 +2195,6 @@ int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp) * filtered child. */ static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, - const BdrvChildClass *child_class, BdrvChildRole role, BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, @@ -2206,13 +2205,12 @@ static void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, } static void bdrv_default_perms_for_backing(BlockDriverState *bs, BdrvChild *c, - const BdrvChildClass *child_class, BdrvChildRole role, BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) { -assert(child_class == &child_of_bds && (role & BDRV_CHILD_COW)); +assert(role & BDRV_CHILD_COW); /* * We want consistent read from backing files if the parent needs it. @@ -2243,7 +2241,6 @@ static void bdrv_default_perms_for_backing(BlockDriverState *bs, BdrvChild *c, } static void bdrv_default_perms_for_metadata(BlockDriverState *bs, BdrvChild *c, -const BdrvChildClass *child_class, BdrvChildRole role, BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, @@ -2251,7 +2248,7 @@ static void bdrv_default_perms_for_metadata(BlockDriverState *bs, BdrvChild *c, { int flags; -assert
[PATCH v3 11/33] block: Unify bdrv_child_cb_attach()
Make bdrv_child_cb_attach() call bdrv_backing_attach() for children with a COW role (and drop the reverse call from bdrv_backing_attach()), so it can be used for any child (with a proper role set). Because so far no child has a proper role set, we need a temporary new callback for child_backing.attach that ensures bdrv_backing_attach() is called for all COW children that do not have their role set yet. Signed-off-by: Max Reitz Reviewed-by: Eric Blake --- block.c | 16 +++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index aa54d40e4f..3cf1293a7b 100644 --- a/block.c +++ b/block.c @@ -942,9 +942,16 @@ static void bdrv_child_cb_drained_end(BdrvChild *child, bdrv_drained_end_no_poll(bs, drained_end_counter); } +static void bdrv_backing_attach(BdrvChild *c); + static void bdrv_child_cb_attach(BdrvChild *child) { BlockDriverState *bs = child->opaque; + +if (child->role & BDRV_CHILD_COW) { +bdrv_backing_attach(child); +} + bdrv_apply_subtree_drain(child, bs); } @@ -1178,7 +1185,14 @@ static void bdrv_backing_attach(BdrvChild *c) parent->backing_blocker); bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET, parent->backing_blocker); +} +/* XXX: Will be removed along with child_backing */ +static void bdrv_child_cb_attach_backing(BdrvChild *c) +{ +if (!(c->role & BDRV_CHILD_COW)) { +bdrv_backing_attach(c); +} bdrv_child_cb_attach(c); } @@ -1237,7 +1251,7 @@ static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base, const BdrvChildClass child_backing = { .parent_is_bds = true, .get_parent_desc = bdrv_child_get_parent_desc, -.attach = bdrv_backing_attach, +.attach = bdrv_child_cb_attach_backing, .detach = bdrv_backing_detach, .inherit_options = bdrv_backing_options, .drained_begin = bdrv_child_cb_drained_begin, -- 2.24.1
[PATCH v3 32/33] block: Pass BdrvChildRole in remaining cases
These calls have no real use for the child role yet, but it will not harm to give one. Notably, the bdrv_root_attach_child() call in blockjob.c is left unmodified because there is not much the generic BlockJob object wants from its children. Signed-off-by: Max Reitz Reviewed-by: Eric Blake --- block/block-backend.c | 11 +++ block/vvfat.c | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index 9e0078bfb5..e655e15675 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -401,8 +401,9 @@ BlockBackend *blk_new_open(const char *filename, const char *reference, return NULL; } -blk->root = bdrv_root_attach_child(bs, "root", &child_root, 0, blk->ctx, - perm, BLK_PERM_ALL, blk, errp); +blk->root = bdrv_root_attach_child(bs, "root", &child_root, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + blk->ctx, perm, BLK_PERM_ALL, blk, errp); if (!blk->root) { blk_unref(blk); return NULL; @@ -812,8 +813,10 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp) { ThrottleGroupMember *tgm = &blk->public.throttle_group_member; bdrv_ref(bs); -blk->root = bdrv_root_attach_child(bs, "root", &child_root, 0, blk->ctx, - blk->perm, blk->shared_perm, blk, errp); +blk->root = bdrv_root_attach_child(bs, "root", &child_root, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + blk->ctx, blk->perm, blk->shared_perm, + blk, errp); if (blk->root == NULL) { return -EPERM; } diff --git a/block/vvfat.c b/block/vvfat.c index 8f4ff5a97e..d4f4218924 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -3186,7 +3186,7 @@ static int enable_write_target(BlockDriverState *bs, Error **errp) options = qdict_new(); qdict_put_str(options, "write-target.driver", "qcow"); s->qcow = bdrv_open_child(s->qcow_filename, options, "write-target", bs, - &child_vvfat_qcow, 0, false, errp); + &child_vvfat_qcow, BDRV_CHILD_DATA, false, errp); qobject_unref(options); if (!s->qcow) { ret = -EINVAL; -- 2.24.1
[PATCH v3 09/33] block: Add generic bdrv_inherited_options()
After the series this patch belongs to, we want to have a common BdrvChildClass that encompasses all of child_file, child_format, and child_backing. Such a single class needs a single .inherit_options() implementation, and this patch introduces it. The next patch will show how the existing implementations can fall back to it just by passing appropriate BdrvChildRole and parent_is_format values. Signed-off-by: Max Reitz Reviewed-by: Eric Blake --- block.c | 84 + 1 file changed, 84 insertions(+) diff --git a/block.c b/block.c index c33f0e9b42..9179b9b604 100644 --- a/block.c +++ b/block.c @@ -998,6 +998,90 @@ static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options, *child_flags &= ~BDRV_O_NATIVE_AIO; } +/* + * Returns the options and flags that a generic child of a BDS should + * get, based on the given options and flags for the parent BDS. + */ +static void __attribute__((unused)) +bdrv_inherited_options(BdrvChildRole role, bool parent_is_format, + int *child_flags, QDict *child_options, + int parent_flags, QDict *parent_options) +{ +int flags = parent_flags; + +/* + * First, decide whether to set, clear, or leave BDRV_O_PROTOCOL. + * Generally, the question to answer is: Should this child be + * format-probed by default? + */ + +/* + * Pure and non-filtered data children of non-format nodes should + * be probed by default (even when the node itself has BDRV_O_PROTOCOL + * set). This only affects a very limited set of drivers (namely + * quorum and blkverify when this comment was written). + * Force-clear BDRV_O_PROTOCOL then. + */ +if (!parent_is_format && +(role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA | + BDRV_CHILD_FILTERED)) == +BDRV_CHILD_DATA) +{ +flags &= ~BDRV_O_PROTOCOL; +} + +/* + * All children of format nodes (except for COW children) and all + * metadata children in general should never be format-probed. + * Force-set BDRV_O_PROTOCOL then. + */ +if ((parent_is_format && !(role & BDRV_CHILD_COW)) || +(role & BDRV_CHILD_METADATA)) +{ +flags |= BDRV_O_PROTOCOL; +} + +/* + * If the cache mode isn't explicitly set, inherit direct and no-flush from + * the parent. + */ +qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT); +qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH); +qdict_copy_default(child_options, parent_options, BDRV_OPT_FORCE_SHARE); + +if (role & BDRV_CHILD_COW) { +/* backing files are always opened read-only */ +qdict_set_default_str(child_options, BDRV_OPT_READ_ONLY, "on"); +qdict_set_default_str(child_options, BDRV_OPT_AUTO_READ_ONLY, "off"); +} else { +/* Inherit the read-only option from the parent if it's not set */ +qdict_copy_default(child_options, parent_options, BDRV_OPT_READ_ONLY); +qdict_copy_default(child_options, parent_options, + BDRV_OPT_AUTO_READ_ONLY); +} + +if (parent_is_format && !(role & BDRV_CHILD_COW)) { +/* + * Our format drivers take care to send flushes and respect + * unmap policy, so we can default to enable both on lower + * layers regardless of the corresponding parent options. + */ +qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap"); +} + +/* Clear flags that only apply to the top layer */ +flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ); + +if (role & BDRV_CHILD_METADATA) { +flags &= ~BDRV_O_NO_IO; +} +if (role & BDRV_CHILD_COW) { +flags &= ~BDRV_O_TEMPORARY; +} + +*child_flags = flags; +} + /* * Returns the options and flags that bs->file should get if a protocol driver * is expected, based on the given options and flags for the parent BDS -- 2.24.1
[PATCH v3 08/33] block: Rename bdrv_inherited_options()
The other two .inherit_options implementations specify exactly for what case they are used in their name, so do it for this one as well. (The actual intention behind this patch is to follow it up with a generic bdrv_inherited_options() that works for all three cases.) Signed-off-by: Max Reitz Reviewed-by: Eric Blake --- block.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block.c b/block.c index 706fae355e..c33f0e9b42 100644 --- a/block.c +++ b/block.c @@ -1002,9 +1002,9 @@ static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options, * Returns the options and flags that bs->file should get if a protocol driver * is expected, based on the given options and flags for the parent BDS */ -static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format, - int *child_flags, QDict *child_options, - int parent_flags, QDict *parent_options) +static void bdrv_protocol_options(BdrvChildRole role, bool parent_is_format, + int *child_flags, QDict *child_options, + int parent_flags, QDict *parent_options) { int flags = parent_flags; @@ -1036,7 +1036,7 @@ static void bdrv_inherited_options(BdrvChildRole role, bool parent_is_format, const BdrvChildClass child_file = { .parent_is_bds = true, .get_parent_desc = bdrv_child_get_parent_desc, -.inherit_options = bdrv_inherited_options, +.inherit_options = bdrv_protocol_options, .drained_begin = bdrv_child_cb_drained_begin, .drained_poll= bdrv_child_cb_drained_poll, .drained_end = bdrv_child_cb_drained_end, -- 2.24.1
[PATCH v3 06/33] block: Pass BdrvChildRole to .inherit_options()
For now, all callers (effectively) pass 0 and no callee evaluates thie value. Later patches will change both. Signed-off-by: Max Reitz Reviewed-by: Eric Blake --- block.c | 40 +++ block/block-backend.c | 3 ++- block/vvfat.c | 3 ++- include/block/block_int.h | 3 ++- 4 files changed, 30 insertions(+), 19 deletions(-) diff --git a/block.c b/block.c index bfeed6e8d9..9fc865288d 100644 --- a/block.c +++ b/block.c @@ -77,6 +77,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, QDict *options, int flags, BlockDriverState *parent, const BdrvChildClass *child_class, + BdrvChildRole child_role, Error **errp); /* If non-zero, use only whitelisted block drivers */ @@ -1001,7 +1002,8 @@ static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options, * Returns the options and flags that bs->file should get if a protocol driver * is expected, based on the given options and flags for the parent BDS */ -static void bdrv_inherited_options(int *child_flags, QDict *child_options, +static void bdrv_inherited_options(BdrvChildRole role, + int *child_flags, QDict *child_options, int parent_flags, QDict *parent_options) { int flags = parent_flags; @@ -1050,10 +1052,11 @@ const BdrvChildClass child_file = { * (and not only protocols) is permitted for it, based on the given options and * flags for the parent BDS */ -static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options, +static void bdrv_inherited_fmt_options(BdrvChildRole role, + int *child_flags, QDict *child_options, int parent_flags, QDict *parent_options) { -child_file.inherit_options(child_flags, child_options, +child_file.inherit_options(role, child_flags, child_options, parent_flags, parent_options); *child_flags &= ~(BDRV_O_PROTOCOL | BDRV_O_NO_IO); @@ -1134,7 +1137,8 @@ static void bdrv_backing_detach(BdrvChild *c) * Returns the options and flags that bs->backing should get, based on the * given options and flags for the parent BDS */ -static void bdrv_backing_options(int *child_flags, QDict *child_options, +static void bdrv_backing_options(BdrvChildRole role, + int *child_flags, QDict *child_options, int parent_flags, QDict *parent_options) { int flags = parent_flags; @@ -2710,7 +2714,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, } backing_hd = bdrv_open_inherit(backing_filename, reference, options, 0, bs, - &child_backing, errp); + &child_backing, 0, errp); if (!backing_hd) { bs->open_flags |= BDRV_O_NO_BACKING; error_prepend(errp, "Could not open backing file: "); @@ -2745,7 +2749,7 @@ free_exit: static BlockDriverState * bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key, BlockDriverState *parent, const BdrvChildClass *child_class, - bool allow_none, Error **errp) + BdrvChildRole child_role, bool allow_none, Error **errp) { BlockDriverState *bs = NULL; QDict *image_options; @@ -2776,7 +2780,7 @@ bdrv_open_child_bs(const char *filename, QDict *options, const char *bdref_key, } bs = bdrv_open_inherit(filename, reference, image_options, 0, - parent, child_class, errp); + parent, child_class, child_role, errp); if (!bs) { goto done; } @@ -2810,7 +2814,7 @@ BdrvChild *bdrv_open_child(const char *filename, BlockDriverState *bs; bs = bdrv_open_child_bs(filename, options, bdref_key, parent, child_class, -allow_none, errp); +child_role, allow_none, errp); if (bs == NULL) { return NULL; } @@ -2859,7 +2863,7 @@ BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp) } -bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, errp); +bs = bdrv_open_inherit(NULL, reference, qdict, 0, NULL, NULL, 0, errp); obj = NULL; fail: @@ -2958,6 +2962,7 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, QDict *options, int flags, BlockDriverState *parent, const BdrvChildClass *child_class, + BdrvChildRole child_role,
[PATCH v3 26/33] block: Use child_of_bds in remaining places
Replace child_file by child_of_bds in all remaining places (excluding tests). Signed-off-by: Max Reitz Reviewed-by: Eric Blake --- block.c | 3 ++- block/backup-top.c | 4 ++-- block/blklogwrites.c | 4 ++-- block/blkreplay.c| 5 +++-- block/raw-format.c | 15 +-- 5 files changed, 22 insertions(+), 9 deletions(-) diff --git a/block.c b/block.c index 1f573f3815..d3beed1819 100644 --- a/block.c +++ b/block.c @@ -3242,7 +3242,8 @@ static BlockDriverState *bdrv_open_inherit(const char *filename, BlockDriverState *file_bs; file_bs = bdrv_open_child_bs(filename, options, "file", bs, - &child_file, 0, true, &local_err); + &child_of_bds, BDRV_CHILD_IMAGE, + true, &local_err); if (local_err) { goto fail; } diff --git a/block/backup-top.c b/block/backup-top.c index c173e61250..3d6f6530a2 100644 --- a/block/backup-top.c +++ b/block/backup-top.c @@ -210,8 +210,8 @@ BlockDriverState *bdrv_backup_top_append(BlockDriverState *source, source->supported_zero_flags); bdrv_ref(target); -state->target = bdrv_attach_child(top, target, "target", &child_file, 0, - errp); +state->target = bdrv_attach_child(top, target, "target", &child_of_bds, + BDRV_CHILD_DATA, errp); if (!state->target) { bdrv_unref(target); bdrv_unref(top); diff --git a/block/blklogwrites.c b/block/blklogwrites.c index 78b0c49460..3a57b273fc 100644 --- a/block/blklogwrites.c +++ b/block/blklogwrites.c @@ -167,8 +167,8 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, } /* Open the log file */ -s->log_file = bdrv_open_child(NULL, options, "log", bs, &child_file, 0, - false, &local_err); +s->log_file = bdrv_open_child(NULL, options, "log", bs, &child_of_bds, + BDRV_CHILD_METADATA, false, &local_err); if (local_err) { ret = -EINVAL; error_propagate(errp, local_err); diff --git a/block/blkreplay.c b/block/blkreplay.c index f97493f45a..71628f4d56 100644 --- a/block/blkreplay.c +++ b/block/blkreplay.c @@ -27,8 +27,9 @@ static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags, int ret; /* Open the image file */ -bs->file = bdrv_open_child(NULL, options, "image", - bs, &child_file, 0, false, &local_err); +bs->file = bdrv_open_child(NULL, options, "image", bs, &child_of_bds, + BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY, + false, &local_err); if (local_err) { ret = -EINVAL; error_propagate(errp, local_err); diff --git a/block/raw-format.c b/block/raw-format.c index 33f5942474..c6470e4622 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -444,6 +444,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, BDRVRawState *s = bs->opaque; bool has_size; uint64_t offset, size; +BdrvChildRole file_role; int ret; ret = raw_read_options(options, &offset, &has_size, &size, errp); @@ -451,8 +452,18 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, return ret; } -bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, 0, - false, errp); +/* + * Without offset and a size limit, this driver behaves very much + * like a filter. With any such limit, it does not. + */ +if (offset || has_size) { +file_role = BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY; +} else { +file_role = BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY; +} + +bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + file_role, false, errp); if (!bs->file) { return -EINVAL; } -- 2.24.1
[PATCH v3 05/33] block: Pass BdrvChildRole to bdrv_child_perm()
For now, all callers pass 0 and no callee evaluates this value. Later patches will change both. Signed-off-by: Max Reitz Reviewed-by: Eric Blake --- block.c | 22 -- block/backup-top.c | 3 ++- block/blkdebug.c| 5 +++-- block/blklogwrites.c| 9 + block/commit.c | 1 + block/copy-on-read.c| 1 + block/mirror.c | 1 + block/quorum.c | 1 + block/replication.c | 1 + block/vvfat.c | 1 + include/block/block_int.h | 5 - tests/test-bdrv-drain.c | 5 +++-- tests/test-bdrv-graph-mod.c | 1 + 13 files changed, 36 insertions(+), 20 deletions(-) diff --git a/block.c b/block.c index e8ddf1689e..bfeed6e8d9 100644 --- a/block.c +++ b/block.c @@ -1786,12 +1786,12 @@ bool bdrv_is_writable(BlockDriverState *bs) static void bdrv_child_perm(BlockDriverState *bs, BlockDriverState *child_bs, BdrvChild *c, const BdrvChildClass *child_class, -BlockReopenQueue *reopen_queue, +BdrvChildRole role, BlockReopenQueue *reopen_queue, uint64_t parent_perm, uint64_t parent_shared, uint64_t *nperm, uint64_t *nshared) { assert(bs->drv && bs->drv->bdrv_child_perm); -bs->drv->bdrv_child_perm(bs, c, child_class, reopen_queue, +bs->drv->bdrv_child_perm(bs, c, child_class, role, reopen_queue, parent_perm, parent_shared, nperm, nshared); /* TODO Take force_share from reopen_queue */ @@ -1885,7 +1885,7 @@ static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q, uint64_t cur_perm, cur_shared; bool child_tighten_restr; -bdrv_child_perm(bs, c->bs, c, c->klass, q, +bdrv_child_perm(bs, c->bs, c, c->klass, c->role, q, cumulative_perms, cumulative_shared_perms, &cur_perm, &cur_shared); ret = bdrv_child_check_perm(c, q, cur_perm, cur_shared, ignore_children, @@ -1952,7 +1952,7 @@ static void bdrv_set_perm(BlockDriverState *bs, uint64_t cumulative_perms, /* Update all children */ QLIST_FOREACH(c, &bs->children, next) { uint64_t cur_perm, cur_shared; -bdrv_child_perm(bs, c->bs, c, c->klass, NULL, +bdrv_child_perm(bs, c->bs, c, c->klass, c->role, NULL, cumulative_perms, cumulative_shared_perms, &cur_perm, &cur_shared); bdrv_child_set_perm(c, cur_perm, cur_shared); @@ -2180,14 +2180,15 @@ int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp) uint64_t perms, shared; bdrv_get_cumulative_perm(bs, &parent_perms, &parent_shared); -bdrv_child_perm(bs, c->bs, c, c->klass, NULL, parent_perms, parent_shared, -&perms, &shared); +bdrv_child_perm(bs, c->bs, c, c->klass, c->role, NULL, +parent_perms, parent_shared, &perms, &shared); return bdrv_child_try_set_perm(c, perms, shared, errp); } void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, const BdrvChildClass *child_class, + BdrvChildRole role, BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) @@ -2198,6 +2199,7 @@ void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, const BdrvChildClass *child_class, + BdrvChildRole role, BlockReopenQueue *reopen_queue, uint64_t perm, uint64_t shared, uint64_t *nperm, uint64_t *nshared) @@ -2210,7 +2212,7 @@ void bdrv_format_default_perms(BlockDriverState *bs, BdrvChild *c, /* Apart from the modifications below, the same permissions are * forwarded and left alone as for filters */ -bdrv_filter_default_perms(bs, c, child_class, reopen_queue, +bdrv_filter_default_perms(bs, c, child_class, role, reopen_queue, perm, shared, &perm, &shared); /* Format drivers may touch metadata even if the guest doesn't write */ @@ -2486,7 +2488,7 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs, bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm); assert(parent_bs->drv); -bdrv_child_perm(parent_bs, child_bs, NULL, child_class, NULL, +bdrv_child_perm(parent_bs, child_bs, NULL, child_class, child_role, NULL, perm, shared_perm, &perm, &shared_perm); child = bdrv_root_attach_child(child_bs, child_name, child_class, @@