[Bug 1623998] Re: pulseaudio Invalid argument error
I tried using "-device usb-audio"and the guest was able to detect the USB audio card, but for some reason no audio would play. I did not see any messages in the terminal about pulseaudio. -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/1623998 Title: pulseaudio Invalid argument error Status in QEMU: Incomplete Bug description: When using qemu-system-ppc on Ubuntu Mate 15 with the usb audio card, I see these error messages: pulseaudio: set_sink_input_volume() failed pulseaudio: Reason: Invalid argument pulseaudio: set_sink_input_mute() failed pulseaudio: Reason: Invalid argument No audio plays. When an attempt is made, QEMU seems to freeze for a moment. I use "-device usb-audio" to add the usb sound card. This issue is present in both emulation and KVM mode. To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/1623998/+subscriptions
[PATCH] target/i386: log guest name and memory error type AO, AR for MCEs
In a large VPC environment we want to log memory error occurrences and log them with guest name and type - there are few use cases - if VM crashes on AR mce inform the user about the reason and resolve the case - if VM hangs notify the user to reboot and resume processing - if VM continues to run let the user know, he/she maybe able to correlate to vm internal outage - Rawhammer attacks - isolate/determine the attacker possible migrating it off the hypervisor - In general track memory errors on a hyperviosr over time to determine trends Monitoring our fleet we come across quite a few of these and been able to take action where before there were no clues to the causes. When memory error occurs we get a log entry in qemu log: Guest [Droplet-12345678] 2019-08-02T05:00:11.940270Z qemu-system-x86_64: Guest MCE Memory Error at qemu addr 0x7f3c7622f000 and guest 78e42f000 addr of type BUS_MCEERR_AR injected with enterprise logging environment we can to take further actions. Signed-off-by: Mario Smarduch --- target/i386/kvm.c | 27 ++- util/qemu-error.c | 24 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 92069099ab..79ebccc684 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -555,9 +555,9 @@ static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code) (MCM_ADDR_PHYS << 6) | 0xc, flags); } -static void hardware_memory_error(void) +static void hardware_memory_error(void *addr) { -fprintf(stderr, "Hardware memory error!\n"); +error_report("QEMU got Hardware memory error at addr %p", addr); exit(1); } @@ -581,15 +581,32 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) kvm_physical_memory_addr_from_host(c->kvm_state, addr, )) { kvm_hwpoison_page_add(ram_addr); kvm_mce_inject(cpu, paddr, code); +/* + * Use different logging severity based on error type. + * If mcelog is running qemu va addr will help debug via mcelog. + */ +if (code == BUS_MCEERR_AR) { +error_report("Guest MCE Memory Error at qemu addr %p and " +"guest %lx addr of type %s injected", addr, paddr, + "BUS_MCEERR_AR"); +} else { + warn_report("Guest MCE Memory Error at qemu addr %p and " + "guest %lx addr of type %s injected", addr, + paddr, "BUS_MCEERR_AO"); +} + return; } -fprintf(stderr, "Hardware memory error for memory used by " -"QEMU itself instead of guest system!\n"); +if (code == BUS_MCEERR_AO) { +warn_report("Hardware memory error at addr %p of type %s " +"for memory used by QEMU itself instead of guest system!", +addr, "BUS_MCEERR_AO"); +} } if (code == BUS_MCEERR_AR) { -hardware_memory_error(); +hardware_memory_error(addr); } /* Hope we are lucky for AO MCE */ diff --git a/util/qemu-error.c b/util/qemu-error.c index f373f3b3b0..2ebafd4405 100644 --- a/util/qemu-error.c +++ b/util/qemu-error.c @@ -11,6 +11,8 @@ */ #include "qemu/osdep.h" +#include "qemu/option.h" +#include "qemu/config-file.h" #include "monitor/monitor.h" #include "qemu/error-report.h" @@ -35,11 +37,31 @@ int error_printf(const char *fmt, ...) return ret; } +static const char *error_get_guestname(void) +{ +QemuOpts *opts = qemu_opts_find(qemu_find_opts("name"), NULL); +return qemu_opt_get(opts, "guest"); +} + +/* + * Print guest name associated with error, to aid debugging errors from + * multiple guests in centralized logging environment. + */ +static void error_print_guestname(void) +{ +const char *name; +name = error_get_guestname(); +if (name != NULL && !cur_mon) { +error_printf("Guest [%s] ", name); +} +} + int error_printf_unless_qmp(const char *fmt, ...) { va_list ap; int ret; +error_print_guestname(); va_start(ap, fmt); ret = error_vprintf_unless_qmp(fmt, ap); va_end(ap); @@ -274,6 +296,7 @@ void error_report(const char *fmt, ...) { va_list ap; +error_print_guestname(); va_start(ap, fmt); vreport(REPORT_TYPE_ERROR, fmt, ap); va_end(ap); @@ -289,6 +312,7 @@ void warn_report(const char *fmt, ...) { va_list ap; +error_print_guestname(); va_start(ap, fmt); vreport(REPORT_TYPE_WARNING, fmt, ap); va_end(ap); -- 2.17.1
Re: [PATCH v3] migration: Support gtree migration
Eric Auger wrote: > Introduce support for GTree migration. A custom save/restore > is implemented. Each item is made of a key and a data. > > If the key is a pointer to an object, 2 VMSDs are passed into > the GTree VMStateField. > > When putting the items, the tree is traversed in sorted order by > g_tree_foreach. > > On the get() path, gtrees must be allocated using the proper > key compare, key destroy and value destroy. This must be handled > beforehand, for example in a pre_load method. > > Tests are added to test save/dump of structs containing gtrees > including the virtio-iommu domain/mappings scenario. > > Signed-off-by: Eric Auger Overal I like the patch. I think that I found a minor error. > +static int put_gtree(QEMUFile *f, void *pv, size_t unused_size, > + const VMStateField *field, QJSON *vmdesc) > +{ > +bool direct_key = (!field->start); > +const VMStateDescription *key_vmsd = direct_key ? NULL : >vmsd[0]; > +const VMStateDescription *val_vmsd = direct_key ? >vmsd[0] : > + >vmsd[1]; > +struct put_gtree_data capsule = {f, key_vmsd, val_vmsd, 0}; Please, consider change the last line to: struct put_gtree_data capsule = { .f = f, .key_vmsd = key_vmsd, .val_vmsd = val_vmsd, .ret = 0}; It makes much easier make changes on the future. Once here, I think that you are missing on the middle a: .vmdesc = vmdesc, No? At least you use it on put_gtree_elem, and I don't see any place where you assign it. When I did the change is when I noticed that it was missing. > +GTree **pval = pv; > +GTree *tree = *pval; > +int ret; > + > +qemu_put_be32(f, g_tree_nnodes(tree)); > +g_tree_foreach(tree, put_gtree_elem, (gpointer)); > +qemu_put_byte(f, false); > +ret = capsule.ret; > +if (ret) { > +error_report("%s : failed to save gtree (%d)", field->name, ret); > +} > +return ret; > +} As said before, with this changes you have my reviewed-by. Later, Juan.
Re: ptimer use of bottom-half handlers
On 10/4/19 10:40 AM, Peter Maydell wrote: > So we should probably cause that to happen in the new scheme > (conceptually, something like "the trigger callback is > implicitly considered to be called from within a tx begin/ > commit block, so (a) it doesn't need to do begin/commit itself > and (b) if it does something resulting in a repeat trigger > the second call will happen after it returns, not recursively" ?) That sounds plausible. r~
Re: Re: target/ppc: bug in optimised vsl/vsr implementation?
> > In comment #9 in the bug > (https://bugs.launchpad.net/qemu/+bug/1841990/comments/9), I note that the > issue was produced in running the test suite for the Power Vector Library > project (https://github.com/open-power-sdk/pveclib), which makes productive > use of dcbenq. > > Maybe that could be adopted or adapted to suit? > > PC I will leave adoption/adaptation decision to Alex and others, but just want to tell you that I took a brief look at pveclib code and internals, and I got an impression that this is an excellent peace of software, that required incredible meticulousness in order to implement, all kudos. Aleksandar
Re: [PATCH v2] target/arm/arch_dump: Add SVE notes
Patchew URL: https://patchew.org/QEMU/20191004120313.5347-1-drjo...@redhat.com/ Hi, This series seems to have some coding style problems. See output below for more information: Type: series Message-id: 20191004120313.5347-1-drjo...@redhat.com Subject: [PATCH v2] target/arm/arch_dump: Add SVE notes === TEST SCRIPT BEGIN === #!/bin/bash git rev-parse base > /dev/null || exit 0 git config --local diff.renamelimit 0 git config --local diff.renames True git config --local diff.algorithm histogram ./scripts/checkpatch.pl --mailback base.. === TEST SCRIPT END === Switched to a new branch 'test' 9d21a9a target/arm/arch_dump: Add SVE notes === OUTPUT BEGIN === ERROR: code indent should never use tabs #21: FILE: include/elf.h:1653: +#define NT_ARM_SVE^I0x405^I^I/* ARM Scalable Vector Extension$ WARNING: Block comments use a leading /* on a separate line #21: FILE: include/elf.h:1653: +#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension ERROR: code indent should never use tabs #22: FILE: include/elf.h:1654: +^I^I^I^I^I registers */$ WARNING: Block comments use * on subsequent lines #22: FILE: include/elf.h:1654: +#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension + registers */ WARNING: Block comments use a trailing */ on a separate line #22: FILE: include/elf.h:1654: + registers */ total: 2 errors, 3 warnings, 183 lines checked Commit 9d21a9a5cba2 (target/arm/arch_dump: Add SVE notes) has style problems, please review. If any of these errors are false positives report them to the maintainer, see CHECKPATCH in MAINTAINERS. === OUTPUT END === Test command exited with code: 1 The full log is available at http://patchew.org/logs/20191004120313.5347-1-drjo...@redhat.com/testing.checkpatch/?type=message. --- Email generated automatically by Patchew [https://patchew.org/]. Please send your feedback to patchew-de...@redhat.com
Re: [PATCH v2 00/11] RFC crypto/luks: encryption key managment using amend interface
On 13.09.19 00:30, Maxim Levitsky wrote: > This patch series is continuation of my work to add encryption > key managment to luks/qcow2 with luks. > > This is second version of this patch set. > The changes are mostly addressing the review feedback, > plus I tested (and fixed sadly) the somewhat ugly code > that allows to still write share a raw luks device, > while preveting the key managment from happening in this case, > as it is unsafe. > I added a new iotest dedicated to that as well. > > Best regards, > Maxim Levitsky At least for an RFC looks good from my perspective. I didn’t look at the crypto things very closely (assuming Dan would do so), and I didn’t check the iotests in detail. (But it definitely doesn’t look like they lack in breadth. Maybe I’d like to see a test that you cannot have other useful nodes attached to the LUKS or qcow2 node while the amendment process is ongoing (because CONSISTENT_READ is unshared). But that’s the only thing I can think of.) Max signature.asc Description: OpenPGP digital signature
Re: [PATCH v2 11/11] iotests : add tests for encryption key management
On 13.09.19 00:30, Maxim Levitsky wrote: > Note that currently I add tests 300-302, which are > placeholders to ease the rebase. In final version > of these patches I will update these. > > Signed-off-by: Maxim Levitsky > --- > tests/qemu-iotests/300 | 202 + > tests/qemu-iotests/300.out | 98 +++ > tests/qemu-iotests/301 | 90 + > tests/qemu-iotests/301.out | 30 + > tests/qemu-iotests/302 | 252 + > tests/qemu-iotests/302.out | 18 +++ > tests/qemu-iotests/303 | 228 + > tests/qemu-iotests/303.out | 28 + > tests/qemu-iotests/group | 9 ++ > 9 files changed, 955 insertions(+) > create mode 100755 tests/qemu-iotests/300 > create mode 100644 tests/qemu-iotests/300.out > create mode 100755 tests/qemu-iotests/301 > create mode 100644 tests/qemu-iotests/301.out > create mode 100644 tests/qemu-iotests/302 > create mode 100644 tests/qemu-iotests/302.out > create mode 100644 tests/qemu-iotests/303 > create mode 100644 tests/qemu-iotests/303.out [...] > diff --git a/tests/qemu-iotests/303.out b/tests/qemu-iotests/303.out > new file mode 100644 > index 00..1cf3917208 > --- /dev/null > +++ b/tests/qemu-iotests/303.out > @@ -0,0 +1,28 @@ > +qemu-img: Failed to get shared "consistent read" lock > +Is another process using the image > [/home/mlevitsk/USERSPACE/qemu/build-luks/tests/qemu-iotests/scratch/test.img]? Ah, this should be filtered. Max signature.asc Description: OpenPGP digital signature
[Bug 1846816] [NEW] Booting error on AIX 6.1 "Illegal Trap Instruction Interrupt in Kernel""
Public bug reported: # ls -ltr total 8750584 -rw-rw-r-- 1 linux linux 4274997248 Oct 4 18:33 AIX.vol1.iso -rw-rw-r-- 1 linux linux 4293888000 Oct 4 18:45 AIX.vol2.iso -rw-rw-r-- 1 linux linux 391485440 Oct 4 18:50 AIX.vol3.iso -rw-r--r-- 1 root root 204608 Oct 4 19:00 AIX61.img # qemu-system-ppc64 -cpu POWER8,compat=power7 -machine pseries -m 8192 -serial mon:stdio \ > -drive file=/qemu/BST/AIX61.img,if=none,id=drive-virtio-disk0 \ > -device virtio-scsi-pci,id=scsi -device scsi-hd,drive=drive-virtio-disk0 \ > -cdrom /qemu/BST/AIX.vol1.iso \ > -prom-env boot-command='boot cdrom: -s verbose' VNC server running on ::1:5900 qemu-system-ppc64: warning: TCG doesn't support requested feature, cap-ibs=workaround SLOF ** QEMU Starting Build Date = Jul 3 2019 12:26:14 FW Version = git-ba1ab360eebe6338 Press "s" to enter Open Firmware. Populating /vdevice methods Populating /vdevice/vty@7100 Populating /vdevice/nvram@7101 Populating /vdevice/l-lan@7102 Populating /vdevice/v-scsi@7103 SCSI: Looking for devices 8200 CD-ROM : "QEMU QEMU CD-ROM 2.5+" Populating /pci@8002000 00 (D) : 1234 qemu vga 00 0800 (D) : 1033 0194serial bus [ usb-xhci ] 00 1000 (D) : 1af4 1004virtio [ scsi ] Populating /pci@8002000/scsi@2 SCSI: Looking for devices 100 DISK : "QEMU QEMU HARDDISK2.5+" Installing QEMU fb Scanning USB XHCI: Initializing USB Keyboard USB mouse No console specified using screen & keyboard Welcome to Open Firmware Copyright (c) 2004, 2017 IBM Corporation All rights reserved. This program and the accompanying materials are made available under the terms of the BSD License available at http://www.opensource.org/licenses/bsd-license.php Trying to load: -s verbose from: /vdevice/v-scsi@7103/disk@8200: ... Successfully loaded qemu-system-ppc64: Couldn't negotiate a suitable PVR during CAS AIX StarLED{814} AIX Version 6.1 exec(/etc/init){1,0} INIT: EXECUTING /sbin/rc.boot 1 exec(/usr/bin/sh,-c,/sbin/rc.boot 1){1114146,1} exec(/sbin/rc.boot,/sbin/rc.boot,1){1114146,1} + PHASE=1 + + bootinfo -p exec(/usr/sbin/bootinfo,-p){1179684,1114146} PLATFORM=chrp + [ ! -x /usr/lib/boot/bin/bootinfo_chrp ] + [ 1 -eq 1 ] + 1> /usr/lib/libc.a + init -c unlink /usr/lib/boot/bin/!(*_chrp) exec(/etc/init,-c,unlink /usr/lib/boot/bin/!(*_chrp)){1179686,1114146} + chramfs -t exec(/usr/sbin/chramfs,-t){1179688,1114146} + init -c unlink /usr/sbin/chramfs + 1> /dev/null exec(/etc/init,-c,unlink /usr/sbin/chramfs){1179690,1114146} + + bootinfo -t exec(/usr/sbin/bootinfo,-t){1179692,1114146} BOOTYPE=3 + [ 0 -ne 0 ] + [ -z 3 ] + unset pdev_to_ldev undolt native_netboot_cfg + unset disknet_odm_init config_ATM + /usr/lib/methods/showled 0x510 DEV CFG 1 START exec(/usr/lib/methods/showled,0x510,DEV CFG 1 START){1179694,1114146} + cfgmgr -f -v exec(/usr/sbin/cfgmgr,-f,-v){1179696,1114146} cfgmgr is running in phase 1 Time: 0 LEDS: 0x538 Invoking top level program -- "/etc/methods/defsys" exec(/bin/sh,-c,/etc/methods/defsys ){1245222,1179696} exec(/etc/methods/defsys){1245222,1179696} exec(/bin/sh,-c,/usr/lib/methods/define_rspc -n -c sys -s node -t chrp){1310760,1245222} exec(/usr/lib/methods/define_rspc,-n,-c,sys,-s,node,-t,chrp){1310760,1245222} Time: 0 LEDS: 0x539 Return code = 0 * stdout * sys0 *** no stderr Attempting to configure device 'sys0' Time: 0 LEDS: 0x811 Invoking /usr/lib/methods/cfgsys_chrp -1 -l sys0 exec(/bin/sh,-c,/usr/lib/methods/cfgsys_chrp -1 -l sys0){1245224,1179696} Number of running methods: 1 exec(/usr/lib/methods/cfgsys_chrp,-1,-l,sys0){1245224,1179696} LED{A20} Illegal Trap Instruction Interrupt in Kernel 04151A74 tweqir0,0r0=0 KDB(0)> ** Affects: qemu Importance: Undecided Status: New ** Tags: aix -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/1846816 Title: Booting error on AIX 6.1 "Illegal Trap Instruction Interrupt in Kernel"" Status in QEMU: New Bug description: # ls -ltr total 8750584 -rw-rw-r-- 1 linux linux 4274997248 Oct 4 18:33 AIX.vol1.iso -rw-rw-r-- 1 linux linux 4293888000 Oct 4 18:45 AIX.vol2.iso -rw-rw-r-- 1 linux linux 391485440 Oct 4 18:50 AIX.vol3.iso -rw-r--r-- 1 root root 204608 Oct 4 19:00 AIX61.img # qemu-system-ppc64 -cpu POWER8,compat=power7 -machine pseries -m 8192 -serial mon:stdio \ > -drive file=/qemu/BST/AIX61.img,if=none,id=drive-virtio-disk0 \ > -device virtio-scsi-pci,id=scsi -device scsi-hd,drive=drive-virtio-disk0 \ > -cdrom /qemu/BST/AIX.vol1.iso \ > -prom-env boot-command='boot cdrom: -s verbose' VNC server running on
Re: [PATCH 6/7] tests/fw_cfg: Declare one QFWCFG for all tests
On 10/04/19 00:54, Philippe Mathieu-Daudé wrote: > It is pointless to create/remove a QFWCFG object for each test. > Move it to the test context and create/remove it only once. > > Signed-off-by: Philippe Mathieu-Daudé > --- > tests/fw_cfg-test.c | 74 + > 1 file changed, 27 insertions(+), 47 deletions(-) Reviewed-by: Laszlo Ersek
Re: [PATCH v2 09/11] block/qcow2: implement blockdev-amend
On 13.09.19 00:30, Maxim Levitsky wrote: > Currently only for changing crypto parameters Yep, that elegantly avoids most of the problems we’d have otherwise. :-) > Signed-off-by: Maxim Levitsky > --- > block/qcow2.c| 71 > qapi/block-core.json | 6 ++-- > 2 files changed, 75 insertions(+), 2 deletions(-) > > diff --git a/block/qcow2.c b/block/qcow2.c > index 26f83aeb44..c8847ec6e2 100644 > --- a/block/qcow2.c > +++ b/block/qcow2.c > @@ -3079,6 +3079,18 @@ qcow2_co_create(BlockdevCreateOptions *create_options, > Error **errp) > assert(create_options->driver == BLOCKDEV_DRIVER_QCOW2); > qcow2_opts = _options->u.qcow2; > > +if (!qcow2_opts->has_size) { > +error_setg(errp, "Size is manadatory for image creation"); > +return -EINVAL; > + > +} > + > +if (!qcow2_opts->has_file) { > +error_setg(errp, "'file' is manadatory for image creation"); > +return -EINVAL; > + > +} > + > bs = bdrv_open_blockdev_ref(qcow2_opts->file, errp); > if (bs == NULL) { > return -EIO; > @@ -5111,6 +5123,64 @@ static int qcow2_amend_options(BlockDriverState *bs, > QemuOpts *opts, > return 0; > } > > + > +static int coroutine_fn qcow2_co_amend(BlockDriverState *bs, > + BlockdevCreateOptions *opts, > + bool force, > + Error **errp) > +{ > +BlockdevCreateOptionsQcow2 *qopts = >u.qcow2; > +BDRVQcow2State *s = bs->opaque; > +int ret; > + > +/* > + * This is ugly as hell, in later versions of this patch > + * something has to be done about this Well, at least the language of the comment needs adjustment. :-) > + */ > +if (qopts->has_file || qopts->has_size || qopts->has_data_file || > +qopts->has_data_file_raw || qopts->has_version || > +qopts->has_backing_file || qopts->has_backing_fmt || > +qopts->has_cluster_size || qopts->has_preallocation || > +qopts->has_lazy_refcounts || qopts->has_refcount_bits) { > + > +error_setg(errp, > +"Only LUKS encryption options can be amended for qcow2 with > blockdev-amend"); > +return -EOPNOTSUPP; > + > +} > + > +if (qopts->has_encrypt) { > +if (!s->crypto) { > +error_setg(errp, "QCOW2 image is not encrypted, can't amend"); > +return -EOPNOTSUPP; > +} > + > +if (qopts->encrypt->format != Q_CRYPTO_BLOCK_FORMAT_LUKS) { > +error_setg(errp, > + "Amend can't be used to change the qcow2 encryption > format"); > +return -EOPNOTSUPP; > +} > + > +if (s->crypt_method_header != QCOW_CRYPT_LUKS) { > +error_setg(errp, > + "Only LUKS encryption options can be amended for > qcow2 with blockdev-amend"); > +return -EOPNOTSUPP; > +} > + > +ret = qcrypto_block_amend_options(s->crypto, > + qcow2_crypto_hdr_read_func, > + qcow2_crypto_hdr_write_func, > + bs, > + qopts->encrypt, > + force, > + errp); > +if (ret) { > +return ret; > +} > +} > +return 0; I suppose we need to do the same permission stuff as for LUKS, though; i.e., unshare CONSISTENT_READ while this operation is ongoing. > +} > + > /* > * If offset or size are negative, respectively, they will not be included in > * the BLOCK_IMAGE_CORRUPTED event emitted. > @@ -5303,6 +5373,7 @@ BlockDriver bdrv_qcow2 = { > .mutable_opts= mutable_opts, > .bdrv_co_check = qcow2_co_check, > .bdrv_amend_options = qcow2_amend_options, > +.bdrv_co_amend = qcow2_co_amend, > > .bdrv_detach_aio_context = qcow2_detach_aio_context, > .bdrv_attach_aio_context = qcow2_attach_aio_context, > diff --git a/qapi/block-core.json b/qapi/block-core.json > index 4a6db98938..0eb4e45168 100644 > --- a/qapi/block-core.json > +++ b/qapi/block-core.json > @@ -4294,6 +4294,7 @@ > # Driver specific image creation options for qcow2. > # > # @file Node to create the image format on > +# Mandatory for create > # @data-fileNode to use as an external data file in which all guest > # data is stored so that only metadata remains in the qcow2 > # file (since: 4.0) > @@ -4301,6 +4302,7 @@ > # standalone (read-only) raw image without looking at qcow2 > # metadata (default: false; since: 4.0) > # @size Size of the virtual disk in bytes > +# Mandatory for create > # @version Compatibility level
Re: [PATCH 5/7] tests/libqos/fw_cfg: Pass QTestState as argument
On 10/04/19 00:54, Philippe Mathieu-Daudé wrote: > Since a QFWCFG object is not tied to a particular test, we can > call *_fw_cfg_init() once before creating QTests and use the same > for all the tests, then release the object with g_free() once all > the tests are run. > > Refactor the qfw_cfg* API to take QTestState as argument. > > Signed-off-by: Philippe Mathieu-Daudé > --- > tests/boot-order-test.c | 12 > tests/fw_cfg-test.c | 49 > tests/libqos/fw_cfg.c| 61 > tests/libqos/fw_cfg.h| 30 +--- > tests/libqos/malloc-pc.c | 4 +-- > 5 files changed, 77 insertions(+), 79 deletions(-) Not much fun to review. :) Reviewed-by: Laszlo Ersek
Re: [PATCH v2 07/11] block: add x-blockdev-amend qmp command
On 13.09.19 00:30, Maxim Levitsky wrote: > Signed-off-by: Maxim Levitsky > --- > block/Makefile.objs | 2 +- > block/amend.c | 116 ++ > include/block/block_int.h | 23 ++-- > qapi/block-core.json | 26 + > qapi/job.json | 4 +- > 5 files changed, 163 insertions(+), 8 deletions(-) > create mode 100644 block/amend.c I think I’d move this (and everything to belongs to it) to a different series. > diff --git a/block/Makefile.objs b/block/Makefile.objs > index 35f3bca4d9..10d0308792 100644 > --- a/block/Makefile.objs > +++ b/block/Makefile.objs > @@ -18,7 +18,7 @@ block-obj-y += block-backend.o snapshot.o qapi.o > block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o > block-obj-$(CONFIG_POSIX) += file-posix.o > block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o > -block-obj-y += null.o mirror.o commit.o io.o create.o > +block-obj-y += null.o mirror.o commit.o io.o create.o amend.o > block-obj-y += throttle-groups.o > block-obj-$(CONFIG_LINUX) += nvme.o > > diff --git a/block/amend.c b/block/amend.c > new file mode 100644 > index 00..9bd28e08e7 > --- /dev/null > +++ b/block/amend.c > @@ -0,0 +1,116 @@ > +/* > + * Block layer code related to image options amend > + * > + * Copyright (c) 2018 Kevin Wolf > + * Copyright (c) 2019 Maxim Levitsky > + * > + * Heavily based on create.c > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > copy > + * of this software and associated documentation files (the "Software"), to > deal > + * in the Software without restriction, including without limitation the > rights > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > + * copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > FROM, > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN > + * THE SOFTWARE. > + */ > + > +#include "qemu/osdep.h" > +#include "block/block_int.h" > +#include "qemu/job.h" > +#include "qemu/main-loop.h" > +#include "qapi/qapi-commands-block-core.h" > +#include "qapi/qapi-visit-block-core.h" > +#include "qapi/clone-visitor.h" > +#include "qapi/error.h" > + > +typedef struct BlockdevAmendJob { > +Job common; > +BlockdevCreateOptions *opts; > +BlockDriverState *bs; > +bool force; > +} BlockdevAmendJob; > + > +static int coroutine_fn blockdev_amend_run(Job *job, Error **errp) > +{ > +BlockdevAmendJob *s = container_of(job, BlockdevAmendJob, common); > +int ret; > + > +job_progress_set_remaining(>common, 1); > +ret = s->bs->drv->bdrv_co_amend(s->bs, s->opts, s->force, errp); > +job_progress_update(>common, 1); It would be nice if the amend job could make use of the progress reporting that we have in place for amend. > + > +qapi_free_BlockdevCreateOptions(s->opts); > + > +return ret; > +} > + > +static const JobDriver blockdev_amend_job_driver = { > +.instance_size = sizeof(BlockdevAmendJob), > +.job_type = JOB_TYPE_AMEND, > +.run = blockdev_amend_run, > +}; > + > +void qmp_x_blockdev_amend(const char *job_id, > +const char *node_name, > +BlockdevCreateOptions *options, > +bool has_force, > +bool force, > +Error **errp) > +{ > +BlockdevAmendJob *s; > +const char *fmt = BlockdevDriver_str(options->driver); > +BlockDriver *drv = bdrv_find_format(fmt); > +BlockDriverState *bs = bdrv_find_node(node_name); > + > +/* > + * If the driver is in the schema, we know that it exists. But it may not > + * be whitelisted. > + */ > +assert(drv); > +if (bdrv_uses_whitelist() && !bdrv_is_whitelisted(drv, false)) { > +error_setg(errp, "Driver is not whitelisted"); > +return; > +} > + > +if (bs->drv != drv) { > +error_setg(errp, > + "x-blockdev-amend doesn't support changing the block > driver"); > +return; > + > +} > + > +/* Error out if the driver doesn't support .bdrv_co_amend */ > +if (!drv->bdrv_co_amend) { > +error_setg(errp, "Driver does not support x-blockdev-amend"); > +return; > +} > + > +/* > + * Create the block job > + * TODO Running in the main context. Block drivers
Re: [Virtio-fs] [PATCH] virtiofsd: Add pread64() to seccomp list for posix_fallocate()
* Misono Tomohiro (misono.tomoh...@jp.fujitsu.com) wrote: > I test virtiofs with NFS 4.0 as backend and notice that fallocate > causes system hang (kernel: 5.4-rc1, qemu: virtio-fs-dev branch): > $ mount -t virtiofs myfs /mnt > $ dd if=/dev/urandom bs=1000 seek=1 count=1 of=/mnt/file > $ fallocate -l 2000 /mnt/file # system hang > > This is because: > 1. virtiofs supports fallocate syscall while NFS 4.0 does not. > 2. virtiofsd uses posix_fallocate() and it fallbacks to pread64()/ > pwrite64() sequence to reserve blocks if fallocate syscall is > not available. > 3. pread64() syscall is prohibited by seccomp and virtiofsd thread > is killed by SIGSYS. > > So, just add pread64() to seccomp white list to fix this problem. > > Signed-off-by: Misono Tomohiro Thanks, I've squashed this into our seccomp commit. (It'll be a little while before I push it out, I need to finish a tidyup I'm doing). Dave > --- > contrib/virtiofsd/seccomp.c | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/contrib/virtiofsd/seccomp.c b/contrib/virtiofsd/seccomp.c > index 93b679271d..88b61bca42 100644 > --- a/contrib/virtiofsd/seccomp.c > +++ b/contrib/virtiofsd/seccomp.c > @@ -61,6 +61,7 @@ static const int syscall_whitelist[] = { > SCMP_SYS(ppoll), > SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */ > SCMP_SYS(preadv), > + SCMP_SYS(pread64), > SCMP_SYS(pwritev), > SCMP_SYS(pwrite64), > SCMP_SYS(read), > -- > 2.21.0 > > ___ > Virtio-fs mailing list > virtio...@redhat.com > https://www.redhat.com/mailman/listinfo/virtio-fs -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
Re: [PATCH 4/7] tests/fw_cfg: Let the tests use a context
On 10/04/19 00:54, Philippe Mathieu-Daudé wrote: > Introduce the local QTestCtx structure, and register the tests > with qtest_add_data_func(ctx). > For now the context only contains the machine name (which is > fixed to the 'pc' machine, since this test only runs on the > x86 architecture). > > Signed-off-by: Philippe Mathieu-Daudé > --- > tests/fw_cfg-test.c | 93 - > 1 file changed, 59 insertions(+), 34 deletions(-) > > diff --git a/tests/fw_cfg-test.c b/tests/fw_cfg-test.c > index 53ae82f7c8..77a833d50e 100644 > --- a/tests/fw_cfg-test.c > +++ b/tests/fw_cfg-test.c > @@ -23,13 +23,18 @@ static uint16_t max_cpus = 1; > static uint64_t nb_nodes = 0; > static uint16_t boot_menu = 0; > > -static void test_fw_cfg_signature(void) > +typedef struct { > +const char *machine_name; > +} QTestCtx; > + > +static void test_fw_cfg_signature(const void *opaque) > { > +QTestCtx *ctx = (QTestCtx *)opaque; (1) I'd suggest: const QTestCtx *ctx = opaque; (I think that should work fine with the current patch; not sure if it will work with the rest of the series, when you perhaps introduce more members to QTestCtx.) This request applies a few more times to this patch, of course. Another (different) comment: > QFWCFG *fw_cfg; > QTestState *s; > char buf[5]; > > -s = qtest_init(""); > +s = qtest_initf("-M %s", ctx->machine_name); > fw_cfg = pc_fw_cfg_init(s); > > qfw_cfg_get(fw_cfg, FW_CFG_SIGNATURE, buf, 4); > @@ -40,13 +45,14 @@ static void test_fw_cfg_signature(void) > qtest_quit(s); > } > > -static void test_fw_cfg_id(void) > +static void test_fw_cfg_id(const void *opaque) > { > +QTestCtx *ctx = (QTestCtx *)opaque; > QFWCFG *fw_cfg; > QTestState *s; > uint32_t id; > > -s = qtest_init(""); > +s = qtest_initf("-M %s", ctx->machine_name); > fw_cfg = pc_fw_cfg_init(s); > > id = qfw_cfg_get_u32(fw_cfg, FW_CFG_ID); > @@ -56,8 +62,9 @@ static void test_fw_cfg_id(void) > qtest_quit(s); > } > > -static void test_fw_cfg_uuid(void) > +static void test_fw_cfg_uuid(const void *opaque) > { > +QTestCtx *ctx = (QTestCtx *)opaque; > QFWCFG *fw_cfg; > QTestState *s; > > @@ -67,7 +74,7 @@ static void test_fw_cfg_uuid(void) > 0x8a, 0xcb, 0x81, 0xc6, 0xea, 0x54, 0xf2, 0xd8, > }; > > -s = qtest_init("-uuid 4600cb32-38ec-4b2f-8acb-81c6ea54f2d8"); > +s = qtest_initf("-M %s -uuid 4600cb32-38ec-4b2f-8acb-81c6ea54f2d8", > ctx->machine_name); > fw_cfg = pc_fw_cfg_init(s); > > qfw_cfg_get(fw_cfg, FW_CFG_UUID, buf, 16); > @@ -78,12 +85,13 @@ static void test_fw_cfg_uuid(void) > > } > > -static void test_fw_cfg_ram_size(void) > +static void test_fw_cfg_ram_size(const void *opaque) > { > +QTestCtx *ctx = (QTestCtx *)opaque; > QFWCFG *fw_cfg; > QTestState *s; > > -s = qtest_init(""); > +s = qtest_initf("-M %s", ctx->machine_name); > fw_cfg = pc_fw_cfg_init(s); > > g_assert_cmpint(qfw_cfg_get_u64(fw_cfg, FW_CFG_RAM_SIZE), ==, ram_size); > @@ -92,12 +100,13 @@ static void test_fw_cfg_ram_size(void) > qtest_quit(s); > } > > -static void test_fw_cfg_nographic(void) > +static void test_fw_cfg_nographic(const void *opaque) > { > +QTestCtx *ctx = (QTestCtx *)opaque; > QFWCFG *fw_cfg; > QTestState *s; > > -s = qtest_init(""); > +s = qtest_initf("-M %s", ctx->machine_name); > fw_cfg = pc_fw_cfg_init(s); > > g_assert_cmpint(qfw_cfg_get_u16(fw_cfg, FW_CFG_NOGRAPHIC), ==, 0); > @@ -106,12 +115,13 @@ static void test_fw_cfg_nographic(void) > qtest_quit(s); > } > > -static void test_fw_cfg_nb_cpus(void) > +static void test_fw_cfg_nb_cpus(const void *opaque) > { > +QTestCtx *ctx = (QTestCtx *)opaque; > QFWCFG *fw_cfg; > QTestState *s; > > -s = qtest_init(""); > +s = qtest_initf("-M %s", ctx->machine_name); > fw_cfg = pc_fw_cfg_init(s); > > g_assert_cmpint(qfw_cfg_get_u16(fw_cfg, FW_CFG_NB_CPUS), ==, nb_cpus); > @@ -120,12 +130,13 @@ static void test_fw_cfg_nb_cpus(void) > qtest_quit(s); > } > > -static void test_fw_cfg_max_cpus(void) > +static void test_fw_cfg_max_cpus(const void *opaque) > { > +QTestCtx *ctx = (QTestCtx *)opaque; > QFWCFG *fw_cfg; > QTestState *s; > > -s = qtest_init(""); > +s = qtest_initf("-M %s", ctx->machine_name); > fw_cfg = pc_fw_cfg_init(s); > > g_assert_cmpint(qfw_cfg_get_u16(fw_cfg, FW_CFG_MAX_CPUS), ==, max_cpus); > @@ -133,14 +144,15 @@ static void test_fw_cfg_max_cpus(void) > qtest_quit(s); > } > > -static void test_fw_cfg_numa(void) > +static void test_fw_cfg_numa(const void *opaque) > { > +QTestCtx *ctx = (QTestCtx *)opaque; > QFWCFG *fw_cfg; > QTestState *s; > uint64_t *cpu_mask; > uint64_t *node_mask; > > -s = qtest_init(""); > +s = qtest_initf("-M %s", ctx->machine_name); > fw_cfg =
Re: [PATCH v2] target/ppc: Fix for optimized vsl/vsr instructions
On 10/4/19 8:43 AM, Stefan Brankovic wrote: > In previous implementation, invocation of TCG shift function could request > shift of TCG variable by 64 bits when variable 'sh' is 0, which is not > supported in TCG (values can be shifted by 0 to 63 bits). This patch fixes > this by using two separate invocation of TCG shift functions, with maximum > shift amount of 32. > > Name of variable 'shifted' is changed to 'carry' so variable naming > is similar to old helper implementation. > > Variables 'avrA' and 'avrB' are replaced with variable 'avr'. > > Fixes: 4e6d0920e7547e6af4bbac5ffe9adfe6ea621822 > Reported-by: Paul Clark Preferred: "Paul A. Clarke" (for historical consistency) > Reported-by: Mark Cave-Ayland > Suggested-by: Aleksandar Markovic > Signed-off-by: Stefan Brankovic Applying this patch on top of dce5a787c05fe1a3e54d92871cdeba2af6798e0d eliminated the failures that I reported in https://bugs.launchpad.net/qemu/+bug/1841990 associated with vsl/vsr. Tested-by: Paul A. Clarke PC
Re: [PATCH v2] target/ppc: Fix for optimized vsl/vsr instructions
> Reported-by: Paul Clark Stefan, Paul's full name is Paul A. Clarke. Thanks for the fix! Aleksandar
Re: [PATCH v2 05/11] block/crypto: implement the encryption key management
On 13.09.19 00:30, Maxim Levitsky wrote: > This implements the encryption key management > using the generic code in qcrypto layer > (currently only for qemu-img amend) > > This code adds another 'write_func' because the initialization > write_func works directly on the underlying file, > because during the creation, there is no open instance > of the luks driver, but during regular use, we have it, > and should use it instead. > > > This commit also adds a 'hack/workaround' I and Kevin Wolf (thanks) > made to make the driver still support write sharing, > but be safe against concurrent metadata update (the keys) > Eventually write sharing for luks driver will be deprecated > and removed together with this hack. > > The hack is that we ask (as a format driver) for > BLK_PERM_CONSISTENT_READ always > (technically always unless opened with BDRV_O_NO_IO) > > and then when we want to update the keys, we > unshare that permission. So if someone else > has the image open, even readonly, this will fail. > > Also thanks to Daniel Berrange for the variant of > that hack that involves asking for read, > rather that write permission > > Signed-off-by: Maxim Levitsky > --- > block/crypto.c | 118 +++-- > 1 file changed, 115 insertions(+), 3 deletions(-) > > diff --git a/block/crypto.c b/block/crypto.c > index a6a3e1f1d8..f42fa057e6 100644 > --- a/block/crypto.c > +++ b/block/crypto.c > @@ -36,6 +36,7 @@ typedef struct BlockCrypto BlockCrypto; > > struct BlockCrypto { > QCryptoBlock *block; > +bool updating_keys; > }; > > > @@ -70,6 +71,24 @@ static ssize_t block_crypto_read_func(QCryptoBlock *block, > return ret; > } > > +static ssize_t block_crypto_write_func(QCryptoBlock *block, > + size_t offset, > + const uint8_t *buf, > + size_t buflen, > + void *opaque, > + Error **errp) There’s already a function of this name for creation. > +{ > +BlockDriverState *bs = opaque; > +ssize_t ret; > + > +ret = bdrv_pwrite(bs->file, offset, buf, buflen); > +if (ret < 0) { > +error_setg_errno(errp, -ret, "Could not write encryption header"); > +return ret; > +} > +return ret; > +} > + > > struct BlockCryptoCreateData { > BlockBackend *blk; [...] > +static void > +block_crypto_child_perms(BlockDriverState *bs, BdrvChild *c, > + const BdrvChildRole *role, > + BlockReopenQueue *reopen_queue, > + uint64_t perm, uint64_t shared, > + uint64_t *nperm, uint64_t *nshared) > +{ > + > +BlockCrypto *crypto = bs->opaque; > + > +/* > + * Ask for consistent read permission so that if > + * someone else tries to open this image with this permission > + * neither will be able to edit encryption keys > + */ > +if (!(bs->open_flags & BDRV_O_NO_IO)) { > +perm |= BLK_PERM_CONSISTENT_READ; > +} > + > +/* > + * This driver doesn't modify LUKS metadata except > + * when updating the encryption slots. > + * Thus unlike a proper format driver we don't ask for > + * shared write permission. However we need it > + * when we area updating keys, to ensure that only we > + * had opened the device r/w > + * > + * Encryption update will set the crypto->updating_keys > + * during that period and refresh permissions > + * > + */ > + > +if (crypto->updating_keys) { > +/*need exclusive write access for header update */ > +perm |= BLK_PERM_WRITE; > +shared &= ~(BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE); > +} > + > +bdrv_filter_default_perms(bs, c, role, reopen_queue, > +perm, shared, nperm, nshared); > +} This will probably work, but usually drivers do it the other way around: First call any of the default_perms(), and then adjust *nperm and *nshared as required. (perm/shared are what the parents need, *nperm/*nshared is what this driver needs, so it makes more sense that way; and this way nobody has to check whether the settings survived the default_perms() call.) ((But the permissions themselves do look correct.)) Max signature.asc Description: OpenPGP digital signature
Re: [PATCH 3/7] tests/libqos/fw_cfg: Document pc_fw_cfg_init to drop pc_fw_cfg_uninit
On 10/04/19 00:54, Philippe Mathieu-Daudé wrote: > Document pc_fw_cfg_init() return value must be released > with g_free(). Directly calling g_free() we don't really > need pc_fw_cfg_uninit(): remove it. > > Signed-off-by: Philippe Mathieu-Daudé > --- > tests/fw_cfg-test.c | 22 +++--- > tests/libqos/fw_cfg.h| 14 +- > tests/libqos/malloc-pc.c | 2 +- > 3 files changed, 21 insertions(+), 17 deletions(-) > > diff --git a/tests/fw_cfg-test.c b/tests/fw_cfg-test.c > index 1d3147f821..53ae82f7c8 100644 > --- a/tests/fw_cfg-test.c > +++ b/tests/fw_cfg-test.c > @@ -36,7 +36,7 @@ static void test_fw_cfg_signature(void) > buf[4] = 0; > > g_assert_cmpstr(buf, ==, "QEMU"); > -pc_fw_cfg_uninit(fw_cfg); > +g_free(fw_cfg); > qtest_quit(s); > } > > @@ -52,7 +52,7 @@ static void test_fw_cfg_id(void) > id = qfw_cfg_get_u32(fw_cfg, FW_CFG_ID); > g_assert((id == 1) || > (id == 3)); > -pc_fw_cfg_uninit(fw_cfg); > +g_free(fw_cfg); > qtest_quit(s); > } > > @@ -73,7 +73,7 @@ static void test_fw_cfg_uuid(void) > qfw_cfg_get(fw_cfg, FW_CFG_UUID, buf, 16); > g_assert(memcmp(buf, uuid, sizeof(buf)) == 0); > > -pc_fw_cfg_uninit(fw_cfg); > +g_free(fw_cfg); > qtest_quit(s); > > } > @@ -88,7 +88,7 @@ static void test_fw_cfg_ram_size(void) > > g_assert_cmpint(qfw_cfg_get_u64(fw_cfg, FW_CFG_RAM_SIZE), ==, ram_size); > > -pc_fw_cfg_uninit(fw_cfg); > +g_free(fw_cfg); > qtest_quit(s); > } > > @@ -102,7 +102,7 @@ static void test_fw_cfg_nographic(void) > > g_assert_cmpint(qfw_cfg_get_u16(fw_cfg, FW_CFG_NOGRAPHIC), ==, 0); > > -pc_fw_cfg_uninit(fw_cfg); > +g_free(fw_cfg); > qtest_quit(s); > } > > @@ -116,7 +116,7 @@ static void test_fw_cfg_nb_cpus(void) > > g_assert_cmpint(qfw_cfg_get_u16(fw_cfg, FW_CFG_NB_CPUS), ==, nb_cpus); > > -pc_fw_cfg_uninit(fw_cfg); > +g_free(fw_cfg); > qtest_quit(s); > } > > @@ -129,7 +129,7 @@ static void test_fw_cfg_max_cpus(void) > fw_cfg = pc_fw_cfg_init(s); > > g_assert_cmpint(qfw_cfg_get_u16(fw_cfg, FW_CFG_MAX_CPUS), ==, max_cpus); > -pc_fw_cfg_uninit(fw_cfg); > +g_free(fw_cfg); > qtest_quit(s); > } > > @@ -158,7 +158,7 @@ static void test_fw_cfg_numa(void) > > g_free(node_mask); > g_free(cpu_mask); > -pc_fw_cfg_uninit(fw_cfg); > +g_free(fw_cfg); > qtest_quit(s); > } > > @@ -171,7 +171,7 @@ static void test_fw_cfg_boot_menu(void) > fw_cfg = pc_fw_cfg_init(s); > > g_assert_cmpint(qfw_cfg_get_u16(fw_cfg, FW_CFG_BOOT_MENU), ==, > boot_menu); > -pc_fw_cfg_uninit(fw_cfg); > +g_free(fw_cfg); > qtest_quit(s); > } > > @@ -190,7 +190,7 @@ static void test_fw_cfg_reboot_timeout(void) > g_assert_cmpint(filesize, ==, sizeof(reboot_timeout)); > reboot_timeout = le32_to_cpu(reboot_timeout); > g_assert_cmpint(reboot_timeout, ==, 15); > -pc_fw_cfg_uninit(fw_cfg); > +g_free(fw_cfg); > qtest_quit(s); > } > > @@ -209,7 +209,7 @@ static void test_fw_cfg_splash_time(void) > g_assert_cmpint(filesize, ==, sizeof(splash_time)); > splash_time = le16_to_cpu(splash_time); > g_assert_cmpint(splash_time, ==, 12); > -pc_fw_cfg_uninit(fw_cfg); > +g_free(fw_cfg); > qtest_quit(s); > } > > diff --git a/tests/libqos/fw_cfg.h b/tests/libqos/fw_cfg.h > index 3247fd4000..6316f4c354 100644 > --- a/tests/libqos/fw_cfg.h > +++ b/tests/libqos/fw_cfg.h > @@ -54,14 +54,18 @@ QFWCFG *mm_fw_cfg_init(QTestState *qts, uint64_t base); > */ > QFWCFG *io_fw_cfg_init(QTestState *qts, uint16_t base); > > +/** > + * pc_fw_cfg_init(): > + * @qts: The #QTestState that will be referred to by the QFWCFG object. > + * > + * This function is specific to the PC machine (X86). > + * > + * Returns a newly allocated QFWCFG object which must be released > + * with a call to g_free() when no longer required. > + */ > static inline QFWCFG *pc_fw_cfg_init(QTestState *qts) > { > return io_fw_cfg_init(qts, 0x510); > } > > -static inline void pc_fw_cfg_uninit(QFWCFG *fw_cfg) > -{ > -g_free(fw_cfg); > -} > - > #endif > diff --git a/tests/libqos/malloc-pc.c b/tests/libqos/malloc-pc.c > index 6f92ce4135..949a99361d 100644 > --- a/tests/libqos/malloc-pc.c > +++ b/tests/libqos/malloc-pc.c > @@ -29,5 +29,5 @@ void pc_alloc_init(QGuestAllocator *s, QTestState *qts, > QAllocOpts flags) > alloc_init(s, flags, 1 << 20, MIN(ram_size, 0xE000), PAGE_SIZE); > > /* clean-up */ > -pc_fw_cfg_uninit(fw_cfg); > +g_free(fw_cfg); > } > I'm getting the impression we're trying to remove "PC" (x86) references from the cleanup action :) Reviewed-by: Laszlo Ersek
Re: [PATCH 2/7] tests/libqos/fw_cfg: Document mm_fw_cfg_init to drop mm_fw_cfg_uninit
On 10/04/19 00:54, Philippe Mathieu-Daudé wrote: > Document mm_fw_cfg_init() return value must be released > with g_free(). mm_fw_cfg_uninit() was never used, remove it. > > Signed-off-by: Philippe Mathieu-Daudé > --- > tests/libqos/fw_cfg.c | 5 - > tests/libqos/fw_cfg.h | 10 +- > 2 files changed, 9 insertions(+), 6 deletions(-) > > diff --git a/tests/libqos/fw_cfg.c b/tests/libqos/fw_cfg.c > index 37c3f2cf4d..ddeec821db 100644 > --- a/tests/libqos/fw_cfg.c > +++ b/tests/libqos/fw_cfg.c > @@ -126,11 +126,6 @@ QFWCFG *mm_fw_cfg_init(QTestState *qts, uint64_t base) > return fw_cfg; > } > > -void mm_fw_cfg_uninit(QFWCFG *fw_cfg) > -{ > -g_free(fw_cfg); > -} > - > static void io_fw_cfg_select(QFWCFG *fw_cfg, uint16_t key) > { > qtest_outw(fw_cfg->qts, fw_cfg->base, key); > diff --git a/tests/libqos/fw_cfg.h b/tests/libqos/fw_cfg.h > index 15604040bd..3247fd4000 100644 > --- a/tests/libqos/fw_cfg.h > +++ b/tests/libqos/fw_cfg.h > @@ -34,8 +34,16 @@ uint64_t qfw_cfg_get_u64(QFWCFG *fw_cfg, uint16_t key); > size_t qfw_cfg_get_file(QFWCFG *fw_cfg, const char *filename, > void *data, size_t buflen); > > +/** > + * mm_fw_cfg_init(): > + * @qts: The #QTestState that will be referred to by the QFWCFG object. > + * @base: The MMIO base address of the fw_cfg device in the guest. > + * > + * Returns a newly allocated QFWCFG object which must be released > + * with a call to g_free() when no longer required. > + */ > QFWCFG *mm_fw_cfg_init(QTestState *qts, uint64_t base); > -void mm_fw_cfg_uninit(QFWCFG *fw_cfg); > + > /** > * io_fw_cfg_init(): > * @qts: The #QTestState that will be referred to by the QFWCFG object. > Sigh, we've never even called this function :/ Reviewed-by: Laszlo Ersek
Re: [PATCH 1/7] tests/libqos/fw_cfg: Document io_fw_cfg_init to drop io_fw_cfg_uninit
On 10/04/19 00:54, Philippe Mathieu-Daudé wrote: > Document io_fw_cfg_init() return value must be released > with g_free(). Directly calling g_free() we don't really > need io_fw_cfg_uninit(): remove it. > > Signed-off-by: Philippe Mathieu-Daudé > --- > tests/libqos/fw_cfg.c | 5 - > tests/libqos/fw_cfg.h | 11 +-- > 2 files changed, 9 insertions(+), 7 deletions(-) > > diff --git a/tests/libqos/fw_cfg.c b/tests/libqos/fw_cfg.c > index 1f46258f96..37c3f2cf4d 100644 > --- a/tests/libqos/fw_cfg.c > +++ b/tests/libqos/fw_cfg.c > @@ -157,8 +157,3 @@ QFWCFG *io_fw_cfg_init(QTestState *qts, uint16_t base) > > return fw_cfg; > } > - > -void io_fw_cfg_uninit(QFWCFG *fw_cfg) > -{ > -g_free(fw_cfg); > -} > diff --git a/tests/libqos/fw_cfg.h b/tests/libqos/fw_cfg.h > index 13325cc4ff..15604040bd 100644 > --- a/tests/libqos/fw_cfg.h > +++ b/tests/libqos/fw_cfg.h > @@ -36,8 +36,15 @@ size_t qfw_cfg_get_file(QFWCFG *fw_cfg, const char > *filename, > > QFWCFG *mm_fw_cfg_init(QTestState *qts, uint64_t base); > void mm_fw_cfg_uninit(QFWCFG *fw_cfg); > +/** > + * io_fw_cfg_init(): > + * @qts: The #QTestState that will be referred to by the QFWCFG object. > + * @base: The I/O address of the fw_cfg device in the guest. > + * > + * Returns a newly allocated QFWCFG object which must be released > + * with a call to g_free() when no longer required. > + */ > QFWCFG *io_fw_cfg_init(QTestState *qts, uint16_t base); > -void io_fw_cfg_uninit(QFWCFG *fw_cfg); > > static inline QFWCFG *pc_fw_cfg_init(QTestState *qts) > { > @@ -46,7 +53,7 @@ static inline QFWCFG *pc_fw_cfg_init(QTestState *qts) > > static inline void pc_fw_cfg_uninit(QFWCFG *fw_cfg) > { > -io_fw_cfg_uninit(fw_cfg); > +g_free(fw_cfg); > } > > #endif > This more or less reverts 0729d833d6d6 ("tests/libqos: Add io_fw_cfg_uninit() and mm_fw_cfg_uninit()", 2019-05-23). I'm fine either way. Reviewed-by: Laszlo Ersek
Re: [PATCH v9 3/3] iotests: test nbd reconnect
On 9/24/19 3:31 AM, Vladimir Sementsov-Ogievskiy wrote: +def qemu_nbd_popen(*args): +'''Run qemu-nbd in daemon mode and return the parent's exit code''' +return subprocess.Popen(qemu_nbd_args + ['--persistent'] + list(args)) + Should you also use a pid file here, and wait for the existence of the pid file before returning (rather than hard-coding sleep(1))? What do you mean / how to do it? We want to wait until listening socket is prepared.. In shell: qemu-nbd --pid-file=/path/to/file ... while [ ! -e /path/to/file ]; do sleep ... # fractional second, or exponential, or whatever... done # Now the listening socket is indeed prepared You'd have to translate that idiom to python. Or: pre-open Unix socket at /path/to/socket LISTEN_PID=... LISTEN_FDS=1 qemu-nbd ... 3<>/path/to/socket Now the socket is pre-created and passed into qemu-nbd via systemd socket activation, so you know the listening socket is ready without having to do any loop at all. Here's a patch in libnbd where we just switched from waiting for the port to appear (because the test predated qemu-nbd pidfile support) to instead using socket activation, for reference: https://github.com/libguestfs/libnbd/commit/352331d177 -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Re: [PATCH v2 02/11] qcrypto-luks: extend the create options for upcoming encryption key management
On 13.09.19 00:30, Maxim Levitsky wrote: > Now you can specify which slot to put the encryption key to > Plus add 'active' option which will let user erase the key secret > instead of adding it. > Check that active=true it when creating. > > Signed-off-by: Maxim Levitsky > --- > block/crypto.c | 2 ++ > block/crypto.h | 16 +++ > block/qcow2.c | 2 ++ > crypto/block-luks.c| 26 +++--- > qapi/crypto.json | 19 ++ > tests/qemu-iotests/082.out | 54 ++ > 6 files changed, 115 insertions(+), 4 deletions(-) (Just doing a cursory RFC-style review) I think we also want to reject unlock-secret if it’s given for creation; and I suppose it’d be more important to print which slots are OK than the slot the user has given. (It isn’t like we shouldn’t print that slot index, but it’s more likely the user knows that than what the limits are. I think.) Max signature.asc Description: OpenPGP digital signature
Re: ptimer use of bottom-half handlers
On Fri, 27 Sep 2019 at 11:01, Peter Maydell wrote: > The ptimer API is basically a down-counter (with a lot of bells > and whistles), with functions to do things like "read current count" > and "write current count". When the counter hits zero it typically > reloads, and the ptimer code notifies a callback function in the > device that's using ptimer. In the current implementation this > is done using a QEMU bottom-half handler, so ptimer_trigger() > calls replay_bh_schedule_event() on a QEMUBH that it was passed > by the device when it was initialized. The comment says this is > "to avoid reentrancy issues". That's a bit vague, but I assume the > idea is to avoid the code of the device's 'triggered' callback > being called from within eg ptimer_set_count(), because the > device might be in the middle of changing multiple parts of the > ptimer's state, its own state might not be consistent, etc. In the course of trying to do some conversions of devices to the new API I've proposed, I figured out the other part of what this "to avoid reentrancy issues" probably is referring to: if the device's 'triggered' callback itself calls a ptimer function like ptimer_run() then potentially it could recurse: ptimer_trigger() -> trigger callback -> ptimer_run() -> ptimer_reload() -> ptimer_trigger() -> ... I need to think a bit more carefully about what is supposed to happen here and what we want to have happen. I guess at the moment we'll just schedule the QEMU BH to run again, so the trigger callback is called again, but not recursively. So we should probably cause that to happen in the new scheme (conceptually, something like "the trigger callback is implicitly considered to be called from within a tx begin/ commit block, so (a) it doesn't need to do begin/commit itself and (b) if it does something resulting in a repeat trigger the second call will happen after it returns, not recursively" ?) thanks -- PMM
[PATCH v4 1/3] tests/migration: mem leak fix
‘data’ has the possibility of memory leaks, so use the glib macros g_autofree recommended by CODING_STYLE.rst to automatically release the memory that returned from g_malloc(). Signed-off-by: Mao Zhongyi Reviewed-by: Alex Bennée --- tests/migration/stress.c | 21 ++--- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/tests/migration/stress.c b/tests/migration/stress.c index d9aa4afe92..d8a6f64af0 100644 --- a/tests/migration/stress.c +++ b/tests/migration/stress.c @@ -170,26 +170,14 @@ static unsigned long long now(void) static int stressone(unsigned long long ramsizeMB) { size_t pagesPerMB = 1024 * 1024 / PAGE_SIZE; -char *ram = malloc(ramsizeMB * 1024 * 1024); +g_autofree char *ram = g_malloc(ramsizeMB * 1024 * 1024); char *ramptr; size_t i, j, k; -char *data = malloc(PAGE_SIZE); +g_autofree char *data = g_malloc(PAGE_SIZE); char *dataptr; size_t nMB = 0; unsigned long long before, after; -if (!ram) { -fprintf(stderr, "%s (%05d): ERROR: cannot allocate %llu MB of RAM: %s\n", -argv0, gettid(), ramsizeMB, strerror(errno)); -return -1; -} -if (!data) { -fprintf(stderr, "%s (%d): ERROR: cannot allocate %d bytes of RAM: %s\n", -argv0, gettid(), PAGE_SIZE, strerror(errno)); -free(ram); -return -1; -} - /* We don't care about initial state, but we do want * to fault it all into RAM, otherwise the first iter * of the loop below will be quite slow. We cna't use @@ -198,8 +186,6 @@ static int stressone(unsigned long long ramsizeMB) memset(ram, 0xfe, ramsizeMB * 1024 * 1024); if (random_bytes(data, PAGE_SIZE) < 0) { -free(ram); -free(data); return -1; } @@ -227,9 +213,6 @@ static int stressone(unsigned long long ramsizeMB) } } } - -free(data); -free(ram); } -- 2.17.1
[PATCH v4 3/3] tests/migration:fix unreachable path in stress test
If stressone() or stress() exits it's because of a failure because the test runs forever otherwise, so change stressone and stress type to void to make the exit_failure() as the exit function of main(). Signed-off-by: Mao Zhongyi --- tests/migration/stress.c | 13 + 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/migration/stress.c b/tests/migration/stress.c index f9626d50ee..a062ef6b55 100644 --- a/tests/migration/stress.c +++ b/tests/migration/stress.c @@ -167,7 +167,7 @@ static unsigned long long now(void) return (tv.tv_sec * 1000ull) + (tv.tv_usec / 1000ull); } -static int stressone(unsigned long long ramsizeMB) +static void stressone(unsigned long long ramsizeMB) { size_t pagesPerMB = 1024 * 1024 / PAGE_SIZE; g_autofree char *ram = g_malloc(ramsizeMB * 1024 * 1024); @@ -186,7 +186,7 @@ static int stressone(unsigned long long ramsizeMB) memset(ram, 0xfe, ramsizeMB * 1024 * 1024); if (random_bytes(data, PAGE_SIZE) < 0) { -return -1; +return; } before = now(); @@ -225,7 +225,7 @@ static void *stressthread(void *arg) return NULL; } -static int stress(unsigned long long ramsizeGB, int ncpus) +static void stress(unsigned long long ramsizeGB, int ncpus) { size_t i; unsigned long long ramsizeMB = ramsizeGB * 1024 / ncpus; @@ -238,8 +238,6 @@ static int stress(unsigned long long ramsizeGB, int ncpus) } stressone(ramsizeMB); - -return 0; } @@ -335,8 +333,7 @@ int main(int argc, char **argv) fprintf(stdout, "%s (%05d): INFO: RAM %llu GiB across %d CPUs\n", argv0, gettid(), ramsizeGB, ncpus); -if (stress(ramsizeGB, ncpus) < 0) -exit_failure(); +stress(ramsizeGB, ncpus); -exit_success(); +exit_failure(); } -- 2.17.1
[PATCH v4 2/3] tests/migration: fix a typo in comment
Signed-off-by: Mao Zhongyi Reviewed-by: Alex Bennée Reviewed-by: Laurent Vivier Reviewed-by: Philippe Mathieu-Daudé --- tests/migration/stress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/migration/stress.c b/tests/migration/stress.c index d8a6f64af0..f9626d50ee 100644 --- a/tests/migration/stress.c +++ b/tests/migration/stress.c @@ -180,7 +180,7 @@ static int stressone(unsigned long long ramsizeMB) /* We don't care about initial state, but we do want * to fault it all into RAM, otherwise the first iter - * of the loop below will be quite slow. We cna't use + * of the loop below will be quite slow. We can't use * 0x0 as the byte as gcc optimizes that away into a * calloc instead :-) */ memset(ram, 0xfe, ramsizeMB * 1024 * 1024); -- 2.17.1
[PATCH v4 0/3] some fix in tests/migration
This patchset mainly fixes memory leak, typo and return value of stress function in stress test. v4-v3: p1: - remove redundant g_malloc return value judgment. [Laurent Vivier] p3: - always use exit_failure() as the exit function of main(). [Laurent Vivier] - update the commit message. v3->v2: p1: - replace malloc with g_malloc [Laurent Vivier] p3: - change stressone type to void and stree return value to -1 to make the path of 'if (stress(ramsizeGB, ncpus) < 0)' can be reached.[Laurent Vivier] - update the commit message. v2->v1: - use g_autofree to release memory automatically instead of free(). [Alex Bennée] Cc: arm...@redhat.com Cc: laur...@vivier.eu Cc: tony.ngu...@bt.com Cc: alex.ben...@linaro.org Mao Zhongyi (3): tests/migration: mem leak fix tests/migration: fix a typo in comment tests/migration:fix unreachable path in stress test tests/migration/stress.c | 36 1 file changed, 8 insertions(+), 28 deletions(-) -- 2.17.1
Re: [PATCH v9 2/3] block/nbd: nbd reconnect
23 Sep 2019 22:23 Eric Blake wrote: On 9/17/19 12:13 PM, Vladimir Sementsov-Ogievskiy wrote: > Implement reconnect. To achieve this: > > 1. add new modes: >connecting-wait: means, that reconnecting is in progress, and there > were small number of reconnect attempts, so all requests are > waiting for the connection. >connecting-nowait: reconnecting is in progress, there were a lot of > attempts of reconnect, all requests will return errors. > >two old modes are used too: >connected: normal state >quit: exiting after fatal error or on close > > Possible transitions are: > >* -> quit >connecting-* -> connected >connecting-wait -> connecting-nowait (transition is done after > reconnect-delay seconds in connecting-wait mode) >connected -> connecting-wait > > 2. Implement reconnect in connection_co. So, in connecting-* mode, > connection_co, tries to reconnect unlimited times. > > 3. Retry nbd queries on channel error, if we are in connecting-wait > state. > > Signed-off-by: Vladimir Sementsov-Ogievskiy > --- > block/nbd.c | 332 ++-- > 1 file changed, 269 insertions(+), 63 deletions(-) > > + > +static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s) > +{ > +Error *local_err = NULL; > + > +if (!nbd_client_connecting(s)) { > +return; > +} > + > +/* Wait for completion of all in-flight requests */ > + > +qemu_co_mutex_lock(>send_mutex); > + > +while (s->in_flight > 0) { > +qemu_co_mutex_unlock(>send_mutex); > +nbd_recv_coroutines_wake_all(s); > +s->wait_in_flight = true; > +qemu_coroutine_yield(); > +s->wait_in_flight = false; > +qemu_co_mutex_lock(>send_mutex); > +} > + > +qemu_co_mutex_unlock(>send_mutex); > + > +if (!nbd_client_connecting(s)) { > +return; > +} > + > +/* > + * Now we are sure that nobody is accessing the channel, and no one will > + * try until we set the state to CONNECTED. > + */ > + > +/* Finalize previous connection if any */ > +if (s->ioc) { > +nbd_client_detach_aio_context(s->bs); > +object_unref(OBJECT(s->sioc)); > +s->sioc = NULL; > +object_unref(OBJECT(s->ioc)); > +s->ioc = NULL; > +} > + > +s->connect_status = nbd_client_connect(s->bs, _err); > +error_free(s->connect_err); > +s->connect_err = NULL; > +error_propagate(>connect_err, local_err); > +local_err = NULL; > + Looks like a dead assignment to local_err. But I see elsewhere you add it, because you convert straight-line code into loops where it matters. > +if (s->connect_status < 0) { > +/* failed attempt */ > +return; > +} > + > +/* successfully connected */ > +s->state = NBD_CLIENT_CONNECTED; > +qemu_co_queue_restart_all(>free_sema); > +} > + > +static coroutine_fn void nbd_reconnect_loop(BDRVNBDState *s) Coroutine functions should generally have '_co_' in their name. I'd prefer nbd_co_reconnect_loop. > +{ > +uint64_t start_time_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); > +uint64_t delay_ns = s->reconnect_delay * NANOSECONDS_PER_SECOND; > +uint64_t timeout = 1 * NANOSECONDS_PER_SECOND; > +uint64_t max_timeout = 16 * NANOSECONDS_PER_SECOND; > + > +nbd_reconnect_attempt(s); > + > +while (nbd_client_connecting(s)) { > +if (s->state == NBD_CLIENT_CONNECTING_WAIT && > +qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start_time_ns > > delay_ns) > +{ > +s->state = NBD_CLIENT_CONNECTING_NOWAIT; > +qemu_co_queue_restart_all(>free_sema); > +} > + > +qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, timeout, > + >connection_co_sleep_ns_state); > +if (s->drained) { > +bdrv_dec_in_flight(s->bs); > +s->wait_drained_end = true; > +while (s->drained) { > +/* > + * We may be entered once from > nbd_client_attach_aio_context_bh > + * and then from nbd_client_co_drain_end. So here is a loop. > + */ > +qemu_coroutine_yield(); > +} > +bdrv_inc_in_flight(s->bs); > +} > +if (timeout < max_timeout) { > +timeout *= 2; > +} Exponential backup, ok. If I read the loop correctly, you've hardcoded the max_timeout at 16s, which means the overall timeout is about 30s when adding in the time of the earlier iterations. Does that need to be tunable? But for now I can live with it. > + > +nbd_reconnect_attempt(s); > +} > } > > static coroutine_fn void nbd_connection_entry(void *opaque) > @@ -177,16 +330,26 @@ static coroutine_fn void nbd_connection_entry(void > *opaque) > * Therefore we keep an additional in_flight reference all the time > and > * only
Re: [PATCH v6 00/10] Introduce the microvm machine type
Patchew URL: https://patchew.org/QEMU/20191004093752.16564-1-...@redhat.com/ Hi, This series seems to have some coding style problems. See output below for more information: Type: series Message-id: 20191004093752.16564-1-...@redhat.com Subject: [PATCH v6 00/10] Introduce the microvm machine type === TEST SCRIPT BEGIN === #!/bin/bash git rev-parse base > /dev/null || exit 0 git config --local diff.renamelimit 0 git config --local diff.renames True git config --local diff.algorithm histogram ./scripts/checkpatch.pl --mailback base.. === TEST SCRIPT END === Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384 From https://github.com/patchew-project/qemu * [new tag] patchew/20191004171204.21040-1-eric.au...@redhat.com -> patchew/20191004171204.21040-1-eric.au...@redhat.com Switched to a new branch 'test' 82de93f hw/i386: Introduce the microvm machine type fda0032 docs/microvm.rst: document the new microvm machine type 8dc483d roms: add microvm-bios (qboot) as binary and git submodule 16f12bc hw/intc/apic: reject pic ints if isa_pic == NULL 22f8cab fw_cfg: add "modify" functions for all types 7cdaa3f hw/i386: make x86.c independent from PCMachineState 052084d hw/i386: split PCMachineState deriving X86MachineState from it afc0d5a hw/i386/pc: move shared x86 functions to x86.c and export them 9c1dc683 hw/i386/pc: rename functions shared with non-PC machines bd6947a hw/virtio: Factorize virtio-mmio headers === OUTPUT BEGIN === 1/10 Checking commit bd6947a2e366 (hw/virtio: Factorize virtio-mmio headers) WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? #77: new file mode 100644 total: 0 errors, 1 warnings, 131 lines checked Patch 1/10 has style problems, please review. If any of these errors are false positives report them to the maintainer, see CHECKPATCH in MAINTAINERS. 2/10 Checking commit 9c1dc683f829 (hw/i386/pc: rename functions shared with non-PC machines) 3/10 Checking commit afc0d5a54977 (hw/i386/pc: move shared x86 functions to x86.c and export them) WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? #749: new file mode 100644 WARNING: Block comments use a leading /* on a separate line #809: FILE: hw/i386/x86.c:56: +/* Calculates initial APIC ID for a specific CPU index WARNING: Block comments use a leading /* on a separate line #866: FILE: hw/i386/x86.c:113: +/* Calculates the limit to CPU APIC ID values WARNING: Block comments should align the * on each line #913: FILE: hw/i386/x86.c:160: + * -smp hasn't been parsed after it +*/ WARNING: line over 80 characters #926: FILE: hw/i386/x86.c:173: +ms->possible_cpus->cpus[i].arch_id = x86_cpu_apic_id_from_index(pcms, i); ERROR: spaces required around that '+' (ctx:VxV) #1087: FILE: hw/i386/x86.c:334: +cmdline_size = (strlen(kernel_cmdline)+16) & ~15; ^ ERROR: do not use assignment in if condition #1091: FILE: hw/i386/x86.c:338: +if (!f || !(kernel_size = get_file_size(f)) || ERROR: if this code is redundant consider removing it #1100: FILE: hw/i386/x86.c:347: +#if 0 ERROR: spaces required around that '+' (ctx:VxV) #1101: FILE: hw/i386/x86.c:348: +fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202)); ^ ERROR: spaces required around that '+' (ctx:VxV) #1103: FILE: hw/i386/x86.c:350: +if (ldl_p(header+0x202) == 0x53726448) { ^ ERROR: spaces required around that '+' (ctx:VxV) #1104: FILE: hw/i386/x86.c:351: +protocol = lduw_p(header+0x206); ^ ERROR: if this code is redundant consider removing it #1194: FILE: hw/i386/x86.c:441: +#if 0 ERROR: spaces required around that '+' (ctx:VxV) #1206: FILE: hw/i386/x86.c:453: +lduw_p(header+0x236) & XLF_CAN_BE_LOADED_ABOVE_4G) { ^ ERROR: spaces required around that '+' (ctx:VxV) #1225: FILE: hw/i386/x86.c:472: +initrd_max = ldl_p(header+0x22c); ^ ERROR: spaces required around that '+' (ctx:VxV) #1235: FILE: hw/i386/x86.c:482: +fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline)+1); ^ ERROR: spaces required around that '+' (ctx:VxV) #1239: FILE: hw/i386/x86.c:486: +stl_p(header+0x228, cmdline_addr); ^ ERROR: spaces required around that '+' (ctx:VxV) #1241: FILE: hw/i386/x86.c:488: +stw_p(header+0x20, 0xA33F); ^ ERROR: spaces required around that '+' (ctx:VxV) #1242: FILE: hw/i386/x86.c:489: +stw_p(header+0x22, cmdline_addr-real_addr); ^ ERROR: spaces required around that '-' (ctx:VxV) #1242: FILE: hw/i386/x86.c:489: +stw_p(header+0x22, cmdline_addr-real_addr); ^ ERROR: consider using qemu_strtol in preference to strtol #1258: FILE:
Re: [PATCH v6 00/10] Introduce the microvm machine type
Patchew URL: https://patchew.org/QEMU/20191004093752.16564-1-...@redhat.com/ Hi, This series failed the docker-quick@centos7 build test. Please find the testing commands and their output below. If you have Docker installed, you can probably reproduce it locally. === TEST SCRIPT BEGIN === #!/bin/bash make docker-image-centos7 V=1 NETWORK=1 time make docker-test-quick@centos7 SHOW_ENV=1 J=14 NETWORK=1 === TEST SCRIPT END === qemu-system-x86_64: missing kernel image file name, required by microvm Broken pipe /tmp/qemu-test/src/tests/libqtest.c:140: kill_qemu() tried to terminate QEMU process but encountered exit status 1 (expected 0) ERROR - too few tests run (expected 7, got 4) make: *** [check-qtest-x86_64] Error 1 make: *** Waiting for unfinished jobs TESTiotest-qcow2: 159 TESTiotest-qcow2: 161 --- raise CalledProcessError(retcode, cmd) subprocess.CalledProcessError: Command '['sudo', '-n', 'docker', 'run', '--label', 'com.qemu.instance.uuid=479a42ee8d764327bfb3924069c8e2dc', '-u', '1001', '--security-opt', 'seccomp=unconfined', '--rm', '-e', 'TARGET_LIST=', '-e', 'EXTRA_CONFIGURE_OPTS=', '-e', 'V=', '-e', 'J=14', '-e', 'DEBUG=', '-e', 'SHOW_ENV=1', '-e', 'CCACHE_DIR=/var/tmp/ccache', '-v', '/home/patchew/.cache/qemu-docker-ccache:/var/tmp/ccache:z', '-v', '/var/tmp/patchew-tester-tmp-0dfb_dst/src/docker-src.2019-10-04-13.10.50.4894:/var/tmp/qemu:z,ro', 'qemu:centos7', '/var/tmp/qemu/run', 'test-quick']' returned non-zero exit status 2. filter=--filter=label=com.qemu.instance.uuid=479a42ee8d764327bfb3924069c8e2dc make[1]: *** [docker-run] Error 1 make[1]: Leaving directory `/var/tmp/patchew-tester-tmp-0dfb_dst/src' make: *** [docker-run-test-quick@centos7] Error 2 real10m42.831s user0m8.500s The full log is available at http://patchew.org/logs/20191004093752.16564-1-...@redhat.com/testing.docker-quick@centos7/?type=message. --- Email generated automatically by Patchew [https://patchew.org/]. Please send your feedback to patchew-de...@redhat.com
[PULL 29/29] target/i386/kvm: Silence warning from Valgrind about uninitialized bytes
From: Thomas Huth When I run QEMU with KVM under Valgrind, I currently get this warning: Syscall param ioctl(generic) points to uninitialised byte(s) at 0x95BA45B: ioctl (in /usr/lib64/libc-2.28.so) by 0x429DC3: kvm_ioctl (kvm-all.c:2365) by 0x51B249: kvm_arch_get_supported_msr_feature (kvm.c:469) by 0x4C2A49: x86_cpu_get_supported_feature_word (cpu.c:3765) by 0x4C4116: x86_cpu_expand_features (cpu.c:5065) by 0x4C7F8D: x86_cpu_realizefn (cpu.c:5242) by 0x5961F3: device_set_realized (qdev.c:835) by 0x7038F6: property_set_bool (object.c:2080) by 0x707EFE: object_property_set_qobject (qom-qobject.c:26) by 0x705814: object_property_set_bool (object.c:1338) by 0x498435: pc_new_cpu (pc.c:1549) by 0x49C67D: pc_cpus_init (pc.c:1681) Address 0x1ffeffee74 is on thread 1's stack in frame #2, created by kvm_arch_get_supported_msr_feature (kvm.c:445) It's harmless, but a little bit annoying, so silence it by properly initializing the whole structure with zeroes. Signed-off-by: Thomas Huth Signed-off-by: Paolo Bonzini --- target/i386/kvm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index be4bbfb..11b9c85 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -188,7 +188,7 @@ static int kvm_get_tsc(CPUState *cs) struct { struct kvm_msrs info; struct kvm_msr_entry entries[1]; -} msr_data; +} msr_data = {}; int ret; if (env->tsc_valid) { @@ -448,7 +448,7 @@ uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) struct { struct kvm_msrs info; struct kvm_msr_entry entries[1]; -} msr_data; +} msr_data = {}; uint64_t value; uint32_t ret, can_be_one, must_be_one; -- 1.8.3.1
[PATCH] migration: Support QLIST migration
94869d5c52 ("migration: migrate QTAILQ") introduced support for QTAILQ migration. Let's do the same for QLIST. Signed-off-by: Eric Auger --- include/migration/vmstate.h | 21 ++ include/qemu/queue.h| 30 migration/vmstate-types.c | 60 tests/test-vmstate.c| 133 4 files changed, 244 insertions(+) diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 1fbfd099dd..54ec9caeb0 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -224,6 +224,7 @@ extern const VMStateInfo vmstate_info_unused_buffer; extern const VMStateInfo vmstate_info_tmp; extern const VMStateInfo vmstate_info_bitmap; extern const VMStateInfo vmstate_info_qtailq; +extern const VMStateInfo vmstate_info_qlist; #define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0) /* @@ -754,6 +755,26 @@ extern const VMStateInfo vmstate_info_qtailq; .start= offsetof(_type, _next), \ } +/* + * For migrating a QLIST + * Target QLIST needs be properly initialized. + * _type: type of QLIST element + * _next: name of QLIST_ENTRY entry field in QLIST element + * _vmsd: VMSD for QLIST element + * size: size of QLIST element + * start: offset of QLIST_ENTRY in QTAILQ element + */ +#define VMSTATE_QLIST_V(_field, _state, _version, _vmsd, _type, _next) \ +{\ +.name = (stringify(_field)), \ +.version_id = (_version), \ +.vmsd = &(_vmsd),\ +.size = sizeof(_type), \ +.info = _info_qlist, \ +.offset = offsetof(_state, _field),\ +.start= offsetof(_type, _next), \ +} + /* _f : field name _f_n : num of elements field_name _n : num of elements diff --git a/include/qemu/queue.h b/include/qemu/queue.h index 73bf4a984d..e965b4d18d 100644 --- a/include/qemu/queue.h +++ b/include/qemu/queue.h @@ -491,4 +491,34 @@ union { \ QTAILQ_RAW_TQH_CIRC(head)->tql_prev = QTAILQ_RAW_TQE_CIRC(elm, entry); \ } while (/*CONSTCOND*/0) +#define QLIST_RAW_FIRST(head) \ +field_at_offset(head, 0, void *) + +#define QLIST_RAW_NEXT(elm, entry) \ +field_at_offset(elm, entry, void *) + +#define QLIST_RAW_PREVIOUS(elm, entry) \ +field_at_offset(elm, entry + sizeof(void *), void *) + +#define QLIST_RAW_FOREACH(elm, head, entry) \ +for ((elm) = *QLIST_RAW_FIRST(head); \ + (elm); \ + (elm) = *QLIST_RAW_NEXT(elm, entry)) + +#define QLIST_RAW_INSERT_TAIL(head, elm, entry) do { \ +void *iter, *last = NULL; \ +*QLIST_RAW_NEXT(elm, entry) = NULL; \ +if (!*QLIST_RAW_FIRST(head)) { \ +*QLIST_RAW_FIRST(head) = elm; \ +*QLIST_RAW_PREVIOUS(elm, entry) = head; \ +break; \ +} \ +for (iter = *QLIST_RAW_FIRST(head); \ + iter; last = iter, iter = *QLIST_RAW_NEXT(iter, entry)) \ +{ } \ +*QLIST_RAW_NEXT(last, entry) = elm; \ +*QLIST_RAW_PREVIOUS(elm, entry) = last; \ +} while (0) + + #endif /* QEMU_SYS_QUEUE_H */ diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c index bee658a1b2..0eedc614f1 100644 --- a/migration/vmstate-types.c +++ b/migration/vmstate-types.c @@ -691,3 +691,63 @@ const VMStateInfo vmstate_info_qtailq = { .get = get_qtailq, .put = put_qtailq, }; + +static int put_qlist(QEMUFile *f, void *pv, size_t unused_size, + const VMStateField *field, QJSON *vmdesc) +{ +const VMStateDescription *vmsd = field->vmsd; +/* offset of the QTAILQ entry in a QTAILQ element*/ +size_t entry_offset = field->start; +void *elm; +int ret; + +QLIST_RAW_FOREACH(elm, pv, entry_offset) { +qemu_put_byte(f, true); +ret =
[PULL 27/29] target/i386: add VMX features
Add code to convert the VMX feature words back into MSR values, allowing the user to enable/disable VMX features as they wish. The same infrastructure enables support for limiting VMX features in named CPU models. Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 225 ++ target/i386/cpu.h | 9 +++ target/i386/kvm.c | 162 ++- 3 files changed, 394 insertions(+), 2 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 0a5d182..44f1bbd 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1232,6 +1232,163 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .index = MSR_IA32_CORE_CAPABILITY, }, }, + +[FEAT_VMX_PROCBASED_CTLS] = { +.type = MSR_FEATURE_WORD, +.feat_names = { +NULL, NULL, "vmx-vintr-pending", "vmx-tsc-offset", +NULL, NULL, NULL, "vmx-hlt-exit", +NULL, "vmx-invlpg-exit", "vmx-mwait-exit", "vmx-rdpmc-exit", +"vmx-rdtsc-exit", NULL, NULL, "vmx-cr3-load-noexit", +"vmx-cr3-store-noexit", NULL, NULL, "vmx-cr8-load-exit", +"vmx-cr8-store-exit", "vmx-flexpriority", "vmx-vnmi-pending", "vmx-movdr-exit", +"vmx-io-exit", "vmx-io-bitmap", NULL, "vmx-mtf", +"vmx-msr-bitmap", "vmx-monitor-exit", "vmx-pause-exit", "vmx-secondary-ctls", +}, +.msr = { +.index = MSR_IA32_VMX_TRUE_PROCBASED_CTLS, +} +}, + +[FEAT_VMX_SECONDARY_CTLS] = { +.type = MSR_FEATURE_WORD, +.feat_names = { +"vmx-apicv-xapic", "vmx-ept", "vmx-desc-exit", "vmx-rdtscp-exit", +"vmx-apicv-x2apic", "vmx-vpid", "vmx-wbinvd-exit", "vmx-unrestricted-guest", +"vmx-apicv-register", "vmx-apicv-vid", "vmx-ple", "vmx-rdrand-exit", +"vmx-invpcid-exit", "vmx-vmfunc", "vmx-shadow-vmcs", "vmx-encls-exit", +"vmx-rdseed-exit", "vmx-pml", NULL, NULL, +"vmx-xsaves", NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +}, +.msr = { +.index = MSR_IA32_VMX_PROCBASED_CTLS2, +} +}, + +[FEAT_VMX_PINBASED_CTLS] = { +.type = MSR_FEATURE_WORD, +.feat_names = { +"vmx-intr-exit", NULL, NULL, "vmx-nmi-exit", +NULL, "vmx-vnmi", "vmx-preemption-timer", "vmx-posted-intr", +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +}, +.msr = { +.index = MSR_IA32_VMX_TRUE_PINBASED_CTLS, +} +}, + +[FEAT_VMX_EXIT_CTLS] = { +.type = MSR_FEATURE_WORD, +/* + * VMX_VM_EXIT_HOST_ADDR_SPACE_SIZE is copied from + * the LM CPUID bit. + */ +.feat_names = { +NULL, NULL, "vmx-exit-nosave-debugctl", NULL, +NULL, NULL, NULL, NULL, +NULL, NULL /* vmx-exit-host-addr-space-size */, NULL, NULL, +"vmx-exit-load-perf-global-ctrl", NULL, NULL, "vmx-exit-ack-intr", +NULL, NULL, "vmx-exit-save-pat", "vmx-exit-load-pat", +"vmx-exit-save-efer", "vmx-exit-load-efer", +"vmx-exit-save-preemption-timer", "vmx-exit-clear-bndcfgs", +NULL, "vmx-exit-clear-rtit-ctl", NULL, NULL, +NULL, NULL, NULL, NULL, +}, +.msr = { +.index = MSR_IA32_VMX_TRUE_EXIT_CTLS, +} +}, + +[FEAT_VMX_ENTRY_CTLS] = { +.type = MSR_FEATURE_WORD, +.feat_names = { +NULL, NULL, "vmx-entry-noload-debugctl", NULL, +NULL, NULL, NULL, NULL, +NULL, "vmx-entry-ia32e-mode", NULL, NULL, +NULL, "vmx-entry-load-perf-global-ctrl", "vmx-entry-load-pat", "vmx-entry-load-efer", +"vmx-entry-load-bndcfgs", NULL, "vmx-entry-load-rtit-ctl", NULL, +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +}, +.msr = { +.index = MSR_IA32_VMX_TRUE_ENTRY_CTLS, +} +}, + +[FEAT_VMX_MISC] = { +.type = MSR_FEATURE_WORD, +.feat_names = { +NULL, NULL, NULL, NULL, +NULL, "vmx-store-lma", "vmx-activity-hlt", "vmx-activity-shutdown", +"vmx-activity-wait-sipi", NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +NULL, NULL, NULL, NULL, +NULL, "vmx-vmwrite-vmexit-fields", "vmx-zero-len-inject", NULL, +}, +.msr = { +.index = MSR_IA32_VMX_MISC, +} +}, + +[FEAT_VMX_EPT_VPID_CAPS] = { +.type = MSR_FEATURE_WORD, +.feat_names = { +"vmx-ept-execonly", NULL, NULL,
[PULL 26/29] vmxcap: correct the name of the variables
The low bits are 1 if the control must be one, the high bits are 1 if the control can be one. Correct the variable names as they are very confusing. Signed-off-by: Paolo Bonzini --- scripts/kvm/vmxcap | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap index d8c7d6d..5dfeb2e 100755 --- a/scripts/kvm/vmxcap +++ b/scripts/kvm/vmxcap @@ -51,15 +51,15 @@ class Control(object): return (val & 0x, val >> 32) def show(self): print(self.name) -mbz, mb1 = self.read2(self.cap_msr) -tmbz, tmb1 = 0, 0 +mb1, cb1 = self.read2(self.cap_msr) +tmb1, tcb1 = 0, 0 if self.true_cap_msr: -tmbz, tmb1 = self.read2(self.true_cap_msr) +tmb1, tcb1 = self.read2(self.true_cap_msr) for bit in sorted(self.bits.keys()): -zero = not (mbz & (1 << bit)) -one = mb1 & (1 << bit) -true_zero = not (tmbz & (1 << bit)) -true_one = tmb1 & (1 << bit) +zero = not (mb1 & (1 << bit)) +one = cb1 & (1 << bit) +true_zero = not (tmb1 & (1 << bit)) +true_one = tcb1 & (1 << bit) s= '?' if (self.true_cap_msr and true_zero and true_one and one and not zero): -- 1.8.3.1
[PULL 24/29] target/i386: expand feature words to 64 bits
VMX requires 64-bit feature words for the IA32_VMX_EPT_VPID_CAP and IA32_VMX_BASIC MSRs. (The VMX control MSRs are 64-bit wide but actually have only 32 bits of information). Signed-off-by: Paolo Bonzini --- include/sysemu/kvm.h | 2 +- target/i386/cpu.c| 71 +++- target/i386/cpu.h| 2 +- target/i386/kvm.c| 2 +- 4 files changed, 40 insertions(+), 37 deletions(-) diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index fd67477..9d14328 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -462,7 +462,7 @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension); uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function, uint32_t index, int reg); -uint32_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index); +uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index); void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len); diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 35d868d..0a5d182 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -789,7 +789,7 @@ typedef struct FeatureWordInfo { * In cases of disagreement between feature naming conventions, * aliases may be added. */ -const char *feat_names[32]; +const char *feat_names[64]; union { /* If type==CPUID_FEATURE_WORD */ struct { @@ -803,11 +803,11 @@ typedef struct FeatureWordInfo { uint32_t index; } msr; }; -uint32_t tcg_features; /* Feature flags supported by TCG */ -uint32_t unmigratable_flags; /* Feature flags known to be unmigratable */ -uint32_t migratable_flags; /* Feature flags known to be migratable */ +uint64_t tcg_features; /* Feature flags supported by TCG */ +uint64_t unmigratable_flags; /* Feature flags known to be unmigratable */ +uint64_t migratable_flags; /* Feature flags known to be migratable */ /* Features that shouldn't be auto-enabled by "-cpu host" */ -uint32_t no_autoenable_flags; +uint64_t no_autoenable_flags; } FeatureWordInfo; static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { @@ -1236,7 +1236,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { typedef struct FeatureMask { FeatureWord index; -uint32_t mask; +uint64_t mask; } FeatureMask; typedef struct FeatureDep { @@ -1246,11 +1246,11 @@ typedef struct FeatureDep { static FeatureDep feature_dependencies[] = { { .from = { FEAT_7_0_EDX, CPUID_7_0_EDX_ARCH_CAPABILITIES }, -.to = { FEAT_ARCH_CAPABILITIES, ~0u }, +.to = { FEAT_ARCH_CAPABILITIES, ~0ull }, }, { .from = { FEAT_7_0_EDX, CPUID_7_0_EDX_CORE_CAPABILITY }, -.to = { FEAT_CORE_CAPABILITY, ~0u }, +.to = { FEAT_CORE_CAPABILITY, ~0ull }, }, }; @@ -1362,14 +1362,14 @@ const char *get_register_name_32(unsigned int reg) * Returns the set of feature flags that are supported and migratable by * QEMU, for a given FeatureWord. */ -static uint32_t x86_cpu_get_migratable_flags(FeatureWord w) +static uint64_t x86_cpu_get_migratable_flags(FeatureWord w) { FeatureWordInfo *wi = _word_info[w]; -uint32_t r = 0; +uint64_t r = 0; int i; -for (i = 0; i < 32; i++) { -uint32_t f = 1U << i; +for (i = 0; i < 64; i++) { +uint64_t f = 1ULL << i; /* If the feature name is known, it is implicitly considered migratable, * unless it is explicitly set in unmigratable_flags */ @@ -2931,7 +2931,7 @@ void x86_cpu_change_kvm_default(const char *prop, const char *value) assert(pv->prop); } -static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w, +static uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, bool migratable_only); static bool lmce_supported(void) @@ -3117,7 +3117,7 @@ static bool x86_cpu_have_filtered_features(X86CPU *cpu) return false; } -static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint32_t mask, +static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, const char *verbose_prefix) { CPUX86State *env = >env; @@ -3134,8 +3134,8 @@ static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint32_t mask, return; } -for (i = 0; i < 32; ++i) { -if ((1UL << i) & mask) { +for (i = 0; i < 64; ++i) { +if ((1ULL << i) & mask) { feat_word_str = feature_word_description(f, i); warn_report("%s: %s%s%s [bit %d]", verbose_prefix, @@ -3378,7 +3378,7 @@ static void x86_cpu_get_feature_words(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { -
[PULL 22/29] target/i386: handle filtered_features in a new function mark_unavailable_features
The next patch will add a different reason for filtering features, unrelated to host feature support. Extract a new function that takes care of disabling the features and optionally reporting them. Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 87 ++- 1 file changed, 48 insertions(+), 39 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 80cfab0..83f8981 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -3096,17 +3096,41 @@ static char *feature_word_description(FeatureWordInfo *f, uint32_t bit) return NULL; } -static void report_unavailable_features(FeatureWord w, uint32_t mask) +static bool x86_cpu_have_filtered_features(X86CPU *cpu) { +FeatureWord w; + +for (w = 0; w < FEATURE_WORDS; w++) { +if (cpu->filtered_features[w]) { +return true; +} +} + +return false; +} + +static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint32_t mask, + const char *verbose_prefix) +{ +CPUX86State *env = >env; FeatureWordInfo *f = _word_info[w]; int i; char *feat_word_str; +if (!cpu->force_features) { +env->features[w] &= ~mask; +} +cpu->filtered_features[w] |= mask; + +if (!verbose_prefix) { +return; +} + for (i = 0; i < 32; ++i) { if ((1UL << i) & mask) { feat_word_str = feature_word_description(f, i); -warn_report("%s doesn't support requested feature: %s%s%s [bit %d]", -accel_uses_host_cpuid() ? "host" : "TCG", +warn_report("%s: %s%s%s [bit %d]", +verbose_prefix, feat_word_str, f->feat_names[i] ? "." : "", f->feat_names[i] ? f->feat_names[i] : "", i); @@ -3511,7 +3535,7 @@ static void x86_cpu_parse_featurestr(const char *typename, char *features, } static void x86_cpu_expand_features(X86CPU *cpu, Error **errp); -static int x86_cpu_filter_features(X86CPU *cpu); +static void x86_cpu_filter_features(X86CPU *cpu, bool verbose); /* Build a list with the name of all features on a feature word array */ static void x86_cpu_list_feature_names(FeatureWordArray features, @@ -3576,7 +3600,7 @@ static void x86_cpu_class_check_missing_features(X86CPUClass *xcc, next = >next; } -x86_cpu_filter_features(xc); +x86_cpu_filter_features(xc, false); x86_cpu_list_feature_names(xc->filtered_features, next); @@ -3784,15 +3808,6 @@ static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w, return r; } -static void x86_cpu_report_filtered_features(X86CPU *cpu) -{ -FeatureWord w; - -for (w = 0; w < FEATURE_WORDS; w++) { -report_unavailable_features(w, cpu->filtered_features[w]); -} -} - static void x86_cpu_apply_props(X86CPU *cpu, PropValue *props) { PropValue *pv; @@ -5154,24 +5169,24 @@ out: * * Returns: 0 if all flags are supported by the host, non-zero otherwise. */ -static int x86_cpu_filter_features(X86CPU *cpu) +static void x86_cpu_filter_features(X86CPU *cpu, bool verbose) { CPUX86State *env = >env; FeatureWord w; -int rv = 0; +const char *prefix = NULL; + +if (verbose) { +prefix = accel_uses_host_cpuid() + ? "host doesn't support requested feature" + : "TCG doesn't support requested feature"; +} for (w = 0; w < FEATURE_WORDS; w++) { uint32_t host_feat = x86_cpu_get_supported_feature_word(w, false); uint32_t requested_features = env->features[w]; -uint32_t available_features = requested_features & host_feat; -if (!cpu->force_features) { -env->features[w] = available_features; -} -cpu->filtered_features[w] = requested_features & ~available_features; -if (cpu->filtered_features[w]) { -rv = 1; -} +uint32_t unavailable_features = requested_features & ~host_feat; +mark_unavailable_features(cpu, w, unavailable_features, prefix); } if ((env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) && @@ -5197,13 +5212,9 @@ static int x86_cpu_filter_features(X86CPU *cpu) * host can't emulate the capabilities we report on * cpu_x86_cpuid(), intel-pt can't be enabled on the current host. */ -env->features[FEAT_7_0_EBX] &= ~CPUID_7_0_EBX_INTEL_PT; -cpu->filtered_features[FEAT_7_0_EBX] |= CPUID_7_0_EBX_INTEL_PT; -rv = 1; +mark_unavailable_features(cpu, FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT, prefix); } } - -return rv; } static void x86_cpu_realizefn(DeviceState *dev, Error **errp) @@ -5244,16 +5255,14 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) goto out; } -if (x86_cpu_filter_features(cpu) && -
[PULL 28/29] target/i386: work around KVM_GET_MSRS bug for secondary execution controls
Some secondary controls are automatically enabled/disabled based on the CPUID values that are set for the guest. However, they are still available at a global level and therefore should be present when KVM_GET_MSRS is sent to /dev/kvm. Unfortunately KVM forgot to include those, so fix that. Signed-off-by: Paolo Bonzini --- target/i386/kvm.c | 17 + 1 file changed, 17 insertions(+) diff --git a/target/i386/kvm.c b/target/i386/kvm.c index 45f0a1f..be4bbfb 100644 --- a/target/i386/kvm.c +++ b/target/i386/kvm.c @@ -479,6 +479,23 @@ uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) value = msr_data.entries[0].data; switch (index) { case MSR_IA32_VMX_PROCBASED_CTLS2: +/* KVM forgot to add these bits for some time, do this ourselves. */ +if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & CPUID_XSAVE_XSAVES) { +value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32; +} +if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & CPUID_EXT_RDRAND) { +value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32; +} +if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_INVPCID) { +value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32; +} +if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & CPUID_7_0_EBX_RDSEED) { +value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32; +} +if (kvm_arch_get_supported_cpuid(s, 0x8001, 0, R_EDX) & CPUID_EXT2_RDTSCP) { +value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32; +} +/* fall through */ case MSR_IA32_VMX_TRUE_PINBASED_CTLS: case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: case MSR_IA32_VMX_TRUE_ENTRY_CTLS: -- 1.8.3.1
[PULL 23/29] target/i386: introduce generic feature dependency mechanism
Sometimes a CPU feature does not make sense unless another is present. In the case of VMX features, KVM does not even allow setting the VMX controls to some invalid combinations. Therefore, this patch adds a generic mechanism that looks for bits that the user explicitly cleared, and uses them to remove other bits from the expanded CPU definition. If these dependent bits were also explicitly *set* by the user, this will be a warning for "-cpu check" and an error for "-cpu enforce". If not, then the dependent bits are cleared silently, for convenience. With VMX features, this will be used so that for example "-cpu host,-rdrand" will also hide support for RDRAND exiting. Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 72 --- 1 file changed, 48 insertions(+), 24 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 83f8981..35d868d 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -801,10 +801,6 @@ typedef struct FeatureWordInfo { /* If type==MSR_FEATURE_WORD */ struct { uint32_t index; -struct { /*CPUID that enumerate this MSR*/ -FeatureWord cpuid_class; -uint32_tcpuid_flag; -} cpuid_dep; } msr; }; uint32_t tcg_features; /* Feature flags supported by TCG */ @@ -1218,10 +1214,6 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, .msr = { .index = MSR_IA32_ARCH_CAPABILITIES, -.cpuid_dep = { -FEAT_7_0_EDX, -CPUID_7_0_EDX_ARCH_CAPABILITIES -} }, }, [FEAT_CORE_CAPABILITY] = { @@ -1238,14 +1230,30 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { }, .msr = { .index = MSR_IA32_CORE_CAPABILITY, -.cpuid_dep = { -FEAT_7_0_EDX, -CPUID_7_0_EDX_CORE_CAPABILITY, -}, }, }, }; +typedef struct FeatureMask { +FeatureWord index; +uint32_t mask; +} FeatureMask; + +typedef struct FeatureDep { +FeatureMask from, to; +} FeatureDep; + +static FeatureDep feature_dependencies[] = { +{ +.from = { FEAT_7_0_EDX, CPUID_7_0_EDX_ARCH_CAPABILITIES }, +.to = { FEAT_ARCH_CAPABILITIES, ~0u }, +}, +{ +.from = { FEAT_7_0_EDX, CPUID_7_0_EDX_CORE_CAPABILITY }, +.to = { FEAT_CORE_CAPABILITY, ~0u }, +}, +}; + typedef struct X86RegisterInfo32 { /* Name of register */ const char *name; @@ -5063,9 +5071,26 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) { CPUX86State *env = >env; FeatureWord w; +int i; GList *l; Error *local_err = NULL; +for (l = plus_features; l; l = l->next) { +const char *prop = l->data; +object_property_set_bool(OBJECT(cpu), true, prop, _err); +if (local_err) { +goto out; +} +} + +for (l = minus_features; l; l = l->next) { +const char *prop = l->data; +object_property_set_bool(OBJECT(cpu), false, prop, _err); +if (local_err) { +goto out; +} +} + /*TODO: Now cpu->max_features doesn't overwrite features * set using QOM properties, and we can convert * plus_features & minus_features to global properties @@ -5083,19 +5108,18 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) } } -for (l = plus_features; l; l = l->next) { -const char *prop = l->data; -object_property_set_bool(OBJECT(cpu), true, prop, _err); -if (local_err) { -goto out; -} -} +for (i = 0; i < ARRAY_SIZE(feature_dependencies); i++) { +FeatureDep *d = _dependencies[i]; +if (!(env->features[d->from.index] & d->from.mask)) { +uint32_t unavailable_features = env->features[d->to.index] & d->to.mask; -for (l = minus_features; l; l = l->next) { -const char *prop = l->data; -object_property_set_bool(OBJECT(cpu), false, prop, _err); -if (local_err) { -goto out; +/* Not an error unless the dependent feature was added explicitly. */ +mark_unavailable_features(cpu, d->to.index, + unavailable_features & env->user_features[d->to.index], + "This feature depends on other features that were not requested"); + +env->user_features[d->to.index] |= unavailable_features; +env->features[d->to.index] &= ~unavailable_features; } } -- 1.8.3.1
[PULL 25/29] target/i386: add VMX definitions
These will be used to compile the list of VMX features for named CPU models, and/or by the code that sets up the VMX MSRs. Signed-off-by: Paolo Bonzini --- target/i386/cpu.h | 130 ++ 1 file changed, 130 insertions(+) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 312d70f..d908e98 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -452,6 +452,25 @@ typedef enum X86Seg { #define MSR_IA32_BNDCFGS0x0d90 #define MSR_IA32_XSS0x0da0 +#define MSR_IA32_VMX_BASIC 0x0480 +#define MSR_IA32_VMX_PINBASED_CTLS 0x0481 +#define MSR_IA32_VMX_PROCBASED_CTLS 0x0482 +#define MSR_IA32_VMX_EXIT_CTLS 0x0483 +#define MSR_IA32_VMX_ENTRY_CTLS 0x0484 +#define MSR_IA32_VMX_MISC 0x0485 +#define MSR_IA32_VMX_CR0_FIXED0 0x0486 +#define MSR_IA32_VMX_CR0_FIXED1 0x0487 +#define MSR_IA32_VMX_CR4_FIXED0 0x0488 +#define MSR_IA32_VMX_CR4_FIXED1 0x0489 +#define MSR_IA32_VMX_VMCS_ENUM 0x048a +#define MSR_IA32_VMX_PROCBASED_CTLS20x048b +#define MSR_IA32_VMX_EPT_VPID_CAP 0x048c +#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x048d +#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x048e +#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x048f +#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x0490 +#define MSR_IA32_VMX_VMFUNC 0x0491 + #define XSTATE_FP_BIT 0 #define XSTATE_SSE_BIT 1 #define XSTATE_YMM_BIT 2 @@ -752,6 +771,117 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; #define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5) +/* VMX MSR features */ +#define MSR_VMX_BASIC_VMCS_REVISION_MASK 0x7FFFull +#define MSR_VMX_BASIC_VMXON_REGION_SIZE_MASK (0x1FFFull << 32) +#define MSR_VMX_BASIC_VMCS_MEM_TYPE_MASK (0x003Cull << 32) +#define MSR_VMX_BASIC_DUAL_MONITOR (1ULL << 49) +#define MSR_VMX_BASIC_INS_OUTS (1ULL << 54) +#define MSR_VMX_BASIC_TRUE_CTLS (1ULL << 55) + +#define MSR_VMX_MISC_PREEMPTION_TIMER_SHIFT_MASK 0x1Full +#define MSR_VMX_MISC_STORE_LMA (1ULL << 5) +#define MSR_VMX_MISC_ACTIVITY_HLT(1ULL << 6) +#define MSR_VMX_MISC_ACTIVITY_SHUTDOWN (1ULL << 7) +#define MSR_VMX_MISC_ACTIVITY_WAIT_SIPI (1ULL << 8) +#define MSR_VMX_MISC_MAX_MSR_LIST_SIZE_MASK 0x0E00ull +#define MSR_VMX_MISC_VMWRITE_VMEXIT (1ULL << 29) +#define MSR_VMX_MISC_ZERO_LEN_INJECT (1ULL << 30) + +#define MSR_VMX_EPT_EXECONLY (1ULL << 0) +#define MSR_VMX_EPT_PAGE_WALK_LENGTH_4 (1ULL << 6) +#define MSR_VMX_EPT_PAGE_WALK_LENGTH_5 (1ULL << 7) +#define MSR_VMX_EPT_UC (1ULL << 8) +#define MSR_VMX_EPT_WB (1ULL << 14) +#define MSR_VMX_EPT_2MB (1ULL << 16) +#define MSR_VMX_EPT_1GB (1ULL << 17) +#define MSR_VMX_EPT_INVEPT (1ULL << 20) +#define MSR_VMX_EPT_AD_BITS (1ULL << 21) +#define MSR_VMX_EPT_ADVANCED_VMEXIT_INFO (1ULL << 22) +#define MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT(1ULL << 25) +#define MSR_VMX_EPT_INVEPT_ALL_CONTEXT (1ULL << 26) +#define MSR_VMX_EPT_INVVPID (1ULL << 32) +#define MSR_VMX_EPT_INVVPID_SINGLE_ADDR (1ULL << 40) +#define MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT (1ULL << 41) +#define MSR_VMX_EPT_INVVPID_ALL_CONTEXT (1ULL << 42) +#define MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS (1ULL << 43) + +#define MSR_VMX_VMFUNC_EPT_SWITCHING (1ULL << 0) + + +/* VMX controls */ +#define VMX_CPU_BASED_VIRTUAL_INTR_PENDING 0x0004 +#define VMX_CPU_BASED_USE_TSC_OFFSETING 0x0008 +#define VMX_CPU_BASED_HLT_EXITING 0x0080 +#define VMX_CPU_BASED_INVLPG_EXITING0x0200 +#define VMX_CPU_BASED_MWAIT_EXITING 0x0400 +#define VMX_CPU_BASED_RDPMC_EXITING 0x0800 +#define VMX_CPU_BASED_RDTSC_EXITING 0x1000 +#define VMX_CPU_BASED_CR3_LOAD_EXITING 0x8000 +#define VMX_CPU_BASED_CR3_STORE_EXITING 0x0001 +#define VMX_CPU_BASED_CR8_LOAD_EXITING 0x0008 +#define VMX_CPU_BASED_CR8_STORE_EXITING 0x0010 +#define VMX_CPU_BASED_TPR_SHADOW0x0020 +#define VMX_CPU_BASED_VIRTUAL_NMI_PENDING 0x0040 +#define VMX_CPU_BASED_MOV_DR_EXITING0x0080 +#define VMX_CPU_BASED_UNCOND_IO_EXITING 0x0100 +#define VMX_CPU_BASED_USE_IO_BITMAPS0x0200 +#define
[PULL v3 00/29] Misc patches for 2019-10-02
The following changes since commit 7f21573c822805a8e6be379d9bcf3ad9effef3dc: Merge remote-tracking branch 'remotes/huth-gitlab/tags/pull-request-2019-10-01' into staging (2019-10-01 13:13:38 +0100) are available in the git repository at: git://github.com/bonzini/qemu.git tags/for-upstream for you to fetch changes up to a1834d975f7d329b128965dd69bc3aaa7195f5a2: target/i386/kvm: Silence warning from Valgrind about uninitialized bytes (2019-10-04 18:49:20 +0200) * Compilation fix for KVM (Alex) * SMM fix (Dmitry) * VFIO error reporting (Eric) * win32 fixes and workarounds (Marc-André) * qemu-pr-helper crash bugfix (Maxim) * Memory leak fixes (myself) * VMX features (myself) * Record-replay deadlock (Pavel) * i386 CPUID bits (Sebastian) * kconfig tweak (Thomas) * Valgrind fix (Thomas) * Autoconverge test (Yury) Alex Bennée (1): accel/kvm: ensure ret always set Dmitry Poletaev (1): Fix wrong behavior of cpu_memory_rw_debug() function in SMM Eric Auger (2): vfio: Turn the container error into an Error handle memory: allow memory_region_register_iommu_notifier() to fail Marc-André Lureau (3): util: WSAEWOULDBLOCK on connect should map to EINPROGRESS tests: skip serial test on windows win32: work around main-loop busy loop on socket/fd event Maxim Levitsky (1): qemu-pr-helper: fix crash in mpath_reconstruct_sense Paolo Bonzini (16): ide: fix leak from qemu_allocate_irqs microblaze: fix leak of fdevice tree blob mcf5208: fix leak from qemu_allocate_irqs hppa: fix leak from g_strdup_printf mips: fix memory leaks in board initialization cris: do not leak struct cris_disasm_data lm32: do not leak memory on object_new/object_unref docker: test-debug: disable LeakSanitizer tests/docker: only enable ubsan for test-clang target/i386: handle filtered_features in a new function mark_unavailable_features target/i386: introduce generic feature dependency mechanism target/i386: expand feature words to 64 bits target/i386: add VMX definitions vmxcap: correct the name of the variables target/i386: add VMX features target/i386: work around KVM_GET_MSRS bug for secondary execution controls Pavel Dovgaluk (1): replay: don't synchronize memory operations in replay mode Sebastian Andrzej Siewior (1): i386: Add CPUID bit for CLZERO and XSAVEERPTR Thomas Huth (2): hw/isa: Introduce a CONFIG_ISA_SUPERIO switch for isa-superio.c target/i386/kvm: Silence warning from Valgrind about uninitialized bytes Yury Kotov (1): tests/migration: Add a test for auto converge accel/kvm/kvm-all.c | 6 +- disas/cris.c | 59 +++--- exec.c| 23 ++- hw/arm/smmuv3.c | 18 +- hw/hppa/dino.c| 1 + hw/hppa/machine.c | 4 +- hw/i386/amd_iommu.c | 17 +- hw/i386/intel_iommu.c | 8 +- hw/ide/cmd646.c | 1 + hw/isa/Kconfig| 10 +- hw/isa/Makefile.objs | 2 +- hw/m68k/mcf5208.c | 2 + hw/microblaze/boot.c | 1 + hw/mips/Kconfig | 1 + hw/mips/mips_int.c| 1 + hw/mips/mips_jazz.c | 2 + hw/ppc/spapr_iommu.c | 8 +- hw/timer/lm32_timer.c | 6 +- hw/timer/milkymist-sysctl.c | 10 +- hw/vfio/common.c | 52 +++-- hw/vfio/spapr.c | 4 +- hw/virtio/vhost.c | 9 +- include/exec/memory.h | 21 +- include/hw/vfio/vfio-common.h | 2 +- include/sysemu/kvm.h | 2 +- memory.c | 31 +-- scripts/kvm/vmxcap| 14 +- scsi/qemu-pr-helper.c | 6 +- target/i386/cpu.c | 447 +- target/i386/cpu.h | 146 +- target/i386/helper.c | 5 +- target/i386/kvm.c | 185 - tests/docker/test-clang | 4 +- tests/docker/test-debug | 1 + tests/migration-test.c| 157 +-- tests/test-char.c | 4 +- util/async.c | 6 +- util/oslib-win32.c| 6 +- 38 files changed, 1038 insertions(+), 244 deletions(-) -- 1.8.3.1
Re: [PULL 12/30] Makefile: Remove generated files when doing 'distclean'
On 04/10/19 14:20, Peter Maydell wrote: > On Wed, 2 Oct 2019 at 18:07, Paolo Bonzini wrote: >> >> From: Thomas Huth >> >> When running "make distclean" we currently leave a lot of generated >> files in the build directory. Fix that. >> >> Signed-off-by: Thomas Huth >> Reviewed-by: John Snow >> Signed-off-by: Paolo Bonzini >> --- > >> diff --git a/tests/Makefile.include b/tests/Makefile.include >> index 3543451..48b52da 100644 >> --- a/tests/Makefile.include >> +++ b/tests/Makefile.include >> @@ -1176,11 +1176,21 @@ check: check-block check-qapi-schema check-unit >> check-softfloat check-qtest chec >> check-clean: >> rm -rf $(check-unit-y) tests/*.o $(QEMU_IOTESTS_HELPERS-y) >> rm -rf $(sort $(foreach target,$(SYSEMU_TARGET_LIST), >> $(check-qtest-$(target)-y)) $(check-qtest-generic-y)) >> - rm -f tests/test-qapi-gen-timestamp >> rm -rf $(TESTS_VENV_DIR) $(TESTS_RESULTS_DIR) >> + rm -f tests/qemu-iotests/common.env tests/qemu-iotests/check.* >> + rm -f tests/test-qapi-gen-timestamp tests/qht-bench$(EXESUF) \ >> + tests/fp/fp-test tests/fp/*.out tests/qapi-schema/*.test.* >> >> clean: check-clean > > Hi; this change breaks the sequence > 'make clean; make; make check' > > because now 'make clean' removes tests/qemu-iotests/common.env. > But this file is created by 'configure', not by 'make', so if there's > no other reason why 'make' needs to re-run configure then we get > to the 'make check' stage with the file not existing, and then > when we try to run the iotests they fail with: > > ./check: line 60: > /home/petmay01/linaro/qemu-for-merges/build/alldbg/tests/qemu-iotests/common.env: > No such file or directory > check: failed to source common.env (make sure the qemu-iotests are run > from tests/qemu-iotests in the build tree) > /home/petmay01/linaro/qemu-for-merges/tests/Makefile.include:1102: > recipe for target 'check-tests/check-block.sh' failed > > thanks > -- PMM > I've dropped this patch and will send v3 that adds back the VMX patches. Paolo
Re: libvirt? qemu change that mmaps ELF files breaks libvirt svirt handling for os.kernel
On 04/10/19 14:47, Christian Borntraeger wrote: >> Please file a bz entry against the selinux-policy component for >> whatever distro you're using, and/or Fedora rawhide, and CC me >> on it too. > > Done. This was on Fedora 30. > https://bugzilla.redhat.com/show_bug.cgi?id=1758525 > > Now sure about others like RHEL. RHV. > We'll take care of that. Thanks! Paolo
Re: [PATCH 4/4] Revert "mirror: Only mirror granularity-aligned chunks"
On 12.09.19 17:13, Vladimir Sementsov-Ogievskiy wrote: > This reverts commit 9adc1cb49af8d4e54f57980b1eed5c0a4b2dafa6. > "mirror: Only mirror granularity-aligned chunks" > > Since previous commit unaligned chunks are supported by > do_sync_target_write. > > Signed-off-by: Vladimir Sementsov-Ogievskiy > --- > block/mirror.c | 29 - > 1 file changed, 29 deletions(-) Reviewed-by: Max Reitz signature.asc Description: OpenPGP digital signature
Re: [PATCH 3/4] block/mirror: support unaligned write in active mirror
On 12.09.19 17:13, Vladimir Sementsov-Ogievskiy wrote: > Prior 9adc1cb49af8d do_sync_target_write had a bug: it reset aligned-up > region in the dirty bitmap, which means that we may not copy some bytes > and assume them copied, which actually leads to producing corrupted > target. > > So 9adc1cb49af8d forced dirty bitmap granularity to be > request_alignment for mirror-top filter, so we are not working with > unaligned requests. However forcing large alignment obviously decreases > performance of unaligned requests. > > This commit provides another solution for the problem: if unaligned > padding is already dirty, we can safely ignore it, as > 1. It's dirty, it will be copied by mirror_iteration anyway > 2. It's dirty, so skipping it now we don't increase dirtiness of the >bitmap and therefore don't damage "synchronicity" of the >write-blocking mirror. > > If unaligned padding is not dirty, we just write it, no reason to touch > dirty bitmap if we succeed (on failure we'll set the whole region > ofcourse, but we loss "synchronicity" on failure anyway). > > Note: we need to disable dirty_bitmap, otherwise we will not be able to > see in do_sync_target_write bitmap state before current operation. We > may of course check dirty bitmap before the operation in > bdrv_mirror_top_do_write and remember it, but we don't need active > dirty bitmap for write-blocking mirror anyway. > > New code-path is unused until the following commit reverts > 9adc1cb49af8d. > > Suggested-by: Denis V. Lunev > Signed-off-by: Vladimir Sementsov-Ogievskiy > --- > block/mirror.c | 39 ++- > 1 file changed, 38 insertions(+), 1 deletion(-) > > diff --git a/block/mirror.c b/block/mirror.c > index d176bf5920..d192f6a96b 100644 > --- a/block/mirror.c > +++ b/block/mirror.c > @@ -1204,6 +1204,39 @@ do_sync_target_write(MirrorBlockJob *job, MirrorMethod > method, > QEMUIOVector *qiov, int flags) > { > int ret; > +size_t qiov_offset = 0; > + > +if (!QEMU_IS_ALIGNED(offset, job->granularity) && > +bdrv_dirty_bitmap_get(job->dirty_bitmap, offset)) { > +/* > + * Dirty unaligned padding > + * 1. It's already dirty, no damage to "actively_synced" if we > just > + *skip unaligned part. > + * 2. If we copy it, we can't reset corresponding bit in > + *dirty_bitmap as there may be some "dirty" bytes still not > + *copied. > + * So, just ignore it. > + */ > +qiov_offset = QEMU_ALIGN_UP(offset, job->granularity) - offset; > +if (bytes <= qiov_offset) { > +/* nothing to do after shrink */ > +return; > +} > +offset += qiov_offset; > +bytes -= qiov_offset; > +} > + > +if (!QEMU_IS_ALIGNED(offset + bytes, job->granularity) && > +bdrv_dirty_bitmap_get(job->dirty_bitmap, offset + bytes - 1)) > +{ > +uint64_t tail = (offset + bytes) % job->granularity; > + > +if (bytes <= tail) { > +/* nothing to do after shrink */ > +return; > +} > +bytes -= tail; > +} > > bdrv_reset_dirty_bitmap(job->dirty_bitmap, offset, bytes); > The bdrv_set_dirty_bitmap() in the error case below needs to use the original offset/bytes, I suppose. Apart from that, looks good to me. Max signature.asc Description: OpenPGP digital signature
[PATCH 4/5] hw/arm/boot: Expose the pmem nodes in the DT
From: Eric Auger In case of NV-DIMM slots, let's add /pmem DT nodes. Signed-off-by: Eric Auger Signed-off-by: Shameer Kolothum --- hw/arm/boot.c | 45 + 1 file changed, 45 insertions(+) diff --git a/hw/arm/boot.c b/hw/arm/boot.c index c264864c11..bd6d72b33e 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -20,6 +20,7 @@ #include "hw/boards.h" #include "sysemu/reset.h" #include "hw/loader.h" +#include "hw/mem/memory-device.h" #include "elf.h" #include "sysemu/device_tree.h" #include "qemu/config-file.h" @@ -523,6 +524,44 @@ static void fdt_add_psci_node(void *fdt) qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn); } +static int fdt_add_pmem_node(void *fdt, uint32_t acells, uint32_t scells) +{ +MemoryDeviceInfoList *info, *info_list = qmp_memory_device_list(); +MemoryDeviceInfo *mi; +int ret; + +for (info = info_list; info != NULL; info = info->next) { +mi = info->value; + +if (mi->type == MEMORY_DEVICE_INFO_KIND_NVDIMM) { +PCDIMMDeviceInfo *di = mi->u.nvdimm.data; +char *nodename; + +nodename = g_strdup_printf("/pmem@%" PRIx64, di->addr); +qemu_fdt_add_subnode(fdt, nodename); +qemu_fdt_setprop_string(fdt, nodename, "compatible", "pmem-region"); +ret = qemu_fdt_setprop_sized_cells(fdt, nodename, "reg", acells, + di->addr, scells, di->size); +/* only set the NUMA ID if it is specified */ +if (!ret && di->node >= 0) { +ret = qemu_fdt_setprop_cell(fdt, nodename, "numa-node-id", +di->node); +} + +g_free(nodename); + +if (ret < 0) { +fprintf(stderr, "couldn't add NVDIMM /memory@%"PRIx64" node\n", +di->addr); +goto out; +} +} +} +out: +qapi_free_MemoryDeviceInfoList(info_list); +return ret; +} + int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo, hwaddr addr_limit, AddressSpace *as, MachineState *ms) { @@ -622,6 +661,12 @@ int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo, } } +rc = fdt_add_pmem_node(fdt, acells, scells); +if (rc < 0) { +fprintf(stderr, "couldn't add pmem memory nodes\n"); +goto fail; +} + rc = fdt_path_offset(fdt, "/chosen"); if (rc < 0) { qemu_fdt_add_subnode(fdt, "/chosen"); -- 2.17.1
[PATCH 2/5] nvdimm: Use configurable ACPI IO base and size
From: Kwangwoo Lee This patch makes IO base and size configurable to create NPIO AML for ACPI NFIT. Since a different architecture like AArch64 does not use port-mapped IO, a configurable IO base is required to create correct mapping of ACPI IO address and size. Signed-off-by: Kwangwoo Lee Signed-off-by: Eric Auger Signed-off-by: Shameer Kolothum --- hw/acpi/nvdimm.c| 32 ++-- hw/i386/acpi-build.c| 6 ++ hw/i386/acpi-build.h| 3 +++ hw/i386/pc_piix.c | 2 ++ hw/i386/pc_q35.c| 2 ++ include/hw/mem/nvdimm.h | 3 +++ 6 files changed, 38 insertions(+), 10 deletions(-) diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c index 9fdad6dc3f..f91eea3802 100644 --- a/hw/acpi/nvdimm.c +++ b/hw/acpi/nvdimm.c @@ -926,11 +926,13 @@ void nvdimm_acpi_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev) } void nvdimm_init_acpi_state(NVDIMMState *state, MemoryRegion *io, +struct AcpiGenericAddress dsm_io, FWCfgState *fw_cfg, Object *owner) { +state->dsm_io = dsm_io; memory_region_init_io(>io_mr, owner, _dsm_ops, state, - "nvdimm-acpi-io", NVDIMM_ACPI_IO_LEN); -memory_region_add_subregion(io, NVDIMM_ACPI_IO_BASE, >io_mr); + "nvdimm-acpi-io", dsm_io.bit_width >> 3); +memory_region_add_subregion(io, dsm_io.address, >io_mr); state->dsm_mem = g_array_new(false, true /* clear */, 1); acpi_data_push(state->dsm_mem, sizeof(NvdimmDsmIn)); @@ -959,12 +961,14 @@ void nvdimm_init_acpi_state(NVDIMMState *state, MemoryRegion *io, #define NVDIMM_QEMU_RSVD_UUID "648B9CF2-CDA1-4312-8AD9-49C4AF32BD62" -static void nvdimm_build_common_dsm(Aml *dev) +static void nvdimm_build_common_dsm(Aml *dev, +NVDIMMState *nvdimm_state) { Aml *method, *ifctx, *function, *handle, *uuid, *dsm_mem, *elsectx2; Aml *elsectx, *unsupport, *unpatched, *expected_uuid, *uuid_invalid; Aml *pckg, *pckg_index, *pckg_buf, *field, *dsm_out_buf, *dsm_out_buf_size; uint8_t byte_list[1]; +AmlRegionSpace rs; method = aml_method(NVDIMM_COMMON_DSM, 5, AML_SERIALIZED); uuid = aml_arg(0); @@ -975,9 +979,16 @@ static void nvdimm_build_common_dsm(Aml *dev) aml_append(method, aml_store(aml_name(NVDIMM_ACPI_MEM_ADDR), dsm_mem)); +if (nvdimm_state->dsm_io.space_id == AML_AS_SYSTEM_IO) { +rs = AML_SYSTEM_IO; +} else { +rs = AML_SYSTEM_MEMORY; +} + /* map DSM memory and IO into ACPI namespace. */ -aml_append(method, aml_operation_region(NVDIMM_DSM_IOPORT, AML_SYSTEM_IO, - aml_int(NVDIMM_ACPI_IO_BASE), NVDIMM_ACPI_IO_LEN)); +aml_append(method, aml_operation_region(NVDIMM_DSM_IOPORT, rs, + aml_int(nvdimm_state->dsm_io.address), + nvdimm_state->dsm_io.bit_width >> 3)); aml_append(method, aml_operation_region(NVDIMM_DSM_MEMORY, AML_SYSTEM_MEMORY, dsm_mem, sizeof(NvdimmDsmIn))); @@ -992,7 +1003,7 @@ static void nvdimm_build_common_dsm(Aml *dev) field = aml_field(NVDIMM_DSM_IOPORT, AML_DWORD_ACC, AML_NOLOCK, AML_PRESERVE); aml_append(field, aml_named_field(NVDIMM_DSM_NOTIFY, - NVDIMM_ACPI_IO_LEN * BITS_PER_BYTE)); + (nvdimm_state->dsm_io.bit_width >> 3) * BITS_PER_BYTE)); aml_append(method, field); /* @@ -1260,7 +1271,8 @@ static void nvdimm_build_nvdimm_devices(Aml *root_dev, uint32_t ram_slots) } static void nvdimm_build_ssdt(GArray *table_offsets, GArray *table_data, - BIOSLinker *linker, GArray *dsm_dma_area, + BIOSLinker *linker, + NVDIMMState *nvdimm_state, uint32_t ram_slots) { Aml *ssdt, *sb_scope, *dev; @@ -1288,7 +1300,7 @@ static void nvdimm_build_ssdt(GArray *table_offsets, GArray *table_data, */ aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0012"))); -nvdimm_build_common_dsm(dev); +nvdimm_build_common_dsm(dev, nvdimm_state); /* 0 is reserved for root device. */ nvdimm_build_device_dsm(dev, 0); @@ -1307,7 +1319,7 @@ static void nvdimm_build_ssdt(GArray *table_offsets, GArray *table_data, NVDIMM_ACPI_MEM_ADDR); bios_linker_loader_alloc(linker, - NVDIMM_DSM_MEM_FILE, dsm_dma_area, + NVDIMM_DSM_MEM_FILE, nvdimm_state->dsm_mem, sizeof(NvdimmDsmIn), false /* high memory */); bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, mem_addr_offset, sizeof(uint32_t), @@ -1329,7 +1341,7 @@ void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data, return; } -nvdimm_build_ssdt(table_offsets, table_data, linker, state->dsm_mem, +
[PATCH 5/5] hw/arm/virt: Add nvdimm hotplug support
This adds support for nvdimm hotplug events through GED and enables nvdimm for the arm/virt. Now Guests with DT boot can have nvdimm cold plug and with ACPI both cold/hot plug. Hot removal functionality is not yet supported. Signed-off-by: Shameer Kolothum --- docs/specs/acpi_hw_reduced_hotplug.rst | 1 + hw/acpi/generic_event_device.c | 13 + hw/arm/virt.c | 20 +++- include/hw/acpi/generic_event_device.h | 1 + 4 files changed, 30 insertions(+), 5 deletions(-) diff --git a/docs/specs/acpi_hw_reduced_hotplug.rst b/docs/specs/acpi_hw_reduced_hotplug.rst index 911a98255b..e3abe975bf 100644 --- a/docs/specs/acpi_hw_reduced_hotplug.rst +++ b/docs/specs/acpi_hw_reduced_hotplug.rst @@ -63,6 +63,7 @@ GED IO interface (4 byte access) bits: 0: Memory hotplug event 1: System power down event + 2: NVDIMM hotplug event 2-31: Reserved **write_access:** diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c index 9cee90cc70..ad1b684304 100644 --- a/hw/acpi/generic_event_device.c +++ b/hw/acpi/generic_event_device.c @@ -16,6 +16,7 @@ #include "hw/acpi/generic_event_device.h" #include "hw/irq.h" #include "hw/mem/pc-dimm.h" +#include "hw/mem/nvdimm.h" #include "hw/qdev-properties.h" #include "migration/vmstate.h" #include "qemu/error-report.h" @@ -23,6 +24,7 @@ static const uint32_t ged_supported_events[] = { ACPI_GED_MEM_HOTPLUG_EVT, ACPI_GED_PWR_DOWN_EVT, +ACPI_GED_NVDIMM_HOTPLUG_EVT, }; /* @@ -110,6 +112,11 @@ void build_ged_aml(Aml *table, const char *name, HotplugHandler *hotplug_dev, aml_notify(aml_name(ACPI_POWER_BUTTON_DEVICE), aml_int(0x80))); break; +case ACPI_GED_NVDIMM_HOTPLUG_EVT: +aml_append(if_ctx, + aml_notify(aml_name("\\_SB.NVDR"), + aml_int(0x80))); +break; default: /* * Please make sure all the events in ged_supported_events[] @@ -175,7 +182,11 @@ static void acpi_ged_device_plug_cb(HotplugHandler *hotplug_dev, AcpiGedState *s = ACPI_GED(hotplug_dev); if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { +if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) { +nvdimm_acpi_plug_cb(hotplug_dev, dev); +} else { acpi_memory_plug_cb(hotplug_dev, >memhp_state, dev, errp); +} } else { error_setg(errp, "virt: device plug request for unsupported device" " type: %s", object_get_typename(OBJECT(dev))); @@ -192,6 +203,8 @@ static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev) sel = ACPI_GED_MEM_HOTPLUG_EVT; } else if (ev & ACPI_POWER_DOWN_STATUS) { sel = ACPI_GED_PWR_DOWN_EVT; +} else if (ev & ACPI_NVDIMM_HOTPLUG_STATUS) { +sel = ACPI_GED_NVDIMM_HOTPLUG_EVT; } else { /* Unknown event. Return without generating interrupt. */ warn_report("GED: Unsupported event %d. No irq injected", ev); diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 30bc8a7803..acdcba9638 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -543,6 +543,10 @@ static inline DeviceState *create_acpi_ged(VirtMachineState *vms, qemu_irq *pic) event |= ACPI_GED_MEM_HOTPLUG_EVT; } +if (ms->nvdimms_state->is_enabled) { +event |= ACPI_GED_NVDIMM_HOTPLUG_EVT; +} + dev = qdev_create(NULL, TYPE_ACPI_GED); qdev_prop_set_uint32(dev, "ged-event", event); @@ -1938,9 +1942,12 @@ static void virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, } if (!vms->acpi_dev) { -error_setg(errp, - "memory hotplug is not enabled: missing acpi-ged device"); -return; +/* Allow nvdimm DT or cold plug */ +if (!(is_nvdimm && !dev->hotplugged)) { +error_setg(errp, + "memory hotplug is not enabled: missing acpi-ged device"); +return; + } } pc_dimm_pre_plug(PC_DIMM(dev), MACHINE(hotplug_dev), NULL, errp); @@ -1964,8 +1971,10 @@ static void virt_memory_plug(HotplugHandler *hotplug_dev, nvdimm_plug(ms->nvdimms_state); } -hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev); -hhc->plug(HOTPLUG_HANDLER(vms->acpi_dev), dev, _abort); +if (vms->acpi_dev) { +hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev); +hhc->plug(HOTPLUG_HANDLER(vms->acpi_dev), dev, _abort); +} out: error_propagate(errp, local_err); } @@ -2073,6 +2082,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data) hc->plug = virt_machine_device_plug_cb; hc->unplug_request = virt_machine_device_unplug_request_cb; mc->numa_mem_supported = true; +mc->nvdimm_supported = true; mc->auto_enable_numa_with_memhp
[PATCH 3/5] hw/arm/virt: Add nvdimm hot-plug infrastructure
From: Kwangwoo Lee Pre-plug and plug handlers are prepared for NVDIMM support. Please note nvdimm_support is not yet enabled. Signed-off-by: Eric Auger Signed-off-by: Kwangwoo Lee Signed-off-by: Shameer Kolothum --- hw/arm/Kconfig | 1 + hw/arm/virt-acpi-build.c | 6 ++ hw/arm/virt.c| 22 +- hw/mem/Kconfig | 2 +- include/hw/arm/virt.h| 1 + 5 files changed, 30 insertions(+), 2 deletions(-) diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig index c6e7782580..851dd81289 100644 --- a/hw/arm/Kconfig +++ b/hw/arm/Kconfig @@ -24,6 +24,7 @@ config ARM_VIRT select DIMM select ACPI_MEMORY_HOTPLUG select ACPI_HW_REDUCED +select ACPI_NVDIMM config CHEETAH bool diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 074e0c858e..4e63f5da48 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -44,6 +44,7 @@ #include "hw/pci/pcie_host.h" #include "hw/pci/pci.h" #include "hw/arm/virt.h" +#include "hw/mem/nvdimm.h" #include "sysemu/numa.h" #include "sysemu/reset.h" #include "kvm_arm.h" @@ -835,6 +836,11 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) } } +if (ms->nvdimms_state->is_enabled) { +nvdimm_build_acpi(table_offsets, tables_blob, tables->linker, + ms->nvdimms_state, ms->ram_slots); +} + if (its_class_name() && !vmc->no_its) { acpi_add_table(table_offsets, tables_blob); build_iort(tables_blob, tables->linker, vms); diff --git a/hw/arm/virt.c b/hw/arm/virt.c index d4bedc2607..30bc8a7803 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -143,6 +143,7 @@ static const MemMapEntry base_memmap[] = { [VIRT_SMMU] = { 0x0905, 0x0002 }, [VIRT_PCDIMM_ACPI] ={ 0x0907, MEMORY_HOTPLUG_IO_LEN }, [VIRT_ACPI_GED] = { 0x0908, ACPI_GED_EVT_SEL_LEN }, +[VIRT_NVDIMM_ACPI] ={ 0x0909, NVDIMM_ACPI_IO_LEN}, [VIRT_MMIO] = { 0x0a00, 0x0200 }, /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */ [VIRT_PLATFORM_BUS] = { 0x0c00, 0x0200 }, @@ -1750,6 +1751,18 @@ static void machvirt_init(MachineState *machine) create_platform_bus(vms, pic); +if (machine->nvdimms_state->is_enabled) { +const struct AcpiGenericAddress arm_virt_nvdimm_acpi_dsmio = { +.space_id = AML_AS_SYSTEM_MEMORY, +.address = vms->memmap[VIRT_NVDIMM_ACPI].base, +.bit_width = NVDIMM_ACPI_IO_LEN << 3 +}; + +nvdimm_init_acpi_state(machine->nvdimms_state, sysmem, + arm_virt_nvdimm_acpi_dsmio, + vms->fw_cfg, OBJECT(vms)); +} + vms->bootinfo.ram_size = machine->ram_size; vms->bootinfo.nb_cpus = smp_cpus; vms->bootinfo.board_id = -1; @@ -1916,9 +1929,10 @@ static void virt_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); +MachineState *ms = MACHINE(hotplug_dev); const bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); -if (is_nvdimm) { +if (is_nvdimm && (!ms->nvdimms_state->is_enabled)) { error_setg(errp, "nvdimm is not yet supported"); return; } @@ -1937,6 +1951,8 @@ static void virt_memory_plug(HotplugHandler *hotplug_dev, { HotplugHandlerClass *hhc; VirtMachineState *vms = VIRT_MACHINE(hotplug_dev); +MachineState *ms = MACHINE(hotplug_dev); +bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); Error *local_err = NULL; pc_dimm_plug(PC_DIMM(dev), MACHINE(vms), _err); @@ -1944,6 +1960,10 @@ static void virt_memory_plug(HotplugHandler *hotplug_dev, goto out; } +if (is_nvdimm) { +nvdimm_plug(ms->nvdimms_state); +} + hhc = HOTPLUG_HANDLER_GET_CLASS(vms->acpi_dev); hhc->plug(HOTPLUG_HANDLER(vms->acpi_dev), dev, _abort); out: diff --git a/hw/mem/Kconfig b/hw/mem/Kconfig index 620fd4cb59..0d5f8f321a 100644 --- a/hw/mem/Kconfig +++ b/hw/mem/Kconfig @@ -8,4 +8,4 @@ config MEM_DEVICE config NVDIMM bool default y -depends on PC +depends on PC || ARM_VIRT diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h index 0b41083e9d..06d5e75611 100644 --- a/include/hw/arm/virt.h +++ b/include/hw/arm/virt.h @@ -79,6 +79,7 @@ enum { VIRT_SECURE_MEM, VIRT_PCDIMM_ACPI, VIRT_ACPI_GED, +VIRT_NVDIMM_ACPI, VIRT_LOWMEMMAP_LAST, }; -- 2.17.1
[PATCH 0/5] ARM virt: Add NVDIMM support
This series adds NVDIMM support to arm/virt platform. This has a dependency on [0] and make use of the GED device for NVDIMM hotplug events. The series reuses some of the patches posted by Eric in his earlier attempt here[1]. Patch 1/5 is a fix to the Guest reboot issue on NVDIMM hot add case described here[2]. I have done basic sanity testing of NVDIMM deviecs with both ACPI and DT Guest boot. Further testing is always welcome. Please let me know your feedback. Thanks, Shameer [0] https://patchwork.kernel.org/cover/11150345/ [1] https://patchwork.kernel.org/cover/10830777/ [2] https://patchwork.kernel.org/patch/11154757/ Eric Auger (1): hw/arm/boot: Expose the pmem nodes in the DT Kwangwoo Lee (2): nvdimm: Use configurable ACPI IO base and size hw/arm/virt: Add nvdimm hot-plug infrastructure Shameer Kolothum (2): hw/arm: Align ACPI blob len to PAGE size hw/arm/virt: Add nvdimm hotplug support docs/specs/acpi_hw_reduced_hotplug.rst | 1 + hw/acpi/generic_event_device.c | 13 hw/acpi/nvdimm.c | 32 -- hw/arm/Kconfig | 1 + hw/arm/boot.c | 45 ++ hw/arm/virt-acpi-build.c | 20 hw/arm/virt.c | 42 hw/i386/acpi-build.c | 6 hw/i386/acpi-build.h | 3 ++ hw/i386/pc_piix.c | 2 ++ hw/i386/pc_q35.c | 2 ++ hw/mem/Kconfig | 2 +- include/hw/acpi/generic_event_device.h | 1 + include/hw/arm/virt.h | 1 + include/hw/mem/nvdimm.h| 3 ++ 15 files changed, 157 insertions(+), 17 deletions(-) -- 2.17.1
[PATCH 1/5] block: Mark commit and mirror as filter drivers
From: Max Reitz The commit and mirror block nodes are filters, so they should be marked as such. Signed-off-by: Max Reitz Signed-off-by: Vladimir Sementsov-Ogievskiy [squash comment fix from another Max's patch and adjust commit msg] --- include/block/block_int.h | 8 +--- block/commit.c| 2 ++ block/mirror.c| 2 ++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/block/block_int.h b/include/block/block_int.h index 0422acdf1c..4af0fa8fe4 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -89,9 +89,11 @@ struct BlockDriver { int instance_size; /* set to true if the BlockDriver is a block filter. Block filters pass - * certain callbacks that refer to data (see block.c) to their bs->file if - * the driver doesn't implement them. Drivers that do not wish to forward - * must implement them and return -ENOTSUP. + * certain callbacks that refer to data (see block.c) to their bs->file + * or bs->backing (whichever one exists) if the driver doesn't implement + * them. Drivers that do not wish to forward must implement them and return + * -ENOTSUP. + * Note that filters are not allowed to modify data. */ bool is_filter; /* for snapshots block filter like Quorum can implement the diff --git a/block/commit.c b/block/commit.c index bc8454463d..07e7a3cd0a 100644 --- a/block/commit.c +++ b/block/commit.c @@ -253,6 +253,8 @@ static BlockDriver bdrv_commit_top = { .bdrv_co_block_status = bdrv_co_block_status_from_backing, .bdrv_refresh_filename = bdrv_commit_top_refresh_filename, .bdrv_child_perm= bdrv_commit_top_child_perm, + +.is_filter = true, }; void commit_start(const char *job_id, BlockDriverState *bs, diff --git a/block/mirror.c b/block/mirror.c index fe984efb90..838781dfaa 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1487,6 +1487,8 @@ static BlockDriver bdrv_mirror_top = { .bdrv_refresh_filename = bdrv_mirror_top_refresh_filename, .bdrv_child_perm= bdrv_mirror_top_child_perm, .bdrv_refresh_limits= bdrv_mirror_top_refresh_limits, + +.is_filter = true, }; static BlockJob *mirror_start_job( -- 2.21.0
[PATCH 0/5] fix migration with bitmaps and mirror
Hi all! It's a continuation for "bitmap migration bug with -drive while block mirror runs" <315cff78-dcdb-a3ce-2742-da3cc9f0c...@redhat.com> https://lists.gnu.org/archive/html/qemu-devel/2019-09/msg07241.html The problem is that bitmaps migrated to node with same node-name or blk-parent name. And currently only the latter actually work in libvirt. And with mirror-top filter it doesn't work, because bdrv_get_device_or_node_name don't go through filters. Fix this by handling filtered children of block backends in separate. Max Reitz (1): block: Mark commit and mirror as filter drivers Vladimir Sementsov-Ogievskiy (4): migretion/block-dirty-bitmap: refactor init_dirty_bitmap_migration block/dirty-bitmap: add bdrv_has_named_bitmaps helper migration/block-dirty-bitmap: fix bitmaps migration during mirror job iotests: 194: test also migration of dirty bitmap include/block/block_int.h | 8 ++- include/block/dirty-bitmap.h | 1 + block/commit.c | 2 + block/dirty-bitmap.c | 13 block/mirror.c | 2 + migration/block-dirty-bitmap.c | 118 +++-- tests/qemu-iotests/194 | 14 ++-- tests/qemu-iotests/194.out | 6 ++ 8 files changed, 121 insertions(+), 43 deletions(-) -- 2.21.0
[PATCH 1/5] hw/arm: Align ACPI blob len to PAGE size
If ACPI blob length modifications happens after the initial virt_acpi_build() call, and the changed blob length is within the PAGE size boundary, then the revised size is not seen by the firmware on Guest reboot. The is because in the virt_acpi_build_update() -> acpi_ram_update() -> qemu_ram_resize() path, qemu_ram_resize() uses ram_block size which is aligned to PAGE size and the "resize callback" to update the size seen by firmware is not getting invoked. Hence align ACPI blob sizes to PAGE boundary. Signed-off-by: Shameer Kolothum --- More details on this issue can be found here, https://patchwork.kernel.org/patch/11154757/ --- hw/arm/virt-acpi-build.c | 14 ++ 1 file changed, 14 insertions(+) diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index 4cd50175e0..074e0c858e 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -790,6 +790,7 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) GArray *table_offsets; unsigned dsdt, xsdt; GArray *tables_blob = tables->table_data; +GArray *cmd_blob = tables->linker->cmd_blob; MachineState *ms = MACHINE(vms); table_offsets = g_array_new(false, true /* clear */, @@ -854,6 +855,19 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) build_rsdp(tables->rsdp, tables->linker, _data); } +/* + * Align the ACPI blob lengths to PAGE size so that on ACPI table + * regeneration, the length that firmware sees really gets updated + * through 'resize' callback in qemu_ram_resize() in the + * virt_acpi_build_update() -> acpi_ram_update() -> qemu_ram_resize() + * path. + */ +g_array_set_size(tables_blob, + TARGET_PAGE_ALIGN(acpi_data_len(tables_blob))); +g_array_set_size(tables->rsdp, + TARGET_PAGE_ALIGN(acpi_data_len(tables->rsdp))); +g_array_set_size(cmd_blob, + TARGET_PAGE_ALIGN(acpi_data_len(cmd_blob))); /* Cleanup memory that's no longer used. */ g_array_free(table_offsets, true); } -- 2.17.1
[PATCH 4/5] migration/block-dirty-bitmap: fix bitmaps migration during mirror job
Important thing for bitmap migration is to select destination block node to obtain the migrated bitmap. Prepatch, on source we use bdrv_get_device_or_node_name() to identify the node, and on target we do bdrv_lookup_bs. bdrv_get_device_or_node_name() returns blk name only for direct children of blk. So, bitmaps of direct children of blks are migrated by blk name and others - by node name. Libvirt currently is unprepared to bitmap migration by node-name, node-names are mostly auto-generated. So actually only migration by blk name works. Now, consider classic libvirt migrations assisted by mirror block job: mirror block job inserts filter, so our source is not a direct child of blk, and bitmaps are migrated by node-names. And this just don't work. Let's fix it by allowing use blk-name even if some implicit filters are inserted. Note, that we possibly want to allow explicit filters skipping too, but this is another story. Note2: we, of course, can't skip filters and use blk name to migrate bitmaps in filtered node by blk name for this blk if these filters have named bitmaps which should be migrated. Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1652424 Signed-off-by: Vladimir Sementsov-Ogievskiy --- migration/block-dirty-bitmap.c | 39 +- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c index 46641b7861..3105479c50 100644 --- a/migration/block-dirty-bitmap.c +++ b/migration/block-dirty-bitmap.c @@ -321,14 +321,48 @@ static int init_dirty_bitmap_migration(void) { BlockDriverState *bs; DirtyBitmapMigBitmapState *dbms; +GHashTable *handled_by_blk = g_hash_table_new(NULL, NULL); +BlockBackend *blk; dirty_bitmap_mig_state.bulk_completed = false; dirty_bitmap_mig_state.prev_bs = NULL; dirty_bitmap_mig_state.prev_bitmap = NULL; dirty_bitmap_mig_state.no_bitmaps = false; +/* + * Use blockdevice name for direct (or filtered) children of named block + * backends. + */ +for (blk = blk_next(NULL); blk; blk = blk_next(blk)) { +const char *name = blk_name(blk); + +if (!name || strcmp(name, "") == 0) { +continue; +} + +bs = blk_bs(blk); + +/* Skip filters without bitmaos */ +while (bs && bs->drv && bs->drv->is_filter && + !bdrv_has_named_bitmaps(bs)) +{ +bs = bs->backing->bs ?: bs->file->bs; +} + +if (bs && bs->drv && !bs->drv->is_filter) { +if (add_bitmaps_to_list(bs, name)) { +goto fail; +} +g_hash_table_add(handled_by_blk, bs); +} +} + for (bs = bdrv_next_all_states(NULL); bs; bs = bdrv_next_all_states(bs)) { -if (add_bitmaps_to_list(bs, bdrv_get_device_or_node_name(bs))) { +if (g_hash_table_contains(handled_by_blk, bs)) { +continue; +} + +if (add_bitmaps_to_list(bs, bdrv_get_node_name(bs))) { goto fail; } } @@ -342,9 +376,12 @@ static int init_dirty_bitmap_migration(void) dirty_bitmap_mig_state.no_bitmaps = true; } +g_hash_table_destroy(handled_by_blk); + return 0; fail: +g_hash_table_destroy(handled_by_blk); dirty_bitmap_mig_cleanup(); return -1; -- 2.21.0
[PATCH 3/5] block/dirty-bitmap: add bdrv_has_named_bitmaps helper
To be used for bitmap migration in further commit. Signed-off-by: Vladimir Sementsov-Ogievskiy --- include/block/dirty-bitmap.h | 1 + block/dirty-bitmap.c | 13 + 2 files changed, 14 insertions(+) diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h index 4b4b731b46..4c0ebe5c2a 100644 --- a/include/block/dirty-bitmap.h +++ b/include/block/dirty-bitmap.h @@ -101,6 +101,7 @@ int64_t bdrv_get_meta_dirty_count(BdrvDirtyBitmap *bitmap); void bdrv_dirty_bitmap_truncate(BlockDriverState *bs, int64_t bytes); bool bdrv_dirty_bitmap_readonly(const BdrvDirtyBitmap *bitmap); bool bdrv_has_readonly_bitmaps(BlockDriverState *bs); +bool bdrv_has_named_bitmaps(BlockDriverState *bs); bool bdrv_dirty_bitmap_get_autoload(const BdrvDirtyBitmap *bitmap); bool bdrv_dirty_bitmap_get_persistence(BdrvDirtyBitmap *bitmap); bool bdrv_dirty_bitmap_inconsistent(const BdrvDirtyBitmap *bitmap); diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c index 134e0c9a0c..cfc957ebc2 100644 --- a/block/dirty-bitmap.c +++ b/block/dirty-bitmap.c @@ -742,6 +742,19 @@ bool bdrv_has_readonly_bitmaps(BlockDriverState *bs) return false; } +bool bdrv_has_named_bitmaps(BlockDriverState *bs) +{ +BdrvDirtyBitmap *bm; + +QLIST_FOREACH(bm, >dirty_bitmaps, list) { +if (bdrv_dirty_bitmap_name(bm)) { +return true; +} +} + +return false; +} + /* Called with BQL taken. */ void bdrv_dirty_bitmap_set_persistence(BdrvDirtyBitmap *bitmap, bool persistent) { -- 2.21.0
[PATCH 5/5] iotests: 194: test also migration of dirty bitmap
Test that dirty bitmap migration works when we deal with mirror. Signed-off-by: Vladimir Sementsov-Ogievskiy --- tests/qemu-iotests/194 | 14 ++ tests/qemu-iotests/194.out | 6 ++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/tests/qemu-iotests/194 b/tests/qemu-iotests/194 index d746ab1e21..bcf55a03e1 100755 --- a/tests/qemu-iotests/194 +++ b/tests/qemu-iotests/194 @@ -42,6 +42,8 @@ with iotests.FilePath('source.img') as source_img_path, \ .add_incoming('unix:{0}'.format(migration_sock_path)) .launch()) +source_vm.qmp_log('block-dirty-bitmap-add', node='drive0', name='bitmap0') + iotests.log('Launching NBD server on destination...') iotests.log(dest_vm.qmp('nbd-server-start', addr={'type': 'unix', 'data': {'path': nbd_sock_path}})) iotests.log(dest_vm.qmp('nbd-server-add', device='drive0', writable=True)) @@ -61,12 +63,14 @@ with iotests.FilePath('source.img') as source_img_path, \ filters=[iotests.filter_qmp_event]) iotests.log('Starting migration...') -source_vm.qmp('migrate-set-capabilities', - capabilities=[{'capability': 'events', 'state': True}]) -dest_vm.qmp('migrate-set-capabilities', -capabilities=[{'capability': 'events', 'state': True}]) +capabilities = [{'capability': 'events', 'state': True}, +{'capability': 'dirty-bitmaps', 'state': True}] +source_vm.qmp('migrate-set-capabilities', capabilities=capabilities) +dest_vm.qmp('migrate-set-capabilities', capabilities=capabilities) iotests.log(source_vm.qmp('migrate', uri='unix:{0}'.format(migration_sock_path))) +source_vm.qmp_log('migrate-start-postcopy') + while True: event1 = source_vm.event_wait('MIGRATION') iotests.log(event1, filters=[iotests.filter_qmp_event]) @@ -82,3 +86,5 @@ with iotests.FilePath('source.img') as source_img_path, \ iotests.log('Stopping the NBD server on destination...') iotests.log(dest_vm.qmp('nbd-server-stop')) break + +iotests.log(source_vm.qmp('query-block')['return'][0]['dirty-bitmaps']) diff --git a/tests/qemu-iotests/194.out b/tests/qemu-iotests/194.out index 71857853fb..dd60dcc14f 100644 --- a/tests/qemu-iotests/194.out +++ b/tests/qemu-iotests/194.out @@ -1,4 +1,6 @@ Launching VMs... +{"execute": "block-dirty-bitmap-add", "arguments": {"name": "bitmap0", "node": "drive0"}} +{"return": {}} Launching NBD server on destination... {"return": {}} {"return": {}} @@ -8,11 +10,15 @@ Waiting for `drive-mirror` to complete... {"data": {"device": "mirror-job0", "len": 1073741824, "offset": 1073741824, "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} Starting migration... {"return": {}} +{"execute": "migrate-start-postcopy", "arguments": {}} +{"return": {}} {"data": {"status": "setup"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} {"data": {"status": "active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} +{"data": {"status": "postcopy-active"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} {"data": {"status": "completed"}, "event": "MIGRATION", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} Gracefully ending the `drive-mirror` job on source... {"return": {}} {"data": {"device": "mirror-job0", "len": 1073741824, "offset": 1073741824, "speed": 0, "type": "mirror"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}} Stopping the NBD server on destination... {"return": {}} +[{"busy": false, "count": 0, "granularity": 65536, "name": "bitmap0", "persistent": false, "recording": true, "status": "active"}] -- 2.21.0
[PATCH 2/5] migretion/block-dirty-bitmap: refactor init_dirty_bitmap_migration
Split out handling one bs, it is needed for the following commit, which will handle BlockBackends in separate. Signed-off-by: Vladimir Sementsov-Ogievskiy --- migration/block-dirty-bitmap.c | 93 +++--- 1 file changed, 51 insertions(+), 42 deletions(-) diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c index 5121f86d73..46641b7861 100644 --- a/migration/block-dirty-bitmap.c +++ b/migration/block-dirty-bitmap.c @@ -268,59 +268,68 @@ static void dirty_bitmap_mig_cleanup(void) } /* Called with iothread lock taken. */ -static int init_dirty_bitmap_migration(void) +static int add_bitmaps_to_list(BlockDriverState *bs, const char *bs_name) { -BlockDriverState *bs; BdrvDirtyBitmap *bitmap; DirtyBitmapMigBitmapState *dbms; Error *local_err = NULL; -dirty_bitmap_mig_state.bulk_completed = false; -dirty_bitmap_mig_state.prev_bs = NULL; -dirty_bitmap_mig_state.prev_bitmap = NULL; -dirty_bitmap_mig_state.no_bitmaps = false; +for (bitmap = bdrv_dirty_bitmap_next(bs, NULL); bitmap; + bitmap = bdrv_dirty_bitmap_next(bs, bitmap)) +{ +if (!bdrv_dirty_bitmap_name(bitmap)) { +continue; +} -for (bs = bdrv_next_all_states(NULL); bs; bs = bdrv_next_all_states(bs)) { -const char *name = bdrv_get_device_or_node_name(bs); +if (!bs_name || strcmp(bs_name, "") == 0) { +error_report("Found bitmap '%s' in unnamed node %p. It can't " + "be migrated", bdrv_dirty_bitmap_name(bitmap), bs); +return -1; +} -for (bitmap = bdrv_dirty_bitmap_next(bs, NULL); bitmap; - bitmap = bdrv_dirty_bitmap_next(bs, bitmap)) -{ -if (!bdrv_dirty_bitmap_name(bitmap)) { -continue; -} +if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, _err)) { +error_report_err(local_err); +return -1; +} -if (!name || strcmp(name, "") == 0) { -error_report("Found bitmap '%s' in unnamed node %p. It can't " - "be migrated", bdrv_dirty_bitmap_name(bitmap), bs); -goto fail; -} +bdrv_ref(bs); +bdrv_dirty_bitmap_set_busy(bitmap, true); + +dbms = g_new0(DirtyBitmapMigBitmapState, 1); +dbms->bs = bs; +dbms->node_name = bs_name; +dbms->bitmap = bitmap; +dbms->total_sectors = bdrv_nb_sectors(bs); +dbms->sectors_per_chunk = CHUNK_SIZE * 8 * +bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS; +if (bdrv_dirty_bitmap_enabled(bitmap)) { +dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_ENABLED; +} +if (bdrv_dirty_bitmap_get_persistence(bitmap)) { +dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT; +} -if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, -_err)) { -error_report_err(local_err); -goto fail; -} +QSIMPLEQ_INSERT_TAIL(_bitmap_mig_state.dbms_list, + dbms, entry); +} -bdrv_ref(bs); -bdrv_dirty_bitmap_set_busy(bitmap, true); - -dbms = g_new0(DirtyBitmapMigBitmapState, 1); -dbms->bs = bs; -dbms->node_name = name; -dbms->bitmap = bitmap; -dbms->total_sectors = bdrv_nb_sectors(bs); -dbms->sectors_per_chunk = CHUNK_SIZE * 8 * -bdrv_dirty_bitmap_granularity(bitmap) >> BDRV_SECTOR_BITS; -if (bdrv_dirty_bitmap_enabled(bitmap)) { -dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_ENABLED; -} -if (bdrv_dirty_bitmap_get_persistence(bitmap)) { -dbms->flags |= DIRTY_BITMAP_MIG_START_FLAG_PERSISTENT; -} +return 0; +} + +/* Called with iothread lock taken. */ +static int init_dirty_bitmap_migration(void) +{ +BlockDriverState *bs; +DirtyBitmapMigBitmapState *dbms; + +dirty_bitmap_mig_state.bulk_completed = false; +dirty_bitmap_mig_state.prev_bs = NULL; +dirty_bitmap_mig_state.prev_bitmap = NULL; +dirty_bitmap_mig_state.no_bitmaps = false; -QSIMPLEQ_INSERT_TAIL(_bitmap_mig_state.dbms_list, - dbms, entry); +for (bs = bdrv_next_all_states(NULL); bs; bs = bdrv_next_all_states(bs)) { +if (add_bitmaps_to_list(bs, bdrv_get_device_or_node_name(bs))) { +goto fail; } } -- 2.21.0
Re: [PATCH v5 0/5] iotests: use python logging
On 18.09.19 01:45, John Snow wrote: > This series uses python logging to enable output conditionally on > iotests.log(). We unify an initialization call (which also enables > debugging output for those tests with -d) and then make the switch > inside of iotests. > > It will help alleviate the need to create logged/unlogged versions > of all the various helpers we have made. > > V5: > - Rebased again > - Allow Python tests to run on any platform > > V4: > - Rebased on top of kwolf/block at the behest of mreitz > > V3: > - Rebased for 4.1+; now based on main branch. > > V2: > - Added all of the other python tests I missed to use script_initialize > - Refactored the common setup as per Ehabkost's suggestion > - Added protocol arguments to common initialization, >but this isn't strictly required. I’m OK to take the series as-is (it doesn’t affect any auto tests, so we can decide what to do about non-Linux platforms in make check at a later point), but there seems to be something you wanted to fix up in patch 5. (And there’s also Kevin’s pending pull request that changes a bit of iotests.py.) Max signature.asc Description: OpenPGP digital signature
Re: [PATCH 3/4] block/mirror: support unaligned write in active mirror
04.10.2019 18:27, Max Reitz wrote: > On 04.10.19 17:04, Vladimir Sementsov-Ogievskiy wrote: >> 04.10.2019 17:48, Max Reitz wrote: >>> On 04.10.19 15:22, Vladimir Sementsov-Ogievskiy wrote: 04.10.2019 15:59, Max Reitz wrote: > On 03.10.19 11:34, Vladimir Sementsov-Ogievskiy wrote: >> 02.10.2019 18:52, Max Reitz wrote: >>> On 02.10.19 17:06, Vladimir Sementsov-Ogievskiy wrote: 02.10.2019 18:03, Vladimir Sementsov-Ogievskiy wrote: > 02.10.2019 17:57, Max Reitz wrote: >> On 12.09.19 17:13, Vladimir Sementsov-Ogievskiy wrote: >>> Prior 9adc1cb49af8d do_sync_target_write had a bug: it reset >>> aligned-up >>> region in the dirty bitmap, which means that we may not copy some >>> bytes >>> and assume them copied, which actually leads to producing corrupted >>> target. >>> >>> So 9adc1cb49af8d forced dirty bitmap granularity to be >>> request_alignment for mirror-top filter, so we are not working with >>> unaligned requests. However forcing large alignment obviously >>> decreases >>> performance of unaligned requests. >>> >>> This commit provides another solution for the problem: if unaligned >>> padding is already dirty, we can safely ignore it, as >>> 1. It's dirty, it will be copied by mirror_iteration anyway >>> 2. It's dirty, so skipping it now we don't increase dirtiness of the >>> bitmap and therefore don't damage "synchronicity" of the >>> write-blocking mirror. >> >> But that’s not what active mirror is for. The point of active >> mirror is >> that it must converge because every guest write will contribute >> towards >> that goal. >> >> If you skip active mirroring for unaligned guest writes, they will >> not >> contribute towards converging, but in fact lead to the opposite. >> > > The will not contribute only if region is already dirty. Actually, > after > first iteration of mirroring (copying the whole disk), all following > writes > will contribute, so the whole process must converge. It is a bit > similar with > running one mirror loop in normal mode, and then enable > write-blocking. > In other words, we don't need "all guest writes contribute" to converge, "all guest writes don't create new dirty bits" is enough, as we have parallel mirror iteration which contiguously handles dirty bits. >>> >>> Hm, in a sense. But it does mean that guest writes will not contribute >>> to convergence. >>> >>> And that’s against the current definition of write-blocking, which does >>> state that “when data is written to the source, write it (synchronously) >>> to the target as well”. >>> >> >> Hmm, understand. But IMHO our proposed behavior is better in general. >> Do you think it's a problem to change spec now? >> If yes, I'll resend with an option > > Well, the thing is that I’d find it weird if write-blocking wasn’t > blocking in all cases. And in my opinion, it makes more sense for > active mirror if all writes actively contributed to convergence. > Why? What is the benefit in it? What is "all writes actively contributed to convergence" for user? >>> >>> One thing I wonder about is whether it’s really guaranteed that the >>> background job will run under full I/O load, and how often it runs. >>> >>> I fear that with your model, the background job might starve and the >>> mirror may take a very long time. >> >> Hmmm. I think mirror job is in same context as guest writes, and goes >> through same IO api.. Why it will starve? (I understand that my words >> are not an evidence...). > > I thought that maybe if the disk is read to write and write all the > time, there’d be no time for the mirror coroutine. > >>> It won’t diverge, but it also won’t >>> really converge. >> >> But same will be with current behavior: guest is not guaranteed to write >> to all parts of disk. And in most scenarios it doesn't. So, if mirror job >> starve because of huge guest IO load, we will not converge anyway. >> >> So, background process is necessary thing for converge anyway. > > Good point. That convinces me. > >>> The advantage of letting all writes block is that even under full I/O >>> load, the mirror job will progress at a steady pace. >>> I think for user there may be the following criteria: 1. guaranteed converge, with any guest write load. Both current and my proposed variants are OK. 2. Less impact on guest. Obviously my proposed variant is better 3. Total time of mirroring Seems, current may be a bit better, but I don't think that unaligned
Re: [PATCH v2] s390x/tcg: MVCL: Exit to main loop if requested
On 10/4/19 5:34 AM, David Hildenbrand wrote: > Or, as alternative, something like "cpu_shall_exit()" which only > wraps the single check. I would prefer this to the full cpu_loop_exit_restore. It's harder to imagine what else might need doing for some other user of the interface. r~
Re: [PATCH 3/4] block/mirror: support unaligned write in active mirror
On 04.10.19 17:04, Vladimir Sementsov-Ogievskiy wrote: > 04.10.2019 17:48, Max Reitz wrote: >> On 04.10.19 15:22, Vladimir Sementsov-Ogievskiy wrote: >>> 04.10.2019 15:59, Max Reitz wrote: On 03.10.19 11:34, Vladimir Sementsov-Ogievskiy wrote: > 02.10.2019 18:52, Max Reitz wrote: >> On 02.10.19 17:06, Vladimir Sementsov-Ogievskiy wrote: >>> 02.10.2019 18:03, Vladimir Sementsov-Ogievskiy wrote: 02.10.2019 17:57, Max Reitz wrote: > On 12.09.19 17:13, Vladimir Sementsov-Ogievskiy wrote: >> Prior 9adc1cb49af8d do_sync_target_write had a bug: it reset >> aligned-up >> region in the dirty bitmap, which means that we may not copy some >> bytes >> and assume them copied, which actually leads to producing corrupted >> target. >> >> So 9adc1cb49af8d forced dirty bitmap granularity to be >> request_alignment for mirror-top filter, so we are not working with >> unaligned requests. However forcing large alignment obviously >> decreases >> performance of unaligned requests. >> >> This commit provides another solution for the problem: if unaligned >> padding is already dirty, we can safely ignore it, as >> 1. It's dirty, it will be copied by mirror_iteration anyway >> 2. It's dirty, so skipping it now we don't increase dirtiness of the >> bitmap and therefore don't damage "synchronicity" of the >> write-blocking mirror. > > But that’s not what active mirror is for. The point of active mirror > is > that it must converge because every guest write will contribute > towards > that goal. > > If you skip active mirroring for unaligned guest writes, they will not > contribute towards converging, but in fact lead to the opposite. > The will not contribute only if region is already dirty. Actually, after first iteration of mirroring (copying the whole disk), all following writes will contribute, so the whole process must converge. It is a bit similar with running one mirror loop in normal mode, and then enable write-blocking. >>> >>> >>> In other words, we don't need "all guest writes contribute" to converge, >>> "all guest writes don't create new dirty bits" is enough, as we have >>> parallel >>> mirror iteration which contiguously handles dirty bits. >> >> Hm, in a sense. But it does mean that guest writes will not contribute >> to convergence. >> >> And that’s against the current definition of write-blocking, which does >> state that “when data is written to the source, write it (synchronously) >> to the target as well”. >> > > Hmm, understand. But IMHO our proposed behavior is better in general. > Do you think it's a problem to change spec now? > If yes, I'll resend with an option Well, the thing is that I’d find it weird if write-blocking wasn’t blocking in all cases. And in my opinion, it makes more sense for active mirror if all writes actively contributed to convergence. >>> >>> Why? What is the benefit in it? >>> What is "all writes actively contributed to convergence" for user? >> >> One thing I wonder about is whether it’s really guaranteed that the >> background job will run under full I/O load, and how often it runs. >> >> I fear that with your model, the background job might starve and the >> mirror may take a very long time. > > Hmmm. I think mirror job is in same context as guest writes, and goes > through same IO api.. Why it will starve? (I understand that my words > are not an evidence...). I thought that maybe if the disk is read to write and write all the time, there’d be no time for the mirror coroutine. >> It won’t diverge, but it also won’t >> really converge. > > But same will be with current behavior: guest is not guaranteed to write > to all parts of disk. And in most scenarios it doesn't. So, if mirror job > starve because of huge guest IO load, we will not converge anyway. > > So, background process is necessary thing for converge anyway. Good point. That convinces me. >> The advantage of letting all writes block is that even under full I/O >> load, the mirror job will progress at a steady pace. >> >>> I think for user there may be the following criteria: >>> >>> 1. guaranteed converge, with any guest write load. >>> Both current and my proposed variants are OK. >>> >>> 2. Less impact on guest. >>> Obviously my proposed variant is better >>> >>> 3. Total time of mirroring >>> Seems, current may be a bit better, but I don't think that unaligned >>> tails gives significant impact. >>> >>> === >>> >>> So, assume I want [1]+[2]. And possibly >>> 2.2: Even less impact on guest: ignore not only unaligned tails if they are >>> already
[PATCH v2] target/riscv: Expose "priv" register for GDB
This patch enables a debugger to read and write the current privilege level via a special "priv" register. When compiled with CONFIG_USER_ONLY the register is still visible but is hardwired to zero. Signed-off-by: Jonathan Behrens --- gdb-xml/riscv-32bit-cpu.xml | 1 + gdb-xml/riscv-64bit-cpu.xml | 1 + target/riscv/cpu.c | 2 +- target/riscv/gdbstub.c | 14 ++ 4 files changed, 17 insertions(+), 1 deletion(-) --- Changelog V2: - Use PRV_H and PRV_S instead of integer literals diff --git a/gdb-xml/riscv-32bit-cpu.xml b/gdb-xml/riscv-32bit-cpu.xml index 0d07aaec85..d6d76aafd8 100644 --- a/gdb-xml/riscv-32bit-cpu.xml +++ b/gdb-xml/riscv-32bit-cpu.xml @@ -44,4 +44,5 @@ + diff --git a/gdb-xml/riscv-64bit-cpu.xml b/gdb-xml/riscv-64bit-cpu.xml index b8aa424ae4..0758d1b5fe 100644 --- a/gdb-xml/riscv-64bit-cpu.xml +++ b/gdb-xml/riscv-64bit-cpu.xml @@ -44,4 +44,5 @@ + diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index f13e298a36..347989858f 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -475,7 +475,7 @@ static void riscv_cpu_class_init(ObjectClass *c, void *data) cc->synchronize_from_tb = riscv_cpu_synchronize_from_tb; cc->gdb_read_register = riscv_cpu_gdb_read_register; cc->gdb_write_register = riscv_cpu_gdb_write_register; -cc->gdb_num_core_regs = 33; +cc->gdb_num_core_regs = 34; #if defined(TARGET_RISCV32) cc->gdb_core_xml_file = "riscv-32bit-cpu.xml"; #elif defined(TARGET_RISCV64) diff --git a/target/riscv/gdbstub.c b/target/riscv/gdbstub.c index ded140e8d8..7e0822145d 100644 --- a/target/riscv/gdbstub.c +++ b/target/riscv/gdbstub.c @@ -278,6 +278,12 @@ int riscv_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n) return gdb_get_regl(mem_buf, env->gpr[n]); } else if (n == 32) { return gdb_get_regl(mem_buf, env->pc); +} else if (n == 33) { +#ifdef CONFIG_USER_ONLY +return gdb_get_regl(mem_buf, 0); +#else +return gdb_get_regl(mem_buf, env->priv); +#endif } return 0; } @@ -296,6 +302,14 @@ int riscv_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) } else if (n == 32) { env->pc = ldtul_p(mem_buf); return sizeof(target_ulong); +} else if (n == 33) { +#ifndef CONFIG_USER_ONLY +env->priv = ldtul_p(mem_buf) & 0x3; +if (env->priv == PRV_H) { +env->priv = PRV_S; +} +#endif +return sizeof(target_ulong); } return 0; } -- 2.23.0
Re: [Qemu-devel] [PATCH v9 07/17] blockdev: adds bdrv_parse_aio to use io_uring
On Wed, Aug 07, 2019 at 02:49:51PM +0200, Julia Suvorova via Qemu-devel wrote: > On Wed, Aug 7, 2019 at 2:06 PM Aarushi Mehta wrote: > > > > > > > > On Wed, 7 Aug, 2019, 17:15 Julia Suvorova, wrote: > >> > >> On Fri, Aug 2, 2019 at 1:41 AM Aarushi Mehta > >> wrote: > >> > +int bdrv_parse_aio(const char *mode, int *flags) > >> > +{ > >> > +if (!strcmp(mode, "threads")) { > >> > +/* do nothing, default */ > >> > +} else if (!strcmp(mode, "native")) { > >> > +*flags |= BDRV_O_NATIVE_AIO; > >> > >> This 'if' should be covered with CONFIG_LINUX_AIO. > > > > > > The aio=native definition is shared with Windows hosts' native aio and will > > break if it was covered. > > > > file-posix handles the case. > > Fair enough. Then you can remove all ifdefs for io_uring from > raw_open_common in file-posix.c as this case was already checked here. This is not possible since the BLOCKDEV_AIO_OPTIONS_IO_URING enum constant is conditional in the QAPI schema: { 'enum': 'BlockdevAioOptions', 'data': [ 'threads', 'native', { 'name': 'io_uring', 'if': 'defined(CONFIG_LINUX_IO_URING)' } ] } The code can only use BLOCKDEV_AIO_OPTIONS_IO_URING if CONFIG_LINUX_IO_URING was defined, so we cannot drop the #ifdefs in raw_open_common(). Stefan signature.asc Description: PGP signature
Re: [PATCH 3/4] block/mirror: support unaligned write in active mirror
04.10.2019 17:48, Max Reitz wrote: > On 04.10.19 15:22, Vladimir Sementsov-Ogievskiy wrote: >> 04.10.2019 15:59, Max Reitz wrote: >>> On 03.10.19 11:34, Vladimir Sementsov-Ogievskiy wrote: 02.10.2019 18:52, Max Reitz wrote: > On 02.10.19 17:06, Vladimir Sementsov-Ogievskiy wrote: >> 02.10.2019 18:03, Vladimir Sementsov-Ogievskiy wrote: >>> 02.10.2019 17:57, Max Reitz wrote: On 12.09.19 17:13, Vladimir Sementsov-Ogievskiy wrote: > Prior 9adc1cb49af8d do_sync_target_write had a bug: it reset > aligned-up > region in the dirty bitmap, which means that we may not copy some > bytes > and assume them copied, which actually leads to producing corrupted > target. > > So 9adc1cb49af8d forced dirty bitmap granularity to be > request_alignment for mirror-top filter, so we are not working with > unaligned requests. However forcing large alignment obviously > decreases > performance of unaligned requests. > > This commit provides another solution for the problem: if unaligned > padding is already dirty, we can safely ignore it, as > 1. It's dirty, it will be copied by mirror_iteration anyway > 2. It's dirty, so skipping it now we don't increase dirtiness of the > bitmap and therefore don't damage "synchronicity" of the > write-blocking mirror. But that’s not what active mirror is for. The point of active mirror is that it must converge because every guest write will contribute towards that goal. If you skip active mirroring for unaligned guest writes, they will not contribute towards converging, but in fact lead to the opposite. >>> >>> The will not contribute only if region is already dirty. Actually, after >>> first iteration of mirroring (copying the whole disk), all following >>> writes >>> will contribute, so the whole process must converge. It is a bit >>> similar with >>> running one mirror loop in normal mode, and then enable write-blocking. >>> >> >> >> In other words, we don't need "all guest writes contribute" to converge, >> "all guest writes don't create new dirty bits" is enough, as we have >> parallel >> mirror iteration which contiguously handles dirty bits. > > Hm, in a sense. But it does mean that guest writes will not contribute > to convergence. > > And that’s against the current definition of write-blocking, which does > state that “when data is written to the source, write it (synchronously) > to the target as well”. > Hmm, understand. But IMHO our proposed behavior is better in general. Do you think it's a problem to change spec now? If yes, I'll resend with an option >>> >>> Well, the thing is that I’d find it weird if write-blocking wasn’t >>> blocking in all cases. And in my opinion, it makes more sense for >>> active mirror if all writes actively contributed to convergence. >>> >> >> Why? What is the benefit in it? >> What is "all writes actively contributed to convergence" for user? > > One thing I wonder about is whether it’s really guaranteed that the > background job will run under full I/O load, and how often it runs. > > I fear that with your model, the background job might starve and the > mirror may take a very long time. Hmmm. I think mirror job is in same context as guest writes, and goes through same IO api.. Why it will starve? (I understand that my words are not an evidence...). > It won’t diverge, but it also won’t > really converge. But same will be with current behavior: guest is not guaranteed to write to all parts of disk. And in most scenarios it doesn't. So, if mirror job starve because of huge guest IO load, we will not converge anyway. So, background process is necessary thing for converge anyway. > > The advantage of letting all writes block is that even under full I/O > load, the mirror job will progress at a steady pace. > >> I think for user there may be the following criteria: >> >> 1. guaranteed converge, with any guest write load. >> Both current and my proposed variants are OK. >> >> 2. Less impact on guest. >> Obviously my proposed variant is better >> >> 3. Total time of mirroring >> Seems, current may be a bit better, but I don't think that unaligned >> tails gives significant impact. >> >> === >> >> So, assume I want [1]+[2]. And possibly >> 2.2: Even less impact on guest: ignore not only unaligned tails if they are >> already dirty, but full synchronous mirror operation if area is already >> dirty. >> >> How should I call this? Should it be separate mode, or option for >> write-blocking? > > I don’t know whether it makes sense to add a separate mode or a separate > option just for this difference. I don’t think anyone would choose the > non-default option. At
[PATCH] netmap: support git-submodule build otption
From: Giuseppe Lettieri With this patch, netmap support can be enabled with the following options to the configure script: --enable-netmap[=system] Use the host system netmap installation. Fail if not found. --enable-netmap=git clone the official netmap repository on github (mostly useful for CI) Signed-off-by: Giuseppe Lettieri --- .gitmodules | 3 +++ configure | 64 + 2 files changed, 58 insertions(+), 9 deletions(-) diff --git a/.gitmodules b/.gitmodules index c5c474169d..bf75dbc5e3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -58,3 +58,6 @@ [submodule "roms/opensbi"] path = roms/opensbi url = https://git.qemu.org/git/opensbi.git +[submodule "netmap"] + path = netmap + url = https://github.com/luigirizzo/netmap.git diff --git a/configure b/configure index 8f8446f52b..cb2c6c70d6 100755 --- a/configure +++ b/configure @@ -1132,6 +1132,10 @@ for opt do ;; --enable-netmap) netmap="yes" ;; + --enable-netmap=git) netmap="git" + ;; + --enable-netmap=system) netmap="system" + ;; --disable-xen) xen="no" ;; --enable-xen) xen="yes" @@ -3318,8 +3322,9 @@ fi # a minor/major version number. Minor new features will be marked with values up # to 15, and if something happens that requires a change to the backend we will # move above 15, submit the backend fixes and modify this two bounds. -if test "$netmap" != "no" ; then - cat > $TMPC << EOF +case "$netmap" in +"" | yes | system) + cat > $TMPC << EOF #include #include #include @@ -3330,14 +3335,55 @@ if test "$netmap" != "no" ; then int main(void) { return 0; } EOF if compile_prog "" "" ; then -netmap=yes +netmap_system=yes else -if test "$netmap" = "yes" ; then - feature_not_found "netmap" -fi -netmap=no +netmap_system=no fi -fi + ;; +esac + +case "$netmap" in + "" | yes) +if test "$netmap_system" = "yes"; then + netmap=system +elif test -e "${source_path}/.git" && test $git_update = 'yes' ; then + netmap=git +elif test -e "${source_path}/netmap/configure" ; then + netmap=internal +elif test -z "$netmap" ; then + netmap=no +else + feature_not_found "netmap" "Install netmap or git submodule" +fi +;; + + system) +if test "$netmap_system" = "no"; then + feature_not_found "netmap" "Install netmap" +fi +;; +esac + +case "$netmap" in + git | internal) +if test "$netmap" = git; then + git_submodules="${git_submodules} netmap" +fi +mkdir -p netmap +QEMU_CFLAGS="$QEMU_CFLAGS -I\$(SRC_PATH)/netmap/sys" +;; + + system) +;; + + no) +;; + *) +error_exit "Unknown state for netmap: $netmap" +;; +esac + +## ## # libcap-ng library probe @@ -6585,7 +6631,7 @@ if test "$vde" = "yes" ; then echo "CONFIG_VDE=y" >> $config_host_mak echo "VDE_LIBS=$vde_libs" >> $config_host_mak fi -if test "$netmap" = "yes" ; then +if test "$netmap" != "no" ; then echo "CONFIG_NETMAP=y" >> $config_host_mak fi if test "$l2tpv3" = "yes" ; then -- 2.21.0
Re: [PATCH 3/4] block/mirror: support unaligned write in active mirror
On 04.10.19 15:22, Vladimir Sementsov-Ogievskiy wrote: > 04.10.2019 15:59, Max Reitz wrote: >> On 03.10.19 11:34, Vladimir Sementsov-Ogievskiy wrote: >>> 02.10.2019 18:52, Max Reitz wrote: On 02.10.19 17:06, Vladimir Sementsov-Ogievskiy wrote: > 02.10.2019 18:03, Vladimir Sementsov-Ogievskiy wrote: >> 02.10.2019 17:57, Max Reitz wrote: >>> On 12.09.19 17:13, Vladimir Sementsov-Ogievskiy wrote: Prior 9adc1cb49af8d do_sync_target_write had a bug: it reset aligned-up region in the dirty bitmap, which means that we may not copy some bytes and assume them copied, which actually leads to producing corrupted target. So 9adc1cb49af8d forced dirty bitmap granularity to be request_alignment for mirror-top filter, so we are not working with unaligned requests. However forcing large alignment obviously decreases performance of unaligned requests. This commit provides another solution for the problem: if unaligned padding is already dirty, we can safely ignore it, as 1. It's dirty, it will be copied by mirror_iteration anyway 2. It's dirty, so skipping it now we don't increase dirtiness of the bitmap and therefore don't damage "synchronicity" of the write-blocking mirror. >>> >>> But that’s not what active mirror is for. The point of active mirror is >>> that it must converge because every guest write will contribute towards >>> that goal. >>> >>> If you skip active mirroring for unaligned guest writes, they will not >>> contribute towards converging, but in fact lead to the opposite. >>> >> >> The will not contribute only if region is already dirty. Actually, after >> first iteration of mirroring (copying the whole disk), all following >> writes >> will contribute, so the whole process must converge. It is a bit similar >> with >> running one mirror loop in normal mode, and then enable write-blocking. >> > > > In other words, we don't need "all guest writes contribute" to converge, > "all guest writes don't create new dirty bits" is enough, as we have > parallel > mirror iteration which contiguously handles dirty bits. Hm, in a sense. But it does mean that guest writes will not contribute to convergence. And that’s against the current definition of write-blocking, which does state that “when data is written to the source, write it (synchronously) to the target as well”. >>> >>> Hmm, understand. But IMHO our proposed behavior is better in general. >>> Do you think it's a problem to change spec now? >>> If yes, I'll resend with an option >> >> Well, the thing is that I’d find it weird if write-blocking wasn’t >> blocking in all cases. And in my opinion, it makes more sense for >> active mirror if all writes actively contributed to convergence. >> > > Why? What is the benefit in it? > What is "all writes actively contributed to convergence" for user? One thing I wonder about is whether it’s really guaranteed that the background job will run under full I/O load, and how often it runs. I fear that with your model, the background job might starve and the mirror may take a very long time. It won’t diverge, but it also won’t really converge. The advantage of letting all writes block is that even under full I/O load, the mirror job will progress at a steady pace. > I think for user there may be the following criteria: > > 1. guaranteed converge, with any guest write load. > Both current and my proposed variants are OK. > > 2. Less impact on guest. > Obviously my proposed variant is better > > 3. Total time of mirroring > Seems, current may be a bit better, but I don't think that unaligned > tails gives significant impact. > > === > > So, assume I want [1]+[2]. And possibly > 2.2: Even less impact on guest: ignore not only unaligned tails if they are > already dirty, but full synchronous mirror operation if area is already dirty. > > How should I call this? Should it be separate mode, or option for > write-blocking? I don’t know whether it makes sense to add a separate mode or a separate option just for this difference. I don’t think anyone would choose the non-default option. But I do think there’s quite a bit of difference in how the job behaves still... I don’t know. :-/ Max signature.asc Description: OpenPGP digital signature
Re: [PATCH v5 1/5] iotests: remove 'linux' from default supported platforms
On 04.10.19 16:05, Peter Maydell wrote: > On Fri, 4 Oct 2019 at 14:50, Max Reitz wrote: >> On 04.10.19 15:16, Peter Maydell wrote: >>> 'make check' does have the restriction >>> that we don't want the tests to take too long to run, but in >>> general the block layer should be running some reasonable subset >>> of tests in the project's standard "run the tests please" command. >>> (I have no opinion on exactly what that subset ought to be, beyond >>> that it would be good if that subset doesn't have known intermittent >>> failures in it.) >> >> Deciding the subset is difficult. My stance was that it’s better to not >> choose an arbitrary subset here but ensure that really everything gets >> run as part of CI, that is asynchronously so it doesn’t block anyone and >> can take as long as it wants. > > I wonder if we have a terminology difference confusion here. > To me "CI" means "we have tests which we automatically run > before pushing commits to master, and if they fail then we > don't push". So (a) they have to run synchronously and (b) there > is a need for them to run in a reasonable period of time because > otherwise it takes too long to run the tests before pushing > to master and we get a backlog of unprocessed pullrequests > and annoying delays. Hm, yes. In that case, there’s of course no point in moving it. I meant moving the tests to somewhere where they run asynchronously. >> If we decide to get pull requests based on that, then that’d bring even >> more pain, but at least it’d be honest. But just running half of the >> qcow2 tests to me seems only like we want to pretend we ran the iotests. >> So for me, iotests still break, and we need to deal with make check >> failures on top. I’d at least prefer one or the other. > > I think the ideal we're aiming for here is: > (1) people doing active work in the block subsystem are probably > often going to want to run all the iotests, and certainly the > subsystem maintainers will want to do that as they put together > pull requests. > (2) but people who *don't* do active work in the block subsystem > still sometimes touch it in passing as part of things like global > refactorings or other changes that touch big parts of the tree, > or accidentally break it with a change to some other bit of QEMU > entirely. These people won't run the full iotests, but it is > reasonable to expect them to run 'make check', and it would be good > if that caught "whoops you broke the block subsystem". > > Similarly, "make check" has incredibly spotty coverage of > various arm boards and devices, but it does do some basic > checks which do catch accidental breakage. Yes, it has its use, but at the same time it means that intermittent failure can appear in make check because some iotest is broken. (The past has shown that it’s hard to predict which tests will at some point start to exhibit such behavior.) This then requires immediate attention, and I simply want help with that from someone who really cares about having the test be part of make check, as I wrote in my reply to Thomas’s patch “iotests: Do not run the iotests during "make check" anymore”. The issues are just half as pressing when it isn’t make check that’s intermittently failing. (I hope nobody takes this as “He doesn’t want to fix intermittent failures”, because I do really try to do that. So I don’t consider this a good kind of pressure.) I suppose my main problem is that I’m just lucky that I can’t remember a case where the block subsystem was really broken and iotests in make check would have caught it; and that I’m naïve enough to expect this to continue into the future. Max signature.asc Description: OpenPGP digital signature
Re: [PATCH v15 0/5] backup-top filter driver for backup
04.10.2019 17:21, Max Reitz wrote: > On 01.10.19 15:14, Vladimir Sementsov-Ogievskiy wrote: >> Hi all! >> >> These series introduce backup-top driver. It's a filter-node, which >> do copy-before-write operation. Mirror uses filter-node for handling >> guest writes, let's move to filter-node (from write-notifiers) for >> backup too. >> >> Preparation patches are queued in Max's block branch: >> >> Based-on: https://github.com/XanClic/qemu.git block >> >> v15: use BdrvChildren in block-copy >> 01-03: new >> 04-05: a lot of changes, such as >> 04: >> - add new parameters for creation >> - prepare bcs creation >> - add target child >> - refactor bdrv_backup_top_append >> - drop r-b >> 05: >> - move block-copy to use BdrvChildren >> - drop extra style fixing hunks >> - iotest 141 output changed >> >> v14: Drop range locks, keep old good in-flight requests waiting >> >> 12: new patch >> 14: use old request-waiting scheme instead of range locks >> >> Vladimir Sementsov-Ogievskiy (5): >>block/backup: move in-flight requests handling from backup to >> block-copy >>block/backup: move write_flags calculation inside backup_job_create >>block/block-copy: split block_copy_set_callbacks function >>block: introduce backup-top filter driver >>block/backup: use backup-top instead of write notifiers >> >> qapi/block-core.json | 8 +- >> block/backup-top.h | 41 + >> include/block/block-copy.h | 31 +++- >> include/block/block_int.h | 1 + >> block/backup-top.c | 276 + >> block/backup.c | 147 +- >> block/block-copy.c | 140 + >> block/replication.c| 2 +- >> blockdev.c | 1 + >> block/Makefile.objs| 1 + >> tests/qemu-iotests/056 | 8 +- >> tests/qemu-iotests/141.out | 2 +- >> tests/qemu-iotests/257 | 7 +- >> tests/qemu-iotests/257.out | 306 ++--- >> 14 files changed, 628 insertions(+), 343 deletions(-) >> create mode 100644 block/backup-top.h >> create mode 100644 block/backup-top.c > > Thanks, applied to my block branch: > > https://git.xanclic.moe/XanClic/qemu/commits/branch/block > > Max > Yay!! Thank you! -- Best regards, Vladimir
Re: [Qemu-devel] [PATCH v2 2/3] tests/migration: fix a typo in comment
On 9/11/19 5:31 AM, Mao Zhongyi wrote: Cc: arm...@redhat.com Cc: laur...@vivier.eu Cc: tony.ngu...@bt.com Signed-off-by: Mao Zhongyi Reviewed-by: Alex Bennée --- tests/migration/stress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/migration/stress.c b/tests/migration/stress.c index 6cbb2d49d3..19a6eff5fd 100644 --- a/tests/migration/stress.c +++ b/tests/migration/stress.c @@ -191,7 +191,7 @@ static int stressone(unsigned long long ramsizeMB) /* We don't care about initial state, but we do want * to fault it all into RAM, otherwise the first iter - * of the loop below will be quite slow. We cna't use + * of the loop below will be quite slow. We can't use * 0x0 as the byte as gcc optimizes that away into a * calloc instead :-) */ memset(ram, 0xfe, ramsizeMB * 1024 * 1024); Reviewed-by: Philippe Mathieu-Daudé
Re: [PATCH v15 0/5] backup-top filter driver for backup
On 01.10.19 15:14, Vladimir Sementsov-Ogievskiy wrote: > Hi all! > > These series introduce backup-top driver. It's a filter-node, which > do copy-before-write operation. Mirror uses filter-node for handling > guest writes, let's move to filter-node (from write-notifiers) for > backup too. > > Preparation patches are queued in Max's block branch: > > Based-on: https://github.com/XanClic/qemu.git block > > v15: use BdrvChildren in block-copy > 01-03: new > 04-05: a lot of changes, such as > 04: > - add new parameters for creation > - prepare bcs creation > - add target child > - refactor bdrv_backup_top_append > - drop r-b > 05: > - move block-copy to use BdrvChildren > - drop extra style fixing hunks > - iotest 141 output changed > > v14: Drop range locks, keep old good in-flight requests waiting > > 12: new patch > 14: use old request-waiting scheme instead of range locks > > Vladimir Sementsov-Ogievskiy (5): > block/backup: move in-flight requests handling from backup to > block-copy > block/backup: move write_flags calculation inside backup_job_create > block/block-copy: split block_copy_set_callbacks function > block: introduce backup-top filter driver > block/backup: use backup-top instead of write notifiers > > qapi/block-core.json | 8 +- > block/backup-top.h | 41 + > include/block/block-copy.h | 31 +++- > include/block/block_int.h | 1 + > block/backup-top.c | 276 + > block/backup.c | 147 +- > block/block-copy.c | 140 + > block/replication.c| 2 +- > blockdev.c | 1 + > block/Makefile.objs| 1 + > tests/qemu-iotests/056 | 8 +- > tests/qemu-iotests/141.out | 2 +- > tests/qemu-iotests/257 | 7 +- > tests/qemu-iotests/257.out | 306 ++--- > 14 files changed, 628 insertions(+), 343 deletions(-) > create mode 100644 block/backup-top.h > create mode 100644 block/backup-top.c Thanks, applied to my block branch: https://git.xanclic.moe/XanClic/qemu/commits/branch/block Max signature.asc Description: OpenPGP digital signature
Re: [PATCH v15 5/5] block/backup: use backup-top instead of write notifiers
On 01.10.19 15:14, Vladimir Sementsov-Ogievskiy wrote: > Drop write notifiers and use filter node instead. > > = Changes = > > 1. Add filter-node-name argument for backup qmp api. We have to do it > in this commit, as 257 needs to be fixed. > > 2. There are no more write notifiers here, so is_write_notifier > parameter is dropped from block-copy paths. > > 3. To sync with in-flight requests at job finish we now have drained > removing of the filter, we don't need rw-lock. > > 4. Block-copy is now using BdrvChildren instead of BlockBackends > > 5. As backup-top owns these children, we also move block-copy state > into backup-top's ownership. > > = Iotest changes = > > 56: op-blocker doesn't shoot now, as we set it on source, but then > check on filter, when trying to start second backup. > To keep the test we instead can catch another collision: both jobs will > get 'drive0' job-id, as job-id parameter is unspecified. To prevent > interleaving with file-posix locks (as they are dependent on config) > let's use another target for second backup. > > Also, it's obvious now that we'd like to drop this op-blocker at all > and add a test-case for two backups from one node (to different > destinations) actually works. But not in these series. > > 141: Output changed: prepatch, "Node is in use" comes from bdrv_has_blk > check inside qmp_blockdev_del. But we've dropped block-copy blk > objects, so no more blk objects on source bs (job blk is on backup-top > filter bs). New message is from op-blocker, which is the next check in > qmp_blockdev_add. > > 257: The test wants to emulate guest write during backup. They should > go to filter node, not to original source node, of course. Therefore we > need to specify filter node name and use it. > > Signed-off-by: Vladimir Sementsov-Ogievskiy > --- > qapi/block-core.json | 8 +- > include/block/block-copy.h | 14 +- > include/block/block_int.h | 1 + > block/backup-top.c | 21 +-- > block/backup.c | 73 +++-- > block/block-copy.c | 81 +++--- > block/replication.c| 2 +- > blockdev.c | 1 + > tests/qemu-iotests/056 | 8 +- > tests/qemu-iotests/141.out | 2 +- > tests/qemu-iotests/257 | 7 +- > tests/qemu-iotests/257.out | 306 ++--- > 12 files changed, 237 insertions(+), 287 deletions(-) [...] > diff --git a/block/block-copy.c b/block/block-copy.c > index fcb112da14..5404bc921d 100644 > --- a/block/block-copy.c > +++ b/block/block-copy.c [...] > @@ -218,8 +183,8 @@ static int coroutine_fn > block_copy_with_offload(BlockCopyState *s, > nr_clusters = DIV_ROUND_UP(nbytes, s->cluster_size); > bdrv_reset_dirty_bitmap(s->copy_bitmap, start, > s->cluster_size * nr_clusters); > -ret = blk_co_copy_range(s->source, start, s->target, start, nbytes, > -read_flags, s->write_flags); > +ret = bdrv_co_copy_range(s->source, start, s->target, start, nbytes, > +0, s->write_flags); The indentation’s off here. I’ll fix it. Reviewed-by: Max Reitz signature.asc Description: OpenPGP digital signature
Re: [PATCH v5 1/5] iotests: remove 'linux' from default supported platforms
On Fri, 4 Oct 2019 at 14:50, Max Reitz wrote: > On 04.10.19 15:16, Peter Maydell wrote: > > 'make check' does have the restriction > > that we don't want the tests to take too long to run, but in > > general the block layer should be running some reasonable subset > > of tests in the project's standard "run the tests please" command. > > (I have no opinion on exactly what that subset ought to be, beyond > > that it would be good if that subset doesn't have known intermittent > > failures in it.) > > Deciding the subset is difficult. My stance was that it’s better to not > choose an arbitrary subset here but ensure that really everything gets > run as part of CI, that is asynchronously so it doesn’t block anyone and > can take as long as it wants. I wonder if we have a terminology difference confusion here. To me "CI" means "we have tests which we automatically run before pushing commits to master, and if they fail then we don't push". So (a) they have to run synchronously and (b) there is a need for them to run in a reasonable period of time because otherwise it takes too long to run the tests before pushing to master and we get a backlog of unprocessed pullrequests and annoying delays. > If we decide to get pull requests based on that, then that’d bring even > more pain, but at least it’d be honest. But just running half of the > qcow2 tests to me seems only like we want to pretend we ran the iotests. > So for me, iotests still break, and we need to deal with make check > failures on top. I’d at least prefer one or the other. I think the ideal we're aiming for here is: (1) people doing active work in the block subsystem are probably often going to want to run all the iotests, and certainly the subsystem maintainers will want to do that as they put together pull requests. (2) but people who *don't* do active work in the block subsystem still sometimes touch it in passing as part of things like global refactorings or other changes that touch big parts of the tree, or accidentally break it with a change to some other bit of QEMU entirely. These people won't run the full iotests, but it is reasonable to expect them to run 'make check', and it would be good if that caught "whoops you broke the block subsystem". Similarly, "make check" has incredibly spotty coverage of various arm boards and devices, but it does do some basic checks which do catch accidental breakage. thanks -- PMM
Re: [PATCH v6 00/10] Introduce the microvm machine type
On Fri, Oct 04, 2019 at 11:37:42AM +0200, Sergio Lopez wrote: > Microvm is a machine type inspired by Firecracker and constructed > after the its machine model. > > It's a minimalist machine type without PCI nor ACPI support, designed > for short-lived guests. Microvm also establishes a baseline for > benchmarking and optimizing both QEMU and guest operating systems, > since it is optimized for both boot time and footprint. > > --- > > Changelog > v6: > - Some style fixes (Philippe Mathieu-Daudé) > - Fix a documentation bug stating that LAPIC was in userspace (Paolo >Bonzini) > - Update Xen HVM code after X86MachineState introduction (Philippe >Mathieu-Daudé) > - Rename header guard from QEMU_VIRTIO_MMIO_H to HW_VIRTIO_MMIO_H >(Philippe Mathieu-Daudé) > > v5: > - Drop unneeded "[PATCH v4 2/8] hw/i386: Factorize e820 related >functions" (Philippe Mathieu-Daudé) > - Drop unneeded "[PATCH v4 1/8] hw/i386: Factorize PVH related >functions" (Stefano Garzarella) > - Split X86MachineState introduction into smaller patches (Philippe >Mathieu-Daudé) > - Change option-roms to x-option-roms and kernel-cmdline to >auto-kernel-cmdline (Paolo Bonzini) > - Make i8259 PIT and i8254 PIC optional (Paolo Bonzini) > - Some fixes to the documentation (Paolo Bonzini) > - Switch documentation format from txt to rst (Peter Maydell) > - Move NMI interface to X86_MACHINE (Philippe Mathieu-Daudé, Paolo >Bonzini) > > v4: > - This is a complete rewrite of the whole patchset, with a focus on >reusing as much existing code as possible to ease the maintenance burden >and making the machine type as compatible as possible by default. As >a result, the number of lines dedicated specifically to microvm is >383 (code lines measured by "cloc") and, with the default >configuration, it's now able to boot both PVH ELF images and >bzImages with either SeaBIOS or qboot. > > v3: > - Add initrd support (thanks Stefano). > > v2: > - Drop "[PATCH 1/4] hw/i386: Factorize CPU routine". > - Simplify machine definition (thanks Eduardo). > - Remove use of unneeded NUMA-related callbacks (thanks Eduardo). > - Add a patch to factorize PVH-related functions. > - Replace use of Linux's Zero Page with PVH (thanks Maran and Paolo). > > --- > Sergio Lopez (10): > hw/virtio: Factorize virtio-mmio headers > hw/i386/pc: rename functions shared with non-PC machines > hw/i386/pc: move shared x86 functions to x86.c and export them > hw/i386: split PCMachineState deriving X86MachineState from it > hw/i386: make x86.c independent from PCMachineState > fw_cfg: add "modify" functions for all types > hw/intc/apic: reject pic ints if isa_pic == NULL > roms: add microvm-bios (qboot) as binary and git submodule > docs/microvm.rst: document the new microvm machine type > hw/i386: Introduce the microvm machine type > > docs/microvm.rst | 98 > default-configs/i386-softmmu.mak | 1 + > include/hw/i386/microvm.h| 83 > include/hw/i386/pc.h | 28 +- > include/hw/i386/x86.h| 94 > include/hw/nvram/fw_cfg.h| 42 ++ > include/hw/virtio/virtio-mmio.h | 73 +++ > hw/acpi/cpu_hotplug.c| 10 +- > hw/i386/acpi-build.c | 29 +- > hw/i386/amd_iommu.c | 3 +- > hw/i386/intel_iommu.c| 3 +- > hw/i386/microvm.c| 574 ++ > hw/i386/pc.c | 780 +++--- > hw/i386/pc_piix.c| 46 +- > hw/i386/pc_q35.c | 38 +- > hw/i386/pc_sysfw.c | 58 +-- > hw/i386/x86.c| 790 +++ > hw/i386/xen/xen-hvm.c| 23 +- > hw/intc/apic.c | 2 +- > hw/intc/ioapic.c | 2 +- > hw/nvram/fw_cfg.c| 29 ++ > hw/virtio/virtio-mmio.c | 48 +- > .gitmodules | 3 + > hw/i386/Kconfig | 4 + > hw/i386/Makefile.objs| 2 + > pc-bios/bios-microvm.bin | Bin 0 -> 65536 bytes > roms/Makefile| 6 + > roms/qboot | 1 + So I guess we want to add new files for x86 machines MAINTAINERS entry. Otherwise looks good. Which tree is this going to be merged through? Mine? > 28 files changed, 1963 insertions(+), 907 deletions(-) > create mode 100644 docs/microvm.rst > create mode 100644 include/hw/i386/microvm.h > create mode 100644 include/hw/i386/x86.h > create mode 100644 include/hw/virtio/virtio-mmio.h > create mode 100644 hw/i386/microvm.c > create mode 100644 hw/i386/x86.c > create mode 100755 pc-bios/bios-microvm.bin > create mode 16 roms/qboot > > -- > 2.21.0
Re: [PATCH v5 1/5] iotests: remove 'linux' from default supported platforms
On 04.10.19 15:16, Peter Maydell wrote: > On Fri, 4 Oct 2019 at 13:45, Max Reitz wrote: >>> In the end, I don't care what code these patches touched. I do an >>> innocent git pull, and when I finally see that it's master that breaks >>> iotests and not my patches on top of it, I'm annoyed. >> >> Hm. Part of my point was that this still happens all the time. >> >> Which is why I’d prefer more tests to run in CI than a handful of not >> very useful ones in make check. >> >> (Of course, running it in a CI won’t prevent iotest failures on your >> machine, but in practice neither does the current model.) > > If you want the tree to be defended against problems, then > you need to run tests in 'make check'. Nothing else gets consistently > run and has failures spotted either before code goes into the > tree or quickly afterwards. Yes, but part of the problem is that what does get run as part of make check currently does not help much. So this doesn’t really defend the iotests from problems. > 'make check' does have the restriction > that we don't want the tests to take too long to run, but in > general the block layer should be running some reasonable subset > of tests in the project's standard "run the tests please" command. > (I have no opinion on exactly what that subset ought to be, beyond > that it would be good if that subset doesn't have known intermittent > failures in it.) Deciding the subset is difficult. My stance was that it’s better to not choose an arbitrary subset here but ensure that really everything gets run as part of CI, that is asynchronously so it doesn’t block anyone and can take as long as it wants. If we decide to get pull requests based on that, then that’d bring even more pain, but at least it’d be honest. But just running half of the qcow2 tests to me seems only like we want to pretend we ran the iotests. So for me, iotests still break, and we need to deal with make check failures on top. I’d at least prefer one or the other. I voted for dropping make check, because running all iotests doesn’t seem feasible to me. >> I still think that I personally would prefer the iotests to not run as >> part of make check, but just as CI. > > 'make check' *is* our CI gate, to a first approximation. Most of > the various travis and other setups are simply a front-end for > "build QEMU in various configurations and on various hosts > and then run 'make check'". The travis setup at the moment is > a bit odd, because it runs tests but it's not a gate on our > merging changes. Ideally I would like to fix this so that > rather than me personally running "make check" via a bunch > of scripts we have one CI setup that we trust (gitlab seems > the current favoured contender) and that gates changes flowing > into master rather than me doing it manually. We might then turn > travis off if it's not providing anything for us that the gitlab > setup doesn't. Hm, but make check also serves other purposes. I would suppose e.g. distributions generally require make check to pass when building packages. I assumed our CI included more things than just make check, so we could move the iotests from out of make check but keep them as part of CI. Max signature.asc Description: OpenPGP digital signature
[PATCH v2] target/ppc: Fix for optimized vsl/vsr instructions
This patch fixes bug in optimized vsl/vsr instructions reported by Mark Cave-Ayland and Paul Clarke. Sorry for not responding earlier, I was absent last couple of days. I also integrated some suggestions made by Aleksandar Markovic. New soultion is tested and still has noticable performance improvement compared to old helper implementation. V1 of this patch was not sent to qemu-devel and I am now sending V2 to appropriate email adresses. Stefan Brankovic (1): target/ppc: Fix for optimized vsl/vsr instructions target/ppc/translate/vmx-impl.inc.c | 84 ++--- 1 file changed, 40 insertions(+), 44 deletions(-) -- 2.7.4
[PATCH v2] target/ppc: Fix for optimized vsl/vsr instructions
In previous implementation, invocation of TCG shift function could request shift of TCG variable by 64 bits when variable 'sh' is 0, which is not supported in TCG (values can be shifted by 0 to 63 bits). This patch fixes this by using two separate invocation of TCG shift functions, with maximum shift amount of 32. Name of variable 'shifted' is changed to 'carry' so variable naming is similar to old helper implementation. Variables 'avrA' and 'avrB' are replaced with variable 'avr'. Fixes: 4e6d0920e7547e6af4bbac5ffe9adfe6ea621822 Reported-by: Paul Clark Reported-by: Mark Cave-Ayland Suggested-by: Aleksandar Markovic Signed-off-by: Stefan Brankovic --- target/ppc/translate/vmx-impl.inc.c | 84 ++--- 1 file changed, 40 insertions(+), 44 deletions(-) diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c index 2472a52..81d5a7a 100644 --- a/target/ppc/translate/vmx-impl.inc.c +++ b/target/ppc/translate/vmx-impl.inc.c @@ -590,40 +590,38 @@ static void trans_vsl(DisasContext *ctx) int VT = rD(ctx->opcode); int VA = rA(ctx->opcode); int VB = rB(ctx->opcode); -TCGv_i64 avrA = tcg_temp_new_i64(); -TCGv_i64 avrB = tcg_temp_new_i64(); +TCGv_i64 avr = tcg_temp_new_i64(); TCGv_i64 sh = tcg_temp_new_i64(); -TCGv_i64 shifted = tcg_temp_new_i64(); +TCGv_i64 carry = tcg_temp_new_i64(); TCGv_i64 tmp = tcg_temp_new_i64(); -/* Place bits 125-127 of vB in sh. */ -get_avr64(avrB, VB, false); -tcg_gen_andi_i64(sh, avrB, 0x07ULL); +/* Place bits 125-127 of vB in 'sh'. */ +get_avr64(avr, VB, false); +tcg_gen_andi_i64(sh, avr, 0x07ULL); /* - * Save highest sh bits of lower doubleword element of vA in variable - * shifted and perform shift on lower doubleword. + * Save highest 'sh' bits of lower doubleword element of vA in variable + * 'carry' and perform shift on lower doubleword. */ -get_avr64(avrA, VA, false); -tcg_gen_subfi_i64(tmp, 64, sh); -tcg_gen_shr_i64(shifted, avrA, tmp); -tcg_gen_andi_i64(shifted, shifted, 0x7fULL); -tcg_gen_shl_i64(avrA, avrA, sh); -set_avr64(VT, avrA, false); +get_avr64(avr, VA, false); +tcg_gen_subfi_i64(tmp, 32, sh); +tcg_gen_shri_i64(carry, avr, 32); +tcg_gen_shr_i64(carry, carry, tmp); +tcg_gen_shl_i64(avr, avr, sh); +set_avr64(VT, avr, false); /* * Perform shift on higher doubleword element of vA and replace lowest - * sh bits with shifted. + * 'sh' bits with 'carry'. */ -get_avr64(avrA, VA, true); -tcg_gen_shl_i64(avrA, avrA, sh); -tcg_gen_or_i64(avrA, avrA, shifted); -set_avr64(VT, avrA, true); +get_avr64(avr, VA, true); +tcg_gen_shl_i64(avr, avr, sh); +tcg_gen_or_i64(avr, avr, carry); +set_avr64(VT, avr, true); -tcg_temp_free_i64(avrA); -tcg_temp_free_i64(avrB); +tcg_temp_free_i64(avr); tcg_temp_free_i64(sh); -tcg_temp_free_i64(shifted); +tcg_temp_free_i64(carry); tcg_temp_free_i64(tmp); } @@ -639,39 +637,37 @@ static void trans_vsr(DisasContext *ctx) int VT = rD(ctx->opcode); int VA = rA(ctx->opcode); int VB = rB(ctx->opcode); -TCGv_i64 avrA = tcg_temp_new_i64(); -TCGv_i64 avrB = tcg_temp_new_i64(); +TCGv_i64 avr = tcg_temp_new_i64(); TCGv_i64 sh = tcg_temp_new_i64(); -TCGv_i64 shifted = tcg_temp_new_i64(); +TCGv_i64 carry = tcg_temp_new_i64(); TCGv_i64 tmp = tcg_temp_new_i64(); -/* Place bits 125-127 of vB in sh. */ -get_avr64(avrB, VB, false); -tcg_gen_andi_i64(sh, avrB, 0x07ULL); +/* Place bits 125-127 of vB in 'sh'. */ +get_avr64(avr, VB, false); +tcg_gen_andi_i64(sh, avr, 0x07ULL); /* - * Save lowest sh bits of higher doubleword element of vA in variable - * shifted and perform shift on higher doubleword. + * Save lowest 'sh' bits of higher doubleword element of vA in variable + * 'carry' and perform shift on higher doubleword. */ -get_avr64(avrA, VA, true); -tcg_gen_subfi_i64(tmp, 64, sh); -tcg_gen_shl_i64(shifted, avrA, tmp); -tcg_gen_andi_i64(shifted, shifted, 0xfe00ULL); -tcg_gen_shr_i64(avrA, avrA, sh); -set_avr64(VT, avrA, true); +get_avr64(avr, VA, true); +tcg_gen_subfi_i64(tmp, 32, sh); +tcg_gen_shli_i64(carry, avr, 32); +tcg_gen_shl_i64(carry, carry, tmp); +tcg_gen_shr_i64(avr, avr, sh); +set_avr64(VT, avr, true); /* * Perform shift on lower doubleword element of vA and replace highest - * sh bits with shifted. + * 'sh' bits with 'carry'. */ -get_avr64(avrA, VA, false); -tcg_gen_shr_i64(avrA, avrA, sh); -tcg_gen_or_i64(avrA, avrA, shifted); -set_avr64(VT, avrA, false); +get_avr64(avr, VA, false); +tcg_gen_shr_i64(avr, avr, sh); +tcg_gen_or_i64(avr, avr, carry); +set_avr64(VT, avr, false); -tcg_temp_free_i64(avrA); -tcg_temp_free_i64(avrB); +
Re: [PATCH v15 4/5] block: introduce backup-top filter driver
On 01.10.19 15:14, Vladimir Sementsov-Ogievskiy wrote: > Backup-top filter caches write operations and does copy-before-write > operations. > > The driver will be used in backup instead of write-notifiers. > > Signed-off-by: Vladimir Sementsov-Ogievskiy > --- > block/backup-top.h | 41 +++ > block/backup-top.c | 281 > block/Makefile.objs | 1 + > 3 files changed, 323 insertions(+) > create mode 100644 block/backup-top.h > create mode 100644 block/backup-top.c Reviewed-by: Max Reitz signature.asc Description: OpenPGP digital signature
Re: [PATCH v1 0/5] s390x/mmu: Implement more facilities
On 26.09.19 12:16, David Hildenbrand wrote: > This is the follow up of: > [PATCH-for-4.2 v1 0/9] s390x: MMU changes and extensions > Without the general MMU rework. It's based on: > [PATCH v2 0/7] s390x/mmu: DAT translation rewrite > > This series adds adds EDAT2 MMU support, and implements/indicates related > facilities (ESOP-1, ESOP-2, IEP, ...) for TCG. The QEMU CPU model is > updated. > > IEP under QEMU TCG seems to work just fine, when eabling it via the "max" > CPU model - via kvm unit tests: > t460s: ~/git/kvm-unit-tests master $ ./s390x-run s390x/iep.elf -cpu max > [...] > PASS: iep: iep protection: Program interrupt: expected(4) == received(4) > SUMMARY: 1 tests > > EXIT: STATUS=1 > > Changes since "[PATCH-for-4.2 v1 0/9] s390x: MMU changes and extensions": > - "s390x/mmu: Add EDAT2 translation support" > -- Fix vaddr offset within 2GB page > - "s390x/mmu: Implement ESOP-2 and ..." > -- Squashed two patches as requested. > -- Also set ilen to "2" in case of MMU_INST_FETCH on mmu_translate_real > - "s390x/mmu: Implement Instruction-Execution-Protection Facility" > -- Make sure s390_cpu_get_phys_page_debug() doesn't choke on IEP > - "s390x/cpumodel: Add new TCG features to QEMU cpu model" > -- Add comment "features introduced after the z13" > > Cc: Ilya Leoshkevich > > David Hildenbrand (5): > s390x/mmu: Add EDAT2 translation support > s390x/mmu: Implement ESOP-2 and > access-exception-fetch/store-indication facility > s390x/mmu: Implement Instruction-Execution-Protection Facility > s390x/cpumodel: Prepare for changes of QEMU model > s390x/cpumodel: Add new TCG features to QEMU cpu model > > hw/s390x/s390-virtio-ccw.c | 2 ++ > target/s390x/cpu.h | 1 + > target/s390x/gen-features.c | 11 +- > target/s390x/helper.c | 6 +- > target/s390x/mmu_helper.c | 42 +++-- > 5 files changed, 58 insertions(+), 4 deletions(-) > @Christian (@Conny) if I can get an ACK on the last patch, I can send this directly upstream. -- Thanks, David / dhildenb
Re: [PATCH 3/4] block/mirror: support unaligned write in active mirror
04.10.2019 15:59, Max Reitz wrote: > On 03.10.19 11:34, Vladimir Sementsov-Ogievskiy wrote: >> 02.10.2019 18:52, Max Reitz wrote: >>> On 02.10.19 17:06, Vladimir Sementsov-Ogievskiy wrote: 02.10.2019 18:03, Vladimir Sementsov-Ogievskiy wrote: > 02.10.2019 17:57, Max Reitz wrote: >> On 12.09.19 17:13, Vladimir Sementsov-Ogievskiy wrote: >>> Prior 9adc1cb49af8d do_sync_target_write had a bug: it reset aligned-up >>> region in the dirty bitmap, which means that we may not copy some bytes >>> and assume them copied, which actually leads to producing corrupted >>> target. >>> >>> So 9adc1cb49af8d forced dirty bitmap granularity to be >>> request_alignment for mirror-top filter, so we are not working with >>> unaligned requests. However forcing large alignment obviously decreases >>> performance of unaligned requests. >>> >>> This commit provides another solution for the problem: if unaligned >>> padding is already dirty, we can safely ignore it, as >>> 1. It's dirty, it will be copied by mirror_iteration anyway >>> 2. It's dirty, so skipping it now we don't increase dirtiness of the >>> bitmap and therefore don't damage "synchronicity" of the >>> write-blocking mirror. >> >> But that’s not what active mirror is for. The point of active mirror is >> that it must converge because every guest write will contribute towards >> that goal. >> >> If you skip active mirroring for unaligned guest writes, they will not >> contribute towards converging, but in fact lead to the opposite. >> > > The will not contribute only if region is already dirty. Actually, after > first iteration of mirroring (copying the whole disk), all following > writes > will contribute, so the whole process must converge. It is a bit similar > with > running one mirror loop in normal mode, and then enable write-blocking. > In other words, we don't need "all guest writes contribute" to converge, "all guest writes don't create new dirty bits" is enough, as we have parallel mirror iteration which contiguously handles dirty bits. >>> >>> Hm, in a sense. But it does mean that guest writes will not contribute >>> to convergence. >>> >>> And that’s against the current definition of write-blocking, which does >>> state that “when data is written to the source, write it (synchronously) >>> to the target as well”. >>> >> >> Hmm, understand. But IMHO our proposed behavior is better in general. >> Do you think it's a problem to change spec now? >> If yes, I'll resend with an option > > Well, the thing is that I’d find it weird if write-blocking wasn’t > blocking in all cases. And in my opinion, it makes more sense for > active mirror if all writes actively contributed to convergence. > Why? What is the benefit in it? What is "all writes actively contributed to convergence" for user? I think for user there may be the following criteria: 1. guaranteed converge, with any guest write load. Both current and my proposed variants are OK. 2. Less impact on guest. Obviously my proposed variant is better 3. Total time of mirroring Seems, current may be a bit better, but I don't think that unaligned tails gives significant impact. === So, assume I want [1]+[2]. And possibly 2.2: Even less impact on guest: ignore not only unaligned tails if they are already dirty, but full synchronous mirror operation if area is already dirty. How should I call this? Should it be separate mode, or option for write-blocking? -- Best regards, Vladimir
Re: [PATCH v5 1/5] iotests: remove 'linux' from default supported platforms
On Fri, 4 Oct 2019 at 13:45, Max Reitz wrote: > > In the end, I don't care what code these patches touched. I do an > > innocent git pull, and when I finally see that it's master that breaks > > iotests and not my patches on top of it, I'm annoyed. > > Hm. Part of my point was that this still happens all the time. > > Which is why I’d prefer more tests to run in CI than a handful of not > very useful ones in make check. > > (Of course, running it in a CI won’t prevent iotest failures on your > machine, but in practice neither does the current model.) If you want the tree to be defended against problems, then you need to run tests in 'make check'. Nothing else gets consistently run and has failures spotted either before code goes into the tree or quickly afterwards. 'make check' does have the restriction that we don't want the tests to take too long to run, but in general the block layer should be running some reasonable subset of tests in the project's standard "run the tests please" command. (I have no opinion on exactly what that subset ought to be, beyond that it would be good if that subset doesn't have known intermittent failures in it.) > I still think that I personally would prefer the iotests to not run as > part of make check, but just as CI. 'make check' *is* our CI gate, to a first approximation. Most of the various travis and other setups are simply a front-end for "build QEMU in various configurations and on various hosts and then run 'make check'". The travis setup at the moment is a bit odd, because it runs tests but it's not a gate on our merging changes. Ideally I would like to fix this so that rather than me personally running "make check" via a bunch of scripts we have one CI setup that we trust (gitlab seems the current favoured contender) and that gates changes flowing into master rather than me doing it manually. We might then turn travis off if it's not providing anything for us that the gitlab setup doesn't. thanks -- PMM
Re: [PATCH v2] s390x/tcg: MVCL: Exit to main loop if requested
David Hildenbrand writes: > On 04.10.19 14:11, Peter Maydell wrote: >> On Fri, 4 Oct 2019 at 09:02, David Hildenbrand wrote: >>> So shall we leave this patch as-is (adding a summary of what you >>> explained to the description) or shall we somehow factor out the >>> TCG-internal-thingy check? >> >> Nothing else in target code touches the icount data structures, >> so if this s390 insn needs to make this check I think it ought >> to do it by calling a function implemented by the tcg code; >> that can then have a good name that describes what it's doing >> and a doc comment explaining the reason we need to have it. >> >> thanks >> -- PMM >> > > I can offer something like this: > > diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h > index 49db07ba0b..d370ac0134 100644 > --- a/include/exec/exec-all.h > +++ b/include/exec/exec-all.h > @@ -72,6 +72,26 @@ void QEMU_NORETURN cpu_loop_exit(CPUState *cpu); > void QEMU_NORETURN cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc); > void QEMU_NORETURN cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc); > > +/** > + * cpu_cond_loop_exit_restore: > + * @cpu: the vCPU state to be restored > + * @pc: the host PC > + * > + * Trigger a cpu_loop_exit_restore() in case somebody asked for a return > + * to the main loop (e.g. cpu_exit() or cpu_interrupt()). > + * > + * This is helpful for architectures that support interruptible > + * instructions. After writing back all state to registers/memory, this > + * call can be used to conditionally return back to the main loop or to > + * continue executing the interruptible instruction. > + */ > +static inline void cpu_cond_loop_exit_restore(CPUState *cpu, uintptr_t pc) > +{ > +if (unlikely((int32_t)atomic_read(_neg(cs)->icount_decr.u32) < 0)) { > +cpu_loop_exit_restore(cs, ra); > +} > +} > + cpu_loop_exit_restore_cond_irq cpu_loop_exit_restore_ifirq cpu_loop_exit_restore_conditional cpu_loop_exit_restore_maybe meh, naming stuff is hard: Acked-by: Alex Bennée > #if !defined(CONFIG_USER_ONLY) > void cpu_reloading_memory_map(void); > /** > > > Or, as alternative, something like "cpu_shall_exit()" which only > wraps the single check. -- Alex Bennée
Re: [PATCH] netmap: support git-submodule build otption
On Fri, 4 Oct 2019 at 14:03, Giuseppe Lettieri wrote: > > From: Giuseppe Lettieri > > With this patch, netmap support can be enabled with > the following options to the configure script: > > --enable-netmap[=system] > > Use the host system netmap installation. > Fail if not found. > > --enable-netmap=git > > clone the official netmap repository on > github (mostly useful for CI) > > Signed-off-by: Giuseppe Lettieri > --- > .gitmodules | 3 +++ > configure | 64 + > 2 files changed, 58 insertions(+), 9 deletions(-) > > diff --git a/.gitmodules b/.gitmodules > index c5c474169d..bf75dbc5e3 100644 > --- a/.gitmodules > +++ b/.gitmodules > @@ -58,3 +58,6 @@ > [submodule "roms/opensbi"] > path = roms/opensbi > url = https://git.qemu.org/git/opensbi.git > +[submodule "netmap"] > + path = netmap > + url = https://github.com/luigirizzo/netmap.git Hi; this patch seems to be missing the rationale for why we want to do this. New submodules: * should always be on git.qemu.org (we need to mirror them in case the original upstream vanishes) * need a strong justification for why they're required (ie why we can't just use whatever the system-provided version of the library is, or fall back to not providing the feature if the library isn't present) Basically new submodules are a pain so we seek to minimize the use of them. thanks -- PMM
[PATCH v3] s390x/tcg: MVCL: Exit to main loop if requested
MVCL is interruptible and we should check for interrupts and process them after writing back the variables to the registers. Let's check for any exit requests and exit to the main loop. Introduce a new helper function for that: cpu_cond_loop_exit_restore(). When booting Fedora 30, I can see a handful of these exits and it seems to work reliable. Also, Richard explained why this works correctly even when MVCL is called via EXECUTE: (1) TB with EXECUTE runs, at address Ae - env->psw_addr stored with Ae. - helper_ex() runs, memory address Am computed from D2a(X2a,B2a) or from psw.addr+RI2. - env->ex_value stored with memory value modified by R1a (2) TB of executee runs, - env->ex_value stored with 0. - helper_mvcl() runs, using and updating R1b, R1b+1, R2b, R2b+1. (3a) helper_mvcl() completes, - TB of executee continues, psw.addr += ilen. - Next instruction is the one following EXECUTE. (3b) helper_mvcl() exits to main loop, - cpu_loop_exit_restore() unwinds psw.addr = Ae. - Next instruction is the EXECUTE itself... - goto 1. As the PoP mentiones that an interruptible instruction called via EXECUTE should avoid modifying storage/registers that are used by EXECUTE itself, it is fine to retrigger EXECUTE. Cc: Alex Bennée Cc: Peter Maydell Cc: Paolo Bonzini Suggested-by: Richard Henderson Signed-off-by: David Hildenbrand --- v2 -> v3: - Add TCG helper function - Add details about EXECUTE to description - Return to main loop only if there is work left to do v1 -> v2: - Check only if icount_decr.u32 < 0 - Drop should_interrupt_instruction() and perform the check inline - Rephrase comment, subject, and description --- include/exec/exec-all.h | 20 target/s390x/mem_helper.c | 11 ++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 49db07ba0b..d6beeddecf 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -72,6 +72,26 @@ void QEMU_NORETURN cpu_loop_exit(CPUState *cpu); void QEMU_NORETURN cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc); void QEMU_NORETURN cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc); +/** + * cpu_cond_loop_exit_restore: + * @cpu: the vCPU state to be restored + * @pc: the host PC + * + * Trigger a cpu_loop_exit_restore() in case somebody asked for a return + * to the main loop (e.g., cpu_exit() or cpu_interrupt()). + * + * This is helpful for architectures that support interruptible + * instructions. After writing back all state to registers/memory, this + * call can be used to conditionally return back to the main loop or to + * continue executing the interruptible instruction. + */ +static inline void cpu_cond_loop_exit_restore(CPUState *cpu, uintptr_t pc) +{ +if (unlikely((int32_t)atomic_read(_neg(cpu)->icount_decr.u32) < 0)) { +cpu_loop_exit_restore(cpu, pc); +} +} + #if !defined(CONFIG_USER_ONLY) void cpu_reloading_memory_map(void); /** diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c index 08c5cc6a99..c6ccb73d51 100644 --- a/target/s390x/mem_helper.c +++ b/target/s390x/mem_helper.c @@ -1015,6 +1015,7 @@ uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2) uint64_t srclen = env->regs[r2 + 1] & 0xff; uint64_t src = get_address(env, r2); uint8_t pad = env->regs[r2 + 1] >> 24; +CPUState *cs = env_cpu(env); S390Access srca, desta; uint32_t cc, cur_len; @@ -1065,7 +1066,15 @@ uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2) env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen); set_address_zero(env, r1, dest); -/* TODO: Deliver interrupts. */ +/* + * MVCL is interruptible. Return to the main loop if requested after + * writing back all state to registers. If no interrupt will get + * injected, we'll end up back in this handler and continue processing + * the remaining parts. + */ +if (destlen) { +cpu_cond_loop_exit_restore(cs, ra); +} } return cc; } -- 2.21.0
Re: bitmap migration bug with -drive while block mirror runs
On 10/4/19 4:24 AM, Vladimir Sementsov-Ogievskiy wrote: The way I see it, we know an auto-generated node name will never be correct, but an explicitly specified one represents an explicit user configuration. It's wrong to use generated names for migration details, but it's never wrong to use explicit configuration. So I believe they are /already/ distinct in nature. We even already have code that can tell them apart. Is it restricted to create user node-names formatted like automated ones? Yes. Explicit node names cannot begin with '#', while all generated node names do. There are four cases here: - The bitmap is loaded to a root node with an explicit name - The bitmap is loaded to a non-root node with an explicit name The blockdev case with persistence. The name represents explicit user configuration and can be relied upon in the destination. - The bitmap is loaded to a root node with an implicit name, with a named BB The -drive case. The named BB represents the explicit user configuration and can be relied upon. - The bitmap is loaded to a non-root node with an implicit name. So, do you suggest to save information of haw bitmap was loaded or created in BdrvDirtyBitmap structure, to distinguish, how to identify it, by blk name or by node-name? And how this information would be updated on bitmap merge? And what about creating bitmaps? So if one bitmap created in node N by blk name B, and another bitmap created in same node N by node-name N, will we migrated these bitmaps in different ways? In the -drive case (historical libvirt), the block device is named, and node names are generated (it may be possible to use -drive and still create explicit node names, but libvirt will never do that). You can create a bitmap using either ('drive-name','bitmap-name'), or ('generated-node-name','bitmap-name'), but for the purposes of migration, only the 'drive-name' variant is migrateable. In the -blockdev case (upcoming libvirt), the block device is anonymous, and all node names are given by libvirt. Thus, you can only create a bitmap using ('node-name','bitmap-name'), but it is also obvious that migration will use the 'node-name' variant. If it's a problem for libvirt to keep same node-names, why should we insist? Is it really a problem? If libvirt requests migration of bitmaps, those bitmaps need to go somewhere. Even if it constructs a different graph for valid reasons, it should still understand which qcow2 nodes correlate to which other qcow2 nodes and name them accordingly. I don't see why this is actually a terrible constraint. Even in our mapping proposal we're still using node-names. The obvious case I see is that if we have a source: Backing.qcow2 (contains bitmap1) <- Active.qcow2 (contains bitmap2) and we want to migrate AND flatten at the same time, but still preserve the bitmaps as a record of changes between the points in time, then libvirt needs a way to specify migration to: Flattened.qcow2 (contains bitmap1 and bitmap2) No matter which node name libvirt assigns to Flattened.qcow2, at least one of the two bitmaps on the source will be migrated to a different node name on the destination, while still giving the net result of a bitmap logically associated with the drive (and not any particular node). Ok, I'm not completely against node-name matching, keeping in mind that it is current default behavior anyway. And I see Peter not completely against too. But I'd prefer to select default name from current moment, not involving information of "how bitmap was created or loaded", as it may lead to migrating bitmaps from one node in different ways which seems inconsistent. As long as a bitmap never has both names non-generated, we should be fine (it either has an explicit drive name and generated node name, or it has no drive name and an explicit node name). Current default is blk name. And node-name if blk name is not available. So I think the only thing to fix right now is skipping filters. We possibly may additionally restrict migrating bitmaps without blk name and with generated node-name. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3226 Virtualization: qemu.org | libvirt.org
Re: [PATCH 3/4] block/mirror: support unaligned write in active mirror
On 03.10.19 11:34, Vladimir Sementsov-Ogievskiy wrote: > 02.10.2019 18:52, Max Reitz wrote: >> On 02.10.19 17:06, Vladimir Sementsov-Ogievskiy wrote: >>> 02.10.2019 18:03, Vladimir Sementsov-Ogievskiy wrote: 02.10.2019 17:57, Max Reitz wrote: > On 12.09.19 17:13, Vladimir Sementsov-Ogievskiy wrote: >> Prior 9adc1cb49af8d do_sync_target_write had a bug: it reset aligned-up >> region in the dirty bitmap, which means that we may not copy some bytes >> and assume them copied, which actually leads to producing corrupted >> target. >> >> So 9adc1cb49af8d forced dirty bitmap granularity to be >> request_alignment for mirror-top filter, so we are not working with >> unaligned requests. However forcing large alignment obviously decreases >> performance of unaligned requests. >> >> This commit provides another solution for the problem: if unaligned >> padding is already dirty, we can safely ignore it, as >> 1. It's dirty, it will be copied by mirror_iteration anyway >> 2. It's dirty, so skipping it now we don't increase dirtiness of the >> bitmap and therefore don't damage "synchronicity" of the >> write-blocking mirror. > > But that’s not what active mirror is for. The point of active mirror is > that it must converge because every guest write will contribute towards > that goal. > > If you skip active mirroring for unaligned guest writes, they will not > contribute towards converging, but in fact lead to the opposite. > The will not contribute only if region is already dirty. Actually, after first iteration of mirroring (copying the whole disk), all following writes will contribute, so the whole process must converge. It is a bit similar with running one mirror loop in normal mode, and then enable write-blocking. >>> >>> >>> In other words, we don't need "all guest writes contribute" to converge, >>> "all guest writes don't create new dirty bits" is enough, as we have >>> parallel >>> mirror iteration which contiguously handles dirty bits. >> >> Hm, in a sense. But it does mean that guest writes will not contribute >> to convergence. >> >> And that’s against the current definition of write-blocking, which does >> state that “when data is written to the source, write it (synchronously) >> to the target as well”. >> > > Hmm, understand. But IMHO our proposed behavior is better in general. > Do you think it's a problem to change spec now? > If yes, I'll resend with an option Well, the thing is that I’d find it weird if write-blocking wasn’t blocking in all cases. And in my opinion, it makes more sense for active mirror if all writes actively contributed to convergence. Max signature.asc Description: OpenPGP digital signature
Re: [PATCH] audio: add -audiodev pa,in|out.latency= to documentation
Hi On Fri, Oct 4, 2019 at 4:57 PM Stefan Hajnoczi wrote: > > The "latency" parameter wasn't covered by the documentation. > > Signed-off-by: Stefan Hajnoczi > --- > Hi Gerd, > You asked me to resend this patch because there was a conflict. I have > rebased it onto qemu.git/master (4f59102571fc). > > qemu-options.hx | 5 + > 1 file changed, 5 insertions(+) > > diff --git a/qemu-options.hx b/qemu-options.hx > index 2a04ca6ac5..5c27c57273 100644 > --- a/qemu-options.hx > +++ b/qemu-options.hx > @@ -470,6 +470,7 @@ DEF("audiodev", HAS_ARG, QEMU_OPTION_audiodev, > "-audiodev pa,id=id[,prop[=value][,...]]\n" > "server= PulseAudio server address\n" > "in|out.name= source/sink device name\n" > +"in|out.latency= desired latency in microseconds\n" > #endif > #ifdef CONFIG_AUDIO_SDL > "-audiodev sdl,id=id[,prop[=value][,...]]\n" > @@ -630,6 +631,10 @@ Sets the PulseAudio @var{server} to connect to. > @item in|out.name=@var{sink} > Use the specified source/sink for recording/playback. > > +@item in|out.latency=@var{usecs} > +Desired latency in microseconds. The PulseAudio server will try to honor > this > +value but actual latencies may be lower or higher. default is 15ms according to f6142777659f2e7ad143f2850f1f036f899f475f, might be worth to document. otherwise, looks good: Reviewed-by: Marc-André Lureau > + > @end table > > @item -audiodev sdl,id=@var{id}[,@var{prop}[=@var{value}][,...]] > -- > 2.21.0 > > -- Marc-André Lureau
Re: [RFC 3/4] ptimer: Provide new transaction-based API
On Fri, 4 Oct 2019 at 12:48, Peter Maydell wrote: > > Provide the new transaction-based API. > +void ptimer_transaction_begin(ptimer_state *s) > +{ > +assert(!s->in_transaction && s->callback); > +s->in_transaction = true; As I was working on converting the ptimer tests to the new API I noticed a silly bug here -- we should set s->need_reload = false; in ptimer_transaction_begin(). Otherwise one transaction has decided to do a reload, we'll do a reload in commit every time, including when that's bogus (eg when the timer is disabled)... thanks -- PMM
[PATCH] audio: add -audiodev pa,in|out.latency= to documentation
The "latency" parameter wasn't covered by the documentation. Signed-off-by: Stefan Hajnoczi --- Hi Gerd, You asked me to resend this patch because there was a conflict. I have rebased it onto qemu.git/master (4f59102571fc). qemu-options.hx | 5 + 1 file changed, 5 insertions(+) diff --git a/qemu-options.hx b/qemu-options.hx index 2a04ca6ac5..5c27c57273 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -470,6 +470,7 @@ DEF("audiodev", HAS_ARG, QEMU_OPTION_audiodev, "-audiodev pa,id=id[,prop[=value][,...]]\n" "server= PulseAudio server address\n" "in|out.name= source/sink device name\n" +"in|out.latency= desired latency in microseconds\n" #endif #ifdef CONFIG_AUDIO_SDL "-audiodev sdl,id=id[,prop[=value][,...]]\n" @@ -630,6 +631,10 @@ Sets the PulseAudio @var{server} to connect to. @item in|out.name=@var{sink} Use the specified source/sink for recording/playback. +@item in|out.latency=@var{usecs} +Desired latency in microseconds. The PulseAudio server will try to honor this +value but actual latencies may be lower or higher. + @end table @item -audiodev sdl,id=@var{id}[,@var{prop}[=@var{value}][,...]] -- 2.21.0
Re: [PATCH 02/67] iotests.py: Add @skip_for_imgopts()
On 03.10.19 17:19, Vladimir Sementsov-Ogievskiy wrote: > 01.10.2019 22:46, Max Reitz wrote: >> Signed-off-by: Max Reitz >> --- >> tests/qemu-iotests/iotests.py | 13 + >> 1 file changed, 13 insertions(+) >> >> diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py >> index 7030900807..cdcb62c4ac 100644 >> --- a/tests/qemu-iotests/iotests.py >> +++ b/tests/qemu-iotests/iotests.py >> @@ -950,6 +950,19 @@ def skip_if_unsupported(required_formats=[], >> read_only=False): >> return func_wrapper >> return skip_test_decorator >> >> +def skip_for_imgopts(unsupported_opts=[]): >> +'''Skip Test Decorator >> + Skips the test if imgopts contains any of the given options''' >> +def skip_test_decorator(func): >> +def func_wrapper(test_case: QMPTestCase, *args, **kwargs): > > how about > > unsup = set(imgopts) & set(unsupported_opts) > if unsup: > test_case.case_skip('... Options {} are ...', format(..., ', > '.join(map(str, unsup))) Sure. Max signature.asc Description: OpenPGP digital signature
[Virtio-fs] [PATCH] virtiofsd: Add pread64() to seccomp list for posix_fallocate()
I test virtiofs with NFS 4.0 as backend and notice that fallocate causes system hang (kernel: 5.4-rc1, qemu: virtio-fs-dev branch): $ mount -t virtiofs myfs /mnt $ dd if=/dev/urandom bs=1000 seek=1 count=1 of=/mnt/file $ fallocate -l 2000 /mnt/file # system hang This is because: 1. virtiofs supports fallocate syscall while NFS 4.0 does not. 2. virtiofsd uses posix_fallocate() and it fallbacks to pread64()/ pwrite64() sequence to reserve blocks if fallocate syscall is not available. 3. pread64() syscall is prohibited by seccomp and virtiofsd thread is killed by SIGSYS. So, just add pread64() to seccomp white list to fix this problem. Signed-off-by: Misono Tomohiro --- contrib/virtiofsd/seccomp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/contrib/virtiofsd/seccomp.c b/contrib/virtiofsd/seccomp.c index 93b679271d..88b61bca42 100644 --- a/contrib/virtiofsd/seccomp.c +++ b/contrib/virtiofsd/seccomp.c @@ -61,6 +61,7 @@ static const int syscall_whitelist[] = { SCMP_SYS(ppoll), SCMP_SYS(prctl), /* TODO restrict to just PR_SET_NAME? */ SCMP_SYS(preadv), + SCMP_SYS(pread64), SCMP_SYS(pwritev), SCMP_SYS(pwrite64), SCMP_SYS(read), -- 2.21.0
Re: libvirt? qemu change that mmaps ELF files breaks libvirt svirt handling for os.kernel
On 04.10.19 14:36, Daniel P. Berrangé wrote: > On Fri, Oct 04, 2019 at 02:18:49PM +0200, Christian Borntraeger wrote: >> >> >> On 04.10.19 14:13, Paolo Bonzini wrote: >>> On 04/10/19 14:03, Christian Borntraeger wrote: Stefano, Paolo, I have an interesting fail in QEMU 2019-10-04T12:00:32.675188Z qemu-system-s390x: GLib: g_mapped_file_unref: assertion 'file != NULL' failed that bisected to commit 816b9fe450220e19acb91a0ce4a8ade7000648d1 (refs/bisect/bad) elf-ops.h: Map into memory the ELF to load strace tells that I can read the ELF file, but not mmap strace: 214365 openat(AT_FDCWD, "/var/lib/libvirt/images/test_cpu_timer.elf", O_RDONLY) = 36 214365 read(46, "\177ELF\2\2\1\0\0\0\0\0\0\0\0\0", 16) = 16 214365 lseek(46, 0, SEEK_SET) = 0 [...] 214365 fstat(46, {st_mode=S_IFREG|0755, st_size=168176, ...}) = 0 214365 mmap(NULL, 168176, PROT_READ|PROT_WRITE, MAP_PRIVATE, 46, 0) = -1 EACCES (Permission denied) So reading from /var/lib/libvirt/images/test_cpu_timer.elf does work, mmaping does not. setenforce 0 makes the problem go away. This might be more of an issue in libvirt, setting the svirt context too restrictive, but I am not too deep into the svirt part of libvirt. Reverting the qemu commit makes the problem go away. >>> >>> Yes, the policy is too restrictive in my opinion. >>> >>> Can you include the output of "audit2allow" and/or "audit2allow -R"? >>> >>> Thanks, >>> >>> Paolo >>> >> >> require { >> type unconfined_t; >> type virt_content_t; >> type svirt_t; >> type systemd_tmpfiles_t; >> type user_home_t; >> type NetworkManager_t; >> class file { entrypoint execute ioctl lock map open read write }; >> class bpf prog_run; >> } >> >> #= svirt_t == >> allow svirt_t user_home_t:file { entrypoint execute ioctl lock open read >> write }; >> >> # This avc can be allowed using the boolean 'domain_can_mmap_files' > > This is an unrelated boolean and we don't want to use that so ignore > this suggestion ! > >> allow svirt_t virt_content_t:file map; > > This matches your strace. virt_content_t is the label we use on > files that are exposed to QEMU read-only. > > The svirt policy only allows mmap on files that re exposed read-write > currently. > > I believe we can safely allow this mmap on read-only files too > because the read-only restriction is enforced at time of open, > and any writes to the mapped file will result in a private > copy on write. > > Please file a bz entry against the selinux-policy component for > whatever distro you're using, and/or Fedora rawhide, and CC me > on it too. Done. This was on Fedora 30. https://bugzilla.redhat.com/show_bug.cgi?id=1758525 Now sure about others like RHEL. RHV.
Re: [PATCH v5 1/5] iotests: remove 'linux' from default supported platforms
On 04.10.19 12:19, Kevin Wolf wrote: > Am 02.10.2019 um 19:47 hat Max Reitz geschrieben: >> On 02.10.19 18:44, Kevin Wolf wrote: >>> Am 02.10.2019 um 13:57 hat Max Reitz geschrieben: It usually worked fine for me because it’s rather rare that non-block patches broke the iotests. >>> >>> I disagree. It happened all the time that someone else broke the iotests >>> in master and we needed to fix it up. >> >> OK. >> >> (In my experience, it’s still mostly block patches, only that they tend >> to go through non-block trees.) > > Fair enough, it's usually code that touches block code. I assumed "block > patches" to mean patches that go through one of the block trees and for > which iotests are run before sending a pull request. > > In the end, I don't care what code these patches touched. I do an > innocent git pull, and when I finally see that it's master that breaks > iotests and not my patches on top of it, I'm annoyed. Hm. Part of my point was that this still happens all the time. Which is why I’d prefer more tests to run in CI than a handful of not very useful ones in make check. (Of course, running it in a CI won’t prevent iotest failures on your machine, but in practice neither does the current model.) Maybe my main problem is that I feel like now I have to deal with all of the fallout, even though adding the iotests to make check wasn’t my idea and neither me nor Kevin ever consented. (I don’t know whether Kevin consented implicitly, but I don’t see his name in bdd95e47844f2d8b.) You can’t just give me more responsibility without my consent and then call me egoistic for not liking it. >>> >>> I think I may have implicitly consented by helping Alex with the changes >>> to 'check' that made its output fit better in 'make check'. >>> >>> Basically, my stance is that I share your dislike for the effects on me >>> personally (also, I can't run 'make check' any more before a pull >>> request without testing half of the iotests twice because I still need a >>> full iotests run), -x auto for the qcow2 pass, by the way. >>> but I also think that having iotests in 'make check' >>> is really the right thing for the project despite the inconvenience it >>> means for me. >> >> Then I expect you to NACK Thomas’s patch, and I take it to mean that I >> can rely on you to handle basically all make check iotests failures that >> are not caused by my own pull requests. >> >> Works for me. > > Ah, we can also play this game the other way round. > > So if you merge that revert, when iotests break in master, I take it I > can rely on you to fix it up before it impacts my working branch? This is not a game, I’m talking purely from my standpoint: (I talked wrongly, but more on that below) Whenever make check fails, it’s urgent. Without iotests running in make check, we had some time to sort the situation out. That’s no longer the case. That means we need to take care of everything immediately. And I purely want help with that. I just did not have the impression that such help was there in the past months. (This impression may or may not have a correlation to reality.) I thought that was just because nobody really cared about the iotests in make check. Now you say you do, so that’s why I said you should NACK the revert and then I know that I can count on your help. Now where I was wrong: I didn’t say “your help” but “you handle it”. That was wrong. I’m sorry. That would mean I reap all the benefits and you have to pay the price. What I’m not so sure of is why you didn’t just say that’s unfair and instead reply with an impossible suggestion. That makes it a bit difficult for me to find out exactly what you plan to do. I take your point as “Exactly, this suggestion would be impossible. With the tests in make check, the worst that happens is CI breakage and rejected pull requests, and dealing with those is very much possible.”[1] As I’ve said, my POV is different, because I find CI breakage, make check breakage, and rejected pull requests to be worse because those are failures on other people’s machines. That puts me personally under much more stress than failures on my own machine. But it seems that you feel differently. [1] Or maybe you wanted to say that you find a rare intermittent failure that slips into make check less worse than 100 % failure of some test for everyone who runs the iotests. I don’t know. I know that the 100 % failures are annoying but generally easily fixed; whereas the intermittent ones are the ones that stick and hard to fix. And those intermittent ones are unlikely to cause pull requests to be rejected. I know you’ll say that we just need to ensure we can add more tests, then. But for one thing, the most important tests are the ones that take the longest, like 041. And the other of course is that adding any more tests to make check just brings me more pain, so I won’t do it. >>> >>> That's something that
[Bug 1777777] Re: arm9 clock pending (SP804)
I sent out an initial RFC patchset which fixes this (it's just an RFC because it only converts this one device to the new ptimer API, and we should do a proper conversion of all devices); it seems to make the test case in this bug work correctly: https://patchew.org/QEMU/20191004114848.16831-1-peter.mayd...@linaro.org/ -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/177 Title: arm9 clock pending (SP804) Status in QEMU: Confirmed Bug description: Hello all, I'm using the versatilepb board and the timer Interrupt Mask Status register (offset 0x14 of the SP804) does not seem to be working properly on the latest qemu-2.12. I tried on the 2.5 (i believe this is the mainstream version that comes with Linux) and it works perfectly. What happens is that the pending bit does not seem to be set in some scenarios. In my case, I see the timer value decreasing to 0 and then being reset to the reload value and neither does the interrupt is triggered nor the pending bit is set. I believe this is a matter of timing since in the "long" run the system eventually catches up (after a few microseconds). Thank you To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/177/+subscriptions
Re: libvirt? qemu change that mmaps ELF files breaks libvirt svirt handling for os.kernel
On Fri, Oct 04, 2019 at 02:18:49PM +0200, Christian Borntraeger wrote: > > > On 04.10.19 14:13, Paolo Bonzini wrote: > > On 04/10/19 14:03, Christian Borntraeger wrote: > >> Stefano, Paolo, > >> > >> I have an interesting fail in QEMU > >> > >> 2019-10-04T12:00:32.675188Z qemu-system-s390x: GLib: g_mapped_file_unref: > >> assertion 'file != NULL' failed > >> that bisected to > >> commit 816b9fe450220e19acb91a0ce4a8ade7000648d1 (refs/bisect/bad) > >> elf-ops.h: Map into memory the ELF to load > >> > >> strace tells that I can read the ELF file, but not mmap > >> strace: > >> 214365 openat(AT_FDCWD, "/var/lib/libvirt/images/test_cpu_timer.elf", > >> O_RDONLY) = 36 > >> 214365 read(46, "\177ELF\2\2\1\0\0\0\0\0\0\0\0\0", 16) = 16 > >> 214365 lseek(46, 0, SEEK_SET) = 0 > >> [...] > >> 214365 fstat(46, {st_mode=S_IFREG|0755, st_size=168176, ...}) = 0 > >> 214365 mmap(NULL, 168176, PROT_READ|PROT_WRITE, MAP_PRIVATE, 46, 0) = -1 > >> EACCES (Permission denied) > >> > >> So reading from /var/lib/libvirt/images/test_cpu_timer.elf does work, > >> mmaping does not. > >> setenforce 0 makes the problem go away. > >> > >> This might be more of an issue in libvirt, setting the svirt context too > >> restrictive, but I am not too deep into the svirt part of libvirt. > >> Reverting the qemu commit makes the problem go away. > > > > Yes, the policy is too restrictive in my opinion. > > > > Can you include the output of "audit2allow" and/or "audit2allow -R"? > > > > Thanks, > > > > Paolo > > > > require { > type unconfined_t; > type virt_content_t; > type svirt_t; > type systemd_tmpfiles_t; > type user_home_t; > type NetworkManager_t; > class file { entrypoint execute ioctl lock map open read write }; > class bpf prog_run; > } > > #= svirt_t == > allow svirt_t user_home_t:file { entrypoint execute ioctl lock open read > write }; > > # This avc can be allowed using the boolean 'domain_can_mmap_files' This is an unrelated boolean and we don't want to use that so ignore this suggestion ! > allow svirt_t virt_content_t:file map; This matches your strace. virt_content_t is the label we use on files that are exposed to QEMU read-only. The svirt policy only allows mmap on files that re exposed read-write currently. I believe we can safely allow this mmap on read-only files too because the read-only restriction is enforced at time of open, and any writes to the mapped file will result in a private copy on write. Please file a bz entry against the selinux-policy component for whatever distro you're using, and/or Fedora rawhide, and CC me on it too. > corecmd_bin_entry_type(svirt_t) > userdom_manage_user_home_content_dirs(svirt_t) > userdom_map_user_home_files(svirt_t) > virt_rw_svirt_image(svirt_t) These look unrelated to the problem above. Regards, Daniel -- |: https://berrange.com -o-https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o-https://fstop138.berrange.com :| |: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|
Re: [PATCH v2] s390x/tcg: MVCL: Exit to main loop if requested
On 04.10.19 14:11, Peter Maydell wrote: > On Fri, 4 Oct 2019 at 09:02, David Hildenbrand wrote: >> So shall we leave this patch as-is (adding a summary of what you >> explained to the description) or shall we somehow factor out the >> TCG-internal-thingy check? > > Nothing else in target code touches the icount data structures, > so if this s390 insn needs to make this check I think it ought > to do it by calling a function implemented by the tcg code; > that can then have a good name that describes what it's doing > and a doc comment explaining the reason we need to have it. > > thanks > -- PMM > I can offer something like this: diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 49db07ba0b..d370ac0134 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -72,6 +72,26 @@ void QEMU_NORETURN cpu_loop_exit(CPUState *cpu); void QEMU_NORETURN cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc); void QEMU_NORETURN cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc); +/** + * cpu_cond_loop_exit_restore: + * @cpu: the vCPU state to be restored + * @pc: the host PC + * + * Trigger a cpu_loop_exit_restore() in case somebody asked for a return + * to the main loop (e.g. cpu_exit() or cpu_interrupt()). + * + * This is helpful for architectures that support interruptible + * instructions. After writing back all state to registers/memory, this + * call can be used to conditionally return back to the main loop or to + * continue executing the interruptible instruction. + */ +static inline void cpu_cond_loop_exit_restore(CPUState *cpu, uintptr_t pc) +{ +if (unlikely((int32_t)atomic_read(_neg(cs)->icount_decr.u32) < 0)) { +cpu_loop_exit_restore(cs, ra); +} +} + #if !defined(CONFIG_USER_ONLY) void cpu_reloading_memory_map(void); /** Or, as alternative, something like "cpu_shall_exit()" which only wraps the single check. -- Thanks, David / dhildenb
Re: [PATCH] target/arm/arch_dump: Add SVE notes
Patchew URL: https://patchew.org/QEMU/20191004094609.32714-1-drjo...@redhat.com/ Hi, This series seems to have some coding style problems. See output below for more information: Type: series Message-id: 20191004094609.32714-1-drjo...@redhat.com Subject: [PATCH] target/arm/arch_dump: Add SVE notes === TEST SCRIPT BEGIN === #!/bin/bash git rev-parse base > /dev/null || exit 0 git config --local diff.renamelimit 0 git config --local diff.renames True git config --local diff.algorithm histogram ./scripts/checkpatch.pl --mailback base.. === TEST SCRIPT END === Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384 From https://github.com/patchew-project/qemu * [new tag] patchew/20191004120313.5347-1-drjo...@redhat.com -> patchew/20191004120313.5347-1-drjo...@redhat.com Switched to a new branch 'test' d041497 target/arm/arch_dump: Add SVE notes === OUTPUT BEGIN === ERROR: code indent should never use tabs #21: FILE: include/elf.h:1653: +#define NT_ARM_SVE^I0x405^I^I/* ARM Scalable Vector Extension$ WARNING: Block comments use a leading /* on a separate line #21: FILE: include/elf.h:1653: +#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension ERROR: code indent should never use tabs #22: FILE: include/elf.h:1654: +^I^I^I^I^I registers */$ WARNING: Block comments use * on subsequent lines #22: FILE: include/elf.h:1654: +#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension + registers */ WARNING: Block comments use a trailing */ on a separate line #22: FILE: include/elf.h:1654: + registers */ total: 2 errors, 3 warnings, 181 lines checked Commit d0414974dab6 (target/arm/arch_dump: Add SVE notes) has style problems, please review. If any of these errors are false positives report them to the maintainer, see CHECKPATCH in MAINTAINERS. === OUTPUT END === Test command exited with code: 1 The full log is available at http://patchew.org/logs/20191004094609.32714-1-drjo...@redhat.com/testing.checkpatch/?type=message. --- Email generated automatically by Patchew [https://patchew.org/]. Please send your feedback to patchew-de...@redhat.com
Re: [PULL v2 00/22] Misc patches for 2010-10-02
On Thu, 3 Oct 2019 at 12:05, Paolo Bonzini wrote: > > The following changes since commit 7f21573c822805a8e6be379d9bcf3ad9effef3dc: > > Merge remote-tracking branch > 'remotes/huth-gitlab/tags/pull-request-2019-10-01' into staging (2019-10-01 > 13:13:38 +0100) > > are available in the git repository at: > > > git://github.com/bonzini/qemu.git tags/for-upstream > > for you to fetch changes up to 8cd5aa6899689e6f9f3e72bf43474a436148a341: > > tests/docker: only enable ubsan for test-clang (2019-10-03 12:58:05 +0200) > > > * Compilation fix for KVM (Alex) > * SMM fix (Dmitry) > * VFIO error reporting (Eric) > * win32 fixes and workarounds (Marc-André) > * qemu-pr-helper crash bugfix (Maxim) > * Memory leak fixes (myself) > * Record-replay deadlock (Pavel) > * i386 CPUID bits (Sebastian) > * kconfig tweak (Thomas) > * Valgrind fix (Thomas) > * distclean improvement (Thomas) > * Autoconverge test (Yury) > Thomas Huth (2): > Makefile: Remove generated files when doing 'distclean' This change breaks the 'build from clean' part of my merge build tests -- I've followed up to the patch email with the description of why. thanks -- PMM
libvirt? qemu change that mmaps ELF files breaks libvirt svirt handling for os.kernel
Stefano, Paolo, I have an interesting fail in QEMU 2019-10-04T12:00:32.675188Z qemu-system-s390x: GLib: g_mapped_file_unref: assertion 'file != NULL' failed that bisected to commit 816b9fe450220e19acb91a0ce4a8ade7000648d1 (refs/bisect/bad) elf-ops.h: Map into memory the ELF to load strace tells that I can read the ELF file, but not mmap strace: 214365 openat(AT_FDCWD, "/var/lib/libvirt/images/test_cpu_timer.elf", O_RDONLY) = 36 214365 read(46, "\177ELF\2\2\1\0\0\0\0\0\0\0\0\0", 16) = 16 214365 lseek(46, 0, SEEK_SET) = 0 [...] 214365 fstat(46, {st_mode=S_IFREG|0755, st_size=168176, ...}) = 0 214365 mmap(NULL, 168176, PROT_READ|PROT_WRITE, MAP_PRIVATE, 46, 0) = -1 EACCES (Permission denied) So reading from /var/lib/libvirt/images/test_cpu_timer.elf does work, mmaping does not. setenforce 0 makes the problem go away. This might be more of an issue in libvirt, setting the svirt context too restrictive, but I am not too deep into the svirt part of libvirt. Reverting the qemu commit makes the problem go away.
Re: [RFC 0/4] transaction-based ptimer API
On 04/10/19 14:00, Peter Maydell wrote: > No, because stop/run causes the ptimer to "lose time" > (we stop the underlying timer and restart it). It's > very common for a device to want to change the ptimer > properties without a stop/restart -- "set the ptimer > count value when the guest writes to the device's counter > register" is the common one. Of the three begin/commit > blocks in the arm_timer.c conversion, only one of those > involves calls to stop/run, and even there we only > call stop/run if the write to the control register > modified the enable bit. Ok, thanks. Paolo
[PATCH v2] target/arm/arch_dump: Add SVE notes
When dumping a guest with dump-guest-memory also dump the SVE registers if they are in use. Signed-off-by: Andrew Jones --- include/elf.h | 2 + target/arm/arch_dump.c | 135 - 2 files changed, 135 insertions(+), 2 deletions(-) diff --git a/include/elf.h b/include/elf.h index 3501e0c8d03a..a7c357af74ca 100644 --- a/include/elf.h +++ b/include/elf.h @@ -1650,6 +1650,8 @@ typedef struct elf64_shdr { #define NT_ARM_HW_BREAK 0x402 /* ARM hardware breakpoint registers */ #define NT_ARM_HW_WATCH 0x403 /* ARM hardware watchpoint registers */ #define NT_ARM_SYSTEM_CALL 0x404 /* ARM system call number */ +#define NT_ARM_SVE 0x405 /* ARM Scalable Vector Extension + registers */ /* * Physical entry point into the kernel. diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c index 26a2c098687c..b02e398430b9 100644 --- a/target/arm/arch_dump.c +++ b/target/arm/arch_dump.c @@ -62,12 +62,23 @@ struct aarch64_user_vfp_state { QEMU_BUILD_BUG_ON(sizeof(struct aarch64_user_vfp_state) != 528); +/* struct user_sve_header from arch/arm64/include/uapi/asm/ptrace.h */ +struct aarch64_user_sve_header { +uint32_t size; +uint32_t max_size; +uint16_t vl; +uint16_t max_vl; +uint16_t flags; +uint16_t reserved; +} QEMU_PACKED; + struct aarch64_note { Elf64_Nhdr hdr; char name[8]; /* align_up(sizeof("CORE"), 4) */ union { struct aarch64_elf_prstatus prstatus; struct aarch64_user_vfp_state vfp; +struct aarch64_user_sve_header sve; }; } QEMU_PACKED; @@ -76,6 +87,8 @@ struct aarch64_note { (AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_elf_prstatus)) #define AARCH64_PRFPREG_NOTE_SIZE \ (AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_user_vfp_state)) +#define AARCH64_SVE_NOTE_SIZE(env) \ +(AARCH64_NOTE_HEADER_SIZE + sve_size(env)) static void aarch64_note_init(struct aarch64_note *note, DumpState *s, const char *name, Elf64_Word namesz, @@ -128,11 +141,113 @@ static int aarch64_write_elf64_prfpreg(WriteCoreDumpFunction f, return 0; } +#ifdef TARGET_AARCH64 +static off_t sve_zreg_offset(uint32_t vq, int n) +{ +off_t off = sizeof(struct aarch64_user_sve_header); +return ROUND_UP(off, 16) + vq * 16 * n; +} +static off_t sve_preg_offset(uint32_t vq, int n) +{ +return sve_zreg_offset(vq, 32) + vq * 16 / 8 * n; +} +static off_t sve_fpsr_offset(uint32_t vq) +{ +off_t off = sve_preg_offset(vq, 17) + offsetof(struct aarch64_note, sve); +return ROUND_UP(off, 16) - offsetof(struct aarch64_note, sve); +} +static off_t sve_fpcr_offset(uint32_t vq) +{ +return sve_fpsr_offset(vq) + sizeof(uint32_t); +} +static uint32_t sve_current_vq(CPUARMState *env) +{ +return sve_zcr_len_for_el(env, arm_current_el(env)) + 1; +} +static size_t sve_size_vq(uint32_t vq) +{ +off_t off = sve_fpcr_offset(vq) + sizeof(uint32_t) + +offsetof(struct aarch64_note, sve); +return ROUND_UP(off, 16) - offsetof(struct aarch64_note, sve); +} +static size_t sve_size(CPUARMState *env) +{ +return sve_size_vq(sve_current_vq(env)); +} + +static int aarch64_write_elf64_sve(WriteCoreDumpFunction f, + CPUARMState *env, int cpuid, + DumpState *s) +{ +struct aarch64_note *note; +ARMCPU *cpu = env_archcpu(env); +uint32_t vq = sve_current_vq(env); +uint32_t fpr; +uint8_t *buf; +int ret, i; + +note = g_malloc0(AARCH64_SVE_NOTE_SIZE(env)); +buf = (uint8_t *)>sve; + +aarch64_note_init(note, s, "LINUX", 6, NT_ARM_SVE, sve_size_vq(vq)); + +note->sve.size = cpu_to_dump32(s, sve_size_vq(vq)); +note->sve.max_size = cpu_to_dump32(s, sve_size_vq(cpu->sve_max_vq)); +note->sve.vl = cpu_to_dump16(s, vq * 16); +note->sve.max_vl = cpu_to_dump16(s, cpu->sve_max_vq * 16); +note->sve.flags = cpu_to_dump16(s, 1); + +for (i = 0; i < 32; ++i) { +#ifdef HOST_WORDS_BIGENDIAN +uint64_t d[vq * 2]; +int j; + +for (j = 0; j < vq * 2; ++j) { +d[j] = bswap64(env->vfp.zregs[i].d[j]); +} +#else +uint64_t *d = >vfp.zregs[i].d[0]; +#endif +memcpy([sve_zreg_offset(vq, i)], [0], vq * 16); +} + +for (i = 0; i < 17; ++i) { +#ifdef HOST_WORDS_BIGENDIAN +uint64_t d[DIV_ROUND_UP(vq * 2, 8)]; +int j; + +for (j = 0; j < DIV_ROUND_UP(vq * 2, 8); ++j) { +d[j] = bswap64(env->vfp.pregs[i].p[j]); +} +#else +uint64_t *d = >vfp.pregs[i].p[0]; +#endif +memcpy([sve_preg_offset(vq, i)], [0], vq * 16 / 8); +} + +fpr = cpu_to_dump32(s, vfp_get_fpsr(env)); +memcpy([sve_fpsr_offset(vq)], , sizeof(uint32_t)); + +fpr = cpu_to_dump32(s, vfp_get_fpcr(env)); +memcpy([sve_fpcr_offset(vq)], , sizeof(uint32_t)); + +ret