Re: [PATCH v2 3/3] tests/acpi: Generate reference blob for IORT rev E.b
On Tue, 5 Oct 2021 10:38:05 +0200 Eric Auger wrote: > Re-generate reference blobs with rebuild-expected-aml.sh. commit message should have diff old vs new, if dis is the same/similar for various variants, it's fine to provide only one variant diff here. > > Signed-off-by: Eric Auger > --- > tests/qtest/bios-tables-test-allowed-diff.h | 1 - > tests/data/acpi/virt/IORT | Bin 124 -> 128 bytes > tests/data/acpi/virt/IORT.memhp | Bin 124 -> 128 bytes > tests/data/acpi/virt/IORT.numamem | Bin 124 -> 128 bytes > tests/data/acpi/virt/IORT.pxb | Bin 124 -> 128 bytes > 5 files changed, 1 deletion(-) > > diff --git a/tests/qtest/bios-tables-test-allowed-diff.h > b/tests/qtest/bios-tables-test-allowed-diff.h > index 9a5a923d6b..dfb8523c8b 100644 > --- a/tests/qtest/bios-tables-test-allowed-diff.h > +++ b/tests/qtest/bios-tables-test-allowed-diff.h > @@ -1,2 +1 @@ > /* List of comma-separated changed AML files to ignore */ > -"tests/data/acpi/virt/IORT", > diff --git a/tests/data/acpi/virt/IORT b/tests/data/acpi/virt/IORT > index > 521acefe9ba66706c5607321a82d330586f3f280..7efd0ce8a6b3928efa7e1373f688ab4c5f50543b > 100644 > GIT binary patch > literal 128 > zcmebD4+?2uU|?Y0?Bwt45v<@85#X!<1dKp25F11@0kHuPgMkDCNC*yK93~3}W)K^M > VRiHGGVg_O`aDdYP|3ers^8jQz3IPBB > > literal 124 > zcmebD4+^Pa00MR=e`k+i1*eDrX9XZ&1PX!JAesq?4S*O7Bw!2(4Uz`|CKCt^;wu0# > QRGb+i3L*dhhtM#y0PN=p0RR91 > > diff --git a/tests/data/acpi/virt/IORT.memhp b/tests/data/acpi/virt/IORT.memhp > index > 521acefe9ba66706c5607321a82d330586f3f280..7efd0ce8a6b3928efa7e1373f688ab4c5f50543b > 100644 > GIT binary patch > literal 128 > zcmebD4+?2uU|?Y0?Bwt45v<@85#X!<1dKp25F11@0kHuPgMkDCNC*yK93~3}W)K^M > VRiHGGVg_O`aDdYP|3ers^8jQz3IPBB > > literal 124 > zcmebD4+^Pa00MR=e`k+i1*eDrX9XZ&1PX!JAesq?4S*O7Bw!2(4Uz`|CKCt^;wu0# > QRGb+i3L*dhhtM#y0PN=p0RR91 > > diff --git a/tests/data/acpi/virt/IORT.numamem > b/tests/data/acpi/virt/IORT.numamem > index > 521acefe9ba66706c5607321a82d330586f3f280..7efd0ce8a6b3928efa7e1373f688ab4c5f50543b > 100644 > GIT binary patch > literal 128 > zcmebD4+?2uU|?Y0?Bwt45v<@85#X!<1dKp25F11@0kHuPgMkDCNC*yK93~3}W)K^M > VRiHGGVg_O`aDdYP|3ers^8jQz3IPBB > > literal 124 > zcmebD4+^Pa00MR=e`k+i1*eDrX9XZ&1PX!JAesq?4S*O7Bw!2(4Uz`|CKCt^;wu0# > QRGb+i3L*dhhtM#y0PN=p0RR91 > > diff --git a/tests/data/acpi/virt/IORT.pxb b/tests/data/acpi/virt/IORT.pxb > index > 521acefe9ba66706c5607321a82d330586f3f280..7efd0ce8a6b3928efa7e1373f688ab4c5f50543b > 100644 > GIT binary patch > literal 128 > zcmebD4+?2uU|?Y0?Bwt45v<@85#X!<1dKp25F11@0kHuPgMkDCNC*yK93~3}W)K^M > VRiHGGVg_O`aDdYP|3ers^8jQz3IPBB > > literal 124 > zcmebD4+^Pa00MR=e`k+i1*eDrX9XZ&1PX!JAesq?4S*O7Bw!2(4Uz`|CKCt^;wu0# > QRGb+i3L*dhhtM#y0PN=p0RR91 >
Re: [PATCH v2 2/3] hw/arm/virt-acpi-build: IORT upgrade up to revision E.b
On Tue, 5 Oct 2021 10:38:04 +0200 Eric Auger wrote: > Upgrade the IORT table from B to E.b specification > revision (ARM DEN 0049E.b). > > Signed-off-by: Eric Auger with nit below fixed: Reviewed-by: Igor Mammedov > > --- > > v1 -> v2: > - Fix Revision value for ITS node and SMMUv3 node > - increment an identifier > --- > hw/arm/virt-acpi-build.c | 48 > 1 file changed, 29 insertions(+), 19 deletions(-) > > diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c > index 257d0fee17..789bac3134 100644 > --- a/hw/arm/virt-acpi-build.c > +++ b/hw/arm/virt-acpi-build.c > @@ -241,19 +241,20 @@ static void acpi_dsdt_add_tpm(Aml *scope, > VirtMachineState *vms) > #endif > > #define ID_MAPPING_ENTRY_SIZE 20 > -#define SMMU_V3_ENTRY_SIZE 60 > -#define ROOT_COMPLEX_ENTRY_SIZE 32 > +#define SMMU_V3_ENTRY_SIZE 68 > +#define ROOT_COMPLEX_ENTRY_SIZE 36 > #define IORT_NODE_OFFSET 48 > > static void build_iort_id_mapping(GArray *table_data, uint32_t input_base, >uint32_t id_count, uint32_t out_ref) > { > -/* Identity RID mapping covering the whole input RID range */ > +/* Table 4 ID mapping format */ > build_append_int_noprefix(table_data, input_base, 4); /* Input base */ > build_append_int_noprefix(table_data, id_count, 4); /* Number of IDs */ > build_append_int_noprefix(table_data, input_base, 4); /* Output base */ > build_append_int_noprefix(table_data, out_ref, 4); /* Output Reference */ > -build_append_int_noprefix(table_data, 0, 4); /* Flags */ > +/* Flags */ > +build_append_int_noprefix(table_data, 0 /* Single mapping */, 4); > } > > struct AcpiIortIdMapping { > @@ -298,7 +299,7 @@ static int iort_idmap_compare(gconstpointer a, > gconstpointer b) > /* > * Input Output Remapping Table (IORT) > * Conforms to "IO Remapping Table System Software on ARM Platforms", > - * Document number: ARM DEN 0049B, October 2015 > + * Document number: ARM DEN 0049E, Feb 2021 > */ > static void > build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms) > @@ -307,10 +308,11 @@ build_iort(GArray *table_data, BIOSLinker *linker, > VirtMachineState *vms) > const uint32_t iort_node_offset = IORT_NODE_OFFSET; > size_t node_size, smmu_offset = 0; > AcpiIortIdMapping *idmap; > +uint32_t id = 0; It's probably worth mentioning in commit message that IDs are generated in sequential order, to make sure that different nodes unique but otherwise IDs are not referenced/used. If we were to actually use IDs, I'd prefer explicit naming for static nodes (i.e. something like SMMUv3_ID = #). > GArray *smmu_idmaps = g_array_new(false, true, > sizeof(AcpiIortIdMapping)); > GArray *its_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping)); > > -AcpiTable table = { .sig = "IORT", .rev = 0, .oem_id = vms->oem_id, > +AcpiTable table = { .sig = "IORT", .rev = 3, .oem_id = vms->oem_id, > .oem_table_id = vms->oem_table_id }; > /* Table 2 The IORT */ > acpi_table_begin(, table_data); > @@ -358,12 +360,12 @@ build_iort(GArray *table_data, BIOSLinker *linker, > VirtMachineState *vms) > build_append_int_noprefix(table_data, IORT_NODE_OFFSET, 4); > build_append_int_noprefix(table_data, 0, 4); /* Reserved */ > > -/* 3.1.1.3 ITS group node */ > +/* Table 12 ITS Group Format */ > build_append_int_noprefix(table_data, 0 /* ITS Group */, 1); /* Type */ > node_size = 20 /* fixed header size */ + 4 /* 1 GIC ITS Identifier */; > build_append_int_noprefix(table_data, node_size, 2); /* Length */ > -build_append_int_noprefix(table_data, 0, 1); /* Revision */ > -build_append_int_noprefix(table_data, 0, 4); /* Reserved */ > +build_append_int_noprefix(table_data, 1, 1); /* Revision */ > +build_append_int_noprefix(table_data, id++, 4); /* Identifier */ > build_append_int_noprefix(table_data, 0, 4); /* Number of ID mappings */ > build_append_int_noprefix(table_data, 0, 4); /* Reference to ID Array */ > build_append_int_noprefix(table_data, 1, 4); /* Number of ITSs */ > @@ -374,19 +376,19 @@ build_iort(GArray *table_data, BIOSLinker *linker, > VirtMachineState *vms) > int irq = vms->irqmap[VIRT_SMMU] + ARM_SPI_BASE; > > smmu_offset = table_data->len - table.table_offset; > -/* 3.1.1.2 SMMUv3 */ > +/* Table 9 SMMUv3 Format */ > build_append_int_noprefix(table_data, 4 /* SMMUv3 */, 1); /* Type */ > node_size = SMMU_V3_ENTRY_SIZE + ID_MAPPING_ENTRY_SIZE; > build_append_int_noprefix(table_data, node_size, 2); /* Length */ > -build_append_int_noprefix(table_data, 0, 1); /* Revision */ > -build_append_int_noprefix(table_data, 0, 4); /* Reserved */ > +build_append_int_noprefix(table_data, 4, 1); /* Revision */ > +build_append_int_noprefix(table_data, id++, 4); /*
Re: [PATCH] hw/misc: applesmc: use host osk as default on macs
On Mon, 11 Oct 2021 15:40:07 +0200, bala...@eik.bme.hu wrote: > I guess a frequent use case for running macOS guests with keys from host > would be on hosts running macOS too so a solution that works both on macOS > and Linux might be better than a Linux specific one which then needs > another way to do the same on macOS. Looks like there's free code for that > too and you don't have to convince a maintainer either. I mostly agree with you (hadn't given much thought to qemu on macOSX), with the small caveat that (on Linux) you'll end up racing the kernel applesmc driver for access to the physical hardware; not sure whether you'd run into anything similar on host-side macOSX as well, never actually used it much as a developer... :) Cheers, --Gabriel
Re: Moving QEMU downloads to GitLab Releases?
On Mon, Oct 11, 2021 at 4:59 AM Stefan Hajnoczi wrote: > On Mon, Oct 11, 2021 at 09:21:30AM +0200, Gerd Hoffmann wrote: > > Hi, > > > > > > I guess the main question is who is using the ROM/BIOS sources in the > > > > tarballs, and would this 2-step process work for those users? If > there > > > > are distros relying on them then maybe there are some more logistics > to > > > > consider since the make-release downloads are both time-consuming and > > > > prone to network errors/stalls since it relies on the availability > of a > > > > good number of different hosts. > > > > > > Great, maybe Gerd can comment on splitting out firmware. > > > > I think the binaries are mostly a convenience feature for developers. > > Distros typically build from source anyway, and typically they build > > from upstream source instead of qemu submodule. > > > > The idea to split them out to a separate repo is exists for quite a > > while. I have an old qemu-firmware repo laying around on my disk, which > > basically moves roms/ + submodules and the binaries built from it over. > > > > I think with the switch to gitlab it doesn't make sense any more to > > commit pre-built firmware binaries to a git repo. Instead we should > > build the firmware in gitlab ci, i.e. effectively move the build rules > > we have right now in roms/Makefile to .gitlab-ci.d/, then publish the > > firmware binaries as build artifacts or gitlab pages. > > > > When done just remove the pre-build binaries from git and add a helper > > script to fetch firmware binaries from gitlab instead. > > > > > QEMU mirrors firmware sources on GitLab too. We're able to provide the > > > same level of download availability on our mirror firmware repos as for > > > the main qemu.git repo. > > > > I think enabling CI for the firmware mirrors should work given that it > > is possible to specify a custom CI/CD configuration file, i.e. use > > something like > > > > /qemu-project/qemu/.gitlab-ci.d/firmware/seabios.yml > > > > or > > > > /qemu-project/qemu-firmware/seabios.yml > > > > as config file for the > > > > /qemu-project/seabios/ > > > > mirror. Then we can publish binaries using gitlab pages at > > > > https://qemu-project.gitlab.io/seabios/ > > > > That way we also don't need the roms/ submodules any more, i.e. we > > can remove both sources and binaries for the firmware from the > > release tarballs. > > Thanks! > > For developer convenience it would be nice to avoid manual steps after > git clone qemu.git. Maybe ./configure should check for firmware blobs > and automatically download them. There could be a ./configure > --disable-firmware-download option that distros can use to skip the > download when building everything from source. > One thing to keep in mind about the downstream consumers: Many of them have two phases to their build process that run asynchronously to each other. There is a fetch phase that grabs everything, and a build phase that builds the binaries. The second phase may not have access to the internet for a variety of reasons (these days being a security measure, for good or ill). Please make sure that any such plans allow for this model. Today, that's all dealt with by grabbing tarballs from github which is how the submodules are dealt with. So long as the images had well known names, and could be fetched with the normal wget/cgit/fetch programs, that would suffice. Requiring use of some weird bespoke script would cause friction for the downstreams using qemu. So while I'm all for making things a little more independent, let's not do it in a way that makes life difficult for downstreams. There are more there than just a couple of big distro builders. Warner
Re: [PATCH v2] vmdk: allow specification of tools version
On 13.09.21 15:04, Thomas Weißschuh wrote: VMDK files support an attribute that represents the version of the guest tools that are installed on the disk. This attribute is used by vSphere before a machine has been started to determine if the VM has the guest tools installed. This is important when configuring "Operating system customizations" in vSphere, as it checks for the presence of the guest tools before allowing those customizations. Thus when the VM has not yet booted normally it would be impossible to customize it, therefore preventing a customized first-boot. The attribute should not hurt on disks that do not have the guest tools installed and indeed the VMware tools also unconditionally add this attribute. (Defaulting to the value "2147483647", as is done in this patch) Signed-off-by: Thomas Weißschuh --- v1: https://lore.kernel.org/qemu-devel/20210908174250.12946-1-thomas.weissschuh@zeiss.com/ v1 -> v2: * Expand QAPI docs (Eric Blake) block/vmdk.c | 24 qapi/block-core.json | 3 +++ 2 files changed, 23 insertions(+), 4 deletions(-) [...] diff --git a/qapi/block-core.json b/qapi/block-core.json index c8ce1d9d5d..42431f52d0 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -4597,6 +4597,8 @@ # @adapter-type: The adapter type used to fill in the descriptor. Default: ide. # @hwversion: Hardware version. The meaningful options are "4" or "6". # Default: "4". +# @toolsversion: VMware guest tools version. + Default: "2147483647" (Since 6.2) There’s a # missing at the start of the line, and so this doesn’t compile. I’ve added it (hope that’s OK for you) and taken this patch to my block branch: https://gitlab.com/hreitz/qemu/-/commits/block/ Hanna
Re: [PATCH v3 0/3] hw/mips/boston: ELF kernel support
ping? 在 2021/10/2 19:45, Jiaxun Yang 写道: Jiaxun Yang (3): hw/mips/boston: Massage memory map information hw/mips/boston: Allow loading elf kernel and dtb hw/mips/boston: Add FDT generator hw/mips/boston.c | 348 +++ 1 file changed, 320 insertions(+), 28 deletions(-)
Re: [PATCH v3 3/3] tests/acpi/bios-tables-test: update DSDT blob for multifunction bridge test
On Thu, 7 Oct 2021 19:27:50 +0530 Ani Sinha wrote: > We added a new unit test for testing acpi hotplug on multifunction bridges in > q35 machines. Here, we update the DSDT table gloden master blob for this unit > test. > > The test adds the following devices to qemu and then checks the changes > introduced in the DSDT table due to the addition of the following devices: > > (a) a multifunction bridge device > (b) a bridge device with function 1 > (c) a non-bridge device with function 2 > > In the DSDT table, we should see AML hotplug descriptions for (a) and (b). > For (a) we should find a hotplug AML description for function 0. > > Following is the ASL diff between the original DSDT table and the modified > DSDT > table due to the unit test. We see that multifunction bridge on bus 2 and > single > function bridge on bus 3 function 1 are described, not the non-bridge balloon > device on bus 4, function 2. > > @@ -1,30 +1,30 @@ > /* > * Intel ACPI Component Architecture > * AML/ASL+ Disassembler version 20190509 (64-bit version) > * Copyright (c) 2000 - 2019 Intel Corporation > * > * Disassembling to symbolic ASL+ operators > * > - * Disassembly of tests/data/acpi/q35/DSDT, Thu Oct 7 18:29:19 2021 > + * Disassembly of /tmp/aml-C7JCA1, Thu Oct 7 18:29:19 2021 > * > * Original Table Header: > * Signature"DSDT" > - * Length 0x2061 (8289) > + * Length 0x2187 (8583) > * Revision 0x01 32-bit table (V1), no 64-bit math support > - * Checksum 0xF9 > + * Checksum 0x8D > * OEM ID "BOCHS " > * OEM Table ID "BXPC" > * OEM Revision 0x0001 (1) > * Compiler ID "BXPC" > * Compiler Version 0x0001 (1) > */ > DefinitionBlock ("", "DSDT", 1, "BOCHS ", "BXPC", 0x0001) > { > Scope (\) > { > OperationRegion (DBG, SystemIO, 0x0402, One) > Field (DBG, ByteAcc, NoLock, Preserve) > { > DBGB, 8 > } > > @@ -3265,23 +3265,95 @@ > Method (_S1D, 0, NotSerialized) // _S1D: S1 Device State > { > Return (Zero) > } > > Method (_S2D, 0, NotSerialized) // _S2D: S2 Device State > { > Return (Zero) > } > > Method (_S3D, 0, NotSerialized) // _S3D: S3 Device State > { > Return (Zero) > } > } > > +Device (S10) > +{ > +Name (_ADR, 0x0002) // _ADR: Address > +Name (BSEL, One) > +Device (S00) > +{ > +Name (_SUN, Zero) // _SUN: Slot User Number > +Name (_ADR, Zero) // _ADR: Address > +Method (_EJ0, 1, NotSerialized) // _EJx: Eject Device, > x=0-9 > +{ > +PCEJ (BSEL, _SUN) > +} > + > +Method (_DSM, 4, Serialized) // _DSM: Device-Specific > Method > +{ > +Return (PDSM (Arg0, Arg1, Arg2, Arg3, BSEL, _SUN)) > +} > +} > + > +Method (DVNT, 2, NotSerialized) > +{ > +If ((Arg0 & One)) > +{ > +Notify (S00, Arg1) > +} > +} > + > +Method (PCNT, 0, NotSerialized) > +{ > +BNUM = One > +DVNT (PCIU, One) > +DVNT (PCID, 0x03) > +} > +} > + > +Device (S19) > +{ > +Name (_ADR, 0x00030001) // _ADR: Address > +Name (BSEL, Zero) > +Device (S00) > +{ > +Name (_SUN, Zero) // _SUN: Slot User Number > +Name (_ADR, Zero) // _ADR: Address > +Method (_EJ0, 1, NotSerialized) // _EJx: Eject Device, > x=0-9 > +{ > +PCEJ (BSEL, _SUN) > +} > + > +Method (_DSM, 4, Serialized) // _DSM: Device-Specific > Method > +{ > +Return (PDSM (Arg0, Arg1, Arg2, Arg3, BSEL, _SUN)) > +} > +} > + > +Method (DVNT, 2, NotSerialized) > +{ > +If ((Arg0 & One)) > +{ > +Notify (S00, Arg1) > +} > +} > + > +Method (PCNT, 0, NotSerialized) > +{ > +BNUM = Zero > +DVNT (PCIU, One) > +DVNT (PCID, 0x03) > +} > +
Re: [PATCH v3 1/3] tests/acpi/bios-tables-test: add and allow changes to a new q35 DSDT table blob
On Thu, 7 Oct 2021 19:27:48 +0530 Ani Sinha wrote: > We are adding a new unit test to cover the acpi hotplug support in q35 for > multi-function bridges. This test uses a new table DSDT.multi-bridge. > We need to allow changes in DSDT acpi table for addition of this new > unit test. > > Signed-off-by: Ani Sinha Acked-by: Igor Mammedov > --- > tests/data/acpi/q35/DSDT.multi-bridge | 0 > tests/qtest/bios-tables-test-allowed-diff.h | 1 + > 2 files changed, 1 insertion(+) > create mode 100644 tests/data/acpi/q35/DSDT.multi-bridge > > diff --git a/tests/data/acpi/q35/DSDT.multi-bridge > b/tests/data/acpi/q35/DSDT.multi-bridge > new file mode 100644 > index 00..e69de29bb2 > diff --git a/tests/qtest/bios-tables-test-allowed-diff.h > b/tests/qtest/bios-tables-test-allowed-diff.h > index dfb8523c8b..dabc024f53 100644 > --- a/tests/qtest/bios-tables-test-allowed-diff.h > +++ b/tests/qtest/bios-tables-test-allowed-diff.h > @@ -1 +1,2 @@ > /* List of comma-separated changed AML files to ignore */ > +"tests/data/acpi/q35/DSDT.multi-bridge",
Re: [PATCH v3 2/3] tests/acpi/pcihp: add unit tests for hotplug on multifunction bridges for q35
On Thu, 7 Oct 2021 19:27:49 +0530 Ani Sinha wrote: > commit d7346e614f4ec ("acpi: x86: pcihp: add support hotplug on multifunction > bridges") > added ACPI hotplug descriptions for cold plugged bridges for functions other > than 0. For all other devices, the ACPI hotplug descriptions are limited to > function 0 only. This change adds unit tests for this feature. > > This test adds the following devices to qemu and then checks the changes > introduced in the DSDT table due to the addition of the following devices: > > (a) a multifunction bridge device > (b) a bridge device with function 1 > (c) a non-bridge device with function 2 > > In the DSDT table, we should see AML hotplug descriptions for (a) and (b). > For (a) we should find a hotplug AML description for function 0. > > The following diff compares the DSDT table AML with the new unit test before > and after the change d7346e614f4ec is introduced. In other words, > this diff reflects the changes that occurs in the DSDT table due to the change > d7346e614f4ec . > > @@ -1,60 +1,38 @@ > /* > * Intel ACPI Component Architecture > * AML/ASL+ Disassembler version 20190509 (64-bit version) > * Copyright (c) 2000 - 2019 Intel Corporation > * > * Disassembling to symbolic ASL+ operators > * > - * Disassembly of tests/data/acpi/q35/DSDT.multi-bridge, Thu Oct 7 18:56:05 > 2021 > + * Disassembly of /tmp/aml-AN0DA1, Thu Oct 7 18:56:05 2021 > * > * Original Table Header: > * Signature"DSDT" > - * Length 0x20FE (8446) > + * Length 0x2187 (8583) > * Revision 0x01 32-bit table (V1), no 64-bit math support > - * Checksum 0xDE > + * Checksum 0x8D > * OEM ID "BOCHS " > * OEM Table ID "BXPC" > * OEM Revision 0x0001 (1) > * Compiler ID "BXPC" > * Compiler Version 0x0001 (1) > */ > DefinitionBlock ("", "DSDT", 1, "BOCHS ", "BXPC", 0x0001) > { > -/* > - * iASL Warning: There was 1 external control method found during > - * disassembly, but only 0 were resolved (1 unresolved). Additional > - * ACPI tables may be required to properly disassemble the code. This > - * resulting disassembler output file may not compile because the > - * disassembler did not know how many arguments to assign to the > - * unresolved methods. Note: SSDTs can be dynamically loaded at > - * runtime and may or may not be available via the host OS. > - * > - * In addition, the -fe option can be used to specify a file containing > - * control method external declarations with the associated method > - * argument counts. Each line of the file must be of the form: > - * External (, MethodObj, ) > - * Invocation: > - * iasl -fe refs.txt -d dsdt.aml > - * > - * The following methods were unresolved and many not compile properly > - * because the disassembler had to guess at the number of arguments > - * required for each: > - */ > -External (_SB_.PCI0.S19_.PCNT, MethodObj)// Warning: Unknown method, > guessing 1 arguments > - > Scope (\) > { > OperationRegion (DBG, SystemIO, 0x0402, One) > Field (DBG, ByteAcc, NoLock, Preserve) > { > DBGB, 8 > } > > Method (DBUG, 1, NotSerialized) > { > ToHexString (Arg0, Local0) > ToBuffer (Local0, Local0) > Local1 = (SizeOf (Local0) - One) > Local2 = Zero > While ((Local2 < Local1)) > { > @@ -3322,24 +3300,60 @@ > Method (DVNT, 2, NotSerialized) > { > If ((Arg0 & One)) > { > Notify (S00, Arg1) > } > } > > Method (PCNT, 0, NotSerialized) > { > BNUM = One > DVNT (PCIU, One) > DVNT (PCID, 0x03) > } > } > > +Device (S19) > +{ > +Name (_ADR, 0x00030001) // _ADR: Address > +Name (BSEL, Zero) > +Device (S00) > +{ > +Name (_SUN, Zero) // _SUN: Slot User Number > +Name (_ADR, Zero) // _ADR: Address > +Method (_EJ0, 1, NotSerialized) // _EJx: Eject Device, > x=0-9 > +{ > +PCEJ (BSEL, _SUN) > +} > + > +Method (_DSM, 4, Serialized) // _DSM: Device-Specific > Method > +{ > +Return (PDSM (Arg0, Arg1, Arg2, Arg3, BSEL, _SUN)) > +} > +} > + > +Method (DVNT, 2, NotSerialized) > +{ > +If ((Arg0 & One)) > +
Re: ACPI endianness
On Mon, 11 Oct 2021, Michael S. Tsirkin wrote: On Mon, Oct 11, 2021 at 03:27:44PM +0200, BALATON Zoltan wrote: On Mon, 11 Oct 2021, Michael S. Tsirkin wrote: On Mon, Oct 11, 2021 at 12:13:55PM +0200, BALATON Zoltan wrote: On Mon, 11 Oct 2021, Philippe Mathieu-Daudé wrote: On 10/10/21 15:24, BALATON Zoltan wrote: Hello, I'm trying to fix shutdown and reboot on pegasos2 which uses ACPI as part of the VIA VT8231 (similar to and modelled in hw/isa/vt82c686b.c) and found that the guest writes to ACPI PM1aCNT register come out with wrong endianness but not shure why. I have this: $ qemu-system-ppc -M pegasos2 -monitor stdio (qemu) info mtree [...] memory-region: pci1-io - (prio 0, i/o): pci1-io [...] 0f00-0f7f (prio 0, i/o): via-pm 0f00-0f03 (prio 0, i/o): acpi-evt 0f04-0f05 (prio 0, i/o): acpi-cnt 0f08-0f0b (prio 0, i/o): acpi-tmr memory-region: system - (prio 0, i/o): system -1fff (prio 0, ram): pegasos2.ram 8000-bfff (prio 0, i/o): alias pci1-mem0-win @pci1-mem 8000-bfff c000-dfff (prio 0, i/o): alias pci0-mem0-win @pci0-mem c000-dfff f100-f100 (prio 0, i/o): mv64361 f800-f8ff (prio 0, i/o): alias pci0-io-win @pci0-io -00ff f900-f9ff (prio 0, i/o): alias pci0-mem1-win @pci0-mem -00ff fd00-fdff (prio 0, i/o): alias pci1-mem1-win @pci1-mem -00ff fe00-feff (prio 0, i/o): alias pci1-io-win @pci1-io -00ff ff80- (prio 0, i/o): alias pci1-mem3-win @pci1-mem ff80- fff0-fff7 (prio 0, rom): pegasos2.rom The guest (which is big endian PPC and I think wotks on real hardware) writes to 0xf05 in the io region which should be the high byte of the little endian register but in the acpi code it comes out wrong, instead of 0x2800 I get in acpi_pm1_cnt_write: val=0x28 Looks like a northbridge issue (MV64340). Does Pegasos2 enables bus swapping? See hw/pci-host/mv64361.c:585: static void warn_swap_bit(uint64_t val) { if ((val & 0x300ULL) >> 24 != 1) { qemu_log_mask(LOG_UNIMP, "%s: Data swap not implemented", __func__); } } No, guests don't use this feature just byteswap themselves and write little endian values to the mapped io region. (Or in this case just write one byte of the 16 bit value, it specifically writes 0x28 to 0xf05.) That's why I think the device model should not byteswap itself so the acpi regions should be declared native endian and let the guest handle it. Some mentions I've found say that native endian means don't byteswap, little endian means byteswap if vcpu is big endian and big endian means byteswap if vcpu is little endian. Since guests already account for this and write little endian values to these regions there's no need to byteswap in device model and changing to native endian should not affect little endian machines so unless there's a better argument I'll try sending a patch. Regards, BALATON Zoltan native endian means endian-ness is open-coded in the device handler itself. I think you just found a bug in memory core. static const MemoryRegionOps acpi_pm_cnt_ops = { .read = acpi_pm_cnt_read, .write = acpi_pm_cnt_write, .impl.min_access_size = 2, .valid.min_access_size = 1, .valid.max_access_size = 2, .endianness = DEVICE_LITTLE_ENDIAN, }; Because of that .impl.min_access_size = 2, the 1 byte write should be converted to a 2 byte read+2 byte write. docs/devel/memory.rst just says: - .impl.min_access_size, .impl.max_access_size define the access sizes (in bytes) supported by the *implementation*; other access sizes will be emulated using the ones available. For example a 4-byte write will be emulated using four 1-byte writes, if .impl.max_access_size = 1. Instead what we have is: MemTxResult memory_region_dispatch_write(MemoryRegion *mr, hwaddr addr, uint64_t data, MemOp op, MemTxAttrs attrs) { unsigned size = memop_size(op); if (!memory_region_access_valid(mr, addr, size, true, attrs)) { unassigned_mem_write(mr, addr, data, size); return MEMTX_DECODE_ERROR; } adjust_endianness(mr, , op); --- static void adjust_endianness(MemoryRegion *mr, uint64_t *data, MemOp op) { if ((op & MO_BSWAP) != devend_memop(mr->ops->endianness)) { switch (op & MO_SIZE) { case MO_8: break;
Re: [PATCH] hw/misc: applesmc: use host osk as default on macs
On Mon, 11 Oct 2021, Gabriel L. Somlo wrote: FWIW, there's an applesmc driver in the Linux kernel, and it exposes many of the keys and values stored on the chip under /sys/devices/platform/applesmc.768 (or at least it *used* to back when I last checked). My idea at the time was to get this driver to also expose the value of OSK0,1, so that userspace programs (like e.g. qemu) could read and use the values hardcoded in hardware without needing to hardcode them themselves in software. I guess a frequent use case for running macOS guests with keys from host would be on hosts running macOS too so a solution that works both on macOS and Linux might be better than a Linux specific one which then needs another way to do the same on macOS. Looks like there's free code for that too and you don't have to convince a maintainer either. Regards, BALATON Zoltan
Re: [PATCH] s390x/ipl: check kernel command line size
On 06/10/2021 11.26, Marc Hartmayer wrote: Check if the provided kernel command line exceeds the maximum size of the s390x Linux kernel command line size, which is 896 bytes. Reported-by: Sven Schnelle Signed-off-by: Marc Hartmayer --- hw/s390x/ipl.c | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c index 1821c6faeef3..a58ea58cc736 100644 --- a/hw/s390x/ipl.c +++ b/hw/s390x/ipl.c @@ -38,6 +38,7 @@ #define KERN_IMAGE_START0x01UL #define LINUX_MAGIC_ADDR0x010008UL #define KERN_PARM_AREA 0x010480UL +#define KERN_PARM_AREA_SIZE 0x000380UL #define INITRD_START0x80UL #define INITRD_PARM_START 0x010408UL #define PARMFILE_START 0x001000UL @@ -190,10 +191,19 @@ static void s390_ipl_realize(DeviceState *dev, Error **errp) * loader) and it won't work. For this case we force it to 0x1, too. */ if (pentry == KERN_IMAGE_START || pentry == 0x800) { -char *parm_area = rom_ptr(KERN_PARM_AREA, strlen(ipl->cmdline) + 1); +size_t cmdline_size = strlen(ipl->cmdline) + 1; +char *parm_area = rom_ptr(KERN_PARM_AREA, cmdline_size); + ipl->start_addr = KERN_IMAGE_START; /* Overwrite parameters in the kernel image, which are "rom" */ if (parm_area) { +if (cmdline_size > KERN_PARM_AREA_SIZE) { +error_setg(errp, + "kernel command line exceeds maximum size: %lu > %lu", I think the first %lu should be %zd instead? Apart from that, the patch looks fine to me... so if you agree, I can fix that up when picking up the patch. Thomas + cmdline_size, KERN_PARM_AREA_SIZE); +return; +} + strcpy(parm_area, ipl->cmdline); } } else {
Re: ACPI endianness
On Mon, Oct 11, 2021 at 03:27:44PM +0200, BALATON Zoltan wrote: > On Mon, 11 Oct 2021, Michael S. Tsirkin wrote: > > On Mon, Oct 11, 2021 at 12:13:55PM +0200, BALATON Zoltan wrote: > > > On Mon, 11 Oct 2021, Philippe Mathieu-Daudé wrote: > > > > On 10/10/21 15:24, BALATON Zoltan wrote: > > > > > Hello, > > > > > > > > > > I'm trying to fix shutdown and reboot on pegasos2 which uses ACPI as > > > > > part of the VIA VT8231 (similar to and modelled in hw/isa/vt82c686b.c) > > > > > and found that the guest writes to ACPI PM1aCNT register come out with > > > > > wrong endianness but not shure why. I have this: > > > > > > > > > > $ qemu-system-ppc -M pegasos2 -monitor stdio > > > > > (qemu) info mtree > > > > > [...] > > > > > memory-region: pci1-io > > > > > - (prio 0, i/o): pci1-io > > > > > [...] > > > > > 0f00-0f7f (prio 0, i/o): via-pm > > > > > 0f00-0f03 (prio 0, i/o): acpi-evt > > > > > 0f04-0f05 (prio 0, i/o): acpi-cnt > > > > > 0f08-0f0b (prio 0, i/o): acpi-tmr > > > > > > > > > > memory-region: system > > > > > - (prio 0, i/o): system > > > > > -1fff (prio 0, ram): pegasos2.ram > > > > > 8000-bfff (prio 0, i/o): alias > > > > > pci1-mem0-win > > > > > @pci1-mem 8000-bfff > > > > > c000-dfff (prio 0, i/o): alias > > > > > pci0-mem0-win > > > > > @pci0-mem c000-dfff > > > > > f100-f100 (prio 0, i/o): mv64361 > > > > > f800-f8ff (prio 0, i/o): alias pci0-io-win > > > > > @pci0-io -00ff > > > > > f900-f9ff (prio 0, i/o): alias > > > > > pci0-mem1-win > > > > > @pci0-mem -00ff > > > > > fd00-fdff (prio 0, i/o): alias > > > > > pci1-mem1-win > > > > > @pci1-mem -00ff > > > > > fe00-feff (prio 0, i/o): alias pci1-io-win > > > > > @pci1-io -00ff > > > > > ff80- (prio 0, i/o): alias > > > > > pci1-mem3-win > > > > > @pci1-mem ff80- > > > > > fff0-fff7 (prio 0, rom): pegasos2.rom > > > > > > > > > > The guest (which is big endian PPC and I think wotks on real hardware) > > > > > writes to 0xf05 in the io region which should be the high byte of the > > > > > little endian register but in the acpi code it comes out wrong, > > > > > instead > > > > > of 0x2800 I get in acpi_pm1_cnt_write: val=0x28 > > > > > > > > Looks like a northbridge issue (MV64340). > > > > Does Pegasos2 enables bus swapping? > > > > See hw/pci-host/mv64361.c:585: > > > > > > > > static void warn_swap_bit(uint64_t val) > > > > { > > > >if ((val & 0x300ULL) >> 24 != 1) { > > > >qemu_log_mask(LOG_UNIMP, "%s: Data swap not implemented", > > > > __func__); > > > >} > > > > } > > > > > > No, guests don't use this feature just byteswap themselves and write > > > little > > > endian values to the mapped io region. (Or in this case just write one > > > byte > > > of the 16 bit value, it specifically writes 0x28 to 0xf05.) That's why I > > > think the device model should not byteswap itself so the acpi regions > > > should > > > be declared native endian and let the guest handle it. Some mentions I've > > > found say that native endian means don't byteswap, little endian means > > > byteswap if vcpu is big endian and big endian means byteswap if vcpu is > > > little endian. Since guests already account for this and write little > > > endian > > > values to these regions there's no need to byteswap in device model and > > > changing to native endian should not affect little endian machines so > > > unless > > > there's a better argument I'll try sending a patch. > > > > > > Regards, > > > BALATON Zoltan > > > > native endian means endian-ness is open-coded in the device handler > > itself. I think you just found a bug in memory core. > > > > static const MemoryRegionOps acpi_pm_cnt_ops = { > >.read = acpi_pm_cnt_read, > >.write = acpi_pm_cnt_write, > >.impl.min_access_size = 2, > >.valid.min_access_size = 1, > >.valid.max_access_size = 2, > >.endianness = DEVICE_LITTLE_ENDIAN, > > }; > > > > > > Because of that .impl.min_access_size = 2, > > the 1 byte write should be converted to a 2 byte > > read+2 byte write. > > > > docs/devel/memory.rst just says: > > - .impl.min_access_size, .impl.max_access_size define the access sizes > > (in bytes) supported by the *implementation*; other access sizes will be > > emulated using the ones available. For example a 4-byte write will be > > emulated using four 1-byte writes, if
Re: Deprecate the ppc405 boards in QEMU?
On 11/10/2021 11.20, David Gibson wrote: On Mon, Oct 11, 2021 at 10:10:36AM +0200, Thomas Huth wrote: On 06/10/2021 09.25, Thomas Huth wrote: On 05/10/2021 23.53, BALATON Zoltan wrote: [...] Maybe these 405 boards in QEMU ran with modified firmware where the memory detection was patched out but it seems to detect the RAM so I wonder where it gets that from. Maybe by reading the SDRAM controller DCRs ppc4xx_sdram_init() sets up. Then I'm not sure what it needs the SPD for, I forgot how this worked on sam460ex. Maybe for the speed calibration, so could be it detects ram by reading DCRs then tries to get SPD data and that's where it stops as i2c is not emulated on taihu. This could be confirmed by checking what it pokes with -d guest_errors that shows accesses to missing devices but don't know where 405 has the i2c controller and if it's the same as newer SoCs. If so that could be reused and an i2c bus could be added with the SPD data like in sam460ex to make u-boot happy or you could skip this in u-boot. FWIW, I've just tried the latter (skipping the sdram init in u-boot), and indeed, I can get to the u-boot prompt now. [...]> I've also attached the patch with my modifications to u-boot. FYI, the changes can now be found on this branch here: https://gitlab.com/huth/u-boot/-/commits/taihu I also added a gitlab-CI file, so you can now download the u-boot.bin as an artifact from the corresponding pipeline, e.g.: https://gitlab.com/huth/u-boot/-/jobs/1667201028 Thanks. Are you going to send a v2 of your proposed deprecation patches? No, since there was interest in keeping the 405 boards for testing the 405 code in the Linux kernel, and since there is now a way to do at least some very basic testing of these boards (with the u-boot firmware), I don't plan to respin the deprecation patch. I just sent a patch for adding the boards to our CI instead: https://lists.gnu.org/archive/html/qemu-devel/2021-10/msg02072.html Thomas
Re: ACPI endianness
On Mon, 11 Oct 2021, Michael S. Tsirkin wrote: On Mon, Oct 11, 2021 at 12:13:55PM +0200, BALATON Zoltan wrote: On Mon, 11 Oct 2021, Philippe Mathieu-Daudé wrote: On 10/10/21 15:24, BALATON Zoltan wrote: Hello, I'm trying to fix shutdown and reboot on pegasos2 which uses ACPI as part of the VIA VT8231 (similar to and modelled in hw/isa/vt82c686b.c) and found that the guest writes to ACPI PM1aCNT register come out with wrong endianness but not shure why. I have this: $ qemu-system-ppc -M pegasos2 -monitor stdio (qemu) info mtree [...] memory-region: pci1-io - (prio 0, i/o): pci1-io [...] 0f00-0f7f (prio 0, i/o): via-pm 0f00-0f03 (prio 0, i/o): acpi-evt 0f04-0f05 (prio 0, i/o): acpi-cnt 0f08-0f0b (prio 0, i/o): acpi-tmr memory-region: system - (prio 0, i/o): system -1fff (prio 0, ram): pegasos2.ram 8000-bfff (prio 0, i/o): alias pci1-mem0-win @pci1-mem 8000-bfff c000-dfff (prio 0, i/o): alias pci0-mem0-win @pci0-mem c000-dfff f100-f100 (prio 0, i/o): mv64361 f800-f8ff (prio 0, i/o): alias pci0-io-win @pci0-io -00ff f900-f9ff (prio 0, i/o): alias pci0-mem1-win @pci0-mem -00ff fd00-fdff (prio 0, i/o): alias pci1-mem1-win @pci1-mem -00ff fe00-feff (prio 0, i/o): alias pci1-io-win @pci1-io -00ff ff80- (prio 0, i/o): alias pci1-mem3-win @pci1-mem ff80- fff0-fff7 (prio 0, rom): pegasos2.rom The guest (which is big endian PPC and I think wotks on real hardware) writes to 0xf05 in the io region which should be the high byte of the little endian register but in the acpi code it comes out wrong, instead of 0x2800 I get in acpi_pm1_cnt_write: val=0x28 Looks like a northbridge issue (MV64340). Does Pegasos2 enables bus swapping? See hw/pci-host/mv64361.c:585: static void warn_swap_bit(uint64_t val) { if ((val & 0x300ULL) >> 24 != 1) { qemu_log_mask(LOG_UNIMP, "%s: Data swap not implemented", __func__); } } No, guests don't use this feature just byteswap themselves and write little endian values to the mapped io region. (Or in this case just write one byte of the 16 bit value, it specifically writes 0x28 to 0xf05.) That's why I think the device model should not byteswap itself so the acpi regions should be declared native endian and let the guest handle it. Some mentions I've found say that native endian means don't byteswap, little endian means byteswap if vcpu is big endian and big endian means byteswap if vcpu is little endian. Since guests already account for this and write little endian values to these regions there's no need to byteswap in device model and changing to native endian should not affect little endian machines so unless there's a better argument I'll try sending a patch. Regards, BALATON Zoltan native endian means endian-ness is open-coded in the device handler itself. I think you just found a bug in memory core. static const MemoryRegionOps acpi_pm_cnt_ops = { .read = acpi_pm_cnt_read, .write = acpi_pm_cnt_write, .impl.min_access_size = 2, .valid.min_access_size = 1, .valid.max_access_size = 2, .endianness = DEVICE_LITTLE_ENDIAN, }; Because of that .impl.min_access_size = 2, the 1 byte write should be converted to a 2 byte read+2 byte write. docs/devel/memory.rst just says: - .impl.min_access_size, .impl.max_access_size define the access sizes (in bytes) supported by the *implementation*; other access sizes will be emulated using the ones available. For example a 4-byte write will be emulated using four 1-byte writes, if .impl.max_access_size = 1. Instead what we have is: MemTxResult memory_region_dispatch_write(MemoryRegion *mr, hwaddr addr, uint64_t data, MemOp op, MemTxAttrs attrs) { unsigned size = memop_size(op); if (!memory_region_access_valid(mr, addr, size, true, attrs)) { unassigned_mem_write(mr, addr, data, size); return MEMTX_DECODE_ERROR; } adjust_endianness(mr, , op); --- static void adjust_endianness(MemoryRegion *mr, uint64_t *data, MemOp op) { if ((op & MO_BSWAP) != devend_memop(mr->ops->endianness)) { switch (op & MO_SIZE) { case MO_8: break; case MO_16: *data = bswap16(*data); break; case MO_32: *data =
Re: [PATCH] qapi: Improve input_type_enum()'s error message
On 10/11/21 15:15, Markus Armbruster wrote: > The error message claims the parameter is invalid: > > $ qemu-system-x86_64 -object qom-type=nonexistent > qemu-system-x86_64: -object qom-type=nonexistent: Invalid parameter > 'nonexistent' > > What's wrong is actually the *value* 'nonexistent'. Improve the > message to > > qemu-system-x86_64: -object qom-type=nonexistent: Parameter 'qom-type' > does not accept value 'nonexistent' > > Signed-off-by: Markus Armbruster > --- > qapi/qapi-visit-core.c | 3 ++- > tests/unit/check-qom-proplist.c | 2 +- > tests/qemu-iotests/049.out | 6 +++--- > tests/qemu-iotests/206.out | 2 +- > tests/qemu-iotests/237.out | 6 +++--- > tests/qemu-iotests/245 | 2 +- > 6 files changed, 11 insertions(+), 10 deletions(-) Reviewed-by: Philippe Mathieu-Daudé
Re: [PATCH] qemu-iotests: flush after every test
On 06.10.21 11:27, Paolo Bonzini wrote: This makes it possible to see what is happening, even if the output of "make check-block" is not sent to a tty (for example if it is sent to grep or tee). Signed-off-by: Paolo Bonzini --- tests/qemu-iotests/testrunner.py | 1 + 1 file changed, 1 insertion(+) Thanks, applied to my block branch: https://gitlab.com/hreitz/qemu/-/commits/block/ Hanna
Re: [PATCH] tests/acceptance: Add tests for the ppc405 boards
On 10/11/21 14:59, Thomas Huth wrote: > Using the U-Boot firmware, we can check that at least the serial console > of the ppc405 boards is still usable. > > Signed-off-by: Thomas Huth > --- > Based-on: 20211006071140.565952-1-th...@redhat.com > > tests/acceptance/ppc_405.py | 40 + > 1 file changed, 40 insertions(+) > create mode 100644 tests/acceptance/ppc_405.py > +def test_ppc_taihu(self): > +""" > +:avocado: tags=arch:ppc > +:avocado: tags=machine:taihu Please consider adding: tags=cpu:405ep > +""" > +self.do_test_ppc405() > + > +def test_ppc_ref405ep(self): > +""" > +:avocado: tags=arch:ppc > +:avocado: tags=machine:ref405ep > +""" > +self.do_test_ppc405() > Reviewed-by: Philippe Mathieu-Daudé Tested-by: Philippe Mathieu-Daudé
Re: ACPI endianness
On Mon, 11 Oct 2021 08:19:01 -0400 "Michael S. Tsirkin" wrote: > On Mon, Oct 11, 2021 at 12:13:55PM +0200, BALATON Zoltan wrote: > > On Mon, 11 Oct 2021, Philippe Mathieu-Daudé wrote: > > > On 10/10/21 15:24, BALATON Zoltan wrote: > > > > Hello, > > > > > > > > I'm trying to fix shutdown and reboot on pegasos2 which uses ACPI as > > > > part of the VIA VT8231 (similar to and modelled in hw/isa/vt82c686b.c) > > > > and found that the guest writes to ACPI PM1aCNT register come out with > > > > wrong endianness but not shure why. I have this: > > > > > > > > $ qemu-system-ppc -M pegasos2 -monitor stdio > > > > (qemu) info mtree > > > > [...] > > > > memory-region: pci1-io > > > > - (prio 0, i/o): pci1-io > > > > [...] > > > > 0f00-0f7f (prio 0, i/o): via-pm > > > > 0f00-0f03 (prio 0, i/o): acpi-evt > > > > 0f04-0f05 (prio 0, i/o): acpi-cnt > > > > 0f08-0f0b (prio 0, i/o): acpi-tmr > > > > > > > > memory-region: system > > > > - (prio 0, i/o): system > > > > -1fff (prio 0, ram): pegasos2.ram > > > > 8000-bfff (prio 0, i/o): alias pci1-mem0-win > > > > @pci1-mem 8000-bfff > > > > c000-dfff (prio 0, i/o): alias pci0-mem0-win > > > > @pci0-mem c000-dfff > > > > f100-f100 (prio 0, i/o): mv64361 > > > > f800-f8ff (prio 0, i/o): alias pci0-io-win > > > > @pci0-io -00ff > > > > f900-f9ff (prio 0, i/o): alias pci0-mem1-win > > > > @pci0-mem -00ff > > > > fd00-fdff (prio 0, i/o): alias pci1-mem1-win > > > > @pci1-mem -00ff > > > > fe00-feff (prio 0, i/o): alias pci1-io-win > > > > @pci1-io -00ff > > > > ff80- (prio 0, i/o): alias pci1-mem3-win > > > > @pci1-mem ff80- > > > > fff0-fff7 (prio 0, rom): pegasos2.rom > > > > > > > > The guest (which is big endian PPC and I think wotks on real hardware) > > > > writes to 0xf05 in the io region which should be the high byte of the > > > > little endian register but in the acpi code it comes out wrong, instead > > > > of 0x2800 I get in acpi_pm1_cnt_write: val=0x28 > > > > > > Looks like a northbridge issue (MV64340). > > > Does Pegasos2 enables bus swapping? > > > See hw/pci-host/mv64361.c:585: > > > > > > static void warn_swap_bit(uint64_t val) > > > { > > >if ((val & 0x300ULL) >> 24 != 1) { > > >qemu_log_mask(LOG_UNIMP, "%s: Data swap not implemented", > > > __func__); > > >} > > > } > > > > No, guests don't use this feature just byteswap themselves and write little > > endian values to the mapped io region. (Or in this case just write one byte > > of the 16 bit value, it specifically writes 0x28 to 0xf05.) That's why I > > think the device model should not byteswap itself so the acpi regions should > > be declared native endian and let the guest handle it. Some mentions I've > > found say that native endian means don't byteswap, little endian means > > byteswap if vcpu is big endian and big endian means byteswap if vcpu is > > little endian. Since guests already account for this and write little endian > > values to these regions there's no need to byteswap in device model and > > changing to native endian should not affect little endian machines so unless > > there's a better argument I'll try sending a patch. > > > > Regards, > > BALATON Zoltan > > native endian means endian-ness is open-coded in the device handler > itself. I think you just found a bug in memory core. > > static const MemoryRegionOps acpi_pm_cnt_ops = { > .read = acpi_pm_cnt_read, > .write = acpi_pm_cnt_write, > .impl.min_access_size = 2, > .valid.min_access_size = 1, > .valid.max_access_size = 2, > .endianness = DEVICE_LITTLE_ENDIAN, > }; > > > Because of that .impl.min_access_size = 2, > the 1 byte write should be converted to a 2 byte > read+2 byte write. > > docs/devel/memory.rst just says: > - .impl.min_access_size, .impl.max_access_size define the access sizes > (in bytes) supported by the *implementation*; other access sizes will be > emulated using the ones available. For example a 4-byte write will be > emulated using four 1-byte writes, if .impl.max_access_size = 1. > > > > Instead what we have is: > > MemTxResult memory_region_dispatch_write(MemoryRegion *mr, > hwaddr addr, > uint64_t data, > MemOp op, >
Re: [PATCH] hw/misc: applesmc: use host osk as default on macs
FWIW, there's an applesmc driver in the Linux kernel, and it exposes many of the keys and values stored on the chip under /sys/devices/platform/applesmc.768 (or at least it *used* to back when I last checked). My idea at the time was to get this driver to also expose the value of OSK0,1, so that userspace programs (like e.g. qemu) could read and use the values hardcoded in hardware without needing to hardcode them themselves in software. It went nowhere at the time (the hwmon maintainer kept insisting that "since it's a constant why don't you just hardcode it", and round and round we went until I walked away: https://lore.kernel.org/lkml/20121210222313.gf2...@hedwig.ini.cmu.edu/ Given *this* conversation, it might be worth someone's effort to try that approach again. IMO it's really the most efficient: have an already existing applesmc driver in the hypervisor's kernel expose the desired key values (it's whole job is to expose key values to userspace in the first place). Then have userspace read that and use it for whatever purposes (including populating guest-facing emulated smc devices). Nobody has to use anyone's copyrighted code or strings or whatever. If only the hwmon folks could be convinced this time around :) HTH, --Gabriel On Sun, Oct 10, 2021 at 06:22:04PM -0300, Pedro Tôrres wrote: > AFAIK there’s no public documentation from Apple on how to read values from > SMC. > > But Amit Singh wrote a book, Mac OS X Internals, and one of the processes > described on it is how to read OSK directly from SMC: https://web.archive.org/ > web/20190630050544/http://osxbook.com/book/bonus/chapter7/tpmdrmmyth/ > > This is actually referenced on VirtualBox’s source code as the base for their > implementation: > https://www.virtualbox.org/browser/vbox/trunk/src/VBox/Devices/ > EFI/DevSmc.cpp#L520 > > Additionally, there is the smcFanControl project, licensed under GPLv2, that > performs reads and writes on SMC and have all information necessary to > implement this feature on QEMU: https://github.com/hholtmann/smcFanControl > > This project was used as base for the SMC in-tree driver for Linux and it’s > referenced there: https://github.com/torvalds/linux/blob/master/drivers/hwmon/ > applesmc.c#L14 > > I think we would be safe using these sources as the base for our > implementation, given that other huge GPL projects like Linux and VirtualBox > have been using them for years. > > Best regards, > Pedro Tôrres > > > On Oct 10, 2021, at 4:25 PM, Paolo Bonzini wrote: > > > Can you instead provide documentation in English (pseudocode, tables of > the > structs, etc.)? That's the safest bet. > > Paolo > > El sáb., 9 oct. 2021 7:32, Pedro Tôrres escribió: > > Hey Paolo and Phil, > > I understand you concerns regarding the license that Apple open-source > code is distributed. > > If I restart from scratch and implement this feature based only on > VirtualBox code, that is distributed under GPLv2, would it be fine? > > Best regards, > Pedro Tôrres > > > On Oct 8, 2021, at 3:54 PM, Paolo Bonzini > wrote: > > > On 08/10/21 14:03, Phil Dennis-Jordan wrote: > > 1. Licensing > > Given that the code it's heavily based on is copyright Apple > Computer Inc., licensed under APSL, is it safe including it in > qemu as is? > > If the integrated code is going to be quite so "directly > inspired" (down to the inconsistent struct definition style > and > mixing unrelated constants in the same anonymous enum), > perhaps > at minimum place it in its own isolated source file with > appropriate notice? > > > Yeah, this should be reverted. > > Pedro, I understand that you stated it was "based on code from > Apple" but you also said (by including Signed-off-by) that > > --- > (a) The contribution was created in whole or in part by me and I >have the right to submit it under the open source license >indicated in the file; or > > (b) The contribution is based upon previous work that, to the best >of my knowledge, is covered under an appropriate open source >license and I have the right under that license to submit that >work with modifications, whether created in whole or in part >by me, under the same open source license (unless I am >permitted to submit under a different license), as indicated >in the file; or > --- > > and this is not true. > > Thanks very much, > > Paolo > >
[PATCH] qapi: Improve input_type_enum()'s error message
The error message claims the parameter is invalid: $ qemu-system-x86_64 -object qom-type=nonexistent qemu-system-x86_64: -object qom-type=nonexistent: Invalid parameter 'nonexistent' What's wrong is actually the *value* 'nonexistent'. Improve the message to qemu-system-x86_64: -object qom-type=nonexistent: Parameter 'qom-type' does not accept value 'nonexistent' Signed-off-by: Markus Armbruster --- qapi/qapi-visit-core.c | 3 ++- tests/unit/check-qom-proplist.c | 2 +- tests/qemu-iotests/049.out | 6 +++--- tests/qemu-iotests/206.out | 2 +- tests/qemu-iotests/237.out | 6 +++--- tests/qemu-iotests/245 | 2 +- 6 files changed, 11 insertions(+), 10 deletions(-) diff --git a/qapi/qapi-visit-core.c b/qapi/qapi-visit-core.c index a641adec51..7310f0a0ca 100644 --- a/qapi/qapi-visit-core.c +++ b/qapi/qapi-visit-core.c @@ -392,7 +392,8 @@ static bool input_type_enum(Visitor *v, const char *name, int *obj, value = qapi_enum_parse(lookup, enum_str, -1, NULL); if (value < 0) { -error_setg(errp, QERR_INVALID_PARAMETER, enum_str); +error_setg(errp, "Parameter '%s' does not accept value '%s'", + name ? name : "null", enum_str); g_free(enum_str); return false; } diff --git a/tests/unit/check-qom-proplist.c b/tests/unit/check-qom-proplist.c index 48503e0dff..ed341088d3 100644 --- a/tests/unit/check-qom-proplist.c +++ b/tests/unit/check-qom-proplist.c @@ -488,7 +488,7 @@ static void test_dummy_badenum(void) g_assert(dobj == NULL); g_assert(err != NULL); g_assert_cmpstr(error_get_pretty(err), ==, -"Invalid parameter 'yeti'"); +"Parameter 'av' does not accept value 'yeti'"); g_assert(object_resolve_path_component(parent, "dummy0") == NULL); diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out index 01f7b1f240..8719c91b48 100644 --- a/tests/qemu-iotests/049.out +++ b/tests/qemu-iotests/049.out @@ -174,11 +174,11 @@ Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off comp qemu-img create -f qcow2 -o compat=0.42 TEST_DIR/t.qcow2 64M Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=0.42 lazy_refcounts=off refcount_bits=16 -qemu-img: TEST_DIR/t.qcow2: Invalid parameter '0.42' +qemu-img: TEST_DIR/t.qcow2: Parameter 'version' does not accept value '0.42' qemu-img create -f qcow2 -o compat=foobar TEST_DIR/t.qcow2 64M Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=foobar lazy_refcounts=off refcount_bits=16 -qemu-img: TEST_DIR/t.qcow2: Invalid parameter 'foobar' +qemu-img: TEST_DIR/t.qcow2: Parameter 'version' does not accept value 'foobar' == Check preallocation option == @@ -190,7 +190,7 @@ Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off prea qemu-img create -f qcow2 -o preallocation=1234 TEST_DIR/t.qcow2 64M Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off preallocation=1234 compression_type=zlib size=67108864 lazy_refcounts=off refcount_bits=16 -qemu-img: TEST_DIR/t.qcow2: Invalid parameter '1234' +qemu-img: TEST_DIR/t.qcow2: Parameter 'preallocation' does not accept value '1234' == Check encryption option == diff --git a/tests/qemu-iotests/206.out b/tests/qemu-iotests/206.out index b68c443867..3593e8e9c2 100644 --- a/tests/qemu-iotests/206.out +++ b/tests/qemu-iotests/206.out @@ -192,7 +192,7 @@ Job failed: Could not resize image: Failed to grow the L1 table: File too large === Invalid version === {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "qcow2", "file": "node0", "size": 67108864, "version": "v1"}}} -{"error": {"class": "GenericError", "desc": "Invalid parameter 'v1'"}} +{"error": {"class": "GenericError", "desc": "Parameter 'version' does not accept value 'v1'"}} {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "qcow2", "file": "node0", "lazy-refcounts": true, "size": 67108864, "version": "v2"}}} {"return": {}} diff --git a/tests/qemu-iotests/237.out b/tests/qemu-iotests/237.out index aa94986803..2f09ff5512 100644 --- a/tests/qemu-iotests/237.out +++ b/tests/qemu-iotests/237.out @@ -116,13 +116,13 @@ Job failed: Cannot find device='this doesn't exist' nor node-name='this doesn't == Invalid adapter types == {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"adapter-type": "foo", "driver": "vmdk", "file": "node0", "size": 33554432}}} -{"error": {"class": "GenericError", "desc": "Invalid parameter 'foo'"}} +{"error": {"class": "GenericError", "desc": "Parameter 'adapter-type' does not accept value 'foo'"}} {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"adapter-type": "IDE", "driver": "vmdk", "file": "node0",
Re: Deprecate the ppc405 boards in QEMU?
On Mon, Oct 11, 2021 at 10:10:36AM +0200, Thomas Huth wrote: > On 06/10/2021 09.25, Thomas Huth wrote: > > On 05/10/2021 23.53, BALATON Zoltan wrote: > > [...] > > > Maybe these 405 boards in QEMU ran with modified firmware where the > > > memory detection was patched out but it seems to detect the RAM so I > > > wonder where it gets that from. Maybe by reading the SDRAM > > > controller DCRs ppc4xx_sdram_init() sets up. Then I'm not sure what > > > it needs the SPD for, I forgot how this worked on sam460ex. Maybe > > > for the speed calibration, so could be it detects ram by reading > > > DCRs then tries to get SPD data and that's where it stops as i2c is > > > not emulated on taihu. This could be confirmed by checking what it > > > pokes with -d guest_errors that shows accesses to missing devices > > > but don't know where 405 has the i2c controller and if it's the same > > > as newer SoCs. If so that could be reused and an i2c bus could be > > > added with the SPD data like in sam460ex to make u-boot happy or you > > > could skip this in u-boot. > > > > FWIW, I've just tried the latter (skipping the sdram init in u-boot), > > and indeed, I can get to the u-boot prompt now. > [...]> I've also attached the patch with my modifications to u-boot. > > FYI, the changes can now be found on this branch here: > > https://gitlab.com/huth/u-boot/-/commits/taihu > > I also added a gitlab-CI file, so you can now download the u-boot.bin as an > artifact from the corresponding pipeline, e.g.: > > https://gitlab.com/huth/u-boot/-/jobs/1667201028 Thanks. Are you going to send a v2 of your proposed deprecation patches? If you do can you please CC me explicitly. I only scan qemu-ppc once a week or so, and it goes into a different folder. If I'm CCed I'll notice and respond to it faster. -- David Gibson| I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson signature.asc Description: PGP signature
[PATCH] tests/acceptance: Add tests for the ppc405 boards
Using the U-Boot firmware, we can check that at least the serial console of the ppc405 boards is still usable. Signed-off-by: Thomas Huth --- Based-on: 20211006071140.565952-1-th...@redhat.com tests/acceptance/ppc_405.py | 40 + 1 file changed, 40 insertions(+) create mode 100644 tests/acceptance/ppc_405.py diff --git a/tests/acceptance/ppc_405.py b/tests/acceptance/ppc_405.py new file mode 100644 index 00..bf7c2f22fc --- /dev/null +++ b/tests/acceptance/ppc_405.py @@ -0,0 +1,40 @@ +# Test that the U-Boot firmware boots on ppc 405 machines and check the console +# +# Copyright (c) 2021 Red Hat, Inc. +# +# This work is licensed under the terms of the GNU GPL, version 2 or +# later. See the COPYING file in the top-level directory. + +from avocado.utils import archive +from avocado_qemu import Test +from avocado_qemu import wait_for_console_pattern +from avocado_qemu import exec_command_and_wait_for_pattern + +class Ppc405Machine(Test): + +timeout = 90 + +def do_test_ppc405(self): +uboot_url = ('https://gitlab.com/huth/u-boot/-/raw/' + 'taihu-2021-10-09/u-boot-taihu.bin') +uboot_hash = ('3208940e908a5edc7c03eab072c60f0dcfadc2ab'); +file_path = self.fetch_asset(uboot_url, asset_hash=uboot_hash) +self.vm.set_console(console_index=1) +self.vm.add_args('-bios', file_path) +self.vm.launch() +wait_for_console_pattern(self, 'AMCC PPC405EP Evaluation Board') +exec_command_and_wait_for_pattern(self, 'reset', 'AMCC PowerPC 405EP') + +def test_ppc_taihu(self): +""" +:avocado: tags=arch:ppc +:avocado: tags=machine:taihu +""" +self.do_test_ppc405() + +def test_ppc_ref405ep(self): +""" +:avocado: tags=arch:ppc +:avocado: tags=machine:ref405ep +""" +self.do_test_ppc405() -- 2.27.0
Re: [PATCH 6/6] pcie: expire pending delete
On Mon, Oct 11, 2021 at 02:05:04PM +0200, Gerd Hoffmann wrote: > Add an expire time for pending delete, once the time is over allow > pressing the attention button again. > > This makes pcie hotplug behave more like acpi hotplug, where one can > try sending an 'device_del' monitor command again in case the guest > didn't respond to the first attempt. > > Signed-off-by: Gerd Hoffmann > --- > include/hw/qdev-core.h | 1 + > hw/pci/pcie.c | 2 ++ > softmmu/qdev-monitor.c | 4 +++- > 3 files changed, 6 insertions(+), 1 deletion(-) > > diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h > index 4ff19c714bd8..d082a9a00aca 100644 > --- a/include/hw/qdev-core.h > +++ b/include/hw/qdev-core.h > @@ -180,6 +180,7 @@ struct DeviceState { > char *canonical_path; > bool realized; > bool pending_deleted_event; > +int64_t pending_deleted_expires_ms; > QemuOpts *opts; > int hotplugged; > bool allow_unplug_during_migration; > diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c > index f3ac04399969..477c8776aa27 100644 > --- a/hw/pci/pcie.c > +++ b/hw/pci/pcie.c > @@ -549,6 +549,8 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler > *hotplug_dev, > } > > dev->pending_deleted_event = true; > +dev->pending_deleted_expires_ms = > +qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 5000; /* 5 secs */ > > /* In case user cancel the operation of multi-function hot-add, > * remove the function that is unexposed to guest individually, Well this will be barely enough, right? Once the Power Indicator begins blinking, a 5-second abort interval exists during which a second depression of the Attention Button cancels the operation. So I guess it needs to be more. Problem is of course if guest is busy because of interrupts and whatnot, it might not get to handling that in time ... > diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c > index 0705f008466d..5e7960c52a0a 100644 > --- a/softmmu/qdev-monitor.c > +++ b/softmmu/qdev-monitor.c > @@ -910,7 +910,9 @@ void qmp_device_del(const char *id, Error **errp) > { > DeviceState *dev = find_device_state(id, errp); > if (dev != NULL) { > -if (dev->pending_deleted_event) { > +if (dev->pending_deleted_event && > +(dev->pending_deleted_expires_ms == 0 || > + dev->pending_deleted_expires_ms > > qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL))) { > error_setg(errp, "Device %s is already in the " > "process of unplug", id); > return; > -- > 2.31.1
Re: [PATCH v2] coroutine: resize pool periodically instead of limiting size
On Mon, Sep 13, 2021 at 04:35:24PM +0100, Stefan Hajnoczi wrote: > It was reported that enabling SafeStack reduces IOPS significantly > (>25%) with the following fio benchmark on virtio-blk using a NVMe host > block device: > > # fio --rw=randrw --bs=4k --iodepth=64 --runtime=1m --direct=1 \ > --filename=/dev/vdb --name=job1 --ioengine=libaio --thread \ > --group_reporting --numjobs=16 --time_based \ > --output=/tmp/fio_result > > Serge Guelton and I found that SafeStack is not really at fault, it just > increases the cost of coroutine creation. This fio workload exhausts the > coroutine pool and coroutine creation becomes a bottleneck. Previous > work by Honghao Wang also pointed to excessive coroutine creation. > > Creating new coroutines is expensive due to allocating new stacks with > mmap(2) and mprotect(2). Currently there are thread-local and global > pools that recycle old Coroutine objects and their stacks but the > hardcoded size limit of 64 for thread-local pools and 128 for the global > pool is insufficient for the fio benchmark shown above. > > This patch changes the coroutine pool algorithm to a simple thread-local > pool without a maximum size limit. Threads periodically shrink the pool > down to a size sufficient for the maximum observed number of coroutines. > > The global pool is removed by this patch. It can help to hide the fact > that local pools are easily exhausted, but it's doesn't fix the root > cause. I don't think there is a need for a global pool because QEMU's > threads are long-lived, so let's keep things simple. > > Performance of the above fio benchmark is as follows: > > Before After > IOPS 60k 97k > > Memory usage varies over time as needed by the workload: > > VSZ (KB) RSS (KB) > Before fio 4705248 843128 > During fio 5747668 (+ ~100 MB) 849280 > After fio 4694996 (- ~100 MB) 845184 > > This confirms that coroutines are indeed being freed when no longer > needed. > > Thanks to Serge Guelton for working on identifying the bottleneck with > me! > > Reported-by: Tingting Mao > Cc: Serge Guelton > Cc: Honghao Wang > Cc: Paolo Bonzini > Cc: Daniele Buono > Signed-off-by: Stefan Hajnoczi > --- > v2: > * Retained minimum pool size of 64 to keep latency low for threads that >perform I/O infrequently and to prevent possible regressions [Daniele] > --- > include/qemu/coroutine-pool-timer.h | 36 > include/qemu/coroutine.h| 7 +++ > iothread.c | 6 +++ > util/coroutine-pool-timer.c | 35 +++ > util/main-loop.c| 5 +++ > util/qemu-coroutine.c | 66 - > util/meson.build| 1 + > 7 files changed, 126 insertions(+), 30 deletions(-) > create mode 100644 include/qemu/coroutine-pool-timer.h > create mode 100644 util/coroutine-pool-timer.c Applied to my block-next tree: https://gitlab.com/stefanha/qemu/commits/block-next Stefan signature.asc Description: PGP signature
[PULL 2/2] iothread: use IOThreadParamInfo in iothread_[set|get]_param()
From: Stefano Garzarella Commit 0445409d74 ("iothread: generalize iothread_set_param/iothread_get_param") moved common code to set and get IOThread parameters in two new functions. These functions are called inside callbacks, so we don't need to use an opaque pointer. Let's replace `void *opaque` parameter with `IOThreadParamInfo *info`. Suggested-by: Kevin Wolf Signed-off-by: Stefano Garzarella Reviewed-by: Philippe Mathieu-Daudé Message-id: 20210727145936.147032-3-sgarz...@redhat.com Signed-off-by: Stefan Hajnoczi --- iothread.c | 18 ++ 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/iothread.c b/iothread.c index a73e560ba0..0f98af0f2a 100644 --- a/iothread.c +++ b/iothread.c @@ -231,20 +231,18 @@ static IOThreadParamInfo aio_max_batch_info = { }; static void iothread_get_param(Object *obj, Visitor *v, -const char *name, void *opaque, Error **errp) +const char *name, IOThreadParamInfo *info, Error **errp) { IOThread *iothread = IOTHREAD(obj); -IOThreadParamInfo *info = opaque; int64_t *field = (void *)iothread + info->offset; visit_type_int64(v, name, field, errp); } static bool iothread_set_param(Object *obj, Visitor *v, -const char *name, void *opaque, Error **errp) +const char *name, IOThreadParamInfo *info, Error **errp) { IOThread *iothread = IOTHREAD(obj); -IOThreadParamInfo *info = opaque; int64_t *field = (void *)iothread + info->offset; int64_t value; @@ -266,16 +264,18 @@ static bool iothread_set_param(Object *obj, Visitor *v, static void iothread_get_poll_param(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { +IOThreadParamInfo *info = opaque; -iothread_get_param(obj, v, name, opaque, errp); +iothread_get_param(obj, v, name, info, errp); } static void iothread_set_poll_param(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { IOThread *iothread = IOTHREAD(obj); +IOThreadParamInfo *info = opaque; -if (!iothread_set_param(obj, v, name, opaque, errp)) { +if (!iothread_set_param(obj, v, name, info, errp)) { return; } @@ -291,16 +291,18 @@ static void iothread_set_poll_param(Object *obj, Visitor *v, static void iothread_get_aio_param(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { +IOThreadParamInfo *info = opaque; -iothread_get_param(obj, v, name, opaque, errp); +iothread_get_param(obj, v, name, info, errp); } static void iothread_set_aio_param(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { IOThread *iothread = IOTHREAD(obj); +IOThreadParamInfo *info = opaque; -if (!iothread_set_param(obj, v, name, opaque, errp)) { +if (!iothread_set_param(obj, v, name, info, errp)) { return; } -- 2.31.1
[PULL 1/2] iothread: rename PollParamInfo to IOThreadParamInfo
From: Stefano Garzarella Commit 1793ad0247 ("iothread: add aio-max-batch parameter") added a new parameter (aio-max-batch) to IOThread and used PollParamInfo structure to handle it. Since it is not a parameter of the polling mechanism, we rename the structure to a more generic IOThreadParamInfo. Suggested-by: Kevin Wolf Signed-off-by: Stefano Garzarella Reviewed-by: Philippe Mathieu-Daudé Message-id: 20210727145936.147032-2-sgarz...@redhat.com Signed-off-by: Stefan Hajnoczi --- iothread.c | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/iothread.c b/iothread.c index ddbbde61f7..a73e560ba0 100644 --- a/iothread.c +++ b/iothread.c @@ -215,18 +215,18 @@ static void iothread_complete(UserCreatable *obj, Error **errp) typedef struct { const char *name; ptrdiff_t offset; /* field's byte offset in IOThread struct */ -} PollParamInfo; +} IOThreadParamInfo; -static PollParamInfo poll_max_ns_info = { +static IOThreadParamInfo poll_max_ns_info = { "poll-max-ns", offsetof(IOThread, poll_max_ns), }; -static PollParamInfo poll_grow_info = { +static IOThreadParamInfo poll_grow_info = { "poll-grow", offsetof(IOThread, poll_grow), }; -static PollParamInfo poll_shrink_info = { +static IOThreadParamInfo poll_shrink_info = { "poll-shrink", offsetof(IOThread, poll_shrink), }; -static PollParamInfo aio_max_batch_info = { +static IOThreadParamInfo aio_max_batch_info = { "aio-max-batch", offsetof(IOThread, aio_max_batch), }; @@ -234,7 +234,7 @@ static void iothread_get_param(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { IOThread *iothread = IOTHREAD(obj); -PollParamInfo *info = opaque; +IOThreadParamInfo *info = opaque; int64_t *field = (void *)iothread + info->offset; visit_type_int64(v, name, field, errp); @@ -244,7 +244,7 @@ static bool iothread_set_param(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { IOThread *iothread = IOTHREAD(obj); -PollParamInfo *info = opaque; +IOThreadParamInfo *info = opaque; int64_t *field = (void *)iothread + info->offset; int64_t value; -- 2.31.1
[PULL 0/2] Block patches
The following changes since commit ca61fa4b803e5d0abaf6f1ceb690f23bb78a4def: Merge remote-tracking branch 'remotes/quic/tags/pull-hex-20211006' into staging (2021-10-06 12:11:14 -0700) are available in the Git repository at: https://gitlab.com/stefanha/qemu.git tags/block-pull-request for you to fetch changes up to 1cc7eada97914f090125e588497986f6f7900514: iothread: use IOThreadParamInfo in iothread_[set|get]_param() (2021-10-07 15:29:50 +0100) Pull request Stefano Garzarella (2): iothread: rename PollParamInfo to IOThreadParamInfo iothread: use IOThreadParamInfo in iothread_[set|get]_param() iothread.c | 28 +++- 1 file changed, 15 insertions(+), 13 deletions(-) -- 2.31.1
Re: [PATCH] Trim some trailing space from human-readable output
On Sat, 9 Oct 2021 17:24:01 +0200 Markus Armbruster wrote: > I noticed -cpu help printing enough trailing spaces to make the output > at least 84 characters wide. Looks ugly unless the terminal is wider. > Ugly or not, trailing spaces are stupid. > > The culprit is this line in x86_cpu_list_entry(): > > qemu_printf("x86 %-20s %-58s\n", name, desc); > > This prints a string with minimum field left-justified right before a > newline. Change it to > > qemu_printf("x86 %-20s %s\n", name, desc); > > which avoids the trailing spaces and is simpler to boot. > > A search for the pattern with "git-grep -E '%-[0-9]+s\\n'" found a few > more instances. Change them similarly. > > Signed-off-by: Markus Armbruster > --- > monitor/hmp-cmds.c | 2 +- > target/i386/cpu-dump.c | 4 ++-- > target/i386/cpu.c | 2 +- > target/ppc/cpu_init.c | 2 +- ppc part Acked-by: Greg Kurz > target/s390x/cpu_models.c | 4 ++-- > target/xtensa/mmu_helper.c | 2 +- > 6 files changed, 8 insertions(+), 8 deletions(-) > > diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c > index bcaa41350e..9e45a138a5 100644 > --- a/monitor/hmp-cmds.c > +++ b/monitor/hmp-cmds.c > @@ -1945,7 +1945,7 @@ void hmp_rocker_ports(Monitor *mon, const QDict *qdict) > monitor_printf(mon, " port linkduplex neg?\n"); > > for (port = list; port; port = port->next) { > -monitor_printf(mon, "%10s %-4s %-3s %2s %-3s\n", > +monitor_printf(mon, "%10s %-4s %-3s %2s %s\n", > port->value->name, > port->value->enabled ? port->value->link_up ? > "up" : "down" : "!ena", > diff --git a/target/i386/cpu-dump.c b/target/i386/cpu-dump.c > index 02b635a52c..08ac957e99 100644 > --- a/target/i386/cpu-dump.c > +++ b/target/i386/cpu-dump.c > @@ -464,13 +464,13 @@ void x86_cpu_dump_state(CPUState *cs, FILE *f, int > flags) > snprintf(cc_op_name, sizeof(cc_op_name), "[%d]", env->cc_op); > #ifdef TARGET_X86_64 > if (env->hflags & HF_CS64_MASK) { > -qemu_fprintf(f, "CCS=%016" PRIx64 " CCD=%016" PRIx64 " > CCO=%-8s\n", > +qemu_fprintf(f, "CCS=%016" PRIx64 " CCD=%016" PRIx64 " CCO=%s\n", > env->cc_src, env->cc_dst, > cc_op_name); > } else > #endif > { > -qemu_fprintf(f, "CCS=%08x CCD=%08x CCO=%-8s\n", > +qemu_fprintf(f, "CCS=%08x CCD=%08x CCO=%s\n", > (uint32_t)env->cc_src, (uint32_t)env->cc_dst, > cc_op_name); > } > diff --git a/target/i386/cpu.c b/target/i386/cpu.c > index a7b1b6aa93..8d2c0ded10 100644 > --- a/target/i386/cpu.c > +++ b/target/i386/cpu.c > @@ -4876,7 +4876,7 @@ static void x86_cpu_list_entry(gpointer data, gpointer > user_data) > desc = g_strdup_printf("%s", model_id); > } > > -qemu_printf("x86 %-20s %-58s\n", name, desc); > +qemu_printf("x86 %-20s %s\n", name, desc); > } > > /* list available CPU models and flags */ > diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c > index 6aad01d1d3..8ab81dd1ed 100644 > --- a/target/ppc/cpu_init.c > +++ b/target/ppc/cpu_init.c > @@ -8734,7 +8734,7 @@ void ppc_cpu_list(void) > > #ifdef CONFIG_KVM > qemu_printf("\n"); > -qemu_printf("PowerPC %-16s\n", "host"); > +qemu_printf("PowerPC %s\n", "host"); > #endif > } > > diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c > index 4e4598cc77..11e06cc51f 100644 > --- a/target/s390x/cpu_models.c > +++ b/target/s390x/cpu_models.c > @@ -398,14 +398,14 @@ void s390_cpu_list(void) > for (feat = 0; feat < S390_FEAT_MAX; feat++) { > const S390FeatDef *def = s390_feat_def(feat); > > -qemu_printf("%-20s %-50s\n", def->name, def->desc); > +qemu_printf("%-20s %s\n", def->name, def->desc); > } > > qemu_printf("\nRecognized feature groups:\n"); > for (group = 0; group < S390_FEAT_GROUP_MAX; group++) { > const S390FeatGroupDef *def = s390_feat_group_def(group); > > -qemu_printf("%-20s %-50s\n", def->name, def->desc); > +qemu_printf("%-20s %s\n", def->name, def->desc); > } > } > > diff --git a/target/xtensa/mmu_helper.c b/target/xtensa/mmu_helper.c > index b01ff9399a..57e319a1af 100644 > --- a/target/xtensa/mmu_helper.c > +++ b/target/xtensa/mmu_helper.c > @@ -1098,7 +1098,7 @@ static void dump_tlb(CPUXtensaState *env, bool dtlb) > qemu_printf("\tVaddr Paddr ASID Attr RWX > Cache\n" > "\t-- -- --- > ---\n"); > } > -qemu_printf("\t0x%08x 0x%08x 0x%02x 0x%02x %c%c%c %-7s\n", > +qemu_printf("\t0x%08x 0x%08x 0x%02x 0x%02x %c%c%c %s\n", > entry->vaddr, > entry->paddr, >
Re: ACPI endianness
On Mon, Oct 11, 2021 at 12:13:55PM +0200, BALATON Zoltan wrote: > On Mon, 11 Oct 2021, Philippe Mathieu-Daudé wrote: > > On 10/10/21 15:24, BALATON Zoltan wrote: > > > Hello, > > > > > > I'm trying to fix shutdown and reboot on pegasos2 which uses ACPI as > > > part of the VIA VT8231 (similar to and modelled in hw/isa/vt82c686b.c) > > > and found that the guest writes to ACPI PM1aCNT register come out with > > > wrong endianness but not shure why. I have this: > > > > > > $ qemu-system-ppc -M pegasos2 -monitor stdio > > > (qemu) info mtree > > > [...] > > > memory-region: pci1-io > > > - (prio 0, i/o): pci1-io > > > [...] > > > 0f00-0f7f (prio 0, i/o): via-pm > > > 0f00-0f03 (prio 0, i/o): acpi-evt > > > 0f04-0f05 (prio 0, i/o): acpi-cnt > > > 0f08-0f0b (prio 0, i/o): acpi-tmr > > > > > > memory-region: system > > > - (prio 0, i/o): system > > > -1fff (prio 0, ram): pegasos2.ram > > > 8000-bfff (prio 0, i/o): alias pci1-mem0-win > > > @pci1-mem 8000-bfff > > > c000-dfff (prio 0, i/o): alias pci0-mem0-win > > > @pci0-mem c000-dfff > > > f100-f100 (prio 0, i/o): mv64361 > > > f800-f8ff (prio 0, i/o): alias pci0-io-win > > > @pci0-io -00ff > > > f900-f9ff (prio 0, i/o): alias pci0-mem1-win > > > @pci0-mem -00ff > > > fd00-fdff (prio 0, i/o): alias pci1-mem1-win > > > @pci1-mem -00ff > > > fe00-feff (prio 0, i/o): alias pci1-io-win > > > @pci1-io -00ff > > > ff80- (prio 0, i/o): alias pci1-mem3-win > > > @pci1-mem ff80- > > > fff0-fff7 (prio 0, rom): pegasos2.rom > > > > > > The guest (which is big endian PPC and I think wotks on real hardware) > > > writes to 0xf05 in the io region which should be the high byte of the > > > little endian register but in the acpi code it comes out wrong, instead > > > of 0x2800 I get in acpi_pm1_cnt_write: val=0x28 > > > > Looks like a northbridge issue (MV64340). > > Does Pegasos2 enables bus swapping? > > See hw/pci-host/mv64361.c:585: > > > > static void warn_swap_bit(uint64_t val) > > { > >if ((val & 0x300ULL) >> 24 != 1) { > >qemu_log_mask(LOG_UNIMP, "%s: Data swap not implemented", __func__); > >} > > } > > No, guests don't use this feature just byteswap themselves and write little > endian values to the mapped io region. (Or in this case just write one byte > of the 16 bit value, it specifically writes 0x28 to 0xf05.) That's why I > think the device model should not byteswap itself so the acpi regions should > be declared native endian and let the guest handle it. Some mentions I've > found say that native endian means don't byteswap, little endian means > byteswap if vcpu is big endian and big endian means byteswap if vcpu is > little endian. Since guests already account for this and write little endian > values to these regions there's no need to byteswap in device model and > changing to native endian should not affect little endian machines so unless > there's a better argument I'll try sending a patch. > > Regards, > BALATON Zoltan native endian means endian-ness is open-coded in the device handler itself. I think you just found a bug in memory core. static const MemoryRegionOps acpi_pm_cnt_ops = { .read = acpi_pm_cnt_read, .write = acpi_pm_cnt_write, .impl.min_access_size = 2, .valid.min_access_size = 1, .valid.max_access_size = 2, .endianness = DEVICE_LITTLE_ENDIAN, }; Because of that .impl.min_access_size = 2, the 1 byte write should be converted to a 2 byte read+2 byte write. docs/devel/memory.rst just says: - .impl.min_access_size, .impl.max_access_size define the access sizes (in bytes) supported by the *implementation*; other access sizes will be emulated using the ones available. For example a 4-byte write will be emulated using four 1-byte writes, if .impl.max_access_size = 1. Instead what we have is: MemTxResult memory_region_dispatch_write(MemoryRegion *mr, hwaddr addr, uint64_t data, MemOp op, MemTxAttrs attrs) { unsigned size = memop_size(op); if (!memory_region_access_valid(mr, addr, size, true, attrs)) { unassigned_mem_write(mr, addr, data, size); return MEMTX_DECODE_ERROR; } adjust_endianness(mr, , op); --- static void
[PATCH 4/6] pcie: factor out pcie_cap_slot_unplug()
No functional change. Signed-off-by: Gerd Hoffmann --- hw/pci/pcie.c | 35 +-- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index c455f92e16bf..70fc11ba4c7d 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -497,6 +497,26 @@ static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque) object_unparent(OBJECT(dev)); } +static void pcie_cap_slot_do_unplug(PCIDevice *dev) +{ +PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); +uint8_t *exp_cap = dev->config + dev->exp.exp_cap; +uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP); + +pci_for_each_device(sec_bus, pci_bus_num(sec_bus), +pcie_unplug_device, NULL); + +pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDS); +if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA || +(lnkcap & PCI_EXP_LNKCAP_DLLLARC)) { +pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, + PCI_EXP_LNKSTA_DLLLA); +} +pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDC); +} + void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp) { @@ -676,7 +696,6 @@ void pcie_cap_slot_write_config(PCIDevice *dev, uint32_t pos = dev->exp.exp_cap; uint8_t *exp_cap = dev->config + pos; uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA); -uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP); if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) { /* @@ -726,19 +745,7 @@ void pcie_cap_slot_write_config(PCIDevice *dev, (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF && (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) || (old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) { -PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); -pci_for_each_device(sec_bus, pci_bus_num(sec_bus), -pcie_unplug_device, NULL); - -pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_PDS); -if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA || -(lnkcap & PCI_EXP_LNKCAP_DLLLARC)) { -pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, - PCI_EXP_LNKSTA_DLLLA); -} -pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_PDC); +pcie_cap_slot_do_unplug(dev); } pcie_cap_update_power(dev); -- 2.31.1
[PATCH 1/6] pci: implement power state
This allows to power off pci devices. In "off" state the devices will not be visible. No pci config space access, no pci bar access, no dma. Default state is "on", so this patch (alone) should not change behavior. Use case: Allows hotplug controllers implement slot power. Hotplug controllers doing so should set the inital power state for devices in the ->plug callback. Signed-off-by: Gerd Hoffmann --- include/hw/pci/pci.h | 2 ++ hw/pci/pci.c | 25 +++-- hw/pci/pci_host.c| 6 -- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 7fc90132cf1d..7cb6d92e17f7 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -268,6 +268,7 @@ typedef struct PCIReqIDCache PCIReqIDCache; struct PCIDevice { DeviceState qdev; bool partially_hotplugged; +bool has_power; /* PCI config space */ uint8_t *config; @@ -904,5 +905,6 @@ extern const VMStateDescription vmstate_pci_device; } MSIMessage pci_get_msi_message(PCIDevice *dev, int vector); +void pci_set_power(PCIDevice *pci_dev, bool state); #endif diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 186758ee11fe..94f53944c7cc 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1380,6 +1380,9 @@ static void pci_update_mappings(PCIDevice *d) continue; new_addr = pci_bar_address(d, i, r->type, r->size); +if (!d->has_power) { +new_addr = PCI_BAR_UNMAPPED; +} /* This bar isn't changed */ if (new_addr == r->addr) @@ -1464,8 +1467,8 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int if (range_covers_byte(addr, l, PCI_COMMAND)) { pci_update_irq_disabled(d, was_irq_disabled); memory_region_set_enabled(>bus_master_enable_region, - pci_get_word(d->config + PCI_COMMAND) -& PCI_COMMAND_MASTER); + (pci_get_word(d->config + PCI_COMMAND) + & PCI_COMMAND_MASTER) && d->has_power); } msi_write_config(d, addr, val_in, l); @@ -2190,6 +2193,8 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) pci_qdev_unrealize(DEVICE(pci_dev)); return; } + +pci_set_power(pci_dev, true); } PCIDevice *pci_new_multifunction(int devfn, bool multifunction, @@ -2861,6 +2866,22 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector) return msg; } +void pci_set_power(PCIDevice *d, bool state) +{ +if (d->has_power == state) { +return; +} + +d->has_power = state; +pci_update_mappings(d); +memory_region_set_enabled(>bus_master_enable_region, + (pci_get_word(d->config + PCI_COMMAND) + & PCI_COMMAND_MASTER) && d->has_power); +if (!d->has_power) { +pci_device_reset(d); +} +} + static const TypeInfo pci_device_type_info = { .name = TYPE_PCI_DEVICE, .parent = TYPE_DEVICE, diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c index cf02f0d6a501..7beafd40a8e9 100644 --- a/hw/pci/pci_host.c +++ b/hw/pci/pci_host.c @@ -74,7 +74,8 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr, /* non-zero functions are only exposed when function 0 is present, * allowing direct removal of unexposed functions. */ -if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) { +if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || +!pci_dev->has_power) { return; } @@ -97,7 +98,8 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr, /* non-zero functions are only exposed when function 0 is present, * allowing direct removal of unexposed functions. */ -if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) { +if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || +!pci_dev->has_power) { return ~0x0; } -- 2.31.1
[PATCH 6/6] pcie: expire pending delete
Add an expire time for pending delete, once the time is over allow pressing the attention button again. This makes pcie hotplug behave more like acpi hotplug, where one can try sending an 'device_del' monitor command again in case the guest didn't respond to the first attempt. Signed-off-by: Gerd Hoffmann --- include/hw/qdev-core.h | 1 + hw/pci/pcie.c | 2 ++ softmmu/qdev-monitor.c | 4 +++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index 4ff19c714bd8..d082a9a00aca 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -180,6 +180,7 @@ struct DeviceState { char *canonical_path; bool realized; bool pending_deleted_event; +int64_t pending_deleted_expires_ms; QemuOpts *opts; int hotplugged; bool allow_unplug_during_migration; diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index f3ac04399969..477c8776aa27 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -549,6 +549,8 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, } dev->pending_deleted_event = true; +dev->pending_deleted_expires_ms = +qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 5000; /* 5 secs */ /* In case user cancel the operation of multi-function hot-add, * remove the function that is unexposed to guest individually, diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c index 0705f008466d..5e7960c52a0a 100644 --- a/softmmu/qdev-monitor.c +++ b/softmmu/qdev-monitor.c @@ -910,7 +910,9 @@ void qmp_device_del(const char *id, Error **errp) { DeviceState *dev = find_device_state(id, errp); if (dev != NULL) { -if (dev->pending_deleted_event) { +if (dev->pending_deleted_event && +(dev->pending_deleted_expires_ms == 0 || + dev->pending_deleted_expires_ms > qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL))) { error_setg(errp, "Device %s is already in the " "process of unplug", id); return; -- 2.31.1
[PATCH 0/6] RfC: try improve native hotplug for pcie root ports
Gerd Hoffmann (6): pci: implement power state pcie: implement slow power control for pcie root ports pcie: add power indicator blink check pcie: factor out pcie_cap_slot_unplug() pcie: fast unplug when slot power is off pcie: expire pending delete include/hw/pci/pci.h | 2 ++ include/hw/qdev-core.h | 1 + hw/pci/pci.c | 25 +++-- hw/pci/pci_host.c | 6 ++-- hw/pci/pcie.c | 82 ++ softmmu/qdev-monitor.c | 4 ++- 6 files changed, 101 insertions(+), 19 deletions(-) -- 2.31.1
[PATCH 5/6] pcie: fast unplug when slot power is off
In case the slot is powered off (and the power indicator turned off too) we can unplug right away, without round-trip to the guest. Also clear pending attention button press, there is nothing to care about any more. Signed-off-by: Gerd Hoffmann --- hw/pci/pcie.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 70fc11ba4c7d..f3ac04399969 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -561,6 +561,16 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, return; } +if (((sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_OFF) && +((sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_OFF)) { +/* slot is powered off -> unplug without round-trip to the guest */ +pcie_cap_slot_do_unplug(hotplug_pdev); +hotplug_event_notify(hotplug_pdev); +pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_ABP); +return; +} + pcie_cap_slot_push_attention_button(hotplug_pdev); } -- 2.31.1
[PATCH 3/6] pcie: add power indicator blink check
Refuse to push the attention button in case the guest is busy with some hotplug operation (as indicated by the power indicator blinking). Signed-off-by: Gerd Hoffmann --- hw/pci/pcie.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 4a52c250615e..c455f92e16bf 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -506,6 +506,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev); uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap; uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP); +uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL); /* Check if hot-unplug is disabled on the slot */ if ((sltcap & PCI_EXP_SLTCAP_HPC) == 0) { @@ -521,6 +522,12 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev, return; } +if ((sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_BLINK) { +error_setg(errp, "Hot-unplug failed: " + "guest is busy (power indicator blinking)"); +return; +} + dev->pending_deleted_event = true; /* In case user cancel the operation of multi-function hot-add, -- 2.31.1
[PATCH 2/6] pcie: implement slow power control for pcie root ports
With this patch hot-plugged pci devices will only be visible to the guest if the guests hotplug driver has enabled slot power. This should fix the hot-plug race which one can hit when hot-plugging a pci device at boot, while the guest is in the middle of the pci bus scan. Signed-off-by: Gerd Hoffmann --- hw/pci/pcie.c | 28 1 file changed, 28 insertions(+) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 6e95d829037a..4a52c250615e 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -366,6 +366,29 @@ static void hotplug_event_clear(PCIDevice *dev) } } +static void pcie_set_power_device(PCIBus *bus, PCIDevice *dev, void *opaque) +{ +bool *power = opaque; + +pci_set_power(dev, *power); +} + +static void pcie_cap_update_power(PCIDevice *hotplug_dev) +{ +uint8_t *exp_cap = hotplug_dev->config + hotplug_dev->exp.exp_cap; +PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(hotplug_dev)); +uint32_t sltcap = pci_get_long(exp_cap + PCI_EXP_SLTCAP); +uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL); +bool power = true; + +if (sltcap & PCI_EXP_SLTCAP_PCP) { +power = (sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_ON; +} + +pci_for_each_device(sec_bus, pci_bus_num(sec_bus), +pcie_set_power_device, ); +} + /* * A PCI Express Hot-Plug Event has occurred, so update slot status register * and notify OS of the event if necessary. @@ -434,6 +457,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_DLLLA); } +pcie_cap_update_power(hotplug_pdev); return; } @@ -451,6 +475,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, } pcie_cap_slot_event(hotplug_pdev, PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP); +pcie_cap_update_power(hotplug_pdev); } } @@ -625,6 +650,7 @@ void pcie_cap_slot_reset(PCIDevice *dev) PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_ABP); +pcie_cap_update_power(dev); hotplug_event_update_event_status(dev); } @@ -707,6 +733,7 @@ void pcie_cap_slot_write_config(PCIDevice *dev, pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDC); } +pcie_cap_update_power(dev); hotplug_event_notify(dev); @@ -733,6 +760,7 @@ int pcie_cap_slot_post_load(void *opaque, int version_id) { PCIDevice *dev = opaque; hotplug_event_update_event_status(dev); +pcie_cap_update_power(dev); return 0; } -- 2.31.1
Re: [PATCH v4 1/2] monitor/hmp: add support for flag argument with value
* Stefan Reiter (s.rei...@proxmox.com) wrote: > Adds support for the "-xS" parameter type, where "-x" denotes a flag > name and the "S" suffix indicates that this flag is supposed to take an > arbitrary string parameter. > > These parameters are always optional, the entry in the qdict will be > omitted if the flag is not given. > > Reviewed-by: Eric Blake > Signed-off-by: Stefan Reiter It would be great if you could send a patch to update the big comment in monitor/monitor-internal.h that describes all the parameters. But that can come another time. Dave > --- > monitor/hmp.c | 17 - > 1 file changed, 16 insertions(+), 1 deletion(-) > > diff --git a/monitor/hmp.c b/monitor/hmp.c > index d50c3124e1..a32dce7a35 100644 > --- a/monitor/hmp.c > +++ b/monitor/hmp.c > @@ -980,6 +980,7 @@ static QDict *monitor_parse_arguments(Monitor *mon, > { > const char *tmp = p; > int skip_key = 0; > +int ret; > /* option */ > > c = *typestr++; > @@ -1002,8 +1003,22 @@ static QDict *monitor_parse_arguments(Monitor *mon, > } > if (skip_key) { > p = tmp; > +} else if (*typestr == 'S') { > +/* has option with string value */ > +typestr++; > +tmp = p++; > +while (qemu_isspace(*p)) { > +p++; > +} > +ret = get_str(buf, sizeof(buf), ); > +if (ret < 0) { > +monitor_printf(mon, "%s: value expected for > -%c\n", > + cmd->name, *tmp); > +goto fail; > +} > +qdict_put_str(qdict, key, buf); > } else { > -/* has option */ > +/* has boolean option */ > p++; > qdict_put_bool(qdict, key, true); > } > -- > 2.30.2 > > -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
[PATCH 6/6] doc: Add the SGX numa description
Add the SGX numa reference command and how to check if SGX numa is support or not with multiple EPC sections. Signed-off-by: Yang Zhong --- docs/system/i386/sgx.rst | 31 +++ 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/docs/system/i386/sgx.rst b/docs/system/i386/sgx.rst index f103ae2a2f..9e4ada761f 100644 --- a/docs/system/i386/sgx.rst +++ b/docs/system/i386/sgx.rst @@ -141,8 +141,7 @@ To launch a SGX guest: |qemu_system_x86| \\ -cpu host,+sgx-provisionkey \\ -object memory-backend-epc,id=mem1,size=64M,prealloc=on \\ - -object memory-backend-epc,id=mem2,size=28M \\ - -M sgx-epc.0.memdev=mem1,sgx-epc.1.memdev=mem2 + -M sgx-epc.0.memdev=mem1,sgx-epc.0.node=0 Utilizing SGX in the guest requires a kernel/OS with SGX support. The support can be determined in guest by:: @@ -152,8 +151,32 @@ The support can be determined in guest by:: and SGX epc info by:: $ dmesg | grep sgx - [1.242142] sgx: EPC section 0x18000-0x181bf - [1.242319] sgx: EPC section 0x181c0-0x1837f + [0.182807] sgx: EPC section 0x14000-0x143ff + [0.183695] sgx: [Firmware Bug]: Unable to map EPC section to online node. Fallback to the NUMA node 0. + +To launch a SGX numa guest: + +.. parsed-literal:: + + |qemu_system_x86| \\ + -cpu host,+sgx-provisionkey \\ + -object memory-backend-ram,size=2G,host-nodes=0,policy=bind,id=node0 \\ + -object memory-backend-epc,id=mem0,size=64M,prealloc=on,host-nodes=0,policy=bind \\ + -numa node,nodeid=0,cpus=0-1,memdev=node0 \\ + -object memory-backend-ram,size=2G,host-nodes=1,policy=bind,id=node1 \\ + -object memory-backend-epc,id=mem1,size=28M,prealloc=on,host-nodes=1,policy=bind \\ + -numa node,nodeid=1,cpus=2-3,memdev=node1 \\ + -M sgx-epc.0.memdev=mem0,sgx-epc.0.node=0,sgx-epc.1.memdev=mem1,sgx-epc.1.node=1 + +and SGX epc numa info by:: + + $ dmesg | grep sgx + [0.369937] sgx: EPC section 0x18000-0x183ff + [0.370259] sgx: EPC section 0x18400-0x185bf + + $ dmesg | grep SRAT + [0.009981] ACPI: SRAT: Node 0 PXM 0 [mem 0x18000-0x183ff] + [0.009982] ACPI: SRAT: Node 1 PXM 1 [mem 0x18400-0x185bf] References --
[PATCH 3/6] numa: Add SGXEPCSection list for multiple sections
The SGXEPCSection list added into SGXInfo to show the multiple SGX EPC sections detailed info, not the total size like before. Signed-off-by: Yang Zhong --- qapi/misc-target.json | 19 +-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/qapi/misc-target.json b/qapi/misc-target.json index 594fbd1577..89a5a4250a 100644 --- a/qapi/misc-target.json +++ b/qapi/misc-target.json @@ -334,6 +334,21 @@ 'returns': 'SevAttestationReport', 'if': 'TARGET_I386' } +## +# @SGXEPCSection: +# +# Information about intel SGX EPC section info +# +# @index: the SGX epc section index +# +# @size: the size of epc section +# +# Since: 6.2 +## +{ 'struct': 'SGXEPCSection', + 'data': { 'index': 'uint64', +'size': 'uint64'}} + ## # @SGXInfo: # @@ -347,7 +362,7 @@ # # @flc: true if FLC is supported # -# @section-size: The EPC section size for guest +# @sections: The EPC sections info for guest # # Since: 6.2 ## @@ -356,7 +371,7 @@ 'sgx1': 'bool', 'sgx2': 'bool', 'flc': 'bool', -'section-size': 'uint64'}, +'sections': ['SGXEPCSection']}, 'if': 'TARGET_I386' } ##
[PATCH 4/6] monitor: numa support for 'info sgx' command
This patch can enable numa support for 'info sgx' command in the monitor, which can show detailed SGX EPC sections info. (qemu) info sgx SGX support: enabled SGX1 support: enabled SGX2 support: enabled FLC support: enabled SECTION #0: size=67108864 SECTION #1: size=29360128 The QMP interface show: (QEMU) query-sgx {"return": {"sgx": true, "sgx2": true, "sgx1": true, "sections": \ [{"index": 0, "size": 67108864}, {"index": 1, "size": 29360128}], "flc": true}} Signed-off-by: Yang Zhong --- hw/i386/sgx.c | 25 +++-- target/i386/monitor.c | 11 +-- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c index 906facb645..8af45925c6 100644 --- a/hw/i386/sgx.c +++ b/hw/i386/sgx.c @@ -126,6 +126,28 @@ SGXInfo *sgx_get_capabilities(Error **errp) return info; } +static SGXEPCSectionList *sgx_get_epc_sections_list(void) +{ +GSList *device_list = sgx_epc_get_device_list(); +SGXEPCSectionList *head = NULL, **tail = +SGXEPCSection *section; +uint64_t i = 0; + +for (; device_list; device_list = device_list->next) { +DeviceState *dev = device_list->data; +Object *obj = OBJECT(dev); + +section = g_new0(SGXEPCSection, 1); +section->index = i++; +section->size = object_property_get_uint(obj, SGX_EPC_SIZE_PROP, + _abort); +QAPI_LIST_APPEND(tail, section); +} +g_slist_free(device_list); + +return head; +} + SGXInfo *sgx_get_info(Error **errp) { SGXInfo *info = NULL; @@ -144,14 +166,13 @@ SGXInfo *sgx_get_info(Error **errp) return NULL; } -SGXEPCState *sgx_epc = >sgx_epc; info = g_new0(SGXInfo, 1); info->sgx = true; info->sgx1 = true; info->sgx2 = true; info->flc = true; -info->section_size = sgx_epc->size; +info->sections = sgx_get_epc_sections_list(); return info; } diff --git a/target/i386/monitor.c b/target/i386/monitor.c index 196c1c9e77..08e7d4a425 100644 --- a/target/i386/monitor.c +++ b/target/i386/monitor.c @@ -773,6 +773,7 @@ SGXInfo *qmp_query_sgx(Error **errp) void hmp_info_sgx(Monitor *mon, const QDict *qdict) { Error *err = NULL; +SGXEPCSectionList *section_list, *section; g_autoptr(SGXInfo) info = qmp_query_sgx(); if (err) { @@ -787,8 +788,14 @@ void hmp_info_sgx(Monitor *mon, const QDict *qdict) info->sgx2 ? "enabled" : "disabled"); monitor_printf(mon, "FLC support: %s\n", info->flc ? "enabled" : "disabled"); -monitor_printf(mon, "size: %" PRIu64 "\n", - info->section_size); + +section_list = info->sections; +for (section = section_list; section; section = section->next) { +monitor_printf(mon, "SECTION #%" PRId64 ": ", + section->value->index); +monitor_printf(mon, "size=%" PRIu64 "\n", + section->value->size); +} } SGXInfo *qmp_query_sgx_capabilities(Error **errp)
[PATCH 0/6] SGX NUMA support
The basic SGX patches were merged into Qemu release, the left NUMA function for SGX should be enabled. The patch1 implemented the SGX NUMA ACPI to enable NUMA in the SGX guest. Since Libvirt need detailed host SGX EPC sections info to decide how to allocate EPC sections for SGX NUMA guest, the SGXEPCSection list is introduced to show detailed sections info in the monitor or HMP interface. Please help review this patchset, the link also can be found: https://github.com/intel/qemu-sgx upstream Yang Zhong (6): numa: Enable numa for SGX EPC sections monitor: Support 'info numa' command numa: Add SGXEPCSection list for multiple sections monitor: numa support for 'info sgx' command numa: Enable numa for libvirt interface doc: Add the SGX numa description docs/system/i386/sgx.rst | 31 +-- qapi/machine.json | 6 ++- qapi/misc-target.json | 19 - include/hw/i386/sgx-epc.h | 3 ++ hw/core/numa.c| 6 +++ hw/i386/acpi-build.c | 4 ++ hw/i386/sgx-epc.c | 3 ++ hw/i386/sgx.c | 84 +++ monitor/hmp-cmds.c| 1 + target/i386/monitor.c | 11 - qemu-options.hx | 4 +- 11 files changed, 154 insertions(+), 18 deletions(-)
[PATCH 2/6] monitor: Support 'info numa' command
Add the MEMORY_DEVICE_INFO_KIND_SGX_EPC case for SGX numa info with 'info numa' command in the monitor. Signed-off-by: Yang Zhong --- hw/core/numa.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/hw/core/numa.c b/hw/core/numa.c index 510d096a88..1aa05dcf42 100644 --- a/hw/core/numa.c +++ b/hw/core/numa.c @@ -756,6 +756,7 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[]) PCDIMMDeviceInfo *pcdimm_info; VirtioPMEMDeviceInfo *vpi; VirtioMEMDeviceInfo *vmi; +SgxEPCDeviceInfo *se; for (info = info_list; info; info = info->next) { MemoryDeviceInfo *value = info->value; @@ -781,6 +782,11 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[]) node_mem[vmi->node].node_mem += vmi->size; node_mem[vmi->node].node_plugged_mem += vmi->size; break; +case MEMORY_DEVICE_INFO_KIND_SGX_EPC: +se = value->u.sgx_epc.data; +node_mem[se->node].node_mem += se->size; +node_mem[se->node].node_plugged_mem = 0; +break; default: g_assert_not_reached(); }
[PATCH 5/6] numa: Enable numa for libvirt interface
Libvirt need get the detailed host SGX EPC capabilities to support numa function. Libvirt can decide how to allocate host EPC sections to guest numa from host numa info. (QEMU) query-sgx-capabilities {"return": {"sgx": true, "sgx2": true, "sgx1": true, "sections": \ [{"index": 0, "size": 17070817280}, {"index": 1, "size": 17079205888}], "flc": true}} Signed-off-by: Yang Zhong --- hw/i386/sgx.c | 15 ++- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c index 8af45925c6..fe3034060d 100644 --- a/hw/i386/sgx.c +++ b/hw/i386/sgx.c @@ -74,11 +74,13 @@ static uint64_t sgx_calc_section_metric(uint64_t low, uint64_t high) ((high & MAKE_64BIT_MASK(0, 20)) << 32); } -static uint64_t sgx_calc_host_epc_section_size(void) +static SGXEPCSectionList *sgx_calc_host_epc_sections(void) { +SGXEPCSectionList *head = NULL, **tail = +SGXEPCSection *section; uint32_t i, type; uint32_t eax, ebx, ecx, edx; -uint64_t size = 0; +uint32_t j = 0; for (i = 0; i < SGX_MAX_EPC_SECTIONS; i++) { host_cpuid(0x12, i + 2, , , , ); @@ -92,10 +94,13 @@ static uint64_t sgx_calc_host_epc_section_size(void) break; } -size += sgx_calc_section_metric(ecx, edx); +section = g_new0(SGXEPCSection, 1); +section->index = j++; +section->size = sgx_calc_section_metric(ecx, edx); +QAPI_LIST_APPEND(tail, section); } -return size; +return head; } SGXInfo *sgx_get_capabilities(Error **errp) @@ -119,7 +124,7 @@ SGXInfo *sgx_get_capabilities(Error **errp) info->sgx1 = eax & (1U << 0) ? true : false; info->sgx2 = eax & (1U << 1) ? true : false; -info->section_size = sgx_calc_host_epc_section_size(); +info->sections = sgx_calc_host_epc_sections(); close(fd);
[PATCH 1/6] numa: Enable numa for SGX EPC sections
The basic SGX did not enable numa for SGX EPC sections, which result in all EPC sections located in numa node 0. This patch enable SGX numa function in the guest and the EPC section can work with RAM as one numa node. The Guest kernel related log: [0.009981] ACPI: SRAT: Node 0 PXM 0 [mem 0x18000-0x183ff] [0.009982] ACPI: SRAT: Node 1 PXM 1 [mem 0x18400-0x185bf] The SRAT table can normally show SGX EPC sections menory info in different numa nodes. The SGX EPC numa related command: .. -m 4G,maxmem=20G \ -smp sockets=2,cores=2 \ -cpu host,+sgx-provisionkey \ -object memory-backend-ram,size=2G,host-nodes=0,policy=bind,id=node0 \ -object memory-backend-epc,id=mem0,size=64M,prealloc=on,host-nodes=0,policy=bind \ -numa node,nodeid=0,cpus=0-1,memdev=node0 \ -object memory-backend-ram,size=2G,host-nodes=1,policy=bind,id=node1 \ -object memory-backend-epc,id=mem1,size=28M,prealloc=on,host-nodes=1,policy=bind \ -numa node,nodeid=1,cpus=2-3,memdev=node1 \ -M sgx-epc.0.memdev=mem0,sgx-epc.0.node=0,sgx-epc.1.memdev=mem1,sgx-epc.1.node=1 \ .. Signed-off-by: Yang Zhong --- qapi/machine.json | 6 +- include/hw/i386/sgx-epc.h | 3 +++ hw/i386/acpi-build.c | 4 hw/i386/sgx-epc.c | 3 +++ hw/i386/sgx.c | 44 +++ monitor/hmp-cmds.c| 1 + qemu-options.hx | 4 ++-- 7 files changed, 62 insertions(+), 3 deletions(-) diff --git a/qapi/machine.json b/qapi/machine.json index 5db54df298..09b6188e6f 100644 --- a/qapi/machine.json +++ b/qapi/machine.json @@ -1213,6 +1213,7 @@ 'data': { '*id': 'str', 'memaddr': 'size', 'size': 'size', +'node': 'int', 'memdev': 'str' } } @@ -1288,7 +1289,10 @@ # Since: 6.2 ## { 'struct': 'SgxEPC', - 'data': { 'memdev': 'str' } } + 'data': { 'memdev': 'str', +'node': 'int' + } +} ## # @SgxEPCProperties: diff --git a/include/hw/i386/sgx-epc.h b/include/hw/i386/sgx-epc.h index 65a68ca753..7a61c52869 100644 --- a/include/hw/i386/sgx-epc.h +++ b/include/hw/i386/sgx-epc.h @@ -25,6 +25,7 @@ #define SGX_EPC_ADDR_PROP "addr" #define SGX_EPC_SIZE_PROP "size" #define SGX_EPC_MEMDEV_PROP "memdev" +#define SGX_EPC_NUMA_NODE_PROP "node" /** * SGXEPCDevice: @@ -38,6 +39,7 @@ typedef struct SGXEPCDevice { /* public */ uint64_t addr; +uint32_t node; HostMemoryBackendEpc *hostmem; } SGXEPCDevice; @@ -56,6 +58,7 @@ typedef struct SGXEPCState { } SGXEPCState; int sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size); +void sgx_epc_build_srat(GArray *table_data); static inline uint64_t sgx_epc_above_4g_end(SGXEPCState *sgx_epc) { diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 81418b7911..563a38992f 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2062,6 +2062,10 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) nvdimm_build_srat(table_data); } +if (pcms->sgx_epc.size != 0) { +sgx_epc_build_srat(table_data); +} + /* * TODO: this part is not in ACPI spec and current linux kernel boots fine * without these entries. But I recall there were issues the last time I diff --git a/hw/i386/sgx-epc.c b/hw/i386/sgx-epc.c index 55e2217eae..e5cd2789be 100644 --- a/hw/i386/sgx-epc.c +++ b/hw/i386/sgx-epc.c @@ -21,6 +21,7 @@ static Property sgx_epc_properties[] = { DEFINE_PROP_UINT64(SGX_EPC_ADDR_PROP, SGXEPCDevice, addr, 0), +DEFINE_PROP_UINT32(SGX_EPC_NUMA_NODE_PROP, SGXEPCDevice, node, 0), DEFINE_PROP_LINK(SGX_EPC_MEMDEV_PROP, SGXEPCDevice, hostmem, TYPE_MEMORY_BACKEND_EPC, HostMemoryBackendEpc *), DEFINE_PROP_END_OF_LIST(), @@ -139,6 +140,8 @@ static void sgx_epc_md_fill_device_info(const MemoryDeviceState *md, se->memaddr = epc->addr; se->size = object_property_get_uint(OBJECT(epc), SGX_EPC_SIZE_PROP, NULL); +se->node = object_property_get_uint(OBJECT(epc), SGX_EPC_NUMA_NODE_PROP, +NULL); se->memdev = object_get_canonical_path(OBJECT(epc->hostmem)); info->u.sgx_epc.data = se; diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c index e481e9358f..906facb645 100644 --- a/hw/i386/sgx.c +++ b/hw/i386/sgx.c @@ -19,6 +19,7 @@ #include "exec/address-spaces.h" #include "hw/i386/sgx.h" #include "sysemu/hw_accel.h" +#include "hw/acpi/aml-build.h" #define SGX_MAX_EPC_SECTIONS8 #define SGX_CPUID_EPC_INVALID 0x0 @@ -27,6 +28,46 @@ #define SGX_CPUID_EPC_SECTION 0x1 #define SGX_CPUID_EPC_MASK 0xF +static int sgx_epc_device_list(Object *obj, void *opaque) +{ +GSList **list = opaque; + +if (object_dynamic_cast(obj, TYPE_SGX_EPC)) { +*list = g_slist_append(*list, DEVICE(obj)); +} + +object_child_foreach(obj, sgx_epc_device_list,
Re: [RFC PATCH v2 24/25] job.h: split function pointers in JobDriver
On Fri, Oct 08, 2021 at 12:48:26PM +0200, Emanuele Giuseppe Esposito wrote: > > > On 07/10/2021 16:54, Stefan Hajnoczi wrote: > > On Tue, Oct 05, 2021 at 10:32:14AM -0400, Emanuele Giuseppe Esposito wrote: > > > The job API will be handled separately in another serie. > > > > > > Signed-off-by: Emanuele Giuseppe Esposito > > > --- > > > include/qemu/job.h | 31 +++ > > > 1 file changed, 31 insertions(+) > > > > > > diff --git a/include/qemu/job.h b/include/qemu/job.h > > > index 41162ed494..c236c43026 100644 > > > --- a/include/qemu/job.h > > > +++ b/include/qemu/job.h > > > @@ -169,12 +169,21 @@ typedef struct Job { > > >* Callbacks and other information about a Job driver. > > >*/ > > > struct JobDriver { > > > + > > > +/* Fields initialized in struct definition and never changed. */ > > > + > > > /** Derived Job struct size */ > > > size_t instance_size; > > > /** Enum describing the operation */ > > > JobType job_type; > > > +/* > > > + * I/O API functions. These functions are thread-safe, and therefore > > > + * can run in any thread as long as they have called > > > + * aio_context_acquire/release(). > > > + */ > > > > This comment described the block layer well but job.h is more generic. I > > don't think these functions are necessarily thread-safe (they shouldn't > > be invoked from multiple threads at the same time). It's just that they > > are run without regard to the BQL and may run in an arbitrary thread. > > Ok, in this instance I will change it to: > > /* > * Functions run without regard to the BQL and may run in any > * arbitrary thread. > */ It's not clear from that comment whether the functions need to be thread-safe (re-entrancy safe) or not. I think the answer is "no" since the caller ensures they are called from one thread at a time. Stefan signature.asc Description: PGP signature
[PATCH] contrib/plugins: add a drcov plugin
From: NDNF This patch adds the ability to generate files in drcov format. Primary goal this script is to have coverage logfiles thatwork in Lighthouse. Problems: - The path to the executable file is not specified. - base, end, entry take incorrect values. (Lighthouse + IDA Pro anyway work). Signed-off-by: Ivanov Arkady --- contrib/plugins/Makefile | 1 + contrib/plugins/drcov.c | 112 +++ 2 files changed, 113 insertions(+) create mode 100644 contrib/plugins/drcov.c diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile index 7801b08b0d..0a681efeec 100644 --- a/contrib/plugins/Makefile +++ b/contrib/plugins/Makefile @@ -17,6 +17,7 @@ NAMES += hotblocks NAMES += hotpages NAMES += howvec NAMES += lockstep +NAMES += drcov SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES))) diff --git a/contrib/plugins/drcov.c b/contrib/plugins/drcov.c new file mode 100644 index 00..d6a7d131c0 --- /dev/null +++ b/contrib/plugins/drcov.c @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2021, Ivanov Arkady + * + * Drcov - a DynamoRIO-based tool that collects coverage information + * from a binary. Primary goal this script is to have coverage log + * files that work in Lighthouse. + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; + +static char header[] = "DRCOV VERSION: 2\n" +"DRCOV FLAVOR: drcov-64\n" +"Module Table: version 2, count 1\n" +"Columns: id, base, end, entry, path\n"; + +static FILE *fp; + +typedef struct { +uint32_t start; +uint16_t size; +uint16_t mod_id; +} bb_entry_t; + +static GSList *bbs; + +static void printfHeader() +{ +g_autoptr(GString) head = g_string_new(""); +g_string_append(head, header); +g_string_append_printf(head, "0, 0x%x, 0x%x, 0x%x, %s\n", + 0, 0x, 0, "path"); +g_string_append_printf(head, "BB Table: %d bbs\n", g_slist_length(bbs)); +fwrite(head->str, sizeof(char), head->len, fp); +} + +static void printfCharArray32(uint32_t data) +{ +const uint8_t *bytes = (const uint8_t *)(); +fwrite(bytes, sizeof(char), sizeof(data), fp); +} + +static void printfCharArray16(uint16_t data) +{ +const uint8_t *bytes = (const uint8_t *)(); +fwrite(bytes, sizeof(char), sizeof(data), fp); +} + + +static void printf_el(gpointer data, gpointer user_data) +{ +bb_entry_t *bb = (bb_entry_t *)data; +printfCharArray32(bb->start); +printfCharArray16(bb->size); +printfCharArray16(bb->mod_id); +} + +static void plugin_exit(qemu_plugin_id_t id, void *p) +{ +/* Print function */ +printfHeader(); +g_slist_foreach(bbs, printf_el, NULL); + +/* Clear */ +g_slist_free_full(bbs, _free); +fclose(fp); +} + +static void plugin_init(void) +{ +fp = fopen("file.drcov.trace", "wb"); +} + +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb) +{ +bb_entry_t *bb = g_new0(bb_entry_t, 1); +uint64_t pc = qemu_plugin_tb_vaddr(tb); + +size_t n = qemu_plugin_tb_n_insns(tb); +for (int i = 0; i < n; i++) { +bb->size += qemu_plugin_insn_size(qemu_plugin_tb_get_insn(tb, i)); +} + +bb->start = pc; +bb->mod_id = 0; +bbs = g_slist_append(bbs, bb); + +} + +QEMU_PLUGIN_EXPORT +int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info, +int argc, char **argv) +{ +plugin_init(); + +qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans); +qemu_plugin_register_atexit_cb(id, plugin_exit, NULL); +return 0; +} -- 2.25.1
Re: [RFC PATCH v2 11/25] include/block/blockjob.h: global state API
On Fri, Oct 08, 2021 at 09:20:35AM +0200, Emanuele Giuseppe Esposito wrote: > > > > +/* > > > + * Global state (GS) API. These functions run under the BQL lock. > > > + * > > > + * If a function modifies the graph, it also uses drain and/or > > > + * aio_context_acquire/release to be sure it has unique access. > > > + * aio_context locking is needed together with BQL because of > > > + * the thread-safe I/O API that concurrently runs and accesses > > > + * the graph without the BQL. > > > + * > > > + * It is important to note that not all of these functions are > > > + * necessarily limited to running under the BQL, but they would > > > + * require additional auditing and may small thread-safety changes > > > + * to move them into the I/O API. Often it's not worth doing that > > > + * work since the APIs are only used with the BQL held at the > > > + * moment, so they have been placed in the GS API (for now). > > > + * > > > + * All functions below must use this assertion: > > > + * g_assert(qemu_in_main_thread()); > > > + * to catch when they are accidentally called without the BQL. > > > + */ > > > > This is comment is duplicated in many places. I suggest explaining it in > > one place and using references in the other files: > > > >/* > > * Global state (GS) API. These functions run under the BQL lock. > > * > > * See include/block/block.h for more information about the GS API. > > */ > > > > Good idea. Should I also do that for I/O, or it's not worth for very few > lines? Up to you, but I think it makes sense to have a minimal comment for both the GS and I/O API. Stefan signature.asc Description: PGP signature
Re: [PATCH v2 0/2] pylint: fix new errors and warnings in qemu-iotests
On 11.10.21 11:58, Emanuele Giuseppe Esposito wrote: On 11/10/2021 11:29, Hanna Reitz wrote: On 08.10.21 08:28, Emanuele Giuseppe Esposito wrote: There are some warnings and errors that we either miss or are new in pylint. Anyways, test 297 of qemu-iotests fails because of that, so we need to fix it. All these fixes involve just indentation or additional spaces added. Signed-off-by: Emanuele Giuseppe Esposito --- v2: * temporarly enable and then disable "bad whitespace" error in image-fleecing * better indentation for a fix in test 129 in patch one So the changes look good to me, but I can’t get my pylint to generate a bad-whitespace warning no matter how hard I try. (When you asked on IRC whether others see pylint warnings, I thought you meant the consider-using-f-string warnings that John disabled in 3765315d4c84f9c0799744f43a314169baaccc05.) I have pylint 2.11.1, which should be the newest version. So I tried to look around what might be the cause and found this: https://pylint.pycqa.org/en/latest/whatsnew/2.6.html – it seems that as of pylint 2.6, bad-whitespace warnings are no longer emitted. If that’s the reason why I can’t see the warning, then I think we should take only patch 1 (because it just makes sense), but skip patch 2. Yes you are right. I had 2.4.4, and now that I upgraded to 2.11.1 I don't see bad-whitespace errors anymore (actually pylint does not seem to complain at all). So I agree we can just take patch 1, as formatting is wrong anyways. OK, thanks! I’ve applied patch 1 to my block branch: https://gitlab.com/hreitz/qemu/-/commits/block/ Hanna
Re: Moving QEMU downloads to GitLab Releases?
On Mon, Oct 11, 2021 at 09:21:30AM +0200, Gerd Hoffmann wrote: > Hi, > > > > I guess the main question is who is using the ROM/BIOS sources in the > > > tarballs, and would this 2-step process work for those users? If there > > > are distros relying on them then maybe there are some more logistics to > > > consider since the make-release downloads are both time-consuming and > > > prone to network errors/stalls since it relies on the availability of a > > > good number of different hosts. > > > > Great, maybe Gerd can comment on splitting out firmware. > > I think the binaries are mostly a convenience feature for developers. > Distros typically build from source anyway, and typically they build > from upstream source instead of qemu submodule. > > The idea to split them out to a separate repo is exists for quite a > while. I have an old qemu-firmware repo laying around on my disk, which > basically moves roms/ + submodules and the binaries built from it over. > > I think with the switch to gitlab it doesn't make sense any more to > commit pre-built firmware binaries to a git repo. Instead we should > build the firmware in gitlab ci, i.e. effectively move the build rules > we have right now in roms/Makefile to .gitlab-ci.d/, then publish the > firmware binaries as build artifacts or gitlab pages. > > When done just remove the pre-build binaries from git and add a helper > script to fetch firmware binaries from gitlab instead. > > > QEMU mirrors firmware sources on GitLab too. We're able to provide the > > same level of download availability on our mirror firmware repos as for > > the main qemu.git repo. > > I think enabling CI for the firmware mirrors should work given that it > is possible to specify a custom CI/CD configuration file, i.e. use > something like > > /qemu-project/qemu/.gitlab-ci.d/firmware/seabios.yml > > or > > /qemu-project/qemu-firmware/seabios.yml > > as config file for the > > /qemu-project/seabios/ > > mirror. Then we can publish binaries using gitlab pages at > > https://qemu-project.gitlab.io/seabios/ > > That way we also don't need the roms/ submodules any more, i.e. we > can remove both sources and binaries for the firmware from the > release tarballs. Thanks! For developer convenience it would be nice to avoid manual steps after git clone qemu.git. Maybe ./configure should check for firmware blobs and automatically download them. There could be a ./configure --disable-firmware-download option that distros can use to skip the download when building everything from source. Stefan signature.asc Description: PGP signature
Re: [PATCH] qemu-img: Consistent docs for convert -F
On 21.09.21 16:28, Eric Blake wrote: Use consistent capitalization, and fix a missed line (we duplicate the qemu-img synopses in too many places). Fixes: 1899bf4737 (qemu-img: Add -F shorthand to convert) Signed-off-by: Eric Blake --- docs/tools/qemu-img.rst | 2 +- qemu-img-cmds.hx| 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) Thanks, applied to my block branch: https://gitlab.com/hreitz/qemu/-/commits/block Speaking of duplicating in too many places, comparing the lines in qemu-img.rst and qemu-img-cmds.hx I noticed that `--skip-broken-bitmaps` too is missing from qemu-img-cmds.hx, so perhaps that’s also something we want to fix. Hanna
Re: [PATCH 6/8] tcg/aarch64: Support TCG_TARGET_SIGNED_ADDR32
Richard Henderson writes: > AArch64 has both sign and zero-extending addressing modes, which > means that either treatment of guest addresses is equally efficient. > Enabling this for AArch64 gives us testing of the feature in CI. So which guests front ends will exercise this backend? I realise you never mentioned it in the cover letter. Is this something we can exercise in 32 bit user mode tests? > Signed-off-by: Richard Henderson > --- > tcg/aarch64/tcg-target-sa32.h | 8 - > tcg/aarch64/tcg-target.c.inc | 68 ++- > 2 files changed, 51 insertions(+), 25 deletions(-) > > diff --git a/tcg/aarch64/tcg-target-sa32.h b/tcg/aarch64/tcg-target-sa32.h > index cb185b1526..c99e502e4c 100644 > --- a/tcg/aarch64/tcg-target-sa32.h > +++ b/tcg/aarch64/tcg-target-sa32.h > @@ -1 +1,7 @@ > -#define TCG_TARGET_SIGNED_ADDR32 0 > +/* > + * AArch64 has both SXTW and UXTW addressing modes, which means that > + * it is agnostic to how guest addresses should be represented. > + * Because aarch64 is more common than the other hosts that will > + * want to use this feature, enable it for continuous testing. > + */ > +#define TCG_TARGET_SIGNED_ADDR32 1 > diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc > index 5edca8d44d..88b2963f9d 100644 > --- a/tcg/aarch64/tcg-target.c.inc > +++ b/tcg/aarch64/tcg-target.c.inc > @@ -12,6 +12,7 @@ > > #include "../tcg-pool.c.inc" > #include "qemu/bitops.h" > +#include "tcg-target-sa32.h" > > /* We're going to re-use TCGType in setting of the SF bit, which controls > the size of the operation performed. If we know the values match, it > @@ -804,12 +805,12 @@ static void tcg_out_insn_3617(TCGContext *s, > AArch64Insn insn, bool q, > } > > static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn, > - TCGReg rd, TCGReg base, TCGType ext, > + TCGReg rd, TCGReg base, int option, >TCGReg regoff) > { > /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */ > tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | > - 0x4000 | ext << 13 | base << 5 | (rd & 0x1f)); > + option << 13 | base << 5 | (rd & 0x1f)); > } > > static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn, > @@ -1124,7 +1125,7 @@ static void tcg_out_ldst(TCGContext *s, AArch64Insn > insn, TCGReg rd, > > /* Worst-case scenario, move offset to temp register, use reg offset. */ > tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset); > -tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP); > +tcg_out_ldst_r(s, insn, rd, rn, 3 /* LSL #0 */, TCG_REG_TMP); > } > > static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) > @@ -1718,34 +1719,34 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg > addr_reg, MemOp opc, > > static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext, > TCGReg data_r, TCGReg addr_r, > - TCGType otype, TCGReg off_r) > + int option, TCGReg off_r) > { > /* Byte swapping is left to middle-end expansion. */ > tcg_debug_assert((memop & MO_BSWAP) == 0); > > switch (memop & MO_SSIZE) { > case MO_UB: > -tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r); > +tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, option, off_r); > break; > case MO_SB: > tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW, > - data_r, addr_r, otype, off_r); > + data_r, addr_r, option, off_r); > break; > case MO_UW: > -tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r); > +tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, option, off_r); > break; > case MO_SW: > tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW), > - data_r, addr_r, otype, off_r); > + data_r, addr_r, option, off_r); > break; > case MO_UL: > -tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r); > +tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, option, off_r); > break; > case MO_SL: > -tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r); > +tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, option, off_r); > break; > case MO_Q: > -tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r); > +tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, option, off_r); > break; > default: > tcg_abort(); > @@ -1754,50 +1755,68 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, > MemOp memop, TCGType ext, > > static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop, > TCGReg data_r, TCGReg addr_r, > -
Re: [PATCH v2] hw/usb/vt82c686-uhci-pci: Use ISA instead of PCI interrupts
On Mon, 11 Oct 2021, Gerd Hoffmann wrote: On Tue, Oct 05, 2021 at 03:12:05PM +0200, BALATON Zoltan wrote: This device is part of a superio/ISA bridge chip and IRQs from it are routed to an ISA interrupt set by the Interrupt Line PCI config register. Change uhci_update_irq() to allow this and use it from vt82c686-uhci-pci. Hmm, shouldn't this logic be part of the superio bridge emulation? But how? The ISA bridge does not know about PCI and PCI does not know about ISA. UHCI is a PCIDevice and would raise PCI interrupts. Where and how could I convert that to ISA interrupts? (Oh and ISA in QEMU is not Qdev'ified and I don't want to do that as it's too much work and too much to break that I can't even test so if an alternative solution involves that then get somebody do that first.) This patch puts the irq mapping in the vt82xx specific vt82c686-uhci-pci.c which in the real chip also contains the ISA bridge so in a way it's part of the superio bridge emulation in that this uhci variant is part of that chip model. Regards, BALATON Zoltan
Re: [PATCH 5/8] linux-user: Support TCG_TARGET_SIGNED_ADDR32
Richard Henderson writes: > When using reserved_va, which is the default for a 64-bit host > and a 32-bit guest, set guest_base_signed_addr32 if requested > by TCG_TARGET_SIGNED_ADDR32, and the executable layout allows. > > Signed-off-by: Richard Henderson > --- > include/exec/cpu-all.h | 4 --- > linux-user/elfload.c | 62 ++ > 2 files changed, 50 insertions(+), 16 deletions(-) > > diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h > index 80b5e17329..71d8e1de7a 100644 > --- a/include/exec/cpu-all.h > +++ b/include/exec/cpu-all.h > @@ -278,11 +278,7 @@ extern intptr_t qemu_host_page_mask; > #define PAGE_RESET 0x0040 > /* For linux-user, indicates that the page is MAP_ANON. */ > #define PAGE_ANON 0x0080 > - > -#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY) > -/* FIXME: Code that sets/uses this is broken and needs to go away. */ > #define PAGE_RESERVED 0x0100 > -#endif Can we reference why this FIXME is being dropped in the commit message? Looking at the current tree state I can see several uses of it due to moves in 5b6dd8683d (exec: move TB handling to translate-all.c) which post-date 2e9a5713f0 (Remove PAGE_RESERVED). Otherwise looks reasonable: Reviewed-by: Alex Bennée -- Alex Bennée
Re: ACPI endianness
On Mon, 11 Oct 2021, Philippe Mathieu-Daudé wrote: On 10/10/21 15:24, BALATON Zoltan wrote: Hello, I'm trying to fix shutdown and reboot on pegasos2 which uses ACPI as part of the VIA VT8231 (similar to and modelled in hw/isa/vt82c686b.c) and found that the guest writes to ACPI PM1aCNT register come out with wrong endianness but not shure why. I have this: $ qemu-system-ppc -M pegasos2 -monitor stdio (qemu) info mtree [...] memory-region: pci1-io - (prio 0, i/o): pci1-io [...] 0f00-0f7f (prio 0, i/o): via-pm 0f00-0f03 (prio 0, i/o): acpi-evt 0f04-0f05 (prio 0, i/o): acpi-cnt 0f08-0f0b (prio 0, i/o): acpi-tmr memory-region: system - (prio 0, i/o): system -1fff (prio 0, ram): pegasos2.ram 8000-bfff (prio 0, i/o): alias pci1-mem0-win @pci1-mem 8000-bfff c000-dfff (prio 0, i/o): alias pci0-mem0-win @pci0-mem c000-dfff f100-f100 (prio 0, i/o): mv64361 f800-f8ff (prio 0, i/o): alias pci0-io-win @pci0-io -00ff f900-f9ff (prio 0, i/o): alias pci0-mem1-win @pci0-mem -00ff fd00-fdff (prio 0, i/o): alias pci1-mem1-win @pci1-mem -00ff fe00-feff (prio 0, i/o): alias pci1-io-win @pci1-io -00ff ff80- (prio 0, i/o): alias pci1-mem3-win @pci1-mem ff80- fff0-fff7 (prio 0, rom): pegasos2.rom The guest (which is big endian PPC and I think wotks on real hardware) writes to 0xf05 in the io region which should be the high byte of the little endian register but in the acpi code it comes out wrong, instead of 0x2800 I get in acpi_pm1_cnt_write: val=0x28 Looks like a northbridge issue (MV64340). Does Pegasos2 enables bus swapping? See hw/pci-host/mv64361.c:585: static void warn_swap_bit(uint64_t val) { if ((val & 0x300ULL) >> 24 != 1) { qemu_log_mask(LOG_UNIMP, "%s: Data swap not implemented", __func__); } } No, guests don't use this feature just byteswap themselves and write little endian values to the mapped io region. (Or in this case just write one byte of the 16 bit value, it specifically writes 0x28 to 0xf05.) That's why I think the device model should not byteswap itself so the acpi regions should be declared native endian and let the guest handle it. Some mentions I've found say that native endian means don't byteswap, little endian means byteswap if vcpu is big endian and big endian means byteswap if vcpu is little endian. Since guests already account for this and write little endian values to these regions there's no need to byteswap in device model and changing to native endian should not affect little endian machines so unless there's a better argument I'll try sending a patch. Regards, BALATON Zoltan
Re: [PATCH v4 00/11] virtio-iommu: Add ACPI support
On Fri, Oct 8, 2021 at 11:18 PM Jean-Philippe Brucker wrote: > > On Tue, Oct 05, 2021 at 11:45:42AM -0400, Michael S. Tsirkin wrote: > > Looks like this can not be applied yet because the bypass bit > > isn't in yet. what's up with that? > > The boot-bypass bit isn't a hard dependency for this series, but it will > be needed for full support eventually. It will be delayed by spec and > Linux header changes > > In the meantime we can work around the problem by having the boot disks > bypass the IOMMU (virtio without iommu-platform, or iommu-bypass bus). Hi Jean, I have tested on x86 platform with environment: qemu, your personal repo: http://jpbrucker.net/git/qemu, branch: virtio-iommu/acpi-2021-10-01 linux kernel, 5.14 qemu command, with '-device virtio-iommu' and boot disk 'virtio-blk-pci,iommu_platform=off' But the guest boot failed. The print is as follows: Begin: Loading essential drivers ... done. Begin: Running /scripts/init-premount ... done. Begin: Mounting root file system ... Begin: Running /scripts/local-top ... done. Begin: Running /scripts/local-premount ... [7.490238] Btrfs loaded, crc32c=crc32c-generic, zoned=no Scanning for Btrfs filesystems done. [ 10.593238] _warn_unseeded_randomness: 300 callbacks suppressed [ 10.593243] random: get_random_u32 called from bucket_table_alloc.isra.0+0x123/0x430 with crng_init=0 [ 37.534015] random: get_random_u64 called from dup_task_struct+0x415/0xa10 with crng_init=0 [ 37.539950] random: get_random_u64 called from arch_pick_mmap_layout+0x411/0x5e0 with crng_init=0 [ 37.539973] random: get_random_u64 called from arch_pick_mmap_layout+0x381/0x5e0 with crng_init=0 Begin: Waiting for root file system ... [ 38.624374] _warn_unseeded_randomness: 38 callbacks suppressed [ 38.624389] random: get_random_u64 called from dup_task_struct+0x415/0xa10 with crng_init=0 Begin: Running /scripts/local-block ... [ 38.630760] random: get_random_u64 called from arch_pick_mmap_layout+0x410 [ 38.630767] random: get_random_u64 called from arch_pick_mmap_layout+0x381/0x5e0 with crng_init=0 mdadm: No arrays found in config file or automatically done. mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically [ 39.633914] _warn_unseeded_randomness: 1278 callbacks suppressed [ 39.633918] random: get_random_u64 called from dup_task_struct+0x415/0xa10 with crng_init=0 [ 39.634867] random: get_random_u64 called from arch_pick_mmap_layout+0x411/0x5e0 with crng_init=0 [ 39.634875] random: get_random_u64 called from arch_pick_mmap_layout+0x381/0x5e0 with crng_init=0 mdadm: error opening /dev/md?*: No such file or directory mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically mdadm: No arrays found in config file or automatically done. Gave up waiting for root file system device. Common problems: - Boot args (cat /proc/cmdline) - Check rootdelay= (did the system wait long enough?) - Missing modules (cat /proc/modules; ls /dev) ALERT! UUID=3caf26b5-4d08-43e0-8634-7573269c4f70 does not exist. Dropping to a shell! Any suggestions? Thanks. -- Haiwei
Re: [PATCH 2/8] accel/tcg: Split out g2h_tlbe
Richard Henderson writes: > Create a new function to combine a CPUTLBEntry addend > with the guest address to form a host address. > > Signed-off-by: Richard Henderson Reviewed-by: Alex Bennée -- Alex Bennée
Re: [PATCH 1/8] tcg: Add TCG_TARGET_SIGNED_ADDR32
Richard Henderson writes: > Define as 0 for all tcg hosts. Put this in a separate header, > because we'll want this in places that do not ordinarily have > access to all of tcg/tcg.h. > > Signed-off-by: Richard Henderson Reviewed-by: Alex Bennée -- Alex Bennée
Re: [PATCH v2 0/2] pylint: fix new errors and warnings in qemu-iotests
On 11/10/2021 11:29, Hanna Reitz wrote: On 08.10.21 08:28, Emanuele Giuseppe Esposito wrote: There are some warnings and errors that we either miss or are new in pylint. Anyways, test 297 of qemu-iotests fails because of that, so we need to fix it. All these fixes involve just indentation or additional spaces added. Signed-off-by: Emanuele Giuseppe Esposito --- v2: * temporarly enable and then disable "bad whitespace" error in image-fleecing * better indentation for a fix in test 129 in patch one So the changes look good to me, but I can’t get my pylint to generate a bad-whitespace warning no matter how hard I try. (When you asked on IRC whether others see pylint warnings, I thought you meant the consider-using-f-string warnings that John disabled in 3765315d4c84f9c0799744f43a314169baaccc05.) I have pylint 2.11.1, which should be the newest version. So I tried to look around what might be the cause and found this: https://pylint.pycqa.org/en/latest/whatsnew/2.6.html – it seems that as of pylint 2.6, bad-whitespace warnings are no longer emitted. If that’s the reason why I can’t see the warning, then I think we should take only patch 1 (because it just makes sense), but skip patch 2. Yes you are right. I had 2.4.4, and now that I upgraded to 2.11.1 I don't see bad-whitespace errors anymore (actually pylint does not seem to complain at all). So I agree we can just take patch 1, as formatting is wrong anyways. Thank you, Emanuele
Re: [PATCH v2] hw/usb/vt82c686-uhci-pci: Use ISA instead of PCI interrupts
On Tue, Oct 05, 2021 at 03:12:05PM +0200, BALATON Zoltan wrote: > This device is part of a superio/ISA bridge chip and IRQs from it are > routed to an ISA interrupt set by the Interrupt Line PCI config > register. Change uhci_update_irq() to allow this and use it from > vt82c686-uhci-pci. Hmm, shouldn't this logic be part of the superio bridge emulation? take care, Gerd
Re: [PATCH v2 0/2] pylint: fix new errors and warnings in qemu-iotests
On 08.10.21 08:28, Emanuele Giuseppe Esposito wrote: There are some warnings and errors that we either miss or are new in pylint. Anyways, test 297 of qemu-iotests fails because of that, so we need to fix it. All these fixes involve just indentation or additional spaces added. Signed-off-by: Emanuele Giuseppe Esposito --- v2: * temporarly enable and then disable "bad whitespace" error in image-fleecing * better indentation for a fix in test 129 in patch one So the changes look good to me, but I can’t get my pylint to generate a bad-whitespace warning no matter how hard I try. (When you asked on IRC whether others see pylint warnings, I thought you meant the consider-using-f-string warnings that John disabled in 3765315d4c84f9c0799744f43a314169baaccc05.) I have pylint 2.11.1, which should be the newest version. So I tried to look around what might be the cause and found this: https://pylint.pycqa.org/en/latest/whatsnew/2.6.html – it seems that as of pylint 2.6, bad-whitespace warnings are no longer emitted. If that’s the reason why I can’t see the warning, then I think we should take only patch 1 (because it just makes sense), but skip patch 2. Hanna
Re: [RFC PATCH v1 2/2] s390x/kvm: Pass SIGP Stop flags
On 11.10.21 10:40, Christian Borntraeger wrote: Am 11.10.21 um 09:09 schrieb David Hildenbrand: On 08.10.21 22:38, Eric Farman wrote: When building a Stop IRQ to pass to KVM, we should incorporate the flags if handling the SIGP Stop and Store Status order. With that, KVM can reject other orders that are submitted for the same CPU while the operation is fully processed. Signed-off-by: Eric Farman Acked-by: Janosch Frank --- target/s390x/kvm/kvm.c | 4 1 file changed, 4 insertions(+) diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index 5b1fdb55c4..701b9ddc88 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -2555,6 +2555,10 @@ void kvm_s390_stop_interrupt(S390CPU *cpu) .type = KVM_S390_SIGP_STOP, }; + if (cpu->env.sigp_order == SIGP_STOP_STORE_STATUS) { + irq.u.stop.flags = KVM_S390_STOP_FLAG_STORE_STATUS; + } + KVM_S390_STOP_FLAG_STORE_STATUS tells KVM to perform the store status as well ... is that really what we want? At least it should not hurt I guess. QEMU then does it again? The thing is, that before we officially completed the action in user space (and let other SIGP actions actually succeed in user space on the CPU), the target CPU will be reported as !busy in the kernel already. And before we even inject the stop interrupt, the CPU will be detected as !busy in the kernel. I guess it will fix some cases where we poll via SENSE if the stop and store happened, because the store *did* happen in the kernel and we'll simply store again in user space. However, I wonder if we want to handle it more generically: Properly flag a CPU as busy for SIGP when we start processing the order until we completed processing the order. That would allow to handle other SIGP operations in user space cleanly, without any chance for races with SENSE code running in the kernel. -- Thanks, David / dhildenb
Re: [PATCH v3 1/1] virtio: write back F_VERSION_1 before validate
On Mon, Oct 11 2021, Halil Pasic wrote: > The virtio specification virtio-v1.1-cs01 states: "Transitional devices > MUST detect Legacy drivers by detecting that VIRTIO_F_VERSION_1 has not > been acknowledged by the driver." This is exactly what QEMU as of 6.1 > has done relying solely on VIRTIO_F_VERSION_1 for detecting that. > > However, the specification also says: "... the driver MAY read (but MUST > NOT write) the device-specific configuration fields to check that it can > support the device ..." before setting FEATURES_OK. > > In that case, any transitional device relying solely on > VIRTIO_F_VERSION_1 for detecting legacy drivers will return data in > legacy format. In particular, this implies that it is in big endian > format for big endian guests. This naturally confuses the driver which > expects little endian in the modern mode. > > It is probably a good idea to amend the spec to clarify that > VIRTIO_F_VERSION_1 can only be relied on after the feature negotiation > is complete. Before validate callback existed, config space was only > read after FEATURES_OK. However, we already have two regressions, so > let's address this here as well. > > The regressions affect the VIRTIO_NET_F_MTU feature of virtio-net and > the VIRTIO_BLK_F_BLK_SIZE feature of virtio-blk for BE guests when > virtio 1.0 is used on both sides. The latter renders virtio-blk unusable > with DASD backing, because things simply don't work with the default. > See Fixes tags for relevant commits. > > For QEMU, we can work around the issue by writing out the feature bits > with VIRTIO_F_VERSION_1 bit set. We (ab)use the finalize_features > config op for this. This isn't enough to address all vhost devices since > these do not get the features until FEATURES_OK, however it looks like > the affected devices actually never handled the endianness for legacy > mode correctly, so at least that's not a regression. > > No devices except virtio net and virtio blk seem to be affected. > > Long term the right thing to do is to fix the hypervisors. > > Cc: #v4.11 > Signed-off-by: Halil Pasic > Fixes: 82e89ea077b9 ("virtio-blk: Add validation for block size in config > space") > Fixes: fe36cbe0671e ("virtio_net: clear MTU when out of range") > Reported-by: mark...@us.ibm.com > Reviewed-by: Cornelia Huck > --- > > @Connie: I made some more commit message changes to accommodate Michael's > requests. I just assumed these will work or you as well and kept your > r-b. Please shout at me if it needs to be dropped :) No need to shout, still looks good to me :) > --- > drivers/virtio/virtio.c | 11 +++ > 1 file changed, 11 insertions(+) > > diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c > index 0a5b54034d4b..236081afe9a2 100644 > --- a/drivers/virtio/virtio.c > +++ b/drivers/virtio/virtio.c > @@ -239,6 +239,17 @@ static int virtio_dev_probe(struct device *_d) > driver_features_legacy = driver_features; > } > > + /* > + * Some devices detect legacy solely via F_VERSION_1. Write > + * F_VERSION_1 to force LE config space accesses before FEATURES_OK for > + * these when needed. > + */ > + if (drv->validate && !virtio_legacy_is_little_endian() > + && device_features & BIT_ULL(VIRTIO_F_VERSION_1)) { > + dev->features = BIT_ULL(VIRTIO_F_VERSION_1); > + dev->config->finalize_features(dev); > + } > + > if (device_features & (1ULL << VIRTIO_F_VERSION_1)) > dev->features = driver_features & device_features; > else > > base-commit: 60a9483534ed0d99090a2ee1d4bb0b8179195f51 > -- > 2.25.1
Re: [RFC PATCH v1 2/2] s390x/kvm: Pass SIGP Stop flags
Am 11.10.21 um 09:09 schrieb David Hildenbrand: On 08.10.21 22:38, Eric Farman wrote: When building a Stop IRQ to pass to KVM, we should incorporate the flags if handling the SIGP Stop and Store Status order. With that, KVM can reject other orders that are submitted for the same CPU while the operation is fully processed. Signed-off-by: Eric Farman Acked-by: Janosch Frank --- target/s390x/kvm/kvm.c | 4 1 file changed, 4 insertions(+) diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index 5b1fdb55c4..701b9ddc88 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -2555,6 +2555,10 @@ void kvm_s390_stop_interrupt(S390CPU *cpu) .type = KVM_S390_SIGP_STOP, }; + if (cpu->env.sigp_order == SIGP_STOP_STORE_STATUS) { + irq.u.stop.flags = KVM_S390_STOP_FLAG_STORE_STATUS; + } + KVM_S390_STOP_FLAG_STORE_STATUS tells KVM to perform the store status as well ... is that really what we want? At least it should not hurt I guess. QEMU then does it again? Maybe we want a different (more generic) way to tell KVM that a CPU is temporarily busy for SIGP orders?
Re: [PATCH v2 2/2] target/i386: Remove core-capability in Snowridge CPU model
Hi Eduardo, Ping for this minor change. On 8/27/2021 2:48 PM, Chenyi Qiang wrote: Because core-capability releated features are model-specific and KVM won't support it, remove the core-capability in CPU model to avoid the warning message. Signed-off-by: Chenyi Qiang --- target/i386/cpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index aebf81d9c9..86a15af1ed 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -3684,9 +3684,10 @@ static const X86CPUDefinition builtin_x86_defs[] = { }, { .version = 4, -.note = "no split lock detect", +.note = "no split lock detect, no core-capability", .props = (PropValue[]) { { "split-lock-detect", "off" }, +{ "core-capability", "off" }, { /* end of list */ }, }, },
Re: Deprecate the ppc405 boards in QEMU?
On 06/10/2021 09.25, Thomas Huth wrote: On 05/10/2021 23.53, BALATON Zoltan wrote: [...] Maybe these 405 boards in QEMU ran with modified firmware where the memory detection was patched out but it seems to detect the RAM so I wonder where it gets that from. Maybe by reading the SDRAM controller DCRs ppc4xx_sdram_init() sets up. Then I'm not sure what it needs the SPD for, I forgot how this worked on sam460ex. Maybe for the speed calibration, so could be it detects ram by reading DCRs then tries to get SPD data and that's where it stops as i2c is not emulated on taihu. This could be confirmed by checking what it pokes with -d guest_errors that shows accesses to missing devices but don't know where 405 has the i2c controller and if it's the same as newer SoCs. If so that could be reused and an i2c bus could be added with the SPD data like in sam460ex to make u-boot happy or you could skip this in u-boot. FWIW, I've just tried the latter (skipping the sdram init in u-boot), and indeed, I can get to the u-boot prompt now. [...]> I've also attached the patch with my modifications to u-boot. FYI, the changes can now be found on this branch here: https://gitlab.com/huth/u-boot/-/commits/taihu I also added a gitlab-CI file, so you can now download the u-boot.bin as an artifact from the corresponding pipeline, e.g.: https://gitlab.com/huth/u-boot/-/jobs/1667201028 Thomas
Re: [PATCH 1/2] hw/core/machine: Split out smp_parse as an inline API
Hi Markus, On 2021/10/11 13:26, Markus Armbruster wrote: Yanan Wang writes: Functionally smp_parse() is only called once and in one place i.e. machine_set_smp, the possible second place where it'll be called should be some unit tests if any. Actually we are going to introduce an unit test for the parser. For necessary isolation of the tested code, split smp_parse out into a separate header as an inline API. Why inline? The motivation of the splitting is to isolate the tested smp_parse from the other unrelated code in machine.c, so that we can solve the build dependency problem for the unit test. I once tried to split smp_parse out into a source file in [1] for the test, but it looks more concise and convenient to make it as an inline function in a header compared to [1]. Given that we only call it in one place, it may not be harmful to keep it an inline. Anyway, I not sure the method in this patch is most appropriate and compliant. If it's just wrong I can change back to [1]. :) [1] https://lore.kernel.org/qemu-devel/20210910073025.16480-16-wangyana...@huawei.com/#t Thanks, Yanan Signed-off-by: Yanan Wang .
Re: [PATCH v3 6/6] tests/qapi-schema: Test cases for aliases
Kevin Wolf writes: [...] > What I had in mind was using the schema to generate the necessary code, > using the generic keyval parser everywhere, and just providing a hook > where the QDict could be modified after the keyval parser and before the > visitor. Most command line options would not have any explicit code for > parsing input, only the declarative schema and the final option handler > getting a QAPI type (which could actually be the corresponding QMP > command handler in the common case). A walk to the bakery made me see a problem with transforming the qdict between keyval parsing and the visitor: error reporting. On closer investigation, the problem exists even with just aliases. All experiments performed with your complete QAPIfication at https://repo.or.cz/qemu/kevin.git qapi-alias-chardev-v4. Example: flattening leads to suboptimal error $ qemu-system-x86_64 -chardev udp,id=chr0,port=12345,ipv4=om qemu-system-x86_64: -chardev udp,id=chr0,port=12345,ipv4=om: Parameter 'backend.data.remote.data.ipv4' expects 'on' or 'off' We're using "alternate" notation, but the error message barks back in "standard" notation. It comes from the visitor. While less than pleasant, it's still understandable, because the "standard" key ends with the "alternate" key. Example: renaming leads to confusing error So far, we rename only members of type 'str', where the visitor can't fail. Just for illustrating the problem, I'm adding one where it can: diff --git a/qapi/char.json b/qapi/char.json index 0e39840d4f..b436d83cbe 100644 --- a/qapi/char.json +++ b/qapi/char.json @@ -398,7 +398,8 @@ ## { 'struct': 'ChardevRingbuf', 'data': { '*size': 'int' }, - 'base': 'ChardevCommon' } + 'base': 'ChardevCommon', + 'aliases': [ { 'name': 'capacity', 'source': [ 'size' ] } ] } ## # @ChardevQemuVDAgent: With this patch: $ qemu-system-x86_64 -chardev ringbuf,id=chr0,capacity=lots qemu-system-x86_64: -chardev ringbuf,id=chr0,capacity=lots: Parameter 'backend.data.size' expects integer The error message fails to connect to the offending key=value. Example: manual transformation leads to confusion #1 Starting point: $ qemu-system-x86_64 -chardev udp,id=chr0,port=12345,localaddr=localhost Works. host defaults to localhost, localport defaults to 0, same as in git master. Now "forget" to specify @port: $ qemu-system-x86_64 -chardev udp,id=chr0,localaddr=localhost qemu-system-x86_64: -chardev udp,id=chr0,localaddr=localhost: Parameter 'backend.data.remote.data.port' is missing Again, the visitor's error message uses "standard" notation. We obediently do what the error message tells us to do: $ qemu-system-x86_64 -chardev udp,id=chr0,localaddr=localhost,backend.data.remote.data.port=12345 qemu-system-x86_64: -chardev udp,id=chr0,localaddr=localhost,backend.data.remote.data.port=12345: Parameters 'backend.*' used inconsistently Mission accomplished: confusion :) Example: manual transformation leads to confusion #2 Confusion is possible even without tricking the user into mixing "standard" and "alternate" explicitly: $ qemu-system-x86_64 -chardev udp,id=chr0,backend.data.remote.type=udp qemu-system-x86_64: -chardev udp,id=chr0,backend.data.remote.type=udp: Parameters 'backend.*' used inconsistently Here, the user tried to stick to "standard", but forgot to specify a required member. The transformation machinery then "helpfully" transformed nothing into something, which made the visitor throw up. Clear error reporting is a critical part of a human-friendly interface. We have two separate problems with it: 1. The visitor reports errors as if aliases didn't exist Fixing this is "merely" a matter of tracing back alias applications. More complexity... 2. The visitor reports errors as if manual transformation didn't exist Manual transformation can distort the users input beyond recognition. The visitor reports errors for the transformed input. To fix this one, we'd have to augment the parse tree so it points back at the actual user input, and then make the manual transformations preserve that. Uff! I'm afraid I need another round of thinking on how to best drag legacy syntax along when we QAPIfy. [...]
Re: [PATCH] hw/misc: Add a virtual pci device to dynamically attach memory to QEMU
virito-mem currently relies on having a single sparse memory region (anon mmap, mmaped file, mmaped huge pages, mmap shmem) per VM. Although we can share memory with other processes, sharing with other VMs is not intended. Instead of actually mmaping parts dynamically (which can be quite expensive), virtio-mem relies on punching holes into the backend and dynamically allocating memory/file blocks/... on access. So the easy way to make it work is: a) Exposing the CXL memory to the buddy via dax/kmem, esulting in device memory getting managed by the buddy on a separate NUMA node. Linux kernel buddy system? how to guarantee other applications don't apply memory from it Excellent question. Usually, you would online the memory to ZONE_MOVABLE, such that even if some other allocation ended up there, that it could get migrated somewhere else. For example, "daxctl reconfigure-device" tries doing that as default: https://pmem.io/ndctl/daxctl-reconfigure-device.html However, I agree that we might actually want to tell the system to not use this CPU-less node as fallback for other allocations, and that we might not want to swap out such memory etc. But, in the end all that virtio-mem needs to work in the hypervisor is a) A sparse memmap (anonymous RAM, memfd, file) b) A way to populate memory within that sparse memmap (e.g., on fault, using madvise(MADV_POPULATE_WRITE), fallocate()) c) A way to discard memory (madvise(MADV_DONTNEED), fallocate(FALLOC_FL_PUNCH_HOLE)) So instead of using anonymous memory+mbind, you can also mmap a sparse file and rely on populate-on-demand. One alternative for your use case would be to create a DAX filesystem on that CXL memory (IIRC that should work) and simply providing virtio-mem with a sparse file located on that filesystem. Of course, you can also use some other mechanism as you might have in your approach, as long as it supports a,b,c. b) (optional) allocate huge pages on that separate NUMA node. c) Use ordinary memory-device-ram or memory-device-memfd (for huge pages), *bidning* the memory backend to that special NUMA node. "-object memory-backend/device-ram or memory-device-memfd, id=mem0, size=768G" How to bind backend memory to NUMA node I think the syntax is "policy=bind,host-nodes=X" whereby X is a node mask. So for node "0" you'd use "host-nodes=0x1" for "5" "host-nodes=0x20" etc. This will dynamically allocate memory from that special NUMA node, resulting in the virtio-mem device completely being backed by that device memory, being able to dynamically resize the memory allocation. Exposing an actual devdax to the virtio-mem device, shared by multiple VMs isn't really what we want and won't work without major design changes. Also, I'm not so sure it's a very clean design: exposing memory belonging to other VMs to unrelated QEMU processes. This sounds like a serious security hole: if you managed to escalate to the QEMU process from inside the VM, you can access unrelated VM memory quite happily. You want an abstraction in-between, that makes sure each VM/QEMU process only sees private memory: for example, the buddy via dax/kmem. Hi David Thanks for your suggestion, also sorry for my delayed reply due to my long vacation. How does current virtio-mem dynamically attach memory to guest, via page fault? Essentially you have a large sparse mmap. Withing that mmap, memory is populated on demand. Instead if mmap/munmap you perform a single large mmap and then dynamically populate memory/discard memory. Right now, memory is populated via page faults on access. This is sub-optimal when dealing with limited resources (i.e., hugetlbfs, file blocks) and you might run out of backend memory. I'm working on a "prealloc" mode, which will preallocate/populate memory necessary for exposing the next block of memory to the VM, and which fails gracefully if preallocation/population fails in the case of such limited resources. The patch resides on: https://github.com/davidhildenbrand/qemu/tree/virtio-mem-next commit ded0e302c14ae1b68bdce9059dcca344e0a5f5f0 Author: David Hildenbrand Date: Mon Aug 2 19:51:36 2021 +0200 virtio-mem: support "prealloc=on" option Especially for hugetlb, but also for file-based memory backends, we'd like to be able to prealloc memory, especially to make user errors less severe: crashing the VM when there are not sufficient huge pages around. A common option for hugetlb will be using "reserve=off,prealloc=off" for the memory backend and "prealloc=on" for the virtio-mem device. This way, no huge pages will be reserved for the process, but we can recover if there are no actual huge pages when plugging memory. Signed-off-by: David Hildenbrand -- Thanks, David / dhildenb
Re: Moving QEMU downloads to GitLab Releases?
Hi, > > I guess the main question is who is using the ROM/BIOS sources in the > > tarballs, and would this 2-step process work for those users? If there > > are distros relying on them then maybe there are some more logistics to > > consider since the make-release downloads are both time-consuming and > > prone to network errors/stalls since it relies on the availability of a > > good number of different hosts. > > Great, maybe Gerd can comment on splitting out firmware. I think the binaries are mostly a convenience feature for developers. Distros typically build from source anyway, and typically they build from upstream source instead of qemu submodule. The idea to split them out to a separate repo is exists for quite a while. I have an old qemu-firmware repo laying around on my disk, which basically moves roms/ + submodules and the binaries built from it over. I think with the switch to gitlab it doesn't make sense any more to commit pre-built firmware binaries to a git repo. Instead we should build the firmware in gitlab ci, i.e. effectively move the build rules we have right now in roms/Makefile to .gitlab-ci.d/, then publish the firmware binaries as build artifacts or gitlab pages. When done just remove the pre-build binaries from git and add a helper script to fetch firmware binaries from gitlab instead. > QEMU mirrors firmware sources on GitLab too. We're able to provide the > same level of download availability on our mirror firmware repos as for > the main qemu.git repo. I think enabling CI for the firmware mirrors should work given that it is possible to specify a custom CI/CD configuration file, i.e. use something like /qemu-project/qemu/.gitlab-ci.d/firmware/seabios.yml or /qemu-project/qemu-firmware/seabios.yml as config file for the /qemu-project/seabios/ mirror. Then we can publish binaries using gitlab pages at https://qemu-project.gitlab.io/seabios/ That way we also don't need the roms/ submodules any more, i.e. we can remove both sources and binaries for the firmware from the release tarballs. take care, Gerd
Re: [RFC PATCH v1 2/2] s390x/kvm: Pass SIGP Stop flags
On 08.10.21 22:38, Eric Farman wrote: When building a Stop IRQ to pass to KVM, we should incorporate the flags if handling the SIGP Stop and Store Status order. With that, KVM can reject other orders that are submitted for the same CPU while the operation is fully processed. Signed-off-by: Eric Farman Acked-by: Janosch Frank --- target/s390x/kvm/kvm.c | 4 1 file changed, 4 insertions(+) diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index 5b1fdb55c4..701b9ddc88 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -2555,6 +2555,10 @@ void kvm_s390_stop_interrupt(S390CPU *cpu) .type = KVM_S390_SIGP_STOP, }; +if (cpu->env.sigp_order == SIGP_STOP_STORE_STATUS) { +irq.u.stop.flags = KVM_S390_STOP_FLAG_STORE_STATUS; +} + KVM_S390_STOP_FLAG_STORE_STATUS tells KVM to perform the store status as well ... is that really what we want? Maybe we want a different (more generic) way to tell KVM that a CPU is temporarily busy for SIGP orders? -- Thanks, David / dhildenb
Re: [RFC PATCH v1 1/2] s390x: sigp: Force Set Architecture to return Invalid Parameter
On 08.10.21 22:38, Eric Farman wrote: According to the Principles of Operation, the SIGP Set Architecture order will return Incorrect State if some CPUs are not stopped, but only if the CZAM facility is not present. If it is, the order will return Invalid Parameter because the architecture mode cannot be changed. Since CZAM always exists when S390_FEAT_ZARCH exists, which in turn exists for every defined CPU model, we can simplify this code. Fixes: 075e52b81664 ("s390x/cpumodel: we are always in zarchitecture mode") Signed-off-by: Eric Farman Reviewed-by: Christian Borntraeger Reviewed-by: Janosch Frank Reviewed-by: David Hildenbrand -- Thanks, David / dhildenb
Re: [PATCH] monitor: Tidy up find_device_state()
On 16/09/21 13:17, Markus Armbruster wrote: Commit 6287d827d4 "monitor: allow device_del to accept QOM paths" extended find_device_state() to accept QOM paths in addition to qdev IDs. This added a checked conversion to TYPE_DEVICE at the end, which duplicates the check done for the qdev ID case earlier, except it sets a *different* error: GenericError "ID is not a hotpluggable device" when passed a QOM path, and DeviceNotFound "Device 'ID' not found" when passed a qdev ID. Fortunately, the latter won't happen as long as we add only devices to /machine/peripheral/. Earlier, commit b6cc36abb2 "qdev: device_del: Search for to be unplugged device in 'peripheral' container" rewrote the lookup by qdev ID to use QOM instead of qdev_find_recursive(), so it can handle buss-less devices. It does so by constructing an absolute QOM path. Works, but object_resolve_path_component() is easier. Switching to it also gets rid of the unclean duplication described above. While there, avoid converting to TYPE_DEVICE twice, first to check whether it's possible, and then for real. Signed-off-by: Markus Armbruster --- softmmu/qdev-monitor.c | 13 + 1 file changed, 5 insertions(+), 8 deletions(-) Queued, thanks. Paolo diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c index a304754ab9..d1ab3c25fb 100644 --- a/softmmu/qdev-monitor.c +++ b/softmmu/qdev-monitor.c @@ -831,16 +831,12 @@ void qmp_device_add(QDict *qdict, QObject **ret_data, Error **errp) static DeviceState *find_device_state(const char *id, Error **errp) { Object *obj; +DeviceState *dev; if (id[0] == '/') { obj = object_resolve_path(id, NULL); } else { -char *root_path = object_get_canonical_path(qdev_get_peripheral()); -char *path = g_strdup_printf("%s/%s", root_path, id); - -g_free(root_path); -obj = object_resolve_path_type(path, TYPE_DEVICE, NULL); -g_free(path); +obj = object_resolve_path_component(qdev_get_peripheral(), id); } if (!obj) { @@ -849,12 +845,13 @@ static DeviceState *find_device_state(const char *id, Error **errp) return NULL; } -if (!object_dynamic_cast(obj, TYPE_DEVICE)) { +dev = (DeviceState *)object_dynamic_cast(obj, TYPE_DEVICE); +if (!dev) { error_setg(errp, "%s is not a hotpluggable device", id); return NULL; } -return DEVICE(obj); +return dev; } void qdev_unplug(DeviceState *dev, Error **errp)