Re: [PATCH v2 3/3] tests/acpi: Generate reference blob for IORT rev E.b

2021-10-11 Thread Igor Mammedov
On Tue,  5 Oct 2021 10:38:05 +0200
Eric Auger  wrote:

> Re-generate reference blobs with rebuild-expected-aml.sh.

commit message should have diff old vs new,
if dis is the same/similar for various variants,
it's fine to provide only one variant  diff here.

> 
> Signed-off-by: Eric Auger 
> ---
>  tests/qtest/bios-tables-test-allowed-diff.h |   1 -
>  tests/data/acpi/virt/IORT   | Bin 124 -> 128 bytes
>  tests/data/acpi/virt/IORT.memhp | Bin 124 -> 128 bytes
>  tests/data/acpi/virt/IORT.numamem   | Bin 124 -> 128 bytes
>  tests/data/acpi/virt/IORT.pxb   | Bin 124 -> 128 bytes
>  5 files changed, 1 deletion(-)
> 
> diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
> b/tests/qtest/bios-tables-test-allowed-diff.h
> index 9a5a923d6b..dfb8523c8b 100644
> --- a/tests/qtest/bios-tables-test-allowed-diff.h
> +++ b/tests/qtest/bios-tables-test-allowed-diff.h
> @@ -1,2 +1 @@
>  /* List of comma-separated changed AML files to ignore */
> -"tests/data/acpi/virt/IORT",
> diff --git a/tests/data/acpi/virt/IORT b/tests/data/acpi/virt/IORT
> index 
> 521acefe9ba66706c5607321a82d330586f3f280..7efd0ce8a6b3928efa7e1373f688ab4c5f50543b
>  100644
> GIT binary patch
> literal 128
> zcmebD4+?2uU|?Y0?Bwt45v<@85#X!<1dKp25F11@0kHuPgMkDCNC*yK93~3}W)K^M
> VRiHGGVg_O`aDdYP|3ers^8jQz3IPBB
> 
> literal 124
> zcmebD4+^Pa00MR=e`k+i1*eDrX9XZ&1PX!JAesq?4S*O7Bw!2(4Uz`|CKCt^;wu0#
> QRGb+i3L*dhhtM#y0PN=p0RR91
> 
> diff --git a/tests/data/acpi/virt/IORT.memhp b/tests/data/acpi/virt/IORT.memhp
> index 
> 521acefe9ba66706c5607321a82d330586f3f280..7efd0ce8a6b3928efa7e1373f688ab4c5f50543b
>  100644
> GIT binary patch
> literal 128
> zcmebD4+?2uU|?Y0?Bwt45v<@85#X!<1dKp25F11@0kHuPgMkDCNC*yK93~3}W)K^M
> VRiHGGVg_O`aDdYP|3ers^8jQz3IPBB
> 
> literal 124
> zcmebD4+^Pa00MR=e`k+i1*eDrX9XZ&1PX!JAesq?4S*O7Bw!2(4Uz`|CKCt^;wu0#
> QRGb+i3L*dhhtM#y0PN=p0RR91
> 
> diff --git a/tests/data/acpi/virt/IORT.numamem 
> b/tests/data/acpi/virt/IORT.numamem
> index 
> 521acefe9ba66706c5607321a82d330586f3f280..7efd0ce8a6b3928efa7e1373f688ab4c5f50543b
>  100644
> GIT binary patch
> literal 128
> zcmebD4+?2uU|?Y0?Bwt45v<@85#X!<1dKp25F11@0kHuPgMkDCNC*yK93~3}W)K^M
> VRiHGGVg_O`aDdYP|3ers^8jQz3IPBB
> 
> literal 124
> zcmebD4+^Pa00MR=e`k+i1*eDrX9XZ&1PX!JAesq?4S*O7Bw!2(4Uz`|CKCt^;wu0#
> QRGb+i3L*dhhtM#y0PN=p0RR91
> 
> diff --git a/tests/data/acpi/virt/IORT.pxb b/tests/data/acpi/virt/IORT.pxb
> index 
> 521acefe9ba66706c5607321a82d330586f3f280..7efd0ce8a6b3928efa7e1373f688ab4c5f50543b
>  100644
> GIT binary patch
> literal 128
> zcmebD4+?2uU|?Y0?Bwt45v<@85#X!<1dKp25F11@0kHuPgMkDCNC*yK93~3}W)K^M
> VRiHGGVg_O`aDdYP|3ers^8jQz3IPBB
> 
> literal 124
> zcmebD4+^Pa00MR=e`k+i1*eDrX9XZ&1PX!JAesq?4S*O7Bw!2(4Uz`|CKCt^;wu0#
> QRGb+i3L*dhhtM#y0PN=p0RR91
> 




Re: [PATCH v2 2/3] hw/arm/virt-acpi-build: IORT upgrade up to revision E.b

2021-10-11 Thread Igor Mammedov
On Tue,  5 Oct 2021 10:38:04 +0200
Eric Auger  wrote:

> Upgrade the IORT table from B to E.b specification
> revision (ARM DEN 0049E.b).
> 
> Signed-off-by: Eric Auger 
with nit below fixed:
Reviewed-by: Igor Mammedov 

> 
> ---
> 
> v1 -> v2:
> - Fix Revision value for ITS node and SMMUv3 node
> - increment an identifier
> ---
>  hw/arm/virt-acpi-build.c | 48 
>  1 file changed, 29 insertions(+), 19 deletions(-)
> 
> diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
> index 257d0fee17..789bac3134 100644
> --- a/hw/arm/virt-acpi-build.c
> +++ b/hw/arm/virt-acpi-build.c
> @@ -241,19 +241,20 @@ static void acpi_dsdt_add_tpm(Aml *scope, 
> VirtMachineState *vms)
>  #endif
>  
>  #define ID_MAPPING_ENTRY_SIZE 20
> -#define SMMU_V3_ENTRY_SIZE 60
> -#define ROOT_COMPLEX_ENTRY_SIZE 32
> +#define SMMU_V3_ENTRY_SIZE 68
> +#define ROOT_COMPLEX_ENTRY_SIZE 36
>  #define IORT_NODE_OFFSET 48
>  
>  static void build_iort_id_mapping(GArray *table_data, uint32_t input_base,
>uint32_t id_count, uint32_t out_ref)
>  {
> -/* Identity RID mapping covering the whole input RID range */
> +/* Table 4 ID mapping format */
>  build_append_int_noprefix(table_data, input_base, 4); /* Input base */
>  build_append_int_noprefix(table_data, id_count, 4); /* Number of IDs */
>  build_append_int_noprefix(table_data, input_base, 4); /* Output base */
>  build_append_int_noprefix(table_data, out_ref, 4); /* Output Reference */
> -build_append_int_noprefix(table_data, 0, 4); /* Flags */
> +/* Flags */
> +build_append_int_noprefix(table_data, 0 /* Single mapping */, 4);
>  }
>  
>  struct AcpiIortIdMapping {
> @@ -298,7 +299,7 @@ static int iort_idmap_compare(gconstpointer a, 
> gconstpointer b)
>  /*
>   * Input Output Remapping Table (IORT)
>   * Conforms to "IO Remapping Table System Software on ARM Platforms",
> - * Document number: ARM DEN 0049B, October 2015
> + * Document number: ARM DEN 0049E, Feb 2021
>   */
>  static void
>  build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
> @@ -307,10 +308,11 @@ build_iort(GArray *table_data, BIOSLinker *linker, 
> VirtMachineState *vms)
>  const uint32_t iort_node_offset = IORT_NODE_OFFSET;
>  size_t node_size, smmu_offset = 0;
>  AcpiIortIdMapping *idmap;
> +uint32_t id = 0;

It's probably worth mentioning in commit message that
IDs are generated in sequential order, to make sure that
different nodes unique but otherwise IDs are not referenced/used.

If we were to actually use IDs, I'd prefer explicit naming for
static nodes (i.e. something like SMMUv3_ID = #).



>  GArray *smmu_idmaps = g_array_new(false, true, 
> sizeof(AcpiIortIdMapping));
>  GArray *its_idmaps = g_array_new(false, true, sizeof(AcpiIortIdMapping));
>  
> -AcpiTable table = { .sig = "IORT", .rev = 0, .oem_id = vms->oem_id,
> +AcpiTable table = { .sig = "IORT", .rev = 3, .oem_id = vms->oem_id,
>  .oem_table_id = vms->oem_table_id };
>  /* Table 2 The IORT */
>  acpi_table_begin(, table_data);
> @@ -358,12 +360,12 @@ build_iort(GArray *table_data, BIOSLinker *linker, 
> VirtMachineState *vms)
>  build_append_int_noprefix(table_data, IORT_NODE_OFFSET, 4);
>  build_append_int_noprefix(table_data, 0, 4); /* Reserved */
>  
> -/* 3.1.1.3 ITS group node */
> +/* Table 12 ITS Group Format */
>  build_append_int_noprefix(table_data, 0 /* ITS Group */, 1); /* Type */
>  node_size =  20 /* fixed header size */ + 4 /* 1 GIC ITS Identifier */;
>  build_append_int_noprefix(table_data, node_size, 2); /* Length */
> -build_append_int_noprefix(table_data, 0, 1); /* Revision */
> -build_append_int_noprefix(table_data, 0, 4); /* Reserved */
> +build_append_int_noprefix(table_data, 1, 1); /* Revision */
> +build_append_int_noprefix(table_data, id++, 4); /* Identifier */
>  build_append_int_noprefix(table_data, 0, 4); /* Number of ID mappings */
>  build_append_int_noprefix(table_data, 0, 4); /* Reference to ID Array */
>  build_append_int_noprefix(table_data, 1, 4); /* Number of ITSs */
> @@ -374,19 +376,19 @@ build_iort(GArray *table_data, BIOSLinker *linker, 
> VirtMachineState *vms)
>  int irq =  vms->irqmap[VIRT_SMMU] + ARM_SPI_BASE;
>  
>  smmu_offset = table_data->len - table.table_offset;
> -/* 3.1.1.2 SMMUv3 */
> +/* Table 9 SMMUv3 Format */
>  build_append_int_noprefix(table_data, 4 /* SMMUv3 */, 1); /* Type */
>  node_size =  SMMU_V3_ENTRY_SIZE + ID_MAPPING_ENTRY_SIZE;
>  build_append_int_noprefix(table_data, node_size, 2); /* Length */
> -build_append_int_noprefix(table_data, 0, 1); /* Revision */
> -build_append_int_noprefix(table_data, 0, 4); /* Reserved */
> +build_append_int_noprefix(table_data, 4, 1); /* Revision */
> +build_append_int_noprefix(table_data, id++, 4); /* 

Re: [PATCH] hw/misc: applesmc: use host osk as default on macs

2021-10-11 Thread Gabriel L. Somlo
On Mon, 11 Oct 2021 15:40:07 +0200, bala...@eik.bme.hu wrote:
> I guess a frequent use case for running macOS guests with keys from host 
> would be on hosts running macOS too so a solution that works both on macOS 
> and Linux might be better than a Linux specific one which then needs 
> another way to do the same on macOS. Looks like there's free code for that 
> too and you don't have to convince a maintainer either.

I mostly agree with you (hadn't given much thought to qemu on macOSX),
with the small caveat that (on Linux) you'll end up racing the kernel
applesmc driver for access to the physical hardware; not sure whether
you'd run into anything similar on host-side macOSX as well, never
actually used it much as a developer... :)

Cheers,
--Gabriel



Re: Moving QEMU downloads to GitLab Releases?

2021-10-11 Thread Warner Losh
On Mon, Oct 11, 2021 at 4:59 AM Stefan Hajnoczi  wrote:

> On Mon, Oct 11, 2021 at 09:21:30AM +0200, Gerd Hoffmann wrote:
> >   Hi,
> >
> > > > I guess the main question is who is using the ROM/BIOS sources in the
> > > > tarballs, and would this 2-step process work for those users? If
> there
> > > > are distros relying on them then maybe there are some more logistics
> to
> > > > consider since the make-release downloads are both time-consuming and
> > > > prone to network errors/stalls since it relies on the availability
> of a
> > > > good number of different hosts.
> > >
> > > Great, maybe Gerd can comment on splitting out firmware.
> >
> > I think the binaries are mostly a convenience feature for developers.
> > Distros typically build from source anyway, and typically they build
> > from upstream source instead of qemu submodule.
> >
> > The idea to split them out to a separate repo is exists for quite a
> > while.  I have an old qemu-firmware repo laying around on my disk, which
> > basically moves roms/ + submodules and the binaries built from it over.
> >
> > I think with the switch to gitlab it doesn't make sense any more to
> > commit pre-built firmware binaries to a git repo.  Instead we should
> > build the firmware in gitlab ci, i.e. effectively move the build rules
> > we have right now in roms/Makefile to .gitlab-ci.d/, then publish the
> > firmware binaries as build artifacts or gitlab pages.
> >
> > When done just remove the pre-build binaries from git and add a helper
> > script to fetch firmware binaries from gitlab instead.
> >
> > > QEMU mirrors firmware sources on GitLab too. We're able to provide the
> > > same level of download availability on our mirror firmware repos as for
> > > the main qemu.git repo.
> >
> > I think enabling CI for the firmware mirrors should work given that it
> > is possible to specify a custom CI/CD configuration file, i.e. use
> > something like
> >
> > /qemu-project/qemu/.gitlab-ci.d/firmware/seabios.yml
> >
> > or
> >
> > /qemu-project/qemu-firmware/seabios.yml
> >
> > as config file for the
> >
> > /qemu-project/seabios/
> >
> > mirror.  Then we can publish binaries using gitlab pages at
> >
> > https://qemu-project.gitlab.io/seabios/
> >
> > That way we also don't need the roms/ submodules any more, i.e. we
> > can remove both sources and binaries for the firmware from the
> > release tarballs.
>
> Thanks!
>
> For developer convenience it would be nice to avoid manual steps after
> git clone qemu.git. Maybe ./configure should check for firmware blobs
> and automatically download them. There could be a ./configure
> --disable-firmware-download option that distros can use to skip the
> download when building everything from source.
>

One thing to keep in mind about the downstream consumers: Many
of them have two phases to their build process that run asynchronously
to each other. There is a fetch phase that grabs everything, and a build
phase that builds the binaries. The second phase may not have access
to the internet for a variety of reasons (these days being a security
measure, for good or ill). Please make sure that any such plans
allow for this model.

Today, that's all dealt with by grabbing tarballs from github which
is how the submodules are dealt with. So long as the images
had well known names, and could be fetched with the normal
wget/cgit/fetch programs, that would suffice. Requiring use of
some weird bespoke script would cause friction for the downstreams
using qemu.

So while I'm all for making things a little more independent,
let's not do it in a way that makes life difficult for downstreams.
There are more there than just a couple of big distro builders.

Warner


Re: [PATCH v2] vmdk: allow specification of tools version

2021-10-11 Thread Hanna Reitz

On 13.09.21 15:04, Thomas Weißschuh wrote:

VMDK files support an attribute that represents the version of the guest
tools that are installed on the disk.
This attribute is used by vSphere before a machine has been started to
determine if the VM has the guest tools installed.
This is important when configuring "Operating system customizations" in
vSphere, as it checks for the presence of the guest tools before
allowing those customizations.
Thus when the VM has not yet booted normally it would be impossible to
customize it, therefore preventing a customized first-boot.

The attribute should not hurt on disks that do not have the guest tools
installed and indeed the VMware tools also unconditionally add this
attribute.
(Defaulting to the value "2147483647", as is done in this patch)

Signed-off-by: Thomas Weißschuh 
---

v1: 
https://lore.kernel.org/qemu-devel/20210908174250.12946-1-thomas.weissschuh@zeiss.com/

v1 -> v2:
* Expand QAPI docs (Eric Blake)

  block/vmdk.c | 24 
  qapi/block-core.json |  3 +++
  2 files changed, 23 insertions(+), 4 deletions(-)


[...]


diff --git a/qapi/block-core.json b/qapi/block-core.json
index c8ce1d9d5d..42431f52d0 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4597,6 +4597,8 @@
  # @adapter-type: The adapter type used to fill in the descriptor. Default: 
ide.
  # @hwversion: Hardware version. The meaningful options are "4" or "6".
  # Default: "4".
+# @toolsversion: VMware guest tools version.
+ Default: "2147483647" (Since 6.2)


There’s a # missing at the start of the line, and so this doesn’t compile.

I’ve added it (hope that’s OK for you) and taken this patch to my block 
branch:


https://gitlab.com/hreitz/qemu/-/commits/block/

Hanna




Re: [PATCH v3 0/3] hw/mips/boston: ELF kernel support

2021-10-11 Thread Jiaxun Yang

ping?

在 2021/10/2 19:45, Jiaxun Yang 写道:

Jiaxun Yang (3):
   hw/mips/boston: Massage memory map information
   hw/mips/boston: Allow loading elf kernel and dtb
   hw/mips/boston: Add FDT generator

  hw/mips/boston.c | 348 +++
  1 file changed, 320 insertions(+), 28 deletions(-)






Re: [PATCH v3 3/3] tests/acpi/bios-tables-test: update DSDT blob for multifunction bridge test

2021-10-11 Thread Igor Mammedov
On Thu,  7 Oct 2021 19:27:50 +0530
Ani Sinha  wrote:

> We added a new unit test for testing acpi hotplug on multifunction bridges in
> q35 machines. Here, we update the DSDT table gloden master blob for this unit
> test.
> 
> The test adds the following devices to qemu and then checks the changes
> introduced in the DSDT table due to the addition of the following devices:
> 
> (a) a multifunction bridge device
> (b) a bridge device with function 1
> (c) a non-bridge device with function 2
> 
> In the DSDT table, we should see AML hotplug descriptions for (a) and (b).
> For (a) we should find a hotplug AML description for function 0.
> 
> Following is the ASL diff between the original DSDT table and the modified 
> DSDT
> table due to the unit test. We see that multifunction bridge on bus 2 and 
> single
> function bridge on bus 3 function 1 are described, not the non-bridge balloon
> device on bus 4, function 2.
> 
> @@ -1,30 +1,30 @@
>  /*
>   * Intel ACPI Component Architecture
>   * AML/ASL+ Disassembler version 20190509 (64-bit version)
>   * Copyright (c) 2000 - 2019 Intel Corporation
>   *
>   * Disassembling to symbolic ASL+ operators
>   *
> - * Disassembly of tests/data/acpi/q35/DSDT, Thu Oct  7 18:29:19 2021
> + * Disassembly of /tmp/aml-C7JCA1, Thu Oct  7 18:29:19 2021
>   *
>   * Original Table Header:
>   * Signature"DSDT"
> - * Length   0x2061 (8289)
> + * Length   0x2187 (8583)
>   * Revision 0x01  32-bit table (V1), no 64-bit math support
> - * Checksum 0xF9
> + * Checksum 0x8D
>   * OEM ID   "BOCHS "
>   * OEM Table ID "BXPC"
>   * OEM Revision 0x0001 (1)
>   * Compiler ID  "BXPC"
>   * Compiler Version 0x0001 (1)
>   */
>  DefinitionBlock ("", "DSDT", 1, "BOCHS ", "BXPC", 0x0001)
>  {
>  Scope (\)
>  {
>  OperationRegion (DBG, SystemIO, 0x0402, One)
>  Field (DBG, ByteAcc, NoLock, Preserve)
>  {
>  DBGB,   8
>  }
> 
> @@ -3265,23 +3265,95 @@
>  Method (_S1D, 0, NotSerialized)  // _S1D: S1 Device State
>  {
>  Return (Zero)
>  }
> 
>  Method (_S2D, 0, NotSerialized)  // _S2D: S2 Device State
>  {
>  Return (Zero)
>  }
> 
>  Method (_S3D, 0, NotSerialized)  // _S3D: S3 Device State
>  {
>  Return (Zero)
>  }
>  }
> 
> +Device (S10)
> +{
> +Name (_ADR, 0x0002)  // _ADR: Address
> +Name (BSEL, One)
> +Device (S00)
> +{
> +Name (_SUN, Zero)  // _SUN: Slot User Number
> +Name (_ADR, Zero)  // _ADR: Address
> +Method (_EJ0, 1, NotSerialized)  // _EJx: Eject Device, 
> x=0-9
> +{
> +PCEJ (BSEL, _SUN)
> +}
> +
> +Method (_DSM, 4, Serialized)  // _DSM: Device-Specific 
> Method
> +{
> +Return (PDSM (Arg0, Arg1, Arg2, Arg3, BSEL, _SUN))
> +}
> +}
> +
> +Method (DVNT, 2, NotSerialized)
> +{
> +If ((Arg0 & One))
> +{
> +Notify (S00, Arg1)
> +}
> +}
> +
> +Method (PCNT, 0, NotSerialized)
> +{
> +BNUM = One
> +DVNT (PCIU, One)
> +DVNT (PCID, 0x03)
> +}
> +}
> +
> +Device (S19)
> +{
> +Name (_ADR, 0x00030001)  // _ADR: Address
> +Name (BSEL, Zero)
> +Device (S00)
> +{
> +Name (_SUN, Zero)  // _SUN: Slot User Number
> +Name (_ADR, Zero)  // _ADR: Address
> +Method (_EJ0, 1, NotSerialized)  // _EJx: Eject Device, 
> x=0-9
> +{
> +PCEJ (BSEL, _SUN)
> +}
> +
> +Method (_DSM, 4, Serialized)  // _DSM: Device-Specific 
> Method
> +{
> +Return (PDSM (Arg0, Arg1, Arg2, Arg3, BSEL, _SUN))
> +}
> +}
> +
> +Method (DVNT, 2, NotSerialized)
> +{
> +If ((Arg0 & One))
> +{
> +Notify (S00, Arg1)
> +}
> +}
> +
> +Method (PCNT, 0, NotSerialized)
> +{
> +BNUM = Zero
> +DVNT (PCIU, One)
> +DVNT (PCID, 0x03)
> +}
> +  

Re: [PATCH v3 1/3] tests/acpi/bios-tables-test: add and allow changes to a new q35 DSDT table blob

2021-10-11 Thread Igor Mammedov
On Thu,  7 Oct 2021 19:27:48 +0530
Ani Sinha  wrote:

> We are adding a new unit test to cover the acpi hotplug support in q35 for
> multi-function bridges. This test uses a new table DSDT.multi-bridge.
> We need to allow changes in DSDT acpi table for addition of this new
> unit test.
> 
> Signed-off-by: Ani Sinha 

Acked-by: Igor Mammedov 

> ---
>  tests/data/acpi/q35/DSDT.multi-bridge   | 0
>  tests/qtest/bios-tables-test-allowed-diff.h | 1 +
>  2 files changed, 1 insertion(+)
>  create mode 100644 tests/data/acpi/q35/DSDT.multi-bridge
> 
> diff --git a/tests/data/acpi/q35/DSDT.multi-bridge 
> b/tests/data/acpi/q35/DSDT.multi-bridge
> new file mode 100644
> index 00..e69de29bb2
> diff --git a/tests/qtest/bios-tables-test-allowed-diff.h 
> b/tests/qtest/bios-tables-test-allowed-diff.h
> index dfb8523c8b..dabc024f53 100644
> --- a/tests/qtest/bios-tables-test-allowed-diff.h
> +++ b/tests/qtest/bios-tables-test-allowed-diff.h
> @@ -1 +1,2 @@
>  /* List of comma-separated changed AML files to ignore */
> +"tests/data/acpi/q35/DSDT.multi-bridge",




Re: [PATCH v3 2/3] tests/acpi/pcihp: add unit tests for hotplug on multifunction bridges for q35

2021-10-11 Thread Igor Mammedov
On Thu,  7 Oct 2021 19:27:49 +0530
Ani Sinha  wrote:

> commit d7346e614f4ec ("acpi: x86: pcihp: add support hotplug on multifunction 
> bridges")
> added ACPI hotplug descriptions for cold plugged bridges for functions other
> than 0. For all other devices, the ACPI hotplug descriptions are limited to
> function 0 only. This change adds unit tests for this feature.
> 
> This test adds the following devices to qemu and then checks the changes
> introduced in the DSDT table due to the addition of the following devices:
> 
> (a) a multifunction bridge device
> (b) a bridge device with function 1
> (c) a non-bridge device with function 2
> 
> In the DSDT table, we should see AML hotplug descriptions for (a) and (b).
> For (a) we should find a hotplug AML description for function 0.
> 
> The following diff compares the DSDT table AML with the new unit test before
> and after the change d7346e614f4ec is introduced. In other words,
> this diff reflects the changes that occurs in the DSDT table due to the change
> d7346e614f4ec .
> 
> @@ -1,60 +1,38 @@
>  /*
>   * Intel ACPI Component Architecture
>   * AML/ASL+ Disassembler version 20190509 (64-bit version)
>   * Copyright (c) 2000 - 2019 Intel Corporation
>   *
>   * Disassembling to symbolic ASL+ operators
>   *
> - * Disassembly of tests/data/acpi/q35/DSDT.multi-bridge, Thu Oct  7 18:56:05 
> 2021
> + * Disassembly of /tmp/aml-AN0DA1, Thu Oct  7 18:56:05 2021
>   *
>   * Original Table Header:
>   * Signature"DSDT"
> - * Length   0x20FE (8446)
> + * Length   0x2187 (8583)
>   * Revision 0x01  32-bit table (V1), no 64-bit math support
> - * Checksum 0xDE
> + * Checksum 0x8D
>   * OEM ID   "BOCHS "
>   * OEM Table ID "BXPC"
>   * OEM Revision 0x0001 (1)
>   * Compiler ID  "BXPC"
>   * Compiler Version 0x0001 (1)
>   */
>  DefinitionBlock ("", "DSDT", 1, "BOCHS ", "BXPC", 0x0001)
>  {
> -/*
> - * iASL Warning: There was 1 external control method found during
> - * disassembly, but only 0 were resolved (1 unresolved). Additional
> - * ACPI tables may be required to properly disassemble the code. This
> - * resulting disassembler output file may not compile because the
> - * disassembler did not know how many arguments to assign to the
> - * unresolved methods. Note: SSDTs can be dynamically loaded at
> - * runtime and may or may not be available via the host OS.
> - *
> - * In addition, the -fe option can be used to specify a file containing
> - * control method external declarations with the associated method
> - * argument counts. Each line of the file must be of the form:
> - * External (, MethodObj, )
> - * Invocation:
> - * iasl -fe refs.txt -d dsdt.aml
> - *
> - * The following methods were unresolved and many not compile properly
> - * because the disassembler had to guess at the number of arguments
> - * required for each:
> - */
> -External (_SB_.PCI0.S19_.PCNT, MethodObj)// Warning: Unknown method, 
> guessing 1 arguments
> -
>  Scope (\)
>  {
>  OperationRegion (DBG, SystemIO, 0x0402, One)
>  Field (DBG, ByteAcc, NoLock, Preserve)
>  {
>  DBGB,   8
>  }
> 
>  Method (DBUG, 1, NotSerialized)
>  {
>  ToHexString (Arg0, Local0)
>  ToBuffer (Local0, Local0)
>  Local1 = (SizeOf (Local0) - One)
>  Local2 = Zero
>  While ((Local2 < Local1))
>  {
> @@ -3322,24 +3300,60 @@
>  Method (DVNT, 2, NotSerialized)
>  {
>  If ((Arg0 & One))
>  {
>  Notify (S00, Arg1)
>  }
>  }
> 
>  Method (PCNT, 0, NotSerialized)
>  {
>  BNUM = One
>  DVNT (PCIU, One)
>  DVNT (PCID, 0x03)
>  }
>  }
> 
> +Device (S19)
> +{
> +Name (_ADR, 0x00030001)  // _ADR: Address
> +Name (BSEL, Zero)
> +Device (S00)
> +{
> +Name (_SUN, Zero)  // _SUN: Slot User Number
> +Name (_ADR, Zero)  // _ADR: Address
> +Method (_EJ0, 1, NotSerialized)  // _EJx: Eject Device, 
> x=0-9
> +{
> +PCEJ (BSEL, _SUN)
> +}
> +
> +Method (_DSM, 4, Serialized)  // _DSM: Device-Specific 
> Method
> +{
> +Return (PDSM (Arg0, Arg1, Arg2, Arg3, BSEL, _SUN))
> +}
> +}
> +
> +Method (DVNT, 2, NotSerialized)
> +{
> +If ((Arg0 & One))
> + 

Re: ACPI endianness

2021-10-11 Thread BALATON Zoltan

On Mon, 11 Oct 2021, Michael S. Tsirkin wrote:

On Mon, Oct 11, 2021 at 03:27:44PM +0200, BALATON Zoltan wrote:

On Mon, 11 Oct 2021, Michael S. Tsirkin wrote:

On Mon, Oct 11, 2021 at 12:13:55PM +0200, BALATON Zoltan wrote:

On Mon, 11 Oct 2021, Philippe Mathieu-Daudé wrote:

On 10/10/21 15:24, BALATON Zoltan wrote:

Hello,

I'm trying to fix shutdown and reboot on pegasos2 which uses ACPI as
part of the VIA VT8231 (similar to and modelled in hw/isa/vt82c686b.c)
and found that the guest writes to ACPI PM1aCNT register come out with
wrong endianness but not shure why. I have this:

$ qemu-system-ppc -M pegasos2 -monitor stdio
(qemu) info mtree
[...]
memory-region: pci1-io
  - (prio 0, i/o): pci1-io
[...]
    0f00-0f7f (prio 0, i/o): via-pm
  0f00-0f03 (prio 0, i/o): acpi-evt
  0f04-0f05 (prio 0, i/o): acpi-cnt
  0f08-0f0b (prio 0, i/o): acpi-tmr

memory-region: system
  - (prio 0, i/o): system
    -1fff (prio 0, ram): pegasos2.ram
    8000-bfff (prio 0, i/o): alias pci1-mem0-win
@pci1-mem 8000-bfff
    c000-dfff (prio 0, i/o): alias pci0-mem0-win
@pci0-mem c000-dfff
    f100-f100 (prio 0, i/o): mv64361
    f800-f8ff (prio 0, i/o): alias pci0-io-win
@pci0-io -00ff
    f900-f9ff (prio 0, i/o): alias pci0-mem1-win
@pci0-mem -00ff
    fd00-fdff (prio 0, i/o): alias pci1-mem1-win
@pci1-mem -00ff
    fe00-feff (prio 0, i/o): alias pci1-io-win
@pci1-io -00ff
    ff80- (prio 0, i/o): alias pci1-mem3-win
@pci1-mem ff80-
    fff0-fff7 (prio 0, rom): pegasos2.rom

The guest (which is big endian PPC and I think wotks on real hardware)
writes to 0xf05 in the io region which should be the high byte of the
little endian register but in the acpi code it comes out wrong, instead
of 0x2800 I get in acpi_pm1_cnt_write: val=0x28


Looks like a northbridge issue (MV64340).
Does Pegasos2 enables bus swapping?
See hw/pci-host/mv64361.c:585:

static void warn_swap_bit(uint64_t val)
{
   if ((val & 0x300ULL) >> 24 != 1) {
   qemu_log_mask(LOG_UNIMP, "%s: Data swap not implemented", __func__);
   }
}


No, guests don't use this feature just byteswap themselves and write little
endian values to the mapped io region. (Or in this case just write one byte
of the 16 bit value, it specifically writes 0x28 to 0xf05.) That's why I
think the device model should not byteswap itself so the acpi regions should
be declared native endian and let the guest handle it. Some mentions I've
found say that native endian means don't byteswap, little endian means
byteswap if vcpu is big endian and big endian means byteswap if vcpu is
little endian. Since guests already account for this and write little endian
values to these regions there's no need to byteswap in device model and
changing to native endian should not affect little endian machines so unless
there's a better argument I'll try sending a patch.

Regards,
BALATON Zoltan


native endian means endian-ness is open-coded in the device handler
itself.  I think you just found a bug in memory core.

static const MemoryRegionOps acpi_pm_cnt_ops = {
   .read = acpi_pm_cnt_read,
   .write = acpi_pm_cnt_write,
   .impl.min_access_size = 2,
   .valid.min_access_size = 1,
   .valid.max_access_size = 2,
   .endianness = DEVICE_LITTLE_ENDIAN,
};


Because of that .impl.min_access_size = 2,
the 1 byte write should be converted to a 2 byte
read+2 byte write.

docs/devel/memory.rst just says:
- .impl.min_access_size, .impl.max_access_size define the access sizes
 (in bytes) supported by the *implementation*; other access sizes will be
 emulated using the ones available.  For example a 4-byte write will be
 emulated using four 1-byte writes, if .impl.max_access_size = 1.



Instead what we have is:

MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
hwaddr addr,
uint64_t data,
MemOp op,
MemTxAttrs attrs)
{
   unsigned size = memop_size(op);

   if (!memory_region_access_valid(mr, addr, size, true, attrs)) {
   unassigned_mem_write(mr, addr, data, size);
   return MEMTX_DECODE_ERROR;
   }

   adjust_endianness(mr, , op);


---


static void adjust_endianness(MemoryRegion *mr, uint64_t *data, MemOp op)
{
   if ((op & MO_BSWAP) != devend_memop(mr->ops->endianness)) {
   switch (op & MO_SIZE) {
   case MO_8:
   break;

Re: [PATCH] hw/misc: applesmc: use host osk as default on macs

2021-10-11 Thread BALATON Zoltan

On Mon, 11 Oct 2021, Gabriel L. Somlo wrote:

FWIW, there's an applesmc driver in the Linux kernel, and it exposes
many of the keys and values stored on the chip under
/sys/devices/platform/applesmc.768 (or at least it *used* to back when
I last checked).

My idea at the time was to get this driver to also expose the value of
OSK0,1, so that userspace programs (like e.g. qemu) could read and use
the values hardcoded in hardware without needing to hardcode them
themselves in software.


I guess a frequent use case for running macOS guests with keys from host 
would be on hosts running macOS too so a solution that works both on macOS 
and Linux might be better than a Linux specific one which then needs 
another way to do the same on macOS. Looks like there's free code for that 
too and you don't have to convince a maintainer either.


Regards,
BALATON Zoltan



Re: [PATCH] s390x/ipl: check kernel command line size

2021-10-11 Thread Thomas Huth

On 06/10/2021 11.26, Marc Hartmayer wrote:

Check if the provided kernel command line exceeds the maximum size of the s390x
Linux kernel command line size, which is 896 bytes.

Reported-by: Sven Schnelle 
Signed-off-by: Marc Hartmayer 
---
  hw/s390x/ipl.c | 12 +++-
  1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
index 1821c6faeef3..a58ea58cc736 100644
--- a/hw/s390x/ipl.c
+++ b/hw/s390x/ipl.c
@@ -38,6 +38,7 @@
  #define KERN_IMAGE_START0x01UL
  #define LINUX_MAGIC_ADDR0x010008UL
  #define KERN_PARM_AREA  0x010480UL
+#define KERN_PARM_AREA_SIZE 0x000380UL
  #define INITRD_START0x80UL
  #define INITRD_PARM_START   0x010408UL
  #define PARMFILE_START  0x001000UL
@@ -190,10 +191,19 @@ static void s390_ipl_realize(DeviceState *dev, Error 
**errp)
   * loader) and it won't work. For this case we force it to 0x1, 
too.
   */
  if (pentry == KERN_IMAGE_START || pentry == 0x800) {
-char *parm_area = rom_ptr(KERN_PARM_AREA, strlen(ipl->cmdline) + 
1);
+size_t cmdline_size = strlen(ipl->cmdline) + 1;
+char *parm_area = rom_ptr(KERN_PARM_AREA, cmdline_size);
+
  ipl->start_addr = KERN_IMAGE_START;
  /* Overwrite parameters in the kernel image, which are "rom" */
  if (parm_area) {
+if (cmdline_size > KERN_PARM_AREA_SIZE) {
+error_setg(errp,
+   "kernel command line exceeds maximum size: %lu > 
%lu",


I think the first %lu should be %zd instead?

Apart from that, the patch looks fine to me... so if you agree, I can fix 
that up when picking up the patch.


 Thomas



+   cmdline_size, KERN_PARM_AREA_SIZE);
+return;
+}
+
  strcpy(parm_area, ipl->cmdline);
  }
  } else {






Re: ACPI endianness

2021-10-11 Thread Michael S. Tsirkin
On Mon, Oct 11, 2021 at 03:27:44PM +0200, BALATON Zoltan wrote:
> On Mon, 11 Oct 2021, Michael S. Tsirkin wrote:
> > On Mon, Oct 11, 2021 at 12:13:55PM +0200, BALATON Zoltan wrote:
> > > On Mon, 11 Oct 2021, Philippe Mathieu-Daudé wrote:
> > > > On 10/10/21 15:24, BALATON Zoltan wrote:
> > > > > Hello,
> > > > > 
> > > > > I'm trying to fix shutdown and reboot on pegasos2 which uses ACPI as
> > > > > part of the VIA VT8231 (similar to and modelled in hw/isa/vt82c686b.c)
> > > > > and found that the guest writes to ACPI PM1aCNT register come out with
> > > > > wrong endianness but not shure why. I have this:
> > > > > 
> > > > > $ qemu-system-ppc -M pegasos2 -monitor stdio
> > > > > (qemu) info mtree
> > > > > [...]
> > > > > memory-region: pci1-io
> > > > >   - (prio 0, i/o): pci1-io
> > > > > [...]
> > > > >     0f00-0f7f (prio 0, i/o): via-pm
> > > > >   0f00-0f03 (prio 0, i/o): acpi-evt
> > > > >   0f04-0f05 (prio 0, i/o): acpi-cnt
> > > > >   0f08-0f0b (prio 0, i/o): acpi-tmr
> > > > > 
> > > > > memory-region: system
> > > > >   - (prio 0, i/o): system
> > > > >     -1fff (prio 0, ram): pegasos2.ram
> > > > >     8000-bfff (prio 0, i/o): alias 
> > > > > pci1-mem0-win
> > > > > @pci1-mem 8000-bfff
> > > > >     c000-dfff (prio 0, i/o): alias 
> > > > > pci0-mem0-win
> > > > > @pci0-mem c000-dfff
> > > > >     f100-f100 (prio 0, i/o): mv64361
> > > > >     f800-f8ff (prio 0, i/o): alias pci0-io-win
> > > > > @pci0-io -00ff
> > > > >     f900-f9ff (prio 0, i/o): alias 
> > > > > pci0-mem1-win
> > > > > @pci0-mem -00ff
> > > > >     fd00-fdff (prio 0, i/o): alias 
> > > > > pci1-mem1-win
> > > > > @pci1-mem -00ff
> > > > >     fe00-feff (prio 0, i/o): alias pci1-io-win
> > > > > @pci1-io -00ff
> > > > >     ff80- (prio 0, i/o): alias 
> > > > > pci1-mem3-win
> > > > > @pci1-mem ff80-
> > > > >     fff0-fff7 (prio 0, rom): pegasos2.rom
> > > > > 
> > > > > The guest (which is big endian PPC and I think wotks on real hardware)
> > > > > writes to 0xf05 in the io region which should be the high byte of the
> > > > > little endian register but in the acpi code it comes out wrong, 
> > > > > instead
> > > > > of 0x2800 I get in acpi_pm1_cnt_write: val=0x28
> > > > 
> > > > Looks like a northbridge issue (MV64340).
> > > > Does Pegasos2 enables bus swapping?
> > > > See hw/pci-host/mv64361.c:585:
> > > > 
> > > > static void warn_swap_bit(uint64_t val)
> > > > {
> > > >if ((val & 0x300ULL) >> 24 != 1) {
> > > >qemu_log_mask(LOG_UNIMP, "%s: Data swap not implemented", 
> > > > __func__);
> > > >}
> > > > }
> > > 
> > > No, guests don't use this feature just byteswap themselves and write 
> > > little
> > > endian values to the mapped io region. (Or in this case just write one 
> > > byte
> > > of the 16 bit value, it specifically writes 0x28 to 0xf05.) That's why I
> > > think the device model should not byteswap itself so the acpi regions 
> > > should
> > > be declared native endian and let the guest handle it. Some mentions I've
> > > found say that native endian means don't byteswap, little endian means
> > > byteswap if vcpu is big endian and big endian means byteswap if vcpu is
> > > little endian. Since guests already account for this and write little 
> > > endian
> > > values to these regions there's no need to byteswap in device model and
> > > changing to native endian should not affect little endian machines so 
> > > unless
> > > there's a better argument I'll try sending a patch.
> > > 
> > > Regards,
> > > BALATON Zoltan
> > 
> > native endian means endian-ness is open-coded in the device handler
> > itself.  I think you just found a bug in memory core.
> > 
> > static const MemoryRegionOps acpi_pm_cnt_ops = {
> >.read = acpi_pm_cnt_read,
> >.write = acpi_pm_cnt_write,
> >.impl.min_access_size = 2,
> >.valid.min_access_size = 1,
> >.valid.max_access_size = 2,
> >.endianness = DEVICE_LITTLE_ENDIAN,
> > };
> > 
> > 
> > Because of that .impl.min_access_size = 2,
> > the 1 byte write should be converted to a 2 byte
> > read+2 byte write.
> > 
> > docs/devel/memory.rst just says:
> > - .impl.min_access_size, .impl.max_access_size define the access sizes
> >  (in bytes) supported by the *implementation*; other access sizes will be
> >  emulated using the ones available.  For example a 4-byte write will be
> >  emulated using four 1-byte writes, if 

Re: Deprecate the ppc405 boards in QEMU?

2021-10-11 Thread Thomas Huth

On 11/10/2021 11.20, David Gibson wrote:

On Mon, Oct 11, 2021 at 10:10:36AM +0200, Thomas Huth wrote:

On 06/10/2021 09.25, Thomas Huth wrote:

On 05/10/2021 23.53, BALATON Zoltan wrote:
[...]

Maybe these 405 boards in QEMU ran with modified firmware where the
memory detection was patched out but it seems to detect the RAM so I
wonder where it gets that from. Maybe by reading the SDRAM
controller DCRs ppc4xx_sdram_init() sets up. Then I'm not sure what
it needs the SPD for, I forgot how this worked on sam460ex. Maybe
for the speed calibration, so could be it detects ram by reading
DCRs then tries to get SPD data and that's where it stops as i2c is
not emulated on taihu. This could be confirmed by checking what it
pokes with -d guest_errors that shows accesses to missing devices
but don't know where 405 has the i2c controller and if it's the same
as newer SoCs. If so that could be reused and an i2c bus could be
added with the SPD data like in sam460ex to make u-boot happy or you
could skip this in u-boot.


FWIW, I've just tried the latter (skipping the sdram init in u-boot),
and indeed, I can get to the u-boot prompt now.

[...]> I've also attached the patch with my modifications to u-boot.

FYI, the changes can now be found on this branch here:

  https://gitlab.com/huth/u-boot/-/commits/taihu

I also added a gitlab-CI file, so you can now download the u-boot.bin as an
artifact from the corresponding pipeline, e.g.:

  https://gitlab.com/huth/u-boot/-/jobs/1667201028


Thanks.

Are you going to send a v2 of your proposed deprecation patches?


No, since there was interest in keeping the 405 boards for testing the 405 
code in the Linux kernel, and since there is now a way to do at least some 
very basic testing of these boards (with the u-boot firmware), I don't plan 
to respin the deprecation patch. I just sent a patch for adding the boards 
to our CI instead:


 https://lists.gnu.org/archive/html/qemu-devel/2021-10/msg02072.html

 Thomas




Re: ACPI endianness

2021-10-11 Thread BALATON Zoltan

On Mon, 11 Oct 2021, Michael S. Tsirkin wrote:

On Mon, Oct 11, 2021 at 12:13:55PM +0200, BALATON Zoltan wrote:

On Mon, 11 Oct 2021, Philippe Mathieu-Daudé wrote:

On 10/10/21 15:24, BALATON Zoltan wrote:

Hello,

I'm trying to fix shutdown and reboot on pegasos2 which uses ACPI as
part of the VIA VT8231 (similar to and modelled in hw/isa/vt82c686b.c)
and found that the guest writes to ACPI PM1aCNT register come out with
wrong endianness but not shure why. I have this:

$ qemu-system-ppc -M pegasos2 -monitor stdio
(qemu) info mtree
[...]
memory-region: pci1-io
  - (prio 0, i/o): pci1-io
[...]
    0f00-0f7f (prio 0, i/o): via-pm
  0f00-0f03 (prio 0, i/o): acpi-evt
  0f04-0f05 (prio 0, i/o): acpi-cnt
  0f08-0f0b (prio 0, i/o): acpi-tmr

memory-region: system
  - (prio 0, i/o): system
    -1fff (prio 0, ram): pegasos2.ram
    8000-bfff (prio 0, i/o): alias pci1-mem0-win
@pci1-mem 8000-bfff
    c000-dfff (prio 0, i/o): alias pci0-mem0-win
@pci0-mem c000-dfff
    f100-f100 (prio 0, i/o): mv64361
    f800-f8ff (prio 0, i/o): alias pci0-io-win
@pci0-io -00ff
    f900-f9ff (prio 0, i/o): alias pci0-mem1-win
@pci0-mem -00ff
    fd00-fdff (prio 0, i/o): alias pci1-mem1-win
@pci1-mem -00ff
    fe00-feff (prio 0, i/o): alias pci1-io-win
@pci1-io -00ff
    ff80- (prio 0, i/o): alias pci1-mem3-win
@pci1-mem ff80-
    fff0-fff7 (prio 0, rom): pegasos2.rom

The guest (which is big endian PPC and I think wotks on real hardware)
writes to 0xf05 in the io region which should be the high byte of the
little endian register but in the acpi code it comes out wrong, instead
of 0x2800 I get in acpi_pm1_cnt_write: val=0x28


Looks like a northbridge issue (MV64340).
Does Pegasos2 enables bus swapping?
See hw/pci-host/mv64361.c:585:

static void warn_swap_bit(uint64_t val)
{
   if ((val & 0x300ULL) >> 24 != 1) {
   qemu_log_mask(LOG_UNIMP, "%s: Data swap not implemented", __func__);
   }
}


No, guests don't use this feature just byteswap themselves and write little
endian values to the mapped io region. (Or in this case just write one byte
of the 16 bit value, it specifically writes 0x28 to 0xf05.) That's why I
think the device model should not byteswap itself so the acpi regions should
be declared native endian and let the guest handle it. Some mentions I've
found say that native endian means don't byteswap, little endian means
byteswap if vcpu is big endian and big endian means byteswap if vcpu is
little endian. Since guests already account for this and write little endian
values to these regions there's no need to byteswap in device model and
changing to native endian should not affect little endian machines so unless
there's a better argument I'll try sending a patch.

Regards,
BALATON Zoltan


native endian means endian-ness is open-coded in the device handler
itself.  I think you just found a bug in memory core.

static const MemoryRegionOps acpi_pm_cnt_ops = {
   .read = acpi_pm_cnt_read,
   .write = acpi_pm_cnt_write,
   .impl.min_access_size = 2,
   .valid.min_access_size = 1,
   .valid.max_access_size = 2,
   .endianness = DEVICE_LITTLE_ENDIAN,
};


Because of that .impl.min_access_size = 2,
the 1 byte write should be converted to a 2 byte
read+2 byte write.

docs/devel/memory.rst just says:
- .impl.min_access_size, .impl.max_access_size define the access sizes
 (in bytes) supported by the *implementation*; other access sizes will be
 emulated using the ones available.  For example a 4-byte write will be
 emulated using four 1-byte writes, if .impl.max_access_size = 1.



Instead what we have is:

MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
hwaddr addr,
uint64_t data,
MemOp op,
MemTxAttrs attrs)
{
   unsigned size = memop_size(op);

   if (!memory_region_access_valid(mr, addr, size, true, attrs)) {
   unassigned_mem_write(mr, addr, data, size);
   return MEMTX_DECODE_ERROR;
   }

   adjust_endianness(mr, , op);


---


static void adjust_endianness(MemoryRegion *mr, uint64_t *data, MemOp op)
{
   if ((op & MO_BSWAP) != devend_memop(mr->ops->endianness)) {
   switch (op & MO_SIZE) {
   case MO_8:
   break;
   case MO_16:
   *data = bswap16(*data);
   break;
   case MO_32:
   *data = 

Re: [PATCH] qapi: Improve input_type_enum()'s error message

2021-10-11 Thread Philippe Mathieu-Daudé
On 10/11/21 15:15, Markus Armbruster wrote:
> The error message claims the parameter is invalid:
> 
> $ qemu-system-x86_64 -object qom-type=nonexistent
> qemu-system-x86_64: -object qom-type=nonexistent: Invalid parameter 
> 'nonexistent'
> 
> What's wrong is actually the *value* 'nonexistent'.  Improve the
> message to
> 
> qemu-system-x86_64: -object qom-type=nonexistent: Parameter 'qom-type' 
> does not accept value 'nonexistent'
> 
> Signed-off-by: Markus Armbruster 
> ---
>  qapi/qapi-visit-core.c  | 3 ++-
>  tests/unit/check-qom-proplist.c | 2 +-
>  tests/qemu-iotests/049.out  | 6 +++---
>  tests/qemu-iotests/206.out  | 2 +-
>  tests/qemu-iotests/237.out  | 6 +++---
>  tests/qemu-iotests/245  | 2 +-
>  6 files changed, 11 insertions(+), 10 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH] qemu-iotests: flush after every test

2021-10-11 Thread Hanna Reitz

On 06.10.21 11:27, Paolo Bonzini wrote:

This makes it possible to see what is happening, even if the output of
"make check-block" is not sent to a tty (for example if it is sent to
grep or tee).

Signed-off-by: Paolo Bonzini 
---
  tests/qemu-iotests/testrunner.py | 1 +
  1 file changed, 1 insertion(+)


Thanks, applied to my block branch:

https://gitlab.com/hreitz/qemu/-/commits/block/

Hanna




Re: [PATCH] tests/acceptance: Add tests for the ppc405 boards

2021-10-11 Thread Philippe Mathieu-Daudé
On 10/11/21 14:59, Thomas Huth wrote:
> Using the U-Boot firmware, we can check that at least the serial console
> of the ppc405 boards is still usable.
> 
> Signed-off-by: Thomas Huth 
> ---
>  Based-on: 20211006071140.565952-1-th...@redhat.com
> 
>  tests/acceptance/ppc_405.py | 40 +
>  1 file changed, 40 insertions(+)
>  create mode 100644 tests/acceptance/ppc_405.py

> +def test_ppc_taihu(self):
> +"""
> +:avocado: tags=arch:ppc
> +:avocado: tags=machine:taihu

Please consider adding:

  tags=cpu:405ep

> +"""
> +self.do_test_ppc405()
> +
> +def test_ppc_ref405ep(self):
> +"""
> +:avocado: tags=arch:ppc
> +:avocado: tags=machine:ref405ep
> +"""
> +self.do_test_ppc405()
> 

Reviewed-by: Philippe Mathieu-Daudé 
Tested-by: Philippe Mathieu-Daudé 




Re: ACPI endianness

2021-10-11 Thread Jonathan Cameron
On Mon, 11 Oct 2021 08:19:01 -0400
"Michael S. Tsirkin"  wrote:

> On Mon, Oct 11, 2021 at 12:13:55PM +0200, BALATON Zoltan wrote:
> > On Mon, 11 Oct 2021, Philippe Mathieu-Daudé wrote:  
> > > On 10/10/21 15:24, BALATON Zoltan wrote:  
> > > > Hello,
> > > > 
> > > > I'm trying to fix shutdown and reboot on pegasos2 which uses ACPI as
> > > > part of the VIA VT8231 (similar to and modelled in hw/isa/vt82c686b.c)
> > > > and found that the guest writes to ACPI PM1aCNT register come out with
> > > > wrong endianness but not shure why. I have this:
> > > > 
> > > > $ qemu-system-ppc -M pegasos2 -monitor stdio
> > > > (qemu) info mtree
> > > > [...]
> > > > memory-region: pci1-io
> > > >   - (prio 0, i/o): pci1-io
> > > > [...]
> > > >     0f00-0f7f (prio 0, i/o): via-pm
> > > >   0f00-0f03 (prio 0, i/o): acpi-evt
> > > >   0f04-0f05 (prio 0, i/o): acpi-cnt
> > > >   0f08-0f0b (prio 0, i/o): acpi-tmr
> > > > 
> > > > memory-region: system
> > > >   - (prio 0, i/o): system
> > > >     -1fff (prio 0, ram): pegasos2.ram
> > > >     8000-bfff (prio 0, i/o): alias pci1-mem0-win
> > > > @pci1-mem 8000-bfff
> > > >     c000-dfff (prio 0, i/o): alias pci0-mem0-win
> > > > @pci0-mem c000-dfff
> > > >     f100-f100 (prio 0, i/o): mv64361
> > > >     f800-f8ff (prio 0, i/o): alias pci0-io-win
> > > > @pci0-io -00ff
> > > >     f900-f9ff (prio 0, i/o): alias pci0-mem1-win
> > > > @pci0-mem -00ff
> > > >     fd00-fdff (prio 0, i/o): alias pci1-mem1-win
> > > > @pci1-mem -00ff
> > > >     fe00-feff (prio 0, i/o): alias pci1-io-win
> > > > @pci1-io -00ff
> > > >     ff80- (prio 0, i/o): alias pci1-mem3-win
> > > > @pci1-mem ff80-
> > > >     fff0-fff7 (prio 0, rom): pegasos2.rom
> > > > 
> > > > The guest (which is big endian PPC and I think wotks on real hardware)
> > > > writes to 0xf05 in the io region which should be the high byte of the
> > > > little endian register but in the acpi code it comes out wrong, instead
> > > > of 0x2800 I get in acpi_pm1_cnt_write: val=0x28  
> > > 
> > > Looks like a northbridge issue (MV64340).
> > > Does Pegasos2 enables bus swapping?
> > > See hw/pci-host/mv64361.c:585:
> > > 
> > > static void warn_swap_bit(uint64_t val)
> > > {
> > >if ((val & 0x300ULL) >> 24 != 1) {
> > >qemu_log_mask(LOG_UNIMP, "%s: Data swap not implemented", 
> > > __func__);
> > >}
> > > }  
> > 
> > No, guests don't use this feature just byteswap themselves and write little
> > endian values to the mapped io region. (Or in this case just write one byte
> > of the 16 bit value, it specifically writes 0x28 to 0xf05.) That's why I
> > think the device model should not byteswap itself so the acpi regions should
> > be declared native endian and let the guest handle it. Some mentions I've
> > found say that native endian means don't byteswap, little endian means
> > byteswap if vcpu is big endian and big endian means byteswap if vcpu is
> > little endian. Since guests already account for this and write little endian
> > values to these regions there's no need to byteswap in device model and
> > changing to native endian should not affect little endian machines so unless
> > there's a better argument I'll try sending a patch.
> > 
> > Regards,
> > BALATON Zoltan  
> 
> native endian means endian-ness is open-coded in the device handler
> itself.  I think you just found a bug in memory core.
> 
> static const MemoryRegionOps acpi_pm_cnt_ops = {
> .read = acpi_pm_cnt_read,
> .write = acpi_pm_cnt_write,
> .impl.min_access_size = 2,
> .valid.min_access_size = 1,
> .valid.max_access_size = 2,
> .endianness = DEVICE_LITTLE_ENDIAN,
> };
> 
> 
> Because of that .impl.min_access_size = 2,
> the 1 byte write should be converted to a 2 byte
> read+2 byte write.
> 
> docs/devel/memory.rst just says:
> - .impl.min_access_size, .impl.max_access_size define the access sizes
>   (in bytes) supported by the *implementation*; other access sizes will be
>   emulated using the ones available.  For example a 4-byte write will be
>   emulated using four 1-byte writes, if .impl.max_access_size = 1.
> 
> 
> 
> Instead what we have is:
> 
> MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
>  hwaddr addr,
>  uint64_t data,
>  MemOp op,
>   

Re: [PATCH] hw/misc: applesmc: use host osk as default on macs

2021-10-11 Thread Gabriel L. Somlo
FWIW, there's an applesmc driver in the Linux kernel, and it exposes
many of the keys and values stored on the chip under
/sys/devices/platform/applesmc.768 (or at least it *used* to back when
I last checked).

My idea at the time was to get this driver to also expose the value of
OSK0,1, so that userspace programs (like e.g. qemu) could read and use
the values hardcoded in hardware without needing to hardcode them
themselves in software.

It went nowhere at the time (the hwmon maintainer kept insisting that
"since it's a constant why don't you just hardcode it", and round and
round we went until I walked away:

https://lore.kernel.org/lkml/20121210222313.gf2...@hedwig.ini.cmu.edu/

Given *this* conversation, it might be worth someone's effort to try
that approach again. IMO it's really the most efficient: have an
already existing applesmc driver in the hypervisor's kernel expose the
desired key values (it's whole job is to expose key values to
userspace in the first place). Then have userspace read that and use
it for whatever purposes (including populating guest-facing emulated
smc devices). Nobody has to use anyone's copyrighted code or strings
or whatever. If only the hwmon folks could be convinced this time
around :)

HTH,
--Gabriel

On Sun, Oct 10, 2021 at 06:22:04PM -0300, Pedro Tôrres wrote:
> AFAIK there’s no public documentation from Apple on how to read values from
> SMC.
> 
> But Amit Singh wrote a book, Mac OS X Internals, and one of the processes
> described on it is how to read OSK directly from SMC: https://web.archive.org/
> web/20190630050544/http://osxbook.com/book/bonus/chapter7/tpmdrmmyth/
> 
> This is actually referenced on VirtualBox’s source code as the base for their
> implementation: 
> https://www.virtualbox.org/browser/vbox/trunk/src/VBox/Devices/
> EFI/DevSmc.cpp#L520
> 
> Additionally, there is the smcFanControl project, licensed under GPLv2, that
> performs reads and writes on SMC and have all information necessary to
> implement this feature on QEMU: https://github.com/hholtmann/smcFanControl
> 
> This project was used as base for the SMC in-tree driver for Linux and it’s
> referenced there: https://github.com/torvalds/linux/blob/master/drivers/hwmon/
> applesmc.c#L14
> 
> I think we would be safe using these sources as the base for our
> implementation, given that other huge GPL projects like Linux and VirtualBox
> have been using them for years.
> 
> Best regards,
> Pedro Tôrres
> 
> 
> On Oct 10, 2021, at 4:25 PM, Paolo Bonzini  wrote:
> 
> 
> Can you instead provide documentation in English (pseudocode, tables of 
> the
> structs, etc.)? That's the safest bet.
> 
> Paolo
> 
> El sáb., 9 oct. 2021 7:32, Pedro Tôrres  escribió:
> 
> Hey Paolo and Phil,
> 
> I understand you concerns regarding the license that Apple open-source
> code is distributed.
> 
> If I restart from scratch and implement this feature based only on
> VirtualBox code, that is distributed under GPLv2, would it be fine?
> 
> Best regards,
> Pedro Tôrres
> 
> 
> On Oct 8, 2021, at 3:54 PM, Paolo Bonzini 
> wrote:
> 
> 
> On 08/10/21 14:03, Phil Dennis-Jordan wrote:
> 
> 1. Licensing
> 
> Given that the code it's heavily based on is copyright Apple
> Computer Inc., licensed under APSL, is it safe including it in
> qemu as is?
> 
> If the integrated code is going to be quite so "directly
> inspired" (down to the inconsistent struct definition style 
> and
> mixing unrelated constants in the same anonymous enum), 
> perhaps
> at minimum place it in its own isolated source file with
> appropriate notice?
> 
>
> Yeah, this should be reverted.
>
> Pedro, I understand that you stated it was "based on code from
> Apple" but you also said (by including Signed-off-by) that
>
> ---
> (a) The contribution was created in whole or in part by me and I
>have the right to submit it under the open source license
>indicated in the file; or
>
> (b) The contribution is based upon previous work that, to the best
>of my knowledge, is covered under an appropriate open source
>license and I have the right under that license to submit that
>work with modifications, whether created in whole or in part
>by me, under the same open source license (unless I am
>permitted to submit under a different license), as indicated
>in the file; or
> ---
>
> and this is not true.
>
> Thanks very much,
>
> Paolo
>
> 



[PATCH] qapi: Improve input_type_enum()'s error message

2021-10-11 Thread Markus Armbruster
The error message claims the parameter is invalid:

$ qemu-system-x86_64 -object qom-type=nonexistent
qemu-system-x86_64: -object qom-type=nonexistent: Invalid parameter 
'nonexistent'

What's wrong is actually the *value* 'nonexistent'.  Improve the
message to

qemu-system-x86_64: -object qom-type=nonexistent: Parameter 'qom-type' does 
not accept value 'nonexistent'

Signed-off-by: Markus Armbruster 
---
 qapi/qapi-visit-core.c  | 3 ++-
 tests/unit/check-qom-proplist.c | 2 +-
 tests/qemu-iotests/049.out  | 6 +++---
 tests/qemu-iotests/206.out  | 2 +-
 tests/qemu-iotests/237.out  | 6 +++---
 tests/qemu-iotests/245  | 2 +-
 6 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/qapi/qapi-visit-core.c b/qapi/qapi-visit-core.c
index a641adec51..7310f0a0ca 100644
--- a/qapi/qapi-visit-core.c
+++ b/qapi/qapi-visit-core.c
@@ -392,7 +392,8 @@ static bool input_type_enum(Visitor *v, const char *name, 
int *obj,
 
 value = qapi_enum_parse(lookup, enum_str, -1, NULL);
 if (value < 0) {
-error_setg(errp, QERR_INVALID_PARAMETER, enum_str);
+error_setg(errp, "Parameter '%s' does not accept value '%s'",
+   name ? name : "null", enum_str);
 g_free(enum_str);
 return false;
 }
diff --git a/tests/unit/check-qom-proplist.c b/tests/unit/check-qom-proplist.c
index 48503e0dff..ed341088d3 100644
--- a/tests/unit/check-qom-proplist.c
+++ b/tests/unit/check-qom-proplist.c
@@ -488,7 +488,7 @@ static void test_dummy_badenum(void)
 g_assert(dobj == NULL);
 g_assert(err != NULL);
 g_assert_cmpstr(error_get_pretty(err), ==,
-"Invalid parameter 'yeti'");
+"Parameter 'av' does not accept value 'yeti'");
 
 g_assert(object_resolve_path_component(parent, "dummy0")
  == NULL);
diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out
index 01f7b1f240..8719c91b48 100644
--- a/tests/qemu-iotests/049.out
+++ b/tests/qemu-iotests/049.out
@@ -174,11 +174,11 @@ Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 
cluster_size=65536 extended_l2=off comp
 
 qemu-img create -f qcow2 -o compat=0.42 TEST_DIR/t.qcow2 64M
 Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off 
compression_type=zlib size=67108864 compat=0.42 lazy_refcounts=off 
refcount_bits=16
-qemu-img: TEST_DIR/t.qcow2: Invalid parameter '0.42'
+qemu-img: TEST_DIR/t.qcow2: Parameter 'version' does not accept value '0.42'
 
 qemu-img create -f qcow2 -o compat=foobar TEST_DIR/t.qcow2 64M
 Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off 
compression_type=zlib size=67108864 compat=foobar lazy_refcounts=off 
refcount_bits=16
-qemu-img: TEST_DIR/t.qcow2: Invalid parameter 'foobar'
+qemu-img: TEST_DIR/t.qcow2: Parameter 'version' does not accept value 'foobar'
 
 == Check preallocation option ==
 
@@ -190,7 +190,7 @@ Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 
extended_l2=off prea
 
 qemu-img create -f qcow2 -o preallocation=1234 TEST_DIR/t.qcow2 64M
 Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off 
preallocation=1234 compression_type=zlib size=67108864 lazy_refcounts=off 
refcount_bits=16
-qemu-img: TEST_DIR/t.qcow2: Invalid parameter '1234'
+qemu-img: TEST_DIR/t.qcow2: Parameter 'preallocation' does not accept value 
'1234'
 
 == Check encryption option ==
 
diff --git a/tests/qemu-iotests/206.out b/tests/qemu-iotests/206.out
index b68c443867..3593e8e9c2 100644
--- a/tests/qemu-iotests/206.out
+++ b/tests/qemu-iotests/206.out
@@ -192,7 +192,7 @@ Job failed: Could not resize image: Failed to grow the L1 
table: File too large
 
 === Invalid version ===
 {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": 
{"driver": "qcow2", "file": "node0", "size": 67108864, "version": "v1"}}}
-{"error": {"class": "GenericError", "desc": "Invalid parameter 'v1'"}}
+{"error": {"class": "GenericError", "desc": "Parameter 'version' does not 
accept value 'v1'"}}
 
 {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": 
{"driver": "qcow2", "file": "node0", "lazy-refcounts": true, "size": 67108864, 
"version": "v2"}}}
 {"return": {}}
diff --git a/tests/qemu-iotests/237.out b/tests/qemu-iotests/237.out
index aa94986803..2f09ff5512 100644
--- a/tests/qemu-iotests/237.out
+++ b/tests/qemu-iotests/237.out
@@ -116,13 +116,13 @@ Job failed: Cannot find device='this doesn't exist' nor 
node-name='this doesn't
 == Invalid adapter types ==
 
 {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": 
{"adapter-type": "foo", "driver": "vmdk", "file": "node0", "size": 33554432}}}
-{"error": {"class": "GenericError", "desc": "Invalid parameter 'foo'"}}
+{"error": {"class": "GenericError", "desc": "Parameter 'adapter-type' does not 
accept value 'foo'"}}
 
 {"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": 
{"adapter-type": "IDE", "driver": "vmdk", "file": "node0", 

Re: Deprecate the ppc405 boards in QEMU?

2021-10-11 Thread David Gibson
On Mon, Oct 11, 2021 at 10:10:36AM +0200, Thomas Huth wrote:
> On 06/10/2021 09.25, Thomas Huth wrote:
> > On 05/10/2021 23.53, BALATON Zoltan wrote:
> > [...]
> > > Maybe these 405 boards in QEMU ran with modified firmware where the
> > > memory detection was patched out but it seems to detect the RAM so I
> > > wonder where it gets that from. Maybe by reading the SDRAM
> > > controller DCRs ppc4xx_sdram_init() sets up. Then I'm not sure what
> > > it needs the SPD for, I forgot how this worked on sam460ex. Maybe
> > > for the speed calibration, so could be it detects ram by reading
> > > DCRs then tries to get SPD data and that's where it stops as i2c is
> > > not emulated on taihu. This could be confirmed by checking what it
> > > pokes with -d guest_errors that shows accesses to missing devices
> > > but don't know where 405 has the i2c controller and if it's the same
> > > as newer SoCs. If so that could be reused and an i2c bus could be
> > > added with the SPD data like in sam460ex to make u-boot happy or you
> > > could skip this in u-boot.
> > 
> > FWIW, I've just tried the latter (skipping the sdram init in u-boot),
> > and indeed, I can get to the u-boot prompt now.
> [...]> I've also attached the patch with my modifications to u-boot.
> 
> FYI, the changes can now be found on this branch here:
> 
>  https://gitlab.com/huth/u-boot/-/commits/taihu
> 
> I also added a gitlab-CI file, so you can now download the u-boot.bin as an
> artifact from the corresponding pipeline, e.g.:
> 
>  https://gitlab.com/huth/u-boot/-/jobs/1667201028

Thanks.

Are you going to send a v2 of your proposed deprecation patches?  If
you do can you please CC me explicitly.  I only scan qemu-ppc once a
week or so, and it goes into a different folder.  If I'm CCed I'll
notice and respond to it faster.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


[PATCH] tests/acceptance: Add tests for the ppc405 boards

2021-10-11 Thread Thomas Huth
Using the U-Boot firmware, we can check that at least the serial console
of the ppc405 boards is still usable.

Signed-off-by: Thomas Huth 
---
 Based-on: 20211006071140.565952-1-th...@redhat.com

 tests/acceptance/ppc_405.py | 40 +
 1 file changed, 40 insertions(+)
 create mode 100644 tests/acceptance/ppc_405.py

diff --git a/tests/acceptance/ppc_405.py b/tests/acceptance/ppc_405.py
new file mode 100644
index 00..bf7c2f22fc
--- /dev/null
+++ b/tests/acceptance/ppc_405.py
@@ -0,0 +1,40 @@
+# Test that the U-Boot firmware boots on ppc 405 machines and check the console
+#
+# Copyright (c) 2021 Red Hat, Inc.
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+
+from avocado.utils import archive
+from avocado_qemu import Test
+from avocado_qemu import wait_for_console_pattern
+from avocado_qemu import exec_command_and_wait_for_pattern
+
+class Ppc405Machine(Test):
+
+timeout = 90
+
+def do_test_ppc405(self):
+uboot_url = ('https://gitlab.com/huth/u-boot/-/raw/'
+ 'taihu-2021-10-09/u-boot-taihu.bin')
+uboot_hash = ('3208940e908a5edc7c03eab072c60f0dcfadc2ab');
+file_path = self.fetch_asset(uboot_url, asset_hash=uboot_hash)
+self.vm.set_console(console_index=1)
+self.vm.add_args('-bios', file_path)
+self.vm.launch()
+wait_for_console_pattern(self, 'AMCC PPC405EP Evaluation Board')
+exec_command_and_wait_for_pattern(self, 'reset', 'AMCC PowerPC 405EP')
+
+def test_ppc_taihu(self):
+"""
+:avocado: tags=arch:ppc
+:avocado: tags=machine:taihu
+"""
+self.do_test_ppc405()
+
+def test_ppc_ref405ep(self):
+"""
+:avocado: tags=arch:ppc
+:avocado: tags=machine:ref405ep
+"""
+self.do_test_ppc405()
-- 
2.27.0




Re: [PATCH 6/6] pcie: expire pending delete

2021-10-11 Thread Michael S. Tsirkin
On Mon, Oct 11, 2021 at 02:05:04PM +0200, Gerd Hoffmann wrote:
> Add an expire time for pending delete, once the time is over allow
> pressing the attention button again.
> 
> This makes pcie hotplug behave more like acpi hotplug, where one can
> try sending an 'device_del' monitor command again in case the guest
> didn't respond to the first attempt.
> 
> Signed-off-by: Gerd Hoffmann 
> ---
>  include/hw/qdev-core.h | 1 +
>  hw/pci/pcie.c  | 2 ++
>  softmmu/qdev-monitor.c | 4 +++-
>  3 files changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
> index 4ff19c714bd8..d082a9a00aca 100644
> --- a/include/hw/qdev-core.h
> +++ b/include/hw/qdev-core.h
> @@ -180,6 +180,7 @@ struct DeviceState {
>  char *canonical_path;
>  bool realized;
>  bool pending_deleted_event;
> +int64_t pending_deleted_expires_ms;
>  QemuOpts *opts;
>  int hotplugged;
>  bool allow_unplug_during_migration;
> diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
> index f3ac04399969..477c8776aa27 100644
> --- a/hw/pci/pcie.c
> +++ b/hw/pci/pcie.c
> @@ -549,6 +549,8 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler 
> *hotplug_dev,
>  }
>  
>  dev->pending_deleted_event = true;
> +dev->pending_deleted_expires_ms =
> +qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 5000; /* 5 secs */
>  
>  /* In case user cancel the operation of multi-function hot-add,
>   * remove the function that is unexposed to guest individually,


Well this will be barely enough, right?

Once the Power
Indicator begins blinking, a 5-second abort interval exists during 
which a second depression of the
Attention Button cancels the operation.

So I guess it needs to be more. Problem is of course if guest is
busy because of interrupts and whatnot, it might not get to
handling that in time ...


> diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c
> index 0705f008466d..5e7960c52a0a 100644
> --- a/softmmu/qdev-monitor.c
> +++ b/softmmu/qdev-monitor.c
> @@ -910,7 +910,9 @@ void qmp_device_del(const char *id, Error **errp)
>  {
>  DeviceState *dev = find_device_state(id, errp);
>  if (dev != NULL) {
> -if (dev->pending_deleted_event) {
> +if (dev->pending_deleted_event &&
> +(dev->pending_deleted_expires_ms == 0 ||
> + dev->pending_deleted_expires_ms > 
> qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL))) {
>  error_setg(errp, "Device %s is already in the "
>   "process of unplug", id);
>  return;
> -- 
> 2.31.1




Re: [PATCH v2] coroutine: resize pool periodically instead of limiting size

2021-10-11 Thread Stefan Hajnoczi
On Mon, Sep 13, 2021 at 04:35:24PM +0100, Stefan Hajnoczi wrote:
> It was reported that enabling SafeStack reduces IOPS significantly
> (>25%) with the following fio benchmark on virtio-blk using a NVMe host
> block device:
> 
>   # fio --rw=randrw --bs=4k --iodepth=64 --runtime=1m --direct=1 \
>   --filename=/dev/vdb --name=job1 --ioengine=libaio --thread \
>   --group_reporting --numjobs=16 --time_based \
> --output=/tmp/fio_result
> 
> Serge Guelton and I found that SafeStack is not really at fault, it just
> increases the cost of coroutine creation. This fio workload exhausts the
> coroutine pool and coroutine creation becomes a bottleneck. Previous
> work by Honghao Wang also pointed to excessive coroutine creation.
> 
> Creating new coroutines is expensive due to allocating new stacks with
> mmap(2) and mprotect(2). Currently there are thread-local and global
> pools that recycle old Coroutine objects and their stacks but the
> hardcoded size limit of 64 for thread-local pools and 128 for the global
> pool is insufficient for the fio benchmark shown above.
> 
> This patch changes the coroutine pool algorithm to a simple thread-local
> pool without a maximum size limit. Threads periodically shrink the pool
> down to a size sufficient for the maximum observed number of coroutines.
> 
> The global pool is removed by this patch. It can help to hide the fact
> that local pools are easily exhausted, but it's doesn't fix the root
> cause. I don't think there is a need for a global pool because QEMU's
> threads are long-lived, so let's keep things simple.
> 
> Performance of the above fio benchmark is as follows:
> 
>   Before   After
> IOPS 60k 97k
> 
> Memory usage varies over time as needed by the workload:
> 
> VSZ (KB) RSS (KB)
> Before fio  4705248  843128
> During fio  5747668 (+ ~100 MB)  849280
> After fio   4694996 (- ~100 MB)  845184
> 
> This confirms that coroutines are indeed being freed when no longer
> needed.
> 
> Thanks to Serge Guelton for working on identifying the bottleneck with
> me!
> 
> Reported-by: Tingting Mao 
> Cc: Serge Guelton 
> Cc: Honghao Wang 
> Cc: Paolo Bonzini 
> Cc: Daniele Buono 
> Signed-off-by: Stefan Hajnoczi 
> ---
> v2:
>  * Retained minimum pool size of 64 to keep latency low for threads that
>perform I/O infrequently and to prevent possible regressions [Daniele]
> ---
>  include/qemu/coroutine-pool-timer.h | 36 
>  include/qemu/coroutine.h|  7 +++
>  iothread.c  |  6 +++
>  util/coroutine-pool-timer.c | 35 +++
>  util/main-loop.c|  5 +++
>  util/qemu-coroutine.c   | 66 -
>  util/meson.build|  1 +
>  7 files changed, 126 insertions(+), 30 deletions(-)
>  create mode 100644 include/qemu/coroutine-pool-timer.h
>  create mode 100644 util/coroutine-pool-timer.c

Applied to my block-next tree:
https://gitlab.com/stefanha/qemu/commits/block-next

Stefan


signature.asc
Description: PGP signature


[PULL 2/2] iothread: use IOThreadParamInfo in iothread_[set|get]_param()

2021-10-11 Thread Stefan Hajnoczi
From: Stefano Garzarella 

Commit 0445409d74 ("iothread: generalize
iothread_set_param/iothread_get_param") moved common code to set and
get IOThread parameters in two new functions.

These functions are called inside callbacks, so we don't need to use an
opaque pointer. Let's replace `void *opaque` parameter with
`IOThreadParamInfo *info`.

Suggested-by: Kevin Wolf 
Signed-off-by: Stefano Garzarella 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 20210727145936.147032-3-sgarz...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 iothread.c | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/iothread.c b/iothread.c
index a73e560ba0..0f98af0f2a 100644
--- a/iothread.c
+++ b/iothread.c
@@ -231,20 +231,18 @@ static IOThreadParamInfo aio_max_batch_info = {
 };
 
 static void iothread_get_param(Object *obj, Visitor *v,
-const char *name, void *opaque, Error **errp)
+const char *name, IOThreadParamInfo *info, Error **errp)
 {
 IOThread *iothread = IOTHREAD(obj);
-IOThreadParamInfo *info = opaque;
 int64_t *field = (void *)iothread + info->offset;
 
 visit_type_int64(v, name, field, errp);
 }
 
 static bool iothread_set_param(Object *obj, Visitor *v,
-const char *name, void *opaque, Error **errp)
+const char *name, IOThreadParamInfo *info, Error **errp)
 {
 IOThread *iothread = IOTHREAD(obj);
-IOThreadParamInfo *info = opaque;
 int64_t *field = (void *)iothread + info->offset;
 int64_t value;
 
@@ -266,16 +264,18 @@ static bool iothread_set_param(Object *obj, Visitor *v,
 static void iothread_get_poll_param(Object *obj, Visitor *v,
 const char *name, void *opaque, Error **errp)
 {
+IOThreadParamInfo *info = opaque;
 
-iothread_get_param(obj, v, name, opaque, errp);
+iothread_get_param(obj, v, name, info, errp);
 }
 
 static void iothread_set_poll_param(Object *obj, Visitor *v,
 const char *name, void *opaque, Error **errp)
 {
 IOThread *iothread = IOTHREAD(obj);
+IOThreadParamInfo *info = opaque;
 
-if (!iothread_set_param(obj, v, name, opaque, errp)) {
+if (!iothread_set_param(obj, v, name, info, errp)) {
 return;
 }
 
@@ -291,16 +291,18 @@ static void iothread_set_poll_param(Object *obj, Visitor 
*v,
 static void iothread_get_aio_param(Object *obj, Visitor *v,
 const char *name, void *opaque, Error **errp)
 {
+IOThreadParamInfo *info = opaque;
 
-iothread_get_param(obj, v, name, opaque, errp);
+iothread_get_param(obj, v, name, info, errp);
 }
 
 static void iothread_set_aio_param(Object *obj, Visitor *v,
 const char *name, void *opaque, Error **errp)
 {
 IOThread *iothread = IOTHREAD(obj);
+IOThreadParamInfo *info = opaque;
 
-if (!iothread_set_param(obj, v, name, opaque, errp)) {
+if (!iothread_set_param(obj, v, name, info, errp)) {
 return;
 }
 
-- 
2.31.1




[PULL 1/2] iothread: rename PollParamInfo to IOThreadParamInfo

2021-10-11 Thread Stefan Hajnoczi
From: Stefano Garzarella 

Commit 1793ad0247 ("iothread: add aio-max-batch parameter") added
a new parameter (aio-max-batch) to IOThread and used PollParamInfo
structure to handle it.

Since it is not a parameter of the polling mechanism, we rename the
structure to a more generic IOThreadParamInfo.

Suggested-by: Kevin Wolf 
Signed-off-by: Stefano Garzarella 
Reviewed-by: Philippe Mathieu-Daudé 
Message-id: 20210727145936.147032-2-sgarz...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 iothread.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/iothread.c b/iothread.c
index ddbbde61f7..a73e560ba0 100644
--- a/iothread.c
+++ b/iothread.c
@@ -215,18 +215,18 @@ static void iothread_complete(UserCreatable *obj, Error 
**errp)
 typedef struct {
 const char *name;
 ptrdiff_t offset; /* field's byte offset in IOThread struct */
-} PollParamInfo;
+} IOThreadParamInfo;
 
-static PollParamInfo poll_max_ns_info = {
+static IOThreadParamInfo poll_max_ns_info = {
 "poll-max-ns", offsetof(IOThread, poll_max_ns),
 };
-static PollParamInfo poll_grow_info = {
+static IOThreadParamInfo poll_grow_info = {
 "poll-grow", offsetof(IOThread, poll_grow),
 };
-static PollParamInfo poll_shrink_info = {
+static IOThreadParamInfo poll_shrink_info = {
 "poll-shrink", offsetof(IOThread, poll_shrink),
 };
-static PollParamInfo aio_max_batch_info = {
+static IOThreadParamInfo aio_max_batch_info = {
 "aio-max-batch", offsetof(IOThread, aio_max_batch),
 };
 
@@ -234,7 +234,7 @@ static void iothread_get_param(Object *obj, Visitor *v,
 const char *name, void *opaque, Error **errp)
 {
 IOThread *iothread = IOTHREAD(obj);
-PollParamInfo *info = opaque;
+IOThreadParamInfo *info = opaque;
 int64_t *field = (void *)iothread + info->offset;
 
 visit_type_int64(v, name, field, errp);
@@ -244,7 +244,7 @@ static bool iothread_set_param(Object *obj, Visitor *v,
 const char *name, void *opaque, Error **errp)
 {
 IOThread *iothread = IOTHREAD(obj);
-PollParamInfo *info = opaque;
+IOThreadParamInfo *info = opaque;
 int64_t *field = (void *)iothread + info->offset;
 int64_t value;
 
-- 
2.31.1




[PULL 0/2] Block patches

2021-10-11 Thread Stefan Hajnoczi
The following changes since commit ca61fa4b803e5d0abaf6f1ceb690f23bb78a4def:

  Merge remote-tracking branch 'remotes/quic/tags/pull-hex-20211006' into 
staging (2021-10-06 12:11:14 -0700)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to 1cc7eada97914f090125e588497986f6f7900514:

  iothread: use IOThreadParamInfo in iothread_[set|get]_param() (2021-10-07 
15:29:50 +0100)


Pull request



Stefano Garzarella (2):
  iothread: rename PollParamInfo to IOThreadParamInfo
  iothread: use IOThreadParamInfo in iothread_[set|get]_param()

 iothread.c | 28 +++-
 1 file changed, 15 insertions(+), 13 deletions(-)

-- 
2.31.1





Re: [PATCH] Trim some trailing space from human-readable output

2021-10-11 Thread Greg Kurz
On Sat,  9 Oct 2021 17:24:01 +0200
Markus Armbruster  wrote:

> I noticed -cpu help printing enough trailing spaces to make the output
> at least 84 characters wide.  Looks ugly unless the terminal is wider.
> Ugly or not, trailing spaces are stupid.
> 
> The culprit is this line in x86_cpu_list_entry():
> 
> qemu_printf("x86 %-20s  %-58s\n", name, desc);
> 
> This prints a string with minimum field left-justified right before a
> newline.  Change it to
> 
> qemu_printf("x86 %-20s  %s\n", name, desc);
> 
> which avoids the trailing spaces and is simpler to boot.
> 
> A search for the pattern with "git-grep -E '%-[0-9]+s\\n'" found a few
> more instances.  Change them similarly.
> 
> Signed-off-by: Markus Armbruster 
> ---
>  monitor/hmp-cmds.c | 2 +-
>  target/i386/cpu-dump.c | 4 ++--
>  target/i386/cpu.c  | 2 +-
>  target/ppc/cpu_init.c  | 2 +-

ppc part

Acked-by: Greg Kurz 

>  target/s390x/cpu_models.c  | 4 ++--
>  target/xtensa/mmu_helper.c | 2 +-
>  6 files changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
> index bcaa41350e..9e45a138a5 100644
> --- a/monitor/hmp-cmds.c
> +++ b/monitor/hmp-cmds.c
> @@ -1945,7 +1945,7 @@ void hmp_rocker_ports(Monitor *mon, const QDict *qdict)
>  monitor_printf(mon, "  port  linkduplex neg?\n");
>  
>  for (port = list; port; port = port->next) {
> -monitor_printf(mon, "%10s  %-4s   %-3s  %2s  %-3s\n",
> +monitor_printf(mon, "%10s  %-4s   %-3s  %2s  %s\n",
> port->value->name,
> port->value->enabled ? port->value->link_up ?
> "up" : "down" : "!ena",
> diff --git a/target/i386/cpu-dump.c b/target/i386/cpu-dump.c
> index 02b635a52c..08ac957e99 100644
> --- a/target/i386/cpu-dump.c
> +++ b/target/i386/cpu-dump.c
> @@ -464,13 +464,13 @@ void x86_cpu_dump_state(CPUState *cs, FILE *f, int 
> flags)
>  snprintf(cc_op_name, sizeof(cc_op_name), "[%d]", env->cc_op);
>  #ifdef TARGET_X86_64
>  if (env->hflags & HF_CS64_MASK) {
> -qemu_fprintf(f, "CCS=%016" PRIx64 " CCD=%016" PRIx64 " 
> CCO=%-8s\n",
> +qemu_fprintf(f, "CCS=%016" PRIx64 " CCD=%016" PRIx64 " CCO=%s\n",
>   env->cc_src, env->cc_dst,
>   cc_op_name);
>  } else
>  #endif
>  {
> -qemu_fprintf(f, "CCS=%08x CCD=%08x CCO=%-8s\n",
> +qemu_fprintf(f, "CCS=%08x CCD=%08x CCO=%s\n",
>   (uint32_t)env->cc_src, (uint32_t)env->cc_dst,
>   cc_op_name);
>  }
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index a7b1b6aa93..8d2c0ded10 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -4876,7 +4876,7 @@ static void x86_cpu_list_entry(gpointer data, gpointer 
> user_data)
>  desc = g_strdup_printf("%s", model_id);
>  }
>  
> -qemu_printf("x86 %-20s  %-58s\n", name, desc);
> +qemu_printf("x86 %-20s  %s\n", name, desc);
>  }
>  
>  /* list available CPU models and flags */
> diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
> index 6aad01d1d3..8ab81dd1ed 100644
> --- a/target/ppc/cpu_init.c
> +++ b/target/ppc/cpu_init.c
> @@ -8734,7 +8734,7 @@ void ppc_cpu_list(void)
>  
>  #ifdef CONFIG_KVM
>  qemu_printf("\n");
> -qemu_printf("PowerPC %-16s\n", "host");
> +qemu_printf("PowerPC %s\n", "host");
>  #endif
>  }
>  
> diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
> index 4e4598cc77..11e06cc51f 100644
> --- a/target/s390x/cpu_models.c
> +++ b/target/s390x/cpu_models.c
> @@ -398,14 +398,14 @@ void s390_cpu_list(void)
>  for (feat = 0; feat < S390_FEAT_MAX; feat++) {
>  const S390FeatDef *def = s390_feat_def(feat);
>  
> -qemu_printf("%-20s %-50s\n", def->name, def->desc);
> +qemu_printf("%-20s %s\n", def->name, def->desc);
>  }
>  
>  qemu_printf("\nRecognized feature groups:\n");
>  for (group = 0; group < S390_FEAT_GROUP_MAX; group++) {
>  const S390FeatGroupDef *def = s390_feat_group_def(group);
>  
> -qemu_printf("%-20s %-50s\n", def->name, def->desc);
> +qemu_printf("%-20s %s\n", def->name, def->desc);
>  }
>  }
>  
> diff --git a/target/xtensa/mmu_helper.c b/target/xtensa/mmu_helper.c
> index b01ff9399a..57e319a1af 100644
> --- a/target/xtensa/mmu_helper.c
> +++ b/target/xtensa/mmu_helper.c
> @@ -1098,7 +1098,7 @@ static void dump_tlb(CPUXtensaState *env, bool dtlb)
>  qemu_printf("\tVaddr   Paddr   ASID  Attr RWX 
> Cache\n"
>  "\t--  --     --- 
> ---\n");
>  }
> -qemu_printf("\t0x%08x  0x%08x  0x%02x  0x%02x %c%c%c %-7s\n",
> +qemu_printf("\t0x%08x  0x%08x  0x%02x  0x%02x %c%c%c %s\n",
>  entry->vaddr,
>  entry->paddr,
>  

Re: ACPI endianness

2021-10-11 Thread Michael S. Tsirkin
On Mon, Oct 11, 2021 at 12:13:55PM +0200, BALATON Zoltan wrote:
> On Mon, 11 Oct 2021, Philippe Mathieu-Daudé wrote:
> > On 10/10/21 15:24, BALATON Zoltan wrote:
> > > Hello,
> > > 
> > > I'm trying to fix shutdown and reboot on pegasos2 which uses ACPI as
> > > part of the VIA VT8231 (similar to and modelled in hw/isa/vt82c686b.c)
> > > and found that the guest writes to ACPI PM1aCNT register come out with
> > > wrong endianness but not shure why. I have this:
> > > 
> > > $ qemu-system-ppc -M pegasos2 -monitor stdio
> > > (qemu) info mtree
> > > [...]
> > > memory-region: pci1-io
> > >   - (prio 0, i/o): pci1-io
> > > [...]
> > >     0f00-0f7f (prio 0, i/o): via-pm
> > >   0f00-0f03 (prio 0, i/o): acpi-evt
> > >   0f04-0f05 (prio 0, i/o): acpi-cnt
> > >   0f08-0f0b (prio 0, i/o): acpi-tmr
> > > 
> > > memory-region: system
> > >   - (prio 0, i/o): system
> > >     -1fff (prio 0, ram): pegasos2.ram
> > >     8000-bfff (prio 0, i/o): alias pci1-mem0-win
> > > @pci1-mem 8000-bfff
> > >     c000-dfff (prio 0, i/o): alias pci0-mem0-win
> > > @pci0-mem c000-dfff
> > >     f100-f100 (prio 0, i/o): mv64361
> > >     f800-f8ff (prio 0, i/o): alias pci0-io-win
> > > @pci0-io -00ff
> > >     f900-f9ff (prio 0, i/o): alias pci0-mem1-win
> > > @pci0-mem -00ff
> > >     fd00-fdff (prio 0, i/o): alias pci1-mem1-win
> > > @pci1-mem -00ff
> > >     fe00-feff (prio 0, i/o): alias pci1-io-win
> > > @pci1-io -00ff
> > >     ff80- (prio 0, i/o): alias pci1-mem3-win
> > > @pci1-mem ff80-
> > >     fff0-fff7 (prio 0, rom): pegasos2.rom
> > > 
> > > The guest (which is big endian PPC and I think wotks on real hardware)
> > > writes to 0xf05 in the io region which should be the high byte of the
> > > little endian register but in the acpi code it comes out wrong, instead
> > > of 0x2800 I get in acpi_pm1_cnt_write: val=0x28
> > 
> > Looks like a northbridge issue (MV64340).
> > Does Pegasos2 enables bus swapping?
> > See hw/pci-host/mv64361.c:585:
> > 
> > static void warn_swap_bit(uint64_t val)
> > {
> >if ((val & 0x300ULL) >> 24 != 1) {
> >qemu_log_mask(LOG_UNIMP, "%s: Data swap not implemented", __func__);
> >}
> > }
> 
> No, guests don't use this feature just byteswap themselves and write little
> endian values to the mapped io region. (Or in this case just write one byte
> of the 16 bit value, it specifically writes 0x28 to 0xf05.) That's why I
> think the device model should not byteswap itself so the acpi regions should
> be declared native endian and let the guest handle it. Some mentions I've
> found say that native endian means don't byteswap, little endian means
> byteswap if vcpu is big endian and big endian means byteswap if vcpu is
> little endian. Since guests already account for this and write little endian
> values to these regions there's no need to byteswap in device model and
> changing to native endian should not affect little endian machines so unless
> there's a better argument I'll try sending a patch.
> 
> Regards,
> BALATON Zoltan

native endian means endian-ness is open-coded in the device handler
itself.  I think you just found a bug in memory core.

static const MemoryRegionOps acpi_pm_cnt_ops = {
.read = acpi_pm_cnt_read,
.write = acpi_pm_cnt_write,
.impl.min_access_size = 2,
.valid.min_access_size = 1,
.valid.max_access_size = 2,
.endianness = DEVICE_LITTLE_ENDIAN,
};


Because of that .impl.min_access_size = 2,
the 1 byte write should be converted to a 2 byte
read+2 byte write.

docs/devel/memory.rst just says:
- .impl.min_access_size, .impl.max_access_size define the access sizes
  (in bytes) supported by the *implementation*; other access sizes will be
  emulated using the ones available.  For example a 4-byte write will be
  emulated using four 1-byte writes, if .impl.max_access_size = 1.



Instead what we have is:

MemTxResult memory_region_dispatch_write(MemoryRegion *mr,
 hwaddr addr,
 uint64_t data,
 MemOp op,
 MemTxAttrs attrs)
{
unsigned size = memop_size(op);

if (!memory_region_access_valid(mr, addr, size, true, attrs)) {
unassigned_mem_write(mr, addr, data, size);
return MEMTX_DECODE_ERROR;
}

adjust_endianness(mr, , op);


---


static void 

[PATCH 4/6] pcie: factor out pcie_cap_slot_unplug()

2021-10-11 Thread Gerd Hoffmann
No functional change.

Signed-off-by: Gerd Hoffmann 
---
 hw/pci/pcie.c | 35 +--
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index c455f92e16bf..70fc11ba4c7d 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -497,6 +497,26 @@ static void pcie_unplug_device(PCIBus *bus, PCIDevice 
*dev, void *opaque)
 object_unparent(OBJECT(dev));
 }
 
+static void pcie_cap_slot_do_unplug(PCIDevice *dev)
+{
+PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
+uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
+uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP);
+
+pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
+pcie_unplug_device, NULL);
+
+pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
+ PCI_EXP_SLTSTA_PDS);
+if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA ||
+(lnkcap & PCI_EXP_LNKCAP_DLLLARC)) {
+pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
+ PCI_EXP_LNKSTA_DLLLA);
+}
+pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
+   PCI_EXP_SLTSTA_PDC);
+}
+
 void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
  DeviceState *dev, Error **errp)
 {
@@ -676,7 +696,6 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
 uint32_t pos = dev->exp.exp_cap;
 uint8_t *exp_cap = dev->config + pos;
 uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
-uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP);
 
 if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) {
 /*
@@ -726,19 +745,7 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
 (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF &&
 (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) ||
 (old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) {
-PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
-pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
-pcie_unplug_device, NULL);
-
-pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
- PCI_EXP_SLTSTA_PDS);
-if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA ||
-(lnkcap & PCI_EXP_LNKCAP_DLLLARC)) {
-pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
- PCI_EXP_LNKSTA_DLLLA);
-}
-pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
-   PCI_EXP_SLTSTA_PDC);
+pcie_cap_slot_do_unplug(dev);
 }
 pcie_cap_update_power(dev);
 
-- 
2.31.1




[PATCH 1/6] pci: implement power state

2021-10-11 Thread Gerd Hoffmann
This allows to power off pci devices.  In "off" state the devices will
not be visible.  No pci config space access, no pci bar access, no dma.

Default state is "on", so this patch (alone) should not change behavior.

Use case:  Allows hotplug controllers implement slot power.  Hotplug
controllers doing so should set the inital power state for devices in
the ->plug callback.

Signed-off-by: Gerd Hoffmann 
---
 include/hw/pci/pci.h |  2 ++
 hw/pci/pci.c | 25 +++--
 hw/pci/pci_host.c|  6 --
 3 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 7fc90132cf1d..7cb6d92e17f7 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -268,6 +268,7 @@ typedef struct PCIReqIDCache PCIReqIDCache;
 struct PCIDevice {
 DeviceState qdev;
 bool partially_hotplugged;
+bool has_power;
 
 /* PCI config space */
 uint8_t *config;
@@ -904,5 +905,6 @@ extern const VMStateDescription vmstate_pci_device;
 }
 
 MSIMessage pci_get_msi_message(PCIDevice *dev, int vector);
+void pci_set_power(PCIDevice *pci_dev, bool state);
 
 #endif
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 186758ee11fe..94f53944c7cc 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1380,6 +1380,9 @@ static void pci_update_mappings(PCIDevice *d)
 continue;
 
 new_addr = pci_bar_address(d, i, r->type, r->size);
+if (!d->has_power) {
+new_addr = PCI_BAR_UNMAPPED;
+}
 
 /* This bar isn't changed */
 if (new_addr == r->addr)
@@ -1464,8 +1467,8 @@ void pci_default_write_config(PCIDevice *d, uint32_t 
addr, uint32_t val_in, int
 if (range_covers_byte(addr, l, PCI_COMMAND)) {
 pci_update_irq_disabled(d, was_irq_disabled);
 memory_region_set_enabled(>bus_master_enable_region,
-  pci_get_word(d->config + PCI_COMMAND)
-& PCI_COMMAND_MASTER);
+  (pci_get_word(d->config + PCI_COMMAND)
+   & PCI_COMMAND_MASTER) && d->has_power);
 }
 
 msi_write_config(d, addr, val_in, l);
@@ -2190,6 +2193,8 @@ static void pci_qdev_realize(DeviceState *qdev, Error 
**errp)
 pci_qdev_unrealize(DEVICE(pci_dev));
 return;
 }
+
+pci_set_power(pci_dev, true);
 }
 
 PCIDevice *pci_new_multifunction(int devfn, bool multifunction,
@@ -2861,6 +2866,22 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int 
vector)
 return msg;
 }
 
+void pci_set_power(PCIDevice *d, bool state)
+{
+if (d->has_power == state) {
+return;
+}
+
+d->has_power = state;
+pci_update_mappings(d);
+memory_region_set_enabled(>bus_master_enable_region,
+  (pci_get_word(d->config + PCI_COMMAND)
+   & PCI_COMMAND_MASTER) && d->has_power);
+if (!d->has_power) {
+pci_device_reset(d);
+}
+}
+
 static const TypeInfo pci_device_type_info = {
 .name = TYPE_PCI_DEVICE,
 .parent = TYPE_DEVICE,
diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
index cf02f0d6a501..7beafd40a8e9 100644
--- a/hw/pci/pci_host.c
+++ b/hw/pci/pci_host.c
@@ -74,7 +74,8 @@ void pci_host_config_write_common(PCIDevice *pci_dev, 
uint32_t addr,
 /* non-zero functions are only exposed when function 0 is present,
  * allowing direct removal of unexposed functions.
  */
-if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) {
+if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
+!pci_dev->has_power) {
 return;
 }
 
@@ -97,7 +98,8 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, 
uint32_t addr,
 /* non-zero functions are only exposed when function 0 is present,
  * allowing direct removal of unexposed functions.
  */
-if (pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) {
+if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
+!pci_dev->has_power) {
 return ~0x0;
 }
 
-- 
2.31.1




[PATCH 6/6] pcie: expire pending delete

2021-10-11 Thread Gerd Hoffmann
Add an expire time for pending delete, once the time is over allow
pressing the attention button again.

This makes pcie hotplug behave more like acpi hotplug, where one can
try sending an 'device_del' monitor command again in case the guest
didn't respond to the first attempt.

Signed-off-by: Gerd Hoffmann 
---
 include/hw/qdev-core.h | 1 +
 hw/pci/pcie.c  | 2 ++
 softmmu/qdev-monitor.c | 4 +++-
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 4ff19c714bd8..d082a9a00aca 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -180,6 +180,7 @@ struct DeviceState {
 char *canonical_path;
 bool realized;
 bool pending_deleted_event;
+int64_t pending_deleted_expires_ms;
 QemuOpts *opts;
 int hotplugged;
 bool allow_unplug_during_migration;
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index f3ac04399969..477c8776aa27 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -549,6 +549,8 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler 
*hotplug_dev,
 }
 
 dev->pending_deleted_event = true;
+dev->pending_deleted_expires_ms =
+qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 5000; /* 5 secs */
 
 /* In case user cancel the operation of multi-function hot-add,
  * remove the function that is unexposed to guest individually,
diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c
index 0705f008466d..5e7960c52a0a 100644
--- a/softmmu/qdev-monitor.c
+++ b/softmmu/qdev-monitor.c
@@ -910,7 +910,9 @@ void qmp_device_del(const char *id, Error **errp)
 {
 DeviceState *dev = find_device_state(id, errp);
 if (dev != NULL) {
-if (dev->pending_deleted_event) {
+if (dev->pending_deleted_event &&
+(dev->pending_deleted_expires_ms == 0 ||
+ dev->pending_deleted_expires_ms > 
qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL))) {
 error_setg(errp, "Device %s is already in the "
  "process of unplug", id);
 return;
-- 
2.31.1




[PATCH 0/6] RfC: try improve native hotplug for pcie root ports

2021-10-11 Thread Gerd Hoffmann


Gerd Hoffmann (6):
  pci: implement power state
  pcie: implement slow power control for pcie root ports
  pcie: add power indicator blink check
  pcie: factor out pcie_cap_slot_unplug()
  pcie: fast unplug when slot power is off
  pcie: expire pending delete

 include/hw/pci/pci.h   |  2 ++
 include/hw/qdev-core.h |  1 +
 hw/pci/pci.c   | 25 +++--
 hw/pci/pci_host.c  |  6 ++--
 hw/pci/pcie.c  | 82 ++
 softmmu/qdev-monitor.c |  4 ++-
 6 files changed, 101 insertions(+), 19 deletions(-)

-- 
2.31.1





[PATCH 5/6] pcie: fast unplug when slot power is off

2021-10-11 Thread Gerd Hoffmann
In case the slot is powered off (and the power indicator turned off too)
we can unplug right away, without round-trip to the guest.

Also clear pending attention button press, there is nothing to care
about any more.

Signed-off-by: Gerd Hoffmann 
---
 hw/pci/pcie.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 70fc11ba4c7d..f3ac04399969 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -561,6 +561,16 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler 
*hotplug_dev,
 return;
 }
 
+if (((sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_OFF) &&
+((sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_OFF)) {
+/* slot is powered off -> unplug without round-trip to the guest */
+pcie_cap_slot_do_unplug(hotplug_pdev);
+hotplug_event_notify(hotplug_pdev);
+pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
+ PCI_EXP_SLTSTA_ABP);
+return;
+}
+
 pcie_cap_slot_push_attention_button(hotplug_pdev);
 }
 
-- 
2.31.1




[PATCH 3/6] pcie: add power indicator blink check

2021-10-11 Thread Gerd Hoffmann
Refuse to push the attention button in case the guest is busy with some
hotplug operation (as indicated by the power indicator blinking).

Signed-off-by: Gerd Hoffmann 
---
 hw/pci/pcie.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 4a52c250615e..c455f92e16bf 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -506,6 +506,7 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler 
*hotplug_dev,
 PCIDevice *hotplug_pdev = PCI_DEVICE(hotplug_dev);
 uint8_t *exp_cap = hotplug_pdev->config + hotplug_pdev->exp.exp_cap;
 uint32_t sltcap = pci_get_word(exp_cap + PCI_EXP_SLTCAP);
+uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL);
 
 /* Check if hot-unplug is disabled on the slot */
 if ((sltcap & PCI_EXP_SLTCAP_HPC) == 0) {
@@ -521,6 +522,12 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler 
*hotplug_dev,
 return;
 }
 
+if ((sltctl & PCI_EXP_SLTCTL_PIC) == PCI_EXP_SLTCTL_PWR_IND_BLINK) {
+error_setg(errp, "Hot-unplug failed: "
+   "guest is busy (power indicator blinking)");
+return;
+}
+
 dev->pending_deleted_event = true;
 
 /* In case user cancel the operation of multi-function hot-add,
-- 
2.31.1




[PATCH 2/6] pcie: implement slow power control for pcie root ports

2021-10-11 Thread Gerd Hoffmann
With this patch hot-plugged pci devices will only be visible to the
guest if the guests hotplug driver has enabled slot power.

This should fix the hot-plug race which one can hit when hot-plugging
a pci device at boot, while the guest is in the middle of the pci bus
scan.

Signed-off-by: Gerd Hoffmann 
---
 hw/pci/pcie.c | 28 
 1 file changed, 28 insertions(+)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 6e95d829037a..4a52c250615e 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -366,6 +366,29 @@ static void hotplug_event_clear(PCIDevice *dev)
 }
 }
 
+static void pcie_set_power_device(PCIBus *bus, PCIDevice *dev, void *opaque)
+{
+bool *power = opaque;
+
+pci_set_power(dev, *power);
+}
+
+static void pcie_cap_update_power(PCIDevice *hotplug_dev)
+{
+uint8_t *exp_cap = hotplug_dev->config + hotplug_dev->exp.exp_cap;
+PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(hotplug_dev));
+uint32_t sltcap = pci_get_long(exp_cap + PCI_EXP_SLTCAP);
+uint16_t sltctl = pci_get_word(exp_cap + PCI_EXP_SLTCTL);
+bool power = true;
+
+if (sltcap & PCI_EXP_SLTCAP_PCP) {
+power = (sltctl & PCI_EXP_SLTCTL_PCC) == PCI_EXP_SLTCTL_PWR_ON;
+}
+
+pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
+pcie_set_power_device, );
+}
+
 /*
  * A PCI Express Hot-Plug Event has occurred, so update slot status register
  * and notify OS of the event if necessary.
@@ -434,6 +457,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, 
DeviceState *dev,
 pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA,
PCI_EXP_LNKSTA_DLLLA);
 }
+pcie_cap_update_power(hotplug_pdev);
 return;
 }
 
@@ -451,6 +475,7 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, 
DeviceState *dev,
 }
 pcie_cap_slot_event(hotplug_pdev,
 PCI_EXP_HP_EV_PDC | PCI_EXP_HP_EV_ABP);
+pcie_cap_update_power(hotplug_pdev);
 }
 }
 
@@ -625,6 +650,7 @@ void pcie_cap_slot_reset(PCIDevice *dev)
  PCI_EXP_SLTSTA_PDC |
  PCI_EXP_SLTSTA_ABP);
 
+pcie_cap_update_power(dev);
 hotplug_event_update_event_status(dev);
 }
 
@@ -707,6 +733,7 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
 pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
PCI_EXP_SLTSTA_PDC);
 }
+pcie_cap_update_power(dev);
 
 hotplug_event_notify(dev);
 
@@ -733,6 +760,7 @@ int pcie_cap_slot_post_load(void *opaque, int version_id)
 {
 PCIDevice *dev = opaque;
 hotplug_event_update_event_status(dev);
+pcie_cap_update_power(dev);
 return 0;
 }
 
-- 
2.31.1




Re: [PATCH v4 1/2] monitor/hmp: add support for flag argument with value

2021-10-11 Thread Dr. David Alan Gilbert
* Stefan Reiter (s.rei...@proxmox.com) wrote:
> Adds support for the "-xS" parameter type, where "-x" denotes a flag
> name and the "S" suffix indicates that this flag is supposed to take an
> arbitrary string parameter.
> 
> These parameters are always optional, the entry in the qdict will be
> omitted if the flag is not given.
> 
> Reviewed-by: Eric Blake 
> Signed-off-by: Stefan Reiter 

It would be great if you could send a patch to update the big comment in
monitor/monitor-internal.h that describes all the parameters.
But that can come another time.

Dave

> ---
>  monitor/hmp.c | 17 -
>  1 file changed, 16 insertions(+), 1 deletion(-)
> 
> diff --git a/monitor/hmp.c b/monitor/hmp.c
> index d50c3124e1..a32dce7a35 100644
> --- a/monitor/hmp.c
> +++ b/monitor/hmp.c
> @@ -980,6 +980,7 @@ static QDict *monitor_parse_arguments(Monitor *mon,
>  {
>  const char *tmp = p;
>  int skip_key = 0;
> +int ret;
>  /* option */
>  
>  c = *typestr++;
> @@ -1002,8 +1003,22 @@ static QDict *monitor_parse_arguments(Monitor *mon,
>  }
>  if (skip_key) {
>  p = tmp;
> +} else if (*typestr == 'S') {
> +/* has option with string value */
> +typestr++;
> +tmp = p++;
> +while (qemu_isspace(*p)) {
> +p++;
> +}
> +ret = get_str(buf, sizeof(buf), );
> +if (ret < 0) {
> +monitor_printf(mon, "%s: value expected for 
> -%c\n",
> +   cmd->name, *tmp);
> +goto fail;
> +}
> +qdict_put_str(qdict, key, buf);
>  } else {
> -/* has option */
> +/* has boolean option */
>  p++;
>  qdict_put_bool(qdict, key, true);
>  }
> -- 
> 2.30.2
> 
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




[PATCH 6/6] doc: Add the SGX numa description

2021-10-11 Thread Yang Zhong
Add the SGX numa reference command and how to check if
SGX numa is support or not with multiple EPC sections.

Signed-off-by: Yang Zhong 
---
 docs/system/i386/sgx.rst | 31 +++
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/docs/system/i386/sgx.rst b/docs/system/i386/sgx.rst
index f103ae2a2f..9e4ada761f 100644
--- a/docs/system/i386/sgx.rst
+++ b/docs/system/i386/sgx.rst
@@ -141,8 +141,7 @@ To launch a SGX guest:
   |qemu_system_x86| \\
-cpu host,+sgx-provisionkey \\
-object memory-backend-epc,id=mem1,size=64M,prealloc=on \\
-   -object memory-backend-epc,id=mem2,size=28M \\
-   -M sgx-epc.0.memdev=mem1,sgx-epc.1.memdev=mem2
+   -M sgx-epc.0.memdev=mem1,sgx-epc.0.node=0
 
 Utilizing SGX in the guest requires a kernel/OS with SGX support.
 The support can be determined in guest by::
@@ -152,8 +151,32 @@ The support can be determined in guest by::
 and SGX epc info by::
 
   $ dmesg | grep sgx
-  [1.242142] sgx: EPC section 0x18000-0x181bf
-  [1.242319] sgx: EPC section 0x181c0-0x1837f
+  [0.182807] sgx: EPC section 0x14000-0x143ff
+  [0.183695] sgx: [Firmware Bug]: Unable to map EPC section to online 
node. Fallback to the NUMA node 0.
+
+To launch a SGX numa guest:
+
+.. parsed-literal::
+
+  |qemu_system_x86| \\
+   -cpu host,+sgx-provisionkey \\
+   -object memory-backend-ram,size=2G,host-nodes=0,policy=bind,id=node0 \\
+   -object 
memory-backend-epc,id=mem0,size=64M,prealloc=on,host-nodes=0,policy=bind \\
+   -numa node,nodeid=0,cpus=0-1,memdev=node0 \\
+   -object memory-backend-ram,size=2G,host-nodes=1,policy=bind,id=node1 \\
+   -object 
memory-backend-epc,id=mem1,size=28M,prealloc=on,host-nodes=1,policy=bind \\
+   -numa node,nodeid=1,cpus=2-3,memdev=node1 \\
+   -M 
sgx-epc.0.memdev=mem0,sgx-epc.0.node=0,sgx-epc.1.memdev=mem1,sgx-epc.1.node=1
+
+and SGX epc numa info by::
+
+  $ dmesg | grep sgx
+  [0.369937] sgx: EPC section 0x18000-0x183ff
+  [0.370259] sgx: EPC section 0x18400-0x185bf
+
+  $ dmesg | grep SRAT
+  [0.009981] ACPI: SRAT: Node 0 PXM 0 [mem 0x18000-0x183ff]
+  [0.009982] ACPI: SRAT: Node 1 PXM 1 [mem 0x18400-0x185bf]
 
 References
 --



[PATCH 3/6] numa: Add SGXEPCSection list for multiple sections

2021-10-11 Thread Yang Zhong
The SGXEPCSection list added into SGXInfo to show the multiple
SGX EPC sections detailed info, not the total size like before.

Signed-off-by: Yang Zhong 
---
 qapi/misc-target.json | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/qapi/misc-target.json b/qapi/misc-target.json
index 594fbd1577..89a5a4250a 100644
--- a/qapi/misc-target.json
+++ b/qapi/misc-target.json
@@ -334,6 +334,21 @@
   'returns': 'SevAttestationReport',
   'if': 'TARGET_I386' }
 
+##
+# @SGXEPCSection:
+#
+# Information about intel SGX EPC section info
+#
+# @index: the SGX epc section index
+#
+# @size: the size of epc section
+#
+# Since: 6.2
+##
+{ 'struct': 'SGXEPCSection',
+  'data': { 'index': 'uint64',
+'size': 'uint64'}}
+
 ##
 # @SGXInfo:
 #
@@ -347,7 +362,7 @@
 #
 # @flc: true if FLC is supported
 #
-# @section-size: The EPC section size for guest
+# @sections: The EPC sections info for guest
 #
 # Since: 6.2
 ##
@@ -356,7 +371,7 @@
 'sgx1': 'bool',
 'sgx2': 'bool',
 'flc': 'bool',
-'section-size': 'uint64'},
+'sections': ['SGXEPCSection']},
'if': 'TARGET_I386' }
 
 ##



[PATCH 4/6] monitor: numa support for 'info sgx' command

2021-10-11 Thread Yang Zhong
This patch can enable numa support for 'info sgx' command
in the monitor, which can show detailed SGX EPC sections
info.

(qemu) info sgx
 SGX support: enabled
 SGX1 support: enabled
 SGX2 support: enabled
 FLC support: enabled
 SECTION #0: size=67108864
 SECTION #1: size=29360128

 The QMP interface show:
 (QEMU) query-sgx
 {"return": {"sgx": true, "sgx2": true, "sgx1": true, "sections": \
 [{"index": 0, "size": 67108864}, {"index": 1, "size": 29360128}], "flc": true}}

Signed-off-by: Yang Zhong 
---
 hw/i386/sgx.c | 25 +++--
 target/i386/monitor.c | 11 +--
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c
index 906facb645..8af45925c6 100644
--- a/hw/i386/sgx.c
+++ b/hw/i386/sgx.c
@@ -126,6 +126,28 @@ SGXInfo *sgx_get_capabilities(Error **errp)
 return info;
 }
 
+static SGXEPCSectionList *sgx_get_epc_sections_list(void)
+{
+GSList *device_list = sgx_epc_get_device_list();
+SGXEPCSectionList *head = NULL, **tail = 
+SGXEPCSection *section;
+uint64_t i = 0;
+
+for (; device_list; device_list = device_list->next) {
+DeviceState *dev = device_list->data;
+Object *obj = OBJECT(dev);
+
+section = g_new0(SGXEPCSection, 1);
+section->index = i++;
+section->size = object_property_get_uint(obj, SGX_EPC_SIZE_PROP,
+ _abort);
+QAPI_LIST_APPEND(tail, section);
+}
+g_slist_free(device_list);
+
+return head;
+}
+
 SGXInfo *sgx_get_info(Error **errp)
 {
 SGXInfo *info = NULL;
@@ -144,14 +166,13 @@ SGXInfo *sgx_get_info(Error **errp)
 return NULL;
 }
 
-SGXEPCState *sgx_epc = >sgx_epc;
 info = g_new0(SGXInfo, 1);
 
 info->sgx = true;
 info->sgx1 = true;
 info->sgx2 = true;
 info->flc = true;
-info->section_size = sgx_epc->size;
+info->sections = sgx_get_epc_sections_list();
 
 return info;
 }
diff --git a/target/i386/monitor.c b/target/i386/monitor.c
index 196c1c9e77..08e7d4a425 100644
--- a/target/i386/monitor.c
+++ b/target/i386/monitor.c
@@ -773,6 +773,7 @@ SGXInfo *qmp_query_sgx(Error **errp)
 void hmp_info_sgx(Monitor *mon, const QDict *qdict)
 {
 Error *err = NULL;
+SGXEPCSectionList *section_list, *section;
 g_autoptr(SGXInfo) info = qmp_query_sgx();
 
 if (err) {
@@ -787,8 +788,14 @@ void hmp_info_sgx(Monitor *mon, const QDict *qdict)
info->sgx2 ? "enabled" : "disabled");
 monitor_printf(mon, "FLC support: %s\n",
info->flc ? "enabled" : "disabled");
-monitor_printf(mon, "size: %" PRIu64 "\n",
-   info->section_size);
+
+section_list = info->sections;
+for (section = section_list; section; section = section->next) {
+monitor_printf(mon, "SECTION #%" PRId64 ": ",
+   section->value->index);
+monitor_printf(mon, "size=%" PRIu64 "\n",
+   section->value->size);
+}
 }
 
 SGXInfo *qmp_query_sgx_capabilities(Error **errp)



[PATCH 0/6] SGX NUMA support

2021-10-11 Thread Yang Zhong
The basic SGX patches were merged into Qemu release, the left NUMA
function for SGX should be enabled. The patch1 implemented the SGX NUMA
ACPI to enable NUMA in the SGX guest. Since Libvirt need detailed host
SGX EPC sections info to decide how to allocate EPC sections for SGX NUMA
guest, the SGXEPCSection list is introduced to show detailed sections info
in the monitor or HMP interface.

Please help review this patchset, the link also can be found:
https://github.com/intel/qemu-sgx upstream


Yang Zhong (6):
  numa: Enable numa for SGX EPC sections
  monitor: Support 'info numa' command
  numa: Add SGXEPCSection list for multiple sections
  monitor: numa support for 'info sgx' command
  numa: Enable numa for libvirt interface
  doc: Add the SGX numa description

 docs/system/i386/sgx.rst  | 31 +--
 qapi/machine.json |  6 ++-
 qapi/misc-target.json | 19 -
 include/hw/i386/sgx-epc.h |  3 ++
 hw/core/numa.c|  6 +++
 hw/i386/acpi-build.c  |  4 ++
 hw/i386/sgx-epc.c |  3 ++
 hw/i386/sgx.c | 84 +++
 monitor/hmp-cmds.c|  1 +
 target/i386/monitor.c | 11 -
 qemu-options.hx   |  4 +-
 11 files changed, 154 insertions(+), 18 deletions(-)




[PATCH 2/6] monitor: Support 'info numa' command

2021-10-11 Thread Yang Zhong
Add the MEMORY_DEVICE_INFO_KIND_SGX_EPC case for SGX numa info
with 'info numa' command in the monitor.

Signed-off-by: Yang Zhong 
---
 hw/core/numa.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/hw/core/numa.c b/hw/core/numa.c
index 510d096a88..1aa05dcf42 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -756,6 +756,7 @@ static void numa_stat_memory_devices(NumaNodeMem node_mem[])
 PCDIMMDeviceInfo *pcdimm_info;
 VirtioPMEMDeviceInfo *vpi;
 VirtioMEMDeviceInfo *vmi;
+SgxEPCDeviceInfo *se;
 
 for (info = info_list; info; info = info->next) {
 MemoryDeviceInfo *value = info->value;
@@ -781,6 +782,11 @@ static void numa_stat_memory_devices(NumaNodeMem 
node_mem[])
 node_mem[vmi->node].node_mem += vmi->size;
 node_mem[vmi->node].node_plugged_mem += vmi->size;
 break;
+case MEMORY_DEVICE_INFO_KIND_SGX_EPC:
+se = value->u.sgx_epc.data;
+node_mem[se->node].node_mem += se->size;
+node_mem[se->node].node_plugged_mem = 0;
+break;
 default:
 g_assert_not_reached();
 }



[PATCH 5/6] numa: Enable numa for libvirt interface

2021-10-11 Thread Yang Zhong
Libvirt need get the detailed host SGX EPC capabilities to support
numa function. Libvirt can decide how to allocate host EPC sections
to guest numa from host numa info.

(QEMU) query-sgx-capabilities
{"return": {"sgx": true, "sgx2": true, "sgx1": true, "sections": \
[{"index": 0, "size": 17070817280}, {"index": 1, "size": 17079205888}], "flc": 
true}}

Signed-off-by: Yang Zhong 
---
 hw/i386/sgx.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c
index 8af45925c6..fe3034060d 100644
--- a/hw/i386/sgx.c
+++ b/hw/i386/sgx.c
@@ -74,11 +74,13 @@ static uint64_t sgx_calc_section_metric(uint64_t low, 
uint64_t high)
((high & MAKE_64BIT_MASK(0, 20)) << 32);
 }
 
-static uint64_t sgx_calc_host_epc_section_size(void)
+static SGXEPCSectionList *sgx_calc_host_epc_sections(void)
 {
+SGXEPCSectionList *head = NULL, **tail = 
+SGXEPCSection *section;
 uint32_t i, type;
 uint32_t eax, ebx, ecx, edx;
-uint64_t size = 0;
+uint32_t j = 0;
 
 for (i = 0; i < SGX_MAX_EPC_SECTIONS; i++) {
 host_cpuid(0x12, i + 2, , , , );
@@ -92,10 +94,13 @@ static uint64_t sgx_calc_host_epc_section_size(void)
 break;
 }
 
-size += sgx_calc_section_metric(ecx, edx);
+section = g_new0(SGXEPCSection, 1);
+section->index = j++;
+section->size = sgx_calc_section_metric(ecx, edx);
+QAPI_LIST_APPEND(tail, section);
 }
 
-return size;
+return head;
 }
 
 SGXInfo *sgx_get_capabilities(Error **errp)
@@ -119,7 +124,7 @@ SGXInfo *sgx_get_capabilities(Error **errp)
 info->sgx1 = eax & (1U << 0) ? true : false;
 info->sgx2 = eax & (1U << 1) ? true : false;
 
-info->section_size = sgx_calc_host_epc_section_size();
+info->sections = sgx_calc_host_epc_sections();
 
 close(fd);
 



[PATCH 1/6] numa: Enable numa for SGX EPC sections

2021-10-11 Thread Yang Zhong
The basic SGX did not enable numa for SGX EPC sections, which
result in all EPC sections located in numa node 0. This patch
enable SGX numa function in the guest and the EPC section can
work with RAM as one numa node.

The Guest kernel related log:
[0.009981] ACPI: SRAT: Node 0 PXM 0 [mem 0x18000-0x183ff]
[0.009982] ACPI: SRAT: Node 1 PXM 1 [mem 0x18400-0x185bf]
The SRAT table can normally show SGX EPC sections menory info in different
numa nodes.

The SGX EPC numa related command:
 ..
 -m 4G,maxmem=20G \
 -smp sockets=2,cores=2 \
 -cpu host,+sgx-provisionkey \
 -object memory-backend-ram,size=2G,host-nodes=0,policy=bind,id=node0 \
 -object 
memory-backend-epc,id=mem0,size=64M,prealloc=on,host-nodes=0,policy=bind \
 -numa node,nodeid=0,cpus=0-1,memdev=node0 \
 -object memory-backend-ram,size=2G,host-nodes=1,policy=bind,id=node1 \
 -object 
memory-backend-epc,id=mem1,size=28M,prealloc=on,host-nodes=1,policy=bind \
 -numa node,nodeid=1,cpus=2-3,memdev=node1 \
 -M 
sgx-epc.0.memdev=mem0,sgx-epc.0.node=0,sgx-epc.1.memdev=mem1,sgx-epc.1.node=1 \
 ..

Signed-off-by: Yang Zhong 
---
 qapi/machine.json |  6 +-
 include/hw/i386/sgx-epc.h |  3 +++
 hw/i386/acpi-build.c  |  4 
 hw/i386/sgx-epc.c |  3 +++
 hw/i386/sgx.c | 44 +++
 monitor/hmp-cmds.c|  1 +
 qemu-options.hx   |  4 ++--
 7 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/qapi/machine.json b/qapi/machine.json
index 5db54df298..09b6188e6f 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1213,6 +1213,7 @@
   'data': { '*id': 'str',
 'memaddr': 'size',
 'size': 'size',
+'node': 'int',
 'memdev': 'str'
   }
 }
@@ -1288,7 +1289,10 @@
 # Since: 6.2
 ##
 { 'struct': 'SgxEPC',
-  'data': { 'memdev': 'str' } }
+  'data': { 'memdev': 'str',
+'node': 'int'
+  }
+}
 
 ##
 # @SgxEPCProperties:
diff --git a/include/hw/i386/sgx-epc.h b/include/hw/i386/sgx-epc.h
index 65a68ca753..7a61c52869 100644
--- a/include/hw/i386/sgx-epc.h
+++ b/include/hw/i386/sgx-epc.h
@@ -25,6 +25,7 @@
 #define SGX_EPC_ADDR_PROP "addr"
 #define SGX_EPC_SIZE_PROP "size"
 #define SGX_EPC_MEMDEV_PROP "memdev"
+#define SGX_EPC_NUMA_NODE_PROP "node"
 
 /**
  * SGXEPCDevice:
@@ -38,6 +39,7 @@ typedef struct SGXEPCDevice {
 
 /* public */
 uint64_t addr;
+uint32_t node;
 HostMemoryBackendEpc *hostmem;
 } SGXEPCDevice;
 
@@ -56,6 +58,7 @@ typedef struct SGXEPCState {
 } SGXEPCState;
 
 int sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size);
+void sgx_epc_build_srat(GArray *table_data);
 
 static inline uint64_t sgx_epc_above_4g_end(SGXEPCState *sgx_epc)
 {
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 81418b7911..563a38992f 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2062,6 +2062,10 @@ build_srat(GArray *table_data, BIOSLinker *linker, 
MachineState *machine)
 nvdimm_build_srat(table_data);
 }
 
+if (pcms->sgx_epc.size != 0) {
+sgx_epc_build_srat(table_data);
+}
+
 /*
  * TODO: this part is not in ACPI spec and current linux kernel boots fine
  * without these entries. But I recall there were issues the last time I
diff --git a/hw/i386/sgx-epc.c b/hw/i386/sgx-epc.c
index 55e2217eae..e5cd2789be 100644
--- a/hw/i386/sgx-epc.c
+++ b/hw/i386/sgx-epc.c
@@ -21,6 +21,7 @@
 
 static Property sgx_epc_properties[] = {
 DEFINE_PROP_UINT64(SGX_EPC_ADDR_PROP, SGXEPCDevice, addr, 0),
+DEFINE_PROP_UINT32(SGX_EPC_NUMA_NODE_PROP, SGXEPCDevice, node, 0),
 DEFINE_PROP_LINK(SGX_EPC_MEMDEV_PROP, SGXEPCDevice, hostmem,
  TYPE_MEMORY_BACKEND_EPC, HostMemoryBackendEpc *),
 DEFINE_PROP_END_OF_LIST(),
@@ -139,6 +140,8 @@ static void sgx_epc_md_fill_device_info(const 
MemoryDeviceState *md,
 se->memaddr = epc->addr;
 se->size = object_property_get_uint(OBJECT(epc), SGX_EPC_SIZE_PROP,
 NULL);
+se->node = object_property_get_uint(OBJECT(epc), SGX_EPC_NUMA_NODE_PROP,
+NULL);
 se->memdev = object_get_canonical_path(OBJECT(epc->hostmem));
 
 info->u.sgx_epc.data = se;
diff --git a/hw/i386/sgx.c b/hw/i386/sgx.c
index e481e9358f..906facb645 100644
--- a/hw/i386/sgx.c
+++ b/hw/i386/sgx.c
@@ -19,6 +19,7 @@
 #include "exec/address-spaces.h"
 #include "hw/i386/sgx.h"
 #include "sysemu/hw_accel.h"
+#include "hw/acpi/aml-build.h"
 
 #define SGX_MAX_EPC_SECTIONS8
 #define SGX_CPUID_EPC_INVALID   0x0
@@ -27,6 +28,46 @@
 #define SGX_CPUID_EPC_SECTION   0x1
 #define SGX_CPUID_EPC_MASK  0xF
 
+static int sgx_epc_device_list(Object *obj, void *opaque)
+{
+GSList **list = opaque;
+
+if (object_dynamic_cast(obj, TYPE_SGX_EPC)) {
+*list = g_slist_append(*list, DEVICE(obj));
+}
+
+object_child_foreach(obj, sgx_epc_device_list, 

Re: [RFC PATCH v2 24/25] job.h: split function pointers in JobDriver

2021-10-11 Thread Stefan Hajnoczi
On Fri, Oct 08, 2021 at 12:48:26PM +0200, Emanuele Giuseppe Esposito wrote:
> 
> 
> On 07/10/2021 16:54, Stefan Hajnoczi wrote:
> > On Tue, Oct 05, 2021 at 10:32:14AM -0400, Emanuele Giuseppe Esposito wrote:
> > > The job API will be handled separately in another serie.
> > > 
> > > Signed-off-by: Emanuele Giuseppe Esposito 
> > > ---
> > >   include/qemu/job.h | 31 +++
> > >   1 file changed, 31 insertions(+)
> > > 
> > > diff --git a/include/qemu/job.h b/include/qemu/job.h
> > > index 41162ed494..c236c43026 100644
> > > --- a/include/qemu/job.h
> > > +++ b/include/qemu/job.h
> > > @@ -169,12 +169,21 @@ typedef struct Job {
> > >* Callbacks and other information about a Job driver.
> > >*/
> > >   struct JobDriver {
> > > +
> > > +/* Fields initialized in struct definition and never changed. */
> > > +
> > >   /** Derived Job struct size */
> > >   size_t instance_size;
> > >   /** Enum describing the operation */
> > >   JobType job_type;
> > > +/*
> > > + * I/O API functions. These functions are thread-safe, and therefore
> > > + * can run in any thread as long as they have called
> > > + * aio_context_acquire/release().
> > > + */
> > 
> > This comment described the block layer well but job.h is more generic. I
> > don't think these functions are necessarily thread-safe (they shouldn't
> > be invoked from multiple threads at the same time). It's just that they
> > are run without regard to the BQL and may run in an arbitrary thread.
> 
> Ok, in this instance I will change it to:
> 
> /*
>  * Functions run without regard to the BQL and may run in any
>  * arbitrary thread.
>  */

It's not clear from that comment whether the functions need to be
thread-safe (re-entrancy safe) or not. I think the answer is "no" since
the caller ensures they are called from one thread at a time.

Stefan


signature.asc
Description: PGP signature


[PATCH] contrib/plugins: add a drcov plugin

2021-10-11 Thread Arkadiy
From: NDNF 

This patch adds the ability to generate files in drcov format.
Primary goal this script is to have coverage
logfiles thatwork in Lighthouse.
Problems:
- The path to the executable file is not specified.
- base, end, entry take incorrect values.
  (Lighthouse + IDA Pro anyway work).

Signed-off-by: Ivanov Arkady 
---
 contrib/plugins/Makefile |   1 +
 contrib/plugins/drcov.c  | 112 +++
 2 files changed, 113 insertions(+)
 create mode 100644 contrib/plugins/drcov.c

diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
index 7801b08b0d..0a681efeec 100644
--- a/contrib/plugins/Makefile
+++ b/contrib/plugins/Makefile
@@ -17,6 +17,7 @@ NAMES += hotblocks
 NAMES += hotpages
 NAMES += howvec
 NAMES += lockstep
+NAMES += drcov
 
 SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES)))
 
diff --git a/contrib/plugins/drcov.c b/contrib/plugins/drcov.c
new file mode 100644
index 00..d6a7d131c0
--- /dev/null
+++ b/contrib/plugins/drcov.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2021, Ivanov Arkady 
+ *
+ * Drcov - a DynamoRIO-based tool that collects coverage information
+ * from a binary. Primary goal this script is to have coverage log
+ * files that work in Lighthouse.
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+static char header[] = "DRCOV VERSION: 2\n"
+"DRCOV FLAVOR: drcov-64\n"
+"Module Table: version 2, count 1\n"
+"Columns: id, base, end, entry, path\n";
+
+static FILE *fp;
+
+typedef struct {
+uint32_t start;
+uint16_t size;
+uint16_t mod_id;
+} bb_entry_t;
+
+static GSList *bbs;
+
+static void printfHeader()
+{
+g_autoptr(GString) head = g_string_new("");
+g_string_append(head, header);
+g_string_append_printf(head, "0, 0x%x, 0x%x, 0x%x, %s\n",
+   0, 0x, 0, "path");
+g_string_append_printf(head, "BB Table: %d bbs\n", g_slist_length(bbs));
+fwrite(head->str, sizeof(char), head->len, fp);
+}
+
+static void printfCharArray32(uint32_t data)
+{
+const uint8_t *bytes = (const uint8_t *)();
+fwrite(bytes, sizeof(char), sizeof(data), fp);
+}
+
+static void printfCharArray16(uint16_t data)
+{
+const uint8_t *bytes = (const uint8_t *)();
+fwrite(bytes, sizeof(char), sizeof(data), fp);
+}
+
+
+static void printf_el(gpointer data, gpointer user_data)
+{
+bb_entry_t *bb = (bb_entry_t *)data;
+printfCharArray32(bb->start);
+printfCharArray16(bb->size);
+printfCharArray16(bb->mod_id);
+}
+
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+/* Print function */
+printfHeader();
+g_slist_foreach(bbs, printf_el, NULL);
+
+/* Clear */
+g_slist_free_full(bbs, _free);
+fclose(fp);
+}
+
+static void plugin_init(void)
+{
+fp = fopen("file.drcov.trace", "wb");
+}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+bb_entry_t *bb = g_new0(bb_entry_t, 1);
+uint64_t pc = qemu_plugin_tb_vaddr(tb);
+
+size_t n = qemu_plugin_tb_n_insns(tb);
+for (int i = 0; i < n; i++) {
+bb->size += qemu_plugin_insn_size(qemu_plugin_tb_get_insn(tb, i));
+}
+
+bb->start = pc;
+bb->mod_id = 0;
+bbs = g_slist_append(bbs, bb);
+
+}
+
+QEMU_PLUGIN_EXPORT
+int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
+int argc, char **argv)
+{
+plugin_init();
+
+qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+return 0;
+}
-- 
2.25.1




Re: [RFC PATCH v2 11/25] include/block/blockjob.h: global state API

2021-10-11 Thread Stefan Hajnoczi
On Fri, Oct 08, 2021 at 09:20:35AM +0200, Emanuele Giuseppe Esposito wrote:
> 
> > > +/*
> > > + * Global state (GS) API. These functions run under the BQL lock.
> > > + *
> > > + * If a function modifies the graph, it also uses drain and/or
> > > + * aio_context_acquire/release to be sure it has unique access.
> > > + * aio_context locking is needed together with BQL because of
> > > + * the thread-safe I/O API that concurrently runs and accesses
> > > + * the graph without the BQL.
> > > + *
> > > + * It is important to note that not all of these functions are
> > > + * necessarily limited to running under the BQL, but they would
> > > + * require additional auditing and may small thread-safety changes
> > > + * to move them into the I/O API. Often it's not worth doing that
> > > + * work since the APIs are only used with the BQL held at the
> > > + * moment, so they have been placed in the GS API (for now).
> > > + *
> > > + * All functions below must use this assertion:
> > > + * g_assert(qemu_in_main_thread());
> > > + * to catch when they are accidentally called without the BQL.
> > > + */
> > 
> > This is comment is duplicated in many places. I suggest explaining it in
> > one place and using references in the other files:
> > 
> >/*
> > * Global state (GS) API. These functions run under the BQL lock.
> > *
> > * See include/block/block.h for more information about the GS API.
> > */
> > 
> 
> Good idea. Should I also do that for I/O, or it's not worth for very few
> lines?

Up to you, but I think it makes sense to have a minimal comment for both
the GS and I/O API.

Stefan


signature.asc
Description: PGP signature


Re: [PATCH v2 0/2] pylint: fix new errors and warnings in qemu-iotests

2021-10-11 Thread Hanna Reitz

On 11.10.21 11:58, Emanuele Giuseppe Esposito wrote:



On 11/10/2021 11:29, Hanna Reitz wrote:

On 08.10.21 08:28, Emanuele Giuseppe Esposito wrote:

There are some warnings and errors that we either miss or
are new in pylint. Anyways, test 297 of qemu-iotests fails
because of that, so we need to fix it.

All these fixes involve just indentation or additional spaces
added.

Signed-off-by: Emanuele Giuseppe Esposito 
---
v2:
* temporarly enable and then disable "bad whitespace" error in 
image-fleecing

* better indentation for a fix in test 129 in patch one


So the changes look good to me, but I can’t get my pylint to generate 
a bad-whitespace warning no matter how hard I try. (When you asked on 
IRC whether others see pylint warnings, I thought you meant the 
consider-using-f-string warnings that John disabled in 
3765315d4c84f9c0799744f43a314169baaccc05.)


I have pylint 2.11.1, which should be the newest version.  So I tried 
to look around what might be the cause and found this: 
https://pylint.pycqa.org/en/latest/whatsnew/2.6.html – it seems that 
as of pylint 2.6, bad-whitespace warnings are no longer emitted.  If 
that’s the reason why I can’t see the warning, then I think we should 
take only patch 1 (because it just makes sense), but skip patch 2.




Yes you are right. I had 2.4.4, and now that I upgraded to 2.11.1 I 
don't see bad-whitespace errors anymore (actually pylint does not seem 
to complain at all). So I agree we can just take patch 1, as 
formatting is wrong anyways.


OK, thanks!

I’ve applied patch 1 to my block branch:

https://gitlab.com/hreitz/qemu/-/commits/block/

Hanna




Re: Moving QEMU downloads to GitLab Releases?

2021-10-11 Thread Stefan Hajnoczi
On Mon, Oct 11, 2021 at 09:21:30AM +0200, Gerd Hoffmann wrote:
>   Hi,
> 
> > > I guess the main question is who is using the ROM/BIOS sources in the
> > > tarballs, and would this 2-step process work for those users? If there
> > > are distros relying on them then maybe there are some more logistics to
> > > consider since the make-release downloads are both time-consuming and
> > > prone to network errors/stalls since it relies on the availability of a
> > > good number of different hosts.
> > 
> > Great, maybe Gerd can comment on splitting out firmware.
> 
> I think the binaries are mostly a convenience feature for developers.
> Distros typically build from source anyway, and typically they build
> from upstream source instead of qemu submodule.
> 
> The idea to split them out to a separate repo is exists for quite a
> while.  I have an old qemu-firmware repo laying around on my disk, which
> basically moves roms/ + submodules and the binaries built from it over.
> 
> I think with the switch to gitlab it doesn't make sense any more to
> commit pre-built firmware binaries to a git repo.  Instead we should
> build the firmware in gitlab ci, i.e. effectively move the build rules
> we have right now in roms/Makefile to .gitlab-ci.d/, then publish the
> firmware binaries as build artifacts or gitlab pages.
> 
> When done just remove the pre-build binaries from git and add a helper
> script to fetch firmware binaries from gitlab instead.
> 
> > QEMU mirrors firmware sources on GitLab too. We're able to provide the
> > same level of download availability on our mirror firmware repos as for
> > the main qemu.git repo.
> 
> I think enabling CI for the firmware mirrors should work given that it
> is possible to specify a custom CI/CD configuration file, i.e. use
> something like
> 
> /qemu-project/qemu/.gitlab-ci.d/firmware/seabios.yml
> 
> or
> 
> /qemu-project/qemu-firmware/seabios.yml
> 
> as config file for the 
> 
> /qemu-project/seabios/
> 
> mirror.  Then we can publish binaries using gitlab pages at
> 
> https://qemu-project.gitlab.io/seabios/
> 
> That way we also don't need the roms/ submodules any more, i.e. we
> can remove both sources and binaries for the firmware from the
> release tarballs.

Thanks!

For developer convenience it would be nice to avoid manual steps after
git clone qemu.git. Maybe ./configure should check for firmware blobs
and automatically download them. There could be a ./configure
--disable-firmware-download option that distros can use to skip the
download when building everything from source.

Stefan


signature.asc
Description: PGP signature


Re: [PATCH] qemu-img: Consistent docs for convert -F

2021-10-11 Thread Hanna Reitz

On 21.09.21 16:28, Eric Blake wrote:

Use consistent capitalization, and fix a missed line (we duplicate the
qemu-img synopses in too many places).

Fixes: 1899bf4737 (qemu-img: Add -F shorthand to convert)
Signed-off-by: Eric Blake 
---
  docs/tools/qemu-img.rst | 2 +-
  qemu-img-cmds.hx| 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)


Thanks, applied to my block branch:

https://gitlab.com/hreitz/qemu/-/commits/block

Speaking of duplicating in too many places, comparing the lines in 
qemu-img.rst and qemu-img-cmds.hx I noticed that `--skip-broken-bitmaps` 
too is missing from qemu-img-cmds.hx, so perhaps that’s also something 
we want to fix.


Hanna




Re: [PATCH 6/8] tcg/aarch64: Support TCG_TARGET_SIGNED_ADDR32

2021-10-11 Thread Alex Bennée


Richard Henderson  writes:

> AArch64 has both sign and zero-extending addressing modes, which
> means that either treatment of guest addresses is equally efficient.
> Enabling this for AArch64 gives us testing of the feature in CI.

So which guests front ends will exercise this backend? I realise you
never mentioned it in the cover letter. Is this something we can
exercise in 32 bit user mode tests?

> Signed-off-by: Richard Henderson 
> ---
>  tcg/aarch64/tcg-target-sa32.h |  8 -
>  tcg/aarch64/tcg-target.c.inc  | 68 ++-
>  2 files changed, 51 insertions(+), 25 deletions(-)
>
> diff --git a/tcg/aarch64/tcg-target-sa32.h b/tcg/aarch64/tcg-target-sa32.h
> index cb185b1526..c99e502e4c 100644
> --- a/tcg/aarch64/tcg-target-sa32.h
> +++ b/tcg/aarch64/tcg-target-sa32.h
> @@ -1 +1,7 @@
> -#define TCG_TARGET_SIGNED_ADDR32 0
> +/*
> + * AArch64 has both SXTW and UXTW addressing modes, which means that
> + * it is agnostic to how guest addresses should be represented.
> + * Because aarch64 is more common than the other hosts that will
> + * want to use this feature, enable it for continuous testing.
> + */
> +#define TCG_TARGET_SIGNED_ADDR32 1
> diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
> index 5edca8d44d..88b2963f9d 100644
> --- a/tcg/aarch64/tcg-target.c.inc
> +++ b/tcg/aarch64/tcg-target.c.inc
> @@ -12,6 +12,7 @@
>  
>  #include "../tcg-pool.c.inc"
>  #include "qemu/bitops.h"
> +#include "tcg-target-sa32.h"
>  
>  /* We're going to re-use TCGType in setting of the SF bit, which controls
> the size of the operation performed.  If we know the values match, it
> @@ -804,12 +805,12 @@ static void tcg_out_insn_3617(TCGContext *s, 
> AArch64Insn insn, bool q,
>  }
>  
>  static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
> -  TCGReg rd, TCGReg base, TCGType ext,
> +  TCGReg rd, TCGReg base, int option,
>TCGReg regoff)
>  {
>  /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
>  tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
> -  0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
> +  option << 13 | base << 5 | (rd & 0x1f));
>  }
>  
>  static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
> @@ -1124,7 +1125,7 @@ static void tcg_out_ldst(TCGContext *s, AArch64Insn 
> insn, TCGReg rd,
>  
>  /* Worst-case scenario, move offset to temp register, use reg offset.  */
>  tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
> -tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
> +tcg_out_ldst_r(s, insn, rd, rn, 3 /* LSL #0 */, TCG_REG_TMP);
>  }
>  
>  static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
> @@ -1718,34 +1719,34 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg 
> addr_reg, MemOp opc,
>  
>  static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
> TCGReg data_r, TCGReg addr_r,
> -   TCGType otype, TCGReg off_r)
> +   int option, TCGReg off_r)
>  {
>  /* Byte swapping is left to middle-end expansion. */
>  tcg_debug_assert((memop & MO_BSWAP) == 0);
>  
>  switch (memop & MO_SSIZE) {
>  case MO_UB:
> -tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
> +tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, option, off_r);
>  break;
>  case MO_SB:
>  tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
> -   data_r, addr_r, otype, off_r);
> +   data_r, addr_r, option, off_r);
>  break;
>  case MO_UW:
> -tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
> +tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, option, off_r);
>  break;
>  case MO_SW:
>  tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
> -   data_r, addr_r, otype, off_r);
> +   data_r, addr_r, option, off_r);
>  break;
>  case MO_UL:
> -tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
> +tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, option, off_r);
>  break;
>  case MO_SL:
> -tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
> +tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, option, off_r);
>  break;
>  case MO_Q:
> -tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
> +tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, option, off_r);
>  break;
>  default:
>  tcg_abort();
> @@ -1754,50 +1755,68 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, 
> MemOp memop, TCGType ext,
>  
>  static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
> TCGReg data_r, TCGReg addr_r,
> -  

Re: [PATCH v2] hw/usb/vt82c686-uhci-pci: Use ISA instead of PCI interrupts

2021-10-11 Thread BALATON Zoltan

On Mon, 11 Oct 2021, Gerd Hoffmann wrote:

On Tue, Oct 05, 2021 at 03:12:05PM +0200, BALATON Zoltan wrote:

This device is part of a superio/ISA bridge chip and IRQs from it are
routed to an ISA interrupt set by the Interrupt Line PCI config
register. Change uhci_update_irq() to allow this and use it from
vt82c686-uhci-pci.


Hmm, shouldn't this logic be part of the superio bridge emulation?


But how? The ISA bridge does not know about PCI and PCI does not know 
about ISA. UHCI is a PCIDevice and would raise PCI interrupts. Where and 
how could I convert that to ISA interrupts? (Oh and ISA in QEMU is not 
Qdev'ified and I don't want to do that as it's too much work and too much 
to break that I can't even test so if an alternative solution involves 
that then get somebody do that first.) This patch puts the irq mapping in 
the vt82xx specific vt82c686-uhci-pci.c which in the real chip also 
contains the ISA bridge so in a way it's part of the superio bridge 
emulation in that this uhci variant is part of that chip model.


Regards,
BALATON Zoltan



Re: [PATCH 5/8] linux-user: Support TCG_TARGET_SIGNED_ADDR32

2021-10-11 Thread Alex Bennée


Richard Henderson  writes:

> When using reserved_va, which is the default for a 64-bit host
> and a 32-bit guest, set guest_base_signed_addr32 if requested
> by TCG_TARGET_SIGNED_ADDR32, and the executable layout allows.
>
> Signed-off-by: Richard Henderson 
> ---
>  include/exec/cpu-all.h |  4 ---
>  linux-user/elfload.c   | 62 ++
>  2 files changed, 50 insertions(+), 16 deletions(-)
>
> diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> index 80b5e17329..71d8e1de7a 100644
> --- a/include/exec/cpu-all.h
> +++ b/include/exec/cpu-all.h
> @@ -278,11 +278,7 @@ extern intptr_t qemu_host_page_mask;
>  #define PAGE_RESET 0x0040
>  /* For linux-user, indicates that the page is MAP_ANON. */
>  #define PAGE_ANON  0x0080
> -
> -#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
> -/* FIXME: Code that sets/uses this is broken and needs to go away.  */
>  #define PAGE_RESERVED  0x0100
> -#endif

Can we reference why this FIXME is being dropped in the commit message?
Looking at the current tree state I can see several uses of it due to
moves in 5b6dd8683d (exec: move TB handling to translate-all.c) which
post-date 2e9a5713f0 (Remove PAGE_RESERVED).

Otherwise looks reasonable:

Reviewed-by: Alex Bennée 

-- 
Alex Bennée



Re: ACPI endianness

2021-10-11 Thread BALATON Zoltan

On Mon, 11 Oct 2021, Philippe Mathieu-Daudé wrote:

On 10/10/21 15:24, BALATON Zoltan wrote:

Hello,

I'm trying to fix shutdown and reboot on pegasos2 which uses ACPI as
part of the VIA VT8231 (similar to and modelled in hw/isa/vt82c686b.c)
and found that the guest writes to ACPI PM1aCNT register come out with
wrong endianness but not shure why. I have this:

$ qemu-system-ppc -M pegasos2 -monitor stdio
(qemu) info mtree
[...]
memory-region: pci1-io
  - (prio 0, i/o): pci1-io
[...]
    0f00-0f7f (prio 0, i/o): via-pm
  0f00-0f03 (prio 0, i/o): acpi-evt
  0f04-0f05 (prio 0, i/o): acpi-cnt
  0f08-0f0b (prio 0, i/o): acpi-tmr

memory-region: system
  - (prio 0, i/o): system
    -1fff (prio 0, ram): pegasos2.ram
    8000-bfff (prio 0, i/o): alias pci1-mem0-win
@pci1-mem 8000-bfff
    c000-dfff (prio 0, i/o): alias pci0-mem0-win
@pci0-mem c000-dfff
    f100-f100 (prio 0, i/o): mv64361
    f800-f8ff (prio 0, i/o): alias pci0-io-win
@pci0-io -00ff
    f900-f9ff (prio 0, i/o): alias pci0-mem1-win
@pci0-mem -00ff
    fd00-fdff (prio 0, i/o): alias pci1-mem1-win
@pci1-mem -00ff
    fe00-feff (prio 0, i/o): alias pci1-io-win
@pci1-io -00ff
    ff80- (prio 0, i/o): alias pci1-mem3-win
@pci1-mem ff80-
    fff0-fff7 (prio 0, rom): pegasos2.rom

The guest (which is big endian PPC and I think wotks on real hardware)
writes to 0xf05 in the io region which should be the high byte of the
little endian register but in the acpi code it comes out wrong, instead
of 0x2800 I get in acpi_pm1_cnt_write: val=0x28


Looks like a northbridge issue (MV64340).
Does Pegasos2 enables bus swapping?
See hw/pci-host/mv64361.c:585:

static void warn_swap_bit(uint64_t val)
{
   if ((val & 0x300ULL) >> 24 != 1) {
   qemu_log_mask(LOG_UNIMP, "%s: Data swap not implemented", __func__);
   }
}


No, guests don't use this feature just byteswap themselves and write 
little endian values to the mapped io region. (Or in this case just write 
one byte of the 16 bit value, it specifically writes 0x28 to 0xf05.) 
That's why I think the device model should not byteswap itself so the acpi 
regions should be declared native endian and let the guest handle it. Some 
mentions I've found say that native endian means don't byteswap, little 
endian means byteswap if vcpu is big endian and big endian means byteswap 
if vcpu is little endian. Since guests already account for this and write 
little endian values to these regions there's no need to byteswap in 
device model and changing to native endian should not affect little endian 
machines so unless there's a better argument I'll try sending a patch.


Regards,
BALATON Zoltan

Re: [PATCH v4 00/11] virtio-iommu: Add ACPI support

2021-10-11 Thread Haiwei Li
On Fri, Oct 8, 2021 at 11:18 PM Jean-Philippe Brucker
 wrote:
>
> On Tue, Oct 05, 2021 at 11:45:42AM -0400, Michael S. Tsirkin wrote:
> > Looks like this can not be applied yet because the bypass bit
> > isn't in yet. what's up with that?
>
> The boot-bypass bit isn't a hard dependency for this series, but it will
> be needed for full support eventually. It will be delayed by spec and
> Linux header changes
>
> In the meantime we can work around the problem by having the boot disks
> bypass the IOMMU (virtio without iommu-platform, or iommu-bypass bus).

Hi Jean,

I have tested on x86 platform with environment:

qemu, your personal repo: http://jpbrucker.net/git/qemu, branch:
virtio-iommu/acpi-2021-10-01
linux kernel, 5.14
qemu command, with '-device virtio-iommu' and boot disk
'virtio-blk-pci,iommu_platform=off'

But the guest boot failed. The print is as follows:

Begin: Loading essential drivers ... done.
Begin: Running /scripts/init-premount ... done.
Begin: Mounting root file system ... Begin: Running /scripts/local-top ... done.
Begin: Running /scripts/local-premount ... [7.490238] Btrfs
loaded, crc32c=crc32c-generic, zoned=no
Scanning for Btrfs filesystems
done.
[   10.593238] _warn_unseeded_randomness: 300 callbacks suppressed
[   10.593243] random: get_random_u32 called from
bucket_table_alloc.isra.0+0x123/0x430 with crng_init=0
[   37.534015] random: get_random_u64 called from
dup_task_struct+0x415/0xa10 with crng_init=0
[   37.539950] random: get_random_u64 called from
arch_pick_mmap_layout+0x411/0x5e0 with crng_init=0
[   37.539973] random: get_random_u64 called from
arch_pick_mmap_layout+0x381/0x5e0 with crng_init=0
Begin: Waiting for root file system ... [   38.624374]
_warn_unseeded_randomness: 38 callbacks suppressed
[   38.624389] random: get_random_u64 called from
dup_task_struct+0x415/0xa10 with crng_init=0
Begin: Running /scripts/local-block ... [   38.630760] random:
get_random_u64 called from arch_pick_mmap_layout+0x410
[   38.630767] random: get_random_u64 called from
arch_pick_mmap_layout+0x381/0x5e0 with crng_init=0
mdadm: No arrays found in config file or automatically
done.
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
[   39.633914] _warn_unseeded_randomness: 1278 callbacks suppressed
[   39.633918] random: get_random_u64 called from
dup_task_struct+0x415/0xa10 with crng_init=0
[   39.634867] random: get_random_u64 called from
arch_pick_mmap_layout+0x411/0x5e0 with crng_init=0
[   39.634875] random: get_random_u64 called from
arch_pick_mmap_layout+0x381/0x5e0 with crng_init=0
mdadm: error opening /dev/md?*: No such file or directory
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
mdadm: No arrays found in config file or automatically
done.
Gave up waiting for root file system device.  Common problems:
 - Boot args (cat /proc/cmdline)
   - Check rootdelay= (did the system wait long enough?)
 - Missing modules (cat /proc/modules; ls /dev)
ALERT!  UUID=3caf26b5-4d08-43e0-8634-7573269c4f70 does not exist.
Dropping to a shell!

Any suggestions? Thanks.

--
Haiwei



Re: [PATCH 2/8] accel/tcg: Split out g2h_tlbe

2021-10-11 Thread Alex Bennée


Richard Henderson  writes:

> Create a new function to combine a CPUTLBEntry addend
> with the guest address to form a host address.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alex Bennée 

-- 
Alex Bennée



Re: [PATCH 1/8] tcg: Add TCG_TARGET_SIGNED_ADDR32

2021-10-11 Thread Alex Bennée


Richard Henderson  writes:

> Define as 0 for all tcg hosts.  Put this in a separate header,
> because we'll want this in places that do not ordinarily have
> access to all of tcg/tcg.h.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alex Bennée 

-- 
Alex Bennée



Re: [PATCH v2 0/2] pylint: fix new errors and warnings in qemu-iotests

2021-10-11 Thread Emanuele Giuseppe Esposito




On 11/10/2021 11:29, Hanna Reitz wrote:

On 08.10.21 08:28, Emanuele Giuseppe Esposito wrote:

There are some warnings and errors that we either miss or
are new in pylint. Anyways, test 297 of qemu-iotests fails
because of that, so we need to fix it.

All these fixes involve just indentation or additional spaces
added.

Signed-off-by: Emanuele Giuseppe Esposito 
---
v2:
* temporarly enable and then disable "bad whitespace" error in 
image-fleecing

* better indentation for a fix in test 129 in patch one


So the changes look good to me, but I can’t get my pylint to generate a 
bad-whitespace warning no matter how hard I try.  (When you asked on IRC 
whether others see pylint warnings, I thought you meant the 
consider-using-f-string warnings that John disabled in 
3765315d4c84f9c0799744f43a314169baaccc05.)


I have pylint 2.11.1, which should be the newest version.  So I tried to 
look around what might be the cause and found this: 
https://pylint.pycqa.org/en/latest/whatsnew/2.6.html – it seems that as 
of pylint 2.6, bad-whitespace warnings are no longer emitted.  If that’s 
the reason why I can’t see the warning, then I think we should take only 
patch 1 (because it just makes sense), but skip patch 2.




Yes you are right. I had 2.4.4, and now that I upgraded to 2.11.1 I 
don't see bad-whitespace errors anymore (actually pylint does not seem 
to complain at all). So I agree we can just take patch 1, as formatting 
is wrong anyways.


Thank you,
Emanuele




Re: [PATCH v2] hw/usb/vt82c686-uhci-pci: Use ISA instead of PCI interrupts

2021-10-11 Thread Gerd Hoffmann
On Tue, Oct 05, 2021 at 03:12:05PM +0200, BALATON Zoltan wrote:
> This device is part of a superio/ISA bridge chip and IRQs from it are
> routed to an ISA interrupt set by the Interrupt Line PCI config
> register. Change uhci_update_irq() to allow this and use it from
> vt82c686-uhci-pci.

Hmm, shouldn't this logic be part of the superio bridge emulation?

take care,
  Gerd




Re: [PATCH v2 0/2] pylint: fix new errors and warnings in qemu-iotests

2021-10-11 Thread Hanna Reitz

On 08.10.21 08:28, Emanuele Giuseppe Esposito wrote:

There are some warnings and errors that we either miss or
are new in pylint. Anyways, test 297 of qemu-iotests fails
because of that, so we need to fix it.

All these fixes involve just indentation or additional spaces
added.

Signed-off-by: Emanuele Giuseppe Esposito 
---
v2:
* temporarly enable and then disable "bad whitespace" error in image-fleecing
* better indentation for a fix in test 129 in patch one


So the changes look good to me, but I can’t get my pylint to generate a 
bad-whitespace warning no matter how hard I try.  (When you asked on IRC 
whether others see pylint warnings, I thought you meant the 
consider-using-f-string warnings that John disabled in 
3765315d4c84f9c0799744f43a314169baaccc05.)


I have pylint 2.11.1, which should be the newest version.  So I tried to 
look around what might be the cause and found this: 
https://pylint.pycqa.org/en/latest/whatsnew/2.6.html – it seems that as 
of pylint 2.6, bad-whitespace warnings are no longer emitted.  If that’s 
the reason why I can’t see the warning, then I think we should take only 
patch 1 (because it just makes sense), but skip patch 2.


Hanna




Re: [RFC PATCH v1 2/2] s390x/kvm: Pass SIGP Stop flags

2021-10-11 Thread David Hildenbrand

On 11.10.21 10:40, Christian Borntraeger wrote:



Am 11.10.21 um 09:09 schrieb David Hildenbrand:

On 08.10.21 22:38, Eric Farman wrote:

When building a Stop IRQ to pass to KVM, we should incorporate
the flags if handling the SIGP Stop and Store Status order.
With that, KVM can reject other orders that are submitted for
the same CPU while the operation is fully processed.

Signed-off-by: Eric Farman 
Acked-by: Janosch Frank 
---
   target/s390x/kvm/kvm.c | 4 
   1 file changed, 4 insertions(+)

diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 5b1fdb55c4..701b9ddc88 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -2555,6 +2555,10 @@ void kvm_s390_stop_interrupt(S390CPU *cpu)
   .type = KVM_S390_SIGP_STOP,
   };
+    if (cpu->env.sigp_order == SIGP_STOP_STORE_STATUS) {
+    irq.u.stop.flags = KVM_S390_STOP_FLAG_STORE_STATUS;
+    }
+


KVM_S390_STOP_FLAG_STORE_STATUS tells KVM to perform the store status as well 
... is that really what we want?

At least it should not hurt I guess. QEMU then does it again?


The thing is, that before we officially completed the action in user 
space (and let other SIGP actions actually succeed in user space on the 
CPU), the target CPU will be reported as !busy in the kernel already. 
And before we even inject the stop interrupt, the CPU will be detected 
as !busy in the kernel. I guess it will fix some cases where we poll via 
SENSE if the stop and store happened, because the store *did* happen in 
the kernel and we'll simply store again in user space.


However, I wonder if we want to handle it more generically: Properly 
flag a CPU as busy for SIGP when we start processing the order until we 
completed processing the order. That would allow to handle other SIGP 
operations in user space cleanly, without any chance for races with 
SENSE code running in the kernel.


--
Thanks,

David / dhildenb




Re: [PATCH v3 1/1] virtio: write back F_VERSION_1 before validate

2021-10-11 Thread Cornelia Huck
On Mon, Oct 11 2021, Halil Pasic  wrote:

> The virtio specification virtio-v1.1-cs01 states: "Transitional devices
> MUST detect Legacy drivers by detecting that VIRTIO_F_VERSION_1 has not
> been acknowledged by the driver."  This is exactly what QEMU as of 6.1
> has done relying solely on VIRTIO_F_VERSION_1 for detecting that.
>
> However, the specification also says: "... the driver MAY read (but MUST
> NOT write) the device-specific configuration fields to check that it can
> support the device ..." before setting FEATURES_OK.
>
> In that case, any transitional device relying solely on
> VIRTIO_F_VERSION_1 for detecting legacy drivers will return data in
> legacy format.  In particular, this implies that it is in big endian
> format for big endian guests. This naturally confuses the driver which
> expects little endian in the modern mode.
>
> It is probably a good idea to amend the spec to clarify that
> VIRTIO_F_VERSION_1 can only be relied on after the feature negotiation
> is complete. Before validate callback existed, config space was only
> read after FEATURES_OK. However, we already have two regressions, so
> let's address this here as well.
>
> The regressions affect the VIRTIO_NET_F_MTU feature of virtio-net and
> the VIRTIO_BLK_F_BLK_SIZE feature of virtio-blk for BE guests when
> virtio 1.0 is used on both sides. The latter renders virtio-blk unusable
> with DASD backing, because things simply don't work with the default.
> See Fixes tags for relevant commits.
>
> For QEMU, we can work around the issue by writing out the feature bits
> with VIRTIO_F_VERSION_1 bit set.  We (ab)use the finalize_features
> config op for this. This isn't enough to address all vhost devices since
> these do not get the features until FEATURES_OK, however it looks like
> the affected devices actually never handled the endianness for legacy
> mode correctly, so at least that's not a regression.
>
> No devices except virtio net and virtio blk seem to be affected.
>
> Long term the right thing to do is to fix the hypervisors.
>
> Cc:  #v4.11
> Signed-off-by: Halil Pasic 
> Fixes: 82e89ea077b9 ("virtio-blk: Add validation for block size in config 
> space")
> Fixes: fe36cbe0671e ("virtio_net: clear MTU when out of range")
> Reported-by: mark...@us.ibm.com
> Reviewed-by: Cornelia Huck 
> ---
>
> @Connie: I made some more commit message changes to accommodate Michael's
> requests. I just assumed these will work or you as well and kept your
> r-b. Please shout at me if it needs to be dropped :)

No need to shout, still looks good to me :)

> ---
>  drivers/virtio/virtio.c | 11 +++
>  1 file changed, 11 insertions(+)
>
> diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
> index 0a5b54034d4b..236081afe9a2 100644
> --- a/drivers/virtio/virtio.c
> +++ b/drivers/virtio/virtio.c
> @@ -239,6 +239,17 @@ static int virtio_dev_probe(struct device *_d)
>   driver_features_legacy = driver_features;
>   }
>  
> + /*
> +  * Some devices detect legacy solely via F_VERSION_1. Write
> +  * F_VERSION_1 to force LE config space accesses before FEATURES_OK for
> +  * these when needed.
> +  */
> + if (drv->validate && !virtio_legacy_is_little_endian()
> +   && device_features & BIT_ULL(VIRTIO_F_VERSION_1)) {
> + dev->features = BIT_ULL(VIRTIO_F_VERSION_1);
> + dev->config->finalize_features(dev);
> + }
> +
>   if (device_features & (1ULL << VIRTIO_F_VERSION_1))
>   dev->features = driver_features & device_features;
>   else
>
> base-commit: 60a9483534ed0d99090a2ee1d4bb0b8179195f51
> -- 
> 2.25.1




Re: [RFC PATCH v1 2/2] s390x/kvm: Pass SIGP Stop flags

2021-10-11 Thread Christian Borntraeger




Am 11.10.21 um 09:09 schrieb David Hildenbrand:

On 08.10.21 22:38, Eric Farman wrote:

When building a Stop IRQ to pass to KVM, we should incorporate
the flags if handling the SIGP Stop and Store Status order.
With that, KVM can reject other orders that are submitted for
the same CPU while the operation is fully processed.

Signed-off-by: Eric Farman 
Acked-by: Janosch Frank 
---
  target/s390x/kvm/kvm.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 5b1fdb55c4..701b9ddc88 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -2555,6 +2555,10 @@ void kvm_s390_stop_interrupt(S390CPU *cpu)
  .type = KVM_S390_SIGP_STOP,
  };
+    if (cpu->env.sigp_order == SIGP_STOP_STORE_STATUS) {
+    irq.u.stop.flags = KVM_S390_STOP_FLAG_STORE_STATUS;
+    }
+


KVM_S390_STOP_FLAG_STORE_STATUS tells KVM to perform the store status as well 
... is that really what we want?

At least it should not hurt I guess. QEMU then does it again?


Maybe we want a different (more generic) way to tell KVM that a CPU is 
temporarily busy for SIGP orders?





Re: [PATCH v2 2/2] target/i386: Remove core-capability in Snowridge CPU model

2021-10-11 Thread Chenyi Qiang

Hi Eduardo,

Ping for this minor change.

On 8/27/2021 2:48 PM, Chenyi Qiang wrote:

Because core-capability releated features are model-specific and KVM
won't support it, remove the core-capability in CPU model to avoid the
warning message.

Signed-off-by: Chenyi Qiang 
---
  target/i386/cpu.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index aebf81d9c9..86a15af1ed 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -3684,9 +3684,10 @@ static const X86CPUDefinition builtin_x86_defs[] = {
  },
  {
  .version = 4,
-.note = "no split lock detect",
+.note = "no split lock detect, no core-capability",
  .props = (PropValue[]) {
  { "split-lock-detect", "off" },
+{ "core-capability", "off" },
  { /* end of list */ },
  },
  },





Re: Deprecate the ppc405 boards in QEMU?

2021-10-11 Thread Thomas Huth

On 06/10/2021 09.25, Thomas Huth wrote:

On 05/10/2021 23.53, BALATON Zoltan wrote:
[...]
Maybe these 405 boards in QEMU ran with modified firmware where the memory 
detection was patched out but it seems to detect the RAM so I wonder where 
it gets that from. Maybe by reading the SDRAM controller DCRs 
ppc4xx_sdram_init() sets up. Then I'm not sure what it needs the SPD for, 
I forgot how this worked on sam460ex. Maybe for the speed calibration, so 
could be it detects ram by reading DCRs then tries to get SPD data and 
that's where it stops as i2c is not emulated on taihu. This could be 
confirmed by checking what it pokes with -d guest_errors that shows 
accesses to missing devices but don't know where 405 has the i2c 
controller and if it's the same as newer SoCs. If so that could be reused 
and an i2c bus could be added with the SPD data like in sam460ex to make 
u-boot happy or you could skip this in u-boot.


FWIW, I've just tried the latter (skipping the sdram init in u-boot), and 
indeed, I can get to the u-boot prompt now.

[...]> I've also attached the patch with my modifications to u-boot.

FYI, the changes can now be found on this branch here:

 https://gitlab.com/huth/u-boot/-/commits/taihu

I also added a gitlab-CI file, so you can now download the u-boot.bin as an 
artifact from the corresponding pipeline, e.g.:


 https://gitlab.com/huth/u-boot/-/jobs/1667201028

 Thomas




Re: [PATCH 1/2] hw/core/machine: Split out smp_parse as an inline API

2021-10-11 Thread wangyanan (Y)

Hi Markus,

On 2021/10/11 13:26, Markus Armbruster wrote:

Yanan Wang  writes:


Functionally smp_parse() is only called once and in one place
i.e. machine_set_smp, the possible second place where it'll be
called should be some unit tests if any.

Actually we are going to introduce an unit test for the parser.
For necessary isolation of the tested code, split smp_parse out
into a separate header as an inline API.

Why inline?

The motivation of the splitting is to isolate the tested smp_parse
from the other unrelated code in machine.c, so that we can solve
the build dependency problem for the unit test.

I once tried to split smp_parse out into a source file in [1] for the
test, but it looks more concise and convenient to make it as an
inline function in a header compared to [1]. Given that we only call
it in one place, it may not be harmful to keep it an inline.

Anyway, I not sure the method in this patch is most appropriate
and compliant. If it's just wrong I can change back to [1]. :)

[1] 
https://lore.kernel.org/qemu-devel/20210910073025.16480-16-wangyana...@huawei.com/#t


Thanks,
Yanan

Signed-off-by: Yanan Wang 

.





Re: [PATCH v3 6/6] tests/qapi-schema: Test cases for aliases

2021-10-11 Thread Markus Armbruster
Kevin Wolf  writes:

[...]

> What I had in mind was using the schema to generate the necessary code,
> using the generic keyval parser everywhere, and just providing a hook
> where the QDict could be modified after the keyval parser and before the
> visitor. Most command line options would not have any explicit code for
> parsing input, only the declarative schema and the final option handler
> getting a QAPI type (which could actually be the corresponding QMP
> command handler in the common case).

A walk to the bakery made me see a problem with transforming the qdict
between keyval parsing and the visitor: error reporting.  On closer
investigation, the problem exists even with just aliases.

All experiments performed with your complete QAPIfication at
https://repo.or.cz/qemu/kevin.git qapi-alias-chardev-v4.


Example: flattening leads to suboptimal error

$ qemu-system-x86_64 -chardev udp,id=chr0,port=12345,ipv4=om
qemu-system-x86_64: -chardev udp,id=chr0,port=12345,ipv4=om: Parameter 
'backend.data.remote.data.ipv4' expects 'on' or 'off'

We're using "alternate" notation, but the error message barks back in
"standard" notation.  It comes from the visitor.  While less than
pleasant, it's still understandable, because the "standard" key ends
with the "alternate" key.


Example: renaming leads to confusing error

So far, we rename only members of type 'str', where the visitor can't
fail.  Just for illustrating the problem, I'm adding one where it can:

diff --git a/qapi/char.json b/qapi/char.json
index 0e39840d4f..b436d83cbe 100644
--- a/qapi/char.json
+++ b/qapi/char.json
@@ -398,7 +398,8 @@
 ##
 { 'struct': 'ChardevRingbuf',
   'data': { '*size': 'int' },
-  'base': 'ChardevCommon' }
+  'base': 'ChardevCommon',
+  'aliases': [ { 'name': 'capacity', 'source': [ 'size' ] } ] }

 ##
 # @ChardevQemuVDAgent:

With this patch:

$ qemu-system-x86_64 -chardev ringbuf,id=chr0,capacity=lots
qemu-system-x86_64: -chardev ringbuf,id=chr0,capacity=lots: Parameter 
'backend.data.size' expects integer

The error message fails to connect to the offending key=value.


Example: manual transformation leads to confusion #1

Starting point:

$ qemu-system-x86_64 -chardev udp,id=chr0,port=12345,localaddr=localhost

Works.  host defaults to localhost, localport defaults to 0, same as in
git master.

Now "forget" to specify @port:

$ qemu-system-x86_64 -chardev udp,id=chr0,localaddr=localhost
qemu-system-x86_64: -chardev udp,id=chr0,localaddr=localhost: Parameter 
'backend.data.remote.data.port' is missing

Again, the visitor's error message uses "standard" notation.

We obediently do what the error message tells us to do:

$ qemu-system-x86_64 -chardev 
udp,id=chr0,localaddr=localhost,backend.data.remote.data.port=12345
qemu-system-x86_64: -chardev 
udp,id=chr0,localaddr=localhost,backend.data.remote.data.port=12345: Parameters 
'backend.*' used inconsistently

Mission accomplished: confusion :)


Example: manual transformation leads to confusion #2

Confusion is possible even without tricking the user into mixing
"standard" and "alternate" explicitly:

$ qemu-system-x86_64 -chardev udp,id=chr0,backend.data.remote.type=udp
qemu-system-x86_64: -chardev udp,id=chr0,backend.data.remote.type=udp: 
Parameters 'backend.*' used inconsistently

Here, the user tried to stick to "standard", but forgot to specify a
required member.  The transformation machinery then "helpfully"
transformed nothing into something, which made the visitor throw up.


Clear error reporting is a critical part of a human-friendly interface.
We have two separate problems with it:

1. The visitor reports errors as if aliases didn't exist

   Fixing this is "merely" a matter of tracing back alias applications.
   More complexity...

2. The visitor reports errors as if manual transformation didn't exist

   Manual transformation can distort the users input beyond recognition.
   The visitor reports errors for the transformed input.

   To fix this one, we'd have to augment the parse tree so it points
   back at the actual user input, and then make the manual
   transformations preserve that.  Uff!

I'm afraid I need another round of thinking on how to best drag legacy
syntax along when we QAPIfy.

[...]




Re: [PATCH] hw/misc: Add a virtual pci device to dynamically attach memory to QEMU

2021-10-11 Thread David Hildenbrand

virito-mem currently relies on having a single sparse memory region (anon
mmap, mmaped file, mmaped huge pages, mmap shmem) per VM. Although we can
share memory with other processes, sharing with other VMs is not intended.
Instead of actually mmaping parts dynamically (which can be quite
expensive), virtio-mem relies on punching holes into the backend and
dynamically allocating memory/file blocks/... on access.

So the easy way to make it work is:

a) Exposing the CXL memory to the buddy via dax/kmem, esulting in device
memory getting managed by the buddy on a separate NUMA node.



Linux kernel buddy system? how to guarantee other applications don't apply 
memory
from it


Excellent question. Usually, you would online the memory to ZONE_MOVABLE,
such that even if some other allocation ended up there, that it could
get migrated somewhere else.

For example, "daxctl reconfigure-device" tries doing that as default:

https://pmem.io/ndctl/daxctl-reconfigure-device.html

However, I agree that we might actually want to tell the system to not
use this CPU-less node as fallback for other allocations, and that we
might not want to swap out such memory etc.


But, in the end all that virtio-mem needs to work in the hypervisor is

a) A sparse memmap (anonymous RAM, memfd, file)
b) A way to populate memory within that sparse memmap (e.g., on fault,
   using madvise(MADV_POPULATE_WRITE), fallocate())
c) A way to discard memory (madvise(MADV_DONTNEED),
   fallocate(FALLOC_FL_PUNCH_HOLE))

So instead of using anonymous memory+mbind, you can also mmap a sparse file
and rely on populate-on-demand. One alternative for your use case would be
to create a DAX  filesystem on that CXL memory (IIRC that should work) and
simply providing virtio-mem with a sparse file located on that filesystem.

Of course, you can also use some other mechanism as you might have in
your approach, as long as it supports a,b,c.





b) (optional) allocate huge pages on that separate NUMA node.
c) Use ordinary memory-device-ram or memory-device-memfd (for huge pages),
*bidning* the memory backend to that special NUMA node.

  
"-object memory-backend/device-ram or memory-device-memfd, id=mem0, size=768G"

How to bind backend memory to NUMA node



I think the syntax is "policy=bind,host-nodes=X"

whereby X is a node mask. So for node "0" you'd use "host-nodes=0x1" for "5"
"host-nodes=0x20" etc.



This will dynamically allocate memory from that special NUMA node, resulting
in the virtio-mem device completely being backed by that device memory,
being able to dynamically resize the memory allocation.


Exposing an actual devdax to the virtio-mem device, shared by multiple VMs
isn't really what we want and won't work without major design changes. Also,
I'm not so sure it's a very clean design: exposing memory belonging to other
VMs to unrelated QEMU processes. This sounds like a serious security hole:
if you managed to escalate to the QEMU process from inside the VM, you can
access unrelated VM memory quite happily. You want an abstraction
in-between, that makes sure each VM/QEMU process only sees private memory:
for example, the buddy via dax/kmem.


Hi David
Thanks for your suggestion, also sorry for my delayed reply due to my long 
vacation.
How does current virtio-mem dynamically attach memory to guest, via page fault?


Essentially you have a large sparse mmap. Withing that mmap, memory is
populated on demand. Instead if mmap/munmap you perform a single large
mmap and then dynamically populate memory/discard memory.

Right now, memory is populated via page faults on access. This is
sub-optimal when dealing with limited resources (i.e., hugetlbfs,
file blocks) and you might run out of backend memory.

I'm working on a "prealloc" mode, which will preallocate/populate memory
necessary for exposing the next block of memory to the VM, and which
fails gracefully if preallocation/population fails in the case of such
limited resources.

The patch resides on:
https://github.com/davidhildenbrand/qemu/tree/virtio-mem-next

commit ded0e302c14ae1b68bdce9059dcca344e0a5f5f0
Author: David Hildenbrand 
Date:   Mon Aug 2 19:51:36 2021 +0200

virtio-mem: support "prealloc=on" option

Especially for hugetlb, but also for file-based memory backends, we'd

like to be able to prealloc memory, especially to make user errors less
severe: crashing the VM when there are not sufficient huge pages around.

A common option for hugetlb will be using "reserve=off,prealloc=off" for

the memory backend and "prealloc=on" for the virtio-mem device. This
way, no huge pages will be reserved for the process, but we can recover
if there are no actual huge pages when plugging memory.

Signed-off-by: David Hildenbrand 



--
Thanks,

David / dhildenb




Re: Moving QEMU downloads to GitLab Releases?

2021-10-11 Thread Gerd Hoffmann
  Hi,

> > I guess the main question is who is using the ROM/BIOS sources in the
> > tarballs, and would this 2-step process work for those users? If there
> > are distros relying on them then maybe there are some more logistics to
> > consider since the make-release downloads are both time-consuming and
> > prone to network errors/stalls since it relies on the availability of a
> > good number of different hosts.
> 
> Great, maybe Gerd can comment on splitting out firmware.

I think the binaries are mostly a convenience feature for developers.
Distros typically build from source anyway, and typically they build
from upstream source instead of qemu submodule.

The idea to split them out to a separate repo is exists for quite a
while.  I have an old qemu-firmware repo laying around on my disk, which
basically moves roms/ + submodules and the binaries built from it over.

I think with the switch to gitlab it doesn't make sense any more to
commit pre-built firmware binaries to a git repo.  Instead we should
build the firmware in gitlab ci, i.e. effectively move the build rules
we have right now in roms/Makefile to .gitlab-ci.d/, then publish the
firmware binaries as build artifacts or gitlab pages.

When done just remove the pre-build binaries from git and add a helper
script to fetch firmware binaries from gitlab instead.

> QEMU mirrors firmware sources on GitLab too. We're able to provide the
> same level of download availability on our mirror firmware repos as for
> the main qemu.git repo.

I think enabling CI for the firmware mirrors should work given that it
is possible to specify a custom CI/CD configuration file, i.e. use
something like

/qemu-project/qemu/.gitlab-ci.d/firmware/seabios.yml

or

/qemu-project/qemu-firmware/seabios.yml

as config file for the 

/qemu-project/seabios/

mirror.  Then we can publish binaries using gitlab pages at

https://qemu-project.gitlab.io/seabios/

That way we also don't need the roms/ submodules any more, i.e. we
can remove both sources and binaries for the firmware from the
release tarballs.

take care,
  Gerd




Re: [RFC PATCH v1 2/2] s390x/kvm: Pass SIGP Stop flags

2021-10-11 Thread David Hildenbrand

On 08.10.21 22:38, Eric Farman wrote:

When building a Stop IRQ to pass to KVM, we should incorporate
the flags if handling the SIGP Stop and Store Status order.
With that, KVM can reject other orders that are submitted for
the same CPU while the operation is fully processed.

Signed-off-by: Eric Farman 
Acked-by: Janosch Frank 
---
  target/s390x/kvm/kvm.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 5b1fdb55c4..701b9ddc88 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -2555,6 +2555,10 @@ void kvm_s390_stop_interrupt(S390CPU *cpu)
  .type = KVM_S390_SIGP_STOP,
  };
  
+if (cpu->env.sigp_order == SIGP_STOP_STORE_STATUS) {

+irq.u.stop.flags = KVM_S390_STOP_FLAG_STORE_STATUS;
+}
+


KVM_S390_STOP_FLAG_STORE_STATUS tells KVM to perform the store status as 
well ... is that really what we want?


Maybe we want a different (more generic) way to tell KVM that a CPU is 
temporarily busy for SIGP orders?


--
Thanks,

David / dhildenb




Re: [RFC PATCH v1 1/2] s390x: sigp: Force Set Architecture to return Invalid Parameter

2021-10-11 Thread David Hildenbrand

On 08.10.21 22:38, Eric Farman wrote:

According to the Principles of Operation, the SIGP Set Architecture
order will return Incorrect State if some CPUs are not stopped, but
only if the CZAM facility is not present. If it is, the order will
return Invalid Parameter because the architecture mode cannot be
changed.

Since CZAM always exists when S390_FEAT_ZARCH exists, which in turn
exists for every defined CPU model, we can simplify this code.

Fixes: 075e52b81664 ("s390x/cpumodel: we are always in zarchitecture mode")
Signed-off-by: Eric Farman 
Reviewed-by: Christian Borntraeger 
Reviewed-by: Janosch Frank 


Reviewed-by: David Hildenbrand 


--
Thanks,

David / dhildenb




Re: [PATCH] monitor: Tidy up find_device_state()

2021-10-11 Thread Paolo Bonzini

On 16/09/21 13:17, Markus Armbruster wrote:

Commit 6287d827d4 "monitor: allow device_del to accept QOM paths"
extended find_device_state() to accept QOM paths in addition to qdev
IDs.  This added a checked conversion to TYPE_DEVICE at the end, which
duplicates the check done for the qdev ID case earlier, except it sets
a *different* error: GenericError "ID is not a hotpluggable device"
when passed a QOM path, and DeviceNotFound "Device 'ID' not found"
when passed a qdev ID.  Fortunately, the latter won't happen as long
as we add only devices to /machine/peripheral/.

Earlier, commit b6cc36abb2 "qdev: device_del: Search for to be
unplugged device in 'peripheral' container" rewrote the lookup by qdev
ID to use QOM instead of qdev_find_recursive(), so it can handle
buss-less devices.  It does so by constructing an absolute QOM path.
Works, but object_resolve_path_component() is easier.  Switching to it
also gets rid of the unclean duplication described above.

While there, avoid converting to TYPE_DEVICE twice, first to check
whether it's possible, and then for real.

Signed-off-by: Markus Armbruster 
---
  softmmu/qdev-monitor.c | 13 +
  1 file changed, 5 insertions(+), 8 deletions(-)


Queued, thanks.

Paolo


diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c
index a304754ab9..d1ab3c25fb 100644
--- a/softmmu/qdev-monitor.c
+++ b/softmmu/qdev-monitor.c
@@ -831,16 +831,12 @@ void qmp_device_add(QDict *qdict, QObject **ret_data, 
Error **errp)
  static DeviceState *find_device_state(const char *id, Error **errp)
  {
  Object *obj;
+DeviceState *dev;
  
  if (id[0] == '/') {

  obj = object_resolve_path(id, NULL);
  } else {
-char *root_path = object_get_canonical_path(qdev_get_peripheral());
-char *path = g_strdup_printf("%s/%s", root_path, id);
-
-g_free(root_path);
-obj = object_resolve_path_type(path, TYPE_DEVICE, NULL);
-g_free(path);
+obj = object_resolve_path_component(qdev_get_peripheral(), id);
  }
  
  if (!obj) {

@@ -849,12 +845,13 @@ static DeviceState *find_device_state(const char *id, 
Error **errp)
  return NULL;
  }
  
-if (!object_dynamic_cast(obj, TYPE_DEVICE)) {

+dev = (DeviceState *)object_dynamic_cast(obj, TYPE_DEVICE);
+if (!dev) {
  error_setg(errp, "%s is not a hotpluggable device", id);
  return NULL;
  }
  
-return DEVICE(obj);

+return dev;
  }
  
  void qdev_unplug(DeviceState *dev, Error **errp)







<    1   2