date:20191016

RE: [PATCH] ssi: xilinx_spips: Filter the non spi registers transactions

2019-10-16 Thread Sai Pavan Boddu

Hi Alistair,

> -----Original Message-----
> From: Alistair Francis 
> Sent: Thursday, October 17, 2019 4:39 AM
> To: Sai Pavan Boddu 
> Cc: Alistair Francis ; Peter Maydell
> ; qemu-devel@nongnu.org Developers  de...@nongnu.org>
> Subject: Re: [PATCH] ssi: xilinx_spips: Filter the non spi registers 
> transactions
> 
> On Sun, Oct 13, 2019 at 11:51 PM Sai Pavan Boddu
>  wrote:
> >
> > ZynqMP/Versal specific qspi registers should be handled inside
> > zynqmp_qspi_read/write calls. When a few of these transactions are
> > handled by the spi hooks, we see unexpected state changes on the spi bus.
> >
> > Signed-off-by: Sai Pavan Boddu 
> > ---
> >  hw/ssi/xilinx_spips.c | 26 --
> >  1 file changed, 24 insertions(+), 2 deletions(-)
> >
> > diff --git a/hw/ssi/xilinx_spips.c b/hw/ssi/xilinx_spips.c index
> > a309c71..4f9f8e0 100644
> > --- a/hw/ssi/xilinx_spips.c
> > +++ b/hw/ssi/xilinx_spips.c
> > @@ -109,6 +109,7 @@
> >  #define R_GPIO  (0x30 / 4)
> >  #define R_LPBK_DLY_ADJ  (0x38 / 4)
> >  #define R_LPBK_DLY_ADJ_RESET (0x33)
> > +#define R_IOU_TAPDLY_BYPASS (0x3C / 4)
> >  #define R_TXD1  (0x80 / 4)
> >  #define R_TXD2  (0x84 / 4)
> >  #define R_TXD3  (0x88 / 4)
> > @@ -139,6 +140,8 @@
> >  #define R_LQSPI_STS (0xA4 / 4)
> >  #define LQSPI_STS_WR_RECVD  (1 << 1)
> >
> > +#define R_DUMMY_CYCLE_EN(0xC8 / 4)
> > +#define R_ECO   (0xF8 / 4)
> >  #define R_MOD_ID(0xFC / 4)
> >
> >  #define R_GQSPI_SELECT  (0x144 / 4)
> > @@ -938,7 +941,16 @@ static uint64_t xlnx_zynqmp_qspips_read(void
> *opaque,
> >  int shortfall;
> >
> >  if (reg <= R_MOD_ID) {
> > -return xilinx_spips_read(opaque, addr, size);
> > +switch (addr) {
> > +case R_GPIO:
> > +case R_LPBK_DLY_ADJ:
> > +case R_IOU_TAPDLY_BYPASS:
> > +case R_DUMMY_CYCLE_EN:
> > +case R_ECO:
> > +return s->regs[addr / 4];
> > +default:
> > +return xilinx_spips_read(opaque, addr, size);
> 
> This doesn't seem right. This should have no functional change for the read
> function and has the consequence of not printing the memory accesses. If
> you try to debug this code now you won't see all of these operations in the
> log.
[Sai Pavan Boddu] Yeah, reads do not have any issue. But I see your point about
the debug prints.
> 
> > +}
> >  } else {
> >  switch (reg) {
> >  case R_GQSPI_RXD:
> > @@ -1063,7 +1075,17 @@ static void xlnx_zynqmp_qspips_write(void
> *opaque, hwaddr addr,
> >  uint32_t reg = addr / 4;
> >
> >  if (reg <= R_MOD_ID) {
> > -xilinx_qspips_write(opaque, addr, value, size);
> > +switch (reg) {
> > +case R_GPIO:
> > +case R_LPBK_DLY_ADJ:
> > +case R_IOU_TAPDLY_BYPASS:
> > +case R_DUMMY_CYCLE_EN:
> > +case R_ECO:
> > +s->regs[addr] = value;
> > +break;
> > +default:
> > +xilinx_qspips_write(opaque, addr, value, size);
> > +}
> 
> For the write code it looks like this skips the "no_reg_update" goto.
> Maybe that is the issue that you are seeing?
[Sai Pavan Boddu] Yes, no_reg_update triggers an update of the cs lines.
We can also put a check there to skip no_reg_update when it’s a zynqmp qspi.
I will try that and send a V2.

Thanks
Sai Pavan
> 
> Alistair
> 
> >  } else {
> >  switch (reg) {
> >  case R_GQSPI_CNFG:
> > --
> > 2.7.4
> >
> >

Re: [PATCH v10 13/15] docs/microvm.rst: document the new microvm machine type

2019-10-16 Thread Sergio Lopez


Marc-André Lureau  writes:

> Hi
>
> On Wed, Oct 16, 2019 at 12:19 PM Sergio Lopez  wrote:
>>
>> Document the new microvm machine type.
>>
>> Signed-off-by: Sergio Lopez 
>> Reviewed-by: Michael S. Tsirkin 
>> ---
>>  docs/microvm.rst | 98 
>>  1 file changed, 98 insertions(+)
>>  create mode 100644 docs/microvm.rst
>>
>> diff --git a/docs/microvm.rst b/docs/microvm.rst
>> new file mode 100644
>> index 00..0aab55576c
>> --- /dev/null
>> +++ b/docs/microvm.rst
>> @@ -0,0 +1,98 @@
>> +====================
>> +microvm Machine Type
>> +====================
>> +
>> +``microvm`` is a machine type inspired by ``Firecracker`` and
>> +constructed after its machine model.
>> +
>> +It's a minimalist machine type without ``PCI`` nor ``ACPI`` support,
>> +designed for short-lived guests. microvm also establishes a baseline
>> +for benchmarking and optimizing both QEMU and guest operating systems,
>> +since it is optimized for both boot time and footprint.
>> +
>> +
>> +Supported devices
>> +-----------------
>> +
>> +The microvm machine type supports the following devices:
>> +
>> +- ISA bus
>> +- i8259 PIC (optional)
>> +- i8254 PIT (optional)
>> +- MC146818 RTC (optional)
>> +- One ISA serial port (optional)
>> +- LAPIC
>> +- IOAPIC (with kernel-irqchip=split by default)
>> +- kvmclock (if using KVM)
>> +- fw_cfg
>> +- Up to eight virtio-mmio devices (configured by the user)
>> +
>> +
>> +Using the microvm machine type
>> +------------------------------
>> +
>> +Machine-specific options
>> +~~~~~~~~~~~~~~~~~~~~~~~~
>> +
>> +It supports the following machine-specific options:
>> +
>> +- microvm.x-option-roms=bool (Set off to disable loading option ROMs)
>> +- microvm.pit=OnOffAuto (Enable i8254 PIT)
>> +- microvm.isa-serial=bool (Set off to disable the instantiation of an ISA 
>> serial port)
>> +- microvm.pic=OnOffAuto (Enable i8259 PIC)
>> +- microvm.rtc=OnOffAuto (Enable MC146818 RTC)
>> +- microvm.auto-kernel-cmdline=bool (Set off to disable adding virtio-mmio 
>> devices to the kernel cmdline)
>> +
>> +
>> +Boot options
>> +~~~~~~~~~~~~
>> +
>> +By default, microvm uses ``qboot`` as its BIOS, to obtain better boot
>> +times, but it's also compatible with ``SeaBIOS``.
>> +
>> +As no current FW is able to boot from a block device using
>> +``virtio-mmio`` as its transport, a microvm-based VM needs to be run
>> +using a host-side kernel and, optionally, an initrd image.
>> +
>> +
>> +Running a microvm-based VM
>> +~~~~~~~~~~~~~~~~~~~~~~~~~~
>> +
>> +By default, microvm aims for maximum compatibility, enabling both
>> +legacy and non-legacy devices. In this example, a VM is created
>> +without passing any additional machine-specific option, using the
>> +legacy ``ISA serial`` device as console::
>> +
>> +  $ qemu-system-x86_64 -M microvm \
>> + -enable-kvm -cpu host -m 512m -smp 2 \
>> + -kernel vmlinux -append "earlyprintk=ttyS0 console=ttyS0 
>> root=/dev/vda" \
>> + -nodefaults -no-user-config -nographic \
>> + -serial stdio \
>> + -drive id=test,file=test.img,format=raw,if=none \
>> + -device virtio-blk-device,drive=test \
>> + -netdev tap,id=tap0,script=no,downscript=no \
>> + -device virtio-net-device,netdev=tap0
>> +
>> +While the example above works, you might be interested in reducing the
>> +footprint further by disabling some legacy devices. If you're using
>> +``KVM``, you can disable the ``RTC``, making the Guest rely on
>> +``kvmclock`` exclusively. Additionally, if your host's CPUs have the
>> +``TSC_DEADLINE`` feature, you can also disable both the i8259 PIC and
>> +the i8254 PIT (make sure you're also emulating a CPU with such feature
>> +in the guest).
>> +
>> +This is an example of a VM with all optional legacy features
>> +disabled::
>> +
>> +  $ qemu-system-x86_64 \
>> + -M microvm,x-option-roms=off,pit=off,pic=off,isa-serial=off,rtc=off \
>> + -enable-kvm -cpu host -m 512m -smp 2 \
>> + -kernel vmlinux -append "console=hvc0 root=/dev/vda" \
>> + -nodefaults -no-user-config -nographic \
>> + -chardev stdio,id=virtiocon0,server \
>
> server? doesn't make sense here :)

Heh, good catch!

Thanks,
Sergio.

>> + -device virtio-serial-device \
>> + -device virtconsole,chardev=virtiocon0 \
>> + -drive id=test,file=test.img,format=raw,if=none \
>> + -device virtio-blk-device,drive=test \
>> + -netdev tap,id=tap0,script=no,downscript=no \
>> + -device virtio-net-device,netdev=tap0
>> --
>> 2.21.0
>>
>>
>
> seem to work with appropriate kernel otherwise, so
> Reviewed-by: Marc-André Lureau 



signature.asc
Description: PGP signature

Re: [PATCH v10 14/15] hw/i386: Introduce the microvm machine type

2019-10-16 Thread Sergio Lopez


Marc-André Lureau  writes:

> Hi
>
> On Wed, Oct 16, 2019 at 12:24 PM Sergio Lopez  wrote:
>>
>> microvm is a machine type inspired by Firecracker and constructed
>> after its machine model.
>>
>> It's a minimalist machine type without PCI nor ACPI support, designed
>> for short-lived guests. microvm also establishes a baseline for
>> benchmarking and optimizing both QEMU and guest operating systems,
>> since it is optimized for both boot time and footprint.
>>
>> Signed-off-by: Sergio Lopez 
>> Reviewed-by: Michael S. Tsirkin 
>> ---
>>  default-configs/i386-softmmu.mak |   1 +
>>  include/hw/i386/microvm.h|  83 +
>>  hw/i386/microvm.c| 572 +++
>>  hw/i386/Kconfig  |  10 +
>>  hw/i386/Makefile.objs|   1 +
>>  5 files changed, 667 insertions(+)
>>  create mode 100644 include/hw/i386/microvm.h
>>  create mode 100644 hw/i386/microvm.c
>>
>> diff --git a/default-configs/i386-softmmu.mak 
>> b/default-configs/i386-softmmu.mak
>> index 4229900f57..4cc64dafa2 100644
>> --- a/default-configs/i386-softmmu.mak
>> +++ b/default-configs/i386-softmmu.mak
>> @@ -28,3 +28,4 @@
>>  CONFIG_ISAPC=y
>>  CONFIG_I440FX=y
>>  CONFIG_Q35=y
>> +CONFIG_MICROVM=y
>> diff --git a/include/hw/i386/microvm.h b/include/hw/i386/microvm.h
>> new file mode 100644
>> index 00..54cf599f4e
>> --- /dev/null
>> +++ b/include/hw/i386/microvm.h
>> @@ -0,0 +1,83 @@
>> +/*
>> + * Copyright (c) 2018 Intel Corporation
>> + * Copyright (c) 2019 Red Hat, Inc.
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms and conditions of the GNU General Public License,
>> + * version 2 or later, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope it will be useful, but WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
>> + * more details.
>> + *
>> + * You should have received a copy of the GNU General Public License along 
>> with
>> + * this program.  If not, see <http://www.gnu.org/licenses/>.
>> + */
>> +
>> +#ifndef HW_I386_MICROVM_H
>> +#define HW_I386_MICROVM_H
>> +
>> +#include "qemu-common.h"
>> +#include "exec/hwaddr.h"
>> +#include "qemu/notify.h"
>> +
>> +#include "hw/boards.h"
>> +#include "hw/i386/x86.h"
>> +
>> +/* microvm memory layout */
>> +#define PVH_START_INFO0x6000
>> +#define MEMMAP_START  0x7000
>> +#define MODLIST_START 0x7800
>> +#define BOOT_STACK_POINTER0x8ff0
>> +#define PML4_START0x9000
>> +#define PDPTE_START   0xa000
>> +#define PDE_START 0xb000
>> +#define KERNEL_CMDLINE_START  0x2
>> +#define EBDA_START0x9fc00
>> +#define HIMEM_START   0x10
>
> Those defines look outdated, or am I missing something?

You're right, those are leftovers from pre-v4 patch series. I'll drop
them.

Thanks,
Sergio.

>> +
>> +/* Platform virtio definitions */
>> +#define VIRTIO_MMIO_BASE  0xc000
>> +#define VIRTIO_IRQ_BASE   5
>> +#define VIRTIO_NUM_TRANSPORTS 8
>> +#define VIRTIO_CMDLINE_MAXLEN 64
>> +
>> +/* Machine type options */
>> +#define MICROVM_MACHINE_PIT "pit"
>> +#define MICROVM_MACHINE_PIC "pic"
>> +#define MICROVM_MACHINE_RTC "rtc"
>> +#define MICROVM_MACHINE_ISA_SERIAL  "isa-serial"
>> +#define MICROVM_MACHINE_OPTION_ROMS "x-option-roms"
>> +#define MICROVM_MACHINE_AUTO_KERNEL_CMDLINE "auto-kernel-cmdline"
>> +
>> +typedef struct {
>> +X86MachineClass parent;
>> +HotplugHandler *(*orig_hotplug_handler)(MachineState *machine,
>> +   DeviceState *dev);
>> +} MicrovmMachineClass;
>> +
>> +typedef struct {
>> +X86MachineState parent;
>> +
>> +/* Machine type options */
>> +OnOffAuto pic;
>> +OnOffAuto pit;
>> +OnOffAuto rtc;
>> +bool isa_serial;
>> +bool option_roms;
>> +bool auto_kernel_cmdline;
>> +
>> +/* Machine state */
>> +bool kernel_cmdline_fixed;
>> +} MicrovmMachineState;
>> +
>> +#define TYPE_MICROVM_MACHINE   MACHINE_TYPE_NAME("microvm")
>> +#define MICROVM_MACHINE(obj) \
>> +OBJECT_CHECK(MicrovmMachineState, (obj), TYPE_MICROVM_MACHINE)
>> +#define MICROVM_MACHINE_GET_CLASS(obj) \
>> +OBJECT_GET_CLASS(MicrovmMachineClass, obj, TYPE_MICROVM_MACHINE)
>> +#define MICROVM_MACHINE_CLASS(class) \
>> +OBJECT_CLASS_CHECK(MicrovmMachineClass, class, TYPE_MICROVM_MACHINE)
>> +
>> +#endif
>> diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
>> new file mode 100644
>> index 00..20d2189ea8
>> --- /dev/null
>> +++ b/hw/i386/microvm.c
>> @@ -0,0 +1,572 @@
>> +/*
>> + * Copyright (c) 2018 Intel Corporation
>> + * Copyright (c) 2019 Red Hat, Inc.
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms and conditions o

Re: [PATCH v9 04/15] hw/i386/pc: replace use of strtol with qemu_strtol in x86_load_linux()

2019-10-16 Thread Sergio Lopez


Markus Armbruster  writes:

> Philippe Mathieu-Daudé  writes:
>
>> Hi Sergio,
>>
>> On 10/15/19 1:23 PM, Sergio Lopez wrote:
>>> Follow checkpatch.pl recommendation and replace the use of strtol with
>>> qemu_strtol in x86_load_linux().
>>
>> "with qemu_strtoui"
>>
>>>
>>> Signed-off-by: Sergio Lopez 
>>> ---
>>>   hw/i386/pc.c | 9 -
>>>   1 file changed, 8 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>>> index 77e86bfc3d..c8608b8007 100644
>>> --- a/hw/i386/pc.c
>>> +++ b/hw/i386/pc.c
>>> @@ -68,6 +68,7 @@
>>>   #include "qemu/config-file.h"
>>>   #include "qemu/error-report.h"
>>>   #include "qemu/option.h"
>>> +#include "qemu/cutils.h"
>>>   #include "hw/acpi/acpi.h"
>>>   #include "hw/acpi/cpu_hotplug.h"
>>>   #include "hw/boards.h"
>>> @@ -1202,6 +1203,7 @@ static void x86_load_linux(PCMachineState *pcms,
>>>   vmode = strstr(kernel_cmdline, "vga=");
>>>   if (vmode) {
>>>   unsigned int video_mode;
>>> +int ret;
>>>   /* skip "vga=" */
>>>   vmode += 4;
>>>   if (!strncmp(vmode, "normal", 6)) {
>>> @@ -1211,7 +1213,12 @@ static void x86_load_linux(PCMachineState *pcms,
>>>   } else if (!strncmp(vmode, "ask", 3)) {
>>>   video_mode = 0xfffd;
>>>   } else {
>>> -video_mode = strtol(vmode, NULL, 0);
>>> +ret = qemu_strtoui(vmode, NULL, 0, &video_mode);
>>> +if (ret != 0) {
>>> +fprintf(stderr, "qemu: can't parse 'vga' parameter: %s\n",
>>> +strerror(-ret));
>>
>> (Cc'ing Markus/Daniel just in case)
>>
>> I'm wondering if using fprintf() is appropriate, thinking about
>> instantiating a machine via libvirt, is this error reported to the
>> user?
>>
>> I first thought about using error_report() instead:
>>
>> error_report("qemu: can't parse 'vga' parameter: %s",
>>  strerror(-ret));
>
> Make that
>
>  error_report("can't parse 'vga' parameter: %s", strerror(-ret));
>
>> But this API is meaningful when used in console/monitor. We can't get
>> here from the monitor,
>
> True, but error_report() should be used anyway, because (1) it makes
> intent more obvious, and (2) it uses a uniform, featureful error format.
>
> With the proposed fprintf(), we get
>
> qemu: can't parse 'vga' parameter: Numerical result out of range
>
> With error_report():
>
> * we report the *actual* argv[0] instead of "qemu"
>
> * we obey -msg timestamp=on
>
> * if "[PATCHv2 1/2] util/qemu-error: add guest name helper with -msg
>   options" gets accepted, we obey -msg guest-name=on, too
>
> * we have a common way to point to the offending command line argument
>   or configuration file line (not worth doing here)
>
> Please use error_report().
>
> [...]

But should we use error_report even if other occurrences in the same
function are using fprintf? Or are you suggesting to change those too?
If so, is it really worth it doing it now or can we do that in a future
patch (seems completely unrelated to this patch series)?

Thanks,
Sergio.


signature.asc
Description: PGP signature

Re: [PATCH] aspeed: Add an AST2600 eval board

2019-10-16 Thread Joel Stanley

On Wed, 16 Oct 2019 at 09:08, Cédric Le Goater  wrote:
>
> Define the board with 1 GiB of RAM but some boards can have up to 2
> GiB.
>
> Signed-off-by: Cédric Le Goater 
> Reviewed-by: Joel Stanley 
> ---
>
>  Changes since AST2600 patchset:
>
>  - lowered the RAM size to 1 GiB as it was breaking the tests on some
>hosts.

Peter,

After chatting with Cédric I agree we should merge this patch.

As it turns out the EVBs have differing amounts of RAM; his has 1GB
while mine has 2GB. So we are not being inaccurate by setting 1GB as
the default here.

Cheers,

Joel

>
>  include/hw/arm/aspeed.h |  1 +
>  hw/arm/aspeed.c | 23 +++
>  2 files changed, 24 insertions(+)
>
> diff --git a/include/hw/arm/aspeed.h b/include/hw/arm/aspeed.h
> index 02073a6b4d61..f49bc7081e4d 100644
> --- a/include/hw/arm/aspeed.h
> +++ b/include/hw/arm/aspeed.h
> @@ -18,6 +18,7 @@ typedef struct AspeedBoardConfig {
>  const char *desc;
>  const char *soc_name;
>  uint32_t hw_strap1;
> +uint32_t hw_strap2;
>  const char *fmc_model;
>  const char *spi_model;
>  uint32_t num_cs;
> diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
> index 52993f84b461..028191ff36fc 100644
> --- a/hw/arm/aspeed.c
> +++ b/hw/arm/aspeed.c
> @@ -88,6 +88,10 @@ struct AspeedBoardState {
>  /* Witherspoon hardware value: 0xF10AD216 (but use romulus definition) */
>  #define WITHERSPOON_BMC_HW_STRAP1 ROMULUS_BMC_HW_STRAP1
>
> +/* AST2600 evb hardware value */
> +#define AST2600_EVB_HW_STRAP1 0x00C0
> +#define AST2600_EVB_HW_STRAP2 0x0003
> +
>  /*
>   * The max ram region is for firmwares that scan the address space
>   * with load/store to guess how much RAM the SoC has.
> @@ -187,6 +191,8 @@ static void aspeed_board_init(MachineState *machine,
>   &error_abort);
>  object_property_set_int(OBJECT(&bmc->soc), cfg->hw_strap1, "hw-strap1",
>  &error_abort);
> +object_property_set_int(OBJECT(&bmc->soc), cfg->hw_strap2, "hw-strap2",
> +&error_abort);
>  object_property_set_int(OBJECT(&bmc->soc), cfg->num_cs, "num-cs",
>  &error_abort);
>  object_property_set_int(OBJECT(&bmc->soc), machine->smp.cpus, "num-cpus",
> @@ -308,6 +314,12 @@ static void ast2500_evb_i2c_init(AspeedBoardState *bmc)
>  i2c_create_slave(aspeed_i2c_get_bus(DEVICE(&soc->i2c), 11), "ds1338", 
> 0x32);
>  }
>
> +static void ast2600_evb_i2c_init(AspeedBoardState *bmc)
> +{
> +/* Start with some devices on our I2C busses */
> +ast2500_evb_i2c_init(bmc);
> +}
> +
>  static void romulus_bmc_i2c_init(AspeedBoardState *bmc)
>  {
>  AspeedSoCState *soc = &bmc->soc;
> @@ -455,6 +467,17 @@ static const AspeedBoardConfig aspeed_boards[] = {
>  .num_cs= 2,
>  .i2c_init  = witherspoon_bmc_i2c_init,
>  .ram   = 512 * MiB,
> +}, {
> +.name  = MACHINE_TYPE_NAME("ast2600-evb"),
> +.desc  = "Aspeed AST2600 EVB (Cortex A7)",
> +.soc_name  = "ast2600-a0",
> +.hw_strap1 = AST2600_EVB_HW_STRAP1,
> +.hw_strap2 = AST2600_EVB_HW_STRAP2,
> +.fmc_model = "w25q512jv",
> +.spi_model = "mx66u51235f",
> +.num_cs= 1,
> +.i2c_init  = ast2600_evb_i2c_init,
> +.ram   = 1 * GiB,
>  },
>  };
>
> --
> 2.21.0
>

Re: [PATCH v19 3/5] ACPI: Add APEI GHES table generation support

2019-10-16 Thread Xiang Zheng




On 2019/10/15 22:52, Peter Maydell wrote:
> On Tue, 15 Oct 2019 at 15:02, Xiang Zheng  wrote:
>>
>> From: Dongjiu Geng 
>>
>> This patch implements APEI GHES Table generation via fw_cfg blobs. Now
>> it only supports ARMv8 SEA, a type of GHESv2 error source. Afterwards,
>> we can extend the supported types if needed. For the CPER section,
>> currently it is memory section because kernel mainly wants userspace to
>> handle the memory errors.
>>
>> This patch follows the spec ACPI 6.2 to build the Hardware Error Source
>> table. For more detailed information, please refer to document:
>> docs/specs/acpi_hest_ghes.rst
>>
>> Suggested-by: Laszlo Ersek 
>> Signed-off-by: Dongjiu Geng 
>> Signed-off-by: Xiang Zheng 
> 
>> +/* Error Status Address */
>> +build_append_gas(table_data, AML_SYSTEM_MEMORY, 0x40, 0,
>> + 4 /* QWord access */, 0);
> 
> Hi; this doesn't seem to compile with clang:
> 
> /home/petmay01/linaro/qemu-from-laptop/qemu/hw/acpi/acpi_ghes.c:330:34:
> error: implicit conversion from
>   enumeration type 'AmlRegionSpace' to different enumeration type
> 'AmlAddressSpace'
>   [-Werror,-Wenum-conversion]
> build_append_gas(table_data, AML_SYSTEM_MEMORY, 0x40, 0,
>  ^
> /home/petmay01/linaro/qemu-from-laptop/qemu/hw/acpi/acpi_ghes.c:351:34:
> error: implicit conversion from
>   enumeration type 'AmlRegionSpace' to different enumeration type
> 'AmlAddressSpace'
>   [-Werror,-Wenum-conversion]
> build_append_gas(table_data, AML_SYSTEM_MEMORY, 0x40, 0,
>  ^
> 2 errors generated.
> 
> Should these be AML_AS_SYSTEM_MEMORY, or should the build_append_gas()
> function be taking an AmlRegionSpace rather than an AmlAddressSpace ?

Yes, these should be AML_AS_SYSTEM_MEMORY, the first field of Generic Address
Structure(GAS) is Address Space ID. I will fix these compile errors.

> 
> thanks
> -- PMM
> 
> .
> 

-- 

Thanks,
Xiang

Re: [PATCH v19 5/5] target-arm: kvm64: handle SIGBUS signal from kernel or KVM

2019-10-16 Thread Xiang Zheng




On 2019/10/15 22:48, Peter Maydell wrote:
> On Tue, 15 Oct 2019 at 15:02, Xiang Zheng  wrote:
>>
>> From: Dongjiu Geng 
>>
>> Add a SIGBUS signal handler. In this handler, it checks the SIGBUS type,
>> translates the host VA delivered by host to guest PA, then fills this PA
>> to guest APEI GHES memory, then notifies guest according to the SIGBUS
>> type.
>>
>> When guest accesses the poisoned memory, it will generate a Synchronous
>> External Abort(SEA). Then host kernel gets an APEI notification and calls
>> memory_failure() to unmap the affected page in stage 2, finally
>> returns to guest.
>>
>> Guest continues to access the PG_hwpoison page, it will trap to KVM as
>> stage2 fault, then a SIGBUS_MCEERR_AR synchronous signal is delivered to
>> Qemu, Qemu records this error address into guest APEI GHES memory and
>> notifies guest using Synchronous-External-Abort(SEA).
>>
>> In order to inject a vSEA, we introduce the kvm_inject_arm_sea() function
>> in which we can setup the type of exception and the syndrome information.
>> When switching to guest, the target vcpu will jump to the synchronous
>> external abort vector table entry.
>>
>> The ESR_ELx.DFSC is set to synchronous external abort(0x10), and the
>> ESR_ELx.FnV is set to not valid(0x1), which will tell guest that FAR is
>> not valid and hold an UNKNOWN value. These values will be set to KVM
>> register structures through KVM_SET_ONE_REG IOCTL.
>>
>> Signed-off-by: Dongjiu Geng 
>> Signed-off-by: Xiang Zheng 
> 
>> +static int acpi_ghes_record_mem_error(uint64_t error_block_address,
>> +  uint64_t error_physical_addr,
>> +  uint32_t data_length)
>> +{
>> +GArray *block;
>> +uint64_t current_block_length;
>> +/* Memory Error Section Type */
>> +QemuUUID mem_section_id_le = UEFI_CPER_SEC_PLATFORM_MEM;
>> +QemuUUID fru_id = {0};
> 
> Hi; this makes at least some versions of clang complain
> (this is a clang bug, but it's present in shipped versions):
> 
> /home/petmay01/linaro/qemu-from-laptop/qemu/hw/acpi/acpi_ghes.c:135:24:
> error: suggest braces around
>   initialization of subobject [-Werror,-Wmissing-braces]
> QemuUUID fru_id = {0};
>^
>{}
> 
> We generally use "{}" as the generic zero-initializer for
> this reason (it's gcc/clang specific whereas "{0}" is
> in the standard, but all of the compilers we care about
> support it and don't warn about its use).
> 
>> +uint8_t fru_text[20] = {0};
> 
> Clang doesn't mind this one because it's not initializing
> a struct type, but you could use "{}" here too for consistency.
> 

OK, I will replace all the "{0}" with "{}".

> thanks
> -- PMM
> 
> .
> 

-- 

Thanks,
Xiang

[RFC 5/5] spapr: Work around spurious warnings from vfio INTx initialization

2019-10-16 Thread David Gibson

Traditional PCI INTx for vfio devices can only perform well if using
an in-kernel irqchip.  Therefore, vfio_intx_update() issues a warning
if an in kernel irqchip is not available.

We usually do have an in-kernel irqchip available for pseries machines
on POWER hosts.  However, because the platform allows feature
negotiation of what interrupt controller model to use, we don't
currently initialize it until machine reset.  vfio_intx_update() is
called (first) from vfio_realize() before that, so it can issue a
spurious warning, even if we will have an in kernel irqchip by the
time we need it.

To work around this, make a call to spapr_irq_update_active_intc() from
spapr_irq_init() which is called at machine realize time, before the
vfio realize.  This call will be pretty much obsoleted by the later
call at reset time, but it serves to suppress the spurious warning
from VFIO.

Cc: Alex Williamson 
Cc: Alexey Kardashevskiy 

Signed-off-by: David Gibson 
---
 hw/ppc/spapr_irq.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
index 45544b8976..bb91c61fa0 100644
--- a/hw/ppc/spapr_irq.c
+++ b/hw/ppc/spapr_irq.c
@@ -345,6 +345,14 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
 
 spapr->qirqs = qemu_allocate_irqs(spapr_set_irq, spapr,
   smc->nr_xirqs + SPAPR_XIRQ_BASE);
+
+/*
+ * Mostly we don't actually need this until reset, except that not
+ * having this set up can cause VFIO devices to issue a
+ * false-positive warning during realize(), because they don't yet
+ * have an in-kernel irq chip.
+ */
+spapr_irq_update_active_intc(spapr);
 }
 
 int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp)
@@ -500,7 +508,8 @@ void spapr_irq_update_active_intc(SpaprMachineState *spapr)
  * this.
  */
 new_intc = SPAPR_INTC(spapr->xive);
-} else if (spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
+} else if (spapr->ov5_cas
+   && spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) {
 new_intc = SPAPR_INTC(spapr->xive);
 } else {
 new_intc = SPAPR_INTC(spapr->ics);
-- 
2.21.0

[RFC 4/5] spapr: Handle irq backend changes with VFIO PCI devices

2019-10-16 Thread David Gibson

pseries machine type can have one of two different interrupt controllers in
use depending on feature negotiation with the guest.  Usually this is
invisible to devices, because they route to a common set of qemu_irqs which
in turn dispatch to the correct back end.

VFIO passthrough devices, however, wire themselves up directly to the KVM
irqchip for performance, which means they are affected by this change in
interrupt controller.  To get them to adjust correctly for the change in
irqchip, we need to fire the kvm irqchip change notifier.

Cc: Alex Williamson 
Cc: Alexey Kardashevskiy 

Signed-off-by: David Gibson 
---
 hw/ppc/spapr_irq.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
index 234d1073e5..45544b8976 100644
--- a/hw/ppc/spapr_irq.c
+++ b/hw/ppc/spapr_irq.c
@@ -480,6 +480,12 @@ static void set_active_intc(SpaprMachineState *spapr,
 }
 
 spapr->active_intc = new_intc;
+
+/*
+ * We've changed the kernel irqchip, let VFIO devices know they
+ * need to readjust.
+ */
+kvm_irqchip_change_notify();
 }
 
 void spapr_irq_update_active_intc(SpaprMachineState *spapr)
-- 
2.21.0

[RFC 1/5] kvm: Introduce KVM irqchip change notifier

2019-10-16 Thread David Gibson

Awareness of an in kernel irqchip is usually local to the machine and its
top-level interrupt controller.  However, in a few cases other things need
to know about it.  In particular vfio devices need this in order to
accelerate interrupt delivery.

If interrupt routing is changed, such devices may need to readjust their
connection to the KVM irqchip.  pci_bus_fire_intx_routing_notifier() exists
to do just this.

However, for the pseries machine type we have a situation where the routing
remains constant but the top-level irq chip itself is changed.  This occurs
because of PAPR feature negotiation which allows the guest to decide
between the older XICS and newer XIVE irq chip models (both of which are
paravirtualized).

To allow devices like vfio to adjust to this change, introduce a new
notifier for this purpose: kvm_irqchip_change_notify().

Cc: Alex Williamson 
Cc: Alexey Kardashevskiy 

Signed-off-by: David Gibson 
---
 accel/kvm/kvm-all.c| 18 ++
 accel/stubs/kvm-stub.c | 12 
 include/sysemu/kvm.h   |  5 +
 3 files changed, 35 insertions(+)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index d2d96d73e8..44df1908dd 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -149,6 +149,9 @@ static const KVMCapabilityInfo kvm_required_capabilites[] = 
{
 KVM_CAP_LAST_INFO
 };
 
+static NotifierList kvm_irqchip_change_notifiers =
+NOTIFIER_LIST_INITIALIZER(kvm_irqchip_change_notifiers);
+
 #define kvm_slots_lock(kml)  qemu_mutex_lock(&(kml)->slots_lock)
 #define kvm_slots_unlock(kml)qemu_mutex_unlock(&(kml)->slots_lock)
 
@@ -1396,6 +1399,21 @@ void kvm_irqchip_release_virq(KVMState *s, int virq)
 trace_kvm_irqchip_release_virq(virq);
 }
 
+void kvm_irqchip_add_change_notifier(Notifier *n)
+{
+notifier_list_add(&kvm_irqchip_change_notifiers, n);
+}
+
+void kvm_irqchip_remove_change_notifier(Notifier *n)
+{
+notifier_remove(n);
+}
+
+void kvm_irqchip_change_notify(void)
+{
+notifier_list_notify(&kvm_irqchip_change_notifiers, NULL);
+}
+
 static unsigned int kvm_hash_msi(uint32_t data)
 {
 /* This is optimized for IA32 MSI layout. However, no other arch shall
diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c
index 6feb66ed80..82f118d2df 100644
--- a/accel/stubs/kvm-stub.c
+++ b/accel/stubs/kvm-stub.c
@@ -138,6 +138,18 @@ void kvm_irqchip_commit_routes(KVMState *s)
 {
 }
 
+void kvm_irqchip_add_change_notifier(Notifier *n)
+{
+}
+
+void kvm_irqchip_remove_change_notifier(Notifier *n)
+{
+}
+
+void kvm_irqchip_change_notify(void)
+{
+}
+
 int kvm_irqchip_add_adapter_route(KVMState *s, AdapterInfo *adapter)
 {
 return -ENOSYS;
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 9d143282bc..9fe233b9bf 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -201,6 +201,7 @@ typedef struct KVMCapabilityInfo {
 struct KVMState;
 typedef struct KVMState KVMState;
 extern KVMState *kvm_state;
+typedef struct Notifier Notifier;
 
 /* external API */
 
@@ -401,6 +402,10 @@ int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg);
 
 void kvm_irqchip_add_irq_route(KVMState *s, int gsi, int irqchip, int pin);
 
+void kvm_irqchip_add_change_notifier(Notifier *n);
+void kvm_irqchip_remove_change_notifier(Notifier *n);
+void kvm_irqchip_change_notify(void);
+
 void kvm_get_apic_state(DeviceState *d, struct kvm_lapic_state *kapic);
 
 struct kvm_guest_debug;
-- 
2.21.0

[RFC 2/5] vfio/pci: Split vfio_intx_update()

2019-10-16 Thread David Gibson

This splits the vfio_intx_update() function into one part doing the actual
reconnection with the KVM irqchip (vfio_intx_update(), now taking an
argument with the new routing) and vfio_intx_routing_notifier() which
handles calls to the pci device intx routing notifier and calling
vfio_intx_update() when necessary.  This will make adding support for the
irqchip change notifier easier.

Cc: Alex Williamson 
Cc: Alexey Kardashevskiy 

Signed-off-by: David Gibson 
---
 hw/vfio/pci.c | 39 ++-
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 12fac39804..529ad13908 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -215,30 +215,18 @@ static void vfio_intx_disable_kvm(VFIOPCIDevice *vdev)
 #endif
 }
 
-static void vfio_intx_update(PCIDevice *pdev)
+static void vfio_intx_update(VFIOPCIDevice *vdev, PCIINTxRoute *route)
 {
-VFIOPCIDevice *vdev = PCI_VFIO(pdev);
-PCIINTxRoute route;
 Error *err = NULL;
 
-if (vdev->interrupt != VFIO_INT_INTx) {
-return;
-}
-
-route = pci_device_route_intx_to_irq(&vdev->pdev, vdev->intx.pin);
-
-if (!pci_intx_route_changed(&vdev->intx.route, &route)) {
-return; /* Nothing changed */
-}
-
 trace_vfio_intx_update(vdev->vbasedev.name,
-   vdev->intx.route.irq, route.irq);
+   vdev->intx.route.irq, route->irq);
 
 vfio_intx_disable_kvm(vdev);
 
-vdev->intx.route = route;
+vdev->intx.route = *route;
 
-if (route.mode != PCI_INTX_ENABLED) {
+if (route->mode != PCI_INTX_ENABLED) {
 return;
 }
 
@@ -251,6 +239,22 @@ static void vfio_intx_update(PCIDevice *pdev)
 vfio_intx_eoi(&vdev->vbasedev);
 }
 
+static void vfio_intx_routing_notifier(PCIDevice *pdev)
+{
+VFIOPCIDevice *vdev = PCI_VFIO(pdev);
+PCIINTxRoute route;
+
+if (vdev->interrupt != VFIO_INT_INTx) {
+return;
+}
+
+route = pci_device_route_intx_to_irq(&vdev->pdev, vdev->intx.pin);
+
+if (pci_intx_route_changed(&vdev->intx.route, &route)) {
+vfio_intx_update(vdev, &route);
+}
+}
+
 static int vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
 {
 uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1);
@@ -2954,7 +2958,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) {
 vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
   vfio_intx_mmap_enable, vdev);
-pci_device_set_intx_routing_notifier(&vdev->pdev, vfio_intx_update);
+pci_device_set_intx_routing_notifier(&vdev->pdev,
+ vfio_intx_routing_notifier);
 ret = vfio_intx_enable(vdev, errp);
 if (ret) {
 goto out_teardown;
-- 
2.21.0

[RFC 3/5] vfio/pci: Respond to KVM irqchip change notifier

2019-10-16 Thread David Gibson

VFIO PCI devices already respond to the pci intx routing notifier, in order
to update kernel irqchip mappings when routing is updated.  However this
won't handle the case where the irqchip itself is replaced by a different
model while retaining the same routing.  This case can happen on
the pseries machine type due to PAPR feature negotiation.

To handle that case, add a handler for the irqchip change notifier, which
does much the same thing as the routing notifier, but is unconditional,
rather than being a no-op when the routing hasn't changed.

Cc: Alex Williamson 
Cc: Alexey Kardashevskiy 

Signed-off-by: David Gibson 
---
 hw/vfio/pci.c | 12 
 hw/vfio/pci.h |  2 ++
 2 files changed, 14 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 529ad13908..6aa806baff 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -255,6 +255,14 @@ static void vfio_intx_routing_notifier(PCIDevice *pdev)
 }
 }
 
+static void vfio_irqchip_change(Notifier *notify, void *data)
+{
+VFIOPCIDevice *vdev = container_of(notify, VFIOPCIDevice,
+   irqchip_change_notifier);
+
+vfio_intx_update(vdev, &vdev->intx.route);
+}
+
 static int vfio_intx_enable(VFIOPCIDevice *vdev, Error **errp)
 {
 uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1);
@@ -2960,6 +2968,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
   vfio_intx_mmap_enable, vdev);
 pci_device_set_intx_routing_notifier(&vdev->pdev,
  vfio_intx_routing_notifier);
+vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
+kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
 ret = vfio_intx_enable(vdev, errp);
 if (ret) {
 goto out_teardown;
@@ -3009,6 +3019,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 
 out_teardown:
 pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
+kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
 vfio_teardown_msi(vdev);
 vfio_bars_exit(vdev);
 error:
@@ -3042,6 +3053,7 @@ static void vfio_exitfn(PCIDevice *pdev)
 vfio_unregister_req_notifier(vdev);
 vfio_unregister_err_notifier(vdev);
 pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
+kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
 vfio_disable_interrupts(vdev);
 if (vdev->intx.mmap_timer) {
 timer_free(vdev->intx.mmap_timer);
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 834a90d646..11324f28ce 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -168,6 +168,8 @@ typedef struct VFIOPCIDevice {
 bool no_vfio_ioeventfd;
 bool enable_ramfb;
 VFIODisplay *dpy;
+
+Notifier irqchip_change_notifier;
 } VFIOPCIDevice;
 
 uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
-- 
2.21.0

[RFC 0/5] Handle PAPR irq chip changes for VFIO devices

2019-10-16 Thread David Gibson

The pseries machine type has the odd property that its root irq chip
can be completely changed at runtime.  This comes about because PAPR
feature negotiation lets the guest choose between the old XICS style
or new XIVE style PIC.  It's possible, because both PICs are
paravirtualized via hypercalls.

VFIO needs to wire up device interrupts directly to the kernel irqchip
to accelerate delivery, and that's broken by the irq chip change.
This series introduces a new notifier to get this correctly updated
when PAPR switches irq chip.

Caveats:
 * I'm not sure I've sufficiently pinned down the semantics of when
   exactly the new notifier should be called yet
   
 * It would kind of be nice to automatically fire the notifier from
   somewhere in the irq chip update routines, rather than at the PAPR
   level.  I haven't seen a good way to do that (at least not without
   double firing it on every transition).

 * Patch 5/5 to work around spurious warnings is working a bit *too*
   well.  On a Boston machine which allows in-kernel XICS, but not
   in-kernel XIVE, I (correctly) no longer get the spurious warning at
   initial start up (in XICS mode).  However we incorrectly *don't*
   get the "failed to setup resample irqfd" warning after we negotiate
   features and switch to XIVE mode.  I haven't had a chance to
   investigate, but I suspect a kernel bug where it's responding to
   KVM_IRQFD based on stale information about the kernel irqchip.

David Gibson (5):
  kvm: Introduce KVM irqchip change notifier
  vfio/pci: Split vfio_intx_update()
  vfio/pci: Respond to KVM irqchip change notifier
  spapr: Handle irq backend changes with VFIO PCI devices
  spapr: Work around spurious warnings from vfio INTx initialization

 accel/kvm/kvm-all.c| 18 +++
 accel/stubs/kvm-stub.c | 12 ++
 hw/ppc/spapr_irq.c | 17 +-
 hw/vfio/pci.c  | 51 --
 hw/vfio/pci.h  |  2 ++
 include/sysemu/kvm.h   |  5 +
 6 files changed, 87 insertions(+), 18 deletions(-)

-- 
2.21.0

Re: [PATCH 0/4] target/arm vector improvements

2019-10-16 Thread no-reply

Patchew URL: 
https://patchew.org/QEMU/20191017044232.27601-1-richard.hender...@linaro.org/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Subject: [PATCH 0/4] target/arm vector improvements
Type: series
Message-id: 20191017044232.27601-1-richard.hender...@linaro.org

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
3ea6653 target/arm: Convert PMULL.8 to gvec
b6cf8ea target/arm: Convert PMULL.64 to gvec
5eddaf2 target/arm: Convert PMUL.8 to gvec
bd8f967 target/arm: Vectorize USHL and SSHL

=== OUTPUT BEGIN ===
1/4 Checking commit bd8f967551b6 (target/arm: Vectorize USHL and SSHL)
ERROR: trailing statements should be on next line
#160: FILE: target/arm/translate.c:3583:
+case 2: gen_ushl_i32(var, var, shift); break;

ERROR: trailing statements should be on next line
#167: FILE: target/arm/translate.c:3589:
+case 2: gen_sshl_i32(var, var, shift); break;

total: 2 errors, 0 warnings, 571 lines checked

Patch 1/4 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

2/4 Checking commit 5eddaf2661e3 (target/arm: Convert PMUL.8 to gvec)
3/4 Checking commit b6cf8ea095db (target/arm: Convert PMULL.64 to gvec)
4/4 Checking commit 3ea665336e6c (target/arm: Convert PMULL.8 to gvec)
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/20191017044232.27601-1-richard.hender...@linaro.org/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [Patch v2] checkpatch: sugguest to use qemu_real_host_page_size instead of getpagesize() or sysconf(_SC_PAGESIZE)

2019-10-16 Thread Richard Henderson

On 10/16/19 5:46 PM, Wei Yang wrote:
> Signed-off-by: Wei Yang 
> CC: Richard Henderson 
> CC: Stefan Hajnoczi 
> 
> ---
> v2: add "\b" for better match, suggested by Richard Henderson 
> 
> ---
>  scripts/checkpatch.pl | 6 ++
>  1 file changed, 6 insertions(+)

Reviewed-by: Richard Henderson 


r~

[PATCH 4/4] target/arm: Convert PMULL.8 to gvec

2019-10-16 Thread Richard Henderson

We still need two different helpers, since NEON and SVE2 get the
inputs from different locations within the source vector.  However,
we can convert both to the same internal form for computation.

The sve2 helper is not used yet, but adding it with this patch
helps illustrate why the neon changes are helpful.

Signed-off-by: Richard Henderson 
---
 target/arm/helper-sve.h|  2 ++
 target/arm/helper.h|  3 +-
 target/arm/neon_helper.c   | 32 
 target/arm/translate-a64.c | 27 +++--
 target/arm/translate.c | 26 -
 target/arm/vec_helper.c| 60 ++
 6 files changed, 95 insertions(+), 55 deletions(-)

diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 9e79182ab4..2f47279155 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -1574,3 +1574,5 @@ DEF_HELPER_FLAGS_6(sve_stdd_le_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
 DEF_HELPER_FLAGS_6(sve_stdd_be_zd, TCG_CALL_NO_WG,
void, env, ptr, ptr, ptr, tl, i32)
+
+DEF_HELPER_FLAGS_4(sve2_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index d954399b7e..8a8517cf34 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -335,7 +335,6 @@ DEF_HELPER_2(neon_sub_u8, i32, i32, i32)
 DEF_HELPER_2(neon_sub_u16, i32, i32, i32)
 DEF_HELPER_2(neon_mul_u8, i32, i32, i32)
 DEF_HELPER_2(neon_mul_u16, i32, i32, i32)
-DEF_HELPER_2(neon_mull_p8, i64, i32, i32)
 
 DEF_HELPER_2(neon_tst_u8, i32, i32, i32)
 DEF_HELPER_2(neon_tst_u16, i32, i32, i32)
@@ -688,6 +687,8 @@ DEF_HELPER_FLAGS_4(gvec_ushl_h, TCG_CALL_NO_RWG, void, ptr, 
ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_pmul_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_pmull_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_4(neon_pmull_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
 #ifdef TARGET_AARCH64
 #include "helper-a64.h"
 #include "helper-sve.h"
diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c
index 6a107da0e1..c7a8438b42 100644
--- a/target/arm/neon_helper.c
+++ b/target/arm/neon_helper.c
@@ -1129,38 +1129,6 @@ NEON_VOP(mul_u8, neon_u8, 4)
 NEON_VOP(mul_u16, neon_u16, 2)
 #undef NEON_FN
 
-/* Polynomial multiplication is like integer multiplication except the
-   partial products are XORed, not added.  */
-uint64_t HELPER(neon_mull_p8)(uint32_t op1, uint32_t op2)
-{
-uint64_t result = 0;
-uint64_t mask;
-uint64_t op2ex = op2;
-op2ex = (op2ex & 0xff) |
-((op2ex & 0xff00) << 8) |
-((op2ex & 0xff0000) << 16) |
-((op2ex & 0xff000000) << 24);
-while (op1) {
-mask = 0;
-if (op1 & 1) {
-mask |= 0xffff;
-}
-if (op1 & (1 << 8)) {
-mask |= (0xffffU << 16);
-}
-if (op1 & (1 << 16)) {
-mask |= (0xffffULL << 32);
-}
-if (op1 & (1 << 24)) {
-mask |= (0xffffULL << 48);
-}
-result ^= op2ex & mask;
-op1 = (op1 >> 1) & 0x7f7f7f7f;
-op2ex <<= 1;
-}
-return result;
-}
-
 #define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0
 NEON_VOP(tst_u8, neon_u8, 4)
 NEON_VOP(tst_u16, neon_u16, 2)
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 12588d18df..2934e4fc16 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -10483,10 +10483,6 @@ static void handle_3rd_widening(DisasContext *s, int 
is_q, int is_u, int size,
 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
   tcg_passres, tcg_passres);
 break;
-case 14: /* PMULL */
-assert(size == 0);
-gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
-break;
 default:
 g_assert_not_reached();
 }
@@ -10650,11 +10646,21 @@ static void disas_simd_three_reg_diff(DisasContext 
*s, uint32_t insn)
 handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
 break;
 case 14: /* PMULL, PMULL2 */
-if (is_u || size == 1 || size == 2) {
+if (is_u) {
 unallocated_encoding(s);
 return;
 }
-if (size == 3) {
+switch (size) {
+case 0: /* PMULL.P8 */
+if (!fp_access_check(s)) {
+return;
+}
+/* The Q field specifies lo/hi half input for this insn.  */
+gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
+ gen_helper_neon_pmull_h);
+break;
+
+case 3: /* PMULL.P64 */
 if (!dc_isar_feature(aa64_pmull, s)) {
 unallocated_encoding(s);
 return;
@@ -10665,9 +10671,13 @@ static void disas_simd_three_reg_diff(DisasContext *s, 
uint32_t insn)
 /* The Q

[PATCH 1/4] target/arm: Vectorize USHL and SSHL

2019-10-16 Thread Richard Henderson

These instructions shift left or right depending on the sign
of the input, and 7 bits are significant to the shift.  This
requires several masks and selects in addition to the actual
shifts to form the complete answer.

That said, the operation is still a small improvement even for
two 64-bit elements -- 13 vector operations instead of 2 * 7
integer operations.

Signed-off-by: Richard Henderson 
---
v2: Fix operand ordering for aa32 VSHL.
---
 target/arm/helper.h|  11 +-
 target/arm/translate.h |   6 +
 target/arm/neon_helper.c   |  33 
 target/arm/translate-a64.c |  18 +--
 target/arm/translate.c | 301 +++--
 target/arm/vec_helper.c|  88 +++
 6 files changed, 391 insertions(+), 66 deletions(-)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index 1fb2cb5a77..fc0d594a14 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -296,14 +296,8 @@ DEF_HELPER_2(neon_abd_s16, i32, i32, i32)
 DEF_HELPER_2(neon_abd_u32, i32, i32, i32)
 DEF_HELPER_2(neon_abd_s32, i32, i32, i32)
 
-DEF_HELPER_2(neon_shl_u8, i32, i32, i32)
-DEF_HELPER_2(neon_shl_s8, i32, i32, i32)
 DEF_HELPER_2(neon_shl_u16, i32, i32, i32)
 DEF_HELPER_2(neon_shl_s16, i32, i32, i32)
-DEF_HELPER_2(neon_shl_u32, i32, i32, i32)
-DEF_HELPER_2(neon_shl_s32, i32, i32, i32)
-DEF_HELPER_2(neon_shl_u64, i64, i64, i64)
-DEF_HELPER_2(neon_shl_s64, i64, i64, i64)
 DEF_HELPER_2(neon_rshl_u8, i32, i32, i32)
 DEF_HELPER_2(neon_rshl_s8, i32, i32, i32)
 DEF_HELPER_2(neon_rshl_u16, i32, i32, i32)
@@ -690,6 +684,11 @@ DEF_HELPER_FLAGS_2(frint64_s, TCG_CALL_NO_RWG, f32, f32, 
ptr)
 DEF_HELPER_FLAGS_2(frint32_d, TCG_CALL_NO_RWG, f64, f64, ptr)
 DEF_HELPER_FLAGS_2(frint64_d, TCG_CALL_NO_RWG, f64, f64, ptr)
 
+DEF_HELPER_FLAGS_4(gvec_sshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_sshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_ushl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
 #ifdef TARGET_AARCH64
 #include "helper-a64.h"
 #include "helper-sve.h"
diff --git a/target/arm/translate.h b/target/arm/translate.h
index dd24f91f26..0c4e6e4bbd 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -274,6 +274,8 @@ uint64_t vfp_expand_imm(int size, uint8_t imm8);
 extern const GVecGen3 mla_op[4];
 extern const GVecGen3 mls_op[4];
 extern const GVecGen3 cmtst_op[4];
+extern const GVecGen3 sshl_op[4];
+extern const GVecGen3 ushl_op[4];
 extern const GVecGen2i ssra_op[4];
 extern const GVecGen2i usra_op[4];
 extern const GVecGen2i sri_op[4];
@@ -283,6 +285,10 @@ extern const GVecGen4 sqadd_op[4];
 extern const GVecGen4 uqsub_op[4];
 extern const GVecGen4 sqsub_op[4];
 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void gen_ushl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
+void gen_sshl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b);
+void gen_ushl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
+void gen_sshl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
 
 /*
  * Forward to the isar_feature_* tests given a DisasContext pointer.
diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c
index 4259056723..c581ffb7d3 100644
--- a/target/arm/neon_helper.c
+++ b/target/arm/neon_helper.c
@@ -615,24 +615,9 @@ NEON_VOP(abd_u32, neon_u32, 1)
 } else { \
 dest = src1 << tmp; \
 }} while (0)
-NEON_VOP(shl_u8, neon_u8, 4)
 NEON_VOP(shl_u16, neon_u16, 2)
-NEON_VOP(shl_u32, neon_u32, 1)
 #undef NEON_FN
 
-uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop)
-{
-int8_t shift = (int8_t)shiftop;
-if (shift >= 64 || shift <= -64) {
-val = 0;
-} else if (shift < 0) {
-val >>= -shift;
-} else {
-val <<= shift;
-}
-return val;
-}
-
 #define NEON_FN(dest, src1, src2) do { \
 int8_t tmp; \
 tmp = (int8_t)src2; \
@@ -645,27 +630,9 @@ uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t 
shiftop)
 } else { \
 dest = src1 << tmp; \
 }} while (0)
-NEON_VOP(shl_s8, neon_s8, 4)
 NEON_VOP(shl_s16, neon_s16, 2)
-NEON_VOP(shl_s32, neon_s32, 1)
 #undef NEON_FN
 
-uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop)
-{
-int8_t shift = (int8_t)shiftop;
-int64_t val = valop;
-if (shift >= 64) {
-val = 0;
-} else if (shift <= -64) {
-val >>= 63;
-} else if (shift < 0) {
-val >>= -shift;
-} else {
-val <<= shift;
-}
-return val;
-}
-
 #define NEON_FN(dest, src1, src2) do { \
 int8_t tmp; \
 tmp = (int8_t)src2; \
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 2d6cd09634..255a168df6 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -8685,9 +8685,9 @@ static void handle_3same_64(DisasContext *s, int opcode, 
bool u,
 break;
 case 0x8: /* SSHL, USHL */
 if (u) {
-gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
+gen_ushl_i64(tcg_rd, tcg_

[PATCH 3/4] target/arm: Convert PMULL.64 to gvec

2019-10-16 Thread Richard Henderson

The gvec form will be needed for implementing SVE2.

Signed-off-by: Richard Henderson 
---
 target/arm/helper.h|  4 +---
 target/arm/neon_helper.c   | 30 --
 target/arm/translate-a64.c | 28 +++-
 target/arm/translate.c | 16 ++--
 target/arm/vec_helper.c| 33 +
 5 files changed, 39 insertions(+), 72 deletions(-)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index 800446e537..d954399b7e 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -555,9 +555,6 @@ DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, 
i32, i32)
 DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
 DEF_HELPER_2(dc_zva, void, env, i64)
 
-DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
-
 DEF_HELPER_FLAGS_5(gvec_qrdmlah_s16, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(gvec_qrdmlsh_s16, TCG_CALL_NO_RWG,
@@ -689,6 +686,7 @@ DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, 
ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_ushl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
 DEF_HELPER_FLAGS_4(gvec_pmul_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_pmull_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
 #ifdef TARGET_AARCH64
 #include "helper-a64.h"
diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c
index 9e7a9a1ac5..6a107da0e1 100644
--- a/target/arm/neon_helper.c
+++ b/target/arm/neon_helper.c
@@ -2152,33 +2152,3 @@ void HELPER(neon_zip16)(void *vd, void *vm)
 rm[0] = m0;
 rd[0] = d0;
 }
-
-/* Helper function for 64 bit polynomial multiply case:
- * perform PolynomialMult(op1, op2) and return either the top or
- * bottom half of the 128 bit result.
- */
-uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
-{
-int bitnum;
-uint64_t res = 0;
-
-for (bitnum = 0; bitnum < 64; bitnum++) {
-if (op1 & (1ULL << bitnum)) {
-res ^= op2 << bitnum;
-}
-}
-return res;
-}
-uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
-{
-int bitnum;
-uint64_t res = 0;
-
-/* bit 0 of op1 can't influence the high 64 bits at all */
-for (bitnum = 1; bitnum < 64; bitnum++) {
-if (op1 & (1ULL << bitnum)) {
-res ^= op2 >> (64 - bitnum);
-}
-}
-return res;
-}
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 04e25cfe06..12588d18df 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -10598,30 +10598,6 @@ static void handle_3rd_narrowing(DisasContext *s, int 
is_q, int is_u, int size,
 clear_vec_high(s, is_q, rd);
 }
 
-static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
-{
-/* PMULL of 64 x 64 -> 128 is an odd special case because it
- * is the only three-reg-diff instruction which produces a
- * 128-bit wide result from a single operation. However since
- * it's possible to calculate the two halves more or less
- * separately we just use two helper calls.
- */
-TCGv_i64 tcg_op1 = tcg_temp_new_i64();
-TCGv_i64 tcg_op2 = tcg_temp_new_i64();
-TCGv_i64 tcg_res = tcg_temp_new_i64();
-
-read_vec_element(s, tcg_op1, rn, is_q, MO_64);
-read_vec_element(s, tcg_op2, rm, is_q, MO_64);
-gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
-write_vec_element(s, tcg_res, rd, 0, MO_64);
-gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
-write_vec_element(s, tcg_res, rd, 1, MO_64);
-
-tcg_temp_free_i64(tcg_op1);
-tcg_temp_free_i64(tcg_op2);
-tcg_temp_free_i64(tcg_res);
-}
-
 /* AdvSIMD three different
  *   31  30  29 28   24 23  22  21 20  16 1512 11 10 95 40
  * +---+---+---+---+--+---+--++-+--+--+
@@ -10686,7 +10662,9 @@ static void disas_simd_three_reg_diff(DisasContext *s, 
uint32_t insn)
 if (!fp_access_check(s)) {
 return;
 }
-handle_pmull_64(s, is_q, rd, rn, rm);
+/* The Q field specifies lo/hi half input for this insn.  */
+gen_gvec_op3_ool(s, true, rd, rn, rm, is_q,
+ gen_helper_gvec_pmull_q);
 return;
 }
 goto is_widening;
diff --git a/target/arm/translate.c b/target/arm/translate.c
index b66a2f6b71..4e34249672 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -5877,23 +5877,11 @@ static int disas_neon_data_insn(DisasContext *s, 
uint32_t insn)
  * outside the loop below as it only performs a single pass.
  */
 if (op == 14 && size == 2) {
-TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
-
 if (!dc_isar_feature(aa32_pmull, s)) {
 return 1;
 }
-

[PATCH 2/4] target/arm: Convert PMUL.8 to gvec

2019-10-16 Thread Richard Henderson

The gvec form will be needed for implementing SVE2.

Extend the implementation to operate on uint64_t instead of uint32_t.
Use a counted inner loop instead of terminating when op1 goes to zero,
looking toward the required implementation for ARMv8.4-DIT.

Signed-off-by: Richard Henderson 
---
 target/arm/helper.h|  3 ++-
 target/arm/neon_helper.c   | 22 --
 target/arm/translate-a64.c | 10 +++---
 target/arm/translate.c | 11 ---
 target/arm/vec_helper.c| 30 ++
 5 files changed, 39 insertions(+), 37 deletions(-)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index fc0d594a14..800446e537 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -335,7 +335,6 @@ DEF_HELPER_2(neon_sub_u8, i32, i32, i32)
 DEF_HELPER_2(neon_sub_u16, i32, i32, i32)
 DEF_HELPER_2(neon_mul_u8, i32, i32, i32)
 DEF_HELPER_2(neon_mul_u16, i32, i32, i32)
-DEF_HELPER_2(neon_mul_p8, i32, i32, i32)
 DEF_HELPER_2(neon_mull_p8, i64, i32, i32)
 
 DEF_HELPER_2(neon_tst_u8, i32, i32, i32)
@@ -689,6 +688,8 @@ DEF_HELPER_FLAGS_4(gvec_sshl_h, TCG_CALL_NO_RWG, void, ptr, 
ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_ushl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_ushl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_4(gvec_pmul_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
 #ifdef TARGET_AARCH64
 #include "helper-a64.h"
 #include "helper-sve.h"
diff --git a/target/arm/neon_helper.c b/target/arm/neon_helper.c
index c581ffb7d3..9e7a9a1ac5 100644
--- a/target/arm/neon_helper.c
+++ b/target/arm/neon_helper.c
@@ -1131,28 +1131,6 @@ NEON_VOP(mul_u16, neon_u16, 2)
 
 /* Polynomial multiplication is like integer multiplication except the
partial products are XORed, not added.  */
-uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2)
-{
-uint32_t mask;
-uint32_t result;
-result = 0;
-while (op1) {
-mask = 0;
-if (op1 & 1)
-mask |= 0xff;
-if (op1 & (1 << 8))
-mask |= (0xff << 8);
-if (op1 & (1 << 16))
-mask |= (0xff << 16);
-if (op1 & (1 << 24))
-mask |= (0xff << 24);
-result ^= op2 & mask;
-op1 = (op1 >> 1) & 0x7f7f7f7f;
-op2 = (op2 << 1) & 0xfefefefe;
-}
-return result;
-}
-
 uint64_t HELPER(neon_mull_p8)(uint32_t op1, uint32_t op2)
 {
 uint64_t result = 0;
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 255a168df6..04e25cfe06 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -0,9 +0,10 @@ static void disas_simd_3same_int(DisasContext *s, 
uint32_t insn)
 case 0x13: /* MUL, PMUL */
 if (!u) { /* MUL */
 gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
-return;
+} else {  /* PMUL */
+gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b);
 }
-break;
+return;
 case 0x12: /* MLA, MLS */
 if (u) {
 gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]);
@@ -11242,11 +11243,6 @@ static void disas_simd_3same_int(DisasContext *s, 
uint32_t insn)
 genfn = fns[size][u];
 break;
 }
-case 0x13: /* MUL, PMUL */
-assert(u); /* PMUL */
-assert(size == 0);
-genfn = gen_helper_neon_mul_p8;
-break;
 case 0x16: /* SQDMULH, SQRDMULH */
 {
 static NeonGenTwoOpEnvFn * const fns[2][2] = {
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 598bb1cc00..b66a2f6b71 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -5014,16 +5014,17 @@ static int disas_neon_data_insn(DisasContext *s, 
uint32_t insn)
 
 case NEON_3R_VMUL: /* VMUL */
 if (u) {
-/* Polynomial case allows only P8 and is handled below.  */
+/* Polynomial case allows only P8.  */
 if (size != 0) {
 return 1;
 }
+tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
+   0, gen_helper_gvec_pmul_b);
 } else {
 tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs,
  vec_size, vec_size);
-return 0;
 }
-break;
+return 0;
 
 case NEON_3R_VML: /* VMLA, VMLS */
 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
@@ -5213,10 +5214,6 @@ static int disas_neon_data_insn(DisasContext *s, 
uint32_t insn)
 tmp2 = neon_load_reg(rd, pass);
 gen_neon_add(size, tmp, tmp2);
 break;
-case NEON_3R_VMUL:
-/* VMUL.P8; other cases already eliminated.  */
-gen_helper_neon_mul_p8(tmp, tmp, tmp2);
-break;
 case NEON_3R_VPMAX:

[PATCH 0/4] target/arm vector improvements

2019-10-16 Thread Richard Henderson

The first patch has been seen before.

  https://patchwork.ozlabs.org/patch/1115039/

It had a bug and I didn't fix it right away and then forgot.
Fixed now; I had mixed up the operand ordering for aarch32.

The next 3 are something that I noticed while doing other stuff.

In particular, pmull is used heavily during https transfers.
While cloning a repository, the old code peaks at 27% of the
total runtime, as measured by perf top.  The new code does
not quite reach 3% repeating the same clone.

In addition, the new helper functions are in the form that
will be required for the implementation of SVE2.

The comment in patch 2 about ARMv8.4-DIT is perhaps a stretch,
but re-reading the pmull instruction description in the current
ARM ARM brought it to mind.

Since TCG is officially not in the security domain, it's
probably not a bug to just claim to support DIT without
actually doing anything to ensure the algorithms used are in
fact timing independent of the data.

On the other hand, I expect the bit distribution of stuff
going through these sorts of hashing algorithms to approach
50% 1's and 0's, so I also don't think we gain anything on
average by terminating the loop early.

Thoughts on DIT specifically?


r~


Richard Henderson (4):
  target/arm: Vectorize USHL and SSHL
  target/arm: Convert PMUL.8 to gvec
  target/arm: Convert PMULL.64 to gvec
  target/arm: Convert PMULL.8 to gvec

 target/arm/helper-sve.h|   2 +
 target/arm/helper.h|  21 ++-
 target/arm/translate.h |   6 +
 target/arm/neon_helper.c   | 117 -
 target/arm/translate-a64.c |  83 -
 target/arm/translate.c | 350 -
 target/arm/vec_helper.c| 211 ++
 7 files changed, 562 insertions(+), 228 deletions(-)

-- 
2.17.1

Re: [Patch v2] checkpatch: sugguest to use qemu_real_host_page_size instead of getpagesize() or sysconf(_SC_PAGESIZE)

2019-10-16 Thread no-reply

Patchew URL: 
https://patchew.org/QEMU/20191017004633.13229-1-richardw.y...@linux.intel.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Subject: [Patch v2] checkpatch: sugguest to use qemu_real_host_page_size 
instead of getpagesize() or sysconf(_SC_PAGESIZE)
Type: series
Message-id: 20191017004633.13229-1-richardw.y...@linux.intel.com

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
3751d76 checkpatch: sugguest to use qemu_real_host_page_size instead of 
getpagesize() or sysconf(_SC_PAGESIZE)

=== OUTPUT BEGIN ===
ERROR: line over 90 characters
#22: FILE: scripts/checkpatch.pl:2919:
+   ERROR("use qemu_real_host_page_size instead of 
getpagesize()\n" . $herecurr);

ERROR: line over 90 characters
#25: FILE: scripts/checkpatch.pl:2922:
+   ERROR("use qemu_real_host_page_size instead of 
sysconf(_SC_PAGESIZE)\n" . $herecurr);

total: 2 errors, 0 warnings, 12 lines checked

Commit 3751d7633653 (checkpatch: sugguest to use qemu_real_host_page_size 
instead of getpagesize() or sysconf(_SC_PAGESIZE)) has style problems, please 
review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/20191017004633.13229-1-richardw.y...@linux.intel.com/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [PATCH v4 00/19] spapr: IRQ subsystem cleanup

2019-10-16 Thread David Gibson

On Wed, Oct 16, 2019 at 06:04:04PM +0200, Greg Kurz wrote:
> On Wed,  9 Oct 2019 17:07:59 +1100
> David Gibson  wrote:
> 
> > This is a substantial rework to clean up the handling of IRQs in
> > spapr.  It includes some cleanups to both the XICS and XIVE interrupt
> > controller backends, as well as more to the common spapr irq handling
> > infrastructure.
> > 
> 
> Patches up to 16 have been reviewed. Any chance you merge them before
> soft freeze (12 days left) ?

Seems reasonable, merged to ppc-for-4.2.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

[Patch v2] checkpatch: sugguest to use qemu_real_host_page_size instead of getpagesize() or sysconf(_SC_PAGESIZE)

2019-10-16 Thread Wei Yang

Signed-off-by: Wei Yang 
CC: Richard Henderson 
CC: Stefan Hajnoczi 

---
v2: add "\b" for better match, suggested by Richard Henderson 

---
 scripts/checkpatch.pl | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index aa9a354a0e..ab68a16fd2 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2915,6 +2915,12 @@ sub process {
if ($line =~ /\bbzero\(/) {
ERROR("use memset() instead of bzero()\n" . $herecurr);
}
+   if ($line =~ /\bgetpagesize\(\)/) {
+   ERROR("use qemu_real_host_page_size instead of 
getpagesize()\n" . $herecurr);
+   }
+   if ($line =~ /\bsysconf\(_SC_PAGESIZE\)/) {
+   ERROR("use qemu_real_host_page_size instead of 
sysconf(_SC_PAGESIZE)\n" . $herecurr);
+   }
my $non_exit_glib_asserts = qr{g_assert_cmpstr|
g_assert_cmpint|
g_assert_cmpuint|
-- 
2.17.1

Re: [PATCH] checkpatch: sugguest to use qemu_real_host_page_size instead of getpagesize() or sysconf(_SC_PAGESIZE)

2019-10-16 Thread Wei Yang

On Wed, Oct 16, 2019 at 07:48:50PM +0100, Stefan Hajnoczi wrote:
>On Wed, Oct 16, 2019 at 09:24:32AM +0800, Wei Yang wrote:
>> Signed-off-by: Wei Yang 
>> CC: David Gibson 
>> ---
>>  scripts/checkpatch.pl | 6 ++
>>  1 file changed, 6 insertions(+)
>> 
>> diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
>> index aa9a354a0e..4b360ed310 100755
>> --- a/scripts/checkpatch.pl
>> +++ b/scripts/checkpatch.pl
>> @@ -2915,6 +2915,12 @@ sub process {
>>  if ($line =~ /\bbzero\(/) {
>>  ERROR("use memset() instead of bzero()\n" . $herecurr);
>>  }
>> +if ($line =~ /getpagesize\(\)/) {
>> +ERROR("use qemu_real_host_page_size instead of 
>> getpagesize()\n" . $herecurr);
>> +}
>> +if ($line =~ /sysconf\(_SC_PAGESIZE\)/) {
>> +ERROR("use qemu_real_host_page_size instead of 
>> sysconf(_SC_PAGESIZE)\n" . $herecurr);
>> +}
>>  my $non_exit_glib_asserts = qr{g_assert_cmpstr|
>>  g_assert_cmpint|
>>  g_assert_cmpuint|
>
>Just wanted to say thank you for extending checkpatch.pl!  We don't do
>it enough but it's the best way to extend QEMU coding style because it's
>automated :).
>

You are welcome. Glad to do something.

>Stefan



-- 
Wei Yang
Help you, Help me

Re: [PATCH] checkpatch: sugguest to use qemu_real_host_page_size instead of getpagesize() or sysconf(_SC_PAGESIZE)

2019-10-16 Thread Wei Yang

On Wed, Oct 16, 2019 at 08:43:32AM -0700, Richard Henderson wrote:
>On 10/15/19 6:24 PM, Wei Yang wrote:
>>  if ($line =~ /\bbzero\(/) {
>>  ERROR("use memset() instead of bzero()\n" . $herecurr);
>>  }
>> +if ($line =~ /getpagesize\(\)/) {
>> +ERROR("use qemu_real_host_page_size instead of 
>> getpagesize()\n" . $herecurr);
>> +}
>> +if ($line =~ /sysconf\(_SC_PAGESIZE\)/) {
>
>Use \b to match beginning of symbol like bzero did?
>

You are right, thanks for the suggestion.

>
>r~

-- 
Wei Yang
Help you, Help me

Re: [PATCH v3 10/10] vfio: unplug failover primary device before migration

2019-10-16 Thread Alex Williamson

On Wed, 16 Oct 2019 22:18:47 +0200
Jens Freimann  wrote:

> On Tue, Oct 15, 2019 at 07:52:12PM -0600, Alex Williamson wrote:
> >On Fri, 11 Oct 2019 13:20:15 +0200
> >Jens Freimann  wrote:
> >  
> >> As usual block all vfio-pci devices from being migrated, but make an
> >> exception for failover primary devices. This is achieved by setting
> >> unmigratable to 0 but also add a migration blocker for all vfio-pci
> >> devices except failover primary devices. These will be unplugged before
> >> migration happens by the migration handler of the corresponding
> >> virtio-net standby device.
> >>
> >> Signed-off-by: Jens Freimann 
> >> ---
> >>  hw/vfio/pci.c | 35 ++-
> >>  hw/vfio/pci.h |  2 ++
> >>  2 files changed, 36 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> >> index c5e6fe61cb..64cf8e07d9 100644
> >> --- a/hw/vfio/pci.c
> >> +++ b/hw/vfio/pci.c
> >> @@ -40,6 +40,9 @@
> >>  #include "pci.h"
> >>  #include "trace.h"
> >>  #include "qapi/error.h"
> >> +#include "migration/blocker.h"
> >> +#include "qemu/option.h"
> >> +#include "qemu/option_int.h"
> >>
> >>  #define TYPE_VFIO_PCI "vfio-pci"
> >>  #define PCI_VFIO(obj)OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI)
> >> @@ -2698,6 +2701,12 @@ static void 
> >> vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
> >>  vdev->req_enabled = false;
> >>  }
> >>
> >> +static int has_net_failover_arg(void *opaque, const char *name,
> >> +   const char *value, Error **errp)
> >> +{
> >> +return (strcmp(name, "net_failover_pair_id") == 0);
> >> +}
> >> +
> >>  static void vfio_realize(PCIDevice *pdev, Error **errp)
> >>  {
> >>  VFIOPCIDevice *vdev = PCI_VFIO(pdev);
> >> @@ -2710,6 +2719,20 @@ static void vfio_realize(PCIDevice *pdev, Error 
> >> **errp)
> >>  int groupid;
> >>  int i, ret;
> >>  bool is_mdev;
> >> +uint16_t class_id;
> >> +
> >> +if (qemu_opt_foreach(pdev->qdev.opts, has_net_failover_arg,
> >> + (void *) pdev->qdev.opts, &err) == 0) {  
> >
> >Why do we need a qemu_opt_foreach here versus testing
> >vdev->net_failover_pair_id as you do below or similar to how we test
> >sysfsdev immediately below this chunk?  
> 
> We don't need it, I will change it and move it to where we check for
> the PCI class.
> >  
> >> +error_setg(&vdev->migration_blocker,
> >> +"VFIO device doesn't support migration");
> >> +ret = migrate_add_blocker(vdev->migration_blocker, &err);  
> >
> >Where's the migrate_del_blocker()/error_free() for any other realize
> >error or device removal?
> >  
> >> +if (err) {
> >> +error_propagate(errp, err);
> >> +error_free(vdev->migration_blocker);
> >> +}  
> >
> >As Connie noted, unclear if this aborts or continues without a
> >migration blocker, which would be bad.  
> 
> It aborts in my test. PCI realize propagates it further and eventually
> it leads to aborting qemu.
> 
> It looks like this now:
> 
>  if (!pdev->net_failover_pair_id) {
>   error_setg(&vdev->migration_blocker,
>   "VFIO device doesn't support migration");
>   ret = migrate_add_blocker(vdev->migration_blocker, &err);
>   if (err) {
>   error_propagate(errp, err);
>   } else {
>   error_propagate(errp, vdev->migration_blocker);
>   }
>   goto error;

This unconditionally goes to error when we don't have a failover pair
set :-\

I suspect we don't want any sort of error propagation in the success
case; the migration_blocker pre-defines the error when the migration is
blocked, right?  Thanks,

Alex

>   } else {
>   pdev->qdev.allow_unplug_during_migration = true;
>   }
> 
> >> +} else {
> >> +pdev->qdev.allow_unplug_during_migration = true;
> >> +}
> >>
> >>  if (!vdev->vbasedev.sysfsdev) {
> >>  if (!(~vdev->host.domain || ~vdev->host.bus ||
> >> @@ -2812,6 +2835,14 @@ static void vfio_realize(PCIDevice *pdev, Error 
> >> **errp)
> >>  goto error;
> >>  }
> >>
> >> +if (vdev->net_failover_pair_id != NULL) {
> >> +class_id = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
> >> +if (class_id != PCI_CLASS_NETWORK_ETHERNET) {
> >> +error_setg(errp, "failover device is not an Ethernet device");
> >> +goto error;
> >> +}
> >> +}  
> >
> >Not clear to me why we do this separate from setting up the migration
> >blocker or why we use a different mechanism to test for the property.  
> 
> I'm moving this check to hw/pci/pci.c as you suggested.
> 
> >> +
> >>  /* vfio emulates a lot for us, but some bits need extra love */
> >>  vdev->emulated_config_bits = g_malloc0(vdev->config_size);
> >>
> >> @@ -3110,6 +3141,8 @@ static Property vfio_pci_dev_properties[] = {
> >>  display, ON_OFF_AUTO_OFF),
> >>  DEFINE_PROP_UINT32("xr

Re: [PATCH] ssi: xilinx_spips: Filter the non spi registers transactions

2019-10-16 Thread Alistair Francis

On Sun, Oct 13, 2019 at 11:51 PM Sai Pavan Boddu
 wrote:
>
> ZynqMP/Versal specific qspi registers should be handled inside
> zynqmp_qspi_read/write calls. When few of these transactions are handled by
> spi hooks we see state change in spi bus unexpectedly.
>
> Signed-off-by: Sai Pavan Boddu 
> ---
>  hw/ssi/xilinx_spips.c | 26 --
>  1 file changed, 24 insertions(+), 2 deletions(-)
>
> diff --git a/hw/ssi/xilinx_spips.c b/hw/ssi/xilinx_spips.c
> index a309c71..4f9f8e0 100644
> --- a/hw/ssi/xilinx_spips.c
> +++ b/hw/ssi/xilinx_spips.c
> @@ -109,6 +109,7 @@
>  #define R_GPIO  (0x30 / 4)
>  #define R_LPBK_DLY_ADJ  (0x38 / 4)
>  #define R_LPBK_DLY_ADJ_RESET (0x33)
> +#define R_IOU_TAPDLY_BYPASS (0x3C / 4)
>  #define R_TXD1  (0x80 / 4)
>  #define R_TXD2  (0x84 / 4)
>  #define R_TXD3  (0x88 / 4)
> @@ -139,6 +140,8 @@
>  #define R_LQSPI_STS (0xA4 / 4)
>  #define LQSPI_STS_WR_RECVD  (1 << 1)
>
> +#define R_DUMMY_CYCLE_EN(0xC8 / 4)
> +#define R_ECO   (0xF8 / 4)
>  #define R_MOD_ID(0xFC / 4)
>
>  #define R_GQSPI_SELECT  (0x144 / 4)
> @@ -938,7 +941,16 @@ static uint64_t xlnx_zynqmp_qspips_read(void *opaque,
>  int shortfall;
>
>  if (reg <= R_MOD_ID) {
> -return xilinx_spips_read(opaque, addr, size);
> +switch (addr) {
> +case R_GPIO:
> +case R_LPBK_DLY_ADJ:
> +case R_IOU_TAPDLY_BYPASS:
> +case R_DUMMY_CYCLE_EN:
> +case R_ECO:
> +return s->regs[addr / 4];
> +default:
> +return xilinx_spips_read(opaque, addr, size);

This doesn't seem right. This should have no functional change for the
read function and has the consequence of not printing the memory
accesses. If you try to debug this code now you won't see all of these
operations in the log.

> +}
>  } else {
>  switch (reg) {
>  case R_GQSPI_RXD:
> @@ -1063,7 +1075,17 @@ static void xlnx_zynqmp_qspips_write(void *opaque, 
> hwaddr addr,
>  uint32_t reg = addr / 4;
>
>  if (reg <= R_MOD_ID) {
> -xilinx_qspips_write(opaque, addr, value, size);
> +switch (reg) {
> +case R_GPIO:
> +case R_LPBK_DLY_ADJ:
> +case R_IOU_TAPDLY_BYPASS:
> +case R_DUMMY_CYCLE_EN:
> +case R_ECO:
> +s->regs[addr] = value;
> +break;
> +default:
> +xilinx_qspips_write(opaque, addr, value, size);
> +}

For the write code it looks like this skips the "no_reg_update" goto.
Maybe that is the issue that you are seeing?

Alistair

>  } else {
>  switch (reg) {
>  case R_GQSPI_CNFG:
> --
> 2.7.4
>
>

Re: [PATCH] configure: Require Python >= 3.5

2019-10-16 Thread John Snow




On 10/16/19 6:42 PM, Eduardo Habkost wrote:
> Python 3.5 is the oldest Python version available on our
> supported build platforms, and Python 2 end of life will be 3
> weeks after the planned release date of QEMU 4.2.0.  Drop Python
> 2 support from configure completely, and require Python 3.5 or
> newer.
> 
> Signed-off-by: Eduardo Habkost 

Seems like a good time and place to mention this. Kevin, you require
3.6+ for iotests, which are -- at present -- invoked as part of "make
check".

Do we care? Basically, this just means that iotests won't run for
systems that don't have 3.6+, which would be platforms like Debian 9 --
which is why ehabkost is choosing 3.5 here.

--js

> ---
>  configure  | 18 --
>  tests/Makefile.include |  5 -
>  2 files changed, 4 insertions(+), 19 deletions(-)
> 
> diff --git a/configure b/configure
> index 08ca4bcb46..870657ec7b 100755
> --- a/configure
> +++ b/configure
> @@ -895,9 +895,9 @@ fi
>  : ${install=${INSTALL-install}}
>  # We prefer python 3.x. A bare 'python' is traditionally
>  # python 2.x, but some distros have it as python 3.x, so
> -# we check that before python2
> +# we check that too
>  python=
> -for binary in "${PYTHON-python3}" python python2
> +for binary in "${PYTHON-python3}" python
>  do
>  if has "$binary"
>  then
> @@ -1824,8 +1824,8 @@ fi
>  
>  # Note that if the Python conditional here evaluates True we will exit
>  # with status 1 which is a shell 'false' value.
> -if ! $python -c 'import sys; sys.exit(sys.version_info < (2,7))'; then
> -  error_exit "Cannot use '$python', Python 2 >= 2.7 or Python 3 is 
> required." \
> +if ! $python -c 'import sys; sys.exit(sys.version_info < (3,5))'; then
> +  error_exit "Cannot use '$python', Python >= 3.5 is required." \
>"Use --python=/path/to/python to specify a supported Python."
>  fi
>  
> @@ -6456,15 +6456,6 @@ if test "$supported_os" = "no"; then
>  echo "us upstream at qemu-devel@nongnu.org."
>  fi
>  
> -# Note that if the Python conditional here evaluates True we will exit
> -# with status 1 which is a shell 'false' value.
> -if ! $python -c 'import sys; sys.exit(sys.version_info < (3,0))'; then
> -  echo
> -  echo "warning: Python 2 support is deprecated" >&2
> -  echo "warning: Python 3 will be required for building future versions of 
> QEMU" >&2
> -  python2="y"
> -fi
> -
>  config_host_mak="config-host.mak"
>  
>  echo "# Automatically generated by configure - do not modify" 
> >config-all-disas.mak
> @@ -7282,7 +7273,6 @@ echo "INSTALL_DATA=$install -c -m 0644" >> 
> $config_host_mak
>  echo "INSTALL_PROG=$install -c -m 0755" >> $config_host_mak
>  echo "INSTALL_LIB=$install -c -m 0644" >> $config_host_mak
>  echo "PYTHON=$python" >> $config_host_mak
> -echo "PYTHON2=$python2" >> $config_host_mak
>  echo "CC=$cc" >> $config_host_mak
>  if $iasl -h > /dev/null 2>&1; then
>echo "IASL=$iasl" >> $config_host_mak
> diff --git a/tests/Makefile.include b/tests/Makefile.include
> index 3543451ed3..54ee1f0a2f 100644
> --- a/tests/Makefile.include
> +++ b/tests/Makefile.include
> @@ -1137,7 +1137,6 @@ TESTS_RESULTS_DIR=$(BUILD_DIR)/tests/results
>  AVOCADO_SHOW=app
>  AVOCADO_TAGS=$(patsubst %-softmmu,-t arch:%, $(filter 
> %-softmmu,$(TARGET_DIRS)))
>  
> -ifneq ($(PYTHON2),y)
>  $(TESTS_VENV_DIR): $(TESTS_VENV_REQ)
>   $(call quiet-command, \
>  $(PYTHON) -m venv --system-site-packages $@, \
> @@ -1146,10 +1145,6 @@ $(TESTS_VENV_DIR): $(TESTS_VENV_REQ)
>  $(TESTS_VENV_DIR)/bin/python -m pip -q install -r 
> $(TESTS_VENV_REQ), \
>  PIP, $(TESTS_VENV_REQ))
>   $(call quiet-command, touch $@)
> -else
> -$(TESTS_VENV_DIR):
> - $(error "venv directory for tests requires Python 3")
> -endif
>  
>  $(TESTS_RESULTS_DIR):
>   $(call quiet-command, mkdir -p $@, \
>

[PATCH] configure: Require Python >= 3.5

2019-10-16 Thread Eduardo Habkost

Python 3.5 is the oldest Python version available on our
supported build platforms, and Python 2 end of life will be 3
weeks after the planned release date of QEMU 4.2.0.  Drop Python
2 support from configure completely, and require Python 3.5 or
newer.

Signed-off-by: Eduardo Habkost 
---
 configure  | 18 --
 tests/Makefile.include |  5 -
 2 files changed, 4 insertions(+), 19 deletions(-)

diff --git a/configure b/configure
index 08ca4bcb46..870657ec7b 100755
--- a/configure
+++ b/configure
@@ -895,9 +895,9 @@ fi
 : ${install=${INSTALL-install}}
 # We prefer python 3.x. A bare 'python' is traditionally
 # python 2.x, but some distros have it as python 3.x, so
-# we check that before python2
+# we check that too
 python=
-for binary in "${PYTHON-python3}" python python2
+for binary in "${PYTHON-python3}" python
 do
 if has "$binary"
 then
@@ -1824,8 +1824,8 @@ fi
 
 # Note that if the Python conditional here evaluates True we will exit
 # with status 1 which is a shell 'false' value.
-if ! $python -c 'import sys; sys.exit(sys.version_info < (2,7))'; then
-  error_exit "Cannot use '$python', Python 2 >= 2.7 or Python 3 is required." \
+if ! $python -c 'import sys; sys.exit(sys.version_info < (3,5))'; then
+  error_exit "Cannot use '$python', Python >= 3.5 is required." \
   "Use --python=/path/to/python to specify a supported Python."
 fi
 
@@ -6456,15 +6456,6 @@ if test "$supported_os" = "no"; then
 echo "us upstream at qemu-devel@nongnu.org."
 fi
 
-# Note that if the Python conditional here evaluates True we will exit
-# with status 1 which is a shell 'false' value.
-if ! $python -c 'import sys; sys.exit(sys.version_info < (3,0))'; then
-  echo
-  echo "warning: Python 2 support is deprecated" >&2
-  echo "warning: Python 3 will be required for building future versions of 
QEMU" >&2
-  python2="y"
-fi
-
 config_host_mak="config-host.mak"
 
 echo "# Automatically generated by configure - do not modify" 
>config-all-disas.mak
@@ -7282,7 +7273,6 @@ echo "INSTALL_DATA=$install -c -m 0644" >> 
$config_host_mak
 echo "INSTALL_PROG=$install -c -m 0755" >> $config_host_mak
 echo "INSTALL_LIB=$install -c -m 0644" >> $config_host_mak
 echo "PYTHON=$python" >> $config_host_mak
-echo "PYTHON2=$python2" >> $config_host_mak
 echo "CC=$cc" >> $config_host_mak
 if $iasl -h > /dev/null 2>&1; then
   echo "IASL=$iasl" >> $config_host_mak
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 3543451ed3..54ee1f0a2f 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -1137,7 +1137,6 @@ TESTS_RESULTS_DIR=$(BUILD_DIR)/tests/results
 AVOCADO_SHOW=app
 AVOCADO_TAGS=$(patsubst %-softmmu,-t arch:%, $(filter 
%-softmmu,$(TARGET_DIRS)))
 
-ifneq ($(PYTHON2),y)
 $(TESTS_VENV_DIR): $(TESTS_VENV_REQ)
$(call quiet-command, \
 $(PYTHON) -m venv --system-site-packages $@, \
@@ -1146,10 +1145,6 @@ $(TESTS_VENV_DIR): $(TESTS_VENV_REQ)
 $(TESTS_VENV_DIR)/bin/python -m pip -q install -r 
$(TESTS_VENV_REQ), \
 PIP, $(TESTS_VENV_REQ))
$(call quiet-command, touch $@)
-else
-$(TESTS_VENV_DIR):
-   $(error "venv directory for tests requires Python 3")
-endif
 
 $(TESTS_RESULTS_DIR):
$(call quiet-command, mkdir -p $@, \
-- 
2.21.0

Re: Python 2 and test/vm/netbsd

2019-10-16 Thread Eduardo Habkost

On Wed, Oct 16, 2019 at 08:11:57AM +0200, Thomas Huth wrote:
> On 16/10/2019 05.00, Eduardo Habkost wrote:
> > On Tue, Sep 17, 2019 at 08:31:40PM -0300, Eduardo Habkost wrote:
> >> On Mon, Jul 01, 2019 at 07:25:27PM -0300, Eduardo Habkost wrote:
> >>> On Mon, Jun 10, 2019 at 01:58:50PM +0100, Peter Maydell wrote:
> > [...]
>  The configure check also spits out deprecation warnings for
>  the NetBSD/FreeBSD/OpenBSD tests/vm configurations. It would be nice
>  to get those updated.
> >>>
> >>> CCing the test/vm maintainers.
> >>>
> >>> Fam, Alex, are you able to fix this and create new BSD VM images
> >>> with Python 3 available?  I thought the VM image configurations
> >>> were stored in the source tree, but they are downloaded from
> >>> download.patchew.org.
> >>
> >> Fam, Alex, can you help us on this?  Python 2 won't be supported
> >> anymore, so we need the VM images to be updated.
> > 
> > Anyone?
> > 
> > I'm about to submit patches to remove Python 2 support, and this
> > will break tests/vm/netbsd.
> > 
> > I'm powerless to fix this issue, because the netbsd image is
> > hosted at download.patchew.org.
> 
> Gerd had a patch to convert the netbsd VM script to ad hoc image
> creation, too:
> 
> https://lists.gnu.org/archive/html/qemu-devel/2019-05/msg04459.html
> 
> But there was a regression with the serial port between QEMU v3.0 and
> v4.x, so it was not included:
> 
> https://lists.gnu.org/archive/html/qemu-devel/2019-05/msg06784.html

The URL above has this error:

  con recv:  x: Exitqj
  con recv: To be able to use the network, we need answers to the
  following:Network media type
  con send: 
  con recv: : qqqk Perform autoconfiguration?
 >a: Yes b: Noq
  console: *** read timeout ***
  console: waiting for: 'a: Yes'
  console: line buffer:
  
  con recv: qqj

I believe that problem was solved in v4, because v4 was reading
the serial output 1 byte at a time.

The issue that caused the netbsd patch to be dropped was:
https://lore.kernel.org/qemu-devel/cafeaca8k9qja9ie-kwiaphr0fy_2zg7jrx5uv4aassjxcss...@mail.gmail.com/

Possibly this is the same issue we saw at:
https://lore.kernel.org/qemu-devel/20190607034214.gb22...@habkost.net/

The test script must either close the console socket, or keep
reading from it.  Otherwise, the QEMU VCPU threads might get
stuck waiting for the chardev to be writeable.

-- 
Eduardo

Re: [PATCH v10 2/3] block/nbd: nbd reconnect

2019-10-16 Thread Eric Blake


On 10/9/19 3:41 AM, Vladimir Sementsov-Ogievskiy wrote:

Implement reconnect. To achieve this:

1. add new modes:
connecting-wait: means, that reconnecting is in progress, and there
  were small number of reconnect attempts, so all requests are
  waiting for the connection.
connecting-nowait: reconnecting is in progress, there were a lot of
  attempts of reconnect, all requests will return errors.

two old modes are used too:
connected: normal state
quit: exiting after fatal error or on close

Possible transitions are:

* -> quit
connecting-* -> connected
connecting-wait -> connecting-nowait (transition is done after
   reconnect-delay seconds in connecting-wait mode)
connected -> connecting-wait

2. Implement reconnect in connection_co. So, in connecting-* mode,
 connection_co, tries to reconnect unlimited times.

3. Retry nbd queries on channel error, if we are in connecting-wait
 state.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  block/nbd.c | 331 ++--
  1 file changed, 268 insertions(+), 63 deletions(-)



Reviewed-by: Eric Blake 

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org

[PATCH] enable translating statx syscalls on more arches

2019-10-16 Thread Andrew Kelley

Signed-off-by: Andrew Kelley 
---
 linux-user/aarch64/syscall_nr.h | 13 ++
 linux-user/arm/syscall_nr.h | 38 
 linux-user/i386/syscall_nr.h| 43 
 linux-user/mips/cpu_loop.c  |  6 +
 linux-user/ppc/syscall_nr.h | 44 +
 5 files changed, 144 insertions(+)

diff --git a/linux-user/aarch64/syscall_nr.h
b/linux-user/aarch64/syscall_nr.h
index f00ffd7fb8..4e8d0bbb15 100644
--- a/linux-user/aarch64/syscall_nr.h
+++ b/linux-user/aarch64/syscall_nr.h
@@ -276,5 +276,18 @@
 #define TARGET_NR_membarrier 283
 #define TARGET_NR_mlock2 284
 #define TARGET_NR_copy_file_range 285
+#define TARGET_NR_preadv2 286
+#define TARGET_NR_pwritev2 287
+#define TARGET_NR_pkey_mprotect 288
+#define TARGET_NR_pkey_alloc 289
+#define TARGET_NR_pkey_free 290
+#define TARGET_NR_statx 291
+#define TARGET_NR_io_pgetevents 292
+#define TARGET_NR_rseq 293
+#define TARGET_NR_kexec_file_load 294
+#define TARGET_NR_pidfd_send_signal 424
+#define TARGET_NR_io_uring_setup 425
+#define TARGET_NR_io_uring_enter 426
+#define TARGET_NR_io_uring_register 427

 #endif
diff --git a/linux-user/arm/syscall_nr.h b/linux-user/arm/syscall_nr.h
index e7eda0d766..20afa3992a 100644
--- a/linux-user/arm/syscall_nr.h
+++ b/linux-user/arm/syscall_nr.h
@@ -400,4 +400,42 @@
 #define TARGET_NR_membarrier   (389)
 #define TARGET_NR_mlock2   (390)

+#define TARGET_NR_copy_file_range  (391)
+#define TARGET_NR_preadv2  (392)
+#define TARGET_NR_pwritev2 (393)
+#define TARGET_NR_pkey_mprotect(394)
+#define TARGET_NR_pkey_alloc   (395)
+#define TARGET_NR_pkey_free(396)
+#define TARGET_NR_statx(397)
+#define TARGET_NR_rseq (398)
+#define TARGET_NR_io_pgetevents(399)
+#define TARGET_NR_migrate_pages(400)
+
+#define TARGET_NR_kexec_file_load  (401)
+#define TARGET_NR_clock_gettime64  (403)
+#define TARGET_NR_clock_settime64  (404)
+#define TARGET_NR_clock_adjtime64  (405)
+#define TARGET_NR_clock_getres_time64  (406)
+#define TARGET_NR_clock_nanosleep_time64   (407)
+#define TARGET_NR_timer_gettime64  (408)
+#define TARGET_NR_timer_settime64  (409)
+#define TARGET_NR_timerfd_gettime64(410)
+
+#define TARGET_NR_timerfd_settime64(411)
+#define TARGET_NR_utimensat_time64 (412)
+#define TARGET_NR_pselect6_time64  (413)
+#define TARGET_NR_ppoll_time64 (414)
+#define TARGET_NR_io_pgetevents_time64 (416)
+#define TARGET_NR_recvmmsg_time64  (417)
+#define TARGET_NR_mq_timedsend_time64  (418)
+#define TARGET_NR_mq_timedreceive_time64   (419)
+#define TARGET_NR_semtimedop_time64(420)
+
+#define TARGET_NR_rt_sigtimedwait_time64   (421)
+#define TARGET_NR_futex_time64 (422)
+#define TARGET_NR_sched_rr_get_interval_time64 (423)
+#define TARGET_NR_pidfd_send_signal(424)
+#define TARGET_NR_io_uring_setup   (425)
+#define TARGET_NR_io_uring_enter   (426)
+#define TARGET_NR_io_uring_register(427)
 #endif
diff --git a/linux-user/i386/syscall_nr.h b/linux-user/i386/syscall_nr.h
index 3234ec21c6..e641674daf 100644
--- a/linux-user/i386/syscall_nr.h
+++ b/linux-user/i386/syscall_nr.h
@@ -383,5 +383,48 @@
 #define TARGET_NR_membarrier375
 #define TARGET_NR_mlock2376
 #define TARGET_NR_copy_file_range   377
+#define TARGET_NR_preadv2 378
+#define TARGET_NR_pwritev2 379
+#define TARGET_NR_pkey_mprotect 380
+#define TARGET_NR_pkey_alloc 381
+#define TARGET_NR_pkey_free 382
+#define TARGET_NR_statx 383
+#define TARGET_NR_arch_prctl 384
+#define TARGET_NR_io_pgetevents 385
+#define TARGET_NR_rseq 386
+#define TARGET_NR_semget 393
+#define TARGET_NR_semctl 394
+#define TARGET_NR_shmget 395
+#define TARGET_NR_shmctl 396
+#define TARGET_NR_shmat 397
+#define TARGET_NR_shmdt 398
+#define TARGET_NR_msgget 399
+#define TARGET_NR_msgsnd 400
+#define TARGET_NR_msgrcv 401
+#define TARGET_NR_msgctl 402
+#define TARGET_NR_clock_gettime64 403
+#define TARGET_NR_clock_settime64 404
+#define TARGET_NR_clock_adjtime64 405
+#define TARGET_NR_clock_getres_time64 406
+#define TARGET_NR_clock_nanosleep_time64 407
+#define TARGET_NR_timer_gettime64 408
+#define TARGET_NR_timer_settime64 409
+#define TARGET_NR_timerfd_gettime64 410
+#define TARGET_NR_timerfd_settime64 411
+#define TARGET_NR_utimensat_time64 412
+#define TARGET_NR_pselect6_time64 413
+#define TARGET_NR_ppoll_time64 414
+#define TARGET_NR_io_pgetevents_time64 416
+#define TARGET_NR_recvmmsg_time64 417
+#define TARGET_NR_mq_timedsend_time64 418
+#define TARGET_NR_mq_timedreceive_time64 419
+#define TARGET_NR_semtimedop_time64 420
+#define TAR

Re: Using virtual IOMMU in guest hypervisors other than KVM and Xen?

2019-10-16 Thread Jintack Lim

On Mon, Oct 14, 2019 at 7:50 PM Peter Xu  wrote:
>
> On Mon, Oct 14, 2019 at 01:28:49PM -0700, Jintack Lim wrote:
> > Hi,
>
> Hello, Jintack,
>
Hi Peter,

> >
> > I'm trying to pass through a physical network device to a nested VM
> > using virtual IOMMU. While I was able to do it successfully using KVM
> > and Xen guest hypervisors running in a VM respectively, I couldn't do
> > it with Hyper-V as I described below. I wonder if anyone has
> > successfully used virtual IOMMU in hypervisors other than KVM
> > and Xen? (like Hyper-V or VMware)
> >
> > The issue I have with Hyper-V is that Hyper-V gives an error that the
> > underlying hardware is not capable of doing passthrough. The exact
> > error message is as follows.
> >
> > Windows Power-shell > (Get-VMHost).IovSupportReasons
> > The chipset on the system does not do DMA remapping, without which
> > SR-IOV cannot be supported.
> >
> > I'm pretty sure that Hyper-V recognizes virtual IOMMU, though; I have
> > enabled iommu in windows boot loader[1], and I see differences when
> > booting a Windows VM with and without virtual IOMMU. I also checked
> > that virtual IOMMU traces are printed.
>
> What traces have you checked?  More explicitly, have you seen DMAR
> enabled and page table setup for that specific device to be
> pass-throughed?

Thanks for the pointers. I checked that DMAR is NOT enabled. The only
registers that Windows guest accessed were Version Register,
Capability Register, and Extended Capability Register. On the other
hand, a Linux guest accessed other registers and enabled DMAR.
Here's a link to the trace I got using QEMU 4.1.0. Do you see anything
interesting there?
http://paste.ubuntu.com/p/YcSyxG9Z3x/

>
> >
> > I have tried multiple KVM/QEMU versions including the latest ones
> > (kernel v5.3, QEMU 4.1.0) as well as two different Windows servers
> > (2016 and 2019), but I see the same result. [4]
> >
> > I'd love to hear if somebody is using virtual IOMMU in Hyper-V or
> > VMware successfully, especially for passthrough. I also appreciate if
> > somebody can point out any configuration errors I have.
> >
> > Here's the qemu command line I use, basically from the QEMU vt-d
> > page[2] and Hyper-v on KVM from kvmforum [3].
> >
> > ./qemu/x86_64-softmmu/qemu-system-x86_64 -device
> > intel-iommu,intremap=on,caching-mode=on -smp 6 -m 24G -M
>
> Have you tried to use 4-level IOMMU page table (aw-bits=48 on latest
> QEMU, or x-aw-bits=48 on some old ones)?  IIRC we've encountered
> issues when trying to pass the SVVP Windows test with this, in which
> 4-level is required.  I'm not sure whether that is required in
> general usages of vIOMMU in Windows.

I just tried the option you mentioned, but it didn't change anything.
BTW, what version of Windows was it?

>
> > q35,accel=kvm,kernel-irqchip=split -cpu
> > host,hv_relaxed,hv_spinlocks=0x1fff,hv_vapic,hv_time -drive
> > if=none,file=/vm/guest0.img,id=vda,cache=none,format=raw -device
> > virtio-blk-pci,drive=vda --nographic -qmp
> > unix:/var/run/qmp,server,nowait -serial
> > telnet:127.0.0.1:,server,nowait -netdev
> > user,id=net0,hostfwd=tcp::-:22 -device
> > virtio-net-pci,netdev=net0,mac=de:ad:be:ef:f2:12 -netdev
> > tap,id=net1,vhost=on,helper=/srv/vm/qemu/qemu-bridge-helper -device
> > virtio-net-pci,netdev=net1,disable-modern=off,disable-legacy=on,mac=de:ad:be:ef:f2:11
> > -device vfio-pci,host=:06:10.0,id=net2 -monitor stdio -usb -device
> > usb-tablet -rtc base=localtime,clock=host -vnc 127.0.0.1:4 --cdrom
> > win19.iso --drive file=virtio-win.iso,index=3,media=cdrom
>
> --
> Peter Xu
>

Re: [PATCH v1 22/28] target/riscv: Allow specifying MMU stage

2019-10-16 Thread Alistair Francis

On Wed, Oct 16, 2019 at 12:02 PM Palmer Dabbelt  wrote:
>
> On Mon, 07 Oct 2019 11:05:33 PDT (-0700), alistai...@gmail.com wrote:
> > On Thu, Oct 3, 2019 at 8:53 AM Palmer Dabbelt  wrote:
> >>
> >> On Fri, 23 Aug 2019 16:38:47 PDT (-0700), Alistair Francis wrote:
> >> > Signed-off-by: Alistair Francis 
> >> > ---
> >> >  target/riscv/cpu_helper.c | 39 ++-
> >> >  1 file changed, 30 insertions(+), 9 deletions(-)
> >> >
> >> > diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> >> > index 098873c83e..9aa6906acd 100644
> >> > --- a/target/riscv/cpu_helper.c
> >> > +++ b/target/riscv/cpu_helper.c
> >> > @@ -318,10 +318,19 @@ void riscv_cpu_set_mode(CPURISCVState *env, 
> >> > target_ulong newpriv)
> >> >   *
> >> >   * Adapted from Spike's mmu_t::translate and mmu_t::walk
> >> >   *
> >> > + * @env: CPURISCVState
> >> > + * @physical: This will be set to the calculated physical address
> >> > + * @prot: The returned protection attributes
> >> > + * @addr: The virtual address to be translated
> >> > + * @access_type: The type of MMU access
> >> > + * @mmu_idx: Indicates current privilege level
> >> > + * @first_stage: Are we in first stage translation?
> >> > + *   Second stage is used for hypervisor guest translation
> >> >   */
> >> >  static int get_physical_address(CPURISCVState *env, hwaddr *physical,
> >> >  int *prot, target_ulong addr,
> >> > -int access_type, int mmu_idx)
> >> > +int access_type, int mmu_idx,
> >> > +bool first_stage)
> >> >  {
> >> >  /* NOTE: the env->pc value visible here will not be
> >> >   * correct, but the value visible to the exception handler
> >> > @@ -518,13 +527,23 @@ restart:
> >> >  }
> >> >
> >> >  static void raise_mmu_exception(CPURISCVState *env, target_ulong 
> >> > address,
> >> > -MMUAccessType access_type, bool 
> >> > pmp_violation)
> >> > +MMUAccessType access_type, bool 
> >> > pmp_violation,
> >> > +bool first_stage)
> >> >  {
> >> >  CPUState *cs = env_cpu(env);
> >> > -int page_fault_exceptions =
> >> > -(env->priv_ver >= PRIV_VERSION_1_10_0) &&
> >> > -get_field(env->satp, SATP_MODE) != VM_1_10_MBARE &&
> >> > -!pmp_violation;
> >> > +int page_fault_exceptions;
> >> > +if (first_stage) {
> >> > +page_fault_exceptions =
> >> > +(env->priv_ver >= PRIV_VERSION_1_10_0) &&
> >> > +get_field(env->satp, SATP_MODE) != VM_1_10_MBARE &&
> >> > +!pmp_violation;
> >> > +riscv_cpu_set_force_hs_excep(env, CLEAR_HS_EXCEP);
> >>
> >> It might just be email, but the indentation looks wrong here.
> >
> > Yep, fixed.
> >
> >>
> >> > +} else {
> >> > +page_fault_exceptions =
> >> > +get_field(env->hgatp, HGATP_MODE) != VM_1_10_MBARE &&
> >> > +!pmp_violation;
> >> > +riscv_cpu_set_force_hs_excep(env, FORCE_HS_EXCEP);
> >> > +}
> >> >  switch (access_type) {
> >> >  case MMU_INST_FETCH:
> >> >  cs->exception_index = page_fault_exceptions ?
> >> > @@ -551,7 +570,8 @@ hwaddr riscv_cpu_get_phys_page_debug(CPUState *cs, 
> >> > vaddr addr)
> >> >  int prot;
> >> >  int mmu_idx = cpu_mmu_index(&cpu->env, false);
> >> >
> >> > -if (get_physical_address(&cpu->env, &phys_addr, &prot, addr, 0, 
> >> > mmu_idx)) {
> >> > +if (get_physical_address(&cpu->env, &phys_addr, &prot, addr, 0, 
> >> > mmu_idx,
> >> > + true)) {
> >> >  return -1;
> >> >  }
> >> >  return phys_addr;
> >> > @@ -613,7 +633,8 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, 
> >> > int size,
> >> >  qemu_log_mask(CPU_LOG_MMU, "%s ad %" VADDR_PRIx " rw %d mmu_idx 
> >> > %d\n",
> >> >__func__, address, access_type, mmu_idx);
> >> >
> >> > -ret = get_physical_address(env, &pa, &prot, address, access_type, 
> >> > mmu_idx);
> >> > +ret = get_physical_address(env, &pa, &prot, address, access_type, 
> >> > mmu_idx,
> >> > +   true);
> >> >
> >> >  if (mode == PRV_M && access_type != MMU_INST_FETCH) {
> >> >  if (get_field(*env->mstatus, MSTATUS_MPRV)) {
> >> > @@ -640,7 +661,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, 
> >> > int size,
> >> >  } else if (probe) {
> >> >  return false;
> >> >  } else {
> >> > -raise_mmu_exception(env, address, access_type, pmp_violation);
> >> > +raise_mmu_exception(env, address, access_type, pmp_violation, 
> >> > true);
> >> >  riscv_raise_exception(env, cs->exception_index, retaddr);
> >> >  }
> >> >  #else
> >>
> >> I don't think it makes sense to split off these two (23 and 24, that add 
> >> the
> >> argument) out from the implementation

Re: [PATCH v1 27/28] target/riscv: Add the MSTATUS_MPV_ISSET helper macro

2019-10-16 Thread Alistair Francis

On Tue, Oct 8, 2019 at 11:36 AM Palmer Dabbelt  wrote:
>
> On Fri, 23 Aug 2019 16:39:00 PDT (-0700), Alistair Francis wrote:
> > Add a helper macro MSTATUS_MPV_ISSET() which will determine if the
> > MSTATUS_MPV bit is set for both 32-bit and 64-bit RISC-V.
> >
> > Signed-off-by: Alistair Francis 
> > ---
> >  target/riscv/cpu_bits.h   | 11 +++
> >  target/riscv/cpu_helper.c |  4 ++--
> >  target/riscv/op_helper.c  |  2 +-
> >  target/riscv/translate.c  |  2 +-
> >  4 files changed, 15 insertions(+), 4 deletions(-)
> >
> > diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
> > index 55e20af6d9..7056d9218b 100644
> > --- a/target/riscv/cpu_bits.h
> > +++ b/target/riscv/cpu_bits.h
> > @@ -365,8 +365,19 @@
> >  #define MSTATUS_TVM 0x0010 /* since: priv-1.10 */
> >  #define MSTATUS_TW  0x2000 /* since: priv-1.10 */
> >  #define MSTATUS_TSR 0x4000 /* since: priv-1.10 */
> > +#if defined(TARGET_RISCV64)
> >  #define MSTATUS_MTL 0x40ULL
> >  #define MSTATUS_MPV 0x80ULL
> > +#elif defined(TARGET_RISCV32)
> > +#define MSTATUS_MTL 0x0040
> > +#define MSTATUS_MPV 0x0080
> > +#endif
> > +
> > +#ifdef TARGET_RISCV32
> > +# define MSTATUS_MPV_ISSET(env)  get_field(*env->mstatush, MSTATUS_MPV)
> > +#else
> > +# define MSTATUS_MPV_ISSET(env)  get_field(*env->mstatus, MSTATUS_MPV)
> > +#endif
> >
> >  #define MSTATUS64_UXL   0x0003ULL
> >  #define MSTATUS64_SXL   0x000CULL
> > diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> > index 8c80486dd0..2b88f756bb 100644
> > --- a/target/riscv/cpu_helper.c
> > +++ b/target/riscv/cpu_helper.c
> > @@ -351,7 +351,7 @@ static int get_physical_address(CPURISCVState *env, 
> > hwaddr *physical,
> >  mode = get_field(*env->mstatus, MSTATUS_MPP);
> >
> >  if (riscv_has_ext(env, RVH) &&
> > -get_field(*env->mstatus, MSTATUS_MPV)) {
> > +MSTATUS_MPV_ISSET(env)) {
> >  use_background = true;
> >  }
> >  }
> > @@ -730,7 +730,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, 
> > int size,
> >  m_mode_two_stage = env->priv == PRV_M &&
> > access_type != MMU_INST_FETCH &&
> > get_field(*env->mstatus, MSTATUS_MPRV) &&
> > -   get_field(*env->mstatus, MSTATUS_MPV);
> > +   MSTATUS_MPV_ISSET(env);
> >
> >  hs_mode_two_stage = env->priv == PRV_S &&
> >  !riscv_cpu_virt_enabled(env) &&
> > diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c
> > index 8dec1aee99..6149cd9c15 100644
> > --- a/target/riscv/op_helper.c
> > +++ b/target/riscv/op_helper.c
> > @@ -146,7 +146,7 @@ target_ulong helper_mret(CPURISCVState *env, 
> > target_ulong cpu_pc_deb)
> >
> >  target_ulong mstatus = *env->mstatus;
> >  target_ulong prev_priv = get_field(mstatus, MSTATUS_MPP);
> > -target_ulong prev_virt = get_field(mstatus, MSTATUS_MPV);
> > +target_ulong prev_virt = MSTATUS_MPV_ISSET(env);
> >  mstatus = set_field(mstatus,
> >  env->priv_ver >= PRIV_VERSION_1_10_0 ?
> >  MSTATUS_MIE : MSTATUS_UIE << prev_priv,
> > diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> > index ea19ba9c5d..f0d9860429 100644
> > --- a/target/riscv/translate.c
> > +++ b/target/riscv/translate.c
> > @@ -754,7 +754,7 @@ static void 
> > riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
> >  ctx->virt_enabled = riscv_cpu_virt_enabled(env);
> >  if (env->priv_ver == PRV_M &&
> >  get_field(*env->mstatus, MSTATUS_MPRV) &&
> > -get_field(*env->mstatus, MSTATUS_MPV)) {
> > +MSTATUS_MPV_ISSET(env)) {
> >  ctx->virt_enabled = true;
> >  } else if (env->priv == PRV_S &&
> > !riscv_cpu_virt_enabled(env) &&
>
> This should be either ordered before or atomic with the patch that allows
> mstatush.mpv to be set, as otherwise there's a point at which QEMU doesn't match
> the ISA.

I can't change the order due to dependencies, I can squash them but as
the Hypervisor extension can't be turned on there isn't really a
conflict with the ISA.

Do you still want me to squash them?

Alistair

Re: [PATCH v1 21/28] target/riscv: Respect MPRV and SPRV for floating point ops

2019-10-16 Thread Alistair Francis

On Wed, Oct 2, 2019 at 4:52 PM Palmer Dabbelt  wrote:
>
> On Fri, 23 Aug 2019 16:38:44 PDT (-0700), Alistair Francis wrote:
> > Respect the contents of MSTATUS.MPRV and HSTATUS.SPRV when performing
> > floating point operations when V=0.
>
> I'm confused as to what this has to do with floating point.

virt_enabled is only checked in mark_fs_dirty() for floating point support.

Alistair

>
> >
> > Signed-off-by: Alistair Francis 
> > ---
> >  target/riscv/translate.c | 16 +++-
> >  1 file changed, 15 insertions(+), 1 deletion(-)
> >
> > diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> > index 19771904f4..ea19ba9c5d 100644
> > --- a/target/riscv/translate.c
> > +++ b/target/riscv/translate.c
> > @@ -750,7 +750,21 @@ static void 
> > riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
> >  ctx->mstatus_fs = ctx->base.tb->flags & TB_FLAGS_MSTATUS_FS;
> >  ctx->priv_ver = env->priv_ver;
> >  #if !defined(CONFIG_USER_ONLY)
> > -ctx->virt_enabled = riscv_cpu_virt_enabled(env);
> > +if (riscv_has_ext(env, RVH)) {
> > +ctx->virt_enabled = riscv_cpu_virt_enabled(env);
> > +if (env->priv_ver == PRV_M &&
> > +get_field(*env->mstatus, MSTATUS_MPRV) &&
> > +get_field(*env->mstatus, MSTATUS_MPV)) {
> > +ctx->virt_enabled = true;
> > +} else if (env->priv == PRV_S &&
> > +   !riscv_cpu_virt_enabled(env) &&
> > +   get_field(env->hstatus, HSTATUS_SPRV) &&
> > +   get_field(env->hstatus, HSTATUS_SPV)) {
> > +ctx->virt_enabled = true;
> > +}
> > +} else {
> > +ctx->virt_enabled = false;
> > +}
> >  #else
> >  ctx->virt_enabled = false;
> >  #endif

Re: [PATCH v1 04/28] target/riscv: Fix CSR perm checking for HS mode

2019-10-16 Thread Alistair Francis

On Tue, Sep 10, 2019 at 7:48 AM Palmer Dabbelt  wrote:
>
> On Fri, 23 Aug 2019 16:38:00 PDT (-0700), Alistair Francis wrote:
> > Update the CSR permission checking to work correctly when we are in
> > HS-mode.
> >
> > Signed-off-by: Alistair Francis 
> > ---
> >  target/riscv/csr.c | 10 --
> >  1 file changed, 8 insertions(+), 2 deletions(-)
> >
> > diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> > index f767ad24be..471f23a1d0 100644
> > --- a/target/riscv/csr.c
> > +++ b/target/riscv/csr.c
> > @@ -799,9 +799,15 @@ int riscv_csrrw(CPURISCVState *env, int csrno, 
> > target_ulong *ret_value,
> >
> >  /* check privileges and return -1 if check fails */
> >  #if !defined(CONFIG_USER_ONLY)
> > -int csr_priv = get_field(csrno, 0x300);
> > +int csr_priv = env->priv;
>
> This isn't really "csr_priv" (ie, the priv needed to access the CSR) any more,
> it's really the effective priv of the machine.  Leaving the variable with the
> same name makes this hard to read, but I think it is correct.

I changed the name to effective_priv.

>
> >  int read_only = get_field(csrno, 0xC00) == 3;
> > -if ((write_mask && read_only) || (env->priv < csr_priv)) {
> > +
> > +if (riscv_has_ext(env, RVH) && !riscv_cpu_virt_enabled(env)) {
> > +/* Plus 1 as we are in HS mode */
>
> The comment is useless, it doesn't say why we increment it.  Also, I don't
> think this is correct: doesn't it allow U mode to access S CSRs when H is
> present and V is disabled?

Yes, you are correct. I have changed it to check that we are in S mode.

>
> Something like
>
> riscv_effective_priv(CPURISCVState *env)
> {
> if (riscv_has_ext(env, RVH) && env->priv == PRIV_S && 
> !riscv_cpu_virt_enabled(env)) {
> return PRIV_HS;

I don't like this as there is no PRIV_HS. It seems like a bad idea to
start using a reserved privilege level, if it is ever used we will
then be stuck updating this. I also don't think this is used anywhere
else. I have just fixed up the if statement and comment.

Alistair

> }
>
> return env->priv;
> }
>
> would probably be used in a handful of places, and would be a drop in for
> env->priv here.
>
> > +csr_priv++;
> > +}
> > +
> > +if ((write_mask && read_only) || (csr_priv < get_field(csrno, 0x300))) 
> > {
> >  return -1;
> >  }
> >  #endif

Re: [PATCH v9 04/15] hw/i386/pc: replace use of strtol with qemu_strtol in x86_load_linux()

2019-10-16 Thread Markus Armbruster

Philippe Mathieu-Daudé  writes:

> Hi Sergio,
>
> On 10/15/19 1:23 PM, Sergio Lopez wrote:
>> Follow checkpatch.pl recommendation and replace the use of strtol with
>> qemu_strtol in x86_load_linux().
>
> "with qemu_strtoui"
>
>>
>> Signed-off-by: Sergio Lopez 
>> ---
>>   hw/i386/pc.c | 9 -
>>   1 file changed, 8 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>> index 77e86bfc3d..c8608b8007 100644
>> --- a/hw/i386/pc.c
>> +++ b/hw/i386/pc.c
>> @@ -68,6 +68,7 @@
>>   #include "qemu/config-file.h"
>>   #include "qemu/error-report.h"
>>   #include "qemu/option.h"
>> +#include "qemu/cutils.h"
>>   #include "hw/acpi/acpi.h"
>>   #include "hw/acpi/cpu_hotplug.h"
>>   #include "hw/boards.h"
>> @@ -1202,6 +1203,7 @@ static void x86_load_linux(PCMachineState *pcms,
>>   vmode = strstr(kernel_cmdline, "vga=");
>>   if (vmode) {
>>   unsigned int video_mode;
>> +int ret;
>>   /* skip "vga=" */
>>   vmode += 4;
>>   if (!strncmp(vmode, "normal", 6)) {
>> @@ -1211,7 +1213,12 @@ static void x86_load_linux(PCMachineState *pcms,
>>   } else if (!strncmp(vmode, "ask", 3)) {
>>   video_mode = 0xfffd;
>>   } else {
>> -video_mode = strtol(vmode, NULL, 0);
>> +ret = qemu_strtoui(vmode, NULL, 0, &video_mode);
>> +if (ret != 0) {
>> +fprintf(stderr, "qemu: can't parse 'vga' parameter: %s\n",
>> +strerror(-ret));
>
> (Cc'ing Markus/Daniel just in case)
>
> I'm wondering if using fprintf() is appropriate, thinking about
> instantiating a machine via libvirt, is this error reported to the
> user?
>
> I first thought about using error_report() instead:
>
> error_report("qemu: can't parse 'vga' parameter: %s",
>  strerror(-ret));

Make that

 error_report("can't parse 'vga' parameter: %s", strerror(-ret));

> But this API is meaningful when used in console/monitor. We can't get
> here from the monitor,

True, but error_report() should be used anyway, because (1) it makes
intent more obvious, and (2) it uses a uniform, featureful error format.

With the proposed fprintf(), we get

qemu: can't parse 'vga' parameter: Numerical result out of range

With error_report():

* we report the *actual* argv[0] instead of "qemu"

* we obey -msg timestamp=on

* if "[PATCHv2 1/2] util/qemu-error: add guest name helper with -msg
  options" gets accepted, we obey -msg guest-name=on, too

* we have a common way to point to the offending command line argument
  or configuration file line (not worth doing here)

Please use error_report().

[...]

Re: [PATCH 5/5] aspeed/i2c: Add trace events

2019-10-16 Thread Jae Hyun Yoo


On 10/16/2019 1:50 AM, Cédric Le Goater wrote:

Signed-off-by: Cédric Le Goater 


Tested-by: Jae Hyun Yoo 

Thanks for the implementation!

-Jae

Re: [PATCH v3 10/10] vfio: unplug failover primary device before migration

2019-10-16 Thread Jens Freimann


On Tue, Oct 15, 2019 at 07:52:12PM -0600, Alex Williamson wrote:

On Fri, 11 Oct 2019 13:20:15 +0200
Jens Freimann  wrote:


As usual block all vfio-pci devices from being migrated, but make an
exception for failover primary devices. This is achieved by setting
unmigratable to 0 but also add a migration blocker for all vfio-pci
devices except failover primary devices. These will be unplugged before
migration happens by the migration handler of the corresponding
virtio-net standby device.

Signed-off-by: Jens Freimann 
---
 hw/vfio/pci.c | 35 ++-
 hw/vfio/pci.h |  2 ++
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c5e6fe61cb..64cf8e07d9 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -40,6 +40,9 @@
 #include "pci.h"
 #include "trace.h"
 #include "qapi/error.h"
+#include "migration/blocker.h"
+#include "qemu/option.h"
+#include "qemu/option_int.h"

 #define TYPE_VFIO_PCI "vfio-pci"
 #define PCI_VFIO(obj)OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI)
@@ -2698,6 +2701,12 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice 
*vdev)
 vdev->req_enabled = false;
 }

+static int has_net_failover_arg(void *opaque, const char *name,
+   const char *value, Error **errp)
+{
+return (strcmp(name, "net_failover_pair_id") == 0);
+}
+
 static void vfio_realize(PCIDevice *pdev, Error **errp)
 {
 VFIOPCIDevice *vdev = PCI_VFIO(pdev);
@@ -2710,6 +2719,20 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 int groupid;
 int i, ret;
 bool is_mdev;
+uint16_t class_id;
+
+if (qemu_opt_foreach(pdev->qdev.opts, has_net_failover_arg,
+ (void *) pdev->qdev.opts, &err) == 0) {


Why do we need a qemu_opt_foreach here versus testing
vdev->net_failover_pair_id as you do below or similar to how we test
sysfsdev immediately below this chunk?


We don't need it, I will change it and move it to where we check for
the PCI class.



+error_setg(&vdev->migration_blocker,
+"VFIO device doesn't support migration");
+ret = migrate_add_blocker(vdev->migration_blocker, &err);


Where's the migrate_del_blocker()/error_free() for any other realize
error or device removal?


+if (err) {
+error_propagate(errp, err);
+error_free(vdev->migration_blocker);
+}


As Connie noted, unclear if this aborts or continues without a
migration blocker, which would be bad.


It aborts in my test. PCI realize propagates it further and eventually
it leads to aborting qemu.

It looks like this now:

if (!pdev->net_failover_pair_id) {
 error_setg(&vdev->migration_blocker,
 "VFIO device doesn't support migration");
 ret = migrate_add_blocker(vdev->migration_blocker, &err);
 if (err) {
 error_propagate(errp, err);
 } else {
 error_propagate(errp, vdev->migration_blocker);
 }
 goto error;
 } else {
 pdev->qdev.allow_unplug_during_migration = true;
 }


+} else {
+pdev->qdev.allow_unplug_during_migration = true;
+}

 if (!vdev->vbasedev.sysfsdev) {
 if (!(~vdev->host.domain || ~vdev->host.bus ||
@@ -2812,6 +2835,14 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 goto error;
 }

+if (vdev->net_failover_pair_id != NULL) {
+class_id = pci_get_word(pdev->config + PCI_CLASS_DEVICE);
+if (class_id != PCI_CLASS_NETWORK_ETHERNET) {
+error_setg(errp, "failover device is not an Ethernet device");
+goto error;
+}
+}


Not clear to me why we do this separate from setting up the migration
blocker or why we use a different mechanism to test for the property.


I'm moving this check to hw/pci/pci.c as you suggested.


+
 /* vfio emulates a lot for us, but some bits need extra love */
 vdev->emulated_config_bits = g_malloc0(vdev->config_size);

@@ -3110,6 +3141,8 @@ static Property vfio_pci_dev_properties[] = {
 display, ON_OFF_AUTO_OFF),
 DEFINE_PROP_UINT32("xres", VFIOPCIDevice, display_xres, 0),
 DEFINE_PROP_UINT32("yres", VFIOPCIDevice, display_yres, 0),
+DEFINE_PROP_STRING("net_failover_pair_id", VFIOPCIDevice,
+net_failover_pair_id),


Should this and the Ethernet class test be done in PCIDevice?  The
migration aspect is the only thing unique to vfio since we don't
otherwise support it, right?  For instance, I should be able to
setup an emulated NIC with this failover pair id too, right?  Thanks,


Yes, we can do it in PCIDevice. Using it with an emulated device
wouldn't make sense for production but could make sense for
testing purposes.

Thanks for the review!

regards,
Jens

Re: [PATCH 4/5] aspeed/i2c: Add support for DMA transfers

2019-10-16 Thread Jae Hyun Yoo


On 10/16/2019 1:50 AM, Cédric Le Goater wrote:

The I2C controller of the Aspeed AST2500 and AST2600 SoCs supports DMA
transfers to and from DRAM.

A pair of registers defines the buffer address and the length of the
DMA transfer. The address should be aligned on 4 bytes and the maximum
length should not exceed 4K. The receive or transmit DMA transfer can
then be initiated with specific bits in the Command/Status register of
the controller.

Signed-off-by: Cédric Le Goater 


Tested-by: Jae Hyun Yoo

Re: [PATCH 2/5] aspeed/i2c: Check SRAM enablement on A2500

2019-10-16 Thread Jae Hyun Yoo


On 10/16/2019 1:50 AM, Cédric Le Goater wrote:

The SRAM must be enabled before using the Buffer Pool mode or the DMA
mode. This is not required on other SoCs.

Signed-off-by: Cédric Le Goater 


Tested-by: Jae Hyun Yoo

Re: [PATCH 1/5] aspeed/i2c: Add support for pool buffer transfers

2019-10-16 Thread Jae Hyun Yoo


On 10/16/2019 1:50 AM, Cédric Le Goater wrote:

The Aspeed I2C controller can operate in different transfer modes :

   - Byte Buffer mode, using a dedicated register to transfer a
 byte. This is what the model supports today.

   - Pool Buffer mode, using an internal SRAM to transfer multiple
 bytes in the same command sequence.

Each SoC has different SRAM characteristics. On the AST2400, 2048
bytes of SRAM are available at offset 0x800 of the controller AHB
window. The pool buffer can be configured from 1 to 256 bytes per bus.

On the AST2500, the SRAM is at offset 0x200 and the pool buffer is of
16 bytes per bus.

On the AST2600, the SRAM is at offset 0xC00 and the pool buffer is of
32 bytes per bus. It can be split in two for TX and RX but the
current model does not add support for it as it is unused by known
drivers.

Signed-off-by: Cédric Le Goater 


Tested-by: Jae Hyun Yoo

Re: [PATCH 3/5] aspeed: Add a DRAM memory region at the SoC level

2019-10-16 Thread Jae Hyun Yoo


On 10/16/2019 1:50 AM, Cédric Le Goater wrote:

Currently, we link the DRAM memory region to the FMC model (for DMAs)
through a property alias at the SoC level. The I2C model will need a
similar region for DMA support, add a DRAM region property at the SoC
level for both models to use.

Signed-off-by: Cédric Le Goater 


Tested-by: Jae Hyun Yoo

Re: [PATCH v8 0/8] Add Qemu to SeaBIOS LCHS interface

2019-10-16 Thread no-reply

Patchew URL: https://patchew.org/QEMU/20191016164145.115898-1-sam...@google.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Subject: [PATCH v8 0/8] Add Qemu to SeaBIOS LCHS interface
Type: series
Message-id: 20191016164145.115898-1-sam...@google.com

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Switched to a new branch 'test'
c45edbb bootdevice: FW_CFG interface for LCHS values
729ce76 bootdevice: Refactor get_boot_devices_list
46853bd bootdevice: Gather LCHS from all relevant devices
4f9e597 scsi: Propagate unrealize() callback to scsi-hd
50179a7 bootdevice: Add interface to gather LCHS
968cf33 block: Support providing LCHS from user
ccb1747 hd-geo-test: Add tests for lchs override
538dbd3 block: Refactor macros - fix tabbing

=== OUTPUT BEGIN ===
1/8 Checking commit 538dbd328a6b (block: Refactor macros - fix tabbing)
ERROR: Macros with complex values should be enclosed in parenthesis
#57: FILE: include/hw/block/block.h:65:
+#define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf)  \
+DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0),  \
+DEFINE_PROP_UINT32("heads", _state, _conf.heads, 0),\
 DEFINE_PROP_UINT32("secs", _state, _conf.secs, 0)

total: 1 errors, 0 warnings, 37 lines checked

Patch 1/8 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

2/8 Checking commit ccb1747f11c5 (hd-geo-test: Add tests for lchs override)
WARNING: Block comments use a leading /* on a separate line
#606: FILE: tests/hd-geo-test.c:965:
+   "skipping hd-geo/override/* tests");

total: 0 errors, 1 warnings, 578 lines checked

Patch 2/8 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
3/8 Checking commit 968cf330ecbc (block: Support providing LCHS from user)
4/8 Checking commit 50179a7c4473 (bootdevice: Add interface to gather LCHS)
5/8 Checking commit 4f9e597a32b8 (scsi: Propagate unrealize() callback to 
scsi-hd)
6/8 Checking commit 46853bd0b7bf (bootdevice: Gather LCHS from all relevant 
devices)
7/8 Checking commit 729ce765a499 (bootdevice: Refactor get_boot_devices_list)
8/8 Checking commit c45edbb64fde (bootdevice: FW_CFG interface for LCHS values)
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/20191016164145.115898-1-sam...@google.com/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [PATCH v10 14/15] hw/i386: Introduce the microvm machine type

2019-10-16 Thread Marc-André Lureau

Hi

On Wed, Oct 16, 2019 at 12:24 PM Sergio Lopez  wrote:
>
> microvm is a machine type inspired by Firecracker and constructed
> after its machine model.
>
> It's a minimalist machine type without PCI nor ACPI support, designed
> for short-lived guests. microvm also establishes a baseline for
> benchmarking and optimizing both QEMU and guest operating systems,
> since it is optimized for both boot time and footprint.
>
> Signed-off-by: Sergio Lopez 
> Reviewed-by: Michael S. Tsirkin 
> ---
>  default-configs/i386-softmmu.mak |   1 +
>  include/hw/i386/microvm.h|  83 +
>  hw/i386/microvm.c| 572 +++
>  hw/i386/Kconfig  |  10 +
>  hw/i386/Makefile.objs|   1 +
>  5 files changed, 667 insertions(+)
>  create mode 100644 include/hw/i386/microvm.h
>  create mode 100644 hw/i386/microvm.c
>
> diff --git a/default-configs/i386-softmmu.mak 
> b/default-configs/i386-softmmu.mak
> index 4229900f57..4cc64dafa2 100644
> --- a/default-configs/i386-softmmu.mak
> +++ b/default-configs/i386-softmmu.mak
> @@ -28,3 +28,4 @@
>  CONFIG_ISAPC=y
>  CONFIG_I440FX=y
>  CONFIG_Q35=y
> +CONFIG_MICROVM=y
> diff --git a/include/hw/i386/microvm.h b/include/hw/i386/microvm.h
> new file mode 100644
> index 00..54cf599f4e
> --- /dev/null
> +++ b/include/hw/i386/microvm.h
> @@ -0,0 +1,83 @@
> +/*
> + * Copyright (c) 2018 Intel Corporation
> + * Copyright (c) 2019 Red Hat, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2 or later, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along 
> with
> + * this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#ifndef HW_I386_MICROVM_H
> +#define HW_I386_MICROVM_H
> +
> +#include "qemu-common.h"
> +#include "exec/hwaddr.h"
> +#include "qemu/notify.h"
> +
> +#include "hw/boards.h"
> +#include "hw/i386/x86.h"
> +
> +/* microvm memory layout */
> +#define PVH_START_INFO0x6000
> +#define MEMMAP_START  0x7000
> +#define MODLIST_START 0x7800
> +#define BOOT_STACK_POINTER0x8ff0
> +#define PML4_START0x9000
> +#define PDPTE_START   0xa000
> +#define PDE_START 0xb000
> +#define KERNEL_CMDLINE_START  0x2
> +#define EBDA_START0x9fc00
> +#define HIMEM_START   0x10

Those defines look outdated, or am I missing something?

> +
> +/* Platform virtio definitions */
> +#define VIRTIO_MMIO_BASE  0xc000
> +#define VIRTIO_IRQ_BASE   5
> +#define VIRTIO_NUM_TRANSPORTS 8
> +#define VIRTIO_CMDLINE_MAXLEN 64
> +
> +/* Machine type options */
> +#define MICROVM_MACHINE_PIT "pit"
> +#define MICROVM_MACHINE_PIC "pic"
> +#define MICROVM_MACHINE_RTC "rtc"
> +#define MICROVM_MACHINE_ISA_SERIAL  "isa-serial"
> +#define MICROVM_MACHINE_OPTION_ROMS "x-option-roms"
> +#define MICROVM_MACHINE_AUTO_KERNEL_CMDLINE "auto-kernel-cmdline"
> +
> +typedef struct {
> +X86MachineClass parent;
> +HotplugHandler *(*orig_hotplug_handler)(MachineState *machine,
> +   DeviceState *dev);
> +} MicrovmMachineClass;
> +
> +typedef struct {
> +X86MachineState parent;
> +
> +/* Machine type options */
> +OnOffAuto pic;
> +OnOffAuto pit;
> +OnOffAuto rtc;
> +bool isa_serial;
> +bool option_roms;
> +bool auto_kernel_cmdline;
> +
> +/* Machine state */
> +bool kernel_cmdline_fixed;
> +} MicrovmMachineState;
> +
> +#define TYPE_MICROVM_MACHINE   MACHINE_TYPE_NAME("microvm")
> +#define MICROVM_MACHINE(obj) \
> +OBJECT_CHECK(MicrovmMachineState, (obj), TYPE_MICROVM_MACHINE)
> +#define MICROVM_MACHINE_GET_CLASS(obj) \
> +OBJECT_GET_CLASS(MicrovmMachineClass, obj, TYPE_MICROVM_MACHINE)
> +#define MICROVM_MACHINE_CLASS(class) \
> +OBJECT_CLASS_CHECK(MicrovmMachineClass, class, TYPE_MICROVM_MACHINE)
> +
> +#endif
> diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
> new file mode 100644
> index 00..20d2189ea8
> --- /dev/null
> +++ b/hw/i386/microvm.c
> @@ -0,0 +1,572 @@
> +/*
> + * Copyright (c) 2018 Intel Corporation
> + * Copyright (c) 2019 Red Hat, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2 or later, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

[PATCH 08/10] image-fuzzer: Encode file name and file format to bytes

2019-10-16 Thread Eduardo Habkost

Callers of create_image() will pass strings as arguments, but the
Image class will expect bytes objects to be provided.  Encode
them inside create_image().

Signed-off-by: Eduardo Habkost 
---
 tests/image-fuzzer/qcow2/layout.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/image-fuzzer/qcow2/layout.py 
b/tests/image-fuzzer/qcow2/layout.py
index a0fd53c7ad..01bff4d05e 100644
--- a/tests/image-fuzzer/qcow2/layout.py
+++ b/tests/image-fuzzer/qcow2/layout.py
@@ -602,8 +602,8 @@ class Image(object):
 def create_image(test_img_path, backing_file_name=None, backing_file_fmt=None,
  fields_to_fuzz=None):
 """Create a fuzzed image and write it to the specified file."""
-image = Image(backing_file_name)
-image.set_backing_file_format(backing_file_fmt)
+image = Image(backing_file_name.encode())
+image.set_backing_file_format(backing_file_fmt.encode())
 image.create_feature_name_table()
 image.set_end_of_extension_area()
 image.create_l_structures()
-- 
2.21.0

[PATCH 07/10] image-fuzzer: Use bytes constant for field values

2019-10-16 Thread Eduardo Habkost

Field values are supposed to be bytes objects, not unicode
strings.  Change two constants that were declared as strings.

Signed-off-by: Eduardo Habkost 
---
 tests/image-fuzzer/qcow2/layout.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/image-fuzzer/qcow2/layout.py 
b/tests/image-fuzzer/qcow2/layout.py
index 0adcbd448d..a0fd53c7ad 100644
--- a/tests/image-fuzzer/qcow2/layout.py
+++ b/tests/image-fuzzer/qcow2/layout.py
@@ -122,7 +122,7 @@ class Image(object):
 def create_header(self, cluster_bits, backing_file_name=None):
 """Generate a random valid header."""
 meta_header = [
-['>4s', 0, "QFI\xfb", 'magic'],
+['>4s', 0, b"QFI\xfb", 'magic'],
 ['>I', 4, random.randint(2, 3), 'version'],
 ['>Q', 8, 0, 'backing_file_offset'],
 ['>I', 16, 0, 'backing_file_size'],
@@ -231,7 +231,7 @@ class Image(object):
 feature_tables = []
 feature_ids = []
 inner_offset = self.ext_offset + ext_header_len
-feat_name = 'some cool feature'
+feat_name = b'some cool feature'
 while len(feature_tables) < num_fnt_entries * 3:
 feat_type, feat_bit = gen_feat_ids()
 # Remove duplicates
-- 
2.21.0

[PATCH 06/10] image-fuzzer: Return bytes objects on string fuzzing functions

2019-10-16 Thread Eduardo Habkost

No caller of fuzzer functions is interested in unicode string values,
so replace them with bytes sequences.

Signed-off-by: Eduardo Habkost 
---
 tests/image-fuzzer/qcow2/fuzz.py | 42 
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/tests/image-fuzzer/qcow2/fuzz.py b/tests/image-fuzzer/qcow2/fuzz.py
index 154dc06cc0..c58bf11005 100644
--- a/tests/image-fuzzer/qcow2/fuzz.py
+++ b/tests/image-fuzzer/qcow2/fuzz.py
@@ -36,11 +36,11 @@ UINT32_V = [0, 0x100, 0x1000, 0x1, 0x10, UINT32//4, 
UINT32//2 - 1,
 UINT64_V = UINT32_V + [0x100, 0x1000, 0x1, UINT64//4,
UINT64//2 - 1, UINT64//2, UINT64//2 + 1, UINT64 - 1,
UINT64]
-STRING_V = ['%s%p%x%d', '.1024d', '%.2049d', '%p%p%p%p', '%x%x%x%x',
-'%d%d%d%d', '%s%s%s%s', '%999s', '%08x', '%%20d', '%%20n',
-'%%20x', '%%20s', '%s%s%s%s%s%s%s%s%s%s', '%p%p%p%p%p%p%p%p%p%p',
-'%#0123456x%08x%x%s%p%d%n%o%u%c%h%l%q%j%z%Z%t%i%e%g%f%a%C%S%08x%%',
-'%s x 129', '%x x 257']
+BYTES_V = [b'%s%p%x%d', b'.1024d', b'%.2049d', b'%p%p%p%p', b'%x%x%x%x',
+   b'%d%d%d%d', b'%s%s%s%s', b'%999s', b'%08x', b'%%20d', 
b'%%20n',
+   b'%%20x', b'%%20s', b'%s%s%s%s%s%s%s%s%s%s', 
b'%p%p%p%p%p%p%p%p%p%p',
+   b'%#0123456x%08x%x%s%p%d%n%o%u%c%h%l%q%j%z%Z%t%i%e%g%f%a%C%S%08x%%',
+   b'%s x 129', b'%x x 257']
 
 
 def random_from_intervals(intervals):
@@ -76,12 +76,12 @@ def random_bits(bit_ranges):
 return val
 
 
-def truncate_string(strings, length):
-"""Return strings truncated to specified length."""
-if type(strings) == list:
-return [s[:length] for s in strings]
+def truncate_bytes(sequences, length):
+"""Return sequences truncated to specified length."""
+if type(sequences) == list:
+return [s[:length] for s in sequences]
 else:
-return strings[:length]
+return sequences[:length]
 
 
 def validator(current, pick, choices):
@@ -110,12 +110,12 @@ def bit_validator(current, bit_ranges):
 return validator(current, random_bits, bit_ranges)
 
 
-def string_validator(current, strings):
-"""Return a random string value from the list not equal to the current.
+def bytes_validator(current, sequences):
+"""Return a random bytes value from the list not equal to the current.
 
 This function is useful for selection from valid values except current one.
 """
-return validator(current, random.choice, strings)
+return validator(current, random.choice, sequences)
 
 
 def selector(current, constraints, validate=int_validator):
@@ -283,9 +283,9 @@ def header_length(current):
 def bf_name(current):
 """Fuzz the backing file name."""
 constraints = [
-truncate_string(STRING_V, len(current))
+truncate_bytes(BYTES_V, len(current))
 ]
-return selector(current, constraints, string_validator)
+return selector(current, constraints, bytes_validator)
 
 
 def ext_magic(current):
@@ -303,10 +303,10 @@ def ext_length(current):
 def bf_format(current):
 """Fuzz backing file format in the corresponding header extension."""
 constraints = [
-truncate_string(STRING_V, len(current)),
-truncate_string(STRING_V, (len(current) + 7) & ~7)  # Fuzz padding
+truncate_bytes(BYTES_V, len(current)),
+truncate_bytes(BYTES_V, (len(current) + 7) & ~7)  # Fuzz padding
 ]
-return selector(current, constraints, string_validator)
+return selector(current, constraints, bytes_validator)
 
 
 def feature_type(current):
@@ -324,10 +324,10 @@ def feature_bit_number(current):
 def feature_name(current):
 """Fuzz feature name field of a feature name table header extension."""
 constraints = [
-truncate_string(STRING_V, len(current)),
-truncate_string(STRING_V, 46)  # Fuzz padding (field length = 46)
+truncate_bytes(BYTES_V, len(current)),
+truncate_bytes(BYTES_V, 46)  # Fuzz padding (field length = 46)
 ]
-return selector(current, constraints, string_validator)
+return selector(current, constraints, bytes_validator)
 
 
 def l1_entry(current):
-- 
2.21.0

[PATCH 05/10] image-fuzzer: Use %r for all fields at Field.__repr__()

2019-10-16 Thread Eduardo Habkost

This makes the formatting code simpler, and safer if we change
the type of self.value from str to bytes.

Signed-off-by: Eduardo Habkost 
---
 tests/image-fuzzer/qcow2/layout.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/image-fuzzer/qcow2/layout.py 
b/tests/image-fuzzer/qcow2/layout.py
index 6501c9fd4b..0adcbd448d 100644
--- a/tests/image-fuzzer/qcow2/layout.py
+++ b/tests/image-fuzzer/qcow2/layout.py
@@ -53,8 +53,8 @@ class Field(object):
 return iter([self.fmt, self.offset, self.value, self.name])
 
 def __repr__(self):
-return "Field(fmt='%s', offset=%d, value=%s, name=%s)" % \
-(self.fmt, self.offset, str(self.value), self.name)
+return "Field(fmt=%r, offset=%r, value=%r, name=%r)" % \
+(self.fmt, self.offset, self.value, self.name)
 
 
 class FieldsList(object):
-- 
2.21.0

[PATCH 04/10] image-fuzzer: Use io.StringIO

2019-10-16 Thread Eduardo Habkost

StringIO.StringIO is not available on Python 3, but io.StringIO
is available on both Python 2 and 3.  io.StringIO is slightly
different from the Python 2 StringIO module, though, so we need
bytes coming from subprocess.Popen() to be explicitly decoded.

Signed-off-by: Eduardo Habkost 
---
 tests/image-fuzzer/runner.py | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/tests/image-fuzzer/runner.py b/tests/image-fuzzer/runner.py
index 95d84f38f3..94cab5bd93 100755
--- a/tests/image-fuzzer/runner.py
+++ b/tests/image-fuzzer/runner.py
@@ -28,7 +28,7 @@ import shutil
 from itertools import count
 import time
 import getopt
-import StringIO
+import io
 import resource
 
 try:
@@ -84,8 +84,12 @@ def run_app(fd, q_args):
 try:
 out, err = process.communicate()
 signal.alarm(0)
-fd.write(out)
-fd.write(err)
+# fd is a text file, so we need to decode the process output before
+# writing to it.
+# We could be simply using the `errors` parameter of 
subprocess.Popen(),
+# but this will be possible only after migrating to Python 3
+fd.write(out.decode(errors='replace'))
+fd.write(err.decode(errors='replace'))
 fd.flush()
 return process.returncode
 
@@ -183,7 +187,7 @@ class TestEnv(object):
MAX_BACKING_FILE_SIZE) * (1 << 20)
 cmd = self.qemu_img + ['create', '-f', backing_file_fmt,
backing_file_name, str(backing_file_size)]
-temp_log = StringIO.StringIO()
+temp_log = io.StringIO()
 retcode = run_app(temp_log, cmd)
 if retcode == 0:
 temp_log.close()
@@ -240,7 +244,7 @@ class TestEnv(object):
"Backing file: %s\n" \
% (self.seed, " ".join(current_cmd),
   self.current_dir, backing_file_name)
-temp_log = StringIO.StringIO()
+temp_log = io.StringIO()
 try:
 retcode = run_app(temp_log, current_cmd)
 except OSError as e:
-- 
2.21.0

[PATCH 09/10] image-fuzzer: Run using python3

2019-10-16 Thread Eduardo Habkost

image-fuzzer is now supposed to be ready to run using Python 3.
Remove the __future__ imports and change the interpreter line to
"#!/usr/bin/env python3".

Signed-off-by: Eduardo Habkost 
---
 tests/image-fuzzer/qcow2/__init__.py | 1 -
 tests/image-fuzzer/qcow2/layout.py   | 1 -
 tests/image-fuzzer/runner.py | 3 +--
 3 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/tests/image-fuzzer/qcow2/__init__.py 
b/tests/image-fuzzer/qcow2/__init__.py
index 09ef59821b..ed3af5da86 100644
--- a/tests/image-fuzzer/qcow2/__init__.py
+++ b/tests/image-fuzzer/qcow2/__init__.py
@@ -1,2 +1 @@
-from __future__ import absolute_import
 from .layout import create_image
diff --git a/tests/image-fuzzer/qcow2/layout.py 
b/tests/image-fuzzer/qcow2/layout.py
index 01bff4d05e..57ebe86e9a 100644
--- a/tests/image-fuzzer/qcow2/layout.py
+++ b/tests/image-fuzzer/qcow2/layout.py
@@ -16,7 +16,6 @@
 # along with this program.  If not, see .
 #
 
-from __future__ import absolute_import
 import random
 import struct
 from . import fuzz
diff --git a/tests/image-fuzzer/runner.py b/tests/image-fuzzer/runner.py
index 94cab5bd93..0793234815 100755
--- a/tests/image-fuzzer/runner.py
+++ b/tests/image-fuzzer/runner.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 
 # Tool for running fuzz tests
 #
@@ -18,7 +18,6 @@
 # along with this program.  If not, see .
 #
 
-from __future__ import print_function
 import sys
 import os
 import signal
-- 
2.21.0

[PATCH 10/10] image-fuzzer: Use errors parameter of subprocess.Popen()

2019-10-16 Thread Eduardo Habkost

Instead of manually decoding stderr and stdout output, use the
`errors` parameter of subprocess.Popen().  This will make
process.communicate() return unicode strings instead of bytes
objects.

Signed-off-by: Eduardo Habkost 
---
 tests/image-fuzzer/runner.py | 11 ---
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/tests/image-fuzzer/runner.py b/tests/image-fuzzer/runner.py
index 0793234815..4ba5c79e13 100755
--- a/tests/image-fuzzer/runner.py
+++ b/tests/image-fuzzer/runner.py
@@ -79,16 +79,13 @@ def run_app(fd, q_args):
 devnull = open('/dev/null', 'r+')
 process = subprocess.Popen(q_args, stdin=devnull,
stdout=subprocess.PIPE,
-   stderr=subprocess.PIPE)
+   stderr=subprocess.PIPE,
+   errors='replace')
 try:
 out, err = process.communicate()
 signal.alarm(0)
-# fd is a text file, so we need to decode the process output before
-# writing to it.
-# We could be simply using the `errors` parameter of 
subprocess.Popen(),
-# but this will be possible only after migrating to Python 3
-fd.write(out.decode(errors='replace'))
-fd.write(err.decode(errors='replace'))
+fd.write(out)
+fd.write(err)
 fd.flush()
 return process.returncode
 
-- 
2.21.0

[PATCH 03/10] image-fuzzer: Explicitly use integer division operator

2019-10-16 Thread Eduardo Habkost

Most of the division expressions in image-fuzzer assume integer
division.  Use the // operator to keep the same behavior when we
move to Python 3.

Signed-off-by: Eduardo Habkost 
---
 tests/image-fuzzer/qcow2/fuzz.py   | 12 -
 tests/image-fuzzer/qcow2/layout.py | 40 +++---
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/tests/image-fuzzer/qcow2/fuzz.py b/tests/image-fuzzer/qcow2/fuzz.py
index abc4f0635d..154dc06cc0 100644
--- a/tests/image-fuzzer/qcow2/fuzz.py
+++ b/tests/image-fuzzer/qcow2/fuzz.py
@@ -27,14 +27,14 @@ UINT64 = 0x
 UINT32_M = 31
 UINT64_M = 63
 # Fuzz vectors
-UINT8_V = [0, 0x10, UINT8/4, UINT8/2 - 1, UINT8/2, UINT8/2 + 1, UINT8 - 1,
+UINT8_V = [0, 0x10, UINT8//4, UINT8//2 - 1, UINT8//2, UINT8//2 + 1, UINT8 - 1,
UINT8]
-UINT16_V = [0, 0x100, 0x1000, UINT16/4, UINT16/2 - 1, UINT16/2, UINT16/2 + 1,
+UINT16_V = [0, 0x100, 0x1000, UINT16//4, UINT16//2 - 1, UINT16//2, UINT16//2 + 
1,
 UINT16 - 1, UINT16]
-UINT32_V = [0, 0x100, 0x1000, 0x1, 0x10, UINT32/4, UINT32/2 - 1,
-UINT32/2, UINT32/2 + 1, UINT32 - 1, UINT32]
-UINT64_V = UINT32_V + [0x100, 0x1000, 0x1, UINT64/4,
-   UINT64/2 - 1, UINT64/2, UINT64/2 + 1, UINT64 - 1,
+UINT32_V = [0, 0x100, 0x1000, 0x1, 0x10, UINT32//4, UINT32//2 - 1,
+UINT32//2, UINT32//2 + 1, UINT32 - 1, UINT32]
+UINT64_V = UINT32_V + [0x100, 0x1000, 0x1, UINT64//4,
+   UINT64//2 - 1, UINT64//2, UINT64//2 + 1, UINT64 - 1,
UINT64]
 STRING_V = ['%s%p%x%d', '.1024d', '%.2049d', '%p%p%p%p', '%x%x%x%x',
 '%d%d%d%d', '%s%s%s%s', '%999s', '%08x', '%%20d', '%%20n',
diff --git a/tests/image-fuzzer/qcow2/layout.py 
b/tests/image-fuzzer/qcow2/layout.py
index fe273d4143..6501c9fd4b 100644
--- a/tests/image-fuzzer/qcow2/layout.py
+++ b/tests/image-fuzzer/qcow2/layout.py
@@ -253,7 +253,7 @@ class Image(object):
 ['>I', self.ext_offset, 0x6803f857, 'ext_magic'],
 # One feature table contains 3 fields and takes 48 bytes
 ['>I', self.ext_offset + UINT32_S,
- len(feature_tables) / 3 * 48, 'ext_length']
+ len(feature_tables) // 3 * 48, 'ext_length']
 ] + feature_tables)
 self.ext_offset = inner_offset
 
@@ -271,7 +271,7 @@ class Image(object):
 def create_l2_entry(host, guest, l2_cluster):
 """Generate one L2 entry."""
 offset = l2_cluster * self.cluster_size
-l2_size = self.cluster_size / UINT64_S
+l2_size = self.cluster_size // UINT64_S
 entry_offset = offset + UINT64_S * (guest % l2_size)
 cluster_descriptor = host * self.cluster_size
 if not self.header['version'][0].value == 2:
@@ -283,8 +283,8 @@ class Image(object):
 
 def create_l1_entry(l2_cluster, l1_offset, guest):
 """Generate one L1 entry."""
-l2_size = self.cluster_size / UINT64_S
-entry_offset = l1_offset + UINT64_S * (guest / l2_size)
+l2_size = self.cluster_size // UINT64_S
+entry_offset = l1_offset + UINT64_S * (guest // l2_size)
 # While snapshots are not supported bit #63 = 1
 entry_val = (1 << 63) + l2_cluster * self.cluster_size
 return ['>Q', entry_offset, entry_val, 'l1_entry']
@@ -298,11 +298,11 @@ class Image(object):
 l2 = []
 else:
 meta_data = self._get_metadata()
-guest_clusters = random.sample(range(self.image_size /
+guest_clusters = random.sample(range(self.image_size //
  self.cluster_size),
len(self.data_clusters))
 # Number of entries in a L1/L2 table
-l_size = self.cluster_size / UINT64_S
+l_size = self.cluster_size // UINT64_S
 # Number of clusters necessary for L1 table
 l1_size = int(ceil((max(guest_clusters) + 1) / float(l_size**2)))
 l1_start = self._get_adjacent_clusters(self.data_clusters |
@@ -318,7 +318,7 @@ class Image(object):
 # L2 entries
 l2 = []
 for host, guest in zip(self.data_clusters, guest_clusters):
-l2_id = guest / l_size
+l2_id = guest // l_size
 if l2_id not in l2_ids:
 l2_ids.append(l2_id)
 l2_clusters.append(self._get_adjacent_clusters(
@@ -339,14 +339,14 @@ class Image(object):
 def allocate_rfc_blocks(data, size):
 """Return indices of clusters allocated for refcount blocks."""
 cluster_ids = set()
-diff = block_ids = set([x / size for x in data])
+diff = block_ids = set([x // size for x in data])
 while len(diff) != 0:
 # A

[PATCH 01/10] image-fuzzer: Open image files in binary mode

2019-10-16 Thread Eduardo Habkost

This probably never caused problems because on Linux there's no
actual newline conversion happening, but on Python 3 the
binary/text distinction is stronger and we must explicitly open
the image file in binary mode.

Signed-off-by: Eduardo Habkost 
---
 tests/image-fuzzer/qcow2/layout.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/image-fuzzer/qcow2/layout.py 
b/tests/image-fuzzer/qcow2/layout.py
index 675877da96..c57418fa15 100644
--- a/tests/image-fuzzer/qcow2/layout.py
+++ b/tests/image-fuzzer/qcow2/layout.py
@@ -503,7 +503,7 @@ class Image(object):
 
 def write(self, filename):
 """Write an entire image to the file."""
-image_file = open(filename, 'w')
+image_file = open(filename, 'wb')
 for field in self:
 image_file.seek(field.offset)
 image_file.write(struct.pack(field.fmt, field.value))
-- 
2.21.0

[PATCH 02/10] image-fuzzer: Write bytes instead of string to image file

2019-10-16 Thread Eduardo Habkost

This is necessary for Python 3 compatibility.

Signed-off-by: Eduardo Habkost 
---
 tests/image-fuzzer/qcow2/layout.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/image-fuzzer/qcow2/layout.py 
b/tests/image-fuzzer/qcow2/layout.py
index c57418fa15..fe273d4143 100644
--- a/tests/image-fuzzer/qcow2/layout.py
+++ b/tests/image-fuzzer/qcow2/layout.py
@@ -518,7 +518,7 @@ class Image(object):
 rounded = (size + self.cluster_size - 1) & ~(self.cluster_size - 1)
 if rounded > size:
 image_file.seek(rounded - 1)
-image_file.write("\0")
+image_file.write(b'\x00')
 image_file.close()
 
 @staticmethod
-- 
2.21.0

[PATCH 00/10] image-fuzzer: Port to Python 3

2019-10-16 Thread Eduardo Habkost

This series ports image-fuzzer to Python 3.

Eduardo Habkost (10):
  image-fuzzer: Open image files in binary mode
  image-fuzzer: Write bytes instead of string to image file
  image-fuzzer: Explicitly use integer division operator
  image-fuzzer: Use io.StringIO
  image-fuzzer: Use %r for all fields at Field.__repr__()
  image-fuzzer: Return bytes objects on string fuzzing functions
  image-fuzzer: Use bytes constant for field values
  image-fuzzer: Encode file name and file format to bytes
  image-fuzzer: Run using python3
  image-fuzzer: Use errors parameter of subprocess.Popen()

 tests/image-fuzzer/qcow2/__init__.py |  1 -
 tests/image-fuzzer/qcow2/fuzz.py | 54 +-
 tests/image-fuzzer/qcow2/layout.py   | 57 ++--
 tests/image-fuzzer/runner.py | 12 +++---
 4 files changed, 61 insertions(+), 63 deletions(-)

-- 
2.21.0

Re: [PATCH v4 0/4] qcow2: advanced compression options

2019-10-16 Thread no-reply

Patchew URL: 
https://patchew.org/QEMU/157124-882302-1-git-send-email-andrey.shinkev...@virtuozzo.com/



Hi,

This series failed the docker-mingw@fedora build test. Please find the testing 
commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

=== TEST SCRIPT BEGIN ===
#! /bin/bash
export ARCH=x86_64
make docker-image-fedora V=1 NETWORK=1
time make docker-test-mingw@fedora J=14 NETWORK=1
=== TEST SCRIPT END ===

  CC  block/blkverify.o
  CC  block/blkreplay.o
/tmp/qemu-test/src/block/qcow2.c: In function 
'qcow2_co_pwritev_compressed_part':
/tmp/qemu-test/src/block/qcow2.c:4244:9: error: 'ret' may be used uninitialized 
in this function [-Werror=maybe-uninitialized]
 int ret;
 ^~~
cc1: all warnings being treated as errors
make: *** [/tmp/qemu-test/src/rules.mak:69: block/qcow2.o] Error 1
make: *** Waiting for unfinished jobs
  CC  block/parallels.o
Traceback (most recent call last):
---
raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['sudo', '-n', 'docker', 'run', 
'--label', 'com.qemu.instance.uuid=2e359c4b7b0f403ca14825a9c4b067a8', '-u', 
'1003', '--security-opt', 'seccomp=unconfined', '--rm', '-e', 'TARGET_LIST=', 
'-e', 'EXTRA_CONFIGURE_OPTS=', '-e', 'V=', '-e', 'J=14', '-e', 'DEBUG=', '-e', 
'SHOW_ENV=', '-e', 'CCACHE_DIR=/var/tmp/ccache', '-v', 
'/home/patchew2/.cache/qemu-docker-ccache:/var/tmp/ccache:z', '-v', 
'/var/tmp/patchew-tester-tmp-hjz8elmo/src/docker-src.2019-10-16-15.17.37.16307:/var/tmp/qemu:z,ro',
 'qemu:fedora', '/var/tmp/qemu/run', 'test-mingw']' returned non-zero exit 
status 2.
filter=--filter=label=com.qemu.instance.uuid=2e359c4b7b0f403ca14825a9c4b067a8
make[1]: *** [docker-run] Error 1
make[1]: Leaving directory `/var/tmp/patchew-tester-tmp-hjz8elmo/src'
make: *** [docker-run-test-mingw@fedora] Error 2

real2m12.901s
user0m7.708s


The full log is available at
http://patchew.org/logs/157124-882302-1-git-send-email-andrey.shinkev...@virtuozzo.com/testing.docker-mingw@fedora/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [PATCH v4 0/4] qcow2: advanced compression options

2019-10-16 Thread no-reply

Patchew URL: 
https://patchew.org/QEMU/157124-882302-1-git-send-email-andrey.shinkev...@virtuozzo.com/



Hi,

This series failed the docker-quick@centos7 build test. Please find the testing 
commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

=== TEST SCRIPT BEGIN ===
#!/bin/bash
make docker-image-centos7 V=1 NETWORK=1
time make docker-test-quick@centos7 SHOW_ENV=1 J=14 NETWORK=1
=== TEST SCRIPT END ===

  CC  block/qed-table.o
  CC  block/qed-cluster.o
/tmp/qemu-test/src/block/qcow2.c: In function 
'qcow2_co_pwritev_compressed_part':
/tmp/qemu-test/src/block/qcow2.c:4244:9: error: 'ret' may be used uninitialized 
in this function [-Werror=maybe-uninitialized]
 int ret;
 ^
cc1: all warnings being treated as errors
make: *** [block/qcow2.o] Error 1
make: *** Waiting for unfinished jobs
Traceback (most recent call last):
  File "./tests/docker/docker.py", line 662, in 
---
raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['sudo', '-n', 'docker', 'run', 
'--label', 'com.qemu.instance.uuid=b109abacd8054efc992d66fa28ca7d8c', '-u', 
'1003', '--security-opt', 'seccomp=unconfined', '--rm', '-e', 'TARGET_LIST=', 
'-e', 'EXTRA_CONFIGURE_OPTS=', '-e', 'V=', '-e', 'J=14', '-e', 'DEBUG=', '-e', 
'SHOW_ENV=1', '-e', 'CCACHE_DIR=/var/tmp/ccache', '-v', 
'/home/patchew2/.cache/qemu-docker-ccache:/var/tmp/ccache:z', '-v', 
'/var/tmp/patchew-tester-tmp-ge9ikvez/src/docker-src.2019-10-16-15.15.08.9832:/var/tmp/qemu:z,ro',
 'qemu:centos7', '/var/tmp/qemu/run', 'test-quick']' returned non-zero exit 
status 2.
filter=--filter=label=com.qemu.instance.uuid=b109abacd8054efc992d66fa28ca7d8c
make[1]: *** [docker-run] Error 1
make[1]: Leaving directory `/var/tmp/patchew-tester-tmp-ge9ikvez/src'
make: *** [docker-run-test-quick@centos7] Error 2

real2m4.543s
user0m8.161s


The full log is available at
http://patchew.org/logs/157124-882302-1-git-send-email-andrey.shinkev...@virtuozzo.com/testing.docker-quick@centos7/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [PATCH v1 22/28] target/riscv: Allow specifying MMU stage

2019-10-16 Thread Palmer Dabbelt


On Mon, 07 Oct 2019 11:05:33 PDT (-0700), alistai...@gmail.com wrote:

On Thu, Oct 3, 2019 at 8:53 AM Palmer Dabbelt  wrote:


On Fri, 23 Aug 2019 16:38:47 PDT (-0700), Alistair Francis wrote:
> Signed-off-by: Alistair Francis 
> ---
>  target/riscv/cpu_helper.c | 39 ++-
>  1 file changed, 30 insertions(+), 9 deletions(-)
>
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 098873c83e..9aa6906acd 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -318,10 +318,19 @@ void riscv_cpu_set_mode(CPURISCVState *env, 
target_ulong newpriv)
>   *
>   * Adapted from Spike's mmu_t::translate and mmu_t::walk
>   *
> + * @env: CPURISCVState
> + * @physical: This will be set to the calculated physical address
> + * @prot: The returned protection attributes
> + * @addr: The virtual address to be translated
> + * @access_type: The type of MMU access
> + * @mmu_idx: Indicates current privilege level
> + * @first_stage: Are we in first stage translation?
> + *   Second stage is used for hypervisor guest translation
>   */
>  static int get_physical_address(CPURISCVState *env, hwaddr *physical,
>  int *prot, target_ulong addr,
> -int access_type, int mmu_idx)
> +int access_type, int mmu_idx,
> +bool first_stage)
>  {
>  /* NOTE: the env->pc value visible here will not be
>   * correct, but the value visible to the exception handler
> @@ -518,13 +527,23 @@ restart:
>  }
>
>  static void raise_mmu_exception(CPURISCVState *env, target_ulong address,
> -MMUAccessType access_type, bool 
pmp_violation)
> +MMUAccessType access_type, bool 
pmp_violation,
> +bool first_stage)
>  {
>  CPUState *cs = env_cpu(env);
> -int page_fault_exceptions =
> -(env->priv_ver >= PRIV_VERSION_1_10_0) &&
> -get_field(env->satp, SATP_MODE) != VM_1_10_MBARE &&
> -!pmp_violation;
> +int page_fault_exceptions;
> +if (first_stage) {
> +page_fault_exceptions =
> +(env->priv_ver >= PRIV_VERSION_1_10_0) &&
> +get_field(env->satp, SATP_MODE) != VM_1_10_MBARE &&
> +!pmp_violation;
> +riscv_cpu_set_force_hs_excep(env, CLEAR_HS_EXCEP);

It might just be email, but the indentation looks wrong here.


Yep, fixed.



> +} else {
> +page_fault_exceptions =
> +get_field(env->hgatp, HGATP_MODE) != VM_1_10_MBARE &&
> +!pmp_violation;
> +riscv_cpu_set_force_hs_excep(env, FORCE_HS_EXCEP);
> +}
>  switch (access_type) {
>  case MMU_INST_FETCH:
>  cs->exception_index = page_fault_exceptions ?
> @@ -551,7 +570,8 @@ hwaddr riscv_cpu_get_phys_page_debug(CPUState *cs, vaddr 
addr)
>  int prot;
>  int mmu_idx = cpu_mmu_index(&cpu->env, false);
>
> -if (get_physical_address(&cpu->env, &phys_addr, &prot, addr, 0, 
mmu_idx)) {
> +if (get_physical_address(&cpu->env, &phys_addr, &prot, addr, 0, mmu_idx,
> + true)) {
>  return -1;
>  }
>  return phys_addr;
> @@ -613,7 +633,8 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int 
size,
>  qemu_log_mask(CPU_LOG_MMU, "%s ad %" VADDR_PRIx " rw %d mmu_idx %d\n",
>__func__, address, access_type, mmu_idx);
>
> -ret = get_physical_address(env, &pa, &prot, address, access_type, 
mmu_idx);
> +ret = get_physical_address(env, &pa, &prot, address, access_type, 
mmu_idx,
> +   true);
>
>  if (mode == PRV_M && access_type != MMU_INST_FETCH) {
>  if (get_field(*env->mstatus, MSTATUS_MPRV)) {
> @@ -640,7 +661,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int 
size,
>  } else if (probe) {
>  return false;
>  } else {
> -raise_mmu_exception(env, address, access_type, pmp_violation);
> +raise_mmu_exception(env, address, access_type, pmp_violation, true);
>  riscv_raise_exception(env, cs->exception_index, retaddr);
>  }
>  #else

I don't think it makes sense to split off these two (23 and 24, that add the
argument) out from the implementation.


The goal was just to make it easier to review. If you want them
combined I can easily combine them.


It's making it harder to read on my end :)

Re: [PATCH v3] target/arm/arch_dump: Add SVE notes

2019-10-16 Thread Richard Henderson

On 10/16/19 9:37 AM, Andrew Jones wrote:
> When dumping a guest with dump-guest-memory also dump the SVE
> registers if they are in use.
> 
> Signed-off-by: Andrew Jones 
> ---
> v3:
>   - Pulled sve_bswap64 out of kvm64.c and reused it here
>   - Changed fpsr_offset and sve_size to only align to a
> 16 byte boundary from the note payload offset, not
> from the note head. Doing this makes it consistent
> with the documentation and what gcore does. Testing
> shows that the elf headers and gdb are still happy.
>   - Added blank lines between functions
> 
> 
>  include/elf.h  |   2 +
>  target/arm/arch_dump.c | 124 -
>  target/arm/cpu.h   |  25 +
>  target/arm/kvm64.c |  24 
>  4 files changed, 149 insertions(+), 26 deletions(-)

Reviewed-by: Richard Henderson 


r~

Re: [PATCH] checkpatch: suggest to use qemu_real_host_page_size instead of getpagesize() or sysconf(_SC_PAGESIZE)

2019-10-16 Thread Stefan Hajnoczi

On Wed, Oct 16, 2019 at 09:24:32AM +0800, Wei Yang wrote:
> Signed-off-by: Wei Yang 
> CC: David Gibson 
> ---
>  scripts/checkpatch.pl | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
> index aa9a354a0e..4b360ed310 100755
> --- a/scripts/checkpatch.pl
> +++ b/scripts/checkpatch.pl
> @@ -2915,6 +2915,12 @@ sub process {
>   if ($line =~ /\bbzero\(/) {
>   ERROR("use memset() instead of bzero()\n" . $herecurr);
>   }
> + if ($line =~ /getpagesize\(\)/) {
> + ERROR("use qemu_real_host_page_size instead of 
> getpagesize()\n" . $herecurr);
> + }
> + if ($line =~ /sysconf\(_SC_PAGESIZE\)/) {
> + ERROR("use qemu_real_host_page_size instead of 
> sysconf(_SC_PAGESIZE)\n" . $herecurr);
> + }
>   my $non_exit_glib_asserts = qr{g_assert_cmpstr|
>   g_assert_cmpint|
>   g_assert_cmpuint|

Just wanted to say thank you for extending checkpatch.pl!  We don't do
it enough but it's the best way to extend QEMU coding style because it's
automated :).

Stefan


signature.asc
Description: PGP signature

Re: [PATCH] virtio: Use auto rcu_read macros

2019-10-16 Thread Stefan Hajnoczi

On Mon, Oct 14, 2019 at 06:54:40PM +0100, Dr. David Alan Gilbert (git) wrote:
> From: "Dr. David Alan Gilbert" 
> 
> Use RCU_READ_LOCK_GUARD and WITH_RCU_READ_LOCK_GUARD
> to replace the manual rcu_read_(un)lock calls.
> 
> I think the only change is virtio_load which was missing unlocks
> in error paths; those end up being fatal errors so it's not
> that important anyway.
> 
> Signed-off-by: Dr. David Alan Gilbert 
> ---
>  hw/virtio/virtio.c | 46 --
>  1 file changed, 16 insertions(+), 30 deletions(-)

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature

Re: [PATCH 0/5] travis.yml improvements: Update libraries, build with arm64

2019-10-16 Thread Alex Bennée



Thomas Huth  writes:

> Update / add some libraries that we use in the Travis builds, and
> enable compiling on the aarch64 host, too.

Queued 1-3 to testing/next and I'll have a play with 4/5 but probably
won't include them in the final PR unless they seem solid.

>
> Thomas Huth (5):
>   travis.yml: Add libvdeplug-dev to compile-test net/vde.c
>   travis.yml: Use libsdl2 instead of libsdl1.2, and install
> libsdl2-image
>   travis.yml: Use newer version of libgnutls and libpng
>   travis.yml: Fix the ccache lines
>   travis.yml: Compile on arm64, too
>
>  .travis.yml | 21 +++--
>  1 file changed, 15 insertions(+), 6 deletions(-)


--
Alex Bennée

[Bug 1848244] Re: QEMU KVM IGD SandyBridge Passthrough crash

2019-10-16 Thread Anastasios Vacharakis

Solved: I added kvm.ignore_msrs=1 to kernel parameter!

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1848244

Title:
  QEMU KVM IGD SandyBridge Passthrough crash

Status in QEMU:
  New

Bug description:
  I try to passthrough my Intel GPU with this command:

  qemu-system-x86_64 -nodefaults -parallel none -k de -rtc
  base=localtime -serial unix:/run/qemu/win7-serial.sock,server,nowait
  -monitor unix:/run/qemu/win7-monitor.sock,server,nowait -netdev
  user,id=net0 -device virtio-net-pci,netdev=net0,mac=52:54:00:00:00:07
  -device vfio-pci,host=:00:02.0,addr=0x2 -device vfio-
  pci,host=:00:1b.0 -device virtio-keyboard-pci -device virtio-
  mouse-pci -object input-linux,id=kbd1,evdev=/dev/input/by-
  path/pci-:00:1a.0-usb-0:1.2.2:1.2-event-kbd,grab_all=on,repeat=on
  -object input-linux,id=mouse1,evdev=/dev/input/by-
  path/pci-:00:1a.0-usb-0:1.2.2:1.2-event-mouse -enable-kvm -cpu
  host -smp 4,sockets=1,cores=4,threads=1 -vga none -display none -m 2g
  -device virtio-blk-pci,drive=boot,bootindex=1 -drive
  file=/opt/vm/qcow2/win7.qcow2,format=qcow2,if=none,id=boot

  This ONLY works if i remove "-enable-kvm" else the windows (7 and 10)
  boot crashes in bluescreen "stop 0x003b" (probably while loading
  the intel gpu driver (intel graphics 3000).

  The system is an older ThinkPad T420 with Intel(R) Core(TM) i5-2520M
  CPU @ 2.50GHz.

  CMDLINE: BOOT_IMAGE=/vmlinuz-linux root=LABEL=root rw ipv6.disable=0
  net.ifnames=0 intel_iommu=on iommu=pt video=LVDS-1:d

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1848244/+subscriptions

[Bug 1846427] Re: 4.1.0: qcow2 corruption on savevm/quit/loadvm cycle

2019-10-16 Thread psyhomb

I can confirm exactly the same issue on Arch linux running qemu-4.1.0.

After downgrading from 4.1.0 => 4.0.0 everything is running normal
again, no corruption detected and all qcow2 images stays healthy.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1846427

Title:
  4.1.0: qcow2 corruption on savevm/quit/loadvm cycle

Status in QEMU:
  New

Bug description:
  I'm seeing massive corruption of qcow2 images with qemu 4.1.0 and git
  master as of 7f21573c822805a8e6be379d9bcf3ad9effef3dc after a few
  savevm/quit/loadvm cycles. I've narrowed it down to the following
  reproducer (further notes below):

  # qemu-img check debian.qcow2
  No errors were found on the image.
  251601/327680 = 76.78% allocated, 1.63% fragmented, 0.00% compressed clusters
  Image end offset: 18340446208
  # bin/qemu/bin/qemu-system-x86_64 -machine pc-q35-4.0.1,accel=kvm -m 4096 
-chardev stdio,id=charmonitor -mon chardev=charmonitor -drive 
file=debian.qcow2,id=d -S
  qemu-system-x86_64: warning: dbind: Couldn't register with accessibility bus: 
Did not receive a reply. Possible causes include: the remote application did 
not send a reply, the message bus security policy blocked the reply, the reply 
timeout expired, or the network connection was broken.
  QEMU 4.1.50 monitor - type 'help' for more information
  (qemu) loadvm foo
  (qemu) c
  (qemu) qcow2_free_clusters failed: Invalid argument
  qcow2_free_clusters failed: Invalid argument
  qcow2_free_clusters failed: Invalid argument
  qcow2_free_clusters failed: Invalid argument
  quit
  [m@nargothrond:~] qemu-img check debian.qcow2
  Leaked cluster 85179 refcount=2 reference=1
  Leaked cluster 85180 refcount=2 reference=1
  ERROR cluster 266150 refcount=0 reference=2
  [...]
  ERROR OFLAG_COPIED data cluster: l2_entry=42284 refcount=1

  9493 errors were found on the image.
  Data may be corrupted, or further writes to the image may corrupt it.

  2 leaked clusters were found on the image.
  This means waste of disk space, but no harm to data.
  259266/327680 = 79.12% allocated, 1.67% fragmented, 0.00% compressed clusters
  Image end offset: 18340446208

  This is on a x86_64 Linux 5.3.1 Gentoo host with qemu-system-x86_64
  and accel=kvm. The compiler is gcc-9.2.0 with the rest of the system
  similarly current.

  Reproduced with qemu-4.1.0 from distribution package as well as
  vanilla git checkout of tag v4.1.0 and commit
  7f21573c822805a8e6be379d9bcf3ad9effef3dc (today's master). Does not
  happen with qemu compiled from vanilla checkout of tag v4.0.0. Build
  sequence:

  ./configure --prefix=$HOME/bin/qemu-bisect --target-list=x86_64-softmmu 
--disable-werror --disable-docs
  [...]
  CFLAGS-O2 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -g
  [...] (can provide full configure output if helpful)
  make -j8 install

  The kind of guest OS does not matter: seen with Debian testing 64bit,
  Windows 7 x86/x64 BIOS and Windows 7 x64 EFI.

  The virtual storage controller does not seem to matter: seen with
  VirtIO SCSI, emulated SCSI and emulated SATA AHCI.

  Caching modes (none, directsync, writeback), aio mode (threads,
  native) or discard (ignore, unmap) or detect-zeroes (off, unmap) do
  not influence occurrence either.

  Having more RAM in the guest seems to increase odds of corruption:
  With 512MB to the Debian guest problem hardly occurs at all, with 4GB
  RAM it happens almost instantly.

  An automated reproducer works as follows:

  - the guest *does* mount its root fs and swap with option discard and
  my testing leaves me with the impression that file deletion rather
  than reading is causing the issue

  - foo is a snapshot of the running Debian VM which is already running
  command

  # while true ; do dd if=/dev/zero of=foo bs=10240k count=400 ; done

  to produce some I/O to the disk (4GB file with 4GB of RAM).

  - on the host a loop continuously resumes and saves the guest state
  and quits qemu inbetween:

  # while true ; do (echo loadvm foo ; echo c ; sleep 10 ; echo stop ;
  echo savevm foo ; echo quit ) | bin/qemu-bisect/bin/qemu-system-x86_64
  -machine pc-q35-3.1,accel=kvm -m 4096 -chardev stdio,id=charmonitor
  -mon chardev=charmonitor -drive file=debian.qcow2,id=d -S -display
  none ; done

  - quitting qemu inbetween saves and loads seems to be necessary for
  the problem to occur. Just continuously saving and loading guest
  state in one session does not trigger it.

  - For me, after about 2 to 6 iterations of above loop the image is
  corrupted.

  - corruption manifests with other messages from qemu as well, e.g.:

  (qemu) loadvm foo
  Error: Device 'd' does not have the requested snapshot 'foo'

  Using above reproducer I have to the best of my ability bisected
  the introduction of the problem to commit
  69f47505ee66afaa513305de0c1895a224e52c45 (block: avoid recursive
  block_status call if possible). qemu compiled from the com

[Bug 1847793] Re: qemu 4.1.0 - Corrupt guest filesystem after new vm install

2019-10-16 Thread psyhomb

I can confirm exactly the same issue on Arch linux with ext4 filesystem
(qemu-4.1.0).

After downgrading from 4.1.0 => 4.0.0 everything is running normal
again, no corruption detected and all qcow2 images stays healthy.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1847793

Title:
  qemu 4.1.0 - Corrupt guest filesystem after new vm install

Status in QEMU:
  New

Bug description:
  When I install a new vm with qemu 4.1.0 all the guest filesystems are
  corrupt. The first boot from the install dvd iso is ok and the
  installer work fine. But the guest system hangs after the installer
  finishes and I reboot the guest. I can see the grub boot menu but the
  system cannot load the initramfs.

  Tested with:
  - RedHat Enterprise Linux 7.5, 7.6 and 7.7 (RedHat uses xfs for the /boot and 
/ partition)
  Guided install with the graphical installer, no lvm selected.
  - Debian Stable/Buster (Debian uses ext4 for / and /home partition)
  Guided install with the graphical installer and default options.

  Used commandline to create the vm disk image:
  qemu-img create -f qcow2 /volumes/disk2-part2/vmdisks/vmtest10-1.qcow2 20G

  Used qemu commandline for vm installation:
  #!/bin/sh
  # vmtest10 Installation
  #
  /usr/bin/qemu-system-x86_64  -cpu SandyBridge-IBRS \
  -soundhw hda \
  -M q35 \
  -k de \
  -vga qxl \
  -machine accel=kvm \
  -m 4096 \
  -display gtk \
  -drive 
file=/volumes/disk2-part2/images/debian-10.0.0-amd64-DVD-1.iso,if=ide,media=cdrom
 \
  -drive 
file=/volumes/disk2-part2/images/vmtest10-1.qcow2,if=virtio,media=disk,cache=writeback
 \
  -boot once=d,menu=off \
  -device virtio-net-pci,mac=52:54:00:2c:02:6c,netdev=vlan0 \
  -netdev bridge,br=br0,id=vlan0 \
  -rtc base=localtime \
  -name "vmtest10" \
  -usb -device usb-tablet \
  -spice disable-ticketing \
  -device virtio-serial-pci \
  -device virtserialport,chardev=spicechannel0,name=com.redhat.spice.0 \
  -chardev spicevmc,id=spicechannel0,name=vdagent $*

  Host OS:
  Archlinux (last updated at 10.10.2019)
  Linux testing 5.3.5-arch1-1-ARCH #1 SMP PREEMPT Mon Oct 7 19:03:08 UTC 2019 
x86_64 GNU/Linux
  No libvirt in use.

  
  With qemu 4.0.0 it works fine without any errors.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1847793/+subscriptions

[PATCH] migration: savevm_state_handler_insert: constant-time element insertion

2019-10-16 Thread Scott Cheloha

Registering a SaveStateEntry object via savevm_state_handler_insert()
is an O(n) operation because the list is a priority queue maintained by
walking the list from head to tail to find a suitable insertion point.

This adds considerable overhead for VMs with many such objects.  For
instance, ppc64 machines with large maxmem (8T+) spend ~10% or more of
their CPU time in savevm_state_handler_insert() before attempting to
boot a kernel.

If we track the head for each priority's subqueue we can insert new
elements in constant time.

This commit also introduces a new function, savevm_state_handler_remove(),
which abstracts the logic for replacing the head of an element's subqueue
when removing it.

Signed-off-by: Scott Cheloha 
---
 migration/savevm.c | 35 ++-
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/migration/savevm.c b/migration/savevm.c
index 8d95e261f6..f7a2d36bba 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -250,6 +250,7 @@ typedef struct SaveStateEntry {
 
 typedef struct SaveState {
 QTAILQ_HEAD(, SaveStateEntry) handlers;
+SaveStateEntry *handler_pri_head[MIG_PRI_MAX + 1];
 int global_section_id;
 uint32_t len;
 const char *name;
@@ -261,6 +262,7 @@ typedef struct SaveState {
 
 static SaveState savevm_state = {
 .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
+.handler_pri_head = { [MIG_PRI_DEFAULT ... MIG_PRI_MAX] = NULL },
 .global_section_id = 0,
 };
 
@@ -709,20 +711,43 @@ static void savevm_state_handler_insert(SaveStateEntry 
*nse)
 {
 MigrationPriority priority = save_state_priority(nse);
 SaveStateEntry *se;
+int i;
 
 assert(priority <= MIG_PRI_MAX);
 
-QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
-if (save_state_priority(se) < priority) {
+for (i = priority - 1; i >= 0; i--) {
+se = savevm_state.handler_pri_head[i];
+if (se != NULL) {
+assert(save_state_priority(se) < priority);
 break;
 }
 }
 
-if (se) {
+if (i >= 0) {
 QTAILQ_INSERT_BEFORE(se, nse, entry);
 } else {
 QTAILQ_INSERT_TAIL(&savevm_state.handlers, nse, entry);
 }
+
+if (savevm_state.handler_pri_head[priority] == NULL) {
+savevm_state.handler_pri_head[priority] = nse;
+}
+}
+
+static void savevm_state_handler_remove(SaveStateEntry *se)
+{
+SaveStateEntry *next;
+MigrationPriority priority = save_state_priority(se);
+
+if (se == savevm_state.handler_pri_head[priority]) {
+next = QTAILQ_NEXT(se, entry);
+if (next != NULL && save_state_priority(next) == priority) {
+savevm_state.handler_pri_head[priority] = next;
+} else {
+savevm_state.handler_pri_head[priority] = NULL;
+}
+}
+QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
 }
 
 /* TODO: Individual devices generally have very little idea about the rest
@@ -777,7 +802,7 @@ void unregister_savevm(DeviceState *dev, const char *idstr, 
void *opaque)
 
 QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
 if (strcmp(se->idstr, id) == 0 && se->opaque == opaque) {
-QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
+savevm_state_handler_remove(se);
 g_free(se->compat);
 g_free(se);
 }
@@ -841,7 +866,7 @@ void vmstate_unregister(DeviceState *dev, const 
VMStateDescription *vmsd,
 
 QTAILQ_FOREACH_SAFE(se, &savevm_state.handlers, entry, new_se) {
 if (se->vmsd == vmsd && se->opaque == opaque) {
-QTAILQ_REMOVE(&savevm_state.handlers, se, entry);
+savevm_state_handler_remove(se);
 g_free(se->compat);
 g_free(se);
 }
-- 
2.23.0

[PATCH v2 6/6] block/block-copy: increase buffered copy request

2019-10-16 Thread Vladimir Sementsov-Ogievskiy

No reason to limit buffered copy to one cluster. Let's allow up to 1
MiB.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/block/block-copy.h |  2 +-
 block/block-copy.c | 48 +-
 2 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/include/block/block-copy.h b/include/block/block-copy.h
index edcdf0072d..0a161724d7 100644
--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
@@ -38,7 +38,7 @@ typedef struct BlockCopyState {
 BdrvDirtyBitmap *copy_bitmap;
 int64_t cluster_size;
 bool use_copy_range;
-int64_t copy_range_size;
+int64_t copy_size;
 uint64_t len;
 QLIST_HEAD(, BlockCopyInFlightReq) inflight_reqs;
 
diff --git a/block/block-copy.c b/block/block-copy.c
index d5042e46fd..74eb235972 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -21,6 +21,7 @@
 #include "qemu/units.h"
 
 #define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB)
+#define BLOCK_COPY_MAX_BUFFER (1 * MiB)
 #define BLOCK_COPY_MAX_MEM (128 * MiB)
 
 static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
@@ -75,10 +76,8 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, 
BdrvChild *target,
 {
 BlockCopyState *s;
 BdrvDirtyBitmap *copy_bitmap;
-
-/* Ignore BLOCK_COPY_MAX_COPY_RANGE if requested cluster_size is larger */
 uint32_t max_transfer =
-MIN_NON_ZERO(MAX(cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
+MIN_NON_ZERO(INT_MAX,
  MIN_NON_ZERO(source->bs->bl.max_transfer,
   target->bs->bl.max_transfer));
 
@@ -100,17 +99,28 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, 
BdrvChild *target,
 .mem = shres_create(BLOCK_COPY_MAX_MEM),
 };
 
-s->copy_range_size = QEMU_ALIGN_DOWN(max_transfer, cluster_size),
-/*
- * Set use_copy_range, consider the following:
- * 1. Compression is not supported for copy_range.
- * 2. copy_range does not respect max_transfer (it's a TODO), so we factor
- *that in here. If max_transfer is smaller than the job->cluster_size,
- *we do not use copy_range (in that case it's zero after aligning down
- *above).
- */
-s->use_copy_range =
-!(write_flags & BDRV_REQ_WRITE_COMPRESSED) && s->copy_range_size > 0;
+if (max_transfer < cluster_size) {
+/*
+ * copy_range does not respect max_transfer. We don't want to bother
+ * with requests smaller than block-copy cluster size, so fallback to
+ * buffered copying (read and write respect max_transfer on their
+ * behalf).
+ */
+s->use_copy_range = false;
+s->copy_size = cluster_size;
+} else if (write_flags & BDRV_REQ_WRITE_COMPRESSED) {
+/* Compression is not supported for copy_range */
+s->use_copy_range = false;
+s->copy_size = MAX(cluster_size, BLOCK_COPY_MAX_BUFFER);
+} else {
+/*
+ * copy_range does not respect max_transfer (it's a TODO), so we factor
+ * that in here.
+ */
+s->use_copy_range = true;
+s->copy_size = MIN(MAX(cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
+   QEMU_ALIGN_DOWN(max_transfer, cluster_size));
+}
 
 QLIST_INIT(&s->inflight_reqs);
 
@@ -156,12 +166,19 @@ static int coroutine_fn block_copy_do_copy(BlockCopyState 
*s,
 if (ret < 0) {
 trace_block_copy_copy_range_fail(s, start, ret);
 s->use_copy_range = false;
+s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
 /* Fallback to read+write with allocated buffer */
 } else {
 goto out;
 }
 }
 
+/*
+ * In case of failed copy_range request above, we may proceed with buffered
+ * request larger than BLOCK_COPY_MAX_BUFFER. Still, further requests will
+ * be properly limited, so don't care too much.
+ */
+
 bounce_buffer = qemu_blockalign(s->source->bs, nbytes);
 
 ret = bdrv_co_pread(s->source, start, nbytes, bounce_buffer, 0);
@@ -290,8 +307,7 @@ int coroutine_fn block_copy(BlockCopyState *s,
 continue; /* already copied */
 }
 
-chunk_end = MIN(end, start + (s->use_copy_range ?
-  s->copy_range_size : s->cluster_size));
+chunk_end = MIN(end, start + s->copy_size);
 
 next_zero = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, start,
 chunk_end - start);
-- 
2.21.0

[PATCH v2 1/6] block/block-copy: allocate buffer in block_copy_with_bounce_buffer

2019-10-16 Thread Vladimir Sementsov-Ogievskiy

Move bounce_buffer allocation into block_copy_with_bounce_buffer. This
commit simplifies further work on implementing copying by larger chunks
(of different size) and further asynchronous handling of block_copy
iterations (with help of block/aio_task API).

Allocation works fast, a lot faster than disk io, so it's not a problem
that we now allocate/free bounce_buffer more times. And we anyway will
have to allocate several bounce_buffers for parallel execution of loop
iterations in future.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 block/block-copy.c | 21 -
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/block/block-copy.c b/block/block-copy.c
index 0f76ea1e63..22b0bd7d07 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -126,20 +126,17 @@ void block_copy_set_callbacks(
 static int coroutine_fn block_copy_with_bounce_buffer(BlockCopyState *s,
   int64_t start,
   int64_t end,
-  bool *error_is_read,
-  void **bounce_buffer)
+  bool *error_is_read)
 {
 int ret;
 int nbytes;
+void *bounce_buffer = qemu_blockalign(s->source->bs, s->cluster_size);
 
 assert(QEMU_IS_ALIGNED(start, s->cluster_size));
 bdrv_reset_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
 nbytes = MIN(s->cluster_size, s->len - start);
-if (!*bounce_buffer) {
-*bounce_buffer = qemu_blockalign(s->source->bs, s->cluster_size);
-}
 
-ret = bdrv_co_pread(s->source, start, nbytes, *bounce_buffer, 0);
+ret = bdrv_co_pread(s->source, start, nbytes, bounce_buffer, 0);
 if (ret < 0) {
 trace_block_copy_with_bounce_buffer_read_fail(s, start, ret);
 if (error_is_read) {
@@ -148,7 +145,7 @@ static int coroutine_fn 
block_copy_with_bounce_buffer(BlockCopyState *s,
 goto fail;
 }
 
-ret = bdrv_co_pwrite(s->target, start, nbytes, *bounce_buffer,
+ret = bdrv_co_pwrite(s->target, start, nbytes, bounce_buffer,
  s->write_flags);
 if (ret < 0) {
 trace_block_copy_with_bounce_buffer_write_fail(s, start, ret);
@@ -158,8 +155,11 @@ static int coroutine_fn 
block_copy_with_bounce_buffer(BlockCopyState *s,
 goto fail;
 }
 
+qemu_vfree(bounce_buffer);
+
 return nbytes;
 fail:
+qemu_vfree(bounce_buffer);
 bdrv_set_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
 return ret;
 
@@ -271,7 +271,6 @@ int coroutine_fn block_copy(BlockCopyState *s,
 {
 int ret = 0;
 int64_t end = bytes + start; /* bytes */
-void *bounce_buffer = NULL;
 int64_t status_bytes;
 BlockCopyInFlightReq req;
 
@@ -324,7 +323,7 @@ int coroutine_fn block_copy(BlockCopyState *s,
 }
 if (!s->use_copy_range) {
 ret = block_copy_with_bounce_buffer(s, start, dirty_end,
-error_is_read, &bounce_buffer);
+error_is_read);
 }
 if (ret < 0) {
 break;
@@ -335,10 +334,6 @@ int coroutine_fn block_copy(BlockCopyState *s,
 ret = 0;
 }
 
-if (bounce_buffer) {
-qemu_vfree(bounce_buffer);
-}
-
 block_copy_inflight_req_end(&req);
 
 return ret;
-- 
2.21.0

[PATCH v2 0/6] block-copy: memory limit

2019-10-16 Thread Vladimir Sementsov-Ogievskiy

I'm going to bring a block-status driven, async copying process to
block-copy, to make it fast. The first step is to limit memory usage of
backup; here it is.

v2: [mostly by Max's comments]
Now based on master (Thank you Max!)
01: add Max's r-b
02: add Max's r-b
03: - refactor block_copy_do_copy goto/return
- add small comment to block_copy_do_copy
04: - a lot of renaming and wording fixes
- refactor to use "available" instead of "taken"
- refactor co_get_from_shres
05: rebase on 04 changes
06: drop extra things from max_transfer calculation

Vladimir Sementsov-Ogievskiy (6):
  block/block-copy: allocate buffer in block_copy_with_bounce_buffer
  block/block-copy: limit copy_range_size to 16 MiB
  block/block-copy: refactor copying
  util: introduce SharedResource
  block/block-copy: add memory limit
  block/block-copy: increase buffered copy request

 include/block/block-copy.h|   5 +-
 include/qemu/co-shared-resource.h |  71 
 block/block-copy.c| 182 +++---
 util/qemu-co-shared-resource.c|  76 +
 block/trace-events|   6 +-
 util/Makefile.objs|   1 +
 6 files changed, 249 insertions(+), 92 deletions(-)
 create mode 100644 include/qemu/co-shared-resource.h
 create mode 100644 util/qemu-co-shared-resource.c

-- 
2.21.0

[PATCH v2 3/6] block/block-copy: refactor copying

2019-10-16 Thread Vladimir Sementsov-Ogievskiy

Merge copying code into one function block_copy_do_copy, which only
calls bdrv_ io functions and doesn't do any synchronization (like dirty
bitmap set/reset).

Refactor the block_copy() function so that it fully decides the size of
the chunk to be copied and does all the synchronization (checking
intersecting requests, set/reset dirty bitmaps).

It will help:
 - introduce parallel processing of block_copy iterations: we need to
   calculate chunk size, start async chunk copying and go to the next
   iteration
 - simplify synchronization improvement (like memory limiting in
   further commit and reducing critical section (now we lock the whole
   requested range, when actually we need to lock only dirty region
   which we handle at the moment))

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/block-copy.c | 118 -
 block/trace-events |   6 +--
 2 files changed, 54 insertions(+), 70 deletions(-)

diff --git a/block/block-copy.c b/block/block-copy.c
index e37dfbfd03..c21db48734 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -126,79 +126,64 @@ void block_copy_set_callbacks(
 }
 
 /*
- * Copy range to target with a bounce buffer and return the bytes copied. If
- * error occurred, return a negative error number
+ * block_copy_do_copy
+ *
+ * Do copy of cluster-aligned chunk. @end is allowed to exceed s->len only to
+ * cover last cluster when s->len is not aligned to clusters.
+ *
+ * No sync here: neither bitmap nor intersecting requests handling, only copy.
+ *
+ * Returns 0 on success.
  */
-static int coroutine_fn block_copy_with_bounce_buffer(BlockCopyState *s,
-  int64_t start,
-  int64_t end,
-  bool *error_is_read)
+static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
+   int64_t start, int64_t end,
+   bool *error_is_read)
 {
 int ret;
-int nbytes;
-void *bounce_buffer = qemu_blockalign(s->source->bs, s->cluster_size);
+int nbytes = MIN(end, s->len) - start;
+void *bounce_buffer = NULL;
 
 assert(QEMU_IS_ALIGNED(start, s->cluster_size));
-bdrv_reset_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
-nbytes = MIN(s->cluster_size, s->len - start);
+assert(QEMU_IS_ALIGNED(end, s->cluster_size));
+assert(end < s->len || end == QEMU_ALIGN_UP(s->len, s->cluster_size));
+
+if (s->use_copy_range) {
+ret = bdrv_co_copy_range(s->source, start, s->target, start, nbytes,
+ 0, s->write_flags);
+if (ret < 0) {
+trace_block_copy_copy_range_fail(s, start, ret);
+s->use_copy_range = false;
+/* Fallback to read+write with allocated buffer */
+} else {
+goto out;
+}
+}
+
+bounce_buffer = qemu_blockalign(s->source->bs, nbytes);
 
 ret = bdrv_co_pread(s->source, start, nbytes, bounce_buffer, 0);
 if (ret < 0) {
-trace_block_copy_with_bounce_buffer_read_fail(s, start, ret);
+trace_block_copy_read_fail(s, start, ret);
 if (error_is_read) {
 *error_is_read = true;
 }
-goto fail;
+goto out;
 }
 
 ret = bdrv_co_pwrite(s->target, start, nbytes, bounce_buffer,
  s->write_flags);
 if (ret < 0) {
-trace_block_copy_with_bounce_buffer_write_fail(s, start, ret);
+trace_block_copy_write_fail(s, start, ret);
 if (error_is_read) {
 *error_is_read = false;
 }
-goto fail;
+goto out;
 }
 
+out:
 qemu_vfree(bounce_buffer);
 
-return nbytes;
-fail:
-qemu_vfree(bounce_buffer);
-bdrv_set_dirty_bitmap(s->copy_bitmap, start, s->cluster_size);
 return ret;
-
-}
-
-/*
- * Copy range to target and return the bytes copied. If error occurred, return 
a
- * negative error number.
- */
-static int coroutine_fn block_copy_with_offload(BlockCopyState *s,
-int64_t start,
-int64_t end)
-{
-int ret;
-int nr_clusters;
-int nbytes;
-
-assert(QEMU_IS_ALIGNED(s->copy_range_size, s->cluster_size));
-assert(QEMU_IS_ALIGNED(start, s->cluster_size));
-nbytes = MIN(s->copy_range_size, MIN(end, s->len) - start);
-nr_clusters = DIV_ROUND_UP(nbytes, s->cluster_size);
-bdrv_reset_dirty_bitmap(s->copy_bitmap, start,
-s->cluster_size * nr_clusters);
-ret = bdrv_co_copy_range(s->source, start, s->target, start, nbytes,
- 0, s->write_flags);
-if (ret < 0) {
-trace_block_copy_with_offload_fail(s, start, ret);
-bdrv_set_dirty_bitmap(s->copy_bitmap, start,
-  s->cluster_size * nr_clusters);
-

[PATCH v2 2/6] block/block-copy: limit copy_range_size to 16 MiB

2019-10-16 Thread Vladimir Sementsov-Ogievskiy

Large copy range may imply memory allocation and large io effort, so
using 2G copy range request may be a bad idea. Let's limit it to 16 MiB.
It also helps the following patch to refactor copy-with-offload
fallback to copy-with-bounce-buffer.

Note that total memory usage of backup is still not limited; it will
be fixed in a further commit.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 block/block-copy.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/block/block-copy.c b/block/block-copy.c
index 22b0bd7d07..e37dfbfd03 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -18,6 +18,9 @@
 #include "qapi/error.h"
 #include "block/block-copy.h"
 #include "sysemu/block-backend.h"
+#include "qemu/units.h"
+
+#define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB)
 
 static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
int64_t start,
@@ -70,9 +73,12 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, 
BdrvChild *target,
 {
 BlockCopyState *s;
 BdrvDirtyBitmap *copy_bitmap;
+
+/* Ignore BLOCK_COPY_MAX_COPY_RANGE if requested cluster_size is larger */
 uint32_t max_transfer =
-MIN_NON_ZERO(INT_MAX, MIN_NON_ZERO(source->bs->bl.max_transfer,
-   target->bs->bl.max_transfer));
+MIN_NON_ZERO(MAX(cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
+ MIN_NON_ZERO(source->bs->bl.max_transfer,
+  target->bs->bl.max_transfer));
 
 copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
errp);
-- 
2.21.0

[PATCH v2 4/6] util: introduce SharedResource

2019-10-16 Thread Vladimir Sementsov-Ogievskiy

Introduce an API for some shared splittable resource, like memory.
It's going to be used by backup. Backup uses both read/write io and
copy_range. copy_range may consume memory implicitly, so the new API is
abstract: it doesn't allocate any real memory but only hands out
tickets.

The idea is that we have some total amount of something and callers
should wait in coroutine queue if there is not enough of the resource
at the moment.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/qemu/co-shared-resource.h | 71 +
 util/qemu-co-shared-resource.c| 76 +++
 util/Makefile.objs|  1 +
 3 files changed, 148 insertions(+)
 create mode 100644 include/qemu/co-shared-resource.h
 create mode 100644 util/qemu-co-shared-resource.c

diff --git a/include/qemu/co-shared-resource.h 
b/include/qemu/co-shared-resource.h
new file mode 100644
index 00..04c9c3d5be
--- /dev/null
+++ b/include/qemu/co-shared-resource.h
@@ -0,0 +1,71 @@
+/*
+ * Helper functionality for distributing a fixed total amount of
+ * an abstract resource among multiple coroutines.
+ *
+ * Copyright (c) 2019 Virtuozzo International GmbH
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_CO_SHARED_AMOUNT_H
+#define QEMU_CO_SHARED_AMOUNT_H
+
+
+typedef struct SharedResource SharedResource;
+
+/*
+ * Create SharedResource structure
+ *
+ * @total: total amount of some resource to be shared between clients
+ *
+ * Note: this API is not thread-safe.
+ */
+SharedResource *shres_create(uint64_t total);
+
+/*
+ * Release SharedResource structure
+ *
+ * This function may only be called once everything allocated by all
+ * clients has been deallocated.
+ */
+void shres_destroy(SharedResource *s);
+
+/*
+ * Try to allocate an amount of @n.  Return true on success, and false
+ * if there is too little left of the collective resource to fulfill
+ * the request.
+ */
+bool co_try_get_from_shres(SharedResource *s, uint64_t n);
+
+/*
+ * Allocate an amount of @n, and, if necessary, yield until
+ * that becomes possible.
+ */
+void coroutine_fn co_get_from_shres(SharedResource *s, uint64_t n);
+
+/*
+ * Deallocate an amount of @n.  The total amount allocated by a caller
+ * does not need to be deallocated/released with a single call, but may
+ * be split over several calls.  For example, get(4), get(3), and then
+ * put(5), put(2).
+ */
+void coroutine_fn co_put_to_shres(SharedResource *s, uint64_t n);
+
+
+#endif /* QEMU_CO_SHARED_AMOUNT_H */
diff --git a/util/qemu-co-shared-resource.c b/util/qemu-co-shared-resource.c
new file mode 100644
index 00..1c83cd9d29
--- /dev/null
+++ b/util/qemu-co-shared-resource.c
@@ -0,0 +1,76 @@
+/*
+ * Helper functionality for distributing a fixed total amount of
+ * an abstract resource among multiple coroutines.
+ *
+ * Copyright (c) 2019 Virtuozzo International GmbH
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR

[PATCH v2 5/6] block/block-copy: add memory limit

2019-10-16 Thread Vladimir Sementsov-Ogievskiy

Currently total allocation for parallel requests to block-copy instance
is unlimited. Let's limit it to 128 MiB.

For now block-copy is used only in backup, so actually we limit total
allocation for backup job.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/block/block-copy.h | 3 +++
 block/block-copy.c | 5 +
 2 files changed, 8 insertions(+)

diff --git a/include/block/block-copy.h b/include/block/block-copy.h
index e2e135ff1b..edcdf0072d 100644
--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
@@ -16,6 +16,7 @@
 #define BLOCK_COPY_H
 
 #include "block/block.h"
+#include "qemu/co-shared-resource.h"
 
 typedef struct BlockCopyInFlightReq {
 int64_t start_byte;
@@ -69,6 +70,8 @@ typedef struct BlockCopyState {
  */
 ProgressResetCallbackFunc progress_reset_callback;
 void *progress_opaque;
+
+SharedResource *mem;
 } BlockCopyState;
 
 BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
diff --git a/block/block-copy.c b/block/block-copy.c
index c21db48734..d5042e46fd 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -21,6 +21,7 @@
 #include "qemu/units.h"
 
 #define BLOCK_COPY_MAX_COPY_RANGE (16 * MiB)
+#define BLOCK_COPY_MAX_MEM (128 * MiB)
 
 static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
int64_t start,
@@ -64,6 +65,7 @@ void block_copy_state_free(BlockCopyState *s)
 }
 
 bdrv_release_dirty_bitmap(s->source->bs, s->copy_bitmap);
+shres_destroy(s->mem);
 g_free(s);
 }
 
@@ -95,6 +97,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, 
BdrvChild *target,
 .cluster_size = cluster_size,
 .len = bdrv_dirty_bitmap_size(copy_bitmap),
 .write_flags = write_flags,
+.mem = shres_create(BLOCK_COPY_MAX_MEM),
 };
 
 s->copy_range_size = QEMU_ALIGN_DOWN(max_transfer, cluster_size),
@@ -313,7 +316,9 @@ int coroutine_fn block_copy(BlockCopyState *s,
 
 bdrv_reset_dirty_bitmap(s->copy_bitmap, start, chunk_end - start);
 
+co_get_from_shres(s->mem, chunk_end - start);
 ret = block_copy_do_copy(s, start, chunk_end, error_is_read);
+co_put_to_shres(s->mem, chunk_end - start);
 if (ret < 0) {
 bdrv_set_dirty_bitmap(s->copy_bitmap, start, chunk_end - start);
 break;
-- 
2.21.0

Re: [PATCH v1 1/5] contrib/gitdm: add more entries individuals and academics

2019-10-16 Thread Philippe Mathieu-Daudé


On 10/14/19 3:59 PM, Alex Bennée wrote:

Again this is guess work based on public websites. Please confirm.

Signed-off-by: Alex Bennée 
Cc: Emanuele Giuseppe Esposito 
Cc: Bastian Koppelmann 
Cc: "Kővágó, Zoltán" 
Cc: Li Qiang 
Cc: Li Qiang 
---
  contrib/gitdm/group-map-academics   | 3 +++
  contrib/gitdm/group-map-individuals | 4 
  2 files changed, 7 insertions(+)

diff --git a/contrib/gitdm/group-map-academics 
b/contrib/gitdm/group-map-academics
index 08f9d81d13..5cbb9d84c8 100644
--- a/contrib/gitdm/group-map-academics
+++ b/contrib/gitdm/group-map-academics
@@ -12,3 +12,6 @@ ispras.ru
  # Columbia University
  cs.columbia.edu
  c...@braap.org
+
+# University of Paderborn
+uni-paderborn.de
diff --git a/contrib/gitdm/group-map-individuals 
b/contrib/gitdm/group-map-individuals
index 1c84717438..301071b98b 100644
--- a/contrib/gitdm/group-map-individuals
+++ b/contrib/gitdm/group-map-individuals
@@ -14,3 +14,7 @@ nor...@nocrew.org
  samuel.thiba...@ens-lyon.org
  aurel...@aurel32.net
  bala...@eik.bme.hu
+e.emanuelegiuse...@gmail.com
+dirty.ice...@gmail.com
+liq...@163.com
+liq...@gmail.com


Also: liqiang...@360.cn

Re: [PATCH] Added hardfloat conversion from float32 to float64

2019-10-16 Thread no-reply

Patchew URL: https://patchew.org/QEMU/20191016073240.12473-1-mky...@tachyum.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Subject: [PATCH] Added hardfloat conversion from float32 to float64
Type: series
Message-id: 20191016073240.12473-1-mky...@tachyum.com

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Switched to a new branch 'test'
4af0ff7 Added hardfloat conversion from float32 to float64

=== OUTPUT BEGIN ===
ERROR: spaces required around that '*' (ctx:WxV)
#27: FILE: fpu/softfloat.c:1924:
+soft_float32_to_float64(float32 a, float_status *s)
 ^

ERROR: spaces required around that '*' (ctx:WxV)
#34: FILE: fpu/softfloat.c:1931:
+float64 float32_to_float64(float32 a, float_status *status)
^

total: 2 errors, 0 warnings, 27 lines checked

Commit 4af0ff7322fb (Added hardfloat conversion from float32 to float64) has 
style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/20191016073240.12473-1-mky...@tachyum.com/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [PATCH] s390x/cpumodel: Add missing visit_free

2019-10-16 Thread David Hildenbrand


On 16.10.19 16:54, Andrew Jones wrote:

Beata Michalska noticed this missing visit_free() while reviewing
arm's implementation of qmp_query_cpu_model_expansion(), which is
modeled off this s390x implementation.


Nice to see ARM support getting added.



Signed-off-by: Andrew Jones 
---
  target/s390x/cpu_models.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index 009afc38b92d..7e92fb2e156d 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -515,6 +515,7 @@ static void cpu_model_from_info(S390CPUModel *model, const 
CpuModelInfo *info,
  visitor = qobject_input_visitor_new(info->props);
  visit_start_struct(visitor, NULL, NULL, 0, errp);
  if (*errp) {
+visit_free(visitor);
  object_unref(obj);
  return;
  }



Right, thanks

Reviewed-by: David Hildenbrand 

--

Thanks,

David / dhildenb

[PATCH v8 8/8] hd-geo-test: Add tests for lchs override

2019-10-16 Thread Sam Eiderman

From: Sam Eiderman 

Add QTest tests to check the logical geometry override option.

The tests in hd-geo-test are out of date - they only test IDE and do not
test interesting MBRs.

Creating qcow2 disks with specific size and MBR layout is currently
unused - we only use a default empty MBR.

Reviewed-by: Karl Heubaum 
Reviewed-by: Arbel Moshe 
Signed-off-by: Sam Eiderman 
Signed-off-by: Sam Eiderman 
---
 tests/Makefile.include |   2 +-
 tests/hd-geo-test.c| 551 +
 2 files changed, 552 insertions(+), 1 deletion(-)

diff --git a/tests/Makefile.include b/tests/Makefile.include
index 3543451ed3..6941ae7c77 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -780,7 +780,7 @@ tests/ide-test$(EXESUF): tests/ide-test.o $(libqos-pc-obj-y)
 tests/ahci-test$(EXESUF): tests/ahci-test.o $(libqos-pc-obj-y) 
qemu-img$(EXESUF)
 tests/ipmi-kcs-test$(EXESUF): tests/ipmi-kcs-test.o
 tests/ipmi-bt-test$(EXESUF): tests/ipmi-bt-test.o
-tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o
+tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o $(libqos-obj-y)
 tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y)
 tests/boot-serial-test$(EXESUF): tests/boot-serial-test.o $(libqos-obj-y)
 tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \
diff --git a/tests/hd-geo-test.c b/tests/hd-geo-test.c
index 62eb624726..7e86c5416c 100644
--- a/tests/hd-geo-test.c
+++ b/tests/hd-geo-test.c
@@ -17,7 +17,12 @@
 
 #include "qemu/osdep.h"
 #include "qemu-common.h"
+#include "qemu/bswap.h"
+#include "qapi/qmp/qlist.h"
 #include "libqtest.h"
+#include "libqos/fw_cfg.h"
+#include "libqos/libqos.h"
+#include "standard-headers/linux/qemu_fw_cfg.h"
 
 #define ARGV_SIZE 256
 
@@ -388,6 +393,537 @@ static void test_ide_drive_cd_0(void)
 qtest_quit(qts);
 }
 
+typedef struct {
+bool active;
+uint32_t head;
+uint32_t sector;
+uint32_t cyl;
+uint32_t end_head;
+uint32_t end_sector;
+uint32_t end_cyl;
+uint32_t start_sect;
+uint32_t nr_sects;
+} MBRpartitions[4];
+
+static MBRpartitions empty_mbr = { {false, 0, 0, 0, 0, 0, 0, 0, 0},
+   {false, 0, 0, 0, 0, 0, 0, 0, 0},
+   {false, 0, 0, 0, 0, 0, 0, 0, 0},
+   {false, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+static char *create_qcow2_with_mbr(MBRpartitions mbr, uint64_t sectors)
+{
+const char *template = "/tmp/qtest.XX";
+char *raw_path = strdup(template);
+char *qcow2_path = strdup(template);
+char cmd[100 + 2 * PATH_MAX];
+uint8_t buf[512];
+int i, ret, fd, offset;
+uint64_t qcow2_size = sectors * 512;
+uint8_t status, parttype, head, sector, cyl;
+char *qemu_img_path;
+char *qemu_img_abs_path;
+
+offset = 0xbe;
+
+for (i = 0; i < 4; i++) {
+status = mbr[i].active ? 0x80 : 0x00;
+g_assert(mbr[i].head < 256);
+g_assert(mbr[i].sector < 64);
+g_assert(mbr[i].cyl < 1024);
+head = mbr[i].head;
+sector = mbr[i].sector + ((mbr[i].cyl & 0x300) >> 2);
+cyl = mbr[i].cyl & 0xff;
+
+buf[offset + 0x0] = status;
+buf[offset + 0x1] = head;
+buf[offset + 0x2] = sector;
+buf[offset + 0x3] = cyl;
+
+parttype = 0;
+g_assert(mbr[i].end_head < 256);
+g_assert(mbr[i].end_sector < 64);
+g_assert(mbr[i].end_cyl < 1024);
+head = mbr[i].end_head;
+sector = mbr[i].end_sector + ((mbr[i].end_cyl & 0x300) >> 2);
+cyl = mbr[i].end_cyl & 0xff;
+
+buf[offset + 0x4] = parttype;
+buf[offset + 0x5] = head;
+buf[offset + 0x6] = sector;
+buf[offset + 0x7] = cyl;
+
+(*(uint32_t *)&buf[offset + 0x8]) = cpu_to_le32(mbr[i].start_sect);
+(*(uint32_t *)&buf[offset + 0xc]) = cpu_to_le32(mbr[i].nr_sects);
+
+offset += 0x10;
+}
+
+fd = mkstemp(raw_path);
+g_assert(fd);
+close(fd);
+
+fd = open(raw_path, O_WRONLY);
+g_assert(fd >= 0);
+ret = write(fd, buf, sizeof(buf));
+g_assert(ret == sizeof(buf));
+close(fd);
+
+fd = mkstemp(qcow2_path);
+g_assert(fd);
+close(fd);
+
+qemu_img_path = getenv("QTEST_QEMU_IMG");
+g_assert(qemu_img_path);
+qemu_img_abs_path = realpath(qemu_img_path, NULL);
+g_assert(qemu_img_abs_path);
+
+ret = snprintf(cmd, sizeof(cmd),
+   "%s convert -f raw -O qcow2 %s %s > /dev/null",
+   qemu_img_abs_path,
+   raw_path, qcow2_path);
+g_assert((0 < ret) && (ret <= sizeof(cmd)));
+ret = system(cmd);
+g_assert(ret == 0);
+
+ret = snprintf(cmd, sizeof(cmd),
+   "%s resize %s %" PRIu64 " > /dev/null",
+   qemu_img_abs_path,
+   qcow2_path, qcow2_size);
+g_assert((0 < ret) && (ret <= sizeof(cmd)));
+ret = system(cmd);
+g_assert(ret == 0);
+
+free(qemu_img_abs_path);
+
+unlink(raw_path);
+free(

[PATCH v8 7/8] bootdevice: FW_CFG interface for LCHS values

2019-10-16 Thread Sam Eiderman

From: Sam Eiderman 

Using fw_cfg, supply logical CHS values directly from QEMU to the BIOS.

Non-standard logical geometries break under QEMU.

A virtual disk which contains an operating system which depends on
logical geometries (consistent values being reported from BIOS INT13
AH=08) will most likely break under QEMU/SeaBIOS if it has non-standard
logical geometries - for example 56 SPT (sectors per track).
No matter what QEMU will report - SeaBIOS, for large enough disks - will
use LBA translation, which will report 63 SPT instead.

In addition we cannot force SeaBIOS to rely on physical geometries at
all. A virtio-blk-pci virtual disk with 255 physical heads cannot
report more than 16 physical heads when moved to an IDE controller,
since the ATA spec allows a maximum of 16 heads - this is an artifact of
virtualization.

By supplying the logical geometries directly we are able to support such
"exotic" disks.

We serialize this information in a similar way to the "bootorder"
interface.
The new fw_cfg entry is "bios-geometry".

Reviewed-by: Karl Heubaum 
Reviewed-by: Arbel Moshe 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Sam Eiderman 
Signed-off-by: Sam Eiderman 
---
 bootdevice.c| 31 +++
 hw/nvram/fw_cfg.c   | 14 +++---
 include/sysemu/sysemu.h |  1 +
 3 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/bootdevice.c b/bootdevice.c
index 2cf6b37c57..03aaffcc8d 100644
--- a/bootdevice.c
+++ b/bootdevice.c
@@ -405,3 +405,34 @@ void del_boot_device_lchs(DeviceState *dev, const char 
*suffix)
 }
 }
 }
+
+char *get_boot_devices_lchs_list(size_t *size)
+{
+FWLCHSEntry *i;
+size_t total = 0;
+char *list = NULL;
+
+QTAILQ_FOREACH(i, &fw_lchs, link) {
+char *bootpath;
+char *chs_string;
+size_t len;
+
+bootpath = get_boot_device_path(i->dev, false, i->suffix);
+chs_string = g_strdup_printf("%s %" PRIu32 " %" PRIu32 " %" PRIu32,
+ bootpath, i->lcyls, i->lheads, i->lsecs);
+
+if (total) {
+list[total - 1] = '\n';
+}
+len = strlen(chs_string) + 1;
+list = g_realloc(list, total + len);
+memcpy(&list[total], chs_string, len);
+total += len;
+g_free(chs_string);
+g_free(bootpath);
+}
+
+*size = total;
+
+return list;
+}
diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
index 7dc3ac378e..18aff658c0 100644
--- a/hw/nvram/fw_cfg.c
+++ b/hw/nvram/fw_cfg.c
@@ -920,13 +920,21 @@ void *fw_cfg_modify_file(FWCfgState *s, const char 
*filename,
 
 static void fw_cfg_machine_reset(void *opaque)
 {
+MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
+FWCfgState *s = opaque;
 void *ptr;
 size_t len;
-FWCfgState *s = opaque;
-char *bootindex = get_boot_devices_list(&len);
+char *buf;
 
-ptr = fw_cfg_modify_file(s, "bootorder", (uint8_t *)bootindex, len);
+buf = get_boot_devices_list(&len);
+ptr = fw_cfg_modify_file(s, "bootorder", (uint8_t *)buf, len);
 g_free(ptr);
+
+if (!mc->legacy_fw_cfg_order) {
+buf = get_boot_devices_lchs_list(&len);
+ptr = fw_cfg_modify_file(s, "bios-geometry", (uint8_t *)buf, len);
+g_free(ptr);
+}
 }
 
 static void fw_cfg_machine_ready(struct Notifier *n, void *data)
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 5bc5c79cbc..80c57fdc4e 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -106,6 +106,7 @@ void validate_bootdevices(const char *devices, Error 
**errp);
 void add_boot_device_lchs(DeviceState *dev, const char *suffix,
   uint32_t lcyls, uint32_t lheads, uint32_t lsecs);
 void del_boot_device_lchs(DeviceState *dev, const char *suffix);
+char *get_boot_devices_lchs_list(size_t *size);
 
 /* handler to set the boot_device order for a specific type of MachineClass */
 typedef void QEMUBootSetHandler(void *opaque, const char *boot_order,
-- 
2.23.0.700.g56cf767bdb-goog

[PATCH v8 5/8] bootdevice: Gather LCHS from all relevant devices

2019-10-16 Thread Sam Eiderman

From: Sam Eiderman 

Relevant devices are:
* ide-hd (and ide-cd, ide-drive)
* scsi-hd (and scsi-cd, scsi-disk, scsi-block)
* virtio-blk-pci

We do not call del_boot_device_lchs() for ide-* since we don't need to -
IDE block devices do not support unplugging.

Reviewed-by: Karl Heubaum 
Reviewed-by: Arbel Moshe 
Signed-off-by: Sam Eiderman 
Signed-off-by: Sam Eiderman 
---
 hw/block/virtio-blk.c |  6 ++
 hw/ide/qdev.c |  5 +
 hw/scsi/scsi-disk.c   | 12 
 3 files changed, 23 insertions(+)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index ed2ddebd2b..c56e905f80 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -1200,6 +1200,11 @@ static void virtio_blk_device_realize(DeviceState *dev, 
Error **errp)
 blk_set_guest_block_size(s->blk, s->conf.conf.logical_block_size);
 
 blk_iostatus_enable(s->blk);
+
+add_boot_device_lchs(dev, "/disk@0,0",
+ conf->conf.lcyls,
+ conf->conf.lheads,
+ conf->conf.lsecs);
 }
 
 static void virtio_blk_device_unrealize(DeviceState *dev, Error **errp)
@@ -1207,6 +1212,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev, 
Error **errp)
 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
 VirtIOBlock *s = VIRTIO_BLK(dev);
 
+del_boot_device_lchs(dev, "/disk@0,0");
 virtio_blk_data_plane_destroy(s->dataplane);
 s->dataplane = NULL;
 qemu_del_vm_change_state_handler(s->change);
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index 6dd219944f..2ffd387a73 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -220,6 +220,11 @@ static void ide_dev_initfn(IDEDevice *dev, IDEDriveKind 
kind, Error **errp)
 
 add_boot_device_path(dev->conf.bootindex, &dev->qdev,
  dev->unit ? "/disk@1" : "/disk@0");
+
+add_boot_device_lchs(&dev->qdev, dev->unit ? "/disk@1" : "/disk@0",
+ dev->conf.lcyls,
+ dev->conf.lheads,
+ dev->conf.lsecs);
 }
 
 static void ide_dev_get_bootindex(Object *obj, Visitor *v, const char *name,
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 68b1675fd9..07fb5ebdf1 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -35,6 +35,7 @@
 #include "hw/block/block.h"
 #include "hw/qdev-properties.h"
 #include "sysemu/dma.h"
+#include "sysemu/sysemu.h"
 #include "qemu/cutils.h"
 #include "trace.h"
 
@@ -2414,6 +2415,16 @@ static void scsi_realize(SCSIDevice *dev, Error **errp)
 blk_set_guest_block_size(s->qdev.conf.blk, s->qdev.blocksize);
 
 blk_iostatus_enable(s->qdev.conf.blk);
+
+add_boot_device_lchs(&dev->qdev, NULL,
+ dev->conf.lcyls,
+ dev->conf.lheads,
+ dev->conf.lsecs);
+}
+
+static void scsi_unrealize(SCSIDevice *dev, Error **errp)
+{
+del_boot_device_lchs(&dev->qdev, NULL);
 }
 
 static void scsi_hd_realize(SCSIDevice *dev, Error **errp)
@@ -3018,6 +3029,7 @@ static void scsi_hd_class_initfn(ObjectClass *klass, void 
*data)
 SCSIDeviceClass *sc = SCSI_DEVICE_CLASS(klass);
 
 sc->realize  = scsi_hd_realize;
+sc->unrealize= scsi_unrealize;
 sc->alloc_req= scsi_new_request;
 sc->unit_attention_reported = scsi_disk_unit_attention_reported;
 dc->desc = "virtual SCSI disk";
-- 
2.23.0.700.g56cf767bdb-goog

Re: [SeaBIOS] Re: [PATCH v7 7/8] bootdevice: FW_CFG interface for LCHS values

2019-10-16 Thread Philippe Mathieu-Daudé


On 10/16/19 5:19 PM, Sam Eiderman wrote:

Sure!

Philippe withdrew his R-b on 7/8, as I explained 7/8 is fine (only
need to remove a bad comment) the problem was in the tests 8/8 -
should I include the original R/b?


I withdrew it because John was preparing his pull request, and I needed 
more time to review this again. But then Laszlo was quicker and figured 
out the problem is in the other patch, so please keep my original R-b.


Thanks to all 3 of you :)


I guess all other 1-6 are fine to add R/b...

On Wed, Oct 16, 2019 at 6:07 PM John Snow  wrote:




On 10/16/19 10:55 AM, Sam Eiderman wrote:

Thanks for the detailed comment Laszlo,

Indeed my e-mail has changed and I only received replies to the
commits where I added this new mail in the S-o-b section; I should have
added it in all of them.

So as you said it, the problem was actually in using qfw_cfg_get_u32
which assumes the value is encoded LE and has an additional
le32_to_cpu, should have used qfw_cfg_get directly like
qfw_cfg_get_file does.

Regarding qfw_cfg_get_file - I wrote this code when this function did
not exist yet, I think it was added 6 months ago. In any case, I will
use it instead.

Thanks for this.

I will resubmit this entire commit series:
* I will only change code in the last commit (tests)
* I will remove a comment which is now not true anymore
* I will add my new email in S-o-b

Sam



Philippe gave me a verbal tut-tut for not including his review tags in
my last pull request; when you re-spin could you be so kind as to
include any that still apply?

--js

[PATCH v8 6/8] bootdevice: Refactor get_boot_devices_list

2019-10-16 Thread Sam Eiderman

From: Sam Eiderman 

Move device name construction to a separate function.

We will reuse this function in the following commit to pass logical CHS
parameters through fw_cfg much like we currently pass bootindex.

Reviewed-by: Karl Heubaum 
Reviewed-by: Arbel Moshe 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Sam Eiderman 
Signed-off-by: Sam Eiderman 
---
 bootdevice.c | 61 +---
 1 file changed, 34 insertions(+), 27 deletions(-)

diff --git a/bootdevice.c b/bootdevice.c
index bc5e1c2de4..2cf6b37c57 100644
--- a/bootdevice.c
+++ b/bootdevice.c
@@ -202,6 +202,39 @@ DeviceState *get_boot_device(uint32_t position)
 return res;
 }
 
+static char *get_boot_device_path(DeviceState *dev, bool ignore_suffixes,
+  const char *suffix)
+{
+char *devpath = NULL, *s = NULL, *d, *bootpath;
+
+if (dev) {
+devpath = qdev_get_fw_dev_path(dev);
+assert(devpath);
+}
+
+if (!ignore_suffixes) {
+if (dev) {
+d = qdev_get_own_fw_dev_path_from_handler(dev->parent_bus, dev);
+if (d) {
+assert(!suffix);
+s = d;
+} else {
+s = g_strdup(suffix);
+}
+} else {
+s = g_strdup(suffix);
+}
+}
+
+bootpath = g_strdup_printf("%s%s",
+   devpath ? devpath : "",
+   s ? s : "");
+g_free(devpath);
+g_free(s);
+
+return bootpath;
+}
+
 /*
  * This function returns null terminated string that consist of new line
  * separated device paths.
@@ -218,36 +251,10 @@ char *get_boot_devices_list(size_t *size)
 bool ignore_suffixes = mc->ignore_boot_device_suffixes;
 
 QTAILQ_FOREACH(i, &fw_boot_order, link) {
-char *devpath = NULL,  *suffix = NULL;
 char *bootpath;
-char *d;
 size_t len;
 
-if (i->dev) {
-devpath = qdev_get_fw_dev_path(i->dev);
-assert(devpath);
-}
-
-if (!ignore_suffixes) {
-if (i->dev) {
-d = qdev_get_own_fw_dev_path_from_handler(i->dev->parent_bus,
-  i->dev);
-if (d) {
-assert(!i->suffix);
-suffix = d;
-} else {
-suffix = g_strdup(i->suffix);
-}
-} else {
-suffix = g_strdup(i->suffix);
-}
-}
-
-bootpath = g_strdup_printf("%s%s",
-   devpath ? devpath : "",
-   suffix ? suffix : "");
-g_free(devpath);
-g_free(suffix);
+bootpath = get_boot_device_path(i->dev, ignore_suffixes, i->suffix);
 
 if (total) {
 list[total-1] = '\n';
-- 
2.23.0.700.g56cf767bdb-goog

[PATCH v8 4/8] scsi: Propagate unrealize() callback to scsi-hd

2019-10-16 Thread Sam Eiderman

From: Sam Eiderman 

We will need to add LCHS removal logic to scsi-hd's unrealize() in the
next commit.

Reviewed-by: Karl Heubaum 
Reviewed-by: Arbel Moshe 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Sam Eiderman 
Signed-off-by: Sam Eiderman 
---
 hw/scsi/scsi-bus.c | 16 
 include/hw/scsi/scsi.h |  1 +
 2 files changed, 17 insertions(+)

diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index bccb7cc4c6..359d50d6d0 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -59,6 +59,14 @@ static void scsi_device_realize(SCSIDevice *s, Error **errp)
 }
 }
 
+static void scsi_device_unrealize(SCSIDevice *s, Error **errp)
+{
+SCSIDeviceClass *sc = SCSI_DEVICE_GET_CLASS(s);
+if (sc->unrealize) {
+sc->unrealize(s, errp);
+}
+}
+
 int scsi_bus_parse_cdb(SCSIDevice *dev, SCSICommand *cmd, uint8_t *buf,
void *hba_private)
 {
@@ -217,12 +225,20 @@ static void scsi_qdev_realize(DeviceState *qdev, Error 
**errp)
 static void scsi_qdev_unrealize(DeviceState *qdev, Error **errp)
 {
 SCSIDevice *dev = SCSI_DEVICE(qdev);
+Error *local_err = NULL;
 
 if (dev->vmsentry) {
 qemu_del_vm_change_state_handler(dev->vmsentry);
 }
 
 scsi_device_purge_requests(dev, SENSE_CODE(NO_SENSE));
+
+scsi_device_unrealize(dev, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+
 blockdev_mark_auto_del(dev->conf.blk);
 }
 
diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
index d77a92361b..332ef602f4 100644
--- a/include/hw/scsi/scsi.h
+++ b/include/hw/scsi/scsi.h
@@ -59,6 +59,7 @@ struct SCSIRequest {
 typedef struct SCSIDeviceClass {
 DeviceClass parent_class;
 void (*realize)(SCSIDevice *dev, Error **errp);
+void (*unrealize)(SCSIDevice *dev, Error **errp);
 int (*parse_cdb)(SCSIDevice *dev, SCSICommand *cmd, uint8_t *buf,
  void *hba_private);
 SCSIRequest *(*alloc_req)(SCSIDevice *s, uint32_t tag, uint32_t lun,
-- 
2.23.0.700.g56cf767bdb-goog

[PATCH v8 1/8] block: Refactor macros - fix tabbing

2019-10-16 Thread Sam Eiderman

From: Sam Eiderman 

Fixing tabbing in block related macros.

Reviewed-by: Karl Heubaum 
Reviewed-by: Arbel Moshe 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Sam Eiderman 
Signed-off-by: Sam Eiderman 
---
 hw/ide/qdev.c|  2 +-
 include/hw/block/block.h | 16 
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index 6fba6b62b8..6dd219944f 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -290,7 +290,7 @@ static void ide_drive_realize(IDEDevice *dev, Error **errp)
 DEFINE_BLOCK_PROPERTIES(IDEDrive, dev.conf),\
 DEFINE_BLOCK_ERROR_PROPERTIES(IDEDrive, dev.conf),  \
 DEFINE_PROP_STRING("ver",  IDEDrive, dev.version),  \
-DEFINE_PROP_UINT64("wwn",  IDEDrive, dev.wwn, 0),\
+DEFINE_PROP_UINT64("wwn",  IDEDrive, dev.wwn, 0),   \
 DEFINE_PROP_STRING("serial",  IDEDrive, dev.serial),\
 DEFINE_PROP_STRING("model", IDEDrive, dev.model)
 
diff --git a/include/hw/block/block.h b/include/hw/block/block.h
index 607539057a..fd55a30bca 100644
--- a/include/hw/block/block.h
+++ b/include/hw/block/block.h
@@ -50,21 +50,21 @@ static inline unsigned int get_physical_block_exp(BlockConf 
*conf)
   _conf.logical_block_size),\
 DEFINE_PROP_BLOCKSIZE("physical_block_size", _state,\
   _conf.physical_block_size),   \
-DEFINE_PROP_UINT16("min_io_size", _state, _conf.min_io_size, 0),  \
+DEFINE_PROP_UINT16("min_io_size", _state, _conf.min_io_size, 0),\
 DEFINE_PROP_UINT32("opt_io_size", _state, _conf.opt_io_size, 0),\
-DEFINE_PROP_UINT32("discard_granularity", _state, \
-   _conf.discard_granularity, -1), \
-DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce, \
-ON_OFF_AUTO_AUTO), \
+DEFINE_PROP_UINT32("discard_granularity", _state,   \
+   _conf.discard_granularity, -1),  \
+DEFINE_PROP_ON_OFF_AUTO("write-cache", _state, _conf.wce,   \
+ON_OFF_AUTO_AUTO),  \
 DEFINE_PROP_BOOL("share-rw", _state, _conf.share_rw, false)
 
 #define DEFINE_BLOCK_PROPERTIES(_state, _conf)  \
 DEFINE_PROP_DRIVE("drive", _state, _conf.blk),  \
 DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf)
 
-#define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf)  \
-DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0),  \
-DEFINE_PROP_UINT32("heads", _state, _conf.heads, 0), \
+#define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf)  \
+DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0),  \
+DEFINE_PROP_UINT32("heads", _state, _conf.heads, 0),\
 DEFINE_PROP_UINT32("secs", _state, _conf.secs, 0)
 
 #define DEFINE_BLOCK_ERROR_PROPERTIES(_state, _conf)\
-- 
2.23.0.700.g56cf767bdb-goog

[PATCH v8 3/8] bootdevice: Add interface to gather LCHS

2019-10-16 Thread Sam Eiderman

From: Sam Eiderman 

Add an interface to provide direct logical CHS values for boot devices.
We will use this interface in the next commits.

Reviewed-by: Karl Heubaum 
Reviewed-by: Arbel Moshe 
Signed-off-by: Sam Eiderman 
Signed-off-by: Sam Eiderman 
---
 bootdevice.c| 55 +
 include/sysemu/sysemu.h |  3 +++
 2 files changed, 58 insertions(+)

diff --git a/bootdevice.c b/bootdevice.c
index 1d225202f9..bc5e1c2de4 100644
--- a/bootdevice.c
+++ b/bootdevice.c
@@ -343,3 +343,58 @@ void device_add_bootindex_property(Object *obj, int32_t 
*bootindex,
 /* initialize devices' bootindex property to -1 */
 object_property_set_int(obj, -1, name, NULL);
 }
+
+typedef struct FWLCHSEntry FWLCHSEntry;
+
+struct FWLCHSEntry {
+QTAILQ_ENTRY(FWLCHSEntry) link;
+DeviceState *dev;
+char *suffix;
+uint32_t lcyls;
+uint32_t lheads;
+uint32_t lsecs;
+};
+
+static QTAILQ_HEAD(, FWLCHSEntry) fw_lchs =
+QTAILQ_HEAD_INITIALIZER(fw_lchs);
+
+void add_boot_device_lchs(DeviceState *dev, const char *suffix,
+  uint32_t lcyls, uint32_t lheads, uint32_t lsecs)
+{
+FWLCHSEntry *node;
+
+if (!lcyls && !lheads && !lsecs) {
+return;
+}
+
+assert(dev != NULL || suffix != NULL);
+
+node = g_malloc0(sizeof(FWLCHSEntry));
+node->suffix = g_strdup(suffix);
+node->dev = dev;
+node->lcyls = lcyls;
+node->lheads = lheads;
+node->lsecs = lsecs;
+
+QTAILQ_INSERT_TAIL(&fw_lchs, node, link);
+}
+
+void del_boot_device_lchs(DeviceState *dev, const char *suffix)
+{
+FWLCHSEntry *i;
+
+if (dev == NULL) {
+return;
+}
+
+QTAILQ_FOREACH(i, &fw_lchs, link) {
+if ((!suffix || !g_strcmp0(i->suffix, suffix)) &&
+ i->dev == dev) {
+QTAILQ_REMOVE(&fw_lchs, i, link);
+g_free(i->suffix);
+g_free(i);
+
+break;
+}
+}
+}
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 44f18eb739..5bc5c79cbc 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -103,6 +103,9 @@ void device_add_bootindex_property(Object *obj, int32_t 
*bootindex,
DeviceState *dev, Error **errp);
 void restore_boot_order(void *opaque);
 void validate_bootdevices(const char *devices, Error **errp);
+void add_boot_device_lchs(DeviceState *dev, const char *suffix,
+  uint32_t lcyls, uint32_t lheads, uint32_t lsecs);
+void del_boot_device_lchs(DeviceState *dev, const char *suffix);
 
 /* handler to set the boot_device order for a specific type of MachineClass */
 typedef void QEMUBootSetHandler(void *opaque, const char *boot_order,
-- 
2.23.0.700.g56cf767bdb-goog

[PATCH v8 1/1] hd-geo-test: Add tests for lchs override

2019-10-16 Thread Sam Eiderman

From: Sam Eiderman 

Add QTest tests to check the logical geometry override option.

The tests in hd-geo-test are out of date - they only test IDE and do not
test interesting MBRs.

Creating qcow2 disks with specific size and MBR layout is currently
unused - we only use a default empty MBR.

Reviewed-by: Karl Heubaum 
Reviewed-by: Arbel Moshe 
Signed-off-by: Sam Eiderman 
Signed-off-by: Sam Eiderman 
---
 tests/Makefile.include |   2 +-
 tests/hd-geo-test.c| 551 +
 2 files changed, 552 insertions(+), 1 deletion(-)

diff --git a/tests/Makefile.include b/tests/Makefile.include
index 3543451ed3..6941ae7c77 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -780,7 +780,7 @@ tests/ide-test$(EXESUF): tests/ide-test.o $(libqos-pc-obj-y)
 tests/ahci-test$(EXESUF): tests/ahci-test.o $(libqos-pc-obj-y) 
qemu-img$(EXESUF)
 tests/ipmi-kcs-test$(EXESUF): tests/ipmi-kcs-test.o
 tests/ipmi-bt-test$(EXESUF): tests/ipmi-bt-test.o
-tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o
+tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o $(libqos-obj-y)
 tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y)
 tests/boot-serial-test$(EXESUF): tests/boot-serial-test.o $(libqos-obj-y)
 tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \
diff --git a/tests/hd-geo-test.c b/tests/hd-geo-test.c
index 62eb624726..7e86c5416c 100644
--- a/tests/hd-geo-test.c
+++ b/tests/hd-geo-test.c
@@ -17,7 +17,12 @@
 
 #include "qemu/osdep.h"
 #include "qemu-common.h"
+#include "qemu/bswap.h"
+#include "qapi/qmp/qlist.h"
 #include "libqtest.h"
+#include "libqos/fw_cfg.h"
+#include "libqos/libqos.h"
+#include "standard-headers/linux/qemu_fw_cfg.h"
 
 #define ARGV_SIZE 256
 
@@ -388,6 +393,537 @@ static void test_ide_drive_cd_0(void)
 qtest_quit(qts);
 }
 
+typedef struct {
+bool active;
+uint32_t head;
+uint32_t sector;
+uint32_t cyl;
+uint32_t end_head;
+uint32_t end_sector;
+uint32_t end_cyl;
+uint32_t start_sect;
+uint32_t nr_sects;
+} MBRpartitions[4];
+
+static MBRpartitions empty_mbr = { {false, 0, 0, 0, 0, 0, 0, 0, 0},
+   {false, 0, 0, 0, 0, 0, 0, 0, 0},
+   {false, 0, 0, 0, 0, 0, 0, 0, 0},
+   {false, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+static char *create_qcow2_with_mbr(MBRpartitions mbr, uint64_t sectors)
+{
+const char *template = "/tmp/qtest.XX";
+char *raw_path = strdup(template);
+char *qcow2_path = strdup(template);
+char cmd[100 + 2 * PATH_MAX];
+uint8_t buf[512];
+int i, ret, fd, offset;
+uint64_t qcow2_size = sectors * 512;
+uint8_t status, parttype, head, sector, cyl;
+char *qemu_img_path;
+char *qemu_img_abs_path;
+
+offset = 0xbe;
+
+for (i = 0; i < 4; i++) {
+status = mbr[i].active ? 0x80 : 0x00;
+g_assert(mbr[i].head < 256);
+g_assert(mbr[i].sector < 64);
+g_assert(mbr[i].cyl < 1024);
+head = mbr[i].head;
+sector = mbr[i].sector + ((mbr[i].cyl & 0x300) >> 2);
+cyl = mbr[i].cyl & 0xff;
+
+buf[offset + 0x0] = status;
+buf[offset + 0x1] = head;
+buf[offset + 0x2] = sector;
+buf[offset + 0x3] = cyl;
+
+parttype = 0;
+g_assert(mbr[i].end_head < 256);
+g_assert(mbr[i].end_sector < 64);
+g_assert(mbr[i].end_cyl < 1024);
+head = mbr[i].end_head;
+sector = mbr[i].end_sector + ((mbr[i].end_cyl & 0x300) >> 2);
+cyl = mbr[i].end_cyl & 0xff;
+
+buf[offset + 0x4] = parttype;
+buf[offset + 0x5] = head;
+buf[offset + 0x6] = sector;
+buf[offset + 0x7] = cyl;
+
+(*(uint32_t *)&buf[offset + 0x8]) = cpu_to_le32(mbr[i].start_sect);
+(*(uint32_t *)&buf[offset + 0xc]) = cpu_to_le32(mbr[i].nr_sects);
+
+offset += 0x10;
+}
+
+fd = mkstemp(raw_path);
+g_assert(fd);
+close(fd);
+
+fd = open(raw_path, O_WRONLY);
+g_assert(fd >= 0);
+ret = write(fd, buf, sizeof(buf));
+g_assert(ret == sizeof(buf));
+close(fd);
+
+fd = mkstemp(qcow2_path);
+g_assert(fd);
+close(fd);
+
+qemu_img_path = getenv("QTEST_QEMU_IMG");
+g_assert(qemu_img_path);
+qemu_img_abs_path = realpath(qemu_img_path, NULL);
+g_assert(qemu_img_abs_path);
+
+ret = snprintf(cmd, sizeof(cmd),
+   "%s convert -f raw -O qcow2 %s %s > /dev/null",
+   qemu_img_abs_path,
+   raw_path, qcow2_path);
+g_assert((0 < ret) && (ret <= sizeof(cmd)));
+ret = system(cmd);
+g_assert(ret == 0);
+
+ret = snprintf(cmd, sizeof(cmd),
+   "%s resize %s %" PRIu64 " > /dev/null",
+   qemu_img_abs_path,
+   qcow2_path, qcow2_size);
+g_assert((0 < ret) && (ret <= sizeof(cmd)));
+ret = system(cmd);
+g_assert(ret == 0);
+
+free(qemu_img_abs_path);
+
+unlink(raw_path);
+free(

[PATCH v8 2/8] block: Support providing LCHS from user

2019-10-16 Thread Sam Eiderman

From: Sam Eiderman 

Add logical geometry variables to BlockConf.

A user can now supply "lcyls", "lheads" & "lsecs" for any HD device
that supports CHS ("cyls", "heads", "secs").

These devices include:
* ide-hd
* scsi-hd
* virtio-blk-pci

In future commits we will use the provided LCHS and pass it to the BIOS
through fw_cfg to be supplied using INT13 routines.

Reviewed-by: Karl Heubaum 
Reviewed-by: Arbel Moshe 
Signed-off-by: Sam Eiderman 
Signed-off-by: Sam Eiderman 
---
 include/hw/block/block.h | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/hw/block/block.h b/include/hw/block/block.h
index fd55a30bca..d7246f3862 100644
--- a/include/hw/block/block.h
+++ b/include/hw/block/block.h
@@ -26,6 +26,7 @@ typedef struct BlockConf {
 uint32_t discard_granularity;
 /* geometry, not all devices use this */
 uint32_t cyls, heads, secs;
+uint32_t lcyls, lheads, lsecs;
 OnOffAuto wce;
 bool share_rw;
 BlockdevOnError rerror;
@@ -65,7 +66,10 @@ static inline unsigned int get_physical_block_exp(BlockConf 
*conf)
 #define DEFINE_BLOCK_CHS_PROPERTIES(_state, _conf)  \
 DEFINE_PROP_UINT32("cyls", _state, _conf.cyls, 0),  \
 DEFINE_PROP_UINT32("heads", _state, _conf.heads, 0),\
-DEFINE_PROP_UINT32("secs", _state, _conf.secs, 0)
+DEFINE_PROP_UINT32("secs", _state, _conf.secs, 0),  \
+DEFINE_PROP_UINT32("lcyls", _state, _conf.lcyls, 0),\
+DEFINE_PROP_UINT32("lheads", _state, _conf.lheads, 0),  \
+DEFINE_PROP_UINT32("lsecs", _state, _conf.lsecs, 0)
 
 #define DEFINE_BLOCK_ERROR_PROPERTIES(_state, _conf)\
 DEFINE_PROP_BLOCKDEV_ON_ERROR("rerror", _state, _conf.rerror,   \
-- 
2.23.0.700.g56cf767bdb-goog

[PATCH v8 0/8] Add Qemu to SeaBIOS LCHS interface

2019-10-16 Thread Sam Eiderman



v1:

Non-standard logical geometries break under QEMU.

A virtual disk which contains an operating system which depends on
logical geometries (consistent values being reported from BIOS INT13
AH=08) will most likely break under QEMU/SeaBIOS if it has non-standard
logical geometries - for example 56 SPT (sectors per track).
No matter what QEMU will guess - SeaBIOS, for large enough disks - will
use LBA translation, which will report 63 SPT instead.

In addition we can not enforce SeaBIOS to rely on physical geometries at
all. A virtio-blk-pci virtual disk with 255 physical heads can not
report more than 16 physical heads when moved to an IDE controller, the
ATA spec allows a maximum of 16 heads - this is an artifact of
virtualization.

By supplying the logical geometries directly we are able to support such
"exotic" disks.

We will use fw_cfg to do just that.

v2:

Fix missing parenthesis check in
"hd-geo-test: Add tests for lchs override"

v3:

* Rename fw_cfg key to "bios-geometry".
* Remove "extendible" interface.
* Add cpu_to_le32 fix as Laszlo suggested for big endian hosts
* Fix last qtest commit - automatic docker tester for some reason does not have 
qemu-img set

v4:

* Change fw_cfg interface from mixed textual/binary to textual only

v5:

* Fix line > 80 chars in tests/hd-geo-test.c

v6:

* Small fixes for issues pointed by Max
* (&conf->conf)->lcyls to conf->conf.lcyls and so on
* Remove scsi_unrealize from everything other than scsi-hd
* Add proper include to sysemu.h
* scsi_device_unrealize() after scsi_device_purge_requests()

v7:

* Adapted last commit (tests) to changes in qtest

v8:

* Fixed BE issue with tests by using qfw_cfg_get_file() instead of
  read_fw_cfg_file(), thanks Laszlo.
* Removed incorrect comment in 7/8.

Sam Eiderman (8):
  block: Refactor macros - fix tabbing
  block: Support providing LCHS from user
  bootdevice: Add interface to gather LCHS
  scsi: Propagate unrealize() callback to scsi-hd
  bootdevice: Gather LCHS from all relevant devices
  bootdevice: Refactor get_boot_devices_list
  bootdevice: FW_CFG interface for LCHS values
  hd-geo-test: Add tests for lchs override

 bootdevice.c | 147 +--
 hw/block/virtio-blk.c|   6 +
 hw/ide/qdev.c|   7 +-
 hw/nvram/fw_cfg.c|  14 +-
 hw/scsi/scsi-bus.c   |  16 ++
 hw/scsi/scsi-disk.c  |  12 +
 include/hw/block/block.h |  22 +-
 include/hw/scsi/scsi.h   |   1 +
 include/sysemu/sysemu.h  |   4 +
 tests/Makefile.include   |   2 +-
 tests/hd-geo-test.c  | 551 +++
 11 files changed, 741 insertions(+), 41 deletions(-)

-- 
2.23.0.700.g56cf767bdb-goog

[PATCH v3] target/arm/arch_dump: Add SVE notes

2019-10-16 Thread Andrew Jones

When dumping a guest with dump-guest-memory also dump the SVE
registers if they are in use.

Signed-off-by: Andrew Jones 
---
v3:
  - Pulled sve_bswap64 out of kvm64.c and reused it here
  - Changed fpsr_offset and sve_size to only align to a
16 byte boundary from the note payload offset, not
from the note head. Doing this makes it consistent
with the documentation and what gcore does. Testing
shows that the elf headers and gdb are still happy.
  - Added blank lines between functions


 include/elf.h  |   2 +
 target/arm/arch_dump.c | 124 -
 target/arm/cpu.h   |  25 +
 target/arm/kvm64.c |  24 
 4 files changed, 149 insertions(+), 26 deletions(-)

diff --git a/include/elf.h b/include/elf.h
index 3501e0c8d03a..a7c357af74ca 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -1650,6 +1650,8 @@ typedef struct elf64_shdr {
 #define NT_ARM_HW_BREAK 0x402   /* ARM hardware breakpoint registers */
 #define NT_ARM_HW_WATCH 0x403   /* ARM hardware watchpoint registers */
 #define NT_ARM_SYSTEM_CALL  0x404   /* ARM system call number */
+#define NT_ARM_SVE 0x405   /* ARM Scalable Vector Extension
+  registers */
 
 /*
  * Physical entry point into the kernel.
diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c
index 26a2c098687c..2345dec3c2c2 100644
--- a/target/arm/arch_dump.c
+++ b/target/arm/arch_dump.c
@@ -62,12 +62,23 @@ struct aarch64_user_vfp_state {
 
 QEMU_BUILD_BUG_ON(sizeof(struct aarch64_user_vfp_state) != 528);
 
+/* struct user_sve_header from arch/arm64/include/uapi/asm/ptrace.h */
+struct aarch64_user_sve_header {
+uint32_t size;
+uint32_t max_size;
+uint16_t vl;
+uint16_t max_vl;
+uint16_t flags;
+uint16_t reserved;
+} QEMU_PACKED;
+
 struct aarch64_note {
 Elf64_Nhdr hdr;
 char name[8]; /* align_up(sizeof("CORE"), 4) */
 union {
 struct aarch64_elf_prstatus prstatus;
 struct aarch64_user_vfp_state vfp;
+struct aarch64_user_sve_header sve;
 };
 } QEMU_PACKED;
 
@@ -76,6 +87,8 @@ struct aarch64_note {
 (AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_elf_prstatus))
 #define AARCH64_PRFPREG_NOTE_SIZE \
 (AARCH64_NOTE_HEADER_SIZE + sizeof(struct aarch64_user_vfp_state))
+#define AARCH64_SVE_NOTE_SIZE(env) \
+(AARCH64_NOTE_HEADER_SIZE + sve_size(env))
 
 static void aarch64_note_init(struct aarch64_note *note, DumpState *s,
   const char *name, Elf64_Word namesz,
@@ -128,11 +141,102 @@ static int 
aarch64_write_elf64_prfpreg(WriteCoreDumpFunction f,
 return 0;
 }
 
+#ifdef TARGET_AARCH64
+static off_t sve_zreg_offset(uint32_t vq, int n)
+{
+off_t off = sizeof(struct aarch64_user_sve_header);
+return ROUND_UP(off, 16) + vq * 16 * n;
+}
+
+static off_t sve_preg_offset(uint32_t vq, int n)
+{
+return sve_zreg_offset(vq, 32) + vq * 16 / 8 * n;
+}
+
+static off_t sve_fpsr_offset(uint32_t vq)
+{
+off_t off = sve_preg_offset(vq, 17);
+return ROUND_UP(off, 16);
+}
+
+static off_t sve_fpcr_offset(uint32_t vq)
+{
+return sve_fpsr_offset(vq) + sizeof(uint32_t);
+}
+
+static uint32_t sve_current_vq(CPUARMState *env)
+{
+return sve_zcr_len_for_el(env, arm_current_el(env)) + 1;
+}
+
+static size_t sve_size_vq(uint32_t vq)
+{
+off_t off = sve_fpcr_offset(vq) + sizeof(uint32_t);
+return ROUND_UP(off, 16);
+}
+
+static size_t sve_size(CPUARMState *env)
+{
+return sve_size_vq(sve_current_vq(env));
+}
+
+static int aarch64_write_elf64_sve(WriteCoreDumpFunction f,
+   CPUARMState *env, int cpuid,
+   DumpState *s)
+{
+struct aarch64_note *note;
+ARMCPU *cpu = env_archcpu(env);
+uint32_t vq = sve_current_vq(env);
+uint64_t tmp[ARM_MAX_VQ * 2], *r;
+uint32_t fpr;
+uint8_t *buf;
+int ret, i;
+
+note = g_malloc0(AARCH64_SVE_NOTE_SIZE(env));
+buf = (uint8_t *)&note->sve;
+
+aarch64_note_init(note, s, "LINUX", 6, NT_ARM_SVE, sve_size_vq(vq));
+
+note->sve.size = cpu_to_dump32(s, sve_size_vq(vq));
+note->sve.max_size = cpu_to_dump32(s, sve_size_vq(cpu->sve_max_vq));
+note->sve.vl = cpu_to_dump16(s, vq * 16);
+note->sve.max_vl = cpu_to_dump16(s, cpu->sve_max_vq * 16);
+note->sve.flags = cpu_to_dump16(s, 1);
+
+for (i = 0; i < 32; ++i) {
+r = sve_bswap64(tmp, &env->vfp.zregs[i].d[0], vq * 2);
+memcpy(&buf[sve_zreg_offset(vq, i)], r, vq * 16);
+}
+
+for (i = 0; i < 17; ++i) {
+r = sve_bswap64(tmp, r = &env->vfp.pregs[i].p[0],
+DIV_ROUND_UP(vq * 2, 8));
+memcpy(&buf[sve_preg_offset(vq, i)], r, vq * 16 / 8);
+}
+
+fpr = cpu_to_dump32(s, vfp_get_fpsr(env));
+memcpy(&buf[sve_fpsr_offset(vq)], &fpr, sizeof(uint32_t));
+
+fpr = cpu_to_dump32(s, vfp_get_fpcr(env));
+memcpy(&buf[sve_fpcr_offset(vq)], &f

Re: [PATCH v10 13/15] docs/microvm.rst: document the new microvm machine type

2019-10-16 Thread Marc-André Lureau

Hi

On Wed, Oct 16, 2019 at 12:19 PM Sergio Lopez  wrote:
>
> Document the new microvm machine type.
>
> Signed-off-by: Sergio Lopez 
> Reviewed-by: Michael S. Tsirkin 
> ---
>  docs/microvm.rst | 98 
>  1 file changed, 98 insertions(+)
>  create mode 100644 docs/microvm.rst
>
> diff --git a/docs/microvm.rst b/docs/microvm.rst
> new file mode 100644
> index 00..0aab55576c
> --- /dev/null
> +++ b/docs/microvm.rst
> @@ -0,0 +1,98 @@
> +
> +microvm Machine Type
> +
> +
> +``microvm`` is a machine type inspired by ``Firecracker`` and
> +constructed after its machine model.
> +
> +It's a minimalist machine type without ``PCI`` nor ``ACPI`` support,
> +designed for short-lived guests. microvm also establishes a baseline
> +for benchmarking and optimizing both QEMU and guest operating systems,
> +since it is optimized for both boot time and footprint.
> +
> +
> +Supported devices
> +-
> +
> +The microvm machine type supports the following devices:
> +
> +- ISA bus
> +- i8259 PIC (optional)
> +- i8254 PIT (optional)
> +- MC146818 RTC (optional)
> +- One ISA serial port (optional)
> +- LAPIC
> +- IOAPIC (with kernel-irqchip=split by default)
> +- kvmclock (if using KVM)
> +- fw_cfg
> +- Up to eight virtio-mmio devices (configured by the user)
> +
> +
> +Using the microvm machine type
> +--
> +
> +Machine-specific options
> +
> +
> +It supports the following machine-specific options:
> +
> +- microvm.x-option-roms=bool (Set off to disable loading option ROMs)
> +- microvm.pit=OnOffAuto (Enable i8254 PIT)
> +- microvm.isa-serial=bool (Set off to disable the instantiation of an ISA 
> serial port)
> +- microvm.pic=OnOffAuto (Enable i8259 PIC)
> +- microvm.rtc=OnOffAuto (Enable MC146818 RTC)
> +- microvm.auto-kernel-cmdline=bool (Set off to disable adding virtio-mmio 
> devices to the kernel cmdline)
> +
> +
> +Boot options
> +
> +
> +By default, microvm uses ``qboot`` as its BIOS, to obtain better boot
> +times, but it's also compatible with ``SeaBIOS``.
> +
> +As no current FW is able to boot from a block device using
> +``virtio-mmio`` as its transport, a microvm-based VM needs to be run
> +using a host-side kernel and, optionally, an initrd image.
> +
> +
> +Running a microvm-based VM
> +~~
> +
> +By default, microvm aims for maximum compatibility, enabling both
> +legacy and non-legacy devices. In this example, a VM is created
> +without passing any additional machine-specific option, using the
> +legacy ``ISA serial`` device as console::
> +
> +  $ qemu-system-x86_64 -M microvm \
> + -enable-kvm -cpu host -m 512m -smp 2 \
> + -kernel vmlinux -append "earlyprintk=ttyS0 console=ttyS0 root=/dev/vda" 
> \
> + -nodefaults -no-user-config -nographic \
> + -serial stdio \
> + -drive id=test,file=test.img,format=raw,if=none \
> + -device virtio-blk-device,drive=test \
> + -netdev tap,id=tap0,script=no,downscript=no \
> + -device virtio-net-device,netdev=tap0
> +
> +While the example above works, you might be interested in reducing the
> +footprint further by disabling some legacy devices. If you're using
> +``KVM``, you can disable the ``RTC``, making the Guest rely on
> +``kvmclock`` exclusively. Additionally, if your host's CPUs have the
> +``TSC_DEADLINE`` feature, you can also disable both the i8259 PIC and
> +the i8254 PIT (make sure you're also emulating a CPU with such feature
> +in the guest).
> +
> +This is an example of a VM with all optional legacy features
> +disabled::
> +
> +  $ qemu-system-x86_64 \
> + -M microvm,x-option-roms=off,pit=off,pic=off,isa-serial=off,rtc=off \
> + -enable-kvm -cpu host -m 512m -smp 2 \
> + -kernel vmlinux -append "console=hvc0 root=/dev/vda" \
> + -nodefaults -no-user-config -nographic \
> + -chardev stdio,id=virtiocon0,server \

server? doesn't make sense here :)

> + -device virtio-serial-device \
> + -device virtconsole,chardev=virtiocon0 \
> + -drive id=test,file=test.img,format=raw,if=none \
> + -device virtio-blk-device,drive=test \
> + -netdev tap,id=tap0,script=no,downscript=no \
> + -device virtio-net-device,netdev=tap0
> --
> 2.21.0
>
>

seem to work with appropriate kernel otherwise, so
Reviewed-by: Marc-André Lureau 

-- 
Marc-André Lureau

[PATCH v4 4/4] tests/qemu-iotests: add case for block-stream compress

2019-10-16 Thread Andrey Shinkevich

Add a case to the iotest #030 that tests the 'compress' option for a
block-stream job.

Signed-off-by: Andrey Shinkevich 
---
 tests/qemu-iotests/030 | 51 +-
 tests/qemu-iotests/030.out |  4 ++--
 2 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index f3766f2..f0f0e26 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -21,7 +21,8 @@
 import time
 import os
 import iotests
-from iotests import qemu_img, qemu_io
+from iotests import qemu_img, qemu_io, qemu_img_pipe
+import json
 
 backing_img = os.path.join(iotests.test_dir, 'backing.img')
 mid_img = os.path.join(iotests.test_dir, 'mid.img')
@@ -956,6 +957,54 @@ class TestSetSpeed(iotests.QMPTestCase):
 
 self.cancel_and_wait(resume=True)
 
+class TestCompressed(iotests.QMPTestCase):
+test_img_init_size = 0
+
+def setUp(self):
+qemu_img('create', '-f', iotests.imgfmt, backing_img, '1M')
+qemu_img('create', '-f', iotests.imgfmt, '-o',
+ 'backing_file=%s' % backing_img, mid_img)
+qemu_img('create', '-f', iotests.imgfmt, '-o',
+ 'backing_file=%s' % mid_img, test_img)
+qemu_io('-c', 'write -P 0x1 0 512k', backing_img)
+top = json.loads(qemu_img_pipe('info', '--output=json', test_img))
+self.test_img_init_size = top['actual-size']
+self.vm = iotests.VM().add_drive(test_img, "backing.node-name=mid," +
+ "backing.backing.node-name=base," +
+ "compress=on")
+self.vm.launch()
+
+def tearDown(self):
+self.vm.shutdown()
+os.remove(test_img)
+os.remove(mid_img)
+os.remove(backing_img)
+
+def test_stream_compress(self):
+self.assert_no_active_block_jobs()
+
+result = self.vm.qmp('block-stream', device='mid', job_id='stream-mid')
+self.assert_qmp(result, 'return', {})
+
+self.wait_until_completed(drive='stream-mid')
+# Remove other 'JOB_STATUS_CHANGE' events for the job 'stream-mid'
+self.vm.get_qmp_events(wait=True)
+
+result = self.vm.qmp('block-stream', device='drive0',
+ job_id='stream-top')
+self.assert_qmp(result, 'return', {})
+
+self.wait_until_completed(drive='stream-top')
+self.vm.shutdown()
+
+top = json.loads(qemu_img_pipe('info', '--output=json', test_img))
+mid = json.loads(qemu_img_pipe('info', '--output=json', mid_img))
+base = json.loads(qemu_img_pipe('info', '--output=json', backing_img))
+
+self.assertEqual(mid['actual-size'], base['actual-size'])
+self.assertLess(top['actual-size'], mid['actual-size'])
+self.assertLess(self.test_img_init_size, top['actual-size'])
+
 if __name__ == '__main__':
 iotests.main(supported_fmts=['qcow2', 'qed'],
  supported_protocols=['file'])
diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out
index 6d9bee1..af8dac1 100644
--- a/tests/qemu-iotests/030.out
+++ b/tests/qemu-iotests/030.out
@@ -1,5 +1,5 @@
-...
+
 --
-Ran 27 tests
+Ran 28 tests
 
 OK
-- 
1.8.3.1

[PATCH v4 1/4] block: support compressed write at generic layer

2019-10-16 Thread Andrey Shinkevich

To inform the block layer about writing all the data compressed, we
introduce the 'compress' command line option. Based on that option, the
written data will be aligned by the cluster size at the generic layer.

Suggested-by: Roman Kagan 
Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Andrey Shinkevich 
---
 block.c   | 20 +++-
 block/io.c| 14 ++
 block/qcow2.c |  4 
 blockdev.c|  9 -
 include/block/block.h |  1 +
 include/block/block_int.h |  2 ++
 qapi/block-core.json  |  6 +-
 qemu-options.hx   |  6 --
 8 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/block.c b/block.c
index 1946fc6..a674920 100644
--- a/block.c
+++ b/block.c
@@ -1418,6 +1418,11 @@ QemuOptsList bdrv_runtime_opts = {
 .type = QEMU_OPT_BOOL,
 .help = "always accept other writers (default: off)",
 },
+{
+.name = BDRV_OPT_COMPRESS,
+.type = QEMU_OPT_BOOL,
+.help = "compress all writes to the image (default: off)",
+},
 { /* end of list */ }
 },
 };
@@ -1545,6 +1550,14 @@ static int bdrv_open_common(BlockDriverState *bs, 
BlockBackend *file,
 }
 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
 
+if (bs->all_write_compressed && !drv->bdrv_co_pwritev_compressed_part) {
+error_setg(errp, "Compression is not supported for the driver '%s'",
+   drv->format_name);
+bs->all_write_compressed = false;
+ret = -ENOTSUP;
+goto fail_opts;
+}
+
 /* Open the image, either directly or using a protocol */
 open_flags = bdrv_open_flags(bs, bs->open_flags);
 node_name = qemu_opt_get(opts, "node-name");
@@ -2983,6 +2996,11 @@ static BlockDriverState *bdrv_open_inherit(const char 
*filename,
 flags &= ~BDRV_O_RDWR;
 }
 
+if (!g_strcmp0(qdict_get_try_str(options, BDRV_OPT_COMPRESS), "on") ||
+qdict_get_try_bool(options, BDRV_OPT_COMPRESS, false)) {
+bs->all_write_compressed = true;
+}
+
 if (flags & BDRV_O_SNAPSHOT) {
 snapshot_options = qdict_new();
 bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
@@ -3208,7 +3226,7 @@ static int bdrv_reset_options_allowed(BlockDriverState 
*bs,
  * in bdrv_reopen_prepare() so they can be left out of @new_opts */
 const char *const common_options[] = {
 "node-name", "discard", "cache.direct", "cache.no-flush",
-"read-only", "auto-read-only", "detect-zeroes", NULL
+"read-only", "auto-read-only", "detect-zeroes", "compress", NULL
 };
 
 for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
diff --git a/block/io.c b/block/io.c
index f0b86c1..3743a13 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1360,9 +1360,15 @@ static int coroutine_fn 
bdrv_co_do_copy_on_readv(BdrvChild *child,
 /* This does not change the data on the disk, it is not
  * necessary to flush even in cache=writethrough mode.
  */
-ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
-  &local_qiov, 0,
-  BDRV_REQ_WRITE_UNCHANGED);
+if (bs->all_write_compressed) {
+ret = bdrv_driver_pwritev_compressed(bs, cluster_offset,
+ pnum, &local_qiov,
+ qiov_offset);
+} else {
+ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
+  &local_qiov, 0,
+  BDRV_REQ_WRITE_UNCHANGED);
+}
 }
 
 if (ret < 0) {
@@ -1954,7 +1960,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild 
*child,
 } else if (flags & BDRV_REQ_ZERO_WRITE) {
 bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
 ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
-} else if (flags & BDRV_REQ_WRITE_COMPRESSED) {
+} else if (flags & BDRV_REQ_WRITE_COMPRESSED || bs->all_write_compressed) {
 ret = bdrv_driver_pwritev_compressed(bs, offset, bytes,
  qiov, qiov_offset);
 } else if (bytes <= max_transfer) {
diff --git a/block/qcow2.c b/block/qcow2.c
index 7961c05..6b29e16 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1787,6 +1787,10 @@ static void qcow2_refresh_limits(BlockDriverState *bs, 
Error **errp)
 /* Encryption works on a sector granularity */
 bs->bl.request_alignment = qcrypto_block_get_sector_size(s->crypto);
 }
+if (bs->all_write_compressed) {
+bs->bl.request_alignment = MAX(bs->bl.request_alignment,
+   s->cluster_size);
+}
 bs->bl.pw

[PATCH v4 2/4] qcow2: Allow writing compressed data of multiple clusters

2019-10-16 Thread Andrey Shinkevich

QEMU currently supports writing compressed data of the size equal to
one cluster. This patch allows writing QCOW2 compressed data that
exceed one cluster. Now, we split buffered data into separate clusters
and write them compressed using the existing functionality.

Suggested-by: Pavel Butsykin 
Signed-off-by: Andrey Shinkevich 
---
 block/qcow2.c | 102 ++
 1 file changed, 75 insertions(+), 27 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 6b29e16..9a85d73 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -4156,10 +4156,8 @@ fail:
 return ret;
 }
 
-/* XXX: put compressed sectors first, then all the cluster aligned
-   tables to avoid losing bytes in alignment */
 static coroutine_fn int
-qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
+qcow2_co_pwritev_compressed_task(BlockDriverState *bs,
  uint64_t offset, uint64_t bytes,
  QEMUIOVector *qiov, size_t qiov_offset)
 {
@@ -4169,32 +4167,11 @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
 uint8_t *buf, *out_buf;
 uint64_t cluster_offset;
 
-if (has_data_file(bs)) {
-return -ENOTSUP;
-}
-
-if (bytes == 0) {
-/* align end of file to a sector boundary to ease reading with
-   sector based I/Os */
-int64_t len = bdrv_getlength(bs->file->bs);
-if (len < 0) {
-return len;
-}
-return bdrv_co_truncate(bs->file, len, PREALLOC_MODE_OFF, NULL);
-}
-
-if (offset_into_cluster(s, offset)) {
-return -EINVAL;
-}
+assert(bytes == s->cluster_size || (bytes < s->cluster_size &&
+   (offset + bytes == bs->total_sectors << BDRV_SECTOR_BITS)));
 
 buf = qemu_blockalign(bs, s->cluster_size);
-if (bytes != s->cluster_size) {
-if (bytes > s->cluster_size ||
-offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS)
-{
-qemu_vfree(buf);
-return -EINVAL;
-}
+if (bytes < s->cluster_size) {
 /* Zero-pad last write if image size is not cluster aligned */
 memset(buf + bytes, 0, s->cluster_size - bytes);
 }
@@ -4243,6 +4220,77 @@ fail:
 return ret;
 }
 
+static coroutine_fn int qcow2_co_pwritev_compressed_task_entry(AioTask *task)
+{
+Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
+
+assert(!t->cluster_type && !t->l2meta);
+
+return qcow2_co_pwritev_compressed_task(t->bs, t->offset, t->bytes, 
t->qiov,
+t->qiov_offset);
+}
+
+/*
+ * XXX: put compressed sectors first, then all the cluster aligned
+   tables to avoid losing bytes in alignment
+ */
+static coroutine_fn int
+qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset)
+{
+BDRVQcow2State *s = bs->opaque;
+AioTaskPool *aio = NULL;
+int ret;
+
+if (has_data_file(bs)) {
+return -ENOTSUP;
+}
+
+if (bytes == 0) {
+/*
+ * align end of file to a sector boundary to ease reading with
+ * sector based I/Os
+ */
+int64_t len = bdrv_getlength(bs->file->bs);
+if (len < 0) {
+return len;
+}
+return bdrv_co_truncate(bs->file, len, PREALLOC_MODE_OFF, NULL);
+}
+
+if (offset_into_cluster(s, offset)) {
+return -EINVAL;
+}
+
+while (bytes && aio_task_pool_status(aio) == 0) {
+uint32_t chunk_size = MIN(bytes, s->cluster_size);
+
+if (!aio && chunk_size != bytes) {
+aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
+}
+
+ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_compressed_task_entry,
+ 0, 0, offset, chunk_size, qiov, qiov_offset, 
NULL);
+if (ret < 0) {
+break;
+}
+qiov_offset += chunk_size;
+offset += chunk_size;
+bytes -= chunk_size;
+}
+
+if (aio) {
+aio_task_pool_wait_all(aio);
+if (ret == 0) {
+ret = aio_task_pool_status(aio);
+}
+g_free(aio);
+}
+
+return ret;
+}
+
 static int coroutine_fn
 qcow2_co_preadv_compressed(BlockDriverState *bs,
uint64_t file_cluster_offset,
-- 
1.8.3.1

[PATCH v4 0/4] qcow2: advanced compression options

2019-10-16 Thread Andrey Shinkevich

New enhancements for writing compressed data to QCOW2 image.

v4:
The 'compression' support at the generic block layer has been
consolidated into the separate patch 1/4. Some minor code refactoring
was done.
v3:
Instead of introducing multiple key options for many drivers, the
'compression' option has been introduced at the block generic layer
as suggested by Roman Kagan. Discussed on the email thread with ID
<1570026166-748566-1-git-send-email-andrey.shinkev...@virtuozzo.com>

Andrey Shinkevich (4):
  block: support compressed write at generic layer
  qcow2: Allow writing compressed data of multiple clusters
  tests/qemu-iotests: add case to write compressed data of multiple
clusters
  tests/qemu-iotests: add case for block-stream compress

 block.c|  20 -
 block/io.c |  14 --
 block/qcow2.c  | 106 +
 blockdev.c |   9 +++-
 include/block/block.h  |   1 +
 include/block/block_int.h  |   2 +
 qapi/block-core.json   |   6 ++-
 qemu-options.hx|   6 ++-
 tests/qemu-iotests/030 |  51 +-
 tests/qemu-iotests/030.out |   4 +-
 tests/qemu-iotests/214 |  35 +++
 tests/qemu-iotests/214.out |  15 +++
 12 files changed, 230 insertions(+), 39 deletions(-)

-- 
1.8.3.1

[PATCH v4 3/4] tests/qemu-iotests: add case to write compressed data of multiple clusters

2019-10-16 Thread Andrey Shinkevich

Add a test case to iotest #214 that checks the possibility of writing
compressed data larger than one cluster.

Signed-off-by: Andrey Shinkevich 
---
 tests/qemu-iotests/214 | 35 +++
 tests/qemu-iotests/214.out | 15 +++
 2 files changed, 50 insertions(+)

diff --git a/tests/qemu-iotests/214 b/tests/qemu-iotests/214
index 21ec8a2..0003dc2 100755
--- a/tests/qemu-iotests/214
+++ b/tests/qemu-iotests/214
@@ -89,6 +89,41 @@ _check_test_img -r all
 $QEMU_IO -c "read  -P 0x11  0 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | 
_filter_testdir
 $QEMU_IO -c "read  -P 0x22 4M 4M" "$TEST_IMG" 2>&1 | _filter_qemu_io | 
_filter_testdir
 
+echo
+echo "=== Write compressed data of multiple clusters ==="
+echo
+cluster_size=0x1
+_make_test_img 2M -o cluster_size=$cluster_size
+
+echo "Uncompressed data:"
+let data_size="8 * $cluster_size"
+$QEMU_IO -c "write -P 0xaa 0 $data_size" "$TEST_IMG" \
+ 2>&1 | _filter_qemu_io | _filter_testdir
+$QEMU_IMG info "$TEST_IMG" | sed -n '/disk size:/ s/^ *//p'
+
+_make_test_img 2M -o cluster_size=$cluster_size
+let data_size="3 * $cluster_size + ($cluster_size >> 1)"
+# Set compress=on. That will align the written data
+# by the cluster size and will write them compressed.
+QEMU_IO_OPTIONS=$QEMU_IO_OPTIONS_NO_FMT \
+$QEMU_IO -c "write -P 0xbb 0 $data_size" --image-opts \
+ driver=$IMGFMT,compress=on,file.filename=$TEST_IMG \
+ 2>&1 | _filter_qemu_io | _filter_testdir
+
+let offset="4 * $cluster_size"
+QEMU_IO_OPTIONS=$QEMU_IO_OPTIONS_NO_FMT \
+$QEMU_IO -c "write -P 0xcc $offset $data_size" "json:{\
+'driver': '$IMGFMT',
+'file': {
+'driver': 'file',
+'filename': '$TEST_IMG'
+},
+'compress': true
+}" | _filter_qemu_io | _filter_testdir
+
+echo "After the multiple cluster data have been written compressed,"
+$QEMU_IMG info "$TEST_IMG" | sed -n '/disk size:/ s/^ *//p'
+
 # success, all done
 echo '*** done'
 rm -f $seq.full
diff --git a/tests/qemu-iotests/214.out b/tests/qemu-iotests/214.out
index 0fcd8dc..09a2e9a 100644
--- a/tests/qemu-iotests/214.out
+++ b/tests/qemu-iotests/214.out
@@ -32,4 +32,19 @@ read 4194304/4194304 bytes at offset 0
 4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 4194304/4194304 bytes at offset 4194304
 4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+=== Write compressed data of multiple clusters ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2097152
+Uncompressed data:
+wrote 524288/524288 bytes at offset 0
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+disk size: 772 KiB
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2097152
+wrote 229376/229376 bytes at offset 0
+224 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 229376/229376 bytes at offset 262144
+224 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+After the multiple cluster data have been written compressed,
+disk size: 268 KiB
 *** done
-- 
1.8.3.1

Re: [PATCH v5 1/9] target/arm/monitor: Introduce qmp_query_cpu_model_expansion

2019-10-16 Thread Andrew Jones

On Wed, Oct 16, 2019 at 04:16:57PM +0100, Beata Michalska wrote:
> On Wed, 16 Oct 2019 at 14:50, Andrew Jones  wrote:
> >
> > On Wed, Oct 16, 2019 at 02:24:50PM +0100, Beata Michalska wrote:
> > > On Tue, 15 Oct 2019 at 12:56, Beata Michalska
> > >  wrote:
> > > >
> > > > On Tue, 15 Oct 2019 at 11:56, Andrew Jones  wrote:
> > > > >
> > > > > On Tue, Oct 15, 2019 at 10:59:16AM +0100, Beata Michalska wrote:
> > > > > > On Tue, 1 Oct 2019 at 14:04, Andrew Jones  
> > > > > > wrote:
> > > > > > > +
> > > > > > > +obj = object_new(object_class_get_name(oc));
> > > > > > > +
> > > > > > > +if (qdict_in) {
> > > > > > > +Visitor *visitor;
> > > > > > > +Error *err = NULL;
> > > > > > > +
> > > > > > > +visitor = qobject_input_visitor_new(model->props);
> > > > > > > +visit_start_struct(visitor, NULL, NULL, 0, &err);
> > > > > > > +if (err) {
> > > > > > > +object_unref(obj);
> > > > > >
> > > > > > Shouldn't we free the 'visitor' here as well ?
> > > > >
> > > > > Yes. Good catch. So we also need to fix
> > > > > target/s390x/cpu_models.c:cpu_model_from_info(), which has the same
> > > > > construction (the construction from which I derived this)
> > > > >
> > > > > >
> > > > > > > +error_propagate(errp, err);
> > > > > > > +return NULL;
> > > > > > > +}
> > > > > > > +
> > > > >
> > > > > What about the rest of the patch? With that fixed for v6 can I
> > > > > add your r-b?
> > > > >
> > > >
> > > > I still got this feeling that we could optimize that a bit - which I'm
> > > > currently on, so hopefully I'll be able to add more comments soon if
> > > > that proves to be the case.
> > > >
> > > > BR
> > > > Beata
> > >
> > > I think there are a few options that might be considered, though the gain
> > > is not huge .. but it's always something:
> > >
> > > > +CpuModelExpansionInfo 
> > > > *qmp_query_cpu_model_expansion(CpuModelExpansionType type,
> > > > + CpuModelInfo 
> > > > *model,
> > > > + Error **errp)
> > > > +{
> > > > +CpuModelExpansionInfo *expansion_info;
> > > > +const QDict *qdict_in = NULL;
> > > > +QDict *qdict_out;
> > > > +ObjectClass *oc;
> > > > +Object *obj;
> > > > +const char *name;
> > > > +int i;
> > > > +
> > > > +if (type != CPU_MODEL_EXPANSION_TYPE_FULL) {
> > > > +error_setg(errp, "The requested expansion type is not 
> > > > supported");
> > > > +return NULL;
> > > > +}
> > > > +
> > > > +if (!kvm_enabled() && !strcmp(model->name, "host")) {
> > > > +error_setg(errp, "The CPU type '%s' requires KVM", 
> > > > model->name);
> > > > +return NULL;
> > > > +}
> > > > +
> > > > +oc = cpu_class_by_name(TYPE_ARM_CPU, model->name);
> > > > +if (!oc) {
> > > > +error_setg(errp, "The CPU type '%s' is not a recognized ARM 
> > > > CPU type",
> > > > +   model->name);
> > > > +return NULL;
> > > > +}
> > > > +
> > > > +if (kvm_enabled()) {
> > > > +const char *cpu_type = current_machine->cpu_type;
> > > > +int len = strlen(cpu_type) - strlen(ARM_CPU_TYPE_SUFFIX);
> > > > +bool supported = false;
> > > > +
> > > > +if (!strcmp(model->name, "host") || !strcmp(model->name, 
> > > > "max")) {
> > > > +/* These are kvmarm's recommended cpu types */
> > > > +supported = true;
> > > > +} else if (strlen(model->name) == len &&
> > > > +   !strncmp(model->name, cpu_type, len)) {
> > > > +/* KVM is enabled and we're using this type, so it works. 
> > > > */
> > > > +supported = true;
> > > > +}
> > > > +if (!supported) {
> > > > +error_setg(errp, "We cannot guarantee the CPU type '%s' 
> > > > works "
> > > > + "with KVM on this host", model->name);
> > > > +return NULL;
> > > > +}
> > > > +}
> > > > +
> > >
> > > The above section can be slightly reduced and rearranged - preferably
> > > moved to a separate function
> > > -> get_cpu_model (...) ?
> > >
> > > * You can check the 'host' model first and then validate the accelerator 
> > > ->
> > > if ( !strcmp(model->name, "host")
> > > if (!kvm_enabled())
> > > log_error & leave
> > >else
> > >   goto cpu_class_by_name /*cpu_class_by_name moved after the
> > > final model check @see below */
> > >
> > > * the kvm_enabled section can be then slightly improved (dropping the
> > > second compare against 'host')
> > >
> > >   if (kvm_enabled() && strcmp(model->name, "max") {
> > >/*Validate the current_machine->cpu_type against the
> > > model->name and report error case mismatch
> > >   /* otherwise just fall through */
> > >   }
> > >  * cpu_class_by_name moved here ...
> > > > +if (model->props) {

Re: [PATCH v4 00/19] spapr: IRQ subsystem cleanup

2019-10-16 Thread Greg Kurz

On Wed,  9 Oct 2019 17:07:59 +1100
David Gibson  wrote:

> This is a substantial rework to clean up the handling of IRQs in
> spapr.  It includes some cleanups to both the XICS and XIVE interrupt
> controller backends, as well as more to the common spapr irq handling
> infrastructure.
> 

Patches up to 16 have been reviewed. Any chance you merge them before
soft freeze (12 days left) ?

> The last two patches of this series, dealing with VFIO devices, are
> RFC only - there are some problems that I'm discussing with Alex
> Williamson.
> 
> Changes since v3:
>  * Further minor tweaks to error handling
>  * Other minor polishes from feedback
>  * Added some patches to address VFIO irq routing
>  * Removed the first ~20 patches, which are now merged
> 
> Changes since v2:
>  * Fixed a bug where the "move handling multiple irq frees" to
>frontend patch was actually freeing one irq over and over, rather
>than freeing multiple irqs
>  * Fixed some places I missed still using only-Error * style, and flow
>on adjustments
>  * New idiom to iterate across all constructed backends for the things
>    that need that (cpu_intc_create, claim & free), rather than
>open-coding a call on the xics, then xive versions.
> 
> Changes since v1:
>  * Lots of extra patches
>  * Many minor adjustments based on feedback
>  * Moved towards return value + Error * style, instead of just Error *
>style
> 
> David Gibson (16):
>   spapr, xics, xive: Introduce SpaprInterruptController QOM interface
>   spapr, xics, xive: Move cpu_intc_create from SpaprIrq to
> SpaprInterruptController
>   spapr, xics, xive: Move irq claim and free from SpaprIrq to
> SpaprInterruptController
>   spapr: Formalize notion of active interrupt controller
>   spapr, xics, xive: Move set_irq from SpaprIrq to
> SpaprInterruptController
>   spapr, xics, xive: Move print_info from SpaprIrq to
> SpaprInterruptController
>   spapr, xics, xive: Move dt_populate from SpaprIrq to
> SpaprInterruptController
>   spapr, xics, xive: Match signatures for XICS and XIVE KVM connect
> routines
>   spapr: Remove SpaprIrq::init_kvm hook
>   spapr, xics, xive: Move SpaprIrq::reset hook logic into
> activate/deactivate
>   spapr, xics, xive: Move SpaprIrq::post_load hook to backends
>   spapr: Remove SpaprIrq::nr_msis
>   spapr: Move SpaprIrq::nr_xirqs to SpaprMachineClass
>   spapr: Remove last pieces of SpaprIrq
>   spapr: Handle irq backend changes with VFIO PCI devices
>   spapr: Work around spurious warnings from vfio INTx initialization
> 
> Greg Kurz (2):
>   xive: Make some device types not user creatable
>   xics: Make some device types not user creatable
> 
> Stefan Brankovic (1):
>   target/ppc: Fix for optimized vsl/vsr instructions
> 
>  hw/intc/spapr_xive.c| 295 
>  hw/intc/spapr_xive_kvm.c|  22 +-
>  hw/intc/xics.c  |  10 +
>  hw/intc/xics_kvm.c  |   9 +-
>  hw/intc/xics_spapr.c| 110 -
>  hw/intc/xive.c  |  15 +
>  hw/ppc/spapr.c  |  52 ++-
>  hw/ppc/spapr_caps.c |  64 +++
>  hw/ppc/spapr_cpu_core.c |   3 +-
>  hw/ppc/spapr_hcall.c|   7 +-
>  hw/ppc/spapr_irq.c  | 668 +---
>  hw/ppc/spapr_pci.c  |  16 +-
>  include/hw/pci-host/spapr.h |   4 +-
>  include/hw/ppc/spapr.h  |  17 +-
>  include/hw/ppc/spapr_irq.h  |  72 +--
>  include/hw/ppc/spapr_xive.h |   9 +-
>  include/hw/ppc/xics_spapr.h |   6 +-
>  target/ppc/translate/vmx-impl.inc.c |  84 ++--
>  18 files changed, 769 insertions(+), 694 deletions(-)
>

[PATCH 0/2] virtiofsd: add net and pid namespace sandboxing

2019-10-16 Thread Stefan Hajnoczi

These patches are based on gitlab.com/virtio-fs/qemu.git virtio-fs-dev.

virtiofsd is sandboxed so that it does not have access to the system in the
event that the process is compromised.  At the moment we use seccomp and mount
namespaces to restrict the list of allowed syscalls and only give access to the
shared directory.

This patch series enhances sandboxing by putting virtiofsd into an empty
network and pid namespace.  If the process is compromised it will be unable to
perform network activity, even to localhost services running on the host.  It
will also be unable to see other processes running on the system since it runs
as pid 1 in a new pid namespace.

These enhancements are inspired by the Crosvm virtio-fs device's jail
configuration.

Stefan Hajnoczi (2):
  virtiofsd: move to an empty network namespace
  virtiofsd: move to a new pid namespace

 contrib/virtiofsd/passthrough_ll.c | 109 +++--
 1 file changed, 86 insertions(+), 23 deletions(-)

-- 
2.21.0

[PATCH 2/2] virtiofsd: move to a new pid namespace

2019-10-16 Thread Stefan Hajnoczi

virtiofsd needs access to /proc/self/fd.  Let's move to a new pid
namespace so that a compromised process cannot see other
processes running on the system.

One wrinkle in this approach: unshare(CLONE_NEWPID) affects *child*
processes and not the current process.  Therefore we need to fork the
pid 1 process that will actually run virtiofsd and leave a parent in
waitpid(2).  This is not the same thing as daemonization and parent
processes should not notice a difference.

Signed-off-by: Stefan Hajnoczi 
---
 contrib/virtiofsd/passthrough_ll.c | 95 ++
 1 file changed, 72 insertions(+), 23 deletions(-)

diff --git a/contrib/virtiofsd/passthrough_ll.c 
b/contrib/virtiofsd/passthrough_ll.c
index c27ff7d800..b6ee9b2e90 100644
--- a/contrib/virtiofsd/passthrough_ll.c
+++ b/contrib/virtiofsd/passthrough_ll.c
@@ -56,9 +56,12 @@
 #include 
 #include 
 #include 
+#include 
 #include 
+#include 
 #include 
 
+
 #include "ireg.h"
 #include 
 #include 
@@ -2749,6 +2752,72 @@ static void setup_net_namespace(void)
}
 }
 
+/*
+ * Move to a new pid namespace to prevent access to other processes if this
+ * process is compromised.
+ */
+static void setup_pid_namespace(void)
+{
+   pid_t child;
+
+   /*
+* Create a new pid namespace for *child* processes.  We'll have to
+* fork in order to enter the new pid namespace.  A new mount namespace
+* is also needed so that we can remount /proc for the new pid
+* namespace.
+*/
+   if (unshare(CLONE_NEWPID | CLONE_NEWNS) != 0) {
+   fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWPID | CLONE_NEWNS): 
%m\n");
+   exit(1);
+   }
+
+   child = fork();
+   if (child < 0) {
+   fuse_log(FUSE_LOG_ERR, "fork() failed: %m\n");
+   exit(1);
+   }
+   if (child > 0) {
+   pid_t waited;
+   int wstatus;
+
+   /* The parent waits for the child */
+   do {
+   waited = waitpid(child, &wstatus, 0);
+   } while (waited < 0 && errno == EINTR);
+
+   if (WIFEXITED(wstatus)) {
+   exit(WEXITSTATUS(wstatus));
+   }
+
+   exit(1);
+   }
+
+   /*
+* If the mounts have shared propagation then we want to opt out so our
+* mount changes don't affect the parent mount namespace.
+*/
+   if (mount(NULL, "/", NULL, MS_REC|MS_SLAVE, NULL) < 0) {
+   fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_SLAVE): %m\n");
+   exit(1);
+   }
+
+   /* The child must remount /proc to use the new pid namespace */
+   if (mount("proc", "/proc", "proc",
+ MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RELATIME, NULL) < 0) {
+   fuse_log(FUSE_LOG_ERR, "mount(/proc): %m\n");
+   exit(1);
+   }
+}
+
+static void setup_proc_self_fd(struct lo_data *lo)
+{
+   lo->proc_self_fd = open("/proc/self/fd", O_PATH);
+   if (lo->proc_self_fd == -1) {
+   fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n");
+   exit(1);
+   }
+}
+
 /* This magic is based on lxc's lxc_pivot_root() */
 static void setup_pivot_root(const char *source)
 {
@@ -2803,20 +2872,10 @@ static void setup_pivot_root(const char *source)
 
 /*
  * Make the source directory our root so symlinks cannot escape and no other
- * files are accessible.
+ * files are accessible.  Assumes unshare(CLONE_NEWNS) was already called.
  */
 static void setup_mount_namespace(const char *source)
 {
-   if (unshare(CLONE_NEWNS) != 0) {
-   fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWNS): %m\n");
-   exit(1);
-   }
-
-   if (mount(NULL, "/", NULL, MS_REC|MS_SLAVE, NULL) < 0) {
-   fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_PRIVATE): %m\n");
-   exit(1);
-   }
-
if (mount(source, source, NULL, MS_BIND, NULL) < 0) {
fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, 
source);
exit(1);
@@ -2831,6 +2890,8 @@ static void setup_mount_namespace(const char *source)
  */
 static void setup_sandbox(struct lo_data *lo, bool enable_syslog)
 {
+   setup_pid_namespace();
+   setup_proc_self_fd(lo);
setup_net_namespace();
setup_mount_namespace(lo->source);
setup_seccomp(enable_syslog);
@@ -2860,15 +2921,6 @@ static void setup_root(struct lo_data *lo, struct 
lo_inode *root)
g_atomic_int_set(&root->refcount, 2);
 }
 
-static void setup_proc_self_fd(struct lo_data *lo)
-{
-   lo->proc_self_fd = open("/proc/self/fd", O_PATH);
-   if (lo->proc_self_fd == -1) {
-   fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n");
-   exit(1);
-   }
-}
-
 /* Raise the maximum number of open file descriptors to the system limit */
 static void setup_nofile_rlimit(void)
 {
@@ -3110,9 +3162,6 @@ int main(i

1 2 3 >

1 - 100 of 236 matches

Mail list logo