date:20230601

[PATCH 0/1] virtio: add a new vcpu stall watchdog

2023-06-01 Thread zhanghao1

A new virtio pci device named virtio-vcpu-stall-watchdog-pci has been
added to handle vcpu stalling

 hw/virtio/Kconfig |   5 +
 hw/virtio/meson.build |   2 +
 hw/virtio/virtio-vcpu-stall-watchdog-pci.c|  89 +++
 hw/virtio/virtio-vcpu-stall-watchdog.c| 240 ++
 .../hw/virtio/virtio-vcpu-stall-watchdog.h|  45 
 5 files changed, 381 insertions(+)
 create mode 100644 hw/virtio/virtio-vcpu-stall-watchdog-pci.c
 create mode 100644 hw/virtio/virtio-vcpu-stall-watchdog.c
 create mode 100644 include/hw/virtio/virtio-vcpu-stall-watchdog.h

-- 
2.25.1


No virus found
Checked by Hillstone Network AntiVirus

[PATCH 1/1] Add a new virtio pci device named virtio-vcpu-stall-watchdog-pci

2023-06-01 Thread zhanghao1

Each vcpu creates a corresponding timer task. The watchdog
is driven by a timer according to a certain period. Each time
the timer expires, the counter is decremented. When the counter
is "0", the watchdog considers the vcpu to be stalling and resets
the VM. To avoid watchdog expiration, the guest kernel driver
needs to periodically send a pet event to update the counter.

Signed-off-by: zhanghao1 
---
 hw/virtio/Kconfig |   5 +
 hw/virtio/meson.build |   2 +
 hw/virtio/virtio-vcpu-stall-watchdog-pci.c|  89 +++
 hw/virtio/virtio-vcpu-stall-watchdog.c| 240 ++
 .../hw/virtio/virtio-vcpu-stall-watchdog.h|  45 
 5 files changed, 381 insertions(+)
 create mode 100644 hw/virtio/virtio-vcpu-stall-watchdog-pci.c
 create mode 100644 hw/virtio/virtio-vcpu-stall-watchdog.c
 create mode 100644 include/hw/virtio/virtio-vcpu-stall-watchdog.h

diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig
index 89e9e426d8..2247e382e4 100644
--- a/hw/virtio/Kconfig
+++ b/hw/virtio/Kconfig
@@ -90,3 +90,8 @@ config VHOST_VDPA_DEV
 bool
 default y
 depends on VIRTIO && VHOST_VDPA && LINUX
+
+config VIRTIO_VCPU_STALL_WATCHDOG
+bool
+default y
+depends on VIRTIO
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
index bdec78bfc6..b93246e2db 100644
--- a/hw/virtio/meson.build
+++ b/hw/virtio/meson.build
@@ -33,6 +33,7 @@ specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', 
if_true: files('vhost-user
 specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: 
files('vhost-user-gpio.c'))
 specific_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_GPIO'], 
if_true: files('vhost-user-gpio-pci.c'))
 specific_virtio_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: 
files('vdpa-dev.c'))
+specific_virtio_ss.add(when: 'CONFIG_VIRTIO_VCPU_STALL_WATCHDOG', if_true: 
files('virtio-vcpu-stall-watchdog.c'))
 
 virtio_pci_ss = ss.source_set()
 virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: 
files('vhost-vsock-pci.c'))
@@ -59,6 +60,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: 
files('virtio-pmem-pci.c'
 virtio_pci_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: 
files('virtio-iommu-pci.c'))
 virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: 
files('virtio-mem-pci.c'))
 virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: 
files('vdpa-dev-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VIRTIO_VCPU_STALL_WATCHDOG', if_true: 
files('virtio-vcpu-stall-watchdog-pci.c'))
 
 specific_virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss)
 
diff --git a/hw/virtio/virtio-vcpu-stall-watchdog-pci.c 
b/hw/virtio/virtio-vcpu-stall-watchdog-pci.c
new file mode 100644
index 00..7540d488e3
--- /dev/null
+++ b/hw/virtio/virtio-vcpu-stall-watchdog-pci.c
@@ -0,0 +1,89 @@
+/*
+ * Virtio cpu stall watchdog PCI Bindings
+ *
+ * Copyright 2023 Kylin, Inc.
+ * Copyright 2023 Hao Zhang 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "hw/virtio/virtio-pci.h"
+#include "hw/virtio/virtio-vcpu-stall-watchdog.h"
+#include "qapi/error.h"
+#include "qemu/module.h"
+
+typedef struct VirtIOCpuStallWatchdogPCI VirtIOCpuStallWatchdogPCI;
+
+/*
+ * virtio-cpu-stall-watchdog-pci: This extends VirtioPCIProxy.
+ */
+#define TYPE_VIRTIO_CPU_STALL_WATCHDOG_PCI 
"virtio-vcpu-stall-watchdog-pci-base"
+#define VIRTIO_CPU_STALL_WATCHDOG_PCI(obj) \
+OBJECT_CHECK(VirtIOCpuStallWatchdogPCI, (obj), 
TYPE_VIRTIO_CPU_STALL_WATCHDOG_PCI)
+
+struct VirtIOCpuStallWatchdogPCI {
+VirtIOPCIProxy parent_obj;
+VirtIOCPUSTALLWATCHDOG vdev;
+};
+
+static Property vcpu_stall_watchdog_properties[] = {
+DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors,
+   DEV_NVECTORS_UNSPECIFIED),
+DEFINE_PROP_END_OF_LIST(),
+};
+
+static void virtio_vcpu_stall_watchdog_pci_realize(VirtIOPCIProxy *vpci_dev, 
Error **errp)
+{
+VirtIOCpuStallWatchdogPCI *dev = VIRTIO_CPU_STALL_WATCHDOG_PCI(vpci_dev);
+DeviceState *vdev = DEVICE(&dev->vdev);
+
+if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) {
+vpci_dev->nvectors = 1;
+}
+
+if (!qdev_realize(vdev, BUS(&vpci_dev->bus), errp)) {
+return;
+}
+}
+
+static void virtio_vcpu_stall_watchdog_pci_class_init(ObjectClass *klass, void 
*data)
+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
+PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
+
+k->realize = virtio_vcpu_stall_watchdog_pci_realize;
+set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+device_class_set_props(dc, vcpu_stall_watchdog_properties);
+pcidev_k->revision = VIRTIO_PCI_ABI_VERSION;
+pcidev_k->class_id = PCI_CLASS_OTHERS;
+}
+
+static void virtio_vcpu_stall_watchdog_init(Object *obj)
+{
+VirtIOCpuStallWatchdogPCI *dev =

Re: [PATCH 01/16] target/riscv: skip features setup for KVM CPUs

2023-06-01 Thread Alistair Francis

On Wed, May 31, 2023 at 5:49 AM Daniel Henrique Barboza
 wrote:
>
> As it is today it's not possible to use '-cpu host' if the RISC-V host
> has RVH enabled. This is the resulting error:
>
> $ sudo ./qemu/build/qemu-system-riscv64 \
> -machine virt,accel=kvm -m 2G -smp 1 \
> -nographic -snapshot -kernel ./guest_imgs/Image  \
> -initrd ./guest_imgs/rootfs_kvm_riscv64.img \
> -append "earlycon=sbi root=/dev/ram rw" \
> -cpu host
> qemu-system-riscv64: H extension requires priv spec 1.12.0
>
> This happens because we're checking the priv spec for all CPUs, and
> since we're not setting env->priv_ver for the 'host' CPU, it
> defaults to zero (i.e. PRIV_SPEC_1_10_0).
>
> In reality env->priv_ver does not make sense when running with the KVM
> 'host' CPU. It's used to gate certain CSRs/extensions during translation
> to make them unavailable if the hart declares an older spec version. It
> doesn't have any other use. E.g. OpenSBI version 1.2 retrieves the spec
> checking if the CSR_MCOUNTEREN, CSR_MCOUNTINHIBIT and CSR_MENVCFG CSRs
> are available [1].
>
> 'priv_ver' is just one example. We're doing a lot of feature validation
> and setup during riscv_cpu_realize() that doesn't apply to KVM CPUs.
> Validating the feature set for those CPUs is a KVM problem that should
> be handled in KVM specific code.
>
> The new riscv_cpu_realize_features() helper contains all validation
> logic that is not applicable to KVM CPUs. riscv_cpu_realize() verifies
> if we're dealing with a KVM CPU and, if not, executes the new helper to
> proceed with the usual realize() logic for all other CPUs.
>
> [1] lib/sbi/sbi_hart.c, hart_detect_features()
>
> Signed-off-by: Daniel Henrique Barboza 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu.c | 41 -
>  1 file changed, 32 insertions(+), 9 deletions(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index 938c7bd87b..72f5433776 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -331,6 +331,15 @@ static void set_satp_mode_default_map(RISCVCPU *cpu)
>  }
>  #endif
>
> +static bool riscv_running_KVM(void)
> +{
> +#ifndef CONFIG_USER_ONLY
> +return kvm_enabled();
> +#else
> +return false;
> +#endif
> +}
> +
>  static void riscv_any_cpu_init(Object *obj)
>  {
>  RISCVCPU *cpu = RISCV_CPU(obj);
> @@ -1295,20 +1304,12 @@ static void 
> riscv_cpu_validate_misa_priv(CPURISCVState *env, Error **errp)
>  }
>  }
>
> -static void riscv_cpu_realize(DeviceState *dev, Error **errp)
> +static void riscv_cpu_realize_features(DeviceState *dev, Error **errp)
>  {
> -CPUState *cs = CPU(dev);
>  RISCVCPU *cpu = RISCV_CPU(dev);
>  CPURISCVState *env = &cpu->env;
> -RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(dev);
>  Error *local_err = NULL;
>
> -cpu_exec_realizefn(cs, _err);
> -if (local_err != NULL) {
> -error_propagate(errp, local_err);
> -return;
> -}
> -
>  riscv_cpu_validate_misa_mxl(cpu, _err);
>  if (local_err != NULL) {
>  error_propagate(errp, local_err);
> @@ -1354,6 +1355,28 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
> **errp)
>  }
>   }
>  #endif
> +}
> +
> +static void riscv_cpu_realize(DeviceState *dev, Error **errp)
> +{
> +CPUState *cs = CPU(dev);
> +RISCVCPU *cpu = RISCV_CPU(dev);
> +RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(dev);
> +Error *local_err = NULL;
> +
> +cpu_exec_realizefn(cs, &local_err);
> +if (local_err != NULL) {
> +error_propagate(errp, local_err);
> +return;
> +}
> +
> +if (!riscv_running_KVM()) {
> +riscv_cpu_realize_features(dev, &local_err);
> +if (local_err != NULL) {
> +error_propagate(errp, local_err);
> +return;
> +}
> +}
>
>  riscv_cpu_finalize_features(cpu, _err);
>  if (local_err != NULL) {
> --
> 2.40.1
>
>

Re: [PATCH v14 03/10] accel: collecting TB execution count

2023-06-01 Thread Richard Henderson


On 6/1/23 18:54, Wu, Fei wrote:

We are not *checking* the tb->tb_stats->stats_enabled bit except at code
generation time, not code execution time.  Therefore nothing ever reads
the TB_PAUSED bit (or, correspondingly, the clearing of the other
bits).  The setting of the bit is permanent.


At dump time, it does check stats_enabled e.g. in dump_tb_header(). So
the question is whether FILTER is necessary at all? If not, we can
remove FILTER together with PAUSE, and only keep START & STOP in hmp cmd.


Let's start simpler and remove FILTER and PAUSE.


r~

Re: [PULL 00/21] NBD and miscellaneous patches for 2023-06-01

2023-06-01 Thread Richard Henderson


On 6/1/23 15:02, Eric Blake wrote:

Eric Blake (21):
   iotests: Fix test 104 under NBD
   qcow2: Explicit mention of padding bytes
   test-cutils: Avoid g_assert in unit tests
   test-cutils: Use g_assert_cmpuint where appropriate
   test-cutils: Test integral qemu_strto* value on failures
   test-cutils: Test more integer corner cases
   cutils: Fix wraparound parsing in qemu_strtoui
   cutils: Document differences between parse_uint and qemu_strtou64
   cutils: Adjust signature of parse_uint[_full]
   cutils: Allow NULL endptr in parse_uint()
   test-cutils: Add coverage of qemu_strtod
   test-cutils: Prepare for upcoming semantic change in qemu_strtosz
   test-cutils: Refactor qemu_strtosz tests for less boilerplate
   cutils: Allow NULL str in qemu_strtosz
   numa: Check for qemu_strtosz_MiB error
   test-cutils: Add more coverage to qemu_strtosz
   cutils: Set value in all qemu_strtosz* error paths
   cutils: Set value in all integral qemu_strto* error paths
   cutils: Use parse_uint in qemu_strtosz for negative rejection
   cutils: Improve qemu_strtod* error paths
   cutils: Improve qemu_strtosz handling of fractions


This is failing on Windows (32 and 64-bit):

https://gitlab.com/qemu-project/qemu/-/jobs/4399466166#L3524
https://gitlab.com/qemu-project/qemu/-/jobs/4399466165#L3332

|  21/135 /cutils/qemu_strtol/overflow - 
ERROR:../tests/unit/test-cutils.c:1387:test_qemu_strtol_overflow: assertion failed (res == 
LONG_MIN): (2147483647 == -2147483648) FAIL


It seems to have returned LONG_MAX instead of LONG_MIN.


r~

Re: [PATCH v3 2/2] target/riscv: Add RVV registers to log

2023-06-01 Thread Alistair Francis

On Mon, Apr 10, 2023 at 10:47 PM Ivan Klokov  wrote:
>
> Print RVV extension registers to the log if the VPU option is enabled.
>
> Signed-off-by: Ivan Klokov 

I applied the first patch, unfortunately this one doesn't apply
anymore. Do you mind rebasing this on
https://github.com/alistair23/qemu/tree/riscv-to-apply.next

Alistair

> ---
>  target/riscv/cpu.c | 56 +-
>  1 file changed, 55 insertions(+), 1 deletion(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index 5bc0005cc7..cfd063a5dc 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -172,6 +172,14 @@ const char * const riscv_fpr_regnames[] = {
>"f30/ft10", "f31/ft11"
>  };
>
> +const char * const riscv_rvv_regnames[] = {
> +  "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",
> +  "v7",  "v8",  "v9",  "v10", "v11", "v12", "v13",
> +  "v14", "v15", "v16", "v17", "v18", "v19", "v20",
> +  "v21", "v22", "v23", "v24", "v25", "v26", "v27",
> +  "v28", "v29", "v30", "v31"
> +};
> +
>  static const char * const riscv_excp_names[] = {
>  "misaligned_fetch",
>  "fault_fetch",
> @@ -422,7 +430,8 @@ static void riscv_cpu_dump_state(CPUState *cs, FILE *f, 
> int flags)
>  {
>  RISCVCPU *cpu = RISCV_CPU(cs);
>  CPURISCVState *env = >env;
> -int i;
> +int i, j;
> +uint8_t *p;
>
>  #if !defined(CONFIG_USER_ONLY)
>  if (riscv_has_ext(env, RVH)) {
> @@ -506,6 +515,51 @@ static void riscv_cpu_dump_state(CPUState *cs, FILE *f, 
> int flags)
>  }
>  }
>  }
> +if (riscv_has_ext(env, RVV) && (flags & CPU_DUMP_VPU)) {
> +static const int dump_rvv_csrs[] = {
> +CSR_VSTART,
> +CSR_VXSAT,
> +CSR_VXRM,
> +CSR_VCSR,
> +CSR_VL,
> +CSR_VTYPE,
> +CSR_VLENB,
> +};
> +for (int i = 0; i < ARRAY_SIZE(dump_rvv_csrs); ++i) {
> +int csrno = dump_rvv_csrs[i];
> +target_ulong val = 0;
> +RISCVException res = riscv_csrrw_debug(env, csrno, &val, 0, 0);
> +
> +/*
> + * Rely on the smode, hmode, etc, predicates within csr.c
> + * to do the filtering of the registers that are present.
> + */
> +if (res == RISCV_EXCP_NONE) {
> +qemu_fprintf(f, " %-8s " TARGET_FMT_lx "\n",
> + csr_ops[csrno].name, val);
> +}
> +}
> +uint16_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
> +
> +/*
> + * From vector_helper.c
> + * Note that vector data is stored in host-endian 64-bit chunks,
> + * so addressing bytes needs a host-endian fixup.
> + */
> +#if HOST_BIG_ENDIAN
> +#define BYTE(x)   ((x) ^ 7)
> +#else
> +#define BYTE(x)   (x)
> +#endif
> +for (i = 0; i < 32; i++) {
> +qemu_fprintf(f, " %-8s ", riscv_rvv_regnames[i]);
> +p = (uint8_t *)env->vreg;
> +for (j = vlenb - 1 ; j >= 0; j--) {
> +qemu_fprintf(f, "%02x", *(p + i * vlenb + BYTE(j)));
> +}
> +qemu_fprintf(f, "\n");
> +}
> +}
>  }
>
>  static void riscv_cpu_set_pc(CPUState *cs, vaddr value)
> --
> 2.34.1
>
>

Re: [PATCH v3 0/2] Support for print to log vector extension registers

2023-06-01 Thread Alistair Francis

On Mon, Apr 10, 2023 at 10:46 PM Ivan Klokov  wrote:
>
> The patch added an ability to include VPU registers in the 'cpu' logging.
> ---
> v3:
>- split of the patch into two parts: general and RISC-V specific
> ---
>
> Ivan Klokov (2):
>   util/log: Add vector registers to log
>   target/riscv: Add RVV registers to log

I'm going to go ahead and merge this

Applied to riscv-to-apply.next

Alistair

>
>  accel/tcg/cpu-exec.c  |  3 +++
>  include/hw/core/cpu.h |  2 ++
>  include/qemu/log.h|  1 +
>  target/riscv/cpu.c| 56 ++-
>  util/log.c|  2 ++
>  5 files changed, 63 insertions(+), 1 deletion(-)
>
> --
> 2.34.1
>
>

Re: [PATCH v2 4/6] target/riscv: Split interrupt logic from riscv_cpu_update_mip.

2023-06-01 Thread Alistair Francis

On Sat, May 27, 2023 at 2:24 AM Rajnesh Kanwal  wrote:
>
> This is to allow virtual interrupts to be inserted into S and VS
> modes. Given virtual interrupts will be maintained in separate
> mvip and hvip CSRs, riscv_cpu_update_mip will no longer be in the
> path and interrupts need to be triggered for these cases from
> rmw_hvip64 and rmw_mvip64 functions.
>
> Signed-off-by: Rajnesh Kanwal 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu.h|  1 +
>  target/riscv/cpu_helper.c | 25 ++---
>  2 files changed, 19 insertions(+), 7 deletions(-)
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index de7e43126a..de55bfb775 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -562,6 +562,7 @@ void riscv_cpu_swap_hypervisor_regs(CPURISCVState *env);
>  int riscv_cpu_claim_interrupts(RISCVCPU *cpu, uint64_t interrupts);
>  uint64_t riscv_cpu_update_mip(CPURISCVState *env, uint64_t mask,
>uint64_t value);
> +void riscv_cpu_interrupt(CPURISCVState *env);
>  #define BOOL_TO_MASK(x) (-!!(x)) /* helper for riscv_cpu_update_mip value */
>  void riscv_cpu_set_rdtime_fn(CPURISCVState *env, uint64_t (*fn)(void *),
>   void *arg);
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index b25ee179e9..c79ec4db76 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -609,11 +609,12 @@ int riscv_cpu_claim_interrupts(RISCVCPU *cpu, uint64_t 
> interrupts)
>  }
>  }
>
> -uint64_t riscv_cpu_update_mip(CPURISCVState *env, uint64_t mask,
> -  uint64_t value)
> +void riscv_cpu_interrupt(CPURISCVState *env)
>  {
> +uint64_t gein, vsgein = 0, vstip = 0;
>  CPUState *cs = env_cpu(env);
> -uint64_t gein, vsgein = 0, vstip = 0, old = env->mip;
> +
> +QEMU_IOTHREAD_LOCK_GUARD();
>
>  if (env->virt_enabled) {
>  gein = get_field(env->hstatus, HSTATUS_VGEIN);
> @@ -622,15 +623,25 @@ uint64_t riscv_cpu_update_mip(CPURISCVState *env, 
> uint64_t mask,
>
>  vstip = env->vstime_irq ? MIP_VSTIP : 0;
>
> -QEMU_IOTHREAD_LOCK_GUARD();
> -
> -env->mip = (env->mip & ~mask) | (value & mask);
> -
>  if (env->mip | vsgein | vstip) {
>  cpu_interrupt(cs, CPU_INTERRUPT_HARD);
>  } else {
>  cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
>  }
> +}
> +
> +uint64_t riscv_cpu_update_mip(CPURISCVState *env, uint64_t mask, uint64_t 
> value)
> +{
> +uint64_t old = env->mip;
> +
> +/* No need to update mip for VSTIP */
> +mask = ((mask == MIP_VSTIP) && env->vstime_irq) ? 0 : mask;
> +
> +QEMU_IOTHREAD_LOCK_GUARD();
> +
> +env->mip = (env->mip & ~mask) | (value & mask);
> +
> +riscv_cpu_interrupt(env);
>
>  return old;
>  }
> --
> 2.25.1
>
>

Re: [PATCH 1/1] hw/arm/sbsa-ref: use XHCI to replace EHCI

2023-06-01 Thread Yuquan Wang

Hi, Leif

On Thu, 1 Jun 2023 18:59:56 +0100, Leif Lindholm wrote:
> 
> +Ard
> 
> On Thu, Jun 01, 2023 at 16:01:43 +0100, Peter Maydell wrote:
> > > >> Also has EHCI never worked, or has it worked in some modes and so this
> > > >> change should be versioned?
> > > >
> > > > AIUI, EHCI has never worked and can never have worked, because
> > > > this board's RAM is all above 4G and the QEMU EHCI controller
> > > > implementation only allows DMA descriptors with 32-bit addresses.
> > > >
> > > > Looking back at the archives, it seems we discussed XHCI vs
> > > > EHCI when the sbsa-ref board went in, and the conclusion was
> > > > that XHCI would be better. But there wasn't a sysbus XHCI device
> > > > at that point, so we ended up committing the sbsa-ref board
> > > > with EHCI and a plan to switch to XHCI when the sysbus-xhci
> > > > device was done, which we then forgot about:
> > > > https://mail.gnu.org/archive/html/qemu-arm/2018-11/msg00638.html
> > >
> > > Ah, thanks! That explains why we did the thing that made no sense :)
> > >
> > > > To skip the migration hazard, my preference is we just leave the EHCI
> > > device in for now, and add a separate XHCI on PCIe. We can drop the
> > > EHCI device at some point in the future.
> > 
> > Why PCIe for the XHCI and not sysbus? At the time the board
> > was originally added the argument was in favour of using
> > a sysbus USB controller (you can see Ard making that point
> > in the linked archive thread).
> 
> The original argument was that having the device on the sysbus
> 1) enabled codepaths we wanted to exercise and

Sorry, given my limited engineering experience, I am confused about the meaning
of "enabled codepaths" here. Is it like a code target meant to realize the
original purpose of this board?

Yuquan

信息安全声明：本邮件包含信息归发件人所在组织所有,发件人所在组织对该邮件拥有所有权利。请接收者注意保密,未经发件人书面许可,不得向任何第三方组织和个人透露本邮件所含信息。
Information Security Notice: The information contained in this mail is solely 
property of the sender's organization.This mail communication is 
confidential.Recipients named above are obligated to maintain secrecy and are 
not permitted to disclose the contents of this communication to others.

Re: [PATCH v2 3/6] target/riscv: Set VS* bits to one in mideleg when H-Ext is enabled

2023-06-01 Thread Alistair Francis

On Sat, May 27, 2023 at 2:24 AM Rajnesh Kanwal  wrote:
>
> With H-Ext supported, VS bits are all hardwired to one in MIDELEG
> denoting always delegated interrupts. This is being done in rmw_mideleg
> but given mideleg is used in other places when routing interrupts
> this change initializes it in riscv_cpu_realize to be on the safe side.
>
> Signed-off-by: Rajnesh Kanwal 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu.c | 5 +
>  1 file changed, 5 insertions(+)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index db0875fb43..269a094f42 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -1280,6 +1280,11 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
> **errp)
>riscv_pmu_timer_cb, cpu);
>  }
>   }
> +
> +/* With H-Ext, VSSIP, VSTIP, VSEIP and SGEIP are hardwired to one. */
> +if (riscv_has_ext(env, RVH)) {
> +env->mideleg = MIP_VSSIP | MIP_VSTIP | MIP_VSEIP | MIP_SGEIP;
> +}
>  #endif
>
>  riscv_cpu_finalize_features(cpu, _err);
> --
> 2.25.1
>
>

Re: [PATCH v2 2/6] target/riscv: Check for async flag in case of RISCV_EXCP_SEMIHOST.

2023-06-01 Thread Alistair Francis

On Sat, May 27, 2023 at 2:24 AM Rajnesh Kanwal  wrote:
>
> RISCV_EXCP_SEMIHOST is set to 0x10, which can be a local interrupt id
> as well. This change moves RISCV_EXCP_SEMIHOST to switch case so that
> async flag check is performed before invoking semihosting logic.
>
> Signed-off-by: Rajnesh Kanwal 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu_helper.c | 10 --
>  1 file changed, 4 insertions(+), 6 deletions(-)
>
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 57d04385f1..b25ee179e9 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -1602,15 +1602,13 @@ void riscv_cpu_do_interrupt(CPUState *cs)
>  target_ulong htval = 0;
>  target_ulong mtval2 = 0;
>
> -if  (cause == RISCV_EXCP_SEMIHOST) {
> -do_common_semihosting(cs);
> -env->pc += 4;
> -return;
> -}
> -
>  if (!async) {
>  /* set tval to badaddr for traps with address information */
>  switch (cause) {
> +case RISCV_EXCP_SEMIHOST:
> +do_common_semihosting(cs);
> +env->pc += 4;
> +return;
>  case RISCV_EXCP_LOAD_GUEST_ACCESS_FAULT:
>  case RISCV_EXCP_STORE_GUEST_AMO_ACCESS_FAULT:
>  case RISCV_EXCP_LOAD_ADDR_MIS:
> --
> 2.25.1
>
>

Re: [PATCH v2 1/6] target/riscv: Without H-mode mask all HS mode inturrupts in mie.

2023-06-01 Thread Alistair Francis

On Sat, May 27, 2023 at 2:25 AM Rajnesh Kanwal  wrote:
>
> Signed-off-by: Rajnesh Kanwal 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/csr.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index 4451bd1263..041f0b3e2e 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -1522,7 +1522,7 @@ static RISCVException rmw_mie64(CPURISCVState *env, int 
> csrno,
>  env->mie = (env->mie & ~mask) | (new_val & mask);
>
>  if (!riscv_has_ext(env, RVH)) {
> -env->mie &= ~((uint64_t)MIP_SGEIP);
> +env->mie &= ~((uint64_t)HS_MODE_INTERRUPTS);
>  }
>
>  return RISCV_EXCP_NONE;
> --
> 2.25.1
>
>

Re: [PATCH v7 0/3] hw/riscv/virt: pflash improvements

2023-06-01 Thread Alistair Francis

On Thu, Jun 1, 2023 at 3:00 PM Sunil V L  wrote:
>
> This series improves the pflash usage in RISC-V virt machine with solutions to
> below issues.
>
> 1) Currently the first pflash is reserved for ROM/M-mode firmware code. But 
> S-mode
> payload firmware like EDK2 need both pflash devices to have separate code and 
> variable
> store so that OS distros can keep the FW code as read-only.
>
> The issue is reported at
> https://salsa.debian.org/qemu-team/edk2/-/commit/c345655a0149f64c5020bfc1e53c619ce60587f6
>
> 2) The latest way of using pflash devices in other architectures and libvirt
> is by using -blockdev and machine options. However, currently this method is
> not working in RISC-V.
>
> With above issues fixed, added documentation on how to use pflash devices
> in RISC-V virt machine.
>
> This patch series is based on Alistair's riscv-to-apply.next branch.
>
> Changes since v6:
> 1) Updated the documentation patch as per text provided by Andrea.
>
> Changes since v5:
> 1) Added KVM use case as per feedback from Anup. Updated the 
> documentation
>patch that only S-mode payload is supported for KVM guests. Tested 
> with
>KVM enabled.
> 2) Updated tags.
>
> Changes since v4:
> 1) Updated patch 2 to avoid accessing private field as per feedback 
> from Philippe.
> 2) Updated documentation patch to add read-only for ROM usage.
> 3) Rebased to latest riscv-to-apply.next branch and updated tags.
>
> Changes since v3:
> 1) Converted single patch to a series with a cover letter since there 
> are
>multiple patches now.
> 2) Added a new patch to enable pflash usage via -blockdev option.
> 3) Separated the documentation change into new patch and updated the
>documentation to mention only -blockdev option which seems to be 
> the
>recommended way of using pflash.
>
> Changes since v2:
> 1) Reverted v2 changes and used v1 approach so that pflash0 can be 
> used
>for code and pflash1 for variable store.
> 2) Rebased to latest riscv-to-apply.next branch.
> 3) Added documentation for pflash usage.
>
> Changes since v1:
> 1) Simplified the fix such that it doesn't break current EDK2.
>
> Sunil V L (3):
>   hw/riscv: virt: Assume M-mode FW in pflash0 only when "-bios none"
>   riscv/virt: Support using pflash via -blockdev option
>   docs/system: riscv: Add pflash usage details

Thanks!

Applied to riscv-to-apply.next

Alistair

>
>  docs/system/riscv/virt.rst | 31 
>  hw/riscv/virt.c| 59 --
>  2 files changed, 56 insertions(+), 34 deletions(-)
>
> --
> 2.34.1
>
>

Re: [PATCH v14 03/10] accel: collecting TB execution count

2023-06-01 Thread Wu, Fei

On 6/1/2023 10:03 PM, Richard Henderson wrote:
> On 5/31/23 22:44, Wu, Fei wrote:
>> On 6/1/2023 8:05 AM, Richard Henderson wrote:
>>> On 5/30/23 01:35, Fei Wu wrote:
 From: "Vanderson M. do Rosario" 

 If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS
 enabled, then we instrument the start code of this TB
 to atomically count the number of times it is executed.
 We count both the number of "normal" executions and atomic
 executions of a TB.

 The execution count of the TB is stored in its respective
 TBS.

 All TBStatistics are created by default with the flags from
 default_tbstats_flag.

 [Richard Henderson created the inline gen_tb_exec_count]

 Signed-off-by: Vanderson M. do Rosario 
 Message-Id: <20190829173437.5926-3-vanderson...@gmail.com>
 [AJB: Fix author]
 Signed-off-by: Alex Bennée 
 Signed-off-by: Fei Wu 
 ---
    accel/tcg/cpu-exec.c  |  6 ++
    accel/tcg/tb-stats.c  |  6 ++
    accel/tcg/tcg-runtime.c   |  1 +
    accel/tcg/translate-all.c |  7 +--
    accel/tcg/translator.c    | 25 +
    include/exec/gen-icount.h |  1 +
    include/exec/tb-stats-flags.h |  5 +
    include/exec/tb-stats.h   | 13 +
    8 files changed, 62 insertions(+), 2 deletions(-)

 diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
 index 0e741960da..c0d8f26237 100644
 --- a/accel/tcg/cpu-exec.c
 +++ b/accel/tcg/cpu-exec.c
 @@ -25,6 +25,7 @@
    #include "trace.h"
    #include "disas/disas.h"
    #include "exec/exec-all.h"
 +#include "exec/tb-stats.h"
    #include "tcg/tcg.h"
    #include "qemu/atomic.h"
    #include "qemu/rcu.h"
 @@ -562,7 +563,12 @@ void cpu_exec_step_atomic(CPUState *cpu)
    mmap_unlock();
    }
    +    if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
 +    tb->tb_stats->executions.atomic++;
 +    }
 +
    cpu_exec_enter(cpu);
 +
    /* execute the generated code */
    trace_exec_tb(tb, pc);
    cpu_tb_exec(cpu, tb, _exit);
 diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
 index f988bd8a31..143a52ef5c 100644
 --- a/accel/tcg/tb-stats.c
 +++ b/accel/tcg/tb-stats.c
 @@ -22,6 +22,7 @@ enum TBStatsStatus {
    };
      static enum TBStatsStatus tcg_collect_tb_stats;
 +static uint32_t default_tbstats_flag;
      void init_tb_stats_htable(void)
    {
 @@ -56,3 +57,8 @@ bool tb_stats_collection_paused(void)
    {
    return tcg_collect_tb_stats == TB_STATS_PAUSED;
    }
 +
 +uint32_t get_default_tbstats_flag(void)
 +{
 +    return default_tbstats_flag;
 +}
>>>
>>> What is the purpose of this function, instead of a global variable?
>>> What is the meaning of 'default' in its name?
>>>
>> tbs have their specific settings, e.g. after 'filter' cmd:
>> * the last_search tbs has their stats_enabled kept
>> * tbs not in the list sets their flag to TB_PAUSED
> 
> How does this affect anything at all?
> 
> We are not *checking* the tb->tb_stats->stats_enabled bit except at code
> generation time, not code execution time.  Therefore nothing ever reads
> the TB_PAUSED bit (or, correspondingly, the clearing of the other
> bits).  The setting of the bit is permanent.
> 
At dump time, it does check stats_enabled e.g. in dump_tb_header(). So
the question is whether FILTER is necessary at all? If not, we can
remove FILTER together with PAUSE, and only keep START & STOP in hmp cmd.

Thanks,
Fei.

>> yes, it might looks better. But there is no correctness issue either as
>> it checks if the specific bit is enabled during collecting stats.
> 
> No, it does not.  See above.
> 
> 
> r~

Re: [PATCH 2/4] target/riscv: Remove check on mode for MPRV

2023-06-01 Thread Weiwei Li




On 2023/6/2 07:03, Alistair Francis wrote:

On Thu, Jun 1, 2023 at 4:43 PM Weiwei Li  wrote:


On 2023/6/1 13:27, Alistair Francis wrote:

On Mon, May 29, 2023 at 10:19 PM Weiwei Li  wrote:

Normally, MPRV can be set to 1 only in M mode (It will be cleared
when returning to lower-privilege mode by MRET/SRET).

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
---
   target/riscv/cpu_helper.c | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index bd892c05d4..45baf95c77 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -44,7 +44,7 @@ int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch)
   if (!ifetch) {
   uint64_t status = env->mstatus;

-if (mode == PRV_M && get_field(status, MSTATUS_MPRV)) {
+if (get_field(status, MSTATUS_MPRV)) {

The original check is correct though, why remove it?

Yeah. As described in the commit message, I think MPRV can only be set
to 1 in M mode normally

That's true. I do feel that keeping the check makes the code easier to
follow. Otherwise future developers need to check to see how MPRV can
be set. The current code is explicit and obviously follows the spec.

For a performance gain I think it's worth making the trade off, but it
doesn't sound like we really get any gain here.


Yeah. It's acceptable to me.

Just another question: is MPRV truly limited to working in M mode?

I can only find following description in the note:

"The MPRV and MXR mechanisms *were* conceived to improve the efficiency 
of M-mode routines

that emulate missing hardware features, e.g., misaligned loads and stores."

To some degree, this does not seem to prevent them from working in other modes.

Even though MPRV can normally be set to 1 only in M mode, it seems possible
to set it to 1 in other modes via the gdbstub.


Regards,

Weiwei Li


Alistair


which means check on MPRV is enough in this case. So I remove the check
on mode here.

Regards,

Weiwei Li


Alistair


   mode = get_field(env->mstatus, MSTATUS_MPP);
   virt = get_field(env->mstatus, MSTATUS_MPV) &&
  (mode != PRV_M);
--
2.25.1

[PATCH RFC 1/3] migration: switchover-hold parameter

2023-06-01 Thread Peter Xu

Add a new migration parameter switchover-hold which can block the src qemu
migration from switching over to the dest qemu.

One can set this flag to true so src qemu will keep iterating the VM data,
not switching over to dest even if it can.

It means now live migration works somehow like COLO; we keep syncing data
from src to dst without stopping.

When the user is ready for the switchover, one can set the parameter from
true->false.  That'll include an implicit kick to the migration thread to
wake it up and re-evaluate the switchover decision.

This can be used in two cases so far in my mind:

  (1) One can use this parameter to start pre-heating migration (but not
  really migrating, so a migrate-cancel will cancel the preheat).  When
  the user wants to really migrate, just clear the flag.  It'll in most
  cases migrate immediately because most pages are already synced.

  (2) Can also be used as a clean way to do qtest; in many of the precopy
  tests we have a requirement to run after 1 iteration without completing
  the precopy migration.  Before that we either set the bandwidth to a
  ridiculously low value, or used tricks to detect guest memory changes at
  some ad hoc guest memory position.  Now we can simply set this flag
  then we know precopy won't complete and will just keep going.

Signed-off-by: Peter Xu 
---
 qapi/migration.json| 25 +--
 migration/migration.h  |  7 +
 migration/migration-hmp-cmds.c |  3 ++
 migration/migration.c  | 56 --
 migration/options.c| 17 +++
 5 files changed, 102 insertions(+), 6 deletions(-)

diff --git a/qapi/migration.json b/qapi/migration.json
index 179af0c4d8..1d0059d125 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -779,6 +779,15 @@
 # Nodes are mapped to their block device name if there is one, and
 # to their node name otherwise.  (Since 5.2)
 #
+# @switchover-hold: Whether we should hold-off precopy switchover from src
+# to dest QEMU, even if we can finish migration in the downtime
+# specified.  By default off, so precopy migration will complete as
+# soon as possible.  One can set it to explicitly keep iterating during
+# precopy migration until set the flag to false again to kick off the
+# final switchover.  Note, this does not affect postcopy switchover,
+# because the user can control that using "migrate-start-postcopy"
+# command explicitly. (Since 8.1)
+#
 # Features:
 #
 # @unstable: Member @x-checkpoint-delay is experimental.
@@ -800,7 +809,7 @@
'xbzrle-cache-size', 'max-postcopy-bandwidth',
'max-cpu-throttle', 'multifd-compression',
'multifd-zlib-level' ,'multifd-zstd-level',
-   'block-bitmap-mapping' ] }
+   'block-bitmap-mapping', 'switchover-hold' ] }
 
 ##
 # @MigrateSetParameters:
@@ -935,6 +944,10 @@
 # Nodes are mapped to their block device name if there is one, and
 # to their node name otherwise.  (Since 5.2)
 #
+# @switchover-hold: Whether we should hold-off precopy switchover from src
+# to dest QEMU.  For more details, please refer to MigrationParameter
+# entry of the same field. (Since 8.1)
+#
 # Features:
 #
 # @unstable: Member @x-checkpoint-delay is experimental.
@@ -972,7 +985,8 @@
 '*multifd-compression': 'MultiFDCompression',
 '*multifd-zlib-level': 'uint8',
 '*multifd-zstd-level': 'uint8',
-'*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
+'*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ],
+'*switchover-hold': 'bool' } }
 
 ##
 # @migrate-set-parameters:
@@ -1127,6 +1141,10 @@
 # Nodes are mapped to their block device name if there is one, and
 # to their node name otherwise.  (Since 5.2)
 #
+# @switchover-hold: Whether we should hold-off precopy switchover from src
+# to dest QEMU.  For more details, please refer to MigrationParameter
+# entry of the same field. (Since 8.1)
+#
 # Features:
 #
 # @unstable: Member @x-checkpoint-delay is experimental.
@@ -1161,7 +1179,8 @@
 '*multifd-compression': 'MultiFDCompression',
 '*multifd-zlib-level': 'uint8',
 '*multifd-zstd-level': 'uint8',
-'*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
+'*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ],
+'*switchover-hold': 'bool' } }
 
 ##
 # @query-migrate-parameters:
diff --git a/migration/migration.h b/migration/migration.h
index 48a46123a0..086ceec754 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -437,6 +437,13 @@ struct MigrationState {
 
 /* QEMU_VM_VMDESCRIPTION content filled for all non-iterable devices. */
 JSONWriter *vmdesc;
+/*
+ * Only migration thread will wait on it when switchover_hold==true.
+ *
+ * Only qmp set param will kick it when switching switchover_hold

[PATCH RFC 3/3] qtest/migration: Use switchover-hold to speedup

2023-06-01 Thread Peter Xu

Use the switchover-hold flag rather than tuning bw+downtime to guide test
convergence.

This can achieve a similar goal to the previous patch "tests/qtest: massively
speed up migration-test" but without a magic offset to write to or monitor.

The initial solution can reduce migration-test time from 8min to 1min40s,
this patch can further reduce it from 1m40s to 1m1s per my local test.

Signed-off-by: Peter Xu 
---
 tests/qtest/migration-test.c | 20 ++--
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index b0c355bbd9..62bdd67fd9 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -433,16 +433,15 @@ static void migrate_set_parameter_bool(QTestState *who, 
const char *parameter,
 
 static void migrate_ensure_non_converge(QTestState *who)
 {
-/* Can't converge with 1ms downtime + 3 mbs bandwidth limit */
-migrate_set_parameter_int(who, "max-bandwidth", 3 * 1000 * 1000);
-migrate_set_parameter_int(who, "downtime-limit", 1);
+/* Hold off switchover for precopy only */
+migrate_set_parameter_bool(who, "switchover-hold", true);
 }
 
 static void migrate_ensure_converge(QTestState *who)
 {
-/* Should converge with 30s downtime + 1 gbs bandwidth limit */
-migrate_set_parameter_int(who, "max-bandwidth", 1 * 1000 * 1000 * 1000);
-migrate_set_parameter_int(who, "downtime-limit", 30 * 1000);
+/* No limitation on bandwidth so converge faster */
+migrate_set_parameter_int(who, "max-bandwidth", 0);
+migrate_set_parameter_bool(who, "switchover-hold", false);
 }
 
 static void migrate_pause(QTestState *who)
@@ -492,6 +491,13 @@ static void migrate_postcopy_start(QTestState *from, 
QTestState *to)
 }
 
 qtest_qmp_eventwait(to, "RESUME");
+
+/*
+ * Now allow precopy switchover (which will allow completion).  This
+ * needs to be done after migrate-start-postcopy to make sure we switch
+ * to postcopy first.
+ */
+migrate_ensure_converge(from);
 }
 
 typedef struct {
@@ -1164,6 +1170,8 @@ static int migrate_postcopy_prepare(QTestState **from_ptr,
 }
 
 migrate_ensure_non_converge(from);
+/* Still use unlimited precopy speed to finish 1st iteration fast */
+migrate_set_parameter_int(from, "max-bandwidth", 0);
 
 /* Wait for the first serial output from the source */
 wait_for_serial("src_serial");
-- 
2.40.1

[PATCH RFC 0/3] migration: switchover-hold flag

2023-06-01 Thread Peter Xu

This RFC patchset is based on Daniel's recent refactoring on migration-test:

https://lore.kernel.org/r/20230601161347.1803440-1-berra...@redhat.com
Based-on: <20230601161347.1803440-1-berra...@redhat.com>

A new flag "switchover-hold" is added to allow src qemu to explicitly hold
switchover for precopy migration.  Note that this flag will not affect
postcopy switchover because src qemu already has migrate-start-postcopy,
which is a finer grained knob just for that.  In general this flag only
affects reaching the migration completion phase: when set, it'll block that
from happening while keeping the migration iteration going.

This can be used in two cases so far in my mind:

  (1) One can use this parameter to start pre-heating migration (but not
  really migrating, so a migrate-cancel will cancel the preheat).  When
  the user wants to really migrate, just clear the flag.  It'll in most
  cases migrate immediately because most pages are already synced.

  (2) Can also be used as a clean way to do qtest; in many of the precopy
  tests we have a requirement to run after 1 iteration without completing
  the precopy migration.  Before that we either set the bandwidth to a
  ridiculously low value, or used tricks to detect guest memory changes at
  some ad hoc guest memory position.  Now we can simply set this flag
  then we know precopy won't complete and will just keep going.

The 1st use case may look a bit like COLO where we can actually keep both
QEMU _mostly_ in sync.  I'm not sure whether it can be useful anywhere,
though.

Patch 1 will introduce the new flag.

Patch 2 will temporarily revert the last patch from Daniel's series, so
potentially this will temporarily make migration-test slow again.  If we
want to avoid this we can merge patch 2 & 3 too.

Patch 3 will leverage the new flag to speed up migration-test.  There are
still some differences compared to Daniel's solution (e.g., we can still
wait for a whole iteration for each test since we run the initial 3MB also
with full bw).  An initial test shows this can make migration-test finish
in a little more than 1m.

Please have a look, thanks.

Peter Xu (3):
  migration: switchover-hold parameter
  Revert "tests/qtest: massively speed up migration-test"
  qtest/migration: Use switchover-hold to speedup

 qapi/migration.json|  25 -
 migration/migration.h  |   7 ++
 migration/migration-hmp-cmds.c |   3 +
 migration/migration.c  |  56 ++-
 migration/options.c|  17 
 tests/qtest/migration-test.c   | 163 +++--
 6 files changed, 134 insertions(+), 137 deletions(-)

-- 
2.40.1

[PATCH RFC 2/3] Revert "tests/qtest: massively speed up migration-test"

2023-06-01 Thread Peter Xu

This reverts commit e76a92b869f17d7a3f554890fb89b7da595dd652.
---
 tests/qtest/migration-test.c | 143 +--
 1 file changed, 18 insertions(+), 125 deletions(-)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index d2cd71e6cf..b0c355bbd9 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -46,20 +46,6 @@ static bool uffd_feature_thread_id;
 static bool got_src_stop;
 static bool got_dst_resume;
 
-/*
- * An initial 3 MB offset is used as that corresponds
- * to ~1 sec of data transfer with our bandwidth setting.
- */
-#define MAGIC_OFFSET_BASE (3 * 1024 * 1024)
-/*
- * A further 1k is added to ensure we're not a multiple
- * of TEST_MEM_PAGE_SIZE, thus avoid clash with writes
- * from the migration guest workload.
- */
-#define MAGIC_OFFSET_SHUFFLE 1024
-#define MAGIC_OFFSET (MAGIC_OFFSET_BASE + MAGIC_OFFSET_SHUFFLE)
-#define MAGIC_MARKER 0xFEED12345678CAFEULL
-
 /*
  * Dirtylimit stop working if dirty page rate error
  * value less than DIRTYLIMIT_TOLERANCE_RANGE
@@ -459,91 +445,6 @@ static void migrate_ensure_converge(QTestState *who)
 migrate_set_parameter_int(who, "downtime-limit", 30 * 1000);
 }
 
-/*
- * Our goal is to ensure that we run a single full migration
- * iteration, and also dirty memory, ensuring that at least
- * one further iteration is required.
- *
- * We can't directly synchronize with the start of a migration
- * so we have to apply some tricks monitoring memory that is
- * transferred.
- *
- * Initially we set the migration bandwidth to an insanely
- * low value, with tiny max downtime too. This basically
- * guarantees migration will never complete.
- *
- * This will result in a test that is unacceptably slow though,
- * so we can't let the entire migration pass run at this speed.
- * Our intent is to let it run just long enough that we can
- * prove data prior to the marker has been transferred *AND*
- * also prove this transferred data is dirty again.
- *
- * Before migration starts, we write a 64-bit magic marker
- * into a fixed location in the src VM RAM.
- *
- * Then watch dst memory until the marker appears. This is
- * proof that start_address -> MAGIC_OFFSET_BASE has been
- * transferred.
- *
- * Finally we go back to the source and read a byte just
- * before the marker untill we see it flip in value. This
- * is proof that start_address -> MAGIC_OFFSET_BASE
- * is now dirty again.
- *
- * IOW, we're guaranteed at least a 2nd migration pass
- * at this point.
- *
- * We can now let migration run at full speed to finish
- * the test
- */
-static void migrate_prepare_for_dirty_mem(QTestState *from)
-{
-/*
- * The guest workflow iterates from start_address to
- * end_address, writing 1 byte every TEST_MEM_PAGE_SIZE
- * bytes.
- *
- * IOW, if we write to mem at a point which is NOT
- * a multiple of TEST_MEM_PAGE_SIZE, our write won't
- * conflict with the migration workflow.
- *
- * We put in a marker here, that we'll use to determine
- * when the data has been transferred to the dst.
- */
-qtest_writeq(from, start_address + MAGIC_OFFSET, MAGIC_MARKER);
-}
-
-static void migrate_wait_for_dirty_mem(QTestState *from,
-   QTestState *to)
-{
-uint64_t watch_address = start_address + MAGIC_OFFSET_BASE;
-uint64_t marker_address = start_address + MAGIC_OFFSET;
-uint8_t watch_byte;
-
-/*
- * Wait for the MAGIC_MARKER to get transferred, as an
- * indicator that a migration pass has made some known
- * amount of progress.
- */
-do {
-usleep(1000 * 10);
-} while (qtest_readq(to, marker_address) != MAGIC_MARKER);
-
-/*
- * Now ensure that already transferred bytes are
- * dirty again from the guest workload. Note the
- * guest byte value will wrap around and by chance
- * match the original watch_byte. This is harmless
- * as we'll eventually see a different value if we
- * keep watching
- */
-watch_byte = qtest_readb(from, watch_address);
-do {
-usleep(1000 * 10);
-} while (qtest_readb(from, watch_address) == watch_byte);
-}
-
-
 static void migrate_pause(QTestState *who)
 {
 qtest_qmp_assert_success(who, "{ 'execute': 'migrate-pause' }");
@@ -676,10 +577,7 @@ typedef struct {
 MIG_TEST_FAIL_DEST_QUIT_ERR,
 } result;
 
-/*
- * Optional: set number of migration passes to wait for, if live==true.
- * If zero, then merely wait for a few MB of dirty data
- */
+/* Optional: set number of migration passes to wait for, if live==true */
 unsigned int iterations;
 
 /*
@@ -1267,14 +1165,12 @@ static int migrate_postcopy_prepare(QTestState 
**from_ptr,
 
 migrate_ensure_non_converge(from);
 
-migrate_prepare_for_dirty_mem(from);
-
 /* Wait for the first serial output from the source */
 wait_for_serial("src_serial");
 
 migrate_qmp(from, uri, "{}");
 
-

Re: [ANNOUNCE] KVM Microconference at LPC 2023

2023-06-01 Thread Sean Christopherson

On Thu, Jun 01, 2023, Mickaël Salaün wrote:
> Hi,
> 
> What is the status of this microconference proposal? We'd be happy to talk
> about Heki [1] and potentially other hypervisor supports.

Proposal submitted (deadline is/was today), now we wait :-)  IIUC, we should 
find
out rather quickly whether or not the KVM MC is a go.

[PULL 2/6] migration: never fail in global_state_store()

2023-06-01 Thread Juan Quintela

From: Vladimir Sementsov-Ogievskiy 

Actually global_state_store() can never fail. Let's get rid of extra
error paths.

To make things clear, use new runstate_get() and use same approach for
global_state_store() and global_state_store_running().

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Juan Quintela 
Message-Id: <20230517123752.21615-3-vsement...@yandex-team.ru>
Signed-off-by: Juan Quintela 
---
 include/migration/global_state.h |  2 +-
 migration/global_state.c | 29 +++---
 migration/migration.c| 41 +++-
 migration/savevm.c   |  6 +
 4 files changed, 35 insertions(+), 43 deletions(-)

diff --git a/include/migration/global_state.h b/include/migration/global_state.h
index 945eb35d5b..d7c2cd3216 100644
--- a/include/migration/global_state.h
+++ b/include/migration/global_state.h
@@ -16,7 +16,7 @@
 #include "qapi/qapi-types-run-state.h"
 
 void register_global_state(void);
-int global_state_store(void);
+void global_state_store(void);
 void global_state_store_running(void);
 bool global_state_received(void);
 RunState global_state_get_runstate(void);
diff --git a/migration/global_state.c b/migration/global_state.c
index a33947ca32..4e2a9d8ec0 100644
--- a/migration/global_state.c
+++ b/migration/global_state.c
@@ -29,23 +29,22 @@ typedef struct {
 
 static GlobalState global_state;
 
-int global_state_store(void)
+static void global_state_do_store(RunState state)
 {
-if (!runstate_store((char *)global_state.runstate,
-sizeof(global_state.runstate))) {
-error_report("runstate name too big: %s", global_state.runstate);
-trace_migrate_state_too_big();
-return -EINVAL;
-}
-return 0;
-}
-
-void global_state_store_running(void)
-{
-const char *state = RunState_str(RUN_STATE_RUNNING);
-assert(strlen(state) < sizeof(global_state.runstate));
+const char *state_str = RunState_str(state);
+assert(strlen(state_str) < sizeof(global_state.runstate));
 strpadcpy((char *)global_state.runstate, sizeof(global_state.runstate),
-  state, '\0');
+  state_str, '\0');
+}
+
+void global_state_store(void)
+{
+global_state_do_store(runstate_get());
+}
+
+void global_state_store_running(void)
+{
+global_state_do_store(RUN_STATE_RUNNING);
 }
 
 bool global_state_received(void)
diff --git a/migration/migration.c b/migration/migration.c
index 5de7f734b9..c75d5aa479 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2288,27 +2288,26 @@ static void migration_completion(MigrationState *s)
 s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
 s->vm_was_running = runstate_is_running();
-ret = global_state_store();
+global_state_store();
 
-if (!ret) {
-ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
-trace_migration_completion_vm_stop(ret);
-if (ret >= 0) {
-ret = migration_maybe_pause(s, _active_state,
-MIGRATION_STATUS_DEVICE);
-}
-if (ret >= 0) {
-/*
- * Inactivate disks except in COLO, and track that we
- * have done so in order to remember to reactivate
- * them if migration fails or is cancelled.
- */
-s->block_inactive = !migrate_colo();
-migration_rate_set(RATE_LIMIT_DISABLED);
-ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
- s->block_inactive);
-}
+ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+trace_migration_completion_vm_stop(ret);
+if (ret >= 0) {
+ret = migration_maybe_pause(s, _active_state,
+MIGRATION_STATUS_DEVICE);
 }
+if (ret >= 0) {
+/*
+ * Inactivate disks except in COLO, and track that we
+ * have done so in order to remember to reactivate
+ * them if migration fails or is cancelled.
+ */
+s->block_inactive = !migrate_colo();
+migration_rate_set(RATE_LIMIT_DISABLED);
+ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
+ s->block_inactive);
+}
+
 qemu_mutex_unlock_iothread();
 
 if (ret < 0) {
@@ -3088,9 +3087,7 @@ static void *bg_migration_thread(void *opaque)
 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
 s->vm_was_running = runstate_is_running();
 
-if (global_state_store()) {
-goto fail;
-}
+global_state_store();
 /* Forcibly stop VM before saving state of vCPUs and devices */
 if (vm_stop_force_state(RUN_STATE_PAUSED))

[PULL 4/6] migration: switch from .vm_was_running to .vm_old_state

2023-06-01 Thread Juan Quintela

From: Vladimir Sementsov-Ogievskiy 

No logic change here, only refactoring. That's a preparation for next
commit where we finally restore the stopped vm state on migration
failure or cancellation.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Juan Quintela 
Message-Id: <20230517123752.21615-5-vsement...@yandex-team.ru>
Signed-off-by: Juan Quintela 
---
 migration/migration.h |  9 ++---
 migration/migration.c | 11 ++-
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/migration/migration.h b/migration/migration.h
index 48a46123a0..30c3e97635 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -25,6 +25,7 @@
 #include "net/announce.h"
 #include "qom/object.h"
 #include "postcopy-ram.h"
+#include "sysemu/runstate.h"
 
 struct PostcopyBlocktimeContext;
 
@@ -317,12 +318,14 @@ struct MigrationState {
 int64_t expected_downtime;
 bool capabilities[MIGRATION_CAPABILITY__MAX];
 int64_t setup_time;
+
 /*
- * Whether guest was running when we enter the completion stage.
+ * State before stopping the vm by vm_stop_force_state().
  * If migration is interrupted by any reason, we need to continue
- * running the guest on source.
+ * running the guest on source if it was running or restore its stopped
+ * state.
  */
-bool vm_was_running;
+RunState vm_old_state;
 
 /* Flag set once the migration has been asked to enter postcopy */
 bool start_postcopy;
diff --git a/migration/migration.c b/migration/migration.c
index c75d5aa479..033162cda0 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1402,7 +1402,7 @@ void migrate_init(MigrationState *s)
 
 s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 s->total_time = 0;
-s->vm_was_running = false;
+s->vm_old_state = -1;
 s->iteration_initial_bytes = 0;
 s->threshold_size = 0;
 }
@@ -2287,7 +2287,8 @@ static void migration_completion(MigrationState *s)
 qemu_mutex_lock_iothread();
 s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
-s->vm_was_running = runstate_is_running();
+
+s->vm_old_state = runstate_get();
 global_state_store();
 
 ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
@@ -2760,12 +2761,12 @@ static void migration_iteration_finish(MigrationState 
*s)
 case MIGRATION_STATUS_COLO:
 assert(migrate_colo());
 migrate_start_colo_process(s);
-s->vm_was_running = true;
+s->vm_old_state = RUN_STATE_RUNNING;
 /* Fallthrough */
 case MIGRATION_STATUS_FAILED:
 case MIGRATION_STATUS_CANCELLED:
 case MIGRATION_STATUS_CANCELLING:
-if (s->vm_was_running) {
+if (s->vm_old_state == RUN_STATE_RUNNING) {
 if (!runstate_check(RUN_STATE_SHUTDOWN)) {
 vm_start();
 }
@@ -3085,7 +3086,7 @@ static void *bg_migration_thread(void *opaque)
  * transition in vm_stop_force_state() we need to wakeup it up.
  */
 qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
-s->vm_was_running = runstate_is_running();
+s->vm_old_state = runstate_get();
 
 global_state_store();
 /* Forcibly stop VM before saving state of vCPUs and devices */
-- 
2.40.1

[PULL 0/6] Migration 20230601 patches

2023-06-01 Thread Juan Quintela

The following changes since commit 9eb400cdd7b0940bd696aa01462dd53004ae04e9:

  Merge tag 'block-pull-request' of https://gitlab.com/stefanha/qemu into 
staging (2023-06-01 11:47:58 -0700)

are available in the Git repository at:

  https://gitlab.com/juan.quintela/qemu.git tags/migration-20230601-pull-request

for you to fetch changes up to 3a8b81f2e6393828589699bb0b8ef557b9ae5937:

  migration: stop tracking ram writes when cancelling background migration 
(2023-06-02 01:03:19 +0200)


Migration Pull request (20230601)

Hi

In this series:
- improve background migration (fiona)
- improve vmstate failure states (vladimir)
- dropped all the RDMA cleanups

Please, apply.



Fiona Ebner (1):
  migration: stop tracking ram writes when cancelling background
migration

Vladimir Sementsov-Ogievskiy (5):
  runstate: add runstate_get()
  migration: never fail in global_state_store()
  runstate: drop unused runstate_store()
  migration: switch from .vm_was_running to .vm_old_state
  migration: restore vmstate on migration failure

 include/migration/global_state.h |  2 +-
 include/sysemu/runstate.h|  2 +-
 migration/migration.h|  9 +++--
 migration/global_state.c | 29 +++---
 migration/migration.c| 68 
 migration/savevm.c   |  6 +--
 softmmu/runstate.c   | 25 ++--
 7 files changed, 68 insertions(+), 73 deletions(-)

-- 
2.40.1

[PULL 6/6] migration: stop tracking ram writes when cancelling background migration

2023-06-01 Thread Juan Quintela

From: Fiona Ebner 

Currently, it is only done when the iteration finishes successfully.
Not cleaning up the userfaultfd write protection can lead to
symptoms/issues such as the process hanging in memmove or GDB not
being able to attach.

Signed-off-by: Fiona Ebner 
Message-Id: <20230526115908.196171-1-f.eb...@proxmox.com>
Signed-off-by: Juan Quintela 
---
 migration/migration.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 7c3425c6fe..dc05c6f6ea 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2400,13 +2400,6 @@ static void bg_migration_completion(MigrationState *s)
 {
 int current_active_state = s->state;
 
-/*
- * Stop tracking RAM writes - un-protect memory, un-register UFFD
- * memory ranges, flush kernel wait queues and wake up threads
- * waiting for write fault to be resolved.
- */
-ram_write_tracking_stop();
-
 if (s->state == MIGRATION_STATUS_ACTIVE) {
 /*
  * By this moment we have RAM content saved into the migration stream.
@@ -2788,6 +2781,13 @@ static void migration_iteration_finish(MigrationState *s)
 
 static void bg_migration_iteration_finish(MigrationState *s)
 {
+/*
+ * Stop tracking RAM writes - un-protect memory, un-register UFFD
+ * memory ranges, flush kernel wait queues and wake up threads
+ * waiting for write fault to be resolved.
+ */
+ram_write_tracking_stop();
+
 qemu_mutex_lock_iothread();
 switch (s->state) {
 case MIGRATION_STATUS_COMPLETED:
-- 
2.40.1

[PULL 1/6] runstate: add runstate_get()

2023-06-01 Thread Juan Quintela

From: Vladimir Sementsov-Ogievskiy 

It's needed to restore the state after a failed/cancelled migration in a
later commit.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Juan Quintela 
Message-Id: <20230517123752.21615-2-vsement...@yandex-team.ru>
Signed-off-by: Juan Quintela 
---
 include/sysemu/runstate.h | 1 +
 softmmu/runstate.c| 5 +
 2 files changed, 6 insertions(+)

diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
index f3ed52548e..85f5d9a419 100644
--- a/include/sysemu/runstate.h
+++ b/include/sysemu/runstate.h
@@ -6,6 +6,7 @@
 
 bool runstate_check(RunState state);
 void runstate_set(RunState new_state);
+RunState runstate_get(void);
 bool runstate_is_running(void);
 bool runstate_needs_reset(void);
 bool runstate_store(char *str, size_t size);
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index 2f2396c819..1e6f0bcecc 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -221,6 +221,11 @@ void runstate_set(RunState new_state)
 current_run_state = new_state;
 }
 
+RunState runstate_get(void)
+{
+return current_run_state;
+}
+
 bool runstate_is_running(void)
 {
 return runstate_check(RUN_STATE_RUNNING);
-- 
2.40.1

[PULL 3/6] runstate: drop unused runstate_store()

2023-06-01 Thread Juan Quintela

From: Vladimir Sementsov-Ogievskiy 

The function is unused since previous commit. Drop it.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Juan Quintela 
Message-Id: <20230517123752.21615-4-vsement...@yandex-team.ru>
Signed-off-by: Juan Quintela 
---
 include/sysemu/runstate.h |  1 -
 softmmu/runstate.c| 12 
 2 files changed, 13 deletions(-)

diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h
index 85f5d9a419..7beb29c2e2 100644
--- a/include/sysemu/runstate.h
+++ b/include/sysemu/runstate.h
@@ -9,7 +9,6 @@ void runstate_set(RunState new_state);
 RunState runstate_get(void);
 bool runstate_is_running(void);
 bool runstate_needs_reset(void);
-bool runstate_store(char *str, size_t size);
 
 typedef void VMChangeStateHandler(void *opaque, bool running, RunState state);
 
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index 1e6f0bcecc..0370230a5e 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -175,18 +175,6 @@ bool runstate_check(RunState state)
 return current_run_state == state;
 }
 
-bool runstate_store(char *str, size_t size)
-{
-const char *state = RunState_str(current_run_state);
-size_t len = strlen(state) + 1;
-
-if (len > size) {
-return false;
-}
-memcpy(str, state, len);
-return true;
-}
-
 static void runstate_init(void)
 {
 const RunStateTransition *p;
-- 
2.40.1

[PULL 5/6] migration: restore vmstate on migration failure

2023-06-01 Thread Juan Quintela

From: Vladimir Sementsov-Ogievskiy 

1. Otherwise failed migration just drops guest-panicked state, which is
   not good for management software.

2. We do keep different paused states like guest-panicked during
   migration with help of global_state state.

3. We do restore running state on source when migration is cancelled or
   failed.

4. "postmigrate" state is documented as "guest is paused following a
   successful 'migrate'", so originally it's only for successful path
   and we never documented current behavior.

Let's restore paused states like guest-panicked in case of cancel or
fail too. Allow same transitions like for inmigrate state.

This commit changes the behavior that was introduced by commit
42da5550d6 "migration: set state to post-migrate on failure" and
provides a slightly different fix for the related
  https://bugzilla.redhat.com/show_bug.cgi?id=1355683

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Juan Quintela 
Message-Id: <20230517123752.21615-6-vsement...@yandex-team.ru>
Signed-off-by: Juan Quintela 
---
 migration/migration.c | 2 +-
 softmmu/runstate.c| 8 +++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 033162cda0..7c3425c6fe 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2772,7 +2772,7 @@ static void migration_iteration_finish(MigrationState *s)
 }
 } else {
 if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
-runstate_set(RUN_STATE_POSTMIGRATE);
+runstate_set(s->vm_old_state);
 }
 }
 break;
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index 0370230a5e..1957caf73f 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -121,7 +121,13 @@ static const RunStateTransition runstate_transitions_def[] 
= {
 { RUN_STATE_FINISH_MIGRATE, RUN_STATE_PAUSED },
 { RUN_STATE_FINISH_MIGRATE, RUN_STATE_POSTMIGRATE },
 { RUN_STATE_FINISH_MIGRATE, RUN_STATE_PRELAUNCH },
-{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_COLO},
+{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_COLO },
+{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_INTERNAL_ERROR },
+{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_IO_ERROR },
+{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_SHUTDOWN },
+{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_SUSPENDED },
+{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_WATCHDOG },
+{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_GUEST_PANICKED },
 
 { RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING },
 { RUN_STATE_RESTORE_VM, RUN_STATE_PRELAUNCH },
-- 
2.40.1

Re: [PATCH v3 9/9] tests/qtest: massively speed up migration-test

2023-06-01 Thread Peter Xu

On Fri, Jun 02, 2023 at 01:00:10AM +0200, Juan Quintela wrote:
> That was basically my idea and that is why I am holding the last two
> patches and see if I can come up with something in the next couple of days.

Ah! ...

If you haven't started, please hold off for one day.  I'll see whether I
can post mine tomorrow.

-- 
Peter Xu

Re: [PATCH 2/4] target/riscv: Remove check on mode for MPRV

2023-06-01 Thread Alistair Francis

On Thu, Jun 1, 2023 at 4:43 PM Weiwei Li  wrote:
>
>
> On 2023/6/1 13:27, Alistair Francis wrote:
> > On Mon, May 29, 2023 at 10:19 PM Weiwei Li  wrote:
> >> Normally, MPRV can be set to 1 only in M mode (It will be cleared
> >> when returning to lower-privilege mode by MRET/SRET).
> >>
> >> Signed-off-by: Weiwei Li 
> >> Signed-off-by: Junqiang Wang 
> >> ---
> >>   target/riscv/cpu_helper.c | 2 +-
> >>   1 file changed, 1 insertion(+), 1 deletion(-)
> >>
> >> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> >> index bd892c05d4..45baf95c77 100644
> >> --- a/target/riscv/cpu_helper.c
> >> +++ b/target/riscv/cpu_helper.c
> >> @@ -44,7 +44,7 @@ int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch)
> >>   if (!ifetch) {
> >>   uint64_t status = env->mstatus;
> >>
> >> -if (mode == PRV_M && get_field(status, MSTATUS_MPRV)) {
> >> +if (get_field(status, MSTATUS_MPRV)) {
> > The original check is correct though, why remove it?
>
> Yeah. As described in the commit message, I think MPRV can only be set
> to 1 in M mode normally

That's true. I do feel that keeping the check makes the code easier to
follow. Otherwise future developers need to check to see how MPRV can
be set. The current code is explicit and obviously follows the spec.

For a performance gain I think it's worth making the trade off, but it
doesn't sound like we really get any gain here.

Alistair

>
> which means check on MPRV is enough in this case. So I remove the check
> on mode here.
>
> Regards,
>
> Weiwei Li
>
> >
> > Alistair
> >
> >>   mode = get_field(env->mstatus, MSTATUS_MPP);
> >>   virt = get_field(env->mstatus, MSTATUS_MPV) &&
> >>  (mode != PRV_M);
> >> --
> >> 2.25.1
> >>
> >>
>

Re: [PATCH v3 9/9] tests/qtest: massively speed up migration-test

2023-06-01 Thread Juan Quintela

Peter Xu  wrote:
> On Wed, May 31, 2023 at 02:24:00PM +0100, Daniel P. Berrangé wrote:
>> The migration test cases that actually exercise live migration want to
>> ensure there is a minimum of two iterations of pre-copy, in order to
>> exercise the dirty tracking code.
>> 
>> Historically we've queried the migration status, looking for the
>> 'dirty-sync-count' value to increment to track iterations. This was
>> not entirely reliable because often all the data would get transferred
>> quickly enough that the migration would finish before we wanted it
>> to. So we massively dropped the bandwidth and max downtime to
>> guarantee non-convergence. This had the unfortunate side effect
>> that every migration took at least 30 seconds to run (100 MB of
>> dirty pages / 3 MB/sec).
>> 
>> This optimization takes a different approach to ensuring a
>> minimum of two iterations. Rather than waiting for dirty-sync-count
>> to increment, directly look for an indication that the source VM
>> has dirtied RAM that has already been transferred.
>> 
>> On the source VM a magic marker is written just after the 3 MB
>> offset. The destination VM is now monitored to detect when the
>> magic marker is transferred. This gives a guarantee that the
>> first 3 MB of memory have been transferred. Now the source VM
>> memory is monitored at exactly the 3MB offset until we observe
>> a flip in its value. This gives us a guarantee that the guest
>> workload has dirtied a byte that has already been transferred.
>> 
>> Since we're looking at a place that is only 3 MB from the start
>> of memory, with the 3 MB/sec bandwidth, this test should complete
>> in 1 second, instead of 30 seconds.
>> 
>> Once we've proved there is some dirty memory, migration can be
>> set back to full speed for the remainder of the 1st iteration,
>> and the entire of the second iteration at which point migration
>> should be complete.
>> 
>> On a test machine this further reduces the migration test time
>> from 8 minutes to 1 minute 40.
>
> The outcome is definitely nice, but it does look slightly hacky to me and
> makes the test slightly more complicated.
>
> If it's all about making sure we finish the 1st iteration, can we simply
> add a src qemu parameter "switchover-hold"?  If it's set, src never
> switches over to dst but keeps iterating.
>
> Then migrate_ensure_non_converge() will be as simple as setting
> switchover-hold to true.
>
> I am even thinking whether there can even be real-life use case for that,
> e.g., where a user might want to have a pre-heat of a migration of some VM,
> and trigger it immediately when the admin really wants (the pre-heats moved
> most of the pages and keep doing so).
>
> It'll be also similar to what Avihai proposed here on switchover-ack, just
> an ack mechanism on the src side:
>
> https://lore.kernel.org/r/20230530144821.1557-3-avih...@nvidia.com

That was basically my idea and that is why I am holding the last two
patches and see if I can come up with something in the next couple of days.

Later, Juan.

Re: [PATCH v3 8/9] tests/qtest: make more migration pre-copy scenarios run non-live

2023-06-01 Thread Juan Quintela

Peter Xu  wrote:
> On Thu, Jun 01, 2023 at 04:55:25PM +0100, Daniel P. Berrangé wrote:
>> On Thu, Jun 01, 2023 at 11:53:17AM -0400, Peter Xu wrote:
>> > On Thu, Jun 01, 2023 at 04:39:48PM +0100, Daniel P. Berrangé wrote:
>> > > On Thu, Jun 01, 2023 at 11:30:10AM -0400, Peter Xu wrote:
>> > > > Thanks for looking into this.. definitely worthwhile.
>> > > > 
>> > > > On Wed, May 31, 2023 at 02:23:59PM +0100, Daniel P. Berrangé wrote:
>> > > > > There are 27 pre-copy live migration scenarios being tested. In all 
>> > > > > of
>> > > > > these we force non-convergence and run for one iteration, then let it
>> > > > > converge and wait for completion during the second (or following)
>> > > > > iterations. At 3 mbps bandwidth limit the first iteration takes a 
>> > > > > very
>> > > > > long time (~30 seconds).
>> > > > > 
>> > > > > While it is important to test the migration passes and convergence
>> > > > > logic, it is overkill to do this for all 27 pre-copy scenarios. The
>> > > > > TLS migration scenarios in particular are merely exercising different
>> > > > > code paths during connection establishment.
>> > > > > 
>> > > > > To optimize time taken, switch most of the test scenarios to run
>> > > > > non-live (ie guest CPUs paused) with no bandwidth limits. This gives
>> > > > > a massive speed up for most of the test scenarios.
>> > > > > 
>> > > > > For test coverage the following scenarios are unchanged
>> > > > 
>> > > > Curious how are below chosen?  I assume..
>> > > 
>> > > Chosen based on whether they exercise code paths that are unique
>> > > and interesting during the RAM transfer phase.
>> > > 
>> > > Essentially the goal is that if we have N% code coverage before this
>> > > patch, then we should still have the same N% code coverage after this
>> > > patch.
>> > > 
>> > > The TLS tests exercise code paths that are unique during the migration
>> > > establishment phase. Once established they don't exercise anything
>> > > "interesting" during RAM transfer phase. Thus we don't lose code 
>> > > coverage
>> > > by running TLS tests non-live.
>> > > 
>> > > > 
>> > > > > 
>> > > > >  * Precopy with UNIX sockets
>> > > > 
>> > > > this one verifies dirty log.
>> > > > 
>> > > > >  * Precopy with UNIX sockets and dirty ring tracking
>> > > > 
>> > > > ... dirty ring...
>> > > > 
>> > > > >  * Precopy with XBZRLE
>> > > > 
>> > > > ... xbzrle I think needs a diff on old/new, makes sense.
>> > > > 
>> > > > >  * Precopy with UNIX compress
>> > > > >  * Precopy with UNIX compress (nowait)
>> > > > >  * Precopy with multifd
>> > > > 
>> > > > What about the rest three?  Especially for two compression tests.
>> > > 
>> > > The compress thread logic is unique/interesting during RAM transfer
>> > > so benefits from running live. The wait vs non-wait scenario tests
>> > > a distinct codepath/logic.
>> > 
>> > I assume you mean e.g. when compressing with guest page being modified and
>> > we should survive that rather than crashing the compressor?
>> 
>> No, i mean the compression code has a significant behaviour difference
>> between its two tests, because they toggle:
>> 
>>  @compress-wait-thread: Controls behavior when all compression
>>  threads are currently busy.  If true (default), wait for a free
>>  compression thread to become available; otherwise, send the page
>>  uncompressed.  (Since 3.1)
>> 
>> so we need to exercise the code path that falls back to sending
>> uncompressed, as well as the code path that waits for free threads.
>
> But then the question is why live is needed?
>
> IIUC whether the wait thing triggers has nothing directly to do with whether the VM is
> live or not, but with whether all compress threads are busy.  IOW, IIUC all compress
> paths will be tested even if non-live as long as we feed enough pages to
> the compressor threads.

It is even wrong.

We didn't fix this for compression:

commit 007e179ef0e97eafda4c9ff2a9d665a1947c7c6d
Author: Ilya Leoshkevich 
Date:   Tue Jul 5 22:35:59 2022 +0200

multifd: Copy pages before compressing them with zlib

zlib_send_prepare() compresses pages of a running VM. zlib does not
make any thread-safety guarantees with respect to changing deflate()
input concurrently with deflate() [1].


Not that anyone is going to use any accelerator to run zlib when we are
compressing just 4k.

Intel AT engine had to also move to 64 pages at a time to make a
difference.  As said, I can't think of a single scenario where
compression is a good option.

Later, Juan.

Re: [PATCH v3 8/9] tests/qtest: make more migration pre-copy scenarios run non-live

2023-06-01 Thread Juan Quintela

Daniel P. Berrangé  wrote:
> On Thu, Jun 01, 2023 at 11:53:17AM -0400, Peter Xu wrote:
>> On Thu, Jun 01, 2023 at 04:39:48PM +0100, Daniel P. Berrangé wrote:
>> > On Thu, Jun 01, 2023 at 11:30:10AM -0400, Peter Xu wrote:
>> > > Thanks for looking into this.. definitely worthwhile.
>> > > 
>> > > On Wed, May 31, 2023 at 02:23:59PM +0100, Daniel P. Berrangé wrote:
>> > > > There are 27 pre-copy live migration scenarios being tested. In all of
>> > > > these we force non-convergence and run for one iteration, then let it
>> > > > converge and wait for completion during the second (or following)
>> > > > iterations. At 3 mbps bandwidth limit the first iteration takes a very
>> > > > long time (~30 seconds).
>> > > > 
>> > > > While it is important to test the migration passes and convergence
>> > > > logic, it is overkill to do this for all 27 pre-copy scenarios. The
>> > > > TLS migration scenarios in particular are merely exercising different
>> > > > code paths during connection establishment.
>> > > > 
>> > > > To optimize time taken, switch most of the test scenarios to run
>> > > > non-live (ie guest CPUs paused) with no bandwidth limits. This gives
>> > > > a massive speed up for most of the test scenarios.
>> > > > 
>> > > > For test coverage the following scenarios are unchanged
>> > > 
>> > > Curious how are below chosen?  I assume..
>> > 
>> > Chosen based on whether they exercise code paths that are unique
>> > and interesting during the RAM transfer phase.
>> > 
>> > Essentially the goal is that if we have N% code coverage before this
>> > patch, then we should still have the same N% code coverage after this
>> > patch.
>> > 
>> > The TLS tests exercise code paths that are unique during the migration
>> > establishment phase. Once established they don't exercise anything
>> > "interesting" during RAM transfer phase. Thus we don't lose code coverage
>> > by running TLS tests non-live.
>> > 
>> > > 
>> > > > 
>> > > >  * Precopy with UNIX sockets
>> > > 
>> > > this one verifies dirty log.
>> > > 
>> > > >  * Precopy with UNIX sockets and dirty ring tracking
>> > > 
>> > > ... dirty ring...
>> > > 
>> > > >  * Precopy with XBZRLE
>> > > 
>> > > ... xbzrle I think needs a diff on old/new, makes sense.
>> > > 
>> > > >  * Precopy with UNIX compress
>> > > >  * Precopy with UNIX compress (nowait)
>> > > >  * Precopy with multifd
>> > > 
>> > > What about the rest three?  Especially for two compression tests.
>> > 
>> > The compress thread logic is unique/interesting during RAM transfer
>> > so benefits from running live. The wait vs non-wait scenario tests
>> > a distinct codepath/logic.
>> 
>> I assume you mean e.g. when compressing with guest page being modified and
>> we should survive that rather than crashing the compressor?
>
> No, i mean the compression code has a significant behaviour difference
> between its two tests, because they toggle:
>
>  @compress-wait-thread: Controls behavior when all compression
>  threads are currently busy.  If true (default), wait for a free
>  compression thread to become available; otherwise, send the page
>  uncompressed.  (Since 3.1)
>
> so we need to exercise the code path that falls back to sending
> uncompressed, as well as the code path that waits for free threads.

It doesn't work.
I think that I am going to just drop it for this iteration.

I tried 2 or 3 years ago to get a test to run to compression -> was not
able to get it to work.

Moved compression on top of multifd, much, much faster and much cleaner
(each compression method is around 50 lines of code).

Lukas tried this time and he was not able to get it working either.

So I have no hope at all for this code.

To add insult to injury, it copies things so many times that it is just not
worth it.

Later, Juan.

Re: [PATCH] qtest/migration: Document live=true cases

2023-06-01 Thread Juan Quintela

Peter Xu  wrote:
> Document every single live=true use case, explaining why it should be done in the
> live manner.  Also document the parameter so that new precopy cases
> always use live=off unless there is explicit reasoning otherwise.
>
> Cc: Thomas Huth 
> Cc: Juan Quintela 
> Cc: Daniel P. Berrangé 
> Signed-off-by: Peter Xu 

Reviewed-by: Juan Quintela

[PATCH v4 2/2] tests/tcg/aarch64: add DC CVA[D]P tests

2023-06-01 Thread Zhuojia Shen

Test execution of DC CVAP and DC CVADP instructions under user mode
emulation.

Signed-off-by: Zhuojia Shen 
Reviewed-by: Peter Maydell 
---
 tests/tcg/aarch64/Makefile.target | 11 ++
 tests/tcg/aarch64/dcpodp.c| 63 +++
 tests/tcg/aarch64/dcpop.c | 63 +++
 3 files changed, 137 insertions(+)
 create mode 100644 tests/tcg/aarch64/dcpodp.c
 create mode 100644 tests/tcg/aarch64/dcpop.c

diff --git a/tests/tcg/aarch64/Makefile.target 
b/tests/tcg/aarch64/Makefile.target
index 0315795487..3430fd3cd8 100644
--- a/tests/tcg/aarch64/Makefile.target
+++ b/tests/tcg/aarch64/Makefile.target
@@ -21,12 +21,23 @@ config-cc.mak: Makefile
$(quiet-@)( \
$(call cc-option,-march=armv8.1-a+sve,  CROSS_CC_HAS_SVE); \
$(call cc-option,-march=armv8.1-a+sve2, CROSS_CC_HAS_SVE2); 
\
+   $(call cc-option,-march=armv8.2-a,  
CROSS_CC_HAS_ARMV8_2); \
$(call cc-option,-march=armv8.3-a,  
CROSS_CC_HAS_ARMV8_3); \
+   $(call cc-option,-march=armv8.5-a,  
CROSS_CC_HAS_ARMV8_5); \
$(call cc-option,-mbranch-protection=standard,  
CROSS_CC_HAS_ARMV8_BTI); \
$(call cc-option,-march=armv8.5-a+memtag,   
CROSS_CC_HAS_ARMV8_MTE); \
$(call cc-option,-march=armv9-a+sme,
CROSS_CC_HAS_ARMV9_SME)) 3> config-cc.mak
 -include config-cc.mak
 
+ifneq ($(CROSS_CC_HAS_ARMV8_2),)
+AARCH64_TESTS += dcpop
+dcpop: CFLAGS += -march=armv8.2-a
+endif
+ifneq ($(CROSS_CC_HAS_ARMV8_5),)
+AARCH64_TESTS += dcpodp
+dcpodp: CFLAGS += -march=armv8.5-a
+endif
+
 # Pauth Tests
 ifneq ($(CROSS_CC_HAS_ARMV8_3),)
 AARCH64_TESTS += pauth-1 pauth-2 pauth-4 pauth-5
diff --git a/tests/tcg/aarch64/dcpodp.c b/tests/tcg/aarch64/dcpodp.c
new file mode 100644
index 00..2cf7df2e07
--- /dev/null
+++ b/tests/tcg/aarch64/dcpodp.c
@@ -0,0 +1,63 @@
+/*
+ * Test execution of DC CVADP instruction.
+ *
+ * Copyright (c) 2023 Zhuojia Shen 
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <asm/hwcap.h>
+#include <sys/auxv.h>
+
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef HWCAP2_DCPODP
+#define HWCAP2_DCPODP (1 << 0)
+#endif
+
+bool should_fail = false;
+
+static void signal_handler(int sig, siginfo_t *si, void *data)
+{
+ucontext_t *uc = (ucontext_t *)data;
+
+if (should_fail) {
+uc->uc_mcontext.pc += 4;
+} else {
+exit(EXIT_FAILURE);
+}
+}
+
+static int do_dc_cvadp(void)
+{
+struct sigaction sa = {
+.sa_flags = SA_SIGINFO,
+.sa_sigaction = signal_handler,
+};
+
+sigemptyset(&sa.sa_mask);
+if (sigaction(SIGSEGV, &sa, NULL) < 0) {
+perror("sigaction");
+return EXIT_FAILURE;
+}
+
+asm volatile("dc cvadp, %0\n\t" :: "r"(&sa));
+
+should_fail = true;
+asm volatile("dc cvadp, %0\n\t" :: "r"(NULL));
+should_fail = false;
+
+return EXIT_SUCCESS;
+}
+
+int main(void)
+{
+if (getauxval(AT_HWCAP2) & HWCAP2_DCPODP) {
+return do_dc_cvadp();
+} else {
+printf("SKIP: no HWCAP2_DCPODP on this system\n");
+return EXIT_SUCCESS;
+}
+}
diff --git a/tests/tcg/aarch64/dcpop.c b/tests/tcg/aarch64/dcpop.c
new file mode 100644
index 00..a332a804a4
--- /dev/null
+++ b/tests/tcg/aarch64/dcpop.c
@@ -0,0 +1,63 @@
+/*
+ * Test execution of DC CVAP instruction.
+ *
+ * Copyright (c) 2023 Zhuojia Shen 
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <asm/hwcap.h>
+#include <sys/auxv.h>
+
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef HWCAP_DCPOP
+#define HWCAP_DCPOP (1 << 16)
+#endif
+
+bool should_fail = false;
+
+static void signal_handler(int sig, siginfo_t *si, void *data)
+{
+ucontext_t *uc = (ucontext_t *)data;
+
+if (should_fail) {
+uc->uc_mcontext.pc += 4;
+} else {
+exit(EXIT_FAILURE);
+}
+}
+
+static int do_dc_cvap(void)
+{
+struct sigaction sa = {
+.sa_flags = SA_SIGINFO,
+.sa_sigaction = signal_handler,
+};
+
+sigemptyset(&sa.sa_mask);
+if (sigaction(SIGSEGV, &sa, NULL) < 0) {
+perror("sigaction");
+return EXIT_FAILURE;
+}
+
+asm volatile("dc cvap, %0\n\t" :: "r"(&sa));
+
+should_fail = true;
+asm volatile("dc cvap, %0\n\t" :: "r"(NULL));
+should_fail = false;
+
+return EXIT_SUCCESS;
+}
+
+int main(void)
+{
+if (getauxval(AT_HWCAP) & HWCAP_DCPOP) {
+return do_dc_cvap();
+} else {
+printf("SKIP: no HWCAP_DCPOP on this system\n");
+return EXIT_SUCCESS;
+}
+}
-- 
2.40.1

Re: [PATCH v4 09/10] tests/qtest: make more migration pre-copy scenarios run non-live

2023-06-01 Thread Juan Quintela

Daniel P. Berrangé  wrote:
> There are 27 pre-copy live migration scenarios being tested. In all of
> these we force non-convergence and run for one iteration, then let it
> converge and wait for completion during the second (or following)
> iterations. At 3 mbps bandwidth limit the first iteration takes a very
> long time (~30 seconds).
>
> While it is important to test the migration passes and convergence
> logic, it is overkill to do this for all 27 pre-copy scenarios. The
> TLS migration scenarios in particular are merely exercising different
> code paths during connection establishment.
>
> To optimize time taken, switch most of the test scenarios to run
> non-live (ie guest CPUs paused) with no bandwidth limits. This gives
> a massive speed up for most of the test scenarios.
>
> For test coverage the following scenarios are unchanged
>
>  * Precopy with UNIX sockets
>  * Precopy with UNIX sockets and dirty ring tracking
>  * Precopy with XBZRLE
>  * Precopy with UNIX compress
>  * Precopy with UNIX compress (nowait)
>  * Precopy with multifd
>
> On a test machine this reduces execution time from 13 minutes to
> 8 minutes.
>
> Tested-by: Thomas Huth 
> Signed-off-by: Daniel P. Berrangé 

Reviewed-by: Juan Quintela

Re: [PATCH v4 08/10] tests/qtest: distinguish src/dst migration VM stop/resume events

2023-06-01 Thread Juan Quintela

Daniel P. Berrangé  wrote:
> The 'got_stop' and 'got_resume' global variables apply to the src and
> dst migration VM respectively. Change their names to make this explicit
> to developers.
>
> Signed-off-by: Daniel P. Berrangé 


Reviewed-by: Juan Quintela

Re: [PATCH v4 02/10] tests/qtest: add support for callback to receive QMP events

2023-06-01 Thread Juan Quintela

Daniel P. Berrangé  wrote:
> Currently code must call one of the qtest_qmp_event* functions to
> fetch events. These are only usable if the immediate caller knows
> the particular event they want to capture, and are only interested
> in one specific event type. Adding ability to register an event
> callback lets the caller capture a range of events over any period
> of time.
>
> Signed-off-by: Daniel P. Berrangé 

Reviewed-by: Juan Quintela

Re: [PATCH v4 01/10] tests/qtest: add various qtest_qmp_assert_success() variants

2023-06-01 Thread Juan Quintela

Daniel P. Berrangé  wrote:
> Add several counterparts of qtest_qmp_assert_success() that can
>
>  * Use va_list instead of ...
>  * Accept a list of FDs to send
>  * Return the response data
>
> Reviewed-by: Thomas Huth 
> Signed-off-by: Daniel P. Berrangé 

Reviewed-by: Juan Quintela

[PATCH 0/2] target/s390x: Fix MXDB and MXDBR

2023-06-01 Thread Ilya Leoshkevich

Hi,

This is the last finding from [1].
With this fix, llvm-project testcases run without issues.

Best regards,
Ilya

[1] https://bugzilla.redhat.com/show_bug.cgi?id=2211472

Ilya Leoshkevich (2):
  target/s390x: Fix MXDB and MXDBR
  tests/tcg/s390x: Test MXDB and MXDBR

 target/s390x/helper.h|  2 +-
 target/s390x/tcg/fpu_helper.c|  5 +++--
 target/s390x/tcg/insn-data.h.inc |  4 ++--
 target/s390x/tcg/translate.c |  8 +---
 tests/tcg/s390x/Makefile.target  |  1 +
 tests/tcg/s390x/mxdb.c   | 30 ++
 6 files changed, 38 insertions(+), 12 deletions(-)
 create mode 100644 tests/tcg/s390x/mxdb.c

-- 
2.40.1

[PATCH 2/2] tests/tcg/s390x: Test MXDB and MXDBR

2023-06-01 Thread Ilya Leoshkevich

Add a small test to prevent regressions.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Ilya Leoshkevich 
---
 tests/tcg/s390x/Makefile.target |  1 +
 tests/tcg/s390x/mxdb.c  | 30 ++
 2 files changed, 31 insertions(+)
 create mode 100644 tests/tcg/s390x/mxdb.c

diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 3c239fdd082..d33960caa0a 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -35,6 +35,7 @@ TESTS+=chrl
 TESTS+=rxsbg
 TESTS+=ex-relative-long
 TESTS+=ex-branch
+TESTS+=mxdb
 
 cdsg: CFLAGS+=-pthread
 cdsg: LDFLAGS+=-pthread
diff --git a/tests/tcg/s390x/mxdb.c b/tests/tcg/s390x/mxdb.c
new file mode 100644
index 000..ae922559d3d
--- /dev/null
+++ b/tests/tcg/s390x/mxdb.c
@@ -0,0 +1,30 @@
+/*
+ * Test the MXDB and MXDBR instructions.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include <assert.h>
+#include <stdlib.h>
+
+int main(void)
+{
+union {
+double d[2];
+long double ld;
+} a;
+double b;
+
+a.d[0] = 1.2345;
+a.d[1] = 999;
+b = 6.789;
+asm("mxdb %[a],%[b]" : [a] "+f" (a.ld) : [b] "R" (b));
+assert(a.ld > 8.38 && a.ld < 8.39);
+
+a.d[0] = 1.2345;
+a.d[1] = 999;
+b = 6.789;
+asm("mxdbr %[a],%[b]" : [a] "+f" (a.ld) : [b] "f" (b));
+assert(a.ld > 8.38 && a.ld < 8.39);
+
+return EXIT_SUCCESS;
+}
-- 
2.40.1

[PATCH 1/2] target/s390x: Fix MXDB and MXDBR

2023-06-01 Thread Ilya Leoshkevich

These instructions multiply 64 bits by 64 bits, not 128 bits by 64 bits.

Reported-by: Tulio Magno Quites Machado Filho 
Fixes: 2b91240f95fd ("target/s390x: Use Int128 for passing float128")
Cc: qemu-sta...@nongnu.org
Signed-off-by: Ilya Leoshkevich 
---
 target/s390x/helper.h| 2 +-
 target/s390x/tcg/fpu_helper.c| 5 +++--
 target/s390x/tcg/insn-data.h.inc | 4 ++--
 target/s390x/tcg/translate.c | 8 +---
 4 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 7529e725f29..6bc01df73d7 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -50,7 +50,7 @@ DEF_HELPER_FLAGS_3(meeb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(mdeb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(mdb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(mxb, TCG_CALL_NO_WG, i128, env, i128, i128)
-DEF_HELPER_FLAGS_3(mxdb, TCG_CALL_NO_WG, i128, env, i128, i64)
+DEF_HELPER_FLAGS_3(mxdb, TCG_CALL_NO_WG, i128, env, i64, i64)
 DEF_HELPER_FLAGS_2(ldeb, TCG_CALL_NO_WG, i64, env, i64)
 DEF_HELPER_FLAGS_3(ldxb, TCG_CALL_NO_WG, i64, env, i128, i32)
 DEF_HELPER_FLAGS_2(lxdb, TCG_CALL_NO_WG, i128, env, i64)
diff --git a/target/s390x/tcg/fpu_helper.c b/target/s390x/tcg/fpu_helper.c
index 0bdab5bcf71..57e58292833 100644
--- a/target/s390x/tcg/fpu_helper.c
+++ b/target/s390x/tcg/fpu_helper.c
@@ -321,10 +321,11 @@ Int128 HELPER(mxb)(CPUS390XState *env, Int128 a, Int128 b)
 }
 
 /* 128/64-bit FP multiplication */
-Int128 HELPER(mxdb)(CPUS390XState *env, Int128 a, uint64_t f2)
+Int128 HELPER(mxdb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
 {
+float128 f1_128 = float64_to_float128(f1, &env->fpu_status);
 float128 ret = float64_to_float128(f2, &env->fpu_status);
-ret = float128_mul(ARG128(a), ret, &env->fpu_status);
+ret = float128_mul(f1_128, ret, &env->fpu_status);
 handle_exceptions(env, false, GETPC());
 return RET128(ret);
 }
diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
index 937e18ea9d9..0a45dbbcda8 100644
--- a/target/s390x/tcg/insn-data.h.inc
+++ b/target/s390x/tcg/insn-data.h.inc
@@ -668,11 +668,11 @@
 F(0xb31c, MDBR,RRE,   Z,   f1, f2, new, f1, mdb, 0, IF_BFP)
 F(0xb34c, MXBR,RRE,   Z,   x1, x2, new_x, x1, mxb, 0, IF_BFP)
 F(0xb30c, MDEBR,   RRE,   Z,   f1, e2, new, f1, mdeb, 0, IF_BFP)
-F(0xb307, MXDBR,   RRE,   Z,   0, f2, x1, x1, mxdb, 0, IF_BFP)
+F(0xb307, MXDBR,   RRE,   Z,   f1, f2, new_x, x1, mxdb, 0, IF_BFP)
 F(0xed17, MEEB,RXE,   Z,   e1, m2_32u, new, e1, meeb, 0, IF_BFP)
 F(0xed1c, MDB, RXE,   Z,   f1, m2_64, new, f1, mdb, 0, IF_BFP)
 F(0xed0c, MDEB,RXE,   Z,   f1, m2_32u, new, f1, mdeb, 0, IF_BFP)
-F(0xed07, MXDB,RXE,   Z,   0, m2_64, x1, x1, mxdb, 0, IF_BFP)
+F(0xed07, MXDB,RXE,   Z,   f1, m2_64, new_x, x1, mxdb, 0, IF_BFP)
 /* MULTIPLY HALFWORD */
 C(0x4c00, MH,  RX_a,  Z,   r1_o, m2_16s, new, r1_32, mul, 0)
 C(0xe37c, MHY, RXY_a, GIE, r1_o, m2_16s, new, r1_32, mul, 0)
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index 3eb3708d551..3ac573dfce5 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -3421,7 +3421,7 @@ static DisasJumpType op_mxb(DisasContext *s, DisasOps *o)
 
 static DisasJumpType op_mxdb(DisasContext *s, DisasOps *o)
 {
-gen_helper_mxdb(o->out_128, cpu_env, o->in1_128, o->in2);
+gen_helper_mxdb(o->out_128, cpu_env, o->in1, o->in2);
 return DISAS_NEXT;
 }
 
@@ -5183,12 +5183,6 @@ static void prep_r1_P(DisasContext *s, DisasOps *o)
 }
 #define SPEC_prep_r1_P SPEC_r1_even
 
-static void prep_x1(DisasContext *s, DisasOps *o)
-{
-o->out_128 = load_freg_128(get_field(s, r1));
-}
-#define SPEC_prep_x1 SPEC_r1_f128
-
 /* == */
 /* The "Write OUTput" generators.  These generally perform some non-trivial
copy of data to TCG globals, or to main memory.  The trivial cases are
-- 
2.40.1

[PATCH v4 2/2] tests/tcg/aarch64: add DC CVA[D]P tests

2023-06-01 Thread Zhuojia Shen

Test execution of DC CVAP and DC CVADP instructions under user mode
emulation.

Signed-off-by: Zhuojia Shen 
Reviewed-by: Peter Maydell 
---
 tests/tcg/aarch64/Makefile.target | 11 ++
 tests/tcg/aarch64/dcpodp.c| 63 +++
 tests/tcg/aarch64/dcpop.c | 63 +++
 3 files changed, 137 insertions(+)
 create mode 100644 tests/tcg/aarch64/dcpodp.c
 create mode 100644 tests/tcg/aarch64/dcpop.c

diff --git a/tests/tcg/aarch64/Makefile.target 
b/tests/tcg/aarch64/Makefile.target
index 0315795487..3430fd3cd8 100644
--- a/tests/tcg/aarch64/Makefile.target
+++ b/tests/tcg/aarch64/Makefile.target
@@ -21,12 +21,23 @@ config-cc.mak: Makefile
$(quiet-@)( \
$(call cc-option,-march=armv8.1-a+sve,  CROSS_CC_HAS_SVE); \
$(call cc-option,-march=armv8.1-a+sve2, CROSS_CC_HAS_SVE2); 
\
+   $(call cc-option,-march=armv8.2-a,  
CROSS_CC_HAS_ARMV8_2); \
$(call cc-option,-march=armv8.3-a,  
CROSS_CC_HAS_ARMV8_3); \
+   $(call cc-option,-march=armv8.5-a,  
CROSS_CC_HAS_ARMV8_5); \
$(call cc-option,-mbranch-protection=standard,  
CROSS_CC_HAS_ARMV8_BTI); \
$(call cc-option,-march=armv8.5-a+memtag,   
CROSS_CC_HAS_ARMV8_MTE); \
$(call cc-option,-march=armv9-a+sme,
CROSS_CC_HAS_ARMV9_SME)) 3> config-cc.mak
 -include config-cc.mak
 
+ifneq ($(CROSS_CC_HAS_ARMV8_2),)
+AARCH64_TESTS += dcpop
+dcpop: CFLAGS += -march=armv8.2-a
+endif
+ifneq ($(CROSS_CC_HAS_ARMV8_5),)
+AARCH64_TESTS += dcpodp
+dcpodp: CFLAGS += -march=armv8.5-a
+endif
+
 # Pauth Tests
 ifneq ($(CROSS_CC_HAS_ARMV8_3),)
 AARCH64_TESTS += pauth-1 pauth-2 pauth-4 pauth-5
diff --git a/tests/tcg/aarch64/dcpodp.c b/tests/tcg/aarch64/dcpodp.c
new file mode 100644
index 00..2cf7df2e07
--- /dev/null
+++ b/tests/tcg/aarch64/dcpodp.c
@@ -0,0 +1,63 @@
+/*
+ * Test execution of DC CVADP instruction.
+ *
+ * Copyright (c) 2023 Zhuojia Shen 
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <asm/hwcap.h>
+#include <sys/auxv.h>
+
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef HWCAP2_DCPODP
+#define HWCAP2_DCPODP (1 << 0)
+#endif
+
+bool should_fail = false;
+
+static void signal_handler(int sig, siginfo_t *si, void *data)
+{
+ucontext_t *uc = (ucontext_t *)data;
+
+if (should_fail) {
+uc->uc_mcontext.pc += 4;
+} else {
+exit(EXIT_FAILURE);
+}
+}
+
+static int do_dc_cvadp(void)
+{
+struct sigaction sa = {
+.sa_flags = SA_SIGINFO,
+.sa_sigaction = signal_handler,
+};
+
+sigemptyset(&sa.sa_mask);
+if (sigaction(SIGSEGV, &sa, NULL) < 0) {
+perror("sigaction");
+return EXIT_FAILURE;
+}
+
+asm volatile("dc cvadp, %0\n\t" :: "r"(&sa));
+
+should_fail = true;
+asm volatile("dc cvadp, %0\n\t" :: "r"(NULL));
+should_fail = false;
+
+return EXIT_SUCCESS;
+}
+
+int main(void)
+{
+if (getauxval(AT_HWCAP2) & HWCAP2_DCPODP) {
+return do_dc_cvadp();
+} else {
+printf("SKIP: no HWCAP2_DCPODP on this system\n");
+return EXIT_SUCCESS;
+}
+}
diff --git a/tests/tcg/aarch64/dcpop.c b/tests/tcg/aarch64/dcpop.c
new file mode 100644
index 00..a332a804a4
--- /dev/null
+++ b/tests/tcg/aarch64/dcpop.c
@@ -0,0 +1,63 @@
+/*
+ * Test execution of DC CVAP instruction.
+ *
+ * Copyright (c) 2023 Zhuojia Shen 
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <asm/hwcap.h>
+#include <sys/auxv.h>
+
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef HWCAP_DCPOP
+#define HWCAP_DCPOP (1 << 16)
+#endif
+
+bool should_fail = false;
+
+static void signal_handler(int sig, siginfo_t *si, void *data)
+{
+ucontext_t *uc = (ucontext_t *)data;
+
+if (should_fail) {
+uc->uc_mcontext.pc += 4;
+} else {
+exit(EXIT_FAILURE);
+}
+}
+
+static int do_dc_cvap(void)
+{
+struct sigaction sa = {
+.sa_flags = SA_SIGINFO,
+.sa_sigaction = signal_handler,
+};
+
+sigemptyset(&sa.sa_mask);
+if (sigaction(SIGSEGV, &sa, NULL) < 0) {
+perror("sigaction");
+return EXIT_FAILURE;
+}
+
+asm volatile("dc cvap, %0\n\t" :: "r"(&sa));
+
+should_fail = true;
+asm volatile("dc cvap, %0\n\t" :: "r"(NULL));
+should_fail = false;
+
+return EXIT_SUCCESS;
+}
+
+int main(void)
+{
+if (getauxval(AT_HWCAP) & HWCAP_DCPOP) {
+return do_dc_cvap();
+} else {
+printf("SKIP: no HWCAP_DCPOP on this system\n");
+return EXIT_SUCCESS;
+}
+}
-- 
2.40.1

[PATCH v4 1/2] target/arm: allow DC CVA[D]P in user mode emulation

2023-06-01 Thread Zhuojia Shen

DC CVAP and DC CVADP instructions can be executed in EL0 on Linux,
either directly when SCTLR_EL1.UCI == 1 or emulated by the kernel (see
user_cache_maint_handler() in arch/arm64/kernel/traps.c).

This patch enables execution of the two instructions in user mode
emulation.

Signed-off-by: Zhuojia Shen 
Reviewed-by: Peter Maydell 
---
 target/arm/helper.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index 0b7fd2e7e6..d4bee43bd0 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -7405,7 +7405,6 @@ static const ARMCPRegInfo rndr_reginfo[] = {
   .access = PL0_R, .readfn = rndr_readfn },
 };
 
-#ifndef CONFIG_USER_ONLY
 static void dccvap_writefn(CPUARMState *env, const ARMCPRegInfo *opaque,
   uint64_t value)
 {
@@ -7420,6 +7419,7 @@ static void dccvap_writefn(CPUARMState *env, const 
ARMCPRegInfo *opaque,
 /* This won't be crossing page boundaries */
 haddr = probe_read(env, vaddr, dline_size, mem_idx, GETPC());
 if (haddr) {
+#ifndef CONFIG_USER_ONLY
 
 ram_addr_t offset;
 MemoryRegion *mr;
@@ -7430,6 +7430,7 @@ static void dccvap_writefn(CPUARMState *env, const 
ARMCPRegInfo *opaque,
 if (mr) {
 memory_region_writeback(mr, offset, dline_size);
 }
+#endif /*CONFIG_USER_ONLY*/
 }
 }
 
@@ -7448,7 +7449,6 @@ static const ARMCPRegInfo dcpodp_reg[] = {
   .fgt = FGT_DCCVADP,
   .accessfn = aa64_cacheop_poc_access, .writefn = dccvap_writefn },
 };
-#endif /*CONFIG_USER_ONLY*/
 
 static CPAccessResult access_aa64_tid5(CPUARMState *env, const ARMCPRegInfo 
*ri,
bool isread)
@@ -9092,7 +9092,6 @@ void register_cp_regs_for_features(ARMCPU *cpu)
 if (cpu_isar_feature(aa64_tlbios, cpu)) {
 define_arm_cp_regs(cpu, tlbios_reginfo);
 }
-#ifndef CONFIG_USER_ONLY
 /* Data Cache clean instructions up to PoP */
 if (cpu_isar_feature(aa64_dcpop, cpu)) {
 define_one_arm_cp_reg(cpu, dcpop_reg);
@@ -9101,7 +9100,6 @@ void register_cp_regs_for_features(ARMCPU *cpu)
 define_one_arm_cp_reg(cpu, dcpodp_reg);
 }
 }
-#endif /*CONFIG_USER_ONLY*/
 
 /*
  * If full MTE is enabled, add all of the system registers.
-- 
2.40.1

[PATCH v4 0/2] target/arm: allow DC CVA[D]P in user mode emulation

2023-06-01 Thread Zhuojia Shen

This patch series enables executing DC CVAP and DC CVADP instructions in
AArch64 Linux user mode emulation and adds proper TCG tests.

Changes in v4:
- Add copyright and license header in new files

Changes in v3:
- Fix typo of HWCAP2_DCPODP
- Split tests into a separate patch
- Remove unnecessary handling of SIGILL in tests
- Merge 4 tests into 2

Changes in v2:
- Fix code to deal with unmapped address
- Add tests for DC'ing unmapped address

Zhuojia Shen (2):
  target/arm: allow DC CVA[D]P in user mode emulation
  tests/tcg/aarch64: add DC CVA[D]P tests

 target/arm/helper.c   |  6 +--
 tests/tcg/aarch64/Makefile.target | 11 ++
 tests/tcg/aarch64/dcpodp.c| 63 +++
 tests/tcg/aarch64/dcpop.c | 63 +++
 4 files changed, 139 insertions(+), 4 deletions(-)
 create mode 100644 tests/tcg/aarch64/dcpodp.c
 create mode 100644 tests/tcg/aarch64/dcpop.c

-- 
2.40.1

[PULL 04/21] test-cutils: Use g_assert_cmpuint where appropriate

2023-06-01 Thread Eric Blake

When debugging test failures, it matters that unsigned values are
displayed as large positive values rather than as negative values
(this assumes glib 2.78+, since I just fixed a bug in glib 2.76 [1]
where g_assert_cmpuint displays signed instead of unsigned values).
There is no impact when the test is passing, but using a consistent
style will matter more in upcoming test additions.  Also, some tests
are better with cmphex.

While at it, fix some spacing and minor typing issues spotted nearby.

[1] https://gitlab.gnome.org/GNOME/glib/-/issues/2997

Signed-off-by: Eric Blake 
Reviewed-by: Hanna Czenczek 
Message-Id: <20230522190441.64278-3-ebl...@redhat.com>
---
 tests/unit/test-cutils.c | 148 +++
 1 file changed, 74 insertions(+), 74 deletions(-)

diff --git a/tests/unit/test-cutils.c b/tests/unit/test-cutils.c
index 0202ac0d5b3..38bd3990207 100644
--- a/tests/unit/test-cutils.c
+++ b/tests/unit/test-cutils.c
@@ -39,7 +39,7 @@ static void test_parse_uint_null(void)
 r = parse_uint(NULL, , , 0);

 g_assert_cmpint(r, ==, -EINVAL);
-g_assert_cmpint(i, ==, 0);
+g_assert_cmpuint(i, ==, 0);
 g_assert_null(endptr);
 }

@@ -54,7 +54,7 @@ static void test_parse_uint_empty(void)
 r = parse_uint(str, , , 0);

 g_assert_cmpint(r, ==, -EINVAL);
-g_assert_cmpint(i, ==, 0);
+g_assert_cmpuint(i, ==, 0);
 g_assert_true(endptr == str);
 }

@@ -69,7 +69,7 @@ static void test_parse_uint_whitespace(void)
 r = parse_uint(str, , , 0);

 g_assert_cmpint(r, ==, -EINVAL);
-g_assert_cmpint(i, ==, 0);
+g_assert_cmpuint(i, ==, 0);
 g_assert_true(endptr == str);
 }

@@ -85,7 +85,7 @@ static void test_parse_uint_invalid(void)
 r = parse_uint(str, , , 0);

 g_assert_cmpint(r, ==, -EINVAL);
-g_assert_cmpint(i, ==, 0);
+g_assert_cmpuint(i, ==, 0);
 g_assert_true(endptr == str);
 }

@@ -101,7 +101,7 @@ static void test_parse_uint_trailing(void)
 r = parse_uint(str, , , 0);

 g_assert_cmpint(r, ==, 0);
-g_assert_cmpint(i, ==, 123);
+g_assert_cmpuint(i, ==, 123);
 g_assert_true(endptr == str + 3);
 }

@@ -116,7 +116,7 @@ static void test_parse_uint_correct(void)
 r = parse_uint(str, , , 0);

 g_assert_cmpint(r, ==, 0);
-g_assert_cmpint(i, ==, 123);
+g_assert_cmpuint(i, ==, 123);
 g_assert_true(endptr == str + strlen(str));
 }

@@ -131,7 +131,7 @@ static void test_parse_uint_octal(void)
 r = parse_uint(str, , , 0);

 g_assert_cmpint(r, ==, 0);
-g_assert_cmpint(i, ==, 0123);
+g_assert_cmpuint(i, ==, 0123);
 g_assert_true(endptr == str + strlen(str));
 }

@@ -146,7 +146,7 @@ static void test_parse_uint_decimal(void)
 r = parse_uint(str, , , 10);

 g_assert_cmpint(r, ==, 0);
-g_assert_cmpint(i, ==, 123);
+g_assert_cmpuint(i, ==, 123);
 g_assert_true(endptr == str + strlen(str));
 }

@@ -162,7 +162,7 @@ static void test_parse_uint_llong_max(void)
 r = parse_uint(str, , , 0);

 g_assert_cmpint(r, ==, 0);
-g_assert_cmpint(i, ==, (unsigned long long)LLONG_MAX + 1);
+g_assert_cmpuint(i, ==, (unsigned long long)LLONG_MAX + 1);
 g_assert_true(endptr == str + strlen(str));

 g_free(str);
@@ -179,7 +179,7 @@ static void test_parse_uint_overflow(void)
 r = parse_uint(str, , , 0);

 g_assert_cmpint(r, ==, -ERANGE);
-g_assert_cmpint(i, ==, ULLONG_MAX);
+g_assert_cmpuint(i, ==, ULLONG_MAX);
 g_assert_true(endptr == str + strlen(str));
 }

@@ -194,7 +194,7 @@ static void test_parse_uint_negative(void)
 r = parse_uint(str, , , 0);

 g_assert_cmpint(r, ==, -ERANGE);
-g_assert_cmpint(i, ==, 0);
+g_assert_cmpuint(i, ==, 0);
 g_assert_true(endptr == str + strlen(str));
 }

@@ -208,7 +208,7 @@ static void test_parse_uint_full_trailing(void)
 r = parse_uint_full(str, , 0);

 g_assert_cmpint(r, ==, -EINVAL);
-g_assert_cmpint(i, ==, 0);
+g_assert_cmpuint(i, ==, 0);
 }

 static void test_parse_uint_full_correct(void)
@@ -220,7 +220,7 @@ static void test_parse_uint_full_correct(void)
 r = parse_uint_full(str, , 0);

 g_assert_cmpint(r, ==, 0);
-g_assert_cmpint(i, ==, 123);
+g_assert_cmpuint(i, ==, 123);
 }

 static void test_qemu_strtoi_correct(void)
@@ -428,7 +428,7 @@ static void test_qemu_strtoi_underflow(void)
 int res = 999;
 int err;

-err  = qemu_strtoi(str, , 0, );
+err = qemu_strtoi(str, , 0, );

 g_assert_cmpint(err, ==, -ERANGE);
 g_assert_cmpint(res, ==, INT_MIN);
@@ -479,10 +479,10 @@ static void test_qemu_strtoi_full_null(void)
 static void test_qemu_strtoi_full_empty(void)
 {
 const char *str = "";
-int res = 999L;
+int res = 999;
 int err;

-err =  qemu_strtoi(str, NULL, 0, );
+err = qemu_strtoi(str, NULL, 0, );

 g_assert_cmpint(err, ==, -EINVAL);
 }
@@ -728,7 +728,7 @@ static void test_qemu_strtoui_underflow(void)
 unsigned int res = 999;
 int err;

-err  = qemu_strtoui(str, , 0, );
+err = qemu_strtoui(str, , 0, );

[PULL 20/21] cutils: Improve qemu_strtod* error paths

2023-06-01 Thread Eric Blake

Previous patches changed all integral qemu_strto*() error paths to
guarantee that *value is never left uninitialized.  Do likewise for
qemu_strtod.  Also, tighten qemu_strtod_finite() to never return a
non-finite value (prior to this patch, we were rejecting "inf" with
-EINVAL and unspecified result 0.0, but failing "9e999" with -ERANGE
and HUGE_VAL - which is infinite on IEEE machines - despite our
function claiming to recognize only finite values).

Auditing callers, we have no external callers of qemu_strtod, and
among the callers of qemu_strtod_finite:

- qapi/qobject-input-visitor.c:qobject_input_type_number_keyval() and
  qapi/string-input-visitor.c:parse_type_number() which reject all
  errors (does not matter what we store)

- util/cutils.c:do_strtosz() incorrectly assumes that *endptr points
  to '.' on all failures (that is, it is not distinguishing between
  EINVAL and ERANGE), and therefore still does the WRONG THING for
  "9.9e999".  The change here does not entirely fix that (a later
  patch will tackle this more systematically), but at least it fixes
  the read-out-of-bounds first diagnosed in
  https://gitlab.com/qemu-project/qemu/-/issues/1629

- our testsuite, which we can update to match what we document

Signed-off-by: Eric Blake 
Reviewed-by: Hanna Czenczek 
CC: qemu-sta...@nongnu.org
Message-Id: <20230522190441.64278-19-ebl...@redhat.com>
---
 tests/unit/test-cutils.c | 63 +++-
 util/cutils.c| 32 +++-
 2 files changed, 55 insertions(+), 40 deletions(-)

diff --git a/tests/unit/test-cutils.c b/tests/unit/test-cutils.c
index c2dbed9eda9..0a589567461 100644
--- a/tests/unit/test-cutils.c
+++ b/tests/unit/test-cutils.c
@@ -2868,7 +2868,8 @@ static void test_qemu_strtod_einval(void)
 res = 999;
 err = qemu_strtod(str, , );
 g_assert_cmpint(err, ==, -EINVAL);
-g_assert_cmpfloat(res, ==, 999.0);
+g_assert_cmpfloat(res, ==, 0.0);
+g_assert_false(signbit(res));
 g_assert_null(endptr);

 /* not recognizable */
@@ -3101,7 +3102,8 @@ static void test_qemu_strtod_finite_einval(void)
 res = 999;
 err = qemu_strtod_finite(str, , );
 g_assert_cmpint(err, ==, -EINVAL);
-g_assert_cmpfloat(res, ==, 999.0);
+g_assert_cmpfloat(res, ==, 0.0);
+g_assert_false(signbit(res));
 g_assert_true(endptr == str);

 /* NULL */
@@ -3110,7 +3112,8 @@ static void test_qemu_strtod_finite_einval(void)
 res = 999;
 err = qemu_strtod_finite(str, , );
 g_assert_cmpint(err, ==, -EINVAL);
-g_assert_cmpfloat(res, ==, 999.0);
+g_assert_cmpfloat(res, ==, 0.0);
+g_assert_false(signbit(res));
 g_assert_null(endptr);

 /* not recognizable */
@@ -3119,7 +3122,8 @@ static void test_qemu_strtod_finite_einval(void)
 res = 999;
 err = qemu_strtod_finite(str, , );
 g_assert_cmpint(err, ==, -EINVAL);
-g_assert_cmpfloat(res, ==, 999.0);
+g_assert_cmpfloat(res, ==, 0.0);
+g_assert_false(signbit(res));
 g_assert_true(endptr == str);
 }

@@ -3130,24 +3134,26 @@ static void test_qemu_strtod_finite_erange(void)
 int err;
 double res;

-/* overflow */
+/* overflow turns into EINVAL */
 str = "9e999";
 endptr = "somewhere";
 res = 999;
 err = qemu_strtod_finite(str, , );
-g_assert_cmpint(err, ==, -ERANGE);
-g_assert_cmpfloat(res, ==, HUGE_VAL);
-g_assert_true(endptr == str + 5);
+g_assert_cmpint(err, ==, -EINVAL);
+g_assert_cmpfloat(res, ==, 0.0);
+g_assert_false(signbit(res));
+g_assert_true(endptr == str);

 str = "-9e+999";
 endptr = "somewhere";
 res = 999;
 err = qemu_strtod_finite(str, , );
-g_assert_cmpint(err, ==, -ERANGE);
-g_assert_cmpfloat(res, ==, -HUGE_VAL);
-g_assert_true(endptr == str + 7);
+g_assert_cmpint(err, ==, -EINVAL);
+g_assert_cmpfloat(res, ==, 0.0);
+g_assert_false(signbit(res));
+g_assert_true(endptr == str);

-/* underflow */
+/* underflow is still possible */
 str = "-9e-999";
 endptr = "somewhere";
 res = 999;
@@ -3172,7 +3178,8 @@ static void test_qemu_strtod_finite_nonfinite(void)
 res = 999;
 err = qemu_strtod_finite(str, , );
 g_assert_cmpint(err, ==, -EINVAL);
-g_assert_cmpfloat(res, ==, 999.0);
+g_assert_cmpfloat(res, ==, 0.0);
+g_assert_false(signbit(res));
 g_assert_true(endptr == str);

 str = "-infinity";
@@ -3180,7 +3187,8 @@ static void test_qemu_strtod_finite_nonfinite(void)
 res = 999;
 err = qemu_strtod_finite(str, , );
 g_assert_cmpint(err, ==, -EINVAL);
-g_assert_cmpfloat(res, ==, 999.0);
+g_assert_cmpfloat(res, ==, 0.0);
+g_assert_false(signbit(res));
 g_assert_true(endptr == str);

 /* not a number */
@@ -3189,7 +3197,8 @@ static void test_qemu_strtod_finite_nonfinite(void)
 res = 999;
 err = qemu_strtod_finite(str, , );
 g_assert_cmpint(err, ==, -EINVAL);
-g_assert_cmpfloat(res, ==, 999.0);
+

[PULL 21/21] cutils: Improve qemu_strtosz handling of fractions

2023-06-01 Thread Eric Blake

We have several limitations and bugs worth fixing; they are
inter-related enough that it is not worth splitting this patch into
smaller pieces:

* ".5k" should work to specify 512, just as "0.5k" does
* "1.9999k" and "1." + "9"*50 + "k" should both produce the same
  result of 2048 after rounding
* "1." + "0"*350 + "1B" should not be treated the same as "1.0B";
  underflow in the fraction should not be lost
* "7.99e99" and "7.99e999" look similar, but our code was doing a
  read-out-of-bounds on the latter because it was not expecting ERANGE
  due to overflow. While we document that scientific notation is not
  supported, and the previous patch actually fixed
  qemu_strtod_finite() to no longer return ERANGE overflows, it is
  easier to pre-filter than to try and determine after the fact if
  strtod() consumed more than we wanted.  Note that this is a
  low-level semantic change (when endptr is not NULL, we can now
  successfully parse with a scale of 'E' and then report trailing
  junk, instead of failing outright with EINVAL); but an earlier
  commit already argued that this is not a high-level semantic change
  since the only caller passing in a non-NULL endptr also checks that
  the tail is whitespace-only.

Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1629
Fixes: cf923b78 ("utils: Improve qemu_strtosz() to have 64 bits of precision", 
6.0.0)
Fixes: 7625a1ed ("utils: Use fixed-point arithmetic in qemu_strtosz", 6.0.0)
Signed-off-by: Eric Blake 
Reviewed-by: Hanna Czenczek 
Message-Id: <20230522190441.64278-20-ebl...@redhat.com>
[eblake: tweak function comment for accuracy]
---
 tests/unit/test-cutils.c | 50 +-
 util/cutils.c| 90 ++--
 2 files changed, 87 insertions(+), 53 deletions(-)

diff --git a/tests/unit/test-cutils.c b/tests/unit/test-cutils.c
index 0a589567461..1db411489f0 100644
--- a/tests/unit/test-cutils.c
+++ b/tests/unit/test-cutils.c
@@ -3408,19 +3408,18 @@ static void test_qemu_strtosz_float(void)
 /* An empty fraction tail is tolerated */
 do_strtosz("1.k", 0, 1024, 3);

-/* FIXME An empty fraction head should be tolerated */
-do_strtosz(" .5k", -EINVAL /* FIXME 0 */, 0 /* FIXME 512 */,
-   0 /* FIXME 4 */);
+/* An empty fraction head is tolerated */
+do_strtosz(" .5k", 0, 512, 4);

 /* For convenience, we permit values that are not byte-exact */
 do_strtosz("12.345M", 0, (uint64_t) (12.345 * MiB + 0.5), 7);

-/* FIXME Fraction tail should round correctly */
+/* Fraction tail can round up */
 do_strtosz("1.k", 0, 2048, 7);
 do_strtosz("1.k", 0,
-   1024 /* FIXME 2048 */, 55);
+   2048, 55);

-/* FIXME ERANGE underflow in the fraction tail should not matter for 'k' */
+/* ERANGE underflow in the fraction tail does not matter for 'k' */
 do_strtosz("1."
"00"
"00"
@@ -3429,7 +3428,7 @@ static void test_qemu_strtosz_float(void)
"00"
"00"
"00"
-   "1k", 0, 1 /* FIXME 1024 */, 354);
+   "1k", 0, 1024, 354);
 }

 static void test_qemu_strtosz_invalid(void)
@@ -3453,10 +3452,9 @@ static void test_qemu_strtosz_invalid(void)
 do_strtosz("1.1B", -EINVAL, 0, 0);
 do_strtosz("1.1", -EINVAL, 0, 0);

-/* FIXME underflow in the fraction tail should matter for 'B' */
+/* 'B' cannot have any nonzero fraction, even with rounding or underflow */
 do_strtosz("1.1B", -EINVAL, 0, 0);
-do_strtosz("1.0001B", 0 /* FIXME -EINVAL */,
-   1 /* FIXME 0 */, 23 /* FIXME 0 */);
+do_strtosz("1.0001B", -EINVAL, 0, 0);
 do_strtosz("1."
"00"
"00"
@@ -3465,8 +3463,7 @@ static void test_qemu_strtosz_invalid(void)
"00"
"00"
"00"
-   "1B", 0 /* FIXME -EINVAL */, 1 /* FIXME 0 */,
-   354 /* FIXME 0 */);
+   "1B", -EINVAL, 0, 0);

 /* No hex fractions */
 do_strtosz("0x1.8k", -EINVAL, 0, 0);
@@ -3512,28 +3509,20 @@ static void test_qemu_strtosz_trailing(void)
 do_strtosz_full("123-45", qemu_strtosz, 0, 123, 3, -EINVAL, 0);
 do_strtosz_full(" 123 - 45", qemu_strtosz, 0, 123, 4, -EINVAL, 0);

-/* FIXME should stop parse after 'e'. No floating point exponents */
-do_strtosz_full("1.5e1k", qemu_strtosz, -EINVAL /* FIXME

[PULL 15/21] numa: Check for qemu_strtosz_MiB error

2023-06-01 Thread Eric Blake

As shown in the previous commit, qemu_strtosz_MiB sometimes leaves the
result value untouched (we have to audit further to learn that in that
case, the QAPI generator says that visit_type_NumaOptions() will have
zero-initialized it), and sometimes leaves it with the value of a
partial parse before -EINVAL occurs because of trailing garbage.
Rather than blindly treating any string the user may throw at us as
valid, we should check for parse failures.

Fixes: cc001888 ("numa: fixup parsed NumaNodeOptions earlier", v2.11.0)
Signed-off-by: Eric Blake 
Reviewed-by: Hanna Czenczek 
Message-Id: <20230522190441.64278-14-ebl...@redhat.com>
---
 hw/core/numa.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/hw/core/numa.c b/hw/core/numa.c
index d8d36b16d80..f08956ddb0f 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -531,10 +531,17 @@ static int parse_numa(void *opaque, QemuOpts *opts, Error 
**errp)
 /* Fix up legacy suffix-less format */
 if ((object->type == NUMA_OPTIONS_TYPE_NODE) && object->u.node.has_mem) {
 const char *mem_str = qemu_opt_get(opts, "mem");
-qemu_strtosz_MiB(mem_str, NULL, >u.node.mem);
+int ret = qemu_strtosz_MiB(mem_str, NULL, >u.node.mem);
+
+if (ret < 0) {
+error_setg_errno(, -ret, "could not parse memory size '%s'",
+ mem_str);
+}
 }

-set_numa_options(ms, object, );
+if (!err) {
+set_numa_options(ms, object, );
+}

 qapi_free_NumaOptions(object);
 if (err) {
-- 
2.40.1

[PULL 10/21] cutils: Allow NULL endptr in parse_uint()

2023-06-01 Thread Eric Blake

All the qemu_strto*() functions permit a NULL endptr, just like their
libc counterparts, leaving parse_uint() as the oddball that caused
SEGFAULT on NULL and required the user to call parse_uint_full()
instead.  Relax things for consistency, even though the testsuite is
the only impacted caller.  Add one more unit test to ensure even
parse_uint_full(NULL, 0, &value) works.  This also fixes our code to
uniformly favor EINVAL over ERANGE when both apply.

Also fixes a doc mismatch @v vs. a parameter named value.

Signed-off-by: Eric Blake 
Reviewed-by: Hanna Czenczek 
Message-Id: <20230522190441.64278-9-ebl...@redhat.com>
---
 tests/unit/test-cutils.c | 18 --
 util/cutils.c| 34 --
 2 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/tests/unit/test-cutils.c b/tests/unit/test-cutils.c
index 65041bd3974..20ab0ecb673 100644
--- a/tests/unit/test-cutils.c
+++ b/tests/unit/test-cutils.c
@@ -270,14 +270,26 @@ static void test_parse_uint_full_correct(void)

 static void test_parse_uint_full_erange_junk(void)
 {
-/* FIXME - inconsistent with qemu_strto* which favors EINVAL */
+/* EINVAL has priority over ERANGE */
 uint64_t i = 999;
 const char *str = "-2junk";
 int r;

 r = parse_uint_full(str, 0, );

-g_assert_cmpint(r, ==, -ERANGE /* FIXME -EINVAL */);
+g_assert_cmpint(r, ==, -EINVAL);
+g_assert_cmpuint(i, ==, 0);
+}
+
+static void test_parse_uint_full_null(void)
+{
+uint64_t i = 999;
+const char *str = NULL;
+int r;
+
+r = parse_uint_full(str, 0, );
+
+g_assert_cmpint(r, ==, -EINVAL);
 g_assert_cmpuint(i, ==, 0);
 }

@@ -3328,6 +3340,8 @@ int main(int argc, char **argv)
 test_parse_uint_full_correct);
 g_test_add_func("/cutils/parse_uint_full/erange_junk",
 test_parse_uint_full_erange_junk);
+g_test_add_func("/cutils/parse_uint_full/null",
+test_parse_uint_full_null);

 /* qemu_strtoi() tests */
 g_test_add_func("/cutils/qemu_strtoi/correct",
diff --git a/util/cutils.c b/util/cutils.c
index 0e279a531aa..56a2aced8d4 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -722,8 +722,7 @@ const char *qemu_strchrnul(const char *s, int c)
  * parse_uint:
  *
  * @s: String to parse
- * @endptr: Destination for pointer to first character not consumed, must
- * not be %NULL
+ * @endptr: Destination for pointer to first character not consumed
  * @base: integer base, between 2 and 36 inclusive, or 0
  * @value: Destination for parsed integer value
  *
@@ -737,7 +736,8 @@ const char *qemu_strchrnul(const char *s, int c)
  *
  * Set *@endptr to point right beyond the parsed integer (even if the integer
  * overflows or is negative, all digits will be parsed and *@endptr will
- * point right beyond them).
+ * point right beyond them).  If @endptr is %NULL, any trailing character
+ * instead causes a result of -EINVAL with *@value of 0.
  *
  * If the integer is negative, set *@value to 0, and return -ERANGE.
  * (If you want to allow negative numbers that wrap around within
@@ -784,7 +784,12 @@ int parse_uint(const char *s, const char **endptr, int 
base, uint64_t *value)

 out:
 *value = val;
-*endptr = endp;
+if (endptr) {
+*endptr = endp;
+} else if (s && *endp) {
+r = -EINVAL;
+*value = 0;
+}
 return r;
 }

@@ -795,28 +800,13 @@ out:
  * @base: integer base, between 2 and 36 inclusive, or 0
  * @value: Destination for parsed integer value
  *
- * Parse unsigned integer from entire string
+ * Parse unsigned integer from entire string, rejecting any trailing slop.
  *
- * Have the same behavior of parse_uint(), but with an additional
- * check for additional data after the parsed number. If extra
- * characters are present after a non-overflowing parsed number, the
- * function will return -EINVAL, and *@v will be set to 0.
+ * Shorthand for parse_uint(s, NULL, base, value).
  */
 int parse_uint_full(const char *s, int base, uint64_t *value)
 {
-const char *endp;
-int r;
-
-r = parse_uint(s, , base, value);
-if (r < 0) {
-return r;
-}
-if (*endp) {
-*value = 0;
-return -EINVAL;
-}
-
-return 0;
+return parse_uint(s, NULL, base, value);
 }

 int qemu_parse_fd(const char *param)
-- 
2.40.1

[PULL 17/21] cutils: Set value in all qemu_strtosz* error paths

2023-06-01 Thread Eric Blake

Making callers determine whether or not *value was populated on error
is not nice for usability.  Pre-patch, we have unit tests that check
that *result is left unchanged on most EINVAL errors and set to 0 on
many ERANGE errors.  This is subtly different from libc strtoumax()
behavior which returns UINT64_MAX on ERANGE errors, as well as
different from our parse_uint() which slams to 0 on EINVAL on the
grounds that we want our functions to be harder to mis-use than
strtoumax().

Let's audit callers:

- hw/core/numa.c:parse_numa() fixed in the previous patch to check for
  errors

- migration/migration-hmp-cmds.c:hmp_migrate_set_parameter(),
  monitor/hmp.c:monitor_parse_arguments(),
  qapi/opts-visitor.c:opts_type_size(),
  qapi/qobject-input-visitor.c:qobject_input_type_size_keyval(),
  qemu-img.c:cvtnum_full(), qemu-io-cmds.c:cvtnum(),
  target/i386/cpu.c:x86_cpu_parse_featurestr(), and
  util/qemu-option.c:parse_option_size() appear to reject all failures
  (although some with distinct messages for ERANGE as opposed to
  EINVAL), so it doesn't matter what is in the value parameter on
  error.

- All remaining callers are in the testsuite, where we can tweak our
  expectations to match our new desired behavior.

Advancing to the end of the string parsed on overflow (ERANGE), while
still returning 0, makes sense (UINT64_MAX as a size is unlikely to be
useful); likewise, our size parsing code is complex enough that it's
easier to always return 0 when endptr is NULL but trailing garbage was
found, rather than trying to return the value of the prefix actually
parsed (no current caller cared about the value of the prefix).

Signed-off-by: Eric Blake 
Reviewed-by: Hanna Czenczek 
Message-Id: <20230522190441.64278-16-ebl...@redhat.com>
---
 tests/unit/test-cutils.c | 106 +++
 util/cutils.c|  17 +--
 2 files changed, 63 insertions(+), 60 deletions(-)

diff --git a/tests/unit/test-cutils.c b/tests/unit/test-cutils.c
index a629ef2ea39..2189ebc92f3 100644
--- a/tests/unit/test-cutils.c
+++ b/tests/unit/test-cutils.c
@@ -3396,7 +3396,7 @@ static void test_qemu_strtosz_float(void)
 do_strtosz("1.k", 0, 1024, 3);

 /* FIXME An empty fraction head should be tolerated */
-do_strtosz(" .5k", -EINVAL /* FIXME 0 */, 0xbaadf00d /* FIXME 512 */,
+do_strtosz(" .5k", -EINVAL /* FIXME 0 */, 0 /* FIXME 512 */,
0 /* FIXME 4 */);

 /* For convenience, we permit values that are not byte-exact */
@@ -3421,29 +3421,29 @@ static void test_qemu_strtosz_float(void)

 static void test_qemu_strtosz_invalid(void)
 {
-do_strtosz(NULL, -EINVAL, 0xbaadf00d, 0);
+do_strtosz(NULL, -EINVAL, 0, 0);

 /* Must parse at least one digit */
-do_strtosz("", -EINVAL, 0xbaadf00d, 0);
-do_strtosz(" \t ", -EINVAL, 0xbaadf00d, 0);
-do_strtosz(".", -EINVAL, 0xbaadf00d, 0);
-do_strtosz(" .", -EINVAL, 0xbaadf00d, 0);
-do_strtosz(" .k", -EINVAL, 0xbaadf00d, 0);
-do_strtosz("inf", -EINVAL, 0xbaadf00d, 0);
-do_strtosz("NaN", -EINVAL, 0xbaadf00d, 0);
+do_strtosz("", -EINVAL, 0, 0);
+do_strtosz(" \t ", -EINVAL, 0, 0);
+do_strtosz(".", -EINVAL, 0, 0);
+do_strtosz(" .", -EINVAL, 0, 0);
+do_strtosz(" .k", -EINVAL, 0, 0);
+do_strtosz("inf", -EINVAL, 0, 0);
+do_strtosz("NaN", -EINVAL, 0, 0);

 /* Lone suffix is not okay */
-do_strtosz("k", -EINVAL, 0xbaadf00d, 0);
-do_strtosz(" M", -EINVAL, 0xbaadf00d, 0);
+do_strtosz("k", -EINVAL, 0, 0);
+do_strtosz(" M", -EINVAL, 0, 0);

 /* Fractional values require scale larger than bytes */
-do_strtosz("1.1B", -EINVAL, 0xbaadf00d, 0);
-do_strtosz("1.1", -EINVAL, 0xbaadf00d, 0);
+do_strtosz("1.1B", -EINVAL, 0, 0);
+do_strtosz("1.1", -EINVAL, 0, 0);

 /* FIXME underflow in the fraction tail should matter for 'B' */
-do_strtosz("1.1B", -EINVAL, 0xbaadf00d, 0);
+do_strtosz("1.1B", -EINVAL, 0, 0);
 do_strtosz("1.0001B", 0 /* FIXME -EINVAL */,
-   1 /* FIXME 0xbaadf00d */, 23 /* FIXME 0 */);
+   1 /* FIXME 0 */, 23 /* FIXME 0 */);
 do_strtosz("1."
"00"
"00"
@@ -3452,62 +3452,60 @@ static void test_qemu_strtosz_invalid(void)
"00"
"00"
"00"
-   "1B", 0 /* FIXME -EINVAL */, 1 /* FIXME 0xbaadf00d */,
+   "1B", 0 /* FIXME -EINVAL */, 1 /* FIXME 0 */,
354 /* FIXME 0 */);

 /* No hex fractions */
-do_strtosz("0x1.8k", -EINVAL, 0xbaadf00d, 0);
-do_strtosz("0x1.k", -EINVAL, 0xbaadf00d, 0);
+do_strtosz("0x1.8k", -EINVAL, 0, 0);
+do_strtosz("0x1.k", -EINVAL, 0, 0);

 /* No hex suffixes */
-do_strtosz("0x18M",

[PULL 09/21] cutils: Adjust signature of parse_uint[_full]

2023-06-01 Thread Eric Blake

It's already confusing that we have two very similar functions for
wrapping the parse of a 64-bit unsigned value, differing mainly on
whether they permit leading '-'.  Adjust the signature of parse_uint()
and parse_uint_full() to be like all of qemu_strto*(): put the result
parameter last, use the same types (uint64_t and unsigned long long
have the same width, but are not always the same type), and mark
endptr const (this latter change only affects the rare caller of
parse_uint).  Adjust all callers in the tree.

Signed-off-by: Eric Blake 
Reviewed-by: Hanna Czenczek 
Message-Id: <20230522190441.64278-8-ebl...@redhat.com>
---
 include/qemu/cutils.h |   5 +-
 audio/audio_legacy.c  |   4 +-
 block/gluster.c   |   4 +-
 block/nfs.c   |   4 +-
 blockdev.c|   4 +-
 contrib/ivshmem-server/main.c |   4 +-
 qapi/opts-visitor.c   |  10 +--
 tests/unit/test-cutils.c  | 119 +++---
 ui/vnc.c  |   4 +-
 util/cutils.c |  13 ++--
 util/guest-random.c   |   4 +-
 util/qemu-sockets.c   |  10 +--
 12 files changed, 85 insertions(+), 100 deletions(-)

diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h
index 92c436d8c70..92c927a6a35 100644
--- a/include/qemu/cutils.h
+++ b/include/qemu/cutils.h
@@ -163,9 +163,8 @@ int qemu_strtou64(const char *nptr, const char **endptr, 
int base,
 int qemu_strtod(const char *nptr, const char **endptr, double *result);
 int qemu_strtod_finite(const char *nptr, const char **endptr, double *result);

-int parse_uint(const char *s, unsigned long long *value, char **endptr,
-   int base);
-int parse_uint_full(const char *s, unsigned long long *value, int base);
+int parse_uint(const char *s, const char **endptr, int base, uint64_t *value);
+int parse_uint_full(const char *s, int base, uint64_t *value);

 int qemu_strtosz(const char *nptr, const char **end, uint64_t *result);
 int qemu_strtosz_MiB(const char *nptr, const char **end, uint64_t *result);
diff --git a/audio/audio_legacy.c b/audio/audio_legacy.c
index b848001ff70..dc72ba55e9a 100644
--- a/audio/audio_legacy.c
+++ b/audio/audio_legacy.c
@@ -35,8 +35,8 @@

 static uint32_t toui32(const char *str)
 {
-unsigned long long ret;
-if (parse_uint_full(str, , 10) || ret > UINT32_MAX) {
+uint64_t ret;
+if (parse_uint_full(str, 10, ) || ret > UINT32_MAX) {
 dolog("Invalid integer value `%s'\n", str);
 exit(1);
 }
diff --git a/block/gluster.c b/block/gluster.c
index 185a83e5e53..ad5fadbe793 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -424,7 +424,7 @@ static struct glfs 
*qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
 int ret;
 int old_errno;
 SocketAddressList *server;
-unsigned long long port;
+uint64_t port;

 glfs = glfs_find_preopened(gconf->volume);
 if (glfs) {
@@ -445,7 +445,7 @@ static struct glfs 
*qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
server->value->u.q_unix.path, 0);
 break;
 case SOCKET_ADDRESS_TYPE_INET:
-if (parse_uint_full(server->value->u.inet.port, , 10) < 0 ||
+if (parse_uint_full(server->value->u.inet.port, 10, ) < 0 ||
 port > 65535) {
 error_setg(errp, "'%s' is not a valid port number",
server->value->u.inet.port);
diff --git a/block/nfs.c b/block/nfs.c
index 8f89ece69fa..c24df49747d 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -114,13 +114,13 @@ static int nfs_parse_uri(const char *filename, QDict 
*options, Error **errp)
 qdict_put_str(options, "path", uri->path);

 for (i = 0; i < qp->n; i++) {
-unsigned long long val;
+uint64_t val;
 if (!qp->p[i].value) {
 error_setg(errp, "Value for NFS parameter expected: %s",
qp->p[i].name);
 goto out;
 }
-if (parse_uint_full(qp->p[i].value, , 0)) {
+if (parse_uint_full(qp->p[i].value, 0, )) {
 error_setg(errp, "Illegal value for NFS parameter: %s",
qp->p[i].name);
 goto out;
diff --git a/blockdev.c b/blockdev.c
index db2725fe741..e6eba61484a 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -341,10 +341,10 @@ static bool parse_stats_intervals(BlockAcctStats *stats, 
QList *intervals,
 switch (qobject_type(entry->value)) {

 case QTYPE_QSTRING: {
-unsigned long long length;
+uint64_t length;
 const char *str = qstring_get_str(qobject_to(QString,
  entry->value));
-if (parse_uint_full(str, , 10) == 0 &&
+if (parse_uint_full(str, 10, ) == 0 &&
 length > 0 && length <= UINT_MAX) {
 block_acct_add_interval(stats, (unsigned) length);
 } else {
diff --git

[PULL 12/21] test-cutils: Prepare for upcoming semantic change in qemu_strtosz

2023-06-01 Thread Eric Blake

A quick search for 'qemu_strtosz' in the code base shows that outside
of the testsuite, the ONLY place that passes a non-NULL pointer to
@endptr of any variant of a size parser is in hmp.c (the 'o' parser of
monitor_parse_arguments), and that particular caller warns of
"extraneous characters at the end of line" unless the trailing bytes
are purely whitespace.  Thus, it makes no semantic difference at the
high level whether we parse "1.5e1k" as "1" + ".5e1" + "k" (an attempt
to use scientific notation in strtod with a scaling suffix of 'k' with
no trailing junk, but which qemu_strtosz says should fail with
EINVAL), or as "1.5e" + "1k" (a valid size with scaling suffix of 'e'
for exabytes, followed by two junk bytes) - either way, any user
passing such a string will get an error message about a parse failure.

However, an upcoming patch to qemu_strtosz will fix other corner case
bugs in handling the fractional portion of a size, and in doing so, it
is easier to declare that qemu_strtosz() itself stops parsing at the
first 'e' rather than blindly consuming whatever strtod() will
recognize.  Once that is fixed, the difference will be visible at the
low level (getting a valid parse with trailing garbage when @endptr is
non-NULL, while continuing to get -EINVAL when @endptr is NULL); this
is easier to demonstrate by moving the affected strings from
test_qemu_strtosz_invalid() (which declares them as always -EINVAL) to
test_qemu_strtosz_trailing() (where @endptr affects behavior, for now
with FIXME comments).

Note that a similar argument could be made for having "0x1.5" or
"0x1M" parse as 0x1 with ".5" or "M" as trailing junk, instead of
blindly treating it as -EINVAL; however, as these cases do not suffer
from the same problems as floating point, they are not worth changing
at this time.

Signed-off-by: Eric Blake 
Reviewed-by: Hanna Czenczek 
Message-Id: <20230522190441.64278-11-ebl...@redhat.com>
---
 tests/unit/test-cutils.c | 42 ++--
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/tests/unit/test-cutils.c b/tests/unit/test-cutils.c
index e05572cd92c..3a095272d0f 100644
--- a/tests/unit/test-cutils.c
+++ b/tests/unit/test-cutils.c
@@ -3563,21 +3563,6 @@ static void test_qemu_strtosz_invalid(void)
 g_assert_cmphex(res, ==, 0xbaadf00d);
 g_assert_true(endptr == str);

-/* No floating point exponents */
-str = "1.5e1k";
-endptr = NULL;
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, -EINVAL);
-g_assert_cmphex(res, ==, 0xbaadf00d);
-g_assert_true(endptr == str);
-
-str = "1.5E+0k";
-endptr = NULL;
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, -EINVAL);
-g_assert_cmphex(res, ==, 0xbaadf00d);
-g_assert_true(endptr == str);
-
 /* No hex fractions */
 str = "0x1.8k";
 endptr = NULL;
@@ -3681,6 +3666,33 @@ static void test_qemu_strtosz_trailing(void)
 err = qemu_strtosz(str, NULL, );
 g_assert_cmpint(err, ==, -EINVAL);
 g_assert_cmphex(res, ==, 0xbaadf00d);
+
+/* FIXME should stop parse after 'e'. No floating point exponents */
+str = "1.5e1k";
+endptr = NULL;
+res = 0xbaadf00d;
+err = qemu_strtosz(str, , );
+g_assert_cmpint(err, ==, -EINVAL /* FIXME 0 */);
+g_assert_cmphex(res, ==, 0xbaadf00d /* FIXME EiB * 1.5 */);
+g_assert_true(endptr == str /* FIXME + 4 */);
+
+res = 0xbaadf00d;
+err = qemu_strtosz(str, NULL, );
+g_assert_cmpint(err, ==, -EINVAL);
+g_assert_cmpint(res, ==, 0xbaadf00d);
+
+str = "1.5E+0k";
+endptr = NULL;
+res = 0xbaadf00d;
+err = qemu_strtosz(str, , );
+g_assert_cmpint(err, ==, -EINVAL /* FIXME 0 */);
+g_assert_cmphex(res, ==, 0xbaadf00d /* FIXME EiB * 1.5 */);
+g_assert_true(endptr == str /* FIXME + 4 */);
+
+res = 0xbaadf00d;
+err = qemu_strtosz(str, NULL, );
+g_assert_cmpint(err, ==, -EINVAL);
+g_assert_cmphex(res, ==, 0xbaadf00d);
 }

 static void test_qemu_strtosz_erange(void)
-- 
2.40.1

[PULL 16/21] test-cutils: Add more coverage to qemu_strtosz

2023-06-01 Thread Eric Blake

Add some more strings that the user might send our way.  In
particular, some of these additions include FIXME comments showing
where our parser doesn't quite behave the way we want.

Signed-off-by: Eric Blake 
Reviewed-by: Hanna Czenczek 
Message-Id: <20230522190441.64278-15-ebl...@redhat.com>
---
 tests/unit/test-cutils.c | 140 ---
 1 file changed, 129 insertions(+), 11 deletions(-)

diff --git a/tests/unit/test-cutils.c b/tests/unit/test-cutils.c
index 26e3ba4b9f3..a629ef2ea39 100644
--- a/tests/unit/test-cutils.c
+++ b/tests/unit/test-cutils.c
@@ -3326,8 +3326,8 @@ static void test_qemu_strtosz_simple(void)
 /* Leading 0 gives decimal results, not octal */
 do_strtosz("08", 0, 8, 2);

-/* Leading space is ignored */
-do_strtosz(" 12345", 0, 12345, 6);
+/* Leading space and + are ignored */
+do_strtosz(" +12345", 0, 12345, 7);

 /* 2^53-1 */
 do_strtosz("9007199254740991", 0, 0x1fULL, 16);
@@ -3354,17 +3354,27 @@ static void test_qemu_strtosz_hex(void)

 do_strtosz("0xab", 0, 171, 4);

-do_strtosz("0xae", 0, 174, 4);
+do_strtosz(" +0xae", 0, 174, 6);
 }

 static void test_qemu_strtosz_units(void)
 {
-/* default is M */
+/* default scale depends on function */
+do_strtosz("1", 0, 1, 1);
 do_strtosz_MiB("1", 0, MiB, 1);
+do_strtosz_metric("1", 0, 1, 1);

+/* Explicit byte suffix works for all functions */
 do_strtosz("1B", 0, 1, 2);
+do_strtosz_MiB("1B", 0, 1, 2);
+do_strtosz_metric("1B", 0, 1, 2);

+/* Expose the scale */
 do_strtosz("1K", 0, KiB, 2);
+do_strtosz_MiB("1K", 0, KiB, 2);
+do_strtosz_metric("1K", 0, 1000, 2);
+
+/* Other suffixes, see also test_qemu_strtosz_metric */
 do_strtosz("1M", 0, MiB, 2);
 do_strtosz("1G", 0, GiB, 2);
 do_strtosz("1T", 0, TiB, 2);
@@ -3376,14 +3386,37 @@ static void test_qemu_strtosz_float(void)
 {
 do_strtosz("0.5E", 0, EiB / 2, 4);

+/* Implied M suffix okay */
+do_strtosz_MiB("0.5", 0, MiB / 2, 3);
+
 /* For convenience, a fraction of 0 is tolerated even on bytes */
 do_strtosz("1.0B", 0, 1, 4);

-/* An empty fraction is tolerated */
+/* An empty fraction tail is tolerated */
 do_strtosz("1.k", 0, 1024, 3);

+/* FIXME An empty fraction head should be tolerated */
+do_strtosz(" .5k", -EINVAL /* FIXME 0 */, 0xbaadf00d /* FIXME 512 */,
+   0 /* FIXME 4 */);
+
 /* For convenience, we permit values that are not byte-exact */
 do_strtosz("12.345M", 0, (uint64_t) (12.345 * MiB + 0.5), 7);
+
+/* FIXME Fraction tail should round correctly */
+do_strtosz("1.k", 0, 2048, 7);
+do_strtosz("1.k", 0,
+   1024 /* FIXME 2048 */, 55);
+
+/* FIXME ERANGE underflow in the fraction tail should not matter for 'k' */
+do_strtosz("1."
+   "00"
+   "00"
+   "00"
+   "00"
+   "00"
+   "00"
+   "00"
+   "1k", 0, 1 /* FIXME 1024 */, 354);
 }

 static void test_qemu_strtosz_invalid(void)
@@ -3393,57 +3426,142 @@ static void test_qemu_strtosz_invalid(void)
 /* Must parse at least one digit */
 do_strtosz("", -EINVAL, 0xbaadf00d, 0);
 do_strtosz(" \t ", -EINVAL, 0xbaadf00d, 0);
-do_strtosz("crap", -EINVAL, 0xbaadf00d, 0);
+do_strtosz(".", -EINVAL, 0xbaadf00d, 0);
+do_strtosz(" .", -EINVAL, 0xbaadf00d, 0);
+do_strtosz(" .k", -EINVAL, 0xbaadf00d, 0);
 do_strtosz("inf", -EINVAL, 0xbaadf00d, 0);
 do_strtosz("NaN", -EINVAL, 0xbaadf00d, 0);

+/* Lone suffix is not okay */
+do_strtosz("k", -EINVAL, 0xbaadf00d, 0);
+do_strtosz(" M", -EINVAL, 0xbaadf00d, 0);
+
 /* Fractional values require scale larger than bytes */
 do_strtosz("1.1B", -EINVAL, 0xbaadf00d, 0);
 do_strtosz("1.1", -EINVAL, 0xbaadf00d, 0);

+/* FIXME underflow in the fraction tail should matter for 'B' */
+do_strtosz("1.1B", -EINVAL, 0xbaadf00d, 0);
+do_strtosz("1.0001B", 0 /* FIXME -EINVAL */,
+   1 /* FIXME 0xbaadf00d */, 23 /* FIXME 0 */);
+do_strtosz("1."
+   "00"
+   "00"
+   "00"
+   "00"
+   "00"
+   "00"
+

[PULL 18/21] cutils: Set value in all integral qemu_strto* error paths

2023-06-01 Thread Eric Blake

Our goal in writing qemu_strtoi() and friends is to have an interface
harder to abuse than libc's strtol().  Leaving the return value
uninitialized on some but not all error paths does not lend itself
well to this goal; and our documentation wasn't helpful on what to
expect.

Note that the previous patch changed all qemu_strtosz() EINVAL error
paths to slam value to 0 rather than stay uninitialized, even when the
EINVAL error occurs because of trailing junk.  But for the remaining
integral qemu_strto*, it's easier to return the parsed value than to
force things back to zero, in part because of how check_strtox_error
works; in part because people expect that from libc strto* (while
there is no libc strtosz to compare to), and in part because doing so
creates less churn in the testsuite.

Here, the list of affected callers is much longer ('git grep
"qemu_strto[ui]" "*.c" "**/*.c" | grep -v tests/ |wc -l' outputs 107,
although a few of those are the implementation in cutils.c), so
touching as little as possible is the wisest course of action.

Signed-off-by: Eric Blake 
Reviewed-by: Hanna Czenczek 
Message-Id: <20230522190441.64278-17-ebl...@redhat.com>
---
 tests/unit/test-cutils.c | 24 +++
 util/cutils.c| 42 +---
 2 files changed, 38 insertions(+), 28 deletions(-)

diff --git a/tests/unit/test-cutils.c b/tests/unit/test-cutils.c
index 2189ebc92f3..e5b780672d1 100644
--- a/tests/unit/test-cutils.c
+++ b/tests/unit/test-cutils.c
@@ -320,7 +320,7 @@ static void test_qemu_strtoi_null(void)
 err = qemu_strtoi(NULL, , 0, );

 g_assert_cmpint(err, ==, -EINVAL);
-g_assert_cmpint(res, ==, 999);
+g_assert_cmpint(res, ==, 0);
 g_assert_null(endptr);
 }

@@ -661,7 +661,7 @@ static void test_qemu_strtoi_full_null(void)
 err = qemu_strtoi(NULL, , 0, );

 g_assert_cmpint(err, ==, -EINVAL);
-g_assert_cmpint(res, ==, 999);
+g_assert_cmpint(res, ==, 0);
 g_assert_null(endptr);
 }

@@ -764,7 +764,7 @@ static void test_qemu_strtoui_null(void)
 err = qemu_strtoui(NULL, , 0, );

 g_assert_cmpint(err, ==, -EINVAL);
-g_assert_cmpuint(res, ==, 999);
+g_assert_cmpuint(res, ==, 0);
 g_assert_null(endptr);
 }

@@ -1102,7 +1102,7 @@ static void test_qemu_strtoui_full_null(void)
 err = qemu_strtoui(NULL, NULL, 0, );

 g_assert_cmpint(err, ==, -EINVAL);
-g_assert_cmpuint(res, ==, 999);
+g_assert_cmpuint(res, ==, 0);
 }

 static void test_qemu_strtoui_full_empty(void)
@@ -1202,7 +1202,7 @@ static void test_qemu_strtol_null(void)
 err = qemu_strtol(NULL, , 0, );

 g_assert_cmpint(err, ==, -EINVAL);
-g_assert_cmpint(res, ==, 999);
+g_assert_cmpint(res, ==, 0);
 g_assert_null(endptr);
 }

@@ -1516,7 +1516,7 @@ static void test_qemu_strtol_full_null(void)
 err = qemu_strtol(NULL, , 0, );

 g_assert_cmpint(err, ==, -EINVAL);
-g_assert_cmpint(res, ==, 999);
+g_assert_cmpint(res, ==, 0);
 g_assert_null(endptr);
 }

@@ -1619,7 +1619,7 @@ static void test_qemu_strtoul_null(void)
 err = qemu_strtoul(NULL, , 0, );

 g_assert_cmpint(err, ==, -EINVAL);
-g_assert_cmpuint(res, ==, 999);
+g_assert_cmpuint(res, ==, 0);
 g_assert_null(endptr);
 }

@@ -1932,7 +1932,7 @@ static void test_qemu_strtoul_full_null(void)
 err = qemu_strtoul(NULL, NULL, 0, );

 g_assert_cmpint(err, ==, -EINVAL);
-g_assert_cmpuint(res, ==, 999);
+g_assert_cmpuint(res, ==, 0);
 }

 static void test_qemu_strtoul_full_empty(void)
@@ -2032,7 +2032,7 @@ static void test_qemu_strtoi64_null(void)
 err = qemu_strtoi64(NULL, , 0, );

 g_assert_cmpint(err, ==, -EINVAL);
-g_assert_cmpint(res, ==, 999);
+g_assert_cmpint(res, ==, 0);
 g_assert_null(endptr);
 }

@@ -2322,7 +2322,7 @@ static void test_qemu_strtoi64_full_null(void)
 err = qemu_strtoi64(NULL, NULL, 0, );

 g_assert_cmpint(err, ==, -EINVAL);
-g_assert_cmpint(res, ==, 999);
+g_assert_cmpint(res, ==, 0);
 }

 static void test_qemu_strtoi64_full_empty(void)
@@ -2425,7 +2425,7 @@ static void test_qemu_strtou64_null(void)
 err = qemu_strtou64(NULL, , 0, );

 g_assert_cmpint(err, ==, -EINVAL);
-g_assert_cmpuint(res, ==, 999);
+g_assert_cmpuint(res, ==, 0);
 g_assert_null(endptr);
 }

@@ -2714,7 +2714,7 @@ static void test_qemu_strtou64_full_null(void)
 err = qemu_strtou64(NULL, NULL, 0, );

 g_assert_cmpint(err, ==, -EINVAL);
-g_assert_cmpuint(res, ==, 999);
+g_assert_cmpuint(res, ==, 0);
 }

 static void test_qemu_strtou64_full_empty(void)
diff --git a/util/cutils.c b/util/cutils.c
index c5530a5c2be..edfb71a2171 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -384,12 +384,13 @@ static int check_strtox_error(const char *nptr, char *ep,
  *
  * @nptr may be null, and no conversion is performed then.
  *
- * If no conversion is performed, store @nptr in *@endptr and return
- * -EINVAL.
+ * If no conversion is performed, store @nptr in *@endptr, 0 in

[PULL 19/21] cutils: Use parse_uint in qemu_strtosz for negative rejection

2023-06-01 Thread Eric Blake

Rather than open-coding two different ways to check for an unwanted
negative sign, reuse the same code in both functions.  That way, if we
decide down the road to accept "-0" instead of rejecting it, we have
fewer places to change.  Also, it means we now get ERANGE instead of
EINVAL for negative values in qemu_strtosz, which is reasonable for
what it represents.  This in turn changes the expected output of a
couple of iotests.

The change is not quite complete: negative fractional scaled values
can trip us up.  This will be fixed in a later patch addressing other
issues with fractional scaled values.

Signed-off-by: Eric Blake 
Reviewed-by: Hanna Czenczek 
Message-Id: <20230522190441.64278-18-ebl...@redhat.com>
---
 tests/unit/test-cutils.c | 7 +++
 util/cutils.c| 8 ++--
 tests/qemu-iotests/049.out   | 7 ++-
 tests/qemu-iotests/178.out.qcow2 | 3 +--
 tests/qemu-iotests/178.out.raw   | 3 +--
 5 files changed, 9 insertions(+), 19 deletions(-)

diff --git a/tests/unit/test-cutils.c b/tests/unit/test-cutils.c
index e5b780672d1..c2dbed9eda9 100644
--- a/tests/unit/test-cutils.c
+++ b/tests/unit/test-cutils.c
@@ -3519,10 +3519,9 @@ static void test_qemu_strtosz_trailing(void)
 static void test_qemu_strtosz_erange(void)
 {
 /* FIXME negative values fit better as ERANGE */
-do_strtosz(" -0", -EINVAL /* FIXME -ERANGE */, 0, 0 /* FIXME 3 */);
-do_strtosz("-1", -EINVAL /* FIXME -ERANGE */, 0, 0 /* FIXME 2 */);
-do_strtosz_full("-2M", qemu_strtosz, -EINVAL /* FIXME -ERANGE */, 0,
-0 /* FIXME 2 */, -EINVAL, 0);
+do_strtosz(" -0", -ERANGE, 0, 3);
+do_strtosz("-1", -ERANGE, 0, 2);
+do_strtosz_full("-2M", qemu_strtosz, -ERANGE, 0, 2, -EINVAL, 0);
 do_strtosz(" -.0", -EINVAL /* FIXME -ERANGE */, 0, 0 /* FIXME 4 */);
 do_strtosz_full("-.1k", qemu_strtosz, -EINVAL /* FIXME -ERANGE */, 0,
 0 /* FIXME 3 */, -EINVAL, 0);
diff --git a/util/cutils.c b/util/cutils.c
index edfb71a2171..e3a49209a94 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -201,6 +201,7 @@ static int64_t suffix_mul(char suffix, int64_t unit)
  * - hex with scaling suffix, such as 0x20M
  * - octal, such as 08
  * - fractional hex, such as 0x1.8
+ * - negative values, including -0
  * - floating point exponents, such as 1e3
  *
  * The end pointer will be returned in *end, if not NULL.  If there is
@@ -226,15 +227,10 @@ static int do_strtosz(const char *nptr, const char **end,
 int64_t mul;

 /* Parse integral portion as decimal. */
-retval = qemu_strtou64(nptr, , 10, );
+retval = parse_uint(nptr, , 10, );
 if (retval) {
 goto out;
 }
-if (memchr(nptr, '-', endptr - nptr) != NULL) {
-endptr = nptr;
-retval = -EINVAL;
-goto out;
-}
 if (val == 0 && (*endptr == 'x' || *endptr == 'X')) {
 /* Input looks like hex; reparse, and insist on no fraction or suffix. 
*/
 retval = qemu_strtou64(nptr, , 16, );
diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out
index 8719c91b483..34e1b452e6e 100644
--- a/tests/qemu-iotests/049.out
+++ b/tests/qemu-iotests/049.out
@@ -92,13 +92,10 @@ Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 
extended_l2=off comp
 == 3. Invalid sizes ==

 qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1024
-qemu-img: Invalid image size specified. You may use k, M, G, T, P or E 
suffixes for
-qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes.
+qemu-img: Invalid image size specified. Must be between 0 and 
9223372036854775807.

 qemu-img create -f qcow2 -o size=-1024 TEST_DIR/t.qcow2
-qemu-img: TEST_DIR/t.qcow2: Parameter 'size' expects a non-negative number 
below 2^64
-Optional suffix k, M, G, T, P or E means kilo-, mega-, giga-, tera-, peta-
-and exabytes, respectively.
+qemu-img: TEST_DIR/t.qcow2: Value '-1024' is out of range for parameter 'size'

 qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- -1k
 qemu-img: Invalid image size specified. You may use k, M, G, T, P or E 
suffixes for
diff --git a/tests/qemu-iotests/178.out.qcow2 b/tests/qemu-iotests/178.out.qcow2
index 0d51fe401ec..fe193fd5f4f 100644
--- a/tests/qemu-iotests/178.out.qcow2
+++ b/tests/qemu-iotests/178.out.qcow2
@@ -13,8 +13,7 @@ qemu-img: Invalid option list: ,
 qemu-img: Invalid parameter 'snapshot.foo'
 qemu-img: Failed in parsing snapshot param 'snapshot.foo=bar'
 qemu-img: --output must be used with human or json as argument.
-qemu-img: Invalid image size specified. You may use k, M, G, T, P or E 
suffixes for
-qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes.
+qemu-img: Invalid image size specified. Must be between 0 and 
9223372036854775807.
 qemu-img: Unknown file format 'foo'

 == Size calculation for a new file (human) ==
diff --git a/tests/qemu-iotests/178.out.raw b/tests/qemu-iotests/178.out.raw
index 116241ddef2..445e460fad9 100644
--- a/tests/qemu-iotests/178.out.raw
+++

[PULL 08/21] cutils: Document differences between parse_uint and qemu_strtou64

2023-06-01 Thread Eric Blake

These two functions are subtly different, and not just because of
swapped parameter order.  It took me adding better unit tests to
figure out why.  Document the differences to make it more obvious to
developers trying to pick which one to use, as well as to aid in
upcoming semantic changes.

While touching the documentation, adjust a mis-statement: parse_uint
does not return -EINVAL on invalid base, but assert()s, like all the
other qemu_strto* functions that take a base argument.

Signed-off-by: Eric Blake 
Reviewed-by: Hanna Czenczek 
Message-Id: <20230522190441.64278-7-ebl...@redhat.com>
---
 util/cutils.c | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/util/cutils.c b/util/cutils.c
index 9b6ce9179c4..36c14b769fd 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -611,6 +611,8 @@ int qemu_strtoi64(const char *nptr, const char **endptr, 
int base,
  * Convert string @nptr to an uint64_t.
  *
  * Works like qemu_strtoul(), except it stores UINT64_MAX on overflow.
+ * (If you want to prohibit negative numbers that wrap around to
+ * positive, use parse_uint()).
  */
 int qemu_strtou64(const char *nptr, const char **endptr, int base,
   uint64_t *result)
@@ -721,7 +723,8 @@ const char *qemu_strchrnul(const char *s, int c)
  *
  * @s: String to parse
  * @value: Destination for parsed integer value
- * @endptr: Destination for pointer to first character not consumed
+ * @endptr: Destination for pointer to first character not consumed, must
+ * not be %NULL
  * @base: integer base, between 2 and 36 inclusive, or 0
  *
  * Parse unsigned integer
@@ -729,15 +732,16 @@ const char *qemu_strchrnul(const char *s, int c)
  * Parsed syntax is like strtoull()'s: arbitrary whitespace, a single optional
  * '+' or '-', an optional "0x" if @base is 0 or 16, one or more digits.
  *
- * If @s is null, or @base is invalid, or @s doesn't start with an
- * integer in the syntax above, set *@value to 0, *@endptr to @s, and
- * return -EINVAL.
+ * If @s is null, or @s doesn't start with an integer in the syntax
+ * above, set *@value to 0, *@endptr to @s, and return -EINVAL.
  *
  * Set *@endptr to point right beyond the parsed integer (even if the integer
  * overflows or is negative, all digits will be parsed and *@endptr will
  * point right beyond them).
  *
  * If the integer is negative, set *@value to 0, and return -ERANGE.
+ * (If you want to allow negative numbers that wrap around within
+ * bounds, use qemu_strtou64()).
  *
  * If the integer overflows unsigned long long, set *@value to
  * ULLONG_MAX, and return -ERANGE.
@@ -794,10 +798,10 @@ out:
  *
  * Parse unsigned integer from entire string
  *
- * Have the same behavior of parse_uint(), but with an additional check
- * for additional data after the parsed number. If extra characters are present
- * after the parsed number, the function will return -EINVAL, and *@v will
- * be set to 0.
+ * Have the same behavior of parse_uint(), but with an additional
+ * check for additional data after the parsed number. If extra
+ * characters are present after a non-overflowing parsed number, the
+ * function will return -EINVAL, and *@v will be set to 0.
  */
 int parse_uint_full(const char *s, unsigned long long *value, int base)
 {
-- 
2.40.1

[PULL 11/21] test-cutils: Add coverage of qemu_strtod

2023-06-01 Thread Eric Blake

It's hard to tweak code for consistency if I can't prove what will or
won't break from those tweaks.  Time to add unit tests for
qemu_strtod() and qemu_strtod_finite().

Among other things, I wrote a check whether we have C99 semantics for
strtod("0x1") (which MUST parse hex numbers) rather than C89 (which
must stop parsing at 'x').  These days, I suspect that is okay; but if
it fails CI checks, knowing the difference will help us decide what we
want to do about it.  Note that C2x, while not final at the time of
this patch, has been considering whether to make strtol("0b1") parse
as 1 with no slop instead of the C17 parse of 0 with slop "b1"; that
decision may also bleed over to strtod().  But for now, I didn't think
it worth adding unit tests on that front (to strtol or strtod) as
things may still change.

Likewise, there are plenty more corner cases of strtod proper that I
don't explicitly test here, but there are enough unit tests added here
that it covers all the branches reached in our wrappers.  In
particular, it demonstrates the difference on when *value is left
uninitialized, which an upcoming patch will normalize.

Signed-off-by: Eric Blake 
Reviewed-by: Hanna Czenczek 
Message-Id: <20230522190441.64278-10-ebl...@redhat.com>
---
 tests/unit/test-cutils.c | 512 +++
 1 file changed, 512 insertions(+)

diff --git a/tests/unit/test-cutils.c b/tests/unit/test-cutils.c
index 20ab0ecb673..e05572cd92c 100644
--- a/tests/unit/test-cutils.c
+++ b/tests/unit/test-cutils.c
@@ -25,6 +25,8 @@
  * THE SOFTWARE.
  */

+#include 
+
 #include "qemu/osdep.h"
 #include "qemu/cutils.h"
 #include "qemu/units.h"
@@ -2789,6 +2791,487 @@ static void test_qemu_strtou64_full_erange_junk(void)
 g_assert_cmpuint(res, ==, UINT64_MAX);
 }

+static void test_qemu_strtod_simple(void)
+{
+const char *str;
+const char *endptr;
+int err;
+double res;
+
+/* no radix or exponent */
+str = "1";
+endptr = "somewhere";
+res = 999;
+err = qemu_strtod(str, , );
+g_assert_cmpint(err, ==, 0);
+g_assert_cmpfloat(res, ==, 1.0);
+g_assert_true(endptr == str + 1);
+
+/* leading space and sign */
+str = " -0.0";
+endptr = "somewhere";
+res = 999;
+err = qemu_strtod(str, , );
+g_assert_cmpint(err, ==, 0);
+g_assert_cmpfloat(res, ==, -0.0);
+g_assert_true(signbit(res));
+g_assert_true(endptr == str + 5);
+
+/* fraction only */
+str = "+.5";
+endptr = "somewhere";
+res = 999;
+err = qemu_strtod(str, , );
+g_assert_cmpint(err, ==, 0);
+g_assert_cmpfloat(res, ==, 0.5);
+g_assert_true(endptr == str + 3);
+
+/* exponent */
+str = "1.e+1";
+endptr = "somewhere";
+res = 999;
+err = qemu_strtod(str, , );
+g_assert_cmpint(err, ==, 0);
+g_assert_cmpfloat(res, ==, 10.0);
+g_assert_true(endptr == str + 5);
+
+/* hex without radix */
+str = "0x10";
+endptr = "somewhere";
+res = 999;
+err = qemu_strtod(str, , );
+g_assert_cmpint(err, ==, 0);
+g_assert_cmpfloat(res, ==, 16.0);
+g_assert_true(endptr == str + 4);
+}
+
+static void test_qemu_strtod_einval(void)
+{
+const char *str;
+const char *endptr;
+int err;
+double res;
+
+/* empty */
+str = "";
+endptr = "somewhere";
+res = 999;
+err = qemu_strtod(str, , );
+g_assert_cmpint(err, ==, -EINVAL);
+g_assert_cmpfloat(res, ==, 0.0);
+g_assert_false(signbit(res));
+g_assert_true(endptr == str);
+
+/* NULL */
+str = NULL;
+endptr = "random";
+res = 999;
+err = qemu_strtod(str, , );
+g_assert_cmpint(err, ==, -EINVAL);
+g_assert_cmpfloat(res, ==, 999.0);
+g_assert_null(endptr);
+
+/* not recognizable */
+str = " junk";
+endptr = "somewhere";
+res = 999;
+err = qemu_strtod(str, , );
+g_assert_cmpint(err, ==, -EINVAL);
+g_assert_cmpfloat(res, ==, 0.0);
+g_assert_false(signbit(res));
+g_assert_true(endptr == str);
+}
+
+static void test_qemu_strtod_erange(void)
+{
+const char *str;
+const char *endptr;
+int err;
+double res;
+
+/* overflow */
+str = "9e999";
+endptr = "somewhere";
+res = 999;
+err = qemu_strtod(str, , );
+g_assert_cmpint(err, ==, -ERANGE);
+g_assert_cmpfloat(res, ==, HUGE_VAL);
+g_assert_true(endptr == str + 5);
+
+str = "-9e+999";
+endptr = "somewhere";
+res = 999;
+err = qemu_strtod(str, , );
+g_assert_cmpint(err, ==, -ERANGE);
+g_assert_cmpfloat(res, ==, -HUGE_VAL);
+g_assert_true(endptr == str + 7);
+
+/* underflow */
+str = "-9e-999";
+endptr = "somewhere";
+res = 999;
+err = qemu_strtod(str, , );
+g_assert_cmpint(err, ==, -ERANGE);
+g_assert_cmpfloat(res, >=, -DBL_MIN);
+g_assert_cmpfloat(res, <=, -0.0);
+g_assert_true(signbit(res));
+g_assert_true(endptr == str + 7);
+}
+
+static void test_qemu_strtod_nonfinite(void)
+{
+const char *str;
+const

[PULL 03/21] test-cutils: Avoid g_assert in unit tests

2023-06-01 Thread Eric Blake

glib documentation[1] is clear: g_assert() should be avoided in unit
tests because it is ineffective if G_DISABLE_ASSERT is defined; unit
tests should stick to constructs based on g_assert_true() instead.
Note that since commit 262a69f428, we intentionally state that you
cannot define G_DISABLE_ASSERT while building qemu; but our code can
be copied to other projects without that restriction, so we should be
consistent.

For most of the replacements in this patch, using g_assert_cmpstr()
would be a regression in quality - although it would helpfully display
the string contents of both pointers on test failure, here, we really
do care about pointer equality, not just string content equality.  But
when a NULL pointer is expected, g_assert_null works fine.

[1] https://libsoup.org/glib/glib-Testing.html#g-assert

Signed-off-by: Eric Blake 
Reviewed-by: Hanna Czenczek 
Reviewed-by: Philippe Mathieu-Daudé 
Message-Id: <20230522190441.64278-2-ebl...@redhat.com>
---
 tests/unit/test-cutils.c | 324 +++
 1 file changed, 162 insertions(+), 162 deletions(-)

diff --git a/tests/unit/test-cutils.c b/tests/unit/test-cutils.c
index 3c4f8754202..0202ac0d5b3 100644
--- a/tests/unit/test-cutils.c
+++ b/tests/unit/test-cutils.c
@@ -1,7 +1,7 @@
 /*
  * cutils.c unit-tests
  *
- * Copyright (C) 2013 Red Hat Inc.
+ * Copyright Red Hat
  *
  * Authors:
  *  Eduardo Habkost 
@@ -40,7 +40,7 @@ static void test_parse_uint_null(void)

 g_assert_cmpint(r, ==, -EINVAL);
 g_assert_cmpint(i, ==, 0);
-g_assert(endptr == NULL);
+g_assert_null(endptr);
 }

 static void test_parse_uint_empty(void)
@@ -55,7 +55,7 @@ static void test_parse_uint_empty(void)

 g_assert_cmpint(r, ==, -EINVAL);
 g_assert_cmpint(i, ==, 0);
-g_assert(endptr == str);
+g_assert_true(endptr == str);
 }

 static void test_parse_uint_whitespace(void)
@@ -70,7 +70,7 @@ static void test_parse_uint_whitespace(void)

 g_assert_cmpint(r, ==, -EINVAL);
 g_assert_cmpint(i, ==, 0);
-g_assert(endptr == str);
+g_assert_true(endptr == str);
 }


@@ -86,7 +86,7 @@ static void test_parse_uint_invalid(void)

 g_assert_cmpint(r, ==, -EINVAL);
 g_assert_cmpint(i, ==, 0);
-g_assert(endptr == str);
+g_assert_true(endptr == str);
 }


@@ -102,7 +102,7 @@ static void test_parse_uint_trailing(void)

 g_assert_cmpint(r, ==, 0);
 g_assert_cmpint(i, ==, 123);
-g_assert(endptr == str + 3);
+g_assert_true(endptr == str + 3);
 }

 static void test_parse_uint_correct(void)
@@ -117,7 +117,7 @@ static void test_parse_uint_correct(void)

 g_assert_cmpint(r, ==, 0);
 g_assert_cmpint(i, ==, 123);
-g_assert(endptr == str + strlen(str));
+g_assert_true(endptr == str + strlen(str));
 }

 static void test_parse_uint_octal(void)
@@ -132,7 +132,7 @@ static void test_parse_uint_octal(void)

 g_assert_cmpint(r, ==, 0);
 g_assert_cmpint(i, ==, 0123);
-g_assert(endptr == str + strlen(str));
+g_assert_true(endptr == str + strlen(str));
 }

 static void test_parse_uint_decimal(void)
@@ -147,7 +147,7 @@ static void test_parse_uint_decimal(void)

 g_assert_cmpint(r, ==, 0);
 g_assert_cmpint(i, ==, 123);
-g_assert(endptr == str + strlen(str));
+g_assert_true(endptr == str + strlen(str));
 }


@@ -163,7 +163,7 @@ static void test_parse_uint_llong_max(void)

 g_assert_cmpint(r, ==, 0);
 g_assert_cmpint(i, ==, (unsigned long long)LLONG_MAX + 1);
-g_assert(endptr == str + strlen(str));
+g_assert_true(endptr == str + strlen(str));

 g_free(str);
 }
@@ -180,7 +180,7 @@ static void test_parse_uint_overflow(void)

 g_assert_cmpint(r, ==, -ERANGE);
 g_assert_cmpint(i, ==, ULLONG_MAX);
-g_assert(endptr == str + strlen(str));
+g_assert_true(endptr == str + strlen(str));
 }

 static void test_parse_uint_negative(void)
@@ -195,7 +195,7 @@ static void test_parse_uint_negative(void)

 g_assert_cmpint(r, ==, -ERANGE);
 g_assert_cmpint(i, ==, 0);
-g_assert(endptr == str + strlen(str));
+g_assert_true(endptr == str + strlen(str));
 }


@@ -235,7 +235,7 @@ static void test_qemu_strtoi_correct(void)

 g_assert_cmpint(err, ==, 0);
 g_assert_cmpint(res, ==, 12345);
-g_assert(endptr == str + 5);
+g_assert_true(endptr == str + 5);
 }

 static void test_qemu_strtoi_null(void)
@@ -248,7 +248,7 @@ static void test_qemu_strtoi_null(void)
 err = qemu_strtoi(NULL, , 0, );

 g_assert_cmpint(err, ==, -EINVAL);
-g_assert(endptr == NULL);
+g_assert_null(endptr);
 }

 static void test_qemu_strtoi_empty(void)
@@ -262,7 +262,7 @@ static void test_qemu_strtoi_empty(void)
 err = qemu_strtoi(str, , 0, );

 g_assert_cmpint(err, ==, -EINVAL);
-g_assert(endptr == str);
+g_assert_true(endptr == str);
 }

 static void test_qemu_strtoi_whitespace(void)
@@ -276,7 +276,7 @@ static void test_qemu_strtoi_whitespace(void)
 err = qemu_strtoi(str, , 0, );

 g_assert_cmpint(err, ==, -EINVAL);
-

[PULL 06/21] test-cutils: Test more integer corner cases

2023-06-01 Thread Eric Blake

We have quite a few undertested and underdocumented integer parsing
corner cases.  To ensure that any changes we make in the code are
intentional rather than accidental semantic changes, it is time to add
more unit tests of existing behavior.

In particular, this demonstrates that parse_uint() and qemu_strtou64()
behave differently.  For "-0", it's hard to argue why parse_uint needs
to reject it (it's not a negative integer), but the documentation sort
of mentions it; but it is intentional that all other negative values
are treated as ERANGE with value 0 (compared to qemu_strtou64()
treating "-2" as success and UINT64_MAX-1, for example).

Also, when mixing overflow/underflow with a check for no trailing
junk, parse_uint_full favors ERANGE over EINVAL, while qemu_strto[iu]*
favor EINVAL.  This behavior is outside the C standard, so we can pick
whatever we want, but it would be nice to be consistent.

Note that C requires that "9223372036854775808" fail strtoll() with
ERANGE/INT64_MAX, but "-9223372036854775808" pass with INT64_MIN; we
weren't testing this.  For strtol(), the behavior depends on whether
long is 32- or 64-bits (the cutoff point either being the same as
strtoll() or at "-2147483648").  Meanwhile, C is clear that
"-18446744073709551615" pass strtoull() (but not strtoll) with value 1,
even though we want it to fail parse_uint().  And although
qemu_strtoui() has no C counterpart, it makes more sense if we design
it like 32-bit strtoul() (that is, where "-4294967296" be an alternate
acceptable spelling for "1", but "-0xffffffff00000001" should be
treated as overflow and return 0xffffffff rather than 1).  We aren't
there yet, so some of the tests added in this patch have FIXME
comments.

However, note that C2x will (likely) be adding a SILENT semantic
change, where C17 strtol("0b1", &ep, 2) returns 0 with ep="b1", but
C2x will have it return 1 with ep="".  I did not feel like adding
testing for those corner cases, in part because the next version of C
is not standard and libc support for binary parsing is not yet
wide-spread (as of this patch, glibc.git still misparses bare "0b":
https://sourceware.org/bugzilla/show_bug.cgi?id=30371).

Signed-off-by: Eric Blake 
Message-Id: <20230522190441.64278-5-ebl...@redhat.com>
[eblake: fix a few typos spotted by Hanna]
Reviewed-by: Hanna Czenczek 
---
 tests/unit/test-cutils.c | 929 ---
 1 file changed, 864 insertions(+), 65 deletions(-)

diff --git a/tests/unit/test-cutils.c b/tests/unit/test-cutils.c
index 1eeaf21ae22..011123a2111 100644
--- a/tests/unit/test-cutils.c
+++ b/tests/unit/test-cutils.c
@@ -150,7 +150,6 @@ static void test_parse_uint_decimal(void)
 g_assert_true(endptr == str + strlen(str));
 }

-
 static void test_parse_uint_llong_max(void)
 {
 unsigned long long i = 999;
@@ -168,27 +167,87 @@ static void test_parse_uint_llong_max(void)
 g_free(str);
 }

+static void test_parse_uint_max(void)
+{
+unsigned long long i = 999;
+char f = 'X';
+char *endptr = 
+char *str = g_strdup_printf("%llu", ULLONG_MAX);
+int r;
+
+r = parse_uint(str, , , 0);
+
+g_assert_cmpint(r, ==, 0);
+g_assert_cmpuint(i, ==, ULLONG_MAX);
+g_assert_true(endptr == str + strlen(str));
+
+g_free(str);
+}
+
 static void test_parse_uint_overflow(void)
 {
-unsigned long long i = 999;
+unsigned long long i;
 char f = 'X';
-char *endptr = 
-const char *str = "99";
+char *endptr;
+const char *str;
 int r;

+i = 999;
+endptr = 
+str = "99";
 r = parse_uint(str, , , 0);
+g_assert_cmpint(r, ==, -ERANGE);
+g_assert_cmpuint(i, ==, ULLONG_MAX);
+g_assert_true(endptr == str + strlen(str));

+i = 999;
+endptr = 
+str = "0x1"; /* 65 bits, 64-bit sign bit clear */
+r = parse_uint(str, , , 0);
+g_assert_cmpint(r, ==, -ERANGE);
+g_assert_cmpuint(i, ==, ULLONG_MAX);
+g_assert_true(endptr == str + strlen(str));
+
+i = 999;
+endptr = 
+str = "0x180008000"; /* 65 bits, 64-bit sign bit set */
+r = parse_uint(str, , , 0);
 g_assert_cmpint(r, ==, -ERANGE);
 g_assert_cmpuint(i, ==, ULLONG_MAX);
 g_assert_true(endptr == str + strlen(str));
 }

 static void test_parse_uint_negative(void)
+{
+unsigned long long i;
+char f = 'X';
+char *endptr;
+const char *str;
+int r;
+
+i = 999;
+endptr = 
+str = " \t -321";
+r = parse_uint(str, , , 0);
+g_assert_cmpint(r, ==, -ERANGE);
+g_assert_cmpuint(i, ==, 0);
+g_assert_true(endptr == str + strlen(str));
+
+i = 999;
+endptr = 
+str = "-0x0001";
+r = parse_uint(str, , , 0);
+g_assert_cmpint(r, ==, -ERANGE);
+g_assert_cmpuint(i, ==, 0);
+g_assert_true(endptr == str + strlen(str));
+}
+
+static void test_parse_uint_negzero(void)
 {
 unsigned long long i = 999;
 char f = 'X';

[PULL 07/21] cutils: Fix wraparound parsing in qemu_strtoui

2023-06-01 Thread Eric Blake

While we were matching 32-bit strtol in qemu_strtoi, our use of a
64-bit parse was leaking through for some inaccurate answers in
qemu_strtoui in comparison to a 32-bit strtoul (see the unit test for
examples).  The comment for that function even described what we have
to do for a correct parse, but didn't implement it correctly: since
strtoull checks for overflow against the wrong values and then
negates, we have to temporarily undo negation before checking for
overflow against our desired value.

Our int wrappers would be a lot easier to write if libc had a
guaranteed 32-bit parser even on platforms with 64-bit long.

Whether we parse C2x binary strings like "0b1000" is currently up to
what libc does; our unit tests intentionally don't cover that at the
moment, though.

Fixes: 473a2a331e ("cutils: add qemu_strtoi & qemu_strtoui parsers for 
int/unsigned int types", v2.12.0)
Signed-off-by: Eric Blake 
CC: qemu-sta...@nongnu.org
Message-Id: <20230522190441.64278-6-ebl...@redhat.com>
Reviewed-by: Hanna Czenczek 
---
 tests/unit/test-cutils.c | 20 +---
 util/cutils.c| 25 +++--
 2 files changed, 28 insertions(+), 17 deletions(-)

diff --git a/tests/unit/test-cutils.c b/tests/unit/test-cutils.c
index 011123a2111..ce71900cb73 100644
--- a/tests/unit/test-cutils.c
+++ b/tests/unit/test-cutils.c
@@ -909,7 +909,7 @@ static void test_qemu_strtoui_hex(void)

 static void test_qemu_strtoui_wrap(void)
 {
-/* FIXME - wraparound should be consistent with 32-bit strtoul */
+/* wraparound is consistent with 32-bit strtoul */
 const char *str = "-4294967295"; /* 1 mod 2^32 */
 char f = 'X';
 const char *endptr = 
@@ -918,8 +918,8 @@ static void test_qemu_strtoui_wrap(void)

 err = qemu_strtoui(str, , 0, );

-g_assert_cmpint(err, ==, -ERANGE /* FIXME 0 */);
-g_assert_cmphex(res, ==, UINT_MAX /* FIXME 1 */);
+g_assert_cmpint(err, ==, 0);
+g_assert_cmphex(res, ==, 1);
 g_assert_true(endptr == str + strlen(str));
 }

@@ -978,13 +978,12 @@ static void test_qemu_strtoui_overflow(void)
 g_assert_cmpuint(res, ==, UINT_MAX);
 g_assert_true(endptr == str + strlen(str));

-/* FIXME - overflow should be consistent with 32-bit strtoul */
 str = "0xfffe"; /* ULLONG_MAX - 1 (not UINT_MAX - 1) */
 endptr = "somewhere";
 res = 999;
 err = qemu_strtoui(str, , 0, );
-g_assert_cmpint(err, ==, 0 /* FIXME -ERANGE */);
-g_assert_cmpuint(res, ==, UINT_MAX - 1 /* FIXME UINT_MAX */);
+g_assert_cmpint(err, ==, -ERANGE);
+g_assert_cmpuint(res, ==, UINT_MAX);
 g_assert_true(endptr == str + strlen(str));

 str = "0x1"; /* 65 bits, 32-bit sign bit clear */
@@ -1019,21 +1018,20 @@ static void test_qemu_strtoui_underflow(void)
 g_assert_cmpuint(res, ==, UINT_MAX);
 g_assert_true(endptr == str + strlen(str));

-/* FIXME - overflow should be consistent with 32-bit strtoul */
 str = "-18446744073709551615"; /* -UINT64_MAX (not -(-1)) */
 endptr = "somewhere";
 res = 999;
 err = qemu_strtoui(str, , 0, );
-g_assert_cmpint(err, ==, 0 /* FIXME -ERANGE */);
-g_assert_cmpuint(res, ==, 1 /* FIXME UINT_MAX */);
+g_assert_cmpint(err, ==, -ERANGE);
+g_assert_cmpuint(res, ==, UINT_MAX);
 g_assert_true(endptr == str + strlen(str));

 str = "-0x0002";
 endptr = "somewhere";
 res = 999;
 err = qemu_strtoui(str, , 0, );
-g_assert_cmpint(err, ==, 0 /* FIXME -ERANGE */);
-g_assert_cmpuint(res, ==, UINT_MAX - 1 /* FIXME UINT_MAX */);
+g_assert_cmpint(err, ==, -ERANGE);
+g_assert_cmpuint(res, ==, UINT_MAX);
 g_assert_true(endptr == str + strlen(str));

 str = "-0x1"; /* 65 bits, 32-bit sign bit clear */
diff --git a/util/cutils.c b/util/cutils.c
index 5887e744140..9b6ce9179c4 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -391,6 +391,9 @@ static int check_strtox_error(const char *nptr, char *ep,
  * and return -ERANGE.
  *
  * Else store the converted value in @result, and return zero.
+ *
+ * This matches the behavior of strtol() on 32-bit platforms, even on
+ * platforms where long is 64-bits.
  */
 int qemu_strtoi(const char *nptr, const char **endptr, int base,
 int *result)
@@ -443,13 +446,15 @@ int qemu_strtoi(const char *nptr, const char **endptr, 
int base,
  *
  * Note that a number with a leading minus sign gets converted without
  * the minus sign, checked for overflow (see above), then negated (in
- * @result's type).  This is exactly how strtoul() works.
+ * @result's type).  This matches the behavior of strtoul() on 32-bit
+ * platforms, even on platforms where long is 64-bits.
  */
 int qemu_strtoui(const char *nptr, const char **endptr, int base,
  unsigned int *result)
 {
 char *ep;
-long long lresult;
+unsigned long long lresult;
+bool neg;

 assert((unsigned) base <= 36 && base != 1);
 if (!nptr) {
@@ -466,14 +471,22

[PULL 14/21] cutils: Allow NULL str in qemu_strtosz

2023-06-01 Thread Eric Blake

All the other qemu_strto* and parse_uint allow a NULL str.  Having
qemu_strtosz not crash on qemu_strtosz(NULL, NULL, &val) is an easy
fix that adds some consistency between our string parsers.

Signed-off-by: Eric Blake 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Hanna Czenczek 
Message-Id: <20230522190441.64278-13-ebl...@redhat.com>
---
 tests/unit/test-cutils.c | 10 +-
 util/cutils.c|  2 +-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/tests/unit/test-cutils.c b/tests/unit/test-cutils.c
index 96bc9d1f202..26e3ba4b9f3 100644
--- a/tests/unit/test-cutils.c
+++ b/tests/unit/test-cutils.c
@@ -3285,7 +3285,12 @@ static void do_strtosz_full(const char *str, 
qemu_strtosz_fn fn,
 ret = fn(str, , );
 g_assert_cmpint(ret, ==, exp_ptr_ret);
 g_assert_cmpuint(val, ==, exp_ptr_val);
-g_assert_true(endptr == str + exp_ptr_offset);
+if (str) {
+g_assert_true(endptr == str + exp_ptr_offset);
+} else {
+g_assert_cmpint(exp_ptr_offset, ==, 0);
+g_assert_null(endptr);
+}

 val = 0xbaadf00d;
 ret = fn(str, NULL, );
@@ -3383,6 +3388,9 @@ static void test_qemu_strtosz_float(void)

 static void test_qemu_strtosz_invalid(void)
 {
+do_strtosz(NULL, -EINVAL, 0xbaadf00d, 0);
+
+/* Must parse at least one digit */
 do_strtosz("", -EINVAL, 0xbaadf00d, 0);
 do_strtosz(" \t ", -EINVAL, 0xbaadf00d, 0);
 do_strtosz("crap", -EINVAL, 0xbaadf00d, 0);
diff --git a/util/cutils.c b/util/cutils.c
index 56a2aced8d4..1dc67d201dc 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -306,7 +306,7 @@ static int do_strtosz(const char *nptr, const char **end,
 out:
 if (end) {
 *end = endptr;
-} else if (*endptr) {
+} else if (nptr && *endptr) {
 retval = -EINVAL;
 }
 if (retval == 0) {
-- 
2.40.1

[PULL 05/21] test-cutils: Test integral qemu_strto* value on failures

2023-06-01 Thread Eric Blake

We are inconsistent on the contents of *value after a strto* parse
failure.  I found the following behaviors:

- parse_uint() and parse_uint_full(), which document that *value is
  slammed to 0 on all EINVAL failures and 0 or UINT_MAX on ERANGE
  failures, and has unit tests for that (note that parse_uint requires
  non-NULL endptr, and does not fail with EINVAL for trailing junk)

- qemu_strtosz(), which leaves *value untouched on all failures (both
  EINVAL and ERANGE), and has unit tests but not documentation for
  that

- qemu_strtoi() and other integral friends, which document *value on
  ERANGE failures but is unspecified on EINVAL (other than implicitly
  by comparison to libc strto*); there, *value is untouched for NULL
  string, slammed to 0 on no conversion, and left at the prefix value
  on NULL endptr; unit tests do not consistently check the value

- qemu_strtod(), which documents *value on ERANGE failures but is
  unspecified on EINVAL; there, *value is untouched for NULL string,
  slammed to 0.0 for no conversion, and left at the prefix value on
  NULL endptr; there are no unit tests (other than indirectly through
  qemu_strtosz)

- qemu_strtod_finite(), which documents *value on ERANGE failures but
  is unspecified on EINVAL; there, *value is left at the prefix for
  'inf' or 'nan' and untouched in all other cases; there are no unit
  tests (other than indirectly through qemu_strtosz)

Upcoming patches will change behaviors for consistency, but it's best
to first have more unit test coverage to see the impact of those
changes.

Signed-off-by: Eric Blake 
Reviewed-by: Hanna Czenczek 
Message-Id: <20230522190441.64278-4-ebl...@redhat.com>
---
 tests/unit/test-cutils.c | 58 +++-
 1 file changed, 51 insertions(+), 7 deletions(-)

diff --git a/tests/unit/test-cutils.c b/tests/unit/test-cutils.c
index 38bd3990207..1eeaf21ae22 100644
--- a/tests/unit/test-cutils.c
+++ b/tests/unit/test-cutils.c
@@ -248,6 +248,7 @@ static void test_qemu_strtoi_null(void)
 err = qemu_strtoi(NULL, , 0, );

 g_assert_cmpint(err, ==, -EINVAL);
+g_assert_cmpint(res, ==, 999);
 g_assert_null(endptr);
 }

@@ -262,6 +263,7 @@ static void test_qemu_strtoi_empty(void)
 err = qemu_strtoi(str, , 0, );

 g_assert_cmpint(err, ==, -EINVAL);
+g_assert_cmpint(res, ==, 0);
 g_assert_true(endptr == str);
 }

@@ -276,6 +278,7 @@ static void test_qemu_strtoi_whitespace(void)
 err = qemu_strtoi(str, , 0, );

 g_assert_cmpint(err, ==, -EINVAL);
+g_assert_cmpint(res, ==, 0);
 g_assert_true(endptr == str);
 }

@@ -290,6 +293,7 @@ static void test_qemu_strtoi_invalid(void)
 err = qemu_strtoi(str, , 0, );

 g_assert_cmpint(err, ==, -EINVAL);
+g_assert_cmpint(res, ==, 0);
 g_assert_true(endptr == str);
 }

@@ -473,6 +477,7 @@ static void test_qemu_strtoi_full_null(void)
 err = qemu_strtoi(NULL, , 0, );

 g_assert_cmpint(err, ==, -EINVAL);
+g_assert_cmpint(res, ==, 999);
 g_assert_null(endptr);
 }

@@ -485,6 +490,7 @@ static void test_qemu_strtoi_full_empty(void)
 err = qemu_strtoi(str, NULL, 0, );

 g_assert_cmpint(err, ==, -EINVAL);
+g_assert_cmpint(res, ==, 0);
 }

 static void test_qemu_strtoi_full_negative(void)
@@ -502,18 +508,19 @@ static void test_qemu_strtoi_full_negative(void)
 static void test_qemu_strtoi_full_trailing(void)
 {
 const char *str = "123xxx";
-int res;
+int res = 999;
 int err;

 err = qemu_strtoi(str, NULL, 0, );

 g_assert_cmpint(err, ==, -EINVAL);
+g_assert_cmpint(res, ==, 123);
 }

 static void test_qemu_strtoi_full_max(void)
 {
 char *str = g_strdup_printf("%d", INT_MAX);
-int res;
+int res = 999;
 int err;

 err = qemu_strtoi(str, NULL, 0, );
@@ -548,6 +555,7 @@ static void test_qemu_strtoui_null(void)
 err = qemu_strtoui(NULL, , 0, );

 g_assert_cmpint(err, ==, -EINVAL);
+g_assert_cmpuint(res, ==, 999);
 g_assert_null(endptr);
 }

@@ -562,6 +570,7 @@ static void test_qemu_strtoui_empty(void)
 err = qemu_strtoui(str, , 0, );

 g_assert_cmpint(err, ==, -EINVAL);
+g_assert_cmpuint(res, ==, 0);
 g_assert_true(endptr == str);
 }

@@ -576,6 +585,7 @@ static void test_qemu_strtoui_whitespace(void)
 err = qemu_strtoui(str, , 0, );

 g_assert_cmpint(err, ==, -EINVAL);
+g_assert_cmpuint(res, ==, 0);
 g_assert_true(endptr == str);
 }

@@ -590,6 +600,7 @@ static void test_qemu_strtoui_invalid(void)
 err = qemu_strtoui(str, , 0, );

 g_assert_cmpint(err, ==, -EINVAL);
+g_assert_cmpuint(res, ==, 0);
 g_assert_true(endptr == str);
 }

@@ -771,6 +782,7 @@ static void test_qemu_strtoui_full_null(void)
 err = qemu_strtoui(NULL, NULL, 0, );

 g_assert_cmpint(err, ==, -EINVAL);
+g_assert_cmpuint(res, ==, 999);
 }

 static void test_qemu_strtoui_full_empty(void)
@@ -782,7 +794,9 @@ static void test_qemu_strtoui_full_empty(void)
 err = qemu_strtoui(str, NULL, 0, );

[PULL 01/21] iotests: Fix test 104 under NBD

2023-06-01 Thread Eric Blake

In the past, commit a231cb27 ("iotests: Fix 104 for NBD", v2.3.0)
added an additional filter to _filter_img_info to rewrite NBD URIs
into the expected output form.  This recently broke when we tweaked
tests to run in a per-format directory, which did not match the regex,
because _img_info itself is now already changing
SOCK_DIR=/tmp/tmpphjfbphd/raw-nbd-104 into
/tmp/tmpphjfbphd/IMGFMT-nbd-104 prior to _img_info_filter getting a
chance to further filter things.

While diagnosing the problem, I also noticed some filter lines
rendered completely useless by a typo when we switched from TCP to
Unix sockets for NBD (in shell, '\\+' is different from "\\+": one
gives two backslashes to the regex, matching the literal 2-byte sequence
<\+> after a single digit; the other gives one backslash to the regex,
as the metacharacter \+ to match one or more of <[0-9]>); since the
literal string <nbd://127.0.0.1:0\+> is not a valid URI, that regex
hasn't been matching anything for years so it is fine to just drop it
rather than fix the typo.

Fixes: f3923a72 ("iotests: Switch nbd tests to use Unix rather than TCP", 
v4.2.0)
Fixes: 5ba7db09 ("iotests: always use a unique sub-directory per test", v8.0.0)
Signed-off-by: Eric Blake 
Message-Id: <20230519150216.2599189-1-ebl...@redhat.com>
Reviewed-by: Daniel P. Berrangé 
---
 tests/qemu-iotests/common.filter | 4 +---
 tests/qemu-iotests/common.rc | 3 ++-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index 6b32c7fbfa1..fc3c64bcb8e 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 #
-# Copyright (C) 2009 Red Hat, Inc.
+# Copyright Red Hat
 # Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
 #
 # This program is free software; you can redistribute it and/or
@@ -131,7 +131,6 @@ _filter_img_create_filenames()
 -e "s#$SOCK_DIR#SOCK_DIR#g" \
 -e 's#SOCK_DIR/fuse-#TEST_DIR/#g' \
 -e "s#$IMGFMT#IMGFMT#g" \
--e 's#nbd:127.0.0.1:[0-9]\\+#TEST_DIR/t.IMGFMT#g' \
 -e 's#nbd+unix:///\??socket=SOCK_DIR/nbd#TEST_DIR/t.IMGFMT#g'
 }

@@ -229,7 +228,6 @@ _filter_img_info()
 -e "s#$TEST_DIR#TEST_DIR#g" \
 -e "s#$SOCK_DIR#SOCK_DIR#g" \
 -e "s#$IMGFMT#IMGFMT#g" \
--e 's#nbd://127.0.0.1:[0-9]\\+$#TEST_DIR/t.IMGFMT#g' \
 -e 's#nbd+unix:///\??socket=SOCK_DIR/nbd#TEST_DIR/t.IMGFMT#g' \
 -e 's#SOCK_DIR/fuse-#TEST_DIR/#g' \
 -e "/encrypted: yes/d" \
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index f4476b62f7d..d145f08201c 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 #
-# Copyright (C) 2009 Red Hat, Inc.
+# Copyright Red Hat
 # Copyright (c) 2000-2006 Silicon Graphics, Inc.  All Rights Reserved.
 #
 # This program is free software; you can redistribute it and/or modify
@@ -717,6 +717,7 @@ _img_info()
 -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \
 -e "s#$TEST_DIR#TEST_DIR#g" \
 -e "s#$SOCK_DIR/fuse-#TEST_DIR/#g" \
+-e "s#$SOCK_DIR/#SOCK_DIR/#g" \
 -e "s#$IMGFMT#IMGFMT#g" \
 -e 's/\(compression type: \)\(zlib\|zstd\)/\1COMPRESSION_TYPE/' \
 -e "/^disk size:/ D" \
-- 
2.40.1

[PULL 13/21] test-cutils: Refactor qemu_strtosz tests for less boilerplate

2023-06-01 Thread Eric Blake

No need to copy-and-paste lots of boilerplate per string tested, when
we can consolidate that behind helper functions.  Plus, this adds a
bit more coverage (we now test all strings both with and without
endptr, whereas before some tests skipped the NULL endptr case), which
exposed a SEGFAULT on qemu_strtosz(NULL, NULL, &val) that will be
fixed in an upcoming patch.

Note that duplicating boilerplate has one advantage lost here - a
failed test tells you which line number failed; but a helper function
does not show the call stack that reached the failure.  Since we call
the helper more than once within many of the "unit tests", even the
unit test name doesn't point out which call is failing.  But that only
matters when tests fail (they normally pass); at which point I'm
debugging the failures under gdb anyways, so I'm not too worried about
it.

Signed-off-by: Eric Blake 
Reviewed-by: Hanna Czenczek 
Message-Id: <20230522190441.64278-12-ebl...@redhat.com>
---
 tests/unit/test-cutils.c | 503 ---
 1 file changed, 100 insertions(+), 403 deletions(-)

diff --git a/tests/unit/test-cutils.c b/tests/unit/test-cutils.c
index 3a095272d0f..96bc9d1f202 100644
--- a/tests/unit/test-cutils.c
+++ b/tests/unit/test-cutils.c
@@ -3272,473 +3272,170 @@ static void test_qemu_strtod_finite_erange_junk(void)
 g_assert_cmpfloat(res, ==, 999.0);
 }

+typedef int (*qemu_strtosz_fn)(const char *, const char **, uint64_t *);
+static void do_strtosz_full(const char *str, qemu_strtosz_fn fn,
+int exp_ptr_ret, uint64_t exp_ptr_val,
+size_t exp_ptr_offset, int exp_null_ret,
+uint64_t exp_null_val)
+{
+const char *endptr = "somewhere";
+uint64_t val = 0xbaadf00d;
+int ret;
+
+ret = fn(str, , );
+g_assert_cmpint(ret, ==, exp_ptr_ret);
+g_assert_cmpuint(val, ==, exp_ptr_val);
+g_assert_true(endptr == str + exp_ptr_offset);
+
+val = 0xbaadf00d;
+ret = fn(str, NULL, );
+g_assert_cmpint(ret, ==, exp_null_ret);
+g_assert_cmpuint(val, ==, exp_null_val);
+}
+
+static void do_strtosz(const char *str, int exp_ret, uint64_t exp_val,
+   size_t exp_offset)
+{
+do_strtosz_full(str, qemu_strtosz, exp_ret, exp_val, exp_offset,
+exp_ret, exp_val);
+}
+
+static void do_strtosz_MiB(const char *str, int exp_ret, uint64_t exp_val,
+   size_t exp_offset)
+{
+do_strtosz_full(str, qemu_strtosz_MiB, exp_ret, exp_val, exp_offset,
+exp_ret, exp_val);
+}
+
+static void do_strtosz_metric(const char *str, int exp_ret, uint64_t exp_val,
+  size_t exp_offset)
+{
+do_strtosz_full(str, qemu_strtosz_metric, exp_ret, exp_val, exp_offset,
+exp_ret, exp_val);
+}
+
 static void test_qemu_strtosz_simple(void)
 {
-const char *str;
-const char *endptr;
-int err;
-uint64_t res;
-
-str = "0";
-endptr = str;
-res = 0xbaadf00d;
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, 0);
-g_assert_cmpuint(res, ==, 0);
-g_assert_true(endptr == str + 1);
+do_strtosz("0", 0, 0, 1);

 /* Leading 0 gives decimal results, not octal */
-str = "08";
-endptr = str;
-res = 0xbaadf00d;
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, 0);
-g_assert_cmpuint(res, ==, 8);
-g_assert_true(endptr == str + 2);
+do_strtosz("08", 0, 8, 2);

 /* Leading space is ignored */
-str = " 12345";
-endptr = str;
-res = 0xbaadf00d;
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, 0);
-g_assert_cmpuint(res, ==, 12345);
-g_assert_true(endptr == str + 6);
+do_strtosz(" 12345", 0, 12345, 6);

-res = 0xbaadf00d;
-err = qemu_strtosz(str, NULL, );
-g_assert_cmpint(err, ==, 0);
-g_assert_cmpuint(res, ==, 12345);
+/* 2^53-1 */
+do_strtosz("9007199254740991", 0, 0x1fULL, 16);

-str = "9007199254740991"; /* 2^53-1 */
-endptr = str;
-res = 0xbaadf00d;
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, 0);
-g_assert_cmphex(res, ==, 0x1fULL);
-g_assert_true(endptr == str + 16);
+/* 2^53 */
+do_strtosz("9007199254740992", 0, 0x20ULL, 16);

-str = "9007199254740992"; /* 2^53 */
-endptr = str;
-res = 0xbaadf00d;
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, 0);
-g_assert_cmphex(res, ==, 0x20ULL);
-g_assert_true(endptr == str + 16);
+/* 2^53+1 */
+do_strtosz("9007199254740993", 0, 0x21ULL, 16);

-str = "9007199254740993"; /* 2^53+1 */
-endptr = str;
-res = 0xbaadf00d;
-err = qemu_strtosz(str, , );
-g_assert_cmpint(err, ==, 0);
-g_assert_cmphex(res, ==, 0x21ULL);
-g_assert_true(endptr == str + 16);
+/* 0xf800 (53 msbs set) */
+do_strtosz("18446744073709549568", 0,

[PULL 00/21] NBD and miscellaneous patches for 2023-06-01

2023-06-01 Thread Eric Blake

The following changes since commit 19a720b74fde7e859d19f12c66a72e545947a657:

  Merge tag 'tracing-pull-request' of https://gitlab.com/stefanha/qemu into 
staging (2023-06-01 08:30:29 -0700)

are available in the Git repository at:

  https://repo.or.cz/qemu/ericb.git tags/pull-nbd-2023-06-01

for you to fetch changes up to 58516caac47c4bc7ed3ad6a8e2f565404a563dd3:

  cutils: Improve qemu_strtosz handling of fractions (2023-06-01 16:55:25 -0500)


nbd and misc patches for 2023-06-01

- Eric Blake: Fix iotest 104 for NBD
- Eric Blake: Improve qcow2 spec on padding bytes
- Eric Blake: Fix read-beyond-bounds bug in qemu_strtosz


Eric Blake (21):
  iotests: Fix test 104 under NBD
  qcow2: Explicit mention of padding bytes
  test-cutils: Avoid g_assert in unit tests
  test-cutils: Use g_assert_cmpuint where appropriate
  test-cutils: Test integral qemu_strto* value on failures
  test-cutils: Test more integer corner cases
  cutils: Fix wraparound parsing in qemu_strtoui
  cutils: Document differences between parse_uint and qemu_strtou64
  cutils: Adjust signature of parse_uint[_full]
  cutils: Allow NULL endptr in parse_uint()
  test-cutils: Add coverage of qemu_strtod
  test-cutils: Prepare for upcoming semantic change in qemu_strtosz
  test-cutils: Refactor qemu_strtosz tests for less boilerplate
  cutils: Allow NULL str in qemu_strtosz
  numa: Check for qemu_strtosz_MiB error
  test-cutils: Add more coverage to qemu_strtosz
  cutils: Set value in all qemu_strtosz* error paths
  cutils: Set value in all integral qemu_strto* error paths
  cutils: Use parse_uint in qemu_strtosz for negative rejection
  cutils: Improve qemu_strtod* error paths
  cutils: Improve qemu_strtosz handling of fractions

 docs/interop/qcow2.txt   |1 +
 include/qemu/cutils.h|5 +-
 audio/audio_legacy.c |4 +-
 block/gluster.c  |4 +-
 block/nfs.c  |4 +-
 blockdev.c   |4 +-
 contrib/ivshmem-server/main.c|4 +-
 hw/core/numa.c   |   11 +-
 qapi/opts-visitor.c  |   10 +-
 tests/unit/test-cutils.c | 2469 --
 ui/vnc.c |4 +-
 util/cutils.c|  263 ++--
 util/guest-random.c  |4 +-
 util/qemu-sockets.c  |   10 +-
 tests/qemu-iotests/common.filter |4 +-
 tests/qemu-iotests/common.rc |3 +-
 tests/qemu-iotests/049.out   |7 +-
 tests/qemu-iotests/178.out.qcow2 |3 +-
 tests/qemu-iotests/178.out.raw   |3 +-
 19 files changed, 2035 insertions(+), 782 deletions(-)

-- 
2.40.1

[PULL 02/21] qcow2: Explicit mention of padding bytes

2023-06-01 Thread Eric Blake

Although we already covered the need for padding bytes with our
changes in commit 3ae3fcfa, commit 66fcbca5 (both v5.0.0) added one
byte and relied on the rest of the text for implicitly covering 7
padding bytes.  For consistency with other parts of the header (such
as the header extension format listing padding from n - m, or the
snapshot table entry listing variable padding), we might as well call
out the remaining 7 bytes as padding until such time (as any) as they
gain another meaning.

Signed-off-by: Eric Blake 
CC: Vladimir Sementsov-Ogievskiy 
Message-Id: <20230522184631.47211-1-ebl...@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 docs/interop/qcow2.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
index e7f036c286b..2c4618375ad 100644
--- a/docs/interop/qcow2.txt
+++ b/docs/interop/qcow2.txt
@@ -226,6 +226,7 @@ version 2.
  in QEMU. However, clusters with the
 deflate compression type do not have zlib headers.

+105 - 111:  Padding, contents defined below.

 === Header padding ===

-- 
2.40.1

Re: [PATCH v2] pc: q35: Bump max_cpus to 1024

2023-06-01 Thread Suthikulpanit, Suravee





On 6/1/2023 6:09 PM, Michael S. Tsirkin wrote:

On Thu, Jun 01, 2023 at 11:17:30AM +0100, Daniel P. Berrangé wrote:

On Thu, Jun 01, 2023 at 11:09:45AM +0100, Joao Martins wrote:


On 31/05/2023 23:51, Suravee Suthikulpanit wrote:

Since KVM_MAX_VCPUS is currently defined to 1024 for x86 as shown in
arch/x86/include/asm/kvm_host.h, update QEMU limits to the same number.

In case KVM could not support the specified number of vcpus, QEMU would
return the following error message:

   qemu-system-x86_64: kvm_init_vcpu: kvm_get_vcpu failed (xxx): Invalid 
argument

Signed-off-by: Suravee Suthikulpanit
---

Changes from V1:
(https://lore.kernel.org/all/ynkdgsii1vfvx...@redhat.com/T/)
  * Bump from 512 to KVM_MAX_VCPUS (per Igor's suggestion)

Note:
  From the last discussion, Daniel mentioned that SMBIOS 2.1 tables might
  cause overflow at approx 720 CPUs, and it might require using the
  SMBIOS 3.0 entry point. Also, we might need to change the default for
  the x86 machine type to SMBIOS 3.0. However, I do not know the status
  of this.


I suspect smbios 3.0 (64-bit entry point) is already supported.

With current qemu and all the smbios fixes in the last cycle, perhaps this is
mainly just setting smbios_entry_point_type to SMBIOS_ENTRY_POINT_TYPE_64 if
MachineState::smp::max_cpus is bigger than 720 (e.g. in pc_q35_init()?)

The need for the 64-bit entry point depends on a combination of RAM config
and CPU count. IMHO we need to unconditionally switch the latest machine
types to use the 64-bit entry point by default, rather than trying to infer
some special condition to dynamically change on the fly.

Yes, makes sense.


Thanks all for the feedback. So, IIUC, here is how the SMBIOS entry 
point types would be affected by the QEMU options:


- pc-q35-8.1 and later, default to SMBIOS EP type 64.
- pc-q35-8.0 and older, default to SMBIOS EP type 32.
- User can override the type w/ QEMU option "-M .., 
smbios-entry-point-type=[32|64]"


Please let me know if I am missing anything. If this is accurate, I'll 
send out v3 with this change.


Thanks,
Suravee

Re: [ANNOUNCE] KVM Microconference at LPC 2023

2023-06-01 Thread Mickaël Salaün

Hi,

What is the status of this microconference proposal? We'd be happy to 
talk about Heki [1] and potentially other hypervisor supports.

Regards,
 Mickaël

[1] https://lore.kernel.org/all/20230505152046.6575-1-...@digikod.net/

On 26/05/2023 18:09, Mickaël Salaün wrote:

See James Morris's proposal here:
https://lore.kernel.org/all/17f62cb1-a5de-2020-2041-359b8e96b...@linux.microsoft.com/

On 26/05/2023 04:36, James Morris wrote:
  > [Side topic]
  >
  > Would folks be interested in a Linux Plumbers Conference MC on this
  > topic generally, across different hypervisors, VMMs, and architectures?
  >
  > If so, please let me know who the key folk would be and we can try
writing
  > up an MC proposal.

The fine-grain memory management proposal from James Gowans looks
interesting, especially the "side-car" virtual machines:
https://lore.kernel.org/all/88db2d9cb42e471692ff1feb0b9ca855906a9d95.ca...@amazon.com/

On 09/05/2023 11:55, Paolo Bonzini wrote:

Hi all!

We are planning on submitting a CFP to host a KVM Microconference at
Linux Plumbers Conference 2023. To help justify the proposal, we would
like to gather a list of folks that would likely attend, and crowdsource
a list of topics to include in the proposal.

For both this year and future years, the intent is that a KVM
Microconference will complement KVM Forum, *NOT* supplant it. As you
probably noticed, KVM Forum is going through a somewhat radical change in
how it's organized; the conference is now free and (with some help from
Red Hat) organized directly by the KVM and QEMU communities. Despite the
unexpected changes and some teething pains, community response to KVM
Forum continues to be overwhelmingly positive! KVM Forum will remain
the venue of choice for KVM/userspace collaboration, for educational
content covering both KVM and userspace, and to discuss new features in
QEMU and other userspace projects.

At least on the x86 side, however, the success of KVM Forum led us
virtualization folks to operate in relative isolation. KVM depends on
and impacts multiple subsystems (MM, scheduler, perf) in profound ways,
and recently we’ve seen more and more ideas/features that require
non-trivial changes outside KVM and buy-in from stakeholders that
(typically) do not attend KVM Forum. Linux Plumbers Conference is a
natural place to establish such collaboration within the kernel.

Therefore, the aim of the KVM Microconference will be:
* to provide a setting in which to discuss KVM and kernel internals
* to increase collaboration and reduce friction with other subsystems
* to discuss system virtualization issues that require coordination with
other subsystems (such as VFIO, or guest support in arch/)

Below is a rough draft of the planned CFP submission.

Thanks!

Paolo Bonzini (KVM Maintainer)
Sean Christopherson (KVM x86 Co-Maintainer)
Marc Zyngier (KVM ARM Co-Maintainer)

===
KVM Microconference
===

KVM (Kernel-based Virtual Machine) enables the use of hardware features
to improve the efficiency, performance, and security of virtual machines
created and managed by userspace.  KVM was originally developed to host
and accelerate "full" virtual machines running a traditional kernel and
operating system, but has long since expanded to cover a wide array of use
cases, e.g. hosting real time workloads, sandboxing untrusted workloads,
deprivileging third party code, reducing the trusted computing base of
security sensitive workloads, etc.  As KVM's use cases have grown, so too
have the requirements placed on KVM and the interactions between it and
other kernel subsystems.

The KVM Microconference will focus on how to evolve KVM and adjacent
subsystems in order to satisfy new and upcoming requirements: serving
guest memory that cannot be accessed by host userspace[1], providing
accurate, feature-rich PMU/perf virtualization in cloud VMs[2], etc.

Potential Topics:
 - Serving inaccessible/unmappable memory for KVM guests (protected VMs)
 - Optimizing mmu_notifiers, e.g. reducing TLB flushes and spurious zapping
 - Supporting multiple KVM modules (for non-disruptive upgrades)
 - Improving and hardening KVM+perf interactions
 - Implementing arch-agnostic abstractions in KVM (e.g. MMU)
 - Defining KVM requirements for hardware vendors
 - Utilizing "fault" injection to increase test coverage of edge cases
 - KVM vs VFIO (e.g. memory types, a rather hot topic on the ARM side)

Key Attendees:
 - Paolo Bonzini  (KVM Maintainer)
 - Sean Christopherson   (KVM x86 Co-Maintainer)
 - Your name could be here!

[1] 
https://lore.kernel.org/all/20221202061347.1070246-1-chao.p.p...@linux.intel.com
[2] 
https://lore.kernel.org/all/CALMp9eRBOmwz=mspp0m5q093k3rmueasf3vel39mgv5br9w...@mail.gmail.com

Re: [PATCH v3 19/19] cutils: Improve qemu_strtosz handling of fractions

2023-06-01 Thread Eric Blake

On Mon, May 22, 2023 at 02:04:41PM -0500, Eric Blake wrote:
> We have several limitations and bugs worth fixing; they are
> inter-related enough that it is not worth splitting this patch into
> smaller pieces:
> 
> +++ b/util/cutils.c
> @@ -194,15 +194,18 @@ static int64_t suffix_mul(char suffix, int64_t unit)
>   * - 12345 - decimal, scale determined by @default_suffix and @unit
>   * - 12345{bBkKmMgGtTpPeE} - decimal, scale determined by suffix and @unit
>   * - 12345.678{kKmMgGtTpPeE} - decimal, scale determined by suffix, and
> - *   fractional portion is truncated to byte
> + *   fractional portion is truncated to byte, either side of . may be empty
>   * - 0x7fEE - hexadecimal, unit determined by @default_suffix
>   *
>   * The following are intentionally not supported
> - * - hex with scaling suffix, such as 0x20M
> - * - octal, such as 08
> - * - fractional hex, such as 0x1.8
> - * - negative values, including -0
> - * - floating point exponents, such as 1e3
> + * - hex with scaling suffix, such as 0x20M (0x1b is 27, not 1)
> + * - octal, such as 08 (parsed as decimal instead)
> + * - binary, such as 0b1000 (parsed as 0b with trailing garbage "1000")
> + * - fractional hex, such as 0x1.8 (parsed as 0 with trailing garbage "x1.8")
> + * - negative values, including -0 (fail with -ERANGE)
> + * - floating point exponents, such as 1e3 (parsed as 1e with trailing
> + *   garbage "3") or 0x1p3 (parsed as 1 with trailing garbage "p3")

This latter clause is wrong - we reject 0x1p3 earlier under the hex
with scaling suffix rule.  I've touched up the comment as part of
preparing the pull request.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [PULL 0/8] Block patches

2023-06-01 Thread Richard Henderson


On 6/1/23 08:25, Stefan Hajnoczi wrote:

The following changes since commit c6a5fc2ac76c5ab709896ee1b0edd33685a67ed1:

   decodetree: Add --output-null for meson testing (2023-05-31 19:56:42 -0700)

are available in the Git repository at:

   https://gitlab.com/stefanha/qemu.git  tags/block-pull-request

for you to fetch changes up to 98b126f5e3228a346c774e569e26689943b401dd:

   qapi: add '@fdset' feature for BlockdevOptionsVirtioBlkVhostVdpa (2023-06-01 
11:08:21 -0400)


Pull request

- Stefano Garzarella's blkio block driver 'fd' parameter
- My thread-local blk_io_plug() series


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/8.1 as 
appropriate.


r~

Re: [PATCH v3 00/19] Fix qemu_strtosz() read-out-of-bounds

2023-06-01 Thread Eric Blake

On Mon, May 22, 2023 at 02:04:22PM -0500, Eric Blake wrote:
> v2 was here:
> https://lists.gnu.org/archive/html/qemu-devel/2023-05/msg02951.html
> 
> Since then:
>  - fix another qemu_strtoui bug
>  - address review comments from Hanna

This series has been reviewed; I fixed up the last few bits, and am
queueing it through my NBD tree (not really about NBD directly, but
tangentially we rely on size parsing in unit testing...), in order to
prepare a pull request today.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [PATCH] qcow2: Explicit mention of padding bytes

2023-06-01 Thread Eric Blake

On Mon, May 22, 2023 at 11:26:03PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> On 22.05.23 21:46, Eric Blake wrote:
> > Although we already covered the need for padding bytes with our
> > changes in commit 3ae3fcfa, commit 66fcbca5 (both v5.0.0) added one
> > byte and relied on the rest of the text for implicitly covering 7
> > padding bytes.  For consistency with other parts of the header (such
> > as the header extension format listing padding from n - m, or the
> > snapshot table entry listing variable padding), we might as well call
> > out the remaining 7 bytes as padding until such time (as any) as they
> > gain another meaning.
> > 
> > Signed-off-by: Eric Blake 
> > CC: Vladimir Sementsov-Ogievskiy 
> 
> Reviewed-by: Vladimir Sementsov-Ogievskiy 

Not strictly related to NBD, but I'll pick it up since I'm about to do a pull 
request.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [PATCH 1/6] qemu-img: rebase: stop when reaching EOF of old backing file

2023-06-01 Thread Michael Tokarev


01.06.2023 22:28, Andrey Drobyshev via пишет:

In case when we're rebasing within one backing chain, and when target image
is larger than old backing file, bdrv_is_allocated_above() ends up setting
*pnum = 0.  As a result, target offset isn't getting incremented, and we
get stuck in an infinite for loop.  Let's detect this case and proceed
further down the loop body, as the offsets beyond the old backing size need
to be explicitly zeroed.

Signed-off-by: Andrey Drobyshev 


It looks like you forgot the Reviewed-by: Denis V. Lunev here and
in the subsequent patch.

Should this be backported to -stable? Not that I've seen this issue,
it's a quite specific and somewhat rare case..

Thanks,

/mjt

Big TCG slowdown when using zstd with aarch64

2023-06-01 Thread Juan Quintela



Hi

Before I continue investigating this further, do you have any clue what
is going on here?  I am running qemu-system-aarch64 on x86_64.

$ time ./tests/qtest/migration-test -p /aarch64/migration/multifd/tcp/plain/none
TAP version 13
# random seed: R02S3d50a0e874b28727af4b862a3cc4214e
# Start of aarch64 tests
# Start of migration tests
# Start of multifd tests
# Start of tcp tests
# Start of plain tests
# starting QEMU: exec ./qemu-system-aarch64 -qtest unix:/tmp/qtest-2888203.sock 
-qtest-log /dev/null -chardev socket,path=/tmp/qtest-2888203.qmp,id=char0 -mon 
chardev=char0,mode=control -display none -net none -accel kvm -accel tcg 
-machine virt,gic-version=max -name source,debug-threads=on -m 150M -serial 
file:/tmp/migration-test-WT9151/src_serial -cpu max -kernel 
/tmp/migration-test-WT9151/bootsect -accel qtest
# starting QEMU: exec ./qemu-system-aarch64 -qtest unix:/tmp/qtest-2888203.sock 
-qtest-log /dev/null -chardev socket,path=/tmp/qtest-2888203.qmp,id=char0 -mon 
chardev=char0,mode=control -display none -net none -accel kvm -accel tcg 
-machine virt,gic-version=max -name target,debug-threads=on -m 150M -serial 
file:/tmp/migration-test-WT9151/dest_serial -incoming defer -cpu max -kernel 
/tmp/migration-test-WT9151/bootsect -accel qtest
ok 1 /aarch64/migration/multifd/tcp/plain/none
# End of plain tests
# End of tcp tests
# End of multifd tests
# End of migration tests
# End of aarch64 tests
1..1

real    0m4.559s
user    0m4.898s
sys     0m1.156s
$ time ./tests/qtest/migration-test -p /aarch64/migration/multifd/tcp/plain/zlib
TAP version 13
# random seed: R02S014dd197350726bdd95aea37b81d3898
# Start of aarch64 tests
# Start of migration tests
# Start of multifd tests
# Start of tcp tests
# Start of plain tests
# starting QEMU: exec ./qemu-system-aarch64 -qtest unix:/tmp/qtest-2888278.sock 
-qtest-log /dev/null -chardev socket,path=/tmp/qtest-2888278.qmp,id=char0 -mon 
chardev=char0,mode=control -display none -net none -accel kvm -accel tcg 
-machine virt,gic-version=max -name source,debug-threads=on -m 150M -serial 
file:/tmp/migration-test-25U151/src_serial -cpu max -kernel 
/tmp/migration-test-25U151/bootsect -accel qtest
# starting QEMU: exec ./qemu-system-aarch64 -qtest unix:/tmp/qtest-2888278.sock 
-qtest-log /dev/null -chardev socket,path=/tmp/qtest-2888278.qmp,id=char0 -mon 
chardev=char0,mode=control -display none -net none -accel kvm -accel tcg 
-machine virt,gic-version=max -name target,debug-threads=on -m 150M -serial 
file:/tmp/migration-test-25U151/dest_serial -incoming defer -cpu max -kernel 
/tmp/migration-test-25U151/bootsect -accel qtest
ok 1 /aarch64/migration/multifd/tcp/plain/zlib
# End of plain tests
# End of tcp tests
# End of multifd tests
# End of migration tests
# End of aarch64 tests
1..1

real    0m1.645s
user    0m3.484s
sys     0m0.512s
$ time ./tests/qtest/migration-test -p /aarch64/migration/multifd/tcp/plain/zstd
TAP version 13
# random seed: R02Se49afe2ea9d2b76a1eda1fa2bc8d812c
# Start of aarch64 tests
# Start of migration tests
# Start of multifd tests
# Start of tcp tests
# Start of plain tests
# starting QEMU: exec ./qemu-system-aarch64 -qtest unix:/tmp/qtest-2888353.sock 
-qtest-log /dev/null -chardev socket,path=/tmp/qtest-2888353.qmp,id=char0 -mon 
chardev=char0,mode=control -display none -net none -accel kvm -accel tcg 
-machine virt,gic-version=max -name source,debug-threads=on -m 150M -serial 
file:/tmp/migration-test-UILY51/src_serial -cpu max -kernel 
/tmp/migration-test-UILY51/bootsect -accel qtest
# starting QEMU: exec ./qemu-system-aarch64 -qtest unix:/tmp/qtest-2888353.sock 
-qtest-log /dev/null -chardev socket,path=/tmp/qtest-2888353.qmp,id=char0 -mon 
chardev=char0,mode=control -display none -net none -accel kvm -accel tcg 
-machine virt,gic-version=max -name target,debug-threads=on -m 150M -serial 
file:/tmp/migration-test-UILY51/dest_serial -incoming defer -cpu max -kernel 
/tmp/migration-test-UILY51/bootsect -accel qtest



ok 1 /aarch64/migration/multifd/tcp/plain/zstd
# End of plain tests
# End of tcp tests
# End of multifd tests
# End of migration tests
# End of aarch64 tests
1..1

real    0m48.022s
user    8m17.306s
sys     0m35.217s


This test is very amenable to compression, basically we only modify one
byte for each page, and basically all the pages are the same.

no compression: 4.5 seconds
zlib compression: 1.6 seconds (inside what I would expect)
zstd compression: 48 seconds, what is going on here?

As a comparison, these are the times for x86_64 running natively, values
much more reasonable.

$ time ./tests/qtest/migration-test -p /x86_64/migration/multifd/tcp/plain/none
TAP version 13
# random seed: R02S579fbe8739386c3a3336486f2adbfecd
# Start of x86_64 tests
# Start of migration tests
# Start of multifd tests
# Start of tcp tests
# Start of plain tests
# starting QEMU: exec ./qemu-system-x86_64 -qtest unix:/tmp/qtest-3002254.sock 
-qtest-log /dev/null -chardev socket,path=/tmp/qtest-3002254.qmp,id=char0

Re: [PATCH v2 0/2] qemu-img: fix getting stuck in infinite loop on in-chain rebase

2023-06-01 Thread Andrey Drobyshev

On 5/25/23 21:02, Andrey Drobyshev wrote:
> v1 -> v2:
> 
>   * Avoid breaking the loop just yet, as the offsets beyond the old
> backing size need to be explicitly zeroed;
>   * Amend the commit message accordingly;
>   * Alter the added test case to take the last zeroed cluster into
> consideration.
> 
> v1: https://lists.nongnu.org/archive/html/qemu-block/2023-05/msg00674.html
> 
> Andrey Drobyshev (2):
>   qemu-img: rebase: stop when reaching EOF of old backing file
>   qemu-iotests: 024: add rebasing test case for overlay_size >
> backing_size
> 
>  qemu-img.c | 13 -
>  tests/qemu-iotests/024 | 57 ++
>  tests/qemu-iotests/024.out | 30 
>  3 files changed, 99 insertions(+), 1 deletion(-)
> 

Since there're no comments so far, I've included this same bugfix into
the bigger series regarding "qemu-img rebase".  Please refer to
https://lists.nongnu.org/archive/html/qemu-block/2023-06/msg00068.html.

Re: [PATCH v2] target/ppc: Fix nested-hv HEAI delivery

2023-06-01 Thread Daniel Henrique Barboza


Queued in gitlab.com/danielhb/qemu/tree/ppc-next. Thanks,


Daniel

On 5/30/23 10:21, Nicholas Piggin wrote:

ppc hypervisors turn HEAI interrupts into program interrupts injected
into the guest that executed the illegal instruction, if the hypervisor
doesn't handle it some other way.

The nested-hv implementation failed to account for this HEAI->program
conversion. The virtual hypervisor wants to see the HEAI when running
a nested guest, so that interrupt type can be returned to its KVM
caller.

Fixes: 7cebc5db2eba6 ("target/ppc: Introduce a vhyp framework for nested HV 
support")
Cc: bala...@eik.bme.hu
Reviewed-by: Fabiano Rosas 
Signed-off-by: Nicholas Piggin 
---
Since v1:
- Address review comments style and typo fixes

  target/ppc/excp_helper.c | 7 +--
  1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index d69bd0033a..0f7ed58673 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -1387,9 +1387,12 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp)
  
  /*

   * We don't want to generate a Hypervisor Emulation Assistance
- * Interrupt if we don't have HVB in msr_mask (PAPR mode).
+ * Interrupt if we don't have HVB in msr_mask (PAPR mode),
+ * unless running a nested-hv guest, in which case the L1
+ * kernel wants the interrupt.
   */
-if (excp == POWERPC_EXCP_HV_EMU && !(env->msr_mask & MSR_HVB)) {
+if (excp == POWERPC_EXCP_HV_EMU && !(env->msr_mask & MSR_HVB) &&
+!books_vhyp_handles_hv_excp(cpu)) {
  excp = POWERPC_EXCP_PROGRAM;
  }

Re: [PATCH v3 1/3] hw/i2c: add smbus pec utility function

2023-06-01 Thread Philippe Mathieu-Daudé


On 31/5/23 13:47, Klaus Jensen wrote:

From: Klaus Jensen 

Add i2c_smbus_pec() to calculate the SMBus Packet Error Code for a
message.

Signed-off-by: Klaus Jensen 
---
  hw/i2c/smbus_master.c | 28 
  include/hw/i2c/smbus_master.h |  2 ++
  2 files changed, 30 insertions(+)

diff --git a/hw/i2c/smbus_master.c b/hw/i2c/smbus_master.c
index 6a53c34e70b7..47f9eb24e033 100644
--- a/hw/i2c/smbus_master.c
+++ b/hw/i2c/smbus_master.c
@@ -15,6 +15,34 @@
  #include "hw/i2c/i2c.h"
  #include "hw/i2c/smbus_master.h"
  
+static uint8_t crc8(uint16_t data)

+{
+#define POLY (0x1070U << 3)


static const unsigned crc8_poly = ..., but why not inline the single use?
  data ^= 0x1070U << 3;
and
  data <<= 1;


+int i;
+
+for (i = 0; i < 8; i++) {
+if (data & 0x8000) {
+data = data ^ POLY;
+}
+
+data = data << 1;
+}
+
+return (uint8_t)(data >> 8);
+#undef POLY
+}


We have "qemu/crc32c.h", maybe we could have a similar crc8.h. Just 
wondering...

Re: [PATCH v4] target/ppc: Fix PMU hflags calculation

2023-06-01 Thread Daniel Henrique Barboza


Reviewed-by: Daniel Henrique Barboza 


and queued. Thanks,


Daniel

On 5/30/23 10:04, Nicholas Piggin wrote:

Some of the PMU hflags bits can go out of synch, for example a store to
MMCR0 with PMCjCE=1 fails to update hflags correctly and results in
hflags mismatch:

   qemu: fatal: TCG hflags mismatch (current:0x2408003d rebuilt:0x240a003d)

This can be reproduced by running perf on a recent machine.

Some of the fragility here is the duplication of PMU hflags calculations.
This change consolidates that in a single place to update pmu-related
hflags, to be called after well-defined state changes.

The post-load PMU update is pulled out of the MSR update because it does
not depend on the MSR value.

Fixes: 8b3d1c49a9f0 ("target/ppc: Add new PMC HFLAGS")
Signed-off-by: Nicholas Piggin 
---
This is a significant rework from v3, which missed a couple of hflags.
I think it's more robust.

Question came up whether we should rearm overflow timers in something
like cpu post load, but that's for a later patch.

This is probably a stable candidate but I will wait for upstream
before ccing.

Thanks,
Nick
---

  target/ppc/cpu_init.c|  2 +-
  target/ppc/helper_regs.c | 73 ++--
  target/ppc/helper_regs.h |  1 +
  target/ppc/machine.c |  8 ++---
  target/ppc/power8-pmu.c  | 38 -
  target/ppc/power8-pmu.h  |  4 +--
  6 files changed, 85 insertions(+), 41 deletions(-)

diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 05bf73296b..398f2d9966 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -7083,7 +7083,7 @@ static void ppc_cpu_reset_hold(Object *obj)
  if (env->mmu_model != POWERPC_MMU_REAL) {
  ppc_tlb_invalidate_all(env);
  }
-pmu_update_summaries(env);
+pmu_mmcr01_updated(env);
  }
  
  /* clean any pending stop state */

diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
index fb351c303f..bc7e9d7eda 100644
--- a/target/ppc/helper_regs.c
+++ b/target/ppc/helper_regs.c
@@ -47,6 +47,48 @@ void hreg_swap_gpr_tgpr(CPUPPCState *env)
  env->tgpr[3] = tmp;
  }
  
+static uint32_t hreg_compute_pmu_hflags_value(CPUPPCState *env)

+{
+uint32_t hflags = 0;
+
+#if defined(TARGET_PPC64)
+if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMCC0) {
+hflags |= 1 << HFLAGS_PMCC0;
+}
+if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMCC1) {
+hflags |= 1 << HFLAGS_PMCC1;
+}
+if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMCjCE) {
+hflags |= 1 << HFLAGS_PMCJCE;
+}
+
+#ifndef CONFIG_USER_ONLY
+if (env->pmc_ins_cnt) {
+hflags |= 1 << HFLAGS_INSN_CNT;
+}
+if (env->pmc_ins_cnt & 0x1e) {
+hflags |= 1 << HFLAGS_PMC_OTHER;
+}
+#endif
+#endif
+
+return hflags;
+}
+
+/* Mask of all PMU hflags */
+static uint32_t hreg_compute_pmu_hflags_mask(CPUPPCState *env)
+{
+uint32_t hflags_mask = 0;
+#if defined(TARGET_PPC64)
+hflags_mask |= 1 << HFLAGS_PMCC0;
+hflags_mask |= 1 << HFLAGS_PMCC1;
+hflags_mask |= 1 << HFLAGS_PMCJCE;
+hflags_mask |= 1 << HFLAGS_INSN_CNT;
+hflags_mask |= 1 << HFLAGS_PMC_OTHER;
+#endif
+return hflags_mask;
+}
+
  static uint32_t hreg_compute_hflags_value(CPUPPCState *env)
  {
  target_ulong msr = env->msr;
@@ -104,30 +146,12 @@ static uint32_t hreg_compute_hflags_value(CPUPPCState 
*env)
  if (env->spr[SPR_LPCR] & LPCR_HR) {
  hflags |= 1 << HFLAGS_HR;
  }
-if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMCC0) {
-hflags |= 1 << HFLAGS_PMCC0;
-}
-if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMCC1) {
-hflags |= 1 << HFLAGS_PMCC1;
-}
-if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMCjCE) {
-hflags |= 1 << HFLAGS_PMCJCE;
-}
  
  #ifndef CONFIG_USER_ONLY

  if (!env->has_hv_mode || (msr & (1ull << MSR_HV))) {
  hflags |= 1 << HFLAGS_HV;
  }
  
-#if defined(TARGET_PPC64)

-if (env->pmc_ins_cnt) {
-hflags |= 1 << HFLAGS_INSN_CNT;
-}
-if (env->pmc_ins_cnt & 0x1e) {
-hflags |= 1 << HFLAGS_PMC_OTHER;
-}
-#endif
-
  /*
   * This is our encoding for server processors. The architecture
   * specifies that there is no such thing as userspace with
@@ -172,6 +196,8 @@ static uint32_t hreg_compute_hflags_value(CPUPPCState *env)
  hflags |= dmmu_idx << HFLAGS_DMMU_IDX;
  #endif
  
+hflags |= hreg_compute_pmu_hflags_value(env);

+
  return hflags | (msr & msr_mask);
  }
  
@@ -180,6 +206,17 @@ void hreg_compute_hflags(CPUPPCState *env)

  env->hflags = hreg_compute_hflags_value(env);
  }
  
+/*

+ * This can be used as a lighter-weight alternative to hreg_compute_hflags
+ * when PMU MMCR0 or pmc_ins_cnt changes. pmc_ins_cnt is changed by
+ * pmu_update_summaries.
+ */
+void hreg_update_pmu_hflags(CPUPPCState *env)
+{
+env->hflags &= ~hreg_compute_pmu_hflags_mask(env);
+env->hflags |= hreg_compute_pmu_hflags_value(env);
+}
+
  #ifdef CONFIG_DEBUG_TCG
  void

Re: [PATCH] pnv/xive2: Quiet down some error messages

2023-06-01 Thread Daniel Henrique Barboza


Queued in gitlab.com/danielhb/qemu/tree/ppc-next. Thanks,


Daniel

On 5/31/23 12:05, Frederic Barrat wrote:

When dumping the END and NVP tables ("info pic" from the HMP) on the
P10 model, we're likely to be flooded with error messages such as:

   XIVE[0] - VST: invalid NVPT entry f33800 !?

The error is printed when finding an empty VSD in an indirect
table (thus END and NVP tables with skiboot), which is going to happen
when dumping the xive state. So let's tune down those messages. They
can be re-enabled easily with a macro if needed.

Those errors were already hidden on xive/P9, for the same reason.

Signed-off-by: Frederic Barrat 
---
  hw/intc/pnv_xive2.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/hw/intc/pnv_xive2.c b/hw/intc/pnv_xive2.c
index c80316657a..397679390c 100644
--- a/hw/intc/pnv_xive2.c
+++ b/hw/intc/pnv_xive2.c
@@ -163,7 +163,9 @@ static uint64_t pnv_xive2_vst_addr_indirect(PnvXive2 *xive, 
uint32_t type,
  ldq_be_dma(&address_space_memory, vsd_addr, &vsd, MEMTXATTRS_UNSPECIFIED);
  
  if (!(vsd & VSD_ADDRESS_MASK)) {

+#ifdef XIVE2_DEBUG
  xive2_error(xive, "VST: invalid %s entry %x !?", info->name, idx);
+#endif
  return 0;
  }
  
@@ -185,7 +187,9 @@ static uint64_t pnv_xive2_vst_addr_indirect(PnvXive2 *xive, uint32_t type,

 MEMTXATTRS_UNSPECIFIED);
  
  if (!(vsd & VSD_ADDRESS_MASK)) {

+#ifdef XIVE2_DEBUG
  xive2_error(xive, "VST: invalid %s entry %x !?", info->name, idx);
+#endif
  return 0;
  }

Re: [PATCH v2 0/5] Various xive fixes

2023-06-01 Thread Daniel Henrique Barboza


Queued in gitlab.com/danielhb/qemu/tree/ppc-next. Thanks,


Daniel

On 6/1/23 09:13, Frederic Barrat wrote:

A set of small fixes for the interrupt controller (xive2) on P10.

Change log:
v2:
   split last patch to do a bit of cleanup first
   add Cedric's reviewed-by on the first 3 patches

Frederic Barrat (5):
   pnv/xive2: Add definition for TCTXT Config register
   pnv/xive2: Add definition for the ESB cache configuration register
   pnv/xive2: Allow writes to the Physical Thread Enable registers
   pnv/xive2: Introduce macros to manipulate TIMA addresses
   pnv/xive2: Handle TIMA access through all ports

  hw/intc/pnv_xive2.c| 20 +++-
  hw/intc/pnv_xive2_regs.h   |  8 
  hw/intc/xive.c | 16 
  include/hw/ppc/xive_regs.h | 16 
  4 files changed, 51 insertions(+), 9 deletions(-)

Re: [PATCH v3 32/48] tcg: Split out exec/translation-block.h

2023-06-01 Thread Philippe Mathieu-Daudé


On 31/5/23 06:03, Richard Henderson wrote:

This is all that is required by tcg/ from exec-all.h.

Signed-off-by: Richard Henderson 
---
  include/exec/exec-all.h  | 135 +--
  include/exec/translation-block.h | 152 +++
  tcg/tcg-op-ldst.c|   2 +-
  3 files changed, 154 insertions(+), 135 deletions(-)
  create mode 100644 include/exec/translation-block.h


Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH v5 8/9] vfio/migration: Add x-allow-pre-copy VFIO device property

2023-06-01 Thread Alex Williamson

On Tue, 30 May 2023 17:48:20 +0300
Avihai Horon  wrote:

> Add a new VFIO device property x-allow-pre-copy to keep migration
> compatibility to/from older QEMU versions that don't have VFIO pre-copy
> support.

This doesn't make sense to me, vfio migration is not currently
supported, it can only be enabled via an experimental flag.  AFAIK we
have no obligation to maintain migration compatibility against
experimental features.  Is there any other reason we need a flag to
disable pre-copy?

OTOH, should this series finally remove the experimental migration
flag?  Do we require Joao's vIOMMU support to finally make it
supportable?  Is there something else?  Thanks,

Alex

> Signed-off-by: Avihai Horon 
> Reviewed-by: Cédric Le Goater 
> ---
>  include/hw/vfio/vfio-common.h | 1 +
>  hw/core/machine.c | 1 +
>  hw/vfio/migration.c   | 3 ++-
>  hw/vfio/pci.c | 2 ++
>  4 files changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 1db901c194..a53ecbe2e0 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -146,6 +146,7 @@ typedef struct VFIODevice {
>  VFIOMigration *migration;
>  Error *migration_blocker;
>  OnOffAuto pre_copy_dirty_page_tracking;
> +bool allow_pre_copy;
>  bool dirty_pages_supported;
>  bool dirty_tracking;
>  } VFIODevice;
> diff --git a/hw/core/machine.c b/hw/core/machine.c
> index 1000406211..64ac3fe38e 100644
> --- a/hw/core/machine.c
> +++ b/hw/core/machine.c
> @@ -41,6 +41,7 @@
>  
>  GlobalProperty hw_compat_8_0[] = {
>  { "migration", "multifd-flush-after-each-section", "on"},
> +{ "vfio-pci", "x-allow-pre-copy", "false" },
>  };
>  const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0);
>  
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index d8f6a22ae1..cb6923ed3f 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -323,7 +323,8 @@ static bool vfio_precopy_supported(VFIODevice *vbasedev)
>  {
>  VFIOMigration *migration = vbasedev->migration;
>  
> -return migration->mig_flags & VFIO_MIGRATION_PRE_COPY;
> +return vbasedev->allow_pre_copy &&
> +   migration->mig_flags & VFIO_MIGRATION_PRE_COPY;
>  }
>  
>  /* -- */
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 73874a94de..c69813af7f 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -3335,6 +3335,8 @@ static Property vfio_pci_dev_properties[] = {
>  DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice,
>  vbasedev.pre_copy_dirty_page_tracking,
>  ON_OFF_AUTO_ON),
> +DEFINE_PROP_BOOL("x-allow-pre-copy", VFIOPCIDevice,
> + vbasedev.allow_pre_copy, true),
>  DEFINE_PROP_ON_OFF_AUTO("display", VFIOPCIDevice,
>  display, ON_OFF_AUTO_OFF),
>  DEFINE_PROP_UINT32("xres", VFIOPCIDevice, display_xres, 0),

Re: [PATCH v3 28/48] tcg: Split tcg/tcg-op-gvec.h

2023-06-01 Thread Philippe Mathieu-Daudé


On 31/5/23 06:03, Richard Henderson wrote:

Create tcg/tcg-op-gvec-common.h, moving everything that does not
concern TARGET_LONG_BITS.  Adjust tcg-op-gvec.c to use the new header.

Signed-off-by: Richard Henderson 
---
  include/tcg/tcg-op-gvec-common.h | 426 +
  include/tcg/tcg-op-gvec.h| 444 +--
  tcg/tcg-op-gvec.c|   2 +-
  3 files changed, 437 insertions(+), 435 deletions(-)
  create mode 100644 include/tcg/tcg-op-gvec-common.h


Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH v3 21/48] tcg: Move temp_idx and tcgv_i32_temp debug out of line

2023-06-01 Thread Philippe Mathieu-Daudé


On 31/5/23 06:03, Richard Henderson wrote:

Removes a multiplicty of calls to __assert_fail, saving up


"Multiplicity"


to 360kiB of .text space as measured on an x86_64 host.

Old New Less%Change
9257272 8888680 368592  3.98%   qemu-system-aarch64
6100968 5911832 189136  3.10%   qemu-system-riscv64
5839112 5707032 132080  2.26%   qemu-system-mips
4447608 4341752 105856  2.38%   qemu-system-s390x

Signed-off-by: Richard Henderson 
---
  include/tcg/tcg.h | 30 --
  tcg/tcg.c | 19 +++
  2 files changed, 35 insertions(+), 14 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH v3 18/48] tcg: Remove outdated comments in helper-head.h

2023-06-01 Thread Philippe Mathieu-Daudé


On 31/5/23 06:03, Richard Henderson wrote:

Signed-off-by: Richard Henderson 
---
  include/exec/helper-head.h | 18 +++---
  1 file changed, 3 insertions(+), 15 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé

Re: [PATCH v3 12/48] target/arm: Fix test of TCG_OVERSIZED_GUEST

2023-06-01 Thread Philippe Mathieu-Daudé


On 31/5/23 06:02, Richard Henderson wrote:

The symbol is always defined, even if to 0.
We wanted to test for TCG_OVERSIZED_GUEST == 0.


Fixes: 71943a1e90 ("target/arm: Implement FEAT_HAFDBS, access flag portion")


Signed-off-by: Richard Henderson 
---
  target/arm/ptw.c | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/target/arm/ptw.c b/target/arm/ptw.c
index 69c05cd9da..b0d2a05403 100644
--- a/target/arm/ptw.c
+++ b/target/arm/ptw.c
@@ -418,6 +418,7 @@ static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t 
old_val,
   uint64_t new_val, S1Translate *ptw,
   ARMMMUFaultInfo *fi)
  {
+#ifdef TARGET_AARCH64


This change ^ ...


  uint64_t cur_val;
  void *host = ptw->out_host;
  
@@ -473,7 +474,7 @@ static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t old_val,

   * we know that TCG_OVERSIZED_GUEST is set, which means that we are
   * running in round-robin mode and could only race with dma i/o.
   */
-#ifndef TCG_OVERSIZED_GUEST
+#if !TCG_OVERSIZED_GUEST
  # error "Unexpected configuration"
  #endif
  bool locked = qemu_mutex_iothread_locked();
@@ -497,6 +498,10 @@ static uint64_t arm_casq_ptw(CPUARMState *env, uint64_t 
old_val,
  #endif
  
  return cur_val;

+#else
+/* AArch32 does not have FEAT_HAFDBS. */
+g_assert_not_reached();


... isn't documented. Do you mind adding a quick line about it?

Reviewed-by: Philippe Mathieu-Daudé 


+#endif
  }
  
  static bool get_level1_table_address(CPUARMState *env, ARMMMUIdx mmu_idx,

Re: [PATCH v3 0/3] hw/{i2c, nvme}: mctp endpoint, nvme management interface model

2023-06-01 Thread Corey Minyard

On Wed, May 31, 2023 at 01:47:41PM +0200, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> This adds a generic MCTP endpoint model that other devices may derive
> from. I'm not 100% happy with the design of the class methods, but it's
> a start.
> 
> Also included is a very basic implementation of an NVMe-MI device,
> supporting only a small subset of the required commands. Lior (CC'ed) has some
> patches coming up that add further support.
> 
> Since this all relies on i2c target mode, this can currently only be
> used with an SoC that includes the Aspeed I2C controller.
> 
> The easiest way to get up and running with this, is to grab my buildroot
> overlay[1]. It includes a modified dts as well as a couple of
> required packages.
> 
> QEMU can then be launched along these lines:
> 
>   qemu-system-arm \
> -nographic \
> -M ast2600-evb \
> -kernel output/images/zImage \
> -initrd output/images/rootfs.cpio \
> -dtb output/images/aspeed-ast2600-evb-nmi.dtb \
> -nic user,hostfwd=tcp::2222-:22 \
> -device nmi-i2c,address=0x3a \
> -serial mon:stdio
> 
> From within the booted system,
> 
>   mctp addr add 8 dev mctpi2c15
>   mctp link set mctpi2c15 up
>   mctp route add 9 via mctpi2c15
>   mctp neigh add 9 dev mctpi2c15 lladdr 0x3a
>   mi-mctp 1 9 info
> 
> Comments are very welcome!
> 
>   [1]: https://github.com/birkelund/buildroots/tree/main/mctp-i2c
> 
> Changes since v2
> 
> 
>   - Applied a bunch of feedback from Jonathan:
> + Moved a lot of internally used structs out of the include headers
>   and into the source files.
> + Added spec references in various places
> + Split the patch for i2c_smbus_pec() into its own
> + Fix a compile error (and bug) in nmi-i2c.c.
> 
>   - From Corey:
> + Reworked the buffer handling. The deriving devices now return a
>   pointer to their own buffer that the mctp core copies into.

You didn't do what I asked here, I guess I wasn't clear.  You have:

+static void i2c_mctp_handle_control_set_eid(MCTPI2CEndpoint *mctp, uint8_t eid)
+{
+mctp->my_eid = eid;
+
+uint8_t buf[] = {
+0x0, 0x0, eid, 0x0,
+};
+
+memcpy(i2c_mctp_control_data(mctp->buffer), buf, sizeof(buf));
+mctp->len += sizeof(buf);
+}

That style of programming can lead to buffer overruns as code changes,
as you aren't checking the length of the target buffer.  I don't think
there are any issues now, but as people change the code you might end up
with one if someone gets a length wrong.

What I would like is for you to create a function like:

  i2c_mctp_add_bytes(mctp, buf, len)

that checks that len bytes will fit, then does the addition of the
bytes.  You need to adjust this to fit how you are doing things, and you
probably want one that adds just one byte, but hopefully you get the idea.

I'm sorry to be picky, but I've seen and fixed too many buffer overruns
(including one in the qemu i2c code) in situations like this.  Corey's
rule is: Never add anything to a buffer without checking the length.

Everything else looks good.

-corey

> + Added a couple of extra debugging trace events.
> 
> Changes since v1
> 
> 
>   - Fix SPDX-License tag for hw/nvme/nmi-i2c.c (Philippe)
>   - Add some asserts to verify buffer indices (by request from Corey).
>   - Drop short packets that could result in underflow (Corey)
>   - Move i2c_smbus_pec() to smbus common code (Corey)
>   - A couple of logic fixes (patch from Jeremy squashed in)
>   - Added a patch to handle messages with dest eid 0 (Matt)
> Maybe squash this as well.
> 
> Klaus Jensen (3):
>   hw/i2c: add smbus pec utility function
>   hw/i2c: add mctp core
>   hw/nvme: add nvme management interface model
> 
>  MAINTAINERS   |   7 +
>  hw/arm/Kconfig|   1 +
>  hw/i2c/Kconfig|   4 +
>  hw/i2c/mctp.c | 398 ++
>  hw/i2c/meson.build|   1 +
>  hw/i2c/smbus_master.c |  28 +++
>  hw/i2c/trace-events   |  13 ++
>  hw/nvme/meson.build   |   1 +
>  hw/nvme/nmi-i2c.c | 367 +++
>  hw/nvme/trace-events  |   6 +
>  include/hw/i2c/mctp.h | 137 
>  include/hw/i2c/smbus_master.h |   2 +
>  include/net/mctp.h|  28 +++
>  13 files changed, 993 insertions(+)
>  create mode 100644 hw/i2c/mctp.c
>  create mode 100644 hw/nvme/nmi-i2c.c
>  create mode 100644 include/hw/i2c/mctp.h
>  create mode 100644 include/net/mctp.h
> 
> -- 
> 2.40.0
> 
>

[PATCH 0/6] qemu-img: rebase: add compression support

2023-06-01 Thread Andrey Drobyshev via

This series is adding [-c | --compress] option to "qemu-img rebase"
command, which might prove useful for saving some disk space when, for
instance, manipulating chains of backup images.  Along the way I had to
make a couple of minor improvements.

The first 2 patches are a bug fix + corresponding test case.
Patch 3 merely fixes wrong args used in allocation.
Patch 4 makes write requests during rebase operation cluster_size-aligned,
which seems to be beneficial for both non-compressed and compressed mode.
The last 2 patches are the actual feature implementation + tests.

Andrey Drobyshev (6):
  qemu-img: rebase: stop when reaching EOF of old backing file
  qemu-iotests: 024: add rebasing test case for overlay_size >
backing_size
  qemu-img: rebase: use backing files' BlockBackend for buffer alignment
  qemu-img: rebase: avoid unnecessary COW operations
  qemu-img: add compression option to rebase subcommand
  iotests: add test 314 for "qemu-img rebase" with compression

 docs/tools/qemu-img.rst|   6 +-
 qemu-img-cmds.hx   |   4 +-
 qemu-img.c | 106 ++--
 tests/qemu-iotests/024 |  57 +
 tests/qemu-iotests/024.out |  30 +++
 tests/qemu-iotests/314 | 165 +
 tests/qemu-iotests/314.out |  75 +
 7 files changed, 415 insertions(+), 28 deletions(-)
 create mode 100755 tests/qemu-iotests/314
 create mode 100644 tests/qemu-iotests/314.out

-- 
2.31.1

[PATCH 1/6] qemu-img: rebase: stop when reaching EOF of old backing file

2023-06-01 Thread Andrey Drobyshev via

In case when we're rebasing within one backing chain, and when target image
is larger than old backing file, bdrv_is_allocated_above() ends up setting
*pnum = 0.  As a result, target offset isn't getting incremented, and we
get stuck in an infinite for loop.  Let's detect this case and proceed
further down the loop body, as the offsets beyond the old backing size need
to be explicitly zeroed.

Signed-off-by: Andrey Drobyshev 
---
 qemu-img.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/qemu-img.c b/qemu-img.c
index 27f48051b0..78433f3746 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3801,6 +3801,8 @@ static int img_rebase(int argc, char **argv)
 }
 
 if (prefix_chain_bs) {
+uint64_t bytes = n;
+
 /*
  * If cluster wasn't changed since prefix_chain, we don't need
  * to take action
@@ -3813,9 +3815,18 @@ static int img_rebase(int argc, char **argv)
  strerror(-ret));
 goto out;
 }
-if (!ret) {
+if (!ret && n) {
 continue;
 }
+if (!n) {
+/*
+ * If we've reached EOF of the old backing, it means that
+ * offsets beyond the old backing size were read as zeroes.
+ * Now we will need to explicitly zero the cluster in
+ * order to preserve that state after the rebase.
+ */
+n = bytes;
+}
 }
 
 /*
-- 
2.31.1

[PATCH 5/6] qemu-img: add compression option to rebase subcommand

2023-06-01 Thread Andrey Drobyshev via

If we rebase an image whose backing file has compressed clusters, we
might end up wasting disk space since the copied clusters are now
uncompressed.  In order to have better control over this, let's add
"--compress" option to the "qemu-img rebase" command.

Note that this option affects only the clusters which are actually being
copied from the original backing file.  The clusters which were
uncompressed in the target image will remain so.

Signed-off-by: Andrey Drobyshev 
---
 docs/tools/qemu-img.rst |  6 --
 qemu-img-cmds.hx|  4 ++--
 qemu-img.c  | 19 +--
 3 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/docs/tools/qemu-img.rst b/docs/tools/qemu-img.rst
index 15aeddc6d8..973a912dec 100644
--- a/docs/tools/qemu-img.rst
+++ b/docs/tools/qemu-img.rst
@@ -663,7 +663,7 @@ Command description:
 
   List, apply, create or delete snapshots in image *FILENAME*.
 
-.. option:: rebase [--object OBJECTDEF] [--image-opts] [-U] [-q] [-f FMT] [-t 
CACHE] [-T SRC_CACHE] [-p] [-u] -b BACKING_FILE [-F BACKING_FMT] FILENAME
+.. option:: rebase [--object OBJECTDEF] [--image-opts] [-U] [-q] [-f FMT] [-t 
CACHE] [-T SRC_CACHE] [-p] [-u] [-c] -b BACKING_FILE [-F BACKING_FMT] FILENAME
 
   Changes the backing file of an image. Only the formats ``qcow2`` and
   ``qed`` support changing the backing file.
@@ -690,7 +690,9 @@ Command description:
 
 In order to achieve this, any clusters that differ between
 *BACKING_FILE* and the old backing file of *FILENAME* are merged
-into *FILENAME* before actually changing the backing file.
+into *FILENAME* before actually changing the backing file. With ``-c``
+option specified, the clusters which are being merged (but not the
+entire *FILENAME* image) are written in the compressed mode.
 
 Note that the safe mode is an expensive operation, comparable to
 converting an image. It only works if the old backing file still
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 1b1dab5b17..068692d13e 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -88,9 +88,9 @@ SRST
 ERST
 
 DEF("rebase", img_rebase,
-"rebase [--object objectdef] [--image-opts] [-U] [-q] [-f fmt] [-t cache] 
[-T src_cache] [-p] [-u] -b backing_file [-F backing_fmt] filename")
+"rebase [--object objectdef] [--image-opts] [-U] [-q] [-f fmt] [-t cache] 
[-T src_cache] [-p] [-u] [-c] -b backing_file [-F backing_fmt] filename")
 SRST
-.. option:: rebase [--object OBJECTDEF] [--image-opts] [-U] [-q] [-f FMT] [-t 
CACHE] [-T SRC_CACHE] [-p] [-u] -b BACKING_FILE [-F BACKING_FMT] FILENAME
+.. option:: rebase [--object OBJECTDEF] [--image-opts] [-U] [-q] [-f FMT] [-t 
CACHE] [-T SRC_CACHE] [-p] [-u] [-c] -b BACKING_FILE [-F BACKING_FMT] FILENAME
 ERST
 
 DEF("resize", img_resize,
diff --git a/qemu-img.c b/qemu-img.c
index 9a469cd609..108da27b23 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3517,11 +3517,13 @@ static int img_rebase(int argc, char **argv)
 char *filename;
 const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
 int c, flags, src_flags, ret;
+BdrvRequestFlags write_flags = 0;
 bool writethrough, src_writethrough;
 int unsafe = 0;
 bool force_share = false;
 int progress = 0;
 bool quiet = false;
+bool compress = false;
 Error *local_err = NULL;
 bool image_opts = false;
 
@@ -3537,9 +3539,10 @@ static int img_rebase(int argc, char **argv)
 {"object", required_argument, 0, OPTION_OBJECT},
 {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
 {"force-share", no_argument, 0, 'U'},
+{"compress", no_argument, 0, 'c'},
 {0, 0, 0, 0}
 };
-c = getopt_long(argc, argv, ":hf:F:b:upt:T:qU",
+c = getopt_long(argc, argv, ":hf:F:b:upt:T:qUc",
 long_options, NULL);
 if (c == -1) {
 break;
@@ -3587,6 +3590,9 @@ static int img_rebase(int argc, char **argv)
 case 'U':
 force_share = true;
 break;
+case 'c':
+compress = true;
+break;
 }
 }
 
@@ -3639,6 +3645,14 @@ static int img_rebase(int argc, char **argv)
 
 unfiltered_bs = bdrv_skip_filters(bs);
 
+if (compress && !block_driver_can_compress(unfiltered_bs->drv)) {
+error_report("Compression not supported for this file format");
+ret = -1;
+goto out;
+} else if (compress) {
+write_flags |= BDRV_REQ_WRITE_COMPRESSED;
+}
+
 if (out_basefmt != NULL) {
 if (bdrv_find_format(out_basefmt) == NULL) {
 error_report("Invalid format name: '%s'", out_basefmt);
@@ -3903,7 +3917,8 @@ static int img_rebase(int argc, char **argv)
 bdi.cluster_size);
 end = end > size ? size : end;
 ret = blk_pwrite(blk, start, end - start,
- buf_old + (start -

[PATCH 4/6] qemu-img: rebase: avoid unnecessary COW operations

2023-06-01 Thread Andrey Drobyshev via

When rebasing an image from one backing file to another, we need to
compare data from old and new backings.  If the diff between that data
happens to be unaligned to the target cluster size, we might end up
doing partial writes, which would lead to copy-on-write and additional IO.

Consider the following simple case (virtual_size == cluster_size == 64K):

base <-- inc1 <-- inc2

qemu-io -c "write -P 0xaa 0 32K" base.qcow2
qemu-io -c "write -P 0xcc 32K 32K" base.qcow2
qemu-io -c "write -P 0xbb 0 32K" inc1.qcow2
qemu-io -c "write -P 0xcc 32K 32K" inc1.qcow2
qemu-img rebase -f qcow2 -b base.qcow2 -F qcow2 inc2.qcow2

While doing rebase, we'll write a half of the cluster to inc2, and block
layer will have to read the 2nd half of the same cluster from the base image
inc1 while doing this write operation, although the whole cluster is already
read earlier to perform data comparison.

In order to avoid these unnecessary IO cycles, let's make sure every
write request is aligned to the overlay cluster size.

Signed-off-by: Andrey Drobyshev 
---
 qemu-img.c | 72 +++---
 1 file changed, 52 insertions(+), 20 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index 60f4c06487..9a469cd609 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3513,6 +3513,7 @@ static int img_rebase(int argc, char **argv)
 uint8_t *buf_new = NULL;
 BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
 BlockDriverState *unfiltered_bs;
+BlockDriverInfo bdi = {0};
 char *filename;
 const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
 int c, flags, src_flags, ret;
@@ -3646,6 +3647,15 @@ static int img_rebase(int argc, char **argv)
 }
 }
 
+/* We need overlay cluster size to make sure write requests are aligned */
+ret = bdrv_get_info(unfiltered_bs, &bdi);
+if (ret < 0) {
+error_report("could not get block driver info");
+goto out;
+} else if (bdi.cluster_size == 0) {
+bdi.cluster_size = 1;
+}
+
 /* For safe rebasing we need to compare old and new backing file */
 if (!unsafe) {
 QDict *options = NULL;
@@ -3744,6 +3754,7 @@ static int img_rebase(int argc, char **argv)
 int64_t new_backing_size = 0;
 uint64_t offset;
 int64_t n;
+int64_t n_old = 0, n_new = 0;
 float local_progress = 0;
 
 buf_old = blk_blockalign(blk_old_backing, IO_BUF_SIZE);
@@ -3784,7 +3795,7 @@ static int img_rebase(int argc, char **argv)
 }
 
 for (offset = 0; offset < size; offset += n) {
-bool buf_old_is_zero = false;
+bool old_backing_eof = false;
 
 /* How many bytes can we handle with the next read? */
 n = MIN(IO_BUF_SIZE, size - offset);
@@ -3829,33 +3840,38 @@ static int img_rebase(int argc, char **argv)
 }
 }
 
+/* At this point n must be aligned to the target cluster size. */
+if (offset + n < size) {
+assert(n % bdi.cluster_size == 0);
+}
+
+/*
+ * Much like with the target image, we'll try to read as much
+ * of the old and new backings as we can.
+ */
+n_old = MIN(n, MAX(0, old_backing_size - (int64_t) offset));
+if (blk_new_backing) {
+n_new = MIN(n, MAX(0, new_backing_size - (int64_t) offset));
+}
+
 /*
  * Read old and new backing file and take into consideration that
  * backing files may be smaller than the COW image.
  */
-if (offset >= old_backing_size) {
-memset(buf_old, 0, n);
-buf_old_is_zero = true;
+memset(buf_old + n_old, 0, n - n_old);
+if (!n_old) {
+old_backing_eof = true;
 } else {
-if (offset + n > old_backing_size) {
-n = old_backing_size - offset;
-}
-
-ret = blk_pread(blk_old_backing, offset, n, buf_old, 0);
+ret = blk_pread(blk_old_backing, offset, n_old, buf_old, 0);
 if (ret < 0) {
 error_report("error while reading from old backing file");
 goto out;
 }
 }
 
-if (offset >= new_backing_size || !blk_new_backing) {
-memset(buf_new, 0, n);
-} else {
-if (offset + n > new_backing_size) {
-n = new_backing_size - offset;
-}
-
-ret = blk_pread(blk_new_backing, offset, n, buf_new, 0);
+memset(buf_new + n_new, 0, n - n_new);
+if (blk_new_backing && n_new) {
+ret = blk_pread(blk_new_backing, offset, n_new, buf_new, 0);
 if (ret < 0) {
 error_report("error while reading from new backing file");
 goto

[PATCH 6/6] iotests: add test 314 for "qemu-img rebase" with compression

2023-06-01 Thread Andrey Drobyshev via

The test cases considered so far:

1. Check that compression mode isn't compatible with "-f raw" (raw
   format doesn't support compression).
2. Check that rebasing an image onto no backing file preserves the data
   and writes the copied clusters actually compressed.
3. Same as 2, but with a raw backing file (i.e. the clusters copied from the
   backing are originally uncompressed -- we check they end up compressed
   after being merged).
4. Remove a single delta from a backing chain, perform the same checks
   as in 2.
5. Check that even when backing and overlay are initially uncompressed,
   copied clusters end up compressed when rebase with compression is
   performed.

Signed-off-by: Andrey Drobyshev 
---
 tests/qemu-iotests/314 | 165 +
 tests/qemu-iotests/314.out |  75 +
 2 files changed, 240 insertions(+)
 create mode 100755 tests/qemu-iotests/314
 create mode 100644 tests/qemu-iotests/314.out

diff --git a/tests/qemu-iotests/314 b/tests/qemu-iotests/314
new file mode 100755
index 00..96d7b4d258
--- /dev/null
+++ b/tests/qemu-iotests/314
@@ -0,0 +1,165 @@
+#!/usr/bin/env bash
+# group: rw backing auto quick
+#
+# Test qemu-img rebase with compression
+#
+# Copyright (c) 2023 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=andrey.drobys...@virtuozzo.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+status=1   # failure is the default!
+
+_cleanup()
+{
+_cleanup_test_img
+_rm_test_img "$TEST_IMG.base"
+_rm_test_img "$TEST_IMG.itmd"
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2
+_supported_proto file
+_supported_os Linux
+
+# Want the size divisible by 2 and 3
+size=$(( 48 * 1024 * 1024 ))
+half_size=$(( size / 2 ))
+third_size=$(( size / 3 ))
+
+# 1. "qemu-img rebase -c" should refuse working with any format which doesn't
+# support compression.  We only check "-f raw" here.
+echo
+echo "=== Testing compressed rebase format compatibility ==="
+echo
+
+$QEMU_IMG create -f raw "$TEST_IMG" "$size" | _filter_img_create
+$QEMU_IMG rebase -c -f raw -b "" "$TEST_IMG"
+
+# 2. Write the 1st half of $size to backing file (compressed), 2nd half -- to
+# the top image (also compressed).  Rebase the top image onto no backing file,
+# with compression (i.e. "qemu-img -c -b ''").  Check that the resulting image
+# has the written data preserved, and "qemu-img check" reports 100% clusters
+# as compressed.
+echo
+echo "=== Testing rebase with compression onto no backing file ==="
+echo
+
+TEST_IMG="$TEST_IMG.base" _make_test_img $size
+_make_test_img -b "$TEST_IMG.base" -F $IMGFMT $size
+
+$QEMU_IO -c "write -c -P 0xaa 0 $half_size" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -c -P 0xbb $half_size $half_size" "$TEST_IMG" \
+| _filter_qemu_io
+
+$QEMU_IMG rebase -c -f $IMGFMT -b "" "$TEST_IMG"
+
+$QEMU_IO -c "read -P 0xaa 0 $half_size" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0xbb $half_size $half_size" "$TEST_IMG" | _filter_qemu_io
+
+$QEMU_IMG check "$TEST_IMG" | _filter_testdir
+
+# 3. Same as the previous one, but with raw backing file (hence write to
+# the backing is uncompressed).
+echo
+echo "=== Testing rebase with compression with raw backing file ==="
+echo
+
+$QEMU_IMG create -f raw "$TEST_IMG.base" "$half_size" | _filter_img_create
+_make_test_img -b "$TEST_IMG.base" -F raw $size
+
+$QEMU_IO -f raw -c "write -P 0xaa 0 $half_size" "$TEST_IMG.base" \
+| _filter_qemu_io
+$QEMU_IO -c "write -c -P 0xbb $half_size $half_size" \
+"$TEST_IMG" | _filter_qemu_io
+
+$QEMU_IMG rebase -c -f $IMGFMT -b "" "$TEST_IMG"
+
+$QEMU_IO -c "read -P 0xaa 0 $half_size" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0xbb $half_size $half_size" "$TEST_IMG" | _filter_qemu_io
+
+$QEMU_IMG check "$TEST_IMG" | _filter_testdir
+
+# 4. Create a backing chain base<--itmd<--img, filling 1st, 2nd and 3rd
+# thirds of them, respectively (with compression).  Rebase img onto base,
+# effectively deleting itmd from the chain, and check that written data is
+# preserved in the resulting image.  Also check that "qemu-img check" reports
+# 100% clusters as compressed.
+echo
+echo "=== Testing compressed rebase removing single delta from the chain ==="
+echo
+

[PATCH 2/6] qemu-iotests: 024: add rebasing test case for overlay_size > backing_size

2023-06-01 Thread Andrey Drobyshev via

Before the previous commit, rebase was getting infinitely stuck in case of
rebasing within the same backing chain and when overlay_size > backing_size.
Let's add this case to the rebasing test 024 to make sure it doesn't
break again.

Signed-off-by: Andrey Drobyshev 
---
 tests/qemu-iotests/024 | 57 ++
 tests/qemu-iotests/024.out | 30 
 2 files changed, 87 insertions(+)

diff --git a/tests/qemu-iotests/024 b/tests/qemu-iotests/024
index 25a564a150..98a7c8fd65 100755
--- a/tests/qemu-iotests/024
+++ b/tests/qemu-iotests/024
@@ -199,6 +199,63 @@ echo
 # $BASE_OLD and $BASE_NEW)
 $QEMU_IMG map "$OVERLAY" | _filter_qemu_img_map
 
+# Check that rebase within the chain is working when
+# overlay_size > old_backing_size
+#
+# base_new <-- base_old <-- overlay
+#
+# Backing (new): 11 11 11 11 11
+# Backing (old): 22 22 22 22
+# Overlay:   -- -- -- -- --
+#
+# As a result, overlay should contain data identical to base_old, with the
+# last cluster remaining unallocated.
+
+echo
+echo "=== Test rebase within one backing chain ==="
+echo
+
+echo "Creating backing chain"
+echo
+
+TEST_IMG=$BASE_NEW _make_test_img $(( CLUSTER_SIZE * 5 ))
+TEST_IMG=$BASE_OLD _make_test_img -b "$BASE_NEW" -F $IMGFMT \
+$(( CLUSTER_SIZE * 4 ))
+TEST_IMG=$OVERLAY _make_test_img -b "$BASE_OLD" -F $IMGFMT \
+$(( CLUSTER_SIZE * 5 ))
+
+echo
+echo "Fill backing files with data"
+echo
+
+$QEMU_IO "$BASE_NEW" -c "write -P 0x11 0 $(( CLUSTER_SIZE * 5 ))" \
+| _filter_qemu_io
+$QEMU_IO "$BASE_OLD" -c "write -P 0x22 0 $(( CLUSTER_SIZE * 4 ))" \
+| _filter_qemu_io
+
+echo
+echo "Check the last cluster is zeroed in overlay before the rebase"
+echo
+$QEMU_IO "$OVERLAY" -c "read -P 0x00 $(( CLUSTER_SIZE * 4 )) $CLUSTER_SIZE" \
+| _filter_qemu_io
+
+echo
+echo "Rebase onto another image in the same chain"
+echo
+
+$QEMU_IMG rebase -b "$BASE_NEW" -F $IMGFMT "$OVERLAY"
+
+echo "Verify that data is read the same before and after rebase"
+echo
+
+# Verify the first 4 clusters are still read the same as in the old base
+$QEMU_IO "$OVERLAY" -c "read -P 0x22 0 $(( CLUSTER_SIZE * 4 ))" \
+| _filter_qemu_io
+# Verify the last cluster still reads as zeroes
+$QEMU_IO "$OVERLAY" -c "read -P 0x00 $(( CLUSTER_SIZE * 4 )) $CLUSTER_SIZE" \
+| _filter_qemu_io
+
+echo
 
 # success, all done
 echo "*** done"
diff --git a/tests/qemu-iotests/024.out b/tests/qemu-iotests/024.out
index 973a5a3711..245fe8b1d1 100644
--- a/tests/qemu-iotests/024.out
+++ b/tests/qemu-iotests/024.out
@@ -171,4 +171,34 @@ read 65536/65536 bytes at offset 196608
 Offset  Length  File
 0   0x3 TEST_DIR/subdir/t.IMGFMT
 0x3 0x1 TEST_DIR/subdir/t.IMGFMT.base_new
+
+=== Test rebase within one backing chain ===
+
+Creating backing chain
+
+Formatting 'TEST_DIR/subdir/t.IMGFMT.base_new', fmt=IMGFMT size=327680
+Formatting 'TEST_DIR/subdir/t.IMGFMT.base_old', fmt=IMGFMT size=262144 
backing_file=TEST_DIR/subdir/t.IMGFMT.base_new backing_fmt=IMGFMT
+Formatting 'TEST_DIR/subdir/t.IMGFMT', fmt=IMGFMT size=327680 
backing_file=TEST_DIR/subdir/t.IMGFMT.base_old backing_fmt=IMGFMT
+
+Fill backing files with data
+
+wrote 327680/327680 bytes at offset 0
+320 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 262144/262144 bytes at offset 0
+256 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+Check the last cluster is zeroed in overlay before the rebase
+
+read 65536/65536 bytes at offset 262144
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+Rebase onto another image in the same chain
+
+Verify that data is read the same before and after rebase
+
+read 262144/262144 bytes at offset 0
+256 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 65536/65536 bytes at offset 262144
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
 *** done
-- 
2.31.1

[PATCH 3/6] qemu-img: rebase: use backing files' BlockBackend for buffer alignment

2023-06-01 Thread Andrey Drobyshev via

Since commit bb1c05973cf ("qemu-img: Use qemu_blockalign"), buffers for
the data read from the old and new backing files are aligned using
BlockDriverState (or BlockBackend later on) referring to the target image.
However, this isn't quite right, because target image is only being
written to and has nothing to do with those buffers.  Let's fix that.

Signed-off-by: Andrey Drobyshev 
---
 qemu-img.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index 78433f3746..60f4c06487 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3746,8 +3746,8 @@ static int img_rebase(int argc, char **argv)
 int64_t n;
 float local_progress = 0;
 
-buf_old = blk_blockalign(blk, IO_BUF_SIZE);
-buf_new = blk_blockalign(blk, IO_BUF_SIZE);
+buf_old = blk_blockalign(blk_old_backing, IO_BUF_SIZE);
+buf_new = blk_blockalign(blk_new_backing, IO_BUF_SIZE);
 
 size = blk_getlength(blk);
 if (size < 0) {
-- 
2.31.1

Re: [PATCH] hvf: Handle EC_INSNABORT

2023-06-01 Thread Mark Burton




> On 1 Jun 2023, at 18:45, Peter Maydell  wrote:
> 
> WARNING: This email originated from outside of Qualcomm. Please be wary of 
> any links or attachments, and do not enable macros.
> 
> On Thu, 1 Jun 2023 at 17:00, Mark Burton  wrote:
>> This patch came from a discussion on the KVM call the other day.
>> It may well be the case there is a better/different implementation
>> - so the patch is more by way of asking the question.
>> 
>> Re-phrasing your question - I think it boils down to “should HVF
>> (and KVM) support executing instructions from IO space?”.
> 
> I think this falls into "might theoretically be nice but is
> probably too painful to actually implement". In practice
> well-behaved guests don't try to execute out of MMIO devices.
> 

>> In that case, this is a ‘partial’ answer to providing such
>> support for HVF - partial in that it relies upon a memory
>> region being created “dynamically” for the IO space that
>> has been accessed as a side-effect of a normal access.
> 
> But nothing in (upstream) QEMU magically creates MemoryRegions
> just because the guest tries to access them. Either there's
> nothing there in the AddressSpace at all (definitely can't
> execute from it) or there's already RAM (happy case) or there's
> already a device there. If there's already a device there
> then something would need to do a "put a bit of RAM in
> temporarily, fill in the single instruction by doing an
> address_space_read() to find the data value and writing it
> to the scratch RAM, tell KVM/HVF to do a single-step, undo
> everything again".
> 

Indeed, that’s basically what we’re implementing. In TCG mode you ’see’ the 
access, we’re just making it so that in HVF you equally ‘see’ such accesses to 
the ‘device’ (so you can put the bit of RAM in, out, shake it all about). A 
cleaner implementation may be some sort of “pre-i-side-access-op I’m about to 
access this device/address please register a ‘memory region’ I can use 
(temporarily)”. I’d have thought that could be useful any time you execute from 
e.g. a temporary ram of any sort (whatever the accelerator).

Cheers
Mark.


> -- PMM

Re: [PATCH v2 0/8] misc AHCI cleanups

2023-06-01 Thread John Snow

On Thu, Jun 1, 2023 at 9:45 AM Niklas Cassel  wrote:
>
> From: Niklas Cassel 
>
> Hello John,
>
> Here comes some misc AHCI cleanups.
>
> Most are related to error handling.
>
> Please review.
>
> (I'm also working on a second series which will add support for
> READ LOG EXT and READ LOG DMA EXT, but I will send that one out
> once it is ready. (It might take a couple of weeks still, since
> I've been a bit busy lately.))
>
>
> Changes since v1:
> -Picked up Reviewed-by tags.
>  (I did not convert your ACK to explicit Acked-by tags, since I assume
>  that the patches will go via your tree).

Guess so! I haven't been involved with IDE for a minute so I left the
ACKs in case I wandered off to signify that I hadn't reviewed them
thoroughly, but they *looked* good. Since I haven't wandered off,
guess I will actually take this and send an MR. I'll try to do this
Friday, June 2nd.

Thanks again for the very detailed commit messages, which make this easy. :)

--js

> -Rebased on master in order to fix a conflict in patch
>  "hw/ide/ahci: simplify and document PxCI handling".
> -Dropped patch "hw/ide/ahci: trigger either error IRQ or regular IRQ, not 
> both"
>  for now, as it caused a boot time regression in SeaBIOS.
>  This appears to be a bug in SeaBIOS, for more info see:
>  
> https://mail.coreboot.org/hyperkitty/list/seab...@seabios.org/thread/RIHV3FZ4EVMAJA4TEDPASKNYV7V72O4C/
>  I will resend the QEMU patch separately once the SeaBIOS patch has been
>  merged, and once QEMU has updated to a SeaBIOS tag that includes the fix.
>
>
> Kind regards,
> Niklas
>
> Niklas Cassel (8):
>   hw/ide/ahci: remove stray backslash
>   hw/ide/core: set ERR_STAT in unsupported command completion
>   hw/ide/ahci: write D2H FIS on when processing NCQ command
>   hw/ide/ahci: simplify and document PxCI handling
>   hw/ide/ahci: PxCI should not get cleared when ERR_STAT is set
>   hw/ide/ahci: PxSACT and PxCI is cleared when PxCMD.ST is cleared
>   hw/ide/ahci: fix ahci_write_fis_sdb()
>   hw/ide/ahci: fix broken SError handling
>
>  hw/ide/ahci.c | 112 +++---
>  hw/ide/core.c |   2 +-
>  2 files changed, 81 insertions(+), 33 deletions(-)
>
> --
> 2.40.1
>

Re: [PATCH 6/6] mtest2make: stop disabling meson test timeouts

2023-06-01 Thread Thomas Huth


On 01/06/2023 18.31, Daniel P. Berrangé wrote:

The mtest2make.py script passes the arg '-t 0' to 'meson test' which
disables all test timeouts. This is a major source of pain when running
in GitLab CI and a test gets stuck. It will stall until GitLab kills the
CI job. This leaves us with little easily consumable information about
the stalled test. The TAP format doesn't show the test name until it is
completed, and TAP output from multiple tests is interleaved. So we
have to analyse the log to figure out what tests had un-finished TAP
output present and thus infer which test case caused the hang. This is
very time consuming and error prone.

By allowing meson to kill stalled tests, we get a direct display of what
test program got stuck, which lets us more directly focus in on what
specific test case within the test program hung.

The other issue with disabling meson test timeouts by default is that it
makes it more likely that maintainers inadvertently introduce slowdowns.
For example the recent-ish change that accidentally made migrate-test
take 15-20 minutes instead of around 1 minute.

The main risk of this change is that the individual test timeouts might
be too short to allow completion in high load scenarios. Thus, there is
likely to be some short term pain where we have to bump the timeouts for
certain tests to make them reliable enough. The preceding few patches
raised the timeouts for all failures that were immediately apparent
in GitLab CI.

Even with the possible short term instability, this should still be a
net win for debuggability of failed CI pipelines over the long term.

Signed-off-by: Daniel P. Berrangé 
---
  scripts/mtest2make.py | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/mtest2make.py b/scripts/mtest2make.py
index 179dd54871..eb01a05ddb 100644
--- a/scripts/mtest2make.py
+++ b/scripts/mtest2make.py
@@ -27,7 +27,8 @@ def names(self, base):
  .speed.slow = $(foreach s,$(sort $(filter-out %-thorough, $1)), --suite $s)
  .speed.thorough = $(foreach s,$(sort $1), --suite $s)
  
-.mtestargs = --no-rebuild -t 0

+TIMEOUT_MULTIPLIER = 1
+.mtestargs = --no-rebuild -t $(TIMEOUT_MULTIPLIER)
  ifneq ($(SPEED), quick)
  .mtestargs += --setup $(SPEED)
  endif


Basically Ack, but could you please double-check that "make check 
-j$(nproc)" still works if configure has been run with "--enable-debug" ? 
... maybe we need to adjust the multiplier in that case...


 Thomas

Re: [PATCH 4/6] qtest: bump aspeed_smc-test timeout to 2 minutes

2023-06-01 Thread Thomas Huth


On 01/06/2023 18.31, Daniel P. Berrangé wrote:

On a reasonably modern laptop this test takes 40 seconds with the arm
emulator. Raising the timeout to 2 minutes gives greater headroom for
slowdown under GitLab CI.

Signed-off-by: Daniel P. Berrangé 
---
  tests/qtest/meson.build | 1 +
  1 file changed, 1 insertion(+)

diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index 6943bbfdd5..d9fa30edbc 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -1,4 +1,5 @@
  slow_qtests = {
+  'aspeed_smc-test': 120,
'bios-tables-test' : 120,
'migration-test' : 300,
'npcm7xx_pwm-test': 150,


Reviewed-by: Thomas Huth

Re: [PATCH 5/6] qtest: bump bios-table-test timeout to 6 minutes

2023-06-01 Thread Thomas Huth


On 01/06/2023 18.31, Daniel P. Berrangé wrote:

This is reliably hitting the current 2 minute timeout in GitLab CI
for the TCI job, and even hits a 4 minute timeout.


That sentence is somewhat hard to parse... maybe rather:

This is reliably hitting the current 2 minute timeout in GitLab CI, and for 
the TCI job, it even hits a 4 minute timeout.


?


At 6 minutes it
looks sufficiently reliable.

Signed-off-by: Daniel P. Berrangé 
---
  tests/qtest/meson.build | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index d9fa30edbc..4f45369421 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -1,6 +1,6 @@
  slow_qtests = {
'aspeed_smc-test': 120,
-  'bios-tables-test' : 120,
+  'bios-tables-test' : 360,
'migration-test' : 300,
'npcm7xx_pwm-test': 150,
'qom-test' : 420,


With the commit description updated:
Reviewed-by: Thomas Huth

Re: [PATCH 3/6] qtest: bump qom-test timeout to 7 minutes

2023-06-01 Thread Thomas Huth


On 01/06/2023 18.31, Daniel P. Berrangé wrote:

The qom-test is periodically hitting the 5 minute timeout when running
on the aarch64 emulator under GitLab CI. Add another 2 minutes headroom
to the timeout to improve reliability.

Signed-off-by: Daniel P. Berrangé 
---
  tests/qtest/meson.build | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index 6684591fcf..6943bbfdd5 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -2,7 +2,7 @@ slow_qtests = {
'bios-tables-test' : 120,
'migration-test' : 300,
'npcm7xx_pwm-test': 150,
-  'qom-test' : 300,
+  'qom-test' : 420,
'test-hmp' : 120,
  }



Reviewed-by: Thomas Huth

1 2 3 4 5 >

1 - 100 of 414 matches

Mail list logo