Re: [PATCH v2] target/loongarch/kvm: Enable LSX/LASX extension

2024-01-22 Thread maobibo




On 2024/1/22 下午5:02, Song Gao wrote:

The kernel had already support LSX and LASX [1],
but QEMU is disable LSX/LASX for kvm. This patch adds
kvm_check_cpucfg2() to check CPUCFG2.

[1]: 
https://lore.kernel.org/all/cabgobfzhrf7e_7jk4uprmsyxty3eiuuywhc35jqncnl9s-z...@mail.gmail.com/

Signed-off-by: Song Gao 
---
  linux-headers/asm-loongarch/kvm.h |  1 +
  target/loongarch/kvm/kvm.c| 45 ++-
  2 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/linux-headers/asm-loongarch/kvm.h 
b/linux-headers/asm-loongarch/kvm.h
index c6ad2ee610..923d0bd382 100644
--- a/linux-headers/asm-loongarch/kvm.h
+++ b/linux-headers/asm-loongarch/kvm.h
@@ -79,6 +79,7 @@ struct kvm_fpu {
  #define LOONGARCH_REG_64(TYPE, REG)   (TYPE | KVM_REG_SIZE_U64 | (REG << 
LOONGARCH_REG_SHIFT))
  #define KVM_IOC_CSRID(REG)LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, 
REG)
  #define KVM_IOC_CPUCFG(REG)   
LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG)
+#define KVM_LOONGARCH_VCPU_CPUCFG  0
  
  struct kvm_debug_exit_arch {

  };
diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c
index 84bcdf5f86..2712bb7ab6 100644
--- a/target/loongarch/kvm/kvm.c
+++ b/target/loongarch/kvm/kvm.c
@@ -537,6 +537,38 @@ static int kvm_loongarch_get_cpucfg(CPUState *cs)
  return ret;
  }
  
+static int kvm_check_cpucfg2(CPUState *cs)

+{
+int ret;
+uint64_t val;
+struct kvm_device_attr attr = {
+.group = KVM_LOONGARCH_VCPU_CPUCFG,
+.attr = 2,
+.addr = (uint64_t),
+};
+LoongArchCPU *cpu = LOONGARCH_CPU(cs);
+CPULoongArchState *env = >env;
+
+ret = kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, );
+
+if (!ret) {
+kvm_vcpu_ioctl(cs, KVM_GET_DEVICE_ATTR, );
+env->cpucfg[2] &= val;
+
+if (FIELD_EX32(env->cpucfg[2], CPUCFG2, FP)) {
+/* The FP minimal version is 1. */
+env->cpucfg[2] = FIELD_DP32(env->cpucfg[2], CPUCFG2, FP_VER, 1);
+}
+
+if (FIELD_EX32(env->cpucfg[2], CPUCFG2, LLFTP)) {
+/* The LLFTP minimal version is 1. */
+env->cpucfg[2] = FIELD_DP32(env->cpucfg[2], CPUCFG2, LLFTP_VER, 1);
+}
+}
+
+return ret;
+}
+
  static int kvm_loongarch_put_cpucfg(CPUState *cs)
  {
  int i, ret = 0;
@@ -545,14 +577,13 @@ static int kvm_loongarch_put_cpucfg(CPUState *cs)
  uint64_t val;
  
  for (i = 0; i < 21; i++) {

+   if (i == 2) {
+ret = kvm_check_cpucfg2(cs);
+if (ret) {
+return ret;
+}
+   }
  val = env->cpucfg[i];
-/* LSX and LASX and LBT are not supported in kvm now */
-if (i == 2) {
-val &= ~(BIT(R_CPUCFG2_LSX_SHIFT) | BIT(R_CPUCFG2_LASX_SHIFT));
-val &= ~(BIT(R_CPUCFG2_LBT_X86_SHIFT) |
- BIT(R_CPUCFG2_LBT_ARM_SHIFT) |
- BIT(R_CPUCFG2_LBT_MIPS_SHIFT));
-}
  ret = kvm_set_one_reg(cs, KVM_IOC_CPUCFG(i), );
  if (ret < 0) {
  trace_kvm_failed_put_cpucfg(strerror(errno));


Reviewed-by: Bibo Mao 




Re: [PATCH rfcv1 2/6] hw/pci: introduce pci_device_set/unset_iommu_device()

2024-01-22 Thread Cédric Le Goater

On 1/23/24 07:37, Duan, Zhenzhong wrote:




-Original Message-
From: Cédric Le Goater 
Subject: Re: [PATCH rfcv1 2/6] hw/pci: introduce
pci_device_set/unset_iommu_device()

On 1/15/24 11:13, Zhenzhong Duan wrote:

From: Yi Liu 

This adds pci_device_set/unset_iommu_device() to set/unset
IOMMUFDDevice for a given PCIe device. Caller of set
should fail if set operation fails.

Extract out pci_device_get_iommu_bus_devfn() to facilitate
implementation of pci_device_set/unset_iommu_device().

Signed-off-by: Yi Liu 
Signed-off-by: Yi Sun 
Signed-off-by: Nicolin Chen 
Signed-off-by: Zhenzhong Duan 
---
   include/hw/pci/pci.h | 39 ++-
   hw/pci/pci.c | 49

+++-

   2 files changed, 86 insertions(+), 2 deletions(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index fa6313aabc..a810c0ec74 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -7,6 +7,8 @@
   /* PCI includes legacy ISA access.  */
   #include "hw/isa/isa.h"

+#include "sysemu/iommufd_device.h"
+
   extern bool pci_available;

   /* PCI bus */
@@ -384,10 +386,45 @@ typedef struct PCIIOMMUOps {
*
* @devfn: device and function number
*/
-   AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int

devfn);

+AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int

devfn);

+/**
+ * @set_iommu_device: set iommufd device for a PCI device to

vIOMMU

+ *
+ * Optional callback, if not implemented in vIOMMU, then vIOMMU

can't

+ * utilize iommufd specific features.
+ *
+ * Return true if iommufd device is accepted, or else return false with
+ * errp set.
+ *
+ * @bus: the #PCIBus of the PCI device.
+ *
+ * @opaque: the data passed to pci_setup_iommu().
+ *
+ * @devfn: device and function number of the PCI device.
+ *
+ * @idev: the data structure representing iommufd device.
+ *
+ */
+int (*set_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn,
+IOMMUFDDevice *idev, Error **errp);
+/**
+ * @unset_iommu_device: unset iommufd device for a PCI device from

vIOMMU

+ *
+ * Optional callback.
+ *
+ * @bus: the #PCIBus of the PCI device.
+ *
+ * @opaque: the data passed to pci_setup_iommu().
+ *
+ * @devfn: device and function number of the PCI device.
+ */
+void (*unset_iommu_device)(PCIBus *bus, void *opaque, int32_t

devfn);

   } PCIIOMMUOps;

   AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
+int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice

*idev,

+Error **errp);
+void pci_device_unset_iommu_device(PCIDevice *dev);

   /**
* pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 76080af580..3848662f95 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2672,7 +2672,10 @@ static void

pci_device_class_base_init(ObjectClass *klass, void *data)

   }
   }

-AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
+static void pci_device_get_iommu_bus_devfn(PCIDevice *dev,
+   PCIBus **aliased_pbus,
+   PCIBus **piommu_bus,
+   uint8_t *aliased_pdevfn)
   {
   PCIBus *bus = pci_get_bus(dev);
   PCIBus *iommu_bus = bus;
@@ -2717,6 +2720,18 @@ AddressSpace

*pci_device_iommu_address_space(PCIDevice *dev)


   iommu_bus = parent_bus;
   }
+*aliased_pbus = bus;
+*piommu_bus = iommu_bus;
+*aliased_pdevfn = devfn;
+}
+
+AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
+{
+PCIBus *bus;
+PCIBus *iommu_bus;
+uint8_t devfn;
+
+pci_device_get_iommu_bus_devfn(dev, , _bus, );
   if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) {
   return iommu_bus->iommu_ops->get_address_space(bus,
iommu_bus->iommu_opaque, devfn);
@@ -2724,6 +2739,38 @@ AddressSpace

*pci_device_iommu_address_space(PCIDevice *dev)

   return _space_memory;
   }

+int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice

*idev,

+Error **errp)
+{
+PCIBus *bus;
+PCIBus *iommu_bus;
+uint8_t devfn;
+
+pci_device_get_iommu_bus_devfn(dev, , _bus, );
+if (!pci_bus_bypass_iommu(bus) && iommu_bus &&


Why do we test iommu_bus in pci_device_un/set_iommu_device routines
and
not in pci_device_iommu_address_space() ?


iommu_bus check in pci_device_iommu_address_space() is dropped in
below commit, I didn't find related discussion in mail history, maybe
by accident? I can add it back if it's not intentional.


Can iommu_bus be NULL or should we add an assert ?

C.



ba7d12eb8c  hw/pci: modify pci_setup_iommu() to set PCIIOMMUOps

Thanks
Zhenzhong





Re: How can I know Page Table address on RAM?

2024-01-22 Thread Nicholas Piggin
On Mon Jan 22, 2024 at 6:54 PM AEST, Thomas Huth wrote:
> On 22/01/2024 05.11, Junho wrote:
> > Hello,
> > 
> > I'm a QEMU user with PowerPc target architecture.
> > I have some personal modifications related to tb jmp cache and chaining 
> > logic to improve the performance of a specific guest code. To verify the 
> > safety, I have to guarantee that the page table on RAM does not change 
> > after 
> > initialization. Do you have any information related to this work? 
> > Currently, 
> > what I need to find is the page table start address on the RAM so that I 
> > can 
> > test with the range detected.
> > 
> > I look forward to your response.
> > 
> > Thank you
> > Junho
>
>   Hi,
>
> maybe it's best to ask this question on the qemu-ppc mailing list instead 
> (done now), since most PPC folks will rather read than one instead of the 
> high-traffic qemu-devel mailing list.

Hi Junho,

ppc targets have a lot of different MMUs, so it depends what you are
looking at.

The hash MMU has a page table that is linear in physical (real) memory,
so you might feasibly be able to watch it for updates. The SDR1 SPR has
hash table base and size. ISA v3.0 and later use an in-memory table
that is pointed to by the PTCR SPR.

Other types are software loaded and radix page tables which might be
infeasible or impossible to really track.

It would be interesting to know what kind of modifications you're doing,
it's possible they might be achieved another way. For example, there is
no requirement in the architecture for the TLB to be kept coherent with
page table modifications, so you might be able to watch for TLB flush
instructions rather than page table changes.

Thanks,
Nick



Re: [PATCH] target/ppc: Re-name registers to match ISA

2024-01-22 Thread Cédric Le Goater

On 12/1/23 13:24, Nicholas Piggin wrote:

Several registers have names that don't match the ISA (or convention
with other QEMU PPC registers), making them unintuitive to use with
GDB.

Fortunately most of these registers are obscure and/or have not been
correctly implemented in the gdb server (e.g., DEC, TB, CFAR), so risk
of breaking users should be low.

QEMU should follow the ISA for register name convention (where there is
no established GDB name).

Signed-off-by: Nicholas Piggin 



Acked-by: Cédric Le Goater 

Thanks,

C.



---
There is never a great time to change user interface, but I'd like to
make this change for 9.0.

Thanks,
Nick

  target/ppc/cpu_init.c| 20 ++--
  target/ppc/helper_regs.c |  2 +-
  2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 40fe14a6c2..15c1f2fdc8 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -5062,7 +5062,7 @@ static void register_970_hid_sprs(CPUPPCState *env)
  
  static void register_970_hior_sprs(CPUPPCState *env)

  {
-spr_register(env, SPR_HIOR, "SPR_HIOR",
+spr_register(env, SPR_HIOR, "HIOR",
   SPR_NOACCESS, SPR_NOACCESS,
   _read_hior, _write_hior,
   0x);
@@ -5070,11 +5070,11 @@ static void register_970_hior_sprs(CPUPPCState *env)
  
  static void register_book3s_ctrl_sprs(CPUPPCState *env)

  {
-spr_register(env, SPR_CTRL, "SPR_CTRL",
+spr_register(env, SPR_CTRL, "CTRL",
   SPR_NOACCESS, SPR_NOACCESS,
   SPR_NOACCESS, _write_CTRL,
   0x);
-spr_register(env, SPR_UCTRL, "SPR_UCTRL",
+spr_register(env, SPR_UCTRL, "UCTRL",
   _read_ureg, SPR_NOACCESS,
   _read_ureg, SPR_NOACCESS,
   0x);
@@ -5465,7 +5465,7 @@ static void register_book3s_purr_sprs(CPUPPCState *env)
  static void register_power6_dbg_sprs(CPUPPCState *env)
  {
  #if !defined(CONFIG_USER_ONLY)
-spr_register(env, SPR_CFAR, "SPR_CFAR",
+spr_register(env, SPR_CFAR, "CFAR",
   SPR_NOACCESS, SPR_NOACCESS,
   _read_cfar, _write_cfar,
   0x);
@@ -5483,7 +5483,7 @@ static void register_power5p_common_sprs(CPUPPCState *env)
  static void register_power6_common_sprs(CPUPPCState *env)
  {
  #if !defined(CONFIG_USER_ONLY)
-spr_register_kvm(env, SPR_DSCR, "SPR_DSCR",
+spr_register_kvm(env, SPR_DSCR, "DSCR",
   SPR_NOACCESS, SPR_NOACCESS,
   _read_generic, _write_generic,
   KVM_REG_PPC_DSCR, 0x);
@@ -5695,7 +5695,7 @@ static void register_power8_book4_sprs(CPUPPCState *env)
   _read_generic, _write_generic,
   KVM_REG_PPC_ACOP, 0);
  /* PID is only in BookE in ISA v2.07 */
-spr_register_kvm(env, SPR_BOOKS_PID, "PID",
+spr_register_kvm(env, SPR_BOOKS_PID, "PIDR",
   SPR_NOACCESS, SPR_NOACCESS,
   _read_generic, _write_pidr,
   KVM_REG_PPC_PID, 0);
@@ -5716,7 +5716,7 @@ static void register_power7_book4_sprs(CPUPPCState *env)
   _read_generic, _write_generic,
   KVM_REG_PPC_ACOP, 0);
  /* PID is only in BookE in ISA v2.06 */
-spr_register_kvm(env, SPR_BOOKS_PID, "PID",
+spr_register_kvm(env, SPR_BOOKS_PID, "PIDR",
   SPR_NOACCESS, SPR_NOACCESS,
   _read_generic, _write_generic32,
   KVM_REG_PPC_PID, 0);
@@ -5750,7 +5750,7 @@ static void register_power9_mmu_sprs(CPUPPCState *env)
  _read_generic, _write_generic,
  0x);
  /* PID is part of the BookS ISA from v3.0 */
-spr_register_kvm(env, SPR_BOOKS_PID, "PID",
+spr_register_kvm(env, SPR_BOOKS_PID, "PIDR",
   SPR_NOACCESS, SPR_NOACCESS,
   _read_generic, _write_pidr,
   KVM_REG_PPC_PID, 0);
@@ -5791,7 +5791,7 @@ static void register_power10_dexcr_sprs(CPUPPCState *env)
  _read_generic, _write_generic32,
  0);
  
-spr_register(env, SPR_UDEXCR, "DEXCR",

+spr_register(env, SPR_UDEXCR, "UDEXCR",
  _read_dexcr_ureg, SPR_NOACCESS,
  _read_dexcr_ureg, SPR_NOACCESS,
  0);
@@ -5802,7 +5802,7 @@ static void register_power10_dexcr_sprs(CPUPPCState *env)
  _read_generic, _write_generic32,
  0);
  
-spr_register(env, SPR_UHDEXCR, "HDEXCR",

+spr_register(env, SPR_UHDEXCR, "UHDEXCR",
  _read_dexcr_ureg, SPR_NOACCESS,
  _read_dexcr_ureg, SPR_NOACCESS,
  0);
diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
index f1493ddca0..59cf3dfaae 100644
--- a/target/ppc/helper_regs.c
+++ b/target/ppc/helper_regs.c
@@ -509,7 +509,7 @@ void 

Re: [PATCH v6 9/9] ppc/pnv: Test pnv i2c master and connected devices

2024-01-22 Thread Cédric Le Goater

On 11/27/23 19:16, Glenn Miles wrote:

Tests the following for both P9 and P10:
   - I2C master POR status
   - I2C master status after immediate reset

Tests the following for powernv10-ranier only:
   - Config pca9552 hotplug device pins as inputs then
 Read the INPUT0/1 registers to verify all pins are high
   - Connected GPIO pin tests of P10 PCA9552 device.  Tests
 output of pins 0-4 affect input of pins 5-9 respectively.
   - PCA9554 GPIO pins test.  Tests input and ouput functionality.

Signed-off-by: Glenn Miles 
---


A couple of comments below, anyhow :

Reviewed-by: Cédric Le Goater 





No change from previous version

  tests/qtest/meson.build |   1 +
  tests/qtest/pnv-host-i2c-test.c | 650 
  2 files changed, 651 insertions(+)
  create mode 100644 tests/qtest/pnv-host-i2c-test.c

diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index 47dabf91d0..fbb0bd204c 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -163,6 +163,7 @@ qtests_ppc64 = \
qtests_ppc + \
(config_all_devices.has_key('CONFIG_PSERIES') ? ['device-plug-test'] : []) 
+   \
(config_all_devices.has_key('CONFIG_POWERNV') ? ['pnv-xscom-test'] : []) +  
   \
+  (config_all_devices.has_key('CONFIG_POWERNV') ? ['pnv-host-i2c-test'] : []) 
+  \
(config_all_devices.has_key('CONFIG_PSERIES') ? ['rtas-test'] : []) +   
   \
(slirp.found() ? ['pxe-test'] : []) +  \
(config_all_devices.has_key('CONFIG_USB_UHCI') ? ['usb-hcd-uhci-test'] : 
[]) + \
diff --git a/tests/qtest/pnv-host-i2c-test.c b/tests/qtest/pnv-host-i2c-test.c
new file mode 100644
index 00..377525e458
--- /dev/null
+++ b/tests/qtest/pnv-host-i2c-test.c
@@ -0,0 +1,650 @@
+/*
+ * QTest testcase for PowerNV 10 Host I2C Communications
+ *
+ * Copyright (c) 2023, IBM Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "libqtest.h"
+#include "hw/misc/pca9554_regs.h"
+#include "hw/misc/pca9552_regs.h"
+
+#define PPC_BIT(bit)(0x8000ULL >> (bit))
+#define PPC_BIT32(bit)  (0x8000 >> (bit))
+#define PPC_BIT8(bit)   (0x80 >> (bit))
+#define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
+#define PPC_BITMASK32(bs, be)   ((PPC_BIT32(bs) - PPC_BIT32(be)) | \
+ PPC_BIT32(bs))
+
+#define MASK_TO_LSH(m)  (__builtin_ffsll(m) - 1)
+#define GETFIELD(m, v)  (((v) & (m)) >> MASK_TO_LSH(m))
+#define SETFIELD(m, v, val) \
+(((v) & ~(m)) | typeof(v))(val)) << MASK_TO_LSH(m)) & (m)))
+
+#define P10_XSCOM_BASE  0x000603fcull
+#define PNV10_CHIP_MAX_I2C  5
+#define PNV10_XSCOM_I2CM_BASE   0xa
+#define PNV10_XSCOM_I2CM_SIZE   0x1000
+
+/* I2C FIFO register */
+#define I2C_FIFO_REG0x4
+#define I2C_FIFOPPC_BITMASK(0, 7)
+
+/* I2C command register */
+#define I2C_CMD_REG 0x5
+#define I2C_CMD_WITH_START  PPC_BIT(0)
+#define I2C_CMD_WITH_ADDR   PPC_BIT(1)
+#define I2C_CMD_READ_CONT   PPC_BIT(2)
+#define I2C_CMD_WITH_STOP   PPC_BIT(3)
+#define I2C_CMD_INTR_STEERING   PPC_BITMASK(6, 7) /* P9 */
+#define   I2C_CMD_INTR_STEER_HOST   1
+#define   I2C_CMD_INTR_STEER_OCC2
+#define I2C_CMD_DEV_ADDRPPC_BITMASK(8, 14)
+#define I2C_CMD_READ_NOT_WRITE  PPC_BIT(15)
+#define I2C_CMD_LEN_BYTES   PPC_BITMASK(16, 31)
+#define I2C_MAX_TFR_LEN 0xfff0ull
+
+/* I2C mode register */
+#define I2C_MODE_REG0x6
+#define I2C_MODE_BIT_RATE_DIV   PPC_BITMASK(0, 15)
+#define I2C_MODE_PORT_NUM   PPC_BITMASK(16, 21)
+#define I2C_MODE_ENHANCED   PPC_BIT(28)
+#define I2C_MODE_DIAGNOSTIC PPC_BIT(29)
+#define I2C_MODE_PACING_ALLOW   PPC_BIT(30)
+#define I2C_MODE_WRAP   PPC_BIT(31)
+
+/* I2C watermark register */
+#define I2C_WATERMARK_REG   0x7
+#define I2C_WATERMARK_HIGH  PPC_BITMASK(16, 19)
+#define I2C_WATERMARK_LOW   PPC_BITMASK(24, 27)
+
+/*
+ * I2C interrupt mask and condition registers
+ *
+ * NB: The function of 0x9 and 0xa changes depending on whether you're reading
+ * or writing to them. When read they return the interrupt condition bits
+ * and on writes they update the interrupt mask register.
+ *
+ *  The bit definitions are the same for all the interrupt registers.
+ */
+#define I2C_INTR_MASK_REG   0x8
+
+#define I2C_INTR_RAW_COND_REG   0x9 /* read */
+#define I2C_INTR_MASK_OR_REG0x9 /* write*/
+
+#define I2C_INTR_COND_REG   0xa /* read */
+#define I2C_INTR_MASK_AND_REG   0xa /* write */
+
+#define 

Re: [PATCH v3 8/8] target/ppc: Add SMT support to time facilities

2024-01-22 Thread Cédric Le Goater

On 12/1/23 13:16, Nicholas Piggin wrote:

The TB, VTB, PURR, HDEC SPRs are per-LPAR registers, and the TFMR is a
per-core register. Add the necessary SMT synchronisation and value
sharing.

The TFMR can only drive the timebase state machine via thread 0 of the
core, which is almost certainly not right, but it is enough for skiboot
and certain other proprietary firmware.

Signed-off-by: Nicholas Piggin 



Acked-by: Cédric Le Goater 

Thanks,

C.



---
  target/ppc/timebase_helper.c | 105 ---
  target/ppc/translate.c   |  42 +-
  2 files changed, 136 insertions(+), 11 deletions(-)

diff --git a/target/ppc/timebase_helper.c b/target/ppc/timebase_helper.c
index bc1d54a427..a23fbf75ff 100644
--- a/target/ppc/timebase_helper.c
+++ b/target/ppc/timebase_helper.c
@@ -60,19 +60,55 @@ target_ulong helper_load_purr(CPUPPCState *env)
  
  void helper_store_purr(CPUPPCState *env, target_ulong val)

  {
-cpu_ppc_store_purr(env, val);
+CPUState *cs = env_cpu(env);
+CPUState *ccs;
+uint32_t nr_threads = cs->nr_threads;
+
+if (nr_threads == 1 || !(env->flags & POWERPC_FLAG_SMT_1LPAR)) {
+cpu_ppc_store_purr(env, val);
+return;
+}
+
+THREAD_SIBLING_FOREACH(cs, ccs) {
+CPUPPCState *cenv = _CPU(ccs)->env;
+cpu_ppc_store_purr(cenv, val);
+}
  }
  #endif
  
  #if !defined(CONFIG_USER_ONLY)

  void helper_store_tbl(CPUPPCState *env, target_ulong val)
  {
-cpu_ppc_store_tbl(env, val);
+CPUState *cs = env_cpu(env);
+CPUState *ccs;
+uint32_t nr_threads = cs->nr_threads;
+
+if (nr_threads == 1 || !(env->flags & POWERPC_FLAG_SMT_1LPAR)) {
+cpu_ppc_store_tbl(env, val);
+return;
+}
+
+THREAD_SIBLING_FOREACH(cs, ccs) {
+CPUPPCState *cenv = _CPU(ccs)->env;
+cpu_ppc_store_tbl(cenv, val);
+}
  }
  
  void helper_store_tbu(CPUPPCState *env, target_ulong val)

  {
-cpu_ppc_store_tbu(env, val);
+CPUState *cs = env_cpu(env);
+CPUState *ccs;
+uint32_t nr_threads = cs->nr_threads;
+
+if (nr_threads == 1 || !(env->flags & POWERPC_FLAG_SMT_1LPAR)) {
+cpu_ppc_store_tbu(env, val);
+return;
+}
+
+THREAD_SIBLING_FOREACH(cs, ccs) {
+CPUPPCState *cenv = _CPU(ccs)->env;
+cpu_ppc_store_tbu(cenv, val);
+}
  }
  
  void helper_store_atbl(CPUPPCState *env, target_ulong val)

@@ -102,17 +138,53 @@ target_ulong helper_load_hdecr(CPUPPCState *env)
  
  void helper_store_hdecr(CPUPPCState *env, target_ulong val)

  {
-cpu_ppc_store_hdecr(env, val);
+CPUState *cs = env_cpu(env);
+CPUState *ccs;
+uint32_t nr_threads = cs->nr_threads;
+
+if (nr_threads == 1 || !(env->flags & POWERPC_FLAG_SMT_1LPAR)) {
+cpu_ppc_store_hdecr(env, val);
+return;
+}
+
+THREAD_SIBLING_FOREACH(cs, ccs) {
+CPUPPCState *cenv = _CPU(ccs)->env;
+cpu_ppc_store_hdecr(cenv, val);
+}
  }
  
  void helper_store_vtb(CPUPPCState *env, target_ulong val)

  {
-cpu_ppc_store_vtb(env, val);
+CPUState *cs = env_cpu(env);
+CPUState *ccs;
+uint32_t nr_threads = cs->nr_threads;
+
+if (nr_threads == 1 || !(env->flags & POWERPC_FLAG_SMT_1LPAR)) {
+cpu_ppc_store_vtb(env, val);
+return;
+}
+
+THREAD_SIBLING_FOREACH(cs, ccs) {
+CPUPPCState *cenv = _CPU(ccs)->env;
+cpu_ppc_store_vtb(cenv, val);
+}
  }
  
  void helper_store_tbu40(CPUPPCState *env, target_ulong val)

  {
-cpu_ppc_store_tbu40(env, val);
+CPUState *cs = env_cpu(env);
+CPUState *ccs;
+uint32_t nr_threads = cs->nr_threads;
+
+if (nr_threads == 1 || !(env->flags & POWERPC_FLAG_SMT_1LPAR)) {
+cpu_ppc_store_tbu40(env, val);
+return;
+}
+
+THREAD_SIBLING_FOREACH(cs, ccs) {
+CPUPPCState *cenv = _CPU(ccs)->env;
+cpu_ppc_store_tbu40(cenv, val);
+}
  }
  
  target_ulong helper_load_40x_pit(CPUPPCState *env)

@@ -211,6 +283,21 @@ static uint64_t tfmr_new_tb_state(uint64_t tfmr, unsigned 
int tbst)
  return tfmr;
  }
  
+static void write_tfmr(CPUPPCState *env, target_ulong val)

+{
+CPUState *cs = env_cpu(env);
+
+if (cs->nr_threads == 1) {
+env->spr[SPR_TFMR] = val;
+} else {
+CPUState *ccs;
+THREAD_SIBLING_FOREACH(cs, ccs) {
+CPUPPCState *cenv = _CPU(ccs)->env;
+cenv->spr[SPR_TFMR] = val;
+}
+}
+}
+
  static void tb_state_machine_step(CPUPPCState *env)
  {
  uint64_t tfmr = env->spr[SPR_TFMR];
@@ -224,7 +311,7 @@ static void tb_state_machine_step(CPUPPCState *env)
  env->pnv_tod_tbst.tb_sync_pulse_timer--;
  } else {
  tfmr |= TFMR_TB_SYNC_OCCURED;
-env->spr[SPR_TFMR] = tfmr;
+write_tfmr(env, tfmr);
  }
  
  if (env->pnv_tod_tbst.tb_state_timer) {

@@ -262,7 +349,7 @@ static void tb_state_machine_step(CPUPPCState *env)
  }
  }
  
-env->spr[SPR_TFMR] = tfmr;

+write_tfmr(env, tfmr);
  }
  
  

Re: [PATCH v3 5/8] ppc/pnv: Wire ChipTOD model to powernv9 and powernv10 machines

2024-01-22 Thread Cédric Le Goater

On 12/1/23 13:16, Nicholas Piggin wrote:

Wire the ChipTOD model to powernv9 and powernv10 machines.

Suggested-by-by: Cédric Le Goater 
Signed-off-by: Nicholas Piggin 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  include/hw/ppc/pnv_chip.h |  3 +++
  hw/ppc/pnv.c  | 30 ++
  2 files changed, 33 insertions(+)

diff --git a/include/hw/ppc/pnv_chip.h b/include/hw/ppc/pnv_chip.h
index 0ab5c42308..bfc4772cf3 100644
--- a/include/hw/ppc/pnv_chip.h
+++ b/include/hw/ppc/pnv_chip.h
@@ -2,6 +2,7 @@
  #define PPC_PNV_CHIP_H
  
  #include "hw/pci-host/pnv_phb4.h"

+#include "hw/ppc/pnv_chiptod.h"
  #include "hw/ppc/pnv_core.h"
  #include "hw/ppc/pnv_homer.h"
  #include "hw/ppc/pnv_lpc.h"
@@ -78,6 +79,7 @@ struct Pnv9Chip {
  PnvXive  xive;
  Pnv9Psi  psi;
  PnvLpcController lpc;
+PnvChipTOD   chiptod;
  PnvOCC   occ;
  PnvSBE   sbe;
  PnvHomer homer;
@@ -110,6 +112,7 @@ struct Pnv10Chip {
  PnvXive2 xive;
  Pnv9Psi  psi;
  PnvLpcController lpc;
+PnvChipTOD   chiptod;
  PnvOCC   occ;
  PnvSBE   sbe;
  PnvHomer homer;
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index b949398689..d3cb76 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -1419,6 +1419,8 @@ static void pnv_chip_power9_instance_init(Object *obj)
  
  object_initialize_child(obj, "lpc", >lpc, TYPE_PNV9_LPC);
  
+object_initialize_child(obj, "chiptod", >chiptod, TYPE_PNV9_CHIPTOD);

+
  object_initialize_child(obj, "occ", >occ, TYPE_PNV9_OCC);
  
  object_initialize_child(obj, "sbe", >sbe, TYPE_PNV9_SBE);

@@ -1565,6 +1567,19 @@ static void pnv_chip_power9_realize(DeviceState *dev, 
Error **errp)
  chip->dt_isa_nodename = g_strdup_printf("/lpcm-opb@%" PRIx64 "/lpc@0",
  (uint64_t) PNV9_LPCM_BASE(chip));
  
+/* ChipTOD */

+object_property_set_bool(OBJECT(>chiptod), "primary",
+ chip->chip_id == 0, _abort);
+object_property_set_bool(OBJECT(>chiptod), "secondary",
+ chip->chip_id == 1, _abort);
+object_property_set_link(OBJECT(>chiptod), "chip", OBJECT(chip),
+ _abort);
+if (!qdev_realize(DEVICE(>chiptod), NULL, errp)) {
+return;
+}
+pnv_xscom_add_subregion(chip, PNV9_XSCOM_CHIPTOD_BASE,
+>chiptod.xscom_regs);
+
  /* Create the simplified OCC model */
  if (!qdev_realize(DEVICE(>occ), NULL, errp)) {
  return;
@@ -1677,6 +1692,8 @@ static void pnv_chip_power10_instance_init(Object *obj)
"xive-fabric");
  object_initialize_child(obj, "psi", >psi, TYPE_PNV10_PSI);
  object_initialize_child(obj, "lpc", >lpc, TYPE_PNV10_LPC);
+object_initialize_child(obj, "chiptod", >chiptod,
+TYPE_PNV10_CHIPTOD);
  object_initialize_child(obj, "occ",  >occ, TYPE_PNV10_OCC);
  object_initialize_child(obj, "sbe",  >sbe, TYPE_PNV10_SBE);
  object_initialize_child(obj, "homer", >homer, TYPE_PNV10_HOMER);
@@ -1810,6 +1827,19 @@ static void pnv_chip_power10_realize(DeviceState *dev, 
Error **errp)
  chip->dt_isa_nodename = g_strdup_printf("/lpcm-opb@%" PRIx64 "/lpc@0",
  (uint64_t) PNV10_LPCM_BASE(chip));
  
+/* ChipTOD */

+object_property_set_bool(OBJECT(>chiptod), "primary",
+ chip->chip_id == 0, _abort);
+object_property_set_bool(OBJECT(>chiptod), "secondary",
+ chip->chip_id == 1, _abort);
+object_property_set_link(OBJECT(>chiptod), "chip", OBJECT(chip),
+ _abort);
+if (!qdev_realize(DEVICE(>chiptod), NULL, errp)) {
+return;
+}
+pnv_xscom_add_subregion(chip, PNV10_XSCOM_CHIPTOD_BASE,
+>chiptod.xscom_regs);
+
  /* Create the simplified OCC model */
  if (!qdev_realize(DEVICE(>occ), NULL, errp)) {
  return;





Re: [PATCH] Make 'uri' optional for migrate QAPI

2024-01-22 Thread Het Gala


On 23/01/24 12:12 pm, Het Gala wrote:

'uri' argument should be optional, as 'uri' and 'channels'
arguments are mutally exclusive in nature.


Also verified by running the qemu CI/CD pipeline. ref: 
https://gitlab.com/galahet/Qemu/-/pipelines/1147048455



Fixes: 074dbce5fcce (migration: New migrate and
migrate-incoming argument 'channels')
Signed-off-by: Het Gala
---
  qapi/migration.json | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qapi/migration.json b/qapi/migration.json
index eb2f883513..197d3faa43 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -1757,7 +1757,7 @@
  #
  ##
  { 'command': 'migrate',
-  'data': {'uri': 'str',
+  'data': {'*uri': 'str',
 '*channels': [ 'MigrationChannel' ],
 '*blk': { 'type': 'bool', 'features': [ 'deprecated' ] },
 '*inc': { 'type': 'bool', 'features': [ 'deprecated' ] },


Regards,

Het Gala


[PATCH] Make 'uri' optional for migrate QAPI

2024-01-22 Thread Het Gala
'uri' argument should be optional, as 'uri' and 'channels'
arguments are mutally exclusive in nature.

Fixes: 074dbce5fcce (migration: New migrate and
migrate-incoming argument 'channels')
Signed-off-by: Het Gala 
---
 qapi/migration.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qapi/migration.json b/qapi/migration.json
index eb2f883513..197d3faa43 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -1757,7 +1757,7 @@
 #
 ##
 { 'command': 'migrate',
-  'data': {'uri': 'str',
+  'data': {'*uri': 'str',
'*channels': [ 'MigrationChannel' ],
'*blk': { 'type': 'bool', 'features': [ 'deprecated' ] },
'*inc': { 'type': 'bool', 'features': [ 'deprecated' ] },
-- 
2.22.3




[PATCH 2/8] hw/arm/highbank: Check for CPU types in machine_run_board_init()

2024-01-22 Thread Philippe Mathieu-Daudé
Restrict MachineClass::valid_cpu_types[] to the single
valid CPU types.

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/arm/highbank.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c
index e6e27d69af..67677eb651 100644
--- a/hw/arm/highbank.c
+++ b/hw/arm/highbank.c
@@ -344,10 +344,15 @@ static void midway_init(MachineState *machine)
 
 static void highbank_class_init(ObjectClass *oc, void *data)
 {
+static const char * const valid_cpu_types[] = {
+ARM_CPU_TYPE_NAME("cortex-a9"),
+NULL
+};
 MachineClass *mc = MACHINE_CLASS(oc);
 
 mc->desc = "Calxeda Highbank (ECX-1000)";
 mc->init = highbank_init;
+mc->valid_cpu_types = valid_cpu_types;
 mc->block_default_type = IF_IDE;
 mc->units_per_default_bus = 1;
 mc->max_cpus = 4;
@@ -363,10 +368,15 @@ static const TypeInfo highbank_type = {
 
 static void midway_class_init(ObjectClass *oc, void *data)
 {
+static const char * const valid_cpu_types[] = {
+ARM_CPU_TYPE_NAME("cortex-a15"),
+NULL
+};
 MachineClass *mc = MACHINE_CLASS(oc);
 
 mc->desc = "Calxeda Midway (ECX-2000)";
 mc->init = midway_init;
+mc->valid_cpu_types = valid_cpu_types;
 mc->block_default_type = IF_IDE;
 mc->units_per_default_bus = 1;
 mc->max_cpus = 4;
-- 
2.41.0




[PATCH 3/8] hw/arm/vexpress: Check for CPU types in machine_run_board_init()

2024-01-22 Thread Philippe Mathieu-Daudé
Restrict MachineClass::valid_cpu_types[] to the single
valid CPU types.

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/arm/vexpress.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c
index f1b45245d5..a3561a1b56 100644
--- a/hw/arm/vexpress.c
+++ b/hw/arm/vexpress.c
@@ -783,22 +783,32 @@ static void vexpress_class_init(ObjectClass *oc, void 
*data)
 
 static void vexpress_a9_class_init(ObjectClass *oc, void *data)
 {
+static const char * const valid_cpu_types[] = {
+ARM_CPU_TYPE_NAME("cortex-a9"),
+NULL
+};
 MachineClass *mc = MACHINE_CLASS(oc);
 VexpressMachineClass *vmc = VEXPRESS_MACHINE_CLASS(oc);
 
 mc->desc = "ARM Versatile Express for Cortex-A9";
 mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a9");
+mc->valid_cpu_types = valid_cpu_types;
 
 vmc->daughterboard = _daughterboard;
 }
 
 static void vexpress_a15_class_init(ObjectClass *oc, void *data)
 {
+static const char * const valid_cpu_types[] = {
+ARM_CPU_TYPE_NAME("cortex-a15"),
+NULL
+};
 MachineClass *mc = MACHINE_CLASS(oc);
 VexpressMachineClass *vmc = VEXPRESS_MACHINE_CLASS(oc);
 
 mc->desc = "ARM Versatile Express for Cortex-A15";
 mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a15");
+mc->valid_cpu_types = valid_cpu_types;
 
 vmc->daughterboard = _daughterboard;
 
-- 
2.41.0




[PATCH 7/8] hw/arm/aspeed/1030: Check for CPU types in machine_run_board_init()

2024-01-22 Thread Philippe Mathieu-Daudé
Restrict MachineClass::valid_cpu_types[] to the single
valid CPU type.

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/arm/aspeed.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index 393c97d55e..62d08899d8 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -1162,6 +1162,11 @@ static const char * const ast2600_a3_valid_cpu_types[] = 
{
 NULL
 };
 
+static const char * const ast1030_a1_valid_cpu_types[] = {
+ARM_CPU_TYPE_NAME("cortex-m4"), /* TODO cortex-m4f */
+NULL
+};
+
 static void aspeed_machine_class_init(ObjectClass *oc, void *data)
 {
 MachineClass *mc = MACHINE_CLASS(oc);
@@ -1613,6 +1618,7 @@ static void 
aspeed_minibmc_machine_ast1030_evb_class_init(ObjectClass *oc,
 AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
 
 mc->desc = "Aspeed AST1030 MiniBMC (Cortex-M4)";
+mc->valid_cpu_types = ast1030_a1_valid_cpu_types;
 amc->soc_name = "ast1030-a1";
 amc->hw_strap1 = 0;
 amc->hw_strap2 = 0;
-- 
2.41.0




[PATCH 8/8] hw/arm/zynq: Check for CPU types in machine_run_board_init()

2024-01-22 Thread Philippe Mathieu-Daudé
Restrict MachineClass::valid_cpu_types[] to the single
valid CPU type.

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/arm/xilinx_zynq.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/hw/arm/xilinx_zynq.c b/hw/arm/xilinx_zynq.c
index 66d0de139f..6ec65d4780 100644
--- a/hw/arm/xilinx_zynq.c
+++ b/hw/arm/xilinx_zynq.c
@@ -355,6 +355,10 @@ static void zynq_init(MachineState *machine)
 
 static void zynq_machine_class_init(ObjectClass *oc, void *data)
 {
+static const char * const valid_cpu_types[] = {
+ARM_CPU_TYPE_NAME("cortex-a9"),
+NULL
+};
 MachineClass *mc = MACHINE_CLASS(oc);
 mc->desc = "Xilinx Zynq Platform Baseboard for Cortex-A9";
 mc->init = zynq_init;
@@ -362,6 +366,7 @@ static void zynq_machine_class_init(ObjectClass *oc, void 
*data)
 mc->no_sdcard = 1;
 mc->ignore_memory_transaction_failures = true;
 mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a9");
+mc->valid_cpu_types = valid_cpu_types;
 mc->default_ram_id = "zynq.ext_ram";
 }
 
-- 
2.41.0




[PATCH 0/8] hw/arm/cortex-a: Check for CPU types in machine_run_board_init()

2024-01-22 Thread Philippe Mathieu-Daudé
Following Gavin recent CPU type enforcement cleanups,
restrict more single-CPU ARM machines (here Cortex-A SoC).

Based-on: <20240118200643.29037-1-phi...@linaro.org> (arm-next)

Philippe Mathieu-Daudé (8):
  hw/arm/exynos: Check for CPU types in machine_run_board_init()
  hw/arm/highbank: Check for CPU types in machine_run_board_init()
  hw/arm/vexpress: Check for CPU types in machine_run_board_init()
  hw/arm/aspeed/2400: Check for CPU types in machine_run_board_init()
  hw/arm/aspeed/2500: Check for CPU types in machine_run_board_init()
  hw/arm/aspeed/2600: Check for CPU types in machine_run_board_init()
  hw/arm/aspeed/1030: Check for CPU types in machine_run_board_init()
  hw/arm/zynq: Check for CPU types in machine_run_board_init()

 hw/arm/aspeed.c | 40 
 hw/arm/exynos4_boards.c |  8 
 hw/arm/highbank.c   | 10 ++
 hw/arm/vexpress.c   | 10 ++
 hw/arm/xilinx_zynq.c|  5 +
 5 files changed, 73 insertions(+)

-- 
2.41.0




[PATCH 4/8] hw/arm/aspeed/2400: Check for CPU types in machine_run_board_init()

2024-01-22 Thread Philippe Mathieu-Daudé
Restrict MachineClass::valid_cpu_types[] to the single
valid CPU type.

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/arm/aspeed.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index cc59176563..e0e0877b1d 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -1147,6 +1147,11 @@ static int aspeed_soc_num_cpus(const char *soc_name)
return sc->num_cpus;
 }
 
+static const char * const ast2400_a1_valid_cpu_types[] = {
+ARM_CPU_TYPE_NAME("arm926"),
+NULL
+};
+
 static void aspeed_machine_class_init(ObjectClass *oc, void *data)
 {
 MachineClass *mc = MACHINE_CLASS(oc);
@@ -1175,6 +1180,7 @@ static void 
aspeed_machine_palmetto_class_init(ObjectClass *oc, void *data)
 amc->spi_model = "mx25l25635f";
 amc->num_cs= 1;
 amc->i2c_init  = palmetto_bmc_i2c_init;
+mc->valid_cpu_types = ast2400_a1_valid_cpu_types;
 mc->default_ram_size   = 256 * MiB;
 mc->default_cpus = mc->min_cpus = mc->max_cpus =
 aspeed_soc_num_cpus(amc->soc_name);
@@ -1192,6 +1198,7 @@ static void 
aspeed_machine_quanta_q71l_class_init(ObjectClass *oc, void *data)
 amc->spi_model = "mx25l25635e";
 amc->num_cs= 1;
 amc->i2c_init  = quanta_q71l_bmc_i2c_init;
+mc->valid_cpu_types = ast2400_a1_valid_cpu_types;
 mc->default_ram_size   = 128 * MiB;
 mc->default_cpus = mc->min_cpus = mc->max_cpus =
 aspeed_soc_num_cpus(amc->soc_name);
@@ -1211,6 +1218,7 @@ static void 
aspeed_machine_supermicrox11_bmc_class_init(ObjectClass *oc,
 amc->num_cs= 1;
 amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON;
 amc->i2c_init  = palmetto_bmc_i2c_init;
+mc->valid_cpu_types = ast2400_a1_valid_cpu_types;
 mc->default_ram_size = 256 * MiB;
 }
 
-- 
2.41.0




[PATCH 5/8] hw/arm/aspeed/2500: Check for CPU types in machine_run_board_init()

2024-01-22 Thread Philippe Mathieu-Daudé
Restrict MachineClass::valid_cpu_types[] to the single
valid CPU type.

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/arm/aspeed.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index e0e0877b1d..df627096d2 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -1152,6 +1152,11 @@ static const char * const ast2400_a1_valid_cpu_types[] = 
{
 NULL
 };
 
+static const char * const ast2500_a1_valid_cpu_types[] = {
+ARM_CPU_TYPE_NAME("arm1176"),
+NULL
+};
+
 static void aspeed_machine_class_init(ObjectClass *oc, void *data)
 {
 MachineClass *mc = MACHINE_CLASS(oc);
@@ -1236,6 +1241,7 @@ static void 
aspeed_machine_supermicro_x11spi_bmc_class_init(ObjectClass *oc,
 amc->num_cs= 1;
 amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON;
 amc->i2c_init  = palmetto_bmc_i2c_init;
+mc->valid_cpu_types = ast2500_a1_valid_cpu_types;
 mc->default_ram_size = 512 * MiB;
 mc->default_cpus = mc->min_cpus = mc->max_cpus =
 aspeed_soc_num_cpus(amc->soc_name);
@@ -1253,6 +1259,7 @@ static void 
aspeed_machine_ast2500_evb_class_init(ObjectClass *oc, void *data)
 amc->spi_model = "mx25l25635f";
 amc->num_cs= 1;
 amc->i2c_init  = ast2500_evb_i2c_init;
+mc->valid_cpu_types = ast2500_a1_valid_cpu_types;
 mc->default_ram_size   = 512 * MiB;
 mc->default_cpus = mc->min_cpus = mc->max_cpus =
 aspeed_soc_num_cpus(amc->soc_name);
@@ -1271,6 +1278,7 @@ static void 
aspeed_machine_yosemitev2_class_init(ObjectClass *oc, void *data)
 amc->spi_model = "mx25l25635e";
 amc->num_cs= 2;
 amc->i2c_init  = yosemitev2_bmc_i2c_init;
+mc->valid_cpu_types = ast2500_a1_valid_cpu_types;
 mc->default_ram_size   = 512 * MiB;
 mc->default_cpus = mc->min_cpus = mc->max_cpus =
 aspeed_soc_num_cpus(amc->soc_name);
@@ -1288,6 +1296,7 @@ static void aspeed_machine_romulus_class_init(ObjectClass 
*oc, void *data)
 amc->spi_model = "mx66l1g45g";
 amc->num_cs= 2;
 amc->i2c_init  = romulus_bmc_i2c_init;
+mc->valid_cpu_types = ast2500_a1_valid_cpu_types;
 mc->default_ram_size   = 512 * MiB;
 mc->default_cpus = mc->min_cpus = mc->max_cpus =
 aspeed_soc_num_cpus(amc->soc_name);
@@ -1306,6 +1315,7 @@ static void 
aspeed_machine_tiogapass_class_init(ObjectClass *oc, void *data)
 amc->spi_model = "mx25l25635e";
 amc->num_cs= 2;
 amc->i2c_init  = tiogapass_bmc_i2c_init;
+mc->valid_cpu_types = ast2500_a1_valid_cpu_types;
 mc->default_ram_size   = 1 * GiB;
 mc->default_cpus = mc->min_cpus = mc->max_cpus =
 aspeed_soc_num_cpus(amc->soc_name);
@@ -1324,6 +1334,7 @@ static void 
aspeed_machine_sonorapass_class_init(ObjectClass *oc, void *data)
 amc->spi_model = "mx66l1g45g";
 amc->num_cs= 2;
 amc->i2c_init  = sonorapass_bmc_i2c_init;
+mc->valid_cpu_types = ast2500_a1_valid_cpu_types;
 mc->default_ram_size   = 512 * MiB;
 mc->default_cpus = mc->min_cpus = mc->max_cpus =
 aspeed_soc_num_cpus(amc->soc_name);
@@ -1341,6 +1352,7 @@ static void 
aspeed_machine_witherspoon_class_init(ObjectClass *oc, void *data)
 amc->spi_model = "mx66l1g45g";
 amc->num_cs= 2;
 amc->i2c_init  = witherspoon_bmc_i2c_init;
+mc->valid_cpu_types = ast2500_a1_valid_cpu_types;
 mc->default_ram_size = 512 * MiB;
 mc->default_cpus = mc->min_cpus = mc->max_cpus =
 aspeed_soc_num_cpus(amc->soc_name);
@@ -1398,6 +1410,7 @@ static void aspeed_machine_g220a_class_init(ObjectClass 
*oc, void *data)
 amc->num_cs= 2;
 amc->macs_mask  = ASPEED_MAC0_ON | ASPEED_MAC1_ON;
 amc->i2c_init  = g220a_bmc_i2c_init;
+mc->valid_cpu_types = ast2500_a1_valid_cpu_types;
 mc->default_ram_size = 1024 * MiB;
 mc->default_cpus = mc->min_cpus = mc->max_cpus =
 aspeed_soc_num_cpus(amc->soc_name);
@@ -1416,6 +1429,7 @@ static void 
aspeed_machine_fp5280g2_class_init(ObjectClass *oc, void *data)
 amc->num_cs= 2;
 amc->macs_mask  = ASPEED_MAC0_ON | ASPEED_MAC1_ON;
 amc->i2c_init  = fp5280g2_bmc_i2c_init;
+mc->valid_cpu_types = ast2500_a1_valid_cpu_types;
 mc->default_ram_size = 512 * MiB;
 mc->default_cpus = mc->min_cpus = mc->max_cpus =
 aspeed_soc_num_cpus(amc->soc_name);
-- 
2.41.0




[PATCH 6/8] hw/arm/aspeed/2600: Check for CPU types in machine_run_board_init()

2024-01-22 Thread Philippe Mathieu-Daudé
Restrict MachineClass::valid_cpu_types[] to the single
valid CPU type.

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/arm/aspeed.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index df627096d2..393c97d55e 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -1157,6 +1157,11 @@ static const char * const ast2500_a1_valid_cpu_types[] = 
{
 NULL
 };
 
+static const char * const ast2600_a3_valid_cpu_types[] = {
+ARM_CPU_TYPE_NAME("cortex-a9"),
+NULL
+};
+
 static void aspeed_machine_class_init(ObjectClass *oc, void *data)
 {
 MachineClass *mc = MACHINE_CLASS(oc);
@@ -1373,6 +1378,7 @@ static void 
aspeed_machine_ast2600_evb_class_init(ObjectClass *oc, void *data)
 amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON | ASPEED_MAC2_ON |
  ASPEED_MAC3_ON;
 amc->i2c_init  = ast2600_evb_i2c_init;
+mc->valid_cpu_types = ast2600_a3_valid_cpu_types;
 mc->default_ram_size = 1 * GiB;
 mc->default_cpus = mc->min_cpus = mc->max_cpus =
 aspeed_soc_num_cpus(amc->soc_name);
@@ -1392,6 +1398,7 @@ static void aspeed_machine_tacoma_class_init(ObjectClass 
*oc, void *data)
 amc->num_cs= 2;
 amc->macs_mask  = ASPEED_MAC2_ON;
 amc->i2c_init  = witherspoon_bmc_i2c_init; /* Same board layout */
+mc->valid_cpu_types = ast2600_a3_valid_cpu_types;
 mc->default_ram_size = 1 * GiB;
 mc->default_cpus = mc->min_cpus = mc->max_cpus =
 aspeed_soc_num_cpus(amc->soc_name);
@@ -1449,6 +1456,7 @@ static void aspeed_machine_rainier_class_init(ObjectClass 
*oc, void *data)
 amc->num_cs= 2;
 amc->macs_mask  = ASPEED_MAC2_ON | ASPEED_MAC3_ON;
 amc->i2c_init  = rainier_bmc_i2c_init;
+mc->valid_cpu_types = ast2600_a3_valid_cpu_types;
 mc->default_ram_size = 1 * GiB;
 mc->default_cpus = mc->min_cpus = mc->max_cpus =
 aspeed_soc_num_cpus(amc->soc_name);
@@ -1471,6 +1479,7 @@ static void aspeed_machine_fuji_class_init(ObjectClass 
*oc, void *data)
 amc->macs_mask = ASPEED_MAC3_ON;
 amc->i2c_init = fuji_bmc_i2c_init;
 amc->uart_default = ASPEED_DEV_UART1;
+mc->valid_cpu_types = ast2600_a3_valid_cpu_types;
 mc->default_ram_size = FUJI_BMC_RAM_SIZE;
 mc->default_cpus = mc->min_cpus = mc->max_cpus =
 aspeed_soc_num_cpus(amc->soc_name);
@@ -1492,6 +1501,7 @@ static void 
aspeed_machine_bletchley_class_init(ObjectClass *oc, void *data)
 amc->num_cs= 2;
 amc->macs_mask = ASPEED_MAC2_ON;
 amc->i2c_init  = bletchley_bmc_i2c_init;
+mc->valid_cpu_types = ast2600_a3_valid_cpu_types;
 mc->default_ram_size = BLETCHLEY_BMC_RAM_SIZE;
 mc->default_cpus = mc->min_cpus = mc->max_cpus =
 aspeed_soc_num_cpus(amc->soc_name);
@@ -1631,6 +1641,7 @@ static void 
aspeed_machine_qcom_dc_scm_v1_class_init(ObjectClass *oc,
 amc->num_cs= 2;
 amc->macs_mask = ASPEED_MAC2_ON | ASPEED_MAC3_ON;
 amc->i2c_init  = qcom_dc_scm_bmc_i2c_init;
+mc->valid_cpu_types = ast2600_a3_valid_cpu_types;
 mc->default_ram_size = 1 * GiB;
 mc->default_cpus = mc->min_cpus = mc->max_cpus =
 aspeed_soc_num_cpus(amc->soc_name);
@@ -1651,6 +1662,7 @@ static void 
aspeed_machine_qcom_firework_class_init(ObjectClass *oc,
 amc->num_cs= 2;
 amc->macs_mask = ASPEED_MAC2_ON | ASPEED_MAC3_ON;
 amc->i2c_init  = qcom_dc_scm_firework_i2c_init;
+mc->valid_cpu_types = ast2600_a3_valid_cpu_types;
 mc->default_ram_size = 1 * GiB;
 mc->default_cpus = mc->min_cpus = mc->max_cpus =
 aspeed_soc_num_cpus(amc->soc_name);
-- 
2.41.0




[PATCH 1/8] hw/arm/exynos: Check for CPU types in machine_run_board_init()

2024-01-22 Thread Philippe Mathieu-Daudé
Restrict MachineClass::valid_cpu_types[] to the single
valid CPU type.

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/arm/exynos4_boards.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/hw/arm/exynos4_boards.c b/hw/arm/exynos4_boards.c
index b0e13eb4f0..01c7618a67 100644
--- a/hw/arm/exynos4_boards.c
+++ b/hw/arm/exynos4_boards.c
@@ -34,6 +34,7 @@
 #include "hw/qdev-properties.h"
 #include "hw/boards.h"
 #include "hw/irq.h"
+#include "target/arm/cpu-qom.h"
 
 #define SMDK_LAN9118_BASE_ADDR  0x0500
 
@@ -150,12 +151,18 @@ static void smdkc210_init(MachineState *machine)
 arm_load_kernel(s->soc.cpu[0], machine, _board_binfo);
 }
 
+static const char * const valid_cpu_types[] = {
+ARM_CPU_TYPE_NAME("cortex-a9"),
+NULL
+};
+
 static void nuri_class_init(ObjectClass *oc, void *data)
 {
 MachineClass *mc = MACHINE_CLASS(oc);
 
 mc->desc = "Samsung NURI board (Exynos4210)";
 mc->init = nuri_init;
+mc->valid_cpu_types = valid_cpu_types;
 mc->max_cpus = EXYNOS4210_NCPUS;
 mc->min_cpus = EXYNOS4210_NCPUS;
 mc->default_cpus = EXYNOS4210_NCPUS;
@@ -174,6 +181,7 @@ static void smdkc210_class_init(ObjectClass *oc, void *data)
 
 mc->desc = "Samsung SMDKC210 board (Exynos4210)";
 mc->init = smdkc210_init;
+mc->valid_cpu_types = valid_cpu_types;
 mc->max_cpus = EXYNOS4210_NCPUS;
 mc->min_cpus = EXYNOS4210_NCPUS;
 mc->default_cpus = EXYNOS4210_NCPUS;
-- 
2.41.0




RE: [PATCH rfcv1 2/6] hw/pci: introduce pci_device_set/unset_iommu_device()

2024-01-22 Thread Duan, Zhenzhong


>-Original Message-
>From: Cédric Le Goater 
>Subject: Re: [PATCH rfcv1 2/6] hw/pci: introduce
>pci_device_set/unset_iommu_device()
>
>On 1/15/24 11:13, Zhenzhong Duan wrote:
>> From: Yi Liu 
>>
>> This adds pci_device_set/unset_iommu_device() to set/unset
>> IOMMUFDDevice for a given PCIe device. Caller of set
>> should fail if set operation fails.
>>
>> Extract out pci_device_get_iommu_bus_devfn() to facilitate
>> implementation of pci_device_set/unset_iommu_device().
>>
>> Signed-off-by: Yi Liu 
>> Signed-off-by: Yi Sun 
>> Signed-off-by: Nicolin Chen 
>> Signed-off-by: Zhenzhong Duan 
>> ---
>>   include/hw/pci/pci.h | 39 ++-
>>   hw/pci/pci.c | 49
>+++-
>>   2 files changed, 86 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>> index fa6313aabc..a810c0ec74 100644
>> --- a/include/hw/pci/pci.h
>> +++ b/include/hw/pci/pci.h
>> @@ -7,6 +7,8 @@
>>   /* PCI includes legacy ISA access.  */
>>   #include "hw/isa/isa.h"
>>
>> +#include "sysemu/iommufd_device.h"
>> +
>>   extern bool pci_available;
>>
>>   /* PCI bus */
>> @@ -384,10 +386,45 @@ typedef struct PCIIOMMUOps {
>>*
>>* @devfn: device and function number
>>*/
>> -   AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int
>devfn);
>> +AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int
>devfn);
>> +/**
>> + * @set_iommu_device: set iommufd device for a PCI device to
>vIOMMU
>> + *
>> + * Optional callback, if not implemented in vIOMMU, then vIOMMU
>can't
>> + * utilize iommufd specific features.
>> + *
>> + * Return true if iommufd device is accepted, or else return false with
>> + * errp set.
>> + *
>> + * @bus: the #PCIBus of the PCI device.
>> + *
>> + * @opaque: the data passed to pci_setup_iommu().
>> + *
>> + * @devfn: device and function number of the PCI device.
>> + *
>> + * @idev: the data structure representing iommufd device.
>> + *
>> + */
>> +int (*set_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn,
>> +IOMMUFDDevice *idev, Error **errp);
>> +/**
>> + * @unset_iommu_device: unset iommufd device for a PCI device from
>vIOMMU
>> + *
>> + * Optional callback.
>> + *
>> + * @bus: the #PCIBus of the PCI device.
>> + *
>> + * @opaque: the data passed to pci_setup_iommu().
>> + *
>> + * @devfn: device and function number of the PCI device.
>> + */
>> +void (*unset_iommu_device)(PCIBus *bus, void *opaque, int32_t
>devfn);
>>   } PCIIOMMUOps;
>>
>>   AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
>> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice
>*idev,
>> +Error **errp);
>> +void pci_device_unset_iommu_device(PCIDevice *dev);
>>
>>   /**
>>* pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index 76080af580..3848662f95 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -2672,7 +2672,10 @@ static void
>pci_device_class_base_init(ObjectClass *klass, void *data)
>>   }
>>   }
>>
>> -AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>> +static void pci_device_get_iommu_bus_devfn(PCIDevice *dev,
>> +   PCIBus **aliased_pbus,
>> +   PCIBus **piommu_bus,
>> +   uint8_t *aliased_pdevfn)
>>   {
>>   PCIBus *bus = pci_get_bus(dev);
>>   PCIBus *iommu_bus = bus;
>> @@ -2717,6 +2720,18 @@ AddressSpace
>*pci_device_iommu_address_space(PCIDevice *dev)
>>
>>   iommu_bus = parent_bus;
>>   }
>> +*aliased_pbus = bus;
>> +*piommu_bus = iommu_bus;
>> +*aliased_pdevfn = devfn;
>> +}
>> +
>> +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>> +{
>> +PCIBus *bus;
>> +PCIBus *iommu_bus;
>> +uint8_t devfn;
>> +
>> +pci_device_get_iommu_bus_devfn(dev, , _bus, );
>>   if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) {
>>   return iommu_bus->iommu_ops->get_address_space(bus,
>>iommu_bus->iommu_opaque, devfn);
>> @@ -2724,6 +2739,38 @@ AddressSpace
>*pci_device_iommu_address_space(PCIDevice *dev)
>>   return _space_memory;
>>   }
>>
>> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice
>*idev,
>> +Error **errp)
>> +{
>> +PCIBus *bus;
>> +PCIBus *iommu_bus;
>> +uint8_t devfn;
>> +
>> +pci_device_get_iommu_bus_devfn(dev, , _bus, );
>> +if (!pci_bus_bypass_iommu(bus) && iommu_bus &&
>
>Why do we test iommu_bus in pci_device_un/set_iommu_device routines
>and
>not in pci_device_iommu_address_space() ?

iommu_bus check in pci_device_iommu_address_space() is dropped in
below commit, I 

Re: [PATCH 3/5] util/uri: Remove the uri_string_escape() function

2024-01-22 Thread Thomas Huth

On 22/01/2024 21.59, Stefan Weil wrote:

Am 22.01.24 um 20:17 schrieb Thomas Huth:


It is not used in QEMU - and if somebody needs this functionality,
they can simply use g_uri_escape_string() from the glib instead.

Signed-off-by: Thomas Huth 
---
  include/qemu/uri.h |  1 -
  util/uri.c | 64 --
  2 files changed, 65 deletions(-)



The removed function is used in util/uri.c, so this patch breaks the 
compilation.


That can be fixed by applying patch 4 before this one.

With that re-ordering you may add my signature:

Reviewed-by: Stefan Weil 


D'oh, I originally developed the patches the other way round indeed, but 
then thought it would be nicer for review this way and swapped the order 
without checking :-( ... I'll swap it back again.


Thanks!

 Thomas




Re: [PATCH 2/5] util/uri: Simplify uri_string_unescape()

2024-01-22 Thread Thomas Huth

On 22/01/2024 22.22, Stefan Weil wrote:

Am 22.01.24 um 20:17 schrieb Thomas Huth:


uri_string_unescape() basically does the same as the glib function
g_uri_unescape_string(), with just an additional length parameter.
So we can simplify this function a lot by limiting the length with
g_strndup() first and then by calling g_uri_unescape_string() instead
of walking through the string manually.

Suggested-by: Stefan Weil


Can my e-mail address be replaced by another one (s...@weilnetz.de)?


Sure! ... not sure where I copy-n-pasted the other one from ... sorry for that.


@@ -1585,8 +1576,7 @@ static int is_hex(char c)
   */
  char *uri_string_unescape(const char *str, int len)
  {
-char *ret, *out;
-const char *in;
+g_autofree char *lstr = NULL;



Is it necessary to assign NULL? It does not look so.


Yes, it's necessary for the early "return NULL" statement below. Since it's 
an g_autofree variable, it must either be set to a valid allocated buffer or 
NULL before returning.


  
  if (str == NULL) {

  return NULL;
@@ -1594,42 +1584,9 @@ char *uri_string_unescape(const char *str, int len)
  if (len <= 0) {
  len = strlen(str);
  }
-if (len < 0) {
-return NULL;
-}
-
-ret = g_malloc(len + 1);
+lstr = g_strndup(str, len);
  
-in = str;

-out = ret;
-while (len > 0) {
-if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
-in++;
-if ((*in >= '0') && (*in <= '9')) {
-*out = (*in - '0');
-} else if ((*in >= 'a') && (*in <= 'f')) {
-*out = (*in - 'a') + 10;
-} else if ((*in >= 'A') && (*in <= 'F')) {
-*out = (*in - 'A') + 10;
-}
-in++;
-if ((*in >= '0') && (*in <= '9')) {
-*out = *out * 16 + (*in - '0');
-} else if ((*in >= 'a') && (*in <= 'f')) {
-*out = *out * 16 + (*in - 'a') + 10;
-} else if ((*in >= 'A') && (*in <= 'F')) {
-*out = *out * 16 + (*in - 'A') + 10;
-}
-in++;
-len -= 3;
-out++;
-} else {
-*out++ = *in++;
-len--;
-}
-}
-*out = 0;
-return ret;
+return g_uri_unescape_string(lstr, NULL);
  }
  
  /**



Thank you.

Reviewed-by: Stefan Weil 


Thanks!

 Thomas





RE: [PATCH rfcv1 1/6] backends/iommufd_device: introduce IOMMUFDDevice

2024-01-22 Thread Duan, Zhenzhong


>-Original Message-
>From: Cédric Le Goater 
>Subject: Re: [PATCH rfcv1 1/6] backends/iommufd_device: introduce
>IOMMUFDDevice
>
>On 1/19/24 08:31, Duan, Zhenzhong wrote:
>>
>>
>>> -Original Message-
>>> From: Eric Auger 
>>> Subject: Re: [PATCH rfcv1 1/6] backends/iommufd_device: introduce
>>> IOMMUFDDevice
>>>
>>>
>>>
>>> On 1/15/24 11:13, Zhenzhong Duan wrote:
 IOMMUFDDevice represents a device in iommufd and can be used as
 a communication interface between devices (i.e., VFIO, VDPA) and
 vIOMMU.

 Currently it includes iommufd handler and device id information
 which could be used by vIOMMU to get hw IOMMU information.

 In future nested translation support, vIOMMU is going to have
 more iommufd related operations like allocate hwpt for a device,
 attach/detach hwpt, etc. So IOMMUFDDevice will be further expanded.

 IOMMUFDDevice is willingly not a QOM object because we don't want
 it to be visible from the user interface.

 Introduce a helper iommufd_device_init to initialize IOMMUFDDevice.

 Originally-by: Yi Liu 
 Signed-off-by: Yi Sun 
 Signed-off-by: Zhenzhong Duan 
 ---
   MAINTAINERS |  4 +--
   include/sysemu/iommufd_device.h | 31 
   backends/iommufd_device.c   | 50
>>> +
>>> Maybe it is still time to move the iommufd files in a sepate dir, under
>>> hw at the same level as vfio.
>>>
>>> Thoughts?
>>
>> Any reason for the movement? Hw dir contains entries to emulate
>different
>> Devices. Iommufd is not a real device. It's more a backend.
>
>I would include the new services in the existing iommufd .[ch] files.
>No need for a new file since the changes are all related to the IOMMUFD
>device usage.

Make sense, will do.

Thanks
Zhenzhong


Re: [PATCH v3 0/2] riscv: support new isa extension detection devicetree properties

2024-01-22 Thread Alistair Francis
On Mon, Jan 22, 2024 at 10:25 PM Conor Dooley  wrote:
>
> On Mon, Jan 22, 2024 at 03:24:19PM +1000, Alistair Francis wrote:
> > On Wed, Jan 10, 2024 at 8:27 PM Conor Dooley  wrote:
> > >
> > > From: Conor Dooley 
> > >
> > > Making it a series to keep the standalone change to riscv_isa_string()
> > > that Drew reported separate.
> > >
> > > Changes in v3:
> > > - g_free() isa_extensions too
> > > - use misa_mxl_max rather than the compile target for the base isa
> > > - add a new patch changing riscv_isa_string() to do the same
> > > - drop a null check that cannot be null
> > > - rebased on top of Alistair's next branch
> >
> > Do you mind rebasing on
> > https://github.com/alistair23/qemu/tree/riscv-to-apply.next again?
> > There was a big re-org recently so lots of rebasing is required
>
> I can, sure. Do you want me to introduce the macro that I mentioned in
> the first patch as a helper for misa_mxl_max -> width conversions when I
> do?

Yes please!

Alistair

>
> Thanks,
> Conor.



Re: [PATCH 5/5] util/uri: Remove unused macros ISA_RESERVED() and ISA_GEN_DELIM()

2024-01-22 Thread Philippe Mathieu-Daudé

On 22/1/24 20:17, Thomas Huth wrote:

They are not used anywhere, so there's no need to keep them around.

Signed-off-by: Thomas Huth 
---
  util/uri.c | 13 -
  1 file changed, 13 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé 

(could be reordered as patch 1 to clarify it is not
 an effect of the uri_string_foo() cleanups)



Re: [PATCH v3 01/21] hw/riscv: Use misa_mxl instead of misa_mxl_max

2024-01-22 Thread Alistair Francis
On Tue, Jan 23, 2024 at 12:57 AM Alex Bennée  wrote:
>
> From: Akihiko Odaki 
>
> The effective MXL value matters when booting.
>
> Signed-off-by: Akihiko Odaki 
> Message-Id: <20240103173349.398526-23-alex.ben...@linaro.org>
> Message-Id: <20231213-riscv-v7-1-a760156a3...@daynix.com>
> Signed-off-by: Alex Bennée 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  hw/riscv/boot.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
> index 0ffca05189f..bc67c0bd189 100644
> --- a/hw/riscv/boot.c
> +++ b/hw/riscv/boot.c
> @@ -36,7 +36,7 @@
>
>  bool riscv_is_32bit(RISCVHartArrayState *harts)
>  {
> -return harts->harts[0].env.misa_mxl_max == MXL_RV32;
> +return harts->harts[0].env.misa_mxl == MXL_RV32;
>  }
>
>  /*
> --
> 2.39.2
>
>



Re: [PATCH v4 00/13] target/riscv: add 'cpu->cfg.vlenb', remove

2024-01-22 Thread Alistair Francis
On Tue, Jan 23, 2024 at 2:11 AM Daniel Henrique Barboza
 wrote:
>
> Hi,
>
> This new version is rebased with a newer riscv-to-apply.next branch
> (@ 096b6b0729). No other changes made.
>
> All patches acked.
>
> v4 link: 
> https://lore.kernel.org/qemu-riscv/20240116205817.344178-1-dbarb...@ventanamicro.com/
>
> Daniel Henrique Barboza (13):
>   target/riscv: add 'vlenb' field in cpu->cfg
>   target/riscv/csr.c: use 'vlenb' instead of 'vlen'
>   target/riscv/gdbstub.c: use 'vlenb' instead of shifting 'vlen'
>   target/riscv/insn_trans/trans_rvbf16.c.inc: use cpu->cfg.vlenb
>   target/riscv/insn_trans/trans_rvv.c.inc: use 'vlenb'
>   target/riscv/insn_trans/trans_rvvk.c.inc: use 'vlenb'
>   target/riscv/vector_helper.c: use 'vlenb'
>   target/riscv/vector_helper.c: use vlenb in HELPER(vsetvl)
>   target/riscv/insn_trans/trans_rvv.c.inc: use 'vlenb' in MAXSZ()
>   target/riscv/cpu.h: use 'vlenb' in vext_get_vlmax()
>   target/riscv: change vext_get_vlmax() arguments
>   trans_rvv.c.inc: use vext_get_vlmax() in trans_vrgather_v*()
>   target/riscv/cpu.c: remove cpu->cfg.vlen

Thanks!

Applied to riscv-to-apply.next

Alistair

>
>  target/riscv/cpu.c |  12 +-
>  target/riscv/cpu.h |  14 +-
>  target/riscv/cpu_cfg.h |   2 +-
>  target/riscv/cpu_helper.c  |  11 +-
>  target/riscv/csr.c |   4 +-
>  target/riscv/gdbstub.c |   6 +-
>  target/riscv/insn_trans/trans_rvbf16.c.inc |  12 +-
>  target/riscv/insn_trans/trans_rvv.c.inc| 152 ++---
>  target/riscv/insn_trans/trans_rvvk.c.inc   |  16 +--
>  target/riscv/tcg/tcg-cpu.c |   4 +-
>  target/riscv/vector_helper.c   |  43 +++---
>  11 files changed, 148 insertions(+), 128 deletions(-)
>
> --
> 2.43.0
>
>



Re: [PATCH 1/5] util/uri: Remove the unused "target" argument from uri_string_unescape()

2024-01-22 Thread Philippe Mathieu-Daudé

On 22/1/24 20:17, Thomas Huth wrote:

All callers pass NULL as target, so we can simplify the code by
dropping this parameter.

Signed-off-by: Thomas Huth 
---
  include/qemu/uri.h |  2 +-
  util/uri.c | 32 ++--
  2 files changed, 15 insertions(+), 19 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH 6/7] hw/riscv/virt.c: use g_autofree in virt_machine_init()

2024-01-22 Thread Philippe Mathieu-Daudé

On 22/1/24 23:15, Daniel Henrique Barboza wrote:

Move 'soc_name' to the loop, and give it g_autofree, to avoid the manual
g_free().

Signed-off-by: Daniel Henrique Barboza 
---
  hw/riscv/virt.c | 5 ++---
  1 file changed, 2 insertions(+), 3 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH 5/7] hw/riscv/virt.c: use g_autofree in create_fdt_virtio()

2024-01-22 Thread Philippe Mathieu-Daudé

On 22/1/24 23:15, Daniel Henrique Barboza wrote:

Put 'name' declaration inside the loop, with g_autofree, to avoid
manually doing g_free() in each iteration.

Signed-off-by: Daniel Henrique Barboza 
---
  hw/riscv/virt.c | 5 ++---
  1 file changed, 2 insertions(+), 3 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH 3/7] hw/riscv/virt.c: use g_autofree in create_fdt_socket_cpus()

2024-01-22 Thread Philippe Mathieu-Daudé

On 22/1/24 23:15, Daniel Henrique Barboza wrote:

Move all char pointers to the loop. Use g_autofree in all of them to
avoid the g_free() calls.

Signed-off-by: Daniel Henrique Barboza 
---
  hw/riscv/virt.c | 12 +---
  1 file changed, 5 insertions(+), 7 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH v3 37/38] tcg/s390x: Support TCG_COND_TST{EQ,NE}

2024-01-22 Thread Philippe Mathieu-Daudé

+Ilya/David

On 10/1/24 23:44, Richard Henderson wrote:

Signed-off-by: Richard Henderson 
---
  tcg/s390x/tcg-target.h |   2 +-
  tcg/s390x/tcg-target.c.inc | 139 +
  2 files changed, 97 insertions(+), 44 deletions(-)

diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 53bed8c8d2..ae448c3a3a 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -138,7 +138,7 @@ extern uint64_t s390_facilities[3];
  
  #define TCG_TARGET_HAS_qemu_ldst_i128 1
  
-#define TCG_TARGET_HAS_tst0

+#define TCG_TARGET_HAS_tst1
  
  #define TCG_TARGET_HAS_v64HAVE_FACILITY(VECTOR)

  #define TCG_TARGET_HAS_v128   HAVE_FACILITY(VECTOR)
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index 86ec737768..cb1693c9cf 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -112,6 +112,9 @@ typedef enum S390Opcode {
  RI_OILH = 0xa50a,
  RI_OILL = 0xa50b,
  RI_TMLL = 0xa701,
+RI_TMLH = 0xa700,
+RI_TMHL = 0xa703,
+RI_TMHH = 0xa702,
  
  RIEb_CGRJ= 0xec64,

  RIEb_CLGRJ   = 0xec65,
@@ -404,10 +407,15 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind 
kind, int slot)
  #define S390_CC_NEVER   0
  #define S390_CC_ALWAYS  15
  
+#define S390_TM_EQ  8  /* CC == 0 */

+#define S390_TM_NE  7  /* CC in {1,2,3} */
+
  /* Condition codes that result from a COMPARE and COMPARE LOGICAL.  */
-static const uint8_t tcg_cond_to_s390_cond[] = {
+static const uint8_t tcg_cond_to_s390_cond[16] = {
  [TCG_COND_EQ]  = S390_CC_EQ,
  [TCG_COND_NE]  = S390_CC_NE,
+[TCG_COND_TSTEQ] = S390_CC_EQ,
+[TCG_COND_TSTNE] = S390_CC_NE,
  [TCG_COND_LT]  = S390_CC_LT,
  [TCG_COND_LE]  = S390_CC_LE,
  [TCG_COND_GT]  = S390_CC_GT,
@@ -421,9 +429,11 @@ static const uint8_t tcg_cond_to_s390_cond[] = {
  /* Condition codes that result from a LOAD AND TEST.  Here, we have no
 unsigned instruction variation, however since the test is vs zero we
 can re-map the outcomes appropriately.  */
-static const uint8_t tcg_cond_to_ltr_cond[] = {
+static const uint8_t tcg_cond_to_ltr_cond[16] = {
  [TCG_COND_EQ]  = S390_CC_EQ,
  [TCG_COND_NE]  = S390_CC_NE,
+[TCG_COND_TSTEQ] = S390_CC_ALWAYS,
+[TCG_COND_TSTNE] = S390_CC_NEVER,
  [TCG_COND_LT]  = S390_CC_LT,
  [TCG_COND_LE]  = S390_CC_LE,
  [TCG_COND_GT]  = S390_CC_GT,
@@ -542,10 +552,13 @@ static bool risbg_mask(uint64_t c)
  static bool tcg_target_const_match(int64_t val, int ct,
 TCGType type, TCGCond cond, int vece)
  {
+uint64_t uval = val;
+
  if (ct & TCG_CT_CONST) {
  return true;
  }
  if (type == TCG_TYPE_I32) {
+uval = (uint32_t)val;
  val = (int32_t)val;
  }
  
@@ -567,6 +580,15 @@ static bool tcg_target_const_match(int64_t val, int ct,

  case TCG_COND_GTU:
  ct |= TCG_CT_CONST_U32;  /* CLGFI */
  break;
+case TCG_COND_TSTNE:
+case TCG_COND_TSTEQ:
+if (is_const_p16(uval) >= 0) {
+return true;  /* TMxx */
+}
+if (risbg_mask(uval)) {
+return true;  /* RISBG */
+}
+break;
  default:
  g_assert_not_reached();
  }
@@ -588,10 +610,6 @@ static bool tcg_target_const_match(int64_t val, int ct,
  if (ct & TCG_CT_CONST_INV) {
  val = ~val;
  }
-/*
- * Note that is_const_p16 is a subset of is_const_p32,
- * so we don't need both constraints.
- */
  if ((ct & TCG_CT_CONST_P32) && is_const_p32(val) >= 0) {
  return true;
  }
@@ -868,6 +886,9 @@ static const S390Opcode oi_insns[4] = {
  static const S390Opcode lif_insns[2] = {
  RIL_LLILF, RIL_LLIHF,
  };
+static const S390Opcode tm_insns[4] = {
+RI_TMLL, RI_TMLH, RI_TMHL, RI_TMHH
+};
  
  /* load a register with an immediate value */

  static void tcg_out_movi(TCGContext *s, TCGType type,
@@ -1228,6 +1249,36 @@ static int tgen_cmp2(TCGContext *s, TCGType type, 
TCGCond c, TCGReg r1,
  TCGCond inv_c = tcg_invert_cond(c);
  S390Opcode op;
  
+if (is_tst_cond(c)) {

+tcg_debug_assert(!need_carry);
+
+if (!c2const) {
+if (type == TCG_TYPE_I32) {
+tcg_out_insn(s, RRFa, NRK, TCG_REG_R0, r1, c2);
+} else {
+tcg_out_insn(s, RRFa, NGRK, TCG_REG_R0, r1, c2);
+}
+goto exit;
+}
+
+if (type == TCG_TYPE_I32) {
+c2 = (uint32_t)c2;
+}
+
+int i = is_const_p16(c2);
+if (i >= 0) {
+tcg_out_insn_RI(s, tm_insns[i], r1, c2 >> (i * 16));
+*inv_cc = TCG_COND_TSTEQ ? S390_TM_NE : S390_TM_EQ;
+return *inv_cc ^ 15;
+}
+
+if (risbg_mask(c2)) {
+tgen_andi_risbg(s, TCG_REG_R0, r1, c2);
+goto exit;
+ 

[PATCH v4] qemu-img: Fix Column Width and Improve Formatting in snapshot list

2024-01-22 Thread Abhiram Tilak
When running the command `qemu-img snapshot -l SNAPSHOT` the output of
VM_CLOCK (measures the offset between host and VM clock) cannot to
accommodate values in the order of thousands (4-digit).

This line [1] hints on the problem. Additionally, the column width for
the VM_CLOCK field was reduced from 15 to 13 spaces in commit b39847a5
in line [2], resulting in a shortage of space.

[1]:
https://gitlab.com/qemu-project/qemu/-/blob/master/block/qapi.c?ref_type=heads#L753
[2]:
https://gitlab.com/qemu-project/qemu/-/blob/master/block/qapi.c?ref_type=heads#L763

This patch restores the column width to 15 spaces and makes adjustments
to the affected iotests accordingly. Furthermore, addresses a potential
source
of confusion by removing whitespace in column headers. Example, VM CLOCK
is modified to VM_CLOCK. Additionally a '--' symbol is introduced when
ICOUNT returns no output for clarity.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2062
Fixes: b39847a50553 (migration: introduce icount field for snapshots )
Signed-off-by: Abhiram Tilak 
---
 v4:
* Fit the column width within 80 characters by shrinking 'ID' field
* Add spaces between both fields and values for better spacing and
   printing
 v3:
* Make a patch by avoid changing the .patch file
 v2:
* Change email provider to 'gmail' to avoid auto-wrapping patches
* Modify iotests for file 'qcow2-internal-snapshots.out'

 block/qapi.c  | 10 ++--
 tests/qemu-iotests/176.out| 16 +++
 tests/qemu-iotests/267.out| 48 +--
 .../tests/qcow2-internal-snapshots.out| 14 +++---
 4 files changed, 45 insertions(+), 43 deletions(-)

diff --git a/block/qapi.c b/block/qapi.c
index 9e806fa230..5f2182c406 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -742,15 +742,15 @@ void bdrv_snapshot_dump(QEMUSnapshotInfo *sn)
 char *sizing = NULL;
 
 if (!sn) {
-qemu_printf("%-10s%-17s%8s%20s%13s%11s",
-"ID", "TAG", "VM SIZE", "DATE", "VM CLOCK", "ICOUNT");
+qemu_printf("%-7s %-16s %8s %19s %15s %10s",
+"ID", "TAG", "VM_SIZE", "DATE", "VM_CLOCK", "ICOUNT");
 } else {
 g_autoptr(GDateTime) date = 
g_date_time_new_from_unix_local(sn->date_sec);
 g_autofree char *date_buf = g_date_time_format(date, "%Y-%m-%d 
%H:%M:%S");
 
 secs = sn->vm_clock_nsec / 10;
 snprintf(clock_buf, sizeof(clock_buf),
- "%02d:%02d:%02d.%03d",
+ "%04d:%02d:%02d.%03d",
  (int)(secs / 3600),
  (int)((secs / 60) % 60),
  (int)(secs % 60),
@@ -759,8 +759,10 @@ void bdrv_snapshot_dump(QEMUSnapshotInfo *sn)
 if (sn->icount != -1ULL) {
 snprintf(icount_buf, sizeof(icount_buf),
 "%"PRId64, sn->icount);
+} else {
+snprintf(icount_buf, sizeof(icount_buf), "--");
 }
-qemu_printf("%-9s %-16s %8s%20s%13s%11s",
+qemu_printf("%-7s %-16s %8s %19s %15s %10s",
 sn->id_str, sn->name,
 sizing,
 date_buf,
diff --git a/tests/qemu-iotests/176.out b/tests/qemu-iotests/176.out
index 9d09b60452..dce1ca0446 100644
--- a/tests/qemu-iotests/176.out
+++ b/tests/qemu-iotests/176.out
@@ -37,8 +37,8 @@ Offset  Length  File
 0x7ffe  0x2 TEST_DIR/t.IMGFMT.itmd
 0x8340  0x200   TEST_DIR/t.IMGFMT.itmd
 Snapshot list:
-IDTAG
-1 snap
+ID  TAG
+1   snap
 
 === Test pass snapshot.1 ===
 
@@ -78,8 +78,8 @@ Offset  Length  File
 0x7fff  0x1 TEST_DIR/t.IMGFMT
 0x8340  0x200   TEST_DIR/t.IMGFMT
 Snapshot list:
-IDTAG
-1 snap
+ID  TAG
+1   snap
 
 === Test pass snapshot.2 ===
 
@@ -119,8 +119,8 @@ Offset  Length  File
 0x7fff  0x1 TEST_DIR/t.IMGFMT
 0x8340  0x200   TEST_DIR/t.IMGFMT
 Snapshot list:
-IDTAG
-1 snap
+ID  TAG
+1   snap
 
 === Test pass snapshot.3 ===
 
@@ -157,8 +157,8 @@ Offset  Length  File
 0x7fff  0x1 TEST_DIR/t.IMGFMT
 0x8340  0x200   TEST_DIR/t.IMGFMT
 Snapshot list:
-IDTAG
-1 snap
+ID  TAG
+1   snap
 
 === Test pass bitmap.0 ===
 
diff --git a/tests/qemu-iotests/267.out b/tests/qemu-iotests/267.out
index 7176e376e1..f6f5d8715a 100644
--- a/tests/qemu-iotests/267.out
+++ b/tests/qemu-iotests/267.out
@@ -33,8 +33,8 @@ QEMU X.Y.Z monitor - type 'help' for more information
 (qemu) savevm snap0
 (qemu) info snapshots
 List of snapshots present on all disks:
-IDTAG   VM SIZEDATE VM CLOCK ICOUNT
---snap0SIZE -mm-dd hh:mm:ss 00:00:00.000   
+ID  TAG   VM_SIZEDATEVM_CLOCK 

Re: [External] Re: [PATCH v2 04/20] So we use multifd to transmit zero pages.

2024-01-22 Thread Hao Xiang
On Thu, Nov 16, 2023 at 7:14 AM Fabiano Rosas  wrote:
>
> Hao Xiang  writes:
>
> > From: Juan Quintela 
> >
> > Signed-off-by: Juan Quintela 
> > Reviewed-by: Leonardo Bras 
> > ---
> >  migration/multifd.c |  7 ---
> >  migration/options.c | 13 +++--
> >  migration/ram.c | 45 ++---
> >  qapi/migration.json |  1 -
> >  4 files changed, 49 insertions(+), 17 deletions(-)
> >
> > diff --git a/migration/multifd.c b/migration/multifd.c
> > index 1b994790d5..1198ffde9c 100644
> > --- a/migration/multifd.c
> > +++ b/migration/multifd.c
> > @@ -13,6 +13,7 @@
> >  #include "qemu/osdep.h"
> >  #include "qemu/cutils.h"
> >  #include "qemu/rcu.h"
> > +#include "qemu/cutils.h"
> >  #include "exec/target_page.h"
> >  #include "sysemu/sysemu.h"
> >  #include "exec/ramblock.h"
> > @@ -459,7 +460,6 @@ static int multifd_send_pages(QEMUFile *f)
> >  p->packet_num = multifd_send_state->packet_num++;
> >  multifd_send_state->pages = p->pages;
> >  p->pages = pages;
> > -
> >  qemu_mutex_unlock(>mutex);
> >  qemu_sem_post(>sem);
> >
> > @@ -684,7 +684,7 @@ static void *multifd_send_thread(void *opaque)
> >  MigrationThread *thread = NULL;
> >  Error *local_err = NULL;
> >  /* qemu older than 8.2 don't understand zero page on multifd channel */
> > -bool use_zero_page = !migrate_use_main_zero_page();
> > +bool use_multifd_zero_page = !migrate_use_main_zero_page();
> >  int ret = 0;
> >  bool use_zero_copy_send = migrate_zero_copy_send();
> >
> > @@ -713,6 +713,7 @@ static void *multifd_send_thread(void *opaque)
> >  RAMBlock *rb = p->pages->block;
> >  uint64_t packet_num = p->packet_num;
> >  uint32_t flags;
> > +
> >  p->normal_num = 0;
> >  p->zero_num = 0;
> >
> > @@ -724,7 +725,7 @@ static void *multifd_send_thread(void *opaque)
> >
> >  for (int i = 0; i < p->pages->num; i++) {
> >  uint64_t offset = p->pages->offset[i];
> > -if (use_zero_page &&
> > +if (use_multifd_zero_page &&
>
> We could have a new function in multifd_ops for zero page
> handling. We're already considering an accelerator for the compression
> method in the other series[1] and in this series we're adding an
> accelerator for zero page checking. It's about time we make the
> multifd_ops generic instead of only compression/no compression.

Sorry I overlooked this email earlier.
I will extend the multifd_ops interface and add a new API for zero
page checking.

>
> 1- [PATCH v2 0/4] Live Migration Acceleration with IAA Compression
> https://lore.kernel.org/r/20231109154638.488213-1-yuan1@intel.com
>
> >  buffer_is_zero(rb->host + offset, p->page_size)) {
> >  p->zero[p->zero_num] = offset;
> >  p->zero_num++;
> > diff --git a/migration/options.c b/migration/options.c
> > index 00c0c4a0d6..97d121d4d7 100644
> > --- a/migration/options.c
> > +++ b/migration/options.c
> > @@ -195,6 +195,7 @@ Property migration_properties[] = {
> >  DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
> >  DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
> >  DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD),
> > +DEFINE_PROP_MIG_CAP("x-main-zero-page", 
> > MIGRATION_CAPABILITY_MAIN_ZERO_PAGE),
> >  DEFINE_PROP_MIG_CAP("x-background-snapshot",
> >  MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT),
> >  #ifdef CONFIG_LINUX
> > @@ -288,13 +289,9 @@ bool migrate_multifd(void)
> >
> >  bool migrate_use_main_zero_page(void)
> >  {
> > -//MigrationState *s;
> > -
> > -//s = migrate_get_current();
> > +MigrationState *s = migrate_get_current();
> >
> > -// We will enable this when we add the right code.
> > -// return s->enabled_capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE];
> > -return true;
> > +return s->capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE];
>
> What happens if we disable main-zero-page while multifd is not enabled?

In ram.c
...
if (migrate_multifd() && !migrate_use_main_zero_page()) {
migration_ops->ram_save_target_page = ram_save_target_page_multifd;
} else {
migration_ops->ram_save_target_page = ram_save_target_page_legacy;
}
...

So if main-zero-page is disabled and multifd is also disabled, it will
go with the "else" path, which is the legacy path
ram_save_target_page_legacy() and do zero page checking from the main
thread.

>
> >  }
> >
> >  bool migrate_pause_before_switchover(void)
> > @@ -457,6 +454,7 @@ 
> > INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
> >  MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE,
> >  MIGRATION_CAPABILITY_RETURN_PATH,
> >  MIGRATION_CAPABILITY_MULTIFD,
> > +MIGRATION_CAPABILITY_MAIN_ZERO_PAGE,
> >  MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER,
> >  MIGRATION_CAPABILITY_AUTO_CONVERGE,
> >  

Re: [PATCH] migration/docs: Explain two solutions for VMSD compatibility

2024-01-22 Thread Peter Xu
On Mon, Jan 22, 2024 at 12:39:06PM -0300, Fabiano Rosas wrote:
> pet...@redhat.com writes:
> 
> > From: Peter Xu 
> >
> > The current article is not extremely easy to follow, and may contain too
> > much information for someone looking for solutions on VMSD compatibility
> > issues.  Meanwhile, VMSD versioning is not discussed.
> >
> > I'm not yet sure whether we should just obsolete VMSD versioning; it's
> > still used quite a lot.  And I had a feeling that for simple use cases
> > where backward migration is not strongly required, device developers can
> > still consider using it.  So in this patch I decided to keep it (anyway, we
> > can't drop it in the near future because of massive existing users), and we
> > can still allow user to use it in contexts where forward-only migration
> > might be enough.
> >
> > This doc patch does below changes:
> >
> >   - Rename the page from "Backward compatibility" to "Migration
> >   compatibility", to avoid using "backward" as a word (because we'll want
> >   to identify "forward" / "backward" migrations in the new doc)
> >
> >   - Add a TOC for this page for better indexing
> >
> >   - A new section to explain what is forward/backward migration
> >
> >   - A new small section for VMSD just to make things complete
> >
> >   - Explain the two ways to make VMSD compatible with old qemu binaries
> >
> > For this one, I added a small section on how to use VMSD versioning for
> > new fields just to start such documents.  Rename the old "How backwards
> > compatibility works" section to "machine type based (forward+backward
> > migration)" to be the 2nd solution (I called it machine type based
> > solution). When at it, I provided a summary and a TODO for the 2nd
> > solution.
> >
> >   - A new section to explain which solution to choose
> >
> >   - Moved the other two existing sections into "Extended readings", because
> >   they can be even further away to most device developers
> >
> > Signed-off-by: Peter Xu 
> > ---
> >  docs/devel/migration/compatibility.rst | 140 -
> >  1 file changed, 137 insertions(+), 3 deletions(-)
> >
> > diff --git a/docs/devel/migration/compatibility.rst 
> > b/docs/devel/migration/compatibility.rst
> > index 5a5417ef06..ea9da201ef 100644
> > --- a/docs/devel/migration/compatibility.rst
> > +++ b/docs/devel/migration/compatibility.rst
> > @@ -1,8 +1,139 @@
> > -Backwards compatibility
> >  ===
> > +Migration compatibility
> > +===
> > +
> > +Migration is a hard topic sometimes.  One of the major reason is that it
> > +has a strict compatibility requirement - a migration (live or not) can
> > +happen between two different versions of QEMUs, so QEMU needs to make sure
> 
> S/QEMUs/QEMU/

I'll fix all these up that you suggested as English issues.

> 
> > +the migration can work across different versions of QEMU binaries.
> > +
> > +This document majorly discusses the compatibility requirement of forward /
> 
> s/majorly/mainly/
> 
> > +backward migrations that QEMU need to maintain, and what QEMU developers
> 
> s/need/needs/
> 
> > +should do to achieve such compatibility requirements across different QEMU
> 
> maybe s/achieve/maintain/ ?
> 
> > +versions.
> > +
> > +.. contents::
> > +
> > +Types of migrations (forward / backward)
> > +
> > +
> > +Let's firstly define the terms **forward migration** and **backward
> > +migration**.
> > +
> > +.. note::
> > +
> > +To simplify the use case, we always discuss between two consecutive
> > +versions of QEMU major releases (between QEMU version *N* and QEMU
> > +version *N-1*).  But logically it applies to the case where the two
> > +QEMU binaries involved contains more than one major version difference.
> > +
> > +.. _forward_migration:
> > +
> > +**Forward migration**: can be seen as the use case where a VM cluster can
> > +upgrade its nodes to a newer version of QEMU (version *N*) from an older
> > +version of QEMU (version *N-1*).
> > +
> > +.. _backward_migration:
> > +
> > +**Backward migration**: can be seen as the use case where a VM cluster
> > +would like to migrate from a newer version of QEMU (version *N*) back to an
> > +even older version of QEMU (version *N-1*).
> 
> I'd drop the VM cluster part from these. Define the terms in a more
> strict manner (QEMU versions, n/n-1, that's it). Then the parts below
> could be second paragraphs further detailing the use-cases of the two
> types of migration.

OK.

> 
> > +
> > +A forward migration is more common, where system upgrades are needed.  In
> > +this case, the upgrade can be done seamlessly by live migrating the old VMs
> > +to the new VMs with the new binaries.
> 
> I got a bit confused whether this was describing migration to a
> different host or within the same host. I suggest we spell it out some
> more:

It can be same-host or across-host.

> 
> "live migrating an existing VM that uses 

Re: [PATCH v4 0/4] Support RISC-V IOPMP

2024-01-22 Thread Ethan Chen via
On Mon, Jan 22, 2024 at 04:01:12PM +1000, Alistair Francis wrote:
> On Thu, Dec 21, 2023 at 4:38 PM Ethan Chen  wrote:
> >
> > On Mon, Dec 18, 2023 at 02:18:58PM +1000, Alistair Francis wrote:
> > > On Wed, Nov 22, 2023 at 3:36 PM Ethan Chen via  
> > > wrote:
> > > >
> > > > This series implements IOPMP specification v1.0.0-draft4 rapid-k model.
> > > > The specification url:
> > > > https://github.com/riscv-non-isa/iopmp-spec/blob/main/riscv_iopmp_specification.pdf
> > > >
> > > > When IOPMP is enabled, a DMA device ATCDMAC300 is added to RISC-V virt
> > > > platform. This DMA device is connected to the IOPMP and has the 
> > > > functionalities
> > >
> > > I don't think we want to add an Andes DMA device to the virt machine.
> > >
> > > I can't even find the spec for the ATCDMAC300, which isn't great
> > >
> > > Alistair
> >
> > Since the IOPMP does not take effect when there is no other device connects 
> > to
> > IOPMP, I think it is necessary to have a DMA device for IOPMP demonstration.
> 
> That is true, but that device shouldn't be a vendor specific device
> for the virt machine.
> 
> >
> > Do you have any suggestions for supporting IOPMP on RISC-V virt machine?
> 
> A RVI device would be fine. Otherwise something that has become a
> defacto standard by being commonly used (the SiFive PLIC for example).
> 
> I really don't think it should be some vendor IP, especially one that
> doesn't have a public datasheet.
> 
> You could add an Andes machine that can use your vendor IP. Otherwise
> we can look at adding IOPMP and not connecting it, but that is a pain.

In submitted patch v5, I removed vendor IP and made generic PCIe host
bridge on RISC-V virt machine connect to IOPMP. DMA operation from PCI
devices on the bridge will be check by IOPMP.

> 
> What is the IOPMP spec group doing for testing?

IOPMP TG is doing an implementation(RTL) testing.

NVidia will provide SystemC stimulus from different ports to test or 
observe the object under testing. A test bench will be provided in 
the form of TLM-2.0 transaction level modeling.

Thanks,
Ethan Chen



Re: [PATCH] tcg/arm: Fix SIGILL in tcg_out_qemu_st_direct

2024-01-22 Thread Richard Henderson

On 1/22/24 07:14, Joseph Burt wrote:

When tcg_out_qemu_st_{index,direct} were merged, the direct case for
MO_64 was omitted, causing qemu_st_i64 to be encoded as 0x due
to underflow when adding h.base and h.index.

Fixes: 1df6d611bdc2("tcg/arm: Introduce HostAddress")
Signed-off-by: Joseph Burt 
---
  tcg/arm/tcg-target.c.inc | 3 +++
  1 file changed, 3 insertions(+)


Reviewed-by: Richard Henderson 


r~




Re: [PATCH 0/5] migration: Fix migration state reference counting

2024-01-22 Thread Peter Xu
On Fri, Jan 19, 2024 at 08:39:17PM -0300, Fabiano Rosas wrote:
> We currently have a bug when running migration code in bottom
> halves. The issue has already been reported in Gitlab[1] and it
> started happening very frequently on my machine for some reason.
> 
> The issue is that we're dropping the last reference to the
> MigrationState object while the cleanup bottom half is still running
> and it leads to an use after free. More details on the commit message.
> 
> This series fixes the issue and does a refactoring around the
> migration BH scheduling aiming to consolidate some code so that it is
> less error prone.
> 
> 1- https://gitlab.com/qemu-project/qemu/-/issues/1969
> 
> CI run: https://gitlab.com/farosas/qemu/-/pipelines/1144927625
> 
> Fabiano Rosas (5):
>   migration: Fix use-after-free of migration state object
>   migration: Take reference to migration state around
> bg_migration_vm_start_bh
>   migration: Reference migration state around
> loadvm_postcopy_handle_run_bh
>   migration: Add a wrapper to qemu_bh_schedule
>   migration: Centralize BH creation and dispatch
> 
>  migration/migration.c | 82 +--
>  migration/migration.h |  5 +--
>  migration/savevm.c|  5 +--
>  3 files changed, 49 insertions(+), 43 deletions(-)

Looks all good now, queued.  I'll amend the "Resolve:" line in patch 1.

-- 
Peter Xu




Re: [PATCH v3 2/2] target/ppc: Implement attn instruction on BookS 64-bit processors

2024-01-22 Thread Nicholas Piggin
On Fri Jan 19, 2024 at 8:09 AM AEST, Richard Henderson wrote:
> On 1/19/24 02:25, Nicholas Piggin wrote:
> > +/* attn enable check   
> >   */
> > +static inline int check_attn_none(CPUPPCState *env)
>
> Don't mark inline ...
>
> > @@ -2150,6 +2170,7 @@ POWERPC_FAMILY(405)(ObjectClass *oc, void *data)
> >   dc->desc = "PowerPC 405";
> >   pcc->init_proc = init_proc_405;
> >   pcc->check_pow = check_pow_nocheck;
> > +pcc->check_attn = check_attn_none;
>
> ... since it is never called directly.

True, I mindlessly followed check_pow_none(). Maybe all
those helpers could be uninlined and moved into cpu_init.c.

Thanks,
Nick



Re: [PATCH 2/8] ppc/spapr|pnv: Remove SAO from pa-features when running MTTCG

2024-01-22 Thread Nicholas Piggin
On Fri Jan 19, 2024 at 10:23 AM AEST, David Gibson wrote:
> On Fri, Jan 19, 2024 at 12:09:36AM +1000, Nicholas Piggin wrote:
> > SAO is a page table attribute that strengthens the memory ordering of
> > accesses. QEMU with MTTCG does not implement this, so clear it in
> > ibm,pa-features. There is a complication with spapr migration that is
> > addressed with comments, it is not a new problem here.
> > 
> > Signed-off-by: Nicholas Piggin 
> > ---
> >  hw/ppc/pnv.c   |  5 +
> >  hw/ppc/spapr.c | 15 +++
> >  2 files changed, 20 insertions(+)
> > 
> > diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> > index b949398689..4969fbdb05 100644
> > --- a/hw/ppc/pnv.c
> > +++ b/hw/ppc/pnv.c
> > @@ -158,6 +158,11 @@ static void pnv_dt_core(PnvChip *chip, PnvCore *pc, 
> > void *fdt)
> >  char *nodename;
> >  int cpus_offset = get_cpus_node(fdt);
> >  
> > +if (qemu_tcg_mttcg_enabled()) {
> > +/* SSO (SAO) ordering is not supported under MTTCG. */
> > +pa_features[4 + 2] &= ~0x80;
> > +}
> > +
> >  nodename = g_strdup_printf("%s@%x", dc->fw_name, pc->pir);
> >  offset = fdt_add_subnode(fdt, cpus_offset, nodename);
> >  _FDT(offset);
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 021b1a00e1..1c79d5670d 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -284,6 +284,21 @@ static void spapr_dt_pa_features(SpaprMachineState 
> > *spapr,
> >  return;
> >  }
> >  
> > +if (qemu_tcg_mttcg_enabled()) {
> > +/*
> > + * SSO (SAO) ordering is not supported under MTTCG, so disable it.
> > + * There is no cap for this, so there is a migration bug here.
> > + * However don't disable it entirely, to allow it to be used under
> > + * KVM. This is a minor concern because:
> > + * - SAO is an obscure an rarely (if ever) used feature.
> > + * - SAO is removed from POWER10 / v3.1, so there is already a
> > + *   migration problem today.
> > + * - Linux does not test this pa-features bit today anyway, so it's
> > + *   academic.
> > + */
> > +pa_features[4 + 2] &= ~0x80;
>
> Oof.. I see the reasoning but modifying guest visible parameters based
> on host capabilities without a cap really worries me nonetheless.

Yeah :( It's not a new problem, but changing it based on host
does make it look uglier I guess.

Other option could be to just disable it always. I don't mind
but someone did mention experimenting with it when I asked
about removing support from Linux. They could still test with
bare metal, and if ever started actually being used then we
could add a cap for it.

Thanks,
Nick



Re: [PATCH 1/5] migration: Fix use-after-free of migration state object

2024-01-22 Thread Peter Xu
On Mon, Jan 22, 2024 at 01:55:45PM -0300, Fabiano Rosas wrote:
> Peter Xu  writes:
> 
> > On Mon, Jan 22, 2024 at 05:49:01PM +0800, Peter Xu wrote:
> >> On Fri, Jan 19, 2024 at 08:39:18PM -0300, Fabiano Rosas wrote:
> >> > We're currently allowing the process_incoming_migration_bh bottom-half
> >> > to run without holding a reference to the 'current_migration' object,
> >> > which leads to a segmentation fault if the BH is still live after
> >> > migration_shutdown() has dropped the last reference to
> >> > current_migration.
> >> > 
> >> > In my system the bug manifests as migrate_multifd() returning true
> >> > when it shouldn't and multifd_load_shutdown() calling
> >> > multifd_recv_terminate_threads() which crashes due to an uninitialized
> >> > multifd_recv_state.
> >> > 
> >> > Fix the issue by holding a reference to the object when scheduling the
> >> > BH and dropping it before returning from the BH. The same is already
> >> > done for the cleanup_bh at migrate_fd_cleanup_schedule().
> >> > 
> >> > Signed-off-by: Fabiano Rosas 
> >> > ---
> >> >  migration/migration.c | 2 ++
> >> >  1 file changed, 2 insertions(+)
> >> > 
> >> > diff --git a/migration/migration.c b/migration/migration.c
> >> > index 219447dea1..cf17b68e57 100644
> >> > --- a/migration/migration.c
> >> > +++ b/migration/migration.c
> >> > @@ -648,6 +648,7 @@ static void process_incoming_migration_bh(void 
> >> > *opaque)
> >> >MIGRATION_STATUS_COMPLETED);
> >> >  qemu_bh_delete(mis->bh);
> >> >  migration_incoming_state_destroy();
> >> > +object_unref(OBJECT(migrate_get_current()));
> >> >  }
> >> >  
> >> >  static void coroutine_fn
> >> > @@ -713,6 +714,7 @@ process_incoming_migration_co(void *opaque)
> >> >  }
> >> >  
> >> >  mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
> >> > +object_ref(OBJECT(migrate_get_current()));
> >> >  qemu_bh_schedule(mis->bh);
> >> >  return;
> >> >  fail:
> >> > -- 
> >> > 2.35.3
> >> > 
> >> 
> > I know I missed something, but I'd better ask: use-after-free needs to
> > happen only after migration_shutdown() / qemu_cleanup(), am I right?
> > 
> > If so, shouldn't qemu_main_loop() already returned?  Then how could any BH
> > keep running (including migration's) without qemu_main_loop()?
> 
> The aio_bh_poll() -> aio_bh_call() sequence doesn't depend on
> qemu_main_loop(). In the stack you found below it happens after
> aio_wait_bh_oneshot() -> AIO_WAIT_WHILE -> aio_poll().
> 
> The stack I see is:
> 
> #0  0x5625c97bffc0 in multifd_recv_terminate_threads (err=0x0) at 
> ../migration/multifd.c:992
> #1  0x5625c97c0086 in multifd_load_shutdown () at 
> ../migration/multifd.c:1012
> #2  0x5625c97b6163 in process_incoming_migration_bh 
> (opaque=0x5625cbce59f0) at ../migration/migration.c:624
> #3  0x5625c9c740c2 in aio_bh_call (bh=0x5625cc9e1320) at 
> ../util/async.c:169
> #4  0x5625c9c741de in aio_bh_poll (ctx=0x5625cba2a670) at 
> ../util/async.c:216
> 
> here^
> 
> #5  0x5625c9af0599 in bdrv_graph_wrunlock () at ../block/graph-lock.c:170
> #6  0x5625c9aba8bd in bdrv_close (bs=0x5625cbcb3d80) at ../block.c:5099
> #7  0x5625c9abb71a in bdrv_delete (bs=0x5625cbcb3d80) at ../block.c:5501
> #8  0x5625c9abe840 in bdrv_unref (bs=0x5625cbcb3d80) at ../block.c:7075
> #9  0x5625c9abe865 in bdrv_schedule_unref_bh (opaque=0x5625cbcb3d80) at 
> ../block.c:7083
> #10 0x5625c9c740c2 in aio_bh_call (bh=0x5625cbde09d0) at 
> ../util/async.c:169
> #11 0x5625c9c741de in aio_bh_poll (ctx=0x5625cba2a670) at 
> ../util/async.c:216
> #12 0x5625c9af0599 in bdrv_graph_wrunlock () at ../block/graph-lock.c:170
> #13 0x5625c9ae05db in blk_remove_bs (blk=0x5625cbcc1070) at 
> ../block/block-backend.c:907
> #14 0x5625c9adfb1b in blk_remove_all_bs () at ../block/block-backend.c:571
> #15 0x5625c9abab0d in bdrv_close_all () at ../block.c:5146
> #16 0x5625c97892e4 in qemu_cleanup (status=0) at ../system/runstate.c:880
> #17 0x5625c9a58081 in qemu_default_main () at ../system/main.c:38
> #18 0x5625c9a580af in main (argc=35, argv=0x7ffe30ab0578) at 
> ../system/main.c:48
> 
> > Hmm, I saw a pretty old stack mentioned in commit fd392cfa8e6:
> >
> > Original output:
> > qemu-system-x86_64: terminating on signal 15 from pid 31980 ( > process>)
> > =
> > ==31958==ERROR: AddressSanitizer: heap-use-after-free on address 
> > 0x6191d210
> >   at pc 0x58a535ca bp 0x7fffb190 sp 0x7fffb188
> > READ of size 8 at 0x6191d210 thread T0 (qemu-vm-0)
> > #0 0x58a535c9 in migrate_fd_cleanup 
> > migration/migration.c:1502:23
> > #1 0x594fde0a in aio_bh_call util/async.c:90:5
> > #2 0x594fe522 in aio_bh_poll util/async.c:118:13
> > #3 0x59524783 in aio_poll util/aio-posix.c:725:17
> > #4 0x59504fb3 in aio_wait_bh_oneshot util/aio-wait.c:71:5
> 

Re: [PATCH 00/26] target/ppc: TCG improvements and fixes

2024-01-22 Thread Nicholas Piggin
On Fri Jan 19, 2024 at 6:58 PM AEST, Cédric Le Goater wrote:
> Hello Nick,
>
> On 1/18/24 16:06, Nicholas Piggin wrote:
> > This is mostly TCG core emulation improvements and fixes. I
> > got the chiptod model in there because it's intertwined with
> > TFMR SPR.
> > 
> > Other non-TCG patches are spapr MSR entry point change which
> > goes together with the other machine check / MSR[ME] fixes.
> > And Saif's gdb patches, as well as some SPR renaming.
> > 
> > Will probably a bit more similar patches too, e.g., Dan's SPR
> > patches, but I'll just get this out for review before
> > upstreaming it.
>
> Before we start a new round of reviews, could we please uptream the ones
> reviewed in the previous cycle [1] ? Some are part of this series and we
> shoudn't have to go through them again.

Yeah good idea, will do.

Thanks,
Nick



Re: [PATCH v6 08/10] migration/yank: Use channel features

2024-01-22 Thread Peter Xu
On Mon, Jan 22, 2024 at 05:08:09PM -0300, Fabiano Rosas wrote:
> Peter Xu  writes:
> 
> > On Mon, Sep 11, 2023 at 02:13:18PM -0300, Fabiano Rosas wrote:
> >> Stop using outside knowledge about the io channels when registering
> >> yank functions. Query for features instead.
> >> 
> >> The yank method for all channels used with migration code currently is
> >> to call the qio_channel_shutdown() function, so query for
> >> QIO_CHANNEL_FEATURE_SHUTDOWN. We could add a separate feature in the
> >> future for indicating whether a channel supports yanking, but that
> >> seems overkill at the moment.
> >> 
> >> Signed-off-by: Fabiano Rosas 
> >
> > Reviewed-by: Peter Xu 
> 
> Hi Peter, this one has fell through the cracks, think we could merge it?

Yep, queued.

-- 
Peter Xu




Re: [External] Re: [PATCH v3 03/20] multifd: Zero pages transmission

2024-01-22 Thread Hao Xiang
On Sun, Jan 14, 2024 at 11:01 PM Shivam Kumar  wrote:
>
>
>
> > On 04-Jan-2024, at 6:14 AM, Hao Xiang  wrote:
> >
> > From: Juan Quintela 
> >
> > This implements the zero page dection and handling.
> >
> > Signed-off-by: Juan Quintela 
> > ---
> > migration/multifd.c | 41 +++--
> > migration/multifd.h |  5 +
> > 2 files changed, 44 insertions(+), 2 deletions(-)
> >
> > diff --git a/migration/multifd.c b/migration/multifd.c
> > index 5a1f50c7e8..756673029d 100644
> > --- a/migration/multifd.c
> > +++ b/migration/multifd.c
> > @@ -11,6 +11,7 @@
> >  */
> >
> > #include "qemu/osdep.h"
> > +#include "qemu/cutils.h"
> > #include "qemu/rcu.h"
> > #include "exec/target_page.h"
> > #include "sysemu/sysemu.h"
> > @@ -279,6 +280,12 @@ static void multifd_send_fill_packet(MultiFDSendParams 
> > *p)
> >
> > packet->offset[i] = cpu_to_be64(temp);
> > }
> > +for (i = 0; i < p->zero_num; i++) {
> > +/* there are architectures where ram_addr_t is 32 bit */
> > +uint64_t temp = p->zero[i];
> > +
> > +packet->offset[p->normal_num + i] = cpu_to_be64(temp);
> > +}
> > }
> >
> > static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
> > @@ -361,6 +368,18 @@ static int 
> > multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
> > p->normal[i] = offset;
> > }
> >
> > +for (i = 0; i < p->zero_num; i++) {
> > +uint64_t offset = be64_to_cpu(packet->offset[p->normal_num + i]);
> > +
> > +if (offset > (p->block->used_length - p->page_size)) {
> > +error_setg(errp, "multifd: offset too long %" PRIu64
> > +   " (max " RAM_ADDR_FMT ")",
> > +   offset, p->block->used_length);
> > +return -1;
> > +}
> > +p->zero[i] = offset;
> > +}
> > +
> > return 0;
> > }
> >
> > @@ -664,6 +683,8 @@ static void *multifd_send_thread(void *opaque)
> > MultiFDSendParams *p = opaque;
> > MigrationThread *thread = NULL;
> > Error *local_err = NULL;
> > +/* qemu older than 8.2 don't understand zero page on multifd channel */
> > +bool use_zero_page = !migrate_use_main_zero_page();
> > int ret = 0;
> > bool use_zero_copy_send = migrate_zero_copy_send();
> >
> > @@ -689,6 +710,7 @@ static void *multifd_send_thread(void *opaque)
> > qemu_mutex_lock(>mutex);
> >
> > if (p->pending_job) {
> > +RAMBlock *rb = p->pages->block;
> > uint64_t packet_num = p->packet_num;
> > uint32_t flags;
> > p->normal_num = 0;
> > @@ -701,8 +723,16 @@ static void *multifd_send_thread(void *opaque)
> > }
> >
> > for (int i = 0; i < p->pages->num; i++) {
> > -p->normal[p->normal_num] = p->pages->offset[i];
> > -p->normal_num++;
> > +uint64_t offset = p->pages->offset[i];
> > +if (use_zero_page &&
> > +buffer_is_zero(rb->host + offset, p->page_size)) {
> > +p->zero[p->zero_num] = offset;
> > +p->zero_num++;
> > +ram_release_page(rb->idstr, offset);
> > +} else {
> > +p->normal[p->normal_num] = offset;
> > +p->normal_num++;
> > +}
> > }
> >
> > if (p->normal_num) {
> > @@ -1155,6 +1185,13 @@ static void *multifd_recv_thread(void *opaque)
> > }
> > }
> >
> > +for (int i = 0; i < p->zero_num; i++) {
> > +void *page = p->host + p->zero[i];
> > +if (!buffer_is_zero(page, p->page_size)) {
> > +memset(page, 0, p->page_size);
> > +}
> > +}
> > +
> I am wondering if zeroing the zero page on the destination can also be 
> offloaded to DSA. Can it help in reducing cpu consumption on the destination 
> in case of multifd-based migration?

I have that change coded up and I have tested for performance. It's
not saving as much CPU cycles as I hoped. The problem is that on the
sender side, we run zero page detection on every single page but on
the receiver side, we only zero-ing the pages if the sender tells us
so. For instance, if a multifd packet with 128 pages are all zero
pages, we can offload the zero-ing pages on the entire 128 pages in a
single DSA operation and that's the best case. In a worse case, if a
multifd packet with 128 pages only has say 10 zero pages, we can
offload the zero-ing pages on only the 10 pages instead of the entire
128 pages. Considering DSA software overhead, that is not a good deal.

In the future, if we refactor the multifd path to separate zero pages
in its own packet, it will be a different story. For instance, if
there are 1024 non-contiguous zero pages detected, they will be sent
across multiple packets. With a new packet format, those pages can be
put into the same packet (and we can put more than 

Re: [External] Re: [PATCH v3 13/20] migration/multifd: Prepare to introduce DSA acceleration on the multifd path.

2024-01-22 Thread Hao Xiang
On Sun, Jan 14, 2024 at 10:46 PM Shivam Kumar  wrote:
>
>
>
> > On 04-Jan-2024, at 6:14 AM, Hao Xiang  wrote:
> >
> > 1. Refactor multifd_send_thread function.
> > 2. Implement buffer_is_zero_use_cpu to handle CPU based zero page
> > checking.
> > 3. Introduce the batch task structure in MultiFDSendParams.
> >
> > Signed-off-by: Hao Xiang 
> > ---
> > include/qemu/dsa.h  | 43 +++--
> > migration/multifd.c | 77 -
> > migration/multifd.h |  2 ++
> > util/dsa.c  | 51 +-
> > 4 files changed, 148 insertions(+), 25 deletions(-)
> >
> > diff --git a/include/qemu/dsa.h b/include/qemu/dsa.h
> > index e002652879..fe7772107a 100644
> > --- a/include/qemu/dsa.h
> > +++ b/include/qemu/dsa.h
> > @@ -2,6 +2,7 @@
> > #define QEMU_DSA_H
> >
> > #include "qemu/error-report.h"
> > +#include "exec/cpu-common.h"
> > #include "qemu/thread.h"
> > #include "qemu/queue.h"
> >
> > @@ -42,6 +43,20 @@ typedef struct dsa_batch_task {
> > QSIMPLEQ_ENTRY(dsa_batch_task) entry;
> > } dsa_batch_task;
> >
> > +#endif
> > +
> > +struct batch_task {
> > +/* Address of each pages in pages */
> > +ram_addr_t *addr;
> > +/* Zero page checking results */
> > +bool *results;
> > +#ifdef CONFIG_DSA_OPT
> > +struct dsa_batch_task *dsa_batch;
> > +#endif
> > +};
> > +
> > +#ifdef CONFIG_DSA_OPT
> > +
> > /**
> >  * @brief Initializes DSA devices.
> >  *
> > @@ -74,7 +89,7 @@ void dsa_cleanup(void);
> > bool dsa_is_running(void);
> >
> > /**
> > - * @brief Initializes a buffer zero batch task.
> > + * @brief Initializes a buffer zero DSA batch task.
> >  *
> >  * @param task A pointer to the batch task to initialize.
> >  * @param results A pointer to an array of zero page checking results.
> > @@ -102,7 +117,7 @@ void buffer_zero_batch_task_destroy(struct 
> > dsa_batch_task *task);
> >  * @return Zero if successful, otherwise non-zero.
> >  */
> > int
> > -buffer_is_zero_dsa_batch_async(struct dsa_batch_task *batch_task,
> > +buffer_is_zero_dsa_batch_async(struct batch_task *batch_task,
> >const void **buf, size_t count, size_t len);
> >
> > #else
> > @@ -128,6 +143,30 @@ static inline void dsa_stop(void) {}
> >
> > static inline void dsa_cleanup(void) {}
> >
> > +static inline int
> > +buffer_is_zero_dsa_batch_async(struct batch_task *batch_task,
> > +   const void **buf, size_t count, size_t len)
> > +{
> > +exit(1);
> > +}
> > +
> > #endif
> >
> > +/**
> > + * @brief Initializes a general buffer zero batch task.
> > + *
> > + * @param task A pointer to the general batch task to initialize.
> > + * @param batch_size The number of zero page checking tasks in the batch.
> > + */
> > +void
> > +batch_task_init(struct batch_task *task, int batch_size);
> > +
> > +/**
> > + * @brief Destroys a general buffer zero batch task.
> > + *
> > + * @param task A pointer to the general batch task to destroy.
> > + */
> > +void
> > +batch_task_destroy(struct batch_task *task);
> > +
> > #endif
> > diff --git a/migration/multifd.c b/migration/multifd.c
> > index eece85569f..e7c549b93e 100644
> > --- a/migration/multifd.c
> > +++ b/migration/multifd.c
> > @@ -14,6 +14,8 @@
> > #include "qemu/cutils.h"
> > #include "qemu/rcu.h"
> > #include "qemu/cutils.h"
> > +#include "qemu/dsa.h"
> > +#include "qemu/memalign.h"
> > #include "exec/target_page.h"
> > #include "sysemu/sysemu.h"
> > #include "exec/ramblock.h"
> > @@ -574,6 +576,8 @@ void multifd_save_cleanup(void)
> > p->name = NULL;
> > multifd_pages_clear(p->pages);
> > p->pages = NULL;
> > +batch_task_destroy(p->batch_task);
> > +p->batch_task = NULL;
> > p->packet_len = 0;
> > g_free(p->packet);
> > p->packet = NULL;
> > @@ -678,13 +682,66 @@ int multifd_send_sync_main(QEMUFile *f)
> > return 0;
> > }
> >
> > +static void set_page(MultiFDSendParams *p, bool zero_page, uint64_t offset)
> > +{
> > +RAMBlock *rb = p->pages->block;
> > +if (zero_page) {
> > +p->zero[p->zero_num] = offset;
> > +p->zero_num++;
> > +ram_release_page(rb->idstr, offset);
> > +} else {
> > +p->normal[p->normal_num] = offset;
> > +p->normal_num++;
> > +}
> > +}
> > +
> > +static void buffer_is_zero_use_cpu(MultiFDSendParams *p)
> > +{
> > +const void **buf = (const void **)p->batch_task->addr;
> > +assert(!migrate_use_main_zero_page());
> > +
> > +for (int i = 0; i < p->pages->num; i++) {
> > +p->batch_task->results[i] = buffer_is_zero(buf[i], p->page_size);
> > +}
> > +}
> > +
> > +static void set_normal_pages(MultiFDSendParams *p)
> > +{
> > +for (int i = 0; i < p->pages->num; i++) {
> > +p->batch_task->results[i] = false;
> > +}
> > +}
> Please correct me if I am wrong but set_normal_pages will not be a part of 
> the final patch, right? They are there for testing out the performance 
> 

Re: [External] Re: [PATCH v3 01/20] multifd: Add capability to enable/disable zero_page

2024-01-22 Thread Hao Xiang
On Sun, Jan 14, 2024 at 10:02 PM Shivam Kumar  wrote:
>
>
>
> > On 04-Jan-2024, at 6:14 AM, Hao Xiang  wrote:
> >
> > From: Juan Quintela 
> >
> > We have to enable it by default until we introduce the new code.
> >
> > Signed-off-by: Juan Quintela 
> > ---
> > migration/options.c | 15 +++
> > migration/options.h |  1 +
> > qapi/migration.json |  8 +++-
> > 3 files changed, 23 insertions(+), 1 deletion(-)
> >
> > diff --git a/migration/options.c b/migration/options.c
> > index 8d8ec73ad9..0f6bd78b9f 100644
> > --- a/migration/options.c
> > +++ b/migration/options.c
> > @@ -204,6 +204,8 @@ Property migration_properties[] = {
> > DEFINE_PROP_MIG_CAP("x-switchover-ack",
> > MIGRATION_CAPABILITY_SWITCHOVER_ACK),
> > DEFINE_PROP_MIG_CAP("x-dirty-limit", MIGRATION_CAPABILITY_DIRTY_LIMIT),
> > +DEFINE_PROP_MIG_CAP("main-zero-page",
> > +MIGRATION_CAPABILITY_MAIN_ZERO_PAGE),
> > DEFINE_PROP_END_OF_LIST(),
> > };
> >
> > @@ -284,6 +286,19 @@ bool migrate_multifd(void)
> > return s->capabilities[MIGRATION_CAPABILITY_MULTIFD];
> > }
> >
> > +bool migrate_use_main_zero_page(void)
> > +{
> > +/* MigrationState *s; */
> > +
> > +/* s = migrate_get_current(); */
> > +
> > +/*
> > + * We will enable this when we add the right code.
> > + * return s->enabled_capabilities[MIGRATION_CAPABILITY_MAIN_ZERO_PAGE];
> > + */
> > +return true;
> > +}
> > +
> > bool migrate_pause_before_switchover(void)
> > {
> > MigrationState *s = migrate_get_current();
> > diff --git a/migration/options.h b/migration/options.h
> > index 246c160aee..c901eb57c6 100644
> > --- a/migration/options.h
> > +++ b/migration/options.h
> > @@ -88,6 +88,7 @@ int migrate_multifd_channels(void);
> > MultiFDCompression migrate_multifd_compression(void);
> > int migrate_multifd_zlib_level(void);
> > int migrate_multifd_zstd_level(void);
> > +bool migrate_use_main_zero_page(void);
> > uint8_t migrate_throttle_trigger_threshold(void);
> > const char *migrate_tls_authz(void);
> > const char *migrate_tls_creds(void);
> > diff --git a/qapi/migration.json b/qapi/migration.json
> > index eb2f883513..80c4b13516 100644
> > --- a/qapi/migration.json
> > +++ b/qapi/migration.json
> > @@ -531,6 +531,12 @@
> > # and can result in more stable read performance.  Requires KVM
> > # with accelerator property "dirty-ring-size" set.  (Since 8.1)
> > #
> > +#
> > +# @main-zero-page: If enabled, the detection of zero pages will be
> > +#  done on the main thread.  Otherwise it is done on
> > +#  the multifd threads.
> > +#  (since 8.2)
> > +#
> Should the capability name be something like "zero-page-detection" or just 
> “zero-page”?
> CC: Fabiano Rosas

I think the same concern was brought up last time Juan sent out the
original patchset. Right now, the zero page detection is done in the
main migration thread and it is always "ON". This change added a
functionality to move the zero page detection from the main thread to
the multifd sender threads. Now "main-zero-page" is turned "OFF" by
default, and zero page checking is done in the multifd sender thread
(much better performance). If user wants to run the zero page
detection in the main thread (keep current behavior), user can change
"main-zero-page" to "ON".

Renaming it to "zero-page-detection" or just “zero-page” can not
differentiate the old behavior and the new behavior.

Here are the options:
1) Keep the current behavior. "main-zero-page" is OFF by default and
zero page detection runs on the multifd thread by default. User can
turn the switch to "ON" if they want old behavior.
2) Make "main-zero-page" switch ON as default. This would keep the
current behavior by default. User can set it to "OFF" for better
performance.

> > # Features:
> > #
> > # @deprecated: Member @block is deprecated.  Use blockdev-mirror with
> > @@ -555,7 +561,7 @@
> >{ 'name': 'x-ignore-shared', 'features': [ 'unstable' ] },
> >'validate-uuid', 'background-snapshot',
> >'zero-copy-send', 'postcopy-preempt', 'switchover-ack',
> > -   'dirty-limit'] }
> > +   'dirty-limit', 'main-zero-page'] }
> >
> > ##
> > # @MigrationCapabilityStatus:
> > --
> > 2.30.2
> >
> >
> >
>



Re: [PATCH v3 3/3] tests/tcg: Add the PROT_NONE gdbstub test

2024-01-22 Thread Alex Bennée
Ilya Leoshkevich  writes:

> On Mon, Jan 22, 2024 at 03:54:32PM +, Alex Bennée wrote:
>> Ilya Leoshkevich  writes:
>> 
>> > Make sure that qemu gdbstub, like gdbserver, allows reading from and
>> > writing to PROT_NONE pages.
>> >
>> > Signed-off-by: Ilya Leoshkevich 
>> > ---
>> >  tests/tcg/multiarch/Makefile.target  |  9 +-
>> >  tests/tcg/multiarch/gdbstub/prot-none.py | 22 +
>> >  tests/tcg/multiarch/prot-none.c  | 40 
>> >  3 files changed, 70 insertions(+), 1 deletion(-)
>> >  create mode 100644 tests/tcg/multiarch/gdbstub/prot-none.py
>> >  create mode 100644 tests/tcg/multiarch/prot-none.c
>
> [...]
>
>> > +def run_test():
>> > +"""Run through the tests one by one"""
>> > +gdb.Breakpoint("break_here")
>> > +gdb.execute("continue")
>> > +val = gdb.parse_and_eval("*(char[2] *)q").string()
>> 
>> Better traceback:
>> 
>>   Breakpoint 1, break_here (q=0x40802fff) at 
>> /home/alex/lsrc/qemu.git/tests/tcg/multiarch/prot-none.c:14
>>   14  }
>>   GDB Exception:
>>   Traceback (most recent call last):
>> File "/home/alex/lsrc/qemu.git/tests/guest-debug/test_gdbstub.py", line 
>> 42, in main
>>   test()
>> File "./tests/tcg/multiarch/gdbstub/prot-none.py", line 14, in run_test
>>   val = gdb.parse_and_eval("*(char[2] *)q").string()
>> 
>>   gdb.MemoryError: Cannot access memory at address 0x40802fff
>>   Python 3.11.2 (main, Mar 13 2023, 12:18:29) [GCC 12.2.0] on linux
>>   Type "help", "copyright", "credits" or "license" for more information.
>>   (InteractiveConsole)
>>   >>> 
>
> Thanks for the debug output. This shows that the feature being tested
> doesn't work (the value of `q` looks sane to me). May I ask what host
> distro is this? I tried on x86_64 Fedora 38 and x86_64 Ubuntu 22.04 so
> far, and the test was successful.

Debian Bookworm (x86_64) with gdb-multiarch installed.
>
> [...]

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro



[PATCH 4/7] hw/riscv/virt.c: use g_autofree in create_fdt_sockets()

2024-01-22 Thread Daniel Henrique Barboza
Move 'clust_name' inside the loop, and g_autofree, to avoid having to
g_free() manually in each loop iteration.

'intc_phandles' is also g_autofreed to avoid another manual g_free().

Signed-off-by: Daniel Henrique Barboza 
---
 hw/riscv/virt.c | 9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 373b1dd96b..d0f402e0d5 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -721,11 +721,11 @@ static void create_fdt_sockets(RISCVVirtState *s, const 
MemMapEntry *memmap,
uint32_t *irq_virtio_phandle,
uint32_t *msi_pcie_phandle)
 {
-char *clust_name;
 int socket, phandle_pos;
 MachineState *ms = MACHINE(s);
 uint32_t msi_m_phandle = 0, msi_s_phandle = 0;
-uint32_t *intc_phandles, xplic_phandles[MAX_NODES];
+uint32_t xplic_phandles[MAX_NODES];
+g_autofree uint32_t *intc_phandles = NULL;
 int socket_count = riscv_socket_count(ms);
 
 qemu_fdt_add_subnode(ms->fdt, "/cpus");
@@ -739,6 +739,7 @@ static void create_fdt_sockets(RISCVVirtState *s, const 
MemMapEntry *memmap,
 
 phandle_pos = ms->smp.cpus;
 for (socket = (socket_count - 1); socket >= 0; socket--) {
+g_autofree char *clust_name = NULL;
 phandle_pos -= s->soc[socket].num_harts;
 
 clust_name = g_strdup_printf("/cpus/cpu-map/cluster%d", socket);
@@ -749,8 +750,6 @@ static void create_fdt_sockets(RISCVVirtState *s, const 
MemMapEntry *memmap,
 
 create_fdt_socket_memory(s, memmap, socket);
 
-g_free(clust_name);
-
 if (tcg_enabled()) {
 if (s->have_aclint) {
 create_fdt_socket_aclint(s, memmap, socket,
@@ -793,8 +792,6 @@ static void create_fdt_sockets(RISCVVirtState *s, const 
MemMapEntry *memmap,
 }
 }
 
-g_free(intc_phandles);
-
 if (kvm_enabled() && virt_use_kvm_aia(s)) {
 *irq_mmio_phandle = xplic_phandles[0];
 *irq_virtio_phandle = xplic_phandles[0];
-- 
2.43.0




[PATCH 7/7] hw/riscv/virt.c: use g_autofree in create_fdt_*

2024-01-22 Thread Daniel Henrique Barboza
We have a lot of cases where a char or an uint32_t pointer is used once
to alloc a string/array, read/written during the function, and then
g_free() at the end. There's no pointer re-use - a single alloc, a
single g_free().

Use 'g_autofree' to avoid the g_free() calls.

Signed-off-by: Daniel Henrique Barboza 
---
 hw/riscv/virt.c | 78 ++---
 1 file changed, 22 insertions(+), 56 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 710fbbda2c..1c257e89d2 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -285,7 +285,7 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int 
socket,
 static void create_fdt_socket_memory(RISCVVirtState *s,
  const MemMapEntry *memmap, int socket)
 {
-char *mem_name;
+g_autofree char *mem_name = NULL;
 uint64_t addr, size;
 MachineState *ms = MACHINE(s);
 
@@ -297,7 +297,6 @@ static void create_fdt_socket_memory(RISCVVirtState *s,
 addr >> 32, addr, size >> 32, size);
 qemu_fdt_setprop_string(ms->fdt, mem_name, "device_type", "memory");
 riscv_socket_fdt_write_id(ms, mem_name, socket);
-g_free(mem_name);
 }
 
 static void create_fdt_socket_clint(RISCVVirtState *s,
@@ -305,8 +304,8 @@ static void create_fdt_socket_clint(RISCVVirtState *s,
 uint32_t *intc_phandles)
 {
 int cpu;
-char *clint_name;
-uint32_t *clint_cells;
+g_autofree char *clint_name = NULL;
+g_autofree uint32_t *clint_cells = NULL;
 unsigned long clint_addr;
 MachineState *ms = MACHINE(s);
 static const char * const clint_compat[2] = {
@@ -333,9 +332,6 @@ static void create_fdt_socket_clint(RISCVVirtState *s,
 qemu_fdt_setprop(ms->fdt, clint_name, "interrupts-extended",
 clint_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 4);
 riscv_socket_fdt_write_id(ms, clint_name, socket);
-g_free(clint_name);
-
-g_free(clint_cells);
 }
 
 static void create_fdt_socket_aclint(RISCVVirtState *s,
@@ -346,9 +342,9 @@ static void create_fdt_socket_aclint(RISCVVirtState *s,
 char *name;
 unsigned long addr, size;
 uint32_t aclint_cells_size;
-uint32_t *aclint_mswi_cells;
-uint32_t *aclint_sswi_cells;
-uint32_t *aclint_mtimer_cells;
+g_autofree uint32_t *aclint_mswi_cells = NULL;
+g_autofree uint32_t *aclint_sswi_cells = NULL;
+g_autofree uint32_t *aclint_mtimer_cells = NULL;
 MachineState *ms = MACHINE(s);
 
 aclint_mswi_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2);
@@ -420,10 +416,6 @@ static void create_fdt_socket_aclint(RISCVVirtState *s,
 riscv_socket_fdt_write_id(ms, name, socket);
 g_free(name);
 }
-
-g_free(aclint_mswi_cells);
-g_free(aclint_mtimer_cells);
-g_free(aclint_sswi_cells);
 }
 
 static void create_fdt_socket_plic(RISCVVirtState *s,
@@ -432,8 +424,8 @@ static void create_fdt_socket_plic(RISCVVirtState *s,
uint32_t *plic_phandles)
 {
 int cpu;
-char *plic_name;
-uint32_t *plic_cells;
+g_autofree char *plic_name = NULL;
+g_autofree uint32_t *plic_cells;
 unsigned long plic_addr;
 MachineState *ms = MACHINE(s);
 static const char * const plic_compat[2] = {
@@ -493,10 +485,6 @@ static void create_fdt_socket_plic(RISCVVirtState *s,
memmap[VIRT_PLATFORM_BUS].size,
VIRT_PLATFORM_BUS_IRQ);
 }
-
-g_free(plic_name);
-
-g_free(plic_cells);
 }
 
 uint32_t imsic_num_bits(uint32_t count)
@@ -515,11 +503,12 @@ static void create_fdt_one_imsic(RISCVVirtState *s, 
hwaddr base_addr,
  bool m_mode, uint32_t imsic_guest_bits)
 {
 int cpu, socket;
-char *imsic_name;
+g_autofree char *imsic_name = NULL;
 MachineState *ms = MACHINE(s);
 int socket_count = riscv_socket_count(ms);
-uint32_t imsic_max_hart_per_socket;
-uint32_t *imsic_cells, *imsic_regs, imsic_addr, imsic_size;
+uint32_t imsic_max_hart_per_socket, imsic_addr, imsic_size;
+g_autofree uint32_t *imsic_cells = NULL;
+g_autofree uint32_t *imsic_regs = NULL;
 
 imsic_cells = g_new0(uint32_t, ms->smp.cpus * 2);
 imsic_regs = g_new0(uint32_t, socket_count * 4);
@@ -571,10 +560,6 @@ static void create_fdt_one_imsic(RISCVVirtState *s, hwaddr 
base_addr,
   IMSIC_MMIO_GROUP_MIN_SHIFT);
 }
 qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", msi_phandle);
-
-g_free(imsic_name);
-g_free(imsic_regs);
-g_free(imsic_cells);
 }
 
 static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap,
@@ -606,12 +591,10 @@ static void create_fdt_one_aplic(RISCVVirtState *s, int 
socket,
  bool m_mode, int num_harts)
 {
 int cpu;
-char *aplic_name;
-uint32_t *aplic_cells;
+g_autofree char *aplic_name = NULL;
+g_autofree uint32_t 

[PATCH 5/7] hw/riscv/virt.c: use g_autofree in create_fdt_virtio()

2024-01-22 Thread Daniel Henrique Barboza
Put 'name' declaration inside the loop, with g_autofree, to avoid
manually doing g_free() in each iteration.

Signed-off-by: Daniel Henrique Barboza 
---
 hw/riscv/virt.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index d0f402e0d5..f8278df83f 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -820,12 +820,12 @@ static void create_fdt_virtio(RISCVVirtState *s, const 
MemMapEntry *memmap,
   uint32_t irq_virtio_phandle)
 {
 int i;
-char *name;
 MachineState *ms = MACHINE(s);
 
 for (i = 0; i < VIRTIO_COUNT; i++) {
-name = g_strdup_printf("/soc/virtio_mmio@%lx",
+g_autofree char *name =  g_strdup_printf("/soc/virtio_mmio@%lx",
 (long)(memmap[VIRT_VIRTIO].base + i * memmap[VIRT_VIRTIO].size));
+
 qemu_fdt_add_subnode(ms->fdt, name);
 qemu_fdt_setprop_string(ms->fdt, name, "compatible", "virtio,mmio");
 qemu_fdt_setprop_cells(ms->fdt, name, "reg",
@@ -840,7 +840,6 @@ static void create_fdt_virtio(RISCVVirtState *s, const 
MemMapEntry *memmap,
 qemu_fdt_setprop_cells(ms->fdt, name, "interrupts",
VIRTIO_IRQ + i, 0x4);
 }
-g_free(name);
 }
 }
 
-- 
2.43.0




[PATCH 6/7] hw/riscv/virt.c: use g_autofree in virt_machine_init()

2024-01-22 Thread Daniel Henrique Barboza
Move 'soc_name' to the loop, and give it g_autofree, to avoid the manual
g_free().

Signed-off-by: Daniel Henrique Barboza 
---
 hw/riscv/virt.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index f8278df83f..710fbbda2c 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -1356,7 +1356,6 @@ static void virt_machine_init(MachineState *machine)
 RISCVVirtState *s = RISCV_VIRT_MACHINE(machine);
 MemoryRegion *system_memory = get_system_memory();
 MemoryRegion *mask_rom = g_new(MemoryRegion, 1);
-char *soc_name;
 DeviceState *mmio_irqchip, *virtio_irqchip, *pcie_irqchip;
 int i, base_hartid, hart_count;
 int socket_count = riscv_socket_count(machine);
@@ -1376,6 +1375,8 @@ static void virt_machine_init(MachineState *machine)
 /* Initialize sockets */
 mmio_irqchip = virtio_irqchip = pcie_irqchip = NULL;
 for (i = 0; i < socket_count; i++) {
+g_autofree char *soc_name = g_strdup_printf("soc%d", i);
+
 if (!riscv_socket_check_hartids(machine, i)) {
 error_report("discontinuous hartids in socket%d", i);
 exit(1);
@@ -1393,10 +1394,8 @@ static void virt_machine_init(MachineState *machine)
 exit(1);
 }
 
-soc_name = g_strdup_printf("soc%d", i);
 object_initialize_child(OBJECT(machine), soc_name, >soc[i],
 TYPE_RISCV_HART_ARRAY);
-g_free(soc_name);
 object_property_set_str(OBJECT(>soc[i]), "cpu-type",
 machine->cpu_type, _abort);
 object_property_set_int(OBJECT(>soc[i]), "hartid-base",
-- 
2.43.0




[PATCH 2/7] hw/riscv/numa.c: use g_autofree in socket_fdt_write_distance_matrix()

2024-01-22 Thread Daniel Henrique Barboza
Use g_autofree in 'dist_matrix' to avoid the manual g_free().

Signed-off-by: Daniel Henrique Barboza 
---
 hw/riscv/numa.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/riscv/numa.c b/hw/riscv/numa.c
index d319aefb45..cf686f4ff1 100644
--- a/hw/riscv/numa.c
+++ b/hw/riscv/numa.c
@@ -167,7 +167,8 @@ void riscv_socket_fdt_write_id(const MachineState *ms, 
const char *node_name,
 void riscv_socket_fdt_write_distance_matrix(const MachineState *ms)
 {
 int i, j, idx;
-uint32_t *dist_matrix, dist_matrix_size;
+g_autofree uint32_t *dist_matrix = NULL;
+uint32_t dist_matrix_size;
 
 if (numa_enabled(ms) && ms->numa_state->have_numa_distance) {
 dist_matrix_size = riscv_socket_count(ms) * riscv_socket_count(ms);
@@ -189,7 +190,6 @@ void riscv_socket_fdt_write_distance_matrix(const 
MachineState *ms)
 "numa-distance-map-v1");
 qemu_fdt_setprop(ms->fdt, "/distance-map", "distance-matrix",
  dist_matrix, dist_matrix_size);
-g_free(dist_matrix);
 }
 }
 
-- 
2.43.0




[PATCH 3/7] hw/riscv/virt.c: use g_autofree in create_fdt_socket_cpus()

2024-01-22 Thread Daniel Henrique Barboza
Move all char pointers to the loop. Use g_autofree in all of them to
avoid the g_free() calls.

Signed-off-by: Daniel Henrique Barboza 
---
 hw/riscv/virt.c | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index f9fd1341fc..373b1dd96b 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -215,12 +215,16 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int 
socket,
 int cpu;
 uint32_t cpu_phandle;
 MachineState *ms = MACHINE(s);
-char *name, *cpu_name, *core_name, *intc_name, *sv_name;
 bool is_32_bit = riscv_is_32bit(>soc[0]);
 uint8_t satp_mode_max;
 
 for (cpu = s->soc[socket].num_harts - 1; cpu >= 0; cpu--) {
 RISCVCPU *cpu_ptr = >soc[socket].harts[cpu];
+g_autofree char *name = NULL;
+g_autofree char *cpu_name = NULL;
+g_autofree char *core_name = NULL;
+g_autofree char *intc_name = NULL;
+g_autofree char *sv_name = NULL;
 
 cpu_phandle = (*phandle)++;
 
@@ -233,12 +237,10 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int 
socket,
 sv_name = g_strdup_printf("riscv,%s",
   satp_mode_str(satp_mode_max, is_32_bit));
 qemu_fdt_setprop_string(ms->fdt, cpu_name, "mmu-type", sv_name);
-g_free(sv_name);
 }
 
 name = riscv_isa_string(cpu_ptr);
 qemu_fdt_setprop_string(ms->fdt, cpu_name, "riscv,isa", name);
-g_free(name);
 
 if (cpu_ptr->cfg.ext_zicbom) {
 qemu_fdt_setprop_cell(ms->fdt, cpu_name, "riscv,cbom-block-size",
@@ -277,10 +279,6 @@ static void create_fdt_socket_cpus(RISCVVirtState *s, int 
socket,
 core_name = g_strdup_printf("%s/core%d", clust_name, cpu);
 qemu_fdt_add_subnode(ms->fdt, core_name);
 qemu_fdt_setprop_cell(ms->fdt, core_name, "cpu", cpu_phandle);
-
-g_free(core_name);
-g_free(intc_name);
-g_free(cpu_name);
 }
 }
 
-- 
2.43.0




[PATCH 0/7] hw/riscv: fix leak, add more g_autofree

2024-01-22 Thread Daniel Henrique Barboza
Hi,

First patch fixes a leak found when using Valgrind. The root cause is a
missing g_free() in a string. 

In fact, I found while doing reviews that we keep repeating the same
pattern:


char *name;
name = g_strdup_printf(...);
(...)
g_free(name);


With this in mind, I ended up making this rather trivial series to
introduce more string/array autocleaning in the 'virt' machine code. The
advantage of doing 'g_autofree' is that we'll guarantee that we'll clean
ourselves up when the variable goes out of scope, avoiding leaks like
the one patch 1 fixes. We want to enforce this autoclean style in
reviews, and for that we need to get rid of at least some of the uses we
do it right now.

I didn't bother changing the 'spike' and the 'sifive' boards for now
because the bulk of new patches is done on top of the 'virt' machine,
so it's more important to tidy this board first.


Daniel Henrique Barboza (7):
  hw/riscv/virt-acpi-build.c: fix leak in build_rhct()
  hw/riscv/numa.c: use g_autofree in socket_fdt_write_distance_matrix()
  hw/riscv/virt.c: use g_autofree in create_fdt_socket_cpus()
  hw/riscv/virt.c: use g_autofree in create_fdt_sockets()
  hw/riscv/virt.c: use g_autofree in create_fdt_virtio()
  hw/riscv/virt.c: use g_autofree in virt_machine_init()
  hw/riscv/virt.c: use g_autofree in create_fdt_*

 hw/riscv/numa.c|   4 +-
 hw/riscv/virt-acpi-build.c |   2 +-
 hw/riscv/virt.c| 109 -
 3 files changed, 37 insertions(+), 78 deletions(-)

-- 
2.43.0




[PATCH 1/7] hw/riscv/virt-acpi-build.c: fix leak in build_rhct()

2024-01-22 Thread Daniel Henrique Barboza
The 'isa' char pointer isn't being freed after use.

Issue detected by Valgrind:

==38752== 128 bytes in 1 blocks are definitely lost in loss record 3,190 of 
3,884
==38752==at 0x484280F: malloc (vg_replace_malloc.c:442)
==38752==by 0x5189619: g_malloc (gmem.c:130)
==38752==by 0x51A5BF2: g_strconcat (gstrfuncs.c:628)
==38752==by 0x6C1E3E: riscv_isa_string_ext (cpu.c:2321)
==38752==by 0x6C1E3E: riscv_isa_string (cpu.c:2343)
==38752==by 0x6BD2EA: build_rhct (virt-acpi-build.c:232)
==38752==by 0x6BD2EA: virt_acpi_build (virt-acpi-build.c:556)
==38752==by 0x6BDC86: virt_acpi_setup (virt-acpi-build.c:662)
==38752==by 0x9C8DC6: notifier_list_notify (notify.c:39)
==38752==by 0x4A595A: qdev_machine_creation_done (machine.c:1589)
==38752==by 0x61E052: qemu_machine_creation_done (vl.c:2680)
==38752==by 0x61E052: qmp_x_exit_preconfig.part.0 (vl.c:2709)
==38752==by 0x6220C6: qmp_x_exit_preconfig (vl.c:2702)
==38752==by 0x6220C6: qemu_init (vl.c:3758)
==38752==by 0x425858: main (main.c:47)

Fixes: ebfd392893 ("hw/riscv/virt: virt-acpi-build.c: Add RHCT Table")
Signed-off-by: Daniel Henrique Barboza 
---
 hw/riscv/virt-acpi-build.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/riscv/virt-acpi-build.c b/hw/riscv/virt-acpi-build.c
index 26c7e4482d..fb8baf64f6 100644
--- a/hw/riscv/virt-acpi-build.c
+++ b/hw/riscv/virt-acpi-build.c
@@ -196,7 +196,7 @@ static void build_rhct(GArray *table_data,
 RISCVCPU *cpu = >soc[0].harts[0];
 uint32_t mmu_offset = 0;
 uint8_t satp_mode_max;
-char *isa;
+g_autofree char *isa = NULL;
 
 AcpiTable table = { .sig = "RHCT", .rev = 1, .oem_id = s->oem_id,
 .oem_table_id = s->oem_table_id };
-- 
2.43.0




Re: [PULL 0/8] tcg + linux-user patch queue

2024-01-22 Thread Richard Henderson

On 1/23/24 01:26, Peter Maydell wrote:

On Sun, 21 Jan 2024 at 00:22, Richard Henderson
 wrote:


The following changes since commit 3f2a357b95845ea0bf7463eff6661e43b97d1afc:

   Merge tag 'hw-cpus-20240119' of https://github.com/philmd/qemu into staging 
(2024-01-19 11:39:38 +)

are available in the Git repository at:

   https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20240121

for you to fetch changes up to 1d5e32e3198d2d8fd2342c8f7f8e0875aeff49c5:

   linux-user/elfload: check PR_GET_DUMPABLE before creating coredump 
(2024-01-21 10:25:07 +1100)


tcg/s390x: Fix encoding of VRIc, VRSa, VRSc insns
tcg: Clean up error paths in alloc_code_gen_buffer_splitwx_memfd
linux-user/riscv: Adjust vdso signal frame cfa offsets
linux-user: Fixed cpu restore with pc 0 on SIGBUS




The new chacha test seems to consistently segfault on aarch64 host:

https://gitlab.com/qemu-project/qemu/-/jobs/5979230595
https://gitlab.com/qemu-project/qemu/-/jobs/5978381815
https://gitlab.com/qemu-project/qemu/-/jobs/5982075408


Oh dear.  It seems to be a problem with the aarch64 cross-compiler for s390x.
If I use a binary created on an s390x or x86_64 host, it works.

Unless someone has a better idea, I'll drop the testcase for now.


r~



Re: [PATCH v2] cpu-exec: simplify jump cache management

2024-01-22 Thread Richard Henderson

On 1/23/24 01:34, Paolo Bonzini wrote:

Unless I'm missing something egregious, the jmp cache is only every
populated with a valid entry by the same thread that reads the cache.
Therefore, the contents of any valid entry are always consistent and
there is no need for any acquire/release magic.


I think you're right, and I over-complicated this thinking about invalidations.


Because of this, there is really nothing to win in splitting the CF_PCREL
and !CF_PCREL paths.  It is easier to just always use the ->pc field in
the jump cache.


Once upon a time, PCREL was an ifdef, and the jump cache pc did not exist for !PCREL.  The 
split has not been addressed since then.



The cleanup looks good.

Reviewed-by: Richard Henderson 


r~



Re: [PATCH v2 4/4] Exclude TPM ioctls definitions for the GNU/Hurd

2024-01-22 Thread Stefan Berger




On 1/22/24 15:46, Peter Maydell wrote:

On Mon, 22 Jan 2024 at 19:30, Stefan Berger  wrote:




On 1/22/24 12:16, Peter Maydell wrote:

On Thu, 18 Jan 2024 at 16:04, Manolo de Medici  wrote:


The Hurd currently doesn't have any TPM driver, compilation fails
for missing symbols unless these are left undefined.

Signed-off-by: Manolo de Medici 
---
   backends/tpm/tpm_ioctl.h | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backends/tpm/tpm_ioctl.h b/backends/tpm/tpm_ioctl.h
index 1933ab6855..c721bf8847 100644
--- a/backends/tpm/tpm_ioctl.h
+++ b/backends/tpm/tpm_ioctl.h
@@ -274,7 +274,7 @@ typedef struct ptm_lockstorage ptm_lockstorage;
   #define PTM_CAP_SEND_COMMAND_HEADER (1 << 15)
   #define PTM_CAP_LOCK_STORAGE   (1 << 16)

-#ifndef _WIN32
+#if !defined(_WIN32) && !defined(__GNU__)
   enum {
   PTM_GET_CAPABILITY = _IOR('P', 0, ptm_cap),
   PTM_INIT   = _IOWR('P', 1, ptm_init),
--
2.43.0


This looks plausible as a change, but looking at the history
of the file in git it seems like this is a file we import
from a third-party swtpm project.

Stefan: should we get this change made in the swtpm project
too? Or have we diverged from that copy of the header?


The diffs are minimal at the moment:
$ diff swtpm/include/swtpm/tpm_ioctl.h qemu/backends/tpm/tpm_ioctl.h
15,16d14
< #include 
< #include 

Since we already handle _WIN32 we can just take this case for __GNU__.


OK, so how should we handle the mechanics of it -- just take
this commit in QEMU and then you'll sort it out in swtpm?


Yes.


Or do we need to change swtpm first and then sync?


No.

Regarding the patch:

Reviewed-by: Stefan Berger 



thanks
-- PMM




Re: [PATCH 4/5] hw/s390x/css-bridge: switch virtual-css bus to 3-phase-reset

2024-01-22 Thread Eric Farman
On Fri, 2024-01-19 at 16:35 +, Peter Maydell wrote:
> Switch the s390x virtual-css bus from using BusClass::reset to the
> Resettable interface.
> 
> This has no behavioural change, because the BusClass code to support
> subclasses that use the legacy BusClass::reset will call that method
> in the hold phase of 3-phase reset.
> 
> Signed-off-by: Peter Maydell 
> ---
>  hw/s390x/css-bridge.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)

Reviewed-by: Eric Farman 



Re: [PATCH v3 15/38 6/6] target/s390x: Improve general case of disas_jcc

2024-01-22 Thread Ilya Leoshkevich
On Sat, Jan 20, 2024 at 12:23:02AM +0100, Philippe Mathieu-Daudé wrote:
> From: Richard Henderson 
> 
> Avoid code duplication by handling 7 of the 14 cases
> by inverting the test for the other 7 cases.
> 
> Use TCG_COND_TSTNE for cc in {1,3}.
> Use (cc - 1) <= 1 for cc in {1,2}.
> 
> Signed-off-by: Richard Henderson 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  target/s390x/tcg/translate.c | 52 +++-
>  1 file changed, 15 insertions(+), 37 deletions(-)

Acked-by: Ilya Leoshkevich 



Re: Re: [PATCH v3 3/3] tests/tcg: Add the PROT_NONE gdbstub test

2024-01-22 Thread Ilya Leoshkevich
On Mon, Jan 22, 2024 at 03:54:32PM +, Alex Bennée wrote:
> Ilya Leoshkevich  writes:
> 
> > Make sure that qemu gdbstub, like gdbserver, allows reading from and
> > writing to PROT_NONE pages.
> >
> > Signed-off-by: Ilya Leoshkevich 
> > ---
> >  tests/tcg/multiarch/Makefile.target  |  9 +-
> >  tests/tcg/multiarch/gdbstub/prot-none.py | 22 +
> >  tests/tcg/multiarch/prot-none.c  | 40 
> >  3 files changed, 70 insertions(+), 1 deletion(-)
> >  create mode 100644 tests/tcg/multiarch/gdbstub/prot-none.py
> >  create mode 100644 tests/tcg/multiarch/prot-none.c

[...]

> > +def run_test():
> > +"""Run through the tests one by one"""
> > +gdb.Breakpoint("break_here")
> > +gdb.execute("continue")
> > +val = gdb.parse_and_eval("*(char[2] *)q").string()
> 
> Better traceback:
> 
>   Breakpoint 1, break_here (q=0x40802fff) at 
> /home/alex/lsrc/qemu.git/tests/tcg/multiarch/prot-none.c:14
>   14  }
>   GDB Exception:
>   Traceback (most recent call last):
> File "/home/alex/lsrc/qemu.git/tests/guest-debug/test_gdbstub.py", line 
> 42, in main
>   test()
> File "./tests/tcg/multiarch/gdbstub/prot-none.py", line 14, in run_test
>   val = gdb.parse_and_eval("*(char[2] *)q").string()
> 
>   gdb.MemoryError: Cannot access memory at address 0x40802fff
>   Python 3.11.2 (main, Mar 13 2023, 12:18:29) [GCC 12.2.0] on linux
>   Type "help", "copyright", "credits" or "license" for more information.
>   (InteractiveConsole)
>   >>> 

Thanks for the debug output. This shows that the feature being tested
doesn't work (the value of `q` looks sane to me). May I ask what host
distro is this? I tried on x86_64 Fedora 38 and x86_64 Ubuntu 22.04 so
far, and the test was successful.

[...]



Re: [PATCH 2/5] util/uri: Simplify uri_string_unescape()

2024-01-22 Thread Stefan Weil via

Am 22.01.24 um 20:17 schrieb Thomas Huth:


uri_string_unescape() basically does the same as the glib function
g_uri_unescape_string(), with just an additional length parameter.
So we can simplify this function a lot by limiting the length with
g_strndup() first and then by calling g_uri_unescape_string() instead
of walking through the string manually.

Suggested-by: Stefan Weil


Can my e-mail address be replaced by another one (s...@weilnetz.de)?


Signed-off-by: Thomas Huth
---
  util/uri.c | 49 +++--
  1 file changed, 3 insertions(+), 46 deletions(-)

diff --git a/util/uri.c b/util/uri.c
index 33b6c7214e..2a75f535ba 100644
--- a/util/uri.c
+++ b/util/uri.c
@@ -1561,15 +1561,6 @@ done_cd:
  return 0;
  }
  
-static int is_hex(char c)

-{
-if (((c >= '0') && (c <= '9')) || ((c >= 'a') && (c <= 'f')) ||
-((c >= 'A') && (c <= 'F'))) {
-return 1;
-}
-return 0;
-}
-
  /**
   * uri_string_unescape:
   * @str:  the string to unescape
@@ -1585,8 +1576,7 @@ static int is_hex(char c)
   */
  char *uri_string_unescape(const char *str, int len)
  {
-char *ret, *out;
-const char *in;
+g_autofree char *lstr = NULL;



Is it necessary to assign NULL? It does not look so.


  
  if (str == NULL) {

  return NULL;
@@ -1594,42 +1584,9 @@ char *uri_string_unescape(const char *str, int len)
  if (len <= 0) {
  len = strlen(str);
  }
-if (len < 0) {
-return NULL;
-}
-
-ret = g_malloc(len + 1);
+lstr = g_strndup(str, len);
  
-in = str;

-out = ret;
-while (len > 0) {
-if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
-in++;
-if ((*in >= '0') && (*in <= '9')) {
-*out = (*in - '0');
-} else if ((*in >= 'a') && (*in <= 'f')) {
-*out = (*in - 'a') + 10;
-} else if ((*in >= 'A') && (*in <= 'F')) {
-*out = (*in - 'A') + 10;
-}
-in++;
-if ((*in >= '0') && (*in <= '9')) {
-*out = *out * 16 + (*in - '0');
-} else if ((*in >= 'a') && (*in <= 'f')) {
-*out = *out * 16 + (*in - 'a') + 10;
-} else if ((*in >= 'A') && (*in <= 'F')) {
-*out = *out * 16 + (*in - 'A') + 10;
-}
-in++;
-len -= 3;
-out++;
-} else {
-*out++ = *in++;
-len--;
-}
-}
-*out = 0;
-return ret;
+return g_uri_unescape_string(lstr, NULL);
  }
  
  /**



Thank you.

Reviewed-by: Stefan Weil 



[Bug 1225187] Re: qemu hangs in windows 7 host with -serial pipe:windbg

2024-01-22 Thread Jacob
Thank you for the info.

Current issues tracking this bug:
https://gitlab.com/qemu-project/qemu/-/issues/675
https://gitlab.com/qemu-project/qemu/-/issues/1468
https://gitlab.com/qemu-project/qemu/-/issues/1802

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1225187

Title:
  qemu hangs in windows 7 host with -serial pipe:windbg

Status in QEMU:
  New

Bug description:
  Execution line:
  qemu-system-i386.exe -m 512 c:\Disks\Qemu_XP_en.vhd  -serial pipe:windbg

  It waits for the pipe.
  Execute windbg
  c:\WINDDK\7600.16385.1\Debuggers\windbg.exe -k 
com:pipe,port=\\.\pipe\windbg,resets=0,reconnect

  GUI black screen shown. QEMU hangs.

  qemu v1.5.3 (c0b1a7e207094dba0b37a892b41fe4cab3195e44). MinGW built.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1225187/+subscriptions




[PATCH v2] docs/system/arm/virt.rst: Add note on CPU features off by default

2024-01-22 Thread Gustavo Romero
Add a note on CPU features that are off by default in `virt` machines.
Some CPU features will remain off even if a CPU-capable CPU (e.g.,
`-cpu max`) is selected because they require support in both the CPU
itself and in the wider system. Therefore, the user, besides selecting a
CPU that supports such features, must also turn on the feature using a
machine option.

Signed-off-by: Gustavo Romero 
---
 docs/system/arm/virt.rst | 13 +
 1 file changed, 13 insertions(+)

diff --git a/docs/system/arm/virt.rst b/docs/system/arm/virt.rst
index c245c52b7a..1888e31956 100644
--- a/docs/system/arm/virt.rst
+++ b/docs/system/arm/virt.rst
@@ -69,6 +69,19 @@ Supported guest CPU types:
 Note that the default is ``cortex-a15``, so for an AArch64 guest you must
 specify a CPU type.
 
+Also, please note that passing ``max`` CPU (i.e. ``-cpu max``) won't
+enable all the CPU features for a given ``virt`` machine. Where a CPU
+architectural feature requires support in both the CPU itself and in the
+wider system (e.g. the MTE feature), it may not be enabled by default,
+but instead requires a machine option to enable it.
+
+For example, MTE support must be enabled with ``-machine virt,mte=on``,
+as well as by selecting an MTE-capable CPU (e.g., ``max``) with the
+``-cpu`` option.
+
+See the machine-specific options below or check them for a given machine
+passing ``help``, like: ``-machine virt-9.0,help``.
+
 Graphics output is available, but unlike the x86 PC machine types
 there is no default display device enabled: you should select one from
 the Display devices section of "-device help". The recommended option
-- 
2.34.1




[PATCH v2 1/3] hw/gpio: Implement STM32L4x5 GPIO

2024-01-22 Thread Inès Varhol
Features supported :
- the 8 STM32L4x5 GPIOs are initialized with their reset values
(except IDR, see below)
- input mode : setting a pin in input mode "externally" (using input
irqs) results in an out irq (transmitted to SYSCFG)
- output mode : setting a bit in ODR sets the corresponding out irq
(if this line is configured in output mode)
- pull-up, pull-down
- push-pull, open-drain

Difference with the real GPIOs :
- Alternate Function and Analog mode aren't implemented :
pins in AF/Analog behave like pins in input mode
- floating pins stay at their last value
- register IDR reset values differ from the real one :
values are coherent with the other registers reset values
and the fact that AF/Analog modes aren't implemented
- setting I/O output speed isn't supported
- locking port bits isn't supported
- ADC function isn't supported
- GPIOH has 16 pins instead of 2 pins
- writing to registers LCKR, AFRL, AFRH and ASCR is ineffective

Signed-off-by: Arnaud Minier 
Signed-off-by: Inès Varhol 
---
 MAINTAINERS|   1 +
 docs/system/arm/b-l475e-iot01a.rst |   2 +-
 hw/gpio/Kconfig|   3 +
 hw/gpio/meson.build|   1 +
 hw/gpio/stm32l4x5_gpio.c   | 447 +
 hw/gpio/trace-events   |   6 +
 include/hw/gpio/stm32l4x5_gpio.h   |  65 +
 7 files changed, 524 insertions(+), 1 deletion(-)
 create mode 100644 hw/gpio/stm32l4x5_gpio.c
 create mode 100644 include/hw/gpio/stm32l4x5_gpio.h

diff --git a/MAINTAINERS b/MAINTAINERS
index c4085c32a7..269ed96052 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1131,6 +1131,7 @@ F: hw/arm/stm32l4x5_soc.c
 F: hw/misc/stm32l4x5_exti.c
 F: hw/misc/stm32l4x5_syscfg.c
 F: hw/misc/stm32l4x5_rcc.c
+F: hw/gpio/stm32l4x5_gpio.c
 F: include/hw/*/stm32l4x5_*.h
 
 B-L475E-IOT01A IoT Node
diff --git a/docs/system/arm/b-l475e-iot01a.rst 
b/docs/system/arm/b-l475e-iot01a.rst
index b857a56ca4..0afef8e4f4 100644
--- a/docs/system/arm/b-l475e-iot01a.rst
+++ b/docs/system/arm/b-l475e-iot01a.rst
@@ -18,6 +18,7 @@ Currently B-L475E-IOT01A machine's only supports the 
following devices:
 - STM32L4x5 EXTI (Extended interrupts and events controller)
 - STM32L4x5 SYSCFG (System configuration controller)
 - STM32L4x5 RCC (Reset and clock control)
+- STM32L4x5 GPIOs (General-purpose I/Os)
 
 Missing devices
 """
@@ -25,7 +26,6 @@ Missing devices
 The B-L475E-IOT01A does *not* support the following devices:
 
 - Serial ports (UART)
-- General-purpose I/Os (GPIO)
 - Analog to Digital Converter (ADC)
 - SPI controller
 - Timer controller (TIMER)
diff --git a/hw/gpio/Kconfig b/hw/gpio/Kconfig
index d2cf3accc8..712940b8e0 100644
--- a/hw/gpio/Kconfig
+++ b/hw/gpio/Kconfig
@@ -16,3 +16,6 @@ config GPIO_PWR
 
 config SIFIVE_GPIO
 bool
+
+config STM32L4X5_GPIO
+bool
diff --git a/hw/gpio/meson.build b/hw/gpio/meson.build
index 066ea96480..8470ca1639 100644
--- a/hw/gpio/meson.build
+++ b/hw/gpio/meson.build
@@ -9,6 +9,7 @@ system_ss.add(when: 'CONFIG_IMX', if_true: files('imx_gpio.c'))
 system_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_gpio.c'))
 system_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_gpio.c'))
 system_ss.add(when: 'CONFIG_OMAP', if_true: files('omap_gpio.c'))
+system_ss.add(when: 'CONFIG_STM32L4X5_SOC', if_true: files('stm32l4x5_gpio.c'))
 system_ss.add(when: 'CONFIG_RASPI', if_true: files('bcm2835_gpio.c'))
 system_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files('aspeed_gpio.c'))
 system_ss.add(when: 'CONFIG_SIFIVE_GPIO', if_true: files('sifive_gpio.c'))
diff --git a/hw/gpio/stm32l4x5_gpio.c b/hw/gpio/stm32l4x5_gpio.c
new file mode 100644
index 00..58b321a299
--- /dev/null
+++ b/hw/gpio/stm32l4x5_gpio.c
@@ -0,0 +1,447 @@
+/*
+ * STM32L4x5 GPIO (General Purpose Input/Ouput)
+ *
+ * Copyright (c) 2023 Arnaud Minier 
+ * Copyright (c) 2023 Inès Varhol 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+/*
+ * The reference used is the STMicroElectronics RM0351 Reference manual
+ * for STM32L4x5 and STM32L4x6 advanced Arm ® -based 32-bit MCUs.
+ * 
https://www.st.com/en/microcontrollers-microprocessors/stm32l4x5/documentation.html
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "hw/gpio/stm32l4x5_gpio.h"
+#include "hw/irq.h"
+#include "hw/qdev-clock.h"
+#include "hw/qdev-properties.h"
+#include "qapi/visitor.h"
+#include "qapi/error.h"
+#include "migration/vmstate.h"
+#include "trace.h"
+
+#define GPIO_MODER 0x00
+#define GPIO_OTYPER 0x04
+#define GPIO_OSPEEDR 0x08
+#define GPIO_PUPDR 0x0C
+#define GPIO_IDR 0x10
+#define GPIO_ODR 0x14
+#define GPIO_BSRR 0x18
+#define GPIO_LCKR 0x1C
+#define GPIO_AFRL 0x20
+#define GPIO_AFRH 0x24
+#define GPIO_BRR 0x28
+#define GPIO_ASCR 0x2C
+
+/* 0b___ */
+#define RESERVED_BITS_MASK 0x
+
+static 

[PATCH v2 2/3] hw/arm: Connect STM32L4x5 GPIO to STM32L4x5 SoC

2024-01-22 Thread Inès Varhol
Signed-off-by: Arnaud Minier 
Signed-off-by: Inès Varhol 
---
 hw/arm/Kconfig |  3 +-
 hw/arm/stm32l4x5_soc.c | 79 --
 include/hw/arm/stm32l4x5_soc.h |  2 +
 3 files changed, 69 insertions(+), 15 deletions(-)

diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 6bd7ba424f..3e49b913f8 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -459,9 +459,10 @@ config STM32L4X5_SOC
 bool
 select ARM_V7M
 select OR_IRQ
-select STM32L4X5_SYSCFG
 select STM32L4X5_EXTI
+select STM32L4X5_SYSCFG
 select STM32L4X5_RCC
+select STM32L4X5_GPIO
 
 config XLNX_ZYNQMP_ARM
 bool
diff --git a/hw/arm/stm32l4x5_soc.c b/hw/arm/stm32l4x5_soc.c
index bcdad69e92..0333cbb81a 100644
--- a/hw/arm/stm32l4x5_soc.c
+++ b/hw/arm/stm32l4x5_soc.c
@@ -78,13 +78,45 @@ static const int exti_irq[NUM_EXTI_IRQ] = {
 #define RCC_BASE_ADDRESS 0x40021000
 #define RCC_IRQ 5
 
+static const uint32_t gpio_addr[] = {
+0x4800,
+0x48000400,
+0x48000800,
+0x48000C00,
+0x48001000,
+0x48001400,
+0x48001800,
+0x48001C00,
+};
+
+static const struct {
+uint32_t moder;
+uint32_t ospeedr;
+uint32_t pupdr;
+} stm32l4x5_gpio_initval[NUM_GPIOS] = {
+{ 0xABFF, 0x0C00, 0x6400 },
+{ 0xFEBF, 0x, 0x0100 },
+{ 0x, 0x, 0x },
+{ 0x, 0x, 0x },
+{ 0x, 0x, 0x },
+{ 0x, 0x, 0x },
+{ 0x, 0x, 0x },
+{ 0x000F, 0x, 0x },
+};
+
 static void stm32l4x5_soc_initfn(Object *obj)
 {
 Stm32l4x5SocState *s = STM32L4X5_SOC(obj);
+g_autofree char *name = NULL;
 
 object_initialize_child(obj, "exti", >exti, TYPE_STM32L4X5_EXTI);
 object_initialize_child(obj, "syscfg", >syscfg, TYPE_STM32L4X5_SYSCFG);
 object_initialize_child(obj, "rcc", >rcc, TYPE_STM32L4X5_RCC);
+
+for (unsigned i = 0; i < NUM_GPIOS; i++) {
+name = g_strdup_printf("gpio%c", 'a' + i);
+object_initialize_child(obj, name, >gpio[i], TYPE_STM32L4X5_GPIO);
+}
 }
 
 static void stm32l4x5_soc_realize(DeviceState *dev_soc, Error **errp)
@@ -93,8 +125,10 @@ static void stm32l4x5_soc_realize(DeviceState *dev_soc, 
Error **errp)
 Stm32l4x5SocState *s = STM32L4X5_SOC(dev_soc);
 const Stm32l4x5SocClass *sc = STM32L4X5_SOC_GET_CLASS(dev_soc);
 MemoryRegion *system_memory = get_system_memory();
-DeviceState *armv7m;
+DeviceState *armv7m, *dev;
 SysBusDevice *busdev;
+uint32_t pin_index;
+g_autofree char *name = NULL;
 
 if (!memory_region_init_rom(>flash, OBJECT(dev_soc), "flash",
 sc->flash_size, errp)) {
@@ -134,17 +168,42 @@ static void stm32l4x5_soc_realize(DeviceState *dev_soc, 
Error **errp)
 return;
 }
 
+/* GPIOs */
+for (unsigned i = 0; i < NUM_GPIOS; i++) {
+dev = DEVICE(>gpio[i]);
+name = g_strdup_printf("%c", 'A' + i);
+qdev_prop_set_string(dev, "name", name);
+qdev_prop_set_uint32(dev, "mode-reset",
+ stm32l4x5_gpio_initval[i].moder);
+qdev_prop_set_uint32(dev, "ospeed-reset",
+ stm32l4x5_gpio_initval[i].ospeedr);
+qdev_prop_set_uint32(dev, "pupd-reset",
+stm32l4x5_gpio_initval[i].pupdr);
+busdev = SYS_BUS_DEVICE(>gpio[i]);
+name = g_strdup_printf("gpio%c-out", 'a' + i);
+qdev_connect_clock_in(DEVICE(>gpio[i]), "clk",
+qdev_get_clock_out(DEVICE(&(s->rcc)), name));
+if (!sysbus_realize(busdev, errp)) {
+return;
+}
+sysbus_mmio_map(busdev, 0, gpio_addr[i]);
+}
+
 /* System configuration controller */
 busdev = SYS_BUS_DEVICE(>syscfg);
 if (!sysbus_realize(busdev, errp)) {
 return;
 }
 sysbus_mmio_map(busdev, 0, SYSCFG_ADDR);
-/*
- * TODO: when the GPIO device is implemented, connect it
- * to SYCFG using `qdev_connect_gpio_out`, NUM_GPIOS and
- * GPIO_NUM_PINS.
- */
+
+for (unsigned i = 0; i < NUM_GPIOS; i++) {
+for (unsigned j = 0; j < GPIO_NUM_PINS; j++) {
+pin_index = GPIO_NUM_PINS * i + j;
+qdev_connect_gpio_out(DEVICE(>gpio[i]), j,
+  qdev_get_gpio_in(DEVICE(>syscfg),
+  pin_index));
+}
+}
 
 /* EXTI device */
 busdev = SYS_BUS_DEVICE(>exti);
@@ -241,14 +300,6 @@ static void stm32l4x5_soc_realize(DeviceState *dev_soc, 
Error **errp)
 /* RESERVED:0x40024400, 0x7FDBC00 */
 
 /* AHB2 BUS */
-create_unimplemented_device("GPIOA", 0x4800, 0x400);
-create_unimplemented_device("GPIOB", 0x48000400, 0x400);
-create_unimplemented_device("GPIOC", 0x48000800, 0x400);
-create_unimplemented_device("GPIOD", 0x48000C00, 0x400);
-

Re: Re: [PATCH v3 2/3] tests/tcg: Factor out gdbstub test functions

2024-01-22 Thread Ilya Leoshkevich
On Mon, Jan 22, 2024 at 04:00:44PM +, Alex Bennée wrote:
> Ilya Leoshkevich  writes:
> 
> > Both the report() function as well as the initial gdbstub test sequence
> > are copy-pasted into ~10 files with slight modifications. This
> > indicates that they are indeed generic, so factor them out. While
> > at it, add a few newlines to make the formatting closer to PEP-8.
> >
> > Signed-off-by: Ilya Leoshkevich 
> > ---
> >  tests/guest-debug/run-test.py |  7 ++-
> >  tests/guest-debug/test_gdbstub.py | 58 +++
> >  tests/tcg/aarch64/gdbstub/test-sve-ioctl.py   | 34 +--
> >  tests/tcg/aarch64/gdbstub/test-sve.py | 33 +--
> >  tests/tcg/multiarch/gdbstub/interrupt.py  | 47 ++-
> >  tests/tcg/multiarch/gdbstub/memory.py | 41 +
> >  tests/tcg/multiarch/gdbstub/registers.py  | 41 ++---
> >  tests/tcg/multiarch/gdbstub/sha1.py   | 40 ++---
> >  .../multiarch/gdbstub/test-proc-mappings.py   | 39 +
> >  .../multiarch/gdbstub/test-qxfer-auxv-read.py | 37 +---
> >  .../gdbstub/test-thread-breakpoint.py | 37 +---
> >  tests/tcg/s390x/gdbstub/test-signals-s390x.py | 42 +-
> >  tests/tcg/s390x/gdbstub/test-svc.py   | 39 +
> >  13 files changed, 98 insertions(+), 397 deletions(-)
> >  create mode 100644 tests/guest-debug/test_gdbstub.py

[...]

> > +if gdb.parse_and_eval("$pc") == 0:
> > +print("SKIP: PC not set")
> > +exit(0)
> > +
> > +try:
> > +test()
> > +except:
> > +print("GDB Exception:")
> > +traceback.print_exc(file=sys.stdout)
> > +global fail_count
> > +fail_count += 1
> > +import code
> > +code.InteractiveConsole(locals=globals()).interact()
> > +raise
> 
> While I can see this is useful we don't want to default to an
> interactive console as that will hang the test in CI type setups. Can we
> make this a option we enable?

Would something like `export QEMU_TEST_INTERACTIVE=1` be okay?

[...]



[PATCH v2 0/3] Add device STM32L4x5 GPIO

2024-01-22 Thread Inès Varhol
This patch adds a new device STM32L4x5 GPIO device and is part
of a series implementing the STM32L4x5 with a few peripherals.

Changes from v1 :
- replacing test GPIO register `DISCONNECTED_PINS` with an object
property accessed using `qtest_qmp()` in the qtest (through helpers
`get_disconnected_pins()` and `disconnect_all_pins()`)
- removing GPIO subclasses and storing MODER, OSPEEDR and PUPDR reset
values in properties
- adding a `name` property and using it for more lisible traces
- using `g_strdup_printf()` to facilitate setting irqs in the qtest,
and initializing GPIO children in soc_initfn

Changes from RFC v1 :
- `stm32l4x5-gpio-test.c` : correct typos, make the test generic,
add a test for bitwise writing in register ODR
- `stm32l4x5_soc.c` : connect gpios to their clock, use an
array of GpioState
- `stm32l4x5_gpio.c` : correct comments in `update_gpio_idr()`,
correct `get_gpio_pins_to_disconnect()`, correct `stm32l4x5_gpio_init()`
and initialize the clock, add a realize function
- update MAINAINERS

Based-on: 20240118091107.87831-1-arnaud.min...@telecom-paris.fr
([PATCH v2 0/7] Add device STM32L4x5 RCC)

Signed-off-by: Arnaud Minier 
Signed-off-by: Inès Varhol 

Inès Varhol (3):
  hw/gpio: Implement STM32L4x5 GPIO
  hw/arm: Connect STM32L4x5 GPIO to STM32L4x5 SoC
  tests/qtest: Add STM32L4x5 GPIO QTest testcase

 MAINTAINERS|   1 +
 docs/system/arm/b-l475e-iot01a.rst |   2 +-
 hw/arm/Kconfig |   3 +-
 hw/arm/stm32l4x5_soc.c |  79 -
 hw/gpio/Kconfig|   3 +
 hw/gpio/meson.build|   1 +
 hw/gpio/stm32l4x5_gpio.c   | 447 +
 hw/gpio/trace-events   |   6 +
 include/hw/arm/stm32l4x5_soc.h |   2 +
 include/hw/gpio/stm32l4x5_gpio.h   |  65 
 tests/qtest/meson.build|   3 +-
 tests/qtest/stm32l4x5_gpio-test.c  | 520 +
 12 files changed, 1115 insertions(+), 17 deletions(-)
 create mode 100644 hw/gpio/stm32l4x5_gpio.c
 create mode 100644 include/hw/gpio/stm32l4x5_gpio.h
 create mode 100644 tests/qtest/stm32l4x5_gpio-test.c

-- 
2.43.0




[PATCH v2 3/3] tests/qtest: Add STM32L4x5 GPIO QTest testcase

2024-01-22 Thread Inès Varhol
The testcase contains :
- `test_idr_reset_value()` :
Checks the reset values of MODER, OTYPER, PUPDR, ODR and IDR.
- `test_gpio_output_mode()` :
Checks that writing a bit in register ODR results in the corresponding
pin rising or lowering, if this pin is configured in output mode.
- `test_gpio_input_mode()` :
Checks that a input pin set high or low externally results
in the pin rising and lowering.
- `test_pull_up_pull_down()` :
Checks that a floating pin in pull-up/down mode is actually high/down.
- `test_push_pull()` :
Checks that a pin set externally is disconnected when configured in
push-pull output mode, and can't be set externally while in this mode.
- `test_open_drain()` :
Checks that a pin set externally high is disconnected when configured
in open-drain output mode, and can't be set high while in this mode.
- `test_bsrr_brr()` :
Checks that writing to BSRR and BRR has the desired result in ODR.

Acked-by: Thomas Huth 
Signed-off-by: Arnaud Minier 
Signed-off-by: Inès Varhol 
---
 tests/qtest/meson.build   |   3 +-
 tests/qtest/stm32l4x5_gpio-test.c | 520 ++
 2 files changed, 522 insertions(+), 1 deletion(-)
 create mode 100644 tests/qtest/stm32l4x5_gpio-test.c

diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index b0d9a8c2de..5692da4fc1 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -198,7 +198,8 @@ qtests_aspeed = \
 qtests_stm32l4x5 = \
   ['stm32l4x5_exti-test',
'stm32l4x5_syscfg-test',
-   'stm32l4x5_rcc-test']
+   'stm32l4x5_rcc-test',
+   'stm32l4x5_gpio-test']
 
 qtests_arm = \
   (config_all_devices.has_key('CONFIG_MPS2') ? ['sse-timer-test'] : []) + \
diff --git a/tests/qtest/stm32l4x5_gpio-test.c 
b/tests/qtest/stm32l4x5_gpio-test.c
new file mode 100644
index 00..3803687a6a
--- /dev/null
+++ b/tests/qtest/stm32l4x5_gpio-test.c
@@ -0,0 +1,520 @@
+/*
+ * QTest testcase for STM32L4x5_EXTI
+ *
+ * Copyright (c) 2023 Arnaud Minier 
+ * Copyright (c) 2023 Inès Varhol 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest-single.h"
+
+#define GPIO_BASE_ADDR 0x4800
+#define GPIO_SIZE  0x400
+#define NUM_GPIOS  8
+#define NUM_GPIO_PINS  16
+
+#define GPIO_A 0x4800
+#define GPIO_B 0x48000400
+#define GPIO_C 0x48000800
+#define GPIO_D 0x48000C00
+#define GPIO_E 0x48001000
+#define GPIO_F 0x48001400
+#define GPIO_G 0x48001800
+#define GPIO_H 0x48001C00
+
+#define MODER 0x00
+#define OTYPER 0x04
+#define PUPDR 0x0C
+#define IDR 0x10
+#define ODR 0x14
+#define BSRR 0x18
+#define BRR 0x28
+
+#define MODER_INPUT 0
+#define MODER_OUTPUT 1
+
+#define PUPDR_NONE 0
+#define PUPDR_PULLUP 1
+#define PUPDR_PULLDOWN 2
+
+#define OTYPER_PUSH_PULL 0
+#define OTYPER_OPEN_DRAIN 1
+
+const uint32_t moder_reset[NUM_GPIOS] = {
+0xABFF,
+0xFEBF,
+0x,
+0x,
+0x,
+0x,
+0x,
+0x000F
+};
+
+const uint32_t pupdr_reset[NUM_GPIOS] = {
+0x6400,
+0x0100,
+0x,
+0x,
+0x,
+0x,
+0x,
+0x
+};
+
+const uint32_t idr_reset[NUM_GPIOS] = {
+0xA000,
+0x0010,
+0x,
+0x,
+0x,
+0x,
+0x,
+0x
+};
+
+static uint32_t gpio_readl(unsigned int gpio, unsigned int offset)
+{
+return readl(gpio + offset);
+}
+
+static void gpio_writel(unsigned int gpio, unsigned int offset, uint32_t value)
+{
+writel(gpio + offset, value);
+}
+
+static void gpio_set_bit(unsigned int gpio, unsigned int reg,
+ unsigned int pin, uint32_t value)
+{
+uint32_t mask = 0x & ~(0x1 << pin);
+gpio_writel(gpio, reg, (gpio_readl(gpio, reg) & mask) | value << pin);
+}
+
+static void gpio_set_2bits(unsigned int gpio, unsigned int reg,
+   unsigned int pin, uint32_t value)
+{
+uint32_t offset = 2 * pin;
+uint32_t mask = 0x & ~(0x3 << offset);
+gpio_writel(gpio, reg, (gpio_readl(gpio, reg) & mask) | value << offset);
+}
+
+static unsigned int get_gpio_id(uint32_t gpio_addr)
+{
+return (gpio_addr - GPIO_BASE_ADDR) / GPIO_SIZE;
+}
+
+static void gpio_set_irq(unsigned int gpio, int num, int level)
+{
+g_autofree char *name = g_strdup_printf("/machine/soc/gpio%c",
+get_gpio_id(gpio) + 'a');
+qtest_set_irq_in(global_qtest, name, NULL, num, level);
+}
+
+static void disconnect_all_pins(unsigned int gpio)
+{
+g_autofree char *path = g_strdup_printf("/machine/soc/gpio%c",
+get_gpio_id(gpio) + 'a');
+QDict *r;
+
+r = qtest_qmp(global_qtest, "{ 'execute': 'qom-set', 'arguments': "
+"{ 'path': %s, 'property': 'disconnected-pins', 'value': %d } }",
+path, 0x);
+

Re: [PATCH 5/5] util/uri: Remove unused macros ISA_RESERVED() and ISA_GEN_DELIM()

2024-01-22 Thread Stefan Weil via

Am 22.01.24 um 20:17 schrieb Thomas Huth:


They are not used anywhere, so there's no need to keep them around.

Signed-off-by: Thomas Huth 
---
  util/uri.c | 13 -
  1 file changed, 13 deletions(-)



Reviewed-by: Stefan Weil 




Re: [PATCH 4/5] util/uri: Remove unused functions uri_resolve() and uri_resolve_relative()

2024-01-22 Thread Stefan Weil via

Am 22.01.24 um 20:17 schrieb Thomas Huth:


These rather complex functions have never been used since they've been
introduced in 2012, so looks like they are not really useful for QEMU.
And since the static normalize_uri_path() function is also only used by
uri_resolve(), we can remove that function now, too.

Signed-off-by: Thomas Huth 
---
  include/qemu/uri.h |   2 -
  util/uri.c | 689 -
  2 files changed, 691 deletions(-)



This patch should be applied before patch 3 (so switch the order of the 
patches 3 and 4). With this change:


Reviewed-by: Stefan Weil 




[Bug 1225187] Re: qemu hangs in windows 7 host with -serial pipe:windbg

2024-01-22 Thread Peter Maydell
This bug tracker for QEMU has been obsolete for years. If you think
there's a problem with QEMU, please file a full report in the gitlab
bugtracker: https://gitlab.com/qemu-project/qemu/-/issues . Thanks.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1225187

Title:
  qemu hangs in windows 7 host with -serial pipe:windbg

Status in QEMU:
  New

Bug description:
  Execution line:
  qemu-system-i386.exe -m 512 c:\Disks\Qemu_XP_en.vhd  -serial pipe:windbg

  It waits for the pipe.
  Execute windbg
  c:\WINDDK\7600.16385.1\Debuggers\windbg.exe -k 
com:pipe,port=\\.\pipe\windbg,resets=0,reconnect

  GUI black screen shown. QEMU hangs.

  qemu v1.5.3 (c0b1a7e207094dba0b37a892b41fe4cab3195e44). MinGW built.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1225187/+subscriptions




Re: [PATCH 3/5] util/uri: Remove the uri_string_escape() function

2024-01-22 Thread Stefan Weil via

Am 22.01.24 um 20:17 schrieb Thomas Huth:


It is not used in QEMU - and if somebody needs this functionality,
they can simply use g_uri_escape_string() from the glib instead.

Signed-off-by: Thomas Huth 
---
  include/qemu/uri.h |  1 -
  util/uri.c | 64 --
  2 files changed, 65 deletions(-)



The removed function is used in util/uri.c, so this patch breaks the 
compilation.


That can be fixed by applying patch 4 before this one.

With that re-ordering you may add my signature:

Reviewed-by: Stefan Weil 




Re: [PATCH v2 4/4] Exclude TPM ioctls definitions for the GNU/Hurd

2024-01-22 Thread Peter Maydell
On Mon, 22 Jan 2024 at 19:30, Stefan Berger  wrote:
>
>
>
> On 1/22/24 12:16, Peter Maydell wrote:
> > On Thu, 18 Jan 2024 at 16:04, Manolo de Medici  
> > wrote:
> >>
> >> The Hurd currently doesn't have any TPM driver, compilation fails
> >> for missing symbols unless these are left undefined.
> >>
> >> Signed-off-by: Manolo de Medici 
> >> ---
> >>   backends/tpm/tpm_ioctl.h | 2 +-
> >>   1 file changed, 1 insertion(+), 1 deletion(-)
> >>
> >> diff --git a/backends/tpm/tpm_ioctl.h b/backends/tpm/tpm_ioctl.h
> >> index 1933ab6855..c721bf8847 100644
> >> --- a/backends/tpm/tpm_ioctl.h
> >> +++ b/backends/tpm/tpm_ioctl.h
> >> @@ -274,7 +274,7 @@ typedef struct ptm_lockstorage ptm_lockstorage;
> >>   #define PTM_CAP_SEND_COMMAND_HEADER (1 << 15)
> >>   #define PTM_CAP_LOCK_STORAGE   (1 << 16)
> >>
> >> -#ifndef _WIN32
> >> +#if !defined(_WIN32) && !defined(__GNU__)
> >>   enum {
> >>   PTM_GET_CAPABILITY = _IOR('P', 0, ptm_cap),
> >>   PTM_INIT   = _IOWR('P', 1, ptm_init),
> >> --
> >> 2.43.0
> >
> > This looks plausible as a change, but looking at the history
> > of the file in git it seems like this is a file we import
> > from a third-party swtpm project.
> >
> > Stefan: should we get this change made in the swtpm project
> > too? Or have we diverged from that copy of the header?
>
> The diffs are minimal at the moment:
> $ diff swtpm/include/swtpm/tpm_ioctl.h qemu/backends/tpm/tpm_ioctl.h
> 15,16d14
> < #include 
> < #include 
>
> Since we already handle _WIN32 we can just take this case for __GNU__.

OK, so how should we handle the mechanics of it -- just take
this commit in QEMU and then you'll sort it out in swtpm?
Or do we need to change swtpm first and then sync?

thanks
-- PMM



Re: [PATCH 1/2] target/xtensa: wrap MMU and MPU state into structures

2024-01-22 Thread Peter Maydell
On Mon, 22 Jan 2024 at 18:45, Max Filippov  wrote:
>
> On Mon, Jan 22, 2024 at 10:29 AM Peter Maydell  
> wrote:
> >
> > On Fri, 19 Jan 2024 at 20:47, Max Filippov  wrote:
> > > +union {
> > > +XtensaMMU mmu;
> > > +XtensaMPU mpu;
> > > +};
> >
> > Is it really worth having this be a union ? I suspect it will
> > make adding migration/savevm support later more awkward.
>
> I have a draft implementation of savevm for xtensa and I did this part
> using subsections with the .needed callback checking whether the
> MMU or MPU option is enabled in the config. I wonder where the
> awkwardness is expected.

Oh, well if it works that's fine I guess. I was kind of thinking
that if you didn't have the union you could avoid having
subsections entirely.

On Arm we don't try to save space in the CPU struct by
using unions, even though some fields are A-profile
specific and some are R or M-profile specific.

-- PMM



Re: [PATCH 1/5] util/uri: Remove the unused "target" argument from uri_string_unescape()

2024-01-22 Thread Stefan Weil via

Am 22.01.24 um 20:17 schrieb Thomas Huth:


All callers pass NULL as target, so we can simplify the code by
dropping this parameter.

Signed-off-by: Thomas Huth 
---
  include/qemu/uri.h |  2 +-
  util/uri.c | 32 ++--
  2 files changed, 15 insertions(+), 19 deletions(-)



Reviewed-by: Stefan Weil 





[Bug 1225187] Re: qemu hangs in windows 7 host with -serial pipe:windbg

2024-01-22 Thread Jacob
Tested and this issue still affects the latest version of QEMU (8.2.0)
compiled for Windows.

Instructions in original post are still sufficient to reproduce the problem on 
Windows 7 x64.
Both i386 and x86_64 were tested and both result in a hung QEMU process, except 
on my system the GUI for QEMU displayed a white screen when hung instead of 
black.

** Changed in: qemu
   Status: Expired => New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1225187

Title:
  qemu hangs in windows 7 host with -serial pipe:windbg

Status in QEMU:
  New

Bug description:
  Execution line:
  qemu-system-i386.exe -m 512 c:\Disks\Qemu_XP_en.vhd  -serial pipe:windbg

  It waits for the pipe.
  Execute windbg
  c:\WINDDK\7600.16385.1\Debuggers\windbg.exe -k 
com:pipe,port=\\.\pipe\windbg,resets=0,reconnect

  GUI black screen shown. QEMU hangs.

  qemu v1.5.3 (c0b1a7e207094dba0b37a892b41fe4cab3195e44). MinGW built.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1225187/+subscriptions




Re: [PATCH v2 2/4] Avoid conflicting types for 'copy_file_range'

2024-01-22 Thread Sergey Bugaev
On Mon, Jan 22, 2024 at 9:23 PM Sergey Bugaev  wrote:
> call such a function. For example on GNU/Linux, remove(2) is a stub,

(That was supposed to say revoke(2), of course.)



Re: [PATCH v2 2/4] Avoid conflicting types for 'copy_file_range'

2024-01-22 Thread Sergey Bugaev
Hello,

On Mon, Jan 22, 2024 at 8:05 PM Peter Maydell  wrote:
>
> On Thu, 18 Jan 2024 at 16:03, Manolo de Medici  
> wrote:
> >
> > Compilation fails on systems where copy_file_range is already defined as a
> > stub.
>
> What do you mean by "stub" here ? If the system headers define
> a prototype for the function, I would have expected the
> meson check to set HAVE_COPY_FILE_RANGE, and then this
> function doesn't get defined at all. That is, the prototype
> mismatch shouldn't matter because if the prototype exists we
> use the libc function, and if it doesn't then we use our version.

Let me answer :)

glibc has this stubs mechanism: a function can be declared in the
system headers, but only implemented as a stub that always fails with
ENOSYS (or some such). You get a linker warning at link time if you
call such a function. For example on GNU/Linux, remove(2) is a stub,
and if I try to use it, the code does compile, but I get

/usr/bin/ld: /tmp/ccLCnRnW.o: in function `main':
demo.c:(.text+0xa): warning: revoke is not implemented and will always fail

during linking. This is done by embedding a
'.gnu.warning.function-name' section inside libc.so (try readelf
--wide --section-headers /lib64/libc.so.6 | grep warning). You can
also find the list of stubs in the gnu/stubs.h header, which contains
definitions like __stub_revoke.

Meson's has_function() knows about this mechanism, and returns false
if the function is declared, but is actually just a stub (by looking
for "__stub_{func}" being defined); autoconf does, too. But as the
prototype is still declared (and the function technically exists, too,
even if it's a stub), you'll get errors if you define the same
function incompatibly yourself.

Sergey



Re: [PATCH] crypto/gcrypt: prefer kernel as direct source of entropy

2024-01-22 Thread Cristian Rodríguez
On Mon, Jan 22, 2024 at 5:19 PM Daniel P. Berrangé 
wrote:

>
> If the DRBG is required for FIPS compliance, and QEMU hardcoded
> the system RNG, then QEMU can't be used in a FIPS environment.
>

No, the library overrides this choice.. the DRBG has higher priority.


Re: [PATCH] crypto/gcrypt: prefer kernel as direct source of entropy

2024-01-22 Thread Daniel P . Berrangé
On Mon, Jan 22, 2024 at 05:08:16PM -0300, Cristian Rodríguez wrote:
> On Mon, Jan 22, 2024 at 11:48 AM Daniel P. Berrangé 
> wrote:
> 
> > On Fri, Jan 19, 2024 at 05:39:40PM -0300, Cristian Rodríguez wrote:
> > > gcrypt by default uses an userspace RNG, which cannot know
> > > when it is time to discard/invalidate its buffer
> > > (suspend, resume, vm forks, other corner cases)
> > > as a "when to discard" event is unavailable to userspace.
> >
> > So in this scenario QEMU is impacted when QEMU is running inside
> > another VM. ie the L0 QEMU "forks" the guest, and the L1 QEMU
> > needs to re-init its RNG.
> >
> > > Set GCRYCTL_SET_PREFERRED_RNG_TYPE to GCRY_RNG_TYPE_SYSTEM
> > > which must be done before the first call to gcry_check_version()
> >
> > QEMU is just one out of many applications that use libgcrypt and
> > I see no reason why QEMU should be special cased in this respect.
> >
> > Updating each application to hardcode use of GCRY_RNG_TYPE_SYSTEM
> > does not feel like a good solution. If this was a good default
> > to use everywhere, then gcrypt should have set this default
> > already, rather than requiring every app to solve the forking
> > problem itself.
> >
> 
> this default is because either other OSs had problems or in the past the
> linux rng was not as performant as it is right now,
>  now it outputs 100's of MB per second on a potato.
> 
> Updating every app that uses gcrypt is not really practical
> > in terms of time investment anyway.
> >
> 
> Yeah, it will be pretty time consuming so I have so far only sent a few
> patches for things I consider important.
> 
> >
> > If gcrypt doesn't want to make this its global default, then
> > I would suggest that gcrypt should make its default be
> > configurable. I see from its docs:
> >
> >
> > https://gnupg.org/documentation/manuals/gcrypt/Configuration.html#Configuration
> >
> > that it already supports a /etc/gcrypt/random.conf file.
> > Perhaps they would extend that to allow selection of the
> > default RNG backend, system-wide.
> 
> 
> And things will remain problematic by default , because of all the
> obscurity and that FIPS mode overrides
> all defaults you choose anyways, including if I hardcode the preference in
> the source code like I did here.

If the DRBG is required for FIPS compliance, and QEMU hardcoded
the system RNG, then QEMU can't be used in a FIPS environment.
So I still think this question of defaults is something to be
fixed in the gcrypt code centrally, not in individual apps.


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH] crypto/gcrypt: prefer kernel as direct source of entropy

2024-01-22 Thread Cristian Rodríguez
On Mon, Jan 22, 2024 at 11:48 AM Daniel P. Berrangé 
wrote:

> On Fri, Jan 19, 2024 at 05:39:40PM -0300, Cristian Rodríguez wrote:
> > gcrypt by default uses an userspace RNG, which cannot know
> > when it is time to discard/invalidate its buffer
> > (suspend, resume, vm forks, other corner cases)
> > as a "when to discard" event is unavailable to userspace.
>
> So in this scenario QEMU is impacted when QEMU is running inside
> another VM. ie the L0 QEMU "forks" the guest, and the L1 QEMU
> needs to re-init its RNG.
>
> > Set GCRYCTL_SET_PREFERRED_RNG_TYPE to GCRY_RNG_TYPE_SYSTEM
> > which must be done before the first call to gcry_check_version()
>
> QEMU is just one out of many applications that use libgcrypt and
> I see no reason why QEMU should be special cased in this respect.
>
> Updating each application to hardcode use of GCRY_RNG_TYPE_SYSTEM
> does not feel like a good solution. If this was a good default
> to use everywhere, then gcrypt should have set this default
> already, rather than requiring every app to solve the forking
> problem itself.
>

this default is because either other OSs had problems or in the past the
linux rng was not as performant as it is right now,
 now it outputs 100's of MB per second on a potato.

Updating every app that uses gcrypt is not really practical
> in terms of time investment anyway.
>

Yeah, it will be pretty time consuming so I have so far only sent a few
patches for things I consider important.

>
> If gcrypt doesn't want to make this its global default, then
> I would suggest that gcrypt should make its default be
> configurable. I see from its docs:
>
>
> https://gnupg.org/documentation/manuals/gcrypt/Configuration.html#Configuration
>
> that it already supports a /etc/gcrypt/random.conf file.
> Perhaps they would extend that to allow selection of the
> default RNG backend, system-wide.


And things will remain problematic by default , because of all the
obscurity and that FIPS mode overrides
all defaults you choose anyways, including if I hardcode the preference in
the source code like I did here.
.


>
>
>


Re: [PATCH v6 08/10] migration/yank: Use channel features

2024-01-22 Thread Fabiano Rosas
Peter Xu  writes:

> On Mon, Sep 11, 2023 at 02:13:18PM -0300, Fabiano Rosas wrote:
>> Stop using outside knowledge about the io channels when registering
>> yank functions. Query for features instead.
>> 
>> The yank method for all channels used with migration code currently is
>> to call the qio_channel_shutdown() function, so query for
>> QIO_CHANNEL_FEATURE_SHUTDOWN. We could add a separate feature in the
>> future for indicating whether a channel supports yanking, but that
>> seems overkill at the moment.
>> 
>> Signed-off-by: Fabiano Rosas 
>
> Reviewed-by: Peter Xu 

Hi Peter, this one has fell through the cracks, think we could merge it?



Re: [PATCH v2 4/4] Exclude TPM ioctls definitions for the GNU/Hurd

2024-01-22 Thread Stefan Berger




On 1/22/24 12:16, Peter Maydell wrote:

On Thu, 18 Jan 2024 at 16:04, Manolo de Medici  wrote:


The Hurd currently doesn't have any TPM driver, compilation fails
for missing symbols unless these are left undefined.

Signed-off-by: Manolo de Medici 
---
  backends/tpm/tpm_ioctl.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backends/tpm/tpm_ioctl.h b/backends/tpm/tpm_ioctl.h
index 1933ab6855..c721bf8847 100644
--- a/backends/tpm/tpm_ioctl.h
+++ b/backends/tpm/tpm_ioctl.h
@@ -274,7 +274,7 @@ typedef struct ptm_lockstorage ptm_lockstorage;
  #define PTM_CAP_SEND_COMMAND_HEADER (1 << 15)
  #define PTM_CAP_LOCK_STORAGE   (1 << 16)

-#ifndef _WIN32
+#if !defined(_WIN32) && !defined(__GNU__)
  enum {
  PTM_GET_CAPABILITY = _IOR('P', 0, ptm_cap),
  PTM_INIT   = _IOWR('P', 1, ptm_init),
--
2.43.0


This looks plausible as a change, but looking at the history
of the file in git it seems like this is a file we import
from a third-party swtpm project.

Stefan: should we get this change made in the swtpm project
too? Or have we diverged from that copy of the header?


The diffs are minimal at the moment:
$ diff swtpm/include/swtpm/tpm_ioctl.h qemu/backends/tpm/tpm_ioctl.h
15,16d14
< #include 
< #include 

Since we already handle _WIN32 we can just take this case for __GNU__.

  Stefan


If the latter, then the simple thing woud be to delete
this enum entirely, because as far as I can see we don't
use any of the values in QEMU, so we can avoid the
portability problem that way.

thanks
-- PMM





Re: [PATCH 0/5] util/uri: Simplify the code, remove unused functions

2024-01-22 Thread Daniel P . Berrangé
On Mon, Jan 22, 2024 at 08:17:48PM +0100, Thomas Huth wrote:
> The URI function uri_string_unescape() has some overlap with functions
> from the glib, so we can simplify our code here quite a bit.
> While at it, I also noticed that there are many unused functions in
> here which we likely can drop nowadays (it's better to use the functions
> from glib anyway).
> 
> Thomas Huth (5):
>   util/uri: Remove the unused "target" argument from
> uri_string_unescape()
>   util/uri: Simplify uri_string_unescape()
>   util/uri: Remove the uri_string_escape() function
>   util/uri: Remove unused functions uri_resolve() and
> uri_resolve_relative()
>   util/uri: Remove unused macros ISA_RESERVED() and ISA_GEN_DELIM()
> 
>  include/qemu/uri.h |   5 +-
>  util/uri.c | 843 +
>  2 files changed, 16 insertions(+), 832 deletions(-)

Reviewed-by: Daniel P. Berrangé 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




[PATCH 4/5] util/uri: Remove unused functions uri_resolve() and uri_resolve_relative()

2024-01-22 Thread Thomas Huth
These rather complex functions have never been used since they've been
introduced in 2012, so looks like they are not really useful for QEMU.
And since the static normalize_uri_path() function is also only used by
uri_resolve(), we can remove that function now, too.

Signed-off-by: Thomas Huth 
---
 include/qemu/uri.h |   2 -
 util/uri.c | 689 -
 2 files changed, 691 deletions(-)

diff --git a/include/qemu/uri.h b/include/qemu/uri.h
index c1734d28c3..93538b7578 100644
--- a/include/qemu/uri.h
+++ b/include/qemu/uri.h
@@ -72,8 +72,6 @@ typedef struct URI {
 } URI;
 
 URI *uri_new(void);
-char *uri_resolve(const char *URI, const char *base);
-char *uri_resolve_relative(const char *URI, const char *base);
 URI *uri_parse(const char *str);
 URI *uri_parse_raw(const char *str, int raw);
 int uri_parse_into(URI *uri, const char *str);
diff --git a/util/uri.c b/util/uri.c
index 912e406523..5f5ca79792 100644
--- a/util/uri.c
+++ b/util/uri.c
@@ -1355,212 +1355,6 @@ void uri_free(URI *uri)
  *  *
  /
 
-/**
- * normalize_uri_path:
- * @path:  pointer to the path string
- *
- * Applies the 5 normalization steps to a path string--that is, RFC 2396
- * Section 5.2, steps 6.c through 6.g.
- *
- * Normalization occurs directly on the string, no new allocation is done
- *
- * Returns 0 or an error code
- */
-static int normalize_uri_path(char *path)
-{
-char *cur, *out;
-
-if (path == NULL) {
-return -1;
-}
-
-/* Skip all initial "/" chars.  We want to get to the beginning of the
- * first non-empty segment.
- */
-cur = path;
-while (cur[0] == '/') {
-++cur;
-}
-if (cur[0] == '\0') {
-return 0;
-}
-
-/* Keep everything we've seen so far.  */
-out = cur;
-
-/*
- * Analyze each segment in sequence for cases (c) and (d).
- */
-while (cur[0] != '\0') {
-/*
- * c) All occurrences of "./", where "." is a complete path segment,
- *are removed from the buffer string.
- */
-if ((cur[0] == '.') && (cur[1] == '/')) {
-cur += 2;
-/* '//' normalization should be done at this point too */
-while (cur[0] == '/') {
-cur++;
-}
-continue;
-}
-
-/*
- * d) If the buffer string ends with "." as a complete path segment,
- *that "." is removed.
- */
-if ((cur[0] == '.') && (cur[1] == '\0')) {
-break;
-}
-
-/* Otherwise keep the segment.  */
-while (cur[0] != '/') {
-if (cur[0] == '\0') {
-goto done_cd;
-}
-(out++)[0] = (cur++)[0];
-}
-/* nomalize // */
-while ((cur[0] == '/') && (cur[1] == '/')) {
-cur++;
-}
-
-(out++)[0] = (cur++)[0];
-}
-done_cd:
-out[0] = '\0';
-
-/* Reset to the beginning of the first segment for the next sequence.  */
-cur = path;
-while (cur[0] == '/') {
-++cur;
-}
-if (cur[0] == '\0') {
-return 0;
-}
-
-/*
- * Analyze each segment in sequence for cases (e) and (f).
- *
- * e) All occurrences of "/../", where  is a
- *complete path segment not equal to "..", are removed from the
- *buffer string.  Removal of these path segments is performed
- *iteratively, removing the leftmost matching pattern on each
- *iteration, until no matching pattern remains.
- *
- * f) If the buffer string ends with "/..", where 
- *is a complete path segment not equal to "..", that
- *"/.." is removed.
- *
- * To satisfy the "iterative" clause in (e), we need to collapse the
- * string every time we find something that needs to be removed.  Thus,
- * we don't need to keep two pointers into the string: we only need a
- * "current position" pointer.
- */
-while (1) {
-char *segp, *tmp;
-
-/* At the beginning of each iteration of this loop, "cur" points to
- * the first character of the segment we want to examine.
- */
-
-/* Find the end of the current segment.  */
-segp = cur;
-while ((segp[0] != '/') && (segp[0] != '\0')) {
-++segp;
-}
-
-/* If this is the last segment, we're done (we need at least two
- * segments to meet the criteria for the (e) and (f) cases).
- */
-if (segp[0] == '\0') {
-break;
-}
-
-/* If the first segment is "..", or if the next segment _isn't_ "..",
- * keep this segment and try the next one.
- */
-++segp;
-if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur + 3)) ||
-((segp[0] != '.') || (segp[1] != '.') ||
- 

[PATCH 5/5] util/uri: Remove unused macros ISA_RESERVED() and ISA_GEN_DELIM()

2024-01-22 Thread Thomas Huth
They are not used anywhere, so there's no need to keep them around.

Signed-off-by: Thomas Huth 
---
 util/uri.c | 13 -
 1 file changed, 13 deletions(-)

diff --git a/util/uri.c b/util/uri.c
index 5f5ca79792..2deab91da3 100644
--- a/util/uri.c
+++ b/util/uri.c
@@ -163,19 +163,6 @@ static void uri_clean(URI *uri);
  ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||  
\
  ((*(p) == '=')) || ((*(p) == '\'')))
 
-/*
- *gen-delims= ":" / "/" / "?" / "#" / "[" / "]" / "@"
- */
-#define ISA_GEN_DELIM(p)   
\
-(((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||  
\
- ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||  
\
- ((*(p) == '@')))
-
-/*
- *reserved  = gen-delims / sub-delims
- */
-#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
-
 /*
  *unreserved= ALPHA / DIGIT / "-" / "." / "_" / "~"
  */
-- 
2.43.0




[PATCH 3/5] util/uri: Remove the uri_string_escape() function

2024-01-22 Thread Thomas Huth
It is not used in QEMU - and if somebody needs this functionality,
they can simply use g_uri_escape_string() from the glib instead.

Signed-off-by: Thomas Huth 
---
 include/qemu/uri.h |  1 -
 util/uri.c | 64 --
 2 files changed, 65 deletions(-)

diff --git a/include/qemu/uri.h b/include/qemu/uri.h
index aa54b6f251..c1734d28c3 100644
--- a/include/qemu/uri.h
+++ b/include/qemu/uri.h
@@ -78,7 +78,6 @@ URI *uri_parse(const char *str);
 URI *uri_parse_raw(const char *str, int raw);
 int uri_parse_into(URI *uri, const char *str);
 char *uri_to_string(URI *uri);
-char *uri_string_escape(const char *str, const char *list);
 char *uri_string_unescape(const char *str, int len);
 void uri_free(URI *uri);
 
diff --git a/util/uri.c b/util/uri.c
index 2a75f535ba..912e406523 100644
--- a/util/uri.c
+++ b/util/uri.c
@@ -1589,70 +1589,6 @@ char *uri_string_unescape(const char *str, int len)
 return g_uri_unescape_string(lstr, NULL);
 }
 
-/**
- * uri_string_escape:
- * @str:  string to escape
- * @list: exception list string of chars not to escape
- *
- * This routine escapes a string to hex, ignoring reserved characters (a-z)
- * and the characters in the exception list.
- *
- * Returns a new escaped string or NULL in case of error.
- */
-char *uri_string_escape(const char *str, const char *list)
-{
-char *ret, ch;
-char *temp;
-const char *in;
-int len, out;
-
-if (str == NULL) {
-return NULL;
-}
-if (str[0] == 0) {
-return g_strdup(str);
-}
-len = strlen(str);
-if (!(len > 0)) {
-return NULL;
-}
-
-len += 20;
-ret = g_malloc(len);
-in = str;
-out = 0;
-while (*in != 0) {
-if (len - out <= 3) {
-temp = realloc2n(ret, );
-ret = temp;
-}
-
-ch = *in;
-
-if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!strchr(list, ch))) {
-unsigned char val;
-ret[out++] = '%';
-val = ch >> 4;
-if (val <= 9) {
-ret[out++] = '0' + val;
-} else {
-ret[out++] = 'A' + val - 0xA;
-}
-val = ch & 0xF;
-if (val <= 9) {
-ret[out++] = '0' + val;
-} else {
-ret[out++] = 'A' + val - 0xA;
-}
-in++;
-} else {
-ret[out++] = *in++;
-}
-}
-ret[out] = 0;
-return ret;
-}
-
 /
  *  *
  *   Public functions   *
-- 
2.43.0




[PATCH 0/5] util/uri: Simplify the code, remove unused functions

2024-01-22 Thread Thomas Huth
The URI function uri_string_unescape() has some overlap with functions
from the glib, so we can simplify our code here quite a bit.
While at it, I also noticed that there are many unused functions in
here which we likely can drop nowadays (it's better to use the functions
from glib anyway).

Thomas Huth (5):
  util/uri: Remove the unused "target" argument from
uri_string_unescape()
  util/uri: Simplify uri_string_unescape()
  util/uri: Remove the uri_string_escape() function
  util/uri: Remove unused functions uri_resolve() and
uri_resolve_relative()
  util/uri: Remove unused macros ISA_RESERVED() and ISA_GEN_DELIM()

 include/qemu/uri.h |   5 +-
 util/uri.c | 843 +
 2 files changed, 16 insertions(+), 832 deletions(-)

-- 
2.43.0




[PATCH 1/5] util/uri: Remove the unused "target" argument from uri_string_unescape()

2024-01-22 Thread Thomas Huth
All callers pass NULL as target, so we can simplify the code by
dropping this parameter.

Signed-off-by: Thomas Huth 
---
 include/qemu/uri.h |  2 +-
 util/uri.c | 32 ++--
 2 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/include/qemu/uri.h b/include/qemu/uri.h
index 1855b764f2..aa54b6f251 100644
--- a/include/qemu/uri.h
+++ b/include/qemu/uri.h
@@ -79,7 +79,7 @@ URI *uri_parse_raw(const char *str, int raw);
 int uri_parse_into(URI *uri, const char *str);
 char *uri_to_string(URI *uri);
 char *uri_string_escape(const char *str, const char *list);
-char *uri_string_unescape(const char *str, int len, char *target);
+char *uri_string_unescape(const char *str, int len);
 void uri_free(URI *uri);
 
 /* Single web service query parameter 'name=value'. */
diff --git a/util/uri.c b/util/uri.c
index dcb3305236..33b6c7214e 100644
--- a/util/uri.c
+++ b/util/uri.c
@@ -267,7 +267,7 @@ static int rfc3986_parse_fragment(URI *uri, const char 
**str)
 if (uri->cleanup & 2) {
 uri->fragment = g_strndup(*str, cur - *str);
 } else {
-uri->fragment = uri_string_unescape(*str, cur - *str, NULL);
+uri->fragment = uri_string_unescape(*str, cur - *str);
 }
 }
 *str = cur;
@@ -368,7 +368,7 @@ static int rfc3986_parse_user_info(URI *uri, const char 
**str)
 if (uri->cleanup & 2) {
 uri->user = g_strndup(*str, cur - *str);
 } else {
-uri->user = uri_string_unescape(*str, cur - *str, NULL);
+uri->user = uri_string_unescape(*str, cur - *str);
 }
 }
 *str = cur;
@@ -496,7 +496,7 @@ found:
 if (uri->cleanup & 2) {
 uri->server = g_strndup(host, cur - host);
 } else {
-uri->server = uri_string_unescape(host, cur - host, NULL);
+uri->server = uri_string_unescape(host, cur - host);
 }
 } else {
 uri->server = NULL;
@@ -614,7 +614,7 @@ static int rfc3986_parse_path_ab_empty(URI *uri, const char 
**str)
 if (uri->cleanup & 2) {
 uri->path = g_strndup(*str, cur - *str);
 } else {
-uri->path = uri_string_unescape(*str, cur - *str, NULL);
+uri->path = uri_string_unescape(*str, cur - *str);
 }
 } else {
 uri->path = NULL;
@@ -663,7 +663,7 @@ static int rfc3986_parse_path_absolute(URI *uri, const char 
**str)
 if (uri->cleanup & 2) {
 uri->path = g_strndup(*str, cur - *str);
 } else {
-uri->path = uri_string_unescape(*str, cur - *str, NULL);
+uri->path = uri_string_unescape(*str, cur - *str);
 }
 } else {
 uri->path = NULL;
@@ -709,7 +709,7 @@ static int rfc3986_parse_path_rootless(URI *uri, const char 
**str)
 if (uri->cleanup & 2) {
 uri->path = g_strndup(*str, cur - *str);
 } else {
-uri->path = uri_string_unescape(*str, cur - *str, NULL);
+uri->path = uri_string_unescape(*str, cur - *str);
 }
 } else {
 uri->path = NULL;
@@ -755,7 +755,7 @@ static int rfc3986_parse_path_no_scheme(URI *uri, const 
char **str)
 if (uri->cleanup & 2) {
 uri->path = g_strndup(*str, cur - *str);
 } else {
-uri->path = uri_string_unescape(*str, cur - *str, NULL);
+uri->path = uri_string_unescape(*str, cur - *str);
 }
 } else {
 uri->path = NULL;
@@ -1574,7 +1574,6 @@ static int is_hex(char c)
  * uri_string_unescape:
  * @str:  the string to unescape
  * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
- * @target:  optional destination buffer
  *
  * Unescaping routine, but does not check that the string is an URI. The
  * output is a direct unsigned char translation of %XX values (no encoding)
@@ -1584,7 +1583,7 @@ static int is_hex(char c)
  * Returns a copy of the string, but unescaped, will return NULL only in case
  * of error
  */
-char *uri_string_unescape(const char *str, int len, char *target)
+char *uri_string_unescape(const char *str, int len)
 {
 char *ret, *out;
 const char *in;
@@ -1599,11 +1598,8 @@ char *uri_string_unescape(const char *str, int len, char 
*target)
 return NULL;
 }
 
-if (target == NULL) {
-ret = g_malloc(len + 1);
-} else {
-ret = target;
-}
+ret = g_malloc(len + 1);
+
 in = str;
 out = ret;
 while (len > 0) {
@@ -2274,14 +2270,14 @@ struct QueryParams *query_params_parse(const char 
*query)
  * and consistent with CGI.pm we assume value is "".
  */
 else if (!eq) {
-name = uri_string_unescape(query, end - query, NULL);
+name = uri_string_unescape(query, end - query);
   

[PATCH 2/5] util/uri: Simplify uri_string_unescape()

2024-01-22 Thread Thomas Huth
uri_string_unescape() basically does the same as the glib function
g_uri_unescape_string(), with just an additional length parameter.
So we can simplify this function a lot by limiting the length with
g_strndup() first and then by calling g_uri_unescape_string() instead
of walking through the string manually.

Suggested-by: Stefan Weil 
Signed-off-by: Thomas Huth 
---
 util/uri.c | 49 +++--
 1 file changed, 3 insertions(+), 46 deletions(-)

diff --git a/util/uri.c b/util/uri.c
index 33b6c7214e..2a75f535ba 100644
--- a/util/uri.c
+++ b/util/uri.c
@@ -1561,15 +1561,6 @@ done_cd:
 return 0;
 }
 
-static int is_hex(char c)
-{
-if (((c >= '0') && (c <= '9')) || ((c >= 'a') && (c <= 'f')) ||
-((c >= 'A') && (c <= 'F'))) {
-return 1;
-}
-return 0;
-}
-
 /**
  * uri_string_unescape:
  * @str:  the string to unescape
@@ -1585,8 +1576,7 @@ static int is_hex(char c)
  */
 char *uri_string_unescape(const char *str, int len)
 {
-char *ret, *out;
-const char *in;
+g_autofree char *lstr = NULL;
 
 if (str == NULL) {
 return NULL;
@@ -1594,42 +1584,9 @@ char *uri_string_unescape(const char *str, int len)
 if (len <= 0) {
 len = strlen(str);
 }
-if (len < 0) {
-return NULL;
-}
-
-ret = g_malloc(len + 1);
+lstr = g_strndup(str, len);
 
-in = str;
-out = ret;
-while (len > 0) {
-if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
-in++;
-if ((*in >= '0') && (*in <= '9')) {
-*out = (*in - '0');
-} else if ((*in >= 'a') && (*in <= 'f')) {
-*out = (*in - 'a') + 10;
-} else if ((*in >= 'A') && (*in <= 'F')) {
-*out = (*in - 'A') + 10;
-}
-in++;
-if ((*in >= '0') && (*in <= '9')) {
-*out = *out * 16 + (*in - '0');
-} else if ((*in >= 'a') && (*in <= 'f')) {
-*out = *out * 16 + (*in - 'a') + 10;
-} else if ((*in >= 'A') && (*in <= 'F')) {
-*out = *out * 16 + (*in - 'A') + 10;
-}
-in++;
-len -= 3;
-out++;
-} else {
-*out++ = *in++;
-len--;
-}
-}
-*out = 0;
-return ret;
+return g_uri_unescape_string(lstr, NULL);
 }
 
 /**
-- 
2.43.0




Re: [PATCH 2/2] target/xtensa: tidy TLB way variability logic

2024-01-22 Thread Max Filippov
On Mon, Jan 22, 2024 at 10:42 AM Peter Maydell  wrote:
>
> On Fri, 19 Jan 2024 at 20:47, Max Filippov  wrote:
> >
> > Whether TLB ways 5 and 6 are variable is not a property of the TLB
> > instance or a TLB entry instance, it's a property of the xtensa core
> > configuration.
> > Remove 'varway56' field from the xtensa_tlb structure and remove
> > 'variable' field from the xtensa_tlb_entry structure. Add
> > 'tlb_variable_way' array to the XtensaConfig and use it instead of
> > removed fields.
> >
> > Signed-off-by: Max Filippov 
> > ---
> >  target/xtensa/cpu.h  |  3 +--
> >  target/xtensa/mmu_helper.c   | 38 ++--
> >  target/xtensa/overlay_tool.h | 15 --
> >  3 files changed, 24 insertions(+), 32 deletions(-)
> >
> > diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h
> > index 497325466397..24d3f15ea1bf 100644
> > --- a/target/xtensa/cpu.h
> > +++ b/target/xtensa/cpu.h
> > @@ -316,13 +316,11 @@ typedef struct xtensa_tlb_entry {
> >  uint32_t paddr;
> >  uint8_t asid;
> >  uint8_t attr;
> > -bool variable;
> >  } xtensa_tlb_entry;
> >
> >  typedef struct xtensa_tlb {
> >  unsigned nways;
> >  const unsigned way_size[10];
> > -bool varway56;
> >  unsigned nrefillentries;
> >  } xtensa_tlb;
> >
> > @@ -493,6 +491,7 @@ typedef struct XtensaConfig {
> >
> >  xtensa_tlb itlb;
> >  xtensa_tlb dtlb;
> > +bool tlb_variable_way[16];
> >
> >  uint32_t mpu_align;
> >  unsigned n_mpu_fg_segments;
> > diff --git a/target/xtensa/mmu_helper.c b/target/xtensa/mmu_helper.c
> > index d9f845e7fb6f..414c2f5ef669 100644
> > --- a/target/xtensa/mmu_helper.c
> > +++ b/target/xtensa/mmu_helper.c
> > @@ -105,23 +105,19 @@ static uint32_t xtensa_tlb_get_addr_mask(const 
> > CPUXtensaState *env,
> >   bool dtlb, uint32_t way)
> >  {
> >  if (xtensa_option_enabled(env->config, XTENSA_OPTION_MMU)) {
> > -bool varway56 = dtlb ?
> > -env->config->dtlb.varway56 :
> > -env->config->itlb.varway56;
> > -
> >  switch (way) {
> >  case 4:
> >  return 0xfff0 << get_page_size(env, dtlb, way) * 2;
> >
> >  case 5:
> > -if (varway56) {
> > +if (env->config->tlb_variable_way[5]) {
> >  return 0xf800 << get_page_size(env, dtlb, way);
> >  } else {
> >  return 0xf800;
> >  }
> >
> >  case 6:
> > -if (varway56) {
> > +if (env->config->tlb_variable_way[6]) {
> >  return 0xf000 << (1 - get_page_size(env, dtlb, way));
> >  } else {
> >  return 0xf000;
>
> So we now have a tlb_variable_way bool for all 16 possible
> ways, but the code actually only checks it for ways 5 and 6.

xtensa_tlb_set_entry checks this for all possible ways.

I would say that this is an unfortunate definition of MMU in the
xtensa ISA book that uses the variability of the ways 5/6 as a
discriminator between MMUv2 and MMUv3.

> Should we have an assertion somewhere that the config
> doesn't try to set it on ways where it has no effect ?
> Or is there actually a generic behaviour that would make
> sense for eg "way 3 is variable-way" that we just don't
> currently implement?

We currently use the TLB structure to implement the following
xtensa memory management options: cacheattr, region protection,
region translation, MMUv2 and MMUv3. First three only have
one variable way, in MMUv2 all ways except 5 and 6 are variable
and in MMUv3 all ways are variable. QEMU supports all of it
and tlb_variable_way is set properly in all of these cases.

> > @@ -150,11 +146,8 @@ static uint32_t get_vpn_mask(const CPUXtensaState 
> > *env, bool dtlb, uint32_t way)
> >  return xtensa_tlb_get_addr_mask(env, dtlb, way) << 2;
> >  } else if (way <= 6) {
> >  uint32_t mask = xtensa_tlb_get_addr_mask(env, dtlb, way);
> > -bool varway56 = dtlb ?
> > -env->config->dtlb.varway56 :
> > -env->config->itlb.varway56;
> >
> > -if (varway56) {
> > +if (env->config->tlb_variable_way[5]) {
> >  return mask << (way == 5 ? 2 : 3);
> >  } else {
> >  return mask << 1;
>
> This doesn't look right -- this branch of the if-else deals
> with way == 5 and way == 6, but we're only looking at
> tlb_variable_way[5].

Yeah, that's MMUv2 vs MMUv3 check, again.

> > @@ -172,10 +165,6 @@ static void split_tlb_entry_spec_way(const 
> > CPUXtensaState *env, uint32_t v,
> >   bool dtlb, uint32_t *vpn,
> >   uint32_t wi, uint32_t *ei)
> >  {
> > -bool varway56 = dtlb ?
> > -env->config->dtlb.varway56 :
> > -env->config->itlb.varway56;
> > -
> >  if (!dtlb) {
> >  wi &= 7;
> >  }
> > @@ -195,7 +184,7 @@ static void 

[RFC v3 7/7] hw/nvme: make ZDED persistent

2024-01-22 Thread Sam Li
Zone descriptor extension data (ZDED) is not persistent across QEMU
restarts. The zone descriptor extension valid bit (ZDEV) is part of
zone attributes, which sets to one when the ZDED is associated with
the zone.

With the qcow2 img as the backing file, the NVMe ZNS device stores
the zone attributes at the following eight bit of zone type bit of write
pointers for each zone. The ZDED is stored as part of zoned metadata as
write pointers.

Signed-off-by: Sam Li 
---
 block/qcow2.c| 45 
 hw/nvme/ctrl.c   |  1 +
 include/block/block-common.h |  1 +
 3 files changed, 47 insertions(+)

diff --git a/block/qcow2.c b/block/qcow2.c
index 43ee0f47b9..f2d58d86c4 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -25,6 +25,7 @@
 #include "qemu/osdep.h"
 
 #include "block/qdict.h"
+#include "block/nvme.h"
 #include "sysemu/block-backend.h"
 #include "qemu/main-loop.h"
 #include "qemu/module.h"
@@ -197,6 +198,17 @@ qcow2_extract_crypto_opts(QemuOpts *opts, const char *fmt, 
Error **errp)
 
 #define QCOW2_ZT_IS_CONV(wp)(wp & 1ULL << 59)
 
+static inline void qcow2_set_za(uint64_t *wp, uint8_t za)
+{
+/*
+ * The zone attribute takes up one byte. Store it after the zoned
+ * bit.
+ */
+uint64_t addr = *wp;
+addr |= ((uint64_t)za << 51);
+*wp = addr;
+}
+
 /*
  * To emulate a real zoned device, closed, empty and full states are
  * preserved after a power cycle. The open states are in-memory and will
@@ -5053,6 +5065,36 @@ unlock:
 return ret;
 }
 
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_zns_set_zded(BlockDriverState *bs, uint32_t index)
+{
+BDRVQcow2State *s = bs->opaque;
+int ret;
+
+qemu_co_mutex_lock(>wps->colock);
+uint64_t *wp = >wps->wp[index];
+BlockZoneState zs = qcow2_get_zone_state(bs, index);
+if (zs == BLK_ZS_EMPTY) {
+if (!qcow2_can_activate_zone(bs)) {
+goto unlock;
+}
+
+qcow2_set_za(wp, NVME_ZA_ZD_EXT_VALID);
+ret = qcow2_write_wp_at(bs, wp, index);
+if (ret < 0) {
+error_report("Failed to set zone extension at 0x%" PRIx64 "", *wp);
+goto unlock;
+}
+s->nr_zones_closed++;
+qemu_co_mutex_unlock(>wps->colock);
+return ret;
+}
+
+unlock:
+qemu_co_mutex_unlock(>wps->colock);
+return NVME_ZONE_INVAL_TRANSITION;
+}
+
 static int coroutine_fn GRAPH_RDLOCK
 qcow2_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
int64_t offset, int64_t len)
@@ -5110,6 +5152,9 @@ qcow2_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
 case BLK_ZO_OFFLINE:
 /* There are no transitions from the offline state to any other state 
*/
 break;
+case BLK_ZO_SET_ZDED:
+ret = qcow2_zns_set_zded(bs, index);
+break;
 default:
 error_report("Unsupported zone op: 0x%x", op);
 ret = -ENOTSUP;
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index de41d8bac8..2799a3ac31 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -3465,6 +3465,7 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, 
NvmeRequest *req)
 break;
 
 case NVME_ZONE_ACTION_SET_ZD_EXT:
+op = BLK_ZO_SET_ZDED;
 int zd_ext_size = blk_get_zd_ext_size(blk);
 trace_pci_nvme_set_descriptor_extension(slba, zone_idx);
 if (all || !zd_ext_size) {
diff --git a/include/block/block-common.h b/include/block/block-common.h
index 7690b05149..7c501e053e 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -88,6 +88,7 @@ typedef enum BlockZoneOp {
 BLK_ZO_FINISH,
 BLK_ZO_RESET,
 BLK_ZO_OFFLINE,
+BLK_ZO_SET_ZDED,
 } BlockZoneOp;
 
 typedef enum BlockZoneModel {
-- 
2.40.1




[RFC v3 5/7] hw/nvme: make the metadata of ZNS emulation persistent

2024-01-22 Thread Sam Li
The NVMe ZNS devices follow NVMe ZNS spec but the state of namespace
zones does not persist accross restarts of QEMU. This patch makes the
metadata of ZNS emulation persistent by using new block layer APIs. The
ZNS device calls zone report and zone mgmt APIs from the block layer
which will handle zone state transition and manage zone resources.

Signed-off-by: Sam Li 
---
 block/qcow2.c|3 +
 hw/nvme/ctrl.c   | 1115 +++---
 hw/nvme/ns.c |   77 +--
 hw/nvme/nvme.h   |   85 +--
 include/block/block-common.h |8 +
 include/block/block_int-common.h |2 +
 6 files changed, 264 insertions(+), 1026 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 5098edf656..0bb249fa6e 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -5107,6 +5107,9 @@ qcow2_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
 case BLK_ZO_RESET:
 ret = qcow2_reset_zone(bs, index, len);
 break;
+case BLK_ZO_OFFLINE:
+/* There are no transitions from the offline state to any other state 
*/
+break;
 default:
 error_report("Unsupported zone op: 0x%x", op);
 ret = -ENOTSUP;
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index dae6f00e4f..e31aa52c06 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -372,67 +372,6 @@ static inline bool nvme_parse_pid(NvmeNamespace *ns, 
uint16_t pid,
 return nvme_ph_valid(ns, *ph) && nvme_rg_valid(ns->endgrp, *rg);
 }
 
-static void nvme_assign_zone_state(NvmeNamespace *ns, NvmeZone *zone,
-   NvmeZoneState state)
-{
-if (QTAILQ_IN_USE(zone, entry)) {
-switch (nvme_get_zone_state(zone)) {
-case NVME_ZONE_STATE_EXPLICITLY_OPEN:
-QTAILQ_REMOVE(>exp_open_zones, zone, entry);
-break;
-case NVME_ZONE_STATE_IMPLICITLY_OPEN:
-QTAILQ_REMOVE(>imp_open_zones, zone, entry);
-break;
-case NVME_ZONE_STATE_CLOSED:
-QTAILQ_REMOVE(>closed_zones, zone, entry);
-break;
-case NVME_ZONE_STATE_FULL:
-QTAILQ_REMOVE(>full_zones, zone, entry);
-default:
-;
-}
-}
-
-nvme_set_zone_state(zone, state);
-
-switch (state) {
-case NVME_ZONE_STATE_EXPLICITLY_OPEN:
-QTAILQ_INSERT_TAIL(>exp_open_zones, zone, entry);
-break;
-case NVME_ZONE_STATE_IMPLICITLY_OPEN:
-QTAILQ_INSERT_TAIL(>imp_open_zones, zone, entry);
-break;
-case NVME_ZONE_STATE_CLOSED:
-QTAILQ_INSERT_TAIL(>closed_zones, zone, entry);
-break;
-case NVME_ZONE_STATE_FULL:
-QTAILQ_INSERT_TAIL(>full_zones, zone, entry);
-case NVME_ZONE_STATE_READ_ONLY:
-break;
-default:
-zone->d.za = 0;
-}
-}
-
-static uint16_t nvme_zns_check_resources(NvmeNamespace *ns, uint32_t act,
- uint32_t opn, uint32_t zrwa)
-{
-if (zrwa > ns->zns.numzrwa) {
-return NVME_NOZRWA | NVME_DNR;
-}
-
-return NVME_SUCCESS;
-}
-
-/*
- * Check if we can open a zone without exceeding open/active limits.
- * AOR stands for "Active and Open Resources" (see TP 4053 section 2.5).
- */
-static uint16_t nvme_aor_check(NvmeNamespace *ns, uint32_t act, uint32_t opn)
-{
-return nvme_zns_check_resources(ns, act, opn, 0);
-}
-
 static NvmeFdpEvent *nvme_fdp_alloc_event(NvmeCtrl *n, NvmeFdpEventBuffer 
*ebuf)
 {
 NvmeFdpEvent *ret = NULL;
@@ -1769,355 +1708,11 @@ static inline uint32_t nvme_zone_idx(NvmeNamespace 
*ns, uint64_t slba)
 slba / ns->zone_size;
 }
 
-static inline NvmeZone *nvme_get_zone_by_slba(NvmeNamespace *ns, uint64_t slba)
-{
-uint32_t zone_idx = nvme_zone_idx(ns, slba);
-
-if (zone_idx >= ns->num_zones) {
-return NULL;
-}
-
-return >zone_array[zone_idx];
-}
-
-static uint16_t nvme_check_zone_state_for_write(NvmeZone *zone)
-{
-uint64_t zslba = zone->d.zslba;
-
-switch (nvme_get_zone_state(zone)) {
-case NVME_ZONE_STATE_EMPTY:
-case NVME_ZONE_STATE_IMPLICITLY_OPEN:
-case NVME_ZONE_STATE_EXPLICITLY_OPEN:
-case NVME_ZONE_STATE_CLOSED:
-return NVME_SUCCESS;
-case NVME_ZONE_STATE_FULL:
-trace_pci_nvme_err_zone_is_full(zslba);
-return NVME_ZONE_FULL;
-case NVME_ZONE_STATE_OFFLINE:
-trace_pci_nvme_err_zone_is_offline(zslba);
-return NVME_ZONE_OFFLINE;
-case NVME_ZONE_STATE_READ_ONLY:
-trace_pci_nvme_err_zone_is_read_only(zslba);
-return NVME_ZONE_READ_ONLY;
-default:
-assert(false);
-}
-
-return NVME_INTERNAL_DEV_ERROR;
-}
-
-static uint16_t nvme_check_zone_write(NvmeNamespace *ns, NvmeZone *zone,
-  uint64_t slba, uint32_t nlb)
-{
-uint64_t zcap = nvme_zone_wr_boundary(zone);
-uint16_t status;
-
-status = nvme_check_zone_state_for_write(zone);
-if (status) {
- 

[RFC v3 2/7] qcow2: add zd_extension configurations to zoned metadata

2024-01-22 Thread Sam Li
Zone descriptor extension data is host definied data that is
associated with each zone. Add zone descriptor extensions
to zonedmeta struct.

Signed-off-by: Sam Li 
---
 block/qcow2.c| 70 +---
 block/qcow2.h|  2 +
 include/block/block_int-common.h |  6 +++
 qapi/block-core.json |  4 ++
 4 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index db28585b82..5098edf656 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -448,9 +448,9 @@ qcow2_refresh_zonedmeta(BlockDriverState *bs)
 {
 int ret;
 BDRVQcow2State *s = bs->opaque;
-uint64_t wps_size = s->zoned_header.zonedmeta_size;
+uint64_t wps_size = s->zoned_header.zonedmeta_size -
+s->zded_size;
 g_autofree uint64_t *temp;
-
 temp = g_new(uint64_t, wps_size);
 ret = bdrv_pread(bs->file, s->zoned_header.zonedmeta_offset,
  wps_size, temp, 0);
@@ -459,7 +459,17 @@ qcow2_refresh_zonedmeta(BlockDriverState *bs)
 return ret;
 }
 
+g_autofree uint8_t *zded = NULL;
+zded = g_try_malloc0(s->zded_size);
+ret = bdrv_pread(bs->file, s->zoned_header.zonedmeta_offset + wps_size,
+ s->zded_size, zded, 0);
+if (ret < 0) {
+error_report("Can not read zded");
+return ret;
+}
+
 memcpy(bs->wps->wp, temp, wps_size);
+memcpy(bs->zd_extensions, zded, s->zded_size);
 return 0;
 }
 
@@ -520,6 +530,19 @@ qcow2_check_zone_options(Qcow2ZonedHeaderExtension 
*zone_opt)
 zone_opt->max_open_zones = sequential_zones;
 }
 
+if (zone_opt->zd_extension_size) {
+if (zone_opt->zd_extension_size & 0x3f) {
+   error_report("zone descriptor extension size must be a "
+"multiple of 64B");
+   return false;
+}
+
+if ((zone_opt->zd_extension_size >> 6) > 0xff) {
+error_report("Zone descriptor extension size is too large");
+return false;
+}
+}
+
 return true;
 }
 return false;
@@ -784,6 +807,8 @@ qcow2_read_extensions(BlockDriverState *bs, uint64_t 
start_offset,
 zoned_ext.conventional_zones =
 be32_to_cpu(zoned_ext.conventional_zones);
 zoned_ext.nr_zones = be32_to_cpu(zoned_ext.nr_zones);
+zoned_ext.zd_extension_size =
+be32_to_cpu(zoned_ext.zd_extension_size);
 zoned_ext.max_open_zones = be32_to_cpu(zoned_ext.max_open_zones);
 zoned_ext.max_active_zones =
 be32_to_cpu(zoned_ext.max_active_zones);
@@ -794,7 +819,8 @@ qcow2_read_extensions(BlockDriverState *bs, uint64_t 
start_offset,
 zoned_ext.zonedmeta_size = be64_to_cpu(zoned_ext.zonedmeta_size);
 s->zoned_header = zoned_ext;
 bs->wps = g_malloc(sizeof(BlockZoneWps)
-+ s->zoned_header.zonedmeta_size);
++ zoned_ext.zonedmeta_size - s->zded_size);
+bs->zd_extensions = g_malloc0(s->zded_size);
 ret = qcow2_refresh_zonedmeta(bs);
 if (ret < 0) {
 return ret;
@@ -2370,6 +2396,7 @@ static void qcow2_refresh_limits(BlockDriverState *bs, 
Error **errp)
 bs->bl.zone_size = s->zoned_header.zone_size;
 bs->bl.zone_capacity = s->zoned_header.zone_capacity;
 bs->bl.write_granularity = BDRV_SECTOR_SIZE;
+bs->bl.zd_extension_size = s->zoned_header.zd_extension_size;
 }
 
 static int GRAPH_UNLOCKED
@@ -3621,6 +3648,8 @@ int qcow2_update_header(BlockDriverState *bs)
 .conventional_zones =
 cpu_to_be32(s->zoned_header.conventional_zones),
 .nr_zones   = cpu_to_be32(s->zoned_header.nr_zones),
+.zd_extension_size  =
+cpu_to_be32(s->zoned_header.zd_extension_size),
 .max_open_zones = cpu_to_be32(s->zoned_header.max_open_zones),
 .max_active_zones   =
 cpu_to_be32(s->zoned_header.max_active_zones),
@@ -4373,6 +4402,15 @@ qcow2_co_create(BlockdevCreateOptions *create_options, 
Error **errp)
 }
 s->zoned_header.max_append_bytes = zone_host_managed->max_append_bytes;
 
+uint64_t zded_size = 0;
+if (zone_host_managed->has_descriptor_extension_size) {
+s->zoned_header.zd_extension_size =
+zone_host_managed->descriptor_extension_size;
+zded_size = s->zoned_header.zd_extension_size *
+bs->bl.nr_zones;
+}
+s->zded_size = zded_size;
+
 if (!qcow2_check_zone_options(>zoned_header)) {
 s->zoned_header.zoned = BLK_Z_NONE;
 ret = -EINVAL;
@@ -4380,7 +4418,7 @@ qcow2_co_create(BlockdevCreateOptions *create_options, 
Error **errp)
 }
 
 uint32_t nrz = s->zoned_header.nr_zones;
-zoned_meta_size =  sizeof(uint64_t) * nrz;
+

[RFC v3 3/7] hw/nvme: use blk_get_*() to access zone info in the block layer

2024-01-22 Thread Sam Li
The zone information is contained in the BlockLimits fileds. Add blk_get_*() 
functions
to access the block layer and update zone info accessing in the NVMe device 
emulation.

Signed-off-by: Sam Li 
---
 block/block-backend.c | 72 +++
 hw/nvme/ctrl.c| 34 +--
 hw/nvme/ns.c  | 61 --
 hw/nvme/nvme.h|  3 --
 include/sysemu/block-backend-io.h |  9 
 5 files changed, 111 insertions(+), 68 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 209eb07528..c23f2a731b 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -2359,6 +2359,78 @@ int blk_get_max_iov(BlockBackend *blk)
 return blk->root->bs->bl.max_iov;
 }
 
+uint8_t blk_get_zone_model(BlockBackend *blk)
+{
+BlockDriverState *bs = blk_bs(blk);
+IO_CODE();
+return bs ? bs->bl.zoned: 0;
+
+}
+
+uint32_t blk_get_zone_size(BlockBackend *blk)
+{
+BlockDriverState *bs = blk_bs(blk);
+IO_CODE();
+
+return bs ? bs->bl.zone_size : 0;
+}
+
+uint32_t blk_get_zone_capacity(BlockBackend *blk)
+{
+BlockDriverState *bs = blk_bs(blk);
+IO_CODE();
+
+return bs ? bs->bl.zone_capacity : 0;
+}
+
+uint32_t blk_get_max_open_zones(BlockBackend *blk)
+{
+BlockDriverState *bs = blk_bs(blk);
+IO_CODE();
+
+return bs ? bs->bl.max_open_zones : 0;
+}
+
+uint32_t blk_get_max_active_zones(BlockBackend *blk)
+{
+BlockDriverState *bs = blk_bs(blk);
+IO_CODE();
+
+return bs ? bs->bl.max_active_zones : 0;
+}
+
+uint32_t blk_get_max_append_sectors(BlockBackend *blk)
+{
+BlockDriverState *bs = blk_bs(blk);
+IO_CODE();
+
+return bs ? bs->bl.max_append_sectors : 0;
+}
+
+uint32_t blk_get_nr_zones(BlockBackend *blk)
+{
+BlockDriverState *bs = blk_bs(blk);
+IO_CODE();
+
+return bs ? bs->bl.nr_zones : 0;
+}
+
+uint32_t blk_get_write_granularity(BlockBackend *blk)
+{
+BlockDriverState *bs = blk_bs(blk);
+IO_CODE();
+
+return bs ? bs->bl.write_granularity : 0;
+}
+
+BlockZoneWps *blk_get_zone_wps(BlockBackend *blk)
+{
+BlockDriverState *bs = blk_bs(blk);
+IO_CODE();
+
+return bs ? bs->wps : NULL;
+}
+
 void *blk_try_blockalign(BlockBackend *blk, size_t size)
 {
 IO_CODE();
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index f026245d1e..e64b021454 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -417,18 +417,6 @@ static void nvme_assign_zone_state(NvmeNamespace *ns, 
NvmeZone *zone,
 static uint16_t nvme_zns_check_resources(NvmeNamespace *ns, uint32_t act,
  uint32_t opn, uint32_t zrwa)
 {
-if (ns->params.max_active_zones != 0 &&
-ns->nr_active_zones + act > ns->params.max_active_zones) {
-trace_pci_nvme_err_insuff_active_res(ns->params.max_active_zones);
-return NVME_ZONE_TOO_MANY_ACTIVE | NVME_DNR;
-}
-
-if (ns->params.max_open_zones != 0 &&
-ns->nr_open_zones + opn > ns->params.max_open_zones) {
-trace_pci_nvme_err_insuff_open_res(ns->params.max_open_zones);
-return NVME_ZONE_TOO_MANY_OPEN | NVME_DNR;
-}
-
 if (zrwa > ns->zns.numzrwa) {
 return NVME_NOZRWA | NVME_DNR;
 }
@@ -1988,9 +1976,9 @@ static uint16_t nvme_zrm_reset(NvmeNamespace *ns, 
NvmeZone *zone)
 static void nvme_zrm_auto_transition_zone(NvmeNamespace *ns)
 {
 NvmeZone *zone;
+int moz = blk_get_max_open_zones(ns->blkconf.blk);
 
-if (ns->params.max_open_zones &&
-ns->nr_open_zones == ns->params.max_open_zones) {
+if (moz && ns->nr_open_zones == moz) {
 zone = QTAILQ_FIRST(>imp_open_zones);
 if (zone) {
 /*
@@ -2160,7 +2148,7 @@ void nvme_rw_complete_cb(void *opaque, int ret)
 block_acct_done(stats, acct);
 }
 
-if (ns->params.zoned && nvme_is_write(req)) {
+if (blk_get_zone_model(blk) && nvme_is_write(req)) {
 nvme_finalize_zoned_write(ns, req);
 }
 
@@ -2882,7 +2870,7 @@ static void nvme_copy_out_completed_cb(void *opaque, int 
ret)
 goto out;
 }
 
-if (ns->params.zoned) {
+if (blk_get_zone_model(ns->blkconf.blk)) {
 nvme_advance_zone_wp(ns, iocb->zone, nlb);
 }
 
@@ -2994,7 +2982,7 @@ static void nvme_copy_in_completed_cb(void *opaque, int 
ret)
 goto invalid;
 }
 
-if (ns->params.zoned) {
+if (blk_get_zone_model(ns->blkconf.blk)) {
 status = nvme_check_zone_write(ns, iocb->zone, iocb->slba, nlb);
 if (status) {
 goto invalid;
@@ -3088,7 +3076,7 @@ static void nvme_do_copy(NvmeCopyAIOCB *iocb)
 }
 }
 
-if (ns->params.zoned) {
+if (blk_get_zone_model(ns->blkconf.blk)) {
 status = nvme_check_zone_read(ns, slba, nlb);
 if (status) {
 goto invalid;
@@ -3164,7 +3152,7 @@ static uint16_t nvme_copy(NvmeCtrl *n, NvmeRequest *req)
 
 iocb->slba = le64_to_cpu(copy->sdlba);
 
-if (ns->params.zoned) {
+if 

[RFC v3 0/7] Add persistence to NVMe ZNS emulation

2024-01-22 Thread Sam Li
ZNS emulation follows NVMe ZNS spec but the state of namespace
zones does not persist accross restarts of QEMU. This patch makes the
metadata of ZNS emulation persistent by using new block layer APIs and
the qcow2 img as backing file. It is the second part after the patches
- adding full zoned storage emulation to qcow2 driver [v7]

The metadata of ZNS emulation divides into two parts, zone metadata and
zone descriptor extension data. The zone metadata is composed of zone
states, zone type, wp and zone attributes. The zone information can be
stored at an uint64_t wp to save space and easy access. The structure of
wp of each zone is as follows:
|(4)| zone type (1)| zone attr (8)| wp (51) ||

The zone descriptor extension data is relatively small comparing to the
overall size therefore we adopt the option that store zded of all zones
in an array regardless of the valid bit set.

Creating a zns format qcow2 image file adds one more option zd_extension_size
to zoned device configurations.

For a closer look, you can apply the zns patches on this branch:
https://github.com/sgzerolc/qemu/tree/dev-qcow2-v7
Or use the local zns branch directly:
https://github.com/sgzerolc/qemu/tree/dev-zns-v7

To attach this file as emulated zns drive in the command line of QEMU, use:
  -drive file=${znsimg},id=nvmezns0,format=qcow2,if=none \
  -device nvme-ns,drive=nvmezns0,bus=nvme0,nsid=1,uuid=xxx \

Acked-by: Klaus Jensen 

---

v2->v3:
- fix compatability issue with the qcow2 patch series [Markus]
- address review comments [Markus]

v1->v2:
- split [v1 2/5] patch to three (doc, config, block layer API)
- adapt qcow2 v6

Sam Li (7):
  docs/qcow2: add zd_extension_size option to the zoned format feature
  qcow2: add zd_extension configurations to zoned metadata
  hw/nvme: use blk_get_*() to access zone info in the block layer
  hw/nvme: add blk_get_zone_extension to access zd_extensions
  hw/nvme: make the metadata of ZNS emulation persistent
  hw/nvme: refactor zone append write using block layer APIs
  hw/nvme: make ZDED persistent

 block/block-backend.c |   88 ++
 block/qcow2.c |  120 ++-
 block/qcow2.h |2 +
 docs/interop/qcow2.txt|9 +
 hw/nvme/ctrl.c| 1246 -
 hw/nvme/ns.c  |  162 +---
 hw/nvme/nvme.h|   95 +--
 include/block/block-common.h  |9 +
 include/block/block_int-common.h  |8 +
 include/sysemu/block-backend-io.h |   11 +
 include/sysemu/dma.h  |3 +
 qapi/block-core.json  |4 +
 system/dma-helpers.c  |   17 +
 13 files changed, 648 insertions(+), 1126 deletions(-)

-- 
2.40.1




[RFC v3 1/7] docs/qcow2: add zd_extension_size option to the zoned format feature

2024-01-22 Thread Sam Li
The NVMe ZNS command set has the zone descriptor extension feature for
associating the data to a zone. Devices that supports ZAC/ZBC have zero
zone descriptor extension size.

Signed-off-by: Sam Li 
---
 docs/interop/qcow2.txt | 9 +
 1 file changed, 9 insertions(+)

diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
index a8dd4c3b15..106477d9ad 100644
--- a/docs/interop/qcow2.txt
+++ b/docs/interop/qcow2.txt
@@ -436,6 +436,15 @@ The fields of the zoned extension are:
The offset of zoned metadata structure in the contained
image, in bytes.
 
+ 44 - 51:  zd_extension_size
+   The size of zone descriptor extension data in bytes.
+   The value must be a multiple of 64.
+
+   The zone descriptor extension feature is associating data
+   to a zone which is only available in the NVMe ZNS command
+   set. A value of zero indicates the feature is not
+   available.
+
 == Full disk encryption header pointer ==
 
 The full disk encryption header must be present if, and only if, the
-- 
2.40.1




[RFC v3 6/7] hw/nvme: refactor zone append write using block layer APIs

2024-01-22 Thread Sam Li
Signed-off-by: Sam Li 
---
 block/qcow2.c|   2 +-
 hw/nvme/ctrl.c   | 190 ---
 include/sysemu/dma.h |   3 +
 system/dma-helpers.c |  17 
 4 files changed, 162 insertions(+), 50 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 0bb249fa6e..43ee0f47b9 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2395,7 +2395,7 @@ static void qcow2_refresh_limits(BlockDriverState *bs, 
Error **errp)
 bs->bl.max_open_zones = s->zoned_header.max_open_zones;
 bs->bl.zone_size = s->zoned_header.zone_size;
 bs->bl.zone_capacity = s->zoned_header.zone_capacity;
-bs->bl.write_granularity = BDRV_SECTOR_SIZE;
+bs->bl.write_granularity = BDRV_SECTOR_SIZE; /* physical block size */
 bs->bl.zd_extension_size = s->zoned_header.zd_extension_size;
 }
 
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index e31aa52c06..de41d8bac8 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -1726,6 +1726,95 @@ static void nvme_misc_cb(void *opaque, int ret)
 nvme_enqueue_req_completion(nvme_cq(req), req);
 }
 
+typedef struct NvmeZoneCmdAIOCB {
+NvmeRequest *req;
+NvmeCmd *cmd;
+NvmeCtrl *n;
+
+union {
+struct {
+  uint32_t partial;
+  unsigned int nr_zones;
+  BlockZoneDescriptor *zones;
+} zone_report_data;
+struct {
+  int64_t offset;
+} zone_append_data;
+};
+} NvmeZoneCmdAIOCB;
+
+static void nvme_blk_zone_append_complete_cb(void *opaque, int ret)
+{
+NvmeZoneCmdAIOCB *cb = opaque;
+NvmeRequest *req = cb->req;
+int64_t *offset = (int64_t *)>cqe;
+
+if (ret) {
+nvme_aio_err(req, ret);
+}
+
+*offset = nvme_b2l(req->ns, cb->zone_append_data.offset);
+nvme_enqueue_req_completion(nvme_cq(req), req);
+g_free(cb);
+}
+
+static inline void nvme_blk_zone_append(BlockBackend *blk, int64_t *offset,
+  uint32_t align,
+  BlockCompletionFunc *cb,
+  NvmeZoneCmdAIOCB *aiocb)
+{
+NvmeRequest *req = aiocb->req;
+assert(req->sg.flags & NVME_SG_ALLOC);
+
+if (req->sg.flags & NVME_SG_DMA) {
+req->aiocb = dma_blk_zone_append(blk, >sg.qsg, (int64_t)offset,
+ align, cb, aiocb);
+} else {
+req->aiocb = blk_aio_zone_append(blk, offset, >sg.iov, 0,
+ cb, aiocb);
+}
+}
+
+static void nvme_zone_append_cb(void *opaque, int ret)
+{
+NvmeZoneCmdAIOCB *aiocb = opaque;
+NvmeRequest *req = aiocb->req;
+NvmeNamespace *ns = req->ns;
+
+BlockBackend *blk = ns->blkconf.blk;
+
+trace_pci_nvme_rw_cb(nvme_cid(req), blk_name(blk));
+
+if (ret) {
+goto out;
+}
+
+if (ns->lbaf.ms) {
+NvmeRwCmd *rw = (NvmeRwCmd *)>cmd;
+uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb) + 1;
+int64_t offset = aiocb->zone_append_data.offset;
+
+if (nvme_ns_ext(ns) || req->cmd.mptr) {
+uint16_t status;
+
+nvme_sg_unmap(>sg);
+status = nvme_map_mdata(nvme_ctrl(req), nlb, req);
+if (status) {
+ret = -EFAULT;
+goto out;
+}
+
+return nvme_blk_zone_append(blk, , 1,
+nvme_blk_zone_append_complete_cb,
+aiocb);
+}
+}
+
+out:
+nvme_blk_zone_append_complete_cb(aiocb, ret);
+}
+
+
 void nvme_rw_complete_cb(void *opaque, int ret)
 {
 NvmeRequest *req = opaque;
@@ -3052,6 +3141,9 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest 
*req, bool append,
 uint64_t mapped_size = data_size;
 uint64_t data_offset;
 BlockBackend *blk = ns->blkconf.blk;
+BlockZoneWps *wps = blk_get_zone_wps(blk);
+uint32_t zone_size = blk_get_zone_size(blk);
+uint32_t zone_idx;
 uint16_t status;
 
 if (nvme_ns_ext(ns)) {
@@ -3082,42 +3174,47 @@ static uint16_t nvme_do_write(NvmeCtrl *n, NvmeRequest 
*req, bool append,
 }
 
 if (blk_get_zone_model(blk)) {
-uint32_t zone_size = blk_get_zone_size(blk);
-uint32_t zone_idx = slba / zone_size;
-int64_t zone_start = zone_idx * zone_size;
+assert(wps);
+if (zone_size) {
+zone_idx = slba / zone_size;
+int64_t zone_start = zone_idx * zone_size;
+
+if (append) {
+bool piremap = !!(ctrl & NVME_RW_PIREMAP);
+
+if (n->params.zasl &&
+data_size > (uint64_t)
+n->page_size << n->params.zasl) {
+trace_pci_nvme_err_zasl(data_size);
+return NVME_INVALID_FIELD | NVME_DNR;
+}
 
-if (append) {
-bool piremap = !!(ctrl & NVME_RW_PIREMAP);
+rw->slba = cpu_to_le64(slba);
 
-if (n->params.zasl &&
-data_size > 

[RFC v3 4/7] hw/nvme: add blk_get_zone_extension to access zd_extensions

2024-01-22 Thread Sam Li
Signed-off-by: Sam Li 
---
 block/block-backend.c | 16 
 hw/nvme/ctrl.c| 20 ++--
 hw/nvme/ns.c  | 24 
 hw/nvme/nvme.h|  7 ---
 include/sysemu/block-backend-io.h |  2 ++
 5 files changed, 36 insertions(+), 33 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index c23f2a731b..3bebee12b9 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -2431,6 +2431,22 @@ BlockZoneWps *blk_get_zone_wps(BlockBackend *blk)
 return bs ? bs->wps : NULL;
 }
 
+uint8_t *blk_get_zone_extension(BlockBackend *blk)
+{
+BlockDriverState *bs = blk_bs(blk);
+IO_CODE();
+
+return bs ? bs->zd_extensions : NULL;
+}
+
+uint32_t blk_get_zd_ext_size(BlockBackend *blk)
+{
+BlockDriverState *bs = blk_bs(blk);
+IO_CODE();
+
+return bs ? bs->bl.zd_extension_size : 0;
+}
+
 void *blk_try_blockalign(BlockBackend *blk, size_t size)
 {
 IO_CODE();
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index e64b021454..dae6f00e4f 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -4004,6 +4004,12 @@ static uint16_t nvme_zone_mgmt_send_zrwa_flush(NvmeCtrl 
*n, NvmeZone *zone,
 return NVME_SUCCESS;
 }
 
+static inline uint8_t *nvme_get_zd_extension(NvmeNamespace *ns,
+uint32_t zone_idx)
+{
+return >zd_extensions[zone_idx * blk_get_zd_ext_size(ns->blkconf.blk)];
+}
+
 static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
 {
 NvmeZoneSendCmd *cmd = (NvmeZoneSendCmd *)>cmd;
@@ -4088,11 +4094,11 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, 
NvmeRequest *req)
 
 case NVME_ZONE_ACTION_SET_ZD_EXT:
 trace_pci_nvme_set_descriptor_extension(slba, zone_idx);
-if (all || !ns->params.zd_extension_size) {
+if (all || !blk_get_zd_ext_size(ns->blkconf.blk)) {
 return NVME_INVALID_FIELD | NVME_DNR;
 }
 zd_ext = nvme_get_zd_extension(ns, zone_idx);
-status = nvme_h2c(n, zd_ext, ns->params.zd_extension_size, req);
+status = nvme_h2c(n, zd_ext, blk_get_zd_ext_size(ns->blkconf.blk), 
req);
 if (status) {
 trace_pci_nvme_err_zd_extension_map_error(zone_idx);
 return status;
@@ -4183,7 +4189,8 @@ static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, 
NvmeRequest *req)
 if (zra != NVME_ZONE_REPORT && zra != NVME_ZONE_REPORT_EXTENDED) {
 return NVME_INVALID_FIELD | NVME_DNR;
 }
-if (zra == NVME_ZONE_REPORT_EXTENDED && !ns->params.zd_extension_size) {
+if (zra == NVME_ZONE_REPORT_EXTENDED &&
+!blk_get_zd_ext_size(ns->blkconf.blk)) {
 return NVME_INVALID_FIELD | NVME_DNR;
 }
 
@@ -4205,7 +4212,7 @@ static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, 
NvmeRequest *req)
 
 zone_entry_sz = sizeof(NvmeZoneDescr);
 if (zra == NVME_ZONE_REPORT_EXTENDED) {
-zone_entry_sz += ns->params.zd_extension_size;
+zone_entry_sz += blk_get_zd_ext_size(ns->blkconf.blk) ;
 }
 
 max_zones = (data_size - sizeof(NvmeZoneReportHeader)) / zone_entry_sz;
@@ -4243,11 +4250,12 @@ static uint16_t nvme_zone_mgmt_recv(NvmeCtrl *n, 
NvmeRequest *req)
 }
 
 if (zra == NVME_ZONE_REPORT_EXTENDED) {
+int zd_ext_size = blk_get_zd_ext_size(ns->blkconf.blk);
 if (zone->d.za & NVME_ZA_ZD_EXT_VALID) {
 memcpy(buf_p, nvme_get_zd_extension(ns, zone_idx),
-   ns->params.zd_extension_size);
+   zd_ext_size);
 }
-buf_p += ns->params.zd_extension_size;
+buf_p += zd_ext_size;
 }
 
 max_zones--;
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index 82d4f7932d..45c08391f5 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -218,15 +218,15 @@ static int 
nvme_ns_zoned_check_calc_geometry(NvmeNamespace *ns, Error **errp)
 
 static void nvme_ns_zoned_init_state(NvmeNamespace *ns)
 {
+BlockBackend *blk = ns->blkconf.blk;
 uint64_t start = 0, zone_size = ns->zone_size;
 uint64_t capacity = ns->num_zones * zone_size;
 NvmeZone *zone;
 int i;
 
 ns->zone_array = g_new0(NvmeZone, ns->num_zones);
-if (ns->params.zd_extension_size) {
-ns->zd_extensions = g_malloc0(ns->params.zd_extension_size *
-  ns->num_zones);
+if (blk_get_zone_extension(blk)) {
+ns->zd_extensions = blk_get_zone_extension(blk);
 }
 
 QTAILQ_INIT(>exp_open_zones);
@@ -275,7 +275,7 @@ static void nvme_ns_init_zoned(NvmeNamespace *ns)
 for (i = 0; i <= ns->id_ns.nlbaf; i++) {
 id_ns_z->lbafe[i].zsze = cpu_to_le64(ns->zone_size);
 id_ns_z->lbafe[i].zdes =
-ns->params.zd_extension_size >> 6; /* Units of 64B */
+blk_get_zd_ext_size(blk) >> 6; /* Units of 64B */
 }
 
 if (ns->params.zrwas) {
@@ -576,19 +576,6 @@ static 

[PATCH v7 2/4] qcow2: add configurations for zoned format extension

2024-01-22 Thread Sam Li
To configure the zoned format feature on the qcow2 driver, it
requires settings as: the device size, zone model, zone size,
zone capacity, number of conventional zones, limits on zone
resources (max append bytes, max open zones, and max_active_zones).

To create a qcow2 image with zoned format feature, use command like
this:
qemu-img create -f qcow2 zbc.qcow2 -o size=768M \
-o zone.size=64M -o zone.capacity=64M -o zone.conventional_zones=0 \
-o zone.max_append_bytes=4096 -o zone.max_open_zones=6 \
-o zone.max_active_zones=8 -o zone.mode=host-managed

Signed-off-by: Sam Li 
---
 block/qcow2.c| 252 ++-
 block/qcow2.h|  36 -
 docs/interop/qcow2.txt   | 107 -
 include/block/block_int-common.h |  13 ++
 qapi/block-core.json |  67 +++-
 5 files changed, 469 insertions(+), 6 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 9bee66fff5..b987f1e751 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -73,6 +73,7 @@ typedef struct {
 #define  QCOW2_EXT_MAGIC_CRYPTO_HEADER 0x0537be77
 #define  QCOW2_EXT_MAGIC_BITMAPS 0x23852875
 #define  QCOW2_EXT_MAGIC_DATA_FILE 0x44415441
+#define  QCOW2_EXT_MAGIC_ZONED_FORMAT 0x007a6264
 
 static int coroutine_fn
 qcow2_co_preadv_compressed(BlockDriverState *bs,
@@ -194,6 +195,68 @@ qcow2_extract_crypto_opts(QemuOpts *opts, const char *fmt, 
Error **errp)
 return cryptoopts_qdict;
 }
 
+/*
+ * Passing by the zoned device configurations by a zoned_header struct, check
+ * if the zone device options are under constraints. Return false when some
+ * option is invalid
+ */
+static inline bool
+qcow2_check_zone_options(Qcow2ZonedHeaderExtension *zone_opt)
+{
+if (zone_opt) {
+uint32_t sequential_zones;
+
+if (zone_opt->zone_size == 0) {
+error_report("Zoned extension header zone_size field "
+ "can not be 0");
+return false;
+}
+
+if (zone_opt->zone_capacity > zone_opt->zone_size) {
+error_report("zone capacity %" PRIu32 "B exceeds zone size "
+ "%" PRIu32 "B", zone_opt->zone_capacity,
+ zone_opt->zone_size);
+return false;
+}
+
+if (zone_opt->max_append_bytes + BDRV_SECTOR_SIZE >=
+zone_opt->zone_capacity) {
+error_report("max append bytes %" PRIu32 "B exceeds zone "
+ "capacity %" PRIu32 "B by more than block size",
+ zone_opt->zone_capacity,
+ zone_opt->max_append_bytes);
+return false;
+}
+
+if (zone_opt->max_active_zones > zone_opt->nr_zones) {
+error_report("Max_active_zones %" PRIu32 " exceeds "
+ "nr_zones %" PRIu32 ". Set it to nr_zones.",
+ zone_opt->max_active_zones, zone_opt->nr_zones);
+zone_opt->max_active_zones = zone_opt->nr_zones;
+}
+
+if (zone_opt->max_open_zones > zone_opt->max_active_zones) {
+error_report("Max_open_zones %" PRIu32 " exceeds "
+ "max_active_zones %" PRIu32 ". Set it to "
+ "max_active_zones.",
+ zone_opt->max_open_zones,
+ zone_opt->max_active_zones);
+zone_opt->max_open_zones = zone_opt->max_active_zones;
+}
+
+sequential_zones = zone_opt->nr_zones - zone_opt->conventional_zones;
+if (zone_opt->max_open_zones > sequential_zones) {
+error_report("Max_open_zones field can not be larger "
+ "than the number of SWR zones. Set it to number of "
+ "SWR zones %" PRIu32 ".", sequential_zones);
+zone_opt->max_open_zones = sequential_zones;
+}
+
+return true;
+}
+return false;
+}
+
 /*
  * read qcow2 extension and fill bs
  * start reading from start_offset
@@ -211,6 +274,7 @@ qcow2_read_extensions(BlockDriverState *bs, uint64_t 
start_offset,
 uint64_t offset;
 int ret;
 Qcow2BitmapHeaderExt bitmaps_ext;
+Qcow2ZonedHeaderExtension zoned_ext;
 
 if (need_update_header != NULL) {
 *need_update_header = false;
@@ -432,6 +496,51 @@ qcow2_read_extensions(BlockDriverState *bs, uint64_t 
start_offset,
 break;
 }
 
+case QCOW2_EXT_MAGIC_ZONED_FORMAT:
+{
+if (ext.len < sizeof(zoned_ext)) {
+/* Missing fields */
+error_setg(errp, "zoned_ext: len=%" PRIu32 " too small "
+   "(<%zu)", ext.len, sizeof(zoned_ext));
+return -EINVAL;
+}
+ret = bdrv_pread(bs->file, offset, ext.len, _ext, 0);
+if (ret < 0) {
+error_setg_errno(errp, -ret, "zoned_ext: "
+ "Could not read ext header");
+return ret;
+  

  1   2   3   >