from:"Cédric Le Goater"

Re: [PATCH v2 03/10] ppc/pnv: Add a Power11 Pnv11Chip, and a Power11 Machine

2024-04-26 Thread Cédric Le Goater


On 4/26/24 19:34, Aditya Gupta wrote:

Hello Cédric,



<...snip...>

- * Multi processor support for POWER8, POWER8NVL and POWER9.
+ * Multi processor support for POWER8, POWER8NVL, POWER9, POWER10 and Power11.


POWER10 -> Power10. Don't ask me why.


Sure, got it !




* XSCOM, serial communication sideband bus to configure chiplets.
* Simple LPC Controller.
* Processor Service Interface (PSI) Controller.
- * Interrupt Controller, XICS (POWER8) and XIVE (POWER9) and XIVE2 (Power10).
+ * Interrupt Controller, XICS (POWER8) and XIVE (POWER9) and XIVE2 (Power10 &
+   Power11).
* POWER8 PHB3 PCIe Host bridge and POWER9 PHB4 PCIe Host bridge.
* Simple OCC is an on-chip micro-controller used for power management tasks.
* iBT device to handle BMC communication, with the internal BMC simulator
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 74e7908e5ffb..06e272f3bdd3 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -431,13 +431,27 @@ static const uint8_t pa_features_31[] = { 74, 0,
   static void pnv_chip_power10_dt_populate(PnvChip *chip, void *fdt)
   {
-static const char compat[] = "ibm,power10-xscom\0ibm,xscom";
+PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
+static const char compat_p10[] = "ibm,power10-xscom\0ibm,xscom";
+static const char compat_p11[] = "ibm,power11-xscom\0ibm,xscom";
+
+PnvChipType chip_type = pcc->chip_type;
+const char *compat;
+int compat_len;
   int i;
+if (chip_type == PNV_TYPE_POWER11) {
+compat = compat_p11;
+compat_len = sizeof(compat_p11);
+} else {
+compat = compat_p10;
+compat_len = sizeof(compat_p10);
+}


please introduce a pnv_chip_power11_dt_populate() routine instead.


Okay.




   pnv_dt_xscom(chip, fdt, 0,
cpu_to_be64(PNV10_XSCOM_BASE(chip)),
cpu_to_be64(PNV10_XSCOM_SIZE),
- compat, sizeof(compat));
+ compat, compat_len);
   for (i = 0; i < chip->nr_cores; i++) {
   PnvCore *pnv_core = chip->cores[i];
@@ -1288,6 +1302,8 @@ static void pnv_chip_power10_intc_print_info(PnvChip 
*chip, PowerPCCPU *cpu,
   #define POWER10_CORE_MASK  (0xffull)
+#define POWER11_CORE_MASK  (0xffull)
+
   static void pnv_chip_power8_instance_init(Object *obj)
   {
   Pnv8Chip *chip8 = PNV8_CHIP(obj);
@@ -1838,6 +1854,7 @@ static void pnv_chip_power10_instance_init(Object *obj)
   static void pnv_chip_power10_quad_realize(Pnv10Chip *chip10, Error **errp)
   {
   PnvChip *chip = PNV_CHIP(chip10);
+PnvChipClass *chip_class = PNV_CHIP_GET_CLASS(chip);
   int i;
   chip10->nr_quads = DIV_ROUND_UP(chip->nr_cores, 4);
@@ -1846,7 +1863,11 @@ static void pnv_chip_power10_quad_realize(Pnv10Chip 
*chip10, Error **errp)
   for (i = 0; i < chip10->nr_quads; i++) {
   PnvQuad *eq = &chip10->quads[i];
-pnv_chip_quad_realize_one(chip, eq, chip->cores[i * 4],
+if (chip_class->chip_type == PNV_TYPE_POWER11)
+pnv_chip_quad_realize_one(chip, eq, chip->cores[i * 4],
+  PNV_QUAD_TYPE_NAME("power11"));
+else
+pnv_chip_quad_realize_one(chip, eq, chip->cores[i * 4],
 PNV_QUAD_TYPE_NAME("power10"));



Please introduce a new pnv_chip_quad_realize() routine taking an
extra type_name argument.


Sure, can do it. But as it's called from 'pnv_chip_power10_realize',
might require a 'pnv_chip_power11_realize' function also, so it can pass
type_name as "power11" vs "power10".


Do the Power11 and Power10 processors have the same XSCOM and MMIO
address spaces ?


Will do it.





   pnv_xscom_add_subregion(chip, PNV10_XSCOM_EQ_BASE(eq->quad_id),
@@ -2116,6 +2137,35 @@ static void pnv_chip_power10_class_init(ObjectClass 
*klass, void *data)
   &k->parent_realize);
   }
+static void pnv_chip_power11_class_init(ObjectClass *klass, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+PnvChipClass *k = PNV_CHIP_CLASS(klass);
+
+static const int i2c_ports_per_engine[PNV10_CHIP_MAX_I2C] = {14, 14, 2, 
16};
+
+k->chip_cfam_id = 0x120da0498000ull; /* P11 (with NX) */
+k->chip_type = PNV_TYPE_POWER11;
+k->cores_mask = POWER11_CORE_MASK;
+k->chip_pir = pnv_chip_pir_p10;
+k->intc_create = pnv_chip_power10_intc_create;
+k->intc_reset = pnv_chip_power10_intc_reset;
+k->intc_destroy = pnv_chip_power10_intc_destroy;
+k->intc_print_info = pnv_chip_power10_intc_print_info;
+k->isa_create = pnv_chip_power10_isa_create;
+k->dt_populate = pnv_chip_power10_dt_populate;
+k->pic_print_info = pnv_chip_power10_pic_print_info;
+k->xscom_core_base = pnv_chip_power10_xscom_core_base;
+k->xscom_pcba = pnv_chip_power10_xscom_pcba;
+dc->desc = "PowerNV Chip POWER11";
+k->num_pecs = PNV10_CHIP_MAX_PEC;
+k->i2c_num_engines = PNV10_CHIP_MAX_I2C;
+k->i2c_ports_per_engine =

Re: [PATCH v2 01/10] ppc/pseries: Add Power11 cpu type

2024-04-26 Thread Cédric Le Goater


On 4/26/24 19:05, Aditya Gupta wrote:

Hello Cédric,

Thanks for your reviews.

On Fri, Apr 26, 2024 at 04:27:04PM +0200, Cédric Le Goater wrote:

Hello Aditya

On 4/26/24 13:00, Aditya Gupta wrote:

Add base support for "--cpu power11" in QEMU.

Power11 core is same as Power10, hence reuse functions defined for
Power10.


Power11 uses the same ISA it seems. What's the value then ?


Yes, it uses the same ISA. But I added this option so we can have a
Power11 PVR in QEMU, which should be identified as Power11 in skiboot
and linux, hence defined Power11 cpu type, even though code here is
almost same as Power10.





Cc: Cédric Le Goater 
Cc: Daniel Henrique Barboza 
Cc: David Gibson 
Cc: Harsh Prateek Bora 
Cc: Mahesh J Salgaonkar 
Cc: Madhavan Srinivasan 
Cc: Nicholas Piggin 
Signed-off-by: Aditya Gupta 
---
   docs/system/ppc/pseries.rst |  6 +--
   hw/ppc/spapr_cpu_core.c |  1 +



I would separate the CPU target code adding support for a new POWER
Processor from the machine code (pseries).


Sure, I will split it in v3.





   target/ppc/compat.c |  7 +++
   target/ppc/cpu-models.c |  2 +
   target/ppc/cpu-models.h |  2 +
   target/ppc/cpu_init.c   | 99 +
   6 files changed, 114 insertions(+), 3 deletions(-)

diff --git a/docs/system/ppc/pseries.rst b/docs/system/ppc/pseries.rst
index a876d897b6e4..3277564b34c2 100644
--- a/docs/system/ppc/pseries.rst
+++ b/docs/system/ppc/pseries.rst
@@ -15,9 +15,9 @@ Supported devices
   =
* Multi processor support for many Power processors generations: POWER7,
-   POWER7+, POWER8, POWER8NVL, POWER9, and Power10. Support for POWER5+ exists,
-   but its state is unknown.
- * Interrupt Controller, XICS (POWER8) and XIVE (POWER9 and Power10)
+   POWER7+, POWER8, POWER8NVL, POWER9, Power10 and Power11. Support for POWER5+
+   exists, but its state is unknown.


The POWER5+ pseries machine seems functional with SLOF
(Sep 18 2023 18:57:48) and Linux 6.6.3 under TCG. It may be worth
mentioning (for AIX users) in another patch.


+ * Interrupt Controller, XICS (POWER8) and XIVE (POWER9, Power10, Power11)
* vPHB PCIe Host bridge.
* vscsi and vnet devices, compatible with the same devices available on a
  PowerVM hypervisor with VIOS managing LPARs.
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index e7c9edd033c8..c6e85c031178 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -401,6 +401,7 @@ static const TypeInfo spapr_cpu_core_type_infos[] = {
   DEFINE_SPAPR_CPU_CORE_TYPE("power9_v2.0"),
   DEFINE_SPAPR_CPU_CORE_TYPE("power9_v2.2"),
   DEFINE_SPAPR_CPU_CORE_TYPE("power10_v2.0"),
+DEFINE_SPAPR_CPU_CORE_TYPE("power11"),
   #ifdef CONFIG_KVM
   DEFINE_SPAPR_CPU_CORE_TYPE("host"),
   #endif
diff --git a/target/ppc/compat.c b/target/ppc/compat.c
index ebef2cccecf3..12dd8ae290ca 100644
--- a/target/ppc/compat.c
+++ b/target/ppc/compat.c
@@ -100,6 +100,13 @@ static const CompatInfo compat_table[] = {
   .pcr_level = PCR_COMPAT_3_10,
   .max_vthreads = 8,
   },
+{ /* POWER11, ISA3.10 */
+.name = "power11",
+.pvr = CPU_POWERPC_LOGICAL_3_10_PLUS,
+.pcr = PCR_COMPAT_3_10,
+.pcr_level = PCR_COMPAT_3_10,
+.max_vthreads = 8,
+},
   };
   static const CompatInfo *compat_by_pvr(uint32_t pvr)
diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c
index f2301b43f78b..1870e69b63df 100644
--- a/target/ppc/cpu-models.c
+++ b/target/ppc/cpu-models.c
@@ -734,6 +734,8 @@
   "POWER9 v2.2")
   POWERPC_DEF("power10_v2.0",  CPU_POWERPC_POWER10_DD20,   POWER10,
   "POWER10 v2.0")
+POWERPC_DEF("power11",  CPU_POWERPC_POWER11,   POWER11,
+"POWER11")
   #endif /* defined (TARGET_PPC64) */
   /***/
diff --git a/target/ppc/cpu-models.h b/target/ppc/cpu-models.h
index 0229ef3a9a5c..a1b540c3aa9e 100644
--- a/target/ppc/cpu-models.h
+++ b/target/ppc/cpu-models.h
@@ -354,6 +354,7 @@ enum {
   CPU_POWERPC_POWER10_BASE   = 0x0080,
   CPU_POWERPC_POWER10_DD1= 0x00801100,
   CPU_POWERPC_POWER10_DD20   = 0x00801200,
+CPU_POWERPC_POWER11= 0x00821200,


Is that a DD2.2 PVR? If so, it should be mentioned in the definition.



Yes, I have kept the last 2 bytes same as P10 DD2. I will mention it
above the line I have added it, in v3.


Skiboot reports :

[0.121234172,6] P11 DD1.00 detected


C.

Re: [PATCH v2 01/10] ppc/pseries: Add Power11 cpu type

2024-04-26 Thread Cédric Le Goater


On 4/26/24 19:12, Aditya Gupta wrote:

Hello Cédric,


diff --git a/docs/system/ppc/pseries.rst b/docs/system/ppc/pseries.rst
index a876d897b6e4..3277564b34c2 100644
--- a/docs/system/ppc/pseries.rst
+++ b/docs/system/ppc/pseries.rst
@@ -15,9 +15,9 @@ Supported devices
   =
* Multi processor support for many Power processors generations: POWER7,
-   POWER7+, POWER8, POWER8NVL, POWER9, and Power10. Support for POWER5+ exists,
-   but its state is unknown.
- * Interrupt Controller, XICS (POWER8) and XIVE (POWER9 and Power10)
+   POWER7+, POWER8, POWER8NVL, POWER9, Power10 and Power11. Support for POWER5+
+   exists, but its state is unknown.


The POWER5+ pseries machine seems functional with SLOF
(Sep 18 2023 18:57:48) and Linux 6.6.3 under TCG. It may be worth
mentioning (for AIX users) in another patch.


Sure, thanks for pointing this, will add these changes in a separate
patch.

There were some points in 'powernv' docs, that might need updating:

Quoting lines from docs/system/ppc/powernv.rst:


Missing devices
---

A lot is missing, among which :

  * I2C controllers (yet to be merged).
  * NPU/NPU2/NPU3 controllers.
  * EEH support for PCIe Host bridge controllers.
  * NX controller.
  * VAS controller.
  * chipTOD (Time Of Day).
  * Self Boot Engine (SBE).
  * FSI bus.


I can see that 'chipTOD' was added in commit 9a69950feb098. I2C mentions
yet to merge, is it merged yet ?


yes.



I will check whether this needs updating, but might do it in a separate
patch than this series.


Thanks,

Re: [PATCH v2 03/10] ppc/pnv: Add a Power11 Pnv11Chip, and a Power11 Machine

2024-04-26 Thread Cédric Le Goater


On 4/26/24 13:00, Aditya Gupta wrote:

Power11 core is same as Power10, use the existing functionalities to
introduce a Power11 chip and machine, with Power10 chip as parent of
Power11 chip, thus going through similar class_init paths

Cc: Cédric Le Goater 
Cc: Frédéric Barrat 
Cc: Mahesh J Salgaonkar 
Cc: Madhavan Srinivasan 
Cc: Nicholas Piggin 
Signed-off-by: Aditya Gupta 
---
  docs/system/ppc/powernv.rst |  9 ++--
  hw/ppc/pnv.c| 98 +++--
  hw/ppc/pnv_core.c   | 16 +-
  include/hw/ppc/pnv.h|  5 ++
  include/hw/ppc/pnv_chip.h   | 10 
  5 files changed, 130 insertions(+), 8 deletions(-)

diff --git a/docs/system/ppc/powernv.rst b/docs/system/ppc/powernv.rst
index 09f39658587d..626f2a119521 100644
--- a/docs/system/ppc/powernv.rst
+++ b/docs/system/ppc/powernv.rst
@@ -1,5 +1,5 @@
-PowerNV family boards (``powernv8``, ``powernv9``, ``powernv10``)
-==
+PowerNV family boards (``powernv8``, ``powernv9``, ``powernv10``, 
``powernv11``)
+
  
  PowerNV (as Non-Virtualized) is the "bare metal" platform using the

  OPAL firmware. It runs Linux on IBM and OpenPOWER systems and it can
@@ -15,11 +15,12 @@ beyond the scope of what QEMU addresses today.
  Supported devices
  -
  
- * Multi processor support for POWER8, POWER8NVL and POWER9.

+ * Multi processor support for POWER8, POWER8NVL, POWER9, POWER10 and Power11.


POWER10 -> Power10. Don't ask me why.


   * XSCOM, serial communication sideband bus to configure chiplets.
   * Simple LPC Controller.
   * Processor Service Interface (PSI) Controller.
- * Interrupt Controller, XICS (POWER8) and XIVE (POWER9) and XIVE2 (Power10).
+ * Interrupt Controller, XICS (POWER8) and XIVE (POWER9) and XIVE2 (Power10 &
+   Power11).
   * POWER8 PHB3 PCIe Host bridge and POWER9 PHB4 PCIe Host bridge.
   * Simple OCC is an on-chip micro-controller used for power management tasks.
   * iBT device to handle BMC communication, with the internal BMC simulator
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 74e7908e5ffb..06e272f3bdd3 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -431,13 +431,27 @@ static const uint8_t pa_features_31[] = { 74, 0,
  
  static void pnv_chip_power10_dt_populate(PnvChip *chip, void *fdt)

  {
-static const char compat[] = "ibm,power10-xscom\0ibm,xscom";
+PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
+static const char compat_p10[] = "ibm,power10-xscom\0ibm,xscom";
+static const char compat_p11[] = "ibm,power11-xscom\0ibm,xscom";
+
+PnvChipType chip_type = pcc->chip_type;
+const char *compat;
+int compat_len;
  int i;
  
+if (chip_type == PNV_TYPE_POWER11) {

+compat = compat_p11;
+compat_len = sizeof(compat_p11);
+} else {
+compat = compat_p10;
+compat_len = sizeof(compat_p10);
+}


please introduce a pnv_chip_power11_dt_populate() routine instead.


  pnv_dt_xscom(chip, fdt, 0,
   cpu_to_be64(PNV10_XSCOM_BASE(chip)),
   cpu_to_be64(PNV10_XSCOM_SIZE),
- compat, sizeof(compat));
+ compat, compat_len);
  
  for (i = 0; i < chip->nr_cores; i++) {

  PnvCore *pnv_core = chip->cores[i];
@@ -1288,6 +1302,8 @@ static void pnv_chip_power10_intc_print_info(PnvChip 
*chip, PowerPCCPU *cpu,
  
  #define POWER10_CORE_MASK  (0xffull)
  
+#define POWER11_CORE_MASK  (0xffull)

+
  static void pnv_chip_power8_instance_init(Object *obj)
  {
  Pnv8Chip *chip8 = PNV8_CHIP(obj);
@@ -1838,6 +1854,7 @@ static void pnv_chip_power10_instance_init(Object *obj)
  static void pnv_chip_power10_quad_realize(Pnv10Chip *chip10, Error **errp)
  {
  PnvChip *chip = PNV_CHIP(chip10);
+PnvChipClass *chip_class = PNV_CHIP_GET_CLASS(chip);
  int i;
  
  chip10->nr_quads = DIV_ROUND_UP(chip->nr_cores, 4);

@@ -1846,7 +1863,11 @@ static void pnv_chip_power10_quad_realize(Pnv10Chip 
*chip10, Error **errp)
  for (i = 0; i < chip10->nr_quads; i++) {
  PnvQuad *eq = &chip10->quads[i];
  
-pnv_chip_quad_realize_one(chip, eq, chip->cores[i * 4],

+if (chip_class->chip_type == PNV_TYPE_POWER11)
+pnv_chip_quad_realize_one(chip, eq, chip->cores[i * 4],
+  PNV_QUAD_TYPE_NAME("power11"));
+else
+pnv_chip_quad_realize_one(chip, eq, chip->cores[i * 4],
PNV_QUAD_TYPE_NAME("power10"));



Please introduce a new pnv_chip_quad_realize() routine taking an
extra type_name argument.


  
  pnv_xscom_add_subregion(chip, PNV10_XSCOM_EQ_BASE(eq->quad_id),

@@ -2116,6 +2137,35 @@ static void pnv_chip_power10_class_init(ObjectClass 
*klass, void *

Re: [PATCH v2 10/10] ppc/pnv: Update skiboot.lid to support Power11

2024-04-26 Thread Cédric Le Goater


On 4/26/24 13:00, Aditya Gupta wrote:

Skiboot/OPAL patches are in discussion upstream [1], with corresponding
commits in github repository [2].

Update skiboot.lid, with binary built from 'upstream_power11' branch
of skiboot repository with Power11 enablement patches [2].

---
This patch can be skipped for now, if need to wait for patches to be
merged in open-power/skiboot. 


yes.


Have updated the skiboot.lid to aid in testing this patch series.


Thanks for doing so.


---

[1]:https://lists.ozlabs.org/pipermail/skiboot/2024-April/018963.html
[2]:https://github.com/maheshsal/skiboot.

Cc: Cédric Le Goater
Cc: Joel Stanley
Cc: Mahesh J Salgaonkar
Cc: Madhavan Srinivasan
Cc: Nicholas Piggin
Signed-off-by: Aditya Gupta
---
  pc-bios/skiboot.lid | Bin 2527328 -> 2527328 bytes
  1 file changed, 0 insertions(+), 0 deletions(-)


We avoid sending such big blobs on the mailing list. We usually send a
PR to the sub-system maintainer (Nick). When the time comes (once support
is merged in skiboot), we will address the skiboot image update in QEMU.

C.

Re: [PATCH v2 08/10] ppc/pnv: Add SBE model for Power11

2024-04-26 Thread Cédric Le Goater


On 4/26/24 13:00, Aditya Gupta wrote:

Power11 core is same as Power10, reuse PNV10_SBE initialisation, by
declaring PNV11_SBE as child class of PNV10_SBE

Cc: Cédric Le Goater 
Cc: Frédéric Barrat 
Cc: Mahesh J Salgaonkar 
Cc: Madhavan Srinivasan 
Cc: Nicholas Piggin 
Signed-off-by: Aditya Gupta 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/ppc/pnv_sbe.c | 15 +++
  include/hw/ppc/pnv_sbe.h |  2 ++
  2 files changed, 17 insertions(+)

diff --git a/hw/ppc/pnv_sbe.c b/hw/ppc/pnv_sbe.c
index 74cee4eea7ad..a655cc7f8c9e 100644
--- a/hw/ppc/pnv_sbe.c
+++ b/hw/ppc/pnv_sbe.c
@@ -366,6 +366,20 @@ static const TypeInfo pnv_sbe_power10_type_info = {
  .name  = TYPE_PNV10_SBE,
  .parent= TYPE_PNV9_SBE,
  .class_init= pnv_sbe_power10_class_init,
+.class_base_init = pnv_sbe_power10_class_init,
+};
+
+static void pnv_sbe_power11_class_init(ObjectClass *klass, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+
+dc->desc = "PowerNV SBE Controller (POWER11)";
+}
+
+static const TypeInfo pnv_sbe_power11_type_info = {
+.name  = TYPE_PNV11_SBE,
+.parent= TYPE_PNV10_SBE,
+.class_init= pnv_sbe_power11_class_init,
  };
  
  static void pnv_sbe_realize(DeviceState *dev, Error **errp)

@@ -409,6 +423,7 @@ static void pnv_sbe_register_types(void)
  type_register_static(&pnv_sbe_type_info);
  type_register_static(&pnv_sbe_power9_type_info);
  type_register_static(&pnv_sbe_power10_type_info);
+type_register_static(&pnv_sbe_power11_type_info);
  }
  
  type_init(pnv_sbe_register_types);

diff --git a/include/hw/ppc/pnv_sbe.h b/include/hw/ppc/pnv_sbe.h
index b6b378ad14c7..09073a1256d6 100644
--- a/include/hw/ppc/pnv_sbe.h
+++ b/include/hw/ppc/pnv_sbe.h
@@ -29,6 +29,8 @@ OBJECT_DECLARE_TYPE(PnvSBE, PnvSBEClass, PNV_SBE)
  DECLARE_INSTANCE_CHECKER(PnvSBE, PNV9_SBE, TYPE_PNV9_SBE)
  #define TYPE_PNV10_SBE TYPE_PNV_SBE "-POWER10"
  DECLARE_INSTANCE_CHECKER(PnvSBE, PNV10_SBE, TYPE_PNV10_SBE)
+#define TYPE_PNV11_SBE TYPE_PNV_SBE "-POWER11"
+DECLARE_INSTANCE_CHECKER(PnvSBE, PNV11_SBE, TYPE_PNV11_SBE)
  
  struct PnvSBE {

  DeviceState xd;

Re: [PATCH v2 05/10] ppc/pnv: Add a LPC controller for POWER11

2024-04-26 Thread Cédric Le Goater


On 4/26/24 13:00, Aditya Gupta wrote:

Power11 core is same as Power10 core, declare PNV11_LPC as a child
class of PNV10_LPC, so it goes through same class init

Cc: Cédric Le Goater 
Cc: Frédéric Barrat 
Cc: Mahesh J Salgaonkar 
Cc: Madhavan Srinivasan 
Cc: Nicholas Piggin 
Signed-off-by: Aditya Gupta 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/ppc/pnv_lpc.c | 14 ++
  include/hw/ppc/pnv_lpc.h |  4 
  2 files changed, 18 insertions(+)

diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c
index d692858bee78..54b366221bc7 100644
--- a/hw/ppc/pnv_lpc.c
+++ b/hw/ppc/pnv_lpc.c
@@ -698,6 +698,19 @@ static const TypeInfo pnv_lpc_power10_info = {
  .class_init= pnv_lpc_power10_class_init,
  };
  
+static void pnv_lpc_power11_class_init(ObjectClass *klass, void *data)

+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+
+dc->desc = "PowerNV LPC Controller POWER11";
+}
+
+static const TypeInfo pnv_lpc_power11_info = {
+.name  = TYPE_PNV11_LPC,
+.parent= TYPE_PNV10_LPC,
+.class_init= pnv_lpc_power11_class_init,
+};
+
  static void pnv_lpc_realize(DeviceState *dev, Error **errp)
  {
  PnvLpcController *lpc = PNV_LPC(dev);
@@ -771,6 +784,7 @@ static void pnv_lpc_register_types(void)
  type_register_static(&pnv_lpc_power8_info);
  type_register_static(&pnv_lpc_power9_info);
  type_register_static(&pnv_lpc_power10_info);
+type_register_static(&pnv_lpc_power11_info);
  }
  
  type_init(pnv_lpc_register_types)

diff --git a/include/hw/ppc/pnv_lpc.h b/include/hw/ppc/pnv_lpc.h
index 5d22c4557041..1069bca38dfd 100644
--- a/include/hw/ppc/pnv_lpc.h
+++ b/include/hw/ppc/pnv_lpc.h
@@ -41,6 +41,10 @@ DECLARE_INSTANCE_CHECKER(PnvLpcController, PNV9_LPC,
  DECLARE_INSTANCE_CHECKER(PnvLpcController, PNV10_LPC,
   TYPE_PNV10_LPC)
  
+#define TYPE_PNV11_LPC TYPE_PNV_LPC "-POWER11"

+DECLARE_INSTANCE_CHECKER(PnvLpcController, PNV11_LPC,
+ TYPE_PNV11_LPC)
+
  struct PnvLpcController {
  DeviceState parent;

Re: [PATCH v2 07/10] ppc/pnv: Add a PSI bridge model for Power11

2024-04-26 Thread Cédric Le Goater


On 4/26/24 13:00, Aditya Gupta wrote:

Power11 core is same as Power10, reuse PNV10_PSI initialisation, by
declaring 'PNV11_PSI' as child class of 'PNV10_PSI'

Cc: Cédric Le Goater 
Cc: Frédéric Barrat 
Cc: Mahesh J Salgaonkar 
Cc: Madhavan Srinivasan 
Cc: Nicholas Piggin 
Signed-off-by: Aditya Gupta 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/ppc/pnv_psi.c | 24 
  include/hw/ppc/pnv_psi.h |  2 ++
  2 files changed, 26 insertions(+)

diff --git a/hw/ppc/pnv_psi.c b/hw/ppc/pnv_psi.c
index 26460d210deb..1f708b183a87 100644
--- a/hw/ppc/pnv_psi.c
+++ b/hw/ppc/pnv_psi.c
@@ -939,6 +939,29 @@ static const TypeInfo pnv_psi_power10_info = {
  .name  = TYPE_PNV10_PSI,
  .parent= TYPE_PNV9_PSI,
  .class_init= pnv_psi_power10_class_init,
+.class_base_init = pnv_psi_power10_class_init,
+};
+
+static void pnv_psi_power11_class_init(ObjectClass *klass, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+PnvPsiClass *ppc = PNV_PSI_CLASS(klass);
+static const char compat[] = "ibm,power11-psihb-x\0ibm,psihb-x";
+
+dc->desc= "PowerNV PSI Controller POWER11";
+
+/*
+ * ppc->xscom_pbca and ppc->xscom_size will be set up by
+ * pnv_psi_power10_class_init
+ */
+ppc->compat = compat;
+ppc->compat_size = sizeof(compat);
+}
+
+static const TypeInfo pnv_psi_power11_info = {
+.name  = TYPE_PNV11_PSI,
+.parent= TYPE_PNV10_PSI,
+.class_init= pnv_psi_power11_class_init,
  };
  
  static void pnv_psi_class_init(ObjectClass *klass, void *data)

@@ -973,6 +996,7 @@ static void pnv_psi_register_types(void)
  type_register_static(&pnv_psi_power8_info);
  type_register_static(&pnv_psi_power9_info);
  type_register_static(&pnv_psi_power10_info);
+type_register_static(&pnv_psi_power11_info);
  }
  
  type_init(pnv_psi_register_types);

diff --git a/include/hw/ppc/pnv_psi.h b/include/hw/ppc/pnv_psi.h
index 2a6f715350b6..9e1d31779204 100644
--- a/include/hw/ppc/pnv_psi.h
+++ b/include/hw/ppc/pnv_psi.h
@@ -70,6 +70,8 @@ struct Pnv9Psi {
  
  #define TYPE_PNV10_PSI TYPE_PNV_PSI "-POWER10"
  
+#define TYPE_PNV11_PSI TYPE_PNV_PSI "-POWER11"

+
  
  struct PnvPsiClass {

  SysBusDeviceClass parent_class;

Re: [PATCH v2 04/10] ppc/pnv: Add HOMER for POWER11

2024-04-26 Thread Cédric Le Goater


On 4/26/24 13:00, Aditya Gupta wrote:

Power11 core is same as Power10, declare PNV11_HOMER as a child
class of PNV10_HOMER, so it goes through same class init

Cc: Cédric Le Goater 
Cc: Frédéric Barrat 
Cc: Mahesh J Salgaonkar 
Cc: Madhavan Srinivasan 
Cc: Nicholas Piggin 
Signed-off-by: Aditya Gupta 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/ppc/pnv_homer.c | 8 
  include/hw/ppc/pnv_homer.h | 3 +++
  2 files changed, 11 insertions(+)

diff --git a/hw/ppc/pnv_homer.c b/hw/ppc/pnv_homer.c
index f9a203d11d0d..1b0123a6f2ea 100644
--- a/hw/ppc/pnv_homer.c
+++ b/hw/ppc/pnv_homer.c
@@ -394,6 +394,13 @@ static const TypeInfo pnv_homer_power10_type_info = {
  .parent= TYPE_PNV_HOMER,
  .instance_size = sizeof(PnvHomer),
  .class_init= pnv_homer_power10_class_init,
+.class_base_init = pnv_homer_power10_class_init,
+};
+
+static const TypeInfo pnv_homer_power11_type_info = {
+.name  = TYPE_PNV11_HOMER,
+.parent= TYPE_PNV10_HOMER,
+.instance_size = sizeof(PnvHomer),
  };
  
  static void pnv_homer_realize(DeviceState *dev, Error **errp)

@@ -442,6 +449,7 @@ static void pnv_homer_register_types(void)
  type_register_static(&pnv_homer_power8_type_info);
  type_register_static(&pnv_homer_power9_type_info);
  type_register_static(&pnv_homer_power10_type_info);
+type_register_static(&pnv_homer_power11_type_info);
  }
  
  type_init(pnv_homer_register_types);

diff --git a/include/hw/ppc/pnv_homer.h b/include/hw/ppc/pnv_homer.h
index b1c5d498dc55..8f1cc8135937 100644
--- a/include/hw/ppc/pnv_homer.h
+++ b/include/hw/ppc/pnv_homer.h
@@ -35,6 +35,9 @@ DECLARE_INSTANCE_CHECKER(PnvHomer, PNV9_HOMER,
  #define TYPE_PNV10_HOMER TYPE_PNV_HOMER "-POWER10"
  DECLARE_INSTANCE_CHECKER(PnvHomer, PNV10_HOMER,
   TYPE_PNV10_HOMER)
+#define TYPE_PNV11_HOMER TYPE_PNV_HOMER "-POWER11"
+DECLARE_INSTANCE_CHECKER(PnvHomer, PNV11_HOMER,
+ TYPE_PNV11_HOMER)
  
  struct PnvHomer {

  DeviceState parent;

Re: [PATCH v2 06/10] ppc/pnv: Add OCC for Power11

2024-04-26 Thread Cédric Le Goater


On 4/26/24 13:00, Aditya Gupta wrote:

Power11 core is same as Power10, reuse PNV10_OCC initialisation,
by declaring `PNV11_OCC` as child class of `PNV10_OCC`


Reviewed-by: Cédric Le Goater 

Thanks,

C.





Cc: Cédric Le Goater 
Cc: Frédéric Barrat 
Cc: Mahesh J Salgaonkar 
Cc: Madhavan Srinivasan 
Cc: Nicholas Piggin 
Signed-off-by: Aditya Gupta 
---
  hw/ppc/pnv_occ.c | 14 ++
  include/hw/ppc/pnv_occ.h |  2 ++
  2 files changed, 16 insertions(+)

diff --git a/hw/ppc/pnv_occ.c b/hw/ppc/pnv_occ.c
index 48123ceae176..4f510419045e 100644
--- a/hw/ppc/pnv_occ.c
+++ b/hw/ppc/pnv_occ.c
@@ -262,6 +262,19 @@ static const TypeInfo pnv_occ_power10_type_info = {
  .class_init= pnv_occ_power10_class_init,
  };
  
+static void pnv_occ_power11_class_init(ObjectClass *klass, void *data)

+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+
+dc->desc = "PowerNV OCC Controller (POWER11)";
+}
+
+static const TypeInfo pnv_occ_power11_type_info = {
+.name  = TYPE_PNV11_OCC,
+.parent= TYPE_PNV10_OCC,
+.class_init= pnv_occ_power11_class_init,
+};
+
  static void pnv_occ_realize(DeviceState *dev, Error **errp)
  {
  PnvOCC *occ = PNV_OCC(dev);
@@ -305,6 +318,7 @@ static void pnv_occ_register_types(void)
  type_register_static(&pnv_occ_power8_type_info);
  type_register_static(&pnv_occ_power9_type_info);
  type_register_static(&pnv_occ_power10_type_info);
+type_register_static(&pnv_occ_power11_type_info);
  }
  
  type_init(pnv_occ_register_types);

diff --git a/include/hw/ppc/pnv_occ.h b/include/hw/ppc/pnv_occ.h
index df321244e3b1..7b5e28f13b4f 100644
--- a/include/hw/ppc/pnv_occ.h
+++ b/include/hw/ppc/pnv_occ.h
@@ -34,6 +34,8 @@ DECLARE_INSTANCE_CHECKER(PnvOCC, PNV9_OCC,
   TYPE_PNV9_OCC)
  #define TYPE_PNV10_OCC TYPE_PNV_OCC "-POWER10"
  DECLARE_INSTANCE_CHECKER(PnvOCC, PNV10_OCC, TYPE_PNV10_OCC)
+#define TYPE_PNV11_OCC TYPE_PNV_OCC "-POWER11"
+DECLARE_INSTANCE_CHECKER(PnvOCC, PNV11_OCC, TYPE_PNV11_OCC)
  
  #define PNV_OCC_SENSOR_DATA_BLOCK_OFFSET 0x0058

  #define PNV_OCC_SENSOR_DATA_BLOCK_SIZE   0x00025800

Re: [PATCH v2 09/10] ppc: Make Power11 as default cpu type for 'pseries' and 'powernv'

2024-04-26 Thread Cédric Le Goater


On 4/26/24 13:00, Aditya Gupta wrote:

Make Power11 as default cpu type for 'pseries' and 'powernv' machine type,
with Power11 being the newest supported Power processor in QEMU.


This is too early. We should merge Power11 support first, possibly in 9.1,
and then change default in a future release, 9.2, 10.0

Thanks,

C.





Cc: Cédric Le Goater 
Cc: Daniel Henrique Barboza 
Cc: David Gibson 
Cc: Frédéric Barrat 
Cc: Harsh Prateek Bora 
Cc: Mahesh J Salgaonkar 
Cc: Madhavan Srinivasan 
Cc: Nicholas Piggin 
Signed-off-by: Aditya Gupta 
---
  hw/ppc/pnv.c   | 4 ++--
  hw/ppc/spapr.c | 2 +-
  2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 06e272f3bdd3..0c5a6bc424af 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -2531,8 +2531,6 @@ static void pnv_machine_p10_common_class_init(ObjectClass 
*oc, void *data)
  mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power10_v2.0");
  compat_props_add(mc->compat_props, phb_compat, G_N_ELEMENTS(phb_compat));
  
-mc->alias = "powernv";

-
  pmc->compat = compat;
  pmc->compat_size = sizeof(compat);
  pmc->dt_power_mgt = pnv_dt_power_mgt;
@@ -2569,6 +2567,8 @@ static void pnv_machine_power11_class_init(ObjectClass 
*oc, void *data)
  /* do power10_class_init as p11 core is same as p10 */
  pnv_machine_p10_common_class_init(oc, data);
  
+mc->alias = "powernv";

+
  mc->desc = "IBM PowerNV (Non-Virtualized) POWER11";
  mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power11");
  
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c

index d2d1e310a3be..1c3e2da8e9e4 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4698,7 +4698,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
void *data)
  
  smc->dr_lmb_enabled = true;

  smc->update_dt_enabled = true;
-mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power10_v2.0");
+mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power11");
  mc->has_hotpluggable_cpus = true;
  mc->nvdimm_supported = true;
  smc->resize_hpt_default = SPAPR_RESIZE_HPT_ENABLED;

Re: [PATCH v2 01/10] ppc/pseries: Add Power11 cpu type

2024-04-26 Thread Cédric Le Goater


Hello Aditya

On 4/26/24 13:00, Aditya Gupta wrote:

Add base support for "--cpu power11" in QEMU.

Power11 core is same as Power10, hence reuse functions defined for
Power10.


Power11 uses the same ISA it seems. What's the value then ?



Cc: Cédric Le Goater 
Cc: Daniel Henrique Barboza 
Cc: David Gibson 
Cc: Harsh Prateek Bora 
Cc: Mahesh J Salgaonkar 
Cc: Madhavan Srinivasan 
Cc: Nicholas Piggin 
Signed-off-by: Aditya Gupta 
---
  docs/system/ppc/pseries.rst |  6 +--
  hw/ppc/spapr_cpu_core.c |  1 +



I would separate the CPU target code adding support for a new POWER
Processor from the machine code (pseries).



  target/ppc/compat.c |  7 +++
  target/ppc/cpu-models.c |  2 +
  target/ppc/cpu-models.h |  2 +
  target/ppc/cpu_init.c   | 99 +
  6 files changed, 114 insertions(+), 3 deletions(-)

diff --git a/docs/system/ppc/pseries.rst b/docs/system/ppc/pseries.rst
index a876d897b6e4..3277564b34c2 100644
--- a/docs/system/ppc/pseries.rst
+++ b/docs/system/ppc/pseries.rst
@@ -15,9 +15,9 @@ Supported devices
  =
  
   * Multi processor support for many Power processors generations: POWER7,

-   POWER7+, POWER8, POWER8NVL, POWER9, and Power10. Support for POWER5+ exists,
-   but its state is unknown.
- * Interrupt Controller, XICS (POWER8) and XIVE (POWER9 and Power10)
+   POWER7+, POWER8, POWER8NVL, POWER9, Power10 and Power11. Support for POWER5+
+   exists, but its state is unknown.


The POWER5+ pseries machine seems functional with SLOF
(Sep 18 2023 18:57:48) and Linux 6.6.3 under TCG. It may be worth
mentioning (for AIX users) in another patch.


+ * Interrupt Controller, XICS (POWER8) and XIVE (POWER9, Power10, Power11)
   * vPHB PCIe Host bridge.
   * vscsi and vnet devices, compatible with the same devices available on a
 PowerVM hypervisor with VIOS managing LPARs.
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index e7c9edd033c8..c6e85c031178 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -401,6 +401,7 @@ static const TypeInfo spapr_cpu_core_type_infos[] = {
  DEFINE_SPAPR_CPU_CORE_TYPE("power9_v2.0"),
  DEFINE_SPAPR_CPU_CORE_TYPE("power9_v2.2"),
  DEFINE_SPAPR_CPU_CORE_TYPE("power10_v2.0"),
+DEFINE_SPAPR_CPU_CORE_TYPE("power11"),
  #ifdef CONFIG_KVM
  DEFINE_SPAPR_CPU_CORE_TYPE("host"),
  #endif
diff --git a/target/ppc/compat.c b/target/ppc/compat.c
index ebef2cccecf3..12dd8ae290ca 100644
--- a/target/ppc/compat.c
+++ b/target/ppc/compat.c
@@ -100,6 +100,13 @@ static const CompatInfo compat_table[] = {
  .pcr_level = PCR_COMPAT_3_10,
  .max_vthreads = 8,
  },
+{ /* POWER11, ISA3.10 */
+.name = "power11",
+.pvr = CPU_POWERPC_LOGICAL_3_10_PLUS,
+.pcr = PCR_COMPAT_3_10,
+.pcr_level = PCR_COMPAT_3_10,
+.max_vthreads = 8,
+},
  };
  
  static const CompatInfo *compat_by_pvr(uint32_t pvr)

diff --git a/target/ppc/cpu-models.c b/target/ppc/cpu-models.c
index f2301b43f78b..1870e69b63df 100644
--- a/target/ppc/cpu-models.c
+++ b/target/ppc/cpu-models.c
@@ -734,6 +734,8 @@
  "POWER9 v2.2")
  POWERPC_DEF("power10_v2.0",  CPU_POWERPC_POWER10_DD20,   POWER10,
  "POWER10 v2.0")
+POWERPC_DEF("power11",  CPU_POWERPC_POWER11,   POWER11,
+"POWER11")
  #endif /* defined (TARGET_PPC64) */
  
  /***/

diff --git a/target/ppc/cpu-models.h b/target/ppc/cpu-models.h
index 0229ef3a9a5c..a1b540c3aa9e 100644
--- a/target/ppc/cpu-models.h
+++ b/target/ppc/cpu-models.h
@@ -354,6 +354,7 @@ enum {
  CPU_POWERPC_POWER10_BASE   = 0x0080,
  CPU_POWERPC_POWER10_DD1= 0x00801100,
  CPU_POWERPC_POWER10_DD20   = 0x00801200,
+CPU_POWERPC_POWER11= 0x00821200,


Is that a DD2.2 PVR? If so, it should be mentioned in the definition.



Thanks,

C.




  CPU_POWERPC_970_v22= 0x00390202,
  CPU_POWERPC_970FX_v10  = 0x00391100,
  CPU_POWERPC_970FX_v20  = 0x003C0200,
@@ -391,6 +392,7 @@ enum {
  CPU_POWERPC_LOGICAL_2_07   = 0x0F04,
  CPU_POWERPC_LOGICAL_3_00   = 0x0F05,
  CPU_POWERPC_LOGICAL_3_10   = 0x0F06,
+CPU_POWERPC_LOGICAL_3_10_PLUS  = 0x0F07,
  };
  
  /* System version register (used on MPC 8xxx)*/

diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 6d82f24c8756..17c159c8187b 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -6655,6 +6655,105 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data)
  pcc->l1_icache_size = 0x8000;
  }
  
+static bool ppc_pvr_match_power11(PowerPCCPUClass *pcc, uint32_t pvr, bool best)

+{
+uint32_t base = pvr & CPU_POWERPC_POWER_S

Re: [PATCH v2 02/10] ppc/pnv: Introduce 'PnvChipClass::chip_type'

2024-04-26 Thread Cédric Le Goater


On 4/26/24 13:00, Aditya Gupta wrote:

Introduce 'PnvChipClass::chip_type' to easily tell which Power chip it
is.
This helps generalise similar code such as *_dt_populate, and removes
duplication of code between Power11 and Power10 changes in the following
patches.

Cc: Cédric Le Goater 
Cc: Frédéric Barrat 
Cc: Mahesh J Salgaonkar 
Cc: Madhavan Srinivasan 
Cc: Nicholas Piggin 
Signed-off-by: Aditya Gupta 
---
  hw/ppc/pnv.c  |  5 +
  include/hw/ppc/pnv_chip.h | 10 ++
  2 files changed, 15 insertions(+)

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 6e3a5ccdec76..74e7908e5ffb 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -1457,6 +1457,7 @@ static void pnv_chip_power8e_class_init(ObjectClass 
*klass, void *data)
  PnvChipClass *k = PNV_CHIP_CLASS(klass);
  
  k->chip_cfam_id = 0x221ef0498000ull;  /* P8 Murano DD2.1 */

+k->chip_type = PNV_TYPE_POWER8E;
  k->cores_mask = POWER8E_CORE_MASK;
  k->num_phbs = 3;
  k->chip_pir = pnv_chip_pir_p8;
@@ -1481,6 +1482,7 @@ static void pnv_chip_power8_class_init(ObjectClass 
*klass, void *data)
  PnvChipClass *k = PNV_CHIP_CLASS(klass);
  
  k->chip_cfam_id = 0x220ea0498000ull; /* P8 Venice DD2.0 */

+k->chip_type = PNV_TYPE_POWER8;
  k->cores_mask = POWER8_CORE_MASK;
  k->num_phbs = 3;
  k->chip_pir = pnv_chip_pir_p8;
@@ -1505,6 +1507,7 @@ static void pnv_chip_power8nvl_class_init(ObjectClass 
*klass, void *data)
  PnvChipClass *k = PNV_CHIP_CLASS(klass);
  
  k->chip_cfam_id = 0x120d30498000ull;  /* P8 Naples DD1.0 */

+k->chip_type = PNV_TYPE_POWER8NVL;
  k->cores_mask = POWER8_CORE_MASK;
  k->num_phbs = 4;
  k->chip_pir = pnv_chip_pir_p8;
@@ -1779,6 +1782,7 @@ static void pnv_chip_power9_class_init(ObjectClass 
*klass, void *data)
  static const int i2c_ports_per_engine[PNV9_CHIP_MAX_I2C] = {2, 13, 2, 2};
  
  k->chip_cfam_id = 0x220d10498000ull; /* P9 Nimbus DD2.0 */

+k->chip_type = PNV_TYPE_POWER9;
  k->cores_mask = POWER9_CORE_MASK;
  k->chip_pir = pnv_chip_pir_p9;
  k->intc_create = pnv_chip_power9_intc_create;
@@ -2091,6 +2095,7 @@ static void pnv_chip_power10_class_init(ObjectClass 
*klass, void *data)
  static const int i2c_ports_per_engine[PNV10_CHIP_MAX_I2C] = {14, 14, 2, 
16};
  
  k->chip_cfam_id = 0x120da0498000ull; /* P10 DD1.0 (with NX) */

+k->chip_type = PNV_TYPE_POWER10;
  k->cores_mask = POWER10_CORE_MASK;
  k->chip_pir = pnv_chip_pir_p10;
  k->intc_create = pnv_chip_power10_intc_create;
diff --git a/include/hw/ppc/pnv_chip.h b/include/hw/ppc/pnv_chip.h
index 8589f3291ed3..ebfe82b89537 100644
--- a/include/hw/ppc/pnv_chip.h
+++ b/include/hw/ppc/pnv_chip.h
@@ -17,12 +17,21 @@
  OBJECT_DECLARE_TYPE(PnvChip, PnvChipClass,
  PNV_CHIP)
  
+typedef enum PnvChipType {

+PNV_TYPE_POWER8E, /* AKA Murano (default) */
+PNV_TYPE_POWER8,  /* AKA Venice */
+PNV_TYPE_POWER8NVL,   /* AKA Naples */
+PNV_TYPE_POWER9,  /* AKA Nimbus */
+PNV_TYPE_POWER10,
+} PnvChipType;


Nope.


+
  struct PnvChip {
  /*< private >*/
  SysBusDevice parent_obj;
  
  /*< public >*/

  uint32_t chip_id;
+
  uint64_t ram_start;
  uint64_t ram_size;
  
@@ -137,6 +146,7 @@ struct PnvChipClass {

  SysBusDeviceClass parent_class;
  
  /*< public >*/

+PnvChipType  chip_type;
  uint64_t chip_cfam_id;
  uint64_t cores_mask;
  uint32_t num_pecs;


Adding an enum type under PnvChipClass which is a type already
looks wrong. Please find another way. It is possible I am sure.

Thanks,

C.

Re: [PATCH v2 4/4] vfio/ccw: Make vfio_ccw_register_irq_notifier() return a bool

2024-04-25 Thread Cédric Le Goater


On 4/25/24 14:55, Eric Farman wrote:

On Thu, 2024-04-25 at 12:56 +0200, Markus Armbruster wrote:

Cédric Le Goater  writes:


Since vfio_ccw_register_irq_notifier() takes an 'Error **'
argument,
best practices suggest to return a bool. See the qapi/error.h Rules
section.

Signed-off-by: Cédric Le Goater 
---
  hw/vfio/ccw.c | 22 +++---
  1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index
6764388bc47a970329fce2233626ccb8178e0165..1c630f6e9abe93ae0c2b5615d
4409669f096c8c9 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -379,7 +379,7 @@ read_err:
  css_inject_io_interrupt(sch);
  }
  
-static void vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev,

+static bool vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev,
     unsigned int irq,
     Error **errp)
  {
@@ -405,13 +405,13 @@ static void
vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev,
  break;
  default:
  error_setg(errp, "vfio: Unsupported device irq(%d)", irq);
-    return;
+    return false;
  }
  
  if (vdev->num_irqs < irq + 1) {

  error_setg(errp, "vfio: IRQ %u not available (number of
irqs %u)",
     irq, vdev->num_irqs);
-    return;
+    return false;
  }
  
  argsz = sizeof(*irq_info);

@@ -421,14 +421,14 @@ static void
vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev,
  if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO,
    irq_info) < 0 || irq_info->count < 1) {
  error_setg_errno(errp, errno, "vfio: Error getting irq
info");
-    return;
+    return false;
  }
  
  if (event_notifier_init(notifier, 0)) {

  error_setg_errno(errp, errno,
   "vfio: Unable to init event notifier for
irq (%d)",
   irq);
-    return;
+    return false;
  }
  
  fd = event_notifier_get_fd(notifier);

@@ -439,6 +439,8 @@ static void
vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev,
  qemu_set_fd_handler(fd, NULL, NULL, vcdev);
  event_notifier_cleanup(notifier);
  }
+
+    return true;
  }
  
  static void vfio_ccw_unregister_irq_notifier(VFIOCCWDevice *vcdev,

@@ -602,20 +604,18 @@ static void vfio_ccw_realize(DeviceState
*dev, Error **errp)
  goto out_region_err;
  }
  
-    vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX,

);
-    if (err) {
+    if (!vfio_ccw_register_irq_notifier(vcdev,
VFIO_CCW_IO_IRQ_INDEX, )) {


Please pass errp instead of &err


  goto out_io_notifier_err;
  }
  
  if (vcdev->crw_region) {

-    vfio_ccw_register_irq_notifier(vcdev,
VFIO_CCW_CRW_IRQ_INDEX, );
-    if (err) {
+    if (!vfio_ccw_register_irq_notifier(vcdev,
VFIO_CCW_CRW_IRQ_INDEX,
+    )) {


Likewise.


  goto out_irq_notifier_err;
  }
  }
  
-    vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_REQ_IRQ_INDEX,

);
-    if (err) {
+    if (!vfio_ccw_register_irq_notifier(vcdev,
VFIO_CCW_REQ_IRQ_INDEX, )) {
  /*
   * Report this error, but do not make it a failing
condition.
   * Lack of this IRQ in the host does not prevent normal
operation.

     */
    error_report_err(err);

Not this patch's problem, but here goes anyway: since this isn't an
error, we shouldn't use error_report_err().  Would warn_report_err()
be
appropriate?  info_report_err() doesn't exist, but it could.

Preferably with errp instead of &err (two times):
Reviewed-by: Markus Armbruster 



Don't recall why I used error_report_err() instead of something else
(or creating info_), but probably just familiarity. There's no need for
it (or the equivalent code in -ap) to be error, and could be another
cleanup.


yes. I will send an extra cleanup to replace error_... with warn_...
and another one to use errp.

Thanks,

C.




Reviewed-by: Eric Farman

Re: [PATCH v2 3/5] intel_iommu: Add a framework to do compatibility check with host IOMMU cap/ecap

2024-04-25 Thread Cédric Le Goater


On 4/25/24 10:46, Duan, Zhenzhong wrote:

Hi Cédric,


-Original Message-
From: Cédric Le Goater 
Subject: Re: [PATCH v2 3/5] intel_iommu: Add a framework to do
compatibility check with host IOMMU cap/ecap

Hello Zhenzhong,

On 4/18/24 10:42, Duan, Zhenzhong wrote:

Hi Cédric,


-Original Message-
From: Cédric Le Goater 
Subject: Re: [PATCH v2 3/5] intel_iommu: Add a framework to do
compatibility check with host IOMMU cap/ecap

Hello Zhenzhong

On 4/17/24 11:24, Duan, Zhenzhong wrote:




-Original Message-
From: Cédric Le Goater 
Subject: Re: [PATCH v2 3/5] intel_iommu: Add a framework to do
compatibility check with host IOMMU cap/ecap

On 4/17/24 06:21, Duan, Zhenzhong wrote:




-Original Message-
From: Cédric Le Goater 
Subject: Re: [PATCH v2 3/5] intel_iommu: Add a framework to do
compatibility check with host IOMMU cap/ecap

Hello,

On 4/16/24 09:09, Duan, Zhenzhong wrote:

Hi Cédric,


-Original Message-
From: Cédric Le Goater 
Subject: Re: [PATCH v2 3/5] intel_iommu: Add a framework to do
compatibility check with host IOMMU cap/ecap

On 4/8/24 10:44, Zhenzhong Duan wrote:

From: Yi Liu 

If check fails, the host side device(either vfio or vdpa device)

should

not

be passed to guest.

Implementation details for different backends will be in

following

patches.


Signed-off-by: Yi Liu 
Signed-off-by: Yi Sun 
Signed-off-by: Zhenzhong Duan 
---
   hw/i386/intel_iommu.c | 35

+++

   1 file changed, 35 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 4f84e2e801..a49b587c73 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -35,6 +35,7 @@
   #include "sysemu/kvm.h"
   #include "sysemu/dma.h"
   #include "sysemu/sysemu.h"
+#include "sysemu/iommufd.h"
   #include "hw/i386/apic_internal.h"
   #include "kvm/kvm_i386.h"
   #include "migration/vmstate.h"
@@ -3819,6 +3820,32 @@ VTDAddressSpace

*vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,

   return vtd_dev_as;
   }

+static int vtd_check_legacy_hdev(IntelIOMMUState *s,
+ HostIOMMUDevice *hiod,
+ Error **errp)
+{
+return 0;
+}
+
+static int vtd_check_iommufd_hdev(IntelIOMMUState *s,
+  HostIOMMUDevice *hiod,
+  Error **errp)
+{
+return 0;
+}
+
+static int vtd_check_hdev(IntelIOMMUState *s,

VTDHostIOMMUDevice

*vtd_hdev,

+  Error **errp)
+{
+HostIOMMUDevice *hiod = vtd_hdev->dev;
+
+if (object_dynamic_cast(OBJECT(hiod),

TYPE_HIOD_IOMMUFD))

{

+return vtd_check_iommufd_hdev(s, hiod, errp);
+}
+
+return vtd_check_legacy_hdev(s, hiod, errp);
+}



I think we should be using the .get_host_iommu_info() class

handler

instead. Can we refactor the code slightly to avoid this check on
the type ?


There is some difficulty in avoiding this check, the behavior of

vtd_check_legacy_hdev

and vtd_check_iommufd_hdev are different especially after

nesting

support introduced.

vtd_check_iommufd_hdev() has much wider check over cap/ecap

bits

besides aw_bits.

I think it is important to fully separate the vIOMMU model from the
host IOMMU backing device.


This comment is true for the structures also.


Could we introduce a new HostIOMMUDeviceClass
handler .check_hdev() handler, which would

call .get_host_iommu_info() ?

This means that HIOD_LEGACY_INFO and HIOD_IOMMUFD_INFO should

be

a common structure 'HostIOMMUDeviceInfo' holding all attributes
for the different backends. Each .get_host_iommu_info() implementation
would translate the specific host iommu device data presentation
into the common 'HostIOMMUDeviceInfo', this is true for host_aw_bits.


I see, it's just not easy to define the unified elements in

HostIOMMUDeviceInfo

so that they maps to bits or fields in host return IOMMU info.


The proposal is adding a vIOMMU <-> HostIOMMUDevice interface and a
new
API needs to be completely defined for it. The IOMMU backend
implementation
could be anything, legacy, iommufd, iommufd v2, some other framework
and
the vIOMMU shouldn't be aware of its implementation.

Exposing the kernel structures as done below should be avoided because
they are part of the QEMU <-> kernel IOMMUFD interface.



Different platform returned host IOMMU info is platform specific.
For vtd and smmu:

struct iommu_hw_info_vtd {
  __u32 flags;
  __u32 __reserved;
  __aligned_u64 cap_reg;
  __aligned_u64 ecap_reg;
};

struct iommu_hw_info_arm_smmuv3 {
 __u32 flags;
 __u32 __reserved;
 __u32 idr[6];
 __u32 iidr;
 __u32 aidr;
};

I can think of two kinds of declaration of HostIOMMUDeviceInfo:

struct HostIOMMUDeviceInfo {
  uint8_t aw_bits;
  enum iommu_hw_info_type type;
  unio

[PATCH v2 3/4] vfio/ccw: Use g_autofree variable in vfio_ccw_register_irq_notifier()

2024-04-25 Thread Cédric Le Goater

Signed-off-by: Cédric Le Goater 
---
 hw/vfio/ccw.c | 9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 
90e4a534371684c08e112364e1537eb8979f73f4..6764388bc47a970329fce2233626ccb8178e0165
 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -384,7 +384,7 @@ static void vfio_ccw_register_irq_notifier(VFIOCCWDevice 
*vcdev,
Error **errp)
 {
 VFIODevice *vdev = >vdev;
-struct vfio_irq_info *irq_info;
+g_autofree struct vfio_irq_info *irq_info = NULL;
 size_t argsz;
 int fd;
 EventNotifier *notifier;
@@ -421,14 +421,14 @@ static void vfio_ccw_register_irq_notifier(VFIOCCWDevice 
*vcdev,
 if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO,
   irq_info) < 0 || irq_info->count < 1) {
 error_setg_errno(errp, errno, "vfio: Error getting irq info");
-goto out_free_info;
+return;
 }
 
 if (event_notifier_init(notifier, 0)) {
 error_setg_errno(errp, errno,
  "vfio: Unable to init event notifier for irq (%d)",
  irq);
-goto out_free_info;
+return;
 }
 
 fd = event_notifier_get_fd(notifier);
@@ -439,9 +439,6 @@ static void vfio_ccw_register_irq_notifier(VFIOCCWDevice 
*vcdev,
 qemu_set_fd_handler(fd, NULL, NULL, vcdev);
 event_notifier_cleanup(notifier);
 }
-
-out_free_info:
-g_free(irq_info);
 }
 
 static void vfio_ccw_unregister_irq_notifier(VFIOCCWDevice *vcdev,
-- 
2.44.0

[PATCH v2 1/4] vfio/ap: Use g_autofree variable in vfio_ap_register_irq_notifier()

2024-04-25 Thread Cédric Le Goater

Signed-off-by: Cédric Le Goater 
---
 hw/vfio/ap.c | 10 +++---
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
index 
7c4caa5938636937680fec87e999249ac84a4498..03f8ffaa5e2bf13cf8daa2f44aa4cf17809abd94
 100644
--- a/hw/vfio/ap.c
+++ b/hw/vfio/ap.c
@@ -77,7 +77,7 @@ static void vfio_ap_register_irq_notifier(VFIOAPDevice 
*vapdev,
 size_t argsz;
 IOHandler *fd_read;
 EventNotifier *notifier;
-struct vfio_irq_info *irq_info;
+g_autofree struct vfio_irq_info *irq_info = NULL;
 VFIODevice *vdev = >vdev;
 
 switch (irq) {
@@ -104,14 +104,14 @@ static void vfio_ap_register_irq_notifier(VFIOAPDevice 
*vapdev,
 if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO,
   irq_info) < 0 || irq_info->count < 1) {
 error_setg_errno(errp, errno, "vfio: Error getting irq info");
-goto out_free_info;
+return;
 }
 
 if (event_notifier_init(notifier, 0)) {
 error_setg_errno(errp, errno,
  "vfio: Unable to init event notifier for irq (%d)",
  irq);
-goto out_free_info;
+return;
 }
 
 fd = event_notifier_get_fd(notifier);
@@ -122,10 +122,6 @@ static void vfio_ap_register_irq_notifier(VFIOAPDevice 
*vapdev,
 qemu_set_fd_handler(fd, NULL, NULL, vapdev);
 event_notifier_cleanup(notifier);
 }
-
-out_free_info:
-g_free(irq_info);
-
 }
 
 static void vfio_ap_unregister_irq_notifier(VFIOAPDevice *vapdev,
-- 
2.44.0

[PATCH v2 2/4] vfio/ap: Make vfio_ap_register_irq_notifier() return a bool

2024-04-25 Thread Cédric Le Goater

Since vfio_ap_register_irq_notifier() takes an 'Error **' argument,
best practices suggest to return a bool. See the qapi/error.h Rules
section.

Signed-off-by: Cédric Le Goater 
---
 hw/vfio/ap.c | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
index 
03f8ffaa5e2bf13cf8daa2f44aa4cf17809abd94..8bb024e2fde4a1d72346dee4b662d762374326b9
 100644
--- a/hw/vfio/ap.c
+++ b/hw/vfio/ap.c
@@ -70,7 +70,7 @@ static void vfio_ap_req_notifier_handler(void *opaque)
 }
 }
 
-static void vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev,
+static bool vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev,
   unsigned int irq, Error **errp)
 {
 int fd;
@@ -87,13 +87,13 @@ static void vfio_ap_register_irq_notifier(VFIOAPDevice 
*vapdev,
 break;
 default:
 error_setg(errp, "vfio: Unsupported device irq(%d)", irq);
-return;
+return false;
 }
 
 if (vdev->num_irqs < irq + 1) {
 error_setg(errp, "vfio: IRQ %u not available (number of irqs %u)",
irq, vdev->num_irqs);
-return;
+return false;
 }
 
 argsz = sizeof(*irq_info);
@@ -104,14 +104,14 @@ static void vfio_ap_register_irq_notifier(VFIOAPDevice 
*vapdev,
 if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO,
   irq_info) < 0 || irq_info->count < 1) {
 error_setg_errno(errp, errno, "vfio: Error getting irq info");
-return;
+return false;
 }
 
 if (event_notifier_init(notifier, 0)) {
 error_setg_errno(errp, errno,
  "vfio: Unable to init event notifier for irq (%d)",
  irq);
-return;
+return false;
 }
 
 fd = event_notifier_get_fd(notifier);
@@ -122,6 +122,8 @@ static void vfio_ap_register_irq_notifier(VFIOAPDevice 
*vapdev,
 qemu_set_fd_handler(fd, NULL, NULL, vapdev);
 event_notifier_cleanup(notifier);
 }
+
+return true;
 }
 
 static void vfio_ap_unregister_irq_notifier(VFIOAPDevice *vapdev,
@@ -167,8 +169,7 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp)
 goto error;
 }
 
-vfio_ap_register_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX, );
-if (err) {
+if (!vfio_ap_register_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX, )) {
 /*
  * Report this error, but do not make it a failing condition.
  * Lack of this IRQ in the host does not prevent normal operation.
-- 
2.44.0

[PATCH v2 4/4] vfio/ccw: Make vfio_ccw_register_irq_notifier() return a bool

2024-04-25 Thread Cédric Le Goater

Since vfio_ccw_register_irq_notifier() takes an 'Error **' argument,
best practices suggest to return a bool. See the qapi/error.h Rules
section.

Signed-off-by: Cédric Le Goater 
---
 hw/vfio/ccw.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 
6764388bc47a970329fce2233626ccb8178e0165..1c630f6e9abe93ae0c2b5615d4409669f096c8c9
 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -379,7 +379,7 @@ read_err:
 css_inject_io_interrupt(sch);
 }
 
-static void vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev,
+static bool vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev,
unsigned int irq,
Error **errp)
 {
@@ -405,13 +405,13 @@ static void vfio_ccw_register_irq_notifier(VFIOCCWDevice 
*vcdev,
 break;
 default:
 error_setg(errp, "vfio: Unsupported device irq(%d)", irq);
-return;
+return false;
 }
 
 if (vdev->num_irqs < irq + 1) {
 error_setg(errp, "vfio: IRQ %u not available (number of irqs %u)",
irq, vdev->num_irqs);
-return;
+return false;
 }
 
 argsz = sizeof(*irq_info);
@@ -421,14 +421,14 @@ static void vfio_ccw_register_irq_notifier(VFIOCCWDevice 
*vcdev,
 if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO,
   irq_info) < 0 || irq_info->count < 1) {
 error_setg_errno(errp, errno, "vfio: Error getting irq info");
-return;
+return false;
 }
 
 if (event_notifier_init(notifier, 0)) {
 error_setg_errno(errp, errno,
  "vfio: Unable to init event notifier for irq (%d)",
  irq);
-return;
+return false;
 }
 
 fd = event_notifier_get_fd(notifier);
@@ -439,6 +439,8 @@ static void vfio_ccw_register_irq_notifier(VFIOCCWDevice 
*vcdev,
 qemu_set_fd_handler(fd, NULL, NULL, vcdev);
 event_notifier_cleanup(notifier);
 }
+
+return true;
 }
 
 static void vfio_ccw_unregister_irq_notifier(VFIOCCWDevice *vcdev,
@@ -602,20 +604,18 @@ static void vfio_ccw_realize(DeviceState *dev, Error 
**errp)
 goto out_region_err;
 }
 
-vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX, );
-if (err) {
+if (!vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX, )) {
 goto out_io_notifier_err;
 }
 
 if (vcdev->crw_region) {
-vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX, );
-if (err) {
+if (!vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX,
+)) {
 goto out_irq_notifier_err;
 }
 }
 
-vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_REQ_IRQ_INDEX, );
-if (err) {
+if (!vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_REQ_IRQ_INDEX, )) {
 /*
  * Report this error, but do not make it a failing condition.
  * Lack of this IRQ in the host does not prevent normal operation.
-- 
2.44.0

[PATCH] vfio/ccw: Use g_autofree variable

2024-04-24 Thread Cédric Le Goater

Also change the return value of vfio_ccw_register_irq_notifier() to be
a bool since it takes an 'Error **' argument. See the qapi/error.h
Rules section.

Signed-off-by: Cédric Le Goater 
---
 hw/vfio/ccw.c | 25 +++--
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 
90e4a534371684c08e112364e1537eb8979f73f4..1c630f6e9abe93ae0c2b5615d4409669f096c8c9
 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -379,12 +379,12 @@ read_err:
 css_inject_io_interrupt(sch);
 }
 
-static void vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev,
+static bool vfio_ccw_register_irq_notifier(VFIOCCWDevice *vcdev,
unsigned int irq,
Error **errp)
 {
 VFIODevice *vdev = >vdev;
-struct vfio_irq_info *irq_info;
+g_autofree struct vfio_irq_info *irq_info = NULL;
 size_t argsz;
 int fd;
 EventNotifier *notifier;
@@ -405,13 +405,13 @@ static void vfio_ccw_register_irq_notifier(VFIOCCWDevice 
*vcdev,
 break;
 default:
 error_setg(errp, "vfio: Unsupported device irq(%d)", irq);
-return;
+return false;
 }
 
 if (vdev->num_irqs < irq + 1) {
 error_setg(errp, "vfio: IRQ %u not available (number of irqs %u)",
irq, vdev->num_irqs);
-return;
+return false;
 }
 
 argsz = sizeof(*irq_info);
@@ -421,14 +421,14 @@ static void vfio_ccw_register_irq_notifier(VFIOCCWDevice 
*vcdev,
 if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO,
   irq_info) < 0 || irq_info->count < 1) {
 error_setg_errno(errp, errno, "vfio: Error getting irq info");
-goto out_free_info;
+return false;
 }
 
 if (event_notifier_init(notifier, 0)) {
 error_setg_errno(errp, errno,
  "vfio: Unable to init event notifier for irq (%d)",
  irq);
-goto out_free_info;
+return false;
 }
 
 fd = event_notifier_get_fd(notifier);
@@ -440,8 +440,7 @@ static void vfio_ccw_register_irq_notifier(VFIOCCWDevice 
*vcdev,
 event_notifier_cleanup(notifier);
 }
 
-out_free_info:
-g_free(irq_info);
+return true;
 }
 
 static void vfio_ccw_unregister_irq_notifier(VFIOCCWDevice *vcdev,
@@ -605,20 +604,18 @@ static void vfio_ccw_realize(DeviceState *dev, Error 
**errp)
 goto out_region_err;
 }
 
-vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX, );
-if (err) {
+if (!vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_IO_IRQ_INDEX, )) {
 goto out_io_notifier_err;
 }
 
 if (vcdev->crw_region) {
-vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX, );
-if (err) {
+if (!vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_CRW_IRQ_INDEX,
+)) {
 goto out_irq_notifier_err;
 }
 }
 
-vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_REQ_IRQ_INDEX, );
-if (err) {
+if (!vfio_ccw_register_irq_notifier(vcdev, VFIO_CCW_REQ_IRQ_INDEX, )) {
 /*
  * Report this error, but do not make it a failing condition.
  * Lack of this IRQ in the host does not prevent normal operation.
-- 
2.44.0

[PATCH] vfio/ap: Use g_autofree variable

2024-04-24 Thread Cédric Le Goater

Also change the return value of vfio_ap_register_irq_notifier() to be
a bool since it takes an 'Error **' argument. See the qapi/error.h
Rules section.

Signed-off-by: Cédric Le Goater 
---
 hw/vfio/ap.c | 19 ---
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
index 
7c4caa5938636937680fec87e999249ac84a4498..8bb024e2fde4a1d72346dee4b662d762374326b9
 100644
--- a/hw/vfio/ap.c
+++ b/hw/vfio/ap.c
@@ -70,14 +70,14 @@ static void vfio_ap_req_notifier_handler(void *opaque)
 }
 }
 
-static void vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev,
+static bool vfio_ap_register_irq_notifier(VFIOAPDevice *vapdev,
   unsigned int irq, Error **errp)
 {
 int fd;
 size_t argsz;
 IOHandler *fd_read;
 EventNotifier *notifier;
-struct vfio_irq_info *irq_info;
+g_autofree struct vfio_irq_info *irq_info = NULL;
 VFIODevice *vdev = >vdev;
 
 switch (irq) {
@@ -87,13 +87,13 @@ static void vfio_ap_register_irq_notifier(VFIOAPDevice 
*vapdev,
 break;
 default:
 error_setg(errp, "vfio: Unsupported device irq(%d)", irq);
-return;
+return false;
 }
 
 if (vdev->num_irqs < irq + 1) {
 error_setg(errp, "vfio: IRQ %u not available (number of irqs %u)",
irq, vdev->num_irqs);
-return;
+return false;
 }
 
 argsz = sizeof(*irq_info);
@@ -104,14 +104,14 @@ static void vfio_ap_register_irq_notifier(VFIOAPDevice 
*vapdev,
 if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO,
   irq_info) < 0 || irq_info->count < 1) {
 error_setg_errno(errp, errno, "vfio: Error getting irq info");
-goto out_free_info;
+return false;
 }
 
 if (event_notifier_init(notifier, 0)) {
 error_setg_errno(errp, errno,
  "vfio: Unable to init event notifier for irq (%d)",
  irq);
-goto out_free_info;
+return false;
 }
 
 fd = event_notifier_get_fd(notifier);
@@ -123,9 +123,7 @@ static void vfio_ap_register_irq_notifier(VFIOAPDevice 
*vapdev,
 event_notifier_cleanup(notifier);
 }
 
-out_free_info:
-g_free(irq_info);
-
+return true;
 }
 
 static void vfio_ap_unregister_irq_notifier(VFIOAPDevice *vapdev,
@@ -171,8 +169,7 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp)
 goto error;
 }
 
-vfio_ap_register_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX, );
-if (err) {
+if (!vfio_ap_register_irq_notifier(vapdev, VFIO_AP_REQ_IRQ_INDEX, )) {
 /*
  * Report this error, but do not make it a failing condition.
  * Lack of this IRQ in the host does not prevent normal operation.
-- 
2.44.0

[PATCH] ppc/pnv: Introduce pnv_chip_foreach_cpu()

2024-04-24 Thread Cédric Le Goater

This helper routine uses the machine definition, sockets, cores and
threads, to loop on all CPUs of the machine. Replace CPU_FOREACH()
with it.

Signed-off-by: Cédric Le Goater 
---
 hw/ppc/pnv.c | 48 
 1 file changed, 36 insertions(+), 12 deletions(-)

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 
6e3a5ccdec764c8f6cbd076e27f59c7082e64876..5f400ed127921c4c3a45bc54863b2cafa53e7030
 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -2264,6 +2264,21 @@ PowerPCCPU *pnv_chip_find_cpu(PnvChip *chip, uint32_t 
pir)
 return NULL;
 }
 
+static void pnv_chip_foreach_cpu(PnvChip *chip,
+   void (*fn)(PnvChip *chip, PowerPCCPU *cpu, void *opaque),
+   void *opaque)
+{
+int i, j;
+
+for (i = 0; i < chip->nr_cores; i++) {
+PnvCore *pc = chip->cores[i];
+
+for (j = 0; j < CPU_CORE(pc)->nr_threads; j++) {
+fn(chip, pc->threads[j], opaque);
+}
+}
+}
+
 static ICSState *pnv_ics_get(XICSFabric *xi, int irq)
 {
 PnvMachineState *pnv = PNV_MACHINE(xi);
@@ -2332,23 +2347,26 @@ static ICPState *pnv_icp_get(XICSFabric *xi, int pir)
 return cpu ? ICP(pnv_cpu_state(cpu)->intc) : NULL;
 }
 
+static void pnv_pic_intc_print_info(PnvChip *chip, PowerPCCPU *cpu,
+void *opaque)
+{
+PNV_CHIP_GET_CLASS(chip)->intc_print_info(chip, cpu, opaque);
+}
+
 static void pnv_pic_print_info(InterruptStatsProvider *obj,
Monitor *mon)
 {
 PnvMachineState *pnv = PNV_MACHINE(obj);
 int i;
-CPUState *cs;
 
-CPU_FOREACH(cs) {
-PowerPCCPU *cpu = POWERPC_CPU(cs);
+for (i = 0; i < pnv->num_chips; i++) {
+PnvChip *chip = pnv->chips[i];
 
-/* XXX: loop on each chip/core/thread instead of CPU_FOREACH() */
-PNV_CHIP_GET_CLASS(pnv->chips[0])->intc_print_info(pnv->chips[0], cpu,
-   mon);
-}
+/* First CPU presenters */
+pnv_chip_foreach_cpu(chip, pnv_pic_intc_print_info, mon);
 
-for (i = 0; i < pnv->num_chips; i++) {
-PNV_CHIP_GET_CLASS(pnv->chips[i])->pic_print_info(pnv->chips[i], mon);
+/* Then other devices, PHB, PSI, XIVE */
+PNV_CHIP_GET_CLASS(chip)->pic_print_info(chip, mon);
 }
 }
 
@@ -2549,12 +2567,18 @@ static void pnv_cpu_do_nmi_on_cpu(CPUState *cs, 
run_on_cpu_data arg)
 }
 }
 
+static void pnv_cpu_do_nmi(PnvChip *chip, PowerPCCPU *cpu, void *opaque)
+{
+async_run_on_cpu(CPU(cpu), pnv_cpu_do_nmi_on_cpu, RUN_ON_CPU_NULL);
+}
+
 static void pnv_nmi(NMIState *n, int cpu_index, Error **errp)
 {
-CPUState *cs;
+PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
+int i;
 
-CPU_FOREACH(cs) {
-async_run_on_cpu(cs, pnv_cpu_do_nmi_on_cpu, RUN_ON_CPU_NULL);
+for (i = 0; i < pnv->num_chips; i++) {
+pnv_chip_foreach_cpu(pnv->chips[i], pnv_cpu_do_nmi, NULL);
 }
 }
 
-- 
2.44.0

Re: [PATCH v2 5/6] hw/ppc: SPI controller wiring to P10 chip and create seeprom device

2024-04-22 Thread Cédric Le Goater


On 4/9/24 19:56, Chalapathi V wrote:

In this commit
Creates SPI controller on p10 chip.
Create the keystore seeprom of type "seeprom-25csm04"
Connect the cs of seeprom to PIB_SPIC[2] cs irq.

The QOM tree of spi controller and seeprom are.
/machine (powernv10-machine)
   /chip[0] (power10_v2.0-pnv-chip)
 /pib_spic[2] (pnv-spi-controller)
   /bus (pnv-spi-bus)
 /pnv-spi-bus.2 (SSI)
   /xscom-spi-controller-regs[0] (memory-region)

/machine (powernv10-machine)
   /unattached (container)
 /device[7] (seeprom-25csm04)
   /ssi-gpio-cs[0] (irq)

(qemu) qom-get /machine/unattached/device[7] "parent_bus"
"/machine/chip[0]/pib_spic[2]/bus/pnv-spi-bus.2"

Signed-off-by: Chalapathi V 
---
  include/hw/ppc/pnv_chip.h |  3 +++
  hw/ppc/pnv.c  | 36 +++-
  2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/include/hw/ppc/pnv_chip.h b/include/hw/ppc/pnv_chip.h
index 8589f3291e..3edf13e8f9 100644
--- a/include/hw/ppc/pnv_chip.h
+++ b/include/hw/ppc/pnv_chip.h
@@ -6,6 +6,7 @@
  #include "hw/ppc/pnv_core.h"
  #include "hw/ppc/pnv_homer.h"
  #include "hw/ppc/pnv_n1_chiplet.h"
+#include "hw/ppc/pnv_spi_controller.h"
  #include "hw/ppc/pnv_lpc.h"
  #include "hw/ppc/pnv_occ.h"
  #include "hw/ppc/pnv_psi.h"
@@ -118,6 +119,8 @@ struct Pnv10Chip {
  PnvSBE   sbe;
  PnvHomer homer;
  PnvN1Chiplet n1_chiplet;
+#define PNV10_CHIP_MAX_PIB_SPIC 6
+PnvSpiController pib_spic[PNV10_CHIP_MAX_PIB_SPIC];
  
  uint32_t nr_quads;

  PnvQuad  *quads;
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 6e3a5ccdec..eeb2d650bd 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -46,6 +46,7 @@
  #include "hw/pci-host/pnv_phb.h"
  #include "hw/pci-host/pnv_phb3.h"
  #include "hw/pci-host/pnv_phb4.h"
+#include "hw/ssi/ssi.h"
  
  #include "hw/ppc/xics.h"

  #include "hw/qdev-properties.h"
@@ -1829,6 +1830,11 @@ static void pnv_chip_power10_instance_init(Object *obj)
  for (i = 0; i < pcc->i2c_num_engines; i++) {
  object_initialize_child(obj, "i2c[*]", >i2c[i], TYPE_PNV_I2C);
  }
+
+for (i = 0; i < PNV10_CHIP_MAX_PIB_SPIC ; i++) {
+object_initialize_child(obj, "pib_spic[*]", >pib_spic[i],
+TYPE_PNV_SPI_CONTROLLER);
+}
  }
  
  static void pnv_chip_power10_quad_realize(Pnv10Chip *chip10, Error **errp)

@@ -2043,7 +2049,35 @@ static void pnv_chip_power10_realize(DeviceState *dev, 
Error **errp)
qdev_get_gpio_in(DEVICE(>psi),
 PSIHB9_IRQ_SBE_I2C));
  }
-
+/* PIB SPI Controller */
+for (i = 0; i < PNV10_CHIP_MAX_PIB_SPIC; i++) {
+object_property_set_int(OBJECT(>pib_spic[i]), "spic_num",
+i , _fatal);
+/*
+ * The TPM attached SPIC needs to reverse the bit order in each byte
+ * it sends to the TPM.
+ */
+if (i == 4) {
+object_property_set_bool(OBJECT(>pib_spic[i]),
+"reverse_bits", true, _fatal);
+}


or

  object_property_set_bool(OBJECT(>pib_spic[i]),
"reverse_bits", (i == 4) , _fatal);


That said. This setting looks weird to me.

Why do we need to reverse the bits ? is it an endian issue ?

Are there other SPI devices on the buses ?


+if (!qdev_realize(DEVICE(>pib_spic[i]), NULL, errp)) {
+return;
+}
+pnv_xscom_add_subregion(chip, PNV10_XSCOM_PIB_SPIC_BASE +
+i * PNV10_XSCOM_PIB_SPIC_SIZE,
+>pib_spic[i].xscom_spic_regs);
+}



The devices below belong to the rainier machine, it seems. We should introduce
a per-machine handler to create them, as was done for the I2C devices.
For this purpose, the PnvMachineClass::i2c_init handler could be changed
to create all machine-specific devices.


+/* Primary MEAS/MVPD/Keystore SEEPROM connected to pib_spic[2] */
+DeviceState *seeprom = qdev_new("seeprom-25csm04");
+qdev_prop_set_string(seeprom, "filename",
+ "sbe_measurement_seeprom.bin.ecc");


This should be done differently. Here is a command line example :

$ qemu-system-arm -M ast2600-evb \
  -blockdev node-name=fmc0,driver=file,filename=/path/to/fmc0.img \
  -device mx66u51235f,bus=ssi.0,cs=0x0,drive=fmc0 \
  -blockdev node-name=fmc1,driver=file,filename=/path/to/fmc1.img \
  -device mx66u51235f,bus=ssi.0,cs=0x1,drive=fmc1 \
  -blockdev node-name=spi1,driver=file,filename=/path/to/spi1.img \
  -device mx66u51235f,cs=0x0,bus=ssi.1,drive=spi1 \
  ...

Please try to rework "seeprom-25csm04" on top of "m25p80". It should help.



+ssi_realize_and_unref(seeprom, ((>pib_spic[2])->bus).ssi_bus,
+  _fatal);
+qemu_irq seeprom_cs = qdev_get_gpio_in_named(seeprom, SSI_GPIO_CS, 0);
+Object *bus = OBJECT(&(>pib_spic[2])->bus);
+

Re: [PATCH v2 4/6] hw/misc: Microchip's 25CSM04 SEEPROM model

2024-04-22 Thread Cédric Le Goater


Hello Chalapathi

On 4/9/24 19:56, Chalapathi V wrote:

This commit implements a Serial EEPROM utilizing the Serial Peripheral
Interface (SPI) compatible bus.
Currently implemented SEEPROM is Microchip's 25CSM04 which provides 4 Mbits
of Serial EEPROM utilizing the Serial Peripheral Interface (SPI) compatible
bus. The device is organized as 524288 bytes of 8 bits each (512Kbyte) and
is optimized for use in consumer and industrial applications where reliable
and dependable nonvolatile memory storage is essential.

This seeprom device is created from a parent "ssi-peripheral".



Can the hw/block/m25p80.c model be extended instead ?


Thanks,

C.






Signed-off-by: Chalapathi V 
---
  include/hw/misc/seeprom_25csm04.h |  48 ++
  hw/misc/seeprom_25csm04.c | 780 ++
  hw/misc/Kconfig   |   3 +
  hw/misc/meson.build   |   1 +
  hw/ppc/Kconfig|   1 +
  5 files changed, 833 insertions(+)
  create mode 100644 include/hw/misc/seeprom_25csm04.h
  create mode 100644 hw/misc/seeprom_25csm04.c

diff --git a/include/hw/misc/seeprom_25csm04.h 
b/include/hw/misc/seeprom_25csm04.h
new file mode 100644
index 00..0343530354
--- /dev/null
+++ b/include/hw/misc/seeprom_25csm04.h
@@ -0,0 +1,48 @@
+/*
+ * 25CSM04 Serial EEPROM model
+ *
+ * Copyright (c) 2024, IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * The Microchip Technology Inc. 25CSM04 provides 4 Mbits of Serial EEPROM
+ * utilizing the Serial Peripheral Interface (SPI) compatible bus. The device
+ * is organized as 524288 bytes of 8 bits each (512Kbyte) and is optimized
+ * for use in consumer and industrial applications where reliable and
+ * dependable nonvolatile memory storage is essential
+ */
+
+#ifndef SEEPROM_25CSM04_H
+#define SEEPROM_25CSM04_H
+
+#include "hw/ssi/ssi.h"
+#include "qom/object.h"
+
+#define TYPE_SEEPROM_25CSM04 "seeprom-25csm04"
+
+OBJECT_DECLARE_SIMPLE_TYPE(SeepromCsm04, SEEPROM_25CSM04)
+
+typedef struct SeepromCsm04 {
+SSIPeripheral parent_object;
+
+char*file;
+char*file_name;
+uint8_t opcode;
+uint32_taddr;
+uint8_t rd_state;
+boollocked;
+boolcommand_byte;
+/* device registers */
+uint8_t status0;
+uint8_t status1;
+uint8_t dsn[16];
+uint8_t uplid[256];
+uint8_t mpr[8];
+uint8_t idr[5];
+} SeepromCsm04;
+
+uint32_t seeprom_transfer(SSIPeripheral *ss, uint32_t tx);
+void seeprom_realize(SSIPeripheral *dev, Error **errp);
+bool compute_addr(SeepromCsm04 *s, uint32_t tx);
+bool validate_addr(SeepromCsm04 *s);
+#endif /* PPC_PNV_SPI_SEEPROM_H */
diff --git a/hw/misc/seeprom_25csm04.c b/hw/misc/seeprom_25csm04.c
new file mode 100644
index 00..45df66e4b0
--- /dev/null
+++ b/hw/misc/seeprom_25csm04.c
@@ -0,0 +1,780 @@
+/*
+ * 25CSM04 Serial EEPROM model
+ *
+ * Copyright (c) 2024, IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "hw/misc/seeprom_25csm04.h"
+#include "hw/qdev-properties.h"
+#include "qemu/datadir.h"
+#include 
+
+#define SPI_DEBUG(x)
+
+/*
+ * 2-byte STATUS register which is a combination of six nonvolatile bits of
+ * EEPROM and five volatile latches.
+ *
+ * status 0:
+ * bit 7 WPEN: Write-Protect Enable bit
+ * 1 = Write-Protect pin is enabled, 0 = Write-Protect pin is ignored
+ *
+ * bit 3-2 BP<1:0>: Block Protection bits
+ * 00 = No array write protection
+ * 01 = Upper quarter memory array protection
+ * 10 = Upper half memory array protection
+ * 11 = Entire memory array protection
+ *
+ * bit 1 WEL: Write Enable Latch bit
+ * 1 = WREN has been executed and device is enabled for writing
+ * 0 = Device is not write-enabled
+ *
+ * bit 0 RDY/BSY: Ready/Busy Status Latch bit
+ * 1 = Device is busy with an internal write cycle
+ * 0 = Device is ready for a new sequence
+ */
+#define STATUS0_WPEN0x7
+#define STATUS0_BP  0x2
+#define STATUS0_WEL 0x1
+#define STATUS0_BUSY0x0
+
+/*
+ * status 1:
+ * bit 7 WPM: Write Protection Mode bit(1)
+ * 1 = Enhanced Write Protection mode selected (factory default)
+ * 0 = Legacy Write Protection mode selected
+ *
+ * bit 6 ECS: Error Correction State Latch bit
+ * 1 = The previously executed read sequence did require the ECC
+ * 0 = The previous executed read sequence did not require the ECC
+ *
+ * bit 5 FMPC: Freeze Memory Protection Configuration bit(2)
+ * 1 = Memory Partition registers and write protection mode are permanently
+ * frozen and cannot be modified
+ * 0 = Memory Partition registers and write protection mode are not frozen
+ * and are modifiable
+ *
+ * bit 4 PREL: Partition Register Write Enable Latch bit
+ * 1 = PRWE has been executed and WMPR, FRZR and PPAB instructions are enabled
+ * 0 = WMPR, FRZR and PPAB instructions are disabled
+ *
+ * bit 3 PABP: Partition Address Boundary Protection bit
+ *

Re: [PATCH v2 2/6] hw/ppc: SPI controller model - registers implementation

2024-04-22 Thread Cédric Le Goater


On 4/16/24 19:02, Chalapathi V wrote:


On 15-04-2024 20:44, Cédric Le Goater wrote:

Hello Chalapathi

The subject could be rephrased to : "ppc/pnv: Add SPI controller model".

On 4/9/24 19:56, Chalapathi V wrote:

The SPI controller device model supports a connection to a single SPI responder.
This provides access to SPI seeproms, TPM, flash device and an ADC controller.

All SPI function control is mapped into the SPI register space to enable full
control by firmware. In this commit SPI configuration component is modelled
which contains all SPI configuration and status registers as well as the hold
registers for data to be sent or having been received.

An existing QEMU SSI framework is used and SSI_BUS is created.

Signed-off-by: Chalapathi V 
---
  include/hw/ppc/pnv_spi_controller.h  |  55 +
  include/hw/ppc/pnv_spi_controller_regs.h | 114 ++


These two files should be under hw/ssi/ and include/hw/ssi/. Please
remove '_controller'.

Sure. Thank You.



  include/hw/ppc/pnv_xscom.h |   3 +
  hw/ppc/pnv_spi_controller.c  | 278 +++
  hw/ppc/Kconfig   |   1 +
  hw/ppc/meson.build   |   1 +
  6 files changed, 452 insertions(+)
  create mode 100644 include/hw/ppc/pnv_spi_controller.h
  create mode 100644 include/hw/ppc/pnv_spi_controller_regs.h
  create mode 100644 hw/ppc/pnv_spi_controller.c

diff --git a/include/hw/ppc/pnv_spi_controller.h 
b/include/hw/ppc/pnv_spi_controller.h
new file mode 100644
index 00..5ec50fb14c
--- /dev/null
+++ b/include/hw/ppc/pnv_spi_controller.h
@@ -0,0 +1,55 @@
+/*
+ * QEMU PowerPC SPI Controller model
+ *
+ * Copyright (c) 2024, IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * This model Supports a connection to a single SPI responder.
+ * Introduced for P10 to provide access to SPI seeproms, TPM, flash device
+ * and an ADC controller.
+ */
+#include "hw/ssi/ssi.h"
+
+#ifndef PPC_PNV_SPI_CONTROLLER_H
+#define PPC_PNV_SPI_CONTROLLER_H
+
+#define TYPE_PNV_SPI_CONTROLLER "pnv-spi-controller"
+#define PNV_SPICONTROLLER(obj) \
+    OBJECT_CHECK(PnvSpiController, (obj), TYPE_PNV_SPI_CONTROLLER)


You could use OBJECT_DECLARE_SIMPLE_TYPE ? Anyhow, I would prefer
naming the macro PNV_SPI_CONTROLLER.


+#define SPI_CONTROLLER_REG_SIZE 8
+
+typedef struct SSIBus SSIBus;


why ?

I might have got compile time errors. I will recheck and update. Thank You.




+
+#define TYPE_PNV_SPI_BUS "pnv-spi-bus"
+OBJECT_DECLARE_SIMPLE_TYPE(PnvSPIBus, PNV_SPI_BUS)
+
+typedef struct PnvSPIBus {


I don't think this extra PnvSPIBus model is useful.


+    SysBusDevice parent_obj;
+
+    SSIBus *ssi_bus;
+    qemu_irq *cs_line;


These two attributes could live under PnvSpiController.

This is added to have a SysBusDevice parent so that I can use the busname in 
command line for TPM. I will add these in PnvSpiController with SysBusDevice 
parent and test.


You could still compute the bus name from pnv_spi_controller_realize()
and move all PnvSPIBus attributes under PnvSpiController. The PnvSPIBus
is not required.




+    uint32_t id;


and this one would become useless.


+} PnvSPIBus;

+typedef struct PnvSpiController {
+    DeviceState parent;
+
+    PnvSPIBus   bus;
+    MemoryRegion    xscom_spic_regs;
+    /* SPI controller object number */
+    uint32_t    spic_num;
+
+    /* SPI Controller registers */
+    uint64_t    error_reg;
+    uint64_t    counter_config_reg;
+    uint64_t    config_reg1;
+    uint64_t    clock_config_reset_control;
+    uint64_t    memory_mapping_reg;
+    uint64_t    transmit_data_reg;
+    uint64_t    receive_data_reg;
+    uint8_t sequencer_operation_reg[SPI_CONTROLLER_REG_SIZE];
+    uint64_t    status_reg;


You could use an array of uint64_t also.

Sure. I will try and check.


That's not a must have. Both approaches work, but since the memops use
the MMIO offset to address the register, it is sometimes simpler to
use an array of uint64_t.







+} PnvSpiController;
+#endif /* PPC_PNV_SPI_CONTROLLER_H */
diff --git a/include/hw/ppc/pnv_spi_controller_regs.h 
b/include/hw/ppc/pnv_spi_controller_regs.h
new file mode 100644
index 00..6f613aca5e
--- /dev/null
+++ b/include/hw/ppc/pnv_spi_controller_regs.h
@@ -0,0 +1,114 @@
+/*
+ * QEMU PowerPC SPI Controller model
+ *
+ * Copyright (c) 2023, IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef SPI_CONTROLLER_REGS_H
+#define SPI_CONTROLLER_REGS_H
+
+/* Error Register */
+#define ERROR_REG   0x00
+
+/* counter_config_reg */
+#define COUNTER_CONFIG_REG  0x01
+#define COUNTER_CONFIG_REG_SHIFT_COUNT_N1   PPC_BITMASK(0, 7)
+#define COUNTER_CONFIG_REG_SHIFT_COUNT_N2   PPC_BITMASK(8, 15)
+#define COUNTER_CONFIG_REG_COUNT_COMPARE1   PPC_BITMASK(24, 31)
+#define COUNTER_CONFIG_REG_COUNT_COMPARE2   PPC_BITMASK(32, 39)
+#de

Re: [PATCH v3 07/16] aspeed/smc: fix dma moving incorrect data length issue

2024-04-19 Thread Cédric Le Goater


On 4/16/24 11:18, Jamin Lin wrote:

DMA length is from 1 byte to 32MB for AST2600 and AST10x0
and DMA length is from 4 bytes to 32MB for AST2500.

In other words, if "R_DMA_LEN" is 0, it should move at least 1 byte
data for AST2600 and AST10x0 and 4 bytes data for AST2500.

To support all ASPEED SOCs, adds dma_start_length parameter to store

the start length, add helper routines function to compute the dma length
and update DMA_LENGTH mask to "1FF" to fix dma moving
incorrect data length issue.


OK. There are two problems to address: the "zero" length transfer and
the DMA length unit, which is missing today. Newer SoCs use 1 bit / byte
and older ones, AST2400 and AST2500, use 1 bit / 4 bytes.

We can introduce an AspeedSMCClass::dma_len_unit and rework the loop to :

do {

  

   if (s->regs[R_DMA_LEN]) {
s->regs[R_DMA_LEN] -= 4 / asc->dma_len_unit;
}
} while (s->regs[R_DMA_LEN]);

It should fix the current implementation.

I don't think it is necessary to add a Fixes tag because the problem
has been there for ages and no one reported it. Probably because the
only place DMA transfers are used is in U-Boot and transfers have a
non-zero length.


Currently, only supports dma length 4 bytes aligned.


this looks like a third topic. So the minimum value R_DMA_LEN should
have on the AST2600 SoC and above is '3'. I would opt to replace the
DMA_LENGTH macro with a dma_length_sanitize() helper to fix the software
input of R_DMA_LEN.


Thanks,

C.


 

Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 
---
  hw/ssi/aspeed_smc.c | 52 -
  include/hw/ssi/aspeed_smc.h |  1 +
  2 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/hw/ssi/aspeed_smc.c b/hw/ssi/aspeed_smc.c
index 8a8d77b480..71abc7a2d8 100644
--- a/hw/ssi/aspeed_smc.c
+++ b/hw/ssi/aspeed_smc.c
@@ -178,13 +178,17 @@
   * DMA flash addresses should be 4 bytes aligned and the valid address
   * range is 0x2000 - 0x2FFF.
   *
- * DMA length is from 4 bytes to 32MB
+ * DMA length is from 4 bytes to 32MB (AST2500)
   *   0: 4 bytes
   *   0x7F: 32M bytes
+ *
+ * DMA length is from 1 byte to 32MB (AST2600, AST10x0)
+ *   0: 1 byte
+ *   0x1FF: 32M bytes
   */
  #define DMA_DRAM_ADDR(asc, val)   ((val) & (asc)->dma_dram_mask)
  #define DMA_FLASH_ADDR(asc, val)  ((val) & (asc)->dma_flash_mask)
-#define DMA_LENGTH(val) ((val) & 0x01FC)
+#define DMA_LENGTH(val) ((val) & 0x01FF)
  
  /* Flash opcodes. */

  #define SPI_OP_READ   0x03/* Read data bytes (low frequency) */
@@ -843,6 +847,24 @@ static bool aspeed_smc_inject_read_failure(AspeedSMCState 
*s)
  }
  }
  
+static uint32_t aspeed_smc_dma_len(AspeedSMCState *s)

+{
+AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s);
+uint32_t dma_len;
+uint32_t extra;
+
+dma_len = s->regs[R_DMA_LEN] + asc->dma_start_length;
+
+/* dma length 4 bytes aligned */
+extra = dma_len % 4;
+
+if (extra != 0) {
+dma_len += 4 - extra;
+}
+
+return dma_len;
+}
+
  /*
   * Accumulate the result of the reads to provide a checksum that will
   * be used to validate the read timing settings.
@@ -850,6 +872,7 @@ static bool aspeed_smc_inject_read_failure(AspeedSMCState 
*s)
  static void aspeed_smc_dma_checksum(AspeedSMCState *s)
  {
  MemTxResult result;
+uint32_t dma_len;
  uint32_t data;
  
  if (s->regs[R_DMA_CTRL] & DMA_CTRL_WRITE) {

@@ -861,7 +884,9 @@ static void aspeed_smc_dma_checksum(AspeedSMCState *s)
  aspeed_smc_dma_calibration(s);
  }
  
-while (s->regs[R_DMA_LEN]) {

+dma_len = aspeed_smc_dma_len(s);
+
+while (dma_len) {
  data = address_space_ldl_le(>flash_as, s->regs[R_DMA_FLASH_ADDR],
  MEMTXATTRS_UNSPECIFIED, );
  if (result != MEMTX_OK) {
@@ -877,7 +902,8 @@ static void aspeed_smc_dma_checksum(AspeedSMCState *s)
   */
  s->regs[R_DMA_CHECKSUM] += data;
  s->regs[R_DMA_FLASH_ADDR] += 4;
-s->regs[R_DMA_LEN] -= 4;
+dma_len -= 4;
+s->regs[R_DMA_LEN] = dma_len;
  }
  
  if (s->inject_failure && aspeed_smc_inject_read_failure(s)) {

@@ -889,14 +915,17 @@ static void aspeed_smc_dma_checksum(AspeedSMCState *s)
  static void aspeed_smc_dma_rw(AspeedSMCState *s)
  {
  MemTxResult result;
+uint32_t dma_len;
  uint32_t data;
  
+dma_len = aspeed_smc_dma_len(s);

+
  trace_aspeed_smc_dma_rw(s->regs[R_DMA_CTRL] & DMA_CTRL_WRITE ?
  "write" : "read",
  s->regs[R_DMA_FLASH_ADDR],
  s->regs[R_DMA_DRAM_ADDR],
-s->regs[R_DMA_LEN]);
-while (s->regs[R_DMA_LEN]) {
+dma_len);
+while (dma_len) {
  if (s->regs[R_DMA_CTRL] & DMA_CTRL_WRITE) {
  data = address_space_ldl_le(>dram_as, s->regs[R_DMA_DRAM_ADDR],

Re: [PATCH v3 12/16] aspeed/soc: Add AST2700 support

2024-04-19 Thread Cédric Le Goater


On 4/16/24 11:18, Jamin Lin wrote:

Initial definitions for a simple machine using an AST2700 SOC (Cortex-a35 CPU).

AST2700 SOC and its interrupt controller are too complex to handle
in the common Aspeed SoC framework. We introduce a new ast2700
class with instance_init and realize handlers.

AST2700 is a 64 bits quad core cpus and support 8 watchdog.
Update maximum ASPEED_CPUS_NUM to 4 and ASPEED_WDTS_NUM to 8.
In addition, update AspeedSocState to support scuio, sli, sliio and intc.

Add TYPE_ASPEED27X0_SOC machine type.

The SDMC controller is unlocked at SPL stage.
At present, only supports to emulate booting
start from u-boot stage. Set SDMC controller
unlocked by default.

In INTC, each interrupt of INT 128 to INT 136 combines 32 interrupts.
It connect GICINT IRQ GPIO-OUTPUT pins to GIC device with irq 128 to 136.
And, if a device irq is 128 to 136, its irq GPIO-OUTPUT pin is connected to
GICINT or-gates instead of GIC device.

Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 


Before I forget, please see a little comment below regarding user creatable
devices.

The model looks fine. The interrupt controller part is more complex than
the previous SoCs so I will come back to it later when I have more time.


---
  hw/arm/aspeed_ast27x0.c | 554 
  hw/arm/meson.build  |   1 +
  include/hw/arm/aspeed_soc.h |  26 +-
  3 files changed, 579 insertions(+), 2 deletions(-)
  create mode 100644 hw/arm/aspeed_ast27x0.c

diff --git a/hw/arm/aspeed_ast27x0.c b/hw/arm/aspeed_ast27x0.c
new file mode 100644
index 00..754c963230
--- /dev/null
+++ b/hw/arm/aspeed_ast27x0.c
@@ -0,0 +1,554 @@
+/*
+ * ASPEED SoC 27x0 family
+ *
+ * Copyright (C) 2024 ASPEED Technology Inc.
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Implementation extracted from the AST2600 and adapted for AST27x0.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/misc/unimp.h"
+#include "hw/arm/aspeed_soc.h"
+#include "qemu/module.h"
+#include "qemu/error-report.h"
+#include "hw/i2c/aspeed_i2c.h"
+#include "net/net.h"
+#include "sysemu/sysemu.h"
+#include "hw/intc/arm_gicv3.h"
+#include "qapi/qmp/qlist.h"
+
+static const hwaddr aspeed_soc_ast2700_memmap[] = {
+[ASPEED_DEV_SPI_BOOT]  =  0x4,
+[ASPEED_DEV_SRAM]  =  0x1000,
+[ASPEED_DEV_SDMC]  =  0x12C0,
+[ASPEED_DEV_SCU]   =  0x12C02000,
+[ASPEED_DEV_SCUIO] =  0x14C02000,
+[ASPEED_DEV_UART0] =  0X14C33000,
+[ASPEED_DEV_UART1] =  0X14C33100,
+[ASPEED_DEV_UART2] =  0X14C33200,
+[ASPEED_DEV_UART3] =  0X14C33300,
+[ASPEED_DEV_UART4] =  0X12C1A000,
+[ASPEED_DEV_UART5] =  0X14C33400,
+[ASPEED_DEV_UART6] =  0X14C33500,
+[ASPEED_DEV_UART7] =  0X14C33600,
+[ASPEED_DEV_UART8] =  0X14C33700,
+[ASPEED_DEV_UART9] =  0X14C33800,
+[ASPEED_DEV_UART10]=  0X14C33900,
+[ASPEED_DEV_UART11]=  0X14C33A00,
+[ASPEED_DEV_UART12]=  0X14C33B00,
+[ASPEED_DEV_WDT]   =  0x14C37000,
+[ASPEED_DEV_VUART] =  0X14C3,
+[ASPEED_DEV_FMC]   =  0x1400,
+[ASPEED_DEV_SPI0]  =  0x1401,
+[ASPEED_DEV_SPI1]  =  0x1402,
+[ASPEED_DEV_SPI2]  =  0x1403,
+[ASPEED_DEV_SDRAM] =  0x4,
+[ASPEED_DEV_MII1]  =  0x1404,
+[ASPEED_DEV_MII2]  =  0x14040008,
+[ASPEED_DEV_MII3]  =  0x14040010,
+[ASPEED_DEV_ETH1]  =  0x1405,
+[ASPEED_DEV_ETH2]  =  0x1406,
+[ASPEED_DEV_ETH3]  =  0x1407,
+[ASPEED_DEV_EMMC]  =  0x1209,
+[ASPEED_DEV_INTC]  =  0x1210,
+[ASPEED_DEV_SLI]   =  0x12C17000,
+[ASPEED_DEV_SLIIO] =  0x14C1E000,
+[ASPEED_GIC_DIST]  =  0x1220,
+[ASPEED_GIC_REDIST]=  0x1228,
+};
+
+#define AST2700_MAX_IRQ 288
+
+/* Shared Peripheral Interrupt values below are offset by -32 from datasheet */
+static const int aspeed_soc_ast2700_irqmap[] = {
+[ASPEED_DEV_UART0] = 132,
+[ASPEED_DEV_UART1] = 132,
+[ASPEED_DEV_UART2] = 132,
+[ASPEED_DEV_UART3] = 132,
+[ASPEED_DEV_UART4] = 8,
+[ASPEED_DEV_UART5] = 132,
+[ASPEED_DEV_UART6] = 132,
+[ASPEED_DEV_UART7] = 132,
+[ASPEED_DEV_UART8] = 132,
+[ASPEED_DEV_UART9] = 132,
+[ASPEED_DEV_UART10]= 132,
+[ASPEED_DEV_UART11]= 132,
+[ASPEED_DEV_UART12]= 132,
+[ASPEED_DEV_FMC]   = 131,
+[ASPEED_DEV_SDMC]  = 0,
+[ASPEED_DEV_SCU]   = 12,
+[ASPEED_DEV_ADC]   = 130,
+[ASPEED_DEV_XDMA]  = 5,
+[ASPEED_DEV_EMMC]  = 15,
+[ASPEED_DEV_GPIO]  = 11,
+[ASPEED_DEV_GPIO_1_8V] = 130,
+[ASPEED_DEV_RTC]   = 13,
+[ASPEED_DEV_TIMER1]= 16,
+[ASPEED_DEV_TIMER2]= 17,
+[ASPEED_DEV_TIMER3]= 18,
+[ASPEED_DEV_TIMER4]= 19,
+[ASPEED_DEV_TIMER5]=

Re: [PATCH v2 3/5] intel_iommu: Add a framework to do compatibility check with host IOMMU cap/ecap

2024-04-19 Thread Cédric Le Goater


Hello Zhenzhong,

On 4/18/24 10:42, Duan, Zhenzhong wrote:

Hi Cédric,


-Original Message-
From: Cédric Le Goater 
Subject: Re: [PATCH v2 3/5] intel_iommu: Add a framework to do
compatibility check with host IOMMU cap/ecap

Hello Zhenzhong

On 4/17/24 11:24, Duan, Zhenzhong wrote:




-Original Message-
From: Cédric Le Goater 
Subject: Re: [PATCH v2 3/5] intel_iommu: Add a framework to do
compatibility check with host IOMMU cap/ecap

On 4/17/24 06:21, Duan, Zhenzhong wrote:




-Original Message-
From: Cédric Le Goater 
Subject: Re: [PATCH v2 3/5] intel_iommu: Add a framework to do
compatibility check with host IOMMU cap/ecap

Hello,

On 4/16/24 09:09, Duan, Zhenzhong wrote:

Hi Cédric,


-Original Message-
From: Cédric Le Goater 
Subject: Re: [PATCH v2 3/5] intel_iommu: Add a framework to do
compatibility check with host IOMMU cap/ecap

On 4/8/24 10:44, Zhenzhong Duan wrote:

From: Yi Liu 

If check fails, the host side device(either vfio or vdpa device) should

not

be passed to guest.

Implementation details for different backends will be in following

patches.


Signed-off-by: Yi Liu 
Signed-off-by: Yi Sun 
Signed-off-by: Zhenzhong Duan 
---
  hw/i386/intel_iommu.c | 35

+++

  1 file changed, 35 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 4f84e2e801..a49b587c73 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -35,6 +35,7 @@
  #include "sysemu/kvm.h"
  #include "sysemu/dma.h"
  #include "sysemu/sysemu.h"
+#include "sysemu/iommufd.h"
  #include "hw/i386/apic_internal.h"
  #include "kvm/kvm_i386.h"
  #include "migration/vmstate.h"
@@ -3819,6 +3820,32 @@ VTDAddressSpace

*vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,

  return vtd_dev_as;
  }

+static int vtd_check_legacy_hdev(IntelIOMMUState *s,
+ HostIOMMUDevice *hiod,
+ Error **errp)
+{
+return 0;
+}
+
+static int vtd_check_iommufd_hdev(IntelIOMMUState *s,
+  HostIOMMUDevice *hiod,
+  Error **errp)
+{
+return 0;
+}
+
+static int vtd_check_hdev(IntelIOMMUState *s,

VTDHostIOMMUDevice

*vtd_hdev,

+  Error **errp)
+{
+HostIOMMUDevice *hiod = vtd_hdev->dev;
+
+if (object_dynamic_cast(OBJECT(hiod), TYPE_HIOD_IOMMUFD))

{

+return vtd_check_iommufd_hdev(s, hiod, errp);
+}
+
+return vtd_check_legacy_hdev(s, hiod, errp);
+}



I think we should be using the .get_host_iommu_info() class handler
instead. Can we refactor the code slightly to avoid this check on
the type ?


There is some difficulty in avoiding this check, the behavior of

vtd_check_legacy_hdev

and vtd_check_iommufd_hdev are different especially after nesting

support introduced.

vtd_check_iommufd_hdev() has much wider check over cap/ecap bits

besides aw_bits.

I think it is important to fully separate the vIOMMU model from the
host IOMMU backing device.


This comment is true for the structures also.


Could we introduce a new HostIOMMUDeviceClass
handler, .check_hdev(), which would

call .get_host_iommu_info() ?

This means that HIOD_LEGACY_INFO and HIOD_IOMMUFD_INFO should be
a common structure 'HostIOMMUDeviceInfo' holding all attributes
for the different backends. Each .get_host_iommu_info() implementation
would translate the specific host iommu device data presentation
into the common 'HostIOMMUDeviceInfo', this is true for host_aw_bits.


I see, it's just not easy to define the unified elements in HostIOMMUDeviceInfo
so that they maps to bits or fields in host return IOMMU info.


The proposal is adding a vIOMMU <-> HostIOMMUDevice interface and a new
API needs to be completely defined for it. The IOMMU backend implementation
could be anything, legacy, iommufd, iommufd v2, some other framework and
the vIOMMU shouldn't be aware of its implementation.

Exposing the kernel structures as done below should be avoided because
they are part of the QEMU <-> kernel IOMMUFD interface.



Different platform returned host IOMMU info is platform specific.
For vtd and smmuv3:

struct iommu_hw_info_vtd {
 __u32 flags;
 __u32 __reserved;
 __aligned_u64 cap_reg;
 __aligned_u64 ecap_reg;
};

struct iommu_hw_info_arm_smmuv3 {
__u32 flags;
__u32 __reserved;
__u32 idr[6];
__u32 iidr;
__u32 aidr;
};

I can think of two kinds of declaration of HostIOMMUDeviceInfo:

struct HostIOMMUDeviceInfo {
 uint8_t aw_bits;
 enum iommu_hw_info_type type;
 union {
 struct iommu_hw_info_vtd vtd;
 struct iommu_hw_info_arm_smmuv3;
 ..
 } data;
}

or

struct HostIOMMUDeviceInfo {
 uint8_t aw_bits;
 enum iommu_hw_info_type type;
 __u32 flags;
 __aligned_u64 cap_

Re: [PATCH v3 09/16] aspeed/smc: Add AST2700 support

2024-04-18 Thread Cédric Le Goater


On 4/16/24 11:18, Jamin Lin wrote:

AST2700 fmc/spi controller's address decoding unit is 64KB
and only bits [31:16] are used for decoding. Introduce seg_to_reg
and reg_to_seg handlers for ast2700 fmc/spi controller.
In addition, adds ast2700 fmc, spi0, spi1, and spi2 class init handler.

Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/ssi/aspeed_smc.c | 222 +++-
  1 file changed, 220 insertions(+), 2 deletions(-)

diff --git a/hw/ssi/aspeed_smc.c b/hw/ssi/aspeed_smc.c
index a67cac3d0f..e768e5463c 100644
--- a/hw/ssi/aspeed_smc.c
+++ b/hw/ssi/aspeed_smc.c
@@ -185,7 +185,7 @@
   *   0: 4 bytes
   *   0x7F: 32M bytes
   *
- * DMA length is from 1 byte to 32MB (AST2600, AST10x0)
+ * DMA length is from 1 byte to 32MB (AST2600, AST10x0 and AST2700)
   *   0: 1 byte
   *   0x1FF: 32M bytes
   */
@@ -670,7 +670,7 @@ static const MemoryRegionOps aspeed_smc_flash_ops = {
  .endianness = DEVICE_LITTLE_ENDIAN,
  .valid = {
  .min_access_size = 1,
-.max_access_size = 4,
+.max_access_size = 8,
  },
  };
  
@@ -1951,6 +1951,220 @@ static const TypeInfo aspeed_1030_spi2_info = {

  .class_init = aspeed_1030_spi2_class_init,
  };
  
+/*

+ * The FMC Segment Registers of the AST2700 have a 64KB unit.
+ * Only bits [31:16] are used for decoding.
+ */
+#define AST2700_SEG_ADDR_MASK 0x
+
+static uint32_t aspeed_2700_smc_segment_to_reg(const AspeedSMCState *s,
+   const AspeedSegments *seg)
+{
+uint32_t reg = 0;
+
+/* Disabled segments have a nil register */
+if (!seg->size) {
+return 0;
+}
+
+reg |= (seg->addr & AST2700_SEG_ADDR_MASK) >> 16; /* start offset */
+reg |= (seg->addr + seg->size - 1) & AST2700_SEG_ADDR_MASK; /* end offset 
*/
+return reg;
+}
+
+static void aspeed_2700_smc_reg_to_segment(const AspeedSMCState *s,
+   uint32_t reg, AspeedSegments *seg)
+{
+uint32_t start_offset = (reg << 16) & AST2700_SEG_ADDR_MASK;
+uint32_t end_offset = reg & AST2700_SEG_ADDR_MASK;
+AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s);
+
+if (reg) {
+seg->addr = asc->flash_window_base + start_offset;
+seg->size = end_offset + (64 * KiB) - start_offset;
+} else {
+seg->addr = asc->flash_window_base;
+seg->size = 0;
+}
+}
+
+static const uint32_t aspeed_2700_fmc_resets[ASPEED_SMC_R_MAX] = {
+[R_CONF] = (CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE0 |
+CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE1),
+[R_CE_CTRL] = 0xaa00,
+[R_CTRL0] = 0x406b0641,
+[R_CTRL1] = 0x0400,
+[R_CTRL2] = 0x0400,
+[R_CTRL3] = 0x0400,
+[R_SEG_ADDR0] = 0x0800,
+[R_SEG_ADDR1] = 0x1800,
+[R_SEG_ADDR2] = 0x,
+[R_SEG_ADDR3] = 0x,
+[R_DUMMY_DATA] = 0x0001,
+[R_DMA_DRAM_ADDR_HIGH] = 0x,
+[R_TIMINGS] = 0x007b,
+};
+
+static const AspeedSegments aspeed_2700_fmc_segments[] = {
+{ 0x0, 128 * MiB }, /* start address is readonly */
+{ 128 * MiB, 128 * MiB }, /* default is disabled but needed for -kernel */
+{ 256 * MiB, 128 * MiB }, /* default is disabled but needed for -kernel */
+{ 0x0, 0 }, /* disabled */
+};
+
+static void aspeed_2700_fmc_class_init(ObjectClass *klass, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass);
+
+dc->desc   = "Aspeed 2700 FMC Controller";
+asc->r_conf= R_CONF;
+asc->r_ce_ctrl = R_CE_CTRL;
+asc->r_ctrl0   = R_CTRL0;
+asc->r_timings = R_TIMINGS;
+asc->nregs_timings = 3;
+asc->conf_enable_w0= CONF_ENABLE_W0;
+asc->cs_num_max= 3;
+asc->segments  = aspeed_2700_fmc_segments;
+asc->segment_addr_mask = 0x;
+asc->resets= aspeed_2700_fmc_resets;
+asc->flash_window_base = 0x1;
+asc->flash_window_size = 1 * GiB;
+asc->features  = ASPEED_SMC_FEATURE_DMA |
+ ASPEED_SMC_FEATURE_DMA_DRAM_ADDR_HIGH;
+asc->dma_flash_mask= 0x2FFC;
+asc->dma_dram_mask = 0xFFFC;
+asc->dma_start_length  = 1;
+asc->nregs = ASPEED_SMC_R_MAX;
+asc->segment_to_reg= aspeed_2700_smc_segment_to_reg;
+asc->reg_to_segment= aspeed_2700_smc_reg_to_segment;
+asc->dma_ctrl  = aspeed_2600_smc_dma_ctrl;
+}
+
+static const TypeInfo aspeed_2700_fmc_info = {
+.name =  "aspeed.fmc-ast2700",
+.parent = TYPE_ASPEED_SMC,
+.class_init = aspeed_2700_fmc_class_init,
+};
+
+static const AspeedSegments aspeed_2700_spi0_segments[] = {
+{ 0x0, 128 * MiB }, /* start address is

Re: [PATCH v3 08/16] aspeed/smc: support 64 bits dma dram address

2024-04-18 Thread Cédric Le Goater


Hello Jamin,

On 4/16/24 11:18, Jamin Lin wrote:

AST2700 supports a maximum dram size of 8GiB
and has a "DMA DRAM Side Address High Part(0x7C)"
register to support 64 bits dma dram address.
Add helper routines to compute the dma dram
address, add new features and update trace-event
to support 64 bits dram address.

Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 
---
  hw/ssi/aspeed_smc.c | 66 +++--
  hw/ssi/trace-events |  2 +-
  2 files changed, 59 insertions(+), 9 deletions(-)

diff --git a/hw/ssi/aspeed_smc.c b/hw/ssi/aspeed_smc.c
index 71abc7a2d8..a67cac3d0f 100644
--- a/hw/ssi/aspeed_smc.c
+++ b/hw/ssi/aspeed_smc.c
@@ -132,6 +132,9 @@
  #define   FMC_WDT2_CTRL_BOOT_SOURCE  BIT(4) /* O: primary 1: alternate */
  #define   FMC_WDT2_CTRL_EN   BIT(0)
  
+/* DMA DRAM Side Address High Part (AST2700) */

+#define R_DMA_DRAM_ADDR_HIGH   (0x7c / 4)
+
  /* DMA Control/Status Register */
  #define R_DMA_CTRL(0x80 / 4)
  #define   DMA_CTRL_REQUEST  (1 << 31)
@@ -187,6 +190,7 @@
   *   0x1FF: 32M bytes
   */
  #define DMA_DRAM_ADDR(asc, val)   ((val) & (asc)->dma_dram_mask)
+#define DMA_DRAM_ADDR_HIGH(val)   ((val) & 0xf)
  #define DMA_FLASH_ADDR(asc, val)  ((val) & (asc)->dma_flash_mask)
  #define DMA_LENGTH(val) ((val) & 0x01FF)
  
@@ -207,6 +211,7 @@ static const AspeedSegments aspeed_2500_spi2_segments[];

  #define ASPEED_SMC_FEATURE_DMA   0x1
  #define ASPEED_SMC_FEATURE_DMA_GRANT 0x2
  #define ASPEED_SMC_FEATURE_WDT_CONTROL 0x4
+#define ASPEED_SMC_FEATURE_DMA_DRAM_ADDR_HIGH 0x08
  
  static inline bool aspeed_smc_has_dma(const AspeedSMCClass *asc)

  {
@@ -218,6 +223,11 @@ static inline bool aspeed_smc_has_wdt_control(const 
AspeedSMCClass *asc)
  return !!(asc->features & ASPEED_SMC_FEATURE_WDT_CONTROL);
  }
  
+static inline bool aspeed_smc_has_dma_dram_addr_high(const AspeedSMCClass *asc)


To ease the reading, I would call the helper aspeed_smc_has_dma64()


+{
+return !!(asc->features & ASPEED_SMC_FEATURE_DMA_DRAM_ADDR_HIGH);
+}
+
  #define aspeed_smc_error(fmt, ...)  \
  qemu_log_mask(LOG_GUEST_ERROR, "%s: " fmt "\n", __func__, ## __VA_ARGS__)
  
@@ -747,6 +757,9 @@ static uint64_t aspeed_smc_read(void *opaque, hwaddr addr, unsigned int size)

  (aspeed_smc_has_dma(asc) && addr == R_DMA_CTRL) ||
  (aspeed_smc_has_dma(asc) && addr == R_DMA_FLASH_ADDR) ||
  (aspeed_smc_has_dma(asc) && addr == R_DMA_DRAM_ADDR) ||
+(aspeed_smc_has_dma(asc) &&
+ aspeed_smc_has_dma_dram_addr_high(asc) &&
+ addr == R_DMA_DRAM_ADDR_HIGH) ||
  (aspeed_smc_has_dma(asc) && addr == R_DMA_LEN) ||
  (aspeed_smc_has_dma(asc) && addr == R_DMA_CHECKSUM) ||
  (addr >= R_SEG_ADDR0 &&
@@ -847,6 +860,23 @@ static bool aspeed_smc_inject_read_failure(AspeedSMCState 
*s)
  }
  }
  
+static uint64_t aspeed_smc_dma_dram_addr(AspeedSMCState *s)

+{
+AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s);
+uint64_t dram_addr_high;
+uint64_t dma_dram_addr;
+
+if (aspeed_smc_has_dma_dram_addr_high(asc)) {
+dram_addr_high = s->regs[R_DMA_DRAM_ADDR_HIGH];
+dram_addr_high <<= 32;
+dma_dram_addr = dram_addr_high | s->regs[R_DMA_DRAM_ADDR];


Here is a proposal to shorten the routine :

return ((uint64_t) s->regs[R_DMA_DRAM_ADDR_HIGH] << 32) |
s->regs[R_DMA_DRAM_ADDR];



+} else {
+dma_dram_addr = s->regs[R_DMA_DRAM_ADDR];


and
return s->regs[R_DMA_DRAM_ADDR];


+}
+
+return dma_dram_addr;
+}
+
  static uint32_t aspeed_smc_dma_len(AspeedSMCState *s)
  {
  AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s);
@@ -914,24 +944,34 @@ static void aspeed_smc_dma_checksum(AspeedSMCState *s)
  
  static void aspeed_smc_dma_rw(AspeedSMCState *s)

  {
+AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s);
+uint64_t dram_addr_high;


This variable doesn't look very useful
 

+uint64_t dma_dram_addr;
+uint64_t dram_addr;


and dram_addr is redundant with dma_dram_addr. Please use only one.



  MemTxResult result;
  uint32_t dma_len;
  uint32_t data;
  
  dma_len = aspeed_smc_dma_len(s);

+dma_dram_addr = aspeed_smc_dma_dram_addr(s);
+
+if (aspeed_smc_has_dma_dram_addr_high(asc)) {
+dram_addr = dma_dram_addr - s->dram_mr->container->addr;


Why do you truncate the address again ? It should already be done with

#define DMA_DRAM_ADDR_HIGH(val)   ((val) & 0xf)


+} else {
+dram_addr = dma_dram_addr;
+}
  
  trace_aspeed_smc_dma_rw(s->regs[R_DMA_CTRL] & DMA_CTRL_WRITE ?

  "write" : "read",
  s->regs[R_DMA_FLASH_ADDR],
-s->regs[R_DMA_DRAM_ADDR],
+dram_addr,
  dma_len);
  while (dma_len) {
  if (s->regs[R_DMA_CTRL] & DMA_CTRL_WRITE) {
-data

Re: [PATCH v2 2/4] hw/i2c: Fix checkpatch line over 80 chars warnings

2024-04-18 Thread Cédric Le Goater


On 4/17/24 16:20, Philippe Mathieu-Daudé wrote:

On 17/4/24 08:24, Cédric Le Goater wrote:

Hello,

On 4/16/24 20:47, Philippe Mathieu-Daudé wrote:

We are going to modify these lines, fix their style
in order to avoid checkpatch.pl warnings:

   WARNING: line over 80 characters

Signed-off-by: Philippe Mathieu-Daudé 
---
  include/hw/i2c/i2c.h    |  11 ++-
  include/hw/nvram/eeprom_at24c.h |   6 +-
  hw/arm/aspeed.c | 140 +++-
  hw/nvram/eeprom_at24c.c |   6 +-
  4 files changed, 98 insertions(+), 65 deletions(-)




-    i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 3), "dps310", 0x76);
-    i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 3), "max31785", 
0x52);
-    i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 4), "tmp423", 0x4c);
-    i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 5), "tmp423", 0x4c);
+    i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 3),
+    "dps310", 0x76);
+    i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 3),
+    "max31785", 0x52);
+    i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 4),
+    "tmp423", 0x4c);
+    i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 5),
+    "tmp423", 0x4c);
  /* The Witherspoon expects a TMP275 but a TMP105 is compatible */
-    i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 9), TYPE_TMP105,
- 0x4a);
+    i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 9),
+    TYPE_TMP105, 0x4a);
  /* The witherspoon board expects Epson RX8900 I2C RTC but a ds1338 is
   * good enough */
-    i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 11), "ds1338", 0x32);
+    i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 11),
+    "ds1338", 0x32);


If the definitions were on a single line, they would be more
readable IMHO. So I would do the opposite change ...

An alternate solution could be to define an array of devices
at the machine class level, something like
   struct i2c_device {
   const char *type;
   uint8_t bus;
   uint8_t addr;
   } devices[] = { ... };


I agree this would be better, but this should be done separately
of this series. For now I propose not modifying hw/arm/aspeed.c
in this patch, and ignoring the checkpatch errors in the next
patch. What do you think?


sure, np.

Thanks,

C.

Re: [PATCH v2 3/5] intel_iommu: Add a framework to do compatibility check with host IOMMU cap/ecap

2024-04-18 Thread Cédric Le Goater


Hello Zhenzhong

On 4/17/24 11:24, Duan, Zhenzhong wrote:




-Original Message-
From: Cédric Le Goater 
Subject: Re: [PATCH v2 3/5] intel_iommu: Add a framework to do
compatibility check with host IOMMU cap/ecap

On 4/17/24 06:21, Duan, Zhenzhong wrote:




-Original Message-
From: Cédric Le Goater 
Subject: Re: [PATCH v2 3/5] intel_iommu: Add a framework to do
compatibility check with host IOMMU cap/ecap

Hello,

On 4/16/24 09:09, Duan, Zhenzhong wrote:

Hi Cédric,


-Original Message-
From: Cédric Le Goater 
Subject: Re: [PATCH v2 3/5] intel_iommu: Add a framework to do
compatibility check with host IOMMU cap/ecap

On 4/8/24 10:44, Zhenzhong Duan wrote:

From: Yi Liu 

If check fails, the host side device(either vfio or vdpa device) should

not

be passed to guest.

Implementation details for different backends will be in following

patches.


Signed-off-by: Yi Liu 
Signed-off-by: Yi Sun 
Signed-off-by: Zhenzhong Duan 
---
 hw/i386/intel_iommu.c | 35

+++

 1 file changed, 35 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 4f84e2e801..a49b587c73 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -35,6 +35,7 @@
 #include "sysemu/kvm.h"
 #include "sysemu/dma.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/iommufd.h"
 #include "hw/i386/apic_internal.h"
 #include "kvm/kvm_i386.h"
 #include "migration/vmstate.h"
@@ -3819,6 +3820,32 @@ VTDAddressSpace

*vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,

 return vtd_dev_as;
 }

+static int vtd_check_legacy_hdev(IntelIOMMUState *s,
+ HostIOMMUDevice *hiod,
+ Error **errp)
+{
+return 0;
+}
+
+static int vtd_check_iommufd_hdev(IntelIOMMUState *s,
+  HostIOMMUDevice *hiod,
+  Error **errp)
+{
+return 0;
+}
+
+static int vtd_check_hdev(IntelIOMMUState *s,

VTDHostIOMMUDevice

*vtd_hdev,

+  Error **errp)
+{
+HostIOMMUDevice *hiod = vtd_hdev->dev;
+
+if (object_dynamic_cast(OBJECT(hiod), TYPE_HIOD_IOMMUFD)) {
+return vtd_check_iommufd_hdev(s, hiod, errp);
+}
+
+return vtd_check_legacy_hdev(s, hiod, errp);
+}



I think we should be using the .get_host_iommu_info() class handler
instead. Can we refactor the code slightly to avoid this check on
the type ?


There is some difficulty in avoiding this check, the behavior of

vtd_check_legacy_hdev

and vtd_check_iommufd_hdev are different especially after nesting

support introduced.

vtd_check_iommufd_hdev() has much wider check over cap/ecap bits

besides aw_bits.

I think it is important to fully separate the vIOMMU model from the
host IOMMU backing device. 


This comment is true for the structures also.


Could we introduce a new HostIOMMUDeviceClass
handler .check_hdev(), which would call .get_host_iommu_info() ?


This means that HIOD_LEGACY_INFO and HIOD_IOMMUFD_INFO should be
a common structure 'HostIOMMUDeviceInfo' holding all attributes
for the different backends. Each .get_host_iommu_info() implementation
would translate the specific host iommu device data presentation
into the common 'HostIOMMUDeviceInfo', this is true for host_aw_bits.

'type' could be handled the same way, with a 'HostIOMMUDeviceInfo'
type attribute and host iommu device type definitions, or as you
suggested with a QOM interface. This is more complex however. In
this case, I would suggest to implement a .compatible() handler to
compare the host iommu device type with the vIOMMU type.

The resulting check_hdev routine would look something like :

static int vtd_check_hdev(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hdev,
  Error **errp)
{
HostIOMMUDevice *hiod = vtd_hdev->dev;
HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
HostIOMMUDeviceInfo info;
int host_aw_bits, ret;

ret = hiodc->get_host_iommu_info(hiod, &info, sizeof(info), errp);
if (ret) {
return ret;
}

ret = hiodc->is_compatible(hiod, VIOMMU_INTERFACE(s));
if (ret) {
return ret;
}

if (s->aw_bits > info.aw_bits) {

error_setg(errp, "aw-bits %d > host aw-bits %d",
   s->aw_bits, info.aw_bits);
return -EINVAL;
}
}

and the HostIOMMUDeviceClass::is_compatible() handler would call a
vIOMMUInterface::compatible() handler simply returning
IOMMU_HW_INFO_TYPE_INTEL_VTD. How does that sound ?

Including the type in HostIOMMUDeviceInfo is much simpler to start with.

Thanks,

C.







Understood, besides the new .check_hdev() handler, I think we also need a

new interface

class TYPE_IOMMU_CHECK_HDEV which has two handlers

check_[legacy|iommufd]_hdev(),

and different vIOMMUs have different implementation.


I am

Re: [PATCH 2/2] ppc/pnv: Implement ADU access to LPC space

2024-04-17 Thread Cédric Le Goater


On 4/17/24 13:02, Nicholas Piggin wrote:

One of the functions of the ADU is indirect memory access engines that
send and receive data via ADU registers.

This implements the ADU LPC memory access functionality sufficiently
for IBM proprietary firmware to access the UART and print characters
to the serial port as it does on real hardware.

This requires a linkage between adu and lpc, which allows adu to
perform memory access in the lpc space.

Signed-off-by: Nicholas Piggin 
---
  include/hw/ppc/pnv_adu.h |  7 
  include/hw/ppc/pnv_lpc.h |  5 +++
  hw/ppc/pnv.c |  4 ++
  hw/ppc/pnv_adu.c | 91 
  hw/ppc/pnv_lpc.c | 12 +++---
  5 files changed, 113 insertions(+), 6 deletions(-)

diff --git a/include/hw/ppc/pnv_adu.h b/include/hw/ppc/pnv_adu.h
index 9dc91857a9..b7b5d1bb21 100644
--- a/include/hw/ppc/pnv_adu.h
+++ b/include/hw/ppc/pnv_adu.h
@@ -10,6 +10,7 @@
  #define PPC_PNV_ADU_H
  
  #include "hw/ppc/pnv.h"

+#include "hw/ppc/pnv_lpc.h"
  #include "hw/qdev-core.h"
  
  #define TYPE_PNV_ADU "pnv-adu"

@@ -19,6 +20,12 @@ OBJECT_DECLARE_TYPE(PnvADU, PnvADUClass, PNV_ADU)
  struct PnvADU {
  DeviceState xd;
  
+/* LPCMC (LPC Master Controller) access engine */

+PnvLpcController *lpc;
+uint64_t lpc_base_reg;
+uint64_t lpc_cmd_reg;
+uint64_t lpc_data_reg;


I don't see reset values for these registers. Is that ok ?


  MemoryRegion xscom_regs;
  };
  
diff --git a/include/hw/ppc/pnv_lpc.h b/include/hw/ppc/pnv_lpc.h

index 5d22c45570..016e2998a8 100644
--- a/include/hw/ppc/pnv_lpc.h
+++ b/include/hw/ppc/pnv_lpc.h
@@ -94,6 +94,11 @@ struct PnvLpcClass {
  DeviceRealize parent_realize;
  };
  
+bool pnv_opb_lpc_read(PnvLpcController *lpc, uint32_t addr,

+  uint8_t *data, int sz);
+bool pnv_opb_lpc_write(PnvLpcController *lpc, uint32_t addr,
+   uint8_t *data, int sz);


Maybe rename to pnv_lpc_opb_read/write ?


  ISABus *pnv_lpc_isa_create(PnvLpcController *lpc, bool use_cpld, Error 
**errp);
  int pnv_dt_lpc(PnvChip *chip, void *fdt, int root_offset,
 uint64_t lpcm_addr, uint64_t lpcm_size);
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 5869aac89a..eb9dbc62dd 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -1642,6 +1642,8 @@ static void pnv_chip_power9_realize(DeviceState *dev, 
Error **errp)
  }
  
  /* ADU */

+object_property_set_link(OBJECT(>adu), "lpc", OBJECT(>lpc),
+ _abort);


I would add an assert on the lpc pointer in the ADU realize routine.

Thanks,

C.


  if (!qdev_realize(DEVICE(>adu), NULL, errp)) {
  return;
  }
@@ -1908,6 +1910,8 @@ static void pnv_chip_power10_realize(DeviceState *dev, 
Error **errp)
  }
  
  /* ADU */

+object_property_set_link(OBJECT(>adu), "lpc", OBJECT(>lpc),
+ _abort);
  if (!qdev_realize(DEVICE(>adu), NULL, errp)) {
  return;
  }
diff --git a/hw/ppc/pnv_adu.c b/hw/ppc/pnv_adu.c
index 5bd33a3841..d5570c23e2 100644
--- a/hw/ppc/pnv_adu.c
+++ b/hw/ppc/pnv_adu.c
@@ -21,9 +21,15 @@
  #include "hw/ppc/pnv.h"
  #include "hw/ppc/pnv_adu.h"
  #include "hw/ppc/pnv_chip.h"
+#include "hw/ppc/pnv_lpc.h"
  #include "hw/ppc/pnv_xscom.h"
  #include "trace.h"
  
+#define ADU_LPC_BASE_REG 0x40

+#define ADU_LPC_CMD_REG  0x41
+#define ADU_LPC_DATA_REG 0x42
+#define ADU_LPC_STATUS_REG   0x43
+
  static uint64_t pnv_adu_xscom_read(void *opaque, hwaddr addr, unsigned width)
  {
  PnvADU *adu = PNV_ADU(opaque);
@@ -35,6 +41,24 @@ static uint64_t pnv_adu_xscom_read(void *opaque, hwaddr 
addr, unsigned width)
  case 0x12: /* log register */
  case 0x13: /* error register */
  break;
+case ADU_LPC_BASE_REG:
+/*
+ * LPC Address Map in Pervasive ADU Workbook
+ *
+ * return PNV10_LPCM_BASE(chip) & PPC_BITMASK(8, 31);
+ * XXX: implement as class property, or get from LPC?
+ */
+qemu_log_mask(LOG_UNIMP, "ADU: LPC_BASE_REG is not implemented\n");
+break;
+case ADU_LPC_CMD_REG:
+val = adu->lpc_cmd_reg;
+break;
+case ADU_LPC_DATA_REG:
+val = adu->lpc_data_reg;
+break;
+case ADU_LPC_STATUS_REG:
+val = PPC_BIT(0); /* ack / done */
+break;
  
  default:

  qemu_log_mask(LOG_UNIMP, "ADU Unimplemented read register: Ox%08x\n",
@@ -46,6 +70,26 @@ static uint64_t pnv_adu_xscom_read(void *opaque, hwaddr 
addr, unsigned width)
  return val;
  }
  
+static bool lpc_cmd_read(PnvADU *adu)

+{
+return !!(adu->lpc_cmd_reg & PPC_BIT(0));
+}
+
+static bool lpc_cmd_write(PnvADU *adu)
+{
+return !lpc_cmd_read(adu);
+}
+
+static uint32_t lpc_cmd_addr(PnvADU *adu)
+{
+return (adu->lpc_cmd_reg & PPC_BITMASK(32, 63)) >> PPC_BIT_NR(63);
+}
+
+static uint32_t lpc_cmd_size(PnvADU *adu)
+{
+return (adu->lpc_cmd_reg & PPC_BITMASK(5, 11)) >>

Re: [PATCH 1/2] ppc/pnv: Begin a more complete ADU LPC model for POWER9/10

2024-04-17 Thread Cédric Le Goater


Hello Nick,

On 4/17/24 13:02, Nicholas Piggin wrote:

This implements a framework for an ADU unit model.

The ADU unit actually implements XSCOM, which is the bridge between MMIO
and PIB. However it also includes control and status registers and other
functions that are exposed as PIB (xscom) registers.

To keep things simple, pnv_xscom.c remains the XSCOM bridge
implementation, and pnv_adu.c implements the ADU registers and other
functions.

So far, just the ADU no-op registers in the pnv_xscom.c default handler
are moved over to the adu model.

Signed-off-by: Nicholas Piggin 
---
  include/hw/ppc/pnv_adu.h   |  34 
  include/hw/ppc/pnv_chip.h  |   3 +
  include/hw/ppc/pnv_xscom.h |   6 ++
  hw/ppc/pnv.c   |  16 ++
  hw/ppc/pnv_adu.c   | 111 +
  hw/ppc/pnv_xscom.c |   9 ---
  hw/ppc/meson.build |   1 +
  hw/ppc/trace-events|   4 ++
  8 files changed, 175 insertions(+), 9 deletions(-)
  create mode 100644 include/hw/ppc/pnv_adu.h
  create mode 100644 hw/ppc/pnv_adu.c

diff --git a/include/hw/ppc/pnv_adu.h b/include/hw/ppc/pnv_adu.h
new file mode 100644
index 00..9dc91857a9
--- /dev/null
+++ b/include/hw/ppc/pnv_adu.h
@@ -0,0 +1,34 @@
+/*
+ * QEMU PowerPC PowerNV Emulation of some ADU behaviour
+ *
+ * Copyright (c) 2024, IBM Corporation.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later



Did you mean GPL-2.0-or-later ?

The rest looks good.

Thanks,

C.





+ */
+
+#ifndef PPC_PNV_ADU_H
+#define PPC_PNV_ADU_H
+
+#include "hw/ppc/pnv.h"
+#include "hw/qdev-core.h"
+
+#define TYPE_PNV_ADU "pnv-adu"
+
+OBJECT_DECLARE_TYPE(PnvADU, PnvADUClass, PNV_ADU)
+
+struct PnvADU {
+DeviceState xd;
+
+MemoryRegion xscom_regs;
+};
+
+struct PnvADUClass {
+DeviceClass parent_class;
+
+int xscom_ctrl_size;
+int xscom_mbox_size;
+const MemoryRegionOps *xscom_ctrl_ops;
+const MemoryRegionOps *xscom_mbox_ops;
+};
+
+#endif /* PPC_PNV_ADU_H */
diff --git a/include/hw/ppc/pnv_chip.h b/include/hw/ppc/pnv_chip.h
index 8589f3291e..96e50a2983 100644
--- a/include/hw/ppc/pnv_chip.h
+++ b/include/hw/ppc/pnv_chip.h
@@ -2,6 +2,7 @@
  #define PPC_PNV_CHIP_H
  
  #include "hw/pci-host/pnv_phb4.h"

+#include "hw/ppc/pnv_adu.h"
  #include "hw/ppc/pnv_chiptod.h"
  #include "hw/ppc/pnv_core.h"
  #include "hw/ppc/pnv_homer.h"
@@ -77,6 +78,7 @@ struct Pnv9Chip {
  PnvChip  parent_obj;
  
  /*< public >*/

+PnvADU   adu;
  PnvXive  xive;
  Pnv9Psi  psi;
  PnvLpcController lpc;
@@ -110,6 +112,7 @@ struct Pnv10Chip {
  PnvChip  parent_obj;
  
  /*< public >*/

+PnvADU   adu;
  PnvXive2 xive;
  Pnv9Psi  psi;
  PnvLpcController lpc;
diff --git a/include/hw/ppc/pnv_xscom.h b/include/hw/ppc/pnv_xscom.h
index 6209e18492..e93d310e79 100644
--- a/include/hw/ppc/pnv_xscom.h
+++ b/include/hw/ppc/pnv_xscom.h
@@ -82,6 +82,9 @@ struct PnvXScomInterfaceClass {
  #define PNV_XSCOM_PBCQ_SPCI_BASE  0x9013c00
  #define PNV_XSCOM_PBCQ_SPCI_SIZE  0x5
  
+#define PNV9_XSCOM_ADU_BASE   0x009

+#define PNV9_XSCOM_ADU_SIZE   0x55
+
  /*
   * Layout of the XSCOM PCB addresses (POWER 9)
   */
@@ -128,6 +131,9 @@ struct PnvXScomInterfaceClass {
  #define PNV9_XSCOM_PEC_PCI_STK1   0x140
  #define PNV9_XSCOM_PEC_PCI_STK2   0x180
  
+#define PNV10_XSCOM_ADU_BASE  0x009

+#define PNV10_XSCOM_ADU_SIZE  0x55
+
  /*
   * Layout of the XSCOM PCB addresses (POWER 10)
   */
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 6e3a5ccdec..5869aac89a 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -1530,6 +1530,7 @@ static void pnv_chip_power9_instance_init(Object *obj)
  PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj);
  int i;
  
+object_initialize_child(obj, "adu",  >adu, TYPE_PNV_ADU);

  object_initialize_child(obj, "xive", >xive, TYPE_PNV_XIVE);
  object_property_add_alias(obj, "xive-fabric", OBJECT(>xive),
"xive-fabric");
@@ -1640,6 +1641,13 @@ static void pnv_chip_power9_realize(DeviceState *dev, 
Error **errp)
  return;
  }
  
+/* ADU */

+if (!qdev_realize(DEVICE(>adu), NULL, errp)) {
+return;
+}
+pnv_xscom_add_subregion(chip, PNV9_XSCOM_ADU_BASE,
+>adu.xscom_regs);
+
  pnv_chip_quad_realize(chip9, _err);
  if (local_err) {
  error_propagate(errp, local_err);
@@ -1806,6 +1814,7 @@ static void pnv_chip_power10_instance_init(Object *obj)
  PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj);
  int i;
  
+object_initialize_child(obj, "adu",  >adu, TYPE_PNV_ADU);

  object_initialize_child(obj, "xive", >xive, TYPE_PNV_XIVE2);
  object_property_add_alias(obj, "xive-fabric", OBJECT(>xive),
"xive-fabric");
@@ -1898,6 +1907,13 @@ static void pnv_chip_power10_realize(DeviceState *dev, 
Error **errp)
  return;
  }
  
+/* ADU */

+if (!qdev_realize(DEVICE(>adu),

Re: [PATCH v2 3/5] intel_iommu: Add a framework to do compatibility check with host IOMMU cap/ecap

2024-04-17 Thread Cédric Le Goater

On 4/17/24 06:21, Duan, Zhenzhong wrote:

-Original Message-
From: Cédric Le Goater 
Subject: Re: [PATCH v2 3/5] intel_iommu: Add a framework to do
compatibility check with host IOMMU cap/ecap

Hello,

On 4/16/24 09:09, Duan, Zhenzhong wrote:

Hi Cédric,

-Original Message-
From: Cédric Le Goater 
Subject: Re: [PATCH v2 3/5] intel_iommu: Add a framework to do
compatibility check with host IOMMU cap/ecap

On 4/8/24 10:44, Zhenzhong Duan wrote:

From: Yi Liu 

If check fails, the host side device(either vfio or vdpa device) should not
be passed to guest.

Implementation details for different backends will be in following

patches.

Signed-off-by: Yi Liu 
Signed-off-by: Yi Sun 
Signed-off-by: Zhenzhong Duan 
---
hw/i386/intel_iommu.c | 35

+++

1 file changed, 35 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 4f84e2e801..a49b587c73 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -35,6 +35,7 @@
#include "sysemu/kvm.h"
#include "sysemu/dma.h"
#include "sysemu/sysemu.h"
+#include "sysemu/iommufd.h"
#include "hw/i386/apic_internal.h"
#include "kvm/kvm_i386.h"
#include "migration/vmstate.h"
@@ -3819,6 +3820,32 @@ VTDAddressSpace

*vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,

return vtd_dev_as;
}

+static int vtd_check_legacy_hdev(IntelIOMMUState *s,
+ HostIOMMUDevice *hiod,
+ Error **errp)
+{
+return 0;
+}
+
+static int vtd_check_iommufd_hdev(IntelIOMMUState *s,
+  HostIOMMUDevice *hiod,
+  Error **errp)
+{
+return 0;
+}
+
+static int vtd_check_hdev(IntelIOMMUState *s,

VTDHostIOMMUDevice

*vtd_hdev,

+  Error **errp)
+{
+HostIOMMUDevice *hiod = vtd_hdev->dev;
+
+if (object_dynamic_cast(OBJECT(hiod), TYPE_HIOD_IOMMUFD)) {
+return vtd_check_iommufd_hdev(s, hiod, errp);
+}
+
+return vtd_check_legacy_hdev(s, hiod, errp);
+}

I think we should be using the .get_host_iommu_info() class handler
instead. Can we refactor the code slightly to avoid this check on
the type ?

There is some difficulty in avoiding this check, the behavior of

vtd_check_legacy_hdev

and vtd_check_iommufd_hdev are different especially after nesting

support introduced.

vtd_check_iommufd_hdev() has much wider check over cap/ecap bits

besides aw_bits.

I think it is important to fully separate the vIOMMU model from the
host IOMMU backing device. Could we introduce a new
HostIOMMUDeviceClass
handler .check_hdev(), which would call .get_host_iommu_info() ?

Understood, besides the new .check_hdev() handler, I think we also need a new 
interface
class TYPE_IOMMU_CHECK_HDEV which has two handlers 
check_[legacy|iommufd]_hdev(),
and different vIOMMUs have different implementation.

I am not sure I understand. Which class hierarchy would implement this
new "TYPE_IOMMU_CHECK_HDEV" interface ? vIOMMU or host iommu ?

Could you please explain with an update of your diagram :

                          HostIOMMUDevice
                                 | .get_host_iommu_info()
                                 |
                                 |
            .--------------------+--------------------.
            |                    |                    |
      HIODLegacyVFIO      [HIODLegacyVDPA]       HIODIOMMUFD
            | .vdev              | [.vdev]            | .iommufd
                                                      | .devid
                                                      | [.ioas_id]
                                                      | [.attach_hwpt()]
                                                      | [.detach_hwpt()]
                                                      |
                                         .------------+------------.
                                         |                         |
                                  HIODIOMMUFDVFIO          [HIODIOMMUFDVDPA]
                                         | .vdev                   | [.vdev]

Thanks,

C.

Then legacy and iommufd host device have different implementation of 
.check_hdev()
and calls into one of the two interface handlers.

Let me know if I misunderstand any of your point.

Thanks
Zhenzhong

Thanks,

C.

That's the reason I have two functions to do different things.
See:

https://github.com/yiliu1765/qemu/blob/zhenzhong/iommufd_nesting_rfc
v2/hw/i386/intel_iommu.c#L5472

Meanwhile in vtd_check_legacy_hdev(), when legacy VFIO device attaches

to modern vIOMMU,

this is unsupported and error out early, it will not

call .get_host_iommu_info().

I mean we don't need to unconditionally call .get_host_iommu_info() in

some cases.

Thanks
Zhenzhong

Re: [PATCH v2 2/4] hw/i2c: Fix checkpatch line over 80 chars warnings

2024-04-17 Thread Cédric Le Goater


Hello,

On 4/16/24 20:47, Philippe Mathieu-Daudé wrote:

We are going to modify these lines, fix their style
in order to avoid checkpatch.pl warnings:

   WARNING: line over 80 characters

Signed-off-by: Philippe Mathieu-Daudé 
---
  include/hw/i2c/i2c.h|  11 ++-
  include/hw/nvram/eeprom_at24c.h |   6 +-
  hw/arm/aspeed.c | 140 +++-
  hw/nvram/eeprom_at24c.c |   6 +-
  4 files changed, 98 insertions(+), 65 deletions(-)

diff --git a/include/hw/i2c/i2c.h b/include/hw/i2c/i2c.h
index c18a69e4b6..a1b3f4d179 100644
--- a/include/hw/i2c/i2c.h
+++ b/include/hw/i2c/i2c.h
@@ -31,7 +31,10 @@ struct I2CSlaveClass {
  /* Master to slave. Returns non-zero for a NAK, 0 for success. */
  int (*send)(I2CSlave *s, uint8_t data);
  
-/* Master to slave (asynchronous). Receiving slave must call i2c_ack(). */

+/*
+ * Master to slave (asynchronous).
+ * Receiving slave must call i2c_ack().
+ */
  void (*send_async)(I2CSlave *s, uint8_t data);
  
  /*

@@ -83,7 +86,8 @@ struct I2CPendingMaster {
  };
  
  typedef QLIST_HEAD(I2CNodeList, I2CNode) I2CNodeList;

-typedef QSIMPLEQ_HEAD(I2CPendingMasters, I2CPendingMaster) I2CPendingMasters;
+typedef QSIMPLEQ_HEAD(I2CPendingMasters, I2CPendingMaster)
+I2CPendingMasters;
  
  struct I2CBus {

  BusState qbus;
@@ -176,7 +180,8 @@ I2CSlave *i2c_slave_new(const char *name, uint8_t addr);
   * Create the device state structure, initialize it, put it on the
   * specified @bus, and drop the reference to it (the device is realized).
   */
-I2CSlave *i2c_slave_create_simple(I2CBus *bus, const char *name, uint8_t addr);
+I2CSlave *i2c_slave_create_simple(I2CBus *bus,
+  const char *name, uint8_t addr);
  
  /**

   * Realize and drop a reference an I2C slave device
diff --git a/include/hw/nvram/eeprom_at24c.h b/include/hw/nvram/eeprom_at24c.h
index acb9857b2a..9d29f0a69a 100644
--- a/include/hw/nvram/eeprom_at24c.h
+++ b/include/hw/nvram/eeprom_at24c.h
@@ -33,7 +33,9 @@ I2CSlave *at24c_eeprom_init(I2CBus *bus, uint8_t address, 
uint32_t rom_size);
   * @bus, and drop the reference to it (the device is realized). Copies the 
data
   * from @init_rom to the beginning of the EEPROM memory buffer.
   */
-I2CSlave *at24c_eeprom_init_rom(I2CBus *bus, uint8_t address, uint32_t 
rom_size,
-const uint8_t *init_rom, uint32_t 
init_rom_size);
+I2CSlave *at24c_eeprom_init_rom(I2CBus *bus,
+uint8_t address, uint32_t rom_size,
+const uint8_t *init_rom,
+uint32_t init_rom_size);
  
  #endif

diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index 93ca87fda2..8279ad748a 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -649,18 +649,23 @@ static void witherspoon_bmc_i2c_init(AspeedMachineState 
*bmc)
  qdev_connect_gpio_out(dev, pca1_leds[i].gpio_id,
qdev_get_gpio_in(DEVICE(led), 0));
  }
-i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 3), "dps310", 0x76);
-i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 3), "max31785", 
0x52);
-i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 4), "tmp423", 0x4c);
-i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 5), "tmp423", 0x4c);
+i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 3),
+"dps310", 0x76);
+i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 3),
+"max31785", 0x52);
+i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 4),
+"tmp423", 0x4c);
+i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 5),
+"tmp423", 0x4c);
  
  /* The Witherspoon expects a TMP275 but a TMP105 is compatible */

-i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 9), TYPE_TMP105,
- 0x4a);
+i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 9),
+TYPE_TMP105, 0x4a);
  
  /* The witherspoon board expects Epson RX8900 I2C RTC but a ds1338 is

   * good enough */
-i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 11), "ds1338", 0x32);
+i2c_slave_create_simple(aspeed_i2c_get_bus(>i2c, 11),
+"ds1338", 0x32);


If the definitions were on a single line, they would be more
readable IMHO. So I would do the opposite change ...

An alternate solution could be to define an array of devices
at the machine class level, something like
  struct i2c_device {
  const char *type;
  uint8_t bus;
  uint8_t addr;
  } devices[] = { ... };


Thanks,

C.



  smbus_eeprom_init_one(aspeed_i2c_get_bus(>i2c, 11), 0x51,
eeprom_buf);
@@ -717,19 +722,20 @@ static void fp5280g2_bmc_i2c_init(AspeedMachineState *bmc)
  at24c_eeprom_init(aspeed_i2c_get_bus(>i2c, 1), 0x50, 32768);
  
  /* The fp5280g2 expects a

Re: [PATCH v3 15/16] test/avocado/machine_aspeed.py: Add AST2700 test case

2024-04-16 Thread Cédric Le Goater


On 4/16/24 11:19, Jamin Lin wrote:

Add a test case to test Aspeed OpenBMC SDK v09.01 on AST2700 board.

It loads u-boot-nodtb.bin, u-boot.dtb, tfa and optee-os
images to dram first which base address is 0x4.
Then, boot and launch 4 cpu cores.

```
qemu-system-aarch64 -machine ast2700-evb
 -device loader,force-raw=on,addr=0x4,file=workdir/u-boot-nodtb.bin 
\
 -device loader,force-raw=on,addr=uboot_dtb_load_addr,file=u-boot.dtb\
 -device loader,force-raw=on,addr=0x43000,file=workdir/bl31.bin\
 -device 
loader,force-raw=on,addr=0x43008,file=workdir/optee/tee-raw.bin\
 -device loader,cpu-num=0,addr=0x43000 \
 -device loader,cpu-num=1,addr=0x43000 \
 -device loader,cpu-num=2,addr=0x43000 \
 -device loader,cpu-num=3,addr=0x43000 \
 -smp 4 \
 -drive file=workdir/image-bmc,format=raw,if=mtd
```

A test image is downloaded from the ASPEED Forked OpenBMC GitHub release 
repository :
https://github.com/AspeedTech-BMC/openbmc/releases/

Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  tests/avocado/machine_aspeed.py | 62 +
  1 file changed, 62 insertions(+)

diff --git a/tests/avocado/machine_aspeed.py b/tests/avocado/machine_aspeed.py
index cec0181424..3a20644fb2 100644
--- a/tests/avocado/machine_aspeed.py
+++ b/tests/avocado/machine_aspeed.py
@@ -311,6 +311,17 @@ def do_test_arm_aspeed_sdk_start(self, image):
  self, 'boot', '## Loading kernel from FIT Image')
  self.wait_for_console_pattern('Starting kernel ...')
  
+def do_test_aarch64_aspeed_sdk_start(self, image):

+self.vm.set_console()
+self.vm.add_args('-drive', 'file=' + image + ',if=mtd,format=raw')
+
+self.vm.launch()
+
+self.wait_for_console_pattern('U-Boot 2023.10')
+self.wait_for_console_pattern('## Loading kernel from FIT Image')
+self.wait_for_console_pattern('Starting kernel ...')
+self.wait_for_console_pattern("systemd[1]: Hostname set to")
+
  @skipUnless(os.getenv('QEMU_TEST_FLAKY_TESTS'), 'Test is unstable on 
GitLab')
  
  def test_arm_ast2500_evb_sdk(self):

@@ -375,3 +386,54 @@ def test_arm_ast2600_evb_sdk(self):
   'i2c i2c-5: new_device: Instantiated device ds1307 at 0x32');
  year = time.strftime("%Y")
  self.ssh_command_output_contains('/sbin/hwclock -f /dev/rtc1', year);
+
+def test_aarch64_ast2700_evb_sdk_v09_01(self):
+"""
+:avocado: tags=arch:aarch64
+:avocado: tags=machine:ast2700-evb
+"""
+
+image_url = ('https://github.com/AspeedTech-BMC/openbmc/releases/'
+ 'download/v09.01/ast2700-default-obmc.tar.gz')
+image_hash = 
'b1cc0fd73c7650d34c9c8459a243f52a91e9e27144b8608b2645ab19461d1e07'
+image_path = self.fetch_asset(image_url, asset_hash=image_hash,
+  algorithm='sha256')
+archive.extract(image_path, self.workdir)
+
+num_cpu = 4
+image_dir = self.workdir + '/ast2700-default/'
+uboot_size = os.path.getsize(image_dir + 'u-boot-nodtb.bin')
+uboot_dtb_load_addr = hex(0x4 + uboot_size)
+
+load_images_list = [
+{
+'addr': '0x4',
+'file': image_dir + 'u-boot-nodtb.bin'
+},
+{
+'addr': str(uboot_dtb_load_addr),
+'file': image_dir + 'u-boot.dtb'
+},
+{
+'addr': '0x43000',
+'file': image_dir + 'bl31.bin'
+},
+{
+'addr': '0x43008',
+'file': image_dir + 'optee/tee-raw.bin'
+}
+]
+
+for load_image in load_images_list:
+addr = load_image['addr']
+file = load_image['file']
+self.vm.add_args('-device',
+ f'loader,force-raw=on,addr={addr},file={file}')
+
+for i in range(num_cpu):
+self.vm.add_args('-device',
+ f'loader,addr=0x43000,cpu-num={i}')
+
+self.vm.add_args('-smp', str(num_cpu))
+self.do_test_aarch64_aspeed_sdk_start(image_dir + 'image-bmc')
+

Re: [PATCH v3 16/16] docs:aspeed: Add AST2700 Evaluation board

2024-04-16 Thread Cédric Le Goater


On 4/16/24 11:19, Jamin Lin wrote:

Add AST2700 Evaluation board and its boot command.

Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  docs/system/arm/aspeed.rst | 39 ++
  1 file changed, 35 insertions(+), 4 deletions(-)

diff --git a/docs/system/arm/aspeed.rst b/docs/system/arm/aspeed.rst
index b2dea54eed..320ff2a4cd 100644
--- a/docs/system/arm/aspeed.rst
+++ b/docs/system/arm/aspeed.rst
@@ -1,11 +1,12 @@
-Aspeed family boards (``*-bmc``, ``ast2500-evb``, ``ast2600-evb``)
-==
+Aspeed family boards (``*-bmc``, ``ast2500-evb``, ``ast2600-evb``, 
``ast2700-evb``)
+===
  
  The QEMU Aspeed machines model BMCs of various OpenPOWER systems and

  Aspeed evaluation boards. They are based on different releases of the
  Aspeed SoC : the AST2400 integrating an ARM926EJ-S CPU (400MHz), the
-AST2500 with an ARM1176JZS CPU (800MHz) and more recently the AST2600
-with dual cores ARM Cortex-A7 CPUs (1.2GHz).
+AST2500 with an ARM1176JZS CPU (800MHz), the AST2600
+with dual cores ARM Cortex-A7 CPUs (1.2GHz) and more recently the AST2700
+with quad cores ARM Cortex-A35 64 bits CPUs (1.6GHz)
  
  The SoC comes with RAM, Gigabit ethernet, USB, SD/MMC, USB, SPI, I2C,

  etc.
@@ -38,6 +39,10 @@ AST2600 SoC based machines :
  - ``qcom-dc-scm-v1-bmc``   Qualcomm DC-SCM V1 BMC
  - ``qcom-firework-bmc``Qualcomm Firework BMC
  
+AST2700 SoC based machines :

+
+- ``ast2700-evb``  Aspeed AST2700 Evaluation board (Cortex-A35)
+
  Supported devices
  -
  
@@ -66,6 +71,7 @@ Supported devices

   * eMMC Boot Controller (dummy)
   * PECI Controller (minimal)
   * I3C Controller
+ * Internal Bridge Controller (SLI dummy)
  
  
  Missing devices

@@ -95,6 +101,10 @@ or directly from the OpenBMC GitHub release repository :
  
 https://github.com/openbmc/openbmc/releases
  
+or directly from the ASPEED Forked OpenBMC GitHub release repository :

+
+   https://github.com/AspeedTech-BMC/openbmc/releases
+
  To boot a kernel directly from a Linux build tree:
  
  .. code-block:: bash

@@ -164,6 +174,27 @@ under Linux), use :
  
-M ast2500-evb,bmc-console=uart3
  
+

+Boot the AST2700 machine from the flash image, use an MTD drive :
+
+.. code-block:: bash
+
+  IMGDIR=ast2700-default
+  UBOOT_SIZE=$(stat --format=%s -L ${IMGDIR}/u-boot-nodtb.bin)
+
+  $ qemu-system-aarch64 -M ast2700-evb \
+   -device 
loader,force-raw=on,addr=0x4,file=${IMGDIR}/u-boot-nodtb.bin \
+   -device loader,force-raw=on,addr=$((0x4 + 
${UBOOT_SIZE})),file=u-boot.dtb \
+   -device loader,force-raw=on,addr=0x43000,file=${IMGDIR}/bl31.bin \
+   -device 
loader,force-raw=on,addr=0x43008,file=${IMGDIR}/optee/tee-raw.bin \
+   -device loader,cpu-num=0,addr=0x43000 \
+   -device loader,cpu-num=1,addr=0x43000 \
+   -device loader,cpu-num=2,addr=0x43000 \
+   -device loader,cpu-num=3,addr=0x43000 \
+   -smp 4 \
+   -drive file=${IMGDIR}/image-bmc,format=raw,if=mtd \
+   -nographic
+
  Aspeed minibmc family boards (``ast1030-evb``)
  ==

Re: [PATCH v3 06/16] aspeed/smc: correct device description

2024-04-16 Thread Cédric Le Goater


On 4/16/24 11:18, Jamin Lin wrote:

Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/ssi/aspeed_smc.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hw/ssi/aspeed_smc.c b/hw/ssi/aspeed_smc.c
index 6e1a84c197..8a8d77b480 100644
--- a/hw/ssi/aspeed_smc.c
+++ b/hw/ssi/aspeed_smc.c
@@ -1448,7 +1448,7 @@ static void aspeed_2500_fmc_class_init(ObjectClass 
*klass, void *data)
  DeviceClass *dc = DEVICE_CLASS(klass);
  AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass);
  
-dc->desc   = "Aspeed 2600 FMC Controller";

+dc->desc   = "Aspeed 2500 FMC Controller";
  asc->r_conf= R_CONF;
  asc->r_ce_ctrl = R_CE_CTRL;
  asc->r_ctrl0   = R_CTRL0;
@@ -1486,7 +1486,7 @@ static void aspeed_2500_spi1_class_init(ObjectClass 
*klass, void *data)
  DeviceClass *dc = DEVICE_CLASS(klass);
  AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass);
  
-dc->desc   = "Aspeed 2600 SPI1 Controller";

+dc->desc   = "Aspeed 2500 SPI1 Controller";
  asc->r_conf= R_CONF;
  asc->r_ce_ctrl = R_CE_CTRL;
  asc->r_ctrl0   = R_CTRL0;
@@ -1521,7 +1521,7 @@ static void aspeed_2500_spi2_class_init(ObjectClass 
*klass, void *data)
  DeviceClass *dc = DEVICE_CLASS(klass);
  AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass);
  
-dc->desc   = "Aspeed 2600 SPI2 Controller";

+dc->desc   = "Aspeed 2500 SPI2 Controller";
  asc->r_conf= R_CONF;
  asc->r_ce_ctrl = R_CE_CTRL;
  asc->r_ctrl0   = R_CTRL0;

Re: [PATCH v3 05/16] aspeed/sdmc: Add AST2700 support

2024-04-16 Thread Cédric Le Goater


On 4/16/24 11:18, Jamin Lin wrote:

The SDRAM memory controller(DRAMC) controls the access to external
DDR4 and DDR5 SDRAM and power up to DDR4 and DDR5 PHY.

The DRAM memory controller of AST2700 is not backward compatible
to previous chips such as AST2600, AST2500 and AST2400.

Max memory is now 8GiB on the AST2700. Introduce new
aspeed_2700_sdmc and class with read/write operation and
reset handlers.

Define DRAMC necessary protected registers and
unprotected registers for AST2700 and increase
the register set to 0x1000.

Add unlocked property to change controller protected status.

Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/misc/aspeed_sdmc.c | 190 +-
  include/hw/misc/aspeed_sdmc.h |   5 +-
  2 files changed, 193 insertions(+), 2 deletions(-)

diff --git a/hw/misc/aspeed_sdmc.c b/hw/misc/aspeed_sdmc.c
index 873d67c592..69a34903db 100644
--- a/hw/misc/aspeed_sdmc.c
+++ b/hw/misc/aspeed_sdmc.c
@@ -27,6 +27,7 @@
  #define   PROT_SOFTLOCKED0x00
  
  #define   PROT_KEY_UNLOCK 0xFC600309

+#define   PROT_2700_KEY_UNLOCK  0x1688A8A8
  #define   PROT_KEY_HARDLOCK   0xDEADDEAD /* AST2600 */
  
  /* Configuration Register */

@@ -54,6 +55,46 @@
  #define R_DRAM_TIME   (0x8c / 4)
  #define R_ECC_ERR_INJECT  (0xb4 / 4)
  
+/* AST2700 Register */

+#define R_2700_PROT (0x00 / 4)
+#define R_INT_STATUS(0x04 / 4)
+#define R_INT_CLEAR (0x08 / 4)
+#define R_INT_MASK  (0x0c / 4)
+#define R_MAIN_CONF (0x10 / 4)
+#define R_MAIN_CONTROL  (0x14 / 4)
+#define R_MAIN_STATUS   (0x18 / 4)
+#define R_ERR_STATUS(0x1c / 4)
+#define R_ECC_FAIL_STATUS   (0x78 / 4)
+#define R_ECC_FAIL_ADDR (0x7c / 4)
+#define R_ECC_TESTING_CONTROL   (0x80 / 4)
+#define R_PROT_REGION_LOCK_STATUS   (0x94 / 4)
+#define R_TEST_FAIL_ADDR(0xd4 / 4)
+#define R_TEST_FAIL_D0  (0xd8 / 4)
+#define R_TEST_FAIL_D1  (0xdc / 4)
+#define R_TEST_FAIL_D2  (0xe0 / 4)
+#define R_TEST_FAIL_D3  (0xe4 / 4)
+#define R_DBG_STATUS(0xf4 / 4)
+#define R_PHY_INTERFACE_STATUS  (0xf8 / 4)
+#define R_GRAPHIC_MEM_BASE_ADDR (0x10c / 4)
+#define R_PORT0_INTERFACE_MONITOR0  (0x240 / 4)
+#define R_PORT0_INTERFACE_MONITOR1  (0x244 / 4)
+#define R_PORT0_INTERFACE_MONITOR2  (0x248 / 4)
+#define R_PORT1_INTERFACE_MONITOR0  (0x2c0 / 4)
+#define R_PORT1_INTERFACE_MONITOR1  (0x2c4 / 4)
+#define R_PORT1_INTERFACE_MONITOR2  (0x2c8 / 4)
+#define R_PORT2_INTERFACE_MONITOR0  (0x340 / 4)
+#define R_PORT2_INTERFACE_MONITOR1  (0x344 / 4)
+#define R_PORT2_INTERFACE_MONITOR2  (0x348 / 4)
+#define R_PORT3_INTERFACE_MONITOR0  (0x3c0 / 4)
+#define R_PORT3_INTERFACE_MONITOR1  (0x3c4 / 4)
+#define R_PORT3_INTERFACE_MONITOR2  (0x3c8 / 4)
+#define R_PORT4_INTERFACE_MONITOR0  (0x440 / 4)
+#define R_PORT4_INTERFACE_MONITOR1  (0x444 / 4)
+#define R_PORT4_INTERFACE_MONITOR2  (0x448 / 4)
+#define R_PORT5_INTERFACE_MONITOR0  (0x4c0 / 4)
+#define R_PORT5_INTERFACE_MONITOR1  (0x4c4 / 4)
+#define R_PORT5_INTERFACE_MONITOR2  (0x4c8 / 4)
+
  /*
   * Configuration register Ox4 (for Aspeed AST2400 SOC)
   *
@@ -101,6 +142,19 @@
   ASPEED_SDMC_AST2500_RESERVED | ASPEED_SDMC_VGA_COMPAT |\
   ASPEED_SDMC_VGA_APERTURE(ASPEED_SDMC_VGA_64MB))
  
+/*

+ * Main Configuration register Ox10 (for Aspeed AST2700 SOC and higher)
+ *
+ */
+#define ASPEED_SDMC_AST2700_RESERVED0x2082 /* 31:16, 13, 7, 1 */
+#define ASPEED_SDMC_AST2700_DATA_SCRAMBLE   (1 << 8)
+#define ASPEED_SDMC_AST2700_ECC_ENABLE  (1 << 6)
+#define ASPEED_SDMC_AST2700_PAGE_MATCHING_ENABLE(1 << 5)
+#define ASPEED_SDMC_AST2700_DRAM_SIZE(x)((x & 0x7) << 2)
+
+#define ASPEED_SDMC_AST2700_READONLY_MASK   \
+ (ASPEED_SDMC_AST2700_RESERVED)
+
  static uint64_t aspeed_sdmc_read(void *opaque, hwaddr addr, unsigned size)
  {
  AspeedSDMCState *s = ASPEED_SDMC(opaque);
@@ -216,7 +270,7 @@ static void aspeed_sdmc_realize(DeviceState *dev, Error 
**errp)
  AspeedSDMCState *s = ASPEED_SDMC(dev);
  AspeedSDMCClass *asc = ASPEED_SDMC_GET_CLASS(s);
  
-assert(asc->max_ram_size < 4 * GiB); /* 32-bit address bus */

+assert(asc->max_ram_size < 4 * GiB || asc->is_bus64bit);
  s->max_ram_size = asc->max_ram_size;
  
  memory_region_init_io(&s->iomem, OBJECT(s), &aspeed_sdmc_ops, s,

@@ -236,6 +290,7 @@ static const VMStateDescription vmstate_aspeed_sdmc = {
  
  static Property aspeed_sdmc_properties[] = {

  DEFINE_PROP_UINT64("max-ram-size", AspeedSDMCState, max_ram_size, 0),
+DEFINE_PROP_BOOL("unlocked", AspeedSDMCState, unlocked, false),
  DEFINE_PROP_END_OF_LIST(),
  };
  
@@ -500,12 +555,145 @@ static const TypeInfo aspeed_2600_sdmc_info = {

  .class_init = aspeed_2600_

Re: [PATCH v3 02/16] aspeed/sli: Add AST2700 support

2024-04-16 Thread Cédric Le Goater

 .class_size= sizeof(AspeedSLIClass),
+.abstract  = true,
+};
+
+static void aspeed_2700_sli_class_init(ObjectClass *klass, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+
+dc->desc = "AST2700 SLI Controller";
+}
+
+static void aspeed_2700_sliio_class_init(ObjectClass *klass, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+
+dc->desc = "AST2700 I/O SLI Controller";
+dc->realize = aspeed_sliio_realize;
+}
+
+static const TypeInfo aspeed_2700_sli_info = {
+.name   = TYPE_ASPEED_2700_SLI,
+.parent = TYPE_ASPEED_SLI,
+.class_init = aspeed_2700_sli_class_init,
+};
+
+static const TypeInfo aspeed_2700_sliio_info = {
+.name   = TYPE_ASPEED_2700_SLIIO,
+.parent = TYPE_ASPEED_SLI,
+.class_init = aspeed_2700_sliio_class_init,
+};
+
+static void aspeed_sli_register_types(void)
+{
+type_register_static(&aspeed_sli_info);
+type_register_static(&aspeed_2700_sli_info);
+type_register_static(&aspeed_2700_sliio_info);
+}
+
+type_init(aspeed_sli_register_types);
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
index 86596a3888..2ca8717be2 100644
--- a/hw/misc/meson.build
+++ b/hw/misc/meson.build
@@ -136,7 +136,8 @@ system_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files(
'aspeed_sbc.c',
'aspeed_sdmc.c',
'aspeed_xdma.c',
-  'aspeed_peci.c'))
+  'aspeed_peci.c',
+  'aspeed_sli.c'))
  
  system_ss.add(when: 'CONFIG_MSF2', if_true: files('msf2-sysreg.c'))

  system_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_rng.c'))
diff --git a/hw/misc/trace-events b/hw/misc/trace-events
index 5d241cb40a..e13b648221 100644
--- a/hw/misc/trace-events
+++ b/hw/misc/trace-events
@@ -351,3 +351,10 @@ djmemc_write(int reg, uint64_t value, unsigned int size) 
"reg=0x%x value=0x%"PRI
  # iosb.c
  iosb_read(int reg, uint64_t value, unsigned int size) "reg=0x%x value=0x%"PRIx64" 
size=%u"
  iosb_write(int reg, uint64_t value, unsigned int size) "reg=0x%x value=0x%"PRIx64" 
size=%u"
+
+# aspeed_sli.c
+aspeed_sli_write(uint64_t offset, unsigned int size, uint32_t data) "To 0x%" PRIx64 
" of size %u: 0x%" PRIx32
+aspeed_sli_read(uint64_t offset, unsigned int size, uint32_t data) "To 0x%" PRIx64 
" of size %u: 0x%" PRIx32
+aspeed_sliio_write(uint64_t offset, unsigned int size, uint32_t data) "To 0x%" PRIx64 
" of size %u: 0x%" PRIx32
+aspeed_sliio_read(uint64_t offset, unsigned int size, uint32_t data) "To 0x%" PRIx64 
" of size %u: 0x%" PRIx32
+
diff --git a/include/hw/misc/aspeed_sli.h b/include/hw/misc/aspeed_sli.h
new file mode 100644
index 00..2329002b84
--- /dev/null
+++ b/include/hw/misc/aspeed_sli.h
@@ -0,0 +1,31 @@
+/*
+ * ASPEED SLI Controller
+ *
+ * Copyright (C) 2024 ASPEED Technology Inc.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef ASPEED_SLI_H
+#define ASPEED_SLI_H
+
+#include "hw/sysbus.h"
+
+#define TYPE_ASPEED_SLI "aspeed.sli"
+#define TYPE_ASPEED_2700_SLI TYPE_ASPEED_SLI "-ast2700"
+#define TYPE_ASPEED_2700_SLIIO TYPE_ASPEED_SLI "io" "-ast2700"
+OBJECT_DECLARE_TYPE(AspeedSLIState, AspeedSLIClass, ASPEED_SLI)
+
+#define ASPEED_SLI_NR_REGS  (0x500 >> 2)
+
+struct AspeedSLIState {
+SysBusDevice parent;
+MemoryRegion iomem;
+
+uint32_t regs[ASPEED_SLI_NR_REGS];
+};
+
+struct AspeedSLIClass {
+SysBusDeviceClass parent_class;
+};


Maybe use OBJECT_DECLARE_SIMPLE_TYPE() to avoid the empty class.

Anyhow,


Reviewed-by: Cédric Le Goater 

Thanks,

C.

Re: [PATCH v3 04/16] aspeed/sdmc: fix coding style

2024-04-16 Thread Cédric Le Goater


On 4/16/24 11:18, Jamin Lin wrote:

Fix coding style issues from checkpatch.pl

Test command:
scripts/checkpatch.pl --no-tree -f hw/misc/aspeed_sdmc.c

Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/misc/aspeed_sdmc.c | 11 +++
  1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/hw/misc/aspeed_sdmc.c b/hw/misc/aspeed_sdmc.c
index 74279bbe8e..873d67c592 100644
--- a/hw/misc/aspeed_sdmc.c
+++ b/hw/misc/aspeed_sdmc.c
@@ -296,7 +296,8 @@ static void aspeed_2400_sdmc_write(AspeedSDMCState *s, 
uint32_t reg,
 uint32_t data)
  {
  if (reg == R_PROT) {
-s->regs[reg] = (data == PROT_KEY_UNLOCK) ? PROT_UNLOCKED : 
PROT_SOFTLOCKED;
+s->regs[reg] =
+(data == PROT_KEY_UNLOCK) ? PROT_UNLOCKED : PROT_SOFTLOCKED;
  return;
  }
  
@@ -354,7 +355,8 @@ static void aspeed_2500_sdmc_write(AspeedSDMCState *s, uint32_t reg,

 uint32_t data)
  {
  if (reg == R_PROT) {
-s->regs[reg] = (data == PROT_KEY_UNLOCK) ? PROT_UNLOCKED : 
PROT_SOFTLOCKED;
+s->regs[reg] =
+(data == PROT_KEY_UNLOCK) ? PROT_UNLOCKED : PROT_SOFTLOCKED;
  return;
  }
  
@@ -434,8 +436,9 @@ static void aspeed_2600_sdmc_write(AspeedSDMCState *s, uint32_t reg,

  }
  
  if (s->regs[R_PROT] == PROT_HARDLOCKED) {

-qemu_log_mask(LOG_GUEST_ERROR, "%s: SDMC is locked until system 
reset!\n",
-__func__);
+qemu_log_mask(LOG_GUEST_ERROR,
+  "%s: SDMC is locked until system reset!\n",
+  __func__);
  return;
  }

Re: [PATCH v3 03/16] aspeed/sdmc: remove redundant macros

2024-04-16 Thread Cédric Le Goater


On 4/16/24 11:18, Jamin Lin wrote:

These macros are no longer used for ASPEED SOCs, so remove them.

Signed-off-by: Troy Lee 
Signed-off-by: Jamin Lin 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/misc/aspeed_sdmc.c | 15 ---
  1 file changed, 15 deletions(-)

diff --git a/hw/misc/aspeed_sdmc.c b/hw/misc/aspeed_sdmc.c
index 64cd1a81dc..74279bbe8e 100644
--- a/hw/misc/aspeed_sdmc.c
+++ b/hw/misc/aspeed_sdmc.c
@@ -76,10 +76,6 @@
  #define ASPEED_SDMC_VGA_32MB0x2
  #define ASPEED_SDMC_VGA_64MB0x3
  #define ASPEED_SDMC_DRAM_SIZE(x)(x & 0x3)
-#define ASPEED_SDMC_DRAM_64MB   0x0
-#define ASPEED_SDMC_DRAM_128MB  0x1
-#define ASPEED_SDMC_DRAM_256MB  0x2
-#define ASPEED_SDMC_DRAM_512MB  0x3
  
  #define ASPEED_SDMC_READONLY_MASK   \

  (ASPEED_SDMC_RESERVED | ASPEED_SDMC_VGA_COMPAT |\
@@ -100,17 +96,6 @@
  #define ASPEED_SDMC_CACHE_ENABLE(1 << 10) /* differs from AST2400 */
  #define ASPEED_SDMC_DRAM_TYPE   (1 << 4)  /* differs from AST2400 */
  
-/* DRAM size definitions differs */

-#define ASPEED_SDMC_AST2500_128MB   0x0
-#define ASPEED_SDMC_AST2500_256MB   0x1
-#define ASPEED_SDMC_AST2500_512MB   0x2
-#define ASPEED_SDMC_AST2500_1024MB  0x3
-
-#define ASPEED_SDMC_AST2600_256MB   0x0
-#define ASPEED_SDMC_AST2600_512MB   0x1
-#define ASPEED_SDMC_AST2600_1024MB  0x2
-#define ASPEED_SDMC_AST2600_2048MB  0x3
-
  #define ASPEED_SDMC_AST2500_READONLY_MASK   \
  (ASPEED_SDMC_HW_VERSION(0xf) | ASPEED_SDMC_CACHE_INITIAL_DONE | \
   ASPEED_SDMC_AST2500_RESERVED | ASPEED_SDMC_VGA_COMPAT |\

Re: [PATCH v2 3/5] intel_iommu: Add a framework to do compatibility check with host IOMMU cap/ecap

2024-04-16 Thread Cédric Le Goater


Hello,

On 4/16/24 09:09, Duan, Zhenzhong wrote:

Hi Cédric,


-Original Message-
From: Cédric Le Goater 
Subject: Re: [PATCH v2 3/5] intel_iommu: Add a framework to do
compatibility check with host IOMMU cap/ecap

On 4/8/24 10:44, Zhenzhong Duan wrote:

From: Yi Liu 

If check fails, the host side device(either vfio or vdpa device) should not
be passed to guest.

Implementation details for different backends will be in following patches.

Signed-off-by: Yi Liu 
Signed-off-by: Yi Sun 
Signed-off-by: Zhenzhong Duan 
---
   hw/i386/intel_iommu.c | 35

+++

   1 file changed, 35 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 4f84e2e801..a49b587c73 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -35,6 +35,7 @@
   #include "sysemu/kvm.h"
   #include "sysemu/dma.h"
   #include "sysemu/sysemu.h"
+#include "sysemu/iommufd.h"
   #include "hw/i386/apic_internal.h"
   #include "kvm/kvm_i386.h"
   #include "migration/vmstate.h"
@@ -3819,6 +3820,32 @@ VTDAddressSpace

*vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,

   return vtd_dev_as;
   }

+static int vtd_check_legacy_hdev(IntelIOMMUState *s,
+ HostIOMMUDevice *hiod,
+ Error **errp)
+{
+return 0;
+}
+
+static int vtd_check_iommufd_hdev(IntelIOMMUState *s,
+  HostIOMMUDevice *hiod,
+  Error **errp)
+{
+return 0;
+}
+
+static int vtd_check_hdev(IntelIOMMUState *s, VTDHostIOMMUDevice

*vtd_hdev,

+  Error **errp)
+{
+HostIOMMUDevice *hiod = vtd_hdev->dev;
+
+if (object_dynamic_cast(OBJECT(hiod), TYPE_HIOD_IOMMUFD)) {
+return vtd_check_iommufd_hdev(s, hiod, errp);
+}
+
+return vtd_check_legacy_hdev(s, hiod, errp);
+}



I think we should be using the .get_host_iommu_info() class handler
instead. Can we refactor the code slightly to avoid this check on
the type ?


There is some difficulty in avoiding this check, the behavior of 
vtd_check_legacy_hdev
and vtd_check_iommufd_hdev are different especially after nesting support 
introduced.
vtd_check_iommufd_hdev() has much wider check over cap/ecap bits besides 
aw_bits.


I think it is important to fully separate the vIOMMU model from the
host IOMMU backing device. Could we introduce a new HostIOMMUDeviceClass
handler .check_hdev() handler, which would call .get_host_iommu_info() ?


Thanks,

C.



That's the reason I have two functions to do different things.
See:
https://github.com/yiliu1765/qemu/blob/zhenzhong/iommufd_nesting_rfcv2/hw/i386/intel_iommu.c#L5472

Meanwhile in vtd_check_legacy_hdev(), when legacy VFIO device attaches to 
modern vIOMMU,
this is unsupported and error out early, it will not call 
.get_host_iommu_info().
I mean we don't need to unconditionally call .get_host_iommu_info() in some 
cases.

Thanks
Zhenzhong

Re: [PATCH v2 02/10] vfio: Introduce HIODLegacyVFIO device

2024-04-16 Thread Cédric Le Goater


Hello,

On 4/16/24 05:41, Duan, Zhenzhong wrote:

Hi Cédric,


-Original Message-
From: Cédric Le Goater 
Subject: Re: [PATCH v2 02/10] vfio: Introduce HIODLegacyVFIO device

On 4/8/24 10:12, Zhenzhong Duan wrote:

HIODLegacyVFIO represents a host IOMMU device under VFIO legacy
container backend.

It includes a link to VFIODevice.

Suggested-by: Eric Auger 
Suggested-by: Cédric Le Goater 
Signed-off-by: Zhenzhong Duan 
---
   include/hw/vfio/vfio-common.h | 11 +++
   hw/vfio/container.c   | 11 ++-
   2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-

common.h

index b9da6c08ef..f30772f534 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -31,6 +31,7 @@
   #endif
   #include "sysemu/sysemu.h"
   #include "hw/vfio/vfio-container-base.h"
+#include "sysemu/host_iommu_device.h"

   #define VFIO_MSG_PREFIX "vfio %s: "

@@ -147,6 +148,16 @@ typedef struct VFIOGroup {
   bool ram_block_discard_allowed;
   } VFIOGroup;

+#define TYPE_HIOD_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-

vfio"

I would prefer to keep the prefix TYPE_HOST_IOMMU_DEVICE.


Will do.




+OBJECT_DECLARE_SIMPLE_TYPE(HIODLegacyVFIO, HIOD_LEGACY_VFIO)
+
+/* Abstraction of VFIO legacy host IOMMU device */
+struct HIODLegacyVFIO {


same here


Should I do the same for all the HostIOMMUDevice and HostIOMMUDeviceClass 
sub-structures?


I would for type names. The main reason is for naming consistency, which is
useful for grep and code analysis.



The reason I used 'HIOD' abbreviation is some function names become extremely 
long
and exceed 80 characters. E.g.:

@@ -1148,9 +1148,9 @@ static void vfio_iommu_legacy_class_init(ObjectClass 
*klass, void *data)
  vioc->pci_hot_reset = vfio_legacy_pci_hot_reset;
  };

-static int hiod_legacy_vfio_get_host_iommu_info(HostIOMMUDevice *hiod,
-void *data, uint32_t len,
-Error **errp)
+static int host_iommu_device_legacy_vfio_get_host_iommu_info(HostIOMMUDevice 
*hiod,
+ void *data, 
uint32_t len,
+ Error **errp)
  {
  VFIODevice *vbasedev = HIOD_LEGACY_VFIO(hiod)->vdev;
  /* iova_ranges is a sorted list */
@@ -1173,7 +1173,7 @@ static void hiod_legacy_vfio_class_init(ObjectClass *oc, 
void *data)
  {
  HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc);

-hioc->get_host_iommu_info = hiod_legacy_vfio_get_host_iommu_info;
+hioc->get_host_iommu_info = 
host_iommu_device_legacy_vfio_get_host_iommu_info;
  };

I didn't find other way to make it meet the 80 chars limitation. Any 
suggestions on this?


Try :

@@ -1177,7 +1177,8 @@ static void hiod_legacy_vfio_class_init(
 {
 HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc);
 
-hioc->get_host_iommu_info = hiod_legacy_vfio_get_host_iommu_info;

+hioc->get_host_iommu_info =
+host_iommu_device_legacy_vfio_get_host_iommu_info;
 };
 
 static const TypeInfo types[] = {


That said, I agree that 'host_iommu_device_legacy_vfio' routine prefix
could be shortened to 'hiod_legacy_vfio'.


Thanks,

C.








+/*< private >*/
+HostIOMMUDevice parent;
+VFIODevice *vdev;


It seems to me that the back pointer should be on the container instead.
Looks more correct conceptually.


Yes, that makes sense for legacy VFIO, as iova_ranges, pgsizes etc are all 
saved in bcontainer.





+};
+
   typedef struct VFIODMABuf {
   QemuDmaBuf buf;
   uint32_t pos_x, pos_y, pos_updates;
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 77bdec276e..44018ef085 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -1143,12 +1143,21 @@ static void

vfio_iommu_legacy_class_init(ObjectClass *klass, void *data)

   vioc->pci_hot_reset = vfio_legacy_pci_hot_reset;
   };

+static void hiod_legacy_vfio_class_init(ObjectClass *oc, void *data)
+{
+};


It is preferable to introduce routines only when they are actually useful.
Please drop the .class_init definition.


Sure.

Thanks
Zhenzhong



Thanks,

C.



+
   static const TypeInfo types[] = {
   {
   .name = TYPE_VFIO_IOMMU_LEGACY,
   .parent = TYPE_VFIO_IOMMU,
   .class_init = vfio_iommu_legacy_class_init,
-},
+}, {
+.name = TYPE_HIOD_LEGACY_VFIO,
+.parent = TYPE_HOST_IOMMU_DEVICE,
+.instance_size = sizeof(HIODLegacyVFIO),
+.class_init = hiod_legacy_vfio_class_init,
+}
   };

   DEFINE_TYPES(types)

Re: [PATCH v2 3/6] hw/ppc: SPI controller model - sequencer and shifter

2024-04-16 Thread Cédric Le Goater


Hello,

Please rephrase the subject to something like:

  "ppc/pnv: Extend SPI model ..."

Using a verb is preferable.

On 4/9/24 19:56, Chalapathi V wrote:

In this commit SPI shift engine and sequencer logic is implemented.
Shift engine performs serialization and de-serialization according to the
control by the sequencer and according to the setup defined in the
configuration registers. Sequencer implements the main control logic and
FSM to handle data transmit and data receive control of the shift engine.

Signed-off-by: Chalapathi V 
---
  include/hw/ppc/pnv_spi_controller.h |   72 ++
  hw/ppc/pnv_spi_controller.c | 1311 ++-
  2 files changed, 1382 insertions(+), 1 deletion(-)

diff --git a/include/hw/ppc/pnv_spi_controller.h 
b/include/hw/ppc/pnv_spi_controller.h
index 5ec50fb14c..ee8e7a17da 100644
--- a/include/hw/ppc/pnv_spi_controller.h
+++ b/include/hw/ppc/pnv_spi_controller.h
@@ -8,6 +8,14 @@
   * This model Supports a connection to a single SPI responder.
   * Introduced for P10 to provide access to SPI seeproms, TPM, flash device
   * and an ADC controller.
+ *
+ * All SPI function control is mapped into the SPI register space to enable
+ * full control by firmware.
+ *
+ * SPI Controller has sequencer and shift engine. The SPI shift engine
+ * performs serialization and de-serialization according to the control by
+ * the sequencer and according to the setup defined in the configuration
+ * registers and the SPI sequencer implements the main control logic.
   */
  #include "hw/ssi/ssi.h"
  
@@ -21,6 +29,7 @@

  #define SPI_CONTROLLER_REG_SIZE 8
  
  typedef struct SSIBus SSIBus;

+typedef struct xfer_buffer xfer_buffer;


Please use CamelCase names for typedef. The forward declaration doesn't
seem useful.


  #define TYPE_PNV_SPI_BUS "pnv-spi-bus"
  OBJECT_DECLARE_SIMPLE_TYPE(PnvSPIBus, PNV_SPI_BUS)
@@ -33,6 +42,21 @@ typedef struct PnvSPIBus {
  uint32_t id;
  } PnvSPIBus;
  
+/* xfer_buffer */

+typedef struct xfer_buffer {
+
+uint32_tlen;
+uint8_t*data;
+
+} xfer_buffer;
+
+uint8_t *xfer_buffer_write_ptr(xfer_buffer *payload, uint32_t offset,
+uint32_t length);
+void xfer_buffer_read_ptr(xfer_buffer *payload, uint8_t **read_buf,
+uint32_t offset, uint32_t length);
+xfer_buffer *xfer_buffer_new(void);
+void xfer_buffer_free(xfer_buffer *payload);
+


I don't think these helper routines need to be defined in the header file
of the PnvSpi model. They look internal to me.


  typedef struct PnvSpiController {
  DeviceState parent;
  
@@ -40,6 +64,39 @@ typedef struct PnvSpiController {

  MemoryRegionxscom_spic_regs;
  /* SPI controller object number */
  uint32_tspic_num;
+uint8_t responder_select;
+/* To verify if shift_n1 happens prior to shift_n2 */
+boolshift_n1_done;
+/*
+ * Internal flags for the first and last indicators for the SPI
+ * interface methods
+ */
+uint8_t first;
+uint8_t last;
+/* Loop counter for branch operation opcode Ex/Fx */
+uint8_t loop_counter_1;
+uint8_t loop_counter_2;
+/* N1/N2_bits specifies the size of the N1/N2 segment of a frame in bits.*/
+uint8_t N1_bits;
+uint8_t N2_bits;
+/* Number of bytes in a payload for the N1/N2 frame segment.*/
+uint8_t N1_bytes;
+uint8_t N2_bytes;
+/* Number of N1/N2 bytes marked for transmit */
+uint8_t N1_tx;
+uint8_t N2_tx;
+/* Number of N1/N2 bytes marked for receive */
+uint8_t N1_rx;
+uint8_t N2_rx;
+/*
+ * Setting this attribute to true will cause the engine to reverse the
+ * bit order of each byte it appends to a payload before sending the
+ * payload to a device. There may be cases where an end device expects
+ * a reversed order, like in the case of the Nuvoton TPM device. The
+ * order of bytes in the payload is not reversed, only the order of the
+ * 8 bits in each payload byte.
+ */
+boolreverse_bits;
  
  /* SPI Controller registers */

  uint64_terror_reg;
@@ -52,4 +109,19 @@ typedef struct PnvSpiController {
  uint8_t sequencer_operation_reg[SPI_CONTROLLER_REG_SIZE];
  uint64_tstatus_reg;
  } PnvSpiController;
+
+void log_all_N_counts(PnvSpiController *spi_controller);
+void spi_response(PnvSpiController *spi_controller, int bits,
+xfer_buffer *rsp_payload);
+void operation_sequencer(PnvSpiController *spi_controller);
+bool operation_shiftn1(PnvSpiController *spi_controller, uint8_t opcode,
+   xfer_buffer **payload, bool send_n1_alone);
+bool operation_shiftn2(PnvSpiController *spi_controller, uint8_t opcode,
+   xfer_buffer **payload);
+bool does_rdr_match(PnvSpiController *spi_controller);
+uint8_t get_from_offset(PnvSpiController *spi_controller,

Re: [PATCH v2 3/5] intel_iommu: Add a framework to do compatibility check with host IOMMU cap/ecap

2024-04-15 Thread Cédric Le Goater


On 4/8/24 10:44, Zhenzhong Duan wrote:

From: Yi Liu 

If check fails, the host side device(either vfio or vdpa device) should not
be passed to guest.

Implementation details for different backends will be in following patches.

Signed-off-by: Yi Liu 
Signed-off-by: Yi Sun 
Signed-off-by: Zhenzhong Duan 
---
  hw/i386/intel_iommu.c | 35 +++
  1 file changed, 35 insertions(+)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 4f84e2e801..a49b587c73 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -35,6 +35,7 @@
  #include "sysemu/kvm.h"
  #include "sysemu/dma.h"
  #include "sysemu/sysemu.h"
+#include "sysemu/iommufd.h"
  #include "hw/i386/apic_internal.h"
  #include "kvm/kvm_i386.h"
  #include "migration/vmstate.h"
@@ -3819,6 +3820,32 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, 
PCIBus *bus,
  return vtd_dev_as;
  }
  
+static int vtd_check_legacy_hdev(IntelIOMMUState *s,

+ HostIOMMUDevice *hiod,
+ Error **errp)
+{
+return 0;
+}
+
+static int vtd_check_iommufd_hdev(IntelIOMMUState *s,
+  HostIOMMUDevice *hiod,
+  Error **errp)
+{
+return 0;
+}
+
+static int vtd_check_hdev(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hdev,
+  Error **errp)
+{
+HostIOMMUDevice *hiod = vtd_hdev->dev;
+
+if (object_dynamic_cast(OBJECT(hiod), TYPE_HIOD_IOMMUFD)) {
+return vtd_check_iommufd_hdev(s, hiod, errp);
+}
+
+return vtd_check_legacy_hdev(s, hiod, errp);
+}



I think we should be using the .get_host_iommu_info() class handler
instead. Can we refactor the code slightly to avoid this check on
the type ?


Thanks,

C.





+
  static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
  HostIOMMUDevice *hiod, Error **errp)
  {
@@ -3829,6 +3856,7 @@ static int vtd_dev_set_iommu_device(PCIBus *bus, void 
*opaque, int devfn,
  .devfn = devfn,
  };
  struct vtd_as_key *new_key;
+int ret;
  
  assert(hiod);
  
@@ -3848,6 +3876,13 @@ static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,

  vtd_hdev->iommu_state = s;
  vtd_hdev->dev = hiod;
  
+ret = vtd_check_hdev(s, vtd_hdev, errp);

+if (ret) {
+g_free(vtd_hdev);
+vtd_iommu_unlock(s);
+return ret;
+}
+
  new_key = g_malloc(sizeof(*new_key));
  new_key->bus = bus;
  new_key->devfn = devfn;

Re: [PATCH v2 2/6] hw/ppc: SPI controller model - registers implementation

2024-04-15 Thread Cédric Le Goater


Hello Chalapathi

The subject could be rephrased to : "ppc/pnv: Add SPI controller model".

On 4/9/24 19:56, Chalapathi V wrote:

SPI controller device model supports a connection to a single SPI responder.
This provide access to SPI seeproms, TPM, flash device and an ADC controller.

All SPI function control is mapped into the SPI register space to enable full
control by firmware. In this commit SPI configuration component is modelled
which contains all SPI configuration and status registers as well as the hold
registers for data to be sent or having been received.

An existing QEMU SSI framework is used and SSI_BUS is created.

Signed-off-by: Chalapathi V 
---
  include/hw/ppc/pnv_spi_controller.h  |  55 +
  include/hw/ppc/pnv_spi_controller_regs.h | 114 ++


These two files should be under hw/ssi/ and include/hw/ssi/. Please
remove '_controller'.


  include/hw/ppc/pnv_xscom.h   |   3 +
  hw/ppc/pnv_spi_controller.c  | 278 +++
  hw/ppc/Kconfig   |   1 +
  hw/ppc/meson.build   |   1 +
  6 files changed, 452 insertions(+)
  create mode 100644 include/hw/ppc/pnv_spi_controller.h
  create mode 100644 include/hw/ppc/pnv_spi_controller_regs.h
  create mode 100644 hw/ppc/pnv_spi_controller.c

diff --git a/include/hw/ppc/pnv_spi_controller.h 
b/include/hw/ppc/pnv_spi_controller.h
new file mode 100644
index 00..5ec50fb14c
--- /dev/null
+++ b/include/hw/ppc/pnv_spi_controller.h
@@ -0,0 +1,55 @@
+/*
+ * QEMU PowerPC SPI Controller model
+ *
+ * Copyright (c) 2024, IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * This model Supports a connection to a single SPI responder.
+ * Introduced for P10 to provide access to SPI seeproms, TPM, flash device
+ * and an ADC controller.
+ */
+#include "hw/ssi/ssi.h"
+
+#ifndef PPC_PNV_SPI_CONTROLLER_H
+#define PPC_PNV_SPI_CONTROLLER_H
+
+#define TYPE_PNV_SPI_CONTROLLER "pnv-spi-controller"
+#define PNV_SPICONTROLLER(obj) \
+OBJECT_CHECK(PnvSpiController, (obj), TYPE_PNV_SPI_CONTROLLER)


You could use OBJECT_DECLARE_SIMPLE_TYPE ? Anyhow, I would prefer
naming the macro PNV_SPI_CONTROLLER.


+#define SPI_CONTROLLER_REG_SIZE 8
+
+typedef struct SSIBus SSIBus;


why ?


+
+#define TYPE_PNV_SPI_BUS "pnv-spi-bus"
+OBJECT_DECLARE_SIMPLE_TYPE(PnvSPIBus, PNV_SPI_BUS)
+
+typedef struct PnvSPIBus {


I don't think this extra PnvSPIBus model is useful.


+SysBusDevice parent_obj;
+
+SSIBus *ssi_bus;
+qemu_irq *cs_line;


These two attributes could live under PnvSpiController.


+uint32_t id;


and this one would become useless.


+} PnvSPIBus;

+typedef struct PnvSpiController {
+DeviceState parent;
+
+PnvSPIBus   bus;
+MemoryRegionxscom_spic_regs;
+/* SPI controller object number */
+uint32_tspic_num;
+
+/* SPI Controller registers */
+uint64_terror_reg;
+uint64_tcounter_config_reg;
+uint64_tconfig_reg1;
+uint64_tclock_config_reset_control;
+uint64_tmemory_mapping_reg;
+uint64_ttransmit_data_reg;
+uint64_treceive_data_reg;
+uint8_t sequencer_operation_reg[SPI_CONTROLLER_REG_SIZE];
+uint64_tstatus_reg;


You could use an array of uint64_t also.



+} PnvSpiController;
+#endif /* PPC_PNV_SPI_CONTROLLER_H */
diff --git a/include/hw/ppc/pnv_spi_controller_regs.h 
b/include/hw/ppc/pnv_spi_controller_regs.h
new file mode 100644
index 00..6f613aca5e
--- /dev/null
+++ b/include/hw/ppc/pnv_spi_controller_regs.h
@@ -0,0 +1,114 @@
+/*
+ * QEMU PowerPC SPI Controller model
+ *
+ * Copyright (c) 2023, IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef SPI_CONTROLLER_REGS_H
+#define SPI_CONTROLLER_REGS_H
+
+/* Error Register */
+#define ERROR_REG   0x00
+
+/* counter_config_reg */
+#define COUNTER_CONFIG_REG  0x01
+#define COUNTER_CONFIG_REG_SHIFT_COUNT_N1   PPC_BITMASK(0, 7)
+#define COUNTER_CONFIG_REG_SHIFT_COUNT_N2   PPC_BITMASK(8, 15)
+#define COUNTER_CONFIG_REG_COUNT_COMPARE1   PPC_BITMASK(24, 31)
+#define COUNTER_CONFIG_REG_COUNT_COMPARE2   PPC_BITMASK(32, 39)
+#define COUNTER_CONFIG_REG_N1_COUNT_CONTROL PPC_BITMASK(48, 51)
+#define COUNTER_CONFIG_REG_N2_COUNT_CONTROL PPC_BITMASK(52, 55)
+
+/* config_reg */
+#define CONFIG_REG1 0x02
+
+/* clock_config_reset_control_ecc_enable_reg */
+#define CLOCK_CONFIG_REG    0x03
+#define CLOCK_CONFIG_RESET_CONTROL_HARD_RESET   0x0084;
+#define CLOCK_CONFIG_REG_RESET_CONTROL  PPC_BITMASK(24, 27)
+#define CLOCK_CONFIG_REG_ECC_CONTROL    PPC_BITMASK(28, 30)
+
+/* memory_mapping_reg */
+#define MEMORY_MAPPING_REG  0x04
+#define MEMORY_MAPPING_REG_MMSPISM_BASE_ADDR    PPC_BITMASK(0, 15)
+#define MEMORY_MAPPING_REG_MMSPISM_ADDR_MASK    PPC_BITMASK(16, 31)

Re: [PATCH v2 1/6] hw/ppc: remove SPI responder model

2024-04-15 Thread Cédric Le Goater


On 4/9/24 19:56, Chalapathi V wrote:

-- Empty commit to align the patch numbers between PATCH v1 and PATCH v2.
SPI responder model is removed as pnv spi controller and seeprom is
implemented using QEMU SSI framework.


Please drop this empty patch. Patch numbers do not need to be aligned
between respins of the same patchset.

Thanks,

C.

Re: [PATCH 02/12] hw/vfio/pci: Replace sprintf() by g_strdup_printf()

2024-04-15 Thread Cédric Le Goater


On 4/12/24 17:25, Alex Williamson wrote:

On Wed, 10 Apr 2024 18:06:03 +0200
Philippe Mathieu-Daudé  wrote:


sprintf() is deprecated on Darwin since macOS 13.0 / XCode 14.1,
resulting in painful developer experience. Use g_strdup_printf()
instead.


Isn't this code only compiled for Linux hosts?  


It is not.


Maybe still a valid change, but the rationale seems irrelevant.


I agree the commit log should be rephrased.

There is also a v2 doing a different change :

  https://lore.kernel.org/qemu-devel/20240411101550.99392-1-phi...@linaro.org/

This is a bit confusing.

Thanks,

C.

Re: [PATCH v2 10/10] vfio: Pass HostIOMMUDevice to vIOMMU

2024-04-15 Thread Cédric Le Goater


On 4/8/24 10:12, Zhenzhong Duan wrote:

With HostIOMMUDevice passed, vIOMMU can check compatibility with host
IOMMU, call into IOMMUFD specific methods, etc.

Originally-by: Yi Liu 
Signed-off-by: Nicolin Chen 
Signed-off-by: Yi Sun 
Signed-off-by: Zhenzhong Duan 


LGTM, waiting for v3.


Thanks,

C.





---
  hw/vfio/pci.c | 20 +++-
  1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 64780d1b79..224501a86e 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3111,11 +3111,17 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
  
  vfio_bars_register(vdev);
  
-ret = vfio_add_capabilities(vdev, errp);

+ret = pci_device_set_iommu_device(pdev, vbasedev->hiod, errp);
  if (ret) {
+error_prepend(errp, "Failed to set iommu_device: ");
  goto out_teardown;
  }
  
+ret = vfio_add_capabilities(vdev, errp);

+if (ret) {
+goto out_unset_idev;
+}
+
  if (vdev->vga) {
  vfio_vga_quirk_setup(vdev);
  }
@@ -3132,7 +3138,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
  error_setg(errp,
 "cannot support IGD OpRegion feature on hotplugged "
 "device");
-goto out_teardown;
+goto out_unset_idev;
  }
  
  ret = vfio_get_dev_region_info(vbasedev,

@@ -3141,13 +3147,13 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
  if (ret) {
  error_setg_errno(errp, -ret,
   "does not support requested IGD OpRegion 
feature");
-goto out_teardown;
+goto out_unset_idev;
  }
  
  ret = vfio_pci_igd_opregion_init(vdev, opregion, errp);

  g_free(opregion);
  if (ret) {
-goto out_teardown;
+goto out_unset_idev;
  }
  }
  
@@ -3233,6 +3239,8 @@ out_deregister:

  if (vdev->intx.mmap_timer) {
  timer_free(vdev->intx.mmap_timer);
  }
+out_unset_idev:
+pci_device_unset_iommu_device(pdev);
  out_teardown:
  vfio_teardown_msi(vdev);
  vfio_bars_exit(vdev);
@@ -3261,6 +3269,7 @@ static void vfio_instance_finalize(Object *obj)
  static void vfio_exitfn(PCIDevice *pdev)
  {
  VFIOPCIDevice *vdev = VFIO_PCI(pdev);
+VFIODevice *vbasedev = &vdev->vbasedev;
  
  vfio_unregister_req_notifier(vdev);

  vfio_unregister_err_notifier(vdev);
@@ -3275,7 +3284,8 @@ static void vfio_exitfn(PCIDevice *pdev)
  vfio_teardown_msi(vdev);
  vfio_pci_disable_rp_atomics(vdev);
  vfio_bars_exit(vdev);
-vfio_migration_exit(&vdev->vbasedev);
+vfio_migration_exit(vbasedev);
+pci_device_unset_iommu_device(pdev);
  }
  
  static void vfio_pci_reset(DeviceState *dev)

Re: [PATCH v2 09/10] hw/pci: Introduce pci_device_set/unset_iommu_device()

2024-04-15 Thread Cédric Le Goater


On 4/8/24 10:12, Zhenzhong Duan wrote:

From: Yi Liu 

This adds pci_device_set/unset_iommu_device() to set/unset
HostIOMMUDevice for a given PCI device. Caller of set
should fail if set operation fails.

Extract out pci_device_get_iommu_bus_devfn() to facilitate


I would separate this change in a prereq patch.


Thanks,

C.



implementation of pci_device_set/unset_iommu_device().

Signed-off-by: Yi Liu 
Signed-off-by: Yi Sun 
Signed-off-by: Nicolin Chen 
Signed-off-by: Zhenzhong Duan 
---
  include/hw/pci/pci.h | 40 ++-
  hw/pci/pci.c | 75 ++--
  2 files changed, 111 insertions(+), 4 deletions(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index eaa3fc99d8..4ae7fe6f3f 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -3,6 +3,7 @@
  
  #include "exec/memory.h"

  #include "sysemu/dma.h"
+#include "sysemu/host_iommu_device.h"
  
  /* PCI includes legacy ISA access.  */

  #include "hw/isa/isa.h"
@@ -383,10 +384,47 @@ typedef struct PCIIOMMUOps {
   *
   * @devfn: device and function number
   */
-   AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn);
+AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn);
+/**
+ * @set_iommu_device: attach a HostIOMMUDevice to a vIOMMU
+ *
+ * Optional callback, if not implemented in vIOMMU, then vIOMMU can't
+ * retrieve host information from the associated HostIOMMUDevice.
+ *
+ * Return true if HostIOMMUDevice is attached, or else return false
+ * with errp set.
+ *
+ * @bus: the #PCIBus of the PCI device.
+ *
+ * @opaque: the data passed to pci_setup_iommu().
+ *
+ * @devfn: device and function number of the PCI device.
+ *
+ * @dev: the data structure representing host IOMMU device.
+ *
+ * @errp: pass an Error out only when return false
+ *
+ */
+int (*set_iommu_device)(PCIBus *bus, void *opaque, int devfn,
+HostIOMMUDevice *dev, Error **errp);
+/**
+ * @unset_iommu_device: detach a HostIOMMUDevice from a vIOMMU
+ *
+ * Optional callback.
+ *
+ * @bus: the #PCIBus of the PCI device.
+ *
+ * @opaque: the data passed to pci_setup_iommu().
+ *
+ * @devfn: device and function number of the PCI device.
+ */
+void (*unset_iommu_device)(PCIBus *bus, void *opaque, int devfn);
  } PCIIOMMUOps;
  
  AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);

+int pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod,
+Error **errp);
+void pci_device_unset_iommu_device(PCIDevice *dev);
  
  /**

   * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index e7a39cb203..8ece617673 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2648,11 +2648,27 @@ static void pci_device_class_base_init(ObjectClass 
*klass, void *data)
  }
  }
  
-AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)

+/*
+ * Get IOMMU root bus, aliased bus and devfn of a PCI device
+ *
+ * IOMMU root bus is needed by all call sites to call into iommu_ops.
+ * For call sites which don't need aliased BDF, passing NULL to
+ * aliased_[bus/devfn] is allowed.
+ *
+ * @piommu_bus: return root #PCIBus backed by an IOMMU for the PCI device.
+ *
+ * @aliased_bus: return aliased #PCIBus of the PCI device, optional.
+ *
+ * @aliased_devfn: return aliased devfn of the PCI device, optional.
+ */
+static void pci_device_get_iommu_bus_devfn(PCIDevice *dev,
+   PCIBus **piommu_bus,
+   PCIBus **aliased_bus,
+   int *aliased_devfn)
  {
  PCIBus *bus = pci_get_bus(dev);
  PCIBus *iommu_bus = bus;
-uint8_t devfn = dev->devfn;
+int devfn = dev->devfn;
  
  while (iommu_bus && !iommu_bus->iommu_ops && iommu_bus->parent_dev) {

  PCIBus *parent_bus = pci_get_bus(iommu_bus->parent_dev);
@@ -2693,13 +2709,66 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice 
*dev)
  
  iommu_bus = parent_bus;

  }
-if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) {
+
+assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
+assert(iommu_bus);
+
+if (pci_bus_bypass_iommu(bus) || !iommu_bus->iommu_ops) {
+iommu_bus = NULL;
+}
+
+*piommu_bus = iommu_bus;
+
+if (aliased_bus) {
+*aliased_bus = bus;
+}
+
+if (aliased_devfn) {
+*aliased_devfn = devfn;
+}
+}
+
+AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
+{
+PCIBus *bus;
+PCIBus *iommu_bus;
+int devfn;
+
+pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn);
+if (iommu_bus) {
  return iommu_bus->iommu_ops->get_address_space(bus,
   iommu_bus->iommu_opaque, devfn);
  }
  return

Re: [PATCH v2 08/10] vfio: Create host IOMMU device instance

2024-04-15 Thread Cédric Le Goater


On 4/8/24 10:12, Zhenzhong Duan wrote:

Create host IOMMU device instance and initialize it based on backend.

Signed-off-by: Zhenzhong Duan 
---
  include/hw/vfio/vfio-common.h | 1 +
  hw/vfio/container.c   | 5 +
  hw/vfio/iommufd.c | 8 
  3 files changed, 14 insertions(+)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index d382b12ec1..4fbba85018 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -126,6 +126,7 @@ typedef struct VFIODevice {
  OnOffAuto pre_copy_dirty_page_tracking;
  bool dirty_pages_supported;
  bool dirty_tracking;
+HostIOMMUDevice *hiod;
  int devid;
  IOMMUFDBackend *iommufd;
  } VFIODevice;
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index ba0ad4a41b..fc0c027501 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -915,6 +915,7 @@ static int vfio_legacy_attach_device(const char *name, 
VFIODevice *vbasedev,
  VFIODevice *vbasedev_iter;
  VFIOGroup *group;
  VFIOContainerBase *bcontainer;
+HIODLegacyVFIO *hiod_vfio;


s/hiod_vfio/hiod/ please. Same below.


Thanks,

C.




  int ret;
  
  if (groupid < 0) {

@@ -945,6 +946,9 @@ static int vfio_legacy_attach_device(const char *name, 
VFIODevice *vbasedev,
  vbasedev->bcontainer = bcontainer;
  QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next);
  QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next);
+hiod_vfio = HIOD_LEGACY_VFIO(object_new(TYPE_HIOD_LEGACY_VFIO));
+hiod_vfio->vdev = vbasedev;
+vbasedev->hiod = HOST_IOMMU_DEVICE(hiod_vfio);
  
  return ret;

  }
@@ -959,6 +963,7 @@ static void vfio_legacy_detach_device(VFIODevice *vbasedev)
  trace_vfio_detach_device(vbasedev->name, group->groupid);
  vfio_put_base_device(vbasedev);
  vfio_put_group(group);
+object_unref(vbasedev->hiod);
  }
  
  static int vfio_legacy_pci_hot_reset(VFIODevice *vbasedev, bool single)

diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 115b9f8e7f..b6d058339b 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -308,6 +308,7 @@ static int iommufd_cdev_attach(const char *name, VFIODevice 
*vbasedev,
  VFIOIOMMUFDContainer *container;
  VFIOAddressSpace *space;
  struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
+HIODIOMMUFDVFIO *hiod_vfio;
  int ret, devfd;
  uint32_t ioas_id;
  Error *err = NULL;
@@ -431,6 +432,12 @@ found_container:
  QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next);
  QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next);
  
+hiod_vfio = HIOD_IOMMUFD_VFIO(object_new(TYPE_HIOD_IOMMUFD_VFIO));

+hiod_iommufd_init(HIOD_IOMMUFD(hiod_vfio), vbasedev->iommufd,
+  vbasedev->devid);
+hiod_vfio->vdev = vbasedev;
+vbasedev->hiod = HOST_IOMMU_DEVICE(hiod_vfio);
+
  trace_iommufd_cdev_device_info(vbasedev->name, devfd, vbasedev->num_irqs,
 vbasedev->num_regions, vbasedev->flags);
  return 0;
@@ -468,6 +475,7 @@ static void iommufd_cdev_detach(VFIODevice *vbasedev)
  iommufd_cdev_detach_container(vbasedev, container);
  iommufd_cdev_container_destroy(container);
  vfio_put_address_space(space);
+object_unref(vbasedev->hiod);
  
  iommufd_cdev_unbind_and_disconnect(vbasedev);

  close(vbasedev->fd);

Re: [PATCH v2 07/10] backends/iommufd: Implement get_host_iommu_info() callback

2024-04-15 Thread Cédric Le Goater


On 4/8/24 10:12, Zhenzhong Duan wrote:

It calls iommufd_backend_get_device_info() to get host IOMMU
related information.

Define a common structure HIOD_IOMMUFD_INFO to describe the info
returned from kernel. Currently only vtd, but easy to add arm smmu
when kernel supports.


I think you can merge the previous patch and this one.
 


Signed-off-by: Zhenzhong Duan 
---
  include/sysemu/iommufd.h |  7 +++
  backends/iommufd.c   | 17 +
  2 files changed, 24 insertions(+)

diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
index fa1a866237..44ec1335b2 100644
--- a/include/sysemu/iommufd.h
+++ b/include/sysemu/iommufd.h


I just noticed that include/sysemu/iommufd.h lacks a header.  Could you fix
that please ?


@@ -39,6 +39,13 @@ int iommufd_backend_get_device_info(IOMMUFDBackend *be, 
uint32_t devid,
  enum iommu_hw_info_type *type,
  void *data, uint32_t len, Error **errp);
  
+typedef struct HIOD_IOMMUFD_INFO {


Please use CamelCase names.


Thanks,

C.



+enum iommu_hw_info_type type;
+union {
+struct iommu_hw_info_vtd vtd;
+} data;
+} HIOD_IOMMUFD_INFO;
+
  #define TYPE_HIOD_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"
  OBJECT_DECLARE_TYPE(HIODIOMMUFD, HIODIOMMUFDClass, HIOD_IOMMUFD)
  
diff --git a/backends/iommufd.c b/backends/iommufd.c

index 559affa9ec..1e9c469e65 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -240,8 +240,25 @@ void hiod_iommufd_init(HIODIOMMUFD *idev, IOMMUFDBackend 
*iommufd,
  idev->devid = devid;
  }
  
+static int hiod_iommufd_get_host_iommu_info(HostIOMMUDevice *hiod,

+void *data, uint32_t len,
+Error **errp)
+{
+HIODIOMMUFD *idev = HIOD_IOMMUFD(hiod);
+HIOD_IOMMUFD_INFO *info = data;
+
+assert(sizeof(HIOD_IOMMUFD_INFO) <= len);
+
+return iommufd_backend_get_device_info(idev->iommufd, idev->devid,
+   &info->type, &info->data,
+   sizeof(info->data), errp);
+}
+
  static void hiod_iommufd_class_init(ObjectClass *oc, void *data)
  {
+HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_CLASS(oc);
+
+hiodc->get_host_iommu_info = hiod_iommufd_get_host_iommu_info;
  }
  
  static const TypeInfo types[] = {

Re: [PATCH v2 06/10] backends/iommufd: Introduce helper function iommufd_backend_get_device_info()

2024-04-15 Thread Cédric Le Goater


On 4/8/24 10:12, Zhenzhong Duan wrote:

Introduce a helper function iommufd_backend_get_device_info() to get
host IOMMU related information through iommufd uAPI.

Signed-off-by: Yi Liu 
Signed-off-by: Yi Sun 
Signed-off-by: Zhenzhong Duan 
---
  include/sysemu/iommufd.h |  4 
  backends/iommufd.c   | 23 ++-
  2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
index 71c53cbb45..fa1a866237 100644
--- a/include/sysemu/iommufd.h
+++ b/include/sysemu/iommufd.h
@@ -4,6 +4,7 @@
  #include "qom/object.h"
  #include "exec/hwaddr.h"
  #include "exec/cpu-common.h"
+#include <linux/iommufd.h>
  #include "sysemu/host_iommu_device.h"
  
  #define TYPE_IOMMUFD_BACKEND "iommufd"

@@ -34,6 +35,9 @@ int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t 
ioas_id, hwaddr iova,
  ram_addr_t size, void *vaddr, bool readonly);
  int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
hwaddr iova, ram_addr_t size);
+int iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,
+enum iommu_hw_info_type *type,
+void *data, uint32_t len, Error **errp);
  
  #define TYPE_HIOD_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"

  OBJECT_DECLARE_TYPE(HIODIOMMUFD, HIODIOMMUFDClass, HIOD_IOMMUFD)
diff --git a/backends/iommufd.c b/backends/iommufd.c
index ef8b3a808b..559affa9ec 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -20,7 +20,6 @@
  #include "monitor/monitor.h"
  #include "trace.h"
  #include <sys/ioctl.h>
-#include <linux/iommufd.h>
  
  static void iommufd_backend_init(Object *obj)

  {
@@ -212,6 +211,28 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t 
ioas_id,
  return ret;
  }
  
+int iommufd_backend_get_device_info(IOMMUFDBackend *be, uint32_t devid,

+enum iommu_hw_info_type *type,
+void *data, uint32_t len, Error **errp)
+{
+struct iommu_hw_info info = {
+.size = sizeof(info),
+.dev_id = devid,
+.data_len = len,
+.data_uptr = (uintptr_t)data,
+};
+int ret;
+
+ret = ioctl(be->fd, IOMMU_GET_HW_INFO, &info);
+if (ret) {
+error_setg_errno(errp, errno, "Failed to get hardware info");
+} else {
+*type = info.out_data_type;


type should not be NULL.

+}
+
+return ret;
+}
+
  void hiod_iommufd_init(HIODIOMMUFD *idev, IOMMUFDBackend *iommufd,
 uint32_t devid)
  {

Re: [PATCH v2 05/10] vfio: Implement get_host_iommu_info() callback

2024-04-15 Thread Cédric Le Goater


On 4/8/24 10:12, Zhenzhong Duan wrote:

Utilize iova_ranges to calculate host IOMMU address width and
package it in HIOD_LEGACY_INFO for vIOMMU usage.

HIOD_LEGACY_INFO will be used by both VFIO and VDPA so declare
it in host_iommu_device.h.

Signed-off-by: Zhenzhong Duan 
---
  include/sysemu/host_iommu_device.h | 10 ++
  hw/vfio/container.c| 24 
  2 files changed, 34 insertions(+)

diff --git a/include/sysemu/host_iommu_device.h 
b/include/sysemu/host_iommu_device.h
index 22ccbe3a5d..beb8be8231 100644
--- a/include/sysemu/host_iommu_device.h
+++ b/include/sysemu/host_iommu_device.h
@@ -16,4 +16,14 @@ struct HostIOMMUDeviceClass {
  int (*get_host_iommu_info)(HostIOMMUDevice *hiod, void *data, uint32_t 
len,
 Error **errp);
  };
+
+/*
+ * Define the format of host IOMMU related info that current VFIO
+ * or VDPA can provide to vIOMMU.
+ *
+ * @aw_bits: Host IOMMU address width. 0xff if no limitation.
+ */
+typedef struct HIOD_LEGACY_INFO {


Please use CamelCase names.


+uint8_t aw_bits;
+} HIOD_LEGACY_INFO;
  #endif
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 44018ef085..ba0ad4a41b 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -1143,8 +1143,32 @@ static void vfio_iommu_legacy_class_init(ObjectClass 
*klass, void *data)
  vioc->pci_hot_reset = vfio_legacy_pci_hot_reset;
  };
  
+static int hiod_legacy_vfio_get_host_iommu_info(HostIOMMUDevice *hiod,

+void *data, uint32_t len,
+Error **errp)
+{
+VFIODevice *vbasedev = HIOD_LEGACY_VFIO(hiod)->vdev;
+/* iova_ranges is a sorted list */
+GList *l = g_list_last(vbasedev->bcontainer->iova_ranges);
+HIOD_LEGACY_INFO *info = data;
+
+assert(sizeof(HIOD_LEGACY_INFO) <= len);
+
+if (l) {
+Range *range = l->data;
+info->aw_bits = find_last_bit(&range->upb, BITS_PER_LONG) + 1;


There is a comment in range.h saying:

/*
 * Do not access members directly, use the functions!

Please introduce a new helper.


Thanks,

C.




+} else {
+info->aw_bits = 0xff;
+}
+
+return 0;
+}
+
  static void hiod_legacy_vfio_class_init(ObjectClass *oc, void *data)
  {
+HostIOMMUDeviceClass *hioc = HOST_IOMMU_DEVICE_CLASS(oc);
+
+hioc->get_host_iommu_info = hiod_legacy_vfio_get_host_iommu_info;
  };
  
  static const TypeInfo types[] = {

Re: [PATCH v2 03/10] backends/iommufd: Introduce abstract HIODIOMMUFD device

2024-04-15 Thread Cédric Le Goater


On 4/8/24 10:12, Zhenzhong Duan wrote:

HIODIOMMUFD represents a host IOMMU device under iommufd backend.

Currently it includes only the public iommufd handle and device id,
which could be used to get hw IOMMU information.

When nested translation is supported in future, vIOMMU is going
to have iommufd related operations like attaching/detaching hwpt,
So IOMMUFDDevice interface will be further extended at that time.

VFIO and VDPA device have different way of attaching/detaching hwpt.
So HIODIOMMUFD is still an abstract class which will be inherited by
VFIO and VDPA device.

Introduce a helper hiod_iommufd_init() to initialize HIODIOMMUFD
device.

Suggested-by: Cédric Le Goater 
Originally-by: Yi Liu 
Signed-off-by: Yi Sun 
Signed-off-by: Zhenzhong Duan 
---
  include/sysemu/iommufd.h | 22 +++
  backends/iommufd.c   | 47 ++--
  2 files changed, 53 insertions(+), 16 deletions(-)

diff --git a/include/sysemu/iommufd.h b/include/sysemu/iommufd.h
index 9af27ebd6c..71c53cbb45 100644
--- a/include/sysemu/iommufd.h
+++ b/include/sysemu/iommufd.h
@@ -4,6 +4,7 @@
  #include "qom/object.h"
  #include "exec/hwaddr.h"
  #include "exec/cpu-common.h"
+#include "sysemu/host_iommu_device.h"
  
  #define TYPE_IOMMUFD_BACKEND "iommufd"

  OBJECT_DECLARE_TYPE(IOMMUFDBackend, IOMMUFDBackendClass, IOMMUFD_BACKEND)
@@ -33,4 +34,25 @@ int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t 
ioas_id, hwaddr iova,
  ram_addr_t size, void *vaddr, bool readonly);
  int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
hwaddr iova, ram_addr_t size);
+
+#define TYPE_HIOD_IOMMUFD TYPE_HOST_IOMMU_DEVICE "-iommufd"


Please keep TYPE_HOST_IOMMU_DEVICE


+OBJECT_DECLARE_TYPE(HIODIOMMUFD, HIODIOMMUFDClass, HIOD_IOMMUFD)
+
+struct HIODIOMMUFD {
+/*< private >*/
+HostIOMMUDevice parent;
+void *opaque;
+
+/*< public >*/
+IOMMUFDBackend *iommufd;
+uint32_t devid;
+};
+
+struct HIODIOMMUFDClass {
+/*< private >*/
+HostIOMMUDeviceClass parent_class;
+};


This new class doesn't seem useful. Do you have plans for handlers ?


+
+void hiod_iommufd_init(HIODIOMMUFD *idev, IOMMUFDBackend *iommufd,
+   uint32_t devid);
  #endif
diff --git a/backends/iommufd.c b/backends/iommufd.c
index 62a79fa6b0..ef8b3a808b 100644
--- a/backends/iommufd.c
+++ b/backends/iommufd.c
@@ -212,23 +212,38 @@ int iommufd_backend_unmap_dma(IOMMUFDBackend *be, 
uint32_t ioas_id,
  return ret;
  }
  
-static const TypeInfo iommufd_backend_info = {

-.name = TYPE_IOMMUFD_BACKEND,
-.parent = TYPE_OBJECT,
-.instance_size = sizeof(IOMMUFDBackend),
-.instance_init = iommufd_backend_init,
-.instance_finalize = iommufd_backend_finalize,
-.class_size = sizeof(IOMMUFDBackendClass),
-.class_init = iommufd_backend_class_init,
-.interfaces = (InterfaceInfo[]) {
-{ TYPE_USER_CREATABLE },
-{ }
-}
-};
+void hiod_iommufd_init(HIODIOMMUFD *idev, IOMMUFDBackend *iommufd,
+   uint32_t devid)
+{
+idev->iommufd = iommufd;
+idev->devid = devid;
+}


This routine doesn't seem useful. I wonder if we shouldn't introduce
properties. I'm not sure this is useful either.



-static void register_types(void)
+static void hiod_iommufd_class_init(ObjectClass *oc, void *data)
  {
-type_register_static(&iommufd_backend_info);
  }
  
-type_init(register_types);

+static const TypeInfo types[] = {
+{
+.name = TYPE_IOMMUFD_BACKEND,
+.parent = TYPE_OBJECT,
+.instance_size = sizeof(IOMMUFDBackend),
+.instance_init = iommufd_backend_init,
+.instance_finalize = iommufd_backend_finalize,
+.class_size = sizeof(IOMMUFDBackendClass),
+.class_init = iommufd_backend_class_init,
+.interfaces = (InterfaceInfo[]) {
+{ TYPE_USER_CREATABLE },
+{ }
+}
+}, {
+.name = TYPE_HIOD_IOMMUFD,
+.parent = TYPE_HOST_IOMMU_DEVICE,
+.instance_size = sizeof(HIODIOMMUFD),
+.class_size = sizeof(HIODIOMMUFDClass),
+.class_init = hiod_iommufd_class_init,
+.abstract = true,
+}
+};
+
+DEFINE_TYPES(types)

Re: [PATCH v2 02/10] vfio: Introduce HIODLegacyVFIO device

2024-04-15 Thread Cédric Le Goater


On 4/8/24 10:12, Zhenzhong Duan wrote:

HIODLegacyVFIO represents a host IOMMU device under VFIO legacy
container backend.

It includes a link to VFIODevice.

Suggested-by: Eric Auger 
Suggested-by: Cédric Le Goater 
Signed-off-by: Zhenzhong Duan 
---
  include/hw/vfio/vfio-common.h | 11 +++
  hw/vfio/container.c   | 11 ++-
  2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index b9da6c08ef..f30772f534 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -31,6 +31,7 @@
  #endif
  #include "sysemu/sysemu.h"
  #include "hw/vfio/vfio-container-base.h"
+#include "sysemu/host_iommu_device.h"
  
  #define VFIO_MSG_PREFIX "vfio %s: "
  
@@ -147,6 +148,16 @@ typedef struct VFIOGroup {

  bool ram_block_discard_allowed;
  } VFIOGroup;
  
+#define TYPE_HIOD_LEGACY_VFIO TYPE_HOST_IOMMU_DEVICE "-legacy-vfio"


I would prefer to keep the prefix TYPE_HOST_IOMMU_DEVICE.


+OBJECT_DECLARE_SIMPLE_TYPE(HIODLegacyVFIO, HIOD_LEGACY_VFIO)
+
+/* Abstraction of VFIO legacy host IOMMU device */
+struct HIODLegacyVFIO {


same here


+/*< private >*/
+HostIOMMUDevice parent;
+VFIODevice *vdev;


It seems to me that the back pointer should be on the container instead.
Looks more correct conceptually.



+};
+
  typedef struct VFIODMABuf {
  QemuDmaBuf buf;
  uint32_t pos_x, pos_y, pos_updates;
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 77bdec276e..44018ef085 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -1143,12 +1143,21 @@ static void vfio_iommu_legacy_class_init(ObjectClass 
*klass, void *data)
  vioc->pci_hot_reset = vfio_legacy_pci_hot_reset;
  };
  
+static void hiod_legacy_vfio_class_init(ObjectClass *oc, void *data)

+{
+};


It is preferable to introduce routines when they are actually useful.
Please drop the .class_init definition.

Thanks,

C.



+
  static const TypeInfo types[] = {
  {
  .name = TYPE_VFIO_IOMMU_LEGACY,
  .parent = TYPE_VFIO_IOMMU,
  .class_init = vfio_iommu_legacy_class_init,
-},
+}, {
+.name = TYPE_HIOD_LEGACY_VFIO,
+.parent = TYPE_HOST_IOMMU_DEVICE,
+.instance_size = sizeof(HIODLegacyVFIO),
+.class_init = hiod_legacy_vfio_class_init,
+}
  };
  
  DEFINE_TYPES(types)

Re: [PATCH v2 01/10] backends: Introduce abstract HostIOMMUDevice

2024-04-15 Thread Cédric Le Goater


On 4/8/24 10:12, Zhenzhong Duan wrote:

Introduce HostIOMMUDevice as an abstraction of host IOMMU device.

get_host_iommu_info() is used to get host IOMMU info, different
backends can have different implementations and result format.

Introduce a macro CONFIG_HOST_IOMMU_DEVICE to define the usage
for VFIO, and VDPA in the future.

Suggested-by: Cédric Le Goater 
Signed-off-by: Zhenzhong Duan 


LGTM,


---
  MAINTAINERS|  2 ++
  include/sysemu/host_iommu_device.h | 19 +++
  backends/host_iommu_device.c   | 19 +++
  backends/Kconfig   |  5 +
  backends/meson.build   |  1 +
  5 files changed, 46 insertions(+)
  create mode 100644 include/sysemu/host_iommu_device.h
  create mode 100644 backends/host_iommu_device.c

diff --git a/MAINTAINERS b/MAINTAINERS
index e71183eef9..22f71cbe02 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2202,6 +2202,8 @@ M: Zhenzhong Duan 
  S: Supported
  F: backends/iommufd.c
  F: include/sysemu/iommufd.h
+F: backends/host_iommu_device.c
+F: include/sysemu/host_iommu_device.h
  F: include/qemu/chardev_open.h
  F: util/chardev_open.c
  F: docs/devel/vfio-iommufd.rst
diff --git a/include/sysemu/host_iommu_device.h 
b/include/sysemu/host_iommu_device.h
new file mode 100644
index 00..22ccbe3a5d
--- /dev/null
+++ b/include/sysemu/host_iommu_device.h
@@ -0,0 +1,19 @@
+#ifndef HOST_IOMMU_DEVICE_H
+#define HOST_IOMMU_DEVICE_H
+
+#include "qom/object.h"
+
+#define TYPE_HOST_IOMMU_DEVICE "host-iommu-device"
+OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUDeviceClass, HOST_IOMMU_DEVICE)
+
+struct HostIOMMUDevice {
+Object parent;
+};
+
+struct HostIOMMUDeviceClass {
+ObjectClass parent_class;


Could you please document the struct and its handlers ? This is more for
the future reader to understand the VFIO concepts than for the generated
docs. Anyhow, it could be useful for the docs also. Overall, the QEMU VFIO
subsystem suffers from a lack of documentation and we should try to improve
that in the next cycle.

Thanks,

C.




+int (*get_host_iommu_info)(HostIOMMUDevice *hiod, void *data, uint32_t len,
+   Error **errp);
+};
+#endif
diff --git a/backends/host_iommu_device.c b/backends/host_iommu_device.c
new file mode 100644
index 00..6cb6007d8c
--- /dev/null
+++ b/backends/host_iommu_device.c
@@ -0,0 +1,19 @@
+#include "qemu/osdep.h"
+#include "sysemu/host_iommu_device.h"
+
+OBJECT_DEFINE_ABSTRACT_TYPE(HostIOMMUDevice,
+host_iommu_device,
+HOST_IOMMU_DEVICE,
+OBJECT)
+
+static void host_iommu_device_class_init(ObjectClass *oc, void *data)
+{
+}
+
+static void host_iommu_device_init(Object *obj)
+{
+}
+
+static void host_iommu_device_finalize(Object *obj)
+{
+}
diff --git a/backends/Kconfig b/backends/Kconfig
index 2cb23f62fa..34ab29e994 100644
--- a/backends/Kconfig
+++ b/backends/Kconfig
@@ -3,3 +3,8 @@ source tpm/Kconfig
  config IOMMUFD
  bool
  depends on VFIO
+
+config HOST_IOMMU_DEVICE
+bool
+default y
+depends on VFIO
diff --git a/backends/meson.build b/backends/meson.build
index 8b2b111497..2e975d641e 100644
--- a/backends/meson.build
+++ b/backends/meson.build
@@ -25,6 +25,7 @@ if have_vhost_user
  endif
  system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: 
files('cryptodev-vhost.c'))
  system_ss.add(when: 'CONFIG_IOMMUFD', if_true: files('iommufd.c'))
+system_ss.add(when: 'CONFIG_HOST_IOMMU_DEVICE', if_true: 
files('host_iommu_device.c'))
  if have_vhost_user_crypto
system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: 
files('cryptodev-vhost-user.c'))
  endif

Re: [PATCH 0/2] P11 support for QEMU

2024-04-02 Thread Cédric Le Goater


On 4/2/24 09:00, Aditya Gupta wrote:

Hello Cédric,


<...snip...>

Please run ./scripts/get_maintainer.pl when sending a series. qemu-ppc should be
in Cc:


Tried it now, For some reason, get_maintainer.pl shows no maintainers:

  $ ./scripts/get_maintainer.pl -f 
0002-ppc-powernv11-add-base-support-for-P11-PowerNV.patch
  get_maintainer.pl: No maintainers found, printing recent contributors.
  get_maintainer.pl: Do not blindly cc: them on patches!  Use common sense.
  qemu-devel@nongnu.org (open list:All patches CC here)


Weird. I downloaded your series with b4 and ran the get_maintainer.pl script :

$ ./scripts/get_maintainer.pl 
20240401_adityag_p11_support_for_qemu.patches/0001_ppc_pseries_add_p11_cpu_type.patch
 
20240401_adityag_p11_support_for_qemu.patches/0002_ppc_powernv11_add_base_support_for_p11_powernv.patch

Nicholas Piggin  (odd fixer:sPAPR (pseries))
Daniel Henrique Barboza  (reviewer:sPAPR (pseries))
David Gibson  (reviewer:sPAPR (pseries))
Harsh Prateek Bora  (reviewer:sPAPR (pseries))
"Cédric Le Goater"  (odd fixer:PowerNV Non-Virt...)
"Frédéric Barrat"  (reviewer:PowerNV Non-Virt...)
qemu-...@nongnu.org (open list:sPAPR (pseries))
qemu-devel@nongnu.org (open list:All patches CC here)


So, it should have worked, I will check if I can get it to work.




I checked the MAINTAINERS file, will add maintainers in Cc, thanks.



Briefly looking at this, please separate the changes using one patch per model,
that is : first CPU (target), LPC, OCC, PSI, SBE, PnvCore, SpaprCore. Last the
PnvChip and the machines, powernv11 and pseries. A minimum commit log describing
the HW is required.


Sure, I will split the changes and improve my commit descriptions.


I don't see PHB6 or XIVE3. Why ?


Power11 core is same as Power10, so it supports till PHB5 and XIVE2,
same as P10. That's why I have not added any code for them.


ok. That's typically the info the commit log should have.


Okay, I will add these details also.


Forgot to add, please update :

  docs/system/ppc/powernv.rst

and the relevant tests under tests/qtest and tests/avocado. It helps
maintenance and CI.

Make sure the code is based on the HEAD of the QEMU tree. This proposal
isn't and so does not compile.


Thanks,

C.

Re: [PATCH for-9.0] tests/qtest: Fix STM32L4x5 GPIO test on 32-bit

2024-04-02 Thread Cédric Le Goater


On 3/29/24 13:50, Philippe Mathieu-Daudé wrote:

Hi Cédric, Thomas,

On 29/3/24 10:27, Cédric Le Goater wrote:

The test mangles the GPIO address and the pin number in the
qtest_add_data_func data parameter. Doing so, it assumes that the host
pointer size is always 64-bit, which breaks on 32-bit :

../tests/qtest/stm32l4x5_gpio-test.c: In function ‘test_gpio_output_mode’:
../tests/qtest/stm32l4x5_gpio-test.c:272:25: error: cast from pointer to 
integer of different size [-Werror=pointer-to-int-cast]
   272 | unsigned int pin = ((uint64_t)data) & 0xF;
   | ^
../tests/qtest/stm32l4x5_gpio-test.c:273:22: error: cast from pointer to 
integer of different size [-Werror=pointer-to-int-cast]
   273 | uint32_t gpio = ((uint64_t)data) >> 32;
   |  ^


Any clue why this isn't covered by CI?


There is possibly an issue with cross compile in CI. I am sorry, I don't
follow the changes in CI closely enough to tell the exact reason. 32-bit
host support has become best effort now, I suppose.

Thanks,

C.

Re: [PATCH 0/2] P11 support for QEMU

2024-04-02 Thread Cédric Le Goater


Hello Aditya,

On 4/2/24 08:39, Aditya Gupta wrote:

Hello Cédric,

Thanks for reviewing this.

On Mon, Apr 01, 2024 at 10:25:31AM +0200, Cédric Le Goater wrote:

Hello Aditya,

Please run ./scripts/get_maintainer.pl when sending a series. qemu-ppc should be
in Cc:


Tried it now, For some reason, get_maintainer.pl shows no maintainers:

 $ ./scripts/get_maintainer.pl -f 
0002-ppc-powernv11-add-base-support-for-P11-PowerNV.patch
 get_maintainer.pl: No maintainers found, printing recent contributors.
 get_maintainer.pl: Do not blindly cc: them on patches!  Use common sense.
 
 qemu-devel@nongnu.org (open list:All patches CC here)


Weird. I downloaded your series with b4 and ran the get_maintainer.pl script :

$ ./scripts/get_maintainer.pl 
20240401_adityag_p11_support_for_qemu.patches/0001_ppc_pseries_add_p11_cpu_type.patch
 
20240401_adityag_p11_support_for_qemu.patches/0002_ppc_powernv11_add_base_support_for_p11_powernv.patch

Nicholas Piggin  (odd fixer:sPAPR (pseries))
Daniel Henrique Barboza  (reviewer:sPAPR (pseries))
David Gibson  (reviewer:sPAPR (pseries))
Harsh Prateek Bora  (reviewer:sPAPR (pseries))
"Cédric Le Goater"  (odd fixer:PowerNV Non-Virt...)
"Frédéric Barrat"  (reviewer:PowerNV Non-Virt...)
qemu-...@nongnu.org (open list:sPAPR (pseries))
qemu-devel@nongnu.org (open list:All patches CC here)

 

I checked the MAINTAINERS file, will add maintainers in Cc, thanks.



Briefly looking at this, please separate the changes using one patch per model,
that is : first CPU (target), LPC, OCC, PSI, SBE, PnvCore, SpaprCore. Last the
PnvChip and the machines, powernv11 and pseries. A minimum commit log describing
the HW is required.


Sure, I will split the changes and improve my commit descriptions.


I don't see PHB6 or XIVE3. Why ?


Power11 core is same as Power10, so it supports till PHB5 and XIVE2,
same as P10. That's why I have not added any code for them.


ok. That's typically the info the commit log should have.


Also, you will need an OPAL update. The above changes are pointless without it.
The minimum for now is a git commit from the opal repo, then you will need to
update QEMU with a binary.


Agreed. I will consult when we push it to public. Will update this in
next series.

There might be some days delay in the next patch series.


We have entered the QEMU 9.1 cycle. There is time. I will comment more
on the next respin.

Thanks,

C.

Re: [PATCH 0/2] P11 support for QEMU

2024-04-01 Thread Cédric Le Goater


Hello Aditya,

Please run ./scripts/get_maintainer.pl when sending a series. qemu-ppc should be
in Cc:

Briefly looking at this, please separate the changes using one patch per model,
that is : first CPU (target), LPC, OCC, PSI, SBE, PnvCore, SpaprCore. Last the
PnvChip and the machines, powernv11 and pseries. A minimum commit log describing
the HW is required. I don't see PHB6 or XIVE3. Why ?

Also, you will need an OPAL update. The above changes are pointless without it.
The minimum for now is a git commit from the opal repo, then you will need to
update QEMU with a binary.

Thanks,

C.

On 4/1/24 07:55, Aditya Gupta wrote:

This patch series adds support for Power11 pseries and powernv machine targets
to emulate VMs running on Power11.

Most of the P11 support code has been taken from P10 code in QEMU.
And has been tested in pseries, powernv, with and without compat mode.

Git Tree for Testing: https://github.com/adi-g15-ibm/qemu/tree/p11

Aditya Gupta (2):
   ppc: pseries: add P11 cpu type
   ppc: powernv11: add base support for P11 PowerNV

  docs/system/ppc/pseries.rst |   6 +-
  hw/ppc/pnv.c| 409 
  hw/ppc/pnv_core.c   |  94 +
  hw/ppc/pnv_homer.c  |  64 ++
  hw/ppc/pnv_lpc.c|  14 ++
  hw/ppc/pnv_occ.c|  14 ++
  hw/ppc/pnv_psi.c|  21 ++
  hw/ppc/pnv_sbe.c|  19 ++
  hw/ppc/spapr_cpu_core.c |   1 +
  include/hw/ppc/pnv.h|  51 +
  include/hw/ppc/pnv_chip.h   |  30 +++
  include/hw/ppc/pnv_homer.h  |   3 +
  include/hw/ppc/pnv_lpc.h|   4 +
  include/hw/ppc/pnv_occ.h|   2 +
  include/hw/ppc/pnv_psi.h|   2 +
  include/hw/ppc/pnv_sbe.h|   2 +
  include/hw/ppc/pnv_xscom.h  |  55 +
  target/ppc/compat.c |   7 +
  target/ppc/cpu-models.c |   2 +
  target/ppc/cpu-models.h |   2 +
  target/ppc/cpu_init.c   | 162 ++
  21 files changed, 961 insertions(+), 3 deletions(-)

Re: [PATCH v1 01/11] Introduce a common abstract struct HostIOMMUDevice

2024-03-29 Thread Cédric Le Goater


Hello Zhenzhong,

On 3/28/24 04:06, Duan, Zhenzhong wrote:

Hi Cédric,


-Original Message-
From: Cédric Le Goater 
Subject: Re: [PATCH v1 01/11] Introduce a common abstract struct
HostIOMMUDevice

Hello Zhenzhong,

On 3/19/24 12:58, Duan, Zhenzhong wrote:

Hi Cédric,


-Original Message-
From: Cédric Le Goater 
Sent: Tuesday, March 19, 2024 4:17 PM
To: Duan, Zhenzhong ; qemu-
de...@nongnu.org
Cc: alex.william...@redhat.com; eric.au...@redhat.com;
pet...@redhat.com; jasow...@redhat.com; m...@redhat.com;
j...@nvidia.com; nicol...@nvidia.com; joao.m.mart...@oracle.com; Tian,
Kevin ; Liu, Yi L ; Sun, Yi Y
; Peng, Chao P 
Subject: Re: [PATCH v1 01/11] Introduce a common abstract struct
HostIOMMUDevice

Hello Zhenzhong,

On 2/28/24 04:58, Zhenzhong Duan wrote:

HostIOMMUDevice will be inherited by two sub classes,
legacy and iommufd currently.

Introduce a helper function host_iommu_base_device_init to initialize it.

Suggested-by: Eric Auger 
Signed-off-by: Zhenzhong Duan 
---
include/sysemu/host_iommu_device.h | 22

++

1 file changed, 22 insertions(+)
create mode 100644 include/sysemu/host_iommu_device.h

diff --git a/include/sysemu/host_iommu_device.h

b/include/sysemu/host_iommu_device.h

new file mode 100644
index 00..fe80ab25fb
--- /dev/null
+++ b/include/sysemu/host_iommu_device.h
@@ -0,0 +1,22 @@
+#ifndef HOST_IOMMU_DEVICE_H
+#define HOST_IOMMU_DEVICE_H
+
+typedef enum HostIOMMUDevice_Type {
+HID_LEGACY,
+HID_IOMMUFD,
+HID_MAX,
+} HostIOMMUDevice_Type;
+
+typedef struct HostIOMMUDevice {
+HostIOMMUDevice_Type type;


A type field is not a good sign and that's where QOM is useful.


Yes, agree.
I didn't choose QOM because in iommufd-cdev series, VFIOContainer

chooses not using QOM model.

See the discussion:

https://lore.kernel.org/all/YmuFv2s5TPuw7K%2Fu@yekko/

I thought HostIOMMUDevice need to follow same rule.

But after further digging into this, I think it may be ok to use QOM model

as long as we don't expose

HostIOMMUDevice in qapi/qom.json and not use USER_CREATABLE

interface. Your thoughts?

yes. Can we change a bit this series to use QOM ? something like :

 typedef struct HostIOMMUDevice {
 Object parent;
 } HostIOMMUDevice;

 #define TYPE_HOST_IOMMU "host.iommu"
 OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUClass,
HOST_IOMMU)

 struct HostIOMMUClass {
 ObjectClass parent_class;

 int (*get_type)(HostIOMMUDevice *hiod, uint64_t *type, Error **errp);
 int (*get_cap)(HostIOMMUDevice *hiod, uint64_t *cap, Error **errp);
 };

Inherited objects would be TYPE_HOST_IOMMU_IOMMUFD and
TYPE_HOST_IOMMU_LEGACY.
Each class implementing the handlers or not (legacy mode).


Understood, thanks for your guide.



The class handlers are introduced for the intel-iommu helper
vtd_check_hdev()
in order to avoid using iommufd routines directly. HostIOMMUDevice is
supposed
to abstract the Host IOMMU device, so we need to abstract also all the
interfaces to this object.


I'd like to have a minimal adjustment to class handlers. Just let me know if you 
have strong
preference.

Cap/ecap is intel_iommu specific, I'd like to make it a bit generic also for 
arm smmu usage,
and merge get_type and get_cap into one function as they both call 
ioctl(IOMMU_GET_HW_INFO),
something like:
get_info(HostIOMMUDevice *hiod, enum iommu_hw_info_type *type, void **data, 
void **len,  Error **errp);


OK. Let's see how it goes. Having more users of this new object Host
IOMMU device is important to get a better feeling of the interface.
As of today, it doesn't have much value. The iommufd object could
be QOM linked to the vIOMMU when available and we could get the bind
devid in some other ways I suppose. Anyhow, please keep it simple and
let's explore.

Thanks,

C.





and let the iommu emulator extract the content of *data. For intel_iommu, it's:

struct iommu_hw_info_vtd {
 __u32 flags;
 __u32 __reserved;
 __aligned_u64 cap_reg;
 __aligned_u64 ecap_reg;
};



The .host_iommu_device_create() handler could be merged
in .attach_device()
possibly. Anyhow, please use now object_new() and object_unref() instead.
host_iommu_base_device_init() is useless IMHO.


Good idea, will do.







Is vtd_check_hdev() the only use of this field ?


Currently yes. virtio-iommu may have similar usage.


If so, can we simplify with a QOM interface in any way ?


QOM interface is a set of callbacks, guess you mean QOM class,
saying HostIOMMUDevice class, IOMMULegacyDevice class and

IOMMUFDDevice class?

See above proposal. it should work fine.

Also, I think it is better to use a IOMMUFDBackend* parameter for
iommufd_device_get_info() to be consistent with the other routines.


Sure, then I'd like to also rename it to iommufd_backend_get_device_info().

Thanks
Zhenzhong



Then it would be interesting to see how this applies to Eric's series.

Thanks,

C.

[PATCH] raspi4b: Reduce RAM to 1Gb on 32-bit hosts

2024-03-29 Thread Cédric Le Goater

Change the board revision number and RAM size to 1Gb on 32-bit hosts.
On these systems, RAM has a 2047 MB limit and this breaks the tests.

Fixes: 7785e8ea2204 ("hw/arm: Introduce Raspberry PI 4 machine")
Signed-off-by: Cédric Le Goater 
---
 hw/arm/raspi4b.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/hw/arm/raspi4b.c b/hw/arm/raspi4b.c
index 
cb1b1f2f147e8685a1dba6f137335ea0bc89bca5..85877880fc706d216de04ff1e081d66e6080ebac
 100644
--- a/hw/arm/raspi4b.c
+++ b/hw/arm/raspi4b.c
@@ -112,7 +112,11 @@ static void raspi4b_machine_class_init(ObjectClass *oc, 
void *data)
 MachineClass *mc = MACHINE_CLASS(oc);
 RaspiBaseMachineClass *rmc = RASPI_BASE_MACHINE_CLASS(oc);
 
+#if HOST_LONG_BITS == 32
+rmc->board_rev = 0xa03111; /* Revision 1.1, 1 Gb RAM */
+#else
 rmc->board_rev = 0xb03115; /* Revision 1.5, 2 Gb RAM */
+#endif
 raspi_machine_class_common_init(mc, rmc->board_rev);
 mc->init = raspi4b_machine_init;
 }
-- 
2.44.0

[PATCH for-9.1] migration: Add Error** argument to add_bitmaps_to_list()

2024-03-29 Thread Cédric Le Goater

This allows reporting more precise errors in the migration handler
dirty_bitmap_save_setup().

Suggested-by: Vladimir Sementsov-Ogievskiy
Signed-off-by: Cédric Le Goater 
---

 To apply on top of : 
 https://lore.kernel.org/qemu-devel/20240320064911.545001-1-...@redhat.com/
 
 migration/block-dirty-bitmap.c | 34 --
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 
542a8c297b329abc30d1b3a205d29340fa59a961..a7d55048c23505fde565ca784cec3c917dca37e5
 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -481,13 +481,13 @@ static void dirty_bitmap_do_save_cleanup(DBMSaveState *s)
 
 /* Called with the BQL taken. */
 static int add_bitmaps_to_list(DBMSaveState *s, BlockDriverState *bs,
-   const char *bs_name, GHashTable *alias_map)
+   const char *bs_name, GHashTable *alias_map,
+   Error **errp)
 {
 BdrvDirtyBitmap *bitmap;
 SaveBitmapState *dbms;
 GHashTable *bitmap_aliases;
 const char *node_alias, *bitmap_name, *bitmap_alias;
-Error *local_err = NULL;
 
 /* When an alias map is given, @bs_name must be @bs's node name */
 assert(!alias_map || !strcmp(bs_name, bdrv_get_node_name(bs)));
@@ -504,8 +504,8 @@ static int add_bitmaps_to_list(DBMSaveState *s, 
BlockDriverState *bs,
 bitmap_name = bdrv_dirty_bitmap_name(bitmap);
 
 if (!bs_name || strcmp(bs_name, "") == 0) {
-error_report("Bitmap '%s' in unnamed node can't be migrated",
- bitmap_name);
+error_setg(errp, "Bitmap '%s' in unnamed node can't be migrated",
+   bitmap_name);
 return -1;
 }
 
@@ -525,9 +525,9 @@ static int add_bitmaps_to_list(DBMSaveState *s, 
BlockDriverState *bs,
 }
 
 if (node_alias[0] == '#') {
-error_report("Bitmap '%s' in a node with auto-generated "
- "name '%s' can't be migrated",
- bitmap_name, node_alias);
+error_setg(errp, "Bitmap '%s' in a node with auto-generated "
+   "name '%s' can't be migrated",
+   bitmap_name, node_alias);
 return -1;
 }
 
@@ -538,8 +538,7 @@ static int add_bitmaps_to_list(DBMSaveState *s, 
BlockDriverState *bs,
 continue;
 }
 
-if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, &local_err)) {
-error_report_err(local_err);
+if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_DEFAULT, errp)) {
 return -1;
 }
 
@@ -558,9 +557,9 @@ static int add_bitmaps_to_list(DBMSaveState *s, 
BlockDriverState *bs,
 }
 } else {
 if (strlen(bitmap_name) > UINT8_MAX) {
-error_report("Cannot migrate bitmap '%s' on node '%s': "
- "Name is longer than %u bytes",
- bitmap_name, bs_name, UINT8_MAX);
+error_setg(errp, "Cannot migrate bitmap '%s' on node '%s': "
+   "Name is longer than %u bytes",
+   bitmap_name, bs_name, UINT8_MAX);
 return -1;
 }
 bitmap_alias = bitmap_name;
@@ -599,7 +598,7 @@ static int add_bitmaps_to_list(DBMSaveState *s, 
BlockDriverState *bs,
 }
 
 /* Called with the BQL taken. */
-static int init_dirty_bitmap_migration(DBMSaveState *s)
+static int init_dirty_bitmap_migration(DBMSaveState *s, Error **errp)
 {
 BlockDriverState *bs;
 SaveBitmapState *dbms;
@@ -643,7 +642,7 @@ static int init_dirty_bitmap_migration(DBMSaveState *s)
 }
 
 if (bs && bs->drv && !bs->drv->is_filter) {
-if (add_bitmaps_to_list(s, bs, name, NULL)) {
+if (add_bitmaps_to_list(s, bs, name, NULL, errp)) {
 goto fail;
 }
 g_hash_table_add(handled_by_blk, bs);
@@ -656,7 +655,8 @@ static int init_dirty_bitmap_migration(DBMSaveState *s)
 continue;
 }
 
-if (add_bitmaps_to_list(s, bs, bdrv_get_node_name(bs), alias_map)) {
+if (add_bitmaps_to_list(s, bs, bdrv_get_node_name(bs), alias_map,
+errp)) {
 goto fail;
 }
 }
@@ -1218,9 +1218,7 @@ static int dirty_bitmap_save_setup(QEMUFile *f, void 
*opaque, Error **errp)
 DBMSaveState *s = &((DBMState *)opaque)->save;
 SaveBitmapState *dbms = NULL;
 
-if (init_dirty_bitmap_migration(s) < 0) {
-error_setg(errp,
-   "Failed to initialize dirty tracking bitmap for blocks");
+if (init_dirty_bitmap_migration(s, errp) < 0) {
 return -1;
 }
 
-- 
2.44.0

Re: [PATCH for-9.1 v5 07/14] migration: Add Error** argument to .save_setup() handler

2024-03-29 Thread Cédric Le Goater


Hello Vladimir,

On 3/29/24 10:32, Vladimir Sementsov-Ogievskiy wrote:

On 20.03.24 09:49, Cédric Le Goater wrote:

diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
index 
2708abf3d762de774ed294d3fdb8e56690d2974c..542a8c297b329abc30d1b3a205d29340fa59a961
 100644
--- a/migration/block-dirty-bitmap.c
+++ b/migration/block-dirty-bitmap.c
@@ -1213,12 +1213,14 @@ fail:
  return ret;
  }
-static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque)
+static int dirty_bitmap_save_setup(QEMUFile *f, void *opaque, Error **errp)
  {
  DBMSaveState *s = &((DBMState *)opaque)->save;
  SaveBitmapState *dbms = NULL;
  if (init_dirty_bitmap_migration(s) < 0) {
+    error_setg(errp,
+   "Failed to initialize dirty tracking bitmap for blocks");


No, that's not about initializing a bitmap. This all is about migration of 
block-dirty-bitmaps themselves.

So the correct thing to say would be "Failed to initialize migration of block dirty bitmaps".

with this, for block dirty bitmap migration:
Acked-by: Vladimir Sementsov-Ogievskiy 


I had kept your previous R-b.

Should we remove it ? or is it ok if I address your comments below in a
followup patch, in which case the error message above would be removed.


Still, it would be a lot better to add errp to init_dirty_bitmap_migration() and to 
add_bitmaps_to_list() too: look,

init_dirty_bitmap_migration() fails only if add_bitmaps_to_list() fails

in turn,

add_bitmaps_to_list() has several clear failure points, where it always does 
error_report (or error_report_err), which would be better to pass-through to 
the user.


Good idea. Will do.

Thanks,

C.

[PATCH for-9.0] tests/qtest: Fix STM32L4x5 GPIO test on 32-bit

2024-03-29 Thread Cédric Le Goater

The test mangles the GPIO address and the pin number in the
qtest_add_data_func data parameter. Doing so, it assumes that the host
pointer size is always 64-bit, which breaks on 32-bit :

../tests/qtest/stm32l4x5_gpio-test.c: In function ‘test_gpio_output_mode’:
../tests/qtest/stm32l4x5_gpio-test.c:272:25: error: cast from pointer to 
integer of different size [-Werror=pointer-to-int-cast]
  272 | unsigned int pin = ((uint64_t)data) & 0xF;
  | ^
../tests/qtest/stm32l4x5_gpio-test.c:273:22: error: cast from pointer to 
integer of different size [-Werror=pointer-to-int-cast]
  273 | uint32_t gpio = ((uint64_t)data) >> 32;
  |  ^

To fix, improve the mangling of the GPIO address and pin number fields
by using GPIO_SIZE so that the resulting value fits in a 32-bit pointer.
While at it, include some helpers to hide the details.

Cc: Arnaud Minier 
Cc: Inès Varhol 
Signed-off-by: Cédric Le Goater 
---
 tests/qtest/stm32l4x5_gpio-test.c | 59 ++-
 1 file changed, 35 insertions(+), 24 deletions(-)

diff --git a/tests/qtest/stm32l4x5_gpio-test.c 
b/tests/qtest/stm32l4x5_gpio-test.c
index 
cc56be2031f7a5e0c501db02e7484ad70e54573c..0f6bda54d3c0704f4bbb982824d89bb2aca75367
 100644
--- a/tests/qtest/stm32l4x5_gpio-test.c
+++ b/tests/qtest/stm32l4x5_gpio-test.c
@@ -76,6 +76,17 @@ const uint32_t idr_reset[NUM_GPIOS] = {
 0x
 };
 
+#define PIN_MASK0xF
+#define GPIO_ADDR_MASK  (~(GPIO_SIZE - 1))
+
+static inline void *test_data(uint32_t gpio_addr, uint8_t pin)
+{
+return (void *)(uintptr_t)((gpio_addr & GPIO_ADDR_MASK) | (pin & 
PIN_MASK));
+}
+
+#define test_gpio_addr(data)  ((uintptr_t)(data) & GPIO_ADDR_MASK)
+#define test_pin(data)((uintptr_t)(data) & PIN_MASK)
+
 static uint32_t gpio_readl(unsigned int gpio, unsigned int offset)
 {
 return readl(gpio + offset);
@@ -269,8 +280,8 @@ static void test_gpio_output_mode(const void *data)
  * Additionally, it checks that values written to ODR
  * when not in output mode are stored and not discarded.
  */
-unsigned int pin = ((uint64_t)data) & 0xF;
-uint32_t gpio = ((uint64_t)data) >> 32;
+unsigned int pin = test_pin(data);
+uint32_t gpio = test_gpio_addr(data);
 unsigned int gpio_id = get_gpio_id(gpio);
 
 qtest_irq_intercept_in(global_qtest, "/machine/soc/syscfg");
@@ -304,8 +315,8 @@ static void test_gpio_input_mode(const void *data)
  * corresponding GPIO line high/low : it should set the
  * right bit in IDR and send an irq to syscfg.
  */
-unsigned int pin = ((uint64_t)data) & 0xF;
-uint32_t gpio = ((uint64_t)data) >> 32;
+unsigned int pin = test_pin(data);
+uint32_t gpio = test_gpio_addr(data);
 unsigned int gpio_id = get_gpio_id(gpio);
 
 qtest_irq_intercept_in(global_qtest, "/machine/soc/syscfg");
@@ -333,8 +344,8 @@ static void test_pull_up_pull_down(const void *data)
  * Test that a floating pin with pull-up sets the pin
  * high and vice-versa.
  */
-unsigned int pin = ((uint64_t)data) & 0xF;
-uint32_t gpio = ((uint64_t)data) >> 32;
+unsigned int pin = test_pin(data);
+uint32_t gpio = test_gpio_addr(data);
 unsigned int gpio_id = get_gpio_id(gpio);
 
 qtest_irq_intercept_in(global_qtest, "/machine/soc/syscfg");
@@ -363,8 +374,8 @@ static void test_push_pull(const void *data)
  * disconnects the pin, that the pin can't be set or reset
  * externally afterwards.
  */
-unsigned int pin = ((uint64_t)data) & 0xF;
-uint32_t gpio = ((uint64_t)data) >> 32;
+unsigned int pin = test_pin(data);
+uint32_t gpio = test_gpio_addr(data);
 uint32_t gpio2 = GPIO_BASE_ADDR + (GPIO_H - gpio);
 
 qtest_irq_intercept_in(global_qtest, "/machine/soc/syscfg");
@@ -410,8 +421,8 @@ static void test_open_drain(const void *data)
  * However a pin set low externally shouldn't be disconnected,
  * and it can be set low externally when in open-drain mode.
  */
-unsigned int pin = ((uint64_t)data) & 0xF;
-uint32_t gpio = ((uint64_t)data) >> 32;
+unsigned int pin = test_pin(data);
+uint32_t gpio = test_gpio_addr(data);
 uint32_t gpio2 = GPIO_BASE_ADDR + (GPIO_H - gpio);
 
 qtest_irq_intercept_in(global_qtest, "/machine/soc/syscfg");
@@ -466,8 +477,8 @@ static void test_bsrr_brr(const void *data)
  * has the desired effect on ODR.
  * In BSRR, BSx has priority over BRx.
  */
-unsigned int pin = ((uint64_t)data) & 0xF;
-uint32_t gpio = ((uint64_t)data) >> 32;
+unsigned int pin = test_pin(data);
+uint32_t gpio = test_gpio_addr(data);
 
 gpio_writel(gpio, BSRR, (1 << pin));
 g_assert_cmphex(gpio_readl(gpio, ODR), ==, reset(gpio, ODR) | (1 << pin));
@@ -507,40 +518,40 @@ int main(int argc, char **argv)
  * is pr

Re: [PATCH for-9.0 1/2] migration: Set migration error in migration_completion()

2024-03-28 Thread Cédric Le Goater


On 3/28/24 16:50, Avihai Horon wrote:


On 28/03/2024 17:21, Cédric Le Goater wrote:

External email: Use caution opening links or attachments


Hello Avihai,

On 3/28/24 15:02, Avihai Horon wrote:

After commit 9425ef3f990a ("migration: Use migrate_has_error() in
close_return_path_on_source()"), close_return_path_on_source() assumes
that migration error is set if an error occurs during migration.

This may not be true if migration errors in migration_completion(). For
example, if qemu_savevm_state_complete_precopy() errors, migration error
will not be set


Out of curiosity, could you describe a bit more the context ? Did
vfio_save_complete_precopy() fail ? why ?


Yep, vfio_save_complete_precopy() failed (but it failed while I was 
experimenting with an unofficial debug FW).



We should propagate errors of .save_live_complete_precopy() handlers as
it was done for the .save_setup() handlers. For 9.1.


Agreed.




This in turn, will cause a migration hang bug, similar to the bug that
was fixed by commit 22b04245f0d5 ("migration: Join the return path
thread before releasing to_dst_file"), as shutdown() will not be issued
for the return-path channel.


yes, but this test :

    if (ret < 0) {
    goto fail;
    }

will skip the close_return_path_on_source() call. Won't it ? So I don't
understand how it can be an issue. Am I missing something ?


It will skip the close_return_path_on_source() call in migration_completion(), 
but there is another close_return_path_on_source() call in migrate_fd_cleanup().


OK. Found it. This is a code path I hadn't explored yet.

Acked-by: Cédric Le Goater 

Thanks,

C.







Fix it by ensuring migration error is set in case of error in
migration_completion().


Why didn't you add a reference to commit 9425ef3f990a ?


I thought this commit didn't introduce this bug, but looking again in the 
mailing list [1], it kinda did:
The hang bug was fully fixed by commit 22b04245f0d ("migration: Join the return path 
thread before releasing to_dst_file") and then 9425ef3f990a re-introduced the bug, 
but only for migration_completion() case.
So, you are right, a fixes line with 9425ef3f990a should be added.

Thanks.

[1] https://lore.kernel.org/all/20240226203122.22894-1-faro...@suse.de/





Signed-off-by: Avihai Horon 
---
  migration/migration.c | 10 ++
  1 file changed, 10 insertions(+)

diff --git a/migration/migration.c b/migration/migration.c
index 9fe8fd2afd7..b73ae3a72c4 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2799,6 +2799,7 @@ static void migration_completion(MigrationState *s)
  {
  int ret = 0;
  int current_active_state = s->state;
+    Error *local_err = NULL;

  if (s->state == MIGRATION_STATUS_ACTIVE) {
  ret = migration_completion_precopy(s, _active_state);
@@ -2832,6 +2833,15 @@ static void migration_completion(MigrationState *s)
  return;

  fail:
+    if (qemu_file_get_error_obj(s->to_dst_file, _err)) {
+    migrate_set_error(s, local_err);
+    error_free(local_err);
+    } else if (ret) {
+    error_setg_errno(_err, -ret, "Error in migration completion");


The 'ret = -1' case could be improved with error_setg(). As a followup.

Thanks,

C.





+    migrate_set_error(s, local_err);
+    error_free(local_err);
+    }
+
  migration_completion_failed(s, current_active_state);
  }

Re: [PATCH for-9.0 1/2] migration: Set migration error in migration_completion()

2024-03-28 Thread Cédric Le Goater


Hello Avihai,

On 3/28/24 15:02, Avihai Horon wrote:

After commit 9425ef3f990a ("migration: Use migrate_has_error() in
close_return_path_on_source()"), close_return_path_on_source() assumes
that migration error is set if an error occurs during migration.

This may not be true if migration errors in migration_completion(). For
example, if qemu_savevm_state_complete_precopy() errors, migration error
will not be set


Out of curiosity, could you describe a bit more the context ? Did
vfio_save_complete_precopy() fail ? why ?

We should propagate errors of .save_live_complete_precopy() handlers as
it was done for the .save_setup() handlers. For 9.1.


This in turn, will cause a migration hang bug, similar to the bug that
was fixed by commit 22b04245f0d5 ("migration: Join the return path
thread before releasing to_dst_file"), as shutdown() will not be issued
for the return-path channel.


yes, but this test :

if (ret < 0) {
goto fail;
}

will skip the close_return_path_on_source() call. Won't it ? So I don't
understand how it can be an issue. Am I missing something ?


Fix it by ensuring migration error is set in case of error in
migration_completion().


Why didn't you add a reference to commit 9425ef3f990a ?



Signed-off-by: Avihai Horon 
---
  migration/migration.c | 10 ++
  1 file changed, 10 insertions(+)

diff --git a/migration/migration.c b/migration/migration.c
index 9fe8fd2afd7..b73ae3a72c4 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2799,6 +2799,7 @@ static void migration_completion(MigrationState *s)
  {
  int ret = 0;
  int current_active_state = s->state;
+Error *local_err = NULL;
  
  if (s->state == MIGRATION_STATUS_ACTIVE) {

  ret = migration_completion_precopy(s, &current_active_state);
@@ -2832,6 +2833,15 @@ static void migration_completion(MigrationState *s)
  return;
  
  fail:

+if (qemu_file_get_error_obj(s->to_dst_file, &local_err)) {
+migrate_set_error(s, local_err);
+error_free(local_err);
+} else if (ret) {
+error_setg_errno(&local_err, -ret, "Error in migration completion");


The 'ret = -1' case could be improved with error_setg(). As a followup.

Thanks,

C.





+migrate_set_error(s, local_err);
+error_free(local_err);
+}
+
  migration_completion_failed(s, current_active_state);
  }

Re: [PATCH for-9.0 2/2] migration/postcopy: Ensure postcopy_start() sets errp if it fails

2024-03-28 Thread Cédric Le Goater


On 3/28/24 15:02, Avihai Horon wrote:

There are several places where postcopy_start() fails without setting
errp. This can cause a null pointer de-reference, as in case of error,
the caller of postcopy_start() copies/prints the error set in errp.

Fix it by setting errp in all of postcopy_start() error paths.

Fixes: 908927db28ea ("migration: Update error description whenever migration 
fails")
Signed-off-by: Avihai Horon 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  migration/migration.c | 8 
  1 file changed, 8 insertions(+)

diff --git a/migration/migration.c b/migration/migration.c
index b73ae3a72c4..86bf76e9258 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2510,6 +2510,8 @@ static int postcopy_start(MigrationState *ms, Error 
**errp)
  migration_wait_main_channel(ms);
  if (postcopy_preempt_establish_channel(ms)) {
  migrate_set_state(>state, ms->state, MIGRATION_STATUS_FAILED);
+error_setg(errp, "%s: Failed to establish preempt channel",
+   __func__);
  return -1;
  }
  }
@@ -2525,17 +2527,22 @@ static int postcopy_start(MigrationState *ms, Error 
**errp)
  
  ret = migration_stop_vm(ms, RUN_STATE_FINISH_MIGRATE);

  if (ret < 0) {
+error_setg_errno(errp, -ret, "%s: Failed to stop the VM", __func__);
  goto fail;
  }
  
  ret = migration_maybe_pause(ms, _state,

  MIGRATION_STATUS_POSTCOPY_ACTIVE);
  if (ret < 0) {
+error_setg_errno(errp, -ret, "%s: Failed in migration_maybe_pause()",
+ __func__);
  goto fail;
  }
  
  ret = bdrv_inactivate_all();

  if (ret < 0) {
+error_setg_errno(errp, -ret, "%s: Failed in bdrv_inactivate_all()",
+ __func__);
  goto fail;
  }
  restart_block = true;
@@ -2612,6 +2619,7 @@ static int postcopy_start(MigrationState *ms, Error 
**errp)
  
  /* Now send that blob */

  if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
+error_setg(errp, "%s: Failed to send packaged data", __func__);
  goto fail_closefb;
  }
  qemu_fclose(fb);

Re: [PATCH v3] vfio/pci: migration: Skip config space check for Vendor Specific Information in VSC during restore/load

2024-03-28 Thread Cédric Le Goater


On 3/27/24 21:52, Alex Williamson wrote:

On Wed, 27 Mar 2024 16:11:37 -0400
"Michael S. Tsirkin"  wrote:


On Wed, Mar 27, 2024 at 11:39:15AM -0600, Alex Williamson wrote:

On Fri, 22 Mar 2024 12:12:10 +0530
Vinayak Kale  wrote:
   

In case of migration, during restore operation, qemu checks config space of the
pci device with the config space in the migration stream captured during save
operation. In case of config space data mismatch, restore operation is failed.

config space check is done in function get_pci_config_device(). By default VSC
(vendor-specific-capability) in config space is checked.

Due to qemu's config space check for VSC, live migration is broken across NVIDIA
vGPU devices in situation where source and destination host driver is different.
In this situation, Vendor Specific Information in VSC varies on the destination
to ensure vGPU feature capabilities exposed to the guest driver are compatible
with destination host.

If a vfio-pci device is migration capable and vfio-pci vendor driver is OK with
volatile Vendor Specific Info in VSC then qemu should exempt config space check
for Vendor Specific Info. It is vendor driver's responsibility to ensure that
VSC is consistent across migration. Here consistency could mean that VSC format
should be same on source and destination, however actual Vendor Specific Info
may not be byte-to-byte identical.

This patch skips the check for Vendor Specific Information in VSC for VFIO-PCI
device by clearing pdev->cmask[] offsets. Config space check is still enforced
for 3 byte VSC header. If cmask[] is not set for an offset, then qemu skips
config space check for that offset.

Signed-off-by: Vinayak Kale 
---
Version History
v2->v3:
 - Config space check skipped only for Vendor Specific Info in VSC, check is
   still enforced for 3 byte VSC header.
 - Updated commit description with live migration failure scenario.
v1->v2:
 - Limited scope of change to vfio-pci devices instead of all pci devices.

  hw/vfio/pci.c | 24 
  1 file changed, 24 insertions(+)



Acked-by: Alex Williamson 



A very reasonable way to do it.

Reviewed-by: Michael S. Tsirkin 

Merge through the VFIO tree I presume?


Yep, Cédric said he'd grab it for 9.1.  Thanks,



Applied to vfio-next.

Thanks,

C.

Re: [PATCH 10/10] pnv/phb4: Mask off LSI Source-ID based on number of interrupts

2024-03-27 Thread Cédric Le Goater


On 3/27/24 10:59, Saif Abrar wrote:

Hello Cedric,


  }
  +static void pnv_phb4_fund_A_reset(PnvPHB4 *phb)

What is fund_A ?


I used 'fund_A' as an abbreviation to "Fundamental Register Set A".

Please let me know if you would suggest another abbreviation to name this method.


pnv_phb4_reset_xsrc, maybe?


Thanks,

C.




+{
+    phb->regs[PHB_LSI_SOURCE_ID >> 3] = PPC_BITMASK(4, 12);


Is this mask the default value for HW ?

Yes, the spec defines the bits[04:12] of LSI Source ID having reset value: 0x1FF


Regards,

Saif


On 25-03-2024 07:04 pm, Cédric Le Goater wrote:

On 3/21/24 11:04, Saif Abrar wrote:

Add a method to reset the value of LSI Source-ID.
Mask off LSI source-id based on number of interrupts in the big/small PHB.


Looks ok.



Signed-off-by: Saif Abrar 
---
  hw/pci-host/pnv_phb4.c | 10 --
  1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index f48750ee54..8fbaf6512e 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -489,6 +489,7 @@ static void pnv_phb4_update_xsrc(PnvPHB4 *phb)
    lsi_base = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]);
  lsi_base <<= 3;
+    lsi_base &= (xsrc->nr_irqs - 1);
    /* TODO: handle reset values of PHB_LSI_SRC_ID */
  if (!lsi_base) {
@@ -1966,6 +1967,12 @@ static void pnv_phb4_ro_mask_init(PnvPHB4 *phb)
  /* TODO: Add more RO-masks as regs are implemented in the model */
  }
  +static void pnv_phb4_fund_A_reset(PnvPHB4 *phb)


What is fund_A ?


+{
+    phb->regs[PHB_LSI_SOURCE_ID >> 3] = PPC_BITMASK(4, 12);


Is this mask the default value for HW ?


Thanks,

C.



+    pnv_phb4_update_xsrc(phb);
+}
+
  static void pnv_phb4_err_reg_reset(PnvPHB4 *phb)
  {
  STICKY_RST(PHB_ERR_STATUS,   0, PPC_BITMASK(0, 33));
@@ -2023,6 +2030,7 @@ static void pnv_phb4_reset(void *dev)
  pnv_phb4_cfg_core_reset(phb);
  pnv_phb4_pbl_core_reset(phb);
  +    pnv_phb4_fund_A_reset(phb);
  pnv_phb4_err_reg_reset(phb);
  pnv_phb4_pcie_stack_reg_reset(phb);
  pnv_phb4_regb_err_reg_reset(phb);
@@ -2102,8 +2110,6 @@ static void pnv_phb4_realize(DeviceState *dev, Error 
**errp)
  return;
  }
  -    pnv_phb4_update_xsrc(phb);
-
  phb->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc, xsrc->nr_irqs);
    pnv_phb4_xscom_realize(phb);

Re: [PATCH v1 01/11] Introduce a common abstract struct HostIOMMUDevice

2024-03-27 Thread Cédric Le Goater


Hello Zhenzhong,

On 3/19/24 12:58, Duan, Zhenzhong wrote:

Hi Cédric,


-Original Message-
From: Cédric Le Goater 
Sent: Tuesday, March 19, 2024 4:17 PM
To: Duan, Zhenzhong ; qemu-
de...@nongnu.org
Cc: alex.william...@redhat.com; eric.au...@redhat.com;
pet...@redhat.com; jasow...@redhat.com; m...@redhat.com;
j...@nvidia.com; nicol...@nvidia.com; joao.m.mart...@oracle.com; Tian,
Kevin ; Liu, Yi L ; Sun, Yi Y
; Peng, Chao P 
Subject: Re: [PATCH v1 01/11] Introduce a common abstract struct
HostIOMMUDevice

Hello Zhenzhong,

On 2/28/24 04:58, Zhenzhong Duan wrote:

HostIOMMUDevice will be inherited by two sub classes,
legacy and iommufd currently.

Introduce a helper function host_iommu_base_device_init to initialize it.

Suggested-by: Eric Auger 
Signed-off-by: Zhenzhong Duan 
---
   include/sysemu/host_iommu_device.h | 22 ++
   1 file changed, 22 insertions(+)
   create mode 100644 include/sysemu/host_iommu_device.h

diff --git a/include/sysemu/host_iommu_device.h

b/include/sysemu/host_iommu_device.h

new file mode 100644
index 00..fe80ab25fb
--- /dev/null
+++ b/include/sysemu/host_iommu_device.h
@@ -0,0 +1,22 @@
+#ifndef HOST_IOMMU_DEVICE_H
+#define HOST_IOMMU_DEVICE_H
+
+typedef enum HostIOMMUDevice_Type {
+HID_LEGACY,
+HID_IOMMUFD,
+HID_MAX,
+} HostIOMMUDevice_Type;
+
+typedef struct HostIOMMUDevice {
+HostIOMMUDevice_Type type;


A type field is not a good sign and that's where QOM is useful.


Yes, agree.
I didn't choose QOM because, in the iommufd-cdev series, VFIOContainer chose not
to use the QOM model.
See the discussion: https://lore.kernel.org/all/YmuFv2s5TPuw7K%2Fu@yekko/
I thought HostIOMMUDevice needed to follow the same rule.

But after further digging into this, I think it may be OK to use the QOM model as
long as we don't expose HostIOMMUDevice in qapi/qom.json and don't use the
USER_CREATABLE interface. Your thoughts?


Yes. Can we change this series a bit to use QOM? Something like:

typedef struct HostIOMMUDevice {
Object parent;
} HostIOMMUDevice;

#define TYPE_HOST_IOMMU "host.iommu"

OBJECT_DECLARE_TYPE(HostIOMMUDevice, HostIOMMUClass, HOST_IOMMU)

struct HostIOMMUClass {

ObjectClass parent_class;

int (*get_type)(HostIOMMUDevice *hiod, uint64_t *type, Error **errp);

int (*get_cap)(HostIOMMUDevice *hiod, uint64_t *cap, Error **errp);
};

Inherited objects would be TYPE_HOST_IOMMU_IOMMUFD and TYPE_HOST_IOMMU_LEGACY.
Each class implementing the handlers or not (legacy mode).

The class handlers are introduced for the intel-iommu helper vtd_check_hdev()
in order to avoid using iommufd routines directly. HostIOMMUDevice is supposed
to abstract the Host IOMMU device, so we need to abstract also all the
interfaces to this object.

The .host_iommu_device_create() handler could possibly be merged into
.attach_device(). Anyhow, please now use object_new() and object_unref() instead.
host_iommu_base_device_init() is useless IMHO.





Is vtd_check_hdev() the only use of this field ?


Currently yes. virtio-iommu may have similar usage.


If so, can we simplify with a QOM interface in any way ?


A QOM interface is a set of callbacks; I guess you mean QOM classes, say a
HostIOMMUDevice class, an IOMMULegacyDevice class and an IOMMUFDDevice class?


See the proposal above. It should work fine.

Also, I think it is better to use a IOMMUFDBackend* parameter for
iommufd_device_get_info() to be consistent with the other routines.

Then it would be interesting to see how this applies to Eric's series.

Thanks,

C.

Re: [PATCH] misc/pca955*: Move models under hw/gpio

2024-03-26 Thread Cédric Le Goater


On 3/26/24 10:55, Philippe Mathieu-Daudé wrote:

On 25/3/24 14:48, Cédric Le Goater wrote:

The PCA9552 and PCA9554 devices are both I2C GPIO controllers and the
PCA9552 also can drive LEDs. Do all the necessary adjustments to move
the models under hw/gpio.

Cc: Glenn Miles 
Signed-off-by: Cédric Le Goater 
---
  MAINTAINERS  | 4 ++--
  include/hw/{misc => gpio}/pca9552.h  | 0
  include/hw/{misc => gpio}/pca9552_regs.h | 0
  include/hw/{misc => gpio}/pca9554.h  | 0
  include/hw/{misc => gpio}/pca9554_regs.h | 0
  hw/arm/aspeed.c  | 2 +-
  hw/{misc => gpio}/pca9552.c  | 4 ++--
  hw/{misc => gpio}/pca9554.c  | 4 ++--
  tests/qtest/pca9552-test.c   | 2 +-
  tests/qtest/pnv-host-i2c-test.c  | 4 ++--
  hw/gpio/meson.build  | 2 ++
  hw/gpio/trace-events | 4 
  hw/misc/meson.build  | 2 --
  hw/misc/trace-events | 4 
  14 files changed, 16 insertions(+), 16 deletions(-)
  rename include/hw/{misc => gpio}/pca9552.h (100%)
  rename include/hw/{misc => gpio}/pca9552_regs.h (100%)
  rename include/hw/{misc => gpio}/pca9554.h (100%)
  rename include/hw/{misc => gpio}/pca9554_regs.h (100%)
  rename hw/{misc => gpio}/pca9552.c (99%)
  rename hw/{misc => gpio}/pca9554.c (99%)


Thanks, patch queued.


This one is merged,

https://gitlab.com/qemu-project/qemu/-/commit/6328d8ffa6cb9d750e4bfcfd73ac25d3a39ceb63

Thanks,

C.

Re: [PATCH 04/10] pnv/phb4: Implement read-only and write-only bits of registers

2024-03-25 Thread Cédric Le Goater


On 3/21/24 11:04, Saif Abrar wrote:

SW cannot write the read-only(RO) bits of a register
and write-only(WO) bits of a register return 0 when read.

Added ro_mask[] for each register that defines which
bits in that register are RO.
When writing to a register, the RO-bits are not updated.

When reading a register, clear the WO bits and return the updated value.

Tested the registers PHB_DMA_SYNC, PHB_PCIE_HOTPLUG_STATUS, PHB_PCIE_LMR,
PHB_PCIE_DLP_TRWCTL, PHB_LEM_ERROR_AND_MASK and PHB_LEM_ERROR_OR_MASK
by writing all 1's and reading back the value.
The WO bits in these registers should read back as 0.

Signed-off-by: Saif Abrar 
---
  hw/pci-host/pnv_phb4.c  | 77 ++---
  include/hw/pci-host/pnv_phb4.h  |  7 +++
  include/hw/pci-host/pnv_phb4_regs.h | 19 +--
  tests/qtest/pnv-phb4-test.c | 60 +-
  4 files changed, 150 insertions(+), 13 deletions(-)

diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index b3a83837f8..a81763f34c 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -735,6 +735,10 @@ static void pnv_phb4_reg_write(void *opaque, hwaddr off, 
uint64_t val,
  return;
  }
  
+/* Update 'val' according to the register's RO-mask */

+val = (phb->regs[off >> 3] & phb->ro_mask[off >> 3]) |
+  (val & ~(phb->ro_mask[off >> 3]));
+
  /* Record whether it changed */
  changed = phb->regs[off >> 3] != val;
  
@@ -808,7 +812,7 @@ static void pnv_phb4_reg_write(void *opaque, hwaddr off, uint64_t val,

  case PHB_TCE_TAG_ENABLE:
  case PHB_INT_NOTIFY_ADDR:
  case PHB_INT_NOTIFY_INDEX:
-case PHB_DMARD_SYNC:
+case PHB_DMA_SYNC:
 break;
  
  /* Noise on anything else */

@@ -846,7 +850,7 @@ static uint64_t pnv_phb4_reg_read(void *opaque, hwaddr off, 
unsigned size)
  case PHB_VERSION:
  return PNV_PHB4_PEC_GET_CLASS(phb->pec)->version;
  
-/* Read-only */

+/* Read-only */
  case PHB_PHB4_GEN_CAP:
  return 0xe4b8ull;
  case PHB_PHB4_TCE_CAP:
@@ -856,18 +860,49 @@ static uint64_t pnv_phb4_reg_read(void *opaque, hwaddr 
off, unsigned size)
  case PHB_PHB4_EEH_CAP:
  return phb->big_phb ? 0x2000ull : 0x1000ull;
  
+/* Write-only, read will return zeros */

+case PHB_LEM_ERROR_AND_MASK:
+case PHB_LEM_ERROR_OR_MASK:
+return 0;
+case PHB_PCIE_DLP_TRWCTL:
+val &= ~PHB_PCIE_DLP_TRWCTL_WREN;
+return val;
  /* IODA table accesses */
  case PHB_IODA_DATA0:
  return pnv_phb4_ioda_read(phb);
  
+/*

+ * DMA sync: make it look like it's complete,
+ *   clear write-only read/write start sync bits.
+ */
+case PHB_DMA_SYNC:
+val = PHB_DMA_SYNC_RD_COMPLETE |
+~(PHB_DMA_SYNC_RD_START | PHB_DMA_SYNC_WR_START);
+return val;
+
+/*
+ * PCI-E Stack registers
+ */
+case PHB_PCIE_SCR:
+val |= PHB_PCIE_SCR_PLW_X16; /* RO bit */
+break;
+
  /* Link training always appears trained */
  case PHB_PCIE_DLP_TRAIN_CTL:
  /* TODO: Do something sensible with speed ? */
-return PHB_PCIE_DLP_INBAND_PRESENCE | PHB_PCIE_DLP_TL_LINKACT;
+val |= PHB_PCIE_DLP_INBAND_PRESENCE | PHB_PCIE_DLP_TL_LINKACT;
+return val;
+
+case PHB_PCIE_HOTPLUG_STATUS:
+/* Clear write-only bit */
+val &= ~PHB_PCIE_HPSTAT_RESAMPLE;
+return val;
  
-/* DMA read sync: make it look like it's complete */

-case PHB_DMARD_SYNC:
-return PHB_DMARD_SYNC_COMPLETE;
+/* Link Management Register */
+case PHB_PCIE_LMR:
+/* These write-only bits always read as 0 */
+val &= ~(PHB_PCIE_LMR_CHANGELW | PHB_PCIE_LMR_RETRAINLINK);
+return val;
  
  /* Silent simple reads */

  case PHB_LSI_SOURCE_ID:
@@ -1712,6 +1747,33 @@ static PCIIOMMUOps pnv_phb4_iommu_ops = {
  .get_address_space = pnv_phb4_dma_iommu,
  };
  
+static void pnv_phb4_ro_mask_init(PnvPHB4 *phb)

+{
+/* Clear RO-mask to make all regs as R/W by default */
+memset(phb->ro_mask, 0x0, PNV_PHB4_NUM_REGS * sizeof(uint64_t));
+
+/*
+ * Set register specific RO-masks
+ */
+
+/* PBL - Error Injection Register (0x1910) */
+phb->ro_mask[PHB_PBL_ERR_INJECT >> 3] =
+PPC_BITMASK(0, 23) | PPC_BITMASK(28, 35) | PPC_BIT(38) | PPC_BIT(46) |
+PPC_BITMASK(49, 51) | PPC_BITMASK(55, 63);
+
+/* Reserved bits[60:63] */
+phb->ro_mask[PHB_TXE_ERR_LEM_ENABLE >> 3] =
+phb->ro_mask[PHB_TXE_ERR_AIB_FENCE_ENABLE >> 3] = PPC_BITMASK(60, 63);
+/* Reserved bits[36:63] */
+phb->ro_mask[PHB_RXE_TCE_ERR_LEM_ENABLE >> 3] =
+phb->ro_mask[PHB_RXE_TCE_ERR_AIB_FENCE_ENABLE >> 3] = PPC_BITMASK(36, 63);
+/* Reserved bits[40:63] */
+phb->ro_mask[PHB_ERR_LEM_ENABLE >> 3] =
+phb->ro_mask[PHB_ERR_AIB_FENCE_ENABLE >> 3] = PPC_BITMASK(40, 63);



These constant values should be

Re: [PATCH 05/10] pnv/phb4: Implement write-clear and return 1's on unimplemented reg read

2024-03-25 Thread Cédric Le Goater


On 3/21/24 11:04, Saif Abrar wrote:

Implement write-1-to-clear and write-X-to-clear logic.
Update registers with silent simple read and write.
Return all 1's when an unimplemented/reserved register is read.

Test that reading address 0x0 returns all 1's (i.e. -1).

Signed-off-by: Saif Abrar 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/pci-host/pnv_phb4.c  | 190 ++--
  include/hw/pci-host/pnv_phb4_regs.h |  12 +-
  tests/qtest/pnv-phb4-test.c |   9 ++
  3 files changed, 170 insertions(+), 41 deletions(-)

diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index a81763f34c..4e3a6b37f9 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -683,8 +683,41 @@ static void pnv_phb4_reg_write(void *opaque, hwaddr off, 
uint64_t val,
  return;
  }
  
-/* Handle masking */

+/* Handle RO, W1C, WxC and masking */
  switch (off) {
+/* W1C: Write-1-to-Clear registers */
+case PHB_TXE_ERR_STATUS:
+case PHB_RXE_ARB_ERR_STATUS:
+case PHB_RXE_MRG_ERR_STATUS:
+case PHB_RXE_TCE_ERR_STATUS:
+case PHB_ERR_STATUS:
+case PHB_REGB_ERR_STATUS:
+case PHB_PCIE_DLP_ERRLOG1:
+case PHB_PCIE_DLP_ERRLOG2:
+case PHB_PCIE_DLP_ERR_STATUS:
+case PHB_PBL_ERR_STATUS:
+phb->regs[off >> 3] &= ~val;
+return;
+
+/* WxC: Clear register on any write */
+case PHB_PBL_ERR1_STATUS:
+case PHB_PBL_ERR_LOG_0 ... PHB_PBL_ERR_LOG_1:
+case PHB_REGB_ERR1_STATUS:
+case PHB_REGB_ERR_LOG_0 ... PHB_REGB_ERR_LOG_1:
+case PHB_TXE_ERR1_STATUS:
+case PHB_TXE_ERR_LOG_0 ... PHB_TXE_ERR_LOG_1:
+case PHB_RXE_ARB_ERR1_STATUS:
+case PHB_RXE_ARB_ERR_LOG_0 ... PHB_RXE_ARB_ERR_LOG_1:
+case PHB_RXE_MRG_ERR1_STATUS:
+case PHB_RXE_MRG_ERR_LOG_0 ... PHB_RXE_MRG_ERR_LOG_1:
+case PHB_RXE_TCE_ERR1_STATUS:
+case PHB_RXE_TCE_ERR_LOG_0 ... PHB_RXE_TCE_ERR_LOG_1:
+case PHB_ERR1_STATUS:
+case PHB_ERR_LOG_0 ... PHB_ERR_LOG_1:
+phb->regs[off >> 3] = 0;
+return;
+
+/* Write value updated by masks */
  case PHB_LSI_SOURCE_ID:
  val &= PHB_LSI_SRC_ID;
  break;
@@ -723,7 +756,6 @@ static void pnv_phb4_reg_write(void *opaque, hwaddr off, 
uint64_t val,
  case PHB_LEM_WOF:
  val = 0;
  break;
-/* TODO: More regs ..., maybe create a table with masks... */
  
  /* Read only registers */

  case PHB_CPU_LOADSTORE_STATUS:
@@ -732,6 +764,12 @@ static void pnv_phb4_reg_write(void *opaque, hwaddr off, 
uint64_t val,
  case PHB_PHB4_TCE_CAP:
  case PHB_PHB4_IRQ_CAP:
  case PHB_PHB4_EEH_CAP:
+case PHB_VERSION:
+case PHB_DMA_CHAN_STATUS:
+case PHB_TCE_TAG_STATUS:
+case PHB_PBL_BUF_STATUS:
+case PHB_PCIE_BNR:
+case PHB_PCIE_PHY_RXEQ_STAT_G3_00_03 ... PHB_PCIE_PHY_RXEQ_STAT_G5_12_15:
  return;
  }
  
@@ -752,6 +790,7 @@ static void pnv_phb4_reg_write(void *opaque, hwaddr off, uint64_t val,

  pnv_phb4_update_all_msi_regions(phb);
  }
  break;
+
  case PHB_M32_START_ADDR:
  case PHB_M64_UPPER_BITS:
  if (changed) {
@@ -797,27 +836,63 @@ static void pnv_phb4_reg_write(void *opaque, hwaddr off, 
uint64_t val,
  break;
  
  /* Silent simple writes */

-case PHB_ASN_CMPM:
-case PHB_CONFIG_ADDRESS:
-case PHB_IODA_ADDR:
-case PHB_TCE_KILL:
-case PHB_TCE_SPEC_CTL:
-case PHB_PEST_BAR:
-case PHB_PELTV_BAR:
+/* PHB Fundamental register set A */
+case PHB_CONFIG_DATA ... PHB_LOCK1:
  case PHB_RTT_BAR:
-case PHB_LEM_FIR_ACCUM:
-case PHB_LEM_ERROR_MASK:
-case PHB_LEM_ACTION0:
-case PHB_LEM_ACTION1:
-case PHB_TCE_TAG_ENABLE:
+case PHB_PELTV_BAR:
+case PHB_PEST_BAR:
+case PHB_CAPI_CMPM ... PHB_M64_AOMASK:
+case PHB_NXLATE_PREFIX ... PHB_DMA_SYNC:
+case PHB_TCE_KILL ... PHB_IODA_ADDR:
+case PHB_PAPR_ERR_INJ_CTL ... PHB_PAPR_ERR_INJ_MASK:
  case PHB_INT_NOTIFY_ADDR:
  case PHB_INT_NOTIFY_INDEX:
-case PHB_DMA_SYNC:
-   break;
+/* Fundamental register set B */
+case PHB_AIB_FENCE_CTRL ... PHB_Q_DMA_R:
+/* FIR & Error registers */
+case PHB_LEM_FIR_ACCUM:
+case PHB_LEM_ERROR_MASK:
+case PHB_LEM_ACTION0 ... PHB_LEM_WOF:
+case PHB_ERR_INJECT ... PHB_ERR_AIB_FENCE_ENABLE:
+case PHB_ERR_STATUS_MASK ... PHB_ERR1_STATUS_MASK:
+case PHB_TXE_ERR_INJECT ... PHB_TXE_ERR_AIB_FENCE_ENABLE:
+case PHB_TXE_ERR_STATUS_MASK ... PHB_TXE_ERR1_STATUS_MASK:
+case PHB_RXE_ARB_ERR_INJECT ... PHB_RXE_ARB_ERR_AIB_FENCE_ENABLE:
+case PHB_RXE_ARB_ERR_STATUS_MASK ... PHB_RXE_ARB_ERR1_STATUS_MASK:
+case PHB_RXE_MRG_ERR_INJECT ... PHB_RXE_MRG_ERR_AIB_FENCE_ENABLE:
+case PHB_RXE_MRG_ERR_STATUS_MASK ... PHB_RXE_MRG_ERR1_STATUS_MASK:
+case PHB_RXE_TCE_ERR_INJECT ... PHB_RXE_TCE_ERR_AIB_FENCE_ENABLE:
+case PHB_RXE_TCE_ERR_STATUS_MASK ... PHB_RXE_TCE_ERR1_STATUS_MASK:
+

[PATCH] misc/pca955*: Move models under hw/gpio

2024-03-25 Thread Cédric Le Goater

The PCA9552 and PCA9554 devices are both I2C GPIO controllers and the
PCA9552 also can drive LEDs. Do all the necessary adjustments to move
the models under hw/gpio.

Cc: Glenn Miles 
Signed-off-by: Cédric Le Goater 
---
 MAINTAINERS  | 4 ++--
 include/hw/{misc => gpio}/pca9552.h  | 0
 include/hw/{misc => gpio}/pca9552_regs.h | 0
 include/hw/{misc => gpio}/pca9554.h  | 0
 include/hw/{misc => gpio}/pca9554_regs.h | 0
 hw/arm/aspeed.c  | 2 +-
 hw/{misc => gpio}/pca9552.c  | 4 ++--
 hw/{misc => gpio}/pca9554.c  | 4 ++--
 tests/qtest/pca9552-test.c   | 2 +-
 tests/qtest/pnv-host-i2c-test.c  | 4 ++--
 hw/gpio/meson.build  | 2 ++
 hw/gpio/trace-events | 4 
 hw/misc/meson.build  | 2 --
 hw/misc/trace-events | 4 
 14 files changed, 16 insertions(+), 16 deletions(-)
 rename include/hw/{misc => gpio}/pca9552.h (100%)
 rename include/hw/{misc => gpio}/pca9552_regs.h (100%)
 rename include/hw/{misc => gpio}/pca9554.h (100%)
 rename include/hw/{misc => gpio}/pca9554_regs.h (100%)
 rename hw/{misc => gpio}/pca9552.c (99%)
 rename hw/{misc => gpio}/pca9554.c (99%)

diff --git a/MAINTAINERS b/MAINTAINERS
index 409d7db4d457..a07af6b9d48e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1549,8 +1549,8 @@ M: Glenn Miles 
 L: qemu-...@nongnu.org
 L: qemu-...@nongnu.org
 S: Odd Fixes
-F: hw/misc/pca955*.c
-F: include/hw/misc/pca955*.h
+F: hw/gpio/pca955*.c
+F: include/hw/gpio/pca955*.h
 
 virtex_ml507
 M: Edgar E. Iglesias 
diff --git a/include/hw/misc/pca9552.h b/include/hw/gpio/pca9552.h
similarity index 100%
rename from include/hw/misc/pca9552.h
rename to include/hw/gpio/pca9552.h
diff --git a/include/hw/misc/pca9552_regs.h b/include/hw/gpio/pca9552_regs.h
similarity index 100%
rename from include/hw/misc/pca9552_regs.h
rename to include/hw/gpio/pca9552_regs.h
diff --git a/include/hw/misc/pca9554.h b/include/hw/gpio/pca9554.h
similarity index 100%
rename from include/hw/misc/pca9554.h
rename to include/hw/gpio/pca9554.h
diff --git a/include/hw/misc/pca9554_regs.h b/include/hw/gpio/pca9554_regs.h
similarity index 100%
rename from include/hw/misc/pca9554_regs.h
rename to include/hw/gpio/pca9554_regs.h
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index 729f66941462..badf6f6fa09d 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -18,7 +18,7 @@
 #include "hw/block/flash.h"
 #include "hw/i2c/i2c_mux_pca954x.h"
 #include "hw/i2c/smbus_eeprom.h"
-#include "hw/misc/pca9552.h"
+#include "hw/gpio/pca9552.h"
 #include "hw/nvram/eeprom_at24c.h"
 #include "hw/sensor/tmp105.h"
 #include "hw/misc/led.h"
diff --git a/hw/misc/pca9552.c b/hw/gpio/pca9552.c
similarity index 99%
rename from hw/misc/pca9552.c
rename to hw/gpio/pca9552.c
index 2ae13af35e93..27d4db068095 100644
--- a/hw/misc/pca9552.c
+++ b/hw/gpio/pca9552.c
@@ -15,8 +15,8 @@
 #include "qemu/module.h"
 #include "qemu/bitops.h"
 #include "hw/qdev-properties.h"
-#include "hw/misc/pca9552.h"
-#include "hw/misc/pca9552_regs.h"
+#include "hw/gpio/pca9552.h"
+#include "hw/gpio/pca9552_regs.h"
 #include "hw/irq.h"
 #include "migration/vmstate.h"
 #include "qapi/error.h"
diff --git a/hw/misc/pca9554.c b/hw/gpio/pca9554.c
similarity index 99%
rename from hw/misc/pca9554.c
rename to hw/gpio/pca9554.c
index 5e31696797d9..7d10a64ba7c1 100644
--- a/hw/misc/pca9554.c
+++ b/hw/gpio/pca9554.c
@@ -11,8 +11,8 @@
 #include "qemu/module.h"
 #include "qemu/bitops.h"
 #include "hw/qdev-properties.h"
-#include "hw/misc/pca9554.h"
-#include "hw/misc/pca9554_regs.h"
+#include "hw/gpio/pca9554.h"
+#include "hw/gpio/pca9554_regs.h"
 #include "hw/irq.h"
 #include "migration/vmstate.h"
 #include "qapi/error.h"
diff --git a/tests/qtest/pca9552-test.c b/tests/qtest/pca9552-test.c
index ccca2b3d9140..747495769239 100644
--- a/tests/qtest/pca9552-test.c
+++ b/tests/qtest/pca9552-test.c
@@ -12,7 +12,7 @@
 #include "libqtest.h"
 #include "libqos/qgraph.h"
 #include "libqos/i2c.h"
-#include "hw/misc/pca9552_regs.h"
+#include "hw/gpio/pca9552_regs.h"
 
 #define PCA9552_TEST_ID   "pca9552-test"
 #define PCA9552_TEST_ADDR 0x60
diff --git a/tests/qtest/pnv-host-i2c-test.c b/tests/qtest/pnv-host-i2c-test.c
index c6351772520c..7f64d597ac1d 100644
--- a/tests/qtest/pnv-host-i2c-test.c
+++ b/tests/qtest/pnv-host-i2c-test.c
@@ -8,8 +8,8 @@
  */
 #include "qemu/osdep.h"
 #include "libqtest.h"
-#include "hw/misc/pca9554_regs.h"
-#include "hw/misc/pca9552_regs.h"
+#include "hw/gpio/pca9554_regs.h"
+#i

Re: [PATCH 07/10] pnv/phb4: Set link speed and width in the DLP training control register

2024-03-25 Thread Cédric Le Goater


On 3/21/24 11:04, Saif Abrar wrote:

Get the current link-status from PCIE macro.
Extract link-speed and link-width from the link-status
and set in the DLP training control (PCIE_DLP_TCR) register.

Signed-off-by: Saif Abrar 


Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/pci-host/pnv_phb4.c | 21 +++--
  1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index 7b3d75bae6..6823ffab54 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -980,10 +980,27 @@ static uint64_t pnv_phb4_reg_read(void *opaque, hwaddr 
off, unsigned size)
  val |= PHB_PCIE_SCR_PLW_X16; /* RO bit */
  break;
  
-/* Link training always appears trained */

  case PHB_PCIE_DLP_TRAIN_CTL:
-/* TODO: Do something sensible with speed ? */
+/* Link training always appears trained */
  val |= PHB_PCIE_DLP_INBAND_PRESENCE | PHB_PCIE_DLP_TL_LINKACT;
+
+/* Get the current link-status from PCIE */
+uint32_t exp_offset = get_exp_offset(phb);
+uint32_t lnkstatus = bswap32(pnv_phb4_rc_config_read(phb,
+exp_offset + PCI_EXP_LNKSTA, 4));
+
+/* Extract link-speed from the link-status */
+uint32_t v = lnkstatus & PCI_EXP_LNKSTA_CLS;
+/* Set the current link-speed at the LINK_SPEED position */
+val = SETFIELD(PHB_PCIE_DLP_LINK_SPEED, val, v);
+
+/*
+ * Extract link-width from the link-status,
+ * after shifting the required bitfields.
+ */
+v = (lnkstatus & PCI_EXP_LNKSTA_NLW) >> PCI_EXP_LNKSTA_NLW_SHIFT;
+/* Set the current link-width at the LINK_WIDTH position */
+val = SETFIELD(PHB_PCIE_DLP_LINK_WIDTH, val, v);
  return val;
  
  /*

Re: [PATCH 08/10] pnv/phb4: Implement IODA PCT table

2024-03-25 Thread Cédric Le Goater


On 3/21/24 11:04, Saif Abrar wrote:

IODA PCT table (#3) is implemented
without any functionality, being a debug table.

Signed-off-by: Saif Abrar 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/pci-host/pnv_phb4.c  | 6 ++
  include/hw/pci-host/pnv_phb4.h  | 2 ++
  include/hw/pci-host/pnv_phb4_regs.h | 1 +
  3 files changed, 9 insertions(+)

diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index 6823ffab54..f48750ee54 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -263,6 +263,10 @@ static uint64_t *pnv_phb4_ioda_access(PnvPHB4 *phb,
  mask = phb->big_phb ? PNV_PHB4_MAX_MIST : (PNV_PHB4_MAX_MIST >> 1);
  mask -= 1;
  break;
+case IODA3_TBL_PCT:
+tptr = phb->ioda_PCT;
+mask = 7;
+break;
  case IODA3_TBL_RCAM:
  mask = phb->big_phb ? 127 : 63;
  break;
@@ -361,6 +365,8 @@ static void pnv_phb4_ioda_write(PnvPHB4 *phb, uint64_t val)
  /* Handle side effects */
  switch (table) {
  case IODA3_TBL_LIST:
+case IODA3_TBL_PCT:
+/* No action for debug tables */
  break;
  case IODA3_TBL_MIST: {
  /* Special mask for MIST partial write */
diff --git a/include/hw/pci-host/pnv_phb4.h b/include/hw/pci-host/pnv_phb4.h
index 91e81eee0e..6d83e5616f 100644
--- a/include/hw/pci-host/pnv_phb4.h
+++ b/include/hw/pci-host/pnv_phb4.h
@@ -64,6 +64,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(PnvPHB4, PNV_PHB4)
  #define PNV_PHB4_MAX_LSIs  8
  #define PNV_PHB4_MAX_INTs  4096
  #define PNV_PHB4_MAX_MIST  (PNV_PHB4_MAX_INTs >> 2)
+#define PNV_PHB4_MAX_PCT   128
  #define PNV_PHB4_MAX_MMIO_WINDOWS  32
  #define PNV_PHB4_MIN_MMIO_WINDOWS  16
  #define PNV_PHB4_NUM_REGS  (0x3000 >> 3)
@@ -144,6 +145,7 @@ struct PnvPHB4 {
  /* On-chip IODA tables */
  uint64_t ioda_LIST[PNV_PHB4_MAX_LSIs];
  uint64_t ioda_MIST[PNV_PHB4_MAX_MIST];
+uint64_t ioda_PCT[PNV_PHB4_MAX_PCT];
  uint64_t ioda_TVT[PNV_PHB4_MAX_TVEs];
  uint64_t ioda_MBT[PNV_PHB4_MAX_MBEs];
  uint64_t ioda_MDT[PNV_PHB4_MAX_PEs];
diff --git a/include/hw/pci-host/pnv_phb4_regs.h 
b/include/hw/pci-host/pnv_phb4_regs.h
index c1d5a83271..e30adff7b2 100644
--- a/include/hw/pci-host/pnv_phb4_regs.h
+++ b/include/hw/pci-host/pnv_phb4_regs.h
@@ -486,6 +486,7 @@
  
  #define IODA3_TBL_LIST  1

  #define IODA3_TBL_MIST  2
+#define IODA3_TBL_PCT   3
  #define IODA3_TBL_RCAM  5
  #define IODA3_TBL_MRT   6
  #define IODA3_TBL_PESTA 7

Re: [PATCH 09/10] hw/pci: Set write-mask bits for PCIE Link-Control-2 register

2024-03-25 Thread Cédric Le Goater


On 3/21/24 11:04, Saif Abrar wrote:

PHB updates the register PCIE Link-Control-2.
Set the write-mask bits for TLS, ENTER_COMP, TX_MARGIN,
HASD, MOD_COMP, COMP_SOS and COMP_P_DE.



You should resend this patch independently of the PowerNV PHB changes.


Thanks,

C.




Signed-off-by: Saif Abrar 
---
  hw/pci/pcie.c | 6 ++
  include/standard-headers/linux/pci_regs.h | 3 +++
  2 files changed, 9 insertions(+)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 4b2f0805c6..e3081f6b84 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -212,6 +212,12 @@ int pcie_cap_init(PCIDevice *dev, uint8_t offset,
  
  pci_set_word(dev->wmask + pos + PCI_EXP_DEVCTL2, PCI_EXP_DEVCTL2_EETLPPB);
  
+pci_set_word(dev->wmask + pos + PCI_EXP_LNKCTL2,

+PCI_EXP_LNKCTL2_TLS | PCI_EXP_LNKCTL2_ENTER_COMP |
+PCI_EXP_LNKCTL2_TX_MARGIN | PCI_EXP_LNKCTL2_HASD |
+PCI_EXP_LNKCTL2_MOD_COMP | PCI_EXP_LNKCTL2_COMP_SOS |
+PCI_EXP_LNKCTL2_COMP_P_DE);
+
  if (dev->cap_present & QEMU_PCIE_EXTCAP_INIT) {
  /* read-only to behave like a 'NULL' Extended Capability Header */
  pci_set_long(dev->wmask + PCI_CONFIG_SPACE_SIZE, 0);
diff --git a/include/standard-headers/linux/pci_regs.h 
b/include/standard-headers/linux/pci_regs.h
index a39193213f..f743defe91 100644
--- a/include/standard-headers/linux/pci_regs.h
+++ b/include/standard-headers/linux/pci_regs.h
@@ -694,6 +694,9 @@
  #define  PCI_EXP_LNKCTL2_ENTER_COMP   0x0010 /* Enter Compliance */
  #define  PCI_EXP_LNKCTL2_TX_MARGIN0x0380 /* Transmit Margin */
  #define  PCI_EXP_LNKCTL2_HASD 0x0020 /* HW Autonomous Speed Disable */
+#define  PCI_EXP_LNKCTL2_MOD_COMP  0x0400 /* Enter Modified Compliance */
+#define  PCI_EXP_LNKCTL2_COMP_SOS  0x0800 /* Compliance SOS */
+#define  PCI_EXP_LNKCTL2_COMP_P_DE 0xF000 /* Compliance Preset/De-emphasis 
*/
  #define PCI_EXP_LNKSTA2   0x32/* Link Status 2 */
  #define  PCI_EXP_LNKSTA2_FLIT 0x0400 /* Flit Mode Status */
  #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V20x32/* end of v2 EPs w/ 
link */

Re: [PATCH 10/10] pnv/phb4: Mask off LSI Source-ID based on number of interrupts

2024-03-25 Thread Cédric Le Goater


On 3/21/24 11:04, Saif Abrar wrote:

Add a method to reset the value of LSI Source-ID.
Mask off LSI source-id based on number of interrupts in the big/small PHB.


Looks ok.

 

Signed-off-by: Saif Abrar 
---
  hw/pci-host/pnv_phb4.c | 10 --
  1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index f48750ee54..8fbaf6512e 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -489,6 +489,7 @@ static void pnv_phb4_update_xsrc(PnvPHB4 *phb)
  
  lsi_base = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]);

  lsi_base <<= 3;
+lsi_base &= (xsrc->nr_irqs - 1);
  
  /* TODO: handle reset values of PHB_LSI_SRC_ID */

  if (!lsi_base) {
@@ -1966,6 +1967,12 @@ static void pnv_phb4_ro_mask_init(PnvPHB4 *phb)
  /* TODO: Add more RO-masks as regs are implemented in the model */
  }
  
+static void pnv_phb4_fund_A_reset(PnvPHB4 *phb)


What is fund_A ?


+{
+phb->regs[PHB_LSI_SOURCE_ID >> 3] = PPC_BITMASK(4, 12);


Is this mask the default value for HW ?


Thanks,

C.



+pnv_phb4_update_xsrc(phb);
+}
+
  static void pnv_phb4_err_reg_reset(PnvPHB4 *phb)
  {
  STICKY_RST(PHB_ERR_STATUS,   0, PPC_BITMASK(0, 33));
@@ -2023,6 +2030,7 @@ static void pnv_phb4_reset(void *dev)
  pnv_phb4_cfg_core_reset(phb);
  pnv_phb4_pbl_core_reset(phb);
  
+pnv_phb4_fund_A_reset(phb);

  pnv_phb4_err_reg_reset(phb);
  pnv_phb4_pcie_stack_reg_reset(phb);
  pnv_phb4_regb_err_reg_reset(phb);
@@ -2102,8 +2110,6 @@ static void pnv_phb4_realize(DeviceState *dev, Error 
**errp)
  return;
  }
  
-pnv_phb4_update_xsrc(phb);

-
  phb->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc, xsrc->nr_irqs);
  
  pnv_phb4_xscom_realize(phb);

Re: [PATCH 02/10] pnv/phb4: Add reset logic to PHB4

2024-03-25 Thread Cédric Le Goater


Cc: +Fred +Daniel

On 3/21/24 11:04, Saif Abrar wrote:

Add a method to be invoked on QEMU reset.
Also add CFG and PBL core-blocks reset logic using
appropriate bits of PHB_PCIE_CRESET register.

Tested by reading the reset value of a register.

Signed-off-by: Saif Abrar 
---
  hw/pci-host/pnv_phb4.c  | 104 +++-
  include/hw/pci-host/pnv_phb4_regs.h |  16 -
  tests/qtest/pnv-phb4-test.c |  10 +++
  3 files changed, 127 insertions(+), 3 deletions(-)

diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index 075499d36d..d2e7403b37 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -1,7 +1,7 @@
  /*
- * QEMU PowerPC PowerNV (POWER9) PHB4 model
+ * QEMU PowerPC PowerNV (POWER10) PHB4 model


You can add an extra line for POWER10/PHB5 but please keep POWER9/PHB4.
POWER8 and POWER9 are still supported.


   *
- * Copyright (c) 2018-2020, IBM Corporation.
+ * Copyright (c) 2018-2024, IBM Corporation.
   *
   * This code is licensed under the GPL version 2 or later. See the
   * COPYING file in the top-level directory.
@@ -22,6 +22,7 @@
  #include "hw/qdev-properties.h"
  #include "qom/object.h"
  #include "trace.h"
+#include "sysemu/reset.h"
  
  #define phb_error(phb, fmt, ...)\

  qemu_log_mask(LOG_GUEST_ERROR, "phb4[%d:%d]: " fmt "\n",\
@@ -499,6 +500,86 @@ static void pnv_phb4_update_xsrc(PnvPHB4 *phb)
  }
  }
  
+/*

+ * Get the PCI-E capability offset from the root-port
+ */
+static uint32_t get_exp_offset(PnvPHB4 *phb)
+{
+PCIHostState *pci = PCI_HOST_BRIDGE(phb->phb_base);
+PCIDevice *pdev;
+pdev = pci_find_device(pci->bus, 0, 0);
+if (!pdev) {
+phb_error(phb, "PCI device not found");
+return ~0;
+}
+PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(pdev);
+return rpc->exp_offset;
+}
+
+#define RC_CONFIG_WRITE(a, v) pnv_phb4_rc_config_write(phb, a, 4, v);


This helper RC_CONFIG_WRITE doesn't look very useful.


+
+static void pnv_phb4_cfg_core_reset(PnvPHB4 *phb)
+{
+/* Zero all registers initially */
+int i;
+for (i = PCI_COMMAND ; i < PHB_RC_CONFIG_SIZE ; i += 4) {
+RC_CONFIG_WRITE(i, 0)
+}
+
+RC_CONFIG_WRITE(PCI_COMMAND,  0x100100);
+RC_CONFIG_WRITE(PCI_CLASS_REVISION,   0x604);
+RC_CONFIG_WRITE(PCI_CACHE_LINE_SIZE,  0x1);
+RC_CONFIG_WRITE(PCI_MEMORY_BASE,  0x10);
+RC_CONFIG_WRITE(PCI_PREF_MEMORY_BASE, 0x10011);
+RC_CONFIG_WRITE(PCI_CAPABILITY_LIST,  0x40);
+RC_CONFIG_WRITE(PCI_INTERRUPT_LINE,   0x2);


Can we use literal defined values instead of numerical ones ? It would
help the reader understand what are the default settings.


+/* PM Capabilities Register */
+RC_CONFIG_WRITE(PCI_BRIDGE_CONTROL + PCI_PM_PMC, 0xC8034801);
+
+uint32_t exp_offset = get_exp_offset(phb);
+RC_CONFIG_WRITE(exp_offset, 0x420010);
+RC_CONFIG_WRITE(exp_offset + PCI_EXP_DEVCAP,  0x8022);
+RC_CONFIG_WRITE(exp_offset + PCI_EXP_DEVCTL,  0x140);
+RC_CONFIG_WRITE(exp_offset + PCI_EXP_LNKCAP,  0x300105);
+RC_CONFIG_WRITE(exp_offset + PCI_EXP_LNKCTL,  0x2010008);
+RC_CONFIG_WRITE(exp_offset + PCI_EXP_SLTCTL,  0x2000);
+RC_CONFIG_WRITE(exp_offset + PCI_EXP_DEVCAP2, 0x1003F);
+RC_CONFIG_WRITE(exp_offset + PCI_EXP_DEVCTL2, 0x20);
+RC_CONFIG_WRITE(exp_offset + PCI_EXP_LNKCAP2, 0x80003E);
+RC_CONFIG_WRITE(exp_offset + PCI_EXP_LNKCTL2, 0x5);
+
+RC_CONFIG_WRITE(PHB_AER_ECAP,0x14810001);
+RC_CONFIG_WRITE(PHB_AER_CAPCTRL, 0xA0);
+RC_CONFIG_WRITE(PHB_SEC_ECAP,0x1A010019);
+
+RC_CONFIG_WRITE(PHB_LMR_ECAP, 0x1E810027);
+/* LMR - Margining Lane Control / Status Register # 2 to 16 */
+for (i = PHB_LMR_CTLSTA_2 ; i <= PHB_LMR_CTLSTA_16 ; i += 4) {
+RC_CONFIG_WRITE(i, 0x9C38);
+}
+
+RC_CONFIG_WRITE(PHB_DLF_ECAP, 0x1F410025);
+RC_CONFIG_WRITE(PHB_DLF_CAP,  0x8001);
+RC_CONFIG_WRITE(P16_ECAP, 0x22410026);
+RC_CONFIG_WRITE(P32_ECAP, 0x1002A);
+RC_CONFIG_WRITE(P32_CAP,  0x103);
+}


The reset of the root complex register values should be done in
pnv_phb_root_port_reset_hold().

A lot of changes were made to the PHB4/5 models 2 or 3 years ago to
prepare libvirt support of the PowerNV machines. User-created PHB
devices were added and generic models of the root complex and the PHB
were introduced to facilitate the machine definition from a libvirt
POV.

Libvirt support was abandoned but the PHB models didn't change. I think
there are possible cleanups if we deprecate the generic models.


+static void pnv_phb4_pbl_core_reset(PnvPHB4 *phb)
+{
+/* Zero all registers initially */
+int i;
+for (i = PHB_PBL_CONTROL ; i <= PHB_PBL_ERR1_STATUS_MASK ; i += 8) {
+phb->regs[i >> 3] = 0x0;
+}
+
+/* Set specific register values */
+phb->regs[PHB_PBL_CONTROL   >> 3] = 0xC009;
+phb->regs[PHB_PBL_TIMEOUT_CTRL  >> 3] =

Re: [PATCH 01/10] qtest/phb4: Add testbench for PHB4

2024-03-25 Thread Cédric Le Goater


Hello Saif,

On 3/21/24 11:04, Saif Abrar wrote:

New qtest TB added for PHB4.
TB reads PHB Version register and asserts that
bits[24:31] have value 0xA5.

Signed-off-by: Saif Abrar 
---
  tests/qtest/meson.build |  1 +
  tests/qtest/pnv-phb4-test.c | 74 +
  2 files changed, 75 insertions(+)
  create mode 100644 tests/qtest/pnv-phb4-test.c

diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index 36c5c13a7b..4795e51c17 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -168,6 +168,7 @@ qtests_ppc64 = \
(config_all_devices.has_key('CONFIG_PSERIES') ? ['device-plug-test'] : []) 
+   \
(config_all_devices.has_key('CONFIG_POWERNV') ? ['pnv-xscom-test'] : []) +  
   \
(config_all_devices.has_key('CONFIG_POWERNV') ? ['pnv-host-i2c-test'] : []) 
+  \
+  (config_all_devices.has_key('CONFIG_POWERNV') ? ['pnv-phb4-test'] : []) +
  \
(config_all_devices.has_key('CONFIG_PSERIES') ? ['rtas-test'] : []) +   
   \
(slirp.found() ? ['pxe-test'] : []) +  \
(config_all_devices.has_key('CONFIG_USB_UHCI') ? ['usb-hcd-uhci-test'] : 
[]) + \
diff --git a/tests/qtest/pnv-phb4-test.c b/tests/qtest/pnv-phb4-test.c
new file mode 100644
index 00..e3b809e9c4
--- /dev/null
+++ b/tests/qtest/pnv-phb4-test.c
@@ -0,0 +1,74 @@
+/*
+ * QTest testcase for PowerNV PHB4
+ *
+ * Copyright (c) 2024, IBM Corporation.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest.h"
+#include "hw/pci-host/pnv_phb4_regs.h"
+
+#define P10_XSCOM_BASE  0x000603fcull
+#define PHB4_MMIO   0x000600c3c000ull
+#define PHB4_XSCOM  0x8010900ull
+
+#define PPC_BIT(bit)(0x8000000000000000ULL >> (bit))
+#define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
+
+static uint64_t pnv_xscom_addr(uint32_t pcba)
+{
+return P10_XSCOM_BASE | ((uint64_t) pcba << 3);
+}
+
+static uint64_t pnv_phb4_xscom_addr(uint32_t reg)
+{
+return pnv_xscom_addr(PHB4_XSCOM + reg);
+}


Please use tests/qtest/pnv-xscom.h instead.


+/*
+ * XSCOM read/write is indirect in PHB4:
+ * Write 'SCOM - HV Indirect Address Register'
+ * with register-offset to read/write.
+   - bit[0]: Valid Bit
+   - bit[51:61]: Indirect Address(00:10)
+ * Read/write 'SCOM - HV Indirect Data Register' to get/set the value.
+ */
+
+static uint64_t pnv_phb4_xscom_read(QTestState *qts, uint32_t reg)
+{
+qtest_writeq(qts, pnv_phb4_xscom_addr(PHB_SCOM_HV_IND_ADDR),
+PPC_BIT(0) | reg);
+return qtest_readq(qts, pnv_phb4_xscom_addr(PHB_SCOM_HV_IND_DATA));
+}



+/* Assert that 'PHB - Version Register Offset 0x0800' bits-[24:31] are 0xA5 */
+static void phb4_version_test(QTestState *qts)
+{
+uint64_t ver = pnv_phb4_xscom_read(qts, PHB_VERSION);
+
+/* PHB Version register [24:31]: Major Revision ID 0xA5 */
+ver = ver >> (63 - 31);
+g_assert_cmpuint(ver, ==, 0xA5);
+}
+
+static void test_phb4(void)
+{
+QTestState *qts = NULL;
+
+qts = qtest_initf("-machine powernv10 -accel tcg -nographic -d unimp");


"-nographic -d unimp" is not needed.


+
+/* Make sure test is running on PHB */
+phb4_version_test(qts);


Please add similar tests for phb[345]. See tests/qtest/pnv-xscom-test.c.

Thanks,

C.



+
+qtest_quit(qts);
+}
+
+int main(int argc, char **argv)
+{
+g_test_init(&argc, &argv, NULL);
+qtest_add_func("phb4", test_phb4);
+return g_test_run();
+}

Re: [PATCH 0/4] hw/nmi: Remove @cpu_index argument

2024-03-22 Thread Cédric Le Goater


On 3/20/24 16:00, Peter Maydell wrote:

On Wed, 20 Mar 2024 at 14:10, Mark Burton  wrote:

I’d broaden this to all ’signals’ (IRQ, Reset etc) - and I guess
similar statements apply, with the “bridge” between the function
and the GPIO mechanism moved closer or further from the originator(s)
of the activity.

The issue isn’t my “machine” model, rather the compose-ability of
(any) such machine.  A-priori, a model writer doesn’t know if they
should respond directly to an NMI or not - Hence they don’t know if
they should implement the TYPE_NMI or not. That’s a decision only
the machine composer knows.
My suggestion would be to use a GPIO interface to models, which can
then be appropriately wired. (And, hence, to have a single place
that implements the TYPE_NMI interface and provides the GPIO wire
ready for wiring to appropriate devices).


I feel like that's a long way in the future, but my back-of-the-envelope
design sketch of that is that the TYPE_MACHINE class that's implementing
the "I am just a container for all the devices that the user has
specified and wired together" machine would itself implement TYPE_NMI and
when an NMI came in it would assert a GPIO line that the user could
wire up, or not wire up, as they chose.

Right now we can't do that though, because, among other reasons,
TYPE_MACHINE isn't a TYPE_DEVICE. (I do want to fix that, though:
I'm hoping it won't be too difficult.)


Oh that's interesting. Will that introduce an extra level of container
with multiple machines below ?

/qemu
  /machine[0]
...
/peripheral (container)
/peripheral-anon (container)
  /machine[1]
...
/peripheral (container)
/peripheral-anon (container)
  /unattached (container)
...
/sysbus (System)
/system[0] (memory-region)

Thanks,

C.

Re: [PATCH] misc/pca9554: Fix check of pin range value in property accessors

2024-03-22 Thread Cédric Le Goater


On 3/21/24 18:15, Philippe Mathieu-Daudé wrote:

On 21/3/24 17:01, Cédric Le Goater wrote:

Coverity detected an "Integer handling" issue with the pin value :

   In expression "state >> pin", right shifting "state" by more than 7
   bits always yields zero.  The shift amount, "pin", is as much as 8.

In practice, this should not happen because the properties "pin8" and
above are not created. Nevertheless, fix the range to avoid this warning.

Fixes: CID 1534917
Fixes: de0c7d543bca ("misc: Add a pca9554 GPIO device model")
Cc: Glenn Miles 
Signed-off-by: Cédric Le Goater 
---
  hw/misc/pca9554.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)


Isn't it the one Peter fixed in
https://lore.kernel.org/qemu-devel/20240312183810.557768-5-peter.mayd...@linaro.org/?


Oh yes. I missed it. Hopefully, they are similar. Let's keep Peter's.

However, what I would like to do as a follow-up is to move the
hw/misc/pca955* models under hw/gpio/. Is it something we can do
for 9.0 ?

Thanks,

C.

Re: [PATCH] misc/pca9554: Fix check of pin range value in property accessors

2024-03-21 Thread Cédric Le Goater


On 3/21/24 17:08, Miles Glenn wrote:

On Thu, 2024-03-21 at 17:01 +0100, Cédric Le Goater wrote:

Coverity detected an "Integer handling" issue with the pin value :

   In expression "state >> pin", right shifting "state" by more than 7
   bits always yields zero.  The shift amount, "pin", is as much as 8.

In practice, this should not happen because the properties "pin8" and
above are not created. Nevertheless, fix the range to avoid this
warning.

Fixes: CID 1534917
Fixes: de0c7d543bca ("misc: Add a pca9554 GPIO device model")
Cc: Glenn Miles 
Signed-off-by: Cédric Le Goater 
---
  hw/misc/pca9554.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/misc/pca9554.c b/hw/misc/pca9554.c
index
778b32e4430a8b618322c26b1b185ed3ead97cc4..5e31696797d9564666ece6fe177
37ee2a9733e96 100644
--- a/hw/misc/pca9554.c
+++ b/hw/misc/pca9554.c
@@ -160,7 +160,7 @@ static void pca9554_get_pin(Object *obj, Visitor
*v, const char *name,
  error_setg(errp, "%s: error reading %s", __func__, name);
  return;
  }
-if (pin < 0 || pin > PCA9554_PIN_COUNT) {
+if (pin < 0 || pin >= PCA9554_PIN_COUNT) {
  error_setg(errp, "%s invalid pin %s", __func__, name);
  return;
  }
@@ -187,7 +187,7 @@ static void pca9554_set_pin(Object *obj, Visitor
*v, const char *name,
  error_setg(errp, "%s: error reading %s", __func__, name);
  return;
  }
-if (pin < 0 || pin > PCA9554_PIN_COUNT) {
+if (pin < 0 || pin >= PCA9554_PIN_COUNT) {
  error_setg(errp, "%s invalid pin %s", __func__, name);
  return;
  }


Thanks, Cédric!  I guess I should be running coverity myself.


I don't myself. I get reports from :

  https://scan.coverity.com/projects/qemu

Thanks,

C.

[PATCH] misc/pca9554: Fix check of pin range value in property accessors

2024-03-21 Thread Cédric Le Goater

Coverity detected an "Integer handling" issue with the pin value :

  In expression "state >> pin", right shifting "state" by more than 7
  bits always yields zero.  The shift amount, "pin", is as much as 8.

In practice, this should not happen because the properties "pin8" and
above are not created. Nevertheless, fix the range to avoid this warning.

Fixes: CID 1534917
Fixes: de0c7d543bca ("misc: Add a pca9554 GPIO device model")
Cc: Glenn Miles 
Signed-off-by: Cédric Le Goater 
---
 hw/misc/pca9554.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/misc/pca9554.c b/hw/misc/pca9554.c
index 
778b32e4430a8b618322c26b1b185ed3ead97cc4..5e31696797d9564666ece6fe17737ee2a9733e96
 100644
--- a/hw/misc/pca9554.c
+++ b/hw/misc/pca9554.c
@@ -160,7 +160,7 @@ static void pca9554_get_pin(Object *obj, Visitor *v, const 
char *name,
 error_setg(errp, "%s: error reading %s", __func__, name);
 return;
 }
-if (pin < 0 || pin > PCA9554_PIN_COUNT) {
+if (pin < 0 || pin >= PCA9554_PIN_COUNT) {
 error_setg(errp, "%s invalid pin %s", __func__, name);
 return;
 }
@@ -187,7 +187,7 @@ static void pca9554_set_pin(Object *obj, Visitor *v, const 
char *name,
 error_setg(errp, "%s: error reading %s", __func__, name);
 return;
 }
-if (pin < 0 || pin > PCA9554_PIN_COUNT) {
+if (pin < 0 || pin >= PCA9554_PIN_COUNT) {
 error_setg(errp, "%s invalid pin %s", __func__, name);
 return;
 }
-- 
2.44.0

Re: [PATCH for-9.1 v5 09/14] memory: Add Error** argument to .log_global_start() handler

2024-03-20 Thread Cédric Le Goater


On 3/20/24 15:42, Peter Xu wrote:

On Wed, Mar 20, 2024 at 07:49:05AM +0100, Cédric Le Goater wrote:

Modify all .log_global_start() handlers to take an Error** parameter
and return a bool. Adapt memory_global_dirty_log_start() to interrupt
the loop on handlers at the first error. In such a case, a rollback is
performed to stop dirty logging on all listeners where it was
previously enabled.

Cc: Stefano Stabellini 
Cc: Anthony Perard 
Cc: Paul Durrant 
Cc: "Michael S. Tsirkin" 
Cc: Paolo Bonzini 
Cc: David Hildenbrand 
Signed-off-by: Cédric Le Goater 


Reviewed-by: Peter Xu 

Still one comment below:


@@ -3014,8 +3044,11 @@ static void listener_add_address_space(MemoryListener 
*listener,
  listener->begin(listener);
  }
  if (global_dirty_tracking) {
+/*
+ * Migration has already started. Assert on any error.


If you won't mind, I can change this to:

   /*
* Currently only VFIO can fail log_global_start(), and it's not allowed
* to hotplug a VFIO device during migration, so this should never fail
* when invoked.  If it can start to fail in the future, we need to be
* able to fail the whole listener_add_address_space() and its callers.
*/


Sure, or I will in a v6. Markus had a comment on 8/14.

Thanks,

C.

Re: [PATCH for-9.1 v5 08/14] migration: Add Error** argument to .load_setup() handler

2024-03-20 Thread Cédric Le Goater


On 3/20/24 09:02, Markus Armbruster wrote:

Cédric Le Goater  writes:


This will be useful to report errors at a higher level, mostly in VFIO
today.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Peter Xu 
Signed-off-by: Cédric Le Goater 
---


[...]


diff --git a/migration/savevm.c b/migration/savevm.c
index 
535ad5a32d67057dd172ce25d561a66a07172e97..8f42999a15d1685957de9ed517d6bc9ba49c3f11
 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2747,8 +2747,9 @@ static void 
qemu_loadvm_state_switchover_ack_needed(MigrationIncomingState *mis)
  trace_loadvm_state_switchover_ack_needed(mis->switchover_ack_pending_num);
  }
  
-static int qemu_loadvm_state_setup(QEMUFile *f)

+static int qemu_loadvm_state_setup(QEMUFile *f, Error **errp)
  {
+ERRP_GUARD(); /* error_prepend use */


The comment is correct, but prone to go stale.  No other use of
ERRP_GUARD() is commented.  Suggest to drop it.


OK. I found it interesting, for me at least. Will drop in the VFIO
patches also.

Thanks,

C.





  SaveStateEntry *se;
  int ret;
  
@@ -2763,10 +2764,11 @@ static int qemu_loadvm_state_setup(QEMUFile *f)

  }
  }
  
-ret = se->ops->load_setup(f, se->opaque);

+ret = se->ops->load_setup(f, se->opaque, errp);
  if (ret < 0) {
+error_prepend(errp, "Load state of device %s failed: ",
+  se->idstr);
  qemu_file_set_error(f, ret);
-error_report("Load state of device %s failed", se->idstr);
  return ret;
  }
  }
@@ -2947,7 +2949,8 @@ int qemu_loadvm_state(QEMUFile *f)
  return ret;
  }
  
-if (qemu_loadvm_state_setup(f) != 0) {

+if (qemu_loadvm_state_setup(f, &local_err) != 0) {
+error_report_err(local_err);
  return -EINVAL;
  }

[PATCH for-9.1 v5 11/14] memory: Add Error** argument to the global_dirty_log routines

2024-03-20 Thread Cédric Le Goater

Now that the log_global*() handlers take an Error** parameter and
return a bool, do the same for memory_global_dirty_log_start() and
memory_global_dirty_log_stop(). The error is reported in the callers
for now and it will be propagated in the call stack in the next
changes.

Note a functional change in ram_init_bitmaps(): if the dirty pages
logger fails to start, there is no need to synchronize the dirty
pages bitmaps. colo_incoming_start_dirty_log() could be modified in a
similar way.

Cc: Stefano Stabellini 
Cc: Anthony Perard 
Cc: Paul Durrant 
Cc: "Michael S. Tsirkin" 
Cc: Paolo Bonzini 
Cc: David Hildenbrand 
Cc: Hyman Huang 
Signed-off-by: Cédric Le Goater 
---

 Changes in v5:

 - Removed Yong Huang's R-b 
 - Made use of ram_bitmaps_destroy() in ram_init_bitmaps() to cleanup
   allocated bitmaps
 
 include/exec/memory.h |  5 -
 hw/i386/xen/xen-hvm.c |  2 +-
 migration/dirtyrate.c | 13 +++--
 migration/ram.c   | 23 +--
 system/memory.c   | 11 +--
 5 files changed, 42 insertions(+), 12 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 
567bc4c9fdb53e8f63487f1400980275687d..c129ee6db7162504bd72d4cfc69b5affb2cd87e8
 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -2570,8 +2570,11 @@ void memory_listener_unregister(MemoryListener 
*listener);
  * memory_global_dirty_log_start: begin dirty logging for all regions
  *
  * @flags: purpose of starting dirty log, migration or dirty rate
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Return: true on success, else false setting @errp with error.
  */
-void memory_global_dirty_log_start(unsigned int flags);
+bool memory_global_dirty_log_start(unsigned int flags, Error **errp);
 
 /**
  * memory_global_dirty_log_stop: end dirty logging for all regions
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index 
f6e9a1bc86491783077b5cb5aafdb19ab294e392..006d219ad52d739cc406ad5f8082ca82c16c61cc
 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -669,7 +669,7 @@ void xen_hvm_modified_memory(ram_addr_t start, ram_addr_t 
length)
 void qmp_xen_set_global_dirty_log(bool enable, Error **errp)
 {
 if (enable) {
-memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
+memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION, errp);
 } else {
 memory_global_dirty_log_stop(GLOBAL_DIRTY_MIGRATION);
 }
diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
index 
1d2e85746fb7b10eb7f149976970f9a92125af8a..d02d70b7b4b86a29d4d5540ded416543536d8f98
 100644
--- a/migration/dirtyrate.c
+++ b/migration/dirtyrate.c
@@ -90,9 +90,15 @@ static int64_t do_calculate_dirtyrate(DirtyPageRecord 
dirty_pages,
 
 void global_dirty_log_change(unsigned int flag, bool start)
 {
+Error *local_err = NULL;
+bool ret;
+
 bql_lock();
 if (start) {
-memory_global_dirty_log_start(flag);
+ret = memory_global_dirty_log_start(flag, &local_err);
+if (!ret) {
+error_report_err(local_err);
+}
 } else {
 memory_global_dirty_log_stop(flag);
 }
@@ -608,9 +614,12 @@ static void calculate_dirtyrate_dirty_bitmap(struct 
DirtyRateConfig config)
 {
 int64_t start_time;
 DirtyPageRecord dirty_pages;
+Error *local_err = NULL;
 
 bql_lock();
-memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
+if (!memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE, &local_err)) {
+error_report_err(local_err);
+}
 
 /*
  * 1'round of log sync may return all 1 bits with
diff --git a/migration/ram.c b/migration/ram.c
index 
f0bd71438a4f7212118593b51648b645737933d4..bade3e9281ae839578033524b800dcf3c6f486dc
 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2862,18 +2862,32 @@ static void 
migration_bitmap_clear_discarded_pages(RAMState *rs)
 
 static void ram_init_bitmaps(RAMState *rs)
 {
+Error *local_err = NULL;
+bool ret = true;
+
 qemu_mutex_lock_ramlist();
 
 WITH_RCU_READ_LOCK_GUARD() {
 ram_list_init_bitmaps();
 /* We don't use dirty log with background snapshots */
 if (!migrate_background_snapshot()) {
-memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION);
+ret = memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION,
+&local_err);
+if (!ret) {
+error_report_err(local_err);
+goto out_unlock;
+}
 migration_bitmap_sync_precopy(rs, false);
 }
 }
+out_unlock:
 qemu_mutex_unlock_ramlist();
 
+if (!ret) {
+ram_bitmaps_destroy();
+return;
+}
+
 /*
  * After an eventual first bitmap sync, fixup the initial bitmap
  * containing all 1s to exclude any discarded pages from migration.
@@ -3665,6 +3679,8 @@ int colo_init_ram_cache(void)
 void colo_incoming_start_dirty_log(void)
 {
 RAMBlock *block = NULL;

[PATCH for-9.1 v5 07/14] migration: Add Error** argument to .save_setup() handler

2024-03-20 Thread Cédric Le Goater

The purpose is to record a potential error in the migration stream if
qemu_savevm_state_setup() fails. Most of the current .save_setup()
handlers can be modified to use the Error argument instead of managing
their own and calling locally error_report().

Cc: Nicholas Piggin 
Cc: Harsh Prateek Bora 
Cc: Halil Pasic 
Cc: Thomas Huth 
Cc: Eric Blake 
Cc: Vladimir Sementsov-Ogievskiy 
Cc: John Snow 
Cc: Stefan Hajnoczi 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Peter Xu 
Reviewed-by: Thomas Huth 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Cédric Le Goater 
---
 include/migration/register.h   |  3 ++-
 hw/ppc/spapr.c |  2 +-
 hw/s390x/s390-stattrib.c   |  6 ++
 hw/vfio/migration.c| 17 -
 migration/block-dirty-bitmap.c |  4 +++-
 migration/block.c  | 13 -
 migration/ram.c| 15 ---
 migration/savevm.c |  4 +---
 8 files changed, 29 insertions(+), 35 deletions(-)

diff --git a/include/migration/register.h b/include/migration/register.h
index 
d7b70a8be68c9df47c7843bda7d430989d7ca384..64fc7c11036c82edd6d69513e56a0216d36c17aa
 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -60,10 +60,11 @@ typedef struct SaveVMHandlers {
  *
  * @f: QEMUFile where to send the data
  * @opaque: data pointer passed to register_savevm_live()
+ * @errp: pointer to Error*, to store an error if it happens.
  *
  * Returns zero to indicate success and negative for error
  */
-int (*save_setup)(QEMUFile *f, void *opaque);
+int (*save_setup)(QEMUFile *f, void *opaque, Error **errp);
 
 /**
  * @save_cleanup
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 
c417f9dd523547eabf6d66a8f505093758e80461..144a3f2b604872e09268b509b9b79ee5b2226136
 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2171,7 +2171,7 @@ static const VMStateDescription vmstate_spapr = {
 }
 };
 
-static int htab_save_setup(QEMUFile *f, void *opaque)
+static int htab_save_setup(QEMUFile *f, void *opaque, Error **errp)
 {
 SpaprMachineState *spapr = opaque;
 
diff --git a/hw/s390x/s390-stattrib.c b/hw/s390x/s390-stattrib.c
index 
b743e8a2fee84c7374460ccea6df1cf447cda44b..bc04187b2b69226db80219da1a964a87428adc0c
 100644
--- a/hw/s390x/s390-stattrib.c
+++ b/hw/s390x/s390-stattrib.c
@@ -168,19 +168,17 @@ static int cmma_load(QEMUFile *f, void *opaque, int 
version_id)
 return ret;
 }
 
-static int cmma_save_setup(QEMUFile *f, void *opaque)
+static int cmma_save_setup(QEMUFile *f, void *opaque, Error **errp)
 {
 S390StAttribState *sas = S390_STATTRIB(opaque);
 S390StAttribClass *sac = S390_STATTRIB_GET_CLASS(sas);
-Error *local_err = NULL;
 int res;
 /*
  * Signal that we want to start a migration, thus needing PGSTE dirty
  * tracking.
  */
-res = sac->set_migrationmode(sas, true, &local_err);
+res = sac->set_migrationmode(sas, true, errp);
 if (res) {
-error_report_err(local_err);
 return res;
 }
 qemu_put_be64(f, STATTR_FLAG_EOS);
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 
bf5a29ddc15b0dbc7ae9c44f289539dd0cdddb0d..5763c0b68376b1e24ef3e77c3d19fcd406922c79
 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -376,7 +376,7 @@ static int vfio_save_prepare(void *opaque, Error **errp)
 return 0;
 }
 
-static int vfio_save_setup(QEMUFile *f, void *opaque)
+static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
 {
 VFIODevice *vbasedev = opaque;
 VFIOMigration *migration = vbasedev->migration;
@@ -390,8 +390,8 @@ static int vfio_save_setup(QEMUFile *f, void *opaque)
   stop_copy_size);
 migration->data_buffer = g_try_malloc0(migration->data_buffer_size);
 if (!migration->data_buffer) {
-error_report("%s: Failed to allocate migration data buffer",
- vbasedev->name);
+error_setg(errp, "%s: Failed to allocate migration data buffer",
+   vbasedev->name);
 return -ENOMEM;
 }
 
@@ -401,8 +401,8 @@ static int vfio_save_setup(QEMUFile *f, void *opaque)
 ret = vfio_migration_set_state(vbasedev, 
VFIO_DEVICE_STATE_PRE_COPY,
VFIO_DEVICE_STATE_RUNNING);
 if (ret) {
-error_report("%s: Failed to set new PRE_COPY state",
- vbasedev->name);
+error_setg(errp, "%s: Failed to set new PRE_COPY state",
+   vbasedev->name);
 return ret;
 }
 
@@ -413,8 +413,8 @@ static int vfio_save_setup(QEMUFile *f, void *opaque)
 /* vfio_save_complete_precopy() will go to STOP_COPY */
 break;
 default:
-error_report("%s: Invalid device state %d", vbasedev->name,
-

[PATCH for-9.1 v5 06/14] migration: Add Error** argument to qemu_savevm_state_setup()

2024-03-20 Thread Cédric Le Goater

This prepares ground for the changes coming next which add an Error**
argument to the .save_setup() handler. Callers of qemu_savevm_state_setup()
now handle the error and fail earlier setting the migration state from
MIGRATION_STATUS_SETUP to MIGRATION_STATUS_FAILED.

In qemu_savevm_state(), move the cleanup to preserve the error
reported by .save_setup() handlers.

Since the previous behavior was to ignore errors at this step of
migration, this change should be examined closely to check that
cleanups are still correctly done.

Signed-off-by: Cédric Le Goater 
---

 Changes in v5:
 
 - Removed Fabiano's R-b because of changes 
 - Handled qemu_savevm_state_setup() failures after waiting for
   virtio-net-failover devices to unplug.
   
 migration/savevm.h|  2 +-
 migration/migration.c | 33 +++--
 migration/savevm.c| 26 +++---
 3 files changed, 47 insertions(+), 14 deletions(-)

diff --git a/migration/savevm.h b/migration/savevm.h
index 
74669733dd63a080b765866c703234a5c4939223..9ec96a995c93a42aad621595f0ed58596c532328
 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -32,7 +32,7 @@
 bool qemu_savevm_state_blocked(Error **errp);
 void qemu_savevm_non_migratable_list(strList **reasons);
 int qemu_savevm_state_prepare(Error **errp);
-void qemu_savevm_state_setup(QEMUFile *f);
+int qemu_savevm_state_setup(QEMUFile *f, Error **errp);
 bool qemu_savevm_state_guest_unplug_pending(void);
 int qemu_savevm_state_resume_prepare(MigrationState *s);
 void qemu_savevm_state_header(QEMUFile *f);
diff --git a/migration/migration.c b/migration/migration.c
index 
f60bd371e3f896a74df8be4282a15b4280eba732..cd6b6120e31798de9361d02ee43d89989c8d30ce
 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -3427,6 +3427,8 @@ static void *migration_thread(void *opaque)
 int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
 MigThrError thr_error;
 bool urgent = false;
+Error *local_err = NULL;
+int ret;
 
 thread = migration_threads_add("live_migration", qemu_get_thread_id());
 
@@ -3470,12 +3472,24 @@ static void *migration_thread(void *opaque)
 }
 
 bql_lock();
-qemu_savevm_state_setup(s->to_dst_file);
+ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
 bql_unlock();
 
 qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
MIGRATION_STATUS_ACTIVE);
 
+/*
+ * Handle SETUP failures after waiting for virtio-net-failover
+ * devices to unplug. This to preserve migration state transitions.
+ */
+if (ret) {
+migrate_set_error(s, local_err);
+error_free(local_err);
+migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
+  MIGRATION_STATUS_FAILED);
+goto out;
+}
+
 s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
 
 trace_migration_thread_setup_complete();
@@ -3549,6 +3563,8 @@ static void *bg_migration_thread(void *opaque)
 MigThrError thr_error;
 QEMUFile *fb;
 bool early_fail = true;
+Error *local_err = NULL;
+int ret;
 
 rcu_register_thread();
 object_ref(OBJECT(s));
@@ -3582,12 +3598,24 @@ static void *bg_migration_thread(void *opaque)
 
 bql_lock();
 qemu_savevm_state_header(s->to_dst_file);
-qemu_savevm_state_setup(s->to_dst_file);
+ret = qemu_savevm_state_setup(s->to_dst_file, &local_err);
 bql_unlock();
 
 qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
MIGRATION_STATUS_ACTIVE);
 
+/*
+ * Handle SETUP failures after waiting for virtio-net-failover
+ * devices to unplug. This to preserve migration state transitions.
+ */
+if (ret) {
+migrate_set_error(s, local_err);
+error_free(local_err);
+migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
+  MIGRATION_STATUS_FAILED);
+goto fail_setup;
+}
+
 s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
 
 trace_migration_thread_setup_complete();
@@ -3656,6 +3684,7 @@ fail:
 bql_unlock();
 }
 
+fail_setup:
 bg_migration_iteration_finish(s);
 
 qemu_fclose(fb);
diff --git a/migration/savevm.c b/migration/savevm.c
index 
1a7b5cb78a912c36ae16db703afc90ef2906b61f..0eb94e61f888adba2c0732c2cb701b110814c455
 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1310,11 +1310,11 @@ int qemu_savevm_state_prepare(Error **errp)
 return 0;
 }
 
-void qemu_savevm_state_setup(QEMUFile *f)
+int qemu_savevm_state_setup(QEMUFile *f, Error **errp)
 {
+ERRP_GUARD();
 MigrationState *ms = migrate_get_current();
 SaveStateEntry *se;
-Error *local_err = NULL;
 int ret = 0;
 
 json_writer_int64(ms->vmdesc, "page_size", qemu_target_page_size());
@@ -1323,10 +1323,9 @@ void qemu_savevm_state_setup(QEMUFile *f)
 trace_savevm_state_setup();
 QTAILQ_FOREACH(se, _st

[PATCH for-9.1 v5 00/14] migration: Improve error reporting

2024-03-20 Thread Cédric Le Goater

Hello,

The motivation behind these changes is to improve error reporting to
the upper management layer (libvirt) by providing a more detailed
error, so that it can decide, depending on the reported error, whether
to try migration again later. This would be useful in cases where
migration fails due to a lack of HW resources on the host. For
instance, some adapters can only initiate a limited number of
simultaneous dirty tracking requests, and this imposes a limit on the
number of VMs that can be migrated simultaneously.

We are not quite ready for such a mechanism but what we can do first is
to clean up the error reporting in the early save_setup sequence. This
is what the following changes propose, by adding an Error** argument to
various handlers and propagating it to the core migration subsystem.


Patchset is organized as follows:

* [1-5] are prerequisite changes in other components related to the
  migration save_setup() handler. They make sure a failure is not
  returned without setting an error.
  
  s390/stattrib: Add Error** argument to set_migrationmode() handler
  vfio: Always report an error in vfio_save_setup()
  migration: Always report an error in block_save_setup()
  migration: Always report an error in ram_save_setup()
  migration: Add Error** argument to vmstate_save()

* [6-14] are the core changes in migration and memory components to
  propagate an error reported in a save_setup() handler.

  migration: Add Error** argument to qemu_savevm_state_setup()
  migration: Add Error** argument to .save_setup() handler
  migration: Add Error** argument to .load_setup() handler
  memory: Add Error** argument to .log_global_start() handler
  migration: Introduce ram_bitmaps_destroy()
  memory: Add Error** argument to the global_dirty_log routines
  migration: Add Error** argument to ram_state_init()
  migration: Add Error** argument to xbzrle_init()
  migration: Modify ram_init_bitmaps() to report dirty tracking errors

The VFIO changes depend on the above. They are simpler and have been
reviewed already. I kept them for another series.

Thanks,

C.

Changes in v5:
 
 - Rebased on 2e128776dc56 ("migration: Skip only empty block devices")
 - Removed Fabiano's R-b because of changes 
 - Handled qemu_savevm_state_setup() failures after waiting for
   virtio-net-failover devices to unplug.
 - Removed memory_global_dirty_log_rollback()
 - Introduced memory_global_dirty_log_do_start() to call
   .log_global_start() handlers and do the rollback in case of error.
 - Kept modification of the global_dirty_tracking flag within
   memory_global_dirty_log_start()  
 - Added an assert on error of a .log_global_start() handler in
   listener_add_address_space()
 - Removed Yong Huang's R-b
 - Introduced ram_bitmaps_destroy()
 - Added Error** argument to ram_state_init() and xbzrle_init()
 - Made use of ram_bitmaps_destroy() in ram_init_bitmaps() to cleanup
   allocated bitmaps
 - Took into account changes of ram_state_init() and xbzrle_init() to
   propagate the error.
 - Reduced series to migration. VFIO can come later. 

Changes in v4:

 - Fixed frenchism futur to future
 - Fixed typo in set_migrationmode() handler
 - Added error_free() in hmp_migrationmode()
 - Fixed state name printed out in error returned by vfio_save_setup()
 - Fixed test on error returned by qemu_file_get_error()
 - Added an error when bdrv_nb_sectors() returns a negative value 
 - Dropped log_global_stop() and log_global_sync() changes
 - Dropped MEMORY_LISTENER_CALL_LOG_GLOBAL
 - Modified memory_global_dirty_log_start() to loop on the list of
   listeners and handle errors directly.
 - Introduced memory_global_dirty_log_rollback() to revert operations
   previously done

Changes in v3:

 - New changes to make sure an error is always set in case of failure.
   This is the reason behind the 5/6 extra patches. (Markus)
 - Documentation fixup (Peter + Avihai)
 - Set migration state to MIGRATION_STATUS_FAILED always
 - Fixed error handling in bg_migration_thread() (Peter)
 - Fixed return value of vfio_listener_log_global_start/stop(). 
   Went unnoticed because value is not tested. (Peter)
 - Add ERRP_GUARD() when error_prepend is used 
 - Use error_setg_errno() when possible

Changes in v2:

- Removed v1 patches addressing the return-path thread termination as
  they are now superseded by :  
  https://lore.kernel.org/qemu-devel/20240226203122.22894-1-faro...@suse.de/
- Documentation updates of handlers
- Removed call to PRECOPY_NOTIFY_SETUP notifiers in case of errors
- Modified routines taking an Error** argument to return a bool when
  possible and made adjustments in callers.
- new MEMORY_LISTENER_CALL_LOG_GLOBAL macro for .log_global*()
  handlers
- Handled SETUP state when migration terminates
- Modified memory_get_xlat_addr() to take an Error** argument
- Various refinements on error handling

Cédric Le Goater (14):
  s390/stattrib: Add Error** argument to set_migrationmode() handler
  vfio: Always report an error in vfio_

[PATCH for-9.1 v5 03/14] migration: Always report an error in block_save_setup()

2024-03-20 Thread Cédric Le Goater

This will prepare ground for future changes adding an Error** argument
to the save_setup() handler. We need to make sure that on failure,
block_save_setup() always sets a new error.

Cc: Stefan Hajnoczi 
Reviewed-by: Fabiano Rosas 
Signed-off-by: Cédric Le Goater 
---

 Changes in v5:

 - Rebased on 2e128776dc56 ("migration: Skip only empty block devices")

 migration/block.c | 20 +++-
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/migration/block.c b/migration/block.c
index 
2b9054889ad2ba739828594c50cf047703757e96..f8a11beb37dac3df5c2cc654db6440509d1181ea
 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -367,7 +367,7 @@ static void unset_dirty_tracking(void)
 }
 }
 
-static int init_blk_migration(QEMUFile *f)
+static int init_blk_migration(QEMUFile *f, Error **errp)
 {
 BlockDriverState *bs;
 BlkMigDevState *bmds;
@@ -378,7 +378,6 @@ static int init_blk_migration(QEMUFile *f)
 BlkMigDevState *bmds;
 BlockDriverState *bs;
 } *bmds_bs;
-Error *local_err = NULL;
 int ret;
 
 GRAPH_RDLOCK_GUARD_MAINLOOP();
@@ -406,6 +405,8 @@ static int init_blk_migration(QEMUFile *f)
 continue;
 }
 if (sectors < 0) {
+error_setg(errp, "Error getting length of block device %s",
+   bdrv_get_device_name(bs));
 ret = sectors;
 bdrv_next_cleanup();
 goto out;
@@ -442,9 +443,8 @@ static int init_blk_migration(QEMUFile *f)
 bs = bmds_bs[i].bs;
 
 if (bmds) {
-ret = blk_insert_bs(bmds->blk, bs, &local_err);
+ret = blk_insert_bs(bmds->blk, bs, errp);
 if (ret < 0) {
-error_report_err(local_err);
 goto out;
 }
 
@@ -714,6 +714,7 @@ static void block_migration_cleanup(void *opaque)
 static int block_save_setup(QEMUFile *f, void *opaque)
 {
 int ret;
+Error *local_err = NULL;
 
 trace_migration_block_save("setup", block_mig_state.submitted,
block_mig_state.transferred);
@@ -721,18 +722,27 @@ static int block_save_setup(QEMUFile *f, void *opaque)
 warn_report("block migration is deprecated;"
 " use blockdev-mirror with NBD instead");
 
-ret = init_blk_migration(f);
+ret = init_blk_migration(f, &local_err);
 if (ret < 0) {
+error_report_err(local_err);
 return ret;
 }
 
 /* start track dirty blocks */
 ret = set_dirty_tracking();
 if (ret) {
+error_setg_errno(&local_err, -ret,
+ "Failed to start block dirty tracking");
+error_report_err(local_err);
 return ret;
 }
 
 ret = flush_blks(f);
+if (ret) {
+error_setg_errno(&local_err, -ret, "Flushing block failed");
+error_report_err(local_err);
+return ret;
+}
 blk_mig_reset_dirty_cursor();
 qemu_put_be64(f, BLK_MIG_FLAG_EOS);
 
-- 
2.44.0

[PATCH for-9.1 v5 10/14] migration: Introduce ram_bitmaps_destroy()

2024-03-20 Thread Cédric Le Goater

We will use it in ram_init_bitmaps() to clear the allocated bitmaps when
support for error reporting is added to memory_global_dirty_log_start().

Signed-off-by: Cédric Le Goater 
---
 migration/ram.c | 22 +++---
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index 
4cd4f0158c8675e1515ef8476c64d1203eed4458..f0bd71438a4f7212118593b51648b645737933d4
 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2438,10 +2438,23 @@ static void xbzrle_cleanup(void)
 XBZRLE_cache_unlock();
 }
 
+static void ram_bitmaps_destroy(void)
+{
+RAMBlock *block;
+
+RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+g_free(block->clear_bmap);
+block->clear_bmap = NULL;
+g_free(block->bmap);
+block->bmap = NULL;
+g_free(block->file_bmap);
+block->file_bmap = NULL;
+}
+}
+
 static void ram_save_cleanup(void *opaque)
 {
 RAMState **rsp = opaque;
-RAMBlock *block;
 
 /* We don't use dirty log with background snapshots */
 if (!migrate_background_snapshot()) {
@@ -2458,12 +2471,7 @@ static void ram_save_cleanup(void *opaque)
 }
 }
 
-RAMBLOCK_FOREACH_NOT_IGNORED(block) {
-g_free(block->clear_bmap);
-block->clear_bmap = NULL;
-g_free(block->bmap);
-block->bmap = NULL;
-}
+ram_bitmaps_destroy();
 
 xbzrle_cleanup();
 compress_threads_save_cleanup();
-- 
2.44.0

[PATCH for-9.1 v5 08/14] migration: Add Error** argument to .load_setup() handler

2024-03-20 Thread Cédric Le Goater

This will be useful to report errors at a higher level, mostly in VFIO
today.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Peter Xu 
Signed-off-by: Cédric Le Goater 
---
 include/migration/register.h |  3 ++-
 hw/vfio/migration.c  |  9 +++--
 migration/ram.c  |  3 ++-
 migration/savevm.c   | 11 +++
 4 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/include/migration/register.h b/include/migration/register.h
index 
64fc7c11036c82edd6d69513e56a0216d36c17aa..f60e797894e5faacdf55d2d6de175074ac58944f
 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -234,10 +234,11 @@ typedef struct SaveVMHandlers {
  *
  * @f: QEMUFile where to receive the data
  * @opaque: data pointer passed to register_savevm_live()
+ * @errp: pointer to Error*, to store an error if it happens.
  *
  * Returns zero to indicate success and negative for error
  */
-int (*load_setup)(QEMUFile *f, void *opaque);
+int (*load_setup)(QEMUFile *f, void *opaque, Error **errp);
 
 /**
  * @load_cleanup
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 
5763c0b68376b1e24ef3e77c3d19fcd406922c79..06ae40969b6c19037e190008e14f28be646278cd
 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -588,12 +588,17 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
 }
 }
 
-static int vfio_load_setup(QEMUFile *f, void *opaque)
+static int vfio_load_setup(QEMUFile *f, void *opaque, Error **errp)
 {
 VFIODevice *vbasedev = opaque;
+int ret;
 
-return vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING,
+ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING,
vbasedev->migration->device_state);
+if (ret) {
+error_setg(errp, "%s: Failed to set RESUMING state", vbasedev->name);
+}
+return ret;
 }
 
 static int vfio_load_cleanup(void *opaque)
diff --git a/migration/ram.c b/migration/ram.c
index 
6ea5a06e00e30d0d1e4d8a6defdeb86c81fa707b..4cd4f0158c8675e1515ef8476c64d1203eed4458
 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3704,8 +3704,9 @@ void colo_release_ram_cache(void)
  *
  * @f: QEMUFile where to receive the data
  * @opaque: RAMState pointer
+ * @errp: pointer to Error*, to store an error if it happens.
  */
-static int ram_load_setup(QEMUFile *f, void *opaque)
+static int ram_load_setup(QEMUFile *f, void *opaque, Error **errp)
 {
 xbzrle_load_setup();
 ramblock_recv_map_init();
diff --git a/migration/savevm.c b/migration/savevm.c
index 
535ad5a32d67057dd172ce25d561a66a07172e97..8f42999a15d1685957de9ed517d6bc9ba49c3f11
 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2747,8 +2747,9 @@ static void 
qemu_loadvm_state_switchover_ack_needed(MigrationIncomingState *mis)
 trace_loadvm_state_switchover_ack_needed(mis->switchover_ack_pending_num);
 }
 
-static int qemu_loadvm_state_setup(QEMUFile *f)
+static int qemu_loadvm_state_setup(QEMUFile *f, Error **errp)
 {
+ERRP_GUARD(); /* error_prepend use */
 SaveStateEntry *se;
 int ret;
 
@@ -2763,10 +2764,11 @@ static int qemu_loadvm_state_setup(QEMUFile *f)
 }
 }
 
-ret = se->ops->load_setup(f, se->opaque);
+ret = se->ops->load_setup(f, se->opaque, errp);
 if (ret < 0) {
+error_prepend(errp, "Load state of device %s failed: ",
+  se->idstr);
 qemu_file_set_error(f, ret);
-error_report("Load state of device %s failed", se->idstr);
 return ret;
 }
 }
@@ -2947,7 +2949,8 @@ int qemu_loadvm_state(QEMUFile *f)
 return ret;
 }
 
-if (qemu_loadvm_state_setup(f) != 0) {
+if (qemu_loadvm_state_setup(f, &local_err) != 0) {
+error_report_err(local_err);
 return -EINVAL;
 }
 
-- 
2.44.0

[PATCH for-9.1 v5 14/14] migration: Modify ram_init_bitmaps() to report dirty tracking errors

2024-03-20 Thread Cédric Le Goater

The .save_setup() handler now has an Error** argument that we can use
to propagate errors reported by the .log_global_start() handler. Do
that for the RAM. The caller qemu_savevm_state_setup() will store the
error under the migration stream for later detection in the migration
sequence.

Signed-off-by: Cédric Le Goater 
---

 Changes in v5:

 - Took into account changes of ram_state_init() and xbzrle_init() to
   propagate the error.
   
 migration/ram.c | 27 +++
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index 
70797ef5d80c6ccf61fee987bbe3969041664c69..daffcd82d4f15a2defc66059e967092ebc3ec055
 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2857,9 +2857,8 @@ static void 
migration_bitmap_clear_discarded_pages(RAMState *rs)
 }
 }
 
-static void ram_init_bitmaps(RAMState *rs)
+static bool ram_init_bitmaps(RAMState *rs, Error **errp)
 {
-Error *local_err = NULL;
 bool ret = true;
 
 qemu_mutex_lock_ramlist();
@@ -2868,10 +2867,8 @@ static void ram_init_bitmaps(RAMState *rs)
 ram_list_init_bitmaps();
 /* We don't use dirty log with background snapshots */
 if (!migrate_background_snapshot()) {
-ret = memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION,
-&local_err);
+ret = memory_global_dirty_log_start(GLOBAL_DIRTY_MIGRATION, errp);
 if (!ret) {
-error_report_err(local_err);
 goto out_unlock;
 }
 migration_bitmap_sync_precopy(rs, false);
@@ -2882,7 +2879,7 @@ out_unlock:
 
 if (!ret) {
 ram_bitmaps_destroy();
-return;
+return false;
 }
 
 /*
@@ -2890,24 +2887,23 @@ out_unlock:
  * containing all 1s to exclude any discarded pages from migration.
  */
 migration_bitmap_clear_discarded_pages(rs);
+return true;
 }
 
-static int ram_init_all(RAMState **rsp)
+static int ram_init_all(RAMState **rsp, Error **errp)
 {
-Error *local_err = NULL;
-
-if (!ram_state_init(rsp, &local_err)) {
-error_report_err(local_err);
+if (!ram_state_init(rsp, errp)) {
 return -1;
 }
 
-if (!xbzrle_init(&local_err)) {
-error_report_err(local_err);
+if (!xbzrle_init(errp)) {
 ram_state_cleanup(rsp);
 return -1;
 }
 
-ram_init_bitmaps(*rsp);
+if (!ram_init_bitmaps(*rsp, errp)) {
+return -1;
+}
 
 return 0;
 }
@@ -3104,8 +3100,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque, 
Error **errp)
 
 /* migration has already setup the bitmap, reuse it. */
 if (!migration_in_colo_state()) {
-if (ram_init_all(rsp) != 0) {
-error_setg(errp, "%s: failed to setup RAM for migration", 
__func__);
+if (ram_init_all(rsp, errp) != 0) {
 compress_threads_save_cleanup();
 return -1;
 }
-- 
2.44.0

[PATCH for-9.1 v5 09/14] memory: Add Error** argument to .log_global_start() handler

2024-03-20 Thread Cédric Le Goater

Modify all .log_global_start() handlers to take an Error** parameter
and return a bool. Adapt memory_global_dirty_log_start() to interrupt
the loop on handlers at the first error. In such a case, a rollback is
performed to stop dirty logging on all listeners where it was
previously enabled.

Cc: Stefano Stabellini 
Cc: Anthony Perard 
Cc: Paul Durrant 
Cc: "Michael S. Tsirkin" 
Cc: Paolo Bonzini 
Cc: David Hildenbrand 
Signed-off-by: Cédric Le Goater 
---

 Changes in v5:

 - Removed memory_global_dirty_log_rollback
 - Introduced memory_global_dirty_log_do_start() to call
   .log_global_start() handlers and do the rollback in case of error.
 - Kept modification of the global_dirty_tracking flag within
   memory_global_dirty_log_start()  
 - Added an assert on error of a .log_global_start() handler in
   listener_add_address_space()

 include/exec/memory.h |  5 -
 hw/i386/xen/xen-hvm.c |  3 ++-
 hw/vfio/common.c  |  4 +++-
 hw/virtio/vhost.c |  3 ++-
 system/memory.c   | 37 +++--
 5 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/include/exec/memory.h b/include/exec/memory.h
index 
8626a355b310ed7b1a1db7978ba4b394032c2f15..567bc4c9fdb53e8f63487f1400980275687d
 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -998,8 +998,11 @@ struct MemoryListener {
  * active at that time.
  *
  * @listener: The #MemoryListener.
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Return: true on success, else false setting @errp with error.
  */
-void (*log_global_start)(MemoryListener *listener);
+bool (*log_global_start)(MemoryListener *listener, Error **errp);
 
 /**
  * @log_global_stop:
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index 
7745cb39631ea423aeb6e5d3eb7f7bcbe27ec6fa..f6e9a1bc86491783077b5cb5aafdb19ab294e392
 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -457,11 +457,12 @@ static void xen_log_sync(MemoryListener *listener, 
MemoryRegionSection *section)
   int128_get64(section->size));
 }
 
-static void xen_log_global_start(MemoryListener *listener)
+static bool xen_log_global_start(MemoryListener *listener, Error **errp)
 {
 if (xen_enabled()) {
 xen_in_migration = true;
 }
+return true;
 }
 
 static void xen_log_global_stop(MemoryListener *listener)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 
011ceaab89433de4496dffadc737286e053f321d..8f9cbdc0264044ce587877a7d19d14b28527291b
 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1066,7 +1066,8 @@ out:
 return ret;
 }
 
-static void vfio_listener_log_global_start(MemoryListener *listener)
+static bool vfio_listener_log_global_start(MemoryListener *listener,
+   Error **errp)
 {
 VFIOContainerBase *bcontainer = container_of(listener, VFIOContainerBase,
  listener);
@@ -1083,6 +1084,7 @@ static void vfio_listener_log_global_start(MemoryListener 
*listener)
  ret, strerror(-ret));
 vfio_set_migration_error(ret);
 }
+return !ret;
 }
 
 static void vfio_listener_log_global_stop(MemoryListener *listener)
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 
2e4e040db87acf45166da86d268077f54511d82c..d405f03caf2fd3a5ea23bdc0392f4c6c072bc10b
 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1044,7 +1044,7 @@ check_dev_state:
 return r;
 }
 
-static void vhost_log_global_start(MemoryListener *listener)
+static bool vhost_log_global_start(MemoryListener *listener, Error **errp)
 {
 int r;
 
@@ -1052,6 +1052,7 @@ static void vhost_log_global_start(MemoryListener 
*listener)
 if (r < 0) {
 abort();
 }
+return true;
 }
 
 static void vhost_log_global_stop(MemoryListener *listener)
diff --git a/system/memory.c b/system/memory.c
index 
a229a79988fce2aa3cb77e3a130db4c694e8cd49..ca4d91484fb3d06f4b902486fea49dba86dc141b
 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -2914,9 +2914,33 @@ static unsigned int postponed_stop_flags;
 static VMChangeStateEntry *vmstate_change;
 static void memory_global_dirty_log_stop_postponed_run(void);
 
+static bool memory_global_dirty_log_do_start(Error **errp)
+{
+MemoryListener *listener;
+
+QTAILQ_FOREACH(listener, &memory_listeners, link) {
+if (listener->log_global_start) {
+if (!listener->log_global_start(listener, errp)) {
+goto err;
+}
+}
+}
+return true;
+
+err:
+while ((listener = QTAILQ_PREV(listener, link)) != NULL) {
+if (listener->log_global_stop) {
+listener->log_global_stop(listener);
+}
+}
+
+return false;
+}
+
 void memory_global_dirty_log_start(unsigned int flags)
 {
 unsigned int old_flags;
+Error *local_err = NULL;
 
 assert(flags && !(flags & (~GLOBAL_DIRT

[PATCH for-9.1 v5 05/14] migration: Add Error** argument to vmstate_save()

2024-03-20 Thread Cédric Le Goater

This will prepare ground for future changes adding an Error** argument
to qemu_savevm_state_setup().

Reviewed-by: Prasad Pandit 
Signed-off-by: Cédric Le Goater 
---
 migration/savevm.c | 26 --
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/migration/savevm.c b/migration/savevm.c
index 
388d7af7cdd842ec94fe782ed53979b800ffd4f6..1a7b5cb78a912c36ae16db703afc90ef2906b61f
 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1009,11 +1009,10 @@ static void save_section_footer(QEMUFile *f, 
SaveStateEntry *se)
 }
 }
 
-static int vmstate_save(QEMUFile *f, SaveStateEntry *se, JSONWriter *vmdesc)
+static int vmstate_save(QEMUFile *f, SaveStateEntry *se, JSONWriter *vmdesc,
+Error **errp)
 {
 int ret;
-Error *local_err = NULL;
-MigrationState *s = migrate_get_current();
 
 if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
 return 0;
@@ -1035,10 +1034,9 @@ static int vmstate_save(QEMUFile *f, SaveStateEntry *se, 
JSONWriter *vmdesc)
 if (!se->vmsd) {
 vmstate_save_old_style(f, se, vmdesc);
 } else {
-ret = vmstate_save_state_with_err(f, se->vmsd, se->opaque, vmdesc, 
&local_err);
+ret = vmstate_save_state_with_err(f, se->vmsd, se->opaque, vmdesc,
+  errp);
 if (ret) {
-migrate_set_error(s, local_err);
-error_report_err(local_err);
 return ret;
 }
 }
@@ -1325,8 +1323,10 @@ void qemu_savevm_state_setup(QEMUFile *f)
 trace_savevm_state_setup();
 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
 if (se->vmsd && se->vmsd->early_setup) {
-ret = vmstate_save(f, se, ms->vmdesc);
+ret = vmstate_save(f, se, ms->vmdesc, &local_err);
 if (ret) {
+migrate_set_error(ms, local_err);
+error_report_err(local_err);
 qemu_file_set_error(f, ret);
 break;
 }
@@ -1542,6 +1542,7 @@ int 
qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
 JSONWriter *vmdesc = ms->vmdesc;
 int vmdesc_len;
 SaveStateEntry *se;
+Error *local_err = NULL;
 int ret;
 
 QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
@@ -1552,8 +1553,10 @@ int 
qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
 
 start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
 
-ret = vmstate_save(f, se, vmdesc);
+ret = vmstate_save(f, se, vmdesc, &local_err);
 if (ret) {
+migrate_set_error(ms, local_err);
+error_report_err(local_err);
 qemu_file_set_error(f, ret);
 return ret;
 }
@@ -1568,7 +1571,6 @@ int 
qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
  * bdrv_activate_all() on the other end won't fail. */
 ret = bdrv_inactivate_all();
 if (ret) {
-Error *local_err = NULL;
 error_setg(&local_err, "%s: bdrv_inactivate_all() failed (%d)",
__func__, ret);
 migrate_set_error(ms, local_err);
@@ -1764,6 +1766,8 @@ void qemu_savevm_live_state(QEMUFile *f)
 
 int qemu_save_device_state(QEMUFile *f)
 {
+MigrationState *ms = migrate_get_current();
+Error *local_err = NULL;
 SaveStateEntry *se;
 
 if (!migration_in_colo_state()) {
@@ -1778,8 +1782,10 @@ int qemu_save_device_state(QEMUFile *f)
 if (se->is_ram) {
 continue;
 }
-ret = vmstate_save(f, se, NULL);
+ret = vmstate_save(f, se, NULL, &local_err);
 if (ret) {
+migrate_set_error(ms, local_err);
+error_report_err(local_err);
 return ret;
 }
 }
-- 
2.44.0

1 2 3 4 5 6 7 8 9 10 >

1 - 100 of 9146 matches

Mail list logo