date:20220614

On Fri, Jun 10, 2022 at 1:20 PM  wrote:
>
> From: Frank Chang 
>
> Current RISC-V debug assumes that only type 2 trigger is supported.
> To allow more types of triggers to be supported in the future
> (e.g. type 6 trigger, which is similar to type 2 trigger with additional
>  functionality), we should determine the trigger type from tdata1.type.
>
> RV_MAX_TRIGGERS is also introduced in replacement of TRIGGER_TYPE2_NUM.
>
> Signed-off-by: Frank Chang 
> ---
>  target/riscv/cpu.h |   2 +-
>  target/riscv/csr.c |   2 +-
>  target/riscv/debug.c   | 183 -
>  target/riscv/debug.h   |  15 ++--
>  target/riscv/machine.c |   2 +-
>  5 files changed, 137 insertions(+), 67 deletions(-)
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 7d6397acdf..535123a989 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -289,7 +289,7 @@ struct CPUArchState {
>
>  /* trigger module */
>  target_ulong trigger_cur;
> -type2_trigger_t type2_trig[TRIGGER_TYPE2_NUM];
> +type2_trigger_t type2_trig[RV_MAX_TRIGGERS];
>
>  /* machine specific rdtime callback */
>  uint64_t (*rdtime_fn)(void *);
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index 6dbe9b541f..005ae31a01 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -2776,7 +2776,7 @@ static RISCVException read_tdata(CPURISCVState *env, 
> int csrno,
>   target_ulong *val)
>  {
>  /* return 0 in tdata1 to end the trigger enumeration */
> -if (env->trigger_cur >= TRIGGER_NUM && csrno == CSR_TDATA1) {
> +if (env->trigger_cur >= RV_MAX_TRIGGERS && csrno == CSR_TDATA1) {
>  *val = 0;
>  return RISCV_EXCP_NONE;
>  }
> diff --git a/target/riscv/debug.c b/target/riscv/debug.c
> index fc6e13222f..abbcd38a17 100644
> --- a/target/riscv/debug.c
> +++ b/target/riscv/debug.c
> @@ -52,8 +52,15 @@
>  /* tdata availability of a trigger */
>  typedef bool tdata_avail[TDATA_NUM];
>
> -static tdata_avail tdata_mapping[TRIGGER_NUM] = {
> -[TRIGGER_TYPE2_IDX_0 ... TRIGGER_TYPE2_IDX_1] = { true, true, false },
> +static tdata_avail tdata_mapping[TRIGGER_TYPE_NUM] = {
> +[TRIGGER_TYPE_NO_EXIST] = { false, false, false },
> +[TRIGGER_TYPE_AD_MATCH] = { true, true, true },
> +[TRIGGER_TYPE_INST_CNT] = { true, false, true },
> +[TRIGGER_TYPE_INT] = { true, true, true },
> +[TRIGGER_TYPE_EXCP] = { true, true, true },
> +[TRIGGER_TYPE_AD_MATCH6] = { true, true, true },
> +[TRIGGER_TYPE_EXT_SRC] = { true, false, false },
> +[TRIGGER_TYPE_UNAVAIL] = { true, true, true }
>  };
>
>  /* only breakpoint size 1/2/4/8 supported */
> @@ -67,6 +74,26 @@ static int access_size[SIZE_NUM] = {
>  [6 ... 15] = -1,
>  };
>
> +static inline target_ulong extract_trigger_type(CPURISCVState *env,
> +target_ulong tdata1)
> +{
> +switch (riscv_cpu_mxl(env)) {
> +case MXL_RV32:
> +return extract32(tdata1, 28, 4);
> +case MXL_RV64:
> +return extract64(tdata1, 60, 4);

I guess we should add a "case MXL_RV128" here so that it won't break rv128.
See commit d1d8541217ce8a23e9e751cd868c7d618817134a

> +default:
> +g_assert_not_reached();
> +}
> +}
> +
> +static inline target_ulong get_trigger_type(CPURISCVState *env,
> +target_ulong trigger_index)
> +{
> +target_ulong tdata1 = env->type2_trig[trigger_index].mcontrol;
> +return extract_trigger_type(env, tdata1);
> +}
> +
>  static inline target_ulong trigger_type(CPURISCVState *env,
>  trigger_type_t type)
>  {
> @@ -89,15 +116,17 @@ static inline target_ulong trigger_type(CPURISCVState 
> *env,
>
>  bool tdata_available(CPURISCVState *env, int tdata_index)
>  {
> +int trigger_type = get_trigger_type(env, env->trigger_cur);
> +
>  if (unlikely(tdata_index >= TDATA_NUM)) {
>  return false;
>  }
>
> -if (unlikely(env->trigger_cur >= TRIGGER_NUM)) {
> +if (unlikely(env->trigger_cur >= RV_MAX_TRIGGERS)) {
>  return false;
>  }
>
> -return tdata_mapping[env->trigger_cur][tdata_index];
> +return tdata_mapping[trigger_type][tdata_index];
>  }
>
>  target_ulong tselect_csr_read(CPURISCVState *env)
> @@ -137,6 +166,7 @@ static target_ulong tdata1_validate(CPURISCVState *env, 
> target_ulong val,
>  qemu_log_mask(LOG_GUEST_ERROR,
>"ignoring type write to tdata1 register\n");
>  }
> +
>  if (dmode != 0) {
>  qemu_log_mask(LOG_UNIMP, "debug mode is not supported\n");
>  }
> @@ -261,9 +291,8 @@ static void type2_breakpoint_remove(CPURISCVState *env, 
> target_ulong index)
>  }
>
>  static target_ulong type2_reg_read(CPURISCVState *env,
> -   target_ulong trigger_index, int 
> tdata_index)
> +   target_ulong index, int tdata_index)
>  {

Re: [PATCH 1/9] target/riscv: debug: Determine the trigger type from tdata1.type

On Fri, Jun 10, 2022 at 1:20 PM  wrote:
>
> From: Frank Chang 
>
> Current RISC-V debug assumes that only type 2 trigger is supported.
> To allow more types of triggers to be supported in the future
> (e.g. type 6 trigger, which is similar to type 2 trigger with additional
>  functionality), we should determine the trigger type from tdata1.type.
>
> RV_MAX_TRIGGERS is also introduced in replacement of TRIGGER_TYPE2_NUM.
>
> Signed-off-by: Frank Chang 
> ---
>  target/riscv/cpu.h |   2 +-
>  target/riscv/csr.c |   2 +-
>  target/riscv/debug.c   | 183 -
>  target/riscv/debug.h   |  15 ++--
>  target/riscv/machine.c |   2 +-
>  5 files changed, 137 insertions(+), 67 deletions(-)
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 7d6397acdf..535123a989 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -289,7 +289,7 @@ struct CPUArchState {
>
>  /* trigger module */
>  target_ulong trigger_cur;
> -type2_trigger_t type2_trig[TRIGGER_TYPE2_NUM];
> +type2_trigger_t type2_trig[RV_MAX_TRIGGERS];
>
>  /* machine specific rdtime callback */
>  uint64_t (*rdtime_fn)(void *);
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index 6dbe9b541f..005ae31a01 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -2776,7 +2776,7 @@ static RISCVException read_tdata(CPURISCVState *env, 
> int csrno,
>   target_ulong *val)
>  {
>  /* return 0 in tdata1 to end the trigger enumeration */
> -if (env->trigger_cur >= TRIGGER_NUM && csrno == CSR_TDATA1) {
> +if (env->trigger_cur >= RV_MAX_TRIGGERS && csrno == CSR_TDATA1) {
>  *val = 0;
>  return RISCV_EXCP_NONE;
>  }
> diff --git a/target/riscv/debug.c b/target/riscv/debug.c
> index fc6e13222f..abbcd38a17 100644
> --- a/target/riscv/debug.c
> +++ b/target/riscv/debug.c
> @@ -52,8 +52,15 @@
>  /* tdata availability of a trigger */
>  typedef bool tdata_avail[TDATA_NUM];
>
> -static tdata_avail tdata_mapping[TRIGGER_NUM] = {
> -[TRIGGER_TYPE2_IDX_0 ... TRIGGER_TYPE2_IDX_1] = { true, true, false },
> +static tdata_avail tdata_mapping[TRIGGER_TYPE_NUM] = {
> +[TRIGGER_TYPE_NO_EXIST] = { false, false, false },
> +[TRIGGER_TYPE_AD_MATCH] = { true, true, true },
> +[TRIGGER_TYPE_INST_CNT] = { true, false, true },
> +[TRIGGER_TYPE_INT] = { true, true, true },
> +[TRIGGER_TYPE_EXCP] = { true, true, true },
> +[TRIGGER_TYPE_AD_MATCH6] = { true, true, true },
> +[TRIGGER_TYPE_EXT_SRC] = { true, false, false },
> +[TRIGGER_TYPE_UNAVAIL] = { true, true, true }
>  };
>
>  /* only breakpoint size 1/2/4/8 supported */
> @@ -67,6 +74,26 @@ static int access_size[SIZE_NUM] = {
>  [6 ... 15] = -1,
>  };
>
> +static inline target_ulong extract_trigger_type(CPURISCVState *env,
> +target_ulong tdata1)
> +{
> +switch (riscv_cpu_mxl(env)) {
> +case MXL_RV32:
> +return extract32(tdata1, 28, 4);
> +case MXL_RV64:
> +return extract64(tdata1, 60, 4);
> +default:
> +g_assert_not_reached();
> +}
> +}
> +
> +static inline target_ulong get_trigger_type(CPURISCVState *env,
> +target_ulong trigger_index)
> +{
> +target_ulong tdata1 = env->type2_trig[trigger_index].mcontrol;
> +return extract_trigger_type(env, tdata1);
> +}
> +
>  static inline target_ulong trigger_type(CPURISCVState *env,
>  trigger_type_t type)
>  {
> @@ -89,15 +116,17 @@ static inline target_ulong trigger_type(CPURISCVState 
> *env,
>
>  bool tdata_available(CPURISCVState *env, int tdata_index)
>  {
> +int trigger_type = get_trigger_type(env, env->trigger_cur);
> +
>  if (unlikely(tdata_index >= TDATA_NUM)) {
>  return false;
>  }
>
> -if (unlikely(env->trigger_cur >= TRIGGER_NUM)) {
> +if (unlikely(env->trigger_cur >= RV_MAX_TRIGGERS)) {
>  return false;
>  }
>
> -return tdata_mapping[env->trigger_cur][tdata_index];
> +return tdata_mapping[trigger_type][tdata_index];
>  }
>
>  target_ulong tselect_csr_read(CPURISCVState *env)
> @@ -137,6 +166,7 @@ static target_ulong tdata1_validate(CPURISCVState *env, 
> target_ulong val,
>  qemu_log_mask(LOG_GUEST_ERROR,
>"ignoring type write to tdata1 register\n");
>  }
> +
>  if (dmode != 0) {
>  qemu_log_mask(LOG_UNIMP, "debug mode is not supported\n");
>  }
> @@ -261,9 +291,8 @@ static void type2_breakpoint_remove(CPURISCVState *env, 
> target_ulong index)
>  }
>
>  static target_ulong type2_reg_read(CPURISCVState *env,
> -   target_ulong trigger_index, int 
> tdata_index)
> +   target_ulong index, int tdata_index)
>  {
> -uint32_t index = trigger_index - TRIGGER_TYPE2_IDX_0;
>  target_ulong tdata;
>
>  switch (tdata_index) {
> @@

Re: [PATCH] target/ppc: cpu_init: Clean up stop state on cpu reset


On 6/14/22 10:29, Frederic Barrat wrote:

The 'resume_as_sreset' attribute of a cpu can be set when a thread is
entering a stop state on ppc books. It causes the thread to be
re-routed to vector 0x100 when woken up by an exception. So it must be
cleaned on reset or a thread might be re-routed unexpectedly after a
reset, when it was not in a stop state and/or when the appropriate
exception handler isn't set up yet.


What is the test scenario, and what are the symptoms?



Signed-off-by: Frederic Barrat 


Reviewed-by: Cédric Le Goater 



---

I didn't find an appropriate commit to add a "Fixes:". It originates
when adding support for power management states but the code looked
quite different in 2016 and it's not clear whether we were supporting
reset then.


It was added when we needed some support for the POWER8 stop states.
About that time.

Thanks,

C.



target/ppc/cpu_init.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 0f891afa04..c16cb8dbe7 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -7186,6 +7186,9 @@ static void ppc_cpu_reset(DeviceState *dev)
  }
  pmu_update_summaries(env);
  }
+
+/* clean any pending stop state */
+env->resume_as_sreset = 0;
  #endif
  hreg_compute_hflags(env);
  env->reserve_addr = (target_ulong)-1ULL;

Re: [PATCH qemu] ppc/spapr: Implement H_WATCHDOG


Hello Alexey,

On 6/8/22 05:01, Alexey Kardashevskiy wrote:

The new PAPR 2.12 defines a watchdog facility managed via the new
H_WATCHDOG hypercall.

This adds H_WATCHDOG support which a proposed driver for pseries uses:
https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=303120

This was tested by running QEMU with a debug kernel and command line:
-append \
  "pseries-wdt.timeout=60 pseries-wdt.nowayout=1 pseries-wdt.action=2"

and running "echo V > /dev/watchdog0" inside the VM.

Signed-off-by: Alexey Kardashevskiy 
---
  include/hw/ppc/spapr.h  |  23 +++-
  hw/ppc/spapr.c  |   5 +
  hw/ppc/spapr_watchdog.c | 239 


Watchdogs are under:

  hw/watchdog/


  hw/ppc/meson.build  |   1 +
  hw/ppc/trace-events |   7 ++
  5 files changed, 274 insertions(+), 1 deletion(-)
  create mode 100644 hw/ppc/spapr_watchdog.c

diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 072dda2c7265..39aa8609df7b 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -164,6 +164,19 @@ struct SpaprMachineClass {
  SpaprIrq *irq;
  };
  
+#define WDT_MAX_WATCHDOGS   4  /* Maximum number of watchdog devices */

+
+#define WDT_HARD_POWER_OFF  0
+#define WDT_HARD_RESTART1
+#define WDT_DUMP_RESTART2
+
+typedef struct SpaprWatchdog {
+unsigned num;
+QEMUTimer timer;
+uint8_t action;
+uint64_t timeout;
+} SpaprWatchdog;



Couldn't we QOM'ify this model?


  /**
   * SpaprMachineState:
   */
@@ -264,6 +277,8 @@ struct SpaprMachineState {
  uint32_t FORM2_assoc_array[NUMA_NODES_MAX_NUM][FORM2_NUMA_ASSOC_SIZE];
  
  Error *fwnmi_migration_blocker;

+
+SpaprWatchdog wds[WDT_MAX_WATCHDOGS];
  };
  
  #define H_SUCCESS 0

@@ -344,6 +359,7 @@ struct SpaprMachineState {
  #define H_P7  -60
  #define H_P8  -61
  #define H_P9  -62
+#define H_NOOP-63
  #define H_UNSUPPORTED -67
  #define H_OVERLAP -68
  #define H_UNSUPPORTED_FLAG -256
@@ -564,8 +580,9 @@ struct SpaprMachineState {
  #define H_SCM_HEALTH0x400
  #define H_RPT_INVALIDATE0x448
  #define H_SCM_FLUSH 0x44C
+#define H_WATCHDOG  0x45C
  
-#define MAX_HCALL_OPCODEH_SCM_FLUSH

+#define MAX_HCALL_OPCODEH_WATCHDOG
  
  /* The hcalls above are standardized in PAPR and implemented by pHyp

   * as well.
@@ -1027,6 +1044,7 @@ extern const VMStateDescription 
vmstate_spapr_cap_large_decr;
  extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
  extern const VMStateDescription vmstate_spapr_cap_fwnmi;
  extern const VMStateDescription vmstate_spapr_cap_rpt_invalidate;
+extern const VMStateDescription vmstate_spapr_wdt;
  
  static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)

  {
@@ -1063,4 +1081,7 @@ target_ulong 
spapr_vof_client_architecture_support(MachineState *ms,
 target_ulong ovec_addr);
  void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt);
  
+/* H_WATCHDOG */

+void spapr_watchdog_init(SpaprMachineState *spapr);
+
  #endif /* HW_SPAPR_H */
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index fd4942e8813c..eaa75f5afd02 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -898,6 +898,8 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void 
*fdt)
  add_str(hypertas, "hcall-hpt-resize");
  }
  
+add_str(hypertas, "hcall-watchdog");

+
  _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
   hypertas->str, hypertas->len));
  g_string_free(hypertas, TRUE);
@@ -2067,6 +2069,7 @@ static const VMStateDescription vmstate_spapr = {
  &vmstate_spapr_cap_fwnmi,
  &vmstate_spapr_fwnmi,
  &vmstate_spapr_cap_rpt_invalidate,
+&vmstate_spapr_wdt,
  NULL
  }
  };
@@ -3051,6 +3054,8 @@ static void spapr_machine_init(MachineState *machine)
  spapr->vof->fw_size = fw_size; /* for claim() on itself */
  spapr_register_hypercall(KVMPPC_H_VOF_CLIENT, spapr_h_vof_client);
  }
+
+spapr_watchdog_init(spapr);
  }
  
  #define DEFAULT_KVM_TYPE "auto"

diff --git a/hw/ppc/spapr_watchdog.c b/hw/ppc/spapr_watchdog.c
new file mode 100644
index ..5206e40078b5
--- /dev/null
+++ b/hw/ppc/spapr_watchdog.c
@@ -0,0 +1,239 @@
+/*
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library;

[PATCH] target/riscv: Update tval for hardware watchpoint

From: Bin Meng 

When a watchpoint is hit, the breakpoint exception should update tval
to point to the faulting virtual address.

Signed-off-by: Bin Meng 
---

 target/riscv/cpu.h| 1 +
 target/riscv/cpu_helper.c | 6 ++
 target/riscv/debug.c  | 2 ++
 3 files changed, 9 insertions(+)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 7d6397acdf..fdcba8978b 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -289,6 +289,7 @@ struct CPUArchState {
 
 /* trigger module */
 target_ulong trigger_cur;
+bool wp_hit;
 type2_trigger_t type2_trig[TRIGGER_TYPE2_NUM];
 
 /* machine specific rdtime callback */
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 4a6700c890..f1564ce51e 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -1345,6 +1345,12 @@ void riscv_cpu_do_interrupt(CPUState *cs)
 target_ulong htval = 0;
 target_ulong mtval2 = 0;
 
+/* only update tval for watchpoint */
+if (cause == RISCV_EXCP_BREAKPOINT && env->wp_hit) {
+env->wp_hit = false;
+tval = env->badaddr;
+}
+
 if  (cause == RISCV_EXCP_SEMIHOST) {
 if (env->priv >= PRV_S) {
 env->gpr[xA0] = do_common_semihosting(cs);
diff --git a/target/riscv/debug.c b/target/riscv/debug.c
index fc6e13222f..89b12c6bef 100644
--- a/target/riscv/debug.c
+++ b/target/riscv/debug.c
@@ -407,6 +407,8 @@ bool riscv_cpu_debug_check_watchpoint(CPUState *cs, 
CPUWatchpoint *wp)
 if ((wp->flags & flags) && (wp->vaddr == addr)) {
 /* check U/S/M bit against current privilege level */
 if ((ctrl >> 3) & BIT(env->priv)) {
+env->wp_hit = true;
+env->badaddr = addr;
 return true;
 }
 }
-- 
2.34.1

Re: [PATCH] hw/riscv: virt: pass random seed to fdt

On Mon, Jun 13, 2022 at 8:08 PM Jason A. Donenfeld  wrote:
>
> If the FDT contains /chosen/rng-seed, then the Linux RNG will use it to
> initialize early. Set this using the usual guest random number
> generation function. This is confirmed to successfully initialize the
> RNG on Linux 5.19-rc2.
>
> Cc: Alistair Francis 
> Signed-off-by: Jason A. Donenfeld 
> ---
>  hw/riscv/virt.c | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index bc424dd2f5..368a723bf6 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -21,6 +21,7 @@
>  #include "qemu/osdep.h"
>  #include "qemu/units.h"
>  #include "qemu/error-report.h"
> +#include "qemu/guest-random.h"
>  #include "qapi/error.h"
>  #include "hw/boards.h"
>  #include "hw/loader.h"
> @@ -998,6 +999,7 @@ static void create_fdt(RISCVVirtState *s, const 
> MemMapEntry *memmap,
>  MachineState *mc = MACHINE(s);
>  uint32_t phandle = 1, irq_mmio_phandle = 1, msi_pcie_phandle = 1;
>  uint32_t irq_pcie_phandle = 1, irq_virtio_phandle = 1;
> +uint8_t rng_seed[32];
>
>  if (mc->dtb) {
>  mc->fdt = load_device_tree(mc->dtb, &s->fdt_size);
> @@ -1046,6 +1048,10 @@ update_bootargs:
>  if (cmdline && *cmdline) {
>  qemu_fdt_setprop_string(mc->fdt, "/chosen", "bootargs", cmdline);
>  }
> +
> +/* Pass seed to RNG. */

nits: please remove the ending period

> +qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed));
> +qemu_fdt_setprop(mc->fdt, "/chosen", "rng-seed", rng_seed, 
> sizeof(rng_seed));
>  }
>
>  static inline DeviceState *gpex_pcie_init(MemoryRegion *sys_mem,
> --

Otherwise,
Reviewed-by: Bin Meng

Re: [PATCH 1/2] hw/nvme: Implement shadow doorbell buffer support

2022-06-14 Thread Jinhao Fan

> On Jun 14, 2022, at 11:41 PM, Keith Busch  wrote:
> 
> It's a pretty nasty hack, and definitely not in compliance with the spec: the
> db_addr is supposed to be read-only from the device side, though I do think
> it's safe for this environment. Unless Klaus or anyone finds something I'm
> missing, I feel this is an acceptable compromise to address this odd
> discrepancy.

:) In my next patch I will check the performance numbers with this hack. Not
sure if updating db_addr value from the host will have any performance 
implications but I guess it should be OK.

> 
> I believe the recommended tag for something like this is "Suggested-by:", but
> no need to credit me. Just fold it into your first patch and send a v2.

Sure. Thanks!

> 
> By the way, I noticed that the patch never updates the cq's ei_addr value. Is
> that on purpose?

Klaus also raised a similar question in a prior comment. I think we need to
figure this out before we move on to the v2 patch. I did this because the
original Google extension patch did not update the cq’s ei_addr. This seems to
make sense because the purpose of the cq’s ei_addr is for the guest to notify
the host about cq head changes when necessary. However, the host does not need
this notification because we let the host proactively check the cq’s db_addr
value when it wants to post a new cqe. This is also the only point where the
host uses the cq’s db_addr. Therefore, it is OK to postpone the check of the
cq’s db_addr to this point, instead of getting timely but not useful
notifications by updating the cq’s ei_addr. This helps to reduce the number of
MMIOs on the cq’s doorbell register.

Klaus, Keith, do you think this design makes sense?

[PATCH] q35: Enable TSEG only when G_SMRAME and TSEG_EN are both enabled

2022-06-14 Thread Zhenzhong Duan

According to spec:
"TSEG Enable (T_EN): Enabling of SMRAM memory for Extended SMRAM space
only. When G_SMRAME = 1 and TSEG_EN = 1, the TSEG is enabled to appear
in the appropriate physical address space. Note that once D_LCK is set,
this bit becomes read only."

Changed to match the spec description.

Signed-off-by: Zhenzhong Duan 
---
 hw/pci-host/q35.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index ab5a47aff560..20da1213747c 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -379,7 +379,8 @@ static void mch_update_smram(MCHPCIState *mch)
 memory_region_set_enabled(&mch->high_smram, false);
 }
 
-if (pd->config[MCH_HOST_BRIDGE_ESMRAMC] & MCH_HOST_BRIDGE_ESMRAMC_T_EN) {
+if ((pd->config[MCH_HOST_BRIDGE_ESMRAMC] & MCH_HOST_BRIDGE_ESMRAMC_T_EN) &&
+(pd->config[MCH_HOST_BRIDGE_SMRAM] & SMRAM_G_SMRAME)) {
 switch (pd->config[MCH_HOST_BRIDGE_ESMRAMC] &
 MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK) {
 case MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_1MB:
-- 
2.25.1

Re: [RFC PATCH v8 00/21] Net Control VQ support with asid in vDPA SVQ

2022-06-14 Thread Jason Wang

On Tue, Jun 14, 2022 at 5:32 PM Eugenio Perez Martin
 wrote:
>
> On Tue, Jun 14, 2022 at 10:20 AM Jason Wang  wrote:
> >
> > On Tue, Jun 14, 2022 at 4:14 PM Eugenio Perez Martin
> >  wrote:
> > >
> > > On Tue, Jun 14, 2022 at 10:02 AM Jason Wang  wrote:
> > > >
> > > > On Tue, Jun 14, 2022 at 12:32 AM Eugenio Perez Martin
> > > >  wrote:
> > > > >
> > > > > On Wed, Jun 8, 2022 at 9:28 PM Eugenio Perez Martin 
> > > > >  wrote:
> > > > > >
> > > > > > On Wed, Jun 8, 2022 at 7:51 AM Jason Wang  
> > > > > > wrote:
> > > > > > >
> > > > > > >
> > > > > > > 在 2022/5/20 03:12, Eugenio Pérez 写道:
> > > > > > > > Control virtqueue is used by networking device for accepting 
> > > > > > > > various
> > > > > > > > commands from the driver. It's a must to support multiqueue and 
> > > > > > > > other
> > > > > > > > configurations.
> > > > > > > >
> > > > > > > > Shadow VirtQueue (SVQ) already makes possible migration of 
> > > > > > > > virtqueue
> > > > > > > > states, effectively intercepting them so qemu can track what 
> > > > > > > > regions of memory
> > > > > > > > are dirty because device action and needs migration. However, 
> > > > > > > > this does not
> > > > > > > > solve networking device state seen by the driver because CVQ 
> > > > > > > > messages, like
> > > > > > > > changes on MAC addresses from the driver.
> > > > > > > >
> > > > > > > > > To solve that, this series uses SVQ infrastructure proposed to 
> > > > > > > > intercept
> > > > > > > > networking control messages used by the device. This way, qemu 
> > > > > > > > is able to
> > > > > > > > update VirtIONet device model and to migrate it.
> > > > > > > >
> > > > > > > > However, to intercept all queues would slow device data 
> > > > > > > > forwarding. To solve
> > > > > > > > that, only the CVQ must be intercepted all the time. This is 
> > > > > > > > achieved using
> > > > > > > > > the ASID infrastructure, that allows different translations 
> > > > > > > > for different
> > > > > > > > virtqueues. The most updated kernel part of ASID is proposed at 
> > > > > > > > [1].
> > > > > > > >
> > > > > > > > You can run qemu in two modes after applying this series: only 
> > > > > > > > intercepting
> > > > > > > > cvq with x-cvq-svq=on or intercept all the virtqueues adding 
> > > > > > > > cmdline x-svq=on:
> > > > > > > >
> > > > > > > > -netdev 
> > > > > > > > type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0,x-cvq-svq=on,x-svq=on
> > > > > > > >
> > > > > > > > First three patches enable the update of the virtio-net device 
> > > > > > > > model for each
> > > > > > > > CVQ message acknoledged by the device.
> > > > > > > >
> > > > > > > > Patches from 5 to 9 enables individual SVQ to copy the buffers 
> > > > > > > > to QEMU's VA.
> > > > > > > > This allows simplyfing the memory mapping, instead of map all 
> > > > > > > > the guest's
> > > > > > > > memory like in the data virtqueues.
> > > > > > > >
> > > > > > > > Patch 10 allows to inject control messages to the device. This 
> > > > > > > > allows to set
> > > > > > > > state to the device both at QEMU startup and at live migration 
> > > > > > > > destination. In
> > > > > > > > the future, this may also be used to emulate _F_ANNOUNCE.
> > > > > > > >
> > > > > > > > Patch 11 updates kernel headers, but it assign random numbers 
> > > > > > > > to needed ioctls
> > > > > > > > because they are still not accepted in the kernel.
> > > > > > > >
> > > > > > > > Patches 12-16 enables the set of the features of the net device 
> > > > > > > > model to the
> > > > > > > > vdpa device at device start.
> > > > > > > >
> > > > > > > > Last ones enables the sepparated ASID and SVQ.
> > > > > > > >
> > > > > > > > Comments are welcomed.
> > > > > > >
> > > > > > >
> > > > > > > As discussed, I think we need to split this huge series into 
> > > > > > > smaller ones:
> > > > > > >
> > > > > > > 1) shadow CVQ only, this makes rx-filter-event work
> > > > > > > 2) ASID support for CVQ
> > > > > > >
> > > > > > > And for 1) we need consider whether or not it could be simplified.
> > > > > > >
> > > > > > > Or do it in reverse order, since if we do 1) first, we may have 
> > > > > > > security
> > > > > > > issues.
> > > > > > >
> > > > > >
> > > > > > I'm ok with both, but I also think 2) before 1) might make more 
> > > > > > sense.
> > > > > > There is no way to only shadow CVQ otherwise ATM.
> > > > > >
> > > > >
> > > > > On second thought, that order is kind of harder.
> > > > >
> > > > > If we only map CVQ buffers, we need to either:
> > > > > a. Copy them to controlled buffers
> > > > > b. Track properly when to unmap them
> > > >
> > > > Just to make sure we're on the same page:
> > > >
> > > > I meant we can start with e.g having a dedicated ASID for CVQ but
> > > > still using CVQ passthrough.
> > > >
> > >
> > > That would imply duplicating all the memory listener updates to both
> > > ASIDs. That part of the code needs to be reverted. I'm ok with that,
> > > but I'm not

Re: [RFC PATCH v2 3/8] qapi: net: add stream and dgram netdevs

2022-06-14 Thread Laurent Vivier


On 13/05/2022 13:44, Markus Armbruster wrote:

Laurent Vivier  writes:


Copied from socket netdev file and modified to use SocketAddress
to be able to introduce new features like unix socket.

"udp" and "mcast" are squashed into dgram netdev, multicast is detected
according to the IP address type.
"listen" and "connect" modes are managed by stream netdev. An optional
parameter "server" defines the mode (server by default)

Signed-off-by: Laurent Vivier 
---
  hmp-commands.hx |   2 +-
  net/clients.h   |   6 +
  net/dgram.c | 630 
  net/hub.c   |   2 +
  net/meson.build |   2 +
  net/net.c   |  24 +-
  net/stream.c| 425 
  qapi/net.json   |  38 ++-
  8 files changed, 1125 insertions(+), 4 deletions(-)
  create mode 100644 net/dgram.c
  create mode 100644 net/stream.c

diff --git a/hmp-commands.hx b/hmp-commands.hx
index 03e6a73d1f55..172dbab1dfed 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1269,7 +1269,7 @@ ERST
  {
  .name   = "netdev_add",
  .args_type  = "netdev:O",
-.params = 
"[user|tap|socket|vde|bridge|hubport|netmap|vhost-user],id=str[,prop=value][,...]",
+.params = 
"[user|tap|socket|stream|dgram|vde|bridge|hubport|netmap|vhost-user],id=str[,prop=value][,...]",
  .help   = "add host network device",
  .cmd= hmp_netdev_add,
  .command_completion = netdev_add_completion,


Does qemu-options.hx need an update, too?


Done




diff --git a/net/clients.h b/net/clients.h
index 92f9b59aedce..c1b51d79b147 100644
--- a/net/clients.h
+++ b/net/clients.h
@@ -40,6 +40,12 @@ int net_init_hubport(const Netdev *netdev, const char *name,
  int net_init_socket(const Netdev *netdev, const char *name,
  NetClientState *peer, Error **errp);
  
+int net_init_stream(const Netdev *netdev, const char *name,

+NetClientState *peer, Error **errp);
+
+int net_init_dgram(const Netdev *netdev, const char *name,
+   NetClientState *peer, Error **errp);
+
  int net_init_tap(const Netdev *netdev, const char *name,
   NetClientState *peer, Error **errp);
  
diff --git a/net/dgram.c b/net/dgram.c

new file mode 100644
index ..aa4240501ed0
--- /dev/null
+++ b/net/dgram.c
@@ -0,0 +1,630 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */


Blank line here, please.

Why not GPLv2+?


I've kept the original text copied from net/socket.c, but I can change this to
GPLv2+.




+#include "qemu/osdep.h"


[...]


diff --git a/net/net.c b/net/net.c
index 2aab7167316c..fd6b30a10c57 100644
--- a/net/net.c
+++ b/net/net.c
@@ -1015,6 +1015,8 @@ static int (* const 
net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
  #endif
  [NET_CLIENT_DRIVER_TAP]   = net_init_tap,
  [NET_CLIENT_DRIVER_SOCKET]= net_init_socket,
+[NET_CLIENT_DRIVER_STREAM]= net_init_stream,
+[NET_CLIENT_DRIVER_DGRAM] = net_init_dgram,
  #ifdef CONFIG_VDE
  [NET_CLIENT_DRIVER_VDE]   = net_init_vde,
  #endif
@@ -1097,6 +1099,8 @@ void show_netdevs(void)
  int idx;
  const char *available_netdevs[] = {
  "socket",
+"stream",
+"dgram",
  "hubport",
  "tap",
  #ifdef CONFIG_SLIRP
@@ -1606,7 +1610,25 @@ int net_init_clients(Error **errp)
   */
  static bool netdev_is_modern(const char *optarg)
  {
-return false;
+static QemuOptsList dummy_opts = {
+.name = "netdev",
+.implied_opt_name = "type",
+.head = QTAILQ_HEAD_INITIALIZER(dummy_opts.head),
+.desc = { { } },
+};
+const char *netdev;
+QemuOpts *opts;
+bool is_modern;
+
+opts = qemu_opts_parse(&dummy_opts, optarg, true, &error_fatal);
+netdev = qemu_opt_get(opts, "type");
+
+is_modern =

[PATCH] ui/cocoa: Fix clipboard text release

[-NSPasteboard dataForType:] returns an autoreleased NSData object,
and calling its release method will result in a double free when
the global autorelease pool is released. Use NSAutoreleasePool to
release it properly.

Signed-off-by: Akihiko Odaki 
---
 ui/cocoa.m | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ui/cocoa.m b/ui/cocoa.m
index 84c84e98fc5..6a4dccff7f0 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -1894,16 +1894,18 @@ static void cocoa_clipboard_notify(Notifier *notifier, 
void *data)
 static void cocoa_clipboard_request(QemuClipboardInfo *info,
 QemuClipboardType type)
 {
+NSAutoreleasePool *pool;
 NSData *text;
 
 switch (type) {
 case QEMU_CLIPBOARD_TYPE_TEXT:
+pool = [[NSAutoreleasePool alloc] init];
 text = [[NSPasteboard generalPasteboard] 
dataForType:NSPasteboardTypeString];
 if (text) {
 qemu_clipboard_set_data(, info, type,
 [text length], [text bytes], true);
-[text release];
 }
+[pool release];
 break;
 default:
 break;
-- 
2.32.1 (Apple Git-133)

[PATCH] ui/cocoa: Take refresh rate into account

Signed-off-by: Akihiko Odaki 
---
 meson.build |  3 ++-
 ui/cocoa.m  | 12 
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/meson.build b/meson.build
index 0c2e11ff071..0f83f3730af 100644
--- a/meson.build
+++ b/meson.build
@@ -575,7 +575,8 @@ if get_option('attr').allowed()
   endif
 endif
 
-cocoa = dependency('appleframeworks', modules: 'Cocoa', required: 
get_option('cocoa'))
+cocoa = dependency('appleframeworks', modules: ['Cocoa', 'CoreVideo'],
+   required: get_option('cocoa'))
 if cocoa.found() and get_option('sdl').enabled()
   error('Cocoa and SDL cannot be enabled at the same time')
 endif
diff --git a/ui/cocoa.m b/ui/cocoa.m
index 84c84e98fc5..a3949c6 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -561,8 +561,20 @@ - (void) updateUIInfoLocked
 CGDirectDisplayID display = [[description 
objectForKey:@"NSScreenNumber"] unsignedIntValue];
 NSSize screenSize = [[[self window] screen] frame].size;
 CGSize screenPhysicalSize = CGDisplayScreenSize(display);
+CVDisplayLinkRef displayLink;
 
 frameSize = isFullscreen ? screenSize : [self frame].size;
+
+if (!CVDisplayLinkCreateWithCGDisplay(display, )) {
+CVTime period = 
CVDisplayLinkGetNominalOutputVideoRefreshPeriod(displayLink);
+CVDisplayLinkRelease(displayLink);
+if (!(period.flags & kCVTimeIsIndefinite)) {
+update_displaychangelistener(,
+ 1000 * period.timeValue / 
period.timeScale);
+info.refresh_rate = (int64_t)1000 * period.timeScale / 
period.timeValue;
+}
+}
+
 info.width_mm = frameSize.width / screenSize.width * 
screenPhysicalSize.width;
 info.height_mm = frameSize.height / screenSize.height * 
screenPhysicalSize.height;
 } else {
-- 
2.32.1 (Apple Git-133)

[PATCH v4 4/4] net: Use bundle mechanism

Signed-off-by: Akihiko Odaki 
---
 include/net/net.h | 2 +-
 meson.build   | 8 +++-
 net/tap.c | 6 +-
 qemu-options.hx   | 4 ++--
 4 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/include/net/net.h b/include/net/net.h
index 523136c7acb..4a5ed27a4b7 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -228,7 +228,7 @@ NetClientState *net_hub_port_find(int hub_id);
 
 #define DEFAULT_NETWORK_SCRIPT CONFIG_SYSCONFDIR "/qemu-ifup"
 #define DEFAULT_NETWORK_DOWN_SCRIPT CONFIG_SYSCONFDIR "/qemu-ifdown"
-#define DEFAULT_BRIDGE_HELPER CONFIG_QEMU_HELPERDIR "/qemu-bridge-helper"
+#define DEFAULT_BUNDLE_BRIDGE_HELPER CONFIG_QEMU_BUNDLE_HELPERDIR 
"/qemu-bridge-helper"
 #define DEFAULT_BRIDGE_INTERFACE "br0"
 
 void qdev_set_nic_properties(DeviceState *dev, NICInfo *nd);
diff --git a/meson.build b/meson.build
index e7b385eaf34..72d006f228e 100644
--- a/meson.build
+++ b/meson.build
@@ -1686,7 +1686,7 @@ config_host_data.set_quoted('CONFIG_QEMU_CONFDIR', 
get_option('prefix') / qemu_c
 config_host_data.set_quoted('CONFIG_QEMU_BUNDLE_DATADIR', qemu_datadir)
 config_host_data.set_quoted('CONFIG_QEMU_DESKTOPDIR', get_option('prefix') / 
qemu_desktopdir)
 config_host_data.set_quoted('CONFIG_QEMU_FIRMWAREPATH', get_option('prefix') / 
get_option('qemu_firmwarepath'))
-config_host_data.set_quoted('CONFIG_QEMU_HELPERDIR', get_option('prefix') / 
get_option('libexecdir'))
+config_host_data.set_quoted('CONFIG_QEMU_BUNDLE_HELPERDIR', 
get_option('libexecdir'))
 config_host_data.set_quoted('CONFIG_QEMU_BUNDLE_ICONDIR', qemu_icondir)
 config_host_data.set_quoted('CONFIG_QEMU_LOCALEDIR', get_option('prefix') / 
get_option('localedir'))
 config_host_data.set_quoted('CONFIG_QEMU_LOCALSTATEDIR', get_option('prefix') 
/ get_option('localstatedir'))
@@ -3573,6 +3573,12 @@ if have_tools
dependencies: [authz, crypto, io, qom, qemuutil,
   libcap_ng, mpathpersist],
install: true)
+
+run_command('mkdir', '-p', qemu_bundledir / get_option('libexecdir'),
+check: true)
+
+run_command('ln', '-sf', '../../qemu-bridge-helper', qemu_bundledir / 
get_option('libexecdir'),
+check: true)
   endif
 
   if have_ivshmem
diff --git a/net/tap.c b/net/tap.c
index b3ddfd4a74b..ea013ca3873 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -507,7 +507,11 @@ static int net_bridge_run_helper(const char *helper, const 
char *bridge,
 sigprocmask(SIG_BLOCK, , );
 
 if (!helper) {
-helper = default_helper = get_relocated_path(DEFAULT_BRIDGE_HELPER);
+helper = default_helper = find_bundle(DEFAULT_BUNDLE_BRIDGE_HELPER);
+if (!helper) {
+error_setg(errp, "bridge helper not found");
+return -1;
+}
 }
 
 if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
diff --git a/qemu-options.hx b/qemu-options.hx
index 377d22fbd82..1959db01061 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2665,7 +2665,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
 "to configure it and 'dfile' (default=" 
DEFAULT_NETWORK_DOWN_SCRIPT ")\n"
 "to deconfigure it\n"
 "use '[down]script=no' to disable script execution\n"
-"use network helper 'helper' (default=" 
DEFAULT_BRIDGE_HELPER ") to\n"
+"use network helper 'helper' (default=" 
DEFAULT_BUNDLE_BRIDGE_HELPER ") to\n"
 "configure it\n"
 "use 'fd=h' to connect to an already opened TAP 
interface\n"
 "use 'fds=x:y:...:z' to connect to already opened 
multiqueue capable TAP interfaces\n"
@@ -2684,7 +2684,7 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
 "-netdev bridge,id=str[,br=bridge][,helper=helper]\n"
 "configure a host TAP network backend with ID 'str' that 
is\n"
 "connected to a bridge (default=" DEFAULT_BRIDGE_INTERFACE 
")\n"
-"using the program 'helper (default=" 
DEFAULT_BRIDGE_HELPER ")\n"
+"using the program 'helper (default=" 
DEFAULT_BUNDLE_BRIDGE_HELPER ")\n"
 #endif
 #ifdef __linux__
 "-netdev 
l2tpv3,id=str,src=srcaddr,dst=dstaddr[,srcport=srcport][,dstport=dstport]\n"
-- 
2.32.1 (Apple Git-133)

RE: [RFC PATCH 00/13] Add a plugin to support out-of-band live migration for VFIO pass-through device

2022-06-14 Thread Dong, Eddie



> -Original Message-
> From: Alex Williamson 
> Sent: Wednesday, June 1, 2022 11:01 AM
> To: Dong, Eddie 
> Cc: Rao, Lei ; Tian, Kevin ; Zeng,
> Jason ; quint...@redhat.com; dgilb...@redhat.com;
> Li, Yadong ; Liu, Yi L ; qemu-
> de...@nongnu.org
> Subject: Re: [RFC PATCH 00/13] Add a plugin to support out-of-band live
> migration for VFIO pass-through device
> 
> On Wed, 1 Jun 2022 17:09:25 +
> "Dong, Eddie"  wrote:
> 
> > > -Original Message-
> > > From: Qemu-devel  > > bounces+eddie.dong=intel@nongnu.org> On Behalf Of Alex
> > > bounces+Williamson
> > > On Tue, 24 May 2022 14:18:35 +0800
> > > Lei Rao  wrote:
> > > > This proposal adopts a plugin mechanism (an example can be found
> > > > in
> > > > [1]) given that IPU/DPU vendors usually implement proprietary
> > > > migration interfaces without a standard. But we are also open if
> > > > an alternative option makes better sense, e.g. via loadable
> > > > modules (with Qemu supporting gRPC or JSON-RPC support) or an IPC
> > > > mechanism similar
> > > to vhost-user.
> > >
> > > AFAIU, QEMU is not interested in supporting plugin modules,
> > > especially proprietary ones.  I don't see that a case has really
> > > been made that this cannot be done in-band, through a vfio-pci
> > > variant driver, possibly making use of proprietary interfaces to a
> > > userspace agent if necessary (though please don't ask such to be
> > > accepted in-tree for the kernel either).  A vfio- user device server
> > > might also host such proprietary, device specific agents while
> > > supporting the standard, in-band migration interface.  Thanks,
> > >
> >
> > Thanks Alex. Removing plug-in module is not a problem.
> >
> > Do you mean to implement the migration and protocol handling inside
> > Qemu-client (probably vfio-client after the VFIO-user is merged)? Or
> > to build as part of libvfio-user? We can also build it as a separate
> > process of Qemu-server as part of Multi-Process Qemu.
> 
> AIUI, the QEMU "client" in a vfio-user configuration is simply QEMU itself.
> The vfio-user "server" provides the actual device implementation, which
> could support different license models, depending on what libraries or
> existing code is incorporated to implement that server.  The QEMU remote
> machine type is simply a QEMU-based implementation of a vfio-user server.
> The vfio-user server is analogous to a vfio-pci variant driver in the
> kernel/ioctl interface model.  The vfio-user client should be device agnostic,
> possibly with similar exceptions we have today via device specific quirk
> handling for the vfio kernel interface.
> 
> > In here, the protocol between host CPU and SoC is based on gRPC, which
> > support Rust code in client (Host CPU side here) more than C/C++. Do
> > you have any suggestion to better support Rust code with Qemu C/C++
> > code?
> 
> I'm not qualified to provide suggestions regarding Rust code integration with
> QEMU, but I think that's only required if the device specific migration
> support is on the "client".  As above, I don't think that's the correct model,
> the vfio migration protocol is meant to support any device specific
> requirements on the device end of the connection, ie. the "server" end for
> vfio-user, which can be an entirely separate, non-QEMU based process.  I
> think there are also ways to write kernel drivers in Rust, so possibly a 
> kernel
> interface vfio-pci variant driver could also work.  Thanks,
> 


Alex:
Thanks for your suggestion. Yes, I agree that QEMU (the client) is, by nature, 
agnostic of physical device knowledge.
With more thinking, it seems that:
1: Solution to have a separate kernel driver:   
The host CPU talks to the SoC chip of the device over a 
TCP/IP network, plus a high-level protocol (gRPC, JSON, ...). This is 
going to be very complicated and might be hard for the community to accept.

2: Implement as a full qemu-server device model.
This way works if we implement a full device model in 
vfio-user, but given that the device (NVME for now) works in VFIO passthru mode 
for performance, the issue Kevin Tian raised in another email is a real concern 
too.

3: Implement partial (or supplemental) feature in Qemu-server device 
model.
This solution defines a QEMU/VFIO migration interface between 
the client and the server.  The client-side migration proxy uses a 
hardware-transparent interface to talk to the remote migration server. The remote 
server may manage device-specific (more precisely, protocol-specific) 
components to talk to different hardware backends. In this case, we rely on 
today's VFIO module to manage the device and manipulate it through the kernel 
driver. During migration, it will use the migration proxy to get/set state, 
etc.
Users can configure the additional Qemu command line parameter 
to choose a remote migration-proxy for a VFIO device.

[PATCH v4 3/4] ui/icons: Use bundle mechanism

Signed-off-by: Akihiko Odaki 
---
 meson.build  |  2 +-
 ui/cocoa.m   | 29 -
 ui/gtk.c |  6 +-
 ui/icons/meson.build | 36 
 ui/sdl2.c| 18 +++---
 5 files changed, 61 insertions(+), 30 deletions(-)

diff --git a/meson.build b/meson.build
index c573815813f..e7b385eaf34 100644
--- a/meson.build
+++ b/meson.build
@@ -1687,7 +1687,7 @@ config_host_data.set_quoted('CONFIG_QEMU_BUNDLE_DATADIR', 
qemu_datadir)
 config_host_data.set_quoted('CONFIG_QEMU_DESKTOPDIR', get_option('prefix') / 
qemu_desktopdir)
 config_host_data.set_quoted('CONFIG_QEMU_FIRMWAREPATH', get_option('prefix') / 
get_option('qemu_firmwarepath'))
 config_host_data.set_quoted('CONFIG_QEMU_HELPERDIR', get_option('prefix') / 
get_option('libexecdir'))
-config_host_data.set_quoted('CONFIG_QEMU_ICONDIR', get_option('prefix') / 
qemu_icondir)
+config_host_data.set_quoted('CONFIG_QEMU_BUNDLE_ICONDIR', qemu_icondir)
 config_host_data.set_quoted('CONFIG_QEMU_LOCALEDIR', get_option('prefix') / 
get_option('localedir'))
 config_host_data.set_quoted('CONFIG_QEMU_LOCALSTATEDIR', get_option('prefix') 
/ get_option('localstatedir'))
 config_host_data.set_quoted('CONFIG_QEMU_MODDIR', get_option('prefix') / 
qemu_moddir)
diff --git a/ui/cocoa.m b/ui/cocoa.m
index 84c84e98fc5..bd8a3211d3b 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -1562,21 +1562,24 @@ - (BOOL)verifyQuit
 - (IBAction) do_about_menu_item: (id) sender
 {
 NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
-char *icon_path_c = get_relocated_path(CONFIG_QEMU_ICONDIR 
"/hicolor/512x512/apps/qemu.png");
-NSString *icon_path = [NSString stringWithUTF8String:icon_path_c];
-g_free(icon_path_c);
-NSImage *icon = [[NSImage alloc] initWithContentsOfFile:icon_path];
+char *icon_path_c = find_bundle(CONFIG_QEMU_BUNDLE_ICONDIR 
"/hicolor/512x512/apps/qemu.png");
 NSString *version = @"QEMU emulator version " QEMU_FULL_VERSION;
 NSString *copyright = @QEMU_COPYRIGHT;
-NSDictionary *options;
-if (icon) {
-options = @{
-NSAboutPanelOptionApplicationIcon : icon,
-NSAboutPanelOptionApplicationVersion : version,
-@"Copyright" : copyright,
-};
-[icon release];
-} else {
+NSDictionary *options = nil;
+if (icon_path_c) {
+NSString *icon_path = [NSString stringWithUTF8String:icon_path_c];
+g_free(icon_path_c);
+NSImage *icon = [[NSImage alloc] initWithContentsOfFile:icon_path];
+if (icon) {
+options = @{
+NSAboutPanelOptionApplicationIcon : icon,
+NSAboutPanelOptionApplicationVersion : version,
+@"Copyright" : copyright,
+};
+[icon release];
+}
+}
+if (!options) {
 options = @{
 NSAboutPanelOptionApplicationVersion : version,
 @"Copyright" : copyright,
diff --git a/ui/gtk.c b/ui/gtk.c
index 2a791dd2aa0..8f7afe795f4 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -2321,7 +2321,11 @@ static void gtk_display_init(DisplayState *ds, 
DisplayOptions *opts)
 s->opts = opts;
 
 theme = gtk_icon_theme_get_default();
-dir = get_relocated_path(CONFIG_QEMU_ICONDIR);
+dir = find_bundle(CONFIG_QEMU_BUNDLE_ICONDIR);
+if (dir) {
+gtk_icon_theme_prepend_search_path(theme, dir);
+g_free(dir);
+}
 gtk_icon_theme_prepend_search_path(theme, dir);
 g_free(dir);
 g_set_prgname("qemu");
diff --git a/ui/icons/meson.build b/ui/icons/meson.build
index 12c52080ebd..23292773074 100644
--- a/ui/icons/meson.build
+++ b/ui/icons/meson.build
@@ -1,13 +1,33 @@
+icons = [
+  {
+'source': 'qemu_32x32.bmp',
+'install': 'hicolor' / '32x32' / 'apps' / 'qemu.bmp',
+  },
+  {
+'source': 'qemu.svg',
+'install': 'hicolor' / 'scalable' / 'apps' / 'qemu.svg',
+  },
+]
+
 foreach s: [16, 24, 32, 48, 64, 128, 256, 512]
   s = '@0@x@0@'.format(s.to_string())
-  install_data('qemu_@0@.png'.format(s),
-   rename: 'qemu.png',
-   install_dir: qemu_icondir / 'hicolor' / s / 'apps')
+  icons += {
+'source': 'qemu_@0@.png'.format(s),
+'install': 'hicolor' / s / 'apps' / 'qemu.png',
+  }
 endforeach
 
-install_data('qemu_32x32.bmp',
- rename: 'qemu.bmp',
- install_dir: qemu_icondir / 'hicolor' / '32x32' / 'apps')
+foreach icon: icons
+  source = icon.get('source')
+  install = icon.get('install')
+
+  install_data(source,
+   rename: fs.name(install),
+   install_dir: qemu_icondir / fs.parent(install))
 
-install_data('qemu.svg',
- install_dir: qemu_icondir / 'hicolor' / 'scalable' / 'apps')
+  run_command('mkdir', '-p', qemu_bundledir / qemu_icondir / 
fs.parent(install),
+  check: true)
+
+  run_command('ln', '-sf', meson.current_source_dir() / source, qemu_bundledir 
/ qemu_icondir / install,
+

[PATCH v4 2/4] datadir: Use bundle mechanism

softmmu/datadir.c had its own implementation to find files in the
build tree, but now bundle mechanism provides the unified
implementation which works for datadir and the other files.

Signed-off-by: Akihiko Odaki 
---
 .travis.yml |  2 +-
 meson.build |  3 ++-
 pc-bios/keymaps/meson.build |  3 +++
 pc-bios/meson.build | 17 +
 scripts/oss-fuzz/build.sh   |  2 +-
 softmmu/datadir.c   | 35 ---
 tests/qtest/fuzz/fuzz.c | 15 ---
 tests/vm/fedora |  2 +-
 tests/vm/freebsd|  2 +-
 tests/vm/netbsd |  2 +-
 tests/vm/openbsd|  2 +-
 11 files changed, 32 insertions(+), 53 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 9afc4a54b8f..9fee2167b95 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -223,7 +223,7 @@ jobs:
 - BUILD_RC=0 && make -j${JOBS} || BUILD_RC=$?
 - |
   if [ "$BUILD_RC" -eq 0 ] ; then
-  mv pc-bios/s390-ccw/*.img pc-bios/ ;
+  mv pc-bios/s390-ccw/*.img qemu-bundle/share/qemu ;
   ${TEST_CMD} ;
   else
   $(exit $BUILD_RC);
diff --git a/meson.build b/meson.build
index 0c2e11ff071..c573815813f 100644
--- a/meson.build
+++ b/meson.build
@@ -32,6 +32,7 @@ if get_option('qemu_suffix').startswith('/')
   error('qemu_suffix cannot start with a /')
 endif
 
+qemu_bundledir = meson.project_build_root() / 'qemu-bundle'
 qemu_confdir = get_option('sysconfdir') / get_option('qemu_suffix')
 qemu_datadir = get_option('datadir') / get_option('qemu_suffix')
 qemu_docdir = get_option('docdir') / get_option('qemu_suffix')
@@ -1682,7 +1683,7 @@ endif
 config_host_data.set_quoted('CONFIG_BINDIR', get_option('prefix') / 
get_option('bindir'))
 config_host_data.set_quoted('CONFIG_PREFIX', get_option('prefix'))
 config_host_data.set_quoted('CONFIG_QEMU_CONFDIR', get_option('prefix') / 
qemu_confdir)
-config_host_data.set_quoted('CONFIG_QEMU_DATADIR', get_option('prefix') / 
qemu_datadir)
+config_host_data.set_quoted('CONFIG_QEMU_BUNDLE_DATADIR', qemu_datadir)
 config_host_data.set_quoted('CONFIG_QEMU_DESKTOPDIR', get_option('prefix') / 
qemu_desktopdir)
 config_host_data.set_quoted('CONFIG_QEMU_FIRMWAREPATH', get_option('prefix') / 
get_option('qemu_firmwarepath'))
 config_host_data.set_quoted('CONFIG_QEMU_HELPERDIR', get_option('prefix') / 
get_option('libexecdir'))
diff --git a/pc-bios/keymaps/meson.build b/pc-bios/keymaps/meson.build
index 44247a12b54..b8bac138756 100644
--- a/pc-bios/keymaps/meson.build
+++ b/pc-bios/keymaps/meson.build
@@ -67,3 +67,6 @@ if native_qemu_keymap.found()
 endif
 
 install_data(['sl', 'sv'], install_dir: qemu_datadir / 'keymaps')
+
+run_command('ln', '-sf', '../../../pc-bios/keymaps', qemu_bundledir / 
qemu_datadir,
+check: true)
diff --git a/pc-bios/meson.build b/pc-bios/meson.build
index 41ba1c0ec7b..d1ff75b0b13 100644
--- a/pc-bios/meson.build
+++ b/pc-bios/meson.build
@@ -1,3 +1,5 @@
+run_command('mkdir', '-p', qemu_bundledir / qemu_datadir, check: true)
+
 roms = []
 if unpack_edk2_blobs
   fds = [
@@ -20,6 +22,9 @@ if unpack_edk2_blobs
   install: get_option('install_blobs'),
   install_dir: qemu_datadir,
   command: [ bzip2, '-dc', '@INPUT0@' ])
+
+run_command('ln', '-sf', '../../../pc-bios' / f, qemu_bundledir / 
qemu_datadir,
+check: true)
   endforeach
 endif
 
@@ -85,15 +90,11 @@ blobs = [
   'vof-nvram.bin',
 ]
 
-ln_s = [find_program('ln', required: true), '-sf']
+install_data(blobs, install_dir: qemu_datadir)
+
 foreach f : blobs
-  roms += custom_target(f,
-build_by_default: have_system,
-output: f,
-input: files('meson.build'),# dummy input
-install: get_option('install_blobs'),
-install_dir: qemu_datadir,
-command: [ ln_s, meson.project_source_root() / 'pc-bios' / f, 
'@OUTPUT@' ])
+  run_command('ln', '-sf', meson.current_source_dir() / f, qemu_bundledir / 
qemu_datadir,
+  check: true)
 endforeach
 
 subdir('descriptors')
diff --git a/scripts/oss-fuzz/build.sh b/scripts/oss-fuzz/build.sh
index 98b56e05210..cbf8b3080e9 100755
--- a/scripts/oss-fuzz/build.sh
+++ b/scripts/oss-fuzz/build.sh
@@ -88,7 +88,7 @@ if [ "$GITLAB_CI" != "true" ]; then
 fi
 
 # Copy over the datadir
-cp  -r ../pc-bios/ "$DEST_DIR/pc-bios"
+cp  -r ../pc-bios/ "$DEST_DIR/qemu-bundle/share/qemu"
 
 targets=$(./qemu-fuzz-i386 | awk '$1 ~ /\*/  {print $2}')
 base_copy="$DEST_DIR/qemu-fuzz-i386-target-$(echo "$targets" | head -n 1)"
diff --git a/softmmu/datadir.c b/softmmu/datadir.c
index 160cac999a6..4dadf0e010c 100644
--- a/softmmu/datadir.c
+++ b/softmmu/datadir.c
@@ -35,6 +35,7 @@ char *qemu_find_file(int type, const char *name)
 int i;
 const char *subdir;
 char *buf;
+char *bundle;
 
 /* Try the name as a straight path first */
 if

[PATCH v4 1/4] cutils: Introduce bundle mechanism

Developers often run QEMU without installing it. The bundle mechanism
allows looking up files that should be present in an installation even in
such a situation.

It is a general mechanism and can find any files located relative
to the installation tree. The build tree must have a new directory,
qemu-bundle, to represent what files the installation tree would
have for reference by the executables.

Signed-off-by: Akihiko Odaki 
---
 include/qemu/cutils.h | 19 +++
 util/cutils.c | 33 +
 2 files changed, 52 insertions(+)

diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h
index 40e10e19a7e..3b66026cd3c 100644
--- a/include/qemu/cutils.h
+++ b/include/qemu/cutils.h
@@ -213,6 +213,25 @@ const char *qemu_get_exec_dir(void);
  */
 char *get_relocated_path(const char *dir);
 
+/**
+ * find_bundle:
+ * @path: Relative path
+ *
+ * Returns a path for the specified directory or file bundled in QEMU. It uses
+ * the directory of the running executable as the prefix first. See
+ * get_relocated_path() for the details. The next candidate is "qemu-bundle"
+ * directory in the directory of the running executable. "qemu-bundle"
+ * directory is typically present in the build tree.
+ *
+ * The returned string should be freed by the caller.
+ *
+ * Returns: a path that can access the bundle, or NULL if no matching bundle
+ * exists.
+ */
+char *find_bundle(const char *path);
+
+void list_bundle_candidates(const char *path);
+
 static inline const char *yes_no(bool b)
 {
  return b ? "yes" : "no";
diff --git a/util/cutils.c b/util/cutils.c
index a58bcfd80e7..fe3bbb1c4eb 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -1086,3 +1086,36 @@ char *get_relocated_path(const char *dir)
 }
 return g_string_free(result, false);
 }
+
+static const char * const bundle_formats[] = {
+"%s" G_DIR_SEPARATOR_S ".." G_DIR_SEPARATOR_S "%s",
+"%s" G_DIR_SEPARATOR_S "qemu-bundle" G_DIR_SEPARATOR_S "%s"
+};
+
+char *find_bundle(const char *path)
+{
+const char *dir = qemu_get_exec_dir();
+char *candidate;
+int i;
+
+for (i = 0; i < ARRAY_SIZE(bundle_formats); i++) {
+candidate = g_strdup_printf(bundle_formats[i], dir, path);
+if (access(candidate, R_OK) == 0) {
+return candidate;
+}
+g_free(candidate);
+}
+
+return NULL;
+}
+
+void list_bundle_candidates(const char *path)
+{
+const char *dir = qemu_get_exec_dir();
+int i;
+
+for (i = 0; i < ARRAY_SIZE(bundle_formats); i++) {
+printf(bundle_formats[i], dir, path);
+putc('\n', stdout);
+}
+}
-- 
2.32.1 (Apple Git-133)

[PATCH v4 0/4] cutils: Introduce bundle mechanism

Developers often run QEMU without installing it. The bundle mechanism
allows looking up files that should be present in an installation even in
such a situation.

It is a general mechanism and can find any files located relative
to the installation tree. The build tree must have a new directory,
qemu-bundle, to represent what files the installation tree would
have for reference by the executables.

v4:
* Add Daniel P. Berrangé to CC. Hopefully this helps merging his patch:
  https://mail.gnu.org/archive/html/qemu-devel/2022-06/msg02276.html
* Rebased to the latest QEMU.

v3:
* Note that the bundle mechanism is for any files located relative to the
  installation tree including but not limited to datadir. (Peter Maydell)
* Fix "bridge" typo (Philippe Mathieu-Daudé)

v2: Rebased to the latest QEMU.

Akihiko Odaki (4):
  cutils: Introduce bundle mechanism
  datadir: Use bundle mechanism
  ui/icons: Use bundle mechanism
  net: Use bundle mechanism

 .travis.yml |  2 +-
 include/net/net.h   |  2 +-
 include/qemu/cutils.h   | 19 +++
 meson.build | 13 ++---
 net/tap.c   |  6 +-
 pc-bios/keymaps/meson.build |  3 +++
 pc-bios/meson.build | 17 +
 qemu-options.hx |  4 ++--
 scripts/oss-fuzz/build.sh   |  2 +-
 softmmu/datadir.c   | 35 ---
 tests/qtest/fuzz/fuzz.c | 15 ---
 tests/vm/fedora |  2 +-
 tests/vm/freebsd|  2 +-
 tests/vm/netbsd |  2 +-
 tests/vm/openbsd|  2 +-
 ui/cocoa.m  | 29 -
 ui/gtk.c|  6 +-
 ui/icons/meson.build| 36 
 ui/sdl2.c   | 18 +++---
 util/cutils.c   | 33 +
 20 files changed, 160 insertions(+), 88 deletions(-)

-- 
2.32.1 (Apple Git-133)

Re: [PATCH v6 0/8] KVM: mm: fd-based approach for supporting KVM guest private memory

2022-06-14 Thread Andy Lutomirski

On Tue, Jun 14, 2022 at 12:09 PM Sean Christopherson  wrote:
>
> On Tue, Jun 14, 2022, Andy Lutomirski wrote:
> > On Tue, Jun 14, 2022 at 12:32 AM Chao Peng  
> > wrote:
> > >
> > > On Thu, Jun 09, 2022 at 08:29:06PM +, Sean Christopherson wrote:
> > > > On Wed, Jun 08, 2022, Vishal Annapurve wrote:
> > > >
> > > > One argument is that userspace can simply rely on cgroups to detect 
> > > > misbehaving
> > > > guests, but (a) those types of OOMs will be a nightmare to debug and 
> > > > (b) an OOM
> > > > kill from the host is typically considered a _host_ issue and will be 
> > > > treated as
> > > > a missed SLO.
> > > >
> > > > An idea for handling this in the kernel without too much complexity 
> > > > would be to
> > > > add F_SEAL_FAULT_ALLOCATIONS (terrible name) that would prevent page 
> > > > faults from
> > > > allocating pages, i.e. holes can only be filled by an explicit 
> > > > fallocate().  Minor
> > > > faults, e.g. due to NUMA balancing stupidity, and major faults due to 
> > > > swap would
> > > > still work, but writes to previously unreserved/unallocated memory 
> > > > would get a
> > > > SIGSEGV on something it has mapped.  That would allow the userspace VMM 
> > > > to prevent
> > > > unintentional allocations without having to coordinate 
> > > > unmapping/remapping across
> > > > multiple processes.
> > >
> > > Since this is mainly for shared memory and the motivation is catching
> > > misbehaved access, can we use mprotect(PROT_NONE) for this? We can mark
> > > those range backed by private fd as PROT_NONE during the conversion so
> > > subsequent misbehaving accesses will be blocked instead of causing double
> > > allocation silently.
>
> PROT_NONE, a.k.a. mprotect(), has the same vma downsides as munmap().
>
> > This patch series is fairly close to implementing a rather more
> > efficient solution.  I'm not familiar enough with hypervisor userspace
> > to really know if this would work, but:
> >
> > What if shared guest memory could also be file-backed, either in the
> > same fd or with a second fd covering the shared portion of a memslot?
> > This would allow changes to the backing store (punching holes, etc) to
> > be done without mmap_lock or host-userspace TLB flushes?  Depending on
> > what the guest is doing with its shared memory, userspace might need
> > the memory mapped or it might not.
>
> That's what I'm angling for with the F_SEAL_FAULT_ALLOCATIONS idea.  The 
> issue,
> unless I'm misreading code, is that punching a hole in the shared memory 
> backing
> store doesn't prevent reallocating that hole on fault, i.e. a helper process 
> that
> keeps a valid mapping of guest shared memory can silently fill the hole.
>
> What we're hoping to achieve is a way to prevent allocating memory without a 
> very
> explicit action from userspace, e.g. fallocate().

Ah, I misunderstood.  I thought your goal was to mmap it and prevent
page faults from allocating.

It is indeed the case (and has been since before quite a few of us
were born) that a hole in a sparse file is logically just a bunch of
zeros.  A way to make a file for which a hole is an actual hole seems
like it would solve this problem nicely.  It could also be solved more
specifically for KVM by making sure that the private/shared mode that
userspace programs is strict enough to prevent accidental allocations
-- if a GPA is definitively private, shared, neither, or (potentially,
on TDX only) both, then a page that *isn't* shared will never be
accidentally allocated by KVM.  If the shared backing is not mmapped,
it also won't be accidentally allocated by host userspace on a stray
or careless write.


--Andy

Re: [PATCH v6 3/8] mm/memfd: Introduce MFD_INACCESSIBLE flag

2022-06-14 Thread Sean Christopherson

On Thu, Jun 02, 2022, Chao Peng wrote:
> On Wed, Jun 01, 2022 at 02:11:42PM +0200, Gupta, Pankaj wrote:
> > 
> > > > > Introduce a new memfd_create() flag indicating the content of the
> > > > > created memfd is inaccessible from userspace through ordinary MMU
> > > > > access (e.g., read/write/mmap). However, the file content can be
> > > > > accessed via a different mechanism (e.g. KVM MMU) indirectly.
> > > > > 
> > > > 
> > > > SEV, TDX, pkvm and software-only VMs seem to have usecases to set up
> > > > initial guest boot memory with the needed blobs.
> > > > TDX already supports a KVM IOCTL to transfer contents to private
> > > > memory using the TDX module but rest of the implementations will need
> > > > to invent
> > > > a way to do this.
> > > 
> > > There are some discussions in 
> > > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flkml.org%2Flkml%2F2022%2F5%2F9%2F1292data=05%7C01%7Cpankaj.gupta%40amd.com%7Cb81ef334e2dd44c6143308da43b87d17%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637896756895977587%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7Csdata=oQbM2Hj7GlhJTwnTM%2FPnwsfJlmTL7JR9ULBysAqm6V8%3Dreserved=0
> > > already. I somehow agree with Sean. TDX is using a dedicated ioctl to
> > > copy guest boot memory to private fd so the rest can do that similarly.
> > > The concern is the performance (extra memcpy) but it's trivial since the
> > > initial guest payload is usually optimized in size.
> > > 
> > > > 
> > > > Is there a plan to support a common implementation for either allowing
> > > > initial write access from userspace to private fd or adding a KVM
> > > > IOCTL to transfer contents to such a file,
> > > > as part of this series through future revisions?
> > > 
> > > Indeed, adding pre-boot private memory populating on current design
> > > isn't impossible, but there are still some opens, e.g. how to expose
> > > private fd to userspace for access, pKVM and CC usages may have
> > > different requirements. Before that's well-studied I would tend to not
> > > add that and instead use an ioctl to copy. Whether we need a generic
> > > ioctl or feature-specific ioctl, I don't have strong opinion here.
> > > Current TDX uses a feature-specific ioctl so it's not covered in this
> > > series.
> > 
> > Common function or ioctl to populate preboot private memory actually makes
> > sense.
> > 
> > Sorry, did not follow much of TDX code yet, Is it possible to filter out
> > the current TDX specific ioctl to common function so that it can be used by
> > other technologies?
> 
> TDX code is here:
> https://patchwork.kernel.org/project/kvm/patch/70ed041fd47c1f7571aa259450b3f9244edda48d.1651774250.git.isaku.yamah...@intel.com/
> 
> AFAICS It might be possible to filter that out to a common function. But
> would like to hear from Paolo/Sean for their opinion.

Eh, I wouldn't put too much effort into creating a common helper, I would be 
very
surprised if TDX and SNP can share a meaningful amount of code that isn't 
already
shared, e.g. provided by MMU helpers.

The only part I truly care about sharing is whatever ioctl(s) get added, i.e. I
don't want to end up with two ioctls that do the same thing for TDX vs. SNP.

[PATCH] hw:m25p80: Add STATE_STANDBY command state

2022-06-14 Thread Dan Zhang

Hardware normally switches to standby mode when it receives an incorrect
command.
E.g., section 8 (DEVICE OPERATION) of the Macronix MX66L1G45G data sheet states:
```
2. When an incorrect command is written to this device, it enters
standby mode and stays in standby mode until the next CS# falling edge.
In standby mode, This device's SO pin should be High-Z.
```
Add a STATE_STANDBY CMDState in which the device ignores all input and keeps
SO at High-Z (output 1).

Signed-off-by: Dan Zhang 
---
This new state can be used, for example, when aborting during HPM checking 
or when an unknown command code is received.

 hw/block/m25p80.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index b6bd430a99..9f89773b11 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -423,6 +423,7 @@ typedef enum {
 STATE_COLLECTING_DATA,
 STATE_COLLECTING_VAR_LEN_DATA,
 STATE_READING_DATA,
+STATE_STANDBY,
 } CMDState;
 
 typedef enum {
@@ -1472,6 +1473,9 @@ static uint32_t m25p80_transfer8(SSIPeripheral *ss, 
uint32_t tx)
   s->cur_addr, (uint8_t)tx);
 
 switch (s->state) {
+case STATE_STANDBY:
+r = 0x; /* StandBy state SO shall be HiZ */
+break;
 
 case STATE_PAGE_PROGRAM:
 trace_m25p80_page_program(s, s->cur_addr, (uint8_t)tx);
-- 
2.34.3

Re: [PATCH v6 0/8] KVM: mm: fd-based approach for supporting KVM guest private memory

2022-06-14 Thread Sean Christopherson

On Tue, Jun 14, 2022, Andy Lutomirski wrote:
> On Tue, Jun 14, 2022 at 12:32 AM Chao Peng  
> wrote:
> >
> > On Thu, Jun 09, 2022 at 08:29:06PM +, Sean Christopherson wrote:
> > > On Wed, Jun 08, 2022, Vishal Annapurve wrote:
> > >
> > > One argument is that userspace can simply rely on cgroups to detect 
> > > misbehaving
> > > guests, but (a) those types of OOMs will be a nightmare to debug and (b) 
> > > an OOM
> > > kill from the host is typically considered a _host_ issue and will be 
> > > treated as
> > > a missed SLO.
> > >
> > > An idea for handling this in the kernel without too much complexity would 
> > > be to
> > > add F_SEAL_FAULT_ALLOCATIONS (terrible name) that would prevent page 
> > > faults from
> > > allocating pages, i.e. holes can only be filled by an explicit 
> > > fallocate().  Minor
> > > faults, e.g. due to NUMA balancing stupidity, and major faults due to 
> > > swap would
> > > still work, but writes to previously unreserved/unallocated memory would 
> > > get a
> > > SIGSEGV on something it has mapped.  That would allow the userspace VMM 
> > > to prevent
> > > unintentional allocations without having to coordinate 
> > > unmapping/remapping across
> > > multiple processes.
> >
> > Since this is mainly for shared memory and the motivation is catching
> > misbehaved access, can we use mprotect(PROT_NONE) for this? We can mark
> > those range backed by private fd as PROT_NONE during the conversion so
> > subsequent misbehaved accesses will be blocked instead of causing double
> > allocation silently.

PROT_NONE, a.k.a. mprotect(), has the same vma downsides as munmap().
 
> This patch series is fairly close to implementing a rather more
> efficient solution.  I'm not familiar enough with hypervisor userspace
> to really know if this would work, but:
> 
> What if shared guest memory could also be file-backed, either in the
> same fd or with a second fd covering the shared portion of a memslot?
> This would allow changes to the backing store (punching holes, etc) to
> be done without mmap_lock or host-userspace TLB flushes?  Depending on
> what the guest is doing with its shared memory, userspace might need
> the memory mapped or it might not.

That's what I'm angling for with the F_SEAL_FAULT_ALLOCATIONS idea.  The issue,
unless I'm misreading code, is that punching a hole in the shared memory backing
store doesn't prevent reallocating that hole on fault, i.e. a helper process
that keeps a valid mapping of guest shared memory can silently fill the hole.

What we're hoping to achieve is a way to prevent allocating memory without a
very explicit action from userspace, e.g. fallocate().

Re: [PATCH v2] docs: add PCIe root bus for VGA compat guideline

2022-06-14 Thread Kevin Locke

On Tue, 2022-06-14 at 10:52 +0200, Gerd Hoffmann wrote:
>> On 06/12/22 19:32, Kevin Locke wrote:
>>> PCI Express devices which use legacy VGA compatibility should be placed
>>> on the Root Complex.  This simplifies ioport access to VGA registers,
>>> which requires use of a special exception bit to work across PCI(e)
>>> bridges.  It is also necessary for ioport access to VESA BIOS Extension
>>> (VBE) registers, which is not forwarded over PCI(e) bridges, even with
>>> the special exception bit for VGA register access.[1]
>>> 
>>> Update the PCI Express Guidelines to add these to the list of devices
>>> which can be placed directly on the Root Complex.
>>> 
>>> Note that the only PCI Express display devices currently supported
>>> (bochs-display and virtio-gpu-pci) do not offer VGA compatibility.
>>> Legacy PCI devices (e.g. vga, qxl-vga, virtio-vga) are already
>>> documented as allowed on the Root Complex by the first item in the list.
>>> However, this item documents an additional consideration for placing
>>> devices which was not previously mentioned, and may be relevant for PCIe
>>> devices offering VGA compatibility in the future.
> 
> Well, the *key* problem is emulated VGA devices with VBE registers in
> io address space, because those are not forwarded over bridges.
> 
> For normal VGA registers this isn't much of a problem (in theory, not
> fully sure whenever that holds in practice, Alex?).  The linux kernel
> knows how to use the bridge control register to manage access to VGA
> registers.
> 
> So, if the document already covers vga & qxl & virtio-vga (didn't check
> that beforehand) I'm not sure we actually need an update ...

Section 2.1 Root Bus mentions attaching legacy PCI devices to the Root
Complex.  VGA/qxl-vga/virtio-vga are implicitly included (if the
reader is aware they are PCI, not PCIe), but they are not specifically
mentioned in the document.  By my reading, the document does not
recommend for or against attaching legacy PCI devices to the Root
Complex, other than noting hot-unplugging from the Root Complex is not
supported (in Section 2.3) and the general advice to prefer flat
hierarchies.

There is currently no mention of VGA or VBE in the document.

I think documenting the issue with VBE registers would be helpful.
Doing so with a recommendation for how to avoid the issue seems even
better.  Would a recommendation to attach a device which supports VBE
to the Root Complex if VBE will be used by the guest make sense?

As you noted, applying the recommendation to all VGA compatible
devices may be too broad.  I'm not sure whether it makes sense to
recommend attaching VGA compatible devices to the Root Complex to
avoid the complexity of the VGA exception bits, or if that is a
non-issue.  In fact, if I understand correctly, it may make sense to
recommend attaching VGA compatible devices to separate PCI bridges if
the VM will have multiple VGA compatible devices so that the guest can
perform VGA arbitration.

Unless I hear otherwise, I'll plan to create a v4 which documents the
issue with VBE registers more specifically.  Any suggestions for how
best to do that would be appreciated.

Cheers,
Kevin

New "IndustryStandard" fw_cfg?

2022-06-14 Thread Dionna Amalie Glaze

Hi y'all, I'm Dionna. I work on Confidential VMs at Google Cloud. I've
been keeping up with the TDX and SEV-SNP developments in OVMF and
Linux, and some in Qemu.

There's a new UEFI feature in v2.9 of the specification (March 2021)
that allows for memory ranges to be classified as "unaccepted", since
both TDX and SEV-SNP require that the guest VM accept any host-made
changes to page state. We should expect newer technologies on non-x86
architectures to require memory acceptance as well. Operating systems
are not necessarily going to support this memory type, however.

This leads to a problem: how does the UEFI know that the OS it's going
to boot will support unaccepted memory? Right now we (Google Compute
Engine) have a system of "tagging" for guest image providers to state
that their OS supports some new feature so that we can enable
appropriate configurations for certain images.

I could go about adding a Google-specific fw_cfg file path and
definition to tell our custom OVMF build to use unaccepted memory or
not, but I personally prefer open source. I don't know y'all's process
though, so I'm asking before making a patch set.

There are two approaches I've considered.

1. An arch-specific config key for a u64 value:

The idea would be that I would add QemuFwCfgItemUnacceptedMinimum = 0x8005 here
https://github.com/tianocore/edk2/blob/master/OvmfPkg/Include/IndustryStandard/QemuFwCfg.h#L50

For Qemu, the main code I see for adding config is here, but I'm not
sure what y'all's preferred external configuration method is to get a
value from an invocation (flag, config file, etc) to fw_cfg.c:
https://github.com/qemu/qemu/blob/58b53669e87fed0d70903e05cd42079fbbdbc195/hw/i386/fw_cfg.c#L95

We'd add something like

fw_cfg_add_u64(fw_cfg, FW_CFG_MINIMUM_ACCEPTED_MEMORY_SIZE,
ms->minimum_accepted_memory_size);

where FW_CFG_MINIMUM_ACCEPTED_MEMORY_SIZE is #defined as
FW_CFG_ARCH_LOCAL + 5 in
https://github.com/qemu/qemu/blob/266469947161aa10b1d36843580d369d5aa38589/hw/i386/fw_cfg.h

The name has "minimum" in it since the firmware can choose to accept
more than the minimum, and specifically interpret 0 as UINT64_MAX.

2. A "well-known" file path to be included in the file slots starting
at 0x0020, such as "etc/min_accepted_mem_size", still plumbed through
like in 1.

Thanks!

--
-Dionna Glaze, PhD (she/her)

Re: [PULL 00/10] Block jobs & NBD patches


On 6/14/22 03:29, Vladimir Sementsov-Ogievskiy wrote:

The following changes since commit debd0753663bc89c86f5462a53268f2e3f680f60:

   Merge tag 'pull-testing-next-140622-1' of https://github.com/stsquad/qemu 
into staging (2022-06-13 21:10:57 -0700)

are available in the Git repository at:

   https://gitlab.com/vsementsov/qemu.git tags/pull-block-2022-06-14

for you to fetch changes up to 5aef6747a250f545ff53ba7e1a3ed7a3d166011a:

   MAINTAINERS: update Vladimir's address and repositories (2022-06-14 12:51:48 
+0300)


Block jobs & NBD patches

- add new options for copy-before-write filter
- new trace points for NBD
- prefer unsigned type for some 'in_flight' fields
- update my addresses in MAINTAINERS (already in Stefan's tree, but
   I think it's OK to send it with this PULL)


Note also, that I've recently updated my pgp key with new address and
new expire time.
Updated key is here: 
https://keys.openpgp.org/search?q=vsementsov%40yandex-team.ru


This introduces or exposes new timeouts:

https://gitlab.com/qemu-project/qemu/-/pipelines/563590515/failures


r~

Re: [PATCH v6 0/8] KVM: mm: fd-based approach for supporting KVM guest private memory

2022-06-14 Thread Andy Lutomirski

On Tue, Jun 14, 2022 at 12:32 AM Chao Peng  wrote:
>
> On Thu, Jun 09, 2022 at 08:29:06PM +, Sean Christopherson wrote:
> > On Wed, Jun 08, 2022, Vishal Annapurve wrote:
> >
> > One argument is that userspace can simply rely on cgroups to detect 
> > misbehaving
> > guests, but (a) those types of OOMs will be a nightmare to debug and (b) an 
> > OOM
> > kill from the host is typically considered a _host_ issue and will be 
> > treated as
> > a missed SLO.
> >
> > An idea for handling this in the kernel without too much complexity would 
> > be to
> > add F_SEAL_FAULT_ALLOCATIONS (terrible name) that would prevent page faults 
> > from
> > allocating pages, i.e. holes can only be filled by an explicit fallocate(). 
> >  Minor
> > faults, e.g. due to NUMA balancing stupidity, and major faults due to swap 
> > would
> > still work, but writes to previously unreserved/unallocated memory would 
> > get a
> > SIGSEGV on something it has mapped.  That would allow the userspace VMM to 
> > prevent
> > unintentional allocations without having to coordinate unmapping/remapping 
> > across
> > multiple processes.
>
> Since this is mainly for shared memory and the motivation is catching
> misbehaved access, can we use mprotect(PROT_NONE) for this? We can mark
> those range backed by private fd as PROT_NONE during the conversion so
> subsequent misbehaved accesses will be blocked instead of causing double
> allocation silently.

This patch series is fairly close to implementing a rather more
efficient solution.  I'm not familiar enough with hypervisor userspace
to really know if this would work, but:

What if shared guest memory could also be file-backed, either in the
same fd or with a second fd covering the shared portion of a memslot?
This would allow changes to the backing store (punching holes, etc) to
be done without mmap_lock or host-userspace TLB flushes?  Depending on
what the guest is doing with its shared memory, userspace might need
the memory mapped or it might not.

--Andy

Re: [PATCH v2 07/11] vfio/migration: Implement VFIO migration protocol v2

2022-06-14 Thread Joao Martins




On 6/14/22 17:34, Avihai Horon wrote:
> 
> On 6/14/2022 2:08 PM, Joao Martins wrote:
>> External email: Use caution opening links or attachments
>>
>>
>> On 5/30/22 18:07, Avihai Horon wrote:
>>> +static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
>>> +{
>>> +VFIODevice *vbasedev = opaque;
>>> +enum vfio_device_mig_state recover_state;
>>> +int ret;
>>> +
>>> +/* We reach here with device state STOP or STOP_COPY only */
>>> +recover_state = VFIO_DEVICE_STATE_STOP;
>>> +ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
>>> +   recover_state);
>>> +if (ret) {
>>> +return ret;
>>> +}
>>> +
>>> +do {
>>> +ret = vfio_save_block(f, vbasedev->migration);
>>> +if (ret < 0) {
>>> +return ret;
>>> +}
>>> +} while (!ret);
>>> +
>>> +qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
>>> +ret = qemu_file_get_error(f);
>>> +if (ret) {
>>> +return ret;
>>> +}
>>> +
>>> +ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP,
>>> +   recover_state);
>> Is it expected that you are setting VFIO_DEVICE_STATE_STOP while
>> @recover_state is the same value (VFIO_DEVICE_STATE_STOP) ?
> 
> 
> Yes.
> Transitioning to any other state from STOP_COPY will first go through 
> STOP state (this is done internally by kernel).
> So there is no better option for the recover state but STOP.
> 
I was thinking about the ERROR state, given that you can transition there
from any state, but wasn't quite sure if it's appropriate to make that arc
while in the stop-copy migration phase.

>>> +if (ret) {
>>> +return ret;
>>> +}
>>> +
>>> +trace_vfio_save_complete_precopy(vbasedev->name);
>>> +
>>> +return 0;

just a cosmetic nit: you could probably rewrite these last couple of lines as:

if (!ret) {
trace_vfio_save_complete_precopy(vbasedev->name);
}

return ret;

Lets you avoid the double return path.

>>> +}
>>> +
>>>   static int vfio_v1_save_complete_precopy(QEMUFile *f, void *opaque)
>>>   {
>>>   VFIODevice *vbasedev = opaque;
>>> @@ -593,6 +775,14 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
>>>   }
>>>   }
>>>
>>> +static int vfio_load_setup(QEMUFile *f, void *opaque)
>>> +{
>>> +VFIODevice *vbasedev = opaque;
>>> +
>>> +return vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING,
>>> +   vbasedev->migration->device_state);
>>> +}
>>> +
>>>   static int vfio_v1_load_setup(QEMUFile *f, void *opaque)
>>>   {
>>>   VFIODevice *vbasedev = opaque;
>>> @@ -620,6 +810,15 @@ static int vfio_v1_load_setup(QEMUFile *f, void 
>>> *opaque)
>>>   return ret;
>>>   }
>>>
>>> +static int vfio_load_cleanup(void *opaque)
>>> +{
>>> +VFIODevice *vbasedev = opaque;
>>> +
>>> +vfio_migration_cleanup(vbasedev);
>>> +trace_vfio_load_cleanup(vbasedev->name);
>>> +return 0;
>>> +}
>>> +
>>>   static int vfio_v1_load_cleanup(void *opaque)
>>>   {
>>>   VFIODevice *vbasedev = opaque;
>>> @@ -662,7 +861,11 @@ static int vfio_load_state(QEMUFile *f, void *opaque, 
>>> int version_id)
>>>   uint64_t data_size = qemu_get_be64(f);
>>>
>>>   if (data_size) {
>>> -ret = vfio_v1_load_buffer(f, vbasedev, data_size);
>>> +if (vbasedev->migration->v2) {
>>> +ret = vfio_load_buffer(f, vbasedev, data_size);
>>> +} else {
>>> +ret = vfio_v1_load_buffer(f, vbasedev, data_size);
>>> +}
>>>   if (ret < 0) {
>>>   return ret;
>>>   }
>>> @@ -683,6 +886,16 @@ static int vfio_load_state(QEMUFile *f, void *opaque, 
>>> int version_id)
>>>   return ret;
>>>   }
>>>
>>> +static SaveVMHandlers savevm_vfio_handlers = {
>>> +.save_setup = vfio_save_setup,
>>> +.save_cleanup = vfio_save_cleanup,
>>> +.save_live_complete_precopy = vfio_save_complete_precopy,
>>> +.save_state = vfio_save_state,
>>> +.load_setup = vfio_load_setup,
>>> +.load_cleanup = vfio_load_cleanup,
>>> +.load_state = vfio_load_state,
>>> +};
>>> +
>>>   static SaveVMHandlers savevm_vfio_v1_handlers = {
>>>   .save_setup = vfio_v1_save_setup,
>>>   .save_cleanup = vfio_v1_save_cleanup,
>>> @@ -697,6 +910,34 @@ static SaveVMHandlers savevm_vfio_v1_handlers = {
>>>
>>>   /* -- 
>>> */
>>>
>>> +static void vfio_vmstate_change(void *opaque, bool running, RunState state)
>>> +{
>>> +VFIODevice *vbasedev = opaque;
>>> +enum vfio_device_mig_state new_state;
>>> +int ret;
>>> +
>>> +if (running) {
>>> +new_state = VFIO_DEVICE_STATE_RUNNING;
>>> +} else {
>>> +new_state = VFIO_DEVICE_STATE_STOP;
>>> +}
>>> +
>>> +ret =

Re: [PATCH] hw:w25p80: Add STATE_STANDBY to handle incorrect command

2022-06-14 Thread Dan Zhang

Hi Cedric,

I am sorry that I accidentally submitted a preview code change as a patch
using git send-email.
I originally meant to copy the following code into an email reply to help the
community better understand my proposal.

Let me submit a formal patch in a separate thread. I will also remove the
code that uses this STATE_STANDBY state, as that code belongs in @iris's WP#
patch.

BRs
Dan

On Tue, Jun 14, 2022 at 09:02:46AM -0700, Dan Zhang wrote:
> ---
>  hw/block/m25p80.c | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
> index b6bd430a99..3bb0466dca 100644
> --- a/hw/block/m25p80.c
> +++ b/hw/block/m25p80.c
> @@ -423,6 +423,7 @@ typedef enum {
>  STATE_COLLECTING_DATA,
>  STATE_COLLECTING_VAR_LEN_DATA,
>  STATE_READING_DATA,
> +STATE_STANDBY,
>  } CMDState;
>  
>  typedef enum {
> @@ -1218,6 +1219,9 @@ static void decode_new_cmd(Flash *s, uint32_t value)
>  || !s->write_enable) {
>  qemu_log_mask(LOG_GUEST_ERROR,
>"M25P80: Status register write is disabled!\n");
> + qemu_log_mask(LOG_GUEST_ERROR,
> +  "M25P80: switch to standby, re-aseert CS to 
> reactivate \n");
> + s->state = STATE_STANDBY;
>  break;
>  }
>  
> @@ -1472,6 +1476,9 @@ static uint32_t m25p80_transfer8(SSIPeripheral *ss, 
> uint32_t tx)
>s->cur_addr, (uint8_t)tx);
>  
>  switch (s->state) {
> +case STATE_STANDBY:
> + r = 0x; /* StandBy state SO shall be HiZ */
> + break;
>  
>  case STATE_PAGE_PROGRAM:
>  trace_m25p80_page_program(s, s->cur_addr, (uint8_t)tx);
> -- 
> 2.34.3
>

Re: [PATCH v16 7/9] target/loongarch: Adjust functions and structure to support user-mode


On 6/14/22 02:05, Song Gao wrote:

@@ -172,17 +173,20 @@ static void loongarch_cpu_do_interrupt(CPUState *cs)
  update_badinstr = 0;
  break;
  case EXCCODE_ADEM:
+case EXCCODE_BCE:
  case EXCCODE_SYS:
  case EXCCODE_BRK:
+case EXCCODE_INE:
+case EXCCODE_IPE:
+case EXCCODE_FPE:
+env->badvaddr = env->pc;
+QEMU_FALLTHROUGH;


This is incorrect still.

(1) env->badaddr (in this patch renamed badvaddr) is actually unused prior to this patch 
and should go away.  It seems to have been copied from RISC-V?  The correct LoongArch 
variable is env->CSR_BADV (see raise_mmu_exception in tlb_helper.c).


(2) EXCCODE_ADEM is on the wrong side of this FALLTHROUGH.  This is the exception raised 
by TLB faults, and should retain the BADV address of the fault, not the faulting instruction.


Also, this patch is trying to do too many things at once.  Please split it into smaller 
logical changes.  Any bug fixes for the system code, for instance raising EXCCODE_BCE 
instead of EXCCODE_ADEM for helper_asrtle_d should be completely separated.



r~

Re: [PATCH] configure: cleanup -fno-pie detection


On 6/14/22 07:50, Paolo Bonzini wrote:

Place it only inside the 'if test "$pie" = "no"' conditional.

Signed-off-by: Paolo Bonzini 
---
  configure | 13 -
  1 file changed, 4 insertions(+), 9 deletions(-)


Worth mentioning 43924d1e53f, which obviated...


-# Check we support -fno-pie and -no-pie first; we will need the former for
-# building ROMs, and both for everything if --disable-pie is passed.


... this comment.

Either "Fixes:" or just in the text, "Since XXX, we no longer require this probe for 
building ROMs".


Reviewed-by: Richard Henderson 


r~

[PATCH RESEND 2/3] target/ppc: Move tlbiel to decode tree

Also decode RIC, PRS and R operands.

Signed-off-by: Leandro Lupori 
---
 target/ppc/insn32.decode |  1 +
 target/ppc/translate.c   | 22 
 target/ppc/translate/storage-ctrl-impl.c.inc | 16 +-
 3 files changed, 12 insertions(+), 27 deletions(-)

diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 1710babfc4..44ac5f0785 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -793,3 +793,4 @@ XVF64GERNN  111011 ... --  0 . 1010 ..-  
@XX3_at xa=%xx_xa_pair
 @X_tlbie.. rs:5 - ric:2 prs:1 r:1 rb:5 .. . _tlbie
 
 TLBIE   01 . - .. . . . 0100110010 -@X_tlbie
+TLBIEL  01 . - .. . . . 0100010010 -@X_tlbie
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index c945ff0362..4fcb311c2d 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -5424,26 +5424,6 @@ static void gen_tlbia(DisasContext *ctx)
 #endif  /* defined(CONFIG_USER_ONLY) */
 }
 
-/* tlbiel */
-static void gen_tlbiel(DisasContext *ctx)
-{
-#if defined(CONFIG_USER_ONLY)
-GEN_PRIV;
-#else
-bool psr = (ctx->opcode >> 17) & 0x1;
-
-if (ctx->pr || (!ctx->hv && !psr && ctx->hr)) {
-/*
- * tlbiel is privileged except when PSR=0 and HR=1, making it
- * hypervisor privileged.
- */
-GEN_PRIV;
-}
-
-gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]);
-#endif /* defined(CONFIG_USER_ONLY) */
-}
-
 /* tlbsync */
 static void gen_tlbsync(DisasContext *ctx)
 {
@@ -6901,8 +6881,6 @@ GEN_HANDLER(tlbia, 0x1F, 0x12, 0x0B, 0x03FFFC01, 
PPC_MEM_TLBIA),
  * XXX Those instructions will need to be handled differently for
  * different ISA versions
  */
-GEN_HANDLER(tlbiel, 0x1F, 0x12, 0x08, 0x001F0001, PPC_MEM_TLBIE),
-GEN_HANDLER_E(tlbiel, 0x1F, 0x12, 0x08, 0x0011, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER(tlbsync, 0x1F, 0x16, 0x11, 0x03FFF801, PPC_MEM_TLBSYNC),
 #if defined(TARGET_PPC64)
 GEN_HANDLER(slbia, 0x1F, 0x12, 0x0F, 0x031FFC01, PPC_SLBI),
diff --git a/target/ppc/translate/storage-ctrl-impl.c.inc 
b/target/ppc/translate/storage-ctrl-impl.c.inc
index 33733c082c..7793297dd4 100644
--- a/target/ppc/translate/storage-ctrl-impl.c.inc
+++ b/target/ppc/translate/storage-ctrl-impl.c.inc
@@ -46,21 +46,21 @@ static bool do_tlbie(DisasContext *ctx, arg_X_tlbie *a, 
bool local)
 }
 
 if (ctx->pr) {
-/* tlbie is privileged... */
+/* tlbie[l] is privileged... */
 gen_priv_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 return true;
 } else if (!ctx->hv) {
-if (!ctx->gtse || (!a->prs && ctx->hr)) {
+if ((!a->prs && ctx->hr) || (!local && !ctx->gtse)) {
 /*
- * ... except when GTSE=0 or when PRS=0 and HR=1, making it
- * hypervisor privileged.
+ * ... except when PRS=0 and HR=1, or when GTSE=0 for tlbie,
+ * making it hypervisor privileged.
  */
 gen_priv_exception(ctx, POWERPC_EXCP_PRIV_OPC);
 return true;
 }
 }
 
-if (NARROW_MODE(ctx)) {
+if (!local && NARROW_MODE(ctx)) {
 TCGv t0 = tcg_temp_new();
 tcg_gen_ext32u_tl(t0, cpu_gpr[rb]);
 gen_helper_tlbie(cpu_env, t0);
@@ -68,6 +68,11 @@ static bool do_tlbie(DisasContext *ctx, arg_X_tlbie *a, bool 
local)
 } else {
 gen_helper_tlbie(cpu_env, cpu_gpr[rb]);
 }
+
+if (local) {
+return true;
+}
+
 t1 = tcg_temp_new_i32();
 tcg_gen_ld_i32(t1, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
 tcg_gen_ori_i32(t1, t1, TLB_NEED_GLOBAL_FLUSH);
@@ -79,3 +84,4 @@ static bool do_tlbie(DisasContext *ctx, arg_X_tlbie *a, bool 
local)
 }
 
 TRANS_FLAGS(MEM_TLBIE, TLBIE, do_tlbie, false)
+TRANS_FLAGS(MEM_TLBIE, TLBIEL, do_tlbie, true)
-- 
2.25.1

[PATCH] tests/vm: allow running tests in an unconfigured source tree

2022-06-14 Thread Paolo Bonzini

tests/vm/Makefile.include used to assume that it could run in an unconfigured
source tree, and Cirrus CI relies on that.  It was however broken by commit
f4c66f1705 ("tests: use tests/venv to run basevm.py-based scripts", 2022-06-06),
which co-opted the virtual environment being used by avocado tests
to also run the basevm.py tests.

For now, reintroduce the usage of qemu.qmp from the source directory, but
without the sys.path() hacks.  The CI configuration can be changed to
install the package via pip when qemu.qmp is removed from the source tree.

Cc: John Snow 
Signed-off-by: Paolo Bonzini 
---
 tests/vm/Makefile.include | 26 +-
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/tests/vm/Makefile.include b/tests/vm/Makefile.include
index 588bc999cc..5f5b1fbfe6 100644
--- a/tests/vm/Makefile.include
+++ b/tests/vm/Makefile.include
@@ -1,8 +1,17 @@
 # Makefile for VM tests
 
-.PHONY: vm-build-all vm-clean-all
+# Hack to allow running in an unconfigured build tree
+ifeq ($(wildcard $(SRC_PATH)/config-host.mak),)
+VM_PYTHON = PYTHONPATH=$(SRC_PATH)/python /usr/bin/env python3
+VM_VENV =
+HOST_ARCH := $(shell uname -m)
+else
+VM_PYTHON = $(TESTS_PYTHON)
+VM_VENV = check-venv
+HOST_ARCH = $(ARCH)
+endif
 
-HOST_ARCH = $(if $(ARCH),$(ARCH),$(shell uname -m))
+.PHONY: vm-build-all vm-clean-all
 
 EFI_AARCH64 = $(wildcard $(BUILD_DIR)/pc-bios/edk2-aarch64-code.fd)
 
@@ -85,10 +94,10 @@ vm-clean-all:
 $(IMAGES_DIR)/%.img:   $(SRC_PATH)/tests/vm/% \
$(SRC_PATH)/tests/vm/basevm.py \
$(SRC_PATH)/tests/vm/Makefile.include \
-   check-venv
+   $(VM_VENV)
@mkdir -p $(IMAGES_DIR)
$(call quiet-command, \
-   $(TESTS_PYTHON) $< \
+   $(VM_PYTHON) $< \
$(if $(V)$(DEBUG), --debug) \
$(if $(GENISOIMAGE),--genisoimage $(GENISOIMAGE)) \
$(if $(QEMU_LOCAL),--build-path $(BUILD_DIR)) \
@@ -100,11 +109,10 @@ $(IMAGES_DIR)/%.img:  $(SRC_PATH)/tests/vm/% \
--build-image $@, \
"  VM-IMAGE $*")
 
-
 # Build in VM $(IMAGE)
-vm-build-%: $(IMAGES_DIR)/%.img check-venv
+vm-build-%: $(IMAGES_DIR)/%.img $(VM_VENV)
$(call quiet-command, \
-   $(TESTS_PYTHON) $(SRC_PATH)/tests/vm/$* \
+   $(VM_PYTHON) $(SRC_PATH)/tests/vm/$* \
$(if $(V)$(DEBUG), --debug) \
$(if $(DEBUG), --interactive) \
$(if $(J),--jobs $(J)) \
@@ -128,9 +136,9 @@ vm-boot-serial-%: $(IMAGES_DIR)/%.img
-device virtio-net-pci,netdev=vnet \
|| true
 
-vm-boot-ssh-%: $(IMAGES_DIR)/%.img check-venv
+vm-boot-ssh-%: $(IMAGES_DIR)/%.img $(VM_VENV)
$(call quiet-command, \
-   $(TESTS_PYTHON) $(SRC_PATH)/tests/vm/$* \
+   $(VM_PYTHON) $(SRC_PATH)/tests/vm/$* \
$(if $(J),--jobs $(J)) \
$(if $(V)$(DEBUG), --debug) \
$(if $(QEMU_LOCAL),--build-path $(BUILD_DIR)) \
-- 
2.36.1

Re: [PATCH v16 9/9] target/loongarch: Update README


On 6/14/22 02:05, Song Gao wrote:

Add linux-user emulation introduction

Signed-off-by: Song Gao
Signed-off-by: Xiaojuan Yang
---
  target/loongarch/README | 39 +--
  1 file changed, 37 insertions(+), 2 deletions(-)


Reviewed-by: Richard Henderson 


r~

Re: [PATCH v2 07/11] vfio/migration: Implement VFIO migration protocol v2

2022-06-14 Thread Avihai Horon




On 6/14/2022 2:08 PM, Joao Martins wrote:

External email: Use caution opening links or attachments


On 5/30/22 18:07, Avihai Horon wrote:

+static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
+{
+VFIODevice *vbasedev = opaque;
+enum vfio_device_mig_state recover_state;
+int ret;
+
+/* We reach here with device state STOP or STOP_COPY only */
+recover_state = VFIO_DEVICE_STATE_STOP;
+ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
+   recover_state);
+if (ret) {
+return ret;
+}
+
+do {
+ret = vfio_save_block(f, vbasedev->migration);
+if (ret < 0) {
+return ret;
+}
+} while (!ret);
+
+qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+ret = qemu_file_get_error(f);
+if (ret) {
+return ret;
+}
+
+ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP,
+   recover_state);

Is it expected that you are setting VFIO_DEVICE_STATE_STOP while
@recover_state is the same value (VFIO_DEVICE_STATE_STOP) ?



Yes.
Transitioning to any other state from STOP_COPY will first go through 
STOP state (this is done internally by kernel).

So there is no better option for the recover state but STOP.


+if (ret) {
+return ret;
+}
+
+trace_vfio_save_complete_precopy(vbasedev->name);
+
+return 0;
+}
+
  static int vfio_v1_save_complete_precopy(QEMUFile *f, void *opaque)
  {
  VFIODevice *vbasedev = opaque;
@@ -593,6 +775,14 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
  }
  }

+static int vfio_load_setup(QEMUFile *f, void *opaque)
+{
+VFIODevice *vbasedev = opaque;
+
+return vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING,
+   vbasedev->migration->device_state);
+}
+
  static int vfio_v1_load_setup(QEMUFile *f, void *opaque)
  {
  VFIODevice *vbasedev = opaque;
@@ -620,6 +810,15 @@ static int vfio_v1_load_setup(QEMUFile *f, void *opaque)
  return ret;
  }

+static int vfio_load_cleanup(void *opaque)
+{
+VFIODevice *vbasedev = opaque;
+
+vfio_migration_cleanup(vbasedev);
+trace_vfio_load_cleanup(vbasedev->name);
+return 0;
+}
+
  static int vfio_v1_load_cleanup(void *opaque)
  {
  VFIODevice *vbasedev = opaque;
@@ -662,7 +861,11 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int 
version_id)
  uint64_t data_size = qemu_get_be64(f);

  if (data_size) {
-ret = vfio_v1_load_buffer(f, vbasedev, data_size);
+if (vbasedev->migration->v2) {
+ret = vfio_load_buffer(f, vbasedev, data_size);
+} else {
+ret = vfio_v1_load_buffer(f, vbasedev, data_size);
+}
  if (ret < 0) {
  return ret;
  }
@@ -683,6 +886,16 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int 
version_id)
  return ret;
  }

+static SaveVMHandlers savevm_vfio_handlers = {
+.save_setup = vfio_save_setup,
+.save_cleanup = vfio_save_cleanup,
+.save_live_complete_precopy = vfio_save_complete_precopy,
+.save_state = vfio_save_state,
+.load_setup = vfio_load_setup,
+.load_cleanup = vfio_load_cleanup,
+.load_state = vfio_load_state,
+};
+
  static SaveVMHandlers savevm_vfio_v1_handlers = {
  .save_setup = vfio_v1_save_setup,
  .save_cleanup = vfio_v1_save_cleanup,
@@ -697,6 +910,34 @@ static SaveVMHandlers savevm_vfio_v1_handlers = {

  /* -- */

+static void vfio_vmstate_change(void *opaque, bool running, RunState state)
+{
+VFIODevice *vbasedev = opaque;
+enum vfio_device_mig_state new_state;
+int ret;
+
+if (running) {
+new_state = VFIO_DEVICE_STATE_RUNNING;
+} else {
+new_state = VFIO_DEVICE_STATE_STOP;
+}
+
+ret = vfio_migration_set_state(vbasedev, new_state,
+   VFIO_DEVICE_STATE_ERROR);
+if (ret) {
+/*
+ * Migration should be aborted in this case, but vm_state_notify()
+ * currently does not support reporting failures.
+ */
+if (migrate_get_current()->to_dst_file) {
+qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
+}
+}
+
+trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
+  new_state);
+}
+
  static void vfio_v1_vmstate_change(void *opaque, bool running, RunState state)
  {
  VFIODevice *vbasedev = opaque;
@@ -770,12 +1011,17 @@ static void vfio_migration_state_notifier(Notifier 
*notifier, void *data)
  case MIGRATION_STATUS_CANCELLED:
  case MIGRATION_STATUS_FAILED:
  bytes_transferred = 0;
-ret = vfio_migration_v1_set_state(vbasedev,
-

[PATCH RESEND 1/3] target/ppc: Move tlbie to decode tree

Also decode RIC, PRS and R operands.

Signed-off-by: Leandro Lupori 
---
 target/ppc/cpu_init.c|  4 +-
 target/ppc/insn32.decode |  7 ++
 target/ppc/translate.c   | 42 +-
 target/ppc/translate/storage-ctrl-impl.c.inc | 81 
 4 files changed, 92 insertions(+), 42 deletions(-)
 create mode 100644 target/ppc/translate/storage-ctrl-impl.c.inc

diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 0f891afa04..b802bbb641 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -6368,7 +6368,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data)
PPC_FLOAT_EXT |
PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
PPC_MEM_SYNC | PPC_MEM_EIEIO |
-   PPC_MEM_TLBSYNC |
+   PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
PPC_64B | PPC_64H | PPC_64BX | PPC_ALTIVEC |
PPC_SEGMENT_64B | PPC_SLBI |
PPC_POPCNTB | PPC_POPCNTWD |
@@ -6585,7 +6585,7 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data)
PPC_FLOAT_EXT |
PPC_CACHE | PPC_CACHE_ICBI | PPC_CACHE_DCBZ |
PPC_MEM_SYNC | PPC_MEM_EIEIO |
-   PPC_MEM_TLBSYNC |
+   PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
PPC_64B | PPC_64H | PPC_64BX | PPC_ALTIVEC |
PPC_SEGMENT_64B | PPC_SLBI |
PPC_POPCNTB | PPC_POPCNTWD |
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 18a94fa3b5..1710babfc4 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -786,3 +786,10 @@ XVF64GERPP  111011 ... --  0 . 00111010 ..-  
@XX3_at xa=%xx_xa_pair
 XVF64GERPN  111011 ... --  0 . 10111010 ..-  @XX3_at xa=%xx_xa_pair
 XVF64GERNP  111011 ... --  0 . 0010 ..-  @XX3_at xa=%xx_xa_pair
 XVF64GERNN  111011 ... --  0 . 1010 ..-  @XX3_at xa=%xx_xa_pair
+
+## TLB Management Instructions
+
+_tlbierb rs ric prs:bool r:bool
+@X_tlbie.. rs:5 - ric:2 prs:1 r:1 rb:5 .. . _tlbie
+
+TLBIE   01 . - .. . . . 0100110010 -@X_tlbie
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 1d6daa4608..c945ff0362 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -5444,44 +5444,6 @@ static void gen_tlbiel(DisasContext *ctx)
 #endif /* defined(CONFIG_USER_ONLY) */
 }
 
-/* tlbie */
-static void gen_tlbie(DisasContext *ctx)
-{
-#if defined(CONFIG_USER_ONLY)
-GEN_PRIV;
-#else
-bool psr = (ctx->opcode >> 17) & 0x1;
-TCGv_i32 t1;
-
-if (ctx->pr) {
-/* tlbie is privileged... */
-GEN_PRIV;
-} else if (!ctx->hv) {
-if (!ctx->gtse || (!psr && ctx->hr)) {
-/*
- * ... except when GTSE=0 or when PSR=0 and HR=1, making it
- * hypervisor privileged.
- */
-GEN_PRIV;
-}
-}
-
-if (NARROW_MODE(ctx)) {
-TCGv t0 = tcg_temp_new();
-tcg_gen_ext32u_tl(t0, cpu_gpr[rB(ctx->opcode)]);
-gen_helper_tlbie(cpu_env, t0);
-tcg_temp_free(t0);
-} else {
-gen_helper_tlbie(cpu_env, cpu_gpr[rB(ctx->opcode)]);
-}
-t1 = tcg_temp_new_i32();
-tcg_gen_ld_i32(t1, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
-tcg_gen_ori_i32(t1, t1, TLB_NEED_GLOBAL_FLUSH);
-tcg_gen_st_i32(t1, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
-tcg_temp_free_i32(t1);
-#endif /* defined(CONFIG_USER_ONLY) */
-}
-
 /* tlbsync */
 static void gen_tlbsync(DisasContext *ctx)
 {
@@ -6699,6 +6661,8 @@ static bool resolve_PLS_D(DisasContext *ctx, arg_D *d, 
arg_PLS_D *a)
 
 #include "translate/branch-impl.c.inc"
 
+#include "translate/storage-ctrl-impl.c.inc"
+
 /* Handles lfdp */
 static void gen_dform39(DisasContext *ctx)
 {
@@ -6938,9 +6902,7 @@ GEN_HANDLER(tlbia, 0x1F, 0x12, 0x0B, 0x03FFFC01, 
PPC_MEM_TLBIA),
  * different ISA versions
  */
 GEN_HANDLER(tlbiel, 0x1F, 0x12, 0x08, 0x001F0001, PPC_MEM_TLBIE),
-GEN_HANDLER(tlbie, 0x1F, 0x12, 0x09, 0x001F0001, PPC_MEM_TLBIE),
 GEN_HANDLER_E(tlbiel, 0x1F, 0x12, 0x08, 0x0011, PPC_NONE, PPC2_ISA300),
-GEN_HANDLER_E(tlbie, 0x1F, 0x12, 0x09, 0x0011, PPC_NONE, PPC2_ISA300),
 GEN_HANDLER(tlbsync, 0x1F, 0x16, 0x11, 0x03FFF801, PPC_MEM_TLBSYNC),
 #if defined(TARGET_PPC64)
 GEN_HANDLER(slbia, 0x1F, 0x12, 0x0F, 0x031FFC01, PPC_SLBI),
diff --git a/target/ppc/translate/storage-ctrl-impl.c.inc 
b/target/ppc/translate/storage-ctrl-impl.c.inc
new file mode 100644
index 00..33733c082c
--- /dev/null
+++ b/target/ppc/translate/storage-ctrl-impl.c.inc
@@ -0,0 +1,81 @@
+/*
+ * Power ISA decode for Storage Control instructions
+ *
+ * Copyright (c) 2022 Instituto de Pesquisas Eldorado (eldorado.org.br)
+ *
+ * This library is free software;

[PATCH RESEND 0/3] ppc: Implement ISA 3.00 tlbie[l]

Add support for ISA 3.00 tlbie/tlbiel instructions, with
RIC, PRS and R operands.

Also, for Radix MMU, add support for the TLB invalidation of a
single page. Flush by PID/LPID, or based in process/partition
scope is not supported, because it would make using the
generic QEMU TLB implementation hard. In these cases, all
entries are flushed.

Resent after rebasing and fixing conflicts with master.

Leandro Lupori (3):
  target/ppc: Move tlbie to decode tree
  target/ppc: Move tlbiel to decode tree
  target/ppc: Implement ISA 3.00 tlbie[l]

 target/ppc/cpu_init.c|   4 +-
 target/ppc/helper.h  |  18 +++
 target/ppc/insn32.decode |   8 +
 target/ppc/mmu_helper.c  | 154 +++
 target/ppc/translate.c   |  64 +---
 target/ppc/translate/storage-ctrl-impl.c.inc | 102 
 6 files changed, 286 insertions(+), 64 deletions(-)
 create mode 100644 target/ppc/translate/storage-ctrl-impl.c.inc

-- 
2.25.1

Re: [PATCH v12 00/14] vfio-user server in QEMU

2022-06-14 Thread Stefan Hajnoczi

On Tue, Jun 14, 2022 at 02:37:02PM +, Jag Raman wrote:
> > On Jun 14, 2022, at 3:06 AM, Stefan Hajnoczi  wrote:
> > 
> > On Mon, Jun 13, 2022 at 04:26:20PM -0400, Jagannathan Raman wrote:
> >> This is v12 of the server side changes to enable vfio-user in QEMU.
> >> 
> >> Thanks so much for reviewing this series and sharing your feedback.
> >> 
> >> We made the following changes in this series:
> >> [PATCH v12 13/14] vfio-user: handle device interrupts
> >> - Renamed msi_set_irq_state() and msix_set_irq_state() as
> >>   msi_set_mask() and msix_set_mask() respectively
> >> - Added missing return statement for error case in msi_set_mask()
> > 
> > Thanks, applied to my block tree:
> > https://gitlab.com/stefanha/qemu/commits/block
> 
> Thank you very much, Stefan! :)

You're welcome! Thanks for the persistence in getting the vfio-user
server into QEMU.

I have mirrored libvfio-user here:
https://gitlab.com/qemu-project/libvfio-user

The QEMU project's policy is to mirror dependencies so full source code
can be provided even in the event that dependencies become unavailable.
The mirror is currently manually updated, so please ping me if you want
newer commits.

Thanks,
Stefan


signature.asc
Description: PGP signature

Re: [PATCH 0/2] linux-aio: fix unbalanced plugged counter in laio_io_unplug()

2022-06-14 Thread Stefan Hajnoczi

On Thu, Jun 09, 2022 at 05:47:10PM +0100, Stefan Hajnoczi wrote:
> An unlucky I/O pattern can result in stalled Linux AIO requests when the
> plugged counter becomes unbalanced. See Patch 1 for details.
> 
> Patch 2 adds a comment to explain why the laio_io_unplug() even checks max
> batch in the first place.
> 
> Stefan Hajnoczi (2):
>   linux-aio: fix unbalanced plugged counter in laio_io_unplug()
>   linux-aio: explain why max batch is checked in laio_io_unplug()
> 
>  block/linux-aio.c | 10 +-
>  1 file changed, 9 insertions(+), 1 deletion(-)
> 
> -- 
> 2.36.1
> 

Thanks, applied to my block tree:
https://gitlab.com/stefanha/qemu/commits/block

Stefan


signature.asc
Description: PGP signature

[PATCH RESEND 3/3] target/ppc: Implement ISA 3.00 tlbie[l]

This initial version supports the invalidation of one or all
TLB entries. Flush by PID/LPID, or based in process/partition
scope is not supported, because it would make using the
generic QEMU TLB implementation hard. In these cases, all
entries are flushed.

Signed-off-by: Leandro Lupori 
---
 target/ppc/helper.h  |  18 +++
 target/ppc/mmu_helper.c  | 154 +++
 target/ppc/translate/storage-ctrl-impl.c.inc |  15 ++
 3 files changed, 187 insertions(+)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 6233e28d85..0b2bc8020b 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -664,6 +664,24 @@ DEF_HELPER_FLAGS_1(tlbia, TCG_CALL_NO_RWG, void, env)
 DEF_HELPER_FLAGS_2(tlbie, TCG_CALL_NO_RWG, void, env, tl)
 DEF_HELPER_FLAGS_2(tlbiva, TCG_CALL_NO_RWG, void, env, tl)
 #if defined(TARGET_PPC64)
+
+/*
+ * tlbie[l] helper flags
+ *
+ * RIC, PRS, R and local are passed as flags in the last argument.
+ */
+#define TLBIE_F_RIC_SHIFT   0
+#define TLBIE_F_PRS_SHIFT   2
+#define TLBIE_F_R_SHIFT 3
+#define TLBIE_F_LOCAL_SHIFT 4
+
+#define TLBIE_F_RIC_MASK(3 << TLBIE_F_RIC_SHIFT)
+#define TLBIE_F_PRS (1 << TLBIE_F_PRS_SHIFT)
+#define TLBIE_F_R   (1 << TLBIE_F_R_SHIFT)
+#define TLBIE_F_LOCAL   (1 << TLBIE_F_LOCAL_SHIFT)
+
+DEF_HELPER_FLAGS_4(tlbie_isa300, TCG_CALL_NO_WG, void, \
+env, tl, tl, i32)
 DEF_HELPER_FLAGS_3(store_slb, TCG_CALL_NO_RWG, void, env, tl, tl)
 DEF_HELPER_2(load_slb_esid, tl, env, tl)
 DEF_HELPER_2(load_slb_vsid, tl, env, tl)
diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c
index 15239dc95b..b881aee23f 100644
--- a/target/ppc/mmu_helper.c
+++ b/target/ppc/mmu_helper.c
@@ -429,6 +429,160 @@ void helper_tlbie(CPUPPCState *env, target_ulong addr)
 ppc_tlb_invalidate_one(env, addr);
 }
 
+#if defined(TARGET_PPC64)
+
+/* Invalidation Selector */
+#define TLBIE_IS_VA 0
+#define TLBIE_IS_PID1
+#define TLBIE_IS_LPID   2
+#define TLBIE_IS_ALL3
+
+/* Radix Invalidation Control */
+#define TLBIE_RIC_TLB   0
+#define TLBIE_RIC_PWC   1
+#define TLBIE_RIC_ALL   2
+#define TLBIE_RIC_GRP   3
+
+/* Radix Actual Page sizes */
+#define TLBIE_R_AP_4K   0
+#define TLBIE_R_AP_64K  5
+#define TLBIE_R_AP_2M   1
+#define TLBIE_R_AP_1G   2
+
+/* RB field masks */
+#define TLBIE_RB_EPN_MASK   PPC_BITMASK(0, 51)
+#define TLBIE_RB_IS_MASKPPC_BITMASK(52, 53)
+#define TLBIE_RB_AP_MASKPPC_BITMASK(56, 58)
+
+void helper_tlbie_isa300(CPUPPCState *env, target_ulong rb, target_ulong rs,
+ uint32_t flags)
+{
+unsigned ric = (flags & TLBIE_F_RIC_MASK) >> TLBIE_F_RIC_SHIFT;
+/*
+ * With the exception of the checks for invalid instruction forms,
+ * PRS is currently ignored, because we don't know if a given TLB entry
+ * is process or partition scoped.
+ */
+bool prs = flags & TLBIE_F_PRS;
+bool r = flags & TLBIE_F_R;
+bool local = flags & TLBIE_F_LOCAL;
+bool effR;
+unsigned is = extract64(rb, PPC_BIT_NR(53), 2), set;
+unsigned ap;/* actual page size */
+target_ulong addr, pgoffs_mask;
+
+qemu_log_mask(CPU_LOG_MMU,
+"%s: local=%d addr=" TARGET_FMT_lx " ric=%u prs=%d r=%d is=%u\n",
+__func__, local, rb & TARGET_PAGE_MASK, ric, prs, r, is);
+
+effR = FIELD_EX64(env->msr, MSR, HV) ? r : env->spr[SPR_LPCR] & LPCR_HR;
+
+/* Partial TLB invalidation is supported for Radix only for now. */
+if (!effR) {
+goto inval_all;
+}
+
+/* Check for invalid instruction forms (effR=1). */
+if (unlikely(ric == TLBIE_RIC_GRP ||
+ ((ric == TLBIE_RIC_PWC || ric == TLBIE_RIC_ALL) &&
+   is == TLBIE_IS_VA) ||
+ (!prs && is == TLBIE_IS_PID))) {
+qemu_log_mask(LOG_GUEST_ERROR,
+"%s: invalid instruction form: ric=%u prs=%d r=%d is=%u\n",
+__func__, ric, prs, r, is);
+goto invalid;
+}
+
+/* We don't cache Page Walks. */
+if (ric == TLBIE_RIC_PWC) {
+if (local) {
+set = extract64(rb, PPC_BIT_NR(51), 12);
+if (set != 0) {
+qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid set: %d\n",
+  __func__, set);
+goto invalid;
+}
+}
+return;
+}
+
+/*
+ * Invalidation by LPID or PID is not supported, so fallback
+ * to full TLB flush in these cases.
+ */
+if (is != TLBIE_IS_VA) {
+goto inval_all;
+}
+
+/*
+ * The results of an attempt to invalidate a translation outside of
+ * quadrant 0 for Radix Tree translation (effR=1, RIC=0, PRS=1, IS=0,
+ * and EA 0:1 != 0b00) are boundedly undefined.
+ */
+if (unlikely(ric == TLBIE_RIC_TLB && prs && is == TLBIE_IS_VA &&
+ (rb & R_EADDR_QUADRANT) != R_EADDR_QUADRANT0)) {
+

Re: [PATCH v2 1/2] QIOChannelSocket: Reduce ifdefs to improve readability

2022-06-14 Thread Leonardo Bras Soares Passos

On Tue, Jun 14, 2022 at 5:36 AM Daniel P. Berrangé  wrote:
>
> On Mon, Jun 13, 2022 at 06:21:18PM -0300, Leonardo Bras Soares Passos wrote:
> > On Fri, Jun 10, 2022 at 5:25 AM Daniel P. Berrangé  
> > wrote:
> > >
> >
> > [...]
> >
> > > Ok, so if it is checked earlier then we merely need an assert.
> > >
> > >  if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
> > >  #ifdef QEMU_MSG_ZEROCOPY
> > >  sflags = MSG_ZEROCOPY;
> > >  zero_copy_enabled = true;
> > >  #else
> > >  g_assert_unreachable();
> > >  #endif
> > > > }
> >
> > Ok, I will add that in the next version.
> >
> > >
> > >
> > >
> > > > > > @@ -592,15 +594,13 @@ static ssize_t 
> > > > > > qio_channel_socket_writev(QIOChannel *ioc,
> > > > > >  return QIO_CHANNEL_ERR_BLOCK;
> > > > > >  case EINTR:
> > > > > >  goto retry;
> > > > > > -#ifdef QEMU_MSG_ZEROCOPY
> > > > > >  case ENOBUFS:
> > > > > > -if (sflags & MSG_ZEROCOPY) {
> > > > > > +if (zero_copy_enabled) {
> > > > >
> > > > > if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY)
> > > > >
> > > > > avoids the #ifdef without needing to add yet another
> > > > > variable expressing what's already expressed in both
> > > > > 'flags' and 'sflags'.
> > > >
> > > > Yes, it does, but at the cost of not compiling-out the zero-copy part
> > > > when it's not supported,
> > > > since the QIO_CHANNEL_WRITE_FLAG_ZERO_COPY comes as a parameter. This 
> > > > ends up
> > > > meaning there will be at least one extra test for every time this
> > > > function is called (the one in the next patch).
> > >
> > > The cost of a simple bit test is between negligible-and-non-existent
> > > with branch prediction. I doubt it would be possible to even measure
> > > it.
> >
> > Yeah, you are probably right on that.
> > So the main learning point here is that it's not worth creating a new
> > boolean for compiling-out
> > code that should not impact performance ?
>
> As ever "it depends" so there's no hard rule, and sometimes it can
> verge on bikeshed colouring :-)
>
> I didn't like the variable in this case, because it introduces a 3rd
> variable to the method for representing whether zero copy is needed,
> which is excessive. I'm not a fan of redundancy as it can often then
> lead to inconsistency. So it would need a compelling reason why it is
> better, which is difficult for such a simple method. If the code was
> more complex, a variable might have benefit of clarity, but in this
> case IMHO it was just overkill.

I see. Thanks for the clarification!

Best regards,
Leo

>
> With regards,
> Daniel
> --
> |: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|
>

Re: [PATCH v16 3/9] linux-user: Add LoongArch elf support


On 6/14/22 02:05, Song Gao wrote:

+#define ELF_HWCAP get_elf_hwcap()
+
+static uint32_t get_elf_hwcap(void)
+{
+return 0;
+}


This should not be zero.  See cpu_probe_common in the kernel.  At minimum 
HWCAP_LOONGARCH_CRC32 and HWCAP_LOONGARCH_FPU are missing.  I don't know how many of the 
other features are implemented in target/loongarch/.


Missing ELF_PLATFORM, per the kernel's set_elf_platform(cpu, "loongarch").


r~

[PATCH] hw:w25p80: Add STATE_STANDBY to handle incorrect command

2022-06-14 Thread Dan Zhang

---
 hw/block/m25p80.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index b6bd430a99..3bb0466dca 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -423,6 +423,7 @@ typedef enum {
 STATE_COLLECTING_DATA,
 STATE_COLLECTING_VAR_LEN_DATA,
 STATE_READING_DATA,
+STATE_STANDBY,
 } CMDState;
 
 typedef enum {
@@ -1218,6 +1219,9 @@ static void decode_new_cmd(Flash *s, uint32_t value)
 || !s->write_enable) {
 qemu_log_mask(LOG_GUEST_ERROR,
   "M25P80: Status register write is disabled!\n");
+   qemu_log_mask(LOG_GUEST_ERROR,
+  "M25P80: switch to standby, re-aseert CS to 
reactivate \n");
+   s->state = STATE_STANDBY;
 break;
 }
 
@@ -1472,6 +1476,9 @@ static uint32_t m25p80_transfer8(SSIPeripheral *ss, 
uint32_t tx)
   s->cur_addr, (uint8_t)tx);
 
 switch (s->state) {
+case STATE_STANDBY:
+   r = 0x; /* StandBy state SO shall be HiZ */
+   break;
 
 case STATE_PAGE_PROGRAM:
 trace_m25p80_page_program(s, s->cur_addr, (uint8_t)tx);
-- 
2.34.3

Re: [PATCH v16 2/9] linux-user: Add LoongArch signal support


On 6/14/22 02:05, Song Gao wrote:

Signed-off-by: Song Gao 
Signed-off-by: Xiaojuan Yang 
---
  linux-user/loongarch64/signal.c| 283 +
  linux-user/loongarch64/target_signal.h |  13 ++
  2 files changed, 296 insertions(+)
  create mode 100644 linux-user/loongarch64/signal.c
  create mode 100644 linux-user/loongarch64/target_signal.h


You copied too much directly from the kernel, without changing to match the host/guest 
split that is present in qemu.



+struct target_ctx_layout {
+struct target_sctx_info *addr;


abi_ulong.


+unsigned int size;
+};
+
+struct target_extctx_layout {
+unsigned long size;


unsigned int -- it only needs to hold sizeof(target_fpu_context) + 
sizeof(target_sctx_info).  Use of "unsigned long" in qemu is generally incorrect.


Both of these two structures should drop the "target_" prefix from the name, because they 
do not appear in target memory.  They are implementation details of this file.



+static void *get_ctx(struct target_sctx_info *info)
+{
+return (void *)((char *)info + sizeof(struct target_sctx_info));
+}


Return type should be struct target_sctx_info *.


+static unsigned long extframe_alloc(struct target_extctx_layout *extctx,
+struct target_ctx_layout *layout,
+size_t size, unsigned long base)
+{
+unsigned long new_base = base - size;
+
+new_base -= sizeof(struct target_sctx_info);
+layout->addr = (void *)new_base;
+layout->size = (unsigned int)(base - new_base);
+extctx->size += layout->size;


All of these unsigned long should be abi_ulong.
The cast into layout->addr is wrong.


+static unsigned long setup_extcontext(struct target_extctx_layout *extctx,
+  unsigned long sp)
+{
+unsigned long new_sp = sp;
+
+memset(extctx, 0, sizeof(struct target_extctx_layout));
+new_sp -= sizeof(struct target_sctx_info);
+
+extctx->end.addr = (void *) new_sp;
+extctx->end.size = (unsigned int)sizeof(struct target_sctx_info);
+extctx->size += extctx->end.size;
+extctx->flags = SC_USED_FP;
+
+new_sp = extframe_alloc(extctx, &extctx->fpu,
+sizeof(struct target_fpu_context), new_sp);
+
+return new_sp;
+}


More unsigned long and casting errors.



+static void restore_sigcontext(CPULoongArchState *env,
+   struct target_sigcontext *sc)
+{
+int i;
+struct target_extctx_layout extctx;
+
+memset(&extctx, 0, sizeof(struct target_extctx_layout));
+
+__get_user(extctx.flags, &sc->sc_flags);
+
+extctx.fpu.addr = (struct target_sctx_info *)&sc->sc_extcontext;


This is wrong.  You're missing all of the code from parse_extcontext().


r~

Re: [PATCH 08/11] ppc/pnv: turn chip8->phbs[] into a PnvPHB3* array


On 6/14/22 17:39, Daniel Henrique Barboza wrote:



On 6/14/22 06:53, Frederic Barrat wrote:



On 13/06/2022 17:44, Daniel Henrique Barboza wrote:

When enabling user created PHBs (a change reverted by commit 9c10d86fee)
we were handling PHBs created by default versus by the user in different
manners. The only difference between these PHBs is that one will have a
valid phb3->chip that is assigned during pnv_chip_power8_realize(),
while the user created needs to search which chip it belongs to.

Aside from that there shouldn't be any difference. Making the default
PHBs behave in line with the user created ones will make it easier to
re-introduce them later on. It will also make the code easier to follow
since we are dealing with them in equal manner.

The first step is to turn chip8->phbs[] into a PnvPHB3 pointer array.
This will allow us to assign user created PHBs into it later on. The way
we initialize the default case is now more in line with what would happen
with the user created case: the object is created, parented by the chip
because pnv_xscom_dt() relies on it, and then assigned to the array.

Signed-off-by: Daniel Henrique Barboza 
---



This patch is more prep work for the user-created device instead of general 
cleanup like the previous ones, but I don't see anything wrong with it. So:

Reviewed-by: Frederic Barrat 



I've been thinking about it and I guess I could do better with this
and the proxy pnv-phb series that is already in v2. What I'm thinking
is:

- crop patches 8-11 from this series. Patches 1-7 would be the prep cleanup
series;

- split the pnv-phb series in two:

   - first series will just introduce the pnv-phb devices and consolidate the
root ports. We're going to deal just with default devices. No consideration
about future user-created devices will be made;


Yes. From what I have read, this looks very feasible with a v2.

Thanks,

C.



   - a second series would then re-introduce user creatable phbs and root ports.
Patches 8-11 of this series would be handled in this second patch set since it's
closely related to user devices.


Does that sound fair?


Thanks,


Daniel






   Fred




  hw/ppc/pnv.c | 19 ++-
  include/hw/ppc/pnv.h |  6 +-
  2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 5e3323e950..6ce9e94e05 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -660,7 +660,7 @@ static void pnv_chip_power8_pic_print_info(PnvChip *chip, 
Monitor *mon)
  ics_pic_print_info(>psi.ics, mon);
  for (i = 0; i < chip8->num_phbs; i++) {
-    PnvPHB3 *phb3 = >phbs[i];
+    PnvPHB3 *phb3 = chip8->phbs[i];
  pnv_phb3_msi_pic_print_info(>msis, mon);
  ics_pic_print_info(>lsis, mon);
@@ -1149,7 +1149,16 @@ static void pnv_chip_power8_instance_init(Object *obj)
  chip8->num_phbs = pcc->num_phbs;
  for (i = 0; i < chip8->num_phbs; i++) {
-    object_initialize_child(obj, "phb[*]", >phbs[i], TYPE_PNV_PHB3);
+    PnvPHB3 *phb3 = PNV_PHB3(object_new(TYPE_PNV_PHB3));
+
+    /*
+ * We need the chip to parent the PHB to allow the DT
+ * to build correctly (via pnv_xscom_dt()).
+ *
+ * TODO: the PHB should be parented by a PEC device.
+ */
+    object_property_add_child(obj, "phb[*]", OBJECT(phb3));
+    chip8->phbs[i] = phb3;
  }
  }
@@ -1278,7 +1287,7 @@ static void pnv_chip_power8_realize(DeviceState *dev, 
Error **errp)
  /* PHB3 controllers */
  for (i = 0; i < chip8->num_phbs; i++) {
-    PnvPHB3 *phb = >phbs[i];
+    PnvPHB3 *phb = chip8->phbs[i];
  object_property_set_int(OBJECT(phb), "index", i, _fatal);
  object_property_set_int(OBJECT(phb), "chip-id", chip->chip_id,
@@ -1963,7 +1972,7 @@ static ICSState *pnv_ics_get(XICSFabric *xi, int irq)
  }
  for (j = 0; j < chip8->num_phbs; j++) {
-    pnv_ics_get_phb_ics(>phbs[j], );
+    pnv_ics_get_phb_ics(chip8->phbs[j], );
  if (args.ics) {
  return args.ics;
@@ -1996,7 +2005,7 @@ static void pnv_ics_resend(XICSFabric *xi)
  Pnv8Chip *chip8 = PNV8_CHIP(pnv->chips[i]);
  for (j = 0; j < chip8->num_phbs; j++) {
-    PnvPHB3 *phb3 = >phbs[j];
+    PnvPHB3 *phb3 = chip8->phbs[j];
  ics_resend(>lsis);
  ics_resend(ICS(>msis));
diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
index 033890a23f..11f1089289 100644
--- a/include/hw/ppc/pnv.h
+++ b/include/hw/ppc/pnv.h
@@ -80,7 +80,11 @@ struct Pnv8Chip {
  PnvHomer homer;
  #define PNV8_CHIP_PHB3_MAX 4
-    PnvPHB3  phbs[PNV8_CHIP_PHB3_MAX];
+    /*
+ * The array is used to allow quick access to the phbs by
+ * pnv_ics_get_child() and pnv_ics_resend_child().
+ */
+    PnvPHB3  *phbs[PNV8_CHIP_PHB3_MAX];
  uint32_t num_phbs;
  XICSFabric    *xics;

[PATCH] build: fix check for -fsanitize-coverage-allowlist

2022-06-14 Thread Alexander Bulekov

The existing check has two problems:
1. Meson uses a private directory for the get_supported_arguments check.
./instrumentation-filter does not exist in that private directory (it is
copied into the root of the build-directory).

2. fsanitize-coverage-allowlist is unused when coverage instrumentation
is not configured. No instrumentation flags are passed for the
get_supported_arguments check

Thus the check always fails. To work around this, change the check to an
"if cc.compiles" check and provide /dev/null, instead of the real
filter.

Meson log:
Working directory:  build/meson-private/tmpl6wld2d9
Command line:  clang-13 -m64 -mcx16
build/meson-private/tmpl6wld2d9/output.obj -c -O3 -D_FILE_OFFSET_BITS=64
-O0 -Werror=implicit-function-declaration -Werror=unknown-warning-option
-Werror=unused-command-line-argument
-Werror=ignored-optimization-argument
-fsanitize-coverage-allowlist=instrumentation-filter

Error:
error: argument unused during compilation:
'-fsanitize-coverage-allowlist=instrumentation-filter'

Signed-off-by: Alexander Bulekov 
---
 meson.build | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/meson.build b/meson.build
index 0c2e11ff07..85134267b5 100644
--- a/meson.build
+++ b/meson.build
@@ -209,9 +209,13 @@ if get_option('fuzzing')
   configure_file(output: 'instrumentation-filter',
  input: 'scripts/oss-fuzz/instrumentation-filter-template',
  copy: true)
-  add_global_arguments(
-  
cc.get_supported_arguments('-fsanitize-coverage-allowlist=instrumentation-filter'),
-  native: false, language: ['c', 'cpp', 'objc'])
+
+  if cc.compiles('int main () { return 0; }',
+  name: '-fsanitize-coverage-allowlist=/dev/null',
+ args: ['-fsanitize-coverage-allowlist=/dev/null'] )
+
add_global_arguments('-fsanitize-coverage-allowlist=instrumentation-filter',
+ native: false, language: ['c', 'cpp', 'objc'])
+  endif
 
   if get_option('fuzzing_engine') == ''
 # Add CFLAGS to tell clang to add fuzzer-related instrumentation to all the
-- 
2.27.0

Re: [PATCH 08/11] ppc/pnv: turn chip8->phbs[] into a PnvPHB3* array

2022-06-14 Thread Frederic Barrat





On 14/06/2022 17:39, Daniel Henrique Barboza wrote:

I've been thinking about it and I guess I could do better with this
and the proxy pnv-phb series that is already in v2. What I'm thinking
is:

- crop patches 8-11 from this series. Patches 1-7 would be the prep cleanup
series;

- split the pnv-phb series in two:

   - first series will just introduce the pnv-phb devices and 
consolidate the

root ports. We're going to deal just with default devices. No consideration
about future user-created devices will be made;

   - a second series would then re-introduce user creatable phbs and 
root ports.
Patches 8-11 of this series would be handled in this second patch set 
since it's

closely related to user devices.


Does that sound fair?



Sounds good to me. That should keep series smaller and easier to review 
and merge.


  Fred

Re: [PATCH 1/2] hw/nvme: Implement shadow doorbell buffer support

2022-06-14 Thread Keith Busch

On Tue, Jun 14, 2022 at 03:24:37PM +0800, Jinhao Fan wrote:
> > On Jun 14, 2022, at 5:15 AM, Keith Busch  wrote:
> > @@ -6538,9 +6544,25 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr 
> > addr, int val)
> > 
> > trace_pci_nvme_mmio_doorbell_sq(sq->sqid, new_tail);
> > 
> > -if (!sq->db_addr) {
> > sq->tail = new_tail;
> > +if (sq->db_addr) {
> > +/*
> > + * The spec states "the host shall also update the controller's
> > + * corresponding doorbell property to match the value of that 
> > entry
> > + * in the Shadow Doorbell buffer."
> > + *
> > + * Since this context is currently a VM trap, we can safely 
> > enforce
> > + * the requirement from the device side in case the host is
> > + * misbehaving.
> > + *
> > + * Note, we shouldn't have to do this, but various drivers
> > + * including ones that run on Linux, are not updating Admin 
> > Queues,
> > + * so we can't trust reading it for an appropriate sq tail.
> > + */
> > +pci_dma_write(>parent_obj, sq->db_addr, >tail,
> > +sizeof(sq->tail));
> > }
> > +
> > timer_mod(sq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
> > }
> > }
> > --
> 
> Thanks Keith,
> 
> This is an interesting hack. I wonder how should I incorporate your changes 
> in my patch. I guess I can modify the code in PATCH 1/2 and add a 
> “Proposed-by” tag. Is this the correct way?

It's a pretty nasty hack, and definitely not in compliance with the spec: the
db_addr is supposed to be read-only from the device side, though I do think
it's safe for this environment. Unless Klaus or anyone finds something I'm
missing, I feel this is an acceptable compromise to address this odd
discrepancy.

I believe the recommended tag for something like this is "Suggested-by:", but
no need to credit me. Just fold it into your first patch and send a v2.

By the way, I noticed that the patch never updates the cq's ei_addr value. Is
that on purpose?

Re: [PATCH 08/11] ppc/pnv: turn chip8->phbs[] into a PnvPHB3* array





On 6/14/22 06:53, Frederic Barrat wrote:



On 13/06/2022 17:44, Daniel Henrique Barboza wrote:

When enabling user created PHBs (a change reverted by commit 9c10d86fee)
we were handling PHBs created by default versus by the user in different
manners. The only difference between these PHBs is that one will have a
valid phb3->chip that is assigned during pnv_chip_power8_realize(),
while the user created needs to search which chip it belongs to.

Aside from that there shouldn't be any difference. Making the default
PHBs behave in line with the user created ones will make it easier to
re-introduce them later on. It will also make the code easier to follow
since we are dealing with them in equal manner.

The first step is to turn chip8->phbs[] into a PnvPHB3 pointer array.
This will allow us to assign user created PHBs into it later on. The way
we initialize the default case is now more in line with what would happen
with the user created case: the object is created, parented by the chip
because pnv_xscom_dt() relies on it, and then assigned to the array.

Signed-off-by: Daniel Henrique Barboza 
---



This patch is more prep work for the user-created device instead of general 
cleanup like the previous ones, but I don't see anything wrong with it. So:

Reviewed-by: Frederic Barrat 



I've been thinking about it and I guess I could do better with this
and the proxy pnv-phb series that is already in v2. What I'm thinking
is:

- crop patches 8-11 from this series. Patches 1-7 would be the prep cleanup
series;

- split the pnv-phb series in two:

  - first series will just introduce the pnv-phb devices and consolidate the
root ports. We're going to deal just with default devices. No consideration
about future user-created devices will be made;

  - a second series would then re-introduce user creatable phbs and root ports.
Patches 8-11 of this series would be handled in this second patch set since it's
closely related to user devices.


Does that sound fair?


Thanks,


Daniel






   Fred




  hw/ppc/pnv.c | 19 ++-
  include/hw/ppc/pnv.h |  6 +-
  2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 5e3323e950..6ce9e94e05 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -660,7 +660,7 @@ static void pnv_chip_power8_pic_print_info(PnvChip *chip, 
Monitor *mon)
  ics_pic_print_info(>psi.ics, mon);
  for (i = 0; i < chip8->num_phbs; i++) {
-    PnvPHB3 *phb3 = >phbs[i];
+    PnvPHB3 *phb3 = chip8->phbs[i];
  pnv_phb3_msi_pic_print_info(>msis, mon);
  ics_pic_print_info(>lsis, mon);
@@ -1149,7 +1149,16 @@ static void pnv_chip_power8_instance_init(Object *obj)
  chip8->num_phbs = pcc->num_phbs;
  for (i = 0; i < chip8->num_phbs; i++) {
-    object_initialize_child(obj, "phb[*]", >phbs[i], TYPE_PNV_PHB3);
+    PnvPHB3 *phb3 = PNV_PHB3(object_new(TYPE_PNV_PHB3));
+
+    /*
+ * We need the chip to parent the PHB to allow the DT
+ * to build correctly (via pnv_xscom_dt()).
+ *
+ * TODO: the PHB should be parented by a PEC device.
+ */
+    object_property_add_child(obj, "phb[*]", OBJECT(phb3));
+    chip8->phbs[i] = phb3;
  }
  }
@@ -1278,7 +1287,7 @@ static void pnv_chip_power8_realize(DeviceState *dev, 
Error **errp)
  /* PHB3 controllers */
  for (i = 0; i < chip8->num_phbs; i++) {
-    PnvPHB3 *phb = >phbs[i];
+    PnvPHB3 *phb = chip8->phbs[i];
  object_property_set_int(OBJECT(phb), "index", i, _fatal);
  object_property_set_int(OBJECT(phb), "chip-id", chip->chip_id,
@@ -1963,7 +1972,7 @@ static ICSState *pnv_ics_get(XICSFabric *xi, int irq)
  }
  for (j = 0; j < chip8->num_phbs; j++) {
-    pnv_ics_get_phb_ics(>phbs[j], );
+    pnv_ics_get_phb_ics(chip8->phbs[j], );
  if (args.ics) {
  return args.ics;
@@ -1996,7 +2005,7 @@ static void pnv_ics_resend(XICSFabric *xi)
  Pnv8Chip *chip8 = PNV8_CHIP(pnv->chips[i]);
  for (j = 0; j < chip8->num_phbs; j++) {
-    PnvPHB3 *phb3 = >phbs[j];
+    PnvPHB3 *phb3 = chip8->phbs[j];
  ics_resend(>lsis);
  ics_resend(ICS(>msis));
diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
index 033890a23f..11f1089289 100644
--- a/include/hw/ppc/pnv.h
+++ b/include/hw/ppc/pnv.h
@@ -80,7 +80,11 @@ struct Pnv8Chip {
  PnvHomer homer;
  #define PNV8_CHIP_PHB3_MAX 4
-    PnvPHB3  phbs[PNV8_CHIP_PHB3_MAX];
+    /*
+ * The array is used to allow quick access to the phbs by
+ * pnv_ics_get_child() and pnv_ics_resend_child().
+ */
+    PnvPHB3  *phbs[PNV8_CHIP_PHB3_MAX];
  uint32_t num_phbs;
  XICSFabric    *xics;

Re: [PULL 00/15] Kraxel 20220614 patches


On 6/14/22 05:15, Gerd Hoffmann wrote:

The following changes since commit debd0753663bc89c86f5462a53268f2e3f680f60:

   Merge tag 'pull-testing-next-140622-1' of https://github.com/stsquad/qemu 
into staging (2022-06-13 21:10:57 -0700)

are available in the Git repository at:

   git://git.kraxel.org/qemu tags/kraxel-20220614-pull-request

for you to fetch changes up to b95b56311a0890da0c9f7fc624529c3d7f8dbce0:

   virtio-gpu: Respect UI refresh rate for EDID (2022-06-14 10:34:37 +0200)


usb: add CanoKey device, fixes for ehci + redir
ui: fixes for gtk and cocoa, rework refresh rate
virtio-gpu: scanout flush fix


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/7.1 as 
appropriate.


r~






Akihiko Odaki (4):
   ui/cocoa: Fix poweroff request code
   ui/console: Do not return a value with ui_info
   ui: Deliver refresh rate via QemuUIInfo
   virtio-gpu: Respect UI refresh rate for EDID

Arnout Engelen (1):
   hw/usb/hcd-ehci: fix writeback order

Dongwon Kim (1):
   virtio-gpu: update done only on the scanout associated with rect

Hongren (Zenithal) Zheng (6):
   hw/usb: Add CanoKey Implementation
   hw/usb/canokey: Add trace events
   meson: Add CanoKey
   docs: Add CanoKey documentation
   docs/system/devices/usb: Add CanoKey to USB devices examples
   MAINTAINERS: add myself as CanoKey maintainer

Joelle van Dyne (1):
   usbredir: avoid queuing hello packet on snapshot restore

Volker Rümelin (2):
   ui/gtk-gl-area: implement GL context destruction
   ui/gtk-gl-area: create the requested GL context version

  meson_options.txt|   2 +
  hw/usb/canokey.h |  69 +++
  include/hw/virtio/virtio-gpu.h   |   1 +
  include/ui/console.h |   4 +-
  include/ui/gtk.h |   2 +-
  hw/display/virtio-gpu-base.c |   7 +-
  hw/display/virtio-gpu.c  |   4 +
  hw/display/virtio-vga.c  |   5 +-
  hw/display/xenfb.c   |  14 +-
  hw/usb/canokey.c | 313 +++
  hw/usb/hcd-ehci.c|   5 +-
  hw/usb/redirect.c|   3 +-
  hw/vfio/display.c|   8 +-
  ui/console.c |   6 -
  ui/gtk-egl.c |   4 +-
  ui/gtk-gl-area.c |  42 -
  ui/gtk.c |  45 +++--
  MAINTAINERS  |   8 +
  docs/system/device-emulation.rst |   1 +
  docs/system/devices/canokey.rst  | 168 +
  docs/system/devices/usb.rst  |   4 +
  hw/usb/Kconfig   |   5 +
  hw/usb/meson.build   |   5 +
  hw/usb/trace-events  |  16 ++
  meson.build  |   6 +
  scripts/meson-buildoptions.sh|   3 +
  ui/cocoa.m   |   6 +-
  ui/trace-events  |   2 +
  28 files changed, 707 insertions(+), 51 deletions(-)
  create mode 100644 hw/usb/canokey.h
  create mode 100644 hw/usb/canokey.c
  create mode 100644 docs/system/devices/canokey.rst

Re: [PATCH 06/11] ppc/pnv: make pnv_ics_resend() use chip8->phbs[]





On 6/14/22 06:24, Frederic Barrat wrote:



On 13/06/2022 17:44, Daniel Henrique Barboza wrote:

pnv_ics_resend() is scrolling through all the child objects of the chip
to search for the PHBs. It's faster and simpler to just use the phbs[]
array.

pnv_ics_resend_child() was folded into pnv_ics_resend() since it's too
simple to justify its own function.

Signed-off-by: Daniel Henrique Barboza 
---
  hw/ppc/pnv.c | 22 +++---
  1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 05a8d5034f..d70deffa1d 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -1993,28 +1993,20 @@ PnvChip *pnv_get_chip(PnvMachineState *pnv, uint32_t 
chip_id)
  return NULL;
  }
-static int pnv_ics_resend_child(Object *child, void *opaque)
-{
-    PnvPHB3 *phb3 = (PnvPHB3 *) object_dynamic_cast(child, TYPE_PNV_PHB3);
-
-    if (phb3) {
-    ics_resend(>lsis);
-    ics_resend(ICS(>msis));
-    }
-    return 0;
-}
-
  static void pnv_ics_resend(XICSFabric *xi)
  {
  PnvMachineState *pnv = PNV_MACHINE(xi);
-    int i;
+    int i, j;
  for (i = 0; i < pnv->num_chips; i++) {
-    PnvChip *chip = pnv->chips[i];
  Pnv8Chip *chip8 = PNV8_CHIP(pnv->chips[i]);
-    ics_resend(>psi.ics);



That line shouldn't be dropped, right?


ooo. It shouldn't. I'll fix it in the v2.


It didn't break anything I could see though. OS boots with network
with ping 



Daniel



   Fred



-    object_child_foreach(OBJECT(chip), pnv_ics_resend_child, NULL);
+    for (j = 0; j < chip8->num_phbs; j++) {
+    PnvPHB3 *phb3 = >phbs[j];
+
+    ics_resend(>lsis);
+    ics_resend(ICS(>msis));
+    }
  }
  }

Re: [PATCH 4/5] tests/vm: switch CentOS 8 to CentOS 8 Stream

2022-06-14 Thread John Snow

On Tue, Jun 14, 2022 at 5:09 AM Daniel P. Berrangé  wrote:
>
> On Mon, Jun 13, 2022 at 09:50:43PM -0400, John Snow wrote:
> > The old CentOS image didn't work anymore because it was already EOL at
> > the beginning of 2022.
> >
> > Signed-off-by: John Snow 
> > ---
> >  tests/vm/centos | 8 
> >  1 file changed, 4 insertions(+), 4 deletions(-)
> >
> > diff --git a/tests/vm/centos b/tests/vm/centos
> > index be4f6ff2f14..f5bbdecf62d 100755
> > --- a/tests/vm/centos
> > +++ b/tests/vm/centos
> > @@ -1,8 +1,8 @@
> >  #!/usr/bin/env python3
> >  #
> > -# CentOS image
> > +# CentOS 8 Stream image
> >  #
> > -# Copyright 2018 Red Hat Inc.
> > +# Copyright 2018, 2022 Red Hat Inc.
> >  #
> >  # Authors:
> >  #  Fam Zheng 
> > @@ -18,7 +18,7 @@ import basevm
> >  import time
> >
> >  class CentosVM(basevm.BaseVM):
> > -name = "centos"
> > +name = "centos8s"
>
>
> What's the effect of this ?  It feels a little odd to set name to 'centos8s'
> here but have this file still called just 'centos' - I assume the 'name'
> variable was intended to always match the filename
>

Changes the logfile names in ~/.cache/qemu-vm, changes the hostname
config in gen_cloud_init_iso(), not much else.

You're right, though, I shouldn't change it in one place but not the
other ... I'll just leave it as "centos". I felt compelled briefly to
indicate it was "the newer, different CentOS" but with the old one
being EOL I suppose it's easy enough to infer.

--js

Re: [PATCH 3/5] tests/vm: use 'cp' instead of 'ln' for temporary vm images

2022-06-14 Thread John Snow

On Tue, Jun 14, 2022 at 12:40 AM Thomas Huth  wrote:
>
> On 14/06/2022 03.50, John Snow wrote:
> > If the initial setup fails, you've permanently altered the state of the
> > downloaded image in an unknowable way. Use 'cp' like our other test
> > setup scripts do.
> >
> > Signed-off-by: John Snow 
> > ---
> >   tests/vm/centos | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/tests/vm/centos b/tests/vm/centos
> > index 5c7bc1c1a9a..be4f6ff2f14 100755
> > --- a/tests/vm/centos
> > +++ b/tests/vm/centos
> > @@ -34,7 +34,7 @@ class CentosVM(basevm.BaseVM):
> >   def build_image(self, img):
> >   cimg = 
> > self._download_with_cache("https://cloud.centos.org/centos/8/x86_64/images/CentOS-8-GenericCloud-8.3.2011-20201204.2.x86_64.qcow2;)
> >   img_tmp = img + ".tmp"
> > -subprocess.check_call(["ln", "-f", cimg, img_tmp])
> > +subprocess.check_call(['cp', '-f', cimg, img_tmp])
>
> I wonder whether it would make sense to use "qemu-img create -b" instead to
> save some disk space?
>
> Anyway, your patch is certainly already an improvement, so:
>
> Reviewed-by: Thomas Huth 

I wondered the same, but decided to keep a smaller series this time
around. VM tests already use a lot of space, so I doubt this is adding
new constraints that didn't exist before. A more rigorous overhaul may
be in order, but not right now. (It looks like the config file stuff
to override defaults is not necessarily rigorously respected by the
different installer recipes.)

I think the caching of the fully set-up image needs work, too. In
practice we leave the image sitting around, but we seem to always
rebuild it no matter what, so it's not that useful. There's a few
things that can be done here to drastically speed up some things,
but... later.

--js

[PATCH] configure: cleanup -fno-pie detection

2022-06-14 Thread Paolo Bonzini

Place it only inside the 'if test "$pie" = "no"' conditional.

Signed-off-by: Paolo Bonzini 
---
 configure | 13 -
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/configure b/configure
index f3dcbd10c3..e2b64334b5 100755
--- a/configure
+++ b/configure
@@ -1346,13 +1346,6 @@ static THREAD int tls_var;
 int main(void) { return tls_var; }
 EOF
 
-# Check we support -fno-pie and -no-pie first; we will need the former for
-# building ROMs, and both for everything if --disable-pie is passed.
-if compile_prog "-Werror -fno-pie" "-no-pie"; then
-  CFLAGS_NOPIE="-fno-pie"
-  LDFLAGS_NOPIE="-no-pie"
-fi
-
 if test "$static" = "yes"; then
   if test "$pie" != "no" && compile_prog "-Werror -fPIE -DPIE" "-static-pie"; 
then
 CONFIGURE_CFLAGS="-fPIE -DPIE $CONFIGURE_CFLAGS"
@@ -1365,8 +1358,10 @@ if test "$static" = "yes"; then
 pie="no"
   fi
 elif test "$pie" = "no"; then
-  CONFIGURE_CFLAGS="$CFLAGS_NOPIE $CONFIGURE_CFLAGS"
-  CONFIGURE_LDFLAGS="$LDFLAGS_NOPIE $CONFIGURE_LDFLAGS"
+  if compile_prog "-Werror -fno-pie" "-no-pie"; then
+CONFIGURE_CFLAGS="-fno-pie $CONFIGURE_CFLAGS"
+CONFIGURE_LDFLAGS="-no-pie $CONFIGURE_LDFLAGS"
+  fi
 elif compile_prog "-Werror -fPIE -DPIE" "-pie"; then
   CONFIGURE_CFLAGS="-fPIE -DPIE $CONFIGURE_CFLAGS"
   CONFIGURE_LDFLAGS="-pie $CONFIGURE_LDFLAGS"
-- 
2.36.1

Re: [PATCH v12 00/14] vfio-user server in QEMU

2022-06-14 Thread Jag Raman




> On Jun 14, 2022, at 3:06 AM, Stefan Hajnoczi  wrote:
> 
> On Mon, Jun 13, 2022 at 04:26:20PM -0400, Jagannathan Raman wrote:
>> This is v12 of the server side changes to enable vfio-user in QEMU.
>> 
>> Thanks so much for reviewing this series and sharing your feedback.
>> 
>> We made the following changes in this series:
>> [PATCH v12 13/14] vfio-user: handle device interrupts
>> - Renamed msi_set_irq_state() and msix_set_irq_state() as
>>   msi_set_mask() and msix_set_mask() respectively
>> - Added missing return statement for error case in msi_set_mask()
> 
> Thanks, applied to my block tree:
> https://gitlab.com/stefanha/qemu/commits/block

Thank you very much, Stefan! :)

> 
> Stefan

Re: [PATCH 2/5] tests/qemu-iotests: skip 108 when FUSE is not loaded

2022-06-14 Thread John Snow

On Tue, Jun 14, 2022 at 4:59 AM Daniel P. Berrangé  wrote:
>
> On Tue, Jun 14, 2022 at 06:46:35AM +0200, Thomas Huth wrote:
> > On 14/06/2022 03.50, John Snow wrote:
> > > In certain container environments we may not have FUSE at all, so skip
> > > the test in this circumstance too.
> > >
> > > Signed-off-by: John Snow 
> > > ---
> > >   tests/qemu-iotests/108 | 6 ++
> > >   1 file changed, 6 insertions(+)
> > >
> > > diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108
> > > index 9e923d6a59f..e401c5e9933 100755
> > > --- a/tests/qemu-iotests/108
> > > +++ b/tests/qemu-iotests/108
> > > @@ -60,6 +60,12 @@ if sudo -n losetup &>/dev/null; then
> > >   else
> > >   loopdev=false
> > > +# Check for fuse support in the host environment:
> > > +lsmod | grep fuse &>/dev/null;
> >
> > That doesn't work if fuse has been linked statically into the kernel. Would
> > it make sense to test for /sys/fs/fuse instead?
> >
> > > (OTOH, we are unlikely to run this on statically linked kernels anyway,
> > so it might not matter too much)
>
> But more importantly 'lsmod' may not be installed in our container
> images. So checking /sys/fs/fuse avoids introducing a dep on the
> 'kmod' package.
>
> >
> > > +if [[ $? -ne 0 ]]; then
> >
> > I'd prefer single "[" instead of "[[" ... but since we're requiring bash
> > anyway, it likely doesn't matter.
>
> Or
>
> if  test $? != 0 ; then
>
> >
> > > +_notrun 'No Passwordless sudo nor FUSE kernel module'
> > > +fi
> > > +
> > >   # QSD --export fuse will either yield "Parameter 'id' is missing"
> > >   # or "Invalid parameter 'fuse'", depending on whether there is
> > >   # FUSE support or not.
> >

Good suggestions, thanks!

--js

Re: [External] [PATCH v13 3/8] QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX

2022-06-14 Thread Dr. David Alan Gilbert

* chuang xu (xuchuangxc...@bytedance.com) wrote:
> 
> On 2022/5/13 下午2:28, Leonardo Bras wrote:
> > @@ -557,15 +578,31 @@ static ssize_t qio_channel_socket_writev(QIOChannel 
> > *ioc,
> >   memcpy(CMSG_DATA(cmsg), fds, fdsize);
> >   }
> > +#ifdef QEMU_MSG_ZEROCOPY
> > +if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
> > +sflags = MSG_ZEROCOPY;
> > +}
> > +#endif
> > +
> >retry:
> > -ret = sendmsg(sioc->fd, , 0);
> > +ret = sendmsg(sioc->fd, , sflags);
> >   if (ret <= 0) {
> > -if (errno == EAGAIN) {
> > +switch (errno) {
> > +case EAGAIN:
> >   return QIO_CHANNEL_ERR_BLOCK;
> > -}
> > -if (errno == EINTR) {
> > +case EINTR:
> >   goto retry;
> > +#ifdef QEMU_MSG_ZEROCOPY
> > +case ENOBUFS:
> > +if (sflags & MSG_ZEROCOPY) {
> > +error_setg_errno(errp, errno,
> > + "Process can't lock enough memory for 
> > using MSG_ZEROCOPY");
> > +return -1;
> > +}
> > +break;
> > +#endif
> >   }
> > +
> >   error_setg_errno(errp, errno,
> >"Unable to write to socket");
> >   return -1;
> 
> Hi, Leo.
> 
> There are some other questions I would like to discuss with you.
> 
> I tested the multifd zero_copy migration and found that sometimes even if
> max locked memory of qemu was set to 16GB（much greater than
> `MULTIFD_PACKET_SIZE`）, the error "Process can't lock enough memory for
> using MSG_ZEROCOPY" would still be reported.
> 
> I noticed that the
> doc(https://www.kernel.org/doc/html/v5.12/networking/msg_zerocopy.html) says
> "A zerocopy failure will return -1 with errno ENOBUFS. This happens if the
> socket option was not set, _the socket exceeds its optmem limit_ or the user
> exceeds its ulimit on locked pages."
> 
> I also found that the RFC(https://lwn.net/Articles/715279/) says _"__The
> change to allocate notification skbuffs from optmem requires__ensuring that
> net.core.optmem is at least a few 100KB."_

Interesting.

> On my host,  optmem was initially set to 20KB, I tried to change it to 100KB
> (echo 102400 > /proc/sys/net/core/optmem_max) as the RFC says. Then I tested
> the multifd zero_copy migration repeatedly, and the error disappeared.
> 
> So when sendmsg returns -1 with errno ENOBUFS, should we distinguish between
> error "socket exceeds optmem limit" and error "user exceeds ulimit on
> locked pages"? Or is there any better way to avoid this problem?

I don't think we can tell which one of them triggered the error; so the
only thing I can suggest is that we document the need for optmem_max
setting; I wonder how we get a better answer than 'a few 100KB'?
I guess it's something like the number of packets inflight *
sizeof(cmsghdr) ?

Dave

> Best Regards,
> 
> chuang xu
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [PATCH 01/11] ppc/pnv: move root port attach to pnv_phb4_realize()





On 6/14/22 09:02, Cédric Le Goater wrote:

On 6/13/22 17:44, Daniel Henrique Barboza wrote:

Creating a root port is something related to the PHB, not the PEC. It
also makes the logic more in line with what pnv-phb3 does.

Signed-off-by: Daniel Henrique Barboza 


Reviewed-by: Cédric Le Goater 

So the root port is back where it was.

Could we avoid the pci_new() and use object_initialize_child() instead ?



We could but then we would need to deal with yet another difference with
default versus user created devices, given that for user devices we can't
initialize_child(). And since we're also unifying the root ports later on
I'd rather wait to see how it turns out when everything is finished.


Thanks,

Daniel



Thanks,

C.



---
  hw/pci-host/pnv_phb4.c | 4 
  hw/pci-host/pnv_phb4_pec.c | 3 ---
  2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index 6594016121..23ad8de7ee 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -1547,6 +1547,7 @@ static void pnv_phb4_instance_init(Object *obj)
  static void pnv_phb4_realize(DeviceState *dev, Error **errp)
  {
  PnvPHB4 *phb = PNV_PHB4(dev);
+    PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(phb->pec);
  PCIHostState *pci = PCI_HOST_BRIDGE(dev);
  XiveSource *xsrc = >xsrc;
  int nr_irqs;
@@ -1583,6 +1584,9 @@ static void pnv_phb4_realize(DeviceState *dev, Error 
**errp)
  pci_setup_iommu(pci->bus, pnv_phb4_dma_iommu, phb);
  pci->bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE;
+    /* Add a single Root port if running with defaults */
+    pnv_phb_attach_root_port(pci, pecc->rp_model);
+
  /* Setup XIVE Source */
  if (phb->big_phb) {
  nr_irqs = PNV_PHB4_MAX_INTs;
diff --git a/hw/pci-host/pnv_phb4_pec.c b/hw/pci-host/pnv_phb4_pec.c
index 8b7e823fa5..c9aaf1c28e 100644
--- a/hw/pci-host/pnv_phb4_pec.c
+++ b/hw/pci-host/pnv_phb4_pec.c
@@ -130,9 +130,6 @@ static void pnv_pec_default_phb_realize(PnvPhb4PecState 
*pec,
  if (!sysbus_realize(SYS_BUS_DEVICE(phb), errp)) {
  return;
  }
-
-    /* Add a single Root port if running with defaults */
-    pnv_phb_attach_root_port(PCI_HOST_BRIDGE(phb), pecc->rp_model);
  }
  static void pnv_pec_realize(DeviceState *dev, Error **errp)

Re: [PATCH] hw/mem/nvdimm: fix error message for 'unarmed' flag

2022-06-14 Thread Igor Mammedov

On Tue, 14 Jun 2022 11:50:43 +0200
David Hildenbrand  wrote:

> On 14.06.22 10:54, Igor Mammedov wrote:
> > On Mon, 13 Jun 2022 16:09:53 +0100
> > Stefan Hajnoczi  wrote:
> >   
> >> On Mon, Jun 13, 2022 at 05:01:10PM +0200, Julia Suvorova wrote:  
> >>> On Tue, May 31, 2022 at 5:32 PM Stefan Hajnoczi  
> >>> wrote:
> 
>  On Tue, May 31, 2022 at 04:51:47PM +0200, Julia Suvorova wrote:
> > In the ACPI specification [1], the 'unarmed' bit is set when a device
> > cannot accept a persistent write. This means that when a memdev is
> > read-only, the 'unarmed' flag must be turned on. The logic is correct,
> > just changing the error message.
> >
> > [1] ACPI NFIT NVDIMM Region Mapping Structure "NVDIMM State Flags" Bit 3
> >
> > Signed-off-by: Julia Suvorova 
> > ---
> >  hw/mem/nvdimm.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> 
>  Reviewed-by: Stefan Hajnoczi 
> >>>
> >>> It seems like Xiao is not active, whose tree should this patch go to?
> 
> Is that a temporary or a permanent thing? Do we know?
> 
> > 
> > Perhaps David can add himself as maintainer (i.e. put it
> > under memory maintainership umbrella) and merge it
> 
> Maybe it makes sense to combine NVDIMM with pc-dimm.c and
> memory-device.c into a "MEMORY DEVICE" section. Then, remove "hw/mem/*"
> from "ACPI/SMBIOS".
just keep me on supporter list for them so I won't miss
patches that needs reviewing.

> cxl_type3.c, npcm7xx_mc.c and sparse-mem.c in /hw/mem/ are a bit
> different. We could add cxl_type3.c to "Compute Express Link".
> npcm7xx_mc.c and sparse-mem.c should be already covered. 
for cxl I'd add Michael as it's mostly all PCI stuff

Re: [PATCH 2/2] docs: build-platforms: Clarify stance on minor releases and backports

2022-06-14 Thread Andrea Bolognani

On Wed, May 04, 2022 at 09:23:28AM +0100, Daniel P. Berrangé wrote:
> On Wed, May 04, 2022 at 01:01:03AM -0700, Andrea Bolognani wrote:
> > On Wed, Apr 20, 2022 at 09:18:47AM -0700, Andrea Bolognani wrote:
> > > On Wed, Apr 20, 2022 at 05:15:08PM +0100, Daniel P. Berrangé wrote:
> > > > On Wed, Apr 20, 2022 at 06:03:11PM +0200, Andrea Bolognani wrote:
> > > > > These changes match those made in the following libvirt commits:
> > > > >
> > > > >   2ac78307af docs: Clarify our stance on backported packages
> > > > >   78cffd450a docs: Spell out our policy concerning minor releases
> > > > >
> > > > > Since QEMU's platform support policy is based on libvirt's, it
> > > > > makes sense to mirror these recent changes made to the latter.
> > > > >
> > > > > The policy is not altered significantly - we're simply spelling
> > > > > out some rules that were likely already being implicitly
> > > > > enforced.
> > > >
> > > > Indeed, I think that's basically de facto the case already.
> > > >
> > > > Reviewed-by: Daniel P. Berrangé 
> > >
> > > Thanks! Are you going to bring these in through one of your trees, or
> > > do I need to bug someone else so that they will pick them up? :)
> >
> > I see these haven't gone in yet. Anything I can/should do to make
> > that happen?
>
> The tragedy of QEMU not having a central docs maintainer. I'll queue
> this one for my next pull request.

Still doesn't seem to have been merged. Not pressuring you or
anything, just making sure it doesn't slip through the cracks :)

-- 
Andrea Bolognani / Red Hat / Virtualization

Re: [PATCH 1/2] Trivial: 3 char repeat typos





On 6/14/22 07:40, Dr. David Alan Gilbert (git) wrote:

From: "Dr. David Alan Gilbert" 

Inspired by Julia Lawall's fixing of Linux
kernel comments, I looked at qemu, although I did it manually.

Signed-off-by: Dr. David Alan Gilbert 
---


Reviewed-by: Daniel Henrique Barboza 


  hw/intc/openpic.c| 2 +-
  hw/net/imx_fec.c | 2 +-
  hw/pci/pcie_aer.c| 2 +-
  hw/pci/shpc.c| 3 ++-
  hw/ppc/spapr_caps.c  | 2 +-
  hw/scsi/spapr_vscsi.c| 2 +-
  qapi/net.json| 2 +-
  tools/virtiofsd/passthrough_ll.c | 2 +-
  ui/input.c   | 2 +-
  9 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c
index 49504e740f..b0787e8ee7 100644
--- a/hw/intc/openpic.c
+++ b/hw/intc/openpic.c
@@ -729,7 +729,7 @@ static void openpic_tmr_set_tmr(OpenPICTimer *tmr, uint32_t 
val, bool enabled)
  }
  
  /*

- * Returns the currrent tccr value, i.e., timer value (in clocks) with
+ * Returns the current tccr value, i.e., timer value (in clocks) with
   * appropriate TOG.
   */
  static uint64_t openpic_tmr_get_timer(OpenPICTimer *tmr)
diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c
index 0db9aaf76a..8c11b237de 100644
--- a/hw/net/imx_fec.c
+++ b/hw/net/imx_fec.c
@@ -438,7 +438,7 @@ static void imx_eth_update(IMXFECState *s)
   *   assignment fail.
   *
   * To ensure that all versions of Linux work, generate ENET_INT_MAC
- * interrrupts on both interrupt lines. This should be changed if and when
+ * interrupts on both interrupt lines. This should be changed if and when
   * qemu supports IOMUX.
   */
  if (s->regs[ENET_EIR] & s->regs[ENET_EIMR] &
diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
index 92bd0530dd..eff62f3945 100644
--- a/hw/pci/pcie_aer.c
+++ b/hw/pci/pcie_aer.c
@@ -323,7 +323,7 @@ static void pcie_aer_msg_root_port(PCIDevice *dev, const 
PCIEAERMsg *msg)
   */
  }
  
-/* Errro Message Received: Root Error Status register */

+/* Error Message Received: Root Error Status register */
  switch (msg->severity) {
  case PCI_ERR_ROOT_CMD_COR_EN:
  if (root_status & PCI_ERR_ROOT_COR_RCV) {
diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c
index f822f18b98..e71f3a7483 100644
--- a/hw/pci/shpc.c
+++ b/hw/pci/shpc.c
@@ -480,7 +480,8 @@ static const MemoryRegionOps shpc_mmio_ops = {
  .endianness = DEVICE_LITTLE_ENDIAN,
  .valid = {
  /* SHPC ECN requires dword accesses, but the original 1.0 spec 
doesn't.
- * It's easier to suppport all sizes than worry about it. */
+ * It's easier to support all sizes than worry about it.
+ */
  .min_access_size = 1,
  .max_access_size = 4,
  },
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 655ab856a0..b4283055c1 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -553,7 +553,7 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, 
uint8_t val,
   * instruction is a harmless no-op.  It won't correctly
   * implement the cache count flush *but* if we have
   * count-cache-disabled in the host, that flush is
- * unnnecessary.  So, specifically allow this case.  This
+ * unnecessary.  So, specifically allow this case.  This
   * allows us to have better performance on POWER9 DD2.3,
   * while still working on POWER9 DD2.2 and POWER8 host
   * cpus.
diff --git a/hw/scsi/spapr_vscsi.c b/hw/scsi/spapr_vscsi.c
index a07a8e1523..e320ccaa23 100644
--- a/hw/scsi/spapr_vscsi.c
+++ b/hw/scsi/spapr_vscsi.c
@@ -1013,7 +1013,7 @@ static int vscsi_send_capabilities(VSCSIState *s, 
vscsi_req *req)
  }
  
  /*

- * Current implementation does not suppport any migration or
+ * Current implementation does not support any migration or
   * reservation capabilities. Construct the response telling the
   * guest not to use them.
   */
diff --git a/qapi/net.json b/qapi/net.json
index d6f7cfd4d6..9af11e9a3b 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -298,7 +298,7 @@
  #
  # @udp: use the udp version of l2tpv3 encapsulation
  #
-# @cookie64: use 64 bit coookies
+# @cookie64: use 64 bit cookies
  #
  # @counter: have sequence counter
  #
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index b15c631ca5..7a73dfcce9 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -2319,7 +2319,7 @@ static int do_lo_create(fuse_req_t req, struct lo_inode 
*parent_inode,
   * If security.selinux has not been remapped and selinux is enabled,
   * use fscreate to set context before file creation. If not, use
   * tmpfile method for regular files. Otherwise fallback to
- * non-atomic method of file creation and xattr settting.
+ * non-atomic method of file creation and xattr

Re: [External] [PATCH v13 3/8] QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX

2022-06-14 Thread chuang xu



On 2022/5/13 下午2:28, Leonardo Bras wrote:

@@ -557,15 +578,31 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
  memcpy(CMSG_DATA(cmsg), fds, fdsize);
  }
  
+#ifdef QEMU_MSG_ZEROCOPY

+if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
+sflags = MSG_ZEROCOPY;
+}
+#endif
+
   retry:
-ret = sendmsg(sioc->fd, , 0);
+ret = sendmsg(sioc->fd, , sflags);
  if (ret <= 0) {
-if (errno == EAGAIN) {
+switch (errno) {
+case EAGAIN:
  return QIO_CHANNEL_ERR_BLOCK;
-}
-if (errno == EINTR) {
+case EINTR:
  goto retry;
+#ifdef QEMU_MSG_ZEROCOPY
+case ENOBUFS:
+if (sflags & MSG_ZEROCOPY) {
+error_setg_errno(errp, errno,
+ "Process can't lock enough memory for using 
MSG_ZEROCOPY");
+return -1;
+}
+break;
+#endif
  }
+
  error_setg_errno(errp, errno,
   "Unable to write to socket");
  return -1;


Hi, Leo.

There are some other questions I would like to discuss with you.

I tested the multifd zero_copy migration and found that sometimes even 
if max locked memory of qemu was set to 16GB（much greater than 
`MULTIFD_PACKET_SIZE`）, the error "Process can't lock enough memory for 
using MSG_ZEROCOPY" would still be reported.


I noticed that the 
doc(https://www.kernel.org/doc/html/v5.12/networking/msg_zerocopy.html) 
says "A zerocopy failure will return -1 with errno ENOBUFS. This happens 
if the socket option was not set, _the socket exceeds its optmem limit_ 
or the user exceeds its ulimit on locked pages."


I also found that the RFC(https://lwn.net/Articles/715279/) says _"__The 
change to allocate notification skbuffs from optmem requires__ensuring 
that net.core.optmem is at least a few 100KB."_


On my host,  optmem was initially set to 20KB, I tried to change it to 
100KB (echo 102400 > /proc/sys/net/core/optmem_max) as the RFC says. Then 
I tested the multifd zero_copy migration repeatedly, and the error 
disappeared.


So when sendmsg returns -1 with errno ENOBUFS, should we distinguish 
between error "socket exceeds optmem limit" and error "user exceeds 
ulimit on locked pages"? Or is there any better way to avoid this problem?


Best Regards,

chuang xu

[PULL 14/15] ui: Deliver refresh rate via QemuUIInfo

From: Akihiko Odaki 

This change adds a new member, refresh_rate to QemuUIInfo in
include/ui/console.h. It represents the refresh rate of the
physical display backend, and it is more appropriate than
GUI update interval as the refresh rate which the emulated device
reports:
- sdl may set GUI update interval shorter than the refresh rate
  of the physical display to respond to user-generated events.
- sdl and vnc aggressively change GUI update interval, but
  a guest is typically not designed to respond to frequent
  refresh rate changes, or frequent "display mode" changes in
  general. The frequency of refresh rate changes of the physical
  display backend matches better to the guest's expectation.

QemuUIInfo also has other members representing "display mode",
which makes it suitable for refresh rate representation. It has
a throttling of update notifications, and prevents frequent changes
of the display mode.

Signed-off-by: Akihiko Odaki 
Message-Id: <20220226115516.59830-3-akihiko.od...@gmail.com>
Signed-off-by: Gerd Hoffmann 
---
 include/ui/console.h |  2 +-
 include/ui/gtk.h |  2 +-
 hw/display/xenfb.c   | 14 +++---
 ui/console.c |  6 --
 ui/gtk-egl.c |  4 ++--
 ui/gtk-gl-area.c |  3 +--
 ui/gtk.c | 45 +---
 7 files changed, 42 insertions(+), 34 deletions(-)

diff --git a/include/ui/console.h b/include/ui/console.h
index 642d6f5248cf..b64d82436097 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -139,6 +139,7 @@ typedef struct QemuUIInfo {
 int   yoff;
 uint32_t  width;
 uint32_t  height;
+uint32_t  refresh_rate;
 } QemuUIInfo;
 
 /* cursor data format is 32bit RGBA */
@@ -431,7 +432,6 @@ typedef struct GraphicHwOps {
 void (*gfx_update)(void *opaque);
 bool gfx_update_async; /* if true, calls graphic_hw_update_done() */
 void (*text_update)(void *opaque, console_ch_t *text);
-void (*update_interval)(void *opaque, uint64_t interval);
 void (*ui_info)(void *opaque, uint32_t head, QemuUIInfo *info);
 void (*gl_block)(void *opaque, bool block);
 } GraphicHwOps;
diff --git a/include/ui/gtk.h b/include/ui/gtk.h
index 101b147d1b98..ae0f53740d19 100644
--- a/include/ui/gtk.h
+++ b/include/ui/gtk.h
@@ -155,7 +155,7 @@ extern bool gtk_use_gl_area;
 
 /* ui/gtk.c */
 void gd_update_windowsize(VirtualConsole *vc);
-int gd_monitor_update_interval(GtkWidget *widget);
+void gd_update_monitor_refresh_rate(VirtualConsole *vc, GtkWidget *widget);
 void gd_hw_gl_flushed(void *vc);
 
 /* ui/gtk-egl.c */
diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c
index cea10fe3c780..50857cd97a0b 100644
--- a/hw/display/xenfb.c
+++ b/hw/display/xenfb.c
@@ -777,16 +777,24 @@ static void xenfb_update(void *opaque)
 xenfb->up_fullscreen = 0;
 }
 
-static void xenfb_update_interval(void *opaque, uint64_t interval)
+static void xenfb_ui_info(void *opaque, uint32_t idx, QemuUIInfo *info)
 {
 struct XenFB *xenfb = opaque;
+uint32_t refresh_rate;
 
 if (xenfb->feature_update) {
 #ifdef XENFB_TYPE_REFRESH_PERIOD
 if (xenfb_queue_full(xenfb)) {
 return;
 }
-xenfb_send_refresh_period(xenfb, interval);
+
+refresh_rate = info->refresh_rate;
+if (!refresh_rate) {
+refresh_rate = 75;
+}
+
+/* T = 1 / f = 1 [s*Hz] / f = 1000*1000 [ms*mHz] / f */
+xenfb_send_refresh_period(xenfb, 1000 * 1000 / refresh_rate);
 #endif
 }
 }
@@ -983,5 +991,5 @@ struct XenDevOps xen_framebuffer_ops = {
 static const GraphicHwOps xenfb_ops = {
 .invalidate  = xenfb_invalidate,
 .gfx_update  = xenfb_update,
-.update_interval = xenfb_update_interval,
+.ui_info = xenfb_ui_info,
 };
diff --git a/ui/console.c b/ui/console.c
index 36c80cd1de85..9331b85203a0 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -160,7 +160,6 @@ static void gui_update(void *opaque)
 uint64_t dcl_interval;
 DisplayState *ds = opaque;
 DisplayChangeListener *dcl;
-QemuConsole *con;
 
 ds->refreshing = true;
 dpy_refresh(ds);
@@ -175,11 +174,6 @@ static void gui_update(void *opaque)
 }
 if (ds->update_interval != interval) {
 ds->update_interval = interval;
-QTAILQ_FOREACH(con, , next) {
-if (con->hw_ops->update_interval) {
-con->hw_ops->update_interval(con->hw, interval);
-}
-}
 trace_console_refresh(interval);
 }
 ds->last_update = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
diff --git a/ui/gtk-egl.c b/ui/gtk-egl.c
index e3bd4bc27431..b5bffbab2522 100644
--- a/ui/gtk-egl.c
+++ b/ui/gtk-egl.c
@@ -140,8 +140,8 @@ void gd_egl_refresh(DisplayChangeListener *dcl)
 {
 VirtualConsole *vc = container_of(dcl, VirtualConsole, gfx.dcl);
 
-vc->gfx.dcl.update_interval = gd_monitor_update_interval(
-vc->window ? vc->window : vc->gfx.drawing_area);
+gd_update_monitor_refresh_rate(
+vc,

[PULL 02/15] ui/gtk-gl-area: create the requested GL context version

From: Volker Rümelin 

Since about 2018 virglrenderer (commit fa835b0f88 "vrend: don't
hardcode context version") tries to open the highest available GL
context version. This is done by creating the known GL context
versions from the highest to the lowest until (*create_gl_context)
returns a context != NULL.

This does not work properly with
the current QEMU gd_gl_area_create_context() function, because
gdk_gl_context_realize() on Wayland creates a version 3.0 legacy
context if the requested GL context version can't be created.

In order for virglrenderer to find the highest available GL
context version, return NULL if the created context version is
lower than the requested version.

This fixes the following error:
QEMU started with -device virtio-vga-gl -display gtk,gl=on.
Under Wayland, the guest window remains black and the following
information can be seen on the host.

gl_version 30 - compat profile
(qemu:5978): Gdk-WARNING **: 16:19:01.533:
  gdk_gl_context_set_required_version
  - GL context versions less than 3.2 are not supported.

(qemu:5978): Gdk-WARNING **: 16:19:01.537:
  gdk_gl_context_set_required_version -
  GL context versions less than 3.2 are not supported.

(qemu:5978): Gdk-WARNING **: 16:19:01.554:
  gdk_gl_context_set_required_version -
  GL context versions less than 3.2 are not supported.
vrend_renderer_fill_caps: Entering with stale GL error: 1282

To reproduce this error, an OpenGL driver is required on the host
that doesn't have the latest OpenGL extensions fully implemented.
An example for this is the Intel i965 driver on a Haswell processor.

Signed-off-by: Volker Rümelin 
Message-Id: <20220605085131.7711-2-vr_q...@t-online.de>
Signed-off-by: Gerd Hoffmann 
---
 ui/gtk-gl-area.c | 31 ++-
 ui/trace-events  |  1 +
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/ui/gtk-gl-area.c b/ui/gtk-gl-area.c
index 0e20ea031d34..2e0129c28cd4 100644
--- a/ui/gtk-gl-area.c
+++ b/ui/gtk-gl-area.c
@@ -170,6 +170,23 @@ void gd_gl_area_switch(DisplayChangeListener *dcl,
 }
 }
 
+static int gd_cmp_gl_context_version(int major, int minor, QEMUGLParams 
*params)
+{
+if (major > params->major_ver) {
+return 1;
+}
+if (major < params->major_ver) {
+return -1;
+}
+if (minor > params->minor_ver) {
+return 1;
+}
+if (minor < params->minor_ver) {
+return -1;
+}
+return 0;
+}
+
 QEMUGLContext gd_gl_area_create_context(DisplayGLCtx *dgc,
 QEMUGLParams *params)
 {
@@ -177,8 +194,8 @@ QEMUGLContext gd_gl_area_create_context(DisplayGLCtx *dgc,
 GdkWindow *window;
 GdkGLContext *ctx;
 GError *err = NULL;
+int major, minor;
 
-gtk_gl_area_make_current(GTK_GL_AREA(vc->gfx.drawing_area));
 window = gtk_widget_get_window(vc->gfx.drawing_area);
 ctx = gdk_window_create_gl_context(window, );
 if (err) {
@@ -196,6 +213,18 @@ QEMUGLContext gd_gl_area_create_context(DisplayGLCtx *dgc,
 g_clear_object();
 return NULL;
 }
+
+gdk_gl_context_make_current(ctx);
+gdk_gl_context_get_version(ctx, , );
+gdk_gl_context_clear_current();
+gtk_gl_area_make_current(GTK_GL_AREA(vc->gfx.drawing_area));
+
+if (gd_cmp_gl_context_version(major, minor, params) == -1) {
+/* created ctx version < requested version */
+g_clear_object();
+}
+
+trace_gd_gl_area_create_context(ctx, params->major_ver, params->minor_ver);
 return ctx;
 }
 
diff --git a/ui/trace-events b/ui/trace-events
index 1040ba0f88c7..a922f00e10b4 100644
--- a/ui/trace-events
+++ b/ui/trace-events
@@ -26,6 +26,7 @@ gd_key_event(const char *tab, int gdk_keycode, int qkeycode, 
const char *action)
 gd_grab(const char *tab, const char *device, const char *reason) "tab=%s, 
dev=%s, reason=%s"
 gd_ungrab(const char *tab, const char *device) "tab=%s, dev=%s"
 gd_keymap_windowing(const char *name) "backend=%s"
+gd_gl_area_create_context(void *ctx, int major, int minor) "ctx=%p, major=%d, 
minor=%d"
 gd_gl_area_destroy_context(void *ctx, void *current_ctx) "ctx=%p, 
current_ctx=%p"
 
 # vnc-auth-sasl.c
-- 
2.36.1

Re: [PATCH] target/ppc: cpu_init: Clean up stop state on cpu reset

2022-06-14 Thread Fabiano Rosas

Frederic Barrat  writes:

> The 'resume_as_sreset' attribute of a cpu can be set when a thread is
> entering a stop state on ppc books. It causes the thread to be
> re-routed to vector 0x100 when woken up by an exception. So it must be
> cleaned on reset or a thread might be re-routed unexpectedly after a
> reset, when it was not in a stop state and/or when the appropriate
> exception handler isn't set up yet.
>
> Signed-off-by: Frederic Barrat 

Reviewed-by: Fabiano Rosas

[PULL 11/15] usbredir: avoid queuing hello packet on snapshot restore

From: Joelle van Dyne 

When launching QEMU with "-loadvm", usbredir_create_parser() should avoid
setting up the hello packet (just as with "-incoming"). On the latest version
of libusbredir, usbredirparser_unserialize() will return error if the parser
is not "pristine."

Signed-off-by: Joelle van Dyne 
Message-Id: <20220507041850.98716-...@getutm.app>
Signed-off-by: Gerd Hoffmann 
---
 hw/usb/redirect.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index fd7df599bc0b..1bd30efc3ef0 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -1280,7 +1280,8 @@ static void usbredir_create_parser(USBRedirDevice *dev)
 }
 #endif
 
-if (runstate_check(RUN_STATE_INMIGRATE)) {
+if (runstate_check(RUN_STATE_INMIGRATE) ||
+runstate_check(RUN_STATE_PRELAUNCH)) {
 flags |= usbredirparser_fl_no_hello;
 }
 usbredirparser_init(dev->parser, VERSION, caps, USB_REDIR_CAPS_SIZE,
-- 
2.36.1

[PULL 12/15] virtio-gpu: update done only on the scanout associated with rect

From: Dongwon Kim 

It only needs to update the scanouts containing the rect area
coming with the resource-flush request from the guest.

Cc: Gerd Hoffmann 
Cc: Vivek Kasireddy 
Signed-off-by: Dongwon Kim 
Message-Id: <20220505214030.4261-1-dongwon@intel.com>
Signed-off-by: Gerd Hoffmann 
---
 hw/display/virtio-gpu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index cd4a56056fd9..55c6dd576318 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -514,6 +514,9 @@ static void virtio_gpu_resource_flush(VirtIOGPU *g,
 for (i = 0; i < g->parent_obj.conf.max_outputs; i++) {
 scanout = >parent_obj.scanout[i];
 if (scanout->resource_id == res->resource_id &&
+rf.r.x >= scanout->x && rf.r.y >= scanout->y &&
+rf.r.x + rf.r.width <= scanout->x + scanout->width &&
+rf.r.y + rf.r.height <= scanout->y + scanout->height &&
 console_has_gl(scanout->con)) {
 dpy_gl_update(scanout->con, 0, 0, scanout->width,
   scanout->height);
-- 
2.36.1

Re: [PULL 00/16] Kraxel 20220613 patches

On Mon, Jun 13, 2022 at 08:52:21AM -0700, Richard Henderson wrote:
> On 6/13/22 04:36, Gerd Hoffmann wrote:
> > The following changes since commit dcb40541ebca7ec98a14d461593b3cd7282b4fac:
> > 
> >Merge tag 'mips-20220611' of https://github.com/philmd/qemu into staging 
> > (2022-06-11 21:13:27 -0700)
> > 
> > are available in the Git repository at:
> > 
> >git://git.kraxel.org/qemu tags/kraxel-20220613-pull-request
> > 
> > for you to fetch changes up to 23b87f7a3a13e93e248eef8a4b7257548855a620:
> > 
> >ui: move 'pc-bios/keymaps' to 'ui/keymaps' (2022-06-13 10:59:25 +0200)
> > 
> > 
> > usb: add CanoKey device, fixes for ehci + redir
> > ui: fixes for gtk and cocoa, move keymaps (v2), rework refresh rate
> > virtio-gpu: scanout flush fix
> 
> This doesn't even configure:
> 
> ../src/ui/keymaps/meson.build:55:4: ERROR: File ar does not exist.

dropped keymaps patch for now, new version sent.

take care,
  Gerd

[PULL 05/15] hw/usb/canokey: Add trace events

From: "Hongren (Zenithal) Zheng" 

Signed-off-by: Hongren (Zenithal) Zheng 
Message-Id: 
Signed-off-by: Gerd Hoffmann 
---
 hw/usb/canokey.c| 13 +
 hw/usb/trace-events | 16 
 2 files changed, 29 insertions(+)

diff --git a/hw/usb/canokey.c b/hw/usb/canokey.c
index 6cb8b7cdb089..4a08b1cbd776 100644
--- a/hw/usb/canokey.c
+++ b/hw/usb/canokey.c
@@ -14,6 +14,7 @@
 #include "qapi/error.h"
 #include "hw/usb.h"
 #include "hw/qdev-properties.h"
+#include "trace.h"
 #include "desc.h"
 #include "canokey.h"
 
@@ -66,6 +67,7 @@ static const USBDesc desc_canokey = {
  */
 int canokey_emu_stall_ep(void *base, uint8_t ep)
 {
+trace_canokey_emu_stall_ep(ep);
 CanoKeyState *key = base;
 uint8_t ep_in = CANOKEY_EP_IN(ep); /* INTR IN has ep 129 */
 key->ep_in_size[ep_in] = 0;
@@ -75,6 +77,7 @@ int canokey_emu_stall_ep(void *base, uint8_t ep)
 
 int canokey_emu_set_address(void *base, uint8_t addr)
 {
+trace_canokey_emu_set_address(addr);
 CanoKeyState *key = base;
 key->dev.addr = addr;
 return 0;
@@ -83,6 +86,7 @@ int canokey_emu_set_address(void *base, uint8_t addr)
 int canokey_emu_prepare_receive(
 void *base, uint8_t ep, uint8_t *pbuf, uint16_t size)
 {
+trace_canokey_emu_prepare_receive(ep, size);
 CanoKeyState *key = base;
 key->ep_out[ep] = pbuf;
 key->ep_out_size[ep] = size;
@@ -92,6 +96,7 @@ int canokey_emu_prepare_receive(
 int canokey_emu_transmit(
 void *base, uint8_t ep, const uint8_t *pbuf, uint16_t size)
 {
+trace_canokey_emu_transmit(ep, size);
 CanoKeyState *key = base;
 uint8_t ep_in = CANOKEY_EP_IN(ep); /* INTR IN has ep 129 */
 memcpy(key->ep_in[ep_in] + key->ep_in_size[ep_in],
@@ -125,6 +130,7 @@ uint32_t canokey_emu_get_rx_data_size(void *base, uint8_t 
ep)
  */
 static void canokey_handle_reset(USBDevice *dev)
 {
+trace_canokey_handle_reset();
 CanoKeyState *key = CANOKEY(dev);
 for (int i = 0; i != CANOKEY_EP_NUM; ++i) {
 key->ep_in_state[i] = CANOKEY_EP_IN_WAIT;
@@ -137,6 +143,7 @@ static void canokey_handle_reset(USBDevice *dev)
 static void canokey_handle_control(USBDevice *dev, USBPacket *p,
int request, int value, int index, int length, uint8_t *data)
 {
+trace_canokey_handle_control_setup(request, value, index, length);
 CanoKeyState *key = CANOKEY(dev);
 
 canokey_emu_setup(request, value, index, length);
@@ -144,6 +151,7 @@ static void canokey_handle_control(USBDevice *dev, 
USBPacket *p,
 uint32_t dir_in = request & DeviceRequest;
 if (!dir_in) {
 /* OUT */
+trace_canokey_handle_control_out();
 if (key->ep_out[0] != NULL) {
 memcpy(key->ep_out[0], data, length);
 }
@@ -163,6 +171,7 @@ static void canokey_handle_control(USBDevice *dev, 
USBPacket *p,
 case CANOKEY_EP_IN_READY:
 memcpy(data, key->ep_in[0], key->ep_in_size[0]);
 p->actual_length = key->ep_in_size[0];
+trace_canokey_handle_control_in(p->actual_length);
 /* reset state */
 key->ep_in_state[0] = CANOKEY_EP_IN_WAIT;
 key->ep_in_size[0] = 0;
@@ -182,6 +191,7 @@ static void canokey_handle_data(USBDevice *dev, USBPacket 
*p)
 uint32_t out_len;
 switch (p->pid) {
 case USB_TOKEN_OUT:
+trace_canokey_handle_data_out(ep_out, p->iov.size);
 usb_packet_copy(p, key->ep_out_buffer[ep_out], p->iov.size);
 out_pos = 0;
 while (out_pos != p->iov.size) {
@@ -226,6 +236,7 @@ static void canokey_handle_data(USBDevice *dev, USBPacket 
*p)
 key->ep_in_size[ep_in] = 0;
 key->ep_in_pos[ep_in] = 0;
 }
+trace_canokey_handle_data_in(ep_in, in_len);
 break;
 }
 break;
@@ -237,6 +248,7 @@ static void canokey_handle_data(USBDevice *dev, USBPacket 
*p)
 
 static void canokey_realize(USBDevice *base, Error **errp)
 {
+trace_canokey_realize();
 CanoKeyState *key = CANOKEY(base);
 
 if (key->file == NULL) {
@@ -260,6 +272,7 @@ static void canokey_realize(USBDevice *base, Error **errp)
 
 static void canokey_unrealize(USBDevice *base)
 {
+trace_canokey_unrealize();
 }
 
 static Property canokey_properties[] = {
diff --git a/hw/usb/trace-events b/hw/usb/trace-events
index 9773cb53300d..914ca7166829 100644
--- a/hw/usb/trace-events
+++ b/hw/usb/trace-events
@@ -345,3 +345,19 @@ usb_serial_set_baud(int bus, int addr, int baud) "dev 
%d:%u baud rate %d"
 usb_serial_set_data(int bus, int addr, int parity, int data, int stop) "dev 
%d:%u parity %c, data bits %d, stop bits %d"
 usb_serial_set_flow_control(int bus, int addr, int index) "dev %d:%u flow 
control %d"
 usb_serial_set_xonxoff(int bus, int addr, uint8_t xon, uint8_t xoff) "dev 
%d:%u xon 0x%x xoff 0x%x"
+
+# canokey.c
+canokey_emu_stall_ep(uint8_t ep) "ep %d"
+canokey_emu_set_address(uint8_t addr) "addr %d"
+canokey_emu_prepare_receive(uint8_t ep, uint16_t size) "ep %d size %d"
+canokey_emu_transmit(uint8_t

[PULL 08/15] docs/system/devices/usb: Add CanoKey to USB devices examples

From: "Hongren (Zenithal) Zheng" 

Signed-off-by: Hongren (Zenithal) Zheng 
Message-Id: 
Signed-off-by: Gerd Hoffmann 
---
 docs/system/devices/usb.rst | 4 
 1 file changed, 4 insertions(+)

diff --git a/docs/system/devices/usb.rst b/docs/system/devices/usb.rst
index afb7d6c2268d..872d9167589b 100644
--- a/docs/system/devices/usb.rst
+++ b/docs/system/devices/usb.rst
@@ -199,6 +199,10 @@ option or the ``device_add`` monitor command. Available 
devices are:
 ``u2f-{emulated,passthru}``
Universal Second Factor device
 
+``canokey``
+   An Open-source Secure Key implementing FIDO2, OpenPGP, PIV and more.
+   For more information, see :ref:`canokey`.
+
 Physical port addressing
 
 
-- 
2.36.1

[PULL 13/15] ui/console: Do not return a value with ui_info

From: Akihiko Odaki 

The returned value is not used and is misleading.

Signed-off-by: Akihiko Odaki 
Message-Id: <20220226115516.59830-2-akihiko.od...@gmail.com>
Signed-off-by: Gerd Hoffmann 
---
 include/ui/console.h | 2 +-
 hw/display/virtio-gpu-base.c | 6 +++---
 hw/display/virtio-vga.c  | 5 ++---
 hw/vfio/display.c| 8 +++-
 4 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/include/ui/console.h b/include/ui/console.h
index c44b28a972ca..642d6f5248cf 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -432,7 +432,7 @@ typedef struct GraphicHwOps {
 bool gfx_update_async; /* if true, calls graphic_hw_update_done() */
 void (*text_update)(void *opaque, console_ch_t *text);
 void (*update_interval)(void *opaque, uint64_t interval);
-int (*ui_info)(void *opaque, uint32_t head, QemuUIInfo *info);
+void (*ui_info)(void *opaque, uint32_t head, QemuUIInfo *info);
 void (*gl_block)(void *opaque, bool block);
 } GraphicHwOps;
 
diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c
index 790cec333c8c..b21d6e5b0be8 100644
--- a/hw/display/virtio-gpu-base.c
+++ b/hw/display/virtio-gpu-base.c
@@ -69,12 +69,12 @@ static void virtio_gpu_notify_event(VirtIOGPUBase *g, 
uint32_t event_type)
 virtio_notify_config(>parent_obj);
 }
 
-static int virtio_gpu_ui_info(void *opaque, uint32_t idx, QemuUIInfo *info)
+static void virtio_gpu_ui_info(void *opaque, uint32_t idx, QemuUIInfo *info)
 {
 VirtIOGPUBase *g = opaque;
 
 if (idx >= g->conf.max_outputs) {
-return -1;
+return;
 }
 
 g->req_state[idx].x = info->xoff;
@@ -92,7 +92,7 @@ static int virtio_gpu_ui_info(void *opaque, uint32_t idx, 
QemuUIInfo *info)
 
 /* send event to guest */
 virtio_gpu_notify_event(g, VIRTIO_GPU_EVENT_DISPLAY);
-return 0;
+return;
 }
 
 static void
diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c
index c206b5da384b..4dcb34c4a740 100644
--- a/hw/display/virtio-vga.c
+++ b/hw/display/virtio-vga.c
@@ -47,15 +47,14 @@ static void virtio_vga_base_text_update(void *opaque, 
console_ch_t *chardata)
 }
 }
 
-static int virtio_vga_base_ui_info(void *opaque, uint32_t idx, QemuUIInfo 
*info)
+static void virtio_vga_base_ui_info(void *opaque, uint32_t idx, QemuUIInfo 
*info)
 {
 VirtIOVGABase *vvga = opaque;
 VirtIOGPUBase *g = vvga->vgpu;
 
 if (g->hw_ops->ui_info) {
-return g->hw_ops->ui_info(g, idx, info);
+g->hw_ops->ui_info(g, idx, info);
 }
-return -1;
 }
 
 static void virtio_vga_base_gl_block(void *opaque, bool block)
diff --git a/hw/vfio/display.c b/hw/vfio/display.c
index 89bc90508fb8..78f4d82c1c35 100644
--- a/hw/vfio/display.c
+++ b/hw/vfio/display.c
@@ -106,14 +106,14 @@ err:
 return;
 }
 
-static int vfio_display_edid_ui_info(void *opaque, uint32_t idx,
- QemuUIInfo *info)
+static void vfio_display_edid_ui_info(void *opaque, uint32_t idx,
+  QemuUIInfo *info)
 {
 VFIOPCIDevice *vdev = opaque;
 VFIODisplay *dpy = vdev->dpy;
 
 if (!dpy->edid_regs) {
-return 0;
+return;
 }
 
 if (info->width && info->height) {
@@ -121,8 +121,6 @@ static int vfio_display_edid_ui_info(void *opaque, uint32_t 
idx,
 } else {
 vfio_display_edid_update(vdev, false, 0, 0);
 }
-
-return 0;
 }
 
 static void vfio_display_edid_init(VFIOPCIDevice *vdev)
-- 
2.36.1

[PULL 15/15] virtio-gpu: Respect UI refresh rate for EDID

From: Akihiko Odaki 

Signed-off-by: Akihiko Odaki 
Message-Id: <20220226115516.59830-4-akihiko.od...@gmail.com>
Signed-off-by: Gerd Hoffmann 
---
 include/hw/virtio/virtio-gpu.h | 1 +
 hw/display/virtio-gpu-base.c   | 1 +
 hw/display/virtio-gpu.c| 1 +
 3 files changed, 3 insertions(+)

diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index afff9e158e31..2e28507efe21 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -80,6 +80,7 @@ struct virtio_gpu_scanout {
 struct virtio_gpu_requested_state {
 uint16_t width_mm, height_mm;
 uint32_t width, height;
+uint32_t refresh_rate;
 int x, y;
 };
 
diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c
index b21d6e5b0be8..a29f191aa82e 100644
--- a/hw/display/virtio-gpu-base.c
+++ b/hw/display/virtio-gpu-base.c
@@ -79,6 +79,7 @@ static void virtio_gpu_ui_info(void *opaque, uint32_t idx, 
QemuUIInfo *info)
 
 g->req_state[idx].x = info->xoff;
 g->req_state[idx].y = info->yoff;
+g->req_state[idx].refresh_rate = info->refresh_rate;
 g->req_state[idx].width = info->width;
 g->req_state[idx].height = info->height;
 g->req_state[idx].width_mm = info->width_mm;
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 55c6dd576318..20cc703dcc6e 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -217,6 +217,7 @@ virtio_gpu_generate_edid(VirtIOGPU *g, int scanout,
 .height_mm = b->req_state[scanout].height_mm,
 .prefx = b->req_state[scanout].width,
 .prefy = b->req_state[scanout].height,
+.refresh_rate = b->req_state[scanout].refresh_rate,
 };
 
 edid->size = cpu_to_le32(sizeof(edid->edid));
-- 
2.36.1

[PULL 06/15] meson: Add CanoKey

From: "Hongren (Zenithal) Zheng" 

Signed-off-by: Hongren (Zenithal) Zheng 
Message-Id: 
Signed-off-by: Gerd Hoffmann 
---
 meson_options.txt | 2 ++
 hw/usb/Kconfig| 5 +
 hw/usb/meson.build| 5 +
 meson.build   | 6 ++
 scripts/meson-buildoptions.sh | 3 +++
 5 files changed, 21 insertions(+)

diff --git a/meson_options.txt b/meson_options.txt
index 2de94af03712..0e8197386b99 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -189,6 +189,8 @@ option('spice_protocol', type : 'feature', value : 'auto',
description: 'Spice protocol support')
 option('u2f', type : 'feature', value : 'auto',
description: 'U2F emulation support')
+option('canokey', type : 'feature', value : 'auto',
+   description: 'CanoKey support')
 option('usb_redir', type : 'feature', value : 'auto',
description: 'libusbredir support')
 option('l2tpv3', type : 'feature', value : 'auto',
diff --git a/hw/usb/Kconfig b/hw/usb/Kconfig
index 53f8283ffdc1..ce4f4339763e 100644
--- a/hw/usb/Kconfig
+++ b/hw/usb/Kconfig
@@ -119,6 +119,11 @@ config USB_U2F
 default y
 depends on USB
 
+config USB_CANOKEY
+bool
+default y
+depends on USB
+
 config IMX_USBPHY
 bool
 default y
diff --git a/hw/usb/meson.build b/hw/usb/meson.build
index de853d780dd8..793df42e2127 100644
--- a/hw/usb/meson.build
+++ b/hw/usb/meson.build
@@ -63,6 +63,11 @@ if u2f.found()
   softmmu_ss.add(when: 'CONFIG_USB_U2F', if_true: [u2f, 
files('u2f-emulated.c')])
 endif
 
+# CanoKey
+if canokey.found()
+  softmmu_ss.add(when: 'CONFIG_USB_CANOKEY', if_true: [canokey, 
files('canokey.c')])
+endif
+
 # usb redirect
 if usbredir.found()
   usbredir_ss = ss.source_set()
diff --git a/meson.build b/meson.build
index 21cd949082dc..0c2e11ff0715 100644
--- a/meson.build
+++ b/meson.build
@@ -1408,6 +1408,12 @@ if have_system
method: 'pkg-config',
kwargs: static_kwargs)
 endif
+canokey = not_found
+if have_system
+  canokey = dependency('canokey-qemu', required: get_option('canokey'),
+   method: 'pkg-config',
+   kwargs: static_kwargs)
+endif
 usbredir = not_found
 if not get_option('usb_redir').auto() or have_system
   usbredir = dependency('libusbredirparser-0.5', required: 
get_option('usb_redir'),
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 00ea4d8cd169..1fc1d2e2c362 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -73,6 +73,7 @@ meson_options_help() {
   printf "%s\n" '  bpf eBPF support'
   printf "%s\n" '  brlapi  brlapi character device driver'
   printf "%s\n" '  bzip2   bzip2 support for DMG images'
+  printf "%s\n" '  canokey CanoKey support'
   printf "%s\n" '  cap-ng  cap_ng support'
   printf "%s\n" '  capstoneWhether and how to find the capstone 
library'
   printf "%s\n" '  cloop   cloop image format support'
@@ -204,6 +205,8 @@ _meson_option_parse() {
 --disable-brlapi) printf "%s" -Dbrlapi=disabled ;;
 --enable-bzip2) printf "%s" -Dbzip2=enabled ;;
 --disable-bzip2) printf "%s" -Dbzip2=disabled ;;
+--enable-canokey) printf "%s" -Dcanokey=enabled ;;
+--disable-canokey) printf "%s" -Dcanokey=disabled ;;
 --enable-cap-ng) printf "%s" -Dcap_ng=enabled ;;
 --disable-cap-ng) printf "%s" -Dcap_ng=disabled ;;
 --enable-capstone) printf "%s" -Dcapstone=enabled ;;
-- 
2.36.1

[PULL 04/15] hw/usb: Add CanoKey Implementation

From: "Hongren (Zenithal) Zheng" 

This commit added a new emulated device called CanoKey to QEMU.

CanoKey implements platform independent features in canokey-core
https://github.com/canokeys/canokey-core, and leaves the USB implementation
to the platform.

In this commit the USB part was implemented in QEMU using QEMU's USB APIs,
therefore the emulated CanoKey can communicate with the guest OS using USB.

Signed-off-by: Hongren (Zenithal) Zheng 
Message-Id: 
Signed-off-by: Gerd Hoffmann 
---
 hw/usb/canokey.h |  69 +++
 hw/usb/canokey.c | 300 +++
 2 files changed, 369 insertions(+)
 create mode 100644 hw/usb/canokey.h
 create mode 100644 hw/usb/canokey.c

diff --git a/hw/usb/canokey.h b/hw/usb/canokey.h
new file mode 100644
index ..24cf30420346
--- /dev/null
+++ b/hw/usb/canokey.h
@@ -0,0 +1,69 @@
+/*
+ * CanoKey QEMU device header.
+ *
+ * Copyright (c) 2021-2022 Canokeys.org 
+ * Written by Hongren (Zenithal) Zheng 
+ *
+ * This code is licensed under the Apache-2.0.
+ */
+
+#ifndef CANOKEY_H
+#define CANOKEY_H
+
+#include "hw/qdev-core.h"
+
+#define TYPE_CANOKEY "canokey"
+#define CANOKEY(obj) \
+OBJECT_CHECK(CanoKeyState, (obj), TYPE_CANOKEY)
+
+/*
+ * State of Canokey (i.e. hw/canokey.c)
+ */
+
+/* CTRL INTR BULK */
+#define CANOKEY_EP_NUM 3
+/* BULK/INTR IN can be up to 1352 bytes, e.g. get key info */
+#define CANOKEY_EP_IN_BUFFER_SIZE 2048
+/* BULK OUT can be up to 270 bytes, e.g. PIV import cert */
+#define CANOKEY_EP_OUT_BUFFER_SIZE 512
+
+typedef enum {
+CANOKEY_EP_IN_WAIT,
+CANOKEY_EP_IN_READY,
+CANOKEY_EP_IN_STALL
+} CanoKeyEPState;
+
+typedef struct CanoKeyState {
+USBDevice dev;
+
+/* IN packets from canokey device loop */
+uint8_t ep_in[CANOKEY_EP_NUM][CANOKEY_EP_IN_BUFFER_SIZE];
+/*
+ * See canokey_emu_transmit
+ *
+ * For large INTR IN, receive multiple data from canokey device loop
+ * in this case ep_in_size would increase with every call
+ */
+uint32_t ep_in_size[CANOKEY_EP_NUM];
+/*
+ * Used in canokey_handle_data
+ * for IN larger than p->iov.size, we would do multiple handle_data()
+ *
+ * The difference between ep_in_pos and ep_in_size:
+ * We first increase ep_in_size to fill ep_in buffer in device_loop,
+ * then use ep_in_pos to submit data from ep_in buffer in handle_data
+ */
+uint32_t ep_in_pos[CANOKEY_EP_NUM];
+CanoKeyEPState ep_in_state[CANOKEY_EP_NUM];
+
+/* OUT pointer to canokey recv buffer */
+uint8_t *ep_out[CANOKEY_EP_NUM];
+uint32_t ep_out_size[CANOKEY_EP_NUM];
+/* For large BULK OUT, multiple write to ep_out is needed */
+uint8_t ep_out_buffer[CANOKEY_EP_NUM][CANOKEY_EP_OUT_BUFFER_SIZE];
+
+/* Properties */
+char *file; /* canokey-file */
+} CanoKeyState;
+
+#endif /* CANOKEY_H */
diff --git a/hw/usb/canokey.c b/hw/usb/canokey.c
new file mode 100644
index ..6cb8b7cdb089
--- /dev/null
+++ b/hw/usb/canokey.c
@@ -0,0 +1,300 @@
+/*
+ * CanoKey QEMU device implementation.
+ *
+ * Copyright (c) 2021-2022 Canokeys.org 
+ * Written by Hongren (Zenithal) Zheng 
+ *
+ * This code is licensed under the Apache-2.0.
+ */
+
+#include "qemu/osdep.h"
+#include 
+
+#include "qemu/module.h"
+#include "qapi/error.h"
+#include "hw/usb.h"
+#include "hw/qdev-properties.h"
+#include "desc.h"
+#include "canokey.h"
+
+#define CANOKEY_EP_IN(ep) ((ep) & 0x7F)
+
+#define CANOKEY_VENDOR_NUM 0x20a0
+#define CANOKEY_PRODUCT_NUM0x42d2
+
+/*
+ * placeholder, canokey-qemu implements its own usb desc
+ * Namely we do not use usb_desc_handle_control
+ */
+enum {
+STR_MANUFACTURER = 1,
+STR_PRODUCT,
+STR_SERIALNUMBER
+};
+
+static const USBDescStrings desc_strings = {
+[STR_MANUFACTURER] = "canokeys.org",
+[STR_PRODUCT]  = "CanoKey QEMU",
+[STR_SERIALNUMBER] = "0"
+};
+
+static const USBDescDevice desc_device_canokey = {
+.bcdUSB= 0x0,
+.bMaxPacketSize0   = 16,
+.bNumConfigurations= 0,
+.confs = NULL,
+};
+
+static const USBDesc desc_canokey = {
+.id = {
+.idVendor  = CANOKEY_VENDOR_NUM,
+.idProduct = CANOKEY_PRODUCT_NUM,
+.bcdDevice = 0x0100,
+.iManufacturer = STR_MANUFACTURER,
+.iProduct  = STR_PRODUCT,
+.iSerialNumber = STR_SERIALNUMBER,
+},
+.full = _device_canokey,
+.high = _device_canokey,
+.str  = desc_strings,
+};
+
+
+/*
+ * libcanokey-qemu.so side functions
+ * All functions are called from canokey_emu_device_loop
+ */
+int canokey_emu_stall_ep(void *base, uint8_t ep)
+{
+CanoKeyState *key = base;
+uint8_t ep_in = CANOKEY_EP_IN(ep); /* INTR IN has ep 129 */
+key->ep_in_size[ep_in] = 0;
+key->ep_in_state[ep_in] = CANOKEY_EP_IN_STALL;
+return 0;
+}
+
+int canokey_emu_set_address(void *base, uint8_t addr)
+{
+CanoKeyState *key = base;
+key->dev.addr =

[PULL 10/15] hw/usb/hcd-ehci: fix writeback order

From: Arnout Engelen 

The 'active' bit passes control over a qTD between the guest and the
controller: set to 1 by guest to enable execution by the controller,
and the controller sets it to '0' to hand back control to the guest.

ehci_state_writeback writes two dwords to main memory using DMA:
the third dword of the qTD (containing dt, total bytes to transfer,
cpage, cerr and status) and the fourth dword of the qTD (containing
the offset).

This commit makes sure the fourth dword is written before the third,
avoiding a race condition where a new offset written into the qTD
by the guest after it observed the status going to '0' gets
overwritten by a 'late' DMA writeback of the previous offset.

This race condition could lead to 'cpage out of range (5)' errors,
and can be reproduced by:

./qemu-system-x86_64 -enable-kvm -bios $SEABIOS/bios.bin -m 4096 -device 
usb-ehci -blockdev 
driver=file,read-only=on,filename=/home/aengelen/Downloads/openSUSE-Tumbleweed-DVD-i586-Snapshot20220428-Media.iso,node-name=iso
 -device usb-storage,drive=iso,bootindex=0 -chardev 
pipe,id=shell,path=/tmp/pipe -device virtio-serial -device 
virtconsole,chardev=shell -device virtio-rng-pci -serial mon:stdio -nographic

(press a key, select 'Installation' (2), and accept the default
values. On my machine the 'cpage out of range' is reproduced while
loading the Linux Kernel about once per 7 attempts. With the fix in
this commit it no longer fails)

This problem was previously reported as a seabios problem in
https://mail.coreboot.org/hyperkitty/list/seab...@seabios.org/thread/OUTHT5ISSQJGXPNTUPY3O5E5EPZJCHM3/
and as a nixos CI build failure in
https://github.com/NixOS/nixpkgs/issues/170803

Signed-off-by: Arnout Engelen 
Signed-off-by: Gerd Hoffmann 
---
 hw/usb/hcd-ehci.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 33a8a377bd95..d4da8dcb8d15 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -2011,7 +2011,10 @@ static int ehci_state_writeback(EHCIQueue *q)
 ehci_trace_qtd(q, NLPTR_GET(p->qtdaddr), (EHCIqtd *) >qh.next_qtd);
 qtd = (uint32_t *) >qh.next_qtd;
 addr = NLPTR_GET(p->qtdaddr);
-put_dwords(q->ehci, addr + 2 * sizeof(uint32_t), qtd + 2, 2);
+/* First write back the offset */
+put_dwords(q->ehci, addr + 3 * sizeof(uint32_t), qtd + 3, 1);
+/* Then write back the token, clearing the 'active' bit */
+put_dwords(q->ehci, addr + 2 * sizeof(uint32_t), qtd + 2, 1);
 ehci_free_packet(p);
 
 /*
-- 
2.36.1

[PULL 09/15] MAINTAINERS: add myself as CanoKey maintainer

From: "Hongren (Zenithal) Zheng" 

Signed-off-by: Hongren (Zenithal) Zheng 
Message-Id: 
Signed-off-by: Gerd Hoffmann 
---
 MAINTAINERS | 8 
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 0df25ed4b0a3..4cf6174f9f37 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2427,6 +2427,14 @@ F: hw/intc/s390_flic*.c
 F: include/hw/s390x/s390_flic.h
 L: qemu-s3...@nongnu.org
 
+CanoKey
+M: Hongren (Zenithal) Zheng 
+S: Maintained
+R: Canokeys.org 
+F: hw/usb/canokey.c
+F: hw/usb/canokey.h
+F: docs/system/devices/canokey.rst
+
 Subsystems
 --
 Overall Audio backends
-- 
2.36.1

[PULL 07/15] docs: Add CanoKey documentation

From: "Hongren (Zenithal) Zheng" 

Signed-off-by: Hongren (Zenithal) Zheng 
Message-Id: 
Signed-off-by: Gerd Hoffmann 
---
 docs/system/device-emulation.rst |   1 +
 docs/system/devices/canokey.rst  | 168 +++
 2 files changed, 169 insertions(+)
 create mode 100644 docs/system/devices/canokey.rst

diff --git a/docs/system/device-emulation.rst b/docs/system/device-emulation.rst
index 3b729b920d7c..05060060563f 100644
--- a/docs/system/device-emulation.rst
+++ b/docs/system/device-emulation.rst
@@ -92,3 +92,4 @@ Emulated Devices
devices/vhost-user.rst
devices/virtio-pmem.rst
devices/vhost-user-rng.rst
+   devices/canokey.rst
diff --git a/docs/system/devices/canokey.rst b/docs/system/devices/canokey.rst
new file mode 100644
index ..169f99b8eb82
--- /dev/null
+++ b/docs/system/devices/canokey.rst
@@ -0,0 +1,168 @@
+.. _canokey:
+
+CanoKey QEMU
+
+
+CanoKey [1]_ is an open-source secure key with support for
+
+* U2F / FIDO2 with Ed25519 and HMAC-secret
+* OpenPGP Card V3.4 with RSA4096, Ed25519 and more [2]_
+* PIV (NIST SP 800-73-4)
+* HOTP / TOTP
+* NDEF
+
+All these platform-independent features are in canokey-core [3]_.
+
+For different platforms, CanoKey has different implementations,
+including both hardware implementations and virtual cards:
+
+* CanoKey STM32 [4]_
+* CanoKey Pigeon [5]_
+* (virt-card) CanoKey USB/IP
+* (virt-card) CanoKey FunctionFS
+
+In QEMU, yet another CanoKey virt-card is implemented.
+CanoKey QEMU exposes itself as a USB device to the guest OS.
+
+With the same software configuration as a hardware key,
+the guest OS can use all the functionalities of a secure key as if
+there was actually a hardware key plugged in.
+
+CanoKey QEMU provides much convenience for debugging:
+
+* libcanokey-qemu supports debugging output thus developers can
+  inspect what happens inside a secure key
+* CanoKey QEMU supports trace events, thus events in the device can be traced
+* QEMU USB stack supports pcap thus USB packet between the guest
+  and key can be captured and analysed
+
+Then for developers:
+
+* For developers on software with secure key support (e.g. FIDO2, OpenPGP),
+  they can see what happens inside the secure key
+* For secure key developers, USB packets between guest OS and CanoKey
+  can be easily captured and analysed
+
+Also since this is a virtual card, it can be easily used in CI for testing
+on code coping with secure key.
+
+Building
+
+
+libcanokey-qemu is required to use CanoKey QEMU.
+
+.. code-block:: shell
+
+git clone https://github.com/canokeys/canokey-qemu
+mkdir canokey-qemu/build
+pushd canokey-qemu/build
+
+If you want to install libcanokey-qemu in a different place,
+add ``-DCMAKE_INSTALL_PREFIX=/path/to/your/place`` to cmake below.
+
+.. code-block:: shell
+
+cmake ..
+make
+make install # may need sudo
+popd
+
+Then configuring and building:
+
+.. code-block:: shell
+
+# depending on your env, lib/pkgconfig can be lib64/pkgconfig
+export PKG_CONFIG_PATH=/path/to/your/place/lib/pkgconfig:$PKG_CONFIG_PATH
+./configure --enable-canokey && make
+
+Using CanoKey QEMU
+==
+
+CanoKey QEMU stores all its data on a file of the host specified by the argument
+when invoking qemu.
+
+.. parsed-literal::
+
+|qemu_system| -usb -device canokey,file=$HOME/.canokey-file
+
+Note: you should keep this file carefully as it may contain your private key!
+
+The first time when the file is used, it is created and initialized by CanoKey,
+afterwards CanoKey QEMU would just read this file.
+
+After the guest OS boots, you can check that there is a USB device.
+
+For example, if the guest OS is a Linux machine, you may invoke lsusb
+and find CanoKey QEMU there:
+
+.. code-block:: shell
+
+$ lsusb
+Bus 001 Device 002: ID 20a0:42d4 Clay Logic CanoKey QEMU
+
+You may setup the key as guided in [6]_. The console for the key is at [7]_.
+
+Debugging
+
+
+CanoKey QEMU consists of two parts, ``libcanokey-qemu.so`` and ``canokey.c``,
+the latter of which resides in QEMU. The former provides core functionality
+of a secure key while the latter provides platform-dependent functions:
+USB packet handling.
+
+If you want to trace what happens inside the secure key, when compiling
+libcanokey-qemu, you should add ``-DQEMU_DEBUG_OUTPUT=ON`` in cmake command
+line:
+
+.. code-block:: shell
+
+cmake .. -DQEMU_DEBUG_OUTPUT=ON
+
+If you want to trace events that happen in canokey.c, use
+
+.. parsed-literal::
+
+|qemu_system| --trace "canokey_*" \\
+-usb -device canokey,file=$HOME/.canokey-file
+
+If you want to capture USB packets between the guest and the host, you can:
+
+.. parsed-literal::
+
+|qemu_system| -usb -device canokey,file=$HOME/.canokey-file,pcap=key.pcap
+
+Limitations
+===
+
+Currently libcanokey-qemu.so has dozens of global variables as it was originally
+designed for embedded systems. Thus one qemu instance can not have
+multiple CanoKey QEMU

[PULL 00/15] Kraxel 20220614 patches

The following changes since commit debd0753663bc89c86f5462a53268f2e3f680f60:

  Merge tag 'pull-testing-next-140622-1' of https://github.com/stsquad/qemu 
into staging (2022-06-13 21:10:57 -0700)

are available in the Git repository at:

  git://git.kraxel.org/qemu tags/kraxel-20220614-pull-request

for you to fetch changes up to b95b56311a0890da0c9f7fc624529c3d7f8dbce0:

  virtio-gpu: Respect UI refresh rate for EDID (2022-06-14 10:34:37 +0200)


usb: add CanoKey device, fixes for ehci + redir
ui: fixes for gtk and cocoa, rework refresh rate
virtio-gpu: scanout flush fix



Akihiko Odaki (4):
  ui/cocoa: Fix poweroff request code
  ui/console: Do not return a value with ui_info
  ui: Deliver refresh rate via QemuUIInfo
  virtio-gpu: Respect UI refresh rate for EDID

Arnout Engelen (1):
  hw/usb/hcd-ehci: fix writeback order

Dongwon Kim (1):
  virtio-gpu: update done only on the scanout associated with rect

Hongren (Zenithal) Zheng (6):
  hw/usb: Add CanoKey Implementation
  hw/usb/canokey: Add trace events
  meson: Add CanoKey
  docs: Add CanoKey documentation
  docs/system/devices/usb: Add CanoKey to USB devices examples
  MAINTAINERS: add myself as CanoKey maintainer

Joelle van Dyne (1):
  usbredir: avoid queuing hello packet on snapshot restore

Volker Rümelin (2):
  ui/gtk-gl-area: implement GL context destruction
  ui/gtk-gl-area: create the requested GL context version

 meson_options.txt|   2 +
 hw/usb/canokey.h |  69 +++
 include/hw/virtio/virtio-gpu.h   |   1 +
 include/ui/console.h |   4 +-
 include/ui/gtk.h |   2 +-
 hw/display/virtio-gpu-base.c |   7 +-
 hw/display/virtio-gpu.c  |   4 +
 hw/display/virtio-vga.c  |   5 +-
 hw/display/xenfb.c   |  14 +-
 hw/usb/canokey.c | 313 +++
 hw/usb/hcd-ehci.c|   5 +-
 hw/usb/redirect.c|   3 +-
 hw/vfio/display.c|   8 +-
 ui/console.c |   6 -
 ui/gtk-egl.c |   4 +-
 ui/gtk-gl-area.c |  42 -
 ui/gtk.c |  45 +++--
 MAINTAINERS  |   8 +
 docs/system/device-emulation.rst |   1 +
 docs/system/devices/canokey.rst  | 168 +
 docs/system/devices/usb.rst  |   4 +
 hw/usb/Kconfig   |   5 +
 hw/usb/meson.build   |   5 +
 hw/usb/trace-events  |  16 ++
 meson.build  |   6 +
 scripts/meson-buildoptions.sh|   3 +
 ui/cocoa.m   |   6 +-
 ui/trace-events  |   2 +
 28 files changed, 707 insertions(+), 51 deletions(-)
 create mode 100644 hw/usb/canokey.h
 create mode 100644 hw/usb/canokey.c
 create mode 100644 docs/system/devices/canokey.rst

-- 
2.36.1

[PULL 03/15] ui/cocoa: Fix poweroff request code

From: Akihiko Odaki 

Signed-off-by: Akihiko Odaki 
Reviewed-by: Philippe Mathieu-Daudé 
Message-Id: <20220529082508.89097-1-akihiko.od...@gmail.com>
Signed-off-by: Gerd Hoffmann 
---
 ui/cocoa.m | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/ui/cocoa.m b/ui/cocoa.m
index 09a62817f2a9..84c84e98fc5e 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -35,6 +35,7 @@
 #include "ui/kbd-state.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/runstate.h"
+#include "sysemu/runstate-action.h"
 #include "sysemu/cpu-throttle.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-block.h"
@@ -1290,7 +1291,10 @@ - (void)applicationWillTerminate:(NSNotification 
*)aNotification
 {
 COCOA_DEBUG("QemuCocoaAppController: applicationWillTerminate\n");
 
-qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_UI);
+with_iothread_lock(^{
+shutdown_action = SHUTDOWN_ACTION_POWEROFF;
+qemu_system_shutdown_request(SHUTDOWN_CAUSE_HOST_UI);
+});
 
 /*
  * Sleep here, because returning will cause OSX to kill us
-- 
2.36.1

[PULL 01/15] ui/gtk-gl-area: implement GL context destruction

From: Volker Rümelin 

The counterpart function for gd_gl_area_create_context() is
currently empty. Implement the gd_gl_area_destroy_context()
function to avoid GL context leaks.

Signed-off-by: Volker Rümelin 
Message-Id: <20220605085131.7711-1-vr_q...@t-online.de>
Signed-off-by: Gerd Hoffmann 
---
 ui/gtk-gl-area.c | 8 +++-
 ui/trace-events  | 1 +
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/ui/gtk-gl-area.c b/ui/gtk-gl-area.c
index fc5a082eb846..0e20ea031d34 100644
--- a/ui/gtk-gl-area.c
+++ b/ui/gtk-gl-area.c
@@ -201,7 +201,13 @@ QEMUGLContext gd_gl_area_create_context(DisplayGLCtx *dgc,
 
 void gd_gl_area_destroy_context(DisplayGLCtx *dgc, QEMUGLContext ctx)
 {
-/* FIXME */
+GdkGLContext *current_ctx = gdk_gl_context_get_current();
+
+trace_gd_gl_area_destroy_context(ctx, current_ctx);
+if (ctx == current_ctx) {
+gdk_gl_context_clear_current();
+}
+g_clear_object();
 }
 
 void gd_gl_area_scanout_texture(DisplayChangeListener *dcl,
diff --git a/ui/trace-events b/ui/trace-events
index f78b5e66061f..1040ba0f88c7 100644
--- a/ui/trace-events
+++ b/ui/trace-events
@@ -26,6 +26,7 @@ gd_key_event(const char *tab, int gdk_keycode, int qkeycode, 
const char *action)
 gd_grab(const char *tab, const char *device, const char *reason) "tab=%s, 
dev=%s, reason=%s"
 gd_ungrab(const char *tab, const char *device) "tab=%s, dev=%s"
 gd_keymap_windowing(const char *name) "backend=%s"
+gd_gl_area_destroy_context(void *ctx, void *current_ctx) "ctx=%p, 
current_ctx=%p"
 
 # vnc-auth-sasl.c
 # vnc-auth-vencrypt.c
-- 
2.36.1

Re: [PULL 00/16] Kraxel 20220613 patches

> > Hmm, build worked here and CI passed too.
> > 
> > I think this is one of those cases where the build directory must be
> > deleted because one subdirectory is replaced by a compatibility
> > symlink.
> 
> Except 'configure' deals with that, as it explicitly rm -rf's the
> symlink target:
> 
> symlink() {
>   rm -rf "$2"
>   mkdir -p "$(dirname "$2")"
>   ln -s "$1" "$2"
> }
> 
> so i'm pretty confused as to what's going wrong here still

'git rebase -x ./make.sh master queue/kraxel' not working (where make.sh
is a script effectively doing 'make -C build/$name' for multiple build
trees with different configurations).

'git status' lists ui/keymaps/* as deleted.
'git reset --hard' fixes it.

take care,
  Gerd

Re: [PATCH] hw/mem/nvdimm: fix error message for 'unarmed' flag

2022-06-14 Thread Julia Suvorova

On Tue, Jun 14, 2022 at 11:50 AM David Hildenbrand  wrote:
>
> On 14.06.22 10:54, Igor Mammedov wrote:
> > On Mon, 13 Jun 2022 16:09:53 +0100
> > Stefan Hajnoczi  wrote:
> >
> >> On Mon, Jun 13, 2022 at 05:01:10PM +0200, Julia Suvorova wrote:
> >>> On Tue, May 31, 2022 at 5:32 PM Stefan Hajnoczi  
> >>> wrote:
> 
>  On Tue, May 31, 2022 at 04:51:47PM +0200, Julia Suvorova wrote:
> > In the ACPI specification [1], the 'unarmed' bit is set when a device
> > cannot accept a persistent write. This means that when a memdev is
> > read-only, the 'unarmed' flag must be turned on. The logic is correct,
> > just changing the error message.
> >
> > [1] ACPI NFIT NVDIMM Region Mapping Structure "NVDIMM State Flags" Bit 3
> >
> > Signed-off-by: Julia Suvorova 
> > ---
> >  hw/mem/nvdimm.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> 
>  Reviewed-by: Stefan Hajnoczi 
> >>>
> >>> It seems like Xiao is not active, whose tree should this patch go to?
>
> Is that a temporary or a permanent thing? Do we know?

No idea. But his last signed-off was three years ago.

> >
> > Perhaps David can add himself as maintainer (i.e. put it
> > under memory maintainership umbrella) and merge it
>
> Maybe it makes sense to combine NVDIMM with pc-dimm.c and
> memory-device.c into a "MEMORY DEVICE" section. Then, remove "hw/mem/*"
> from "ACPI/SMBIOS".
>
> cxl_type3.c, npcm7xx_mc.c and sparse-mem.c in /hw/mem/ are a bit
> different. We could add cxl_type3.c to "Compute Express Link".
> npcm7xx_mc.c and sparse-mem.c should be already covered.
>
> --
> Thanks,
>
> David / dhildenb
>

Re: [PATCH 03/11] ppc/pnv: use dev->parent_bus->parent to get the PHB


On 6/13/22 17:44, Daniel Henrique Barboza wrote:

It is not advisable to execute an object_dynamic_cast() to poke into
bus->qbus.parent and follow it up with a C cast into the PnvPHB type we
think we got.

A better way is to access the PnvPHB object via a QOM macro accessing
the existing parent links of the DeviceState. For a given
pnv-phb3/4-root-port 'dev', dev->parent_bus will give us the PHB bus,
and dev->parent_bus->parent is the PHB. Use the adequate QOM macro to
assert the type, and keep the NULL check in case we didn't get the
object we were expecting.

Signed-off-by: Daniel Henrique Barboza 
---
  hw/pci-host/pnv_phb3.c | 10 +++---
  hw/pci-host/pnv_phb4.c | 10 +++---
  2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c
index 4ba660f8b9..7901d8172c 100644
--- a/hw/pci-host/pnv_phb3.c
+++ b/hw/pci-host/pnv_phb3.c
@@ -1139,12 +1139,16 @@ static void pnv_phb3_root_port_realize(DeviceState 
*dev, Error **errp)
  {
  PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
  PCIDevice *pci = PCI_DEVICE(dev);
-PCIBus *bus = pci_get_bus(pci);
  PnvPHB3 *phb = NULL;
  Error *local_err = NULL;
  
-phb = (PnvPHB3 *) object_dynamic_cast(OBJECT(bus->qbus.parent),

-  TYPE_PNV_PHB3);
+/*
+ * dev->parent_bus gives access to the pnv-phb-root bus.
+ * The PnvPHB3 is the owner (parent) of the bus.
+ */
+if (dev && dev->parent_bus) {
+phb = PNV_PHB3(dev->parent_bus->parent);
+}



Couldn't we simply use :

  phb = PNV_PHB3(bus);

?

Thanks,

C.


  if (!phb) {
  error_setg(errp,
diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index ffd9d8a947..bae9398d86 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -1782,12 +1782,16 @@ static void pnv_phb4_root_port_realize(DeviceState 
*dev, Error **errp)
  {
  PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
  PCIDevice *pci = PCI_DEVICE(dev);
-PCIBus *bus = pci_get_bus(pci);
  PnvPHB4 *phb = NULL;
  Error *local_err = NULL;
  
-phb = (PnvPHB4 *) object_dynamic_cast(OBJECT(bus->qbus.parent),

-  TYPE_PNV_PHB4);
+/*
+ * dev->parent_bus gives access to the pnv-phb-root bus.
+ * The PnvPHB4 is the owner (parent) of the bus.
+ */
+if (dev && dev->parent_bus) {
+phb = PNV_PHB4(dev->parent_bus->parent);
+}
  
  if (!phb) {

  error_setg(errp, "%s must be connected to pnv-phb4 buses", dev->id);

Re: [PATCH 02/11] ppc/pnv: attach phb3/phb4 root ports in QOM tree


On 6/13/22 17:44, Daniel Henrique Barboza wrote:

At this moment we leave the pnv-phb3(4)-root-port unattached in QOM:

   /unattached (container)
(...)
 /device[2] (pnv-phb3-root-port)
   /bus master container[0] (memory-region)
   /bus master[0] (memory-region)
   /pci_bridge_io[0] (memory-region)
   /pci_bridge_io[1] (memory-region)
   /pci_bridge_mem[0] (memory-region)
   /pci_bridge_pci[0] (memory-region)
   /pci_bridge_pref_mem[0] (memory-region)
   /pci_bridge_vga_io_hi[0] (memory-region)
   /pci_bridge_vga_io_lo[0] (memory-region)
   /pci_bridge_vga_mem[0] (memory-region)
   /pcie.0 (PCIE)

Let's make changes in pnv_phb_attach_root_port() to attach the created
root ports to its corresponding PHB.

This is the result afterwards:

 /pnv-phb3[0] (pnv-phb3)
   /lsi (ics)
   /msi (phb3-msi)
   /msi32[0] (memory-region)
   /msi64[0] (memory-region)
   /pbcq (pnv-pbcq)
 (...)
   /phb3_iommu[0] (pnv-phb3-iommu-memory-region)
   /pnv-phb3-root.0 (pnv-phb3-root)
 /pnv-phb3-root-port[0] (pnv-phb3-root-port)
   /bus master container[0] (memory-region)
   /bus master[0] (memory-region)
   /pci_bridge_io[0] (memory-region)
   /pci_bridge_io[1] (memory-region)
   /pci_bridge_mem[0] (memory-region)
   /pci_bridge_pci[0] (memory-region)
   /pci_bridge_pref_mem[0] (memory-region)
   /pci_bridge_vga_io_hi[0] (memory-region)
   /pci_bridge_vga_io_lo[0] (memory-region)
   /pci_bridge_vga_mem[0] (memory-region)
   /pcie.0 (PCIE)

Signed-off-by: Daniel Henrique Barboza 



Reviewed-by: Cédric Le Goater 

Thanks,

C.



---
  hw/pci-host/pnv_phb3.c | 2 +-
  hw/pci-host/pnv_phb4.c | 2 +-
  hw/ppc/pnv.c   | 7 ++-
  include/hw/ppc/pnv.h   | 2 +-
  4 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c
index 26ac9b7123..4ba660f8b9 100644
--- a/hw/pci-host/pnv_phb3.c
+++ b/hw/pci-host/pnv_phb3.c
@@ -1052,7 +1052,7 @@ static void pnv_phb3_realize(DeviceState *dev, Error 
**errp)
  
  pci_setup_iommu(pci->bus, pnv_phb3_dma_iommu, phb);
  
-pnv_phb_attach_root_port(PCI_HOST_BRIDGE(phb), TYPE_PNV_PHB3_ROOT_PORT);

+pnv_phb_attach_root_port(pci, TYPE_PNV_PHB3_ROOT_PORT, phb->phb_id);
  }
  
  void pnv_phb3_update_regions(PnvPHB3 *phb)

diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index 23ad8de7ee..ffd9d8a947 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -1585,7 +1585,7 @@ static void pnv_phb4_realize(DeviceState *dev, Error 
**errp)
  pci->bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE;
  
  /* Add a single Root port if running with defaults */

-pnv_phb_attach_root_port(pci, pecc->rp_model);
+pnv_phb_attach_root_port(pci, pecc->rp_model, phb->phb_id);
  
  /* Setup XIVE Source */

  if (phb->big_phb) {
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 7c08a78d6c..40e0cbd84d 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -1190,9 +1190,14 @@ static void pnv_chip_icp_realize(Pnv8Chip *chip8, Error 
**errp)
  }
  
  /* Attach a root port device */

-void pnv_phb_attach_root_port(PCIHostState *pci, const char *name)
+void pnv_phb_attach_root_port(PCIHostState *pci, const char *name, int index)
  {
  PCIDevice *root = pci_new(PCI_DEVFN(0, 0), name);
+g_autofree char *default_id = g_strdup_printf("%s[%d]", name, index);
+const char *dev_id = DEVICE(root)->id;
+
+object_property_add_child(OBJECT(pci->bus), dev_id ? dev_id : default_id,
+  OBJECT(root));
  
  pci_realize_and_unref(root, pci->bus, _fatal);

  }
diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
index 86cb7d7f97..033890a23f 100644
--- a/include/hw/ppc/pnv.h
+++ b/include/hw/ppc/pnv.h
@@ -189,7 +189,7 @@ DECLARE_INSTANCE_CHECKER(PnvChip, PNV_CHIP_POWER10,
   TYPE_PNV_CHIP_POWER10)
  
  PowerPCCPU *pnv_chip_find_cpu(PnvChip *chip, uint32_t pir);

-void pnv_phb_attach_root_port(PCIHostState *pci, const char *name);
+void pnv_phb_attach_root_port(PCIHostState *pci, const char *name, int index);
  
  #define TYPE_PNV_MACHINE   MACHINE_TYPE_NAME("powernv")

  typedef struct PnvMachineClass PnvMachineClass;

Re: [PATCH 01/11] ppc/pnv: move root port attach to pnv_phb4_realize()


On 6/13/22 17:44, Daniel Henrique Barboza wrote:

Creating a root port is something related to the PHB, not the PEC. It
also makes the logic more in line with what pnv-phb3 does.

Signed-off-by: Daniel Henrique Barboza 


Reviewed-by: Cédric Le Goater 

So the root port is back where it was.

Could we avoid the pci_new() and use object_initialize_child() instead ?

Thanks,

C.



---
  hw/pci-host/pnv_phb4.c | 4 
  hw/pci-host/pnv_phb4_pec.c | 3 ---
  2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index 6594016121..23ad8de7ee 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -1547,6 +1547,7 @@ static void pnv_phb4_instance_init(Object *obj)
  static void pnv_phb4_realize(DeviceState *dev, Error **errp)
  {
  PnvPHB4 *phb = PNV_PHB4(dev);
+PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(phb->pec);
  PCIHostState *pci = PCI_HOST_BRIDGE(dev);
  XiveSource *xsrc = >xsrc;
  int nr_irqs;
@@ -1583,6 +1584,9 @@ static void pnv_phb4_realize(DeviceState *dev, Error 
**errp)
  pci_setup_iommu(pci->bus, pnv_phb4_dma_iommu, phb);
  pci->bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE;
  
+/* Add a single Root port if running with defaults */

+pnv_phb_attach_root_port(pci, pecc->rp_model);
+
  /* Setup XIVE Source */
  if (phb->big_phb) {
  nr_irqs = PNV_PHB4_MAX_INTs;
diff --git a/hw/pci-host/pnv_phb4_pec.c b/hw/pci-host/pnv_phb4_pec.c
index 8b7e823fa5..c9aaf1c28e 100644
--- a/hw/pci-host/pnv_phb4_pec.c
+++ b/hw/pci-host/pnv_phb4_pec.c
@@ -130,9 +130,6 @@ static void pnv_pec_default_phb_realize(PnvPhb4PecState 
*pec,
  if (!sysbus_realize(SYS_BUS_DEVICE(phb), errp)) {
  return;
  }
-
-/* Add a single Root port if running with defaults */
-pnv_phb_attach_root_port(PCI_HOST_BRIDGE(phb), pecc->rp_model);
  }
  
  static void pnv_pec_realize(DeviceState *dev, Error **errp)

Re: [PATCH 0/2] Make local migration with TAP network device possible

2022-06-14 Thread Daniel P . Berrangé

On Tue, Jun 14, 2022 at 02:18:41PM +0300, Andrey Ryabinin wrote:
> Hi
> 
> These couple of patches aim to make local migration possible (within one host)
> on the same TAP device used by source and destination QEMU
> 
> The scenario looks like this
>  1. Create TAP devices and pass file descriptors to source QEMU
>  2. Launch destination QEMU (-incoming defer) and pass same descriptors to it.
>  3. Start migration
> 
> 
> Regarding the first patch: It makes possible to receive file descriptor in 
> non-blocking
> state. But I probably didn't cover all FD users which might need to set 
> blocking state after
> the patch. So I'm hoping for hints where else, besides 
> fd_start_incoming_migration()
> I need to put qemu_socket_set_block() calls.

You'll need to check all callers of

qio_channel_readv_full
qio_channel_readv_full_all
qio_channel_readv_full_all_eof

and identify which pass a non-NULL parameter for 'fds'. If the caller
does NOT have a qemu_setnonblock call on the FD it gets back, then you
have to assume it is expecting it in blocking mode and so need to
add qemu_setblock


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

[PATCH 1/2] chardev: don't set O_NONBLOCK on SCM_RIGHTS file descriptors.

2022-06-14 Thread Andrey Ryabinin

This reverts commit 9b938c7262e4 ("chardev: clear O_NONBLOCK on SCM_RIGHTS file 
descriptors").
A file descriptor passed to QEMU via the 'getfd' QMP command is always
changed to blocking mode. Instead of that, let the QEMU users of the file
descriptor change the blocking mode when necessary, e.g. for migration.

We need to preserve the state of the file descriptor in case it's still
used by an external process and before the QEMU itself started
using it.

E.g. our local migration scenario with TAP networking looks like this:
 1. Create TAP devices and pass file descriptors to source QEMU
 2. Launch destination QEMU (-incoming defer) and pass same descriptors to it.
 3. Start migration

In such scenario setting blocking state at stage (2) will hang source QEMU
since the TAP fd suddenly becomes blocking.

Signed-off-by: Andrey Ryabinin 
---
 chardev/char-socket.c | 3 ---
 io/channel-socket.c   | 3 ---
 migration/fd.c| 2 ++
 3 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index dc4e218eeb6..c9592fb5836 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -310,9 +310,6 @@ static ssize_t tcp_chr_recv(Chardev *chr, char *buf, size_t 
len)
 continue;
 }
 
-/* O_NONBLOCK is preserved across SCM_RIGHTS so reset it */
-qemu_socket_set_block(fd);
-
 #ifndef MSG_CMSG_CLOEXEC
 qemu_set_cloexec(fd);
 #endif
diff --git a/io/channel-socket.c b/io/channel-socket.c
index dc9c165de11..8b9679460dc 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -479,9 +479,6 @@ static void qio_channel_socket_copy_fds(struct msghdr *msg,
 continue;
 }
 
-/* O_NONBLOCK is preserved across SCM_RIGHTS so reset it */
-qemu_socket_set_block(fd);
-
 #ifndef MSG_CMSG_CLOEXEC
 qemu_set_cloexec(fd);
 #endif
diff --git a/migration/fd.c b/migration/fd.c
index 6f2f50475f4..793fffeb169 100644
--- a/migration/fd.c
+++ b/migration/fd.c
@@ -60,6 +60,8 @@ void fd_start_incoming_migration(const char *fdname, Error 
**errp)
 return;
 }
 
+qemu_socket_set_block(fd);
+
 trace_migration_fd_incoming(fd);
 
 ioc = qio_channel_new_fd(fd, errp);
-- 
2.35.1

[PATCH 0/2] Make local migration with TAP network device possible

2022-06-14 Thread Andrey Ryabinin

Hi

These couple of patches aim to make local migration possible (within one host)
on the same TAP device used by source and destination QEMU

The scenario looks like this
 1. Create TAP devices and pass file descriptors to source QEMU
 2. Launch destination QEMU (-incoming defer) and pass same descriptors to it.
 3. Start migration


Regarding the first patch: It makes possible to receive file descriptor in 
non-blocking
state. But I probably didn't cover all FD users which might need to set 
blocking state after
the patch. So I'm hoping for hints where else, besides 
fd_start_incoming_migration()
I need to put qemu_socket_set_block() calls.


Andrey Ryabinin (2):
  chardev: don't set O_NONBLOCK on SCM_RIGHTS file descriptors.
  tap: initialize TAPState->enabled according to the actual state of
queue

 chardev/char-socket.c |  3 ---
 io/channel-socket.c   |  3 ---
 migration/fd.c|  2 ++
 net/tap-bsd.c |  5 +
 net/tap-linux.c   | 12 
 net/tap-solaris.c |  5 +
 net/tap.c |  2 +-
 net/tap_int.h |  1 +
 8 files changed, 26 insertions(+), 7 deletions(-)

-- 
2.35.1

[PATCH 2/2] tap: initialize TAPState->enabled according to the actual state of queue

2022-06-14 Thread Andrey Ryabinin

Currently TAPState->enabled is initialized as true. If an fd was passed to qemu
in a disabled state it will cause an assert at the attempt to detach queue
in virtio_net_set_queues():

virtio_net_set_queues() :
r = peer_detach() -> tap_disable():
if (s->enabled == 0) {
   return 0;
} else {
   //Will return an error.
   ret = tap_fd_disable(s->fd);
   ...
   return ret;
assert(!r);

Initialize ->enabled according to the actual state of fd to fix this.

Signed-off-by: Andrey Ryabinin 
---
 net/tap-bsd.c |  5 +
 net/tap-linux.c   | 12 
 net/tap-solaris.c |  5 +
 net/tap.c |  2 +-
 net/tap_int.h |  1 +
 5 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index 005ce05c6e0..8c21f058c8c 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -217,6 +217,11 @@ int tap_probe_vnet_hdr_len(int fd, int len)
 return 0;
 }
 
+bool tap_probe_enabled(int fd)
+{
+return true;
+}
+
 void tap_fd_set_vnet_hdr_len(int fd, int len)
 {
 }
diff --git a/net/tap-linux.c b/net/tap-linux.c
index 304ff45071d..6078ba03af6 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -193,6 +193,18 @@ int tap_probe_vnet_hdr_len(int fd, int len)
 return 1;
 }
 
+bool tap_probe_enabled(int fd)
+{
+struct ifreq ifr;
+
+if (ioctl(fd, TUNGETIFF, ) != 0) {
+error_report("TUNGETIFF ioctl() failed: %s",
+ strerror(errno));
+return false;
+}
+return !(ifr.ifr_flags & IFF_DETACH_QUEUE);
+}
+
 void tap_fd_set_vnet_hdr_len(int fd, int len)
 {
 if (ioctl(fd, TUNSETVNETHDRSZ, ) == -1) {
diff --git a/net/tap-solaris.c b/net/tap-solaris.c
index a44f8805c23..ccaa3334882 100644
--- a/net/tap-solaris.c
+++ b/net/tap-solaris.c
@@ -221,6 +221,11 @@ int tap_probe_vnet_hdr_len(int fd, int len)
 return 0;
 }
 
+bool tap_probe_enabled(int fd)
+{
+return true;
+}
+
 void tap_fd_set_vnet_hdr_len(int fd, int len)
 {
 }
diff --git a/net/tap.c b/net/tap.c
index b3ddfd4a74b..799f8ec7c76 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -399,7 +399,7 @@ static TAPState *net_tap_fd_init(NetClientState *peer,
 s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
 s->using_vnet_hdr = false;
 s->has_ufo = tap_probe_has_ufo(s->fd);
-s->enabled = true;
+s->enabled = tap_probe_enabled(s->fd);
 tap_set_offload(>nc, 0, 0, 0, 0, 0);
 /*
  * Make sure host header length is set correctly in tap:
diff --git a/net/tap_int.h b/net/tap_int.h
index 547f8a5a28f..b8fc3dfbfa7 100644
--- a/net/tap_int.h
+++ b/net/tap_int.h
@@ -37,6 +37,7 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, 
Error **errp);
 int tap_probe_vnet_hdr(int fd, Error **errp);
 int tap_probe_vnet_hdr_len(int fd, int len);
 int tap_probe_has_ufo(int fd);
+bool tap_probe_enabled(int fd);
 void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int 
ufo);
 void tap_fd_set_vnet_hdr_len(int fd, int len);
 int tap_fd_set_vnet_le(int fd, int vnet_is_le);
-- 
2.35.1

Re: [PATCH v2 07/11] vfio/migration: Implement VFIO migration protocol v2

2022-06-14 Thread Joao Martins

On 5/30/22 18:07, Avihai Horon wrote:
> +static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
> +{
> +VFIODevice *vbasedev = opaque;
> +enum vfio_device_mig_state recover_state;
> +int ret;
> +
> +/* We reach here with device state STOP or STOP_COPY only */
> +recover_state = VFIO_DEVICE_STATE_STOP;
> +ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
> +   recover_state);
> +if (ret) {
> +return ret;
> +}
> +
> +do {
> +ret = vfio_save_block(f, vbasedev->migration);
> +if (ret < 0) {
> +return ret;
> +}
> +} while (!ret);
> +
> +qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
> +ret = qemu_file_get_error(f);
> +if (ret) {
> +return ret;
> +}
> +
> +ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP,
> +   recover_state);

Is it expected that you are setting VFIO_DEVICE_STATE_STOP while
@recover_state is the same value (VFIO_DEVICE_STATE_STOP) ?

> +if (ret) {
> +return ret;
> +}
> +
> +trace_vfio_save_complete_precopy(vbasedev->name);
> +
> +return 0;
> +}
> +
>  static int vfio_v1_save_complete_precopy(QEMUFile *f, void *opaque)
>  {
>  VFIODevice *vbasedev = opaque;
> @@ -593,6 +775,14 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
>  }
>  }
>  
> +static int vfio_load_setup(QEMUFile *f, void *opaque)
> +{
> +VFIODevice *vbasedev = opaque;
> +
> +return vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING,
> +   vbasedev->migration->device_state);
> +}
> +
>  static int vfio_v1_load_setup(QEMUFile *f, void *opaque)
>  {
>  VFIODevice *vbasedev = opaque;
> @@ -620,6 +810,15 @@ static int vfio_v1_load_setup(QEMUFile *f, void *opaque)
>  return ret;
>  }
>  
> +static int vfio_load_cleanup(void *opaque)
> +{
> +VFIODevice *vbasedev = opaque;
> +
> +vfio_migration_cleanup(vbasedev);
> +trace_vfio_load_cleanup(vbasedev->name);
> +return 0;
> +}
> +
>  static int vfio_v1_load_cleanup(void *opaque)
>  {
>  VFIODevice *vbasedev = opaque;
> @@ -662,7 +861,11 @@ static int vfio_load_state(QEMUFile *f, void *opaque, 
> int version_id)
>  uint64_t data_size = qemu_get_be64(f);
>  
>  if (data_size) {
> -ret = vfio_v1_load_buffer(f, vbasedev, data_size);
> +if (vbasedev->migration->v2) {
> +ret = vfio_load_buffer(f, vbasedev, data_size);
> +} else {
> +ret = vfio_v1_load_buffer(f, vbasedev, data_size);
> +}
>  if (ret < 0) {
>  return ret;
>  }
> @@ -683,6 +886,16 @@ static int vfio_load_state(QEMUFile *f, void *opaque, 
> int version_id)
>  return ret;
>  }
>  
> +static SaveVMHandlers savevm_vfio_handlers = {
> +.save_setup = vfio_save_setup,
> +.save_cleanup = vfio_save_cleanup,
> +.save_live_complete_precopy = vfio_save_complete_precopy,
> +.save_state = vfio_save_state,
> +.load_setup = vfio_load_setup,
> +.load_cleanup = vfio_load_cleanup,
> +.load_state = vfio_load_state,
> +};
> +
>  static SaveVMHandlers savevm_vfio_v1_handlers = {
>  .save_setup = vfio_v1_save_setup,
>  .save_cleanup = vfio_v1_save_cleanup,
> @@ -697,6 +910,34 @@ static SaveVMHandlers savevm_vfio_v1_handlers = {
>  
>  /* -- */
>  
> +static void vfio_vmstate_change(void *opaque, bool running, RunState state)
> +{
> +VFIODevice *vbasedev = opaque;
> +enum vfio_device_mig_state new_state;
> +int ret;
> +
> +if (running) {
> +new_state = VFIO_DEVICE_STATE_RUNNING;
> +} else {
> +new_state = VFIO_DEVICE_STATE_STOP;
> +}
> +
> +ret = vfio_migration_set_state(vbasedev, new_state,
> +   VFIO_DEVICE_STATE_ERROR);
> +if (ret) {
> +/*
> + * Migration should be aborted in this case, but vm_state_notify()
> + * currently does not support reporting failures.
> + */
> +if (migrate_get_current()->to_dst_file) {
> +qemu_file_set_error(migrate_get_current()->to_dst_file, ret);
> +}
> +}
> +
> +trace_vfio_vmstate_change(vbasedev->name, running, RunState_str(state),
> +  new_state);
> +}
> +
>  static void vfio_v1_vmstate_change(void *opaque, bool running, RunState 
> state)
>  {
>  VFIODevice *vbasedev = opaque;
> @@ -770,12 +1011,17 @@ static void vfio_migration_state_notifier(Notifier 
> *notifier, void *data)
>  case MIGRATION_STATUS_CANCELLED:
>  case MIGRATION_STATUS_FAILED:
>  bytes_transferred = 0;
> -ret = vfio_migration_v1_set_state(vbasedev,
> -  ~(VFIO_DEVICE_STATE_V1_SAVING |
> -

[PATCH 0/2] Two sets of trivials

2022-06-14 Thread Dr. David Alan Gilbert (git)

From: "Dr. David Alan Gilbert" 

I've sent the 3 char set last month, but have updated
it a little; I cleaned up a comment style that was already
broken so checkpatch is happy.

The 'namesapce' is a new patch; it's amazing how many places
make the same typo!

Dave

Dr. David Alan Gilbert (2):
  Trivial: 3 char repeat typos
  trivial typos: namesapce

 hw/9pfs/9p-xattr-user.c  | 8 
 hw/acpi/nvdimm.c | 2 +-
 hw/intc/openpic.c| 2 +-
 hw/net/imx_fec.c | 2 +-
 hw/nvme/ctrl.c   | 2 +-
 hw/pci/pcie_aer.c| 2 +-
 hw/pci/shpc.c| 3 ++-
 hw/ppc/spapr_caps.c  | 2 +-
 hw/scsi/spapr_vscsi.c| 2 +-
 qapi/net.json| 2 +-
 tools/virtiofsd/passthrough_ll.c | 2 +-
 ui/input.c   | 2 +-
 12 files changed, 16 insertions(+), 15 deletions(-)

-- 
2.36.1

[PULL 10/10] MAINTAINERS: update Vladimir's address and repositories

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 MAINTAINERS | 22 --
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 0df25ed4b0..9e37bfe279 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2538,7 +2538,7 @@ F: scsi/*
 
 Block Jobs
 M: John Snow 
-M: Vladimir Sementsov-Ogievskiy 
+M: Vladimir Sementsov-Ogievskiy 
 L: qemu-bl...@nongnu.org
 S: Supported
 F: blockjob.c
@@ -2563,7 +2563,7 @@ F: block/aio_task.c
 F: util/qemu-co-shared-resource.c
 F: include/qemu/co-shared-resource.h
 T: git https://gitlab.com/jsnow/qemu.git jobs
-T: git https://src.openvz.org/scm/~vsementsov/qemu.git jobs
+T: git https://gitlab.com/vsementsov/qemu.git block
 
 Block QAPI, monitor, command line
 M: Markus Armbruster 
@@ -2584,7 +2584,7 @@ F: include/hw/cxl/
 
 Dirty Bitmaps
 M: Eric Blake 
-M: Vladimir Sementsov-Ogievskiy 
+M: Vladimir Sementsov-Ogievskiy 
 R: John Snow 
 L: qemu-bl...@nongnu.org
 S: Supported
@@ -2598,6 +2598,7 @@ F: util/hbitmap.c
 F: tests/unit/test-hbitmap.c
 F: docs/interop/bitmaps.rst
 T: git https://repo.or.cz/qemu/ericb.git bitmaps
+T: git https://gitlab.com/vsementsov/qemu.git block
 
 Character device backends
 M: Marc-André Lureau 
@@ -2808,16 +2809,17 @@ F: scripts/*.py
 F: tests/*.py
 
 Benchmark util
-M: Vladimir Sementsov-Ogievskiy 
+M: Vladimir Sementsov-Ogievskiy 
 S: Maintained
 F: scripts/simplebench/
-T: git https://src.openvz.org/scm/~vsementsov/qemu.git simplebench
+T: git https://gitlab.com/vsementsov/qemu.git simplebench
 
 Transactions helper
-M: Vladimir Sementsov-Ogievskiy 
+M: Vladimir Sementsov-Ogievskiy 
 S: Maintained
 F: include/qemu/transactions.h
 F: util/transactions.c
+T: git https://gitlab.com/vsementsov/qemu.git block
 
 QAPI
 M: Markus Armbruster 
@@ -3394,7 +3396,7 @@ F: block/iscsi-opts.c
 
 Network Block Device (NBD)
 M: Eric Blake 
-M: Vladimir Sementsov-Ogievskiy 
+M: Vladimir Sementsov-Ogievskiy 
 L: qemu-bl...@nongnu.org
 S: Maintained
 F: block/nbd*
@@ -3406,7 +3408,7 @@ F: docs/interop/nbd.txt
 F: docs/tools/qemu-nbd.rst
 F: tests/qemu-iotests/tests/*nbd*
 T: git https://repo.or.cz/qemu/ericb.git nbd
-T: git https://src.openvz.org/scm/~vsementsov/qemu.git nbd
+T: git https://gitlab.com/vsementsov/qemu.git block
 
 NFS
 M: Peter Lieven 
@@ -3491,13 +3493,13 @@ F: block/dmg.c
 parallels
 M: Stefan Hajnoczi 
 M: Denis V. Lunev 
-M: Vladimir Sementsov-Ogievskiy 
+M: Vladimir Sementsov-Ogievskiy 
 L: qemu-bl...@nongnu.org
 S: Supported
 F: block/parallels.c
 F: block/parallels-ext.c
 F: docs/interop/parallels.txt
-T: git https://src.openvz.org/scm/~vsementsov/qemu.git parallels
+T: git https://gitlab.com/vsementsov/qemu.git block
 
 qed
 M: Stefan Hajnoczi 
-- 
2.25.1

[PULL 09/10] block: use 'unsigned' for in_flight field on driver state

From: "Denis V. Lunev" 

This patch makes in_flight field 'unsigned' for BDRVNBDState and
MirrorBlockJob. This matches the definition of this field on BDS
and is generically correct - we should never get negative value here.

Signed-off-by: Denis V. Lunev 
CC: John Snow 
CC: Vladimir Sementsov-Ogievskiy 
CC: Kevin Wolf 
CC: Hanna Reitz 
CC: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/mirror.c | 2 +-
 block/nbd.c| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index d8ecb9efa2..3c4ab1159d 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -73,7 +73,7 @@ typedef struct MirrorBlockJob {
 
 uint64_t last_pause_ns;
 unsigned long *in_flight_bitmap;
-int in_flight;
+unsigned in_flight;
 int64_t bytes_in_flight;
 QTAILQ_HEAD(, MirrorOp) ops_in_flight;
 int ret;
diff --git a/block/nbd.c b/block/nbd.c
index bc8f128087..19e773d602 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -77,7 +77,7 @@ typedef struct BDRVNBDState {
 QemuMutex requests_lock;
 NBDClientState state;
 CoQueue free_sema;
-int in_flight;
+unsigned in_flight;
 NBDClientRequest requests[MAX_NBD_REQUESTS];
 QEMUTimer *reconnect_delay_timer;
 
-- 
2.25.1

[PULL 03/10] iotests: add copy-before-write: on-cbw-error tests

From: Vladimir Sementsov-Ogievskiy 

Add tests for new option of copy-before-write filter: on-cbw-error.

Note that we use QEMUMachine instead of the VM class, because in a later
commit we'll want to use throttling, which doesn't work with the -accel
qtest option used by VM.

We also touch pylintrc to not break iotest 297.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Hanna Reitz 
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/pylintrc   |   5 +
 tests/qemu-iotests/tests/copy-before-write| 132 ++
 .../qemu-iotests/tests/copy-before-write.out  |   5 +
 3 files changed, 142 insertions(+)
 create mode 100755 tests/qemu-iotests/tests/copy-before-write
 create mode 100644 tests/qemu-iotests/tests/copy-before-write.out

diff --git a/tests/qemu-iotests/pylintrc b/tests/qemu-iotests/pylintrc
index 32ab77b8bb..f4f823a991 100644
--- a/tests/qemu-iotests/pylintrc
+++ b/tests/qemu-iotests/pylintrc
@@ -51,3 +51,8 @@ notes=FIXME,
 
 # Maximum number of characters on a single line.
 max-line-length=79
+
+
+[SIMILARITIES]
+
+min-similarity-lines=6
diff --git a/tests/qemu-iotests/tests/copy-before-write 
b/tests/qemu-iotests/tests/copy-before-write
new file mode 100755
index 00..6c7638965e
--- /dev/null
+++ b/tests/qemu-iotests/tests/copy-before-write
@@ -0,0 +1,132 @@
+#!/usr/bin/env python3
+# group: auto backup
+#
+# Copyright (c) 2022 Virtuozzo International GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import re
+
+from qemu.machine import QEMUMachine
+
+import iotests
+from iotests import qemu_img_create, qemu_io
+
+
+temp_img = os.path.join(iotests.test_dir, 'temp')
+source_img = os.path.join(iotests.test_dir, 'source')
+size = '1M'
+
+
+class TestCbwError(iotests.QMPTestCase):
+def tearDown(self):
+self.vm.shutdown()
+os.remove(temp_img)
+os.remove(source_img)
+
+def setUp(self):
+qemu_img_create('-f', iotests.imgfmt, source_img, size)
+qemu_img_create('-f', iotests.imgfmt, temp_img, size)
+qemu_io('-c', 'write 0 1M', source_img)
+
+self.vm = QEMUMachine(iotests.qemu_prog)
+self.vm.launch()
+
+def do_cbw_error(self, on_cbw_error):
+result = self.vm.qmp('blockdev-add', {
+'node-name': 'cbw',
+'driver': 'copy-before-write',
+'on-cbw-error': on_cbw_error,
+'file': {
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': source_img,
+}
+},
+'target': {
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'blkdebug',
+'image': {
+'driver': 'file',
+'filename': temp_img
+},
+'inject-error': [
+{
+'event': 'write_aio',
+'errno': 5,
+'immediately': False,
+'once': True
+}
+]
+}
+}
+})
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('blockdev-add', {
+'node-name': 'access',
+'driver': 'snapshot-access',
+'file': 'cbw'
+})
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('human-monitor-command',
+ command_line='qemu-io cbw "write 0 1M"')
+self.assert_qmp(result, 'return', '')
+
+result = self.vm.qmp('human-monitor-command',
+ command_line='qemu-io access "read 0 1M"')
+self.assert_qmp(result, 'return', '')
+
+self.vm.shutdown()
+log = self.vm.get_log()
+log = re.sub(r'^\[I \d+\.\d+\] OPENED\n', '', log)
+log = re.sub(r'\[I \+\d+\.\d+\] CLOSED\n?$', '', log)
+log = iotests.filter_qemu_io(log)
+return log
+
+def test_break_snapshot_on_cbw_error(self):
+"""break-snapshot behavior:
+Guest write succeeds, but further snapshot-read fails, as snapshot is
+broken.
+"""
+log = self.do_cbw_error('break-snapshot')
+
+self.assertEqual(log, """\
+wrote

[PATCH 2/2] trivial typos: namesapce

2022-06-14 Thread Dr. David Alan Gilbert (git)

From: "Dr. David Alan Gilbert" 

'namespace' is misspelled in a bunch of places.

Signed-off-by: Dr. David Alan Gilbert 
---
 hw/9pfs/9p-xattr-user.c | 8 
 hw/acpi/nvdimm.c| 2 +-
 hw/nvme/ctrl.c  | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/9pfs/9p-xattr-user.c b/hw/9pfs/9p-xattr-user.c
index f2ae9582e6..535677ed60 100644
--- a/hw/9pfs/9p-xattr-user.c
+++ b/hw/9pfs/9p-xattr-user.c
@@ -27,7 +27,7 @@ static ssize_t mp_user_getxattr(FsContext *ctx, const char 
*path,
 {
 if (strncmp(name, "user.virtfs.", 12) == 0) {
 /*
- * Don't allow fetch of user.virtfs namesapce
+ * Don't allow fetch of user.virtfs namespace
  * in case of mapped security
  */
 errno = ENOATTR;
@@ -49,7 +49,7 @@ static ssize_t mp_user_listxattr(FsContext *ctx, const char 
*path,
 name_size -= 12;
 } else {
 /*
- * Don't allow fetch of user.virtfs namesapce
+ * Don't allow fetch of user.virtfs namespace
  * in case of mapped security
  */
 return 0;
@@ -74,7 +74,7 @@ static int mp_user_setxattr(FsContext *ctx, const char *path, 
const char *name,
 {
 if (strncmp(name, "user.virtfs.", 12) == 0) {
 /*
- * Don't allow fetch of user.virtfs namesapce
+ * Don't allow fetch of user.virtfs namespace
  * in case of mapped security
  */
 errno = EACCES;
@@ -88,7 +88,7 @@ static int mp_user_removexattr(FsContext *ctx,
 {
 if (strncmp(name, "user.virtfs.", 12) == 0) {
 /*
- * Don't allow fetch of user.virtfs namesapce
+ * Don't allow fetch of user.virtfs namespace
  * in case of mapped security
  */
 errno = EACCES;
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 0d43da19ea..5f85b16327 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -476,7 +476,7 @@ struct NvdimmFuncGetLabelDataOut {
 /* the size of buffer filled by QEMU. */
 uint32_t len;
 uint32_t func_ret_status; /* return status code. */
-uint8_t out_buf[]; /* the data got via Get Namesapce Label function. */
+uint8_t out_buf[]; /* the data got via Get Namespace Label function. */
 } QEMU_PACKED;
 typedef struct NvdimmFuncGetLabelDataOut NvdimmFuncGetLabelDataOut;
 QEMU_BUILD_BUG_ON(sizeof(NvdimmFuncGetLabelDataOut) > NVDIMM_DSM_MEMORY_SIZE);
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 1e6e0fcad9..770a38381a 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -71,7 +71,7 @@
  *   the SUBNQN field in the controller will report the NQN of the subsystem
  *   device. This also enables multi controller capability represented in
  *   Identify Controller data structure in CMIC (Controller Multi-path I/O and
- *   Namesapce Sharing Capabilities).
+ *   Namespace Sharing Capabilities).
  *
  * - `aerl`
  *   The Asynchronous Event Request Limit (AERL). Indicates the maximum number
-- 
2.36.1

[PULL 07/10] iotests: copy-before-write: add cases for cbw-timeout option

From: Vladimir Sementsov-Ogievskiy 

Add two simple test-cases: timeout failure with
break-snapshot-on-cbw-error behavior and similar with
break-guest-write-on-cbw-error behavior.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Hanna Reitz 
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/tests/copy-before-write| 81 +++
 .../qemu-iotests/tests/copy-before-write.out  |  4 +-
 2 files changed, 83 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/tests/copy-before-write 
b/tests/qemu-iotests/tests/copy-before-write
index 6c7638965e..f01f26f01c 100755
--- a/tests/qemu-iotests/tests/copy-before-write
+++ b/tests/qemu-iotests/tests/copy-before-write
@@ -126,6 +126,87 @@ read 1048576/1048576 bytes at offset 0
 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 """)
 
+def do_cbw_timeout(self, on_cbw_error):
+result = self.vm.qmp('object-add', {
+'qom-type': 'throttle-group',
+'id': 'group0',
+'limits': {'bps-write': 300 * 1024}
+})
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('blockdev-add', {
+'node-name': 'cbw',
+'driver': 'copy-before-write',
+'on-cbw-error': on_cbw_error,
+'cbw-timeout': 1,
+'file': {
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': source_img,
+}
+},
+'target': {
+'driver': 'throttle',
+'throttle-group': 'group0',
+'file': {
+'driver': 'qcow2',
+'file': {
+'driver': 'file',
+'filename': temp_img
+}
+}
+}
+})
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('blockdev-add', {
+'node-name': 'access',
+'driver': 'snapshot-access',
+'file': 'cbw'
+})
+self.assert_qmp(result, 'return', {})
+
+result = self.vm.qmp('human-monitor-command',
+ command_line='qemu-io cbw "write 0 512K"')
+self.assert_qmp(result, 'return', '')
+
+# We need second write to trigger throttling
+result = self.vm.qmp('human-monitor-command',
+ command_line='qemu-io cbw "write 512K 512K"')
+self.assert_qmp(result, 'return', '')
+
+result = self.vm.qmp('human-monitor-command',
+ command_line='qemu-io access "read 0 1M"')
+self.assert_qmp(result, 'return', '')
+
+self.vm.shutdown()
+log = self.vm.get_log()
+log = re.sub(r'^\[I \d+\.\d+\] OPENED\n', '', log)
+log = re.sub(r'\[I \+\d+\.\d+\] CLOSED\n?$', '', log)
+log = iotests.filter_qemu_io(log)
+return log
+
+def test_timeout_break_guest(self):
+log = self.do_cbw_timeout('break-guest-write')
+self.assertEqual(log, """\
+wrote 524288/524288 bytes at offset 0
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+write failed: Connection timed out
+read 1048576/1048576 bytes at offset 0
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+""")
+
+def test_timeout_break_snapshot(self):
+log = self.do_cbw_timeout('break-snapshot')
+self.assertEqual(log, """\
+wrote 524288/524288 bytes at offset 0
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 524288/524288 bytes at offset 524288
+512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read failed: Permission denied
+""")
+
 
 if __name__ == '__main__':
 iotests.main(supported_fmts=['qcow2'],
diff --git a/tests/qemu-iotests/tests/copy-before-write.out 
b/tests/qemu-iotests/tests/copy-before-write.out
index fbc63e62f8..89968f35d7 100644
--- a/tests/qemu-iotests/tests/copy-before-write.out
+++ b/tests/qemu-iotests/tests/copy-before-write.out
@@ -1,5 +1,5 @@
-..
+....
 --
-Ran 2 tests
+Ran 4 tests
 
 OK
-- 
2.25.1

[PATCH 1/2] Trivial: 3 char repeat typos

2022-06-14 Thread Dr. David Alan Gilbert (git)

From: "Dr. David Alan Gilbert" 

Inspired by Julia Lawall's fixing of Linux
kernel comments, I looked at qemu, although I did it manually.

Signed-off-by: Dr. David Alan Gilbert 
---
 hw/intc/openpic.c| 2 +-
 hw/net/imx_fec.c | 2 +-
 hw/pci/pcie_aer.c| 2 +-
 hw/pci/shpc.c| 3 ++-
 hw/ppc/spapr_caps.c  | 2 +-
 hw/scsi/spapr_vscsi.c| 2 +-
 qapi/net.json| 2 +-
 tools/virtiofsd/passthrough_ll.c | 2 +-
 ui/input.c   | 2 +-
 9 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/hw/intc/openpic.c b/hw/intc/openpic.c
index 49504e740f..b0787e8ee7 100644
--- a/hw/intc/openpic.c
+++ b/hw/intc/openpic.c
@@ -729,7 +729,7 @@ static void openpic_tmr_set_tmr(OpenPICTimer *tmr, uint32_t 
val, bool enabled)
 }
 
 /*
- * Returns the currrent tccr value, i.e., timer value (in clocks) with
+ * Returns the current tccr value, i.e., timer value (in clocks) with
  * appropriate TOG.
  */
 static uint64_t openpic_tmr_get_timer(OpenPICTimer *tmr)
diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c
index 0db9aaf76a..8c11b237de 100644
--- a/hw/net/imx_fec.c
+++ b/hw/net/imx_fec.c
@@ -438,7 +438,7 @@ static void imx_eth_update(IMXFECState *s)
  *   assignment fail.
  *
  * To ensure that all versions of Linux work, generate ENET_INT_MAC
- * interrrupts on both interrupt lines. This should be changed if and when
+ * interrupts on both interrupt lines. This should be changed if and when
  * qemu supports IOMUX.
  */
 if (s->regs[ENET_EIR] & s->regs[ENET_EIMR] &
diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c
index 92bd0530dd..eff62f3945 100644
--- a/hw/pci/pcie_aer.c
+++ b/hw/pci/pcie_aer.c
@@ -323,7 +323,7 @@ static void pcie_aer_msg_root_port(PCIDevice *dev, const 
PCIEAERMsg *msg)
  */
 }
 
-/* Errro Message Received: Root Error Status register */
+/* Error Message Received: Root Error Status register */
 switch (msg->severity) {
 case PCI_ERR_ROOT_CMD_COR_EN:
 if (root_status & PCI_ERR_ROOT_COR_RCV) {
diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c
index f822f18b98..e71f3a7483 100644
--- a/hw/pci/shpc.c
+++ b/hw/pci/shpc.c
@@ -480,7 +480,8 @@ static const MemoryRegionOps shpc_mmio_ops = {
 .endianness = DEVICE_LITTLE_ENDIAN,
 .valid = {
 /* SHPC ECN requires dword accesses, but the original 1.0 spec doesn't.
- * It's easier to suppport all sizes than worry about it. */
+ * It's easier to support all sizes than worry about it.
+ */
 .min_access_size = 1,
 .max_access_size = 4,
 },
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 655ab856a0..b4283055c1 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -553,7 +553,7 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, 
uint8_t val,
  * instruction is a harmless no-op.  It won't correctly
  * implement the cache count flush *but* if we have
  * count-cache-disabled in the host, that flush is
- * unnnecessary.  So, specifically allow this case.  This
+ * unnecessary.  So, specifically allow this case.  This
  * allows us to have better performance on POWER9 DD2.3,
  * while still working on POWER9 DD2.2 and POWER8 host
  * cpus.
diff --git a/hw/scsi/spapr_vscsi.c b/hw/scsi/spapr_vscsi.c
index a07a8e1523..e320ccaa23 100644
--- a/hw/scsi/spapr_vscsi.c
+++ b/hw/scsi/spapr_vscsi.c
@@ -1013,7 +1013,7 @@ static int vscsi_send_capabilities(VSCSIState *s, 
vscsi_req *req)
 }
 
 /*
- * Current implementation does not suppport any migration or
+ * Current implementation does not support any migration or
  * reservation capabilities. Construct the response telling the
  * guest not to use them.
  */
diff --git a/qapi/net.json b/qapi/net.json
index d6f7cfd4d6..9af11e9a3b 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -298,7 +298,7 @@
 #
 # @udp: use the udp version of l2tpv3 encapsulation
 #
-# @cookie64: use 64 bit coookies
+# @cookie64: use 64 bit cookies
 #
 # @counter: have sequence counter
 #
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index b15c631ca5..7a73dfcce9 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -2319,7 +2319,7 @@ static int do_lo_create(fuse_req_t req, struct lo_inode 
*parent_inode,
  * If security.selinux has not been remapped and selinux is enabled,
  * use fscreate to set context before file creation. If not, use
  * tmpfile method for regular files. Otherwise fallback to
- * non-atomic method of file creation and xattr settting.
+ * non-atomic method of file creation and xattr setting.
  */
 if (!mapped_name && lo->use_fscreate) {
 err = do_create_secctx_fscreate(req, parent_inode, name, mode, fi,
diff --git

[PULL 04/10] util: add qemu-co-timeout

From: Vladimir Sementsov-Ogievskiy 

Add new API, to make a time limited call of the coroutine.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Hanna Reitz 
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/qemu/coroutine.h | 13 ++
 util/meson.build |  1 +
 util/qemu-co-timeout.c   | 89 
 3 files changed, 103 insertions(+)
 create mode 100644 util/qemu-co-timeout.c

diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index d1548d5b11..08c5bb3c76 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -331,6 +331,19 @@ static inline void coroutine_fn 
qemu_co_sleep_ns(QEMUClockType type, int64_t ns)
 qemu_co_sleep_ns_wakeable(&w, type, ns);
 }
 
+typedef void CleanupFunc(void *opaque);
+/**
+ * Run entry in a coroutine and start a timer. Wait for entry to finish or
+ * for the timer to elapse, whichever happens first. If entry finished, return
+ * 0; if the timer elapsed earlier, return -ETIMEDOUT.
+ *
+ * Be careful: entry execution is not canceled, the user should handle it
+ * somehow. If @clean is provided, it's called after the coroutine finishes
+ * if the timeout happened.
+ */
+int coroutine_fn qemu_co_timeout(CoroutineEntry *entry, void *opaque,
+ uint64_t timeout_ns, CleanupFunc clean);
+
 /**
  * Wake a coroutine if it is sleeping in qemu_co_sleep_ns. The timer will be
  * deleted. @sleep_state must be the variable whose address was given to
diff --git a/util/meson.build b/util/meson.build
index 8f16018cd4..9abd2f5bcc 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -85,6 +85,7 @@ if have_block
   util_ss.add(files('block-helpers.c'))
   util_ss.add(files('qemu-coroutine-sleep.c'))
   util_ss.add(files('qemu-co-shared-resource.c'))
+  util_ss.add(files('qemu-co-timeout.c'))
   util_ss.add(files('thread-pool.c', 'qemu-timer.c'))
   util_ss.add(files('readline.c'))
   util_ss.add(files('throttle.c'))
diff --git a/util/qemu-co-timeout.c b/util/qemu-co-timeout.c
new file mode 100644
index 00..00cd335649
--- /dev/null
+++ b/util/qemu-co-timeout.c
@@ -0,0 +1,89 @@
+/*
+ * Helper functionality for making a time-limited call of a
+ * coroutine entry.
+ *
+ * Copyright (c) 2022 Virtuozzo International GmbH
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/coroutine.h"
+#include "block/aio.h"
+
+typedef struct QemuCoTimeoutState {
+CoroutineEntry *entry;
+void *opaque;
+QemuCoSleep sleep_state;
+bool marker;
+CleanupFunc *clean;
+} QemuCoTimeoutState;
+
+static void coroutine_fn qemu_co_timeout_entry(void *opaque)
+{
+QemuCoTimeoutState *s = opaque;
+
+s->entry(s->opaque);
+
+if (s->marker) {
+assert(!s->sleep_state.to_wake);
+/* .marker set by qemu_co_timeout, which means it has already timed out */
+if (s->clean) {
+s->clean(s->opaque);
+}
+g_free(s);
+} else {
+s->marker = true;
+qemu_co_sleep_wake(&s->sleep_state);
+}
+}
+
+int coroutine_fn qemu_co_timeout(CoroutineEntry *entry, void *opaque,
+ uint64_t timeout_ns, CleanupFunc clean)
+{
+QemuCoTimeoutState *s;
+Coroutine *co;
+
+if (timeout_ns == 0) {
+entry(opaque);
+return 0;
+}
+
+s = g_new(QemuCoTimeoutState, 1);
+*s = (QemuCoTimeoutState) {
+.entry = entry,
+.opaque = opaque,
+.clean = clean
+};
+
+co = qemu_coroutine_create(qemu_co_timeout_entry, s);
+
+aio_co_enter(qemu_get_current_aio_context(), co);
+qemu_co_sleep_ns_wakeable(&s->sleep_state, QEMU_CLOCK_REALTIME, 
timeout_ns);
+
+if (s->marker) {
+/* .marker set by qemu_co_timeout_entry, success */
+g_free(s);
+return 0;
+}
+
+/* Don't free s, as we can't cancel qemu_co_timeout_entry execution */
+s->marker = true;
+return

[PULL 08/10] nbd: trace long NBD operations

From: "Denis V. Lunev" 

At the moment there are 2 sources of lengthy operations if configured:
* open connection, which could retry inside and
* reconnect of already opened connection
These operations could be quite lengthy and cumbersome to catch thus
it would be quite natural to add trace points for them.

This patch is based on the original downstream work made by Vladimir.

Signed-off-by: Denis V. Lunev 
CC: Eric Blake 
CC: Vladimir Sementsov-Ogievskiy 
CC: Kevin Wolf 
CC: Hanna Reitz 
CC: Paolo Bonzini 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/nbd.c | 6 +-
 block/trace-events  | 2 ++
 nbd/client-connection.c | 2 ++
 nbd/trace-events| 3 +++
 4 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/block/nbd.c b/block/nbd.c
index 6085ab1d2c..bc8f128087 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -371,6 +371,7 @@ static bool nbd_client_connecting(BDRVNBDState *s)
 /* Called with s->requests_lock taken.  */
 static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s)
 {
+int ret;
 bool blocking = s->state == NBD_CLIENT_CONNECTING_WAIT;
 
 /*
@@ -380,6 +381,8 @@ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState 
*s)
 assert(nbd_client_connecting(s));
 assert(s->in_flight == 1);
 
+trace_nbd_reconnect_attempt(s->bs->in_flight);
+
 if (blocking && !s->reconnect_delay_timer) {
 /*
  * It's the first reconnect attempt after switching to
@@ -401,7 +404,8 @@ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState 
*s)
 }
 
 qemu_mutex_unlock(&s->requests_lock);
-nbd_co_do_establish_connection(s->bs, blocking, NULL);
+ret = nbd_co_do_establish_connection(s->bs, blocking, NULL);
+trace_nbd_reconnect_attempt_result(ret, s->bs->in_flight);
 qemu_mutex_lock(&s->requests_lock);
 
 /*
diff --git a/block/trace-events b/block/trace-events
index 549090d453..48dbf10c66 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -172,6 +172,8 @@ nbd_read_reply_entry_fail(int ret, const char *err) "ret = 
%d, err: %s"
 nbd_co_request_fail(uint64_t from, uint32_t len, uint64_t handle, uint16_t 
flags, uint16_t type, const char *name, int ret, const char *err) "Request 
failed { .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64 ", .flags 
= 0x%" PRIx16 ", .type = %" PRIu16 " (%s) } ret = %d, err: %s"
 nbd_client_handshake(const char *export_name) "export '%s'"
 nbd_client_handshake_success(const char *export_name) "export '%s'"
+nbd_reconnect_attempt(unsigned in_flight) "in_flight %u"
+nbd_reconnect_attempt_result(int ret, unsigned in_flight) "ret %d in_flight %u"
 
 # ssh.c
 ssh_restart_coroutine(void *co) "co=%p"
diff --git a/nbd/client-connection.c b/nbd/client-connection.c
index 2a632931c3..0c5f917efa 100644
--- a/nbd/client-connection.c
+++ b/nbd/client-connection.c
@@ -23,6 +23,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "trace.h"
 
 #include "block/nbd.h"
 
@@ -210,6 +211,7 @@ static void *connect_thread_func(void *opaque)
 object_unref(OBJECT(conn->sioc));
 conn->sioc = NULL;
 if (conn->do_retry && !conn->detached) {
+trace_nbd_connect_thread_sleep(timeout);
 qemu_mutex_unlock(&conn->mutex);
 
 sleep(timeout);
diff --git a/nbd/trace-events b/nbd/trace-events
index c4919a2dd5..b7032ca277 100644
--- a/nbd/trace-events
+++ b/nbd/trace-events
@@ -73,3 +73,6 @@ nbd_co_receive_request_decode_type(uint64_t handle, uint16_t 
type, const char *n
 nbd_co_receive_request_payload_received(uint64_t handle, uint32_t len) 
"Payload received: handle = %" PRIu64 ", len = %" PRIu32
 nbd_co_receive_align_compliance(const char *op, uint64_t from, uint32_t len, 
uint32_t align) "client sent non-compliant unaligned %s request: from=0x%" 
PRIx64 ", len=0x%" PRIx32 ", align=0x%" PRIx32
 nbd_trip(void) "Reading request"
+
+# client-connection.c
+nbd_connect_thread_sleep(uint64_t timeout) "timeout %" PRIu64
-- 
2.25.1

[PULL 00/10] Block jobs & NBD patches

The following changes since commit debd0753663bc89c86f5462a53268f2e3f680f60:

  Merge tag 'pull-testing-next-140622-1' of https://github.com/stsquad/qemu 
into staging (2022-06-13 21:10:57 -0700)

are available in the Git repository at:

  https://gitlab.com/vsementsov/qemu.git tags/pull-block-2022-06-14

for you to fetch changes up to 5aef6747a250f545ff53ba7e1a3ed7a3d166011a:

  MAINTAINERS: update Vladimir's address and repositories (2022-06-14 12:51:48 
+0300)


Block jobs & NBD patches

- add new options for copy-before-write filter
- new trace points for NBD
- prefer unsigned type for some 'in_flight' fields
- update my addresses in MAINTAINERS (already in Stefan's tree, but
  I think it's OK to send it with this PULL)


Note also, that I've recently updated my pgp key with new address and
new expire time.
Updated key is here: 
https://keys.openpgp.org/search?q=vsementsov%40yandex-team.ru



Denis V. Lunev (2):
  nbd: trace long NBD operations
  block: use 'unsigned' for in_flight field on driver state

Vladimir Sementsov-Ogievskiy (8):
  block/copy-before-write: refactor option parsing
  block/copy-before-write: add on-cbw-error open parameter
  iotests: add copy-before-write: on-cbw-error tests
  util: add qemu-co-timeout
  block/block-copy: block_copy(): add timeout_ns parameter
  block/copy-before-write: implement cbw-timeout option
  iotests: copy-before-write: add cases for cbw-timeout option
  MAINTAINERS: update Vladimir's address and repositories

 MAINTAINERS   |  22 +-
 block/block-copy.c|  33 ++-
 block/copy-before-write.c | 111 ++---
 block/mirror.c|   2 +-
 block/nbd.c   |   8 +-
 block/trace-events|   2 +
 include/block/block-copy.h|   4 +-
 include/qemu/coroutine.h  |  13 ++
 nbd/client-connection.c   |   2 +
 nbd/trace-events  |   3 +
 qapi/block-core.json  |  31 ++-
 tests/qemu-iotests/pylintrc   |   5 +
 tests/qemu-iotests/tests/copy-before-write| 213 ++
 .../qemu-iotests/tests/copy-before-write.out  |   5 +
 util/meson.build  |   1 +
 util/qemu-co-timeout.c|  89 
 16 files changed, 492 insertions(+), 52 deletions(-)
 create mode 100755 tests/qemu-iotests/tests/copy-before-write
 create mode 100644 tests/qemu-iotests/tests/copy-before-write.out
 create mode 100644 util/qemu-co-timeout.c

-- 
2.25.1

[PULL 06/10] block/copy-before-write: implement cbw-timeout option