Re: regression: insmod module failed in VM with nvdimm on

2022-11-29 Thread Marc Zyngier
On Wed, 30 Nov 2022 02:52:35 +,
"chenxiang (M)"  wrote:
> 
> Hi,
> 
> We boot the VM using following commands (with nvdimm on)  (qemu
> version 6.1.50, kernel 6.0-r4):

How relevant is the presence of the nvdimm? Do you observe the failure
without this?

> 
> qemu-system-aarch64 -machine
> virt,kernel_irqchip=on,gic-version=3,nvdimm=on  -kernel
> /home/kernel/Image -initrd /home/mini-rootfs/rootfs.cpio.gz -bios
> /root/QEMU_EFI.FD -cpu host -enable-kvm -net none -nographic -m
> 2G,maxmem=64G,slots=3 -smp 4 -append 'rdinit=init console=ttyAMA0
> ealycon=pl0ll,0x9000 pcie_ports=native pciehp.pciehp_debug=1'
> -object memory-backend-ram,id=ram1,size=10G -device
> nvdimm,id=dimm1,memdev=ram1  -device ioh3420,id=root_port1,chassis=1
> -device vfio-pci,host=7d:01.0,id=net0,bus=root_port1
> 
> Then in VM we insmod a module, vmalloc error occurs as follows (kernel
> 5.19-rc4 is normal, and the issue is still on kernel 6.1-rc4):
> 
> estuary:/$ insmod /lib/modules/$(uname -r)/hnae3.ko
> [8.186563] vmap allocation for size 20480 failed: use
> vmalloc=<size> to increase size

Have you tried increasing the vmalloc size to check that this is
indeed the problem?

[...]

> We git bisect the code, and find the patch c5a89f75d2a ("arm64: kaslr:
> defer initialization to initcall where permitted").

I guess you mean commit fc5a89f75d2a instead, right?

> Do you have any idea about the issue?

I sort of suspect that the nvdimm gets vmap-ed and consumes a large
portion of the vmalloc space, but you give very little information
that could help here...
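
If you can, it would also help to see how much of the vmalloc area is
actually in use in the guest when this happens, for example (assuming
procfs is mounted):

  # rough view of vmalloc consumption inside the guest
  grep -i vmalloc /proc/meminfo
  # largest vmalloc allocations and their callers
  sort -rnk2 /proc/vmallocinfo | head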

M.

-- 
Without deviation from the norm, progress is not possible.



[PATCH v8 8/9] target/riscv: expose properties for Zc* extension

2022-11-29 Thread Weiwei Li
Expose zca,zcb,zcf,zcd,zcmp,zcmt properties

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
Reviewed-by: Alistair Francis 
---
 target/riscv/cpu.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 3d06b57416..5f03698b3b 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -81,6 +81,12 @@ static const struct isa_ext_data isa_edata_arr[] = {
 ISA_EXT_DATA_ENTRY(zfhmin, true, PRIV_VERSION_1_12_0, ext_zfhmin),
 ISA_EXT_DATA_ENTRY(zfinx, true, PRIV_VERSION_1_12_0, ext_zfinx),
 ISA_EXT_DATA_ENTRY(zdinx, true, PRIV_VERSION_1_12_0, ext_zdinx),
+ISA_EXT_DATA_ENTRY(zca, true, PRIV_VERSION_1_12_0, ext_zca),
+ISA_EXT_DATA_ENTRY(zcb, true, PRIV_VERSION_1_12_0, ext_zcb),
+ISA_EXT_DATA_ENTRY(zcf, true, PRIV_VERSION_1_12_0, ext_zcf),
+ISA_EXT_DATA_ENTRY(zcd, true, PRIV_VERSION_1_12_0, ext_zcd),
+ISA_EXT_DATA_ENTRY(zcmp, true, PRIV_VERSION_1_12_0, ext_zcmp),
+ISA_EXT_DATA_ENTRY(zcmt, true, PRIV_VERSION_1_12_0, ext_zcmt),
 ISA_EXT_DATA_ENTRY(zba, true, PRIV_VERSION_1_12_0, ext_zba),
 ISA_EXT_DATA_ENTRY(zbb, true, PRIV_VERSION_1_12_0, ext_zbb),
 ISA_EXT_DATA_ENTRY(zbc, true, PRIV_VERSION_1_12_0, ext_zbc),
@@ -1116,6 +1122,13 @@ static Property riscv_cpu_extensions[] = {
 
 /* These are experimental so mark with 'x-' */
 DEFINE_PROP_BOOL("x-j", RISCVCPU, cfg.ext_j, false),
+
+DEFINE_PROP_BOOL("x-zca", RISCVCPU, cfg.ext_zca, false),
+DEFINE_PROP_BOOL("x-zcb", RISCVCPU, cfg.ext_zcb, false),
+DEFINE_PROP_BOOL("x-zcd", RISCVCPU, cfg.ext_zcd, false),
+DEFINE_PROP_BOOL("x-zcf", RISCVCPU, cfg.ext_zcf, false),
+DEFINE_PROP_BOOL("x-zcmp", RISCVCPU, cfg.ext_zcmp, false),
+DEFINE_PROP_BOOL("x-zcmt", RISCVCPU, cfg.ext_zcmt, false),
 /* ePMP 0.9.3 */
 DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false),
 DEFINE_PROP_BOOL("x-smaia", RISCVCPU, cfg.ext_smaia, false),
-- 
2.25.1




[PATCH v8 6/9] target/riscv: add support for Zcmp extension

2022-11-29 Thread Weiwei Li
Add encode, trans* functions for Zcmp instructions

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
Reviewed-by: Richard Henderson 
Reviewed-by: Alistair Francis 
---
 target/riscv/insn16.decode|  18 +++
 target/riscv/insn_trans/trans_rvzce.c.inc | 189 +-
 target/riscv/translate.c  |   5 +
 3 files changed, 211 insertions(+), 1 deletion(-)

diff --git a/target/riscv/insn16.decode b/target/riscv/insn16.decode
index 47603ec1e0..4654c23052 100644
--- a/target/riscv/insn16.decode
+++ b/target/riscv/insn16.decode
@@ -21,6 +21,8 @@
 %rs1_3 7:3!function=ex_rvc_register
 %rs2_3 2:3!function=ex_rvc_register
 %rs2_5 2:5
+%sreg1 7:3!function=ex_sreg_register
+%sreg2 2:3!function=ex_sreg_register
 
 # Immediates:
 %imm_ci12:s1 2:5
@@ -45,6 +47,8 @@
 
 %zcb_b_uimm  5:1 6:1
 %zcb_h_uimm  5:1 !function=ex_shift_1
+%zcmp_spimm  2:2 !function=ex_shift_4
+%zcmp_rlist  4:4
 
 # Argument sets imported from insn32.decode:
   !extern
@@ -56,7 +60,9 @@
  imm rd   !extern
  shamt rs1 rd !extern
 &r2   rd rs1   !extern
+&r2_s rs1 rs2  !extern

+&zcmp zcmp_rlist zcmp_spimm
 
 # Formats 16:
 @cr  . .  ..   rs2=%rs2_5   rs1=%rd %rd
@@ -98,6 +104,8 @@
 @zcb_lh   ... . .. ... .. ... ..imm=%zcb_h_uimm  rs1=%rs1_3 rd=%rs2_3
 @zcb_sb   ... . .. ... .. ... ..imm=%zcb_b_uimm  rs1=%rs1_3 
rs2=%rs2_3
 @zcb_sh   ... . .. ... .. ... ..imm=%zcb_h_uimm  rs1=%rs1_3 
rs2=%rs2_3
+@zcmp       ... ...  ........   ..   &zcmp   %zcmp_rlist  %zcmp_spimm
+@cm_mv      ... ...  ... .. ... ..   &r2_s   rs2=%sreg2   rs1=%sreg1
 
 # *** RV32/64C Standard Extension (Quadrant 0) ***
 {
@@ -177,6 +185,16 @@ slli  000 .  .  . 10 @c_shift2
 {
   sq  101  ... ... .. ... 10 @c_sqsp
   c_fsd   101   ..  . 10 @c_sdsp
+
+  # *** RV64 and RV32 Zcmp Extension ***
+  [
+cm_push 101  11000  .... .. 10 @zcmp
+cm_pop  101  11010  .... .. 10 @zcmp
+cm_popret   101  11110  .... .. 10 @zcmp
+cm_popretz  101  11100  .... .. 10 @zcmp
+cm_mva01s   101  011 ... 11 ... 10 @cm_mv
+cm_mvsa01   101  011 ... 01 ... 10 @cm_mv
+  ]
 }
 sw110 .  .  . 10 @c_swsp
 
diff --git a/target/riscv/insn_trans/trans_rvzce.c.inc 
b/target/riscv/insn_trans/trans_rvzce.c.inc
index de96c4afaf..30b53a9509 100644
--- a/target/riscv/insn_trans/trans_rvzce.c.inc
+++ b/target/riscv/insn_trans/trans_rvzce.c.inc
@@ -1,5 +1,5 @@
 /*
- * RISC-V translation routines for the Zcb Standard Extension.
+ * RISC-V translation routines for the Zc[b,mp] Standard Extensions.
  *
  * Copyright (c) 2021-2022 PLCT Lab
  *
@@ -21,6 +21,11 @@
 return false;   \
 } while (0)
 
+#define REQUIRE_ZCMP(ctx) do {   \
+if (!ctx->cfg_ptr->ext_zcmp) \
+return false;\
+} while (0)
+
 static bool trans_c_zext_b(DisasContext *ctx, arg_c_zext_b *a)
 {
 REQUIRE_ZCB(ctx);
@@ -98,3 +103,185 @@ static bool trans_c_sh(DisasContext *ctx, arg_c_sh *a)
 REQUIRE_ZCB(ctx);
 return gen_store(ctx, a, MO_UW);
 }
+
+#define X_S0    8
+#define X_S1    9
+#define X_Sn    16
+
+static uint32_t decode_push_pop_list(DisasContext *ctx, target_ulong rlist)
+{
+uint32_t reg_bitmap = 0;
+
+if (ctx->cfg_ptr->ext_e && rlist > 6) {
+return 0;
+}
+
+switch (rlist) {
+case 15:
+reg_bitmap |=  1 << (X_Sn + 11) ;
+reg_bitmap |=  1 << (X_Sn + 10) ;
+/* FALL THROUGH */
+case 14:
+reg_bitmap |=  1 << (X_Sn + 9) ;
+/* FALL THROUGH */
+case 13:
+reg_bitmap |=  1 << (X_Sn + 8) ;
+/* FALL THROUGH */
+case 12:
+reg_bitmap |=  1 << (X_Sn + 7) ;
+/* FALL THROUGH */
+case 11:
+reg_bitmap |=  1 << (X_Sn + 6) ;
+/* FALL THROUGH */
+case 10:
+reg_bitmap |=  1 << (X_Sn + 5) ;
+/* FALL THROUGH */
+case 9:
+reg_bitmap |=  1 << (X_Sn + 4) ;
+/* FALL THROUGH */
+case 8:
+reg_bitmap |=  1 << (X_Sn + 3) ;
+/* FALL THROUGH */
+case 7:
+reg_bitmap |=  1 << (X_Sn + 2) ;
+/* FALL THROUGH */
+case 6:
+reg_bitmap |=  1 << X_S1 ;
+/* FALL THROUGH */
+case 5:
+reg_bitmap |= 1 << X_S0;
+/* FALL THROUGH */
+case 4:
+reg_bitmap |= 1 << xRA;
+break;
+default:
+break;
+}
+
+return reg_bitmap;
+}
+
+static bool gen_pop(DisasContext *ctx, arg_zcmp *a, bool ret, bool ret_val)
+{
+REQUIRE_ZCMP(ctx);
+
+uint32_t reg_bitmap = decode_push_pop_list(ctx, a->zcmp_rlist);
+if (reg_bitmap == 0) {
+return false;
+}
+
+MemOp memop = get_ol(ctx) == MXL_RV32 ? MO_TEUL : MO_TEUQ;
+int reg_size = memop_size(memop);
+target_ulong stack_adj = 

[PATCH v8 7/9] target/riscv: add support for Zcmt extension

2022-11-29 Thread Weiwei Li
Add encode, trans* functions and helper function support for Zcmt
instructions.
Add support for the jvt CSR.

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
Reviewed-by: Richard Henderson 
Reviewed-by: Alistair Francis 
---
 target/riscv/cpu.h|  4 ++
 target/riscv/cpu_bits.h   |  7 +++
 target/riscv/csr.c| 38 +++-
 target/riscv/helper.h |  3 ++
 target/riscv/insn16.decode|  7 ++-
 target/riscv/insn_trans/trans_rvzce.c.inc | 28 +++-
 target/riscv/machine.c| 19 
 target/riscv/meson.build  |  3 +-
 target/riscv/zce_helper.c | 55 +++
 9 files changed, 159 insertions(+), 5 deletions(-)
 create mode 100644 target/riscv/zce_helper.c

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 6e915b6937..7bcedc7467 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -181,6 +181,8 @@ struct CPUArchState {
 
 uint32_t features;
 
+target_ulong jvt;
+
 #ifdef CONFIG_USER_ONLY
 uint32_t elf_flags;
 #endif
@@ -600,6 +602,8 @@ void riscv_cpu_set_aia_ireg_rmw_fn(CPURISCVState *env, 
uint32_t priv,
  target_ulong new_val,
  target_ulong write_mask),
void *rmw_fn_arg);
+
+RISCVException smstateen_acc_ok(CPURISCVState *env, int index, uint64_t bit);
 #endif
 void riscv_cpu_set_mode(CPURISCVState *env, target_ulong newpriv);
 
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index 8b0d7e20ea..ce347e5575 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -319,6 +319,7 @@
 #define SMSTATEEN_MAX_COUNT 4
 #define SMSTATEEN0_CS   (1ULL << 0)
 #define SMSTATEEN0_FCSR (1ULL << 1)
+#define SMSTATEEN0_JVT  (1ULL << 2)
 #define SMSTATEEN0_HSCONTXT (1ULL << 57)
 #define SMSTATEEN0_IMSIC(1ULL << 58)
 #define SMSTATEEN0_AIA  (1ULL << 59)
@@ -523,6 +524,9 @@
 /* Crypto Extension */
 #define CSR_SEED0x015
 
+/* Zcmt Extension */
+#define CSR_JVT 0x017
+
 /* mstatus CSR bits */
 #define MSTATUS_UIE 0x0001
 #define MSTATUS_SIE 0x0002
@@ -894,4 +898,7 @@ typedef enum RISCVException {
 #define MHPMEVENT_IDX_MASK 0xF
 #define MHPMEVENT_SSCOF_RESVD  16
 
+/* JVT CSR bits */
+#define JVT_MODE   0x3F
+#define JVT_BASE   (~0x3F)
 #endif
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index e6f8250929..a752e8b215 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -42,8 +42,7 @@ void riscv_set_csr_ops(int csrno, riscv_csr_operations *ops)
 
 /* Predicates */
 #if !defined(CONFIG_USER_ONLY)
-static RISCVException smstateen_acc_ok(CPURISCVState *env, int index,
-   uint64_t bit)
+RISCVException smstateen_acc_ok(CPURISCVState *env, int index, uint64_t bit)
 {
 bool virt = riscv_cpu_virt_enabled(env);
 CPUState *cs = env_cpu(env);
@@ -163,6 +162,24 @@ static RISCVException ctr32(CPURISCVState *env, int csrno)
 return ctr(env, csrno);
 }
 
+static RISCVException zcmt(CPURISCVState *env, int csrno)
+{
+RISCVCPU *cpu = env_archcpu(env);
+
+if (!cpu->cfg.ext_zcmt) {
+return RISCV_EXCP_ILLEGAL_INST;
+}
+
+#if !defined(CONFIG_USER_ONLY)
+RISCVException ret = smstateen_acc_ok(env, 0, SMSTATEEN0_JVT);
+if (ret != RISCV_EXCP_NONE) {
+return ret;
+}
+#endif
+
+return RISCV_EXCP_NONE;
+}
+
 #if !defined(CONFIG_USER_ONLY)
 static RISCVException mctr(CPURISCVState *env, int csrno)
 {
@@ -3980,6 +3997,20 @@ RISCVException riscv_csrrw_debug(CPURISCVState *env, int 
csrno,
 return ret;
 }
 
+static RISCVException read_jvt(CPURISCVState *env, int csrno,
+   target_ulong *val)
+{
+*val = env->jvt;
+return RISCV_EXCP_NONE;
+}
+
+static RISCVException write_jvt(CPURISCVState *env, int csrno,
+target_ulong val)
+{
+env->jvt = val;
+return RISCV_EXCP_NONE;
+}
+
 /* Control and Status Register function table */
 riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
 /* User Floating-Point CSRs */
@@ -4017,6 +4048,9 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
 /* Crypto Extension */
 [CSR_SEED] = { "seed", seed, NULL, NULL, rmw_seed },
 
+/* Zcmt Extension */
+[CSR_JVT] = {"jvt", zcmt, read_jvt, write_jvt},
+
 #if !defined(CONFIG_USER_ONLY)
 /* Machine Timers and Counters */
 [CSR_MCYCLE]= { "mcycle",any,   read_hpmcounter,
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index 227c7122ef..d979f0bfc4 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -1136,3 +1136,6 @@ DEF_HELPER_FLAGS_1(aes64im, TCG_CALL_NO_RWG_SE, tl, tl)
 
 DEF_HELPER_FLAGS_3(sm4ed, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl)
 

[PATCH v8 1/9] target/riscv: add cfg properties for Zc* extension

2022-11-29 Thread Weiwei Li
Add properties for the Zca, Zcb, Zcf, Zcd, Zcmp and Zcmt extensions.
Add checks for these properties.

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
Reviewed-by: Richard Henderson 
Reviewed-by: Alistair Francis 
---
 target/riscv/cpu.c | 43 +++
 target/riscv/cpu.h |  6 ++
 2 files changed, 49 insertions(+)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index c705331bbe..3d06b57416 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -808,6 +808,49 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
 }
 }
 
+if (cpu->cfg.ext_c) {
+cpu->cfg.ext_zca = true;
+if (cpu->cfg.ext_f && env->misa_mxl_max == MXL_RV32) {
+cpu->cfg.ext_zcf = true;
+}
+if (cpu->cfg.ext_d) {
+cpu->cfg.ext_zcd = true;
+}
+}
+
+if (env->misa_mxl_max != MXL_RV32 && cpu->cfg.ext_zcf) {
+error_setg(errp, "Zcf extension is only relevant to RV32");
+return;
+}
+
+if (!cpu->cfg.ext_f && cpu->cfg.ext_zcf) {
+error_setg(errp, "Zcf extension requires F extension");
+return;
+}
+
+if (!cpu->cfg.ext_d && cpu->cfg.ext_zcd) {
+error_setg(errp, "Zcd extension requires D extension");
+return;
+}
+
+if ((cpu->cfg.ext_zcf || cpu->cfg.ext_zcd || cpu->cfg.ext_zcb ||
+ cpu->cfg.ext_zcmp || cpu->cfg.ext_zcmt) && !cpu->cfg.ext_zca) {
+error_setg(errp, "Zcf/Zcd/Zcb/Zcmp/Zcmt extensions require Zca "
+ "extension");
+return;
+}
+
+if (cpu->cfg.ext_zcd && (cpu->cfg.ext_zcmp || cpu->cfg.ext_zcmt)) {
+error_setg(errp, "Zcmp/Zcmt extensions are incompatible with "
+ "Zcd extension");
+return;
+}
+
+if (cpu->cfg.ext_zcmt && !cpu->cfg.ext_icsr) {
+error_setg(errp, "Zcmt extension requires Zicsr extension");
+return;
+}
+
 if (cpu->cfg.ext_zk) {
 cpu->cfg.ext_zkn = true;
 cpu->cfg.ext_zkr = true;
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 9bd539d77a..6e915b6937 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -434,6 +434,12 @@ struct RISCVCPUConfig {
 bool ext_zbkc;
 bool ext_zbkx;
 bool ext_zbs;
+bool ext_zca;
+bool ext_zcb;
+bool ext_zcd;
+bool ext_zcf;
+bool ext_zcmp;
+bool ext_zcmt;
 bool ext_zk;
 bool ext_zkn;
 bool ext_zknd;
-- 
2.25.1




[PATCH v8 9/9] disas/riscv.c: add disasm support for Zc*

2022-11-29 Thread Weiwei Li
Zcmp/Zcmt instructions currently override the disassembly of the
c.fld*/c.fsd* instructions.
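
One illustrative way to exercise the new disassembler entries is QEMU's
in_asm logging (a sketch only; the x-* cpu flags are the experimental
properties added by this series, and ./zc-test stands for any binary
built with Zc* instructions):

  qemu-riscv64 -cpu rv64,c=false,x-zca=true,x-zcb=true,x-zcmp=true,x-zcmt=true \
      -d in_asm -D /tmp/zc-in_asm.log ./zc-test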

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
Acked-by: Alistair Francis 
---
 disas/riscv.c | 228 +-
 1 file changed, 227 insertions(+), 1 deletion(-)

diff --git a/disas/riscv.c b/disas/riscv.c
index d216b9c39b..f75da98540 100644
--- a/disas/riscv.c
+++ b/disas/riscv.c
@@ -163,6 +163,13 @@ typedef enum {
 rv_codec_v_i,
 rv_codec_vsetvli,
 rv_codec_vsetivli,
+rv_codec_zcb_ext,
+rv_codec_zcb_mul,
+rv_codec_zcb_lb,
+rv_codec_zcb_lh,
+rv_codec_zcmp_cm_pushpop,
+rv_codec_zcmp_cm_mv,
+rv_codec_zcmt_jt,
 } rv_codec;
 
 typedef enum {
@@ -935,6 +942,26 @@ typedef enum {
 rv_op_vsetvli = 766,
 rv_op_vsetivli = 767,
 rv_op_vsetvl = 768,
+rv_op_c_zext_b = 769,
+rv_op_c_sext_b = 770,
+rv_op_c_zext_h = 771,
+rv_op_c_sext_h = 772,
+rv_op_c_zext_w = 773,
+rv_op_c_not = 774,
+rv_op_c_mul = 775,
+rv_op_c_lbu = 776,
+rv_op_c_lhu = 777,
+rv_op_c_lh = 778,
+rv_op_c_sb = 779,
+rv_op_c_sh = 780,
+rv_op_cm_push = 781,
+rv_op_cm_pop = 782,
+rv_op_cm_popret = 783,
+rv_op_cm_popretz = 784,
+rv_op_cm_mva01s = 785,
+rv_op_cm_mvsa01 = 786,
+rv_op_cm_jt = 787,
+rv_op_cm_jalt = 788,
 } rv_op;
 
 /* structures */
@@ -958,6 +985,7 @@ typedef struct {
 uint8_t   rnum;
 uint8_t   vm;
 uint32_t  vzimm;
+uint8_t   rlist;
 } rv_decode;
 
 typedef struct {
@@ -1070,6 +1098,10 @@ static const char rv_vreg_name_sym[32][4] = {
 #define rv_fmt_vd_vm  "O\tDm"
 #define rv_fmt_vsetvli"O\t0,1,v"
 #define rv_fmt_vsetivli   "O\t0,u,v"
+#define rv_fmt_rs1_rs2_zce_ldst   "O\t2,i(1)"
+#define rv_fmt_push_rlist "O\tx,-i"
+#define rv_fmt_pop_rlist  "O\tx,i"
+#define rv_fmt_zcmt_index "O\ti"
 
 /* pseudo-instruction constraints */
 
@@ -2065,7 +2097,27 @@ const rv_opcode_data opcode_data[] = {
 { "vsext.vf8", rv_codec_v_r, rv_fmt_vd_vs2_vm, NULL, rv_op_vsext_vf8, 
rv_op_vsext_vf8, 0 },
 { "vsetvli", rv_codec_vsetvli, rv_fmt_vsetvli, NULL, rv_op_vsetvli, 
rv_op_vsetvli, 0 },
 { "vsetivli", rv_codec_vsetivli, rv_fmt_vsetivli, NULL, rv_op_vsetivli, 
rv_op_vsetivli, 0 },
-{ "vsetvl", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, rv_op_vsetvl, 
rv_op_vsetvl, 0 }
+{ "vsetvl", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, rv_op_vsetvl, 
rv_op_vsetvl, 0 },
+{ "c.zext.b", rv_codec_zcb_ext, rv_fmt_rd, NULL, 0 },
+{ "c.sext.b", rv_codec_zcb_ext, rv_fmt_rd, NULL, 0 },
+{ "c.zext.h", rv_codec_zcb_ext, rv_fmt_rd, NULL, 0 },
+{ "c.sext.h", rv_codec_zcb_ext, rv_fmt_rd, NULL, 0 },
+{ "c.zext.w", rv_codec_zcb_ext, rv_fmt_rd, NULL, 0 },
+{ "c.not", rv_codec_zcb_ext, rv_fmt_rd, NULL, 0 },
+{ "c.mul", rv_codec_zcb_mul, rv_fmt_rd_rs2, NULL, 0, 0 },
+{ "c.lbu", rv_codec_zcb_lb, rv_fmt_rs1_rs2_zce_ldst, NULL, 0, 0, 0 },
+{ "c.lhu", rv_codec_zcb_lh, rv_fmt_rs1_rs2_zce_ldst, NULL, 0, 0, 0 },
+{ "c.lh", rv_codec_zcb_lh, rv_fmt_rs1_rs2_zce_ldst, NULL, 0, 0, 0 },
+{ "c.sb", rv_codec_zcb_lb, rv_fmt_rs1_rs2_zce_ldst, NULL, 0, 0, 0 },
+{ "c.sh", rv_codec_zcb_lh, rv_fmt_rs1_rs2_zce_ldst, NULL, 0, 0, 0 },
+{ "cm.push", rv_codec_zcmp_cm_pushpop, rv_fmt_push_rlist, NULL, 0, 0 },
+{ "cm.pop", rv_codec_zcmp_cm_pushpop, rv_fmt_pop_rlist, NULL, 0, 0 },
+{ "cm.popret", rv_codec_zcmp_cm_pushpop, rv_fmt_pop_rlist, NULL, 0, 0, 0 },
+{ "cm.popretz", rv_codec_zcmp_cm_pushpop, rv_fmt_pop_rlist, NULL, 0, 0 },
+{ "cm.mva01s", rv_codec_zcmp_cm_mv, rv_fmt_rd_rs2, NULL, 0, 0, 0 },
+{ "cm.mvsa01", rv_codec_zcmp_cm_mv, rv_fmt_rd_rs2, NULL, 0, 0, 0 },
+{ "cm.jt", rv_codec_zcmt_jt, rv_fmt_zcmt_index, NULL, 0 },
+{ "cm.jalt", rv_codec_zcmt_jt, rv_fmt_zcmt_index, NULL, 0 },
 };
 
 /* CSR names */
@@ -2084,6 +2136,7 @@ static const char *csr_name(int csrno)
 case 0x000a: return "vxrm";
 case 0x000f: return "vcsr";
 case 0x0015: return "seed";
+case 0x0017: return "jvt";
 case 0x0040: return "uscratch";
 case 0x0041: return "uepc";
 case 0x0042: return "ucause";
@@ -2306,6 +2359,24 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa 
isa)
 op = rv_op_c_ld;
 }
 break;
+case 4:
+switch ((inst >> 10) & 0b111) {
+case 0: op = rv_op_c_lbu; break;
+case 1:
+if (((inst >> 6) & 1) == 0) {
+op = rv_op_c_lhu;
+} else {
+op = rv_op_c_lh;
+}
+break;
+case 2: op = rv_op_c_sb; break;
+case 3:
+if (((inst >> 6) & 1) == 0) {
+op = rv_op_c_sh;
+}
+break;
+}
+break;
 case 5:
 if (isa == rv128) {
 op = rv_op_c_sq;
@@ 

[PATCH v8 2/9] target/riscv: add support for Zca extension

2022-11-29 Thread Weiwei Li
Modify the check for C extension to Zca (C implies Zca)

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
Reviewed-by: Richard Henderson 
Reviewed-by: Alistair Francis 
Reviewed-by: Wilfred Mallawa 
---
 target/riscv/insn_trans/trans_rvi.c.inc | 4 ++--
 target/riscv/translate.c| 8 ++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvi.c.inc 
b/target/riscv/insn_trans/trans_rvi.c.inc
index 4496f21266..ef7c3002b0 100644
--- a/target/riscv/insn_trans/trans_rvi.c.inc
+++ b/target/riscv/insn_trans/trans_rvi.c.inc
@@ -56,7 +56,7 @@ static bool trans_jalr(DisasContext *ctx, arg_jalr *a)
 tcg_gen_andi_tl(cpu_pc, cpu_pc, (target_ulong)-2);
 
 gen_set_pc(ctx, cpu_pc);
-if (!has_ext(ctx, RVC)) {
+if (!ctx->cfg_ptr->ext_zca) {
 TCGv t0 = tcg_temp_new();
 
 misaligned = gen_new_label();
@@ -178,7 +178,7 @@ static bool gen_branch(DisasContext *ctx, arg_b *a, TCGCond 
cond)
 
 gen_set_label(l); /* branch taken */
 
-if (!has_ext(ctx, RVC) && ((ctx->base.pc_next + a->imm) & 0x3)) {
+if (!ctx->cfg_ptr->ext_zca && ((ctx->base.pc_next + a->imm) & 0x3)) {
 /* misaligned */
 gen_exception_inst_addr_mis(ctx);
 } else {
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 2ab8772ebe..cf8f10afa2 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -557,7 +557,7 @@ static void gen_jal(DisasContext *ctx, int rd, target_ulong 
imm)
 
 /* check misaligned: */
 next_pc = ctx->base.pc_next + imm;
-if (!has_ext(ctx, RVC)) {
+if (!ctx->cfg_ptr->ext_zca) {
 if ((next_pc & 0x3) != 0) {
 gen_exception_inst_addr_mis(ctx);
 return;
@@ -1097,7 +1097,11 @@ static void decode_opc(CPURISCVState *env, DisasContext 
*ctx, uint16_t opcode)
 ctx->virt_inst_excp = false;
 /* Check for compressed insn */
 if (insn_len(opcode) == 2) {
-if (!has_ext(ctx, RVC)) {
+/*
+ * The Zca extension is added as way to refer to instructions in the C
+ * extension that do not include the floating-point loads and stores
+ */
+if (!ctx->cfg_ptr->ext_zca) {
 gen_exception_illegal(ctx);
 } else {
 ctx->opcode = opcode;
-- 
2.25.1




[PATCH v8 3/9] target/riscv: add support for Zcf extension

2022-11-29 Thread Weiwei Li
Separate c_flw/c_fsw from flw/fsw to add check for Zcf extension

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
Reviewed-by: Richard Henderson 
Reviewed-by: Alistair Francis 
---
 target/riscv/insn16.decode  |  8 
 target/riscv/insn_trans/trans_rvf.c.inc | 18 ++
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/target/riscv/insn16.decode b/target/riscv/insn16.decode
index ccfe59f294..f3ea650325 100644
--- a/target/riscv/insn16.decode
+++ b/target/riscv/insn16.decode
@@ -109,11 +109,11 @@ sw110  ... ... .. ... 00 @cs_w
 # *** RV32C and RV64C specific Standard Extension (Quadrant 0) ***
 {
   ld  011  ... ... .. ... 00 @cl_d
-  flw 011  ... ... .. ... 00 @cl_w
+  c_flw   011  ... ... .. ... 00 @cl_w
 }
 {
   sd  111  ... ... .. ... 00 @cs_d
-  fsw 111  ... ... .. ... 00 @cs_w
+  c_fsw   111  ... ... .. ... 00 @cs_w
 }
 
 # *** RV32/64C Standard Extension (Quadrant 1) ***
@@ -174,9 +174,9 @@ sw110 .  .  . 10 @c_swsp
 {
   c64_illegal 011 -  0  - 10 # c.ldsp, RES rd=0
   ld  011 .  .  . 10 @c_ldsp
-  flw 011 .  .  . 10 @c_lwsp
+  c_flw   011 .  .  . 10 @c_lwsp
 }
 {
   sd  111 .  .  . 10 @c_sdsp
-  fsw 111 .  .  . 10 @c_swsp
+  c_fsw   111 .  .  . 10 @c_swsp
 }
diff --git a/target/riscv/insn_trans/trans_rvf.c.inc 
b/target/riscv/insn_trans/trans_rvf.c.inc
index 965e1f8d11..5df9c148dc 100644
--- a/target/riscv/insn_trans/trans_rvf.c.inc
+++ b/target/riscv/insn_trans/trans_rvf.c.inc
@@ -30,6 +30,12 @@
 } \
 } while (0)
 
+#define REQUIRE_ZCF(ctx) do {  \
+if (!ctx->cfg_ptr->ext_zcf) {  \
+return false;  \
+}  \
+} while (0)
+
 static bool trans_flw(DisasContext *ctx, arg_flw *a)
 {
 TCGv_i64 dest;
@@ -61,6 +67,18 @@ static bool trans_fsw(DisasContext *ctx, arg_fsw *a)
 return true;
 }
 
+static bool trans_c_flw(DisasContext *ctx, arg_flw *a)
+{
+REQUIRE_ZCF(ctx);
+return trans_flw(ctx, a);
+}
+
+static bool trans_c_fsw(DisasContext *ctx, arg_fsw *a)
+{
+REQUIRE_ZCF(ctx);
+return trans_fsw(ctx, a);
+}
+
 static bool trans_fmadd_s(DisasContext *ctx, arg_fmadd_s *a)
 {
 REQUIRE_FPU;
-- 
2.25.1




[PATCH v8 0/9] support subsets of code size reduction extension

2022-11-29 Thread Weiwei Li
This patchset implements the RISC-V Zc* extension v1.0.0.RC5.7 instructions.

Specification:
https://github.com/riscv/riscv-code-size-reduction/tree/main/Zc-specification

The port is available here:
https://github.com/plctlab/plct-qemu/tree/plct-zce-upstream-v8

To test the Zc* implementation, specify the cpu argument with
"x-zca=true,x-zcb=true,x-zcf=true,f=true" and "x-zcd=true,d=true" (or
"x-zcmp=true,x-zcmt=true" with c=false or d=false) to enable support for the
Zca/Zcb/Zcf and Zcd (or Zcmp/Zcmt) extensions, for example:
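
A couple of illustrative invocations (the kernel path is a placeholder; the
x-* names are the experimental properties exposed by this series):

  # RV64 with Zca/Zcb/Zcd (D enabled; note Zcd conflicts with Zcmp/Zcmt)
  qemu-system-riscv64 -M virt -nographic \
      -cpu rv64,d=true,x-zca=true,x-zcb=true,x-zcd=true \
      -kernel /path/to/Image

  # RV64 with Zcmp/Zcmt (c=false keeps the C->Zcd implication from kicking in)
  qemu-system-riscv64 -M virt -nographic \
      -cpu rv64,c=false,x-zca=true,x-zcb=true,x-zcmp=true,x-zcmt=true \
      -kernel /path/to/Image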


This implementation can pass the basic zc tests from 
https://github.com/yulong-plct/zc-test

v8:
* improve disas support in Patch 9

v7:
* Fix description for Zca

v6:
* fix base address for jump table in Patch 7
* rebase on riscv-to-apply.next

v5:
* fix exception unwind problem for cpu_ld*_code in helper of cm_jalt

v4:
* improve Zcmp suggested by Richard
* fix stateen related check for Zcmt

v3:
* update the solution for Zcf to the way of Zcd
* update Zcb to reuse gen_load/store
* use trans function instead of helper for push/pop

v2:
* add check for relationship between Zca/Zcf/Zcd with C/F/D based on related 
discussion in review of Zc* spec
* separate c.fld{sp}/fsd{sp} with fld{sp}/fsd{sp} before support of zcmp/zcmt

Weiwei Li (9):
  target/riscv: add cfg properties for Zc* extension
  target/riscv: add support for Zca extension
  target/riscv: add support for Zcf extension
  target/riscv: add support for Zcd extension
  target/riscv: add support for Zcb extension
  target/riscv: add support for Zcmp extension
  target/riscv: add support for Zcmt extension
  target/riscv: expose properties for Zc* extension
  disas/riscv.c: add disasm support for Zc*

 disas/riscv.c | 228 +++-
 target/riscv/cpu.c|  56 
 target/riscv/cpu.h|  10 +
 target/riscv/cpu_bits.h   |   7 +
 target/riscv/csr.c|  38 ++-
 target/riscv/helper.h |   3 +
 target/riscv/insn16.decode|  63 -
 target/riscv/insn_trans/trans_rvd.c.inc   |  18 ++
 target/riscv/insn_trans/trans_rvf.c.inc   |  18 ++
 target/riscv/insn_trans/trans_rvi.c.inc   |   4 +-
 target/riscv/insn_trans/trans_rvzce.c.inc | 313 ++
 target/riscv/machine.c|  19 ++
 target/riscv/meson.build  |   3 +-
 target/riscv/translate.c  |  15 +-
 target/riscv/zce_helper.c |  55 
 15 files changed, 834 insertions(+), 16 deletions(-)
 create mode 100644 target/riscv/insn_trans/trans_rvzce.c.inc
 create mode 100644 target/riscv/zce_helper.c

-- 
2.25.1




[PATCH v8 4/9] target/riscv: add support for Zcd extension

2022-11-29 Thread Weiwei Li
Separate c_fld/c_fsd from fld/fsd to add an additional check for
c.fld{sp}/c.fsd{sp}, which is needed so that Zcmp/Zcmt can reuse
their encodings

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
Reviewed-by: Richard Henderson 
Reviewed-by: Alistair Francis 
---
 target/riscv/insn16.decode  |  8 
 target/riscv/insn_trans/trans_rvd.c.inc | 18 ++
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/target/riscv/insn16.decode b/target/riscv/insn16.decode
index f3ea650325..b62664b6af 100644
--- a/target/riscv/insn16.decode
+++ b/target/riscv/insn16.decode
@@ -97,12 +97,12 @@
 }
 {
   lq  001  ... ... .. ... 00 @cl_q
-  fld 001  ... ... .. ... 00 @cl_d
+  c_fld   001  ... ... .. ... 00 @cl_d
 }
 lw010  ... ... .. ... 00 @cl_w
 {
   sq  101  ... ... .. ... 00 @cs_q
-  fsd 101  ... ... .. ... 00 @cs_d
+  c_fsd   101  ... ... .. ... 00 @cs_d
 }
 sw110  ... ... .. ... 00 @cs_w
 
@@ -148,7 +148,7 @@ addw  100 1 11 ... 01 ... 01 @cs_2
 slli  000 .  .  . 10 @c_shift2
 {
   lq  001  ... ... .. ... 10 @c_lqsp
-  fld 001 .  .  . 10 @c_ldsp
+  c_fld   001 .  .  . 10 @c_ldsp
 }
 {
   illegal 010 -  0  - 10 # c.lwsp, RES rd=0
@@ -166,7 +166,7 @@ slli  000 .  .  . 10 @c_shift2
 }
 {
   sq  101  ... ... .. ... 10 @c_sqsp
-  fsd 101   ..  . 10 @c_sdsp
+  c_fsd   101   ..  . 10 @c_sdsp
 }
 sw110 .  .  . 10 @c_swsp
 
diff --git a/target/riscv/insn_trans/trans_rvd.c.inc 
b/target/riscv/insn_trans/trans_rvd.c.inc
index 6e3159b797..47849ffdfd 100644
--- a/target/riscv/insn_trans/trans_rvd.c.inc
+++ b/target/riscv/insn_trans/trans_rvd.c.inc
@@ -31,6 +31,12 @@
 } \
 } while (0)
 
+#define REQUIRE_ZCD(ctx) do { \
+if (!ctx->cfg_ptr->ext_zcd) {  \
+return false; \
+} \
+} while (0)
+
 static bool trans_fld(DisasContext *ctx, arg_fld *a)
 {
 TCGv addr;
@@ -59,6 +65,18 @@ static bool trans_fsd(DisasContext *ctx, arg_fsd *a)
 return true;
 }
 
+static bool trans_c_fld(DisasContext *ctx, arg_fld *a)
+{
+REQUIRE_ZCD(ctx);
+return trans_fld(ctx, a);
+}
+
+static bool trans_c_fsd(DisasContext *ctx, arg_fsd *a)
+{
+REQUIRE_ZCD(ctx);
+return trans_fsd(ctx, a);
+}
+
 static bool trans_fmadd_d(DisasContext *ctx, arg_fmadd_d *a)
 {
 REQUIRE_FPU;
-- 
2.25.1




[PATCH v8 5/9] target/riscv: add support for Zcb extension

2022-11-29 Thread Weiwei Li
Add encode and trans* functions support for Zcb instructions

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
Reviewed-by: Richard Henderson 
Reviewed-by: Alistair Francis 
---
 target/riscv/insn16.decode|  24 ++
 target/riscv/insn_trans/trans_rvzce.c.inc | 100 ++
 target/riscv/translate.c  |   2 +
 3 files changed, 126 insertions(+)
 create mode 100644 target/riscv/insn_trans/trans_rvzce.c.inc

diff --git a/target/riscv/insn16.decode b/target/riscv/insn16.decode
index b62664b6af..47603ec1e0 100644
--- a/target/riscv/insn16.decode
+++ b/target/riscv/insn16.decode
@@ -43,6 +43,8 @@
 %imm_addi16sp  12:s1 3:2 5:1 2:1 6:1 !function=ex_shift_4
 %imm_lui   12:s1 2:5 !function=ex_shift_12
 
+%zcb_b_uimm  5:1 6:1
+%zcb_h_uimm  5:1 !function=ex_shift_1
 
 # Argument sets imported from insn32.decode:
   !extern
@@ -53,6 +55,7 @@
  imm rs2 rs1  !extern
  imm rd   !extern
  shamt rs1 rd !extern
+rd rs1   !extern
 
 
 # Formats 16:
@@ -89,6 +92,13 @@
 
 @c_andi ... . .. ... . ..  imm=%imm_ci rs1=%rs1_3 rd=%rs1_3
 
+@zcb_unary... ...  ... .. ... ..rs1=%rs1_3 rd=%rs1_3
+@zcb_binary   ... ...  ... .. ... ..rs2=%rs2_3   rs1=%rs1_3 rd=%rs1_3
+@zcb_lb   ... . .. ... .. ... ..imm=%zcb_b_uimm  rs1=%rs1_3 rd=%rs2_3
+@zcb_lh   ... . .. ... .. ... ..imm=%zcb_h_uimm  rs1=%rs1_3 rd=%rs2_3
+@zcb_sb   ... . .. ... .. ... ..imm=%zcb_b_uimm  rs1=%rs1_3 
rs2=%rs2_3
+@zcb_sh   ... . .. ... .. ... ..imm=%zcb_h_uimm  rs1=%rs1_3 
rs2=%rs2_3
+
 # *** RV32/64C Standard Extension (Quadrant 0) ***
 {
   # Opcode of all zeros is illegal; rd != 0, nzuimm == 0 is reserved.
@@ -180,3 +190,17 @@ sw110 .  .  . 10 @c_swsp
   sd  111 .  .  . 10 @c_sdsp
   c_fsw   111 .  .  . 10 @c_swsp
 }
+
+# *** RV64 and RV32 Zcb Extension ***
+c_zext_b  100 111  ... 11 000 01 @zcb_unary
+c_sext_b  100 111  ... 11 001 01 @zcb_unary
+c_zext_h  100 111  ... 11 010 01 @zcb_unary
+c_sext_h  100 111  ... 11 011 01 @zcb_unary
+c_zext_w  100 111  ... 11 100 01 @zcb_unary
+c_not 100 111  ... 11 101 01 @zcb_unary
+c_mul 100 111  ... 10 ... 01 @zcb_binary
+c_lbu 100 000  ... .. ... 00 @zcb_lb
+c_lhu 100 001  ... 0. ... 00 @zcb_lh
+c_lh  100 001  ... 1. ... 00 @zcb_lh
+c_sb  100 010  ... .. ... 00 @zcb_sb
+c_sh  100 011  ... 0. ... 00 @zcb_sh
diff --git a/target/riscv/insn_trans/trans_rvzce.c.inc 
b/target/riscv/insn_trans/trans_rvzce.c.inc
new file mode 100644
index 00..de96c4afaf
--- /dev/null
+++ b/target/riscv/insn_trans/trans_rvzce.c.inc
@@ -0,0 +1,100 @@
+/*
+ * RISC-V translation routines for the Zcb Standard Extension.
+ *
+ * Copyright (c) 2021-2022 PLCT Lab
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see .
+ */
+
+#define REQUIRE_ZCB(ctx) do {   \
+if (!ctx->cfg_ptr->ext_zcb) \
+return false;   \
+} while (0)
+
+static bool trans_c_zext_b(DisasContext *ctx, arg_c_zext_b *a)
+{
+REQUIRE_ZCB(ctx);
+return gen_unary(ctx, a, EXT_NONE, tcg_gen_ext8u_tl);
+}
+
+static bool trans_c_zext_h(DisasContext *ctx, arg_c_zext_h *a)
+{
+REQUIRE_ZCB(ctx);
+REQUIRE_ZBB(ctx);
+return gen_unary(ctx, a, EXT_NONE, tcg_gen_ext16u_tl);
+}
+
+static bool trans_c_sext_b(DisasContext *ctx, arg_c_sext_b *a)
+{
+REQUIRE_ZCB(ctx);
+REQUIRE_ZBB(ctx);
+return gen_unary(ctx, a, EXT_NONE, tcg_gen_ext8s_tl);
+}
+
+static bool trans_c_sext_h(DisasContext *ctx, arg_c_sext_h *a)
+{
+REQUIRE_ZCB(ctx);
+REQUIRE_ZBB(ctx);
+return gen_unary(ctx, a, EXT_NONE, tcg_gen_ext16s_tl);
+}
+
+static bool trans_c_zext_w(DisasContext *ctx, arg_c_zext_w *a)
+{
+REQUIRE_64BIT(ctx);
+REQUIRE_ZCB(ctx);
+REQUIRE_ZBA(ctx);
+return gen_unary(ctx, a, EXT_NONE, tcg_gen_ext32u_tl);
+}
+
+static bool trans_c_not(DisasContext *ctx, arg_c_not *a)
+{
+REQUIRE_ZCB(ctx);
+return gen_unary(ctx, a, EXT_NONE, tcg_gen_not_tl);
+}
+
+static bool trans_c_mul(DisasContext *ctx, arg_c_mul *a)
+{
+REQUIRE_ZCB(ctx);
+REQUIRE_M_OR_ZMMUL(ctx);
+return gen_arith(ctx, a, EXT_NONE, tcg_gen_mul_tl, NULL);
+}
+
+static bool trans_c_lbu(DisasContext *ctx, arg_c_lbu *a)
+{
+

Re: [PATCH for 8.0 v8 06/12] vdpa: extract vhost_vdpa_svq_allocate_iova_tree

2022-11-29 Thread Eugenio Perez Martin
On Wed, Nov 30, 2022 at 7:43 AM Jason Wang  wrote:
>
> On Thu, Nov 24, 2022 at 11:52 PM Eugenio Pérez  wrote:
> >
> > It can be allocated either if all virtqueues must be shadowed or if
> > vdpa-net detects it can shadow only cvq.
> >
> > Extract in its own function so we can reuse it.
> >
> > Signed-off-by: Eugenio Pérez 
> > ---
> >  net/vhost-vdpa.c | 29 +
> >  1 file changed, 17 insertions(+), 12 deletions(-)
> >
> > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > index 88e0eec5fa..9ee3bc4cd3 100644
> > --- a/net/vhost-vdpa.c
> > +++ b/net/vhost-vdpa.c
> > @@ -240,6 +240,22 @@ static NetClientInfo net_vhost_vdpa_info = {
> >  .check_peer_type = vhost_vdpa_check_peer_type,
> >  };
> >
> > +static int vhost_vdpa_get_iova_range(int fd,
> > + struct vhost_vdpa_iova_range 
> > *iova_range)
> > +{
> > +int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);
> > +
> > +return ret < 0 ? -errno : 0;
> > +}
>
> I don't get why this needs to be moved to net specific code.
>

It was already in net; this patch just extracts it into its own function.

It's done in net because the iova_tree must be the same for all queue pair
vhost devices, so we need to allocate it before them.

Thanks!

> Thanks
>
> > +
> > +static VhostIOVATree *vhost_vdpa_svq_allocate_iova_tree(int vdpa_device_fd)
> > +{
> > +struct vhost_vdpa_iova_range iova_range;
> > +
> > +vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range);
> > +return vhost_iova_tree_new(iova_range.first, iova_range.last);
> > +}
> > +
> >  static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
> >  {
> >  VhostIOVATree *tree = v->iova_tree;
> > @@ -587,14 +603,6 @@ static NetClientState 
> > *net_vhost_vdpa_init(NetClientState *peer,
> >  return nc;
> >  }
> >
> > -static int vhost_vdpa_get_iova_range(int fd,
> > - struct vhost_vdpa_iova_range 
> > *iova_range)
> > -{
> > -int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);
> > -
> > -return ret < 0 ? -errno : 0;
> > -}
> > -
> >  static int vhost_vdpa_get_features(int fd, uint64_t *features, Error 
> > **errp)
> >  {
> >  int ret = ioctl(fd, VHOST_GET_FEATURES, features);
> > @@ -690,14 +698,11 @@ int net_init_vhost_vdpa(const Netdev *netdev, const 
> > char *name,
> >  }
> >
> >  if (opts->x_svq) {
> > -struct vhost_vdpa_iova_range iova_range;
> > -
> >  if (!vhost_vdpa_net_valid_svq_features(features, errp)) {
> >  goto err_svq;
> >  }
> >
> > -vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range);
> > -iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last);
> > +iova_tree = vhost_vdpa_svq_allocate_iova_tree(vdpa_device_fd);
> >  }
> >
> >  ncs = g_malloc0(sizeof(*ncs) * queue_pairs);
> > --
> > 2.31.1
> >
>




Re: [PATCH v2 3/4] vdpa: handle VIRTIO_NET_CTRL_ANNOUNCE in vhost_vdpa_net_handle_ctrl_avail

2022-11-29 Thread Eugenio Perez Martin
On Wed, Nov 30, 2022 at 8:10 AM Michael S. Tsirkin  wrote:
>
> On Thu, Nov 24, 2022 at 06:33:13PM +0100, Eugenio Pérez wrote:
> > Since this capability is emulated by qemu shadowed CVQ cannot forward it
> > to the device. Process all that command within qemu.
> >
> > Signed-off-by: Eugenio Pérez 
> > ---
> >  net/vhost-vdpa.c | 15 ---
> >  1 file changed, 12 insertions(+), 3 deletions(-)
> >
> > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > index 2b4b85d8f8..8172aa8449 100644
> > --- a/net/vhost-vdpa.c
> > +++ b/net/vhost-vdpa.c
> > @@ -489,9 +489,18 @@ static int 
> > vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
> >  out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
> >   s->cvq_cmd_out_buffer,
> >   vhost_vdpa_net_cvq_cmd_len());
> > -dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
> > -if (unlikely(dev_written < 0)) {
> > -goto out;
> > +if (*(uint8_t *)s->cvq_cmd_out_buffer == VIRTIO_NET_CTRL_ANNOUNCE) {
> > +/*
> > + * Guest announce capability is emulated by qemu, so dont forward 
> > to
> > + * the device.
> > + */
>
> Hmm I'm not sure why. We don't forward the status bit to guest?
>

No, the idea is to make this feature entirely emulated by qemu so it
does not depend on device's features to support it.

Thanks!

> > +dev_written = sizeof(status);
> > +*s->status = VIRTIO_NET_OK;
> > +} else {
> > +dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, 
> > sizeof(status));
> > +if (unlikely(dev_written < 0)) {
> > +goto out;
> > +}
> >  }
> >
> >  if (unlikely(dev_written < sizeof(status))) {
> > --
> > 2.31.1
>




Re: [PATCH v2 3/4] vdpa: handle VIRTIO_NET_CTRL_ANNOUNCE in vhost_vdpa_net_handle_ctrl_avail

2022-11-29 Thread Michael S. Tsirkin
On Thu, Nov 24, 2022 at 06:33:13PM +0100, Eugenio Pérez wrote:
> Since this capability is emulated by qemu shadowed CVQ cannot forward it
> to the device. Process all that command within qemu.
> 
> Signed-off-by: Eugenio Pérez 
> ---
>  net/vhost-vdpa.c | 15 ---
>  1 file changed, 12 insertions(+), 3 deletions(-)
> 
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 2b4b85d8f8..8172aa8449 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -489,9 +489,18 @@ static int 
> vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
>  out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
>   s->cvq_cmd_out_buffer,
>   vhost_vdpa_net_cvq_cmd_len());
> -dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
> -if (unlikely(dev_written < 0)) {
> -goto out;
> +if (*(uint8_t *)s->cvq_cmd_out_buffer == VIRTIO_NET_CTRL_ANNOUNCE) {
> +/*
> + * Guest announce capability is emulated by qemu, so dont forward to
> + * the device.
> + */

Hmm I'm not sure why. We don't forward the status bit to guest?

> +dev_written = sizeof(status);
> +*s->status = VIRTIO_NET_OK;
> +} else {
> +dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
> +if (unlikely(dev_written < 0)) {
> +goto out;
> +}
>  }
>  
>  if (unlikely(dev_written < sizeof(status))) {
> -- 
> 2.31.1




Re: [PATCH v7 9/9] disas/riscv.c: add disasm support for Zc*

2022-11-29 Thread weiwei


On 2022/11/30 11:04, Shaobo Song wrote:


At 2022-11-29 10:43:43, "Weiwei Li"  wrote:

>Zcmp/Zcmt instructions will override disasm for c.fld*/c.fsd*
>instructions currently
>
>Signed-off-by: Weiwei Li 
>Signed-off-by: Junqiang Wang 
>Acked-by: Alistair Francis 
>---
> disas/riscv.c | 287 +-
> 1 file changed, 286 insertions(+), 1 deletion(-)
>
>diff --git a/disas/riscv.c b/disas/riscv.c
>index d216b9c39b..81369063b5 100644
>--- a/disas/riscv.c
>+++ b/disas/riscv.c
>@@ -163,6 +163,13 @@ typedef enum {
> rv_codec_v_i,
> rv_codec_vsetvli,
> rv_codec_vsetivli,
>+rv_codec_zcb_ext,
>+rv_codec_zcb_mul,
>+rv_codec_zcb_lb,
>+rv_codec_zcb_lh,
>+rv_codec_zcmp_cm_pushpop,
>+rv_codec_zcmp_cm_mv,
>+rv_codec_zcmt_jt,
> } rv_codec;
> 
> typedef enum {

>@@ -935,6 +942,26 @@ typedef enum {
> rv_op_vsetvli = 766,
> rv_op_vsetivli = 767,
> rv_op_vsetvl = 768,
>+rv_op_c_zext_b = 769,
>+rv_op_c_sext_b = 770,
>+rv_op_c_zext_h = 771,
>+rv_op_c_sext_h = 772,
>+rv_op_c_zext_w = 773,
>+rv_op_c_not = 774,
>+rv_op_c_mul = 775,
>+rv_op_c_lbu = 776,
>+rv_op_c_lhu = 777,
>+rv_op_c_lh = 778,
>+rv_op_c_sb = 779,
>+rv_op_c_sh = 780,
>+rv_op_cm_push = 781,
>+rv_op_cm_pop = 782,
>+rv_op_cm_popret = 783,
>+rv_op_cm_popretz = 784,
>+rv_op_cm_mva01s = 785,
>+rv_op_cm_mvsa01 = 786,
>+rv_op_cm_jt = 787,
>+rv_op_cm_jalt = 788,
> } rv_op;
> 
> /* structures */

>@@ -958,6 +985,7 @@ typedef struct {
> uint8_t   rnum;
> uint8_t   vm;
> uint32_t  vzimm;
>+uint8_t   rlist;
> } rv_decode;
> 
> typedef struct {

>@@ -1070,6 +1098,10 @@ static const char rv_vreg_name_sym[32][4] = {
> #define rv_fmt_vd_vm  "O\tDm"
> #define rv_fmt_vsetvli"O\t0,1,v"
> #define rv_fmt_vsetivli   "O\t0,u,v"
>+#define rv_fmt_rs1_rs2_zce_ldst   "O\t2,i(1)"
>+#define rv_fmt_push_rlist "O\tx,-i"
>+#define rv_fmt_pop_rlist  "O\tx,i"
>+#define rv_fmt_zcmt_index "O\ti"
> 
> /* pseudo-instruction constraints */
> 
>@@ -2065,7 +2097,27 @@ const rv_opcode_data opcode_data[] = {

> { "vsext.vf8", rv_codec_v_r, rv_fmt_vd_vs2_vm, NULL, rv_op_vsext_vf8, 
rv_op_vsext_vf8, 0 },
> { "vsetvli", rv_codec_vsetvli, rv_fmt_vsetvli, NULL, rv_op_vsetvli, 
rv_op_vsetvli, 0 },
> { "vsetivli", rv_codec_vsetivli, rv_fmt_vsetivli, NULL, rv_op_vsetivli, 
rv_op_vsetivli, 0 },
>-{ "vsetvl", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, rv_op_vsetvl, 
rv_op_vsetvl, 0 }
>+{ "vsetvl", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, rv_op_vsetvl, 
rv_op_vsetvl, 0 },
>+{ "c.zext.b", rv_codec_zcb_ext, rv_fmt_rd, NULL, 0 },
>+{ "c.sext.b", rv_codec_zcb_ext, rv_fmt_rd, NULL, 0 },
>+{ "c.zext.h", rv_codec_zcb_ext, rv_fmt_rd, NULL, 0 },
>+{ "c.sext.h", rv_codec_zcb_ext, rv_fmt_rd, NULL, 0 },
>+{ "c.zext.w", rv_codec_zcb_ext, rv_fmt_rd, NULL, 0 },
>+{ "c.not", rv_codec_zcb_ext, rv_fmt_rd, NULL, 0 },
>+{ "c.mul", rv_codec_zcb_mul, rv_fmt_rd_rs2, NULL, 0, 0 },
>+{ "c.lbu", rv_codec_zcb_lb, rv_fmt_rs1_rs2_zce_ldst, NULL, 0, 0, 0 },
>+{ "c.lhu", rv_codec_zcb_lh, rv_fmt_rs1_rs2_zce_ldst, NULL, 0, 0, 0 },
>+{ "c.lh", rv_codec_zcb_lh, rv_fmt_rs1_rs2_zce_ldst, NULL, 0, 0, 0 },
>+{ "c.sb", rv_codec_zcb_lb, rv_fmt_rs1_rs2_zce_ldst, NULL, 0, 0, 0 },
>+{ "c.sh", rv_codec_zcb_lh, rv_fmt_rs1_rs2_zce_ldst, NULL, 0, 0, 0 },
>+{ "cm.push", rv_codec_zcmp_cm_pushpop, rv_fmt_push_rlist, NULL, 0, 0 },
>+{ "cm.pop", rv_codec_zcmp_cm_pushpop, rv_fmt_pop_rlist, NULL, 0, 0 },
>+{ "cm.popret", rv_codec_zcmp_cm_pushpop, rv_fmt_pop_rlist, NULL, 0, 0, 0 
},
>+{ "cm.popretz", rv_codec_zcmp_cm_pushpop, rv_fmt_pop_rlist, NULL, 0, 0 },
>+{ "cm.mva01s", rv_codec_zcmp_cm_mv, rv_fmt_rd_rs2, NULL, 0, 0, 0 },
>+{ "cm.mvsa01", rv_codec_zcmp_cm_mv, rv_fmt_rd_rs2, NULL, 0, 0, 0 },
>+{ "cm.jt", rv_codec_zcmt_jt, rv_fmt_zcmt_index, NULL, 0 },
>+{ "cm.jalt", rv_codec_zcmt_jt, rv_fmt_zcmt_index, NULL, 0 },
> };
> 
> /* CSR names */

>@@ -2084,6 +2136,7 @@ static const char *csr_name(int csrno)
> case 0x000a: return "vxrm";
> case 0x000f: return "vcsr";
> case 0x0015: return "seed";
>+case 0x0017: return "jvt";
> case 0x0040: return "uscratch";
> case 0x0041: return "uepc";
> case 0x0042: return "ucause";
>@@ -2306,6 +2359,24 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa 
isa)
> op = rv_op_c_ld;
> }
> break;
>+case 4:
>+switch ((inst >> 10) & 0b111) {
>+case 0: op = rv_op_c_lbu; break;
>+case 1:
>+if (((inst >> 6) & 1) == 0) {
>+op = rv_op_c_lhu;
>+} else {
>+op = rv_op_c_lh;
>+}
>+break;
>+case 2: op = rv_op_c_sb; break;
>+case 3:
>+if (((inst 

Re: [PATCH v2 3/4] vdpa: handle VIRTIO_NET_CTRL_ANNOUNCE in vhost_vdpa_net_handle_ctrl_avail

2022-11-29 Thread Eugenio Perez Martin
On Wed, Nov 30, 2022 at 8:02 AM Jason Wang  wrote:
>
> On Fri, Nov 25, 2022 at 1:33 AM Eugenio Pérez  wrote:
> >
> > Since this capability is emulated by qemu shadowed CVQ cannot forward it
> > to the device. Process all that command within qemu.
> >
> > Signed-off-by: Eugenio Pérez 
> > ---
> >  net/vhost-vdpa.c | 15 ---
> >  1 file changed, 12 insertions(+), 3 deletions(-)
> >
> > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > index 2b4b85d8f8..8172aa8449 100644
> > --- a/net/vhost-vdpa.c
> > +++ b/net/vhost-vdpa.c
> > @@ -489,9 +489,18 @@ static int 
> > vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
> >  out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
> >   s->cvq_cmd_out_buffer,
> >   vhost_vdpa_net_cvq_cmd_len());
> > -dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
> > -if (unlikely(dev_written < 0)) {
> > -goto out;
> > +if (*(uint8_t *)s->cvq_cmd_out_buffer == VIRTIO_NET_CTRL_ANNOUNCE) {
> > +/*
> > + * Guest announce capability is emulated by qemu, so dont forward 
> > to
>
> s/dont/don't/
>

I'll correct it, thanks!

> > + * the device.
> > + */
> > +dev_written = sizeof(status);
> > +*s->status = VIRTIO_NET_OK;
>
> I wonder if we should avoid negotiating ANNOUNCE with vDPA parents if
> we do this?
>

I can re-check, but the next patch should avoid it. Even if it is
negotiated, the parent should never set the announce status bit, since
we never tell the device that it is a destination device.

But it's better to be on the safe side, I'll recheck.

Thanks!

> Thanks
>
> > +} else {
> > +dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, 
> > sizeof(status));
> > +if (unlikely(dev_written < 0)) {
> > +goto out;
> > +}
> >  }
> >
> >  if (unlikely(dev_written < sizeof(status))) {
> > --
> > 2.31.1
> >
>




Re: [PATCH v2 4/4] vdpa: do not handle VIRTIO_NET_F_GUEST_ANNOUNCE in vhost-vdpa

2022-11-29 Thread Jason Wang
On Fri, Nov 25, 2022 at 1:33 AM Eugenio Pérez  wrote:
>
> So qemu emulates it even in case the device does not support it.
>
> Signed-off-by: Eugenio Pérez 

Acked-by: Jason Wang 

Thanks

> ---
>  net/vhost-vdpa.c | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 8172aa8449..79f022c2bf 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -72,7 +72,6 @@ const int vdpa_feature_bits[] = {
>  VIRTIO_F_RING_RESET,
>  VIRTIO_NET_F_RSS,
>  VIRTIO_NET_F_HASH_REPORT,
> -VIRTIO_NET_F_GUEST_ANNOUNCE,
>  VIRTIO_NET_F_STATUS,
>  VHOST_INVALID_FEATURE_BIT
>  };
> --
> 2.31.1
>




Re: [PATCH v2 3/4] vdpa: handle VIRTIO_NET_CTRL_ANNOUNCE in vhost_vdpa_net_handle_ctrl_avail

2022-11-29 Thread Jason Wang
On Fri, Nov 25, 2022 at 1:33 AM Eugenio Pérez  wrote:
>
> Since this capability is emulated by qemu shadowed CVQ cannot forward it
> to the device. Process all that command within qemu.
>
> Signed-off-by: Eugenio Pérez 
> ---
>  net/vhost-vdpa.c | 15 ---
>  1 file changed, 12 insertions(+), 3 deletions(-)
>
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 2b4b85d8f8..8172aa8449 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -489,9 +489,18 @@ static int 
> vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
>  out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
>   s->cvq_cmd_out_buffer,
>   vhost_vdpa_net_cvq_cmd_len());
> -dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
> -if (unlikely(dev_written < 0)) {
> -goto out;
> +if (*(uint8_t *)s->cvq_cmd_out_buffer == VIRTIO_NET_CTRL_ANNOUNCE) {
> +/*
> + * Guest announce capability is emulated by qemu, so dont forward to

s/dont/don't/

> + * the device.
> + */
> +dev_written = sizeof(status);
> +*s->status = VIRTIO_NET_OK;

I wonder if we should avoid negotiating ANNOUNCE with vDPA parents if
we do this?

Thanks

> +} else {
> +dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
> +if (unlikely(dev_written < 0)) {
> +goto out;
> +}
>  }
>
>  if (unlikely(dev_written < sizeof(status))) {
> --
> 2.31.1
>




Re: [PATCH v2 1/2] qga-win: add logging to Windows event log

2022-11-29 Thread Marc-André Lureau
Hi

On Tue, Nov 29, 2022 at 9:37 PM Andrey Drobyshev
 wrote:
>
> This commit allows QGA to write to Windows event log using Win32 API's
> ReportEvent() [1], much like syslog() under *nix guests.
>
> In order to generate log message definitions we use a very basic message
> text file [2], so that every QGA's message gets ID 1.  The tools
> "windmc" and "windres" respectively are used to generate ".rc" file and
> COFF object file, and then the COFF file is linked into qemu-ga.exe.
>
> [1] 
> https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-reporteventa
> [2] 
> https://learn.microsoft.com/en-us/windows/win32/eventlog/message-text-files
>
> Originally-by: Yuri Pudgorodskiy 
> Signed-off-by: Andrey Drobyshev 

Reviewed-by: Marc-André Lureau 
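
For anyone testing this in a Windows guest: the new entries should land in
the Application log, and one illustrative way to check from an elevated
prompt (assuming the stock wevtutil tool) is:

  wevtutil qe Application /c:5 /rd:true /f:text | findstr /i qemu-ga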

> ---
>  configure |  3 +++
>  qga/installer/qemu-ga.wxs |  5 +
>  qga/main.c| 16 +---
>  qga/meson.build   | 19 ++-
>  qga/messages-win32.mc |  9 +
>  5 files changed, 48 insertions(+), 4 deletions(-)
>  create mode 100644 qga/messages-win32.mc
>
> diff --git a/configure b/configure
> index 26c7bc5154..789a4f6cc9 100755
> --- a/configure
> +++ b/configure
> @@ -372,6 +372,7 @@ smbd="$SMBD"
>  strip="${STRIP-${cross_prefix}strip}"
>  widl="${WIDL-${cross_prefix}widl}"
>  windres="${WINDRES-${cross_prefix}windres}"
> +windmc="${WINDMC-${cross_prefix}windmc}"
>  pkg_config_exe="${PKG_CONFIG-${cross_prefix}pkg-config}"
>  query_pkg_config() {
>  "${pkg_config_exe}" ${QEMU_PKG_CONFIG_FLAGS} "$@"
> @@ -2561,6 +2562,7 @@ if test "$skip_meson" = no; then
>echo "strip = [$(meson_quote $strip)]" >> $cross
>echo "widl = [$(meson_quote $widl)]" >> $cross
>echo "windres = [$(meson_quote $windres)]" >> $cross
> +  echo "windmc = [$(meson_quote $windmc)]" >> $cross
>if test "$cross_compile" = "yes"; then
>  cross_arg="--cross-file config-meson.cross"
>  echo "[host_machine]" >> $cross
> @@ -2667,6 +2669,7 @@ preserve_env SMBD
>  preserve_env STRIP
>  preserve_env WIDL
>  preserve_env WINDRES
> +preserve_env WINDMC
>
>  printf "exec" >>config.status
>  for i in "$0" "$@"; do
> diff --git a/qga/installer/qemu-ga.wxs b/qga/installer/qemu-ga.wxs
> index 73ce2c4965..d9567836f3 100644
> --- a/qga/installer/qemu-ga.wxs
> +++ b/qga/installer/qemu-ga.wxs
> @@ -110,6 +110,11 @@
> Value="fb0a0d66-c7fb-4e2e-a16b-c4a3bfe8d13b" />
> Value="$(var.QEMU_GA_VERSION)" />
>  
> + + 
> Key="System\CurrentControlSet\Services\EventLog\Application\qemu-ga">
> +   />
> +   Value="[qemu_ga_directory]qemu-ga.exe" />
> +
>
>  
>
> diff --git a/qga/main.c b/qga/main.c
> index b3580508fa..e9f4f44cbb 100644
> --- a/qga/main.c
> +++ b/qga/main.c
> @@ -83,6 +83,7 @@ struct GAState {
>  #ifdef _WIN32
>  GAService service;
>  HANDLE wakeup_event;
> +HANDLE event_log;
>  #endif
>  bool delimit_response;
>  bool frozen;
> @@ -324,13 +325,14 @@ static void ga_log(const gchar *domain, GLogLevelFlags 
> level,
>  }
>
>  level &= G_LOG_LEVEL_MASK;
> -#ifndef _WIN32
>  if (g_strcmp0(domain, "syslog") == 0) {
> +#ifndef _WIN32
>  syslog(LOG_INFO, "%s: %s", level_str, msg);
> -} else if (level & s->log_level) {
>  #else
> -if (level & s->log_level) {
> +ReportEvent(s->event_log, EVENTLOG_INFORMATION_TYPE,
> +0, 1, NULL, 1, 0, &msg, NULL);
>  #endif
> +} else if (level & s->log_level) {
>  g_autoptr(GDateTime) now = g_date_time_new_now_utc();
>  g_autofree char *nowstr = g_date_time_format(now, "%s.%f");
>  fprintf(s->log_file, "%s: %s: %s\n", nowstr, level_str, msg);
> @@ -1286,6 +1288,13 @@ static GAState *initialize_agent(GAConfig *config, int 
> socket_activation)
>  g_debug("Guest agent version %s started", QEMU_FULL_VERSION);
>
>  #ifdef _WIN32
> +s->event_log = RegisterEventSource(NULL, "qemu-ga");
> +if (!s->event_log) {
> +g_autofree gchar *errmsg = g_win32_error_message(GetLastError());
> +g_critical("unable to register event source: %s", errmsg);
> +return NULL;
> +}
> +
>  /* On win32 the state directory is application specific (be it the 
> default
>   * or a user override). We got past the command line parsing; let's 
> create
>   * the directory (with any intermediate directories). If we run into an
> @@ -1377,6 +1386,7 @@ static void cleanup_agent(GAState *s)
>  {
>  #ifdef _WIN32
>  CloseHandle(s->wakeup_event);
> +CloseHandle(s->event_log);
>  #endif
>  if (s->command_state) {
>  ga_command_state_cleanup_all(s->command_state);
> diff --git a/qga/meson.build b/qga/meson.build
> index 3cfb9166e5..1ff159edc1 100644
> --- a/qga/meson.build
> +++ b/qga/meson.build
> @@ -98,7 +98,24 @@ if targetos == 'windows'
>endif
>  endif
>
> -qga = executable('qemu-ga', 

Re: [RESEND PATCH] virtio-pci: fix vector_irqfd leak in virtio_pci_set_guest_notifiers

2022-11-29 Thread Michael S. Tsirkin
On Wed, Nov 30, 2022 at 01:56:11PM +0800, leixiang wrote:
> proxy->vector_irqfd was not freed when setting the guest notifiers failed.
> 
> Signed-off-by: Lei Xiang 
> Tested-by: Zeng Chi 
> Suggested-by: Xie Ming 

Thanks a lot!  Fixes tag? When was the leak introduced?
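
(Something like "git log -S vector_irqfd --oneline -- hw/virtio/virtio-pci.c"
is one quick way to find where the allocation was introduced, if that helps.)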

> ---
>  hw/virtio/virtio-pci.c | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index c6b47a9c..4862f83b 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -1038,6 +1038,12 @@ assign_error:
>  while (--n >= 0) {
>  virtio_pci_set_guest_notifier(d, n, !assign, with_irqfd);
>  }
> +
> +   g_free(proxy->vector_irqfd);
> +   proxy->vector_irqfd = NULL;
> +
>  return r;
>  }
>  
> -- 
> 
> 




Re: [PATCH v2 2/4] virtio_net: copy VIRTIO_NET_S_ANNOUNCE if device model has it

2022-11-29 Thread Jason Wang
On Fri, Nov 25, 2022 at 1:33 AM Eugenio Pérez  wrote:
>
> Status part of the emulated feature. It will follow device model, so we
> must copy it as long as NIC device model has it set.
>
> Signed-off-by: Eugenio Pérez 
> ---
>  hw/net/virtio-net.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index eed629766f..bf71ef33e8 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -183,6 +183,7 @@ static void virtio_net_get_config(VirtIODevice *vdev, 
> uint8_t *config)
>  memcpy(netcfg.mac, n->mac, ETH_ALEN);
>  }
>
> +netcfg.status |= (n->status & VIRTIO_NET_S_ANNOUNCE);

Do we need to care about the endian here? We use:

virtio_stw_p(vdev, &netcfg.status, n->status);

At the beginning of this function.

Thanks

>  memcpy(config, &netcfg, n->config_size);
>  }
>  }
> --
> 2.31.1
>




Re: [PATCH for 7.2-rc3 v3 0/7] fix vhost-user issues with CI

2022-11-29 Thread Michael S. Tsirkin
Patch 1 is good but inappropriate for 7.2
Patch 2 should be last in series.
Patch 4 we are dropping.
I thought hard about it, I think we should patch vhost user net too
because of the risk introduced by patch 2 (which affects everyone).
Can be a patch on top though.

Besides this, for series:

Reviewed-by: Michael S. Tsirkin 

-- 
MST




Re: [PATCH v2 1/4] virtio_net: Modify virtio_net_get_config to early return

2022-11-29 Thread Jason Wang
On Fri, Nov 25, 2022 at 1:33 AM Eugenio Pérez  wrote:
>
> Next patches introduce more code on vhost-vdpa branch, with already have
> too much indentation.
>
> Signed-off-by: Eugenio Pérez 
> Reviewed-by: Philippe Mathieu-Daudé 
> Reviewed-by: Michael S. Tsirkin 

Acked-by: Jason Wang 

Thanks

> ---
>  hw/net/virtio-net.c | 28 +++-
>  1 file changed, 15 insertions(+), 13 deletions(-)
>
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index aba12759d5..eed629766f 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -168,20 +168,22 @@ static void virtio_net_get_config(VirtIODevice *vdev, 
> uint8_t *config)
>  if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
>  ret = vhost_net_get_config(get_vhost_net(nc->peer), (uint8_t *)&netcfg,
> n->config_size);
> -if (ret != -1) {
> -/*
> - * Some NIC/kernel combinations present 0 as the mac address.  As
> - * that is not a legal address, try to proceed with the
> - * address from the QEMU command line in the hope that the
> - * address has been configured correctly elsewhere - just not
> - * reported by the device.
> - */
> -if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
> -info_report("Zero hardware mac address detected. Ignoring.");
> -memcpy(netcfg.mac, n->mac, ETH_ALEN);
> -}
> -memcpy(config, &netcfg, n->config_size);
> +if (ret == -1) {
> +return;
>  }
> +
> +/*
> + * Some NIC/kernel combinations present 0 as the mac address.  As 
> that
> + * is not a legal address, try to proceed with the address from the
> + * QEMU command line in the hope that the address has been configured
> + * correctly elsewhere - just not reported by the device.
> + */
> +if (memcmp(&netcfg.mac, &zero, sizeof(zero)) == 0) {
> +info_report("Zero hardware mac address detected. Ignoring.");
> +memcpy(netcfg.mac, n->mac, ETH_ALEN);
> +}
> +
> +memcpy(config, &netcfg, n->config_size);
>  }
>  }
>
> --
> 2.31.1
>




Re: [PATCH for 8.0 v8 12/12] vdpa: always start CVQ in SVQ mode if possible

2022-11-29 Thread Jason Wang
On Thu, Nov 24, 2022 at 11:52 PM Eugenio Pérez  wrote:
>
> Isolate control virtqueue in its own group, allowing to intercept control
> commands but letting dataplane run totally passthrough to the guest.
>
> Signed-off-by: Eugenio Pérez 
> ---
> v8:
> * Do not allocate iova_tree on net_init_vhost_vdpa if only CVQ is
>   shadowed. Move the iova_tree handling in this case to
>   vhost_vdpa_net_cvq_start and vhost_vdpa_net_cvq_stop.
>
> v7:
> * Never ask for number of address spaces, just react if isolation is not
>   possible.
> * Return ASID ioctl errors instead of masking them as if the device has
>   no asid.
> * Simplify net_init_vhost_vdpa logic
> * Add "if possible" suffix
>
> v6:
> * Disable control SVQ if the device does not support it because of
> features.
>
> v5:
> * Fixing the not adding cvq buffers when x-svq=on is specified.
> * Move vring state in vhost_vdpa_get_vring_group instead of using a
>   parameter.
> * Rename VHOST_VDPA_NET_CVQ_PASSTHROUGH to VHOST_VDPA_NET_DATA_ASID
>
> v4:
> * Squash vhost_vdpa_cvq_group_is_independent.
> * Rebased on last CVQ start series, that allocated CVQ cmd bufs at load
> * Do not check for cvq index on vhost_vdpa_net_prepare, we only have one
>   that callback registered in that NetClientInfo.
>
> v3:
> * Make asid related queries print a warning instead of returning an
>   error and stop the start of qemu.
> ---
>  hw/virtio/vhost-vdpa.c |   3 +-
>  net/vhost-vdpa.c   | 106 -
>  2 files changed, 107 insertions(+), 2 deletions(-)
>
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 8e54c5c0fc..45bb72d359 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -652,7 +652,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev 
> *dev)
>  {
>  uint64_t features;
>  uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
> -0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
> +0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
> +0x1ULL << VHOST_BACKEND_F_IOTLB_ASID;
>  int r;
>
>  if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, )) {
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index a1f1e29b7c..bce57fa724 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -102,6 +102,8 @@ static const uint64_t vdpa_svq_device_features =
>  BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
>  BIT_ULL(VIRTIO_NET_F_STANDBY);
>
> +#define VHOST_VDPA_NET_CVQ_ASID 1
> +
>  VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
>  {
>  VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> @@ -259,6 +261,40 @@ static VhostIOVATree 
> *vhost_vdpa_svq_allocate_iova_tree(int vdpa_device_fd)
>  return vhost_iova_tree_new(iova_range.first, iova_range.last);
>  }
>
> +static int64_t vhost_vdpa_get_vring_group(int device_fd, unsigned vq_index)
> +{
> +struct vhost_vring_state state = {
> +.index = vq_index,
> +};
> +int r = ioctl(device_fd, VHOST_VDPA_GET_VRING_GROUP, );
> +
> +if (unlikely(r < 0)) {
> +error_report("Cannot get VQ %u group: %s", vq_index,
> + g_strerror(errno));
> +return r;
> +}
> +
> +return state.num;
> +}
> +
> +static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v,
> +   unsigned vq_group,
> +   unsigned asid_num)
> +{
> +struct vhost_vring_state asid = {
> +.index = vq_group,
> +.num = asid_num,
> +};
> +int r;
> +
> +r = ioctl(v->device_fd, VHOST_VDPA_SET_GROUP_ASID, );
> +if (unlikely(r < 0)) {
> +error_report("Can't set vq group %u asid %u, errno=%d (%s)",
> + asid.index, asid.num, errno, g_strerror(errno));
> +}
> +return r;
> +}
> +
>  static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
>  {
>  VhostIOVATree *tree = v->iova_tree;
> @@ -333,11 +369,71 @@ dma_map_err:
>  static int vhost_vdpa_net_cvq_start(NetClientState *nc)
>  {
>  VhostVDPAState *s;
> -int r;
> +struct vhost_vdpa *v;
> +uint64_t backend_features;
> +int64_t cvq_group;
> +int cvq_index, r;
>
>  assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>
>  s = DO_UPCAST(VhostVDPAState, nc, nc);
> +v = >vhost_vdpa;
> +
> +v->shadow_data = s->always_svq;
> +v->shadow_vqs_enabled = s->always_svq;
> +s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID;
> +
> +if (s->always_svq) {
> +/* SVQ is already configured for all virtqueues */
> +goto out;
> +}
> +
> +/* Backend features are not available in v->dev yet. */
> +r = ioctl(v->device_fd, VHOST_GET_BACKEND_FEATURES, _features);
> +if (unlikely(r < 0)) {
> +error_report("Cannot get vdpa backend_features: %s(%d)",
> +g_strerror(errno), errno);
> +return -1;
> +}
> +if (!(backend_features & VHOST_BACKEND_F_IOTLB_ASID) ||
> +

Re: [PATCH for 8.0 v8 06/12] vdpa: extract vhost_vdpa_svq_allocate_iova_tree

2022-11-29 Thread Jason Wang
On Thu, Nov 24, 2022 at 11:52 PM Eugenio Pérez  wrote:
>
> It can be allocated either if all virtqueues must be shadowed or if
> vdpa-net detects it can shadow only cvq.
>
> Extract in its own function so we can reuse it.
>
> Signed-off-by: Eugenio Pérez 
> ---
>  net/vhost-vdpa.c | 29 +
>  1 file changed, 17 insertions(+), 12 deletions(-)
>
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 88e0eec5fa..9ee3bc4cd3 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -240,6 +240,22 @@ static NetClientInfo net_vhost_vdpa_info = {
>  .check_peer_type = vhost_vdpa_check_peer_type,
>  };
>
> +static int vhost_vdpa_get_iova_range(int fd,
> + struct vhost_vdpa_iova_range 
> *iova_range)
> +{
> +int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);
> +
> +return ret < 0 ? -errno : 0;
> +}

I don't get why this needs to be moved to net specific code.

Thanks

> +
> +static VhostIOVATree *vhost_vdpa_svq_allocate_iova_tree(int vdpa_device_fd)
> +{
> +struct vhost_vdpa_iova_range iova_range;
> +
> +vhost_vdpa_get_iova_range(vdpa_device_fd, _range);
> +return vhost_iova_tree_new(iova_range.first, iova_range.last);
> +}
> +
>  static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
>  {
>  VhostIOVATree *tree = v->iova_tree;
> @@ -587,14 +603,6 @@ static NetClientState 
> *net_vhost_vdpa_init(NetClientState *peer,
>  return nc;
>  }
>
> -static int vhost_vdpa_get_iova_range(int fd,
> - struct vhost_vdpa_iova_range 
> *iova_range)
> -{
> -int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);
> -
> -return ret < 0 ? -errno : 0;
> -}
> -
>  static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
>  {
>  int ret = ioctl(fd, VHOST_GET_FEATURES, features);
> @@ -690,14 +698,11 @@ int net_init_vhost_vdpa(const Netdev *netdev, const 
> char *name,
>  }
>
>  if (opts->x_svq) {
> -struct vhost_vdpa_iova_range iova_range;
> -
>  if (!vhost_vdpa_net_valid_svq_features(features, errp)) {
>  goto err_svq;
>  }
>
> -vhost_vdpa_get_iova_range(vdpa_device_fd, _range);
> -iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last);
> +iova_tree = vhost_vdpa_svq_allocate_iova_tree(vdpa_device_fd);
>  }
>
>  ncs = g_malloc0(sizeof(*ncs) * queue_pairs);
> --
> 2.31.1
>




Re: [PATCH for 8.0 v8 04/12] vhost: move iova_tree set to vhost_svq_start

2022-11-29 Thread Jason Wang
On Thu, Nov 24, 2022 at 11:52 PM Eugenio Pérez  wrote:
>
> Since we don't know if we will use SVQ at qemu initialization, let's
> allocate iova_tree only if needed. To do so, accept it at SVQ start, not
> at initialization.
>
> This will avoid to create it if the device does not support SVQ.
>
> Signed-off-by: Eugenio Pérez 

Acked-by: Jason Wang 

Thanks

> ---
>  hw/virtio/vhost-shadow-virtqueue.h | 5 ++---
>  hw/virtio/vhost-shadow-virtqueue.c | 9 -
>  hw/virtio/vhost-vdpa.c | 5 ++---
>  3 files changed, 8 insertions(+), 11 deletions(-)
>
> diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
> b/hw/virtio/vhost-shadow-virtqueue.h
> index d04c34a589..926a4897b1 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.h
> +++ b/hw/virtio/vhost-shadow-virtqueue.h
> @@ -126,11 +126,10 @@ size_t vhost_svq_driver_area_size(const 
> VhostShadowVirtqueue *svq);
>  size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq);
>
>  void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
> - VirtQueue *vq);
> + VirtQueue *vq, VhostIOVATree *iova_tree);
>  void vhost_svq_stop(VhostShadowVirtqueue *svq);
>
> -VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree,
> -const VhostShadowVirtqueueOps *ops,
> +VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops,
>  void *ops_opaque);
>
>  void vhost_svq_free(gpointer vq);
> diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
> b/hw/virtio/vhost-shadow-virtqueue.c
> index 3b05bab44d..4307296358 100644
> --- a/hw/virtio/vhost-shadow-virtqueue.c
> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> @@ -642,9 +642,10 @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue 
> *svq, int svq_kick_fd)
>   * @svq: Shadow Virtqueue
>   * @vdev: VirtIO device
>   * @vq: Virtqueue to shadow
> + * @iova_tree: Tree to perform descriptors translations
>   */
>  void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
> - VirtQueue *vq)
> + VirtQueue *vq, VhostIOVATree *iova_tree)
>  {
>  size_t desc_size, driver_size, device_size;
>
> @@ -655,6 +656,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, 
> VirtIODevice *vdev,
>  svq->last_used_idx = 0;
>  svq->vdev = vdev;
>  svq->vq = vq;
> +svq->iova_tree = iova_tree;
>
>  svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq));
>  driver_size = vhost_svq_driver_area_size(svq);
> @@ -712,18 +714,15 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
>   * Creates vhost shadow virtqueue, and instructs the vhost device to use the
>   * shadow methods and file descriptors.
>   *
> - * @iova_tree: Tree to perform descriptors translations
>   * @ops: SVQ owner callbacks
>   * @ops_opaque: ops opaque pointer
>   */
> -VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree,
> -const VhostShadowVirtqueueOps *ops,
> +VhostShadowVirtqueue *vhost_svq_new(const VhostShadowVirtqueueOps *ops,
>  void *ops_opaque)
>  {
>  VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
>
>  event_notifier_init_fd(>svq_kick, VHOST_FILE_UNBIND);
> -svq->iova_tree = iova_tree;
>  svq->ops = ops;
>  svq->ops_opaque = ops_opaque;
>  return svq;
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 3df2775760..691bcc811a 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -430,8 +430,7 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, 
> struct vhost_vdpa *v,
>  for (unsigned n = 0; n < hdev->nvqs; ++n) {
>  VhostShadowVirtqueue *svq;
>
> -svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops,
> -v->shadow_vq_ops_opaque);
> +svq = vhost_svq_new(v->shadow_vq_ops, v->shadow_vq_ops_opaque);
>  g_ptr_array_add(shadow_vqs, svq);
>  }
>
> @@ -1063,7 +1062,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
>  goto err;
>  }
>
> -vhost_svq_start(svq, dev->vdev, vq);
> +vhost_svq_start(svq, dev->vdev, vq, v->iova_tree);
>  ok = vhost_vdpa_svq_map_rings(dev, svq, , );
>  if (unlikely(!ok)) {
>  goto err_map;
> --
> 2.31.1
>




Re: [PATCH 3/3] intel-iommu: build iova tree during IOMMU translation

2022-11-29 Thread Jason Wang
On Tue, Nov 29, 2022 at 11:57 PM Peter Xu  wrote:
>
> On Tue, Nov 29, 2022 at 04:10:37PM +0800, Jason Wang wrote:
> > The IOVA tree is only built during page walk this breaks the device
> > that tries to use UNMAP notifier only. One example is vhost-net, it
> > tries to use UNMAP notifier when vIOMMU doesn't support DEVIOTLB_UNMAP
> > notifier (e.g when dt mode is not enabled). The interesting part is
> > that it doesn't use MAP since it can query the IOMMU translation by
> > itself upon a IOTLB miss.
> >
> > This doesn't work since Qemu doesn't build IOVA tree in IOMMU
> > translation which means the UNMAP notifier won't be triggered during
> > the page walk since Qemu think it is never mapped. This could be
> > noticed when vIOMMU is used with vhost_net but dt is disabled.
> >
> > Fixing this by build the iova tree during IOMMU translation, this
> > makes sure the UNMAP notifier event could be identified during page
> > walk. And we need to walk page table not only for UNMAP notifier but
> > for MAP notifier during PSI.
> >
> > Signed-off-by: Jason Wang 
> > ---
> >  hw/i386/intel_iommu.c | 43 ++-
> >  1 file changed, 18 insertions(+), 25 deletions(-)
> >
> > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> > index d025ef2873..edeb62f4b2 100644
> > --- a/hw/i386/intel_iommu.c
> > +++ b/hw/i386/intel_iommu.c
> > @@ -1834,6 +1834,8 @@ static bool vtd_do_iommu_translate(VTDAddressSpace 
> > *vtd_as, PCIBus *bus,
> >  uint8_t access_flags;
> >  bool rid2pasid = (pasid == PCI_NO_PASID) && s->root_scalable;
> >  VTDIOTLBEntry *iotlb_entry;
> > +const DMAMap *mapped;
> > +DMAMap target;
> >
> >  /*
> >   * We have standalone memory region for interrupt addresses, we
> > @@ -1954,6 +1956,21 @@ out:
> >  entry->translated_addr = vtd_get_slpte_addr(slpte, s->aw_bits) & 
> > page_mask;
> >  entry->addr_mask = ~page_mask;
> >  entry->perm = access_flags;
> > +
> > +target.iova = entry->iova;
> > +target.size = entry->addr_mask;
> > +target.translated_addr = entry->translated_addr;
> > +target.perm = entry->perm;
> > +
> > +mapped = iova_tree_find(vtd_as->iova_tree, );
> > +if (!mapped) {
> > +/* To make UNMAP notifier work, we need build iova tree here
> > + * in order to have the UNMAP iommu notifier to be triggered
> > + * during the page walk.
> > + */
> > +iova_tree_insert(vtd_as->iova_tree, );
> > +}
> > +
> >  return true;
> >
> >  error:
> > @@ -2161,31 +2178,7 @@ static void 
> > vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
> >  ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
> > vtd_as->devfn, );
> >  if (!ret && domain_id == vtd_get_domain_id(s, , vtd_as->pasid)) 
> > {
> > -if (vtd_as_has_map_notifier(vtd_as)) {
> > -/*
> > - * As long as we have MAP notifications registered in
> > - * any of our IOMMU notifiers, we need to sync the
> > - * shadow page table.
> > - */
> > -vtd_sync_shadow_page_table_range(vtd_as, , addr, size);
> > -} else {
> > -/*
> > - * For UNMAP-only notifiers, we don't need to walk the
> > - * page tables.  We just deliver the PSI down to
> > - * invalidate caches.
> > - */
> > -IOMMUTLBEvent event = {
> > -.type = IOMMU_NOTIFIER_UNMAP,
> > -.entry = {
> > -.target_as = _space_memory,
> > -.iova = addr,
> > -.translated_addr = 0,
> > -.addr_mask = size - 1,
> > -.perm = IOMMU_NONE,
> > -},
> > -};
> > -memory_region_notify_iommu(_as->iommu, 0, event);
>
> Isn't this path the one that will be responsible for pass-through the UNMAP
> events from guest to vhost when there's no MAP notifier requested?

Yes, but it doesn't do the iova tree removal. More below.

>
> At least that's what I expected when introducing the iova tree, because for
> unmap-only device hierachy I thought we didn't need the tree at all.

Then the problem is that the UNMAP notifier won't be triggered at all during
the DSI page walk in vtd_page_walk_one(), because there's no DMAMap stored
in the iova tree:

if (!mapped) {
/* Skip since we didn't map this range at all */
trace_vtd_page_walk_one_skip_unmap(entry->iova, entry->addr_mask);
return 0;
}

So I chose to build the iova tree in translate; then we won't hit the
above condition.

Thanks

>
> Jason, do you know where I miss?
>
> Thanks,
>
> > -}
> > +vtd_sync_shadow_page_table_range(vtd_as, , addr, size);
> >  }
> >  }
> >  
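For context, the UNMAP-only path discussed above is the one vhost-net takes
when the emulated intel-iommu offers no device IOTLB. A rough sketch of such
a setup (the option names are real QEMU options, but the exact command line
is illustrative and depends on the host):

  qemu-system-x86_64 -machine q35,kernel-irqchip=split \
      -device intel-iommu,intremap=on \
      -netdev tap,id=net0,vhost=on \
      -device virtio-net-pci,netdev=net0,iommu_platform=on,disable-legacy=on

Without device-iotlb=on on the intel-iommu (and ats=on on the NIC), vhost
should end up registering the UNMAP notifier that this patch is about.
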

Re: [PATCH 1/3] intel-iommu: fail MAP notifier without caching mode

2022-11-29 Thread Jason Wang
On Tue, Nov 29, 2022 at 11:35 PM Peter Xu  wrote:
>
> On Tue, Nov 29, 2022 at 04:10:35PM +0800, Jason Wang wrote:
> > Without caching mode, MAP notifier won't work correctly since guest
> > won't send IOTLB update event when it establishes new mappings in the
> > I/O page tables. Let's fail the IOMMU notifiers early instead of
> > misbehaving silently.
> >
> > Signed-off-by: Jason Wang 
> > ---
> >  hw/i386/intel_iommu.c | 7 +++
> >  1 file changed, 7 insertions(+)
> >
> > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> > index a08ee85edf..9143376677 100644
> > --- a/hw/i386/intel_iommu.c
> > +++ b/hw/i386/intel_iommu.c
> > @@ -3186,6 +3186,13 @@ static int 
> > vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
> >   "Snoop Control with vhost or VFIO is not 
> > supported");
> >  return -ENOTSUP;
> >  }
> > +if (!s->caching_mode && (new & IOMMU_NOTIFIER_MAP)) {
> > +error_setg_errno(errp, ENOTSUP,
> > + "device %02x.%02x.%x requires caching mode",
> > + pci_bus_num(vtd_as->bus), PCI_SLOT(vtd_as->devfn),
> > + PCI_FUNC(vtd_as->devfn));
> > +return -ENOTSUP;
> > +}
>
> We used to have that but got reverted because it's too late to fail, so we
> moved it over even though not as clean..
>
> https://lore.kernel.org/all/20190916080718.3299-5-pet...@redhat.com/

One of the differences is that this patch doesn't do exit() here. I
think it's better to fail instead of misbehaving silently; this is what
other vIOMMUs do:

E.g in smmu we had:

if (new & IOMMU_NOTIFIER_MAP) {
error_setg(errp,
   "device %02x.%02x.%x requires iommu MAP notifier which is "
   "not currently supported", pci_bus_num(sdev->bus),
   PCI_SLOT(sdev->devfn), PCI_FUNC(sdev->devfn));
return -EINVAL;
}

So did for amd iommu.

Thanks

>
> Thanks,
>
> --
> Peter Xu
>
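For context, the MAP-notifier users behind the emulated intel-iommu (VFIO,
for example) only work when caching mode is enabled on the vIOMMU. A minimal
illustrative invocation (the host device address and machine options are
placeholders) would be along the lines of:

  qemu-system-x86_64 -machine q35,kernel-irqchip=split \
      -device intel-iommu,intremap=on,caching-mode=on \
      -device vfio-pci,host=01:00.0

Without caching-mode=on, the check added by this patch fails the notifier
registration instead of letting the device misbehave silently.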




[RESEND PATCH] virtio-pci: fix vector_irqfd leak in virtio_pci_set_guest_notifiers

2022-11-29 Thread leixiang
proxy->vector_irqfd was not freed when setting the guest notifiers failed.

Signed-off-by: Lei Xiang 
Tested-by: Zeng Chi 
Suggested-by: Xie Ming 
---
 hw/virtio/virtio-pci.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index c6b47a9c..4862f83b 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1038,6 +1038,12 @@ assign_error:
 while (--n >= 0) {
 virtio_pci_set_guest_notifier(d, n, !assign, with_irqfd);
 }
+
+   g_free(proxy->vector_irqfd);
+   proxy->vector_irqfd = NULL;
+
 return r;
 }
 
-- 




[PATCH v11 0/2] vhost-vdpa: add support for vIOMMU

2022-11-29 Thread Cindy Lu
These patches are to support vIOMMU in vdpa device

changes in V3
1. Move function vfio_get_xlat_addr to memory.c
2. Use the existing memory listener; when the MR is an IOMMU MR,
call iommu_region_add()/iommu_region_del()

changes in V4
1. Make the comments in vfio_get_xlat_addr more general

changes in V5
1. Address the comments in the last version
2. Add a new arg in the function vfio_get_xlat_addr, which shows whether
the memory is backed by a discard manager. So the device can have its
own warning.

changes in V6
Move the error_report for the unpopulated discard back to
memory_get_xlat_addr

changes in V7
Organize the error message to avoid duplicate information

changes in V8
Organize the code following the comments on the last version

changes in V9
Organize the code following the comments

changes in V10
Address the comments

changes in V11
Address the comments
Fix the crash found in test

Cindy Lu (2):
  vhost-vdpa: Skip the range check while MR is IOMMU
  vhost-vdpa: add support for vIOMMU

 hw/virtio/vhost-vdpa.c | 168 ++---
 include/hw/virtio/vhost-vdpa.h |  10 ++
 2 files changed, 167 insertions(+), 11 deletions(-)

-- 
2.34.3




[PATCH v11 1/2] vhost-vdpa: Skip the range check while MR is IOMMU

2022-11-29 Thread Cindy Lu
Skip the check in vhost_vdpa_listener_skipped_section() when the
MR is an IOMMU MR, and move the check to vhost_vdpa_iommu_map_notify()

Signed-off-by: Cindy Lu 
---
 hw/virtio/vhost-vdpa.c | 21 ++---
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 3ff9ce3501..f0e9963d19 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -60,15 +60,22 @@ static bool 
vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
  iova_min, section->offset_within_address_space);
 return true;
 }
+/*
+ * While using a vIOMMU, the section can be larger than iova_max,
+ * but the memory that is actually mapped may be smaller, so skip
+ * the check here. The check is done in vhost_vdpa_iommu_map_notify()
+ * instead, where the real size mapped to the kernel is known.
+ */
 
-llend = vhost_vdpa_section_end(section);
-if (int128_gt(llend, int128_make64(iova_max))) {
-error_report("RAM section out of device range (max=0x%" PRIx64
- ", end addr=0x%" PRIx64 ")",
- iova_max, int128_get64(llend));
-return true;
+if (!memory_region_is_iommu(section->mr)) {
+llend = vhost_vdpa_section_end(section);
+if (int128_gt(llend, int128_make64(iova_max))) {
+error_report("RAM section out of device range (max=0x%" PRIx64
+ ", end addr=0x%" PRIx64 ")",
+ iova_max, int128_get64(llend));
+return true;
+}
 }
-
 return false;
 }
 
-- 
2.34.3




[PATCH v11 2/2] vhost-vdpa: add support for vIOMMU

2022-11-29 Thread Cindy Lu
Add support for vIOMMU by adding new functions to deal with IOMMU MRs:
- during iommu_region_add, register a specific IOMMU notifier
  and store all notifiers in a list.
- during iommu_region_del, look up and delete the matching IOMMU notifier from the list.

Verified in vp_vdpa and vdpa_sim_net driver

Signed-off-by: Cindy Lu 
---
 hw/virtio/vhost-vdpa.c | 147 -
 include/hw/virtio/vhost-vdpa.h |  10 +++
 2 files changed, 153 insertions(+), 4 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index f0e9963d19..4137094fef 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -26,6 +26,7 @@
 #include "cpu.h"
 #include "trace.h"
 #include "qapi/error.h"
+#include "hw/virtio/virtio-access.h"
 
 /*
  * Return one past the end of the end of section. Be careful with uint64_t
@@ -180,6 +181,119 @@ static void vhost_vdpa_listener_commit(MemoryListener 
*listener)
 v->iotlb_batch_begin_sent = false;
 }
 
+static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
+{
+struct vdpa_iommu *iommu = container_of(n, struct vdpa_iommu, n);
+
+hwaddr iova = iotlb->iova + iommu->iommu_offset;
+struct vhost_vdpa *v = iommu->dev;
+void *vaddr;
+int ret;
+Int128 llend;
+
+if (iotlb->target_as != _space_memory) {
+error_report("Wrong target AS \"%s\", only system memory is allowed",
+ iotlb->target_as->name ? iotlb->target_as->name : "none");
+return;
+}
+RCU_READ_LOCK_GUARD();
+/* check if RAM section out of device range */
+llend =
+int128_add(int128_makes64(iotlb->addr_mask + 1), int128_makes64(iova));
+if (int128_gt(llend, int128_make64(v->iova_range.last))) {
+error_report("RAM section out of device range (max=0x%" PRIx64
+ ", end addr=0x%" PRIx64 ")",
+ v->iova_range.last, int128_get64(llend));
+return;
+}
+
+vhost_vdpa_iotlb_batch_begin_once(v);
+
+if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
+bool read_only;
+
+if (!memory_get_xlat_addr(iotlb, , NULL, _only, NULL)) {
+return;
+}
+
+
+ret =
+vhost_vdpa_dma_map(v, iova, iotlb->addr_mask + 1, vaddr, 
read_only);
+if (ret) {
+error_report("vhost_vdpa_dma_map(%p, 0x%" HWADDR_PRIx ", "
+ "0x%" HWADDR_PRIx ", %p) = %d (%m)",
+ v, iova, iotlb->addr_mask + 1, vaddr, ret);
+}
+} else {
+ret = vhost_vdpa_dma_unmap(v, iova, iotlb->addr_mask + 1);
+if (ret) {
+error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", "
+ "0x%" HWADDR_PRIx ") = %d (%m)",
+ v, iova, iotlb->addr_mask + 1, ret);
+}
+}
+}
+
+static void vhost_vdpa_iommu_region_add(MemoryListener *listener,
+MemoryRegionSection *section)
+{
+struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
+
+struct vdpa_iommu *iommu;
+Int128 end;
+int iommu_idx;
+IOMMUMemoryRegion *iommu_mr;
+int ret;
+
+iommu_mr = IOMMU_MEMORY_REGION(section->mr);
+
+iommu = g_malloc0(sizeof(*iommu));
+end = int128_add(int128_make64(section->offset_within_region),
+section->size);
+end = int128_sub(end, int128_one());
+iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
+MEMTXATTRS_UNSPECIFIED);
+
+iommu->iommu_mr = iommu_mr;
+
+iommu_notifier_init(
+>n, vhost_vdpa_iommu_map_notify, IOMMU_NOTIFIER_IOTLB_EVENTS,
+section->offset_within_region, int128_get64(end), iommu_idx);
+iommu->iommu_offset =
+section->offset_within_address_space - section->offset_within_region;
+iommu->dev = v;
+
+ret = memory_region_register_iommu_notifier(section->mr, >n, NULL);
+if (ret) {
+g_free(iommu);
+return;
+}
+
+QLIST_INSERT_HEAD(>iommu_list, iommu, iommu_next);
+memory_region_iommu_replay(iommu->iommu_mr, >n);
+
+return;
+}
+
+static void vhost_vdpa_iommu_region_del(MemoryListener *listener,
+MemoryRegionSection *section)
+{
+struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener);
+
+struct vdpa_iommu *iommu;
+
+QLIST_FOREACH(iommu, >iommu_list, iommu_next)
+{
+if (MEMORY_REGION(iommu->iommu_mr) == section->mr &&
+iommu->n.start == section->offset_within_region) {
+memory_region_unregister_iommu_notifier(section->mr, >n);
+QLIST_REMOVE(iommu, iommu_next);
+g_free(iommu);
+break;
+}
+}
+}
+
 static void vhost_vdpa_listener_region_add(MemoryListener *listener,
MemoryRegionSection *section)
 {
@@ -193,6 +307,10 @@ static void vhost_vdpa_listener_region_add(MemoryListener 
*listener,
   

RE: [PATCH for-8.0 05/19] target/hexagon: Convert to 3-phase reset

2022-11-29 Thread Taylor Simpson



> -Original Message-
> From: Peter Maydell 
> Sent: Thursday, November 24, 2022 5:50 AM
> To: qemu-devel@nongnu.org
> Cc: Peter Maydell ; Michael Rolnik
> ; Edgar E. Iglesias ; Taylor
> Simpson ; Song Gao ;
> Xiaojuan Yang ; Laurent Vivier
> ; Philippe Mathieu-Daudé ;
> Aurelien Jarno ; Jiaxun Yang
> ; Aleksandar Rikalo
> ; Chris Wulff ; Marek
> Vasut ; Stafford Horne ; Daniel
> Henrique Barboza ; Cédric Le Goater
> ; David Gibson ; Greg Kurz
> ; Palmer Dabbelt ; Alistair Francis
> ; Bin Meng ;
> Yoshinori Sato ; Mark Cave-Ayland
> ; Artyom Tarasenko
> ; Bastian Koppelmann  paderborn.de>; Max Filippov ; qemu-
> a...@nongnu.org; qemu-...@nongnu.org; qemu-ri...@nongnu.org
> Subject: [PATCH for-8.0 05/19] target/hexagon: Convert to 3-phase reset
> 
> Convert the hexagon CPU class to use 3-phase reset, so it doesn't need to
> use device_class_set_parent_reset() any more.
> 
> Signed-off-by: Peter Maydell 
> ---
>  target/hexagon/cpu.h |  2 +-
>  target/hexagon/cpu.c | 12 
>  2 files changed, 9 insertions(+), 5 deletions(-)
> 

Reviewed-by: Taylor Simpson 




Re: [PATCH 0/2] Implement something

2022-11-29 Thread Evgeny Ermakov
Evgeny Ermakov  writes:

Oops, sorry. Ignore this, I sent it by mistake.

> Hello!
>
> Evgeny Ermakov (2):
>   hw/char: Add STM32F7 peripheral: USART
>   hw/input: Add FT5336 touch controller
>
>  include/hw/char/stm32f7xx_usart.h |  30 +++
>  include/hw/input/ft5336.h |  14 ++
>  hw/char/stm32f7xx_usart.c | 361 ++
>  hw/input/ft5336.c | 357 +
>  hw/arm/Kconfig|   1 +
>  hw/char/Kconfig   |   3 +
>  hw/char/meson.build   |   1 +
>  hw/char/trace-events  |   4 +
>  hw/input/Kconfig  |   4 +
>  hw/input/meson.build  |   2 +
>  10 files changed, 777 insertions(+)
>  create mode 100644 include/hw/char/stm32f7xx_usart.h
>  create mode 100644 include/hw/input/ft5336.h
>  create mode 100644 hw/char/stm32f7xx_usart.c
>  create mode 100644 hw/input/ft5336.c
>
> -- 
> 2.38.1



[PATCH 1/2] hw/char: Add STM32F7 peripheral: USART

2022-11-29 Thread Evgeny Ermakov
Signed-off-by: Evgeny Ermakov 
---
 include/hw/char/stm32f7xx_usart.h |  30 +++
 hw/char/stm32f7xx_usart.c | 361 ++
 hw/arm/Kconfig|   1 +
 hw/char/Kconfig   |   3 +
 hw/char/meson.build   |   1 +
 hw/char/trace-events  |   4 +
 6 files changed, 400 insertions(+)
 create mode 100644 include/hw/char/stm32f7xx_usart.h
 create mode 100644 hw/char/stm32f7xx_usart.c

diff --git a/include/hw/char/stm32f7xx_usart.h 
b/include/hw/char/stm32f7xx_usart.h
new file mode 100644
index 00..ec005be8d8
--- /dev/null
+++ b/include/hw/char/stm32f7xx_usart.h
@@ -0,0 +1,30 @@
+/*
+ * STM32F7XX Universal synchronous/asynchronous receiver transmitter (USART)
+ *
+ * Copyright (c) 2022 Evgeny Ermakov 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_CHAR_STM32F7XX_USART_H
+#define HW_CHAR_STM32F7XX_USART_H
+
+#include "hw/arm/stm32f.h"
+#include "chardev/char-fe.h"
+
+#define TYPE_STM32F7XX_USART "stm32f7xx-usart"
+OBJECT_DECLARE_SIMPLE_TYPE(STM32F7XXUSARTState, STM32F7XX_USART)
+
+#define STM32F7XX_USART_R_MAX 11
+
+struct STM32F7XXUSARTState {
+/*< private >*/
+STM32FPeripheralState parent_obj;
+
+uint32_t regs[STM32F7XX_USART_R_MAX];
+
+CharBackend chr;
+qemu_irq irq;
+};
+
+#endif /* HW_CHAR_STM32F7XX_USART_H */
diff --git a/hw/char/stm32f7xx_usart.c b/hw/char/stm32f7xx_usart.c
new file mode 100644
index 00..122781705a
--- /dev/null
+++ b/hw/char/stm32f7xx_usart.c
@@ -0,0 +1,361 @@
+/*
+ * STM32F7XX Universal synchronous/asynchronous receiver transmitter (USART)
+ *
+ * Reference documents:
+ *   - Reference manual RM0385
+ *   "STM32F75xxx and stm32f74xxx advanced Arm(R)-based 32-bit MCUs"
+ *   - Reference manual RM0410
+ *   "STM32F76xxx and STM32F77xxx advanced Arm(R)-based 32-bit MCUs"
+ *
+ * Copyright (c) 2022 Evgeny Ermakov 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/char/stm32f7xx_usart.h"
+#include "hw/irq.h"
+#include "hw/qdev-properties-system.h"
+#include "hw/registerfields.h"
+#include "migration/vmstate.h"
+#include "qemu/log.h"
+#include "trace.h"
+
+#ifndef STM_USART_ERR_DEBUG
+#define STM_USART_ERR_DEBUG 0
+#endif
+
+#define DB_PRINT_L(lvl, fmt, args...)   \
+do {\
+if (STM_USART_ERR_DEBUG >= lvl) {   \
+qemu_log("%s: " fmt, __func__, ## args);\
+}   \
+} while (0)
+
+#define DB_PRINT(fmt, args...) DB_PRINT_L(1, fmt, ## args)
+
+REG32(CR1, 0x00)
+/* reserved: 31:29, 1 */
+FIELD(CR1,M1, 28, 1)
+FIELD(CR1,EOBIE,  27, 1)
+FIELD(CR1,RTOIE,  26, 1)
+FIELD(CR1,DEAT,   21, 5)
+FIELD(CR1,DEDT,   16, 5)
+FIELD(CR1,OVER8,  15, 1)
+FIELD(CR1,CMIE,   14, 1)
+FIELD(CR1,MME,13, 1)
+FIELD(CR1,M0, 12, 1)
+FIELD(CR1,WAKE,   11, 1)
+FIELD(CR1,PCE,10, 1)
+FIELD(CR1,PS,  9, 1)
+FIELD(CR1,PEIE,8, 1)
+FIELD(CR1,TXEIE,   7, 1)
+FIELD(CR1,TCIE,6, 1)
+FIELD(CR1,RXNEIE,  5, 1)
+FIELD(CR1,IDLEIE,  4, 1)
+FIELD(CR1,TE,  3, 1)
+FIELD(CR1,RE,  2, 1)
+FIELD(CR1,UE,  0, 1)
+REG32(CR2, 0x04)
+/* reserved: 7, 3:0 */
+FIELD(CR2,ADD,24, 8)
+FIELD(CR2,RTOEN,  23, 1)
+FIELD(CR2,ABRMOD, 21, 2)
+FIELD(CR2,ABREN,  20, 1)
+FIELD(CR2,MSBFIRST,   19, 1)
+FIELD(CR2,DATAINV,18, 1)
+FIELD(CR2,TXINV,  17, 1)
+FIELD(CR2,RXINV,  16, 1)
+FIELD(CR2,SWAP,   15, 1)
+FIELD(CR2,LINEN,  14, 1)
+FIELD(CR2,STOP,   12, 2)
+FIELD(CR2,CLKEN,  11, 1)
+FIELD(CR2,CPOL,   10, 1)
+FIELD(CR2,CPHA,9, 1)
+FIELD(CR2,LBCL,8, 1)
+FIELD(CR2,LBDIE,   6, 1)
+FIELD(CR2,LBDL,5, 1)
+FIELD(CR2,ADDM7,   4, 1)
+REG32(CR3, 0x08)
+/* reserved: 31:25, 16 */
+FIELD(CR3,TCBGTIE,24, 1)
+FIELD(CR3,UCESM,  23, 1)
+FIELD(CR3,WUFIE,  22, 1)
+FIELD(CR3,WUS,20, 2)
+FIELD(CR3,SCARCNT,17, 3)
+FIELD(CR3,DEP,15, 1)
+FIELD(CR3,DEM,14, 1)
+FIELD(CR3,DDRE,   13, 1)
+FIELD(CR3,OVRDIS, 12, 1)
+FIELD(CR3,ONEBIT, 

[PATCH 0/2] Implement something

2022-11-29 Thread Evgeny Ermakov
Hello!

Evgeny Ermakov (2):
  hw/char: Add STM32F7 peripheral: USART
  hw/input: Add FT5336 touch controller

 include/hw/char/stm32f7xx_usart.h |  30 +++
 include/hw/input/ft5336.h |  14 ++
 hw/char/stm32f7xx_usart.c | 361 ++
 hw/input/ft5336.c | 357 +
 hw/arm/Kconfig|   1 +
 hw/char/Kconfig   |   3 +
 hw/char/meson.build   |   1 +
 hw/char/trace-events  |   4 +
 hw/input/Kconfig  |   4 +
 hw/input/meson.build  |   2 +
 10 files changed, 777 insertions(+)
 create mode 100644 include/hw/char/stm32f7xx_usart.h
 create mode 100644 include/hw/input/ft5336.h
 create mode 100644 hw/char/stm32f7xx_usart.c
 create mode 100644 hw/input/ft5336.c

-- 
2.38.1




[PATCH 2/2] hw/input: Add FT5336 touch controller

2022-11-29 Thread Evgeny Ermakov
Signed-off-by: Evgeny Ermakov 
---
 include/hw/input/ft5336.h |  14 ++
 hw/input/ft5336.c | 357 ++
 hw/input/Kconfig  |   4 +
 hw/input/meson.build  |   2 +
 4 files changed, 377 insertions(+)
 create mode 100644 include/hw/input/ft5336.h
 create mode 100644 hw/input/ft5336.c

diff --git a/include/hw/input/ft5336.h b/include/hw/input/ft5336.h
new file mode 100644
index 00..7bef3f9efb
--- /dev/null
+++ b/include/hw/input/ft5336.h
@@ -0,0 +1,14 @@
+/*
+ * FT5336 touch controller
+ *
+ * Copyright (c) 2022 Evgeny Ermakov 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_INPUT_FT5336_H
+#define HW_INPUT_FT5336_H
+
+#define TYPE_FT5336 "ft5336"
+
+#endif
diff --git a/hw/input/ft5336.c b/hw/input/ft5336.c
new file mode 100644
index 00..bacf79201a
--- /dev/null
+++ b/hw/input/ft5336.c
@@ -0,0 +1,357 @@
+/*
+ * FT5336 touch controller
+ *
+ * Copyright (c) 2022 Evgeny Ermakov 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/input/ft5336.h"
+#include "hw/i2c/i2c.h"
+#include "hw/irq.h"
+#include "migration/vmstate.h"
+#include "qemu/module.h"
+#include "qemu/log.h"
+#include "qemu/timer.h"
+#include "ui/input.h"
+#include "qom/object.h"
+
+OBJECT_DECLARE_SIMPLE_TYPE(FT5336TouchState, FT5336)
+
+struct FT5336TouchState {
+I2CSlave parent_obj;
+
+uint8_t i2c_cycle;
+uint8_t reg;
+
+qemu_irq irq;
+
+int32_t abs_x;
+int32_t abs_y;
+uint16_t touch_x;
+uint16_t touch_y;
+bool touch_press;
+
+bool inte;
+};
+
+/* I2C Slave address of touchscreen FocalTech FT5336 */
+#define FT5336_I2C_SLAVE_ADDRESS0x70
+
+/* Maximum border values of the touchscreen pad */
+#define FT5336_MAX_WIDTH((uint16_t)480) /* Touchscreen 
pad max width   */
+#define FT5336_MAX_HEIGHT   ((uint16_t)272) /* Touchscreen 
pad max height  */
+
+/* Max detectable simultaneous touches */
+#define FT5336_MAX_DETECTABLE_TOUCH 0x05
+
+
+enum {
+FT5336_P_XH   = 0x00,
+FT5336_P_XL   = 0x01,
+FT5336_P_YH   = 0x02,
+FT5336_P_YL   = 0x03,
+   /* Values Pn_XH and Pn_YH related */
+#define FT5336_TOUCH_EVT_FLAG_PRESS_DOWN0x00
+#define FT5336_TOUCH_EVT_FLAG_LIFT_UP   0x01
+#define FT5336_TOUCH_EVT_FLAG_CONTACT   0x02
+#define FT5336_TOUCH_EVT_FLAG_NO_EVENT  0x03
+
+FT5336_P_WEIGHT   = 0x04,
+/* Values Pn_WEIGHT related  */
+#define FT5336_TOUCH_WEIGHT_MASK0xFF
+#define FT5336_TOUCH_WEIGHT_SHIFT   0x00
+
+FT5336_P_MISC = 0x05
+/* Values related to FT5336_Pn_MISC_REG */
+#define FT5336_TOUCH_AREA_MASK  (0x04 << 4)
+#define FT5336_TOUCH_AREA_SHIFT 0x04
+};
+
+enum {
+FT5336_R_MODE   = 0x00,
+#define FT5336_DEV_MODE_WORKING 0x00
+#define FT5336_DEV_MODE_FACTORY 0x04
+
+#define FT5336_DEV_MODE_MASK0x07
+#define FT5336_DEV_MODE_SHIFT   0x04
+
+FT5336_R_GEST_ID= 0x01,
+#define FT5336_GEST_ID_NO_GESTURE   0x00
+#define FT5336_GEST_ID_MOVE_UP  0x10
+#define FT5336_GEST_ID_MOVE_RIGHT   0x14
+#define FT5336_GEST_ID_MOVE_DOWN0x18
+#define FT5336_GEST_ID_MOVE_LEFT0x1C
+#define FT5336_GEST_ID_SINGLE_CLICK 0x20
+#define FT5336_GEST_ID_DOUBLE_CLICK 0x22
+#define FT5336_GEST_ID_ROTATE_CLOCKWISE 0x28
+#define FT5336_GEST_ID_ROTATE_C_CLOCKWISE   0x29
+#define FT5336_GEST_ID_ZOOM_IN  0x40
+#define FT5336_GEST_ID_ZOOM_OUT 0x49
+
+FT5336_R_STAT   = 0x02,
+#define FT5336_TD_STAT_MASK 0x0F
+#define FT5336_TD_STAT_SHIFT0x00
+
+FT5336_R_P1_BASE= 0x03,
+FT5336_R_P2_BASE= 0x09,
+FT5336_R_P3_BASE= 0x0f,
+FT5336_R_P4_BASE= 0x15,
+FT5336_R_P5_BASE= 0x1b,
+FT5336_R_P6_BASE= 0x21,
+FT5336_R_P7_BASE= 0x27,
+FT5336_R_P8_BASE= 0x2d,
+FT5336_R_P9_BASE= 0x33,
+FT5336_R_P10_BASE   = 0x39,
+
+#define FT5336_TOUCH_EVT_FLAG_SHIFT 0x06
+#define FT5336_TOUCH_EVT_FLAG_MASK  (3 << FT5336_TOUCH_EVT_FLAG_SHIFT)
+
+#define FT5336_TOUCH_POS_MSB_MASK   0x0F
+#define FT5336_TOUCH_POS_MSB_SHIFT  0x00
+
+/* Values Pn_XL and Pn_YL related */
+#define FT5336_TOUCH_POS_LSB_MASK   0xFF
+#define FT5336_TOUCH_POS_LSB_SHIFT  0x00
+
+FT5336_R_TH_GROUP   = 0x80,
+/* Values FT5336_TH_GROUP_REG : threshold related  */
+#define FT5336_THRESHOLD_MASK   0xFF
+#define FT5336_THRESHOLD_SHIFT  0x00
+
+FT5336_R_TH_DIFF= 0x85,
+
+FT5336_R_CTRL   = 0x86,
+/* Values related to FT5336_CTRL_REG */
+
+/* Will keep the Active mode when 

Re: [PATCH v2 03/11] kvm-all: Do not allow reap vcpu dirty ring buffer if not ready

2022-11-29 Thread Hyman Huang




On 2022/11/30 6:42, Peter Xu wrote:

Hi, Yong,

On Mon, Nov 21, 2022 at 11:26:35AM -0500, huang...@chinatelecom.cn wrote:

From: Hyman Huang(黄勇) 

When testing a VM with a large number of vCPUs and the dirtylimit
feature, QEMU crashed due to the assertion in kvm_dirty_ring_reap_one,
which asserts that the vcpu's kvm_dirty_gfns has been allocated and is
not NULL.

Because the dirty ring reaper thread races with the QEMU main thread,
the reaper may reap a vcpu's dirty ring buffer before the main thread
has completed vcpu instantiation. So add waiting logic to the reaper
thread and do not start reaping until vcpu instantiation is complete.

Signed-off-by: Hyman Huang(黄勇) 
---
  accel/kvm/kvm-all.c | 36 
  1 file changed, 36 insertions(+)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index f99b0be..9457715 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -1401,6 +1401,35 @@ out:
  kvm_slots_unlock();
  }
  
+/*

+ * Test if the dirty ring has been initialized by checking if the vcpus
+ * have been initialized and their gfns were allocated correspondingly.
+ * Return true if the dirty ring has been initialized, false otherwise.
+ */
+static bool kvm_vcpu_dirty_ring_initialized(void)
+{
+CPUState *cpu;
+MachineState *ms = MACHINE(qdev_get_machine());
+int ncpus = ms->smp.cpus;
+
+/*
+ * assume the vcpus have not been initialized if the generation
+ * id is less than the number of vcpus
+ */
+if (ncpus > cpu_list_generation_id_get()) {
+return false;
+}
+
+CPU_FOREACH(cpu) {
+if (!cpu->kvm_dirty_gfns) {
+return false;
+}
+}
+
+return true;
+}
+
+
  static void *kvm_dirty_ring_reaper_thread(void *data)
  {
  KVMState *s = data;
@@ -1410,6 +1439,13 @@ static void *kvm_dirty_ring_reaper_thread(void *data)
  
  trace_kvm_dirty_ring_reaper("init");
  
+retry:

+/* don't allow reaping dirty ring if ring buffer hasn't been mapped */
+if (!kvm_vcpu_dirty_ring_initialized()) {
+sleep(1);


The sleep here is probably not necessary.  Could you instead have a look at
the other much simpler patch?  
Of course, yes. That patch is much cleaner; I'll cherry-pick it in the
next version.


Here:


https://lore.kernel.org/qemu-devel/20220927154653.77296-1-pet...@redhat.com/


+goto retry;
+}
+
  while (true) {
  r->reaper_state = KVM_DIRTY_RING_REAPER_WAIT;
  trace_kvm_dirty_ring_reaper("wait");
--
1.8.3.1






--
Best regard

Hyman Huang(黄勇)
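For reference, the race described above only matters when the KVM dirty ring
is enabled and vCPUs are still being created while the reaper thread starts.
A rough reproduction sketch (the ring size, vCPU count and memory size are
illustrative):

  qemu-system-x86_64 -accel kvm,dirty-ring-size=4096 -smp 128 -m 8G ...

The dirtylimit feature mentioned in the commit message relies on the dirty
ring being enabled this way.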



regression: insmod module failed in VM with nvdimm on

2022-11-29 Thread chenxiang (M)

Hi,

We boot the VM using the following command (with nvdimm on) (QEMU version
6.1.50, kernel 6.0-rc4):


qemu-system-aarch64 -machine 
virt,kernel_irqchip=on,gic-version=3,nvdimm=on  -kernel 
/home/kernel/Image -initrd /home/mini-rootfs/rootfs.cpio.gz -bios 
/root/QEMU_EFI.FD -cpu host -enable-kvm -net none -nographic -m 
2G,maxmem=64G,slots=3 -smp 4 -append 'rdinit=init console=ttyAMA0 
ealycon=pl0ll,0x9000 pcie_ports=native pciehp.pciehp_debug=1' 
-object memory-backend-ram,id=ram1,size=10G -device 
nvdimm,id=dimm1,memdev=ram1  -device ioh3420,id=root_port1,chassis=1 
-device vfio-pci,host=7d:01.0,id=net0,bus=root_port1


Then in the VM we insmod a module and a vmalloc error occurs as follows
(kernel 5.19-rc4 is fine; the issue is still present on kernel 6.1-rc4):


estuary:/$ insmod /lib/modules/$(uname -r)/hnae3.ko
[8.186563] vmap allocation for size 20480 failed: use vmalloc= 
to increase size
[8.187288] insmod: vmalloc error: size 16384, vm_struct allocation 
failed, mode:0xcc0(GFP_KERNEL), nodemask=(null),cpuset=/,mems_allowed=0

[8.188402] CPU: 1 PID: 235 Comm: insmod Not tainted 6.0.0-rc4+ #1
[8.188958] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 
02/06/2015

[8.189593] Call trace:
[8.189825]  dump_backtrace.part.0+0xc4/0xd0
[8.190245]  show_stack+0x24/0x40
[8.190563]  dump_stack_lvl+0x68/0x84
[8.190913]  dump_stack+0x18/0x34
[8.191223]  warn_alloc+0x124/0x1b0
[8.191555]  __vmalloc_node_range+0xe4/0x55c
[8.191959]  module_alloc+0xf8/0x104
[8.192305]  load_module+0x854/0x1e70
[8.192655]  __do_sys_init_module+0x1e0/0x220
[8.193075]  __arm64_sys_init_module+0x28/0x34
[8.193489]  invoke_syscall+0x50/0x120
[8.193841]  el0_svc_common.constprop.0+0x58/0x1a0
[8.194296]  do_el0_svc+0x38/0xd0
[8.194613]  el0_svc+0x2c/0xc0
[8.194901]  el0t_64_sync_handler+0x1ac/0x1b0
[8.195313]  el0t_64_sync+0x19c/0x1a0
[8.195672] Mem-Info:
[8.195872] active_anon:17641 inactive_anon:118549 isolated_anon:0
[8.195872]  active_file:0 inactive_file:0 isolated_file:0
[8.195872]  unevictable:0 dirty:0 writeback:0
[8.195872]  slab_reclaimable:3439 slab_unreclaimable:3067
[8.195872]  mapped:877 shmem:135976 pagetables:39 bounce:0
[8.195872]  kernel_misc_reclaimable:0
[8.195872]  free:353735 free_pcp:3210 free_cma:0
[8.199119] Node 0 active_anon:70564kB inactive_anon:474196kB 
active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB 
isolated(file):0kB mapped:3508kB dirty:0kB writeback:0kB shmem:543904kB 
shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 0kB writeback_tmp:0kB 
kernel_stack:1904kB pagetables:156kB all_unreclaimable? no
[8.201683] Node 0 DMA free:1414940kB boost:0kB min:22528kB 
low:28160kB high:33792kB reserved_highatomic:0KB active_anon:70564kB 
inactive_anon:474196kB active_file:0kB inactive_file:0kB unevictable:0kB 
writepending:0kB present:2097152kB managed:2010444kB mlocked:0kB 
bounce:0kB free_pcp:12840kB local_pcp:2032kB free_cma:0kB

[8.204158] lowmem_reserve[]: 0 0 0 0
[8.204481] Node 0 DMA: 1*4kB (E) 1*8kB (U) 1*16kB (U) 2*32kB (UM) 
1*64kB (U) 1*128kB (U) 2*256kB (ME) 2*512kB (ME) 2*1024kB (M) 3*2048kB 
(UM) 343*4096kB (M) = 1414940kB
[8.205881] Node 0 hugepages_total=0 hugepages_free=0 
hugepages_surp=0 hugepages_size=1048576kB
[8.206644] Node 0 hugepages_total=0 hugepages_free=0 
hugepages_surp=0 hugepages_size=32768kB
[8.207381] Node 0 hugepages_total=0 hugepages_free=0 
hugepages_surp=0 hugepages_size=2048kB
[8.208111] Node 0 hugepages_total=0 hugepages_free=0 
hugepages_surp=0 hugepages_size=64kB

[8.208826] 135976 total pagecache pages
[8.209195] 0 pages in swap cache
[8.209484] Free swap  = 0kB
[8.209733] Total swap = 0kB
[8.209989] 524288 pages RAM
[8.210239] 0 pages HighMem/MovableOnly
[8.210571] 21677 pages reserved
[8.210852] 0 pages hwpoisoned
insmod: can't insert '/lib/modules/6.0.0-rc4+/hnae3.ko': Cannot allocate 
memory


We git bisected the code and found patch c5a89f75d2a ("arm64: kaslr:
defer initialization to initcall where permitted").


Do you have any idea about the issue?


Best Regards,

Xiang Chen




Re: [PATCH v13 0/8] Add support for zoned device

2022-11-29 Thread Sam Li
On Wed, Nov 30, 2022 at 10:04, Stefan Hajnoczi  wrote:
>
> On Thu, 27 Oct 2022 at 11:46, Sam Li  wrote:
> > v13:
> > - add some tracing points for new zone APIs [Dmitry]
> > - change error handling in zone_mgmt [Damien, Stefan]
>
> Hi Sam,
> This looks very close! I sent comments.

That's great! I'll fix them.

Sam



Re: [PATCH v13 3/8] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls

2022-11-29 Thread Sam Li
On Wed, Nov 30, 2022 at 10:01, Stefan Hajnoczi  wrote:
>
> On Thu, 27 Oct 2022 at 11:46, Sam Li  wrote:
> >
> > Add a new zoned_host_device BlockDriver. The zoned_host_device option
> > accepts only zoned host block devices. By adding zone management
> > operations in this new BlockDriver, users can use the new block
> > layer APIs including Report Zone and four zone management operations
> > (open, close, finish, reset, reset_all).
> >
> > Qemu-io uses the new APIs to perform zoned storage commands of the device:
> > zone_report(zrp), zone_open(zo), zone_close(zc), zone_reset(zrs),
> > zone_finish(zf).
> >
> > For example, to test zone_report, use following command:
> > $ ./build/qemu-io --image-opts -n driver=zoned_host_device, 
> > filename=/dev/nullb0
> > -c "zrp offset nr_zones"
> >
> > Signed-off-by: Sam Li 
> > Reviewed-by: Hannes Reinecke 
> > ---
> >  block/block-backend.c | 147 +
> >  block/file-posix.c| 348 ++
> >  block/io.c|  41 
> >  include/block/block-io.h  |   7 +
> >  include/block/block_int-common.h  |  21 ++
> >  include/block/raw-aio.h   |   6 +-
> >  include/sysemu/block-backend-io.h |  18 ++
> >  meson.build   |   4 +
> >  qapi/block-core.json  |   8 +-
> >  qemu-io-cmds.c| 149 +
> >  10 files changed, 746 insertions(+), 3 deletions(-)
> >
> > diff --git a/block/block-backend.c b/block/block-backend.c
> > index aa4adf06ae..731f23e816 100644
> > --- a/block/block-backend.c
> > +++ b/block/block-backend.c
> > @@ -1431,6 +1431,15 @@ typedef struct BlkRwCo {
> >  void *iobuf;
> >  int ret;
> >  BdrvRequestFlags flags;
> > +union {
> > +struct {
> > +unsigned int *nr_zones;
> > +BlockZoneDescriptor *zones;
> > +} zone_report;
> > +struct {
> > +unsigned long op;
> > +} zone_mgmt;
> > +};
> >  } BlkRwCo;
> >
> >  int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
> > @@ -1775,6 +1784,144 @@ int coroutine_fn blk_co_flush(BlockBackend *blk)
> >  return ret;
> >  }
> >
> > +static void coroutine_fn blk_aio_zone_report_entry(void *opaque)
> > +{
> > +BlkAioEmAIOCB *acb = opaque;
> > +BlkRwCo *rwco = >rwco;
> > +
> > +rwco->ret = blk_co_zone_report(rwco->blk, rwco->offset,
> > +   rwco->zone_report.nr_zones,
> > +   rwco->zone_report.zones);
> > +blk_aio_complete(acb);
> > +}
> > +
> > +BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset,
> > +unsigned int *nr_zones,
> > +BlockZoneDescriptor  *zones,
> > +BlockCompletionFunc *cb, void *opaque)
> > +{
> > +BlkAioEmAIOCB *acb;
> > +Coroutine *co;
> > +IO_CODE();
> > +
> > +blk_inc_in_flight(blk);
> > +acb = blk_aio_get(_aio_em_aiocb_info, blk, cb, opaque);
> > +acb->rwco = (BlkRwCo) {
> > +.blk= blk,
> > +.offset = offset,
> > +.ret= NOT_DONE,
> > +.zone_report = {
> > +.zones = zones,
> > +.nr_zones = nr_zones,
> > +},
> > +};
> > +acb->has_returned = false;
> > +
> > +co = qemu_coroutine_create(blk_aio_zone_report_entry, acb);
> > +bdrv_coroutine_enter(blk_bs(blk), co);
> > +
> > +acb->has_returned = true;
> > +if (acb->rwco.ret != NOT_DONE) {
> > +replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
> > + blk_aio_complete_bh, acb);
> > +}
> > +
> > +return >common;
> > +}
> > +
> > +static void coroutine_fn blk_aio_zone_mgmt_entry(void *opaque)
> > +{
> > +BlkAioEmAIOCB *acb = opaque;
> > +BlkRwCo *rwco = >rwco;
> > +
> > +rwco->ret = blk_co_zone_mgmt(rwco->blk, rwco->zone_mgmt.op,
> > + rwco->offset, acb->bytes);
> > +blk_aio_complete(acb);
> > +}
> > +
> > +BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
> > +  int64_t offset, int64_t len,
> > +  BlockCompletionFunc *cb, void *opaque) {
> > +BlkAioEmAIOCB *acb;
> > +Coroutine *co;
> > +IO_CODE();
> > +
> > +blk_inc_in_flight(blk);
> > +acb = blk_aio_get(_aio_em_aiocb_info, blk, cb, opaque);
> > +acb->rwco = (BlkRwCo) {
> > +.blk= blk,
> > +.offset = offset,
> > +.ret= NOT_DONE,
> > +.zone_mgmt = {
> > +.op = op,
> > +},
> > +};
> > +acb->bytes = len;
> > +acb->has_returned = false;
> > +
> > +co = qemu_coroutine_create(blk_aio_zone_mgmt_entry, acb);
> > +bdrv_coroutine_enter(blk_bs(blk), co);
> > +
> > +acb->has_returned = true;
> > +if (acb->rwco.ret != NOT_DONE) {
> > +replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
> 

[PATCH v1 3/3] contrib/elf2dmp: add PE name check and Windows Server 2022 support

2022-11-29 Thread Viktor Prutyanov
Since its inception, elf2dmp has checked for MZ signatures in the
address space above the IDT[0] interrupt vector and taken the first PE
image found as the Windows kernel.
But in a Windows Server 2022 memory dump this address range is full of
invalid PE fragments, so the tool must check that the PE image actually
is 'ntoskrnl.exe'.
So, introduce additional validation by checking the image name from the
Export Directory against 'ntoskrnl.exe'.

Signed-off-by: Viktor Prutyanov 
Tested-by: Yuri Benditovich 
---
 contrib/elf2dmp/main.c | 28 ++--
 contrib/elf2dmp/pe.h   | 15 +++
 2 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/contrib/elf2dmp/main.c b/contrib/elf2dmp/main.c
index f3052b3c64..f7de82a03e 100644
--- a/contrib/elf2dmp/main.c
+++ b/contrib/elf2dmp/main.c
@@ -17,6 +17,7 @@
 
 #define SYM_URL_BASE"https://msdl.microsoft.com/download/symbols/;
 #define PDB_NAME"ntkrnlmp.pdb"
+#define PE_NAME "ntoskrnl.exe"
 
 #define INITIAL_MXCSR   0x1f80
 
@@ -400,6 +401,25 @@ static int write_dump(struct pa_space *ps,
 return fclose(dmp_file);
 }
 
+static bool pe_check_export_name(uint64_t base, void *start_addr,
+struct va_space *vs)
+{
+IMAGE_EXPORT_DIRECTORY export_dir;
+const char *pe_name;
+
+if (pe_get_data_dir_entry(base, start_addr, IMAGE_FILE_EXPORT_DIRECTORY,
+_dir, sizeof(export_dir), vs)) {
+return false;
+}
+
+pe_name = va_space_resolve(vs, base + export_dir.Name);
+if (!pe_name) {
+return false;
+}
+
+return !strcmp(pe_name, PE_NAME);
+}
+
 static int pe_get_pdb_symstore_hash(uint64_t base, void *start_addr,
 char *hash, struct va_space *vs)
 {
@@ -484,6 +504,7 @@ int main(int argc, char *argv[])
 uint64_t KdDebuggerDataBlock;
 KDDEBUGGER_DATA64 *kdbg;
 uint64_t KdVersionBlock;
+bool kernel_found = false;
 
 if (argc != 3) {
 eprintf("usage:\n\t%s elf_file dmp_file\n", argv[0]);
@@ -531,11 +552,14 @@ int main(int argc, char *argv[])
 }
 
 if (*(uint16_t *)nt_start_addr == 0x5a4d) { /* MZ */
-break;
+if (pe_check_export_name(KernBase, nt_start_addr, )) {
+kernel_found = true;
+break;
+}
 }
 }
 
-if (!nt_start_addr) {
+if (!kernel_found) {
 eprintf("Failed to find NT kernel image\n");
 err = 1;
 goto out_ps;
diff --git a/contrib/elf2dmp/pe.h b/contrib/elf2dmp/pe.h
index 807d006364..71126af1ac 100644
--- a/contrib/elf2dmp/pe.h
+++ b/contrib/elf2dmp/pe.h
@@ -88,6 +88,20 @@ typedef struct IMAGE_NT_HEADERS64 {
 IMAGE_OPTIONAL_HEADER64 OptionalHeader;
 } __attribute__ ((packed)) IMAGE_NT_HEADERS64;
 
+typedef struct IMAGE_EXPORT_DIRECTORY {
+uint32_tCharacteristics;
+uint32_tTimeDateStamp;
+uint16_tMajorVersion;
+uint16_tMinorVersion;
+uint32_tName;
+uint32_tBase;
+uint32_tNumberOfFunctions;
+uint32_tNumberOfNames;
+uint32_tAddressOfFunctions;
+uint32_tAddressOfNames;
+uint32_tAddressOfNameOrdinals;
+} __attribute__ ((packed)) IMAGE_EXPORT_DIRECTORY;
+
 typedef struct IMAGE_DEBUG_DIRECTORY {
 uint32_t Characteristics;
 uint32_t TimeDateStamp;
@@ -102,6 +116,7 @@ typedef struct IMAGE_DEBUG_DIRECTORY {
 #define IMAGE_DEBUG_TYPE_CODEVIEW   2
 #endif
 
+#define IMAGE_FILE_EXPORT_DIRECTORY 0
 #define IMAGE_FILE_DEBUG_DIRECTORY  6
 
 typedef struct guid_t {
-- 
2.35.1




[PATCH v1 1/3] contrib/elf2dmp: fix code style

2022-11-29 Thread Viktor Prutyanov
elf2dmp was originally added with some code style issues,
especially in the pe.h header, and more were introduced by
2d0fc797faaa73fbc1d30f5f9e90407bf3dd93f0. Fix them now.

Signed-off-by: Viktor Prutyanov 
---
 contrib/elf2dmp/addrspace.c |   1 +
 contrib/elf2dmp/main.c  |   9 ++--
 contrib/elf2dmp/pe.h| 100 ++--
 3 files changed, 57 insertions(+), 53 deletions(-)

diff --git a/contrib/elf2dmp/addrspace.c b/contrib/elf2dmp/addrspace.c
index 53ded17061..0b04cba00e 100644
--- a/contrib/elf2dmp/addrspace.c
+++ b/contrib/elf2dmp/addrspace.c
@@ -11,6 +11,7 @@
 static struct pa_block *pa_space_find_block(struct pa_space *ps, uint64_t pa)
 {
 size_t i;
+
 for (i = 0; i < ps->block_nr; i++) {
 if (ps->block[i].paddr <= pa &&
 pa <= ps->block[i].paddr + ps->block[i].size) {
diff --git a/contrib/elf2dmp/main.c b/contrib/elf2dmp/main.c
index d77b8f98f7..9224764239 100644
--- a/contrib/elf2dmp/main.c
+++ b/contrib/elf2dmp/main.c
@@ -282,14 +282,16 @@ static int fill_header(WinDumpHeader64 *hdr, struct 
pa_space *ps,
 };
 
 for (i = 0; i < ps->block_nr; i++) {
-h.PhysicalMemoryBlock.NumberOfPages += ps->block[i].size / 
ELF2DMP_PAGE_SIZE;
+h.PhysicalMemoryBlock.NumberOfPages +=
+ps->block[i].size / ELF2DMP_PAGE_SIZE;
 h.PhysicalMemoryBlock.Run[i] = (WinDumpPhyMemRun64) {
 .BasePage = ps->block[i].paddr / ELF2DMP_PAGE_SIZE,
 .PageCount = ps->block[i].size / ELF2DMP_PAGE_SIZE,
 };
 }
 
-h.RequiredDumpSpace += h.PhysicalMemoryBlock.NumberOfPages << 
ELF2DMP_PAGE_BITS;
+h.RequiredDumpSpace +=
+h.PhysicalMemoryBlock.NumberOfPages << ELF2DMP_PAGE_BITS;
 
 *hdr = h;
 
@@ -299,7 +301,8 @@ static int fill_header(WinDumpHeader64 *hdr, struct 
pa_space *ps,
 static int fill_context(KDDEBUGGER_DATA64 *kdbg,
 struct va_space *vs, QEMU_Elf *qe)
 {
-int i;
+int i;
+
 for (i = 0; i < qe->state_nr; i++) {
 uint64_t Prcb;
 uint64_t Context;
diff --git a/contrib/elf2dmp/pe.h b/contrib/elf2dmp/pe.h
index c2a4a6ba7c..807d006364 100644
--- a/contrib/elf2dmp/pe.h
+++ b/contrib/elf2dmp/pe.h
@@ -33,70 +33,70 @@ typedef struct IMAGE_DOS_HEADER {
 } __attribute__ ((packed)) IMAGE_DOS_HEADER;
 
 typedef struct IMAGE_FILE_HEADER {
-  uint16_t  Machine;
-  uint16_t  NumberOfSections;
-  uint32_t  TimeDateStamp;
-  uint32_t  PointerToSymbolTable;
-  uint32_t  NumberOfSymbols;
-  uint16_t  SizeOfOptionalHeader;
-  uint16_t  Characteristics;
+uint16_t  Machine;
+uint16_t  NumberOfSections;
+uint32_t  TimeDateStamp;
+uint32_t  PointerToSymbolTable;
+uint32_t  NumberOfSymbols;
+uint16_t  SizeOfOptionalHeader;
+uint16_t  Characteristics;
 } __attribute__ ((packed)) IMAGE_FILE_HEADER;
 
 typedef struct IMAGE_DATA_DIRECTORY {
-  uint32_t VirtualAddress;
-  uint32_t Size;
+uint32_t VirtualAddress;
+uint32_t Size;
 } __attribute__ ((packed)) IMAGE_DATA_DIRECTORY;
 
 #define IMAGE_NUMBEROF_DIRECTORY_ENTRIES 16
 
 typedef struct IMAGE_OPTIONAL_HEADER64 {
-  uint16_t  Magic; /* 0x20b */
-  uint8_t   MajorLinkerVersion;
-  uint8_t   MinorLinkerVersion;
-  uint32_t  SizeOfCode;
-  uint32_t  SizeOfInitializedData;
-  uint32_t  SizeOfUninitializedData;
-  uint32_t  AddressOfEntryPoint;
-  uint32_t  BaseOfCode;
-  uint64_t  ImageBase;
-  uint32_t  SectionAlignment;
-  uint32_t  FileAlignment;
-  uint16_t  MajorOperatingSystemVersion;
-  uint16_t  MinorOperatingSystemVersion;
-  uint16_t  MajorImageVersion;
-  uint16_t  MinorImageVersion;
-  uint16_t  MajorSubsystemVersion;
-  uint16_t  MinorSubsystemVersion;
-  uint32_t  Win32VersionValue;
-  uint32_t  SizeOfImage;
-  uint32_t  SizeOfHeaders;
-  uint32_t  CheckSum;
-  uint16_t  Subsystem;
-  uint16_t  DllCharacteristics;
-  uint64_t  SizeOfStackReserve;
-  uint64_t  SizeOfStackCommit;
-  uint64_t  SizeOfHeapReserve;
-  uint64_t  SizeOfHeapCommit;
-  uint32_t  LoaderFlags;
-  uint32_t  NumberOfRvaAndSizes;
-  IMAGE_DATA_DIRECTORY DataDirectory[IMAGE_NUMBEROF_DIRECTORY_ENTRIES];
+uint16_t  Magic; /* 0x20b */
+uint8_t   MajorLinkerVersion;
+uint8_t   MinorLinkerVersion;
+uint32_t  SizeOfCode;
+uint32_t  SizeOfInitializedData;
+uint32_t  SizeOfUninitializedData;
+uint32_t  AddressOfEntryPoint;
+uint32_t  BaseOfCode;
+uint64_t  ImageBase;
+uint32_t  SectionAlignment;
+uint32_t  FileAlignment;
+uint16_t  MajorOperatingSystemVersion;
+uint16_t  MinorOperatingSystemVersion;
+uint16_t  MajorImageVersion;
+uint16_t  MinorImageVersion;
+uint16_t  MajorSubsystemVersion;
+uint16_t  MinorSubsystemVersion;
+uint32_t  Win32VersionValue;
+uint32_t  SizeOfImage;
+uint32_t  SizeOfHeaders;
+uint32_t  CheckSum;
+uint16_t  Subsystem;
+uint16_t  DllCharacteristics;
+uint64_t  SizeOfStackReserve;
+uint64_t  SizeOfStackCommit;
+uint64_t  SizeOfHeapReserve;
+uint64_t  

[PATCH v1 2/3] contrib/elf2dmp: move PE dir search to pe_get_data_dir_entry

2022-11-29 Thread Viktor Prutyanov
Move the PE directory search functionality out so that it can be reused
not only for Debug Directory processing but for an arbitrary PE directory.

Signed-off-by: Viktor Prutyanov 
---
 contrib/elf2dmp/main.c | 66 +++---
 1 file changed, 37 insertions(+), 29 deletions(-)

diff --git a/contrib/elf2dmp/main.c b/contrib/elf2dmp/main.c
index 9224764239..f3052b3c64 100644
--- a/contrib/elf2dmp/main.c
+++ b/contrib/elf2dmp/main.c
@@ -333,6 +333,40 @@ static int fill_context(KDDEBUGGER_DATA64 *kdbg,
 return 0;
 }
 
+static int pe_get_data_dir_entry(uint64_t base, void *start_addr, int idx,
+void *entry, size_t size, struct va_space *vs)
+{
+const char e_magic[2] = "MZ";
+const char Signature[4] = "PE\0\0";
+IMAGE_DOS_HEADER *dos_hdr = start_addr;
+IMAGE_NT_HEADERS64 nt_hdrs;
+IMAGE_FILE_HEADER *file_hdr = _hdrs.FileHeader;
+IMAGE_OPTIONAL_HEADER64 *opt_hdr = _hdrs.OptionalHeader;
+IMAGE_DATA_DIRECTORY *data_dir = nt_hdrs.OptionalHeader.DataDirectory;
+
+if (memcmp(_hdr->e_magic, e_magic, sizeof(e_magic))) {
+return 1;
+}
+
+if (va_space_rw(vs, base + dos_hdr->e_lfanew,
+_hdrs, sizeof(nt_hdrs), 0)) {
+return 1;
+}
+
+if (memcmp(_hdrs.Signature, Signature, sizeof(Signature)) ||
+file_hdr->Machine != 0x8664 || opt_hdr->Magic != 0x020b) {
+return 1;
+}
+
+if (va_space_rw(vs,
+base + data_dir[idx].VirtualAddress,
+entry, size, 0)) {
+return 1;
+}
+
+return 0;
+}
+
 static int write_dump(struct pa_space *ps,
 WinDumpHeader64 *hdr, const char *name)
 {
@@ -369,42 +403,16 @@ static int write_dump(struct pa_space *ps,
 static int pe_get_pdb_symstore_hash(uint64_t base, void *start_addr,
 char *hash, struct va_space *vs)
 {
-const char e_magic[2] = "MZ";
-const char Signature[4] = "PE\0\0";
 const char sign_rsds[4] = "RSDS";
-IMAGE_DOS_HEADER *dos_hdr = start_addr;
-IMAGE_NT_HEADERS64 nt_hdrs;
-IMAGE_FILE_HEADER *file_hdr = _hdrs.FileHeader;
-IMAGE_OPTIONAL_HEADER64 *opt_hdr = _hdrs.OptionalHeader;
-IMAGE_DATA_DIRECTORY *data_dir = nt_hdrs.OptionalHeader.DataDirectory;
 IMAGE_DEBUG_DIRECTORY debug_dir;
 OMFSignatureRSDS rsds;
 char *pdb_name;
 size_t pdb_name_sz;
 size_t i;
 
-QEMU_BUILD_BUG_ON(sizeof(*dos_hdr) >= ELF2DMP_PAGE_SIZE);
-
-if (memcmp(_hdr->e_magic, e_magic, sizeof(e_magic))) {
-return 1;
-}
-
-if (va_space_rw(vs, base + dos_hdr->e_lfanew,
-&nt_hdrs, sizeof(nt_hdrs), 0)) {
-return 1;
-}
-
-if (memcmp(&nt_hdrs.Signature, Signature, sizeof(Signature)) ||
-file_hdr->Machine != 0x8664 || opt_hdr->Magic != 0x020b) {
-return 1;
-}
-
-printf("Debug Directory RVA = 0x%08"PRIx32"\n",
-(uint32_t)data_dir[IMAGE_FILE_DEBUG_DIRECTORY].VirtualAddress);
-
-if (va_space_rw(vs,
-base + data_dir[IMAGE_FILE_DEBUG_DIRECTORY].VirtualAddress,
-&debug_dir, sizeof(debug_dir), 0)) {
+if (pe_get_data_dir_entry(base, start_addr, IMAGE_FILE_DEBUG_DIRECTORY,
+&debug_dir, sizeof(debug_dir), vs)) {
+eprintf("Failed to get Debug Directory\n");
 return 1;
 }
 
-- 
2.35.1




[PATCH 1/1] Fix some typos

2022-11-29 Thread Dongdong Zhang
Fix some typos in 'python' directory.

Signed-off-by: Dongdong Zhang 
---
 python/qemu/machine/console_socket.py | 2 +-
 python/qemu/machine/qtest.py  | 2 +-
 python/qemu/qmp/protocol.py   | 2 +-
 python/qemu/qmp/qmp_tui.py| 6 +++---
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/python/qemu/machine/console_socket.py 
b/python/qemu/machine/console_socket.py
index 8c4ff598ad..4e28ba9bb2 100644
--- a/python/qemu/machine/console_socket.py
+++ b/python/qemu/machine/console_socket.py
@@ -68,7 +68,7 @@ def _thread_start(self) -> threading.Thread:
 """Kick off a thread to drain the socket."""
 # Configure socket to not block and timeout.
 # This allows our drain thread to not block
-# on recieve and exit smoothly.
+# on receive and exit smoothly.
 socket.socket.setblocking(self, False)
 socket.socket.settimeout(self, 1)
 drain_thread = threading.Thread(target=self._drain_fn)
diff --git a/python/qemu/machine/qtest.py b/python/qemu/machine/qtest.py
index 1a1fc6c9b0..906bd13298 100644
--- a/python/qemu/machine/qtest.py
+++ b/python/qemu/machine/qtest.py
@@ -42,7 +42,7 @@ class QEMUQtestProtocol:
 :raise socket.error: on socket connection errors
 
 .. note::
-   No conection is estabalished by __init__(), this is done
+   No connection is estabalished by __init__(), this is done
by the connect() or accept() methods.
 """
 def __init__(self, address: SocketAddrT,
diff --git a/python/qemu/qmp/protocol.py b/python/qemu/qmp/protocol.py
index 6ea86650ad..15909b7dba 100644
--- a/python/qemu/qmp/protocol.py
+++ b/python/qemu/qmp/protocol.py
@@ -812,7 +812,7 @@ def _done(task: Optional['asyncio.Future[Any]']) -> bool:
 
 @bottom_half
 async def _bh_close_stream(self, error_pathway: bool = False) -> None:
-# NB: Closing the writer also implcitly closes the reader.
+# NB: Closing the writer also implicitly closes the reader.
 if not self._writer:
 return
 
diff --git a/python/qemu/qmp/qmp_tui.py b/python/qemu/qmp/qmp_tui.py
index ce239d8979..8369144723 100644
--- a/python/qemu/qmp/qmp_tui.py
+++ b/python/qemu/qmp/qmp_tui.py
@@ -71,7 +71,7 @@ def format_json(msg: str) -> str:
 due to an decoding error then a simple string manipulation is done to
 achieve a single line JSON string.
 
-Converting into single line is more asthetically pleasing when looking
+Converting into single line is more aesthetically pleasing when looking
 along with error messages.
 
 Eg:
@@ -91,7 +91,7 @@ def format_json(msg: str) -> str:
 
 [1, true, 3]: QMP message is not a JSON object.
 
-The single line mode is more asthetically pleasing.
+The single line mode is more aesthetically pleasing.
 
 :param msg:
 The message to formatted into single line.
@@ -498,7 +498,7 @@ def __init__(self, parent: App) -> None:
 class HistoryBox(urwid.ListBox):
 """
 This widget is modelled using the ListBox widget, contains the list of
-all messages both QMP messages and log messsages to be shown in the TUI.
+all messages both QMP messages and log messages to be shown in the TUI.
 
 The messages are urwid.Text widgets. On every append of a message, the
 focus is shifted to the last appended message.
-- 
2.17.1




[PATCH v1 0/3] contrib/elf2dmp: Windows Server 2022 support

2022-11-29 Thread Viktor Prutyanov
Hi,

For now, elf2dmp is unable to convert an ELF dump into a DMP dump taken from a
Windows Server 2022 guest. This patch series fixes that.

v1: improve code-style fix

Viktor Prutyanov (3):
  contrib/elf2dmp: fix code style
  contrib/elf2dmp: move PE dir search to pe_get_data_dir_entry
  contrib/elf2dmp: add PE name check and Windows Server 2022 support

 contrib/elf2dmp/addrspace.c |   1 +
 contrib/elf2dmp/main.c  | 103 +---
 contrib/elf2dmp/pe.h| 115 
 3 files changed, 135 insertions(+), 84 deletions(-)

-- 
2.35.1




[PATCH 0/1] Fix some typos

2022-11-29 Thread Dongdong Zhang
This patch mainly fixes some typos in the 'python' directory.

Dongdong Zhang (1):
  Fix some typos

 python/qemu/machine/console_socket.py | 2 +-
 python/qemu/machine/qtest.py  | 2 +-
 python/qemu/qmp/protocol.py   | 2 +-
 python/qemu/qmp/qmp_tui.py| 6 +++---
 4 files changed, 6 insertions(+), 6 deletions(-)

-- 
2.17.1




Re: [PATCH v13 0/8] Add support for zoned device

2022-11-29 Thread Stefan Hajnoczi
On Thu, 27 Oct 2022 at 11:46, Sam Li  wrote:
> v13:
> - add some tracing points for new zone APIs [Dmitry]
> - change error handling in zone_mgmt [Damien, Stefan]

Hi Sam,
This looks very close! I sent comments.

Stefan



Re: [PATCH v13 7/8] block: add some trace events for new block layer APIs

2022-11-29 Thread Stefan Hajnoczi
Reviewed-by: Stefan Hajnoczi 

On Thu, 27 Oct 2022 at 11:48, Sam Li  wrote:
>
> Signed-off-by: Sam Li 
> ---
>  block/file-posix.c | 3 +++
>  block/trace-events | 2 ++
>  2 files changed, 5 insertions(+)
>
> diff --git a/block/file-posix.c b/block/file-posix.c
> index 02476c011e..fe52e91da4 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -3259,6 +3259,7 @@ static int coroutine_fn 
> raw_co_zone_report(BlockDriverState *bs, int64_t offset,
> BlockZoneDescriptor *zones) {
>  BDRVRawState *s = bs->opaque;
>  RawPosixAIOData acb;
> +trace_zbd_zone_report(bs, *nr_zones, offset >> BDRV_SECTOR_BITS);
>
>  acb = (RawPosixAIOData) {
>  .bs = bs,
> @@ -3337,6 +3338,8 @@ static int coroutine_fn 
> raw_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
>  },
>  };
>
> +trace_zbd_zone_mgmt(bs, op_name, offset >> BDRV_SECTOR_BITS,
> +len >> BDRV_SECTOR_BITS);
>  ret = raw_thread_pool_submit(bs, handle_aiocb_zone_mgmt, &acb);
>  if (ret != 0) {
>  ret = -errno;
> diff --git a/block/trace-events b/block/trace-events
> index 48dbf10c66..3f4e1d088a 100644
> --- a/block/trace-events
> +++ b/block/trace-events
> @@ -209,6 +209,8 @@ file_FindEjectableOpticalMedia(const char *media) 
> "Matching using %s"
>  file_setup_cdrom(const char *partition) "Using %s as optical disc"
>  file_hdev_is_sg(int type, int version) "SG device found: type=%d, version=%d"
>  file_flush_fdatasync_failed(int err) "errno %d"
> +zbd_zone_report(void *bs, unsigned int nr_zones, int64_t sector) "bs %p 
> report %d zones starting at sector offset 0x%" PRIx64 ""
> +zbd_zone_mgmt(void *bs, const char *op_name, int64_t sector, int64_t len) 
> "bs %p %s starts at sector offset 0x%" PRIx64 " over a range of 0x%" PRIx64 " 
> sectors"
>
>  # ssh.c
>  sftp_error(const char *op, const char *ssh_err, int ssh_err_code, int 
> sftp_err_code) "%s failed: %s (libssh error code: %d, sftp error code: %d)"
> --
> 2.38.1
>
>



Re: [PATCH v13 3/8] block: add block layer APIs resembling Linux ZonedBlockDevice ioctls

2022-11-29 Thread Stefan Hajnoczi
On Thu, 27 Oct 2022 at 11:46, Sam Li  wrote:
>
> Add a new zoned_host_device BlockDriver. The zoned_host_device option
> accepts only zoned host block devices. By adding zone management
> operations in this new BlockDriver, users can use the new block
> layer APIs including Report Zone and four zone management operations
> (open, close, finish, reset, reset_all).
>
> Qemu-io uses the new APIs to perform zoned storage commands of the device:
> zone_report(zrp), zone_open(zo), zone_close(zc), zone_reset(zrs),
> zone_finish(zf).
>
> For example, to test zone_report, use following command:
> $ ./build/qemu-io --image-opts -n driver=zoned_host_device, 
> filename=/dev/nullb0
> -c "zrp offset nr_zones"
>
> Signed-off-by: Sam Li 
> Reviewed-by: Hannes Reinecke 
> ---
>  block/block-backend.c | 147 +
>  block/file-posix.c| 348 ++
>  block/io.c|  41 
>  include/block/block-io.h  |   7 +
>  include/block/block_int-common.h  |  21 ++
>  include/block/raw-aio.h   |   6 +-
>  include/sysemu/block-backend-io.h |  18 ++
>  meson.build   |   4 +
>  qapi/block-core.json  |   8 +-
>  qemu-io-cmds.c| 149 +
>  10 files changed, 746 insertions(+), 3 deletions(-)
>
> diff --git a/block/block-backend.c b/block/block-backend.c
> index aa4adf06ae..731f23e816 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -1431,6 +1431,15 @@ typedef struct BlkRwCo {
>  void *iobuf;
>  int ret;
>  BdrvRequestFlags flags;
> +union {
> +struct {
> +unsigned int *nr_zones;
> +BlockZoneDescriptor *zones;
> +} zone_report;
> +struct {
> +unsigned long op;
> +} zone_mgmt;
> +};
>  } BlkRwCo;
>
>  int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
> @@ -1775,6 +1784,144 @@ int coroutine_fn blk_co_flush(BlockBackend *blk)
>  return ret;
>  }
>
> +static void coroutine_fn blk_aio_zone_report_entry(void *opaque)
> +{
> +BlkAioEmAIOCB *acb = opaque;
> +BlkRwCo *rwco = &acb->rwco;
> +
> +rwco->ret = blk_co_zone_report(rwco->blk, rwco->offset,
> +   rwco->zone_report.nr_zones,
> +   rwco->zone_report.zones);
> +blk_aio_complete(acb);
> +}
> +
> +BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset,
> +unsigned int *nr_zones,
> +BlockZoneDescriptor  *zones,
> +BlockCompletionFunc *cb, void *opaque)
> +{
> +BlkAioEmAIOCB *acb;
> +Coroutine *co;
> +IO_CODE();
> +
> +blk_inc_in_flight(blk);
> +acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
> +acb->rwco = (BlkRwCo) {
> +.blk= blk,
> +.offset = offset,
> +.ret= NOT_DONE,
> +.zone_report = {
> +.zones = zones,
> +.nr_zones = nr_zones,
> +},
> +};
> +acb->has_returned = false;
> +
> +co = qemu_coroutine_create(blk_aio_zone_report_entry, acb);
> +bdrv_coroutine_enter(blk_bs(blk), co);
> +
> +acb->has_returned = true;
> +if (acb->rwco.ret != NOT_DONE) {
> +replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
> + blk_aio_complete_bh, acb);
> +}
> +
> +return &acb->common;
> +}
> +
> +static void coroutine_fn blk_aio_zone_mgmt_entry(void *opaque)
> +{
> +BlkAioEmAIOCB *acb = opaque;
> +BlkRwCo *rwco = &acb->rwco;
> +
> +rwco->ret = blk_co_zone_mgmt(rwco->blk, rwco->zone_mgmt.op,
> + rwco->offset, acb->bytes);
> +blk_aio_complete(acb);
> +}
> +
> +BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
> +  int64_t offset, int64_t len,
> +  BlockCompletionFunc *cb, void *opaque) {
> +BlkAioEmAIOCB *acb;
> +Coroutine *co;
> +IO_CODE();
> +
> +blk_inc_in_flight(blk);
> +acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
> +acb->rwco = (BlkRwCo) {
> +.blk= blk,
> +.offset = offset,
> +.ret= NOT_DONE,
> +.zone_mgmt = {
> +.op = op,
> +},
> +};
> +acb->bytes = len;
> +acb->has_returned = false;
> +
> +co = qemu_coroutine_create(blk_aio_zone_mgmt_entry, acb);
> +bdrv_coroutine_enter(blk_bs(blk), co);
> +
> +acb->has_returned = true;
> +if (acb->rwco.ret != NOT_DONE) {
> +replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
> + blk_aio_complete_bh, acb);
> +}
> +
> +return &acb->common;
> +}
> +
> +/*
> + * Send a zone_report command.
> + * offset is a byte offset from the start of the device. No alignment
> + * required for offset.
> + * nr_zones represents IN maximum and OUT actual.
> + */
> 
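For reference, a minimal caller sketch of the in/out contract described in that
comment (hypothetical values; "blk" is assumed to be an already-open
BlockBackend on a zoned device, running in coroutine context):

/* nr_zones carries the capacity of zones[] in, and comes back out as the
 * number of descriptors actually filled in. */
unsigned int nr_zones = 16;
BlockZoneDescriptor zones[16];
int ret;

ret = blk_co_zone_report(blk, 0 /* byte offset from device start */,
                         &nr_zones, zones);
if (ret < 0) {
    error_report("zone report failed: %s", strerror(-ret));
}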

[PATCH v3 3/3] tests/qtest: sifive-e-aon-watchdog-test.c : Add QTest of watchdog of sifive_e

2022-11-29 Thread Tommy Wu
Add some simple tests of the watchdog timer in the always-on domain device
of HiFive 1 rev b.

Signed-off-by: Tommy Wu 
---
 tests/qtest/meson.build  |   3 +
 tests/qtest/sifive-e-aon-watchdog-test.c | 650 +++
 2 files changed, 653 insertions(+)
 create mode 100644 tests/qtest/sifive-e-aon-watchdog-test.c

diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index c07a5b1a5f..4a38bcb829 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -230,6 +230,9 @@ qtests_s390x = \
'cpu-plug-test',
'migration-test']
 
+qtests_riscv32 = \
+  (config_all_devices.has_key('CONFIG_SIFIVE_E_AON') ? 
['sifive-e-aon-watchdog-test'] : [])
+
 qos_test_ss = ss.source_set()
 qos_test_ss.add(
   'ac97-test.c',
diff --git a/tests/qtest/sifive-e-aon-watchdog-test.c 
b/tests/qtest/sifive-e-aon-watchdog-test.c
new file mode 100644
index 00..1f454c266a
--- /dev/null
+++ b/tests/qtest/sifive-e-aon-watchdog-test.c
@@ -0,0 +1,650 @@
+#include "qemu/osdep.h"
+#include "qemu/timer.h"
+#include "qemu/bitops.h"
+#include "libqtest-single.h"
+#include "hw/registerfields.h"
+#include "hw/misc/sifive_e_aon.h"
+
+FIELD(AON_WDT_WDOGCFG,
+  SCALE, 0, 4)
+FIELD(AON_WDT_WDOGCFG,
+  RSVD0, 4, 4)
+FIELD(AON_WDT_WDOGCFG,
+  RSTEN, 8, 1)
+FIELD(AON_WDT_WDOGCFG,
+  ZEROCMP, 9, 1)
+FIELD(AON_WDT_WDOGCFG,
+  RSVD1, 10, 2)
+FIELD(AON_WDT_WDOGCFG,
+  EN_ALWAYS, 12, 1)
+FIELD(AON_WDT_WDOGCFG,
+  EN_CORE_AWAKE, 13, 1)
+FIELD(AON_WDT_WDOGCFG,
+  RSVD2, 14, 14)
+FIELD(AON_WDT_WDOGCFG,
+  IP0, 28, 1)
+FIELD(AON_WDT_WDOGCFG,
+  RSVD3, 29, 3)
+
+#define WDOG_BASE (0x1000)
+#define WDOGCFG (0x0)
+#define WDOGCOUNT (0x8)
+#define WDOGS (0x10)
+#define WDOGFEED (0x18)
+#define WDOGKEY (0x1c)
+#define WDOGCMP0 (0x20)
+
+#define SIFIVE_E_AON_WDOGKEY (0x51F15E)
+#define SIFIVE_E_AON_WDOGFEED (0xD09F00D)
+#define SIFIVE_E_LFCLK_DEFAULT_FREQ (32768)
+
+static void test_init(void)
+{
+writel(WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+writel(WDOG_BASE + WDOGCOUNT, 0);
+
+writel(WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+writel(WDOG_BASE + WDOGCFG, 0);
+
+writel(WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+writel(WDOG_BASE + WDOGCMP0, 0xBEEF);
+}
+
+static void test_wdogcount(void)
+{
+uint64_t tmp;
+
+test_init();
+
+tmp = readl(WDOG_BASE + WDOGCOUNT);
+writel(WDOG_BASE + WDOGCOUNT, 0xBEEF);
+g_assert(readl(WDOG_BASE + WDOGCOUNT) == tmp);
+
+writel(WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+writel(WDOG_BASE + WDOGCOUNT, 0xBEEF);
+g_assert(0xBEEF == readl(WDOG_BASE + WDOGCOUNT));
+
+writel(WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+writel(WDOG_BASE + WDOGCOUNT, 0x);
+g_assert(0x2AAA == readl(WDOG_BASE + WDOGCOUNT));
+
+writel(WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+writel(WDOG_BASE + WDOGFEED, 0x);
+g_assert(0x2AAA == readl(WDOG_BASE + WDOGCOUNT));
+
+writel(WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+writel(WDOG_BASE + WDOGFEED, SIFIVE_E_AON_WDOGFEED);
+g_assert(0 == readl(WDOG_BASE + WDOGCOUNT));
+}
+
+static void test_wdogcfg(void)
+{
+uint32_t tmp_cfg;
+
+test_init();
+
+tmp_cfg = readl(WDOG_BASE + WDOGCFG);
+writel(WDOG_BASE + WDOGCFG, 0x);
+g_assert(readl(WDOG_BASE + WDOGCFG) == tmp_cfg);
+
+writel(WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+writel(WDOG_BASE + WDOGCFG, 0x);
+g_assert(0x == readl(WDOG_BASE + WDOGCFG));
+
+tmp_cfg = readl(WDOG_BASE + WDOGCFG);
+g_assert(15 ==
+ FIELD_EX32(tmp_cfg,
+AON_WDT_WDOGCFG,
+SCALE));
+g_assert(1 ==
+ FIELD_EX32(tmp_cfg,
+AON_WDT_WDOGCFG,
+ RSTEN));
+g_assert(1 ==
+ FIELD_EX32(tmp_cfg,
+AON_WDT_WDOGCFG,
+ZEROCMP));
+g_assert(1 ==
+ FIELD_EX32(tmp_cfg,
+AON_WDT_WDOGCFG,
+EN_ALWAYS));
+g_assert(1 ==
+ FIELD_EX32(tmp_cfg,
+AON_WDT_WDOGCFG,
+EN_CORE_AWAKE));
+g_assert(1 ==
+ FIELD_EX32(tmp_cfg,
+AON_WDT_WDOGCFG,
+IP0));
+
+writel(WDOG_BASE + WDOGKEY, SIFIVE_E_AON_WDOGKEY);
+writel(WDOG_BASE + WDOGCFG, 0);
+tmp_cfg = readl(WDOG_BASE + WDOGCFG);
+g_assert(0 ==
+ FIELD_EX32(tmp_cfg,
+AON_WDT_WDOGCFG,
+SCALE));
+g_assert(0 ==
+ FIELD_EX32(tmp_cfg,
+AON_WDT_WDOGCFG,
+ RSTEN));
+g_assert(0 ==
+ FIELD_EX32(tmp_cfg,
+AON_WDT_WDOGCFG,
+ZEROCMP));
+g_assert(0 ==
+ FIELD_EX32(tmp_cfg,
+AON_WDT_WDOGCFG,
+  

[PATCH v3 0/3] Implement the watchdog timer of HiFive 1 rev b.

2022-11-29 Thread Tommy Wu
The HiFive 1 rev b includes a watchdog module based on a 32-bit
counter. The watchdog timer is in the always-on domain device of
HiFive 1 rev b, so this patch added the AON device to the sifive_e
machine. This patch only implemented the functionality of the
watchdog timer, not all the functionality of the AON device.

You can test the patchset by the QTest tests/qtest/sifive-e-aon-watchdog-test.c

Changes since v1 ( Thank Alistair for the feedback ):
- Use the register field macro.
- Delete the public create function. The board creates the aon device itself.
- Keep all variable declarations at the top of the code block.

Changes since v2 ( Thank Alistair for the feedback ):
- Delete the declaration and definition of the create function.

Tommy Wu (3):
  hw/misc: sifive_e_aon: Support the watchdog timer of HiFive 1 rev b.
  hw/riscv: sifive_e: Support the watchdog timer of HiFive 1 rev b.
  tests/qtest: sifive-e-aon-watchdog-test.c : Add QTest of watchdog of
sifive_e

 hw/misc/Kconfig  |   3 +
 hw/misc/meson.build  |   1 +
 hw/misc/sifive_e_aon.c   | 383 +
 hw/riscv/Kconfig |   1 +
 hw/riscv/sifive_e.c  |  13 +-
 include/hw/misc/sifive_e_aon.h   |  60 +++
 include/hw/riscv/sifive_e.h  |   8 +-
 tests/qtest/meson.build  |   3 +
 tests/qtest/sifive-e-aon-watchdog-test.c | 650 +++
 9 files changed, 1117 insertions(+), 5 deletions(-)
 create mode 100644 hw/misc/sifive_e_aon.c
 create mode 100644 include/hw/misc/sifive_e_aon.h
 create mode 100644 tests/qtest/sifive-e-aon-watchdog-test.c

-- 
2.27.0




[PATCH v3 1/3] hw/misc: sifive_e_aon: Support the watchdog timer of HiFive 1 rev b.

2022-11-29 Thread Tommy Wu
The watchdog timer is in the always-on domain device of HiFive 1 rev b,
so this patch added the AON device to the sifive_e machine. This patch
only implemented the functionality of the watchdog timer.

Signed-off-by: Tommy Wu 
---
 hw/misc/Kconfig|   3 +
 hw/misc/meson.build|   1 +
 hw/misc/sifive_e_aon.c | 383 +
 include/hw/misc/sifive_e_aon.h |  60 ++
 4 files changed, 447 insertions(+)
 create mode 100644 hw/misc/sifive_e_aon.c
 create mode 100644 include/hw/misc/sifive_e_aon.h

diff --git a/hw/misc/Kconfig b/hw/misc/Kconfig
index cbabe9f78c..7d1247822e 100644
--- a/hw/misc/Kconfig
+++ b/hw/misc/Kconfig
@@ -162,6 +162,9 @@ config SIFIVE_TEST
 config SIFIVE_E_PRCI
 bool
 
+config SIFIVE_E_AON
+bool
+
 config SIFIVE_U_OTP
 bool
 
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
index 95268eddc0..94170dce76 100644
--- a/hw/misc/meson.build
+++ b/hw/misc/meson.build
@@ -31,6 +31,7 @@ softmmu_ss.add(when: 'CONFIG_MCHP_PFSOC_IOSCB', if_true: 
files('mchp_pfsoc_ioscb
 softmmu_ss.add(when: 'CONFIG_MCHP_PFSOC_SYSREG', if_true: 
files('mchp_pfsoc_sysreg.c'))
 softmmu_ss.add(when: 'CONFIG_SIFIVE_TEST', if_true: files('sifive_test.c'))
 softmmu_ss.add(when: 'CONFIG_SIFIVE_E_PRCI', if_true: files('sifive_e_prci.c'))
+softmmu_ss.add(when: 'CONFIG_SIFIVE_E_AON', if_true: files('sifive_e_aon.c'))
 softmmu_ss.add(when: 'CONFIG_SIFIVE_U_OTP', if_true: files('sifive_u_otp.c'))
 softmmu_ss.add(when: 'CONFIG_SIFIVE_U_PRCI', if_true: files('sifive_u_prci.c'))
 
diff --git a/hw/misc/sifive_e_aon.c b/hw/misc/sifive_e_aon.c
new file mode 100644
index 00..27ec26cf7c
--- /dev/null
+++ b/hw/misc/sifive_e_aon.c
@@ -0,0 +1,383 @@
+/*
+ * SiFive HiFive1 AON (Always On Domain) for QEMU.
+ *
+ * Copyright (c) 2022 SiFive, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see .
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/timer.h"
+#include "qemu/log.h"
+#include "hw/irq.h"
+#include "hw/registerfields.h"
+#include "hw/misc/sifive_e_aon.h"
+#include "qapi/visitor.h"
+#include "qapi/error.h"
+#include "sysemu/watchdog.h"
+
+REG32(AON_WDT_WDOGCFG, 0x0)
+FIELD(AON_WDT_WDOGCFG,
+  SCALE, 0, 4)
+FIELD(AON_WDT_WDOGCFG,
+  RSVD0, 4, 4)
+FIELD(AON_WDT_WDOGCFG,
+  RSTEN, 8, 1)
+FIELD(AON_WDT_WDOGCFG,
+  ZEROCMP, 9, 1)
+FIELD(AON_WDT_WDOGCFG,
+  RSVD1, 10, 2)
+FIELD(AON_WDT_WDOGCFG,
+  EN_ALWAYS, 12, 1)
+FIELD(AON_WDT_WDOGCFG,
+  EN_CORE_AWAKE, 13, 1)
+FIELD(AON_WDT_WDOGCFG,
+  RSVD2, 14, 14)
+FIELD(AON_WDT_WDOGCFG,
+  IP0, 28, 1)
+FIELD(AON_WDT_WDOGCFG,
+  RSVD3, 29, 3)
+REG32(AON_WDT_WDOGCOUNT, 0x8)
+REG32(AON_WDT_WDOGS, 0x10)
+REG32(AON_WDT_WDOGFEED, 0x18)
+REG32(AON_WDT_WDOGKEY, 0x1c)
+REG32(AON_WDT_WDOGCMP0, 0x20)
+
+static void sifive_e_aon_wdt_update_wdogcount(SiFiveEAONState *r)
+{
+int64_t now;
+if (0 == FIELD_EX32(r->wdogcfg,
+AON_WDT_WDOGCFG,
+EN_ALWAYS) &&
+0 == FIELD_EX32(r->wdogcfg,
+AON_WDT_WDOGCFG,
+EN_CORE_AWAKE)) {
+return;
+}
+
+now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+r->wdogcount += muldiv64(now - r->wdog_restart_time,
+ r->wdogclk_freq, NANOSECONDS_PER_SECOND);
+/* Clean the most significant bit. */
+r->wdogcount = ((r->wdogcount << 1) >> 1);
+r->wdog_restart_time = now;
+}
+
+static void sifive_e_aon_wdt_update_state(SiFiveEAONState *r)
+{
+uint16_t wdogs;
+bool cmp_signal = false;
+sifive_e_aon_wdt_update_wdogcount(r);
+wdogs = (uint16_t)(r->wdogcount >>
+FIELD_EX32(r->wdogcfg,
+   AON_WDT_WDOGCFG,
+   SCALE));
+if (wdogs >= r->wdogcmp0) {
+cmp_signal = true;
+if (1 == FIELD_EX32(r->wdogcfg,
+AON_WDT_WDOGCFG,
+ZEROCMP)) {
+r->wdogcount = 0;
+wdogs = 0;
+}
+}
+
+if (cmp_signal) {
+if (1 == FIELD_EX32(r->wdogcfg,
+AON_WDT_WDOGCFG,
+RSTEN)) {
+watchdog_perform_action();
+}
+r->wdogcfg =
+FIELD_DP32(r->wdogcfg,
+   

[PATCH v3 2/3] hw/riscv: sifive_e: Support the watchdog timer of HiFive 1 rev b.

2022-11-29 Thread Tommy Wu
Create the AON device when we realize the sifive_e machine.
This patch only implemented the functionality of the watchdog timer,
not all the functionality of the AON device.

Signed-off-by: Tommy Wu 
---
 hw/riscv/Kconfig|  1 +
 hw/riscv/sifive_e.c | 13 +++--
 include/hw/riscv/sifive_e.h |  8 +---
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig
index 79ff61c464..50890b1b75 100644
--- a/hw/riscv/Kconfig
+++ b/hw/riscv/Kconfig
@@ -59,6 +59,7 @@ config SIFIVE_E
 select SIFIVE_PLIC
 select SIFIVE_UART
 select SIFIVE_E_PRCI
+select SIFIVE_E_AON
 select UNIMP
 
 config SIFIVE_U
diff --git a/hw/riscv/sifive_e.c b/hw/riscv/sifive_e.c
index d65d2fd869..c866ffe232 100644
--- a/hw/riscv/sifive_e.c
+++ b/hw/riscv/sifive_e.c
@@ -45,6 +45,7 @@
 #include "hw/intc/riscv_aclint.h"
 #include "hw/intc/sifive_plic.h"
 #include "hw/misc/sifive_e_prci.h"
+#include "hw/misc/sifive_e_aon.h"
 #include "chardev/char.h"
 #include "sysemu/sysemu.h"
 
@@ -222,8 +223,13 @@ static void sifive_e_soc_realize(DeviceState *dev, Error 
**errp)
 RISCV_ACLINT_DEFAULT_MTIMER_SIZE, 0, ms->smp.cpus,
 RISCV_ACLINT_DEFAULT_MTIMECMP, RISCV_ACLINT_DEFAULT_MTIME,
 RISCV_ACLINT_DEFAULT_TIMEBASE_FREQ, false);
-create_unimplemented_device("riscv.sifive.e.aon",
-memmap[SIFIVE_E_DEV_AON].base, memmap[SIFIVE_E_DEV_AON].size);
+
+s->aon = qdev_new(TYPE_SIFIVE_E_AON);
+if (!sysbus_realize(SYS_BUS_DEVICE(s->aon), errp)) {
+return;
+}
+sysbus_mmio_map(SYS_BUS_DEVICE(s->aon), 0, memmap[SIFIVE_E_DEV_AON].base);
+
 sifive_e_prci_create(memmap[SIFIVE_E_DEV_PRCI].base);
 
 /* GPIO */
@@ -244,6 +250,9 @@ static void sifive_e_soc_realize(DeviceState *dev, Error 
**errp)
qdev_get_gpio_in(DEVICE(s->plic),
 SIFIVE_E_GPIO0_IRQ0 + i));
 }
+sysbus_connect_irq(SYS_BUS_DEVICE(s->aon), 0,
+   qdev_get_gpio_in(DEVICE(s->plic),
+SIFIVE_E_AON_WDT_IRQ));
 
 sifive_uart_create(sys_mem, memmap[SIFIVE_E_DEV_UART0].base,
 serial_hd(0), qdev_get_gpio_in(DEVICE(s->plic), SIFIVE_E_UART0_IRQ));
diff --git a/include/hw/riscv/sifive_e.h b/include/hw/riscv/sifive_e.h
index d738745925..e2de1564a7 100644
--- a/include/hw/riscv/sifive_e.h
+++ b/include/hw/riscv/sifive_e.h
@@ -35,6 +35,7 @@ typedef struct SiFiveESoCState {
 /*< public >*/
 RISCVHartArrayState cpus;
 DeviceState *plic;
+DeviceState *aon;
 SIFIVEGPIOState gpio;
 MemoryRegion xip_mem;
 MemoryRegion mask_rom;
@@ -76,9 +77,10 @@ enum {
 };
 
 enum {
-SIFIVE_E_UART0_IRQ  = 3,
-SIFIVE_E_UART1_IRQ  = 4,
-SIFIVE_E_GPIO0_IRQ0 = 8
+SIFIVE_E_AON_WDT_IRQ  = 1,
+SIFIVE_E_UART0_IRQ= 3,
+SIFIVE_E_UART1_IRQ= 4,
+SIFIVE_E_GPIO0_IRQ0   = 8
 };
 
 #define SIFIVE_E_PLIC_HART_CONFIG "M"
-- 
2.27.0




Re: [PATCH v2 08/11] migration: Export dirty-limit time info

2022-11-29 Thread Peter Xu
On Mon, Nov 21, 2022 at 11:26:40AM -0500, huang...@chinatelecom.cn wrote:
> From: Hyman Huang(黄勇) 
> 
> Export dirty limit throttle time and estimated ring full
> time, through which we can observe the process of dirty
> limit during live migration.
> 
> Signed-off-by: Hyman Huang(黄勇) 
> ---
>  include/sysemu/dirtylimit.h |  2 ++
>  migration/migration.c   | 10 ++
>  monitor/hmp-cmds.c  | 10 ++
>  qapi/migration.json | 10 +-
>  softmmu/dirtylimit.c| 31 +++
>  5 files changed, 62 insertions(+), 1 deletion(-)
> 
> diff --git a/include/sysemu/dirtylimit.h b/include/sysemu/dirtylimit.h
> index 8d2c1f3..98cc4a6 100644
> --- a/include/sysemu/dirtylimit.h
> +++ b/include/sysemu/dirtylimit.h
> @@ -34,4 +34,6 @@ void dirtylimit_set_vcpu(int cpu_index,
>  void dirtylimit_set_all(uint64_t quota,
>  bool enable);
>  void dirtylimit_vcpu_execute(CPUState *cpu);
> +int64_t dirtylimit_throttle_us_per_full(void);
> +int64_t dirtylimit_us_ring_full(void);
>  #endif
> diff --git a/migration/migration.c b/migration/migration.c
> index 096b61a..886c25d 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -62,6 +62,7 @@
>  #include "yank_functions.h"
>  #include "sysemu/qtest.h"
>  #include "sysemu/kvm.h"
> +#include "sysemu/dirtylimit.h"
>  
>  #define MAX_THROTTLE  (128 << 20)  /* Migration transfer speed 
> throttling */
>  
> @@ -1112,6 +1113,15 @@ static void populate_ram_info(MigrationInfo *info, 
> MigrationState *s)
>  info->ram->remaining = ram_bytes_remaining();
>  info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
>  }
> +
> +if (migrate_dirty_limit() && dirtylimit_in_service()) {
> +info->has_dirty_limit_throttle_us_per_full = true;
> +info->dirty_limit_throttle_us_per_full =
> +dirtylimit_throttle_us_per_full();
> +
> +info->has_dirty_limit_us_ring_full = true;
> +info->dirty_limit_us_ring_full = dirtylimit_us_ring_full();
> +}
>  }
>  
>  static void populate_disk_info(MigrationInfo *info)
> diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
> index 9ad6ee5..9d02baf 100644
> --- a/monitor/hmp-cmds.c
> +++ b/monitor/hmp-cmds.c
> @@ -339,6 +339,16 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
> info->cpu_throttle_percentage);
>  }
>  
> +if (info->has_dirty_limit_throttle_us_per_full) {
> +monitor_printf(mon, "dirty-limit throttle time: %" PRIi64 " us\n",
> +   info->dirty_limit_throttle_us_per_full);
> +}
> +
> +if (info->has_dirty_limit_us_ring_full) {
> +monitor_printf(mon, "dirty-limit ring full time: %" PRIi64 " us\n",
> +   info->dirty_limit_us_ring_full);
> +}
> +
>  if (info->has_postcopy_blocktime) {
>  monitor_printf(mon, "postcopy blocktime: %u\n",
> info->postcopy_blocktime);
> diff --git a/qapi/migration.json b/qapi/migration.json
> index af6b2da..62db5cb 100644
> --- a/qapi/migration.json
> +++ b/qapi/migration.json
> @@ -242,6 +242,12 @@
>  #   Present and non-empty when migration is blocked.
>  #   (since 6.0)
>  #
> +# @dirty-limit-throttle-us-per-full: Throttle time (us) during the period of
> +#dirty ring full (since 7.1)
> +#
> +# @dirty-limit-us-ring-full: Estimated periodic time (us) of dirty ring full.
> +#(since 7.1)

s/7.1/7.3/

Could you enrich the document for the new fields?  For example, currently
you only report throttle time for vcpu0 on the 1st field, while for the
latter it's an average of all vcpus.  These need to be mentioned.

OTOH, how do you normally use these values?  Maybe that can also be added
into the documents too.

> +#
>  # Since: 0.14
>  ##
>  { 'struct': 'MigrationInfo',
> @@ -259,7 +265,9 @@
> '*postcopy-blocktime' : 'uint32',
> '*postcopy-vcpu-blocktime': ['uint32'],
> '*compression': 'CompressionStats',
> -   '*socket-address': ['SocketAddress'] } }
> +   '*socket-address': ['SocketAddress'],
> +   '*dirty-limit-throttle-us-per-full': 'int64',
> +   '*dirty-limit-us-ring-full': 'int64'} }
>  
>  ##
>  # @query-migrate:
> diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
> index 3f3c405..9d1df9b 100644
> --- a/softmmu/dirtylimit.c
> +++ b/softmmu/dirtylimit.c
> @@ -573,6 +573,37 @@ static struct DirtyLimitInfo *dirtylimit_query_vcpu(int 
> cpu_index)
>  return info;
>  }
>  
> +/* Pick up first vcpu throttle time by default */
> +int64_t dirtylimit_throttle_us_per_full(void)
> +{
> +CPUState *cpu = first_cpu;
> +return cpu->throttle_us_per_full;

Why would vcpu0 be the standard on this sampling?

I'm wondering whether it'll make more sense to collect the MAX() of all
vcpus here, because that'll be the maximum delay 
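A minimal sketch of that MAX()-across-vcpus alternative, reusing the per-vcpu
throttle_us_per_full field from this series (illustration only, not the
committed code):

/* Report the worst-case throttle time across all vcpus instead of
 * sampling vcpu0 only. */
int64_t dirtylimit_throttle_us_per_full(void)
{
    CPUState *cpu;
    int64_t max_us = 0;

    CPU_FOREACH(cpu) {
        max_us = MAX(max_us, cpu->throttle_us_per_full);
    }

    return max_us;
}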

Re: [PATCH v2 06/11] migration: Introduce dirty-limit capability

2022-11-29 Thread Peter Xu
On Mon, Nov 21, 2022 at 11:26:38AM -0500, huang...@chinatelecom.cn wrote:
> From: Hyman Huang(黄勇) 
> 
> Introduce migration dirty-limit capability, which can
> be turned on before live migration and limit dirty
> page rate during live migration.
> 
> Introduce the migrate_dirty_limit function to help check
> if the dirty-limit capability is enabled during live migration.
> 
> Meanwhile, refactor vcpu_dirty_rate_stat_collect
> so that period can be configured instead of hardcoded.
> 
> dirty-limit capability is kind of like auto-converge
> but using dirty limit instead of traditional cpu-throttle
> to throttle guest down. To enable this feature, turn on
> the dirty-limit capability before live migration using
> migrate-set-capabilities, and set the parameters
> "x-vcpu-dirty-limit-period", "vcpu-dirty-limit" suitably
> to speed up convergence.
> 
> Signed-off-by: Hyman Huang(黄勇) 

Acked-by: Peter Xu 

PS: please replace 7.1 with 7.3 in this patch and the previous one.

-- 
Peter Xu




Re: [PATCH v2 05/11] qapi/migration: Introduce vcpu-dirty-limit parameters

2022-11-29 Thread Peter Xu
On Mon, Nov 21, 2022 at 11:26:37AM -0500, huang...@chinatelecom.cn wrote:
> From: Hyman Huang(黄勇) 
> 
> Introduce "vcpu-dirty-limit" migration parameter used
> to limit dirty page rate during live migration.
> 
> "vcpu-dirty-limit" and "x-vcpu-dirty-limit-period" are
> two dirty-limit-related migration parameters, which can
> be set before and during live migration by qmp
> migrate-set-parameters.
> 
> These two parameters are used to help implement the dirty
> page rate limit algorithm of migration.
> 
> Signed-off-by: Hyman Huang(黄勇) 

Acked-by: Peter Xu 

-- 
Peter Xu




Re: [PATCH v14 4/5] hw/riscv: virt: Add PMU DT node to the device tree

2022-11-29 Thread Conor.Dooley
+CC Rob, which I probably should've done earlier, so
context all preserved

On 29/11/2022 09:42, Conor Dooley wrote:
> On 29/11/2022 09:27, Atish Kumar Patra wrote:
>> EXTERNAL EMAIL: Do not click links or open attachments unless you know the 
>> content is safe
>>
>> On Mon, Nov 28, 2022 at 11:32 PM  wrote:
>>>
>>> On 29/11/2022 07:08, Andrew Jones wrote:
 EXTERNAL EMAIL: Do not click links or open attachments unless you know the 
 content is safe

 On Mon, Nov 28, 2022 at 09:10:03PM +, conor.doo...@microchip.com wrote:
> On 28/11/2022 20:41, Atish Kumar Patra wrote:
>> EXTERNAL EMAIL: Do not click links or open attachments unless you know 
>> the content is safe
>>
>> On Mon, Nov 28, 2022 at 12:38 PM  wrote:
>>>
>>> On 28/11/2022 20:16, Atish Kumar Patra wrote:
 On Thu, Nov 24, 2022 at 5:17 AM Conor Dooley 
  wrote:
>
> On Wed, Aug 24, 2022 at 03:17:00PM -0700, Atish Patra wrote:
>> Qemu virt machine can support few cache events and cycle/instret 
>> counters.
>> It also supports counter overflow for these events.
>>
>> Add a DT node so that OpenSBI/Linux kernel is aware of the virt 
>> machine
>> capabilities. There are some dummy nodes added for testing as well.
>
> Hey Atish!
>
> I was fiddling with dumping the virt machine dtb again today to check
> some dt-binding changes I was making for the isa string would play
> nicely with the virt machine & I noticed that this patch has 
> introduced
> a new validation failure:
>
> ./build/qemu-system-riscv64 -nographic -machine virt,dumpdtb=qemu.dtb
>
> dt-validate -p 
> ../linux/Documentation/devicetree/bindings/processed-schema.json 
> qemu.dtb
> /home/conor/stuff/qemu/qemu.dtb: soc: pmu: 
> {'riscv,event-to-mhpmcounters': [[1, 1, 524281, 2, 2, 524284, 65561, 
> 65561, 524280, 65563, 65563, 524280, 65569, 65569, 524280, 0, 0, 0, 
> 0, 0]], 'compatible': ['riscv,pmu']} should not be valid under 
> {'type': 'object'}
>   From schema: 
> /home/conor/.local/lib/python3.10/site-packages/dtschema/schemas/simple-bus.yaml
>
> I assume this is the aforementioned "dummy" node & you have no 
> intention
> of creating a binding for this?
>

 It is a dummy node from Linux kernel perspective. OpenSbi use this
 node to figure out the hpmcounter mappings.
>>>
>>> Aye, but should it not have a binding anyway, since they're not
>>> meant to be linux specific?
>>>
>> It is documented in OpenSBI.
>> https://github.com/riscv-software-src/opensbi/blob/master/docs/pmu_support.md
>>
>> Are you suggesting that any non-Linux specific DT nodes should be part
>> of Linux DT binding as well ?
>
> I thought the point was that they were *not* meant to be linux specific,
> just happening to be housed there.
>

 I'm not sure if there's an official policy on where DT nodes should be
 specified, but it looks like Samuel's opinion is that they should live
 in the Linux kernel, whether they're used there or not [1].

 [1] http://lists.infradead.org/pipermail/opensbi/2022-October/003522.html
>>>
>>> Yah, that was also my understanding. See also U-Boot moving to unify
>>> their custom bindings into the linux repo:
>>> https://lore.kernel.org/linux-devicetree/20220930001410.2802843-1-...@chromium.org/
>>>
>>
>> This adds the U-Boot specific DT properties to the dts schema itself,
>> not Linux kernel DT bindings.
> 
> Yeah, sorry. I muddled things up a little there. My point was that they
> are trying to get to a stage where dt-validate and those tools work for
> them too. I'm not sure why I said "linux repo" rather than "dt-schema
> repo" when I double checked the file paths in the link before pasting it
> to make sure it was the dt-schema one.. I blame it being early.
> 
>> I am not opposed to adding PMU DT bindings to Linux but there should
>> be a clear policy on this.
>> What about OpenSBI domain DT bindings ?
>> If every other DT based open source project starts adding their DT
>> binding to the Linux kernel, that may go downhill pretty soon.

Rob, perhaps you can be a source of clarity here :) My early morning
muddling didn't help things.


> Maybe I am misunderstanding, but I had thought the goal was to get to
> user-independent bindings. Rob and Krzysztof certainly labour the point
> that the bindings should not reflect how one operating system's drivers
> would like to see them & u-boot or FreeBSD using a property is grounds
> for it not being removed from the bindings in the linux tree.
> 
> I'll go and actually ask Rob.

I did go & ask Rob, to which he said "I'll apply it even if no driver."

Do you want to whip up a binding, or shall I?



Re: [PATCH v2 01/11] dirtylimit: Fix overflow when computing MB

2022-11-29 Thread Peter Xu
On Mon, Nov 21, 2022 at 11:26:33AM -0500, huang...@chinatelecom.cn 
wrote:
> From: Hyman Huang(黄勇) 
> 
> Coverity points out an overflow problem when computing MB:
> dirty_ring_size and TARGET_PAGE_SIZE are both 32 bits, so the
> multiplication will be done as a 32-bit operation, which
> could overflow. Simplify the formula.
> 
> Meanwhile, fix spelling mistake of variable name.
> 
> Reported-by: Peter Maydell 
> Signed-off-by: Peter Maydell 
> Signed-off-by: Richard Henderson 
> Signed-off-by: Hyman Huang(黄勇) 

Reviewed-by: Peter Xu 

-- 
Peter Xu




Re: [PATCH v2 02/11] softmmu/dirtylimit: Add parameter check for hmp "set_vcpu_dirty_limit"

2022-11-29 Thread Peter Xu
On Mon, Nov 21, 2022 at 11:26:34AM -0500, huang...@chinatelecom.cn wrote:
> From: Hyman Huang(黄勇) 
> 
> The dirty_rate parameter of the hmp command "set_vcpu_dirty_limit" is invalid
> if less than 0, so add parameter check for it.
> 
> Signed-off-by: Hyman Huang(黄勇) 

Reviewed-by: Peter Xu 

-- 
Peter Xu




Re: [PATCH v2 07/11] migration: Implement dirty-limit convergence algo

2022-11-29 Thread Peter Xu
On Mon, Nov 21, 2022 at 11:26:39AM -0500, huang...@chinatelecom.cn wrote:
> diff --git a/migration/migration.c b/migration/migration.c
> index 86950a1..096b61a 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -240,6 +240,7 @@ void migration_cancel(const Error *error)
>  if (error) {
>  migrate_set_error(current_migration, error);
>  }
> +qmp_cancel_vcpu_dirty_limit(false, -1, NULL);

Disable it only if migrate_dirty_limit() is true?  It seems okay if the
admin wants to use dirtylimit separately from migration.

>  migrate_fd_cancel(current_migration);
>  }
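
A minimal sketch of the guard being suggested here, with the function names
used in this series (illustration, not the final patch):

void migration_cancel(const Error *error)
{
    if (error) {
        migrate_set_error(current_migration, error);
    }
    /* Only tear down the vcpu dirty-limit throttle if this migration
     * enabled it; leave a limit set directly by the admin alone. */
    if (migrate_dirty_limit()) {
        qmp_cancel_vcpu_dirty_limit(false, -1, NULL);
    }
    migrate_fd_cancel(current_migration);
}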

[...]

> @@ -1148,22 +1175,31 @@ static void migration_trigger_throttle(RAMState *rs)
>  uint64_t bytes_dirty_period = rs->num_dirty_pages_period * 
> TARGET_PAGE_SIZE;
>  uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;
>  
> -/* During block migration the auto-converge logic incorrectly detects
> - * that ram migration makes no progress. Avoid this by disabling the
> - * throttling logic during the bulk phase of block migration. */
> -if (migrate_auto_converge() && !blk_mig_bulk_active()) {
> -/* The following detection logic can be refined later. For now:
> -   Check to see if the ratio between dirtied bytes and the approx.
> -   amount of bytes that just got transferred since the last time
> -   we were in this routine reaches the threshold. If that happens
> -   twice, start or increase throttling. */
> -
> -if ((bytes_dirty_period > bytes_dirty_threshold) &&
> -(++rs->dirty_rate_high_cnt >= 2)) {
> +/*
> + * The following detection logic can be refined later. For now:
> + * Check to see if the ratio between dirtied bytes and the approx.
> + * amount of bytes that just got transferred since the last time
> + * we were in this routine reaches the threshold. If that happens
> + * twice, start or increase throttling.
> + */
> +
> +if ((bytes_dirty_period > bytes_dirty_threshold) &&
> +(++rs->dirty_rate_high_cnt >= 2)) {
> +rs->dirty_rate_high_cnt = 0;
> +/*
> + * During block migration the auto-converge logic incorrectly detects
> + * that ram migration makes no progress. Avoid this by disabling the
> + * throttling logic during the bulk phase of block migration
> + */
> +
> +if (migrate_auto_converge() && !blk_mig_bulk_active()) {

Does dirtylimit cap needs to check blk_mig_bulk_active() too?  I assume
that check was used to ignore the bulk block migration phase where major
bandwidth will be consumed by block migrations so the measured bandwidth is
not accurate.  IIUC it applies to dirtylimit too.

>  trace_migration_throttle();
> -rs->dirty_rate_high_cnt = 0;
>  mig_throttle_guest_down(bytes_dirty_period,
>  bytes_dirty_threshold);
> +} else if (migrate_dirty_limit() &&
> +   kvm_dirty_ring_enabled() &&
> +   migration_is_active(s)) {

Is "kvm_dirty_ring_enabled()" and "migration_is_active(s)" check helpful?
Can we only rely on migrate_dirty_limit() alone?
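
One possible shape of the trigger that folds in both comments above, i.e. skip
the bulk block-migration phase for either throttle and rely on
migrate_dirty_limit() alone; migration_dirty_limit_guest() is a placeholder
name for whatever helper the series ends up using:

if ((bytes_dirty_period > bytes_dirty_threshold) &&
    (++rs->dirty_rate_high_cnt >= 2)) {
    rs->dirty_rate_high_cnt = 0;
    if (blk_mig_bulk_active()) {
        /* Bandwidth is dominated by bulk block migration; do nothing. */
    } else if (migrate_auto_converge()) {
        trace_migration_throttle();
        mig_throttle_guest_down(bytes_dirty_period, bytes_dirty_threshold);
    } else if (migrate_dirty_limit()) {
        migration_dirty_limit_guest();   /* placeholder helper */
    }
}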

-- 
Peter Xu




Re: [PATCH v3 4/7] hw/virtio: ensure a valid host_feature set for virtio-user-gpio

2022-11-29 Thread Alex Bennée


Stefan Hajnoczi  writes:

> Hi Alex,
> I'm waiting for a v4 or a confirmation that you've retested and I can
> just drop this patch.

I've re-ordered and I'll post the up to date series with the dropped
patch tomorrow. I was hoping for r-b's for the other patches.

>
> Thanks!
>
> Stefan


-- 
Alex Bennée



Re: [PATCH v2 04/11] qapi/migration: Introduce x-vcpu-dirty-limit-period parameter

2022-11-29 Thread Peter Xu
On Mon, Nov 21, 2022 at 11:26:36AM -0500, huang...@chinatelecom.cn wrote:
> From: Hyman Huang(黄勇) 
> 
> Introduce "x-vcpu-dirty-limit-period" migration experimental
> parameter, which is in the range of 1 to 1000ms and used to
> make dirtyrate calculation period configurable.
> 
> Signed-off-by: Hyman Huang(黄勇) 
> ---
>  migration/migration.c | 26 ++
>  monitor/hmp-cmds.c|  8 
>  qapi/migration.json   | 34 +++---
>  3 files changed, 61 insertions(+), 7 deletions(-)
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index 739bb68..701267c 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -116,6 +116,8 @@
>  #define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS5
>  #define DEFAULT_MIGRATE_ANNOUNCE_STEP100
>  
> +#define DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD 500 /* ms */
> +
>  static NotifierList migration_state_notifiers =
>  NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
>  
> @@ -963,6 +965,9 @@ MigrationParameters *qmp_query_migrate_parameters(Error 
> **errp)
> s->parameters.block_bitmap_mapping);
>  }
>  
> +params->has_x_vcpu_dirty_limit_period = true;
> +params->x_vcpu_dirty_limit_period = 
> s->parameters.x_vcpu_dirty_limit_period;
> +
>  return params;
>  }
>  
> @@ -1564,6 +1569,15 @@ static bool migrate_params_check(MigrationParameters 
> *params, Error **errp)
>  }
>  #endif
>  
> +if (params->has_x_vcpu_dirty_limit_period &&
> +(params->x_vcpu_dirty_limit_period < 1 ||
> + params->x_vcpu_dirty_limit_period > 1000)) {
> +error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
> +   "x_vcpu_dirty_limit_period",
> +   "is invalid, it must be in the range of 1 to 1000 ms");
> +return false;
> +}
> +
>  return true;
>  }
>  
> @@ -1663,6 +1677,10 @@ static void 
> migrate_params_test_apply(MigrateSetParameters *params,
>  dest->has_block_bitmap_mapping = true;
>  dest->block_bitmap_mapping = params->block_bitmap_mapping;
>  }
> +
> +if (params->has_x_vcpu_dirty_limit_period) {
> +dest->x_vcpu_dirty_limit_period = params->x_vcpu_dirty_limit_period;
> +}
>  }
>  
>  static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
> @@ -1785,6 +1803,10 @@ static void migrate_params_apply(MigrateSetParameters 
> *params, Error **errp)
>  QAPI_CLONE(BitmapMigrationNodeAliasList,
> params->block_bitmap_mapping);
>  }
> +if (params->has_x_vcpu_dirty_limit_period) {
> +s->parameters.x_vcpu_dirty_limit_period =
> +params->x_vcpu_dirty_limit_period;
> +}
>  }
>  
>  void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
> @@ -4386,6 +4408,9 @@ static Property migration_properties[] = {
>  DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds),
>  DEFINE_PROP_STRING("tls-hostname", MigrationState, 
> parameters.tls_hostname),
>  DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz),
> +DEFINE_PROP_UINT64("x-vcpu-dirty-limit-period", MigrationState,
> +   parameters.x_vcpu_dirty_limit_period,
> +   DEFAULT_MIGRATE_VCPU_DIRTY_LIMIT_PERIOD),
>  
>  /* Migration capabilities */
>  DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
> @@ -4477,6 +4502,7 @@ static void migration_instance_init(Object *obj)
>  params->has_tls_creds = true;
>  params->has_tls_hostname = true;
>  params->has_tls_authz = true;
> +params->has_x_vcpu_dirty_limit_period = true;
>  
>  qemu_sem_init(&ms->postcopy_pause_sem, 0);
>  qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
> diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
> index 01b789a..a3170ca 100644
> --- a/monitor/hmp-cmds.c
> +++ b/monitor/hmp-cmds.c
> @@ -513,6 +513,10 @@ void hmp_info_migrate_parameters(Monitor *mon, const 
> QDict *qdict)
>  }
>  }
>  }
> +
> +monitor_printf(mon, "%s: %" PRIu64 " ms\n",
> +
> MigrationParameter_str(MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT_PERIOD),
> +params->x_vcpu_dirty_limit_period);
>  }
>  
>  qapi_free_MigrationParameters(params);
> @@ -1332,6 +1336,10 @@ void hmp_migrate_set_parameter(Monitor *mon, const 
> QDict *qdict)
>  error_setg(, "The block-bitmap-mapping parameter can only be set 
> "
> "through QMP");
>  break;
> +case MIGRATION_PARAMETER_X_VCPU_DIRTY_LIMIT_PERIOD:
> +p->has_x_vcpu_dirty_limit_period = true;
> +visit_type_size(v, param, &p->x_vcpu_dirty_limit_period, &err);
> +break;
>  default:
>  assert(0);
>  }
> diff --git a/qapi/migration.json b/qapi/migration.json
> index 88ecf86..5175779 100644
> --- a/qapi/migration.json
> +++ b/qapi/migration.json
> @@ -776,8 +776,13 @@
>  #block 

Re: [PATCH v2 03/11] kvm-all: Do not allow reap vcpu dirty ring buffer if not ready

2022-11-29 Thread Peter Xu
Hi, Yong,

On Mon, Nov 21, 2022 at 11:26:35AM -0500, huang...@chinatelecom.cn wrote:
> From: Hyman Huang(黄勇) 
> 
> When testing a VM with many vcpus and the dirtylimit feature, QEMU crashed
> due to the assertion in kvm_dirty_ring_reap_one, which asserts that the
> vcpu's kvm_dirty_gfns has been allocated and is not NULL.
> 
> Because the dirty ring reaper thread races with the QEMU main thread, the
> reaper may reap a vcpu's dirty ring buffer before the main thread has
> completed vcpu instantiation. So add waiting logic to the reaper thread and
> only start reaping once vcpu instantiation is completed.
> 
> Signed-off-by: Hyman Huang(黄勇) 
> ---
>  accel/kvm/kvm-all.c | 36 
>  1 file changed, 36 insertions(+)
> 
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index f99b0be..9457715 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -1401,6 +1401,35 @@ out:
>  kvm_slots_unlock();
>  }
>  
> +/*
> + * test if dirty ring has been initialized by checking if vcpu
> + * has been initialized and gfns was allocated correspondlingly.
> + * return true if dirty ring has been initialized, false otherwise.
> + */
> +static bool kvm_vcpu_dirty_ring_initialized(void)
> +{
> +CPUState *cpu;
> +MachineState *ms = MACHINE(qdev_get_machine());
> +int ncpus = ms->smp.cpus;
> +
> +/*
> + * assume vcpu has not been initilaized if generation
> + * id less than number of vcpu
> + */
> +if (ncpus > cpu_list_generation_id_get()) {
> +return false;
> +}
> +
> +CPU_FOREACH(cpu) {
> +if (!cpu->kvm_dirty_gfns) {
> +return false;
> +}
> +}
> +
> +return true;
> +}
> +
> +
>  static void *kvm_dirty_ring_reaper_thread(void *data)
>  {
>  KVMState *s = data;
> @@ -1410,6 +1439,13 @@ static void *kvm_dirty_ring_reaper_thread(void *data)
>  
>  trace_kvm_dirty_ring_reaper("init");
>  
> +retry:
> +/* don't allow reaping dirty ring if ring buffer hasn't been mapped */
> +if (!kvm_vcpu_dirty_ring_initialized()) {
> +sleep(1);

The sleep here is probably not necessary.  Could you instead have a look at
the other much simpler patch?  Here:

https://lore.kernel.org/qemu-devel/20220927154653.77296-1-pet...@redhat.com/

> +goto retry;
> +}
> +
>  while (true) {
>  r->reaper_state = KVM_DIRTY_RING_REAPER_WAIT;
>  trace_kvm_dirty_ring_reaper("wait");
> -- 
> 1.8.3.1
> 
> 

-- 
Peter Xu




Re: [PATCH qemu.git v2 9/9] hw/timer/imx_epit: fix compare timer handling

2022-11-29 Thread Axel Heider

Peter,


If you're correcting behaviour of the timer use here,
you should start by fixing the way the timers are currently
created with PTIMER_POLICY_LEGACY. That setting is basically
"bug-for-bug-compatibility with very old QEMU, for devices
where nobody really knows what the hardware behaviour should
be". Where we do know what the hardware's supposed to do and
we have some way of testing we're not breaking guest code,
the right thing is to set the correct policy flags for
the desired behaviour. These are documented in a comment
near the top of include/hw/ptimer.h.


I would prefer to postpone changing PTIMER_POLICY_LEGACY to a
separate patchset, which is on top of the current one, as this
seems not to be an issue at the moment. Fixing the general issues
on access and ensuring the flags are correct seems more pressing,
and this seems unrelated to the timer policy.



It is modestly harmful because the sequence
counter = ptimer_get_count(s->timer_reload);
...
ptimer_set_count(s->timer_cmp, counter);

will cause the counter to lose or gain time. This happens because
when you call "get" the ptimer code will look at the current exact
time in nanoseconds and tell you the counter value at that point.
That is probably somewhere in the middle of a timer-clock period
(which runs at whatever frequency you tell the ptimer to use):
for argument's sake, suppose the timer-clock counts every 1000ns.
Suppose at the point of the 'get' the next tick will be in 300ns time.
When you do a "set" that is assumed to be the result of a guest
register write of some kind, and will effectively start a new
timer-clock period. This means the next tick will not be for
a full 1000ns, and we just lost 300ns (or gained 700ns perhaps).
So it's better to avoid this kind of "get-and-then-set" code.


I see your point. The "get-and-then-set" was already in the code, I
did not really change this. I have tried to find a better way to
implement this, but could not come up with anything so far. Any
non-trivial suggestions here? Otherwise I would prefer to
look into this in a new patch-set, together with replacing the
PTIMER_POLICY_LEGACY.


Axel



QEMU 7.2 non-deterministic migration-test CI failure

2022-11-29 Thread Stefan Hajnoczi
Hi,
The following cross-i386-tci migration-test failure was triggered by CI:

>>> MALLOC_PERTURB_=133 
>>> G_TEST_DBUS_DAEMON=/builds/qemu-project/qemu/tests/dbus-vmstate-daemon.sh 
>>> QTEST_QEMU_BINARY=./qemu-system-i386 
>>> /builds/qemu-project/qemu/build/tests/qtest/migration-test --tap -k
― ✀  ―
stderr:
Could not access KVM kernel module: No such file or directory
Could not access KVM kernel module: No such file or directory
Could not access KVM kernel module: No such file or directory
Could not access KVM kernel module: No such file or directory
qemu-system-i386: Unknown savevm section type 126
../tests/qtest/libqtest.c:188: kill_qemu() detected QEMU death from signal 11 
(Segmentation fault) (core dumped)
TAP parsing error: Too few tests run (expected 14, got 2)
(test program exited with status code -6)
――

https://gitlab.com/qemu-project/qemu/-/jobs/3397205431

When I retried, the test succeeded. I haven't managed to reproduce it
locally yet either.

Any ideas?

Stefan




Re: [PATCH for-7.2] target/arm: Set TCGCPUOps.restore_state_to_opc for v7m

2022-11-29 Thread Evgeny Ermakov
Signed-off-by: Evgeny Ermakov 
---




Re: [PATCH v3 4/7] hw/virtio: ensure a valid host_feature set for virtio-user-gpio

2022-11-29 Thread Michael S. Tsirkin
On Tue, Nov 29, 2022 at 04:01:25PM -0500, Stefan Hajnoczi wrote:
> Hi Alex,
> I'm waiting for a v4 or a confirmation that you've retested and I can
> just drop this patch.
> 
> Thanks!
> 
> Stefan

Note things need to be reordered, patch 2 should come last.
So I'd really like to see v4 if possible.

-- 
MST




Re: [PATCH v3 4/7] hw/virtio: ensure a valid host_feature set for virtio-user-gpio

2022-11-29 Thread Stefan Hajnoczi
Hi Alex,
I'm waiting for a v4 or a confirmation that you've retested and I can
just drop this patch.

Thanks!

Stefan



Re: [PATCH for-7.2] target/arm: Set TCGCPUOps.restore_state_to_opc for v7m

2022-11-29 Thread Stefan Hajnoczi
Waiting for Evgeny's Signed-off-by. It seems a -rc4 tag will be
required anyway for vhost fixes so I'll wait.

Stefan



Re: [PATCH v2 5/8] hw/arm/virt: Fix devicetree warnings about the GPIO node

2022-11-29 Thread Rob Herring
On Tue, Sep 27, 2022 at 6:25 AM Peter Maydell  wrote:
>
> On Tue, 27 Sept 2022 at 11:12, Jean-Philippe Brucker
>  wrote:
> >
> > Since the pl061 device can be used as interrupt controller, its node
> > should contain "interrupt-controller" and "#interrupt-cells" properties.
>
> It *can* be, but this PL061 is *not* an interrupt controller.
> I don't see any reason why we should claim so in the DT.

Taking another look, it is an interrupt controller. The GPIOs are
connected to the 'gpio-keys' node which is interrupt based (there's a
polled version too). That binding happens to be pretty lax and allows
the GPIO to be specified either with 'gpios' or 'interrupts' property.
The Linux PL061 driver happens to work only because it always
registers an interrupt controller regardless of having
"interrupt-controller" and "#interrupt-cells" properties or not.

Rob



Re: [PATCH for-7.2] block-backend: avoid bdrv_unregister_buf() NULL pointer deref

2022-11-29 Thread Stefan Hajnoczi
On Tue, 22 Nov 2022 at 03:22, Kevin Wolf  wrote:
>
> Am 21.11.2022 um 22:19 hat Stefan Hajnoczi geschrieben:
> > bdrv_*() APIs expect a valid BlockDriverState. Calling them with bs=NULL
> > leads to undefined behavior.
> >
> > Jonathan Cameron reported this following NULL pointer dereference when a
> > VM with a virtio-blk device and a memory-backend-file object is
> > terminated:
> > 1. qemu_cleanup() closes all drives, setting blk->root to NULL
> > 2. qemu_cleanup() calls user_creatable_cleanup(), which results in a RAM
> >block notifier callback because the memory-backend-file is destroyed.
> > 3. blk_unregister_buf() is called by virtio-blk's BlockRamRegistrar
> >notifier callback and undefined behavior occurs.
> >
> > Fixes: baf422684d73 ("virtio-blk: use BDRV_REQ_REGISTERED_BUF optimization 
> > hint")
> > Co-authored-by: Jonathan Cameron 
> > Signed-off-by: Stefan Hajnoczi 
>
> Reviewed-by: Kevin Wolf 
>
> This raises some questions, though. What happens if the graph isn't
> static between creation and deletion of the device? Do we need to do
> something with registered buffers when a node is attached to or detached
> from an existing device?

I think you are right. Graph changes need to be handled. Right now they aren't.

Stefan



Re: [PATCH v1 1/1] migration: Fix yank on postcopy multifd crashing guest after migration

2022-11-29 Thread Peter Xu
On Tue, Nov 29, 2022 at 05:28:26PM -0300, Leonardo Bras Soares Passos wrote:
> Hello Peter,

Leo,

> 
> On Thu, Nov 24, 2022 at 1:04 PM Peter Xu  wrote:
> >
> > On Wed, Nov 09, 2022 at 02:56:29AM -0300, Leonardo Bras wrote:
> > > diff --git a/migration/savevm.c b/migration/savevm.c
> > > index a0cdb714f7..250caff7f4 100644
> > > --- a/migration/savevm.c
> > > +++ b/migration/savevm.c
> > > @@ -1889,6 +1889,8 @@ static void *postcopy_ram_listen_thread(void 
> > > *opaque)
> > >  exit(EXIT_FAILURE);
> > >  }
> > >
> > > +migration_load_cleanup();
> >
> > It's a bit weird to call multifd-load-clean in a listen phase..
> 
> I agree.
> 
> >
> > How about moving it right above
> > trace_process_incoming_migration_co_postcopy_end_main()?  Then the new
> > helper can also be static.
> 
> Seems a nice Idea to have this function to be static.
> 
> We have to guarantee this is run after the migration finished, but
> before migration_incoming_state_destroy().

IIUC it doesn't need to be when migration finished.  It should be fine as
long as we finished precopy phase, and that's what the migration coroutine
does, iiuc.  The thing is postcopy doesn't use multifd at all, so logically
it can be released before postcopy starts.

Actually, IMHO it'll be safer to do it like that, just to make sure we
won't accidentally receive multifd pages _after_ postcopy starts, because
that'll be another, more severe and hard-to-debug issue, since the guest can
see partially copied pages from the multifd recv channels.

> 
> You suggested calling it right above of
> trace_process_incoming_migration_co_postcopy_end_main(), which git
> grep pointed me to an if clause in process_incoming_migration_co().
> If I got the location correctly, it would not help: this coroutine is
> ran just after the VM went to the target host, and not when the
> migration finished.
> 
> If we are using multifd channels, this will break the migration with
> segmentation fault (SIGSEGV), since the channels have not finished
> sending yet.

If this happens, then I had a feeling that there's something else that
needs syncs.  As I discussed above, we should make sure multifd pages all
landed before we start vcpu threads.

Said that, now I think I'm not against your original proposal to fix this
immediate crash.  However I am still wondering whether we really should
disable multifd with postcopy, as there seem to be still a few missing
pieces even to enable multifd during precopy-only.
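
As an illustration of that ordering (function names follow the series and the
existing code; the exact placement inside process_incoming_migration_co() is
an assumption):

/* Release the multifd recv channels once precopy streaming is done,
 * before the postcopy listen phase can expose partially copied pages. */
Error *local_err = NULL;

if (multifd_load_cleanup(&local_err)) {
    error_report_err(local_err);
}
trace_process_incoming_migration_co_postcopy_end_main();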

Thanks,

-- 
Peter Xu




Re: Plugin Memory Callback Debugging

2022-11-29 Thread Aaron Lindsay via
On Nov 22 10:57, Aaron Lindsay wrote:
> On Nov 21 18:22, Richard Henderson wrote:
> > On 11/21/22 13:51, Alex Bennée wrote:
> > > 
> > > Aaron Lindsay  writes:
> > > 
> > > > On Nov 15 22:36, Alex Bennée wrote:
> > > > > Aaron Lindsay  writes:
> > > > > > I believe the code *should* always reset `cpu->plugin_mem_cbs` to 
> > > > > > NULL at the
> > > > > > end of an instruction/TB's execution, so its not exactly clear to 
> > > > > > me how this
> > > > > > is occurring. However, I suspect it may be relevant that we are 
> > > > > > calling
> > > > > > `free_dyn_cb_arr()` because my plugin called `qemu_plugin_reset()`.
> > > > > 
> > > > > Hmm I'm going to have to remind myself about how this bit works.
> > > > 
> > > > When is it expected that cpu->plugin_mem_cbs is reset to NULL if it is
> > > > set for an instruction? Is it guaranteed it is reset by the end of the
> > > > tb?
> > > 
> > > It should be by the end of the instruction. See
> > > inject_mem_disable_helper() which inserts TCG code to disable the
> > > helpers. We also have plugin_gen_disable_mem_helpers() which should
> > > catch every exit out of a block (exit_tb, goto_tb, goto_ptr). That is
> > > why qemu_plugin_disable_mem_helpers() is only really concerned about
> > > when we longjmp out of the loop.
> > > 
> > > > If I were to put an assertion in cpu_tb_exec() just after the call
> > > > to tcg_qemu_tb_exec(), should cpu->plugin_mem_cbs always be NULL
> > > > there?
> > > 
> > > Yes I think so.
> > 
> > Indeed.
> 
> Well, the good news is that if this is an assumption we're relying on, it is
> now trivial to reproduce the problem!
> 
> Compile some simple program (doesn't really matter, the issue gets triggered
> early):
> 
> $ echo "int main() { return 0; }" > simple.c && gcc simple.c -o simple
> 
> Make this change to cpu_tb_exec():
> 
> > diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
> > index 356fe348de..50a010327d 100644
> > --- a/accel/tcg/cpu-exec.c
> > +++ b/accel/tcg/cpu-exec.c
> > @@ -436,6 +436,9 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int 
> > *tb_exit)
> > 
> >  qemu_thread_jit_execute();
> >  ret = tcg_qemu_tb_exec(env, tb_ptr);
> > +if (cpu->plugin_mem_cbs != NULL) {
> > +g_assert_not_reached();
> > +}
> >  cpu->can_do_io = 1;
> >  /*
> >   * TODO: Delay swapping back to the read-write region of the TB
> 
> And run:
> 
> $ ./build/qemu-aarch64 -plugin contrib/plugins/libexeclog.so -d plugin 
> ./simple
> 
> You should fairly quickly see something like:
> 
> > [snip]
> > 0, 0x5502814d04, 0xb482, ""
> > 0, 0x5502814d08, 0xf9400440, "", load, 0x5502844ed0
> > 0, 0x5502814d0c, 0xf1001c1f, ""
> > **
> > ERROR:../accel/tcg/cpu-exec.c:440:cpu_tb_exec: code should not be reached
> > Bail out! ERROR:../accel/tcg/cpu-exec.c:440:cpu_tb_exec: code should not be 
> > reached
> 
> When digging through my other failure in `rr` I saw the cpu->plugin_mem_cbs
> pointer changing from one non-null value to another (which also seems to
> indicate it is not being cleared between instructions).
> 
> Does this hint that there are cases where reset cpu->plugin_mem_cbs to NULL is
> getting optimized away, but not the code to set it in the first place?

Is there anyone who could help take a look at this from the code gen
perspective?

-Aaron



[PATCH for-7.2] target/arm: Set TCGCPUOps.restore_state_to_opc for v7m

2022-11-29 Thread Richard Henderson
This setting got missed, breaking v7m.

Fixes: 56c6c98df85c ("target/arm: Convert to tcg_ops restore_state_to_opc")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1347
Signed-off-by: Richard Henderson 
---

The patch is unchanged from the correct suggestion in the issue.
If Evgeny would like to reply with his s-o-b, this patch could be
committed with updated Author.


r~

---
 target/arm/internals.h | 4 
 target/arm/cpu.c   | 6 +++---
 target/arm/cpu_tcg.c   | 1 +
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/target/arm/internals.h b/target/arm/internals.h
index d9121d9ff8..161e42d50f 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -185,6 +185,10 @@ static inline int r14_bank_number(int mode)
 void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu);
 void arm_translate_init(void);
 
+void arm_restore_state_to_opc(CPUState *cs,
+  const TranslationBlock *tb,
+  const uint64_t *data);
+
 #ifdef CONFIG_TCG
 void arm_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb);
 #endif /* CONFIG_TCG */
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index a021df9e9e..38d066c294 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -91,9 +91,9 @@ void arm_cpu_synchronize_from_tb(CPUState *cs,
 }
 }
 
-static void arm_restore_state_to_opc(CPUState *cs,
- const TranslationBlock *tb,
- const uint64_t *data)
+void arm_restore_state_to_opc(CPUState *cs,
+  const TranslationBlock *tb,
+  const uint64_t *data)
 {
 CPUARMState *env = cs->env_ptr;
 
diff --git a/target/arm/cpu_tcg.c b/target/arm/cpu_tcg.c
index 60ff539fa1..9a2cef7d05 100644
--- a/target/arm/cpu_tcg.c
+++ b/target/arm/cpu_tcg.c
@@ -1035,6 +1035,7 @@ static const struct TCGCPUOps arm_v7m_tcg_ops = {
 .initialize = arm_translate_init,
 .synchronize_from_tb = arm_cpu_synchronize_from_tb,
 .debug_excp_handler = arm_debug_excp_handler,
+.restore_state_to_opc = arm_restore_state_to_opc,
 
 #ifdef CONFIG_USER_ONLY
 .record_sigsegv = arm_cpu_record_sigsegv,
-- 
2.34.1




Re: [PATCH v1 1/1] migration: Fix yank on postcopy multifd crashing guest after migration

2022-11-29 Thread Leonardo Bras Soares Passos
Hello Peter,

On Thu, Nov 24, 2022 at 1:04 PM Peter Xu  wrote:
>
> On Wed, Nov 09, 2022 at 02:56:29AM -0300, Leonardo Bras wrote:
> > diff --git a/migration/savevm.c b/migration/savevm.c
> > index a0cdb714f7..250caff7f4 100644
> > --- a/migration/savevm.c
> > +++ b/migration/savevm.c
> > @@ -1889,6 +1889,8 @@ static void *postcopy_ram_listen_thread(void *opaque)
> >  exit(EXIT_FAILURE);
> >  }
> >
> > +migration_load_cleanup();
>
> It's a bit weird to call multifd-load-clean in a listen phase..

I agree.

>
> How about moving it right above
> trace_process_incoming_migration_co_postcopy_end_main()?  Then the new
> helper can also be static.

Seems like a nice idea to have this function be static.

We have to guarantee this is run after the migration finished, but
before migration_incoming_state_destroy().

You suggested calling it right above of
trace_process_incoming_migration_co_postcopy_end_main(), which git
grep pointed me to an if clause in process_incoming_migration_co().
If I got the location correctly, it would not help: this coroutine is
run just after the VM went to the target host, and not when the
migration finished.

If we are using multifd channels, this will break the migration with
segmentation fault (SIGSEGV), since the channels have not finished
sending yet.

Best regards,
Leo




>
> > +
> >  migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
> > MIGRATION_STATUS_COMPLETED);
> >  /*
> > --
> > 2.38.1
> >
>
> --
> Peter Xu
>




Re: [PATCH 10/12] pci: Inline do_pcie_aer_inject_error() into its only caller

2022-11-29 Thread Dr. David Alan Gilbert
* Markus Armbruster (arm...@redhat.com) wrote:
> Signed-off-by: Markus Armbruster 

Yeh that seems to have simplified out;

Reviewed-by: Dr. David Alan Gilbert 

> ---
>  hw/pci/pci-hmp-cmds.c | 41 ++---
>  1 file changed, 6 insertions(+), 35 deletions(-)
> 
> diff --git a/hw/pci/pci-hmp-cmds.c b/hw/pci/pci-hmp-cmds.c
> index 393ab4214a..b03badb1e6 100644
> --- a/hw/pci/pci-hmp-cmds.c
> +++ b/hw/pci/pci-hmp-cmds.c
> @@ -159,21 +159,7 @@ void pcibus_dev_print(Monitor *mon, DeviceState *dev, 
> int indent)
>  }
>  }
>  
> -typedef struct PCIEErrorDetails {
> -const char *id;
> -const char *root_bus;
> -int bus;
> -int devfn;
> -} PCIEErrorDetails;
> -
> -/*
> - * Inject an error described by @qdict.
> - * On success, set @details to show where error was sent.
> - * Return negative errno if injection failed and a message was emitted.
> - */
> -static int do_pcie_aer_inject_error(Monitor *mon,
> -const QDict *qdict,
> -PCIEErrorDetails *details)
> +void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict)
>  {
>  const char *id = qdict_get_str(qdict, "id");
>  const char *error_name;
> @@ -189,12 +175,12 @@ static int do_pcie_aer_inject_error(Monitor *mon,
>  monitor_printf(mon,
> "id or pci device path is invalid or device not "
> "found. %s\n", id);
> -return ret;
> +return;
>  }
>  if (!pci_is_express(dev)) {
>  monitor_printf(mon, "the device doesn't support pci express. %s\n",
> id);
> -return -ENOSYS;
> +return;
>  }
>  
>  error_name = qdict_get_str(qdict, "error_status");
> @@ -203,7 +189,7 @@ static int do_pcie_aer_inject_error(Monitor *mon,
>  || num > UINT32_MAX) {
>  monitor_printf(mon, "invalid error status value. \"%s\"",
> error_name);
> -return -EINVAL;
> +return;
>  }
>  error_status = num;
>  correctable = qdict_get_try_bool(qdict, "correctable", false);
> @@ -239,25 +225,10 @@ static int do_pcie_aer_inject_error(Monitor *mon,
>  if (ret < 0) {
>  monitor_printf(mon, "failed to inject error: %s\n",
> strerror(-ret));
> -return ret;
> -}
> -details->id = id;
> -details->root_bus = pci_root_bus_path(dev);
> -details->bus = pci_dev_bus_num(dev);
> -details->devfn = dev->devfn;
> -
> -return 0;
> -}
> -
> -void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict)
> -{
> -PCIEErrorDetails data;
> -
> -if (do_pcie_aer_inject_error(mon, qdict, &data) < 0) {
>  return;
>  }
>  
>  monitor_printf(mon, "OK id: %s root bus: %s, bus: %x devfn: %x.%x\n",
> -   data.id, data.root_bus, data.bus,
> -   PCI_SLOT(data.devfn), PCI_FUNC(data.devfn));
> +   id, pci_root_bus_path(dev), pci_dev_bus_num(dev),
> +   PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
>  }
> -- 
> 2.37.3
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




Re: [PATCH 12/12] pci: Improve do_pcie_aer_inject_error()'s error messages

2022-11-29 Thread Dr. David Alan Gilbert
* Markus Armbruster (arm...@redhat.com) wrote:
> Signed-off-by: Markus Armbruster 

Reviewed-by: Dr. David Alan Gilbert 

> ---
>  hw/pci/pci-hmp-cmds.c | 30 +++---
>  1 file changed, 15 insertions(+), 15 deletions(-)
> 
> diff --git a/hw/pci/pci-hmp-cmds.c b/hw/pci/pci-hmp-cmds.c
> index 0807a206e4..279851bfe6 100644
> --- a/hw/pci/pci-hmp-cmds.c
> +++ b/hw/pci/pci-hmp-cmds.c
> @@ -161,6 +161,7 @@ void pcibus_dev_print(Monitor *mon, DeviceState *dev, int 
> indent)
>  
>  void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict)
>  {
> +Error *err = NULL;
>  const char *id = qdict_get_str(qdict, "id");
>  const char *error_name;
>  uint32_t error_status;
> @@ -171,25 +172,21 @@ void hmp_pcie_aer_inject_error(Monitor *mon, const 
> QDict *qdict)
>  int ret;
>  
>  ret = pci_qdev_find_device(id, &dev);
> -if (ret < 0) {
> -monitor_printf(mon,
> -   "id or pci device path is invalid or device not "
> -   "found. %s\n", id);
> -return;
> +if (ret == -ENODEV) {
> +error_setg(&err, "device '%s' not found", id);
> +goto out;
>  }
> -if (!pci_is_express(dev)) {
> -monitor_printf(mon, "the device doesn't support pci express. %s\n",
> -   id);
> -return;
> +if (ret < 0 || !pci_is_express(dev)) {
> +error_setg(&err, "device '%s' is not a PCIe device", id);
> +goto out;
>  }
>  
>  error_name = qdict_get_str(qdict, "error_status");
>  if (pcie_aer_parse_error_string(error_name, &error_status, &correctable)) {
>  if (qemu_strtoul(error_name, NULL, 0, ) < 0
>  || num > UINT32_MAX) {
> -monitor_printf(mon, "invalid error status value. \"%s\"",
> -   error_name);
> -return;
> +error_setg(&err, "invalid error status value '%s'", error_name);
> +goto out;
>  }
>  error_status = num;
>  correctable = qdict_get_try_bool(qdict, "correctable", false);
> @@ -223,12 +220,15 @@ void hmp_pcie_aer_inject_error(Monitor *mon, const 
> QDict *qdict)
>  
>  ret = pcie_aer_inject_error(dev, &aer_err);
>  if (ret < 0) {
> -monitor_printf(mon, "failed to inject error: %s\n",
> -   strerror(-ret));
> -return;
> +error_setg_errno(&err, -ret, "failed to inject error");
> +goto out;
>  }
>  
> +
>  monitor_printf(mon, "OK id: %s root bus: %s, bus: %x devfn: %x.%x\n",
> id, pci_root_bus_path(dev), pci_dev_bus_num(dev),
> PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
> +
> +out:
> +hmp_handle_error(mon, err);
>  }
> -- 
> 2.37.3
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




Re: [PATCH v9 1/8] mm: Introduce memfd_restricted system call to create restricted user memory

2022-11-29 Thread Michael Roth
On Tue, Nov 29, 2022 at 01:06:58PM -0600, Michael Roth wrote:
> On Tue, Nov 29, 2022 at 10:06:15PM +0800, Chao Peng wrote:
> > On Mon, Nov 28, 2022 at 06:37:25PM -0600, Michael Roth wrote:
> > > On Tue, Oct 25, 2022 at 11:13:37PM +0800, Chao Peng wrote:
> > ...
> > > > +static long restrictedmem_fallocate(struct file *file, int mode,
> > > > +   loff_t offset, loff_t len)
> > > > +{
> > > > +   struct restrictedmem_data *data = file->f_mapping->private_data;
> > > > +   struct file *memfd = data->memfd;
> > > > +   int ret;
> > > > +
> > > > +   if (mode & FALLOC_FL_PUNCH_HOLE) {
> > > > +   if (!PAGE_ALIGNED(offset) || !PAGE_ALIGNED(len))
> > > > +   return -EINVAL;
> > > > +   }
> > > > +
> > > > +   restrictedmem_notifier_invalidate(data, offset, offset + len, 
> > > > true);
> > > 
> > > The KVM restrictedmem ops seem to expect pgoff_t, but here we pass
> > > loff_t. For SNP we've made this strange as part of the following patch
> > > and it seems to produce the expected behavior:
> > 
> > That's correct. Thanks.
> > 
> > > 
> > >   https://github.com/mdroth/linux/commit/d669c7d3003ff7a7a47e73e8c3b4eeadbd2c4eb6
> > > 
> > > > +   ret = memfd->f_op->fallocate(memfd, mode, offset, len);
> > > > +   restrictedmem_notifier_invalidate(data, offset, offset + len, 
> > > > false);
> > > > +   return ret;
> > > > +}
> > > > +
> > > 
> > > 
> > > 
> > > > +int restrictedmem_get_page(struct file *file, pgoff_t offset,
> > > > +  struct page **pagep, int *order)
> > > > +{
> > > > +   struct restrictedmem_data *data = file->f_mapping->private_data;
> > > > +   struct file *memfd = data->memfd;
> > > > +   struct page *page;
> > > > +   int ret;
> > > > +
> > > > +   ret = shmem_getpage(file_inode(memfd), offset, &page, SGP_WRITE);
> > > 
> > > This will result in KVM allocating pages that userspace hasn't necessary
> > > fallocate()'d. In the case of SNP we need to get the PFN so we can clean
> > > up the RMP entries when restrictedmem invalidations are issued for a GFN
> > > range.
> > 
> > Yes fallocate() is unnecessary unless someone wants to reserve some
> > space (e.g. for determination or performance purpose), this matches its
> > semantics perfectly at:
> > https://www.man7.org/linux/man-pages/man2/fallocate.2.html
> > 
> > > 
> > > If the guest supports lazy-acceptance however, these pages may not have
> > > been faulted in yet, and if the VMM defers actually fallocate()'ing space
> > > until the guest actually tries to issue a shared->private for that GFN
> > > (to support lazy-pinning), then there may never be a need to allocate
> > > pages for these backends.
> > > 
> > > However, the restrictedmem invalidations are for GFN ranges so there's
> > > no way to know inadvance whether it's been allocated yet or not. The
> > > xarray is one option but currently it defaults to 'private' so that
> > > doesn't help us here. It might if we introduced a 'uninitialized' state
> > > or something along that line instead of just the binary
> > > 'shared'/'private' though...
> > 
> > How about if we change the default to 'shared' as we discussed at
> > https://lore.kernel.org/all/Y35gI0L8GMt9+OkK@google.com/?
> 
> Need to look at this a bit more, but I think that could work as well.
> 
> > > 
> > > But for now we added a restrictedmem_get_page_noalloc() that uses
> > > SGP_NONE instead of SGP_WRITE to avoid accidentally allocating a bunch
> > > of memory as part of guest shutdown, and a
> > > kvm_restrictedmem_get_pfn_noalloc() variant to go along with that. But
> > > maybe a boolean param is better? Or maybe SGP_NOALLOC is the better
> > > default, and we just propagate an error to userspace if they didn't
> > > fallocate() in advance?
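
Purely as an illustration of the SGP_NOALLOC idea (a sketch, not the
_noalloc helper referenced above; it assumes the restrictedmem_data
layout from the quoted patch and drops the order reporting done by
restrictedmem_get_page):

int restrictedmem_get_page_noalloc(struct file *file, pgoff_t offset,
                                   struct page **pagep)
{
        struct restrictedmem_data *data = file->f_mapping->private_data;
        struct file *memfd = data->memfd;
        struct page *page;
        int ret;

        /* SGP_NOALLOC fails on a hole instead of instantiating a page,
         * so ranges that were never fallocate()'d/faulted stay absent. */
        ret = shmem_getpage(file_inode(memfd), offset, &page, SGP_NOALLOC);
        if (ret)
                return ret;

        *pagep = page;
        return 0;
}
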
> > 
> > This (making 

Re: [PULL 0/2] Seabios 1.16.1 20221128 patches

2022-11-29 Thread Stefan Hajnoczi
Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/7.2 for any 
user-visible changes.




Re: [PATCH for-7.2] replay: Fix declaration of replay_read_next_clock

2022-11-29 Thread Stefan Hajnoczi
Merged, thanks!

Stefan



Re: [PATCH v9 1/8] mm: Introduce memfd_restricted system call to create restricted user memory

2022-11-29 Thread Michael Roth
On Tue, Nov 29, 2022 at 10:06:15PM +0800, Chao Peng wrote:
> On Mon, Nov 28, 2022 at 06:37:25PM -0600, Michael Roth wrote:
> > On Tue, Oct 25, 2022 at 11:13:37PM +0800, Chao Peng wrote:
> ...
> > > +static long restrictedmem_fallocate(struct file *file, int mode,
> > > + loff_t offset, loff_t len)
> > > +{
> > > + struct restrictedmem_data *data = file->f_mapping->private_data;
> > > + struct file *memfd = data->memfd;
> > > + int ret;
> > > +
> > > + if (mode & FALLOC_FL_PUNCH_HOLE) {
> > > + if (!PAGE_ALIGNED(offset) || !PAGE_ALIGNED(len))
> > > + return -EINVAL;
> > > + }
> > > +
> > > + restrictedmem_notifier_invalidate(data, offset, offset + len, true);
> > 
> > The KVM restrictedmem ops seem to expect pgoff_t, but here we pass
> > loff_t. For SNP we've made this strange as part of the following patch
> > and it seems to produce the expected behavior:
> 
> That's correct. Thanks.
> 
> > 
> >   https://github.com/mdroth/linux/commit/d669c7d3003ff7a7a47e73e8c3b4eeadbd2c4eb6
> > 
> > > + ret = memfd->f_op->fallocate(memfd, mode, offset, len);
> > > + restrictedmem_notifier_invalidate(data, offset, offset + len, false);
> > > + return ret;
> > > +}
> > > +
> > 
> > 
> > 
> > > +int restrictedmem_get_page(struct file *file, pgoff_t offset,
> > > +struct page **pagep, int *order)
> > > +{
> > > + struct restrictedmem_data *data = file->f_mapping->private_data;
> > > + struct file *memfd = data->memfd;
> > > + struct page *page;
> > > + int ret;
> > > +
> > > + ret = shmem_getpage(file_inode(memfd), offset, &page, SGP_WRITE);
> > 
> > This will result in KVM allocating pages that userspace hasn't necessary
> > fallocate()'d. In the case of SNP we need to get the PFN so we can clean
> > up the RMP entries when restrictedmem invalidations are issued for a GFN
> > range.
> 
> Yes fallocate() is unnecessary unless someone wants to reserve some
> space (e.g. for determination or performance purpose), this matches its
> semantics perfectly at:
> https://www.man7.org/linux/man-pages/man2/fallocate.2.html
> 
> > 
> > If the guest supports lazy-acceptance however, these pages may not have
> > been faulted in yet, and if the VMM defers actually fallocate()'ing space
> > until the guest actually tries to issue a shared->private for that GFN
> > (to support lazy-pinning), then there may never be a need to allocate
> > pages for these backends.
> > 
> > However, the restrictedmem invalidations are for GFN ranges so there's
> > no way to know inadvance whether it's been allocated yet or not. The
> > xarray is one option but currently it defaults to 'private' so that
> > doesn't help us here. It might if we introduced a 'uninitialized' state
> > or something along that line instead of just the binary
> > 'shared'/'private' though...
> 
> How about if we change the default to 'shared' as we discussed at
> https://lore.kernel.org/all/Y35gI0L8GMt9+OkK@google.com/?

Need to look at this a bit more, but I think that could work as well.

> > 
> > But for now we added a restrictedmem_get_page_noalloc() that uses
> > SGP_NONE instead of SGP_WRITE to avoid accidentally allocating a bunch
> > of memory as part of guest shutdown, and a
> > kvm_restrictedmem_get_pfn_noalloc() variant to go along with that. But
> > maybe a boolean param is better? Or maybe SGP_NOALLOC is the better
> > default, and we just propagate an error to userspace if they didn't
> > fallocate() in advance?
> 
> This (making fallocate() a hard requirement) not only complicates the
> userspace but also forces the lazy-faulting going through a long path of
> exiting to userspace. Unless we don't have other options I would not go
> this way.

Unless I'm missing something, it's already the case that userspace is
responsible for handling all the shared->private transitions in response
to 

Re: [PATCH v9 1/8] mm: Introduce memfd_restricted system call to create restricted user memory

2022-11-29 Thread Vishal Annapurve
On Mon, Nov 28, 2022 at 4:37 PM Michael Roth  wrote:
>
> On Tue, Oct 25, 2022 at 11:13:37PM +0800, Chao Peng wrote:
> > From: "Kirill A. Shutemov" 
> >
> > Introduce 'memfd_restricted' system call with the ability to create
> > memory areas that are restricted from userspace access through ordinary
> > MMU operations (e.g. read/write/mmap). The memory content is expected to
> > be used through a new in-kernel interface by a third kernel module.
> >
> > memfd_restricted() is useful for scenarios where a file descriptor(fd)
> > can be used as an interface into mm but want to restrict userspace's
> > ability on the fd. Initially it is designed to provide protections for
> > KVM encrypted guest memory.
> >
> > Normally KVM uses memfd memory via mmapping the memfd into KVM userspace
> > (e.g. QEMU) and then using the mmaped virtual address to setup the
> > mapping in the KVM secondary page table (e.g. EPT). With confidential
> > computing technologies like Intel TDX, the memfd memory may be encrypted
> > with special key for special software domain (e.g. KVM guest) and is not
> > expected to be directly accessed by userspace. Precisely, userspace
> > access to such encrypted memory may lead to host crash so should be
> > prevented.
> >
> > memfd_restricted() provides semantics required for KVM guest encrypted
> > memory support that a fd created with memfd_restricted() is going to be
> > used as the source of guest memory in confidential computing environment
> > and KVM can directly interact with core-mm without the need to expose
> > the memory content into KVM userspace.
> >
> > KVM userspace is still in charge of the lifecycle of the fd. It should
> > pass the created fd to KVM. KVM uses the new restrictedmem_get_page() to
> > obtain the physical memory page and then uses it to populate the KVM
> > secondary page table entries.
> >
> > The userspace restricted memfd can be fallocate-ed or hole-punched
> > from userspace. When these operations happen, KVM can get notified
> > through restrictedmem_notifier, it then gets chance to remove any
> > mapped entries of the range in the secondary page tables.
> >
> > memfd_restricted() itself is implemented as a shim layer on top of real
> > memory file systems (currently tmpfs). Pages in restrictedmem are marked
> > as unmovable and unevictable, this is required for current confidential
> > usage. But in future this might be changed.
> >
> > By default memfd_restricted() prevents userspace read, write and mmap.
> > By defining new bit in the 'flags', it can be extended to support other
> > restricted semantics in the future.
> >
> > The system call is currently wired up for x86 arch.
> >
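
For illustration only, a user-space caller could look like the sketch
below. The syscall number (451) matches the table entries added in this
patch; the zero flags value and the follow-up fallocate() are assumptions
about how a VMM might use the fd, not something this patch defines.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef __NR_memfd_restricted
#define __NR_memfd_restricted 451
#endif

int main(void)
{
    /* No flag bits are defined yet in this version of the series. */
    int fd = syscall(__NR_memfd_restricted, 0);

    if (fd < 0) {
        perror("memfd_restricted");
        return 1;
    }

    /* A VMM would size the file (and may punch holes later); both
     * operations go through the restrictedmem notifiers so KVM can
     * drop stale mappings for the affected range. */
    if (fallocate(fd, 0, 0, 2 * 1024 * 1024) < 0) {
        perror("fallocate");
        return 1;
    }

    printf("restricted memfd %d ready to be handed to KVM\n", fd);
    return 0;
}
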
> > Signed-off-by: Kirill A. Shutemov 
> > Signed-off-by: Chao Peng 
> > ---
> >  arch/x86/entry/syscalls/syscall_32.tbl |   1 +
> >  arch/x86/entry/syscalls/syscall_64.tbl |   1 +
> >  include/linux/restrictedmem.h  |  62 ++
> >  include/linux/syscalls.h   |   1 +
> >  include/uapi/asm-generic/unistd.h  |   5 +-
> >  include/uapi/linux/magic.h |   1 +
> >  kernel/sys_ni.c|   3 +
> >  mm/Kconfig |   4 +
> >  mm/Makefile|   1 +
> >  mm/restrictedmem.c | 250 +
> >  10 files changed, 328 insertions(+), 1 deletion(-)
> >  create mode 100644 include/linux/restrictedmem.h
> >  create mode 100644 mm/restrictedmem.c
> >
> > diff --git a/arch/x86/entry/syscalls/syscall_32.tbl 
> > b/arch/x86/entry/syscalls/syscall_32.tbl
> > index 320480a8db4f..dc70ba90247e 100644
> > --- a/arch/x86/entry/syscalls/syscall_32.tbl
> > +++ b/arch/x86/entry/syscalls/syscall_32.tbl
> > @@ -455,3 +455,4 @@
> >  448  i386process_mreleasesys_process_mrelease
> >  449  i386futex_waitv sys_futex_waitv
> >  450  i386set_mempolicy_home_node sys_set_mempolicy_home_node
> > +451  i386memfd_restrictedsys_memfd_restricted
> > diff --git a/arch/x86/entry/syscalls/syscall_64.tbl 
> > b/arch/x86/entry/syscalls/syscall_64.tbl
> > index c84d12608cd2..06516abc8318 100644
> > --- a/arch/x86/entry/syscalls/syscall_64.tbl
> > +++ b/arch/x86/entry/syscalls/syscall_64.tbl
> > @@ -372,6 +372,7 @@
> >  448  common  process_mreleasesys_process_mrelease
> >  449  common  futex_waitv sys_futex_waitv
> >  450  common  set_mempolicy_home_node sys_set_mempolicy_home_node
> > +451  common  memfd_restrictedsys_memfd_restricted
> >
> >  #
> >  # Due to a historical design error, certain syscalls are numbered 
> > differently
> > diff --git a/include/linux/restrictedmem.h b/include/linux/restrictedmem.h
> > new file mode 100644
> > index ..9c37c3ea3180
> > --- /dev/null
> > +++ b/include/linux/restrictedmem.h
> > @@ -0,0 +1,62 @@
> > +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> > +#ifndef _LINUX_RESTRICTEDMEM_H
> > +
> > +#include 
> > +#include 
> > +#include 
> > +
> > 

Re: [PATCH v3 1/1] hw/arm/boot: set initrd with #address-cells type in fdt

2022-11-29 Thread Peter Maydell
On Tue, 29 Nov 2022 at 16:07, Schspa Shi  wrote:
>
> We use 32bit value for linux,initrd-[start/end], when we have
> loader_start > 4GB, there will be a wrong initrd_start passed
> to the kernel, and the kernel will report the following warning.
>
> [0.00] [ cut here ]
> [0.00] initrd not fully accessible via the linear mapping -- please 
> check your bootloader ...
> [0.00] WARNING: CPU: 0 PID: 0 at arch/arm64/mm/init.c:355 
> arm64_memblock_init+0x158/0x244
> [0.00] Modules linked in:
> [0.00] CPU: 0 PID: 0 Comm: swapper Tainted: GW  
> 6.1.0-rc3-13250-g30a0b95b1335-dirty #28
> [0.00] Hardware name: Horizon Sigi Virtual development board (DT)
> [0.00] pstate: 60c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> [0.00] pc : arm64_memblock_init+0x158/0x244
> [0.00] lr : arm64_memblock_init+0x158/0x244
> [0.00] sp : 89273df0
> [0.00] x29: 89273df0 x28: 001000cc0010 x27: 
> 8000
> [0.00] x26: 0050a3e2 x25: 88b46000 x24: 
> 88b46000
> [0.00] x23: 88a53000 x22: 8942 x21: 
> 88a53000
> [0.00] x20: 0400 x19: 0400 x18: 
> 1020
> [0.00] x17: 6568632065736165 x16: 6c70202d2d20676e x15: 
> 697070616d207261
> [0.00] x14: 656e696c20656874 x13: 0a2e2e2e20726564 x12: 
> 
> [0.00] x11:  x10:  x9 : 
> 
> [0.00] x8 :  x7 : 796c6c756620746f x6 : 
> 6e20647274696e69
> [0.00] x5 : 893c7c47 x4 : 88a2102f x3 : 
> 89273a88
> [0.00] x2 : 8000f038 x1 : 00c0 x0 : 
> 0056
> [0.00] Call trace:
> [0.00]  arm64_memblock_init+0x158/0x244
> [0.00]  setup_arch+0x164/0x1cc
> [0.00]  start_kernel+0x94/0x4ac
> [0.00]  __primary_switched+0xb4/0xbc
> [0.00] ---[ end trace  ]---
> [0.00] Zone ranges:
> [0.00]   DMA  [mem 0x0010-0x001007ff]
>
> To fix it, we can change it to #[address/size]-cells type.
>
> Signed-off-by: Schspa Shi 

Thanks for the respin; I've applied this version to my
target-arm-for-8.0 branch.

-- PMM



[PATCH v12 6/7] s390x/cpu_topology: activating CPU topology

2022-11-29 Thread Pierre Morel
The KVM capability, KVM_CAP_S390_CPU_TOPOLOGY is used to
activate the S390_FEAT_CONFIGURATION_TOPOLOGY feature and
the topology facility for the guest in the case the topology
is available in QEMU and in KVM.

The feature is fenced for SE (secure execution).

To allow smooth migration with old QEMU the feature is disabled by
default using the CPU flag -disable-topology.

Making S390_FEAT_CONFIGURATION_TOPOLOGY part of the default
features means the -ctop CPU flag is no longer necessary;
turning the topology feature on is done with -disable-topology
only.

Signed-off-by: Pierre Morel 
---
 include/hw/s390x/cpu-topology.h |  5 +
 target/s390x/cpu_features_def.h.inc |  1 +
 target/s390x/cpu_models.c   | 17 +
 target/s390x/cpu_topology.c |  5 +
 target/s390x/gen-features.c |  3 +++
 target/s390x/kvm/kvm.c  | 14 ++
 6 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
index e88059ccec..b2fa24ba93 100644
--- a/include/hw/s390x/cpu-topology.h
+++ b/include/hw/s390x/cpu-topology.h
@@ -36,9 +36,6 @@ struct S390Topology {
 #define TYPE_S390_CPU_TOPOLOGY "s390-topology"
 OBJECT_DECLARE_SIMPLE_TYPE(S390Topology, S390_CPU_TOPOLOGY)
 
-static inline bool s390_has_topology(void)
-{
-return false;
-}
+bool s390_has_topology(void);
 
 #endif
diff --git a/target/s390x/cpu_features_def.h.inc 
b/target/s390x/cpu_features_def.h.inc
index e3cfe63735..016a720e38 100644
--- a/target/s390x/cpu_features_def.h.inc
+++ b/target/s390x/cpu_features_def.h.inc
@@ -147,6 +147,7 @@ DEF_FEAT(SIE_CEI, "cei", SCLP_CPU, 43, "SIE: 
Conditional-external-interception f
 DEF_FEAT(DAT_ENH_2, "dateh2", MISC, 0, "DAT-enhancement facility 2")
 DEF_FEAT(CMM, "cmm", MISC, 0, "Collaborative-memory-management facility")
 DEF_FEAT(AP, "ap", MISC, 0, "AP instructions installed")
+DEF_FEAT(DISABLE_CPU_TOPOLOGY, "disable-topology", MISC, 0, "Disable CPU 
Topology")
 
 /* Features exposed via the PLO instruction. */
 DEF_FEAT(PLO_CL, "plo-cl", PLO, 0, "PLO Compare and load (32 bit in general 
registers)")
diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
index c3a4f80633..1f5348d6a3 100644
--- a/target/s390x/cpu_models.c
+++ b/target/s390x/cpu_models.c
@@ -253,6 +253,7 @@ bool s390_has_feat(S390Feat feat)
 case S390_FEAT_SIE_CMMA:
 case S390_FEAT_SIE_PFMFI:
 case S390_FEAT_SIE_IBS:
+case S390_FEAT_CONFIGURATION_TOPOLOGY:
 return false;
 break;
 default:
@@ -422,6 +423,21 @@ void s390_cpu_list(void)
 }
 }
 
+static void check_incompatibility(S390CPUModel *model, Error **errp)
+{
+static int dep[][2] = {
+{ S390_FEAT_CONFIGURATION_TOPOLOGY, S390_FEAT_DISABLE_CPU_TOPOLOGY },
+};
+int i;
+
+for (i = 0; i < ARRAY_SIZE(dep); i++) {
+if (test_bit(dep[i][0], model->features) &&
+test_bit(dep[i][1], model->features)) {
+clear_bit(dep[i][0], model->features);
+}
+}
+}
+
 static void check_consistency(const S390CPUModel *model)
 {
 static int dep[][2] = {
@@ -592,6 +608,7 @@ void s390_realize_cpu_model(CPUState *cs, Error **errp)
 cpu->model->cpu_id_format = max_model->cpu_id_format;
 cpu->model->cpu_ver = max_model->cpu_ver;
 
+check_incompatibility(cpu->model, &err);
 check_consistency(cpu->model);
check_compatibility(max_model, cpu->model, &err);
 if (err) {
diff --git a/target/s390x/cpu_topology.c b/target/s390x/cpu_topology.c
index b81f016ba1..8123e6ddf0 100644
--- a/target/s390x/cpu_topology.c
+++ b/target/s390x/cpu_topology.c
@@ -15,6 +15,11 @@
 #include "hw/s390x/cpu-topology.h"
 #include "hw/s390x/sclp.h"
 
+bool s390_has_topology(void)
+{
+return s390_has_feat(S390_FEAT_CONFIGURATION_TOPOLOGY);
+}
+
 /*
  * s390_topology_add_cpu:
  * @topo: pointer to the topology
diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index 1e3b7c0dc9..f3acfdd9a5 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -488,6 +488,7 @@ static uint16_t full_GEN9_GA3[] = {
 static uint16_t full_GEN10_GA1[] = {
 S390_FEAT_EDAT,
 S390_FEAT_CONFIGURATION_TOPOLOGY,
+S390_FEAT_DISABLE_CPU_TOPOLOGY,
 S390_FEAT_GROUP_MSA_EXT_2,
 S390_FEAT_ESOP,
 S390_FEAT_SIE_PFMFI,
@@ -605,6 +606,8 @@ static uint16_t default_GEN9_GA1[] = {
 static uint16_t default_GEN10_GA1[] = {
 S390_FEAT_EDAT,
 S390_FEAT_GROUP_MSA_EXT_2,
+S390_FEAT_DISABLE_CPU_TOPOLOGY,
+S390_FEAT_CONFIGURATION_TOPOLOGY,
 };
 
 #define default_GEN10_GA2 EmptyFeat
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index a79fdf1c79..ec2c9fd8fa 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -2471,6 +2471,20 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, 
Error **errp)
 set_bit(S390_FEAT_UNPACK, model->features);
 }
 
+/*
+ * If we have support for CPU Topology 

Re: [PATCH for-7.2] replay: Fix declaration of replay_read_next_clock

2022-11-29 Thread Philippe Mathieu-Daudé

On 29/11/22 17:13, Stefan Hajnoczi wrote:

On Tue, 29 Nov 2022 at 02:39, Philippe Mathieu-Daudé  wrote:


On 29/11/22 02:05, Richard Henderson wrote:

Fixes the build with gcc 13:

replay/replay-time.c:34:6: error: conflicting types for  \
'replay_read_next_clock' due to enum/integer mismatch; \
have 'void(ReplayClockKind)' [-Werror=enum-int-mismatch]
 34 | void replay_read_next_clock(ReplayClockKind kind)
|  ^~
In file included from ../qemu/replay/replay-time.c:14:
replay/replay-internal.h:139:6: note: previous declaration of \
'replay_read_next_clock' with type 'void(unsigned int)'
139 | void replay_read_next_clock(unsigned int kind);
|  ^~

Fixes: 8eda206e090 ("replay: recording and replaying clock ticks")
Signed-off-by: Richard Henderson 
---
   replay/replay-internal.h | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/replay/replay-internal.h b/replay/replay-internal.h
index 89e377be90..b6836354ac 100644
--- a/replay/replay-internal.h
+++ b/replay/replay-internal.h
@@ -136,7 +136,7 @@ bool replay_next_event_is(int event);
   /*! Reads next clock value from the file.
   If clock kind read from the file is different from the parameter,
   the value is not used. */
-void replay_read_next_clock(unsigned int kind);
+void replay_read_next_clock(ReplayClockKind kind);


Preferably having this file including "sysemu/replay.h" in the same
patch:


ReplayClockKind isn't declared by anything in replay-internal.h but I
see another instance of this in the file (e.g. InputEvent).

Maybe send a follow-up patch for 8.0 that cleans up the #includes?


Sure!
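
For reference, a minimal standalone reproducer of the gcc 13 diagnostic
quoted above (the enum is a stand-in for QEMU's ReplayClockKind so the
file compiles on its own; build with gcc-13 -c -Werror=enum-int-mismatch):

typedef enum ReplayClockKind {
    REPLAY_CLOCK_HOST,
    REPLAY_CLOCK_VIRTUAL_RT,
    REPLAY_CLOCK_COUNT
} ReplayClockKind;

/* Old prototype, as in replay-internal.h before the fix... */
void replay_read_next_clock(unsigned int kind);

/* ...conflicting with the definition, which uses the enum. */
void replay_read_next_clock(ReplayClockKind kind)
{
    (void)kind;
}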




[PATCH v12 5/7] s390x/cpu_topology: interception of PTF instruction

2022-11-29 Thread Pierre Morel
When the host supports the CPU topology facility, the PTF
instruction with function code 2 is interpreted by the SIE,
provided that the userland hypervisor activates the interpretation
by using the KVM_CAP_S390_CPU_TOPOLOGY KVM extension.

The PTF instructions with function code 0 and 1 are intercepted
and must be emulated by the userland hypervisor.

Signed-off-by: Pierre Morel 
Reviewed-by: Janis Schoetterl-Glausch 
---
 include/hw/s390x/s390-virtio-ccw.h |  6 
 hw/s390x/cpu-topology.c| 52 ++
 target/s390x/kvm/kvm.c | 11 +++
 3 files changed, 69 insertions(+)

diff --git a/include/hw/s390x/s390-virtio-ccw.h 
b/include/hw/s390x/s390-virtio-ccw.h
index 47ce0aa6fa..d78ea8843c 100644
--- a/include/hw/s390x/s390-virtio-ccw.h
+++ b/include/hw/s390x/s390-virtio-ccw.h
@@ -31,6 +31,12 @@ struct S390CcwMachineState {
 DeviceState *topology;
 };
 
+#define S390_PTF_REASON_NONE (0x00 << 8)
+#define S390_PTF_REASON_DONE (0x01 << 8)
+#define S390_PTF_REASON_BUSY (0x02 << 8)
+#define S390_TOPO_FC_MASK 0xffUL
+void s390_handle_ptf(S390CPU *cpu, uint8_t r1, uintptr_t ra);
+
 struct S390CcwMachineClass {
 /*< private >*/
 MachineClass parent_class;
diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
index 32908d13bb..12fcd041a3 100644
--- a/hw/s390x/cpu-topology.c
+++ b/hw/s390x/cpu-topology.c
@@ -20,6 +20,58 @@
 #include "hw/s390x/s390-virtio-ccw.h"
 #include "hw/s390x/cpu-topology.h"
 #include "migration/vmstate.h"
+#include "target/s390x/cpu.h"
+#include "hw/s390x/s390-virtio-ccw.h"
+
+/*
+ * s390_handle_ptf:
+ *
+ * @register 1: contains the function code
+ *
+ * Function codes 0 and 1 handle the CPU polarization.
+ * We assume an horizontal topology, the only one supported currently
+ * by Linux, consequently we answer to function code 0, requesting
+ * horizontal polarization that it is already the current polarization
+ * and reject vertical polarization request without further explanation.
+ *
+ * Function code 2 is handling topology changes and is interpreted
+ * by the SIE.
+ */
+void s390_handle_ptf(S390CPU *cpu, uint8_t r1, uintptr_t ra)
+{
+CPUS390XState *env = &cpu->env;
+uint64_t reg = env->regs[r1];
+uint8_t fc = reg & S390_TOPO_FC_MASK;
+
+if (!s390_has_feat(S390_FEAT_CONFIGURATION_TOPOLOGY)) {
+s390_program_interrupt(env, PGM_OPERATION, ra);
+return;
+}
+
+if (env->psw.mask & PSW_MASK_PSTATE) {
+s390_program_interrupt(env, PGM_PRIVILEGED, ra);
+return;
+}
+
+if (reg & ~S390_TOPO_FC_MASK) {
+s390_program_interrupt(env, PGM_SPECIFICATION, ra);
+return;
+}
+
+switch (fc) {
+case 0:/* Horizontal polarization is already set */
+env->regs[r1] |= S390_PTF_REASON_DONE;
+setcc(cpu, 2);
+break;
+case 1:/* Vertical polarization is not supported */
+env->regs[r1] |= S390_PTF_REASON_NONE;
+setcc(cpu, 2);
+break;
+default:
+/* Note that fc == 2 is interpreted by the SIE */
+s390_program_interrupt(env, PGM_SPECIFICATION, ra);
+}
+}
 
 /**
  * s390_topology_realize:
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 5b6383eab0..a79fdf1c79 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -97,6 +97,7 @@
 
 #define PRIV_B9_EQBS0x9c
 #define PRIV_B9_CLP 0xa0
+#define PRIV_B9_PTF 0xa2
 #define PRIV_B9_PCISTG  0xd0
 #define PRIV_B9_PCILG   0xd2
 #define PRIV_B9_RPCIT   0xd3
@@ -1465,6 +1466,13 @@ static int kvm_mpcifc_service_call(S390CPU *cpu, struct 
kvm_run *run)
 }
 }
 
+static void kvm_handle_ptf(S390CPU *cpu, struct kvm_run *run)
+{
+uint8_t r1 = (run->s390_sieic.ipb >> 20) & 0x0f;
+
+s390_handle_ptf(cpu, r1, RA_IGNORED);
+}
+
 static int handle_b9(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1)
 {
 int r = 0;
@@ -1482,6 +1490,9 @@ static int handle_b9(S390CPU *cpu, struct kvm_run *run, 
uint8_t ipa1)
 case PRIV_B9_RPCIT:
 r = kvm_rpcit_service_call(cpu, run);
 break;
+case PRIV_B9_PTF:
+kvm_handle_ptf(cpu, run);
+break;
 case PRIV_B9_EQBS:
 /* just inject exception */
 r = -1;
-- 
2.31.1




[PATCH v12 7/7] docs/s390x: document s390x cpu topology

2022-11-29 Thread Pierre Morel
Add some basic examples for the definition of cpu topology
in s390x.

Signed-off-by: Pierre Morel 
---
 docs/system/s390x/cpu-topology.rst | 80 ++
 1 file changed, 80 insertions(+)
 create mode 100644 docs/system/s390x/cpu-topology.rst

diff --git a/docs/system/s390x/cpu-topology.rst 
b/docs/system/s390x/cpu-topology.rst
new file mode 100644
index 00..2fad28453c
--- /dev/null
+++ b/docs/system/s390x/cpu-topology.rst
@@ -0,0 +1,80 @@
+CPU Topology on s390x
+=
+
+CPU Topology on S390x provides up to 5 levels of topology containers:
+nodes, drawers, books, sockets and CPUs.
+While the higher level containers, Containers Topology List Entries,
+(Containers TLE) define a tree hierarchy, the lowest level of topology
+definition, the CPU Topology List Entry (CPU TLE), provides the placement
+of the CPUs inside the parent container.
+
+Currently QEMU CPU topology uses a single level of container: the sockets.
+
+For backward compatibility, threads can be declared on the ``-smp`` command
+line. They will be seen as CPUs by the guest as long as multithreading
+is not really supported by QEMU for S390.
+
+Prerequisites
+-
+
+To use CPU Topology a Linux QEMU/KVM machine providing the CPU Topology 
facility
+(STFLE bit 11) is required.
+
+However, since this facility has been enabled by default in an early version
+of QEMU, we use a capability, ``KVM_CAP_S390_CPU_TOPOLOGY``, to notify KVM
+QEMU use of the CPU Topology.
+
+Indicating the CPU topology to the Virtual Machine
+--
+
+The CPU Topology, can be specified on the QEMU command line
+with the ``-smp`` or the ``-device`` QEMU command arguments.
+
+Like in :
+
+.. code-block:: sh
+-smp cpus=5,sockets=8,cores=2,threads=2,maxcpus=32
+-device host-s390x-cpu,core-id=14
+
+New CPUs can be plugged using the device_add hmp command like in:
+
+.. code-block:: sh
+   (qemu) device_add host-s390x-cpu,core-id=9
+
+The core-id defines the placement of the core in the topology by
+starting with core 0 in socket 0 up to maxcpus.
+
+In the example above:
+
+* There are 5 CPUs provided to the guest with the ``-smp`` command line
+  They will take the core-ids 0,1,2,3,4
+  As we have 2 threads in 2 cores in a socket, we have 4 CPUs provided
+  to the guest in socket 0, with core-ids 0,1,2,3.
+  The last cpu, with core-id 4, will be on socket 1.
+
+* the core with ID 14 provided by the ``-device`` command line will
+  be placed in socket 3, with core-id 14
+
+* the core with ID 9 provided by the ``device_add`` qmp command will
+  be placed in socket 2, with core-id 9
+
+Note that the core ID is machine wide and the CPU TLE masks provided
+by the STSI instruction will be:
+
+* in socket 0: 0xf000 (core id 0,1,2,3)
+* in socket 1: 0x0040 (core id 9)
+* in socket 1: 0x0002 (core id 14)
+
+Enabling CPU topology
+-
+
+Currently, CPU topology is disabled by default.
+
+Enabling CPU topology can be done by setting the feature flag
+``disable-topology`` to ``off`` like in:
+
+.. code-block:: sh
+   -cpu gen16b,disable-topology=off
+
+Having the topology disabled by default allows migration between
+old and new QEMU without adding new flags.
-- 
2.31.1
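
As a standalone illustration (not part of the patch) of the placement rule
described above for sockets=8,cores=2,threads=2: each socket holds
cores * threads = 4 core-ids, and, taking origin 0 and the MSB-first bit
numbering implied by the mask values in the document, the per-socket
CPU TLE masks can be reproduced like this:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const int cores = 2, threads = 2;        /* as in the -smp example */
    const int per_socket = cores * threads;  /* 4 core-ids per socket  */
    const int ids[] = { 0, 1, 2, 3, 4, 9, 14 };
    uint64_t mask[8] = { 0 };
    int i;

    for (i = 0; i < 7; i++) {
        int socket = ids[i] / per_socket;

        /* CPU address = origin * 64 + bit position; origin is 0 here,
         * and bit positions count from the most significant bit. */
        mask[socket] |= 1ULL << (63 - ids[i]);
        printf("core-id %2d -> socket %d\n", ids[i], socket);
    }
    for (i = 0; i < 8; i++) {
        if (mask[i]) {
            printf("socket %d CPU TLE mask: 0x%016" PRIx64 "\n", i, mask[i]);
        }
    }
    return 0;
}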




[PATCH v12 2/7] s390x/cpu topology: reporting the CPU topology to the guest

2022-11-29 Thread Pierre Morel
The guest uses the STSI instruction to get information on the
CPU topology.

Let us implement the STSI instruction for the basic CPU topology
level, level 2.

Signed-off-by: Pierre Morel 
---
 target/s390x/cpu.h  |  77 +++
 hw/s390x/s390-virtio-ccw.c  |  12 +--
 target/s390x/cpu_topology.c | 186 
 target/s390x/kvm/kvm.c  |   6 +-
 target/s390x/meson.build|   1 +
 5 files changed, 274 insertions(+), 8 deletions(-)
 create mode 100644 target/s390x/cpu_topology.c

diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 7d6d01325b..dd878ac916 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -175,6 +175,7 @@ struct ArchCPU {
 /* needed for live migration */
 void *irqstate;
 uint32_t irqstate_saved_size;
+void *machine_data;
 };
 
 
@@ -565,6 +566,80 @@ typedef union SysIB {
 } SysIB;
 QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
 
+/*
+ * CPU Topology List provided by STSI with fc=15 provides a list
+ * of two different Topology List Entries (TLE) types to specify
+ * the topology hierarchy.
+ *
+ * - Container Topology List Entry
+ *   Defines a container to contain other Topology List Entries
+ *   of any type, nested containers or CPU.
+ * - CPU Topology List Entry
+ *   Specifies the CPUs position, type, entitlement and polarization
+ *   of the CPUs contained in the last Container TLE.
+ *
+ * There can be theoretically up to five levels of containers, QEMU
+ * uses only one level, the socket level.
+ *
+ * A container of with a nesting level (NL) greater than 1 can only
+ * contain another container of nesting level NL-1.
+ *
+ * A container of nesting level 1 (socket), contains as many CPU TLE
+ * as needed to describe the position and qualities of all CPUs inside
+ * the container.
+ * The qualities of a CPU are polarization, entitlement and type.
+ *
+ * The CPU TLE defines the position of the CPUs of identical qualities
+ * using a 64bits mask which first bit has its offset defined by
+ * the CPU address orgin field of the CPU TLE like in:
+ * CPU address = origin * 64 + bit position within the mask
+ *
+ */
+/* Container type Topology List Entry */
+typedef struct SysIBTl_container {
+uint8_t nl;
+uint8_t reserved[6];
+uint8_t id;
+} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_container;
+QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
+
+/* CPU type Topology List Entry */
+typedef struct SysIBTl_cpu {
+uint8_t nl;
+uint8_t reserved0[3];
+uint8_t reserved1:5;
+uint8_t dedicated:1;
+uint8_t polarity:2;
+uint8_t type;
+uint16_t origin;
+uint64_t mask;
+} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_cpu;
+QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
+
+#define S390_TOPOLOGY_MAG  6
+#define S390_TOPOLOGY_MAG6 0
+#define S390_TOPOLOGY_MAG5 1
+#define S390_TOPOLOGY_MAG4 2
+#define S390_TOPOLOGY_MAG3 3
+#define S390_TOPOLOGY_MAG2 4
+#define S390_TOPOLOGY_MAG1 5
+/* Configuration topology */
+typedef struct SysIB_151x {
+uint8_t  reserved0[2];
+uint16_t length;
+uint8_t  mag[S390_TOPOLOGY_MAG];
+uint8_t  reserved1;
+uint8_t  mnest;
+uint32_t reserved2;
+char tle[0];
+} QEMU_PACKED QEMU_ALIGNED(8) SysIB_151x;
+QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
+
+/* Max size of a SYSIB structure is when all CPU are alone in a container */
+#define S390_TOPOLOGY_SYSIB_SIZE (sizeof(SysIB_151x) + 
\
+  S390_MAX_CPUS * (sizeof(SysIBTl_container) + 
\
+   sizeof(SysIBTl_cpu)))
+
 /* MMU defines */
 #define ASCE_ORIGIN   (~0xfffULL) /* segment table origin 
*/
 #define ASCE_SUBSPACE 0x200   /* subspace group control   
*/
@@ -843,4 +918,6 @@ S390CPU *s390_cpu_addr2state(uint16_t cpu_addr);
 
 #include "exec/cpu-all.h"
 
+void insert_stsi_15_1_x(S390CPU *cpu, int sel2, __u64 addr, uint8_t ar);
+
 #endif
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 973bbdd36e..4be07959fd 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -64,11 +64,10 @@ S390CPU *s390_cpu_addr2state(uint16_t cpu_addr)
 return S390_CPU(ms->possible_cpus->cpus[cpu_addr].cpu);
 }
 
-static S390CPU *s390x_new_cpu(const char *typename, uint32_t core_id,
-  Error **errp)
+static void s390x_new_cpu(MachineState *ms, uint32_t core_id, Error **errp)
 {
-S390CPU *cpu = S390_CPU(object_new(typename));
-S390CPU *ret = NULL;
+S390CcwMachineState *s390ms = S390_CCW_MACHINE(ms);
+S390CPU *cpu = S390_CPU(object_new(ms->cpu_type));
 
 if (!object_property_set_int(OBJECT(cpu), "core-id", core_id, errp)) {
 goto out;
@@ -76,11 +75,10 @@ static S390CPU *s390x_new_cpu(const char *typename, 
uint32_t core_id,
 if (!qdev_realize(DEVICE(cpu), NULL, errp)) {
 goto out;
 }
-ret = cpu;
+cpu->machine_data = 

[PATCH v12 0/7] s390x: CPU Topology

2022-11-29 Thread Pierre Morel
Hi,

The implementation of the CPU Topology in QEMU has been modified
since the last patch series.

- The two preliminary patches have been accepted and are no longer
  part of this series.

- The topology machine property has been abandoned

- the topology_capable QEMU capability has been abandoned

- both were replaced with a new CPU feature, disable-topology,
  to fence off the ctop topology information feature by default.

To use the QEMU patches, you will need Linux V6-rc1 or newer,
or use the following Linux mainline patches:

f5ecfee94493 2022-07-20 KVM: s390: resetting the Topology-Change-Report
24fe0195bc19 2022-07-20 KVM: s390: guest support for topology function 
0130337ec45b 2022-07-20 KVM: s390: Cleanup ipte lock access and SIIF fac.. 

Currently this code is for KVM only; I have no idea whether it is interesting
to provide a TCG patch. If ever, it will be done in another series.

To have a better understanding of the S390x CPU Topology and its
implementation in QEMU you can have a look at the documentation in the
last patch of this series.

The admin will want to match the host and the guest topology, taking
into account that the guest does not recognize multithreading.
Consequently, two vCPUs assigned to threads of the same real CPU should
preferably be assigned to the same socket of the guest machine.

Regards,
Pierre

Pierre Morel (7):
  s390x/cpu topology: Creating CPU topology device
  s390x/cpu topology: reporting the CPU topology to the guest
  s390x/cpu_topology: resetting the Topology-Change-Report
  s390x/cpu_topology: CPU topology migration
  s390x/cpu_topology: interception of PTF instruction
  s390x/cpu_topology: activating CPU topology
  docs/s390x: document s390x cpu topology

 docs/system/s390x/cpu-topology.rst  |  80 +++
 include/hw/s390x/cpu-topology.h |  41 ++
 include/hw/s390x/s390-virtio-ccw.h  |   7 +
 target/s390x/cpu.h  |  79 +++
 target/s390x/kvm/kvm_s390x.h|   1 +
 target/s390x/cpu_features_def.h.inc |   1 +
 hw/s390x/cpu-topology.c | 200 
 hw/s390x/s390-virtio-ccw.c  |  38 +-
 target/s390x/cpu-sysemu.c   |  21 +++
 target/s390x/cpu_models.c   |  17 +++
 target/s390x/cpu_topology.c | 191 ++
 target/s390x/gen-features.c |   3 +
 target/s390x/kvm/kvm.c  |  48 ++-
 hw/s390x/meson.build|   1 +
 target/s390x/meson.build|   1 +
 15 files changed, 721 insertions(+), 8 deletions(-)
 create mode 100644 docs/system/s390x/cpu-topology.rst
 create mode 100644 include/hw/s390x/cpu-topology.h
 create mode 100644 hw/s390x/cpu-topology.c
 create mode 100644 target/s390x/cpu_topology.c

-- 
2.31.1

- since v11

- new CPU flag "disable-topology"
  I would have taken "topology" if I had been able to have
  it default to false.
  (Christian, Thomas)

- Build the topology during the interception of the
  STSI instruction.
  (Cedric)

- return CC3 in case the calculated SYSIB length is
  greater than 4096.
  (Janis)

- minor corrections on documentation

- since v10

- change machine attribute "topology-disable" to "topology"
  (Cedric)
- Add preliminary patch for machine properties
  (Cedric)
- Use next machine as 7.2
  (Cedric / Connie)
- Remove unecessary mutex
  (Thomas)
- use ENOTSUP return value for kvm_s390_topology_set_mtcr()
  (Cedric)
- Add explanation on container and cpu TLEs
  (Thomas)
- use again cpu and socket count in topology structure
  (Cedric)
- Suppress the S390TopoTLE structure and integrate
  the TLE masks to the socket structure.
  (-)
- the STSI instruction now finds the topology from the machine
  (Cedric)

- since v9

- remove books and drawers

- remove thread denying and replace with a merge
  of cores * threads to specify the CPUs available
  to the guest

- add a class option to avoid topology on older
  machines
  (Cedric)

- Allocate a SYSIB buffer of the maximal length to
  avoid overflow.
  (Nico, Janis)

- suppress redundancy of smp parameters in topology
  and use directly the machine smp structure

- Early check for topology support
  (Cedric)

- since v8

- Linux patches are now mainline

- simplification of the implementation
  (Janis)

- Migration, new machine definition
  (Thomas)

- Documentation

- since v7

- Coherence with the Linux patch series changes for MTCR get
  (Pierre)

- check return values during new CPU creation
  (Thomas)

- Improving codding style and argument usages
  (Thomas)

- since v6

- Changes on smp args in qemu-options
  (Daniel)
  
- changed comments in machine.json
  (Daniel)
 
- Added reset
  (Janosch)

- since v5

- rebasing on newer QEMU version

- reworked most lines above 80 characters.

- since v4

- Added drawer and books to topology

- Added numa topology

- Added documentation

- since v3

- Added migration
  (Thomas)

- Separated STSI instruction from KVM to prepare TCG
  (Thomas)

- Take care of endianess to prepare TCG

[PATCH v12 1/7] s390x/cpu topology: Creating CPU topology device

2022-11-29 Thread Pierre Morel
We will need a Topology device to transfer the topology
during migration and to implement machine reset.

The device creation is fenced by s390_has_topology().

Signed-off-by: Pierre Morel 
---
 include/hw/s390x/cpu-topology.h| 44 +++
 include/hw/s390x/s390-virtio-ccw.h |  1 +
 hw/s390x/cpu-topology.c| 87 ++
 hw/s390x/s390-virtio-ccw.c | 25 +
 hw/s390x/meson.build   |  1 +
 5 files changed, 158 insertions(+)
 create mode 100644 include/hw/s390x/cpu-topology.h
 create mode 100644 hw/s390x/cpu-topology.c

diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
new file mode 100644
index 00..e88059ccec
--- /dev/null
+++ b/include/hw/s390x/cpu-topology.h
@@ -0,0 +1,44 @@
+/*
+ * CPU Topology
+ *
+ * Copyright IBM Corp. 2022
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+#ifndef HW_S390X_CPU_TOPOLOGY_H
+#define HW_S390X_CPU_TOPOLOGY_H
+
+#include "hw/qdev-core.h"
+#include "qom/object.h"
+
+#define S390_TOPOLOGY_CPU_IFL 0x03
+#define S390_TOPOLOGY_MAX_ORIGIN ((63 + S390_MAX_CPUS) / 64)
+
+#define S390_TOPOLOGY_POLARITY_HORIZONTAL  0x00
+#define S390_TOPOLOGY_POLARITY_VERTICAL_LOW0x01
+#define S390_TOPOLOGY_POLARITY_VERTICAL_MEDIUM 0x02
+#define S390_TOPOLOGY_POLARITY_VERTICAL_HIGH   0x03
+
+typedef struct S390TopoSocket {
+int active_count;
+uint64_t mask[S390_TOPOLOGY_MAX_ORIGIN];
+} S390TopoSocket;
+
+struct S390Topology {
+SysBusDevice parent_obj;
+uint32_t num_cores;
+uint32_t num_sockets;
+S390TopoSocket *socket;
+};
+
+#define TYPE_S390_CPU_TOPOLOGY "s390-topology"
+OBJECT_DECLARE_SIMPLE_TYPE(S390Topology, S390_CPU_TOPOLOGY)
+
+static inline bool s390_has_topology(void)
+{
+return false;
+}
+
+#endif
diff --git a/include/hw/s390x/s390-virtio-ccw.h 
b/include/hw/s390x/s390-virtio-ccw.h
index 9bba21a916..47ce0aa6fa 100644
--- a/include/hw/s390x/s390-virtio-ccw.h
+++ b/include/hw/s390x/s390-virtio-ccw.h
@@ -28,6 +28,7 @@ struct S390CcwMachineState {
 bool dea_key_wrap;
 bool pv;
 uint8_t loadparm[8];
+DeviceState *topology;
 };
 
 struct S390CcwMachineClass {
diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
new file mode 100644
index 00..bbf97cd66a
--- /dev/null
+++ b/hw/s390x/cpu-topology.c
@@ -0,0 +1,87 @@
+/*
+ * CPU Topology
+ *
+ * Copyright IBM Corp. 2022
+ * Author(s): Pierre Morel 
+
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "hw/sysbus.h"
+#include "hw/qdev-properties.h"
+#include "hw/boards.h"
+#include "qemu/typedefs.h"
+#include "target/s390x/cpu.h"
+#include "hw/s390x/s390-virtio-ccw.h"
+#include "hw/s390x/cpu-topology.h"
+
+/**
+ * s390_topology_realize:
+ * @dev: the device state
+ *
+ * We free the socket array allocated in realize.
+ */
+static void s390_topology_unrealize(DeviceState *dev)
+{
+S390Topology *topo = S390_CPU_TOPOLOGY(dev);
+
+g_free(topo->socket);
+}
+
+/**
+ * s390_topology_realize:
+ * @dev: the device state
+ * @errp: the error pointer (not used)
+ *
+ * During realize the machine CPU topology is initialized with the
+ * QEMU -smp parameters.
+ * The maximum count of CPU TLE in the all Topology can not be greater
+ * than the maximum CPUs.
+ */
+static void s390_topology_realize(DeviceState *dev, Error **errp)
+{
+S390Topology *topo = S390_CPU_TOPOLOGY(dev);
+
+topo->socket = g_new0(S390TopoSocket, topo->num_sockets);
+}
+
+static Property s390_topology_properties[] = {
+DEFINE_PROP_UINT32("num-cores", S390Topology, num_cores, 1),
+DEFINE_PROP_UINT32("num-sockets", S390Topology, num_sockets, 1),
+DEFINE_PROP_END_OF_LIST(),
+};
+
+/**
+ * topology_class_init:
+ * @oc: Object class
+ * @data: (not used)
+ *
+ * A very simple object we will need for reset and migration.
+ */
+static void topology_class_init(ObjectClass *oc, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(oc);
+
+dc->realize = s390_topology_realize;
+dc->unrealize = s390_topology_unrealize;
+device_class_set_props(dc, s390_topology_properties);
+set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+}
+
+static const TypeInfo cpu_topology_info = {
+.name  = TYPE_S390_CPU_TOPOLOGY,
+.parent= TYPE_SYS_BUS_DEVICE,
+.instance_size = sizeof(S390Topology),
+.class_init= topology_class_init,
+};
+
+static void topology_register(void)
+{
+type_register_static(&cpu_topology_info);
+}
+type_init(topology_register);
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 2e64ffab45..973bbdd36e 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -44,6 +44,7 @@
 #include 

[PATCH v12 3/7] s390x/cpu_topology: resetting the Topology-Change-Report

2022-11-29 Thread Pierre Morel
During a subsystem reset the Topology-Change-Report is cleared
by the machine.
Let's ask KVM to clear the Modified Topology Change Report (MTCR)
bit of the SCA in the case of a subsystem reset.
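
For context, the clearing is driven by the machine's subsystem reset:
because TYPE_S390_CPU_TOPOLOGY is added to reset_dev_types[] below, the
topology device is reset along with the other listed devices, and that is
what invokes s390_topology_reset() and, through it, the KVM call. Roughly
(an approximation of QEMU's existing subsystem_reset() loop, not code from
this patch; the exact reset call used in the tree may differ):

static void subsystem_reset(void)
{
    DeviceState *dev;
    int i;

    /* walk the device types listed in reset_dev_types[] and reset each one */
    for (i = 0; i < ARRAY_SIZE(reset_dev_types); i++) {
        dev = DEVICE(object_resolve_path_type("", reset_dev_types[i], NULL));
        if (dev) {
            device_cold_reset(dev); /* runs the device reset, here s390_topology_reset() */
        }
    }
}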

Signed-off-by: Pierre Morel 
Reviewed-by: Nico Boehr 
Reviewed-by: Janis Schoetterl-Glausch 
---
 target/s390x/cpu.h   |  1 +
 target/s390x/kvm/kvm_s390x.h |  1 +
 hw/s390x/cpu-topology.c  | 12 
 hw/s390x/s390-virtio-ccw.c   |  1 +
 target/s390x/cpu-sysemu.c| 13 +
 target/s390x/kvm/kvm.c   | 17 +
 6 files changed, 45 insertions(+)

diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index dd878ac916..2bd3d417e4 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -854,6 +854,7 @@ void s390_enable_css_support(S390CPU *cpu);
 void s390_do_cpu_set_diag318(CPUState *cs, run_on_cpu_data arg);
 int s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch_id,
 int vq, bool assign);
+void s390_cpu_topology_reset(void);
 #ifndef CONFIG_USER_ONLY
 unsigned int s390_cpu_set_state(uint8_t cpu_state, S390CPU *cpu);
 #else
diff --git a/target/s390x/kvm/kvm_s390x.h b/target/s390x/kvm/kvm_s390x.h
index f9785564d0..649dae5948 100644
--- a/target/s390x/kvm/kvm_s390x.h
+++ b/target/s390x/kvm/kvm_s390x.h
@@ -47,5 +47,6 @@ void kvm_s390_crypto_reset(void);
 void kvm_s390_restart_interrupt(S390CPU *cpu);
 void kvm_s390_stop_interrupt(S390CPU *cpu);
 void kvm_s390_set_diag318(CPUState *cs, uint64_t diag318_info);
+int kvm_s390_topology_set_mtcr(uint64_t attr);
 
 #endif /* KVM_S390X_H */
diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
index bbf97cd66a..b1082a4b88 100644
--- a/hw/s390x/cpu-topology.c
+++ b/hw/s390x/cpu-topology.c
@@ -56,6 +56,17 @@ static Property s390_topology_properties[] = {
 DEFINE_PROP_END_OF_LIST(),
 };
 
+/**
+ * s390_topology_reset:
+ * @dev: the device
+ *
+ * Calls the sysemu topology reset
+ */
+static void s390_topology_reset(DeviceState *dev)
+{
+s390_cpu_topology_reset();
+}
+
 /**
  * topology_class_init:
  * @oc: Object class
@@ -71,6 +82,7 @@ static void topology_class_init(ObjectClass *oc, void *data)
 dc->unrealize = s390_topology_unrealize;
 device_class_set_props(dc, s390_topology_properties);
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+dc->reset = s390_topology_reset;
 }
 
 static const TypeInfo cpu_topology_info = {
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 4be07959fd..f29e383d23 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -125,6 +125,7 @@ static const char *const reset_dev_types[] = {
 "s390-flic",
 "diag288",
 TYPE_S390_PCI_HOST_BRIDGE,
+TYPE_S390_CPU_TOPOLOGY,
 };
 
 static void subsystem_reset(void)
diff --git a/target/s390x/cpu-sysemu.c b/target/s390x/cpu-sysemu.c
index 948e4bd3e0..e27864c5f5 100644
--- a/target/s390x/cpu-sysemu.c
+++ b/target/s390x/cpu-sysemu.c
@@ -306,3 +306,16 @@ void s390_do_cpu_set_diag318(CPUState *cs, run_on_cpu_data arg)
 kvm_s390_set_diag318(cs, arg.host_ulong);
 }
 }
+
+void s390_cpu_topology_reset(void)
+{
+int ret;
+
+if (kvm_enabled()) {
+ret = kvm_s390_topology_set_mtcr(0);
+if (ret) {
+error_report("Failed to set Modified Topology Change Report: %s",
+ strerror(-ret));
+}
+}
+}
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 7dc96f3663..5b6383eab0 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -2593,6 +2593,23 @@ int kvm_s390_get_zpci_op(void)
 return cap_zpci_op;
 }
 
+int kvm_s390_topology_set_mtcr(uint64_t attr)
+{
+struct kvm_device_attr attribute = {
+.group = KVM_S390_VM_CPU_TOPOLOGY,
+.attr  = attr,
+};
+
+if (!s390_has_feat(S390_FEAT_CONFIGURATION_TOPOLOGY)) {
+return 0;
+}
+if (!kvm_vm_check_attr(kvm_state, KVM_S390_VM_CPU_TOPOLOGY, attr)) {
+return -ENOTSUP;
+}
+
+return kvm_vm_ioctl(kvm_state, KVM_SET_DEVICE_ATTR, &attribute);
+}
+
 void kvm_arch_accel_class_init(ObjectClass *oc)
 {
 }
-- 
2.31.1




[PATCH v12 4/7] s390x/cpu_topology: CPU topology migration

2022-11-29 Thread Pierre Morel
The migration can only take place if the source and destination
of the migration either both use or both do not use the CPU
topology facility.

We indicate a change in topology during migration post-load in
case the topology changed between source and destination.

Signed-off-by: Pierre Morel 
---
 target/s390x/cpu.h|  1 +
 hw/s390x/cpu-topology.c   | 49 +++
 target/s390x/cpu-sysemu.c |  8 +++
 3 files changed, 58 insertions(+)

diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 2bd3d417e4..d8c00de980 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -855,6 +855,7 @@ void s390_do_cpu_set_diag318(CPUState *cs, run_on_cpu_data arg);
 int s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch_id,
 int vq, bool assign);
 void s390_cpu_topology_reset(void);
+int s390_cpu_topology_mtcr_set(void);
 #ifndef CONFIG_USER_ONLY
 unsigned int s390_cpu_set_state(uint8_t cpu_state, S390CPU *cpu);
 #else
diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
index b1082a4b88..32908d13bb 100644
--- a/hw/s390x/cpu-topology.c
+++ b/hw/s390x/cpu-topology.c
@@ -19,6 +19,7 @@
 #include "target/s390x/cpu.h"
 #include "hw/s390x/s390-virtio-ccw.h"
 #include "hw/s390x/cpu-topology.h"
+#include "migration/vmstate.h"
 
 /**
 * s390_topology_unrealize:
@@ -67,6 +68,53 @@ static void s390_topology_reset(DeviceState *dev)
 s390_cpu_topology_reset();
 }
 
+/**
+ * cpu_topology_postload
+ * @opaque: a pointer to the S390Topology
+ * @version_id: version identifier
+ *
+ * We check that the topology is either used or not used
+ * identically on both sides.
+ *
+ * If the topology is in use we set the Modified Topology Change Report
+ * on the destination host.
+ */
+static int cpu_topology_postload(void *opaque, int version_id)
+{
+int ret;
+
+/* We do not support CPU Topology, all is good */
+if (!s390_has_topology()) {
+return 0;
+}
+
+/* We support CPU Topology, set the MTCR unconditionally */
+ret = s390_cpu_topology_mtcr_set();
+if (ret) {
+error_report("Failed to set MTCR: %s", strerror(-ret));
+}
+return ret;
+}
+
+/**
+ * cpu_topology_needed:
+ * @opaque: The pointer to the S390Topology
+ *
+ * We always need to know if source and destination use the topology.
+ */
+static bool cpu_topology_needed(void *opaque)
+{
+return s390_has_topology();
+}
+
+const VMStateDescription vmstate_cpu_topology = {
+.name = "cpu_topology",
+.version_id = 1,
+.post_load = cpu_topology_postload,
+.minimum_version_id = 1,
+.needed = cpu_topology_needed,
+};
+
 /**
  * topology_class_init:
  * @oc: Object class
@@ -83,6 +131,7 @@ static void topology_class_init(ObjectClass *oc, void *data)
 device_class_set_props(dc, s390_topology_properties);
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 dc->reset = s390_topology_reset;
+dc->vmsd = &vmstate_cpu_topology;
 }
 
 static const TypeInfo cpu_topology_info = {
diff --git a/target/s390x/cpu-sysemu.c b/target/s390x/cpu-sysemu.c
index e27864c5f5..a8e3e6219d 100644
--- a/target/s390x/cpu-sysemu.c
+++ b/target/s390x/cpu-sysemu.c
@@ -319,3 +319,11 @@ void s390_cpu_topology_reset(void)
 }
 }
 }
+
+int s390_cpu_topology_mtcr_set(void)
+{
+if (kvm_enabled()) {
+return kvm_s390_topology_set_mtcr(1);
+}
+return -ENOENT;
+}
-- 
2.31.1




[PATCH v2 0/2] qga: improve "syslog" domain logging

2022-11-29 Thread Andrey Drobyshev via
These patches extend the QGA logging interface, primarily for Windows
guests.  They enable QGA to write to the Windows event log, much like
syslog() on *nix.  In addition, we get rid of the hardcoded log level
used by ga_log().

v2:
* Close event_log handle when doing cleanup_agent()
* Fix switch cases indentation as reported by scripts/checkpatch.pl

Andrey Drobyshev (2):
  qga-win: add logging to Windows event log
  qga: map GLib log levels to system levels

 configure |  3 +++
 qga/installer/qemu-ga.wxs |  5 
 qga/main.c| 50 +++
 qga/meson.build   | 19 ++-
 qga/messages-win32.mc |  9 +++
 5 files changed, 81 insertions(+), 5 deletions(-)
 create mode 100644 qga/messages-win32.mc

-- 
2.38.1




[PATCH v2 2/2] qga: map GLib log levels to system levels

2022-11-29 Thread Andrey Drobyshev via
This patch translates GLib-specific log levels to system ones, so that
they may be used by both *nix syslog() (as a "priority" argument) and
Windows ReportEvent() (as a "wType" argument).

Currently the only codepath that writes to the "syslog" domain is the
slog() function.  However, this patch allows the interface to be extended.

Note that since slog() uses the G_LOG_LEVEL_INFO level, its behaviour
doesn't change.
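
For reference, slog() is little more than a wrapper that logs to the
"syslog" domain at G_LOG_LEVEL_INFO, which the new mapping still turns
into LOG_INFO / EVENTLOG_INFORMATION_TYPE. Approximately (paraphrased
from qga, not part of this patch):

#include <glib.h>
#include <stdarg.h>

/* approximate shape of qga's slog() helper */
void slog(const gchar *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    g_logv("syslog", G_LOG_LEVEL_INFO, fmt, ap);
    va_end(ap);
}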

Originally-by: Yuri Pudgorodskiy 
Signed-off-by: Andrey Drobyshev 
Reviewed-by: Marc-André Lureau 
---
 qga/main.c | 36 ++--
 1 file changed, 34 insertions(+), 2 deletions(-)

diff --git a/qga/main.c b/qga/main.c
index 9c3c35a423..cf784b279d 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -314,6 +314,38 @@ void ga_enable_logging(GAState *s)
 s->logging_enabled = true;
 }
 
+static int glib_log_level_to_system(int level)
+{
+switch (level) {
+#ifndef _WIN32
+case G_LOG_LEVEL_ERROR:
+return LOG_ERR;
+case G_LOG_LEVEL_CRITICAL:
+return LOG_CRIT;
+case G_LOG_LEVEL_WARNING:
+return LOG_WARNING;
+case G_LOG_LEVEL_MESSAGE:
+return LOG_NOTICE;
+case G_LOG_LEVEL_DEBUG:
+return LOG_DEBUG;
+case G_LOG_LEVEL_INFO:
+default:
+return LOG_INFO;
+#else
+case G_LOG_LEVEL_ERROR:
+case G_LOG_LEVEL_CRITICAL:
+return EVENTLOG_ERROR_TYPE;
+case G_LOG_LEVEL_WARNING:
+return EVENTLOG_WARNING_TYPE;
+case G_LOG_LEVEL_MESSAGE:
+case G_LOG_LEVEL_INFO:
+case G_LOG_LEVEL_DEBUG:
+default:
+return EVENTLOG_INFORMATION_TYPE;
+#endif
+}
+}
+
 static void ga_log(const gchar *domain, GLogLevelFlags level,
const gchar *msg, gpointer opaque)
 {
@@ -327,9 +359,9 @@ static void ga_log(const gchar *domain, GLogLevelFlags level,
 level &= G_LOG_LEVEL_MASK;
 if (g_strcmp0(domain, "syslog") == 0) {
 #ifndef _WIN32
-syslog(LOG_INFO, "%s: %s", level_str, msg);
+syslog(glib_log_level_to_system(level), "%s: %s", level_str, msg);
 #else
-ReportEvent(s->event_log, EVENTLOG_INFORMATION_TYPE,
+ReportEvent(s->event_log, glib_log_level_to_system(level),
 0, 1, NULL, 1, 0, &msg, NULL);
 #endif
 } else if (level & s->log_level) {
-- 
2.38.1




[PATCH v2 1/2] qga-win: add logging to Windows event log

2022-11-29 Thread Andrey Drobyshev via
This commit allows QGA to write to the Windows event log using the Win32
API's ReportEvent() [1], much like syslog() under *nix guests.

In order to generate log message definitions we use a very basic message
text file [2], so that every QGA message gets ID 1.  The tools "windmc"
and "windres" are used to generate the ".rc" file and the COFF object
file respectively, and the COFF file is then linked into qemu-ga.exe.

[1] https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-reporteventa
[2] https://learn.microsoft.com/en-us/windows/win32/eventlog/message-text-files
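
As a standalone illustration of the Win32 calls being wired together here
(a hypothetical example program, not part of the patch; event ID 1 matches
the single message the text file defines):

#include <windows.h>

int main(void)
{
    /* hypothetical: register the same event source name the agent uses */
    HANDLE log = RegisterEventSourceA(NULL, "qemu-ga");
    LPCSTR msg = "guest agent started";

    if (log) {
        ReportEventA(log, EVENTLOG_INFORMATION_TYPE, 0, 1, NULL, 1, 0,
                     &msg, NULL);
        DeregisterEventSource(log);
    }
    return 0;
}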

Originally-by: Yuri Pudgorodskiy 
Signed-off-by: Andrey Drobyshev 
---
 configure |  3 +++
 qga/installer/qemu-ga.wxs |  5 +
 qga/main.c| 16 +---
 qga/meson.build   | 19 ++-
 qga/messages-win32.mc |  9 +
 5 files changed, 48 insertions(+), 4 deletions(-)
 create mode 100644 qga/messages-win32.mc

diff --git a/configure b/configure
index 26c7bc5154..789a4f6cc9 100755
--- a/configure
+++ b/configure
@@ -372,6 +372,7 @@ smbd="$SMBD"
 strip="${STRIP-${cross_prefix}strip}"
 widl="${WIDL-${cross_prefix}widl}"
 windres="${WINDRES-${cross_prefix}windres}"
+windmc="${WINDMC-${cross_prefix}windmc}"
 pkg_config_exe="${PKG_CONFIG-${cross_prefix}pkg-config}"
 query_pkg_config() {
 "${pkg_config_exe}" ${QEMU_PKG_CONFIG_FLAGS} "$@"
@@ -2561,6 +2562,7 @@ if test "$skip_meson" = no; then
   echo "strip = [$(meson_quote $strip)]" >> $cross
   echo "widl = [$(meson_quote $widl)]" >> $cross
   echo "windres = [$(meson_quote $windres)]" >> $cross
+  echo "windmc = [$(meson_quote $windmc)]" >> $cross
   if test "$cross_compile" = "yes"; then
 cross_arg="--cross-file config-meson.cross"
 echo "[host_machine]" >> $cross
@@ -2667,6 +2669,7 @@ preserve_env SMBD
 preserve_env STRIP
 preserve_env WIDL
 preserve_env WINDRES
+preserve_env WINDMC
 
 printf "exec" >>config.status
 for i in "$0" "$@"; do
diff --git a/qga/installer/qemu-ga.wxs b/qga/installer/qemu-ga.wxs
index 73ce2c4965..d9567836f3 100644
--- a/qga/installer/qemu-ga.wxs
+++ b/qga/installer/qemu-ga.wxs
@@ -110,6 +110,11 @@
   
   
 
+
+  
+  
+
   
 
   
diff --git a/qga/main.c b/qga/main.c
index b3580508fa..e9f4f44cbb 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -83,6 +83,7 @@ struct GAState {
 #ifdef _WIN32
 GAService service;
 HANDLE wakeup_event;
+HANDLE event_log;
 #endif
 bool delimit_response;
 bool frozen;
@@ -324,13 +325,14 @@ static void ga_log(const gchar *domain, GLogLevelFlags level,
 }
 
 level &= G_LOG_LEVEL_MASK;
-#ifndef _WIN32
 if (g_strcmp0(domain, "syslog") == 0) {
+#ifndef _WIN32
 syslog(LOG_INFO, "%s: %s", level_str, msg);
-} else if (level & s->log_level) {
 #else
-if (level & s->log_level) {
+ReportEvent(s->event_log, EVENTLOG_INFORMATION_TYPE,
+0, 1, NULL, 1, 0, &msg, NULL);
 #endif
+} else if (level & s->log_level) {
 g_autoptr(GDateTime) now = g_date_time_new_now_utc();
 g_autofree char *nowstr = g_date_time_format(now, "%s.%f");
 fprintf(s->log_file, "%s: %s: %s\n", nowstr, level_str, msg);
@@ -1286,6 +1288,13 @@ static GAState *initialize_agent(GAConfig *config, int socket_activation)
 g_debug("Guest agent version %s started", QEMU_FULL_VERSION);
 
 #ifdef _WIN32
+s->event_log = RegisterEventSource(NULL, "qemu-ga");
+if (!s->event_log) {
+g_autofree gchar *errmsg = g_win32_error_message(GetLastError());
+g_critical("unable to register event source: %s", errmsg);
+return NULL;
+}
+
 /* On win32 the state directory is application specific (be it the default
  * or a user override). We got past the command line parsing; let's create
  * the directory (with any intermediate directories). If we run into an
@@ -1377,6 +1386,7 @@ static void cleanup_agent(GAState *s)
 {
 #ifdef _WIN32
 CloseHandle(s->wakeup_event);
+CloseHandle(s->event_log);
 #endif
 if (s->command_state) {
 ga_command_state_cleanup_all(s->command_state);
diff --git a/qga/meson.build b/qga/meson.build
index 3cfb9166e5..1ff159edc1 100644
--- a/qga/meson.build
+++ b/qga/meson.build
@@ -98,7 +98,24 @@ if targetos == 'windows'
   endif
 endif
 
-qga = executable('qemu-ga', qga_ss.sources(),
+qga_objs = []
+if targetos == 'windows'
+  windmc = find_program('windmc', required: true)
+  windres = find_program('windres', required: true)
+
+  msgrc = custom_target('messages-win32.rc',
+input: 'messages-win32.mc',
+output: ['messages-win32.rc', 'MSG00409.bin', 'messages-win32.h'],
+command: [windmc, '-h', '@OUTDIR@', '-r', '@OUTDIR@', '@INPUT@'])
+  msgobj = custom_target('messages-win32.o',
+ input: msgrc[0],
+ output: 'messages-win32.o',

Re: [PATCH for-7.2] replay: Fix declaration of replay_read_next_clock

2022-11-29 Thread Stefan Hajnoczi
On Tue, 29 Nov 2022 at 02:39, Philippe Mathieu-Daudé  wrote:
>
> On 29/11/22 02:05, Richard Henderson wrote:
> > Fixes the build with gcc 13:
> >
> > replay/replay-time.c:34:6: error: conflicting types for  \
> >'replay_read_next_clock' due to enum/integer mismatch; \
> >have 'void(ReplayClockKind)' [-Werror=enum-int-mismatch]
> > 34 | void replay_read_next_clock(ReplayClockKind kind)
> >|  ^~
> > In file included from ../qemu/replay/replay-time.c:14:
> > replay/replay-internal.h:139:6: note: previous declaration of \
> >'replay_read_next_clock' with type 'void(unsigned int)'
> >139 | void replay_read_next_clock(unsigned int kind);
> >|  ^~
> >
> > Fixes: 8eda206e090 ("replay: recording and replaying clock ticks")
> > Signed-off-by: Richard Henderson 
> > ---
> >   replay/replay-internal.h | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/replay/replay-internal.h b/replay/replay-internal.h
> > index 89e377be90..b6836354ac 100644
> > --- a/replay/replay-internal.h
> > +++ b/replay/replay-internal.h
> > @@ -136,7 +136,7 @@ bool replay_next_event_is(int event);
> >   /*! Reads next clock value from the file.
> >   If clock kind read from the file is different from the parameter,
> >   the value is not used. */
> > -void replay_read_next_clock(unsigned int kind);
> > +void replay_read_next_clock(ReplayClockKind kind);
>
> Preferably having this file including "sysemu/replay.h" in the same
> patch:

ReplayClockKind isn't declared by anything in replay-internal.h but I
see another instance of this in the file (e.g. InputEvent).

Maybe send a follow-up patch for 8.0 that cleans up the #includes?

Stefan
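
For illustration, a hypothetical shape such a clean-up could take (not an
actual patch), pulling in the headers that declare the types the internal
header already relies on:

/* replay/replay-internal.h -- hypothetical follow-up sketch */
#include "sysemu/replay.h"        /* declares ReplayClockKind */
#include "qapi/qapi-types-ui.h"   /* declares InputEvent */

void replay_read_next_clock(ReplayClockKind kind);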



Re: [PATCH v2 1/1] hw/arm/boot: set initrd with #[address/size]-cells type in fdt

2022-11-29 Thread Schspa Shi


Peter Maydell  writes:

> On Tue, 29 Nov 2022 at 10:48, Schspa Shi  wrote:
>>
>> We use a 32-bit value for linux,initrd-[start/end]; when we have
>> loader_start > 4GB, a wrong initrd_start is passed to the kernel,
>> and the kernel will report the following warning.
>>
>> [0.00] [ cut here ]
>> [0.00] initrd not fully accessible via the linear mapping -- please 
>> check your bootloader ...
>> [0.00] WARNING: CPU: 0 PID: 0 at arch/arm64/mm/init.c:355 
>> arm64_memblock_init+0x158/0x244
>> [0.00] Modules linked in:
>> [0.00] CPU: 0 PID: 0 Comm: swapper Tainted: GW  
>> 6.1.0-rc3-13250-g30a0b95b1335-dirty #28
>> [0.00] Hardware name: Horizon Sigi Virtual development board (DT)
>> [0.00] pstate: 60c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS 
>> BTYPE=--)
>> [0.00] pc : arm64_memblock_init+0x158/0x244
>> [0.00] lr : arm64_memblock_init+0x158/0x244
>> [0.00] sp : 89273df0
>> [0.00] x29: 89273df0 x28: 001000cc0010 x27: 
>> 8000
>> [0.00] x26: 0050a3e2 x25: 88b46000 x24: 
>> 88b46000
>> [0.00] x23: 88a53000 x22: 8942 x21: 
>> 88a53000
>> [0.00] x20: 0400 x19: 0400 x18: 
>> 1020
>> [0.00] x17: 6568632065736165 x16: 6c70202d2d20676e x15: 
>> 697070616d207261
>> [0.00] x14: 656e696c20656874 x13: 0a2e2e2e20726564 x12: 
>> 
>> [0.00] x11:  x10:  x9 : 
>> 
>> [0.00] x8 :  x7 : 796c6c756620746f x6 : 
>> 6e20647274696e69
>> [0.00] x5 : 893c7c47 x4 : 88a2102f x3 : 
>> 89273a88
>> [0.00] x2 : 8000f038 x1 : 00c0 x0 : 
>> 0056
>> [0.00] Call trace:
>> [0.00]  arm64_memblock_init+0x158/0x244
>> [0.00]  setup_arch+0x164/0x1cc
>> [0.00]  start_kernel+0x94/0x4ac
>> [0.00]  __primary_switched+0xb4/0xbc
>> [0.00] ---[ end trace  ]---
>> [0.00] Zone ranges:
>> [0.00]   DMA  [mem 0x0010-0x001007ff]
>>
>> To fix it, we can change it to #[address/size]-cells type.
>>
>> Signed-off-by: Schspa Shi 
>>
>> --
>>
>> Changelog:
>> v1 -> v2:
>> - Use #[address/size]-cells for data type.
>> ---
>>  hw/arm/boot.c | 10 ++
>>  1 file changed, 6 insertions(+), 4 deletions(-)
>>
>> diff --git a/hw/arm/boot.c b/hw/arm/boot.c
>> index 57efb61ee419..98cd1fdad2c6 100644
>> --- a/hw/arm/boot.c
>> +++ b/hw/arm/boot.c
>> @@ -638,15 +638,17 @@ int arm_load_dtb(hwaddr addr, const struct 
>> arm_boot_info *binfo,
>>  }
>>
>>  if (binfo->initrd_size) {
>> -rc = qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-start",
>> -   binfo->initrd_start);
>> +rc = qemu_fdt_setprop_sized_cells(fdt, "/chosen", 
>> "linux,initrd-start",
>> + acells, binfo->initrd_start);
>
> Indentation here doesn't look quite right.
>

My mistake, and fixed in the v3 patch.

>>  if (rc < 0) {
>>  fprintf(stderr, "couldn't set /chosen/linux,initrd-start\n");
>>  goto fail;
>>  }
>>
>> -rc = qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-end",
>> -   binfo->initrd_start + 
>> binfo->initrd_size);
>> +rc = qemu_fdt_setprop_sized_cells(fdt, "/chosen", 
>> "linux,initrd-end",
>> +  scells,
>> +  binfo->initrd_start +
>> +  binfo->initrd_size);
>
> This should use 'acells', not 'scells', because it's an address,
> not a size.
>

It is fixed in the v3 patch too.
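
Presumably the v3 hunk then takes this shape, with both properties written
as addresses (an illustrative reconstruction, not quoted from v3):

rc = qemu_fdt_setprop_sized_cells(fdt, "/chosen", "linux,initrd-start",
                                  acells, binfo->initrd_start);
if (rc < 0) {
    fprintf(stderr, "couldn't set /chosen/linux,initrd-start\n");
    goto fail;
}

rc = qemu_fdt_setprop_sized_cells(fdt, "/chosen", "linux,initrd-end",
                                  acells,
                                  binfo->initrd_start + binfo->initrd_size);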

> thanks
> -- PMM

-- 
BRs
Schspa Shi



Re: [PATCH for 7.2? V2] vhost: fix vq dirty bitmap syncing when vIOMMU is enabled

2022-11-29 Thread Eric Auger
Hi Michael,

On 11/29/22 16:44, Michael S. Tsirkin wrote:
> On Tue, Nov 29, 2022 at 10:52:29AM +0100, Eric Auger wrote:
>> Hi Jason,
>>
>> On 11/29/22 05:02, Jason Wang wrote:
>>> When vIOMMU is enabled, the vq->used_phys is actually the IOVA not
>>> GPA. So we need to translate it to GPA before the syncing otherwise we
>>> may hit the following crash since IOVA could be out of the scope of
>>> the GPA log size. This could be noted when using virtio-IOMMU with
>>> vhost using 1G memory.
>>>
>>> Fixes: c471ad0e9bd46 ("vhost_net: device IOTLB support")
>>> Cc: qemu-sta...@nongnu.org
>>> Tested-by: Lei Yang 
>>> Reported-by: Yalan Zhang 
>>> Signed-off-by: Jason Wang 
>>> ---
>>> Changes since V1:
>>> - Fix the address calculation when used ring is not page aligned
>>> - Fix the length for each round of dirty bitmap syncing
>>> - Use LOG_GUEST_ERROR to log wrong used adddress
>>> - Various other tweaks
>>> ---
>>>  hw/virtio/vhost.c | 76 ++-
>>>  1 file changed, 56 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
>>> index d1c4c20b8c..0cd5f25fcb 100644
>>> --- a/hw/virtio/vhost.c
>>> +++ b/hw/virtio/vhost.c
>>> @@ -20,6 +20,7 @@
>>>  #include "qemu/range.h"
>>>  #include "qemu/error-report.h"
>>>  #include "qemu/memfd.h"
>>> +#include "qemu/log.h"
>>>  #include "standard-headers/linux/vhost_types.h"
>>>  #include "hw/virtio/virtio-bus.h"
>>>  #include "hw/virtio/virtio-access.h"
>>> @@ -106,6 +107,24 @@ static void vhost_dev_sync_region(struct vhost_dev 
>>> *dev,
>>>  }
>>>  }
>>>  
>>> +static bool vhost_dev_has_iommu(struct vhost_dev *dev)
>>> +{
>>> +VirtIODevice *vdev = dev->vdev;
>>> +
>>> +/*
>>> + * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support
>>> + * incremental memory mapping API via IOTLB API. For platform that
>>> + * does not have IOMMU, there's no need to enable this feature
>>> + * which may cause unnecessary IOTLB miss/update transactions.
>>> + */
>>> +if (vdev) {
>>> +return virtio_bus_device_iommu_enabled(vdev) &&
>>> +virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
>>> +} else {
>>> +return false;
>>> +}
>>> +}
>>> +
>>>  static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
>>> MemoryRegionSection *section,
>>> hwaddr first,
>>> @@ -137,8 +156,43 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev 
>>> *dev,
>>>  continue;
>>>  }
>>>  
>>> -vhost_dev_sync_region(dev, section, start_addr, end_addr, 
>>> vq->used_phys,
>>> -  range_get_last(vq->used_phys, 
>>> vq->used_size));
>>> +if (vhost_dev_has_iommu(dev)) {
>>> +IOMMUTLBEntry iotlb;
>>> +hwaddr used_phys = vq->used_phys, used_size = vq->used_size;
>>> +hwaddr phys, s;
>>> +
>>> +while (used_size) {
>>> +rcu_read_lock();
>>> +iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as,
>>> +  used_phys,
>>> +  true, 
>>> MEMTXATTRS_UNSPECIFIED);
>>> +rcu_read_unlock();
>>> +
>>> +if (!iotlb.target_as) {
>>> +qemu_log_mask(LOG_GUEST_ERROR, "translation "
>>> +  "failure for used_phys %"PRIx64"\n", 
>>> used_phys);
>> looks weird to see translation of "used_phys" whereas it is an iova. At
>> least I would reword the msg
>>> +return -EINVAL;
>>> +}
>>> +
>>> +phys = iotlb.translated_addr + (used_phys & 
>>> iotlb.addr_mask);
>> you may use a local variable storing this offset =
>>
>> used_phys & iotlb.addr_mask
>>
>>> +
>>> +/* Distance from start of used ring until last byte of
>>> +   IOMMU page */
>> you can avoid checkpatch warnings here
>>> +s = iotlb.addr_mask - (used_phys & iotlb.addr_mask);
>>> +/* Size of used ring, or of the part of it until end
>>> +   of IOMMU page */
>> and here
>>
>> I would suggest to rewrite this into
>> s =iotlb.addr_mask - (used_phys & iotlb.addr_mask) + 1
>> s = MIN(s, used_size);
> This does not work - if iotlb.addr_mask - (used_phys & iotlb.addr_mask)
> is all-ones then + 1 gives you 0 and MIN gives you 0.
> Theoretical but worth being safe here IMHO.
Ah OK, I should have read your previous discussion more thoroughly ...
Maybe just add a short comment then to justify the gymnastics below and
avoid tempting somebody else to rewrite it in a more common but wrong way.

Thanks

Eric
>
>
>>> +s = MIN(s, used_size - 1) + 1;
>>> +
>>> +vhost_dev_sync_region(dev, section, start_addr, end_addr, 
>>> phys,
>>> +  range_get_last(phys, s));
>>> + 
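
To make the wraparound concrete, a small standalone illustration with
hypothetical values (not code from the patch):

#include <inttypes.h>
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
    /* hypothetical identity mapping: the IOTLB entry covers the whole space */
    uint64_t addr_mask = UINT64_MAX;
    uint64_t used_phys = 0;            /* offset inside the IOMMU "page" is 0 */
    uint64_t used_size = 0x1000;

    /* the seemingly simpler rewrite: "+ 1" wraps to 0, MIN() then yields 0 */
    uint64_t s1 = addr_mask - (used_phys & addr_mask) + 1;
    s1 = MIN(s1, used_size);

    /* the form used in the patch avoids the overflow */
    uint64_t s2 = addr_mask - (used_phys & addr_mask);
    s2 = MIN(s2, used_size - 1) + 1;

    printf("s1=%" PRIu64 " s2=%" PRIu64 "\n", s1, s2);   /* s1=0 s2=4096 */
    return 0;
}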
