date:20220608

We should disable extensions in riscv_cpu_realize() if minimum required
priv spec version is not satisfied. This also ensures that machines with
priv spec v1.11 (or lower) cannot enable H, V, and various multi-letter
extensions.

Fixes: a775398be2e9 ("target/riscv: Add isa extenstion strings to the device 
tree")
Signed-off-by: Anup Patel 
---
 target/riscv/cpu.c | 57 ++
 1 file changed, 52 insertions(+), 5 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 9f9c27a3f5..e7eb65d708 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -43,9 +43,13 @@ static const char riscv_single_letter_exts[] = "IEMAFDQCPVH";
 
 struct isa_ext_data {
 const char *name;
-bool enabled;
+int min_version;
+bool *enabled;
 };
 
+#define ISA_EDATA_ENTRY(name, prop) {#name, PRIV_VERSION_1_10_0, 
>cfg.prop}
+#define ISA_EDATA_ENTRY2(name, min_ver, prop) {#name, min_ver, >cfg.prop}
+
 const char * const riscv_int_regnames[] = {
   "x0/zero", "x1/ra",  "x2/sp",  "x3/gp",  "x4/tp",  "x5/t0",   "x6/t1",
   "x7/t2",   "x8/s0",  "x9/s1",  "x10/a0", "x11/a1", "x12/a2",  "x13/a3",
@@ -513,8 +517,42 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
 CPURISCVState *env = >env;
 RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(dev);
 CPUClass *cc = CPU_CLASS(mcc);
-int priv_version = -1;
+int i, priv_version = -1;
 Error *local_err = NULL;
+const struct isa_ext_data isa_edata_arr[] = {
+ISA_EDATA_ENTRY2(h, PRIV_VERSION_1_12_0, ext_h),
+ISA_EDATA_ENTRY2(v, PRIV_VERSION_1_12_0, ext_v),
+ISA_EDATA_ENTRY2(zicsr, PRIV_VERSION_1_10_0, ext_icsr),
+ISA_EDATA_ENTRY2(zifencei, PRIV_VERSION_1_10_0, ext_ifencei),
+ISA_EDATA_ENTRY2(zfh, PRIV_VERSION_1_12_0, ext_zfh),
+ISA_EDATA_ENTRY2(zfhmin, PRIV_VERSION_1_12_0, ext_zfhmin),
+ISA_EDATA_ENTRY2(zfinx, PRIV_VERSION_1_12_0, ext_zfinx),
+ISA_EDATA_ENTRY2(zdinx, PRIV_VERSION_1_12_0, ext_zdinx),
+ISA_EDATA_ENTRY2(zba, PRIV_VERSION_1_12_0, ext_zba),
+ISA_EDATA_ENTRY2(zbb, PRIV_VERSION_1_12_0, ext_zbb),
+ISA_EDATA_ENTRY2(zbc, PRIV_VERSION_1_12_0, ext_zbc),
+ISA_EDATA_ENTRY2(zbkb, PRIV_VERSION_1_12_0, ext_zbkb),
+ISA_EDATA_ENTRY2(zbkc, PRIV_VERSION_1_12_0, ext_zbkc),
+ISA_EDATA_ENTRY2(zbkx, PRIV_VERSION_1_12_0, ext_zbkx),
+ISA_EDATA_ENTRY2(zbs, PRIV_VERSION_1_12_0, ext_zbs),
+ISA_EDATA_ENTRY2(zk, PRIV_VERSION_1_12_0, ext_zk),
+ISA_EDATA_ENTRY2(zkn, PRIV_VERSION_1_12_0, ext_zkn),
+ISA_EDATA_ENTRY2(zknd, PRIV_VERSION_1_12_0, ext_zknd),
+ISA_EDATA_ENTRY2(zkne, PRIV_VERSION_1_12_0, ext_zkne),
+ISA_EDATA_ENTRY2(zknh, PRIV_VERSION_1_12_0, ext_zknh),
+ISA_EDATA_ENTRY2(zkr, PRIV_VERSION_1_12_0, ext_zkr),
+ISA_EDATA_ENTRY2(zks, PRIV_VERSION_1_12_0, ext_zks),
+ISA_EDATA_ENTRY2(zksed, PRIV_VERSION_1_12_0, ext_zksed),
+ISA_EDATA_ENTRY2(zksh, PRIV_VERSION_1_12_0, ext_zksh),
+ISA_EDATA_ENTRY2(zkt, PRIV_VERSION_1_12_0, ext_zkt),
+ISA_EDATA_ENTRY2(zve32f, PRIV_VERSION_1_12_0, ext_zve32f),
+ISA_EDATA_ENTRY2(zve64f, PRIV_VERSION_1_12_0, ext_zve64f),
+ISA_EDATA_ENTRY2(zhinx, PRIV_VERSION_1_12_0, ext_zhinx),
+ISA_EDATA_ENTRY2(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin),
+ISA_EDATA_ENTRY2(svinval, PRIV_VERSION_1_12_0, ext_svinval),
+ISA_EDATA_ENTRY2(svnapot, PRIV_VERSION_1_12_0, ext_svnapot),
+ISA_EDATA_ENTRY2(svpbmt, PRIV_VERSION_1_12_0, ext_svpbmt),
+};
 
 cpu_exec_realizefn(cs, _err);
 if (local_err != NULL) {
@@ -541,6 +579,17 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
 set_priv_version(env, priv_version);
 }
 
+/* Force disable extensions if priv spec version does not match */
+for (i = 0; i < ARRAY_SIZE(isa_edata_arr); i++) {
+if (*isa_edata_arr[i].enabled &&
+(env->priv_ver < isa_edata_arr[i].min_version)) {
+*isa_edata_arr[i].enabled = false;
+warn_report("disabling %s extension for hart 0x%lx because "
+"privilege spec version does not match",
+isa_edata_arr[i].name, (unsigned long)env->mhartid);
+}
+}
+
 if (cpu->cfg.mmu) {
 riscv_set_feature(env, RISCV_FEATURE_MMU);
 }
@@ -1011,8 +1060,6 @@ static void riscv_cpu_class_init(ObjectClass *c, void 
*data)
 device_class_set_props(dc, riscv_cpu_properties);
 }
 
-#define ISA_EDATA_ENTRY(name, prop) {#name, cpu->cfg.prop}
-
 static void riscv_isa_string_ext(RISCVCPU *cpu, char **isa_str, int 
max_str_len)
 {
 char *old = *isa_str;
@@ -1071,7 +1118,7 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char 
**isa_str, int max_str_len)
 };
 
 for (i = 0; i < ARRAY_SIZE(isa_edata_arr); i++) {
-if (isa_edata_arr[i].enabled) {
+if (*isa_edata_arr[i].enabled) {
 new = g_strconcat(old, "_",

[PATCH v5 3/4] target/riscv: Update [m|h]tinst CSR in riscv_cpu_do_interrupt()

We should write the transformed instruction encoding of the trapped
instruction in the [m|h]tinst CSR at the time of taking a trap, as
defined by the RISC-V privileged specification v1.12.

Signed-off-by: Anup Patel 
---
 target/riscv/cpu.h|   3 +
 target/riscv/cpu_helper.c | 231 +-
 target/riscv/instmap.h|  43 +++
 3 files changed, 271 insertions(+), 6 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 194a58d760..11726e9031 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -271,6 +271,9 @@ struct CPUArchState {
 /* Signals whether the current exception occurred with two-stage address
translation active. */
 bool two_stage_lookup;
+/* Signals whether the current exception occurred while doing two-stage
+   address translation for the VS-stage page table walk. */
+bool two_stage_indirect_lookup;
 
 target_ulong scounteren;
 target_ulong mcounteren;
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 16c6045459..62a6762617 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -22,6 +22,7 @@
 #include "qemu/main-loop.h"
 #include "cpu.h"
 #include "exec/exec-all.h"
+#include "instmap.h"
 #include "tcg/tcg-op.h"
 #include "trace.h"
 #include "semihosting/common-semi.h"
@@ -1055,7 +1056,8 @@ restart:
 
 static void raise_mmu_exception(CPURISCVState *env, target_ulong address,
 MMUAccessType access_type, bool pmp_violation,
-bool first_stage, bool two_stage)
+bool first_stage, bool two_stage,
+bool two_stage_indirect)
 {
 CPUState *cs = env_cpu(env);
 int page_fault_exceptions, vm;
@@ -1105,6 +1107,7 @@ static void raise_mmu_exception(CPURISCVState *env, 
target_ulong address,
 }
 env->badaddr = address;
 env->two_stage_lookup = two_stage;
+env->two_stage_indirect_lookup = two_stage_indirect;
 }
 
 hwaddr riscv_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
@@ -1150,6 +1153,7 @@ void riscv_cpu_do_transaction_failed(CPUState *cs, hwaddr 
physaddr,
 env->badaddr = addr;
 env->two_stage_lookup = riscv_cpu_virt_enabled(env) ||
 riscv_cpu_two_stage_lookup(mmu_idx);
+env->two_stage_indirect_lookup = false;
 cpu_loop_exit_restore(cs, retaddr);
 }
 
@@ -1175,6 +1179,7 @@ void riscv_cpu_do_unaligned_access(CPUState *cs, vaddr 
addr,
 env->badaddr = addr;
 env->two_stage_lookup = riscv_cpu_virt_enabled(env) ||
 riscv_cpu_two_stage_lookup(mmu_idx);
+env->two_stage_indirect_lookup = false;
 cpu_loop_exit_restore(cs, retaddr);
 }
 
@@ -1190,6 +1195,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int 
size,
 bool pmp_violation = false;
 bool first_stage_error = true;
 bool two_stage_lookup = false;
+bool two_stage_indirect_error = false;
 int ret = TRANSLATE_FAIL;
 int mode = mmu_idx;
 /* default TLB page size */
@@ -1227,6 +1233,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int 
size,
  */
 if (ret == TRANSLATE_G_STAGE_FAIL) {
 first_stage_error = false;
+two_stage_indirect_error = true;
 access_type = MMU_DATA_LOAD;
 }
 
@@ -1310,12 +1317,207 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, 
int size,
 raise_mmu_exception(env, address, access_type, pmp_violation,
 first_stage_error,
 riscv_cpu_virt_enabled(env) ||
-riscv_cpu_two_stage_lookup(mmu_idx));
+riscv_cpu_two_stage_lookup(mmu_idx),
+two_stage_indirect_error);
 cpu_loop_exit_restore(cs, retaddr);
 }
 
 return true;
 }
+
+static target_ulong riscv_transformed_insn(CPURISCVState *env,
+   target_ulong insn)
+{
+bool xinsn_has_addr_offset = false;
+target_ulong xinsn = 0;
+
+/*
+ * Only Quadrant 0 and Quadrant 2 of RVC instruction space need to
+ * be uncompressed. The Quadrant 1 of RVC instruction space need
+ * not be transformed because these instructions won't generate
+ * any load/store trap.
+ */
+
+if ((insn & 0x3) != 0x3) {
+/* Transform 16bit instruction into 32bit instruction */
+switch (GET_C_OP(insn)) {
+case OPC_RISC_C_OP_QUAD0: /* Quadrant 0 */
+switch (GET_C_FUNC(insn)) {
+case OPC_RISC_C_FUNC_FLD_LQ:
+if (riscv_cpu_xlen(env) != 128) { /* C.FLD (RV32/64) */
+xinsn = OPC_RISC_FLD;
+xinsn = SET_RD(xinsn, GET_C_RS2S(insn));
+xinsn = SET_RS1(xinsn, GET_C_RS1S(insn));
+xinsn = SET_I_IMM(xinsn, GET_C_LD_IMM(insn));
+xinsn_has_addr_offset = true;
+

[PATCH v5 0/4] QEMU RISC-V nested virtualization fixes

This series contains fixes and improvements for nested virtualization
on QEMU RISC-V.

These patches can also be found in riscv_nested_fixes_v5 branch at:
https://github.com/avpatel/qemu.git

The RISC-V nested virtualization was tested on QEMU RISC-V using
Xvisor RISC-V which has required hypervisor support to run another
hypervisor as Guest/VM.

Changes since v4:
 - Updated commit description in PATCH1, PATCH2, and PATCH4
 - Use "const" for local array in PATCH4

Changes since v3:
 - Updated PATCH3 to set special pseudoinstructions in htinst for
   guest page faults which result due to VS-stage page table walks
 - Updated warning message in PATCH4

Changes since v2:
 - Dropped the patches which are already in Alistair's next branch
 - Set "Addr. Offset" in the transformed instruction for PATCH3
 - Print warning in riscv_cpu_realize() if we are disabling an
   extension due to privilege spec version mismatch for PATCH4

Changes since v1:
 - Set write_gva to env->two_stage_lookup which ensures that for
   HS-mode to HS-mode trap write_gva is true only for HLV/HSV
   instructions
 - Included "[PATCH 0/3] QEMU RISC-V priv spec version fixes"
   patches in this series for easy review
 - Re-worked PATCH7 to force disable extensions if required
   priv spec version is not satisfied
 - Added new PATCH8 to fix "aia=aplic-imsic" mode of virt machine

Anup Patel (4):
  target/riscv: Don't force update priv spec version to latest
  target/riscv: Add dummy mcountinhibit CSR for priv spec v1.11 or
higher
  target/riscv: Update [m|h]tinst CSR in riscv_cpu_do_interrupt()
  target/riscv: Force disable extensions if priv spec version does not
match

 target/riscv/cpu.c|  65 +--
 target/riscv/cpu.h|   3 +
 target/riscv/cpu_bits.h   |   3 +
 target/riscv/cpu_helper.c | 231 +-
 target/riscv/csr.c|   2 +
 target/riscv/instmap.h|  43 +++
 6 files changed, 333 insertions(+), 14 deletions(-)

-- 
2.34.1

[PATCH v5 1/4] target/riscv: Don't force update priv spec version to latest

The riscv_cpu_realize() sets priv spec version to v1.12 when
"env->priv_ver == 0" (i.e. default v1.10) because the enum
value of priv spec v1.10 is zero.

Due to above issue, the sifive_u machine will see priv spec v1.12
instead of priv spec v1.10.

To fix this issue, we set latest priv spec version (i.e. v1.12)
for base rv64/rv32 cpu and riscv_cpu_realize() will override priv
spec version only when "cpu->cfg.priv_spec != NULL".

Fixes: 7100fe6c2441 ("target/riscv: Enable privileged spec version 1.12")
Signed-off-by: Anup Patel 
Reviewed-by: Frank Chang 
Reviewed-by: Alistair Francis 
Reviewed-by: Atish Patra 
Reviewed-by: Bin Meng 
---
 target/riscv/cpu.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 0497af45cc..9f9c27a3f5 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -169,6 +169,8 @@ static void rv64_base_cpu_init(Object *obj)
 CPURISCVState *env = _CPU(obj)->env;
 /* We set this in the realise function */
 set_misa(env, MXL_RV64, 0);
+/* Set latest version of privileged specification */
+set_priv_version(env, PRIV_VERSION_1_12_0);
 }
 
 static void rv64_sifive_u_cpu_init(Object *obj)
@@ -204,6 +206,8 @@ static void rv32_base_cpu_init(Object *obj)
 CPURISCVState *env = _CPU(obj)->env;
 /* We set this in the realise function */
 set_misa(env, MXL_RV32, 0);
+/* Set latest version of privileged specification */
+set_priv_version(env, PRIV_VERSION_1_12_0);
 }
 
 static void rv32_sifive_u_cpu_init(Object *obj)
@@ -509,7 +513,7 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
 CPURISCVState *env = >env;
 RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(dev);
 CPUClass *cc = CPU_CLASS(mcc);
-int priv_version = 0;
+int priv_version = -1;
 Error *local_err = NULL;
 
 cpu_exec_realizefn(cs, _err);
@@ -533,10 +537,8 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
 }
 }
 
-if (priv_version) {
+if (priv_version >= PRIV_VERSION_1_10_0) {
 set_priv_version(env, priv_version);
-} else if (!env->priv_ver) {
-set_priv_version(env, PRIV_VERSION_1_12_0);
 }
 
 if (cpu->cfg.mmu) {
-- 
2.34.1

[PATCH v5 2/4] target/riscv: Add dummy mcountinhibit CSR for priv spec v1.11 or higher

The mcountinhibit CSR is mandatory for priv spec v1.11 or higher.
Implementations that don't want to implement it can simply have a dummy
mcountinhibit which is always zero.

Fixes: a4b2fa433125 ("target/riscv: Introduce privilege version field in the 
CSR ops.")
Signed-off-by: Anup Patel 
Reviewed-by: Frank Chang 
Reviewed-by: Alistair Francis 
Reviewed-by: Bin Meng 
---
 target/riscv/cpu_bits.h | 3 +++
 target/riscv/csr.c  | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index 4d04b20d06..4a55c6a709 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -159,6 +159,9 @@
 #define CSR_MTVEC   0x305
 #define CSR_MCOUNTEREN  0x306
 
+/* Machine Counter Setup */
+#define CSR_MCOUNTINHIBIT   0x320
+
 /* 32-bit only */
 #define CSR_MSTATUSH0x310
 
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 6dbe9b541f..409a209f14 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -3391,6 +3391,8 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
 [CSR_MIE] = { "mie",any,   NULL,NULL,rmw_mie   
},
 [CSR_MTVEC]   = { "mtvec",  any,   read_mtvec,   write_mtvec   
},
 [CSR_MCOUNTEREN]  = { "mcounteren", any,   read_mcounteren,  
write_mcounteren  },
+[CSR_MCOUNTINHIBIT] = { "mcountinhibit", any, read_zero, write_ignore,
+ .min_priv_ver = 
PRIV_VERSION_1_11_0 },
 
 [CSR_MSTATUSH]= { "mstatush",   any32, read_mstatush,
write_mstatush},
 
-- 
2.34.1

Re: [PATCH v4 4/4] target/riscv: Force disable extensions if priv spec version does not match

On Wed, Jun 8, 2022 at 10:23 PM Richard Henderson
 wrote:
>
> On 6/8/22 09:14, Anup Patel wrote:
> > +struct isa_ext_data isa_edata_arr[] = {
>
> static const?

Using const is fine but we can't use "static const" because
the "struct isa_ext_data" has a pointer to ext_xyz which
is different for each CPU.

Regards,
Anup

>
>
> r~

[PATCH v2 2/2] hw: m25p80: add tests for write protect

2022-06-08 Thread Iris Chen

Signed-off-by: Iris Chen 
---
Include the tests in a separate patch. Using qtest_set_irq_in() as per review.

 tests/qtest/aspeed_smc-test.c | 60 +++
 1 file changed, 60 insertions(+)

diff --git a/tests/qtest/aspeed_smc-test.c b/tests/qtest/aspeed_smc-test.c
index c5d97d4410..7786addfb8 100644
--- a/tests/qtest/aspeed_smc-test.c
+++ b/tests/qtest/aspeed_smc-test.c
@@ -392,6 +392,64 @@ static void test_read_status_reg(void)
 flash_reset();
 }
 
+static void test_status_reg_write_protection(void)
+{
+uint8_t r;
+
+spi_conf(CONF_ENABLE_W0);
+
+/* default case: WP# is high and SRWD is low -> status register writable */
+spi_ctrl_start_user();
+writeb(ASPEED_FLASH_BASE, WREN);
+/* test ability to write SRWD */
+writeb(ASPEED_FLASH_BASE, WRSR);
+writeb(ASPEED_FLASH_BASE, SRWD);
+writeb(ASPEED_FLASH_BASE, RDSR);
+r = readb(ASPEED_FLASH_BASE);
+spi_ctrl_stop_user();
+g_assert_cmphex(r & SRWD, ==, SRWD);
+
+/* WP# high and SRWD high -> status register writable */
+spi_ctrl_start_user();
+writeb(ASPEED_FLASH_BASE, WREN);
+/* test ability to write SRWD */
+writeb(ASPEED_FLASH_BASE, WRSR);
+writeb(ASPEED_FLASH_BASE, 0);
+writeb(ASPEED_FLASH_BASE, RDSR);
+r = readb(ASPEED_FLASH_BASE);
+spi_ctrl_stop_user();
+g_assert_cmphex(r & SRWD, ==, 0);
+
+/* WP# low and SRWD low -> status register writable */
+qtest_set_irq_in(global_qtest,
+ "/machine/soc/fmc/ssi.0/child[0]", "WP#", 0, 0);
+spi_ctrl_start_user();
+writeb(ASPEED_FLASH_BASE, WREN);
+/* test ability to write SRWD */
+writeb(ASPEED_FLASH_BASE, WRSR);
+writeb(ASPEED_FLASH_BASE, SRWD);
+writeb(ASPEED_FLASH_BASE, RDSR);
+r = readb(ASPEED_FLASH_BASE);
+spi_ctrl_stop_user();
+g_assert_cmphex(r & SRWD, ==, SRWD);
+
+/* WP# low and SRWD high -> status register NOT writable */
+spi_ctrl_start_user();
+writeb(ASPEED_FLASH_BASE, WREN);
+/* test ability to write SRWD */
+writeb(ASPEED_FLASH_BASE, WRSR);
+writeb(ASPEED_FLASH_BASE, 0);
+writeb(ASPEED_FLASH_BASE, RDSR);
+r = readb(ASPEED_FLASH_BASE);
+spi_ctrl_stop_user();
+/* write is not successful */
+g_assert_cmphex(r & SRWD, ==, SRWD);
+
+qtest_set_irq_in(global_qtest,
+ "/machine/soc/fmc/ssi.0/child[0]", "WP#", 0, 1);
+flash_reset();
+}
+
 static char tmp_path[] = "/tmp/qtest.m25p80.XX";
 
 int main(int argc, char **argv)
@@ -418,6 +476,8 @@ int main(int argc, char **argv)
 qtest_add_func("/ast2400/smc/read_page_mem", test_read_page_mem);
 qtest_add_func("/ast2400/smc/write_page_mem", test_write_page_mem);
 qtest_add_func("/ast2400/smc/read_status_reg", test_read_status_reg);
+qtest_add_func("/ast2400/smc/status_reg_write_protection",
+   test_status_reg_write_protection);
 
 ret = g_test_run();
 
-- 
2.30.2

[PATCH v2 1/2] hw: m25p80: add WP# pin and SRWD bit for write protection

2022-06-08 Thread Iris Chen

From: Iris Chen 

Signed-off-by: Iris Chen 
---
Addressed all comments from V1. The biggest change: removed 
object_class_property_add.

 hw/block/m25p80.c | 37 +++
 tests/qtest/aspeed_smc-test.c |  2 ++
 2 files changed, 39 insertions(+)

diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index 81ba3da4df..1a20bd55d4 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -27,12 +27,14 @@
 #include "hw/qdev-properties.h"
 #include "hw/qdev-properties-system.h"
 #include "hw/ssi/ssi.h"
+#include "hw/irq.h"
 #include "migration/vmstate.h"
 #include "qemu/bitops.h"
 #include "qemu/log.h"
 #include "qemu/module.h"
 #include "qemu/error-report.h"
 #include "qapi/error.h"
+#include "qapi/visitor.h"
 #include "trace.h"
 #include "qom/object.h"
 
@@ -472,11 +474,13 @@ struct Flash {
 uint8_t spansion_cr2v;
 uint8_t spansion_cr3v;
 uint8_t spansion_cr4v;
+bool wp_level;
 bool write_enable;
 bool four_bytes_address_mode;
 bool reset_enable;
 bool quad_enable;
 bool aai_enable;
+bool status_register_write_disabled;
 uint8_t ear;
 
 int64_t dirty_page;
@@ -723,6 +727,21 @@ static void complete_collecting_data(Flash *s)
 flash_erase(s, s->cur_addr, s->cmd_in_progress);
 break;
 case WRSR:
+/*
+ * If WP# is low and status_register_write_disabled is high,
+ * status register writes are disabled.
+ * This is also called "hardware protected mode" (HPM). All other
+ * combinations of the two states are called "software protected mode"
+ * (SPM), and status register writes are permitted.
+ */
+if ((s->wp_level == 0 && s->status_register_write_disabled)
+|| !s->write_enable) {
+qemu_log_mask(LOG_GUEST_ERROR,
+  "M25P80: Status register write is disabled!\n");
+break;
+}
+s->status_register_write_disabled = extract32(s->data[0], 7, 1);
+
 switch (get_man(s)) {
 case MAN_SPANSION:
 s->quad_enable = !!(s->data[1] & 0x02);
@@ -1195,6 +1214,8 @@ static void decode_new_cmd(Flash *s, uint32_t value)
 
 case RDSR:
 s->data[0] = (!!s->write_enable) << 1;
+s->data[0] |= (!!s->status_register_write_disabled) << 7;
+
 if (get_man(s) == MAN_MACRONIX || get_man(s) == MAN_ISSI) {
 s->data[0] |= (!!s->quad_enable) << 6;
 }
@@ -1484,6 +1505,14 @@ static uint32_t m25p80_transfer8(SSIPeripheral *ss, 
uint32_t tx)
 return r;
 }
 
+static void m25p80_write_protect_pin_irq_handler(void *opaque, int n, int 
level)
+{
+Flash *s = M25P80(opaque);
+/* WP# is just a single pin. */
+assert(n == 0);
+s->wp_level = !!level;
+}
+
 static void m25p80_realize(SSIPeripheral *ss, Error **errp)
 {
 Flash *s = M25P80(ss);
@@ -1515,12 +1544,18 @@ static void m25p80_realize(SSIPeripheral *ss, Error 
**errp)
 s->storage = blk_blockalign(NULL, s->size);
 memset(s->storage, 0xFF, s->size);
 }
+
+qdev_init_gpio_in_named(DEVICE(s),
+m25p80_write_protect_pin_irq_handler, "WP#", 1);
 }
 
 static void m25p80_reset(DeviceState *d)
 {
 Flash *s = M25P80(d);
 
+s->wp_level = true;
+s->status_register_write_disabled = false;
+
 reset_memory(s);
 }
 
@@ -1601,6 +1636,8 @@ static const VMStateDescription vmstate_m25p80 = {
 VMSTATE_UINT8(needed_bytes, Flash),
 VMSTATE_UINT8(cmd_in_progress, Flash),
 VMSTATE_UINT32(cur_addr, Flash),
+VMSTATE_BOOL(wp_level, Flash),
+VMSTATE_BOOL(status_register_write_disabled, Flash),
 VMSTATE_BOOL(write_enable, Flash),
 VMSTATE_BOOL(reset_enable, Flash),
 VMSTATE_UINT8(ear, Flash),
diff --git a/tests/qtest/aspeed_smc-test.c b/tests/qtest/aspeed_smc-test.c
index ec233315e6..c5d97d4410 100644
--- a/tests/qtest/aspeed_smc-test.c
+++ b/tests/qtest/aspeed_smc-test.c
@@ -56,7 +56,9 @@ enum {
 BULK_ERASE = 0xc7,
 READ = 0x03,
 PP = 0x02,
+WRSR = 0x1,
 WREN = 0x6,
+SRWD = 0x80,
 RESET_ENABLE = 0x66,
 RESET_MEMORY = 0x99,
 EN_4BYTE_ADDR = 0xB7,
-- 
2.30.2

[PATCH v15 9/9] target/loongarch: Update README

Add linux-user emulation introduction

Signed-off-by: Song Gao 
Signed-off-by: Xiaojuan Yang 
---
 target/loongarch/README | 39 +--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/target/loongarch/README b/target/loongarch/README
index 4dcd0f1682..9f5edd10c8 100644
--- a/target/loongarch/README
+++ b/target/loongarch/README
@@ -24,9 +24,9 @@
 
 Download cross-tools.
 
-  wget 
https://github.com/loongson/build-tools/releases/latest/download/loongarch64-clfs-20211202-cross-tools.tar.xz
+  wget 
https://github.com/loongson/build-tools/releases/download/2022.05.29/loongarch64-clfs-5.0-cross-tools-gcc-full.tar.xz
 
-  tar -vxf loongarch64-clfs-20211202-cross-tools.tar.xz -C /opt
+  tar -vxf loongarch64-clfs-5.0-cross-tools-gcc-full.tar.xz -C /opt
 
 Config cross-tools env.
 
@@ -60,5 +60,40 @@
 
 ./build/qemu-system-loongarch64 -machine virt -m 4G -cpu Loongson-3A5000 
-smp 1 -kernel build/tests/tcg/loongarch64-softmmu/hello -monitor none -display 
none -chardev file,path=hello.out,id=output -serial chardev:output
 
+- Linux-user emulation
+
+  We already support Linux user emulation. We can use LoongArch cross-tools to 
build LoongArch executables on X86 machines,
+  and We can also use qemu-loongarch64 to run LoongArch executables.
+
+  1. Config cross-tools env.
+
+ see System emulation.
+
+  2. Test tests/tcg/multiarch.
+
+ ./configure  --static  --prefix=/usr  --disable-werror 
--target-list="loongarch64-linux-user" --enable-debug
+
+ cd build
+
+ make && make check-tcg
+
+  3. Run LoongArch system basic command with loongarch-clfs-system.
+
+ - Config clfs env.
+
+   wget 
https://github.com/loongson/build-tools/releases/download/2022.05.29/loongarch64-clfs-system-5.0.tar.bz2
+
+   tar -vxf loongarch64-clfs-system-5.0.tar.bz2 -C /opt/clfs
+
+   cp /opt/clfs/lib64/ld-linux-loongarch-lp64d.so.1  /lib64
+
+   export LD_LIBRARY_PATH="/opt/clfs/lib64"
+
+ - Run LoongArch system basic command.
+
+   ./qemu-loongarch64  /opt/clfs/usr/bin/bash
+   ./qemu-loongarch64  /opt/clfs/usr/bin/ls
+   ./qemu-loongarch64  /opt/clfs/usr/bin/pwd
+
 - Note.
   We can get the latest LoongArch documents or LoongArch tools at 
https://github.com/loongson/
-- 
2.31.1

[PATCH v15 6/9] default-configs: Add loongarch linux-user support

This patch adds loongarch64 linux-user default configs file.

Signed-off-by: Song Gao 
Signed-off-by: Xiaojuan Yang 
Reviewed-by: Richard Henderson 
---
 configs/targets/loongarch64-linux-user.mak | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 configs/targets/loongarch64-linux-user.mak

diff --git a/configs/targets/loongarch64-linux-user.mak 
b/configs/targets/loongarch64-linux-user.mak
new file mode 100644
index 00..7d1b964020
--- /dev/null
+++ b/configs/targets/loongarch64-linux-user.mak
@@ -0,0 +1,3 @@
+# Default configuration for loongarch64-linux-user
+TARGET_ARCH=loongarch64
+TARGET_BASE_ARCH=loongarch
-- 
2.31.1

[PATCH v15 4/9] linux-user: Add LoongArch syscall support

We should disable '__BITS_PER_LONG' at [1] before running gensyscalls.sh

 [1] arch/loongarch/include/uapi/asm/bitsperlong.h

Signed-off-by: Song Gao 
Signed-off-by: Xiaojuan Yang 
Reviewed-by: Richard Henderson 
Reviewed-by: Philippe Mathieu-Daudé 
---
 linux-user/loongarch64/syscall_nr.h | 312 
 linux-user/loongarch64/target_syscall.h |  48 
 linux-user/syscall_defs.h   |  12 +-
 scripts/gensyscalls.sh  |   1 +
 4 files changed, 368 insertions(+), 5 deletions(-)
 create mode 100644 linux-user/loongarch64/syscall_nr.h
 create mode 100644 linux-user/loongarch64/target_syscall.h

diff --git a/linux-user/loongarch64/syscall_nr.h 
b/linux-user/loongarch64/syscall_nr.h
new file mode 100644
index 00..be00915adf
--- /dev/null
+++ b/linux-user/loongarch64/syscall_nr.h
@@ -0,0 +1,312 @@
+/*
+ * This file contains the system call numbers.
+ * Do not modify.
+ * This file is generated by scripts/gensyscalls.sh
+ */
+#ifndef LINUX_USER_LOONGARCH_SYSCALL_NR_H
+#define LINUX_USER_LOONGARCH_SYSCALL_NR_H
+
+#define TARGET_NR_io_setup 0
+#define TARGET_NR_io_destroy 1
+#define TARGET_NR_io_submit 2
+#define TARGET_NR_io_cancel 3
+#define TARGET_NR_io_getevents 4
+#define TARGET_NR_setxattr 5
+#define TARGET_NR_lsetxattr 6
+#define TARGET_NR_fsetxattr 7
+#define TARGET_NR_getxattr 8
+#define TARGET_NR_lgetxattr 9
+#define TARGET_NR_fgetxattr 10
+#define TARGET_NR_listxattr 11
+#define TARGET_NR_llistxattr 12
+#define TARGET_NR_flistxattr 13
+#define TARGET_NR_removexattr 14
+#define TARGET_NR_lremovexattr 15
+#define TARGET_NR_fremovexattr 16
+#define TARGET_NR_getcwd 17
+#define TARGET_NR_lookup_dcookie 18
+#define TARGET_NR_eventfd2 19
+#define TARGET_NR_epoll_create1 20
+#define TARGET_NR_epoll_ctl 21
+#define TARGET_NR_epoll_pwait 22
+#define TARGET_NR_dup 23
+#define TARGET_NR_dup3 24
+#define TARGET_NR_fcntl 25
+#define TARGET_NR_inotify_init1 26
+#define TARGET_NR_inotify_add_watch 27
+#define TARGET_NR_inotify_rm_watch 28
+#define TARGET_NR_ioctl 29
+#define TARGET_NR_ioprio_set 30
+#define TARGET_NR_ioprio_get 31
+#define TARGET_NR_flock 32
+#define TARGET_NR_mknodat 33
+#define TARGET_NR_mkdirat 34
+#define TARGET_NR_unlinkat 35
+#define TARGET_NR_symlinkat 36
+#define TARGET_NR_linkat 37
+#define TARGET_NR_umount2 39
+#define TARGET_NR_mount 40
+#define TARGET_NR_pivot_root 41
+#define TARGET_NR_nfsservctl 42
+#define TARGET_NR_statfs 43
+#define TARGET_NR_fstatfs 44
+#define TARGET_NR_truncate 45
+#define TARGET_NR_ftruncate 46
+#define TARGET_NR_fallocate 47
+#define TARGET_NR_faccessat 48
+#define TARGET_NR_chdir 49
+#define TARGET_NR_fchdir 50
+#define TARGET_NR_chroot 51
+#define TARGET_NR_fchmod 52
+#define TARGET_NR_fchmodat 53
+#define TARGET_NR_fchownat 54
+#define TARGET_NR_fchown 55
+#define TARGET_NR_openat 56
+#define TARGET_NR_close 57
+#define TARGET_NR_vhangup 58
+#define TARGET_NR_pipe2 59
+#define TARGET_NR_quotactl 60
+#define TARGET_NR_getdents64 61
+#define TARGET_NR_lseek 62
+#define TARGET_NR_read 63
+#define TARGET_NR_write 64
+#define TARGET_NR_readv 65
+#define TARGET_NR_writev 66
+#define TARGET_NR_pread64 67
+#define TARGET_NR_pwrite64 68
+#define TARGET_NR_preadv 69
+#define TARGET_NR_pwritev 70
+#define TARGET_NR_sendfile 71
+#define TARGET_NR_pselect6 72
+#define TARGET_NR_ppoll 73
+#define TARGET_NR_signalfd4 74
+#define TARGET_NR_vmsplice 75
+#define TARGET_NR_splice 76
+#define TARGET_NR_tee 77
+#define TARGET_NR_readlinkat 78
+#define TARGET_NR_sync 81
+#define TARGET_NR_fsync 82
+#define TARGET_NR_fdatasync 83
+#define TARGET_NR_sync_file_range 84
+#define TARGET_NR_timerfd_create 85
+#define TARGET_NR_timerfd_settime 86
+#define TARGET_NR_timerfd_gettime 87
+#define TARGET_NR_utimensat 88
+#define TARGET_NR_acct 89
+#define TARGET_NR_capget 90
+#define TARGET_NR_capset 91
+#define TARGET_NR_personality 92
+#define TARGET_NR_exit 93
+#define TARGET_NR_exit_group 94
+#define TARGET_NR_waitid 95
+#define TARGET_NR_set_tid_address 96
+#define TARGET_NR_unshare 97
+#define TARGET_NR_futex 98
+#define TARGET_NR_set_robust_list 99
+#define TARGET_NR_get_robust_list 100
+#define TARGET_NR_nanosleep 101
+#define TARGET_NR_getitimer 102
+#define TARGET_NR_setitimer 103
+#define TARGET_NR_kexec_load 104
+#define TARGET_NR_init_module 105
+#define TARGET_NR_delete_module 106
+#define TARGET_NR_timer_create 107
+#define TARGET_NR_timer_gettime 108
+#define TARGET_NR_timer_getoverrun 109
+#define TARGET_NR_timer_settime 110
+#define TARGET_NR_timer_delete 111
+#define TARGET_NR_clock_settime 112
+#define TARGET_NR_clock_gettime 113
+#define TARGET_NR_clock_getres 114
+#define TARGET_NR_clock_nanosleep 115
+#define TARGET_NR_syslog 116
+#define TARGET_NR_ptrace 117
+#define TARGET_NR_sched_setparam 118
+#define TARGET_NR_sched_setscheduler 119
+#define TARGET_NR_sched_getscheduler 120
+#define TARGET_NR_sched_getparam 121
+#define TARGET_NR_sched_setaffinity 122
+#define TARGET_NR_sched_getaffinity 123
+#define

[PATCH v15 3/9] linux-user: Add LoongArch elf support

Signed-off-by: Song Gao 
Signed-off-by: Xiaojuan Yang 
Reviewed-by: Richard Henderson 
Reviewed-by: Philippe Mathieu-Daudé 
---
 linux-user/elfload.c| 54 +
 linux-user/loongarch64/target_elf.h | 12 +++
 2 files changed, 66 insertions(+)
 create mode 100644 linux-user/loongarch64/target_elf.h

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index f7eae357f4..ffed313927 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -918,6 +918,60 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, 
const CPUPPCState *en
 
 #endif
 
+#ifdef TARGET_LOONGARCH64
+
+#define ELF_START_MMAP 0x8000
+
+#define ELF_CLASS   ELFCLASS64
+#define ELF_ARCHEM_LOONGARCH
+
+#define elf_check_arch(x) ((x) == EM_LOONGARCH)
+static inline void init_thread(struct target_pt_regs *regs,
+   struct image_info *infop)
+{
+/*Set crmd PG,DA = 1,0 */
+regs->csr.crmd = 2 << 3;
+regs->csr.era = infop->entry;
+regs->regs[3] = infop->start_stack;
+}
+
+/* See linux kernel: arch/loongarch/include/asm/elf.h.  */
+#define ELF_NREG 45
+typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG];
+
+enum {
+TARGET_EF_R0 = 0,
+TARGET_EF_CSR_ERA = TARGET_EF_R0 + 32,
+TARGET_EF_CSR_BADV = TARGET_EF_R0 + 33,
+};
+
+static void elf_core_copy_regs(target_elf_gregset_t *regs,
+   const CPULoongArchState *env)
+{
+int i;
+
+(*regs)[TARGET_EF_R0] = 0;
+
+for (i = 1; i < ARRAY_SIZE(env->gpr); i++) {
+(*regs)[TARGET_EF_R0 + i] = tswapreg(env->gpr[i]);
+}
+
+(*regs)[TARGET_EF_CSR_ERA] = tswapreg(env->pc);
+(*regs)[TARGET_EF_CSR_BADV] = tswapreg(env->badaddr);
+}
+
+#define USE_ELF_CORE_DUMP
+#define ELF_EXEC_PAGESIZE4096
+
+#define ELF_HWCAP get_elf_hwcap()
+
+static uint32_t get_elf_hwcap(void)
+{
+return 0;
+}
+
+#endif /* TARGET_LOONGARCH64 */
+
 #ifdef TARGET_MIPS
 
 #define ELF_START_MMAP 0x8000
diff --git a/linux-user/loongarch64/target_elf.h 
b/linux-user/loongarch64/target_elf.h
new file mode 100644
index 00..95c3f05a46
--- /dev/null
+++ b/linux-user/loongarch64/target_elf.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#ifndef LOONGARCH_TARGET_ELF_H
+#define LOONGARCH_TARGET_ELF_H
+static inline const char *cpu_get_model(uint32_t eflags)
+{
+return "la464";
+}
+#endif
-- 
2.31.1

[PATCH v15 1/9] linux-user: Add LoongArch generic header files

This includes:
- sockbits.h
- target_errno_defs.h
- target_fcntl.h
- termbits.h
- target_resource.h
- target_structs.h

Signed-off-by: Song Gao 
Signed-off-by: Xiaojuan Yang 
Reviewed-by: Richard Henderson 
Reviewed-by: Philippe Mathieu-Daudé 
---
 linux-user/loongarch64/sockbits.h  | 11 +++
 linux-user/loongarch64/target_errno_defs.h | 12 
 linux-user/loongarch64/target_fcntl.h  | 11 +++
 linux-user/loongarch64/target_prctl.h  |  1 +
 linux-user/loongarch64/target_resource.h   | 11 +++
 linux-user/loongarch64/target_structs.h| 11 +++
 linux-user/loongarch64/termbits.h  | 11 +++
 7 files changed, 68 insertions(+)
 create mode 100644 linux-user/loongarch64/sockbits.h
 create mode 100644 linux-user/loongarch64/target_errno_defs.h
 create mode 100644 linux-user/loongarch64/target_fcntl.h
 create mode 100644 linux-user/loongarch64/target_prctl.h
 create mode 100644 linux-user/loongarch64/target_resource.h
 create mode 100644 linux-user/loongarch64/target_structs.h
 create mode 100644 linux-user/loongarch64/termbits.h

diff --git a/linux-user/loongarch64/sockbits.h 
b/linux-user/loongarch64/sockbits.h
new file mode 100644
index 00..1cffcae120
--- /dev/null
+++ b/linux-user/loongarch64/sockbits.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#ifndef LOONGARCH_TARGET_SOCKBITS_H
+#define LOONGARCH_TARGET_SOCKBITS_H
+
+#include "../generic/sockbits.h"
+
+#endif
diff --git a/linux-user/loongarch64/target_errno_defs.h 
b/linux-user/loongarch64/target_errno_defs.h
new file mode 100644
index 00..c198b8aca9
--- /dev/null
+++ b/linux-user/loongarch64/target_errno_defs.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#ifndef LOONGARCH_TARGET_ERRNO_DEFS_H
+#define LOONGARCH_TARGET_ERRNO_DEFS_H
+
+/* Target uses generic errno */
+#include "../generic/target_errno_defs.h"
+
+#endif
diff --git a/linux-user/loongarch64/target_fcntl.h 
b/linux-user/loongarch64/target_fcntl.h
new file mode 100644
index 00..99bf586854
--- /dev/null
+++ b/linux-user/loongarch64/target_fcntl.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#ifndef LOONGARCH_TARGET_FCNTL_H
+#define LOONGARCH_TARGET_FCNTL_H
+
+#include "../generic/fcntl.h"
+
+#endif
diff --git a/linux-user/loongarch64/target_prctl.h 
b/linux-user/loongarch64/target_prctl.h
new file mode 100644
index 00..eb53b31ad5
--- /dev/null
+++ b/linux-user/loongarch64/target_prctl.h
@@ -0,0 +1 @@
+/* No special prctl support required. */
diff --git a/linux-user/loongarch64/target_resource.h 
b/linux-user/loongarch64/target_resource.h
new file mode 100644
index 00..0f86bf24ee
--- /dev/null
+++ b/linux-user/loongarch64/target_resource.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#ifndef LOONGARCH_TARGET_RESOURCE_H
+#define LOONGARCH_TARGET_RESOURCE_H
+
+#include "../generic/target_resource.h"
+
+#endif
diff --git a/linux-user/loongarch64/target_structs.h 
b/linux-user/loongarch64/target_structs.h
new file mode 100644
index 00..6041441e15
--- /dev/null
+++ b/linux-user/loongarch64/target_structs.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#ifndef LOONGARCH_TARGET_STRUCTS_H
+#define LOONGARCH_TARGET_STRUCTS_H
+
+#include "../generic/target_structs.h"
+
+#endif
diff --git a/linux-user/loongarch64/termbits.h 
b/linux-user/loongarch64/termbits.h
new file mode 100644
index 00..d425db8748
--- /dev/null
+++ b/linux-user/loongarch64/termbits.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#ifndef LOONGARCH_TARGET_TERMBITS_H
+#define LOONGARCH_TARGET_TERMBITS_H
+
+#include "../generic/termbits.h"
+
+#endif
-- 
2.31.1

[PATCH v15 8/9] target/loongarch: Adjust functions and structure to support user-mode

Some functions and members of the structure differ from softmmu-mode,
so we need to adjust them to support user-mode.

Signed-off-by: Song Gao 
Signed-off-by: Xiaojuan Yang 
---
 target/loongarch/cpu.c| 22 ++--
 target/loongarch/cpu.h|  6 
 target/loongarch/helper.h |  5 +--
 target/loongarch/insn_trans/trans_extra.c.inc | 14 
 .../insn_trans/trans_privileged.c.inc | 36 +++
 target/loongarch/internals.h  |  2 ++
 target/loongarch/op_helper.c  | 12 +++
 7 files changed, 93 insertions(+), 4 deletions(-)

diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index 4c8f96bc3a..472e258f68 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -18,7 +18,6 @@
 #include "fpu/softfloat-helpers.h"
 #include "cpu-csr.h"
 #include "sysemu/reset.h"
-#include "hw/loader.h"
 
 const char * const regnames[32] = {
 "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
@@ -82,6 +81,7 @@ static void loongarch_cpu_set_pc(CPUState *cs, vaddr value)
 env->pc = value;
 }
 
+#ifndef CONFIG_USER_ONLY
 #include "hw/loongarch/virt.h"
 
 void loongarch_cpu_set_irq(void *opaque, int irq, int level)
@@ -292,6 +292,7 @@ static bool loongarch_cpu_exec_interrupt(CPUState *cs, int 
interrupt_request)
 }
 return false;
 }
+#endif
 
 #ifdef CONFIG_TCG
 static void loongarch_cpu_synchronize_from_tb(CPUState *cs,
@@ -306,6 +307,9 @@ static void loongarch_cpu_synchronize_from_tb(CPUState *cs,
 
 static bool loongarch_cpu_has_work(CPUState *cs)
 {
+#ifdef CONFIG_USER_ONLY
+return true;
+#else
 LoongArchCPU *cpu = LOONGARCH_CPU(cs);
 CPULoongArchState *env = >env;
 bool has_work = false;
@@ -316,6 +320,7 @@ static bool loongarch_cpu_has_work(CPUState *cs)
 }
 
 return has_work;
+#endif
 }
 
 static void loongarch_la464_initfn(Object *obj)
@@ -464,7 +469,9 @@ static void loongarch_cpu_reset(DeviceState *dev)
 env->CSR_DMW[n] = FIELD_DP64(env->CSR_DMW[n], CSR_DMW, PLV3, 0);
 }
 
+#ifndef CONFIG_USER_ONLY
 env->pc = 0x1c00;
+#endif
 
 restore_fp_status(env);
 cs->exception_index = -1;
@@ -495,6 +502,7 @@ static void loongarch_cpu_realizefn(DeviceState *dev, Error 
**errp)
 lacc->parent_realize(dev, errp);
 }
 
+#ifndef CONFIG_USER_ONLY
 static void loongarch_qemu_write(void *opaque, hwaddr addr,
  uint64_t val, unsigned size)
 {
@@ -529,13 +537,16 @@ static const MemoryRegionOps loongarch_qemu_ops = {
 .max_access_size = 8,
 },
 };
+#endif
 
 static void loongarch_cpu_init(Object *obj)
 {
 LoongArchCPU *cpu = LOONGARCH_CPU(obj);
-CPULoongArchState *env = >env;
 
+#ifdef CONFIG_USER_ONLY
 cpu_set_cpustate_pointers(cpu);
+#else
+CPULoongArchState *env = >env;
 qdev_init_gpio_in(DEVICE(cpu), loongarch_cpu_set_irq, N_IRQS);
 timer_init_ns(>timer, QEMU_CLOCK_VIRTUAL,
   _constant_timer_cb, cpu);
@@ -545,6 +556,7 @@ static void loongarch_cpu_init(Object *obj)
 memory_region_init_io(>iocsr_mem, OBJECT(cpu), _qemu_ops,
   NULL, "iocsr_misc", 0x428);
 memory_region_add_subregion(>system_iocsr, 0, >iocsr_mem);
+#endif
 }
 
 static ObjectClass *loongarch_cpu_class_by_name(const char *cpu_model)
@@ -612,18 +624,22 @@ static struct TCGCPUOps loongarch_tcg_ops = {
 .initialize = loongarch_translate_init,
 .synchronize_from_tb = loongarch_cpu_synchronize_from_tb,
 
+#ifndef CONFIG_USER_ONLY
 .tlb_fill = loongarch_cpu_tlb_fill,
 .cpu_exec_interrupt = loongarch_cpu_exec_interrupt,
 .do_interrupt = loongarch_cpu_do_interrupt,
 .do_transaction_failed = loongarch_cpu_do_transaction_failed,
+#endif
 };
 #endif /* CONFIG_TCG */
 
+#ifndef CONFIG_USER_ONLY
 #include "hw/core/sysemu-cpu-ops.h"
 
 static const struct SysemuCPUOps loongarch_sysemu_ops = {
 .get_phys_page_debug = loongarch_cpu_get_phys_page_debug,
 };
+#endif
 
 static void loongarch_cpu_class_init(ObjectClass *c, void *data)
 {
@@ -639,8 +655,10 @@ static void loongarch_cpu_class_init(ObjectClass *c, void 
*data)
 cc->has_work = loongarch_cpu_has_work;
 cc->dump_state = loongarch_cpu_dump_state;
 cc->set_pc = loongarch_cpu_set_pc;
+#ifndef CONFIG_USER_ONLY
 dc->vmsd = _loongarch_cpu;
 cc->sysemu_ops = _sysemu_ops;
+#endif
 cc->disas_set_info = loongarch_cpu_disas_set_info;
 cc->gdb_read_register = loongarch_cpu_gdb_read_register;
 cc->gdb_write_register = loongarch_cpu_gdb_write_register;
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 71a5036c3c..19eed2f0c1 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -303,6 +303,7 @@ typedef struct CPUArchState {
 uint64_t CSR_DERA;
 uint64_t CSR_DSAVE;
 
+#ifndef CONFIG_USER_ONLY
 LoongArchTLB  tlb[LOONGARCH_TLB_MAX];
 
 AddressSpace address_space_iocsr;
@@ -310,6 +311,7 @@ typedef struct CPUArchState {

[PATCH v15 0/9] Add LoongArch linux-user emulation support

Hi All,

This series adds support for linux-user emulation.
As the LoongArch kernel has been merged into 5.19-rc1,
you can see the latest kernel at https://kernel.org

Patches that still need review:

  0002-linux-user-Add-LoongArch-signal-support.patch
  0008-target-loongarch-Adjust-functions-and-structure-to-s.patch
  0009-target-loongarch-Update-README.patch

v15:
  - Rebase;
  - Update README;
  - Adjust some functions and structure to support user-mode;
  - Update syscall;
  - Update target_sigcontext;


Old series:
   - https://patchew.org/QEMU/20220106094200.1801206-1-gaos...@loongson.cn/
  

Thanks.
Song Gao


Song Gao (9):
  linux-user: Add LoongArch generic header files
  linux-user: Add LoongArch signal support
  linux-user: Add LoongArch elf support
  linux-user: Add LoongArch syscall support
  linux-user: Add LoongArch cpu_loop support
  default-configs: Add loongarch linux-user support
  scripts: add loongarch64 binfmt config
  target/loongarch: Adjust functions and structure to support user-mode
  target/loongarch: Update README

 configs/targets/loongarch64-linux-user.mak|   3 +
 linux-user/elfload.c  |  54 +++
 linux-user/loongarch64/cpu_loop.c |  93 ++
 linux-user/loongarch64/signal.c   | 194 +++
 linux-user/loongarch64/sockbits.h |  11 +
 linux-user/loongarch64/syscall_nr.h   | 312 ++
 linux-user/loongarch64/target_cpu.h   |  34 ++
 linux-user/loongarch64/target_elf.h   |  12 +
 linux-user/loongarch64/target_errno_defs.h|  12 +
 linux-user/loongarch64/target_fcntl.h |  11 +
 linux-user/loongarch64/target_prctl.h |   1 +
 linux-user/loongarch64/target_resource.h  |  11 +
 linux-user/loongarch64/target_signal.h|  13 +
 linux-user/loongarch64/target_structs.h   |  11 +
 linux-user/loongarch64/target_syscall.h   |  48 +++
 linux-user/loongarch64/termbits.h |  11 +
 linux-user/syscall_defs.h |  12 +-
 scripts/gensyscalls.sh|   1 +
 scripts/qemu-binfmt-conf.sh   |   6 +-
 target/loongarch/README   |  39 ++-
 target/loongarch/cpu.c|  22 +-
 target/loongarch/cpu.h|   6 +
 target/loongarch/helper.h |   5 +-
 target/loongarch/insn_trans/trans_extra.c.inc |  14 +
 .../insn_trans/trans_privileged.c.inc |  36 ++
 target/loongarch/internals.h  |   2 +
 target/loongarch/op_helper.c  |  12 +
 27 files changed, 974 insertions(+), 12 deletions(-)
 create mode 100644 configs/targets/loongarch64-linux-user.mak
 create mode 100644 linux-user/loongarch64/cpu_loop.c
 create mode 100644 linux-user/loongarch64/signal.c
 create mode 100644 linux-user/loongarch64/sockbits.h
 create mode 100644 linux-user/loongarch64/syscall_nr.h
 create mode 100644 linux-user/loongarch64/target_cpu.h
 create mode 100644 linux-user/loongarch64/target_elf.h
 create mode 100644 linux-user/loongarch64/target_errno_defs.h
 create mode 100644 linux-user/loongarch64/target_fcntl.h
 create mode 100644 linux-user/loongarch64/target_prctl.h
 create mode 100644 linux-user/loongarch64/target_resource.h
 create mode 100644 linux-user/loongarch64/target_signal.h
 create mode 100644 linux-user/loongarch64/target_structs.h
 create mode 100644 linux-user/loongarch64/target_syscall.h
 create mode 100644 linux-user/loongarch64/termbits.h

-- 
2.31.1

[PATCH v15 5/9] linux-user: Add LoongArch cpu_loop support

Signed-off-by: Song Gao 
Signed-off-by: Xiaojuan Yang 
Reviewed-by: Richard Henderson 
---
 linux-user/loongarch64/cpu_loop.c   | 93 +
 linux-user/loongarch64/target_cpu.h | 34 +++
 2 files changed, 127 insertions(+)
 create mode 100644 linux-user/loongarch64/cpu_loop.c
 create mode 100644 linux-user/loongarch64/target_cpu.h

diff --git a/linux-user/loongarch64/cpu_loop.c 
b/linux-user/loongarch64/cpu_loop.c
new file mode 100644
index 00..cfae1ba48e
--- /dev/null
+++ b/linux-user/loongarch64/cpu_loop.c
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * QEMU LoongArch user cpu_loop.
+ *
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#include "qemu/osdep.h"
+#include "qemu.h"
+#include "user-internals.h"
+#include "cpu_loop-common.h"
+#include "signal-common.h"
+
+void cpu_loop(CPULoongArchState *env)
+{
+CPUState *cs = env_cpu(env);
+int trapnr, si_code;
+abi_long ret;
+
+for (;;) {
+cpu_exec_start(cs);
+trapnr = cpu_exec(cs);
+cpu_exec_end(cs);
+process_queued_cpu_work(cs);
+
+switch (trapnr) {
+case EXCP_INTERRUPT:
+/* just indicate that signals should be handled asap */
+break;
+case EXCCODE_SYS:
+env->pc += 4;
+ret = do_syscall(env, env->gpr[11],
+ env->gpr[4], env->gpr[5],
+ env->gpr[6], env->gpr[7],
+ env->gpr[8], env->gpr[9],
+ -1, -1);
+if (ret == -QEMU_ERESTARTSYS) {
+env->pc -= 4;
+break;
+}
+if (ret == -QEMU_ESIGRETURN) {
+/*
+ * Returning from a successful sigreturn syscall.
+ * Avoid clobbering register state.
+ */
+break;
+}
+env->gpr[4] = ret;
+break;
+case EXCCODE_INE:
+force_sig_fault(TARGET_SIGILL, 0, env->pc);
+break;
+case EXCCODE_FPE:
+si_code = TARGET_FPE_FLTUNK;
+if (GET_FP_CAUSE(env->fcsr0) & FP_INVALID) {
+si_code = TARGET_FPE_FLTINV;
+} else if (GET_FP_CAUSE(env->fcsr0) & FP_DIV0) {
+si_code = TARGET_FPE_FLTDIV;
+} else if (GET_FP_CAUSE(env->fcsr0) & FP_OVERFLOW) {
+si_code = TARGET_FPE_FLTOVF;
+} else if (GET_FP_CAUSE(env->fcsr0) & FP_UNDERFLOW) {
+si_code = TARGET_FPE_FLTUND;
+} else if (GET_FP_CAUSE(env->fcsr0) & FP_INEXACT) {
+si_code = TARGET_FPE_FLTRES;
+}
+force_sig_fault(TARGET_SIGFPE, si_code, env->pc);
+break;
+case EXCP_DEBUG:
+case EXCCODE_BRK:
+force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->pc);
+break;
+case EXCP_ATOMIC:
+cpu_exec_step_atomic(cs);
+break;
+default:
+EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n",
+  trapnr);
+exit(EXIT_FAILURE);
+}
+process_pending_signals(env);
+}
+}
+
+void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
+{
+int i;
+
+for (i = 0; i < 32; i++) {
+env->gpr[i] = regs->regs[i];
+}
+env->pc = regs->csr.era;
+
+}
diff --git a/linux-user/loongarch64/target_cpu.h 
b/linux-user/loongarch64/target_cpu.h
new file mode 100644
index 00..a29af66156
--- /dev/null
+++ b/linux-user/loongarch64/target_cpu.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * LoongArch specific CPU ABI and functions for linux-user
+ *
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#ifndef LOONGARCH_TARGET_CPU_H
+#define LOONGARCH_TARGET_CPU_H
+
+static inline void cpu_clone_regs_child(CPULoongArchState *env,
+target_ulong newsp, unsigned flags)
+{
+if (newsp) {
+env->gpr[3] = newsp;
+}
+env->gpr[4] = 0;
+}
+
+static inline void cpu_clone_regs_parent(CPULoongArchState *env,
+ unsigned flags)
+{
+}
+
+static inline void cpu_set_tls(CPULoongArchState *env, target_ulong newtls)
+{
+env->gpr[2] = newtls;
+}
+
+static inline abi_ulong get_sp_from_cpustate(CPULoongArchState *state)
+{
+return state->gpr[3];
+}
+#endif
-- 
2.31.1

[PATCH v15 7/9] scripts: add loongarch64 binfmt config

Signed-off-by: Song Gao 
Signed-off-by: Xiaojuan Yang 
Reviewed-by: Richard Henderson 
---
 scripts/qemu-binfmt-conf.sh | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/scripts/qemu-binfmt-conf.sh b/scripts/qemu-binfmt-conf.sh
index 9cb723f443..1f4e2cd19d 100755
--- a/scripts/qemu-binfmt-conf.sh
+++ b/scripts/qemu-binfmt-conf.sh
@@ -4,7 +4,7 @@
 qemu_target_list="i386 i486 alpha arm armeb sparc sparc32plus sparc64 \
 ppc ppc64 ppc64le m68k mips mipsel mipsn32 mipsn32el mips64 mips64el \
 sh4 sh4eb s390x aarch64 aarch64_be hppa riscv32 riscv64 xtensa xtensaeb \
-microblaze microblazeel or1k x86_64 hexagon"
+microblaze microblazeel or1k x86_64 hexagon loongarch64"
 
 
i386_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x03\x00'
 
i386_mask='\xff\xff\xff\xff\xff\xfe\xfe\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
@@ -140,6 +140,10 @@ 
hexagon_magic='\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x
 
hexagon_mask='\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
 hexagon_family=hexagon
 
+loongarch64_magic='\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x02\x01'
+loongarch64_mask='\xff\xff\xff\xff\xff\xff\xff\xfc\x00\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'
+loongarch64_family=loongarch
+
 qemu_get_family() {
 cpu=${HOST_ARCH:-$(uname -m)}
 case "$cpu" in
-- 
2.31.1

[PATCH v15 2/9] linux-user: Add LoongArch signal support

Signed-off-by: Song Gao 
Signed-off-by: Xiaojuan Yang 
---
 linux-user/loongarch64/signal.c| 194 +
 linux-user/loongarch64/target_signal.h |  13 ++
 2 files changed, 207 insertions(+)
 create mode 100644 linux-user/loongarch64/signal.c
 create mode 100644 linux-user/loongarch64/target_signal.h

diff --git a/linux-user/loongarch64/signal.c b/linux-user/loongarch64/signal.c
new file mode 100644
index 00..a969a4a25d
--- /dev/null
+++ b/linux-user/loongarch64/signal.c
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * LoongArch emulation of Linux signals
+ *
+ * Copyright (c) 2021 Loongson Technology Corporation Limited
+ */
+
+#include "qemu/osdep.h"
+#include "qemu.h"
+#include "user-internals.h"
+#include "signal-common.h"
+#include "linux-user/trace.h"
+
+#include "target/loongarch/internals.h"
+
+struct target_sigcontext {
+uint64_t sc_pc;
+uint64_t sc_regs[32];
+uint32_t sc_flags;
+uint64_t sc_extcontext[0] __attribute__((aligned(16)));
+};
+
+struct target_fpu_context {
+uint64_t fc_regs[32];
+uint64_t fc_fcc;
+uint32_t fc_fcsr;
+};
+
+struct target_ucontext {
+target_ulong tuc_flags;
+struct target_ucontext *tuc_link;
+target_stack_t tuc_stack;
+target_sigset_t tuc_sigmask;
+uint8_t __unused[1024 / 8 - sizeof(target_sigset_t)];
+struct target_sigcontext tuc_mcontext;
+};
+
+struct target_rt_sigframe {
+struct target_siginfo rs_info;
+struct target_ucontext rs_uc;
+struct target_fpu_context rs_fc;
+};
+
+static uint64_t read_all_fcc(CPULoongArchState *env)
+{
+uint64_t ret = 0;
+
+for (int i = 0; i < 8; ++i) {
+ret |= (uint64_t)env->cf[i] << (i * 8);
+}
+
+return ret;
+}
+
+static void write_all_fcc(CPULoongArchState *env, uint64_t val)
+{
+for (int i = 0; i < 8; ++i) {
+env->cf[i] = (val >> (i * 8)) & 1;
+}
+}
+
+static void setup_sigcontext(CPULoongArchState *env,
+ struct target_sigcontext *sc,
+ struct target_fpu_context *fc)
+{
+int i;
+
+__put_user(env->pc, >sc_pc);
+__put_user(0, >sc_regs[0]);
+__put_user(env->fcsr0, >fc_fcsr);
+fc->fc_fcc = read_all_fcc(env);
+
+for (i = 1; i < 32; ++i) {
+__put_user(env->gpr[i], >sc_regs[i]);
+}
+
+for (i = 0; i < 32; ++i) {
+__put_user(env->fpr[i], >fc_regs[i]);
+}
+}
+
+static void restore_sigcontext(CPULoongArchState *env,
+   struct target_sigcontext *sc,
+   struct target_fpu_context *fc)
+{
+int i;
+
+__get_user(env->pc, >sc_pc);
+__get_user(env->fcsr0, >fc_fcsr);
+write_all_fcc(env, fc->fc_fcc);
+
+for (i = 1; i < 32; ++i) {
+__get_user(env->gpr[i], >sc_regs[i]);
+}
+
+for (i = 0; i < 32; ++i) {
+__get_user(env->fpr[i], >fc_regs[i]);
+}
+
+restore_fp_status(env);
+}
+
+/*
+ * Determine which stack to use.
+ */
+static abi_ulong get_sigframe(struct target_sigaction *ka,
+  CPULoongArchState *env, size_t frame_size)
+{
+unsigned long sp;
+
+sp = target_sigsp(get_sp_from_cpustate(env) - 32, ka);
+
+return (sp - frame_size) & ~15;
+}
+
+void setup_rt_frame(int sig, struct target_sigaction *ka,
+target_siginfo_t *info,
+target_sigset_t *set, CPULoongArchState *env)
+{
+struct target_rt_sigframe *frame;
+abi_ulong frame_addr;
+int i;
+
+frame_addr = get_sigframe(ka, env, sizeof(*frame));
+trace_user_setup_rt_frame(env, frame_addr);
+if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) {
+goto give_sigsegv;
+}
+
+tswap_siginfo(>rs_info, info);
+
+__put_user(0, >rs_uc.tuc_flags);
+__put_user(0, >rs_uc.tuc_link);
+target_save_altstack(>rs_uc.tuc_stack, env);
+
+setup_sigcontext(env, >rs_uc.tuc_mcontext, >rs_fc);
+
+for (i = 0; i < TARGET_NSIG_WORDS; i++) {
+__put_user(set->sig[i], >rs_uc.tuc_sigmask.sig[i]);
+}
+
+env->gpr[4] = sig;
+env->gpr[5] = frame_addr + offsetof(struct target_rt_sigframe, rs_info);
+env->gpr[6] = frame_addr + offsetof(struct target_rt_sigframe, rs_uc);
+env->gpr[3] = frame_addr;
+env->gpr[1] = default_rt_sigreturn;
+
+env->pc = ka->_sa_handler;
+unlock_user_struct(frame, frame_addr, 1);
+return;
+
+give_sigsegv:
+unlock_user_struct(frame, frame_addr, 1);
+force_sigsegv(sig);
+}
+
+long do_rt_sigreturn(CPULoongArchState *env)
+{
+struct target_rt_sigframe *frame;
+abi_ulong frame_addr;
+sigset_t blocked;
+
+frame_addr = env->gpr[3];
+trace_user_do_rt_sigreturn(env, frame_addr);
+if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) {
+goto badframe;
+}
+
+target_to_host_sigset(, >rs_uc.tuc_sigmask);
+set_sigmask();
+
+restore_sigcontext(env, >rs_uc.tuc_mcontext, >rs_fc);
+target_restore_altstack(>rs_uc.tuc_stack,

[RFC][PATCH] docs: note exception for PCIe IO port access

2022-06-08 Thread Kevin Locke

ioport access is required for VESA BIOS Extensions (VBE).  Since ioport
access is not forwarded over PCI(e) bridges, graphics adapters must be
attached directly to the Root Complex in order for the BIOS to provide
VBE modes.  I'm very grateful to Gerd Hoffmann for explaining this on
the SeaBIOS mailing list.[1]

Update the PCI Express Guidelines to document this as an exception to
the recommendation to "Place only legacy PCI devices on the Root
Complex."

[1]: 
https://mail.coreboot.org/hyperkitty/list/seab...@seabios.org/thread/XG2RN3HKVRDEDTLA2PRELLIENIIH7II7/#XVP3I2KQVZHSTDA4SNVKOITWGRGSDU3F

Signed-off-by: Kevin Locke 
---

This suggested documentation change is the result of my struggles to
understand why I was seeing a very limited set of display modes in one
of my virtual machines, as explained in the seabios ML post linked above
and an earlier post to qemu-discuss.[2]  I hope it may help avoid some
of these hassles for future users.

I'm far from being an expert in PCI(e), BIOS/VBE, or virtualization in
general, and would appreciate any suggestions on these docs changes.
I'm also curious about whether graphics devices are the only exception
and whether "Guest OSes are suspected to behave strangely when PCI
Express devices are integrated with the Root Complex" is still the case.
David Gibson had previously noted that graphics cards appear on the Root
Complex on real hardware.[3]

I notice that the HD Audio Controller on my T430 is an "Express Root
Complex Integrated Endpoint".  Is this a non-graphics example, or am I
misinterpreting the lspci output?

00:1b.0 Audio device: Intel Corporation 7 Series/C216 Chipset Family High 
Definition Audio Controller (rev 04)
Subsystem: Lenovo 7 Series/C216 Chipset Family High Definition Audio 
Controller
Flags: bus master, fast devsel, latency 0, IRQ 29, IOMMU group 6
Memory at f153 (64-bit, non-prefetchable) [size=16K]
Capabilities: [50] Power Management version 2
Capabilities: [60] MSI: Enable+ Count=1/1 Maskable- 64bit+
Capabilities: [70] Express Root Complex Integrated Endpoint, MSI 00
Capabilities: [100] Virtual Channel
Capabilities: [130] Root Complex Link
Kernel driver in use: snd_hda_intel
Kernel modules: snd_hda_intel

Thanks,
Kevin

 docs/pcie.txt | 5 +
 1 file changed, 5 insertions(+)

diff --git a/docs/pcie.txt b/docs/pcie.txt
index 89e3502075..a23d93849b 100644
--- a/docs/pcie.txt
+++ b/docs/pcie.txt
@@ -48,6 +48,11 @@ Place only the following kinds of devices directly on the 
Root Complex:
 strangely when PCI Express devices are integrated
 with the Root Complex.
 
+An exception to this rule is PCI Express devices which will be
+accessed using IO ports.  For example, guests using BIOS firmware
+require IO port access for graphics devices to provide VESA BIOS
+Extensions (VBE).
+
 (2) PCI Express Root Ports (ioh3420), for starting exclusively PCI Express
 hierarchies.
 
-- 
2.35.1

Re: [PATCH 1/2] hw/nvme: Implement shadow doorbell buffer support

2022-06-08 Thread Jinhao Fan



> On Jun 9, 2022, at 4:55 AM, Klaus Jensen  wrote:
> 
> On Jun  8 09:36, Jinhao Fan wrote:
>> Implement Doorbell Buffer Config command (Section 5.7 in NVMe Spec 1.3)
>> and Shadow Doorbell buffer & EventIdx buffer handling logic (Section 7.13
>> in NVMe Spec 1.3). For queues created before the Doorbell Buffer Config
>> command, the nvme_dbbuf_config function tries to associate each existing
>> SQ and CQ with its Shadow Doorbell buffer and EventIdx buffer address.
>> Queues created after the Doorbell Buffer Config command will have the
>> doorbell buffers associated with them when they are initialized.
>> 
>> In nvme_process_sq and nvme_post_cqe, proactively check for Shadow
>> Doorbell buffer changes instead of waiting for doorbell register changes.
>> This reduces the number of MMIOs.
>> 
>> Signed-off-by: Jinhao Fan 
>> ---
>> hw/nvme/ctrl.c   | 95 ++--
>> hw/nvme/nvme.h   |  8 
>> include/block/nvme.h |  2 +
>> 3 files changed, 102 insertions(+), 3 deletions(-)
>> 
>> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
>> index 03760ddeae..d3f6c432df 100644
>> --- a/hw/nvme/ctrl.c
>> +++ b/hw/nvme/ctrl.c
>> @@ -223,6 +223,7 @@ static const uint32_t nvme_cse_acs[256] = {
>> [NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP,
>> [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP,
>> [NVME_ADM_CMD_NS_ATTACHMENT]= NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC,
>> +[NVME_ADM_CMD_DBBUF_CONFIG] = NVME_CMD_EFF_CSUPP,
>> [NVME_ADM_CMD_FORMAT_NVM]   = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
>> };
>> 
>> @@ -1304,6 +1305,12 @@ static inline void nvme_blk_write(BlockBackend *blk, 
>> int64_t offset,
>> }
>> }
>> 
>> +static void nvme_update_cq_head(NvmeCQueue *cq)
>> +{
>> +pci_dma_read(>ctrl->parent_obj, cq->db_addr, >head,
>> +sizeof(cq->head));
>> +}
>> +
>> static void nvme_post_cqes(void *opaque)
>> {
>> NvmeCQueue *cq = opaque;
>> @@ -1316,6 +1323,10 @@ static void nvme_post_cqes(void *opaque)
>> NvmeSQueue *sq;
>> hwaddr addr;
>> 
>> +if (cq->cqid && n->dbbuf_enabled) {
>> +nvme_update_cq_head(cq);
> 
> Shouldn't we update the cq head eventidx here (prior to reading the
> doorbell buffer)? Like we do for the sq tail?

I’m not sure whether updating cq head eventidx is necessary. My understanding 
is that the purpose of updating eventidx is for the guest to notify the host 
about cq head or sq tail changes with MMIO. For sq tail this is necessary 
because other than MMIO, there is no way to trigger the host to check for sq 
tail updates (shadow doorbell here). For cq head this is different. The host 
will read cq head shadow doorbell every time it wants to post a cqe. Therefore, 
letting the guest notify the host on cq head changes seems unnecessary. 

Please correct me if I am missing something.

> 
>> +}
>> +
>> if (nvme_cq_full(cq)) {
>> break;
>> }
>> @@ -4237,6 +4248,7 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest 
>> *req)
>> static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
>>  uint16_t sqid, uint16_t cqid, uint16_t size)
>> {
>> +uint32_t stride = 4 << NVME_CAP_DSTRD(n->bar.cap);
>> int i;
>> NvmeCQueue *cq;
>> 
>> @@ -4256,6 +4268,11 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, 
>> uint64_t dma_addr,
>> }
>> sq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_sq, sq);
>> 
>> +if (sqid && n->dbbuf_dbs && n->dbbuf_eis) {
>> +sq->db_addr = n->dbbuf_dbs + 2 * sqid * stride;
>> +sq->ei_addr = n->dbbuf_eis + 2 * sqid * stride;
>> +}
>> +
>> assert(n->cq[cqid]);
>> cq = n->cq[cqid];
>> QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry);
>> @@ -4599,6 +4616,7 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, 
>> uint64_t dma_addr,
>>  uint16_t cqid, uint16_t vector, uint16_t size,
>>  uint16_t irq_enabled)
>> {
>> +uint32_t stride = 4 << NVME_CAP_DSTRD(n->bar.cap);
>> int ret;
>> 
>> if (msix_enabled(>parent_obj)) {
>> @@ -4615,6 +4633,10 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, 
>> uint64_t dma_addr,
>> cq->head = cq->tail = 0;
>> QTAILQ_INIT(>req_list);
>> QTAILQ_INIT(>sq_list);
>> +if (cqid && n->dbbuf_dbs && n->dbbuf_eis) {
>> +cq->db_addr = n->dbbuf_dbs + (2 * cqid + 1) * stride;
>> +cq->ei_addr = n->dbbuf_eis + (2 * cqid + 1) * stride;
>> +}
>> n->cq[cqid] = cq;
>> cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
>> }
>> @@ -5767,6 +5789,43 @@ out:
>> return status;
>> }
>> 
>> +static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req)
>> +{
>> +uint32_t stride = 4 << NVME_CAP_DSTRD(n->bar.cap);
>> +uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1);
>> +uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2);
>> +int i;
>> +
>> +/* Address should

Re: [PATCH v4 4/4] target/riscv: Force disable extensions if priv spec version does not match

On Thu, Jun 9, 2022 at 12:20 AM Anup Patel  wrote:
>
> We should disable extensions in riscv_cpu_realize() if minimum required
> priv spec version is not satisfied. This also ensures that machines with
> priv spec v1.11 (or lower) cannot enable H, V, and various multi-letter
> extensions.
>
> Fixes: a775398be2e ("target/riscv: Add isa extenstion strings to the
> device tree")

single line "Fixes" tag. Also the commit id should have at least 12 digits.

> Signed-off-by: Anup Patel 
> ---
>  target/riscv/cpu.c | 57 ++
>  1 file changed, 52 insertions(+), 5 deletions(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index 9f9c27a3f5..953ba2e445 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -43,9 +43,13 @@ static const char riscv_single_letter_exts[] = 
> "IEMAFDQCPVH";
>
>  struct isa_ext_data {
>  const char *name;
> -bool enabled;
> +int min_version;
> +bool *enabled;
>  };
>
> +#define ISA_EDATA_ENTRY(name, prop) {#name, PRIV_VERSION_1_10_0, 
> >cfg.prop}
> +#define ISA_EDATA_ENTRY2(name, min_ver, prop) {#name, min_ver, 
> >cfg.prop}
> +
>  const char * const riscv_int_regnames[] = {
>"x0/zero", "x1/ra",  "x2/sp",  "x3/gp",  "x4/tp",  "x5/t0",   "x6/t1",
>"x7/t2",   "x8/s0",  "x9/s1",  "x10/a0", "x11/a1", "x12/a2",  "x13/a3",
> @@ -513,8 +517,42 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
> **errp)
>  CPURISCVState *env = >env;
>  RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(dev);
>  CPUClass *cc = CPU_CLASS(mcc);
> -int priv_version = -1;
> +int i, priv_version = -1;
>  Error *local_err = NULL;
> +struct isa_ext_data isa_edata_arr[] = {
> +ISA_EDATA_ENTRY2(h, PRIV_VERSION_1_12_0, ext_h),
> +ISA_EDATA_ENTRY2(v, PRIV_VERSION_1_12_0, ext_v),
> +ISA_EDATA_ENTRY2(zicsr, PRIV_VERSION_1_10_0, ext_icsr),
> +ISA_EDATA_ENTRY2(zifencei, PRIV_VERSION_1_10_0, ext_ifencei),
> +ISA_EDATA_ENTRY2(zfh, PRIV_VERSION_1_12_0, ext_zfh),
> +ISA_EDATA_ENTRY2(zfhmin, PRIV_VERSION_1_12_0, ext_zfhmin),
> +ISA_EDATA_ENTRY2(zfinx, PRIV_VERSION_1_12_0, ext_zfinx),
> +ISA_EDATA_ENTRY2(zdinx, PRIV_VERSION_1_12_0, ext_zdinx),
> +ISA_EDATA_ENTRY2(zba, PRIV_VERSION_1_12_0, ext_zba),
> +ISA_EDATA_ENTRY2(zbb, PRIV_VERSION_1_12_0, ext_zbb),
> +ISA_EDATA_ENTRY2(zbc, PRIV_VERSION_1_12_0, ext_zbc),
> +ISA_EDATA_ENTRY2(zbkb, PRIV_VERSION_1_12_0, ext_zbkb),
> +ISA_EDATA_ENTRY2(zbkc, PRIV_VERSION_1_12_0, ext_zbkc),
> +ISA_EDATA_ENTRY2(zbkx, PRIV_VERSION_1_12_0, ext_zbkx),
> +ISA_EDATA_ENTRY2(zbs, PRIV_VERSION_1_12_0, ext_zbs),
> +ISA_EDATA_ENTRY2(zk, PRIV_VERSION_1_12_0, ext_zk),
> +ISA_EDATA_ENTRY2(zkn, PRIV_VERSION_1_12_0, ext_zkn),
> +ISA_EDATA_ENTRY2(zknd, PRIV_VERSION_1_12_0, ext_zknd),
> +ISA_EDATA_ENTRY2(zkne, PRIV_VERSION_1_12_0, ext_zkne),
> +ISA_EDATA_ENTRY2(zknh, PRIV_VERSION_1_12_0, ext_zknh),
> +ISA_EDATA_ENTRY2(zkr, PRIV_VERSION_1_12_0, ext_zkr),
> +ISA_EDATA_ENTRY2(zks, PRIV_VERSION_1_12_0, ext_zks),
> +ISA_EDATA_ENTRY2(zksed, PRIV_VERSION_1_12_0, ext_zksed),
> +ISA_EDATA_ENTRY2(zksh, PRIV_VERSION_1_12_0, ext_zksh),
> +ISA_EDATA_ENTRY2(zkt, PRIV_VERSION_1_12_0, ext_zkt),
> +ISA_EDATA_ENTRY2(zve32f, PRIV_VERSION_1_12_0, ext_zve32f),
> +ISA_EDATA_ENTRY2(zve64f, PRIV_VERSION_1_12_0, ext_zve64f),
> +ISA_EDATA_ENTRY2(zhinx, PRIV_VERSION_1_12_0, ext_zhinx),
> +ISA_EDATA_ENTRY2(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin),
> +ISA_EDATA_ENTRY2(svinval, PRIV_VERSION_1_12_0, ext_svinval),
> +ISA_EDATA_ENTRY2(svnapot, PRIV_VERSION_1_12_0, ext_svnapot),
> +ISA_EDATA_ENTRY2(svpbmt, PRIV_VERSION_1_12_0, ext_svpbmt),
> +};
>
>  cpu_exec_realizefn(cs, _err);
>  if (local_err != NULL) {
> @@ -541,6 +579,17 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
> **errp)
>  set_priv_version(env, priv_version);
>  }
>
> +/* Force disable extensions if priv spec version does not match */
> +for (i = 0; i < ARRAY_SIZE(isa_edata_arr); i++) {
> +if (*isa_edata_arr[i].enabled &&
> +(env->priv_ver < isa_edata_arr[i].min_version)) {
> +*isa_edata_arr[i].enabled = false;
> +warn_report("disabling %s extension for hart 0x%lx because "
> +"privilege spec version does not match",
> +isa_edata_arr[i].name, (unsigned long)env->mhartid);
> +}
> +}
> +
>  if (cpu->cfg.mmu) {
>  riscv_set_feature(env, RISCV_FEATURE_MMU);
>  }
> @@ -1011,8 +1060,6 @@ static void riscv_cpu_class_init(ObjectClass *c, void 
> *data)
>  device_class_set_props(dc, riscv_cpu_properties);
>  }
>
> -#define ISA_EDATA_ENTRY(name, prop) {#name, cpu->cfg.prop}
> -
>  static void riscv_isa_string_ext(RISCVCPU *cpu, char **isa_str, int 
> max_str_len)
>

Re: [PATCH v4 2/4] target/riscv: Add dummy mcountinhibit CSR for priv spec v1.11 or higher

On Thu, Jun 9, 2022 at 12:15 AM Anup Patel  wrote:
>
> The mcountinhibit CSR is mandatory for priv spec v1.11 or higher. For
> implementation that don't want to implement can simply have a dummy
> mcountinhibit which always zero.

which is always

>
> Fixes: a4b2fa433125 ("target/riscv: Introduce privilege version field in
> the CSR ops.")

The "Fixes" tag should be put in one single line, so that scripts
relying on such won't be broken.

> Signed-off-by: Anup Patel 
> Reviewed-by: Frank Chang 
> Reviewed-by: Alistair Francis 
> ---
>  target/riscv/cpu_bits.h | 3 +++
>  target/riscv/csr.c  | 2 ++
>  2 files changed, 5 insertions(+)
>
> diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
> index 4d04b20d06..4a55c6a709 100644
> --- a/target/riscv/cpu_bits.h
> +++ b/target/riscv/cpu_bits.h
> @@ -159,6 +159,9 @@
>  #define CSR_MTVEC   0x305
>  #define CSR_MCOUNTEREN  0x306
>
> +/* Machine Counter Setup */
> +#define CSR_MCOUNTINHIBIT   0x320
> +
>  /* 32-bit only */
>  #define CSR_MSTATUSH0x310
>
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index 6dbe9b541f..409a209f14 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -3391,6 +3391,8 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
>  [CSR_MIE] = { "mie",any,   NULL,NULL,rmw_mie 
>   },
>  [CSR_MTVEC]   = { "mtvec",  any,   read_mtvec,   write_mtvec 
>   },
>  [CSR_MCOUNTEREN]  = { "mcounteren", any,   read_mcounteren,  
> write_mcounteren  },
> +[CSR_MCOUNTINHIBIT] = { "mcountinhibit", any, read_zero, write_ignore,
> + .min_priv_ver = 
> PRIV_VERSION_1_11_0 },
>
>  [CSR_MSTATUSH]= { "mstatush",   any32, read_mstatush,
> write_mstatush},
>
> --

Otherwise,
Reviewed-by: Bin Meng

Re: [PATCH v4 1/4] target/riscv: Don't force update priv spec version to latest

On Thu, Jun 9, 2022 at 12:15 AM Anup Patel  wrote:
>
> The riscv_cpu_realize() sets priv spec verion to v1.12 when it is

typo: version

> when "env->priv_ver == 0" (i.e. default v1.10) because the enum
> value of priv spec v1.10 is zero.
>
> Due to above issue, the sifive_u machine will see priv spec v1.12
> instead of priv spec v1.10.
>
> To fix this issue, we set latest priv spec version (i.e. v1.12)
> for base rv64/rv32 cpu and riscv_cpu_realize() will override priv
> spec version only when "cpu->cfg.priv_spec != NULL".
>
> Fixes: 7100fe6c2441 ("target/riscv: Enable privileged spec version 1.12")
> Signed-off-by: Anup Patel 
> Reviewed-by: Frank Chang 
> Reviewed-by: Alistair Francis 
> Reviewed-by: Atish Patra 
> ---
>  target/riscv/cpu.c | 10 ++
>  1 file changed, 6 insertions(+), 4 deletions(-)
>

Otherwise,
Reviewed-by: Bin Meng

[PATCH 3/3] target/riscv: Skip parsing extensions from properties for KVM

When running with accel=kvm, the extensions are actually reported by
KVM, so let's skip the logic that parses them from properties for KVM.

Signed-off-by: Bin Meng 
---

 target/riscv/cpu.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 61d1737741..ff911017c3 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -581,8 +581,11 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
 }
 assert(env->misa_mxl_max == env->misa_mxl);
 
-/* If only MISA_EXT is unset for misa, then set it from properties */
-if (env->misa_ext == 0) {
+/*
+ * If only MISA_EXT is unset for misa, then set it from properties.
+ * For KVM, misa is told by KVM so properties are ignored.
+ */
+if (!kvm_enabled() && env->misa_ext == 0) {
 uint32_t ext = 0;
 
 /* Do some ISA extension error checking */
-- 
2.34.1

[PATCH 2/3] target/riscv: kvm: Set env->misa_ext_mask to the supported value

env->misa_ext_mask might be set to the same value of env->misa_ext
in riscv_cpu_realize() based on given properties, but it may differ
from what KVM tells us.

Let's set the correct env->misa_ext_mask in kvm_arch_init_vcpu().

Signed-off-by: Bin Meng 
---

 target/riscv/kvm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index 70b4cff06f..c592980299 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -410,7 +410,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
 if (ret) {
 return ret;
 }
-env->misa_ext = isa;
+env->misa_ext_mask = env->misa_ext = isa;
 
 return ret;
 }
-- 
2.34.1

[PATCH 1/3] target/riscv: Remove the redundant initialization of env->misa_mxl

env->misa_mxl was already set in the RISC-V cpu init routine, and
validated at the beginning of riscv_cpu_realize(). There is no need
to do a redundant initialization later.

Signed-off-by: Bin Meng 
---

 target/riscv/cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index a91253d4bd..61d1737741 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -752,7 +752,7 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
 ext |= RVJ;
 }
 
-set_misa(env, env->misa_mxl, ext);
+env->misa_ext_mask = env->misa_ext = ext;
 }
 
 riscv_cpu_register_gdb_regs_for_features(cs);
-- 
2.34.1

PING: RE: RE: [PATCH v8 1/1] crypto: Introduce RSA algorithm

2022-06-08 Thread zhenwei pi


Hi, Michael

The QEMU side was reviewed by Gonglei a week ago. To avoid this being 
ignored, PING!


On 5/31/22 20:08, Gonglei (Arei) wrote:




-Original Message-
From: zhenwei pi [mailto:pizhen...@bytedance.com]
Sent: Tuesday, May 31, 2022 9:48 AM
To: Gonglei (Arei) 
Cc: qemu-devel@nongnu.org; m...@redhat.com;
virtualizat...@lists.linux-foundation.org; helei.si...@bytedance.com;
berra...@redhat.com
Subject: Re: RE: [PATCH v8 1/1] crypto: Introduce RSA algorithm

On 5/30/22 21:31, Gonglei (Arei) wrote:




-Original Message-
From: zhenwei pi [mailto:pizhen...@bytedance.com]
Sent: Friday, May 27, 2022 4:48 PM
To: m...@redhat.com; Gonglei (Arei) 
Cc: qemu-devel@nongnu.org; virtualizat...@lists.linux-foundation.org;
helei.si...@bytedance.com; berra...@redhat.com; zhenwei pi

Subject: [PATCH v8 1/1] crypto: Introduce RSA algorithm



Skip...


+static int64_t
+virtio_crypto_create_asym_session(VirtIOCrypto *vcrypto,
+   struct virtio_crypto_akcipher_create_session_req
*sess_req,
+   uint32_t queue_id, uint32_t opcode,
+   struct iovec *iov, unsigned int out_num) {
+VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto);
+CryptoDevBackendSessionInfo info = {0};
+CryptoDevBackendAsymSessionInfo *asym_info;
+int64_t session_id;
+int queue_index;
+uint32_t algo, keytype, keylen;
+g_autofree uint8_t *key = NULL;
+Error *local_err = NULL;
+
+algo = ldl_le_p(_req->para.algo);
+keytype = ldl_le_p(_req->para.keytype);
+keylen = ldl_le_p(_req->para.keylen);
+
+if ((keytype != VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PUBLIC)
+ && (keytype !=

VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PRIVATE)) {

+error_report("unsupported asym keytype: %d", keytype);
+return -VIRTIO_CRYPTO_NOTSUPP;
+}
+
+if (keylen) {
+key = g_malloc(keylen);
+if (iov_to_buf(iov, out_num, 0, key, keylen) != keylen) {
+virtio_error(vdev, "virtio-crypto asym key incorrect");
+return -EFAULT;


Memory leak.


+}
+iov_discard_front(, _num, keylen);
+}
+
+info.op_code = opcode;
+asym_info = _sess_info;
+asym_info->algo = algo;
+asym_info->keytype = keytype;
+asym_info->keylen = keylen;
+asym_info->key = key;
+switch (asym_info->algo) {
+case VIRTIO_CRYPTO_AKCIPHER_RSA:
+asym_info->u.rsa.padding_algo =
+ldl_le_p(_req->para.u.rsa.padding_algo);
+asym_info->u.rsa.hash_algo =
+ldl_le_p(_req->para.u.rsa.hash_algo);
+break;
+
+/* TODO DSA handling */
+
+default:
+return -VIRTIO_CRYPTO_ERR;
+}
+
+queue_index = virtio_crypto_vq2q(queue_id);
+session_id =
+ cryptodev_backend_create_session(vcrypto->cryptodev,
,
+ queue_index, _err);
+if (session_id < 0) {
+if (local_err) {
+error_report_err(local_err);
+}
+return -VIRTIO_CRYPTO_ERR;
+}
+
+return session_id;


Where to free the key at both normal and exceptional paths?



Hi, Lei

The key is declared with g_autofree:
g_autofree uint8_t *key = NULL;



OK. For the patch:

Reviewed-by: Gonglei 


Regards,
-Gonglei
 



--
zhenwei pi

[PATCH v2] edk2: Use TPM2_ENABLE and TPM2_CONFIG_ENABLE for newer edk2

2022-06-08 Thread Stefan Berger

The edk2 commit 4de8d61bcec ("OvmfPkg: rework TPM configuration") switched
the x86_64 build from using TPM_ENABLE to TPM2_ENABLE and TPM1_ENABLE to
be similar to the ARM build. Adapt the QEMU edk2 Makefile to build with
TPM2_ENABLE. QEMU v7.0.0 had lost the TPM 2 support in edk2 and this
restores it.

Signed-off-by: Stefan Berger 
Reviewed-by: Marc-André Lureau 
---
 roms/Makefile.edk2 | 12 
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/roms/Makefile.edk2 b/roms/Makefile.edk2
index 485f2244b1..a6eb14f215 100644
--- a/roms/Makefile.edk2
+++ b/roms/Makefile.edk2
@@ -101,8 +101,7 @@ submodules:
-D NETWORK_IP6_ENABLE \
-D NETWORK_HTTP_BOOT_ENABLE \
-D NETWORK_TLS_ENABLE \
-   -D TPM_ENABLE \
-   -D TPM_CONFIG_ENABLE
+   -D TPM2_ENABLE
cp edk2/Build/OvmfIa32/$(target)_$(call toolchain,i386)/FV/OVMF_CODE.fd 
$@
 
 ../pc-bios/edk2-i386-secure-code.fd: submodules
@@ -113,8 +112,7 @@ submodules:
-D NETWORK_IP6_ENABLE \
-D NETWORK_HTTP_BOOT_ENABLE \
-D NETWORK_TLS_ENABLE \
-   -D TPM_ENABLE \
-   -D TPM_CONFIG_ENABLE \
+   -D TPM2_ENABLE \
-D SECURE_BOOT_ENABLE \
-D SMM_REQUIRE
cp edk2/Build/OvmfIa32/$(target)_$(call toolchain,i386)/FV/OVMF_CODE.fd 
$@
@@ -127,8 +125,7 @@ submodules:
-D NETWORK_IP6_ENABLE \
-D NETWORK_HTTP_BOOT_ENABLE \
-D NETWORK_TLS_ENABLE \
-   -D TPM_ENABLE \
-   -D TPM_CONFIG_ENABLE
+   -D TPM2_ENABLE
cp edk2/Build/OvmfX64/$(target)_$(call 
toolchain,x86_64)/FV/OVMF_CODE.fd $@
 
 ../pc-bios/edk2-x86_64-secure-code.fd: submodules
@@ -140,8 +137,7 @@ submodules:
-D NETWORK_IP6_ENABLE \
-D NETWORK_HTTP_BOOT_ENABLE \
-D NETWORK_TLS_ENABLE \
-   -D TPM_ENABLE \
-   -D TPM_CONFIG_ENABLE \
+   -D TPM2_ENABLE \
-D SECURE_BOOT_ENABLE \
-D SMM_REQUIRE
cp edk2/Build/Ovmf3264/$(target)_$(call 
toolchain,x86_64)/FV/OVMF_CODE.fd $@
-- 
2.35.3

Re: [PATCH v6 resend 4/4] vdpa: add vdpa-dev-pci support

2022-06-08 Thread longpeng2--- via




在 2022/6/9 7:10, Michael S. Tsirkin 写道:

On Sat, May 14, 2022 at 12:11:07PM +0800, Longpeng(Mike) wrote:

From: Longpeng 

Supports vdpa-dev-pci, we can use the device as follows:

-device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-X

Reviewed-by: Stefano Garzarella 
Signed-off-by: Longpeng 



Build fails:

FAILED: libqemu-aarch64-softmmu.fa.p/hw_virtio_vdpa-dev-pci.c.o
cc -m64 -mcx16 -Ilibqemu-aarch64-softmmu.fa.p -I. -I.. -Itarget/arm -I../target/arm -Iqapi -Itrace 
-Iui -Iui/shader -I/usr/include/pixman-1 -I/usr/include/capstone -I/usr/include/spice-server 
-I/usr/include/spice-1 -I/usr/include/cacard -I/usr/include/glib-2.0 -I/usr/lib64/glib-2.0/include 
-I/usr/include/sysprof-4 -I/usr/include/nss3 -I/usr/include/nspr4 -I/usr/include/PCSC 
-fdiagnostics-color=auto -Wall -Winvalid-pch -Werror -std=gnu11 -O2 -g -isystem 
/scm/qemu/linux-headers -isystem linux-headers -iquote . -iquote /scm/qemu -iquote 
/scm/qemu/include -iquote /scm/qemu/disas/libvixl -iquote /scm/qemu/tcg/i386 -pthread 
-U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE 
-Wstrict-prototypes -Wredundant-decls -Wundef -Wwrite-strings -Wmissing-prototypes 
-fno-strict-aliasing -fno-common -fwrapv -Wold-style-declaration -Wold-style-definition 
-Wtype-limits -Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers -Wempty-body 
-Wnested-externs -Wendif-labels -Wexpansion-to-defined -Wimplicit-fallthrough=2 
-Wno-missing-include-dirs -Wno-shift-negative-value -Wno-psabi -fstack-protector-strong -fPIE 
-isystem../linux-headers -isystemlinux-headers -DNEED_CPU_H 
'-DCONFIG_TARGET="aarch64-softmmu-config-target.h"' 
'-DCONFIG_DEVICES="aarch64-softmmu-config-devices.h"' -MD -MQ 
libqemu-aarch64-softmmu.fa.p/hw_virtio_vdpa-dev-pci.c.o -MF 
libqemu-aarch64-softmmu.fa.p/hw_virtio_vdpa-dev-pci.c.o.d -o 
libqemu-aarch64-softmmu.fa.p/hw_virtio_vdpa-dev-pci.c.o -c ../hw/virtio/vdpa-dev-pci.c
../hw/virtio/vdpa-dev-pci.c:26:10: fatal error: virtio-pci.h: No such file or 
directory
26 | #include "virtio-pci.h"
   |  ^~
compilation terminated.



The following patch moved the virtio-pci.h to the include/ directory:

e1b1f53 2022-05-16 04:38:40 -0400 hw/virtio: move virtio-pci.h into 
shared include space


I'll rebase this series soon.

Thanks.




---
  hw/virtio/meson.build|   1 +
  hw/virtio/vdpa-dev-pci.c | 102 +++
  2 files changed, 103 insertions(+)
  create mode 100644 hw/virtio/vdpa-dev-pci.c

diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
index 8f6f86db71..c2da69616f 100644
--- a/hw/virtio/meson.build
+++ b/hw/virtio/meson.build
@@ -50,6 +50,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: 
files('virtio-serial-pc
  virtio_pci_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: 
files('virtio-pmem-pci.c'))
  virtio_pci_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: 
files('virtio-iommu-pci.c'))
  virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: 
files('virtio-mem-pci.c'))
+virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: 
files('vdpa-dev-pci.c'))
  
  virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss)
  
diff --git a/hw/virtio/vdpa-dev-pci.c b/hw/virtio/vdpa-dev-pci.c

new file mode 100644
index 00..fde35dfc92
--- /dev/null
+++ b/hw/virtio/vdpa-dev-pci.c
@@ -0,0 +1,102 @@
+/*
+ * Vhost Vdpa Device PCI Bindings
+ *
+ * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved.
+ *
+ * Authors:
+ *   Longpeng 
+ *
+ * Largely based on the "vhost-user-blk-pci.c" and "vhost-user-blk.c"
+ * implemented by:
+ *   Changpeng Liu 
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include 
+#include 
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/vdpa-dev.h"
+#include "hw/pci/pci.h"
+#include "hw/qdev-properties.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/module.h"
+#include "virtio-pci.h"
+#include "qom/object.h"
+
+
+typedef struct VhostVdpaDevicePCI VhostVdpaDevicePCI;
+
+#define TYPE_VHOST_VDPA_DEVICE_PCI "vhost-vdpa-device-pci-base"
+DECLARE_INSTANCE_CHECKER(VhostVdpaDevicePCI, VHOST_VDPA_DEVICE_PCI,
+ TYPE_VHOST_VDPA_DEVICE_PCI)
+
+struct VhostVdpaDevicePCI {
+VirtIOPCIProxy parent_obj;
+VhostVdpaDevice vdev;
+};
+
+static void vhost_vdpa_device_pci_instance_init(Object *obj)
+{
+VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(obj);
+
+virtio_instance_init_common(obj, >vdev, sizeof(dev->vdev),
+TYPE_VHOST_VDPA_DEVICE);
+object_property_add_alias(obj, "bootindex", OBJECT(>vdev),
+  "bootindex");
+}
+
+static Property vhost_vdpa_device_pci_properties[] = {
+DEFINE_PROP_END_OF_LIST(),
+};
+
+static int vhost_vdpa_device_pci_post_init(VhostVdpaDevice *v, Error **errp)
+{
+VhostVdpaDevicePCI

[PATCH] target/riscv: trans_rvv: Avoid assert for RV32 and e64

2022-06-08 Thread Alistair Francis

From: Alistair Francis 

When running a 32-bit guest, with a e64 vmv.v.x and vl_eq_vlmax set to
true the `tcg_debug_assert(vece <= MO_32)` will be triggered inside
tcg_gen_gvec_dup_i32().

This patch checks that condition and instead uses tcg_gen_gvec_dup_i64()
if required.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1028
Suggested-by: Robert Bu 
Signed-off-by: Alistair Francis 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 391c61fe93..6b27d8e91e 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -2097,8 +2097,16 @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x 
*a)
 s1 = get_gpr(s, a->rs1, EXT_SIGN);
 
 if (s->vl_eq_vlmax) {
-tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd),
-MAXSZ(s), MAXSZ(s), s1);
+if (get_xl(s) == MXL_RV32 && s->sew == MO_64) {
+TCGv_i64 s1_i64 = tcg_temp_new_i64();
+tcg_gen_ext_tl_i64(s1_i64, s1);
+tcg_gen_gvec_dup_i64(s->sew, vreg_ofs(s, a->rd),
+ MAXSZ(s), MAXSZ(s), s1_i64);
+tcg_temp_free_i64(s1_i64);
+} else {
+tcg_gen_gvec_dup_tl(s->sew, vreg_ofs(s, a->rd),
+MAXSZ(s), MAXSZ(s), s1);
+}
 } else {
 TCGv_i32 desc;
 TCGv_i64 s1_i64 = tcg_temp_new_i64();
-- 
2.36.1

Re: [PATCH v6 resend 4/4] vdpa: add vdpa-dev-pci support

2022-06-08 Thread Michael S. Tsirkin

On Sat, May 14, 2022 at 12:11:07PM +0800, Longpeng(Mike) wrote:
> From: Longpeng 
> 
> Supports vdpa-dev-pci, we can use the device as follows:
> 
> -device vhost-vdpa-device-pci,vhostdev=/dev/vhost-vdpa-X
> 
> Reviewed-by: Stefano Garzarella 
> Signed-off-by: Longpeng 


Build fails:

FAILED: libqemu-aarch64-softmmu.fa.p/hw_virtio_vdpa-dev-pci.c.o 
cc -m64 -mcx16 -Ilibqemu-aarch64-softmmu.fa.p -I. -I.. -Itarget/arm 
-I../target/arm -Iqapi -Itrace -Iui -Iui/shader -I/usr/include/pixman-1 
-I/usr/include/capstone -I/usr/include/spice-server -I/usr/include/spice-1 
-I/usr/include/cacard -I/usr/include/glib-2.0 -I/usr/lib64/glib-2.0/include 
-I/usr/include/sysprof-4 -I/usr/include/nss3 -I/usr/include/nspr4 
-I/usr/include/PCSC -fdiagnostics-color=auto -Wall -Winvalid-pch -Werror 
-std=gnu11 -O2 -g -isystem /scm/qemu/linux-headers -isystem linux-headers 
-iquote . -iquote /scm/qemu -iquote /scm/qemu/include -iquote 
/scm/qemu/disas/libvixl -iquote /scm/qemu/tcg/i386 -pthread -U_FORTIFY_SOURCE 
-D_FORTIFY_SOURCE=2 -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE 
-Wstrict-prototypes -Wredundant-decls -Wundef -Wwrite-strings 
-Wmissing-prototypes -fno-strict-aliasing -fno-common -fwrapv 
-Wold-style-declaration -Wold-style-definition -Wtype-limits -Wformat-security 
-Wformat-y2k -Winit-self -Wignored-qualifiers -Wempty-body -Wnested-externs 
-Wendif-labels -Wexpansion-to-defined -Wimplicit-fallthrough=2 
-Wno-missing-include-dirs -Wno-shift-negative-value -Wno-psabi 
-fstack-protector-strong -fPIE -isystem../linux-headers -isystemlinux-headers 
-DNEED_CPU_H '-DCONFIG_TARGET="aarch64-softmmu-config-target.h"' 
'-DCONFIG_DEVICES="aarch64-softmmu-config-devices.h"' -MD -MQ 
libqemu-aarch64-softmmu.fa.p/hw_virtio_vdpa-dev-pci.c.o -MF 
libqemu-aarch64-softmmu.fa.p/hw_virtio_vdpa-dev-pci.c.o.d -o 
libqemu-aarch64-softmmu.fa.p/hw_virtio_vdpa-dev-pci.c.o -c 
../hw/virtio/vdpa-dev-pci.c
../hw/virtio/vdpa-dev-pci.c:26:10: fatal error: virtio-pci.h: No such file or 
directory
   26 | #include "virtio-pci.h"
  |  ^~
compilation terminated.


> ---
>  hw/virtio/meson.build|   1 +
>  hw/virtio/vdpa-dev-pci.c | 102 +++
>  2 files changed, 103 insertions(+)
>  create mode 100644 hw/virtio/vdpa-dev-pci.c
> 
> diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
> index 8f6f86db71..c2da69616f 100644
> --- a/hw/virtio/meson.build
> +++ b/hw/virtio/meson.build
> @@ -50,6 +50,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: 
> files('virtio-serial-pc
>  virtio_pci_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: 
> files('virtio-pmem-pci.c'))
>  virtio_pci_ss.add(when: 'CONFIG_VIRTIO_IOMMU', if_true: 
> files('virtio-iommu-pci.c'))
>  virtio_pci_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: 
> files('virtio-mem-pci.c'))
> +virtio_pci_ss.add(when: 'CONFIG_VHOST_VDPA_DEV', if_true: 
> files('vdpa-dev-pci.c'))
>  
>  virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss)
>  
> diff --git a/hw/virtio/vdpa-dev-pci.c b/hw/virtio/vdpa-dev-pci.c
> new file mode 100644
> index 00..fde35dfc92
> --- /dev/null
> +++ b/hw/virtio/vdpa-dev-pci.c
> @@ -0,0 +1,102 @@
> +/*
> + * Vhost Vdpa Device PCI Bindings
> + *
> + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All Rights Reserved.
> + *
> + * Authors:
> + *   Longpeng 
> + *
> + * Largely based on the "vhost-user-blk-pci.c" and "vhost-user-blk.c"
> + * implemented by:
> + *   Changpeng Liu 
> + *
> + * This work is licensed under the terms of the GNU LGPL, version 2 or later.
> + * See the COPYING.LIB file in the top-level directory.
> + */
> +#include "qemu/osdep.h"
> +#include 
> +#include 
> +#include "hw/virtio/virtio.h"
> +#include "hw/virtio/vdpa-dev.h"
> +#include "hw/pci/pci.h"
> +#include "hw/qdev-properties.h"
> +#include "qapi/error.h"
> +#include "qemu/error-report.h"
> +#include "qemu/module.h"
> +#include "virtio-pci.h"
> +#include "qom/object.h"
> +
> +
> +typedef struct VhostVdpaDevicePCI VhostVdpaDevicePCI;
> +
> +#define TYPE_VHOST_VDPA_DEVICE_PCI "vhost-vdpa-device-pci-base"
> +DECLARE_INSTANCE_CHECKER(VhostVdpaDevicePCI, VHOST_VDPA_DEVICE_PCI,
> + TYPE_VHOST_VDPA_DEVICE_PCI)
> +
> +struct VhostVdpaDevicePCI {
> +VirtIOPCIProxy parent_obj;
> +VhostVdpaDevice vdev;
> +};
> +
> +static void vhost_vdpa_device_pci_instance_init(Object *obj)
> +{
> +VhostVdpaDevicePCI *dev = VHOST_VDPA_DEVICE_PCI(obj);
> +
> +virtio_instance_init_common(obj, >vdev, sizeof(dev->vdev),
> +TYPE_VHOST_VDPA_DEVICE);
> +object_property_add_alias(obj, "bootindex", OBJECT(>vdev),
> +  "bootindex");
> +}
> +
> +static Property vhost_vdpa_device_pci_properties[] = {
> +DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +static int vhost_vdpa_device_pci_post_init(VhostVdpaDevice *v, Error **errp)
> +{
> +VhostVdpaDevicePCI *dev = container_of(v, VhostVdpaDevicePCI, vdev);
> +VirtIOPCIProxy

Re: [PATCH v4 0/3] target/m68k: semihosting cleanup

2022-06-08 Thread Laurent Vivier


Le 08/06/2022 à 21:05, Richard Henderson a écrit :

On 6/8/22 11:08, Laurent Vivier wrote:

Le 08/06/2022 à 16:53, Richard Henderson a écrit :

On 6/8/22 02:36, Laurent Vivier wrote:

Le 08/06/2022 à 00:26, Richard Henderson a écrit :

Based-on: <20220607204557.658541-1-richard.hender...@linaro.org>
("[PATCH v4 00/53] semihosting cleanup")

Changes for v4:
   * Split out of v2.
   * Convert host errno to gdb errno, which for m68k is guest errno.



How do you test semihosting on m68k?


I have only compiled this.  I had been working on adding something to tests/tcg/m68k/, which is 
where those halt patches came from, but haven't finished.




But what is the use case of semihosting on m68k?
Is there a kernel implementation available for that?


Not that I'm aware of.  Do you want to remove it entirely instead?


No, as long as you are fixing it I have no problem keeping it.

Thanks,
Laurent

Re: [PATCH] ppc: fix boot with sam460ex

2022-06-08 Thread Michael S. Tsirkin

On Mon, Jun 06, 2022 at 10:51:23AM -0300, Daniel Henrique Barboza wrote:
> Michael,
> 
> 
> I'll queue this patch with the commit msg proposed by Zoltan as follows:
> 
> 
> Author: Michael S. Tsirkin 
> Date:   Thu May 26 18:43:43 2022 -0400
> 
> ppc: fix boot with sam460ex
> Recent changes to pcie_host corrected size of its internal region to
> match what it expects: only the low 28 bits are ever decoded. Previous
> code just ignored bit 29 (if size was 1 << 29) in the address which does
> not make much sense.  We are now asserting on size > 1 << 28 instead,
> but PPC 4xx actually allows guest to configure different sizes, and some
> firmwares seem to set it to 1 << 29.
> This caused e.g. qemu-system-ppc -M sam460ex to exit with an assert when
> the guest writes a value to CFGMSK register when trying to map config
> space. This is done in the board firmware in ppc4xx_init_pcie_port() in
> roms/u-boot-sam460ex/arch/powerpc/cpu/ppc4xx/4xx_pcie.c
> It's not clear what the proper fix should be but for now let's force the
> size to 256MB, so anything outside the expected address range is
> ignored.
> 
> 
> Is that ok with you?
> 
> 
> Thanks,
> 
> 
> Daniel


ACK

> 
> On 5/26/22 19:43, Michael S. Tsirkin wrote:
> > Recent changes to pcie_host corrected size of its internal region to
> > match what it expects - only the low 28 bits are ever decoded. Previous
> > code just ignored bit 29 (if size was 1 << 29) in the address which does
> > not make much sense.  We are now asserting on size > 1 << 28 instead,
> > but it so happened that ppc actually allows guest to configure as large
> > a size as it wants to, and current firmware set it to 1 << 29.
> > 
> > With just qemu-system-ppc -M sam460ex this triggers an assert which
> > seems to happen when the guest (board firmware?) writes a value to
> > CFGMSK reg:
> > 
> > (gdb) bt
> > 
> > This is done in the board firmware here:
> > 
> > https://git.qemu.org/?p=u-boot-sam460ex.git;a=blob;f=arch/powerpc/cpu/ppc4xx/4xx_pcie.c;h=13348be93dccc74c13ea043d6635a7f8ece4b5f0;hb=HEAD
> > 
> > when trying to map config space.
> > 
> > Note that what firmware does matches
> > https://www.hardware.com.br/comunidade/switch-cisco/1128380/
> > 
> > So it's not clear what the proper fix should be.
> > 
> > However, allowing guest to trigger an assert in qemu is not good practice 
> > anyway.
> > 
> > For now let's just force the mask to 256MB on guest write, this way
> > anything outside the expected address range is ignored.
> > 
> > Fixes: commit 1f1a7b2269 ("include/hw/pci/pcie_host: Correct 
> > PCIE_MMCFG_SIZE_MAX")
> > Reviewed-by: BALATON Zoltan 
> > Tested-by: BALATON Zoltan 
> > Signed-off-by: Michael S. Tsirkin 
> > ---
> > 
> > Affected system is orphan so I guess I will merge the patch unless
> > someone objects.
> > 
> >   hw/ppc/ppc440_uc.c | 8 
> >   1 file changed, 8 insertions(+)
> > 
> > diff --git a/hw/ppc/ppc440_uc.c b/hw/ppc/ppc440_uc.c
> > index 993e3ba955..a1ecf6dd1c 100644
> > --- a/hw/ppc/ppc440_uc.c
> > +++ b/hw/ppc/ppc440_uc.c
> > @@ -1180,6 +1180,14 @@ static void dcr_write_pcie(void *opaque, int dcrn, 
> > uint32_t val)
> >   case PEGPL_CFGMSK:
> >   s->cfg_mask = val;
> >   size = ~(val & 0xfffe) + 1;
> > +/*
> > + * Firmware sets this register to E0000001. Why we are not sure,
> > + * but the current guess is anything above PCIE_MMCFG_SIZE_MAX is
> > + * ignored.
> > + */
> > +if (size > PCIE_MMCFG_SIZE_MAX) {
> > +size = PCIE_MMCFG_SIZE_MAX;
> > +}
> >   pcie_host_mmcfg_update(PCIE_HOST_BRIDGE(s), val & 1, s->cfg_base, 
> > size);
> >   break;
> >   case PEGPL_MSGBAH:

[PATCH v2 4/6] bsd-user/bsd-file.h: Add implementations for read, pread, readv and preadv

Implement do_bsd_{read,pread,readv,preadv}. Connect them to the system
call table.

Signed-off-by: Stacey Son 
Signed-off-by: Kyle Evans 
Signed-off-by: Warner Losh 
---
 bsd-user/bsd-file.h   | 79 +++
 bsd-user/freebsd/os-syscall.c | 28 +
 2 files changed, 107 insertions(+)

diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h
index a6bff3b8c26..839f8c5c55a 100644
--- a/bsd-user/bsd-file.h
+++ b/bsd-user/bsd-file.h
@@ -27,4 +27,83 @@ extern struct iovec *lock_iovec(int type, abi_ulong 
target_addr, int count,
 extern void unlock_iovec(struct iovec *vec, abi_ulong target_addr, int count,
 int copy);
 
+ssize_t safe_read(int fd, void *buf, size_t nbytes);
+ssize_t safe_pread(int fd, void *buf, size_t nbytes, off_t offset);
+ssize_t safe_readv(int fd, const struct iovec *iov, int iovcnt);
+ssize_t safe_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset);
+
+/* read(2) */
+static abi_long do_bsd_read(abi_long arg1, abi_long arg2, abi_long arg3)
+{
+abi_long ret;
+void *p;
+
+p = lock_user(VERIFY_WRITE, arg2, arg3, 0);
+if (p == NULL) {
+return -TARGET_EFAULT;
+}
+ret = get_errno(safe_read(arg1, p, arg3));
+unlock_user(p, arg2, ret);
+
+return ret;
+}
+
+/* pread(2) */
+static abi_long do_bsd_pread(void *cpu_env, abi_long arg1,
+abi_long arg2, abi_long arg3, abi_long arg4, abi_long arg5, abi_long arg6)
+{
+abi_long ret;
+void *p;
+
+p = lock_user(VERIFY_WRITE, arg2, arg3, 0);
+if (p == NULL) {
+return -TARGET_EFAULT;
+}
+if (regpairs_aligned(cpu_env) != 0) {
+arg4 = arg5;
+arg5 = arg6;
+}
+ret = get_errno(safe_pread(arg1, p, arg3, target_arg64(arg4, arg5)));
+unlock_user(p, arg2, ret);
+
+return ret;
+}
+
+/* readv(2) */
+static abi_long do_bsd_readv(abi_long arg1, abi_long arg2, abi_long arg3)
+{
+abi_long ret;
+struct iovec *vec = lock_iovec(VERIFY_WRITE, arg2, arg3, 0);
+
+if (vec != NULL) {
+ret = get_errno(safe_readv(arg1, vec, arg3));
+unlock_iovec(vec, arg2, arg3, 1);
+} else {
+ret = -host_to_target_errno(errno);
+}
+
+return ret;
+}
+
+/* preadv(2) */
+static abi_long do_bsd_preadv(void *cpu_env, abi_long arg1,
+abi_long arg2, abi_long arg3, abi_long arg4, abi_long arg5, abi_long arg6)
+{
+abi_long ret;
+struct iovec *vec = lock_iovec(VERIFY_WRITE, arg2, arg3, 1);
+
+if (vec != NULL) {
+if (regpairs_aligned(cpu_env) != 0) {
+arg4 = arg5;
+arg5 = arg6;
+}
+ret = get_errno(safe_preadv(arg1, vec, arg3, target_arg64(arg4, 
arg5)));
+unlock_iovec(vec, arg2, arg3, 0);
+} else {
+ret = -host_to_target_errno(errno);
+}
+
+return ret;
+}
+
 #endif /* BSD_FILE_H */
diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
index 1c4c9983f1a..8ca92f29857 100644
--- a/bsd-user/freebsd/os-syscall.c
+++ b/bsd-user/freebsd/os-syscall.c
@@ -42,6 +42,14 @@
 
 #include "bsd-file.h"
 
+/* I/O */
+safe_syscall3(ssize_t, read, int, fd, void *, buf, size_t, nbytes);
+safe_syscall4(ssize_t, pread, int, fd, void *, buf, size_t, nbytes, off_t,
+offset);
+safe_syscall3(ssize_t, readv, int, fd, const struct iovec *, iov, int, iovcnt);
+safe_syscall4(ssize_t, preadv, int, fd, const struct iovec *, iov, int, iovcnt,
+off_t, offset);
+
 void target_set_brk(abi_ulong new_brk)
 {
 }
@@ -200,6 +208,26 @@ static abi_long freebsd_syscall(void *cpu_env, int num, 
abi_long arg1,
 abi_long ret;
 
 switch (num) {
+
+/*
+ * File system calls.
+ */
+case TARGET_FREEBSD_NR_read: /* read(2) */
+ret = do_bsd_read(arg1, arg2, arg3);
+break;
+
+case TARGET_FREEBSD_NR_pread: /* pread(2) */
+ret = do_bsd_pread(cpu_env, arg1, arg2, arg3, arg4, arg5, arg6);
+break;
+
+case TARGET_FREEBSD_NR_readv: /* readv(2) */
+ret = do_bsd_readv(arg1, arg2, arg3);
+break;
+
+case TARGET_FREEBSD_NR_preadv: /* preadv(2) */
+ret = do_bsd_preadv(cpu_env, arg1, arg2, arg3, arg4, arg5, arg6);
+break;
+
 default:
 qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num);
 ret = -TARGET_ENOSYS;
-- 
2.33.1

[PATCH v2 5/6] bsd-user/bsd-file.h: Meat of the write system calls

Implement write, writev, pwrite and pwritev and connect them to the
system call dispatch routine.

Signed-off-by: Stacey Son 
Signed-off-by: Kyle Evans 
Signed-off-by: Warner Losh 
---
 bsd-user/bsd-file.h   | 84 +++
 bsd-user/freebsd/os-syscall.c | 22 +
 2 files changed, 106 insertions(+)

diff --git a/bsd-user/bsd-file.h b/bsd-user/bsd-file.h
index 839f8c5c55a..e9e2c85eb67 100644
--- a/bsd-user/bsd-file.h
+++ b/bsd-user/bsd-file.h
@@ -32,6 +32,11 @@ ssize_t safe_pread(int fd, void *buf, size_t nbytes, off_t 
offset);
 ssize_t safe_readv(int fd, const struct iovec *iov, int iovcnt);
 ssize_t safe_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset);
 
+ssize_t safe_write(int fd, void *buf, size_t nbytes);
+ssize_t safe_pwrite(int fd, void *buf, size_t nbytes, off_t offset);
+ssize_t safe_writev(int fd, const struct iovec *iov, int iovcnt);
+ssize_t safe_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t 
offset);
+
 /* read(2) */
 static abi_long do_bsd_read(abi_long arg1, abi_long arg2, abi_long arg3)
 {
@@ -106,4 +111,83 @@ static abi_long do_bsd_preadv(void *cpu_env, abi_long arg1,
 return ret;
 }
 
+/* write(2) */
+static abi_long do_bsd_write(abi_long arg1, abi_long arg2, abi_long arg3)
+{
+abi_long nbytes, ret;
+void *p;
+
+/* nbytes < 0 implies that it was larger than SIZE_MAX. */
+nbytes = arg3;
+if (nbytes < 0) {
+return -TARGET_EINVAL;
+}
+p = lock_user(VERIFY_READ, arg2, nbytes, 1);
+if (p == NULL) {
+return -TARGET_EFAULT;
+}
+ret = get_errno(safe_write(arg1, p, arg3));
+unlock_user(p, arg2, 0);
+
+return ret;
+}
+
+/* pwrite(2) */
+static abi_long do_bsd_pwrite(void *cpu_env, abi_long arg1,
+abi_long arg2, abi_long arg3, abi_long arg4, abi_long arg5, abi_long arg6)
+{
+abi_long ret;
+void *p;
+
+p = lock_user(VERIFY_READ, arg2, arg3, 1);
+if (p == NULL) {
+return -TARGET_EFAULT;
+}
+if (regpairs_aligned(cpu_env) != 0) {
+arg4 = arg5;
+arg5 = arg6;
+}
+ret = get_errno(safe_pwrite(arg1, p, arg3, target_arg64(arg4, arg5)));
+unlock_user(p, arg2, 0);
+
+return ret;
+}
+
+/* writev(2) */
+static abi_long do_bsd_writev(abi_long arg1, abi_long arg2, abi_long arg3)
+{
+abi_long ret;
+struct iovec *vec = lock_iovec(VERIFY_READ, arg2, arg3, 1);
+
+if (vec != NULL) {
+ret = get_errno(safe_writev(arg1, vec, arg3));
+unlock_iovec(vec, arg2, arg3, 0);
+} else {
+ret = -host_to_target_errno(errno);
+}
+
+return ret;
+}
+
+/* pwritev(2) */
+static abi_long do_bsd_pwritev(void *cpu_env, abi_long arg1,
+abi_long arg2, abi_long arg3, abi_long arg4, abi_long arg5, abi_long arg6)
+{
+abi_long ret;
+struct iovec *vec = lock_iovec(VERIFY_READ, arg2, arg3, 1);
+
+if (vec != NULL) {
+if (regpairs_aligned(cpu_env) != 0) {
+arg4 = arg5;
+arg5 = arg6;
+}
+ret = get_errno(safe_pwritev(arg1, vec, arg3, target_arg64(arg4, 
arg5)));
+unlock_iovec(vec, arg2, arg3, 0);
+} else {
+ret = -host_to_target_errno(errno);
+}
+
+return ret;
+}
+
 #endif /* BSD_FILE_H */
diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
index 8ca92f29857..3a33d54fa4d 100644
--- a/bsd-user/freebsd/os-syscall.c
+++ b/bsd-user/freebsd/os-syscall.c
@@ -50,6 +50,13 @@ safe_syscall3(ssize_t, readv, int, fd, const struct iovec *, 
iov, int, iovcnt);
 safe_syscall4(ssize_t, preadv, int, fd, const struct iovec *, iov, int, iovcnt,
 off_t, offset);
 
+safe_syscall3(ssize_t, write, int, fd, void *, buf, size_t, nbytes);
+safe_syscall4(ssize_t, pwrite, int, fd, void *, buf, size_t, nbytes, off_t,
+offset);
+safe_syscall3(ssize_t, writev, int, fd, const struct iovec *, iov, int, 
iovcnt);
+safe_syscall4(ssize_t, pwritev, int, fd, const struct iovec *, iov, int, 
iovcnt,
+off_t, offset);
+
 void target_set_brk(abi_ulong new_brk)
 {
 }
@@ -226,6 +233,21 @@ static abi_long freebsd_syscall(void *cpu_env, int num, 
abi_long arg1,
 
 case TARGET_FREEBSD_NR_preadv: /* preadv(2) */
 ret = do_bsd_preadv(cpu_env, arg1, arg2, arg3, arg4, arg5, arg6);
+
+case TARGET_FREEBSD_NR_write: /* write(2) */
+ret = do_bsd_write(arg1, arg2, arg3);
+break;
+
+case TARGET_FREEBSD_NR_pwrite: /* pwrite(2) */
+ret = do_bsd_pwrite(cpu_env, arg1, arg2, arg3, arg4, arg5, arg6);
+break;
+
+case TARGET_FREEBSD_NR_writev: /* writev(2) */
+ret = do_bsd_writev(arg1, arg2, arg3);
+break;
+
+case TARGET_FREEBSD_NR_pwritev: /* pwritev(2) */
+ret = do_bsd_pwritev(cpu_env, arg1, arg2, arg3, arg4, arg5, arg6);
 break;
 
 default:
-- 
2.33.1

[PATCH v2 1/6] bsd-user/freebsd/os-syscall.c: lock_iovec

lock_iovec will lock an I/O vec and the memory to which it refers and
create an iovec in the host space that refers to it, with full error
unwinding. Add helper_unlock_iovec to unlock the partially locked iovec
in case there's an error. The code will be used in unlock_iovec when
that is committed.

Note: memory handling likely could be rewritten to use g_autofree. That
will be explored in the future since what we have now works well enough.

Signed-off-by: Warner Losh 
---
 bsd-user/freebsd/os-syscall.c | 102 ++
 1 file changed, 102 insertions(+)

diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
index d272478e7b0..67851937a8f 100644
--- a/bsd-user/freebsd/os-syscall.c
+++ b/bsd-user/freebsd/os-syscall.c
@@ -73,6 +73,108 @@ bool is_error(abi_long ret)
 return (abi_ulong)ret >= (abi_ulong)(-4096);
 }
 
+/*
+ * Unlocks a iovec. Unlike unlock_iovec, it assumes the tvec array itself is
+ * already locked from target_addr. It will be unlocked as well as all the 
iovec
+ * elements.
+ */
+static void helper_unlock_iovec(struct target_iovec *target_vec,
+abi_ulong target_addr, struct iovec *vec,
+int count, int copy)
+{
+for (int i = 0; i < count; i++) {
+abi_ulong base = tswapal(target_vec[i].iov_base);
+
+if (vec[i].iov_base) {
+unlock_user(vec[i].iov_base, base, copy ? vec[i].iov_len : 0);
+}
+}
+unlock_user(target_vec, target_addr, 0);
+}
+
+struct iovec *lock_iovec(int type, abi_ulong target_addr,
+int count, int copy)
+{
+struct target_iovec *target_vec;
+struct iovec *vec;
+abi_ulong total_len, max_len;
+int i;
+int err = 0;
+
+if (count == 0) {
+errno = 0;
+return NULL;
+}
+if (count < 0 || count > IOV_MAX) {
+errno = EINVAL;
+return NULL;
+}
+
+vec = g_try_new0(struct iovec, count);
+if (vec == NULL) {
+errno = ENOMEM;
+return NULL;
+}
+
+target_vec = lock_user(VERIFY_READ, target_addr,
+   count * sizeof(struct target_iovec), 1);
+if (target_vec == NULL) {
+err = EFAULT;
+goto fail2;
+}
+
+max_len = 0x7fff & MIN(TARGET_PAGE_MASK, PAGE_MASK);
+total_len = 0;
+
+for (i = 0; i < count; i++) {
+abi_ulong base = tswapal(target_vec[i].iov_base);
+abi_long len = tswapal(target_vec[i].iov_len);
+
+if (len < 0) {
+err = EINVAL;
+goto fail;
+} else if (len == 0) {
+/* Zero length pointer is ignored. */
+vec[i].iov_base = 0;
+} else {
+vec[i].iov_base = lock_user(type, base, len, copy);
+/*
+ * If the first buffer pointer is bad, this is a fault.  But
+ * subsequent bad buffers will result in a partial write; this is
+ * realized by filling the vector with null pointers and zero
+ * lengths.
+ */
+if (!vec[i].iov_base) {
+if (i == 0) {
+err = EFAULT;
+goto fail;
+} else {
+/*
+ * Fail all the subsequent addresses, they are already
+ * zero'd.
+ */
+goto out;
+}
+}
+if (len > max_len - total_len) {
+len = max_len - total_len;
+}
+}
+vec[i].iov_len = len;
+total_len += len;
+}
+out:
+unlock_user(target_vec, target_addr, 0);
+return vec;
+
+fail:
+helper_unlock_iovec(target_vec, target_addr, vec, i, copy);
+fail2:
+g_free(vec);
+errno = err;
+return NULL;
+}
+
 /*
  * do_syscall() should always have a single exit point at the end so that
  * actions, such as logging of syscall results, can be performed.  All errnos
-- 
2.33.1

[PATCH v2 6/6] bsd-user/freebsd/os-syscall.c: Implement exit

Implement the exit system call. Bring in bsd-proc.h to contain all the
process system call implementation and helper routines.

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
Reviewed-by: Kyle Evans 
Reviewed-by: Richard Henderson 
---
 bsd-user/bsd-proc.h   | 43 +++
 bsd-user/freebsd/os-syscall.c |  7 ++
 2 files changed, 50 insertions(+)
 create mode 100644 bsd-user/bsd-proc.h

diff --git a/bsd-user/bsd-proc.h b/bsd-user/bsd-proc.h
new file mode 100644
index 000..8f0b6990d14
--- /dev/null
+++ b/bsd-user/bsd-proc.h
@@ -0,0 +1,43 @@
+/*
+ *  process related system call shims and definitions
+ *
+ *  Copyright (c) 2013-2014 Stacey D. Son
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see .
+ */
+
+#ifndef BSD_PROC_H_
+#define BSD_PROC_H_
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* exit(2) */
+static inline abi_long do_bsd_exit(void *cpu_env, abi_long arg1)
+{
+#ifdef TARGET_GPROF
+_mcleanup();
+#endif
+gdb_exit(arg1);
+qemu_plugin_user_exit();
+/* XXX: should free thread stack and CPU env here  */
+_exit(arg1);
+
+return 0;
+}
+
+#endif /* !BSD_PROC_H_ */
diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
index 3a33d54fa4d..71aa0d38e03 100644
--- a/bsd-user/freebsd/os-syscall.c
+++ b/bsd-user/freebsd/os-syscall.c
@@ -41,6 +41,7 @@
 #include "user/syscall-trace.h"
 
 #include "bsd-file.h"
+#include "bsd-proc.h"
 
 /* I/O */
 safe_syscall3(ssize_t, read, int, fd, void *, buf, size_t, nbytes);
@@ -215,6 +216,12 @@ static abi_long freebsd_syscall(void *cpu_env, int num, 
abi_long arg1,
 abi_long ret;
 
 switch (num) {
+/*
+ * process system calls
+ */
+case TARGET_FREEBSD_NR_exit: /* exit(2) */
+ret = do_bsd_exit(cpu_env, arg1);
+break;
 
 /*
  * File system calls.
-- 
2.33.1

[PATCH v2 2/6] bsd-user/freebsd/os-syscall.c: unlock_iovec

Releases the references to the iovec created by lock_iovec.

Signed-off-by: Warner Losh 
---
 bsd-user/freebsd/os-syscall.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
index 67851937a8f..52093d479d8 100644
--- a/bsd-user/freebsd/os-syscall.c
+++ b/bsd-user/freebsd/os-syscall.c
@@ -175,6 +175,20 @@ fail2:
 return NULL;
 }
 
+void unlock_iovec(struct iovec *vec, abi_ulong target_addr,
+int count, int copy)
+{
+struct target_iovec *target_vec;
+
+target_vec = lock_user(VERIFY_READ, target_addr,
+   count * sizeof(struct target_iovec), 1);
+if (target_vec) {
+helper_unlock_iovec(target_vec, target_addr, vec, count, copy);
+}
+
+g_free(vec);
+}
+
 /*
  * do_syscall() should always have a single exit point at the end so that
  * actions, such as logging of syscall results, can be performed.  All errnos
-- 
2.33.1

[PATCH v2 0/6] bsd-user upstreaming: read, write and exit

This series of patches continues the effort to get system calls working
upstream. This series was cleaved off a prior series to give me time to rework
based on the feedback from the first time I posted these.

   o bsd-user/freebsd/os-syscall.c: lock_iovec
I reworked this to use g_try_new, as well as fixing bugs in the 'after a
FAULT' handling code. Created a common routine to clean up after errors
that can also be used by unlock_iovec.

   o bsd-user/freebsd/os-syscall.c: unlock_iovec
Fixed the error handling to be consistent with a normal unlock_iovec.

   o bsd-user/freebsd/os-syscall.c: Tracing and error boilerplate
Created the wrapper function as suggested in prior reviews.

V2: Use g_try_new0 and simplify based on not needing to initialize things
to zero that are already zero. Only affects the 'lock_iovec' hunk.

V2: Remove inline tags from do_bsd_* routines.

V2: Clean up logging a little

Now all the patches, except the last one, need to be reviewed.

Warner Losh (6):
  bsd-user/freebsd/os-syscall.c: lock_iovec
  bsd-user/freebsd/os-syscall.c: unlock_iovec
  bsd-user/freebsd/os-syscall.c: Tracing and error boilerplate
  bsd-user/bsd-file.h: Add implementations for read, pread, readv and
preadv
  bsd-user/bsd-file.h: Meat of the write system calls
  bsd-user/freebsd/os-syscall.c: Implement exit

 bsd-user/bsd-file.h   | 163 +
 bsd-user/bsd-proc.h   |  43 +++
 bsd-user/freebsd/os-syscall.c | 217 +-
 3 files changed, 419 insertions(+), 4 deletions(-)
 create mode 100644 bsd-user/bsd-proc.h

-- 
2.33.1

[PATCH v2 3/6] bsd-user/freebsd/os-syscall.c: Tracing and error boilerplate

Add in the tracing and the "system call not implemented" boilerplate. Do
this by moving the guts of do_freebsd_syscall to freebsd_syscall. Put
the tracing in the wrapper function. Since freebsd_syscall is a
singleton static function, it will almost certainly be inlined. Fix
comments that referred to do_syscall since that was renamed some time
ago.

Signed-off-by: Warner Losh 
---
 bsd-user/freebsd/os-syscall.c | 44 +++
 1 file changed, 40 insertions(+), 4 deletions(-)

diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
index 52093d479d8..1c4c9983f1a 100644
--- a/bsd-user/freebsd/os-syscall.c
+++ b/bsd-user/freebsd/os-syscall.c
@@ -190,16 +190,52 @@ void unlock_iovec(struct iovec *vec, abi_ulong 
target_addr,
 }
 
 /*
- * do_syscall() should always have a single exit point at the end so that
- * actions, such as logging of syscall results, can be performed.  All errnos
- * that do_syscall() returns must be -TARGET_.
+ * All errnos that freebsd_syscall() returns must be -TARGET_.
+ */
+static abi_long freebsd_syscall(void *cpu_env, int num, abi_long arg1,
+abi_long arg2, abi_long arg3, abi_long arg4,
+abi_long arg5, abi_long arg6, abi_long arg7,
+abi_long arg8)
+{
+abi_long ret;
+
+switch (num) {
+default:
+qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num);
+ret = -TARGET_ENOSYS;
+break;
+}
+
+return ret;
+}
+
+/*
+ * do_freebsd_syscall() should always have a single exit point at the end so
+ * that actions, such as logging of syscall results, can be performed. This
+ * as a wrapper around freebsd_syscall() so that actually happens. Since
+ * that is a singleton, modern compilers will inline it anyway...
  */
 abi_long do_freebsd_syscall(void *cpu_env, int num, abi_long arg1,
 abi_long arg2, abi_long arg3, abi_long arg4,
 abi_long arg5, abi_long arg6, abi_long arg7,
 abi_long arg8)
 {
-return 0;
+CPUState *cpu = env_cpu(cpu_env);
+int ret;
+
+trace_guest_user_syscall(cpu, num, arg1, arg2, arg3, arg4, arg5, arg6, 
arg7, arg8);
+if (do_strace) {
+print_freebsd_syscall(num, arg1, arg2, arg3, arg4, arg5, arg6);
+}
+
+ret = freebsd_syscall(cpu_env, num, arg1, arg2, arg3, arg4, arg5, arg6,
+  arg7, arg8);
+if (do_strace) {
+print_freebsd_syscall_ret(num, ret);
+}
+trace_guest_user_syscall_ret(cpu, num, ret);
+
+return ret;
 }
 
 void syscall_init(void)
-- 
2.33.1

[PATCH v2 2/2] QIOChannelSocket: Fix zero-copy send so socket flush works

2022-06-08 Thread Leonardo Bras

Somewhere between v6 and v7 of the zero-copy-send patchset, a crucial
part of the flushing mechanism went missing: incrementing zero_copy_queued.

Without that, the flushing interface becomes a no-op, and there is no
guarantee the buffer is really sent.

This can go as bad as causing a corruption in RAM during migration.

Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & 
io_flush for CONFIG_LINUX")
Reported-by: 徐闯 
Signed-off-by: Leonardo Bras 
---
 io/channel-socket.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/io/channel-socket.c b/io/channel-socket.c
index ef7c7cfbac..ca4cae930f 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -607,6 +607,11 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
  "Unable to write to socket");
 return -1;
 }
+
+if (zero_copy_enabled) {
+sioc->zero_copy_queued++;
+}
+
 return ret;
 }
 #else /* WIN32 */
-- 
2.36.1

[PATCH v2 1/2] QIOChannelSocket: Reduce ifdefs to improve readability

2022-06-08 Thread Leonardo Bras

During implementation of MSG_ZEROCOPY feature, a lot of #ifdefs were
introduced, particularly at qio_channel_socket_writev().

Rewrite some of those changes so it's easier to read.
...
Signed-off-by: Leonardo Bras 
---
 io/channel-socket.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/io/channel-socket.c b/io/channel-socket.c
index dc9c165de1..ef7c7cfbac 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -554,6 +554,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
 size_t fdsize = sizeof(int) * nfds;
 struct cmsghdr *cmsg;
 int sflags = 0;
+bool zero_copy_enabled = false;
 
 memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS));
 
@@ -581,6 +582,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
 #ifdef QEMU_MSG_ZEROCOPY
 if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
 sflags = MSG_ZEROCOPY;
+zero_copy_enabled = true;
 }
 #endif
 
@@ -592,15 +594,13 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
 return QIO_CHANNEL_ERR_BLOCK;
 case EINTR:
 goto retry;
-#ifdef QEMU_MSG_ZEROCOPY
 case ENOBUFS:
-if (sflags & MSG_ZEROCOPY) {
+if (zero_copy_enabled) {
 error_setg_errno(errp, errno,
  "Process can't lock enough memory for using 
MSG_ZEROCOPY");
 return -1;
 }
 break;
-#endif
 }
 
 error_setg_errno(errp, errno,
-- 
2.36.1

Re: [PATCH v1 1/1] QIOChannelSocket: Fix zero-copy send so socket flush works

2022-06-08 Thread Leonardo Bras Soares Passos

On Wed, Jun 8, 2022 at 5:55 PM Peter Xu  wrote:
>
> On Wed, Jun 08, 2022 at 04:26:10PM -0400, Peter Xu wrote:
> > On Wed, Jun 08, 2022 at 03:18:09PM -0300, Leonardo Bras wrote:
> > > Somewhere between v6 and v7 the of the zero-copy-send patchset a crucial
> > > part of the flushing mechanism got missing: incrementing zero_copy_queued.
> > >
> > > Without that, the flushing interface becomes a no-op, and there is no
> > > garantee the buffer is really sent.
> > >
> > > This can go as bad as causing a corruption in RAM during migration.
> > >
> > > Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy 
> > > flag & io_flush for CONFIG_LINUX")
> > > Reported-by: 徐闯 
> > > Signed-off-by: Leonardo Bras 
> >
> > Copy Dave/Juan; Leo please remember to do so in the next posts, or no one
> > will be picking this up. :)
>

Thanks for letting me know.

> My fault, it's an io channel patch.  But still good to copy relevant
> developers..

Np. Sure, I will keep in mind to add them in the next version.

Oh, BTW: I will be sending a v2 shortly.

>
> --
> Peter Xu
>

Re: [PATCH 1/2] hw/nvme: Implement shadow doorbell buffer support

2022-06-08 Thread Klaus Jensen

On Jun  8 09:36, Jinhao Fan wrote:
> Implement Doorbel Buffer Config command (Section 5.7 in NVMe Spec 1.3)
> and Shadow Doorbel buffer & EventIdx buffer handling logic (Section 7.13
> in NVMe Spec 1.3). For queues created before the Doorbell Buffer Config
> command, the nvme_dbbuf_config function tries to associate each existing
> SQ and CQ with its Shadow Doorbel buffer and EventIdx buffer address.
> Queues created after the Doorbell Buffer Config command will have the
> doorbell buffers associated with them when they are initialized.
> 
> In nvme_process_sq and nvme_post_cqe, proactively check for Shadow
> Doorbell buffer changes instead of wait for doorbell register changes.
> This reduces the number of MMIOs.
> 
> Signed-off-by: Jinhao Fan 
> ---
>  hw/nvme/ctrl.c   | 95 ++--
>  hw/nvme/nvme.h   |  8 
>  include/block/nvme.h |  2 +
>  3 files changed, 102 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index 03760ddeae..d3f6c432df 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -223,6 +223,7 @@ static const uint32_t nvme_cse_acs[256] = {
>  [NVME_ADM_CMD_GET_FEATURES] = NVME_CMD_EFF_CSUPP,
>  [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP,
>  [NVME_ADM_CMD_NS_ATTACHMENT]= NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC,
> +[NVME_ADM_CMD_DBBUF_CONFIG] = NVME_CMD_EFF_CSUPP,
>  [NVME_ADM_CMD_FORMAT_NVM]   = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
>  };
>  
> @@ -1304,6 +1305,12 @@ static inline void nvme_blk_write(BlockBackend *blk, 
> int64_t offset,
>  }
>  }
>  
> +static void nvme_update_cq_head(NvmeCQueue *cq)
> +{
> +pci_dma_read(>ctrl->parent_obj, cq->db_addr, >head,
> +sizeof(cq->head));
> +}
> +
>  static void nvme_post_cqes(void *opaque)
>  {
>  NvmeCQueue *cq = opaque;
> @@ -1316,6 +1323,10 @@ static void nvme_post_cqes(void *opaque)
>  NvmeSQueue *sq;
>  hwaddr addr;
>  
> +if (cq->cqid && n->dbbuf_enabled) {
> +nvme_update_cq_head(cq);

Shouldn't we update the cq head eventidx here (prior to reading the
doorbell buffer)? Like we do for the sq tail?

> +}
> +
>  if (nvme_cq_full(cq)) {
>  break;
>  }
> @@ -4237,6 +4248,7 @@ static uint16_t nvme_del_sq(NvmeCtrl *n, NvmeRequest 
> *req)
>  static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, uint64_t dma_addr,
>   uint16_t sqid, uint16_t cqid, uint16_t size)
>  {
> +uint32_t stride = 4 << NVME_CAP_DSTRD(n->bar.cap);
>  int i;
>  NvmeCQueue *cq;
>  
> @@ -4256,6 +4268,11 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, 
> uint64_t dma_addr,
>  }
>  sq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_sq, sq);
>  
> +if (sqid && n->dbbuf_dbs && n->dbbuf_eis) {
> +sq->db_addr = n->dbbuf_dbs + 2 * sqid * stride;
> +sq->ei_addr = n->dbbuf_eis + 2 * sqid * stride;
> +}
> +
>  assert(n->cq[cqid]);
>  cq = n->cq[cqid];
>  QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry);
> @@ -4599,6 +4616,7 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, 
> uint64_t dma_addr,
>   uint16_t cqid, uint16_t vector, uint16_t size,
>   uint16_t irq_enabled)
>  {
> +uint32_t stride = 4 << NVME_CAP_DSTRD(n->bar.cap);
>  int ret;
>  
>  if (msix_enabled(>parent_obj)) {
> @@ -4615,6 +4633,10 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, 
> uint64_t dma_addr,
>  cq->head = cq->tail = 0;
>  QTAILQ_INIT(>req_list);
>  QTAILQ_INIT(>sq_list);
> +if (cqid && n->dbbuf_dbs && n->dbbuf_eis) {
> +cq->db_addr = n->dbbuf_dbs + (2 * cqid + 1) * stride;
> +cq->ei_addr = n->dbbuf_eis + (2 * cqid + 1) * stride;
> +}
>  n->cq[cqid] = cq;
>  cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
>  }
> @@ -5767,6 +5789,43 @@ out:
>  return status;
>  }
>  
> +static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req)
> +{
> +uint32_t stride = 4 << NVME_CAP_DSTRD(n->bar.cap);
> +uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1);
> +uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2);
> +int i;
> +
> +/* Address should be page aligned */
> +if (dbs_addr & (n->page_size - 1) || eis_addr & (n->page_size - 1)) {
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +
> +/* Save shadow buffer base addr for use during queue creation */
> +n->dbbuf_dbs = dbs_addr;
> +n->dbbuf_eis = eis_addr;
> +n->dbbuf_enabled = true;
> +
> +for (i = 1; i < n->params.max_ioqpairs + 1; i++) {
> +NvmeSQueue *sq = n->sq[i];
> +NvmeCQueue *cq = n->cq[i];
> +
> +if (sq) {
> +/* Submission queue tail pointer location, 2 * QID * stride */
> +sq->db_addr = dbs_addr + 2 * i * stride;
> +sq->ei_addr = eis_addr + 2 * i * stride;
> +}
> +
> +if

Re: [PATCH v1 1/1] QIOChannelSocket: Fix zero-copy send so socket flush works

On Wed, Jun 08, 2022 at 04:26:10PM -0400, Peter Xu wrote:
> On Wed, Jun 08, 2022 at 03:18:09PM -0300, Leonardo Bras wrote:
> > Somewhere between v6 and v7 the of the zero-copy-send patchset a crucial
> > part of the flushing mechanism got missing: incrementing zero_copy_queued.
> > 
> > Without that, the flushing interface becomes a no-op, and there is no
> > garantee the buffer is really sent.
> > 
> > This can go as bad as causing a corruption in RAM during migration.
> > 
> > Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag 
> > & io_flush for CONFIG_LINUX")
> > Reported-by: 徐闯 
> > Signed-off-by: Leonardo Bras 
> 
> Copy Dave/Juan; Leo please remember to do so in the next posts, or no one
> will be picking this up. :)

My fault, it's an io channel patch.  But still good to copy relevant
developers..

-- 
Peter Xu

Re: [PATCH v1 1/1] QIOChannelSocket: Fix zero-copy send so socket flush works

2022-06-08 Thread Leonardo Bras Soares Passos

Hello Daniel,

On Wed, Jun 8, 2022 at 3:46 PM Daniel P. Berrangé  wrote:
>
> On Wed, Jun 08, 2022 at 03:18:09PM -0300, Leonardo Bras wrote:
> > Somewhere between v6 and v7 the of the zero-copy-send patchset a crucial
> > part of the flushing mechanism got missing: incrementing zero_copy_queued.
> >
> > Without that, the flushing interface becomes a no-op, and there is no
> > garantee the buffer is really sent.
> >
> > This can go as bad as causing a corruption in RAM during migration.
> >
> > Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag 
> > & io_flush for CONFIG_LINUX")
> > Reported-by: 徐闯 
> > Signed-off-by: Leonardo Bras 
> > ---
> >  io/channel-socket.c | 11 ---
> >  1 file changed, 8 insertions(+), 3 deletions(-)
> >
> > diff --git a/io/channel-socket.c b/io/channel-socket.c
> > index dc9c165de1..ca4cae930f 100644
> > --- a/io/channel-socket.c
> > +++ b/io/channel-socket.c
> > @@ -554,6 +554,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel 
> > *ioc,
> >  size_t fdsize = sizeof(int) * nfds;
> >  struct cmsghdr *cmsg;
> >  int sflags = 0;
> > +bool zero_copy_enabled = false;
> >
> >  memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS));
> >
> > @@ -581,6 +582,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel 
> > *ioc,
> >  #ifdef QEMU_MSG_ZEROCOPY
> >  if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
> >  sflags = MSG_ZEROCOPY;
> > +zero_copy_enabled = true;
> >  }
> >  #endif
> >
> > @@ -592,21 +594,24 @@ static ssize_t qio_channel_socket_writev(QIOChannel 
> > *ioc,
> >  return QIO_CHANNEL_ERR_BLOCK;
> >  case EINTR:
> >  goto retry;
> > -#ifdef QEMU_MSG_ZEROCOPY
>
> Removing this ifdef appears incidental to the change. If this is
> redundant just remove it in its own patch.

The idea is to reduce the amount of #ifdefs as Peter suggested,
because adding another ifdef here
would introduce extra noise. But sure, I see no problem adding this
change as a previous patch.

>
> >  case ENOBUFS:
> > -if (sflags & MSG_ZEROCOPY) {
> > +if (zero_copy_enabled) {
> >  error_setg_errno(errp, errno,
> >   "Process can't lock enough memory for 
> > using MSG_ZEROCOPY");
> >  return -1;
> >  }
> >  break;
> > -#endif
> >  }
> >
> >  error_setg_errno(errp, errno,
> >   "Unable to write to socket");
> >  return -1;
> >  }
> > +
> > +if (zero_copy_enabled) {
>
> What's wrong with
>
>if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
> sioc->zero_copy_queued++;
> }

There is nothing wrong with it, but using zero_copy_enabled as
presented here will
compile-out this 'if()'  block if the user does not support MSG_ZEROCOPY.

Best regards,
Leo

>
>
> Introducing another local variable doesn't really add value IMHO.
>
> With regards,
> Daniel
> --
> |: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|
>

Re: [PATCH v4 0/3] target/m68k: semihosting cleanup


On 6/8/22 12:05, Richard Henderson wrote:

But what is the use case of semihosting on m68k?
Is there a kernel implementation available for that?


Not that I'm aware of.  Do you want to remove it entirely instead?
Leaving it as-is should not be an option...


Oh, duh.  The "kernel" implementation is libgloss itself.
The use-case is running the gcc testsuite for coldfire under qemu.


r~

Re: [PATCH v1 1/1] QIOChannelSocket: Fix zero-copy send so socket flush works

On Wed, Jun 08, 2022 at 03:18:09PM -0300, Leonardo Bras wrote:
> Somewhere between v6 and v7 the of the zero-copy-send patchset a crucial
> part of the flushing mechanism got missing: incrementing zero_copy_queued.
> 
> Without that, the flushing interface becomes a no-op, and there is no
> garantee the buffer is really sent.
> 
> This can go as bad as causing a corruption in RAM during migration.
> 
> Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & 
> io_flush for CONFIG_LINUX")
> Reported-by: 徐闯 
> Signed-off-by: Leonardo Bras 

Copy Dave/Juan; Leo please remember to do so in the next posts, or no one
will be picking this up. :)

-- 
Peter Xu

Re: [External] [PATCH v13 3/8] QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX

On Wed, Jun 08, 2022 at 03:14:36PM -0300, Leonardo Bras Soares Passos wrote:
> On Wed, Jun 8, 2022 at 8:41 AM Peter Xu  wrote:
> >
> > On Wed, Jun 08, 2022 at 02:37:28AM -0300, Leonardo Bras Soares Passos wrote:
> > > (1) is not an option, as the interface currently uses ret=1 to make
> > > sure MSG_ZEROCOPY is getting used,
> > > I added that so the user of qio_channel can switch off zero-copy if
> > > it's not getting used, and save some cpu.
> >
> > Yes (1) is not, but could you explain what do you mean by making sure
> > MSG_ZEROCOPY being used?  Why is it relevant to the retval here?
> 
> If sendmsg() is called with MSG_ZEROCOPY, and everything is configured
> correctly, the kernel will attempt to send the buffer using zero-copy.
> 
> Even with the right configuration on a recent enough kernel, there are
> factors that can prevent zero-copy from happening, and the kernel will
> fall back to the copying mechanism.
> An example being the net device not supporting 'Scatter-Gather'
> feature (NETIF_F_SG).
> 
> When this happens, there is an overhead for 'trying zero-copy first',
> instead of just opting for the copying mechanism.
> 
> In a previous iteration of the patchset, it was made clear that it's
> desirable to detect when the kernel falls back to copying mechanism,
> so the user of 'QIOChannelSocket' can switch to copying and avoid the
> overhead. This was done by the return value of flush(), which is 1 if
> that occurs.

Two questions..

  1) When that happens, will MSG_ERRQUEUE keep working just as if zerocopy
 were functional?

 If the answer is yes, I don't see how ret=1 will ever be
 returned.. because we'll also go into the same loop in
 qio_channel_socket_flush() anyway.

 If the answer is no, then since we'll have non-zero zero_copy_queued,
 will the loop in qio_channel_socket_flush() go into a dead one?  How
 could it return?

  2) Even if we have the correct ret=1 returned when that happens, which
 caller is detecting that ret==1 and warning the admin?

Thanks,

-- 
Peter Xu

Re: [PATCH v1 1/1] QIOChannelSocket: Fix zero-copy send so socket flush works

On Wed, Jun 08, 2022 at 07:46:43PM +0100, Daniel P. Berrangé wrote:
> On Wed, Jun 08, 2022 at 03:18:09PM -0300, Leonardo Bras wrote:
> > Somewhere between v6 and v7 the of the zero-copy-send patchset a crucial
> > part of the flushing mechanism got missing: incrementing zero_copy_queued.
> > 
> > Without that, the flushing interface becomes a no-op, and there is no
> > garantee the buffer is really sent.
> > 
> > This can go as bad as causing a corruption in RAM during migration.
> > 
> > Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag 
> > & io_flush for CONFIG_LINUX")
> > Reported-by: 徐闯 
> > Signed-off-by: Leonardo Bras 
> > ---
> >  io/channel-socket.c | 11 ---
> >  1 file changed, 8 insertions(+), 3 deletions(-)
> > 
> > diff --git a/io/channel-socket.c b/io/channel-socket.c
> > index dc9c165de1..ca4cae930f 100644
> > --- a/io/channel-socket.c
> > +++ b/io/channel-socket.c
> > @@ -554,6 +554,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel 
> > *ioc,
> >  size_t fdsize = sizeof(int) * nfds;
> >  struct cmsghdr *cmsg;
> >  int sflags = 0;
> > +bool zero_copy_enabled = false;
> >  
> >  memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS));
> >  
> > @@ -581,6 +582,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel 
> > *ioc,
> >  #ifdef QEMU_MSG_ZEROCOPY
> >  if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
> >  sflags = MSG_ZEROCOPY;
> > +zero_copy_enabled = true;
> >  }
> >  #endif
> >  
> > @@ -592,21 +594,24 @@ static ssize_t qio_channel_socket_writev(QIOChannel 
> > *ioc,
> >  return QIO_CHANNEL_ERR_BLOCK;
> >  case EINTR:
> >  goto retry;
> > -#ifdef QEMU_MSG_ZEROCOPY
> 
> Removing this ifdef appears incidental to the change. If this is
> redundant just remove it in its own patch.
> 
> >  case ENOBUFS:
> > -if (sflags & MSG_ZEROCOPY) {
> > +if (zero_copy_enabled) {
> >  error_setg_errno(errp, errno,
> >   "Process can't lock enough memory for 
> > using MSG_ZEROCOPY");
> >  return -1;
> >  }
> >  break;
> > -#endif
> >  }
> >  
> >  error_setg_errno(errp, errno,
> >   "Unable to write to socket");
> >  return -1;
> >  }
> > +
> > +if (zero_copy_enabled) {
> 
> What's wrong with
> 
>if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
> sioc->zero_copy_queued++;
> }
> 
> 
> Introducing another local variable doesn't really add value IMHO.

One benefit of having that variable is we setup zero_copy_enabled once in
the #ifdef and the rest code can avoid wrapping with the macro.  From that
pov the patch looks okay to me.  Thanks,

-- 
Peter Xu

Re: [PATCH v2 08/16] ppc/pnv: user created pnv-phb for powernv9

2022-06-08 Thread Daniel Henrique Barboza

On 6/7/22 03:44, Cédric Le Goater wrote:
Also, the comment seems wrong to me. The qom parenting doesn't matter when building the device tree.

it does. See pnv_dt_xscom()

And this is the root cause of many headaches for user-created devices.
Could it be done differently ?

Just tried to do a related change based on the review you gave in patch 07:

But it would assert if child is not of the correct type. The routine
above is called from a object_child_foreach() which loops on all
children.

I think it could be improved by using directly *->phbs[i].

C.
-

This doesn't work out of the gate because, for user creatable devices, we
are not setting the phbs back into the chip->phbs[] array. In fact we're
not even incrementing chip->num_phbs. We were getting away with all of it
because we are parenting the PHBs to the chip during realize.

Considering that, and also how pnv_dt_xscom() works, I'd rather stick with
most of the QOM usage we already have. Otherwise we would need, for example,
to change xscom_dt_child to go through each device we want in the DT. And
there's no clear benefit aside from using less QOM, but that can be amended
by adding more documentation here and there.

I can make an exception for powernv8 and
pnv_ics_get_child()/pnv_ics_resend_child(),
where we're cycling through all child elements every time. For those cases it's
worth accessing the phbs directly via chip->phbs[], and for user creatable
phb3s I'll add the created phb to the array.

I also believe that I can do more to make the current handling of default
phb3/phb4 closer to what user creatable devices do. This will ease the work
to be done by this series and will also make the design easier to understand.
I might also make some changes that Mark pointed out in the phb3/4 root ports
as well. This series can then be more about the PnvPHB proxy.

Thanks,

Daniel

Thanks,

Re: [RFC PATCH v8 02/21] vhost: Add custom used buffer callback

On Tue, Jun 7, 2022 at 8:12 AM Jason Wang  wrote:
>
>
> 在 2022/5/20 03:12, Eugenio Pérez 写道:
> > The callback allows SVQ users to know the VirtQueue requests and
> > responses. QEMU can use this to synchronize virtio device model state,
> > allowing to migrate it with minimum changes to the migration code.
> >
> > In the case of networking, this will be used to inspect control
> > virtqueue messages.
> >
> > Signed-off-by: Eugenio Pérez 
> > ---
> >   hw/virtio/vhost-shadow-virtqueue.h | 16 +++-
> >   include/hw/virtio/vhost-vdpa.h |  2 ++
> >   hw/virtio/vhost-shadow-virtqueue.c |  9 -
> >   hw/virtio/vhost-vdpa.c |  3 ++-
> >   4 files changed, 27 insertions(+), 3 deletions(-)
> >
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
> > b/hw/virtio/vhost-shadow-virtqueue.h
> > index c132c994e9..6593f07db3 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.h
> > +++ b/hw/virtio/vhost-shadow-virtqueue.h
> > @@ -15,6 +15,13 @@
> >   #include "standard-headers/linux/vhost_types.h"
> >   #include "hw/virtio/vhost-iova-tree.h"
> >
> > +typedef void (*VirtQueueElementCallback)(VirtIODevice *vdev,
> > + const VirtQueueElement *elem);
>
>
> Nit: I wonder if something like "VirtQueueCallback" is sufficient (e.g
> kernel use "callback" directly)
>

I didn't think about the notification part of the "callback" but more
on the function callback, to notify the net or vhost-vdpa net
subsystem :). But I think it can be named your way for sure.

If we ever have other callbacks closer to the vq than to vq elements,
renaming it later shouldn't be a big deal.

>
> > +
> > +typedef struct VhostShadowVirtqueueOps {
> > +VirtQueueElementCallback used_elem_handler;
> > +} VhostShadowVirtqueueOps;
> > +
> >   /* Shadow virtqueue to relay notifications */
> >   typedef struct VhostShadowVirtqueue {
> >   /* Shadow vring */
> > @@ -59,6 +66,12 @@ typedef struct VhostShadowVirtqueue {
> >*/
> >   uint16_t *desc_next;
> >
> > +/* Optional callbacks */
> > +const VhostShadowVirtqueueOps *ops;
>
>
> Can we merge map_ops to ops?
>

It can be merged, but they are set by different actors.

map_ops is received by hw/virtio/vhost-vdpa, while this ops depends on
the kind of device. Is it ok to fill the ops members "by chunks"?

>
> > +
> > +/* Optional custom used virtqueue element handler */
> > +VirtQueueElementCallback used_elem_cb;
>
>
> This seems not used in this series.
>

Right, this is a leftover. Thanks for pointing it out!

Thanks!

> Thanks
>
>
> > +
> >   /* Next head to expose to the device */
> >   uint16_t shadow_avail_idx;
> >
> > @@ -85,7 +98,8 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, 
> > VirtIODevice *vdev,
> >VirtQueue *vq);
> >   void vhost_svq_stop(VhostShadowVirtqueue *svq);
> >
> > -VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree);
> > +VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree,
> > +const VhostShadowVirtqueueOps *ops);
> >
> >   void vhost_svq_free(gpointer vq);
> >   G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
> > diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> > index a29dbb3f53..f1ba46a860 100644
> > --- a/include/hw/virtio/vhost-vdpa.h
> > +++ b/include/hw/virtio/vhost-vdpa.h
> > @@ -17,6 +17,7 @@
> >   #include "hw/virtio/vhost-iova-tree.h"
> >   #include "hw/virtio/virtio.h"
> >   #include "standard-headers/linux/vhost_types.h"
> > +#include "hw/virtio/vhost-shadow-virtqueue.h"
> >
> >   typedef struct VhostVDPAHostNotifier {
> >   MemoryRegion mr;
> > @@ -35,6 +36,7 @@ typedef struct vhost_vdpa {
> >   /* IOVA mapping used by the Shadow Virtqueue */
> >   VhostIOVATree *iova_tree;
> >   GPtrArray *shadow_vqs;
> > +const VhostShadowVirtqueueOps *shadow_vq_ops;
> >   struct vhost_dev *dev;
> >   VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
> >   } VhostVDPA;
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
> > b/hw/virtio/vhost-shadow-virtqueue.c
> > index 56c96ebd13..167db8be45 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.c
> > +++ b/hw/virtio/vhost-shadow-virtqueue.c
> > @@ -410,6 +410,10 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
> >   break;
> >   }
> >
> > +if (svq->ops && svq->ops->used_elem_handler) {
> > +svq->ops->used_elem_handler(svq->vdev, elem);
> > +}
> > +
> >   if (unlikely(i >= svq->vring.num)) {
> >   qemu_log_mask(LOG_GUEST_ERROR,
> >"More than %u used buffers obtained in a %u size 
> > SVQ",
> > @@ -607,12 +611,14 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
> >* shadow methods and file descriptors.
> >*
> >* @iova_tree: Tree to perform descriptors translations
> > + * @ops: SVQ operations hooks
> >*
> >* Returns the new

Re: [PATCH v6 0/8] KVM: mm: fd-based approach for supporting KVM guest private memory

2022-06-08 Thread Vishal Annapurve

...
> With this patch series, it's actually even not possible for userspace VMM
> to allocate private page by a direct write, it's basically unmapped from
> there. If it really wants to, it should do something special, by intention,
> that's basically the conversion, which we should allow.
>

A VM can pass GPA backed by private pages to userspace VMM and when
Userspace VMM accesses the backing hva there will be pages allocated
to back the shared fd causing 2 sets of pages backing the same guest
memory range.

> Thanks for bringing this up. But in my mind I still think userspace VMM
> can do and it's its responsibility to guarantee that, if that is hard
> required. By design, userspace VMM is the decision-maker for page
> conversion and has all the necessary information to know which page is
> shared/private. It also has the necessary knobs to allocate/free the
> physical pages for guest memory. Definitely, we should make userspace
> VMM more robust.

Making Userspace VMM more robust to avoid double allocation can get
complex, it will have to keep track of all in-use (by Userspace VMM)
shared fd memory to disallow conversion from shared to private and
will have to ensure that all guest supplied addresses belong to shared
GPA ranges.
A coarser but simpler alternative could be to always allow shared to
private conversion with unbacking the memory from shared fd and exit
if the VMM runs into double allocation scenarios. In either case,
unbacking shared fd memory ideally should prevent memory allocation on
subsequent write accesses to ensure double allocation scenarios are
caught early.

Regards,
Vishal

Re: [RFC PATCH v8 00/21] Net Control VQ support with asid in vDPA SVQ

On Wed, Jun 8, 2022 at 7:51 AM Jason Wang  wrote:
>
>
> 在 2022/5/20 03:12, Eugenio Pérez 写道:
> > Control virtqueue is used by networking device for accepting various
> > commands from the driver. It's a must to support multiqueue and other
> > configurations.
> >
> > Shadow VirtQueue (SVQ) already makes possible migration of virtqueue
> > states, effectively intercepting them so qemu can track what regions of 
> > memory
> > are dirty because device action and needs migration. However, this does not
> > solve networking device state seen by the driver because CVQ messages, like
> > changes on MAC addresses from the driver.
> >
> > To solve that, this series uses SVQ infrastructure proposed to intercept
> > networking control messages used by the device. This way, qemu is able to
> > update VirtIONet device model and to migrate it.
> >
> > However, to intercept all queues would slow device data forwarding. To solve
> > that, only the CVQ must be intercepted all the time. This is achieved using
> > the ASID infrastructure, that allows different translations for different
> > virtqueues. The most updated kernel part of ASID is proposed at [1].
> >
> > You can run qemu in two modes after applying this series: only intercepting
> > cvq with x-cvq-svq=on or intercept all the virtqueues adding cmdline 
> > x-svq=on:
> >
> > -netdev 
> > type=vhost-vdpa,vhostdev=/dev/vhost-vdpa-0,id=vhost-vdpa0,x-cvq-svq=on,x-svq=on
> >
> > First three patches enable the update of the virtio-net device model for 
> > each
> > CVQ message acknowledged by the device.
> >
> > Patches 5 to 9 enable individual SVQs to copy the buffers to QEMU's VA.
> > This allows simplifying the memory mapping, instead of mapping all the guest's
> > memory like in the data virtqueues.
> >
> > Patch 10 allows to inject control messages to the device. This allows to set
> > state to the device both at QEMU startup and at live migration destination. 
> > In
> > the future, this may also be used to emulate _F_ANNOUNCE.
> >
> > Patch 11 updates kernel headers, but it assign random numbers to needed 
> > ioctls
> > because they are still not accepted in the kernel.
> >
> > Patches 12-16 enables the set of the features of the net device model to the
> > vdpa device at device start.
> >
> > The last ones enable the separated ASID and SVQ.
> >
> > Comments are welcomed.
>
>
> As discussed, I think we need to split this huge series into smaller ones:
>
> 1) shadow CVQ only, this makes rx-filter-event work
> 2) ASID support for CVQ
>
> And for 1) we need consider whether or not it could be simplified.
>
> Or do it in reverse order, since if we do 1) first, we may have security
> issues.
>

I'm ok with both, but I also think 2) before 1) might make more sense.
There is no way to only shadow CVQ otherwise ATM.

Can we do as with previous base SVQ patches? they were merged although
there is still no way to enable SVQ.

Thanks!

> Thoughts?
>
> Thanks
>
>
> >
> > TODO:
> > * Fall back on regular CVQ if QEMU cannot isolate it in its own ASID for any
> >   reason, blocking migration. This is tricky, since it can leave the VM
> >   unable to be migrated anymore, so some way of blocking it must be used.
> > * Review failure paths; some have TODO notes, others don't.
> >
> > Changes from rfc v7:
> > * Don't map all guest space in ASID 1 but copy all the buffers. No need for
> >more memory listeners.
> > * Move net backend start callback to SVQ.
> > * Wait for device CVQ commands used by the device at SVQ start, avoiding 
> > races.
> > * Changed ioctls, but they're provisional anyway.
> > * Reorder commits so refactor and code adding ones are closer to usage.
> > * Usual cleaning: better tracing, doc, patches messages, ...
> >
> > Changes from rfc v6:
> > * Fix bad iotlb updates order when batching was enabled
> > * Add reference counting to iova_tree so cleaning is simpler.
> >
> > Changes from rfc v5:
> > * Fixes bad calculus of cvq end group when MQ is not acked by the guest.
> >
> > Changes from rfc v4:
> > * Add missing tracing
> > * Add multiqueue support
> > * Use already sent version for replacing g_memdup
> > * Care with memory management
> >
> > Changes from rfc v3:
> > * Fix bad returning of descriptors to SVQ list.
> >
> > Changes from rfc v2:
> > * Fix use-after-free.
> >
> > Changes from rfc v1:
> > * Rebase to latest master.
> > * Configure ASID instead of assuming cvq asid != data vqs asid.
> > * Update device model so (MAC) state can be migrated too.
> >
> > [1] https://lkml.kernel.org/kvm/20220224212314.1326-1-gda...@xilinx.com/
> >
> > Eugenio Pérez (21):
> >virtio-net: Expose ctrl virtqueue logic
> >vhost: Add custom used buffer callback
> >vdpa: control virtqueue support on shadow virtqueue
> >virtio: Make virtqueue_alloc_element non-static
> >vhost: Add vhost_iova_tree_find
> >vdpa: Add map/unmap operation callback to SVQ
> >vhost: move descriptor translation to vhost_svq_vring_write_descs
> >vhost: Add

[PULL 0/1] VFIO updates 2022-06-08

2022-06-08 Thread Alex Williamson

The following changes since commit 9b1f58854959c5a9bdb347e3e04c252ab7fc9ef5:

  Merge tag 'pull-la-20220606' of https://gitlab.com/rth7680/qemu into staging 
(2022-06-06 16:16:01 -0700)

are available in the Git repository at:

  https://gitlab.com/alex.williamson/qemu.git tags/vfio-updates-20220608.0

for you to fetch changes up to ec6600be0dc16982181c7ad80d94c143c0807dd2:

  vfio/common: remove spurious warning on vfio_listener_region_del (2022-06-08 
08:44:19 -0600)


VFIO updates 2022-06-08

 * Fix spurious alignment warnings for TPM on unmap too (Eric Auger)


Eric Auger (1):
  vfio/common: remove spurious warning on vfio_listener_region_del

 hw/vfio/common.c | 10 +-
 hw/vfio/trace-events |  2 +-
 2 files changed, 10 insertions(+), 2 deletions(-)

[PULL 1/1] vfio/common: remove spurious warning on vfio_listener_region_del

2022-06-08 Thread Alex Williamson

From: Eric Auger 

851d6d1a0f ("vfio/common: remove spurious tpm-crb-cmd misalignment
warning") removed the warning on vfio_listener_region_add() path.

However the same warning also hits on region_del path. Let's remove
it and reword the dynamic trace as this can be called on both
map and unmap path.

Signed-off-by: Eric Auger 
Reviewed-by: Cornelia Huck 
Link: https://lore.kernel.org/r/20220524091405.416256-1-eric.au...@redhat.com
Fixes: 851d6d1a0ff2 ("vfio/common: remove spurious tpm-crb-cmd misalignment 
warning")
Signed-off-by: Alex Williamson 
---
 hw/vfio/common.c |   10 +-
 hw/vfio/trace-events |2 +-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 29982c7af8c4..ace9562a9ba1 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1145,7 +1145,15 @@ static void vfio_listener_region_del(MemoryListener 
*listener,
 if (unlikely((section->offset_within_address_space &
   ~qemu_real_host_page_mask()) !=
  (section->offset_within_region & 
~qemu_real_host_page_mask( {
-error_report("%s received unaligned region", __func__);
+if (!vfio_known_safe_misalignment(section)) {
+error_report("%s received unaligned region %s iova=0x%"PRIx64
+ " offset_within_region=0x%"PRIx64
+ " qemu_real_host_page_size=0x%"PRIxPTR,
+ __func__, memory_region_name(section->mr),
+ section->offset_within_address_space,
+ section->offset_within_region,
+ qemu_real_host_page_size());
+}
 return;
 }
 
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 582882db91c3..73dffe9e00d5 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -100,7 +100,7 @@ vfio_listener_region_add_skip(uint64_t start, uint64_t end) 
"SKIPPING region_add
 vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to 
liobn fd %d"
 vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add 
[iommu] 0x%"PRIx64" - 0x%"PRIx64
 vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void 
*vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]"
-vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t 
offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" 
offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR ": cannot 
be mapped for DMA"
+vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t 
offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" 
offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR
 vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t 
size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not 
aligned to 0x%"PRIx64" and cannot be mapped for DMA"
 vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING 
region_del 0x%"PRIx64" - 0x%"PRIx64
 vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" 
- 0x%"PRIx64

Re: [RFC PATCH v8 14/21] vhost: Make possible to check for device exclusive vq group

On Wed, Jun 8, 2022 at 6:25 AM Jason Wang  wrote:
>
>
> 在 2022/5/20 03:12, Eugenio Pérez 写道:
> > CVQ needs to be in its own group, not shared with any data vq. Enable
> > the checking of it here, before introducing address space id concepts.
> >
> > Signed-off-by: Eugenio Pérez 
> > ---
> >   include/hw/virtio/vhost.h |  2 +
> >   hw/net/vhost_net.c|  4 +-
> >   hw/virtio/vhost-vdpa.c| 79 ++-
> >   hw/virtio/trace-events|  1 +
> >   4 files changed, 84 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
> > index b291fe4e24..cebec1d817 100644
> > --- a/include/hw/virtio/vhost.h
> > +++ b/include/hw/virtio/vhost.h
> > @@ -84,6 +84,8 @@ struct vhost_dev {
> >   int vq_index_end;
> >   /* if non-zero, minimum required value for max_queues */
> >   int num_queues;
> > +/* Must be a vq group different than any other vhost dev */
> > +bool independent_vq_group;
>
>
> We probably need a better abstraction here.
>
> E.g having a parent vhost_dev_group structure.
>

I think there is room for improvement too, but to make this work we
don't need the device model to know all the other devices at this
moment. I'm open to implementing it if we decide that solution is more
maintainable or whatever other reason though.

>
> >   uint64_t features;
> >   uint64_t acked_features;
> >   uint64_t backend_features;
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index ccac5b7a64..1c2386c01c 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
> > @@ -339,14 +339,16 @@ int vhost_net_start(VirtIODevice *dev, NetClientState 
> > *ncs,
> >   }
> >
> >   for (i = 0; i < nvhosts; i++) {
> > +bool cvq_idx = i >= data_queue_pairs;
> >
> > -if (i < data_queue_pairs) {
> > +if (!cvq_idx) {
> >   peer = qemu_get_peer(ncs, i);
> >   } else { /* Control Virtqueue */
> >   peer = qemu_get_peer(ncs, n->max_queue_pairs);
> >   }
> >
> >   net = get_vhost_net(peer);
> > +net->dev.independent_vq_group = !!cvq_idx;
> >   vhost_net_set_vq_index(net, i * 2, index_end);
> >
> >   /* Suppress the masking guest notifiers on vhost user
> > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > index eec6d544e9..52dd8baa8d 100644
> > --- a/hw/virtio/vhost-vdpa.c
> > +++ b/hw/virtio/vhost-vdpa.c
> > @@ -685,7 +685,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev 
> > *dev)
> >   {
> >   uint64_t features;
> >   uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
> > -0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
> > +0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
> > +0x1ULL << VHOST_BACKEND_F_IOTLB_ASID;
> >   int r;
> >
> >   if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, )) {
> > @@ -1110,6 +,78 @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev 
> > *dev)
> >   return true;
> >   }
> >
> > +static int vhost_vdpa_get_vring_group(struct vhost_dev *dev,
> > +  struct vhost_vring_state *state)
> > +{
> > +int ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_VRING_GROUP, state);
> > +trace_vhost_vdpa_get_vring_group(dev, state->index, state->num);
> > +return ret;
> > +}
> > +
> > +static bool vhost_dev_is_independent_group(struct vhost_dev *dev)
> > +{
> > +struct vhost_vdpa *v = dev->opaque;
> > +struct vhost_vring_state this_vq_group = {
> > +.index = dev->vq_index,
> > +};
> > +int ret;
> > +
> > +if (!(dev->backend_cap & VHOST_BACKEND_F_IOTLB_ASID)) {
> > +return true;
> > +}
>
>
> This should be false?
>
>
> > +
> > +if (!v->shadow_vqs_enabled) {
> > +return true;
> > +}
>
>
> And here?
>

They're true so it doesn't get in the middle if the device already
knows there is no need to check vhost_dev for an independent group.

With recent mq changes, I think I can delete these checks and move
them to net/vhost-vdpa.

>
> > +
> > +ret = vhost_vdpa_get_vring_group(dev, _vq_group);
> > +if (unlikely(ret)) {
> > +goto call_err;
> > +}
> > +
> > +for (int i = 1; i < dev->nvqs; ++i) {
> > +struct vhost_vring_state vq_group = {
> > +.index = dev->vq_index + i,
> > +};
> > +
> > +ret = vhost_vdpa_get_vring_group(dev, _group);
> > +if (unlikely(ret)) {
> > +goto call_err;
> > +}
> > +if (unlikely(vq_group.num != this_vq_group.num)) {
> > +error_report("VQ %d group is different than VQ %d one",
> > + this_vq_group.index, vq_group.index);
>
>
> Not sure this is needed. The group id is not tied to vq index if I
> understand correctly.
>
> E.g we have 1 qp with cvq, we can have
>
> group 0 cvq
>
> group 1 tx/rx
>

This function is severely undocumented, thanks for pointing it out :).

It checks if the virtqueues that

Re: [PATCH v4 0/3] target/m68k: semihosting cleanup


On 6/8/22 11:08, Laurent Vivier wrote:

Le 08/06/2022 à 16:53, Richard Henderson a écrit :

On 6/8/22 02:36, Laurent Vivier wrote:

Le 08/06/2022 à 00:26, Richard Henderson a écrit :

Based-on: <20220607204557.658541-1-richard.hender...@linaro.org>
("[PATCH v4 00/53] semihosting cleanup")

Changes for v4:
   * Split out of v2.
   * Convert host errno to gdb errno, which for m68k is guest errno.



How do you test semihosting on m68k?


I have only compiled this.  I had been working on adding something to tests/tcg/m68k/, 
which is where those halt patches came from, but haven't finished.




But what is the use case of semihosting on m68k?
Is there a kernel implementation available for that?


Not that I'm aware of.  Do you want to remove it entirely instead?
Leaving it as-is should not be an option...


r~

Re: [RFC PATCH v8 12/21] vdpa: delay set_vring_ready after DRIVER_OK

On Wed, Jun 8, 2022 at 6:21 AM Jason Wang  wrote:
>
>
> 在 2022/5/20 03:12, Eugenio Pérez 写道:
> > To restore the device in the destination of a live migration we send the
> > commands through control virtqueue. For a device to read CVQ it must
> > have received DRIVER_OK status bit.
> >
> > However, this opens a window where the device could start receiving
> > packets in rx queue 0 before it receives the RSS configuration. To avoid
> > that, we will not send vring_enable until all configuration is used by
> > the device.
> >
> > As a first step, reverse the DRIVER_OK and SET_VRING_ENABLE steps.
> >
> > Signed-off-by: Eugenio Pérez 
>
>
> I may miss something, but it looks to me this should be an independent
> patch or it should depend on live migration series.
>

With x-svq it's possible to migrate a VM, because we don't need to
stop the device: VMM always knows the vq state to program in the
destination (assuming no need for inflight etc).

But it will have better context in the next series for sure.

Thanks!

> Thanks
>
>
> > ---
> >   hw/virtio/vhost-vdpa.c | 20 +++-
> >   1 file changed, 15 insertions(+), 5 deletions(-)
> >
> > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > index 31b3d4d013..13e5e2a061 100644
> > --- a/hw/virtio/vhost-vdpa.c
> > +++ b/hw/virtio/vhost-vdpa.c
> > @@ -748,13 +748,18 @@ static int vhost_vdpa_get_vq_index(struct vhost_dev 
> > *dev, int idx)
> >   return idx;
> >   }
> >
> > +/**
> > + * Set ready all vring of the device
> > + *
> > + * @dev: Vhost device
> > + */
> >   static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
> >   {
> >   int i;
> >   trace_vhost_vdpa_set_vring_ready(dev);
> > -for (i = 0; i < dev->nvqs; ++i) {
> > +for (i = 0; i < dev->vq_index_end; ++i) {
> >   struct vhost_vring_state state = {
> > -.index = dev->vq_index + i,
> > +.index = i,
> >   .num = 1,
> >   };
> >   vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, );
> > @@ -1117,7 +1122,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev 
> > *dev, bool started)
> >   if (unlikely(!ok)) {
> >   return -1;
> >   }
> > -vhost_vdpa_set_vring_ready(dev);
> >   } else {
> >   ok = vhost_vdpa_svqs_stop(dev);
> >   if (unlikely(!ok)) {
> > @@ -1131,16 +1135,22 @@ static int vhost_vdpa_dev_start(struct vhost_dev 
> > *dev, bool started)
> >   }
> >
> >   if (started) {
> > +int r;
> >   memory_listener_register(>listener, _space_memory);
> > -return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
> > +r = vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
> > +if (unlikely(r)) {
> > +return r;
> > +}
> > +vhost_vdpa_set_vring_ready(dev);
> >   } else {
> >   vhost_vdpa_reset_device(dev);
> >   vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
> >  VIRTIO_CONFIG_S_DRIVER);
> >   memory_listener_unregister(>listener);
> >
> > -return 0;
> >   }
> > +
> > +return 0;
> >   }
> >
> >   static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
>

Re: [RFC PATCH v8 11/21] vhost: Update kernel headers

On Wed, Jun 8, 2022 at 6:19 AM Jason Wang  wrote:
>
>
> 在 2022/5/20 03:12, Eugenio Pérez 写道:
> > Signed-off-by: Eugenio Pérez 
> > ---
>
>
> It's better to use the helpers in scripts/ and mentioned to which
> version is this synced.
>

Right, I should have written somewhere this was in the meantime it was
accepted in Linux master :). I'll use the scripts for the next
version.

Thanks!

> Thanks
>
>
> >   include/standard-headers/linux/vhost_types.h | 11 -
> >   linux-headers/linux/vhost.h  | 25 
> >   2 files changed, 30 insertions(+), 6 deletions(-)
> >
> > diff --git a/include/standard-headers/linux/vhost_types.h 
> > b/include/standard-headers/linux/vhost_types.h
> > index 0bd2684a2a..ce78551b0f 100644
> > --- a/include/standard-headers/linux/vhost_types.h
> > +++ b/include/standard-headers/linux/vhost_types.h
> > @@ -87,7 +87,7 @@ struct vhost_msg {
> >
> >   struct vhost_msg_v2 {
> >   uint32_t type;
> > - uint32_t reserved;
> > + uint32_t asid;
> >   union {
> >   struct vhost_iotlb_msg iotlb;
> >   uint8_t padding[64];
> > @@ -153,4 +153,13 @@ struct vhost_vdpa_iova_range {
> >   /* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. 
> > */
> >   #define VHOST_NET_F_VIRTIO_NET_HDR 27
> >
> > +/* Use message type V2 */
> > +#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
> > +/* IOTLB can accept batching hints */
> > +#define VHOST_BACKEND_F_IOTLB_BATCH  0x2
> > +/* IOTLB can accept address space identifier through V2 type of IOTLB
> > + * message
> > + */
> > +#define VHOST_BACKEND_F_IOTLB_ASID  0x3
> > +
> >   #endif
> > diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h
> > index 5d99e7c242..d42eb46efd 100644
> > --- a/linux-headers/linux/vhost.h
> > +++ b/linux-headers/linux/vhost.h
> > @@ -89,11 +89,6 @@
> >
> >   /* Set or get vhost backend capability */
> >
> > -/* Use message type V2 */
> > -#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
> > -/* IOTLB can accept batching hints */
> > -#define VHOST_BACKEND_F_IOTLB_BATCH  0x2
> > -
> >   #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
> >   #define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
> >
> > @@ -154,6 +149,26 @@
> >   /* Get the config size */
> >   #define VHOST_VDPA_GET_CONFIG_SIZE  _IOR(VHOST_VIRTIO, 0x79, __u32)
> >
> > +/* Get the number of virtqueue groups. */
> > +#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int)
> > +
> > +/* Get the number of address spaces. */
> > +#define VHOST_VDPA_GET_AS_NUM_IOR(VHOST_VIRTIO, 0x7B, 
> > unsigned int)
> > +
> > +/* Get the group for a virtqueue: read index, write group in num,
> > + * The virtqueue index is stored in the index field of
> > + * vhost_vring_state. The group for this specific virtqueue is
> > + * returned via num field of vhost_vring_state.
> > + */
> > +#define VHOST_VDPA_GET_VRING_GROUP   _IOWR(VHOST_VIRTIO, 0x7C,   \
> > +   struct vhost_vring_state)
> > +/* Set the ASID for a virtqueue group. The group index is stored in
> > + * the index field of vhost_vring_state, the ASID associated with this
> > + * group is stored at num field of vhost_vring_state.
> > + */
> > +#define VHOST_VDPA_SET_GROUP_ASID_IOW(VHOST_VIRTIO, 0x7D, \
> > +  struct vhost_vring_state)
> > +
> >   /* Get the count of all virtqueues */
> >   #define VHOST_VDPA_GET_VQS_COUNT_IOR(VHOST_VIRTIO, 0x80, __u32)
> >
>

Re: [RFC PATCH v8 09/21] vhost: Add svq copy desc mode

On Wed, Jun 8, 2022 at 6:14 AM Jason Wang  wrote:
>
>
> 在 2022/5/20 03:12, Eugenio Pérez 写道:
> > Enable SVQ to not forward the descriptor translating its address to
> > qemu's IOVA but copying to a region outside of the guest.
> >
> > Virtio-net control VQ will use this mode, so we don't need to send all
> > the guest's memory every time there is a change, but only on messages.
> > Conversely, CVQ will only have access to control messages.  This leads to
> > less messing with memory listeners.
> >
> > We could also try to send only the required translation by message, but
> > this presents a problem when many control messages occupy the same
> > guest's memory region.
> >
> > Lastly, this allows us to inject messages from QEMU to the device in a
> > simple manner.  CVQ should be used rarely and with small messages, so all
> > the drawbacks should be acceptable.
> >
> > Signed-off-by: Eugenio Pérez 
> > ---
> >   hw/virtio/vhost-shadow-virtqueue.h |  10 ++
> >   include/hw/virtio/vhost-vdpa.h |   1 +
> >   hw/virtio/vhost-shadow-virtqueue.c | 174 +++--
> >   hw/virtio/vhost-vdpa.c |   1 +
> >   net/vhost-vdpa.c   |   1 +
> >   5 files changed, 175 insertions(+), 12 deletions(-)
> >
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
> > b/hw/virtio/vhost-shadow-virtqueue.h
> > index e06ac52158..79cb2d301f 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.h
> > +++ b/hw/virtio/vhost-shadow-virtqueue.h
> > @@ -17,6 +17,12 @@
> >
> >   typedef struct SVQElement {
> >   VirtQueueElement elem;
> > +
> > +/* SVQ IOVA address of in buffer and out buffer if cloned */
> > +hwaddr in_iova, out_iova;
>
>
> It might worth to mention that we'd expect a single buffer here.
>

I'll do it. There is another comment like that in another place, I'll
copy it here.

>
> > +
> > +/* Length of in buffer */
> > +size_t in_len;
> >   } SVQElement;
> >
> >   typedef void (*VirtQueueElementCallback)(VirtIODevice *vdev,
> > @@ -102,6 +108,9 @@ typedef struct VhostShadowVirtqueue {
> >
> >   /* Next head to consume from the device */
> >   uint16_t last_used_idx;
> > +
> > +/* Copy each descriptor to QEMU iova */
> > +bool copy_descs;
> >   } VhostShadowVirtqueue;
> >
> >   bool vhost_svq_valid_features(uint64_t features, Error **errp);
> > @@ -119,6 +128,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq);
> >
> >   VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_map,
> >   const VhostShadowVirtqueueOps *ops,
> > +bool copy_descs,
> >   const VhostShadowVirtqueueMapOps 
> > *map_ops,
> >   void *map_ops_opaque);
> >
> > diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
> > index f1ba46a860..dc2884eea4 100644
> > --- a/include/hw/virtio/vhost-vdpa.h
> > +++ b/include/hw/virtio/vhost-vdpa.h
> > @@ -33,6 +33,7 @@ typedef struct vhost_vdpa {
> >   struct vhost_vdpa_iova_range iova_range;
> >   uint64_t acked_features;
> >   bool shadow_vqs_enabled;
> > +bool svq_copy_descs;
> >   /* IOVA mapping used by the Shadow Virtqueue */
> >   VhostIOVATree *iova_tree;
> >   GPtrArray *shadow_vqs;
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
> > b/hw/virtio/vhost-shadow-virtqueue.c
> > index 044005ba89..5a8feb1cbc 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.c
> > +++ b/hw/virtio/vhost-shadow-virtqueue.c
> > @@ -16,6 +16,7 @@
> >   #include "qemu/log.h"
> >   #include "qemu/memalign.h"
> >   #include "linux-headers/linux/vhost.h"
> > +#include "qemu/iov.h"
> >
> >   /**
> >* Validate the transport device features that both guests can use with 
> > the SVQ
> > @@ -70,6 +71,30 @@ static uint16_t vhost_svq_available_slots(const 
> > VhostShadowVirtqueue *svq)
> >   return svq->vring.num - (svq->shadow_avail_idx - 
> > svq->shadow_used_idx);
> >   }
> >
> > +static void vhost_svq_alloc_buffer(void **base, size_t *len,
> > +   const struct iovec *iov, size_t num,
> > +   bool write)
> > +{
> > +*len = iov_size(iov, num);
>
>
> Since this behavior is trigger able by the guest, we need an upper limit
> here.
>

Good point. What could be a good limit?

As you propose later, maybe I can redesign SVQ so it either forwards
the buffer to the device or calls an available element callback. It
can inject the right copied buffer by itself. This way we know the
right buffer size beforehand.

>
> > +size_t buf_size = ROUND_UP(*len, 4096);
>
>
> I see a kind of duplicated round up which is done in
> vhost_svq_write_descs().
>

Yes, it's better to return this size somehow.

> Btw, should we use TARGET_PAGE_SIZE instead of the magic 4096 here?
>

Yes. But since we're going to expose pages to the device, it should be
host_page_size, right?

>
> > +
> > +if (!num) {
> > +

Re: [PATCH v1 1/1] QIOChannelSocket: Fix zero-copy send so socket flush works

2022-06-08 Thread Daniel P . Berrangé

On Wed, Jun 08, 2022 at 03:18:09PM -0300, Leonardo Bras wrote:
> Somewhere between v6 and v7 of the zero-copy-send patchset a crucial
> part of the flushing mechanism got missing: incrementing zero_copy_queued.
> 
> Without that, the flushing interface becomes a no-op, and there is no
> guarantee the buffer is really sent.
> 
> This can go as bad as causing a corruption in RAM during migration.
> 
> Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & 
> io_flush for CONFIG_LINUX")
> Reported-by: 徐闯 
> Signed-off-by: Leonardo Bras 
> ---
>  io/channel-socket.c | 11 ---
>  1 file changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/io/channel-socket.c b/io/channel-socket.c
> index dc9c165de1..ca4cae930f 100644
> --- a/io/channel-socket.c
> +++ b/io/channel-socket.c
> @@ -554,6 +554,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
>  size_t fdsize = sizeof(int) * nfds;
>  struct cmsghdr *cmsg;
>  int sflags = 0;
> +bool zero_copy_enabled = false;
>  
>  memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS));
>  
> @@ -581,6 +582,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
>  #ifdef QEMU_MSG_ZEROCOPY
>  if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
>  sflags = MSG_ZEROCOPY;
> +zero_copy_enabled = true;
>  }
>  #endif
>  
> @@ -592,21 +594,24 @@ static ssize_t qio_channel_socket_writev(QIOChannel 
> *ioc,
>  return QIO_CHANNEL_ERR_BLOCK;
>  case EINTR:
>  goto retry;
> -#ifdef QEMU_MSG_ZEROCOPY

Removing this ifdef appears incidental to the change. If this is
redundant just remove it in its own patch.

>  case ENOBUFS:
> -if (sflags & MSG_ZEROCOPY) {
> +if (zero_copy_enabled) {
>  error_setg_errno(errp, errno,
>   "Process can't lock enough memory for using 
> MSG_ZEROCOPY");
>  return -1;
>  }
>  break;
> -#endif
>  }
>  
>  error_setg_errno(errp, errno,
>   "Unable to write to socket");
>  return -1;
>  }
> +
> +if (zero_copy_enabled) {

What's wrong with

   if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
sioc->zero_copy_queued++;
}


Introducing another local variable doesn't really add value IMHO.

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PATCH v3 1/1] target/riscv: Add Zihintpause support

2022-06-08 Thread Dao Lu

Please ignore, I have missed the commit message, will resend a v3.

Sorry about that.
Dao

On Tue, Jun 7, 2022 at 8:44 PM Dao Lu  wrote:
>
> Tested-by: Heiko Stuebner 
> Signed-off-by: Dao Lu 
> ---
>  target/riscv/cpu.c  |  2 ++
>  target/riscv/cpu.h  |  1 +
>  target/riscv/insn32.decode  |  7 ++-
>  target/riscv/insn_trans/trans_rvi.c.inc | 18 ++
>  4 files changed, 27 insertions(+), 1 deletion(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index ccacdee215..183fb37fdf 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -825,6 +825,7 @@ static Property riscv_cpu_properties[] = {
>  DEFINE_PROP_BOOL("Counters", RISCVCPU, cfg.ext_counters, true),
>  DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true),
>  DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true),
> +DEFINE_PROP_BOOL("Zihintpause", RISCVCPU, cfg.ext_zihintpause, true),
>  DEFINE_PROP_BOOL("Zfh", RISCVCPU, cfg.ext_zfh, false),
>  DEFINE_PROP_BOOL("Zfhmin", RISCVCPU, cfg.ext_zfhmin, false),
>  DEFINE_PROP_BOOL("Zve32f", RISCVCPU, cfg.ext_zve32f, false),
> @@ -996,6 +997,7 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char 
> **isa_str, int max_str_len)
>   *extensions by an underscore.
>   */
>  struct isa_ext_data isa_edata_arr[] = {
> +ISA_EDATA_ENTRY(zihintpause, ext_zihintpause),
>  ISA_EDATA_ENTRY(zfh, ext_zfh),
>  ISA_EDATA_ENTRY(zfhmin, ext_zfhmin),
>  ISA_EDATA_ENTRY(zfinx, ext_zfinx),
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index fe6c9a2c92..e466a04a59 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -394,6 +394,7 @@ struct RISCVCPUConfig {
>  bool ext_counters;
>  bool ext_ifencei;
>  bool ext_icsr;
> +bool ext_zihintpause;
>  bool ext_svinval;
>  bool ext_svnapot;
>  bool ext_svpbmt;
> diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
> index 4033565393..595fdcdad8 100644
> --- a/target/riscv/insn32.decode
> +++ b/target/riscv/insn32.decode
> @@ -149,7 +149,12 @@ srl  000 .. 101 . 0110011 @r
>  sra  010 .. 101 . 0110011 @r
>  or   000 .. 110 . 0110011 @r
>  and  000 .. 111 . 0110011 @r
> -fence pred:4 succ:4 - 000 - 000
> +
> +{
> +  pause   0001      0 000 0 000
> +  fence   pred:4 succ:4 - 000 - 000
> +}
> +
>  fence_i         - 001 - 000
>  csrrw . 001 . 1110011 @csr
>  csrrs . 010 . 1110011 @csr
> diff --git a/target/riscv/insn_trans/trans_rvi.c.inc 
> b/target/riscv/insn_trans/trans_rvi.c.inc
> index f1342f30f8..ca75e05f4b 100644
> --- a/target/riscv/insn_trans/trans_rvi.c.inc
> +++ b/target/riscv/insn_trans/trans_rvi.c.inc
> @@ -796,6 +796,24 @@ static bool trans_srad(DisasContext *ctx, arg_srad *a)
>  return gen_shift(ctx, a, EXT_SIGN, tcg_gen_sar_tl, NULL);
>  }
>
> +static bool trans_pause(DisasContext *ctx, arg_pause *a)
> +{
> +if (!ctx->cfg_ptr->ext_zihintpause) {
> +return false;
> +}
> +
> +/*
> + * PAUSE is a no-op in QEMU,
> + * however we need to clear the reservation,
> + * end the TB and return to main loop
> + */
> +tcg_gen_movi_tl(load_res, -1);
> +gen_set_pc_imm(ctx, ctx->pc_succ_insn);
> +tcg_gen_exit_tb(NULL, 0);
> +ctx->base.is_jmp = DISAS_NORETURN;
> +
> +return true;
> +}
>
>  static bool trans_fence(DisasContext *ctx, arg_fence *a)
>  {
> --
> 2.25.1
>

[PATCH v1 1/1] QIOChannelSocket: Fix zero-copy send so socket flush works

2022-06-08 Thread Leonardo Bras

Somewhere between v6 and v7 of the zero-copy-send patchset a crucial
part of the flushing mechanism went missing: incrementing zero_copy_queued.

Without that, the flushing interface becomes a no-op, and there is no
guarantee the buffer is really sent.

This can go as bad as causing a corruption in RAM during migration.

Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & 
io_flush for CONFIG_LINUX")
Reported-by: 徐闯 
Signed-off-by: Leonardo Bras 
---
 io/channel-socket.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/io/channel-socket.c b/io/channel-socket.c
index dc9c165de1..ca4cae930f 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -554,6 +554,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
 size_t fdsize = sizeof(int) * nfds;
 struct cmsghdr *cmsg;
 int sflags = 0;
+bool zero_copy_enabled = false;
 
 memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS));
 
@@ -581,6 +582,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
 #ifdef QEMU_MSG_ZEROCOPY
 if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
 sflags = MSG_ZEROCOPY;
+zero_copy_enabled = true;
 }
 #endif
 
@@ -592,21 +594,24 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
 return QIO_CHANNEL_ERR_BLOCK;
 case EINTR:
 goto retry;
-#ifdef QEMU_MSG_ZEROCOPY
 case ENOBUFS:
-if (sflags & MSG_ZEROCOPY) {
+if (zero_copy_enabled) {
 error_setg_errno(errp, errno,
  "Process can't lock enough memory for using 
MSG_ZEROCOPY");
 return -1;
 }
 break;
-#endif
 }
 
 error_setg_errno(errp, errno,
  "Unable to write to socket");
 return -1;
 }
+
+if (zero_copy_enabled) {
+sioc->zero_copy_queued++;
+}
+
 return ret;
 }
 #else /* WIN32 */
-- 
2.36.1

Re: [External] [PATCH v13 3/8] QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX

2022-06-08 Thread Leonardo Bras Soares Passos

On Wed, Jun 8, 2022 at 8:41 AM Peter Xu  wrote:
>
> On Wed, Jun 08, 2022 at 02:37:28AM -0300, Leonardo Bras Soares Passos wrote:
> > (1) is not an option, as the interface currently uses ret=1 to make
> > sure MSG_ZEROCOPY is getting used,
> > I added that so the user of qio_channel can switch off zero-copy if
> > it's not getting used, and save some cpu.
>
> Yes (1) is not, but could you explain what do you mean by making sure
> MSG_ZEROCOPY being used?  Why is it relevant to the retval here?

If sendmsg() is called with MSG_ZEROCOPY, and everything is configured
correctly, the kernel will attempt to send the buffer using zero-copy.

Even with the right configuration on a recent enough kernel, there are
factors that can prevent zero-copy from happening, and the kernel will
fall back to the copying mechanism.
An example being the net device not supporting 'Scatter-Gather'
feature (NETIF_F_SG).

When this happens, there is an overhead for 'trying zero-copy first',
instead of just opting for the copying mechanism.

In a previous iteration of the patchset, it was made clear that it's
desirable to detect when the kernel falls back to copying mechanism,
so the user of 'QIOChannelSocket' can switch to copying and avoid the
overhead. This was done by the return value of flush(), which is 1 if
that occurs.

>
> I just figured it's a bit weird to return >0 here in flush().
>
> >
> > (2) is not a problem, but I fail to see how useful that would be. Is
> > the idea manually keeping track of flush happening?
>
> Yes if we can check this up it'll be good enough to me.  The trace point
> could help in some case in the future too to monitor the behavior of kernel
> MSG_ERRQUEUE but if you don't like it then it's okay.
>

TBH I am not sure how those traces work yet, and I am afraid it can
introduce some overhead in flush.
In any case, we can introduce this trace in a separate patch, since
fixing zero-copy flush seems more urgent right now.

Best regards,
Leo

> --
> Peter Xu
>

Re: [PATCH v4 0/3] target/m68k: semihosting cleanup

2022-06-08 Thread Laurent Vivier


Le 08/06/2022 à 16:53, Richard Henderson a écrit :

On 6/8/22 02:36, Laurent Vivier wrote:

Le 08/06/2022 à 00:26, Richard Henderson a écrit :

Based-on: <20220607204557.658541-1-richard.hender...@linaro.org>
("[PATCH v4 00/53] semihosting cleanup")

Changes for v4:
   * Split out of v2.
   * Convert host errno to gdb errno, which for m68k is guest errno.



How do you test semihosting on m68k?


I have only compiled this.  I had been working on adding something to tests/tcg/m68k/, which is 
where those halt patches came from, but haven't finished.




But what is the use case of semihosting on m68k?
Is there a kernel implementation available for that?

Thanks,
Laurent

Re: [PATCH 03/20] migration: remove unreachble RDMA code in save_hook impl

* Daniel P. Berrangé (berra...@redhat.com) wrote:
> The QEMUFile 'save_hook' callback has a 'size_t size' parameter.
> 
> The RDMA impl of this has logic that takes different actions
> depending on whether the value is zero or non-zero. It has
> commented out logic that would have taken further actions
> if the value was negative.
> 
> The only place where the 'save_hook' callback is invoked is
> the ram_control_save_page() method, which passes 'size'
> through from its caller. The only caller of this method is
> in turn control_save_page(). This method unconditionally
> passes the 'TARGET_PAGE_SIZE' constant for the 'size' parameter.
> 
> IOW, the only scenario for 'size' that can execute in the
> qemu_rdma_save_page method is 'size > 0'. The remaining code
> has been unreachable since RDMA support was first introduced
> 9 years ago.
> 
> Signed-off-by: Daniel P. Berrangé 

Ah good; less rdma code!



Reviewed-by: Dr. David Alan Gilbert 

> ---
>  migration/rdma.c | 120 +--
>  1 file changed, 21 insertions(+), 99 deletions(-)
> 
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 672d1958a9..6e7756bee7 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -1486,34 +1486,6 @@ static uint64_t qemu_rdma_make_wrid(uint64_t wr_id, 
> uint64_t index,
>  return result;
>  }
>  
> -/*
> - * Set bit for unregistration in the next iteration.
> - * We cannot transmit right here, but will unpin later.
> - */
> -static void qemu_rdma_signal_unregister(RDMAContext *rdma, uint64_t index,
> -uint64_t chunk, uint64_t wr_id)
> -{
> -if (rdma->unregistrations[rdma->unregister_next] != 0) {
> -error_report("rdma migration: queue is full");
> -} else {
> -RDMALocalBlock *block = &(rdma->local_ram_blocks.block[index]);
> -
> -if (!test_and_set_bit(chunk, block->unregister_bitmap)) {
> -trace_qemu_rdma_signal_unregister_append(chunk,
> - rdma->unregister_next);
> -
> -rdma->unregistrations[rdma->unregister_next++] =
> -qemu_rdma_make_wrid(wr_id, index, chunk);
> -
> -if (rdma->unregister_next == RDMA_SIGNALED_SEND_MAX) {
> -rdma->unregister_next = 0;
> -}
> -} else {
> -trace_qemu_rdma_signal_unregister_already(chunk);
> -}
> -}
> -}
> -
>  /*
>   * Consult the connection manager to see a work request
>   * (of any kind) has completed.
> @@ -3278,23 +3250,7 @@ qio_channel_rdma_shutdown(QIOChannel *ioc,
>   *Offset is an offset to be added to block_offset and used
>   *to also lookup the corresponding RAMBlock.
>   *
> - *@size > 0 :
> - *Initiate an transfer this size.
> - *
> - *@size == 0 :
> - *A 'hint' or 'advice' that means that we wish to speculatively
> - *and asynchronously unregister this memory. In this case, there is 
> no
> - *guarantee that the unregister will actually happen, for example,
> - *if the memory is being actively transmitted. Additionally, the 
> memory
> - *may be re-registered at any future time if a write within the same
> - *chunk was requested again, even if you attempted to unregister it
> - *here.
> - *
> - *@size < 0 : TODO, not yet supported
> - *Unregister the memory NOW. This means that the caller does not
> - *expect there to be any future RDMA transfers and we just want to 
> clean
> - *things up. This is used in case the upper layer owns the memory and
> - *cannot wait for qemu_fclose() to occur.
> + *@size : Number of bytes to transfer
>   *
>   *@bytes_sent : User-specificed pointer to indicate how many bytes were
>   *  sent. Usually, this will not be more than a few bytes of
> @@ -3323,61 +3279,27 @@ static size_t qemu_rdma_save_page(QEMUFile *f, void 
> *opaque,
>  
>  qemu_fflush(f);
>  
> -if (size > 0) {
> -/*
> - * Add this page to the current 'chunk'. If the chunk
> - * is full, or the page doesn't belong to the current chunk,
> - * an actual RDMA write will occur and a new chunk will be formed.
> - */
> -ret = qemu_rdma_write(f, rdma, block_offset, offset, size);
> -if (ret < 0) {
> -error_report("rdma migration: write error! %d", ret);
> -goto err;
> -}
> -
> -/*
> - * We always return 1 bytes because the RDMA
> - * protocol is completely asynchronous. We do not yet know
> - * whether an  identified chunk is zero or not because we're
> - * waiting for other pages to potentially be merged with
> - * the current chunk. So, we have to call qemu_update_position()
> - * later on when the actual write occurs.
> - */
> -if (bytes_sent) {
> -*bytes_sent = 1;
> -

Re: [PATCH] configure: ignore --make

2022-06-08 Thread Matheus Kowalczuk Ferst

On 08/06/2022 12:54, Paolo Bonzini wrote:
> On 6/8/22 16:21, Matheus Kowalczuk Ferst wrote:
>> Also, we will not have this error at configure-time anymore, but I
>> suppose that *BSD users will identify the problem if they try to build
>> with non-gnu make.
> 
> Yeah, my guess was that "try ./configure && make" with GNU Make
> installed is the more common failure mode, since QEMU is certainly not
> the only package that requires GNU Make.
> 
> Alternatively, I can leave in the check for GNU Make, or move it to
> meson as a "now type "make" to build QEMU" kind of message, and still
> remove the unused --make option.

Both options seem fine. It might be better if we can keep the configure 
script returning an error when gmake is not available, but I guess it's 
not a big deal.

-- 
Matheus K. Ferst
Instituto de Pesquisas ELDORADO 
Analista de Software
Aviso Legal - Disclaimer

Re: [PATCH v4 10/53] semihosting: Adjust error checking in common_semi_cb

2022-06-08 Thread Alex Bennée



Richard Henderson  writes:

> The err parameter is non-zero if and only if an error occurred.
> Use this instead of ret == -1 for determining if we need to
> update the saved errno.
>
> This fixes the errno setting of SYS_ISTTY, which returns 0 on
> error, not -1.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alex Bennée 

-- 
Alex Bennée

Re: [PATCH RFC 4/5] cpu: Allow cpu_synchronize_all_post_init() to take an errp

* Peter Xu (pet...@redhat.com) wrote:
> Allow cpu_synchronize_all_post_init() to fail with an errp when it's set.
> Modify both precopy and postcopy to try to detect such error.
> 
> Signed-off-by: Peter Xu 
> ---
>  hw/core/machine.c |  2 +-
>  include/sysemu/cpus.h |  2 +-
>  migration/savevm.c| 20 +---
>  softmmu/cpus.c|  2 +-
>  4 files changed, 20 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/core/machine.c b/hw/core/machine.c
> index c53548d0b1..b5daad82f8 100644
> --- a/hw/core/machine.c
> +++ b/hw/core/machine.c
> @@ -1447,7 +1447,7 @@ void qemu_remove_machine_init_done_notifier(Notifier 
> *notify)
>  
>  void qdev_machine_creation_done(void)
>  {
> -cpu_synchronize_all_post_init();
> +cpu_synchronize_all_post_init(NULL);
>  
>  if (current_machine->boot_config.has_once) {
>  qemu_boot_set(current_machine->boot_config.once, _fatal);
> diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h
> index b5c87d48b3..a51ee46441 100644
> --- a/include/sysemu/cpus.h
> +++ b/include/sysemu/cpus.h
> @@ -45,7 +45,7 @@ bool cpus_are_resettable(void);
>  
>  void cpu_synchronize_all_states(void);
>  void cpu_synchronize_all_post_reset(void);
> -void cpu_synchronize_all_post_init(void);
> +void cpu_synchronize_all_post_init(Error **errp);
>  void cpu_synchronize_all_pre_loadvm(void);
>  
>  #ifndef CONFIG_USER_ONLY
> diff --git a/migration/savevm.c b/migration/savevm.c
> index d9076897b8..1175ddefd4 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -2005,7 +2005,17 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
>  /* TODO we should move all of this lot into postcopy_ram.c or a shared 
> code
>   * in migration.c
>   */
> -cpu_synchronize_all_post_init();
> +cpu_synchronize_all_post_init(_err);
> +if (local_err) {
> +/*
> + * TODO: a better way to do this is to tell the src that we cannot
> + * run the VM here so hopefully we can keep the VM running on src
> + * and immediately halt the switch-over.  But that needs work.

Yes, I think it is possible; unlike some of the later errors in the same
function, in this case we know no disks/network/etc have been touched,
so we should be able to recover.
I wonder if we can move the postcopy_state_set(POSTCOPY_INCOMING_RUNNING)
out of loadvm_postcopy_handle_run to after this point.

We've already got the return path, so we should be able to signal the
failure unless we're very unlucky.

Dave

> + */
> +error_report_err(local_err);
> +local_err = NULL;
> +autostart = false;
> +}
>  
>  trace_loadvm_postcopy_handle_run_bh("after cpu sync");
>  
> @@ -2772,7 +2782,11 @@ int qemu_loadvm_state(QEMUFile *f)
>  }
>  
>  qemu_loadvm_state_cleanup();
> -cpu_synchronize_all_post_init();
> +cpu_synchronize_all_post_init(_err);
> +if (local_err) {
> +error_report_err(local_err);
> +return -EINVAL;
> +}
>  
>  return ret;
>  }
> @@ -2789,7 +2803,7 @@ int qemu_load_device_state(QEMUFile *f)
>  return ret;
>  }
>  
> -cpu_synchronize_all_post_init();
> +cpu_synchronize_all_post_init(NULL);
>  return 0;
>  }
>  
> diff --git a/softmmu/cpus.c b/softmmu/cpus.c
> index 464c06201c..59c70fd496 100644
> --- a/softmmu/cpus.c
> +++ b/softmmu/cpus.c
> @@ -146,7 +146,7 @@ void cpu_synchronize_all_post_reset(void)
>  }
>  }
>  
> -void cpu_synchronize_all_post_init(void)
> +void cpu_synchronize_all_post_init(Error **errp)
>  {
>  CPUState *cpu;
>  
> -- 
> 2.32.0
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [PATCH] gitlab: compare CIRRUS_nn vars against 'null' not ""


On 6/8/22 09:06, Daniel P. Berrangé wrote:

The GitLab variable comparisons don't have shell like semantics where
an unset variable compares equal to empty string. We need to explicitly
test against 'null' to detect an unset variable.

Signed-off-by: Daniel P. Berrangé 
---
  .gitlab-ci.d/base.yml | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitlab-ci.d/base.yml b/.gitlab-ci.d/base.yml
index f334f3ded7..69b36c148a 100644
--- a/.gitlab-ci.d/base.yml
+++ b/.gitlab-ci.d/base.yml
@@ -13,7 +13,7 @@
  #
  
  # Cirrus jobs can't run unless the creds / target repo are set

-- if: '$QEMU_JOB_CIRRUS && ($CIRRUS_GITHUB_REPO == "" || $CIRRUS_API_TOKEN == 
"")'
+- if: '$QEMU_JOB_CIRRUS && ($CIRRUS_GITHUB_REPO == null || 
$CIRRUS_API_TOKEN == null)'
when: never
  
  # Publishing jobs should only run on the default branch in upstream


Tested-by: Richard Henderson 
Reviewed-by: Richard Henderson 

r~

Re: [PATCH v4 4/4] target/riscv: Force disable extensions if priv spec version does not match


On 6/8/22 09:14, Anup Patel wrote:

+struct isa_ext_data isa_edata_arr[] = {


static const?


r~

Re: [PATCH 2/6] bsd-user/freebsd/os-syscall.c: unlock_iovec

On Tue, Jun 7, 2022 at 7:02 PM Richard Henderson <
richard.hender...@linaro.org> wrote:

> On 6/7/22 16:35, Warner Losh wrote:
> >
> >
> >> On Jun 7, 2022, at 3:23 PM, Richard Henderson <
> richard.hender...@linaro.org> wrote:
> >>
> >> On 6/7/22 14:51, Warner Losh wrote:
> >>> void unlock_iovec(IOVecMap *map, bool copy_out)
> >>> {
> >>>   for (int i = 0, count = map->count; i < count; ++i) {
> >>>   if (map->host[i].iov_base) {
> >>>   abi_ulong target_base =
> tswapal(map->target[i].iov_base);
> >>>   unlock_user(map->host[i].iov_base, target_base,
> >>>   copy_out ? map->host[i].iov_len : 0);
> >>>   }
> >>> And wouldn't we want to filter out the iov_base that == 0 since
> >>> we may terminate the loop before we get to the count. When the
> >>> I/O is done, we'll call it not with the number we mapped, but with
> >>> the original number...  Or am I not understanding something here...
> >>
> >> I'm not following -- when and why are you adjusting count?
> >
> > When we hit a memory range we can’t map after the first one,
> > we effectively stop mapping in (in the current linux code we
> > do map after, but then destroy the length). So that means
> > we’ll have entries in the iovec that are zero, and this code
> > doesn’t account for that. We’re not changing the count, per
> > se, but have a scenario where they might wind up NULL.
>
> ... and so skip them with the if.
>
> I mean, I suppose you could set map->count on error, as you say, so that
> we don't iterate
> so far, but... duh, error case.  So long as you don't actively fail,
> there's no point in
> optimizing for it.
>

Setting the count would be hard because we'd have to allocate and free
state that we're not currently doing. Better to just skip it with an if. We
allocate
a vector that's used in a number of places, and we'd have to change that
code if we did things differently. While I'm open to suggestions here, I
think
that just accounting for the possible error with an if is our best bet for
now.
I have a lot of code to get in, and am hoping to not rewrite things unless
there's
some clear benefit over the existing structure (like fixing bugs, matching
linux-user,
or increasing performance).

Warner

Re: [RFC PATCH v8 03/21] vdpa: control virtqueue support on shadow virtqueue

On Tue, Jun 7, 2022 at 8:05 AM Jason Wang  wrote:
>
>
> 在 2022/5/20 03:12, Eugenio Pérez 写道:
> > Introduce the control virtqueue support for vDPA shadow virtqueue. This
> > is needed for advanced networking features like multiqueue.
> >
> > To demonstrate command handling, VIRTIO_NET_F_CTRL_MACADDR and
> > VIRTIO_NET_CTRL_MQ are implemented. If vDPA device is started with SVQ
> > support and virtio-net driver changes MAC or the number of queues
> > virtio-net device model will be updated with the new one.
> >
> > Other cvq commands could be added here straightforwardly, but they have
> > not been tested.
> >
> > Signed-off-by: Eugenio Pérez 
> > ---
> >   net/vhost-vdpa.c | 44 
> >   1 file changed, 44 insertions(+)
> >
> > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > index df1e69ee72..ef12fc284c 100644
> > --- a/net/vhost-vdpa.c
> > +++ b/net/vhost-vdpa.c
> > @@ -11,6 +11,7 @@
> >
> >   #include "qemu/osdep.h"
> >   #include "clients.h"
> > +#include "hw/virtio/virtio-net.h"
> >   #include "net/vhost_net.h"
> >   #include "net/vhost-vdpa.h"
> >   #include "hw/virtio/vhost-vdpa.h"
> > @@ -187,6 +188,46 @@ static NetClientInfo net_vhost_vdpa_info = {
> >   .check_peer_type = vhost_vdpa_check_peer_type,
> >   };
> >
> > +static void vhost_vdpa_net_handle_ctrl(VirtIODevice *vdev,
> > +   const VirtQueueElement *elem)
> > +{
> > +struct virtio_net_ctrl_hdr ctrl;
> > +virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> > +size_t s;
> > +struct iovec in = {
> > +.iov_base = ,
> > +.iov_len = sizeof(status),
> > +};
> > +
> > +s = iov_to_buf(elem->out_sg, elem->out_num, 0, , 
> > sizeof(ctrl.class));
> > +if (s != sizeof(ctrl.class)) {
> > +return;
> > +}
> > +
> > +switch (ctrl.class) {
> > +case VIRTIO_NET_CTRL_MAC_ADDR_SET:
> > +case VIRTIO_NET_CTRL_MQ:
> > +break;
> > +default:
> > +return;
> > +};
>
>
> I think we can probably remove the whitelist here since it is expected
> to work for any kind of command?
>

SVQ is expected to inject virtio device status at startup
(specifically, at live migration destination startup). This code is
specific per command.

Thanks!

> Thanks
>
>
> > +
> > +s = iov_to_buf(elem->in_sg, elem->in_num, 0, , sizeof(status));
> > +if (s != sizeof(status) || status != VIRTIO_NET_OK) {
> > +return;
> > +}
> > +
> > +status = VIRTIO_NET_ERR;
> > +virtio_net_handle_ctrl_iov(vdev, , 1, elem->out_sg, elem->out_num);
> > +if (status != VIRTIO_NET_OK) {
> > +error_report("Bad CVQ processing in model");
> > +}
> > +}
> > +
> > +static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
> > +.used_elem_handler = vhost_vdpa_net_handle_ctrl,
> > +};
> > +
> >   static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
> >  const char *device,
> >  const char *name,
> > @@ -211,6 +252,9 @@ static NetClientState 
> > *net_vhost_vdpa_init(NetClientState *peer,
> >
> >   s->vhost_vdpa.device_fd = vdpa_device_fd;
> >   s->vhost_vdpa.index = queue_pair_index;
> > +if (!is_datapath) {
> > +s->vhost_vdpa.shadow_vq_ops = _vdpa_net_svq_ops;
> > +}
> >   ret = vhost_vdpa_add(nc, (void *)>vhost_vdpa, queue_pair_index, 
> > nvqs);
> >   if (ret) {
> >   qemu_del_net_client(nc);
>

Re: [RFC PATCH v8 01/21] virtio-net: Expose ctrl virtqueue logic

On Tue, Jun 7, 2022 at 8:13 AM Jason Wang  wrote:
>
>
> 在 2022/5/20 03:12, Eugenio Pérez 写道:
> > This allows external vhost-net devices to modify the state of the
> > VirtIO device model once vhost-vdpa device has acknowledged the control
> > commands.
> >
> > Signed-off-by: Eugenio Pérez 
> > ---
> >   include/hw/virtio/virtio-net.h |  4 ++
> >   hw/net/virtio-net.c| 84 --
> >   2 files changed, 53 insertions(+), 35 deletions(-)
> >
> > diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
> > index eb87032627..cd31b7f67d 100644
> > --- a/include/hw/virtio/virtio-net.h
> > +++ b/include/hw/virtio/virtio-net.h
> > @@ -218,6 +218,10 @@ struct VirtIONet {
> >   struct EBPFRSSContext ebpf_rss;
> >   };
> >
> > +unsigned virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
> > +const struct iovec *in_sg, size_t 
> > in_num,
> > +const struct iovec *out_sg,
> > +unsigned out_num);
> >   void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
> >  const char *type);
> >
> > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> > index 7ad948ee7c..0e350154ec 100644
> > --- a/hw/net/virtio-net.c
> > +++ b/hw/net/virtio-net.c
> > @@ -1434,57 +1434,71 @@ static int virtio_net_handle_mq(VirtIONet *n, 
> > uint8_t cmd,
> >   return VIRTIO_NET_OK;
> >   }
> >
> > -static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
> > +unsigned virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
>
>
> Should we use size_t here?
>

I think it's a better type, yes. I used "unsigned" because
virtqueue_push uses unsigned for "len", maybe it's a good idea to
replace it there too.

Thanks!

> Thanks
>
>
> > +const struct iovec *in_sg, size_t 
> > in_num,
> > +const struct iovec *out_sg,
> > +unsigned out_num)
> >   {
> >   VirtIONet *n = VIRTIO_NET(vdev);
> >   struct virtio_net_ctrl_hdr ctrl;
> >   virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> > -VirtQueueElement *elem;
> >   size_t s;
> >   struct iovec *iov, *iov2;
> > -unsigned int iov_cnt;
> > +
> > +if (iov_size(in_sg, in_num) < sizeof(status) ||
> > +iov_size(out_sg, out_num) < sizeof(ctrl)) {
> > +virtio_error(vdev, "virtio-net ctrl missing headers");
> > +return 0;
> > +}
> > +
> > +iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
> > +s = iov_to_buf(iov, out_num, 0, , sizeof(ctrl));
> > +iov_discard_front(, _num, sizeof(ctrl));
> > +if (s != sizeof(ctrl)) {
> > +status = VIRTIO_NET_ERR;
> > +} else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
> > +status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
> > +} else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
> > +status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
> > +} else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
> > +status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
> > +} else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
> > +status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
> > +} else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
> > +status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
> > +} else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
> > +status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
> > +}
> > +
> > +s = iov_from_buf(in_sg, in_num, 0, , sizeof(status));
> > +assert(s == sizeof(status));
> > +
> > +g_free(iov2);
> > +return sizeof(status);
> > +}
> > +
> > +static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
> > +{
> > +VirtQueueElement *elem;
> >
> >   for (;;) {
> > +unsigned written;
> >   elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
> >   if (!elem) {
> >   break;
> >   }
> > -if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
> > -iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
> > -virtio_error(vdev, "virtio-net ctrl missing headers");
> > +
> > +written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, 
> > elem->in_num,
> > + elem->out_sg, elem->out_num);
> > +if (written > 0) {
> > +virtqueue_push(vq, elem, written);
> > +virtio_notify(vdev, vq);
> > +g_free(elem);
> > +} else {
> >   virtqueue_detach_element(vq, elem, 0);
> >   g_free(elem);
> >   break;
> >   }
> > -
> > -iov_cnt = elem->out_num;
> > -iov2 = iov = g_memdup2(elem->out_sg,
> > -   sizeof(struct iovec) * elem->out_num);
> > -s =

Re: [PATCH] configure: update list of preserved environment variables

2022-06-08 Thread Matheus Kowalczuk Ferst

On 07/06/2022 07:49, Paolo Bonzini wrote:
> INSTALL and LIBTOOL are not used anymore, but OBJCFLAGS is new and
> was not listed.
> 
> Signed-off-by: Paolo Bonzini 
> ---
>   configure | 3 +--
>   1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/configure b/configure
> index f35847c3cd..ce81419629 100755
> --- a/configure
> +++ b/configure
> @@ -2737,13 +2737,12 @@ preserve_env CC
>   preserve_env CFLAGS
>   preserve_env CXX
>   preserve_env CXXFLAGS
> -preserve_env INSTALL
>   preserve_env LD
>   preserve_env LDFLAGS
>   preserve_env LD_LIBRARY_PATH
> -preserve_env LIBTOOL
>   preserve_env MAKE
>   preserve_env NM
> +preserve_env OBJCFLAGS
>   preserve_env OBJCOPY
>   preserve_env PATH
>   preserve_env PKG_CONFIG
> --
> 2.36.1
> 
> 

Reviewed-by: Matheus Ferst 

I wonder if there are more unused variables, e.g., WINDRES and 
SDL2_CONFIG have some references but it seems that they are only saved 
and never used.

-- 
Matheus K. Ferst
Instituto de Pesquisas ELDORADO 
Analista de Software
Aviso Legal - Disclaimer

Re: [PATCH v7 14/14] tests: Add postcopy preempt tests

* Peter Xu (pet...@redhat.com) wrote:
> Four tests are added for preempt mode:
> 
>   - Postcopy plain
>   - Postcopy recovery
>   - Postcopy tls
>   - Postcopy tls+recovery
> 
> Signed-off-by: Peter Xu 

Reviewed-by: Dr. David Alan Gilbert 

> ---
>  tests/qtest/migration-test.c | 58 
>  1 file changed, 58 insertions(+)
> 
> diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
> index 12f1e3a751..ca2082a7d9 100644
> --- a/tests/qtest/migration-test.c
> +++ b/tests/qtest/migration-test.c
> @@ -558,6 +558,7 @@ typedef struct {
>  
>  /* Postcopy specific fields */
>  void *postcopy_data;
> +bool postcopy_preempt;
>  } MigrateCommon;
>  
>  static int test_migrate_start(QTestState **from, QTestState **to,
> @@ -1063,6 +1064,11 @@ static int migrate_postcopy_prepare(QTestState 
> **from_ptr,
>  migrate_set_capability(to, "postcopy-ram", true);
>  migrate_set_capability(to, "postcopy-blocktime", true);
>  
> +if (args->postcopy_preempt) {
> +migrate_set_capability(from, "postcopy-preempt", true);
> +migrate_set_capability(to, "postcopy-preempt", true);
> +}
> +
>  /* We want to pick a speed slow enough that the test completes
>   * quickly, but that it doesn't complete precopy even on a slow
>   * machine, so also set the downtime.
> @@ -1131,6 +1137,26 @@ static void test_postcopy_tls_psk(void)
>  test_postcopy_common();
>  }
>  
> +static void test_postcopy_preempt(void)
> +{
> +MigrateCommon args = {
> +.postcopy_preempt = true,
> +};
> +
> +test_postcopy_common();
> +}
> +
> +static void test_postcopy_preempt_tls_psk(void)
> +{
> +MigrateCommon args = {
> +.postcopy_preempt = true,
> +.start_hook = test_migrate_tls_psk_start_match,
> +.finish_hook = test_migrate_tls_psk_finish,
> +};
> +
> +test_postcopy_common();
> +}
> +
>  static void test_postcopy_recovery_common(MigrateCommon *args)
>  {
>  QTestState *from, *to;
> @@ -1210,6 +1236,27 @@ static void test_postcopy_recovery_tls_psk(void)
>  test_postcopy_recovery_common();
>  }
>  
> +static void test_postcopy_preempt_recovery(void)
> +{
> +MigrateCommon args = {
> +.postcopy_preempt = true,
> +};
> +
> +test_postcopy_recovery_common();
> +}
> +
> +/* This contains preempt+recovery+tls test altogether */
> +static void test_postcopy_preempt_all(void)
> +{
> +MigrateCommon args = {
> +.postcopy_preempt = true,
> +.start_hook = test_migrate_tls_psk_start_match,
> +.finish_hook = test_migrate_tls_psk_finish,
> +};
> +
> +test_postcopy_recovery_common();
> +}
> +
>  static void test_baddest(void)
>  {
>  MigrateStart args = {
> @@ -2194,6 +2241,17 @@ int main(int argc, char **argv)
>  qtest_add_func("/migration/postcopy/recovery/tls/psk",
> test_postcopy_recovery_tls_psk);
>  #endif /* CONFIG_GNUTLS */
> +
> +qtest_add_func("/migration/postcopy/preempt/plain", 
> test_postcopy_preempt);
> +qtest_add_func("/migration/postcopy/preempt/recovery/plain",
> +   test_postcopy_preempt_recovery);
> +#ifdef CONFIG_GNUTLS
> +qtest_add_func("/migration/postcopy/preempt/tls/psk",
> +   test_postcopy_preempt_tls_psk);
> +qtest_add_func("/migration/postcopy/preempt/recovery/tls/psk",
> +   test_postcopy_preempt_all);
> +#endif /* CONFIG_GNUTLS */
> +
>  qtest_add_func("/migration/bad_dest", test_baddest);
>  qtest_add_func("/migration/precopy/unix/plain", test_precopy_unix_plain);
>  qtest_add_func("/migration/precopy/unix/xbzrle", 
> test_precopy_unix_xbzrle);
> -- 
> 2.32.0
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [PATCH v5 02/10] kvm: Support for querying fd-based stats

* Paolo Bonzini (pbonz...@redhat.com) wrote:
> On 6/8/22 18:01, Dr. David Alan Gilbert wrote:
> > 'Find descriptors for 'target', either that have already been read or
> >   query 'stats_fd' to read them from kvm'
> 
> /*
>  * Return the descriptors for 'target', that either have already been
>  * read or are retrieved from 'stats_fd'.
>  */


Reviewed-by: Dr. David Alan Gilbert 

> Paolo
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

[PATCH v4 4/4] target/riscv: Force disable extensions if priv spec version does not match

We should disable extensions in riscv_cpu_realize() if minimum required
priv spec version is not satisfied. This also ensures that machines with
priv spec v1.11 (or lower) cannot enable H, V, and various multi-letter
extensions.

Fixes: a775398be2e ("target/riscv: Add isa extenstion strings to the
device tree")
Signed-off-by: Anup Patel 
---
 target/riscv/cpu.c | 57 ++
 1 file changed, 52 insertions(+), 5 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 9f9c27a3f5..953ba2e445 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -43,9 +43,13 @@ static const char riscv_single_letter_exts[] = "IEMAFDQCPVH";
 
 struct isa_ext_data {
 const char *name;
-bool enabled;
+int min_version;
+bool *enabled;
 };
 
+#define ISA_EDATA_ENTRY(name, prop) {#name, PRIV_VERSION_1_10_0, 
>cfg.prop}
+#define ISA_EDATA_ENTRY2(name, min_ver, prop) {#name, min_ver, >cfg.prop}
+
 const char * const riscv_int_regnames[] = {
   "x0/zero", "x1/ra",  "x2/sp",  "x3/gp",  "x4/tp",  "x5/t0",   "x6/t1",
   "x7/t2",   "x8/s0",  "x9/s1",  "x10/a0", "x11/a1", "x12/a2",  "x13/a3",
@@ -513,8 +517,42 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
 CPURISCVState *env = >env;
 RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(dev);
 CPUClass *cc = CPU_CLASS(mcc);
-int priv_version = -1;
+int i, priv_version = -1;
 Error *local_err = NULL;
+struct isa_ext_data isa_edata_arr[] = {
+ISA_EDATA_ENTRY2(h, PRIV_VERSION_1_12_0, ext_h),
+ISA_EDATA_ENTRY2(v, PRIV_VERSION_1_12_0, ext_v),
+ISA_EDATA_ENTRY2(zicsr, PRIV_VERSION_1_10_0, ext_icsr),
+ISA_EDATA_ENTRY2(zifencei, PRIV_VERSION_1_10_0, ext_ifencei),
+ISA_EDATA_ENTRY2(zfh, PRIV_VERSION_1_12_0, ext_zfh),
+ISA_EDATA_ENTRY2(zfhmin, PRIV_VERSION_1_12_0, ext_zfhmin),
+ISA_EDATA_ENTRY2(zfinx, PRIV_VERSION_1_12_0, ext_zfinx),
+ISA_EDATA_ENTRY2(zdinx, PRIV_VERSION_1_12_0, ext_zdinx),
+ISA_EDATA_ENTRY2(zba, PRIV_VERSION_1_12_0, ext_zba),
+ISA_EDATA_ENTRY2(zbb, PRIV_VERSION_1_12_0, ext_zbb),
+ISA_EDATA_ENTRY2(zbc, PRIV_VERSION_1_12_0, ext_zbc),
+ISA_EDATA_ENTRY2(zbkb, PRIV_VERSION_1_12_0, ext_zbkb),
+ISA_EDATA_ENTRY2(zbkc, PRIV_VERSION_1_12_0, ext_zbkc),
+ISA_EDATA_ENTRY2(zbkx, PRIV_VERSION_1_12_0, ext_zbkx),
+ISA_EDATA_ENTRY2(zbs, PRIV_VERSION_1_12_0, ext_zbs),
+ISA_EDATA_ENTRY2(zk, PRIV_VERSION_1_12_0, ext_zk),
+ISA_EDATA_ENTRY2(zkn, PRIV_VERSION_1_12_0, ext_zkn),
+ISA_EDATA_ENTRY2(zknd, PRIV_VERSION_1_12_0, ext_zknd),
+ISA_EDATA_ENTRY2(zkne, PRIV_VERSION_1_12_0, ext_zkne),
+ISA_EDATA_ENTRY2(zknh, PRIV_VERSION_1_12_0, ext_zknh),
+ISA_EDATA_ENTRY2(zkr, PRIV_VERSION_1_12_0, ext_zkr),
+ISA_EDATA_ENTRY2(zks, PRIV_VERSION_1_12_0, ext_zks),
+ISA_EDATA_ENTRY2(zksed, PRIV_VERSION_1_12_0, ext_zksed),
+ISA_EDATA_ENTRY2(zksh, PRIV_VERSION_1_12_0, ext_zksh),
+ISA_EDATA_ENTRY2(zkt, PRIV_VERSION_1_12_0, ext_zkt),
+ISA_EDATA_ENTRY2(zve32f, PRIV_VERSION_1_12_0, ext_zve32f),
+ISA_EDATA_ENTRY2(zve64f, PRIV_VERSION_1_12_0, ext_zve64f),
+ISA_EDATA_ENTRY2(zhinx, PRIV_VERSION_1_12_0, ext_zhinx),
+ISA_EDATA_ENTRY2(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin),
+ISA_EDATA_ENTRY2(svinval, PRIV_VERSION_1_12_0, ext_svinval),
+ISA_EDATA_ENTRY2(svnapot, PRIV_VERSION_1_12_0, ext_svnapot),
+ISA_EDATA_ENTRY2(svpbmt, PRIV_VERSION_1_12_0, ext_svpbmt),
+};
 
 cpu_exec_realizefn(cs, _err);
 if (local_err != NULL) {
@@ -541,6 +579,17 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
 set_priv_version(env, priv_version);
 }
 
+/* Force disable extensions if priv spec version does not match */
+for (i = 0; i < ARRAY_SIZE(isa_edata_arr); i++) {
+if (*isa_edata_arr[i].enabled &&
+(env->priv_ver < isa_edata_arr[i].min_version)) {
+*isa_edata_arr[i].enabled = false;
+warn_report("disabling %s extension for hart 0x%lx because "
+"privilege spec version does not match",
+isa_edata_arr[i].name, (unsigned long)env->mhartid);
+}
+}
+
 if (cpu->cfg.mmu) {
 riscv_set_feature(env, RISCV_FEATURE_MMU);
 }
@@ -1011,8 +1060,6 @@ static void riscv_cpu_class_init(ObjectClass *c, void 
*data)
 device_class_set_props(dc, riscv_cpu_properties);
 }
 
-#define ISA_EDATA_ENTRY(name, prop) {#name, cpu->cfg.prop}
-
 static void riscv_isa_string_ext(RISCVCPU *cpu, char **isa_str, int 
max_str_len)
 {
 char *old = *isa_str;
@@ -1071,7 +1118,7 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char 
**isa_str, int max_str_len)
 };
 
 for (i = 0; i < ARRAY_SIZE(isa_edata_arr); i++) {
-if (isa_edata_arr[i].enabled) {
+if (*isa_edata_arr[i].enabled) {
 new = g_strconcat(old, "_",

[PATCH v4 3/4] target/riscv: Update [m|h]tinst CSR in riscv_cpu_do_interrupt()

We should write transformed instruction encoding of the trapped
instruction in [m|h]tinst CSR at time of taking trap as defined
by the RISC-V privileged specification v1.12.

Signed-off-by: Anup Patel 
---
 target/riscv/cpu.h|   3 +
 target/riscv/cpu_helper.c | 231 +-
 target/riscv/instmap.h|  43 +++
 3 files changed, 271 insertions(+), 6 deletions(-)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 194a58d760..11726e9031 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -271,6 +271,9 @@ struct CPUArchState {
 /* Signals whether the current exception occurred with two-stage address
translation active. */
 bool two_stage_lookup;
+/* Signals whether the current exception occurred while doing two-stage
+   address translation for the VS-stage page table walk. */
+bool two_stage_indirect_lookup;
 
 target_ulong scounteren;
 target_ulong mcounteren;
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 16c6045459..62a6762617 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -22,6 +22,7 @@
 #include "qemu/main-loop.h"
 #include "cpu.h"
 #include "exec/exec-all.h"
+#include "instmap.h"
 #include "tcg/tcg-op.h"
 #include "trace.h"
 #include "semihosting/common-semi.h"
@@ -1055,7 +1056,8 @@ restart:
 
 static void raise_mmu_exception(CPURISCVState *env, target_ulong address,
 MMUAccessType access_type, bool pmp_violation,
-bool first_stage, bool two_stage)
+bool first_stage, bool two_stage,
+bool two_stage_indirect)
 {
 CPUState *cs = env_cpu(env);
 int page_fault_exceptions, vm;
@@ -1105,6 +1107,7 @@ static void raise_mmu_exception(CPURISCVState *env, 
target_ulong address,
 }
 env->badaddr = address;
 env->two_stage_lookup = two_stage;
+env->two_stage_indirect_lookup = two_stage_indirect;
 }
 
 hwaddr riscv_cpu_get_phys_page_debug(CPUState *cs, vaddr addr)
@@ -1150,6 +1153,7 @@ void riscv_cpu_do_transaction_failed(CPUState *cs, hwaddr 
physaddr,
 env->badaddr = addr;
 env->two_stage_lookup = riscv_cpu_virt_enabled(env) ||
 riscv_cpu_two_stage_lookup(mmu_idx);
+env->two_stage_indirect_lookup = false;
 cpu_loop_exit_restore(cs, retaddr);
 }
 
@@ -1175,6 +1179,7 @@ void riscv_cpu_do_unaligned_access(CPUState *cs, vaddr 
addr,
 env->badaddr = addr;
 env->two_stage_lookup = riscv_cpu_virt_enabled(env) ||
 riscv_cpu_two_stage_lookup(mmu_idx);
+env->two_stage_indirect_lookup = false;
 cpu_loop_exit_restore(cs, retaddr);
 }
 
@@ -1190,6 +1195,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int 
size,
 bool pmp_violation = false;
 bool first_stage_error = true;
 bool two_stage_lookup = false;
+bool two_stage_indirect_error = false;
 int ret = TRANSLATE_FAIL;
 int mode = mmu_idx;
 /* default TLB page size */
@@ -1227,6 +1233,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int 
size,
  */
 if (ret == TRANSLATE_G_STAGE_FAIL) {
 first_stage_error = false;
+two_stage_indirect_error = true;
 access_type = MMU_DATA_LOAD;
 }
 
@@ -1310,12 +1317,207 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, 
int size,
 raise_mmu_exception(env, address, access_type, pmp_violation,
 first_stage_error,
 riscv_cpu_virt_enabled(env) ||
-riscv_cpu_two_stage_lookup(mmu_idx));
+riscv_cpu_two_stage_lookup(mmu_idx),
+two_stage_indirect_error);
 cpu_loop_exit_restore(cs, retaddr);
 }
 
 return true;
 }
+
+static target_ulong riscv_transformed_insn(CPURISCVState *env,
+   target_ulong insn)
+{
+bool xinsn_has_addr_offset = false;
+target_ulong xinsn = 0;
+
+/*
+ * Only Quadrant 0 and Quadrant 2 of RVC instruction space need to
+ * be uncompressed. The Quadrant 1 of RVC instruction space need
+ * not be transformed because these instructions won't generate
+ * any load/store trap.
+ */
+
+if ((insn & 0x3) != 0x3) {
+/* Transform 16bit instruction into 32bit instruction */
+switch (GET_C_OP(insn)) {
+case OPC_RISC_C_OP_QUAD0: /* Quadrant 0 */
+switch (GET_C_FUNC(insn)) {
+case OPC_RISC_C_FUNC_FLD_LQ:
+if (riscv_cpu_xlen(env) != 128) { /* C.FLD (RV32/64) */
+xinsn = OPC_RISC_FLD;
+xinsn = SET_RD(xinsn, GET_C_RS2S(insn));
+xinsn = SET_RS1(xinsn, GET_C_RS1S(insn));
+xinsn = SET_I_IMM(xinsn, GET_C_LD_IMM(insn));
+xinsn_has_addr_offset = true;
+

[PATCH v4 0/4] QEMU RISC-V nested virtualization fixes

This series contains fixes and improvements for nested virtualization
on QEMU RISC-V.

These patches can also be found in riscv_nested_fixes_v4 branch at:
https://github.com/avpatel/qemu.git

The RISC-V nested virtualization was tested on QEMU RISC-V using
Xvisor RISC-V which has required hypervisor support to run another
hypervisor as Guest/VM.

Changes since v3:
 - Updated PATCH3 to set special pseudoinstructions in htinst for
   guest page faults which result due to VS-stage page table walks
 - Updated warning message in PATCH4

Changes since v2:
 - Dropped the patches which are already in Alistair's next branch
 - Set "Addr. Offset" in the transformed instruction for PATCH3
 - Print warning in riscv_cpu_realize() if we are disabling an
   extension due to privilege spec version mismatch for PATCH4

Changes since v1:
 - Set write_gva to env->two_stage_lookup which ensures that for
   HS-mode to HS-mode trap write_gva is true only for HLV/HSV
   instructions
 - Included "[PATCH 0/3] QEMU RISC-V priv spec version fixes"
   patches in this series for easy review
 - Re-worked PATCH7 to force disable extensions if required
   priv spec version is not satisfied
 - Added new PATCH8 to fix "aia=aplic-imsic" mode of virt machine

Anup Patel (4):
  target/riscv: Don't force update priv spec version to latest
  target/riscv: Add dummy mcountinhibit CSR for priv spec v1.11 or
higher
  target/riscv: Update [m|h]tinst CSR in riscv_cpu_do_interrupt()
  target/riscv: Force disable extensions if priv spec version does not
match

 target/riscv/cpu.c|  65 +--
 target/riscv/cpu.h|   3 +
 target/riscv/cpu_bits.h   |   3 +
 target/riscv/cpu_helper.c | 231 +-
 target/riscv/csr.c|   2 +
 target/riscv/instmap.h|  43 +++
 6 files changed, 333 insertions(+), 14 deletions(-)

-- 
2.34.1

[PATCH v4 2/4] target/riscv: Add dummy mcountinhibit CSR for priv spec v1.11 or higher

The mcountinhibit CSR is mandatory for priv spec v1.11 or higher.
Implementations that don't want to implement it can simply have a dummy
mcountinhibit which is always zero.

Fixes: a4b2fa433125 ("target/riscv: Introduce privilege version field in
the CSR ops.")
Signed-off-by: Anup Patel 
Reviewed-by: Frank Chang 
Reviewed-by: Alistair Francis 
---
 target/riscv/cpu_bits.h | 3 +++
 target/riscv/csr.c  | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index 4d04b20d06..4a55c6a709 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -159,6 +159,9 @@
 #define CSR_MTVEC   0x305
 #define CSR_MCOUNTEREN  0x306
 
+/* Machine Counter Setup */
+#define CSR_MCOUNTINHIBIT   0x320
+
 /* 32-bit only */
 #define CSR_MSTATUSH0x310
 
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index 6dbe9b541f..409a209f14 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -3391,6 +3391,8 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = {
 [CSR_MIE] = { "mie",any,   NULL,NULL,rmw_mie   
},
 [CSR_MTVEC]   = { "mtvec",  any,   read_mtvec,   write_mtvec   
},
 [CSR_MCOUNTEREN]  = { "mcounteren", any,   read_mcounteren,  
write_mcounteren  },
+[CSR_MCOUNTINHIBIT] = { "mcountinhibit", any, read_zero, write_ignore,
+ .min_priv_ver = 
PRIV_VERSION_1_11_0 },
 
 [CSR_MSTATUSH]= { "mstatush",   any32, read_mstatush,
write_mstatush},
 
-- 
2.34.1

[PATCH v4 1/4] target/riscv: Don't force update priv spec version to latest

The riscv_cpu_realize() sets priv spec version to v1.12 when
"env->priv_ver == 0" (i.e. default v1.10) because the enum
value of priv spec v1.10 is zero.

Due to above issue, the sifive_u machine will see priv spec v1.12
instead of priv spec v1.10.

To fix this issue, we set latest priv spec version (i.e. v1.12)
for base rv64/rv32 cpu and riscv_cpu_realize() will override priv
spec version only when "cpu->cfg.priv_spec != NULL".

Fixes: 7100fe6c2441 ("target/riscv: Enable privileged spec version 1.12")
Signed-off-by: Anup Patel 
Reviewed-by: Frank Chang 
Reviewed-by: Alistair Francis 
Reviewed-by: Atish Patra 
---
 target/riscv/cpu.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 0497af45cc..9f9c27a3f5 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -169,6 +169,8 @@ static void rv64_base_cpu_init(Object *obj)
 CPURISCVState *env = _CPU(obj)->env;
 /* We set this in the realise function */
 set_misa(env, MXL_RV64, 0);
+/* Set latest version of privileged specification */
+set_priv_version(env, PRIV_VERSION_1_12_0);
 }
 
 static void rv64_sifive_u_cpu_init(Object *obj)
@@ -204,6 +206,8 @@ static void rv32_base_cpu_init(Object *obj)
 CPURISCVState *env = _CPU(obj)->env;
 /* We set this in the realise function */
 set_misa(env, MXL_RV32, 0);
+/* Set latest version of privileged specification */
+set_priv_version(env, PRIV_VERSION_1_12_0);
 }
 
 static void rv32_sifive_u_cpu_init(Object *obj)
@@ -509,7 +513,7 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
 CPURISCVState *env = >env;
 RISCVCPUClass *mcc = RISCV_CPU_GET_CLASS(dev);
 CPUClass *cc = CPU_CLASS(mcc);
-int priv_version = 0;
+int priv_version = -1;
 Error *local_err = NULL;
 
 cpu_exec_realizefn(cs, _err);
@@ -533,10 +537,8 @@ static void riscv_cpu_realize(DeviceState *dev, Error 
**errp)
 }
 }
 
-if (priv_version) {
+if (priv_version >= PRIV_VERSION_1_10_0) {
 set_priv_version(env, priv_version);
-} else if (!env->priv_ver) {
-set_priv_version(env, PRIV_VERSION_1_12_0);
 }
 
 if (cpu->cfg.mmu) {
-- 
2.34.1

Re: [PATCH v5 02/10] kvm: Support for querying fd-based stats


On 6/8/22 18:01, Dr. David Alan Gilbert wrote:

'Find descriptors for 'target', either that have already been read or
  query 'stats_fd' to read them from kvm'


/*
 * Return the descriptors for 'target', that either have already been
 * read or are retrieved from 'stats_fd'.
 */

Paolo

Re: [PATCH v4 07/53] semihosting: Simplify softmmu_lock_user_string

2022-06-08 Thread Alex Bennée



Richard Henderson  writes:

> We are not currently bounding the search to the 1024 bytes
> that we allocated, possibly overrunning the buffer.
> Use softmmu_strlen_user to find the length and allocate the
> correct size from the beginning.
>
> Reviewed-by: Peter Maydell 
> Signed-off-by: Richard Henderson 

Reviewed-by: Alex Bennée 

-- 
Alex Bennée

[PATCH] gitlab: compare CIRRUS_nn vars against 'null' not ""

2022-06-08 Thread Daniel P . Berrangé

The GitLab variable comparisons don't have shell like semantics where
an unset variable compares equal to empty string. We need to explicitly
test against 'null' to detect an unset variable.

Signed-off-by: Daniel P. Berrangé 
---
 .gitlab-ci.d/base.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitlab-ci.d/base.yml b/.gitlab-ci.d/base.yml
index f334f3ded7..69b36c148a 100644
--- a/.gitlab-ci.d/base.yml
+++ b/.gitlab-ci.d/base.yml
@@ -13,7 +13,7 @@
 #
 
 # Cirrus jobs can't run unless the creds / target repo are set
-- if: '$QEMU_JOB_CIRRUS && ($CIRRUS_GITHUB_REPO == "" || $CIRRUS_API_TOKEN 
== "")'
+- if: '$QEMU_JOB_CIRRUS && ($CIRRUS_GITHUB_REPO == null || 
$CIRRUS_API_TOKEN == null)'
   when: never
 
 # Publishing jobs should only run on the default branch in upstream
-- 
2.36.1

Re: dbus-display-test is flakey

2022-06-08 Thread Cole Robinson

On 6/7/22 4:57 PM, Marc-André Lureau wrote:
> Hi Cole,
> 
> 
> On Sun, Jun 5, 2022 at 6:46 PM Cole Robinson  wrote:
>>
>> Hi Marc-André,
>>
>> dbus-display-test seems flakey. I'm occasionally seeing:
>>
>> ▶ 692/746
>> ERROR:../tests/qtest/dbus-display-test.c:68:test_dbus_display_vm:
>> assertion failed
>> (qemu_dbus_display1_vm_get_name(QEMU_DBUS_DISPLAY1_VM(vm)) ==
>> "dbus-test"): (NULL == "dbus-test") ERROR
>>
> 
> Quite an interesting error. The generated dbus proxy code is all
> gobject async init, but the proxy is created synchronously with
> qemu_dbus_display1_vm_proxy_new_sync(), and all cached properties must
> have been set at construction time.
> 
> And nobody else reported this issue so far, but I also noticed that it
> does not seem to be covered by the CI at this point (only centos has
> --enable-modules, but is missing the glib 2.64 requirements)
> 

Interesting. redhat distro family and debian distro family are both
using --enable-modules for their packages, for a while now. Maybe qemu
CI should be using --enable-modules more. I have a patch on the list
that fixes a qemu-iotests failure that's indirectly caused by --enable-modules

>> Examples:
>>
>> fedora rawhide x86_64:
>> https://kojipkgs.fedoraproject.org//work/tasks/4945/87834945/build.log
>> fedora rawhide aarch64:
>> https://kojipkgs.fedoraproject.org//work/tasks/4946/87834946/build.log
>> fedora 35 x86_64:
>> https://download.copr.fedorainfracloud.org/results/@virtmaint-sig/virt-preview/fedora-35-x86_64/04491978-qemu/builder-live.log.gz
>>
> 
> Are you able to reproduce outside koji? I have been running the test
> in a loop for a long while, not seeing any error.
> 

I tried a couple local qemu.git x86_64 builds but couldn't reproduce. If
environment is a factor my guess is it's race/load dependent, maybe
exacerbated by 'mock' used by both koji and copr.

>> This is qemu v7.0.0 with some unrelated patches on top. /usr/bin/make -O
>> -j5 V=1 VERBOSE=1 check
>>
>> Side question: I know I can patch meson.build to skip the test, or
>> similar patch changes, but is there a non-patch way to skip specific tests?
> 
> "meson test" currently supports specifying the tests or subproject to
> run, or using --suite / --no-suite SUITE.
> 
> (mtest2make.py creates convenience check-SUITE rules)
> 
> We could probably add a suite "dbus", and add the test(s) there, so
> you could run "meson test --no-suite dbus".
> 

Ah that's good to know. Looks like we can use `MTESTFLAGS='--no-suite
block' make check` to skip iotests which is flaky for other reasons
reported elsewhere.

Adding a suite just for this dbus case doesn't matter to me now. But IMO
it would be useful if there was a way to skip an individual test without
patching code. QEMU_TEST_SKIP="dbus-display-test.c:bar.c" or something.
Just a thought; maybe there's no simple way to wire that in.

Thanks,
Cole

Re: gitlab: convert Cirrus jobs to .base_job_template

2022-06-08 Thread Daniel P . Berrangé

On Wed, Jun 08, 2022 at 08:48:48AM -0700, Richard Henderson wrote:
> Commit 00125414ba1 is not working as intended.  E.g.
> 
> https://gitlab.com/rth7680/qemu/-/jobs/2558862885
> 
> where I have neither CIRRUS_GITHUB_REPO nor CIRRUS_API_TOKEN set, but the
> job tries to run anyway.  Then fails, predictably, with "token not defined".

Oops, GitLab variable comparisons aren't shell-like where an unset
variable compares equal to empty string.

I just sent a fix & CC'd you

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PATCH] microvm: turn off io reservations for pcie root ports

2022-06-08 Thread Michael S. Tsirkin

On Fri, Jun 03, 2022 at 10:59:20AM +0200, Gerd Hoffmann wrote:
> The pcie host bridge has no io window on microvm,
> so io reservations will not work.
> 
> Signed-off-by: Gerd Hoffmann 

I don't much like overriding the user like this. We end up with users
setting it to silly values and then if we do want to
support this, things just break. Thoughts?

> ---
>  hw/i386/microvm.c | 6 ++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
> index 4b3b1dd262f1..f01d972f5d28 100644
> --- a/hw/i386/microvm.c
> +++ b/hw/i386/microvm.c
> @@ -757,6 +757,12 @@ static void microvm_class_init(ObjectClass *oc, void 
> *data)
>  "Set off to disable adding virtio-mmio devices to the kernel 
> cmdline");
>  
>  machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
> +
> +/*
> + * pcie host bridge (gpex) on microvm has no io address window,
> + * so reserving io space is not going to work.  Turn it off.
> + */
> +object_register_sugar_prop("pcie-root-port", "io-reserve", "0", true);
>  }
>  
>  static const TypeInfo microvm_machine_info = {
> -- 
> 2.36.1

Re: [PATCH v5 02/10] kvm: Support for querying fd-based stats

* Paolo Bonzini (pbonz...@redhat.com) wrote:
> On 6/8/22 16:52, Dr. David Alan Gilbert wrote:
> > > If you mean why not some other source, each source has a different file
> > > descriptor:
> > > 
> > > +int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);
> > > 
> > > but the descriptors are consistent every time KVM_GET_STATS_FD is called, 
> > > so
> > > basically "ident" can be used as a cache key.
> > 
> > Ah OK, this is what I was after; it's a little weird that the caller
> > does the ioctl to get the stats-fd, but it does the lookup internally
> > with current_cpu for the ident.
> 
> Oh yeah that's weird.
> 
> Let me squash in this:

Yeh that's nicer; a comment something like:

'Find descriptors for 'target', either that have already been read or
 query 'stats_fd' to read them from kvm'

?

Dave

> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index 023bf4ea79..71896ad173 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -3871,17 +3871,7 @@ static StatsDescriptors 
> *find_stats_descriptors(StatsTarget target, int stats_fd
>  size_t size_desc;
>  ssize_t ret;
> -switch (target) {
> -case STATS_TARGET_VM:
> -ident = StatsTarget_str(STATS_TARGET_VM);
> -break;
> -case STATS_TARGET_VCPU:
> -ident = current_cpu->parent_obj.canonical_path;
> -break;
> -default:
> -abort();
> -}
> -
> +ident = StatsTarget_str(target);
>  QTAILQ_FOREACH(descriptors, _descriptors, next) {
>  if (g_str_equal(descriptors->ident, ident)) {
>  return descriptors;
> @@ -3917,7 +3907,7 @@ static StatsDescriptors 
> *find_stats_descriptors(StatsTarget target, int stats_fd
>  }
>  descriptors->kvm_stats_header = kvm_stats_header;
>  descriptors->kvm_stats_desc = kvm_stats_desc;
> -descriptors->ident = g_strdup(ident);
> +descriptors->ident = ident;
>  QTAILQ_INSERT_TAIL(_descriptors, descriptors, next);
>  return descriptors;
>  }
> 
> (once I test it).
> 
> Paolo
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [PATCH v5 02/10] kvm: Support for querying fd-based stats


On 6/8/22 16:52, Dr. David Alan Gilbert wrote:

If you mean why not some other source, each source has a different file
descriptor:

+int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);

but the descriptors are consistent every time KVM_GET_STATS_FD is called, so
basically "ident" can be used as a cache key.


Ah OK, this is what I was after; it's a little weird that the caller
does the ioctl to get the stats-fd, but it does the lookup internally
with current_cpu for the ident.


Oh yeah that's weird.

Let me squash in this:

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 023bf4ea79..71896ad173 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -3871,17 +3871,7 @@ static StatsDescriptors 
*find_stats_descriptors(StatsTarget target, int stats_fd
 size_t size_desc;
 ssize_t ret;
 
-switch (target) {

-case STATS_TARGET_VM:
-ident = StatsTarget_str(STATS_TARGET_VM);
-break;
-case STATS_TARGET_VCPU:
-ident = current_cpu->parent_obj.canonical_path;
-break;
-default:
-abort();
-}
-
+ident = StatsTarget_str(target);
 QTAILQ_FOREACH(descriptors, _descriptors, next) {
 if (g_str_equal(descriptors->ident, ident)) {
 return descriptors;
@@ -3917,7 +3907,7 @@ static StatsDescriptors 
*find_stats_descriptors(StatsTarget target, int stats_fd
 }
 descriptors->kvm_stats_header = kvm_stats_header;
 descriptors->kvm_stats_desc = kvm_stats_desc;
-descriptors->ident = g_strdup(ident);
+descriptors->ident = ident;
 QTAILQ_INSERT_TAIL(_descriptors, descriptors, next);
 return descriptors;
 }

(once I test it).

Paolo

Re: [PATCH] configure: ignore --make


On 6/8/22 16:21, Matheus Kowalczuk Ferst wrote:

On a clean build on FreeBSD with this patch, I got:

../meson.build:3641:0: ERROR: Key MAKE is not in dict

So it seems that we need to remove the use of MAKE in meson.build too.


Oops, yes.  That's the typical "forget git commit --amend before sending 
out" mistake for me.


Also, we will not have this error at configure-time anymore, but I 
suppose that *BSD users will identify the problem if they try to build 
with non-gnu make.


Yeah, my guess was that "try ./configure && make" with GNU Make 
installed is the more common failure mode, since QEMU is certainly not 
the only package that requires GNU Make.


Alternatively, I can leave in the check for GNU Make, or move it to 
meson as a "now type "make" to build QEMU" kind of message, and still 
remove the unused --make option.


Paolo

Re: [PATCH] disas: Remove libvixl disassembler


On 6/3/22 19:35, Thomas Huth wrote:

On 03/06/2022 19.26, Claudio Fontana wrote:

On 6/3/22 18:42, Thomas Huth wrote:

The disassembly via capstone should be superior to our old vixl
sources nowadays, so let's finally cut this old disassembler out
of the QEMU source tree.

Signed-off-by: Thomas Huth 


agreed, one thought: at the time I added this thing, I had to add C++ 
compilation support,

maybe something we can now drop if there are no more C++ users?


I thought about that, too, but we still have disas/nanomips.cpp left and 
the Windows-related files in qga/vss-win32/* .


That is pure C++ so it does not need the extra complication of "detect 
whether the C and C++ compiler are ABI-compatible" (typically due to 
different libasan/libtsan implementation between gcc and clang).  So 
it's really just nanoMIPS that's left.


And I think Paolo was 
considering to use C++ for coroutine fixes - not sure whether that still 
is planned, though.


No, that was just an experiment.

Paolo

Re: gitlab: convert Cirrus jobs to .base_job_template


Commit 00125414ba1 is not working as intended.  E.g.

https://gitlab.com/rth7680/qemu/-/jobs/2558862885

where I have neither CIRRUS_GITHUB_REPO nor CIRRUS_API_TOKEN set, but the job tries to run 
anyway.  Then fails, predictably, with "token not defined".



r~

Re: [PATCH] tests/tcg/i386: Use explicit suffix on fist insns

2022-06-08 Thread Alex Bennée



Richard Henderson  writes:

> Fixes a number of assembler warnings of the form:
>
> test-i386.c: Assembler messages:
> test-i386.c:869: Warning: no instruction mnemonic suffix given
>   and no register operands; using default for `fist'
>
> Signed-off-by: Richard Henderson 

Queued to testing/next, thanks.

-- 
Alex Bennée

Re: [PATCH 0/3] target/riscv: Fix issue 1060