[PATCH v4 1/3] target/ppc: Add HASHKEYR and HASHPKEYR SPRs

2022-07-15 Thread Víctor Colombo
Add the Special Purpose Registers HASHKEYR and HASHPKEYR, which were
introduced by the Power ISA 3.1B. They are used by the new instructions
hashchk(p) and hashst(p).

The ISA states that the Operating System should generate the value for
these registers when creating a process, so it is its responsibility to
do so. We initialize it with 0 for qemu-softmmu, and set a random 64
bits value for linux-user.

Signed-off-by: Víctor Colombo 
---

Is the way I did the random number generation ok?

---
 target/ppc/cpu.h  |  2 ++
 target/ppc/cpu_init.c | 28 
 2 files changed, 30 insertions(+)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index a4c893cfad..4551d81b5f 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1676,6 +1676,8 @@ void ppc_compat_add_property(Object *obj, const char 
*name,
 #define SPR_BOOKE_GIVOR14 (0x1BD)
 #define SPR_TIR   (0x1BE)
 #define SPR_PTCR  (0x1D0)
+#define SPR_HASHKEYR  (0x1D4)
+#define SPR_HASHPKEYR (0x1D5)
 #define SPR_BOOKE_SPEFSCR (0x200)
 #define SPR_Exxx_BBEAR(0x201)
 #define SPR_Exxx_BBTAR(0x202)
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index d1493a660c..29c7752483 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -5700,6 +5700,33 @@ static void register_power9_mmu_sprs(CPUPPCState *env)
 #endif
 }
 
+static void register_power10_hash_sprs(CPUPPCState *env)
+{
+/*
+ * it's the OS responsability to generate a random value for the registers
+ * in each process' context. So, initialize it with 0 here.
+ */
+uint64_t hashkeyr_initial_value = 0, hashpkeyr_initial_value = 0;
+#if defined(CONFIG_USER_ONLY)
+/* in linux-user, setup the hash register with a random value */
+GRand *rand = g_rand_new();
+hashkeyr_initial_value =
+((uint64_t)g_rand_int(rand) << 32) | (uint64_t)g_rand_int(rand);
+hashpkeyr_initial_value =
+((uint64_t)g_rand_int(rand) << 32) | (uint64_t)g_rand_int(rand);
+g_rand_free(rand);
+#endif
+spr_register(env, SPR_HASHKEYR, "HASHKEYR",
+SPR_NOACCESS, SPR_NOACCESS,
+_read_generic, _write_generic,
+hashkeyr_initial_value);
+spr_register_hv(env, SPR_HASHPKEYR, "HASHPKEYR",
+SPR_NOACCESS, SPR_NOACCESS,
+SPR_NOACCESS, SPR_NOACCESS,
+_read_generic, _write_generic,
+hashpkeyr_initial_value);
+}
+
 /*
  * Initialize PMU counter overflow timers for Power8 and
  * newer Power chips when using TCG.
@@ -6484,6 +6511,7 @@ static void init_proc_POWER10(CPUPPCState *env)
 register_power8_book4_sprs(env);
 register_power8_rpr_sprs(env);
 register_power9_mmu_sprs(env);
+register_power10_hash_sprs(env);
 
 /* FIXME: Filter fields properly based on privilege level */
 spr_register_kvm_hv(env, SPR_PSSCR, "PSSCR", NULL, NULL, NULL, NULL,
-- 
2.25.1




[PATCH v4 3/3] target/ppc: Implement hashstp and hashchkp

2022-07-15 Thread Víctor Colombo
Implementation for instructions hashstp and hashchkp, the privileged
versions of hashst and hashchk, which were added in Power ISA 3.1B.

Signed-off-by: Víctor Colombo 
---
 target/ppc/excp_helper.c   | 2 ++
 target/ppc/helper.h| 2 ++
 target/ppc/insn32.decode   | 2 ++
 target/ppc/translate/fixedpoint-impl.c.inc | 2 ++
 4 files changed, 8 insertions(+)

diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index fa5a737e22..847eff9213 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -2255,6 +2255,8 @@ void helper_##op(CPUPPCState *env, target_ulong ea, 
target_ulong ra,  \
 
 HELPER_HASH(HASHST, env->spr[SPR_HASHKEYR], true)
 HELPER_HASH(HASHCHK, env->spr[SPR_HASHKEYR], false)
+HELPER_HASH(HASHSTP, env->spr[SPR_HASHPKEYR], true)
+HELPER_HASH(HASHCHKP, env->spr[SPR_HASHPKEYR], false)
 
 #if !defined(CONFIG_USER_ONLY)
 
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 5817af632b..122b2e9359 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -6,6 +6,8 @@ DEF_HELPER_FLAGS_4(td, TCG_CALL_NO_WG, void, env, tl, tl, i32)
 #endif
 DEF_HELPER_4(HASHST, void, env, tl, tl, tl)
 DEF_HELPER_4(HASHCHK, void, env, tl, tl, tl)
+DEF_HELPER_4(HASHSTP, void, env, tl, tl, tl)
+DEF_HELPER_4(HASHCHKP, void, env, tl, tl, tl)
 #if !defined(CONFIG_USER_ONLY)
 DEF_HELPER_2(store_msr, void, env, tl)
 DEF_HELPER_1(rfi, void, env)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 544514565c..da08960fca 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -330,6 +330,8 @@ PEXTD   01 . . . 001000 -   @X
 
 HASHST  01 . . . 1011010010 .   @X_DW
 HASHCHK 01 . . . 100010 .   @X_DW
+HASHSTP 01 . . . 1010010010 .   @X_DW
+HASHCHKP01 . . . 1010110010 .   @X_DW
 
 ## BCD Assist
 
diff --git a/target/ppc/translate/fixedpoint-impl.c.inc 
b/target/ppc/translate/fixedpoint-impl.c.inc
index 41c06de8a2..1ba56cbed5 100644
--- a/target/ppc/translate/fixedpoint-impl.c.inc
+++ b/target/ppc/translate/fixedpoint-impl.c.inc
@@ -572,3 +572,5 @@ static bool do_hash(DisasContext *ctx, arg_X *a, bool priv,
 
 TRANS(HASHST, do_hash, false, gen_helper_HASHST)
 TRANS(HASHCHK, do_hash, false, gen_helper_HASHCHK)
+TRANS(HASHSTP, do_hash, true, gen_helper_HASHSTP)
+TRANS(HASHCHKP, do_hash, true, gen_helper_HASHCHKP)
-- 
2.25.1




[PATCH v4 2/3] target/ppc: Implement hashst and hashchk

2022-07-15 Thread Víctor Colombo
Implementation for instructions hashst and hashchk, which were added
in Power ISA 3.1B.

It was decided to implement the hash algorithm from ground up in this
patch exactly as described in Power ISA.

Signed-off-by: Víctor Colombo 
---
 target/ppc/excp_helper.c   | 82 ++
 target/ppc/helper.h|  2 +
 target/ppc/insn32.decode   |  8 +++
 target/ppc/translate.c |  5 ++
 target/ppc/translate/fixedpoint-impl.c.inc | 32 +
 5 files changed, 129 insertions(+)

diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index cb752b184a..fa5a737e22 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -2174,6 +2174,88 @@ void helper_td(CPUPPCState *env, target_ulong arg1, 
target_ulong arg2,
 #endif
 #endif
 
+static uint32_t helper_SIMON_LIKE_32_64(uint32_t x, uint64_t key, uint32_t 
lane)
+{
+const uint16_t c = 0xfffc;
+const uint64_t z0 = 0xfa2561cdf44ac398ULL;
+uint16_t z = 0, temp;
+uint16_t k[32], eff_k[32], xleft[33], xright[33], fxleft[32];
+
+for (int i = 3; i >= 0; i--) {
+k[i] = key & 0x;
+key >>= 16;
+}
+xleft[0] = x & 0x;
+xright[0] = (x >> 16) & 0x;
+
+for (int i = 0; i < 28; i++) {
+z = (z0 >> (63 - i)) & 1;
+temp = ror16(k[i + 3], 3) ^ k[i + 1];
+k[i + 4] = c ^ z ^ k[i] ^ temp ^ ror16(temp, 1);
+}
+
+for (int i = 0; i < 8; i++) {
+eff_k[4 * i + 0] = k[4 * i + ((0 + lane) % 4)];
+eff_k[4 * i + 1] = k[4 * i + ((1 + lane) % 4)];
+eff_k[4 * i + 2] = k[4 * i + ((2 + lane) % 4)];
+eff_k[4 * i + 3] = k[4 * i + ((3 + lane) % 4)];
+}
+
+for (int i = 0; i < 32; i++) {
+fxleft[i] = (rol16(xleft[i], 1) &
+rol16(xleft[i], 8)) ^ rol16(xleft[i], 2);
+xleft[i + 1] = xright[i] ^ fxleft[i] ^ eff_k[i];
+xright[i + 1] = xleft[i];
+}
+
+return (((uint32_t)xright[32]) << 16) | xleft[32];
+}
+
+static uint64_t hash_digest(uint64_t ra, uint64_t rb, uint64_t key)
+{
+uint64_t stage0_h = 0ULL, stage0_l = 0ULL;
+uint64_t stage1_h, stage1_l;
+
+for (int i = 0; i < 4; i++) {
+stage0_h |= ror64(rb & 0xff, 8 * (2 * i + 1));
+stage0_h |= ((ra >> 32) & 0xff) << (8 * 2 * i);
+stage0_l |= ror64((rb >> 32) & 0xff, 8 * (2 * i + 1));
+stage0_l |= (ra & 0xff) << (8 * 2 * i);
+rb >>= 8;
+ra >>= 8;
+}
+
+stage1_h = (uint64_t)helper_SIMON_LIKE_32_64(stage0_h >> 32, key, 0) << 32;
+stage1_h |= helper_SIMON_LIKE_32_64(stage0_h, key, 1);
+stage1_l = (uint64_t)helper_SIMON_LIKE_32_64(stage0_l >> 32, key, 2) << 32;
+stage1_l |= helper_SIMON_LIKE_32_64(stage0_l, key, 3);
+
+return stage1_h ^ stage1_l;
+}
+
+#include "qemu/guest-random.h"
+
+#define HELPER_HASH(op, key, store)   \
+void helper_##op(CPUPPCState *env, target_ulong ea, target_ulong ra,  \
+ target_ulong rb) \
+{ \
+uint64_t calculated_hash = hash_digest(ra, rb, key), loaded_hash; \
+  \
+if (store) {  \
+cpu_stq_data_ra(env, ea, calculated_hash, GETPC());   \
+} else {  \
+loaded_hash = cpu_ldq_data_ra(env, ea, GETPC());  \
+if (loaded_hash != calculated_hash) { \
+/* hashes don't match, trap */\
+raise_exception_err_ra(env, POWERPC_EXCP_PROGRAM, \
+POWERPC_EXCP_TRAP, GETPC());  \
+} \
+} \
+}
+
+HELPER_HASH(HASHST, env->spr[SPR_HASHKEYR], true)
+HELPER_HASH(HASHCHK, env->spr[SPR_HASHKEYR], false)
+
 #if !defined(CONFIG_USER_ONLY)
 
 #ifdef CONFIG_TCG
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 159b352f6e..5817af632b 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -4,6 +4,8 @@ DEF_HELPER_FLAGS_4(tw, TCG_CALL_NO_WG, void, env, tl, tl, i32)
 #if defined(TARGET_PPC64)
 DEF_HELPER_FLAGS_4(td, TCG_CALL_NO_WG, void, env, tl, tl, i32)
 #endif
+DEF_HELPER_4(HASHST, void, env, tl, tl, tl)
+DEF_HELPER_4(HASHCHK, void, env, tl, tl, tl)
 #if !defined(CONFIG_USER_ONLY)
 DEF_HELPER_2(store_msr, void, env, tl)
 DEF_HELPER_1(rfi, void, env)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index eb41efc100..544514565c 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -172,6 +172,9 @@
 @X_TSX  

[PATCH v4 0/3] Implement Power ISA 3.1B hash insns

2022-07-15 Thread Víctor Colombo
This patch series implements the 4 instructions added in Power ISA
3.1B:

- hashchk
- hashst
- hashchkp
- hashstp

It's built on top of ppc-next. Working branch for ease of use can be
found here:
https://github.com/PPC64/qemu/tree/vccolombo-hash-to-send-v4

What do you think about the choice to implement the hash algorithm
from the ground up, following the SIMON-like algorithm presented in
Power ISA? IIUC, this algorithm is not the same as the original[1].
Other options would be to use other algorithm already implemented
in QEMU, or even make this instruction a nop for all Power versions.

v1->v2:
- Split the patch in 2
- Rebase to master

v2->v3:
- Split patches in 3
- the new patch (patch 1) is separating the kvm header
  changes [Cornelia]

v3->v4:
- Remove Patch 1 (linux-headers/asm-powerpc/kvm.h:
Add HASHKEYR and HASHPKEYR in headers)
- Daniel recommended drop the kvm part:
https://lists.nongnu.org/archive/html/qemu-ppc/2022-07/msg00213.html
- Substitute Patch 1 with a separate patch setting up the registers
  for TCG only. Also, now set it up with a random value in linux-user.
- Change the registers naming:
- SPR_POWER_HASHKEYR -> SPR_HASHKEYR
- Drop RFC tag

[1] https://eprint.iacr.org/2013/404.pdf

Víctor Colombo (3):
  target/ppc: Add HASHKEYR and HASHPKEYR SPRs
  target/ppc: Implement hashst and hashchk
  target/ppc: Implement hashstp and hashchkp

 target/ppc/cpu.h   |  2 +
 target/ppc/cpu_init.c  | 28 
 target/ppc/excp_helper.c   | 84 ++
 target/ppc/helper.h|  4 ++
 target/ppc/insn32.decode   | 10 +++
 target/ppc/translate.c |  5 ++
 target/ppc/translate/fixedpoint-impl.c.inc | 34 +
 7 files changed, 167 insertions(+)

-- 
2.25.1




Re: [PULL 0/6] hw/nvme updates

2022-07-15 Thread Peter Maydell
On Fri, 15 Jul 2022 at 09:43, Klaus Jensen  wrote:
>
> From: Klaus Jensen 
>
> Hi,
>
> The following changes since commit 8482ab545e52f50facacfe1118b22b97462724ab:
>
>   Merge tag 'qga-win32-pull-2022-07-13' of github.com:kostyanf14/qemu into 
> staging (2022-07-14 14:52:16 +0100)
>
> are available in the Git repository at:
>
>   git://git.infradead.org/qemu-nvme.git tags/nvme-next-pull-request
>
> for you to fetch changes up to 2e53b0b450246044efd27418c5d05ad6919deb87:
>
>   hw/nvme: Use ioeventfd to handle doorbell updates (2022-07-15 10:40:33 
> +0200)
>
> 
> hw/nvme updates
>
> performance improvements by Jinhao
> ~~
> * shadow doorbells
> * ioeventfd
>
> plus some misc fixes (Darren, Niklas).
>


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/7.1
for any user-visible changes.

-- PMM



[PATCH v3 4/5] target/loongarch/tlb_helper: Fix coverity integer overflow error

2022-07-15 Thread Xiaojuan Yang
Replace '1 << shift' with 'MAKE_64BIT_MASK(shift, 1)' to fix
unintentional integer overflow errors in tlb_helper file.

Fix coverity CID: 1489759 1489762

Signed-off-by: Xiaojuan Yang 
Reviewed-by: Richard Henderson 
---
 target/loongarch/tlb_helper.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/loongarch/tlb_helper.c b/target/loongarch/tlb_helper.c
index bab19c7e05..610b6d123c 100644
--- a/target/loongarch/tlb_helper.c
+++ b/target/loongarch/tlb_helper.c
@@ -298,7 +298,7 @@ static void invalidate_tlb_entry(CPULoongArchState *env, 
int index)
 } else {
 tlb_ps = FIELD_EX64(env->CSR_STLBPS, CSR_STLBPS, PS);
 }
-pagesize = 1 << tlb_ps;
+pagesize = MAKE_64BIT_MASK(tlb_ps, 1);
 mask = MAKE_64BIT_MASK(0, tlb_ps + 1);
 
 if (tlb_v0) {
@@ -736,7 +736,7 @@ void helper_ldpte(CPULoongArchState *env, target_ulong 
base, target_ulong odd,
 (tmp0 & (~(1 << R_TLBENTRY_G_SHIFT)));
 ps = ptbase + ptwidth - 1;
 if (odd) {
-tmp0 += (1 << ps);
+tmp0 += MAKE_64BIT_MASK(ps, 1);
 }
 } else {
 /* 0:64bit, 1:128bit, 2:192bit, 3:256bit */
-- 
2.31.1




[PATCH v3 5/5] target/loongarch/op_helper: Fix coverity cond_at_most error

2022-07-15 Thread Xiaojuan Yang
The boundary size of cpucfg array should be 0 to ARRAY_SIZE(cpucfg)-1.
So, using an index bigger than the max boundary to access cpucfg[] must be
forbidden.

Fix coverity CID: 1489760

Signed-off-by: Xiaojuan Yang 
Reviewed-by: Richard Henderson 
---
 target/loongarch/op_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/loongarch/op_helper.c b/target/loongarch/op_helper.c
index 4b429b6699..568c071601 100644
--- a/target/loongarch/op_helper.c
+++ b/target/loongarch/op_helper.c
@@ -81,7 +81,7 @@ target_ulong helper_crc32c(target_ulong val, target_ulong m, 
uint64_t sz)
 
 target_ulong helper_cpucfg(CPULoongArchState *env, target_ulong rj)
 {
-return rj > 21 ? 0 : env->cpucfg[rj];
+return rj >= ARRAY_SIZE(env->cpucfg) ? 0 : env->cpucfg[rj];
 }
 
 uint64_t helper_rdtime_d(CPULoongArchState *env)
-- 
2.31.1




[PATCH v3 2/5] hw/intc/loongarch_pch_pic: Fix bugs for update_irq function

2022-07-15 Thread Xiaojuan Yang
Fix such errors:
1. We should not use 'unsigned long' type as argument when we use
find_first_bit(), and we use ctz64() to replace find_first_bit()
to fix this bug.
2. It is not standard to use '1ULL << irq' to generate an irq mask.
So, we replace it with 'MAKE_64BIT_MASK(irq, 1)'.

Fix coverity CID: 1489761 1489764 1489765

Signed-off-by: Xiaojuan Yang 
---
 hw/intc/loongarch_pch_pic.c | 18 +++---
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/hw/intc/loongarch_pch_pic.c b/hw/intc/loongarch_pch_pic.c
index 3c9814a3b4..8fa64d2030 100644
--- a/hw/intc/loongarch_pch_pic.c
+++ b/hw/intc/loongarch_pch_pic.c
@@ -15,22 +15,26 @@
 
 static void pch_pic_update_irq(LoongArchPCHPIC *s, uint64_t mask, int level)
 {
-unsigned long val;
+uint64_t val;
 int irq;
 
 if (level) {
 val = mask & s->intirr & ~s->int_mask;
 if (val) {
-irq = find_first_bit(, 64);
-s->intisr |= 0x1ULL << irq;
-qemu_set_irq(s->parent_irq[s->htmsi_vector[irq]], 1);
+irq = ctz64(val);
+if (irq < 64) {
+s->intisr |= MAKE_64BIT_MASK(irq, 1);
+qemu_set_irq(s->parent_irq[s->htmsi_vector[irq]], 1);
+}
 }
 } else {
 val = mask & s->intisr;
 if (val) {
-irq = find_first_bit(, 64);
-s->intisr &= ~(0x1ULL << irq);
-qemu_set_irq(s->parent_irq[s->htmsi_vector[irq]], 0);
+irq = ctz64(val);
+if (irq < 64) {
+s->intisr &= ~(MAKE_64BIT_MASK(irq, 1));
+qemu_set_irq(s->parent_irq[s->htmsi_vector[irq]], 0);
+}
 }
 }
 }
-- 
2.31.1




[PATCH v3 1/5] target/loongarch/cpu: Fix cpu_class_by_name function

2022-07-15 Thread Xiaojuan Yang
In loongarch_cpu_class_by_name(char *cpu_model) function,
the argument cpu_model already has the suffix '-loongarch-cpu',
so we should remove the LOONGARCH_CPU_TYPE_NAME(cpu_model) macro.
And add the assertion that 'cpu_model' resolves to a class of the
appropriate type.

Signed-off-by: Xiaojuan Yang 
Reviewed-by: Richard Henderson 
---
 target/loongarch/cpu.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index e21715592a..ed26f9beed 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -571,11 +571,12 @@ static void loongarch_cpu_init(Object *obj)
 static ObjectClass *loongarch_cpu_class_by_name(const char *cpu_model)
 {
 ObjectClass *oc;
-char *typename;
 
-typename = g_strdup_printf(LOONGARCH_CPU_TYPE_NAME("%s"), cpu_model);
-oc = object_class_by_name(typename);
-g_free(typename);
+oc = object_class_by_name(cpu_model);
+if (!oc || !object_class_dynamic_cast(oc, TYPE_LOONGARCH_CPU) ||
+object_class_is_abstract(oc)) {
+return NULL;
+}
 return oc;
 }
 
-- 
2.31.1




Re: [PATCH v2 07/11] acpi/tests/bits: add python test that exercizes QEMU bios tables using biosbits

2022-07-15 Thread Michael S. Tsirkin
On Fri, Jul 15, 2022 at 09:47:27AM +0530, Ani Sinha wrote:
> > Instead of all this mess, can't we just spawn e.g. "git clone --depth 1"?
> > And if the directory exists I would fetch and checkout.
> 
> There are two reasons I can think of why I do not like this idea:
> 
> (a) a git clone of a whole directory would download all versions of the
> binary whereas we want only a specific version.

You mention shallow clone yourself, and I used --depth 1 above.

> Downloading a single file
> by shallow cloning or creating a git archive is overkill IMHO when a wget
> style retrieval works just fine.

However, it does not provide for versioning, tagging etc so you have
to implement your own schema.


> (b) we may later move the binary archives to a ftp server or a google
> drive. git/version control mechanisms are not the best place to store
> binary blobs IMHO. In this case also, wget also works.

surely neither ftp nor google drive are reasonable dependencies
for a free software project. But qemu does maintain an http server
already so that't a plus.



I am not insisting on git, but I do not like it that security,
mirroring, caching, versioning all have to be hand rolled and then
figured out by users and maintainers. Who frankly have other things to
do besides learning yet another boutique configuration language.

And I worry that after a while we come up with a new organization schema
for the files, old ones are moved around and nothing relying on the URL
works.  git is kind of good in that it enforces the idea that history is
immutable.

If not vanilla git can we find another utility we can reuse?

git lfs? It seems to be supported by both github and gitlab though
bizarrely github has bandwidth limits on git lfs but apparently not on
vanilla git. Hosting on qemu.org will require maintaining a server
there though.



All that said maybe we should just run with it as it is, just so we get
*something* in the door, and then worry about getting the storage side
straight before making this test a requirement for all acpi developers.

-- 
MST




[PATCH v3 3/5] target/loongarch/cpu: Fix coverity errors about excp_names

2022-07-15 Thread Xiaojuan Yang
Fix out-of-bounds errors when accessing the excp_names[] array. The valid
boundary size of excp_names should be 0 to ARRAY_SIZE(excp_names)-1.
However, the general code does not consider the max boundary.

Fix coverity CID: 1489758

Signed-off-by: Xiaojuan Yang 
Reviewed-by: Richard Henderson 
---
 target/loongarch/cpu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index ed26f9beed..89ea971cde 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -140,7 +140,7 @@ static void loongarch_cpu_do_interrupt(CPUState *cs)
 
 if (cs->exception_index != EXCCODE_INT) {
 if (cs->exception_index < 0 ||
-cs->exception_index > ARRAY_SIZE(excp_names)) {
+cs->exception_index >= ARRAY_SIZE(excp_names)) {
 name = "unknown";
 } else {
 name = excp_names[cs->exception_index];
@@ -190,8 +190,8 @@ static void loongarch_cpu_do_interrupt(CPUState *cs)
 cause = cs->exception_index;
 break;
 default:
-qemu_log("Error: exception(%d) '%s' has not been supported\n",
- cs->exception_index, excp_names[cs->exception_index]);
+qemu_log("Error: exception(%d) has not been supported\n",
+ cs->exception_index);
 abort();
 }
 
-- 
2.31.1




[PATCH v3 0/5] Fix LoongArch coverity error and cpu name bug

2022-07-15 Thread Xiaojuan Yang
This series fix some coverity errors and loongarch_cpu_class_by_name function
for LoongArch virt machine.

Only the loongarch_pch_pic patch(number 2/5) need to be reviewed in this v3
verison, and other patches have been reviewed.

Changes for v3:

1. In loongarch_pch_pic file, We should not use 'unsigned long'
   type as argument when we use find_first_bit(), and we use
   ctz64() to replace find_first_bit() to fix this bug.
2. It is not standard to use '1ULL << irq' to generate an irq mask.
   So, we replace it with 'MAKE_64BIT_MASK(irq, 1)'.
3. Rewrite commit comments for op_helper patch(number 5/5).

Changes for v2:

1. Use MAKE_64BIT_MASK(shift, len) to replace 'xxx << shift'.
2. Use ARRAY_SIZE(arrqy) to get the array size.
3. Add the assertion that 'cpu_model' resolve to a class of the 
   appropriate type.


Changes for v1:

1. Fix coverity errors such as out-of-bounds, integer overflow,
   cond_at_most, etc.
2. Fix loongarch_cpu_class_by_name function.


Please help review
Thanks.

Xiaojuan Yang (5):
  target/loongarch/cpu: Fix cpu_class_by_name function
  hw/intc/loongarch_pch_pic: Fix bugs for update_irq function
  target/loongarch/cpu: Fix coverity errors about excp_names
  target/loongarch/tlb_helper: Fix coverity integer overflow error
  target/loongarch/op_helper: Fix coverity cond_at_most error

 hw/intc/loongarch_pch_pic.c   | 18 +++---
 target/loongarch/cpu.c| 15 ---
 target/loongarch/op_helper.c  |  2 +-
 target/loongarch/tlb_helper.c |  4 ++--
 4 files changed, 22 insertions(+), 17 deletions(-)

-- 
2.31.1




[PATCH v1] target/loongarch/cpu: Fix cpucfg default value

2022-07-15 Thread Xiaojuan Yang
We should config cpucfg[20] to set value for the scache's ways, sets,
and size arguments when loongarch cpu init. However, the old code
wrote the 'sets argument' twice, so we change one of them to 'size argument'.

Signed-off-by: Xiaojuan Yang 
---
 target/loongarch/cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index 89ea971cde..4cfce8c9d2 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -406,7 +406,7 @@ static void loongarch_la464_initfn(Object *obj)
 data = 0;
 data = FIELD_DP32(data, CPUCFG20, L3IU_WAYS, 15);
 data = FIELD_DP32(data, CPUCFG20, L3IU_SETS, 14);
-data = FIELD_DP32(data, CPUCFG20, L3IU_SETS, 6);
+data = FIELD_DP32(data, CPUCFG20, L3IU_SIZE, 6);
 env->cpucfg[20] = data;
 
 env->CSR_ASID = FIELD_DP64(0, CSR_ASID, ASIDBITS, 0xa);
-- 
2.31.1




[PATCH 1/2] vhost: Get vring base from vq, not svq

2022-07-15 Thread Eugenio Pérez
The SVQ vring used idx usually match with the guest visible one, as long
as all the guest buffers (GPA) maps to exactly one buffer within qemu's
VA. However, as we can see in virtqueue_map_desc, a single guest buffer
could map to many buffers in SVQ vring.

The solution is to stop using the device's used idx and check for the
last avail idx. Since we cannot report in-flight descriptors with vdpa,
let's rewind all of them.

Fixes: 6d0b22266633 ("vdpa: Adapt vhost_vdpa_get_vring_base to SVQ")
Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-vdpa.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 795ed5a049..18820498b3 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -1194,11 +1194,10 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev 
*dev,
struct vhost_vring_state *ring)
 {
 struct vhost_vdpa *v = dev->opaque;
-int vdpa_idx = ring->index - dev->vq_index;
 int ret;
 
 if (v->shadow_vqs_enabled) {
-VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
+VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
 
 /*
  * Setting base as last used idx, so destination will see as available
@@ -1208,7 +1207,10 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev 
*dev,
  * TODO: This is ok for networking, but other kinds of devices might
  * have problems with these retransmissions.
  */
-ring->num = svq->last_used_idx;
+while (virtqueue_rewind(vq, 1)) {
+continue;
+}
+ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index);
 return 0;
 }
 
-- 
2.31.1




Re: [PATCH v2 19/19] vdpa: Add x-svq to NetdevVhostVDPAOptions

2022-07-15 Thread Eugenio Perez Martin
On Fri, Jul 15, 2022 at 6:13 AM Jason Wang  wrote:
>
> On Fri, Jul 15, 2022 at 12:32 AM Eugenio Pérez  wrote:
> >
> > Finally offering the possibility to enable SVQ from the command line.
> >
> > Signed-off-by: Eugenio Pérez 
> > Acked-by: Markus Armbruster 
> > ---
> >  qapi/net.json|  9 +-
> >  net/vhost-vdpa.c | 72 ++--
> >  2 files changed, 77 insertions(+), 4 deletions(-)
> >
> > diff --git a/qapi/net.json b/qapi/net.json
> > index 9af11e9a3b..75ba2cb989 100644
> > --- a/qapi/net.json
> > +++ b/qapi/net.json
> > @@ -445,12 +445,19 @@
> >  # @queues: number of queues to be created for multiqueue vhost-vdpa
> >  #  (default: 1)
> >  #
> > +# @x-svq: Start device with (experimental) shadow virtqueue. (Since 7.1)
> > +# (default: false)
> > +#
> > +# Features:
> > +# @unstable: Member @x-svq is experimental.
> > +#
> >  # Since: 5.1
> >  ##
> >  { 'struct': 'NetdevVhostVDPAOptions',
> >'data': {
> >  '*vhostdev': 'str',
> > -'*queues':   'int' } }
> > +'*queues':   'int',
> > +'*x-svq':{'type': 'bool', 'features' : [ 'unstable'] } } }
> >
> >  ##
> >  # @NetdevVmnetHostOptions:
> > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > index 7ccf9eaf4d..85148a5114 100644
> > --- a/net/vhost-vdpa.c
> > +++ b/net/vhost-vdpa.c
> > @@ -75,6 +75,28 @@ const int vdpa_feature_bits[] = {
> >  VHOST_INVALID_FEATURE_BIT
> >  };
> >
> > +/** Supported device specific feature bits with SVQ */
> > +static const uint64_t vdpa_svq_device_features =
> > +BIT_ULL(VIRTIO_NET_F_CSUM) |
> > +BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
> > +BIT_ULL(VIRTIO_NET_F_MTU) |
> > +BIT_ULL(VIRTIO_NET_F_MAC) |
> > +BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
> > +BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
> > +BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
> > +BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
> > +BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
> > +BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
> > +BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
> > +BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
> > +BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
> > +BIT_ULL(VIRTIO_NET_F_STATUS) |
> > +BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
> > +BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
> > +BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
> > +BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
> > +BIT_ULL(VIRTIO_NET_F_STANDBY);
>
> We need to have a plan for the full feature support like
>
> indirect, event_index, and packed.
>

Event idx is almost straightforward to develop. Packed has more code
but it's equally doable. In order it should not be hard too.

Indirect is a little bit more complicated because of the indirect
table. I guess we will need to either allocate a big buffer where we
can obtain indirect tables and cvq buffers, or allocate & map them
individually.

Note that we can half-support them. To enable them in the guest's
vring is as easy as to accept that feature in SVQ, and SVQ can easily
translate one format to another. I know the interesting part is the
shadow vring to speed the communication with the device, but it's
still a first step in that direction if needed.

> I can help in developing some of these if you wish.
>

We could plan for the next release cycle for sure.

Thanks!

> Thanks
>
> > +
> >  VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
> >  {
> >  VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > @@ -133,9 +155,13 @@ err_init:
> >  static void vhost_vdpa_cleanup(NetClientState *nc)
> >  {
> >  VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > +struct vhost_dev *dev = >vhost_net->dev;
> >
> >  qemu_vfree(s->cvq_cmd_out_buffer);
> >  qemu_vfree(s->cvq_cmd_in_buffer);
> > +if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
> > +g_clear_pointer(>vhost_vdpa.iova_tree, vhost_iova_tree_delete);
> > +}
> >  if (s->vhost_net) {
> >  vhost_net_cleanup(s->vhost_net);
> >  g_free(s->vhost_net);
> > @@ -437,7 +463,9 @@ static NetClientState 
> > *net_vhost_vdpa_init(NetClientState *peer,
> > int vdpa_device_fd,
> > int queue_pair_index,
> > int nvqs,
> > -   bool is_datapath)
> > +   bool is_datapath,
> > +   bool svq,
> > +   VhostIOVATree *iova_tree)
> >  {
> >  NetClientState *nc = NULL;
> >  VhostVDPAState *s;
> > @@ -455,6 +483,8 @@ static NetClientState 
> > *net_vhost_vdpa_init(NetClientState *peer,
> >
> >  s->vhost_vdpa.device_fd = vdpa_device_fd;
> >  s->vhost_vdpa.index = queue_pair_index;
> > +s->vhost_vdpa.shadow_vqs_enabled = svq;
> > +s->vhost_vdpa.iova_tree = iova_tree;
> >  if (!is_datapath) {
> >  s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
> >

[PATCH 2/2] vhost: Move SVQ queue rewind to the destination

2022-07-15 Thread Eugenio Pérez
Migration with SVQ already migrate the inflight descriptors, so the
destination can perform the work.

This makes it easier to migrate between backends or to recover them in
vhost devices that support set in flight descriptors.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-vdpa.c | 24 +++-
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 18820498b3..4458c8d23e 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -1178,7 +1178,18 @@ static int vhost_vdpa_set_vring_base(struct vhost_dev 
*dev,
struct vhost_vring_state *ring)
 {
 struct vhost_vdpa *v = dev->opaque;
+VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
 
+/*
+ * vhost-vdpa devices does not support in-flight requests. Set all of them
+ * as available.
+ *
+ * TODO: This is ok for networking, but other kinds of devices might
+ * have problems with these retransmissions.
+ */
+while (virtqueue_rewind(vq, 1)) {
+continue;
+}
 if (v->shadow_vqs_enabled) {
 /*
  * Device vring base was set at device start. SVQ base is handled by
@@ -1197,19 +1208,6 @@ static int vhost_vdpa_get_vring_base(struct vhost_dev 
*dev,
 int ret;
 
 if (v->shadow_vqs_enabled) {
-VirtQueue *vq = virtio_get_queue(dev->vdev, ring->index);
-
-/*
- * Setting base as last used idx, so destination will see as available
- * all the entries that the device did not use, including the in-flight
- * processing ones.
- *
- * TODO: This is ok for networking, but other kinds of devices might
- * have problems with these retransmissions.
- */
-while (virtqueue_rewind(vq, 1)) {
-continue;
-}
 ring->num = virtio_queue_get_last_avail_idx(dev->vdev, ring->index);
 return 0;
 }
-- 
2.31.1




[PATCH 0/2] vhost: Get vring base from vq, not svq

2022-07-15 Thread Eugenio Pérez
The SVQ vring used idx usually match with the guest visible one, as long
as all the guest buffers (GPA) maps to exactly one buffer within qemu's
VA. However, as we can see in virtqueue_map_desc, a single guest buffer
could map to many buffers in SVQ vring.

The solution is to stop using the device's used idx and check for the
last avail idx. Since we cannot report in-flight descriptors with vdpa,
let's rewind all of them.

Also, move this rewind to the destination, so we keep migrating the in-flight
ones in case the destination backend supports them (vhost-kernel, emulated virtio
in qemu, etc.)

Eugenio Pérez (2):
  vhost: Get vring base from vq, not svq
  vhost: Move SVQ queue rewind to the destination

 hw/virtio/vhost-vdpa.c | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

-- 
2.31.1





Re: [RFC PATCH 5/8] block: implement .change_aio_ctx in child_of_bds

2022-07-15 Thread Hanna Reitz

On 12.07.22 23:19, Emanuele Giuseppe Esposito wrote:

bdrv_child_cb_change_aio_ctx() is identical to
bdrv_child_cb_can_set_aio_ctx(), as we only need
to recursively go on the parent bs.

Note: bdrv_child_try_change_aio_context() is not called by
anyone at this point.

Signed-off-by: Emanuele Giuseppe Esposito 
---
  block.c | 9 +
  1 file changed, 9 insertions(+)


Reviewed-by: Hanna Reitz 




[PATCH v2 2/3] Revert "main-loop: Disable block backend global state assertion on Cocoa"

2022-07-15 Thread Akihiko Odaki
This reverts commit 47281859f66bdab1974fb122cab2cbb4a1c9af7f.

Signed-off-by: Akihiko Odaki 
---
 include/qemu/main-loop.h | 13 -
 1 file changed, 13 deletions(-)

diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index 5518845299d..0aa36a4f17e 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -280,23 +280,10 @@ bool qemu_mutex_iothread_locked(void);
 bool qemu_in_main_thread(void);
 
 /* Mark and check that the function is part of the global state API. */
-#ifdef CONFIG_COCOA
-/*
- * When using the Cocoa UI, addRemovableDevicesMenuItems() is called from
- * a thread different from the QEMU main thread and can not take the BQL,
- * triggering this assertions in the block layer (commit 0439c5a462).
- * As the Cocoa fix is not trivial, disable this assertion for the v7.0.0
- * release (when using Cocoa); we will restore it immediately after the
- * release.
- * This issue is tracked as https://gitlab.com/qemu-project/qemu/-/issues/926
- */
-#define GLOBAL_STATE_CODE()
-#else
 #define GLOBAL_STATE_CODE() \
 do {\
 assert(qemu_in_main_thread());  \
 } while (0)
-#endif /* CONFIG_COCOA */
 
 /* Mark and check that the function is part of the I/O API. */
 #define IO_CODE()   \
-- 
2.32.1 (Apple Git-133)




Re: [PATCH v2 3/3] meson: Allow to enable gtk and sdl while cocoa is enabled

2022-07-15 Thread Peter Maydell
On Fri, 15 Jul 2022 at 12:40, Akihiko Odaki  wrote:
>
> As ui/cocoa does no longer override main(), ui/gtk and ui/sdl
> can be enabled even ui/cocoa is enabled.
>
> Signed-off-by: Akihiko Odaki 
> ---

Reviewed-by: Peter Maydell 

thanks
-- PMM



Re: [PATCH v2 2/3] Revert "main-loop: Disable block backend global state assertion on Cocoa"

2022-07-15 Thread Peter Maydell
On Fri, 15 Jul 2022 at 12:40, Akihiko Odaki  wrote:
>
> This reverts commit 47281859f66bdab1974fb122cab2cbb4a1c9af7f.
>
> Signed-off-by: Akihiko Odaki 
> ---
>  include/qemu/main-loop.h | 13 -
>  1 file changed, 13 deletions(-)

Reviewed-by: Peter Maydell 

thanks
-- PMM



Re: [PATCH v2 1/3] ui/cocoa: Run qemu_init in the main thread

2022-07-15 Thread Peter Maydell
On Fri, 15 Jul 2022 at 12:40, Akihiko Odaki  wrote:
>
> This work is based on:
> https://patchew.org/QEMU/20220317125534.38706-1-philippe.mathieu.da...@gmail.com/
>
> Simplify the initialization dance by running qemu_init() in the main
> thread before the Cocoa event loop starts. The secondary thread only
> runs only qemu_main_loop() and qemu_cleanup().
>
> This fixes a case where addRemovableDevicesMenuItems() calls
> qmp_query_block() while expecting the main thread to still hold
> the BQL.
>
> Overriding the code after calling qemu_init() is done by dynamically
> replacing a function pointer variable, qemu_main when initializing
> ui/cocoa, which unifies the static implementation of main() for
> builds with ui/cocoa and ones without ui/cocoa.
>
> Signed-off-by: Akihiko Odaki 

> @@ -585,7 +583,7 @@ - (void) updateUIInfo
>  /*
>   * Don't try to tell QEMU about UI information in the application
>   * startup phase -- we haven't yet registered dcl with the QEMU UI
> - * layer, and also trying to take the iothread lock would deadlock.
> + * layer.
>   * When cocoa_display_init() does register the dcl, the UI layer
>   * will call cocoa_switch(), which will call updateUIInfo, so
>   * we don't lose any information here.

This comment says that we can't use the dcl while allow_events is false...

> @@ -778,16 +776,6 @@ - (void) handleMonitorInput:(NSEvent *)event
>
>  - (bool) handleEvent:(NSEvent *)event
>  {
> -if(!allow_events) {
> -/*
> - * Just let OSX have all events that arrive before
> - * applicationDidFinishLaunching.
> - * This avoids a deadlock on the iothread lock, which 
> cocoa_display_init()
> - * will not drop until after the app_started_sem is posted. (In 
> theory
> - * there should not be any such events, but OSX Catalina now emits 
> some.)
> - */
> -return false;
> -}

...so don't we want to also retain this check of allow_events ?
Much of the code in handleEventLocked assumes the dcl has been registered.

>  return bool_with_iothread_lock(^{
>  return [self handleEventLocked:event];
>  });

> @@ -1915,92 +1898,35 @@ static void cocoa_clipboard_request(QemuClipboardInfo 
> *info,
>  /*
>   * The startup process for the OSX/Cocoa UI is complicated, because
>   * OSX insists that the UI runs on the initial main thread, and so we
> - * need to start a second thread which runs the vl.c qemu_main():
> - *
> - * Initial thread:2nd thread:
> - * in main():
> - *  create qemu-main thread
> - *  wait on display_init semaphore
> - *call qemu_main()
> - *...
> - *in cocoa_display_init():
> - * post the display_init semaphore
> - * wait on app_started semaphore
> - *  create application, menus, etc
> - *  enter OSX run loop
> - * in applicationDidFinishLaunching:
> - *  post app_started semaphore
> - * tell main thread to fullscreen if 
> needed
> - *[...]
> - *run qemu main-loop
> - *
> - * We do this in two stages so that we don't do the creation of the
> - * GUI application menus and so on for command line options like --help
> - * where we want to just print text to stdout and exit immediately.

Could we have an updated version of this diagram that explains the
new startup process, please ?

> + * need to start a second thread which runs the qemu_default_main().
>   */

Otherwise this looks good, and it's nice to get rid of that redefine-main
hack.

thanks
-- PMM



Re: [RFC PATCH 4/8] blockjob: implement .change_aio_ctx in child_job

2022-07-15 Thread Hanna Reitz

On 12.07.22 23:19, Emanuele Giuseppe Esposito wrote:

child_job_change_aio_ctx() is very similar to
child_job_can_set_aio_ctx(), but it implements a new transaction
so that if all checks pass, the new transaction's .commit()
will take care of changing the BlockJob AioContext.
child_job_set_aio_ctx_commit() is similar to child_job_set_aio_ctx(),
but it doesn't need to invoke the recursion, as this is already
taken care of by child_job_change_aio_ctx().

Note: bdrv_child_try_change_aio_context() is not called by
anyone at this point.

Signed-off-by: Emanuele Giuseppe Esposito 
---
  blockjob.c | 45 +
  1 file changed, 45 insertions(+)


Looks good, disregarding the fact that I’d like it very much if we could 
find some other primitive than tran_add_trail() to get these handlers to 
run on a drained graph.


But that’s taste (and something to talk about in patch 3), so I’ll just 
give a


Reviewed-by: Hanna Reitz 




Re: [RFC PATCH 6/8] block-backend: implement .change_aio_ctx in child_root

2022-07-15 Thread Hanna Reitz

On 12.07.22 23:19, Emanuele Giuseppe Esposito wrote:

blk_root_change_aio_ctx() is very similar to blk_root_can_set_aio_ctx(),
but implements a new transaction so that if all check pass, the new
transaction's .commit will take care of changing the BlockBackend
AioContext. blk_root_set_aio_ctx_commit() is the same as
blk_root_set_aio_ctx().

Note: bdrv_child_try_change_aio_context() is not called by
anyone at this point.

Signed-off-by: Emanuele Giuseppe Esposito 
---
  block/block-backend.c | 54 +++
  1 file changed, 54 insertions(+)

diff --git a/block/block-backend.c b/block/block-backend.c
index f425b00793..674eaaa2bf 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c


[...]


@@ -2208,6 +2212,56 @@ int blk_set_aio_context(BlockBackend *blk, AioContext 
*new_context,


[...]


+static bool blk_root_change_aio_ctx(BdrvChild *child, AioContext *ctx,
+GSList **visited, Transaction *tran,
+Error **errp)
+{
+BlockBackend *blk = child->opaque;
+BdrvStateBlkRootContext *s;
+
+if (blk->allow_aio_context_change) {
+goto finish;
+}
+
+/*
+ * Only manually created BlockBackends that are not attached to anything
+ * can change their AioContext without updating their user.
+ */
+if (!blk->name || blk->dev) {
+/* TODO Add BB name/QOM path */
+error_setg(errp, "Cannot change iothread of active block backend");
+return false;
+}


Is the goto really necessary?  Or, rather, do you prefer this to 
something like


if (!blk->allow_aio_context_change) {
    /*
 * Manually created BlockBackends (those with a name) that are not
 * attached to anything can change their AioContext without updating
 * their user; return an error for others.
 */
    if (!blk->name || blk->dev) {
    ...
    }
}

If you prefer the goto, I’d at least rename the label to 
“change_context” or “allowed” or something.


Hanna


+
+finish:
+s = g_new(BdrvStateBlkRootContext, 1);
+*s = (BdrvStateBlkRootContext) {
+.new_ctx = ctx,
+.blk = blk,
+};
+
+tran_add_tail(tran, _blk_root_context, s);


(Again, not a huge fan of this.)


+return true;
+}
+
  static bool blk_root_can_set_aio_ctx(BdrvChild *child, AioContext *ctx,
   GSList **ignore, Error **errp)
  {





Re: [PATCH v2 10/15] qemu-common: introduce a common subproject

2022-07-15 Thread Marc-André Lureau
Hi

On Tue, Jul 12, 2022 at 6:58 PM Warner Losh  wrote:
>
>
>
> On Tue, Jul 12, 2022 at 3:36 AM  wrote:
>>
>> From: Marc-André Lureau 
>>
>> Add a new meson subproject to provide common code and scripts for QEMU
>> and tools. Initially, it will offer QAPI/QMP code generation and
>> common utilities.
>>
>> libvhost-user & libvduse will make use of the subproject to avoid having
>> include/ links to common headers.
>>
>> The other targeted user is qemu-ga, which will also be converted to a
>> subproject (so it can be built, moved, released etc independent from QEMU).
>>
>> Other projects such as qemu-storage-daemon could be built standalone
>> eventually in the future.
>>
>> Note that with meson subprojects are "global". Projects will share
>> subprojects 
>> (https://mesonbuild.com/Subprojects.html#subprojects-depending-on-other-subprojects).
>> We will add extra subprojects/ links to allow standalone subproject
>> compilation though.
>>
>> This initial commit simply set the stage to build and link against it.
>>
>> Signed-off-by: Marc-André Lureau 
>> ---
>>  meson.build  | 9 -
>>  .../qemu-common/include}/qemu/help-texts.h   | 0
>>  linux-user/meson.build   | 4 ++--
>>  subprojects/libvduse/meson.build | 2 ++
>>  subprojects/libvduse/subprojects/qemu-common | 1 +
>>  subprojects/libvhost-user/meson.build| 2 ++
>>  subprojects/libvhost-user/subprojects/qemu-common| 1 +
>>  subprojects/qemu-common/meson.build  | 8 
>>  8 files changed, 24 insertions(+), 3 deletions(-)
>>  rename {include => subprojects/qemu-common/include}/qemu/help-texts.h (100%)
>>  create mode 12 subprojects/libvduse/subprojects/qemu-common
>>  create mode 12 subprojects/libvhost-user/subprojects/qemu-common
>>  create mode 100644 subprojects/qemu-common/meson.build
>>
>> diff --git a/meson.build b/meson.build
>> index bc5569ace159..254eb1263a66 100644
>> --- a/meson.build
>> +++ b/meson.build
>> @@ -167,6 +167,10 @@ if 'dtrace' in get_option('trace_backends')
>>endif
>>  endif
>>
>> +add_project_arguments('-I' + meson.current_source_dir() / 
>> 'subprojects/qemu-common/include',
>> +  language: ['c', 'cpp', 'objc'],
>> +)
>> +
>>  if get_option('iasl') == ''
>>iasl = find_program('iasl', required: false)
>>  else
>> @@ -1577,6 +1581,9 @@ if libbpf.found() and not cc.links('''
>>endif
>>  endif
>>
>> +qemu_common = subproject('qemu-common')
>> +qemu_common = qemu_common.get_variable('qemu_common_dep')
>> +
>>  #
>>  # config-host.h #
>>  #
>> @@ -3052,7 +3059,7 @@ util_ss.add_all(trace_ss)
>>  util_ss = util_ss.apply(config_all, strict: false)
>>  libqemuutil = static_library('qemuutil',
>>   sources: util_ss.sources() + stub_ss.sources() 
>> + genh,
>> - dependencies: [util_ss.dependencies(), libm, 
>> threads, glib, socket, malloc, pixman])
>> + dependencies: [util_ss.dependencies(), libm, 
>> threads, glib, socket, malloc, pixman, qemu_common])
>>  qemuutil = declare_dependency(link_with: libqemuutil,
>>sources: genh + version_res,
>>dependencies: [event_loop_base])
>> diff --git a/include/qemu/help-texts.h 
>> b/subprojects/qemu-common/include/qemu/help-texts.h
>> similarity index 100%
>> rename from include/qemu/help-texts.h
>> rename to subprojects/qemu-common/include/qemu/help-texts.h
>> diff --git a/linux-user/meson.build b/linux-user/meson.build
>> index de4320af053c..fc6cdb55d657 100644
>> --- a/linux-user/meson.build
>> +++ b/linux-user/meson.build
>> @@ -7,7 +7,7 @@ linux_user_ss = ss.source_set()
>>  common_user_inc += include_directories('include/host/' / host_arch)
>>  common_user_inc += include_directories('include')
>>
>> -linux_user_ss.add(files(
>> +linux_user_ss.add([files(
>>'elfload.c',
>>'exit.c',
>>'fd-trans.c',
>> @@ -20,7 +20,7 @@ linux_user_ss.add(files(
>>'thunk.c',
>>'uaccess.c',
>>'uname.c',
>> -))
>> +), qemu_common])
>
>
> Question: Why does linux-user need these, but bsd-user does not?
>

Indeed, it's not needed anymore, thanks!




Re: [PATCH v7 09/10] i386/pc: relocate 4g start to 1T where applicable

2022-07-15 Thread Igor Mammedov
On Thu, 14 Jul 2022 19:28:19 +0100
Joao Martins  wrote:

> It is assumed that the whole GPA space is available to be DMA
> addressable, within a given address space limit, except for a
> tiny region before the 4G. Since Linux v5.4, VFIO validates
> whether the selected GPA is indeed valid i.e. not reserved by
> IOMMU on behalf of some specific devices or platform-defined
> restrictions, and thus failing the ioctl(VFIO_DMA_MAP) with
>  -EINVAL.
> 
> AMD systems with an IOMMU are examples of such platforms and
> particularly may only have these ranges as allowed:
> 
>    - fedf (0  .. 3.982G)
>   fef0 - 00fc (3.983G .. 1011.9G)
>   0100 -  (1Tb.. 16Pb[*])
> 
> We already account for the 4G hole, albeit if the guest is big
> enough we will fail to allocate a guest with  >1010G due to the
> ~12G hole at the 1Tb boundary, reserved for HyperTransport (HT).
> 
> [*] there is another reserved region unrelated to HT that exists
> in the 256T boundary in Fam 17h according to Errata #1286,
> documented also in "Open-Source Register Reference for AMD Family
> 17h Processors (PUB)"
> 
> When creating the region above 4G, take into account that on AMD
> platforms the HyperTransport range is reserved and hence it
> cannot be used either as GPAs. On those cases rather than
> establishing the start of ram-above-4g to be 4G, relocate instead
> to 1Tb. See AMD IOMMU spec, section 2.1.2 "IOMMU Logical
> Topology", for more information on the underlying restriction of
> IOVAs.
> 
> After accounting for the 1Tb hole on AMD hosts, mtree should
> look like:
> 
> -7fff (prio 0, i/o):
>alias ram-below-4g @pc.ram -7fff
> 0100-01ff7fff (prio 0, i/o):
>   alias ram-above-4g @pc.ram 8000-00ff
> 
> If the relocation is done or the address space covers it, we
> also add the the reserved HT e820 range as reserved.
> 
> Default phys-bits on Qemu is TCG_PHYS_ADDR_BITS (40) which is enough
> to address 1Tb (0xff  ). On AMD platforms, if a
> ram-above-4g relocation may be desired and the CPU wasn't configured
> with a big enough phys-bits, print an error message to the user
> and do not make the relocation of the above-4g-region if phys-bits
> is too low.
> 
> Suggested-by: Igor Mammedov 
> Signed-off-by: Joao Martins 
> ---
>  hw/i386/pc.c | 82 
>  1 file changed, 82 insertions(+)
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index cda435e3baeb..17613974163e 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -880,6 +880,52 @@ static uint64_t pc_get_cxl_range_end(PCMachineState 
> *pcms)
>  return start;
>  }
>  
> +static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t pci_hole64_size)
> +{
> +X86CPU *cpu = X86_CPU(first_cpu);
> +
> +/* 32-bit systems don't have hole64 thus return max CPU address */
> +if (cpu->phys_bits <= 32) {
> +return ((hwaddr)1 << cpu->phys_bits) - 1;
> +}
> +
> +return pc_pci_hole64_start() + pci_hole64_size - 1;
> +}
> +
[...]

> +
> +/*
> + * Relocating ram-above-4G requires more than TCG_PHYS_ADDR_BITS (40).
> + * So make sure phys-bits is required to be appropriately sized in order
> + * to proceed with the above-4g-region relocation and thus boot.

drop mention of relocation here as it's orthogonal to the check.
Important thing we are checking here is that max used GPA is
reachable by configured vCPU (physbits).

> + */
> +maxusedaddr = pc_max_used_gpa(pcms, pci_hole64_size);
> +maxphysaddr = ((hwaddr)1 << cpu->phys_bits) - 1;
> +if (maxphysaddr < maxusedaddr) {
> +error_report("Address space limit 0x%"PRIx64" < 0x%"PRIx64
> + " phys-bits too low (%u)",
> + maxphysaddr, maxusedaddr, cpu->phys_bits);
> +exit(EXIT_FAILURE);
> +}

these hunks should be a separate patch preceding relocation patch
as it basically does max_gpa vs physbits check regardless
of relocation (i.e. relocation is only one of the reasons
max_used_gpa might exceed physbits).

> +
>  /*
>   * Split single memory region and use aliases to address portions of it,
>   * done for backwards compatibility with older qemus.




Re: [RFC PATCH v3 0/3] Implement Power ISA 3.1B hash insns

2022-07-15 Thread Daniel Henrique Barboza




On 7/13/22 13:54, Víctor Colombo wrote:

This patch series implements the 4 instructions added in Power ISA
3.1B:

- hashchk
- hashst
- hashchkp
- hashstp

To build it, you need to apply the following patches on top of master:
<20220701133507.740619-2-lucas.couti...@eldorado.org.br>
<20220701133507.740619-3-lucas.couti...@eldorado.org.br>
<20220712193741.59134-2-leandro.lup...@eldorado.org.br>
<20220712193741.59134-3-leandro.lup...@eldorado.org.br>

Working branch for ease of use can be found here:
https://github.com/PPC64/qemu/tree/vccolombo-hash-to-send-v3

What do you think about the choice to implement the hash algorithm
from the ground up, following the SIMON-like algorithm presented in
Power ISA? IIUC, this algorithm is not the same as the original[1].
Other options would be to use other algorithm already implemented
in QEMU, or even make this instruction a nop for all Power versions.

Also, I was thinking about using the call to spr_register_kvm() in
init_proc_POWER10 to initialize the registers with a random value.
I'm not sure what is the behavior here, I would expect that is the job
of the OS to set the regs, but looks like KVM is not exporting them,
so they are always 0 (?). Does anyone have any insight on this?


This happens because KVM on POWER10 isn't handling these registers
appropriately. We are probably missing kernel/kvm code to do so.

Since KVM on POWER10 is on an uncertain spot at this moment I wouldn't
worry too much about it. Making the regs read/write work in TCG is good
enough for now.


Daniel



v1->v2:
- Split the patch in 2
- Rebase to master

v2->v3:
- Split patches in 3
 - the new patch (patch 1) is separating the kvm header
   changes [Cornelia]

[1] https://eprint.iacr.org/2013/404.pdf

Víctor Colombo (3):
   linux-headers/asm-powerpc/kvm.h: Add HASHKEYR and HASHPKEYR in headers
   target/ppc: Implement hashst and hashchk
   target/ppc: Implement hashstp and hashchkp

  linux-headers/asm-powerpc/kvm.h|  3 +
  target/ppc/cpu.h   |  2 +
  target/ppc/cpu_init.c  |  7 ++
  target/ppc/excp_helper.c   | 82 ++
  target/ppc/helper.h|  4 ++
  target/ppc/insn32.decode   | 10 +++
  target/ppc/translate.c |  5 ++
  target/ppc/translate/fixedpoint-impl.c.inc | 34 +
  8 files changed, 147 insertions(+)





Re: [PATCH v2 1/3] ui/cocoa: Run qemu_init in the main thread

2022-07-15 Thread Peter Maydell
On Fri, 15 Jul 2022 at 14:19, Akihiko Odaki  wrote:
>
> On 2022/07/15 22:10, Peter Maydell wrote:
> > On Fri, 15 Jul 2022 at 12:40, Akihiko Odaki  wrote:
> >>
> >> This work is based on:
> >> https://patchew.org/QEMU/20220317125534.38706-1-philippe.mathieu.da...@gmail.com/
> >>
> >> Simplify the initialization dance by running qemu_init() in the main
> >> thread before the Cocoa event loop starts. The secondary thread only
> >> runs only qemu_main_loop() and qemu_cleanup().
> >>
> >> This fixes a case where addRemovableDevicesMenuItems() calls
> >> qmp_query_block() while expecting the main thread to still hold
> >> the BQL.
> >>
> >> Overriding the code after calling qemu_init() is done by dynamically
> >> replacing a function pointer variable, qemu_main when initializing
> >> ui/cocoa, which unifies the static implementation of main() for
> >> builds with ui/cocoa and ones without ui/cocoa.
> >>
> >> Signed-off-by: Akihiko Odaki 
> >
> >> @@ -585,7 +583,7 @@ - (void) updateUIInfo
> >>   /*
> >>* Don't try to tell QEMU about UI information in the application
> >>* startup phase -- we haven't yet registered dcl with the QEMU 
> >> UI
> >> - * layer, and also trying to take the iothread lock would 
> >> deadlock.
> >> + * layer.
> >>* When cocoa_display_init() does register the dcl, the UI layer
> >>* will call cocoa_switch(), which will call updateUIInfo, so
> >>* we don't lose any information here.
> >
> > This comment says that we can't use the dcl while allow_events is false...
> >
> >> @@ -778,16 +776,6 @@ - (void) handleMonitorInput:(NSEvent *)event
> >>
> >>   - (bool) handleEvent:(NSEvent *)event
> >>   {
> >> -if(!allow_events) {
> >> -/*
> >> - * Just let OSX have all events that arrive before
> >> - * applicationDidFinishLaunching.
> >> - * This avoids a deadlock on the iothread lock, which 
> >> cocoa_display_init()
> >> - * will not drop until after the app_started_sem is posted. (In 
> >> theory
> >> - * there should not be any such events, but OSX Catalina now 
> >> emits some.)
> >> - */
> >> -return false;
> >> -}
> >
> > ...so don't we want to also retain this check of allow_events ?
> > Much of the code in handleEventLocked assumes the dcl has been registered.
> >
> >>   return bool_with_iothread_lock(^{
> >>   return [self handleEventLocked:event];
> >>   });
> >
> >> @@ -1915,92 +1898,35 @@ static void 
> >> cocoa_clipboard_request(QemuClipboardInfo *info,
> >>   /*
> >>* The startup process for the OSX/Cocoa UI is complicated, because
> >>* OSX insists that the UI runs on the initial main thread, and so we
> >> - * need to start a second thread which runs the vl.c qemu_main():
> >> - *
> >> - * Initial thread:2nd thread:
> >> - * in main():
> >> - *  create qemu-main thread
> >> - *  wait on display_init semaphore
> >> - *call qemu_main()
> >> - *...
> >> - *in cocoa_display_init():
> >> - * post the display_init semaphore
> >> - * wait on app_started semaphore
> >> - *  create application, menus, etc
> >> - *  enter OSX run loop
> >> - * in applicationDidFinishLaunching:
> >> - *  post app_started semaphore
> >> - * tell main thread to fullscreen if 
> >> needed
> >> - *[...]
> >> - *run qemu main-loop
> >> - *
> >> - * We do this in two stages so that we don't do the creation of the
> >> - * GUI application menus and so on for command line options like --help
> >> - * where we want to just print text to stdout and exit immediately.
> >
> > Could we have an updated version of this diagram that explains the
> > new startup process, please ?
>
> I don't think the diagram is appropriate anymore. It was necessary to
> describe the synchronization between the initial thread and the second
> thread, but they do no longer synchronize at all.

But there are still two threads, and the sequence of events is
not exactly obvious given that things happen in several different
functions. A summary of the expected sequence of events during
startup is still useful to have, I think.

thanks
-- PMM



[PATCH v2 1/3] ui/cocoa: Run qemu_init in the main thread

2022-07-15 Thread Akihiko Odaki
This work is based on:
https://patchew.org/QEMU/20220317125534.38706-1-philippe.mathieu.da...@gmail.com/

Simplify the initialization dance by running qemu_init() in the main
thread before the Cocoa event loop starts. The secondary thread runs
only qemu_main_loop() and qemu_cleanup().

This fixes a case where addRemovableDevicesMenuItems() calls
qmp_query_block() while expecting the main thread to still hold
the BQL.

Overriding the code after calling qemu_init() is done by dynamically
replacing a function pointer variable, qemu_main when initializing
ui/cocoa, which unifies the static implementation of main() for
builds with ui/cocoa and ones without ui/cocoa.

Signed-off-by: Akihiko Odaki 
---
 docs/devel/fuzzing.rst  |   4 +-
 include/qemu-main.h |   3 +-
 include/sysemu/sysemu.h |   2 +-
 softmmu/main.c  |  14 ++--
 softmmu/vl.c|   2 +-
 tests/qtest/fuzz/fuzz.c |   2 +-
 ui/cocoa.m  | 167 ++--
 7 files changed, 70 insertions(+), 124 deletions(-)

diff --git a/docs/devel/fuzzing.rst b/docs/devel/fuzzing.rst
index 784ecb99e66..715330c8561 100644
--- a/docs/devel/fuzzing.rst
+++ b/docs/devel/fuzzing.rst
@@ -287,8 +287,8 @@ select the fuzz target. Then, the qtest client is 
initialized. If the target
 requires qos, qgraph is set up and the QOM/LIBQOS modules are initialized.
 Then the QGraph is walked and the QEMU cmd_line is determined and saved.
 
-After this, the ``vl.c:qemu_main`` is called to set up the guest. There are
-target-specific hooks that can be called before and after qemu_main, for
+After this, the ``vl.c:main`` is called to set up the guest. There are
+target-specific hooks that can be called before and after main, for
 additional setup(e.g. PCI setup, or VM snapshotting).
 
 ``LLVMFuzzerTestOneInput``: Uses qtest/qos functions to act based on the fuzz
diff --git a/include/qemu-main.h b/include/qemu-main.h
index 6a3e90d0ad5..6889375e7c2 100644
--- a/include/qemu-main.h
+++ b/include/qemu-main.h
@@ -5,6 +5,7 @@
 #ifndef QEMU_MAIN_H
 #define QEMU_MAIN_H
 
-int qemu_main(int argc, char **argv, char **envp);
+void qemu_default_main(void);
+extern void (*qemu_main)(void);
 
 #endif /* QEMU_MAIN_H */
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 812f66a31a9..254c1eabf57 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -102,7 +102,7 @@ void qemu_boot_set(const char *boot_order, Error **errp);
 
 bool defaults_enabled(void);
 
-void qemu_init(int argc, char **argv, char **envp);
+void qemu_init(int argc, char **argv);
 void qemu_main_loop(void);
 void qemu_cleanup(void);
 
diff --git a/softmmu/main.c b/softmmu/main.c
index c00432ff098..41a091f2c72 100644
--- a/softmmu/main.c
+++ b/softmmu/main.c
@@ -30,18 +30,18 @@
 #include 
 #endif
 
-int qemu_main(int argc, char **argv, char **envp)
+void qemu_default_main(void)
 {
-qemu_init(argc, argv, envp);
 qemu_main_loop();
 qemu_cleanup();
-
-return 0;
 }
 
-#ifndef CONFIG_COCOA
+void (*qemu_main)(void) = qemu_default_main;
+
 int main(int argc, char **argv)
 {
-return qemu_main(argc, argv, NULL);
+qemu_init(argc, argv);
+qemu_main();
+
+return 0;
 }
-#endif
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 3f264d4b093..e8c73d0bb40 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -2589,7 +2589,7 @@ void qmp_x_exit_preconfig(Error **errp)
 }
 }
 
-void qemu_init(int argc, char **argv, char **envp)
+void qemu_init(int argc, char **argv)
 {
 QemuOpts *opts;
 QemuOpts *icount_opts = NULL, *accel_opts = NULL;
diff --git a/tests/qtest/fuzz/fuzz.c b/tests/qtest/fuzz/fuzz.c
index 0ad4ba9e94d..678c312923a 100644
--- a/tests/qtest/fuzz/fuzz.c
+++ b/tests/qtest/fuzz/fuzz.c
@@ -236,7 +236,7 @@ int LLVMFuzzerInitialize(int *argc, char ***argv, char 
***envp)
 g_free(pretty_cmd_line);
 }
 
-qemu_init(result.we_wordc, result.we_wordv, NULL);
+qemu_init(result.we_wordc, result.we_wordv);
 
 /* re-enable the rcu atfork, which was previously disabled in qemu_init */
 rcu_enable_atfork();
diff --git a/ui/cocoa.m b/ui/cocoa.m
index 6a4dccff7f0..9bf56232691 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -100,13 +100,11 @@ static void cocoa_switch(DisplayChangeListener *dcl,
 static int left_command_key_enabled = 1;
 static bool swap_opt_cmd;
 
-static int gArgc;
-static char **gArgv;
+static QemuThread qemu_main_thread;
+static bool qemu_main_terminating;
 static bool stretch_video;
 static NSTextField *pauseLabel;
 
-static QemuSemaphore display_init_sem;
-static QemuSemaphore app_started_sem;
 static bool allow_events;
 
 static NSInteger cbchangecount = -1;
@@ -585,7 +583,7 @@ - (void) updateUIInfo
 /*
  * Don't try to tell QEMU about UI information in the application
  * startup phase -- we haven't yet registered dcl with the QEMU UI
- * layer, and also trying to take the iothread lock would deadlock.
+ * layer.
  * When 

[PATCH v2 0/3] ui/cocoa: Run qemu_init in the main thread

2022-07-15 Thread Akihiko Odaki
This work is based on:
https://patchew.org/QEMU/20220317125534.38706-1-philippe.mathieu.da...@gmail.com/

Simplify the initialization dance by running qemu_init() in the main
thread before the Cocoa event loop starts. The secondary thread runs
only qemu_main_loop() and qemu_cleanup().

This fixes a case where addRemovableDevicesMenuItems() calls
qmp_query_block() while expecting the main thread to still hold
the BQL.

Overriding the code after calling qemu_init() is done by dynamically
replacing a function pointer variable, qemu_main when initializing
ui/cocoa, which unifies the static implementation of main() for
builds with ui/cocoa and ones without ui/cocoa.

v2: Restore allow_events flag to fix the crash reported by
Philippe Mathieu-Daudé.

Akihiko Odaki (3):
  ui/cocoa: Run qemu_init in the main thread
  Revert "main-loop: Disable block backend global state assertion on
Cocoa"
  meson: Allow to enable gtk and sdl while cocoa is enabled

 docs/devel/fuzzing.rst   |   4 +-
 include/qemu-main.h  |   3 +-
 include/qemu/main-loop.h |  13 ---
 include/sysemu/sysemu.h  |   2 +-
 meson.build  |  10 +--
 softmmu/main.c   |  14 ++--
 softmmu/vl.c |   2 +-
 tests/qtest/fuzz/fuzz.c  |   2 +-
 ui/cocoa.m   | 167 +--
 9 files changed, 72 insertions(+), 145 deletions(-)

-- 
2.32.1 (Apple Git-133)




[PATCH v2 3/3] meson: Allow to enable gtk and sdl while cocoa is enabled

2022-07-15 Thread Akihiko Odaki
As ui/cocoa no longer overrides main(), ui/gtk and ui/sdl
can be enabled even if ui/cocoa is enabled.

Signed-off-by: Akihiko Odaki 
---
 meson.build | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/meson.build b/meson.build
index bc5569ace15..7baec7896ef 100644
--- a/meson.build
+++ b/meson.build
@@ -583,12 +583,6 @@ if get_option('attr').allowed()
 endif
 
 cocoa = dependency('appleframeworks', modules: 'Cocoa', required: 
get_option('cocoa'))
-if cocoa.found() and get_option('sdl').enabled()
-  error('Cocoa and SDL cannot be enabled at the same time')
-endif
-if cocoa.found() and get_option('gtk').enabled()
-  error('Cocoa and GTK+ cannot be enabled at the same time')
-endif
 
 vmnet = dependency('appleframeworks', modules: 'vmnet', required: 
get_option('vmnet'))
 if vmnet.found() and not cc.has_header_symbol('vmnet/vmnet.h',
@@ -915,7 +909,7 @@ if not get_option('brlapi').auto() or have_system
 endif
 
 sdl = not_found
-if not get_option('sdl').auto() or (have_system and not cocoa.found())
+if not get_option('sdl').auto() or have_system
   sdl = dependency('sdl2', required: get_option('sdl'), kwargs: static_kwargs)
   sdl_image = not_found
 endif
@@ -1181,7 +1175,7 @@ endif
 gtk = not_found
 gtkx11 = not_found
 vte = not_found
-if not get_option('gtk').auto() or (have_system and not cocoa.found())
+if not get_option('gtk').auto() or have_system
   gtk = dependency('gtk+-3.0', version: '>=3.22.0',
method: 'pkg-config',
required: get_option('gtk'),
-- 
2.32.1 (Apple Git-133)




Re: [PATCH v7 07/14] KVM: Use gfn instead of hva for mmu_notifier_retry

2022-07-15 Thread Gupta, Pankaj

Currently in mmu_notifier validate path, hva range is recorded and then
checked in the mmu_notifier_retry_hva() from page fault path. However
for the to be introduced private memory, a page fault may not have a hva


As this patch appeared in v7, just wondering did you see an actual bug 
because of it? And not having corresponding 'hva' occurs only with 
private memory because its not mapped to host userspace?


Thanks,
Pankaj


associated, checking gfn(gpa) makes more sense. For existing non private
memory case, gfn is expected to continue to work.

The patch also fixes a potential bug in kvm_zap_gfn_range() which has
already been using gfn when calling kvm_inc/dec_notifier_count() in
current code.

Signed-off-by: Chao Peng 
---
  arch/x86/kvm/mmu/mmu.c   |  2 +-
  include/linux/kvm_host.h | 18 --
  virt/kvm/kvm_main.c  |  6 +++---
  3 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index f7fa4c31b7c5..0d882fad4bc1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4182,7 +4182,7 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
return true;
  
  	return fault->slot &&

-  mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
+  mmu_notifier_retry_gfn(vcpu->kvm, mmu_seq, fault->gfn);
  }
  
  static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0bdb6044e316..e9153b54e2a4 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -767,8 +767,8 @@ struct kvm {
struct mmu_notifier mmu_notifier;
unsigned long mmu_notifier_seq;
long mmu_notifier_count;
-   unsigned long mmu_notifier_range_start;
-   unsigned long mmu_notifier_range_end;
+   gfn_t mmu_notifier_range_start;
+   gfn_t mmu_notifier_range_end;
  #endif
struct list_head devices;
u64 manual_dirty_log_protect;
@@ -1362,10 +1362,8 @@ void kvm_mmu_free_memory_cache(struct 
kvm_mmu_memory_cache *mc);
  void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
  #endif
  
-void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,

-  unsigned long end);
-void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start,
-  unsigned long end);
+void kvm_inc_notifier_count(struct kvm *kvm, gfn_t start, gfn_t end);
+void kvm_dec_notifier_count(struct kvm *kvm, gfn_t start, gfn_t end);
  
  long kvm_arch_dev_ioctl(struct file *filp,

unsigned int ioctl, unsigned long arg);
@@ -1923,9 +1921,9 @@ static inline int mmu_notifier_retry(struct kvm *kvm, 
unsigned long mmu_seq)
return 0;
  }
  
-static inline int mmu_notifier_retry_hva(struct kvm *kvm,

+static inline int mmu_notifier_retry_gfn(struct kvm *kvm,
 unsigned long mmu_seq,
-unsigned long hva)
+gfn_t gfn)
  {
	lockdep_assert_held(&kvm->mmu_lock);
/*
@@ -1935,8 +1933,8 @@ static inline int mmu_notifier_retry_hva(struct kvm *kvm,
 * positives, due to shortcuts when handing concurrent invalidations.
 */
if (unlikely(kvm->mmu_notifier_count) &&
-   hva >= kvm->mmu_notifier_range_start &&
-   hva < kvm->mmu_notifier_range_end)
+   gfn >= kvm->mmu_notifier_range_start &&
+   gfn < kvm->mmu_notifier_range_end)
return 1;
if (kvm->mmu_notifier_seq != mmu_seq)
return 1;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index da263c370d00..4d7f0e72366f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -536,8 +536,7 @@ static void kvm_mmu_notifier_invalidate_range(struct 
mmu_notifier *mn,
  
  typedef bool (*hva_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range);
  
-typedef void (*on_lock_fn_t)(struct kvm *kvm, unsigned long start,

-unsigned long end);
+typedef void (*on_lock_fn_t)(struct kvm *kvm, gfn_t start, gfn_t end);
  
  typedef void (*on_unlock_fn_t)(struct kvm *kvm);
  
@@ -624,7 +623,8 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,

locked = true;
KVM_MMU_LOCK(kvm);
if (!IS_KVM_NULL_FN(range->on_lock))
-   range->on_lock(kvm, range->start, 
range->end);
+   range->on_lock(kvm, gfn_range.start,
+   gfn_range.end);
if (IS_KVM_NULL_FN(range->handler))
break;
}





Re: [PATCH 0/2] gitlab-ci: msys2 improvements

2022-07-15 Thread Marc-André Lureau
Hi Richard

On Tue, Jul 12, 2022 at 3:38 PM Marc-André Lureau <
marcandre.lur...@redhat.com> wrote:

> Hi
>
> On Tue, Jul 12, 2022 at 10:10 AM Richard Henderson
>  wrote:
> >
> > On 7/12/22 09:24, Richard Henderson wrote:
> > > On 7/11/22 13:26, marcandre.lur...@redhat.com wrote:
> > >> From: Marc-André Lureau 
> > >>
> > >> Hi
> > >>
> > >> This is a small series to attempt to debug "Intermittent meson
> failures on
> > >> msys2" and improve a bit msys2/gitlab reports.
> > >
> > > Thanks.  I've pushed this to staging for a test run:
> > >
> > > https://gitlab.com/qemu-project/qemu/-/pipelines/585473909
> >
> > Amusingly, both msys2 jobs passed the first time, but I reran and now
> have a failure for
> > your investigation:
> >
> > https://gitlab.com/qemu-project/qemu/-/jobs/2707801937
> >
>
> Hmm, there are no artifacts. How come? meson-logs/ should be there..
> Anyway, I am not sure it would have more details about the failing
> command.
>
> Sadly we don't have any stderr output, and ninja doesn't log the
> failing command exit code either. I guess I will try with a custom
> ninja build now.
>

I think I have triggered 10x times the build now, but I am not reaching the
build error, only random build time over 1h10 limit...

No idea.. Maybe you can try it?
https://gitlab.com/marcandre.lureau/qemu/-/tree/msys2

I have simply patched ninja with
https://github.com/msys2/MINGW-packages/compare/master...elmarco:MINGW-packages:master


-- 
Marc-André Lureau


Re: [PATCH v7 09/10] i386/pc: relocate 4g start to 1T where applicable

2022-07-15 Thread Joao Martins
On 7/15/22 12:57, Igor Mammedov wrote:
> On Thu, 14 Jul 2022 19:28:19 +0100
> Joao Martins  wrote:
> 
>> It is assumed that the whole GPA space is available to be DMA
>> addressable, within a given address space limit, except for a
>> tiny region before the 4G. Since Linux v5.4, VFIO validates
>> whether the selected GPA is indeed valid i.e. not reserved by
>> IOMMU on behalf of some specific devices or platform-defined
>> restrictions, and thus failing the ioctl(VFIO_DMA_MAP) with
>>  -EINVAL.
>>
>> AMD systems with an IOMMU are examples of such platforms and
>> particularly may only have these ranges as allowed:
>>
>>  0000000000000000 - 00000000fedfffff (0      .. 3.982G)
>>  00000000fef00000 - 000000fcffffffff (3.983G .. 1011.9G)
>>  0000010000000000 - ffffffffffffffff (1Tb    .. 16Pb[*])
>>
>> We already account for the 4G hole, albeit if the guest is big
>> enough we will fail to allocate a guest with  >1010G due to the
>> ~12G hole at the 1Tb boundary, reserved for HyperTransport (HT).
>>
>> [*] there is another reserved region unrelated to HT that exists
>> in the 256T boundary in Fam 17h according to Errata #1286,
>> documeted also in "Open-Source Register Reference for AMD Family
>> 17h Processors (PUB)"
>>
>> When creating the region above 4G, take into account that on AMD
>> platforms the HyperTransport range is reserved and hence it
>> cannot be used either as GPAs. On those cases rather than
>> establishing the start of ram-above-4g to be 4G, relocate instead
>> to 1Tb. See AMD IOMMU spec, section 2.1.2 "IOMMU Logical
>> Topology", for more information on the underlying restriction of
>> IOVAs.
>>
>> After accounting for the 1Tb hole on AMD hosts, mtree should
>> look like:
>>
>> 0000000000000000-000000007fffffff (prio 0, i/o):
>>   alias ram-below-4g @pc.ram 0000000000000000-000000007fffffff
>> 0000010000000000-000001ff7fffffff (prio 0, i/o):
>>  alias ram-above-4g @pc.ram 0000000080000000-000000ffffffffff
>>
>> If the relocation is done or the address space covers it, we
>> also add the the reserved HT e820 range as reserved.
>>
>> Default phys-bits on Qemu is TCG_PHYS_ADDR_BITS (40) which is enough
>> to address 1Tb (0xff ffff ffff). On AMD platforms, if a
>> ram-above-4g relocation may be desired and the CPU wasn't configured
>> with a big enough phys-bits, print an error message to the user
>> and do not make the relocation of the above-4g-region if phys-bits
>> is too low.
>>
>> Suggested-by: Igor Mammedov 
>> Signed-off-by: Joao Martins 
>> ---
>>  hw/i386/pc.c | 82 
>>  1 file changed, 82 insertions(+)
>>
>> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>> index cda435e3baeb..17613974163e 100644
>> --- a/hw/i386/pc.c
>> +++ b/hw/i386/pc.c
>> @@ -880,6 +880,52 @@ static uint64_t pc_get_cxl_range_end(PCMachineState 
>> *pcms)
>>  return start;
>>  }
>>  
>> +static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t 
>> pci_hole64_size)
>> +{
>> +X86CPU *cpu = X86_CPU(first_cpu);
>> +
>> +/* 32-bit systems don't have hole64 thus return max CPU address */
>> +if (cpu->phys_bits <= 32) {
>> +return ((hwaddr)1 << cpu->phys_bits) - 1;
>> +}
>> +
>> +return pc_pci_hole64_start() + pci_hole64_size - 1;
>> +}
>> +
> [...]
> 
>> +
>> +/*
>> + * Relocating ram-above-4G requires more than TCG_PHYS_ADDR_BITS (40).
>> + * So make sure phys-bits is required to be appropriately sized in order
>> + * to proceed with the above-4g-region relocation and thus boot.
> 
> drop mention of relocation here as it's orthogonal to the check.
> Important thing we are checking here is that max used GPA is
> reachable by configured vCPU (physbits).
> 

OK

>> + */
>> +maxusedaddr = pc_max_used_gpa(pcms, pci_hole64_size);
>> +maxphysaddr = ((hwaddr)1 << cpu->phys_bits) - 1;
>> +if (maxphysaddr < maxusedaddr) {
>> +error_report("Address space limit 0x%"PRIx64" < 0x%"PRIx64
>> + " phys-bits too low (%u)",
>> + maxphysaddr, maxusedaddr, cpu->phys_bits);
>> +exit(EXIT_FAILURE);
>> +}
> 
> these hunks should be a separate patch preceding relocation patch
> as it basically does max_gpa vs physbits check regardless
> of relocation (i.e. relocation is only one of the reasons
> max_used_gpa might exceed physbits).
> 
Yeap, makes sense given that this is now generic regardless of AMD 1Tb hole.

>> +
>>  /*
>>   * Split single memory region and use aliases to address portions of it,
>>   * done for backwards compatibility with older qemus.
> 



[PATCH] target/arm: Don't set syndrome ISS for loads and stores with writeback

2022-07-15 Thread Peter Maydell
The architecture requires that for faults on loads and stores which
do writeback, the syndrome information does not have the ISS
instruction syndrome information (i.e. ISV is 0).  We got this wrong
for the load and store instructions covered by disas_ldst_reg_imm9().
Calculate iss_valid correctly so that if the insn is a writeback one
it is false.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1057
Signed-off-by: Peter Maydell 
---
Tested with RTH's test case attached to the bug report.
---
 target/arm/translate-a64.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index b7b64f73584..163df8c6157 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -3138,7 +3138,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t 
insn,
 bool is_store = false;
 bool is_extended = false;
 bool is_unpriv = (idx == 2);
-bool iss_valid = !is_vector;
+bool iss_valid;
 bool post_index;
 bool writeback;
 int memidx;
@@ -3191,6 +3191,8 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t 
insn,
 g_assert_not_reached();
 }
 
+iss_valid = !is_vector && !writeback;
+
 if (rn == 31) {
 gen_check_sp_alignment(s);
 }
-- 
2.25.1




Re: [PATCH v3 3/3] target/ppc: Check page dir/table base alignment

2022-07-15 Thread Daniel Henrique Barboza




On 6/28/22 10:39, Leandro Lupori wrote:

According to PowerISA 3.1B, Book III 6.7.6 programming note, the
page directory base addresses are expected to be aligned to their
size. Real hardware seems to rely on that and will access the
wrong address if they are misaligned. This results in a
translation failure even if the page tables seem to be properly
populated.

Signed-off-by: Leandro Lupori 
---


Reviewed-by: Daniel Henrique Barboza 


  target/ppc/mmu-radix64.c | 28 
  1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 705bff76be..00f2e9fa2e 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -265,7 +265,7 @@ static int ppc_radix64_next_level(AddressSpace *as, vaddr 
eaddr,
uint64_t *pte_addr, uint64_t *nls,
int *psize, uint64_t *pte, int *fault_cause)
  {
-uint64_t index, pde;
+uint64_t index, mask, nlb, pde;
  
  /* Read page <directory/table> entry from guest address space */

  pde = ldq_phys(as, *pte_addr);
@@ -280,7 +280,17 @@ static int ppc_radix64_next_level(AddressSpace *as, vaddr 
eaddr,
  *nls = pde & R_PDE_NLS;
  index = eaddr >> (*psize - *nls);   /* Shift */
  index &= ((1UL << *nls) - 1);   /* Mask */
-*pte_addr = (pde & R_PDE_NLB) + (index * sizeof(pde));
+nlb = pde & R_PDE_NLB;
+mask = MAKE_64BIT_MASK(0, *nls + 3);
+
+if (nlb & mask) {
+qemu_log_mask(LOG_GUEST_ERROR,
+"%s: misaligned page dir/table base: 0x"TARGET_FMT_lx
+" page dir size: 0x"TARGET_FMT_lx"\n",
+__func__, nlb, mask + 1);
+nlb &= ~mask;
+}
+*pte_addr = nlb + index * sizeof(pde);
  }
  return 0;
  }
@@ -294,8 +304,18 @@ static int ppc_radix64_walk_tree(AddressSpace *as, vaddr 
eaddr,
  int level = 0;
  
  index = eaddr >> (*psize - nls);/* Shift */

-index &= ((1UL << nls) - 1);   /* Mask */
-*pte_addr = base_addr + (index * sizeof(pde));
+index &= ((1UL << nls) - 1);/* Mask */
+mask = MAKE_64BIT_MASK(0, nls + 3);
+
+if (base_addr & mask) {
+qemu_log_mask(LOG_GUEST_ERROR,
+"%s: misaligned page dir base: 0x"TARGET_FMT_lx
+" page dir size: 0x"TARGET_FMT_lx"\n",
+__func__, base_addr, mask + 1);
+base_addr &= ~mask;
+}
+*pte_addr = base_addr + index * sizeof(pde);
+
  do {
  int ret;
  




Re: [PATCH v8 08/12] s390x/cpu_topology: implementing numa for the s390x topology

2022-07-15 Thread Pierre Morel




On 7/15/22 11:11, Janis Schoetterl-Glausch wrote:

On 7/14/22 22:17, Pierre Morel wrote:



On 7/14/22 16:57, Janis Schoetterl-Glausch wrote:

On 6/20/22 16:03, Pierre Morel wrote:

S390x CPU Topology allows a non uniform repartition of the CPU
inside the topology containers, sockets, books and drawers.

We use numa to place the CPU inside the right topology container
and report the non uniform topology to the guest.

Note that s390x needs CPU0 to belong to the topology and consequently
all topology must include CPU0.

We accept a partial QEMU numa definition, in that case undefined CPUs
are added to free slots in the topology starting with slot 0 and going
up.


I don't understand why doing it this way, via numa, makes sense for us.
We report the topology to the guest via STSI, which tells the guest
what the topology "tree" looks like. We don't report any numa distances to the 
guest.
The natural way to specify where a cpu is added to the vm, seems to me to be
by specify the socket, book, ... IDs when doing a device_add or via -device on
the command line.

[...]



It is a choice to have the core-id determine where the CPU is situated in the 
topology.

But yes we can chose the use drawer-id,book-id,socket-id and use a core-id 
starting on 0 on each socket.

It is not done in the current implementation because the core-id implies the 
socket-id, book-id and drawer-id together with the smp parameters.



Regardless of whether the core-id or the combination of socket-id, book-id .. 
is used to specify where a CPU is
located, why use the numa framework and not just device_add or -device ?


You are right, at least we should be able to use both.
I will work on this.



That feels way more natural since it should already just work if you can do 
hotplug.
At least with core-id and I suspect with a subset of your changes also with 
socket-id, etc.


yes, it already works with core-id



Whereas numa is an awkward fit since it's for specifying distances between 
nodes, which we don't do,
and you have to use a hack to get it to specify which CPUs to plug (via setting 
arch_id to -1).



Is it only for this?

--
Pierre Morel
IBM Lab Boeblingen



Re: [PATCH v2 1/3] ui/cocoa: Run qemu_init in the main thread

2022-07-15 Thread Akihiko Odaki

On 2022/07/15 22:10, Peter Maydell wrote:

On Fri, 15 Jul 2022 at 12:40, Akihiko Odaki  wrote:


This work is based on:
https://patchew.org/QEMU/20220317125534.38706-1-philippe.mathieu.da...@gmail.com/

Simplify the initialization dance by running qemu_init() in the main
thread before the Cocoa event loop starts. The secondary thread only
runs only qemu_main_loop() and qemu_cleanup().

This fixes a case where addRemovableDevicesMenuItems() calls
qmp_query_block() while expecting the main thread to still hold
the BQL.

Overriding the code after calling qemu_init() is done by dynamically
replacing a function pointer variable, qemu_main when initializing
ui/cocoa, which unifies the static implementation of main() for
builds with ui/cocoa and ones without ui/cocoa.

Signed-off-by: Akihiko Odaki 



@@ -585,7 +583,7 @@ - (void) updateUIInfo
  /*
   * Don't try to tell QEMU about UI information in the application
   * startup phase -- we haven't yet registered dcl with the QEMU UI
- * layer, and also trying to take the iothread lock would deadlock.
+ * layer.
   * When cocoa_display_init() does register the dcl, the UI layer
   * will call cocoa_switch(), which will call updateUIInfo, so
   * we don't lose any information here.


This comment says that we can't use the dcl while allow_events is false...


@@ -778,16 +776,6 @@ - (void) handleMonitorInput:(NSEvent *)event

  - (bool) handleEvent:(NSEvent *)event
  {
-if(!allow_events) {
-/*
- * Just let OSX have all events that arrive before
- * applicationDidFinishLaunching.
- * This avoids a deadlock on the iothread lock, which 
cocoa_display_init()
- * will not drop until after the app_started_sem is posted. (In theory
- * there should not be any such events, but OSX Catalina now emits 
some.)
- */
-return false;
-}


...so don't we want to also retain this check of allow_events ?
Much of the code in handleEventLocked assumes the dcl has been registered.


  return bool_with_iothread_lock(^{
  return [self handleEventLocked:event];
  });



@@ -1915,92 +1898,35 @@ static void cocoa_clipboard_request(QemuClipboardInfo 
*info,
  /*
   * The startup process for the OSX/Cocoa UI is complicated, because
   * OSX insists that the UI runs on the initial main thread, and so we
- * need to start a second thread which runs the vl.c qemu_main():
- *
- * Initial thread:2nd thread:
- * in main():
- *  create qemu-main thread
- *  wait on display_init semaphore
- *call qemu_main()
- *...
- *in cocoa_display_init():
- * post the display_init semaphore
- * wait on app_started semaphore
- *  create application, menus, etc
- *  enter OSX run loop
- * in applicationDidFinishLaunching:
- *  post app_started semaphore
- * tell main thread to fullscreen if needed
- *[...]
- *run qemu main-loop
- *
- * We do this in two stages so that we don't do the creation of the
- * GUI application menus and so on for command line options like --help
- * where we want to just print text to stdout and exit immediately.


Could we have an updated version of this diagram that explains the
new startup process, please ?


I don't think the diagram is appropriate anymore. It was necessary to 
describe the synchronization between the initial thread and the second 
thread, but they do no longer synchronize at all.


Regards,
Akihiko Odaki




+ * need to start a second thread which runs the qemu_default_main().
   */


Otherwise this looks good, and it's nice to get rid of that redefine-main
hack.

thanks
-- PMM





Re: [PATCH v2 09/11] s390x: Introduce PV query interface

2022-07-15 Thread Janosch Frank

On 7/15/22 10:10, Marc-André Lureau wrote:
[...]

  ms->pv = true;

+rc = s390_pv_query_info();
+if (rc) {
+goto out_err;



Maybe it's not necessary to make it fatal on error?

lgtm otherwise


Hmm, yes and no.
The info API is fenced by the dump CAP so I don't ever expect an error 
here but on the other hand an optional info API fail might not warrant 
an error.






+}
+
  /* Set SE header and unpack */
  rc = s390_ipl_prepare_pv_header();
  if (rc) {
diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h
index 1f1f545bfc..6fa55bf70e 100644
--- a/include/hw/s390x/pv.h
+++ b/include/hw/s390x/pv.h
@@ -38,6 +38,7 @@ static inline bool s390_is_pv(void)
  return ccw->pv;
  }

+int s390_pv_query_info(void);
  int s390_pv_vm_enable(void);
  void s390_pv_vm_disable(void);
  int s390_pv_set_sec_parms(uint64_t origin, uint64_t length);
@@ -46,8 +47,13 @@ void s390_pv_prep_reset(void);
  int s390_pv_verify(void);
  void s390_pv_unshare(void);
  void s390_pv_inject_reset_error(CPUState *cs);
+uint64_t kvm_s390_pv_dmp_get_size_cpu(void);
+uint64_t kvm_s390_pv_dmp_get_size_mem(void);
+uint64_t kvm_s390_pv_dmp_get_size_complete(void);
+bool kvm_s390_pv_info_basic_valid(void);
  #else /* CONFIG_KVM */
  static inline bool s390_is_pv(void) { return false; }
+static inline int s390_pv_query_info(void) { return 0; }
  static inline int s390_pv_vm_enable(void) { return 0; }
  static inline void s390_pv_vm_disable(void) {}
  static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length)
{ return 0; }
@@ -56,6 +62,10 @@ static inline void s390_pv_prep_reset(void) {}
  static inline int s390_pv_verify(void) { return 0; }
  static inline void s390_pv_unshare(void) {}
  static inline void s390_pv_inject_reset_error(CPUState *cs) {};
+static inline uint64_t kvm_s390_pv_dmp_get_size_cpu(void) { return 0; }
+static inline uint64_t kvm_s390_pv_dmp_get_size_mem(void) { return 0; }
+static inline uint64_t kvm_s390_pv_dmp_get_size_complete(void) { return
0; }
+static inline bool kvm_s390_pv_info_basic_valid(void) { return false; }
  #endif /* CONFIG_KVM */

  int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
--
2.34.1









Re: [PATCH v2 09/11] s390x: Introduce PV query interface

2022-07-15 Thread Marc-André Lureau
On Fri, Jul 15, 2022 at 12:18 PM Janosch Frank 
wrote:

> On 7/15/22 10:10, Marc-André Lureau wrote:
> [...]
> >>   ms->pv = true;
> >>
> >> +rc = s390_pv_query_info();
> >> +if (rc) {
> >> +goto out_err;
> >>
> >
> > Maybe it's not necessary to make it fatal on error?
> >
> > lgtm otherwise
>
> Hmm, yes and no.
> The info API is fenced by the dump CAP so I don't ever expect an error
> here but on the other hand an optional info API fail might not warrant
> an error.
>
>
I see. You could explain more explicitly in the commit messages and/or
comments the kernel version/requirements.



> >
> >
> >> +}
> >> +
> >>   /* Set SE header and unpack */
> >>   rc = s390_ipl_prepare_pv_header();
> >>   if (rc) {
> >> diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h
> >> index 1f1f545bfc..6fa55bf70e 100644
> >> --- a/include/hw/s390x/pv.h
> >> +++ b/include/hw/s390x/pv.h
> >> @@ -38,6 +38,7 @@ static inline bool s390_is_pv(void)
> >>   return ccw->pv;
> >>   }
> >>
> >> +int s390_pv_query_info(void);
> >>   int s390_pv_vm_enable(void);
> >>   void s390_pv_vm_disable(void);
> >>   int s390_pv_set_sec_parms(uint64_t origin, uint64_t length);
> >> @@ -46,8 +47,13 @@ void s390_pv_prep_reset(void);
> >>   int s390_pv_verify(void);
> >>   void s390_pv_unshare(void);
> >>   void s390_pv_inject_reset_error(CPUState *cs);
> >> +uint64_t kvm_s390_pv_dmp_get_size_cpu(void);
> >> +uint64_t kvm_s390_pv_dmp_get_size_mem(void);
> >> +uint64_t kvm_s390_pv_dmp_get_size_complete(void);
> >> +bool kvm_s390_pv_info_basic_valid(void);
> >>   #else /* CONFIG_KVM */
> >>   static inline bool s390_is_pv(void) { return false; }
> >> +static inline int s390_pv_query_info(void) { return 0; }
> >>   static inline int s390_pv_vm_enable(void) { return 0; }
> >>   static inline void s390_pv_vm_disable(void) {}
> >>   static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t
> length)
> >> { return 0; }
> >> @@ -56,6 +62,10 @@ static inline void s390_pv_prep_reset(void) {}
> >>   static inline int s390_pv_verify(void) { return 0; }
> >>   static inline void s390_pv_unshare(void) {}
> >>   static inline void s390_pv_inject_reset_error(CPUState *cs) {};
> >> +static inline uint64_t kvm_s390_pv_dmp_get_size_cpu(void) { return 0; }
> >> +static inline uint64_t kvm_s390_pv_dmp_get_size_mem(void) { return 0; }
> >> +static inline uint64_t kvm_s390_pv_dmp_get_size_complete(void) { return
> >> 0; }
> >> +static inline bool kvm_s390_pv_info_basic_valid(void) { return false; }
> >>   #endif /* CONFIG_KVM */
> >>
> >>   int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
> >> --
> >> 2.34.1
> >>
> >>
> >>
> >
>
>

-- 
Marc-André Lureau


Re: [PATCH 1/3] target/i386: display deprecation note in '-cpu help'

2022-07-15 Thread Cornelia Huck
On Thu, Jul 14 2022, Daniel P. Berrangé  wrote:

> The deprecation notes are currently only displayed at runtime when the
> user activates a CPU. The QMP query displays a simple flag for
> deprecation, while '-cpu help' displays nothing unless the deprecation
> info is duplicated into the 'notes' field.
>
> This changes the code so that deprecation notes are explicitly shown
> in '-cpu help', to assist the user in deciding what to use.
>
> Signed-off-by: Daniel P. Berrangé 
> ---
>  target/i386/cpu.c | 13 -
>  1 file changed, 12 insertions(+), 1 deletion(-)

Reviewed-by: Cornelia Huck 




Re: [PATCH 3/3] target/arm: display deprecation note in '-cpu help'

2022-07-15 Thread Cornelia Huck
On Thu, Jul 14 2022, Daniel P. Berrangé  wrote:

> The deprecation notes are currently only displayed at runtime when the
> user activates a CPU. The QMP query displays a simple flag for
> deprecation, while '-cpu help' displays nothing unless the deprecation
> info is duplicated into the 'notes' field.
>
> This changes the code so that deprecation notes are explicitly shown
> in '-cpu help', to assist the user in deciding what to use.
>
> Signed-off-by: Daniel P. Berrangé 
> ---
>  target/arm/helper.c | 10 +-
>  1 file changed, 9 insertions(+), 1 deletion(-)

Reviewed-by: Cornelia Huck 




[PULL 3/6] hw/nvme: fix example serial in documentation

2022-07-15 Thread Klaus Jensen
From: Niklas Cassel 

The serial prop on the controller is actually describing the nvme
subsystem serial, which has to be identical for all controllers within
the same nvme subsystem.

This is enforced since commit a859eb9f8f64 ("hw/nvme: enforce common
serial per subsystem").

Fix the documentation, so that people copying the qemu command line
example won't get an error on qemu start.

Signed-off-by: Niklas Cassel 
Signed-off-by: Klaus Jensen 
---
 docs/system/devices/nvme.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/system/devices/nvme.rst b/docs/system/devices/nvme.rst
index aba253304e46..30f841ef6222 100644
--- a/docs/system/devices/nvme.rst
+++ b/docs/system/devices/nvme.rst
@@ -104,8 +104,8 @@ multipath I/O.
 .. code-block:: console
 
-device nvme-subsys,id=nvme-subsys-0,nqn=subsys0
-   -device nvme,serial=a,subsys=nvme-subsys-0
-   -device nvme,serial=b,subsys=nvme-subsys-0
+   -device nvme,serial=deadbeef,subsys=nvme-subsys-0
+   -device nvme,serial=deadbeef,subsys=nvme-subsys-0
 
 This will create an NVM subsystem with two controllers. Having controllers
 linked to an ``nvme-subsys`` device allows additional ``nvme-ns`` parameters:
-- 
2.36.1




Re: [PATCH v2 15/19] vdpa: manual forward CVQ buffers

2022-07-15 Thread Jason Wang
On Fri, Jul 15, 2022 at 1:34 PM Eugenio Perez Martin
 wrote:
>
> On Fri, Jul 15, 2022 at 6:08 AM Jason Wang  wrote:
> >
> > On Fri, Jul 15, 2022 at 12:32 AM Eugenio Pérez  wrote:
> > >
> > > Do a simple forwarding of CVQ buffers, the same work SVQ could do but
> > > through callbacks. No functional change intended.
> > >
> > > Signed-off-by: Eugenio Pérez 
> > > ---
> > >  include/hw/virtio/vhost-vdpa.h |  3 ++
> > >  hw/virtio/vhost-vdpa.c |  3 +-
> > >  net/vhost-vdpa.c   | 58 ++
> > >  3 files changed, 63 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/include/hw/virtio/vhost-vdpa.h 
> > > b/include/hw/virtio/vhost-vdpa.h
> > > index 7214eb47dc..d85643 100644
> > > --- a/include/hw/virtio/vhost-vdpa.h
> > > +++ b/include/hw/virtio/vhost-vdpa.h
> > > @@ -15,6 +15,7 @@
> > >  #include 
> > >
> > >  #include "hw/virtio/vhost-iova-tree.h"
> > > +#include "hw/virtio/vhost-shadow-virtqueue.h"
> > >  #include "hw/virtio/virtio.h"
> > >  #include "standard-headers/linux/vhost_types.h"
> > >
> > > @@ -35,6 +36,8 @@ typedef struct vhost_vdpa {
> > >  /* IOVA mapping used by the Shadow Virtqueue */
> > >  VhostIOVATree *iova_tree;
> > >  GPtrArray *shadow_vqs;
> > > +const VhostShadowVirtqueueOps *shadow_vq_ops;
> > > +void *shadow_vq_ops_opaque;
> > >  struct vhost_dev *dev;
> > >  VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
> > >  } VhostVDPA;
> > > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > > index 96997210be..beaaa7049a 100644
> > > --- a/hw/virtio/vhost-vdpa.c
> > > +++ b/hw/virtio/vhost-vdpa.c
> > > @@ -419,7 +419,8 @@ static int vhost_vdpa_init_svq(struct vhost_dev 
> > > *hdev, struct vhost_vdpa *v,
> > >  for (unsigned n = 0; n < hdev->nvqs; ++n) {
> > >  g_autoptr(VhostShadowVirtqueue) svq;
> > >
> > > -svq = vhost_svq_new(v->iova_tree, NULL, NULL);
> > > +svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops,
> > > +v->shadow_vq_ops_opaque);
> > >  if (unlikely(!svq)) {
> > >  error_setg(errp, "Cannot create svq %u", n);
> > >  return -1;
> > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > index df1e69ee72..805c9dd6b6 100644
> > > --- a/net/vhost-vdpa.c
> > > +++ b/net/vhost-vdpa.c
> > > @@ -11,11 +11,14 @@
> > >
> > >  #include "qemu/osdep.h"
> > >  #include "clients.h"
> > > +#include "hw/virtio/virtio-net.h"
> > >  #include "net/vhost_net.h"
> > >  #include "net/vhost-vdpa.h"
> > >  #include "hw/virtio/vhost-vdpa.h"
> > >  #include "qemu/config-file.h"
> > >  #include "qemu/error-report.h"
> > > +#include "qemu/log.h"
> > > +#include "qemu/memalign.h"
> > >  #include "qemu/option.h"
> > >  #include "qapi/error.h"
> > >  #include 
> > > @@ -187,6 +190,57 @@ static NetClientInfo net_vhost_vdpa_info = {
> > >  .check_peer_type = vhost_vdpa_check_peer_type,
> > >  };
> > >
> > > +/**
> > > + * Forward buffer for the moment.
> > > + */
> > > +static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
> > > +SVQElement *svq_elem, void 
> > > *opaque)
> > > +{
> > > +VirtQueueElement *elem = _elem->elem;
> > > +unsigned int n = elem->out_num + elem->in_num;
> > > +g_autofree struct iovec *dev_buffers = g_new(struct iovec, n);
> > > +size_t in_len, dev_written;
> > > +virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> > > +int r;
> > > +
> > > +memcpy(dev_buffers, elem->out_sg, elem->out_num);
> > > +memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num);
> > > +
> > > +r = vhost_svq_add(svq, _buffers[0], elem->out_num, 
> > > _buffers[1],
> > > +  elem->in_num, svq_elem);
> > > +if (unlikely(r != 0)) {
> > > +if (unlikely(r == -ENOSPC)) {
> > > +qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device 
> > > queue\n",
> > > +  __func__);
> > > +}
> > > +goto out;
> > > +}
> > > +
> > > +/*
> > > + * We can poll here since we've had BQL from the time we sent the
> > > + * descriptor. Also, we need to take the answer before SVQ pulls by 
> > > itself,
> > > + * when BQL is released
> > > + */
> > > +dev_written = vhost_svq_poll(svq);
> > > +if (unlikely(dev_written < sizeof(status))) {
> > > +error_report("Insufficient written data (%zu)", dev_written);
> > > +}
> > > +
> > > +out:
> > > +in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, ,
> > > +  sizeof(status));
> > > +if (unlikely(in_len < sizeof(status))) {
> > > +error_report("Bad device CVQ written length");
> > > +}
> > > +vhost_svq_push_elem(svq, svq_elem, MIN(in_len, sizeof(status)));
> > > +g_free(svq_elem);
> > > +return r;
> > > +}
> > > +
> > > +static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
> > > +.avail_handler = 

Re: [PATCH v2 18/19] vdpa: Add device migration blocker

2022-07-15 Thread Eugenio Perez Martin
On Fri, Jul 15, 2022 at 10:51 AM Jason Wang  wrote:
>
> On Fri, Jul 15, 2022 at 1:40 PM Eugenio Perez Martin
>  wrote:
> >
> > On Fri, Jul 15, 2022 at 6:03 AM Jason Wang  wrote:
> > >
> > > On Fri, Jul 15, 2022 at 12:32 AM Eugenio Pérez  
> > > wrote:
> > > >
> > > > Since the vhost-vdpa device is exposing _F_LOG,
> > >
> > > I may miss something but I think it doesn't?
> > >
> >
> > It's at vhost_vdpa_get_features. As long as SVQ is enabled, it's
> > exposing VHOST_F_LOG_ALL.
>
> Ok, so this needs to be specified in the change log.

Got it, I'll write some note.

> But I'm kind of
> confused here, we do want to allow migration to work so why we disable
> it?
>

With x-svq parameter, migration of simple devices with no cvq "is
possible". It has intrinsic problems like can't emit the gratuitous
arp but it's possible and traffic can continue.

But devices with cvq require to restore the state at the destination.
That part is not implemented, so it's blocked at the moment.

In the immediate future not all cases (as "net features") will be
available: net/vhost-net.c (or virtio-net.c?) needs to know how to
inject the state at the destination to restore the guest visible
configuration. It's simple code, but it needs to be developed. So
migration blocker is kept for these features. Hopefully, we will reach
a point where all features supported by virtio-net.c will be
supported, but the right thing to do is to merge basic ones first.

Thanks!




Re: [PATCH v8 08/12] s390x/cpu_topology: implementing numa for the s390x topology

2022-07-15 Thread Janis Schoetterl-Glausch
On 7/14/22 22:17, Pierre Morel wrote:
> 
> 
> On 7/14/22 16:57, Janis Schoetterl-Glausch wrote:
>> On 6/20/22 16:03, Pierre Morel wrote:
>>> S390x CPU Topology allows a non uniform repartition of the CPU
>>> inside the topology containers, sockets, books and drawers.
>>>
>>> We use numa to place the CPU inside the right topology container
>>> and report the non uniform topology to the guest.
>>>
>>> Note that s390x needs CPU0 to belong to the topology and consequently
>>> all topology must include CPU0.
>>>
>>> We accept a partial QEMU numa definition, in that case undefined CPUs
>>> are added to free slots in the topology starting with slot 0 and going
>>> up.
>>
>> I don't understand why doing it this way, via numa, makes sense for us.
>> We report the topology to the guest via STSI, which tells the guest
>> what the topology "tree" looks like. We don't report any numa distances to 
>> the guest.
>> The natural way to specify where a cpu is added to the vm, seems to me to be
>> by specify the socket, book, ... IDs when doing a device_add or via -device 
>> on
>> the command line.
>>
>> [...]
>>
> 
> It is a choice to have the core-id to determine were the CPU is situated in 
> the topology.
> 
> But yes we can chose the use drawer-id,book-id,socket-id and use a core-id 
> starting on 0 on each socket.
> 
> It is not done in the current implementation because the core-id implies the 
> socket-id, book-id and drawer-id together with the smp parameters.
> 
> 
Regardless of whether the core-id or the combination of socket-id, book-id .. 
is used to specify where a CPU is
located, why use the numa framework and not just device_add or -device ?

That feels way more natural since it should already just work if you can do 
hotplug.
At least with core-id and I suspect with a subset of your changes also with 
socket-id, etc.

Whereas numa is an awkward fit since it's for specifying distances between 
nodes, which we don't do,
and you have to use a hack to get it to specify which CPUs to plug (via setting 
arch_id to -1).



Re: [PATCH v8 00/12] s390x: CPU Topology

2022-07-15 Thread Janis Schoetterl-Glausch
On 7/14/22 22:05, Pierre Morel wrote:
> 
> 
> On 7/14/22 20:43, Janis Schoetterl-Glausch wrote:
>> On 6/20/22 16:03, Pierre Morel wrote:
>>> Hi,
>>>
>>> This new spin is essentially for coherence with the last Linux CPU
>>> Topology patch, function testing and coding style modifications.
>>>
>>> Forword
>>> ===
>>>
>>> The goal of this series is to implement CPU topology for S390, it
>>> improves the preceeding series with the implementation of books and
>>> drawers, of non uniform CPU topology and with documentation.
>>>
>>> To use these patches, you will need the Linux series version 10.
>>> You find it there:
>>> https://lkml.org/lkml/2022/6/20/590
>>>
>>> Currently this code is for KVM only, I have no idea if it is interesting
>>> to provide a TCG patch. If ever it will be done in another series.
>>>
>>> To have a better understanding of the S390x CPU Topology and its
>>> implementation in QEMU you can have a look at the documentation in the
>>> last patch or follow the introduction here under.
>>>
>>> A short introduction
>>> 
>>>
>>> CPU Topology is described in the S390 POP with essentially the description
>>> of two instructions:
>>>
>>> PTF Perform Topology function used to poll for topology change
>>>  and used to set the polarization but this part is not part of this 
>>> item.
>>>
>>> STSI Store System Information and the SYSIB 15.1.x providing the Topology
>>>  configuration.
>>>
>>> S390 Topology is a 6 levels hierarchical topology with up to 5 level
>>>  of containers. The last topology level, specifying the CPU cores.
>>>
>>>  This patch series only uses the two lower levels sockets and cores.
>>>   To get the information on the topology, S390 provides the STSI
>>>  instruction, which stores a structures providing the list of the
>>>  containers used in the Machine topology: the SYSIB.
>>>  A selector within the STSI instruction allow to chose how many topology
>>>  levels will be provide in the SYSIB.
>>>
>>>  Using the Topology List Entries (TLE) provided inside the SYSIB we
>>>  the Linux kernel is able to compute the information about the cache
>>>  distance between two cores and can use this information to take
>>>  scheduling decisions.
>>
>> Do the socket, book, ... metaphors and looking at STSI from the existing
>> smp infrastructure even make sense?
> 
> Sorry, I do not understand.
> I admit the cover-letter is old and I did not rewrite it really good since 
> the first patch series.
> 
> What we do is:
> Compute the STSI from the SMP + numa + device QEMU parameters .
> 
>>
>> STSI 15.1.x reports the topology to the guest and for a virtual machine,
>> this topology can be very dynamic. So a CPU can move from from one topology
>> container to another, but the socket of a cpu changing while it's running 
>> seems
>> a bit strange. And this isn't supported by this patch series as far as I 
>> understand,
>> the only topology changes are on hotplug.
> 
> A CPU changing from a socket to another socket is the only case the PTF 
> instruction reports a change in the topology with the case a new CPU is plug 
> in.

Can a CPU actually change between sockets right now?
The socket-id is computed from the core-id, so it's fixed, is it not?

> It is not expected to appear often but it does appear.
> The code has been removed from the kernel in spin 10 for 2 reasons:
> 1) we decided to first support only dedicated and pinned CPU> 2) Christian 
> fears it may happen too often due to Linux host scheduling and could be a 
> performance problem

This seems sensible, but now it seems too static.
For example after migration, you cannot tell the guest which CPUs are in the 
same socket, book, ...,
unless I'm misunderstanding something.
And migration is rare, but something you'd want to be able to react to.
And I could imagine that the vCPUs are pinned most of the time, but the pinning 
changes occasionally.

> 
> So yes now we only have a topology report on vCPU plug.
> 
> 
> 
> 
> 
> 
> 
>>
> 




Re: [PATCH 1/4] Modifying ‘migrate’ qmp command to add multi-FD socket on particular source and destination pair

2022-07-15 Thread Het Gala



On 13/07/22 1:38 pm, Het Gala wrote:


On 16/06/22 10:56 pm, Dr. David Alan Gilbert wrote:

* Het Gala (het.g...@nutanix.com) wrote:


> First of all, I apologise for the late reply. I was on a leave after 
internship ended


at Nutanix. Hope to learn a lot from you all in the process of 
upstreaming multifd


patches.

i) Modified the format of the qemu monitor command : 'migrate' by 
adding a list,
    each element in the list consists of multi-FD connection 
parameters: source
    and destination uris and of the number of multi-fd channels 
between each pair.


ii) Information of all multi-FD connection parameters’ list, length 
of the list
 and total number of multi-fd channels for all the connections 
together is

 stored in ‘OutgoingArgs’ struct.

Suggested-by: Manish Mishra 
Signed-off-by: Het Gala 
---
  include/qapi/util.h   |  9 
  migration/migration.c | 47 ++
  migration/socket.c    | 53 
---

  migration/socket.h    | 17 +-
  monitor/hmp-cmds.c    | 22 --
  qapi/migration.json   | 43 +++
  6 files changed, 170 insertions(+), 21 deletions(-)

diff --git a/include/qapi/util.h b/include/qapi/util.h
index 81a2b13a33..3041feb3d9 100644
--- a/include/qapi/util.h
+++ b/include/qapi/util.h
@@ -56,4 +56,13 @@ int parse_qapi_name(const char *name, bool 
complete);

  (tail) = &(*(tail))->next; \
  } while (0)
  +#define QAPI_LIST_LENGTH(list) ({ \
+    int _len = 0; \
+    typeof(list) _elem; \
+    for (_elem = list; _elem != NULL; _elem = _elem->next) { \
+    _len++; \
+    } \
+    _len; \
+})
+
  #endif

This looks like it should be a separate patch to me (and perhaps size_t
for len?)


> Sure, will try to make a separate patch for QAPI_LIST_LENGTH, and other

such utility functions from the other patches.




diff --git a/migration/migration.c b/migration/migration.c
index 31739b2af9..c408175aeb 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2328,13 +2328,14 @@ static bool migrate_prepare(MigrationState 
*s, bool blk, bool blk_inc,

  return true;
  }
  -void qmp_migrate(const char *uri, bool has_blk, bool blk,
+void qmp_migrate(const char *uri, bool has_multi_fd_uri_list,
+ MigrateUriParameterList *cap, bool has_blk, bool blk,
   bool has_inc, bool inc, bool has_detach, bool 
detach,

   bool has_resume, bool resume, Error **errp)
  {
  Error *local_err = NULL;
  MigrationState *s = migrate_get_current();
-    const char *p = NULL;
+    const char *dst_ptr = NULL;
    if (!migrate_prepare(s, has_blk && blk, has_inc && inc,
   has_resume && resume, errp)) {
@@ -2348,20 +2349,46 @@ void qmp_migrate(const char *uri, bool 
has_blk, bool blk,

  }
  }
  +    /*
+ * In case of Multi-FD migration parameters, if uri is provided,

I think you mean 'if uri list is provided'

> Acknowledged.



+ * supports only tcp network protocol.
+ */
+    if (has_multi_fd_uri_list) {
+    int length = QAPI_LIST_LENGTH(cap);
+    init_multifd_array(length);
+    for (int i = 0; i < length; i++) {
+    const char *p1 = NULL, *p2 = NULL;

Keep these as ps/pd  to make it clear which is source and dest.

> Acknowledged. Will change in the upcoming patchset.



+    const char *multifd_dst_uri = cap->value->destination_uri;
+    const char *multifd_src_uri = cap->value->source_uri;
+    uint8_t multifd_channels = cap->value->multifd_channels;
+    if (!strstart(multifd_dst_uri, "tcp:", ) ||
+    !strstart(multifd_src_uri, "tcp:", )) {

I've copied in Claudio Fontana; Claudio is fighting to make snapshots
faster and has been playing with various multithread schemes for multifd
with files and fd's;  perhaps the syntax you're proposing doesn't need
to be limited to tcp.


> For now, we are just aiming to include multifd for existing tcp 
protocol.


We would be happy to take any suggestions from Claudio Fontana and try to

include them in the upcoming patchset series.



+    error_setg(errp, "multi-fd destination and multi-fd 
source "
+    "uri, both should be present and follows tcp 
protocol only");

+    break;
+    } else {
+    store_multifd_migration_params(p1 ? p1 : 
multifd_dst_uri,

+    p2 ? p2 : multifd_src_uri,
+    multifd_channels, i, 
_err);

+    }
+    cap = cap->next;
+    }
+    }
+
  migrate_protocol_allow_multi_channels(false);
-    if (strstart(uri, "tcp:", ) ||
+    if (strstart(uri, "tcp:", _ptr) ||
  strstart(uri, "unix:", NULL) ||
  strstart(uri, "vsock:", NULL)) {
  migrate_protocol_allow_multi_channels(true);
-    socket_start_outgoing_migration(s, p ? p : uri, 

Re: [PATCH v2 09/11] s390x: Introduce PV query interface

2022-07-15 Thread Marc-André Lureau
Hi

On Wed, Jul 13, 2022 at 5:18 PM Janosch Frank  wrote:

> Introduce an interface over which we can get information about UV data.
>
> Signed-off-by: Janosch Frank 
> ---
>  hw/s390x/pv.c  | 61 ++
>  hw/s390x/s390-virtio-ccw.c |  5 
>  include/hw/s390x/pv.h  | 10 +++
>  3 files changed, 76 insertions(+)
>
> diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c
> index 401b63d6cb..a5af4ddf46 100644
> --- a/hw/s390x/pv.c
> +++ b/hw/s390x/pv.c
> @@ -20,6 +20,11 @@
>  #include "exec/confidential-guest-support.h"
>  #include "hw/s390x/ipl.h"
>  #include "hw/s390x/pv.h"
> +#include "target/s390x/kvm/kvm_s390x.h"
> +
> +static bool info_valid;
> +static struct kvm_s390_pv_info_vm info_vm;
> +static struct kvm_s390_pv_info_dump info_dump;
>
>  static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data)
>  {
> @@ -56,6 +61,42 @@ static int __s390_pv_cmd(uint32_t cmd, const char
> *cmdname, void *data)
>  }  \
>  }
>
> +int s390_pv_query_info(void)
> +{
> +struct kvm_s390_pv_info info = {
> +.header.id = KVM_PV_INFO_VM,
> +.header.len_max = sizeof(info.header) + sizeof(info.vm),
> +};
> +int rc;
> +
> +/* Info API's first user is dump so they are bundled */
> +if (!kvm_s390_get_protected_dump()) {
> +return 0;
> +}
> +
> +rc = s390_pv_cmd(KVM_PV_INFO, );
> +if (rc) {
> +error_report("KVM PV INFO cmd %x failed: %s",
> + info.header.id, strerror(rc));
> +return rc;
> +}
> +memcpy(_vm, , sizeof(info.vm));
> +
> +info.header.id = KVM_PV_INFO_DUMP;
> +info.header.len_max = sizeof(info.header) + sizeof(info.dump);
> +rc = s390_pv_cmd(KVM_PV_INFO, );
> +if (rc) {
> +error_report("KVM PV INFO cmd %x failed: %s",
> + info.header.id, strerror(rc));
> +return rc;
> +}
> +
> +memcpy(_dump, , sizeof(info.dump));
> +info_valid = true;
> +
> +return rc;
> +}
> +
>  int s390_pv_vm_enable(void)
>  {
>  return s390_pv_cmd(KVM_PV_ENABLE, NULL);
> @@ -114,6 +155,26 @@ void s390_pv_inject_reset_error(CPUState *cs)
>  env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV;
>  }
>
> +uint64_t kvm_s390_pv_dmp_get_size_cpu(void)
> +{
> +return info_dump.dump_cpu_buffer_len;
> +}
> +
> +uint64_t kvm_s390_pv_dmp_get_size_complete(void)
> +{
> +return info_dump.dump_config_finalize_len;
> +}
> +
> +uint64_t kvm_s390_pv_dmp_get_size_mem(void)
> +{
> +return info_dump.dump_config_mem_buffer_per_1m;
> +}
> +
> +bool kvm_s390_pv_info_basic_valid(void)
> +{
> +return info_valid;
> +}
> +
>  #define TYPE_S390_PV_GUEST "s390-pv-guest"
>  OBJECT_DECLARE_SIMPLE_TYPE(S390PVGuest, S390_PV_GUEST)
>
> diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
> index cc3097bfee..f9401e392b 100644
> --- a/hw/s390x/s390-virtio-ccw.c
> +++ b/hw/s390x/s390-virtio-ccw.c
> @@ -366,6 +366,11 @@ static int s390_machine_protect(S390CcwMachineState
> *ms)
>
>  ms->pv = true;
>
> +rc = s390_pv_query_info();
> +if (rc) {
> +goto out_err;
>

Maybe it's not necessary to make it fatal on error?

lgtm otherwise


> +}
> +
>  /* Set SE header and unpack */
>  rc = s390_ipl_prepare_pv_header();
>  if (rc) {
> diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h
> index 1f1f545bfc..6fa55bf70e 100644
> --- a/include/hw/s390x/pv.h
> +++ b/include/hw/s390x/pv.h
> @@ -38,6 +38,7 @@ static inline bool s390_is_pv(void)
>  return ccw->pv;
>  }
>
> +int s390_pv_query_info(void);
>  int s390_pv_vm_enable(void);
>  void s390_pv_vm_disable(void);
>  int s390_pv_set_sec_parms(uint64_t origin, uint64_t length);
> @@ -46,8 +47,13 @@ void s390_pv_prep_reset(void);
>  int s390_pv_verify(void);
>  void s390_pv_unshare(void);
>  void s390_pv_inject_reset_error(CPUState *cs);
> +uint64_t kvm_s390_pv_dmp_get_size_cpu(void);
> +uint64_t kvm_s390_pv_dmp_get_size_mem(void);
> +uint64_t kvm_s390_pv_dmp_get_size_complete(void);
> +bool kvm_s390_pv_info_basic_valid(void);
>  #else /* CONFIG_KVM */
>  static inline bool s390_is_pv(void) { return false; }
> +static inline int s390_pv_query_info(void) { return 0; }
>  static inline int s390_pv_vm_enable(void) { return 0; }
>  static inline void s390_pv_vm_disable(void) {}
>  static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length)
> { return 0; }
> @@ -56,6 +62,10 @@ static inline void s390_pv_prep_reset(void) {}
>  static inline int s390_pv_verify(void) { return 0; }
>  static inline void s390_pv_unshare(void) {}
>  static inline void s390_pv_inject_reset_error(CPUState *cs) {};
> +static inline uint64_t kvm_s390_pv_dmp_get_size_cpu(void) { return 0; }
> +static inline uint64_t kvm_s390_pv_dmp_get_size_mem(void) { return 0; }
> +static inline uint64_t kvm_s390_pv_dmp_get_size_complete(void) { return
> 0; }
> +static inline bool kvm_s390_pv_info_basic_valid(void) { return false; }
>  #endif /* CONFIG_KVM 

Re: [PATCH 2/3] hw/sensor: max31785 : add fan status, tach target, and tach input object properties

2022-07-15 Thread Peter Maydell
On Thu, 14 Jul 2022 at 22:14, Maheswara Kurapati
 wrote:
> On 7/14/22 8:10 AM, Peter Maydell wrote:
> > On Thu, 14 Jul 2022 at 14:04, Maheswara Kurapati
> >  wrote:
> >> This fix adds object properties for the FAN_COMMAND_1 (3Bh), 
> >> STATUS_FANS_1_2 (81h),
> >> READ_FAN_SPEED_1 (90h) registers for the MAX31785 instrumentation. An 
> >> additional
> >> property tach_margin_percent updates the tachs for a configured percent of
> >> FAN_COMMAND_1 value.
> >>
> >> Registerproperty
> >> --
> >> FAN_COMMAND_1 (3Bh) fan_target
> >> STATUS_FANS_1_2 (81h)   status_fans_1_2
> >> READ_FAN_SPEED_1 (90h)  fan_input
> > This commit message is missing the rationale -- why do we need this?
> The STATUS_FANS_1_2, and READ_FAN_SPEED_1 registers are read-only. I
> added these properties to simulate the error device faults.

I'm not entirely sure what you have in mind here, but
QEMU doesn't generally simulate device error injection.

> > I am also not sure that we should be defining properties that are
> > just straight 1:1 with the device registers. Compare the way we
> > handle temperature-sensor values, where the property values are
> > defined in a generic manner (same units representation) regardless
> > of the underlying device and the device's property-set-get implementation
> > then handles converting that to and from whatever internal implementation
> > representation the device happens to use.

> I am not sure I understood your comment.  I checked hw/sensors/tmp105.c,
> in which a "temperature" property is added for the tmp_input field in
> almost the similar way what I did, except that the registers in the
> MAX31785 are in direct format.

Yes, that is my point. My impression is that you've provided
properties that directly match the register format of this
device because that's easy. I think that instead we should
consider what the properties are intended to do, and perhaps
have a standard convention for what format to use for particular
kinds of data, as we do for temperature already.

-- PMM



[PULL 1/6] hw/nvme: Implement shadow doorbell buffer support

2022-07-15 Thread Klaus Jensen
From: Jinhao Fan 

Implement Doorbell Buffer Config command (Section 5.7 in NVMe Spec 1.3)
and Shadow Doorbell buffer & EventIdx buffer handling logic (Section 7.13
in NVMe Spec 1.3). For queues created before the Doorbell Buffer Config
command, the nvme_dbbuf_config function tries to associate each existing
SQ and CQ with its Shadow Doorbell buffer and EventIdx buffer address.
Queues created after the Doorbell Buffer Config command will have the
doorbell buffers associated with them when they are initialized.

In nvme_process_sq and nvme_post_cqes, proactively check for Shadow
Doorbell buffer changes instead of waiting for doorbell register changes.
This reduces the number of MMIOs.

In nvme_process_db(), update the shadow doorbell buffer value with
the doorbell register value if it is the admin queue. This is a hack
since hosts like Linux NVMe driver and SPDK do not use shadow
doorbell buffer for the admin queue. Copying the doorbell register
value to the shadow doorbell buffer allows us to support these hosts
as well as spec-compliant hosts that use shadow doorbell buffer for
the admin queue.

Signed-off-by: Jinhao Fan 
Reviewed-by: Klaus Jensen 
Reviewed-by: Keith Busch 
[k.jensen: rebased]
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c   | 115 ++-
 hw/nvme/nvme.h   |   8 +++
 include/block/nvme.h |   2 +
 3 files changed, 124 insertions(+), 1 deletion(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index ca335dd7da6d..46e8d54ef07a 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -264,6 +264,7 @@ static const uint32_t nvme_cse_acs[256] = {
 [NVME_ADM_CMD_ASYNC_EV_REQ] = NVME_CMD_EFF_CSUPP,
 [NVME_ADM_CMD_NS_ATTACHMENT]= NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_NIC,
 [NVME_ADM_CMD_VIRT_MNGMT]   = NVME_CMD_EFF_CSUPP,
+[NVME_ADM_CMD_DBBUF_CONFIG] = NVME_CMD_EFF_CSUPP,
 [NVME_ADM_CMD_FORMAT_NVM]   = NVME_CMD_EFF_CSUPP | NVME_CMD_EFF_LBCC,
 };
 
@@ -1330,6 +1331,12 @@ static inline void nvme_blk_write(BlockBackend *blk, 
int64_t offset,
 }
 }
 
+static void nvme_update_cq_head(NvmeCQueue *cq)
+{
+pci_dma_read(>ctrl->parent_obj, cq->db_addr, >head,
+sizeof(cq->head));
+}
+
 static void nvme_post_cqes(void *opaque)
 {
 NvmeCQueue *cq = opaque;
@@ -1342,6 +1349,10 @@ static void nvme_post_cqes(void *opaque)
 NvmeSQueue *sq;
 hwaddr addr;
 
+if (n->dbbuf_enabled) {
+nvme_update_cq_head(cq);
+}
+
 if (nvme_cq_full(cq)) {
 break;
 }
@@ -4287,6 +4298,11 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, 
uint64_t dma_addr,
 }
 sq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_process_sq, sq);
 
+if (n->dbbuf_enabled) {
+sq->db_addr = n->dbbuf_dbs + (sqid << 3);
+sq->ei_addr = n->dbbuf_eis + (sqid << 3);
+}
+
 assert(n->cq[cqid]);
 cq = n->cq[cqid];
 QTAILQ_INSERT_TAIL(&(cq->sq_list), sq, entry);
@@ -4645,6 +4661,10 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, 
uint64_t dma_addr,
 cq->head = cq->tail = 0;
 QTAILQ_INIT(>req_list);
 QTAILQ_INIT(>sq_list);
+if (n->dbbuf_enabled) {
+cq->db_addr = n->dbbuf_dbs + (cqid << 3) + (1 << 2);
+cq->ei_addr = n->dbbuf_eis + (cqid << 3) + (1 << 2);
+}
 n->cq[cqid] = cq;
 cq->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, nvme_post_cqes, cq);
 }
@@ -5988,6 +6008,50 @@ static uint16_t nvme_virt_mngmt(NvmeCtrl *n, NvmeRequest 
*req)
 }
 }
 
+static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const NvmeRequest *req)
+{
+uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1);
+uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2);
+int i;
+
+/* Address should be page aligned */
+if (dbs_addr & (n->page_size - 1) || eis_addr & (n->page_size - 1)) {
+return NVME_INVALID_FIELD | NVME_DNR;
+}
+
+/* Save shadow buffer base addr for use during queue creation */
+n->dbbuf_dbs = dbs_addr;
+n->dbbuf_eis = eis_addr;
+n->dbbuf_enabled = true;
+
+for (i = 0; i < n->params.max_ioqpairs + 1; i++) {
+NvmeSQueue *sq = n->sq[i];
+NvmeCQueue *cq = n->cq[i];
+
+if (sq) {
+/*
+ * CAP.DSTRD is 0, so offset of ith sq db_addr is (i<<3)
+ * nvme_process_db() uses this hard-coded way to calculate
+ * doorbell offsets. Be consistent with that here.
+ */
+sq->db_addr = dbs_addr + (i << 3);
+sq->ei_addr = eis_addr + (i << 3);
+pci_dma_write(>parent_obj, sq->db_addr, >tail,
+sizeof(sq->tail));
+}
+
+if (cq) {
+/* CAP.DSTRD is 0, so offset of ith cq db_addr is (i<<3)+(1<<2) */
+cq->db_addr = dbs_addr + (i << 3) + (1 << 2);
+cq->ei_addr = eis_addr + (i << 3) + (1 << 2);
+pci_dma_write(>parent_obj, cq->db_addr, >head,
+sizeof(cq->head));
+}
+}
+
+return 

[PULL 5/6] nvme: Fix misleading macro when mixed with ternary operator

2022-07-15 Thread Klaus Jensen
From: Darren Kenny 

Using the Parfait source code analyser an issue was found in
hw/nvme/ctrl.c where the macros NVME_CAP_SET_CMBS and NVME_CAP_SET_PMRS
are called with a ternary operator in the second parameter, resulting
in a potentially unexpected expansion of the form:

  x ? a: b & FLAG_TEST

which will result in a different result to:

  (x ? a: b) & FLAG_TEST.

The macros should wrap each of the parameters in brackets to ensure the
correct result on expansion.

Signed-off-by: Darren Kenny 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Klaus Jensen 
Signed-off-by: Klaus Jensen 
---
 include/block/nvme.h | 44 ++--
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/include/block/nvme.h b/include/block/nvme.h
index 351fd44ca8ca..8027b7126bda 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -98,28 +98,28 @@ enum NvmeCapMask {
 #define NVME_CAP_PMRS(cap)  (((cap) >> CAP_PMRS_SHIFT)   & CAP_PMRS_MASK)
 #define NVME_CAP_CMBS(cap)  (((cap) >> CAP_CMBS_SHIFT)   & CAP_CMBS_MASK)
 
-#define NVME_CAP_SET_MQES(cap, val)   (cap |= (uint64_t)(val & CAP_MQES_MASK)  
\
-   << CAP_MQES_SHIFT)
-#define NVME_CAP_SET_CQR(cap, val)(cap |= (uint64_t)(val & CAP_CQR_MASK)   
\
-   << CAP_CQR_SHIFT)
-#define NVME_CAP_SET_AMS(cap, val)(cap |= (uint64_t)(val & CAP_AMS_MASK)   
\
-   << CAP_AMS_SHIFT)
-#define NVME_CAP_SET_TO(cap, val) (cap |= (uint64_t)(val & CAP_TO_MASK)
\
-   << CAP_TO_SHIFT)
-#define NVME_CAP_SET_DSTRD(cap, val)  (cap |= (uint64_t)(val & CAP_DSTRD_MASK) 
\
-   << CAP_DSTRD_SHIFT)
-#define NVME_CAP_SET_NSSRS(cap, val)  (cap |= (uint64_t)(val & CAP_NSSRS_MASK) 
\
-   << CAP_NSSRS_SHIFT)
-#define NVME_CAP_SET_CSS(cap, val)(cap |= (uint64_t)(val & CAP_CSS_MASK)   
\
-   << CAP_CSS_SHIFT)
-#define NVME_CAP_SET_MPSMIN(cap, val) (cap |= (uint64_t)(val & 
CAP_MPSMIN_MASK)\
-   << CAP_MPSMIN_SHIFT)
-#define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & 
CAP_MPSMAX_MASK)\
-   << CAP_MPSMAX_SHIFT)
-#define NVME_CAP_SET_PMRS(cap, val)   (cap |= (uint64_t)(val & CAP_PMRS_MASK)  
\
-   << CAP_PMRS_SHIFT)
-#define NVME_CAP_SET_CMBS(cap, val)   (cap |= (uint64_t)(val & CAP_CMBS_MASK)  
\
-   << CAP_CMBS_SHIFT)
+#define NVME_CAP_SET_MQES(cap, val)   \
+((cap) |= (uint64_t)((val) & CAP_MQES_MASK)   << CAP_MQES_SHIFT)
+#define NVME_CAP_SET_CQR(cap, val)\
+((cap) |= (uint64_t)((val) & CAP_CQR_MASK)<< CAP_CQR_SHIFT)
+#define NVME_CAP_SET_AMS(cap, val)\
+((cap) |= (uint64_t)((val) & CAP_AMS_MASK)<< CAP_AMS_SHIFT)
+#define NVME_CAP_SET_TO(cap, val) \
+((cap) |= (uint64_t)((val) & CAP_TO_MASK) << CAP_TO_SHIFT)
+#define NVME_CAP_SET_DSTRD(cap, val)  \
+((cap) |= (uint64_t)((val) & CAP_DSTRD_MASK)  << CAP_DSTRD_SHIFT)
+#define NVME_CAP_SET_NSSRS(cap, val)  \
+((cap) |= (uint64_t)((val) & CAP_NSSRS_MASK)  << CAP_NSSRS_SHIFT)
+#define NVME_CAP_SET_CSS(cap, val)\
+((cap) |= (uint64_t)((val) & CAP_CSS_MASK)<< CAP_CSS_SHIFT)
+#define NVME_CAP_SET_MPSMIN(cap, val) \
+((cap) |= (uint64_t)((val) & CAP_MPSMIN_MASK) << CAP_MPSMIN_SHIFT)
+#define NVME_CAP_SET_MPSMAX(cap, val) \
+((cap) |= (uint64_t)((val) & CAP_MPSMAX_MASK) << CAP_MPSMAX_SHIFT)
+#define NVME_CAP_SET_PMRS(cap, val)   \
+((cap) |= (uint64_t)((val) & CAP_PMRS_MASK)   << CAP_PMRS_SHIFT)
+#define NVME_CAP_SET_CMBS(cap, val)   \
+((cap) |= (uint64_t)((val) & CAP_CMBS_MASK)   << CAP_CMBS_SHIFT)
 
 enum NvmeCapCss {
 NVME_CAP_CSS_NVM= 1 << 0,
-- 
2.36.1




Re: [PATCH v2 15/19] vdpa: manual forward CVQ buffers

2022-07-15 Thread Eugenio Perez Martin
On Fri, Jul 15, 2022 at 10:44 AM Jason Wang  wrote:
>
> On Fri, Jul 15, 2022 at 1:34 PM Eugenio Perez Martin
>  wrote:
> >
> > On Fri, Jul 15, 2022 at 6:08 AM Jason Wang  wrote:
> > >
> > > On Fri, Jul 15, 2022 at 12:32 AM Eugenio Pérez  
> > > wrote:
> > > >
> > > > Do a simple forwarding of CVQ buffers, the same work SVQ could do but
> > > > through callbacks. No functional change intended.
> > > >
> > > > Signed-off-by: Eugenio Pérez 
> > > > ---
> > > >  include/hw/virtio/vhost-vdpa.h |  3 ++
> > > >  hw/virtio/vhost-vdpa.c |  3 +-
> > > >  net/vhost-vdpa.c   | 58 ++
> > > >  3 files changed, 63 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/include/hw/virtio/vhost-vdpa.h 
> > > > b/include/hw/virtio/vhost-vdpa.h
> > > > index 7214eb47dc..d85643 100644
> > > > --- a/include/hw/virtio/vhost-vdpa.h
> > > > +++ b/include/hw/virtio/vhost-vdpa.h
> > > > @@ -15,6 +15,7 @@
> > > >  #include 
> > > >
> > > >  #include "hw/virtio/vhost-iova-tree.h"
> > > > +#include "hw/virtio/vhost-shadow-virtqueue.h"
> > > >  #include "hw/virtio/virtio.h"
> > > >  #include "standard-headers/linux/vhost_types.h"
> > > >
> > > > @@ -35,6 +36,8 @@ typedef struct vhost_vdpa {
> > > >  /* IOVA mapping used by the Shadow Virtqueue */
> > > >  VhostIOVATree *iova_tree;
> > > >  GPtrArray *shadow_vqs;
> > > > +const VhostShadowVirtqueueOps *shadow_vq_ops;
> > > > +void *shadow_vq_ops_opaque;
> > > >  struct vhost_dev *dev;
> > > >  VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
> > > >  } VhostVDPA;
> > > > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > > > index 96997210be..beaaa7049a 100644
> > > > --- a/hw/virtio/vhost-vdpa.c
> > > > +++ b/hw/virtio/vhost-vdpa.c
> > > > @@ -419,7 +419,8 @@ static int vhost_vdpa_init_svq(struct vhost_dev 
> > > > *hdev, struct vhost_vdpa *v,
> > > >  for (unsigned n = 0; n < hdev->nvqs; ++n) {
> > > >  g_autoptr(VhostShadowVirtqueue) svq;
> > > >
> > > > -svq = vhost_svq_new(v->iova_tree, NULL, NULL);
> > > > +svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops,
> > > > +v->shadow_vq_ops_opaque);
> > > >  if (unlikely(!svq)) {
> > > >  error_setg(errp, "Cannot create svq %u", n);
> > > >  return -1;
> > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > > index df1e69ee72..805c9dd6b6 100644
> > > > --- a/net/vhost-vdpa.c
> > > > +++ b/net/vhost-vdpa.c
> > > > @@ -11,11 +11,14 @@
> > > >
> > > >  #include "qemu/osdep.h"
> > > >  #include "clients.h"
> > > > +#include "hw/virtio/virtio-net.h"
> > > >  #include "net/vhost_net.h"
> > > >  #include "net/vhost-vdpa.h"
> > > >  #include "hw/virtio/vhost-vdpa.h"
> > > >  #include "qemu/config-file.h"
> > > >  #include "qemu/error-report.h"
> > > > +#include "qemu/log.h"
> > > > +#include "qemu/memalign.h"
> > > >  #include "qemu/option.h"
> > > >  #include "qapi/error.h"
> > > >  #include 
> > > > @@ -187,6 +190,57 @@ static NetClientInfo net_vhost_vdpa_info = {
> > > >  .check_peer_type = vhost_vdpa_check_peer_type,
> > > >  };
> > > >
> > > > +/**
> > > > + * Forward buffer for the moment.
> > > > + */
> > > > +static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
> > > > +SVQElement *svq_elem, void 
> > > > *opaque)
> > > > +{
> > > > +VirtQueueElement *elem = _elem->elem;
> > > > +unsigned int n = elem->out_num + elem->in_num;
> > > > +g_autofree struct iovec *dev_buffers = g_new(struct iovec, n);
> > > > +size_t in_len, dev_written;
> > > > +virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> > > > +int r;
> > > > +
> > > > +memcpy(dev_buffers, elem->out_sg, elem->out_num);
> > > > +memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num);
> > > > +
> > > > +r = vhost_svq_add(svq, _buffers[0], elem->out_num, 
> > > > _buffers[1],
> > > > +  elem->in_num, svq_elem);
> > > > +if (unlikely(r != 0)) {
> > > > +if (unlikely(r == -ENOSPC)) {
> > > > +qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device 
> > > > queue\n",
> > > > +  __func__);
> > > > +}
> > > > +goto out;
> > > > +}
> > > > +
> > > > +/*
> > > > + * We can poll here since we've had BQL from the time we sent the
> > > > + * descriptor. Also, we need to take the answer before SVQ pulls 
> > > > by itself,
> > > > + * when BQL is released
> > > > + */
> > > > +dev_written = vhost_svq_poll(svq);
> > > > +if (unlikely(dev_written < sizeof(status))) {
> > > > +error_report("Insufficient written data (%zu)", dev_written);
> > > > +}
> > > > +
> > > > +out:
> > > > +in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, ,
> > > > +  sizeof(status));
> > > > +if (unlikely(in_len < sizeof(status))) {
> > > > +

Re: [PATCH v2 12/19] vhost: add vhost_svq_poll

2022-07-15 Thread Jason Wang
On Fri, Jul 15, 2022 at 1:39 PM Eugenio Perez Martin
 wrote:
>
> On Fri, Jul 15, 2022 at 5:59 AM Jason Wang  wrote:
> >
> > On Fri, Jul 15, 2022 at 12:32 AM Eugenio Pérez  wrote:
> > >
> > > It allows the Shadow Control VirtQueue to wait for the device to use the
> > > available buffers.
> > >
> > > Signed-off-by: Eugenio Pérez 
> > > ---
> > >  hw/virtio/vhost-shadow-virtqueue.h |  1 +
> > >  hw/virtio/vhost-shadow-virtqueue.c | 22 ++
> > >  2 files changed, 23 insertions(+)
> > >
> > > diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
> > > b/hw/virtio/vhost-shadow-virtqueue.h
> > > index 1692541cbb..b5c6e3b3b4 100644
> > > --- a/hw/virtio/vhost-shadow-virtqueue.h
> > > +++ b/hw/virtio/vhost-shadow-virtqueue.h
> > > @@ -89,6 +89,7 @@ void vhost_svq_push_elem(VhostShadowVirtqueue *svq, 
> > > const SVQElement *elem,
> > >  int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
> > >size_t out_num, const struct iovec *in_sg, size_t 
> > > in_num,
> > >SVQElement *elem);
> > > +size_t vhost_svq_poll(VhostShadowVirtqueue *svq);
> > >
> > >  void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int 
> > > svq_kick_fd);
> > >  void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
> > > diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
> > > b/hw/virtio/vhost-shadow-virtqueue.c
> > > index 5244896358..31a267f721 100644
> > > --- a/hw/virtio/vhost-shadow-virtqueue.c
> > > +++ b/hw/virtio/vhost-shadow-virtqueue.c
> > > @@ -486,6 +486,28 @@ static void vhost_svq_flush(VhostShadowVirtqueue 
> > > *svq,
> > >  } while (!vhost_svq_enable_notification(svq));
> > >  }
> > >
> > > +/**
> > > + * Poll the SVQ for one device used buffer.
> > > + *
> > > + * This function race with main event loop SVQ polling, so extra
> > > + * synchronization is needed.
> > > + *
> > > + * Return the length written by the device.
> > > + */
> > > +size_t vhost_svq_poll(VhostShadowVirtqueue *svq)
> > > +{
> > > +do {
> > > +uint32_t len;
> > > +SVQElement *elem = vhost_svq_get_buf(svq, );
> > > +if (elem) {
> > > +return len;
> > > +}
> > > +
> > > +/* Make sure we read new used_idx */
> > > +smp_rmb();
> >
> > There's already one smp_rmb() in vhost_svq_get_buf(). So this seems useless?
> >
>
> That rmb is after checking for new entries with (vq->last_used_idx !=
> svq->shadow_used_idx) , to avoid reordering used_idx read with the
> actual used entry. So my understanding is
> that the compiler is free to skip that check within the while loop.

What do you mean by "that check" here?

>
> Maybe the right solution is to add it in vhost_svq_more_used after the
> condition (vq->last_used_idx != svq->shadow_used_idx) is false?

I'm not sure I get the goal of the smp_rmb() here. What barrier does it pair?

Since we are in the busy loop, we will read the new used_idx for
sure, and we can't forecast when the used_idx is committed to memory.

Thanks

>
> Thanks!
>
>
> > Thanks
> >
> > > +} while (true);
> > > +}
> > > +
> > >  /**
> > >   * Forward used buffers.
> > >   *
> > > --
> > > 2.31.1
> > >
> >
>




Re: [PATCH 2/3] target/s390x: display deprecation note in '-cpu help'

2022-07-15 Thread Cornelia Huck
On Thu, Jul 14 2022, Daniel P. Berrangé  wrote:

> The deprecation notes are currently only displayed at runtime when the
> user activates a CPU. The QMP query displays a simple flag for
> deprecation, while '-cpu help' displays nothing unless the deprecation
> info is duplicated into the 'notes' field.
>
> This changes the code so that deprecation notes are explicitly shown
> in '-cpu help', to assist the user in deciding what to use.
>
> Signed-off-by: Daniel P. Berrangé 
> ---
>  target/s390x/cpu_models.c | 28 +++-
>  1 file changed, 23 insertions(+), 5 deletions(-)

Reviewed-by: Cornelia Huck 




Re: [PATCH v2 01/11] dump: Cleanup memblock usage

2022-07-15 Thread Marc-André Lureau
Hi

On Thu, Jul 14, 2022 at 1:46 PM Janosch Frank  wrote:

> On 7/13/22 17:35, Marc-André Lureau wrote:
> > Hi
> >
> > On Wed, Jul 13, 2022 at 7:30 PM Janosch Frank 
> wrote:
> >>
> >> On 7/13/22 17:09, Marc-André Lureau wrote:
> >>> Hi
> >>>
> >>> On Wed, Jul 13, 2022 at 5:07 PM Janosch Frank 
> wrote:
> 
>  The iteration over the memblocks is hard to understand so it's about
>  time to clean it up.
> 
>  struct DumpState's next_block and start members can and should be
>  local variables within the iterator.
> 
>  Instead of manually grabbing the next memblock we can use
>  QTAILQ_FOREACH to iterate over all memblocks.
> 
>  The begin and length fields in the DumpState have been left untouched
>  since the qmp arguments share their names.
> 
>  Signed-off-by: Janosch Frank 
> >>>
> >>> After this patch:
> >>> ./qemu-system-x86_64 -monitor stdio -S
> >>> (qemu) dump-guest-memory foo
> >>> Error: dump: failed to save memory: Bad address
> >>
> >> If you have more ways to check for dump errors then please send them to
> >> me. I'm aware that this might not have been a 100% conversion and I'm a
> >> bit terrified about the fact that this will affect all architectures.
> >
> > Same feeling here. Maybe it's about time to write real dump tests!
>
> We have tests for s390 and I've prompted for tests with filtering so we
> can also cover that. Unfortunately s390 differs in the use of memory
> because we only have one large block which hid this error from me.
>
>
> >>>
>  +if (block->target_start >= filter_area_start +
> filter_area_length ||
>  +block->target_end <= filter_area_start) {
>  +return -1;
>  +}
>  +if (filter_area_start > block->target_start) {
>  +return filter_area_start - block->target_start;
>  +}
>  +}
>  +return block->target_start;
> >>>
> >>> This used to be 0. Changing that, I think the patch looks good.
> >>> Although it could perhaps be splitted to introduce the two functions.
> >>
> >> Yes but the 0 was used to indicate that we would have needed continue
> >> iterating and the iteration is done via other means in this patch.
> >>
> >> Or am I missing something?
>
> Had a look, turns out I missed something.
>
> >
> > Well, you changed the way the loop used to work. it used to return 1/0
> > to indicate stop/continue and rely on s->start / s->next_block. Now
> > you return memblock_start.
>
> Maybe we should call this "dump_get_memblock_start_offset()" to make it
> clearer that we don't return block->target_start i.e. a start address
> but rather an offset that we tack on the host address to read the memory?
>
>
Not a big difference to me. You would need to adjust write_memory() "start"
argument name as well then.


> >
> >>
> >>>
>  +}
> #endif
>  --
>  2.34.1
> 
> >>>
> >>>
> >>
> >
> >
>
>

-- 
Marc-André Lureau


[PULL 0/6] hw/nvme updates

2022-07-15 Thread Klaus Jensen
From: Klaus Jensen 

Hi,

The following changes since commit 8482ab545e52f50facacfe1118b22b97462724ab:

  Merge tag 'qga-win32-pull-2022-07-13' of github.com:kostyanf14/qemu into 
staging (2022-07-14 14:52:16 +0100)

are available in the Git repository at:

  git://git.infradead.org/qemu-nvme.git tags/nvme-next-pull-request

for you to fetch changes up to 2e53b0b450246044efd27418c5d05ad6919deb87:

  hw/nvme: Use ioeventfd to handle doorbell updates (2022-07-15 10:40:33 +0200)


hw/nvme updates

performance improvements by Jinhao
~~
* shadow doorbells
* ioeventfd

plus some misc fixes (Darren, Niklas).



Darren Kenny (1):
  nvme: Fix misleading macro when mixed with ternary operator

Jinhao Fan (3):
  hw/nvme: Implement shadow doorbell buffer support
  hw/nvme: Add trace events for shadow doorbell buffer
  hw/nvme: Use ioeventfd to handle doorbell updates

Niklas Cassel (2):
  hw/nvme: fix example serial in documentation
  hw/nvme: force nvme-ns param 'shared' to false if no nvme-subsys node

 docs/system/devices/nvme.rst |   4 +-
 hw/nvme/ctrl.c   | 233 ++-
 hw/nvme/ns.c |   2 +
 hw/nvme/nvme.h   |  13 ++
 hw/nvme/trace-events |   5 +
 include/block/nvme.h |  46 +++
 6 files changed, 277 insertions(+), 26 deletions(-)

-- 
2.36.1




[PULL 4/6] hw/nvme: force nvme-ns param 'shared' to false if no nvme-subsys node

2022-07-15 Thread Klaus Jensen
From: Niklas Cassel 

Since commit 916b0f0b5264 ("hw/nvme: change nvme-ns 'shared' default")
the default value of nvme-ns param 'shared' is set to true, regardless
if there is a nvme-subsys node or not.

On a system without a nvme-subsys node, a namespace will never be able
to be attached to more than one controller, so for this configuration,
it is counterintuitive for this parameter to be set by default.

Force the nvme-ns param 'shared' to false for configurations where
there is no nvme-subsys node, as the namespace will never be able to
attach to more than one controller anyway.

Signed-off-by: Niklas Cassel 
Reviewed-by: Klaus Jensen 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ns.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index 870c3ca1a2f0..62a1f97be010 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -546,6 +546,8 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
 int i;
 
 if (!n->subsys) {
+/* If no subsys, the ns cannot be attached to more than one ctrl. */
+ns->params.shared = false;
 if (ns->params.detached) {
 error_setg(errp, "detached requires that the nvme device is "
"linked to an nvme-subsys device");
-- 
2.36.1




[PULL 6/6] hw/nvme: Use ioeventfd to handle doorbell updates

2022-07-15 Thread Klaus Jensen
From: Jinhao Fan 

Add property "ioeventfd" which is enabled by default. When this is
enabled, updates on the doorbell registers will cause KVM to signal
an event to the QEMU main loop to handle the doorbell updates.
Therefore, instead of letting the vcpu thread run both guest VM and
IO emulation, we now use the main loop thread to do IO emulation and
thus the vcpu thread has more cycles for the guest VM.

Since ioeventfd does not tell us the exact value that is written, it is
only useful when shadow doorbell buffer is enabled, where we check
for the value in the shadow doorbell buffer when we get the doorbell
update event.

IOPS comparison on Linux 5.19-rc2: (Unit: KIOPS)

qd   1   4  16  64
qemu35 121 176 153
ioeventfd   41 133 258 313

Changes since v3:
 - Do not deregister ioeventfd when it was not enabled on a SQ/CQ

Signed-off-by: Jinhao Fan 
Reviewed-by: Klaus Jensen 
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 113 -
 hw/nvme/nvme.h |   5 +++
 2 files changed, 117 insertions(+), 1 deletion(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 55cb0ba1d591..533ad14e7a61 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -1400,7 +1400,14 @@ static void nvme_enqueue_req_completion(NvmeCQueue *cq, 
NvmeRequest *req)
 
 QTAILQ_REMOVE(>sq->out_req_list, req, entry);
 QTAILQ_INSERT_TAIL(>req_list, req, entry);
-timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+
+if (req->sq->ioeventfd_enabled) {
+/* Post CQE directly since we are in main loop thread */
+nvme_post_cqes(cq);
+} else {
+/* Schedule the timer to post CQE later since we are in vcpu thread */
+timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
+}
 }
 
 static void nvme_process_aers(void *opaque)
@@ -4226,10 +4233,82 @@ static uint16_t nvme_io_cmd(NvmeCtrl *n, NvmeRequest 
*req)
 return NVME_INVALID_OPCODE | NVME_DNR;
 }
 
+static void nvme_cq_notifier(EventNotifier *e)
+{
+NvmeCQueue *cq = container_of(e, NvmeCQueue, notifier);
+NvmeCtrl *n = cq->ctrl;
+
+event_notifier_test_and_clear(>notifier);
+
+nvme_update_cq_head(cq);
+
+if (cq->tail == cq->head) {
+if (cq->irq_enabled) {
+n->cq_pending--;
+}
+
+nvme_irq_deassert(n, cq);
+}
+
+nvme_post_cqes(cq);
+}
+
+static int nvme_init_cq_ioeventfd(NvmeCQueue *cq)
+{
+NvmeCtrl *n = cq->ctrl;
+uint16_t offset = (cq->cqid << 3) + (1 << 2);
+int ret;
+
+ret = event_notifier_init(>notifier, 0);
+if (ret < 0) {
+return ret;
+}
+
+event_notifier_set_handler(>notifier, nvme_cq_notifier);
+memory_region_add_eventfd(>iomem,
+  0x1000 + offset, 4, false, 0, >notifier);
+
+return 0;
+}
+
+static void nvme_sq_notifier(EventNotifier *e)
+{
+NvmeSQueue *sq = container_of(e, NvmeSQueue, notifier);
+
+event_notifier_test_and_clear(>notifier);
+
+nvme_process_sq(sq);
+}
+
+static int nvme_init_sq_ioeventfd(NvmeSQueue *sq)
+{
+NvmeCtrl *n = sq->ctrl;
+uint16_t offset = sq->sqid << 3;
+int ret;
+
+ret = event_notifier_init(>notifier, 0);
+if (ret < 0) {
+return ret;
+}
+
+event_notifier_set_handler(>notifier, nvme_sq_notifier);
+memory_region_add_eventfd(>iomem,
+  0x1000 + offset, 4, false, 0, >notifier);
+
+return 0;
+}
+
 static void nvme_free_sq(NvmeSQueue *sq, NvmeCtrl *n)
 {
+uint16_t offset = sq->sqid << 3;
+
 n->sq[sq->sqid] = NULL;
 timer_free(sq->timer);
+if (sq->ioeventfd_enabled) {
+memory_region_del_eventfd(>iomem,
+  0x1000 + offset, 4, false, 0, >notifier);
+event_notifier_cleanup(>notifier);
+}
 g_free(sq->io_req);
 if (sq->sqid) {
 g_free(sq);
@@ -4302,6 +4381,12 @@ static void nvme_init_sq(NvmeSQueue *sq, NvmeCtrl *n, 
uint64_t dma_addr,
 if (n->dbbuf_enabled) {
 sq->db_addr = n->dbbuf_dbs + (sqid << 3);
 sq->ei_addr = n->dbbuf_eis + (sqid << 3);
+
+if (n->params.ioeventfd && sq->sqid != 0) {
+if (!nvme_init_sq_ioeventfd(sq)) {
+sq->ioeventfd_enabled = true;
+}
+}
 }
 
 assert(n->cq[cqid]);
@@ -4605,8 +4690,15 @@ static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest 
*req)
 
 static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
 {
+uint16_t offset = (cq->cqid << 3) + (1 << 2);
+
 n->cq[cq->cqid] = NULL;
 timer_free(cq->timer);
+if (cq->ioeventfd_enabled) {
+memory_region_del_eventfd(>iomem,
+  0x1000 + offset, 4, false, 0, >notifier);
+event_notifier_cleanup(>notifier);
+}
 if (msix_enabled(>parent_obj)) {
 msix_vector_unuse(>parent_obj, cq->vector);
 }
@@ -4665,6 +4757,12 @@ static void nvme_init_cq(NvmeCQueue *cq, NvmeCtrl *n, 
uint64_t dma_addr,
 if (n->dbbuf_enabled) 

Re: [PATCH v2 18/19] vdpa: Add device migration blocker

2022-07-15 Thread Jason Wang
On Fri, Jul 15, 2022 at 1:40 PM Eugenio Perez Martin
 wrote:
>
> On Fri, Jul 15, 2022 at 6:03 AM Jason Wang  wrote:
> >
> > On Fri, Jul 15, 2022 at 12:32 AM Eugenio Pérez  wrote:
> > >
> > > Since the vhost-vdpa device is exposing _F_LOG,
> >
> > I may miss something but I think it doesn't?
> >
>
> It's at vhost_vdpa_get_features. As long as SVQ is enabled, it's
> exposing VHOST_F_LOG_ALL.

Ok, so this needs to be specified in the change log. But I'm kind of
confused here, we do want to allow migration to work so why we disable
it?

Thanks

>
> Thanks!
>
> > Note that the features were fetched from the vDPA parent.
> >
> > Thanks
> >
> > > adding a migration blocker if
> > > it uses CVQ.
> > >
> > > Signed-off-by: Eugenio Pérez 
> > > ---
> > >  include/hw/virtio/vhost-vdpa.h |  1 +
> > >  hw/virtio/vhost-vdpa.c | 14 ++
> > >  2 files changed, 15 insertions(+)
> > >
> > > diff --git a/include/hw/virtio/vhost-vdpa.h 
> > > b/include/hw/virtio/vhost-vdpa.h
> > > index d85643..d10a89303e 100644
> > > --- a/include/hw/virtio/vhost-vdpa.h
> > > +++ b/include/hw/virtio/vhost-vdpa.h
> > > @@ -35,6 +35,7 @@ typedef struct vhost_vdpa {
> > >  bool shadow_vqs_enabled;
> > >  /* IOVA mapping used by the Shadow Virtqueue */
> > >  VhostIOVATree *iova_tree;
> > > +Error *migration_blocker;
> > >  GPtrArray *shadow_vqs;
> > >  const VhostShadowVirtqueueOps *shadow_vq_ops;
> > >  void *shadow_vq_ops_opaque;
> > > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > > index beaaa7049a..795ed5a049 100644
> > > --- a/hw/virtio/vhost-vdpa.c
> > > +++ b/hw/virtio/vhost-vdpa.c
> > > @@ -20,6 +20,7 @@
> > >  #include "hw/virtio/vhost-shadow-virtqueue.h"
> > >  #include "hw/virtio/vhost-vdpa.h"
> > >  #include "exec/address-spaces.h"
> > > +#include "migration/blocker.h"
> > >  #include "qemu/cutils.h"
> > >  #include "qemu/main-loop.h"
> > >  #include "cpu.h"
> > > @@ -1022,6 +1023,13 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev 
> > > *dev)
> > >  return true;
> > >  }
> > >
> > > +if (v->migration_blocker) {
> > > +int r = migrate_add_blocker(v->migration_blocker, );
> > > +if (unlikely(r < 0)) {
> > > +goto err_migration_blocker;
> > > +}
> > > +}
> > > +
> > >  for (i = 0; i < v->shadow_vqs->len; ++i) {
> > >  VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
> > >  VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
> > > @@ -1064,6 +1072,9 @@ err:
> > >  vhost_svq_stop(svq);
> > >  }
> > >
> > > +err_migration_blocker:
> > > +error_reportf_err(err, "Cannot setup SVQ %u: ", i);
> > > +
> > >  return false;
> > >  }
> > >
> > > @@ -1083,6 +1094,9 @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev 
> > > *dev)
> > >  }
> > >  }
> > >
> > > +if (v->migration_blocker) {
> > > +migrate_del_blocker(v->migration_blocker);
> > > +}
> > >  return true;
> > >  }
> > >
> > > --
> > > 2.31.1
> > >
> >
>




Re: [PATCH 0/3] target: RFC: display deprecation note for '-cpu help'

2022-07-15 Thread Cornelia Huck
On Thu, Jul 14 2022, Daniel P. Berrangé  wrote:

> When querying '-cpu help' there is no presentation of fact that a
> CPU may be deprecated. The user just has to try it and see if they
> get a depecation message at runtime.  The QMP command for querying
> CPUs report a deprecation bool flag, but not the explanatory
> reason.
>
> The Icelake-Client CPU (removed in 6df39f5e583ca0f67bd934d1327f9ead2e3bd49c)
> handled this by modifying the '.notes' section to add the word
> 'deprecated':
>
> {
> .version = 2,
> .note = "no TSX, deprecated",
> .alias = "Icelake-Client-noTSX",
> .props = (PropValue[]) {
> { "hle", "off" },
> { "rtm", "off" },
> { /* end of list */ }
> },
> },
>
> This relies on the person deprecating the CPU to remember to do this,
> and is redundant when this info is already expressed in the
> '.deprecation_note' field.
>
> This short series suggests just modifying the '-cpu help'
> formatter so that it displays the full deprecation message
>
> eg
>
> $ qemu-system-x86_64 -cpu help:
> Available CPUs:
> x86 486   (alias configured by machine type) (deprecated: use 
> at least 'Nehalem' / 'Opteron_G4', or 'host' / 'max')
>
> I wonder if this is too verbose, and we should just do a
> concise flag like approach, similar to QMP:
>
> $ qemu-system-x86_64 -cpu help:
> Available CPUs:
> x86 486   (alias configured by machine type) (deprecated)
>
> leaving the full message to be displayed at runtime ? I'm slightly
> inclined to the simpler more concise output.

The good thing about the longer output is that the user gets the full
information right from the start, and does not need to dig around and
figure out why it is deprecated, and what to use instead. That said, if
we have very verbose deprecation notes, the output may get a bit
cluttered. I think I slightly prefer the verbose output.

>
> This series touched x86_64, s390x, and aarch64 because that's all I
> personally needed from a downstream POV, but any & all of the targets
> would benefit from this. They have each implemneted the '-cpu help'
> logic independantly though, and unifying that code is not entirely
> straightforward.

It seems that any arch that does not use a very simple output has chosen
a different format...




Re: [PULL 00/20] SCSI, build system patches for 2022-07-13

2022-07-15 Thread Peter Maydell
On Thu, 14 Jul 2022 at 10:14, Paolo Bonzini  wrote:
>
> The following changes since commit 8e3d85d36b77f11ad7bded3a2d48c1f0cc334f82:
>
>   Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging 
> (2022-07-12 14:12:15 +0100)
>
> are available in the Git repository at:
>
>   https://gitlab.com/bonzini/qemu.git tags/for-upstream
>
> for you to fetch changes up to c0b3607d5938f5ee7fd16ff1e102afe938fd4b39:
>
>   pc-bios/s390-ccw: add -Wno-array-bounds (2022-07-13 16:58:58 +0200)
>
> 
> * SCSI fuzzing fix (Mauro)
> * pre-install data files in the build directory (Akihiko)
> * SCSI fixes for Mac OS (Mark)
>


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/7.1
for any user-visible changes.

-- PMM



Re: [RFC] aspeed/i2c: multi-master between SoC's

2022-07-15 Thread Klaus Jensen
On Jul 14 20:06, Peter Delevoryas wrote:
> Hey Cedric, Klaus, and Corey,
> 

Hi Peter,

Regardless of the issues you are facing, it's awesome to see this being
put to work like this!

> So I realized something about the current state of multi-master i2c:
> 
> We can't do transfers between two Aspeed I2C controllers, e.g.  AST1030 <->
> AST2600. I'm looking into this case in the new fby35 machine (which isn't even
> merged yet, just in Cedric's pull request)
> 
> This is because the AspeedI2CBusSlave is only designed to receive through
> i2c_send_async(). But the AspeedI2CBus master-mode transfers use i2c_send().
> 
> So, the AST2600 can't send data to the AST1030. And the AST1030 can't reply to
> the AST2600.
> 
> (By the way, another small issue: AspeedI2CBusSlave expects the parent of its
> parent to be its AspeedI2CBus, but that's not true if multiple SoC's are 
> sharing
> an I2CBus. But that's easy to resolve, I'll send a patch for that soon).
> 
> I'm wondering how best to resolve the multi-SoC send-async issue, while
> retaining the ability to send synchronously to non-SoC slave devices.
> 
> I think there's only one way, as far as I can see:
> 
> - Force the Aspeed I2C Controller to master the I2C bus before starting a 
> master
>   transfer. Even for synchronous transfers.
> 
> This shouldn't be a big problem, we can still do synchronous transfers, we 
> just
> have to wait for the bus to be free before starting the transfer.
> 
> - If the I2C slave targets for a master2slave transfer support async_send, 
> then
>   use async_send. This requires refactoring aspeed_i2c_bus_send into a state
>   machine to send data asynchronously.
> 
> In other words, don't try to do a synchronous transfer to an SoC.
> 
> But, of course, we can keep doing synchronous transfers from SoC -> sensor or
> sensor -> SoC.
> 

Yeah, hmm. This is tricky because callers of bus_send expect the
transfer to be "resolved" immediately. Per design, the asynchronous send
requires the device mastering the bus to itself be asynchronous (like
the i2c-echo device I added as an example).

However, looking at aspeed_i2c_bus_handle_cmd (which is the caller of
bus_send), it should be possible to accept bus_send to "yield" as you
sketch below and not raise any interrupt. And yes, it would be required
in bus_send to call i2c_bus_master to register a BH which can then
raise the interrupt upon i2c_ack().



[PULL 2/6] hw/nvme: Add trace events for shadow doorbell buffer

2022-07-15 Thread Klaus Jensen
From: Jinhao Fan 

When shadow doorbell buffer is enabled, doorbell registers are lazily
updated. The actual queue head and tail pointers are stored in Shadow
Doorbell buffers.

Add trace events for updates on the Shadow Doorbell buffers and EventIdx
buffers. Also add trace event for the Doorbell Buffer Config command.

Signed-off-by: Jinhao Fan 
Reviewed-by: Klaus Jensen 
Reviewed-by: Keith Busch 
[k.jensen: rebased]
Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c   | 5 +
 hw/nvme/trace-events | 5 +
 2 files changed, 10 insertions(+)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 46e8d54ef07a..55cb0ba1d591 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -1335,6 +1335,7 @@ static void nvme_update_cq_head(NvmeCQueue *cq)
 {
 pci_dma_read(>ctrl->parent_obj, cq->db_addr, >head,
 sizeof(cq->head));
+trace_pci_nvme_shadow_doorbell_cq(cq->cqid, cq->head);
 }
 
 static void nvme_post_cqes(void *opaque)
@@ -6049,6 +6050,8 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const 
NvmeRequest *req)
 }
 }
 
+trace_pci_nvme_dbbuf_config(dbs_addr, eis_addr);
+
 return NVME_SUCCESS;
 }
 
@@ -6111,12 +6114,14 @@ static void nvme_update_sq_eventidx(const NvmeSQueue 
*sq)
 {
 pci_dma_write(>ctrl->parent_obj, sq->ei_addr, >tail,
   sizeof(sq->tail));
+trace_pci_nvme_eventidx_sq(sq->sqid, sq->tail);
 }
 
 static void nvme_update_sq_tail(NvmeSQueue *sq)
 {
 pci_dma_read(>ctrl->parent_obj, sq->db_addr, >tail,
  sizeof(sq->tail));
+trace_pci_nvme_shadow_doorbell_sq(sq->sqid, sq->tail);
 }
 
 static void nvme_process_sq(void *opaque)
diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events
index 065e1c891df4..fccb79f48973 100644
--- a/hw/nvme/trace-events
+++ b/hw/nvme/trace-events
@@ -3,6 +3,7 @@ pci_nvme_irq_msix(uint32_t vector) "raising MSI-X IRQ vector %u"
 pci_nvme_irq_pin(void) "pulsing IRQ pin"
 pci_nvme_irq_masked(void) "IRQ is masked"
 pci_nvme_dma_read(uint64_t prp1, uint64_t prp2) "DMA read, prp1=0x%"PRIx64" 
prp2=0x%"PRIx64""
+pci_nvme_dbbuf_config(uint64_t dbs_addr, uint64_t eis_addr) 
"dbs_addr=0x%"PRIx64" eis_addr=0x%"PRIx64""
 pci_nvme_map_addr(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len %"PRIu64""
 pci_nvme_map_addr_cmb(uint64_t addr, uint64_t len) "addr 0x%"PRIx64" len 
%"PRIu64""
 pci_nvme_map_prp(uint64_t trans_len, uint32_t len, uint64_t prp1, uint64_t 
prp2, int num_prps) "trans_len %"PRIu64" len %"PRIu32" prp1 0x%"PRIx64" prp2 
0x%"PRIx64" num_prps %d"
@@ -83,6 +84,8 @@ pci_nvme_enqueue_event_noqueue(int queued) "queued %d"
 pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8""
 pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs"
 pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint32_t dw0, 
uint32_t dw1, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" dw0 0x%"PRIx32" 
dw1 0x%"PRIx32" status 0x%"PRIx16""
+pci_nvme_eventidx_cq(uint16_t cqid, uint16_t new_eventidx) "cqid %"PRIu16" 
new_eventidx %"PRIu16""
+pci_nvme_eventidx_sq(uint16_t sqid, uint16_t new_eventidx) "sqid %"PRIu16" 
new_eventidx %"PRIu16""
 pci_nvme_mmio_read(uint64_t addr, unsigned size) "addr 0x%"PRIx64" size %d"
 pci_nvme_mmio_write(uint64_t addr, uint64_t data, unsigned size) "addr 
0x%"PRIx64" data 0x%"PRIx64" size %d"
 pci_nvme_mmio_doorbell_cq(uint16_t cqid, uint16_t new_head) "cqid %"PRIu16" 
new_head %"PRIu16""
@@ -99,6 +102,8 @@ pci_nvme_mmio_start_success(void) "setting controller enable 
bit succeeded"
 pci_nvme_mmio_stopped(void) "cleared controller enable bit"
 pci_nvme_mmio_shutdown_set(void) "shutdown bit set"
 pci_nvme_mmio_shutdown_cleared(void) "shutdown bit cleared"
+pci_nvme_shadow_doorbell_cq(uint16_t cqid, uint16_t new_shadow_doorbell) "cqid 
%"PRIu16" new_shadow_doorbell %"PRIu16""
+pci_nvme_shadow_doorbell_sq(uint16_t sqid, uint16_t new_shadow_doorbell) "sqid 
%"PRIu16" new_shadow_doorbell %"PRIu16""
 pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, 
slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
 pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, 
slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
 pci_nvme_finish_zone(uint64_t slba, uint32_t zone_idx, int all) "finish zone, 
slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
-- 
2.36.1




Re: [RFC PATCH v3 0/3] Implement Power ISA 3.1B hash insns

2022-07-15 Thread Víctor Colombo

On 15/07/2022 10:23, Daniel Henrique Barboza wrote:

On 7/13/22 13:54, Víctor Colombo wrote:

This patch series implements the 4 instructions added in Power ISA
3.1B:

- hashchk
- hashst
- hashchkp
- hashstp

To build it, you need to apply the following patches on top of master:
<20220701133507.740619-2-lucas.couti...@eldorado.org.br>
<20220701133507.740619-3-lucas.couti...@eldorado.org.br>
<20220712193741.59134-2-leandro.lup...@eldorado.org.br>
<20220712193741.59134-3-leandro.lup...@eldorado.org.br>

Working branch for ease of use can be found here:
https://github.com/PPC64/qemu/tree/vccolombo-hash-to-send-v3

What do you think about the choice to implement the hash algorithm
from the ground up, following the SIMON-like algorithm presented in
Power ISA? IIUC, this algorithm is not the same as the original[1].
Other options would be to use other algorithm already implemented
in QEMU, or even make this instruction a nop for all Power versions.

Also, I was thinking about using the call to spr_register_kvm() in
init_proc_POWER10 to initialize the registers with a random value.
I'm not sure what is the behavior here, I would expect that is the job
of the OS to set the regs, but looks like KVM is not exporting them,
so they are always 0 (?). Does anyone have any insight on this?


This happens because KVM on POWER10 isn't handling these registers
appropriately. We are probably missing kernel/kvm code to do so.

Since KVM on POWER10 is on an uncertain spot at this moment I wouldn't
worry too much about it. Making the regs read/write work in TCG is good
enough for now.


Daniel


Hello Daniel,

Thanks for taking a look at this. I agree that in this case it is better
to make it work in TCG and drop the KVM part from this patch set
I'll work on it now

Thanks!





v1->v2:
- Split the patch in 2
- Rebase to master

v2->v3:
- Split patches in 3
 - the new patch (patch 1) is separating the kvm header
   changes [Cornelia]

[1] https://eprint.iacr.org/2013/404.pdf

Víctor Colombo (3):
   linux-headers/asm-powerpc/kvm.h: Add HASHKEYR and HASHPKEYR in headers
   target/ppc: Implement hashst and hashchk
   target/ppc: Implement hashstp and hashchkp

  linux-headers/asm-powerpc/kvm.h    |  3 +
  target/ppc/cpu.h   |  2 +
  target/ppc/cpu_init.c  |  7 ++
  target/ppc/excp_helper.c   | 82 ++
  target/ppc/helper.h    |  4 ++
  target/ppc/insn32.decode   | 10 +++
  target/ppc/translate.c |  5 ++
  target/ppc/translate/fixedpoint-impl.c.inc | 34 +
  8 files changed, 147 insertions(+)




--
Víctor Cora Colombo
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer 


Re: [PATCH v8 00/12] s390x: CPU Topology

2022-07-15 Thread Pierre Morel




On 7/15/22 11:31, Janis Schoetterl-Glausch wrote:

On 7/14/22 22:05, Pierre Morel wrote:



On 7/14/22 20:43, Janis Schoetterl-Glausch wrote:

On 6/20/22 16:03, Pierre Morel wrote:

Hi,

This new spin is essentially for coherence with the last Linux CPU
Topology patch, function testing and coding style modifications.

Foreword
===

The goal of this series is to implement CPU topology for S390. It
improves the preceding series with the implementation of books and
drawers, of non-uniform CPU topology, and with documentation.

To use these patches, you will need the Linux series version 10.
You find it there:
https://lkml.org/lkml/2022/6/20/590

Currently this code is for KVM only, I have no idea if it is interesting
to provide a TCG patch. If ever it will be done in another series.

To have a better understanding of the S390x CPU Topology and its
implementation in QEMU you can have a look at the documentation in the
last patch or follow the introduction here under.

A short introduction


CPU Topology is described in the S390 POP with essentially the description
of two instructions:

PTF Perform Topology function used to poll for topology change
  and used to set the polarization but this part is not part of this item.

STSI Store System Information and the SYSIB 15.1.x providing the Topology
  configuration.

S390 Topology is a 6 levels hierarchical topology with up to 5 level
  of containers. The last topology level, specifying the CPU cores.

  This patch series only uses the two lower levels: sockets and cores.
  To get the information on the topology, S390 provides the STSI
  instruction, which stores a structure providing the list of the
  containers used in the Machine topology: the SYSIB.
  A selector within the STSI instruction allows choosing how many topology
  levels will be provided in the SYSIB.

  Using the Topology List Entries (TLE) provided inside the SYSIB,
  the Linux kernel is able to compute the information about the cache
  distance between two cores and can use this information to take
  scheduling decisions.


Do the socket, book, ... metaphors and looking at STSI from the existing
smp infrastructure even make sense?


Sorry, I do not understand.
I admit the cover-letter is old and I did not rewrite it really good since the 
first patch series.

What we do is:
Compute the STSI from the SMP + numa + device QEMU parameters .



STSI 15.1.x reports the topology to the guest and for a virtual machine,
this topology can be very dynamic. So a CPU can move from one topology
container to another, but the socket of a cpu changing while it's running seems
a bit strange. And this isn't supported by this patch series as far as I 
understand,
the only topology changes are on hotplug.


A CPU changing from a socket to another socket is the only case the PTF 
instruction reports a change in the topology with the case a new CPU is plug in.


Can a CPU actually change between sockets right now?


To be exact, what I understand is that a shared CPU can be scheduled to 
another real CPU exactly as a guest vCPU can be scheduled by the host to 
another host CPU.



The socket-id is computed from the core-id, so it's fixed, is it not?


the virtual socket-id is computed from the virtual core-id




It is not expected to appear often but it does appear.
The code has been removed from the kernel in spin 10 for 2 reasons:
1) we decided to first support only dedicated and pinned CPUs; 2) Christian 
fears it may happen too often due to Linux host scheduling and could be a 
performance problem


This seems sensible, but now it seems too static.
For example after migration, you cannot tell the guest which CPUs are in the 
same socket, book, ...,
unless I'm misunderstanding something.


No, to do this we would need to ask the kernel about it.


And migration is rare, but something you'd want to be able to react to.
And I could imagine that the vCPUs are pinned most of the time, but the pinning 
changes occasionally.


I think on migration we should just make a kvm_set_mtcr on post_load 
like Nico suggested everything else seems complicated for a questionable 
benefit.



--
Pierre Morel
IBM Lab Boeblingen



[PATCH v3 3/3] meson: Allow to enable gtk and sdl while cocoa is enabled

2022-07-15 Thread Akihiko Odaki
As ui/cocoa no longer overrides main(), ui/gtk and ui/sdl
can be enabled even when ui/cocoa is enabled.

Signed-off-by: Akihiko Odaki 
Reviewed-by: Peter Maydell 
---
 meson.build | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/meson.build b/meson.build
index bc5569ace15..7baec7896ef 100644
--- a/meson.build
+++ b/meson.build
@@ -583,12 +583,6 @@ if get_option('attr').allowed()
 endif
 
 cocoa = dependency('appleframeworks', modules: 'Cocoa', required: 
get_option('cocoa'))
-if cocoa.found() and get_option('sdl').enabled()
-  error('Cocoa and SDL cannot be enabled at the same time')
-endif
-if cocoa.found() and get_option('gtk').enabled()
-  error('Cocoa and GTK+ cannot be enabled at the same time')
-endif
 
 vmnet = dependency('appleframeworks', modules: 'vmnet', required: 
get_option('vmnet'))
 if vmnet.found() and not cc.has_header_symbol('vmnet/vmnet.h',
@@ -915,7 +909,7 @@ if not get_option('brlapi').auto() or have_system
 endif
 
 sdl = not_found
-if not get_option('sdl').auto() or (have_system and not cocoa.found())
+if not get_option('sdl').auto() or have_system
   sdl = dependency('sdl2', required: get_option('sdl'), kwargs: static_kwargs)
   sdl_image = not_found
 endif
@@ -1181,7 +1175,7 @@ endif
 gtk = not_found
 gtkx11 = not_found
 vte = not_found
-if not get_option('gtk').auto() or (have_system and not cocoa.found())
+if not get_option('gtk').auto() or have_system
   gtk = dependency('gtk+-3.0', version: '>=3.22.0',
method: 'pkg-config',
required: get_option('gtk'),
-- 
2.32.1 (Apple Git-133)




Re: [RFC PATCH 7/8] block: use the new _change_ API instead of _can_set_ and _set_

2022-07-15 Thread Hanna Reitz

On 12.07.22 23:19, Emanuele Giuseppe Esposito wrote:

Replace all direct usage of ->can_set_aio_ctx and ->set_aio_ctx,
and call bdrv_child_try_change_aio_context() in
bdrv_try_set_aio_context(), the main function called through
the whole block layer.

 From this point onwards, ->can_set_aio_ctx and ->set_aio_ctx
won't be used anymore.

Signed-off-by: Emanuele Giuseppe Esposito 
---
  block.c   | 30 --
  block/block-backend.c |  8 ++--
  2 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/block.c b/block.c
index a7ba590dfa..101188a2d4 100644
--- a/block.c
+++ b/block.c
@@ -2966,17 +2966,18 @@ static void bdrv_attach_child_common_abort(void *opaque)
  }
  
  if (bdrv_child_get_parent_aio_context(child) != s->old_parent_ctx) {

+Transaction *tran;
  GSList *ignore;
+bool ret;
  
-/* No need to ignore `child`, because it has been detached already */

-ignore = NULL;
-child->klass->can_set_aio_ctx(child, s->old_parent_ctx, ,
-  _abort);
-g_slist_free(ignore);
+tran = tran_new();
  
+/* No need to ignore `child`, because it has been detached already */

  ignore = NULL;
-child->klass->set_aio_ctx(child, s->old_parent_ctx, );
+ret = child->klass->change_aio_ctx(child, s->old_parent_ctx, ,
+   tran, _abort);
  g_slist_free(ignore);
+tran_finalize(tran, ret ? ret : -1);


As far as I understand, the transaction is supposed to always succeed; 
that’s why we pass `_abort`, I thought.


If so, `ret` should always be true.  More importantly, though, I think 
the `ret ? ret : -1` is wrong because it’ll always evaluate to either 1 
or -1, but never to 0, which would indicate success.  I think it should 
be `ret == true ? 0 : -1`, or even better `assert(ret == true); 
tran_finalize(tran, 0);`.



  }
  
  bdrv_unref(bs);

@@ -3037,17 +3038,18 @@ static int bdrv_attach_child_common(BlockDriverState 
*child_bs,
  Error *local_err = NULL;
  int ret = bdrv_try_set_aio_context(child_bs, parent_ctx, _err);
  
-if (ret < 0 && child_class->can_set_aio_ctx) {

+if (ret < 0 && child_class->change_aio_ctx) {
+Transaction *tran = tran_new();
  GSList *ignore = g_slist_prepend(NULL, new_child);
-if (child_class->can_set_aio_ctx(new_child, child_ctx, ,
- NULL))
-{
+bool ret_child;
+
+ret_child = child_class->change_aio_ctx(new_child, child_ctx,
+, tran, NULL);
+if (ret_child) {


To be honest, due to the mix of return value styles we have, perhaps a 
`ret_child == true` would help to signal that this is a success path.



  error_free(local_err);
  ret = 0;
-g_slist_free(ignore);
-ignore = g_slist_prepend(NULL, new_child);
-child_class->set_aio_ctx(new_child, child_ctx, );
  }
+tran_finalize(tran, ret_child ? ret_child : -1);


This too should probably be `ret_child == true ? 0 : -1`.


  g_slist_free(ignore);
  }
  
@@ -7708,7 +7710,7 @@ int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,

   Error **errp)
  {
  GLOBAL_STATE_CODE();
-return bdrv_child_try_set_aio_context(bs, ctx, NULL, errp);
+return bdrv_child_try_change_aio_context(bs, ctx, NULL, errp);


Why not remove this function and adjust all callers?

Hanna


  }
  
  void bdrv_add_aio_context_notifier(BlockDriverState *bs,





[PATCH v3 1/3] ui/cocoa: Run qemu_init in the main thread

2022-07-15 Thread Akihiko Odaki
This work is based on:
https://patchew.org/QEMU/20220317125534.38706-1-philippe.mathieu.da...@gmail.com/

Simplify the initialization dance by running qemu_init() in the main
thread before the Cocoa event loop starts. The secondary thread
runs only qemu_main_loop() and qemu_cleanup().

This fixes a case where addRemovableDevicesMenuItems() calls
qmp_query_block() while expecting the main thread to still hold
the BQL.

Overriding the code after calling qemu_init() is done by dynamically
replacing a function pointer variable, qemu_main when initializing
ui/cocoa, which unifies the static implementation of main() for
builds with ui/cocoa and ones without ui/cocoa.

Signed-off-by: Akihiko Odaki 
---
 docs/devel/fuzzing.rst  |   4 +-
 include/qemu-main.h |   3 +-
 include/sysemu/sysemu.h |   2 +-
 softmmu/main.c  |  14 ++--
 softmmu/vl.c|   2 +-
 tests/qtest/fuzz/fuzz.c |   2 +-
 ui/cocoa.m  | 172 +++-
 7 files changed, 76 insertions(+), 123 deletions(-)

diff --git a/docs/devel/fuzzing.rst b/docs/devel/fuzzing.rst
index 784ecb99e66..715330c8561 100644
--- a/docs/devel/fuzzing.rst
+++ b/docs/devel/fuzzing.rst
@@ -287,8 +287,8 @@ select the fuzz target. Then, the qtest client is 
initialized. If the target
 requires qos, qgraph is set up and the QOM/LIBQOS modules are initialized.
 Then the QGraph is walked and the QEMU cmd_line is determined and saved.
 
-After this, the ``vl.c:qemu_main`` is called to set up the guest. There are
-target-specific hooks that can be called before and after qemu_main, for
+After this, the ``vl.c:main`` is called to set up the guest. There are
+target-specific hooks that can be called before and after main, for
 additional setup(e.g. PCI setup, or VM snapshotting).
 
 ``LLVMFuzzerTestOneInput``: Uses qtest/qos functions to act based on the fuzz
diff --git a/include/qemu-main.h b/include/qemu-main.h
index 6a3e90d0ad5..6889375e7c2 100644
--- a/include/qemu-main.h
+++ b/include/qemu-main.h
@@ -5,6 +5,7 @@
 #ifndef QEMU_MAIN_H
 #define QEMU_MAIN_H
 
-int qemu_main(int argc, char **argv, char **envp);
+void qemu_default_main(void);
+extern void (*qemu_main)(void);
 
 #endif /* QEMU_MAIN_H */
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 812f66a31a9..254c1eabf57 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -102,7 +102,7 @@ void qemu_boot_set(const char *boot_order, Error **errp);
 
 bool defaults_enabled(void);
 
-void qemu_init(int argc, char **argv, char **envp);
+void qemu_init(int argc, char **argv);
 void qemu_main_loop(void);
 void qemu_cleanup(void);
 
diff --git a/softmmu/main.c b/softmmu/main.c
index c00432ff098..41a091f2c72 100644
--- a/softmmu/main.c
+++ b/softmmu/main.c
@@ -30,18 +30,18 @@
 #include 
 #endif
 
-int qemu_main(int argc, char **argv, char **envp)
+void qemu_default_main(void)
 {
-qemu_init(argc, argv, envp);
 qemu_main_loop();
 qemu_cleanup();
-
-return 0;
 }
 
-#ifndef CONFIG_COCOA
+void (*qemu_main)(void) = qemu_default_main;
+
 int main(int argc, char **argv)
 {
-return qemu_main(argc, argv, NULL);
+qemu_init(argc, argv);
+qemu_main();
+
+return 0;
 }
-#endif
diff --git a/softmmu/vl.c b/softmmu/vl.c
index 3f264d4b093..e8c73d0bb40 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -2589,7 +2589,7 @@ void qmp_x_exit_preconfig(Error **errp)
 }
 }
 
-void qemu_init(int argc, char **argv, char **envp)
+void qemu_init(int argc, char **argv)
 {
 QemuOpts *opts;
 QemuOpts *icount_opts = NULL, *accel_opts = NULL;
diff --git a/tests/qtest/fuzz/fuzz.c b/tests/qtest/fuzz/fuzz.c
index 0ad4ba9e94d..678c312923a 100644
--- a/tests/qtest/fuzz/fuzz.c
+++ b/tests/qtest/fuzz/fuzz.c
@@ -236,7 +236,7 @@ int LLVMFuzzerInitialize(int *argc, char ***argv, char 
***envp)
 g_free(pretty_cmd_line);
 }
 
-qemu_init(result.we_wordc, result.we_wordv, NULL);
+qemu_init(result.we_wordc, result.we_wordv);
 
 /* re-enable the rcu atfork, which was previously disabled in qemu_init */
 rcu_enable_atfork();
diff --git a/ui/cocoa.m b/ui/cocoa.m
index 6a4dccff7f0..c181d8d2fb3 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -100,13 +100,11 @@ static void cocoa_switch(DisplayChangeListener *dcl,
 static int left_command_key_enabled = 1;
 static bool swap_opt_cmd;
 
-static int gArgc;
-static char **gArgv;
+static QemuThread qemu_main_thread;
+static bool qemu_main_terminating;
 static bool stretch_video;
 static NSTextField *pauseLabel;
 
-static QemuSemaphore display_init_sem;
-static QemuSemaphore app_started_sem;
 static bool allow_events;
 
 static NSInteger cbchangecount = -1;
@@ -585,7 +583,7 @@ - (void) updateUIInfo
 /*
  * Don't try to tell QEMU about UI information in the application
  * startup phase -- we haven't yet registered dcl with the QEMU UI
- * layer, and also trying to take the iothread lock would deadlock.
+ * layer.
  * When 

[PATCH v3 2/3] Revert "main-loop: Disable block backend global state assertion on Cocoa"

2022-07-15 Thread Akihiko Odaki
This reverts commit 47281859f66bdab1974fb122cab2cbb4a1c9af7f.

Signed-off-by: Akihiko Odaki 
Reviewed-by: Peter Maydell 
---
 include/qemu/main-loop.h | 13 -
 1 file changed, 13 deletions(-)

diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index 5518845299d..0aa36a4f17e 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -280,23 +280,10 @@ bool qemu_mutex_iothread_locked(void);
 bool qemu_in_main_thread(void);
 
 /* Mark and check that the function is part of the global state API. */
-#ifdef CONFIG_COCOA
-/*
- * When using the Cocoa UI, addRemovableDevicesMenuItems() is called from
- * a thread different from the QEMU main thread and can not take the BQL,
- * triggering this assertions in the block layer (commit 0439c5a462).
- * As the Cocoa fix is not trivial, disable this assertion for the v7.0.0
- * release (when using Cocoa); we will restore it immediately after the
- * release.
- * This issue is tracked as https://gitlab.com/qemu-project/qemu/-/issues/926
- */
-#define GLOBAL_STATE_CODE()
-#else
 #define GLOBAL_STATE_CODE() \
 do {\
 assert(qemu_in_main_thread());  \
 } while (0)
-#endif /* CONFIG_COCOA */
 
 /* Mark and check that the function is part of the I/O API. */
 #define IO_CODE()   \
-- 
2.32.1 (Apple Git-133)




[PATCH v3 0/3] ui/cocoa: Run qemu_init in the main thread

2022-07-15 Thread Akihiko Odaki
This work is based on:
https://patchew.org/QEMU/20220317125534.38706-1-philippe.mathieu.da...@gmail.com/

Simplify the initialization dance by running qemu_init() in the main
thread before the Cocoa event loop starts. The secondary thread
runs only qemu_main_loop() and qemu_cleanup().

This fixes a case where addRemovableDevicesMenuItems() calls
qmp_query_block() while expecting the main thread to still hold
the BQL.

Overriding the code after calling qemu_init() is done by dynamically
replacing a function pointer variable, qemu_main when initializing
ui/cocoa, which unifies the static implementation of main() for
builds with ui/cocoa and ones without ui/cocoa.

v3: Document functions involved in startup. (Peter Maydell)

v2: Restore allow_events flag to fix the crash reported by
Philippe Mathieu-Daudé.

Akihiko Odaki (3):
  ui/cocoa: Run qemu_init in the main thread
  Revert "main-loop: Disable block backend global state assertion on
Cocoa"
  meson: Allow to enable gtk and sdl while cocoa is enabled

 docs/devel/fuzzing.rst   |   4 +-
 include/qemu-main.h  |   3 +-
 include/qemu/main-loop.h |  13 ---
 include/sysemu/sysemu.h  |   2 +-
 meson.build  |  10 +--
 softmmu/main.c   |  14 ++--
 softmmu/vl.c |   2 +-
 tests/qtest/fuzz/fuzz.c  |   2 +-
 ui/cocoa.m   | 172 ++-
 9 files changed, 78 insertions(+), 144 deletions(-)

-- 
2.32.1 (Apple Git-133)




Re: [PULL v2 00/19] aspeed queue

2022-07-15 Thread Peter Maydell
On Thu, 14 Jul 2022 at 16:45, Cédric Le Goater  wrote:
>
> The following changes since commit 08c9f7eec7002dac2da52c8265eb319aba381c86:
>
>   Merge tag 'darwin-20220712' of https://github.com/philmd/qemu into staging 
> (2022-07-14 09:30:55 +0100)
>
> are available in the Git repository at:
>
>   https://github.com/legoater/qemu/ tags/pull-aspeed-20220714
>
> for you to fetch changes up to f0418558302ef9e140681e04250fc1ca265f3140:
>
>   aspeed: Add fby35-bmc slot GPIO's (2022-07-14 16:24:38 +0200)
>
> 
> aspeed queue:
>
> * New ISL69259 device model
> * New fby35 multi-SoC machine (AST1030 BIC + AST2600 BMC)
> * Aspeed GPIO fixes
> * Extension of m25p80 with write protect bits
> * More avocado tests using the Aspeed SDK


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/7.1
for any user-visible changes.

-- PMM



Re: [PATCH v3] Align Raspberry Pi DMA interrupts with Linux DTS

2022-07-15 Thread Peter Maydell
On Thu, 14 Jul 2022 at 10:44, Andrey Makarov  wrote:
>
> In v3:
>
> - changed naming of orgate & removed hard-coded constants
>
>
> Signed-off-by: Andrey Makarov 


> diff --git a/tests/qtest/bcm2835-dma-test.c b/tests/qtest/bcm2835-dma-test.c
> new file mode 100644
> index 00..111adfe7f2
> --- /dev/null
> +++ b/tests/qtest/bcm2835-dma-test.c
> @@ -0,0 +1,106 @@

All new files need to start with a comment with the copyright
and license information, please.

thanks
-- PMM



Re: [PATCH] target/arm: Don't set syndrome ISS for loads and stores with writeback

2022-07-15 Thread Richard Henderson

On 7/15/22 18:03, Peter Maydell wrote:

The architecture requires that for faults on loads and stores which
do writeback, the syndrome information does not have the ISS
instruction syndrome information (i.e. ISV is 0).  We got this wrong
for the load and store instructions covered by disas_ldst_reg_imm9().
Calculate iss_valid correctly so that if the insn is a writeback one
it is false.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1057
Signed-off-by: Peter Maydell 
---
Tested with RTH's test case attached to the bug report.
---
  target/arm/translate-a64.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)


Reviewed-by: Richard Henderson 

r~



diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index b7b64f73584..163df8c6157 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -3138,7 +3138,7 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t 
insn,
  bool is_store = false;
  bool is_extended = false;
  bool is_unpriv = (idx == 2);
-bool iss_valid = !is_vector;
+bool iss_valid;
  bool post_index;
  bool writeback;
  int memidx;
@@ -3191,6 +3191,8 @@ static void disas_ldst_reg_imm9(DisasContext *s, uint32_t 
insn,
  g_assert_not_reached();
  }
  
+iss_valid = !is_vector && !writeback;

+
  if (rn == 31) {
  gen_check_sp_alignment(s);
  }





Re: [PATCH v5 2/8] hw/i2c: Read FIFO during RXF_CTL change in NPCM7XX SMBus

2022-07-15 Thread Peter Maydell
On Thu, 14 Jul 2022 at 19:28, Hao Wu  wrote:
>
> Originally we read in from SMBus when RXF_STS is cleared. However,
> the driver clears RXF_STS before setting RXF_CTL, causing the SM bus
> module to read incorrect amount of bytes in FIFO mode when the number
> of bytes read changed. This patch fixes this issue.
>
> Signed-off-by: Hao Wu 
> Reviewed-by: Titus Rwantare 
> Acked-by: Corey Minyard 
> ---
>  hw/i2c/npcm7xx_smbus.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/hw/i2c/npcm7xx_smbus.c b/hw/i2c/npcm7xx_smbus.c
> index f18e311556..1435daea94 100644
> --- a/hw/i2c/npcm7xx_smbus.c
> +++ b/hw/i2c/npcm7xx_smbus.c
> @@ -637,9 +637,6 @@ static void npcm7xx_smbus_write_rxf_sts(NPCM7xxSMBusState 
> *s, uint8_t value)
>  {
>  if (value & NPCM7XX_SMBRXF_STS_RX_THST) {
>  s->rxf_sts &= ~NPCM7XX_SMBRXF_STS_RX_THST;
> -if (s->status == NPCM7XX_SMBUS_STATUS_RECEIVING) {
> -npcm7xx_smbus_recv_fifo(s);
> -}
>  }
>  }
>
> @@ -651,6 +648,9 @@ static void npcm7xx_smbus_write_rxf_ctl(NPCM7xxSMBusState 
> *s, uint8_t value)
>  new_ctl = KEEP_OLD_BIT(s->rxf_ctl, new_ctl, NPCM7XX_SMBRXF_CTL_LAST);
>  }
>  s->rxf_ctl = new_ctl;
> +if (s->status == NPCM7XX_SMBUS_STATUS_RECEIVING) {
> +npcm7xx_smbus_recv_fifo(s);
> +}
>  }

I don't know anything about this hardware, but this looks a bit odd.
Why should we care what order the driver does the register operations
in? Do we really want to read new fifo data regardless of what value
the driver writes to RXF_CTL ? Should the logic actually be "if the
new device register state is  then read fifo data", and
checked in both places ?

thanks
-- PMM



Re: [PATCH v3 0/3] ppc: Check for bad Radix configs

2022-07-15 Thread Daniel Henrique Barboza

Queued in gitlab.com/danielhb/qemu/tree/ppc-next. Thanks,


Daniel

On 6/28/22 10:39, Leandro Lupori wrote:

Changes from v2:
- Improved comments on patch 2
- Improved commit message on patch 3
- Now emulating CPU behavior on misaligned page table base addresses

Leandro Lupori (3):
   ppc: Check partition and process table alignment
   target/ppc: Improve Radix xlate level validation
   target/ppc: Check page dir/table base alignment

  hw/ppc/spapr.c |  5 +++
  hw/ppc/spapr_hcall.c   |  9 
  target/ppc/mmu-book3s-v3.c |  5 +++
  target/ppc/mmu-radix64.c   | 92 ++
  4 files changed, 93 insertions(+), 18 deletions(-)





Re: [RFC PATCH 8/8] block: remove all unused ->can_set_aio_ctx and ->set_aio_ctx callbacks

2022-07-15 Thread Hanna Reitz

On 12.07.22 23:19, Emanuele Giuseppe Esposito wrote:

Together with all _can_set_ and _set_ APIs, as they are not needed
anymore.

Signed-off-by: Emanuele Giuseppe Esposito 
---
  block.c| 196 -
  block/block-backend.c  |  33 -
  blockjob.c |  35 --
  include/block/block-global-state.h |   9 --
  include/block/block_int-common.h   |   4 -
  5 files changed, 277 deletions(-)


Looks good!  I’d just like a follow-up commit that also drops 
bdrv_try_set_aio_context(), so it’s all gone (I think that’s the final 
remnant?).


Hanna




[PATCH v3 02/19] virtio-net: Expose MAC_TABLE_ENTRIES

2022-07-15 Thread Eugenio Pérez
vhost-vdpa control virtqueue needs to know the maximum entries supported
by the virtio-net device, so we know if it is possible to apply the
filter.

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/virtio-net.h | 3 +++
 hw/net/virtio-net.c| 1 -
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index eb87032627..cce1c554f7 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -35,6 +35,9 @@ OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET)
  * and latency. */
 #define TX_BURST 256
 
+/* Maximum VIRTIO_NET_CTRL_MAC_TABLE_SET unicast + multicast entries. */
+#define MAC_TABLE_ENTRIES64
+
 typedef struct virtio_net_conf
 {
 uint32_t txtimer;
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 7ad948ee7c..f83e96e4ce 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -49,7 +49,6 @@
 
 #define VIRTIO_NET_VM_VERSION11
 
-#define MAC_TABLE_ENTRIES64
 #define MAX_VLAN(1 << 12)   /* Per 802.1Q definition */
 
 /* previously fixed value */
-- 
2.31.1




[PATCH v3 07/19] vhost: Decouple vhost_svq_add from VirtQueueElement

2022-07-15 Thread Eugenio Pérez
VirtQueueElement comes from the guest, but we are moving towards enabling
SVQ to modify the element presented to the device without the guest's
knowledge.

To do so, make SVQ accept sg buffers directly, instead of using
VirtQueueElement.

Add vhost_svq_add_element to maintain element convenience.

Signed-off-by: Eugenio Pérez 
Acked-by: Jason Wang 
---
 hw/virtio/vhost-shadow-virtqueue.c | 33 --
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index aee9891a67..b005a457c6 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -172,30 +172,31 @@ static bool 
vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
 }
 
 static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
-VirtQueueElement *elem, unsigned *head)
+const struct iovec *out_sg, size_t out_num,
+const struct iovec *in_sg, size_t in_num,
+unsigned *head)
 {
 unsigned avail_idx;
 vring_avail_t *avail = svq->vring.avail;
 bool ok;
-g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num));
+g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num));
 
 *head = svq->free_head;
 
 /* We need some descriptors here */
-if (unlikely(!elem->out_num && !elem->in_num)) {
+if (unlikely(!out_num && !in_num)) {
 qemu_log_mask(LOG_GUEST_ERROR,
   "Guest provided element with no descriptors");
 return false;
 }
 
-ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
- elem->in_num > 0, false);
+ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0,
+ false);
 if (unlikely(!ok)) {
 return false;
 }
 
-ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, 
false,
- true);
+ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true);
 if (unlikely(!ok)) {
 return false;
 }
@@ -237,17 +238,19 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
  *
  * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
  */
-static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
+static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
+  size_t out_num, const struct iovec *in_sg,
+  size_t in_num, VirtQueueElement *elem)
 {
 unsigned qemu_head;
-unsigned ndescs = elem->in_num + elem->out_num;
+unsigned ndescs = in_num + out_num;
 bool ok;
 
 if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
 return -ENOSPC;
 }
 
-ok = vhost_svq_add_split(svq, elem, _head);
+ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, _head);
 if (unlikely(!ok)) {
 g_free(elem);
 return -EINVAL;
@@ -258,6 +261,14 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, 
VirtQueueElement *elem)
 return 0;
 }
 
+/* Convenience wrapper to add a guest's element to SVQ */
+static int vhost_svq_add_element(VhostShadowVirtqueue *svq,
+ VirtQueueElement *elem)
+{
+return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg,
+ elem->in_num, elem);
+}
+
 /**
  * Forward available buffers.
  *
@@ -294,7 +305,7 @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue 
*svq)
 break;
 }
 
-r = vhost_svq_add(svq, elem);
+r = vhost_svq_add_element(svq, elem);
 if (unlikely(r != 0)) {
 if (r == -ENOSPC) {
 /*
-- 
2.31.1




[PATCH v8 09/11] i386/pc: bounds check phys-bits against max used GPA

2022-07-15 Thread Joao Martins
Calculate max *used* GPA against the CPU maximum possible address
and error out if the former surpasses the latter. This ensures
max used GPA is reachable by configured phys-bits. Default phys-bits
on Qemu is TCG_PHYS_ADDR_BITS (40) which is enough for the CPU to
address 1Tb (0xff  ) or 1010G (0xfc  ) in AMD hosts
with IOMMU.

This is preparation for AMD guests with >1010G, where it will want to
relocate ram-above-4g to be after 1Tb instead of 4G.

Signed-off-by: Joao Martins 
---
 hw/i386/pc.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index cda435e3baeb..f30661b7f1a2 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -880,6 +880,18 @@ static uint64_t pc_get_cxl_range_end(PCMachineState *pcms)
 return start;
 }
 
+static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t pci_hole64_size)
+{
+X86CPU *cpu = X86_CPU(first_cpu);
+
+/* 32-bit systems don't have hole64 thus return max CPU address */
+if (cpu->phys_bits <= 32) {
+return ((hwaddr)1 << cpu->phys_bits) - 1;
+}
+
+return pc_pci_hole64_start() + pci_hole64_size - 1;
+}
+
 void pc_memory_init(PCMachineState *pcms,
 MemoryRegion *system_memory,
 MemoryRegion *rom_memory,
@@ -894,13 +906,28 @@ void pc_memory_init(PCMachineState *pcms,
 MachineClass *mc = MACHINE_GET_CLASS(machine);
 PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms);
 X86MachineState *x86ms = X86_MACHINE(pcms);
+hwaddr maxphysaddr, maxusedaddr;
 hwaddr cxl_base, cxl_resv_end = 0;
+X86CPU *cpu = X86_CPU(first_cpu);
 
 assert(machine->ram_size == x86ms->below_4g_mem_size +
 x86ms->above_4g_mem_size);
 
 linux_boot = (machine->kernel_filename != NULL);
 
+/*
+ * phys-bits is required to be appropriately configured
+ * to make sure max used GPA is reachable.
+ */
+maxusedaddr = pc_max_used_gpa(pcms, pci_hole64_size);
+maxphysaddr = ((hwaddr)1 << cpu->phys_bits) - 1;
+if (maxphysaddr < maxusedaddr) {
+error_report("Address space limit 0x%"PRIx64" < 0x%"PRIx64
+ " phys-bits too low (%u)",
+ maxphysaddr, maxusedaddr, cpu->phys_bits);
+exit(EXIT_FAILURE);
+}
+
 /*
  * Split single memory region and use aliases to address portions of it,
  * done for backwards compatibility with older qemus.
-- 
2.17.2




[PATCH v3 04/19] vhost: Reorder vhost_svq_kick

2022-07-15 Thread Eugenio Pérez
Future code needs to call it from vhost_svq_add.

No functional change intended.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.c | 28 ++--
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index e2184a4481..fd1839cec5 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -215,6 +215,20 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
 return true;
 }
 
+static void vhost_svq_kick(VhostShadowVirtqueue *svq)
+{
+/*
+ * We need to expose the available array entries before checking the used
+ * flags
+ */
+smp_mb();
+if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
+return;
+}
+
+event_notifier_set(>hdev_kick);
+}
+
 /**
  * Add an element to a SVQ.
  *
@@ -235,20 +249,6 @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, 
VirtQueueElement *elem)
 return true;
 }
 
-static void vhost_svq_kick(VhostShadowVirtqueue *svq)
-{
-/*
- * We need to expose the available array entries before checking the used
- * flags
- */
-smp_mb();
-if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
-return;
-}
-
-event_notifier_set(>hdev_kick);
-}
-
 /**
  * Forward available buffers.
  *
-- 
2.31.1




[PATCH v3 01/19] vhost: move descriptor translation to vhost_svq_vring_write_descs

2022-07-15 Thread Eugenio Pérez
It's done for both in and out descriptors so it's better placed here.

Acked-by: Jason Wang 
Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.c | 38 +-
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 56c96ebd13..e2184a4481 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -122,17 +122,35 @@ static bool vhost_svq_translate_addr(const 
VhostShadowVirtqueue *svq,
 return true;
 }
 
-static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
-const struct iovec *iovec, size_t num,
-bool more_descs, bool write)
+/**
+ * Write descriptors to SVQ vring
+ *
+ * @svq: The shadow virtqueue
+ * @sg: Cache for hwaddr
+ * @iovec: The iovec from the guest
+ * @num: iovec length
+ * @more_descs: True if more descriptors come in the chain
+ * @write: True if they are writeable descriptors
+ *
+ * Return true if success, false otherwise and print error.
+ */
+static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
+const struct iovec *iovec, size_t num,
+bool more_descs, bool write)
 {
 uint16_t i = svq->free_head, last = svq->free_head;
 unsigned n;
 uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
 vring_desc_t *descs = svq->vring.desc;
+bool ok;
 
 if (num == 0) {
-return;
+return true;
+}
+
+ok = vhost_svq_translate_addr(svq, sg, iovec, num);
+if (unlikely(!ok)) {
+return false;
 }
 
 for (n = 0; n < num; n++) {
@@ -150,6 +168,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue 
*svq, hwaddr *sg,
 }
 
 svq->free_head = le16_to_cpu(svq->desc_next[last]);
+return true;
 }
 
 static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
@@ -169,21 +188,18 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
 return false;
 }
 
-ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num);
+ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
+ elem->in_num > 0, false);
 if (unlikely(!ok)) {
 return false;
 }
-vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
-elem->in_num > 0, false);
-
 
-ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num);
+ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, 
false,
+ true);
 if (unlikely(!ok)) {
 return false;
 }
 
-vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true);
-
 /*
  * Put the entry in the available array (but don't update avail->idx until
  * they do sync).
-- 
2.31.1




[PATCH v3 08/19] vhost: Add SVQDescState

2022-07-15 Thread Eugenio Pérez
This will allow SVQ to add context to the different queue elements.

This patch only store the actual element, no functional change intended.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  8 ++--
 hw/virtio/vhost-shadow-virtqueue.c | 16 
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index c132c994e9..d646c35054 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -15,6 +15,10 @@
 #include "standard-headers/linux/vhost_types.h"
 #include "hw/virtio/vhost-iova-tree.h"
 
+typedef struct SVQDescState {
+VirtQueueElement *elem;
+} SVQDescState;
+
 /* Shadow virtqueue to relay notifications */
 typedef struct VhostShadowVirtqueue {
 /* Shadow vring */
@@ -47,8 +51,8 @@ typedef struct VhostShadowVirtqueue {
 /* IOVA mapping */
 VhostIOVATree *iova_tree;
 
-/* Map for use the guest's descriptors */
-VirtQueueElement **ring_id_maps;
+/* SVQ vring descriptors state */
+SVQDescState *desc_state;
 
 /* Next VirtQueue element that guest made available */
 VirtQueueElement *next_guest_avail_elem;
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index b005a457c6..d12f5afffb 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -256,7 +256,7 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const 
struct iovec *out_sg,
 return -EINVAL;
 }
 
-svq->ring_id_maps[qemu_head] = elem;
+svq->desc_state[qemu_head].elem = elem;
 vhost_svq_kick(svq);
 return 0;
 }
@@ -410,21 +410,21 @@ static VirtQueueElement 
*vhost_svq_get_buf(VhostShadowVirtqueue *svq,
 return NULL;
 }
 
-if (unlikely(!svq->ring_id_maps[used_elem.id])) {
+if (unlikely(!svq->desc_state[used_elem.id].elem)) {
 qemu_log_mask(LOG_GUEST_ERROR,
 "Device %s says index %u is used, but it was not available",
 svq->vdev->name, used_elem.id);
 return NULL;
 }
 
-num = svq->ring_id_maps[used_elem.id]->in_num +
-  svq->ring_id_maps[used_elem.id]->out_num;
+num = svq->desc_state[used_elem.id].elem->in_num +
+  svq->desc_state[used_elem.id].elem->out_num;
 last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
 svq->desc_next[last_used_chain] = svq->free_head;
 svq->free_head = used_elem.id;
 
 *len = used_elem.len;
-return g_steal_pointer(>ring_id_maps[used_elem.id]);
+return g_steal_pointer(>desc_state[used_elem.id].elem);
 }
 
 static void vhost_svq_flush(VhostShadowVirtqueue *svq,
@@ -594,7 +594,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, 
VirtIODevice *vdev,
 memset(svq->vring.desc, 0, driver_size);
 svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size);
 memset(svq->vring.used, 0, device_size);
-svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num);
+svq->desc_state = g_new0(SVQDescState, svq->vring.num);
 svq->desc_next = g_new0(uint16_t, svq->vring.num);
 for (unsigned i = 0; i < svq->vring.num - 1; i++) {
 svq->desc_next[i] = cpu_to_le16(i + 1);
@@ -619,7 +619,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
 
 for (unsigned i = 0; i < svq->vring.num; ++i) {
 g_autofree VirtQueueElement *elem = NULL;
-elem = g_steal_pointer(>ring_id_maps[i]);
+elem = g_steal_pointer(>desc_state[i].elem);
 if (elem) {
 virtqueue_detach_element(svq->vq, elem, 0);
 }
@@ -631,7 +631,7 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
 }
 svq->vq = NULL;
 g_free(svq->desc_next);
-g_free(svq->ring_id_maps);
+g_free(svq->desc_state);
 qemu_vfree(svq->vring.desc);
 qemu_vfree(svq->vring.used);
 }
-- 
2.31.1




[PATCH v3 09/19] vhost: Track number of descs in SVQDescState

2022-07-15 Thread Eugenio Pérez
A guest's buffer that is continuous on GPA may need multiple descriptors on
qemu's VA, so SVQ should track its length separately.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h | 6 ++
 hw/virtio/vhost-shadow-virtqueue.c | 4 ++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index d646c35054..5c7e7cbab6 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -17,6 +17,12 @@
 
 typedef struct SVQDescState {
 VirtQueueElement *elem;
+
+/*
+ * Number of descriptors exposed to the device. May or may not match
+ * guest's
+ */
+unsigned int ndescs;
 } SVQDescState;
 
 /* Shadow virtqueue to relay notifications */
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index d12f5afffb..ae5bd6efa8 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -257,6 +257,7 @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const 
struct iovec *out_sg,
 }
 
 svq->desc_state[qemu_head].elem = elem;
+svq->desc_state[qemu_head].ndescs = ndescs;
 vhost_svq_kick(svq);
 return 0;
 }
@@ -417,8 +418,7 @@ static VirtQueueElement 
*vhost_svq_get_buf(VhostShadowVirtqueue *svq,
 return NULL;
 }
 
-num = svq->desc_state[used_elem.id].elem->in_num +
-  svq->desc_state[used_elem.id].elem->out_num;
+num = svq->desc_state[used_elem.id].ndescs;
 last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
 svq->desc_next[last_used_chain] = svq->free_head;
 svq->free_head = used_elem.id;
-- 
2.31.1




[PATCH v3 11/19] vhost: Expose vhost_svq_add

2022-07-15 Thread Eugenio Pérez
This allows external parts of SVQ to forward custom buffers to the
device.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h | 3 +++
 hw/virtio/vhost-shadow-virtqueue.c | 6 +++---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index d9fc1f1799..dd78f4bec2 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -86,6 +86,9 @@ bool vhost_svq_valid_features(uint64_t features, Error 
**errp);
 
 void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
  const VirtQueueElement *elem, uint32_t len);
+int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
+  size_t out_num, const struct iovec *in_sg, size_t in_num,
+  VirtQueueElement *elem);
 
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index b377e125e7..406a823c81 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -238,9 +238,9 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
  *
  * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
  */
-static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
-  size_t out_num, const struct iovec *in_sg,
-  size_t in_num, VirtQueueElement *elem)
+int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
+  size_t out_num, const struct iovec *in_sg, size_t in_num,
+  VirtQueueElement *elem)
 {
 unsigned qemu_head;
 unsigned ndescs = in_num + out_num;
-- 
2.31.1




[PATCH v3 03/19] virtio-net: Expose ctrl virtqueue logic

2022-07-15 Thread Eugenio Pérez
This allows external vhost-net devices to modify the state of the
VirtIO device model once the vhost-vdpa device has acknowledged the
control commands.

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/virtio-net.h |  4 ++
 hw/net/virtio-net.c| 84 --
 2 files changed, 53 insertions(+), 35 deletions(-)

diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index cce1c554f7..ef234ffe7e 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -221,6 +221,10 @@ struct VirtIONet {
 struct EBPFRSSContext ebpf_rss;
 };
 
+size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
+  const struct iovec *in_sg, unsigned in_num,
+  const struct iovec *out_sg,
+  unsigned out_num);
 void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
const char *type);
 
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index f83e96e4ce..dd0d056fde 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1433,57 +1433,71 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t 
cmd,
 return VIRTIO_NET_OK;
 }
 
-static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
+size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
+  const struct iovec *in_sg, unsigned in_num,
+  const struct iovec *out_sg,
+  unsigned out_num)
 {
 VirtIONet *n = VIRTIO_NET(vdev);
 struct virtio_net_ctrl_hdr ctrl;
 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
-VirtQueueElement *elem;
 size_t s;
 struct iovec *iov, *iov2;
-unsigned int iov_cnt;
+
+if (iov_size(in_sg, in_num) < sizeof(status) ||
+iov_size(out_sg, out_num) < sizeof(ctrl)) {
+virtio_error(vdev, "virtio-net ctrl missing headers");
+return 0;
+}
+
+iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
+s = iov_to_buf(iov, out_num, 0, , sizeof(ctrl));
+iov_discard_front(, _num, sizeof(ctrl));
+if (s != sizeof(ctrl)) {
+status = VIRTIO_NET_ERR;
+} else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
+status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
+status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
+status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
+status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
+status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
+} else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
+status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
+}
+
+s = iov_from_buf(in_sg, in_num, 0, , sizeof(status));
+assert(s == sizeof(status));
+
+g_free(iov2);
+return sizeof(status);
+}
+
+static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
+{
+VirtQueueElement *elem;
 
 for (;;) {
+size_t written;
 elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 if (!elem) {
 break;
 }
-if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
-iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
-virtio_error(vdev, "virtio-net ctrl missing headers");
+
+written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
+ elem->out_sg, elem->out_num);
+if (written > 0) {
+virtqueue_push(vq, elem, written);
+virtio_notify(vdev, vq);
+g_free(elem);
+} else {
 virtqueue_detach_element(vq, elem, 0);
 g_free(elem);
 break;
 }
-
-iov_cnt = elem->out_num;
-iov2 = iov = g_memdup2(elem->out_sg,
-   sizeof(struct iovec) * elem->out_num);
-s = iov_to_buf(iov, iov_cnt, 0, , sizeof(ctrl));
-iov_discard_front(, _cnt, sizeof(ctrl));
-if (s != sizeof(ctrl)) {
-status = VIRTIO_NET_ERR;
-} else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
-status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
-} else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
-status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
-} else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
-status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
-} else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
-status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
-} else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
-status = 

[PATCH v3 10/19] vhost: add vhost_svq_push_elem

2022-07-15 Thread Eugenio Pérez
This function allows external SVQ users to return guest's available
buffers.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  3 +++
 hw/virtio/vhost-shadow-virtqueue.c | 16 
 2 files changed, 19 insertions(+)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index 5c7e7cbab6..d9fc1f1799 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -84,6 +84,9 @@ typedef struct VhostShadowVirtqueue {
 
 bool vhost_svq_valid_features(uint64_t features, Error **errp);
 
+void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
+ const VirtQueueElement *elem, uint32_t len);
+
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
 void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index ae5bd6efa8..b377e125e7 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -427,6 +427,22 @@ static VirtQueueElement 
*vhost_svq_get_buf(VhostShadowVirtqueue *svq,
 return g_steal_pointer(>desc_state[used_elem.id].elem);
 }
 
+/**
+ * Push an element to SVQ, returning it to the guest.
+ */
+void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
+ const VirtQueueElement *elem, uint32_t len)
+{
+virtqueue_push(svq->vq, elem, len);
+if (svq->next_guest_avail_elem) {
+/*
+ * Avail ring was full when vhost_svq_flush was called, so it's a
+ * good moment to make more descriptors available if possible.
+ */
+vhost_handle_guest_kick(svq);
+}
+}
+
 static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 bool check_for_avail_queue)
 {
-- 
2.31.1




[PATCH v3 17/19] vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs

2022-07-15 Thread Eugenio Pérez
To know the device features is needed for CVQ SVQ, so SVQ knows if it
can handle all commands or not. Extract from
vhost_vdpa_get_max_queue_pairs so we can reuse it.

Signed-off-by: Eugenio Pérez 
Acked-by: Jason Wang 
---
 net/vhost-vdpa.c | 30 --
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 3915b148c4..0afa60bb51 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -474,20 +474,24 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
 return nc;
 }
 
-static int vhost_vdpa_get_max_queue_pairs(int fd, int *has_cvq, Error **errp)
+static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
+{
+int ret = ioctl(fd, VHOST_GET_FEATURES, features);
+if (unlikely(ret < 0)) {
+error_setg_errno(errp, errno,
+ "Fail to query features from vhost-vDPA device");
+}
+return ret;
+}
+
+static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
+  int *has_cvq, Error **errp)
 {
 unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
 g_autofree struct vhost_vdpa_config *config = NULL;
 __virtio16 *max_queue_pairs;
-uint64_t features;
 int ret;
 
-ret = ioctl(fd, VHOST_GET_FEATURES, );
-if (ret) {
-error_setg(errp, "Fail to query features from vhost-vDPA device");
-return ret;
-}
-
 if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
 *has_cvq = 1;
 } else {
@@ -517,10 +521,11 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
 NetClientState *peer, Error **errp)
 {
 const NetdevVhostVDPAOptions *opts;
+uint64_t features;
 int vdpa_device_fd;
 g_autofree NetClientState **ncs = NULL;
 NetClientState *nc;
-int queue_pairs, i, has_cvq = 0;
+int queue_pairs, r, i, has_cvq = 0;
 
 assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
 opts = >u.vhost_vdpa;
@@ -534,7 +539,12 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
 return -errno;
 }
 
-queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd,
+r = vhost_vdpa_get_features(vdpa_device_fd, , errp);
+if (unlikely(r < 0)) {
+return r;
+}
+
+queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
  _cvq, errp);
 if (queue_pairs < 0) {
 qemu_close(vdpa_device_fd);
-- 
2.31.1




[PATCH v3 16/19] vdpa: Buffer CVQ support on shadow virtqueue

2022-07-15 Thread Eugenio Pérez
Introduce the control virtqueue support for vDPA shadow virtqueue. This
is needed for advanced networking features like rx filtering.

Virtio-net control VQ copies the descriptors to qemu's VA, so we avoid
TOCTOU with the guest's or device's memory every time there is a device
model change.  Otherwise, the guest could change the memory content in
the time between qemu and the device read it.

To demonstrate command handling, VIRTIO_NET_F_CTRL_MACADDR is
implemented.  If the virtio-net driver changes MAC the virtio-net device
model will be updated with the new one, and a rx filtering change event
will be raised.

More cvq commands could be added here straightforwardly but they have
not been tested.

Signed-off-by: Eugenio Pérez 
---
 net/vhost-vdpa.c | 211 +--
 1 file changed, 204 insertions(+), 7 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 2e3b6b10d8..3915b148c4 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -33,6 +33,9 @@ typedef struct VhostVDPAState {
 NetClientState nc;
 struct vhost_vdpa vhost_vdpa;
 VHostNetState *vhost_net;
+
+/* Control commands shadow buffers */
+void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer;
 bool started;
 } VhostVDPAState;
 
@@ -131,6 +134,8 @@ static void vhost_vdpa_cleanup(NetClientState *nc)
 {
 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
 
+qemu_vfree(s->cvq_cmd_out_buffer);
+qemu_vfree(s->cvq_cmd_in_buffer);
 if (s->vhost_net) {
 vhost_net_cleanup(s->vhost_net);
 g_free(s->vhost_net);
@@ -190,24 +195,191 @@ static NetClientInfo net_vhost_vdpa_info = {
 .check_peer_type = vhost_vdpa_check_peer_type,
 };
 
+static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
+{
+VhostIOVATree *tree = v->iova_tree;
+DMAMap needle = {
+/*
+ * No need to specify size or to look for more translations since
+ * this contiguous chunk was allocated by us.
+ */
+.translated_addr = (hwaddr)(uintptr_t)addr,
+};
+const DMAMap *map = vhost_iova_tree_find_iova(tree, );
+int r;
+
+if (unlikely(!map)) {
+error_report("Cannot locate expected map");
+return;
+}
+
+r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
+if (unlikely(r != 0)) {
+error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
+}
+
+vhost_iova_tree_remove(tree, map);
+}
+
+static size_t vhost_vdpa_net_cvq_cmd_len(void)
+{
+/*
+ * MAC_TABLE_SET is the ctrl command that produces the longer out buffer.
+ * In buffer is always 1 byte, so it should fit here
+ */
+return sizeof(struct virtio_net_ctrl_hdr) +
+   2 * sizeof(struct virtio_net_ctrl_mac) +
+   MAC_TABLE_ENTRIES * ETH_ALEN;
+}
+
+static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
+{
+return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
+}
+
+/** Copy and map a guest buffer. */
+static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v,
+   const struct iovec *out_data,
+   size_t out_num, size_t data_len, void *buf,
+   size_t *written, bool write)
+{
+DMAMap map = {};
+int r;
+
+if (unlikely(!data_len)) {
+qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n",
+  __func__, write ? "in" : "out");
+return false;
+}
+
+*written = iov_to_buf(out_data, out_num, 0, buf, data_len);
+map.translated_addr = (hwaddr)(uintptr_t)buf;
+map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1;
+map.perm = write ? IOMMU_RW : IOMMU_RO,
+r = vhost_iova_tree_map_alloc(v->iova_tree, );
+if (unlikely(r != IOVA_OK)) {
+error_report("Cannot map injected element");
+return false;
+}
+
+r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf,
+   !write);
+if (unlikely(r < 0)) {
+goto dma_map_err;
+}
+
+return true;
+
+dma_map_err:
+vhost_iova_tree_remove(v->iova_tree, );
+return false;
+}
+
 /**
- * Forward buffer for the moment.
+ * Copy the guest element into a dedicated buffer suitable to be sent to NIC
+ *
+ * @iov: [0] is the out buffer, [1] is the in one
+ */
+static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s,
+VirtQueueElement *elem,
+struct iovec *iov)
+{
+size_t in_copied;
+bool ok;
+
+iov[0].iov_base = s->cvq_cmd_out_buffer;
+ok = vhost_vdpa_cvq_map_buf(>vhost_vdpa, elem->out_sg, elem->out_num,
+vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base,
+[0].iov_len, false);
+if (unlikely(!ok)) {
+return false;
+}
+
+iov[1].iov_base = s->cvq_cmd_in_buffer;
+ok = vhost_vdpa_cvq_map_buf(>vhost_vdpa, NULL, 0,
+   

[PATCH v3 15/19] vdpa: manual forward CVQ buffers

2022-07-15 Thread Eugenio Pérez
Do a simple forwarding of CVQ buffers, the same work SVQ could do but
through callbacks. No functional change intended.

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/vhost-vdpa.h |  3 ++
 hw/virtio/vhost-vdpa.c |  3 +-
 net/vhost-vdpa.c   | 58 ++
 3 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index 7214eb47dc..d85643 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -15,6 +15,7 @@
 #include 
 
 #include "hw/virtio/vhost-iova-tree.h"
+#include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "hw/virtio/virtio.h"
 #include "standard-headers/linux/vhost_types.h"
 
@@ -35,6 +36,8 @@ typedef struct vhost_vdpa {
 /* IOVA mapping used by the Shadow Virtqueue */
 VhostIOVATree *iova_tree;
 GPtrArray *shadow_vqs;
+const VhostShadowVirtqueueOps *shadow_vq_ops;
+void *shadow_vq_ops_opaque;
 struct vhost_dev *dev;
 VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
 } VhostVDPA;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 96997210be..beaaa7049a 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -419,7 +419,8 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, 
struct vhost_vdpa *v,
 for (unsigned n = 0; n < hdev->nvqs; ++n) {
 g_autoptr(VhostShadowVirtqueue) svq;
 
-svq = vhost_svq_new(v->iova_tree, NULL, NULL);
+svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops,
+v->shadow_vq_ops_opaque);
 if (unlikely(!svq)) {
 error_setg(errp, "Cannot create svq %u", n);
 return -1;
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index df1e69ee72..2e3b6b10d8 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -11,11 +11,14 @@
 
 #include "qemu/osdep.h"
 #include "clients.h"
+#include "hw/virtio/virtio-net.h"
 #include "net/vhost_net.h"
 #include "net/vhost-vdpa.h"
 #include "hw/virtio/vhost-vdpa.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "qemu/memalign.h"
 #include "qemu/option.h"
 #include "qapi/error.h"
 #include 
@@ -187,6 +190,57 @@ static NetClientInfo net_vhost_vdpa_info = {
 .check_peer_type = vhost_vdpa_check_peer_type,
 };
 
+/**
+ * Forward buffer for the moment.
+ */
+static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
+VirtQueueElement *elem,
+void *opaque)
+{
+unsigned int n = elem->out_num + elem->in_num;
+g_autofree struct iovec *dev_buffers = g_new(struct iovec, n);
+size_t in_len, dev_written;
+virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
+int r;
+
+memcpy(dev_buffers, elem->out_sg, elem->out_num);
+memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num);
+
+r = vhost_svq_add(svq, _buffers[0], elem->out_num, _buffers[1],
+  elem->in_num, elem);
+if (unlikely(r != 0)) {
+if (unlikely(r == -ENOSPC)) {
+qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
+  __func__);
+}
+goto out;
+}
+
+/*
+ * We can poll here since we've had BQL from the time we sent the
+ * descriptor. Also, we need to take the answer before SVQ pulls by itself,
+ * when BQL is released
+ */
+dev_written = vhost_svq_poll(svq);
+if (unlikely(dev_written < sizeof(status))) {
+error_report("Insufficient written data (%zu)", dev_written);
+}
+
+out:
+in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, ,
+  sizeof(status));
+if (unlikely(in_len < sizeof(status))) {
+error_report("Bad device CVQ written length");
+}
+vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
+g_free(elem);
+return r;
+}
+
+static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
+.avail_handler = vhost_vdpa_net_handle_ctrl_avail,
+};
+
 static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
const char *device,
const char *name,
@@ -211,6 +265,10 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
 
 s->vhost_vdpa.device_fd = vdpa_device_fd;
 s->vhost_vdpa.index = queue_pair_index;
+if (!is_datapath) {
+s->vhost_vdpa.shadow_vq_ops = _vdpa_net_svq_ops;
+s->vhost_vdpa.shadow_vq_ops_opaque = s;
+}
 ret = vhost_vdpa_add(nc, (void *)>vhost_vdpa, queue_pair_index, nvqs);
 if (ret) {
 qemu_del_net_client(nc);
-- 
2.31.1




[PATCH v3 12/19] vhost: add vhost_svq_poll

2022-07-15 Thread Eugenio Pérez
It allows the Shadow Control VirtQueue to wait for the device to use the
available buffers.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.h |  1 +
 hw/virtio/vhost-shadow-virtqueue.c | 22 ++
 2 files changed, 23 insertions(+)

diff --git a/hw/virtio/vhost-shadow-virtqueue.h 
b/hw/virtio/vhost-shadow-virtqueue.h
index dd78f4bec2..cf442f7dea 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -89,6 +89,7 @@ void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
 int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
   size_t out_num, const struct iovec *in_sg, size_t in_num,
   VirtQueueElement *elem);
+size_t vhost_svq_poll(VhostShadowVirtqueue *svq);
 
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 406a823c81..1c54a03e17 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -484,6 +484,28 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
 } while (!vhost_svq_enable_notification(svq));
 }
 
+/**
+ * Poll the SVQ for one device used buffer.
+ *
+ * This function races with the main event loop SVQ polling, so extra
+ * synchronization is needed.
+ *
+ * Return the length written by the device.
+ */
+size_t vhost_svq_poll(VhostShadowVirtqueue *svq)
+{
+do {
+uint32_t len;
+VirtQueueElement *elem = vhost_svq_get_buf(svq, );
+if (elem) {
+return len;
+}
+
+/* Make sure we read new used_idx */
+smp_rmb();
+} while (true);
+}
+
 /**
  * Forward used buffers.
  *
-- 
2.31.1




[PATCH v3 14/19] vdpa: Export vhost_vdpa_dma_map and unmap calls

2022-07-15 Thread Eugenio Pérez
Shadow CVQ will copy buffers in qemu's VA, so we avoid TOCTOU attacks from
the guest that could set a different state in the qemu device model and the
vdpa device.

To do so, it needs to be able to map these new buffers to the device.

Signed-off-by: Eugenio Pérez 
Acked-by: Jason Wang 
---
 include/hw/virtio/vhost-vdpa.h | 4 
 hw/virtio/vhost-vdpa.c | 7 +++
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index a29dbb3f53..7214eb47dc 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -39,4 +39,8 @@ typedef struct vhost_vdpa {
 VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
 } VhostVDPA;
 
+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
+   void *vaddr, bool readonly);
+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size);
+
 #endif
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 0b13e98471..96997210be 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -71,8 +71,8 @@ static bool 
vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
 return false;
 }
 
-static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
-  void *vaddr, bool readonly)
+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
+   void *vaddr, bool readonly)
 {
 struct vhost_msg_v2 msg = {};
 int fd = v->device_fd;
@@ -97,8 +97,7 @@ static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr 
iova, hwaddr size,
 return ret;
 }
 
-static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
-hwaddr size)
+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size)
 {
 struct vhost_msg_v2 msg = {};
 int fd = v->device_fd;
-- 
2.31.1




[PATCH v3 18/19] vdpa: Add device migration blocker

2022-07-15 Thread Eugenio Pérez
Since the vhost-vdpa device is exposing _F_LOG, add a migration blocker if
it uses CVQ.

Signed-off-by: Eugenio Pérez 
---
 include/hw/virtio/vhost-vdpa.h |  1 +
 hw/virtio/vhost-vdpa.c | 14 ++
 2 files changed, 15 insertions(+)

diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index d85643..d10a89303e 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -35,6 +35,7 @@ typedef struct vhost_vdpa {
 bool shadow_vqs_enabled;
 /* IOVA mapping used by the Shadow Virtqueue */
 VhostIOVATree *iova_tree;
+Error *migration_blocker;
 GPtrArray *shadow_vqs;
 const VhostShadowVirtqueueOps *shadow_vq_ops;
 void *shadow_vq_ops_opaque;
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index beaaa7049a..795ed5a049 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -20,6 +20,7 @@
 #include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "hw/virtio/vhost-vdpa.h"
 #include "exec/address-spaces.h"
+#include "migration/blocker.h"
 #include "qemu/cutils.h"
 #include "qemu/main-loop.h"
 #include "cpu.h"
@@ -1022,6 +1023,13 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
 return true;
 }
 
+if (v->migration_blocker) {
+int r = migrate_add_blocker(v->migration_blocker, );
+if (unlikely(r < 0)) {
+goto err_migration_blocker;
+}
+}
+
 for (i = 0; i < v->shadow_vqs->len; ++i) {
 VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
 VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
@@ -1064,6 +1072,9 @@ err:
 vhost_svq_stop(svq);
 }
 
+err_migration_blocker:
+error_reportf_err(err, "Cannot setup SVQ %u: ", i);
+
 return false;
 }
 
@@ -1083,6 +1094,9 @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
 }
 }
 
+if (v->migration_blocker) {
+migrate_del_blocker(v->migration_blocker);
+}
 return true;
 }
 
-- 
2.31.1




[PATCH v3 19/19] vdpa: Add x-svq to NetdevVhostVDPAOptions

2022-07-15 Thread Eugenio Pérez
Finally offering the possibility to enable SVQ from the command line.

Signed-off-by: Eugenio Pérez 
Acked-by: Markus Armbruster 
---
 qapi/net.json|  9 +-
 net/vhost-vdpa.c | 72 ++--
 2 files changed, 77 insertions(+), 4 deletions(-)

diff --git a/qapi/net.json b/qapi/net.json
index 9af11e9a3b..75ba2cb989 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -445,12 +445,19 @@
 # @queues: number of queues to be created for multiqueue vhost-vdpa
 #  (default: 1)
 #
+# @x-svq: Start device with (experimental) shadow virtqueue. (Since 7.1)
+# (default: false)
+#
+# Features:
+# @unstable: Member @x-svq is experimental.
+#
 # Since: 5.1
 ##
 { 'struct': 'NetdevVhostVDPAOptions',
   'data': {
 '*vhostdev': 'str',
-'*queues':   'int' } }
+'*queues':   'int',
+'*x-svq':{'type': 'bool', 'features' : [ 'unstable'] } } }
 
 ##
 # @NetdevVmnetHostOptions:
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 0afa60bb51..986e6414b4 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -75,6 +75,28 @@ const int vdpa_feature_bits[] = {
 VHOST_INVALID_FEATURE_BIT
 };
 
+/** Supported device specific feature bits with SVQ */
+static const uint64_t vdpa_svq_device_features =
+BIT_ULL(VIRTIO_NET_F_CSUM) |
+BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
+BIT_ULL(VIRTIO_NET_F_MTU) |
+BIT_ULL(VIRTIO_NET_F_MAC) |
+BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
+BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
+BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
+BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
+BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
+BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
+BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
+BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
+BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
+BIT_ULL(VIRTIO_NET_F_STATUS) |
+BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
+BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
+BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
+BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
+BIT_ULL(VIRTIO_NET_F_STANDBY);
+
 VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
 {
 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
@@ -133,9 +155,13 @@ err_init:
 static void vhost_vdpa_cleanup(NetClientState *nc)
 {
 VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
+struct vhost_dev *dev = >vhost_net->dev;
 
 qemu_vfree(s->cvq_cmd_out_buffer);
 qemu_vfree(s->cvq_cmd_in_buffer);
+if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
+g_clear_pointer(>vhost_vdpa.iova_tree, vhost_iova_tree_delete);
+}
 if (s->vhost_net) {
 vhost_net_cleanup(s->vhost_net);
 g_free(s->vhost_net);
@@ -437,7 +463,9 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
int vdpa_device_fd,
int queue_pair_index,
int nvqs,
-   bool is_datapath)
+   bool is_datapath,
+   bool svq,
+   VhostIOVATree *iova_tree)
 {
 NetClientState *nc = NULL;
 VhostVDPAState *s;
@@ -455,6 +483,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
 
 s->vhost_vdpa.device_fd = vdpa_device_fd;
 s->vhost_vdpa.index = queue_pair_index;
+s->vhost_vdpa.shadow_vqs_enabled = svq;
+s->vhost_vdpa.iova_tree = iova_tree;
 if (!is_datapath) {
 s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
 vhost_vdpa_net_cvq_cmd_page_len());
@@ -465,6 +495,8 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
 
 s->vhost_vdpa.shadow_vq_ops = _vdpa_net_svq_ops;
 s->vhost_vdpa.shadow_vq_ops_opaque = s;
+error_setg(>vhost_vdpa.migration_blocker,
+   "Migration disabled: vhost-vdpa uses CVQ.");
 }
 ret = vhost_vdpa_add(nc, (void *)>vhost_vdpa, queue_pair_index, nvqs);
 if (ret) {
@@ -474,6 +506,14 @@ static NetClientState *net_vhost_vdpa_init(NetClientState 
*peer,
 return nc;
 }
 
+static int vhost_vdpa_get_iova_range(int fd,
+ struct vhost_vdpa_iova_range *iova_range)
+{
+int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);
+
+return ret < 0 ? -errno : 0;
+}
+
 static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
 {
 int ret = ioctl(fd, VHOST_GET_FEATURES, features);
@@ -524,6 +564,7 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
 uint64_t features;
 int vdpa_device_fd;
 g_autofree NetClientState **ncs = NULL;
+g_autoptr(VhostIOVATree) iova_tree = NULL;
 NetClientState *nc;
 int queue_pairs, r, i, has_cvq = 0;
 
@@ -551,22 +592,45 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char 
*name,
 return queue_pairs;
 }
 
+if (opts->x_svq) {
+  

Re: [RFC] aspeed/i2c: multi-master between SoC's

2022-07-15 Thread Peter Delevoryas
On Fri, Jul 15, 2022 at 09:49:58AM +0200, Klaus Jensen wrote:
> On Jul 14 20:06, Peter Delevoryas wrote:
> > Hey Cedric, Klaus, and Corey,
> > 
> 
> Hi Peter,
> 
> Regardless of the issues you are facing its awesome to see this being
> put to work like this!

Haha yeah, well, _all_ the designs at Meta (fb) rely significantly on
multi-master i2c. I think I've been trying to get this working for months now,
but we're really close!

If I can just get the i2c layer working, then proper IPMB and MCTP testing
between BMC and BIC firmware will be much easier.

There's some part defects that have a very low frequency of occurrence, and the
patches for those defects rely on a BMC <-> BIC <->  chain of IPMB
messages. With QEMU, we could test those patches much more thoroughly, because
we can inject the part-defect behavior.

> 
> > So I realized something about the current state of multi-master i2c:
> > 
> > We can't do transfers between two Aspeed I2C controllers, e.g.  AST1030 <->
> > AST2600. I'm looking into this case in the new fby35 machine (which isn't 
> > even
> > merged yet, just in Cedric's pull request)
> > 
> > This is because the AspeedI2CBusSlave is only designed to receive through
> > i2c_send_async(). But the AspeedI2CBus master-mode transfers use i2c_send().
> > 
> > So, the AST2600 can't send data to the AST1030. And the AST1030 can't reply 
> > to
> > the AST2600.
> > 
> > (By the way, another small issue: AspeedI2CBusSlave expects the parent of 
> > its
> > parent to be its AspeedI2CBus, but that's not true if multiple SoC's are 
> > sharing
> > an I2CBus. But that's easy to resolve, I'll send a patch for that soon).
> > 
> > I'm wondering how best to resolve the multi-SoC send-async issue, while
> > retaining the ability to send synchronously to non-SoC slave devices.
> > 
> > I think there's only one way, as far as I can see:
> > 
> > - Force the Aspeed I2C Controller to master the I2C bus before starting a 
> > master
> >   transfer. Even for synchronous transfers.
> > 
> > This shouldn't be a big problem, we can still do synchronous transfers, we 
> > just
> > have to wait for the bus to be free before starting the transfer.
> > 
> > - If the I2C slave targets for a master2slave transfer support async_send, 
> > then
> >   use async_send. This requires refactoring aspeed_i2c_bus_send into a state
> >   machine to send data asynchronously.
> > 
> > In other words, don't try to do a synchronous transfer to an SoC.
> > 
> > But, of course, we can keep doing synchronous transfers from SoC -> sensor 
> > or
> > sensor -> SoC.
> > 
> 
> Yeah, hmm. This is tricky because callers of bus_send expects the
> transfer to be "resolved" immediately. Per design, the asynchronous send
> requires the device mastering the bus to itself be asynchronous (like
> the i2c-echo device I added as an example).

Understood: I was ommitting other necessary changes. Yes, we would need to
async-ify all the way up the chain to the register read/write.

> 
> However, looking at aspeed_i2c_bus_handle_cmd (which is the caller of
> bus_send), it should be possible to accept bus_send to "yield" as you
> sketch below and not raise any interrupt. And yes, it would be required
> in bus_send to call i2c_bus_master to register a BH which can then
> raise the interrupt upon i2c_ack().

Yep, that's what I was thinking of. I think I would actually call i2c_bus_master
in aspeed_i2c_bus_handle_cmd or higher though, because I would only call
i2c_bus_master once until the STOP command is issued (or the DMA/pool transfer
is complete). But yeah, I think we're on the same page.



Re: [PATCH v8 00/12] s390x: CPU Topology

2022-07-15 Thread Janis Schoetterl-Glausch
On 7/15/22 15:47, Pierre Morel wrote:
> 
> 
> On 7/15/22 11:31, Janis Schoetterl-Glausch wrote:
>> On 7/14/22 22:05, Pierre Morel wrote:
>>>
>>>
>>> On 7/14/22 20:43, Janis Schoetterl-Glausch wrote:
 On 6/20/22 16:03, Pierre Morel wrote:
> Hi,
>
> This new spin is essentially for coherence with the last Linux CPU
> Topology patch, function testing and coding style modifications.
>
> Forword
> ===
>
> The goal of this series is to implement CPU topology for S390, it
> improves the preceding series with the implementation of books and
> drawers, of non uniform CPU topology and with documentation.
>
> To use these patches, you will need the Linux series version 10.
> You find it there:
> https://lkml.org/lkml/2022/6/20/590
>
> Currently this code is for KVM only, I have no idea if it is interesting
> to provide a TCG patch. If ever it will be done in another series.
>
> To have a better understanding of the S390x CPU Topology and its
> implementation in QEMU you can have a look at the documentation in the
> last patch or follow the introduction here under.
>
> A short introduction
> 
>
> CPU Topology is described in the S390 POP with essentially the description
> of two instructions:
>
> PTF Perform Topology function used to poll for topology change
>   and used to set the polarization but this part is not part of this 
> item.
>
> STSI Store System Information and the SYSIB 15.1.x providing the Topology
>   configuration.
>
> S390 Topology is a 6 levels hierarchical topology with up to 5 level
>   of containers. The last topology level, specifying the CPU cores.
>
>   This patch series only uses the two lower levels sockets and cores.
>    To get the information on the topology, S390 provides the STSI
>   instruction, which stores a structures providing the list of the
>   containers used in the Machine topology: the SYSIB.
>   A selector within the STSI instruction allow to chose how many 
> topology
>   levels will be provide in the SYSIB.
>
>   Using the Topology List Entries (TLE) provided inside the SYSIB we
>   the Linux kernel is able to compute the information about the cache
>   distance between two cores and can use this information to take
>   scheduling decisions.

 Do the socket, book, ... metaphors and looking at STSI from the existing
 smp infrastructure even make sense?
>>>
>>> Sorry, I do not understand.
>>> I admit the cover-letter is old and I did not rewrite it really good since 
>>> the first patch series.
>>>
>>> What we do is:
>>> Compute the STSI from the SMP + numa + device QEMU parameters .
>>>

 STSI 15.1.x reports the topology to the guest and for a virtual machine,
 this topology can be very dynamic. So a CPU can move from from one topology
 container to another, but the socket of a cpu changing while it's running 
 seems
 a bit strange. And this isn't supported by this patch series as far as I 
 understand,
 the only topology changes are on hotplug.
>>>
>>> A CPU changing from a socket to another socket is the only case the PTF 
>>> instruction reports a change in the topology with the case a new CPU is 
>>> plug in.
>>
>> Can a CPU actually change between sockets right now?
> 
> To be exact, what I understand is that a shared CPU can be scheduled to 
> another real CPU exactly as a guest vCPU can be scheduled by the host to 
> another host CPU.

Ah, ok, this is what I'm forgetting, and what made communication harder,
there are two ways by which the topology can change:
1. the host topology changes
2. the vCPU threads are scheduled on another host CPU

I've been only thinking about the 2.
I assumed some outside entity (libvirt?) pins vCPU threads, and so it would
be the responsibility of that entity to set the topology which then is 
reported to the guest. So if you pin vCPUs for the whole lifetime of the vm
then you could do that by specifying the topology up front with -devices.
If you want to support migration, then the outside entity would need a way
to tell qemu the updated topology.
 
> 
>> The socket-id is computed from the core-id, so it's fixed, is it not?
> 
> the virtual socket-id is computed from the virtual core-id

Meaning cpu.env.core_id, correct? (which is the same as cpu.cpu_index which is 
the same as
ms->possible_cpus->cpus[core_id].props.core_id)
And a cpu's core id doesn't change during the lifetime of the vm, right?
And so it's socket id doesn't either.

> 
>>
>>> It is not expected to appear often but it does appear.
>>> The code has been removed from the kernel in spin 10 for 2 reasons:
>>> 1) we decided to first support only dedicated and pinned CPU> 2) Christian 
>>> fears it may happen too often due to Linux host scheduling 

Re: [PATCH v2 18/19] vdpa: Add device migration blocker

2022-07-15 Thread Eugenio Perez Martin
On Fri, Jul 15, 2022 at 11:05 AM Eugenio Perez Martin
 wrote:
>
> On Fri, Jul 15, 2022 at 10:51 AM Jason Wang  wrote:
> >
> > On Fri, Jul 15, 2022 at 1:40 PM Eugenio Perez Martin
> >  wrote:
> > >
> > > On Fri, Jul 15, 2022 at 6:03 AM Jason Wang  wrote:
> > > >
> > > > On Fri, Jul 15, 2022 at 12:32 AM Eugenio Pérez  
> > > > wrote:
> > > > >
> > > > > Since the vhost-vdpa device is exposing _F_LOG,
> > > >
> > > > I may miss something but I think it doesn't?
> > > >
> > >
> > > It's at vhost_vdpa_get_features. As long as SVQ is enabled, it's
> > > exposing VHOST_F_LOG_ALL.
> >
> > Ok, so this needs to be specified in the change log.
>
> Got it, I'll write some note.
>
> > But I'm kind of
> > confused here, we do want to allow migration to work so why we disable
> > it?
> >
>

Adding here:
Without the x-svq parameter, migration is disabled unless the actual
vdpa device backend supports _F_LOG_ALL by itself. There is no such
thing in the Linux kernel at the moment.

> With x-svq parameter, migration of simple devices with no cvq "is
> possible". It has intrinsic problems like can't emit the gratuitous
> arp but it's possible and traffic can continue.
>
> But devices with cvq require to restore the state at the destination.
> That part is not implemented, so it's blocked at the moment.
>
> In the immediate future not all cases (as "net features") will be
> available: net/vhost-net.c (or virtio-net.c?) needs to know how to
> inject the state at the destination to restore the guest visible
> configuration. It's simple code, but it needs to be developed. So
> migration blocker is kept for these features. Hopefully, we will reach
> a point where all features supported by virtio-net.c will be
> supported, but the right thing to do is to merge basic ones first.
>
> Thanks!




[PATCH 2/2] kvm: add support for boolean statistics

2022-07-15 Thread Paolo Bonzini
The next version of Linux will introduce boolean statistics, which
can only have 0 or 1 values.  Convert them to the new QAPI fields
added in the previous commit.

Signed-off-by: Paolo Bonzini 
---
 accel/kvm/kvm-all.c   | 10 +-
 linux-headers/linux/kvm.h |  1 +
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index ed8b6b896e..3a2677d065 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -3743,6 +3743,7 @@ static StatsList *add_kvmstat_entry(struct kvm_stats_desc 
*pdesc,
 case KVM_STATS_UNIT_BYTES:
 case KVM_STATS_UNIT_CYCLES:
 case KVM_STATS_UNIT_SECONDS:
+case KVM_STATS_UNIT_BOOLEAN:
 break;
 default:
 return stats_list;
@@ -3761,7 +3762,10 @@ static StatsList *add_kvmstat_entry(struct 
kvm_stats_desc *pdesc,
 stats->name = g_strdup(pdesc->name);
 stats->value = g_new0(StatsValue, 1);;
 
-if (pdesc->size == 1) {
+if ((pdesc->flags & KVM_STATS_UNIT_MASK) == KVM_STATS_UNIT_BOOLEAN) {
+stats->value->u.boolean = *stats_data;
+stats->value->type = QTYPE_QBOOL;
+} else if (pdesc->size == 1) {
 stats->value->u.scalar = *stats_data;
 stats->value->type = QTYPE_QNUM;
 } else {
@@ -3809,6 +3813,10 @@ static StatsSchemaValueList *add_kvmschema_entry(struct 
kvm_stats_desc *pdesc,
 switch (pdesc->flags & KVM_STATS_UNIT_MASK) {
 case KVM_STATS_UNIT_NONE:
 break;
+case KVM_STATS_UNIT_BOOLEAN:
+schema_entry->value->has_unit = true;
+schema_entry->value->unit = STATS_UNIT_BOOLEAN;
+break;
 case KVM_STATS_UNIT_BYTES:
 schema_entry->value->has_unit = true;
 schema_entry->value->unit = STATS_UNIT_BYTES;
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 0d05d02ee4..f089349149 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -2031,6 +2031,7 @@ struct kvm_stats_header {
 #define KVM_STATS_UNIT_BYTES   (0x1 << KVM_STATS_UNIT_SHIFT)
 #define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT)
 #define KVM_STATS_UNIT_CYCLES  (0x3 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT)
 #define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES
 
 #define KVM_STATS_BASE_SHIFT   8
-- 
2.36.1




[PATCH 1/2] monitor: add support for boolean statistics

2022-07-15 Thread Paolo Bonzini
The next version of Linux will introduce boolean statistics, which
can only have 0 or 1 values.  Support them in the schema and in
the HMP command.

Suggested-by: Amneesh Singh 
Signed-off-by: Paolo Bonzini 
---
 monitor/hmp-cmds.c | 2 ++
 qapi/stats.json| 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index ca98df0495..e8d6963722 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -2337,6 +2337,8 @@ static void print_stats_results(Monitor *mon, StatsTarget 
target,
 
 if (stats_value->type == QTYPE_QNUM) {
 monitor_printf(mon, ": %" PRId64 "\n", stats_value->u.scalar);
+} else if (stats_value->type == QTYPE_QBOOL) {
+monitor_printf(mon, ": %s\n", stats_value->u.boolean ? "yes" : 
"no");
 } else if (stats_value->type == QTYPE_QLIST) {
 uint64List *list;
 int i;
diff --git a/qapi/stats.json b/qapi/stats.json
index 2f8bfe8fdb..cb6456c67a 100644
--- a/qapi/stats.json
+++ b/qapi/stats.json
@@ -38,11 +38,12 @@
 # @bytes: stat reported in bytes.
 # @seconds: stat reported in seconds.
 # @cycles: stat reported in clock cycles.
+# @boolean: stat is a boolean value.
 #
 # Since: 7.1
 ##
 { 'enum' : 'StatsUnit',
-  'data' : [ 'bytes', 'seconds', 'cycles' ] }
+  'data' : [ 'bytes', 'seconds', 'cycles', 'boolean' ] }
 
 ##
 # @StatsProvider:
@@ -123,6 +124,7 @@
 ##
 { 'alternate': 'StatsValue',
   'data': { 'scalar': 'uint64',
+'boolean': 'bool',
 'list': [ 'uint64' ] } }
 
 ##
-- 
2.36.1





[PATCH 0/2] monitor, kvm: support for boolean statistics

2022-07-15 Thread Paolo Bonzini
Some statistics exported by KVM only ever have a 0 or 1 value, and Linux has
grown the ability to mark them as such.  Bring it over to the new statistics
subsystem of QEMU: they will be presented to QAPI clients as JSON booleans, and
in HMP as "yes"/"no".

(This was proposed in the context of the Libvirt query-stats GSoC project,
and support will be in Linux 5.19-rc7).

Paolo

Paolo Bonzini (2):
  monitor: add support for boolean statistics
  kvm: add support for boolean statistics

 accel/kvm/kvm-all.c   | 10 +-
 linux-headers/linux/kvm.h |  1 +
 monitor/hmp-cmds.c|  2 ++
 qapi/stats.json   |  4 +++-
 4 files changed, 15 insertions(+), 2 deletions(-)

-- 
2.36.1




  1   2   >