[PULL 00/20] tcg patch queue

2022-09-01 Thread Richard Henderson
The following changes since commit e93ded1bf6c94ab95015b33e188bc8b0b0c32670:

  Merge tag 'testing-pull-request-2022-08-30' of https://gitlab.com/thuth/qemu 
into staging (2022-08-31 18:19:03 -0400)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20220901

for you to fetch changes up to 20011be2e30b8aa8ef1fc258485f00c688703deb:

  target/riscv: Make translator stop before the end of a page (2022-09-01 
07:43:08 +0100)


Respect PROT_EXEC in user-only mode.
Fix s390x, i386 and riscv for translations crossing a page.


Ilya Leoshkevich (4):
  linux-user: Clear translations on mprotect()
  accel/tcg: Introduce is_same_page()
  target/s390x: Make translator stop before the end of a page
  target/i386: Make translator stop before the end of a page

Richard Henderson (16):
  linux-user/arm: Mark the commpage executable
  linux-user/hppa: Allocate page zero as a commpage
  linux-user/x86_64: Allocate vsyscall page as a commpage
  linux-user: Honor PT_GNU_STACK
  tests/tcg/i386: Move smc_code2 to an executable section
  accel/tcg: Properly implement get_page_addr_code for user-only
  accel/tcg: Unlock mmap_lock after longjmp
  accel/tcg: Make tb_htable_lookup static
  accel/tcg: Move qemu_ram_addr_from_host_nofail to physmem.c
  accel/tcg: Use probe_access_internal for softmmu get_page_addr_code_hostp
  accel/tcg: Document the faulting lookup in tb_lookup_cmp
  accel/tcg: Remove translator_ldsw
  accel/tcg: Add pc and host_pc params to gen_intermediate_code
  accel/tcg: Add fast path for translator_ld*
  target/riscv: Add MAX_INSN_LEN and insn_len
  target/riscv: Make translator stop before the end of a page

 include/elf.h |   1 +
 include/exec/cpu-common.h |   1 +
 include/exec/exec-all.h   |  89 
 include/exec/translator.h |  96 -
 linux-user/arm/target_cpu.h   |   4 +-
 linux-user/qemu.h |   1 +
 accel/tcg/cpu-exec.c  | 143 --
 accel/tcg/cputlb.c|  93 +++--
 accel/tcg/translate-all.c |  29 
 accel/tcg/translator.c| 135 ++-
 accel/tcg/user-exec.c |  17 -
 linux-user/elfload.c  |  82 --
 linux-user/mmap.c |   6 +-
 softmmu/physmem.c |  12 
 target/alpha/translate.c  |   5 +-
 target/arm/translate.c|   5 +-
 target/avr/translate.c|   5 +-
 target/cris/translate.c   |   5 +-
 target/hexagon/translate.c|   6 +-
 target/hppa/translate.c   |   5 +-
 target/i386/tcg/translate.c   |  71 +++
 target/loongarch/translate.c  |   6 +-
 target/m68k/translate.c   |   5 +-
 target/microblaze/translate.c |   5 +-
 target/mips/tcg/translate.c   |   5 +-
 target/nios2/translate.c  |   5 +-
 target/openrisc/translate.c   |   6 +-
 target/ppc/translate.c|   5 +-
 target/riscv/translate.c  |  32 +++--
 target/rx/translate.c |   5 +-
 target/s390x/tcg/translate.c  |  20 --
 target/sh4/translate.c|   5 +-
 target/sparc/translate.c  |   5 +-
 target/tricore/translate.c|   6 +-
 target/xtensa/translate.c |   6 +-
 tests/tcg/i386/test-i386.c|   2 +-
 tests/tcg/riscv64/noexec.c|  79 +
 tests/tcg/s390x/noexec.c  | 106 
 tests/tcg/x86_64/noexec.c |  75 
 tests/tcg/multiarch/noexec.c.inc  | 139 
 tests/tcg/riscv64/Makefile.target |   1 +
 tests/tcg/s390x/Makefile.target   |   1 +
 tests/tcg/x86_64/Makefile.target  |   3 +-
 43 files changed, 966 insertions(+), 367 deletions(-)
 create mode 100644 tests/tcg/riscv64/noexec.c
 create mode 100644 tests/tcg/s390x/noexec.c
 create mode 100644 tests/tcg/x86_64/noexec.c
 create mode 100644 tests/tcg/multiarch/noexec.c.inc



[PULL 10/20] accel/tcg: Make tb_htable_lookup static

2022-09-01 Thread Richard Henderson
The function is not used outside of cpu-exec.c.  Move it and
its subroutines up in the file, before the first use.

Reviewed-by: Alistair Francis 
Acked-by: Ilya Leoshkevich 
Tested-by: Ilya Leoshkevich 
Signed-off-by: Richard Henderson 
---
 include/exec/exec-all.h |   3 -
 accel/tcg/cpu-exec.c| 122 
 2 files changed, 61 insertions(+), 64 deletions(-)

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 0475ec6007..9f35e3b7a9 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -552,9 +552,6 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, 
MemTxAttrs attrs);
 #endif
 void tb_flush(CPUState *cpu);
 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
-TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
-   target_ulong cs_base, uint32_t flags,
-   uint32_t cflags);
 void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);
 
 /* GETPC is the true target of the return instruction that we'll execute.  */
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index d18081ca6f..7887af6f45 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -170,6 +170,67 @@ uint32_t curr_cflags(CPUState *cpu)
 return cflags;
 }
 
+struct tb_desc {
+target_ulong pc;
+target_ulong cs_base;
+CPUArchState *env;
+tb_page_addr_t phys_page1;
+uint32_t flags;
+uint32_t cflags;
+uint32_t trace_vcpu_dstate;
+};
+
+static bool tb_lookup_cmp(const void *p, const void *d)
+{
+const TranslationBlock *tb = p;
+const struct tb_desc *desc = d;
+
+if (tb->pc == desc->pc &&
+tb->page_addr[0] == desc->phys_page1 &&
+tb->cs_base == desc->cs_base &&
+tb->flags == desc->flags &&
+tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
+tb_cflags(tb) == desc->cflags) {
+/* check next page if needed */
+if (tb->page_addr[1] == -1) {
+return true;
+} else {
+tb_page_addr_t phys_page2;
+target_ulong virt_page2;
+
+virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
+phys_page2 = get_page_addr_code(desc->env, virt_page2);
+if (tb->page_addr[1] == phys_page2) {
+return true;
+}
+}
+}
+return false;
+}
+
+static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
+  target_ulong cs_base, uint32_t flags,
+  uint32_t cflags)
+{
+tb_page_addr_t phys_pc;
+struct tb_desc desc;
+uint32_t h;
+
+desc.env = cpu->env_ptr;
+desc.cs_base = cs_base;
+desc.flags = flags;
+desc.cflags = cflags;
+desc.trace_vcpu_dstate = *cpu->trace_dstate;
+desc.pc = pc;
+phys_pc = get_page_addr_code(desc.env, pc);
+if (phys_pc == -1) {
+return NULL;
+}
+desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
+h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
+return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
+}
+
 /* Might cause an exception, so have a longjmp destination ready */
 static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
   target_ulong cs_base,
@@ -485,67 +546,6 @@ void cpu_exec_step_atomic(CPUState *cpu)
 end_exclusive();
 }
 
-struct tb_desc {
-target_ulong pc;
-target_ulong cs_base;
-CPUArchState *env;
-tb_page_addr_t phys_page1;
-uint32_t flags;
-uint32_t cflags;
-uint32_t trace_vcpu_dstate;
-};
-
-static bool tb_lookup_cmp(const void *p, const void *d)
-{
-const TranslationBlock *tb = p;
-const struct tb_desc *desc = d;
-
-if (tb->pc == desc->pc &&
-tb->page_addr[0] == desc->phys_page1 &&
-tb->cs_base == desc->cs_base &&
-tb->flags == desc->flags &&
-tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
-tb_cflags(tb) == desc->cflags) {
-/* check next page if needed */
-if (tb->page_addr[1] == -1) {
-return true;
-} else {
-tb_page_addr_t phys_page2;
-target_ulong virt_page2;
-
-virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
-phys_page2 = get_page_addr_code(desc->env, virt_page2);
-if (tb->page_addr[1] == phys_page2) {
-return true;
-}
-}
-}
-return false;
-}
-
-TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
-   target_ulong cs_base, uint32_t flags,
-   uint32_t cflags)
-{
-tb_page_addr_t phys_pc;
-struct tb_desc desc;
-uint32_t h;
-
-desc.env = cpu->env_ptr;
-desc.cs_base = cs_base;
-desc.flags = flags;
-desc.cflags = cflags;
-

[PULL 1/4] target/avr: Support probe argument to tlb_fill

2022-09-01 Thread Richard Henderson
While there are no target-specific nonfaulting probes,
generic code may grow some uses at some point.

Note that the attrs argument was incorrect -- it should have
been MEMTXATTRS_UNSPECIFIED. Just use the simpler interface.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 target/avr/helper.c | 46 -
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/target/avr/helper.c b/target/avr/helper.c
index db76452f9a..82284f8997 100644
--- a/target/avr/helper.c
+++ b/target/avr/helper.c
@@ -102,38 +102,50 @@ bool avr_cpu_tlb_fill(CPUState *cs, vaddr address, int 
size,
   MMUAccessType access_type, int mmu_idx,
   bool probe, uintptr_t retaddr)
 {
-int prot = 0;
-MemTxAttrs attrs = {};
+int prot, page_size = TARGET_PAGE_SIZE;
 uint32_t paddr;
 
 address &= TARGET_PAGE_MASK;
 
 if (mmu_idx == MMU_CODE_IDX) {
-/* access to code in flash */
+/* Access to code in flash. */
 paddr = OFFSET_CODE + address;
 prot = PAGE_READ | PAGE_EXEC;
-if (paddr + TARGET_PAGE_SIZE > OFFSET_DATA) {
+if (paddr >= OFFSET_DATA) {
+/*
+ * This should not be possible via any architectural operations.
+ * There is certainly not an exception that we can deliver.
+ * Accept probing that might come from generic code.
+ */
+if (probe) {
+return false;
+}
 error_report("execution left flash memory");
 abort();
 }
-} else if (address < NUMBER_OF_CPU_REGISTERS + NUMBER_OF_IO_REGISTERS) {
-/*
- * access to CPU registers, exit and rebuilt this TB to use full access
- * incase it touches specially handled registers like SREG or SP
- */
-AVRCPU *cpu = AVR_CPU(cs);
-CPUAVRState *env = &cpu->env;
-env->fullacc = 1;
-cpu_loop_exit_restore(cs, retaddr);
 } else {
-/* access to memory. nothing special */
+/* Access to memory. */
 paddr = OFFSET_DATA + address;
 prot = PAGE_READ | PAGE_WRITE;
+if (address < NUMBER_OF_CPU_REGISTERS + NUMBER_OF_IO_REGISTERS) {
+/*
+ * Access to CPU registers, exit and rebuilt this TB to use
+ * full access in case it touches specially handled registers
+ * like SREG or SP.  For probing, set page_size = 1, in order
+ * to force tlb_fill to be called for the next access.
+ */
+if (probe) {
+page_size = 1;
+} else {
+AVRCPU *cpu = AVR_CPU(cs);
+CPUAVRState *env = &cpu->env;
+env->fullacc = 1;
+cpu_loop_exit_restore(cs, retaddr);
+}
+}
 }
 
-tlb_set_page_with_attrs(cs, address, paddr, attrs, prot,
-mmu_idx, TARGET_PAGE_SIZE);
-
+tlb_set_page(cs, address, paddr, prot, mmu_idx, page_size);
 return true;
 }
 
-- 
2.34.1




[PULL 4/4] target/avr: Disable interrupts when env->skip set

2022-09-01 Thread Richard Henderson
This bit is not saved across interrupts, so we must
delay delivering the interrupt until the skip has
been processed.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1118
Reviewed-by: Michael Rolnik 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 target/avr/helper.c|  9 +
 target/avr/translate.c | 26 ++
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/target/avr/helper.c b/target/avr/helper.c
index 34f1cbffb2..156dde4e92 100644
--- a/target/avr/helper.c
+++ b/target/avr/helper.c
@@ -31,6 +31,15 @@ bool avr_cpu_exec_interrupt(CPUState *cs, int 
interrupt_request)
 AVRCPU *cpu = AVR_CPU(cs);
 CPUAVRState *env = &cpu->env;
 
+/*
+ * We cannot separate a skip from the next instruction,
+ * as the skip would not be preserved across the interrupt.
+ * Separating the two insn normally only happens at page boundaries.
+ */
+if (env->skip) {
+return false;
+}
+
 if (interrupt_request & CPU_INTERRUPT_RESET) {
 if (cpu_interrupts_enabled(env)) {
 cs->exception_index = EXCP_RESET;
diff --git a/target/avr/translate.c b/target/avr/translate.c
index dc9c3d6bcc..026753c963 100644
--- a/target/avr/translate.c
+++ b/target/avr/translate.c
@@ -2971,8 +2971,18 @@ static void avr_tr_translate_insn(DisasContextBase 
*dcbase, CPUState *cs)
 if (skip_label) {
 canonicalize_skip(ctx);
 gen_set_label(skip_label);
-if (ctx->base.is_jmp == DISAS_NORETURN) {
+
+switch (ctx->base.is_jmp) {
+case DISAS_NORETURN:
 ctx->base.is_jmp = DISAS_CHAIN;
+break;
+case DISAS_NEXT:
+if (ctx->base.tb->flags & TB_FLAGS_SKIP) {
+ctx->base.is_jmp = DISAS_TOO_MANY;
+}
+break;
+default:
+break;
 }
 }
 
@@ -2989,6 +2999,11 @@ static void avr_tr_tb_stop(DisasContextBase *dcbase, 
CPUState *cs)
 {
 DisasContext *ctx = container_of(dcbase, DisasContext, base);
 bool nonconst_skip = canonicalize_skip(ctx);
+/*
+ * Because we disable interrupts while env->skip is set,
+ * we must return to the main loop to re-evaluate afterward.
+ */
+bool force_exit = ctx->base.tb->flags & TB_FLAGS_SKIP;
 
 switch (ctx->base.is_jmp) {
 case DISAS_NORETURN:
@@ -2997,7 +3012,7 @@ static void avr_tr_tb_stop(DisasContextBase *dcbase, 
CPUState *cs)
 case DISAS_NEXT:
 case DISAS_TOO_MANY:
 case DISAS_CHAIN:
-if (!nonconst_skip) {
+if (!nonconst_skip && !force_exit) {
 /* Note gen_goto_tb checks singlestep.  */
 gen_goto_tb(ctx, 1, ctx->npc);
 break;
@@ -3005,8 +3020,11 @@ static void avr_tr_tb_stop(DisasContextBase *dcbase, 
CPUState *cs)
 tcg_gen_movi_tl(cpu_pc, ctx->npc);
 /* fall through */
 case DISAS_LOOKUP:
-tcg_gen_lookup_and_goto_ptr();
-break;
+if (!force_exit) {
+tcg_gen_lookup_and_goto_ptr();
+break;
+}
+/* fall through */
 case DISAS_EXIT:
 tcg_gen_exit_tb(NULL, 0);
 break;
-- 
2.34.1




[PULL 08/20] accel/tcg: Properly implement get_page_addr_code for user-only

2022-09-01 Thread Richard Henderson
The current implementation is a no-op, simply returning addr.
This is incorrect, because we ought to be checking the page
permissions for execution.

Make get_page_addr_code inline for both implementations.

Acked-by: Ilya Leoshkevich 
Tested-by: Ilya Leoshkevich 
Acked-by: Alistair Francis 
Signed-off-by: Richard Henderson 
---
 include/exec/exec-all.h | 85 ++---
 accel/tcg/cputlb.c  |  5 ---
 accel/tcg/user-exec.c   | 14 +++
 3 files changed, 42 insertions(+), 62 deletions(-)

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 311e5fb422..0475ec6007 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -598,43 +598,44 @@ struct MemoryRegionSection *iotlb_to_section(CPUState 
*cpu,
  hwaddr index, MemTxAttrs attrs);
 #endif
 
-#if defined(CONFIG_USER_ONLY)
-void mmap_lock(void);
-void mmap_unlock(void);
-bool have_mmap_lock(void);
-
 /**
- * get_page_addr_code() - user-mode version
+ * get_page_addr_code_hostp()
  * @env: CPUArchState
  * @addr: guest virtual address of guest code
  *
- * Returns @addr.
+ * See get_page_addr_code() (full-system version) for documentation on the
+ * return value.
+ *
+ * Sets *@hostp (when @hostp is non-NULL) as follows.
+ * If the return value is -1, sets *@hostp to NULL. Otherwise, sets *@hostp
+ * to the host address where @addr's content is kept.
+ *
+ * Note: this function can trigger an exception.
+ */
+tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
+void **hostp);
+
+/**
+ * get_page_addr_code()
+ * @env: CPUArchState
+ * @addr: guest virtual address of guest code
+ *
+ * If we cannot translate and execute from the entire RAM page, or if
+ * the region is not backed by RAM, returns -1. Otherwise, returns the
+ * ram_addr_t corresponding to the guest code at @addr.
+ *
+ * Note: this function can trigger an exception.
  */
 static inline tb_page_addr_t get_page_addr_code(CPUArchState *env,
 target_ulong addr)
 {
-return addr;
+return get_page_addr_code_hostp(env, addr, NULL);
 }
 
-/**
- * get_page_addr_code_hostp() - user-mode version
- * @env: CPUArchState
- * @addr: guest virtual address of guest code
- *
- * Returns @addr.
- *
- * If @hostp is non-NULL, sets *@hostp to the host address where @addr's 
content
- * is kept.
- */
-static inline tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env,
-  target_ulong addr,
-  void **hostp)
-{
-if (hostp) {
-*hostp = g2h_untagged(addr);
-}
-return addr;
-}
+#if defined(CONFIG_USER_ONLY)
+void mmap_lock(void);
+void mmap_unlock(void);
+bool have_mmap_lock(void);
 
 /**
  * adjust_signal_pc:
@@ -691,36 +692,6 @@ G_NORETURN void cpu_loop_exit_sigbus(CPUState *cpu, 
target_ulong addr,
 static inline void mmap_lock(void) {}
 static inline void mmap_unlock(void) {}
 
-/**
- * get_page_addr_code() - full-system version
- * @env: CPUArchState
- * @addr: guest virtual address of guest code
- *
- * If we cannot translate and execute from the entire RAM page, or if
- * the region is not backed by RAM, returns -1. Otherwise, returns the
- * ram_addr_t corresponding to the guest code at @addr.
- *
- * Note: this function can trigger an exception.
- */
-tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr);
-
-/**
- * get_page_addr_code_hostp() - full-system version
- * @env: CPUArchState
- * @addr: guest virtual address of guest code
- *
- * See get_page_addr_code() (full-system version) for documentation on the
- * return value.
- *
- * Sets *@hostp (when @hostp is non-NULL) as follows.
- * If the return value is -1, sets *@hostp to NULL. Otherwise, sets *@hostp
- * to the host address where @addr's content is kept.
- *
- * Note: this function can trigger an exception.
- */
-tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
-void **hostp);
-
 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length);
 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr);
 
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index a46f3a654d..43bd65c973 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1544,11 +1544,6 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState 
*env, target_ulong addr,
 return qemu_ram_addr_from_host_nofail(p);
 }
 
-tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
-{
-return get_page_addr_code_hostp(env, addr, NULL);
-}
-
 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
 {
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index 20ada5472b..2bc4394b80 100644
--- a/accel/tcg/user-exec.c
+++ 

[PATCH v3 00/23] target/i386: make SSE helpers generic in the vector size

2022-09-01 Thread Paolo Bonzini
This is the first half of Paul's series from last April, reorganized
so that there is no need for YMM_ONLY and SHIFT == 2 does not appear
yet.  This means that it is independent of Paul's implementation
of AVX decoding.

This generally uses loops that apply the same code for all of MMX/SSE/AVX,
and in fact this series removes basically all uses of XMM_ONLY in the code.
In some cases AVX needs special-casing for the two 128-bit lanes; this is
done with a new macro LANE_WIDTH that is currently the same as the register
width, but remains 16 for AVX registers.

The full work, with the AVX parts rebased on top of these, is at branch
i386-avx of https://gitlab.com/bonzini/qemu.  The branch passes the
tests that Paul had posted, while this reduced part passes the reduced
SSE version.

Paolo

Supersedes: <20220826231204.201395-1-pbonz...@redhat.com>
Based-on: <20220825164827.392942-1-pbonz...@redhat.com>

v2->v3: convert remaining lane-based operations (hadd/hsub, psrldq)
unify MMX and SSE hadd/hsub
move code generation after illegal_op checks
remove C++ comments

v1->v2: get rid of XMM_ONLY
do not special case PMULHRW (yet)
remove dead macro for blend
do not copy table entries on the stack
remove AVXisms from sse_op_table rework
extract more code from the AVX patches

Paolo Bonzini (6):
  i386: do not use MOVL to move data between SSE registers
  i386: formatting fixes
  i386: check SSE table flags instead of hardcoding opcodes
  i386: isolate MMX code more
  i386: Add size suffix to vector FP helpers
  i386: do not cast gen_helper_* function pointers

Paul Brook (17):
  i386: Add ZMM_OFFSET macro
  i386: Rework sse_op_table1
  i386: Rework sse_op_table6/7
  i386: Move 3DNOW decoder
  i386: Add CHECK_NO_VEX
  i386: Rewrite vector shift helper
  i386: Rewrite simple integer vector helpers
  i386: Misc integer AVX helper prep
  i386: Destructive vector helpers for AVX
  i386: Floating point arithmetic helper AVX prep
  i386: reimplement AVX comparison helpers
  i386: Dot product AVX helper prep
  i386: Destructive FP helpers for AVX
  i386: Misc AVX helper prep
  i386: Rewrite blendv helpers
  i386: AVX pclmulqdq prep
  i386: AVX+AES helpers prep

 target/i386/ops_sse.h| 1781 +-
 target/i386/ops_sse_header.h |   68 +-
 target/i386/tcg/translate.c  |  831 +---
 3 files changed, 1391 insertions(+), 1289 deletions(-)

-- 
2.37.1




[PATCH] docs: clarify absence of set_features in vhost-user

2022-09-01 Thread Alyssa Ross
The previous wording was (at least to me) ambiguous about whether a
backend should enable protocol features as soon as they are set using
VHOST_USER_SET_PROTOCOL_FEATURES, or whether it should first wait for
support for protocol features to be acknowledged (if it has not been
already) before enabling them.

This patch attempts to make it clearer that
VHOST_USER_SET_PROTOCOL_FEATURES should immediately enable features,
even if support for protocol features has not yet been acknowledged,
while still also making clear that the frontend SHOULD acknowledge
support for protocol features.

Previous discussion begins here:


Cc: Michael S. Tsirkin 
Signed-off-by: Alyssa Ross 
---
 docs/interop/vhost-user.rst | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
index 3f18ab424e..c8b9771a16 100644
--- a/docs/interop/vhost-user.rst
+++ b/docs/interop/vhost-user.rst
@@ -906,9 +906,9 @@ Front-end message types
   ``VHOST_USER_SET_FEATURES``.
 
 .. Note::
-   Back-ends that report ``VHOST_USER_F_PROTOCOL_FEATURES`` must
-   support this message even before ``VHOST_USER_SET_FEATURES`` was
-   called.
+   While QEMU should acknowledge ``VHOST_USER_F_PROTOCOL_FEATURES``, a
+   back-end must allow ``VHOST_USER_GET_PROTOCOL_FEATURES`` even if
+   ``VHOST_USER_F_PROTOCOL_FEATURES`` has not been acknowledged yet.
 
 ``VHOST_USER_SET_PROTOCOL_FEATURES``
   :id: 16
@@ -923,8 +923,12 @@ Front-end message types
   ``VHOST_USER_SET_FEATURES``.
 
 .. Note::
-   Back-ends that report ``VHOST_USER_F_PROTOCOL_FEATURES`` must support
-   this message even before ``VHOST_USER_SET_FEATURES`` was called.
+   While QEMU should acknowledge ``VHOST_USER_F_PROTOCOL_FEATURES``, a
+   back-end must allow ``VHOST_USER_SET_PROTOCOL_FEATURES`` even if
+   ``VHOST_USER_F_PROTOCOL_FEATURES`` has not been acknowledged yet.
+   The back-end must not wait for ``VHOST_USER_SET_FEATURES`` before
+   enabling protocol features requested with
+   ``VHOST_USER_SET_PROTOCOL_FEATURES``.
 
 ``VHOST_USER_SET_OWNER``
   :id: 3

base-commit: e93ded1bf6c94ab95015b33e188bc8b0b0c32670
-- 
2.37.1




[PATCH] target/i386: rewrite destructive 3DNow operations

2022-09-01 Thread Paolo Bonzini
Remove use of the MOVE macro, since it will be purged from
MMX/SSE as well.

Signed-off-by: Paolo Bonzini 
---
 target/i386/ops_sse.h | 32 
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index f603981ab8..2c0090a647 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -1327,11 +1327,11 @@ void helper_pf2iw(CPUX86State *env, MMXReg *d, MMXReg 
*s)
 
 void helper_pfacc(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
-MMXReg r;
+float32 r;
 
-r.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
-r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
-MOVE(*d, r);
+r = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
+d->MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
+d->MMX_S(0) = r;
 }
 
 void helper_pfadd(CPUX86State *env, MMXReg *d, MMXReg *s)
@@ -1392,20 +1392,20 @@ void helper_pfmul(CPUX86State *env, MMXReg *d, MMXReg 
*s)
 
 void helper_pfnacc(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
-MMXReg r;
+float32 r;
 
-r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
-r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
-MOVE(*d, r);
+r = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
+d->MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
+d->MMX_S(0) = r;
 }
 
 void helper_pfpnacc(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
-MMXReg r;
+float32 r;
 
-r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
-r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
-MOVE(*d, r);
+r = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
+d->MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
+d->MMX_S(0) = r;
 }
 
 void helper_pfrcp(CPUX86State *env, MMXReg *d, MMXReg *s)
@@ -1438,11 +1438,11 @@ void helper_pfsubr(CPUX86State *env, MMXReg *d, MMXReg 
*s)
 
 void helper_pswapd(CPUX86State *env, MMXReg *d, MMXReg *s)
 {
-MMXReg r;
+uint32_t r;
 
-r.MMX_L(0) = s->MMX_L(1);
-r.MMX_L(1) = s->MMX_L(0);
-MOVE(*d, r);
+r = s->MMX_L(0);
+d->MMX_L(0) = s->MMX_L(1);
+d->MMX_L(1) = r;
 }
 #endif
 
-- 
2.37.1




[PATCH v3 17/23] i386: reimplement AVX comparison helpers

2022-09-01 Thread Paolo Bonzini
From: Paul Brook 

AVX includes an additional set of comparison predicates, some of which
our softfloat implementation does not expose as separate functions.
Rewrite the helpers in terms of floatN_compare for future extensibility.

Signed-off-by: Paul Brook 
Reviewed-by: Richard Henderson 
Message-Id: <20220424220204.2493824-24-p...@nowt.org>
Signed-off-by: Paolo Bonzini 
---
 target/i386/ops_sse.h| 97 
 target/i386/ops_sse_header.h | 24 -
 target/i386/tcg/translate.c  | 20 
 3 files changed, 75 insertions(+), 66 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index d881d03228..de874e136f 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -1005,57 +1005,66 @@ void glue(helper_addsubpd, SUFFIX)(CPUX86State *env, 
Reg *d, Reg *s)
 }
 }
 
-/* XXX: unordered */
-#define SSE_HELPER_CMP(name, F) \
-void glue(helper_ ## name ## ps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)\
+#define SSE_HELPER_CMP_P(name, F, C)\
+void glue(helper_ ## name ## ps, SUFFIX)(CPUX86State *env,  \
+ Reg *d, Reg *s)\
 {   \
-d->ZMM_L(0) = F(32, d->ZMM_S(0), s->ZMM_S(0));  \
-d->ZMM_L(1) = F(32, d->ZMM_S(1), s->ZMM_S(1));  \
-d->ZMM_L(2) = F(32, d->ZMM_S(2), s->ZMM_S(2));  \
-d->ZMM_L(3) = F(32, d->ZMM_S(3), s->ZMM_S(3));  \
+Reg *v = d; \
+int i;  \
+for (i = 0; i < 2 << SHIFT; i++) {  \
+d->ZMM_L(i) = C(F(32, v->ZMM_S(i), s->ZMM_S(i))) ? -1 : 0;  \
+}   \
 }   \
 \
-void helper_ ## name ## ss(CPUX86State *env, Reg *d, Reg *s)\
+void glue(helper_ ## name ## pd, SUFFIX)(CPUX86State *env,  \
+ Reg *d, Reg *s)\
 {   \
-d->ZMM_L(0) = F(32, d->ZMM_S(0), s->ZMM_S(0));  \
-}   \
-\
-void glue(helper_ ## name ## pd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)\
-{   \
-d->ZMM_Q(0) = F(64, d->ZMM_D(0), s->ZMM_D(0));  \
-d->ZMM_Q(1) = F(64, d->ZMM_D(1), s->ZMM_D(1));  \
-}   \
-\
-void helper_ ## name ## sd(CPUX86State *env, Reg *d, Reg *s)\
-{   \
-d->ZMM_Q(0) = F(64, d->ZMM_D(0), s->ZMM_D(0));  \
+Reg *v = d; \
+int i;  \
+for (i = 0; i < 1 << SHIFT; i++) {  \
+d->ZMM_Q(i) = C(F(64, v->ZMM_D(i), s->ZMM_D(i))) ? -1 : 0;  \
+}   \
 }
 
-#define FPU_CMPEQ(size, a, b)   \
-(float ## size ## _eq_quiet(a, b, &env->sse_status) ? -1 : 0)
-#define FPU_CMPLT(size, a, b)   \
-(float ## size ## _lt(a, b, &env->sse_status) ? -1 : 0)
-#define FPU_CMPLE(size, a, b)   \
-(float ## size ## _le(a, b, &env->sse_status) ? -1 : 0)
-#define FPU_CMPUNORD(size, a, b)\
-(float ## size ## _unordered_quiet(a, b, &env->sse_status) ? -1 : 0)
-#define FPU_CMPNEQ(size, a, b)  \
-(float ## size ## _eq_quiet(a, b, &env->sse_status) ? 0 : -1)
-#define FPU_CMPNLT(size, a, b)  \
-(float ## size ## _lt(a, b, &env->sse_status) ? 0 : -1)
-#define FPU_CMPNLE(size, a, b)  \
-(float ## size ## _le(a, b, &env->sse_status) ? 0 : -1)
-#define FPU_CMPORD(size, a, b)  \
-(float ## size ## _unordered_quiet(a, b, &env->sse_status) ? 0 : -1)
+#if SHIFT == 1
+#define SSE_HELPER_CMP(name, F, C)  \
+SSE_HELPER_CMP_P(name, F, C)\
+void 

[PATCH v3 14/23] i386: Misc integer AVX helper prep

2022-09-01 Thread Paolo Bonzini
From: Paul Brook 

More preparatory work for AVX support in various integer vector helpers

No functional changes to existing helpers.

Signed-off-by: Paul Brook 
Message-Id: <20220424220204.2493824-13-p...@nowt.org>
Reviewed-by: Richard Henderson 
Signed-off-by: Paolo Bonzini 
---
 target/i386/ops_sse.h | 164 +-
 1 file changed, 80 insertions(+), 84 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index ce03362810..557cc7ce7d 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -390,19 +390,22 @@ SSE_HELPER_W(helper_pavgw, FAVG)
 
 void glue(helper_pmuludq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
-d->Q(0) = (uint64_t)s->L(0) * (uint64_t)d->L(0);
-#if SHIFT == 1
-d->Q(1) = (uint64_t)s->L(2) * (uint64_t)d->L(2);
-#endif
+Reg *v = d;
+int i;
+
+for (i = 0; i < (1 << SHIFT); i++) {
+d->Q(i) = (uint64_t)s->L(i * 2) * (uint64_t)v->L(i * 2);
+}
 }
 
 void glue(helper_pmaddwd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
+Reg *v = d;
 int i;
 
 for (i = 0; i < (2 << SHIFT); i++) {
-d->L(i) = (int16_t)s->W(2 * i) * (int16_t)d->W(2 * i) +
-(int16_t)s->W(2 * i + 1) * (int16_t)d->W(2 * i + 1);
+d->L(i) = (int16_t)s->W(2 * i) * (int16_t)v->W(2 * i) +
+(int16_t)s->W(2 * i + 1) * (int16_t)v->W(2 * i + 1);
 }
 }
 
@@ -416,32 +419,24 @@ static inline int abs1(int a)
 }
 }
 #endif
+
 void glue(helper_psadbw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
-unsigned int val;
+Reg *v = d;
+int i;
 
-val = 0;
-val += abs1(d->B(0) - s->B(0));
-val += abs1(d->B(1) - s->B(1));
-val += abs1(d->B(2) - s->B(2));
-val += abs1(d->B(3) - s->B(3));
-val += abs1(d->B(4) - s->B(4));
-val += abs1(d->B(5) - s->B(5));
-val += abs1(d->B(6) - s->B(6));
-val += abs1(d->B(7) - s->B(7));
-d->Q(0) = val;
-#if SHIFT == 1
-val = 0;
-val += abs1(d->B(8) - s->B(8));
-val += abs1(d->B(9) - s->B(9));
-val += abs1(d->B(10) - s->B(10));
-val += abs1(d->B(11) - s->B(11));
-val += abs1(d->B(12) - s->B(12));
-val += abs1(d->B(13) - s->B(13));
-val += abs1(d->B(14) - s->B(14));
-val += abs1(d->B(15) - s->B(15));
-d->Q(1) = val;
-#endif
+for (i = 0; i < (1 << SHIFT); i++) {
+unsigned int val = 0;
+val += abs1(v->B(8 * i + 0) - s->B(8 * i + 0));
+val += abs1(v->B(8 * i + 1) - s->B(8 * i + 1));
+val += abs1(v->B(8 * i + 2) - s->B(8 * i + 2));
+val += abs1(v->B(8 * i + 3) - s->B(8 * i + 3));
+val += abs1(v->B(8 * i + 4) - s->B(8 * i + 4));
+val += abs1(v->B(8 * i + 5) - s->B(8 * i + 5));
+val += abs1(v->B(8 * i + 6) - s->B(8 * i + 6));
+val += abs1(v->B(8 * i + 7) - s->B(8 * i + 7));
+d->Q(i) = val;
+}
 }
 
 void glue(helper_maskmov, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
@@ -458,20 +453,24 @@ void glue(helper_maskmov, SUFFIX)(CPUX86State *env, Reg 
*d, Reg *s,
 
 void glue(helper_movl_mm_T0, SUFFIX)(Reg *d, uint32_t val)
 {
+int i;
+
 d->L(0) = val;
 d->L(1) = 0;
-#if SHIFT == 1
-d->Q(1) = 0;
-#endif
+for (i = 1; i < (1 << SHIFT); i++) {
+d->Q(i) = 0;
+}
 }
 
 #ifdef TARGET_X86_64
 void glue(helper_movq_mm_T0, SUFFIX)(Reg *d, uint64_t val)
 {
+int i;
+
 d->Q(0) = val;
-#if SHIFT == 1
-d->Q(1) = 0;
-#endif
+for (i = 1; i < (1 << SHIFT); i++) {
+d->Q(i) = 0;
+}
 }
 #endif
 
@@ -1074,26 +1073,21 @@ uint32_t glue(helper_movmskpd, SUFFIX)(CPUX86State 
*env, Reg *s)
 uint32_t glue(helper_pmovmskb, SUFFIX)(CPUX86State *env, Reg *s)
 {
 uint32_t val;
+int i;
 
 val = 0;
-val |= (s->B(0) >> 7);
-val |= (s->B(1) >> 6) & 0x02;
-val |= (s->B(2) >> 5) & 0x04;
-val |= (s->B(3) >> 4) & 0x08;
-val |= (s->B(4) >> 3) & 0x10;
-val |= (s->B(5) >> 2) & 0x20;
-val |= (s->B(6) >> 1) & 0x40;
-val |= (s->B(7)) & 0x80;
-#if SHIFT == 1
-val |= (s->B(8) << 1) & 0x0100;
-val |= (s->B(9) << 2) & 0x0200;
-val |= (s->B(10) << 3) & 0x0400;
-val |= (s->B(11) << 4) & 0x0800;
-val |= (s->B(12) << 5) & 0x1000;
-val |= (s->B(13) << 6) & 0x2000;
-val |= (s->B(14) << 7) & 0x4000;
-val |= (s->B(15) << 8) & 0x8000;
-#endif
+for (i = 0; i < (1 << SHIFT); i++) {
+uint8_t byte = 0;
+byte |= (s->B(8 * i + 0) >> 7);
+byte |= (s->B(8 * i + 1) >> 6) & 0x02;
+byte |= (s->B(8 * i + 2) >> 5) & 0x04;
+byte |= (s->B(8 * i + 3) >> 4) & 0x08;
+byte |= (s->B(8 * i + 4) >> 3) & 0x10;
+byte |= (s->B(8 * i + 5) >> 2) & 0x20;
+byte |= (s->B(8 * i + 6) >> 1) & 0x40;
+byte |= (s->B(8 * i + 7)) & 0x80;
+val |= byte << (8 * i);
+}
 return val;
 }
 
@@ -1638,46 +1632,48 @@ SSE_HELPER_V(helper_blendvpd, Q, 2, FBLENDVPD)
 
 void glue(helper_ptest, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
-uint64_t zf = (s->Q(0) &  d->Q(0)) | (s->Q(1) &  d->Q(1));
- 

Re: [PATCH v3 07/23] i386: check SSE table flags instead of hardcoding opcodes

2022-09-01 Thread Richard Henderson

On 9/1/22 08:48, Paolo Bonzini wrote:

Put more flags to work to avoid hardcoding lists of opcodes.  The op7 case
for SSE_OPF_CMP is included for homogeneity and because AVX needs it, but
it is never used by SSE or MMX.

Extracted from a patch by Paul Brook.

Signed-off-by: Paolo Bonzini
---
  target/i386/tcg/translate.c | 75 +++--
  1 file changed, 31 insertions(+), 44 deletions(-)


Reviewed-by: Richard Henderson 


r~



Re: [PATCH 33/51] tests/qtest: {ahci, ide}-test: Use relative path for temporary files

2022-09-01 Thread Marc-André Lureau
On Wed, Aug 24, 2022 at 2:55 PM Bin Meng  wrote:

> From: Bin Meng 
>
> These test cases use "blkdebug:path/to/config:path/to/image" for
> testing. On Windows, absolute file paths contain the delimiter ':'
> which causes the blkdebug filename parser to fail to parse filenames.
>
>
hmm.. maybe it should learn to escape paths..


Signed-off-by: Bin Meng 
> ---
>
>  tests/qtest/ahci-test.c | 19 ---
>  tests/qtest/ide-test.c  | 18 --
>  2 files changed, 32 insertions(+), 5 deletions(-)
>
> diff --git a/tests/qtest/ahci-test.c b/tests/qtest/ahci-test.c
> index 0e88cd0eef..bce9ff770c 100644
> --- a/tests/qtest/ahci-test.c
> +++ b/tests/qtest/ahci-test.c
> @@ -1848,7 +1848,7 @@ static void create_ahci_io_test(enum IOMode type,
> enum AddrMode addr,
>
>  int main(int argc, char **argv)
>  {
> -const char *arch;
> +const char *arch, *base;
>  int ret;
>  int fd;
>  int c;
> @@ -1886,8 +1886,21 @@ int main(int argc, char **argv)
>  return 0;
>  }
>
> +/*
> + * "base" stores the starting point where we create temporary files.
> + *
> + * On Windows, this is set to the relative path of current working
> + * directory, because the absolute path causes the blkdebug filename
> + * parser fail to parse "blkdebug:path/to/config:path/to/image".
> + */
> +#ifndef _WIN32
> +base = g_get_tmp_dir();
> +#else
> +base = ".";
> +#endif
>

Meanwhile, that seems reasonable. Perhaps chdir() to the temporary
directory first? (assuming other paths are absolute)


> +
>  /* Create a temporary image */
> -tmp_path = g_strdup_printf("%s/qtest.XX", g_get_tmp_dir());
> +tmp_path = g_strdup_printf("%s/qtest.XX", base);
>  fd = mkstemp(tmp_path);
>  g_assert(fd >= 0);
>  if (have_qemu_img()) {
> @@ -1905,7 +1918,7 @@ int main(int argc, char **argv)
>  close(fd);
>
>  /* Create temporary blkdebug instructions */
> -debug_path = g_strdup_printf("%s/qtest-blkdebug.XX",
> g_get_tmp_dir());
> +debug_path = g_strdup_printf("%s/qtest-blkdebug.XX", base);
>  fd = mkstemp(debug_path);
>  g_assert(fd >= 0);
>  close(fd);
> diff --git a/tests/qtest/ide-test.c b/tests/qtest/ide-test.c
> index ebbf8e0126..c5cad6c0be 100644
> --- a/tests/qtest/ide-test.c
> +++ b/tests/qtest/ide-test.c
> @@ -1011,17 +1011,31 @@ static void test_cdrom_dma(void)
>
>  int main(int argc, char **argv)
>  {
> +const char *base;
>  int fd;
>  int ret;
>
> +/*
> + * "base" stores the starting point where we create temporary files.
> + *
> + * On Windows, this is set to the relative path of current working
> + * directory, because the absolute path causes the blkdebug filename
> + * parser fail to parse "blkdebug:path/to/config:path/to/image".
> + */
> +#ifndef _WIN32
> +base = g_get_tmp_dir();
> +#else
> +base = ".";
> +#endif
> +
>  /* Create temporary blkdebug instructions */
> -debug_path = g_strdup_printf("%s/qtest-blkdebug.XX",
> g_get_tmp_dir());
> +debug_path = g_strdup_printf("%s/qtest-blkdebug.XX", base);
>  fd = mkstemp(debug_path);
>  g_assert(fd >= 0);
>  close(fd);
>
>  /* Create a temporary raw image */
> -tmp_path = g_strdup_printf("%s/qtest.XX", g_get_tmp_dir());
> +tmp_path = g_strdup_printf("%s/qtest.XX", base);
>  fd = mkstemp(tmp_path);
>  g_assert(fd >= 0);
>  ret = ftruncate(fd, TEST_IMAGE_SIZE);
> --
> 2.34.1
>
>
>

-- 
Marc-André Lureau


Re: [PATCH v5 12/18] dump/dump: Add section string table support

2022-09-01 Thread Janis Schoetterl-Glausch
On Thu, 2022-08-11 at 12:11 +, Janosch Frank wrote:
> As sections don't have a type like the notes do we need another way to
> determine their contents. The string table allows us to assign each
> section an identification string which architectures can then use to
> tag their sections with.
> 
> There will be no string table if the architecture doesn't add custom
> sections which are introduced in a following patch.

Why? Is there any harm in always having a (possibly empty) string
table? Can't put garbage into sh_name then but that's not relevant.
Seems like it would make the code a bit simpler.

I would also expect the string data to be written in this patch,
instead you do that in the next.
With that and Steffen's .shstrtab addressed:
Reviewed-by: Janis Schoetterl-Glausch 
Some minor suggestions below.

> 
> Signed-off-by: Janosch Frank 
> ---
>  dump/dump.c   | 71 +++
>  include/sysemu/dump.h |  4 +++
>  2 files changed, 75 insertions(+)
> 
> diff --git a/dump/dump.c b/dump/dump.c
> index 31eb20108c..0d6dbf453a 100644
> --- a/dump/dump.c
> +++ b/dump/dump.c

[...]

> @@ -393,17 +400,50 @@ static void prepare_elf_section_hdr_zero(DumpState *s)
>  }
>  }
>  
> +static void prepare_elf_section_hdr_string(DumpState *s, void *buff)
> +{
> +Elf32_Shdr shdr32;
> +Elf64_Shdr shdr64;

Could just = {} those and drop the memset below.

> +int shdr_size;
> +void *shdr;
> +
> +if (dump_is_64bit(s)) {
> +shdr_size = sizeof(Elf64_Shdr);
> +memset(&shdr64, 0, shdr_size);
> +shdr64.sh_type = SHT_STRTAB;
> +shdr64.sh_offset = s->section_offset + s->elf_section_data_size;
> +shdr64.sh_name = s->string_table_buf->len;
> +g_array_append_vals(s->string_table_buf, ".strtab", 
> sizeof(".strtab"));

Could put the ".shstrtab" in a char[] variable.

> +shdr64.sh_size = s->string_table_buf->len;
> +shdr = &shdr64;
> +} else {
> +shdr_size = sizeof(Elf32_Shdr);
> +memset(&shdr32, 0, shdr_size);
> +shdr32.sh_type = SHT_STRTAB;
> +shdr32.sh_offset = s->section_offset + s->elf_section_data_size;
> +shdr32.sh_name = s->string_table_buf->len;
> +g_array_append_vals(s->string_table_buf, ".strtab", 
> sizeof(".strtab"));
> +shdr32.sh_size = s->string_table_buf->len;
> +shdr = &shdr32;
> +}
> +
> +memcpy(buff, shdr, shdr_size);
> +}

[...]

> @@ -1844,6 +1903,18 @@ static void dump_init(DumpState *s, int fd, bool 
> has_format,
>  }
>  }
>  
> +/*
> + * calculate shdr_num and elf_section_data_size so we know the offsets 
> and

What is the elf_section_data_size thing about?

> + * sizes of all parts.
> + *
> + * If phdr_num overflowed we have at least one section header
> + * More sections/hdrs can be added by the architectures
> + */
> +if (s->shdr_num > 1) {
> +/* Reserve the string table */
> +s->shdr_num += 1;
> +}
> +
>  if (dump_is_64bit(s)) {
>  s->shdr_offset = sizeof(Elf64_Ehdr);
>  s->phdr_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num;

[...]



[PULL 13/20] accel/tcg: Document the faulting lookup in tb_lookup_cmp

2022-09-01 Thread Richard Henderson
It was non-obvious to me why we can raise an exception in
the middle of a comparison function, but it works.
While nearby, use TARGET_PAGE_ALIGN instead of open-coding.

Acked-by: Ilya Leoshkevich 
Signed-off-by: Richard Henderson 
---
 accel/tcg/cpu-exec.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 7887af6f45..5f43b9769a 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -198,7 +198,16 @@ static bool tb_lookup_cmp(const void *p, const void *d)
 tb_page_addr_t phys_page2;
 target_ulong virt_page2;
 
-virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
+/*
+ * We know that the first page matched, and an otherwise valid TB
+ * encountered an incomplete instruction at the end of that page,
+ * therefore we know that generating a new TB from the current PC
+ * must also require reading from the next page -- even if the
+ * second pages do not match, and therefore the resulting insn
+ * is different for the new TB.  Therefore any exception raised
+ * here by the faulting lookup is not premature.
+ */
+virt_page2 = TARGET_PAGE_ALIGN(desc->pc);
 phys_page2 = get_page_addr_code(desc->env, virt_page2);
 if (tb->page_addr[1] == phys_page2) {
 return true;
-- 
2.34.1




[PULL 14/20] accel/tcg: Remove translator_ldsw

2022-09-01 Thread Richard Henderson
The only user can easily use translator_lduw and
adjust the type to signed during the return.

Reviewed-by: Alistair Francis 
Acked-by: Ilya Leoshkevich 
Tested-by: Ilya Leoshkevich 
Signed-off-by: Richard Henderson 
---
 include/exec/translator.h   | 1 -
 target/i386/tcg/translate.c | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/exec/translator.h b/include/exec/translator.h
index 0d0bf3a31e..45b9268ca4 100644
--- a/include/exec/translator.h
+++ b/include/exec/translator.h
@@ -178,7 +178,6 @@ bool translator_use_goto_tb(DisasContextBase *db, 
target_ulong dest);
 
 #define FOR_EACH_TRANSLATOR_LD(F)   \
 F(translator_ldub, uint8_t, cpu_ldub_code, /* no swap */)   \
-F(translator_ldsw, int16_t, cpu_ldsw_code, bswap16) \
 F(translator_lduw, uint16_t, cpu_lduw_code, bswap16)\
 F(translator_ldl, uint32_t, cpu_ldl_code, bswap32)  \
 F(translator_ldq, uint64_t, cpu_ldq_code, bswap64)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index b7972f0ff5..a23417d058 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2033,7 +2033,7 @@ static inline uint8_t x86_ldub_code(CPUX86State *env, 
DisasContext *s)
 
 static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
 {
-return translator_ldsw(env, &s->base, advance_pc(env, s, 2));
+return translator_lduw(env, &s->base, advance_pc(env, s, 2));
 }
 
 static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
-- 
2.34.1




Re: [PATCH v4 0/4] Enable unix socket support on Windows

2022-09-01 Thread Marc-André Lureau
Hi

On Thu, Sep 1, 2022 at 10:13 AM Bin Meng  wrote:
>
> Hi,
>
> On Tue, Aug 2, 2022 at 3:52 PM Bin Meng  wrote:
> >
> > Support for the unix socket has existed both in BSD and Linux for the
> > longest time, but not on Windows. Since Windows 10 build 17063 [1],
> > the native support for the unix socket has come to Windows. Starting
> > this build, two Win32 processes can use the AF_UNIX address family
> > over Winsock API to communicate with each other.
> >
> > [1] https://devblogs.microsoft.com/commandline/af_unix-comes-to-windows/
> >
> > Changes in v4:
> > - instead of introducing CONFIG_AF_UNIX, add fallback afunix.h header
> >   in os-win32.h, and compile the AF_UNIX stuff for all Windows hosts
> > - drop CONFIG_AF_UNIX
> > - introduce a new helper socket_check_afunix_support() to runtime-check
> >   the availability of AF_UNIX socket, and skip those appropriately
> >
>
> All patches in this series have been reviewed. Would you please queue
> this? Thanks!

Yes, I was going to do it. Thanks




[PATCH v3 05/23] i386: Rework sse_op_table6/7

2022-09-01 Thread Paolo Bonzini
From: Paul Brook 

Add a flags field to each row in sse_op_table6 and sse_op_table7.

Initially this is only used as a replacement for the magic SSE41_SPECIAL
pointer.  The other flags are mostly relevant for the AVX implementation
but can be applied to SSE as well.

Signed-off-by: Paul Brook 
Message-Id: <20220424220204.2493824-6-p...@nowt.org>
Reviewed-by: Richard Henderson 
Signed-off-by: Paolo Bonzini 
---
 target/i386/tcg/translate.c | 230 
 1 file changed, 131 insertions(+), 99 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 7332bbcf44..b7321b7588 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2976,7 +2976,6 @@ static const struct SSEOpHelper_table1 sse_op_table1[256] 
= {
 #undef SSE_SPECIAL
 
 #define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
-#define SSE_SPECIAL_FN ((void *)1)
 
 static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
 [0 + 2] = MMX_OP2(psrlw),
@@ -3060,113 +3059,134 @@ static const SSEFunc_0_epp sse_op_table5[256] = {
 [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
 };
 
-struct SSEOpHelper_epp {
+struct SSEOpHelper_table6 {
 SSEFunc_0_epp op[2];
 uint32_t ext_mask;
+int flags;
 };
 
-struct SSEOpHelper_eppi {
+struct SSEOpHelper_table7 {
 SSEFunc_0_eppi op[2];
 uint32_t ext_mask;
+int flags;
 };
 
-#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
-#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
-#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
-#define SSE41_SPECIAL { { NULL, SSE_SPECIAL_FN }, CPUID_EXT_SSE41 }
-#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
-CPUID_EXT_PCLMULQDQ }
-#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
+#define gen_helper_special_xmm NULL
 
-static const struct SSEOpHelper_epp sse_op_table6[256] = {
-[0x00] = SSSE3_OP(pshufb),
-[0x01] = SSSE3_OP(phaddw),
-[0x02] = SSSE3_OP(phaddd),
-[0x03] = SSSE3_OP(phaddsw),
-[0x04] = SSSE3_OP(pmaddubsw),
-[0x05] = SSSE3_OP(phsubw),
-[0x06] = SSSE3_OP(phsubd),
-[0x07] = SSSE3_OP(phsubsw),
-[0x08] = SSSE3_OP(psignb),
-[0x09] = SSSE3_OP(psignw),
-[0x0a] = SSSE3_OP(psignd),
-[0x0b] = SSSE3_OP(pmulhrsw),
-[0x10] = SSE41_OP(pblendvb),
-[0x14] = SSE41_OP(blendvps),
-[0x15] = SSE41_OP(blendvpd),
-[0x17] = SSE41_OP(ptest),
-[0x1c] = SSSE3_OP(pabsb),
-[0x1d] = SSSE3_OP(pabsw),
-[0x1e] = SSSE3_OP(pabsd),
-[0x20] = SSE41_OP(pmovsxbw),
-[0x21] = SSE41_OP(pmovsxbd),
-[0x22] = SSE41_OP(pmovsxbq),
-[0x23] = SSE41_OP(pmovsxwd),
-[0x24] = SSE41_OP(pmovsxwq),
-[0x25] = SSE41_OP(pmovsxdq),
-[0x28] = SSE41_OP(pmuldq),
-[0x29] = SSE41_OP(pcmpeqq),
-[0x2a] = SSE41_SPECIAL, /* movntqda */
-[0x2b] = SSE41_OP(packusdw),
-[0x30] = SSE41_OP(pmovzxbw),
-[0x31] = SSE41_OP(pmovzxbd),
-[0x32] = SSE41_OP(pmovzxbq),
-[0x33] = SSE41_OP(pmovzxwd),
-[0x34] = SSE41_OP(pmovzxwq),
-[0x35] = SSE41_OP(pmovzxdq),
-[0x37] = SSE42_OP(pcmpgtq),
-[0x38] = SSE41_OP(pminsb),
-[0x39] = SSE41_OP(pminsd),
-[0x3a] = SSE41_OP(pminuw),
-[0x3b] = SSE41_OP(pminud),
-[0x3c] = SSE41_OP(pmaxsb),
-[0x3d] = SSE41_OP(pmaxsd),
-[0x3e] = SSE41_OP(pmaxuw),
-[0x3f] = SSE41_OP(pmaxud),
-[0x40] = SSE41_OP(pmulld),
-[0x41] = SSE41_OP(phminposuw),
-[0xdb] = AESNI_OP(aesimc),
-[0xdc] = AESNI_OP(aesenc),
-[0xdd] = AESNI_OP(aesenclast),
-[0xde] = AESNI_OP(aesdec),
-[0xdf] = AESNI_OP(aesdeclast),
+#define OP(name, op, flags, ext, mmx_name) \
+{{mmx_name, gen_helper_ ## name ## _xmm}, CPUID_EXT_ ## ext, flags}
+#define BINARY_OP_MMX(name, ext) \
+OP(name, op1, SSE_OPF_MMX, ext, gen_helper_ ## name ## _mmx)
+#define BINARY_OP(name, ext, flags) \
+OP(name, op1, flags, ext, NULL)
+#define UNARY_OP_MMX(name, ext) \
+OP(name, op1, SSE_OPF_MMX, ext, gen_helper_ ## name ## _mmx)
+#define UNARY_OP(name, ext, flags) \
+OP(name, op1, flags, ext, NULL)
+#define BLENDV_OP(name, ext, flags) OP(name, op1, 0, ext, NULL)
+#define CMP_OP(name, ext) OP(name, op1, SSE_OPF_CMP, ext, NULL)
+#define SPECIAL_OP(ext) OP(special, op1, SSE_OPF_SPECIAL, ext, NULL)
+
+/* prefix [66] 0f 38 */
+static const struct SSEOpHelper_table6 sse_op_table6[256] = {
+[0x00] = BINARY_OP_MMX(pshufb, SSSE3),
+[0x01] = BINARY_OP_MMX(phaddw, SSSE3),
+[0x02] = BINARY_OP_MMX(phaddd, SSSE3),
+[0x03] = BINARY_OP_MMX(phaddsw, SSSE3),
+[0x04] = BINARY_OP_MMX(pmaddubsw, SSSE3),
+[0x05] = BINARY_OP_MMX(phsubw, SSSE3),
+[0x06] = BINARY_OP_MMX(phsubd, SSSE3),
+[0x07] = BINARY_OP_MMX(phsubsw, SSSE3),
+[0x08] = BINARY_OP_MMX(psignb, SSSE3),
+[0x09] = BINARY_OP_MMX(psignw, SSSE3),
+[0x0a] = BINARY_OP_MMX(psignd, SSSE3),
+[0x0b] = BINARY_OP_MMX(pmulhrsw, SSSE3),
+[0x10] = BLENDV_OP(pblendvb, SSE41, SSE_OPF_MMX),
+ 

Re: [PATCH 04/51] semihosting/arm-compat-semi: Avoid using hardcoded /tmp

2022-09-01 Thread Bin Meng
On Wed, Aug 31, 2022 at 8:59 PM Marc-André Lureau
 wrote:
>
> Hi
>
> On Wed, Aug 24, 2022 at 1:54 PM Bin Meng  wrote:
>>
>> From: Bin Meng 
>>
>> Use g_get_tmp_dir() to get the directory to use for temporary files.
>>
>> Signed-off-by: Bin Meng 
>> ---
>>
>>  semihosting/arm-compat-semi.c | 3 ++-
>>  1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/semihosting/arm-compat-semi.c b/semihosting/arm-compat-semi.c
>> index e741674238..d5e66cc298 100644
>> --- a/semihosting/arm-compat-semi.c
>> +++ b/semihosting/arm-compat-semi.c
>> @@ -503,7 +503,8 @@ void do_common_semihosting(CPUState *cs)
>>  GET_ARG(0);
>>  GET_ARG(1);
>>  GET_ARG(2);
>> -len = asprintf(&s, "/tmp/qemu-%x%02x", getpid(), (int)arg1 & 0xff);
>> +len = asprintf(&s, "%s/qemu-%x%02x", g_get_tmp_dir(),
>> +   getpid(), (int)arg1 & 0xff);
>
>
> This is most likely wrong. I am not familiar with semihosting, but I believe 
> we are implementing tmpnam(), it should return a POSIX filename.

Replacing /tmp with g_get_tmp_dir() is not wrong, correct?

I checked the semihosting spec, it does not mention the file name
should be a POSIX compliant path.
https://developer.arm.com/documentation/dui0058/d/semihosting/semihosting-swis/sys-tmpnam--0x0d-

Certainly this needs a semihosting expert to take a look.

>
>>
>>  if (len < 0) {
>>  common_semi_set_ret(cs, -1);
>>  break;
>> --

Regards,
Bin



[PATCH v3 09/23] i386: Add size suffix to vector FP helpers

2022-09-01 Thread Paolo Bonzini
For AVX we're going to need both 128 bit (xmm) and 256 bit (ymm) variants of
floating point helpers. Add the register type suffix to the existing
*PS and *PD helpers (SS and SD variants are only valid on 128 bit vectors)

No functional changes.

Signed-off-by: Paul Brook 
Message-Id: <20220424220204.2493824-15-p...@nowt.org>
Reviewed-by: Richard Henderson 
Signed-off-by: Paolo Bonzini 
---
 target/i386/ops_sse.h| 48 ++--
 target/i386/ops_sse_header.h | 48 ++--
 target/i386/tcg/translate.c  | 37 +--
 3 files changed, 67 insertions(+), 66 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 8845b6d4cb..2c0090a647 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -537,7 +537,7 @@ void glue(helper_pshufw, SUFFIX)(Reg *d, Reg *s, int order)
 MOVE(*d, r);
 }
 #else
-void helper_shufps(Reg *d, Reg *s, int order)
+void glue(helper_shufps, SUFFIX)(Reg *d, Reg *s, int order)
 {
 Reg r;
 
@@ -548,7 +548,7 @@ void helper_shufps(Reg *d, Reg *s, int order)
 MOVE(*d, r);
 }
 
-void helper_shufpd(Reg *d, Reg *s, int order)
+void glue(helper_shufpd, SUFFIX)(Reg *d, Reg *s, int order)
 {
 Reg r;
 
@@ -598,7 +598,7 @@ void glue(helper_pshufhw, SUFFIX)(Reg *d, Reg *s, int order)
 /* XXX: not accurate */
 
 #define SSE_HELPER_S(name, F)   \
-void helper_ ## name ## ps(CPUX86State *env, Reg *d, Reg *s)\
+void glue(helper_ ## name ## ps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)\
 {   \
 d->ZMM_S(0) = F(32, d->ZMM_S(0), s->ZMM_S(0));  \
 d->ZMM_S(1) = F(32, d->ZMM_S(1), s->ZMM_S(1));  \
@@ -611,7 +611,7 @@ void glue(helper_pshufhw, SUFFIX)(Reg *d, Reg *s, int order)
 d->ZMM_S(0) = F(32, d->ZMM_S(0), s->ZMM_S(0));  \
 }   \
 \
-void helper_ ## name ## pd(CPUX86State *env, Reg *d, Reg *s)\
+void glue(helper_ ## name ## pd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)\
 {   \
 d->ZMM_D(0) = F(64, d->ZMM_D(0), s->ZMM_D(0));  \
 d->ZMM_D(1) = F(64, d->ZMM_D(1), s->ZMM_D(1));  \
@@ -647,7 +647,7 @@ SSE_HELPER_S(sqrt, FPU_SQRT)
 
 
 /* float to float conversions */
-void helper_cvtps2pd(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_cvtps2pd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
 float32 s0, s1;
 
@@ -657,7 +657,7 @@ void helper_cvtps2pd(CPUX86State *env, Reg *d, Reg *s)
 d->ZMM_D(1) = float32_to_float64(s1, &env->sse_status);
 }
 
-void helper_cvtpd2ps(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_cvtpd2ps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
 d->ZMM_S(0) = float64_to_float32(s->ZMM_D(0), &env->sse_status);
 d->ZMM_S(1) = float64_to_float32(s->ZMM_D(1), &env->sse_status);
@@ -675,7 +675,7 @@ void helper_cvtsd2ss(CPUX86State *env, Reg *d, Reg *s)
 }
 
 /* integer to float */
-void helper_cvtdq2ps(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_cvtdq2ps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
 d->ZMM_S(0) = int32_to_float32(s->ZMM_L(0), &env->sse_status);
 d->ZMM_S(1) = int32_to_float32(s->ZMM_L(1), &env->sse_status);
@@ -683,7 +683,7 @@ void helper_cvtdq2ps(CPUX86State *env, Reg *d, Reg *s)
 d->ZMM_S(3) = int32_to_float32(s->ZMM_L(3), &env->sse_status);
 }
 
-void helper_cvtdq2pd(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_cvtdq2pd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
 int32_t l0, l1;
 
@@ -760,7 +760,7 @@ WRAP_FLOATCONV(int64_t, float32_to_int64_round_to_zero, 
float32, INT64_MIN)
 WRAP_FLOATCONV(int64_t, float64_to_int64, float64, INT64_MIN)
 WRAP_FLOATCONV(int64_t, float64_to_int64_round_to_zero, float64, INT64_MIN)
 
-void helper_cvtps2dq(CPUX86State *env, ZMMReg *d, ZMMReg *s)
+void glue(helper_cvtps2dq, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
 {
 d->ZMM_L(0) = x86_float32_to_int32(s->ZMM_S(0), &env->sse_status);
 d->ZMM_L(1) = x86_float32_to_int32(s->ZMM_S(1), &env->sse_status);
@@ -768,7 +768,7 @@ void helper_cvtps2dq(CPUX86State *env, ZMMReg *d, ZMMReg *s)
 d->ZMM_L(3) = x86_float32_to_int32(s->ZMM_S(3), &env->sse_status);
 }
 
-void helper_cvtpd2dq(CPUX86State *env, ZMMReg *d, ZMMReg *s)
+void glue(helper_cvtpd2dq, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
 {
 d->ZMM_L(0) = x86_float64_to_int32(s->ZMM_D(0), &env->sse_status);
 d->ZMM_L(1) = x86_float64_to_int32(s->ZMM_D(1), &env->sse_status);
@@ -810,7 +810,7 @@ int64_t helper_cvtsd2sq(CPUX86State *env, ZMMReg *s)
 #endif
 
 /* float to integer truncated */
-void helper_cvttps2dq(CPUX86State *env, ZMMReg *d, ZMMReg *s)
+void glue(helper_cvttps2dq, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
 {
 

[PATCH v3 10/23] i386: do not cast gen_helper_* function pointers

2022-09-01 Thread Paolo Bonzini
Use a union to store the various possible kinds of function pointers, and
access the correct one based on the flags.

SSEOpHelper_table6 and SSEOpHelper_table7 right now only have one case,
but this would change with AVX's 3- and 4-argument operations.  Use
unions there too, to keep the code more similar for the three tables.

Extracted from a patch by Paul Brook .

Signed-off-by: Paolo Bonzini 
---
 target/i386/tcg/translate.c | 75 ++---
 1 file changed, 37 insertions(+), 38 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 16db155c94..c6a9a5b1d4 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2784,6 +2784,8 @@ typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, 
TCGv_ptr reg);
 typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
 typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
 typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
+typedef void (*SSEFunc_0_eppp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
+   TCGv_ptr reg_c);
 typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
TCGv_i32 val);
 typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
@@ -2798,7 +2800,7 @@ typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr 
reg_a, TCGv_ptr reg_b,
 #define SSE_OPF_SHUF  (1 << 9) /* pshufx/shufpx */
 
 #define OP(op, flags, a, b, c, d)   \
-{flags, {a, b, c, d} }
+{flags, {{.op = a}, {.op = b}, {.op = c}, {.op = d} } }
 
 #define MMX_OP(x) OP(op1, SSE_OPF_MMX, \
 gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm, NULL, NULL)
@@ -2809,9 +2811,15 @@ typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr 
reg_a, TCGv_ptr reg_b,
 #define SSE_OP(sname, dname, op, flags) OP(op, flags, \
 gen_helper_##sname##_xmm, gen_helper_##dname##_xmm, NULL, NULL)
 
+typedef union SSEFuncs {
+SSEFunc_0_epp op1;
+SSEFunc_0_ppi op1i;
+SSEFunc_0_eppt op1t;
+} SSEFuncs;
+
 struct SSEOpHelper_table1 {
 int flags;
-SSEFunc_0_epp op[4];
+SSEFuncs fn[4];
 };
 
 #define SSE_3DNOW { SSE_OPF_3DNOW }
@@ -2867,8 +2875,7 @@ static const struct SSEOpHelper_table1 sse_op_table1[256] 
= {
 [0x5f] = SSE_FOP(max),
 
 [0xc2] = SSE_FOP(cmpeq), /* sse_op_table4 */
-[0xc6] = OP(dummy, SSE_OPF_SHUF, (SSEFunc_0_epp)gen_helper_shufps_xmm,
-(SSEFunc_0_epp)gen_helper_shufpd_xmm, NULL, NULL),
+[0xc6] = SSE_OP(shufps, shufpd, op1i, SSE_OPF_SHUF),
 
 /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX.  */
 [0x38] = SSE_SPECIAL,
@@ -2894,10 +2901,8 @@ static const struct SSEOpHelper_table1 
sse_op_table1[256] = {
 [0x6e] = SSE_SPECIAL, /* movd mm, ea */
 [0x6f] = SSE_SPECIAL, /* movq, movdqa, , movqdu */
 [0x70] = OP(op1i, SSE_OPF_SHUF | SSE_OPF_MMX,
-(SSEFunc_0_epp)gen_helper_pshufw_mmx,
-(SSEFunc_0_epp)gen_helper_pshufd_xmm,
-(SSEFunc_0_epp)gen_helper_pshufhw_xmm,
-(SSEFunc_0_epp)gen_helper_pshuflw_xmm),
+gen_helper_pshufw_mmx, gen_helper_pshufd_xmm,
+gen_helper_pshufhw_xmm, gen_helper_pshuflw_xmm),
 [0x71] = SSE_SPECIAL, /* shiftw */
 [0x72] = SSE_SPECIAL, /* shiftd */
 [0x73] = SSE_SPECIAL, /* shiftq */
@@ -2959,8 +2964,7 @@ static const struct SSEOpHelper_table1 sse_op_table1[256] 
= {
 [0xf5] = MMX_OP(pmaddwd),
 [0xf6] = MMX_OP(psadbw),
 [0xf7] = OP(op1t, SSE_OPF_MMX,
-(SSEFunc_0_epp)gen_helper_maskmov_mmx,
-(SSEFunc_0_epp)gen_helper_maskmov_xmm, NULL, NULL),
+gen_helper_maskmov_mmx, gen_helper_maskmov_xmm, NULL, NULL),
 [0xf8] = MMX_OP(psubb),
 [0xf9] = MMX_OP(psubw),
 [0xfa] = MMX_OP(psubl),
@@ -3057,17 +3061,19 @@ static const SSEFunc_0_epp sse_op_table5[256] = {
 [0xb6] = gen_helper_movq, /* pfrcpit2 */
 [0xb7] = gen_helper_pmulhrw_mmx,
 [0xbb] = gen_helper_pswapd,
-[0xbf] = gen_helper_pavgb_mmx /* pavgusb */
+[0xbf] = gen_helper_pavgb_mmx,
 };
 
 struct SSEOpHelper_table6 {
-SSEFunc_0_epp op[2];
+SSEFuncs fn[2];
 uint32_t ext_mask;
 int flags;
 };
 
 struct SSEOpHelper_table7 {
-SSEFunc_0_eppi op[2];
+union {
+SSEFunc_0_eppi op1;
+} fn[2];
 uint32_t ext_mask;
 int flags;
 };
@@ -3075,7 +3081,8 @@ struct SSEOpHelper_table7 {
 #define gen_helper_special_xmm NULL
 
 #define OP(name, op, flags, ext, mmx_name) \
-{{mmx_name, gen_helper_ ## name ## _xmm}, CPUID_EXT_ ## ext, flags}
+{{{.op = mmx_name}, {.op = gen_helper_ ## name ## _xmm} }, \
+CPUID_EXT_ ## ext, flags}
 #define BINARY_OP_MMX(name, ext) \
 OP(name, op1, SSE_OPF_MMX, ext, gen_helper_ ## name ## _mmx)
 #define BINARY_OP(name, ext, flags) \
@@ -3185,11 +3192,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 int b1, op1_offset, op2_offset, is_xmm, 

Re: [PATCH v3 23/23] i386: AVX+AES helpers prep

2022-09-01 Thread Richard Henderson

On 9/1/22 08:48, Paolo Bonzini wrote:

From: Paul Brook

Make the AES vector helpers AVX ready

No functional changes to existing helpers

Signed-off-by: Paul Brook
Message-Id:<20220424220204.2493824-22-p...@nowt.org>
Signed-off-by: Paolo Bonzini
---
  target/i386/ops_sse.h | 41 ++---
  1 file changed, 22 insertions(+), 19 deletions(-)


Reviewed-by: Richard Henderson 


r~



[PATCH v2 01/10] qapi: fix example of query-balloon command

2022-09-01 Thread Victor Toso
Example output has an extra ',' delimiter. Fix it.

Problem was noticed when trying to load the example into python's json
library.

Signed-off-by: Victor Toso 
---
 qapi/machine.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qapi/machine.json b/qapi/machine.json
index 6afd1936b0..5f1f50d3ed 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1074,7 +1074,7 @@
 #
 # -> { "execute": "query-balloon" }
 # <- { "return": {
-#  "actual": 1073741824,
+#  "actual": 1073741824
 #   }
 #}
 #
-- 
2.37.2




[PATCH] qio: fix command spawn RDONLY/WRONLY

2022-09-01 Thread marcandre . lureau
From: Marc-André Lureau 

The in/out handling is inverted, although nothing seemed to notice that yet.

Signed-off-by: Marc-André Lureau 
---
 io/channel-command.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/io/channel-command.c b/io/channel-command.c
index 9f2f4a1793..ed17b44f74 100644
--- a/io/channel-command.c
+++ b/io/channel-command.c
@@ -79,10 +79,10 @@ qio_channel_command_new_spawn(const char *const argv[],
 flags = flags & O_ACCMODE;
 
 if (flags == O_RDONLY) {
-stdinnull = true;
+stdoutnull = true;
 }
 if (flags == O_WRONLY) {
-stdoutnull = true;
+stdinnull = true;
 }
 
 if (stdinnull || stdoutnull) {
-- 
2.37.2




[PATCH] tests: mark io-command test as skipped if socat is missing

2022-09-01 Thread marcandre . lureau
From: Marc-André Lureau 

Signed-off-by: Marc-André Lureau 
---
 tests/unit/test-io-channel-command.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/unit/test-io-channel-command.c 
b/tests/unit/test-io-channel-command.c
index 99056e07c0..aa09c559cd 100644
--- a/tests/unit/test-io-channel-command.c
+++ b/tests/unit/test-io-channel-command.c
@@ -41,7 +41,8 @@ static void test_io_channel_command_fifo(bool async)
 
 unlink(TEST_FIFO);
 if (access("/bin/socat", X_OK) < 0) {
-return; /* Pretend success if socat is not present */
+g_test_skip("socat is missing");
+return;
 }
 if (mkfifo(TEST_FIFO, 0600) < 0) {
 abort();
-- 
2.37.2




[PATCH] io/command: implement portable spawn

2022-09-01 Thread marcandre . lureau
From: Marc-André Lureau 

Using GLib spawn API is both simpler and portable.

Signed-off-by: Marc-André Lureau 
---
 io/channel-command.c | 115 ---
 1 file changed, 21 insertions(+), 94 deletions(-)

diff --git a/io/channel-command.c b/io/channel-command.c
index 9f2f4a1793..c069732105 100644
--- a/io/channel-command.c
+++ b/io/channel-command.c
@@ -26,7 +26,6 @@
 #include "qemu/sockets.h"
 #include "trace.h"
 
-#ifndef WIN32
 /**
  * qio_channel_command_new_pid:
  * @writefd: the FD connected to the command's stdin
@@ -69,107 +68,35 @@ qio_channel_command_new_spawn(const char *const argv[],
   int flags,
   Error **errp)
 {
-pid_t pid = -1;
-int stdinfd[2] = { -1, -1 };
-int stdoutfd[2] = { -1, -1 };
-int devnull = -1;
-bool stdinnull = false, stdoutnull = false;
+g_autoptr(GError) err = NULL;
+GPid pid = 0;
 QIOChannelCommand *ioc;
+GSpawnFlags gflags = G_SPAWN_CLOEXEC_PIPES;
+int stdinfd = -1, stdoutfd = -1;
 
 flags = flags & O_ACCMODE;
-
-if (flags == O_RDONLY) {
-stdinnull = true;
-}
-if (flags == O_WRONLY) {
-stdoutnull = true;
-}
-
-if (stdinnull || stdoutnull) {
-devnull = open("/dev/null", O_RDWR);
-if (devnull < 0) {
-error_setg_errno(errp, errno,
- "Unable to open /dev/null");
-goto error;
-}
-}
-
-if ((!stdinnull && !g_unix_open_pipe(stdinfd, FD_CLOEXEC, NULL)) ||
-(!stdoutnull && !g_unix_open_pipe(stdoutfd, FD_CLOEXEC, NULL))) {
-error_setg_errno(errp, errno,
- "Unable to open pipe");
-goto error;
-}
-
-pid = qemu_fork(errp);
-if (pid < 0) {
-goto error;
-}
-
-if (pid == 0) { /* child */
-dup2(stdinnull ? devnull : stdinfd[0], STDIN_FILENO);
-dup2(stdoutnull ? devnull : stdoutfd[1], STDOUT_FILENO);
-/* Leave stderr connected to qemu's stderr */
-
-if (!stdinnull) {
-close(stdinfd[0]);
-close(stdinfd[1]);
-}
-if (!stdoutnull) {
-close(stdoutfd[0]);
-close(stdoutfd[1]);
-}
-if (devnull != -1) {
-close(devnull);
-}
-
-execv(argv[0], (char * const *)argv);
-_exit(1);
+gflags |= flags == O_WRONLY ? G_SPAWN_STDOUT_TO_DEV_NULL : 0;
+
+if (!g_spawn_async_with_pipes(NULL, (char **)argv, NULL, gflags, NULL, 
NULL,
+  &pid,
+  flags == O_RDONLY ? NULL : &stdinfd,
+  flags == O_WRONLY ? NULL : &stdoutfd,
+  NULL, &err)) {
+error_setg(errp, "%s", err->message);
+return NULL;
 }
 
-if (!stdinnull) {
-close(stdinfd[0]);
-}
-if (!stdoutnull) {
-close(stdoutfd[1]);
-}
 
-ioc = qio_channel_command_new_pid(stdinnull ? devnull : stdinfd[1],
-  stdoutnull ? devnull : stdoutfd[0],
-  pid);
-trace_qio_channel_command_new_spawn(ioc, argv[0], flags);
+ioc = qio_channel_command_new_pid(stdinfd,
+  stdoutfd,
+#ifdef _WIN32
+  GetProcessId(pid)
+#else
+  pid
+#endif
+);
 return ioc;
-
- error:
-if (devnull != -1) {
-close(devnull);
-}
-if (stdinfd[0] != -1) {
-close(stdinfd[0]);
-}
-if (stdinfd[1] != -1) {
-close(stdinfd[1]);
-}
-if (stdoutfd[0] != -1) {
-close(stdoutfd[0]);
-}
-if (stdoutfd[1] != -1) {
-close(stdoutfd[1]);
-}
-return NULL;
-}
-
-#else /* WIN32 */
-QIOChannelCommand *
-qio_channel_command_new_spawn(const char *const argv[],
-  int flags,
-  Error **errp)
-{
-error_setg_errno(errp, ENOSYS,
- "Command spawn not supported on this platform");
-return NULL;
 }
-#endif /* WIN32 */
 
 #ifndef WIN32
 static int qio_channel_command_abort(QIOChannelCommand *ioc,
-- 
2.37.2




Re: [PATCH 41/51] tests/qtest: migration-test: Kill "to" after migration is canceled

2022-09-01 Thread Marc-André Lureau
Hi

On Wed, Aug 24, 2022 at 10:56 PM Dr. David Alan Gilbert 
wrote:

> * Bin Meng (bmeng...@gmail.com) wrote:
> > From: Xuzhou Cheng 
> >
> > Make sure QEMU process "to" is killed before launching another target
> > for migration in the test_multifd_tcp_cancel case.
> >
> > Signed-off-by: Xuzhou Cheng 
> > Signed-off-by: Bin Meng 
> > ---
> >
> >  tests/qtest/migration-test.c | 4 
> >  1 file changed, 4 insertions(+)
> >
> > diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
> > index 125d48d855..18ec079abf 100644
> > --- a/tests/qtest/migration-test.c
> > +++ b/tests/qtest/migration-test.c
> > @@ -2132,6 +2132,10 @@ static void test_multifd_tcp_cancel(void)
> >  wait_for_migration_pass(from);
> >
> >  migrate_cancel(from);
> > +/* Make sure QEMU process "to" is killed */
> > +if (qtest_probe_child(to)) {
> > +qtest_kill_qemu(to);
> > +}
>
> I'm not sure that's safe - what happens if the qemu exits between the
> probe and kill?
>

It looks safe to me, qtest_probe_child() resets the qemu_pid if it already
exited. Otherwise, there is a process/handle waiting for
waitpid/CloseHandle done in qtest_kill_qemu().

We are missing a CloseHandle() in qtest_probe_child() though, I'll send a
patch.

so lgtm,
Reviewed-by: Marc-André Lureau 

-- 
Marc-André Lureau


[PULL 17/20] target/s390x: Make translator stop before the end of a page

2022-09-01 Thread Richard Henderson
From: Ilya Leoshkevich 

Right now translator stops right *after* the end of a page, which
breaks reporting of fault locations when the last instruction of a
multi-insn translation block crosses a page boundary.

Signed-off-by: Ilya Leoshkevich 
Reviewed-by: Richard Henderson 
Message-Id: <20220817150506.592862-3-...@linux.ibm.com>
Signed-off-by: Richard Henderson 
---
 target/s390x/tcg/translate.c |  15 +++-
 tests/tcg/s390x/noexec.c | 106 +++
 tests/tcg/multiarch/noexec.c.inc | 139 +++
 tests/tcg/s390x/Makefile.target  |   1 +
 4 files changed, 257 insertions(+), 4 deletions(-)
 create mode 100644 tests/tcg/s390x/noexec.c
 create mode 100644 tests/tcg/multiarch/noexec.c.inc

diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index d4c0b9b3a2..1d2dddab1c 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -6609,6 +6609,14 @@ static void s390x_tr_insn_start(DisasContextBase 
*dcbase, CPUState *cs)
 dc->insn_start = tcg_last_op();
 }
 
+static target_ulong get_next_pc(CPUS390XState *env, DisasContext *s,
+uint64_t pc)
+{
+uint64_t insn = ld_code2(env, s, pc);
+
+return pc + get_ilen((insn >> 8) & 0xff);
+}
+
 static void s390x_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
 {
 CPUS390XState *env = cs->env_ptr;
@@ -6616,10 +6624,9 @@ static void s390x_tr_translate_insn(DisasContextBase 
*dcbase, CPUState *cs)
 
 dc->base.is_jmp = translate_one(env, dc);
 if (dc->base.is_jmp == DISAS_NEXT) {
-uint64_t page_start;
-
-page_start = dc->base.pc_first & TARGET_PAGE_MASK;
-if (dc->base.pc_next - page_start >= TARGET_PAGE_SIZE || dc->ex_value) 
{
+if (!is_same_page(dcbase, dc->base.pc_next) ||
+!is_same_page(dcbase, get_next_pc(env, dc, dc->base.pc_next)) ||
+dc->ex_value) {
 dc->base.is_jmp = DISAS_TOO_MANY;
 }
 }
diff --git a/tests/tcg/s390x/noexec.c b/tests/tcg/s390x/noexec.c
new file mode 100644
index 00..15d007d07f
--- /dev/null
+++ b/tests/tcg/s390x/noexec.c
@@ -0,0 +1,106 @@
+#include "../multiarch/noexec.c.inc"
+
+static void *arch_mcontext_pc(const mcontext_t *ctx)
+{
+return (void *)ctx->psw.addr;
+}
+
+static int arch_mcontext_arg(const mcontext_t *ctx)
+{
+return ctx->gregs[2];
+}
+
+static void arch_flush(void *p, int len)
+{
+}
+
+extern char noexec_1[];
+extern char noexec_2[];
+extern char noexec_end[];
+
+asm("noexec_1:\n"
+"   lgfi %r2,1\n"   /* %r2 is 0 on entry, set 1. */
+"noexec_2:\n"
+"   lgfi %r2,2\n"   /* %r2 is 0/1; set 2. */
+"   br %r14\n"  /* return */
+"noexec_end:");
+
+extern char exrl_1[];
+extern char exrl_2[];
+extern char exrl_end[];
+
+asm("exrl_1:\n"
+"   exrl %r0, exrl_2\n"
+"   br %r14\n"
+"exrl_2:\n"
+"   lgfi %r2,2\n"
+"exrl_end:");
+
+int main(void)
+{
+struct noexec_test noexec_tests[] = {
+{
+.name = "fallthrough",
+.test_code = noexec_1,
+.test_len = noexec_end - noexec_1,
+.page_ofs = noexec_1 - noexec_2,
+.entry_ofs = noexec_1 - noexec_2,
+.expected_si_ofs = 0,
+.expected_pc_ofs = 0,
+.expected_arg = 1,
+},
+{
+.name = "jump",
+.test_code = noexec_1,
+.test_len = noexec_end - noexec_1,
+.page_ofs = noexec_1 - noexec_2,
+.entry_ofs = 0,
+.expected_si_ofs = 0,
+.expected_pc_ofs = 0,
+.expected_arg = 0,
+},
+{
+.name = "exrl",
+.test_code = exrl_1,
+.test_len = exrl_end - exrl_1,
+.page_ofs = exrl_1 - exrl_2,
+.entry_ofs = exrl_1 - exrl_2,
+.expected_si_ofs = 0,
+.expected_pc_ofs = exrl_1 - exrl_2,
+.expected_arg = 0,
+},
+{
+.name = "fallthrough [cross]",
+.test_code = noexec_1,
+.test_len = noexec_end - noexec_1,
+.page_ofs = noexec_1 - noexec_2 - 2,
+.entry_ofs = noexec_1 - noexec_2 - 2,
+.expected_si_ofs = 0,
+.expected_pc_ofs = -2,
+.expected_arg = 1,
+},
+{
+.name = "jump [cross]",
+.test_code = noexec_1,
+.test_len = noexec_end - noexec_1,
+.page_ofs = noexec_1 - noexec_2 - 2,
+.entry_ofs = -2,
+.expected_si_ofs = 0,
+.expected_pc_ofs = -2,
+.expected_arg = 0,
+},
+{
+.name = "exrl [cross]",
+.test_code = exrl_1,
+.test_len = exrl_end - exrl_1,
+.page_ofs = exrl_1 - exrl_2 - 2,
+.entry_ofs = exrl_1 - exrl_2 - 2,
+.expected_si_ofs = 0,
+.expected_pc_ofs = exrl_1 - exrl_2 - 2,
+

[PULL 12/20] accel/tcg: Use probe_access_internal for softmmu get_page_addr_code_hostp

2022-09-01 Thread Richard Henderson
Simplify the implementation of get_page_addr_code_hostp
by reusing the existing probe_access infrastructure.

Acked-by: Ilya Leoshkevich 
Tested-by: Ilya Leoshkevich 
Signed-off-by: Richard Henderson 
---
 accel/tcg/cputlb.c | 76 --
 1 file changed, 26 insertions(+), 50 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 80a3eb4f1c..8fad2d9b83 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1482,56 +1482,6 @@ static bool victim_tlb_hit(CPUArchState *env, size_t 
mmu_idx, size_t index,
   victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
  (ADDR) & TARGET_PAGE_MASK)
 
-/*
- * Return a ram_addr_t for the virtual address for execution.
- *
- * Return -1 if we can't translate and execute from an entire page
- * of RAM.  This will force us to execute by loading and translating
- * one insn at a time, without caching.
- *
- * NOTE: This function will trigger an exception if the page is
- * not executable.
- */
-tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
-void **hostp)
-{
-uintptr_t mmu_idx = cpu_mmu_index(env, true);
-uintptr_t index = tlb_index(env, mmu_idx, addr);
-CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
-void *p;
-
-if (unlikely(!tlb_hit(entry->addr_code, addr))) {
-if (!VICTIM_TLB_HIT(addr_code, addr)) {
-tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
-index = tlb_index(env, mmu_idx, addr);
-entry = tlb_entry(env, mmu_idx, addr);
-
-if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
-/*
- * The MMU protection covers a smaller range than a target
- * page, so we must redo the MMU check for every insn.
- */
-return -1;
-}
-}
-assert(tlb_hit(entry->addr_code, addr));
-}
-
-if (unlikely(entry->addr_code & TLB_MMIO)) {
-/* The region is not backed by RAM.  */
-if (hostp) {
-*hostp = NULL;
-}
-return -1;
-}
-
-p = (void *)((uintptr_t)addr + entry->addend);
-if (hostp) {
-*hostp = p;
-}
-return qemu_ram_addr_from_host_nofail(p);
-}
-
 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
 {
@@ -1687,6 +1637,32 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
 return flags ? NULL : host;
 }
 
+/*
+ * Return a ram_addr_t for the virtual address for execution.
+ *
+ * Return -1 if we can't translate and execute from an entire page
+ * of RAM.  This will force us to execute by loading and translating
+ * one insn at a time, without caching.
+ *
+ * NOTE: This function will trigger an exception if the page is
+ * not executable.
+ */
+tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
+void **hostp)
+{
+void *p;
+
+(void)probe_access_internal(env, addr, 1, MMU_INST_FETCH,
+cpu_mmu_index(env, true), false, &p, 0);
+if (p == NULL) {
+return -1;
+}
+if (hostp) {
+*hostp = p;
+}
+return qemu_ram_addr_from_host_nofail(p);
+}
+
 #ifdef CONFIG_PLUGIN
 /*
  * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
-- 
2.34.1




[PATCH v3 08/23] i386: isolate MMX code more

2022-09-01 Thread Paolo Bonzini
Extracted from a patch by Paul Brook.

Reviewed-by: Richard Henderson 
Signed-off-by: Paolo Bonzini 
---
 target/i386/tcg/translate.c | 52 +++--
 1 file changed, 33 insertions(+), 19 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 849c40b685..097c895ef1 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3888,6 +3888,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 gen_ldo_env_A0(s, op2_offset);
 }
 }
+if (!op6->op[b1]) {
+goto illegal_op;
+}
+tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
+tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
+op6->op[b1](cpu_env, s->ptr0, s->ptr1);
 } else {
 if ((op6->flags & SSE_OPF_MMX) == 0) {
 goto unknown_op;
@@ -3900,14 +3906,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 gen_lea_modrm(env, s, modrm);
 gen_ldq_env_A0(s, op2_offset);
 }
+tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
+tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
+op6->op[0](cpu_env, s->ptr0, s->ptr1);
 }
-if (!op6->op[b1]) {
-goto illegal_op;
-}
-
-tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
-tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
-op6->op[b1](cpu_env, s->ptr0, s->ptr1);
 
 if (op6->flags & SSE_OPF_CMP) {
 set_cc_op(s, CC_OP_EFLAGS);
@@ -4427,16 +4429,8 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 return;
 }
 
-if (b1) {
-op1_offset = ZMM_OFFSET(reg);
-if (mod == 3) {
-op2_offset = ZMM_OFFSET(rm | REX_B(s));
-} else {
-op2_offset = offsetof(CPUX86State,xmm_t0);
-gen_lea_modrm(env, s, modrm);
-gen_ldo_env_A0(s, op2_offset);
-}
-} else {
+if (b1 == 0) {
+/* MMX */
 if ((op7->flags & SSE_OPF_MMX) == 0) {
 goto illegal_op;
 }
@@ -4448,9 +4442,29 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 gen_lea_modrm(env, s, modrm);
 gen_ldq_env_A0(s, op2_offset);
 }
-}
-val = x86_ldub_code(env, s);
+val = x86_ldub_code(env, s);
+tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
+tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
 
+/* We only actually have one MMX instruction (palignr) */
+assert(b == 0x0f);
+
+op7->op[0](cpu_env, s->ptr0, s->ptr1,
+   tcg_const_i32(val));
+break;
+}
+
+/* SSE */
+op1_offset = ZMM_OFFSET(reg);
+if (mod == 3) {
+op2_offset = ZMM_OFFSET(rm | REX_B(s));
+} else {
+op2_offset = offsetof(CPUX86State, xmm_t0);
+gen_lea_modrm(env, s, modrm);
+gen_ldo_env_A0(s, op2_offset);
+}
+
+val = x86_ldub_code(env, s);
 if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
 set_cc_op(s, CC_OP_EFLAGS);
 
-- 
2.37.1





[PATCH v3 15/23] i386: Destructive vector helpers for AVX

2022-09-01 Thread Paolo Bonzini
From: Paul Brook 

These helpers need to take special care to avoid overwriting source values
before the whole result has been calculated.  Currently they use a dummy
Reg typed variable to store the result then assign the whole register.
This will cause 128 bit operations to corrupt the upper half of the register,
so replace it with explicit temporaries and element assignments.

Signed-off-by: Paul Brook 
Message-Id: <20220424220204.2493824-14-p...@nowt.org>
Signed-off-by: Paolo Bonzini 
---
 target/i386/ops_sse.h | 556 --
 1 file changed, 262 insertions(+), 294 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 557cc7ce7d..7d48c05693 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -41,6 +41,7 @@
 #endif
 
 #define LANE_WIDTH (SHIFT ? 16 : 8)
+#define PACK_WIDTH (LANE_WIDTH / 2)
 
 /*
  * Copy the relevant parts of a Reg value around. In the case where
@@ -474,71 +475,81 @@ void glue(helper_movq_mm_T0, SUFFIX)(Reg *d, uint64_t val)
 }
 #endif
 
+#define SHUFFLE4(F, a, b, offset) do {  \
+r0 = a->F((order & 3) + offset);\
+r1 = a->F(((order >> 2) & 3) + offset); \
+r2 = b->F(((order >> 4) & 3) + offset); \
+r3 = b->F(((order >> 6) & 3) + offset); \
+d->F(offset) = r0;  \
+d->F(offset + 1) = r1;  \
+d->F(offset + 2) = r2;  \
+d->F(offset + 3) = r3;  \
+} while (0)
+
 #if SHIFT == 0
 void glue(helper_pshufw, SUFFIX)(Reg *d, Reg *s, int order)
 {
-Reg r;
+uint16_t r0, r1, r2, r3;
 
-r.W(0) = s->W(order & 3);
-r.W(1) = s->W((order >> 2) & 3);
-r.W(2) = s->W((order >> 4) & 3);
-r.W(3) = s->W((order >> 6) & 3);
-MOVE(*d, r);
+SHUFFLE4(W, s, s, 0);
 }
 #else
 void glue(helper_shufps, SUFFIX)(Reg *d, Reg *s, int order)
 {
-Reg r;
+Reg *v = d;
+uint32_t r0, r1, r2, r3;
+int i;
 
-r.L(0) = d->L(order & 3);
-r.L(1) = d->L((order >> 2) & 3);
-r.L(2) = s->L((order >> 4) & 3);
-r.L(3) = s->L((order >> 6) & 3);
-MOVE(*d, r);
+for (i = 0; i < 2 << SHIFT; i += 4) {
+SHUFFLE4(L, v, s, i);
+}
 }
 
 void glue(helper_shufpd, SUFFIX)(Reg *d, Reg *s, int order)
 {
-Reg r;
+Reg *v = d;
+uint64_t r0, r1;
+int i;
 
-r.Q(0) = d->Q(order & 1);
-r.Q(1) = s->Q((order >> 1) & 1);
-MOVE(*d, r);
+for (i = 0; i < 1 << SHIFT; i += 2) {
+r0 = v->Q(((order & 1) & 1) + i);
+r1 = s->Q(((order >> 1) & 1) + i);
+d->Q(i) = r0;
+d->Q(i + 1) = r1;
+order >>= 2;
+}
 }
 
 void glue(helper_pshufd, SUFFIX)(Reg *d, Reg *s, int order)
 {
-Reg r;
+uint32_t r0, r1, r2, r3;
+int i;
 
-r.L(0) = s->L(order & 3);
-r.L(1) = s->L((order >> 2) & 3);
-r.L(2) = s->L((order >> 4) & 3);
-r.L(3) = s->L((order >> 6) & 3);
-MOVE(*d, r);
+for (i = 0; i < 2 << SHIFT; i += 4) {
+SHUFFLE4(L, s, s, i);
+}
 }
 
 void glue(helper_pshuflw, SUFFIX)(Reg *d, Reg *s, int order)
 {
-Reg r;
+uint16_t r0, r1, r2, r3;
+int i, j;
 
-r.W(0) = s->W(order & 3);
-r.W(1) = s->W((order >> 2) & 3);
-r.W(2) = s->W((order >> 4) & 3);
-r.W(3) = s->W((order >> 6) & 3);
-r.Q(1) = s->Q(1);
-MOVE(*d, r);
+for (i = 0, j = 1; j < 1 << SHIFT; i += 8, j += 2) {
+SHUFFLE4(W, s, s, i);
+d->Q(j) = s->Q(j);
+}
 }
 
 void glue(helper_pshufhw, SUFFIX)(Reg *d, Reg *s, int order)
 {
-Reg r;
+uint16_t r0, r1, r2, r3;
+int i, j;
 
-r.Q(0) = s->Q(0);
-r.W(4) = s->W(4 + (order & 3));
-r.W(5) = s->W(4 + ((order >> 2) & 3));
-r.W(6) = s->W(4 + ((order >> 4) & 3));
-r.W(7) = s->W(4 + ((order >> 6) & 3));
-MOVE(*d, r);
+for (i = 4, j = 0; j < 1 << SHIFT; i += 8, j += 2) {
+d->Q(j) = s->Q(j);
+SHUFFLE4(W, s, s, i);
+}
 }
 #endif
 
@@ -1091,156 +1102,132 @@ uint32_t glue(helper_pmovmskb, SUFFIX)(CPUX86State 
*env, Reg *s)
 return val;
 }
 
-void glue(helper_packsswb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
-{
-Reg r;
-
-r.B(0) = satsb((int16_t)d->W(0));
-r.B(1) = satsb((int16_t)d->W(1));
-r.B(2) = satsb((int16_t)d->W(2));
-r.B(3) = satsb((int16_t)d->W(3));
-#if SHIFT == 1
-r.B(4) = satsb((int16_t)d->W(4));
-r.B(5) = satsb((int16_t)d->W(5));
-r.B(6) = satsb((int16_t)d->W(6));
-r.B(7) = satsb((int16_t)d->W(7));
-#endif
-r.B((4 << SHIFT) + 0) = satsb((int16_t)s->W(0));
-r.B((4 << SHIFT) + 1) = satsb((int16_t)s->W(1));
-r.B((4 << SHIFT) + 2) = satsb((int16_t)s->W(2));
-r.B((4 << SHIFT) + 3) = satsb((int16_t)s->W(3));
-#if SHIFT == 1
-r.B(12) = satsb((int16_t)s->W(4));
-r.B(13) = satsb((int16_t)s->W(5));
-r.B(14) = satsb((int16_t)s->W(6));
-r.B(15) = satsb((int16_t)s->W(7));
-#endif
-MOVE(*d, r);
+#define PACK_HELPER_B(name, F) \
+void glue(helper_pack ## name, SUFFIX)(CPUX86State *env,  \
+Reg *d, Reg *s)

[PATCH v3 22/23] i386: AVX pclmulqdq prep

2022-09-01 Thread Paolo Bonzini
From: Paul Brook 

Make the pclmulqdq helper AVX ready

Signed-off-by: Paul Brook 
Message-Id: <20220424220204.2493824-21-p...@nowt.org>
Reviewed-by: Richard Henderson 
Signed-off-by: Paolo Bonzini 
---
 target/i386/ops_sse.h | 29 ++---
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index a11a0143bf..4135623ad8 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -2211,14 +2211,14 @@ target_ulong helper_crc32(uint32_t crc1, target_ulong 
msg, uint32_t len)
 
 #endif
 
-void glue(helper_pclmulqdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
-uint32_t ctrl)
+#if SHIFT == 1
+static void clmulq(uint64_t *dest_l, uint64_t *dest_h,
+  uint64_t a, uint64_t b)
 {
-uint64_t ah, al, b, resh, resl;
+uint64_t al, ah, resh, resl;
 
 ah = 0;
-al = d->Q((ctrl & 1) != 0);
-b = s->Q((ctrl & 16) != 0);
+al = a;
 resh = resl = 0;
 
 while (b) {
@@ -2231,8 +2231,23 @@ void glue(helper_pclmulqdq, SUFFIX)(CPUX86State *env, 
Reg *d, Reg *s,
 b >>= 1;
 }
 
-d->Q(0) = resl;
-d->Q(1) = resh;
+*dest_l = resl;
+*dest_h = resh;
+}
+#endif
+
+void glue(helper_pclmulqdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+uint32_t ctrl)
+{
+Reg *v = d;
+uint64_t a, b;
+int i;
+
+for (i = 0; i < 1 << SHIFT; i += 2) {
+a = v->Q(((ctrl & 1) != 0) + i);
+b = s->Q(((ctrl & 16) != 0) + i);
+clmulq(&d->Q(i), &d->Q(i + 1), a, b);
+}
 }
 
 void glue(helper_aesdec, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
-- 
2.37.1





Re: [PATCH] target/i386: rewrite destructive 3DNow operations

2022-09-01 Thread Richard Henderson

On 9/1/22 08:11, Paolo Bonzini wrote:

Remove use of the MOVE macro, since it will be purged from
MMX/SSE as well.

Signed-off-by: Paolo Bonzini
---
  target/i386/ops_sse.h | 32 
  1 file changed, 16 insertions(+), 16 deletions(-)


Reviewed-by: Richard Henderson 


r~



Re: [PATCH 32/51] tests/qtest: Fix ERROR_SHARING_VIOLATION for win32

2022-09-01 Thread Marc-André Lureau
Hi

On Wed, Aug 24, 2022 at 2:03 PM Bin Meng  wrote:

> From: Bin Meng 
>
> On Windows, the MinGW provided mkstemp() API opens the file with
> exclusive access, denying other processes to read/write the file.
> Such behavior prevents the QEMU executable from opening the file,
> (e.g.: CreateFile returns ERROR_SHARING_VIOLATION).
>

g_mkstemp() doesn't have this behaviour (after running a quick test). Use
it?


>
> This can be fixed by closing the file and reopening it.
>
> Signed-off-by: Bin Meng 
> ---
>
>  tests/qtest/ahci-test.c| 14 ++
>  tests/qtest/boot-serial-test.c | 13 +
>  2 files changed, 27 insertions(+)
>
> diff --git a/tests/qtest/ahci-test.c b/tests/qtest/ahci-test.c
> index f26cd6f86f..0e88cd0eef 100644
> --- a/tests/qtest/ahci-test.c
> +++ b/tests/qtest/ahci-test.c
> @@ -1443,6 +1443,20 @@ static int prepare_iso(size_t size, unsigned char
> **buf, char **name)
>  int fd = mkstemp(cdrom_path);
>
>  g_assert(fd != -1);
> +#ifdef _WIN32
> +/*
> + * On Windows, the MinGW provided mkstemp() API opens the file with
> + * exclusive access, denying other processes to read/write the file.
> + * Such behavior prevents the QEMU executable from opening the file,
> + * (e.g.: CreateFile returns ERROR_SHARING_VIOLATION).
> + *
> + * Close the file and reopen it.
> + */
> +close(fd);
> +fd = open(cdrom_path, O_WRONLY);
> +g_assert(fd != -1);
> +#endif
> +
>  g_assert(buf);
>  g_assert(name);
>  patt = g_malloc(size);
> diff --git a/tests/qtest/boot-serial-test.c
> b/tests/qtest/boot-serial-test.c
> index 404adcfa20..fb6c81bf35 100644
> --- a/tests/qtest/boot-serial-test.c
> +++ b/tests/qtest/boot-serial-test.c
> @@ -235,6 +235,19 @@ static void test_machine(const void *data)
>
>  ser_fd = mkstemp(serialtmp);
>  g_assert(ser_fd != -1);
> +#ifdef _WIN32
> +/*
> + * On Windows, the MinGW provided mkstemp() API opens the file with
> + * exclusive access, denying other processes to read/write the file.
> + * Such behavior prevents the QEMU executable from opening the file,
> + * (e.g.: CreateFile returns ERROR_SHARING_VIOLATION).
> + *
> + * Close the file and reopen it.
> + */
> +close(ser_fd);
> +ser_fd = open(serialtmp, O_RDONLY);
> +g_assert(ser_fd != -1);
> +#endif
>
>  if (test->kernel) {
>  code = test->kernel;
> --
> 2.34.1
>
>
>

-- 
Marc-André Lureau


[PING PATCH v2] linux-user: Passthrough MADV_DONTNEED for certain file mappings

2022-09-01 Thread Ilya Leoshkevich
On Mon, 2022-07-25 at 14:50 +0200, Ilya Leoshkevich wrote:
> This is a follow-up for commit 892a4f6a750a ("linux-user: Add partial
> support for MADV_DONTNEED"), which added passthrough for anonymous
> mappings. File mappings can be handled in a similar manner.
> 
> In order to do that, mark pages, for which mmap() was passed through,
> with PAGE_PASSTHROUGH, and then allow madvise() passthrough for these
> pages as well.
> 
> Signed-off-by: Ilya Leoshkevich 
> ---
> 
> v1:
> https://lists.gnu.org/archive/html/qemu-devel/2022-07/msg00112.html
> v1 -> v2: Fix PAGE_PASSTHROUGH value; make checks consistent with
>   page_set_flags() (Laurent).
> 
>  include/exec/cpu-all.h |  6 ++
>  linux-user/mmap.c  | 25 +
>  2 files changed, 27 insertions(+), 4 deletions(-)
> 
> diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> index f5bda2c3ca..2d29ba13c0 100644
> --- a/include/exec/cpu-all.h
> +++ b/include/exec/cpu-all.h
> @@ -262,6 +262,12 @@ extern const TargetPageBits target_page;
>  #define PAGE_TARGET_1  0x0200
>  #define PAGE_TARGET_2  0x0400
>  
> +/*
> + * For linux-user, indicates that the page is mapped with the same
> semantics
> + * in both guest and host.
> + */
> +#define PAGE_PASSTHROUGH 0x0800
> +
>  #if defined(CONFIG_USER_ONLY)
>  void page_dump(FILE *f);
>  
> diff --git a/linux-user/mmap.c b/linux-user/mmap.c
> index 4e7a6be6ee..79928e3ae5 100644
> --- a/linux-user/mmap.c
> +++ b/linux-user/mmap.c
> @@ -424,7 +424,8 @@ abi_ulong mmap_find_vma(abi_ulong start,
> abi_ulong size, abi_ulong align)
>  abi_long target_mmap(abi_ulong start, abi_ulong len, int
> target_prot,
>   int flags, int fd, abi_ulong offset)
>  {
> -    abi_ulong ret, end, real_start, real_end, retaddr, host_offset,
> host_len;
> +    abi_ulong ret, end, real_start, real_end, retaddr, host_offset,
> host_len,
> +  passthrough_start = -1, passthrough_end = -1;
>  int page_flags, host_prot;
>  
>  mmap_lock();
> @@ -537,6 +538,8 @@ abi_long target_mmap(abi_ulong start, abi_ulong
> len, int target_prot,
>  host_start += offset - host_offset;
>  }
>  start = h2g(host_start);
> +    passthrough_start = start;
> +    passthrough_end = start + len;
>  } else {
>  if (start & ~TARGET_PAGE_MASK) {
>  errno = EINVAL;
> @@ -619,6 +622,8 @@ abi_long target_mmap(abi_ulong start, abi_ulong
> len, int target_prot,
>   host_prot, flags, fd, offset1);
>  if (p == MAP_FAILED)
>  goto fail;
> +    passthrough_start = real_start;
> +    passthrough_end = real_end;
>  }
>  }
>   the_end1:
> @@ -626,7 +631,18 @@ abi_long target_mmap(abi_ulong start, abi_ulong
> len, int target_prot,
>  page_flags |= PAGE_ANON;
>  }
>  page_flags |= PAGE_RESET;
> -    page_set_flags(start, start + len, page_flags);
> +    if (passthrough_start == passthrough_end) {
> +    page_set_flags(start, start + len, page_flags);
> +    } else {
> +    if (start < passthrough_start) {
> +    page_set_flags(start, passthrough_start, page_flags);
> +    }
> +    page_set_flags(passthrough_start, passthrough_end,
> +   page_flags | PAGE_PASSTHROUGH);
> +    if (passthrough_end < start + len) {
> +    page_set_flags(passthrough_end, start + len,
> page_flags);
> +    }
> +    }
>   the_end:
>  trace_target_mmap_complete(start);
>  if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
> @@ -845,7 +861,7 @@ static bool
> can_passthrough_madv_dontneed(abi_ulong start, abi_ulong end)
>  }
>  
>  for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
> -    if (!(page_get_flags(addr) & PAGE_ANON)) {
> +    if (!(page_get_flags(addr) & (PAGE_ANON |
> PAGE_PASSTHROUGH))) {
>  return false;
>  }
>  }
> @@ -888,7 +904,8 @@ abi_long target_madvise(abi_ulong start,
> abi_ulong len_in, int advice)
>   *
>   * This breaks MADV_DONTNEED, completely implementing which is
> quite
>   * complicated. However, there is one low-hanging fruit: host-
> page-aligned
> - * anonymous mappings. In this case passthrough is safe, so do
> it.
> + * anonymous mappings or mappings that are known to have the
> same semantics
> + * in the host and the guest. In this case passthrough is safe,
> so do it.
>   */
>  mmap_lock();
>  if ((advice & MADV_DONTNEED) &&

I would like to ping this patch and two others that I used for
debugging it:

[PATCH] linux-user: Fix stracing in-memory mmap arguments
https://patchew.org/QEMU/20220630165901.2459135-1-...@linux.ibm.com/

[PATCH] linux-user: Implement stracing madvise()
https://patchew.org/QEMU/20220725134440.172892-1-...@linux.ibm.com/

[PATCH v2] linux-user: Passthrough MADV_DONTNEED for certain file
mappings
https://patchew.org/QEMU/20220725125043.43048-1-...@linux.ibm.com/



Re: [PATCH v5 13/18] dump/dump: Add arch section support

2022-09-01 Thread Janis Schoetterl-Glausch
On Thu, 2022-08-11 at 12:11 +, Janosch Frank wrote:
> Add hooks which architectures can use to add arbitrary data to custom
> sections.
> 
> Signed-off-by: Janosch Frank 
> ---
>  dump/dump.c| 120 ++---
>  include/sysemu/dump-arch.h |   3 +
>  2 files changed, 100 insertions(+), 23 deletions(-)
> 
> diff --git a/dump/dump.c b/dump/dump.c
> index 0d6dbf453a..65b18fc602 100644
> --- a/dump/dump.c
> +++ b/dump/dump.c
> @@ -430,7 +430,7 @@ static void prepare_elf_section_hdr_string(DumpState *s, 
> void *buff)
>  memcpy(buff, shdr, shdr_size);
>  }
>  
> -static void prepare_elf_section_hdrs(DumpState *s)
> +static void prepare_elf_section_hdrs(DumpState *s, Error **errp)
>  {
>  size_t len, sizeof_shdr;
>  void *buff_hdr;
> @@ -438,6 +438,7 @@ static void prepare_elf_section_hdrs(DumpState *s)
>  /*
>   * Section ordering:
>   * - HDR zero
> + * - Arch section hdrs
>   * - String table hdr
>   */
>  sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr);
> @@ -465,6 +466,16 @@ static void prepare_elf_section_hdrs(DumpState *s)
>  return;
>  }
>  
> +/* Add architecture defined section headers */
> +if (s->dump_info.arch_sections_write_hdr_fn) {
> +buff_hdr += s->dump_info.arch_sections_write_hdr_fn(s, buff_hdr);
> +
> +if (s->shdr_num >= SHN_LORESERVE) {

   /* TODO: raise limit by encoding via sh_link */

> +error_setg_errno(errp, EINVAL, "dump: too many architecture 
> defined sections");
> +return;
> +}
> +}
> +
>  /*
>   * String table is the last section since strings are added via
>   * arch_sections_write_hdr().
> @@ -477,7 +488,10 @@ static void write_elf_section_headers(DumpState *s, 
> Error **errp)
>  size_t sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : 
> sizeof(Elf32_Shdr);
>  int ret;
>  
> -prepare_elf_section_hdrs(s);
> +prepare_elf_section_hdrs(s, errp);
> +if (*errp) {

You're depending on errp not being NULL here, which it isn't, but it
doesn't seem like good style to me.
error.h recommends to also indicate success/failure via the return
value if possible. prepare_elf_section_hdrs returns void right now, so
it's easy to sidestep it this way.
(ERRP_GUARD would work too of course)

> +return;
> +}
>  
>  ret = fd_write_vmcore(s->elf_section_hdrs, s->shdr_num * sizeof_shdr, s);
>  if (ret < 0) {
> @@ -485,6 +499,30 @@ static void write_elf_section_headers(DumpState *s, 
> Error **errp)
>  }
>  }
>  
> +static void write_elf_sections(DumpState *s, Error **errp)
> +{
> +int ret;
> +
> +if (!s->elf_section_data_size) {
> +return;
> +}
> +
> +/* Write architecture section data */
> +ret = fd_write_vmcore(s->elf_section_data,
> +  s->elf_section_data_size, s);
> +if (ret < 0) {
> +error_setg_errno(errp, -ret, "dump: failed to write architecture 
> section  data");

Looks like two spaces between section and data.

> +return;
> +}
> +
> +/* Write string table */
> +ret = fd_write_vmcore(s->string_table_buf->data,
> +  s->string_table_buf->len, s);
> +if (ret < 0) {
> +error_setg_errno(errp, -ret, "dump: failed to write string table 
> data");
> +}
> +}
> +
>  static void write_data(DumpState *s, void *buf, int length, Error **errp)
>  {
>  int ret;
> @@ -744,6 +782,24 @@ static void dump_iterate(DumpState *s, Error **errp)
>  }
>  }
>  
> +static void dump_end(DumpState *s, Error **errp)
> +{
> +ERRP_GUARD();
> +
> +if (!s->elf_section_data_size) {
> +return;
> +}
> +s->elf_section_data = g_malloc0(s->elf_section_data_size);

Why zero-initialize the memory? Do you depend on that?

> +
> +/* Adds the architecture defined section data to s->elf_section_data  */
> +if (s->dump_info.arch_sections_write_fn) {
> +s->dump_info.arch_sections_write_fn(s, s->elf_section_data);
> +}
> +
> +/* write sections to vmcore */
> +write_elf_sections(s, errp);
> +}
> +
>  static void create_vmcore(DumpState *s, Error **errp)
>  {
>  ERRP_GUARD();
> @@ -758,6 +814,9 @@ static void create_vmcore(DumpState *s, Error **errp)
>  if (*errp) {
>  return;
>  }
> +
> +/* Write the section data */
> +dump_end(s, errp);
>  }
>  
>  static int write_start_flat_header(int fd)
> @@ -1883,38 +1942,53 @@ static void dump_init(DumpState *s, int fd, bool 
> has_format,
>  }
>  
>  /*
> - * calculate phdr_num
> - *
> - * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow
> + * Adds the number of architecture sections to shdr_num, sets
> + * string_table_usage and sets elf_section_data_size so we know
> + * the offsets and sizes of all parts.
>   */
> -s->phdr_num = 1; /* PT_NOTE */
> -if (s->list.num < UINT16_MAX - 

Re: [PATCH v5 15/18] s390x: Add protected dump cap

2022-09-01 Thread Janis Schoetterl-Glausch
On Thu, 2022-08-11 at 12:11 +, Janosch Frank wrote:
> Add a protected dump capability for later feature checking.
> 
> Signed-off-by: Janosch Frank 
> Reviewed-by: Steffen Eiden 

Reviewed-by: Janis Schoetterl-Glausch 



Re: [PATCH v5 17/18] s390x: Add KVM PV dump interface

2022-09-01 Thread Janis Schoetterl-Glausch
On Thu, 2022-08-11 at 12:11 +, Janosch Frank wrote:
> Let's add a few bits of code which hide the new KVM PV dump API from
> us via new functions.
> 
> Signed-off-by: Janosch Frank 

Reviewed-by: Janis Schoetterl-Glausch 



[PATCH] hw/pci-bridge: Fix brace Werror with clang 6.0.0

2022-09-01 Thread Chenyi Qiang
The two warnings are:

/hw/pci-bridge/cxl_downstream.c:101:51: error: suggest braces
around initialization of subobject [-Werror,-Wmissing-braces]
dvsec = (uint8_t *)&(CXLDVSECPortExtensions){ 0 };
  ^
  {}

/hw/pci-bridge/cxl_root_port.c:62:51: error: suggest braces
around initialization of subobject [-Werror,-Wmissing-braces]
dvsec = (uint8_t *)&(CXLDVSECPortExtensions){ 0 };
  ^
  {}

The original code is correct; this change just works around clang's bogus warning.

Signed-off-by: Chenyi Qiang 
---
 hw/pci-bridge/cxl_downstream.c | 2 +-
 hw/pci-bridge/cxl_root_port.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/pci-bridge/cxl_downstream.c b/hw/pci-bridge/cxl_downstream.c
index a361e519d0..8e536f193b 100644
--- a/hw/pci-bridge/cxl_downstream.c
+++ b/hw/pci-bridge/cxl_downstream.c
@@ -98,7 +98,7 @@ static void build_dvsecs(CXLComponentState *cxl)
 {
 uint8_t *dvsec;
 
-dvsec = (uint8_t *)&(CXLDVSECPortExtensions){ 0 };
+dvsec = (uint8_t *)&(CXLDVSECPortExtensions){ };
 cxl_component_create_dvsec(cxl, CXL2_DOWNSTREAM_PORT,
EXTENSIONS_PORT_DVSEC_LENGTH,
EXTENSIONS_PORT_DVSEC,
diff --git a/hw/pci-bridge/cxl_root_port.c b/hw/pci-bridge/cxl_root_port.c
index fb213fa06e..e959f82086 100644
--- a/hw/pci-bridge/cxl_root_port.c
+++ b/hw/pci-bridge/cxl_root_port.c
@@ -59,7 +59,7 @@ static void build_dvsecs(CXLComponentState *cxl)
 {
 uint8_t *dvsec;
 
-dvsec = (uint8_t *)&(CXLDVSECPortExtensions){ 0 };
+dvsec = (uint8_t *)&(CXLDVSECPortExtensions){ };
 cxl_component_create_dvsec(cxl, CXL2_ROOT_PORT,
EXTENSIONS_PORT_DVSEC_LENGTH,
EXTENSIONS_PORT_DVSEC,
-- 
2.17.1




Re: [PATCH] qio: fix command spawn RDONLY/WRONLY

2022-09-01 Thread Marc-André Lureau
Hi

On Thu, Sep 1, 2022 at 2:32 PM Daniel P. Berrangé 
wrote:

> On Thu, Sep 01, 2022 at 02:11:20PM +0400, marcandre.lur...@redhat.com
> wrote:
> > From: Marc-André Lureau 
> >
> > The in/out handling is inverted, although nothing seemed to notice that
> yet.
>
> On the contrary, it is correct, and the unit tests validate this.
>

> > Signed-off-by: Marc-André Lureau 
> > ---
> >  io/channel-command.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/io/channel-command.c b/io/channel-command.c
> > index 9f2f4a1793..ed17b44f74 100644
> > --- a/io/channel-command.c
> > +++ b/io/channel-command.c
> > @@ -79,10 +79,10 @@ qio_channel_command_new_spawn(const char *const
> argv[],
> >  flags = flags & O_ACCMODE;
> >
> >  if (flags == O_RDONLY) {
> > -stdinnull = true;
> > +stdoutnull = true;
> >  }
> >  if (flags == O_WRONLY) {
> > -stdoutnull = true;
> > +stdinnull = true;
> >  }
>
> This change breaks the unit tests.
>
>
Does it really test it then? We are talking about test-io-channel-command?
It works before and after for me. Other tests as well.

> The confusion is because there are two parties involved. The 'flags'
> variable is from the POV of the parent process, while stdinnull/stdoutnull
> are from the POV of the child process.
>
> IOW, if the parent process is reading from the child (O_RDONLY),
> then the child needs a stdout to write to the parent, but not
> any stdin to read from the parent, hence we set stdin to /dev/null
> in the child.
>

Ok, thanks for the clarification!


-- 
Marc-André Lureau


[PULL 3/4] target/avr: Only execute one interrupt at a time

2022-09-01 Thread Richard Henderson
We cannot deliver two interrupts simultaneously;
the first interrupt handler must execute first.

Reviewed-by: Michael Rolnik 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 target/avr/helper.c | 9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/target/avr/helper.c b/target/avr/helper.c
index 9614ccf3e4..34f1cbffb2 100644
--- a/target/avr/helper.c
+++ b/target/avr/helper.c
@@ -28,7 +28,6 @@
 
 bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
 {
-bool ret = false;
 AVRCPU *cpu = AVR_CPU(cs);
 CPUAVRState *env = &cpu->env;
 
@@ -38,8 +37,7 @@ bool avr_cpu_exec_interrupt(CPUState *cs, int 
interrupt_request)
 avr_cpu_do_interrupt(cs);
 
 cs->interrupt_request &= ~CPU_INTERRUPT_RESET;
-
-ret = true;
+return true;
 }
 }
 if (interrupt_request & CPU_INTERRUPT_HARD) {
@@ -52,11 +50,10 @@ bool avr_cpu_exec_interrupt(CPUState *cs, int 
interrupt_request)
 if (!env->intsrc) {
 cs->interrupt_request &= ~CPU_INTERRUPT_HARD;
 }
-
-ret = true;
+return true;
 }
 }
-return ret;
+return false;
 }
 
 void avr_cpu_do_interrupt(CPUState *cs)
-- 
2.34.1




[PATCH v3 13/23] i386: Rewrite simple integer vector helpers

2022-09-01 Thread Paolo Bonzini
From: Paul Brook 

Rewrite the "simple" vector integer helpers in preparation for AVX support.

While the current code is able to use the same prototype for unary
(a = F(b)) and binary (a = F(b, c)) operations, future changes will cause
them to diverge.

No functional changes to existing helpers

Signed-off-by: Paul Brook 
Message-Id: <20220424220204.2493824-12-p...@nowt.org>
Reviewed-by: Richard Henderson 
Signed-off-by: Paolo Bonzini 
---
 target/i386/ops_sse.h | 83 +++
 1 file changed, 28 insertions(+), 55 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index a4a09226e3..ce03362810 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -229,63 +229,36 @@ void glue(helper_pslldq, SUFFIX)(CPUX86State *env, Reg 
*d, Reg *c)
 }
 #endif
 
-#define SSE_HELPER_B(name, F)   \
+#define SSE_HELPER_1(name, elem, num, F)\
 void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)   \
 {   \
-d->B(0) = F(d->B(0), s->B(0));  \
-d->B(1) = F(d->B(1), s->B(1));  \
-d->B(2) = F(d->B(2), s->B(2));  \
-d->B(3) = F(d->B(3), s->B(3));  \
-d->B(4) = F(d->B(4), s->B(4));  \
-d->B(5) = F(d->B(5), s->B(5));  \
-d->B(6) = F(d->B(6), s->B(6));  \
-d->B(7) = F(d->B(7), s->B(7));  \
-XMM_ONLY(   \
- d->B(8) = F(d->B(8), s->B(8)); \
- d->B(9) = F(d->B(9), s->B(9)); \
- d->B(10) = F(d->B(10), s->B(10));  \
- d->B(11) = F(d->B(11), s->B(11));  \
- d->B(12) = F(d->B(12), s->B(12));  \
- d->B(13) = F(d->B(13), s->B(13));  \
- d->B(14) = F(d->B(14), s->B(14));  \
- d->B(15) = F(d->B(15), s->B(15));  \
-)   \
-}
+int n = num;\
+for (int i = 0; i < n; i++) {   \
+d->elem(i) = F(s->elem(i)); \
+}   \
+}
+
+#define SSE_HELPER_2(name, elem, num, F)\
+void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)   \
+{   \
+Reg *v = d; \
+int n = num;\
+for (int i = 0; i < n; i++) {   \
+d->elem(i) = F(v->elem(i), s->elem(i)); \
+}   \
+}
+
+#define SSE_HELPER_B(name, F)   \
+SSE_HELPER_2(name, B, 8 << SHIFT, F)
 
 #define SSE_HELPER_W(name, F)   \
-void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)   \
-{   \
-d->W(0) = F(d->W(0), s->W(0));  \
-d->W(1) = F(d->W(1), s->W(1));  \
-d->W(2) = F(d->W(2), s->W(2));  \
-d->W(3) = F(d->W(3), s->W(3));  \
-XMM_ONLY(   \
- d->W(4) = F(d->W(4), s->W(4)); \
- d->W(5) = F(d->W(5), s->W(5)); \
- d->W(6) = F(d->W(6), s->W(6)); \
- d->W(7) = F(d->W(7), s->W(7)); \
-)   \
-}
+SSE_HELPER_2(name, W, 4 << SHIFT, F)
 
 #define SSE_HELPER_L(name, F)   \
-void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)   \
-{   \
-d->L(0) = F(d->L(0), s->L(0));  \
-d->L(1) = F(d->L(1), s->L(1));  \
-XMM_ONLY(   \
- d->L(2) = F(d->L(2), s->L(2)); \
- d->L(3) = F(d->L(3), s->L(3)); \
-)   \
-}
+SSE_HELPER_2(name, L, 2 << SHIFT, F)
 
 #define SSE_HELPER_Q(name, F)   \
-void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)   \
-{  

[PATCH v3 04/23] i386: Rework sse_op_table1

2022-09-01 Thread Paolo Bonzini
From: Paul Brook 

Add a flags field to each row in sse_op_table1.

Initially this is only used as a replacement for the magic
SSE_SPECIAL and SSE_DUMMY pointers, the other flags are mostly
relevant for the AVX implementation but can be applied to SSE as well.

Signed-off-by: Paul Brook 
Message-Id: <20220424220204.2493824-5-p...@nowt.org>
Reviewed-by: Richard Henderson 
Signed-off-by: Paolo Bonzini 
---
 target/i386/tcg/translate.c | 311 +---
 1 file changed, 182 insertions(+), 129 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index cba862746b..7332bbcf44 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2790,146 +2790,193 @@ typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, 
TCGv_ptr reg_b, TCGv_i32 val);
 typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
TCGv val);
 
-#define SSE_SPECIAL ((void *)1)
-#define SSE_DUMMY ((void *)2)
+#define SSE_OPF_CMP   (1 << 1) /* does not write for first operand */
+#define SSE_OPF_SPECIAL   (1 << 3) /* magic */
+#define SSE_OPF_3DNOW (1 << 4) /* 3DNow! instruction */
+#define SSE_OPF_MMX   (1 << 5) /* MMX/integer/AVX2 instruction */
+#define SSE_OPF_SCALAR(1 << 6) /* Has SSE scalar variants */
+#define SSE_OPF_SHUF  (1 << 9) /* pshufx/shufpx */
 
-#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm }
-#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
- gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
+#define OP(op, flags, a, b, c, d)   \
+{flags, {a, b, c, d} }
 
-static const SSEFunc_0_epp sse_op_table1[256][4] = {
+#define MMX_OP(x) OP(op1, SSE_OPF_MMX, \
+gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm, NULL, NULL)
+
+#define SSE_FOP(name) OP(op1, SSE_OPF_SCALAR, \
+gen_helper_##name##ps, gen_helper_##name##pd, \
+gen_helper_##name##ss, gen_helper_##name##sd)
+#define SSE_OP(sname, dname, op, flags) OP(op, flags, \
+gen_helper_##sname##_xmm, gen_helper_##dname##_xmm, NULL, NULL)
+
+struct SSEOpHelper_table1 {
+int flags;
+SSEFunc_0_epp op[4];
+};
+
+#define SSE_3DNOW { SSE_OPF_3DNOW }
+#define SSE_SPECIAL { SSE_OPF_SPECIAL }
+
+static const struct SSEOpHelper_table1 sse_op_table1[256] = {
 /* 3DNow! extensions */
-[0x0e] = { SSE_DUMMY }, /* femms */
-[0x0f] = { SSE_DUMMY }, /* pf... */
+[0x0e] = SSE_SPECIAL, /* femms */
+[0x0f] = SSE_3DNOW, /* pf... (sse_op_table5) */
 /* pure SSE operations */
-[0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* 
movups, movupd, movss, movsd */
-[0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* 
movups, movupd, movss, movsd */
-[0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* 
movlps, movlpd, movsldup, movddup */
-[0x13] = { SSE_SPECIAL, SSE_SPECIAL },  /* movlps, movlpd */
-[0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm },
-[0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm },
-[0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd, 
movshdup */
-[0x17] = { SSE_SPECIAL, SSE_SPECIAL },  /* movhps, movhpd */
+[0x10] = SSE_SPECIAL, /* movups, movupd, movss, movsd */
+[0x11] = SSE_SPECIAL, /* movups, movupd, movss, movsd */
+[0x12] = SSE_SPECIAL, /* movlps, movlpd, movsldup, movddup */
+[0x13] = SSE_SPECIAL, /* movlps, movlpd */
+[0x14] = SSE_OP(punpckldq, punpcklqdq, op1, 0), /* unpcklps, unpcklpd */
+[0x15] = SSE_OP(punpckhdq, punpckhqdq, op1, 0), /* unpckhps, unpckhpd */
+[0x16] = SSE_SPECIAL, /* movhps, movhpd, movshdup */
+[0x17] = SSE_SPECIAL, /* movhps, movhpd */
 
-[0x28] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
-[0x29] = { SSE_SPECIAL, SSE_SPECIAL },  /* movaps, movapd */
-[0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* 
cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */
-[0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* 
movntps, movntpd, movntss, movntsd */
-[0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* 
cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */
-[0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* 
cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */
-[0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd },
-[0x2f] = { gen_helper_comiss, gen_helper_comisd },
-[0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */
-[0x51] = SSE_FOP(sqrt),
-[0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL },
-[0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL },
-[0x54] = { gen_helper_pand_xmm, gen_helper_pand_xmm }, /* andps, andpd */
-[0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd 
*/
-[0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */
-[0x57] = { gen_helper_pxor_xmm, 

[PATCH v3 19/23] i386: Destructive FP helpers for AVX

2022-09-01 Thread Paolo Bonzini
From: Paul Brook 

Prepare the horizontal arithmetic vector helpers for AVX.
These currently use a dummy Reg typed variable to store the result then
assign the whole register.  This will cause 128 bit operations to corrupt
the upper half of the register, so replace it with explicit temporaries
and element assignments.

Signed-off-by: Paul Brook 
Message-Id: <20220424220204.2493824-18-p...@nowt.org>
Reviewed-by: Richard Henderson 
Signed-off-by: Paolo Bonzini 
---
 target/i386/ops_sse.h | 93 ++-
 1 file changed, 39 insertions(+), 54 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 59ed30071e..61722fe4a2 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -945,45 +927,49 @@ void helper_insertq_i(CPUX86State *env, ZMMReg *d, int 
index, int length)
 d->ZMM_Q(0) = helper_insertq(d->ZMM_Q(0), index, length);
 }
 
-void glue(helper_haddps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
-{
-ZMMReg r;
-
-r.ZMM_S(0) = float32_add(d->ZMM_S(0), d->ZMM_S(1), >sse_status);
-r.ZMM_S(1) = float32_add(d->ZMM_S(2), d->ZMM_S(3), >sse_status);
-r.ZMM_S(2) = float32_add(s->ZMM_S(0), s->ZMM_S(1), >sse_status);
-r.ZMM_S(3) = float32_add(s->ZMM_S(2), s->ZMM_S(3), >sse_status);
-MOVE(*d, r);
+#define SSE_HELPER_HPS(name, F)  \
+void glue(helper_ ## name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \
+{ \
+Reg *v = d;   \
+float32 r[2 << SHIFT];\
+int i, j, k;  \
+for (k = 0; k < 2 << SHIFT; k += LANE_WIDTH / 4) {\
+for (i = j = 0; j < 4; i++, j += 2) { \
+r[i + k] = F(v->ZMM_S(j + k), v->ZMM_S(j + k + 1), 
>sse_status); \
+} \
+for (j = 0; j < 4; i++, j += 2) { \
+r[i + k] = F(s->ZMM_S(j + k), s->ZMM_S(j + k + 1), 
>sse_status); \
+} \
+} \
+for (i = 0; i < 2 << SHIFT; i++) {\
+d->ZMM_S(i) = r[i];   \
+} \
 }
 
-void glue(helper_haddpd, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
-{
-ZMMReg r;
+SSE_HELPER_HPS(haddps, float32_add)
+SSE_HELPER_HPS(hsubps, float32_sub)
 
-r.ZMM_D(0) = float64_add(d->ZMM_D(0), d->ZMM_D(1), >sse_status);
-r.ZMM_D(1) = float64_add(s->ZMM_D(0), s->ZMM_D(1), >sse_status);
-MOVE(*d, r);
+#define SSE_HELPER_HPD(name, F)  \
+void glue(helper_ ## name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \
+{ \
+Reg *v = d;   \
+float64 r[1 << SHIFT];\
+int i, j, k;  \
+for (k = 0; k < 1 << SHIFT; k += LANE_WIDTH / 8) {\
+for (i = j = 0; j < 2; i++, j += 2) { \
+r[i + k] = F(v->ZMM_D(j + k), v->ZMM_D(j + k + 1), 
>sse_status); \
+} \
+for (j = 0; j < 2; i++, j += 2) { \
+r[i + k] = F(s->ZMM_D(j + k), s->ZMM_D(j + k + 1), 
>sse_status); \
+} \
+} \
+for (i = 0; i < 1 << SHIFT; i++) {\
+d->ZMM_D(i) = r[i];   \
+} \
 }
 
-void glue(helper_hsubps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
-{
-ZMMReg r;
-
-r.ZMM_S(0) = float32_sub(d->ZMM_S(0), d->ZMM_S(1), >sse_status);
-r.ZMM_S(1) = float32_sub(d->ZMM_S(2), d->ZMM_S(3), >sse_status);
-r.ZMM_S(2) = float32_sub(s->ZMM_S(0), s->ZMM_S(1), >sse_status);
-r.ZMM_S(3) = float32_sub(s->ZMM_S(2), s->ZMM_S(3), >sse_status);
-MOVE(*d, r);
-}
-
-void glue(helper_hsubpd, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
-{
-ZMMReg r;
-
-r.ZMM_D(0) = float64_sub(d->ZMM_D(0), d->ZMM_D(1), >sse_status);
-r.ZMM_D(1) = float64_sub(s->ZMM_D(0), s->ZMM_D(1), >sse_status);
-MOVE(*d, r);
-}
+SSE_HELPER_HPD(haddpd, float64_add)
+SSE_HELPER_HPD(hsubpd, float64_sub)
 
 void glue(helper_addsubps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
-- 
2.37.1





[PATCH v3 18/23] i386: Dot product AVX helper prep

2022-09-01 Thread Paolo Bonzini
From: Paul Brook 

Make the dpps and dppd helpers AVX-ready

I can't see any obvious reason why dppd shouldn't work on 256 bit ymm
registers, but both AMD and Intel agree that it's xmm only.

Signed-off-by: Paul Brook 
Message-Id: <20220424220204.2493824-17-p...@nowt.org>
Reviewed-by: Richard Henderson 
Signed-off-by: Paolo Bonzini 
---
 target/i386/ops_sse.h | 80 ---
 1 file changed, 45 insertions(+), 35 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index de874e136f..59ed30071e 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -1903,55 +1903,64 @@ SSE_HELPER_I(helper_blendps, L, 4, FBLENDP)
 SSE_HELPER_I(helper_blendpd, Q, 2, FBLENDP)
 SSE_HELPER_I(helper_pblendw, W, 8, FBLENDP)
 
-void glue(helper_dpps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mask)
+void glue(helper_dpps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+   uint32_t mask)
 {
+Reg *v = d;
 float32 prod1, prod2, temp2, temp3, temp4;
+int i;
 
-/*
- * We must evaluate (A+B)+(C+D), not ((A+B)+C)+D
- * to correctly round the intermediate results
- */
-if (mask & (1 << 4)) {
-prod1 = float32_mul(d->ZMM_S(0), s->ZMM_S(0), >sse_status);
-} else {
-prod1 = float32_zero;
-}
-if (mask & (1 << 5)) {
-prod2 = float32_mul(d->ZMM_S(1), s->ZMM_S(1), >sse_status);
-} else {
-prod2 = float32_zero;
-}
-temp2 = float32_add(prod1, prod2, >sse_status);
-if (mask & (1 << 6)) {
-prod1 = float32_mul(d->ZMM_S(2), s->ZMM_S(2), >sse_status);
-} else {
-prod1 = float32_zero;
-}
-if (mask & (1 << 7)) {
-prod2 = float32_mul(d->ZMM_S(3), s->ZMM_S(3), >sse_status);
-} else {
-prod2 = float32_zero;
-}
-temp3 = float32_add(prod1, prod2, >sse_status);
-temp4 = float32_add(temp2, temp3, >sse_status);
+for (i = 0; i < 2 << SHIFT; i += 4) {
+/*
+ * We must evaluate (A+B)+(C+D), not ((A+B)+C)+D
+ * to correctly round the intermediate results
+ */
+if (mask & (1 << 4)) {
+prod1 = float32_mul(v->ZMM_S(i), s->ZMM_S(i), >sse_status);
+} else {
+prod1 = float32_zero;
+}
+if (mask & (1 << 5)) {
+prod2 = float32_mul(v->ZMM_S(i+1), s->ZMM_S(i+1), 
>sse_status);
+} else {
+prod2 = float32_zero;
+}
+temp2 = float32_add(prod1, prod2, >sse_status);
+if (mask & (1 << 6)) {
+prod1 = float32_mul(v->ZMM_S(i+2), s->ZMM_S(i+2), 
>sse_status);
+} else {
+prod1 = float32_zero;
+}
+if (mask & (1 << 7)) {
+prod2 = float32_mul(v->ZMM_S(i+3), s->ZMM_S(i+3), 
>sse_status);
+} else {
+prod2 = float32_zero;
+}
+temp3 = float32_add(prod1, prod2, >sse_status);
+temp4 = float32_add(temp2, temp3, >sse_status);
 
-d->ZMM_S(0) = (mask & (1 << 0)) ? temp4 : float32_zero;
-d->ZMM_S(1) = (mask & (1 << 1)) ? temp4 : float32_zero;
-d->ZMM_S(2) = (mask & (1 << 2)) ? temp4 : float32_zero;
-d->ZMM_S(3) = (mask & (1 << 3)) ? temp4 : float32_zero;
+d->ZMM_S(i) = (mask & (1 << 0)) ? temp4 : float32_zero;
+d->ZMM_S(i+1) = (mask & (1 << 1)) ? temp4 : float32_zero;
+d->ZMM_S(i+2) = (mask & (1 << 2)) ? temp4 : float32_zero;
+d->ZMM_S(i+3) = (mask & (1 << 3)) ? temp4 : float32_zero;
+}
 }
 
-void glue(helper_dppd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mask)
+#if SHIFT == 1
+/* Oddly, there is no ymm version of dppd */
+void glue(helper_dppd, SUFFIX)(CPUX86State *env,
+   Reg *d, Reg *s, uint32_t mask)
 {
+Reg *v = d;
 float64 prod1, prod2, temp2;
 
 if (mask & (1 << 4)) {
-prod1 = float64_mul(d->ZMM_D(0), s->ZMM_D(0), >sse_status);
+prod1 = float64_mul(v->ZMM_D(0), s->ZMM_D(0), >sse_status);
 } else {
 prod1 = float64_zero;
 }
 if (mask & (1 << 5)) {
-prod2 = float64_mul(d->ZMM_D(1), s->ZMM_D(1), >sse_status);
+prod2 = float64_mul(v->ZMM_D(1), s->ZMM_D(1), >sse_status);
 } else {
 prod2 = float64_zero;
 }
@@ -1959,6 +1968,7 @@ void glue(helper_dppd, SUFFIX)(CPUX86State *env, Reg *d, 
Reg *s, uint32_t mask)
 d->ZMM_D(0) = (mask & (1 << 0)) ? temp2 : float64_zero;
 d->ZMM_D(1) = (mask & (1 << 1)) ? temp2 : float64_zero;
 }
+#endif
 
 void glue(helper_mpsadbw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
   uint32_t offset)
-- 
2.37.1





[PATCH v2 07/10] qapi: fix example of MEM_UNPLUG_ERROR event

2022-09-01 Thread Victor Toso
Example output was missing ',' delimiter. Fix it.

Problem was noticed when trying to load the example into python's json
library.

Signed-off-by: Victor Toso 
---
 qapi/machine.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qapi/machine.json b/qapi/machine.json
index 5f1f50d3ed..4782eea2c3 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1416,7 +1416,7 @@
 #
 # Example:
 #
-# <- { "event": "MEM_UNPLUG_ERROR"
+# <- { "event": "MEM_UNPLUG_ERROR",
 #  "data": { "device": "dimm1",
 #"msg": "acpi: device unplug for unsupported device"
 #  },
-- 
2.37.2




[PATCH v2 04/10] qapi: fix example of BLOCK_JOB_READY event

2022-09-01 Thread Victor Toso
Example output is missing ',' delimiter. Fix it.

Problem was noticed when trying to load the example into python's json
library.

Signed-off-by: Victor Toso 
---
 qapi/block-core.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 2173e7734a..882b266532 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -5252,7 +5252,7 @@
 #
 # <- { "event": "BLOCK_JOB_READY",
 #  "data": { "device": "drive0", "type": "mirror", "speed": 0,
-#"len": 2097152, "offset": 2097152 }
+#"len": 2097152, "offset": 2097152 },
 #  "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
 #
 ##
-- 
2.37.2




Re: [PATCH 38/51] tests/qtest: {ahci, ide}-test: Open file in binary mode

2022-09-01 Thread Marc-André Lureau
Hi

On Wed, Aug 24, 2022 at 3:08 PM Bin Meng  wrote:

> From: Xuzhou Cheng 
>
> By default Windows opens file in text mode, while a POSIX compliant
> implementation treats text files and binary files the same.
>
> The fopen() 'mode' string can include the letter 'b' to indicate
> binary mode shall be used. POSIX spec says the character 'b' shall
> have no effect, but is allowed for ISO C standard conformance.
> Let's add the letter 'b' which works on both POSIX and Windows.
>
> Similar situation applies to the open() 'flags' where O_BINARY is
> used for binary mode.
>
> Signed-off-by: Xuzhou Cheng 
> Signed-off-by: Bin Meng 
> ---
>
>  tests/qtest/ahci-test.c | 2 +-
>  tests/qtest/ide-test.c  | 4 ++--
>  2 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/tests/qtest/ahci-test.c b/tests/qtest/ahci-test.c
> index bce9ff770c..be11508c75 100644
> --- a/tests/qtest/ahci-test.c
> +++ b/tests/qtest/ahci-test.c
> @@ -1453,7 +1453,7 @@ static int prepare_iso(size_t size, unsigned char
> **buf, char **name)
>   * Close the file and reopen it.
>   */
>  close(fd);
> -fd = open(cdrom_path, O_WRONLY);
> +fd = open(cdrom_path, O_WRONLY | O_BINARY);
>  g_assert(fd != -1);
>

that should be gone in next iteration, with g_mkstemp() usage.


>  #endif
>
> diff --git a/tests/qtest/ide-test.c b/tests/qtest/ide-test.c
> index c5cad6c0be..ee03dea4fa 100644
> --- a/tests/qtest/ide-test.c
> +++ b/tests/qtest/ide-test.c
> @@ -892,7 +892,7 @@ static void cdrom_pio_impl(int nblocks)
>
>  /* Prepopulate the CDROM with an interesting pattern */
>  generate_pattern(pattern, patt_len, ATAPI_BLOCK_SIZE);
> -fh = fopen(tmp_path, "w+");
> +fh = fopen(tmp_path, "wb+");
>  ret = fwrite(pattern, ATAPI_BLOCK_SIZE, patt_blocks, fh);
>  g_assert_cmpint(ret, ==, patt_blocks);
>  fclose(fh);
> @@ -993,7 +993,7 @@ static void test_cdrom_dma(void)
>  prdt[0].size = cpu_to_le32(len | PRDT_EOT);
>
>  generate_pattern(pattern, ATAPI_BLOCK_SIZE * 16, ATAPI_BLOCK_SIZE);
> -fh = fopen(tmp_path, "w+");
> +fh = fopen(tmp_path, "wb+");
>  ret = fwrite(pattern, ATAPI_BLOCK_SIZE, 16, fh);
>  g_assert_cmpint(ret, ==, 16);
>  fclose(fh);
> --
> 2.34.1
>
>
>
ack this part,
Reviewed-by: Marc-André Lureau 


-- 
Marc-André Lureau


Re: [PATCH v5 16/18] s390x: Introduce PV query interface

2022-09-01 Thread Janis Schoetterl-Glausch
On Thu, 2022-08-11 at 12:11 +, Janosch Frank wrote:
> Introduce an interface over which we can get information about UV data.
> 
> Signed-off-by: Janosch Frank 
> Reviewed-by: Steffen Eiden 

With the below fixed:
Reviewed-by: Janis Schoetterl-Glausch 

> ---
>  hw/s390x/pv.c  | 61 ++
>  hw/s390x/s390-virtio-ccw.c |  6 
>  include/hw/s390x/pv.h  | 10 +++
>  3 files changed, 77 insertions(+)
> 
> diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c
> index 401b63d6cb..2b892b45e8 100644
> --- a/hw/s390x/pv.c
> +++ b/hw/s390x/pv.c
> @@ -20,6 +20,11 @@
>  #include "exec/confidential-guest-support.h"
>  #include "hw/s390x/ipl.h"
>  #include "hw/s390x/pv.h"
> +#include "target/s390x/kvm/kvm_s390x.h"
> +
> +static bool info_valid;
> +static struct kvm_s390_pv_info_vm info_vm;
> +static struct kvm_s390_pv_info_dump info_dump;
>  
>  static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data)
>  {
> @@ -56,6 +61,42 @@ static int __s390_pv_cmd(uint32_t cmd, const char 
> *cmdname, void *data)
>  }  \
>  }
>  
> +int s390_pv_query_info(void)
> +{
> +struct kvm_s390_pv_info info = {
> +.header.id = KVM_PV_INFO_VM,
> +.header.len_max = sizeof(info.header) + sizeof(info.vm),
> +};
> +int rc;
> +
> +/* Info API's first user is dump so they are bundled */
> +if (!kvm_s390_get_protected_dump()) {
> +return 0;
> +}
> +
> +rc = s390_pv_cmd(KVM_PV_INFO, );
> +if (rc) {
> +error_report("KVM PV INFO cmd %x failed: %s",
> + info.header.id, strerror(rc));

Should be strerror(-rc).

> +return rc;
> +}
> +memcpy(_vm, , sizeof(info.vm));
> +
> +info.header.id = KVM_PV_INFO_DUMP;
> +info.header.len_max = sizeof(info.header) + sizeof(info.dump);
> +rc = s390_pv_cmd(KVM_PV_INFO, );
> +if (rc) {
> +error_report("KVM PV INFO cmd %x failed: %s",
> + info.header.id, strerror(rc));

Same here.

> +return rc;
> +}
> +
> +memcpy(_dump, , sizeof(info.dump));
> +info_valid = true;
> +
> +return rc;
> +}
> +
> 
[...]




Re: [PATCH v1 15/25] Deprecate 32 bit big-endian MIPS

2022-09-01 Thread Alex Bennée


Thomas Huth  writes:

> On 26/08/2022 19.21, Alex Bennée wrote:

>> -   * - MIPS
>> +   * - MIPS (LE only)
>
> I'd replace "LE" with "little endian" - not everybody might know that
> abbreviation.

>> +'''
>> +
>> +A Debian 10 ("Buster") moved into LTS the big endian 32 bit version of
>
> s/A Debian/As Debian/

Fixed.

>> +MIPS moved out of support making it hard to maintain our
>> +cross-compilation CI tests of the architecture. As we no longer have
>> +CI coverage support may bitrot away before the deprecation process
>> +completes. The little endian variants of MIPS (both 32 and 64 bit) are
>> +still a supported host architecture.
>> +
>>   QEMU API (QAPI) events
>>   --
> ...
>> diff --git a/tests/docker/dockerfiles/debian-mips-cross.docker
>> b/tests/docker/dockerfiles/debian-mips-cross.docker
>> index 26c154014d..75943619df 100644
>> --- a/tests/docker/dockerfiles/debian-mips-cross.docker
>> +++ b/tests/docker/dockerfiles/debian-mips-cross.docker
>> @@ -1,32 +1,14 @@
>>   #
>> -# Docker mips cross-compiler target
>> +# Docker cross-compiler target
>
> Why did you remove the "mips" here?

You may notice most of the flattened cross compiler docker images are
basically the same save the last two lines. I ended up just copy pasting
the preamble as I went along. I could restore if you like.

>
>  Thomas


-- 
Alex Bennée



Re: [PATCH] qio: fix command spawn RDONLY/WRONLY

2022-09-01 Thread Daniel P . Berrangé
On Thu, Sep 01, 2022 at 02:11:20PM +0400, marcandre.lur...@redhat.com wrote:
> From: Marc-André Lureau 
> 
> The in/out handling is inverted, although nothing seemed to notice that yet.

On the contrary, it is correct, and the unit tests validate this.

> Signed-off-by: Marc-André Lureau 
> ---
>  io/channel-command.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/io/channel-command.c b/io/channel-command.c
> index 9f2f4a1793..ed17b44f74 100644
> --- a/io/channel-command.c
> +++ b/io/channel-command.c
> @@ -79,10 +79,10 @@ qio_channel_command_new_spawn(const char *const argv[],
>  flags = flags & O_ACCMODE;
>  
>  if (flags == O_RDONLY) {
> -stdinnull = true;
> +stdoutnull = true;
>  }
>  if (flags == O_WRONLY) {
> -stdoutnull = true;
> +stdinnull = true;
>  }

This change breaks the unit tests.

The confusion is because there are two parties involved. The 'flags'
variable is from the POV of the parent process, while stdinnull/stdoutnull
are from the POV of the child process.

IOW, if the parent process is reading from the child (O_RDONLY),
then the child needs a stdout to write to the parent, but not
any stdin to read from the parent, hence we set stdin to /dev/null
in the child.

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




[PULL 09/20] accel/tcg: Unlock mmap_lock after longjmp

2022-09-01 Thread Richard Henderson
The mmap_lock is held around tb_gen_code.  While the comment
is correct that the lock is dropped when tb_gen_code runs out
of memory, the lock is *not* dropped when an exception is
raised reading code for translation.

Acked-by: Alistair Francis 
Acked-by: Ilya Leoshkevich 
Tested-by: Ilya Leoshkevich 
Signed-off-by: Richard Henderson 
---
 accel/tcg/cpu-exec.c  | 12 ++--
 accel/tcg/user-exec.c |  3 ---
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index a565a3f8ec..d18081ca6f 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -462,13 +462,11 @@ void cpu_exec_step_atomic(CPUState *cpu)
 cpu_tb_exec(cpu, tb, _exit);
 cpu_exec_exit(cpu);
 } else {
-/*
- * The mmap_lock is dropped by tb_gen_code if it runs out of
- * memory.
- */
 #ifndef CONFIG_SOFTMMU
 clear_helper_retaddr();
-tcg_debug_assert(!have_mmap_lock());
+if (have_mmap_lock()) {
+mmap_unlock();
+}
 #endif
 if (qemu_mutex_iothread_locked()) {
 qemu_mutex_unlock_iothread();
@@ -936,7 +934,9 @@ int cpu_exec(CPUState *cpu)
 
 #ifndef CONFIG_SOFTMMU
 clear_helper_retaddr();
-tcg_debug_assert(!have_mmap_lock());
+if (have_mmap_lock()) {
+mmap_unlock();
+}
 #endif
 if (qemu_mutex_iothread_locked()) {
 qemu_mutex_unlock_iothread();
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index 2bc4394b80..521aa8b61e 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -80,10 +80,7 @@ MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write)
  * (and if the translator doesn't handle page boundaries correctly
  * there's little we can do about that here).  Therefore, do not
  * trigger the unwinder.
- *
- * Like tb_gen_code, release the memory lock before cpu_loop_exit.
  */
-mmap_unlock();
 *pc = 0;
 return MMU_INST_FETCH;
 }
-- 
2.34.1




[PULL 2/4] target/avr: Call avr_cpu_do_interrupt directly

2022-09-01 Thread Richard Henderson
There is no need to go through cc->tcg_ops when
we know what value that must have.

Reviewed-by: Michael Rolnik 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 target/avr/helper.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/target/avr/helper.c b/target/avr/helper.c
index 82284f8997..9614ccf3e4 100644
--- a/target/avr/helper.c
+++ b/target/avr/helper.c
@@ -29,14 +29,13 @@
 bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
 {
 bool ret = false;
-CPUClass *cc = CPU_GET_CLASS(cs);
 AVRCPU *cpu = AVR_CPU(cs);
 CPUAVRState *env = >env;
 
 if (interrupt_request & CPU_INTERRUPT_RESET) {
 if (cpu_interrupts_enabled(env)) {
 cs->exception_index = EXCP_RESET;
-cc->tcg_ops->do_interrupt(cs);
+avr_cpu_do_interrupt(cs);
 
 cs->interrupt_request &= ~CPU_INTERRUPT_RESET;
 
@@ -47,7 +46,7 @@ bool avr_cpu_exec_interrupt(CPUState *cs, int 
interrupt_request)
 if (cpu_interrupts_enabled(env) && env->intsrc != 0) {
 int index = ctz32(env->intsrc);
 cs->exception_index = EXCP_INT(index);
-cc->tcg_ops->do_interrupt(cs);
+avr_cpu_do_interrupt(cs);
 
 env->intsrc &= env->intsrc - 1; /* clear the interrupt */
 if (!env->intsrc) {
-- 
2.34.1




[PULL 06/20] tests/tcg/i386: Move smc_code2 to an executable section

2022-09-01 Thread Richard Henderson
We're about to start validating PAGE_EXEC, which means
that we've got to put this code into a section that is
both writable and executable.

Note that this test did not run on hardware beforehand either.

Acked-by: Ilya Leoshkevich 
Tested-by: Ilya Leoshkevich 
Signed-off-by: Richard Henderson 
---
 tests/tcg/i386/test-i386.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tcg/i386/test-i386.c b/tests/tcg/i386/test-i386.c
index ac8d5a3c1f..e6b308a2c0 100644
--- a/tests/tcg/i386/test-i386.c
+++ b/tests/tcg/i386/test-i386.c
@@ -1998,7 +1998,7 @@ uint8_t code[] = {
 0xc3, /* ret */
 };
 
-asm(".section \".data\"\n"
+asm(".section \".data_x\",\"awx\"\n"
 "smc_code2:\n"
 "movl 4(%esp), %eax\n"
 "movl %eax, smc_patch_addr2 + 1\n"
-- 
2.34.1




Re: [PATCH v1 12/16] qapi: fix example of blockdev-add command

2022-09-01 Thread Victor Toso
Hi,

First of all, I'm happy that this patch got us into this
discussion.

On Wed, Aug 31, 2022 at 04:53:49PM +0200, Markus Armbruster wrote:
> Victor Toso  writes:
>
> > Hi,
> >
> > On Wed, Aug 31, 2022 at 03:16:54PM +0200, Markus Armbruster wrote:
> >> Cc: Kevin for an improved chance of getting any nonsense I might write
> >> corrected.
> >>
> >> Victor Toso  writes:
> >>
> >> > Hi,
> >> >
> >> > On Wed, Aug 31, 2022 at 01:40:50PM +0200, Markus Armbruster wrote:
> >> >> Victor Toso  writes:
> >> >>
> >> >> > The example output is setting optional member "backing" with null.
> >> >> > This has no runtime impact. Remove it.
> >> >> >
> >> >> > Problem was noticed when using the example as a test case for Go
> >> >> > bindings.
> >> >>
> >> >> "Fix example" and "problem" implies there's something wrong.
> >> >> "No runtime impact" sounds like it's redundant, but not wrong.
> >> >> Wrong or not wrong?
> >> >
> >> > I take your comment is more about the wording which is confusing.
> >> >
> >> > Would it be better if I change to:
> >> > '''
> >> >The example output is setting optional member "backing" with
> >> >null. While this has no runtime impact, setting optional
> >> >members with empty value should not be encouraged. Remove it.
> >> > '''
> >> >
> >> > While I think the above is true, my main reason for proposing
> >> > this change is to re-use the example as a test case, but I'm not
> >> > sure if adding anything related to it would make it better (only
> >> > more confusing!).
> >>
> >> I had a closer look at the schema.
> >>
> >> The definition of backing is
> >>
> >> ##
> >> # @BlockdevOptionsGenericCOWFormat:
> >> #
> >> # Driver specific block device options for image format that have no 
> >> option
> >> # besides their data source and an optional backing file.
> >> #
> >> # @backing: reference to or definition of the backing file block
> >> #   device, null disables the backing file entirely.
> >> #   Defaults to the backing file stored the image file.
> >> #
> >> # Since: 2.9
> >> ##
> >> { 'struct': 'BlockdevOptionsGenericCOWFormat',
> >>   'base': 'BlockdevOptionsGenericFormat',
> >>   'data': { '*backing': 'BlockdevRefOrNull' } }
> >>
> >> Meaning, if I remember correctly (with some help from commit
> >> c42e8742f52's message):
> >>
> >> 1. Present @backing
> >>
> >> 1.a. of type 'str' means use the existing block device with this ID as
> >>  backing image
> >>
> >> 1.b. of type 'BlockdevOptions' means use the new block device defined by
> >>  it as backing image
> >>
> >> 1.c. that is null means use no backing image
> >>
> >> 2. Absent @backing means default to the backing file named in the COW
> >>image.
> >
> > Over the wire, how you get the difference between 1.c and 2? Are
> > you saying that for optional member "backing" we should be
> > explicit sending null over the wire?
>
> In the QAPI schema language, absent optional members do not default to
> any specific value.  Or in other words, "absent" is distinct from
> "present with value V" for any value V.
>
> Now, the *semantics* of "absent" are often identical to some default
> value.  Documentation should then say something like (default:
> DEFAULT-VALUE).

Yep, this is fine.

> In this particular instance, it isn't: "absent" means something else
> than any possible value.

The major painpoint for me is that, in Go an optional member is a
field with a pointer to that field's type. A pointer is default
initialized with nil and if the user of the Go module does
nothing with it, we naturally omit it in the output JSON.

This needs to be worked around in two cases so far:
BlockdevRefOrNull and StrOrNull. These two alternate types are the
only ones that take JSON null as value. I'm sure I'll make it
work.

--

Now, should we really keep using null type as alternative way of
expressing "disabling feature" or even "use something else"?

I'd be happy to work on improving this if that's reasonable. My
2c below.

##
# @BlockdevRefOrNull:
#
# Reference to a block device.
#
# @definition: defines a new block device inline
# @reference: references the ID of an existing block device.
# An empty string means that no block device should
# be referenced.  Deprecated; use null instead.
# @null: No block device should be referenced (since 2.10)
#
# Since: 2.9
##
{ 'alternate': 'BlockdevRefOrNull',
  'data': { 'definition': 'BlockdevOptions',
'reference': 'str',
'null': 'null' } }

BlockdevRefOrNull is only used by BlockdevOptionsGenericCOWFormat
which is used by BlockdevOptions ('qed' and 'vmdk') and extended by
BlockdevOptionsQcow and BlockdevOptionsQcow2.

As you pointed out before, setting backing to null means
disabling. This is expressed in both BlockdevRefOrNull and
@BlockdevOptionsGenericCOWFormat documentation.

IMHO the idea of 

[PATCH v3 12/23] i386: Rewrite vector shift helper

2022-09-01 Thread Paolo Bonzini
From: Paul Brook 

Rewrite the vector shift helpers in preparation for AVX support (3 operand
form and 256 bit vectors).

For now keep the existing two operand interface.

No functional changes to existing helpers.

Signed-off-by: Paul Brook 
Message-Id: <20220424220204.2493824-11-p...@nowt.org>
Reviewed-by: Richard Henderson 
Signed-off-by: Paolo Bonzini 
---
 target/i386/ops_sse.h | 247 +++---
 1 file changed, 112 insertions(+), 135 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 2c0090a647..a4a09226e3 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -40,6 +40,8 @@
 #define SUFFIX _xmm
 #endif
 
+#define LANE_WIDTH (SHIFT ? 16 : 8)
+
 /*
  * Copy the relevant parts of a Reg value around. In the case where
  * sizeof(Reg) > SIZE, these helpers operate only on the lower bytes of
@@ -56,198 +58,173 @@
 #define MOVE(d, r) memcpy(&(d).B(0), &(r).B(0), SIZE)
 #endif
 
-void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
-{
-int shift;
+#if SHIFT == 0
+#define FPSRL(x, c) ((x) >> shift)
+#define FPSRAW(x, c) ((int16_t)(x) >> shift)
+#define FPSRAL(x, c) ((int32_t)(x) >> shift)
+#define FPSLL(x, c) ((x) << shift)
+#endif
 
-if (s->Q(0) > 15) {
-d->Q(0) = 0;
-#if SHIFT == 1
-d->Q(1) = 0;
-#endif
+void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
+{
+Reg *s = d;
+int shift;
+if (c->Q(0) > 15) {
+for (int i = 0; i < 1 << SHIFT; i++) {
+d->Q(i) = 0;
+}
 } else {
-shift = s->B(0);
-d->W(0) >>= shift;
-d->W(1) >>= shift;
-d->W(2) >>= shift;
-d->W(3) >>= shift;
-#if SHIFT == 1
-d->W(4) >>= shift;
-d->W(5) >>= shift;
-d->W(6) >>= shift;
-d->W(7) >>= shift;
-#endif
+shift = c->B(0);
+for (int i = 0; i < 4 << SHIFT; i++) {
+d->W(i) = FPSRL(s->W(i), shift);
+}
 }
 }
 
-void glue(helper_psraw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psllw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
 {
+Reg *s = d;
 int shift;
+if (c->Q(0) > 15) {
+for (int i = 0; i < 1 << SHIFT; i++) {
+d->Q(i) = 0;
+}
+} else {
+shift = c->B(0);
+for (int i = 0; i < 4 << SHIFT; i++) {
+d->W(i) = FPSLL(s->W(i), shift);
+}
+}
+}
 
-if (s->Q(0) > 15) {
+void glue(helper_psraw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
+{
+Reg *s = d;
+int shift;
+if (c->Q(0) > 15) {
 shift = 15;
 } else {
-shift = s->B(0);
+shift = c->B(0);
+}
+for (int i = 0; i < 4 << SHIFT; i++) {
+d->W(i) = FPSRAW(s->W(i), shift);
 }
-d->W(0) = (int16_t)d->W(0) >> shift;
-d->W(1) = (int16_t)d->W(1) >> shift;
-d->W(2) = (int16_t)d->W(2) >> shift;
-d->W(3) = (int16_t)d->W(3) >> shift;
-#if SHIFT == 1
-d->W(4) = (int16_t)d->W(4) >> shift;
-d->W(5) = (int16_t)d->W(5) >> shift;
-d->W(6) = (int16_t)d->W(6) >> shift;
-d->W(7) = (int16_t)d->W(7) >> shift;
-#endif
 }
 
-void glue(helper_psllw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psrld, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
 {
+Reg *s = d;
 int shift;
-
-if (s->Q(0) > 15) {
-d->Q(0) = 0;
-#if SHIFT == 1
-d->Q(1) = 0;
-#endif
+if (c->Q(0) > 31) {
+for (int i = 0; i < 1 << SHIFT; i++) {
+d->Q(i) = 0;
+}
 } else {
-shift = s->B(0);
-d->W(0) <<= shift;
-d->W(1) <<= shift;
-d->W(2) <<= shift;
-d->W(3) <<= shift;
-#if SHIFT == 1
-d->W(4) <<= shift;
-d->W(5) <<= shift;
-d->W(6) <<= shift;
-d->W(7) <<= shift;
-#endif
+shift = c->B(0);
+for (int i = 0; i < 2 << SHIFT; i++) {
+d->L(i) = FPSRL(s->L(i), shift);
+}
 }
 }
 
-void glue(helper_psrld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_pslld, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
 {
+Reg *s = d;
 int shift;
-
-if (s->Q(0) > 31) {
-d->Q(0) = 0;
-#if SHIFT == 1
-d->Q(1) = 0;
-#endif
+if (c->Q(0) > 31) {
+for (int i = 0; i < 1 << SHIFT; i++) {
+d->Q(i) = 0;
+}
 } else {
-shift = s->B(0);
-d->L(0) >>= shift;
-d->L(1) >>= shift;
-#if SHIFT == 1
-d->L(2) >>= shift;
-d->L(3) >>= shift;
-#endif
+shift = c->B(0);
+for (int i = 0; i < 2 << SHIFT; i++) {
+d->L(i) = FPSLL(s->L(i), shift);
+}
 }
 }
 
-void glue(helper_psrad, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psrad, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
 {
+Reg *s = d;
 int shift;
-
-if (s->Q(0) > 31) {
+if (c->Q(0) > 31) {
 shift = 31;
 } else {
-shift = s->B(0);
+shift = c->B(0);
+}
+for (int i = 0; i < 2 << SHIFT; i++) {
+d->L(i) = 

[PATCH v3 21/23] i386: Rewrite blendv helpers

2022-09-01 Thread Paolo Bonzini
From: Paul Brook 

Rewrite the blendv helpers so that they can easily be extended to support
the AVX encodings, which make all 4 arguments explicit.

No functional changes to the existing helpers

Signed-off-by: Paul Brook 
Message-Id: <20220424220204.2493824-20-p...@nowt.org>
Reviewed-by: Richard Henderson 
Signed-off-by: Paolo Bonzini 
---
 target/i386/ops_sse.h | 86 ---
 1 file changed, 24 insertions(+), 62 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 7cfbcce49f..a11a0143bf 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -1591,76 +1591,38 @@ void glue(helper_palignr, SUFFIX)(CPUX86State *env, Reg 
*d, Reg *s,
 }
 }
 
-#define XMM0 (env->xmm_regs[0])
+#if SHIFT >= 1
 
-#if SHIFT == 1
 #define SSE_HELPER_V(name, elem, num, F)\
-void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)   \
+void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)   \
 {   \
-d->elem(0) = F(d->elem(0), s->elem(0), XMM0.elem(0));   \
-d->elem(1) = F(d->elem(1), s->elem(1), XMM0.elem(1));   \
-if (num > 2) {  \
-d->elem(2) = F(d->elem(2), s->elem(2), XMM0.elem(2));   \
-d->elem(3) = F(d->elem(3), s->elem(3), XMM0.elem(3));   \
-if (num > 4) {  \
-d->elem(4) = F(d->elem(4), s->elem(4), XMM0.elem(4));   \
-d->elem(5) = F(d->elem(5), s->elem(5), XMM0.elem(5));   \
-d->elem(6) = F(d->elem(6), s->elem(6), XMM0.elem(6));   \
-d->elem(7) = F(d->elem(7), s->elem(7), XMM0.elem(7));   \
-if (num > 8) {  \
-d->elem(8) = F(d->elem(8), s->elem(8), XMM0.elem(8)); \
-d->elem(9) = F(d->elem(9), s->elem(9), XMM0.elem(9)); \
-d->elem(10) = F(d->elem(10), s->elem(10), XMM0.elem(10)); \
-d->elem(11) = F(d->elem(11), s->elem(11), XMM0.elem(11)); \
-d->elem(12) = F(d->elem(12), s->elem(12), XMM0.elem(12)); \
-d->elem(13) = F(d->elem(13), s->elem(13), XMM0.elem(13)); \
-d->elem(14) = F(d->elem(14), s->elem(14), XMM0.elem(14)); \
-d->elem(15) = F(d->elem(15), s->elem(15), XMM0.elem(15)); \
-}   \
-}   \
+Reg *v = d; \
+Reg *m = &env->xmm_regs[0];                                 \
+int i;  \
+for (i = 0; i < num; i++) { \
+d->elem(i) = F(v->elem(i), s->elem(i), m->elem(i)); \
 }   \
 }
 
 #define SSE_HELPER_I(name, elem, num, F)\
-void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t imm) \
+void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,   \
+uint32_t imm)   \
 {   \
-d->elem(0) = F(d->elem(0), s->elem(0), ((imm >> 0) & 1));   \
-d->elem(1) = F(d->elem(1), s->elem(1), ((imm >> 1) & 1));   \
-if (num > 2) {  \
-d->elem(2) = F(d->elem(2), s->elem(2), ((imm >> 2) & 1));   \
-d->elem(3) = F(d->elem(3), s->elem(3), ((imm >> 3) & 1));   \
-if (num > 4) {  \
-d->elem(4) = F(d->elem(4), s->elem(4), ((imm >> 4) & 1)); \
-d->elem(5) = F(d->elem(5), s->elem(5), ((imm >> 5) & 1)); \
-d->elem(6) = F(d->elem(6), s->elem(6), ((imm >> 6) & 1)); \
-d->elem(7) = F(d->elem(7), s->elem(7), ((imm >> 7) & 1)); \
-if (num > 8) {  \
-d->elem(8) = F(d->elem(8), s->elem(8), ((imm >> 8) & 1)); \
-d->elem(9) = F(d->elem(9), s->elem(9), ((imm >> 9) & 1)); \
-d->elem(10) = F(d->elem(10), s->elem(10),   \
-((imm >> 10) & 1)); \
-d->elem(11) = F(d->elem(11), s->elem(11),   \
-((imm >> 11) & 1)); \
-d->elem(12) = F(d->elem(12), s->elem(12),   \
-((imm >> 12) & 1)); \
-

Re: [PATCH v5 18/18] s390x: pv: Add dump support

2022-09-01 Thread Janis Schoetterl-Glausch
On Thu, 2022-08-11 at 12:11 +0000, Janosch Frank wrote:
> Sometimes dumping a guest from the outside is the only way to get the
> data that is needed. This can be the case if a dumping mechanism like
> KDUMP hasn't been configured or data needs to be fetched at a specific
> point. Dumping a protected guest from the outside without help from
> fw/hw doesn't yield sufficient data to be useful. Hence we now
> introduce PV dump support.
> 
> The PV dump support works by integrating the firmware into the dump
> process. New Ultravisor calls are used to initiate the dump process,
> dump cpu data, dump memory state and lastly complete the dump process.
> The UV calls are exposed by KVM via the new KVM_PV_DUMP command and
> its subcommands. The guest's data is fully encrypted and can only be
> decrypted by the entity that owns the customer communication key for
> the dumped guest. Also dumping needs to be allowed via a flag in the
> SE header.
> 
> On the QEMU side of things we store the PV dump data in the newly
> introduced architecture ELF sections (storage state and completion
> data) and the cpu notes (for cpu dump data).
> 
> Users can use the zgetdump tool to convert the encrypted QEMU dump to an
> unencrypted one.

Does PV dump work when memory is being filtered? Are there any
constraints on the filter parameters, alignment or so?
> 
> Signed-off-by: Janosch Frank 
> ---
>  dump/dump.c  |  12 +-
>  include/sysemu/dump.h|   5 +
>  target/s390x/arch_dump.c | 242 ++-
>  3 files changed, 227 insertions(+), 32 deletions(-)

[...]
>  
>  typedef struct NoteFuncDescStruct {
>  int contents_size;
> +uint64_t (*note_size_func)(void); /* NULL for non-dynamic sized contents 
> */
>  void (*note_contents_func)(Note *note, S390CPU *cpu, int id);
> +bool pvonly;
>  } NoteFuncDesc;
>  
>  static const NoteFuncDesc note_core[] = {
> -{sizeof_field(Note, contents.prstatus), s390x_write_elf64_prstatus},
> -{sizeof_field(Note, contents.fpregset), s390x_write_elf64_fpregset},
> -{ 0, NULL}
> +{sizeof_field(Note, contents.prstatus), NULL, 
> s390x_write_elf64_prstatus, false},
> +{sizeof_field(Note, contents.fpregset), NULL, 
> s390x_write_elf64_fpregset, false},
> +{ 0, NULL, NULL}
>  };
>  
>  static const NoteFuncDesc note_linux[] = {
> -{sizeof_field(Note, contents.prefix),   s390x_write_elf64_prefix},
> -{sizeof_field(Note, contents.ctrs), s390x_write_elf64_ctrs},
> -{sizeof_field(Note, contents.timer),s390x_write_elf64_timer},
> -{sizeof_field(Note, contents.todcmp),   s390x_write_elf64_todcmp},
> -{sizeof_field(Note, contents.todpreg),  s390x_write_elf64_todpreg},
> -{sizeof_field(Note, contents.vregslo),  s390x_write_elf64_vregslo},
> -{sizeof_field(Note, contents.vregshi),  s390x_write_elf64_vregshi},
> -{sizeof_field(Note, contents.gscb), s390x_write_elf64_gscb},
> -{ 0, NULL}
> +{sizeof_field(Note, contents.prefix),   NULL, s390x_write_elf64_prefix,  
> false},
> +{sizeof_field(Note, contents.ctrs), NULL, s390x_write_elf64_ctrs,
> false},
> +{sizeof_field(Note, contents.timer),NULL, s390x_write_elf64_timer,   
> false},
> +{sizeof_field(Note, contents.todcmp),   NULL, s390x_write_elf64_todcmp,  
> false},
> +{sizeof_field(Note, contents.todpreg),  NULL, s390x_write_elf64_todpreg, 
> false},
> +{sizeof_field(Note, contents.vregslo),  NULL, s390x_write_elf64_vregslo, 
> false},
> +{sizeof_field(Note, contents.vregshi),  NULL, s390x_write_elf64_vregshi, 
> false},
> +{sizeof_field(Note, contents.gscb), NULL, s390x_write_elf64_gscb,
> false},
> +{0, kvm_s390_pv_dmp_get_size_cpu,   s390x_write_elf64_pv, true},
> +{ 0, NULL, NULL}
>  };
>  
>  static int s390x_write_elf64_notes(const char *note_name,
> @@ -207,22 +226,41 @@ static int s390x_write_elf64_notes(const char 
> *note_name,
> DumpState *s,
> const NoteFuncDesc *funcs)
>  {
> -Note note;
> +Note note, *notep;
>  const NoteFuncDesc *nf;
> -int note_size;
> +int note_size, content_size;

Could make those size_t. I guess it's not necessary, but we're kind of
a dumb pipe for data from the ultravisor so there's something to be
said for not making assumptions.

>  int ret = -1;
>  
>  assert(strlen(note_name) < sizeof(note.name));
>  
>  for (nf = funcs; nf->note_contents_func; nf++) {
> -memset(&note, 0, sizeof(note));
> -note.hdr.n_namesz = cpu_to_be32(strlen(note_name) + 1);
> -note.hdr.n_descsz = cpu_to_be32(nf->contents_size);
> -g_strlcpy(note.name, note_name, sizeof(note.name));
> -(*nf->note_contents_func)(&note, cpu, id);
> +notep = &note;
> +if (nf->pvonly && !s390_is_pv()) {
> +continue;
> +}
>  
> -note_size = sizeof(note) - sizeof(note.contents) + nf->contents_size;
> -

[PATCH v2] target/sh4: Fix TB_FLAG_UNALIGN

2022-09-01 Thread Richard Henderson
The value previously chosen overlaps GUSA_MASK.

Rename all DELAY_SLOT_* and GUSA_* defines to emphasize
that they are included in TB_FLAGs.  Add aliases for the
FPSCR and SR bits that are included in TB_FLAGS, so that
we don't accidentally reassign those bits.

Fixes: 4da06fb3062 ("target/sh4: Implement prctl_unalign_sigbus")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/856
Signed-off-by: Richard Henderson 
---
 target/sh4/cpu.h| 56 +
 linux-user/sh4/signal.c |  6 +--
 target/sh4/cpu.c|  6 +--
 target/sh4/helper.c |  6 +--
 target/sh4/translate.c  | 90 ++---
 5 files changed, 88 insertions(+), 76 deletions(-)

diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index 9f15ef913c..727b829598 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -78,26 +78,33 @@
 #define FPSCR_RM_NEAREST   (0 << 0)
 #define FPSCR_RM_ZERO  (1 << 0)
 
-#define DELAY_SLOT_MASK        0x7
-#define DELAY_SLOT             (1 << 0)
-#define DELAY_SLOT_CONDITIONAL (1 << 1)
-#define DELAY_SLOT_RTE         (1 << 2)
+#define TB_FLAG_DELAY_SLOT       (1 << 0)
+#define TB_FLAG_DELAY_SLOT_COND  (1 << 1)
+#define TB_FLAG_DELAY_SLOT_RTE   (1 << 2)
+#define TB_FLAG_PENDING_MOVCA    (1 << 3)
+#define TB_FLAG_GUSA_SHIFT       4                      /* [11:4] */
+#define TB_FLAG_GUSA_EXCLUSIVE   (1 << 12)
+#define TB_FLAG_UNALIGN          (1 << 13)
+#define TB_FLAG_SR_FD            (1 << SR_FD)           /* 15 */
+#define TB_FLAG_FPSCR_PR         FPSCR_PR               /* 19 */
+#define TB_FLAG_FPSCR_SZ         FPSCR_SZ               /* 20 */
+#define TB_FLAG_FPSCR_FR         FPSCR_FR               /* 21 */
+#define TB_FLAG_SR_RB            (1 << SR_RB)           /* 29 */
+#define TB_FLAG_SR_MD            (1 << SR_MD)           /* 30 */
 
-#define TB_FLAG_PENDING_MOVCA  (1 << 3)
-#define TB_FLAG_UNALIGN        (1 << 4)
-
-#define GUSA_SHIFT 4
-#ifdef CONFIG_USER_ONLY
-#define GUSA_EXCLUSIVE (1 << 12)
-#define GUSA_MASK  ((0xff << GUSA_SHIFT) | GUSA_EXCLUSIVE)
-#else
-/* Provide dummy versions of the above to allow tests against tbflags
-   to be elided while avoiding ifdefs.  */
-#define GUSA_EXCLUSIVE 0
-#define GUSA_MASK  0
-#endif
-
-#define TB_FLAG_ENVFLAGS_MASK  (DELAY_SLOT_MASK | GUSA_MASK)
+#define TB_FLAG_DELAY_SLOT_MASK  (TB_FLAG_DELAY_SLOT |       \
+                                  TB_FLAG_DELAY_SLOT_COND |  \
+                                  TB_FLAG_DELAY_SLOT_RTE)
+#define TB_FLAG_GUSA_MASK        ((0xff << TB_FLAG_GUSA_SHIFT) | \
+                                  TB_FLAG_GUSA_EXCLUSIVE)
+#define TB_FLAG_FPSCR_MASK       (TB_FLAG_FPSCR_PR | \
+                                  TB_FLAG_FPSCR_SZ | \
+                                  TB_FLAG_FPSCR_FR)
+#define TB_FLAG_SR_MASK          (TB_FLAG_SR_FD | \
+                                  TB_FLAG_SR_RB | \
+                                  TB_FLAG_SR_MD)
+#define TB_FLAG_ENVFLAGS_MASK    (TB_FLAG_DELAY_SLOT_MASK | \
+                                  TB_FLAG_GUSA_MASK)
 
 typedef struct tlb_t {
 uint32_t vpn;  /* virtual page number */
@@ -258,7 +265,7 @@ static inline int cpu_mmu_index (CPUSH4State *env, bool 
ifetch)
 {
 /* The instruction in a RTE delay slot is fetched in privileged
mode, but executed in user mode.  */
-if (ifetch && (env->flags & DELAY_SLOT_RTE)) {
+if (ifetch && (env->flags & TB_FLAG_DELAY_SLOT_RTE)) {
 return 0;
 } else {
 return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0;
@@ -366,11 +373,10 @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, 
target_ulong *pc,
 {
 *pc = env->pc;
 /* For a gUSA region, notice the end of the region.  */
-*cs_base = env->flags & GUSA_MASK ? env->gregs[0] : 0;
-*flags = env->flags /* TB_FLAG_ENVFLAGS_MASK: bits 0-2, 4-12 */
-| (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR))  /* Bits 19-21 */
-| (env->sr & ((1u << SR_MD) | (1u << SR_RB)))  /* Bits 29-30 */
-| (env->sr & (1u << SR_FD))/* Bit 15 */
+*cs_base = env->flags & TB_FLAG_GUSA_MASK ? env->gregs[0] : 0;
+*flags = env->flags
+| (env->fpscr & TB_FLAG_FPSCR_MASK)
+| (env->sr & TB_FLAG_SR_MASK)
 | (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 3 */
 #ifdef CONFIG_USER_ONLY
 *flags |= TB_FLAG_UNALIGN * !env_cpu(env)->prctl_unalign_sigbus;
diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c
index f6a18bc6b5..c4ba962708 100644
--- a/linux-user/sh4/signal.c
+++ b/linux-user/sh4/signal.c
@@ -161,7 +161,7 @@ static void restore_sigcontext(CPUSH4State *regs, struct 
target_sigcontext *sc)
 __get_user(regs->fpul, &sc->sc_fpul);
 
 regs->tra = -1; /* disable syscall checks */
-regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
+regs->flags = 0;
 }
 
 void setup_frame(int sig, struct target_sigaction *ka,
@@ 

Re: [PATCH v4 0/4] Enable unix socket support on Windows

2022-09-01 Thread Bin Meng
Hi,

On Tue, Aug 2, 2022 at 3:52 PM Bin Meng  wrote:
>
> Support for the unix socket has existed both in BSD and Linux for the
> longest time, but not on Windows. Since Windows 10 build 17063 [1],
> the native support for the unix socket has come to Windows. Starting
> this build, two Win32 processes can use the AF_UNIX address family
> over Winsock API to communicate with each other.
>
> [1] https://devblogs.microsoft.com/commandline/af_unix-comes-to-windows/
>
> Changes in v4:
> - instead of introducing CONFIG_AF_UNIX, add fallback afunix.h header
>   in os-win32.h, and compile the AF_UNIX stuff for all Windows hosts
> - drop CONFIG_AF_UNIX
> - introduce a new helper socket_check_afunix_support() to runtime-check
>   the availability of AF_UNIX socket, and skip those appropriately
>

All patches in this series have been reviewed. Would you please queue
this? Thanks!

Regards,
Bin



[PULL 04/20] linux-user: Honor PT_GNU_STACK

2022-09-01 Thread Richard Henderson
Map the stack executable if required by default or on demand.

Acked-by: Ilya Leoshkevich 
Tested-by: Ilya Leoshkevich 
Signed-off-by: Richard Henderson 
---
 include/elf.h|  1 +
 linux-user/qemu.h|  1 +
 linux-user/elfload.c | 19 ++-
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/include/elf.h b/include/elf.h
index 3a4bcb646a..3d6b9062c0 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -31,6 +31,7 @@ typedef int64_t  Elf64_Sxword;
 #define PT_LOPROC  0x70000000
 #define PT_HIPROC  0x7fffffff
 
+#define PT_GNU_STACK  (PT_LOOS + 0x474e551)
 #define PT_GNU_PROPERTY   (PT_LOOS + 0x474e553)
 
 #define PT_MIPS_REGINFO   0x70000000
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 7d90de1b15..e2e93fbd1d 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -48,6 +48,7 @@ struct image_info {
 uint32_t        elf_flags;
 int personality;
 abi_ulong   alignment;
+bool            exec_stack;
 
 /* Generic semihosting knows about these pointers. */
 abi_ulong   arg_strings;   /* strings for argv */
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index b20d513929..90375c6b74 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -232,6 +232,7 @@ static bool init_guest_commpage(void)
 #define ELF_ARCH        EM_386
 
 #define ELF_PLATFORM get_elf_platform()
+#define EXSTACK_DEFAULT true
 
 static const char *get_elf_platform(void)
 {
@@ -308,6 +309,7 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, 
const CPUX86State *en
 
 #define ELF_ARCH        EM_ARM
 #define ELF_CLASS   ELFCLASS32
+#define EXSTACK_DEFAULT true
 
 static inline void init_thread(struct target_pt_regs *regs,
struct image_info *infop)
@@ -776,6 +778,7 @@ static inline void init_thread(struct target_pt_regs *regs,
 #else
 
 #define ELF_CLASS   ELFCLASS32
+#define EXSTACK_DEFAULT true
 
 #endif
 
@@ -973,6 +976,7 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, 
const CPUPPCState *en
 
 #define ELF_CLASS   ELFCLASS64
 #define ELF_ARCH        EM_LOONGARCH
+#define EXSTACK_DEFAULT true
 
 #define elf_check_arch(x) ((x) == EM_LOONGARCH)
 
@@ -1068,6 +1072,7 @@ static uint32_t get_elf_hwcap(void)
 #define ELF_CLASS   ELFCLASS32
 #endif
 #define ELF_ARCH        EM_MIPS
+#define EXSTACK_DEFAULT true
 
 #ifdef TARGET_ABI_MIPSN32
 #define elf_check_abi(x) ((x) & EF_MIPS_ABI2)
@@ -1806,6 +1811,10 @@ static inline void init_thread(struct target_pt_regs 
*regs,
 #define bswaptls(ptr) bswap32s(ptr)
 #endif
 
+#ifndef EXSTACK_DEFAULT
+#define EXSTACK_DEFAULT false
+#endif
+
 #include "elf.h"
 
 /* We must delay the following stanzas until after "elf.h". */
@@ -2081,6 +2090,7 @@ static abi_ulong setup_arg_pages(struct linux_binprm 
*bprm,
  struct image_info *info)
 {
 abi_ulong size, error, guard;
+int prot;
 
 size = guest_stack_size;
 if (size < STACK_LOWER_LIMIT) {
@@ -2091,7 +2101,11 @@ static abi_ulong setup_arg_pages(struct linux_binprm 
*bprm,
 guard = qemu_real_host_page_size();
 }
 
-error = target_mmap(0, size + guard, PROT_READ | PROT_WRITE,
+prot = PROT_READ | PROT_WRITE;
+if (info->exec_stack) {
+prot |= PROT_EXEC;
+}
+error = target_mmap(0, size + guard, prot,
 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 if (error == -1) {
 perror("mmap stack");
@@ -2921,6 +2935,7 @@ static void load_elf_image(const char *image_name, int 
image_fd,
  */
 loaddr = -1, hiaddr = 0;
 info->alignment = 0;
+info->exec_stack = EXSTACK_DEFAULT;
 for (i = 0; i < ehdr->e_phnum; ++i) {
 struct elf_phdr *eppnt = phdr + i;
 if (eppnt->p_type == PT_LOAD) {
@@ -2963,6 +2978,8 @@ static void load_elf_image(const char *image_name, int 
image_fd,
 if (!parse_elf_properties(image_fd, info, eppnt, bprm_buf, &err)) {
 goto exit_errmsg;
 }
+} else if (eppnt->p_type == PT_GNU_STACK) {
+info->exec_stack = eppnt->p_flags & PF_X;
 }
 }
 
-- 
2.34.1




[PULL 16/20] accel/tcg: Add fast path for translator_ld*

2022-09-01 Thread Richard Henderson
Cache the translation from guest to host address, so we may
use direct loads when we hit on the primary translation page.

Look up the second translation page only once, during translation.
This obviates another lookup of the second page within tb_gen_code
after translation.

Fixes a bug in that plugin_insn_append should be passed the bytes
in the original memory order, not bswapped by pieces.

Acked-by: Ilya Leoshkevich 
Tested-by: Ilya Leoshkevich 
Signed-off-by: Richard Henderson 
---
 include/exec/translator.h |  63 +++
 accel/tcg/translate-all.c |  23 +++
 accel/tcg/translator.c| 126 +-
 3 files changed, 141 insertions(+), 71 deletions(-)

diff --git a/include/exec/translator.h b/include/exec/translator.h
index 69db0f5c21..329a42fe46 100644
--- a/include/exec/translator.h
+++ b/include/exec/translator.h
@@ -81,24 +81,14 @@ typedef enum DisasJumpType {
  * Architecture-agnostic disassembly context.
  */
 typedef struct DisasContextBase {
-const TranslationBlock *tb;
+TranslationBlock *tb;
 target_ulong pc_first;
 target_ulong pc_next;
 DisasJumpType is_jmp;
 int num_insns;
 int max_insns;
 bool singlestep_enabled;
-#ifdef CONFIG_USER_ONLY
-/*
- * Guest address of the last byte of the last protected page.
- *
- * Pages containing the translated instructions are made non-writable in
- * order to achieve consistency in case another thread is modifying the
- * code while translate_insn() fetches the instruction bytes piecemeal.
- * Such writer threads are blocked on mmap_lock() in page_unprotect().
- */
-target_ulong page_protect_end;
-#endif
+void *host_addr[2];
 } DisasContextBase;
 
 /**
@@ -183,24 +173,43 @@ bool translator_use_goto_tb(DisasContextBase *db, 
target_ulong dest);
  * the relevant information at translation time.
  */
 
-#define GEN_TRANSLATOR_LD(fullname, type, load_fn, swap_fn) \
-type fullname ## _swap(CPUArchState *env, DisasContextBase *dcbase, \
-   abi_ptr pc, bool do_swap);   \
-static inline type fullname(CPUArchState *env,  \
-DisasContextBase *dcbase, abi_ptr pc)   \
-{   \
-return fullname ## _swap(env, dcbase, pc, false);   \
+uint8_t translator_ldub(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
+uint16_t translator_lduw(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
+uint32_t translator_ldl(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
+uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
+
+static inline uint16_t
+translator_lduw_swap(CPUArchState *env, DisasContextBase *db,
+ abi_ptr pc, bool do_swap)
+{
+uint16_t ret = translator_lduw(env, db, pc);
+if (do_swap) {
+ret = bswap16(ret);
 }
+return ret;
+}
 
-#define FOR_EACH_TRANSLATOR_LD(F)   \
-F(translator_ldub, uint8_t, cpu_ldub_code, /* no swap */)   \
-F(translator_lduw, uint16_t, cpu_lduw_code, bswap16)\
-F(translator_ldl, uint32_t, cpu_ldl_code, bswap32)  \
-F(translator_ldq, uint64_t, cpu_ldq_code, bswap64)
+static inline uint32_t
+translator_ldl_swap(CPUArchState *env, DisasContextBase *db,
+abi_ptr pc, bool do_swap)
+{
+uint32_t ret = translator_ldl(env, db, pc);
+if (do_swap) {
+ret = bswap32(ret);
+}
+return ret;
+}
 
-FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD)
-
-#undef GEN_TRANSLATOR_LD
+static inline uint64_t
+translator_ldq_swap(CPUArchState *env, DisasContextBase *db,
+abi_ptr pc, bool do_swap)
+{
+uint64_t ret = translator_ldq(env, db, pc);
+if (do_swap) {
+ret = bswap64(ret);
+}
+return ret;
+}
 
 /*
  * Return whether addr is on the same page as where disassembly started.
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 587886aa4e..f5e8592d4a 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1385,8 +1385,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 {
 CPUArchState *env = cpu->env_ptr;
 TranslationBlock *tb, *existing_tb;
-tb_page_addr_t phys_pc, phys_page2;
-target_ulong virt_page2;
+tb_page_addr_t phys_pc;
 tcg_insn_unit *gen_code_buf;
 int gen_code_size, search_size, max_insns;
 #ifdef CONFIG_PROFILER
@@ -1429,6 +1428,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 tb->flags = flags;
 tb->cflags = cflags;
 tb->trace_vcpu_dstate = *cpu->trace_dstate;
+tb->page_addr[0] = phys_pc;
+tb->page_addr[1] = -1;
 tcg_ctx->tb_cflags = cflags;
  tb_overflow:
 
@@ -1622,13 +1623,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 }
 
 /*
- * If the TB is not associated with a physical RAM 

[PULL 15/20] accel/tcg: Add pc and host_pc params to gen_intermediate_code

2022-09-01 Thread Richard Henderson
Pass these along to translator_loop -- pc may be used instead
of tb->pc, and host_pc is currently unused.  Adjust all targets
at one time.

Acked-by: Alistair Francis 
Acked-by: Ilya Leoshkevich 
Tested-by: Ilya Leoshkevich 
Signed-off-by: Richard Henderson 
---
 include/exec/exec-all.h   |  1 -
 include/exec/translator.h | 24 
 accel/tcg/translate-all.c |  6 --
 accel/tcg/translator.c|  9 +
 target/alpha/translate.c  |  5 +++--
 target/arm/translate.c|  5 +++--
 target/avr/translate.c|  5 +++--
 target/cris/translate.c   |  5 +++--
 target/hexagon/translate.c|  6 --
 target/hppa/translate.c   |  5 +++--
 target/i386/tcg/translate.c   |  5 +++--
 target/loongarch/translate.c  |  6 --
 target/m68k/translate.c   |  5 +++--
 target/microblaze/translate.c |  5 +++--
 target/mips/tcg/translate.c   |  5 +++--
 target/nios2/translate.c  |  5 +++--
 target/openrisc/translate.c   |  6 --
 target/ppc/translate.c|  5 +++--
 target/riscv/translate.c  |  5 +++--
 target/rx/translate.c |  5 +++--
 target/s390x/tcg/translate.c  |  5 +++--
 target/sh4/translate.c|  5 +++--
 target/sparc/translate.c  |  5 +++--
 target/tricore/translate.c|  6 --
 target/xtensa/translate.c |  6 --
 25 files changed, 97 insertions(+), 53 deletions(-)

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 9f35e3b7a9..bcad607c4e 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -39,7 +39,6 @@ typedef ram_addr_t tb_page_addr_t;
 #define TB_PAGE_ADDR_FMT RAM_ADDR_FMT
 #endif
 
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns);
 void restore_state_to_opc(CPUArchState *env, TranslationBlock *tb,
   target_ulong *data);
 
diff --git a/include/exec/translator.h b/include/exec/translator.h
index 45b9268ca4..69db0f5c21 100644
--- a/include/exec/translator.h
+++ b/include/exec/translator.h
@@ -26,6 +26,19 @@
 #include "exec/translate-all.h"
 #include "tcg/tcg.h"
 
+/**
+ * gen_intermediate_code
+ * @cpu: cpu context
+ * @tb: translation block
+ * @max_insns: max number of instructions to translate
+ * @pc: guest virtual program counter address
+ * @host_pc: host physical program counter address
+ *
+ * This function must be provided by the target, which should create
+ * the target-specific DisasContext, and then invoke translator_loop.
+ */
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
+   target_ulong pc, void *host_pc);
 
 /**
  * DisasJumpType:
@@ -123,11 +136,13 @@ typedef struct TranslatorOps {
 
 /**
  * translator_loop:
- * @ops: Target-specific operations.
- * @db: Disassembly context.
  * @cpu: Target vCPU.
  * @tb: Translation block.
  * @max_insns: Maximum number of insns to translate.
+ * @pc: guest virtual program counter address
+ * @host_pc: host physical program counter address
+ * @ops: Target-specific operations.
+ * @db: Disassembly context.
  *
  * Generic translator loop.
  *
@@ -141,8 +156,9 @@ typedef struct TranslatorOps {
  * - When single-stepping is enabled (system-wide or on the current vCPU).
  * - When too many instructions have been translated.
  */
-void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
- CPUState *cpu, TranslationBlock *tb, int max_insns);
+void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
+ target_ulong pc, void *host_pc,
+ const TranslatorOps *ops, DisasContextBase *db);
 
 void translator_loop_temp_check(DisasContextBase *db);
 
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index b83161a081..587886aa4e 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -46,6 +46,7 @@
 
 #include "exec/cputlb.h"
 #include "exec/translate-all.h"
+#include "exec/translator.h"
 #include "qemu/bitmap.h"
 #include "qemu/qemu-print.h"
 #include "qemu/timer.h"
@@ -1392,11 +1393,12 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 TCGProfile *prof = &tcg_ctx->prof;
 int64_t ti;
 #endif
+void *host_pc;
 
 assert_memory_lock();
 qemu_thread_jit_write();
 
-phys_pc = get_page_addr_code(env, pc);
+phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
 
 if (phys_pc == -1) {
 /* Generate a one-shot TB with 1 insn in it */
@@ -1444,7 +1446,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 tcg_func_start(tcg_ctx);
 
 tcg_ctx->cpu = env_cpu(env);
-gen_intermediate_code(cpu, tb, max_insns);
+gen_intermediate_code(cpu, tb, max_insns, pc, host_pc);
 assert(tb->size != 0);
 tcg_ctx->cpu = NULL;
 max_insns = tb->icount;
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index fe7af9b943..3eef30d93a 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -51,16 +51,17 @@ static inline void 

[PATCH v3 06/23] i386: Move 3DNOW decoder

2022-09-01 Thread Paolo Bonzini
From: Paul Brook 

Handle 3DNOW instructions early to avoid complicating the MMX/SSE logic.

Signed-off-by: Paul Brook 
Message-Id: <20220424220204.2493824-25-p...@nowt.org>
Reviewed-by: Richard Henderson 
Signed-off-by: Paolo Bonzini 
---
 target/i386/tcg/translate.c | 30 +-
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index b7321b7588..c76f6dba11 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3216,6 +3216,11 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 is_xmm = 1;
 }
 }
+if (sse_op_flags & SSE_OPF_3DNOW) {
+if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
+goto illegal_op;
+}
+}
 /* simple MMX/SSE operation */
 if (s->flags & HF_TS_MASK) {
 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
@@ -4567,21 +4572,20 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 rm = (modrm & 7);
 op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
 }
+if (sse_op_flags & SSE_OPF_3DNOW) {
+/* 3DNow! data insns */
+val = x86_ldub_code(env, s);
+SSEFunc_0_epp op_3dnow = sse_op_table5[val];
+if (!op_3dnow) {
+goto unknown_op;
+}
+tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
+tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
+op_3dnow(cpu_env, s->ptr0, s->ptr1);
+return;
+}
 }
 switch(b) {
-case 0x0f: /* 3DNow! data insns */
-val = x86_ldub_code(env, s);
-sse_fn_epp = sse_op_table5[val];
-if (!sse_fn_epp) {
-goto unknown_op;
-}
-if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) {
-goto illegal_op;
-}
-tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
-tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset);
-sse_fn_epp(cpu_env, s->ptr0, s->ptr1);
-break;
 case 0x70: /* pshufx insn */
 case 0xc6: /* pshufx insn */
 val = x86_ldub_code(env, s);
-- 
2.37.1





[PATCH v3 20/23] i386: Misc AVX helper prep

2022-09-01 Thread Paolo Bonzini
From: Paul Brook 

Fixup various vector helpers that either trivially extend to 256 bit,
or don't have 256 bit variants.

No functional changes to existing helpers

Signed-off-by: Paul Brook 
Message-Id: <20220424220204.2493824-19-p...@nowt.org>
Reviewed-by: Richard Henderson 
Signed-off-by: Paolo Bonzini 
---
 target/i386/ops_sse.h | 143 +++---
 1 file changed, 94 insertions(+), 49 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 61722fe4a2..7cfbcce49f 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -422,6 +422,7 @@ void glue(helper_psadbw, SUFFIX)(CPUX86State *env, Reg *d, 
Reg *s)
 }
 }
 
+#if SHIFT < 2
 void glue(helper_maskmov, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
   target_ulong a0)
 {
@@ -433,6 +434,7 @@ void glue(helper_maskmov, SUFFIX)(CPUX86State *env, Reg *d, 
Reg *s,
 }
 }
 }
+#endif
 
 void glue(helper_movl_mm_T0, SUFFIX)(Reg *d, uint32_t val)
 {
@@ -635,21 +637,24 @@ void helper_sqrtsd(CPUX86State *env, Reg *d, Reg *s)
 /* float to float conversions */
 void glue(helper_cvtps2pd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
-float32 s0, s1;
-
-s0 = s->ZMM_S(0);
-s1 = s->ZMM_S(1);
-d->ZMM_D(0) = float32_to_float64(s0, >sse_status);
-d->ZMM_D(1) = float32_to_float64(s1, >sse_status);
+int i;
+for (i = 1 << SHIFT; --i >= 0; ) {
+d->ZMM_D(i) = float32_to_float64(s->ZMM_S(i), >sse_status);
+}
 }
 
 void glue(helper_cvtpd2ps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
-d->ZMM_S(0) = float64_to_float32(s->ZMM_D(0), >sse_status);
-d->ZMM_S(1) = float64_to_float32(s->ZMM_D(1), >sse_status);
-d->Q(1) = 0;
+int i;
+for (i = 0; i < 1 << SHIFT; i++) {
+ d->ZMM_S(i) = float64_to_float32(s->ZMM_D(i), >sse_status);
+}
+for (i >>= 1; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+}
 }
 
+#if SHIFT == 1
 void helper_cvtss2sd(CPUX86State *env, Reg *d, Reg *s)
 {
 d->ZMM_D(0) = float32_to_float64(s->ZMM_S(0), >sse_status);
@@ -659,26 +664,27 @@ void helper_cvtsd2ss(CPUX86State *env, Reg *d, Reg *s)
 {
 d->ZMM_S(0) = float64_to_float32(s->ZMM_D(0), >sse_status);
 }
+#endif
 
 /* integer to float */
 void glue(helper_cvtdq2ps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
-d->ZMM_S(0) = int32_to_float32(s->ZMM_L(0), >sse_status);
-d->ZMM_S(1) = int32_to_float32(s->ZMM_L(1), >sse_status);
-d->ZMM_S(2) = int32_to_float32(s->ZMM_L(2), >sse_status);
-d->ZMM_S(3) = int32_to_float32(s->ZMM_L(3), >sse_status);
+int i;
+for (i = 0; i < 2 << SHIFT; i++) {
+d->ZMM_S(i) = int32_to_float32(s->ZMM_L(i), >sse_status);
+}
 }
 
 void glue(helper_cvtdq2pd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
-int32_t l0, l1;
-
-l0 = (int32_t)s->ZMM_L(0);
-l1 = (int32_t)s->ZMM_L(1);
-d->ZMM_D(0) = int32_to_float64(l0, >sse_status);
-d->ZMM_D(1) = int32_to_float64(l1, >sse_status);
+int i;
+for (i = 1 << SHIFT; --i >= 0; ) {
+int32_t l = s->ZMM_L(i);
+d->ZMM_D(i) = int32_to_float64(l, >sse_status);
+}
 }
 
+#if SHIFT == 1
 void helper_cvtpi2ps(CPUX86State *env, ZMMReg *d, MMXReg *s)
 {
 d->ZMM_S(0) = int32_to_float32(s->MMX_L(0), >sse_status);
@@ -713,8 +719,11 @@ void helper_cvtsq2sd(CPUX86State *env, ZMMReg *d, uint64_t 
val)
 }
 #endif
 
+#endif
+
 /* float to integer */
 
+#if SHIFT == 1
 /*
  * x86 mandates that we return the indefinite integer value for the result
  * of any float-to-integer conversion that raises the 'invalid' exception.
@@ -745,22 +754,28 @@ WRAP_FLOATCONV(int64_t, float32_to_int64, float32, 
INT64_MIN)
 WRAP_FLOATCONV(int64_t, float32_to_int64_round_to_zero, float32, INT64_MIN)
 WRAP_FLOATCONV(int64_t, float64_to_int64, float64, INT64_MIN)
 WRAP_FLOATCONV(int64_t, float64_to_int64_round_to_zero, float64, INT64_MIN)
+#endif
 
 void glue(helper_cvtps2dq, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
 {
-d->ZMM_L(0) = x86_float32_to_int32(s->ZMM_S(0), >sse_status);
-d->ZMM_L(1) = x86_float32_to_int32(s->ZMM_S(1), >sse_status);
-d->ZMM_L(2) = x86_float32_to_int32(s->ZMM_S(2), >sse_status);
-d->ZMM_L(3) = x86_float32_to_int32(s->ZMM_S(3), >sse_status);
+int i;
+for (i = 0; i < 2 << SHIFT; i++) {
+d->ZMM_L(i) = x86_float32_to_int32(s->ZMM_S(i), >sse_status);
+}
 }
 
 void glue(helper_cvtpd2dq, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
 {
-d->ZMM_L(0) = x86_float64_to_int32(s->ZMM_D(0), >sse_status);
-d->ZMM_L(1) = x86_float64_to_int32(s->ZMM_D(1), >sse_status);
-d->ZMM_Q(1) = 0;
+int i;
+for (i = 0; i < 1 << SHIFT; i++) {
+d->ZMM_L(i) = x86_float64_to_int32(s->ZMM_D(i), >sse_status);
+}
+for (i >>= 1; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+}
 }
 
+#if SHIFT == 1
 void helper_cvtps2pi(CPUX86State *env, MMXReg *d, ZMMReg *s)
 {
 d->MMX_L(0) = x86_float32_to_int32(s->ZMM_S(0), >sse_status);
@@ -794,23 +809,31 @@ int64_t 

[PATCH v3 23/23] i386: AVX+AES helpers prep

2022-09-01 Thread Paolo Bonzini
From: Paul Brook 

Make the AES vector helpers AVX ready

No functional changes to existing helpers

Signed-off-by: Paul Brook 
Message-Id: <20220424220204.2493824-22-p...@nowt.org>
Signed-off-by: Paolo Bonzini 
---
 target/i386/ops_sse.h | 41 ++---
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 4135623ad8..f208253161 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -2256,11 +2256,12 @@ void glue(helper_aesdec, SUFFIX)(CPUX86State *env, Reg 
*d, Reg *s)
 Reg st = *d;
 Reg rk = *s;
 
-for (i = 0 ; i < 4 ; i++) {
-d->L(i) = rk.L(i) ^ bswap32(AES_Td0[st.B(AES_ishifts[4*i+0])] ^
-AES_Td1[st.B(AES_ishifts[4*i+1])] ^
-AES_Td2[st.B(AES_ishifts[4*i+2])] ^
-AES_Td3[st.B(AES_ishifts[4*i+3])]);
+for (i = 0 ; i < 2 << SHIFT ; i++) {
+int j = i & 3;
+d->L(i) = rk.L(i) ^ bswap32(AES_Td0[st.B(AES_ishifts[4 * j + 0])] ^
+AES_Td1[st.B(AES_ishifts[4 * j + 1])] ^
+AES_Td2[st.B(AES_ishifts[4 * j + 2])] ^
+AES_Td3[st.B(AES_ishifts[4 * j + 3])]);
 }
 }
 
@@ -2270,8 +2271,8 @@ void glue(helper_aesdeclast, SUFFIX)(CPUX86State *env, 
Reg *d, Reg *s)
 Reg st = *d;
 Reg rk = *s;
 
-for (i = 0; i < 16; i++) {
-d->B(i) = rk.B(i) ^ (AES_isbox[st.B(AES_ishifts[i])]);
+for (i = 0; i < 8 << SHIFT; i++) {
+d->B(i) = rk.B(i) ^ (AES_isbox[st.B(AES_ishifts[i & 15] + (i & ~15))]);
 }
 }
 
@@ -2281,11 +2282,12 @@ void glue(helper_aesenc, SUFFIX)(CPUX86State *env, Reg 
*d, Reg *s)
 Reg st = *d;
 Reg rk = *s;
 
-for (i = 0 ; i < 4 ; i++) {
-d->L(i) = rk.L(i) ^ bswap32(AES_Te0[st.B(AES_shifts[4*i+0])] ^
-AES_Te1[st.B(AES_shifts[4*i+1])] ^
-AES_Te2[st.B(AES_shifts[4*i+2])] ^
-AES_Te3[st.B(AES_shifts[4*i+3])]);
+for (i = 0 ; i < 2 << SHIFT ; i++) {
+int j = i & 3;
+d->L(i) = rk.L(i) ^ bswap32(AES_Te0[st.B(AES_shifts[4 * j + 0])] ^
+AES_Te1[st.B(AES_shifts[4 * j + 1])] ^
+AES_Te2[st.B(AES_shifts[4 * j + 2])] ^
+AES_Te3[st.B(AES_shifts[4 * j + 3])]);
 }
 }
 
@@ -2295,22 +2297,22 @@ void glue(helper_aesenclast, SUFFIX)(CPUX86State *env, 
Reg *d, Reg *s)
 Reg st = *d;
 Reg rk = *s;
 
-for (i = 0; i < 16; i++) {
-d->B(i) = rk.B(i) ^ (AES_sbox[st.B(AES_shifts[i])]);
+for (i = 0; i < 8 << SHIFT; i++) {
+d->B(i) = rk.B(i) ^ (AES_sbox[st.B(AES_shifts[i & 15] + (i & ~15))]);
 }
-
 }
 
+#if SHIFT == 1
 void glue(helper_aesimc, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
 int i;
 Reg tmp = *s;
 
 for (i = 0 ; i < 4 ; i++) {
-d->L(i) = bswap32(AES_imc[tmp.B(4*i+0)][0] ^
-  AES_imc[tmp.B(4*i+1)][1] ^
-  AES_imc[tmp.B(4*i+2)][2] ^
-  AES_imc[tmp.B(4*i+3)][3]);
+d->L(i) = bswap32(AES_imc[tmp.B(4 * i + 0)][0] ^
+  AES_imc[tmp.B(4 * i + 1)][1] ^
+  AES_imc[tmp.B(4 * i + 2)][2] ^
+  AES_imc[tmp.B(4 * i + 3)][3]);
 }
 }
 
@@ -2328,6 +2330,7 @@ void glue(helper_aeskeygenassist, SUFFIX)(CPUX86State 
*env, Reg *d, Reg *s,
 d->L(3) = (d->L(2) << 24 | d->L(2) >> 8) ^ ctrl;
 }
 #endif
+#endif
 
 #undef SSE_HELPER_S
 
-- 
2.37.1




Re: [PATCH v3 12/23] i386: Rewrite vector shift helper

2022-09-01 Thread Richard Henderson

On 9/1/22 08:48, Paolo Bonzini wrote:

From: Paul Brook 

Rewrite the vector shift helpers in preparation for AVX support (3 operand
form and 256 bit vectors).

For now keep the existing two operand interface.

No functional changes to existing helpers.

Signed-off-by: Paul Brook 
Message-Id: <20220424220204.2493824-11-p...@nowt.org>
Reviewed-by: Richard Henderson 
Signed-off-by: Paolo Bonzini 
---
  target/i386/ops_sse.h | 247 +++---
  1 file changed, 112 insertions(+), 135 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 2c0090a647..a4a09226e3 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -40,6 +40,8 @@
  #define SUFFIX _xmm
  #endif
  
+#define LANE_WIDTH (SHIFT ? 16 : 8)


Much better, thanks.


r~



Re: [Qemu-devel] [RFC PATCH] Add qemu .clang-format

2022-09-01 Thread Daniel P . Berrangé
On Thu, Sep 01, 2022 at 09:08:33AM +0800, Wang, Lei wrote:
> On 8/31/2022 6:39 PM, Daniel P. Berrangé wrote:
> > On Wed, Aug 31, 2022 at 05:18:34PM +0800, Wang, Lei wrote:
> > > 
> > > 
> > > On 8/31/2022 4:49 PM, Daniel P. Berrangé wrote:
> > > > On Wed, Aug 31, 2022 at 02:23:51PM +0800, Wang, Lei wrote:
> > > > > 
> > > > > On 10/2/2015 1:30 AM, marcandre.lur...@redhat.com wrote:
> > > > > > From: Marc-André Lureau 
> > > > > > 
> > > > > > clang-format is awesome to reflow your code according to qemu coding
> > > > > > style in an editor (in the region you modify).
> > > > > > 
> > > > > > (note: clang-tidy should be able to add missing braces around
> > > > > > statements, but I haven't tried it, it's quite recent)
> > > > > > 
> > > > > > Signed-off-by: Marc-André Lureau 
> > > > > > ---
> > > > > > .clang-format | 6 ++
> > > > > > 1 file changed, 6 insertions(+)
> > > > > > create mode 100644 .clang-format
> > > > > > 
> > > > > > diff --git a/.clang-format b/.clang-format
> > > > > > new file mode 100644
> > > > > > index 000..6422547
> > > > > > --- /dev/null
> > > > > > +++ b/.clang-format
> > > > > > @@ -0,0 +1,6 @@
> > > > > > +BasedOnStyle: LLVM
> > > > > > +IndentWidth: 4
> > > > > > +UseTab: Never
> > > > > > +BreakBeforeBraces: Linux
> > > > > > +AllowShortIfStatementsOnASingleLine: false
> > > > > > +IndentCaseLabels: false
> > > > > 
> > > > > Hi, any progress on this? I also found a gist on GitHub which can be a
> > > > > reference: 
> > > > > https://gist.github.com/elmarco/aa5e0b23567f46fb7f0e73cde586a0c1
> > > > 
> > > > clang-format is a great tool and I'd highly recommend its use on
> > > > any newly started projects, and even retrospectively on existing
> > > > projects which are small scale. Adding it to large existing projects
> > > > is problematic though.
> > > > 
> > > > None of the QEMU code complies with it today and indeed there is
> > > > quite a bit of style variance across different parts of QEMU. If
> > > > we add this config file, and someone makes a 1 line change in a
> > > > file, clang-format will reformat the entire file contents.
> > > > 
> > > > The only practical way to introduce use of clang-format would be
> > > > to do a bulk reformat of the entire codebase. That is something
> > > > that is quite disruptive to both people with patches they're
> > > > working on but not submitted yet, as well as people wanting to
> > > > cherry-pick new commits back to old code branches.
> > > > 
> > > > With regards,
> > > > Daniel
> > > 
> > > I think the benefits of introducing clang-format mainly for its ability to
> > > format a code range, which means for any future contributions, we could
> > > encourage a range format before the patch is generated. This can 
> > > extensively
> > > simplify my workflow, especially because I use the Neovim + LSP 
> > > combination,
> > > which supports a built-in function "lua vim.lsp.buf.range_formatting()".
> > 
> > IMHO partial format conversions are even worse than full conversions,
> > because they would make code inconsistent within the scope of a file.
> 
> So you mean when we're adding new code in an old file, the coding style
> should also be the old one? That sounds a bit unreasonable. I thought we are
> shifting the coding style in an on-demand way, so we can finally achieve to
> the new style mildly, if each time we're using the old coding style, that
> could be impossible.

From my POV as a maintainer, the best situation would be consistency across
the entire codebase. Since we likely won't get that though, then next best
is consistency across the subsystem directory, and next best is consistency
across the whole file.  Mixing code styles within a file is the worst IMHO.

> 
> > > I have no interest in reformatting the existing code and also think using 
> > > it
> > > to reformat an entire file shouldn't be encouraged, but, we can leverage
> > > this tool to give future contributions a better experience. It's also
> > > important to note that the kernel already has a ".clang-format" file, so I
> > > think we can give it a try:)
> > 
> > The mere action of introducing a .clang-format file in the root of the
> > repository will cause some contributors' editors to automatically
> > reformat files every time they are saved. IOW even if you don't
> > intend to do reformatting, that will be a net result.
> 
> I think that depends on developer's configuration, as far as I know, format
> on save is a feature which can be easily disabled on most of the IDE's, such
> as VSCode.

You could disable it, but it requires each developer to know that we're
shipping a clang-format that should not in fact be used to reformat
code, which is rather counterintuitive. 

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-

[PATCH v2 02/10] qapi: fix example of query-vnc command

2022-09-01 Thread Victor Toso
Example output has an extra ',' delimiter in member "websocket" and it
lacks it in "family" member. Fix it.

Problem was noticed when trying to load the example into python's json
library.

Signed-off-by: Victor Toso 
---
 qapi/ui.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/qapi/ui.json b/qapi/ui.json
index cf58ab4283..286c5731d1 100644
--- a/qapi/ui.json
+++ b/qapi/ui.json
@@ -667,8 +667,8 @@
 # {
 #"host":"127.0.0.1",
 #"service":"50401",
-#"family":"ipv4"
-#"websocket":false,
+#"family":"ipv4",
+#"websocket":false
 # }
 #  ]
 #   }
-- 
2.37.2




[PATCH v2 00/10] qapi examples fixes, simplified version

2022-09-01 Thread Victor Toso
Hi,

This is the follow up from v1. I've dropped a few patches that need
further discussion. I plan to revisit those when submitting the generator
again at a later time.

v1: https://lists.gnu.org/archive/html/qemu-devel/2022-08/msg04525.html

Changes in v2:
 * Dropped "qapi: fix example of blockdev-add command". It was wrong.
 * Dropped patches with too long examples as we might want to cut them
   short. I've also dropped the patches that fixes examples with
   comments on them as we might want to have a way to do that.
 * Fixed style in:
  - qapi: fix example of NIC_RX_FILTER_CHANGED event
  - qapi: fix example of query-dump-guest-memory-capability command
 * Dropped the generator (rfc) as I'll submit it again later, improved.

Cheers,
Victor

Victor Toso (10):
  qapi: fix example of query-ballon command
  qapi: fix example of query-vnc command
  qapi: fix example of query-dump-guest-memory-capability command
  qapi: fix example of BLOCK_JOB_READY event
  qapi: fix example of NIC_RX_FILTER_CHANGED event
  qapi: fix example of DEVICE_UNPLUG_GUEST_ERROR event
  qapi: fix example of MEM_UNPLUG_ERROR event
  qapi: fix examples of blockdev-add with qcow2
  qapi: fix example of query-hotpluggable-cpus command
  qapi: fix examples of events missing timestamp

 qapi/block-core.json | 12 ++--
 qapi/dump.json   |  2 +-
 qapi/machine.json|  8 
 qapi/migration.json  | 27 +++
 qapi/net.json|  1 -
 qapi/qdev.json   |  3 +--
 qapi/ui.json |  4 ++--
 7 files changed, 37 insertions(+), 20 deletions(-)

-- 
2.37.2




[PATCH v2 09/10] qapi: fix example of query-hotpluggable-cpus command

2022-09-01 Thread Victor Toso
The example return type has the wrong member name. Fix it.

Problem was noticed when using the example as a test case for Go
bindings.

Signed-off-by: Victor Toso 
---
 qapi/machine.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/qapi/machine.json b/qapi/machine.json
index 4782eea2c3..abb2f48808 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -958,9 +958,9 @@
 #
 # -> { "execute": "query-hotpluggable-cpus" }
 # <- {"return": [
-#  { "props": { "core": 8 }, "type": "POWER8-spapr-cpu-core",
+#  { "props": { "core-id": 8 }, "type": "POWER8-spapr-cpu-core",
 #"vcpus-count": 1 },
-#  { "props": { "core": 0 }, "type": "POWER8-spapr-cpu-core",
+#  { "props": { "core-id": 0 }, "type": "POWER8-spapr-cpu-core",
 #"vcpus-count": 1, "qom-path": "/machine/unattached/device[0]"}
 #]}'
 #
-- 
2.37.2




[PATCH v2 10/10] qapi: fix examples of events missing timestamp

2022-09-01 Thread Victor Toso
I've used a real timestamp and changed it for each event so that they
would not all be equal.

Problem was noticed when using the example as a test case for Go
bindings.

Signed-off-by: Victor Toso 
---
 qapi/migration.json | 27 +++
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/qapi/migration.json b/qapi/migration.json
index 81185d4311..88ecf86ac8 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -1995,16 +1995,23 @@
 #}
 # <- { "return": { } }
 # <- {"event": "JOB_STATUS_CHANGE",
+# "timestamp": {"seconds": 1432121972, "microseconds": 744001},
 # "data": {"status": "created", "id": "snapsave0"}}
 # <- {"event": "JOB_STATUS_CHANGE",
+# "timestamp": {"seconds": 1432122172, "microseconds": 744001},
 # "data": {"status": "running", "id": "snapsave0"}}
-# <- {"event": "STOP"}
-# <- {"event": "RESUME"}
+# <- {"event": "STOP",
+# "timestamp": {"seconds": 1432122372, "microseconds": 744001} }
+# <- {"event": "RESUME",
+# "timestamp": {"seconds": 1432122572, "microseconds": 744001} }
 # <- {"event": "JOB_STATUS_CHANGE",
+# "timestamp": {"seconds": 1432122772, "microseconds": 744001},
 # "data": {"status": "waiting", "id": "snapsave0"}}
 # <- {"event": "JOB_STATUS_CHANGE",
+# "timestamp": {"seconds": 1432122972, "microseconds": 744001},
 # "data": {"status": "pending", "id": "snapsave0"}}
 # <- {"event": "JOB_STATUS_CHANGE",
+# "timestamp": {"seconds": 1432123172, "microseconds": 744001},
 # "data": {"status": "concluded", "id": "snapsave0"}}
 # -> {"execute": "query-jobs"}
 # <- {"return": [{"current-progress": 1,
@@ -2056,16 +2063,23 @@
 #}
 # <- { "return": { } }
 # <- {"event": "JOB_STATUS_CHANGE",
+# "timestamp": {"seconds": 1472124172, "microseconds": 744001},
 # "data": {"status": "created", "id": "snapload0"}}
 # <- {"event": "JOB_STATUS_CHANGE",
+# "timestamp": {"seconds": 1472125172, "microseconds": 744001},
 # "data": {"status": "running", "id": "snapload0"}}
-# <- {"event": "STOP"}
-# <- {"event": "RESUME"}
+# <- {"event": "STOP",
+# "timestamp": {"seconds": 1472125472, "microseconds": 744001} }
+# <- {"event": "RESUME",
+# "timestamp": {"seconds": 1472125872, "microseconds": 744001} }
 # <- {"event": "JOB_STATUS_CHANGE",
+# "timestamp": {"seconds": 1472126172, "microseconds": 744001},
 # "data": {"status": "waiting", "id": "snapload0"}}
 # <- {"event": "JOB_STATUS_CHANGE",
+# "timestamp": {"seconds": 1472127172, "microseconds": 744001},
 # "data": {"status": "pending", "id": "snapload0"}}
 # <- {"event": "JOB_STATUS_CHANGE",
+# "timestamp": {"seconds": 1472128172, "microseconds": 744001},
 # "data": {"status": "concluded", "id": "snapload0"}}
 # -> {"execute": "query-jobs"}
 # <- {"return": [{"current-progress": 1,
@@ -2108,14 +2122,19 @@
 #}
 # <- { "return": { } }
 # <- {"event": "JOB_STATUS_CHANGE",
+# "timestamp": {"seconds": 1442124172, "microseconds": 744001},
 # "data": {"status": "created", "id": "snapdelete0"}}
 # <- {"event": "JOB_STATUS_CHANGE",
+# "timestamp": {"seconds": 1442125172, "microseconds": 744001},
 # "data": {"status": "running", "id": "snapdelete0"}}
 # <- {"event": "JOB_STATUS_CHANGE",
+# "timestamp": {"seconds": 1442126172, "microseconds": 744001},
 # "data": {"status": "waiting", "id": "snapdelete0"}}
 # <- {"event": "JOB_STATUS_CHANGE",
+# "timestamp": {"seconds": 1442127172, "microseconds": 744001},
 # "data": {"status": "pending", "id": "snapdelete0"}}
 # <- {"event": "JOB_STATUS_CHANGE",
+# "timestamp": {"seconds": 1442128172, "microseconds": 744001},
 # "data": {"status": "concluded", "id": "snapdelete0"}}
 # -> {"execute": "query-jobs"}
 # <- {"return": [{"current-progress": 1,
-- 
2.37.2




[PATCH v2 08/10] qapi: fix examples of blockdev-add with qcow2

2022-09-01 Thread Victor Toso
The examples use "qcow2" driver with the wrong member name for
BlockdevRef alternate type. This patch changes all wrong member names
from "file" to "data-file" which is the correct member name in
BlockdevOptionsQcow2 for the BlockdevRef field.

Problem was noticed when using the example as a test case for Go
bindings.

Signed-off-by: Victor Toso 
---
 qapi/block-core.json | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 882b266532..f21fa235f2 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1541,8 +1541,8 @@
 # -> { "execute": "blockdev-add",
 #  "arguments": { "driver": "qcow2",
 # "node-name": "node1534",
-# "file": { "driver": "file",
-#   "filename": "hd1.qcow2" },
+# "data-file": { "driver": "file",
+#"filename": "hd1.qcow2" },
 # "backing": null } }
 #
 # <- { "return": {} }
@@ -4378,7 +4378,7 @@
 #  "arguments": {
 #   "driver": "qcow2",
 #   "node-name": "test1",
-#   "file": {
+#   "data-file": {
 #   "driver": "file",
 #   "filename": "test.qcow2"
 #}
@@ -4395,7 +4395,7 @@
 #   "cache": {
 #  "direct": true
 #},
-#"file": {
+#   "data-file": {
 #  "driver": "file",
 #  "filename": "/tmp/test.qcow2"
 #},
@@ -4477,7 +4477,7 @@
 #  "arguments": {
 #   "driver": "qcow2",
 #   "node-name": "node0",
-#   "file": {
+#   "data-file": {
 #   "driver": "file",
 #   "filename": "test.qcow2"
 #   }
-- 
2.37.2




[PULL 01/20] linux-user/arm: Mark the commpage executable

2022-09-01 Thread Richard Henderson
We're about to start validating PAGE_EXEC, which means
that we've got to mark the commpage executable.  We had
been placing the commpage outside of reserved_va, which
was incorrect and led to an abort.

Acked-by: Ilya Leoshkevich 
Tested-by: Ilya Leoshkevich 
Signed-off-by: Richard Henderson 
---
 linux-user/arm/target_cpu.h | 4 ++--
 linux-user/elfload.c| 6 +-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/linux-user/arm/target_cpu.h b/linux-user/arm/target_cpu.h
index 709d19bc9e..89ba274cfc 100644
--- a/linux-user/arm/target_cpu.h
+++ b/linux-user/arm/target_cpu.h
@@ -34,9 +34,9 @@ static inline unsigned long arm_max_reserved_va(CPUState *cs)
 } else {
 /*
  * We need to be able to map the commpage.
- * See validate_guest_space in linux-user/elfload.c.
+ * See init_guest_commpage in linux-user/elfload.c.
  */
-return 0xul;
+return 0xul;
 }
 }
 #define MAX_RESERVED_VA  arm_max_reserved_va
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index ce902dbd56..3e3dc02499 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -398,7 +398,8 @@ enum {
 
 static bool init_guest_commpage(void)
 {
-void *want = g2h_untagged(HI_COMMPAGE & -qemu_host_page_size);
+abi_ptr commpage = HI_COMMPAGE & -qemu_host_page_size;
+void *want = g2h_untagged(commpage);
 void *addr = mmap(want, qemu_host_page_size, PROT_READ | PROT_WRITE,
   MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
 
@@ -417,6 +418,9 @@ static bool init_guest_commpage(void)
 perror("Protecting guest commpage");
 exit(EXIT_FAILURE);
 }
+
+page_set_flags(commpage, commpage + qemu_host_page_size,
+   PAGE_READ | PAGE_EXEC | PAGE_VALID);
 return true;
 }
 
-- 
2.34.1




[PULL 07/20] accel/tcg: Introduce is_same_page()

2022-09-01 Thread Richard Henderson
From: Ilya Leoshkevich 

Introduce a function that checks whether a given address is on the same
page as where disassembly started. Having it improves readability of
the following patches.

Reviewed-by: Alistair Francis 
Signed-off-by: Ilya Leoshkevich 
Message-Id: <20220811095534.241224-3-...@linux.ibm.com>
Reviewed-by: Richard Henderson 
[rth: Make the DisasContextBase parameter const.]
Signed-off-by: Richard Henderson 
---
 include/exec/translator.h | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/include/exec/translator.h b/include/exec/translator.h
index 7db6845535..0d0bf3a31e 100644
--- a/include/exec/translator.h
+++ b/include/exec/translator.h
@@ -187,4 +187,14 @@ FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD)
 
 #undef GEN_TRANSLATOR_LD
 
+/*
+ * Return whether addr is on the same page as where disassembly started.
+ * Translators can use this to enforce the rule that only single-insn
+ * translation blocks are allowed to cross page boundaries.
+ */
+static inline bool is_same_page(const DisasContextBase *db, target_ulong addr)
+{
+return ((addr ^ db->pc_first) & TARGET_PAGE_MASK) == 0;
+}
+
 #endif /* EXEC__TRANSLATOR_H */
-- 
2.34.1




[PULL 19/20] target/riscv: Add MAX_INSN_LEN and insn_len

2022-09-01 Thread Richard Henderson
These will be useful in properly ending the TB.

Reviewed-by: Alistair Francis 
Acked-by: Ilya Leoshkevich 
Tested-by: Ilya Leoshkevich 
Signed-off-by: Richard Henderson 
---
 target/riscv/translate.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 38666ddc91..a719aa6e63 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -1022,6 +1022,14 @@ static uint32_t opcode_at(DisasContextBase *dcbase, 
target_ulong pc)
 /* Include decoders for factored-out extensions */
 #include "decode-XVentanaCondOps.c.inc"
 
+/* The specification allows for longer insns, but not supported by qemu. */
+#define MAX_INSN_LEN  4
+
+static inline int insn_len(uint16_t first_word)
+{
+return (first_word & 3) == 3 ? 4 : 2;
+}
+
 static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode)
 {
 /*
@@ -1037,7 +1045,7 @@ static void decode_opc(CPURISCVState *env, DisasContext 
*ctx, uint16_t opcode)
 };
 
 /* Check for compressed insn */
-if (extract16(opcode, 0, 2) != 3) {
+if (insn_len(opcode) == 2) {
 if (!has_ext(ctx, RVC)) {
 gen_exception_illegal(ctx);
 } else {
-- 
2.34.1




[PULL 05/20] linux-user: Clear translations on mprotect()

2022-09-01 Thread Richard Henderson
From: Ilya Leoshkevich 

Currently it's possible to execute pages that do not have PAGE_EXEC
if there is an existing translation block. Fix by invalidating TBs
that touch the affected pages.

Signed-off-by: Ilya Leoshkevich 
Message-Id: <20220817150506.592862-2-...@linux.ibm.com>
Signed-off-by: Richard Henderson 
---
 linux-user/mmap.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index 048c4135af..6a828e8418 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -177,9 +177,11 @@ int target_mprotect(abi_ulong start, abi_ulong len, int 
target_prot)
 goto error;
 }
 }
+
 page_set_flags(start, start + len, page_flags);
-mmap_unlock();
-return 0;
+tb_invalidate_phys_range(start, start + len);
+ret = 0;
+
 error:
 mmap_unlock();
 return ret;
-- 
2.34.1




[PATCH v3 01/23] i386: do not use MOVL to move data between SSE registers

2022-09-01 Thread Paolo Bonzini
Write down explicitly the load/store sequence.

Extracted from a patch by Paul Brook .

Reviewed-by: Richard Henderson 
Signed-off-by: Paolo Bonzini 
---
 target/i386/tcg/translate.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index b7972f0ff5..3237c1d8f9 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3295,8 +3295,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)));
 } else {
 rm = (modrm & 7) | REX_B(s);
-gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
-offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0)));
+tcg_gen_ld_i32(s->tmp2_i32, cpu_env,
+   offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)));
+tcg_gen_st_i32(s->tmp2_i32, cpu_env,
+   offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)));
 }
 break;
 case 0x310: /* movsd xmm, ea */
-- 
2.37.1





[PATCH v3 02/23] i386: formatting fixes

2022-09-01 Thread Paolo Bonzini
Extracted from a patch by Paul Brook .

Reviewed-by: Richard Henderson 
Signed-off-by: Paolo Bonzini 
---
 target/i386/tcg/translate.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 3237c1d8f9..25a2539d59 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3314,7 +3314,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 } else {
 rm = (modrm & 7) | REX_B(s);
 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
-offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0)));
+offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)));
 }
 break;
 case 0x012: /* movlps */
@@ -4463,7 +4463,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 /* 32 bit access */
 gen_op_ld_v(s, MO_32, s->T0, s->A0);
 tcg_gen_st32_tl(s->T0, cpu_env,
-offsetof(CPUX86State,xmm_t0.ZMM_L(0)));
+offsetof(CPUX86State, xmm_t0.ZMM_L(0)));
 break;
 case 3:
 /* 64 bit access */
@@ -4523,8 +4523,9 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 break;
 case 0xf7:
 /* maskmov : we must prepare A0 */
-if (mod != 3)
+if (mod != 3) {
 goto illegal_op;
+}
 tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]);
 gen_extu(s->aflag, s->A0);
 gen_add_A0_ds_seg(s);
-- 
2.37.1





Re: [PATCH v3 15/23] i386: Destructive vector helpers for AVX

2022-09-01 Thread Richard Henderson

On 9/1/22 08:48, Paolo Bonzini wrote:

From: Paul Brook

These helpers need to take special care to avoid overwriting source values
before the whole result has been calculated.  Currently they use a dummy
Reg typed variable to store the result then assign the whole register.
This will cause 128 bit operations to corrupt the upper half of the register,
so replace it with explicit temporaries and element assignments.

Signed-off-by: Paul Brook
Message-Id:<20220424220204.2493824-14-p...@nowt.org>
Signed-off-by: Paolo Bonzini
---
  target/i386/ops_sse.h | 556 --
  1 file changed, 262 insertions(+), 294 deletions(-)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH v3 10/23] i386: do not cast gen_helper_* function pointers

2022-09-01 Thread Richard Henderson

On 9/1/22 08:48, Paolo Bonzini wrote:

  #define OP(op, flags, a, b, c, d)   \
-{flags, {a, b, c, d} }
+{flags, {{.op = a}, {.op = b}, {.op = c}, {.op = d} } }


It would have been handy to have uppercase macro args here, because .op looks 
like...


  struct SSEOpHelper_table1 {
  int flags;
-SSEFunc_0_epp op[4];
+SSEFuncs fn[4];
  };


... a forgotten change to .fn at first glance.



  #define OP(name, op, flags, ext, mmx_name) \
-{{mmx_name, gen_helper_ ## name ## _xmm}, CPUID_EXT_ ## ext, flags}
+{{{.op = mmx_name}, {.op = gen_helper_ ## name ## _xmm} }, \
+CPUID_EXT_ ## ext, flags}


Likewise.

But either way,
Reviewed-by: Richard Henderson 


r~



[PATCH v2 03/10] qapi: fix example of query-dump-guest-memory-capability command

2022-09-01 Thread Victor Toso
Example output is missing closing curly brackets. Fix it.

Problem was noticed when trying to load the example into python's json
library.

Signed-off-by: Victor Toso 
---
 qapi/dump.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qapi/dump.json b/qapi/dump.json
index 90859c5483..6fc215dd47 100644
--- a/qapi/dump.json
+++ b/qapi/dump.json
@@ -195,7 +195,7 @@
 #
 # -> { "execute": "query-dump-guest-memory-capability" }
 # <- { "return": { "formats":
-#  ["elf", "kdump-zlib", "kdump-lzo", "kdump-snappy"] }
+#  ["elf", "kdump-zlib", "kdump-lzo", "kdump-snappy"] } }
 #
 ##
 { 'command': 'query-dump-guest-memory-capability',
-- 
2.37.2




Re: [PATCH v1 16/16] RFC: add a generator for qapi's examples

2022-09-01 Thread Victor Toso
Hi,

On Wed, Aug 31, 2022 at 04:57:20PM +0200, Markus Armbruster wrote:
> Victor Toso  writes:
> 
> > Hi,
> >
> > On Wed, Aug 31, 2022 at 02:01:54PM +0200, Markus Armbruster wrote:
> >> Victor Toso  writes:
> >>
> >> > The goal of this generator is to validate QAPI examples and transform
> >> > them into a format that can be used for 3rd party applications to
> >> > validate their QAPI/QMP introspection.
> >> >
> >> > For each Example section, we parse server and client messages into a
> >> > python dictionary. This step alone has found several ill formatted
> >> > JSON messages in the examples.
> >> >
> >> > The generator outputs another JSON file with all the examples in the
> >> > QAPI module that they came from. This can be used to validate the
> >> > introspection between QAPI/QMP to language bindings.
> >> >
> >> > When used with the POC qapi-go branch, we have found bad QMP messages
> >> > with wrong member names, mandatory members that were missing and
> >> > optional members that were being set with null (not needed).
> >> >
> >> > A simple example of the output format is:
> >> >
> >> >  { "examples": [
> >> >{
> >> >  "id": "ksuxwzfayw",
> >> >  "client": [
> >> >  {
> >> >"sequence-order": 1
> >> >"message-type": "command",
> >> >"message":
> >> >{ "arguments":
> >> >  { "device": "scratch", "size": 1073741824 },
> >> >  "execute": "block_resize"
> >> >},
> >> > } ],
> >> > "server": [
> >> > {
> >> >   "sequence-order": 2
> >> >   "message-type": "return",
> >> >   "message": { "return": {} },
> >> > } ]
> >> > }
> >> >   ] }
> >> >
> >> > If this idea seems reasonable, we can add python-qemu-qmp to validate
> >> > each message at generation time already.
> >> >
> >> > Signed-off-by: Victor Toso 
> >>
> >> If I understand you correctly, there are two benefits:
> >>
> >> 1. Mechanical syntax check for examples
> >>
> >>Love it.
> >
> > Not just JSON syntax but can be extend to the introspection
> > layer. Errors like wrong member names would fail while parsing
> > the examples (issues such as fixed by patches 11 and 13/16 should
> > not happen anymore).
> 
> It's also a mechanical check against the schema.  Still love it :)

Great :)

> >> 2. Can extract examples for use as test cases
> >>
> >>Sounds good to me.  Possible redundancy with existing tests.
> >>Probably nothing to worry about.
> >>
> >>Can you explain in a bit more detail how the extracted data
> >>is (to be) used?
> >
> > Sure.
> >
> > The Golang test that consumes this is 152 lines of code [0]. The
> > idea is that we can use the examples to feed Golang unmarshalling
> > code and then marshall it back to JSON and compare input JSON
> > with output JSON and see that their content matches.
> >
> > [0] 
> > https://gitlab.com/victortoso/qapi-go/-/blob/wip-v3/test/examples_test.go
> >
> > I have generated the examples with this patch series and stored
> > the output here [1]
> >
> > [1] https://gitlab.com/victortoso/qapi-go/-/tree/wip-v3/test/data/examples
> >
> > The examples are QMP messages that are either sent by Client "->"
> > or sent by Server "<-". The order matters so I take the order set
> > in the examples and store it as "sequence-order".
> >
> > In the Go test code, I follow the sequence-order. One example of
> > this being useful is that we know which Return type to expect
> > after a Command is issued.
> >
> > I've also included metadata about the type of message, which is
> > one of three options: command, event or return. (Errors are
> > return too).
> >
> > This is important because it makes the tests very easy to write.
> > Different Unmarshal/Marshal code can be set in the code block of
> > the specific message type.
> >
> > --
> >
> > The things that makes me quite excited with this idea are:
> >
> >  1. We have valid functional examples documented. If the examples
> > break, we would have the software in place to know it (plug
> > to ci or some other ninja check seems reasonable to me)
> >
> >  2. Developers should get more interested in documenting examples
> > as that alone is is a valid test case, even if only useful
> > for language binding's syntax.
> 
> Thanks!  Would you like to work some of this into your commit message?

Yeah. I'll resend this series fixing the style you have proposed
and I'll be removing the patches that might need some extra
discussion, like this rfc and examples that are cut short with a
comment.

I'll improve this generator and send it later, probably after the
next iteration of qapi-go. This also gives some room to feedback
from others, if any.

Cheers,
Victor


signature.asc
Description: PGP signature


Re: [PATCH v5 06/18] dump: Rework dump_calculate_size function

2022-09-01 Thread Janis Schoetterl-Glausch
On Thu, 2022-08-11 at 12:10 +, Janosch Frank wrote:
> dump_calculate_size() sums up all the sizes of the guest memory
> blocks. Since we already have a function that calculates the size of a
> single memory block (dump_get_memblock_size()) we can simply iterate
> over the blocks and use the function instead of calculating the size
> ourselves.
> 
> Signed-off-by: Janosch Frank 
> Reviewed-by: Marc-André Lureau 

Reviewed-by: Janis Schoetterl-Glausch 

> ---
>  dump/dump.c | 22 --
>  1 file changed, 8 insertions(+), 14 deletions(-)
> 
> diff --git a/dump/dump.c b/dump/dump.c
> index b043337bc7..d82cc46d7d 100644
> --- a/dump/dump.c
> +++ b/dump/dump.c
> @@ -1548,25 +1548,19 @@ bool qemu_system_dump_in_progress(void)
>  return (qatomic_read(>status) == DUMP_STATUS_ACTIVE);
>  }
>  
> -/* calculate total size of memory to be dumped (taking filter into
> - * acoount.) */
> +/*
> + * calculate total size of memory to be dumped (taking filter into
> + * account.)
> + */
>  static int64_t dump_calculate_size(DumpState *s)
>  {
>  GuestPhysBlock *block;
> -int64_t size = 0, total = 0, left = 0, right = 0;
> +int64_t total = 0;
>  
>  QTAILQ_FOREACH(block, >guest_phys_blocks.head, next) {
> -if (dump_has_filter(s)) {
> -/* calculate the overlapped region. */
> -left = MAX(s->filter_area_begin, block->target_start);
> -right = MIN(s->filter_area_begin + s->filter_area_length, 
> block->target_end);
> -size = right - left;
> -size = size > 0 ? size : 0;
> -} else {
> -/* count the whole region in */
> -size = (block->target_end - block->target_start);
> -}
> -total += size;
> +total += dump_filtered_memblock_size(block,
> + s->filter_area_begin,
> + s->filter_area_length);
>  }
>  
>  return total;




Re: [PATCH v5 07/18] dump: Split elf header functions into prepare and write

2022-09-01 Thread Janis Schoetterl-Glausch
On Thu, 2022-08-11 at 12:11 +, Janosch Frank wrote:
> Let's split the write from the modification of the elf header so we
> can consolidate the write of the data in one function.
> 
> Signed-off-by: Janosch Frank 

This is cosmetic only, right?

Reviewed-by: Janis Schoetterl-Glausch 





Re: [PATCH] tests: mark io-command test as skipped if socat is missing

2022-09-01 Thread Daniel P . Berrangé
On Thu, Sep 01, 2022 at 03:04:14PM +0400, marcandre.lur...@redhat.com wrote:
> From: Marc-André Lureau 
> 
> Signed-off-by: Marc-André Lureau 
> ---
>  tests/unit/test-io-channel-command.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/tests/unit/test-io-channel-command.c 
> b/tests/unit/test-io-channel-command.c
> index 99056e07c0..aa09c559cd 100644
> --- a/tests/unit/test-io-channel-command.c
> +++ b/tests/unit/test-io-channel-command.c
> @@ -41,7 +41,8 @@ static void test_io_channel_command_fifo(bool async)
>  
>  unlink(TEST_FIFO);
>  if (access("/bin/socat", X_OK) < 0) {
> -return; /* Pretend success if socat is not present */
> +g_test_skip("socat is missing");
> +return;
>  }
>  if (mkfifo(TEST_FIFO, 0600) < 0) {
>  abort();

Reviewed-by: Daniel P. Berrangé 

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH 0/2] expose host-phys-bits to guest

2022-09-01 Thread Xiaoyao Li

On 8/31/2022 8:50 PM, Gerd Hoffmann wrote:

When the guest (firmware specifically) knows how big
the address space actually is it can be used better.

Some more background:
   https://bugzilla.redhat.com/show_bug.cgi?id=2084533


QEMU enables host-phys-bits for "-cpu host/max" in 
host_cpu_max_instance_init();


I think the problem is that none of the named CPU models have 
phys_bits defined. Thus they all have "cpu->phys_bits == 0", which leads 
to cpu->phys_bits = TCG_PHYS_ADDR_BITS (36 for 32-bit builds and 40 for 
64-bit builds).


Anyway, IMO, the guest, including guest firmware, should always consult 
CPUID leaf 0x80000008 for the physical address length. It is the duty of 
the userspace VMM, here QEMU, to ensure that the VM's physical address 
length does not exceed the host's. If the userspace VMM cannot ensure 
this, the guest is likely to hit problems.



This is a RfC series that exposes the information via cpuid.

take care,
   Gerd

Gerd Hoffmann (2):
   [hack] reserve bit KVM_HINTS_HOST_PHYS_BITS
   [RfC] expose host-phys-bits to guest

  include/standard-headers/asm-x86/kvm_para.h | 3 ++-
  target/i386/cpu.h   | 3 ---
  hw/i386/microvm.c   | 6 +-
  target/i386/cpu.c   | 3 +--
  target/i386/host-cpu.c  | 4 +++-
  target/i386/kvm/kvm.c   | 1 +
  6 files changed, 12 insertions(+), 8 deletions(-)






Re: [PATCH 03/51] block: Unify the get_tmp_filename() implementation

2022-09-01 Thread Bin Meng
Hi Marc-André,

On Wed, Aug 31, 2022 at 8:54 PM Marc-André Lureau
 wrote:
>
> Hi Bin
>
> On Wed, Aug 24, 2022 at 1:42 PM Bin Meng  wrote:
>>
>> From: Bin Meng 
>>
>> At present get_tmp_filename() has platform specific implementations
>> to get the directory to use for temporary files. Switch over to use
>> g_get_tmp_dir() which works on all supported platforms.
>>
>
> It "works" quite differently though. Is this patch really necessary here?

Without this patch, the qtest builds on Windows do not have any
problems, so it is optional. I put it in the same series because it
shares the same context: the use of a hardcoded /tmp directory name.

>
> If yes, please explain why.
>
> If not, I suggest you drop optional / rfc / "nice to have" patches from the 
> series. It will help to get it merged faster.

I can drop this single patch and send another single patch if this is
the desired practice.

>
> thanks

Regards,
Bin



[PULL 20/20] target/riscv: Make translator stop before the end of a page

2022-09-01 Thread Richard Henderson
Right now the translator stops right *after* the end of a page, which
breaks reporting of fault locations when the last instruction of a
multi-insn translation block crosses a page boundary.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1155
Reviewed-by: Alistair Francis 
Acked-by: Ilya Leoshkevich 
Tested-by: Ilya Leoshkevich 
Signed-off-by: Richard Henderson 
---
 target/riscv/translate.c  | 17 +--
 tests/tcg/riscv64/noexec.c| 79 +++
 tests/tcg/riscv64/Makefile.target |  1 +
 3 files changed, 93 insertions(+), 4 deletions(-)
 create mode 100644 tests/tcg/riscv64/noexec.c

diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index a719aa6e63..f8af6daa70 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -1154,12 +1154,21 @@ static void riscv_tr_translate_insn(DisasContextBase 
*dcbase, CPUState *cpu)
 }
 ctx->nftemp = 0;
 
+/* Only the first insn within a TB is allowed to cross a page boundary. */
 if (ctx->base.is_jmp == DISAS_NEXT) {
-target_ulong page_start;
-
-page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
-if (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE) {
+if (!is_same_page(>base, ctx->base.pc_next)) {
 ctx->base.is_jmp = DISAS_TOO_MANY;
+} else {
+unsigned page_ofs = ctx->base.pc_next & ~TARGET_PAGE_MASK;
+
+if (page_ofs > TARGET_PAGE_SIZE - MAX_INSN_LEN) {
+uint16_t next_insn = cpu_lduw_code(env, ctx->base.pc_next);
+int len = insn_len(next_insn);
+
+if (!is_same_page(>base, ctx->base.pc_next + len)) {
+ctx->base.is_jmp = DISAS_TOO_MANY;
+}
+}
 }
 }
 }
diff --git a/tests/tcg/riscv64/noexec.c b/tests/tcg/riscv64/noexec.c
new file mode 100644
index 00..86f64b28db
--- /dev/null
+++ b/tests/tcg/riscv64/noexec.c
@@ -0,0 +1,79 @@
+#include "../multiarch/noexec.c.inc"
+
+static void *arch_mcontext_pc(const mcontext_t *ctx)
+{
+return (void *)ctx->__gregs[REG_PC];
+}
+
+static int arch_mcontext_arg(const mcontext_t *ctx)
+{
+return ctx->__gregs[REG_A0];
+}
+
+static void arch_flush(void *p, int len)
+{
+__builtin___clear_cache(p, p + len);
+}
+
+extern char noexec_1[];
+extern char noexec_2[];
+extern char noexec_end[];
+
+asm(".option push\n"
+".option norvc\n"
+"noexec_1:\n"
+"   li a0,1\n"   /* a0 is 0 on entry, set 1. */
+"noexec_2:\n"
+"   li a0,2\n"  /* a0 is 0/1; set 2. */
+"   ret\n"
+"noexec_end:\n"
+".option pop");
+
+int main(void)
+{
+struct noexec_test noexec_tests[] = {
+{
+.name = "fallthrough",
+.test_code = noexec_1,
+.test_len = noexec_end - noexec_1,
+.page_ofs = noexec_1 - noexec_2,
+.entry_ofs = noexec_1 - noexec_2,
+.expected_si_ofs = 0,
+.expected_pc_ofs = 0,
+.expected_arg = 1,
+},
+{
+.name = "jump",
+.test_code = noexec_1,
+.test_len = noexec_end - noexec_1,
+.page_ofs = noexec_1 - noexec_2,
+.entry_ofs = 0,
+.expected_si_ofs = 0,
+.expected_pc_ofs = 0,
+.expected_arg = 0,
+},
+{
+.name = "fallthrough [cross]",
+.test_code = noexec_1,
+.test_len = noexec_end - noexec_1,
+.page_ofs = noexec_1 - noexec_2 - 2,
+.entry_ofs = noexec_1 - noexec_2 - 2,
+.expected_si_ofs = 0,
+.expected_pc_ofs = -2,
+.expected_arg = 1,
+},
+{
+.name = "jump [cross]",
+.test_code = noexec_1,
+.test_len = noexec_end - noexec_1,
+.page_ofs = noexec_1 - noexec_2 - 2,
+.entry_ofs = -2,
+.expected_si_ofs = 0,
+.expected_pc_ofs = -2,
+.expected_arg = 0,
+},
+};
+
+return test_noexec(noexec_tests,
+   sizeof(noexec_tests) / sizeof(noexec_tests[0]));
+}
diff --git a/tests/tcg/riscv64/Makefile.target 
b/tests/tcg/riscv64/Makefile.target
index d41bf6d60d..b5b89dfb0e 100644
--- a/tests/tcg/riscv64/Makefile.target
+++ b/tests/tcg/riscv64/Makefile.target
@@ -3,3 +3,4 @@
 
 VPATH += $(SRC_PATH)/tests/tcg/riscv64
 TESTS += test-div
+TESTS += noexec
-- 
2.34.1




[PULL 18/20] target/i386: Make translator stop before the end of a page

2022-09-01 Thread Richard Henderson
From: Ilya Leoshkevich 

Right now the translator stops right *after* the end of a page, which
breaks reporting of fault locations when the last instruction of a
multi-insn translation block crosses a page boundary.

An implementation, like the one arm and s390x have, would require an
i386 length disassembler, which is burdensome to maintain. Another
alternative would be to single-step at the end of a guest page, but
this may come with a performance impact.

Fix by snapshotting disassembly state and restoring it after we figure
out we crossed a page boundary. This includes rolling back cc_op
updates and emitted ops.

Signed-off-by: Ilya Leoshkevich 
Reviewed-by: Richard Henderson 
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1143
Message-Id: <20220817150506.592862-4-...@linux.ibm.com>
[rth: Simplify end-of-insn cross-page checks.]
Signed-off-by: Richard Henderson 
---
 target/i386/tcg/translate.c  | 64 ---
 tests/tcg/x86_64/noexec.c| 75 
 tests/tcg/x86_64/Makefile.target |  3 +-
 3 files changed, 116 insertions(+), 26 deletions(-)
 create mode 100644 tests/tcg/x86_64/noexec.c

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 4836c889e0..b184fe33b8 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -130,6 +130,7 @@ typedef struct DisasContext {
 TCGv_i64 tmp1_i64;
 
 sigjmp_buf jmpbuf;
+TCGOp *prev_insn_end;
 } DisasContext;
 
 /* The environment in which user-only runs is constrained. */
@@ -2008,6 +2009,12 @@ static uint64_t advance_pc(CPUX86State *env, 
DisasContext *s, int num_bytes)
 {
 uint64_t pc = s->pc;
 
+/* This is a subsequent insn that crosses a page boundary.  */
+if (s->base.num_insns > 1 &&
+!is_same_page(>base, s->pc + num_bytes - 1)) {
+siglongjmp(s->jmpbuf, 2);
+}
+
 s->pc += num_bytes;
 if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
 /* If the instruction's 16th byte is on a different page than the 1st, 
a
@@ -4556,6 +4563,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 int modrm, reg, rm, mod, op, opreg, val;
 target_ulong next_eip, tval;
 target_ulong pc_start = s->base.pc_next;
+bool orig_cc_op_dirty = s->cc_op_dirty;
+CCOp orig_cc_op = s->cc_op;
 
 s->pc_start = s->pc = pc_start;
 s->override = -1;
@@ -4568,9 +4577,22 @@ static target_ulong disas_insn(DisasContext *s, CPUState 
*cpu)
 s->rip_offset = 0; /* for relative ip address */
 s->vex_l = 0;
 s->vex_v = 0;
-if (sigsetjmp(s->jmpbuf, 0) != 0) {
+switch (sigsetjmp(s->jmpbuf, 0)) {
+case 0:
+break;
+case 1:
 gen_exception_gpf(s);
 return s->pc;
+case 2:
+/* Restore state that may affect the next instruction. */
+s->cc_op_dirty = orig_cc_op_dirty;
+s->cc_op = orig_cc_op;
+s->base.num_insns--;
+tcg_remove_ops_after(s->prev_insn_end);
+s->base.is_jmp = DISAS_TOO_MANY;
+return pc_start;
+default:
+g_assert_not_reached();
 }
 
 prefixes = 0;
@@ -8632,6 +8654,7 @@ static void i386_tr_insn_start(DisasContextBase *dcbase, 
CPUState *cpu)
 {
 DisasContext *dc = container_of(dcbase, DisasContext, base);
 
+dc->prev_insn_end = tcg_last_op();
 tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
 }
 
@@ -8652,31 +8675,22 @@ static void i386_tr_translate_insn(DisasContextBase 
*dcbase, CPUState *cpu)
 #endif
 
 pc_next = disas_insn(dc, cpu);
-
-if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
-/* if single step mode, we generate only one instruction and
-   generate an exception */
-/* if irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear
-   the flag and abort the translation to give the irqs a
-   chance to happen */
-dc->base.is_jmp = DISAS_TOO_MANY;
-} else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
-   && ((pc_next & TARGET_PAGE_MASK)
-   != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
-   & TARGET_PAGE_MASK)
-   || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
-/* Do not cross the boundary of the pages in icount mode,
-   it can cause an exception. Do it only when boundary is
-   crossed by the first instruction in the block.
-   If current instruction already crossed the bound - it's ok,
-   because an exception hasn't stopped this code.
- */
-dc->base.is_jmp = DISAS_TOO_MANY;
-} else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
-dc->base.is_jmp = DISAS_TOO_MANY;
-}
-
 dc->base.pc_next = pc_next;
+
+if (dc->base.is_jmp == DISAS_NEXT) {
+if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
+/*
+ * If single step mode, we generate only one instruction and
+ * generate an exception.
+ 

[PATCH v3 03/23] i386: Add ZMM_OFFSET macro

2022-09-01 Thread Paolo Bonzini
From: Paul Brook 

Add a convenience macro to get the address of an xmm_regs element within
CPUX86State.

This was originally going to be the basis of an implementation that broke
operations into 128 bit chunks. I scrapped that idea, so this is now a purely
cosmetic change. But I think a worthwhile one - it reduces the number of
function calls that need to be split over multiple lines.

No functional changes.

Signed-off-by: Paul Brook 
Reviewed-by: Richard Henderson 
Message-Id: <20220424220204.2493824-9-p...@nowt.org>
Signed-off-by: Paolo Bonzini 
---
 target/i386/tcg/translate.c | 60 +
 1 file changed, 27 insertions(+), 33 deletions(-)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 25a2539d59..cba862746b 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2777,6 +2777,8 @@ static inline void gen_op_movq_env_0(DisasContext *s, int 
d_offset)
 tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset);
 }
 
+#define ZMM_OFFSET(reg) offsetof(CPUX86State, xmm_regs[reg])
+
 typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
 typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
 typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
@@ -3198,13 +3200,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 if (mod == 3)
 goto illegal_op;
 gen_lea_modrm(env, s, modrm);
-gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
+gen_sto_env_A0(s, ZMM_OFFSET(reg));
 break;
 case 0x3f0: /* lddqu */
 if (mod == 3)
 goto illegal_op;
 gen_lea_modrm(env, s, modrm);
-gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
+gen_ldo_env_A0(s, ZMM_OFFSET(reg));
 break;
 case 0x22b: /* movntss */
 case 0x32b: /* movntsd */
@@ -3240,15 +3242,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 #ifdef TARGET_X86_64
 if (s->dflag == MO_64) {
 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
-tcg_gen_addi_ptr(s->ptr0, cpu_env,
- offsetof(CPUX86State,xmm_regs[reg]));
+tcg_gen_addi_ptr(s->ptr0, cpu_env, ZMM_OFFSET(reg));
 gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0);
 } else
 #endif
 {
 gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
-tcg_gen_addi_ptr(s->ptr0, cpu_env,
- offsetof(CPUX86State,xmm_regs[reg]));
+tcg_gen_addi_ptr(s->ptr0, cpu_env, ZMM_OFFSET(reg));
 tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
 gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32);
 }
@@ -3273,11 +3273,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 case 0x26f: /* movdqu xmm, ea */
 if (mod != 3) {
 gen_lea_modrm(env, s, modrm);
-gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
+gen_ldo_env_A0(s, ZMM_OFFSET(reg));
 } else {
 rm = (modrm & 7) | REX_B(s);
-gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]),
-offsetof(CPUX86State,xmm_regs[rm]));
+gen_op_movo(s, ZMM_OFFSET(reg), ZMM_OFFSET(rm));
 }
 break;
 case 0x210: /* movss xmm, ea */
@@ -,7 +3332,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 case 0x212: /* movsldup */
 if (mod != 3) {
 gen_lea_modrm(env, s, modrm);
-gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
+gen_ldo_env_A0(s, ZMM_OFFSET(reg));
 } else {
 rm = (modrm & 7) | REX_B(s);
 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)),
@@ -3375,7 +3374,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 case 0x216: /* movshdup */
 if (mod != 3) {
 gen_lea_modrm(env, s, modrm);
-gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
+gen_ldo_env_A0(s, ZMM_OFFSET(reg));
 } else {
 rm = (modrm & 7) | REX_B(s);
 gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)),
@@ -3397,8 +3396,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 goto illegal_op;
 field_length = x86_ldub_code(env, s) & 0x3F;
 bit_index = x86_ldub_code(env, s) & 0x3F;
-tcg_gen_addi_ptr(s->ptr0, cpu_env,
-offsetof(CPUX86State,xmm_regs[reg]));
+tcg_gen_addi_ptr(s->ptr0, cpu_env, ZMM_OFFSET(reg));
 if (b1 == 1)
 

[PATCH v3 11/23] i386: Add CHECK_NO_VEX

2022-09-01 Thread Paolo Bonzini
From: Paul Brook 

Reject invalid VEX encodings on MMX instructions.

Signed-off-by: Paul Brook 
Reviewed-by: Richard Henderson 
Message-Id: <20220424220204.2493824-7-p...@nowt.org>
Signed-off-by: Paolo Bonzini 
---
 target/i386/tcg/translate.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index c6a9a5b1d4..99c84473f4 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3186,6 +3186,12 @@ static const struct SSEOpHelper_table7 
sse_op_table7[256] = {
 #undef BLENDV_OP
 #undef SPECIAL_OP
 
+/* VEX prefix not allowed */
+#define CHECK_NO_VEX(s) do { \
+if (s->prefix & PREFIX_VEX) \
+goto illegal_op; \
+} while (0)
+
 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
 target_ulong pc_start)
 {
@@ -3272,6 +3278,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 b |= (b1 << 8);
 switch(b) {
 case 0x0e7: /* movntq */
+CHECK_NO_VEX(s);
 if (mod == 3) {
 goto illegal_op;
 }
@@ -3307,6 +3314,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 }
 break;
 case 0x6e: /* movd mm, ea */
+CHECK_NO_VEX(s);
 #ifdef TARGET_X86_64
 if (s->dflag == MO_64) {
 gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
@@ -3338,6 +3346,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 }
 break;
 case 0x6f: /* movq mm, ea */
+CHECK_NO_VEX(s);
 if (mod != 3) {
 gen_lea_modrm(env, s, modrm);
 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
@@ -3473,6 +3482,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 break;
 case 0x178:
 case 0x378:
+CHECK_NO_VEX(s);
 {
 int bit_index, field_length;
 
@@ -3492,6 +3502,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 }
 break;
 case 0x7e: /* movd ea, mm */
+CHECK_NO_VEX(s);
 #ifdef TARGET_X86_64
 if (s->dflag == MO_64) {
 tcg_gen_ld_i64(s->T0, cpu_env,
@@ -3532,6 +3543,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 gen_op_movq_env_0(s, offsetof(CPUX86State, 
xmm_regs[reg].ZMM_Q(1)));
 break;
 case 0x7f: /* movq ea, mm */
+CHECK_NO_VEX(s);
 if (mod != 3) {
 gen_lea_modrm(env, s, modrm);
 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
@@ -3614,6 +3626,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 offsetof(CPUX86State, xmm_t0.ZMM_L(1)));
 op1_offset = offsetof(CPUX86State,xmm_t0);
 } else {
+CHECK_NO_VEX(s);
 tcg_gen_movi_tl(s->T0, val);
 tcg_gen_st32_tl(s->T0, cpu_env,
 offsetof(CPUX86State, mmx_t0.MMX_L(0)));
@@ -3653,6 +3666,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 break;
 case 0x02a: /* cvtpi2ps */
 case 0x12a: /* cvtpi2pd */
+CHECK_NO_VEX(s);
 gen_helper_enter_mmx(cpu_env);
 if (mod != 3) {
 gen_lea_modrm(env, s, modrm);
@@ -3698,6 +3712,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 case 0x12c: /* cvttpd2pi */
 case 0x02d: /* cvtps2pi */
 case 0x12d: /* cvtpd2pi */
+CHECK_NO_VEX(s);
 gen_helper_enter_mmx(cpu_env);
 if (mod != 3) {
 gen_lea_modrm(env, s, modrm);
@@ -3771,6 +3786,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 tcg_gen_st16_tl(s->T0, cpu_env,
 
offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val)));
 } else {
+CHECK_NO_VEX(s);
 val &= 3;
 tcg_gen_st16_tl(s->T0, cpu_env,
 
offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val)));
@@ -3810,6 +3826,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 }
 break;
 case 0x2d6: /* movq2dq */
+CHECK_NO_VEX(s);
 gen_helper_enter_mmx(cpu_env);
 rm = (modrm & 7);
 gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)),
@@ -3817,6 +3834,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
 gen_op_movq_env_0(s, offsetof(CPUX86State, 
xmm_regs[reg].ZMM_Q(1)));
 break;
 case 0x3d6: /* movdq2q */
+CHECK_NO_VEX(s);
 gen_helper_enter_mmx(cpu_env);
 rm = (modrm & 7) | REX_B(s);
 gen_op_movq(s, 

[PATCH v3 16/23] i386: Floating point arithmetic helper AVX prep

2022-09-01 Thread Paolo Bonzini
From: Paul Brook 

Prepare the "easy" floating point vector helpers for AVX

No functional changes to existing helpers.

Signed-off-by: Paul Brook 
Message-Id: <20220424220204.2493824-16-p...@nowt.org>
Reviewed-by: Richard Henderson 
Signed-off-by: Paolo Bonzini 
---
 target/i386/ops_sse.h | 138 --
 1 file changed, 92 insertions(+), 46 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 7d48c05693..d881d03228 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -553,40 +553,58 @@ void glue(helper_pshufhw, SUFFIX)(Reg *d, Reg *s, int 
order)
 }
 #endif
 
-#if SHIFT == 1
+#if SHIFT >= 1
 /* FPU ops */
 /* XXX: not accurate */
 
-#define SSE_HELPER_S(name, F)   \
-void glue(helper_ ## name ## ps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)\
+#define SSE_HELPER_P(name, F)   \
+void glue(helper_ ## name ## ps, SUFFIX)(CPUX86State *env,  \
+Reg *d, Reg *s) \
 {   \
-d->ZMM_S(0) = F(32, d->ZMM_S(0), s->ZMM_S(0));  \
-d->ZMM_S(1) = F(32, d->ZMM_S(1), s->ZMM_S(1));  \
-d->ZMM_S(2) = F(32, d->ZMM_S(2), s->ZMM_S(2));  \
-d->ZMM_S(3) = F(32, d->ZMM_S(3), s->ZMM_S(3));  \
+Reg *v = d; \
+int i;  \
+for (i = 0; i < 2 << SHIFT; i++) {  \
+d->ZMM_S(i) = F(32, v->ZMM_S(i), s->ZMM_S(i));  \
+}   \
 }   \
 \
-void helper_ ## name ## ss(CPUX86State *env, Reg *d, Reg *s)\
+void glue(helper_ ## name ## pd, SUFFIX)(CPUX86State *env,  \
+Reg *d, Reg *s) \
 {   \
-d->ZMM_S(0) = F(32, d->ZMM_S(0), s->ZMM_S(0));  \
-}   \
-\
-void glue(helper_ ## name ## pd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)\
-{   \
-d->ZMM_D(0) = F(64, d->ZMM_D(0), s->ZMM_D(0));  \
-d->ZMM_D(1) = F(64, d->ZMM_D(1), s->ZMM_D(1));  \
-}   \
-\
-void helper_ ## name ## sd(CPUX86State *env, Reg *d, Reg *s)\
-{   \
-d->ZMM_D(0) = F(64, d->ZMM_D(0), s->ZMM_D(0));  \
+Reg *v = d; \
+int i;  \
+for (i = 0; i < 1 << SHIFT; i++) {  \
+d->ZMM_D(i) = F(64, v->ZMM_D(i), s->ZMM_D(i));  \
+}   \
 }
 
+#if SHIFT == 1
+
+#define SSE_HELPER_S(name, F)   \
+SSE_HELPER_P(name, F)   \
+\
+void helper_ ## name ## ss(CPUX86State *env, Reg *d, Reg *s)\
+{   \
+Reg *v = d; \
+d->ZMM_S(0) = F(32, v->ZMM_S(0), s->ZMM_S(0));  \
+}   \
+\
+void helper_ ## name ## sd(CPUX86State *env, Reg *d, Reg *s)\
+{   \
+Reg *v = d; \
+d->ZMM_D(0) = F(64, v->ZMM_D(0), s->ZMM_D(0));  \
+}
+
+#else
+
+#define SSE_HELPER_S(name, F) SSE_HELPER_P(name, F)
+
+#endif
+
 #define FPU_ADD(size, a, b) float ## size ## _add(a, b, >sse_status)
 #define FPU_SUB(size, a, b) float ## size ## _sub(a, b, >sse_status)
 #define FPU_MUL(size, a, b) float ## size ## _mul(a, b, >sse_status)
 #define FPU_DIV(size, a, b) float ## size ## _div(a, b, >sse_status)
-#define FPU_SQRT(size, a, b) float ## size ## _sqrt(b, >sse_status)
 
 /* Note 

[PATCH v2 06/10] qapi: fix example of DEVICE_UNPLUG_GUEST_ERROR event

2022-09-01 Thread Victor Toso
Example output is missing a ',' delimiter and it has an extra ending
curly bracket. Fix it.

Problem was noticed when trying to load the example into python's json
library.

Signed-off-by: Victor Toso 
---
 qapi/qdev.json | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/qapi/qdev.json b/qapi/qdev.json
index 26cd10106b..2708fb4e99 100644
--- a/qapi/qdev.json
+++ b/qapi/qdev.json
@@ -150,10 +150,9 @@
 #
 # Example:
 #
-# <- { "event": "DEVICE_UNPLUG_GUEST_ERROR"
+# <- { "event": "DEVICE_UNPLUG_GUEST_ERROR",
 #  "data": { "device": "core1",
 #"path": "/machine/peripheral/core1" },
-#  },
 #  "timestamp": { "seconds": 1615570772, "microseconds": 202844 } }
 #
 ##
-- 
2.37.2




Re: [PATCH 34/51] tests/qtest: bios-tables-test: Adapt the case for win32

2022-09-01 Thread Marc-André Lureau
On Wed, Aug 24, 2022 at 3:02 PM Bin Meng  wrote:

> From: Bin Meng 
>
> Single quotes in the arguments (oem_id='CRASH ') are not removed in
> the Windows environment before it is passed to the QEMU executable.
> The space in the argument causes the "-acpitable" option parser to
> think that all of its parameters are done, hence it complains:
>
>   '-acpitable' requires one of 'data' or 'file'
>
> Change to use double quotes which works fine on all platforms.
>
> Also /dev/null does not work on win32, and nul should be used.
>
> Signed-off-by: Bin Meng 
>

Reviewed-by: Marc-André Lureau 


> ---
>
>  tests/qtest/bios-tables-test.c | 12 +---
>  1 file changed, 9 insertions(+), 3 deletions(-)
>
> diff --git a/tests/qtest/bios-tables-test.c
> b/tests/qtest/bios-tables-test.c
> index 36783966b0..0148ce388c 100644
> --- a/tests/qtest/bios-tables-test.c
> +++ b/tests/qtest/bios-tables-test.c
> @@ -1615,6 +1615,12 @@ static void test_acpi_virt_viot(void)
>  free_test_data();
>  }
>
> +#ifndef _WIN32
> +# define DEV_NULL "/dev/null"
> +#else
> +# define DEV_NULL "nul"
> +#endif
> +
>  static void test_acpi_q35_slic(void)
>  {
>  test_data data = {
> @@ -1622,9 +1628,9 @@ static void test_acpi_q35_slic(void)
>  .variant = ".slic",
>  };
>
> -test_acpi_one("-acpitable sig=SLIC,oem_id='CRASH ',oem_table_id='ME',"
> -  "oem_rev=2210,asl_compiler_id='qemu',"
> -  "asl_compiler_rev=,data=/dev/null",
> +test_acpi_one("-acpitable sig=SLIC,oem_id=\"CRASH \",oem_table_id=ME,"
> +  "oem_rev=2210,asl_compiler_id=qemu,"
> +  "asl_compiler_rev=,data=" DEV_NULL,
>);
>  free_test_data();
>  }
> --
> 2.34.1
>
>
>

-- 
Marc-André Lureau


[PATCH v2 05/10] qapi: fix example of NIC_RX_FILTER_CHANGED event

2022-09-01 Thread Victor Toso
Example output has an extra ending curly bracket. Fix it.

Problem was noticed when trying to load the example into python's json
library.

Signed-off-by: Victor Toso 
---
 qapi/net.json | 1 -
 1 file changed, 1 deletion(-)

diff --git a/qapi/net.json b/qapi/net.json
index 75ba2cb989..dd088c09c5 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -757,7 +757,6 @@
 #  "data": { "name": "vnet0",
 #"path": "/machine/peripheral/vnet0/virtio-backend" },
 #  "timestamp": { "seconds": 1368697518, "microseconds": 326866 } }
-#}
 #
 ##
 { 'event': 'NIC_RX_FILTER_CHANGED',
-- 
2.37.2




Re: [PATCH 1/2] tpm_emulator: Use latest tpm_ioctl.h from swtpm project

2022-09-01 Thread Stefan Berger




On 8/31/22 07:36, Marc-André Lureau wrote:



On Fri, Aug 26, 2022 at 7:52 PM Stefan Berger wrote:


Use the latest tpm_ioctl.h from upstream swtpm project.

Signed-off-by: Stefan Berger <stef...@linux.ibm.com>


Reviewed-by: Marc-André Lureau



Thanks. I had to rebase and modify it a bit now that the PR with the 
_WIN32 changes is merged and will post a v2 soon. Any comments on 2/2?


   Stefan



Re: [PATCH 0/2] expose host-phys-bits to guest

2022-09-01 Thread Gerd Hoffmann
On Thu, Sep 01, 2022 at 10:36:19PM +0800, Xiaoyao Li wrote:
> On 9/1/2022 9:58 PM, Gerd Hoffmann wrote:
> 
> > > Anyway, IMO, guest including guest firmware, should always consult from
> > > CPUID leaf 0x80000008 for physical address length.
> > 
> > It simply can't for the reason outlined above.  Even if we fix qemu
> > today that doesn't solve the problem for the firmware because we want
> > backward compatibility with older qemu versions.  That's why I want the
> > extra bit which essentially says "CPUID leaf 0x80000008 actually works".
> 
> I don't understand how it is backward compatible with older qemu versions. Old
> QEMU won't set the extra bit you introduced in this series, and all the
> guests created with old QEMU will become untrusted on CPUID leaf 0x80000008?

Correct, on old qemu firmware will not trust CPUID leaf 0x80000008.
That is not worse than the situation we have today, currently the
firmware never trusts CPUID leaf 0x80000008.

So the patches will improve the situation for new qemu only, but I
don't see a way around that.

take care,
  Gerd




[PATCH 12/42] hw/isa/piix3: Remove unused include

2022-09-01 Thread Bernhard Beschow
Amends commit 988fb613215993dd0ce642b89ca8182c479d39dd.

Signed-off-by: Bernhard Beschow 
---
 hw/isa/piix3.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c
index 0117f896d2..b54ff1c948 100644
--- a/hw/isa/piix3.c
+++ b/hw/isa/piix3.c
@@ -31,7 +31,6 @@
 #include "hw/qdev-properties.h"
 #include "hw/isa/isa.h"
 #include "hw/xen/xen.h"
-#include "sysemu/xen.h"
 #include "sysemu/reset.h"
 #include "sysemu/runstate.h"
 #include "migration/vmstate.h"
-- 
2.37.3




[PATCH 33/42] hw/isa/piix4: Prefix pci_slot_get_pirq() with "piix4_"

2022-09-01 Thread Bernhard Beschow
Prefixing with "piix4_" makes the method distinguishable from its
PIIX3 counterpart upon merging and also complies more with QEMU
conventions.

Signed-off-by: Bernhard Beschow 
---
 hw/isa/piix4.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/hw/isa/piix4.c b/hw/isa/piix4.c
index 72bd9ad74d..01a98990d6 100644
--- a/hw/isa/piix4.c
+++ b/hw/isa/piix4.c
@@ -62,7 +62,7 @@ static void piix4_set_irq(void *opaque, int irq_num, int 
level)
 }
 }
 
-static int pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
+static int piix4_pci_slot_get_pirq(PCIDevice *pci_dev, int irq_num)
 {
 int slot;
 
@@ -248,7 +248,8 @@ static void piix4_realize(PCIDevice *dev, Error **errp)
   qdev_get_gpio_in(DEVICE(>pic), 9));
 }
 
-pci_bus_irqs(pci_bus, piix4_set_irq, pci_slot_get_pirq, s, PIIX_NUM_PIRQS);
+pci_bus_irqs(pci_bus, piix4_set_irq, piix4_pci_slot_get_pirq, s,
+ PIIX_NUM_PIRQS);
 }
 
 static void piix4_init(Object *obj)
-- 
2.37.3




[PATCH 42/42] hw/i386/acpi-build: Resolve PIIX ISA bridge rather than ACPI controller

2022-09-01 Thread Bernhard Beschow
Resolving the PIIX ISA bridge rather than the PIIX ACPI controller mirrors
the ICH9 code one line below.

Signed-off-by: Bernhard Beschow 
---
 hw/i386/acpi-build.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 8af75b1e22..d7bb1ccb26 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -288,7 +288,7 @@ static void acpi_get_pm_info(MachineState *machine, 
AcpiPmInfo *pm)
 
 static void acpi_get_misc_info(AcpiMiscInfo *info)
 {
-Object *piix = object_resolve_type_unambiguous(TYPE_PIIX4_PM);
+Object *piix = object_resolve_type_unambiguous(TYPE_PIIX_PCI_DEVICE);
 Object *lpc = object_resolve_type_unambiguous(TYPE_ICH9_LPC_DEVICE);
 assert(!!piix != !!lpc);
 
-- 
2.37.3




  1   2   3   4   >