date:20180115

Re: [Qemu-devel] [PATCH 3/3] target/ppc: add support for hypervisor doorbells on book3s CPUs

2018-01-15 Thread Cédric Le Goater

On 01/16/2018 08:41 AM, Cédric Le Goater wrote:
> The hypervisor doorbells are used by skiboot and Linux on POWER9
> processors to wake up secondaries.
> 
> This adds processor control support to the Server architecture by
> reusing the Embedded support. They are very similar, only the bits
> definition of the CPU identifier differ.
> 
> Still to be done is message broadcast to all threads of the same
> processor.
> 
> Signed-off-by: Cédric Le Goater 
> ---
>  target/ppc/cpu.h|  8 ++--
>  target/ppc/excp_helper.c| 39 ---
>  target/ppc/helper.h |  2 +-
>  target/ppc/translate.c  | 13 -
>  target/ppc/translate_init.c |  2 +-
>  5 files changed, 52 insertions(+), 12 deletions(-)
> 
> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> index b8f4dfc1084a..603a38cae83f 100644
> --- a/target/ppc/cpu.h
> +++ b/target/ppc/cpu.h
> @@ -930,7 +930,7 @@ enum {
>  #define BOOKE206_MAX_TLBN  4
>  
>  
> /*****************************************************************************/
> -/* Embedded.Processor Control */
> +/* Server and Embedded Processor Control */
>  
>  #define DBELL_TYPE_SHIFT   27
>  #define DBELL_TYPE_MASK(0x1f << DBELL_TYPE_SHIFT)
> @@ -940,11 +940,15 @@ enum {
>  #define DBELL_TYPE_G_DBELL_CRIT(0x03 << DBELL_TYPE_SHIFT)
>  #define DBELL_TYPE_G_DBELL_MC  (0x04 << DBELL_TYPE_SHIFT)
>  
> -#define DBELL_BRDCAST  (1 << 26)
> +#define DBELL_TYPE_DBELL_SERVER(0x05 << DBELL_TYPE_SHIFT)
> +
> +#define DBELL_BRDCAST  PPC_BIT(37)
>  #define DBELL_LPIDTAG_SHIFT14
>  #define DBELL_LPIDTAG_MASK (0xfff << DBELL_LPIDTAG_SHIFT)
>  #define DBELL_PIRTAG_MASK  0x3fff
>  
> +#define DBELL_PROCIDTAG_MASK   PPC_BITMASK(44, 63)
> +
>  
> /*****************************************************************************/
>  /* Segment page size information, used by recent hash MMUs
>   * The format of this structure mirrors kvm_ppc_smmu_info
> diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
> index 4e548a448747..0f32cab1ff57 100644
> --- a/target/ppc/excp_helper.c
> +++ b/target/ppc/excp_helper.c
> @@ -417,6 +417,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int 
> excp_model, int excp)
>  case POWERPC_EXCP_HISI:  /* Hypervisor instruction storage exception 
> */
>  case POWERPC_EXCP_HDSEG: /* Hypervisor data segment exception
> */
>  case POWERPC_EXCP_HISEG: /* Hypervisor instruction segment exception 
> */
> +case POWERPC_EXCP_SDOOR_HV:  /* Hypervisor Doorbell interrupt
> */
>  case POWERPC_EXCP_HV_EMU:
>  srr0 = SPR_HSRR0;
>  srr1 = SPR_HSRR1;
> @@ -846,6 +847,11 @@ static void ppc_hw_interrupt(CPUPPCState *env)
>  powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_DOORI);
>  return;
>  }
> +if (env->pending_interrupts & (1 << PPC_INTERRUPT_HDOORBELL)) {
> +env->pending_interrupts &= ~(1 << PPC_INTERRUPT_HDOORBELL);
> +powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_SDOOR_HV);
> +return;
> +}
>  if (env->pending_interrupts & (1 << PPC_INTERRUPT_PERFM)) {
>  env->pending_interrupts &= ~(1 << PPC_INTERRUPT_PERFM);
>  powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_PERFM);
> @@ -1088,8 +1094,8 @@ void helper_rfsvc(CPUPPCState *env)
>  do_rfi(env, env->lr, env->ctr & 0x0000FFFF);
>  }
>  
> -/* Embedded.Processor Control */
> -static int dbell2irq(target_ulong rb)
> +/* Server and Embedded Processor Control */
> +static int dbell2irq(target_ulong rb, bool book3s)
>  {
>  int msg = rb & DBELL_TYPE_MASK;
>  int irq = -1;
> @@ -1109,12 +1115,21 @@ static int dbell2irq(target_ulong rb)
>  break;
>  }
>  
> +/* A Directed Hypervisor Doorbell message is sent only if the
> + * message type is 5. All other types are reserved and the
> + * instruction is a no-op */
> +if (book3s && msg == DBELL_TYPE_DBELL_SERVER) {
> +irq = PPC_INTERRUPT_HDOORBELL;
> +}
> +
>  return irq;
>  }
>  
>  void helper_msgclr(CPUPPCState *env, target_ulong rb)
>  {
> -int irq = dbell2irq(rb);
> +/* 64-bit server processors compliant with arch 2.x */
> +bool book3s = (env->insns_flags & PPC_SEGMENT_64B);
> +int irq = dbell2irq(rb, book3s);
>  
>  if (irq < 0) {
>  return;
> @@ -1123,10 +1138,11 @@ void helper_msgclr(CPUPPCState *env, target_ulong rb)
>  env->pending_interrupts &= ~(1 << irq);
>  }
>  
> -void helper_msgsnd(target_ulong rb)
> +void helper_msgsnd(CPUPPCState *env, target_ulong rb)
>  {
> -int irq = dbell2irq(rb);
> -int pir = rb & DBELL_PIRTAG_MASK;
> +/* 64-bit server processors compliant with arch 2.x */
> +bool book3s = (env->insns_flags & PPC_SEGMENT_64B);
> +int irq = dbell2irq(rb, book3s);
>  CPUState

[Qemu-devel] [PATCH 2/3] target/ppc: msgsnd and msgclr instructions need hypervisor privilege

2018-01-15 Thread Cédric Le Goater

Signed-off-by: Cédric Le Goater 
---
 target/ppc/translate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 6d16a0b22d44..bcd36d53537f 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -6169,7 +6169,7 @@ static void gen_msgclr(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
 GEN_PRIV;
 #else
-CHK_SV;
+CHK_HV;
 gen_helper_msgclr(cpu_env, cpu_gpr[rB(ctx->opcode)]);
 #endif /* defined(CONFIG_USER_ONLY) */
 }
@@ -6179,7 +6179,7 @@ static void gen_msgsnd(DisasContext *ctx)
 #if defined(CONFIG_USER_ONLY)
 GEN_PRIV;
 #else
-CHK_SV;
+CHK_HV;
 gen_helper_msgsnd(cpu_gpr[rB(ctx->opcode)]);
 #endif /* defined(CONFIG_USER_ONLY) */
 }
-- 
2.13.6

[Qemu-devel] [PATCH 1/3] target/ppc: fix doorbell and hypervisor doorbell definitions

2018-01-15 Thread Cédric Le Goater

commit f03a1af581b9 ("ppc: Fix POWER7 and POWER8 exception definitions")
introduced definitions for the server doorbell exceptions by reusing
the embedded definitions but this adds complexity in the powerpc_excp()
routine. Let's introduce specific definitions for the Server doorbells
exception.

Signed-off-by: Cédric Le Goater 
---
 target/ppc/cpu.h | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 14aaa87fe825..b8f4dfc1084a 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -140,9 +140,6 @@ enum {
 POWERPC_EXCP_HYPPRIV  = 41, /* Embedded hypervisor priv instruction  */
 /* Vectors 42 to 63 are reserved */
 /* Exceptions defined in the PowerPC server specification*/
-/* Server doorbell variants */
-#define POWERPC_EXCP_SDOOR  POWERPC_EXCP_GDOORI
-#define POWERPC_EXCP_SDOOR_HV   POWERPC_EXCP_DOORI
 POWERPC_EXCP_RESET= 64, /* System reset exception*/
 POWERPC_EXCP_DSEG = 65, /* Data segment exception*/
 POWERPC_EXCP_ISEG = 66, /* Instruction segment exception */
@@ -189,8 +186,11 @@ enum {
 POWERPC_EXCP_HV_EMU   = 96, /* HV emulation assistance   */
 POWERPC_EXCP_HV_MAINT = 97, /* HMI   */
 POWERPC_EXCP_HV_FU= 98, /* Hypervisor Facility unavailable   */
+/* Server doorbell variants */
+POWERPC_EXCP_SDOOR= 99,
+POWERPC_EXCP_SDOOR_HV = 100,
 /* EOL   */
-POWERPC_EXCP_NB   = 99,
+POWERPC_EXCP_NB   = 101,
 /* QEMU exceptions: used internally during code translation  */
 POWERPC_EXCP_STOP = 0x200, /* stop translation   */
 POWERPC_EXCP_BRANCH   = 0x201, /* branch instruction */
-- 
2.13.6

[Qemu-devel] [PATCH 3/3] target/ppc: add support for hypervisor doorbells on book3s CPUs

2018-01-15 Thread Cédric Le Goater

The hypervisor doorbells are used by skiboot and Linux on POWER9
processors to wake up secondaries.

This adds processor control support to the Server architecture by
reusing the Embedded support. They are very similar, only the bits
definition of the CPU identifier differ.

Still to be done is message broadcast to all threads of the same
processor.

Signed-off-by: Cédric Le Goater 
---
 target/ppc/cpu.h|  8 ++--
 target/ppc/excp_helper.c| 39 ---
 target/ppc/helper.h |  2 +-
 target/ppc/translate.c  | 13 -
 target/ppc/translate_init.c |  2 +-
 5 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index b8f4dfc1084a..603a38cae83f 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -930,7 +930,7 @@ enum {
 #define BOOKE206_MAX_TLBN  4
 
 /*****************************************************************************/
-/* Embedded.Processor Control */
+/* Server and Embedded Processor Control */
 
 #define DBELL_TYPE_SHIFT   27
 #define DBELL_TYPE_MASK(0x1f << DBELL_TYPE_SHIFT)
@@ -940,11 +940,15 @@ enum {
 #define DBELL_TYPE_G_DBELL_CRIT(0x03 << DBELL_TYPE_SHIFT)
 #define DBELL_TYPE_G_DBELL_MC  (0x04 << DBELL_TYPE_SHIFT)
 
-#define DBELL_BRDCAST  (1 << 26)
+#define DBELL_TYPE_DBELL_SERVER(0x05 << DBELL_TYPE_SHIFT)
+
+#define DBELL_BRDCAST  PPC_BIT(37)
 #define DBELL_LPIDTAG_SHIFT14
 #define DBELL_LPIDTAG_MASK (0xfff << DBELL_LPIDTAG_SHIFT)
 #define DBELL_PIRTAG_MASK  0x3fff
 
+#define DBELL_PROCIDTAG_MASK   PPC_BITMASK(44, 63)
+
 /*****************************************************************************/
 /* Segment page size information, used by recent hash MMUs
  * The format of this structure mirrors kvm_ppc_smmu_info
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 4e548a448747..0f32cab1ff57 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -417,6 +417,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int 
excp_model, int excp)
 case POWERPC_EXCP_HISI:  /* Hypervisor instruction storage exception */
 case POWERPC_EXCP_HDSEG: /* Hypervisor data segment exception*/
 case POWERPC_EXCP_HISEG: /* Hypervisor instruction segment exception */
+case POWERPC_EXCP_SDOOR_HV:  /* Hypervisor Doorbell interrupt*/
 case POWERPC_EXCP_HV_EMU:
 srr0 = SPR_HSRR0;
 srr1 = SPR_HSRR1;
@@ -846,6 +847,11 @@ static void ppc_hw_interrupt(CPUPPCState *env)
 powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_DOORI);
 return;
 }
+if (env->pending_interrupts & (1 << PPC_INTERRUPT_HDOORBELL)) {
+env->pending_interrupts &= ~(1 << PPC_INTERRUPT_HDOORBELL);
+powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_SDOOR_HV);
+return;
+}
 if (env->pending_interrupts & (1 << PPC_INTERRUPT_PERFM)) {
 env->pending_interrupts &= ~(1 << PPC_INTERRUPT_PERFM);
 powerpc_excp(cpu, env->excp_model, POWERPC_EXCP_PERFM);
@@ -1088,8 +1094,8 @@ void helper_rfsvc(CPUPPCState *env)
 do_rfi(env, env->lr, env->ctr & 0x0000FFFF);
 }
 
-/* Embedded.Processor Control */
-static int dbell2irq(target_ulong rb)
+/* Server and Embedded Processor Control */
+static int dbell2irq(target_ulong rb, bool book3s)
 {
 int msg = rb & DBELL_TYPE_MASK;
 int irq = -1;
@@ -1109,12 +1115,21 @@ static int dbell2irq(target_ulong rb)
 break;
 }
 
+/* A Directed Hypervisor Doorbell message is sent only if the
+ * message type is 5. All other types are reserved and the
+ * instruction is a no-op */
+if (book3s && msg == DBELL_TYPE_DBELL_SERVER) {
+irq = PPC_INTERRUPT_HDOORBELL;
+}
+
 return irq;
 }
 
 void helper_msgclr(CPUPPCState *env, target_ulong rb)
 {
-int irq = dbell2irq(rb);
+/* 64-bit server processors compliant with arch 2.x */
+bool book3s = (env->insns_flags & PPC_SEGMENT_64B);
+int irq = dbell2irq(rb, book3s);
 
 if (irq < 0) {
 return;
@@ -1123,10 +1138,11 @@ void helper_msgclr(CPUPPCState *env, target_ulong rb)
 env->pending_interrupts &= ~(1 << irq);
 }
 
-void helper_msgsnd(target_ulong rb)
+void helper_msgsnd(CPUPPCState *env, target_ulong rb)
 {
-int irq = dbell2irq(rb);
-int pir = rb & DBELL_PIRTAG_MASK;
+/* 64-bit server processors compliant with arch 2.x */
+bool book3s = (env->insns_flags & PPC_SEGMENT_64B);
+int irq = dbell2irq(rb, book3s);
 CPUState *cs;
 
 if (irq < 0) {
@@ -1137,8 +1153,17 @@ void helper_msgsnd(target_ulong rb)
 CPU_FOREACH(cs) {
 PowerPCCPU *cpu = POWERPC_CPU(cs);
 CPUPPCState *cenv = &cpu->env;
+bool send;
 
-if ((rb & DBELL_BRDCAST) || (cenv->spr[SPR_BOOKE_PIR] == pir)) {
+/* TODO: broadcast

[Qemu-devel] [PATCH 0/3] target/ppc: add support for hypervisor doorbells

2018-01-15 Thread Cédric Le Goater

Hi,

The hypervisor doorbells are used by skiboot and Linux on POWER9
processors to wake up secondaries. This adds processor control support
to the Book3S architecture.

The full tree can be found here :

  https://github.com/legoater/qemu powernv-2.12

Thanks,

C.

Cédric Le Goater (3):
  target/ppc: fix doorbell and hypervisor doorbell definitions
  target/ppc: msgsnd and msgclr instructions need hypervisor privilege
  target/ppc: add support for hypervisor doorbells on book3s CPUs

 target/ppc/cpu.h| 16 ++--
 target/ppc/excp_helper.c| 39 ---
 target/ppc/helper.h |  2 +-
 target/ppc/translate.c  | 17 ++---
 target/ppc/translate_init.c |  2 +-
 5 files changed, 58 insertions(+), 18 deletions(-)

-- 
2.13.6

Re: [Qemu-devel] Some question about savem/qcow2 incremental snapshot

2018-01-15 Thread He Junyan

On Wed, 2018-01-10 at 20:17 +0000, Stefan Hajnoczi wrote:
> On Wed, Jan 10, 2018 at 8:15 PM, Dr. David Alan Gilbert
>  wrote:
> > 
> > * Stefan Hajnoczi (stefa...@gmail.com) wrote:
> > > 
> > > On Tue, Jan 9, 2018 at 7:55 PM, Dr. David Alan Gilbert
> > >  wrote:
> > > > 
> > > > > 
> > > > > Certain guest operations like rebooting or zeroing memory
> > > > > will defeat
> > > > > the incremental guest RAM snapshot feature.  It's worth
> > > > > thinking about
> > > > > these cases to make sure this feature would be worth it in
> > > > > real use
> > > > > cases.
> > > > But those probably wouldn't upset an NVDimm?
> > > If the guest dirties all RAM then the incremental snapshot
> > > feature
> > > degrades to a full snapshot.  I'm asking if there are common
> > > operations where that happens.
> > > 
> > > I seem to remember Windows guests zero all pages on cold
> > > boot.  Maybe
> > > that's not the case anymore.
> > > 
> > > Worth checking before embarking on this feature because it could
> > > be a
> > > waste of effort if it turns out real-world guests dirty all
> > > memory in
> > > common cases.
> > Right, but I'm hoping that there's some magic somewhere where an
> > NVDimm doesn't
> > get zero'd because of a cold boot since that would seem to make it
> > volatile.
> This feature isn't specific to NVDIMM though.  It would be equally
> useful for regular RAM.
> 
> Stefan
> 

Thanks for all your advice.
I have already done a lot of investigation and written some code. My
considerations are as follows:
1. As the first step, I will use the is_active() function to make it
work only for nvdimm-type memory regions and only for snapshot saving, not
for live migration. I understand that this can work for all kinds of
memory, but it may lower the guest's performance if we always enable dirty
log tracking for memory. For live migration, all the data needs to
be copied, so it seems it cannot benefit from this approach.
2. Saving and loading are relatively easy to do, while deleting a
snapshot point needs a lot of work. The current framework just
deletes all the data of one snapshot point in one shot. I want to add
some references to the L1/L2 tables of QCOW2 when a cluster's data is
depended on by another snapshot point, so we can keep the data when
deleting.

I will also check whether the cold boot will zero all pages later.

Thanks,
Junyan

Re: [Qemu-devel] [PULL 00/25] pc, pci, virtio: features, fixes, cleanups

2018-01-15 Thread Jason Wang




On 2018年01月16日 14:28, Jason Wang wrote:



On 2018年01月16日 10:48, Michael S. Tsirkin wrote:

On Tue, Jan 09, 2018 at 12:10:10PM +1100, David Gibson wrote:

On Mon, Jan 08, 2018 at 08:10:23PM +0200, Michael S. Tsirkin wrote:

On Mon, Jan 08, 2018 at 09:14:41AM +0000, Peter Maydell wrote:
On 21 December 2017 at 23:43, Michael S. Tsirkin  
wrote:

you also need to drop "hw/pci: remove obsolete PCIDevice->init()"
(applied in your tree as 18951fce55d5aa58cd1629b4cb704ee51bab4420)
else the not-yet-QOM'ified NVME won't work anymore.

Done that too, thanks a lot

Hi. I'm afraid this pullreq (merge of 880b1ff) has new runtime
errors from the clang sanitizer:

/home/petmay01/linaro/qemu-for-merges/hw/net/net_tx_pkt.c:201:27:
runtime error: member access within misaligned address 0x55b4ffee8246
for type 'struct ip_header', which requires 4 byte alignment
0x55b4ffee8246: note: pointer points here
  01 00 00 00 45 00  01 a9 01 00 00 00 40 11  78 45 00 00 00 00 ff ff
ff ff 00 00 00 00 00 00  00 00
  ^
/home/petmay01/linaro/qemu-for-merges/hw/net/net_tx_pkt.c:201:27:
runtime error: load of misaligned address 0x55b4ffee8246 for type
'uint8_t' (aka 'unsigned char'), which requires 4 byte alignment
0x55b4ffee8246: note: pointer points here
  01 00 00 00 45 00  01 a9 01 00 00 00 40 11  78 45 00 00 00 00 ff ff
ff ff 00 00 00 00 00 00  00 00
  ^
/home/petmay01/linaro/qemu-for-merges/hw/net/net_tx_pkt.c:208:65:
runtime error: member access within misaligned address 0x55b4ffee8246
for type 'struct ip_header', which requires 4 byte alignment
0x55b4ffee8246: note: pointer points here
  01 00 00 00 45 00  01 a9 01 00 00 00 40 11  78 45 00 00 00 00 ff ff
ff ff 00 00 00 00 00 00  00 00
  ^
/home/petmay01/linaro/qemu-for-merges/hw/net/net_tx_pkt.c:210:13:
runtime error: member access within misaligned address 0x55b4ffee8246
for type 'struct ip_header', which requires 4 byte alignment
0x55b4ffee8246: note: pointer points here
  01 00 00 00 45 00  01 a9 01 00 00 00 40 11  78 45 00 00 00 00 ff ff
ff ff 00 00 00 00 00 00  00 00
  ^
/home/petmay01/linaro/qemu-for-merges/hw/net/net_tx_pkt.c:210:13:
runtime error: load of misaligned address 0x55b4ffee8246 for type
'uint8_t' (aka 'unsigned char'), which requires 4 byte alignment
0x55b4ffee8246: note: pointer points here
  01 00 00 00 45 00  01 a9 01 00 00 00 40 11  78 45 00 00 00 00 ff ff
ff ff 00 00 00 00 00 00  00 00
  ^

Sorry for the late report, I didn't quite have time
to process the pull before Christmas holidays.

thanks
-- PMM

So I bisected and the reason is this commit:

commit 18b20bb43a2f37f0c8ae23a3e9b3d9a4a05b6bd4
Author: David Gibson 
Date:   Tue Dec 19 15:45:22 2017 +1100

 tests/pxe-test: Add some extra tests
  Previously virtio-net was only tested for ppc64 in "slow" 
mode.  That
 doesn't make much sense since virtio-net is used much more 
often in
 practice than the spapr-vlan device which was tested always.  
So, move

 virtio-net to always be tested on ppc64.
  We had no tests at all for the q35 machine, which doesn't 
seem wise

 given its increasing prominence.  Add a couple of tests for it,
 including testing the newer e1000e adapter.
  Signed-off-by: David Gibson 
 Reviewed-by: Thomas Huth 
 Reviewed-by: Michael S. Tsirkin 
 Signed-off-by: Michael S. Tsirkin 

:040000 040000 5a982bfea24b9ac3c651b84425a39b3c85f4871e 
771af3fdfb2778c6d6ed6b1098d1e79c181d6fb0 M  tests


Pls either fix or drop e1000e test

Feel free to drop this patch for now.  I'll debug and repost it when I
have a chance.

--
David Gibson    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au    | minimalist, thank you. NOT _the_ 
_other_

    | _way_ _around_!
http://www.ozlabs.org/~dgibson

BTw, Jason, you might want to take a look.
I suspect it's a bug somewhere in the e1000e emulation.



Will have a look.

cc Dmitry for more thoughts.

Thanks



Cc again with the correct address.

Re: [Qemu-devel] [PULL 00/25] pc, pci, virtio: features, fixes, cleanups

2018-01-15 Thread Jason Wang




On 2018年01月16日 10:48, Michael S. Tsirkin wrote:

On Tue, Jan 09, 2018 at 12:10:10PM +1100, David Gibson wrote:

On Mon, Jan 08, 2018 at 08:10:23PM +0200, Michael S. Tsirkin wrote:

On Mon, Jan 08, 2018 at 09:14:41AM +0000, Peter Maydell wrote:

On 21 December 2017 at 23:43, Michael S. Tsirkin  wrote:

you also need to drop "hw/pci: remove obsolete PCIDevice->init()"
(applied in your tree as 18951fce55d5aa58cd1629b4cb704ee51bab4420)
else the not-yet-QOM'ified NVME won't work anymore.

Done that too, thanks a lot

Hi. I'm afraid this pullreq (merge of 880b1ff) has new runtime
errors from the clang sanitizer:

/home/petmay01/linaro/qemu-for-merges/hw/net/net_tx_pkt.c:201:27:
runtime error: member access within misaligned address 0x55b4ffee8246
for type 'struct ip_header', which requires 4 byte alignment
0x55b4ffee8246: note: pointer points here
  01 00 00 00 45 00  01 a9 01 00 00 00 40 11  78 45 00 00 00 00 ff ff
ff ff 00 00 00 00 00 00  00 00
  ^
/home/petmay01/linaro/qemu-for-merges/hw/net/net_tx_pkt.c:201:27:
runtime error: load of misaligned address 0x55b4ffee8246 for type
'uint8_t' (aka 'unsigned char'), which requires 4 byte alignment
0x55b4ffee8246: note: pointer points here
  01 00 00 00 45 00  01 a9 01 00 00 00 40 11  78 45 00 00 00 00 ff ff
ff ff 00 00 00 00 00 00  00 00
  ^
/home/petmay01/linaro/qemu-for-merges/hw/net/net_tx_pkt.c:208:65:
runtime error: member access within misaligned address 0x55b4ffee8246
for type 'struct ip_header', which requires 4 byte alignment
0x55b4ffee8246: note: pointer points here
  01 00 00 00 45 00  01 a9 01 00 00 00 40 11  78 45 00 00 00 00 ff ff
ff ff 00 00 00 00 00 00  00 00
  ^
/home/petmay01/linaro/qemu-for-merges/hw/net/net_tx_pkt.c:210:13:
runtime error: member access within misaligned address 0x55b4ffee8246
for type 'struct ip_header', which requires 4 byte alignment
0x55b4ffee8246: note: pointer points here
  01 00 00 00 45 00  01 a9 01 00 00 00 40 11  78 45 00 00 00 00 ff ff
ff ff 00 00 00 00 00 00  00 00
  ^
/home/petmay01/linaro/qemu-for-merges/hw/net/net_tx_pkt.c:210:13:
runtime error: load of misaligned address 0x55b4ffee8246 for type
'uint8_t' (aka 'unsigned char'), which requires 4 byte alignment
0x55b4ffee8246: note: pointer points here
  01 00 00 00 45 00  01 a9 01 00 00 00 40 11  78 45 00 00 00 00 ff ff
ff ff 00 00 00 00 00 00  00 00
  ^

Sorry for the late report, I didn't quite have time
to process the pull before Christmas holidays.

thanks
-- PMM

So I bisected and the reason is this commit:

commit 18b20bb43a2f37f0c8ae23a3e9b3d9a4a05b6bd4
Author: David Gibson 
Date:   Tue Dec 19 15:45:22 2017 +1100

 tests/pxe-test: Add some extra tests
 
 Previously virtio-net was only tested for ppc64 in "slow" mode.  That

 doesn't make much sense since virtio-net is used much more often in
 practice than the spapr-vlan device which was tested always.  So, move
 virtio-net to always be tested on ppc64.
 
 We had no tests at all for the q35 machine, which doesn't seem wise

 given its increasing prominence.  Add a couple of tests for it,
 including testing the newer e1000e adapter.
 
 Signed-off-by: David Gibson 

 Reviewed-by: Thomas Huth 
 Reviewed-by: Michael S. Tsirkin 
 Signed-off-by: Michael S. Tsirkin 

:040000 040000 5a982bfea24b9ac3c651b84425a39b3c85f4871e 
771af3fdfb2778c6d6ed6b1098d1e79c181d6fb0 M  tests

Pls either fix or drop e1000e test

Feel free to drop this patch for now.  I'll debug and repost it when I
have a chance.

--
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson

BTw, Jason, you might want to take a look.
I suspect it's a bug somewhere in the e1000e emulation.



Will have a look.

cc Dmitry for more thoughts.

Thanks

Re: [Qemu-devel] [PATCH] net: Allow netdevs to be used with 'hostfwd_add' and 'hostfwd_remove'

2018-01-15 Thread Jason Wang




On 2018年01月12日 04:02, Thomas Huth wrote:

It does not make much sense to limit these commands to the legacy 'vlan'
concept only, they should work with the modern netdevs, too. So now
it is possible to use this command with one, two or three parameters.

With one parameter, the command installs a hostfwd rule on the default
"user" network:
 hostfwd_add tcp:...

With two parameters, the command installs a hostfwd rule on a netdev
(that's the new way of using this command):
 hostfwd_add netdev_id tcp:...

With three parameters, the command installs a rule on a 'vlan' (aka hub):
 hostfwd_add hub_id name tcp:...

Same applies to the hostfwd_remove command now.

Signed-off-by: Thomas Huth


Applied.

Thanks

Re: [Qemu-devel] [PATCH] qemu-doc: Get rid of "vlan=X" example in the documentation

2018-01-15 Thread Jason Wang




On 2018年01月15日 15:40, Thomas Huth wrote:

The vlan concept is marked as deprecated, so we should not use
this for examples in the documentation anymore.

Signed-off-by: Thomas Huth 
---
  qemu-options.hx | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/qemu-options.hx b/qemu-options.hx
index 130016c..d0c8b06 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2257,8 +2257,8 @@ qemu-system-i386 linux.img -net nic -net tap
  #launch a QEMU instance with two NICs, each one connected
  #to a TAP device
  qemu-system-i386 linux.img \
- -net nic,vlan=0 -net tap,vlan=0,ifname=tap0 \
- -net nic,vlan=1 -net tap,vlan=1,ifname=tap1
+-netdev tap,id=nd0,ifname=tap0 -device e1000,netdev=nd0 \
+-netdev tap,id=nd1,ifname=tap1 -device rtl8139,netdev=nd1
  @end example
  
  @example


Applied.

Thanks

[Qemu-devel] [PATCH v6 8/9] docs: Add section for NVMe VFIO driver

2018-01-15 Thread Fam Zheng

Signed-off-by: Fam Zheng 
Message-Id: <20180110091846.10699-9-f...@redhat.com>
Reviewed-by: Stefan Hajnoczi 
---
 docs/qemu-block-drivers.texi | 37 +
 1 file changed, 37 insertions(+)

diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi
index 503c1847aa..cd74767ed3 100644
--- a/docs/qemu-block-drivers.texi
+++ b/docs/qemu-block-drivers.texi
@@ -785,6 +785,43 @@ warning: ssh server @code{ssh.example.com:22} does not 
support fsync
 With sufficiently new versions of libssh2 and OpenSSH, @code{fsync} is
 supported.
 
+@node disk_images_nvme
+@subsection NVMe disk images
+
+NVM Express (NVMe) storage controllers can be accessed directly by a userspace
+driver in QEMU.  This bypasses the host kernel file system and block layers
+while retaining QEMU block layer functionalities, such as block jobs, I/O
+throttling, image formats, etc.  Disk I/O performance is typically higher than
+with @code{-drive file=/dev/sda} using either thread pool or linux-aio.
+
+The controller will be exclusively used by the QEMU process once started. To be
+able to share storage between multiple VMs and other applications on the host,
+please use the file based protocols.
+
+Before starting QEMU, bind the host NVMe controller to the host vfio-pci
+driver.  For example:
+
+@example
+# modprobe vfio-pci
+# lspci -n -s 0000:06:0d.0
+06:0d.0 0401: 1102:0002 (rev 08)
+# echo 0000:06:0d.0 > /sys/bus/pci/devices/0000:06:0d.0/driver/unbind
+# echo 1102 0002 > /sys/bus/pci/drivers/vfio-pci/new_id
+
+# qemu-system-x86_64 -drive 
file=nvme://@var{host}:@var{bus}:@var{slot}.@var{func}/@var{namespace}
+@end example
+
+Alternative syntax using properties:
+
+@example
+qemu-system-x86_64 -drive 
file.driver=nvme,file.device=@var{host}:@var{bus}:@var{slot}.@var{func},file.namespace=@var{namespace}
+@end example
+
+@var{host}:@var{bus}:@var{slot}.@var{func} is the NVMe controller's PCI device
+address on the host.
+
+@var{namespace} is the NVMe namespace number, starting from 1.
+
 @node disk_image_locking
 @subsection Disk image file locking
 
-- 
2.14.3

[Qemu-devel] [PATCH v6 7/9] block: Move NVMe constants to a separate header

2018-01-15 Thread Fam Zheng

Signed-off-by: Fam Zheng 
Message-Id: <20180110091846.10699-8-f...@redhat.com>
Reviewed-by: Stefan Hajnoczi 
---
 block/nvme.c |   7 +-
 hw/block/nvme.h  | 698 +-
 include/block/nvme.h | 700 +++
 3 files changed, 702 insertions(+), 703 deletions(-)
 create mode 100644 include/block/nvme.h

diff --git a/block/nvme.c b/block/nvme.c
index 30616f50c7..044b15e1f5 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -22,12 +22,7 @@
 #include "block/block_int.h"
 #include "trace.h"
 
-/* TODO: Move nvme spec definitions from hw/block/nvme.h into a separate file
- * that doesn't depend on dma/pci headers. */
-#include "sysemu/dma.h"
-#include "hw/pci/pci.h"
-#include "hw/block/block.h"
-#include "hw/block/nvme.h"
+#include "block/nvme.h"
 
 #define NVME_SQ_ENTRY_BYTES 64
 #define NVME_CQ_ENTRY_BYTES 16
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 6aab338ff5..59a1504018 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -1,703 +1,7 @@
 #ifndef HW_NVME_H
 #define HW_NVME_H
 #include "qemu/cutils.h"
-
-typedef struct NvmeBar {
-uint64_tcap;
-uint32_tvs;
-uint32_tintms;
-uint32_tintmc;
-uint32_tcc;
-uint32_trsvd1;
-uint32_tcsts;
-uint32_tnssrc;
-uint32_taqa;
-uint64_tasq;
-uint64_tacq;
-uint32_tcmbloc;
-uint32_tcmbsz;
-} NvmeBar;
-
-enum NvmeCapShift {
-CAP_MQES_SHIFT = 0,
-CAP_CQR_SHIFT  = 16,
-CAP_AMS_SHIFT  = 17,
-CAP_TO_SHIFT   = 24,
-CAP_DSTRD_SHIFT= 32,
-CAP_NSSRS_SHIFT= 33,
-CAP_CSS_SHIFT  = 37,
-CAP_MPSMIN_SHIFT   = 48,
-CAP_MPSMAX_SHIFT   = 52,
-};
-
-enum NvmeCapMask {
-CAP_MQES_MASK  = 0xffff,
-CAP_CQR_MASK   = 0x1,
-CAP_AMS_MASK   = 0x3,
-CAP_TO_MASK= 0xff,
-CAP_DSTRD_MASK = 0xf,
-CAP_NSSRS_MASK = 0x1,
-CAP_CSS_MASK   = 0xff,
-CAP_MPSMIN_MASK= 0xf,
-CAP_MPSMAX_MASK= 0xf,
-};
-
-#define NVME_CAP_MQES(cap)  (((cap) >> CAP_MQES_SHIFT)   & CAP_MQES_MASK)
-#define NVME_CAP_CQR(cap)   (((cap) >> CAP_CQR_SHIFT)& CAP_CQR_MASK)
-#define NVME_CAP_AMS(cap)   (((cap) >> CAP_AMS_SHIFT)& CAP_AMS_MASK)
-#define NVME_CAP_TO(cap)(((cap) >> CAP_TO_SHIFT) & CAP_TO_MASK)
-#define NVME_CAP_DSTRD(cap) (((cap) >> CAP_DSTRD_SHIFT)  & CAP_DSTRD_MASK)
-#define NVME_CAP_NSSRS(cap) (((cap) >> CAP_NSSRS_SHIFT)  & CAP_NSSRS_MASK)
-#define NVME_CAP_CSS(cap)   (((cap) >> CAP_CSS_SHIFT)& CAP_CSS_MASK)
-#define NVME_CAP_MPSMIN(cap)(((cap) >> CAP_MPSMIN_SHIFT) & CAP_MPSMIN_MASK)
-#define NVME_CAP_MPSMAX(cap)(((cap) >> CAP_MPSMAX_SHIFT) & CAP_MPSMAX_MASK)
-
-#define NVME_CAP_SET_MQES(cap, val)   (cap |= (uint64_t)(val & CAP_MQES_MASK)  
\
-   << CAP_MQES_SHIFT)
-#define NVME_CAP_SET_CQR(cap, val)(cap |= (uint64_t)(val & CAP_CQR_MASK)   
\
-   << CAP_CQR_SHIFT)
-#define NVME_CAP_SET_AMS(cap, val)(cap |= (uint64_t)(val & CAP_AMS_MASK)   
\
-   << CAP_AMS_SHIFT)
-#define NVME_CAP_SET_TO(cap, val) (cap |= (uint64_t)(val & CAP_TO_MASK)
\
-   << CAP_TO_SHIFT)
-#define NVME_CAP_SET_DSTRD(cap, val)  (cap |= (uint64_t)(val & CAP_DSTRD_MASK) 
\
-   << CAP_DSTRD_SHIFT)
-#define NVME_CAP_SET_NSSRS(cap, val)  (cap |= (uint64_t)(val & CAP_NSSRS_MASK) 
\
-   << CAP_NSSRS_SHIFT)
-#define NVME_CAP_SET_CSS(cap, val)(cap |= (uint64_t)(val & CAP_CSS_MASK)   
\
-   << CAP_CSS_SHIFT)
-#define NVME_CAP_SET_MPSMIN(cap, val) (cap |= (uint64_t)(val & 
CAP_MPSMIN_MASK)\
-   << CAP_MPSMIN_SHIFT)
-#define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & 
CAP_MPSMAX_MASK)\
-<< 
CAP_MPSMAX_SHIFT)
-
-enum NvmeCcShift {
-CC_EN_SHIFT = 0,
-CC_CSS_SHIFT= 4,
-CC_MPS_SHIFT= 7,
-CC_AMS_SHIFT= 11,
-CC_SHN_SHIFT= 14,
-CC_IOSQES_SHIFT = 16,
-CC_IOCQES_SHIFT = 20,
-};
-
-enum NvmeCcMask {
-CC_EN_MASK  = 0x1,
-CC_CSS_MASK = 0x7,
-CC_MPS_MASK = 0xf,
-CC_AMS_MASK = 0x7,
-CC_SHN_MASK = 0x3,
-CC_IOSQES_MASK  = 0xf,
-CC_IOCQES_MASK  = 0xf,
-};
-
-#define NVME_CC_EN(cc) ((cc >> CC_EN_SHIFT) & CC_EN_MASK)
-#define NVME_CC_CSS(cc)((cc >> CC_CSS_SHIFT)& CC_CSS_MASK)
-#define NVME_CC_MPS(cc)((cc >> CC_MPS_SHIFT)& CC_MPS_MASK)
-#define NVME_CC_AMS(cc)((cc >> CC_AMS_SHIFT)& CC_AMS_MASK)
-#define NVME_CC_SHN(cc)((cc >> CC_SHN_SHIFT)

Re: [Qemu-devel] [PATCH RESEND v1 1/2] i386: Add Intel Processor Trace feature support

2018-01-15 Thread Kang, Luwei

> > On Mon, Jan 15, 2018 at 12:04:55 -0200, Eduardo Habkost wrote:
> > > CCing libvirt developers.
> > ...
> > > This case is slightly more problematic, however: the new feature is
> > > actually migratable (under very controlled circumstances) because of
> > > patch 2/2, but it is not migration-safe[1].  This means libvirt
> > > shouldn't include it in "host-model" expansion (which uses the
> > > query-cpu-model-expansion QMP command) until we make the feature
> > > migration-safe.
> > >
> > > For QEMU, this means the feature shouldn't be returned by
> > > "query-cpu-model-expansion type=static model=max" (but it can be
> > > returned by "query-cpu-model-expansion type=full model=max").
> > >
> > > Jiri, it looks like libvirt uses type=full on
> > > query-cpu-model-expansion on x86.  It needs to use type=static[2],
> > > or it will have no way to find out if a feature is migration-safe or
> > > not.
> > ...
> > > [2] It looks like libvirt uses type=full because it wants to get
> > > all QOM property aliases returned.  In this case, one
> > > solution for libvirt is to use:
> > >
> > > static_expansion = query_cpu_model_expansion(type=static, model)
> > > all_props = query_cpu_model_expansion(type=full,
> > > static_expansion)
> >
> > This is exactly what libvirt is doing (with model = "host") ever since
> > query-cpu-model-expansion support was implemented for x86.
> 
> Oh, now I see that the x86 code uses
> QEMU_MONITOR_CPU_MODEL_EXPANSION_STATIC_FULL and not just 
> QEMU_MONITOR_CPU_MODEL_EXPANSION_FULL.  Nice!
> 

So, I need to add the Intel PT feature to "X86CPUDefinition builtin_x86_defs[]"
so that we can get this CPUID in specific CPU models, not only with "-cpu host".
Is that right?

Intel PT is first supported in Intel Core M and 5th generation Intel Core 
processors that are based on the Intel micro-architecture code name Broadwell, 
but Intel PT "use EPT" is first supported in Ice Lake. Intel PT virtualization 
depends on PT "use EPT".  I will add Intel PT to the "Broadwell" CPU model and 
later make sure a "Broadwell" guest can use Intel PT if the host is Ice Lake.

Thanks,
Luwei Kang

Re: [Qemu-devel] [PATCH] fw_cfg: don't use DMA mapping for fw_cfg device

2018-01-15 Thread Peter Xu

On Mon, Jan 15, 2018 at 12:22:48PM +0100, Marc-Andre Lureau wrote:
> Hi
> 
> On Mon, Jan 15, 2018 at 9:55 AM, Peter Xu  wrote:
> > fw_cfg device does not need IOMMU protection, so use physical addresses
> > always.  That's how QEMU implements fw_cfg.  Otherwise we'll see call
> > traces during boot when vIOMMU is enabled in guest:
> >
> > [1.018306] [ cut here ]
> > [1.018314] WARNING: CPU: 1 PID: 1 at drivers/firmware/qemu_fw_cfg.c:152 
> > fw_cfg_dma_transfer+0x399/0x500
> > [1.018315] fw_cfg_dma_transfer: failed to map fw_cfg_dma
> > [1.018316] Modules linked in:
> > [1.018320] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 
> > 3.10.0-827.el7.x86_64 #1
> > [1.018321] Hardware name: Red Hat KVM, BIOS 1.11.0-1.el7 04/01/2014
> > [1.018322] Call Trace:
> > [1.018330]  [] dump_stack+0x19/0x1b
> > [1.018334]  [] __warn+0xd8/0x100
> > [1.018336]  [] warn_slowpath_fmt+0x5f/0x80
> > [1.018338]  [] fw_cfg_dma_transfer+0x399/0x500
> > [1.018340]  [] fw_cfg_read_blob+0xac/0x1c0
> > [1.018342]  [] fw_cfg_register_dir_entries+0x80/0x450
> > [1.018344]  [] fw_cfg_sysfs_probe+0x212/0x3f0
> > [1.018347]  [] platform_drv_probe+0x42/0x110
> > [1.018350]  [] driver_probe_device+0xc2/0x3e0
> > [1.018352]  [] __driver_attach+0x93/0xa0
> > [1.018354]  [] ? __device_attach+0x40/0x40
> > [1.018359]  [] bus_for_each_dev+0x73/0xc0
> > [1.018362]  [] driver_attach+0x1e/0x20
> > [1.018364]  [] bus_add_driver+0x200/0x2d0
> > [1.018366]  [] ? firmware_map_add_early+0x58/0x58
> > [1.018368]  [] driver_register+0x64/0xf0
> > [1.018370]  [] __platform_driver_register+0x4a/0x50
> > [1.018372]  [] fw_cfg_sysfs_init+0x34/0x61
> > [1.018376]  [] do_one_initcall+0xb8/0x230
> > [1.018379]  [] kernel_init_freeable+0x17a/0x219
> > [1.018381]  [] ? initcall_blacklist+0xb0/0xb0
> > [1.018383]  [] ? rest_init+0x80/0x80
> > [1.018385]  [] kernel_init+0xe/0xf0
> > [1.018388]  [] ret_from_fork+0x58/0x90
> > [1.018390]  [] ? rest_init+0x80/0x80
> > [1.018392] ---[ end trace d00a5b71608a8f59 ]---
> >
> > Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1533367
> > Fixes: e90cb816599b ("fw_cfg: do DMA read operation", 2017-11-28)
> > CC: Marc-André Lureau 
> > CC: Michael S. Tsirkin 
> > Signed-off-by: Peter Xu 
> > --
> >
> > This is based on tree:
> >   https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git/log/?h=vhost
> >
> > Please review, thanks.
> >
> > Signed-off-by: Peter Xu 
> 
> The DMA business is confusing, sadly I didn't get much clue what I was
> supposed to do. What I can say:
> 
> Tested-by: Marc-André Lureau 

Thanks for confirming this.

> 
> Should the series be removed from Michael tree and I squash your fix &
> send a v10?
> 
> Fwiw, "fw_cfg: write vmcoreinfo details" should also be fixed to
> allocate memory (unless your approach fixes that?)

Yes, IMHO this patch should also work for writes (though not tested).

Thanks,

-- 
Peter Xu

[Qemu-devel] [PATCH v6 9/9] qapi: Add NVMe driver options to the schema

2018-01-15 Thread Fam Zheng

Signed-off-by: Fam Zheng 
Message-Id: <20180110091846.10699-10-f...@redhat.com>
Reviewed-by: Stefan Hajnoczi 
---
 qapi/block-core.json | 17 -
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index e94a6881b2..bd16440dc7 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2230,6 +2230,7 @@
 #
 # @vxhs: Since 2.10
 # @throttle: Since 2.11
+# @nvme: Since 2.12
 #
 # Since: 2.9
 ##
@@ -2237,7 +2238,7 @@
   'data': [ 'blkdebug', 'blkverify', 'bochs', 'cloop',
 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
 'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs',
-'null-aio', 'null-co', 'parallels', 'qcow', 'qcow2', 'qed',
+'null-aio', 'null-co', 'nvme', 'parallels', 'qcow', 'qcow2', 'qed',
 'quorum', 'raw', 'rbd', 'replication', 'sheepdog', 'ssh',
 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] }
 
@@ -2278,6 +2279,19 @@
 { 'struct': 'BlockdevOptionsNull',
   'data': { '*size': 'int', '*latency-ns': 'uint64' } }
 
+##
+# @BlockdevOptionsNVMe:
+#
+# Driver specific block device options for the NVMe backend.
+#
+# @device: controller address of the NVMe device.
+# @namespace: namespace number of the device, starting from 1.
+#
+# Since: 2.12
+##
+{ 'struct': 'BlockdevOptionsNVMe',
+  'data': { 'device': 'str', 'namespace': 'int' } }
+
 ##
 # @BlockdevOptionsVVFAT:
 #
@@ -3183,6 +3197,7 @@
   'nfs':'BlockdevOptionsNfs',
   'null-aio':   'BlockdevOptionsNull',
   'null-co':'BlockdevOptionsNull',
+  'nvme':   'BlockdevOptionsNVMe',
   'parallels':  'BlockdevOptionsGenericFormat',
   'qcow2':  'BlockdevOptionsQcow2',
   'qcow':   'BlockdevOptionsQcow',
-- 
2.14.3

[Qemu-devel] [PATCH v6 6/9] qemu-img: Map bench buffer

2018-01-15 Thread Fam Zheng

Signed-off-by: Fam Zheng 
Message-Id: <20180110091846.10699-7-f...@redhat.com>
Reviewed-by: Stefan Hajnoczi 
---
 qemu-img.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/qemu-img.c b/qemu-img.c
index 68b375f998..28d0e4e9f8 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3862,6 +3862,7 @@ static int img_bench(int argc, char **argv)
 struct timeval t1, t2;
 int i;
 bool force_share = false;
+size_t buf_size;
 
 for (;;) {
 static const struct option long_options[] = {
@@ -4050,9 +4051,12 @@ static int img_bench(int argc, char **argv)
 printf("Sending flush every %d requests\n", flush_interval);
 }
 
-data.buf = blk_blockalign(blk, data.nrreq * data.bufsize);
+buf_size = data.nrreq * data.bufsize;
+data.buf = blk_blockalign(blk, buf_size);
 memset(data.buf, pattern, data.nrreq * data.bufsize);
 
+blk_register_buf(blk, data.buf, buf_size);
+
 data.qiov = g_new(QEMUIOVector, data.nrreq);
 for (i = 0; i < data.nrreq; i++) {
 qemu_iovec_init(&data.qiov[i], 1);
@@ -4073,6 +4077,9 @@ static int img_bench(int argc, char **argv)
+ ((double)(t2.tv_usec - t1.tv_usec) / 1000000));
 
 out:
+if (data.buf) {
+blk_unregister_buf(blk, data.buf);
+}
 qemu_vfree(data.buf);
 blk_unref(blk);
 
-- 
2.14.3

[Qemu-devel] [PATCH v6 3/9] block: Add VFIO based NVMe driver

2018-01-15 Thread Fam Zheng

This is a new protocol driver that exclusively opens a host NVMe
controller through VFIO. It achieves better latency than linux-aio by
completely bypassing host kernel vfs/block layer.

$rw-$bs-$iodepth  linux-aio nvme://

randread-4k-1 10.5k 21.6k
randread-512k-1   745   1591
randwrite-4k-130.7k 37.0k
randwrite-512k-1  1945  1980

(unit: IOPS)

The driver also integrates with the polling mechanism of iothread.

This patch is co-authored by Paolo and me.

Signed-off-by: Paolo Bonzini 
Signed-off-by: Fam Zheng 
Message-Id: <20180110091846.10699-4-f...@redhat.com>
---
 MAINTAINERS |6 +
 block/Makefile.objs |1 +
 block/nvme.c| 1180 +++
 block/trace-events  |   21 +
 4 files changed, 1208 insertions(+)
 create mode 100644 block/nvme.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 4770f105d4..bd636a4bff 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1876,6 +1876,12 @@ L: qemu-bl...@nongnu.org
 S: Supported
 F: block/null.c
 
+NVMe Block Driver
+M: Fam Zheng 
+L: qemu-bl...@nongnu.org
+S: Supported
+F: block/nvme*
+
 Bootdevice
 M: Gonglei 
 S: Maintained
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 6eaf78a046..4c7e9d84a7 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -11,6 +11,7 @@ block-obj-$(CONFIG_POSIX) += file-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 block-obj-y += null.o mirror.o commit.o io.o
 block-obj-y += throttle-groups.o
+block-obj-$(CONFIG_LINUX) += nvme.o
 
 block-obj-y += nbd.o nbd-client.o sheepdog.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
diff --git a/block/nvme.c b/block/nvme.c
new file mode 100644
index 00..99cc7702ad
--- /dev/null
+++ b/block/nvme.c
@@ -0,0 +1,1180 @@
+/*
+ * NVMe block driver based on vfio
+ *
+ * Copyright 2016 - 2018 Red Hat, Inc.
+ *
+ * Authors:
+ *   Fam Zheng 
+ *   Paolo Bonzini 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include 
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/error-report.h"
+#include "qemu/cutils.h"
+#include "qemu/vfio-helpers.h"
+#include "block/block_int.h"
+#include "trace.h"
+
+/* TODO: Move nvme spec definitions from hw/block/nvme.h into a separate file
+ * that doesn't depend on dma/pci headers. */
+#include "sysemu/dma.h"
+#include "hw/pci/pci.h"
+#include "hw/block/block.h"
+#include "hw/block/nvme.h"
+
+#define NVME_SQ_ENTRY_BYTES 64
+#define NVME_CQ_ENTRY_BYTES 16
+#define NVME_QUEUE_SIZE 128
+#define NVME_BAR_SIZE 8192
+
+typedef struct {
+int32_t  head, tail;
+uint8_t  *queue;
+uint64_t iova;
+/* Hardware MMIO register */
+volatile uint32_t *doorbell;
+} NVMeQueue;
+
+typedef struct {
+BlockCompletionFunc *cb;
+void *opaque;
+int cid;
+void *prp_list_page;
+uint64_t prp_list_iova;
+bool busy;
+} NVMeRequest;
+
+typedef struct {
+CoQueue free_req_queue;
+QemuMutex   lock;
+
+/* Fields protected by BQL */
+int index;
+uint8_t *prp_list_pages;
+
+/* Fields protected by @lock */
+NVMeQueue   sq, cq;
+int cq_phase;
+NVMeRequest reqs[NVME_QUEUE_SIZE];
+boolbusy;
+int need_kick;
+int inflight;
+} NVMeQueuePair;
+
+/* Memory mapped registers */
+typedef volatile struct {
+uint64_t cap;
+uint32_t vs;
+uint32_t intms;
+uint32_t intmc;
+uint32_t cc;
+uint32_t reserved0;
+uint32_t csts;
+uint32_t nssr;
+uint32_t aqa;
+uint64_t asq;
+uint64_t acq;
+uint32_t cmbloc;
+uint32_t cmbsz;
+uint8_t  reserved1[0xec0];
+uint8_t  cmd_set_specfic[0x100];
+uint32_t doorbells[];
+} QEMU_PACKED NVMeRegs;
+
+QEMU_BUILD_BUG_ON(offsetof(NVMeRegs, doorbells) != 0x1000);
+
+typedef struct {
+AioContext *aio_context;
+QEMUVFIOState *vfio;
+NVMeRegs *regs;
+/* The submission/completion queue pairs.
+ * [0]: admin queue.
+ * [1..]: io queues.
+ */
+NVMeQueuePair **queues;
+int nr_queues;
+size_t page_size;
+/* How many uint32_t elements does each doorbell entry take. */
+size_t doorbell_scale;
+bool write_cache_supported;
+EventNotifier irq_notifier;
+uint64_t nsze; /* Namespace size reported by identify command */
+int nsid;  /* The namespace id to read/write data. */
+uint64_t max_transfer;
+int plugged;
+
+CoMutex dma_map_lock;
+CoQueue dma_flush_queue;
+
+/* Total size of mapped qiov, accessed under dma_map_lock */
+int dma_map_count;
+} BDRVNVMeState;
+
+#define NVME_BLOCK_OPT_DEVICE "device"
+#define NVME_BLOCK_OPT_NAMESPACE "namespace"
+
+static

[Qemu-devel] [PATCH v6 5/9] block/nvme: Implement .bdrv_(un)register_buf

2018-01-15 Thread Fam Zheng

Forward these two calls to the IOVA manager.

Signed-off-by: Fam Zheng 
Message-Id: <20180110091846.10699-6-f...@redhat.com>
Reviewed-by: Stefan Hajnoczi 
---
 block/nvme.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/block/nvme.c b/block/nvme.c
index 99cc7702ad..30616f50c7 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -1145,6 +1145,27 @@ static void nvme_aio_unplug(BlockDriverState *bs)
 }
 }
 
+static void nvme_register_buf(BlockDriverState *bs, void *host, size_t size)
+{
+int ret;
+BDRVNVMeState *s = bs->opaque;
+
+ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL);
+if (ret) {
+/* FIXME: we may run out of IOVA addresses after repeated
+ * bdrv_register_buf/bdrv_unregister_buf, because nvme_vfio_dma_unmap
+ * doesn't reclaim addresses for fixed mappings. */
+error_report("nvme_register_buf failed: %s", strerror(-ret));
+}
+}
+
+static void nvme_unregister_buf(BlockDriverState *bs, void *host)
+{
+BDRVNVMeState *s = bs->opaque;
+
+qemu_vfio_dma_unmap(s->vfio, host);
+}
+
 static BlockDriver bdrv_nvme = {
 .format_name  = "nvme",
 .protocol_name= "nvme",
@@ -1170,6 +1191,9 @@ static BlockDriver bdrv_nvme = {
 
 .bdrv_io_plug = nvme_aio_plug,
 .bdrv_io_unplug   = nvme_aio_unplug,
+
+.bdrv_register_buf= nvme_register_buf,
+.bdrv_unregister_buf  = nvme_unregister_buf,
 };
 
 static void bdrv_nvme_init(void)
-- 
2.14.3

[Qemu-devel] [PATCH v6 2/9] util: Introduce vfio helpers

2018-01-15 Thread Fam Zheng

This is a library to manage the host vfio interface, which could be used
to implement userspace device driver code in QEMU such as NVMe or net
controllers.

Signed-off-by: Fam Zheng 
Message-Id: <20180110091846.10699-3-f...@redhat.com>
Reviewed-by: Stefan Hajnoczi 
---
 include/qemu/vfio-helpers.h |  33 ++
 util/Makefile.objs  |   1 +
 util/trace-events   |  11 +
 util/vfio-helpers.c | 726 
 4 files changed, 771 insertions(+)
 create mode 100644 include/qemu/vfio-helpers.h
 create mode 100644 util/vfio-helpers.c

diff --git a/include/qemu/vfio-helpers.h b/include/qemu/vfio-helpers.h
new file mode 100644
index 00..ce7e7b057f
--- /dev/null
+++ b/include/qemu/vfio-helpers.h
@@ -0,0 +1,33 @@
+/*
+ * QEMU VFIO helpers
+ *
+ * Copyright 2016 - 2018 Red Hat, Inc.
+ *
+ * Authors:
+ *   Fam Zheng 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_VFIO_HELPERS_H
+#define QEMU_VFIO_HELPERS_H
+#include "qemu/typedefs.h"
+
+typedef struct QEMUVFIOState QEMUVFIOState;
+
+QEMUVFIOState *qemu_vfio_open_pci(const char *device, Error **errp);
+void qemu_vfio_close(QEMUVFIOState *s);
+int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
+  bool temporary, uint64_t *iova_list);
+int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s);
+void qemu_vfio_dma_unmap(QEMUVFIOState *s, void *host);
+void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
+uint64_t offset, uint64_t size,
+Error **errp);
+void qemu_vfio_pci_unmap_bar(QEMUVFIOState *s, int index, void *bar,
+ uint64_t offset, uint64_t size);
+int qemu_vfio_pci_init_irq(QEMUVFIOState *s, EventNotifier *e,
+   int irq_type, Error **errp);
+
+#endif
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 2973b0a323..3fb611631f 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -46,3 +46,4 @@ util-obj-y += qht.o
 util-obj-y += range.o
 util-obj-y += stats64.o
 util-obj-y += systemd.o
+util-obj-$(CONFIG_LINUX) += vfio-helpers.o
diff --git a/util/trace-events b/util/trace-events
index 025499f83f..2f57bf2337 100644
--- a/util/trace-events
+++ b/util/trace-events
@@ -59,3 +59,14 @@ lockcnt_futex_wake(const void *lockcnt) "lockcnt %p waking 
up one waiter"
 # util/qemu-thread-posix.c
 qemu_mutex_locked(void *lock) "locked mutex %p"
 qemu_mutex_unlocked(void *lock) "unlocked mutex %p"
+
+# util/vfio-helpers.c
+qemu_vfio_dma_reset_temporary(void *s) "s %p"
+qemu_vfio_ram_block_added(void *s, void *p, size_t size) "s %p host %p size 
0x%zx"
+qemu_vfio_ram_block_removed(void *s, void *p, size_t size) "s %p host %p size 
0x%zx"
+qemu_vfio_find_mapping(void *s, void *p) "s %p host %p"
+qemu_vfio_new_mapping(void *s, void *host, size_t size, int index, uint64_t 
iova) "s %p host %p size %zu index %d iova 0x%"PRIx64
+qemu_vfio_do_mapping(void *s, void *host, size_t size, uint64_t iova) "s %p 
host %p size %zu iova 0x%"PRIx64
+qemu_vfio_dma_map(void *s, void *host, size_t size, bool temporary, uint64_t 
*iova) "s %p host %p size %zu temporary %d iova %p"
+qemu_vfio_dma_map_invalid(void *s, void *mapping_host, size_t mapping_size, 
void *host, size_t size) "s %p mapping %p %zu requested %p %zu"
+qemu_vfio_dma_unmap(void *s, void *host) "s %p host %p"
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
new file mode 100644
index 00..0660aaf2f7
--- /dev/null
+++ b/util/vfio-helpers.c
@@ -0,0 +1,726 @@
+/*
+ * VFIO utility
+ *
+ * Copyright 2016 - 2018 Red Hat, Inc.
+ *
+ * Authors:
+ *   Fam Zheng 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include 
+#include 
+#include "qapi/error.h"
+#include "exec/ramlist.h"
+#include "exec/cpu-common.h"
+#include "trace.h"
+#include "qemu/queue.h"
+#include "qemu/error-report.h"
+#include "standard-headers/linux/pci_regs.h"
+#include "qemu/event_notifier.h"
+#include "qemu/vfio-helpers.h"
+#include "trace.h"
+
+#define QEMU_VFIO_DEBUG 0
+
+#define QEMU_VFIO_IOVA_MIN 0x10000ULL
+/* XXX: Once VFIO exposes the iova bit width in the IOMMU capability interface,
+ * we can use a runtime limit; alternatively it's also possible to do platform
+ * specific detection by reading sysfs entries. Until then, 39 is a safe bet.
+ **/
+#define QEMU_VFIO_IOVA_MAX (1ULL << 39)
+
+typedef struct {
+/* Page aligned addr. */
+void *host;
+size_t size;
+uint64_t iova;
+} IOVAMapping;
+
+struct QEMUVFIOState {
+QemuMutex lock;
+
+/* These fields are protected by BQL */
+int container;
+int group;
+int device;
+RAMBlockNotifier ram_notifier;
+struct vfio_region_info config_region_info,

[Qemu-devel] [PATCH v6 4/9] block: Introduce buf register API

2018-01-15 Thread Fam Zheng

Allow block driver to map and unmap a buffer for later I/O, as a performance
hint.

Signed-off-by: Fam Zheng 
Message-Id: <20180110091846.10699-5-f...@redhat.com>
Reviewed-by: Stefan Hajnoczi 
---
 block/block-backend.c  | 10 ++
 block/io.c | 24 
 include/block/block.h  | 11 ++-
 include/block/block_int.h  |  9 +
 include/sysemu/block-backend.h |  3 +++
 5 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index baef8e7abc..f66349c2c9 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -2096,3 +2096,13 @@ static void blk_root_drained_end(BdrvChild *child)
 }
 }
 }
+
+void blk_register_buf(BlockBackend *blk, void *host, size_t size)
+{
+bdrv_register_buf(blk_bs(blk), host, size);
+}
+
+void blk_unregister_buf(BlockBackend *blk, void *host)
+{
+bdrv_unregister_buf(blk_bs(blk), host);
+}
diff --git a/block/io.c b/block/io.c
index 7ea402352e..89d0745e95 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2825,3 +2825,27 @@ void bdrv_io_unplug(BlockDriverState *bs)
 bdrv_io_unplug(child->bs);
 }
 }
+
+void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size)
+{
+BdrvChild *child;
+
+if (bs->drv && bs->drv->bdrv_register_buf) {
+bs->drv->bdrv_register_buf(bs, host, size);
+}
+QLIST_FOREACH(child, &bs->children, next) {
+bdrv_register_buf(child->bs, host, size);
+}
+}
+
+void bdrv_unregister_buf(BlockDriverState *bs, void *host)
+{
+BdrvChild *child;
+
+if (bs->drv && bs->drv->bdrv_unregister_buf) {
+bs->drv->bdrv_unregister_buf(bs, host);
+}
+QLIST_FOREACH(child, &bs->children, next) {
+bdrv_unregister_buf(child->bs, host);
+}
+}
diff --git a/include/block/block.h b/include/block/block.h
index 9b12774ddf..2025d7ed19 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -631,5 +631,14 @@ void bdrv_del_child(BlockDriverState *parent, BdrvChild 
*child, Error **errp);
 
 bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
  uint32_t granularity, Error **errp);
-
+/**
+ *
+ * bdrv_register_buf/bdrv_unregister_buf:
+ *
+ * Register/unregister a buffer for I/O. For example, VFIO drivers are
+ * interested to know the memory areas that would later be used for I/O, so
+ * that they can prepare IOMMU mapping etc., to get better performance.
+ */
+void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
+void bdrv_unregister_buf(BlockDriverState *bs, void *host);
 #endif
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 29cafa4236..99b9190627 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -446,6 +446,15 @@ struct BlockDriver {
 const char *name,
 Error **errp);
 
+/**
+ * Register/unregister a buffer for I/O. For example, when the driver is
+ * interested to know the memory areas that will later be used in iovs, so
+ * that it can do IOMMU mapping with VFIO etc., in order to get better
+ * performance. In the case of VFIO drivers, this callback is used to do
+ * DMA mapping for hot buffers.
+ */
+void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size);
+void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host);
 QLIST_ENTRY(BlockDriver) list;
 };
 
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index c4e52a5fa3..92ab624fac 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -229,4 +229,7 @@ void blk_io_limits_enable(BlockBackend *blk, const char 
*group);
 void blk_io_limits_update_group(BlockBackend *blk, const char *group);
 void blk_set_force_allow_inactivate(BlockBackend *blk);
 
+void blk_register_buf(BlockBackend *blk, void *host, size_t size);
+void blk_unregister_buf(BlockBackend *blk, void *host);
+
 #endif
-- 
2.14.3

[Qemu-devel] [PATCH v6 0/9] block: Add VFIO based driver for NVMe device

2018-01-15 Thread Fam Zheng

v6: Fix the cleaning up around nvme_init() and write cache setup in
nvme_file_open(). [Stefan]
Add Stefan's rev-by to the doc patch.
Rebase on to Paolo's coroutine series for q->free_req_queue:

Based-on: 20180115220822.10156-1-pbonz...@redhat.com
([PATCH 0/4] coroutine-lock: polymorphic CoQueue)

v5: Add Stefan's rev-bys.
Add Paolo's sob line.
Address Stefan's, Alex's and Eric's comments, including:
- Add PCI Bar MMIO offset/size parameter to VFIO utility API. [Alex]
- Fix various lock and memory leaks. [Stefan]
- Improve documentation. [Stefan]
- Add one missing "Since 2.12" in QAPI schema. [Eric]
- Fix locking of BDRVNVMeState->inflight. [Stefan]
- Add one missing endianness conversion for cmd.nsid. [Stefan]
- Use bdrv_get_aio_context() instead of qemu_get_current_aio_context().
- When init, issue "Set Features" command to enable/disable volatile write
  cache according to "flags & BDRV_O_NOCACHE".

v4: - Rebase, address Stefan's comments.
- Add QAPI and doc patches.

v3: Rebase, small tweaks/fixes and add locks to provide basic thread safety
(basic because it is not really tested).

v2:
- Implement "split vfio addr space" approach. [Paolo]
- Add back 'device reset' in nvme_close(). [Paolo]
- Better variable namings. [Stefan]
- "Reuse" macro definitions from NVMe emulation code.
- Rebase onto current master which has polling by default and update
  performance results accordingly.
- Update MAINTAINERS.
- Specify namespace in URI.
- The sporadic I/O error from v1 "disappeared" in this version.
- Tests done: qemu-img bench, fio, bonnie++ and installation of
  ubuntu/fedora/rhel on QEMU emulated nvme and an Intel P3700 card.

Fam Zheng (9):
  stubs: Add stubs for ram block API
  util: Introduce vfio helpers
  block: Add VFIO based NVMe driver
  block: Introduce buf register API
  block/nvme: Implement .bdrv_(un)register_buf
  qemu-img: Map bench buffer
  block: Move NVMe constants to a separate header
  docs: Add section for NVMe VFIO driver
  qapi: Add NVMe driver options to the schema

 MAINTAINERS|6 +
 block/Makefile.objs|1 +
 block/block-backend.c  |   10 +
 block/io.c |   24 +
 block/nvme.c   | 1199 
 block/trace-events |   21 +
 docs/qemu-block-drivers.texi   |   37 ++
 hw/block/nvme.h|  698 +--
 include/block/block.h  |   11 +-
 include/block/block_int.h  |9 +
 include/block/nvme.h   |  700 +++
 include/qemu/vfio-helpers.h|   33 ++
 include/sysemu/block-backend.h |3 +
 qapi/block-core.json   |   17 +-
 qemu-img.c |9 +-
 stubs/Makefile.objs|1 +
 stubs/ram-block.c  |   16 +
 util/Makefile.objs |1 +
 util/trace-events  |   11 +
 util/vfio-helpers.c|  726 
 20 files changed, 2833 insertions(+), 700 deletions(-)
 create mode 100644 block/nvme.c
 create mode 100644 include/block/nvme.h
 create mode 100644 include/qemu/vfio-helpers.h
 create mode 100644 stubs/ram-block.c
 create mode 100644 util/vfio-helpers.c

-- 
2.14.3

[Qemu-devel] [PATCH v6 1/9] stubs: Add stubs for ram block API

2018-01-15 Thread Fam Zheng

These functions will be wanted by block-obj-y but the actual definition
is in obj-y, so stub them to keep the linker happy.

Signed-off-by: Fam Zheng 
Acked-by: Paolo Bonzini 
Message-Id: <20180110091846.10699-2-f...@redhat.com>
Reviewed-by: Stefan Hajnoczi 
---
 stubs/Makefile.objs |  1 +
 stubs/ram-block.c   | 16 
 2 files changed, 17 insertions(+)
 create mode 100644 stubs/ram-block.c

diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
index 8cfe34328a..2d59d84091 100644
--- a/stubs/Makefile.objs
+++ b/stubs/Makefile.objs
@@ -42,3 +42,4 @@ stub-obj-y += vmgenid.o
 stub-obj-y += xen-common.o
 stub-obj-y += xen-hvm.o
 stub-obj-y += pci-host-piix.o
+stub-obj-y += ram-block.o
diff --git a/stubs/ram-block.c b/stubs/ram-block.c
new file mode 100644
index 00..cfa5d8678f
--- /dev/null
+++ b/stubs/ram-block.c
@@ -0,0 +1,16 @@
+#include "qemu/osdep.h"
+#include "exec/ramlist.h"
+#include "exec/cpu-common.h"
+
+void ram_block_notifier_add(RAMBlockNotifier *n)
+{
+}
+
+void ram_block_notifier_remove(RAMBlockNotifier *n)
+{
+}
+
+int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
+{
+return 0;
+}
-- 
2.14.3

Re: [Qemu-devel] [RESEND PATCH 2/6] memory: introduce AddressSpaceOps and IOMMUObject

2018-01-15 Thread David Gibson

On Fri, Jan 12, 2018 at 06:25:34PM +0800, Liu, Yi L wrote:
> On Wed, Dec 20, 2017 at 10:18:16PM +1100, David Gibson wrote:
> > On Wed, Dec 20, 2017 at 02:47:30PM +0800, Liu, Yi L wrote:
> > > On Mon, Dec 18, 2017 at 10:35:31PM +1100, David Gibson wrote:
> > > > On Wed, Nov 15, 2017 at 03:16:32PM +0800, Peter Xu wrote:
> > > > > On Tue, Nov 14, 2017 at 10:52:54PM +0100, Auger Eric wrote:
> > > > > 
> 
> [...]
> 
> Sorry for the delayed reply, spent some time on reconsidering your comments.
> 
> > 
> > I'm ok with calling it a "PASID context".
> > 
> > Thinking about this some more, here are some extra observations:
> > 
> >  * I think each device needs both a PASID context and an ordinary
> >address space.  The PASID context would be used for bus
> >transactions which include a process id, the address space for
> >those that don't.
> > 
> >  * Theoretically, the PASID context could be modelled as an array/map
> >of AddressSpace objects for each process ID.  However, creating all
> >those AddressSpace objects in advance might be too expensive.  I
> >can see a couple of options to avoid this:
> > 
> > 1) Have the PASID context class include a 'translate' method similar
> > to the one in IOMMUMemoryRegionClass, but taking a process ID as well
> > as an address.  This would avoid creating extra AddressSpace objects,
> > but might require duplicating a bunch of the translation code that
> > already exists for AddressSpace.
> > 
> > 2) "Lazily" create AddressSpace objects.  The generic part of the
> > PASID aware DMA helper functions would use a cache of AddressSpace's
> > for particular process IDs, using the AddressSpace (and MemoryRegion
> > within) to translate accesses for a particular process ID.  However,
> > these AddressSpace and MemoryRegion objects would only be created when
> > the device first accesses that address space.  In the common case,
> > where a single device is just being used by a single process or a
> > small number, this should keep the number of AddressSpace objects
> > relatively small.  Obviously the cache would need to be invalidated,
> > cleaning up the AddressSpace objects, when the PASID table is altered.
> 
> Sorry, a double check here. Does "AddressSpace objects" mean the existing
> AddressSpace definition in Qemu?

Yes.

> >  * I realize that the expected case here is with KVM, where the guest
> >controls the first level translation, but the host controls the
> >second level translation.  However, we should also be able to model
> >the case where the guest controls both levels for the sake of full
> >system emulation.  I think understanding this case will lead to a
> >better design even for the simpler case.
> > 
> > Do you have a plan for what the virt-SVM aware DMA functions will look
> > like?
> 
> The behaviour is device specific.
> For an SVM-capable physical device, it would store the PASID value in a
> register located in the device. E.g. a GPU context can be set to use SVM;
> after the PASID is set, any DMA from this context targets a
> process virtual address space.

That doesn't sound any more device specific than any DMA operation,
and we have helpers for that.

> So for a virt-SVM aware DMA device, the device model needs to figure out
> the target address space. With the correct address space, then consume
> the translate() callback provided by iommu emulator. And then emulate the
> DMA operation for the emulated device.

Nearly all of that sounds like something that belongs in a helper
function.  Basically a variant of dma_memory_rw() (and related
functions) that takes a PASID as well as an address.

> I'll try to get a new version with your suggestions.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH v2] net: Allow hubports to connect to other netdevs

2018-01-15 Thread Jason Wang




On 2018年01月16日 03:50, Thomas Huth wrote:

QEMU can emulate hubs to connect NICs and netdevs. This is currently
primarily used for the mis-named 'vlan' feature of the networking
subsystem. Now the 'vlan' feature has been marked as deprecated, since
its name is rather confusing and the users often rather mis-configure
their network when trying to use it. But while the 'vlan' parameter
should be removed at one point in time, the basic idea of emulating
a hub in QEMU is still good: It's useful for bundling up the output of
multiple NICs into one single l2tp netdev for example.

Now to be able to use the hubport feature without 'vlan's, there is one
missing piece: The possibility to connect a hubport to a netdev, too.
This patch adds this possibility by introducing a new "netdev=..."
parameter to the hubports.

To bundle up the output of multiple NICs into one socket netdev, you can
now run QEMU with these parameters for example:

qemu-system-ppc64 ... -netdev socket,id=s1,connect=:11122 \
 -netdev hubport,hubid=1,id=h1,netdev=s1 \
 -netdev hubport,hubid=1,id=h2 -device e1000,netdev=h2 \
 -netdev hubport,hubid=1,id=h3 -device virtio-net-pci,netdev=h3

For using the socket netdev, you have got to start another QEMU as the
receiving side first, for example with network dumping enabled:

qemu-system-x86_64 -M isapc -netdev socket,id=s0,listen=:11122 \
 -device ne2k_isa,netdev=s0 \
 -object filter-dump,id=f1,netdev=s0,file=/tmp/dump.dat

After the ppc64 guest tried to boot from both NICs, you can see in the
dump file (using Wireshark, for example), that the output of both NICs
(the e1000 and the virtio-net-pci) has been successfully transferred
via the socket netdev in this case.

Suggested-by: Paolo Bonzini
Signed-off-by: Thomas Huth
---
  v2: Set up peer via qemu_new_net_client() instead of duplicating the
  code to do this in net_init_hubport()


Applied.

Thanks

Re: [Qemu-devel] [PULL 01/33] MAINTAINERS: Add myself as maintainer to X86 machines

2018-01-15 Thread Thomas Huth

On 16.01.2018 05:46, Michael S. Tsirkin wrote:
> From: Marcel Apfelbaum 
> 
> Signed-off-by: Marcel Apfelbaum 
> Signed-off-by: Marcel Apfelbaum 

I think the second SoB should rather be yours, Michael, instead of a second one
from Marcel.

> ---
>  MAINTAINERS | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 4770f10..753e799 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -852,6 +852,7 @@ X86 Machines
>  
>  PC
>  M: Michael S. Tsirkin 
> +M: Marcel Apfelbaum 
>  S: Supported
>  F: include/hw/i386/
>  F: hw/i386/
>

Re: [Qemu-devel] [PATCH] net: Allow hubports to connect to other netdevs

2018-01-15 Thread Jason Wang




On 2018年01月16日 02:06, Thomas Huth wrote:

On 15.01.2018 18:36, Thomas Huth wrote:

On 15.01.2018 08:40, Jason Wang wrote:

[...]

And since it was a hub, do we need to send to its netdev too inside
net_hub_receive()?

I currently don't think so, but I'll check again...

OK, I now think we're definitely fine here. The check is really just
there to make sure that we do not send the packet back to the same
sender. And I've checked with a command line like this that network
traffic (TFTP booting in this case) works as expected:

qemu-system-ppc64 -vga none -nographic \
  -netdev user,id=s1,tftp=/path/to/tftpdir,bootfile=ppc64.img \
  -netdev hubport,hubid=1,id=h1,netdev=s1  \
  -netdev hubport,hubid=1,id=h3 -device virtio-net-pci,netdev=h3 \
  -boot n -object filter-dump,id=f1,netdev=s1,file=/tmp/dump.dat

Both, the behaviour of the TFTP boot in the guest and the wireshark dump
looked fine, there were no missing packets here.

  Thomas


Right, I think I misread the command parameters.

Thanks

Re: [Qemu-devel] vhost-pci and virtio-vhost-user

2018-01-15 Thread Jason Wang




On 2018年01月15日 21:56, Stefan Hajnoczi wrote:

On Mon, Jan 15, 2018 at 02:56:31PM +0800, Jason Wang wrote:

On 2018年01月12日 18:18, Stefan Hajnoczi wrote:

And what's more important, according to the kvm 2016 slides of vhost-pci,
the motivation of vhost-pci is not building SDN but a chain of VNFs. So
bypassing the central vswitch through a private VM2VM path does make sense.
(Though whether or not vhost-pci is the best choice is still questionable).

This is probably my fault.  Maybe my networking terminology is wrong.  I
consider "virtual network functions" to be part of "software-defined
networking" use cases.  I'm not implying there must be a central virtual
switch.

To rephrase: vhost-pci enables exitless VM2VM communication.

The problem is, exitless is not what vhost-pci invents, it could be achieved
now when both sides are doing busypolling.

The only way I'm aware of is ivshmem.  But ivshmem lacks a family of
standard device types that allows different implementations to
interoperate.  We already have the virtio family of device types, so it
makes sense to work on a virtio-based solution.

Perhaps I've missed a different approach for exitless VM2VM
communication.  Please explain how VM1 and VM2 can do exitless network
communication today?


I'm not sure we're talking the same thing. For VM2VM, do you mean only 
for shared memory? I thought we can treat any backends that can transfer 
data directly between two VMs for a VM2VM solution. In this case, if 
virtqueue notifications were disabled by all sides (e.g busy polling), 
there will be no exits at all.


And if you want a virtio version of shared memory, it's another kind of 
motivation at least from my point of view.




Also, how can VM1 provide SCSI I/O services to VM2 today?

Stefan


I know little about storage, but it looks to me iSCSI can do this.

Thanks

Re: [Qemu-devel] vhost-pci and virtio-vhost-user

2018-01-15 Thread Jason Wang




On 2018年01月15日 18:43, Wei Wang wrote:

On 01/15/2018 04:34 PM, Jason Wang wrote:



On 2018年01月15日 15:59, Wei Wang wrote:

On 01/15/2018 02:56 PM, Jason Wang wrote:



On 2018年01月12日 18:18, Stefan Hajnoczi wrote:




I just fail to understand why we can't do software defined network or 
storage with existing virtio devices/drivers (or are there any 
shortcomings that force us to invent new infrastructure).




Existing virtio-net works with a host central vSwitch, and it has 
the following disadvantages:

1) long code/data path;
2) poor scalability; and
3) host CPU sacrifice


Please show me the numbers.


Sure. For 64B packet transmission between two VMs: vhost-user reports 
~6.8Mpps, and vhost-pci reports ~11Mpps, which is ~1.62x faster.




This result is kind of incomplete. So still many questions left:

- What's the configuration of the vhost-user?
- What's the result of e.g 1500 byte?
- You said it improves scalability, at least I can't get this conclusion 
just from what you provide here

- You suspect long code/data path, but no latency numbers to prove it







Vhost-pci solves the above issues by providing a point-to-point 
communication between VMs. No matter how the control path would look 
like finally, the key point is that the data path is P2P between VMs.


Best,
Wei




Well, I think I've pointed out several times in the replies of 
previous versions. Both vhost-pci-net and virtio-net is an ethernet 
device, which is not tied to a central vswitch for sure. There're 
just too many methods or tricks which can be used to build a point to 
point data path.



Could you please show an existing example that makes virtio-net work 
without a host vswitch/bridge?


For vhost-user, it's as simple as a testpmd which does io forwarding 
between two vhost ports? For kernel, you can do even more tricks, tc, 
bpf or whatever others.


Could you also share other p2p data path solutions that you have in 
mind? Thanks.



Best,
Wei



So my point stands still: both vhost-pci-net and virtio-net are ethernet 
devices, any ethernet device can connect to each other directly without 
switch. Saying virtio-net can not connect to each other directly without 
a switch obviously make no sense, it's a network topology issue for 
sure. Even if it was not a typical setup or configuration, extending the 
exist backends is 1st choice unless you can prove there're any design 
limitations of exist solutions.


Thanks

Re: [Qemu-devel] [PATCH] tests/boot-serial-test: fix powernv support

2018-01-15 Thread David Gibson

On Mon, Jan 15, 2018 at 01:24:17PM +0100, Cédric Le Goater wrote:
> Recent commit introduced the firmware image skiboot 5.9 which
> has a different first line output.
> 
> Signed-off-by: Cédric Le Goater 

Applied to ppc-for-2.12.

> ---
>  tests/boot-serial-test.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/tests/boot-serial-test.c b/tests/boot-serial-test.c
> index dd3828c49b38..0a848a1eeb6f 100644
> --- a/tests/boot-serial-test.c
> +++ b/tests/boot-serial-test.c
> @@ -41,7 +41,7 @@ static testdef_t tests[] = {
>  { "ppc64", "ppce500", "", "U-Boot" },
>  { "ppc64", "prep", "", "Open Hack'Ware BIOS" },
>  { "ppc64", "pseries", "", "Open Firmware" },
> -{ "ppc64", "powernv", "-cpu POWER8", "SkiBoot" },
> +{ "ppc64", "powernv", "-cpu POWER8", "OPAL" },
>  { "i386", "isapc", "-cpu qemu32 -device sga", "SGABIOS" },
>  { "i386", "pc", "-device sga", "SGABIOS" },
>  { "i386", "q35", "-device sga", "SGABIOS" },

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [Qemu-arm] [PATCH] hw/misc: Add code to emulate Xilinx Slave Serial port

2018-01-15 Thread Philippe Mathieu-Daudé

On 01/15/2018 10:51 PM, Andrey Smirnov wrote:
> On Tue, Dec 19, 2017 at 4:48 PM, Alistair Francis
>  wrote:
>> On Thu, Dec 14, 2017 at 7:19 AM, Andrey Smirnov
>>  wrote:
>>> Add code to emulate Xilinx Slave Serial FPGA configuration port.
>>>
>>> Cc: "Edgar E. Iglesias" 
>>> Cc: Alistair Francis 
>>> Cc: qemu-devel@nongnu.org
>>> Cc: qemu-...@nongnu.org
>>> Cc: yurov...@gmail.com
>>> Signed-off-by: Andrey Smirnov 
>>
>> Hey,
>>
>> Thanks for the patch!
>>
>> I have some comments inline, if anything is unclear just email me back
>> and I can provide more information or help.
>>
>>> ---
>>>
>>> Integrating this into a build system via "obj-y" might not be the best
>>> way. Does this code need a dedicated CONFIG_ symbol?
>>
>> You probably don't need a specific one, there are already some Xilinx
>> ones in there you can use.
>>
>> Maybe CONFIG_XILINX or CONFIG_XILINX_AXI
>>
> 
> OK, will do if I ever re-spin this patch
> 
>>>
>>> Thanks,
>>> Andrey Smirnov
>>>
>>>
>>>  hw/misc/Makefile.objs |   1 +
>>>  hw/misc/xilinx_slave_serial.c | 105 
>>> ++
>>>  include/hw/misc/xilinx_slave_serial.h |  21 +++
>>>  3 files changed, 127 insertions(+)
>>>  create mode 100644 hw/misc/xilinx_slave_serial.c
>>>  create mode 100644 include/hw/misc/xilinx_slave_serial.h
>>
>> You will need to connect this to a machine as well.
>>
>>>
>>> diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
>>> index a68a201083..4599288e55 100644
>>> --- a/hw/misc/Makefile.objs
>>> +++ b/hw/misc/Makefile.objs
>>> @@ -38,6 +38,7 @@ obj-$(CONFIG_IMX) += imx7_ccm.o
>>>  obj-$(CONFIG_IMX) += imx2_wdt.o
>>>  obj-$(CONFIG_IMX) += imx7_snvs.o
>>>  obj-$(CONFIG_IMX) += imx7_gpr.o
>>> +obj-y += xilinx_slave_serial.o
>>>  obj-$(CONFIG_MILKYMIST) += milkymist-hpdmc.o
>>>  obj-$(CONFIG_MILKYMIST) += milkymist-pfpu.o
>>>  obj-$(CONFIG_MAINSTONE) += mst_fpga.o
>>> diff --git a/hw/misc/xilinx_slave_serial.c b/hw/misc/xilinx_slave_serial.c
>>> new file mode 100644
>>> index 00..607674fb60
>>> --- /dev/null
>>> +++ b/hw/misc/xilinx_slave_serial.c
>>> @@ -0,0 +1,105 @@
>>> +/*
>>> + * Copyright (c) 2017, Impinj, Inc.
>>> + *
>>> + * Code to emulate programming "port" of Xilinx FPGA in Slave Serial
>>> + * configuration connected via SPI, for more details see (p. 27):
>>> + *
>>> + * See https://www.xilinx.com/support/documentation/user_guides/ug380.pdf
>>
>> Ah, so this is for a Spartan-6 device. We don't have any QEMU support
>> for Spartan-6. What are you trying to use this for?

Well, this question is valid for all the ARM FDT devices...

Alistair: GSoC idea: have an easier way to integrate FDT devices into
QEMU and automagically qtest them.

If devices have qtests for code coverage, I think we should accept them
upstream, even if they are not yet plugged into a board.

The other way, there are motivated contributors who start sending
patches but then never finish due to changes in life and reduced spare
time, or lack of motivation due to the high quality asked by some
maintainer or daily paid reviewers, which is a shame IMHO.

Speaking from experience I already found in the ML archives some pieces
of unfinished code of devices I am thinking about implementing, and a few of
them pretty finished, but never merged. The contributors comments are
"Hey, I wrote this device and it works for me" garage-sale attitude "if
you find something useful, take it, else let it in the trash".

> The use-case for this patch is to fool FPGA configuration tools
> running on the guest into believing that they successfully configure
> Spartan-6 device. I tested this code against
> "drivers/fpga/xilinx-spi.c" from Linux kernel.
> 
>>> + *
>>> + * Author: Andrey Smirnov 
>>> + *
>>> + * This work is licensed under the terms of the GNU GPL, version 2 or 
>>> later.
>>> + * See the COPYING file in the top-level directory.
>>> + */
>>> +
>>> +#include "qemu/osdep.h"
>>> +#include "hw/misc/xilinx_slave_serial.h"
>>> +#include "qemu/log.h"
>>> +
>>> +enum {
>>> +XILINX_SLAVE_SERIAL_STATE_RESET,
>>> +XILINX_SLAVE_SERIAL_STATE_RECONFIGURATION,
>>> +XILINX_SLAVE_SERIAL_STATE_DONE,
>>> +};
>>> +
>>> +static void xilinx_slave_serial_update_outputs(XilinxSlaveSerialState 
>>> *xlnxss)
>>
>> For function names try to use xlnx instead of xilinx, it just saves line 
>> length.
> 
> Will fix if I re-spin this patch.
> 
>>
>>> +{
>>> +qemu_set_irq(xlnxss->done,
>>> + xlnxss->state == XILINX_SLAVE_SERIAL_STATE_DONE);
>>> +}
>>> +
>>> +static void xilinx_slave_serial_reset(DeviceState *dev)
>>> +{
>>> +XilinxSlaveSerialState *xlnxss = XILINX_SLAVE_SERIAL(dev);
>>
>> This is generally just called 's'.
> 
> OK, will fix if I re-spin this patch
> 
>>
>>> +
>>> +xlnxss->state = XILINX_SLAVE_SERIAL_STATE_RESET;
>>> +
>>> +

[Qemu-devel] [PULL 23/33] vhost: Simplify ring verification checks

2018-01-15 Thread Michael S. Tsirkin

From: "Dr. David Alan Gilbert" 

vhost_verify_ring_mappings() was used to verify that
rings are still accessible and related memory hasn't
been moved after flatview is updated.

It was doing checks by mapping ring's GPA+len and
checking that HVA hadn't changed with new memory map.
To avoid maybe expensive mapping call, we were
identifying address range that changed and were doing
mapping only if ring was in changed range.

However it's not necessary to perform ring's GPA
mapping as we already have its current HVA and all
we need is to verify that ring's GPA translates to
the same HVA in updated flatview.

This will allow the following patches to simplify the range
comparison that was previously needed to avoid expensive
verify_ring_mapping calls.

Signed-off-by: Igor Mammedov 
with modifications by:
Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Igor Mammedov 
---
 hw/virtio/vhost.c | 79 +--
 1 file changed, 42 insertions(+), 37 deletions(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 83df043..0bdd833 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -450,35 +450,37 @@ static void vhost_memory_unmap(struct vhost_dev *dev, 
void *buffer,
 }
 }
 
-static int vhost_verify_ring_part_mapping(struct vhost_dev *dev,
-  void *part,
-  uint64_t part_addr,
-  uint64_t part_size,
-  uint64_t start_addr,
-  uint64_t size)
-{
-hwaddr l;
-void *p;
-int r = 0;
-
-if (!ranges_overlap(start_addr, size, part_addr, part_size)) {
+static int vhost_verify_ring_part_mapping(void *ring_hva,
+  uint64_t ring_gpa,
+  uint64_t ring_size,
+  void *reg_hva,
+  uint64_t reg_gpa,
+  uint64_t reg_size)
+{
+uint64_t hva_ring_offset;
+uint64_t ring_last = range_get_last(ring_gpa, ring_size);
+uint64_t reg_last = range_get_last(reg_gpa, reg_size);
+
+if (ring_last < reg_gpa || ring_gpa > reg_last) {
 return 0;
 }
-l = part_size;
-p = vhost_memory_map(dev, part_addr, &l, 1);
-if (!p || l != part_size) {
-r = -ENOMEM;
+/* check that the whole ring is mapped */
+if (ring_last > reg_last) {
+return -ENOMEM;
 }
-if (p != part) {
-r = -EBUSY;
+/* check that ring's MemoryRegion wasn't replaced */
+hva_ring_offset = ring_gpa - reg_gpa;
+if (ring_hva != reg_hva + hva_ring_offset) {
+return -EBUSY;
 }
-vhost_memory_unmap(dev, p, l, 0, 0);
-return r;
+
+return 0;
 }
 
 static int vhost_verify_ring_mappings(struct vhost_dev *dev,
-  uint64_t start_addr,
-  uint64_t size)
+  void *reg_hva,
+  uint64_t reg_gpa,
+  uint64_t reg_size)
 {
 int i, j;
 int r = 0;
@@ -492,22 +494,25 @@ static int vhost_verify_ring_mappings(struct vhost_dev 
*dev,
 struct vhost_virtqueue *vq = dev->vqs + i;
 
 j = 0;
-r = vhost_verify_ring_part_mapping(dev, vq->desc, vq->desc_phys,
-   vq->desc_size, start_addr, size);
+r = vhost_verify_ring_part_mapping(
+vq->desc, vq->desc_phys, vq->desc_size,
+reg_hva, reg_gpa, reg_size);
 if (r) {
 break;
 }
 
 j++;
-r = vhost_verify_ring_part_mapping(dev, vq->avail, vq->avail_phys,
-   vq->avail_size, start_addr, size);
+r = vhost_verify_ring_part_mapping(
+vq->avail, vq->avail_phys, vq->avail_size,
+reg_hva, reg_gpa, reg_size);
 if (r) {
 break;
 }
 
 j++;
-r = vhost_verify_ring_part_mapping(dev, vq->used, vq->used_phys,
-   vq->used_size, start_addr, size);
+r = vhost_verify_ring_part_mapping(
+vq->used, vq->used_phys, vq->used_size,
+reg_hva, reg_gpa, reg_size);
 if (r) {
 break;
 }
@@ -635,13 +640,11 @@ static void vhost_commit(MemoryListener *listener)
 {
 struct vhost_dev *dev = container_of(listener, struct vhost_dev,
  memory_listener);
-hwaddr start_addr = 0;
-ram_addr_t size = 0;
 MemoryRegionSection *old_sections;
 int

Re: [Qemu-devel] [PATCH qemu v2] RFC: vfio-pci: Allow mmap of MSIX BAR

2018-01-15 Thread Alexey Kardashevskiy

On 06/01/18 02:29, Alex Williamson wrote:
> On Fri, 5 Jan 2018 10:48:07 +0100
> Auger Eric  wrote:
> 
>> Hi Alexey,
>>
>> On 15/12/17 07:29, Alexey Kardashevskiy wrote:
>>> This makes use of a new VFIO_REGION_INFO_CAP_MSIX_MAPPABLE capability
>>> which tells that a region with MSIX data can be mapped entirely, i.e.
>>> the VFIO PCI driver won't prevent MSIX vectors area from being mapped.
>>>
>>> With this change, all BARs are mapped in a single chunk and MSIX vectors
>>> are emulated on top unless the machine requests not to by defining and
>>> enabling a new "vfio-no-msix-emulation" property. At the moment only
>>> sPAPR machine does so - it prohibits MSIX emulation and does not allow
>>> enabling it as it does not define the "set" callback for the new property;
>>> the new property also does not appear in "-machine pseries,help".
>>>
>>> If the new capability is present, this puts MSIX IO memory region under
>>> mapped memory region. If the capability is not there, it falls back to
>>> the old behaviour with the sparse capability.
>>>
>>> If MSIX vectors section is not aligned to the page size, the KVM memory
>>> listener does not register it with the KVM as a memory slot and MSIX is
>>> emulated by QEMU as before.
>>>
>>> This requires the kernel change - "vfio-pci: Allow mapping MSIX BAR" -
>>> for the new capability: https://www.spinics.net/lists/kvm/msg160282.html
>>>
>>> Signed-off-by: Alexey Kardashevskiy 
>>> ---
>>>
>>> This is mtree and flatview BEFORE this patch:
>>>
>>> "info mtree":
>>> memory-region: p...@8002000.mmio
>>>   - (prio 0, i/o): p...@8002000.mmio
>>> 2100-2100 (prio 1, i/o): 0001:03:00.0 BAR 1
>>>   2100e000-2100e5ff (prio 0, i/o): msix-table
>>>   2100f000-2100f00f (prio 0, i/o): msix-pba [disabled]
>>> 2104-2107 (prio 1, i/o): 0001:03:00.0 BAR 3
>>>   2104-2107 (prio 0, ramd): 0001:03:00.0 BAR 3 
>>> mmaps[0]
>>>
>>> "info mtree -f":
>>> FlatView #0
>>>  AS "memory", root: system
>>>  AS "cpu-memory", root: system
>>>  Root memory region: system
>>>   -7fff (prio 0, ram): ppc_spapr.ram
>>>   2100-2100dfff (prio 1, i/o): 0001:03:00.0 BAR 1
>>>   2100e000-2100e5ff (prio 0, i/o): msix-table
>>>   2100e600-2100 (prio 1, i/o): 0001:03:00.0 BAR 1 
>>> @e600
>>>   2104-2107 (prio 0, ramd): 0001:03:00.0 BAR 3 
>>> mmaps[0]
>>>
>>>
>>>
>>> This is AFTER this patch applied:
>>>
>>> "info mtree":
>>> memory-region: p...@8002000.mmio
>>>   - (prio 0, i/o): p...@8002000.mmio
>>> 2100-2100 (prio 1, i/o): 0001:03:00.0 BAR 1
>>>   2100-2100 (prio 0, ramd): 0001:03:00.0 BAR 1 
>>> mmaps[0]
>>> 2100e000-2100e5ff (prio 0, i/o): msix-table 
>>> [disabled]
>>> 2100f000-2100f00f (prio 0, i/o): msix-pba [disabled]
>>> 2104-2107 (prio 1, i/o): 0001:03:00.0 BAR 3
>>>   2104-2107 (prio 0, ramd): 0001:03:00.0 BAR 3 
>>> mmaps[0]
>>>
>>>
>>> "info mtree -f":
>>> FlatView #2
>>>  AS "memory", root: system
>>>  AS "cpu-memory", root: system
>>>  Root memory region: system
>>>   -7fff (prio 0, ram): ppc_spapr.ram
>>>   2100-2100 (prio 0, ramd): 0001:03:00.0 BAR 1 
>>> mmaps[0]
>>>   2104-2107 (prio 0, ramd): 0001:03:00.0 BAR 3 
>>> mmaps[0]
>>>
>>>
>>>
>>> This is AFTER this patch applied AND spapr_get_msix_emulation() patched
>>> to enable emulation:
>>>
>>> "info mtree":
>>> memory-region: p...@8002000.mmio
>>>   - (prio 0, i/o): p...@8002000.mmio
>>> 2100-2100 (prio 1, i/o): 0001:03:00.0 BAR 1
>>>   2100-2100 (prio 0, ramd): 0001:03:00.0 BAR 1 
>>> mmaps[0]
>>> 2100e000-2100e5ff (prio 0, i/o): msix-table
>>> 2100f000-2100f00f (prio 0, i/o): msix-pba [disabled]
>>> 2104-2107 (prio 1, i/o): 0001:03:00.0 BAR 3
>>>   2104-2107 (prio 0, ramd): 0001:03:00.0 BAR 3 
>>> mmaps[0]
>>>
>>> "info mtree -f":
>>> FlatView #1
>>>  AS "memory", root: system
>>>  AS "cpu-memory", root: system
>>>  Root memory region: system
>>>   -7fff (prio 0, ram): ppc_spapr.ram
>>>   2100-2100dfff (prio 0, ramd): 0001:03:00.0 BAR 1 
>>> mmaps[0]
>>>   2100e000-2100e5ff (prio 0, i/o): msix-table
>>>   2100e600-2100 (prio 0, ramd): 0001:03:00.0 BAR 1 
>>> mmaps[0] @e600
>>>   2104-2107 (prio 0, ramd):

Re: [Qemu-devel] [PATCH v2 1/1] target-ppc: Fix booke206 tlbwe TLB instruction

2018-01-15 Thread David Gibson

On Mon, Jan 15, 2018 at 10:32:20AM +0100, Luc MICHEL wrote:
> When overwriting a valid TLB entry with a new one, the previous page
> was not flushed in QEMU TLB, leading to incoherent mapping. This commit
> fixes this.
> 
> Signed-off-by: Luc MICHEL 

Applied to ppc-for-2.12.

> ---
>  target/ppc/mmu_helper.c | 32 +++-
>  1 file changed, 27 insertions(+), 5 deletions(-)
> 
> diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c
> index 2a1f9902c9..298c15e961 100644
> --- a/target/ppc/mmu_helper.c
> +++ b/target/ppc/mmu_helper.c
> @@ -2570,6 +2570,17 @@ void helper_booke_setpid(CPUPPCState *env, uint32_t 
> pidn, target_ulong pid)
>  tlb_flush(CPU(cpu));
>  }
>  
> +static inline void flush_page(CPUPPCState *env, ppcmas_tlb_t *tlb)
> +{
> +PowerPCCPU *cpu = ppc_env_get_cpu(env);
> +
> +if (booke206_tlb_to_page_size(env, tlb) == TARGET_PAGE_SIZE) {
> +tlb_flush_page(CPU(cpu), tlb->mas2 & MAS2_EPN_MASK);
> +} else {
> +tlb_flush(CPU(cpu));
> +}
> +}
> +
>  void helper_booke206_tlbwe(CPUPPCState *env)
>  {
>  PowerPCCPU *cpu = ppc_env_get_cpu(env);
> @@ -2628,6 +2639,21 @@ void helper_booke206_tlbwe(CPUPPCState *env)
>  if (msr_gs) {
>  cpu_abort(CPU(cpu), "missing HV implementation\n");
>  }
> +
> +if (tlb->mas1 & MAS1_VALID) {
> +/* Invalidate the page in QEMU TLB if it was a valid entry.
> + *
> + * In "PowerPC e500 Core Family Reference Manual, Rev. 1",
> + * Section "12.4.2 TLB Write Entry (tlbwe) Instruction":
> + * (https://www.nxp.com/docs/en/reference-manual/E500CORERM.pdf)
> + *
> + * "Note that when an L2 TLB entry is written, it may be displacing 
> an
> + * already valid entry in the same L2 TLB location (a victim). If a
> + * valid L1 TLB entry corresponds to the L2 MMU victim entry, that L1
> + * TLB entry is automatically invalidated." */
> +flush_page(env, tlb);
> +}
> +
>  tlb->mas7_3 = ((uint64_t)env->spr[SPR_BOOKE_MAS7] << 32) |
>  env->spr[SPR_BOOKE_MAS3];
>  tlb->mas1 = env->spr[SPR_BOOKE_MAS1];
> @@ -2663,11 +2689,7 @@ void helper_booke206_tlbwe(CPUPPCState *env)
>  tlb->mas1 &= ~MAS1_IPROT;
>  }
>  
> -if (booke206_tlb_to_page_size(env, tlb) == TARGET_PAGE_SIZE) {
> -tlb_flush_page(CPU(cpu), tlb->mas2 & MAS2_EPN_MASK);
> -} else {
> -tlb_flush(CPU(cpu));
> -}
> +flush_page(env, tlb);
>  }
>  
>  static inline void booke206_tlb_to_mas(CPUPPCState *env, ppcmas_tlb_t *tlb)

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

[Qemu-devel] [PULL 19/33] vhost-user-test: make features mask an init_virtio_dev() argument

2018-01-15 Thread Michael S. Tsirkin

From: Maxime Coquelin 

The goal is to generalize the use of [un]init_virtio_dev() to
all tests, which does not necessarily expose the same features
set.

Signed-off-by: Maxime Coquelin 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Marc-André Lureau 
---
 tests/vhost-user-test.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index 969e393..6a144e8 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -164,7 +164,7 @@ typedef struct TestServer {
 static const char *tmpfs;
 static const char *root;
 
-static void init_virtio_dev(TestServer *s)
+static void init_virtio_dev(TestServer *s, uint32_t features_mask)
 {
 uint32_t features;
 int i;
@@ -187,7 +187,7 @@ static void init_virtio_dev(TestServer *s)
 }
 
 features = qvirtio_get_features(&s->dev->vdev);
-features = features & (1u << VIRTIO_NET_F_MAC);
+features = features & features_mask;
 qvirtio_set_features(&s->dev->vdev, features);
 
 qvirtio_set_driver_ok(&s->dev->vdev);
@@ -652,7 +652,7 @@ static void test_read_guest_mem(void)
 s = qtest_start(qemu_cmd);
 g_free(qemu_cmd);
 
-init_virtio_dev(server);
+init_virtio_dev(server, 1u << VIRTIO_NET_F_MAC);
 
 read_guest_mem(server);
 
@@ -681,7 +681,7 @@ static void test_migrate(void)
 from = qtest_start(cmd);
 g_free(cmd);
 
-init_virtio_dev(s);
+init_virtio_dev(s, 1u << VIRTIO_NET_F_MAC);
 wait_for_fds(s);
 size = get_log_size(s);
 g_assert_cmpint(size, ==, (2 * 1024 * 1024) / (VHOST_LOG_PAGE * 8));
@@ -803,7 +803,7 @@ static void test_reconnect_subprocess(void)
 qtest_start(cmd);
 g_free(cmd);
 
-init_virtio_dev(s);
+init_virtio_dev(s, 1u << VIRTIO_NET_F_MAC);
 wait_for_fds(s);
 wait_for_rings_started(s, 2);
 
@@ -841,7 +841,7 @@ static void test_connect_fail_subprocess(void)
 qtest_start(cmd);
 g_free(cmd);
 
-init_virtio_dev(s);
+init_virtio_dev(s, 1u << VIRTIO_NET_F_MAC);
 wait_for_fds(s);
 wait_for_rings_started(s, 2);
 
@@ -871,7 +871,7 @@ static void test_flags_mismatch_subprocess(void)
 qtest_start(cmd);
 g_free(cmd);
 
-init_virtio_dev(s);
+init_virtio_dev(s, 1u << VIRTIO_NET_F_MAC);
 wait_for_fds(s);
 wait_for_rings_started(s, 2);
 
-- 
MST

[Qemu-devel] [PULL 32/33] vhost-user: fix misaligned access to payload

2018-01-15 Thread Michael S. Tsirkin

We currently take a pointer to a misaligned field of a packed structure.
clang reports this as a build warning.
A fix is to keep payload in a separate structure, and access it
from there using a vectored write.

Signed-off-by: Michael S. Tsirkin 
---
 hw/virtio/vhost-user.c | 39 ---
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 6ac3610..7930fd8 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -652,33 +652,34 @@ static void slave_read(void *opaque)
 {
 struct vhost_dev *dev = opaque;
 struct vhost_user *u = dev->opaque;
-VhostUserMsg msg = { 0, };
+VhostUserHeader hdr = { 0, };
+VhostUserPayload payload = { 0, };
 int size, ret = 0;
 
 /* Read header */
-size = read(u->slave_fd, &msg, VHOST_USER_HDR_SIZE);
+size = read(u->slave_fd, &hdr, VHOST_USER_HDR_SIZE);
 if (size != VHOST_USER_HDR_SIZE) {
 error_report("Failed to read from slave.");
 goto err;
 }
 
-if (msg.hdr.size > VHOST_USER_PAYLOAD_SIZE) {
+if (hdr.size > VHOST_USER_PAYLOAD_SIZE) {
 error_report("Failed to read msg header."
-" Size %d exceeds the maximum %zu.", msg.hdr.size,
+" Size %d exceeds the maximum %zu.", hdr.size,
 VHOST_USER_PAYLOAD_SIZE);
 goto err;
 }
 
 /* Read payload */
-size = read(u->slave_fd, &msg.payload, msg.hdr.size);
-if (size != msg.hdr.size) {
+size = read(u->slave_fd, &payload, hdr.size);
+if (size != hdr.size) {
 error_report("Failed to read payload from slave.");
 goto err;
 }
 
-switch (msg.hdr.request) {
+switch (hdr.request) {
 case VHOST_USER_SLAVE_IOTLB_MSG:
-ret = vhost_backend_handle_iotlb_msg(dev, &msg.payload.iotlb);
+ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb);
 break;
 case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG :
 ret = vhost_user_slave_handle_config_change(dev);
@@ -692,15 +693,23 @@ static void slave_read(void *opaque)
  * REPLY_ACK feature handling. Other reply types has to be managed
  * directly in their request handlers.
  */
-if (msg.hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
-msg.hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
-msg.hdr.flags |= VHOST_USER_REPLY_MASK;
+if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) {
+struct iovec iovec[2];
 
-msg.payload.u64 = !!ret;
-msg.hdr.size = sizeof(msg.payload.u64);
 
-size = write(u->slave_fd, &msg, VHOST_USER_HDR_SIZE + msg.hdr.size);
-if (size != VHOST_USER_HDR_SIZE + msg.hdr.size) {
+hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK;
+hdr.flags |= VHOST_USER_REPLY_MASK;
+
+payload.u64 = !!ret;
+hdr.size = sizeof(payload.u64);
+
+iovec[0].iov_base = &hdr;
+iovec[0].iov_len = VHOST_USER_HDR_SIZE;
+iovec[1].iov_base = &payload;
+iovec[1].iov_len = hdr.size;
+
+size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec));
+if (size != VHOST_USER_HDR_SIZE + hdr.size) {
 error_report("Failed to send msg reply to slave.");
 goto err;
 }
-- 
MST

Re: [Qemu-devel] [PATCH 0/6] ppc/pnv: misc fixes and cleanups for POWER9

2018-01-15 Thread David Gibson

On Mon, Jan 15, 2018 at 07:04:00PM +0100, Cédric Le Goater wrote:
> Hello,
> 
> Here are some preliminary and straight forward fixes before adding
> POWER9 support to the PowerNV machine.
> 
> The full tree can be found here :
> 
>   https://github.com/legoater/qemu powernv-2.12
> 
> Thanks,

Applied to ppc-for-2.12.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH 09/11] pci: add trace-events support for hw/pci-host

2018-01-15 Thread Michael S. Tsirkin

On Sun, Jan 14, 2018 at 10:47:49AM +, Mark Cave-Ayland wrote:
> Signed-off-by: Mark Cave-Ayland 
> CC: Michael S. Tsirkin 
> CC: Marcel Apfelbaum 

Seems harmless so why not.

Acked-by: Michael S. Tsirkin 



> ---
>  Makefile.objs| 1 +
>  hw/pci-host/trace-events | 1 +
>  2 files changed, 2 insertions(+)
>  create mode 100644 hw/pci-host/trace-events
> 
> diff --git a/Makefile.objs b/Makefile.objs
> index c8b1bba593..6aa793ce4f 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -149,6 +149,7 @@ trace-events-subdirs += hw/i386/xen
>  trace-events-subdirs += hw/9pfs
>  trace-events-subdirs += hw/ppc
>  trace-events-subdirs += hw/pci
> +trace-events-subdirs += hw/pci-host
>  trace-events-subdirs += hw/s390x
>  trace-events-subdirs += hw/vfio
>  trace-events-subdirs += hw/acpi
> diff --git a/hw/pci-host/trace-events b/hw/pci-host/trace-events
> new file mode 100644
> index 00..9284b1fbad
> --- /dev/null
> +++ b/hw/pci-host/trace-events
> @@ -0,0 +1 @@
> +# See docs/devel/tracing.txt for syntax documentation.
> -- 
> 2.11.0

[Qemu-devel] [PULL 17/33] vhost-user-test: extract read-guest-mem test from main loop

2018-01-15 Thread Michael S. Tsirkin

From: Maxime Coquelin 

This patch makes read-guest-test consistent with other tests,
i.e. create the test server in the test function.

Reviewed-by: Marc-André Lureau 
Signed-off-by: Maxime Coquelin 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 tests/vhost-user-test.c | 41 +++--
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index 43c6528..df56724 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -617,6 +617,28 @@ GSourceFuncs test_migrate_source_funcs = {
 .check = test_migrate_source_check,
 };
 
+static void test_read_guest_mem(void)
+{
+TestServer *server = NULL;
+char *qemu_cmd = NULL;
+QTestState *s = NULL;
+
+server = test_server_new("test");
+test_server_listen(server);
+
+qemu_cmd = GET_QEMU_CMD(server);
+
+s = qtest_start(qemu_cmd);
+g_free(qemu_cmd);
+
+init_virtio_dev(server);
+
+read_guest_mem(server);
+
+qtest_quit(s);
+test_server_free(server);
+}
+
 static void test_migrate(void)
 {
 TestServer *s = test_server_new("src");
@@ -919,10 +941,7 @@ static void test_multiqueue(void)
 
 int main(int argc, char **argv)
 {
-QTestState *s = NULL;
-TestServer *server = NULL;
 const char *hugefs;
-char *qemu_cmd = NULL;
 int ret;
 char template[] = "/tmp/vhost-test-XX";
 GMainLoop *loop;
@@ -947,20 +966,11 @@ int main(int argc, char **argv)
 root = tmpfs;
 }
 
-server = test_server_new("test");
-test_server_listen(server);
-
 loop = g_main_loop_new(NULL, FALSE);
 /* run the main loop thread so the chardev may operate */
 thread = g_thread_new(NULL, thread_function, loop);
 
-qemu_cmd = GET_QEMU_CMD(server);
-
-s = qtest_start(qemu_cmd);
-g_free(qemu_cmd);
-init_virtio_dev(server);
-
-qtest_add_data_func("/vhost-user/read-guest-mem", server, read_guest_mem);
+qtest_add_func("/vhost-user/read-guest-mem", test_read_guest_mem);
 qtest_add_func("/vhost-user/migrate", test_migrate);
 qtest_add_func("/vhost-user/multiqueue", test_multiqueue);
 
@@ -978,12 +988,7 @@ int main(int argc, char **argv)
 
 ret = g_test_run();
 
-if (s) {
-qtest_quit(s);
-}
-
 /* cleanup */
-test_server_free(server);
 
 /* finish the helper thread and dispatch pending sources */
 g_main_loop_quit(loop);
-- 
MST

[Qemu-devel] [PULL 30/33] tests: acpi: add comments to fetch_rsdt_referenced_tables/data->tables usage

2018-01-15 Thread Michael S. Tsirkin

From: Igor Mammedov 

Signed-off-by: Igor Mammedov 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 tests/bios-tables-test.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
index 1314ad8..b354aaa 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c
@@ -257,6 +257,8 @@ static void fetch_rsdt_referenced_tables(test_data *data)
 
 addr = le32_to_cpu(data->rsdt_tables_addr[i + 1]); /* fadt is first */
 fetch_table(&ssdt_table, addr);
+
+/* Add table to ASL test tables list */
 g_array_append_val(data->tables, ssdt_table);
 }
 }
@@ -427,6 +429,7 @@ try_again:
 return exp_tables;
 }
 
+/* test the list of tables in @data->tables against reference tables */
 static void test_acpi_asl(test_data *data)
 {
 int i;
-- 
MST

[Qemu-devel] [PULL 21/33] vhost: Build temporary section list and deref after commit

2018-01-15 Thread Michael S. Tsirkin

From: "Dr. David Alan Gilbert" 

Igor spotted that there's a race, where a region that's unref'd
in a _del callback might be free'd before the set_mem_table call in
the _commit callback, and thus the vhost might end up using free memory.

Fix this by building a complete temporary sections list, ref'ing every
section (during add and nop) and then unref'ing the whole list right
at the end of commit.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Igor Mammedov 
---
 include/hw/virtio/vhost.h |  2 ++
 hw/virtio/vhost.c | 73 ++-
 2 files changed, 49 insertions(+), 26 deletions(-)

diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index 1dc2d73..09854b6 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -60,6 +60,8 @@ struct vhost_dev {
 struct vhost_memory *mem;
 int n_mem_sections;
 MemoryRegionSection *mem_sections;
+int n_tmp_sections;
+MemoryRegionSection *tmp_sections;
 struct vhost_virtqueue *vqs;
 int nvqs;
 /* the first virtqueue which would be used by this vhost dev */
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 386aef8..8a85dde 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -621,6 +621,8 @@ static void vhost_begin(MemoryListener *listener)
  memory_listener);
 dev->mem_changed_end_addr = 0;
 dev->mem_changed_start_addr = -1;
+dev->tmp_sections = NULL;
+dev->n_tmp_sections = 0;
 }
 
 static void vhost_commit(MemoryListener *listener)
@@ -629,17 +631,25 @@ static void vhost_commit(MemoryListener *listener)
  memory_listener);
 hwaddr start_addr = 0;
 ram_addr_t size = 0;
+MemoryRegionSection *old_sections;
+int n_old_sections;
+
 uint64_t log_size;
 int r;
 
+old_sections = dev->mem_sections;
+n_old_sections = dev->n_mem_sections;
+dev->mem_sections = dev->tmp_sections;
+dev->n_mem_sections = dev->n_tmp_sections;
+
 if (!dev->memory_changed) {
-return;
+goto out;
 }
 if (!dev->started) {
-return;
+goto out;
 }
 if (dev->mem_changed_start_addr > dev->mem_changed_end_addr) {
-return;
+goto out;
 }
 
 if (dev->started) {
@@ -656,7 +666,7 @@ static void vhost_commit(MemoryListener *listener)
 VHOST_OPS_DEBUG("vhost_set_mem_table failed");
 }
 dev->memory_changed = false;
-return;
+goto out;
 }
 log_size = vhost_get_log_size(dev);
 /* We allocate an extra 4K bytes to log,
@@ -675,6 +685,27 @@ static void vhost_commit(MemoryListener *listener)
 vhost_dev_log_resize(dev, log_size);
 }
 dev->memory_changed = false;
+
+out:
+/* Deref the old list of sections, this must happen _after_ the
+ * vhost_set_mem_table to ensure the client isn't still using the
+ * section we're about to unref.
+ */
+while (n_old_sections--) {
+memory_region_unref(old_sections[n_old_sections].mr);
+}
+g_free(old_sections);
+return;
+}
+
+static void vhost_add_section(struct vhost_dev *dev,
+  MemoryRegionSection *section)
+{
+++dev->n_tmp_sections;
+dev->tmp_sections = g_renew(MemoryRegionSection, dev->tmp_sections,
+dev->n_tmp_sections);
+dev->tmp_sections[dev->n_tmp_sections - 1] = *section;
+memory_region_ref(section->mr);
 }
 
 static void vhost_region_add(MemoryListener *listener,
@@ -687,36 +718,31 @@ static void vhost_region_add(MemoryListener *listener,
 return;
 }
 
-++dev->n_mem_sections;
-dev->mem_sections = g_renew(MemoryRegionSection, dev->mem_sections,
-dev->n_mem_sections);
-dev->mem_sections[dev->n_mem_sections - 1] = *section;
-memory_region_ref(section->mr);
+vhost_add_section(dev, section);
 vhost_set_memory(listener, section, true);
 }
 
-static void vhost_region_del(MemoryListener *listener,
+static void vhost_region_nop(MemoryListener *listener,
  MemoryRegionSection *section)
 {
 struct vhost_dev *dev = container_of(listener, struct vhost_dev,
  memory_listener);
-int i;
 
 if (!vhost_section(section)) {
 return;
 }
 
-vhost_set_memory(listener, section, false);
-memory_region_unref(section->mr);
-for (i = 0; i < dev->n_mem_sections; ++i) {
-if (dev->mem_sections[i].offset_within_address_space
-== section->offset_within_address_space) {
---dev->n_mem_sections;
-memmove(&dev->mem_sections[i], &dev->mem_sections[i+1],
-(dev->n_mem_sections - i) * sizeof(*dev->mem_sections));
-

[Qemu-devel] [PULL 15/33] hw/acpi-build: Make next_base easy to follow

2018-01-15 Thread Michael S. Tsirkin

From: Dou Liyang 

It may be hard to read the assignment statement of "next_base", so

S/next_base += (1ULL << 32) - pcms->below_4g_mem_size;
 /next_base = mem_base + mem_len;

... for readability.

No functionality change.

Signed-off-by: Dou Liyang 
Reviewed-by: Igor Mammedov 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/acpi-build.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 6f38fb9..dc4b2b9 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2394,7 +2394,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, 
MachineState *machine)
 }
 mem_base = 1ULL << 32;
 mem_len = next_base - pcms->below_4g_mem_size;
-next_base += (1ULL << 32) - pcms->below_4g_mem_size;
+next_base = mem_base + mem_len;
 }
 numamem = acpi_data_push(table_data, sizeof *numamem);
 build_srat_memory(numamem, mem_base, mem_len, i - 1,
-- 
MST

[Qemu-devel] [PULL 29/33] tests: acpi: rename test_acpi_tables()/test_dst_table() to reflect its usage

2018-01-15 Thread Michael S. Tsirkin

From: Igor Mammedov 

The main purpose of test_dst_table() is loading a table from QEMU
and checking that the checksum in the header matches the actual one;
rename it to reflect the main action it performs.

Likewise, the name test_acpi_tables() is too broad, while the function
only loads tables referenced by the RSDT; rename it to reflect that.

Signed-off-by: Igor Mammedov 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 tests/bios-tables-test.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
index 4b357cd..1314ad8 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c
@@ -210,7 +210,11 @@ static void test_acpi_facs_table(test_data *data)
 ACPI_ASSERT_CMP(facs_table->signature, "FACS");
 }
 
-static void test_dst_table(AcpiSdtTable *sdt_table, uint32_t addr)
+/** fetch_table
+ *   load ACPI table at @addr into table descriptor @sdt_table
+ *   and check that header checksum matches actual one.
+ */
+static void fetch_table(AcpiSdtTable *sdt_table, uint32_t addr)
 {
 uint8_t checksum;
 
@@ -234,14 +238,15 @@ static void test_acpi_dsdt_table(test_data *data)
 AcpiSdtTable dsdt_table;
 uint32_t addr = le32_to_cpu(data->fadt_table.dsdt);
 
-test_dst_table(&dsdt_table, addr);
+fetch_table(&dsdt_table, addr);
 ACPI_ASSERT_CMP(dsdt_table.header.signature, "DSDT");
 
 /* Since DSDT isn't in RSDT, add DSDT to ASL test tables list manually */
 g_array_append_val(data->tables, dsdt_table);
 }
 
-static void test_acpi_tables(test_data *data)
+/* Load all tables and add to test list directly RSDT referenced tables */
+static void fetch_rsdt_referenced_tables(test_data *data)
 {
 int tables_nr = data->rsdt_tables_nr - 1; /* fadt is first */
 int i;
@@ -251,7 +256,7 @@ static void test_acpi_tables(test_data *data)
 uint32_t addr;
 
 addr = le32_to_cpu(data->rsdt_tables_addr[i + 1]); /* fadt is first */
-test_dst_table(&ssdt_table, addr);
+fetch_table(&ssdt_table, addr);
 g_array_append_val(data->tables, ssdt_table);
 }
 }
@@ -640,7 +645,7 @@ static void test_acpi_one(const char *params, test_data 
*data)
 test_acpi_fadt_table(data);
 test_acpi_facs_table(data);
 test_acpi_dsdt_table(data);
-test_acpi_tables(data);
+fetch_rsdt_referenced_tables(data);
 
 if (iasl) {
 if (getenv(ACPI_REBUILD_EXPECTED_AML)) {
-- 
MST

[Qemu-devel] [PULL 20/33] vhost-user-test: use init_virtio_dev in multiqueue test

2018-01-15 Thread Michael S. Tsirkin

From: Maxime Coquelin 

Now that init_virtio_dev() has been generalized to all cases,
use it in test_multiqueue() to avoid code duplication.

Signed-off-by: Maxime Coquelin 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Marc-André Lureau 
---
 tests/vhost-user-test.c | 65 ++---
 1 file changed, 8 insertions(+), 57 deletions(-)

diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index 6a144e8..ec6ac9d 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -892,79 +892,30 @@ static void test_flags_mismatch(void)
 
 #endif
 
-static QVirtioPCIDevice *virtio_net_pci_init(QPCIBus *bus, int slot)
-{
-QVirtioPCIDevice *dev;
-
-dev = qvirtio_pci_device_find(bus, VIRTIO_ID_NET);
-g_assert(dev != NULL);
-g_assert_cmphex(dev->vdev.device_type, ==, VIRTIO_ID_NET);
-
-qvirtio_pci_device_enable(dev);
-qvirtio_reset(&dev->vdev);
-qvirtio_set_acknowledge(&dev->vdev);
-qvirtio_set_driver(&dev->vdev);
-
-return dev;
-}
-
-static void driver_init(QVirtioDevice *dev)
-{
-uint32_t features;
-
-features = qvirtio_get_features(dev);
-features = features & ~(QVIRTIO_F_BAD_FEATURE |
-(1u << VIRTIO_RING_F_INDIRECT_DESC) |
-(1u << VIRTIO_RING_F_EVENT_IDX));
-qvirtio_set_features(dev, features);
-
-qvirtio_set_driver_ok(dev);
-}
-
-#define PCI_SLOT0x04
-
 static void test_multiqueue(void)
 {
-const int queues = 2;
 TestServer *s = test_server_new("mq");
-QVirtioPCIDevice *dev;
-QPCIBus *bus;
-QVirtQueuePCI *vq[queues * 2];
-QGuestAllocator *alloc;
 char *cmd;
-int i;
-
-s->queues = queues;
+uint32_t features_mask = ~(QVIRTIO_F_BAD_FEATURE |
+(1u << VIRTIO_RING_F_INDIRECT_DESC) |
+(1u << VIRTIO_RING_F_EVENT_IDX));
+s->queues = 2;
 test_server_listen(s);
 
 cmd = g_strdup_printf(QEMU_CMD_MEM QEMU_CMD_CHR QEMU_CMD_NETDEV 
",queues=%d "
   "-device 
virtio-net-pci,netdev=net0,mq=on,vectors=%d",
   512, 512, root, s->chr_name,
   s->socket_path, "", s->chr_name,
-  queues, queues * 2 + 2);
+  s->queues, s->queues * 2 + 2);
 qtest_start(cmd);
 g_free(cmd);
 
-bus = qpci_init_pc(NULL);
-dev = virtio_net_pci_init(bus, PCI_SLOT);
+init_virtio_dev(s, features_mask);
 
-alloc = pc_alloc_init();
-for (i = 0; i < queues * 2; i++) {
-vq[i] = (QVirtQueuePCI *)qvirtqueue_setup(&dev->vdev, alloc, i);
-}
+wait_for_rings_started(s, s->queues * 2);
 
-driver_init(&dev->vdev);
-wait_for_rings_started(s, queues * 2);
+uninit_virtio_dev(s);
 
-/* End test */
-for (i = 0; i < queues * 2; i++) {
-qvirtqueue_cleanup(dev->vdev.bus, [i]->vq, alloc);
-}
-pc_alloc_uninit(alloc);
-qvirtio_pci_device_disable(dev);
-g_free(dev->pdev);
-g_free(dev);
-qpci_free_pc(bus);
 qtest_end();
 
 test_server_free(s);
-- 
MST

[Qemu-devel] [PULL 27/33] tests: acpi: move tested tables array allocation outside of test_acpi_dsdt_table()

2018-01-15 Thread Michael S. Tsirkin

From: Igor Mammedov 

At best it's confusing that the array for the list of tables to be tested
against reference tables is allocated within test_acpi_dsdt_table(),
and at worst it would just overwrite the list of tables if they were
added before test_acpi_dsdt_table().
Move the array initialization to test_acpi_one() before we start
processing tables.

Signed-off-by: Igor Mammedov 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 tests/bios-tables-test.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
index def81fc..a2f64c8 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c
@@ -234,12 +234,11 @@ static void test_acpi_dsdt_table(test_data *data)
 uint32_t addr = le32_to_cpu(data->fadt_table.dsdt);
 
 memset(&dsdt_table, 0, sizeof(dsdt_table));
-data->tables = g_array_new(false, true, sizeof(AcpiSdtTable));
 
 test_dst_table(&dsdt_table, addr);
 ACPI_ASSERT_CMP(dsdt_table.header.signature, "DSDT");
 
-/* Place DSDT first */
+/* Since DSDT isn't in RSDT, add DSDT to ASL test tables list manually */
 g_array_append_val(data->tables, dsdt_table);
 }
 
@@ -636,6 +635,7 @@ static void test_acpi_one(const char *params, test_data 
*data)
 
 boot_sector_test();
 
+data->tables = g_array_new(false, true, sizeof(AcpiSdtTable));
 test_acpi_rsdp_address(data);
 test_acpi_rsdp_table(data);
 test_acpi_rsdt_table(data);
-- 
MST

[Qemu-devel] [PULL 12/33] intel-iommu: Extend address width to 48 bits

2018-01-15 Thread Michael S. Tsirkin

From: Prasad Singamsetty 

The current implementation of the Intel IOMMU code only supports a 39-bit
IOVA address width. This patch provides a new parameter (x-aw-bits)
for intel-iommu to extend its address width to 48 bits while keeping the
default the same (39 bits). The reason for not changing the default
is to avoid potential compatibility problems with live migration of
intel-iommu enabled QEMU guests. The only valid values for the 'x-aw-bits'
parameter are 39 and 48.

After enabling the larger address width (48), we should be able to map
larger IOVA addresses in the guest, for example in a QEMU guest that
is configured with large memory ( >=1TB ). To check whether the 48-bit
address width is enabled, we can grep the guest dmesg output for the line:
"DMAR: Host address width 48".

Signed-off-by: Prasad Singamsetty 
Reviewed-by: Peter Xu 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/intel_iommu_internal.h |   9 ++--
 include/hw/i386/intel_iommu.h  |   1 +
 hw/i386/acpi-build.c   |   3 +-
 hw/i386/intel_iommu.c  | 101 -
 4 files changed, 65 insertions(+), 49 deletions(-)

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 77e4a98..d084099 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -131,7 +131,7 @@
 #define VTD_TLB_DID(val)(((val) >> 32) & VTD_DOMAIN_ID_MASK)
 
 /* IVA_REG */
-#define VTD_IVA_ADDR(val)   ((val) & ~0xfffULL & ((1ULL << VTD_MGAW) - 1))
+#define VTD_IVA_ADDR(val)   ((val) & ~0xfffULL)
 #define VTD_IVA_AM(val) ((val) & 0x3fULL)
 
 /* GCMD_REG */
@@ -197,7 +197,6 @@
 #define VTD_DOMAIN_ID_SHIFT 16  /* 16-bit domain id for 64K domains */
 #define VTD_DOMAIN_ID_MASK  ((1UL << VTD_DOMAIN_ID_SHIFT) - 1)
 #define VTD_CAP_ND  (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL)
-#define VTD_MGAW39  /* Maximum Guest Address Width */
 #define VTD_ADDRESS_SIZE(aw)(1ULL << (aw))
 #define VTD_CAP_MGAW(aw)aw) - 1) & 0x3fULL) << 16)
 #define VTD_MAMV18ULL
@@ -213,7 +212,6 @@
 #define VTD_CAP_SAGAW_39bit (0x2ULL << VTD_CAP_SAGAW_SHIFT)
  /* 48-bit AGAW, 4-level page-table */
 #define VTD_CAP_SAGAW_48bit (0x4ULL << VTD_CAP_SAGAW_SHIFT)
-#define VTD_CAP_SAGAW   VTD_CAP_SAGAW_39bit
 
 /* IQT_REG */
 #define VTD_IQT_QT(val) (((val) >> 4) & 0x7fffULL)
@@ -252,7 +250,7 @@
 #define VTD_FRCD_SID_MASK   0xULL
 #define VTD_FRCD_SID(val)   ((val) & VTD_FRCD_SID_MASK)
 /* For the low 64-bit of 128-bit */
-#define VTD_FRCD_FI(val)((val) & (((1ULL << VTD_MGAW) - 1) ^ 0xfffULL))
+#define VTD_FRCD_FI(val)((val) & ~0xfffULL)
 
 /* DMA Remapping Fault Conditions */
 typedef enum VTDFaultReason {
@@ -360,8 +358,7 @@ typedef union VTDInvDesc VTDInvDesc;
 #define VTD_INV_DESC_IOTLB_DOMAIN   (2ULL << 4)
 #define VTD_INV_DESC_IOTLB_PAGE (3ULL << 4)
 #define VTD_INV_DESC_IOTLB_DID(val) (((val) >> 16) & VTD_DOMAIN_ID_MASK)
-#define VTD_INV_DESC_IOTLB_ADDR(val)((val) & ~0xfffULL & \
- ((1ULL << VTD_MGAW) - 1))
+#define VTD_INV_DESC_IOTLB_ADDR(val)((val) & ~0xfffULL)
 #define VTD_INV_DESC_IOTLB_AM(val)  ((val) & 0x3fULL)
 #define VTD_INV_DESC_IOTLB_RSVD_LO  0xff00ULL
 #define VTD_INV_DESC_IOTLB_RSVD_HI  0xf80ULL
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 372b06d..45ec891 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -304,6 +304,7 @@ struct IntelIOMMUState {
 bool intr_eime; /* Extended interrupt mode enabled */
 OnOffAuto intr_eim; /* Toggle for EIM cabability */
 bool buggy_eim; /* Force buggy EIM unless eim=off */
+uint8_t aw_bits;/* Host/IOVA address width (in bits) */
 };
 
 /* Find the VTD Address space associated with the given bus pointer,
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 18b939e..6f38fb9 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -2473,6 +2473,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
 AcpiDmarDeviceScope *scope = NULL;
 /* Root complex IOAPIC use one path[0] only */
 size_t ioapic_scope_size = sizeof(*scope) + sizeof(scope->path[0]);
+IntelIOMMUState *intel_iommu = INTEL_IOMMU_DEVICE(iommu);
 
 assert(iommu);
 if (iommu->intr_supported) {
@@ -2480,7 +2481,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker)
 }
 
 dmar = acpi_data_push(table_data, sizeof(*dmar));
-dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1;
+dmar->host_address_width = intel_iommu->aw_bits - 1;
 dmar->flags = dmar_flags;
 
 /* DMAR Remapping Hardware Unit Definition

Re: [Qemu-devel] [PATCH V5] pci: removed the is_express field since a uniform interface was inserted

2018-01-15 Thread Michael S. Tsirkin

On Mon, Dec 18, 2017 at 05:21:40PM +0200, Yoni Bettan wrote:
> according to Eduardo Habkost's commit fd3b02c889 all PCIEs now implement
> INTERFACE_PCIE_DEVICE so we don't need is_express field anymore.
> 
> Devices that implements only INTERFACE_PCIE_DEVICE (is_express == 1)
> or
> devices that implements only INTERFACE_CONVENTIONAL_PCI_DEVICE (is_express == 
> 0)
> were not affected by the change.
> 
> The only devices that were affected are those that are hybrid and also
> had (is_express == 1) - therefor only:
>   - hw/vfio/pci.c
>   - hw/usb/hcd-xhci.c
>   - hw/xen/xen_pt.c
> 
> For those 3 I made sure that QEMU_PCI_CAP_EXPRESS is on in instance_init()
> 
> Signed-off-by: Yoni Bettan 


Thanks!
Could you pls rebase this on top of the latest pci branch?
There's been some conflicting changes so this no longer
applies cleanly.


> ---
>  docs/pcie_pci_bridge.txt   | 2 +-
>  hw/block/nvme.c| 1 -
>  hw/net/e1000e.c| 1 -
>  hw/pci-bridge/pcie_pci_bridge.c| 1 -
>  hw/pci-bridge/pcie_root_port.c | 1 -
>  hw/pci-bridge/xio3130_downstream.c | 1 -
>  hw/pci-bridge/xio3130_upstream.c   | 1 -
>  hw/pci-host/xilinx-pcie.c  | 1 -
>  hw/pci/pci.c   | 8 ++--
>  hw/scsi/megasas.c  | 4 
>  hw/usb/hcd-xhci.c  | 9 -
>  hw/vfio/pci.c  | 5 -
>  hw/xen/xen_pt.c| 9 -
>  include/hw/pci/pci.h   | 3 ---
>  14 files changed, 27 insertions(+), 20 deletions(-)
> 
> diff --git a/docs/pcie_pci_bridge.txt b/docs/pcie_pci_bridge.txt
> index 5a4203f97c..ab35ebf3ca 100644
> --- a/docs/pcie_pci_bridge.txt
> +++ b/docs/pcie_pci_bridge.txt
> @@ -110,5 +110,5 @@ To enable device hot-plug into the bridge on Linux 
> there're 3 ways:
>  Implementation
>  ==
>  The PCIE-PCI bridge is based on PCI-PCI bridge, but also accumulates PCI 
> Express
> -features as a PCI Express device (is_express=1).
> +features as a PCI Express device.
>  
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 441e21ed1f..9325bc0911 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -1087,7 +1087,6 @@ static void nvme_class_init(ObjectClass *oc, void *data)
>  pc->vendor_id = PCI_VENDOR_ID_INTEL;
>  pc->device_id = 0x5845;
>  pc->revision = 2;
> -pc->is_express = 1;
>  
>  set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
>  dc->desc = "Non-Volatile Memory Express";
> diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c
> index f1af279e8d..c360f0d8c9 100644
> --- a/hw/net/e1000e.c
> +++ b/hw/net/e1000e.c
> @@ -675,7 +675,6 @@ static void e1000e_class_init(ObjectClass *class, void 
> *data)
>  c->revision = 0;
>  c->romfile = "efi-e1000e.rom";
>  c->class_id = PCI_CLASS_NETWORK_ETHERNET;
> -c->is_express = 1;
>  
>  dc->desc = "Intel 82574L GbE Controller";
>  dc->reset = e1000e_qdev_reset;
> diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c
> index a4d827c99d..b7d9ebbec2 100644
> --- a/hw/pci-bridge/pcie_pci_bridge.c
> +++ b/hw/pci-bridge/pcie_pci_bridge.c
> @@ -169,7 +169,6 @@ static void pcie_pci_bridge_class_init(ObjectClass 
> *klass, void *data)
>  DeviceClass *dc = DEVICE_CLASS(klass);
>  HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
>  
> -k->is_express = 1;
>  k->is_bridge = 1;
>  k->vendor_id = PCI_VENDOR_ID_REDHAT;
>  k->device_id = PCI_DEVICE_ID_REDHAT_PCIE_BRIDGE;
> diff --git a/hw/pci-bridge/pcie_root_port.c b/hw/pci-bridge/pcie_root_port.c
> index 9b6e4ce512..45f9e8cd4a 100644
> --- a/hw/pci-bridge/pcie_root_port.c
> +++ b/hw/pci-bridge/pcie_root_port.c
> @@ -145,7 +145,6 @@ static void rp_class_init(ObjectClass *klass, void *data)
>  DeviceClass *dc = DEVICE_CLASS(klass);
>  PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
>  
> -k->is_express = 1;
>  k->is_bridge = 1;
>  k->config_write = rp_write_config;
>  k->realize = rp_realize;
> diff --git a/hw/pci-bridge/xio3130_downstream.c 
> b/hw/pci-bridge/xio3130_downstream.c
> index 1e09d2afb7..613a0d6bb7 100644
> --- a/hw/pci-bridge/xio3130_downstream.c
> +++ b/hw/pci-bridge/xio3130_downstream.c
> @@ -177,7 +177,6 @@ static void xio3130_downstream_class_init(ObjectClass 
> *klass, void *data)
>  DeviceClass *dc = DEVICE_CLASS(klass);
>  PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
>  
> -k->is_express = 1;
>  k->is_bridge = 1;
>  k->config_write = xio3130_downstream_write_config;
>  k->realize = xio3130_downstream_realize;
> diff --git a/hw/pci-bridge/xio3130_upstream.c 
> b/hw/pci-bridge/xio3130_upstream.c
> index 227997ce46..d4645bddee 100644
> --- a/hw/pci-bridge/xio3130_upstream.c
> +++ b/hw/pci-bridge/xio3130_upstream.c
> @@ -148,7 +148,6 @@ static void xio3130_upstream_class_init(ObjectClass 
> *klass, void *data)
>  DeviceClass *dc = DEVICE_CLASS(klass);
>  PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
>

[Qemu-devel] [PULL 14/33] ACPI/unit-test: Add a testcase for RAM allocation in numa node

2018-01-15 Thread Michael S. Tsirkin

From: Dou Liyang 

As QEMU supports memory-less nodes, it is possible that there is
no RAM in the first NUMA node (also called node0), e.g.:
  ... \
  -m 128,slots=3,maxmem=1G \
  -numa node -numa node,mem=128M \

But, this makes it hard for QEMU to build a known-to-work ACPI SRAT
table. Only fixing it is not enough.

Add a testcase for this situation to make sure the ACPI table is
correct for guest.

Suggested-by: Eduardo Habkost 
Signed-off-by: Dou Liyang 
Reviewed-by: Igor Mammedov 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 tests/bios-tables-test.c  |  24 
 tests/acpi-test-data/pc/DSDT.numamem  | Bin 0 -> 5150 bytes
 tests/acpi-test-data/pc/SRAT.numamem  | Bin 0 -> 224 bytes
 tests/acpi-test-data/q35/DSDT.numamem | Bin 0 -> 7834 bytes
 tests/acpi-test-data/q35/SRAT.numamem | Bin 0 -> 224 bytes
 5 files changed, 24 insertions(+)
 create mode 100644 tests/acpi-test-data/pc/DSDT.numamem
 create mode 100644 tests/acpi-test-data/pc/SRAT.numamem
 create mode 100644 tests/acpi-test-data/q35/DSDT.numamem
 create mode 100644 tests/acpi-test-data/q35/SRAT.numamem

diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
index e28e0c9..def81fc 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c
@@ -810,6 +810,28 @@ static void test_acpi_piix4_tcg_memhp(void)
 free_test_data();
 }
 
+static void test_acpi_q35_tcg_numamem(void)
+{
+test_data data;
+
+memset(&data, 0, sizeof(data));
+data.machine = MACHINE_Q35;
+data.variant = ".numamem";
+test_acpi_one(" -numa node -numa node,mem=128", &data);
+free_test_data();
+}
+
+static void test_acpi_piix4_tcg_numamem(void)
+{
+test_data data;
+
+memset(&data, 0, sizeof(data));
+data.machine = MACHINE_PC;
+data.variant = ".numamem";
+test_acpi_one(" -numa node -numa node,mem=128", &data);
+free_test_data();
+}
+
 int main(int argc, char *argv[])
 {
 const char *arch = qtest_get_arch();
@@ -832,6 +854,8 @@ int main(int argc, char *argv[])
 qtest_add_func("acpi/q35/cpuhp", test_acpi_q35_tcg_cphp);
 qtest_add_func("acpi/piix4/memhp", test_acpi_piix4_tcg_memhp);
 qtest_add_func("acpi/q35/memhp", test_acpi_q35_tcg_memhp);
+qtest_add_func("acpi/piix4/numamem", test_acpi_piix4_tcg_numamem);
+qtest_add_func("acpi/q35/numamem", test_acpi_q35_tcg_numamem);
 }
 ret = g_test_run();
 boot_sector_cleanup(disk);
diff --git a/tests/acpi-test-data/pc/DSDT.numamem 
b/tests/acpi-test-data/pc/DSDT.numamem
new file mode 100644
index 
..224cfdd9e983e02dac5f4bf7e210eaa64cb0dc78
GIT binary patch
literal 5150
zcmb7I-EJG#5uUTVl$K{nX(_FLmDq$F*GSsf{PAB

[Qemu-devel] [PULL 26/33] x86_iommu: check if machine has PCI bus

2018-01-15 Thread Michael S. Tsirkin

From: Mohammed Gamal 

Starting qemu with
qemu-system-x86_64 -S -M isapc -device {amd|intel}-iommu
leads to a segfault. The code assumes a PCI bus is present and
tries to access the bus structure without checking.

Since Intel VT-d and AMDVI should only work with PCI, add a
check for PCI bus and return error if not present.

Reviewed-by: Peter Xu 
Reviewed-by: Eduardo Habkost 
Signed-off-by: Mohammed Gamal 
Reviewed-by: Thomas Huth 
---
 hw/i386/x86-iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
index 51de519..8a01a2d 100644
--- a/hw/i386/x86-iommu.c
+++ b/hw/i386/x86-iommu.c
@@ -88,7 +88,7 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp)
 PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE));
 QLIST_INIT(&x86_iommu->iec_notifiers);
 
-if (!pcms) {
+if (!pcms || !pcms->bus) {
 error_setg(errp, "Machine-type '%s' not supported by IOMMU",
mc->name);
 return;
-- 
MST

[Qemu-devel] [PULL 13/33] hw/pci-bridge: fix QEMU crash because of pcie-root-port

2018-01-15 Thread Michael S. Tsirkin

From: Marcel Apfelbaum 

If we try to use more pcie_root_ports than available slots
and an IO hint is passed to the port, QEMU crashes because
we try to init the "IO hint" capability even if the device
is not created.
Fix it by checking for error before adding the capability,
so QEMU can fail gracefully.

Signed-off-by: Marcel Apfelbaum 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/pci-bridge/gen_pcie_root_port.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/hw/pci-bridge/gen_pcie_root_port.c 
b/hw/pci-bridge/gen_pcie_root_port.c
index ad4e6aa..0e2f2e8 100644
--- a/hw/pci-bridge/gen_pcie_root_port.c
+++ b/hw/pci-bridge/gen_pcie_root_port.c
@@ -74,8 +74,13 @@ static void gen_rp_realize(DeviceState *dev, Error **errp)
 PCIDevice *d = PCI_DEVICE(dev);
 GenPCIERootPort *grp = GEN_PCIE_ROOT_PORT(d);
 PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(d);
+Error *local_err = NULL;
 
-rpc->parent_realize(dev, errp);
+rpc->parent_realize(dev, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
 
 int rc = pci_bridge_qemu_reserve_cap_init(d, 0, grp->bus_reserve,
 grp->io_reserve, grp->mem_reserve, grp->pref32_reserve,
-- 
MST

[Qemu-devel] [PULL 25/33] x86_iommu: Move machine check to x86_iommu_realize()

2018-01-15 Thread Michael S. Tsirkin

From: Mohammed Gamal 

Instead of having the same error checks in vtd_realize()
and amdvi_realize(), move that over to the generic
x86_iommu_realize().

Reviewed-by: Peter Xu 
Reviewed-by: Eduardo Habkost 
Signed-off-by: Mohammed Gamal 
Reviewed-by: Thomas Huth 
---
 hw/i386/amd_iommu.c   | 13 ++---
 hw/i386/intel_iommu.c | 13 ++---
 hw/i386/x86-iommu.c   | 13 +
 3 files changed, 17 insertions(+), 22 deletions(-)

diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index eeaf0e0..63d46ff 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1144,18 +1144,9 @@ static void amdvi_realize(DeviceState *dev, Error **err)
 AMDVIState *s = AMD_IOMMU_DEVICE(dev);
 X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
 MachineState *ms = MACHINE(qdev_get_machine());
-MachineClass *mc = MACHINE_GET_CLASS(ms);
-PCMachineState *pcms =
-PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE));
-PCIBus *bus;
-
-if (!pcms) {
-error_setg(err, "Machine-type '%s' not supported by amd-iommu",
-   mc->name);
-return;
-}
+PCMachineState *pcms = PC_MACHINE(ms);
+PCIBus *bus = pcms->bus;
 
-bus = pcms->bus;
 s->iotlb = g_hash_table_new_full(amdvi_uint64_hash,
  amdvi_uint64_equal, g_free, g_free);
 
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 4e8642e..2e841cd 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3052,20 +3052,11 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error 
**errp)
 static void vtd_realize(DeviceState *dev, Error **errp)
 {
 MachineState *ms = MACHINE(qdev_get_machine());
-MachineClass *mc = MACHINE_GET_CLASS(ms);
-PCMachineState *pcms =
-PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE));
-PCIBus *bus;
+PCMachineState *pcms = PC_MACHINE(ms);
+PCIBus *bus = pcms->bus;
 IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
 X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
 
-if (!pcms) {
-error_setg(errp, "Machine-type '%s' not supported by intel-iommu",
-   mc->name);
-return;
-}
-
-bus = pcms->bus;
 x86_iommu->type = TYPE_INTEL;
 
 if (!vtd_decide_config(s, errp)) {
diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c
index 293caf8..51de519 100644
--- a/hw/i386/x86-iommu.c
+++ b/hw/i386/x86-iommu.c
@@ -21,6 +21,8 @@
 #include "hw/sysbus.h"
 #include "hw/boards.h"
 #include "hw/i386/x86-iommu.h"
+#include "hw/i386/pc.h"
+#include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "trace.h"
 
@@ -80,7 +82,18 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp)
 {
 X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);
 X86IOMMUClass *x86_class = X86_IOMMU_GET_CLASS(dev);
+MachineState *ms = MACHINE(qdev_get_machine());
+MachineClass *mc = MACHINE_GET_CLASS(ms);
+PCMachineState *pcms =
+PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE));
 QLIST_INIT(&x86_iommu->iec_notifiers);
+
+if (!pcms) {
+error_setg(errp, "Machine-type '%s' not supported by IOMMU",
+   mc->name);
+return;
+}
+
 if (x86_class->realize) {
 x86_class->realize(dev, errp);
 }
-- 
MST

[Qemu-devel] [PATCHv2 1/2] spapr: Allow some cases where we can't set VSMT mode in the kernel

2018-01-15 Thread David Gibson

At present if we require a vsmt mode that's not equal to the kernel's
default, and the kernel doesn't let us change it (e.g. because it's an old
kernel without support) then we always fail.

But in fact we can cope with the kernel having a different vsmt as long as
  a) it's >= the actual number of vthreads/vcore (so that guest threads
 that are supposed to be on the same core act like it)
  b) it's a submultiple of the requested vsmt mode (so that guest threads
 spaced by the vsmt value will act like they're on different cores)

Allowing this case gives us a bit more freedom to adjust the vsmt behaviour
without breaking existing cases.

Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c | 26 +++---
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index e35214bfc3..6d3613d934 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2314,17 +2314,29 @@ static void spapr_set_vsmt_mode(sPAPRMachineState 
*spapr, Error **errp)
 if (kvm_enabled() && (spapr->vsmt != kvm_smt)) {
 ret = kvmppc_set_smt_threads(spapr->vsmt);
 if (ret) {
+/* Looks like KVM isn't able to change VSMT mode */
 error_setg(&local_err,
"Failed to set KVM's VSMT mode to %d (errno %d)",
spapr->vsmt, ret);
-if (!vsmt_user) {
-error_append_hint(&local_err, "On PPC, a VM with %d threads/"
- "core on a host with %d threads/core requires "
- " the use of VSMT mode %d.\n",
- smp_threads, kvm_smt, spapr->vsmt);
+/* We can live with that if the default one is big enough
+ * for the number of threads, and a submultiple of the one
+ * we want.  In this case we'll waste some vcpu ids, but
+ * behaviour will be correct */
+if ((kvm_smt >= smp_threads) && (spapr->vsmt % kvm_smt) == 0) {
+warn_report_err(local_err);
+local_err = NULL;
+goto out;
+} else {
+if (!vsmt_user) {
+error_append_hint(&local_err,
+  "On PPC, a VM with %d threads/core"
+  " on a host with %d threads/core"
+  " requires the use of VSMT mode %d.\n",
+  smp_threads, kvm_smt, spapr->vsmt);
+}
+kvmppc_hint_smt_possible(&local_err);
+goto out;
 }
-kvmppc_hint_smt_possible(&local_err);
-goto out;
 }
 }
 /* else TCG: nothing to do currently */
-- 
2.14.3

[Qemu-devel] [PULL 33/33] vhost: remove assertion to prevent crash

2018-01-15 Thread Michael S. Tsirkin

From: Jay Zhou 

QEMU will assert on vhost-user backed virtio device hotplug if QEMU is
using more RAM regions than VHOST_MEMORY_MAX_NREGIONS (for example if
it were started with a lot of DIMM devices).

Fix it by returning error instead of asserting and let callers of
vhost_set_mem_table() handle error condition gracefully.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Igor Mammedov 
Signed-off-by: Jay Zhou 
---
 hw/virtio/vhost-user.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 7930fd8..6eb9798 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -342,11 +342,14 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev,
  );
 fd = memory_region_get_fd(mr);
 if (fd > 0) {
+if (fd_num == VHOST_MEMORY_MAX_NREGIONS) {
+error_report("Failed preparing vhost-user memory table msg");
+return -1;
+}
 msg.payload.memory.regions[fd_num].userspace_addr = 
reg->userspace_addr;
 msg.payload.memory.regions[fd_num].memory_size  = reg->memory_size;
 msg.payload.memory.regions[fd_num].guest_phys_addr = 
reg->guest_phys_addr;
 msg.payload.memory.regions[fd_num].mmap_offset = offset;
-assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
 fds[fd_num++] = fd;
 }
 }
-- 
MST

[Qemu-devel] [PATCHv2 2/2] spapr: Adjust default VSMT value for better migration compatibility

2018-01-15 Thread David Gibson

fa98fbfc "PPC: KVM: Support machine option to set VSMT mode" introduced the
"vsmt" parameter for the pseries machine type, which controls the spacing
of the vcpu ids of thread 0 for each virtual core.  This was done to bring
some consistency and stability to how that was done, while still allowing
backwards compatibility for migration and otherwise.

The default value we used for vsmt was set to the max of the host's
advertised default number of threads and the number of vthreads per vcore
in the guest.  This was done to continue running without extra parameters
on older KVM versions which don't allow the VSMT value to be changed.

Unfortunately, even that smaller-than-before leakage of host configuration
into guest-visible configuration still breaks things.  Specifically, a guest
with 4 (or fewer) vthreads/vcore will get a different vsmt value when
running on a POWER8 (vsmt==8) host than on a POWER9 (vsmt==4) host.  That
means the vcpu ids don't line up, so you can't migrate between them, though
you should be able to.

Long term we really want to make vsmt == smp_threads for sufficiently
new machine types.  However, that means that qemu will then require a
sufficiently recent KVM (one which supports changing VSMT) - that's still
not widely enough deployed to be really comfortable to do.

In the meantime we need some default that will work as often as
possible.  This patch changes that default to 8 in all circumstances.
This does change guest visible behaviour (including for existing
machine versions) for many cases - just not the most common/important
case.

Following is case by case justification for why this is still the least
worst option.  Note that any of the old behaviours can still be duplicated
after this patch, it's just that it requires manual intervention by
setting the vsmt property on the command line.

KVM HV on POWER8 host:
   This is the overwhelmingly common case in production setups, and is
   unchanged by design.  POWER8 hosts will advertise a default VSMT mode
   of 8, and > 8 vthreads/vcore isn't permitted

KVM HV on POWER7 host:
   Will break, but POWER7s allowing KVM were never released to the public.

KVM HV on POWER9 host:
   Not yet released to the public, breaking this now will reduce other
   breakage later.

KVM HV on PowerPC 970:
   Will theoretically break it, but it was barely supported to begin with
   and already required various user visible hacks to work.  Also so old
   that I just don't care.

TCG:
   This is the nastiest one; it means migration of TCG guests (without
   manual vsmt setting) will break.  Since TCG is rarely used in production
   I think this is worth it for the other benefits.  It does also remove
   one more barrier to TCG<->KVM migration which could be interesting for
   debugging applications.

KVM PR:
   As with TCG, this will break migration of existing configurations,
   without adding extra manual vsmt options.  As with TCG, it is rare in
   production so I think the benefits outweigh breakages.

Signed-off-by: David Gibson 
Reviewed-by: Laurent Vivier 
Reviewed-by: Jose Ricardo Ziviani 
Reviewed-by: Greg Kurz 
---
 hw/ppc/spapr.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 6d3613d934..a216ceada8 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2305,9 +2305,14 @@ static void spapr_set_vsmt_mode(sPAPRMachineState 
*spapr, Error **errp)
 }
 /* In this case, spapr->vsmt has been set by the command line */
 } else {
-/* Choose a VSMT mode that may be higher than necessary but is
- * likely to be compatible with hosts that don't have VSMT. */
-spapr->vsmt = MAX(kvm_smt, smp_threads);
+/*
+ * Default VSMT value is tricky, because we need it to be as
+ * consistent as possible (for migration), but this requires
+ * changing it for at least some existing cases.  We pick 8 as
+ * the value that we'd get with KVM on POWER8, the
+ * overwhelmingly common case in production systems.
+ */
+spapr->vsmt = 8;
 }
 
 /* KVM: If necessary, set the SMT mode: */
-- 
2.14.3

[Qemu-devel] [PULL 22/33] vhost: Move log_dirty check

2018-01-15 Thread Michael S. Tsirkin

From: "Dr. David Alan Gilbert" 

Move the log_dirty check into vhost_section.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/virtio/vhost.c  | 20 +---
 hw/virtio/trace-events |  3 +++
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 8a85dde..83df043 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -27,6 +27,7 @@
 #include "hw/virtio/virtio-access.h"
 #include "migration/blocker.h"
 #include "sysemu/dma.h"
+#include "trace.h"
 
 /* enabled until disconnected backend stabilizes */
 #define _VHOST_DEBUG 1
@@ -567,18 +568,12 @@ static void vhost_set_memory(MemoryListener *listener,
  memory_listener);
 hwaddr start_addr = section->offset_within_address_space;
 ram_addr_t size = int128_get64(section->size);
-bool log_dirty =
-memory_region_get_dirty_log_mask(section->mr) & ~(1 << 
DIRTY_MEMORY_MIGRATION);
 int s = offsetof(struct vhost_memory, regions) +
 (dev->mem->nregions + 1) * sizeof dev->mem->regions[0];
 void *ram;
 
 dev->mem = g_realloc(dev->mem, s);
 
-if (log_dirty) {
-add = false;
-}
-
 assert(size);
 
 /* Optimize no-change case. At least cirrus_vga does this a lot at this 
time. */
@@ -611,8 +606,19 @@ static void vhost_set_memory(MemoryListener *listener,
 
 static bool vhost_section(MemoryRegionSection *section)
 {
-return memory_region_is_ram(section->mr) &&
+bool result;
+bool log_dirty = memory_region_get_dirty_log_mask(section->mr) &
+ ~(1 << DIRTY_MEMORY_MIGRATION);
+result = memory_region_is_ram(section->mr) &&
 !memory_region_is_rom(section->mr);
+
+/* Vhost doesn't handle any block which is doing dirty-tracking other
+ * than migration; this typically fires on VGA areas.
+ */
+result &= !log_dirty;
+
+trace_vhost_section(section->mr->name, result);
+return result;
 }
 
 static void vhost_begin(MemoryListener *listener)
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 775461a..4a493bc 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -1,5 +1,8 @@
 # See docs/devel/tracing.txt for syntax documentation.
 
+# hw/virtio/vhost.c
+vhost_section(const char *name, int r) "%s:%d"
+
 # hw/virtio/virtio.c
 virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned 
out_num) "elem %p size %zd in_num %u out_num %u"
 virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) 
"vq %p elem %p len %u idx %u"
-- 
MST

[Qemu-devel] [PULL 11/33] intel-iommu: Redefine macros to enable supporting 48 bit address width

2018-01-15 Thread Michael S. Tsirkin

From: Prasad Singamsetty 

The current implementation of the Intel IOMMU code only supports a 39-bit
host/iova address width, so a number of macros use hard-coded values based
on that. This patch redefines them so they can be used with
variable address widths. This patch doesn't add any new functionality
but enables adding support for a 48-bit address width.

Signed-off-by: Prasad Singamsetty 
Reviewed-by: Peter Xu 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/i386/intel_iommu_internal.h | 34 +++---
 include/hw/i386/intel_iommu.h  |  6 +++--
 hw/i386/intel_iommu.c  | 54 --
 3 files changed, 61 insertions(+), 33 deletions(-)

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 0e73a65..77e4a98 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -172,10 +172,10 @@
 
 /* RTADDR_REG */
 #define VTD_RTADDR_RTT  (1ULL << 11)
-#define VTD_RTADDR_ADDR_MASK(VTD_HAW_MASK ^ 0xfffULL)
+#define VTD_RTADDR_ADDR_MASK(aw)(VTD_HAW_MASK(aw) ^ 0xfffULL)
 
 /* IRTA_REG */
-#define VTD_IRTA_ADDR_MASK  (VTD_HAW_MASK ^ 0xfffULL)
+#define VTD_IRTA_ADDR_MASK(aw)  (VTD_HAW_MASK(aw) ^ 0xfffULL)
 #define VTD_IRTA_EIME   (1ULL << 11)
 #define VTD_IRTA_SIZE_MASK  (0xfULL)
 
@@ -198,8 +198,8 @@
 #define VTD_DOMAIN_ID_MASK  ((1UL << VTD_DOMAIN_ID_SHIFT) - 1)
 #define VTD_CAP_ND  (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL)
 #define VTD_MGAW39  /* Maximum Guest Address Width */
-#define VTD_ADDRESS_SIZE(1ULL << VTD_MGAW)
-#define VTD_CAP_MGAW(((VTD_MGAW - 1) & 0x3fULL) << 16)
+#define VTD_ADDRESS_SIZE(aw)(1ULL << (aw))
+#define VTD_CAP_MGAW(aw) ((((aw) - 1) & 0x3fULL) << 16)
 #define VTD_MAMV18ULL
 #define VTD_CAP_MAMV(VTD_MAMV << 48)
 #define VTD_CAP_PSI (1ULL << 39)
@@ -219,7 +219,7 @@
 #define VTD_IQT_QT(val) (((val) >> 4) & 0x7fffULL)
 
 /* IQA_REG */
-#define VTD_IQA_IQA_MASK(VTD_HAW_MASK ^ 0xfffULL)
+#define VTD_IQA_IQA_MASK(aw)(VTD_HAW_MASK(aw) ^ 0xfffULL)
 #define VTD_IQA_QS  0x7ULL
 
 /* IQH_REG */
@@ -373,6 +373,24 @@ typedef union VTDInvDesc VTDInvDesc;
 #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI 0xffeULL
 #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffe0fff8
 
+/* Rsvd field masks for spte */
+#define VTD_SPTE_PAGE_L1_RSVD_MASK(aw) \
+(0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+#define VTD_SPTE_PAGE_L2_RSVD_MASK(aw) \
+(0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+#define VTD_SPTE_PAGE_L3_RSVD_MASK(aw) \
+(0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+#define VTD_SPTE_PAGE_L4_RSVD_MASK(aw) \
+(0x880ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+#define VTD_SPTE_LPAGE_L1_RSVD_MASK(aw) \
+(0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+#define VTD_SPTE_LPAGE_L2_RSVD_MASK(aw) \
+(0x1ff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+#define VTD_SPTE_LPAGE_L3_RSVD_MASK(aw) \
+(0x3800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+#define VTD_SPTE_LPAGE_L4_RSVD_MASK(aw) \
+(0x880ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM))
+
 /* Information about page-selective IOTLB invalidate */
 struct VTDIOTLBPageInvInfo {
 uint16_t domain_id;
@@ -403,7 +421,7 @@ typedef struct VTDRootEntry VTDRootEntry;
 #define VTD_ROOT_ENTRY_CTP  (~0xfffULL)
 
 #define VTD_ROOT_ENTRY_NR   (VTD_PAGE_SIZE / sizeof(VTDRootEntry))
-#define VTD_ROOT_ENTRY_RSVD (0xffeULL | ~VTD_HAW_MASK)
+#define VTD_ROOT_ENTRY_RSVD(aw) (0xffeULL | ~VTD_HAW_MASK(aw))
 
 /* Masks for struct VTDContextEntry */
 /* lo */
@@ -415,7 +433,7 @@ typedef struct VTDRootEntry VTDRootEntry;
 #define VTD_CONTEXT_TT_PASS_THROUGH (2ULL << 2)
 /* Second Level Page Translation Pointer*/
 #define VTD_CONTEXT_ENTRY_SLPTPTR   (~0xfffULL)
-#define VTD_CONTEXT_ENTRY_RSVD_LO   (0xff0ULL | ~VTD_HAW_MASK)
+#define VTD_CONTEXT_ENTRY_RSVD_LO(aw) (0xff0ULL | ~VTD_HAW_MASK(aw))
 /* hi */
 #define VTD_CONTEXT_ENTRY_AW7ULL /* Adjusted guest-address-width */
 #define VTD_CONTEXT_ENTRY_DID(val)  (((val) >> 8) & VTD_DOMAIN_ID_MASK)
@@ -439,7 +457,7 @@ typedef struct VTDRootEntry VTDRootEntry;
 #define VTD_SL_RW_MASK  3ULL
 #define VTD_SL_R1ULL
 #define VTD_SL_W(1ULL << 1)
-#define VTD_SL_PT_BASE_ADDR_MASK(~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK)
+#define VTD_SL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw))
 #define VTD_SL_IGN_COM  0xbff0ULL
 
 #endif
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index ac15e6b..372b06d 100644
--- a/include/hw/i386/intel_iommu.h
+++

[Qemu-devel] [PULL 31/33] vhost-user: factor out msg head and payload

2018-01-15 Thread Michael S. Tsirkin

Split the header and payload into separate structures
to enable easier handling of alignment issues.

Signed-off-by: Michael S. Tsirkin 
---
 hw/virtio/vhost-user.c | 198 +
 1 file changed, 101 insertions(+), 97 deletions(-)

diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 8b94688..6ac3610 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -112,7 +112,7 @@ static VhostUserConfig c __attribute__ ((unused));
+ sizeof(c.size) \
+ sizeof(c.flags))
 
-typedef struct VhostUserMsg {
+typedef struct {
 VhostUserRequest request;
 
 #define VHOST_USER_VERSION_MASK (0x3)
@@ -120,7 +120,9 @@ typedef struct VhostUserMsg {
 #define VHOST_USER_NEED_REPLY_MASK  (0x1 << 3)
 uint32_t flags;
 uint32_t size; /* the following payload size */
-union {
+} QEMU_PACKED VhostUserHeader;
+
+typedef union {
 #define VHOST_USER_VRING_IDX_MASK   (0xff)
 #define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
 uint64_t u64;
@@ -130,15 +132,17 @@ typedef struct VhostUserMsg {
 VhostUserLog log;
 struct vhost_iotlb_msg iotlb;
 VhostUserConfig config;
-} payload;
+} VhostUserPayload;
+
+typedef struct VhostUserMsg {
+VhostUserHeader hdr;
+VhostUserPayload payload;
 } QEMU_PACKED VhostUserMsg;
 
 static VhostUserMsg m __attribute__ ((unused));
-#define VHOST_USER_HDR_SIZE (sizeof(m.request) \
-+ sizeof(m.flags) \
-+ sizeof(m.size))
+#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader))
 
-#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)
+#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload))
 
 /* The version of the protocol we support */
 #define VHOST_USER_VERSION(0x1)
@@ -163,33 +167,33 @@ static int vhost_user_read(struct vhost_dev *dev, 
VhostUserMsg *msg)
 r = qemu_chr_fe_read_all(chr, p, size);
 if (r != size) {
 error_report("Failed to read msg header. Read %d instead of %d."
- " Original request %d.", r, size, msg->request);
+ " Original request %d.", r, size, msg->hdr.request);
 goto fail;
 }
 
 /* validate received flags */
-if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
+if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
 error_report("Failed to read msg header."
-" Flags 0x%x instead of 0x%x.", msg->flags,
+" Flags 0x%x instead of 0x%x.", msg->hdr.flags,
 VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
 goto fail;
 }
 
 /* validate message size is sane */
-if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
+if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) {
 error_report("Failed to read msg header."
-" Size %d exceeds the maximum %zu.", msg->size,
+" Size %d exceeds the maximum %zu.", msg->hdr.size,
 VHOST_USER_PAYLOAD_SIZE);
 goto fail;
 }
 
-if (msg->size) {
+if (msg->hdr.size) {
 p += VHOST_USER_HDR_SIZE;
-size = msg->size;
+size = msg->hdr.size;
 r = qemu_chr_fe_read_all(chr, p, size);
 if (r != size) {
 error_report("Failed to read msg payload."
- " Read %d instead of %d.", r, msg->size);
+ " Read %d instead of %d.", r, msg->hdr.size);
 goto fail;
 }
 }
@@ -205,7 +209,7 @@ static int process_message_reply(struct vhost_dev *dev,
 {
 VhostUserMsg msg_reply;
 
-if ((msg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
+if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) {
 return 0;
 }
 
@@ -213,10 +217,10 @@ static int process_message_reply(struct vhost_dev *dev,
 return -1;
 }
 
-if (msg_reply.request != msg->request) {
+if (msg_reply.hdr.request != msg->hdr.request) {
 error_report("Received unexpected msg type."
  "Expected %d received %d",
- msg->request, msg_reply.request);
+ msg->hdr.request, msg_reply.hdr.request);
 return -1;
 }
 
@@ -243,15 +247,15 @@ static int vhost_user_write(struct vhost_dev *dev, 
VhostUserMsg *msg,
 {
 struct vhost_user *u = dev->opaque;
 CharBackend *chr = u->chr;
-int ret, size = VHOST_USER_HDR_SIZE + msg->size;
+int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size;
 
 /*
  * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
  * we just need send it once in the first time. For later such
  * request, we just ignore it.
  */
-if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) {
-msg->flags &= ~VHOST_USER_NEED_REPLY_MASK;
+if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) {
+

[Qemu-devel] [PULL 16/33] vhost-user-test: fix features mask

2018-01-15 Thread Michael S. Tsirkin

From: Maxime Coquelin 

VIRTIO_NET_F_MAC is a bit position, not a bit mask.

Signed-off-by: Maxime Coquelin 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Marc-André Lureau 
---
 tests/vhost-user-test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index e2c89ed..43c6528 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -177,7 +177,7 @@ static void init_virtio_dev(TestServer *s)
 qvirtio_set_driver(>vdev);
 
 features = qvirtio_get_features(>vdev);
-features = features & VIRTIO_NET_F_MAC;
+features = features & (1u << VIRTIO_NET_F_MAC);
 qvirtio_set_features(>vdev, features);
 
 qvirtio_set_driver_ok(>vdev);
-- 
MST

[Qemu-devel] [PATCHv2 0/2] Further VSMT fixes

2018-01-15 Thread David Gibson

Here are some follow on fixes to Ziviani's proposed changes to VSMT
handling.  This should fix migration of POWER8 compat mode guests
between POWER8 and POWER9 hosts.

The changes are simple, the rationale's rather more complex.

Changes since v1:
  * Dropped one patch, already merged
  * Discovered that the previous version broke running KVM PR in
almost all cases, even ones that should still be ok
  - Added a new 1/2 patch to address that

David Gibson (2):
  spapr: Allow some cases where we can't set VSMT mode in the kernel
  spapr: Adjust default VSMT value for better migration compatibility

 hw/ppc/spapr.c | 37 +++--
 1 file changed, 27 insertions(+), 10 deletions(-)

-- 
2.14.3

[Qemu-devel] [PULL 10/33] vhost-user: fix multiple queue specification

2018-01-15 Thread Michael S. Tsirkin

From: Maxime Coquelin 

The number of queues supported by the slave is queried with
message VHOST_USER_GET_QUEUE_NUM, not with message
VHOST_USER_GET_PROTOCOL_FEATURES.

Signed-off-by: Maxime Coquelin 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 docs/interop/vhost-user.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt
index 0875ef4..9fcf48d 100644
--- a/docs/interop/vhost-user.txt
+++ b/docs/interop/vhost-user.txt
@@ -228,8 +228,8 @@ Multiple queue is treated as a protocol extension, hence 
the slave has to
 implement protocol features first. The multiple queues feature is supported
 only when the protocol feature VHOST_USER_PROTOCOL_F_MQ (bit 0) is set.
 
-The max number of queues the slave supports can be queried with message
-VHOST_USER_GET_PROTOCOL_FEATURES. Master should stop when the number of
+The max number of queue pairs the slave supports can be queried with message
+VHOST_USER_GET_QUEUE_NUM. Master should stop when the number of
 requested queues is bigger than that.
 
 As all queues share one connection, the master uses a unique index for each
-- 
MST

[Qemu-devel] [PULL 28/33] tests: acpi: init table descriptor in test_dst_table()

2018-01-15 Thread Michael S. Tsirkin

From: Igor Mammedov 

Remove code duplication and make sure that the table descriptor
passed in for initialization is in the expected state.

Signed-off-by: Igor Mammedov 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 tests/bios-tables-test.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
index a2f64c8..4b357cd 100644
--- a/tests/bios-tables-test.c
+++ b/tests/bios-tables-test.c
@@ -214,6 +214,7 @@ static void test_dst_table(AcpiSdtTable *sdt_table, 
uint32_t addr)
 {
 uint8_t checksum;
 
+memset(sdt_table, 0, sizeof(*sdt_table));
 ACPI_READ_TABLE_HEADER(&sdt_table->header, addr);
 
 sdt_table->aml_len = le32_to_cpu(sdt_table->header.length)
@@ -233,8 +234,6 @@ static void test_acpi_dsdt_table(test_data *data)
 AcpiSdtTable dsdt_table;
 uint32_t addr = le32_to_cpu(data->fadt_table.dsdt);
 
-memset(&dsdt_table, 0, sizeof(dsdt_table));
-
 test_dst_table(&dsdt_table, addr);
 ACPI_ASSERT_CMP(dsdt_table.header.signature, "DSDT");
 
@@ -251,7 +250,6 @@ static void test_acpi_tables(test_data *data)
 AcpiSdtTable ssdt_table;
 uint32_t addr;
 
-memset(&ssdt_table, 0, sizeof(ssdt_table));
 addr = le32_to_cpu(data->rsdt_tables_addr[i + 1]); /* fadt is first */
 test_dst_table(&ssdt_table, addr);
 g_array_append_val(data->tables, ssdt_table);
-- 
MST

[Qemu-devel] [PULL 05/33] contrib/vhost-user-blk: introduce a vhost-user-blk sample application

2018-01-15 Thread Michael S. Tsirkin

From: Changpeng Liu 

This commit introduces a vhost-user-blk backend device that uses a UNIX
domain socket to communicate with QEMU. The vhost-user-blk sample
application should be used with the QEMU vhost-user-blk-pci device.

To use it, compile with:
make vhost-user-blk

and start like this:
vhost-user-blk -b /dev/sdb -s /path/vhost.socket

Signed-off-by: Changpeng Liu 
Reviewed-by: Marc-André Lureau 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 Makefile|   3 +
 contrib/vhost-user-blk/vhost-user-blk.c | 545 
 .gitignore  |   1 +
 Makefile.objs   |   1 +
 contrib/vhost-user-blk/Makefile.objs|   1 +
 5 files changed, 551 insertions(+)
 create mode 100644 contrib/vhost-user-blk/vhost-user-blk.c
 create mode 100644 contrib/vhost-user-blk/Makefile.objs

diff --git a/Makefile b/Makefile
index d86ecd2..f021fc8 100644
--- a/Makefile
+++ b/Makefile
@@ -331,6 +331,7 @@ dummy := $(call unnest-vars,, \
 ivshmem-server-obj-y \
 libvhost-user-obj-y \
 vhost-user-scsi-obj-y \
+vhost-user-blk-obj-y \
 qga-vss-dll-obj-y \
 block-obj-y \
 block-obj-m \
@@ -562,6 +563,8 @@ ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) 
$(COMMON_LDADDS)
 endif
 vhost-user-scsi$(EXESUF): $(vhost-user-scsi-obj-y) libvhost-user.a
$(call LINK, $^)
+vhost-user-blk$(EXESUF): $(vhost-user-blk-obj-y) libvhost-user.a
+   $(call LINK, $^)
 
 module_block.h: $(SRC_PATH)/scripts/modules/module_block.py config-host.mak
$(call quiet-command,$(PYTHON) $< $@ \
diff --git a/contrib/vhost-user-blk/vhost-user-blk.c 
b/contrib/vhost-user-blk/vhost-user-blk.c
new file mode 100644
index 000..67dac81
--- /dev/null
+++ b/contrib/vhost-user-blk/vhost-user-blk.c
@@ -0,0 +1,545 @@
+/*
+ * vhost-user-blk sample application
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ *
+ * Author:
+ *  Changpeng Liu 
+ *
+ * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
+ * implementation by:
+ *  Felipe Franciosi 
+ *  Anthony Liguori 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 only.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "standard-headers/linux/virtio_blk.h"
+#include "contrib/libvhost-user/libvhost-user-glib.h"
+#include "contrib/libvhost-user/libvhost-user.h"
+
+#include 
+
+struct virtio_blk_inhdr {
+unsigned char status;
+};
+
+/* vhost user block device */
+typedef struct VubDev {
+VugDev parent;
+int blk_fd;
+struct virtio_blk_config blkcfg;
+char *blk_name;
+GMainLoop *loop;
+} VubDev;
+
+typedef struct VubReq {
+VuVirtqElement *elem;
+int64_t sector_num;
+size_t size;
+struct virtio_blk_inhdr *in;
+struct virtio_blk_outhdr *out;
+VubDev *vdev_blk;
+struct VuVirtq *vq;
+} VubReq;
+
+/* refer util/iov.c */
+static size_t vub_iov_size(const struct iovec *iov,
+  const unsigned int iov_cnt)
+{
+size_t len;
+unsigned int i;
+
+len = 0;
+for (i = 0; i < iov_cnt; i++) {
+len += iov[i].iov_len;
+}
+return len;
+}
+
+static void vub_panic_cb(VuDev *vu_dev, const char *buf)
+{
+VugDev *gdev;
+VubDev *vdev_blk;
+
+assert(vu_dev);
+
+gdev = container_of(vu_dev, VugDev, parent);
+vdev_blk = container_of(gdev, VubDev, parent);
+if (buf) {
+g_warning("vu_panic: %s", buf);
+}
+
+g_main_loop_quit(vdev_blk->loop);
+}
+
+static void vub_req_complete(VubReq *req)
+{
+VugDev *gdev = &req->vdev_blk->parent;
+VuDev *vu_dev = &gdev->parent;
+
+/* IO size with 1 extra status byte */
+vu_queue_push(vu_dev, req->vq, req->elem,
+  req->size + 1);
+vu_queue_notify(vu_dev, req->vq);
+
+if (req->elem) {
+free(req->elem);
+}
+
+g_free(req);
+}
+
+static int vub_open(const char *file_name, bool wce)
+{
+int fd;
+int flags = O_RDWR;
+
+if (!wce) {
+flags |= O_DIRECT;
+}
+
+fd = open(file_name, flags);
+if (fd < 0) {
+fprintf(stderr, "Cannot open file %s, %s\n", file_name,
+strerror(errno));
+return -1;
+}
+
+return fd;
+}
+
+static ssize_t
+vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
+{
+VubDev *vdev_blk = req->vdev_blk;
+ssize_t rc;
+
+if (!iovcnt) {
+fprintf(stderr, "Invalid Read IOV count\n");
+return -1;
+}
+
+req->size = vub_iov_size(iov, iovcnt);
+rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
+if (rc < 0) {
+fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed

Re: [Qemu-devel] [PATCH 2/2] spapr: Adjust default VSMT value for better migration compatibility

2018-01-15 Thread David Gibson

On Mon, Jan 15, 2018 at 10:48:47AM +0100, Greg Kurz wrote:
> On Mon, 15 Jan 2018 18:27:15 +1100
> David Gibson  wrote:
> 
> > fa98fbfc "PC: KVM: Support machine option to set VSMT mode" introduced the
> > "vsmt" parameter for the pseries machine type, which controls the spacing
> > of the vcpu ids of thread 0 for each virtual core.  This was done to bring
> > some consistency and stability to how that was done, while still allowing
> > backwards compatibility for migration and otherwise.
> > 
> > The default value we used for vsmt was set to the max of the host's
> > advertised default number of threads and the number of vthreads per vcore
> > in the guest.  This was done to continue running without extra parameters
> > on older KVM versions which don't allow the VSMT value to be changed.
> > 
> > Unfortunately, even that smaller than before leakage of host configuration
> > into guest visible configuration still breaks things.  Specifically a guest
> > with 4 (or less) vthread/vcore will get a different vsmt value when
> > running on a POWER8 (vsmt==8) and POWER9 (vsmt==4) host.  That means the
> > vcpu ids don't line up so you can't migrate between them, though you should
> > be able to.
> > 
> > Long term we really want to make vsmt == smp_threads for sufficiently
> > new machine types.  However, that means that qemu will then require a
> > sufficiently recent KVM (one which supports changing VSMT) - that's still
> > not widely enough deployed to be really comfortable to do.
> > 
> > In the meantime we some default that will work as often as possible.
> 
> s/we some/we need some/ ?

Corrected.

> > This patch changes that default to 8 in all circumstances.  This does
> > change guest visible behaviour (including for existing machine versions)
> > for many cases - just not the most common/important case.
> > 
> > Following is case by case justification for why this is still the least
> > worst option.  Note that any of the old behaviours can still be duplicated
> > after this patch, it's just that it requires manual intervention by
> > setting the vsmt property on the command line.
> > 
> 
> IIUC this unconditionally breaks existing setups that rely on static
> Micro-Threading on a POWER8 host (eg, subcores-per-core=2 on the host
> and smp_threads=4). I have no evidence this is a widely used setup,
> but FWIW it is documented in some IBM RedBooks:

Well.. it will break migration between old and new qemu on the
microthreaded setup,  but fix it between new qemu on microthreaded
setup and new qemu on non-microthreaded setup (old qemu on
microthreaded to old qemu on non-microthreaded was already broken for
the same reasons as p8<->p9).  It's not really obvious to me which is
preferable.

> "Performance Optimization and Tuning Techniques for IBM Power Systems
>  Processors Including IBM POWER8"
> 
> http://www.redbooks.ibm.com/abstracts/sg248171.html?Open
> 
> "IBM PowerKVM: Configuration and Use"
> 
> http://www.redbooks.ibm.com/abstracts/sg248231.html?Open
>
> Maybe the new behaviour could be added for new machine types only ?

I'd really prefer not to.  It makes some existing cases work, but
breaks some other cases.  Given that the old behaviour is inherently
wrong, I'm more inclined to change it.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

[Qemu-devel] [PULL 08/33] virtio: improve virtio devices initialization time

2018-01-15 Thread Michael S. Tsirkin

From: Gal Hammer 

The loading time of a VM is quite significant when its virtio
devices use a large amount of virt-queues (e.g. a virtio-serial
device with max_ports=511). Most of the time is spent in the
creation of all the required event notifiers (ioeventfd and memory
regions).

This patch packs all the changes to the memory regions in a
single memory transaction.

Reported-by: Sitong Liu 
Reported-by: Xiaoling Gao 
Signed-off-by: Gal Hammer 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/virtio/virtio.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index d6002ee..3ac3491 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2574,6 +2574,7 @@ static int 
virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
 VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
 int n, r, err;
 
+memory_region_transaction_begin();
 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
 VirtQueue *vq = &vdev->vq[n];
 if (!virtio_queue_get_num(vdev, n)) {
@@ -2596,6 +2597,7 @@ static int 
virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
 }
 event_notifier_set(&vq->host_notifier);
 }
+memory_region_transaction_commit();
 return 0;
 
 assign_error:
@@ -2609,6 +2611,7 @@ assign_error:
 r = virtio_bus_set_host_notifier(qbus, n, false);
 assert(r >= 0);
 }
+memory_region_transaction_commit();
 return err;
 }
 
@@ -2625,6 +2628,7 @@ static void 
virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
 VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
 int n, r;
 
+memory_region_transaction_begin();
 for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
 VirtQueue *vq = &vdev->vq[n];
 
@@ -2635,6 +2639,7 @@ static void 
virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
 r = virtio_bus_set_host_notifier(qbus, n, false);
 assert(r >= 0);
 }
+memory_region_transaction_commit();
 }
 
 void virtio_device_stop_ioeventfd(VirtIODevice *vdev)
-- 
MST

[Qemu-devel] [PULL 24/33] vhost: Merge sections added to temporary list

2018-01-15 Thread Michael S. Tsirkin

From: "Dr. David Alan Gilbert" 

As sections are reported by the listener to the _nop and _add
methods, add them to the temporary section list but now merge them
with the previous section if the new one abuts and the backend allows.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Igor Mammedov 
---
 docs/interop/vhost-user.txt |  2 +-
 hw/virtio/vhost.c   | 72 ++---
 hw/virtio/trace-events  |  2 ++
 3 files changed, 65 insertions(+), 11 deletions(-)

diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt
index 9fcf48d..a8c5623 100644
--- a/docs/interop/vhost-user.txt
+++ b/docs/interop/vhost-user.txt
@@ -551,7 +551,7 @@ Master message types
   Master payload: N/A
   Slave payload: u64
 
-  Query how many queues the backend supports. This request should be
+  Query how many queue pairs the backend supports. This request should be
   sent only when VHOST_USER_PROTOCOL_F_MQ is set in queried protocol
   features by VHOST_USER_GET_PROTOCOL_FEATURES.
 
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 0bdd833..e63099f 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -709,14 +709,65 @@ out:
 return;
 }
 
-static void vhost_add_section(struct vhost_dev *dev,
-  MemoryRegionSection *section)
-{
-++dev->n_tmp_sections;
-dev->tmp_sections = g_renew(MemoryRegionSection, dev->tmp_sections,
-dev->n_tmp_sections);
-dev->tmp_sections[dev->n_tmp_sections - 1] = *section;
-memory_region_ref(section->mr);
+/* Adds the section data to the tmp_section structure.
+ * It relies on the listener calling us in memory address order
+ * and for each region (via the _add and _nop methods) to
+ * join neighbours.
+ */
+static void vhost_region_add_section(struct vhost_dev *dev,
+ MemoryRegionSection *section)
+{
+bool need_add = true;
+uint64_t mrs_size = int128_get64(section->size);
+uint64_t mrs_gpa = section->offset_within_address_space;
+uintptr_t mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
+ section->offset_within_region;
+
+trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size,
+   mrs_host);
+
+if (dev->n_tmp_sections) {
+/* Since we already have at least one section, lets see if
+ * this extends it; since we're scanning in order, we only
+ * have to look at the last one, and the FlatView that calls
+ * us shouldn't have overlaps.
+ */
+MemoryRegionSection *prev_sec = dev->tmp_sections +
+   (dev->n_tmp_sections - 1);
+uint64_t prev_gpa_start = prev_sec->offset_within_address_space;
+uint64_t prev_size = int128_get64(prev_sec->size);
+uint64_t prev_gpa_end   = range_get_last(prev_gpa_start, prev_size);
+uint64_t prev_host_start =
+(uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) +
+prev_sec->offset_within_region;
+uint64_t prev_host_end   = range_get_last(prev_host_start, prev_size);
+
+if (prev_gpa_end + 1 == mrs_gpa &&
+prev_host_end + 1 == mrs_host &&
+section->mr == prev_sec->mr &&
+(!dev->vhost_ops->vhost_backend_can_merge ||
+dev->vhost_ops->vhost_backend_can_merge(dev,
+mrs_host, mrs_size,
+prev_host_start, prev_size))) {
+/* The two sections abut */
+need_add = false;
+prev_sec->size = int128_add(prev_sec->size, section->size);
+trace_vhost_region_add_section_abut(section->mr->name,
+mrs_size + prev_size);
+}
+}
+
+if (need_add) {
+++dev->n_tmp_sections;
+dev->tmp_sections = g_renew(MemoryRegionSection, dev->tmp_sections,
+dev->n_tmp_sections);
+dev->tmp_sections[dev->n_tmp_sections - 1] = *section;
+/* The flatview isn't stable and we don't use it, making it NULL
+ * means we can memcmp the list.
+ */
+dev->tmp_sections[dev->n_tmp_sections - 1].fv = NULL;
+memory_region_ref(section->mr);
+}
 }
 
 static void vhost_region_add(MemoryListener *listener,
@@ -728,11 +779,12 @@ static void vhost_region_add(MemoryListener *listener,
 if (!vhost_section(section)) {
 return;
 }
+vhost_region_add_section(dev, section);
 
-vhost_add_section(dev, section);
 vhost_set_memory(listener, section, true);
 }
 
+/* Called on regions that have not changed */
 static void vhost_region_nop(MemoryListener *listener,

[Qemu-devel] [PULL 09/33] pci/shpc: Move function to generic header file

2018-01-15 Thread Michael S. Tsirkin

From: Yuval Shaia 

This function should be declared in generic header file so we can
utilize it.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Yuval Shaia 
Signed-off-by: Marcel Apfelbaum 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/qemu/host-utils.h | 10 ++
 hw/pci/shpc.c | 13 ++---
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index 5ac621c..38da849 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -400,6 +400,16 @@ static inline uint64_t pow2ceil(uint64_t value)
 return 0x8000ull >> (n - 1);
 }
 
+static inline uint32_t pow2roundup32(uint32_t x)
+{
+x |= (x >> 1);
+x |= (x >> 2);
+x |= (x >> 4);
+x |= (x >> 8);
+x |= (x >> 16);
+return x + 1;
+}
+
 /**
  * urshift - 128-bit Unsigned Right Shift.
  * @plow: in/out - lower 64-bit integer.
diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c
index 69fc14b..a8462d4 100644
--- a/hw/pci/shpc.c
+++ b/hw/pci/shpc.c
@@ -1,6 +1,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu-common.h"
+#include "qemu/host-utils.h"
 #include "qemu/range.h"
 #include "qemu/error-report.h"
 #include "hw/pci/shpc.h"
@@ -122,16 +123,6 @@
 #define SHPC_PCI_TO_IDX(pci_slot) ((pci_slot) - 1)
 #define SHPC_IDX_TO_PHYSICAL(slot) ((slot) + 1)
 
-static int roundup_pow_of_two(int x)
-{
-x |= (x >> 1);
-x |= (x >> 2);
-x |= (x >> 4);
-x |= (x >> 8);
-x |= (x >> 16);
-return x + 1;
-}
-
 static uint16_t shpc_get_status(SHPCDevice *shpc, int slot, uint16_t msk)
 {
 uint8_t *status = shpc->config + SHPC_SLOT_STATUS(slot);
@@ -656,7 +647,7 @@ int shpc_init(PCIDevice *d, PCIBus *sec_bus, MemoryRegion 
*bar,
 
 int shpc_bar_size(PCIDevice *d)
 {
-return roundup_pow_of_two(SHPC_SLOT_REG(SHPC_MAX_SLOTS));
+return pow2roundup32(SHPC_SLOT_REG(SHPC_MAX_SLOTS));
 }
 
 void shpc_cleanup(PCIDevice *d, MemoryRegion *bar)
-- 
MST

[Qemu-devel] [PULL 02/33] vhost-user: add new vhost user messages to support virtio config space

2018-01-15 Thread Michael S. Tsirkin

From: Changpeng Liu 

Add VHOST_USER_GET_CONFIG/VHOST_USER_SET_CONFIG messages, which can be
used for live migration of vhost-user devices; vhost-user devices can
also use these messages to get/set the virtio config space from/to the
I/O target. To support virtio config space changes, the
VHOST_USER_SLAVE_CONFIG_CHANGE_MSG message is added as the event notifier
for when the virtio config space changes in the slave I/O target.

Signed-off-by: Changpeng Liu 
Reviewed-by: Marc-André Lureau 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 docs/interop/vhost-user.txt   |  55 ++
 include/hw/virtio/vhost-backend.h |  12 
 include/hw/virtio/vhost.h |  15 +
 hw/virtio/vhost-user.c| 118 ++
 hw/virtio/vhost.c |  32 +++
 5 files changed, 232 insertions(+)

diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt
index d49444e..0875ef4 100644
--- a/docs/interop/vhost-user.txt
+++ b/docs/interop/vhost-user.txt
@@ -119,6 +119,19 @@ Depending on the request type, payload can be:
 - 3: IOTLB invalidate
 - 4: IOTLB access fail
 
+ * Virtio device config space
+   ---
+   | offset | size | flags | payload |
+   ---
+
+   Offset: a 32-bit offset of virtio device's configuration space
+   Size: a 32-bit configuration space access size in bytes
+   Flags: a 32-bit value:
+- 0: Vhost master messages used for writeable fields
+- 1: Vhost master messages used for live migration
+   Payload: Size bytes array holding the contents of the virtio
+   device's configuration space
+
 In QEMU the vhost-user message is implemented with the following struct:
 
 typedef struct VhostUserMsg {
@@ -132,6 +145,7 @@ typedef struct VhostUserMsg {
 VhostUserMemory memory;
 VhostUserLog log;
 struct vhost_iotlb_msg iotlb;
+VhostUserConfig config;
 };
 } QEMU_PACKED VhostUserMsg;
 
@@ -623,6 +637,32 @@ Master message types
   and expect this message once (per VQ) during device configuration
   (ie. before the master starts the VQ).
 
+ * VHOST_USER_GET_CONFIG
+
+  Id: 24
+  Equivalent ioctl: N/A
+  Master payload: virtio device config space
+  Slave payload: virtio device config space
+
+  Submitted by the vhost-user master to fetch the contents of the virtio
+  device configuration space, vhost-user slave's payload size MUST match
+  master's request, vhost-user slave uses zero length of payload to
+  indicate an error to vhost-user master. The vhost-user master may
+  cache the contents to avoid repeated VHOST_USER_GET_CONFIG calls.
+
+* VHOST_USER_SET_CONFIG
+
+  Id: 25
+  Equivalent ioctl: N/A
+  Master payload: virtio device config space
+  Slave payload: N/A
+
+  Submitted by the vhost-user master when the Guest changes the virtio
+  device configuration space and also can be used for live migration
+  on the destination host. The vhost-user slave must check the flags
+  field, and slaves MUST NOT accept SET_CONFIG for read-only
+  configuration space fields unless the live migration bit is set.
+
 Slave message types
 ---
 
@@ -641,6 +681,21 @@ Slave message types
   This request should be send only when VIRTIO_F_IOMMU_PLATFORM feature
   has been successfully negotiated.
 
+* VHOST_USER_SLAVE_CONFIG_CHANGE_MSG
+
+ Id: 2
+ Equivalent ioctl: N/A
+ Slave payload: N/A
+ Master payload: N/A
+
+ Vhost-user slave sends such messages to notify that the virtio device's
+ configuration space has changed, for those host devices which can support
+ such feature, host driver can send VHOST_USER_GET_CONFIG message to slave
+ to get the latest content. If VHOST_USER_PROTOCOL_F_REPLY_ACK is
+ negotiated, and slave set the VHOST_USER_NEED_REPLY flag, master must
+ respond with zero when operation is successfully completed, or non-zero
+ otherwise.
+
 VHOST_USER_PROTOCOL_F_REPLY_ACK:
 ---
 The original vhost-user specification only demands replies for certain
diff --git a/include/hw/virtio/vhost-backend.h 
b/include/hw/virtio/vhost-backend.h
index a7a5f22..592254f 100644
--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
@@ -20,6 +20,11 @@ typedef enum VhostBackendType {
 VHOST_BACKEND_TYPE_MAX = 3,
 } VhostBackendType;
 
+typedef enum VhostSetConfigType {
+VHOST_SET_CONFIG_TYPE_MASTER = 0,
+VHOST_SET_CONFIG_TYPE_MIGRATION = 1,
+} VhostSetConfigType;
+
 struct vhost_dev;
 struct vhost_log;
 struct vhost_memory;
@@ -84,6 +89,11 @@ typedef void (*vhost_set_iotlb_callback_op)(struct vhost_dev 
*dev,
int enabled);
 typedef int

[Qemu-devel] [PULL 06/33] qemu: add a cleanup callback function to EventNotifier

2018-01-15 Thread Michael S. Tsirkin

From: Gal Hammer 

Add a cleanup callback function to the EventNotifier struct,
which allows users to execute event_notifier_cleanup in a
different context.

Signed-off-by: Gal Hammer 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/qemu/event_notifier.h | 1 +
 util/event_notifier-posix.c   | 5 -
 util/event_notifier-win32.c   | 2 ++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/qemu/event_notifier.h b/include/qemu/event_notifier.h
index 599c99f..b30a454 100644
--- a/include/qemu/event_notifier.h
+++ b/include/qemu/event_notifier.h
@@ -26,6 +26,7 @@ struct EventNotifier {
 int rfd;
 int wfd;
 #endif
+void (*cleanup)(EventNotifier *);
 };
 
 typedef void EventNotifierHandler(EventNotifier *);
diff --git a/util/event_notifier-posix.c b/util/event_notifier-posix.c
index 73c4046..6525666 100644
--- a/util/event_notifier-posix.c
+++ b/util/event_notifier-posix.c
@@ -29,6 +29,7 @@ void event_notifier_init_fd(EventNotifier *e, int fd)
 {
 e->rfd = fd;
 e->wfd = fd;
+e->cleanup = NULL;
 }
 #endif
 
@@ -65,6 +66,7 @@ int event_notifier_init(EventNotifier *e, int active)
 e->rfd = fds[0];
 e->wfd = fds[1];
 }
+e->cleanup = NULL;
 if (active) {
 event_notifier_set(e);
 }
@@ -80,10 +82,11 @@ void event_notifier_cleanup(EventNotifier *e)
 {
 if (e->rfd != e->wfd) {
 close(e->rfd);
-e->rfd = -1;
 }
 close(e->wfd);
+e->rfd = -1;
 e->wfd = -1;
+e->cleanup = NULL;
 }
 
 int event_notifier_get_fd(const EventNotifier *e)
diff --git a/util/event_notifier-win32.c b/util/event_notifier-win32.c
index 62c53b0..eff8670 100644
--- a/util/event_notifier-win32.c
+++ b/util/event_notifier-win32.c
@@ -19,6 +19,7 @@ int event_notifier_init(EventNotifier *e, int active)
 {
 e->event = CreateEvent(NULL, TRUE, FALSE, NULL);
 assert(e->event);
+e->cleanup = NULL;
 return 0;
 }
 
@@ -26,6 +27,7 @@ void event_notifier_cleanup(EventNotifier *e)
 {
 CloseHandle(e->event);
 e->event = NULL;
+e->cleanup = NULL;
 }
 
 HANDLE event_notifier_get_handle(EventNotifier *e)
-- 
MST

[Qemu-devel] [PULL 18/33] vhost-user-test: setup virtqueues in all tests

2018-01-15 Thread Michael S. Tsirkin

From: Maxime Coquelin 

Only the multiqueue test sets up the virtqueues.
This patch generalizes the setup of virtqueues for all tests.

Signed-off-by: Maxime Coquelin 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Marc-André Lureau 
---
 tests/vhost-user-test.c | 53 +++--
 1 file changed, 42 insertions(+), 11 deletions(-)

diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index df56724..969e393 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -55,6 +55,7 @@
 /*** FROM hw/virtio/vhost-user.c */
 
 #define VHOST_MEMORY_MAX_NREGIONS8
+#define VHOST_MAX_VIRTQUEUES0x100
 
 #define VHOST_USER_F_PROTOCOL_FEATURES 30
 #define VHOST_USER_PROTOCOL_F_MQ 0
@@ -141,6 +142,8 @@ enum {
 
 typedef struct TestServer {
 QPCIBus *bus;
+QVirtioPCIDevice *dev;
+QVirtQueue *vq[VHOST_MAX_VIRTQUEUES];
 gchar *socket_path;
 gchar *mig_path;
 gchar *chr_name;
@@ -155,6 +158,7 @@ typedef struct TestServer {
 bool test_fail;
 int test_flags;
 int queues;
+QGuestAllocator *alloc;
 } TestServer;
 
 static const char *tmpfs;
@@ -162,26 +166,43 @@ static const char *root;
 
 static void init_virtio_dev(TestServer *s)
 {
-QVirtioPCIDevice *dev;
 uint32_t features;
+int i;
 
 s->bus = qpci_init_pc(NULL);
 g_assert_nonnull(s->bus);
 
-dev = qvirtio_pci_device_find(s->bus, VIRTIO_ID_NET);
-g_assert_nonnull(dev);
+s->dev = qvirtio_pci_device_find(s->bus, VIRTIO_ID_NET);
+g_assert_nonnull(s->dev);
 
-qvirtio_pci_device_enable(dev);
-qvirtio_reset(>vdev);
-qvirtio_set_acknowledge(>vdev);
-qvirtio_set_driver(>vdev);
+qvirtio_pci_device_enable(s->dev);
+qvirtio_reset(>dev->vdev);
+qvirtio_set_acknowledge(>dev->vdev);
+qvirtio_set_driver(>dev->vdev);
+
+s->alloc = pc_alloc_init();
 
-features = qvirtio_get_features(>vdev);
+for (i = 0; i < s->queues * 2; i++) {
+s->vq[i] = qvirtqueue_setup(>dev->vdev, s->alloc, i);
+}
+
+features = qvirtio_get_features(>dev->vdev);
 features = features & (1u << VIRTIO_NET_F_MAC);
-qvirtio_set_features(>vdev, features);
+qvirtio_set_features(>dev->vdev, features);
 
-qvirtio_set_driver_ok(>vdev);
-qvirtio_pci_device_free(dev);
+qvirtio_set_driver_ok(>dev->vdev);
+}
+
+static void uninit_virtio_dev(TestServer *s)
+{
+int i;
+
+for (i = 0; i < s->queues * 2; i++) {
+qvirtqueue_cleanup(s->dev->vdev.bus, s->vq[i], s->alloc);
+}
+pc_alloc_uninit(s->alloc);
+
+qvirtio_pci_device_free(s->dev);
 }
 
 static void wait_for_fds(TestServer *s)
@@ -635,6 +656,8 @@ static void test_read_guest_mem(void)
 
 read_guest_mem(server);
 
+uninit_virtio_dev(server);
+
 qtest_quit(s);
 test_server_free(server);
 }
@@ -711,6 +734,8 @@ static void test_migrate(void)
 
 read_guest_mem(dest);
 
+uninit_virtio_dev(s);
+
 g_source_destroy(source);
 g_source_unref(source);
 
@@ -789,6 +814,8 @@ static void test_reconnect_subprocess(void)
 wait_for_fds(s);
 wait_for_rings_started(s, 2);
 
+uninit_virtio_dev(s);
+
 qtest_end();
 test_server_free(s);
 return;
@@ -818,6 +845,8 @@ static void test_connect_fail_subprocess(void)
 wait_for_fds(s);
 wait_for_rings_started(s, 2);
 
+uninit_virtio_dev(s);
+
 qtest_end();
 test_server_free(s);
 }
@@ -846,6 +875,8 @@ static void test_flags_mismatch_subprocess(void)
 wait_for_fds(s);
 wait_for_rings_started(s, 2);
 
+uninit_virtio_dev(s);
+
 qtest_end();
 test_server_free(s);
 }
-- 
MST

[Qemu-devel] [PULL 01/33] MAINTAINERS: Add myself as maintainer to X86 machines

2018-01-15 Thread Michael S. Tsirkin

From: Marcel Apfelbaum 

Signed-off-by: Marcel Apfelbaum 
Signed-off-by: Marcel Apfelbaum 
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 4770f10..753e799 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -852,6 +852,7 @@ X86 Machines
 
 PC
 M: Michael S. Tsirkin 
+M: Marcel Apfelbaum 
 S: Supported
 F: include/hw/i386/
 F: hw/i386/
-- 
MST

[Qemu-devel] [PULL 07/33] virtio: postpone the execution of event_notifier_cleanup function

2018-01-15 Thread Michael S. Tsirkin

From: Gal Hammer 

Use the EventNotifier's cleanup callback function to execute the
event_notifier_cleanup function after kvm unregistered the eventfd.

This change supports running the virtio_bus_set_host_notifier
function inside a memory region transaction. Otherwise, a closed
fd is sent to kvm, which results in a failure.

Signed-off-by: Gal Hammer 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 accel/kvm/kvm-all.c|  4 
 hw/virtio/virtio-bus.c | 19 +++
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index f290f48..071f4f5 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -812,6 +812,10 @@ static void kvm_mem_ioeventfd_del(MemoryListener *listener,
 if (r < 0) {
 abort();
 }
+
+if (e->cleanup) {
+e->cleanup(e);
+}
 }
 
 static void kvm_io_ioeventfd_add(MemoryListener *listener,
diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
index 3042232..8106346 100644
--- a/hw/virtio/virtio-bus.c
+++ b/hw/virtio/virtio-bus.c
@@ -256,6 +256,15 @@ bool virtio_bus_ioeventfd_enabled(VirtioBusState *bus)
 return k->ioeventfd_assign && k->ioeventfd_enabled(proxy);
 }
 
+static void virtio_bus_cleanup_event_notifier(EventNotifier *notifier)
+{
+/* Test and clear notifier after disabling event,
+ * in case poll callback didn't have time to run.
+ */
+virtio_queue_host_notifier_read(notifier);
+event_notifier_cleanup(notifier);
+}
+
 /*
  * This function switches ioeventfd on/off in the device.
  * The caller must set or clear the handlers for the EventNotifier.
@@ -283,19 +292,13 @@ int virtio_bus_set_host_notifier(VirtioBusState *bus, int 
n, bool assign)
 r = k->ioeventfd_assign(proxy, notifier, n, true);
 if (r < 0) {
 error_report("%s: unable to assign ioeventfd: %d", __func__, r);
-goto cleanup_event_notifier;
+virtio_bus_cleanup_event_notifier(notifier);
 }
-return 0;
 } else {
+notifier->cleanup = virtio_bus_cleanup_event_notifier;
 k->ioeventfd_assign(proxy, notifier, n, false);
 }
 
-cleanup_event_notifier:
-/* Test and clear notifier after disabling event,
- * in case poll callback didn't have time to run.
- */
-virtio_queue_host_notifier_read(notifier);
-event_notifier_cleanup(notifier);
 return r;
 }
 
-- 
MST

[Qemu-devel] [PULL 04/33] contrib/libvhost-user: enable virtio config space messages

2018-01-15 Thread Michael S. Tsirkin

From: Changpeng Liu 

Enable VHOST_USER_GET_CONFIG/VHOST_USER_SET_CONFIG messages in the
libvhost-user library, so users can implement their own I/O target
based on the library. This enables the virtio config space to be
delivered between the QEMU host device and the I/O target.

Signed-off-by: Changpeng Liu 
Reviewed-by: Marc-André Lureau 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 contrib/libvhost-user/libvhost-user.h | 33 +++
 contrib/libvhost-user/libvhost-user.c | 42 +++
 2 files changed, 75 insertions(+)

diff --git a/contrib/libvhost-user/libvhost-user.h 
b/contrib/libvhost-user/libvhost-user.h
index 2f5864b..f8a730b 100644
--- a/contrib/libvhost-user/libvhost-user.h
+++ b/contrib/libvhost-user/libvhost-user.h
@@ -30,6 +30,16 @@
 
 #define VHOST_MEMORY_MAX_NREGIONS 8
 
+typedef enum VhostSetConfigType {
+VHOST_SET_CONFIG_TYPE_MASTER = 0,
+VHOST_SET_CONFIG_TYPE_MIGRATION = 1,
+} VhostSetConfigType;
+
+/*
+ * Maximum size of virtio device config space
+ */
+#define VHOST_USER_MAX_CONFIG_SIZE 256
+
 enum VhostUserProtocolFeature {
 VHOST_USER_PROTOCOL_F_MQ = 0,
 VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
@@ -69,6 +79,8 @@ typedef enum VhostUserRequest {
 VHOST_USER_SET_SLAVE_REQ_FD = 21,
 VHOST_USER_IOTLB_MSG = 22,
 VHOST_USER_SET_VRING_ENDIAN = 23,
+VHOST_USER_GET_CONFIG = 24,
+VHOST_USER_SET_CONFIG = 25,
 VHOST_USER_MAX
 } VhostUserRequest;
 
@@ -90,6 +102,18 @@ typedef struct VhostUserLog {
 uint64_t mmap_offset;
 } VhostUserLog;
 
+typedef struct VhostUserConfig {
+uint32_t offset;
+uint32_t size;
+uint32_t flags;
+uint8_t region[VHOST_USER_MAX_CONFIG_SIZE];
+} VhostUserConfig;
+
+static VhostUserConfig c __attribute__ ((unused));
+#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \
+   + sizeof(c.size) \
+   + sizeof(c.flags))
+
 #if defined(_WIN32)
 # define VU_PACKED __attribute__((gcc_struct, packed))
 #else
@@ -112,6 +136,7 @@ typedef struct VhostUserMsg {
 struct vhost_vring_addr addr;
 VhostUserMemory memory;
 VhostUserLog log;
+VhostUserConfig config;
 } payload;
 
 int fds[VHOST_MEMORY_MAX_NREGIONS];
@@ -140,6 +165,10 @@ typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg 
*vmsg,
   int *do_reply);
 typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started);
 typedef bool (*vu_queue_is_processed_in_order_cb) (VuDev *dev, int qidx);
+typedef int (*vu_get_config_cb) (VuDev *dev, uint8_t *config, uint32_t len);
+typedef int (*vu_set_config_cb) (VuDev *dev, const uint8_t *data,
+ uint32_t offset, uint32_t size,
+ uint32_t flags);
 
 typedef struct VuDevIface {
 /* called by VHOST_USER_GET_FEATURES to get the features bitmask */
@@ -162,6 +191,10 @@ typedef struct VuDevIface {
  * on unmanaged exit/crash.
  */
 vu_queue_is_processed_in_order_cb queue_is_processed_in_order;
+/* get the config space of the device */
+vu_get_config_cb get_config;
+/* set the config space of the device */
+vu_set_config_cb set_config;
 } VuDevIface;
 
 typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx);
diff --git a/contrib/libvhost-user/libvhost-user.c 
b/contrib/libvhost-user/libvhost-user.c
index f409bd3..27cc597 100644
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -84,6 +84,8 @@ vu_request_to_string(unsigned int req)
 REQ(VHOST_USER_SET_SLAVE_REQ_FD),
 REQ(VHOST_USER_IOTLB_MSG),
 REQ(VHOST_USER_SET_VRING_ENDIAN),
+REQ(VHOST_USER_GET_CONFIG),
+REQ(VHOST_USER_SET_CONFIG),
 REQ(VHOST_USER_MAX),
 };
 #undef REQ
@@ -798,6 +800,42 @@ vu_set_slave_req_fd(VuDev *dev, VhostUserMsg *vmsg)
 }
 
 static bool
+vu_get_config(VuDev *dev, VhostUserMsg *vmsg)
+{
+int ret = -1;
+
+if (dev->iface->get_config) {
+ret = dev->iface->get_config(dev, vmsg->payload.config.region,
+ vmsg->payload.config.size);
+}
+
+if (ret) {
+/* resize to zero to indicate an error to master */
+vmsg->size = 0;
+}
+
+return true;
+}
+
+static bool
+vu_set_config(VuDev *dev, VhostUserMsg *vmsg)
+{
+int ret = -1;
+
+if (dev->iface->set_config) {
+ret = dev->iface->set_config(dev, vmsg->payload.config.region,
+ vmsg->payload.config.offset,
+ vmsg->payload.config.size,
+ vmsg->payload.config.flags);
+if (ret) {
+vu_panic(dev, "Set virtio configuration space failed");
+}
+}
+
+return false;
+}
+
+static bool

[Qemu-devel] [PULL 03/33] vhost-user-blk: introduce a new vhost-user-blk host device

2018-01-15 Thread Michael S. Tsirkin

From: Changpeng Liu 

This commit introduces a new vhost-user device for block. It uses a
chardev to connect with the backend; as with the QEMU virtio-blk device,
the guest OS still uses the virtio-blk frontend driver.

To use it, start QEMU with command line like this:

qemu-system-x86_64 \
-chardev socket,id=char0,path=/path/vhost.socket \
-device vhost-user-blk-pci,chardev=char0,num-queues=2, \
bootindex=2... \

Users can use different parameters for `num-queues` and `bootindex`.

Unlike the existing QEMU virtio-blk host device, it makes it easier
for users to implement their own I/O processing logic, such as an
all user space I/O stack against a hardware block device. It uses the
new vhost message (VHOST_USER_GET_CONFIG) to get the block virtio config
information from the backend process.

Signed-off-by: Changpeng Liu 
Reviewed-by: Marc-André Lureau 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 default-configs/pci.mak|   1 +
 hw/virtio/virtio-pci.h |  18 ++
 include/hw/virtio/vhost-user-blk.h |  41 +
 hw/block/vhost-user-blk.c  | 359 +
 hw/virtio/virtio-pci.c |  55 ++
 hw/block/Makefile.objs |   3 +
 6 files changed, 477 insertions(+)
 create mode 100644 include/hw/virtio/vhost-user-blk.h
 create mode 100644 hw/block/vhost-user-blk.c

diff --git a/default-configs/pci.mak b/default-configs/pci.mak
index e514bde..49a0f28 100644
--- a/default-configs/pci.mak
+++ b/default-configs/pci.mak
@@ -43,3 +43,4 @@ CONFIG_VGA_PCI=y
 CONFIG_IVSHMEM_DEVICE=$(CONFIG_IVSHMEM)
 CONFIG_ROCKER=y
 CONFIG_VHOST_USER_SCSI=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX))
+CONFIG_VHOST_USER_BLK=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX))
diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
index 12d3a90..813082b 100644
--- a/hw/virtio/virtio-pci.h
+++ b/hw/virtio/virtio-pci.h
@@ -27,6 +27,9 @@
 #include "hw/virtio/virtio-gpu.h"
 #include "hw/virtio/virtio-crypto.h"
 #include "hw/virtio/vhost-user-scsi.h"
+#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX)
+#include "hw/virtio/vhost-user-blk.h"
+#endif
 
 #ifdef CONFIG_VIRTFS
 #include "hw/9pfs/virtio-9p.h"
@@ -46,6 +49,7 @@ typedef struct VirtIOSerialPCI VirtIOSerialPCI;
 typedef struct VirtIONetPCI VirtIONetPCI;
 typedef struct VHostSCSIPCI VHostSCSIPCI;
 typedef struct VHostUserSCSIPCI VHostUserSCSIPCI;
+typedef struct VHostUserBlkPCI VHostUserBlkPCI;
 typedef struct VirtIORngPCI VirtIORngPCI;
 typedef struct VirtIOInputPCI VirtIOInputPCI;
 typedef struct VirtIOInputHIDPCI VirtIOInputHIDPCI;
@@ -244,6 +248,20 @@ struct VHostUserSCSIPCI {
 VHostUserSCSI vdev;
 };
 
+#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX)
+/*
+ * vhost-user-blk-pci: This extends VirtioPCIProxy.
+ */
+#define TYPE_VHOST_USER_BLK_PCI "vhost-user-blk-pci"
+#define VHOST_USER_BLK_PCI(obj) \
+OBJECT_CHECK(VHostUserBlkPCI, (obj), TYPE_VHOST_USER_BLK_PCI)
+
+struct VHostUserBlkPCI {
+VirtIOPCIProxy parent_obj;
+VHostUserBlk vdev;
+};
+#endif
+
 /*
  * virtio-blk-pci: This extends VirtioPCIProxy.
  */
diff --git a/include/hw/virtio/vhost-user-blk.h 
b/include/hw/virtio/vhost-user-blk.h
new file mode 100644
index 000..5804cc9
--- /dev/null
+++ b/include/hw/virtio/vhost-user-blk.h
@@ -0,0 +1,41 @@
+/*
+ * vhost-user-blk host device
+ * Copyright(C) 2017 Intel Corporation.
+ *
+ * Authors:
+ *  Changpeng Liu 
+ *
+ * Based on vhost-scsi.h, Copyright IBM, Corp. 2011
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef VHOST_USER_BLK_H
+#define VHOST_USER_BLK_H
+
+#include "standard-headers/linux/virtio_blk.h"
+#include "qemu-common.h"
+#include "hw/qdev.h"
+#include "hw/block/block.h"
+#include "chardev/char-fe.h"
+#include "hw/virtio/vhost.h"
+
+#define TYPE_VHOST_USER_BLK "vhost-user-blk"
+#define VHOST_USER_BLK(obj) \
+OBJECT_CHECK(VHostUserBlk, (obj), TYPE_VHOST_USER_BLK)
+
+typedef struct VHostUserBlk {
+VirtIODevice parent_obj;
+CharBackend chardev;
+int32_t bootindex;
+struct virtio_blk_config blkcfg;
+uint16_t num_queues;
+uint32_t queue_size;
+uint32_t config_wce;
+uint32_t config_ro;
+struct vhost_dev dev;
+} VHostUserBlk;
+
+#endif
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
new file mode 100644
index 000..b53b4c9
--- /dev/null
+++ b/hw/block/vhost-user-blk.c
@@ -0,0 +1,359 @@
+/*
+ * vhost-user-blk host device
+ *
+ * Copyright(C) 2017 Intel Corporation.
+ *
+ * Authors:
+ *  Changpeng Liu 
+ *
+ * Largely based on the "vhost-user-scsi.c" and "vhost-scsi.c" implemented by:
+ * Felipe Franciosi 
+ * Stefan Hajnoczi 
+ * Nicholas Bellinger

[Qemu-devel] [PULL 00/33] pc, pci, virtio: features, fixes, cleanups

2018-01-15 Thread Michael S. Tsirkin

A large pull due to patch buildup over the holiday period.
Most notable here is probably the addition of vhost-user-blk.

The following changes since commit f5213bd060b460c99e605472b7e03967db43:

  Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20180115' 
into staging (2018-01-15 13:17:47 +)

are available in the git repository at:

  git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream

for you to fetch changes up to 631ae6adb1fcea786e0db8c6f1b19570bad0f90c:

  vhost: remove assertion to prevent crash (2018-01-16 06:36:19 +0200)


pc, pci, virtio: features, fixes, cleanups

A bunch of fixes, cleanups and new features all over the place.

Signed-off-by: Michael S. Tsirkin <m...@redhat.com>


Changpeng Liu (4):
  vhost-user: add new vhost user messages to support virtio config space
  vhost-user-blk: introduce a new vhost-user-blk host device
  contrib/libvhost-user: enable virtio config space messages
  contrib/vhost-user-blk: introduce a vhost-user-blk sample application

Dou Liyang (2):
  ACPI/unit-test: Add a testcase for RAM allocation in numa node
  hw/acpi-build: Make next_base easy to follow

Dr. David Alan Gilbert (4):
  vhost: Build temporary section list and deref after commit
  vhost: Move log_dirty check
  vhost: Simplify ring verification checks
  vhost: Merge sections added to temporary list

Gal Hammer (3):
  qemu: add a cleanup callback function to EventNotifier
  virtio: postpone the execution of event_notifier_cleanup function
  virtio: improve virtio devices initialization time

Igor Mammedov (4):
  tests: acpi: move tested tables array allocation outside of 
test_acpi_dsdt_table()
  tests: acpi: init table descriptor in test_dst_table()
  tests: acpi: rename test_acpi_tables()/test_dst_table() to reflect its 
usage
  tests: acpi: add comments to fetch_rsdt_referenced_tables/data->tables 
usage

Jay Zhou (1):
  vhost: remove assertion to prevent crash

Marcel Apfelbaum (2):
  MAINTAINERS: Add myself as maintainer to X86 machines
  hw/pci-bridge: fix QEMU crash because of pcie-root-port

Maxime Coquelin (6):
  vhost-user: fix multiple queue specification
  vhost-user-test: fix features mask
  vhost-user-test: extract read-guest-mem test from main loop
  vhost-user-test: setup virtqueues in all tests
  vhost-user-test: make features mask an init_virtio_dev() argument
  vhost-user-test: use init_virtio_dev in multiqueue test

Michael S. Tsirkin (2):
  vhost-user: factor out msg head and payload
  vhost-user: fix misaligned access to payload

Mohammed Gamal (2):
  x86_iommu: Move machine check to x86_iommu_realize()
  x86_iommu: check if machine has PCI bus

Prasad Singamsetty (2):
  intel-iommu: Redefine macros to enable supporting 48 bit address width
  intel-iommu: Extend address width to 48 bits

Yuval Shaia (1):
  pci/shpc: Move function to generic header file

 docs/interop/vhost-user.txt |  61 +++-
 Makefile|   3 +
 default-configs/pci.mak |   1 +
 contrib/libvhost-user/libvhost-user.h   |  33 ++
 hw/i386/intel_iommu_internal.h  |  43 ++-
 hw/virtio/virtio-pci.h  |  18 ++
 include/hw/i386/intel_iommu.h   |   7 +-
 include/hw/virtio/vhost-backend.h   |  12 +
 include/hw/virtio/vhost-user-blk.h  |  41 +++
 include/hw/virtio/vhost.h   |  17 +
 include/qemu/event_notifier.h   |   1 +
 include/qemu/host-utils.h   |  10 +
 accel/kvm/kvm-all.c |   4 +
 contrib/libvhost-user/libvhost-user.c   |  42 +++
 contrib/vhost-user-blk/vhost-user-blk.c | 545 
 hw/block/vhost-user-blk.c   | 359 +
 hw/i386/acpi-build.c|   5 +-
 hw/i386/amd_iommu.c |  13 +-
 hw/i386/intel_iommu.c   | 136 
 hw/i386/x86-iommu.c |  13 +
 hw/pci-bridge/gen_pcie_root_port.c  |   7 +-
 hw/pci/shpc.c   |  13 +-
 hw/virtio/vhost-user.c  | 318 +--
 hw/virtio/vhost.c   | 250 +++
 hw/virtio/virtio-bus.c  |  19 +-
 hw/virtio/virtio-pci.c  |  55 
 hw/virtio/virtio.c  |   5 +
 tests/bios-tables-test.c|  50 ++-
 tests/vhost-user-test.c | 171 +-
 util/event_notifier-posix.c |   5 +-
 util/event_notifier-win32.c |   2 +
 .gitignore  |   1 +
 MAINTAINERS |   1 +
 Makefile.objs   |   1 +
 contrib/vhost-user-blk/Makefile.objs|   1 +
 hw/block/Make

Re: [Qemu-devel] [PATCH v4 08/14] i.MX: Add implementation of i.MX7 GPR IP block

2018-01-15 Thread Philippe Mathieu-Daudé

On 01/15/2018 10:37 PM, Andrey Smirnov wrote:
> Add minimal code needed to allow upstream Linux guest to boot.
> 
> Cc: Peter Maydell 
> Cc: Jason Wang 
> Cc: Philippe Mathieu-Daudé 
> Cc: qemu-devel@nongnu.org
> Cc: qemu-...@nongnu.org
> Cc: yurov...@gmail.com
> Signed-off-by: Andrey Smirnov 
> ---
>  hw/misc/Makefile.objs  |   1 +
>  hw/misc/imx7_gpr.c | 119 
> +
>  include/hw/misc/imx7_gpr.h |  28 +++
>  3 files changed, 148 insertions(+)
>  create mode 100644 hw/misc/imx7_gpr.c
>  create mode 100644 include/hw/misc/imx7_gpr.h
> 
> diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
> index 019886912c..fce426eb75 100644
> --- a/hw/misc/Makefile.objs
> +++ b/hw/misc/Makefile.objs
> @@ -36,6 +36,7 @@ obj-$(CONFIG_IMX) += imx6_src.o
>  obj-$(CONFIG_IMX) += imx7_ccm.o
>  obj-$(CONFIG_IMX) += imx2_wdt.o
>  obj-$(CONFIG_IMX) += imx7_snvs.o
> +obj-$(CONFIG_IMX) += imx7_gpr.o
>  obj-$(CONFIG_MILKYMIST) += milkymist-hpdmc.o
>  obj-$(CONFIG_MILKYMIST) += milkymist-pfpu.o
>  obj-$(CONFIG_MAINSTONE) += mst_fpga.o
> diff --git a/hw/misc/imx7_gpr.c b/hw/misc/imx7_gpr.c
> new file mode 100644
> index 00..9e8ccea9e8
> --- /dev/null
> +++ b/hw/misc/imx7_gpr.c
> @@ -0,0 +1,119 @@
> +/*
> + * Copyright (c) 2017, Impinj, Inc.
> + *
> + * i.MX7 GPR IP block emulation code
> + *
> + * Author: Andrey Smirnov 
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + * Bare minimum emulation code needed to support being able to shut
> + * down linux guest gracefully.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "hw/misc/imx7_gpr.h"
> +#include "qemu/log.h"
> +#include "sysemu/sysemu.h"
> +
> +enum IMX7GPRRegisters {
> +IOMUXC_GPR0  = 0x00,
> +IOMUXC_GPR1  = 0x04,
> +IOMUXC_GPR2  = 0x08,
> +IOMUXC_GPR3  = 0x0c,
> +IOMUXC_GPR4  = 0x10,
> +IOMUXC_GPR5  = 0x14,
> +IOMUXC_GPR6  = 0x18,
> +IOMUXC_GPR7  = 0x1c,
> +IOMUXC_GPR8  = 0x20,
> +IOMUXC_GPR9  = 0x24,
> +IOMUXC_GPR10 = 0x28,
> +IOMUXC_GPR11 = 0x2c,
> +IOMUXC_GPR12 = 0x30,
> +IOMUXC_GPR13 = 0x34,
> +IOMUXC_GPR14 = 0x38,
> +IOMUXC_GPR15 = 0x3c,
> +IOMUXC_GPR16 = 0x40,
> +IOMUXC_GPR17 = 0x44,
> +IOMUXC_GPR18 = 0x48,
> +IOMUXC_GPR19 = 0x4c,
> +IOMUXC_GPR20 = 0x50,
> +IOMUXC_GPR21 = 0x54,
> +IOMUXC_GPR22 = 0x58,
> +};
> +
> +#define IMX7D_GPR1_IRQ_MASK BIT(12)
> +#define IMX7D_GPR1_ENET1_TX_CLK_SEL_MASKBIT(13)
> +#define IMX7D_GPR1_ENET2_TX_CLK_SEL_MASKBIT(14)
> +#define IMX7D_GPR1_ENET_TX_CLK_SEL_MASK (0x3 << 13)
> +#define IMX7D_GPR1_ENET1_CLK_DIR_MASK   BIT(17)
> +#define IMX7D_GPR1_ENET2_CLK_DIR_MASK   BIT(18)
> +#define IMX7D_GPR1_ENET_CLK_DIR_MASK(0x3 << 17)
> +
> +#define IMX7D_GPR5_CSI_MUX_CONTROL_MIPI BIT(4)
> +#define IMX7D_GPR12_PCIE_PHY_REFCLK_SEL BIT(5)
> +#define IMX7D_GPR22_PCIE_PHY_PLL_LOCKED BIT(31)
> +
> +
> +static uint64_t imx7_gpr_read(void *opaque, hwaddr offset, unsigned size)
> +{
> +if (offset == IOMUXC_GPR22) {
> +return IMX7D_GPR22_PCIE_PHY_PLL_LOCKED;
> +}
> +
> +return 0;
> +}
> +
> +static void imx7_gpr_write(void *opaque, hwaddr offset,
> +   uint64_t v, unsigned size)
> +{

If you ever respin, please add a trace point here (just copy/paste from
another file from the same directory), and in the read() function.

Linux will evolve and use more registers from this device (and the other
devices you are modelling), and a Linux driver busy loop is likely to
hang QEMU. A trace event will make life easier for the next user of your board :)

That said,

Reviewed-by: Philippe Mathieu-Daudé 

> +}
> +
> +static const struct MemoryRegionOps imx7_gpr_ops = {
> +.read = imx7_gpr_read,
> +.write = imx7_gpr_write,
> +.endianness = DEVICE_NATIVE_ENDIAN,
> +.impl = {
> +/*
> + * Our device would not work correctly if the guest was doing
> + * unaligned access. This might not be a limitation on the
> + * real device but in practice there is no reason for a guest
> + * to access this device unaligned.
> + */
> +.min_access_size = 4,
> +.max_access_size = 4,
> +.unaligned = false,
> +},
> +};
> +
> +static void imx7_gpr_init(Object *obj)
> +{
> +SysBusDevice *sd = SYS_BUS_DEVICE(obj);
> +IMX7GPRState *s = IMX7_GPR(obj);
> +
> +memory_region_init_io(&s->mmio, obj, &imx7_gpr_ops, s,
> +  TYPE_IMX7_GPR, 64 * 1024);
> +sysbus_init_mmio(sd, &s->mmio);
> +}
> +
> +static void imx7_gpr_class_init(ObjectClass *klass, void *data)
> +{
> +DeviceClass *dc = DEVICE_CLASS(klass);
> +
> +dc->desc  = "i.MX7 General Purpose Registers Module";
> +}
> +
> +static const TypeInfo

Re: [Qemu-devel] [PATCH v6 0/3] vhost: two fixes and used_memslots refactoring

2018-01-15 Thread Michael S. Tsirkin

On Fri, Jan 12, 2018 at 10:47:56AM +0800, Jay Zhou wrote:
> Jay Zhou (3):
>   vhost: remove assertion to prevent crash
>   vhost: fix memslot limit check
>   vhost: used_memslots refactoring

This looks good to me, but it needs to be rebased on top of the
vhost mem slot management refactoring by dgilbert once that is
merged. Pls post the rebase, and I'll merge.

Thanks!



>  hw/virtio/vhost-backend.c | 15 +++-
>  hw/virtio/vhost-user.c| 74 
> +++
>  hw/virtio/vhost.c | 30 +---
>  include/hw/virtio/vhost-backend.h |  6 ++--
>  4 files changed, 86 insertions(+), 39 deletions(-)
> 
> --
> 1.8.3.1
>

Re: [Qemu-devel] [PATCH v4 07/14] i.MX: Add i.MX7 GPT variant

2018-01-15 Thread Philippe Mathieu-Daudé

On 01/15/2018 10:37 PM, Andrey Smirnov wrote:
> Add minimal code needed to allow upstream Linux guest to boot.
> 
> Cc: Peter Maydell 
> Cc: Jason Wang 
> Cc: Philippe Mathieu-Daudé 
> Cc: qemu-devel@nongnu.org
> Cc: qemu-...@nongnu.org
> Cc: yurov...@gmail.com
> Signed-off-by: Andrey Smirnov 
> ---
>  hw/timer/imx_gpt.c | 25 +
>  include/hw/timer/imx_gpt.h |  1 +
>  2 files changed, 26 insertions(+)
> 
> diff --git a/hw/timer/imx_gpt.c b/hw/timer/imx_gpt.c
> index 4b9b54bf2e..65e4ee6bcf 100644
> --- a/hw/timer/imx_gpt.c
> +++ b/hw/timer/imx_gpt.c
> @@ -113,6 +113,17 @@ static const IMXClk imx6_gpt_clocks[] = {
>  CLK_HIGH,  /* 111 reference clock */
>  };
>  
> +static const IMXClk imx7_gpt_clocks[] = {
> +CLK_NONE,  /* 000 No clock source */
> +CLK_IPG,   /* 001 ipg_clk, 532MHz*/
> +CLK_IPG_HIGH,  /* 010 ipg_clk_highfreq */
> +CLK_EXT,   /* 011 External clock */
> +CLK_32k,   /* 100 ipg_clk_32k */
> +CLK_HIGH,  /* 101 reference clock */
> +CLK_NONE,  /* 110 not defined */
> +CLK_NONE,  /* 111 not defined */
> +};
> +
>  static void imx_gpt_set_freq(IMXGPTState *s)
>  {
>  uint32_t clksrc = extract32(s->cr, GPT_CR_CLKSRC_SHIFT, 3);
> @@ -512,6 +523,13 @@ static void imx6_gpt_init(Object *obj)
>  s->clocks = imx6_gpt_clocks;
>  }
>  
> +static void imx7_gpt_init(Object *obj)
> +{
> +IMXGPTState *s = IMX_GPT(obj);
> +
> +s->clocks = imx7_gpt_clocks;
> +}
> +
>  static const TypeInfo imx25_gpt_info = {
>  .name = TYPE_IMX25_GPT,
>  .parent = TYPE_SYS_BUS_DEVICE,
> @@ -532,11 +550,18 @@ static const TypeInfo imx6_gpt_info = {
>  .instance_init = imx6_gpt_init,
>  };
>  
> +static const TypeInfo imx7_gpt_info = {
> +.name = TYPE_IMX7_GPT,
> +.parent = TYPE_IMX25_GPT,

ideally there should be an abstract TYPE_IMX_GPT you'd use as parent
rather than the TYPE_IMX25_GPT, anyway out of the scope of this series.

Reviewed-by: Philippe Mathieu-Daudé 

> +.instance_init = imx7_gpt_init,
> +};
> +
>  static void imx_gpt_register_types(void)
>  {
>  type_register_static(_gpt_info);
>  type_register_static(_gpt_info);
>  type_register_static(_gpt_info);
> +type_register_static(_gpt_info);
>  }
>  
>  type_init(imx_gpt_register_types)
> diff --git a/include/hw/timer/imx_gpt.h b/include/hw/timer/imx_gpt.h
> index eac59b2a70..20ccb327c4 100644
> --- a/include/hw/timer/imx_gpt.h
> +++ b/include/hw/timer/imx_gpt.h
> @@ -77,6 +77,7 @@
>  #define TYPE_IMX25_GPT "imx25.gpt"
>  #define TYPE_IMX31_GPT "imx31.gpt"
>  #define TYPE_IMX6_GPT "imx6.gpt"
> +#define TYPE_IMX7_GPT "imx7.gpt"
>  
>  #define TYPE_IMX_GPT TYPE_IMX25_GPT
>  
>

Re: [Qemu-devel] [PATCH v4 11/14] ARM: Add basic code to emulate A7MPCore DAP block

2018-01-15 Thread Philippe Mathieu-Daudé

Hi Andrey,

On 01/15/2018 10:37 PM, Andrey Smirnov wrote:
> Add minimal code to emulate A7MPCore DAP block needed to boot Linux
> guest.

I was not aware the DAP is accessed by upstream Linux...

You sure this isn't rather part of some bootloader built-in self-test?

> Cc: Peter Maydell 
> Cc: Jason Wang 
> Cc: Philippe Mathieu-Daudé 
> Cc: qemu-devel@nongnu.org
> Cc: qemu-...@nongnu.org
> Cc: yurov...@gmail.com
> Signed-off-by: Andrey Smirnov 
> ---
>  hw/arm/Makefile.objs   |   2 +-
>  hw/arm/coresight.c | 120 
> +
>  include/hw/arm/coresight.h |  24 +
>  3 files changed, 145 insertions(+), 1 deletion(-)
>  create mode 100644 hw/arm/coresight.c
>  create mode 100644 include/hw/arm/coresight.h
> 
> diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs
> index 2794e086d6..692216e0cf 100644
> --- a/hw/arm/Makefile.objs
> +++ b/hw/arm/Makefile.objs
> @@ -1,4 +1,4 @@
> -obj-y += boot.o collie.o exynos4_boards.o gumstix.o highbank.o
> +obj-y += boot.o collie.o exynos4_boards.o gumstix.o highbank.o coresight.o
>  obj-$(CONFIG_DIGIC) += digic_boards.o
>  obj-y += integratorcp.o mainstone.o musicpal.o nseries.o
>  obj-y += omap_sx1.o palm.o realview.o spitz.o stellaris.o
> diff --git a/hw/arm/coresight.c b/hw/arm/coresight.c
> new file mode 100644
> index 00..d0a8c1b005
> --- /dev/null
> +++ b/hw/arm/coresight.c
> @@ -0,0 +1,120 @@
> +/*
> + * Copyright (c) 2017, Impinj, Inc.
> + *
> + * CoreSight block emulation code
> + *
> + * Author: Andrey Smirnov 
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "hw/arm/coresight.h"
> +#include "qemu/log.h"
> +
> +static uint64_t coresight_read(void *opaque, hwaddr offset,
> +   unsigned size)
> +{
> +return 0;
> +}
> +
> +static void coresight_write(void *opaque, hwaddr offset,
> +uint64_t value, unsigned size)
> +{
> +}

I assume you had to add this to bypass the memory_transaction_failures
check.

> +
> +static const struct MemoryRegionOps coresight_ops = {
> +.read = coresight_read,
> +.write = coresight_write,
> +.endianness = DEVICE_NATIVE_ENDIAN,
> +.impl = {
> +/*
> + * Our device would not work correctly if the guest was doing
> + * unaligned access. This might not be a limitation on the real
> + * device but in practice there is no reason for a guest to access
> + * this device unaligned.
> + */
> +.min_access_size = 4,
> +.max_access_size = 4,
> +.unaligned = false,
> +},
> +};
> +
> +static void a7mpcore_dap_init(Object *obj)
> +{
> +SysBusDevice *sd = SYS_BUS_DEVICE(obj);
> +A7MPCoreDAPState *s = A7MPCORE_DAP(obj);
> +
> +memory_region_init(>container, obj, "a7mpcore-dap-container", 
> 0x10);

You can just add this in fsl_imx7_realize():

  create_unimplemented_device("a7mpcore-dap-container",
  FSL_IMX7_A7MPCORE_DAP_ADDR,
  0x10);

to register a background region for the DAP (see "hw/misc/unimp.h")

as a bonus, running with "-d unimp" you can trace the DAP access.

So this model and those files are not necessary.

> +sysbus_init_mmio(sd, >container);
> +
> +memory_region_init_io(>ca7_atb_funnel,
> +  obj,
> +  _ops,
> +  s,
> +  TYPE_A7MPCORE_DAP ".ca7-atb-funnel",
> +  0x1000);
> +memory_region_add_subregion(>container, 0x41000, >ca7_atb_funnel);
> +
> +memory_region_init_io(>cpu0_etm,
> +  obj,
> +  _ops,
> +  s,
> +  TYPE_A7MPCORE_DAP ".cpu0-etm",
> +  0x1000);
> +memory_region_add_subregion(>container, 0x7C000, >cpu0_etm);
> +
> +memory_region_init_io(>atb_funnel,
> +  obj,
> +  _ops,
> +  s,
> +  TYPE_A7MPCORE_DAP ".atb-funnel",
> +  0x1000);
> +memory_region_add_subregion(>container, 0x83000, >atb_funnel);
> +
> +memory_region_init_io(>tmc_etb,
> +  obj,
> +  _ops,
> +  s,
> +  TYPE_A7MPCORE_DAP ".tmc-etb",
> +  0x1000);
> +memory_region_add_subregion(>container, 0x84000, >tmc_etb);
> +
> +memory_region_init_io(>tmc_etr,
> +  obj,
> +  _ops,
> +  s,
> +  TYPE_A7MPCORE_DAP ".tmc-etr",
> +

Re: [Qemu-devel] [PATCH v2 3/4] cryptodev-vhost-user: add crypto session handler

2018-01-15 Thread Michael S. Tsirkin

On Sat, Dec 30, 2017 at 04:52:12PM +0800, Jay Zhou wrote:
> diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt
> index 954771d..f43c63d 100644
> --- a/docs/interop/vhost-user.txt
> +++ b/docs/interop/vhost-user.txt
> @@ -596,6 +596,25 @@ Master message types
>and expect this message once (per VQ) during device configuration
>(ie. before the master starts the VQ).
>  
> + * VHOST_USER_CREATE_CRYPTO_SESSION
> +
> +  Id: 23
> +  Equivalent ioctl: N/A
> +  Master payload: crypto session description
> +  Slave payload: crypto session description
> +
> +  Create a session for crypto operation. The server side must return the
> +  session id, 0 or positive for success, negative for failure.
> +
> + * VHOST_USER_CLOSE_CRYPTO_SESSION
> +
> +  Id: 24
> +  Equivalent ioctl: N/A
> +  Master payload: u64
> +
> +  Close a session for crypto operation which was previously
> +  created by VHOST_USER_CREATE_CRYPTO_SESSION.
> +
>  Slave message types
>  ---
>  


Sorry about a delayed response.
So an issue with this patchset is that you are adding
new messages unconditionally.

You must add a protocol bit whenever you add new messages.
If appropriate, you can document that it's a required feature for crypto
devices.


-- 
MST

Re: [Qemu-devel] [PATCH 5/5] tests: acpi: fix FADT not being compared to reference table

2018-01-15 Thread Michael S. Tsirkin

On Fri, Dec 29, 2017 at 04:16:42PM +0100, Igor Mammedov wrote:
> It turns out that FADT isn't actually tested for changes
> against reference table, since it happens to be the 1st
> table in RSDT which is currently ignored.
> Fix it by making sure that all tables from RSDT are added
> to test list.
> 
> Signed-off-by: Igor Mammedov 

This was intentional, wasn't it?
The reason IIRC was that FADT includes things like the DSDT
address which can change at any time.

So I think we'll have to tweak the FADT to compare it.

E.g. replace any non-zero pointer with a known pattern,
and fix up the checksum.

What do you think?

> ---
>  tests/bios-tables-test.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
> index 81c558e..c5dccdb 100644
> --- a/tests/bios-tables-test.c
> +++ b/tests/bios-tables-test.c
> @@ -248,14 +248,14 @@ static void test_acpi_dsdt_table(test_data *data)
>  /* Load all tables and add to test list directly RSDT referenced tables */
>  static void fetch_rsdt_referenced_tables(test_data *data)
>  {
> -int tables_nr = data->rsdt_tables_nr - 1; /* fadt is first */
> +int tables_nr = data->rsdt_tables_nr;
>  int i;
>  
>  for (i = 0; i < tables_nr; i++) {
>  AcpiSdtTable ssdt_table;
>  uint32_t addr;
>  
> -addr = le32_to_cpu(data->rsdt_tables_addr[i + 1]); /* fadt is first 
> */
> +addr = le32_to_cpu(data->rsdt_tables_addr[i]);
>  fetch_table(_table, addr);
>  
>  /* Add table to ASL test tables list */
> -- 
> 2.7.4

Re: [Qemu-devel] [PATCH v9 00/26] tcg: generic vector operations

2018-01-15 Thread no-reply

Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20180116033404.31532-1-richard.hender...@linaro.org
Subject: [Qemu-devel] [PATCH v9 00/26] tcg: generic vector operations

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 t [tag update]patchew/20180106153730.30313-1-hpous...@reactos.org 
-> patchew/20180106153730.30313-1-hpous...@reactos.org
 * [new tag]   
patchew/20180116033404.31532-1-richard.hender...@linaro.org -> 
patchew/20180116033404.31532-1-richard.hender...@linaro.org
Switched to a new branch 'test'
318acc661a tcg/aarch64: Add vector operations
43511156d7 tcg/i386: Add vector operations
c29ad91d5c target/arm: Use vector infrastructure for aa64 orr/bic immediate
083b52d1fc target/arm: Use vector infrastructure for aa64 widening shifts
d6371398a3 target/arm: Use vector infrastructure for aa64 multiplies
25df8a60db target/arm: Use vector infrastructure for aa64 compares
57c1ab2b50 target/arm: Use vector infrastructure for aa64 constant shifts
8bfefd6b52 target/arm: Use vector infrastructure for aa64 zip/uzp/trn/xtn
ad7234c694 target/arm: Use vector infrastructure for aa64 dup/movi
6f14f6cfe8 target/arm: Use vector infrastructure for aa64 mov/not/neg
d97b762c05 target/arm: Use vector infrastructure for aa64 add/sub/logic
1ddbc6628e target/arm: Align vector registers
d7a8026fd9 tcg/optimize: Handle vector opcodes during optimize
fbeeb215e8 tcg: Add generic vector helpers with a scalar variable operand
5155c310c9 tcg: Add generic vector helpers with a scalar immediate operand
e13611c9d9 tcg: Loosen vec_gen_op* typecheck rules
03086dbc06 tcg: Add generic helpers for saturating arithmetic
ef1031a706 tcg: Add generic vector ops for extension
1629aea426 tcg: Add generic vector ops for multiplication
9788bdeec6 tcg: Add generic vector ops for comparisons
58c28e1fce tcg: Add generic vector ops for constant shifts
cc69b266cc tcg: Add generic vector ops for interleave
7c955c796b tcg: Add generic vector expanders
62b38cf8bf tcg: Standardize integral arguments to expanders
fbdd0d55ff tcg: Add types and basic operations for host vectors
3d46adc466 tcg: Allow multiple word entries into the constant pool

=== OUTPUT BEGIN ===
Checking PATCH 1/26: tcg: Allow multiple word entries into the constant pool...
ERROR: spaces prohibited around that ':' (ctx:WxW)
#23: FILE: tcg/tcg-pool.inc.c:26:
+int addend  : 32;
 ^

ERROR: spaces prohibited around that ':' (ctx:WxW)
#24: FILE: tcg/tcg-pool.inc.c:27:
+int rtype   : 16;
 ^

ERROR: spaces prohibited around that ':' (ctx:WxW)
#25: FILE: tcg/tcg-pool.inc.c:28:
+int nlong   : 16;
 ^

total: 3 errors, 0 warnings, 156 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Checking PATCH 2/26: tcg: Add types and basic operations for host vectors...
ERROR: externs should be avoided in .c files
#137: FILE: tcg/tcg-op-vec.c:37:
+extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);

ERROR: externs should be avoided in .c files
#138: FILE: tcg/tcg-op-vec.c:38:
+extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);

ERROR: Macros with complex values should be enclosed in parenthesis
#535: FILE: tcg/tcg-opc.h:209:
+#define IMPLVEC  TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec)

WARNING: line over 80 characters
#538: FILE: tcg/tcg-opc.h:212:
+DEF(movi_vec, 1, 0, 0, TCG_OPF_VECTOR | TCG_OPF_NOT_PRESENT) /* vecl defines 
const args */

ERROR: Macros with complex values should be enclosed in parenthesis
#800: FILE: tcg/tcg.h:621:
+#define TCGOP_VECL(X) (X)->param1

ERROR: Macros with complex values should be enclosed in parenthesis
#801: FILE: tcg/tcg.h:622:
+#define TCGOP_VECE(X) (X)->param2

total: 5 errors, 1 warnings, 807 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Checking PATCH 3/26: tcg: Standardize integral arguments to expanders...
Checking PATCH 4/26: tcg: Add generic vector expanders...
ERROR: spaces required around that '&' (ctx:WxO)
#320: FILE: accel/tcg/tcg-runtime-gvec.c:281:
+*(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i);
   ^

ERROR: space prohibited after that '~' (ctx:OxW)
#320: FILE:

[Qemu-devel] [PATCH v2 2/4] tcg/arm: Support tlb offsets larger than 64k

2018-01-15 Thread Richard Henderson

AArch64 with SVE has an offset of 80k to the 8th TLB.

Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target.inc.c | 30 +-
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 0ff283d84f..8f5d4f208d 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -1246,12 +1246,6 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg 
argreg,
 /* We're expecting to use an 8-bit immediate and to mask.  */
 QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
 
-/* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset.
-   Using the offset of the second entry in the last tlb table ensures
-   that we can index all of the elements of the first entry.  */
-QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
-  > 0x);
-
 /* Load and compare a TLB entry, leaving the flags set.  Returns the register
containing the addend of the tlb entry.  Clobbers R0, R1, R2, TMP.  */
 
@@ -1264,6 +1258,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg 
addrlo, TCGReg addrhi,
  ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
  : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
 int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
+int mask_off;
 unsigned s_bits = opc & MO_SIZE;
 unsigned a_bits = get_alignment_bits(opc);
 
@@ -1295,16 +1290,25 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg 
addrlo, TCGReg addrhi,
 0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
 }
 
-/* We checked that the offset is contained within 16 bits above.  */
-if (add_off > 0xfff
-|| (use_armv6_instructions && TARGET_LONG_BITS == 64
-&& cmp_off > 0xff)) {
+/* Add portions of the offset until the memory access is in range.
+ * If we plan on using ldrd, reduce to an 8-bit offset; otherwise
+ * we can use a 12-bit offset.  */
+if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
+mask_off = 0xff;
+} else {
+mask_off = 0xfff;
+}
+while (cmp_off > mask_off) {
+int shift = ctz32(cmp_off & ~mask_off) & ~1;
+int rot = ((32 - shift) << 7) & 0xf00;
+int addend = cmp_off & (0xff << shift);
 tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
-(24 << 7) | (cmp_off >> 8));
+rot | ((cmp_off >> shift) & 0xff));
 base = TCG_REG_R2;
-add_off -= cmp_off & 0xff00;
-cmp_off &= 0xff;
+add_off -= addend;
+cmp_off -= addend;
 }
+
 if (!use_armv7_instructions) {
 tcg_out_dat_imm(s, COND_AL, ARITH_AND,
 TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
-- 
2.14.3

[Qemu-devel] [PATCH v9 23/26] target/arm: Use vector infrastructure for aa64 widening shifts

2018-01-15 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 target/arm/translate-a64.c | 29 -
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 48caba3d9f..4f15e58556 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -8705,12 +8705,7 @@ static void handle_vec_simd_wshli(DisasContext *s, bool 
is_q, bool is_u,
 int size = 32 - clz32(immh) - 1;
 int immhb = immh << 3 | immb;
 int shift = immhb - (8 << size);
-int dsize = 64;
-int esize = 8 << size;
-int elements = dsize/esize;
-TCGv_i64 tcg_rn = new_tmp_a64(s);
-TCGv_i64 tcg_rd = new_tmp_a64(s);
-int i;
+GVecGen2Fn *gvec_fn;
 
 if (size >= 3) {
 unallocated_encoding(s);
@@ -8721,18 +8716,18 @@ static void handle_vec_simd_wshli(DisasContext *s, bool 
is_q, bool is_u,
 return;
 }
 
-/* For the LL variants the store is larger than the load,
- * so if rd == rn we would overwrite parts of our input.
- * So load everything right now and use shifts in the main loop.
- */
-read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
-
-for (i = 0; i < elements; i++) {
-tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
-ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
-tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
-write_vec_element(s, tcg_rd, rd, i, size + 1);
+if (is_u) {
+gvec_fn = is_q ? tcg_gen_gvec_extuh : tcg_gen_gvec_extul;
+} else {
+gvec_fn = is_q ? tcg_gen_gvec_extsh : tcg_gen_gvec_extsl;
 }
+gvec_fn(size, vec_full_reg_offset(s, rd),
+vec_full_reg_offset(s, rn), 16, 16);
+
+/* Perform the shift in the wider format.  */
+tcg_gen_gvec_shli(size + 1, vec_full_reg_offset(s, rd),
+  vec_full_reg_offset(s, rd),
+  16, vec_full_reg_size(s), shift);
 }
 
 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
-- 
2.14.3

[Qemu-devel] [PATCH v2 0/4] tcg/arm fixes

2018-01-15 Thread Richard Henderson

Changes since v1:

Patch 1:
  * Use rsb/rsc for gt/le.  Now we always use "rI" as a constraint,
so we never have to load immediates into registers by hand.

Patch 2:
  * We can't use add_ofs in the while condition and cmp_ofs in the
reduction and converge; use cmp_ofs always.  Fixes the sparc64
regression that Peter noticed.


r~


Richard Henderson (4):
  tcg/arm: Fix double-word comparisons
  tcg/arm: Support tlb offsets larger than 64k

[Qemu-devel] [PATCH v2 1/4] tcg/arm: Fix double-word comparisons

2018-01-15 Thread Richard Henderson

The code sequence we were generating was only good for unsigned
comparisons.  For signed comparisons, use the sequence from gcc.

Fixes booting of ppc64 firmware, with a patch changing the code
sequence for ppc comparisons.

Tested-by: Michael Roth 
Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target.inc.c | 97 
 1 file changed, 65 insertions(+), 32 deletions(-)

diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 98a12535a5..0ff283d84f 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -239,10 +239,10 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
 }
 }
 
-#define TCG_CT_CONST_ARM  0x100
-#define TCG_CT_CONST_INV  0x200
-#define TCG_CT_CONST_NEG  0x400
-#define TCG_CT_CONST_ZERO 0x800
+#define TCG_CT_CONST_ARM 0x0100
+#define TCG_CT_CONST_INV 0x0200
+#define TCG_CT_CONST_NEG 0x0400
+#define TCG_CT_CONST_ZERO0x1000
 
 /* parse target specific constraints */
 static const char *target_parse_constraint(TCGArgConstraint *ct,
@@ -351,8 +351,7 @@ static inline int check_fit_imm(uint32_t imm)
 static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
  const TCGArgConstraint *arg_ct)
 {
-int ct;
-ct = arg_ct->ct;
+int ct = arg_ct->ct;
 if (ct & TCG_CT_CONST) {
 return 1;
 } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
@@ -1103,6 +1102,56 @@ static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
 }
 }
 
+static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args,
+const int *const_args)
+{
+TCGReg al = args[0];
+TCGReg ah = args[1];
+TCGArg bl = args[2];
+TCGArg bh = args[3];
+TCGCond cond = args[4];
+int const_bl = const_args[2];
+int const_bh = const_args[3];
+
+switch (cond) {
+case TCG_COND_EQ:
+case TCG_COND_NE:
+case TCG_COND_LTU:
+case TCG_COND_LEU:
+case TCG_COND_GTU:
+case TCG_COND_GEU:
+/* We perform a conditional comparison.  If the high half is
+   equal, then overwrite the flags with the comparison of the
+   low half.  The resulting flags cover the whole.  */
+tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, ah, bh, const_bh);
+tcg_out_dat_rI(s, COND_EQ, ARITH_CMP, 0, al, bl, const_bl);
+return cond;
+
+case TCG_COND_LT:
+case TCG_COND_GE:
+/* We perform a double-word subtraction and examine the result.
+   We do not actually need the result of the subtract, so the
+   low part "subtract" is a compare.  For the high half we have
+   no choice but to compute into a temporary.  */
+tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, al, bl, const_bl);
+tcg_out_dat_rI(s, COND_AL, ARITH_SBC | TO_CPSR,
+   TCG_REG_TMP, ah, bh, const_bh);
+return cond;
+
+case TCG_COND_LE:
+case TCG_COND_GT:
+/* Similar, but with swapped arguments, via reversed subtract.  */
+tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR,
+   TCG_REG_TMP, al, bl, const_bl);
+tcg_out_dat_rI(s, COND_AL, ARITH_RSC | TO_CPSR,
+TCG_REG_TMP, ah, bh, const_bh);
+return tcg_swap_cond(cond);
+
+default:
+g_assert_not_reached();
+}
+}
+
 #ifdef CONFIG_SOFTMMU
 #include "tcg-ldst.inc.c"
 
@@ -1964,22 +2013,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode 
opc,
 tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
arg_label(args[3]));
 break;
-case INDEX_op_brcond2_i32:
-/* The resulting conditions are:
- * TCG_COND_EQ-->  a0 == a2 && a1 == a3,
- * TCG_COND_NE--> (a0 != a2 && a1 == a3) ||  a1 != a3,
- * TCG_COND_LT(U) --> (a0 <  a2 && a1 == a3) ||  a1 <  a3,
- * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3),
- * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
- * TCG_COND_GT(U) --> (a0 >  a2 && a1 == a3) ||  a1 >  a3,
- */
-tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
-args[1], args[3], const_args[3]);
-tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
-args[0], args[2], const_args[2]);
-tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]],
-   arg_label(args[5]));
-break;
 case INDEX_op_setcond_i32:
 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
 args[1], args[2], const_args[2]);
@@ -1988,15 +2021,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode 
opc,
 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
 ARITH_MOV, args[0], 0, 0);
 break;
+
+case INDEX_op_brcond2_i32:
+c =

[Qemu-devel] [PATCH v9 20/26] target/arm: Use vector infrastructure for aa64 constant shifts

2018-01-15 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 target/arm/translate-a64.c | 386 ++---
 1 file changed, 329 insertions(+), 57 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 8769b4505a..d8bb3bbb25 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -6432,17 +6432,6 @@ static void handle_shri_with_rndacc(TCGv_i64 tcg_res, 
TCGv_i64 tcg_src,
 }
 }
 
-/* Common SHL/SLI - Shift left with an optional insert */
-static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
- bool insert, int shift)
-{
-if (insert) { /* SLI */
-tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
-} else { /* SHL */
-tcg_gen_shli_i64(tcg_res, tcg_src, shift);
-}
-}
-
 /* SRI: shift right with insert */
 static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
  int size, int shift)
@@ -6546,7 +6535,11 @@ static void handle_scalar_simd_shli(DisasContext *s, 
bool insert,
 tcg_rn = read_fp_dreg(s, rn);
 tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
 
-handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
+if (insert) {
+tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
+} else {
+tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
+}
 
 write_fp_dreg(s, rd, tcg_rd);
 
@@ -8283,16 +8276,195 @@ static void 
disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
 }
 }
 
+static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+tcg_gen_vec_sar8i_i64(a, a, shift);
+tcg_gen_vec_add8_i64(d, d, a);
+}
+
+static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+tcg_gen_vec_sar16i_i64(a, a, shift);
+tcg_gen_vec_add16_i64(d, d, a);
+}
+
+static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+tcg_gen_sari_i32(a, a, shift);
+tcg_gen_add_i32(d, d, a);
+}
+
+static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+tcg_gen_sari_i64(a, a, shift);
+tcg_gen_add_i64(d, d, a);
+}
+
+static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+tcg_gen_sari_vec(vece, a, a, sh);
+tcg_gen_add_vec(vece, d, d, a);
+}
+
+static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+tcg_gen_vec_shr8i_i64(a, a, shift);
+tcg_gen_vec_add8_i64(d, d, a);
+}
+
+static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+tcg_gen_vec_shr16i_i64(a, a, shift);
+tcg_gen_vec_add16_i64(d, d, a);
+}
+
+static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+tcg_gen_shri_i32(a, a, shift);
+tcg_gen_add_i32(d, d, a);
+}
+
+static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+tcg_gen_shri_i64(a, a, shift);
+tcg_gen_add_i64(d, d, a);
+}
+
+static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+tcg_gen_shri_vec(vece, a, a, sh);
+tcg_gen_add_vec(vece, d, d, a);
+}
+
+static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+uint64_t mask = (0xff >> shift) * (-1ull / 0xff);
+TCGv_i64 t = tcg_temp_new_i64();
+
+tcg_gen_shri_i64(t, a, shift);
+tcg_gen_andi_i64(t, t, mask);
+tcg_gen_andi_i64(d, d, ~mask);
+tcg_gen_or_i64(d, d, t);
+tcg_temp_free_i64(t);
+}
+
+static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+uint64_t mask = (0xffff >> shift) * (-1ull / 0xffff);
+TCGv_i64 t = tcg_temp_new_i64();
+
+tcg_gen_shri_i64(t, a, shift);
+tcg_gen_andi_i64(t, t, mask);
+tcg_gen_andi_i64(d, d, ~mask);
+tcg_gen_or_i64(d, d, t);
+tcg_temp_free_i64(t);
+}
+
+static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+tcg_gen_shri_i32(a, a, shift);
+tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
+}
+
+static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+tcg_gen_shri_i64(a, a, shift);
+tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
+}
+
+static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+uint64_t mask = (2ull << ((8 << vece) - 1)) - 1;
+TCGv_vec t = tcg_temp_new_vec_matching(d);
+TCGv_vec m = tcg_temp_new_vec_matching(d);
+
+tcg_gen_dupi_vec(vece, m, mask ^ (mask >> sh));
+tcg_gen_shri_vec(vece, t, a, sh);
+tcg_gen_and_vec(vece, d, d, m);
+tcg_gen_or_vec(vece, d, d, t);
+
+tcg_temp_free_vec(t);
+tcg_temp_free_vec(m);
+}
+
 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
  int immh, int immb, int opcode, int rn, int 
rd)
 {
+static const GVecGen2i ssra_op[4] = {
+{ .fni8 = gen_ssra8_i64,
+  .fniv = gen_ssra_vec,
+  .load_dest = true,
+  .opc = INDEX_op_sari_vec,
+  .vece = MO_8 },
+

[Qemu-devel] [PATCH v9 21/26] target/arm: Use vector infrastructure for aa64 compares

2018-01-15 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 target/arm/translate-a64.c | 96 ++
 1 file changed, 62 insertions(+), 34 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index d8bb3bbb25..44e44cc9f2 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -7115,6 +7115,28 @@ static void 
disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
 }
 }
 
+/* CMTST : test is "if (X & Y != 0)". */
+static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+tcg_gen_and_i32(d, a, b);
+tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
+tcg_gen_neg_i32(d, d);
+}
+
+static void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+tcg_gen_and_i64(d, a, b);
+tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
+tcg_gen_neg_i64(d, d);
+}
+
+static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+tcg_gen_and_vec(vece, d, a, b);
+tcg_gen_dupi_vec(vece, a, 0);
+tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
+}
+
 static void handle_3same_64(DisasContext *s, int opcode, bool u,
 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
 {
@@ -7158,10 +7180,7 @@ static void handle_3same_64(DisasContext *s, int opcode, 
bool u,
 cond = TCG_COND_EQ;
 goto do_cmop;
 }
-/* CMTST : test is "if (X & Y != 0)". */
-tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
-tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
-tcg_gen_neg_i64(tcg_rd, tcg_rd);
+gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
 break;
 case 0x8: /* SSHL, USHL */
 if (u) {
@@ -9684,6 +9703,7 @@ static void disas_simd_3same_int(DisasContext *s, 
uint32_t insn)
 int rd = extract32(insn, 0, 5);
 int pass;
 GVecGen3Fn *gvec_op;
+TCGCond cond;
 
 switch (opcode) {
 case 0x13: /* MUL, PMUL */
@@ -9731,6 +9751,44 @@ static void disas_simd_3same_int(DisasContext *s, 
uint32_t insn)
 vec_full_reg_offset(s, rm),
 is_q ? 16 : 8, vec_full_reg_size(s));
 return;
+case 0x11:
+if (u) { /* CMEQ */
+cond = TCG_COND_EQ;
+goto do_gvec_cmp;
+} else { /* CMTST */
+static const GVecGen3 cmtst_op[4] = {
+{ .fni4 = gen_helper_neon_tst_u8,
+  .fniv = gen_cmtst_vec,
+  .vece = MO_8 },
+{ .fni4 = gen_helper_neon_tst_u16,
+  .fniv = gen_cmtst_vec,
+  .vece = MO_16 },
+{ .fni4 = gen_cmtst_i32,
+  .fniv = gen_cmtst_vec,
+  .vece = MO_32 },
+{ .fni8 = gen_cmtst_i64,
+  .fniv = gen_cmtst_vec,
+  .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+  .vece = MO_64 },
+};
+tcg_gen_gvec_3(vec_full_reg_offset(s, rd),
+   vec_full_reg_offset(s, rn),
+   vec_full_reg_offset(s, rm),
+   is_q ? 16 : 8, vec_full_reg_size(s),
+   &cmtst_op[size]);
+}
+return;
+case 0x06: /* CMGT, CMHI */
+cond = u ? TCG_COND_GTU : TCG_COND_GT;
+goto do_gvec_cmp;
+case 0x07: /* CMGE, CMHS */
+cond = u ? TCG_COND_GEU : TCG_COND_GE;
+do_gvec_cmp:
+tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ is_q ? 16 : 8, vec_full_reg_size(s));
+return;
 }
 
 if (size == 3) {
@@ -9813,26 +9871,6 @@ static void disas_simd_3same_int(DisasContext *s, 
uint32_t insn)
 genenvfn = fns[size][u];
 break;
 }
-case 0x6: /* CMGT, CMHI */
-{
-static NeonGenTwoOpFn * const fns[3][2] = {
-{ gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
-{ gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
-{ gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
-};
-genfn = fns[size][u];
-break;
-}
-case 0x7: /* CMGE, CMHS */
-{
-static NeonGenTwoOpFn * const fns[3][2] = {
-{ gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
-{ gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
-{ gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
-};
-genfn = fns[size][u];
-break;
-}
 case 0x8: /* SSHL, USHL */
 {
 static NeonGenTwoOpFn * const fns[3][2] = {
@@ -9905,16 +9943,6 @@ static void disas_simd_3same_int(DisasContext *s, 
uint32_t insn)
 genfn

[Qemu-devel] [PATCH v9 24/26] target/arm: Use vector infrastructure for aa64 orr/bic immediate

2018-01-15 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 target/arm/translate-a64.c | 38 +-
 1 file changed, 17 insertions(+), 21 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 4f15e58556..5963eedd41 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -6078,7 +6078,6 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t 
insn)
 bool is_neg = extract32(insn, 29, 1);
 bool is_q = extract32(insn, 30, 1);
 uint64_t imm = 0;
-int i;
 
 if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
 unallocated_encoding(s);
@@ -6164,28 +6163,25 @@ static void disas_simd_mod_imm(DisasContext *s, 
uint32_t insn)
 tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), is_q ? 16 : 8,
 vec_full_reg_size(s), imm);
 } else {
+/* ORR or BIC, with BIC negation to AND handled above.  */
+static const GVecGen2s ops[2] = {
+{ .fni8 = tcg_gen_or_i64,
+  .fniv = tcg_gen_or_vec,
+  .opc = INDEX_op_or_vec,
+  .vece = MO_64,
+  .prefer_i64 = TCG_TARGET_REG_BITS == 64 },
+{ .fni8 = tcg_gen_and_i64,
+  .fniv = tcg_gen_and_vec,
+  .opc = INDEX_op_and_vec,
+  .vece = MO_64,
+  .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
+};
 TCGv_i64 tcg_imm = tcg_const_i64(imm);
-TCGv_i64 tcg_rd = new_tmp_a64(s);
-
-for (i = 0; i < 2; i++) {
-int foffs = vec_reg_offset(s, rd, i, MO_64);
-
-if (i == 1 && !is_q) {
-/* non-quad ops clear high half of vector */
-tcg_gen_movi_i64(tcg_rd, 0);
-} else {
-tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
-if (is_neg) {
-/* AND (BIC) */
-tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
-} else {
-/* ORR */
-tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
-}
-}
-tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
-}
 
+tcg_gen_gvec_2s(vec_full_reg_offset(s, rd),
+vec_full_reg_offset(s, rd),
+is_q ? 16 : 8, vec_full_reg_size(s),
+tcg_imm, &ops[is_neg]);
 tcg_temp_free_i64(tcg_imm);
 }
 }
-- 
2.14.3

[Qemu-devel] [PATCH v9 25/26] tcg/i386: Add vector operations

2018-01-15 Thread Richard Henderson

The x86 vector instruction set is extremely irregular.  With newer
editions, Intel has filled in some of the blanks.  However, we don't
get many 64-bit operations until SSE4.2, introduced in 2009.

The subsequent edition was for AVX1, introduced in 2011, which added
three-operand addressing, and adjusts how all instructions should be
encoded.

Given the relatively narrow 2 year window between possible to support
and desirable to support, and to vastly simplify code maintenance,
I am only planning to support AVX1 and later cpus.

Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.h |   46 +-
 tcg/i386/tcg-target.opc.h |   13 +
 tcg/i386/tcg-target.inc.c | 1331 +++--
 3 files changed, 1336 insertions(+), 54 deletions(-)
 create mode 100644 tcg/i386/tcg-target.opc.h

diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index b89dababf4..e77b95cc2c 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -30,10 +30,10 @@
 
 #ifdef __x86_64__
 # define TCG_TARGET_REG_BITS  64
-# define TCG_TARGET_NB_REGS   16
+# define TCG_TARGET_NB_REGS   32
 #else
 # define TCG_TARGET_REG_BITS  32
-# define TCG_TARGET_NB_REGS8
+# define TCG_TARGET_NB_REGS   24
 #endif
 
 typedef enum {
@@ -56,6 +56,26 @@ typedef enum {
 TCG_REG_R13,
 TCG_REG_R14,
 TCG_REG_R15,
+
+TCG_REG_XMM0,
+TCG_REG_XMM1,
+TCG_REG_XMM2,
+TCG_REG_XMM3,
+TCG_REG_XMM4,
+TCG_REG_XMM5,
+TCG_REG_XMM6,
+TCG_REG_XMM7,
+
+/* 64-bit registers; likewise always define.  */
+TCG_REG_XMM8,
+TCG_REG_XMM9,
+TCG_REG_XMM10,
+TCG_REG_XMM11,
+TCG_REG_XMM12,
+TCG_REG_XMM13,
+TCG_REG_XMM14,
+TCG_REG_XMM15,
+
 TCG_REG_RAX = TCG_REG_EAX,
 TCG_REG_RCX = TCG_REG_ECX,
 TCG_REG_RDX = TCG_REG_EDX,
@@ -77,6 +97,8 @@ typedef enum {
 
 extern bool have_bmi1;
 extern bool have_popcnt;
+extern bool have_avx1;
+extern bool have_avx2;
 
 /* optional instructions */
 #define TCG_TARGET_HAS_div2_i32 1
@@ -146,6 +168,26 @@ extern bool have_popcnt;
 #define TCG_TARGET_HAS_mulsh_i640
 #endif
 
+/* We do not support older SSE systems, only beginning with AVX1.  */
+#define TCG_TARGET_HAS_v64  have_avx1
+#define TCG_TARGET_HAS_v128 have_avx1
+#define TCG_TARGET_HAS_v256 have_avx2
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec  0
+#define TCG_TARGET_HAS_not_vec  0
+#define TCG_TARGET_HAS_neg_vec  0
+#define TCG_TARGET_HAS_shi_vec  1
+#define TCG_TARGET_HAS_shs_vec  0
+#define TCG_TARGET_HAS_shv_vec  0
+#define TCG_TARGET_HAS_zip_vec  1
+#define TCG_TARGET_HAS_uzp_vec  0
+#define TCG_TARGET_HAS_trn_vec  0
+#define TCG_TARGET_HAS_cmp_vec  1
+#define TCG_TARGET_HAS_mul_vec  1
+#define TCG_TARGET_HAS_extl_vec 1
+#define TCG_TARGET_HAS_exth_vec 0
+
 #define TCG_TARGET_deposit_i32_valid(ofs, len) \
 (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \
  ((ofs) == 0 && (len) == 16))
diff --git a/tcg/i386/tcg-target.opc.h b/tcg/i386/tcg-target.opc.h
new file mode 100644
index 0000000000..e5fa88ba25
--- /dev/null
+++ b/tcg/i386/tcg-target.opc.h
@@ -0,0 +1,13 @@
+/* Target-specific opcodes for host vector expansion.  These will be
+   emitted by tcg_expand_vec_op.  For those familiar with GCC internals,
+   consider these to be UNSPEC with names.  */
+
+DEF(x86_shufps_vec, 1, 2, 1, IMPLVEC)
+DEF(x86_vpblendvb_vec, 1, 3, 0, IMPLVEC)
+DEF(x86_blend_vec, 1, 2, 1, IMPLVEC)
+DEF(x86_packss_vec, 1, 2, 0, IMPLVEC)
+DEF(x86_packus_vec, 1, 2, 0, IMPLVEC)
+DEF(x86_psrldq_vec, 1, 1, 1, IMPLVEC)
+DEF(x86_vperm2i128_vec, 1, 2, 1, IMPLVEC)
+DEF(x86_punpckl_vec, 1, 2, 0, IMPLVEC)
+DEF(x86_punpckh_vec, 1, 2, 0, IMPLVEC)
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index 63d27f10e7..4805da6130 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -28,9 +28,14 @@
 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
 #if TCG_TARGET_REG_BITS == 64
 "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
-"%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
 #else
 "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
+#endif
+"%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+"%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7",
+#if TCG_TARGET_REG_BITS == 64
+"%xmm8", "%xmm9", "%xmm10", "%xmm11",
+"%xmm12", "%xmm13", "%xmm14", "%xmm15",
 #endif
 };
 #endif
@@ -60,6 +65,28 @@ static const int tcg_target_reg_alloc_order[] = {
 TCG_REG_ECX,
 TCG_REG_EDX,
 TCG_REG_EAX,
+#endif
+TCG_REG_XMM0,
+TCG_REG_XMM1,
+TCG_REG_XMM2,
+TCG_REG_XMM3,
+TCG_REG_XMM4,
+TCG_REG_XMM5,
+#ifndef _WIN64
+/* The Win64 ABI has xmm6-xmm15 as caller-saves, and we do not save
+

[Qemu-devel] [PATCH v9 26/26] tcg/aarch64: Add vector operations

2018-01-15 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 tcg/aarch64/tcg-target.h |  30 +-
 tcg/aarch64/tcg-target.opc.h |   3 +
 tcg/aarch64/tcg-target.inc.c | 674 ---
 3 files changed, 660 insertions(+), 47 deletions(-)
 create mode 100644 tcg/aarch64/tcg-target.opc.h

diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index c2525066ab..46434ecca4 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -31,13 +31,22 @@ typedef enum {
 TCG_REG_SP = 31,
 TCG_REG_XZR = 31,
 
+TCG_REG_V0 = 32, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
+TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
+TCG_REG_V8, TCG_REG_V9, TCG_REG_V10, TCG_REG_V11,
+TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
+TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
+TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
+TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
+TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
+
 /* Aliases.  */
 TCG_REG_FP = TCG_REG_X29,
 TCG_REG_LR = TCG_REG_X30,
 TCG_AREG0  = TCG_REG_X19,
 } TCGReg;
 
-#define TCG_TARGET_NB_REGS 32
+#define TCG_TARGET_NB_REGS 64
 
 /* used for function call generation */
 #define TCG_REG_CALL_STACK  TCG_REG_SP
@@ -113,6 +122,25 @@ typedef enum {
 #define TCG_TARGET_HAS_mulsh_i641
 #define TCG_TARGET_HAS_direct_jump  1
 
+#define TCG_TARGET_HAS_v64  1
+#define TCG_TARGET_HAS_v128 1
+#define TCG_TARGET_HAS_v256 0
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec  1
+#define TCG_TARGET_HAS_not_vec  1
+#define TCG_TARGET_HAS_neg_vec  1
+#define TCG_TARGET_HAS_shi_vec  1
+#define TCG_TARGET_HAS_shs_vec  0
+#define TCG_TARGET_HAS_shv_vec  0
+#define TCG_TARGET_HAS_zip_vec  1
+#define TCG_TARGET_HAS_uzp_vec  1
+#define TCG_TARGET_HAS_trn_vec  1
+#define TCG_TARGET_HAS_cmp_vec  1
+#define TCG_TARGET_HAS_mul_vec  1
+#define TCG_TARGET_HAS_extl_vec 1
+#define TCG_TARGET_HAS_exth_vec 1
+
 #define TCG_TARGET_DEFAULT_MO (0)
 
 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
diff --git a/tcg/aarch64/tcg-target.opc.h b/tcg/aarch64/tcg-target.opc.h
new file mode 100644
index 0000000000..4816a6c3d4
--- /dev/null
+++ b/tcg/aarch64/tcg-target.opc.h
@@ -0,0 +1,3 @@
+/* Target-specific opcodes for host vector expansion.  These will be
+   emitted by tcg_expand_vec_op.  For those familiar with GCC internals,
+   consider these to be UNSPEC with names.  */
diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index 150530f30e..b2ce818d7c 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -20,10 +20,15 @@ QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
 
 #ifdef CONFIG_DEBUG_TCG
 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
-"%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
-"%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
-"%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
-"%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp",
+"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
+"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+"x24", "x25", "x26", "x27", "x28", "fp", "x30", "sp",
+
+"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+"v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
+"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+"v24", "v25", "v26", "v27", "v28", "fp", "v30", "v31",
 };
 #endif /* CONFIG_DEBUG_TCG */
 
@@ -43,6 +48,14 @@ static const int tcg_target_reg_alloc_order[] = {
 /* X19 reserved for AREG0 */
 /* X29 reserved as fp */
 /* X30 reserved as temporary */
+
+TCG_REG_V0, TCG_REG_V1, TCG_REG_V2, TCG_REG_V3,
+TCG_REG_V4, TCG_REG_V5, TCG_REG_V6, TCG_REG_V7,
+/* V8 - V15 are call-saved, and skipped.  */
+TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
+TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
+TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
+TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
 };
 
 static const int tcg_target_call_iarg_regs[8] = {
@@ -54,6 +67,7 @@ static const int tcg_target_call_oarg_regs[1] = {
 };
 
 #define TCG_REG_TMP TCG_REG_X30
+#define TCG_VEC_TMP TCG_REG_V31
 
 #ifndef CONFIG_SOFTMMU
 /* Note that XZR cannot be encoded in the address base register slot,
@@ -119,9 +133,13 @@ static const char 
*target_parse_constraint(TCGArgConstraint *ct,
const char *ct_str, TCGType type)
 {
 switch (*ct_str++) {
-case 'r':
+case 'r': /* general registers */
 ct->ct |= TCG_CT_REG;
-ct->u.regs = 0xffffffffu;
+ct->u.regs |= 0xffffffffu;
+break;

[Qemu-devel] [PATCH v9 22/26] target/arm: Use vector infrastructure for aa64 multiplies

2018-01-15 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 target/arm/translate-a64.c | 171 -
 1 file changed, 138 insertions(+), 33 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 44e44cc9f2..48caba3d9f 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -9691,6 +9691,66 @@ static void disas_simd_3same_float(DisasContext *s, 
uint32_t insn)
 }
 }
 
+static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+gen_helper_neon_mul_u8(a, a, b);
+gen_helper_neon_add_u8(d, d, a);
+}
+
+static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+gen_helper_neon_mul_u16(a, a, b);
+gen_helper_neon_add_u16(d, d, a);
+}
+
+static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+tcg_gen_mul_i32(a, a, b);
+tcg_gen_add_i32(d, d, a);
+}
+
+static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+tcg_gen_mul_i64(a, a, b);
+tcg_gen_add_i64(d, d, a);
+}
+
+static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+tcg_gen_mul_vec(vece, a, a, b);
+tcg_gen_add_vec(vece, d, d, a);
+}
+
+static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+gen_helper_neon_mul_u8(a, a, b);
+gen_helper_neon_sub_u8(d, d, a);
+}
+
+static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+gen_helper_neon_mul_u16(a, a, b);
+gen_helper_neon_sub_u16(d, d, a);
+}
+
+static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+tcg_gen_mul_i32(a, a, b);
+tcg_gen_sub_i32(d, d, a);
+}
+
+static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+tcg_gen_mul_i64(a, a, b);
+tcg_gen_sub_i64(d, d, a);
+}
+
+static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+tcg_gen_mul_vec(vece, a, a, b);
+tcg_gen_sub_vec(vece, d, d, a);
+}
+
 /* Integer op subgroup of C3.6.16. */
 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
 {
@@ -9702,7 +9762,8 @@ static void disas_simd_3same_int(DisasContext *s, 
uint32_t insn)
 int rn = extract32(insn, 5, 5);
 int rd = extract32(insn, 0, 5);
 int pass;
-GVecGen3Fn *gvec_op;
+GVecGen3Fn *gvec_fn;
+const GVecGen3 *gvec_op;
 TCGCond cond;
 
 switch (opcode) {
@@ -9745,12 +9806,70 @@ static void disas_simd_3same_int(DisasContext *s, 
uint32_t insn)
 
 switch (opcode) {
 case 0x10: /* ADD, SUB */
-gvec_op = u ? tcg_gen_gvec_sub : tcg_gen_gvec_add;
-gvec_op(size, vec_full_reg_offset(s, rd),
+gvec_fn = u ? tcg_gen_gvec_sub : tcg_gen_gvec_add;
+do_gvec:
+gvec_fn(size, vec_full_reg_offset(s, rd),
 vec_full_reg_offset(s, rn),
 vec_full_reg_offset(s, rm),
 is_q ? 16 : 8, vec_full_reg_size(s));
 return;
+case 0x13: /* MUL, PMUL */
+if (!u) { /* MUL */
+gvec_fn = tcg_gen_gvec_mul;
+goto do_gvec;
+}
+break;
+case 0x12: /* MLA, MLS */
+{
+static const GVecGen3 mla_op[4] = {
+{ .fni4 = gen_mla8_i32,
+  .fniv = gen_mla_vec,
+  .opc = INDEX_op_mul_vec,
+  .load_dest = true,
+  .vece = MO_8 },
+{ .fni4 = gen_mla16_i32,
+  .fniv = gen_mla_vec,
+  .opc = INDEX_op_mul_vec,
+  .load_dest = true,
+  .vece = MO_16 },
+{ .fni4 = gen_mla32_i32,
+  .fniv = gen_mla_vec,
+  .opc = INDEX_op_mul_vec,
+  .load_dest = true,
+  .vece = MO_32 },
+{ .fni8 = gen_mla64_i64,
+  .fniv = gen_mla_vec,
+  .opc = INDEX_op_mul_vec,
+  .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+  .load_dest = true,
+  .vece = MO_64 },
+};
+static const GVecGen3 mls_op[4] = {
+{ .fni4 = gen_mls8_i32,
+  .fniv = gen_mls_vec,
+  .opc = INDEX_op_mul_vec,
+  .load_dest = true,
+  .vece = MO_8 },
+{ .fni4 = gen_mls16_i32,
+  .fniv = gen_mls_vec,
+  .opc = INDEX_op_mul_vec,
+  .load_dest = true,
+  .vece = MO_16 },
+{ .fni4 = gen_mls32_i32,
+  .fniv = gen_mls_vec,
+  .opc = INDEX_op_mul_vec,
+  .load_dest = true,
+  .vece = MO_32 },
+{ .fni8 = gen_mls64_i64,
+  .fniv = gen_mls_vec,
+  .opc = INDEX_op_mul_vec,
+  .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+  .load_dest = true,
+  .vece = MO_64 },
+};
+gvec_op = (u ? &mls_op[size] : &mla_op[size]);
+}
+goto

[Qemu-devel] [PATCH v9 16/26] target/arm: Use vector infrastructure for aa64 add/sub/logic

2018-01-15 Thread Richard Henderson

Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 target/arm/translate-a64.c | 207 +
 1 file changed, 134 insertions(+), 73 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index ba94f7d045..572af456d1 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -21,6 +21,7 @@
 #include "cpu.h"
 #include "exec/exec-all.h"
 #include "tcg-op.h"
+#include "tcg-op-gvec.h"
 #include "qemu/log.h"
 #include "arm_ldst.h"
 #include "translate.h"
@@ -83,6 +84,10 @@ typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
 typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
 typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
 
+/* Note that the gvec expanders operate on offsets + sizes.  */
+typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
+uint32_t, uint32_t, uint32_t);
+
 /* initialize TCG globals.  */
 void a64_translate_init(void)
 {
@@ -535,6 +540,21 @@ static inline int vec_reg_offset(DisasContext *s, int 
regno,
 return offs;
 }
 
+/* Return the offset into CPUARMState of the "whole" vector register Qn.  */
+static inline int vec_full_reg_offset(DisasContext *s, int regno)
+{
+assert_fp_access_checked(s);
+return offsetof(CPUARMState, vfp.regs[regno * 2]);
+}
+
+/* Return the byte size of the "whole" vector register, VL / 8.  */
+static inline int vec_full_reg_size(DisasContext *s)
+{
+/* FIXME SVE: We should put the composite ZCR_EL* value into tb->flags.
+   In the meantime this is just the AdvSIMD length of 128.  */
+return 128 / 8;
+}
+
 /* Return the offset into CPUARMState of a slice (from
  * the least significant end) of FP register Qn (ie
  * Dn, Sn, Hn or Bn).
@@ -9048,85 +9068,125 @@ static void disas_simd_three_reg_diff(DisasContext *s, 
uint32_t insn)
 }
 }
 
+static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+tcg_gen_xor_i64(rn, rn, rm);
+tcg_gen_and_i64(rn, rn, rd);
+tcg_gen_xor_i64(rd, rm, rn);
+}
+
+static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+tcg_gen_xor_i64(rn, rn, rd);
+tcg_gen_and_i64(rn, rn, rm);
+tcg_gen_xor_i64(rd, rd, rn);
+}
+
+static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+tcg_gen_xor_i64(rn, rn, rd);
+tcg_gen_andc_i64(rn, rn, rm);
+tcg_gen_xor_i64(rd, rd, rn);
+}
+
+static void gen_bsl_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
+{
+tcg_gen_xor_vec(vece, rn, rn, rm);
+tcg_gen_and_vec(vece, rn, rn, rd);
+tcg_gen_xor_vec(vece, rd, rm, rn);
+}
+
+static void gen_bit_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
+{
+tcg_gen_xor_vec(vece, rn, rn, rd);
+tcg_gen_and_vec(vece, rn, rn, rm);
+tcg_gen_xor_vec(vece, rd, rd, rn);
+}
+
+static void gen_bif_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
+{
+tcg_gen_xor_vec(vece, rn, rn, rd);
+tcg_gen_andc_vec(vece, rn, rn, rm);
+tcg_gen_xor_vec(vece, rd, rd, rn);
+}
+
 /* Logic op (opcode == 3) subgroup of C3.6.16. */
 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
 {
+static const GVecGen3 bsl_op = {
+.fni8 = gen_bsl_i64,
+.fniv = gen_bsl_vec,
+.prefer_i64 = TCG_TARGET_REG_BITS == 64,
+.load_dest = true
+};
+static const GVecGen3 bit_op = {
+.fni8 = gen_bit_i64,
+.fniv = gen_bit_vec,
+.prefer_i64 = TCG_TARGET_REG_BITS == 64,
+.load_dest = true
+};
+static const GVecGen3 bif_op = {
+.fni8 = gen_bif_i64,
+.fniv = gen_bif_vec,
+.prefer_i64 = TCG_TARGET_REG_BITS == 64,
+.load_dest = true
+};
+
 int rd = extract32(insn, 0, 5);
 int rn = extract32(insn, 5, 5);
 int rm = extract32(insn, 16, 5);
 int size = extract32(insn, 22, 2);
 bool is_u = extract32(insn, 29, 1);
 bool is_q = extract32(insn, 30, 1);
-TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
-int pass;
+GVecGen3Fn *gvec_fn;
+const GVecGen3 *gvec_op;
 
 if (!fp_access_check(s)) {
 return;
 }
 
-tcg_op1 = tcg_temp_new_i64();
-tcg_op2 = tcg_temp_new_i64();
-tcg_res[0] = tcg_temp_new_i64();
-tcg_res[1] = tcg_temp_new_i64();
-
-for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
-read_vec_element(s, tcg_op1, rn, pass, MO_64);
-read_vec_element(s, tcg_op2, rm, pass, MO_64);
-
-if (!is_u) {
-switch (size) {
-case 0: /* AND */
-tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
-break;
-case 1: /* BIC */
-tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
-break;
-case 2: /* ORR */
-tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
-break;
-case 3: /* ORN */
-tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
-

[Qemu-devel] [PATCH v9 15/26] target/arm: Align vector registers

2018-01-15 Thread Richard Henderson

Reviewed-by: Alex Bennée 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Richard Henderson 
---
 target/arm/cpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 96316700dd..3ff4dea6b8 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -492,7 +492,7 @@ typedef struct CPUARMState {
  * the two execution states, and means we do not need to explicitly
  * map these registers when changing states.
  */
-float64 regs[64];
+float64 regs[64] QEMU_ALIGNED(16);
 
 uint32_t xregs[16];
 /* We store these fpcsr fields separately for convenience.  */
-- 
2.14.3

[Qemu-devel] [PATCH v9 14/26] tcg/optimize: Handle vector opcodes during optimize

2018-01-15 Thread Richard Henderson

Trivial move and constant propagation.  Some identity and constant
function folding, but nothing that requires knowledge of the size
of the vector element.

Signed-off-by: Richard Henderson 
---
 tcg/optimize.c | 150 +
 1 file changed, 77 insertions(+), 73 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 2cbbeefd53..d4ea67e541 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -32,6 +32,11 @@
 glue(glue(case INDEX_op_, x), _i32):\
 glue(glue(case INDEX_op_, x), _i64)
 
+#define CASE_OP_32_64_VEC(x)\
+glue(glue(case INDEX_op_, x), _i32):\
+glue(glue(case INDEX_op_, x), _i64):\
+glue(glue(case INDEX_op_, x), _vec)
+
 struct tcg_temp_info {
 bool is_const;
 TCGTemp *prev_copy;
@@ -108,40 +113,6 @@ static void init_arg_info(struct tcg_temp_info *infos,
 init_ts_info(infos, temps_used, arg_temp(arg));
 }
 
-static int op_bits(TCGOpcode op)
-{
-const TCGOpDef *def = &tcg_op_defs[op];
-return def->flags & TCG_OPF_64BIT ? 64 : 32;
-}
-
-static TCGOpcode op_to_mov(TCGOpcode op)
-{
-switch (op_bits(op)) {
-case 32:
-return INDEX_op_mov_i32;
-case 64:
-return INDEX_op_mov_i64;
-default:
-fprintf(stderr, "op_to_mov: unexpected return value of "
-"function op_bits.\n");
-tcg_abort();
-}
-}
-
-static TCGOpcode op_to_movi(TCGOpcode op)
-{
-switch (op_bits(op)) {
-case 32:
-return INDEX_op_movi_i32;
-case 64:
-return INDEX_op_movi_i64;
-default:
-fprintf(stderr, "op_to_movi: unexpected return value of "
-"function op_bits.\n");
-tcg_abort();
-}
-}
-
 static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
 {
 TCGTemp *i;
@@ -199,11 +170,23 @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
 
 static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
 {
-TCGOpcode new_op = op_to_movi(op->opc);
+const TCGOpDef *def;
+TCGOpcode new_op;
 tcg_target_ulong mask;
 struct tcg_temp_info *di = arg_info(dst);
 
+def = _op_defs[op->opc];
+if (def->flags & TCG_OPF_VECTOR) {
+new_op = INDEX_op_dupi_vec;
+} else if (def->flags & TCG_OPF_64BIT) {
+new_op = INDEX_op_movi_i64;
+} else {
+new_op = INDEX_op_movi_i32;
+}
 op->opc = new_op;
+/* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
+op->args[0] = dst;
+op->args[1] = val;
 
 reset_temp(dst);
 di->is_const = true;
@@ -214,15 +197,13 @@ static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, 
TCGArg dst, TCGArg val)
 mask |= ~0xull;
 }
 di->mask = mask;
-
-op->args[0] = dst;
-op->args[1] = val;
 }
 
 static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
 {
 TCGTemp *dst_ts = arg_temp(dst);
 TCGTemp *src_ts = arg_temp(src);
+const TCGOpDef *def;
 struct tcg_temp_info *di;
 struct tcg_temp_info *si;
 tcg_target_ulong mask;
@@ -236,9 +217,16 @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, 
TCGArg dst, TCGArg src)
 reset_ts(dst_ts);
 di = ts_info(dst_ts);
 si = ts_info(src_ts);
-new_op = op_to_mov(op->opc);
-
+def = _op_defs[op->opc];
+if (def->flags & TCG_OPF_VECTOR) {
+new_op = INDEX_op_mov_vec;
+} else if (def->flags & TCG_OPF_64BIT) {
+new_op = INDEX_op_mov_i64;
+} else {
+new_op = INDEX_op_mov_i32;
+}
 op->opc = new_op;
+/* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
 op->args[0] = dst;
 op->args[1] = src;
 
@@ -417,8 +405,9 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, 
TCGArg y)
 
 static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
 {
+const TCGOpDef *def = _op_defs[op];
 TCGArg res = do_constant_folding_2(op, x, y);
-if (op_bits(op) == 32) {
+if (!(def->flags & TCG_OPF_64BIT)) {
 res = (int32_t)res;
 }
 return res;
@@ -508,13 +497,12 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, 
TCGArg x,
 tcg_target_ulong xv = arg_info(x)->val;
 tcg_target_ulong yv = arg_info(y)->val;
 if (arg_is_const(x) && arg_is_const(y)) {
-switch (op_bits(op)) {
-case 32:
-return do_constant_folding_cond_32(xv, yv, c);
-case 64:
+const TCGOpDef *def = _op_defs[op];
+tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
+if (def->flags & TCG_OPF_64BIT) {
 return do_constant_folding_cond_64(xv, yv, c);
-default:
-tcg_abort();
+} else {
+return do_constant_folding_cond_32(xv, yv, c);
 }
 } else if (args_are_copies(x, y)) {
 return do_constant_folding_cond_eq(c);
@@ -653,11 +641,11 @@ void tcg_optimize(TCGContext *s)
 
 /* For commutative operations make constant second argument */

[Qemu-devel] [PATCH v9 18/26] target/arm: Use vector infrastructure for aa64 dup/movi

2018-01-15 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 target/arm/translate-a64.c | 83 +++---
 1 file changed, 34 insertions(+), 49 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index bc14c28e71..55a4902fc2 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -5846,38 +5846,24 @@ static void disas_simd_across_lanes(DisasContext *s, 
uint32_t insn)
  *
  * size: encoded in imm5 (see ARM ARM LowestSetBit())
  */
+
 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
  int imm5)
 {
 int size = ctz32(imm5);
-int esize = 8 << size;
-int elements = (is_q ? 128 : 64) / esize;
-int index, i;
-TCGv_i64 tmp;
+int index = imm5 >> (size + 1);
 
 if (size > 3 || (size == 3 && !is_q)) {
 unallocated_encoding(s);
 return;
 }
-
 if (!fp_access_check(s)) {
 return;
 }
 
-index = imm5 >> (size + 1);
-
-tmp = tcg_temp_new_i64();
-read_vec_element(s, tmp, rn, index, size);
-
-for (i = 0; i < elements; i++) {
-write_vec_element(s, tmp, rd, i, size);
-}
-
-if (!is_q) {
-clear_vec_high(s, rd);
-}
-
-tcg_temp_free_i64(tmp);
+tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
+ vec_reg_offset(s, rn, index, size),
+ is_q ? 16 : 8, vec_full_reg_size(s));
 }
 
 /* DUP (element, scalar)
@@ -5926,9 +5912,7 @@ static void handle_simd_dupg(DisasContext *s, int is_q, 
int rd, int rn,
  int imm5)
 {
 int size = ctz32(imm5);
-int esize = 8 << size;
-int elements = (is_q ? 128 : 64)/esize;
-int i = 0;
+uint32_t dofs, oprsz, maxsz;
 
 if (size > 3 || ((size == 3) && !is_q)) {
 unallocated_encoding(s);
@@ -5939,12 +5923,11 @@ static void handle_simd_dupg(DisasContext *s, int is_q, 
int rd, int rn,
 return;
 }
 
-for (i = 0; i < elements; i++) {
-write_vec_element(s, cpu_reg(s, rn), rd, i, size);
-}
-if (!is_q) {
-clear_vec_high(s, rd);
-}
+dofs = vec_full_reg_offset(s, rd);
+oprsz = is_q ? 16 : 8;
+maxsz = vec_full_reg_size(s);
+
+tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
 }
 
 /* INS (Element)
@@ -6135,7 +6118,6 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t 
insn)
 bool is_neg = extract32(insn, 29, 1);
 bool is_q = extract32(insn, 30, 1);
 uint64_t imm = 0;
-TCGv_i64 tcg_rd, tcg_imm;
 int i;
 
 if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
@@ -6217,32 +6199,35 @@ static void disas_simd_mod_imm(DisasContext *s, 
uint32_t insn)
 imm = ~imm;
 }
 
-tcg_imm = tcg_const_i64(imm);
-tcg_rd = new_tmp_a64(s);
+if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
+/* MOVI or MVNI, with MVNI negation handled above.  */
+tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), is_q ? 16 : 8,
+vec_full_reg_size(s), imm);
+} else {
+TCGv_i64 tcg_imm = tcg_const_i64(imm);
+TCGv_i64 tcg_rd = new_tmp_a64(s);
 
-for (i = 0; i < 2; i++) {
-int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);
+for (i = 0; i < 2; i++) {
+int foffs = vec_reg_offset(s, rd, i, MO_64);
 
-if (i == 1 && !is_q) {
-/* non-quad ops clear high half of vector */
-tcg_gen_movi_i64(tcg_rd, 0);
-} else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
-tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
-if (is_neg) {
-/* AND (BIC) */
-tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
+if (i == 1 && !is_q) {
+/* non-quad ops clear high half of vector */
+tcg_gen_movi_i64(tcg_rd, 0);
 } else {
-/* ORR */
-tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
+tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
+if (is_neg) {
+/* AND (BIC) */
+tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
+} else {
+/* ORR */
+tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
+}
 }
-} else {
-/* MOVI */
-tcg_gen_mov_i64(tcg_rd, tcg_imm);
+tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
 }
-tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
-}
 
-tcg_temp_free_i64(tcg_imm);
+tcg_temp_free_i64(tcg_imm);
+}
 }
 
 /* AdvSIMD scalar copy
-- 
2.14.3

[Qemu-devel] [PATCH v9 19/26] target/arm: Use vector infrastructure for aa64 zip/uzp/trn/xtn

2018-01-15 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 target/arm/translate-a64.c | 103 +++--
 1 file changed, 35 insertions(+), 68 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 55a4902fc2..8769b4505a 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -5576,11 +5576,7 @@ static void disas_simd_zip_trn(DisasContext *s, uint32_t 
insn)
 int opcode = extract32(insn, 12, 2);
 bool part = extract32(insn, 14, 1);
 bool is_q = extract32(insn, 30, 1);
-int esize = 8 << size;
-int i, ofs;
-int datasize = is_q ? 128 : 64;
-int elements = datasize / esize;
-TCGv_i64 tcg_res, tcg_resl, tcg_resh;
+GVecGen3Fn *gvec_fn;
 
 if (opcode == 0 || (size == 3 && !is_q)) {
 unallocated_encoding(s);
@@ -5591,60 +5587,24 @@ static void disas_simd_zip_trn(DisasContext *s, 
uint32_t insn)
 return;
 }
 
-tcg_resl = tcg_const_i64(0);
-tcg_resh = tcg_const_i64(0);
-tcg_res = tcg_temp_new_i64();
-
-for (i = 0; i < elements; i++) {
-switch (opcode) {
-case 1: /* UZP1/2 */
-{
-int midpoint = elements / 2;
-if (i < midpoint) {
-read_vec_element(s, tcg_res, rn, 2 * i + part, size);
-} else {
-read_vec_element(s, tcg_res, rm,
- 2 * (i - midpoint) + part, size);
-}
-break;
-}
-case 2: /* TRN1/2 */
-if (i & 1) {
-read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
-} else {
-read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
-}
-break;
-case 3: /* ZIP1/2 */
-{
-int base = part * elements / 2;
-if (i & 1) {
-read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
-} else {
-read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
-}
-break;
-}
-default:
-g_assert_not_reached();
-}
-
-ofs = i * esize;
-if (ofs < 64) {
-tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
-tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
-} else {
-tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
-tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
-}
+switch (opcode) {
+case 1: /* UZP1/2 */
+gvec_fn = part ? tcg_gen_gvec_uzpo : tcg_gen_gvec_uzpe;
+break;
+case 2: /* TRN1/2 */
+gvec_fn = part ? tcg_gen_gvec_trno : tcg_gen_gvec_trne;
+break;
+case 3: /* ZIP1/2 */
+gvec_fn = part ? tcg_gen_gvec_ziph : tcg_gen_gvec_zipl;
+break;
+default:
+g_assert_not_reached();
 }
 
-tcg_temp_free_i64(tcg_res);
-
-write_vec_element(s, tcg_resl, rd, 0, MO_64);
-tcg_temp_free_i64(tcg_resl);
-write_vec_element(s, tcg_resh, rd, 1, MO_64);
-tcg_temp_free_i64(tcg_resh);
+gvec_fn(size, vec_full_reg_offset(s, rd),
+vec_full_reg_offset(s, rn),
+vec_full_reg_offset(s, rm),
+is_q ? 16 : 8, vec_full_reg_size(s));
 }
 
 static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
@@ -7922,6 +7882,22 @@ static void handle_2misc_narrow(DisasContext *s, bool 
scalar,
 int destelt = is_q ? 2 : 0;
 int passes = scalar ? 1 : 2;
 
+if (opcode == 0x12 && !u) { /* XTN, XTN2 */
+tcg_debug_assert(!scalar);
+if (is_q) { /* XTN2 */
+tcg_gen_gvec_uzpe(size, vec_reg_offset(s, rd, 1, MO_64),
+  vec_reg_offset(s, rn, 0, MO_64),
+  vec_reg_offset(s, rn, 1, MO_64),
+  8, vec_full_reg_size(s) - 8);
+} else {
+tcg_gen_gvec_uzpe(size, vec_reg_offset(s, rd, 0, MO_64),
+  vec_reg_offset(s, rn, 0, MO_64),
+  vec_reg_offset(s, rn, 1, MO_64),
+  8, vec_full_reg_size(s));
+}
+return;
+}
+
 if (scalar) {
 tcg_res[1] = tcg_const_i32(0);
 }
@@ -7939,23 +7915,14 @@ static void handle_2misc_narrow(DisasContext *s, bool 
scalar,
 tcg_res[pass] = tcg_temp_new_i32();
 
 switch (opcode) {
-case 0x12: /* XTN, SQXTUN */
+case 0x12: /* , SQXTUN */
 {
-static NeonGenNarrowFn * const xtnfns[3] = {
-gen_helper_neon_narrow_u8,
-gen_helper_neon_narrow_u16,
-tcg_gen_extrl_i64_i32,
-};
 static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
 gen_helper_neon_unarrow_sat8,
 gen_helper_neon_unarrow_sat16,
 gen_helper_neon_unarrow_sat32,
 };
-if (u) {
-genenvfn = sqxtunfns[size];
-

[Qemu-devel] [PATCH v9 13/26] tcg: Add generic vector helpers with a scalar variable operand

2018-01-15 Thread Richard Henderson

Use dup to convert the scalar to a third vector.

Signed-off-by: Richard Henderson 
---
 accel/tcg/tcg-runtime.h  |   5 ++
 tcg/tcg-op-gvec.h|  28 ++
 accel/tcg/tcg-runtime-gvec.c |  48 ++
 tcg/tcg-op-gvec.c| 207 +++
 4 files changed, 288 insertions(+)

diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index 30bb10f9f1..65d0c2ec3b 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -157,6 +157,11 @@ DEF_HELPER_FLAGS_4(gvec_sub16, TCG_CALL_NO_RWG, void, ptr, 
ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_sub32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_sub64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_4(gvec_subs8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_subs16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_subs32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_subs64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
 DEF_HELPER_FLAGS_4(gvec_mul8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_mul16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_mul32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/tcg/tcg-op-gvec.h b/tcg/tcg-op-gvec.h
index 1fbb94a0cd..0e8ba3d305 100644
--- a/tcg/tcg-op-gvec.h
+++ b/tcg/tcg-op-gvec.h
@@ -122,6 +122,27 @@ typedef struct {
 bool load_dest;
 } GVecGen2i;
 
+typedef struct {
+/* Expand inline as a 64-bit or 32-bit integer.
+   Only one of these will be non-NULL.  */
+void (*fni8)(TCGv_i64, TCGv_i64, TCGv_i64);
+void (*fni4)(TCGv_i32, TCGv_i32, TCGv_i32);
+/* Expand inline with a host vector type.  */
+void (*fniv)(unsigned, TCGv_vec, TCGv_vec, TCGv_vec);
+/* Expand out-of-line helper w/descriptor.  */
+gen_helper_gvec_2i *fno;
+/* The opcode, if any, to which this corresponds.  */
+TCGOpcode opc;
+/* The data argument to the out-of-line helper.  */
+uint32_t data;
+/* The vector element size, if applicable.  */
+uint8_t vece;
+/* Prefer i64 to v64.  */
+bool prefer_i64;
+/* Load scalar as 1st source operand.  */
+bool scalar_first;
+} GVecGen2s;
+
 typedef struct {
 /* Expand inline as a 64-bit or 32-bit integer.
Only one of these will be non-NULL.  */
@@ -166,6 +187,8 @@ void tcg_gen_gvec_2(uint32_t dofs, uint32_t aofs,
 uint32_t oprsz, uint32_t maxsz, const GVecGen2 *);
 void tcg_gen_gvec_2i(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
  uint32_t maxsz, int64_t c, const GVecGen2i *);
+void tcg_gen_gvec_2s(uint32_t dofs, uint32_t aofs, uint32_t oprsz,
+ uint32_t maxsz, TCGv_i64 c, const GVecGen2s *);
 void tcg_gen_gvec_3(uint32_t dofs, uint32_t aofs, uint32_t bofs,
 uint32_t oprsz, uint32_t maxsz, const GVecGen3 *);
 void tcg_gen_gvec_4(uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t cofs,
@@ -192,6 +215,11 @@ void tcg_gen_gvec_addi(unsigned vece, uint32_t dofs, 
uint32_t aofs,
 void tcg_gen_gvec_muli(unsigned vece, uint32_t dofs, uint32_t aofs,
int64_t c, uint32_t oprsz, uint32_t maxsz);
 
+void tcg_gen_gvec_adds(unsigned vece, uint32_t dofs, uint32_t aofs,
+   TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_subs(unsigned vece, uint32_t dofs, uint32_t aofs,
+   TCGv_i64 c, uint32_t oprsz, uint32_t maxsz);
+
 /* Saturated arithmetic.  */
 void tcg_gen_gvec_ssadd(unsigned vece, uint32_t dofs, uint32_t aofs,
 uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
index 378658124f..ec8695dd66 100644
--- a/accel/tcg/tcg-runtime-gvec.c
+++ b/accel/tcg/tcg-runtime-gvec.c
@@ -189,6 +189,54 @@ void HELPER(gvec_sub64)(void *d, void *a, void *b, 
uint32_t desc)
 clear_high(d, oprsz, desc);
 }
 
+void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+intptr_t oprsz = simd_oprsz(desc);
+vec8 vecb = (vec8){ b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b };
+intptr_t i;
+
+for (i = 0; i < oprsz; i += sizeof(vec8)) {
+*(vec8 *)(d + i) = *(vec8 *)(a + i) - vecb;
+}
+clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+intptr_t oprsz = simd_oprsz(desc);
+vec16 vecb = (vec16){ b, b, b, b, b, b, b, b };
+intptr_t i;
+
+for (i = 0; i < oprsz; i += sizeof(vec16)) {
+*(vec16 *)(d + i) = *(vec16 *)(a + i) - vecb;
+}
+clear_high(d, oprsz, desc);
+}
+
+void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
+{
+intptr_t oprsz = simd_oprsz(desc);
+vec32 vecb = (vec32){ b, b, b, b };
+intptr_t i;
+
+for (i = 0; i < oprsz; i += sizeof(vec32)) {
+*(vec32 *)(d + i) = *(vec32 *)(a + i) - vecb;
+

[Qemu-devel] [PATCH v9 17/26] target/arm: Use vector infrastructure for aa64 mov/not/neg

2018-01-15 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 target/arm/translate-a64.c | 43 ++-
 1 file changed, 38 insertions(+), 5 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 572af456d1..bc14c28e71 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -85,6 +85,7 @@ typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
 typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
 
 /* Note that the gvec expanders operate on offsets + sizes.  */
+typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
 typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
 uint32_t, uint32_t, uint32_t);
 
@@ -4579,14 +4580,19 @@ static void handle_fp_1src_double(DisasContext *s, int 
opcode, int rd, int rn)
 TCGv_i64 tcg_op;
 TCGv_i64 tcg_res;
 
+switch (opcode) {
+case 0x0: /* FMOV */
+tcg_gen_gvec_mov(0, vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ 8, vec_full_reg_size(s));
+return;
+}
+
 fpst = get_fpstatus_ptr();
 tcg_op = read_fp_dreg(s, rn);
 tcg_res = tcg_temp_new_i64();
 
 switch (opcode) {
-case 0x0: /* FMOV */
-tcg_gen_mov_i64(tcg_res, tcg_op);
-break;
 case 0x1: /* FABS */
 gen_helper_vfp_absd(tcg_res, tcg_op);
 break;
@@ -9153,6 +9159,12 @@ static void disas_simd_3same_logic(DisasContext *s, 
uint32_t insn)
 gvec_fn = tcg_gen_gvec_andc;
 goto do_fn;
 case 2: /* ORR */
+if (rn == rm) { /* MOV */
+tcg_gen_gvec_mov(0, vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ is_q ? 16 : 8, vec_full_reg_size(s));
+return;
+}
 gvec_fn = tcg_gen_gvec_or;
 goto do_fn;
 case 3: /* ORN */
@@ -10032,6 +10044,7 @@ static void disas_simd_two_reg_misc(DisasContext *s, 
uint32_t insn)
 int rmode = -1;
 TCGv_i32 tcg_rmode;
 TCGv_ptr tcg_fpstatus;
+GVecGen2Fn *gvec_fn;
 
 switch (opcode) {
 case 0x0: /* REV64, REV32 */
@@ -10040,8 +10053,7 @@ static void disas_simd_two_reg_misc(DisasContext *s, 
uint32_t insn)
 return;
 case 0x5: /* CNT, NOT, RBIT */
 if (u && size == 0) {
-/* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
-size = 3;
+/* NOT */
 break;
 } else if (u && size == 1) {
 /* RBIT */
@@ -10293,6 +10305,27 @@ static void disas_simd_two_reg_misc(DisasContext *s, 
uint32_t insn)
 tcg_rmode = NULL;
 }
 
+switch (opcode) {
+case 0x5:
+if (u && size == 0) { /* NOT */
+gvec_fn = tcg_gen_gvec_not;
+goto do_fn;
+}
+break;
+case 0xb:
+if (u) { /* NEG */
+gvec_fn = tcg_gen_gvec_neg;
+goto do_fn;
+}
+break;
+
+do_fn:
+gvec_fn(size, vec_full_reg_offset(s, rd),
+vec_full_reg_offset(s, rn),
+is_q ? 16 : 8, vec_full_reg_size(s));
+return;
+}
+
 if (size == 3) {
 /* All 64-bit element operations can be shared with scalar 2misc */
 int pass;
-- 
2.14.3

[Qemu-devel] [PATCH v9 11/26] tcg: Loosen vec_gen_op* typecheck rules

2018-01-15 Thread Richard Henderson

For ARM SVE with VQ=3, we want to be able to dup a scalar
into a v256, use that, and then perform a second operation
with the v256 punned to a v128.

Allow operands to a vector operation to be wider than necessary
for the output.

Signed-off-by: Richard Henderson 
---
 tcg/tcg-op-vec.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
index a73d094ddb..ad9a45b653 100644
--- a/tcg/tcg-op-vec.c
+++ b/tcg/tcg-op-vec.c
@@ -78,7 +78,7 @@ static void vec_gen_op2(TCGOpcode opc, unsigned vece, 
TCGv_vec r, TCGv_vec a)
 TCGTemp *at = tcgv_vec_temp(a);
 TCGType type = rt->base_type;
 
-tcg_debug_assert(at->base_type == type);
+tcg_debug_assert(at->base_type >= type);
 vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
 }
 
@@ -90,8 +90,8 @@ static void vec_gen_op3(TCGOpcode opc, unsigned vece,
 TCGTemp *bt = tcgv_vec_temp(b);
 TCGType type = rt->base_type;
 
-tcg_debug_assert(at->base_type == type);
-tcg_debug_assert(bt->base_type == type);
+tcg_debug_assert(at->base_type >= type);
+tcg_debug_assert(bt->base_type >= type);
 vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
 }
 
@@ -257,14 +257,14 @@ void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, 
TCGv_i64 a)
 
 if (TCG_TARGET_REG_BITS == 64) {
 TCGArg ai = tcgv_i64_arg(a);
-vec_gen_2(INDEX_op_dup_vec, type, MO_64, ri, ai);
+vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
 } else if (vece == MO_64) {
 TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
 TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
 vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
 } else {
 TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
-vec_gen_2(INDEX_op_dup_vec, type, MO_64, ri, ai);
+vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
 }
 }
 
@@ -493,8 +493,8 @@ void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
 TCGType type = rt->base_type;
 int can;
 
-tcg_debug_assert(at->base_type == type);
-tcg_debug_assert(bt->base_type == type);
+tcg_debug_assert(at->base_type >= type);
+tcg_debug_assert(bt->base_type >= type);
 can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);
 if (can > 0) {
 vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
@@ -515,8 +515,8 @@ void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, 
TCGv_vec b)
 TCGType type = rt->base_type;
 int can;
 
-tcg_debug_assert(at->base_type == type);
-tcg_debug_assert(bt->base_type == type);
+tcg_debug_assert(at->base_type >= type);
+tcg_debug_assert(bt->base_type >= type);
 can = tcg_can_emit_vec_op(INDEX_op_mul_vec, type, vece);
 if (can > 0) {
 vec_gen_3(INDEX_op_mul_vec, type, vece, ri, ai, bi);
-- 
2.14.3

[Qemu-devel] [PATCH v9 09/26] tcg: Add generic vector ops for extension

2018-01-15 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 accel/tcg/tcg-runtime.h  |   8 +++
 tcg/tcg-op-gvec.h|   9 +++
 tcg/tcg-op.h |   5 ++
 tcg/tcg-opc.h|   5 ++
 tcg/tcg.h|   2 +
 accel/tcg/tcg-runtime-gvec.c |  26 +
 tcg/tcg-op-gvec.c| 130 +++
 tcg/tcg-op-vec.c |  39 +
 tcg/tcg.c|   6 ++
 tcg/README   |  13 +
 10 files changed, 243 insertions(+)

diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index c4a2e6b215..d1b3542946 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -199,6 +199,14 @@ DEF_HELPER_FLAGS_4(gvec_trn16, TCG_CALL_NO_RWG, void, ptr, 
ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_trn32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_trn64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_3(gvec_extu8, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_extu16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_extu32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(gvec_exts8, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_exts16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(gvec_exts32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
 DEF_HELPER_FLAGS_4(gvec_eq8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_eq16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_eq32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/tcg/tcg-op-gvec.h b/tcg/tcg-op-gvec.h
index 28ec0f260c..f716c53be0 100644
--- a/tcg/tcg-op-gvec.h
+++ b/tcg/tcg-op-gvec.h
@@ -222,6 +222,15 @@ void tcg_gen_gvec_trne(unsigned vece, uint32_t dofs, 
uint32_t aofs,
 void tcg_gen_gvec_trno(unsigned vece, uint32_t dofs, uint32_t aofs,
uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
 
+void tcg_gen_gvec_extul(unsigned vece, uint32_t dofs, uint32_t aofs,
+uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_extuh(unsigned vece, uint32_t dofs, uint32_t aofs,
+uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_extsl(unsigned vece, uint32_t dofs, uint32_t aofs,
+uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_extsh(unsigned vece, uint32_t dofs, uint32_t aofs,
+uint32_t oprsz, uint32_t maxsz);
+
 void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
   uint32_t aofs, uint32_t bofs,
   uint32_t oprsz, uint32_t maxsz);
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index f967790cd9..28a5cbe47a 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -940,6 +940,11 @@ void tcg_gen_uzpo_vec(unsigned vece, TCGv_vec r, TCGv_vec 
a, TCGv_vec b);
 void tcg_gen_trne_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
 void tcg_gen_trno_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b);
 
+void tcg_gen_extul_vec(unsigned vece, TCGv_vec r, TCGv_vec a);
+void tcg_gen_extuh_vec(unsigned vece, TCGv_vec r, TCGv_vec a);
+void tcg_gen_extsl_vec(unsigned vece, TCGv_vec r, TCGv_vec a);
+void tcg_gen_extsh_vec(unsigned vece, TCGv_vec r, TCGv_vec a);
+
 void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, TCGv_vec r,
  TCGv_vec a, TCGv_vec b);
 
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index b21a30273c..3dfd872a0f 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -249,6 +249,11 @@ DEF(uzpo_vec, 1, 2, 0, IMPLVEC | 
IMPL(TCG_TARGET_HAS_uzp_vec))
 DEF(trne_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_trn_vec))
 DEF(trno_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_trn_vec))
 
+DEF(extul_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_extl_vec))
+DEF(extuh_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_exth_vec))
+DEF(extsl_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_extl_vec))
+DEF(extsh_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_exth_vec))
+
 DEF(cmp_vec, 1, 2, 1, IMPLVEC)
 
 DEF(last_generic, 0, 0, 0, TCG_OPF_NOT_PRESENT)
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 9ae7465d1e..f870a3f582 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -186,6 +186,8 @@ typedef uint64_t TCGRegSet;
 #define TCG_TARGET_HAS_trn_vec  0
 #define TCG_TARGET_HAS_cmp_vec  0
 #define TCG_TARGET_HAS_mul_vec  0
+#define TCG_TARGET_HAS_extl_vec 0
+#define TCG_TARGET_HAS_exth_vec 0
 #else
 #define TCG_TARGET_MAYBE_vec1
 #endif
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
index 9406ccd769..ff26be0744 100644
--- a/accel/tcg/tcg-runtime-gvec.c
+++ b/accel/tcg/tcg-runtime-gvec.c
@@ -588,3 +588,29 @@ DO_CMP2(8)
 DO_CMP2(16)
 DO_CMP2(32)
 DO_CMP2(64)
+
+#define DO_EXT(NAME, TYPE1, TYPE2) \
+void HELPER(NAME)(void *d, void *a, uint32_t desc)   \
+{\
+intptr_t oprsz =

[Qemu-devel] [PATCH v9 12/26] tcg: Add generic vector helpers with a scalar immediate operand

2018-01-15 Thread Richard Henderson

We already have immediate shifts.  Add addition, multiplication,
and logical operations with an immediate.  Subtraction can thus
be done with negation of the constant.

Signed-off-by: Richard Henderson 
---
 accel/tcg/tcg-runtime.h  |  14 
 tcg/tcg-op-gvec.h|  22 -
 accel/tcg/tcg-runtime-gvec.c | 132 ++
 tcg/tcg-op-gvec.c| 186 ++-
 4 files changed, 352 insertions(+), 2 deletions(-)

diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index ec187a094b..30bb10f9f1 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -147,6 +147,11 @@ DEF_HELPER_FLAGS_4(gvec_add16, TCG_CALL_NO_RWG, void, ptr, 
ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_add32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_add64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_4(gvec_adds8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_adds16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_adds32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_adds64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
 DEF_HELPER_FLAGS_4(gvec_sub8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_sub16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_sub32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -157,6 +162,11 @@ DEF_HELPER_FLAGS_4(gvec_mul16, TCG_CALL_NO_RWG, void, ptr, 
ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_mul32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_mul64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_4(gvec_muls8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_muls16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_muls32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_muls64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
 DEF_HELPER_FLAGS_4(gvec_ssadd8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_ssadd16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_ssadd32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -189,6 +199,10 @@ DEF_HELPER_FLAGS_4(gvec_xor, TCG_CALL_NO_RWG, void, ptr, 
ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_andc, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(gvec_orc, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_4(gvec_andi, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_xori, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_ori, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
 DEF_HELPER_FLAGS_3(gvec_shl8i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
 DEF_HELPER_FLAGS_3(gvec_shl16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
 DEF_HELPER_FLAGS_3(gvec_shl32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
diff --git a/tcg/tcg-op-gvec.h b/tcg/tcg-op-gvec.h
index 98fdab22f6..1fbb94a0cd 100644
--- a/tcg/tcg-op-gvec.h
+++ b/tcg/tcg-op-gvec.h
@@ -35,6 +35,12 @@ void tcg_gen_gvec_2_ool(uint32_t dofs, uint32_t aofs,
 uint32_t oprsz, uint32_t maxsz, int32_t data,
 gen_helper_gvec_2 *fn);
 
+/* Similarly, passing an extra data value.  */
+typedef void gen_helper_gvec_2i(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
+void tcg_gen_gvec_2i_ool(uint32_t dofs, uint32_t aofs, TCGv_i64 c,
+ uint32_t oprsz, uint32_t maxsz, int32_t data,
+ gen_helper_gvec_2i *fn);
+
 /* Similarly, passing an extra pointer (e.g. env or float_status).  */
 typedef void gen_helper_gvec_2_ptr(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
 void tcg_gen_gvec_2_ptr(uint32_t dofs, uint32_t aofs,
@@ -102,8 +108,10 @@ typedef struct {
 void (*fni4)(TCGv_i32, TCGv_i32, int32_t);
 /* Expand inline with a host vector type.  */
 void (*fniv)(unsigned, TCGv_vec, TCGv_vec, int64_t);
-/* Expand out-of-line helper w/descriptor.  */
+/* Expand out-of-line helper w/descriptor, data in descriptor.  */
 gen_helper_gvec_2 *fno;
+/* Expand out-of-line helper w/descriptor, data as argument.  */
+gen_helper_gvec_2i *fnoi;
 /* The opcode, if any, to which this corresponds.  */
 TCGOpcode opc;
 /* The vector element size, if applicable.  */
@@ -179,6 +187,11 @@ void tcg_gen_gvec_sub(unsigned vece, uint32_t dofs, 
uint32_t aofs,
 void tcg_gen_gvec_mul(unsigned vece, uint32_t dofs, uint32_t aofs,
   uint32_t bofs, uint32_t oprsz, uint32_t maxsz);
 
+void tcg_gen_gvec_addi(unsigned vece, uint32_t dofs, uint32_t aofs,
+   int64_t c, uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_muli(unsigned vece, uint32_t dofs, uint32_t aofs,
+   int64_t c, uint32_t oprsz, uint32_t maxsz);
+
 /* Saturated arithmetic.  */
 void tcg_gen_gvec_ssadd(unsigned vece, uint32_t dofs, uint32_t aofs,
 uint32_t bofs,

1 2 3 4 5 >

1 - 100 of 450 matches

Mail list logo