date:20170608

Re: [Qemu-devel] [PATCH] timer/aspeed: fix timer enablement when a reload is not set

2017-06-08 Thread Cédric Le Goater

On 06/09/2017 04:26 AM, Andrew Jeffery wrote:
> On Tue, 2017-06-06 at 10:55 +0200, Cédric Le Goater wrote:
>> When a timer is enabled before a reload value is set, the controller
>> waits for a reload value to be set before starting decrementing. This
>> fix tries to cover that case by changing the timer expiry only when
>> a reload value is valid.
>>
>>> Signed-off-by: Cédric Le Goater 
>> ---
>>  hw/timer/aspeed_timer.c | 37 +
>>  1 file changed, 29 insertions(+), 8 deletions(-)
>>
>> diff --git a/hw/timer/aspeed_timer.c b/hw/timer/aspeed_timer.c
>> index 9b70ee09b07f..50acbf530a3a 100644
>> --- a/hw/timer/aspeed_timer.c
>> +++ b/hw/timer/aspeed_timer.c
>> @@ -130,15 +130,26 @@ static uint64_t calculate_next(struct AspeedTimer *t)
>>  next = seq[1];
>>  } else if (now < seq[2]) {
>>  next = seq[2];
>> -} else {
>> +} else if (t->reload) {
>>  reload_ns = muldiv64(t->reload, NANOSECONDS_PER_SECOND, rate);
>>  t->start = now - ((now - t->start) % reload_ns);
>> +} else {
>> +/* no reload value, return 0 */
>> +break;
>>  }
>>  }
>>  
>>  return next;
>>  }
>>  
>> +static void aspeed_timer_mod(AspeedTimer *t)
>> +{
>> +uint64_t next = calculate_next(t);
>> +if (next) {
>> +timer_mod(>timer, next);
>> +}
>> +}
>> +
>>  static void aspeed_timer_expire(void *opaque)
>>  {
>>  AspeedTimer *t = opaque;
>> @@ -164,7 +175,7 @@ static void aspeed_timer_expire(void *opaque)
>>  qemu_set_irq(t->irq, t->level);
>>  }
>>  
>> -timer_mod(>timer, calculate_next(t));
>> +aspeed_timer_mod(t);
>>  }
>>  
>>  static uint64_t aspeed_timer_get_value(AspeedTimer *t, int reg)
>> @@ -227,10 +238,23 @@ static void 
>> aspeed_timer_set_value(AspeedTimerCtrlState *s, int timer, int reg,
>> uint32_t value)
>>  {
>>  AspeedTimer *t;
>> +uint32_t old_reload;
>>  
>>  trace_aspeed_timer_set_value(timer, reg, value);
>>  t = >timers[timer];
>>  switch (reg) {
>> +case TIMER_REG_RELOAD:
>> +old_reload = t->reload;
>> +t->reload = value;
>> +
>> +/* If the reload value was not previously set, or zero, and
>> + * the current value is valid, try to start the timer if it is
>> + * enabled.
>> + */
>> +if (old_reload || !t->reload) {
>> +break;
>> +}
> 
> Maybe I need more caffeine, but I initially struggled to reconcile the
> condition with the comment, as the condition checks the inverse in
> order to break while the comment discusses the non-breaking case. 

I agree. The reload "value" is used in a hidden way to activate the
timer.

> However, after trying for several minutes, I'm not sure there's an easy
> way to improve it.

I tried a few things. Maybe we could move the following code into
its own routine and call it twice?
 
>> +
>>  case TIMER_REG_STATUS:
>>  if (timer_enabled(t)) {
>>  uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
>> @@ -238,17 +262,14 @@ static void 
>> aspeed_timer_set_value(AspeedTimerCtrlState *s, int timer, int reg,
>>  uint32_t rate = calculate_rate(t);
>>  
>>  t->start += muldiv64(delta, NANOSECONDS_PER_SECOND, rate);
>> -timer_mod(>timer, calculate_next(t));
>> +aspeed_timer_mod(t);
>>  }
>>  break;
>> -case TIMER_REG_RELOAD:
>> -t->reload = value;
>> -break;
>>  case TIMER_REG_MATCH_FIRST:
>>  case TIMER_REG_MATCH_SECOND:
>>  t->match[reg - 2] = value;
>>  if (timer_enabled(t)) {
>> -timer_mod(>timer, calculate_next(t));
>> +aspeed_timer_mod(t);
>>  }
>>  break;
>>  default:
>> @@ -268,7 +289,7 @@ static void aspeed_timer_ctrl_enable(AspeedTimer *t, 
>> bool enable)
>>  trace_aspeed_timer_ctrl_enable(t->id, enable);
>>  if (enable) {
>>  t->start = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
>> -timer_mod(>timer, calculate_next(t));
>> +aspeed_timer_mod(t);
>>  } else {
>>  timer_del(>timer);
>>  }
> 
> Reviewed-by: Andrew Jeffery 

Thanks,

C.

Re: [Qemu-devel] [PATCH] target/sh4: optimize cross-page and indirect jumps

2017-06-08 Thread Richard Henderson


On 06/07/2017 02:20 PM, Aurelien Jarno wrote:

  if (ctx->singlestep_enabled)
  gen_helper_debug(cpu_env);
-tcg_gen_exit_tb(0);
+tcg_gen_lookup_and_goto_ptr(cpu_pc);


That really should be an else for that if, since helper_debug doesn't return.


r~

[Qemu-devel] [PATCH v5 6/7] tcg/arm: Try pc-relative addresses for movi

2017-06-08 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target.inc.c | 44 +---
 1 file changed, 29 insertions(+), 15 deletions(-)

diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index fce382f..18708b1 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -418,25 +418,39 @@ static inline void tcg_out_dat_imm(TCGContext *s,
 
 static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
 {
-int rot, opc, rn;
-
-/* For armv7, make sure not to use movw+movt when mov/mvn would do.
-   Speed things up by only checking when movt would be required.
-   Prior to armv7, have one go at fully rotated immediates before
-   doing the decomposition thing below.  */
-if (!use_armv7_instructions || (arg & 0x)) {
-rot = encode_imm(arg);
+int rot, opc, rn, diff;
+
+/* Check a single MOV/MVN before anything else.  */
+rot = encode_imm(arg);
+if (rot >= 0) {
+tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
+rotl(arg, rot) | (rot << 7));
+return;
+}
+rot = encode_imm(~arg);
+if (rot >= 0) {
+tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
+rotl(~arg, rot) | (rot << 7));
+return;
+}
+
+/* Check for a pc-relative address.  This will usually be the TB,
+   or within the TB, which is immediately before the code block.  */
+diff = arg - ((intptr_t)s->code_ptr + 8);
+if (diff >= 0) {
+rot = encode_imm(diff);
 if (rot >= 0) {
-tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
-rotl(arg, rot) | (rot << 7));
+tcg_out_dat_imm(s, cond, ARITH_ADD, rd, TCG_REG_PC,
+rotl(diff, rot) | (rot << 7));
 return;
-}
-rot = encode_imm(~arg);
+   }
+} else {
+rot = encode_imm(-diff);
 if (rot >= 0) {
-tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
-rotl(~arg, rot) | (rot << 7));
+tcg_out_dat_imm(s, cond, ARITH_SUB, rd, TCG_REG_PC,
+rotl(-diff, rot) | (rot << 7));
 return;
-}
+   }
 }
 
 /* Use movw + movt.  */
-- 
2.9.4

[Qemu-devel] [PATCH v5 1/7] util: add cacheinfo

2017-06-08 Thread Richard Henderson

From: "Emilio G. Cota" 

Add helpers to gather cache info from the host at init-time.

For now, only export the host's I/D cache line sizes, which we
will use to improve cache locality to avoid false sharing.

Suggested-by: Richard Henderson 
Suggested-by: Geert Martin Ijewski 
Tested-by:Geert Martin Ijewski 
Signed-off-by: Emilio G. Cota 
Message-Id: <1496794624-4083-1-git-send-email-c...@braap.org>
[rth: Move all implementations from tcg/ppc/]
Signed-off-by: Richard Henderson 
---
 include/qemu/osdep.h |   3 +
 tcg/ppc/tcg-target.inc.c |  71 +-
 util/Makefile.objs   |   1 +
 util/cacheinfo.c | 185 +++
 4 files changed, 191 insertions(+), 69 deletions(-)
 create mode 100644 util/cacheinfo.c

diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 1c9f5e2..ee43521 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -470,4 +470,7 @@ char *qemu_get_pid_name(pid_t pid);
  */
 pid_t qemu_fork(Error **errp);
 
+extern int qemu_icache_linesize;
+extern int qemu_dcache_linesize;
+
 #endif
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index 8d50f18..1f690df 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -2820,14 +2820,11 @@ void tcg_register_jit(void *buf, size_t buf_size)
 }
 #endif /* __ELF__ */
 
-static size_t dcache_bsize = 16;
-static size_t icache_bsize = 16;
-
 void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
 uintptr_t p, start1, stop1;
-size_t dsize = dcache_bsize;
-size_t isize = icache_bsize;
+size_t dsize = qemu_dcache_linesize;
+size_t isize = qemu_icache_linesize;
 
 start1 = start & ~(dsize - 1);
 stop1 = (stop + dsize - 1) & ~(dsize - 1);
@@ -2844,67 +2841,3 @@ void flush_icache_range(uintptr_t start, uintptr_t stop)
 asm volatile ("sync" : : : "memory");
 asm volatile ("isync" : : : "memory");
 }
-
-#if defined _AIX
-#include 
-
-static void __attribute__((constructor)) tcg_cache_init(void)
-{
-icache_bsize = _system_configuration.icache_line;
-dcache_bsize = _system_configuration.dcache_line;
-}
-
-#elif defined __linux__
-static void __attribute__((constructor)) tcg_cache_init(void)
-{
-unsigned long dsize = qemu_getauxval(AT_DCACHEBSIZE);
-unsigned long isize = qemu_getauxval(AT_ICACHEBSIZE);
-
-if (dsize == 0 || isize == 0) {
-if (dsize == 0) {
-fprintf(stderr, "getauxval AT_DCACHEBSIZE failed\n");
-}
-if (isize == 0) {
-fprintf(stderr, "getauxval AT_ICACHEBSIZE failed\n");
-}
-exit(1);
-}
-dcache_bsize = dsize;
-icache_bsize = isize;
-}
-
-#elif defined __APPLE__
-#include 
-
-static void __attribute__((constructor)) tcg_cache_init(void)
-{
-size_t len;
-unsigned cacheline;
-int name[2] = { CTL_HW, HW_CACHELINE };
-
-len = sizeof(cacheline);
-if (sysctl(name, 2, , , NULL, 0)) {
-perror("sysctl CTL_HW HW_CACHELINE failed");
-exit(1);
-}
-dcache_bsize = cacheline;
-icache_bsize = cacheline;
-}
-
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
-#include 
-
-static void __attribute__((constructor)) tcg_cache_init(void)
-{
-size_t len = 4;
-unsigned cacheline;
-
-if (sysctlbyname ("machdep.cacheline_size", , , NULL, 0)) {
-fprintf(stderr, "sysctlbyname machdep.cacheline_size failed: %s\n",
-strerror(errno));
-exit(1);
-}
-dcache_bsize = cacheline;
-icache_bsize = cacheline;
-}
-#endif
diff --git a/util/Makefile.objs b/util/Makefile.objs
index c6205eb..94d9477 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -20,6 +20,7 @@ util-obj-y += host-utils.o
 util-obj-y += bitmap.o bitops.o hbitmap.o
 util-obj-y += fifo8.o
 util-obj-y += acl.o
+util-obj-y += cacheinfo.o
 util-obj-y += error.o qemu-error.o
 util-obj-y += id.o
 util-obj-y += iov.o qemu-config.o qemu-sockets.o uri.o notify.o
diff --git a/util/cacheinfo.c b/util/cacheinfo.c
new file mode 100644
index 000..f987522
--- /dev/null
+++ b/util/cacheinfo.c
@@ -0,0 +1,185 @@
+/*
+ * cacheinfo.c - helpers to query the host about its caches
+ *
+ * Copyright (C) 2017, Emilio G. Cota 
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+int qemu_icache_linesize = 0;
+int qemu_dcache_linesize = 0;
+
+/*
+ * Operating system specific detection mechanisms.
+ */
+
+#if defined(_AIX)
+# include 
+
+static void sys_cache_info(int *isize, int *dsize)
+{
+*isize = _system_configuration.icache_line;
+*dsize = _system_configuration.dcache_line;
+}
+
+#elif defined(_WIN32)
+
+static void sys_cache_info(int *isize, int *dsize)
+{
+SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf;
+DWORD size = 0;
+BOOL success;
+size_t i, n;
+
+/* Check

[Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate

2017-06-08 Thread Richard Henderson

This is a follow-up to Emilio's patch set.

My primary changes to Emilio's patches are to the first patch, in
merging the existing implementations from tcg/ppc/tcg-target.inc.c
into util/cacheinfo.c.

Then I've a few follow-up patches to take advantage of the new TB
placement for arm platforms.  I've had a look at the asm output for
ppc64 and s390x, and don't see anything obvious that can be improved.

Changes since v4:
  * The first patch reorganized a bit for aarch64 and ppc64.
Re-tested on win32, for which there was a Werror.
Incorporated feedback from Emilio re MacOS.
  * Fixed the short description for the tcg/arm patches.


r~


Emilio G. Cota (2):
  util: add cacheinfo
  tcg: allocate TB structs before the corresponding translated code

Richard Henderson (5):
  tcg/aarch64: Use ADR in tcg_out_movi
  tcg/arm: Use indirect branch for goto_tb
  tcg/arm: Remove limit on code buffer size
  tcg/arm: Try pc-relative addresses for movi
  tcg/arm: Use ldr (literal) for goto_tb

 include/exec/exec-all.h  |   5 +-
 include/exec/tb-context.h|   3 +-
 include/qemu/osdep.h |   3 +
 tcg/aarch64/tcg-target.inc.c |   7 +-
 tcg/arm/tcg-target.inc.c |  82 +++
 tcg/ppc/tcg-target.inc.c |  71 +
 tcg/tcg.c|  20 +
 tcg/tcg.h|   2 +-
 translate-all.c  |  41 ++
 util/Makefile.objs   |   1 +
 util/cacheinfo.c | 185 +++
 11 files changed, 293 insertions(+), 127 deletions(-)
 create mode 100644 util/cacheinfo.c

-- 
2.9.4

[Qemu-devel] [PATCH v5 7/7] tcg/arm: Use ldr (literal) for goto_tb

2017-06-08 Thread Richard Henderson

The new placement of the TB means that we can use one insn
to load the goto_tb destination directly from the TB.

Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target.inc.c | 23 ++-
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 18708b1..b640fb9 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -1669,14 +1669,27 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode 
opc,
 }
 break;
 case INDEX_op_goto_tb:
-tcg_debug_assert(s->tb_jmp_insn_offset == 0);
 {
 /* Indirect jump method */
-intptr_t ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]);
-tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff);
-tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, ptr & 0xfff);
+intptr_t ptr, dif, dil;
+TCGReg base = TCG_REG_PC;
+
+tcg_debug_assert(s->tb_jmp_insn_offset == 0);
+ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]);
+dif = ptr - ((intptr_t)s->code_ptr + 8);
+dil = sextract32(dif, 0, 12);
+if (dif != dil) {
+/* The TB is close, but outside the 12 bits addressable by
+   the load.  We can extend this to 20 bits with a sub of a
+   shifted immediate from pc.  In the vastly unlikely event
+   the code requires more than 1MB, we'll use 2 insns and
+   be no worse off.  */
+base = TCG_REG_R0;
+tcg_out_movi32(s, COND_AL, base, ptr - dil);
+}
+tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, base, dil);
+s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
 }
-s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
 break;
 case INDEX_op_goto_ptr:
 tcg_out_bx(s, COND_AL, args[0]);
-- 
2.9.4

Re: [Qemu-devel] [PATCH v4 1/6] xics: introduce macros for ICP/ICS link properties

2017-06-08 Thread Cédric Le Goater

On 06/09/2017 04:10 AM, David Gibson wrote:
> On Thu, Jun 08, 2017 at 07:26:35PM +0200, Cédric Le Goater wrote:
>> On 06/08/2017 07:00 PM, Greg Kurz wrote:
>>> On Thu, 8 Jun 2017 18:08:44 +0200
>>> Cédric Le Goater  wrote:
>>>
>>> FWIW, other people do that as well (see hw/i386/pc_q35.c for example).  
>>>   
>>
>> well, I don't see the benefits of changing a string constant by a 
>> define. 
>>  
>
> Improved semantics,  especially since the "xics" string appears in 
> many places with different meanings.   

 ah ? If so, we should do a cleanup. The code seems consistent from 
 what I can see. xics is a general name for :

'PowerPC interrupt controller (type 2)' 

 and it is mostly used as a prefix. There are no "xics" object, only a 
>>>
>>> I'm only talking about "xics" as a property name actually:
>>>
>>> $ git grep '"xics"'
>>> hw/intc/xics.c:obj = object_property_get_link(OBJECT(dev), "xics", 
>>> );
>>> hw/intc/xics.c:obj = object_property_get_link(OBJECT(dev), "xics", 
>>> );
>>> hw/ppc/pnv.c:object_property_add_const_link(OBJECT(>psi), "xics",
>>> hw/ppc/pnv.c:object_property_add_const_link(OBJECT(pnv_core), 
>>> "xics",
>>> hw/ppc/pnv_core.c:object_property_add_const_link(obj, "xics", 
>>> OBJECT(xi), _abort);
>>> hw/ppc/pnv_core.c:xi = object_property_get_link(OBJECT(dev), "xics", 
>>> _err);
>>> hw/ppc/pnv_psi.c:obj = object_property_get_link(OBJECT(dev), "xics", 
>>> );
>>> hw/ppc/pnv_psi.c:object_property_add_const_link(OBJECT(ics), "xics", 
>>> obj,  _abort);
>>> hw/ppc/spapr.c:object_property_add_const_link(obj, "xics", 
>>> OBJECT(spapr), _abort);
>>> hw/ppc/spapr_cpu_core.c:object_property_add_const_link(obj, "xics", 
>>> OBJECT(spapr), _abort);
>>>
>>> You have to read the code to know which ones are related.
>>
>> The "xics" property link always point to the same object : 
>> the XICSFabric object which is the machine, spapr or pnv. 
>>
>>> With this patch applied, it is mostly obvious, even for the newbie:
>>
>> ah. the goal is to know where in the code the link was set. 
>> It can be even more complex with aliases.
> 
> There doesn't seem to be a strong convention about whether to use raw
> property names or defines across qemu.  I'm not all that fussed either
> way.
> 
> I do see one small advantage to use defines: if you make a typo, it
> will probably result in a compile time error, whereas with a bare
> string it won't show up until a runtime error.

ok. I can agree with that.

> In this case, I intend to take the macro patch, mostly just on the
> basis of avoiding further delays to rework the remaining patches.

But I don't think having two different defines is a good idea : 

+#define ICP_PROP_XICS "xics"
+#define ICS_PROP_XICS "xics"

C.

[Qemu-devel] [PATCH v5 3/7] tcg/aarch64: Use ADR in tcg_out_movi

2017-06-08 Thread Richard Henderson

The new placement of the TB means that we can use one insn
to load the return value for exit_tb returning the TB pointer.

Tested-by: Emilio G. Cota 
Signed-off-by: Richard Henderson 
---
 tcg/aarch64/tcg-target.inc.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index 5f18545..1fa3bcc 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -616,7 +616,12 @@ static void tcg_out_movi(TCGContext *s, TCGType type, 
TCGReg rd,
 /* Look for host pointer values within 4G of the PC.  This happens
often when loading pointers to QEMU's own data structures.  */
 if (type == TCG_TYPE_I64) {
-tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
+tcg_target_long disp = value - (intptr_t)s->code_ptr;
+if (disp == sextract64(disp, 0, 21)) {
+tcg_out_insn(s, 3406, ADR, rd, disp);
+return;
+}
+disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
 if (disp == sextract64(disp, 0, 21)) {
 tcg_out_insn(s, 3406, ADRP, rd, disp);
 if (value & 0xfff) {
-- 
2.9.4

[Qemu-devel] [PATCH v5 5/7] tcg/arm: Remove limit on code buffer size

2017-06-08 Thread Richard Henderson

Since we're no longer using a direct branch, we have no
limit on the branch distance.

Signed-off-by: Richard Henderson 
---
 translate-all.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/translate-all.c b/translate-all.c
index bb094ad..966747a 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -523,8 +523,6 @@ static inline PageDesc *page_find(tb_page_addr_t index)
 # define MAX_CODE_GEN_BUFFER_SIZE  (32u * 1024 * 1024)
 #elif defined(__aarch64__)
 # define MAX_CODE_GEN_BUFFER_SIZE  (128ul * 1024 * 1024)
-#elif defined(__arm__)
-# define MAX_CODE_GEN_BUFFER_SIZE  (16u * 1024 * 1024)
 #elif defined(__s390x__)
   /* We have a +- 4GB range on the branches; leave some slop.  */
 # define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
-- 
2.9.4

[Qemu-devel] [PULL 11/20] spapr: Rework DRC name handling

2017-06-08 Thread David Gibson

DRC objects have a get_name method which returns the DRC name generated
when the DRC is created.  Replace that with a fixed spapr_drc_name()
function which generates the name on the fly from other information.  This
means:
  * We get rid of a method with only one implementation, and only local
callers
  * We don't have to carry the name string around for the lifetime of the
DRC
  * We use information added to the class structure to generate the name
in standard format, so we don't need an explicit switch on drc type
any more

We also eliminate the 'name' property; it's basically useless since the
only information in it can easily be deduced from other things.

Signed-off-by: David Gibson 
Reviewed-by: Michael Roth 
Acked-by: Michael Roth 
---
 hw/ppc/spapr_drc.c | 80 +-
 include/hw/ppc/spapr_drc.h |  3 +-
 2 files changed, 30 insertions(+), 53 deletions(-)

diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index 82c0b94..15ef67d 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -155,9 +155,32 @@ static uint32_t set_allocation_state(sPAPRDRConnector *drc,
 return RTAS_OUT_SUCCESS;
 }
 
-static const char *get_name(sPAPRDRConnector *drc)
+static const char *spapr_drc_name(sPAPRDRConnector *drc)
 {
-return drc->name;
+sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+
+/* human-readable name for a DRC to encode into the DT
+ * description. this is mainly only used within a guest in place
+ * of the unique DRC index.
+ *
+ * in the case of VIO/PCI devices, it corresponds to a "location
+ * code" that maps a logical device/function (DRC index) to a
+ * physical (or virtual in the case of VIO) location in the system
+ * by chaining together the "location label" for each
+ * encapsulating component.
+ *
+ * since this is more to do with diagnosing physical hardware
+ * issues than guest compatibility, we choose location codes/DRC
+ * names that adhere to the documented format, but avoid encoding
+ * the entire topology information into the label/code, instead
+ * just using the location codes based on the labels for the
+ * endpoints (VIO/PCI adaptor connectors), which is basically just
+ * "C" followed by an integer ID.
+ *
+ * DRC names as documented by PAPR+ v2.7, 13.5.2.4
+ * location codes as documented by PAPR+ v2.7, 12.3.1.5
+ */
+return g_strdup_printf("%s%d", drck->drc_name_prefix, drc->id);
 }
 
 /* has the guest been notified of device attachment? */
@@ -202,13 +225,6 @@ static void prop_get_index(Object *obj, Visitor *v, const 
char *name,
 visit_type_uint32(v, name, , errp);
 }
 
-static char *prop_get_name(Object *obj, Error **errp)
-{
-sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj);
-sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
-return g_strdup(drck->get_name(drc));
-}
-
 static void prop_get_fdt(Object *obj, Visitor *v, const char *name,
  void *opaque, Error **errp)
 {
@@ -567,45 +583,6 @@ sPAPRDRConnector *spapr_dr_connector_new(Object *owner, 
const char *type,
 object_property_set_bool(OBJECT(drc), true, "realized", NULL);
 g_free(prop_name);
 
-/* human-readable name for a DRC to encode into the DT
- * description. this is mainly only used within a guest in place
- * of the unique DRC index.
- *
- * in the case of VIO/PCI devices, it corresponds to a
- * "location code" that maps a logical device/function (DRC index)
- * to a physical (or virtual in the case of VIO) location in the
- * system by chaining together the "location label" for each
- * encapsulating component.
- *
- * since this is more to do with diagnosing physical hardware
- * issues than guest compatibility, we choose location codes/DRC
- * names that adhere to the documented format, but avoid encoding
- * the entire topology information into the label/code, instead
- * just using the location codes based on the labels for the
- * endpoints (VIO/PCI adaptor connectors), which is basically
- * just "C" followed by an integer ID.
- *
- * DRC names as documented by PAPR+ v2.7, 13.5.2.4
- * location codes as documented by PAPR+ v2.7, 12.3.1.5
- */
-switch (spapr_drc_type(drc)) {
-case SPAPR_DR_CONNECTOR_TYPE_CPU:
-drc->name = g_strdup_printf("CPU %d", id);
-break;
-case SPAPR_DR_CONNECTOR_TYPE_PHB:
-drc->name = g_strdup_printf("PHB %d", id);
-break;
-case SPAPR_DR_CONNECTOR_TYPE_VIO:
-case SPAPR_DR_CONNECTOR_TYPE_PCI:
-drc->name = g_strdup_printf("C%d", id);
-break;
-case SPAPR_DR_CONNECTOR_TYPE_LMB:
-drc->name = g_strdup_printf("LMB %d", id);
-break;
-default:
-g_assert(false);
-}
-
 /* PCI slot

[Qemu-devel] [PULL 16/20] xics: introduce macros for ICP/ICS link properties

2017-06-08 Thread David Gibson

From: Greg Kurz 

These properties are part of the XICS API. They deserve to appear
explicitly in the XICS header file.

Signed-off-by: Greg Kurz 
Signed-off-by: David Gibson 
---
 hw/intc/xics.c  | 8 
 hw/ppc/pnv_core.c   | 3 ++-
 hw/ppc/pnv_psi.c| 3 ++-
 hw/ppc/spapr.c  | 3 ++-
 hw/ppc/spapr_cpu_core.c | 3 ++-
 include/hw/ppc/xics.h   | 4 
 6 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index ec73f02..aa2c4e7 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -346,9 +346,9 @@ static void icp_realize(DeviceState *dev, Error **errp)
 Object *obj;
 Error *err = NULL;
 
-obj = object_property_get_link(OBJECT(dev), "xics", );
+obj = object_property_get_link(OBJECT(dev), ICP_PROP_XICS, );
 if (!obj) {
-error_setg(errp, "%s: required link 'xics' not found: %s",
+error_setg(errp, "%s: required link '" ICP_PROP_XICS "' not found: %s",
__func__, error_get_pretty(err));
 return;
 }
@@ -654,9 +654,9 @@ static void ics_base_realize(DeviceState *dev, Error **errp)
 Object *obj;
 Error *err = NULL;
 
-obj = object_property_get_link(OBJECT(dev), "xics", );
+obj = object_property_get_link(OBJECT(dev), ICS_PROP_XICS, );
 if (!obj) {
-error_setg(errp, "%s: required link 'xics' not found: %s",
+error_setg(errp, "%s: required link '" ICS_PROP_XICS "' not found: %s",
__func__, error_get_pretty(err));
 return;
 }
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index e8a9a94..0b6e729 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -121,7 +121,8 @@ static void pnv_core_realize_child(Object *child, 
XICSFabric *xi, Error **errp)
 obj = object_new(TYPE_PNV_ICP);
 object_property_add_child(OBJECT(cpu), "icp", obj, _abort);
 object_unref(obj);
-object_property_add_const_link(obj, "xics", OBJECT(xi), _abort);
+object_property_add_const_link(obj, ICP_PROP_XICS, OBJECT(xi),
+   _abort);
 object_property_set_bool(obj, true, "realized", _err);
 if (local_err) {
 error_propagate(errp, local_err);
diff --git a/hw/ppc/pnv_psi.c b/hw/ppc/pnv_psi.c
index 2bf5bfe..9876c26 100644
--- a/hw/ppc/pnv_psi.c
+++ b/hw/ppc/pnv_psi.c
@@ -474,7 +474,8 @@ static void pnv_psi_realize(DeviceState *dev, Error **errp)
 }
 
 /* Create PSI interrupt control source */
-object_property_add_const_link(OBJECT(ics), "xics", obj,  _abort);
+object_property_add_const_link(OBJECT(ics), ICS_PROP_XICS, obj,
+   _abort);
 object_property_set_int(OBJECT(ics), PSI_NUM_INTERRUPTS, "nr-irqs", );
 if (err) {
 error_propagate(errp, err);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 01dda9e..b2951d7 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -107,7 +107,8 @@ static ICSState *spapr_ics_create(sPAPRMachineState *spapr,
 
 obj = object_new(type_ics);
 object_property_add_child(OBJECT(spapr), "ics", obj, _abort);
-object_property_add_const_link(obj, "xics", OBJECT(spapr), _abort);
+object_property_add_const_link(obj, ICS_PROP_XICS, OBJECT(spapr),
+   _abort);
 object_property_set_int(obj, nr_irqs, "nr-irqs", _err);
 if (local_err) {
 goto error;
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 029a141..e81879c 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -145,7 +145,8 @@ static void spapr_cpu_core_realize_child(Object *child, 
Error **errp)
 obj = object_new(spapr->icp_type);
 object_property_add_child(OBJECT(cpu), "icp", obj, _abort);
 object_unref(obj);
-object_property_add_const_link(obj, "xics", OBJECT(spapr), _abort);
+object_property_add_const_link(obj, ICP_PROP_XICS, OBJECT(spapr),
+   _abort);
 object_property_set_bool(obj, true, "realized", _err);
 if (local_err) {
 goto error;
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 40a506e..3114532 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -86,6 +86,8 @@ struct ICPState {
 XICSFabric *xics;
 };
 
+#define ICP_PROP_XICS "xics"
+
 struct PnvICPState {
 ICPState parent_obj;
 
@@ -130,6 +132,8 @@ struct ICSState {
 XICSFabric *xics;
 };
 
+#define ICS_PROP_XICS "xics"
+
 static inline bool ics_valid_irq(ICSState *ics, uint32_t nr)
 {
 return (ics->offset != 0) && (nr >= ics->offset)
-- 
2.9.4

[Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate

2017-06-08 Thread Richard Henderson

This is a follow-up to Emilio's patch set.

My primary changes to Emilio's patches are to the first patch, in
merging the existing implementations from tcg/ppc/tcg-target.inc.c
into util/cacheinfo.c.

Then I've a few follow-up patches to take advantage of the new TB
placement for arm platforms.  I've had a look at the asm output for
ppc64 and s390x, and don't see anything obvious that can be improved.

Changes since v4:
  * The first patch reorganized a bit for aarch64 and ppc64.
Re-tested on win32, for which there was a Werror.
Incorporated feedback from Emilio re MacOS.
  * Fixed the short description for the tcg/arm patches.


r~


Emilio G. Cota (2):
  util: add cacheinfo
  tcg: allocate TB structs before the corresponding translated code

Richard Henderson (5):
  tcg/aarch64: Use ADR in tcg_out_movi
  tcg/arm: Use indirect branch for goto_tb
  tcg/arm: Remove limit on code buffer size
  tcg/arm: Try pc-relative addresses for movi
  tcg/arm: Use ldr (literal) for goto_tb

 include/exec/exec-all.h  |   5 +-
 include/exec/tb-context.h|   3 +-
 include/qemu/osdep.h |   3 +
 tcg/aarch64/tcg-target.inc.c |   7 +-
 tcg/arm/tcg-target.inc.c |  82 +++
 tcg/ppc/tcg-target.inc.c |  71 +
 tcg/tcg.c|  20 +
 tcg/tcg.h|   2 +-
 translate-all.c  |  41 ++
 util/Makefile.objs   |   1 +
 util/cacheinfo.c | 185 +++
 11 files changed, 293 insertions(+), 127 deletions(-)
 create mode 100644 util/cacheinfo.c

-- 
2.9.4

[Qemu-devel] [PATCH v5 4/7] tcg/arm: Use indirect branch for goto_tb

2017-06-08 Thread Richard Henderson

Signed-off-by: Richard Henderson 
---
 include/exec/exec-all.h  |  5 +
 tcg/arm/tcg-target.inc.c | 17 ++---
 2 files changed, 3 insertions(+), 19 deletions(-)

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 87ae10b..724ec73 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -301,7 +301,7 @@ static inline void 
tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu,
 #define CODE_GEN_AVG_BLOCK_SIZE 150
 #endif
 
-#if defined(__arm__) || defined(_ARCH_PPC) \
+#if defined(_ARCH_PPC) \
 || defined(__x86_64__) || defined(__i386__) \
 || defined(__sparc__) || defined(__aarch64__) \
 || defined(__s390x__) || defined(__mips__) \
@@ -401,9 +401,6 @@ static inline void tb_set_jmp_target1(uintptr_t jmp_addr, 
uintptr_t addr)
 #elif defined(__aarch64__)
 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr);
 #define tb_set_jmp_target1 aarch64_tb_set_jmp_target
-#elif defined(__arm__)
-void arm_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr);
-#define tb_set_jmp_target1 arm_tb_set_jmp_target
 #elif defined(__sparc__) || defined(__mips__)
 void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr);
 #else
diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 9f5cb66..fce382f 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -1026,16 +1026,6 @@ static void tcg_out_call(TCGContext *s, tcg_insn_unit 
*addr)
 }
 }
 
-void arm_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
-{
-tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
-tcg_insn_unit *target = (tcg_insn_unit *)addr;
-
-/* we could use a ldr pc, [pc, #-4] kind of branch and avoid the flush */
-reloc_pc24_atomic(code_ptr, target);
-flush_icache_range(jmp_addr, jmp_addr + 4);
-}
-
 static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l)
 {
 if (l->has_value) {
@@ -1665,11 +1655,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode 
opc,
 }
 break;
 case INDEX_op_goto_tb:
-if (s->tb_jmp_insn_offset) {
-/* Direct jump method */
-s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
-tcg_out_b_noaddr(s, COND_AL);
-} else {
+tcg_debug_assert(s->tb_jmp_insn_offset == 0);
+{
 /* Indirect jump method */
 intptr_t ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]);
 tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff);
-- 
2.9.4

[Qemu-devel] [PULL 14/20] hw/ppc/spapr: Adjust firmware name for PCI bridges

2017-06-08 Thread David Gibson

From: Thomas Huth 

SLOF uses "pci" as the name for PCI bridge nodes in the device tree instead
of "pci-bridge", so booting via bootindex from a device behind a PCI
bridge currently does not work since QEMU passes the wrong name in the
"qemu,boot-list" property. Fix it by changing the name of the PCI bridge
nodes to "pci" instead.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1459170
Signed-off-by: Thomas Huth 
Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index e340ff3..01dda9e 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2441,6 +2441,12 @@ static char *spapr_get_fw_dev_path(FWPathProvider *p, 
BusState *bus,
 return g_strdup_printf("disk@%"PRIX64, (uint64_t)id << 32);
 }
 
+if (g_str_equal("pci-bridge", qdev_fw_name(dev))) {
+/* SLOF uses "pci" instead of "pci-bridge" for PCI bridges */
+PCIDevice *pcidev = CAST(PCIDevice, dev, TYPE_PCI_DEVICE);
+return g_strdup_printf("pci@%x", PCI_SLOT(pcidev->devfn));
+}
+
 return NULL;
 }
 
-- 
2.9.4

[Qemu-devel] [PULL 17/20] xics: pass appropriate types to realize() handlers.

2017-06-08 Thread David Gibson

From: Greg Kurz 

It makes more sense to pass an ICPState * to handlers of ICPStateClass
instead of a DeviceState *, if only to benefit from compile-time type
checking. The same goes for ICSStateClass.

While here, we also change the declaration of ICPStateClass in xics.h
for consistency.

Signed-off-by: Greg Kurz 
Signed-off-by: David Gibson 
---
 hw/intc/xics.c| 10 --
 hw/intc/xics_kvm.c|  6 ++
 hw/intc/xics_pnv.c|  6 +++---
 include/hw/ppc/xics.h |  8 
 4 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index aa2c4e7..f74a96e 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -356,7 +356,7 @@ static void icp_realize(DeviceState *dev, Error **errp)
 icp->xics = XICS_FABRIC(obj);
 
 if (icpc->realize) {
-icpc->realize(dev, errp);
+icpc->realize(icp, errp);
 }
 
 qemu_register_reset(icp_reset, dev);
@@ -606,10 +606,8 @@ static void ics_simple_initfn(Object *obj)
 ics->offset = XICS_IRQ_BASE;
 }
 
-static void ics_simple_realize(DeviceState *dev, Error **errp)
+static void ics_simple_realize(ICSState *ics, Error **errp)
 {
-ICSState *ics = ICS_SIMPLE(dev);
-
 if (!ics->nr_irqs) {
 error_setg(errp, "Number of interrupts needs to be greater 0");
 return;
@@ -617,7 +615,7 @@ static void ics_simple_realize(DeviceState *dev, Error 
**errp)
 ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState));
 ics->qirqs = qemu_allocate_irqs(ics_simple_set_irq, ics, ics->nr_irqs);
 
-qemu_register_reset(ics_simple_reset, dev);
+qemu_register_reset(ics_simple_reset, ics);
 }
 
 static Property ics_simple_properties[] = {
@@ -664,7 +662,7 @@ static void ics_base_realize(DeviceState *dev, Error **errp)
 
 
 if (icsc->realize) {
-icsc->realize(dev, errp);
+icsc->realize(ics, errp);
 }
 }
 
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index 45bf110..41c5b94 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -328,10 +328,8 @@ static void ics_kvm_reset(void *dev)
 ics_set_kvm_state(ics, 1);
 }
 
-static void ics_kvm_realize(DeviceState *dev, Error **errp)
+static void ics_kvm_realize(ICSState *ics, Error **errp)
 {
-ICSState *ics = ICS_SIMPLE(dev);
-
 if (!ics->nr_irqs) {
 error_setg(errp, "Number of interrupts needs to be greater 0");
 return;
@@ -339,7 +337,7 @@ static void ics_kvm_realize(DeviceState *dev, Error **errp)
 ics->irqs = g_malloc0(ics->nr_irqs * sizeof(ICSIRQState));
 ics->qirqs = qemu_allocate_irqs(ics_kvm_set_irq, ics, ics->nr_irqs);
 
-qemu_register_reset(ics_kvm_reset, dev);
+qemu_register_reset(ics_kvm_reset, ics);
 }
 
 static void ics_kvm_class_init(ObjectClass *klass, void *data)
diff --git a/hw/intc/xics_pnv.c b/hw/intc/xics_pnv.c
index 12ae605..2a955a8 100644
--- a/hw/intc/xics_pnv.c
+++ b/hw/intc/xics_pnv.c
@@ -159,11 +159,11 @@ static const MemoryRegionOps pnv_icp_ops = {
 },
 };
 
-static void pnv_icp_realize(DeviceState *dev, Error **errp)
+static void pnv_icp_realize(ICPState *icp, Error **errp)
 {
-PnvICPState *icp = PNV_ICP(dev);
+PnvICPState *pnv_icp = PNV_ICP(icp);
 
-memory_region_init_io(>mmio, OBJECT(dev), _icp_ops,
+memory_region_init_io(_icp->mmio, OBJECT(icp), _icp_ops,
   icp, "icp-thread", 0x1000);
 }
 
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 3114532..797df82 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -65,9 +65,9 @@ typedef struct XICSFabric XICSFabric;
 struct ICPStateClass {
 DeviceClass parent_class;
 
-void (*realize)(DeviceState *dev, Error **errp);
-void (*pre_save)(ICPState *s);
-int (*post_load)(ICPState *s, int version_id);
+void (*realize)(ICPState *icp, Error **errp);
+void (*pre_save)(ICPState *icp);
+int (*post_load)(ICPState *icp, int version_id);
 void (*cpu_setup)(ICPState *icp, PowerPCCPU *cpu);
 void (*reset)(ICPState *icp);
 };
@@ -113,7 +113,7 @@ struct PnvICPState {
 struct ICSStateClass {
 DeviceClass parent_class;
 
-void (*realize)(DeviceState *dev, Error **errp);
+void (*realize)(ICSState *s, Error **errp);
 void (*pre_save)(ICSState *s);
 int (*post_load)(ICSState *s, int version_id);
 void (*reject)(ICSState *s, uint32_t irq);
-- 
2.9.4

[Qemu-devel] [PATCH 1/3] Guess L1 cache linesize for aarch64

2017-06-08 Thread Richard Henderson

Use the minimum line size of the cache hierarchy, as reported by CTR_EL0,
as the L1 cache line size. See the comment within the code for rationale.

* sysdeps/unix/sysv/linux/aarch64/sysconf.c: New file.

Cc: Marcus Shawcroft 
---
 sysdeps/unix/sysv/linux/aarch64/sysconf.c | 55 +++
 1 file changed, 55 insertions(+)
 create mode 100644 sysdeps/unix/sysv/linux/aarch64/sysconf.c

diff --git a/sysdeps/unix/sysv/linux/aarch64/sysconf.c 
b/sysdeps/unix/sysv/linux/aarch64/sysconf.c
new file mode 100644
index 000..30608dd
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/aarch64/sysconf.c
@@ -0,0 +1,55 @@
+/* Copyright (C) 2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   .  */
+
+#include 
+#include 
+#include 
+#include 
+
+
+static long int linux_sysconf (int name);
+
+/* Get the value of the system variable NAME.  */
+long int
+__sysconf (int name)
+{
+  unsigned ctr;
+
+  /* Unfortunately, the registers that contain the actual cache info
+ (CCSIDR_EL1, CLIDR_EL1, and CSSELR_EL1) are protected by the Linux
+ kernel (though they need not have been).  However, CTR_EL0 contains
+ the *minimum* linesize in the entire cache hierarchy, and is
+ accessible to userland, for use in __aarch64_sync_cache_range,
+ and it is a reasonable assumption that the L1 cache will have that
+ minimum line size.  */
+  switch (name)
+{
+case _SC_LEVEL1_ICACHE_LINESIZE:
+  asm("mrs\t%0, ctr_el0" : "=r"(ctr));
+  return 4 << (ctr & 0xf);
+case _SC_LEVEL1_DCACHE_LINESIZE:
+  asm("mrs\t%0, ctr_el0" : "=r"(ctr));
+  return 4 << ((ctr >> 16) & 0xf);
+}
+
+  return linux_sysconf (name);
+}
+
+/* Now the generic Linux version.  */
+#undef __sysconf
+#define __sysconf static linux_sysconf
+#include 
-- 
2.9.4

[Qemu-devel] [PATCH v5 2/7] tcg: allocate TB structs before the corresponding translated code

2017-06-08 Thread Richard Henderson

From: "Emilio G. Cota" 

Allocating an arbitrarily-sized array of tbs results in either
(a) a lot of memory wasted or (b) unnecessary flushes of the code
cache when we run out of TB structs in the array.

An obvious solution would be to just malloc a TB struct when needed,
and keep the TB array as an array of pointers (recall that tb_find_pc()
needs the TB array to run in O(log n)).

Perhaps a better solution, which is implemented in this patch, is to
allocate TBs right before the translated code they describe. This
results in some memory waste due to padding to have code and TBs in
separate cache lines--for instance, I measured 4.7% of padding in the
used portion of code_gen_buffer when booting aarch64 Linux on a
host with 64-byte cache lines. However, it can allow for optimizations
in some host architectures, since TCG backends could safely assume that
the TB and the corresponding translated code are very close to each
other in memory. See this message by rth for a detailed explanation:

  https://lists.gnu.org/archive/html/qemu-devel/2017-03/msg05172.html
  Subject: Re: GSoC 2017 Proposal: TCG performance enhancements
  Message-ID: <1e67644b-4b30-887e-d329-1848e94c9...@twiddle.net>

Suggested-by: Richard Henderson 
Reviewed-by: Pranith Kumar 
Signed-off-by: Emilio G. Cota 
Message-Id: <1496790745-314-3-git-send-email-c...@braap.org>
[rth: Simplify the arithmetic in tcg_tb_alloc]
Signed-off-by: Richard Henderson 
---
 include/exec/tb-context.h |  3 ++-
 tcg/tcg.c | 20 
 tcg/tcg.h |  2 +-
 translate-all.c   | 39 ---
 4 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h
index c7f17f2..25c2afe 100644
--- a/include/exec/tb-context.h
+++ b/include/exec/tb-context.h
@@ -31,8 +31,9 @@ typedef struct TBContext TBContext;
 
 struct TBContext {
 
-TranslationBlock *tbs;
+TranslationBlock **tbs;
 struct qht htable;
+size_t tbs_size;
 int nb_tbs;
 /* any access to the tbs or the page table must use this lock */
 QemuMutex tb_lock;
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 564292f..3559829 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -383,6 +383,26 @@ void tcg_context_init(TCGContext *s)
 }
 }
 
+/*
+ * Allocate TBs right before their corresponding translated code, making
+ * sure that TBs and code are on different cache lines.
+ */
+TranslationBlock *tcg_tb_alloc(TCGContext *s)
+{
+uintptr_t align = qemu_icache_linesize;
+TranslationBlock *tb;
+void *next;
+
+tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
+next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
+
+if (unlikely(next > s->code_gen_highwater)) {
+return NULL;
+}
+s->code_gen_ptr = next;
+return tb;
+}
+
 void tcg_prologue_init(TCGContext *s)
 {
 size_t prologue_size, total_size;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 5ec48d1..9e37722 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -697,7 +697,6 @@ struct TCGContext {
here, because there's too much arithmetic throughout that relies
on addition and subtraction working on bytes.  Rely on the GCC
extension that allows arithmetic on void*.  */
-int code_gen_max_blocks;
 void *code_gen_prologue;
 void *code_gen_epilogue;
 void *code_gen_buffer;
@@ -756,6 +755,7 @@ static inline bool tcg_op_buf_full(void)
 /* tb_lock must be held for tcg_malloc_internal. */
 void *tcg_malloc_internal(TCGContext *s, int size);
 void tcg_pool_reset(TCGContext *s);
+TranslationBlock *tcg_tb_alloc(TCGContext *s);
 
 void tb_lock(void);
 void tb_unlock(void);
diff --git a/translate-all.c b/translate-all.c
index b3ee876..bb094ad 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -781,12 +781,13 @@ static inline void code_gen_alloc(size_t tb_size)
 exit(1);
 }
 
-/* Estimate a good size for the number of TBs we can support.  We
-   still haven't deducted the prologue from the buffer size here,
-   but that's minimal and won't affect the estimate much.  */
-tcg_ctx.code_gen_max_blocks
-= tcg_ctx.code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
-tcg_ctx.tb_ctx.tbs = g_new(TranslationBlock, tcg_ctx.code_gen_max_blocks);
+/* size this conservatively -- realloc later if needed */
+tcg_ctx.tb_ctx.tbs_size =
+tcg_ctx.code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE / 8;
+if (unlikely(!tcg_ctx.tb_ctx.tbs_size)) {
+tcg_ctx.tb_ctx.tbs_size = 64 * 1024;
+}
+tcg_ctx.tb_ctx.tbs = g_new(TranslationBlock *, tcg_ctx.tb_ctx.tbs_size);
 
 qemu_mutex_init(_ctx.tb_ctx.tb_lock);
 }
@@ -828,13 +829,20 @@ bool tcg_enabled(void)
 static TranslationBlock *tb_alloc(target_ulong pc)
 {
 TranslationBlock *tb;
+TBContext *ctx;
 
 assert_tb_locked();
 
-if (tcg_ctx.tb_ctx.nb_tbs >=

[Qemu-devel] [PULL 18/20] xics: setup cpu at realize time

2017-06-08 Thread David Gibson

From: Greg Kurz 

Until recently, spapr used to allocate ICPState objects for the lifetime
of the machine. They would only be associated to vCPUs in xics_cpu_setup()
when plugging a CPU core.

Now that ICPState objects have the same lifecycle as vCPUs, it is
possible to associate them during realization.

This patch hence open-codes xics_cpu_setup() in icp_realize(). The vCPU
is passed as a property. Note that the vCPU now needs to be realized first
for the IRQs to be allocated. It also needs to be reset before ICPState
realization in order to synchronize with KVM.

Since ICPState objects are freed when unrealized, xics_cpu_destroy() isn't
needed anymore and can be safely dropped.

Signed-off-by: Greg Kurz 
Reviewed-by: Cédric Le Goater 
Signed-off-by: David Gibson 
---
 hw/intc/xics.c  | 76 +
 hw/ppc/pnv_core.c   | 18 ++--
 hw/ppc/spapr_cpu_core.c | 23 +++
 include/hw/ppc/xics.h   |  3 +-
 4 files changed, 51 insertions(+), 69 deletions(-)

diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index f74a96e..fdbfddf 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -38,50 +38,6 @@
 #include "monitor/monitor.h"
 #include "hw/intc/intc.h"
 
-void xics_cpu_destroy(XICSFabric *xi, PowerPCCPU *cpu)
-{
-CPUState *cs = CPU(cpu);
-ICPState *icp = ICP(cpu->intc);
-
-assert(icp);
-assert(cs == icp->cs);
-
-icp->output = NULL;
-icp->cs = NULL;
-}
-
-void xics_cpu_setup(XICSFabric *xi, PowerPCCPU *cpu, ICPState *icp)
-{
-CPUState *cs = CPU(cpu);
-CPUPPCState *env = >env;
-ICPStateClass *icpc;
-
-assert(icp);
-
-cpu->intc = OBJECT(icp);
-icp->cs = cs;
-
-icpc = ICP_GET_CLASS(icp);
-if (icpc->cpu_setup) {
-icpc->cpu_setup(icp, cpu);
-}
-
-switch (PPC_INPUT(env)) {
-case PPC_FLAGS_INPUT_POWER7:
-icp->output = env->irq_inputs[POWER7_INPUT_INT];
-break;
-
-case PPC_FLAGS_INPUT_970:
-icp->output = env->irq_inputs[PPC970_INPUT_INT];
-break;
-
-default:
-error_report("XICS interrupt controller does not support this CPU "
- "bus model");
-abort();
-}
-}
-
 void icp_pic_print_info(ICPState *icp, Monitor *mon)
 {
 int cpu_index = icp->cs ? icp->cs->cpu_index : -1;
@@ -343,6 +299,8 @@ static void icp_realize(DeviceState *dev, Error **errp)
 {
 ICPState *icp = ICP(dev);
 ICPStateClass *icpc = ICP_GET_CLASS(dev);
+PowerPCCPU *cpu;
+CPUPPCState *env;
 Object *obj;
 Error *err = NULL;
 
@@ -355,6 +313,36 @@ static void icp_realize(DeviceState *dev, Error **errp)
 
 icp->xics = XICS_FABRIC(obj);
 
+obj = object_property_get_link(OBJECT(dev), ICP_PROP_CPU, );
+if (!obj) {
+error_setg(errp, "%s: required link '" ICP_PROP_CPU "' not found: %s",
+   __func__, error_get_pretty(err));
+return;
+}
+
+cpu = POWERPC_CPU(obj);
+cpu->intc = OBJECT(icp);
+icp->cs = CPU(obj);
+
+if (icpc->cpu_setup) {
+icpc->cpu_setup(icp, cpu);
+}
+
+env = >env;
+switch (PPC_INPUT(env)) {
+case PPC_FLAGS_INPUT_POWER7:
+icp->output = env->irq_inputs[POWER7_INPUT_INT];
+break;
+
+case PPC_FLAGS_INPUT_970:
+icp->output = env->irq_inputs[PPC970_INPUT_INT];
+break;
+
+default:
+error_setg(errp, "XICS interrupt controller does not support this CPU 
bus model");
+return;
+}
+
 if (icpc->realize) {
 icpc->realize(icp, errp);
 }
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index 0b6e729..c7b00b6 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -118,20 +118,20 @@ static void pnv_core_realize_child(Object *child, 
XICSFabric *xi, Error **errp)
 PowerPCCPU *cpu = POWERPC_CPU(cs);
 Object *obj;
 
-obj = object_new(TYPE_PNV_ICP);
-object_property_add_child(OBJECT(cpu), "icp", obj, _abort);
-object_unref(obj);
-object_property_add_const_link(obj, ICP_PROP_XICS, OBJECT(xi),
-   _abort);
-object_property_set_bool(obj, true, "realized", _err);
+object_property_set_bool(child, true, "realized", _err);
 if (local_err) {
 error_propagate(errp, local_err);
 return;
 }
 
-object_property_set_bool(child, true, "realized", _err);
+obj = object_new(TYPE_PNV_ICP);
+object_property_add_child(child, "icp", obj, NULL);
+object_unref(obj);
+object_property_add_const_link(obj, ICP_PROP_XICS, OBJECT(xi),
+   _abort);
+object_property_add_const_link(obj, ICP_PROP_CPU, child, _abort);
+object_property_set_bool(obj, true, "realized", _err);
 if (local_err) {
-object_unparent(obj);
 error_propagate(errp, local_err);
 return;
 }
@@ -142,8 +142,6 @@ static void pnv_core_realize_child(Object *child, 
XICSFabric *xi, Error

[Qemu-devel] [PULL 13/20] xics: add reset() handler to ICPStateClass

2017-06-08 Thread David Gibson

From: Greg Kurz 

Taking into account that qemu_set_irq() returns immediately if its first
argument is NULL, icp_kvm_reset() largely duplicates icp_reset().

This patch introduces a reset() handler, so that the common logic can
be implemented in icp_reset() only.

While there we can also drop icp_kvm_realize() and icp_kvm_unrealize(). This
causes icp-kvm to be realized in icp_realize(), which sets icp->xics, but
it has no impact.

Signed-off-by: Greg Kurz 
Reviewed-by: Cédric Le Goater 
Signed-off-by: David Gibson 
---
 hw/intc/xics.c|  5 +
 hw/intc/xics_kvm.c| 27 ++-
 include/hw/ppc/xics.h |  1 +
 3 files changed, 8 insertions(+), 25 deletions(-)

diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index ea35167..ec73f02 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -325,6 +325,7 @@ static const VMStateDescription vmstate_icp_server = {
 static void icp_reset(void *dev)
 {
 ICPState *icp = ICP(dev);
+ICPStateClass *icpc = ICP_GET_CLASS(icp);
 
 icp->xirr = 0;
 icp->pending_priority = 0xff;
@@ -332,6 +333,10 @@ static void icp_reset(void *dev)
 
 /* Make all outputs are deasserted */
 qemu_set_irq(icp->output, 0);
+
+if (icpc->reset) {
+icpc->reset(icp);
+}
 }
 
 static void icp_realize(DeviceState *dev, Error **errp)
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index 14b8f6f..45bf110 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -110,19 +110,8 @@ static int icp_set_kvm_state(ICPState *icp, int version_id)
 return 0;
 }
 
-static void icp_kvm_reset(void *dev)
+static void icp_kvm_reset(ICPState *icp)
 {
-ICPState *icp = ICP(dev);
-
-icp->xirr = 0;
-icp->pending_priority = 0xff;
-icp->mfrr = 0xff;
-
-/* Make all outputs as deasserted only if the CPU thread is in use */
-if (icp->output) {
-qemu_set_irq(icp->output, 0);
-}
-
 icp_set_kvm_state(icp, 1);
 }
 
@@ -159,26 +148,14 @@ static void icp_kvm_cpu_setup(ICPState *icp, PowerPCCPU 
*cpu)
 QLIST_INSERT_HEAD(_enabled_icps, enabled_icp, node);
 }
 
-static void icp_kvm_realize(DeviceState *dev, Error **errp)
-{
-qemu_register_reset(icp_kvm_reset, dev);
-}
-
-static void icp_kvm_unrealize(DeviceState *dev, Error **errp)
-{
-qemu_unregister_reset(icp_kvm_reset, dev);
-}
-
 static void icp_kvm_class_init(ObjectClass *klass, void *data)
 {
-DeviceClass *dc = DEVICE_CLASS(klass);
 ICPStateClass *icpc = ICP_CLASS(klass);
 
-dc->realize = icp_kvm_realize;
-dc->unrealize = icp_kvm_unrealize;
 icpc->pre_save = icp_get_kvm_state;
 icpc->post_load = icp_set_kvm_state;
 icpc->cpu_setup = icp_kvm_cpu_setup;
+icpc->reset = icp_kvm_reset;
 }
 
 static const TypeInfo icp_kvm_info = {
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index a3073f9..40a506e 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -69,6 +69,7 @@ struct ICPStateClass {
 void (*pre_save)(ICPState *s);
 int (*post_load)(ICPState *s, int version_id);
 void (*cpu_setup)(ICPState *icp, PowerPCCPU *cpu);
+void (*reset)(ICPState *icp);
 };
 
 struct ICPState {
-- 
2.9.4

[Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate

2017-06-08 Thread Richard Henderson

This is a follow-up to Emilio's patch set.

My primary changes to Emilio's patches are to the first patch, in
merging the existing implementations from tcg/ppc/tcg-target.inc.c
into util/cacheinfo.c.

Then I've a few follow-up patches to take advantage of the new TB
placement for arm platforms.  I've had a look at the asm output for
ppc64 and s390x, and don't see anything obvious that can be improved.

Changes since v4:
  * Reorganized the first patch a bit for aarch64 and ppc64.
    Re-tested on win32, for which there was a Werror.
    Incorporated feedback from Emilio re macOS.
  * Fixed the short description for the tcg/arm patches.


r~


Emilio G. Cota (2):
  util: add cacheinfo
  tcg: allocate TB structs before the corresponding translated code

Richard Henderson (5):
  tcg/aarch64: Use ADR in tcg_out_movi
  tcg/arm: Use indirect branch for goto_tb
  tcg/arm: Remove limit on code buffer size
  tcg/arm: Try pc-relative addresses for movi
  tcg/arm: Use ldr (literal) for goto_tb

 include/exec/exec-all.h  |   5 +-
 include/exec/tb-context.h|   3 +-
 include/qemu/osdep.h |   3 +
 tcg/aarch64/tcg-target.inc.c |   7 +-
 tcg/arm/tcg-target.inc.c |  82 +++
 tcg/ppc/tcg-target.inc.c |  71 +
 tcg/tcg.c|  20 +
 tcg/tcg.h|   2 +-
 translate-all.c  |  41 ++
 util/Makefile.objs   |   1 +
 util/cacheinfo.c | 185 +++
 11 files changed, 293 insertions(+), 127 deletions(-)
 create mode 100644 util/cacheinfo.c

-- 
2.9.4

[Qemu-devel] [PULL 15/20] hw/cpu: core.c can be compiled as common object

2017-06-08 Thread David Gibson

From: Thomas Huth 

There does not seem to be any target specific code in core.c, so we can
put it into "common-obj" instead of "obj" to compile it only once for
all targets.

Signed-off-by: Thomas Huth 
Reviewed-by: Juan Quintela 
Signed-off-by: David Gibson 
---
 hw/cpu/Makefile.objs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/hw/cpu/Makefile.objs b/hw/cpu/Makefile.objs
index 942a4bb..cd52d20 100644
--- a/hw/cpu/Makefile.objs
+++ b/hw/cpu/Makefile.objs
@@ -2,5 +2,4 @@ obj-$(CONFIG_ARM11MPCORE) += arm11mpcore.o
 obj-$(CONFIG_REALVIEW) += realview_mpcore.o
 obj-$(CONFIG_A9MPCORE) += a9mpcore.o
 obj-$(CONFIG_A15MPCORE) += a15mpcore.o
-obj-y += core.o
-
+common-obj-y += core.o
-- 
2.9.4

[Qemu-devel] [PULL 20/20] Revert "spapr: fix memory hot-unplugging"

2017-06-08 Thread David Gibson

From: Laurent Vivier 

This reverts commit fe6824d12642b005c69123ecf8631f9b13553f8b.

Conflicts in hw/ppc/spapr_drc.c, because get_index() has been renamed
to spapr_drc_index().

This didn't fix the problem. Once the hotplug has started, some
memory and some structures are allocated. We don't free them when
we ignore the unplug, and we can't, because they can be in use by
the kernel.

Signed-off-by: Laurent Vivier 
Tested-by: Daniel Barboza 
Signed-off-by: David Gibson 
---
 hw/ppc/spapr_drc.c | 20 +++-
 include/hw/ppc/spapr_drc.h |  1 -
 2 files changed, 3 insertions(+), 18 deletions(-)

diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index 15ef67d..5cb75bb 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -128,17 +128,6 @@ static uint32_t set_allocation_state(sPAPRDRConnector *drc,
 if (!drc->dev) {
 return RTAS_OUT_NO_SUCH_INDICATOR;
 }
-if (drc->awaiting_release && drc->awaiting_allocation) {
-/* kernel is acknowledging a previous hotplug event
- * while we are already removing it.
- * it's safe to ignore awaiting_allocation here since we know the
- * situation is predicated on the guest either already having done
- * so (boot-time hotplug), or never being able to acquire in the
- * first place (hotplug followed by immediate unplug).
- */
-drc->awaiting_allocation_skippable = true;
-return RTAS_OUT_NO_SUCH_INDICATOR;
-}
 }
 
 if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) {
@@ -391,11 +380,9 @@ void spapr_drc_detach(sPAPRDRConnector *drc, DeviceState 
*d, Error **errp)
 }
 
 if (drc->awaiting_allocation) {
-if (!drc->awaiting_allocation_skippable) {
-drc->awaiting_release = true;
-trace_spapr_drc_awaiting_allocation(spapr_drc_index(drc));
-return;
-}
+drc->awaiting_release = true;
+trace_spapr_drc_awaiting_allocation(spapr_drc_index(drc));
+return;
 }
 
 drc->dr_indicator = SPAPR_DR_INDICATOR_INACTIVE;
@@ -418,7 +405,6 @@ void spapr_drc_detach(sPAPRDRConnector *drc, DeviceState 
*d, Error **errp)
 }
 
 drc->awaiting_release = false;
-drc->awaiting_allocation_skippable = false;
 g_free(drc->fdt);
 drc->fdt = NULL;
 drc->fdt_start_offset = 0;
diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h
index c487123..bc9f988 100644
--- a/include/hw/ppc/spapr_drc.h
+++ b/include/hw/ppc/spapr_drc.h
@@ -201,7 +201,6 @@ typedef struct sPAPRDRConnector {
 bool awaiting_release;
 bool signalled;
 bool awaiting_allocation;
-bool awaiting_allocation_skippable;
 
 /* device pointer, via link property */
 DeviceState *dev;
-- 
2.9.4

[Qemu-devel] [PULL 19/20] xics: drop ICPStateClass::cpu_setup() handler

2017-06-08 Thread David Gibson

From: Greg Kurz 

The cpu_setup() handler is only implemented by xics_kvm, where it really
does a typical "realize" job. Moreover, the realize() handler is called
shortly after cpu_setup(), on the same path.

This patch converts xics_kvm to implement realize() instead of cpu_setup().

Signed-off-by: Greg Kurz 
Reviewed-by: Cédric Le Goater 
Signed-off-by: David Gibson 
---
 hw/intc/xics.c|  4 
 hw/intc/xics_kvm.c| 12 ++--
 include/hw/ppc/xics.h |  1 -
 3 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/hw/intc/xics.c b/hw/intc/xics.c
index fdbfddf..7ccfb53 100644
--- a/hw/intc/xics.c
+++ b/hw/intc/xics.c
@@ -324,10 +324,6 @@ static void icp_realize(DeviceState *dev, Error **errp)
 cpu->intc = OBJECT(icp);
 icp->cs = CPU(obj);
 
-if (icpc->cpu_setup) {
-icpc->cpu_setup(icp, cpu);
-}
-
 env = >env;
 switch (PPC_INPUT(env)) {
 case PPC_FLAGS_INPUT_POWER7:
diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
index 41c5b94..3091ad3 100644
--- a/hw/intc/xics_kvm.c
+++ b/hw/intc/xics_kvm.c
@@ -115,9 +115,9 @@ static void icp_kvm_reset(ICPState *icp)
 icp_set_kvm_state(icp, 1);
 }
 
-static void icp_kvm_cpu_setup(ICPState *icp, PowerPCCPU *cpu)
+static void icp_kvm_realize(ICPState *icp, Error **errp)
 {
-CPUState *cs = CPU(cpu);
+CPUState *cs = icp->cs;
 KVMEnabledICP *enabled_icp;
 unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
 int ret;
@@ -139,9 +139,9 @@ static void icp_kvm_cpu_setup(ICPState *icp, PowerPCCPU 
*cpu)
 
 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_IRQ_XICS, 0, kernel_xics_fd, 
vcpu_id);
 if (ret < 0) {
-error_report("Unable to connect CPU%ld to kernel XICS: %s", vcpu_id,
- strerror(errno));
-exit(1);
+error_setg(errp, "Unable to connect CPU%ld to kernel XICS: %s", 
vcpu_id,
+   strerror(errno));
+return;
 }
 enabled_icp = g_malloc(sizeof(*enabled_icp));
 enabled_icp->vcpu_id = vcpu_id;
@@ -154,7 +154,7 @@ static void icp_kvm_class_init(ObjectClass *klass, void 
*data)
 
 icpc->pre_save = icp_get_kvm_state;
 icpc->post_load = icp_set_kvm_state;
-icpc->cpu_setup = icp_kvm_cpu_setup;
+icpc->realize = icp_kvm_realize;
 icpc->reset = icp_kvm_reset;
 }
 
diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
index 37b8fb1..28d248a 100644
--- a/include/hw/ppc/xics.h
+++ b/include/hw/ppc/xics.h
@@ -68,7 +68,6 @@ struct ICPStateClass {
 void (*realize)(ICPState *icp, Error **errp);
 void (*pre_save)(ICPState *icp);
 int (*post_load)(ICPState *icp, int version_id);
-void (*cpu_setup)(ICPState *icp, PowerPCCPU *cpu);
 void (*reset)(ICPState *icp);
 };
 
-- 
2.9.4

[Qemu-devel] [PULL 04/20] pseries: Correct panic behaviour for pseries machine type

2017-06-08 Thread David Gibson

The pseries machine type doesn't usually use the 'pvpanic' device as such,
because it has a firmware/hypervisor facility with roughly the same
purpose.  The 'ibm,os-term' RTAS call notifies the hypervisor that the
guest has crashed.

Our implementation of this call was sending a GUEST_PANICKED qmp event;
however, it was not doing the other usual panic actions, making its
behaviour different from pvpanic for no good reason.

To correct this, we should call qemu_system_guest_panicked() rather than
directly sending the panic event.

Signed-off-by: David Gibson 
Reviewed-by: Thomas Huth 
---
 hw/ppc/spapr_rtas.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 707c4d4..94a2799 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -293,12 +293,9 @@ static void rtas_ibm_os_term(PowerPCCPU *cpu,
 target_ulong args,
 uint32_t nret, target_ulong rets)
 {
-target_ulong ret = 0;
+qemu_system_guest_panicked(NULL);
 
-qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE, false, NULL,
-   _abort);
-
-rtas_st(rets, 0, ret);
+rtas_st(rets, 0, RTAS_OUT_SUCCESS);
 }
 
 static void rtas_set_power_level(PowerPCCPU *cpu, sPAPRMachineState *spapr,
-- 
2.9.4

[Qemu-devel] [PULL 07/20] spapr: Clean up RTAS set-indicator

2017-06-08 Thread David Gibson

In theory the RTAS set-indicator call can be used for a number of
"indicators" defined by PAPR.  In practice the only ones we're ever likely
to implement are those used for Dynamic Reconfiguration (i.e. hotplug).
Because of this, the current implementation determines the associated DRC
object, before dispatching based on the type of indicator.

However, this means we also need a check that we're dealing with a DR
related indicator at all, which duplicates some of the logic from the
switch further down.

Even though it means a bit of code duplication, things work out cleaner if
we delegate the DRC lookup to the individual indicator type functions -
and it also allows some further cleanups.

While we're there, remove references to "sensor", a copy/paste artefact
from the related, but distinct "get-sensor" call.

Signed-off-by: David Gibson 
Reviewed-by: Michael Roth 
Acked-by: Michael Roth 
---
 hw/ppc/spapr_drc.c  | 84 -
 hw/ppc/trace-events |  2 --
 2 files changed, 44 insertions(+), 42 deletions(-)

diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index cabeacf..f0cff0b 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -903,74 +903,78 @@ out:
  * RTAS calls
  */
 
-static bool sensor_type_is_dr(uint32_t sensor_type)
+static uint32_t rtas_set_isolation_state(uint32_t idx, uint32_t state)
 {
-switch (sensor_type) {
-case RTAS_SENSOR_TYPE_ISOLATION_STATE:
-case RTAS_SENSOR_TYPE_DR:
-case RTAS_SENSOR_TYPE_ALLOCATION_STATE:
-return true;
+sPAPRDRConnector *drc = spapr_drc_by_index(idx);
+sPAPRDRConnectorClass *drck;
+
+if (!drc) {
+return RTAS_OUT_PARAM_ERROR;
 }
 
-return false;
+drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+return drck->set_isolation_state(drc, state);
 }
 
-static void rtas_set_indicator(PowerPCCPU *cpu, sPAPRMachineState *spapr,
-   uint32_t token, uint32_t nargs,
-   target_ulong args, uint32_t nret,
-   target_ulong rets)
+static uint32_t rtas_set_allocation_state(uint32_t idx, uint32_t state)
 {
-uint32_t sensor_type;
-uint32_t sensor_index;
-uint32_t sensor_state;
-uint32_t ret = RTAS_OUT_SUCCESS;
-sPAPRDRConnector *drc;
+sPAPRDRConnector *drc = spapr_drc_by_index(idx);
 sPAPRDRConnectorClass *drck;
 
-if (nargs != 3 || nret != 1) {
-ret = RTAS_OUT_PARAM_ERROR;
-goto out;
+if (!drc) {
+return RTAS_OUT_PARAM_ERROR;
 }
 
-sensor_type = rtas_ld(args, 0);
-sensor_index = rtas_ld(args, 1);
-sensor_state = rtas_ld(args, 2);
+drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+return drck->set_allocation_state(drc, state);
+}
 
-if (!sensor_type_is_dr(sensor_type)) {
-goto out_unimplemented;
-}
+static uint32_t rtas_set_indicator_state(uint32_t idx, uint32_t state)
+{
+sPAPRDRConnector *drc = spapr_drc_by_index(idx);
+sPAPRDRConnectorClass *drck;
 
-/* if this is a DR sensor we can assume sensor_index == drc_index */
-drc = spapr_drc_by_index(sensor_index);
 if (!drc) {
-trace_spapr_rtas_set_indicator_invalid(sensor_index);
+return RTAS_OUT_PARAM_ERROR;
+}
+
+drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+return drck->set_indicator_state(drc, state);
+}
+
+static void rtas_set_indicator(PowerPCCPU *cpu, sPAPRMachineState *spapr,
+   uint32_t token,
+   uint32_t nargs, target_ulong args,
+   uint32_t nret, target_ulong rets)
+{
+uint32_t type, idx, state;
+uint32_t ret = RTAS_OUT_SUCCESS;
+
+if (nargs != 3 || nret != 1) {
 ret = RTAS_OUT_PARAM_ERROR;
 goto out;
 }
-drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
 
-switch (sensor_type) {
+type = rtas_ld(args, 0);
+idx = rtas_ld(args, 1);
+state = rtas_ld(args, 2);
+
+switch (type) {
 case RTAS_SENSOR_TYPE_ISOLATION_STATE:
-ret = drck->set_isolation_state(drc, sensor_state);
+ret = rtas_set_isolation_state(idx, state);
 break;
 case RTAS_SENSOR_TYPE_DR:
-ret = drck->set_indicator_state(drc, sensor_state);
+ret = rtas_set_indicator_state(idx, state);
 break;
 case RTAS_SENSOR_TYPE_ALLOCATION_STATE:
-ret = drck->set_allocation_state(drc, sensor_state);
+ret = rtas_set_allocation_state(idx, state);
 break;
 default:
-goto out_unimplemented;
+ret = RTAS_OUT_NOT_SUPPORTED;
 }
 
 out:
 rtas_st(rets, 0, ret);
-return;
-
-out_unimplemented:
-/* currently only DR-related sensors are implemented */
-trace_spapr_rtas_set_indicator_not_supported(sensor_index, sensor_type);
-rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
 }
 
 static void rtas_get_sensor_state(PowerPCCPU *cpu,

[Qemu-devel] [PULL 05/20] spapr: Clean up DR entity sense handling

2017-06-08 Thread David Gibson

DRC classes have an entity_sense method to determine (in a specific PAPR
sense) the presence or absence of a device plugged into a DRC.  However,
we only have one implementation of the method, which explicitly tests for
different DRC types.  This changes it to instead have different method
implementations for the two cases: "logical" and "physical" DRCs.

While we're at it, the entity sense method always returns RTAS_OUT_SUCCESS,
and the interesting value is returned via pass-by-reference.  Simplify this
to directly return the value we care about.

Signed-off-by: David Gibson 
Reviewed-by: Michael Roth 
Acked-by: Michael Roth 
---
 hw/ppc/spapr_drc.c | 72 ++
 hw/ppc/spapr_pci.c |  6 ++--
 hw/ppc/trace-events|  1 -
 include/hw/ppc/spapr_drc.h |  4 +--
 4 files changed, 40 insertions(+), 43 deletions(-)

diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index 39e7f30..cabeacf 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -185,39 +185,25 @@ static void set_signalled(sPAPRDRConnector *drc)
  * based on the current allocation/indicator/power states
  * for the DR connector.
  */
-static uint32_t entity_sense(sPAPRDRConnector *drc, sPAPRDREntitySense *state)
+static sPAPRDREntitySense physical_entity_sense(sPAPRDRConnector *drc)
 {
-if (drc->dev) {
-if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI &&
-drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) {
-/* for logical DR, we return a state of UNUSABLE
- * iff the allocation state UNUSABLE.
- * Otherwise, report the state as USABLE/PRESENT,
- * as we would for PCI.
- */
-*state = SPAPR_DR_ENTITY_SENSE_UNUSABLE;
-} else {
-/* this assumes all PCI devices are assigned to
- * a 'live insertion' power domain, where QEMU
- * manages power state automatically as opposed
- * to the guest. present, non-PCI resources are
- * unaffected by power state.
- */
-*state = SPAPR_DR_ENTITY_SENSE_PRESENT;
-}
+/* this assumes all PCI devices are assigned to a 'live insertion'
+ * power domain, where QEMU manages power state automatically as
+ * opposed to the guest. present, non-PCI resources are unaffected
+ * by power state.
+ */
+return drc->dev ? SPAPR_DR_ENTITY_SENSE_PRESENT
+: SPAPR_DR_ENTITY_SENSE_EMPTY;
+}
+
+static sPAPRDREntitySense logical_entity_sense(sPAPRDRConnector *drc)
+{
+if (drc->dev
+&& (drc->allocation_state != SPAPR_DR_ALLOCATION_STATE_UNUSABLE)) {
+return SPAPR_DR_ENTITY_SENSE_PRESENT;
 } else {
-if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_PCI) {
-/* PCI devices, and only PCI devices, use EMPTY
- * in cases where we'd otherwise use UNUSABLE
- */
-*state = SPAPR_DR_ENTITY_SENSE_EMPTY;
-} else {
-*state = SPAPR_DR_ENTITY_SENSE_UNUSABLE;
-}
+return SPAPR_DR_ENTITY_SENSE_UNUSABLE;
 }
-
-trace_spapr_drc_entity_sense(spapr_drc_index(drc), *state);
-return RTAS_OUT_SUCCESS;
 }
 
 static void prop_get_index(Object *obj, Visitor *v, const char *name,
@@ -445,7 +431,6 @@ static void reset(DeviceState *d)
 {
 sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(d);
 sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
-sPAPRDREntitySense state;
 
 trace_spapr_drc_reset(spapr_drc_index(drc));
 
@@ -477,8 +462,7 @@ static void reset(DeviceState *d)
 }
 }
 
-drck->entity_sense(drc, );
-if (state == SPAPR_DR_ENTITY_SENSE_PRESENT) {
+if (drck->dr_entity_sense(drc) == SPAPR_DR_ENTITY_SENSE_PRESENT) {
 drck->set_signalled(drc);
 }
 }
@@ -488,8 +472,7 @@ static bool spapr_drc_needed(void *opaque)
 sPAPRDRConnector *drc = (sPAPRDRConnector *)opaque;
 sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
 bool rc = false;
-sPAPRDREntitySense value;
-drck->entity_sense(drc, );
+sPAPRDREntitySense value = drck->dr_entity_sense(drc);
 
 /* If no dev is plugged in there is no need to migrate the DRC state */
 if (value != SPAPR_DR_ENTITY_SENSE_PRESENT) {
@@ -667,7 +650,6 @@ static void spapr_dr_connector_class_init(ObjectClass *k, 
void *data)
 drck->set_indicator_state = set_indicator_state;
 drck->set_allocation_state = set_allocation_state;
 drck->get_name = get_name;
-drck->entity_sense = entity_sense;
 drck->attach = attach;
 drck->detach = detach;
 drck->release_pending = release_pending;
@@ -678,6 +660,20 @@ static void spapr_dr_connector_class_init(ObjectClass *k, 
void *data)
 dk->user_creatable = false;
 }
 
+static void spapr_drc_physical_class_init(ObjectClass *k, void *data)
+{
+sPAPRDRConnectorClass

[Qemu-devel] [PULL 09/20] spapr: Change DRC attach & detach methods to functions

2017-06-08 Thread David Gibson

DRC objects have attach & detach methods, but there's only one
implementation.  Although there are some differences in its behaviour for
different DRC types, the overall structure is the same, so while we might
want different method implementations for some parts, we're unlikely to
want them for the top-level functions.

So, replace them with direct function calls.

Signed-off-by: David Gibson 
Reviewed-by: Michael Roth 
Acked-by: Michael Roth 
---
 hw/ppc/spapr.c | 19 ++-
 hw/ppc/spapr_drc.c | 18 ++
 hw/ppc/spapr_pci.c |  9 +++--
 include/hw/ppc/spapr_drc.h |  7 ---
 4 files changed, 19 insertions(+), 34 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index b2311dc..e340ff3 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2523,7 +2523,6 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t 
addr_start, uint64_t size,
Error **errp)
 {
 sPAPRDRConnector *drc;
-sPAPRDRConnectorClass *drck;
 uint32_t nr_lmbs = size/SPAPR_MEMORY_BLOCK_SIZE;
 int i, fdt_offset, fdt_size;
 void *fdt;
@@ -2538,10 +2537,10 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t 
addr_start, uint64_t size,
 fdt_offset = spapr_populate_memory_node(fdt, node, addr,
 SPAPR_MEMORY_BLOCK_SIZE);
 
-drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
-drck->attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, errp);
+spapr_drc_attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, errp);
 addr += SPAPR_MEMORY_BLOCK_SIZE;
 if (!dev->hotplugged) {
+sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
 /* guests expect coldplugged LMBs to be pre-allocated */
 drck->set_allocation_state(drc, SPAPR_DR_ALLOCATION_STATE_USABLE);
 drck->set_isolation_state(drc, 
SPAPR_DR_ISOLATION_STATE_UNISOLATED);
@@ -2554,7 +2553,6 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t 
addr_start, uint64_t size,
 if (dedicated_hp_event_source) {
 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
   addr_start / SPAPR_MEMORY_BLOCK_SIZE);
-drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
 spapr_hotplug_req_add_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB,
nr_lmbs,
spapr_drc_index(drc));
@@ -2742,7 +2740,6 @@ static void spapr_memory_unplug_request(HotplugHandler 
*hotplug_dev,
 uint64_t addr_start, addr;
 int i;
 sPAPRDRConnector *drc;
-sPAPRDRConnectorClass *drck;
 sPAPRDIMMState *ds;
 
 addr_start = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP,
@@ -2762,14 +2759,12 @@ static void spapr_memory_unplug_request(HotplugHandler 
*hotplug_dev,
   addr / SPAPR_MEMORY_BLOCK_SIZE);
 g_assert(drc);
 
-drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
-drck->detach(drc, dev, errp);
+spapr_drc_detach(drc, dev, errp);
 addr += SPAPR_MEMORY_BLOCK_SIZE;
 }
 
 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
   addr_start / SPAPR_MEMORY_BLOCK_SIZE);
-drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
 spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB,
   nr_lmbs, spapr_drc_index(drc));
 out:
@@ -2824,7 +2819,6 @@ void spapr_core_unplug_request(HotplugHandler 
*hotplug_dev, DeviceState *dev,
 {
 int index;
 sPAPRDRConnector *drc;
-sPAPRDRConnectorClass *drck;
 Error *local_err = NULL;
 CPUCore *cc = CPU_CORE(dev);
 int smt = kvmppc_smt_threads();
@@ -2842,8 +2836,7 @@ void spapr_core_unplug_request(HotplugHandler 
*hotplug_dev, DeviceState *dev,
 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_CPU, index * smt);
 g_assert(drc);
 
-drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
-drck->detach(drc, dev, _err);
+spapr_drc_detach(drc, dev, _err);
 if (local_err) {
 error_propagate(errp, local_err);
 return;
@@ -2887,8 +2880,8 @@ static void spapr_core_plug(HotplugHandler *hotplug_dev, 
DeviceState *dev,
 }
 
 if (drc) {
-sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
-drck->attach(drc, dev, fdt, fdt_offset, !dev->hotplugged, _err);
+spapr_drc_attach(drc, dev, fdt, fdt_offset, !dev->hotplugged,
+ _err);
 if (local_err) {
 g_free(fdt);
 error_propagate(errp, local_err);
diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index 19e2c0d..82c0b94 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -49,8 +49,6 @@ uint32_t spapr_drc_index(sPAPRDRConnector *drc)
 static uint32_t set_isolation_state(sPAPRDRConnector *drc,

[Qemu-devel] [PULL 01/20] target/ppc: pass const string to kvmppc_is_mem_backend_page_size_ok()

2017-06-08 Thread David Gibson

From: Greg Kurz 

This function has three implementations. Two are stubs that do nothing
and the third one only passes the obj_path argument to:

Object *object_resolve_path(const char *path, bool *ambiguous);

Signed-off-by: Greg Kurz 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Thomas Huth 
Signed-off-by: David Gibson 
---
 target/ppc/kvm.c | 4 ++--
 target/ppc/kvm_ppc.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 51249ce..8881762 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -478,7 +478,7 @@ static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 }
 }
 
-bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
+bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
 {
 Object *mem_obj = object_resolve_path(obj_path, NULL);
 char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
@@ -499,7 +499,7 @@ static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 {
 }
 
-bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
+bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
 {
 return true;
 }
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index f48243d..eab7c8f 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -64,7 +64,7 @@ int kvmppc_enable_hwrng(void);
 int kvmppc_put_books_sregs(PowerPCCPU *cpu);
 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void);
 
-bool kvmppc_is_mem_backend_page_size_ok(char *obj_path);
+bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path);
 
 #else
 
@@ -211,7 +211,7 @@ static inline uint64_t kvmppc_rma_size(uint64_t 
current_size,
 return ram_size;
 }
 
-static inline bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
+static inline bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
 {
 return true;
 }
-- 
2.9.4

[Qemu-devel] [PULL 10/20] spapr: Fold spapr_phb_{add, remove}_pci_device() into their only callers

2017-06-08 Thread David Gibson

Both functions are fairly short, and so are their callers.  There's no
particular logical distinction between them, so fold them together.

Signed-off-by: David Gibson 
Reviewed-by: Michael Roth 
Acked-by: Michael Roth 
---
 hw/ppc/spapr_pci.c | 63 --
 1 file changed, 23 insertions(+), 40 deletions(-)

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 56cb4b8..0b447f2 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1344,30 +1344,6 @@ static int spapr_create_pci_child_dt(sPAPRPHBState *phb, 
PCIDevice *dev,
 return offset;
 }
 
-static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
- sPAPRPHBState *phb,
- PCIDevice *pdev,
- Error **errp)
-{
-DeviceState *dev = DEVICE(pdev);
-void *fdt = NULL;
-int fdt_start_offset = 0, fdt_size;
-
-fdt = create_device_tree(_size);
-fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0);
-if (!fdt_start_offset) {
-error_setg(errp, "Failed to create pci child device tree node");
-goto out;
-}
-
-spapr_drc_attach(drc, DEVICE(pdev),
- fdt, fdt_start_offset, !dev->hotplugged, errp);
-out:
-if (*errp) {
-g_free(fdt);
-}
-}
-
 /* Callback to be called during DRC release. */
 void spapr_phb_remove_pci_device_cb(DeviceState *dev)
 {
@@ -1385,14 +1361,6 @@ void spapr_phb_remove_pci_device_cb(DeviceState *dev)
 object_unparent(OBJECT(dev));
 }
 
-static void spapr_phb_remove_pci_device(sPAPRDRConnector *drc,
-sPAPRPHBState *phb,
-PCIDevice *pdev,
-Error **errp)
-{
-spapr_drc_detach(drc, DEVICE(pdev), errp);
-}
-
 static sPAPRDRConnector *spapr_phb_get_pci_func_drc(sPAPRPHBState *phb,
 uint32_t busnr,
 int32_t devfn)
@@ -1429,6 +1397,8 @@ static void spapr_phb_hot_plug_child(HotplugHandler 
*plug_handler,
 Error *local_err = NULL;
 PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)));
 uint32_t slotnr = PCI_SLOT(pdev->devfn);
+void *fdt = NULL;
+int fdt_start_offset, fdt_size;
 
 /* if DR is disabled we don't need to do anything in the case of
  * hotplug or coldplug callbacks
@@ -1438,10 +1408,10 @@ static void spapr_phb_hot_plug_child(HotplugHandler 
*plug_handler,
  * we need to let them know it's not enabled
  */
 if (plugged_dev->hotplugged) {
-error_setg(errp, QERR_BUS_NO_HOTPLUG,
+error_setg(_err, QERR_BUS_NO_HOTPLUG,
object_get_typename(OBJECT(phb)));
 }
-return;
+goto out;
 }
 
 g_assert(drc);
@@ -1452,16 +1422,23 @@ static void spapr_phb_hot_plug_child(HotplugHandler 
*plug_handler,
  */
 if (plugged_dev->hotplugged && bus->devices[PCI_DEVFN(slotnr, 0)] &&
 PCI_FUNC(pdev->devfn) != 0) {
-error_setg(errp, "PCI: slot %d function 0 already ocuppied by %s,"
+error_setg(_err, "PCI: slot %d function 0 already ocuppied by 
%s,"
" additional functions can no longer be exposed to guest.",
slotnr, bus->devices[PCI_DEVFN(slotnr, 0)]->name);
-return;
+goto out;
+}
+
+fdt = create_device_tree(_size);
+fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0);
+if (!fdt_start_offset) {
+error_setg(_err, "Failed to create pci child device tree node");
+goto out;
 }
 
-spapr_phb_add_pci_device(drc, phb, pdev, _err);
+spapr_drc_attach(drc, DEVICE(pdev), fdt, fdt_start_offset,
+ !plugged_dev->hotplugged, _err);
 if (local_err) {
-error_propagate(errp, local_err);
-return;
+goto out;
 }
 
 /* If this is function 0, signal hotplug for all the device functions.
@@ -1485,6 +1462,12 @@ static void spapr_phb_hot_plug_child(HotplugHandler 
*plug_handler,
 }
 }
 }
+
+out:
+if (local_err) {
+error_propagate(errp, local_err);
+g_free(fdt);
+}
 }
 
 static void spapr_phb_hot_unplug_child(HotplugHandler *plug_handler,
@@ -1531,7 +1514,7 @@ static void spapr_phb_hot_unplug_child(HotplugHandler 
*plug_handler,
 }
 }
 
-spapr_phb_remove_pci_device(drc, phb, pdev, _err);
+spapr_drc_detach(drc, DEVICE(pdev), _err);
 if (local_err) {
 error_propagate(errp, local_err);
 return;
-- 
2.9.4

[Qemu-devel] [PULL 08/20] spapr: Clean up handling of DR-indicator

2017-06-08 Thread David Gibson

There are 3 types of "indicator" associated with hotplug in the PAPR spec:
the "allocation state", "isolation state" and "DR-indicator".  The first
two are intimately tied to the various state transitions associated with
hotplug.  The DR-indicator, however, is different and simpler.

It's basically just a guest controlled variable which can be used by the
guest to flag state or problems associated with a device.  The idea is that
the hypervisor can use it to present information back on management
consoles (on some machines with PowerVM it may even control physical LEDs
on the machine case associated with the relevant device).

For that reason, there's only ever likely to be a single update
implementation so the set_indicator_state method isn't useful.  Replace it
with a direct function call.

While we're there, make some small associated cleanups:
  * PAPR doesn't use the term "indicator state", just "DR-indicator" and
the allocation state and isolation state are also considered "indicators".
Rename things to be less confusing
  * Fold set_indicator_state() and rtas_set_indicator_state() into a single
rtas_set_dr_indicator() function.

Signed-off-by: David Gibson 
Reviewed-by: Michael Roth 
Acked-by: Michael Roth 
---
 hw/ppc/spapr_drc.c | 25 -
 hw/ppc/trace-events|  2 +-
 include/hw/ppc/spapr_drc.h | 16 
 3 files changed, 17 insertions(+), 26 deletions(-)

diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index f0cff0b..19e2c0d 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -116,14 +116,6 @@ static uint32_t set_isolation_state(sPAPRDRConnector *drc,
 return RTAS_OUT_SUCCESS;
 }
 
-static uint32_t set_indicator_state(sPAPRDRConnector *drc,
-sPAPRDRIndicatorState state)
-{
-trace_spapr_drc_set_indicator_state(spapr_drc_index(drc), state);
-drc->indicator_state = state;
-return RTAS_OUT_SUCCESS;
-}
-
 static uint32_t set_allocation_state(sPAPRDRConnector *drc,
  sPAPRDRAllocationState state)
 {
@@ -321,7 +313,7 @@ static void attach(sPAPRDRConnector *drc, DeviceState *d, 
void *fdt,
 if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_PCI) {
 drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED;
 }
-drc->indicator_state = SPAPR_DR_INDICATOR_STATE_ACTIVE;
+drc->dr_indicator = SPAPR_DR_INDICATOR_ACTIVE;
 
 drc->dev = d;
 drc->fdt = fdt;
@@ -394,7 +386,7 @@ static void detach(sPAPRDRConnector *drc, DeviceState *d, 
Error **errp)
 }
 }
 
-drc->indicator_state = SPAPR_DR_INDICATOR_STATE_INACTIVE;
+drc->dr_indicator = SPAPR_DR_INDICATOR_INACTIVE;
 
 /* Calling release callbacks based on spapr_drc_type(drc). */
 switch (spapr_drc_type(drc)) {
@@ -507,7 +499,7 @@ static const VMStateDescription vmstate_spapr_drc = {
 .fields  = (VMStateField []) {
 VMSTATE_UINT32(isolation_state, sPAPRDRConnector),
 VMSTATE_UINT32(allocation_state, sPAPRDRConnector),
-VMSTATE_UINT32(indicator_state, sPAPRDRConnector),
+VMSTATE_UINT32(dr_indicator, sPAPRDRConnector),
 VMSTATE_BOOL(configured, sPAPRDRConnector),
 VMSTATE_BOOL(awaiting_release, sPAPRDRConnector),
 VMSTATE_BOOL(awaiting_allocation, sPAPRDRConnector),
@@ -647,7 +639,6 @@ static void spapr_dr_connector_class_init(ObjectClass *k, 
void *data)
 dk->realize = realize;
 dk->unrealize = unrealize;
 drck->set_isolation_state = set_isolation_state;
-drck->set_indicator_state = set_indicator_state;
 drck->set_allocation_state = set_allocation_state;
 drck->get_name = get_name;
 drck->attach = attach;
@@ -929,17 +920,17 @@ static uint32_t rtas_set_allocation_state(uint32_t idx, 
uint32_t state)
 return drck->set_allocation_state(drc, state);
 }
 
-static uint32_t rtas_set_indicator_state(uint32_t idx, uint32_t state)
+static uint32_t rtas_set_dr_indicator(uint32_t idx, uint32_t state)
 {
 sPAPRDRConnector *drc = spapr_drc_by_index(idx);
-sPAPRDRConnectorClass *drck;
 
 if (!drc) {
 return RTAS_OUT_PARAM_ERROR;
 }
 
-drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
-return drck->set_indicator_state(drc, state);
+trace_spapr_drc_set_dr_indicator(idx, state);
+drc->dr_indicator = state;
+return RTAS_OUT_SUCCESS;
 }
 
 static void rtas_set_indicator(PowerPCCPU *cpu, sPAPRMachineState *spapr,
@@ -964,7 +955,7 @@ static void rtas_set_indicator(PowerPCCPU *cpu, 
sPAPRMachineState *spapr,
 ret = rtas_set_isolation_state(idx, state);
 break;
 case RTAS_SENSOR_TYPE_DR:
-ret = rtas_set_indicator_state(idx, state);
+ret = rtas_set_dr_indicator(idx, state);
 break;
 case RTAS_SENSOR_TYPE_ALLOCATION_STATE:
 ret = rtas_set_allocation_state(idx, state);
diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events
index

[Qemu-devel] [PULL 06/20] spapr: Don't misuse DR-indicator in spapr_recover_pending_dimm_state()

2017-06-08 Thread David Gibson

With some combinations of migration and hotplug we can lose temporary state
indicating how many DRCs (guest side hotplug handles) are still connected
to a DIMM object in the process of removal.  When we hit that situation
spapr_recover_pending_dimm_state() is used to scan more extensively and
work out the right number.

It does this using drc->indicator_state to determine what state of
disconnection the DRC is in.  However, this is not safe, because the
indicator state is guest settable - in fact it's more-or-less a purely
guest->host notification mechanism which should have no bearing on the
internals of hotplug state management.

So, replace the test for this with a test on drc->dev, which is a purely
qemu side managed variable, and updated in the same BQL critical section as
the indicator state.

This does introduce an off-by-one change, because the indicator state was
updated before the call to spapr_lmb_release() on the current DRC, whereas
drc->dev is updated afterwards.  That's corrected by always decrementing
the nr_lmbs value instead of only doing so in the case where we didn't
have to recover information.

Signed-off-by: David Gibson 
Reviewed-by: Michael Roth 
Acked-by: Michael Roth 
---
 hw/ppc/spapr.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 9b7ae28..b2311dc 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2676,7 +2676,7 @@ static sPAPRDIMMState 
*spapr_recover_pending_dimm_state(sPAPRMachineState *ms,
 drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
   addr / SPAPR_MEMORY_BLOCK_SIZE);
 g_assert(drc);
-if (drc->indicator_state != SPAPR_DR_INDICATOR_STATE_INACTIVE) {
+if (drc->dev) {
 avail_lmbs++;
 }
 addr += SPAPR_MEMORY_BLOCK_SIZE;
@@ -2700,10 +2700,11 @@ void spapr_lmb_release(DeviceState *dev)
  * during the unplug process. In this case recover it. */
 if (ds == NULL) {
 ds = spapr_recover_pending_dimm_state(spapr, PC_DIMM(dev));
-if (ds->nr_lmbs) {
-return;
-}
-} else if (--ds->nr_lmbs) {
+/* The DRC being examined by the caller at least must be counted */
+g_assert(ds->nr_lmbs);
+}
+
+if (--ds->nr_lmbs) {
 return;
 }
 
-- 
2.9.4

[Qemu-devel] [PULL 00/20] ppc-for-2.10 queue 20170609

2017-06-08 Thread David Gibson

The following changes since commit 64175afc695c0672876fbbfc31b299c86d562cb4:

  arm_gicv3: Fix ICC_BPR1 reset value when EL3 not implemented (2017-06-07 
17:21:44 +0100)

are available in the git repository at:

  git://github.com/dgibson/qemu.git tags/ppc-for-2.10-20170609

for you to fetch changes up to 593080936a06a04eacc589350e3a7ebc02f2ed8e:

  Revert "spapr: fix memory hot-unplugging" (2017-06-09 12:35:46 +1000)


ppc patch queue 2017-06-09

This batch contains more patches to rework the pseries machine hotplug
infrastructure, plus an assorted batch of bugfixes.

It contains a start on fixes to restore migration from older machine
types on older versions which was broken by some xics changes.  There
are still a few missing pieces here, though.


David Gibson (8):
  pseries: Correct panic behaviour for pseries machine type
  spapr: Clean up DR entity sense handling
  spapr: Don't misuse DR-indicator in spapr_recover_pending_dimm_state()
  spapr: Clean up RTAS set-indicator
  spapr: Clean up handling of DR-indicator
  spapr: Change DRC attach & detach methods to functions
  spapr: Fold spapr_phb_{add,remove}_pci_device() into their only callers
  spapr: Rework DRC name handling

Greg Kurz (9):
  target/ppc: pass const string to kvmppc_is_mem_backend_page_size_ok()
  target/ppc: fix memory leak in kvmppc_is_mem_backend_page_size_ok()
  spapr: fix memory leak in spapr_memory_pre_plug()
  pnv_core: drop reference on ICPState object during CPU realization
  xics: add reset() handler to ICPStateClass
  xics: introduce macros for ICP/ICS link properties
  xics: pass appropriate types to realize() handlers.
  xics: setup cpu at realize time
  xics: drop ICPStateClass::cpu_setup() handler

Laurent Vivier (1):
  Revert "spapr: fix memory hot-unplugging"

Thomas Huth (2):
  hw/ppc/spapr: Adjust firmware name for PCI bridges
  hw/cpu: core.c can be compiled as common object

 hw/cpu/Makefile.objs   |   3 +-
 hw/intc/xics.c |  95 +++
 hw/intc/xics_kvm.c |  45 ++-
 hw/intc/xics_pnv.c |   6 +-
 hw/ppc/pnv_core.c  |  16 +--
 hw/ppc/pnv_psi.c   |   3 +-
 hw/ppc/spapr.c |  44 +++
 hw/ppc/spapr_cpu_core.c|  22 ++--
 hw/ppc/spapr_drc.c | 289 +++--
 hw/ppc/spapr_pci.c |  72 ---
 hw/ppc/spapr_rtas.c|   7 +-
 hw/ppc/trace-events|   5 +-
 include/hw/ppc/spapr_drc.h |  31 +++--
 include/hw/ppc/xics.h  |  17 +--
 target/ppc/kvm.c   |   5 +-
 target/ppc/kvm_ppc.h   |   4 +-
 16 files changed, 278 insertions(+), 386 deletions(-)

[Qemu-devel] [PULL 12/20] pnv_core: drop reference on ICPState object during CPU realization

2017-06-08 Thread David Gibson

From: Greg Kurz 

Similarly to what was done to spapr with commit 249127d0dfeb, this patch
ensures that we don't keep an extra reference on the ICPState object. Also
since the object was just created and not reparented yet, the call to
object_property_add_child() should never fail: let's pass &error_abort to
make this clear.

Signed-off-by: Greg Kurz 
Reviewed-by: Cédric Le Goater 
Signed-off-by: David Gibson 
---
 hw/ppc/pnv_core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index 1b7ec70..e8a9a94 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -119,7 +119,8 @@ static void pnv_core_realize_child(Object *child, 
XICSFabric *xi, Error **errp)
 Object *obj;
 
 obj = object_new(TYPE_PNV_ICP);
-object_property_add_child(OBJECT(cpu), "icp", obj, NULL);
+object_property_add_child(OBJECT(cpu), "icp", obj, _abort);
+object_unref(obj);
 object_property_add_const_link(obj, "xics", OBJECT(xi), _abort);
 object_property_set_bool(obj, true, "realized", _err);
 if (local_err) {
-- 
2.9.4

[Qemu-devel] [PULL 03/20] spapr: fix memory leak in spapr_memory_pre_plug()

2017-06-08 Thread David Gibson

From: Greg Kurz 

The string returned by object_property_get_str() is dynamically allocated.

(Spotted by Coverity, CID 1375942)

Signed-off-by: Greg Kurz 
Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 91b4057..9b7ae28 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2615,8 +2615,11 @@ static void spapr_memory_pre_plug(HotplugHandler 
*hotplug_dev, DeviceState *dev,
 if (mem_dev && !kvmppc_is_mem_backend_page_size_ok(mem_dev)) {
 error_setg(errp, "Memory backend has bad page size. "
"Use 'memory-backend-file' with correct mem-path.");
-return;
+goto out;
 }
+
+out:
+g_free(mem_dev);
 }
 
 struct sPAPRDIMMState {
-- 
2.9.4

[Qemu-devel] [PULL 02/20] target/ppc: fix memory leak in kvmppc_is_mem_backend_page_size_ok()

2017-06-08 Thread David Gibson

From: Greg Kurz 

The string returned by object_property_get_str() is dynamically allocated.

Signed-off-by: Greg Kurz 
Reviewed-by: Thomas Huth 
Signed-off-by: David Gibson 
---
 target/ppc/kvm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 8881762..f2f7c53 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -486,6 +486,7 @@ bool kvmppc_is_mem_backend_page_size_ok(const char 
*obj_path)
 
 if (mempath) {
 pagesize = qemu_mempath_getpagesize(mempath);
+g_free(mempath);
 } else {
 pagesize = getpagesize();
 }
-- 
2.9.4

Re: [Qemu-devel] [PATCH v1 1/1] char-socket: Don't report TCP socket waiting as an error

2017-06-08 Thread Markus Armbruster

Alistair Francis  writes:

> On Thu, Jun 8, 2017 at 10:56 AM, Markus Armbruster  wrote:
>> Alistair Francis  writes:
>>
>>> On Wed, Jun 7, 2017 at 11:03 PM, Markus Armbruster  
>>> wrote:
 Alistair Francis  writes:

> On Wed, Jun 7, 2017 at 12:19 AM, Markus Armbruster  
> wrote:
>> Paolo Bonzini  writes:
>>
>>> On 06/06/2017 18:30, Alistair Francis wrote:
>
> This is somehow confusing. I don't think it is worth having another
> qemu_log_stderr() function rather than using error_report() but this 
> very
> call might deserve a comment explaining this unusual use. What do you 
> think?

 The problem with stderr is that this isn't an error. Some uses of QEMU
 (inside Eclipse for example) flag everything printed on stderr as red
 which confuses users that they are seeing an error when they really
 aren't.
>>>
>>> But they are wrong.
>>
>> Concur.  We also print warnings and informational messages to stderr.
>>
>> We should make errors easy to recognize.  Fortunately, error_report()
>> prints errors to stderr in a rigid format.  Unfortunately, error
>> messages bypassing error_report() still exist in places.  We suck.
>>
>> The format is
>>
>> timestamp-if-enabled progname ':' location message
>>
>> timestamp-if-enabled is normally empty.  With -msg timestamp=on, it's
>> the current time in ISO 8601 format, followed by a space.
>>
>> progname is the program name (main()'s argv[0]).
>>
>> location is either empty, or a reference to the command line or a
>> configuration file.
>>
>> See error_vreport() for details.
>
> Ok, but this isn't an error, it's more information. So it sounds like
> we should still print to stderr but not print in the format described
> above?

 Yes.

 I explained the error message format to show how to distinguish actual
 errors from other stuff.
>>>
>>> Sorry, I should have been more clear. I meant we should not use the
>>> error_report() function here. I don't think we have any
>>> warning_report() function though, is that something worth having?
>>
>> So far we simply use error_printf() for such things.
>>
>> A function to report a warning would let us report them more uniformly,
>> but only if we actually use it uniformly.  In other words, adding one
>> without also converting the existing warnings to use it would create yet
>> another open-ended incremental conversion job.  Are we up to it?
>
> Yeah! Why not. I am happy to give it a shot changing some errors to warnings.
>
> First thing though, what is the format for printing warnings?

We make one up.

For what it's worth, gcc uses the same format as for errors with the
message prefixed either by "warning: " or by "error: ".  Also common is
prefixing warnings, but not errors.

We already have several error_report() calls with messages that start
with (a variation of) "warning: ".

Re: [Qemu-devel] [PATCH RFC] spapr: ignore interrupts during reset state

2017-06-08 Thread Nikunj A Dadhania

David Gibson  writes:

> On Thu, Jun 08, 2017 at 12:06:08PM +0530, Nikunj A Dadhania wrote:
>> Rebooting a SMP TCG guest is broken for both single/multi threaded TCG.
>
> Ouch.  When exactly did this happen?

It has been broken for a long time.

> I know that smp boot used to work under TCG, albeit very slowly.

SMP boot works; it's the reboot issued from the guest that doesn't boot and
crashes in SLOF.

>> When reset happens, all the CPUs are in halted state. First CPU is brought 
>> out
>> of reset and secondary CPUs would be initialized by the guest kernel using a
>> rtas call start-cpu.
>> 
>> However, in case of TCG, decrementer interrupts keep on coming and waking the
>> secondary CPUs up.
>
> Ok.. how is that happening given that the secondary CPUs should have
> MSR[EE] == 0?

Basically, the CPU is in halted condition and has_work() does not check
for MSR_EE in that case. But I am not sure if checking MSR_EE is
sufficient, as the CPU does go to halted state (idle) while running as
well.

static bool cpu_has_work_POWER8(CPUState *cs)
{
PowerPCCPU *cpu = POWERPC_CPU(cs);
CPUPPCState *env = &cpu->env;

if (cs->halted) {
   [ SNIP ]
   /* Does not check for msr_ee */
} else {
return msr_ee && (cs->interrupt_request & CPU_INTERRUPT_HARD);
}
}

>
>> These secondary CPUs would see the decrementer interrupt pending, which makes
>> cpu::has_work() to bring them out of wait loop and start executing
>> tcg_exec_cpu().
>> 
>> The problem with this is all the CPUs wake up and start booting SLOF image,
>> causing the following exception(4 CPUs TCG VM):
>
> [snip]
>> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
>> index d10808d..eb88bcb 100644
>> --- a/target/ppc/cpu.h
>> +++ b/target/ppc/cpu.h
>> @@ -1013,6 +1013,13 @@ struct CPUPPCState {
>>  int access_type; /* when a memory exception occurs, the access
>>  type is stored here */
>>  
>> +/* CPU in reset, shouldn't process any interrupts.
>> + *
>> + * Decrementer interrupts in TCG can still wake the CPU up. Make sure 
>> that
>> + * when this variable is set, cpu_has_work_* should return false.
>> + */
>> +int in_reset;
>
> So I'd really rather not add another flag to the cpu structure,
> especially since we'd then need to migrate it as well.

I agree; Bharata and I did discuss the migration case. This patch
was meant to highlight the exact issue.

> I'm pretty sure there should be a way to inhibit the unwanted
> interrupts using existing mechanisms.

One of the things I had observed was that the MSR had just the MSR_SF bit
set during the reset case; we can test for that, maybe.

The below works as well:

+if ((env->msr & ~(1ull << MSR_SF)) == 0) {
+return false;
+}

>> +
>>  CPU_COMMON
>>  
>>  /* MMU context - only relevant for full system emulation */
>> diff --git a/target/ppc/translate_init.c b/target/ppc/translate_init.c
>> index 56a0ab2..64f4348 100644
>> --- a/target/ppc/translate_init.c
>> +++ b/target/ppc/translate_init.c
>> @@ -8561,6 +8561,9 @@ static bool cpu_has_work_POWER7(CPUState *cs)
>>  CPUPPCState *env = >env;
>>  
>>  if (cs->halted) {
>> +if (env->in_reset) {
>> +return false;
>> +}
>>  if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) {
>>  return false;
>>  }
>> @@ -8718,6 +8721,9 @@ static bool cpu_has_work_POWER8(CPUState *cs)
>>  CPUPPCState *env = >env;
>>  
>>  if (cs->halted) {
>> +if (env->in_reset) {
>> +return false;
>> +}
>>  if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) {
>>  return false;
>>  }
>> @@ -8899,6 +8905,9 @@ static bool cpu_has_work_POWER9(CPUState *cs)
>>  CPUPPCState *env = >env;
>>  
>>  if (cs->halted) {
>> +if (env->in_reset) {
>> +return false;
>> +}
>>  if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) {
>>  return false;
>>  }

Regards
Nikunj

Re: [Qemu-devel] 答复: Re: [PATCHv2 02/04] colo-compare: Process pactkets in the IOThread ofthe primary

2017-06-08 Thread Jason Wang




On 2017年06月08日 17:16, wang.yong...@zte.com.cn wrote:


＞＞ From: Wang Yong ＜wang.yong...@zte.com.cn＞

＞＞

＞＞ Process pactkets in the IOThread which arrived over the socket.

＞＞ we use qio_channel_set_aio_fd_handler to set the handlers on the

＞＞ IOThread AioContext.then the packets from the primary and the 
secondary


＞＞ are processed in the IOThread.

＞＞ Finally remove the colo-compare thread using the IOThread instead.

＞＞

＞＞ Signed-off-by: Wang Yong＜wang.yong...@zte.com.cn＞

＞＞ Signed-off-by: Wang Guang＜wang.guan...@zte.com.cn＞

＞＞ ---

＞＞   net/colo-compare.c | 133 
-


＞＞   net/colo.h |   1 +

＞＞   2 files changed, 91 insertions(+), 43 deletions(-)

＞＞

＞＞ diff --git a/net/colo-compare.c b/net/colo-compare.c

＞＞ index b0942a4..e3af791 100644

＞＞ --- a/net/colo-compare.c

＞＞ +++ b/net/colo-compare.c

＞＞ @@ -29,6 +29,7 @@

＞＞   #include "qemu/sockets.h"

＞＞   #include "qapi-visit.h"

＞＞   #include "net/colo.h"

＞＞ +#include "io/channel.h"

＞＞   #include "sysemu/iothread.h"

＞＞

＞＞   #define TYPE_COLO_COMPARE "colo-compare"

＞＞ @@ -82,11 +83,6 @@ typedef struct CompareState {

＞＞   GQueue conn_list;

＞＞   /* hashtable to save connection */

＞＞   GHashTable *connection_track_table;

＞＞ -/* compare thread, a thread for each NIC */

＞＞ -QemuThread thread;

＞＞ -

＞＞ -GMainContext *worker_context;

＞＞ -GMainLoop *compare_loop;

＞＞

＞＞   /*compare iothread*/

＞＞   IOThread *iothread;

＞＞ @@ -95,6 +91,14 @@ typedef struct CompareState {

＞＞   QEMUTimer *packet_check_timer;

＞＞   } CompareState;

＞＞

＞＞ +typedef struct {

＞＞ +Chardev parent;

＞＞ +QIOChannel *ioc; /*I/O channel */


＞We probably don't want to manipulate char backend's internal io 
channel.


＞All need here is to access the frontend API (char-fe.c) I believe, and

＞hide the internal implementation.

char-fd.c ?



Char-fe.c for sure which means frontend of chardev.

These API can only watch events in the qemu main thread, not in the 
IOThread.


I had to use the qio_channel_socket_set_aio_fd_handler function to

monitor the char event in the IOThread, so the io channel is used here



The point is not to touch the internal structure of the chardev (like ioc),
but instead to extend a helper such as qemu_chr_fe_set_handlers() and let it
set the aio handlers,



-＞qio_channel_socket_set_aio_fd_handler

   -＞aio_set_fd_handler


Thanks


＞＞ +} CompareChardev;

＞＞ +

＞＞ +#define COMPARE_CHARDEV(obj) \

＞＞ +OBJECT_CHECK(CompareChardev, (obj), TYPE_CHARDEV_SOCKET)

＞＞ +

＞＞   typedef struct CompareClass {

＞＞   ObjectClass parent_class;

＞＞   } CompareClass;

＞＞ @@ -107,6 +111,12 @@ enum {

＞＞   static int compare_chr_send(CharBackend *out,

＞＞   const uint8_t *buf,

＞＞   uint32_t size);

＞＞ +static void compare_chr_set_aio_fd_handlers(CharBackend *b,

＞＞ +AioContext *ctx,

＞＞ +IOCanReadHandler *fd_can_read,

＞＞ +IOReadHandler *fd_read,

＞＞ +IOEventHandler *fd_event,

＞＞ +void *opaque);

＞＞

＞＞   static gint seq_sorter(Packet *a, Packet *b, gpointer data)

＞＞   {

＞＞ @@ -534,6 +544,30 @@ err:

＞＞   return ret ＜ 0 ? ret : -EIO;

＞＞   }

＞＞

＞＞ +static void compare_chr_read(void *opaque)

＞＞ +{

＞＞ +Chardev *chr = opaque;

＞＞ +uint8_t buf[CHR_READ_BUF_LEN];

＞＞ +int len, size;

＞＞ +int max_size;

＞＞ +

＞＞ +max_size = qemu_chr_be_can_write(chr);

＞＞ +if (max_size ＜= 0) {

＞＞ +return;

＞＞ +}

＞＞ +

＞＞ +len = sizeof(buf);

＞＞ +if (len ＞ max_size) {

＞＞ +len = max_size;

＞＞ +}

＞＞ +size = CHARDEV_GET_CLASS(chr)-＞chr_sync_read(chr, (void *)buf, 
len);


＞＞ +if (size == 0) {

＞＞ +return;

＞＞ +} else if (size ＞ 0) {

＞＞ +qemu_chr_be_write(chr, buf, size);

＞＞ +}

＞＞ +}

＞＞ +

＞＞   static int compare_chr_can_read(void *opaque)

＞＞   {

＞＞   return COMPARE_READ_LEN_MAX;

＞＞ @@ -550,8 +584,8 @@ static void compare_pri_chr_in(void *opaque, 
const uint8_t *buf, int size)


＞＞

＞＞   ret = net_fill_rstate(＞pri_rs, buf, size);

＞＞   if (ret == -1) {

＞＞ -qemu_chr_fe_set_handlers(＞chr_pri_in, NULL, NULL, NULL,

＞＞ - NULL, NULL, true);

＞＞ +  compare_chr_set_aio_fd_handlers(＞chr_pri_in, s-＞ctx,

＞＞ +NULL, NULL, NULL, NULL);

＞＞   error_report("colo-compare primary_in error");

＞＞   }

＞＞   }

＞＞ @@ -567,8 +601,8 @@ static void compare_sec_chr_in(void *opaque, 
const uint8_t *buf, int size)


＞＞

＞＞   ret = net_fill_rstate(＞sec_rs, buf, size);

＞＞   if (ret == -1) {

＞＞ -qemu_chr_fe_set_handlers(＞chr_sec_in, NULL, NULL, NULL,

＞＞ - NULL, NULL, true);

＞＞ +

Re: [Qemu-devel] [PATCH v8 02/11] migration: pass MigrationIncomingState* into migration check functions

2017-06-08 Thread Peter Xu

On Wed, Jun 07, 2017 at 12:46:29PM +0300, Alexey Perevalov wrote:
> That tiny refactoring is necessary to be able to set
> UFFD_FEATURE_THREAD_ID while requesting features, and then
> to create downtime context in case when kernel supports it.
> 
> Signed-off-by: Alexey Perevalov 
> ---
>  migration/migration.c|  3 ++-
>  migration/postcopy-ram.c | 10 +-
>  migration/postcopy-ram.h |  2 +-
>  migration/savevm.c   |  2 +-
>  4 files changed, 9 insertions(+), 8 deletions(-)
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index 48c94c9..2a77636 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -726,6 +726,7 @@ void 
> qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
>Error **errp)
>  {
>  MigrationState *s = migrate_get_current();
> +MigrationIncomingState *mis = migration_incoming_get_current();

If this patch is only servicing patch 6, I'd prefer in patch 6 we call
migration_incoming_get_current() (rather than here), then this patch
may be dropped?...

Thanks,

>  MigrationCapabilityStatusList *cap;
>  bool old_postcopy_cap = migrate_postcopy_ram();
>  
> @@ -772,7 +773,7 @@ void 
> qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
>   * special support.
>   */
>  if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
> -!postcopy_ram_supported_by_host()) {
> +!postcopy_ram_supported_by_host(mis)) {
>  /* postcopy_ram_supported_by_host will have emitted a more
>   * detailed message
>   */
> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> index 9c41887..10d39a0 100644
> --- a/migration/postcopy-ram.c
> +++ b/migration/postcopy-ram.c
> @@ -63,7 +63,7 @@ struct PostcopyDiscardState {
>  #include 
>  #include 
>  
> -static bool ufd_version_check(int ufd)
> +static bool ufd_version_check(int ufd, MigrationIncomingState *mis)
>  {
>  struct uffdio_api api_struct;
>  uint64_t ioctl_mask;
> @@ -126,7 +126,7 @@ static int test_ramblock_postcopiable(const char 
> *block_name, void *host_addr,
>   * normally fine since if the postcopy succeeds it gets turned back on at the
>   * end.
>   */
> -bool postcopy_ram_supported_by_host(void)
> +bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
>  {
>  long pagesize = getpagesize();
>  int ufd = -1;
> @@ -149,7 +149,7 @@ bool postcopy_ram_supported_by_host(void)
>  }
>  
>  /* Version and features check */
> -if (!ufd_version_check(ufd)) {
> +if (!ufd_version_check(ufd, mis)) {
>  goto out;
>  }
>  
> @@ -525,7 +525,7 @@ int postcopy_ram_enable_notify(MigrationIncomingState 
> *mis)
>   * Although the host check already tested the API, we need to
>   * do the check again as an ABI handshake on the new fd.
>   */
> -if (!ufd_version_check(mis->userfault_fd)) {
> +if (!ufd_version_check(mis->userfault_fd, mis)) {
>  return -1;
>  }
>  
> @@ -663,7 +663,7 @@ void *postcopy_get_tmp_page(MigrationIncomingState *mis)
>  
>  #else
>  /* No target OS support, stubs just fail */
> -bool postcopy_ram_supported_by_host(void)
> +bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
>  {
>  error_report("%s: No OS support", __func__);
>  return false;
> diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
> index 52d51e8..587a8b8 100644
> --- a/migration/postcopy-ram.h
> +++ b/migration/postcopy-ram.h
> @@ -14,7 +14,7 @@
>  #define QEMU_POSTCOPY_RAM_H
>  
>  /* Return true if the host supports everything we need to do postcopy-ram */
> -bool postcopy_ram_supported_by_host(void);
> +bool postcopy_ram_supported_by_host(MigrationIncomingState *mis);
>  
>  /*
>   * Make all of RAM sensitive to accesses to areas that haven't yet been 
> written
> diff --git a/migration/savevm.c b/migration/savevm.c
> index 9c320f5..8b7bab8 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -1380,7 +1380,7 @@ static int 
> loadvm_postcopy_handle_advise(MigrationIncomingState *mis)
>  return -1;
>  }
>  
> -if (!postcopy_ram_supported_by_host()) {
> +if (!postcopy_ram_supported_by_host(mis)) {
>  postcopy_state_set(POSTCOPY_INCOMING_NONE);
>  return -1;
>  }
> -- 
> 1.9.1
> 

-- 
Peter Xu

[Qemu-devel] [PATCH v2 5/6] migration: move skip_configuration out

2017-06-08 Thread Peter Xu

It was in SaveState but now moved to MigrationState altogether. Again,
using HW_COMPAT_2_3 for old PC/SPAPR machines, and
register_compat_prop() for xen_init().

Signed-off-by: Peter Xu 
---
 hw/i386/pc_piix.c |  1 -
 hw/ppc/spapr.c|  1 -
 hw/xen/xen-common.c   |  2 +-
 include/hw/compat.h   |  4 
 include/migration/migration.h |  4 +++-
 migration/migration.c |  2 ++
 migration/savevm.c| 15 ---
 7 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index c83cec5..529018d 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -317,7 +317,6 @@ static void pc_compat_2_3(MachineState *machine)
 if (kvm_enabled()) {
 pcms->smm = ON_OFF_AUTO_OFF;
 }
-savevm_skip_configuration();
 }
 
 static void pc_compat_2_2(MachineState *machine)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 3e78bb9..227b03b 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -3593,7 +3593,6 @@ static void 
spapr_machine_2_3_instance_options(MachineState *machine)
 {
 spapr_machine_2_4_instance_options(machine);
 savevm_skip_section_footers();
-savevm_skip_configuration();
 }
 
 static void spapr_machine_2_3_class_options(MachineClass *mc)
diff --git a/hw/xen/xen-common.c b/hw/xen/xen-common.c
index 8240d50..a80034f 100644
--- a/hw/xen/xen-common.c
+++ b/hw/xen/xen-common.c
@@ -145,7 +145,7 @@ static int xen_init(MachineState *ms)
  * confirm that in the future.
  */
 register_compat_prop("migration", "store-global-state", "off");
-savevm_skip_configuration();
+register_compat_prop("migration", "skip-configuration", "on");
 savevm_skip_section_footers();
 
 return 0;
diff --git a/include/hw/compat.h b/include/hw/compat.h
index 5b5c8de..4ed2ae7 100644
--- a/include/hw/compat.h
+++ b/include/hw/compat.h
@@ -179,6 +179,10 @@
 .value= "off",\
 },{\
 .driver   = "migration",\
+.property = "skip-configuration",\
+.value= "on",\
+},{\
+.driver   = "migration",\
 .property = "store-global-state",\
 .value= "off",\
 },
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 27b07ed..5f6861c 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -171,6 +171,9 @@ struct MigrationState
 
 /* Whether the VM is only allowing for migratable devices */
 bool only_migratable;
+
+/* Whether we skip QEMU_VM_CONFIGURATION for migration */
+bool skip_configuration;
 };
 
 void migrate_set_state(int *state, int old_state, int new_state);
@@ -249,7 +252,6 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t 
block_offset,
 
 void savevm_skip_section_footers(void);
 void register_global_state(void);
-void savevm_skip_configuration(void);
 int global_state_store(void);
 void global_state_store_running(void);
 
diff --git a/migration/migration.c b/migration/migration.c
index dbec586..a4ab83d 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2105,6 +2105,8 @@ static Property migration_properties[] = {
 DEFINE_PROP_BOOL("store-global-state", MigrationState,
  store_global_state, true),
 DEFINE_PROP_BOOL("only-migratable", MigrationState, only_migratable, 
false),
+DEFINE_PROP_BOOL("skip-configuration", MigrationState,
+ skip_configuration, false),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/migration/savevm.c b/migration/savevm.c
index f073027..222af4c 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -290,7 +290,6 @@ typedef struct SaveStateEntry {
 typedef struct SaveState {
 QTAILQ_HEAD(, SaveStateEntry) handlers;
 int global_section_id;
-bool skip_configuration;
 uint32_t len;
 const char *name;
 uint32_t target_page_bits;
@@ -299,15 +298,8 @@ typedef struct SaveState {
 static SaveState savevm_state = {
 .handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
 .global_section_id = 0,
-.skip_configuration = false,
 };
 
-void savevm_skip_configuration(void)
-{
-savevm_state.skip_configuration = true;
-}
-
-
 static void configuration_pre_save(void *opaque)
 {
 SaveState *state = opaque;
@@ -989,11 +981,11 @@ void qemu_savevm_state_header(QEMUFile *f)
 qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
 qemu_put_be32(f, QEMU_VM_FILE_VERSION);
 
-if (!savevm_state.skip_configuration || enforce_config_section()) {
+if (!migrate_get_current()->skip_configuration ||
+enforce_config_section()) {
 qemu_put_byte(f, QEMU_VM_CONFIGURATION);
 vmstate_save_state(f, _configuration, _state, 0);
 }
-
 }
 
 void qemu_savevm_state_begin(QEMUFile *f)
@@ -2003,7 +1995,8 @@ int qemu_loadvm_state(QEMUFile *f)
 return -ENOTSUP;
 }
 
-if (!savevm_state.skip_configuration || enforce_config_section()) {
+if

[Qemu-devel] [PATCH v2 6/6] migration: move skip_section_footers

2017-06-08 Thread Peter Xu

Move it into MigrationState, with a property bound to it. The same trick is
used as in the previous patches.

Signed-off-by: Peter Xu 
---
 hw/i386/pc_piix.c |  1 -
 hw/ppc/spapr.c|  1 -
 hw/xen/xen-common.c   |  2 +-
 include/hw/compat.h   |  4 
 include/migration/migration.h |  3 ++-
 migration/migration.c |  2 ++
 migration/savevm.c| 11 ++-
 7 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 529018d..1be23e2 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -313,7 +313,6 @@ static void pc_init1(MachineState *machine,
 static void pc_compat_2_3(MachineState *machine)
 {
 PCMachineState *pcms = PC_MACHINE(machine);
-savevm_skip_section_footers();
 if (kvm_enabled()) {
 pcms->smm = ON_OFF_AUTO_OFF;
 }
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 227b03b..944f829 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -3592,7 +3592,6 @@ DEFINE_SPAPR_MACHINE(2_4, "2.4", false);
 static void spapr_machine_2_3_instance_options(MachineState *machine)
 {
 spapr_machine_2_4_instance_options(machine);
-savevm_skip_section_footers();
 }
 
 static void spapr_machine_2_3_class_options(MachineClass *mc)
diff --git a/hw/xen/xen-common.c b/hw/xen/xen-common.c
index a80034f..e8f08bb 100644
--- a/hw/xen/xen-common.c
+++ b/hw/xen/xen-common.c
@@ -146,7 +146,7 @@ static int xen_init(MachineState *ms)
  */
 register_compat_prop("migration", "store-global-state", "off");
 register_compat_prop("migration", "skip-configuration", "on");
-savevm_skip_section_footers();
+register_compat_prop("migration", "skip-section-footer", "on");
 
 return 0;
 }
diff --git a/include/hw/compat.h b/include/hw/compat.h
index 4ed2ae7..ef5fbc7 100644
--- a/include/hw/compat.h
+++ b/include/hw/compat.h
@@ -183,6 +183,10 @@
 .value= "on",\
 },{\
 .driver   = "migration",\
+.property = "skip-section-footer",\
+.value= "on",\
+},{\
+.driver   = "migration",\
 .property = "store-global-state",\
 .value= "off",\
 },
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 5f6861c..dc35567 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -174,6 +174,8 @@ struct MigrationState
 
 /* Whether we skip QEMU_VM_CONFIGURATION for migration */
 bool skip_configuration;
+/* Whether we skip section footer */
+bool skip_section_footer;
 };
 
 void migrate_set_state(int *state, int old_state, int new_state);
@@ -250,7 +252,6 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t 
block_offset,
  ram_addr_t offset, size_t size,
  uint64_t *bytes_sent);
 
-void savevm_skip_section_footers(void);
 void register_global_state(void);
 int global_state_store(void);
 void global_state_store_running(void);
diff --git a/migration/migration.c b/migration/migration.c
index a4ab83d..9f90f7b 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2107,6 +2107,8 @@ static Property migration_properties[] = {
 DEFINE_PROP_BOOL("only-migratable", MigrationState, only_migratable, 
false),
 DEFINE_PROP_BOOL("skip-configuration", MigrationState,
  skip_configuration, false),
+DEFINE_PROP_BOOL("skip-section-footer", MigrationState,
+ skip_section_footer, false),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/migration/savevm.c b/migration/savevm.c
index 222af4c..06d6986 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -65,8 +65,6 @@
 
 const unsigned int postcopy_ram_discard_version = 0;
 
-static bool skip_section_footers;
-
 /* Subcommands for QEMU_VM_COMMAND */
 enum qemu_vm_cmd {
 MIG_CMD_INVALID = 0,   /* Must be 0 */
@@ -780,11 +778,6 @@ static void vmstate_save(QEMUFile *f, SaveStateEntry *se, 
QJSON *vmdesc)
 vmstate_save_state(f, se->vmsd, se->opaque, vmdesc);
 }
 
-void savevm_skip_section_footers(void)
-{
-skip_section_footers = true;
-}
-
 /*
  * Write the header for device section (QEMU_VM_SECTION START/END/PART/FULL)
  */
@@ -812,7 +805,7 @@ static void save_section_header(QEMUFile *f, SaveStateEntry 
*se,
  */
 static void save_section_footer(QEMUFile *f, SaveStateEntry *se)
 {
-if (!skip_section_footers) {
+if (!migrate_get_current()->skip_section_footer) {
 qemu_put_byte(f, QEMU_VM_SECTION_FOOTER);
 qemu_put_be32(f, se->section_id);
 }
@@ -1810,7 +1803,7 @@ static bool check_section_footer(QEMUFile *f, 
SaveStateEntry *se)
 uint8_t read_mark;
 uint32_t read_section_id;
 
-if (skip_section_footers) {
+if (migrate_get_current()->skip_section_footer) {
 /* No footer to check */
 return true;
 }
-- 
2.7.4

[Qemu-devel] [PATCH v2 3/6] migration: move global_state.optional out

2017-06-08 Thread Peter Xu

Put it into MigrationState then we can use the properties to specify
whether to enable storing global state.

Removing global_state_set_optional() since now we can use HW_COMPAT_2_3
for x86/power in general, and the register_compat_prop() for xen_init().

Signed-off-by: Peter Xu 
---
 hw/i386/pc_piix.c |  1 -
 hw/ppc/spapr.c|  1 -
 hw/xen/xen-common.c   |  8 +++-
 include/hw/compat.h   |  4 
 include/migration/migration.h |  7 ++-
 migration/migration.c | 24 
 6 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 2234bd0..c83cec5 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -317,7 +317,6 @@ static void pc_compat_2_3(MachineState *machine)
 if (kvm_enabled()) {
 pcms->smm = ON_OFF_AUTO_OFF;
 }
-global_state_set_optional();
 savevm_skip_configuration();
 }
 
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index ab3aab1..3e78bb9 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -3593,7 +3593,6 @@ static void 
spapr_machine_2_3_instance_options(MachineState *machine)
 {
 spapr_machine_2_4_instance_options(machine);
 savevm_skip_section_footers();
-global_state_set_optional();
 savevm_skip_configuration();
 }
 
diff --git a/hw/xen/xen-common.c b/hw/xen/xen-common.c
index 0bed577..8240d50 100644
--- a/hw/xen/xen-common.c
+++ b/hw/xen/xen-common.c
@@ -138,7 +138,13 @@ static int xen_init(MachineState *ms)
 }
 qemu_add_vm_change_state_handler(xen_change_state_handler, NULL);
 
-global_state_set_optional();
+/*
+ * TODO: make sure global MigrationState has not yet been created
+ * (otherwise the compat trick won't work). For now we are in
+ * configure_accelerator() so we are mostly good. Better to
+ * confirm that in the future.
+ */
+register_compat_prop("migration", "store-global-state", "off");
 savevm_skip_configuration();
 savevm_skip_section_footers();
 
diff --git a/include/hw/compat.h b/include/hw/compat.h
index 400c64b..5b5c8de 100644
--- a/include/hw/compat.h
+++ b/include/hw/compat.h
@@ -177,6 +177,10 @@
 .driver   = TYPE_PCI_DEVICE,\
 .property = "x-pcie-lnksta-dllla",\
 .value= "off",\
+},{\
+.driver   = "migration",\
+.property = "store-global-state",\
+.value= "off",\
 },
 
 #define HW_COMPAT_2_2 \
diff --git a/include/migration/migration.h b/include/migration/migration.h
index bd0186c..d3ec719 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -162,6 +162,12 @@ struct MigrationState
 /* Do we have to clean up -b/-i from old migrate parameters */
 /* This feature is deprecated and will be removed */
 bool must_remove_block_options;
+
+/*
+ * Global switch on whether we need to store the global state
+ * during migration.
+ */
+bool store_global_state;
 };
 
 void migrate_set_state(int *state, int old_state, int new_state);
@@ -240,7 +246,6 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t 
block_offset,
 
 void savevm_skip_section_footers(void);
 void register_global_state(void);
-void global_state_set_optional(void);
 void savevm_skip_configuration(void);
 int global_state_store(void);
 void global_state_store_running(void);
diff --git a/migration/migration.c b/migration/migration.c
index 98b77e2..79d886c 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -138,13 +138,13 @@ void migration_incoming_state_destroy(void)
 
 
 typedef struct {
-bool optional;
 uint32_t size;
 uint8_t runstate[100];
 RunState state;
 bool received;
 } GlobalState;
 
+/* This is only used if MigrationState.store_global_state is set. */
 static GlobalState global_state;
 
 int global_state_store(void)
@@ -175,19 +175,13 @@ static RunState global_state_get_runstate(void)
 return global_state.state;
 }
 
-void global_state_set_optional(void)
-{
-global_state.optional = true;
-}
-
 static bool global_state_needed(void *opaque)
 {
 GlobalState *s = opaque;
 char *runstate = (char *)s->runstate;
 
 /* If it is not optional, it is mandatory */
-
-if (s->optional == false) {
+if (migrate_get_current()->store_global_state) {
 return true;
 }
 
@@ -2107,6 +2101,19 @@ void migrate_fd_connect(MigrationState *s)
 s->migration_thread_running = true;
 }
 
+static Property migration_properties[] = {
+DEFINE_PROP_BOOL("store-global-state", MigrationState,
+ store_global_state, true),
+DEFINE_PROP_END_OF_LIST(),
+};
+
+static void migration_class_init(ObjectClass *klass, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+
+dc->props = migration_properties;
+}
+
 static void migration_instance_init(Object *obj)
 {
 MigrationState *ms = MIGRATION_OBJ(obj);
@@ -2131,6 +2138,7 @@ static void migration_instance_init(Object *obj)

[Qemu-devel] [PATCH v2 2/6] migration: let MigrationState be a qdev

2017-06-08 Thread Peter Xu

Let the old man "MigrationState" join the object family. Direct benefit
is that we can start to use all the property features derived from
current QDev, like: HW_COMPAT_* bits, command line setup for migration
parameters (so will never need to set them up each time using HMP/QMP,
this is really, really attractive for test writers), etc.

I see no reason to disallow this yet. So let's start with this
one, to see whether it turns out to be any good.

No functional change at all.

Signed-off-by: Peter Xu 
---
 include/migration/migration.h | 19 ++
 migration/migration.c | 61 ---
 2 files changed, 59 insertions(+), 21 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 79b5484..bd0186c 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -21,6 +21,7 @@
 #include "qapi-types.h"
 #include "exec/cpu-common.h"
 #include "qemu/coroutine_int.h"
+#include "hw/qdev.h"
 
 #define QEMU_VM_FILE_MAGIC   0x5145564d
 #define QEMU_VM_FILE_VERSION_COMPAT  0x0002
@@ -49,6 +50,8 @@ enum mig_rp_message_type {
 MIG_RP_MSG_MAX
 };
 
+#define TYPE_MIGRATION "migration"
+
 /* State for the incoming migration */
 struct MigrationIncomingState {
 QEMUFile *from_src_file;
@@ -91,8 +94,24 @@ struct MigrationIncomingState {
 MigrationIncomingState *migration_incoming_get_current(void);
 void migration_incoming_state_destroy(void);
 
+#define MIGRATION_CLASS(klass) \
+OBJECT_CLASS_CHECK(MigrationClass, (klass), TYPE_MIGRATION)
+#define MIGRATION_OBJ(obj) \
+OBJECT_CHECK(MigrationState, (obj), TYPE_MIGRATION)
+#define MIGRATION_GET_CLASS(obj) \
+OBJECT_GET_CLASS(MigrationClass, (obj), TYPE_MIGRATION)
+
+typedef struct MigrationClass {
+/*< private >*/
+DeviceClass parent_class;
+} MigrationClass;
+
 struct MigrationState
 {
+/*< private >*/
+DeviceState parent_obj;
+
+/*< public >*/
 size_t bytes_xfer;
 size_t xfer_limit;
 QemuThread thread;
diff --git a/migration/migration.c b/migration/migration.c
index 48c94c9..98b77e2 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -93,29 +93,13 @@ static bool deferred_incoming;
 /* For outgoing */
 MigrationState *migrate_get_current(void)
 {
-static bool once;
-static MigrationState current_migration = {
-.state = MIGRATION_STATUS_NONE,
-.xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
-.mbps = -1,
-.parameters = {
-.compress_level = DEFAULT_MIGRATE_COMPRESS_LEVEL,
-.compress_threads = DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT,
-.decompress_threads = DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT,
-.cpu_throttle_initial = DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL,
-.cpu_throttle_increment = DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT,
-.max_bandwidth = MAX_THROTTLE,
-.downtime_limit = DEFAULT_MIGRATE_SET_DOWNTIME,
-.x_checkpoint_delay = DEFAULT_MIGRATE_X_CHECKPOINT_DELAY,
-},
-};
+static MigrationState *current_migration;
 
-if (!once) {
-current_migration.parameters.tls_creds = g_strdup("");
-current_migration.parameters.tls_hostname = g_strdup("");
-once = true;
+if (!current_migration) {
+current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));
 }
-return &current_migration;
+
+return current_migration;
 }
 
 MigrationIncomingState *migration_incoming_get_current(void)
@@ -2123,3 +2107,38 @@ void migrate_fd_connect(MigrationState *s)
 s->migration_thread_running = true;
 }
 
+static void migration_instance_init(Object *obj)
+{
+MigrationState *ms = MIGRATION_OBJ(obj);
+
+ms->state = MIGRATION_STATUS_NONE;
+ms->xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE;
+ms->mbps = -1;
+ms->parameters = (MigrationParameters) {
+.compress_level = DEFAULT_MIGRATE_COMPRESS_LEVEL,
+.compress_threads = DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT,
+.decompress_threads = DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT,
+.cpu_throttle_initial = DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL,
+.cpu_throttle_increment = DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT,
+.max_bandwidth = MAX_THROTTLE,
+.downtime_limit = DEFAULT_MIGRATE_SET_DOWNTIME,
+.x_checkpoint_delay = DEFAULT_MIGRATE_X_CHECKPOINT_DELAY,
+};
+ms->parameters.tls_creds = g_strdup("");
+ms->parameters.tls_hostname = g_strdup("");
+}
+
+static const TypeInfo migration_type = {
+.name = TYPE_MIGRATION,
+.parent = TYPE_DEVICE,
+.class_size = sizeof(MigrationClass),
+.instance_size = sizeof(MigrationState),
+.instance_init = migration_instance_init,
+};
+
+static void register_migration_types(void)
+{
+type_register_static(&migration_type);
+}
+
+type_init(register_migration_types);
-- 
2.7.4

[Qemu-devel] [PATCH v2 4/6] migration: move only_migratable to MigrationState

2017-06-08 Thread Peter Xu

One less global variable, and it only matters for migration.

We keep the old "--only-migratable" option, but also now we support:

  -global migration.only-migratable=true

Currently still keep the old interface.

Signed-off-by: Peter Xu 
---
 include/migration/migration.h | 3 +++
 include/sysemu/sysemu.h   | 1 -
 migration/migration.c | 3 ++-
 migration/savevm.c| 2 +-
 vl.c  | 9 +++--
 5 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index d3ec719..27b07ed 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -168,6 +168,9 @@ struct MigrationState
  * during migration.
  */
 bool store_global_state;
+
+/* Whether the VM is only allowing for migratable devices */
+bool only_migratable;
 };
 
 void migrate_set_state(int *state, int old_state, int new_state);
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 9841a52..b213696 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -15,7 +15,6 @@
 /* vl.c */
 
 extern const char *bios_name;
-extern int only_migratable;
 extern const char *qemu_name;
 extern QemuUUID qemu_uuid;
 extern bool qemu_uuid_set;
diff --git a/migration/migration.c b/migration/migration.c
index 79d886c..dbec586 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1113,7 +1113,7 @@ static GSList *migration_blockers;
 
 int migrate_add_blocker(Error *reason, Error **errp)
 {
-if (only_migratable) {
+if (migrate_get_current()->only_migratable) {
 error_propagate(errp, error_copy(reason));
 error_prepend(errp, "disallowing migration blocker "
   "(--only_migratable) for: ");
@@ -2104,6 +2104,7 @@ void migrate_fd_connect(MigrationState *s)
 static Property migration_properties[] = {
 DEFINE_PROP_BOOL("store-global-state", MigrationState,
  store_global_state, true),
+DEFINE_PROP_BOOL("only-migratable", MigrationState, only_migratable, 
false),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/migration/savevm.c b/migration/savevm.c
index 9c320f5..f073027 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2321,7 +2321,7 @@ void vmstate_register_ram_global(MemoryRegion *mr)
 bool vmstate_check_only_migratable(const VMStateDescription *vmsd)
 {
 /* check needed if --only-migratable is specified */
-if (!only_migratable) {
+if (!migrate_get_current()->only_migratable) {
 return true;
 }
 
diff --git a/vl.c b/vl.c
index be4dcf2..e842eef 100644
--- a/vl.c
+++ b/vl.c
@@ -188,7 +188,6 @@ bool boot_strict;
 uint8_t *boot_splash_filedata;
 size_t boot_splash_filedata_size;
 uint8_t qemu_extra_params_fw[2];
-int only_migratable; /* turn it off unless user states otherwise */
 
 int icount_align_option;
 
@@ -3937,7 +3936,13 @@ int main(int argc, char **argv, char **envp)
 incoming = optarg;
 break;
 case QEMU_OPTION_only_migratable:
-only_migratable = 1;
+/*
+ * TODO: we can remove this option one day, and we
+ * should all use:
+ *
+ * "-global migration.only-migratable=true"
+ */
+migrate_get_current()->only_migratable = true;
 break;
 case QEMU_OPTION_nodefaults:
 has_defaults = 0;
-- 
2.7.4

[Qemu-devel] [PATCH v2 0/6] migration: objectify MigrationState

2017-06-08 Thread Peter Xu

v2
- (I didn't add Juan's r-b since I touched the patches)
- remove once parameter in migrate_get_current() since not needed
- add one more patch to export register_compat_prop(), then use it in
  the following patches in xen_init().

I picked this topic out as suggested by Juan. Also I did what Juan has
suggested in previous discussions that I moved lots of global
parameters into MigrationState, and let them be properties. Then we
can use HW_COMPAT_* and "-global migration.xxx=xxx" formula.

Currently register_compat_prop() is exported to be used by xen_init().

If this can be merged and okay, we can move on to convert more things
into properties for migration.

Please review. Thanks.

Peter Xu (6):
  machine: export register_compat_prop()
  migration: let MigrationState be a qdev
  migration: move global_state.optional out
  migration: move only_migratable to MigrationState
  migration: move skip_configuration out
  migration: move skip_section_footers

 hw/core/machine.c |  6 +--
 hw/i386/pc_piix.c |  3 --
 hw/ppc/spapr.c|  3 --
 hw/xen/xen-common.c   | 12 --
 include/hw/boards.h   |  3 ++
 include/hw/compat.h   | 12 ++
 include/migration/migration.h | 36 +++--
 include/sysemu/sysemu.h   |  1 -
 migration/migration.c | 92 +--
 migration/savevm.c| 28 -
 vl.c  |  9 -
 11 files changed, 136 insertions(+), 69 deletions(-)

-- 
2.7.4

[Qemu-devel] [PATCH v2 1/6] machine: export register_compat_prop()

2017-06-08 Thread Peter Xu

We have HW_COMPAT_*, however that's only bound to machines, not other
things (like accelerators).  Behind it, it was register_compat_prop()
that played the trick.  Let's export the function for further use
outside HW_COMPAT_* magic.

CC: Eduardo Habkost 
CC: Markus Armbruster 
CC: Marcel Apfelbaum 
Signed-off-by: Peter Xu 
---
 hw/core/machine.c   | 6 +++---
 include/hw/boards.h | 3 +++
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 3adebf1..320486d 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -753,9 +753,9 @@ static void machine_class_finalize(ObjectClass *klass, void 
*data)
 g_free(mc->name);
 }
 
-static void register_compat_prop(const char *driver,
- const char *property,
- const char *value)
+void register_compat_prop(const char *driver,
+  const char *property,
+  const char *value)
 {
 GlobalProperty *p = g_new0(GlobalProperty, 1);
 /* Machine compat_props must never cause errors: */
diff --git a/include/hw/boards.h b/include/hw/boards.h
index 76ce021..6e0f5c7 100644
--- a/include/hw/boards.h
+++ b/include/hw/boards.h
@@ -41,6 +41,9 @@ int machine_kvm_shadow_mem(MachineState *machine);
 int machine_phandle_start(MachineState *machine);
 bool machine_dump_guest_core(MachineState *machine);
 bool machine_mem_merge(MachineState *machine);
+void register_compat_prop(const char *driver,
+  const char *property,
+  const char *value);
 void machine_register_compat_props(MachineState *machine);
 HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine);
 void machine_set_cpu_numa_node(MachineState *machine,
-- 
2.7.4

Re: [Qemu-devel] [PATCH 0/7] KVM: MMU: fast write protect

2017-06-08 Thread Xiao Guangrong




On 05/30/2017 12:48 AM, Paolo Bonzini wrote:



On 23/05/2017 04:23, Xiao Guangrong wrote:


Ping...

Sorry to disturb; I just want to make sure this patchset is not missed. :)


It won't. :)  I'm going to look at it and the dirty page ring buffer
this week.


Ping.. :)

Re: [Qemu-devel] [PATCH v1] virtio-net: enable configurable tx queue size

2017-06-08 Thread Wei Wang


On 06/09/2017 03:01 AM, Michael S. Tsirkin wrote:

On Wed, Jun 07, 2017 at 09:04:29AM +0800, Wei Wang wrote:

On 06/05/2017 11:38 PM, Michael S. Tsirkin wrote:

On Mon, Jun 05, 2017 at 04:57:29PM +0800, Wei Wang wrote:

This patch enables the virtio-net tx queue size to be configurable
between 256 and 1024 by the user. The queue size specified by the
user should be power of 2. If "tx_queue_size" is not offered by the
user, the default queue size, 1024, will be used.

For the traditional QEMU backend, setting the tx queue size to be 1024
requires the guest virtio driver to support the VIRTIO_F_MAX_CHAIN_SIZE
feature. This feature restricts the guest driver from chaining 1024
vring descriptors, which may cause the device side implementation to
send more than 1024 iov to writev.

VIRTIO_F_MAX_CHAIN_SIZE is a common transport feature added for all
virtio devices. However, each device has the flexibility to set the max
chain size to limit its driver to chain vring descriptors. Currently,
the max chain size of the virtio-net device is set to 1023.

In the case that the tx queue size is set to 1024 and the
VIRTIO_F_MAX_CHAIN_SIZE feature is not supported by the guest driver,
the tx queue size will be reconfigured to be 512.

I'd like to see the reverse. Start with the current default.
If VIRTIO_F_MAX_CHAIN_SIZE is negotiated, increase the queue size.


OK, we can let the queue size start with 256, and how about
increasing it to 1024 in the following two cases:

I think it should be
1) VIRTIO_F_MAX_CHAIN_SIZE is negotiated
and
2) user requested large size


1) VIRTIO_F_MAX_CHAIN_SIZE is negotiated; or
2) the backend is vhost.

For vhost we also need vhost backend to support VIRTIO_F_MAX_CHAIN_SIZE.
We also need to send the max chain size to backend.


I think the limitation that we are dealing with is that the virtio-net
backend implementation in QEMU may pass more than
1024 iov to writev. In this case, the QEMU backend uses the
"max_chain_size" register to tell the driver the max size of the
vring_desc chain. So, I think it should be the device (backend)
sending the max size to the driver, rather than the other way
around.

For the vhost-user and vhost-net backend cases, they don't have
such limitation as the QEMU backend, right?
If no such limitation, I think without the negotiation of
VIRTIO_F_MAX_CHAIN_SIZE, the device should be safe to use 1024
tx queue size if it is the vhost backend.

Best,
Wei

Re: [Qemu-devel] [PATCH v4 0/6] spapr/xics: fix migration of older machine types

2017-06-08 Thread David Gibson

On Thu, Jun 08, 2017 at 03:42:32PM +0200, Greg Kurz wrote:
> I've provided answers for all comments from the v3 review that I deliberately
> don't address in v4.

I've merged patches 1-4.  5 & 6 I'm still reviewing.

> 
> v4: - some patches from v3 got merged
> - added some more preparatory cleanup in xics (patches 1,2)
> - merge cpu_setup() handler into realize() (patch 4)
> - see individual changelog for patches 3 and 6
> 
> v3: - preparatory cleanup in pnv (patch 1)
> - rework ICPState realization and vmstate registration (patches 2,3,4)
> - fix migration using dummy icp/server entries (patch 5)
> 
> v2: - some patches from v1 are already merged in ppc-for-2.10
> - added a new fix to a potential memory leak (patch 1)
> - consolidate dt_id computation (patch 3)
> - see individual changelogs for patch 2 and 4
> 
> I could successfully do the following on POWER8 host with full cores (SMT8):
> 
> 1) start a pseries-2.9 machine with QEMU 2.9:
> -smp cores=1,threads=2,maxcpus=8
> 2) hotplug a core:
> device_add host-spapr-cpu-core,core-id=4
> 3) migrate to QEMU 2.10 configured with core-id 0,4
> 4) hotplug another core:
> device_add host-spapr-cpu-core,core-id=2
> 5) migrate back to QEMU 2.9 configured with core-id 0,4,2
> 6) hotplug the core in the last available slot:
> device_add host-spapr-cpu-core,core-id=6
> 7) migrate to QEMU 2.10 configured with core-id 0,4,2,6
> 
> I could check that the guest is functional after each migration.
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH 2/3] exec: simplify address_space_get_iotlb_entry

2017-06-08 Thread David Gibson

On Fri, Jun 09, 2017 at 09:58:47AM +0800, Peter Xu wrote:
> On Thu, Jun 08, 2017 at 09:59:50PM +0300, Michael S. Tsirkin wrote:
> > On Thu, Jun 08, 2017 at 02:11:50PM +0800, Peter Xu wrote:
> > > On Wed, Jun 07, 2017 at 04:07:20PM +0300, Michael S. Tsirkin wrote:
> > > > On Wed, Jun 07, 2017 at 11:44:43AM +0800, Peter Xu wrote:
> > > > > On Wed, Jun 07, 2017 at 09:47:05AM +1000, David Gibson wrote:
> > > > > > On Tue, Jun 06, 2017 at 04:34:30PM +0200, Paolo Bonzini wrote:
> > > > > > > 
> > > > > > > 
> > > > > > > On 05/06/2017 05:07, Peter Xu wrote:
> > > > > > > > I'm not sure whether it'll be a good interface for IOTLB. AFAIU 
> > > > > > > > at
> > > > > > > > least for VT-d, the IOMMU translation is page aligned which is 
> > > > > > > > defined
> > > > > > > > by spec, so it makes sense that (again at least for VT-d) here 
> > > > > > > > we'd
> > > > > > > > better just use page_mask/addr_mask.
> > > > > > > > 
> > > > > > > > That's also how I know about IOMMU in general - I assume it does 
> > > > > > > > the
> > > > > > > > translations always with page masks (never arbitrary length), 
> > > > > > > > though
> > > > > > > > page size can differ from platform to platform, that's why here 
> > > > > > > > the
> > > > > > > > IOTLB interface used addr_mask, then it works for all 
> > > > > > > > platforms. I
> > > > > > > > don't know whether I'm 100% correct here though.
> > > > > > > > 
> > > > > > > > Maybe David/Paolo/... would comment as well?
> > > > > > > 
> > > > > > > I would ask David.  There are PowerPC MMUs that allow fast lookup 
> > > > > > > of
> > > > > > > arbitrarily-sized windows (not necessarily power of two),
> > > > > > 
> > > > > > Uh.. I'm not sure what you mean here.  You might be thinking of the
> > > > > > BATs which really old (32-bit) PowerPC MMUs had - those allow
> > > > > > arbitrary large block translations, but they do have to be a power 
> > > > > > of
> > > > > > two.
> > > > > > 
> > > > > > > so maybe the
> > > > > > > IOMMUs can do the same.
> > > > > > 
> > > > > > The only Power IOMMU I know about uses a fixed, power-of-two page 
> > > > > > size
> > > > > > per DMA window.
> > > > > 
> > > > > If so, I would still be inclined to keep using masks for QEMU IOTLB.
> > > > > Then, my first two patches should still stand.
> > > > > 
> > > > > I am just afraid that not using masks will diverge the emulation from
> > > > > real hardware and brings trouble one day.
> > > > > 
> > > > > For vhost IOTLB interface, it does not need to be strictly aligned to
> > > > > QEMU IOMMU IOTLB definition, and that's how it's working now (current
> > > > > > vhost iotlb allows arbitrary length, and I think it's good). So imho we
> > > > > don't really need to worry about the performance - after all, we can
> > > > > do everything customized for vhost, just like what patch 3 did (yeah,
> > > > > it can be better...).
> > > > > 
> > > > > Thanks,
> > > > 
> > > > Pre-faults is also something that does not happen on real hardware.
> > > > And it's about security so a bigger issue.
> > > > 
> > > > If I had to choose between that and using non-power-of-2 in
> > > > the API, I'd go for non-power-of-2. Let backends that can only
> > > > support power of 2 split it up to multiple transactions.
> > > 
> > > The problem is that when I was fixing the problem that vhost had with
> > > PT (a764040, "exec: abstract address_space_do_translate()"), I did
> > > > break the IOTLB translation a bit (it was using page masks). IMHO we
> > > need to fix it first for correctness (patch 1/2).
> > > 
> > > For patch 3, if we can have Jason's patch to allow dynamic
> > > iommu_platform switching, that'll be the best, then I can rewrite
> > > patch 3 with the switching logic rather than caching anything. But
> > > IMHO that can be separated from patch 1/2 if you like.
> > > 
> > > Or do you have better suggestion on how should we fix it?
> > > 
> > > Thanks,
> > 
> > Can we drop masks completely and replace with length? I think we
> > should do that instead of trying to fix masks.
> 
> Do you mean to modify IOMMUTLBEntry.addr_mask into length?
> 
> Again, I am not sure this is good... At least we need to get ack from
> David since spapr should be the initial user of it, and possibly also
> Alex since vfio should be assuming that (IIUC both in QEMU and kernel)
> addr_mask is page masks rather than arbitrary length.

So, I don't see that using size instead of mask would be a particular
problem for spapr.  However, I also don't see any advantage to
switching.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH v3 3/5] xics: setup cpu at realize time

2017-06-08 Thread David Gibson

On Thu, Jun 08, 2017 at 10:45:30AM +0200, Greg Kurz wrote:
> On Thu, 8 Jun 2017 12:01:12 +1000
> David Gibson  wrote:
> 
> > On Wed, Jun 07, 2017 at 07:17:09PM +0200, Greg Kurz wrote:
> > > Until recently, spapr used to allocate ICPState objects for the lifetime
> > > of the machine. They would only be associated to vCPUs in xics_cpu_setup()
> > > when plugging a CPU core.
> > > 
> > > Now that ICPState objects have the same lifecycle as vCPUs, it is
> > > possible to associate them during realization.
> > > 
> > > This patch hence open-codes xics_cpu_setup() in icp_realize(). The vCPU
> > > is passed as a property. Note that vCPU now needs to be realized first
> > > for the IRQs to be allocated. It also needs to be reset before ICPState
> > > realization in order to synchronize with KVM.  
> > 
> > Ok, what enforces those ordering constraints?
> > 
> 
> I'm not sure about what you're asking... I had to re-order because
> xics_cpu_setup() used to be called after the vCPU is realized and
> put in PAPR mode.

Duh, sorry, I wasn't thinking to ask about realize order, since that's
manual and you've re-ordered it to be correct.

You also mention that reset order matters, and I'm less clear on what
guarantees that the reset handlers for the components get called in
the right order.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH RFC] spapr: ignore interrupts during reset state

2017-06-08 Thread David Gibson

On Thu, Jun 08, 2017 at 12:06:08PM +0530, Nikunj A Dadhania wrote:
> Rebooting a SMP TCG guest is broken for both single/multi threaded TCG.

Ouch.  When exactly did this happen?  I know that smp boot used to
work under TCG, albeit very slowly.

> When reset happens, all the CPUs are in halted state. First CPU is brought out
> of reset and secondary CPUs would be initialized by the guest kernel using a
> rtas call start-cpu.
> 
> However, in case of TCG, decrementer interrupts keep on coming and waking the
> secondary CPUs up.

Ok.. how is that happening given that the secondary CPUs should have
MSR[EE] == 0?

> These secondary CPUs would see the decrementer interrupt pending, which makes
> cpu::has_work() to bring them out of wait loop and start executing
> tcg_exec_cpu().
> 
> The problem with this is all the CPUs wake up and start booting SLOF image,
> causing the following exception(4 CPUs TCG VM):

[snip]
> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> index d10808d..eb88bcb 100644
> --- a/target/ppc/cpu.h
> +++ b/target/ppc/cpu.h
> @@ -1013,6 +1013,13 @@ struct CPUPPCState {
>  int access_type; /* when a memory exception occurs, the access
>  type is stored here */
>  
> +/* CPU in reset, shouldn't process any interrupts.
> + *
> + * Decrementer interrupts in TCG can still wake the CPU up. Make sure 
> that
> + * when this variable is set, cpu_has_work_* should return false.
> + */
> +int in_reset;

So I'd really rather not add another flag to the cpu structure,
especially since we'd then need to migrate it as well.

I'm pretty sure there should be a way to inhibit the unwanted
interrupts using existing mechanisms.

> +
>  CPU_COMMON
>  
>  /* MMU context - only relevant for full system emulation */
> diff --git a/target/ppc/translate_init.c b/target/ppc/translate_init.c
> index 56a0ab2..64f4348 100644
> --- a/target/ppc/translate_init.c
> +++ b/target/ppc/translate_init.c
> @@ -8561,6 +8561,9 @@ static bool cpu_has_work_POWER7(CPUState *cs)
>  CPUPPCState *env = >env;
>  
>  if (cs->halted) {
> +if (env->in_reset) {
> +return false;
> +}
>  if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) {
>  return false;
>  }
> @@ -8718,6 +8721,9 @@ static bool cpu_has_work_POWER8(CPUState *cs)
>  CPUPPCState *env = >env;
>  
>  if (cs->halted) {
> +if (env->in_reset) {
> +return false;
> +}
>  if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) {
>  return false;
>  }
> @@ -8899,6 +8905,9 @@ static bool cpu_has_work_POWER9(CPUState *cs)
>  CPUPPCState *env = >env;
>  
>  if (cs->halted) {
> +if (env->in_reset) {
> +return false;
> +}
>  if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) {
>  return false;
>  }

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH v4 1/6] xics: introduce macros for ICP/ICS link properties

2017-06-08 Thread David Gibson

On Thu, Jun 08, 2017 at 07:26:35PM +0200, Cédric Le Goater wrote:
> On 06/08/2017 07:00 PM, Greg Kurz wrote:
> > On Thu, 8 Jun 2017 18:08:44 +0200
> > Cédric Le Goater  wrote:
> > 
> > FWIW, other people do that as well (see hw/i386/pc_q35.c for example).  
> >   
> 
>  well, I don't see the benefits of changing a string constant by a 
>  define. 
>   
> >>>
> >>> Improved semantics,  especially since the "xics" string appears in 
> >>> many places with different meanings.   
> >>
> >> ah ? If so, we should do a cleanup. The code seems consistent from 
> >> what I can see. xics is a general name for :
> >>
> >>'PowerPC interrupt controller (type 2)' 
> >>
> >> and it is mostly used as a prefix. There are no "xics" object, only a 
> > 
> > I'm only talking about "xics" as a property name actually:
> > 
> > $ git grep '"xics"'
> > hw/intc/xics.c:obj = object_property_get_link(OBJECT(dev), "xics", 
> > );
> > hw/intc/xics.c:obj = object_property_get_link(OBJECT(dev), "xics", 
> > );
> > hw/ppc/pnv.c:object_property_add_const_link(OBJECT(>psi), "xics",
> > hw/ppc/pnv.c:object_property_add_const_link(OBJECT(pnv_core), 
> > "xics",
> > hw/ppc/pnv_core.c:object_property_add_const_link(obj, "xics", 
> > OBJECT(xi), _abort);
> > hw/ppc/pnv_core.c:xi = object_property_get_link(OBJECT(dev), "xics", 
> > _err);
> > hw/ppc/pnv_psi.c:obj = object_property_get_link(OBJECT(dev), "xics", 
> > );
> > hw/ppc/pnv_psi.c:object_property_add_const_link(OBJECT(ics), "xics", 
> > obj,  _abort);
> > hw/ppc/spapr.c:object_property_add_const_link(obj, "xics", 
> > OBJECT(spapr), _abort);
> > hw/ppc/spapr_cpu_core.c:object_property_add_const_link(obj, "xics", 
> > OBJECT(spapr), _abort);
> > 
> > You have to read the code to know which ones are related.
> 
> The "xics" property link always point to the same object : 
> the XICSFabric object which is the machine, spapr or pnv. 
> 
> > With this patch applied, it is mostly obvious, even for the newbie:
> 
> ah. the goal is to know where in the code the link was set. 
> It can be even more complex with aliases.

There doesn't seem to be a strong convention about whether to use raw
property names or defines across qemu.  I'm not all that fussed either
way.

I do see one small advantage to use defines: if you make a typo, it
will probably result in a compile time error, whereas with a bare
string it won't show up until a runtime error.

In this case, I intend to take the macro patch, mostly just on the
basis of avoiding further delays to rework the remaining patches.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH] hw/cpu: core.c can be compiled as common object

2017-06-08 Thread David Gibson

On Thu, Jun 08, 2017 at 04:23:34PM +0200, Juan Quintela wrote:
> Thomas Huth  wrote:
> > There does not seem to be any target specific code in core.c, so we can
> > put it into "common-obj" instead of "obj" to compile it only once for
> > all targets.
> >
> > Signed-off-by: Thomas Huth 
> 
> It compiles.
> I can't see anything that is target dependent on core.c.
> 
> So
> 
> Reviewed-by: Juan Quintela 

I've merged this into my ppc-for-2.10 tree, on the basis that ppc is
the only user of core.c so far.  Any objections?

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH v2 2/2] Revert "spapr: fix memory hot-unplugging"

2017-06-08 Thread David Gibson

On Thu, Jun 08, 2017 at 07:27:43PM +0200, Laurent Vivier wrote:
> This reverts commit fe6824d12642b005c69123ecf8631f9b13553f8b.
> 
> Conflicts hw/ppc/spapr_drc.c, because get_index() has been renamed
> spapr_get_index().
> 
> This didn't fix the problem. Once the hotplug has been started
> some memory is allocated and some structures are allocated.
> We don't free it when we ignore the unplug, and we can't because
> they can be in use by the kernel.
> 
> Signed-off-by: Laurent Vivier 

Heh.  I've just been looking at awaiting_allocation_skippable and
trying - but failing - to work out what it accomplishes.  Applied to
ppc-for-2.10.

> ---
>  hw/ppc/spapr_drc.c | 20 +++-
>  include/hw/ppc/spapr_drc.h |  1 -
>  2 files changed, 3 insertions(+), 18 deletions(-)
> 
> diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> index 39e7f30..7605977 100644
> --- a/hw/ppc/spapr_drc.c
> +++ b/hw/ppc/spapr_drc.c
> @@ -140,17 +140,6 @@ static uint32_t set_allocation_state(sPAPRDRConnector 
> *drc,
>  if (!drc->dev) {
>  return RTAS_OUT_NO_SUCH_INDICATOR;
>  }
> -if (drc->awaiting_release && drc->awaiting_allocation) {
> -/* kernel is acknowledging a previous hotplug event
> - * while we are already removing it.
> - * it's safe to ignore awaiting_allocation here since we know the
> - * situation is predicated on the guest either already having 
> done
> - * so (boot-time hotplug), or never being able to acquire in the
> - * first place (hotplug followed by immediate unplug).
> - */
> -drc->awaiting_allocation_skippable = true;
> -return RTAS_OUT_NO_SUCH_INDICATOR;
> -}
>  }
>  
>  if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PCI) {
> @@ -401,11 +390,9 @@ static void detach(sPAPRDRConnector *drc, DeviceState 
> *d, Error **errp)
>  }
>  
>  if (drc->awaiting_allocation) {
> -if (!drc->awaiting_allocation_skippable) {
> -drc->awaiting_release = true;
> -trace_spapr_drc_awaiting_allocation(spapr_drc_index(drc));
> -return;
> -}
> +drc->awaiting_release = true;
> +trace_spapr_drc_awaiting_allocation(spapr_drc_index(drc));
> +return;
>  }
>  
>  drc->indicator_state = SPAPR_DR_INDICATOR_STATE_INACTIVE;
> @@ -428,7 +415,6 @@ static void detach(sPAPRDRConnector *drc, DeviceState *d, 
> Error **errp)
>  }
>  
>  drc->awaiting_release = false;
> -drc->awaiting_allocation_skippable = false;
>  g_free(drc->fdt);
>  drc->fdt = NULL;
>  drc->fdt_start_offset = 0;
> diff --git a/include/hw/ppc/spapr_drc.h b/include/hw/ppc/spapr_drc.h
> index c88e1be..84b58f0 100644
> --- a/include/hw/ppc/spapr_drc.h
> +++ b/include/hw/ppc/spapr_drc.h
> @@ -200,7 +200,6 @@ typedef struct sPAPRDRConnector {
>  bool awaiting_release;
>  bool signalled;
>  bool awaiting_allocation;
> -bool awaiting_allocation_skippable;
>  
>  /* device pointer, via link property */
>  DeviceState *dev;

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

[Qemu-devel] [PATCH v8 6/7] trace: [tcg, trivial] Re-align generated code

2017-06-08 Thread Emilio G. Cota

From: Lluís Vilanova 

Last patch removed a nesting level in generated code. Re-align all code
generated by backends to be 4-column aligned.

Signed-off-by: Lluís Vilanova 
Signed-off-by: Emilio G. Cota 
---
 scripts/tracetool/backend/dtrace.py |  4 ++--
 scripts/tracetool/backend/ftrace.py | 20 ++--
 scripts/tracetool/backend/log.py| 19 ++-
 scripts/tracetool/backend/simple.py |  4 ++--
 scripts/tracetool/backend/syslog.py |  6 +++---
 scripts/tracetool/backend/ust.py|  4 ++--
 6 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/scripts/tracetool/backend/dtrace.py 
b/scripts/tracetool/backend/dtrace.py
index c469cbd..c6812b7 100644
--- a/scripts/tracetool/backend/dtrace.py
+++ b/scripts/tracetool/backend/dtrace.py
@@ -6,7 +6,7 @@ DTrace/SystemTAP backend.
 """
 
 __author__ = "Lluís Vilanova "
-__copyright__  = "Copyright 2012-2016, Lluís Vilanova "
+__copyright__  = "Copyright 2012-2017, Lluís Vilanova "
 __license__= "GPL version 2 or (at your option) any later version"
 
 __maintainer__ = "Stefan Hajnoczi"
@@ -46,6 +46,6 @@ def generate_h_begin(events, group):
 
 
 def generate_h(event, group):
-out('QEMU_%(uppername)s(%(argnames)s);',
+out('QEMU_%(uppername)s(%(argnames)s);',
 uppername=event.name.upper(),
 argnames=", ".join(event.args.names()))
diff --git a/scripts/tracetool/backend/ftrace.py 
b/scripts/tracetool/backend/ftrace.py
index db9fe7a..dd0eda4 100644
--- a/scripts/tracetool/backend/ftrace.py
+++ b/scripts/tracetool/backend/ftrace.py
@@ -29,17 +29,17 @@ def generate_h(event, group):
 if len(event.args) > 0:
 argnames = ", " + argnames
 
-out('{',
-'char ftrace_buf[MAX_TRACE_STRLEN];',
-'int unused __attribute__ ((unused));',
-'int trlen;',
-'if (trace_event_get_state(%(event_id)s)) {',
-'trlen = snprintf(ftrace_buf, MAX_TRACE_STRLEN,',
-' "%(name)s " %(fmt)s "\\n" 
%(argnames)s);',
-'trlen = MIN(trlen, MAX_TRACE_STRLEN - 1);',
-'unused = write(trace_marker_fd, ftrace_buf, trlen);',
-'}',
+out('{',
+'char ftrace_buf[MAX_TRACE_STRLEN];',
+'int unused __attribute__ ((unused));',
+'int trlen;',
+'if (trace_event_get_state(%(event_id)s)) {',
+'trlen = snprintf(ftrace_buf, MAX_TRACE_STRLEN,',
+' "%(name)s " %(fmt)s "\\n" 
%(argnames)s);',
+'trlen = MIN(trlen, MAX_TRACE_STRLEN - 1);',
+'unused = write(trace_marker_fd, ftrace_buf, trlen);',
 '}',
+'}',
 name=event.name,
 args=event.args,
 event_id="TRACE_" + event.name.upper(),
diff --git a/scripts/tracetool/backend/log.py b/scripts/tracetool/backend/log.py
index 4f4a4d3..54f0a69 100644
--- a/scripts/tracetool/backend/log.py
+++ b/scripts/tracetool/backend/log.py
@@ -6,7 +6,7 @@ Stderr built-in backend.
 """
 
 __author__ = "Lluís Vilanova "
-__copyright__  = "Copyright 2012-2016, Lluís Vilanova "
+__copyright__  = "Copyright 2012-2017, Lluís Vilanova "
 __license__= "GPL version 2 or (at your option) any later version"
 
 __maintainer__ = "Stefan Hajnoczi"
@@ -35,14 +35,15 @@ def generate_h(event, group):
 else:
 cond = "trace_event_get_state(%s)" % ("TRACE_" + event.name.upper())
 
-out('if (%(cond)s) {',
-'struct timeval _now;',
-'gettimeofday(&_now, NULL);',
-'qemu_log_mask(LOG_TRACE, "%%d@%%zd.%%06zd:%(name)s " 
%(fmt)s "\\n",',
-'  getpid(),',
-'  (size_t)_now.tv_sec, (size_t)_now.tv_usec',
-'  %(argnames)s);',
-'}',
+out('if (%(cond)s) {',
+'struct timeval _now;',
+'gettimeofday(&_now, NULL);',
+'qemu_log_mask(LOG_TRACE,',
+'  "%%d@%%zd.%%06zd:%(name)s " %(fmt)s "\\n",',
+'  getpid(),',
+'  (size_t)_now.tv_sec, (size_t)_now.tv_usec',
+'  %(argnames)s);',
+'}',
 cond=cond,
 name=event.name,
 fmt=event.fmt.rstrip("\n"),
diff --git a/scripts/tracetool/backend/simple.py 
b/scripts/tracetool/backend/simple.py
index 4acc06e..f983670 100644
--- a/scripts/tracetool/backend/simple.py
+++ b/scripts/tracetool/backend/simple.py
@@ -6,7 +6,7 @@ Simple built-in backend.
 """
 
 __author__ = "Lluís Vilanova

[Qemu-devel] [PATCH v8 1/7] exec: [tcg] Refactor flush of per-CPU virtual TB cache

2017-06-08 Thread Emilio G. Cota

From: Lluís Vilanova 

The function is reused in later patches.

Signed-off-by: Lluís Vilanova 
Reviewed-by: Richard Henderson 
Signed-off-by: Emilio G. Cota 
---
 cputlb.c|  2 +-
 include/exec/exec-all.h |  6 ++
 translate-all.c | 15 ++-
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/cputlb.c b/cputlb.c
index 743776a..6a2b762 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -118,7 +118,7 @@ static void tlb_flush_nocheck(CPUState *cpu)
 
 memset(env->tlb_table, -1, sizeof(env->tlb_table));
 memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
-memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
+tb_flush_jmp_cache_all(cpu);
 
 env->vtlb_index = 0;
 env->tlb_flush_addr = -1;
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 724ec73..b0281b0 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -366,6 +366,12 @@ struct TranslationBlock {
 };
 
 void tb_free(TranslationBlock *tb);
+/**
+ * tb_flush_jmp_cache_all:
+ *
+ * Flush the virtual translation block cache.
+ */
+void tb_flush_jmp_cache_all(CPUState *env);
 void tb_flush(CPUState *cpu);
 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
 TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
diff --git a/translate-all.c b/translate-all.c
index 966747a..8a5dc19 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -931,11 +931,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data 
tb_flush_count)
 }
 
 CPU_FOREACH(cpu) {
-int i;
-
-for (i = 0; i < TB_JMP_CACHE_SIZE; ++i) {
-atomic_set(>tb_jmp_cache[i], NULL);
-}
+tb_flush_jmp_cache_all(cpu);
 }
 
 tcg_ctx.tb_ctx.nb_tbs = 0;
@@ -1733,6 +1729,15 @@ void tb_check_watchpoint(CPUState *cpu)
 }
 }
 
+void tb_flush_jmp_cache_all(CPUState *cpu)
+{
+int i;
+
+for (i = 0; i < TB_JMP_CACHE_SIZE; ++i) {
+atomic_set(>tb_jmp_cache[i], NULL);
+}
+}
+
 #ifndef CONFIG_USER_ONLY
 /* in deterministic execution mode, instructions doing device I/Os
  * must be at the end of the TB.
-- 
2.7.4

Re: [Qemu-devel] [PATCH] Add manpage for QEMU Backup Tool

2017-06-08 Thread no-reply

Hi,

This series seems to have some coding style problems. See output below for
more information:

Subject: [Qemu-devel] [PATCH] Add manpage for QEMU Backup Tool
Message-id: 1496957498-17127-1-git-send-email-chugh.ish...@research.iiit.ac.in
Type: series

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
71c5bbc Add manpage for QEMU Backup Tool

=== OUTPUT BEGIN ===
Checking PATCH 1/1: Add manpage for QEMU Backup Tool...
ERROR: trailing whitespace
#28: FILE: contrib/backup/qemu-backup.texi:1:
+   $

ERROR: trailing whitespace
#29: FILE: contrib/backup/qemu-backup.texi:2:
+ $

ERROR: trailing whitespace
#117: FILE: contrib/backup/qemu-backup.texi:90:
+ $

ERROR: trailing whitespace
#120: FILE: contrib/backup/qemu-backup.texi:93:
+ $

ERROR: trailing whitespace
#128: FILE: contrib/backup/qemu-backup.texi:101:
+ $

ERROR: trailing whitespace
#132: FILE: contrib/backup/qemu-backup.texi:105:
+ $

ERROR: trailing whitespace
#134: FILE: contrib/backup/qemu-backup.texi:107:
+ $

ERROR: trailing whitespace
#137: FILE: contrib/backup/qemu-backup.texi:110:
+ $

ERROR: trailing whitespace
#143: FILE: contrib/backup/qemu-backup.texi:116:
+ $

ERROR: trailing whitespace
#147: FILE: contrib/backup/qemu-backup.texi:120:
+ $

ERROR: trailing whitespace
#151: FILE: contrib/backup/qemu-backup.texi:124:
+ $

ERROR: trailing whitespace
#155: FILE: contrib/backup/qemu-backup.texi:128:
+ $

ERROR: trailing whitespace
#161: FILE: contrib/backup/qemu-backup.texi:134:
+ $

ERROR: trailing whitespace
#163: FILE: contrib/backup/qemu-backup.texi:136:
+ $

ERROR: trailing whitespace
#166: FILE: contrib/backup/qemu-backup.texi:139:
+ $

ERROR: trailing whitespace
#168: FILE: contrib/backup/qemu-backup.texi:141:
+ $

ERROR: trailing whitespace
#173: FILE: contrib/backup/qemu-backup.texi:146:
+ $

total: 17 errors, 0 warnings, 155 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

=== OUTPUT END ===

Test command exited with code: 1


---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-de...@freelists.org

[Qemu-devel] [PATCH v8 0/7] trace: [tcg] Optimize per-vCPU tracing states

2017-06-08 Thread Emilio G. Cota

This is my own respin of Lluís' v7:
  https://lists.gnu.org/archive/html/qemu-devel/2017-01/msg02741.html

Changes from v7:

- Ported to current dev tree.

- Allocate cpu->trace_dstate statically. This
  * allows us to drop the event_count inline patch.
  * simplifies and improves the performance of accessing cpu->trace_dstate:
we just need to dereference, instead of going through bitmap_copy and
an intermediate unsigned long.

- If we try to register more CPU events than the max we support (there's a
  constant for it), drop the event and tell the user with error_report. But
  really this is a bug, since we control what CPU events are traceable. Should
  we abort() as well?

- Added rth's R-b tag

- Addressed my own comments:
  * rename tb->trace_vcpu_dstate to the shorter tb->trace_ds
  * use uint32_t for tb->trace_ds instead of a typedef
  * add BUILD_BUG_ON check to make sure tb->trace_ds is big enough
  * fix xxhash

- Do not add trace_dstate to tb_htable_lookup, since we can grab it from
  cpu->trace_dstate.

This patchset applies cleanly on top of rth's tcg-next (a01792e1e).

Thanks,

Emilio

Emilio G. Cota (1):
  cpu: allocate cpu->trace_dstate in place

Lluís Vilanova (6):
  exec: [tcg] Refactor flush of per-CPU virtual TB cache
  trace: [tcg] Delay changes to dynamic state when translating
  exec: [tcg] Use different TBs according to the vCPU's dynamic tracing
state
  trace: [tcg] Do not generate TCG code to trace dynamically-disabled
events
  trace: [tcg, trivial] Re-align generated code
  trace: [trivial] Statically enable all guest events

 cpu-exec.c   |  8 ++--
 cputlb.c |  2 +-
 include/exec/exec-all.h  |  9 +
 include/exec/tb-hash-xx.h|  7 +--
 include/exec/tb-hash.h   |  5 +++--
 include/qom/cpu.h| 12 ++--
 qom/cpu.c|  8 
 scripts/tracetool/__init__.py|  3 ++-
 scripts/tracetool/backend/dtrace.py  |  4 ++--
 scripts/tracetool/backend/ftrace.py  | 20 ++--
 scripts/tracetool/backend/log.py | 19 ++-
 scripts/tracetool/backend/simple.py  |  4 ++--
 scripts/tracetool/backend/syslog.py  |  6 +++---
 scripts/tracetool/backend/ust.py |  4 ++--
 scripts/tracetool/format/h.py| 26 +++---
 scripts/tracetool/format/tcg_h.py| 21 +
 scripts/tracetool/format/tcg_helper_c.py |  5 +++--
 tcg-runtime.c|  3 ++-
 tests/qht-bench.c|  2 +-
 trace-events |  6 +++---
 trace/control-target.c   | 22 +++---
 trace/control.c  |  9 -
 trace/control.h  |  3 +++
 translate-all.c  | 25 ++---
 24 files changed, 154 insertions(+), 79 deletions(-)

-- 
2.7.4

Re: [Qemu-devel] [PATCH] timer/aspeed: fix timer enablement when a reload is not set

2017-06-08 Thread Andrew Jeffery

On Tue, 2017-06-06 at 10:55 +0200, Cédric Le Goater wrote:
> When a timer is enabled before a reload value is set, the controller
> waits for a reload value to be set before starting decrementing. This
> fix tries to cover that case by changing the timer expiry only when
> a reload value is valid.
> 
> > Signed-off-by: Cédric Le Goater 
> ---
>  hw/timer/aspeed_timer.c | 37 +
>  1 file changed, 29 insertions(+), 8 deletions(-)
> 
> diff --git a/hw/timer/aspeed_timer.c b/hw/timer/aspeed_timer.c
> index 9b70ee09b07f..50acbf530a3a 100644
> --- a/hw/timer/aspeed_timer.c
> +++ b/hw/timer/aspeed_timer.c
> @@ -130,15 +130,26 @@ static uint64_t calculate_next(struct AspeedTimer *t)
>  next = seq[1];
>  } else if (now < seq[2]) {
>  next = seq[2];
> -} else {
> +} else if (t->reload) {
>  reload_ns = muldiv64(t->reload, NANOSECONDS_PER_SECOND, rate);
>  t->start = now - ((now - t->start) % reload_ns);
> +} else {
> +/* no reload value, return 0 */
> +break;
>  }
>  }
>  
>  return next;
>  }
>  
> +static void aspeed_timer_mod(AspeedTimer *t)
> +{
> +uint64_t next = calculate_next(t);
> +if (next) {
> +timer_mod(&t->timer, next);
> +}
> +}
> +
>  static void aspeed_timer_expire(void *opaque)
>  {
>  AspeedTimer *t = opaque;
> @@ -164,7 +175,7 @@ static void aspeed_timer_expire(void *opaque)
>  qemu_set_irq(t->irq, t->level);
>  }
>  
> -timer_mod(>timer, calculate_next(t));
> +aspeed_timer_mod(t);
>  }
>  
>  static uint64_t aspeed_timer_get_value(AspeedTimer *t, int reg)
> @@ -227,10 +238,23 @@ static void aspeed_timer_set_value(AspeedTimerCtrlState 
> *s, int timer, int reg,
> uint32_t value)
>  {
>  AspeedTimer *t;
> +uint32_t old_reload;
>  
>  trace_aspeed_timer_set_value(timer, reg, value);
>  t = >timers[timer];
>  switch (reg) {
> +case TIMER_REG_RELOAD:
> +old_reload = t->reload;
> +t->reload = value;
> +
> +/* If the reload value was not previously set, or zero, and
> + * the current value is valid, try to start the timer if it is
> + * enabled.
> + */
> +if (old_reload || !t->reload) {
> +break;
> +}

Maybe I need more caffeine, but I initially struggled to reconcile the
condition with the comment, as the condition checks the inverse in
order to break while the comment discusses the non-breaking case. 

However, after trying for several minutes, I'm not sure there's an easy
way to improve it.

> +
>  case TIMER_REG_STATUS:
>  if (timer_enabled(t)) {
>  uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
> @@ -238,17 +262,14 @@ static void aspeed_timer_set_value(AspeedTimerCtrlState 
> *s, int timer, int reg,
>  uint32_t rate = calculate_rate(t);
>  
>  t->start += muldiv64(delta, NANOSECONDS_PER_SECOND, rate);
> -timer_mod(>timer, calculate_next(t));
> +aspeed_timer_mod(t);
>  }
>  break;
> -case TIMER_REG_RELOAD:
> -t->reload = value;
> -break;
>  case TIMER_REG_MATCH_FIRST:
>  case TIMER_REG_MATCH_SECOND:
>  t->match[reg - 2] = value;
>  if (timer_enabled(t)) {
> -timer_mod(>timer, calculate_next(t));
> +aspeed_timer_mod(t);
>  }
>  break;
>  default:
> @@ -268,7 +289,7 @@ static void aspeed_timer_ctrl_enable(AspeedTimer *t, bool 
> enable)
>  trace_aspeed_timer_ctrl_enable(t->id, enable);
>  if (enable) {
>  t->start = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
> -timer_mod(>timer, calculate_next(t));
> +aspeed_timer_mod(t);
>  } else {
>  timer_del(>timer);
>  }

Reviewed-by: Andrew Jeffery 

signature.asc
Description: This is a digitally signed message part

[Qemu-devel] [PATCH v8 2/7] cpu: allocate cpu->trace_dstate in place

2017-06-08 Thread Emilio G. Cota

There's little point in dynamically allocating the bitmap if we
know at compile-time the max number of events we want to support.
Thus, make room in the struct for the bitmap, which will make things
easier later: this paves the way for upcoming changes, in which
we'll use a u32 to fully capture cpu->trace_dstate.

This change also increases performance by saving a dereference and
improving locality--note that this is important since upcoming work
makes reading this bitmap fairly common.

Signed-off-by: Emilio G. Cota 
---
 include/qom/cpu.h | 9 +++--
 qom/cpu.c | 8 
 trace/control.c   | 9 -
 3 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 89ddb68..bc6e20f 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -259,6 +259,7 @@ typedef void (*run_on_cpu_func)(CPUState *cpu, 
run_on_cpu_data data);
 struct qemu_work_item;
 
 #define CPU_UNSET_NUMA_NODE_ID -1
+#define CPU_TRACE_DSTATE_MAX_EVENTS 32
 
 /**
  * CPUState:
@@ -373,12 +374,8 @@ struct CPUState {
 struct KVMState *kvm_state;
 struct kvm_run *kvm_run;
 
-/*
- * Used for events with 'vcpu' and *without* the 'disabled' properties.
- * Dynamically allocated based on bitmap requried to hold up to
- * trace_get_vcpu_event_count() entries.
- */
-unsigned long *trace_dstate;
+/* Used for events with 'vcpu' and *without* the 'disabled' properties */
+DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS);
 
 /* TODO Move common fields from CPUArchState here. */
 int cpu_index; /* used by alpha TCG */
diff --git a/qom/cpu.c b/qom/cpu.c
index 5069876..69fbb9c 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -382,7 +382,6 @@ static void cpu_common_unrealizefn(DeviceState *dev, Error 
**errp)
 
 static void cpu_common_initfn(Object *obj)
 {
-uint32_t count;
 CPUState *cpu = CPU(obj);
 CPUClass *cc = CPU_GET_CLASS(obj);
 
@@ -397,18 +396,11 @@ static void cpu_common_initfn(Object *obj)
 QTAILQ_INIT(>breakpoints);
 QTAILQ_INIT(>watchpoints);
 
-count = trace_get_vcpu_event_count();
-if (count) {
-cpu->trace_dstate = bitmap_new(count);
-}
-
 cpu_exec_initfn(cpu);
 }
 
 static void cpu_common_finalize(Object *obj)
 {
-CPUState *cpu = CPU(obj);
-g_free(cpu->trace_dstate);
 }
 
 static int64_t cpu_common_get_arch_id(CPUState *cpu)
diff --git a/trace/control.c b/trace/control.c
index 9b157b0..83740aa 100644
--- a/trace/control.c
+++ b/trace/control.c
@@ -65,8 +65,15 @@ void trace_event_register_group(TraceEvent **events)
 size_t i;
 for (i = 0; events[i] != NULL; i++) {
 events[i]->id = next_id++;
-if (events[i]->vcpu_id != TRACE_VCPU_EVENT_NONE) {
+if (events[i]->vcpu_id == TRACE_VCPU_EVENT_NONE) {
+continue;
+}
+
+if (likely(next_vcpu_id < CPU_TRACE_DSTATE_MAX_EVENTS)) {
 events[i]->vcpu_id = next_vcpu_id++;
+} else {
+error_report("WARNING: too many vcpu trace events; dropping '%s'",
+ events[i]->name);
 }
 }
 event_groups = g_renew(TraceEventGroup, event_groups, nevent_groups + 1);
-- 
2.7.4

[Qemu-devel] [PATCH v8 5/7] trace: [tcg] Do not generate TCG code to trace dynamically-disabled events

2017-06-08 Thread Emilio G. Cota

From: Lluís Vilanova 

If an event is dynamically disabled, the TCG code that calls the
execution-time tracer is not generated.

This removes the overhead of execution-time tracers for dynamically disabled
events. As a bonus, it also avoids checking the event state when the
execution-time tracer is called from TCG-generated code (since otherwise
TCG would simply not call it).

Signed-off-by: Lluís Vilanova 
Signed-off-by: Emilio G. Cota 
---
 scripts/tracetool/__init__.py|  3 ++-
 scripts/tracetool/format/h.py| 26 +++---
 scripts/tracetool/format/tcg_h.py| 21 +
 scripts/tracetool/format/tcg_helper_c.py |  5 +++--
 4 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/scripts/tracetool/__init__.py b/scripts/tracetool/__init__.py
index 1ffbc1d..d4c204a 100644
--- a/scripts/tracetool/__init__.py
+++ b/scripts/tracetool/__init__.py
@@ -6,7 +6,7 @@ Machinery for generating tracing-related intermediate files.
 """
 
 __author__ = "Lluís Vilanova "
-__copyright__  = "Copyright 2012-2016, Lluís Vilanova "
+__copyright__  = "Copyright 2012-2017, Lluís Vilanova "
 __license__= "GPL version 2 or (at your option) any later version"
 
 __maintainer__ = "Stefan Hajnoczi"
@@ -268,6 +268,7 @@ class Event(object):
 return self._FMT.findall(self.fmt)
 
 QEMU_TRACE   = "trace_%(name)s"
+QEMU_TRACE_NOCHECK   = "_nocheck__" + QEMU_TRACE
 QEMU_TRACE_TCG   = QEMU_TRACE + "_tcg"
 QEMU_DSTATE  = "_TRACE_%(NAME)s_DSTATE"
 QEMU_EVENT   = "_TRACE_%(NAME)s_EVENT"
diff --git a/scripts/tracetool/format/h.py b/scripts/tracetool/format/h.py
index 3682f4e..aecf249 100644
--- a/scripts/tracetool/format/h.py
+++ b/scripts/tracetool/format/h.py
@@ -6,7 +6,7 @@ trace/generated-tracers.h
 """
 
 __author__ = "Lluís Vilanova "
-__copyright__  = "Copyright 2012-2016, Lluís Vilanova "
+__copyright__  = "Copyright 2012-2017, Lluís Vilanova "
 __license__= "GPL version 2 or (at your option) any later version"
 
 __maintainer__ = "Stefan Hajnoczi"
@@ -49,6 +49,19 @@ def generate(events, backend, group):
 backend.generate_begin(events, group)
 
 for e in events:
+# tracer without checks
+out('',
+'static inline void %(api)s(%(args)s)',
+'{',
+api=e.api(e.QEMU_TRACE_NOCHECK),
+args=e.args)
+
+if "disable" not in e.properties:
+backend.generate(e, group)
+
+out('}')
+
+# tracer wrapper with checks (per-vCPU tracing)
 if "vcpu" in e.properties:
 trace_cpu = next(iter(e.args))[1]
 cond = "trace_event_get_vcpu_state(%(cpu)s,"\
@@ -63,16 +76,15 @@ def generate(events, backend, group):
 'static inline void %(api)s(%(args)s)',
 '{',
 'if (%(cond)s) {',
+'%(api_nocheck)s(%(names)s);',
+'}',
+'}',
 api=e.api(),
+api_nocheck=e.api(e.QEMU_TRACE_NOCHECK),
 args=e.args,
+names=", ".join(e.args.names()),
 cond=cond)
 
-if "disable" not in e.properties:
-backend.generate(e, group)
-
-out('}',
-'}')
-
 backend.generate_end(events, group)
 
 out('#endif /* TRACE_%s_GENERATED_TRACERS_H */' % group.upper())
diff --git a/scripts/tracetool/format/tcg_h.py 
b/scripts/tracetool/format/tcg_h.py
index db55f52..1651cc3 100644
--- a/scripts/tracetool/format/tcg_h.py
+++ b/scripts/tracetool/format/tcg_h.py
@@ -6,7 +6,7 @@ Generate .h file for TCG code generation.
 """
 
 __author__ = "Lluís Vilanova "
-__copyright__  = "Copyright 2012-2016, Lluís Vilanova "
+__copyright__  = "Copyright 2012-2017, Lluís Vilanova "
 __license__= "GPL version 2 or (at your option) any later version"
 
 __maintainer__ = "Stefan Hajnoczi"
@@ -46,7 +46,7 @@ def generate(events, backend, group):
 
 for e in events:
 # just keep one of them
-if "tcg-trans" not in e.properties:
+if "tcg-exec" not in e.properties:
 continue
 
 out('static inline void %(name_tcg)s(%(args)s)',
@@ -58,12 +58,25 @@ def generate(events, backend, group):
 args_trans = e.original.event_trans.args
 args_exec = tracetool.vcpu.transform_args(
 "tcg_helper_c", e.original.event_exec, "wrapper")
+if "vcpu" in e.properties:
+trace_cpu = e.args.names()[0]
+cond = "trace_event_get_vcpu_state(%(cpu)s,"\
+   " TRACE_%(id)s)"\
+   % dict(
+   cpu=trace_cpu,
+

[Qemu-devel] [PATCH v8 4/7] exec: [tcg] Use different TBs according to the vCPU's dynamic tracing state

2017-06-08 Thread Emilio G. Cota

From: Lluís Vilanova 

Every vCPU now uses a separate set of TBs for each set of dynamic
tracing event state values. Each set of TBs can be used by any number of
vCPUs to maximize TB reuse when vCPUs have the same tracing state.

This feature is later used by tracetool to optimize tracing of guest
code events.

The maximum number of TB sets is defined as 2^E, where E is the number
of events that have the 'vcpu' property (their state is stored in
CPUState->trace_dstate).

For this to work, a change on the dynamic tracing state of a vCPU will
force it to flush its virtual TB cache (which is only indexed by
address), and fall back to the physical TB cache (which now contains the
vCPU's dynamic tracing state as part of the hashing function).

Signed-off-by: Lluís Vilanova 
Reviewed-by: Richard Henderson 
[cota:
 - rename tb->trace_vcpu_dstate to the shorter tb->trace_ds
 - use uint32_t for tb->trace_ds instead of a typedef
 - add BUILD_BUG_ON check to make sure tb->trace_ds is big enough
 - fix xxhash
 - directly dereference cpu->trace_dstate instead of using bitmap_copy etc.
 - drop trace_dstate parameter from tb_htable_lookup; grab it directly from cpu.
]
Signed-off-by: Emilio G. Cota 
---
 cpu-exec.c|  8 ++--
 include/exec/exec-all.h   |  3 +++
 include/exec/tb-hash-xx.h |  7 +--
 include/exec/tb-hash.h|  5 +++--
 tcg-runtime.c |  3 ++-
 tests/qht-bench.c |  2 +-
 trace/control-target.c|  1 +
 trace/control.h   |  3 +++
 translate-all.c   | 10 --
 9 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/cpu-exec.c b/cpu-exec.c
index 5b181c1..b6679d9 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -280,6 +280,7 @@ struct tb_desc {
 CPUArchState *env;
 tb_page_addr_t phys_page1;
 uint32_t flags;
+uint32_t trace_ds;
 };
 
 static bool tb_cmp(const void *p, const void *d)
@@ -291,6 +292,7 @@ static bool tb_cmp(const void *p, const void *d)
 tb->page_addr[0] == desc->phys_page1 &&
 tb->cs_base == desc->cs_base &&
 tb->flags == desc->flags &&
+tb->trace_ds == desc->trace_ds &&
 !atomic_read(>invalid)) {
 /* check next page if needed */
 if (tb->page_addr[1] == -1) {
@@ -319,10 +321,11 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, 
target_ulong pc,
 desc.env = (CPUArchState *)cpu->env_ptr;
 desc.cs_base = cs_base;
 desc.flags = flags;
+desc.trace_ds = *cpu->trace_dstate;
 desc.pc = pc;
 phys_pc = get_page_addr_code(desc.env, pc);
 desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
-h = tb_hash_func(phys_pc, pc, flags);
+h = tb_hash_func(phys_pc, pc, flags, *cpu->trace_dstate);
 return qht_lookup(_ctx.tb_ctx.htable, tb_cmp, , h);
 }
 
@@ -342,7 +345,8 @@ static inline TranslationBlock *tb_find(CPUState *cpu,
 cpu_get_tb_cpu_state(env, , _base, );
 tb = atomic_rcu_read(>tb_jmp_cache[tb_jmp_cache_hash_func(pc)]);
 if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
- tb->flags != flags)) {
+ tb->flags != flags ||
+ tb->trace_ds != *cpu->trace_dstate)) {
 tb = tb_htable_lookup(cpu, pc, cs_base, flags);
 if (!tb) {
 
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index b0281b0..6bdc6e5 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -324,6 +324,9 @@ struct TranslationBlock {
 #define CF_USE_ICOUNT  0x2
 #define CF_IGNORE_ICOUNT 0x4 /* Do not generate icount code */
 
+/* Tracing Dynamic State (hence '_ds') used to generate this TB */
+uint32_t trace_ds;
+
 uint16_t invalid;
 
 void *tc_ptr;/* pointer to the translated code */
diff --git a/include/exec/tb-hash-xx.h b/include/exec/tb-hash-xx.h
index 2c40b5c..6cd3022 100644
--- a/include/exec/tb-hash-xx.h
+++ b/include/exec/tb-hash-xx.h
@@ -49,7 +49,7 @@
  * contiguous in memory.
  */
 static inline
-uint32_t tb_hash_func5(uint64_t a0, uint64_t b0, uint32_t e)
+uint32_t tb_hash_func6(uint64_t a0, uint64_t b0, uint32_t e, uint32_t f)
 {
 uint32_t v1 = TB_HASH_XX_SEED + PRIME32_1 + PRIME32_2;
 uint32_t v2 = TB_HASH_XX_SEED + PRIME32_2;
@@ -78,11 +78,14 @@ uint32_t tb_hash_func5(uint64_t a0, uint64_t b0, uint32_t e)
 v4 *= PRIME32_1;
 
 h32 = rol32(v1, 1) + rol32(v2, 7) + rol32(v3, 12) + rol32(v4, 18);
-h32 += 20;
+h32 += 24;
 
 h32 += e * PRIME32_3;
 h32  = rol32(h32, 17) * PRIME32_4;
 
+h32 += f * PRIME32_3;
+h32  = rol32(h32, 17) * PRIME32_4;
+
 h32 ^= h32 >> 15;
 h32 *= PRIME32_2;
 h32 ^= h32 >> 13;
diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h
index b1fe2d0..d64c2d9 100644
--- a/include/exec/tb-hash.h
+++ b/include/exec/tb-hash.h
@@ -58,9 +58,10 @@ static inline unsigned int 
tb_jmp_cache_hash_func(target_ulong pc)
 #endif /* CONFIG_SOFTMMU */
 
 static inline
-uint32_t

[Qemu-devel] [PATCH v8 7/7] trace: [trivial] Statically enable all guest events

2017-06-08 Thread Emilio G. Cota

From: Lluís Vilanova 

The optimizations in this series make it feasible to have the guest events
available on all builds.

Signed-off-by: Lluís Vilanova 
Signed-off-by: Emilio G. Cota 
---
 trace-events | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/trace-events b/trace-events
index d7a4d94..b040d7e 100644
--- a/trace-events
+++ b/trace-events
@@ -125,7 +125,7 @@ vcpu guest_cpu_reset(void)
 #
 # Mode: user, softmmu
 # Targets: TCG(all)
-disable vcpu tcg guest_mem_before(TCGv vaddr, uint8_t info) "info=%d", 
"vaddr=0x%016"PRIx64" info=%d"
+vcpu tcg guest_mem_before(TCGv vaddr, uint8_t info) "info=%d", 
"vaddr=0x%016"PRIx64" info=%d"
 
 # @num: System call number.
 # @arg*: System call argument value.
@@ -134,7 +134,7 @@ disable vcpu tcg guest_mem_before(TCGv vaddr, uint8_t info) 
"info=%d", "vaddr=0x
 #
 # Mode: user
 # Targets: TCG(all)
-disable vcpu guest_user_syscall(uint64_t num, uint64_t arg1, uint64_t arg2, 
uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, uint64_t arg7, 
uint64_t arg8) "num=0x%016"PRIx64" arg1=0x%016"PRIx64" arg2=0x%016"PRIx64" 
arg3=0x%016"PRIx64" arg4=0x%016"PRIx64" arg5=0x%016"PRIx64" arg6=0x%016"PRIx64" 
arg7=0x%016"PRIx64" arg8=0x%016"PRIx64
+vcpu guest_user_syscall(uint64_t num, uint64_t arg1, uint64_t arg2, uint64_t 
arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, uint64_t arg7, uint64_t 
arg8) "num=0x%016"PRIx64" arg1=0x%016"PRIx64" arg2=0x%016"PRIx64" 
arg3=0x%016"PRIx64" arg4=0x%016"PRIx64" arg5=0x%016"PRIx64" arg6=0x%016"PRIx64" 
arg7=0x%016"PRIx64" arg8=0x%016"PRIx64
 
 # @num: System call number.
 # @ret: System call result value.
@@ -143,4 +143,4 @@ disable vcpu guest_user_syscall(uint64_t num, uint64_t 
arg1, uint64_t arg2, uint
 #
 # Mode: user
 # Targets: TCG(all)
-disable vcpu guest_user_syscall_ret(uint64_t num, uint64_t ret) 
"num=0x%016"PRIx64" ret=0x%016"PRIx64
+vcpu guest_user_syscall_ret(uint64_t num, uint64_t ret) "num=0x%016"PRIx64" 
ret=0x%016"PRIx64
-- 
2.7.4

[Qemu-devel] [PATCH v8 3/7] trace: [tcg] Delay changes to dynamic state when translating

2017-06-08 Thread Emilio G. Cota

From: Lluís Vilanova 

This keeps consistency across all decisions taken during translation
when the dynamic state of a vCPU is changed in the middle of translating
some guest code.

Signed-off-by: Lluís Vilanova 
Reviewed-by: Richard Henderson 
[cota: use CPU_TRACE_DSTATE_MAX_EVENTS instead of trace_get_vcpu_event_count()]
Signed-off-by: Emilio G. Cota 
---
 include/qom/cpu.h  |  3 +++
 trace/control-target.c | 21 ++---
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index bc6e20f..29f4a32 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -303,6 +303,8 @@ struct qemu_work_item;
  * @kvm_fd: vCPU file descriptor for KVM.
  * @work_mutex: Lock to prevent multiple access to queued_work_*.
  * @queued_work_first: First asynchronous work pending.
+ * @trace_dstate_delayed: Delayed changes to trace_dstate (includes all changes
+ *to @trace_dstate).
  * @trace_dstate: Dynamic tracing state of events for this vCPU (bitmask).
  *
  * State of one CPU core or thread.
@@ -375,6 +377,7 @@ struct CPUState {
 struct kvm_run *kvm_run;
 
 /* Used for events with 'vcpu' and *without* the 'disabled' properties */
+DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS);
 DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS);
 
 /* TODO Move common fields from CPUArchState here. */
diff --git a/trace/control-target.c b/trace/control-target.c
index 6266e63..416d14e 100644
--- a/trace/control-target.c
+++ b/trace/control-target.c
@@ -1,13 +1,14 @@
 /*
  * Interface for configuring and controlling the state of tracing events.
  *
- * Copyright (C) 2014-2016 Lluís Vilanova 
+ * Copyright (C) 2014-2017 Lluís Vilanova 
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
  */
 
 #include "qemu/osdep.h"
+#include "qom/cpu.h"
 #include "cpu.h"
 #include "trace-root.h"
 #include "trace/control.h"
@@ -34,6 +35,13 @@ void trace_event_set_state_dynamic_init(TraceEvent *ev, bool 
state)
 }
 }
 
+static void trace_event_synchronize_vcpu_state_dynamic(
+CPUState *vcpu, run_on_cpu_data ignored)
+{
+bitmap_copy(vcpu->trace_dstate, vcpu->trace_dstate_delayed,
+CPU_TRACE_DSTATE_MAX_EVENTS);
+}
+
 void trace_event_set_state_dynamic(TraceEvent *ev, bool state)
 {
 CPUState *vcpu;
@@ -69,13 +77,20 @@ void trace_event_set_vcpu_state_dynamic(CPUState *vcpu,
 if (state_pre != state) {
 if (state) {
 trace_events_enabled_count++;
-set_bit(vcpu_id, vcpu->trace_dstate);
+set_bit(vcpu_id, vcpu->trace_dstate_delayed);
 (*ev->dstate)++;
 } else {
 trace_events_enabled_count--;
-clear_bit(vcpu_id, vcpu->trace_dstate);
+clear_bit(vcpu_id, vcpu->trace_dstate_delayed);
 (*ev->dstate)--;
 }
+/*
+ * Delay changes until next TB; we want all TBs to be built from a
+ * single set of dstate values to ensure consistency of generated
+ * tracing code.
+ */
+async_run_on_cpu(vcpu, trace_event_synchronize_vcpu_state_dynamic,
+ RUN_ON_CPU_NULL);
 }
 }
 
-- 
2.7.4

[Qemu-devel] [PATCH] Add manpage for QEMU Backup Tool

2017-06-08 Thread Ishani Chugh

Signed-off-by: Ishani Chugh 
---
 Makefile|   2 +-
 contrib/backup/qemu-backup.texi | 147 
 2 files changed, 148 insertions(+), 1 deletion(-)
 create mode 100644 contrib/backup/qemu-backup.texi

diff --git a/Makefile b/Makefile
index c830d7a..f42cb1d 100644
--- a/Makefile
+++ b/Makefile
@@ -504,7 +504,7 @@ clean:
 VERSION ?= $(shell cat VERSION)
 
 dist: qemu-$(VERSION).tar.bz2
-
+qemu-backup.8: contrib/qemu-backup/qemu-backup.texi
 qemu-%.tar.bz2:
$(SRC_PATH)/scripts/make-release "$(SRC_PATH)" "$(patsubst 
qemu-%.tar.bz2,%,$@)"
 
diff --git a/contrib/backup/qemu-backup.texi b/contrib/backup/qemu-backup.texi
new file mode 100644
index 000..41e2b16
--- /dev/null
+++ b/contrib/backup/qemu-backup.texi
@@ -0,0 +1,147 @@
+   
+ 
+@setfilename qemu-backup-tool-manpage
+
+@documentlanguage en
+@documentencoding UTF-8
+
+\input texinfo
+@settitle Manpage_backup_tool 1.0
+@copying
+This is a manpage for qemu_backup_tool, version 1.0.
+
+Copyright @copyright{} 2016 Free Software Foundation, Inc.
+@end copying
+@ifinfo
+@direntry
+* QEMU: (qemu-backup-tool-manpage).Man page for QEMU backup tool.
+@end direntry
+@end ifinfo
+@iftex
+@titlepage
+@sp 7
+@center @titlefont{QEMU_backup_tool}
+@sp 1
+@center @titlefont{Man Page}
+@sp 3
+@end titlepage
+@end iftex
+@ifnottex
+@node Top
+@top Short Sample
+
+@menu
+* Name::
+* Synopsis::
+* list of Commands::
+* Command Parameters::
+* Command Descriptions::
+* License::
+@end menu
+
+@end ifnottex
+
+@node Name
+@chapter Name
+
+QEMU disk backup tool.
+
+@node Synopsis
+@chapter Synopsis
+
+
+qemu-backup command [ command options].
+
+@node  list of Commands
+@chapter  list of Commands
+@cindex chapter, first dummy
+@itemize
+@item qemu-backup guest add [--id id] [--qmp socketpath]
+@item qemu-backup guest list
+@item qemu-backup drive add [--guest guestname] [--id driveid] [--target 
target]
+@item qemu-backup drive list [--guest guestname]
+@item qemu-backup backup [--guest guestname]
+@item qemu-backup restore [--guest guestname]
+@item qemu-backup drive-remove [--guest guestname] [--id driveid]
+@item qemu-backup remove [--guest guestname]
+@item qemu-backup drive add [--all] [--guest guestname] [--target target]
+@item qemu-backup backup [--inc] [--guest guestname]
+@end itemize
+@node  Command Parameters
+@chapter  Command Parameters
+@cindex chapter, first dummy
+@itemize
+@item --guest: Name of the guest.
+@item --id: id of guest or drive.
+@item --target: Destination on which you want your backup to be made.
+@item --all: Add all the drives present in a guest for backup except cd-rom.
+@item --inc: For incremental backup.
+@item --qmp: Path of qmp socket.
+@end itemize
+
+@node  Command Descriptions
+@chapter  Command Descriptions
+@cindex chapter, first dummy
+@itemize
+@item qemu-backup guest add [--id id] [--qmp socketpath]
+This command adds a guest to the configuration file given its path to qmp 
socket.
+example:
+qemu-backup guest add --id=fedora --qmp=/var/run/qemu/fedora.sock
+ 
+@item qemu-backup guest list
+This command lists the names of guests which are added to the configuration file.
+ 
+@item qemu-backup drive add [--guest guestname] [--id driveid] [--target 
target]
+This command adds different drives for backup in a particular guest by giving 
the name of drive to be backed up and target imagefile in which we want to 
store the drive backup.
+example:
+qemu-backup drive add --guest=fedora --id=root
+--target=/backup/fedora/root.img
+qemu-backup drive add --guest=fedora --id=data
+--target=/backup/fedora/data.img
+ 
+@item qemu-backup drive list [--guest guestname]
+This command gives the names of the drives present in a guest which are added 
for backup.
+example: qemu-backup drive list --guest=fedora
+ 
+@item qemu-backup backup [--guest guestname]
+ 
+This command makes the backup of the drives, in their respective given 
destinations. The ids of drive and their destinations are taken from the 
configuration file.
+example: qemu-backup backup --guest=fedora
+ 
+@item qemu-backup restore [--guest guestname]
+This command is needed if we want to restore the backup. It will list the 
commands to be run for performing the same but will not perform any action.
+example: qemu-backup restore --guest=fedora
+cp /backup/fedora/root.img /var/run/qemu/fedora/root.img
+cp /backup/fedora/data.img /var/run/qemu/fedora/data.img
+ 
+@item qemu-backup drive-remove [--guest guestname] [--id driveid]
+This command helps remove a drive which is set for backup in configuration of 
given host.
+example: qemu-backup drive-remove --guest=fedora --id=root
+ 
+@item qemu-backup remove [--guest guestname]
+This command removes the guest from the configuration file.
+example: qemu-backup remove --guest=fedora
+ 
+@item qemu-backup drive add --all [--guest guestname] [--target target]
+This command adds all the drives of the guest for backup other

Re: [Qemu-devel] [PATCH 2/3] exec: simplify address_space_get_iotlb_entry

2017-06-08 Thread Peter Xu

On Thu, Jun 08, 2017 at 09:59:50PM +0300, Michael S. Tsirkin wrote:
> On Thu, Jun 08, 2017 at 02:11:50PM +0800, Peter Xu wrote:
> > On Wed, Jun 07, 2017 at 04:07:20PM +0300, Michael S. Tsirkin wrote:
> > > On Wed, Jun 07, 2017 at 11:44:43AM +0800, Peter Xu wrote:
> > > > On Wed, Jun 07, 2017 at 09:47:05AM +1000, David Gibson wrote:
> > > > > On Tue, Jun 06, 2017 at 04:34:30PM +0200, Paolo Bonzini wrote:
> > > > > > 
> > > > > > 
> > > > > > On 05/06/2017 05:07, Peter Xu wrote:
> > > > > > > I'm not sure whether it'll be a good interface for IOTLB. AFAIU at
> > > > > > > least for VT-d, the IOMMU translation is page aligned which is 
> > > > > > > defined
> > > > > > > by spec, so it makes sense that (again at least for VT-d) here 
> > > > > > > we'd
> > > > > > > better just use page_mask/addr_mask.
> > > > > > > 
> > > > > > > That's also how I know about IOMMU in general - I assume it does the
> > > > > > > translations always with page masks (never arbitrary length), 
> > > > > > > though
> > > > > > > page size can differ from platform to platform, that's why here 
> > > > > > > the
> > > > > > > IOTLB interface used addr_mask, then it works for all platforms. I
> > > > > > > don't know whether I'm 100% correct here though.
> > > > > > > 
> > > > > > > Maybe David/Paolo/... would comment as well?
> > > > > > 
> > > > > > I would ask David.  There are PowerPC MMUs that allow fast lookup of
> > > > > > arbitrarily-sized windows (not necessarily power of two),
> > > > > 
> > > > > Uh.. I'm not sure what you mean here.  You might be thinking of the
> > > > > BATs which really old (32-bit) PowerPC MMUs had - those allow
> > > > > arbitrary large block translations, but they do have to be a power of
> > > > > two.
> > > > > 
> > > > > > so maybe the
> > > > > > IOMMUs can do the same.
> > > > > 
> > > > > The only Power IOMMU I know about uses a fixed, power-of-two page size
> > > > > per DMA window.
> > > > 
> > > > If so, I would still be inclined to keep using masks for QEMU IOTLB.
> > > > Then, my first two patches should still stand.
> > > > 
> > > > I am just afraid that not using masks will diverge the emulation from
> > > > real hardware and brings trouble one day.
> > > > 
> > > > For vhost IOTLB interface, it does not need to be strictly aligned to
> > > > QEMU IOMMU IOTLB definition, and that's how it's working now (current
> > > > vhost iotlb allows arbitrary length, and I think it's good). So imho we
> > > > don't really need to worry about the performance - after all, we can
> > > > do everything customized for vhost, just like what patch 3 did (yeah,
> > > > it can be better...).
> > > > 
> > > > Thanks,
> > > 
> > > Pre-faults is also something that does not happen on real hardware.
> > > And it's about security so a bigger issue.
> > > 
> > > If I had to choose between that and using non-power-of-2 in
> > > the API, I'd go for non-power-of-2. Let backends that can only
> > > support power of 2 split it up to multiple transactions.
> > 
> > The problem is that when I was fixing the problem that vhost had with
> > PT (a764040, "exec: abstract address_space_do_translate()"), I did
> > break the IOTLB translation a bit (it was using page masks). IMHO we
> > need to fix it first for correctness (patch 1/2).
> > 
> > For patch 3, if we can have Jason's patch to allow dynamic
> > iommu_platform switching, that'll be the best, then I can rewrite
> > patch 3 with the switching logic rather than caching anything. But
> > IMHO that can be separated from patch 1/2 if you like.
> > 
> > Or do you have better suggestion on how should we fix it?
> > 
> > Thanks,
> 
> Can we drop masks completely and replace with length? I think we
> should do that instead of trying to fix masks.

Do you mean to modify IOMMUTLBEntry.addr_mask into length?

Again, I am not sure this is good... At least we need to get ack from
David since spapr should be the initial user of it, and possibly also
Alex since vfio should be assuming that (IIUC both in QEMU and kernel)
addr_mask is page masks rather than arbitrary length.

(CC Alex)

Thanks,

-- 
Peter Xu

Re: [Qemu-devel] [RFC v5 2/4] hw/intc/arm_gicv3_its: Implement state save/restore

2017-06-08 Thread Shannon Zhao



On 2017/4/14 20:46, Eric Auger wrote:
[...]
> @@ -43,6 +50,7 @@ struct GICv3ITSState {
>  
>  /* Registers */
>  uint32_t ctlr;
> +uint32_t iidr;
I think this should be reset in gicv3_its_common_reset

Thanks,
-- 
Shannon

Re: [Qemu-devel] [PATCH] q35/mch: implement extended TSEG sizes

2017-06-08 Thread Michael S. Tsirkin

On Fri, Jun 09, 2017 at 01:01:54AM +0200, Laszlo Ersek wrote:
> On 06/08/17 21:55, Michael S. Tsirkin wrote:
> > On Thu, Jun 08, 2017 at 09:48:53PM +0200, Gerd Hoffmann wrote:
> >>   Hi,
> >>
> >>> I really dislike negotiation being re-invented for each device.  Do
> >>> we
> >>> need these tricks?  Can we just do fw cfg with standard discovery?
> >>> This ties in with my proposal to generalize smi features to
> >>> generic ones.
> >>
> >> Device properties should be part of the device.
> >> We should have done this with the smi too.
> > 
> > What is part of the device and what isn't? It's all part
> > of QEMU in the end.  Adding probing for multiple devices
> > will just add to number of exits and slow down guest boot.
> > 
> > We do want to stick to emulating real devices if we can, no argument
> > here - but this stuff is PV anyway - what do we gain by spreading it
> > out?
> > 
> >> A more standard way to handle this would be to add a vendor-specific
> >> pci capability and place the register there.  Not sure we have room for
> >> that in the pci config space though.
> >>
> >> cheers,
> >>   Gerd
> > 
> > We don't have room anywhere in PCI config space. Laszlo makes argument
> > why it's safe for this device based on spec but it's anyone's guess
> > whether current and future software will follow spec.  In short, going
> > anywhere near the emulated device has a potential to break some drivers.
> 
> I'm fine using any QEMU facility that lets independent firmware modules
> perform their feature detections / negotiations / lockdowns in arbitrary
> order between each other. (Hopefully without extreme parsing requirements.)

How about adding etc/mch/features etc copying the smi stuff? Is this
simple enough? We can worry about removing code duplication later.

> What I can not sign up for is to develop a general QEMU infrastructure
> for this (regardless of whether it is the fw_cfg bitmap stuff prevails,
> or the PCI config space register / capability list). Either is complex
> work, needing documentation too, the design has to be future proof. I'm
> not experienced enough in QEMU to get it right reasonably soon
> (everything is surprisingly complex and difficult in QEMU -- this has
> been my experience over the years, and I still struggle with QOM every
> single time), and I definitely do not have the capacity to take on a
> QEMU feature of the suggested size.
> 
> It's not lack of interest on my part, but lack of capacity. (Case in
> point: it's ~1AM local time, and my laptop's uptime, which quite closely
> approximates the hours I've actually spent working today, is ~15:30.)
> The reason I keep submitting these little patches to qemu-devel is that
> I figure everyone else is overloaded too, so I might as well try what
> I'm capable of. But, we should be clear that that is not much, load-wise
> and sophistication-wise.
> 
> The alternative could have been that I'd clone
>  to qemu-kvm-rhev
> (from OVMF), set up the cross-BZ dependencies correctly, wait until the
> clone gets assigned to a seasoned QEMU developer, and once he or she
> gets to work on it, we figure out the design together, and once he/she
> writes the code for QEMU, I write the code for the firmware.
> 
> I figured that sending a patch like the present one (having discussed it
> preliminarily with Gerd and Paolo in the "[edk2] SMRAM sizes on large
> hosts" thread) would be more efficient than waiting for a seasoned QEMU
> developer. I didn't expect that my patch would be better than theirs. :)
> The above kind of collaboration has certainly proved functional in the
> past, it just takes a lot of time and coordination.
> 
> Anyway, "Laszlo embarking on a QEMU infrastructure project that's liable
> to take fifteen patch set iterations" is not an alternative,
> unfortunately. I definitely don't intend to throw QEMU patches over the
> fence; I know what drag that creates for maintainers. I intend to be
> responsible for my QEMU patches. However -- or perhaps, "exactly because
> of that"? -- I simply can't take on QEMU work that's larger than this
> caliber.
> 
> Sorry about the wall of text.
> 
> Thanks,
> Laszlo

Re: [Qemu-devel] [PATCH qemu v7] memory/iommu: QOM'fy IOMMU MemoryRegion

2017-06-08 Thread David Gibson

On Thu, Jun 08, 2017 at 05:30:57PM +0200, Paolo Bonzini wrote:
> 
> 
> On 08/06/2017 16:58, David Gibson wrote:
> > So, doing that via the standard macro wrappers would be slow, because
> > it does type verification.  However, looking up the class function is
> > fundamentally:
> > 
> > ((IOMMUMemoryRegionClass *)(((Object *)mr)->class))->translate
> > 
> > i.e. two pointers followed, which is just the same as
> > mr->ops->translate
> 
> Fair enough.  On the other hand it's a bigger change so I think it's
> better to keep it separate.

Ok, project for another day.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

[Qemu-devel] [PATCH v2] block: change variable names in BlockDriverState

2017-06-08 Thread Manos Pitsidianakis

Change the 'int count' parameter in *pwrite_zeros, *pdiscard related
functions (and some others) to 'int bytes', as they both refer to bytes.
This helps with code legibility.

Signed-off-by: Manos Pitsidianakis 
---
 block/blkdebug.c   | 36 +++
 block/blkreplay.c  |  8 +++
 block/block-backend.c  | 22 +--
 block/file-posix.c | 34 +++---
 block/io.c | 48 +-
 block/iscsi.c  | 20 +-
 block/mirror.c |  8 +++
 block/nbd-client.c |  8 +++
 block/nbd-client.h |  4 ++--
 block/qcow2.c  | 28 
 block/qed.c|  8 +++
 block/raw-format.c |  8 +++
 block/rbd.c|  4 ++--
 block/sheepdog.c   |  6 +++---
 hw/ide/core.c  |  6 +++---
 include/block/block.h  |  8 +++
 include/block/block_int.h  |  6 +++---
 include/sysemu/block-backend.h | 20 +-
 qemu-io-cmds.c | 46 
 19 files changed, 164 insertions(+), 164 deletions(-)

diff --git a/block/blkdebug.c b/block/blkdebug.c
index a5196e8..8bab3ac 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -576,7 +576,7 @@ static int blkdebug_co_flush(BlockDriverState *bs)
 }
 
 static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
-  int64_t offset, int count,
+  int64_t offset, int bytes,
   BdrvRequestFlags flags)
 {
 uint32_t align = MAX(bs->bl.request_alignment,
@@ -587,29 +587,29 @@ static int coroutine_fn 
blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
  * preferred alignment (so that we test the fallback to writes on
  * unaligned portions), and check that the block layer never hands
  * us anything unaligned that crosses an alignment boundary.  */
-if (count < align) {
+if (bytes < align) {
 assert(QEMU_IS_ALIGNED(offset, align) ||
-   QEMU_IS_ALIGNED(offset + count, align) ||
+   QEMU_IS_ALIGNED(offset + bytes, align) ||
DIV_ROUND_UP(offset, align) ==
-   DIV_ROUND_UP(offset + count, align));
+   DIV_ROUND_UP(offset + bytes, align));
 return -ENOTSUP;
 }
 assert(QEMU_IS_ALIGNED(offset, align));
-assert(QEMU_IS_ALIGNED(count, align));
+assert(QEMU_IS_ALIGNED(bytes, align));
 if (bs->bl.max_pwrite_zeroes) {
-assert(count <= bs->bl.max_pwrite_zeroes);
+assert(bytes <= bs->bl.max_pwrite_zeroes);
 }
 
-err = rule_check(bs, offset, count);
+err = rule_check(bs, offset, bytes);
 if (err) {
 return err;
 }
 
-return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
+return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 }
 
 static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
- int64_t offset, int count)
+ int64_t offset, int bytes)
 {
 uint32_t align = bs->bl.pdiscard_alignment;
 int err;
@@ -617,29 +617,29 @@ static int coroutine_fn 
blkdebug_co_pdiscard(BlockDriverState *bs,
 /* Only pass through requests that are larger than requested
  * minimum alignment, and ensure that unaligned requests do not
  * cross optimum discard boundaries. */
-if (count < bs->bl.request_alignment) {
+if (bytes < bs->bl.request_alignment) {
 assert(QEMU_IS_ALIGNED(offset, align) ||
-   QEMU_IS_ALIGNED(offset + count, align) ||
+   QEMU_IS_ALIGNED(offset + bytes, align) ||
DIV_ROUND_UP(offset, align) ==
-   DIV_ROUND_UP(offset + count, align));
+   DIV_ROUND_UP(offset + bytes, align));
 return -ENOTSUP;
 }
 assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
-assert(QEMU_IS_ALIGNED(count, bs->bl.request_alignment));
-if (align && count >= align) {
+assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
+if (align && bytes >= align) {
 assert(QEMU_IS_ALIGNED(offset, align));
-assert(QEMU_IS_ALIGNED(count, align));
+assert(QEMU_IS_ALIGNED(bytes, align));
 }
 if (bs->bl.max_pdiscard) {
-assert(count <= bs->bl.max_pdiscard);
+assert(bytes <= bs->bl.max_pdiscard);
 }
 
-err = rule_check(bs, offset, count);
+err = rule_check(bs, offset, bytes);
 if (err) {
 return err;
 }
 
-return bdrv_co_pdiscard(bs->file->bs, offset, count);
+return bdrv_co_pdiscard(bs->file->bs, offset, bytes);
 }
 
 static void blkdebug_close(BlockDriverState *bs)
diff --git

Re: [Qemu-devel] [PATCH] q35/mch: implement extended TSEG sizes

2017-06-08 Thread Laszlo Ersek

On 06/08/17 21:55, Michael S. Tsirkin wrote:
> On Thu, Jun 08, 2017 at 09:48:53PM +0200, Gerd Hoffmann wrote:
>>   Hi,
>>
>>> I really dislike negotiation being re-invented for each device.  Do
>>> we
>>> need these tricks?  Can we just do fw cfg with standard discovery?
>>> This ties in with my proposal to generalize smi features to
>>> generic ones.
>>
>> Device properties should be part of the device.
>> We should have done this with the smi too.
> 
> What is part of the device and what isn't? It's all part
> of QEMU in the end.  Adding probing for multiple devices
> will just add to number of exits and slow down guest boot.
> 
> We do want to stick to emulating real devices if we can, no argument
> here - but this stuff is PV anyway - what do we gain by spreading it
> out?
> 
>> A more standard way to handle this would be to add a vendor-specific
>> pci capability and place the register there.  Not sure we have room for
>> that in the pci config space though.
>>
>> cheers,
>>   Gerd
> 
> We don't have room anywhere in PCI config space. Laszlo makes argument
> why it's safe for this device based on spec but it's anyone's guess
> whether current and future software will follow spec.  In short, going
> anywhere near the emulated device has a potential to break some drivers.

I'm fine using any QEMU facility that lets independent firmware modules
perform their feature detections / negotiations / lockdowns in arbitrary
order between each other. (Hopefully without extreme parsing requirements.)

What I can not sign up for is to develop a general QEMU infrastructure
for this (regardless of whether it is the fw_cfg bitmap stuff prevails,
or the PCI config space register / capability list). Either is complex
work, needing documentation too, the design has to be future proof. I'm
not experienced enough in QEMU to get it right reasonably soon
(everything is surprisingly complex and difficult in QEMU -- this has
been my experience over the years, and I still struggle with QOM every
single time), and I definitely do not have the capacity to take on a
QEMU feature of the suggested size.

It's not lack of interest on my part, but lack of capacity. (Case in
point: it's ~1AM local time, and my laptop's uptime, which quite closely
approximates the hours I've actually spent working today, is ~15:30.)
The reason I keep submitting these little patches to qemu-devel is that
I figure everyone else is overloaded too, so I might as well try what
I'm capable of. But, we should be clear that that is not much, load-wise
and sophistication-wise.

The alternative could have been that I'd clone
 to qemu-kvm-rhev
(from OVMF), set up the cross-BZ dependencies correctly, wait until the
clone gets assigned to a seasoned QEMU developer, and once he or she
gets to work on it, we figure out the design together, and once he/she
writes the code for QEMU, I write the code for the firmware.

I figured that sending a patch like the present one (having discussed it
preliminarily with Gerd and Paolo in the "[edk2] SMRAM sizes on large
hosts" thread) would be more efficient than waiting for a seasoned QEMU
developer. I didn't expect that my patch would be better than theirs. :)
The above kind of collaboration has certainly proved functional in the
past, it just takes a lot of time and coordination.

Anyway, "Laszlo embarking on a QEMU infrastructure project that's liable
to take fifteen patch set iterations" is not an alternative,
unfortunately. I definitely don't intend to throw QEMU patches over the
fence; I know what drag that creates for maintainers. I intend to be
responsible for my QEMU patches. However -- or perhaps, "exactly because
of that"? -- I simply can't take on QEMU work that's larger than this
caliber.

Sorry about the wall of text.

Thanks,
Laszlo

Re: [Qemu-devel] [PATCH 2/4] xenfb: Activate mouse handler

2017-06-08 Thread Stefano Stabellini

On Thu, 8 Jun 2017, Owen Smith wrote:
> Mouse events are only delivered to the first handler in the chain.
> Activating the xenfb mouse event handler so that mouse events can
> be passed over the shared ring protocol.
> Note: The keyboard handler is activated internally by the add
> call.

I am not sure I follow: why do we need this now? How is it working
today?


> Signed-off-by: Owen Smith 
> ---
>  hw/display/xenfb.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c
> index 2ebc81b..b0a5726 100644
> --- a/hw/display/xenfb.c
> +++ b/hw/display/xenfb.c
> @@ -385,6 +385,7 @@ static void input_connected(struct XenDevice *xendev)
>  in->qmouse = qemu_add_mouse_event_handler(xenfb_mouse_event, in,
> in->abs_pointer_wanted,
> "Xen PVFB Mouse");
> +qemu_activate_mouse_event_handler(in->qmouse);
>  }
>  
>  static void input_disconnect(struct XenDevice *xendev)
> -- 
> 2.1.4
>

Re: [Qemu-devel] [PATCH 1/4] xenfb: Add feature-vkbd-standalone

2017-06-08 Thread Stefano Stabellini

On Thu, 8 Jun 2017, Owen Smith wrote:
> Advertise "feature-vkbd-standalone" to indicate the backend
> can connect without a vfb device connection.
> When "request-vkbd-standalone" is set to 1, the backend does
> not wait for a QemuConsole to be setup before connecting the 
> vkbd device. This also means that absolute coordinates cannot
> be scaled to the non-existent QemuConsole's sizes, and remain
> unscaled, in the range [0, 0x7FFF].
> 
> Signed-off-by: Owen Smith 
> ---
>  hw/display/xenfb.c | 32 ++--
>  1 file changed, 22 insertions(+), 10 deletions(-)
> 
> diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c
> index e76c0d8..2ebc81b 100644
> --- a/hw/display/xenfb.c
> +++ b/hw/display/xenfb.c
> @@ -52,6 +52,7 @@ struct common {
>  struct XenInput {
>  struct common c;
>  int abs_pointer_wanted; /* Whether guest supports absolute pointer */
> +int vkbd_standalone;/* Guest supports vkbd without vfb device */
>  int button_state;   /* Last seen pointer button state */
>  int extended;
>  QEMUPutMouseEntry *qmouse;
> @@ -306,18 +307,22 @@ static void xenfb_mouse_event(void *opaque,
> int dx, int dy, int dz, int button_state)
>  {
>  struct XenInput *xenfb = opaque;
> -DisplaySurface *surface = qemu_console_surface(xenfb->c.con);
> -int dw = surface_width(surface);
> -int dh = surface_height(surface);
> -int i;
> +int i, x, y;
> +if (xenfb->c.con != NULL) {
> +DisplaySurface *surface = qemu_console_surface(xenfb->c.con);
> +int dw = surface_width(surface);
> +int dh = surface_height(surface);
> +x = dx * (dh - 1) / 0x7fff;
> +y = dy * (dw - 1) / 0x7fff;
> +} else {
> +x = dx;
> +y = dy;
> +}
>  
>  trace_xenfb_mouse_event(opaque, dx, dy, dz, button_state,
>  xenfb->abs_pointer_wanted);
>  if (xenfb->abs_pointer_wanted)
> - xenfb_send_position(xenfb,
> - dx * (dw - 1) / 0x7fff,
> - dy * (dh - 1) / 0x7fff,
> - dz);
> +xenfb_send_position(xenfb, x, y, dz);
>  else
>   xenfb_send_motion(xenfb, dx, dy, dz);
>  
> @@ -336,6 +341,7 @@ static void xenfb_mouse_event(void *opaque,
>  static int input_init(struct XenDevice *xendev)
>  {
>  xenstore_write_be_int(xendev, "feature-abs-pointer", 1);
> +xenstore_write_be_int(xendev, "feature-vkbd-standalone", 1);
>  return 0;
>  }
>  
> @@ -345,8 +351,14 @@ static int input_initialise(struct XenDevice *xendev)
>  int rc;
>  
>  if (!in->c.con) {
> -xen_pv_printf(xendev, 1, "ds not set (yet)\n");
> -return -1;
> +if (xenstore_read_fe_int(xendev, "request-vkbd-standalone",
> + >vkbd_standalone) == -1) {
> +in->vkbd_standalone = 0;
> +}
> +if (in->vkbd_standalone == 0) {
> +xen_pv_printf(xendev, 1, "ds not set (yet)\n");
> +return -1;
> +}

In your changes to include/public/io/kbdif.h, make sure to write when
(at what xenstore status stage) the frontend needs to write
request-vkbd-standalone. 

This patch looks good:

Reviewed-by: Stefano Stabellini 



>  }
>  
>  rc = common_bind(>c);

[Qemu-devel] [PATCH] nbd: Fix regression on resiliency to port scan

2017-06-08 Thread Eric Blake

Back in qemu 2.5, qemu-nbd was immune to port probes (a transient
server would not quit, regardless of how many probe connections
came and went, until a connection actually negotiated).  But we
broke that in commit ee7d7aa when removing the return value to
nbd_client_new(), although that patch also introduced a bug causing
an assertion failure on a client that fails negotiation.  We then
made it worse during refactoring in commit 1a6245a (a segfault
before we could even assert); the (masked) assertion was cleaned
up in d3780c2 (still in 2.6), and just recently we finally fixed
the segfault ("nbd: Fully intialize client in case of failed
negotiation").  But that still means that ever since we added
TLS support to qemu-nbd, we have been vulnerable to an ill-timed
port-scan being able to cause a denial of service by taking down
qemu-nbd before a real client has a chance to connect.

Since negotiation is now handled asynchronously via coroutines,
we no longer have a synchronous point of return by re-adding a
return value to nbd_client_new().  So this patch instead wires
things up to pass the negotiation status through the close_fn
callback function.

Simple test across two terminals:
$ qemu-nbd -f raw -p 30001 file
$ nmap 127.0.0.1 -p 30001 && \
  qemu-io -c 'r 0 512' -f raw nbd://localhost:30001

Note that this patch does not change what constitutes successful
negotiation (thus, a client must enter transmission phase before
that client can be considered as a reason to terminate the server
when the connection ends).  Perhaps we may want to tweak things
in a later patch to also treat a client that uses NBD_OPT_ABORT
as being a 'successful' negotiation (the client correctly talked
the NBD protocol, and informed us it was not going to use our
export after all), but that's a discussion for another day.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1451614

Signed-off-by: Eric Blake 
---
 include/block/nbd.h |  2 +-
 blockdev-nbd.c  |  6 +-
 nbd/server.c| 24 +++-
 qemu-nbd.c  |  4 ++--
 4 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index 416257a..8fa5ce5 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -162,7 +162,7 @@ void nbd_client_new(NBDExport *exp,
 QIOChannelSocket *sioc,
 QCryptoTLSCreds *tlscreds,
 const char *tlsaclname,
-void (*close)(NBDClient *));
+void (*close_fn)(NBDClient *, bool));
 void nbd_client_get(NBDClient *client);
 void nbd_client_put(NBDClient *client);

diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index dd0860f..28f551a 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -27,6 +27,10 @@ typedef struct NBDServerData {

 static NBDServerData *nbd_server;

+static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
+{
+nbd_client_put(client);
+}

 static gboolean nbd_accept(QIOChannel *ioc, GIOCondition condition,
gpointer opaque)
@@ -46,7 +50,7 @@ static gboolean nbd_accept(QIOChannel *ioc, GIOCondition 
condition,
 qio_channel_set_name(QIO_CHANNEL(cioc), "nbd-server");
 nbd_client_new(NULL, cioc,
nbd_server->tlscreds, NULL,
-   nbd_client_put);
+   nbd_blockdev_client_closed);
 object_unref(OBJECT(cioc));
 return TRUE;
 }
diff --git a/nbd/server.c b/nbd/server.c
index 49b55f6..f2b1aa4 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -81,7 +81,7 @@ static QTAILQ_HEAD(, NBDExport) exports = 
QTAILQ_HEAD_INITIALIZER(exports);

 struct NBDClient {
 int refcount;
-void (*close)(NBDClient *client);
+void (*close_fn)(NBDClient *client, bool negotiated);

 bool no_zeroes;
 NBDExport *exp;
@@ -778,7 +778,7 @@ void nbd_client_put(NBDClient *client)
 }
 }

-static void client_close(NBDClient *client)
+static void client_close(NBDClient *client, bool negotiated)
 {
 if (client->closing) {
 return;
@@ -793,8 +793,8 @@ static void client_close(NBDClient *client)
  NULL);

 /* Also tell the client, so that they release their reference.  */
-if (client->close) {
-client->close(client);
+if (client->close_fn) {
+client->close_fn(client, negotiated);
 }
 }

@@ -975,7 +975,7 @@ void nbd_export_close(NBDExport *exp)

 nbd_export_get(exp);
 QTAILQ_FOREACH_SAFE(client, >clients, next, next) {
-client_close(client);
+client_close(client, true);
 }
 nbd_export_set_name(exp, NULL);
 nbd_export_set_description(exp, NULL);
@@ -1337,7 +1337,7 @@ done:

 out:
 nbd_request_put(req);
-client_close(client);
+client_close(client, true);
 nbd_client_put(client);
 }

@@ -1363,7 +1363,7 @@ static coroutine_fn void nbd_co_client_start(void *opaque)
 qemu_co_mutex_init(&client->send_lock);

 if (nbd_negotiate(data)) {
-

Re: [Qemu-devel] [PATCH 0/4] xenfb: Add vkbd-only option

2017-06-08 Thread Stefano Stabellini

On Thu, 8 Jun 2017, Owen Smith wrote:
> Adds the ability for a vkbd device to connect without the
> QemuConsole, in order to support a standalone PV mouse and
> keyboard frontend.
> This series adds a new feature flag, which will need adding
> to the xen's include/public/io/kbdif.h

Please do so, I would like that change to be applied to xen before this
series is applied to QEMU.


> "feature-vkbd-standalone" is set to 1 by backends that allow 
> the vkbd device model to connect without requiring a vfb device
> connected. The vkbd device will only bypass the check for
> the vfb device if the frontend sets "request-vkbd-standalone"
> to 1.
> The last 2 patches add a couple of missing input handler
> functions, and uses these to remove a leak in the vkbd device
> model.
> 
> Owen Smith (4):
>   xenfb: Add feature-vkbd-standalone
>   xenfb: Activate mouse handler
>   ui/input: Add activate/remove for keyboard handlers
>   xenfb: Fix leak by adding/removing keyboard handler
> 
>  hw/display/xenfb.c   | 44 
>  include/ui/console.h |  2 ++
>  ui/input-legacy.c| 12 
>  3 files changed, 46 insertions(+), 12 deletions(-)
> 
> -- 
> 2.1.4
>

Re: [Qemu-devel] [PATCH 5/5] vnc: No need for Error** parameter at vnc_client_io_error()

2017-06-08 Thread Eric Blake

On 06/08/2017 12:44 PM, Eduardo Habkost wrote:

 -ssize_t vnc_client_io_error(VncState *vs, ssize_t ret, Error **errp)
 +ssize_t vnc_client_io_error(VncState *vs, ssize_t ret, Error *err)
  {
>>>
>>> This is unusual.
>>
>> Why?  I would say that using Error** for input (and not output)
>> is the unusual pattern.

Yes, and when you frame it that way, it makes sense. But with no comment
framing it that way, ...

>> Isn't this one of the purposes of this function?
>>
>> The difference here is that now the function is just
>> taking ownership of err, making the interface and the
>> implementation simpler.  If I document this more clearly at
>> vnc_client_io_error()'s declaration, would it make this change
>> acceptable?

... why yes, you've hit on why I felt uneasy - we are missing good
documentation!

> 
> What about the following?
> 

Yes, that makes it MUCH easier to understand what's going on.

With this squashed in,
Reviewed-by: Eric Blake 

> Signed-off-by: Eduardo Habkost 
> ---
>  ui/vnc.c | 9 +
>  1 file changed, 9 insertions(+)
> 
> diff --git a/ui/vnc.c b/ui/vnc.c
> index 51f13f0c29..cb4210 100644
> --- a/ui/vnc.c
> +++ b/ui/vnc.c
> @@ -1180,6 +1180,15 @@ void vnc_disconnect_finish(VncState *vs)
>  g_free(vs);
>  }
>  
> +
> +/*
> + * Handle I/O error (@ret < 0) or EOF (@ret == 0) from I/O
> + * channel.  In case of errors or EOF, @err is freed using
> + * error_free().
> + *
> + * Returns 0 in case @ret <= 0 and the error was properly
> + * handled, otherwise returns @ret.
> + */
>  ssize_t vnc_client_io_error(VncState *vs, ssize_t ret, Error *err)
>  {
>  if (ret <= 0) {
> 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH] Remove restriction that prevents bootimg elf64 images

2017-06-08 Thread Anatol Pomozov

+reply-all

On Thu, Jun 8, 2017 at 1:41 PM, Adam Lackorzynski
 wrote:
>
> On Tue Jun 06, 2017 at 21:41:48 -0700, Anatol Pomozov wrote:
>> It is possible to create a 64 bit elf image that has valid multiboot header.
>> qemu should be able to boot such images.
>
> But this 64bit image actually starts with 32bit code, right?

Correct. The very first part of the startup code has to be 32bit.
After it sets "long mode" it can use 64bit instructions. To make sure
that the preamble has only 32bit instructions one has to use an asm
directive such as ".code32".

Here is an example from the LittleKernel startup code:

https://github.com/littlekernel/lk/blob/master/arch/x86/64/start.S#L50

.code32 tells assembler to treat following text as 32 bit code. And
later when it jumps into "long mode"

https://github.com/littlekernel/lk/blob/master/arch/x86/64/start.S#L214
one can use 64bit code.

> So it's a 32bit program and the check verifies that this is the case.

While the preamble has to contain only 32bit instructions, the rest of the
image can perfectly well contain 64bit code. Right now a 64bit binary cannot
be run with "qemu-system-x86_64 -kernel". But the same binary runs
fine if packed with GRUB as iso.

I tried to hack around this restriction by adding
"OUTPUT_FORMAT(elf32-i386)" to the linker file and compiling project
with 64bit support. But GNU ld program crashed at Ubuntu 14.04. It
means not that many people use this code path. GNU ld compiled from
HEAD does not have this problem but now GDB is confused by the fact
that ELF contains 64bit code while header reports i386.

Practically there is no reason for this check as it prevents running
64bit binaries with "qemu-system-x86_64 -kernel".

Re: [Qemu-devel] [PATCH] Remove restriction that prevents bootimg elf64 images

2017-06-08 Thread Adam Lackorzynski

On Tue Jun 06, 2017 at 21:41:48 -0700, Anatol Pomozov wrote:
> It is possible to create a 64 bit elf image that has valid multiboot header.
> qemu should be able to boot such images.

But this 64bit image actually starts with 32bit code, right?
So it's a 32bit program and the check verifies that this is the case.

Adam

Re: [Qemu-devel] [PATCH v4 0/7] tcg: allocate TB structs preceding translate

2017-06-08 Thread Richard Henderson


On 06/08/2017 10:44 AM, Emilio G. Cota wrote:

On Thu, Jun 08, 2017 at 09:44:11 -0700, Richard Henderson wrote:

On 06/07/2017 04:30 PM, Emilio G. Cota wrote:

On Wed, Jun 07, 2017 at 08:55:29 -0700, Richard Henderson wrote:

Then I've a few follow-up patches to take advantage of the new TB
placement for arm platforms.  I've had a look at the asm output for
ppc64 and s390x, and don't see anything obvious that can be improved.


Nice! Just tested patches 3-7 with an arm guest image on
x86 and aarch64 hosts:

   Tested-by: Emilio G. Cota 


So... you've tested patch 3 then?
Patches 4-7 are for an arm host.


Ha! Yes, sorry for the confusion -- I blindly went by the patch titles'
"target/arm" prefix; shouldn't they be "tcg/arm"?


Oops, yes indeed.  I'll fix that too for v5.


r~

Re: [Qemu-devel] [PATCH 5/5] coccinelle: prefer glib g_new/g_renew macros

2017-06-08 Thread Eric Blake

On 06/08/2017 03:50 AM, Markus Armbruster wrote:
>>> Your script differs from Markus', we should figure out if they can be
>>> merged into one.
>>
>> One notable difference is that I abuse expression, instead of type. I didn't 
>> manage to teach spatch about the includes and custom type (--all-includes 
>> didn't work). I just tried with expression and it was happy, I haven't 
>> searched further.
> 
> Does your semantic patch do more, less, or both?

I have not tried to find out - but the idea would be:
if you run Markus' script first, does Marc-André's find anything?
if you run Marc-André's script first, does Markus' find anything?

>> Sadly, my script is really far from perfect. And I don't know how much time it 
>> will take me to make it better, and if I really want to spend that time for 
>> this. In any case, the result needs careful review. So thought it would be 
>> easier to provide a patch that I manually changed/reviewed, rather than a 
>> full cocci script.

On the other hand, since it is something that we have done repeatedly,
having it reviewed into a full cocci script that we store in git will
make future re-runs easier (we can incrementally improve the cocci
script as we gain experience with additional patterns that work).

> 
> I can play with the script when this series reaches the front of my
> review queue.

Fingers crossed that your review queue isn't a year long any more (even
if I do understand that you will not be reviewing much during your time
off later this month).

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH 5/5] coccinelle: prefer glib g_new/g_renew macros

2017-06-08 Thread Eric Blake

On 06/08/2017 03:23 AM, Marc-André Lureau wrote:
>>
>> Your script differs from Markus', we should figure out if they can be
>> merged into one.
> 
> One notable difference is that I abuse expression, instead of type. I didn't 
> manage to teach spatch about the includes and custom type (--all-includes 
> didn't work). I just tried with expression and it was happy, I haven't 
> searched further.
> 
>>
>>> (
>>> - g_malloc0(sizeof(*e2))
>>> + g_malloc0(sizeof(*e2))
>>
>> Huh?
>>
>>> |
>>> - g_malloc(sizeof(*e2))
>>> + g_malloc(sizeof(*e2))
>>
>> Huh?
> 
> That's what I explained in the cover letter, I don't want those to be 
> touched, but they would because I abuse expressions...

Ah. So you're writing a multi-pass filter: if it matches the first
branch, then don't try later branches; otherwise if it matches the later
branch, perform the substitution in the later branch.

But that can be written:

(
 g_malloc0(sizeof(*e2))
|
 g_malloc(sizeof(*e2))
|
- ...
+ ...
)

where you don't have to use -/+ patch form for the lines that you are
intentionally leaving unchanged because they are already in the right
form and where you therefore don't want any subsequent branches of the
pattern to be matched.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH] spapr: manage hotplugged devices while the VM is not started

2017-06-08 Thread Michael Roth

Quoting David Gibson (2017-05-30 23:35:57)
> On Tue, May 30, 2017 at 06:04:45PM +0200, Laurent Vivier wrote:
> > For QEMU, a hotplugged device is a device added using the HMP/QMP
> > interface.
> > For SPAPR, a hotplugged device is a device added while the
> > machine is running. In this case QEMU doesn't update internal
> > state but relies on the OS for this part
> > 
> > In the case of migration, when we (libvirt) hotplug a device
> > on the source guest, we (libvirt) generally hotplug the same
> > device on the destination guest. But in this case, the machine
> > is stopped (RUN_STATE_INMIGRATE) and QEMU must not expect
> > the OS will manage it as an hotplugged device as it will
> > be "imported" by the migration.
> > 
> > This patch changes the meaning of "hotplugged" in spapr.c
> > to manage a QEMU hotplugged device like a "coldplugged" one
> > when the machine is awaiting an incoming migration.
> > 
> > Signed-off-by: Laurent Vivier 
> 
> So, I think this is a reasonable concept, at least in terms of
> cleanliness and not doing unnecessary work.  However, if it's fixing
> bugs, I suspect that means we still have problems elsewhere.

I was hoping a lot of these issues would go away once we default
the initial/reset DRC states to "coldplugged". I think your pending
patch:

  "spapr: Make DRC reset force DRC into known state"

But I didn't consider the fact that libvirt will be issuing these
hotplugs *after* reset, so those states would indeed need to
be fixed up again to reflect boot-time,attached as opposed to
boot-time,unattached before starting the target.

So I do think this patch addresses a specific bug that isn't
obviously fixable elsewhere.

To me it seems like the only way to avoid doing something like
what this patch does is to migrate all attached DRCs from the
source in all cases.

This would break backward-migration though, unless we switch from
using subregions for DRCs to explicitly disabling DRC migration
based on machine type.

That approach seems similar to what x86 does, e.g.
hw/acpi/ich9.c and hw/acpi/piix.c migrate vmstate_memhp_state
(corresponding to all DIMMs' slot status) in all cases where
memory hotplug is enabled. If they were to do this using
subregions for DIMMs in a transitional state I think similar
issues would pop up in that code as well.

Even if we take this route, we still need to explicitly suppress
hotplug events during INMIGRATE to avoid extra events going on
the queue. *Unless* we similarly rely purely on the ones sent by
the source. I believe the proposed event migration patches using
VMSTATE_QTAILQ_V only add to the list, so we'd need a variant
that either nukes the list first, or a pre-load hook in
vmstate_spapr_pending_events that does the same.

Personally, it's seeming like the general approach of not
special-casing INMIGRATE, and just letting migration do the
fixing, is easier to deal with conceptually, albeit somewhat
less flexible in terms of backward compatibility. Both approaches
seem reasonable though.

> 
> Specifically, what is it we're doing before the incoming migration
> that's breaking things?  Even if it's unnecessary, anything done there
> should be overwritten by the incoming stream.  That should certainly
> be the case (now) for the DRC state variables.  Maybe not for the
> queued hotplug events - but that means we should update the queue
> migration to make sure we clear anything existing on the destination
> before adding migrated events.
> 
> I'm also concerned by the fact that this makes changes for memory and
> cpu hotplug, but not for PCI devices.  Why aren't they also affected
> by this problem?
> 
> One nit in the implementation, see below:
> 
> > ---
> >  hw/ppc/spapr.c | 20 ++--
> >  1 file changed, 14 insertions(+), 6 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 0980d73..f1302d0 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -2511,6 +2511,12 @@ static void spapr_nmi(NMIState *n, int cpu_index, 
> > Error **errp)
> >  }
> >  }
> >  
> > +static bool spapr_coldplugged(DeviceState *dev)
> > +{
> > +return runstate_check(RUN_STATE_INMIGRATE) ||
> > +   !dev->hotplugged;
> > +}
> > +
> >  static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t 
> > size,
> > uint32_t node, bool dedicated_hp_event_source,
> > Error **errp)
> > @@ -2521,6 +2527,7 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t 
> > addr_start, uint64_t size,
> >  int i, fdt_offset, fdt_size;
> >  void *fdt;
> >  uint64_t addr = addr_start;
> > +bool coldplugged = spapr_coldplugged(dev);
> >  
> >  for (i = 0; i < nr_lmbs; i++) {
> >  drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
> > @@ -2532,9 +2539,9 @@ static void spapr_add_lmbs(DeviceState *dev, uint64_t 
> > addr_start, uint64_t size,
> >

Re: [Qemu-devel] [PATCH v1 1/1] char-socket: Don't report TCP socket waiting as an error

2017-06-08 Thread Alistair Francis

On Thu, Jun 8, 2017 at 10:56 AM, Markus Armbruster  wrote:
> Alistair Francis  writes:
>
>> On Wed, Jun 7, 2017 at 11:03 PM, Markus Armbruster  wrote:
>>> Alistair Francis  writes:
>>>
 On Wed, Jun 7, 2017 at 12:19 AM, Markus Armbruster  
 wrote:
> Paolo Bonzini  writes:
>
>> On 06/06/2017 18:30, Alistair Francis wrote:

 This is somehow confusing. I don't think it is worth having another
 qemu_log_stderr() function rather than using error_report() but this 
 very
 call might deserve a comment explaining this unusual use. What do you 
 think?
>>>
>>> The problem with stderr is that this isn't an error. Some uses of QEMU
>>> (inside Eclipse for example) flag everything printed on stderr as red
>>> which confuses users that they are seeing an error when they really
>>> aren't.
>>
>> But they are wrong.
>
> Concur.  We also print warnings and informational messages to stderr.
>
> We should make errors easy to recognize.  Fortunately, error_report()
> prints errors to stderr in a rigid format.  Unfortunately, error
> messages bypassing error_report() still exist in places.  We suck.
>
> The format is
>
> timestamp-if-enabled progname ':' location message
>
> timestamp-if-enabled is normally empty.  With -msg timestamp=on, it's
> the current time in ISO 8601 format, followed by a space.
>
> progname is the program name (main()'s argv[0]).
>
> location is either empty, or a reference to the command line or a
> configuration file.
>
> See error_vreport() for details.

 Ok, but this isn't an error, it's more information. So it sounds like
 we should still print to stderr but not print in the format described
 above?
>>>
>>> Yes.
>>>
>>> I explained the error message format to show how to distinguish actual
>>> errors from other stuff.
>>
>> Sorry, I should have been more clear. I meant we should not use the
>> error_report() function here. I don't think we have any
>> warning_report() function though, is that something worth having?
>
> So far we simply use error_printf() for such things.
>
> A function to report a warning would let us report them more uniformly,
> but only if we actually use it uniformly.  In other words, adding one
> without also converting the existing warnings to use it would create yet
> another open-ended incremental conversion job.  Are we up to it?

Yeah! Why not. I am happy to give it a shot changing some errors to warnings.

First thing though, what is the format for printing warnings?

Thanks,
Alistair

>

Re: [Qemu-devel] [PATCH] q35/mch: implement extended TSEG sizes

2017-06-08 Thread Michael S. Tsirkin

On Thu, Jun 08, 2017 at 09:48:53PM +0200, Gerd Hoffmann wrote:
>   Hi,
> 
> > I really dislike negotiation being re-invented for each device.  Do
> > we
> > need these tricks?  Can we just do fw cfg with standard discovery?
> > This ties in with my proposal to generalize smi features to
> > generic ones.
> 
> Device properties should be part of the device.
> We should have done this with the smi too.

What is part of the device and what isn't? It's all part
of QEMU in the end.  Adding probing for multiple devices
will just add to number of exits and slow down guest boot.

We do want to stick to emulating real devices if we can, no argument
here - but this stuff is PV anyway - what do we gain by spreading it
out?

> A more standard way to handle this would be to add a vendor-specific
> pci capability and place the register there.  Not sure we have room for
> that in the pci config space though.
> 
> cheers,
>   Gerd

We don't have room anywhere in PCI config space. Laszlo makes argument
why it's safe for this device based on spec but it's anyone's guess
whether current and future software will follow spec.  In short, going
anywhere near the emulated device has a potential to break some drivers.

-- 
MST

Re: [Qemu-devel] [PATCH] q35/mch: implement extended TSEG sizes

2017-06-08 Thread Gerd Hoffmann

  Hi,

> I really dislike negotiation being re-invented for each device.  Do
> we
> need these tricks?  Can we just do fw cfg with standard discovery?
> This ties in with my proposal to generalize smi features to
> generic ones.

Device properties should be part of the device.
We should have done this with the smi too.

A more standard way to handle this would be to add a vendor-specific
pci capability and place the register there.  Not sure we have room for
that in the pci config space though.

cheers,
  Gerd

[Qemu-devel] [PATCH RFC v19 11/13] target-avr: Put all translation code into one compilation unit

2017-06-08 Thread Michael Rolnik

From: Michael Rolnik 

From: Richard Henderson 

Signed-off-by: Richard Henderson 
---
 target/avr/Makefile.objs|   2 -
 target/avr/decode.c |   6 +-
 target/avr/translate-inst.c | 198 ++--
 target/avr/translate-inst.h | 113 -
 target/avr/translate.c  | 101 +-
 target/avr/translate.h  | 112 -
 6 files changed, 180 insertions(+), 352 deletions(-)
 delete mode 100644 target/avr/translate.h

diff --git a/target/avr/Makefile.objs b/target/avr/Makefile.objs
index 9848b1cb4c..48233ef544 100644
--- a/target/avr/Makefile.objs
+++ b/target/avr/Makefile.objs
@@ -20,6 +20,4 @@
 
 obj-y += translate.o cpu.o helper.o
 obj-y += gdbstub.o
-obj-y += translate-inst.o
-obj-y += decode.o
 obj-$(CONFIG_SOFTMMU) += machine.o
diff --git a/target/avr/decode.c b/target/avr/decode.c
index 2d2e54e448..576dd833a6 100644
--- a/target/avr/decode.c
+++ b/target/avr/decode.c
@@ -18,10 +18,8 @@
  * 
  */
 
-#include 
-#include "translate.h"
-
-void avr_decode(uint32_t pc, uint32_t *l, uint32_t c, translate_function_t *t)
+static void avr_decode(uint32_t pc, uint32_t *l, uint32_t c,
+   translate_function_t *t)
 {
 uint32_t opc = extract32(c, 0, 16);
 switch (opc & 0xd000) {
diff --git a/target/avr/translate-inst.c b/target/avr/translate-inst.c
index 377263a0d3..827ec7bb9e 100644
--- a/target/avr/translate-inst.c
+++ b/target/avr/translate-inst.c
@@ -18,10 +18,6 @@
  *  
  */
 
-#include "translate.h"
-#include "translate-inst.h"
-#include "qemu/bitops.h"
-
 static void gen_add_CHf(TCGv R, TCGv Rd, TCGv Rr)
 {
 TCGv t1 = tcg_temp_new_i32();
@@ -249,7 +245,7 @@ static TCGv gen_get_zaddr(void)
  *  Adds two registers and the contents of the C Flag and places the result in
  *  the destination register Rd.
  */
-int avr_translate_ADC(DisasContext *ctx, uint32_t opcode)
+static int avr_translate_ADC(DisasContext *ctx, uint32_t opcode)
 {
 TCGv Rd = cpu_r[ADC_Rd(opcode)];
 TCGv Rr = cpu_r[ADC_Rr(opcode)];
@@ -276,7 +272,7 @@ int avr_translate_ADC(DisasContext *ctx, uint32_t opcode)
  *  Adds two registers without the C Flag and places the result in the
  *  destination register Rd.
  */
-int avr_translate_ADD(DisasContext *ctx, uint32_t opcode)
+static int avr_translate_ADD(DisasContext *ctx, uint32_t opcode)
 {
 TCGv Rd = cpu_r[ADD_Rd(opcode)];
 TCGv Rr = cpu_r[ADD_Rr(opcode)];
@@ -305,7 +301,7 @@ int avr_translate_ADD(DisasContext *ctx, uint32_t opcode)
  *  instruction is not available in all devices. Refer to the device specific
  *  instruction set summary.
  */
-int avr_translate_ADIW(DisasContext *ctx, uint32_t opcode)
+static int avr_translate_ADIW(DisasContext *ctx, uint32_t opcode)
 {
 if (avr_feature(ctx->env, AVR_FEATURE_ADIW_SBIW) == false) {
 gen_helper_unsupported(cpu_env);
@@ -355,7 +351,7 @@ int avr_translate_ADIW(DisasContext *ctx, uint32_t opcode)
  *  Performs the logical AND between the contents of register Rd and register
  *  Rr and places the result in the destination register Rd.
  */
-int avr_translate_AND(DisasContext *ctx, uint32_t opcode)
+static int avr_translate_AND(DisasContext *ctx, uint32_t opcode)
 {
 TCGv Rd = cpu_r[AND_Rd(opcode)];
 TCGv Rr = cpu_r[AND_Rr(opcode)];
@@ -384,7 +380,7 @@ int avr_translate_AND(DisasContext *ctx, uint32_t opcode)
  *  Performs the logical AND between the contents of register Rd and a constant
  *  and places the result in the destination register Rd.
  */
-int avr_translate_ANDI(DisasContext *ctx, uint32_t opcode)
+static int avr_translate_ANDI(DisasContext *ctx, uint32_t opcode)
 {
 TCGv Rd = cpu_r[16 + ANDI_Rd(opcode)];
 int Imm = (ANDI_Imm(opcode));
@@ -404,7 +400,7 @@ int avr_translate_ANDI(DisasContext *ctx, uint32_t opcode)
  *  signed value by two without changing its sign. The Carry Flag can be used 
to
  *  round the result.
  */
-int avr_translate_ASR(DisasContext *ctx, uint32_t opcode)
+static int avr_translate_ASR(DisasContext *ctx, uint32_t opcode)
 {
 TCGv Rd = cpu_r[ASR_Rd(opcode)];
 TCGv t1 = tcg_temp_new_i32();
@@ -435,7 +431,7 @@ int avr_translate_ASR(DisasContext *ctx, uint32_t opcode)
 /*
  *  Clears a single Flag in SREG.
  */
-int avr_translate_BCLR(DisasContext *ctx, uint32_t opcode)
+static int avr_translate_BCLR(DisasContext *ctx, uint32_t opcode)
 {
 switch (BCLR_Bit(opcode)) {
 case 0x00:
@@ -470,7 +466,7 @@ int avr_translate_BCLR(DisasContext *ctx, uint32_t opcode)
 /*
  *  Copies the T Flag in the SREG (Status Register) to bit b in register Rd.
  */
-int avr_translate_BLD(DisasContext *ctx, uint32_t opcode)
+static int avr_translate_BLD(DisasContext *ctx, uint32_t opcode)
 {
 TCGv Rd = cpu_r[BLD_Rd(opcode)];
 TCGv t1 = tcg_temp_new_i32();
@@ -491,7 +487,7 @@ int

[Qemu-devel] [PATCH RFC v19 10/13] target-avr: Put env pointer in DisasContext

2017-06-08 Thread Michael Rolnik

From: Michael Rolnik 

From: Richard Henderson 

Signed-off-by: Richard Henderson 
---
 target/avr/translate-inst.c | 298 ++--
 target/avr/translate-inst.h | 194 ++--
 target/avr/translate.c  |  16 +--
 target/avr/translate.h  |  11 +-
 4 files changed, 257 insertions(+), 262 deletions(-)

diff --git a/target/avr/translate-inst.c b/target/avr/translate-inst.c
index 6ed98bb1ac..377263a0d3 100644
--- a/target/avr/translate-inst.c
+++ b/target/avr/translate-inst.c
@@ -109,9 +109,9 @@ static void gen_ZNSf(TCGv R)
 tcg_gen_xor_tl(cpu_Sf, cpu_Nf, cpu_Vf); /* Sf = Nf ^ Vf */
 }
 
-static void gen_push_ret(CPUAVRState *env, int ret)
+static void gen_push_ret(DisasContext *ctx, int ret)
 {
-if (avr_feature(env, AVR_FEATURE_1_BYTE_PC)) {
+if (avr_feature(ctx->env, AVR_FEATURE_1_BYTE_PC)) {
 
 TCGv t0 = tcg_const_i32((ret & 0xff));
 
@@ -119,7 +119,7 @@ static void gen_push_ret(CPUAVRState *env, int ret)
 tcg_gen_subi_tl(cpu_sp, cpu_sp, 1);
 
 tcg_temp_free_i32(t0);
-} else if (avr_feature(env, AVR_FEATURE_2_BYTE_PC)) {
+} else if (avr_feature(ctx->env, AVR_FEATURE_2_BYTE_PC)) {
 
 TCGv t0 = tcg_const_i32((ret & 0x00));
 
@@ -129,7 +129,7 @@ static void gen_push_ret(CPUAVRState *env, int ret)
 
 tcg_temp_free_i32(t0);
 
-} else if (avr_feature(env, AVR_FEATURE_3_BYTE_PC)) {
+} else if (avr_feature(ctx->env, AVR_FEATURE_3_BYTE_PC)) {
 
 TCGv lo = tcg_const_i32((ret & 0xff));
 TCGv hi = tcg_const_i32((ret & 0x00) >> 8);
@@ -144,20 +144,20 @@ static void gen_push_ret(CPUAVRState *env, int ret)
 }
 }
 
-static void gen_pop_ret(CPUAVRState *env, TCGv ret)
+static void gen_pop_ret(DisasContext *ctx, TCGv ret)
 {
-if (avr_feature(env, AVR_FEATURE_1_BYTE_PC)) {
+if (avr_feature(ctx->env, AVR_FEATURE_1_BYTE_PC)) {
 
 tcg_gen_addi_tl(cpu_sp, cpu_sp, 1);
 tcg_gen_qemu_ld_tl(ret, cpu_sp, MMU_DATA_IDX, MO_UB);
 
-} else if (avr_feature(env, AVR_FEATURE_2_BYTE_PC)) {
+} else if (avr_feature(ctx->env, AVR_FEATURE_2_BYTE_PC)) {
 
 tcg_gen_addi_tl(cpu_sp, cpu_sp, 1);
 tcg_gen_qemu_ld_tl(ret, cpu_sp, MMU_DATA_IDX, MO_BEUW);
 tcg_gen_addi_tl(cpu_sp, cpu_sp, 1);
 
-} else if (avr_feature(env, AVR_FEATURE_3_BYTE_PC)) {
+} else if (avr_feature(ctx->env, AVR_FEATURE_3_BYTE_PC)) {
 
 TCGv lo = tcg_temp_new_i32();
 TCGv hi = tcg_temp_new_i32();
@@ -249,7 +249,7 @@ static TCGv gen_get_zaddr(void)
  *  Adds two registers and the contents of the C Flag and places the result in
  *  the destination register Rd.
  */
-int avr_translate_ADC(CPUAVRState *env, DisasContext *ctx, uint32_t opcode)
+int avr_translate_ADC(DisasContext *ctx, uint32_t opcode)
 {
 TCGv Rd = cpu_r[ADC_Rd(opcode)];
 TCGv Rr = cpu_r[ADC_Rr(opcode)];
@@ -276,7 +276,7 @@ int avr_translate_ADC(CPUAVRState *env, DisasContext *ctx, 
uint32_t opcode)
  *  Adds two registers without the C Flag and places the result in the
  *  destination register Rd.
  */
-int avr_translate_ADD(CPUAVRState *env, DisasContext *ctx, uint32_t opcode)
+int avr_translate_ADD(DisasContext *ctx, uint32_t opcode)
 {
 TCGv Rd = cpu_r[ADD_Rd(opcode)];
 TCGv Rr = cpu_r[ADD_Rr(opcode)];
@@ -305,9 +305,9 @@ int avr_translate_ADD(CPUAVRState *env, DisasContext *ctx, 
uint32_t opcode)
  *  instruction is not available in all devices. Refer to the device specific
  *  instruction set summary.
  */
-int avr_translate_ADIW(CPUAVRState *env, DisasContext *ctx, uint32_t opcode)
+int avr_translate_ADIW(DisasContext *ctx, uint32_t opcode)
 {
-if (avr_feature(env, AVR_FEATURE_ADIW_SBIW) == false) {
+if (avr_feature(ctx->env, AVR_FEATURE_ADIW_SBIW) == false) {
 gen_helper_unsupported(cpu_env);
 
 return BS_EXCP;
@@ -355,7 +355,7 @@ int avr_translate_ADIW(CPUAVRState *env, DisasContext *ctx, 
uint32_t opcode)
  *  Performs the logical AND between the contents of register Rd and register
  *  Rr and places the result in the destination register Rd.
  */
-int avr_translate_AND(CPUAVRState *env, DisasContext *ctx, uint32_t opcode)
+int avr_translate_AND(DisasContext *ctx, uint32_t opcode)
 {
 TCGv Rd = cpu_r[AND_Rd(opcode)];
 TCGv Rr = cpu_r[AND_Rr(opcode)];
@@ -384,7 +384,7 @@ int avr_translate_AND(CPUAVRState *env, DisasContext *ctx, 
uint32_t opcode)
  *  Performs the logical AND between the contents of register Rd and a constant
  *  and places the result in the destination register Rd.
  */
-int avr_translate_ANDI(CPUAVRState *env, DisasContext *ctx, uint32_t opcode)
+int avr_translate_ANDI(DisasContext *ctx, uint32_t opcode)
 {
 TCGv Rd = cpu_r[16 + ANDI_Rd(opcode)];
 int Imm = (ANDI_Imm(opcode));
@@ -404,7 +404,7 @@ int avr_translate_ANDI(CPUAVRState *env, DisasContext *ctx, 
uint32_t opcode)
  *  signed value by two without changing its sign. The Carry Flag

[Qemu-devel] [PATCH RFC v19 12/13] target-avr: Respect .inc.c convention

2017-06-08 Thread Michael Rolnik

From: Michael Rolnik 

From: Richard Henderson 

Signed-off-by: Richard Henderson 
---
 target/avr/{decode.c => decode.inc.c} | 0
 target/avr/{translate-inst.c => translate-inst.inc.c} | 0
 target/avr/translate.c| 4 ++--
 3 files changed, 2 insertions(+), 2 deletions(-)
 rename target/avr/{decode.c => decode.inc.c} (100%)
 rename target/avr/{translate-inst.c => translate-inst.inc.c} (100%)

diff --git a/target/avr/decode.c b/target/avr/decode.inc.c
similarity index 100%
rename from target/avr/decode.c
rename to target/avr/decode.inc.c
diff --git a/target/avr/translate-inst.c b/target/avr/translate-inst.inc.c
similarity index 100%
rename from target/avr/translate-inst.c
rename to target/avr/translate-inst.inc.c
diff --git a/target/avr/translate.c b/target/avr/translate.c
index 184e66c04e..428261a64a 100644
--- a/target/avr/translate.c
+++ b/target/avr/translate.c
@@ -103,8 +103,8 @@ static void gen_goto_tb(DisasContext *ctx, int n, 
target_ulong dest)
 
 #include "exec/gen-icount.h"
 #include "translate-inst.h"
-#include "translate-inst.c"
-#include "decode.c"
+#include "translate-inst.inc.c"
+#include "decode.inc.c"
 
 void avr_translate_init(void)
 {
-- 
2.11.0 (Apple Git-81)

[Qemu-devel] [PULL 3/3] hw/pcie: fix the generic pcie root port to support migration

2017-06-08 Thread Michael S. Tsirkin

From: Marcel Apfelbaum 

Add msix state to pcie-root-ports's vmstate
in order to support migration.

Signed-off-by: Marcel Apfelbaum 
Reviewed-by: Dr. David Alan Gilbert 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 include/hw/compat.h|  4 
 hw/pci-bridge/gen_pcie_root_port.c | 25 +
 2 files changed, 29 insertions(+)

diff --git a/include/hw/compat.h b/include/hw/compat.h
index 400c64b..26cd585 100644
--- a/include/hw/compat.h
+++ b/include/hw/compat.h
@@ -14,6 +14,10 @@
 .driver   = "virtio-net-device",\
 .property = "x-mtu-bypass-backend",\
 .value= "off",\
+},{\
+.driver   = "pcie-root-port",\
+.property = "x-migrate-msix",\
+.value= "false",\
 },
 
 #define HW_COMPAT_2_8 \
diff --git a/hw/pci-bridge/gen_pcie_root_port.c 
b/hw/pci-bridge/gen_pcie_root_port.c
index 8ebffa8..cb694d6 100644
--- a/hw/pci-bridge/gen_pcie_root_port.c
+++ b/hw/pci-bridge/gen_pcie_root_port.c
@@ -20,6 +20,14 @@
 #define GEN_PCIE_ROOT_PORT_AER_OFFSET   0x100
 #define GEN_PCIE_ROOT_PORT_MSIX_NR_VECTOR   1
 
+typedef struct GenPCIERootPort {
+/*< private >*/
+PCIESlot parent_obj;
+/*< public >*/
+
+bool migrate_msix;
+} GenPCIERootPort;
+
 static uint8_t gen_rp_aer_vector(const PCIDevice *d)
 {
 return 0;
@@ -45,6 +53,13 @@ static void gen_rp_interrupts_uninit(PCIDevice *d)
 msix_uninit_exclusive_bar(d);
 }
 
+static bool gen_rp_test_migrate_msix(void *opaque, int version_id)
+{
+GenPCIERootPort *rp = opaque;
+
+return rp->migrate_msix;
+}
+
 static const VMStateDescription vmstate_rp_dev = {
 .name = "pcie-root-port",
 .version_id = 1,
@@ -54,10 +69,18 @@ static const VMStateDescription vmstate_rp_dev = {
 VMSTATE_PCI_DEVICE(parent_obj.parent_obj.parent_obj, PCIESlot),
 VMSTATE_STRUCT(parent_obj.parent_obj.parent_obj.exp.aer_log,
PCIESlot, 0, vmstate_pcie_aer_log, PCIEAERLog),
+VMSTATE_MSIX_TEST(parent_obj.parent_obj.parent_obj.parent_obj,
+  GenPCIERootPort,
+  gen_rp_test_migrate_msix),
 VMSTATE_END_OF_LIST()
 }
 };
 
+static Property gen_rp_props[] = {
+DEFINE_PROP_BOOL("x-migrate-msix", GenPCIERootPort, migrate_msix, true),
+DEFINE_PROP_END_OF_LIST()
+};
+
 static void gen_rp_dev_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
@@ -68,6 +91,7 @@ static void gen_rp_dev_class_init(ObjectClass *klass, void 
*data)
 k->device_id = PCI_DEVICE_ID_REDHAT_PCIE_RP;
 dc->desc = "PCI Express Root Port";
 dc->vmsd = &vmstate_rp_dev;
+dc->props = gen_rp_props;
 rpc->aer_vector = gen_rp_aer_vector;
 rpc->interrupts_init = gen_rp_interrupts_init;
 rpc->interrupts_uninit = gen_rp_interrupts_uninit;
@@ -77,6 +101,7 @@ static void gen_rp_dev_class_init(ObjectClass *klass, void 
*data)
 static const TypeInfo gen_rp_dev_info = {
 .name  = TYPE_GEN_PCIE_ROOT_PORT,
 .parent= TYPE_PCIE_ROOT_PORT,
+.instance_size = sizeof(GenPCIERootPort),
 .class_init= gen_rp_dev_class_init,
 };
 
-- 
MST

[Qemu-devel] [PULL 2/3] nvdimm acpi: fix region format interface code

2017-06-08 Thread Michael S. Tsirkin

From: Haozhong Zhang 

Per ACPI 6.2, section 5.2.25.6 and JEDEC Annex L Release 3, the
current region format interface code 0x201 indicates the block
addressed function interface 1, rather than a byte addressable
interface. Fix it by using 0x301 which indicates the byte addressable
no energy backed function interface 1.

Signed-off-by: Haozhong Zhang 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Stefan Hajnoczi 
---
 hw/acpi/nvdimm.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 8e7d6ec..b5734f5 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -338,9 +338,10 @@ static void nvdimm_build_structure_dcr(GArray *structures, 
DeviceState *dev)
 nfit_dcr->revision_id = cpu_to_le16(1 /* Current Revision supported
  in ACPI 6.0 is 1. */);
 nfit_dcr->serial_number = cpu_to_le32(sn);
-nfit_dcr->fic = cpu_to_le16(0x201 /* Format Interface Code. See Chapter
- 2: NVDIMM Device Specific Method
- (DSM) in DSM Spec Rev1.*/);
+nfit_dcr->fic = cpu_to_le16(0x301 /* Format Interface Code:
+ Byte addressable, no energy backed.
+ See ACPI 6.2, sect 5.2.25.6 and
+ JEDEC Annex L Release 3. */);
 }
 
 static GArray *nvdimm_build_device_structure(void)
-- 
MST

[Qemu-devel] [PULL 1/3] vhost-user-bridge: fix iov_restore_front() warning

2017-06-08 Thread Michael S. Tsirkin

From: Marc-André Lureau 

  CC  tests/vhost-user-bridge.o
/home/dgilbert/git/qemu-world3/tests/vhost-user-bridge.c:228:23: warning: 
variables 'front' and 'iov' used in loop condition not modified in loop body 
[-Wfor-loop-analysis]
for (cur = front; front != iov; cur++) {
  ^~~~
1 warning generated.

Fix the loop, document the function, and fix some related assert().

In practice, the loop bug was harmless because the front sg buffer is
enough to discard/restore the header size.

Reported-by: Dr. David Alan Gilbert 
Signed-off-by: Marc-André Lureau 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Dr. David Alan Gilbert 
Tested-by: Jens Freimann 
---
 tests/vhost-user-bridge.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
index 8618c20..1e5b5ca 100644
--- a/tests/vhost-user-bridge.c
+++ b/tests/vhost-user-bridge.c
@@ -220,12 +220,18 @@ vubr_handle_tx(VuDev *dev, int qidx)
 free(elem);
 }
 
+
+/* this function reverse the effect of iov_discard_front() it must be
+ * called with 'front' being the original struct iovec and 'bytes'
+ * being the number of bytes you shaved off
+ */
 static void
 iov_restore_front(struct iovec *front, struct iovec *iov, size_t bytes)
 {
 struct iovec *cur;
 
-for (cur = front; front != iov; cur++) {
+for (cur = front; cur != iov; cur++) {
+assert(bytes >= cur->iov_len);
 bytes -= cur->iov_len;
 }
 
@@ -302,7 +308,8 @@ vubr_backend_recv_cb(int sock, void *ctx)
 }
 iov_from_buf(sg, elem->in_num, 0, &hdr, sizeof hdr);
 total += hdrlen;
-assert(iov_discard_front(&sg, &num, hdrlen) == hdrlen);
+ret = iov_discard_front(&sg, &num, hdrlen);
+assert(ret == hdrlen);
 }
 
 struct msghdr msg = {
-- 
MST

[Qemu-devel] [PULL 0/3] pc, pci, vhost: fixes

2017-06-08 Thread Michael S. Tsirkin

The following changes since commit 64175afc695c0672876fbbfc31b299c86d562cb4:

  arm_gicv3: Fix ICC_BPR1 reset value when EL3 not implemented (2017-06-07 
17:21:44 +0100)

are available in the git repository at:

  git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream

for you to fetch changes up to bc277a52fbea1532d1adf30ba0edf15ab3dcdead:

  hw/pcie: fix the generic pcie root port to support migration (2017-06-08 
22:02:37 +0300)


pc, pci, vhost: fixes

Some fixes all over the place.

Signed-off-by: Michael S. Tsirkin 


Haozhong Zhang (1):
  nvdimm acpi: fix region format interface code

Marc-André Lureau (1):
  vhost-user-bridge: fix iov_restore_front() warning

Marcel Apfelbaum (1):
  hw/pcie: fix the generic pcie root port to support migration

 include/hw/compat.h|  4 
 hw/acpi/nvdimm.c   |  7 ---
 hw/pci-bridge/gen_pcie_root_port.c | 25 +
 tests/vhost-user-bridge.c  | 11 +--
 4 files changed, 42 insertions(+), 5 deletions(-)

Re: [Qemu-devel] [PATCH v1] virtio-net: enable configurable tx queue size

2017-06-08 Thread Michael S. Tsirkin

On Wed, Jun 07, 2017 at 09:04:29AM +0800, Wei Wang wrote:
> On 06/05/2017 11:38 PM, Michael S. Tsirkin wrote:
> > On Mon, Jun 05, 2017 at 04:57:29PM +0800, Wei Wang wrote:
> > > This patch enables the virtio-net tx queue size to be configurable
> > > between 256 and 1024 by the user. The queue size specified by the
> > > user should be power of 2. If "tx_queue_size" is not offered by the
> > > user, the default queue size, 1024, will be used.
> > > 
> > > For the traditional QEMU backend, setting the tx queue size to be 1024
> > > requires the guest virtio driver to support the VIRTIO_F_MAX_CHAIN_SIZE
> > > feature. This feature restricts the guest driver from chaining 1024
> > > vring descriptors, which may cause the device side implementation to
> > > send more than 1024 iov to writev.
> > > 
> > > VIRTIO_F_MAX_CHAIN_SIZE is a common transport feature added for all
> > > virtio devices. However, each device has the flexibility to set the max
> > > chain size to limit its driver to chain vring descriptors. Currently,
> > > the max chain size of the virtio-net device is set to 1023.
> > > 
> > > In the case that the tx queue size is set to 1024 and the
> > > VIRTIO_F_MAX_CHAIN_SIZE feature is not supported by the guest driver,
> > > the tx queue size will be reconfigured to be 512.
> > I'd like to see the reverse. Start with the current default.
> > If VIRTIO_F_MAX_CHAIN_SIZE is negotiated, increase the queue size.
> > 
> 
> OK, we can let the queue size start with 256, and how about
> increasing it to 1024 in the following two cases:

I think it should be
1) VIRTIO_F_MAX_CHAIN_SIZE is negotiated
and
2) user requested large size

> 1) VIRTIO_F_MAX_CHAIN_SIZE is negotiated; or
> 2) the backend is vhost.

For vhost we also need vhost backend to support VIRTIO_F_MAX_CHAIN_SIZE.
We also need to send the max chain size to backend.

> Best,
> Wei

Re: [Qemu-devel] [PATCH 2/3] exec: simplify address_space_get_iotlb_entry

2017-06-08 Thread Michael S. Tsirkin

On Thu, Jun 08, 2017 at 02:11:50PM +0800, Peter Xu wrote:
> On Wed, Jun 07, 2017 at 04:07:20PM +0300, Michael S. Tsirkin wrote:
> > On Wed, Jun 07, 2017 at 11:44:43AM +0800, Peter Xu wrote:
> > > On Wed, Jun 07, 2017 at 09:47:05AM +1000, David Gibson wrote:
> > > > On Tue, Jun 06, 2017 at 04:34:30PM +0200, Paolo Bonzini wrote:
> > > > > 
> > > > > 
> > > > > On 05/06/2017 05:07, Peter Xu wrote:
> > > > > > I don't sure whether it'll be a good interface for IOTLB. AFAIU at
> > > > > > least for VT-d, the IOMMU translation is page aligned which is 
> > > > > > defined
> > > > > > by spec, so it makes sense that (again at least for VT-d) here we'd
> > > > > > better just use page_mask/addr_mask.
> > > > > > 
> > > > > > That's also how I know about IOMMU in general - I assume it do the
> > > > > > translations always with page masks (never arbitary length), though
> > > > > > page size can differ from platfrom to platform, that's why here the
> > > > > > IOTLB interface used addr_mask, then it works for all platforms. I
> > > > > > don't know whether I'm 100% correct here though.
> > > > > > 
> > > > > > Maybe David/Paolo/... would comment as well?
> > > > > 
> > > > > I would ask David.  There are PowerPC MMUs that allow fast lookup of
> > > > > arbitrarily-sized windows (not necessarily power of two),
> > > > 
> > > > Uh.. I'm not sure what you mean here.  You might be thinking of the
> > > > BATs which really old (32-bit) PowerPC MMUs had - those allow
> > > > arbitrary large block translations, but they do have to be a power of
> > > > two.
> > > > 
> > > > > so maybe the
> > > > > IOMMUs can do the same.
> > > > 
> > > > The only Power IOMMU I know about uses a fixed, power-of-two page size
> > > > per DMA window.
> > > 
> > > If so, I would still be inclined to keep using masks for QEMU IOTLB.
> > > Then, my first two patches should still stand.
> > > 
> > > I am just afraid that not using masks will diverge the emulation from
> > > real hardware and brings trouble one day.
> > > 
> > > For vhost IOTLB interface, it does not need to be strictly aligned to
> > > QEMU IOMMU IOTLB definition, and that's how it's working now (current
> > > vhost iotlb allows arbitary length, and I think it's good). So imho we
> > > don't really need to worry about the performance - after all, we can
> > > do everything customized for vhost, just like what patch 3 did (yeah,
> > > it can be better...).
> > > 
> > > Thanks,
> > 
> > Pre-faults is also something that does not happen on real hardware.
> > And it's about security so a bigger issue.
> > 
> > If I had to choose between that and using non-power-of-2 in
> > the API, I'd go for non-power-of-2. Let backends that can only
> > support power of 2 split it up to multiple transactions.
> 
> The problem is that when I was fixing the problem that vhost had with
> PT (a764040, "exec: abstract address_space_do_translate()"), I did
> broke the IOTLB translation a bit (it was using page masks). IMHO we
> need to fix it first for correctness (patch 1/2).
> 
> For patch 3, if we can have Jason's patch to allow dynamic
> iommu_platform switching, that'll be the best, then I can rewrite
> patch 3 with the switching logic rather than caching anything. But
> IMHO that can be separated from patch 1/2 if you like.
> 
> Or do you have better suggestion on how should we fix it?
> 
> Thanks,

Can we drop masks completely and replace with length? I think we
should do that instead of trying to fix masks.

> -- 
> Peter Xu

[Qemu-devel] [PATCH RFC v19 08/13] target-avr: instruction decoder generator

2017-06-08 Thread Michael Rolnik

Signed-off-by: Michael Rolnik 
Message-Id: <1471522070-77598-9-git-send-email-mrol...@gmail.com>
Signed-off-by: Richard Henderson 
---
 target/avr/cpugen/CMakeLists.txt   |  38 +++
 target/avr/cpugen/README.md|  17 ++
 target/avr/cpugen/cpu/avr.yaml | 213 ++
 target/avr/cpugen/src/CMakeLists.txt   |  62 
 target/avr/cpugen/src/cpugen.cpp   | 457 +
 target/avr/cpugen/src/utils.cpp|  26 ++
 target/avr/cpugen/src/utils.h  |  78 +
 target/avr/cpugen/xsl/decode.c.xsl | 103 +++
 target/avr/cpugen/xsl/translate-inst.h.xsl | 118 
 target/avr/cpugen/xsl/utils.xsl| 108 +++
 10 files changed, 1220 insertions(+)
 create mode 100644 target/avr/cpugen/CMakeLists.txt
 create mode 100644 target/avr/cpugen/README.md
 create mode 100644 target/avr/cpugen/cpu/avr.yaml
 create mode 100644 target/avr/cpugen/src/CMakeLists.txt
 create mode 100644 target/avr/cpugen/src/cpugen.cpp
 create mode 100644 target/avr/cpugen/src/utils.cpp
 create mode 100644 target/avr/cpugen/src/utils.h
 create mode 100644 target/avr/cpugen/xsl/decode.c.xsl
 create mode 100644 target/avr/cpugen/xsl/translate-inst.h.xsl
 create mode 100644 target/avr/cpugen/xsl/utils.xsl

diff --git a/target/avr/cpugen/CMakeLists.txt b/target/avr/cpugen/CMakeLists.txt
new file mode 100644
index 0000000000..ded391c9c2
--- /dev/null
+++ b/target/avr/cpugen/CMakeLists.txt
@@ -0,0 +1,38 @@
+cmake_minimum_required(VERSION 2.8)
+
+project(cpugen)
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -ggdb -g3")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
+
+set(Boost_USE_STATIC_LIBS   ON)
+find_package(
+Boost 1.60.0
+REQUIRED
+COMPONENTS
+system
+regex)
+#set(BUILD_SHARED_LIBS   OFF)
+#set(BUILD_STATIC_LIBS   ON)
+add_subdirectory(tinyxml2)
+add_subdirectory(yaml-cpp)
+
+include_directories(
+${CMAKE_CURRENT_SOURCE_DIR}
+${CMAKE_CURRENT_SOURCE_DIR}/..
+${CMAKE_CURRENT_SOURCE_DIR}/../yaml-cpp/include
+${Boost_INCLUDE_DIRS}
+)
+
+add_executable(
+cpugen
+src/cpugen.cpp
+src/utils.cpp
+)
+
+target_link_libraries(
+cpugen
+yaml-cpp
+tinyxml2
+${Boost_LIBRARIES}
+)
diff --git a/target/avr/cpugen/README.md b/target/avr/cpugen/README.md
new file mode 100644
index 0000000000..f0caa8b135
--- /dev/null
+++ b/target/avr/cpugen/README.md
@@ -0,0 +1,17 @@
+# CPUGEN
+## How to build
+within ```cpugen``` directory do
+```
+git clone https://github.com/leethomason/tinyxml2
+git clone https://github.com/jbeder/yaml-cpp
+mkdir build
+cd build
+cmake ..
+make
+```
+## How to use
+```
+cpugen ../cpu/avr.yaml
+xsltproc ../xsl/decode.c.xsl output.xml > ../../decode.c
+xsltproc ../xsl/translate-inst.h.xsl output.xml > ../../translate-inst.h
+```
diff --git a/target/avr/cpugen/cpu/avr.yaml b/target/avr/cpugen/cpu/avr.yaml
new file mode 100644
index 0000000000..c36b628cf1
--- /dev/null
+++ b/target/avr/cpugen/cpu/avr.yaml
@@ -0,0 +1,213 @@
+cpu:
+name: avr
+instructions:
+- ADC:
+opcode: 0001 11 hRr[1] Rd[5] lRr[4]
+- ADD:
+opcode: 0000 11 hRr[1] Rd[5] lRr[4]
+- ADIW:
+opcode: 1001 0110 hImm[2] Rd[2] lImm[4]
+- AND:
+opcode: 0010 00 hRr[1] Rd[5] lRr[4]
+- ANDI:
+opcode: 0111 hImm[4] Rd[4] lImm[4]
+- ASR:
+opcode: 1001 010 Rd[5] 0101
+- BCLR:
+opcode: 1001 0100 1 Bit[3] 1000
+- BLD:
+opcode: 1111 100 Rd[5] 0 Bit[3]
+- BRBC:
+opcode: 1111 01 Imm[7] Bit[3]
+- BRBS:
+opcode: 1111 00 Imm[7] Bit[3]
+- BREAK:
+opcode: 1001 0101 1001 1000
+- BSET:
+opcode: 1001 0100 0 Bit[3] 1000
+- BST:
+opcode: 1111 101 Rd[5] 0 Bit[3]
+- CALL:
+opcode: 1001 010 hImm[5] 111 lImm[17]
+- CBI:
+opcode: 1001 1000 Imm[5] Bit[3]
+- COM:
+opcode: 1001 010 Rd[5] 0000
+- CP:
+opcode: 0001 01 hRr[1] Rd[5] lRr[4]
+- CPC:
+opcode: 0000 01 hRr[1] Rd[5] lRr[4]
+- CPI:
+opcode: 0011 hImm[4] Rd[4] lImm[4]
+- CPSE:
+opcode: 0001 00 hRr[1] Rd[5] lRr[4]
+- DEC:
+opcode: 1001 010 Rd[5] 1010
+- DES:
+opcode: 1001 0100 Imm[4] 1011
+- EICALL:
+opcode: 1001 0101 0001 1001
+- EIJMP:
+opcode: 1001 0100 0001 1001
+- ELPM1:
+opcode: 1001 0101 1101 1000
+- ELPM2:
+opcode: 1001 000 Rd[5] 0110
+- ELPMX:
+opcode: 1001 000 Rd[5] 0111
+- EOR:
+opcode: 0010 01 hRr[1] Rd[5] lRr[4]
+- FMUL:
+opcode: 0000 0011 0 Rd[3] 1 Rr[3]
+- FMULS:
+opcode: 0000 0011 1 Rd[3] 0 Rr[3]
+- FMULSU:
+

[Qemu-devel] [PATCH RFC v19 09/13] target-avr: adding instruction decoder

2017-06-08 Thread Michael Rolnik

Signed-off-by: Michael Rolnik 
Message-Id: <1471522070-77598-10-git-send-email-mrol...@gmail.com>
Signed-off-by: Richard Henderson 
---
 target/avr/Makefile.objs |   1 +
 target/avr/decode.c  | 691 +++
 target/avr/translate.c   |   2 +
 3 files changed, 694 insertions(+)
 create mode 100644 target/avr/decode.c

diff --git a/target/avr/Makefile.objs b/target/avr/Makefile.objs
index a448792ff3..9848b1cb4c 100644
--- a/target/avr/Makefile.objs
+++ b/target/avr/Makefile.objs
@@ -21,4 +21,5 @@
 obj-y += translate.o cpu.o helper.o
 obj-y += gdbstub.o
 obj-y += translate-inst.o
+obj-y += decode.o
 obj-$(CONFIG_SOFTMMU) += machine.o
diff --git a/target/avr/decode.c b/target/avr/decode.c
new file mode 100644
index 0000000000..2d2e54e448
--- /dev/null
+++ b/target/avr/decode.c
@@ -0,0 +1,691 @@
+/*
+ * QEMU AVR CPU
+ *
+ * Copyright (c) 2016 Michael Rolnik
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see
+ * 
+ */
+
+#include 
+#include "translate.h"
+
+void avr_decode(uint32_t pc, uint32_t *l, uint32_t c, translate_function_t *t)
+{
+uint32_t opc = extract32(c, 0, 16);
+switch (opc & 0xd000) {
+case 0x0000: {
+switch (opc & 0x2c00) {
+case 0x0000: {
+switch (opc & 0x0300) {
+case 0x0000: {
+*l = 16;
+*t = &avr_translate_NOP;
+break;
+}
+case 0x0100: {
+*l = 16;
+*t = &avr_translate_MOVW;
+break;
+}
+case 0x0200: {
+*l = 16;
+*t = &avr_translate_MULS;
+break;
+}
+case 0x0300: {
+switch (opc & 0x0088) {
+case 0x0000: {
+*l = 16;
+*t = &avr_translate_MULSU;
+break;
+}
+case 0x0008: {
+*l = 16;
+*t = &avr_translate_FMUL;
+break;
+}
+case 0x0080: {
+*l = 16;
+*t = &avr_translate_FMULS;
+break;
+}
+case 0x0088: {
+*l = 16;
+*t = &avr_translate_FMULSU;
+break;
+}
+}
+break;
+}
+}
+break;
+}
+case 0x0400: {
+*l = 16;
+*t = &avr_translate_CPC;
+break;
+}
+case 0x0800: {
+*l = 16;
+*t = &avr_translate_SBC;
+break;
+}
+case 0x0c00: {
+*l = 16;
+*t = &avr_translate_ADD;
+break;
+}
+case 0x2000: {
+*l = 16;
+*t = &avr_translate_AND;
+break;
+}
+case 0x2400: {
+*l = 16;
+*t = &avr_translate_EOR;
+break;
+}
+case 0x2800: {
+*l = 16;
+*t = &avr_translate_OR;
+break;
+}
+case 0x2c00: {
+*l = 16;
+*t = &avr_translate_MOV;
+break;
+}
+}
+break;
+}
+case 0x1000: {
+switch (opc & 0x2000) {

[Qemu-devel] [PATCH RFC v19 06/13] target-avr: adding helpers for IN, OUT, SLEEP, WBR & unsupported instructions

2017-06-08 Thread Michael Rolnik

Signed-off-by: Michael Rolnik 
Message-Id: <1471522070-77598-7-git-send-email-mrol...@gmail.com>
Signed-off-by: Richard Henderson 
---
 target/avr/cpu.h   |  10 +++
 target/avr/helper.c| 216 -
 target/avr/helper.h|   7 ++
 target/avr/translate.c |   8 ++
 4 files changed, 239 insertions(+), 2 deletions(-)

diff --git a/target/avr/cpu.h b/target/avr/cpu.h
index 54dc58c0df..92143244f7 100644
--- a/target/avr/cpu.h
+++ b/target/avr/cpu.h
@@ -139,6 +139,7 @@ struct CPUAVRState {
 uint32_t sp; /* 16 bits */
 
 uint64_t intsrc; /* interrupt sources */
+bool fullacc;/* CPU/MEM if true MEM only otherwise */
 
 uint32_t features;
 
@@ -181,6 +182,10 @@ int avr_cpu_handle_mmu_fault(CPUState *cpu, vaddr address, 
int rw,
 int avr_cpu_memory_rw_debug(CPUState *cs, vaddr address, uint8_t *buf,
 int len, bool is_write);
 
+enum {
+TB_FLAGS_FULL_ACCESS = 1,
+};
+
 static inline void cpu_get_tb_cpu_state(CPUAVRState *env, target_ulong *pc,
 target_ulong *cs_base, uint32_t *pflags)
 {
@@ -188,6 +193,11 @@ static inline void cpu_get_tb_cpu_state(CPUAVRState *env, 
target_ulong *pc,
 
 *pc = env->pc_w * 2;
 *cs_base = 0;
+
+if (env->fullacc) {
+flags |= TB_FLAGS_FULL_ACCESS;
+}
+
 *pflags = flags;
 }
 
diff --git a/target/avr/helper.c b/target/avr/helper.c
index 61255fdff3..bc53053a57 100644
--- a/target/avr/helper.c
+++ b/target/avr/helper.c
@@ -28,6 +28,7 @@
 #include "exec/cpu_ldst.h"
 #include "qemu/host-utils.h"
 #include "exec/helper-proto.h"
+#include "exec/ioport.h"
 
 bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
 {
@@ -79,11 +80,11 @@ void avr_cpu_do_interrupt(CPUState *cs)
 
 if (avr_feature(env, AVR_FEATURE_3_BYTE_PC)) {
 cpu_stb_data(env, env->sp--, (ret & 0x0000ff));
-cpu_stb_data(env, env->sp--, (ret & 0x00ff00) >>  8);
+cpu_stb_data(env, env->sp--, (ret & 0x00ff00) >> 8);
 cpu_stb_data(env, env->sp--, (ret & 0xff0000) >> 16);
 } else if (avr_feature(env, AVR_FEATURE_2_BYTE_PC)) {
 cpu_stb_data(env, env->sp--, (ret & 0x0000ff));
-cpu_stb_data(env, env->sp--, (ret & 0x00ff00) >>  8);
+cpu_stb_data(env, env->sp--, (ret & 0x00ff00) >> 8);
 } else {
 cpu_stb_data(env, env->sp--, (ret & 0x0000ff));
 }
@@ -126,7 +127,19 @@ void tlb_fill(CPUState *cs, target_ulong vaddr, 
MMUAccessType access_type,
 if (mmu_idx == MMU_CODE_IDX) {
 paddr = PHYS_BASE_CODE + vaddr - VIRT_BASE_CODE;
 prot = PAGE_READ | PAGE_EXEC;
+} else if (vaddr - VIRT_BASE_REGS < AVR_REGS) {
+/*
+ * this is a write into CPU registers, exit and rebuilt this TB
+ * to use full write
+ */
+AVRCPU *cpu = AVR_CPU(cs);
+CPUAVRState *env = &cpu->env;
+env->fullacc = 1;
+cpu_loop_exit_restore(cs, retaddr);
 } else {
+/*
+ * this is a write into memory. nothing special
+ */
 paddr = PHYS_BASE_DATA + vaddr - VIRT_BASE_DATA;
 prot = PAGE_READ | PAGE_WRITE;
 }
@@ -134,6 +147,30 @@ void tlb_fill(CPUState *cs, target_ulong vaddr, 
MMUAccessType access_type,
 tlb_set_page_with_attrs(cs, vaddr, paddr, attrs, prot, mmu_idx, page_size);
 }
 
+void helper_sleep(CPUAVRState *env)
+{
+CPUState *cs = CPU(avr_env_get_cpu(env));
+
+cs->exception_index = EXCP_HLT;
+cpu_loop_exit(cs);
+}
+
+void helper_unsupported(CPUAVRState *env)
+{
+CPUState *cs = CPU(avr_env_get_cpu(env));
+
+/*
+ *  I count not find what happens on the real platform, so
+ *  it's EXCP_DEBUG for meanwhile
+ */
+cs->exception_index = EXCP_DEBUG;
+if (qemu_loglevel_mask(LOG_UNIMP)) {
+qemu_log("UNSUPPORTED\n");
+cpu_dump_state(cs, qemu_logfile, fprintf, 0);
+}
+cpu_loop_exit(cs);
+}
+
 void helper_debug(CPUAVRState *env)
 {
 CPUState *cs = CPU(avr_env_get_cpu(env));
@@ -141,3 +178,178 @@ void helper_debug(CPUAVRState *env)
 cs->exception_index = EXCP_DEBUG;
 cpu_loop_exit(cs);
 }
+
+void helper_wdr(CPUAVRState *env)
+{
+CPUState *cs = CPU(avr_env_get_cpu(env));
+
+/* WD is not implemented yet, placeholder */
+cs->exception_index = EXCP_DEBUG;
+cpu_loop_exit(cs);
+}
+
+/*
+ * This function implements IN instruction
+ *
+ * It does the following
+ * a.  if an IO register belongs to CPU, its value is read and returned
+ * b.  otherwise io address is translated to mem address and physical memory
+ * is read.
+ * c.  it caches the value for sake of SBI, SBIC, SBIS & CBI implementation
+ *
+ */
+target_ulong helper_inb(CPUAVRState *env, uint32_t port)
+{
+target_ulong data = 0;
+
+switch (port) {
+case 0x38: /* RAMPD */
+data = 0xff & (env->rampD >> 16);
+break;
+case 0x39: /* RAMPX */
+data = 0xff & (env->rampX >> 16);
+break;
+case

[Qemu-devel] [PATCH RFC v19 05/13] target-avr: adding AVR interrupt handling

2017-06-08 Thread Michael Rolnik

Signed-off-by: Michael Rolnik 
Message-Id: <1471522070-77598-6-git-send-email-mrol...@gmail.com>
Signed-off-by: Richard Henderson 
---
 target/avr/helper.c | 55 +
 1 file changed, 55 insertions(+)

diff --git a/target/avr/helper.c b/target/avr/helper.c
index c1871939b3..61255fdff3 100644
--- a/target/avr/helper.c
+++ b/target/avr/helper.c
@@ -32,11 +32,66 @@
 bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
 {
 bool ret = false;
+CPUClass *cc = CPU_GET_CLASS(cs);
+AVRCPU *cpu = AVR_CPU(cs);
+CPUAVRState *env = &cpu->env;
+
+if (interrupt_request & CPU_INTERRUPT_RESET) {
+if (cpu_interrupts_enabled(env)) {
+cs->exception_index = EXCP_RESET;
+cc->do_interrupt(cs);
+
+cs->interrupt_request &= ~CPU_INTERRUPT_RESET;
+
+ret = true;
+}
+}
+if (interrupt_request & CPU_INTERRUPT_HARD) {
+if (cpu_interrupts_enabled(env) && env->intsrc != 0) {
+int index = ctz32(env->intsrc);
+cs->exception_index = EXCP_INT(index);
+cc->do_interrupt(cs);
+
+env->intsrc &= env->intsrc - 1; /* clear the interrupt */
+cs->interrupt_request &= ~CPU_INTERRUPT_HARD;
+
+ret = true;
+}
+}
 return ret;
 }
 
 void avr_cpu_do_interrupt(CPUState *cs)
 {
+AVRCPU *cpu = AVR_CPU(cs);
+CPUAVRState *env = &cpu->env;
+
+uint32_t ret = env->pc_w;
+int vector = 0;
+int size = avr_feature(env, AVR_FEATURE_JMP_CALL) ? 2 : 1;
+int base = 0; /* TODO: where to get it */
+
+if (cs->exception_index == EXCP_RESET) {
+vector = 0;
+} else if (env->intsrc != 0) {
+vector = ctz32(env->intsrc) + 1;
+}
+
+if (avr_feature(env, AVR_FEATURE_3_BYTE_PC)) {
+cpu_stb_data(env, env->sp--, (ret & 0x0000ff));
+cpu_stb_data(env, env->sp--, (ret & 0x00ff00) >>  8);
+cpu_stb_data(env, env->sp--, (ret & 0xff0000) >> 16);
+} else if (avr_feature(env, AVR_FEATURE_2_BYTE_PC)) {
+cpu_stb_data(env, env->sp--, (ret & 0x0000ff));
+cpu_stb_data(env, env->sp--, (ret & 0x00ff00) >>  8);
+} else {
+cpu_stb_data(env, env->sp--, (ret & 0x0000ff));
+}
+
+env->pc_w = base + vector * size;
+env->sregI = 0; /* clear Global Interrupt Flag */
+
+cs->exception_index = -1;
 }
 
 int avr_cpu_memory_rw_debug(CPUState *cs, vaddr addr, uint8_t *buf,
-- 
2.11.0 (Apple Git-81)

[Qemu-devel] [PATCH RFC v19 01/13] target-avr: AVR cores support is added.

2017-06-08 Thread Michael Rolnik

1. basic CPU structure
2. registers
3. no instructions
4. saving sreg, rampD, rampX, rampY, rampD, eind in HW representation

Signed-off-by: Michael Rolnik 
Message-Id: <1471522070-77598-2-git-send-email-mrol...@gmail.com>
Signed-off-by: Richard Henderson 
---
 MAINTAINERS |   5 +
 arch_init.c |   2 +
 configure   |   5 +
 default-configs/avr-softmmu.mak |  21 +++
 include/disas/bfd.h |   6 +
 include/sysemu/arch_init.h  |   1 +
 target/avr/Makefile.objs|  23 
 target/avr/cpu-qom.h|  84 
 target/avr/cpu.c| 288 
 target/avr/cpu.h| 179 +
 target/avr/gdbstub.c|  85 
 target/avr/helper.c |  88 
 target/avr/helper.h |  21 +++
 target/avr/machine.c| 120 +
 target/avr/translate.c  | 256 +++
 target/avr/translate.h  | 114 
 16 files changed, 1298 insertions(+)
 create mode 100644 default-configs/avr-softmmu.mak
 create mode 100644 target/avr/Makefile.objs
 create mode 100644 target/avr/cpu-qom.h
 create mode 100644 target/avr/cpu.c
 create mode 100644 target/avr/cpu.h
 create mode 100644 target/avr/gdbstub.c
 create mode 100644 target/avr/helper.c
 create mode 100644 target/avr/helper.h
 create mode 100644 target/avr/machine.c
 create mode 100644 target/avr/translate.c
 create mode 100644 target/avr/translate.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 120788d8fb..dc37be323e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -125,6 +125,11 @@ F: disas/arm.c
 F: disas/arm-a64.cc
 F: disas/libvixl/
 
+AVR
+M: Michael Rolnik 
+S: Maintained
+F: target-avr/
+
 CRIS
 M: Edgar E. Iglesias 
 S: Maintained
diff --git a/arch_init.c b/arch_init.c
index a0b8ed6167..4a9e74d62e 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -81,6 +81,8 @@ int graphic_depth = 32;
 #define QEMU_ARCH QEMU_ARCH_UNICORE32
 #elif defined(TARGET_TRICORE)
 #define QEMU_ARCH QEMU_ARCH_TRICORE
+#elif defined(TARGET_AVR)
+#define QEMU_ARCH QEMU_ARCH_AVR
 #endif
 
 const uint32_t arch_type = QEMU_ARCH;
diff --git a/configure b/configure
index 13e040d28c..d808340ece 100755
--- a/configure
+++ b/configure
@@ -6048,6 +6048,8 @@ case "$target_name" in
 mttcg="yes"
 gdb_xml_files="aarch64-core.xml aarch64-fpu.xml arm-core.xml arm-vfp.xml 
arm-vfp3.xml arm-neon.xml"
   ;;
+  avr)
+  ;;
   cris)
   ;;
   hppa)
@@ -6264,6 +6266,9 @@ for i in $ARCH $TARGET_BASE_ARCH ; do
   disas_config "ARM_A64"
 fi
   ;;
+  avr)
+disas_config "AVR"
+  ;;
   cris)
 disas_config "CRIS"
   ;;
diff --git a/default-configs/avr-softmmu.mak b/default-configs/avr-softmmu.mak
new file mode 100644
index 0000000000..003465dd1c
--- /dev/null
+++ b/default-configs/avr-softmmu.mak
@@ -0,0 +1,21 @@
+#
+#  QEMU AVR CPU
+#
+#  Copyright (c) 2016 Michael Rolnik
+#
+#  This library is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU Lesser General Public
+#  License as published by the Free Software Foundation; either
+#  version 2.1 of the License, or (at your option) any later version.
+#
+#  This library is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  Lesser General Public License for more details.
+#
+#  You should have received a copy of the GNU Lesser General Public
+#  License along with this library; if not, see
+#  
+#
+
+# Default configuration for avr-softmmu
diff --git a/include/disas/bfd.h b/include/disas/bfd.h
index b01e002b4c..2b5841c696 100644
--- a/include/disas/bfd.h
+++ b/include/disas/bfd.h
@@ -213,6 +213,12 @@ enum bfd_architecture
 #define bfd_mach_m32r  0  /* backwards compatibility */
   bfd_arch_mn10200,/* Matsushita MN10200 */
   bfd_arch_mn10300,/* Matsushita MN10300 */
+  bfd_arch_avr,   /* Atmel AVR microcontrollers.  */
+#define bfd_mach_avr1  1
+#define bfd_mach_avr2  2
+#define bfd_mach_avr3  3
+#define bfd_mach_avr4  4
+#define bfd_mach_avr5  5
   bfd_arch_cris,   /* Axis CRIS */
 #define bfd_mach_cris_v0_v10   255
 #define bfd_mach_cris_v32  32
diff --git a/include/sysemu/arch_init.h b/include/sysemu/arch_init.h
index 8751c468ed..1bf565f146 100644
--- a/include/sysemu/arch_init.h
+++ b/include/sysemu/arch_init.h
@@ -24,6 +24,7 @@ enum {
 QEMU_ARCH_MOXIE = (1 << 15),
 QEMU_ARCH_TRICORE = (1 << 16),
 QEMU_ARCH_NIOS2 = (1 << 17),
+QEMU_ARCH_AVR = (1 << 18),
 };
 
 extern const uint32_t arch_type;
diff --git a/target/avr/Makefile.objs b/target/avr/Makefile.objs
new file mode 100644
index

1 2 3 4 >

1 - 100 of 343 matches

Mail list logo