date:20170425

Re: [Qemu-devel] [PATCH v8] Allow setting NUMA distance for different NUMA nodes

2017-04-25 Thread Andrew Jones

On Tue, Apr 25, 2017 at 09:52:33AM +0800, He Chen wrote:
> This patch is going to add SLIT table support in QEMU, and provides
> additional option `dist` for command `-numa` to allow user set vNUMA
> distance by QEMU command.
> 
> With this patch, when a user wants to create a guest that contains
> several vNUMA nodes and also wants to set distance among those nodes,
> the QEMU command would look like:
> 
> ```
> -numa node,nodeid=0,cpus=0 \
> -numa node,nodeid=1,cpus=1 \
> -numa node,nodeid=2,cpus=2 \
> -numa node,nodeid=3,cpus=3 \
> -numa dist,src=0,dst=1,val=21 \
> -numa dist,src=0,dst=2,val=31 \
> -numa dist,src=0,dst=3,val=41 \
> -numa dist,src=1,dst=2,val=21 \
> -numa dist,src=1,dst=3,val=31 \
> -numa dist,src=2,dst=3,val=21 \
> ```
> 
> Signed-off-by: He Chen 
> 
> ---
> Changes since v7:
> * Remove unnecessary node present check.
> * Minor improvement on prompt message.
> 
> Changes since v6:
> * Split validate_numa_distance into 2 separate functions.
> * Add comments before validate and complete numa distance functions.
> 
> Changes since v5:
> * Made the generation of the SLIT dependent on `have_numa_distance`.
> * Doc refinement.
> ---
>  hw/acpi/aml-build.c |  26 ++
>  hw/i386/acpi-build.c|   4 ++
>  include/hw/acpi/aml-build.h |   1 +
>  include/sysemu/numa.h   |   2 +
>  include/sysemu/sysemu.h |   4 ++
>  numa.c  | 124 
> 
>  qapi-schema.json|  30 ++-
>  qemu-options.hx |  16 +-
>  8 files changed, 204 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
> index c6f2032..be496c8 100644
> --- a/hw/acpi/aml-build.c
> +++ b/hw/acpi/aml-build.c
> @@ -24,6 +24,7 @@
>  #include "hw/acpi/aml-build.h"
>  #include "qemu/bswap.h"
>  #include "qemu/bitops.h"
> +#include "sysemu/numa.h"
>  
>  static GArray *build_alloc_array(void)
>  {
> @@ -1609,3 +1610,28 @@ void build_srat_memory(AcpiSratMemoryAffinity 
> *numamem, uint64_t base,
>  numamem->base_addr = cpu_to_le64(base);
>  numamem->range_length = cpu_to_le64(len);
>  }
> +
> +/*
> + * ACPI spec 5.2.17 System Locality Distance Information Table
> + * (Revision 2.0 or later)
> + */
> +void build_slit(GArray *table_data, BIOSLinker *linker)
> +{
> +int slit_start, i, j;
> +slit_start = table_data->len;
> +
> +acpi_data_push(table_data, sizeof(AcpiTableHeader));
> +
> +build_append_int_noprefix(table_data, nb_numa_nodes, 8);
> +for (i = 0; i < nb_numa_nodes; i++) {
> +for (j = 0; j < nb_numa_nodes; j++) {
> +assert(numa_info[i].distance[j]);
> +build_append_int_noprefix(table_data, numa_info[i].distance[j], 
> 1);
> +}
> +}
> +
> +build_header(linker, table_data,
> + (void *)(table_data->data + slit_start),
> + "SLIT",
> + table_data->len - slit_start, 1, NULL, NULL);
> +}
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 2073108..2458ebc 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -2678,6 +2678,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState 
> *machine)
>  if (pcms->numa_nodes) {
>  acpi_add_table(table_offsets, tables_blob);
>  build_srat(tables_blob, tables->linker, machine);
> +if (have_numa_distance) {
> +acpi_add_table(table_offsets, tables_blob);
> +build_slit(tables_blob, tables->linker);
> +}
>  }
>  if (acpi_get_mcfg()) {
>  acpi_add_table(table_offsets, tables_blob);
> diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
> index 00c21f1..329a0d0 100644
> --- a/include/hw/acpi/aml-build.h
> +++ b/include/hw/acpi/aml-build.h
> @@ -389,4 +389,5 @@ GCC_FMT_ATTR(2, 3);
>  void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base,
> uint64_t len, int node, MemoryAffinityFlags flags);
>  
> +void build_slit(GArray *table_data, BIOSLinker *linker);
>  #endif
> diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
> index 8f09dcf..0ea1bc0 100644
> --- a/include/sysemu/numa.h
> +++ b/include/sysemu/numa.h
> @@ -8,6 +8,7 @@
>  #include "hw/boards.h"
>  
>  extern int nb_numa_nodes;   /* Number of NUMA nodes */
> +extern bool have_numa_distance;
>  
>  struct numa_addr_range {
>  ram_addr_t mem_start;
> @@ -21,6 +22,7 @@ typedef struct node_info {
>  struct HostMemoryBackend *node_memdev;
>  bool present;
>  QLIST_HEAD(, numa_addr_range) addr; /* List to store address ranges */
> +uint8_t distance[MAX_NODES];
>  } NodeInfo;
>  
>  extern NodeInfo numa_info[MAX_NODES];
> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
> index 576c7ce..6999545 100644
> --- a/include/sysemu/sysemu.h
> +++ b/include/sysemu/sysemu.h
> @@ -169,6 +169,10 @@ extern int mem_prealloc;
>  
>  #define MAX_NODES 128
>  #define NUMA_NODE_UNASSIGNED

Re: [Qemu-devel] [PATCH 2/2] migration: to_dst_file at that point is NULL

2017-04-25 Thread Laurent Vivier

On 25/04/2017 12:17, Juan Quintela wrote:
> We have just arrived as:
> 
> migration.c: qemu_migrate()
>   
>   s = migrate_init() <- puts it to NULL
>   
>   {tcp,unix}_start_outgoing_migration ->
>  socket_outgoing_migration
> migration_channel_connect()
>  sets to_dst_file
> 
> if tls is enabled, we do another round through
> migrate_channel_tls_connect(), but we only set it up if there is no
> error.  So we don't need the assignment.  I am removing it so that the
> following patches can remove the knowledge about MigrationState from
> those two files.
> 
> Signed-off-by: Juan Quintela 
> ---
>  migration/socket.c | 1 -
>  migration/tls.c| 1 -
>  2 files changed, 2 deletions(-)
> 
> diff --git a/migration/socket.c b/migration/socket.c
> index 13966f1..dc88812 100644
> --- a/migration/socket.c
> +++ b/migration/socket.c
> @@ -79,7 +79,6 @@ static void socket_outgoing_migration(QIOTask *task,
>  
>  if (qio_task_propagate_error(task, )) {
>  trace_migration_socket_outgoing_error(error_get_pretty(err));
> -data->s->to_dst_file = NULL;
>  migrate_fd_error(data->s, err);
>  error_free(err);
>  } else {
> diff --git a/migration/tls.c b/migration/tls.c
> index 45bec44..a33ecb7 100644
> --- a/migration/tls.c
> +++ b/migration/tls.c
> @@ -116,7 +116,6 @@ static void migration_tls_outgoing_handshake(QIOTask 
> *task,
>  
>  if (qio_task_propagate_error(task, )) {
>  trace_migration_tls_outgoing_handshake_error(error_get_pretty(err));
> -s->to_dst_file = NULL;
>  migrate_fd_error(s, err);
>  error_free(err);
>  } else {
> 

In migrate_fd_error(), we have "assert(s->to_dst_file == NULL);", so you
break the function with this change.

Laurent

[Qemu-devel] [PATCH 12/13] arm: add MPU support to M profile CPUs

2017-04-25 Thread Peter Maydell

From: Michael Davidsaver 

The M series MPU is almost the same as the already implemented R
profile MPU (v7 PMSA).  So all we need to implement here is the MPU
register interface in the system register space.

This implementation has the same restriction as the R profile MPU
that it doesn't permit regions to be sized down smaller than 1K.

We also do not yet implement support for MPU_CTRL.HFNMIENA; this
bit should, if zero, disable use of the MPU when running HardFault,
NMI or with FAULTMASK set to 1 (i.e. at an execution priority of
less than zero) -- if the MPU is enabled we don't treat these
cases any differently.

Signed-off-by: Michael Davidsaver 
[PMM: Keep all the bits in mpu_ctrl field, rather than
 using SCTLR bits for them; drop broken HFNMIENA support;
 various cleanup]
Signed-off-by: Peter Maydell 
---
 target/arm/cpu.h  |   6 +++
 hw/intc/armv7m_nvic.c | 104 ++
 target/arm/helper.c   |  25 +++-
 target/arm/machine.c  |   5 ++-
 4 files changed, 137 insertions(+), 3 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 0718955..bbdd064 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -418,6 +418,7 @@ typedef struct CPUARMState {
 uint32_t dfsr; /* Debug Fault Status Register */
 uint32_t mmfar; /* MemManage Fault Address */
 uint32_t bfar; /* BusFault Address */
+unsigned mpu_ctrl; /* MPU_CTRL (some bits kept in sctlr_el[1]) */
 int exception;
 } v7m;
 
@@ -1166,6 +1167,11 @@ FIELD(V7M_DFSR, DWTTRAP, 2, 1)
 FIELD(V7M_DFSR, VCATCH, 3, 1)
 FIELD(V7M_DFSR, EXTERNAL, 4, 1)
 
+/* v7M MPU_CTRL bits */
+FIELD(V7M_MPU_CTRL, ENABLE, 0, 1)
+FIELD(V7M_MPU_CTRL, HFNMIENA, 1, 1)
+FIELD(V7M_MPU_CTRL, PRIVDEFENA, 2, 1)
+
 /* If adding a feature bit which corresponds to a Linux ELF
  * HWCAP bit, remember to update the feature-bit-to-hwcap
  * mapping in linux-user/elfload.c:get_elf_hwcap().
diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
index 32ffa0b..26a4b2d 100644
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -19,6 +19,7 @@
 #include "hw/arm/arm.h"
 #include "hw/arm/armv7m_nvic.h"
 #include "target/arm/cpu.h"
+#include "exec/exec-all.h"
 #include "qemu/log.h"
 #include "trace.h"
 
@@ -528,6 +529,39 @@ static uint32_t nvic_readl(NVICState *s, uint32_t offset)
 case 0xd70: /* ISAR4.  */
 return 0x01310102;
 /* TODO: Implement debug registers.  */
+case 0xd90: /* MPU_TYPE */
+/* Unified MPU; if the MPU is not present this value is zero */
+return cpu->pmsav7_dregion << 8;
+break;
+case 0xd94: /* MPU_CTRL */
+return cpu->env.v7m.mpu_ctrl;
+case 0xd98: /* MPU_RNR */
+return cpu->env.cp15.c6_rgnr;
+case 0xd9c: /* MPU_RBAR */
+case 0xda4: /* MPU_RBAR_A1 */
+case 0xdac: /* MPU_RBAR_A2 */
+case 0xdb4: /* MPU_RBAR_A3 */
+{
+int region = cpu->env.cp15.c6_rgnr;
+
+if (region >= cpu->pmsav7_dregion) {
+return 0;
+}
+return (cpu->env.pmsav7.drbar[region] & 0x1f) | (region & 0xf);
+}
+case 0xda0: /* MPU_RASR */
+case 0xda8: /* MPU_RASR_A1 */
+case 0xdb0: /* MPU_RASR_A2 */
+case 0xdb8: /* MPU_RASR_A3 */
+{
+int region = cpu->env.cp15.c6_rgnr;
+
+if (region >= cpu->pmsav7_dregion) {
+return 0;
+}
+return ((cpu->env.pmsav7.dracr[region] & 0x) << 16) |
+(cpu->env.pmsav7.drsr[region] & 0x);
+}
 default:
 qemu_log_mask(LOG_GUEST_ERROR, "NVIC: Bad read offset 0x%x\n", offset);
 return 0;
@@ -627,6 +661,76 @@ static void nvic_writel(NVICState *s, uint32_t offset, 
uint32_t value)
 qemu_log_mask(LOG_UNIMP,
   "NVIC: Aux fault status registers unimplemented\n");
 break;
+case 0xd90: /* MPU_TYPE */
+return; /* RO */
+case 0xd94: /* MPU_CTRL */
+if ((value &
+ (R_V7M_MPU_CTRL_HFNMIENA_MASK | R_V7M_MPU_CTRL_ENABLE_MASK))
+== R_V7M_MPU_CTRL_HFNMIENA_MASK) {
+qemu_log_mask(LOG_GUEST_ERROR, "MPU_CTRL: HFNMIENA and !ENABLE is "
+  "UNPREDICTABLE\n");
+}
+cpu->env.v7m.mpu_ctrl = value & (R_V7M_MPU_CTRL_ENABLE_MASK |
+ R_V7M_MPU_CTRL_HFNMIENA_MASK |
+ R_V7M_MPU_CTRL_PRIVDEFENA_MASK);
+tlb_flush(CPU(cpu));
+break;
+case 0xd98: /* MPU_RNR */
+if (value >= cpu->pmsav7_dregion) {
+qemu_log_mask(LOG_GUEST_ERROR, "MPU region out of range %"
+  PRIu32 "/%" PRIu32 "\n",
+  value, cpu->pmsav7_dregion);
+} else {
+cpu->env.cp15.c6_rgnr = value;
+}
+break;
+case 0xd9c: /* MPU_RBAR */
+case 0xda4: /* MPU_RBAR_A1 */
+case 0xdac: /* MPU_RBAR_A2 */
+

[Qemu-devel] [PATCH 09/13] armv7m: Implement M profile default memory map

2017-04-25 Thread Peter Maydell

From: Michael Davidsaver 

Add support for the M profile default memory map which is used
if the MPU is not present or disabled.

The main differences in behaviour from implementing this
correctly are that we set the PAGE_EXEC attribute on
the right regions of memory, such that device regions
are not executable.

Signed-off-by: Michael Davidsaver 
[PMM: rephrased comment and commit message; don't mark
 the flash memory region as not-writable]
Signed-off-by: Peter Maydell 
---
 target/arm/helper.c | 35 ++-
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index 9e1ed1c..51662ad 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -8129,18 +8129,35 @@ static inline void 
get_phys_addr_pmsav7_default(CPUARMState *env,
 ARMMMUIdx mmu_idx,
 int32_t address, int *prot)
 {
-*prot = PAGE_READ | PAGE_WRITE;
-switch (address) {
-case 0xF000 ... 0x:
-if (regime_sctlr(env, mmu_idx) & SCTLR_V) { /* hivecs execing is ok */
+if (!arm_feature(env, ARM_FEATURE_M)) {
+*prot = PAGE_READ | PAGE_WRITE;
+switch (address) {
+case 0xF000 ... 0x:
+if (regime_sctlr(env, mmu_idx) & SCTLR_V) {
+/* hivecs execing is ok */
+*prot |= PAGE_EXEC;
+}
+break;
+case 0x ... 0x7FFF:
 *prot |= PAGE_EXEC;
+break;
+}
+} else {
+/* Default system address map for M profile cores.
+ * The architecture specifies which regions are execute-never;
+ * at the MPU level no other checks are defined.
+ */
+switch (address) {
+case 0x ... 0x1fff: /* ROM */
+case 0x2000 ... 0x3fff: /* SRAM */
+case 0x6000 ... 0x7fff: /* RAM */
+case 0x8000 ... 0x9fff: /* RAM */
+*prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
+break;
+default: /* Peripheral, 2x Device, and System */
+*prot = PAGE_READ | PAGE_WRITE;
 }
-break;
-case 0x ... 0x7FFF:
-*prot |= PAGE_EXEC;
-break;
 }
-
 }
 
 static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address,
-- 
2.7.4

[Qemu-devel] [PATCH 07/13] arm: Remove unnecessary check on cpu->pmsav7_dregion

2017-04-25 Thread Peter Maydell

Now that we enforce both:
 * pmsav7_dregion == 0 implies has_mpu == false
 * PMSA with has_mpu == false means SCTLR.M cannot be set
we can remove a check on pmsav7_dregion from get_phys_addr_pmsav7(),
because we can only reach this code path if the MPU is enabled
(and so region_translation_disabled() returned false).

Signed-off-by: Peter Maydell 
---
 target/arm/helper.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index f0f25c8..5c044d0 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -8227,8 +8227,7 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, 
uint32_t address,
 }
 
 if (n == -1) { /* no hits */
-if (cpu->pmsav7_dregion &&
-(is_user || !(regime_sctlr(env, mmu_idx) & SCTLR_BR))) {
+if (is_user || !(regime_sctlr(env, mmu_idx) & SCTLR_BR)) {
 /* background fault */
 *fsr = 0;
 return true;
-- 
2.7.4

[Qemu-devel] [PATCH 04/13] arm: Clean up handling of no-MPU PMSA CPUs

2017-04-25 Thread Peter Maydell

ARM CPUs come in two flavours:
 * proper MMU ("VMSA")
 * only an MPU ("PMSA")
For PMSA, the MPU may be implemented, or not (in which case there
is default "always acts the same" behaviour, but it isn't guest
programmable).

QEMU is a bit confused about how we indicate this: we have an
ARM_FEATURE_MPU, but it's not clear whether this indicates
"PMSA, not VMSA" or "PMSA and MPU present" , and sometimes we
use it for one purpose and sometimes the other.

Currently trying to implement a PMSA-without-MPU core won't
work correctly because we turn off the ARM_FEATURE_MPU bit
and then a lot of things which should still exist get
turned off too.

As the first step in cleaning this up, rename the feature
bit to ARM_FEATURE_PMSA, which indicates a PMSA CPU (with
or without MPU).

Signed-off-by: Peter Maydell 
---
 target/arm/cpu.h |  2 +-
 target/arm/cpu.c | 12 ++--
 target/arm/helper.c  | 12 ++--
 target/arm/machine.c |  2 +-
 4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 253565b..0718955 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1179,7 +1179,7 @@ enum arm_features {
 ARM_FEATURE_V6K,
 ARM_FEATURE_V7,
 ARM_FEATURE_THUMB2,
-ARM_FEATURE_MPU,/* Only has Memory Protection Unit, not full MMU.  */
+ARM_FEATURE_PMSA,   /* no MMU; may have Memory Protection Unit */
 ARM_FEATURE_VFP3,
 ARM_FEATURE_VFP_FP16,
 ARM_FEATURE_NEON,
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index b357aee..f17e279 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -586,7 +586,7 @@ static void arm_cpu_post_init(Object *obj)
  _abort);
 }
 
-if (arm_feature(>env, ARM_FEATURE_MPU)) {
+if (arm_feature(>env, ARM_FEATURE_PMSA)) {
 qdev_property_add_static(DEVICE(obj), _cpu_has_mpu_property,
  _abort);
 if (arm_feature(>env, ARM_FEATURE_V7)) {
@@ -682,7 +682,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error 
**errp)
 
 if (arm_feature(env, ARM_FEATURE_V7) &&
 !arm_feature(env, ARM_FEATURE_M) &&
-!arm_feature(env, ARM_FEATURE_MPU)) {
+!arm_feature(env, ARM_FEATURE_PMSA)) {
 /* v7VMSA drops support for the old ARMv5 tiny pages, so we
  * can use 4K pages.
  */
@@ -758,10 +758,10 @@ static void arm_cpu_realizefn(DeviceState *dev, Error 
**errp)
 }
 
 if (!cpu->has_mpu) {
-unset_feature(env, ARM_FEATURE_MPU);
+unset_feature(ARM_FEATURE_PMSA);
 }
 
-if (arm_feature(env, ARM_FEATURE_MPU) &&
+if (arm_feature(env, ARM_FEATURE_PMSA) &&
 arm_feature(env, ARM_FEATURE_V7)) {
 uint32_t nr = cpu->pmsav7_dregion;
 
@@ -861,7 +861,7 @@ static void arm946_initfn(Object *obj)
 
 cpu->dtb_compatible = "arm,arm946";
 set_feature(>env, ARM_FEATURE_V5);
-set_feature(>env, ARM_FEATURE_MPU);
+set_feature(>env, ARM_FEATURE_PMSA);
 set_feature(>env, ARM_FEATURE_DUMMY_C15_REGS);
 cpu->midr = 0x41059461;
 cpu->ctr = 0x0f004006;
@@ -1073,7 +1073,7 @@ static void cortex_r5_initfn(Object *obj)
 set_feature(>env, ARM_FEATURE_THUMB_DIV);
 set_feature(>env, ARM_FEATURE_ARM_DIV);
 set_feature(>env, ARM_FEATURE_V7MP);
-set_feature(>env, ARM_FEATURE_MPU);
+set_feature(>env, ARM_FEATURE_PMSA);
 cpu->midr = 0x411fc153; /* r1p3 */
 cpu->id_pfr0 = 0x0131;
 cpu->id_pfr1 = 0x001;
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 791332c..404bfdb 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -485,7 +485,7 @@ static void contextidr_write(CPUARMState *env, const 
ARMCPRegInfo *ri,
 {
 ARMCPU *cpu = arm_env_get_cpu(env);
 
-if (raw_read(env, ri) != value && !arm_feature(env, ARM_FEATURE_MPU)
+if (raw_read(env, ri) != value && !arm_feature(env, ARM_FEATURE_PMSA)
 && !extended_addresses_enabled(env)) {
 /* For VMSA (when not using the LPAE long descriptor page table
  * format) this register includes the ASID, so do a TLB flush.
@@ -4615,7 +4615,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
 define_arm_cp_regs(cpu, v6k_cp_reginfo);
 }
 if (arm_feature(env, ARM_FEATURE_V7MP) &&
-!arm_feature(env, ARM_FEATURE_MPU)) {
+!arm_feature(env, ARM_FEATURE_PMSA)) {
 define_arm_cp_regs(cpu, v7mp_cp_reginfo);
 }
 if (arm_feature(env, ARM_FEATURE_V7)) {
@@ -4969,7 +4969,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
 }
 }
 
-if (arm_feature(env, ARM_FEATURE_MPU)) {
+if (arm_feature(env, ARM_FEATURE_PMSA)) {
 if (arm_feature(env, ARM_FEATURE_V6)) {
 /* PMSAv6 not implemented */
 assert(arm_feature(env, ARM_FEATURE_V7));
@@ -5131,7 +5131,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
 define_arm_cp_regs(cpu, id_pre_v8_midr_cp_reginfo);
 }
 define_arm_cp_regs(cpu, id_cp_reginfo);
-

Re: [Qemu-devel] [PATCH] COLO-compare: Add compare_lock aviod comparison conflict

2017-04-25 Thread Zhang Chen




On 04/25/2017 08:07 PM, Hailiang Zhang wrote:

On 2017/4/25 19:57, Zhang Chen wrote:


On 04/20/2017 02:40 PM, Jason Wang wrote:


On 2017年04月20日 14:36, Zhang Chen wrote:


On 04/20/2017 02:20 PM, Hailiang Zhang wrote:

On 2017/4/20 12:32, Zhang Chen wrote:

When network traffic is heavy, compare_pri_rs_finalize() and
compare_sec_rs_finalize() have a chance to conflict.
Both of them call colo_compare_connection() to compare packets,
but if a secondary packet arrives and calls compare_sec_rs_finalize()
during the compare_pri_rs_finalize() comparison, that packet will be
handled twice. If the packets are the same, the pkt will be freed twice.

Interesting, if I'm right, this should not happen, because, all the
comparing works
are done in colo compare thread, so there is no chance to access the
connect_list
concurrently.  Besides, even both of the packets from primary and
secondary arrive
at the same time, it should only be handle once, we will handle it
with the later arrived one,
No ?

No, In my test often trigger this bug, you can use udp server and
client test it.

13517@1492648526.850246:colo_compare_main : packet same and release
packet
13517@1492648526.850304:colo_compare_main : packet same and release
packet
*** glibc detected ***
/home/zhangchen/qemu-colo-apr14/x86_64-softmmu/qemu-system-x86_64:
double free or corruption (out): 0x56a75210 ***
=== Backtrace: =
/lib64/libc.so.6(+0x76628)[0x753d6628]
/lib64/libc.so.6(cfree+0x6c)[0x753db5cc]


Thanks
Zhang Chen

I agree that you should check whether or not they are running in the
same thread.



I found they are not running in the same thread. I have reviewed the
relevant code but can't figure out
why, although we do the same thing for pri_chr_in and sec_chr_in, they
run in different threads.
Can anyone tell me the reason?

Log:

Breakpoint 5, compare_pri_chr_in (opaque=0x77fd1010,
buf=0x73eb0950 "", size=8)
  at net/colo-compare.c:591
591{
(gdb) info thread
Id   Target Id Frame
18   Thread 0x7fff70bff700 (LWP 27864) "qemu-system-x86"
0x756e4ae7 in sem_timedwait ()
 from /lib64/libpthread.so.0
9Thread 0x7fff6f1ff700 (LWP 27748) "qemu-system-x86"
0x756e4a00 in sem_wait ()
 from /lib64/libpthread.so.0
7Thread 0x7fff701ff700 (LWP 27746) "qemu-system-x86"
0x756e261c in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
5Thread 0x72dbf700 (LWP 27743) "qemu-system-x86"
0x754320a7 in ioctl ()
 from /lib64/libc.so.6
4Thread 0x735c0700 (LWP 27742) "qemu-system-x86"
0x756e5dbd in sendmsg ()
 from /lib64/libpthread.so.0
* 3Thread 0x73eb2700 (LWP 27741) "qemu-system-x86"
compare_pri_chr_in (opaque=0x77fd1010,
  buf=0x73eb0950 "", size=8) at net/colo-compare.c:591
2Thread 0x746b3700 (LWP 27729) "qemu-system-x86"
0x75436789 in syscall ()
 from /lib64/libc.so.6
1Thread 0x77fb7a80 (LWP 27725) "qemu-system-x86"
0x756e5294 in __lll_lock_wait ()
 from /lib64/libpthread.so.0
(gdb)  bt
#0  compare_pri_chr_in (opaque=0x77fd1010, buf=0x73eb0950 "",
size=8) at net/colo-compare.c:591
#1  0x55c60fba in qemu_chr_be_write_impl (s=0x5684a630,
buf=0x73eb0950 "", len=8)
  at chardev/char.c:284
#2  0x55c6102f in qemu_chr_be_write (s=0x5684a630,
buf=0x73eb0950 "", len=8)
  at chardev/char.c:296
#3  0x55c6a056 in tcp_chr_read (chan=0x5684aa30,
cond=G_IO_IN, opaque=0x5684a630)
  at chardev/char-socket.c:414
#4  0x55c83dbc in qio_channel_fd_source_dispatch
(source=0x568d8b80, callback=
  0x55c69ebf , user_data=0x5684a630) at
io/channel-watch.c:84
#5  0x760c460a in g_main_context_dispatch () from
/usr/lib64/libglib-2.0.so.0
#6  0x760c7e88 in ?? () from /usr/lib64/libglib-2.0.so.0
#7  0x760c835d in g_main_loop_run () from
/usr/lib64/libglib-2.0.so.0
#8  0x55b82e22 in colo_compare_thread (opaque=0x77fd1010) at
net/colo-compare.c:703
#9  0x756de7b6 in start_thread () from /lib64/libpthread.so.0
#10 0x75439d6d in clone () from /lib64/libc.so.6
#11 0x in ?? ()
(gdb) c
Continuing.
[Switching to Thread 0x77fb7a80 (LWP 27725)]

Breakpoint 6, compare_sec_chr_in (opaque=0x77fd1010,
buf=0x7fffc590 "", size=1088)
  at net/colo-compare.c:608
608{
(gdb) info thread
Id   Target Id Frame
18   Thread 0x7fff70bff700 (LWP 27864) "qemu-system-x86" (Exiting)
0x756de9b3 in start_thread
  () from /lib64/libpthread.so.0
9Thread 0x7fff6f1ff700 (LWP 27748) "qemu-system-x86"
0x756e4a00 in sem_wait ()
 from /lib64/libpthread.so.0
7Thread 0x7fff701ff700 (LWP 27746) "qemu-system-x86"
0x756e261c in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
5Thread 0x72dbf700 (LWP 27743) "qemu-system-x86"
0x754320a7 in ioctl ()
 from /lib64/libc.so.6
4

Re: [Qemu-devel] [PATCH v2 06/13] tcg: add goto_ptr opcode

2017-04-25 Thread Richard Henderson


On 04/25/2017 09:53 AM, Emilio G. Cota wrote:

+void tcg_gen_goto_ptr(TCGv_ptr ptr)
+{
+if (TCG_TARGET_HAS_goto_ptr) {
+tcg_gen_op1i(INDEX_op_goto_ptr, GET_TCGV_PTR(ptr));
+} else {
+tcg_gen_exit_tb(0);
+}
+}
+


I think this function should look more like

/*
 * Emit a run-time lookup of the TB for target_addr followed by an
 * indirect jump to it.
 *
 * When the backend provides the goto_ptr opcode, this calls the
 * lookup_tb_ptr helper with target_addr and jumps to the pointer it
 * returns; otherwise it falls back to a plain tcg_gen_exit_tb(0).
 */
void tcg_gen_lookup_and_goto_ptr(TCGv target_addr)
{
    if (TCG_TARGET_HAS_goto_ptr) {
        TCGv_ptr ptr = tcg_temp_new_ptr();
        /* Pass the function's own parameter; there is no 'addr' in scope. */
        gen_helper_lookup_tb_ptr(ptr, tcg_ctx.tcg_env, target_addr);
        tcg_gen_op1i(INDEX_op_goto_ptr, GET_TCGV_PTR(ptr));
        tcg_temp_free_ptr(ptr);
    } else {
        tcg_gen_exit_tb(0);
    }
}

since there's not really any point in all targets being exposed to the 
implementation detail of lookup_tb_ptr.



r~

Re: [Qemu-devel] [PATCH] COLO-compare: Add compare_lock aviod comparison conflict

2017-04-25 Thread Hailiang Zhang


On 2017/4/25 19:57, Zhang Chen wrote:


On 04/20/2017 02:40 PM, Jason Wang wrote:


On 2017年04月20日 14:36, Zhang Chen wrote:


On 04/20/2017 02:20 PM, Hailiang Zhang wrote:

On 2017/4/20 12:32, Zhang Chen wrote:

When network traffic is heavy, compare_pri_rs_finalize() and
compare_sec_rs_finalize() have a chance to conflict.
Both of them call colo_compare_connection() to compare packets,
but if a secondary packet arrives and calls compare_sec_rs_finalize()
during the compare_pri_rs_finalize() comparison, that packet will be
handled twice. If the packets are the same, the pkt will be freed twice.

Interesting, if I'm right, this should not happen, because, all the
comparing works
are done in colo compare thread, so there is no chance to access the
connect_list
concurrently.  Besides, even both of the packets from primary and
secondary arrive
at the same time, it should only be handle once, we will handle it
with the later arrived one,
No ?

No, In my test often trigger this bug, you can use udp server and
client test it.

13517@1492648526.850246:colo_compare_main : packet same and release
packet
13517@1492648526.850304:colo_compare_main : packet same and release
packet
*** glibc detected ***
/home/zhangchen/qemu-colo-apr14/x86_64-softmmu/qemu-system-x86_64:
double free or corruption (out): 0x56a75210 ***
=== Backtrace: =
/lib64/libc.so.6(+0x76628)[0x753d6628]
/lib64/libc.so.6(cfree+0x6c)[0x753db5cc]


Thanks
Zhang Chen

I agree that you should check whether or not they are running in the
same thread.



I found they are not running in the same thread. I have reviewed the
relevant code but can't figure out
why, although we do the same thing for pri_chr_in and sec_chr_in, they
run in different threads.
Can anyone tell me the reason?

Log:

Breakpoint 5, compare_pri_chr_in (opaque=0x77fd1010,
buf=0x73eb0950 "", size=8)
  at net/colo-compare.c:591
591{
(gdb) info thread
Id   Target Id Frame
18   Thread 0x7fff70bff700 (LWP 27864) "qemu-system-x86"
0x756e4ae7 in sem_timedwait ()
 from /lib64/libpthread.so.0
9Thread 0x7fff6f1ff700 (LWP 27748) "qemu-system-x86"
0x756e4a00 in sem_wait ()
 from /lib64/libpthread.so.0
7Thread 0x7fff701ff700 (LWP 27746) "qemu-system-x86"
0x756e261c in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
5Thread 0x72dbf700 (LWP 27743) "qemu-system-x86"
0x754320a7 in ioctl ()
 from /lib64/libc.so.6
4Thread 0x735c0700 (LWP 27742) "qemu-system-x86"
0x756e5dbd in sendmsg ()
 from /lib64/libpthread.so.0
* 3Thread 0x73eb2700 (LWP 27741) "qemu-system-x86"
compare_pri_chr_in (opaque=0x77fd1010,
  buf=0x73eb0950 "", size=8) at net/colo-compare.c:591
2Thread 0x746b3700 (LWP 27729) "qemu-system-x86"
0x75436789 in syscall ()
 from /lib64/libc.so.6
1Thread 0x77fb7a80 (LWP 27725) "qemu-system-x86"
0x756e5294 in __lll_lock_wait ()
 from /lib64/libpthread.so.0
(gdb)  bt
#0  compare_pri_chr_in (opaque=0x77fd1010, buf=0x73eb0950 "",
size=8) at net/colo-compare.c:591
#1  0x55c60fba in qemu_chr_be_write_impl (s=0x5684a630,
buf=0x73eb0950 "", len=8)
  at chardev/char.c:284
#2  0x55c6102f in qemu_chr_be_write (s=0x5684a630,
buf=0x73eb0950 "", len=8)
  at chardev/char.c:296
#3  0x55c6a056 in tcp_chr_read (chan=0x5684aa30,
cond=G_IO_IN, opaque=0x5684a630)
  at chardev/char-socket.c:414
#4  0x55c83dbc in qio_channel_fd_source_dispatch
(source=0x568d8b80, callback=
  0x55c69ebf , user_data=0x5684a630) at
io/channel-watch.c:84
#5  0x760c460a in g_main_context_dispatch () from
/usr/lib64/libglib-2.0.so.0
#6  0x760c7e88 in ?? () from /usr/lib64/libglib-2.0.so.0
#7  0x760c835d in g_main_loop_run () from
/usr/lib64/libglib-2.0.so.0
#8  0x55b82e22 in colo_compare_thread (opaque=0x77fd1010) at
net/colo-compare.c:703
#9  0x756de7b6 in start_thread () from /lib64/libpthread.so.0
#10 0x75439d6d in clone () from /lib64/libc.so.6
#11 0x in ?? ()
(gdb) c
Continuing.
[Switching to Thread 0x77fb7a80 (LWP 27725)]

Breakpoint 6, compare_sec_chr_in (opaque=0x77fd1010,
buf=0x7fffc590 "", size=1088)
  at net/colo-compare.c:608
608{
(gdb) info thread
Id   Target Id Frame
18   Thread 0x7fff70bff700 (LWP 27864) "qemu-system-x86" (Exiting)
0x756de9b3 in start_thread
  () from /lib64/libpthread.so.0
9Thread 0x7fff6f1ff700 (LWP 27748) "qemu-system-x86"
0x756e4a00 in sem_wait ()
 from /lib64/libpthread.so.0
7Thread 0x7fff701ff700 (LWP 27746) "qemu-system-x86"
0x756e261c in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
5Thread 0x72dbf700 (LWP 27743) "qemu-system-x86"
0x754320a7 in ioctl ()
 from /lib64/libc.so.6
4Thread 0x735c0700 (LWP 27742)

[Qemu-devel] [PATCH 08/13] armv7m: Improve "-d mmu" tracing for PMSAv7 MPU

2017-04-25 Thread Peter Maydell

From: Michael Davidsaver 

Improve the "-d mmu" tracing for the PMSAv7 MPU translation
process as an aid in debugging guest MPU configurations:
 * fix a missing newline for a guest-error log
 * report the region number with guest-error or unimp
   logs of bad region register values
 * add a log message for the overall result of the lookup
 * print "0x" prefix for hex values

Signed-off-by: Michael Davidsaver 
[PMM: a little tidyup, report region number in all messages
 rather than just one]
Signed-off-by: Peter Maydell 
---
 target/arm/helper.c | 39 +++
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index 5c044d0..9e1ed1c 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -8169,16 +8169,18 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, 
uint32_t address,
 }
 
 if (!rsize) {
-qemu_log_mask(LOG_GUEST_ERROR, "DRSR.Rsize field can not be 
0");
+qemu_log_mask(LOG_GUEST_ERROR,
+  "DRSR[%d]: Rsize field cannot be 0\n", n);
 continue;
 }
 rsize++;
 rmask = (1ull << rsize) - 1;
 
 if (base & rmask) {
-qemu_log_mask(LOG_GUEST_ERROR, "DRBAR %" PRIx32 " misaligned "
-  "to DRSR region size, mask = %" PRIx32,
-  base, rmask);
+qemu_log_mask(LOG_GUEST_ERROR,
+  "DRBAR[%d]: 0x%" PRIx32 " misaligned "
+  "to DRSR region size, mask = 0x%" PRIx32 "\n",
+  n, base, rmask);
 continue;
 }
 
@@ -8215,9 +8217,10 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, 
uint32_t address,
 }
 }
 if (rsize < TARGET_PAGE_BITS) {
-qemu_log_mask(LOG_UNIMP, "No support for MPU (sub)region"
+qemu_log_mask(LOG_UNIMP,
+  "DRSR[%d]: No support for MPU (sub)region "
   "alignment of %" PRIu32 " bits. Minimum is %d\n",
-  rsize, TARGET_PAGE_BITS);
+  n, rsize, TARGET_PAGE_BITS);
 continue;
 }
 if (srdis) {
@@ -8251,8 +8254,8 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, 
uint32_t address,
 break;
 default:
 qemu_log_mask(LOG_GUEST_ERROR,
-  "Bad value for AP bits in DRACR %"
-  PRIx32 "\n", ap);
+  "DRACR[%d]: Bad value for AP bits: 0x%"
+  PRIx32 "\n", n, ap);
 }
 } else { /* Priv. mode AP bits decoding */
 switch (ap) {
@@ -8269,8 +8272,8 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, 
uint32_t address,
 break;
 default:
 qemu_log_mask(LOG_GUEST_ERROR,
-  "Bad value for AP bits in DRACR %"
-  PRIx32 "\n", ap);
+  "DRACR[%d]: Bad value for AP bits: 0x%"
+  PRIx32 "\n", n, ap);
 }
 }
 
@@ -8448,9 +8451,21 @@ static bool get_phys_addr(CPUARMState *env, target_ulong 
address,
  */
 if (arm_feature(env, ARM_FEATURE_PMSA) &&
 arm_feature(env, ARM_FEATURE_V7)) {
+bool ret;
 *page_size = TARGET_PAGE_SIZE;
-return get_phys_addr_pmsav7(env, address, access_type, mmu_idx,
-phys_ptr, prot, fsr);
+ret = get_phys_addr_pmsav7(env, address, access_type, mmu_idx,
+   phys_ptr, prot, fsr);
+qemu_log_mask(CPU_LOG_MMU, "PMSAv7 MPU lookup for %s at 0x%08" PRIx32
+  " mmu_idx %u -> %s (prot %c%c%c)\n",
+  access_type == 1 ? "reading" :
+  (access_type == 2 ? "writing" : "execute"),
+  (uint32_t)address, mmu_idx,
+  ret ? "Miss" : "Hit",
+  *prot & PAGE_READ ? 'r' : '-',
+  *prot & PAGE_WRITE ? 'w' : '-',
+  *prot & PAGE_EXEC ? 'x' : '-');
+
+return ret;
 }
 
 if (regime_translation_disabled(env, mmu_idx)) {
-- 
2.7.4

Re: [Qemu-devel] [PATCH] ui/cocoa.m: Fix macOS 10.12 deprecation warnings

2017-04-25 Thread Peter Maydell

On 25 April 2017 at 07:29, Brendan Shanks  wrote:
> macOS 10.12 deprecated/replaced many AppKit constants to make naming
> more consistent. Use the new constants, and #define them to the
> old constants when compiling against a pre-10.12 SDK.
>
> Signed-off-by: Brendan Shanks 
> ---
>  ui/cocoa.m | 87 
> +-
>  1 file changed, 58 insertions(+), 29 deletions(-)
>

Thanks for this patch -- I'm running 10.12 but I think my SDK
is still the earlier version. Patch looks good and compiles
fine, so I've applied it to git master.

-- PMM

[Qemu-devel] [PATCH 02/13] arm: Add support for M profile CPUs having different MMU index semantics

2017-04-25 Thread Peter Maydell

The M profile CPU's MPU has an awkward corner case which we
would like to implement with a different MMU index.

We can avoid having to bump the number of MMU modes ARM
uses, because some of our existing MMU indexes are only
used by non-M-profile CPUs, so we can borrow one.
To avoid that getting too confusing, clean up the code
to try to keep the two meanings of the index separate.

Instead of ARMMMUIdx enum values being identical to core QEMU
MMU index values, they are now the core index values with some
high bits set. Any particular CPU always uses the same high
bits (so eventually A profile cores and M profile cores will
use different bits). New functions arm_to_core_mmu_idx()
and core_to_arm_mmu_idx() convert between the two.

In general core index values are stored in 'int' types, and
ARM values are stored in ARMMMUIdx types.

Signed-off-by: Peter Maydell 
---
 target/arm/cpu.h   |  71 -
 target/arm/translate.h |   2 +-
 target/arm/helper.c| 151 -
 target/arm/op_helper.c |   3 +-
 target/arm/translate-a64.c |  18 --
 target/arm/translate.c |  10 +--
 6 files changed, 156 insertions(+), 99 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 1055bfe..e1f4856 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -2037,6 +2037,16 @@ static inline bool arm_excp_unmasked(CPUState *cs, 
unsigned int excp_idx,
  * for the accesses done as part of a stage 1 page table walk, rather than
  * having to walk the stage 2 page table over and over.)
  *
+ * The ARMMMUIdx and the mmu index value used by the core QEMU TLB code
+ * are not quite the same -- different CPU types (most notably M profile
+ * vs A/R profile) would like to use MMU indexes with different semantics,
+ * but since we don't ever need to use all of those in a single CPU we
+ * can avoid setting NB_MMU_MODES to more than 8. The lower bits of
+ * ARMMMUIdx are the core TLB mmu index, and the higher bits are always
+ * the same for any particular CPU.
+ * Variables of type ARMMMUIdx are always full values, and the core
+ * index values are in variables of type 'int'.
+ *
  * Our enumeration includes at the end some entries which are not "true"
  * mmu_idx values in that they don't have corresponding TLBs and are only
  * valid for doing slow path page table walks.
@@ -2045,28 +2055,61 @@ static inline bool arm_excp_unmasked(CPUState *cs, 
unsigned int excp_idx,
  * of the AT/ATS operations.
  * The values used are carefully arranged to make mmu_idx => EL lookup easy.
  */
+#define ARM_MMU_IDX_A 0x10 /* A profile (and M profile, for the moment) */
+#define ARM_MMU_IDX_NOTLB 0x20 /* does not have a TLB */
+
+#define ARM_MMU_IDX_TYPE_MASK (~0x7)
+#define ARM_MMU_IDX_COREIDX_MASK 0x7
+
 typedef enum ARMMMUIdx {
-ARMMMUIdx_S12NSE0 = 0,
-ARMMMUIdx_S12NSE1 = 1,
-ARMMMUIdx_S1E2 = 2,
-ARMMMUIdx_S1E3 = 3,
-ARMMMUIdx_S1SE0 = 4,
-ARMMMUIdx_S1SE1 = 5,
-ARMMMUIdx_S2NS = 6,
+ARMMMUIdx_S12NSE0 = 0 | ARM_MMU_IDX_A,
+ARMMMUIdx_S12NSE1 = 1 | ARM_MMU_IDX_A,
+ARMMMUIdx_S1E2 = 2 | ARM_MMU_IDX_A,
+ARMMMUIdx_S1E3 = 3 | ARM_MMU_IDX_A,
+ARMMMUIdx_S1SE0 = 4 | ARM_MMU_IDX_A,
+ARMMMUIdx_S1SE1 = 5 | ARM_MMU_IDX_A,
+ARMMMUIdx_S2NS = 6 | ARM_MMU_IDX_A,
 /* Indexes below here don't have TLBs and are used only for AT system
  * instructions or for the first stage of an S12 page table walk.
  */
-ARMMMUIdx_S1NSE0 = 7,
-ARMMMUIdx_S1NSE1 = 8,
+ARMMMUIdx_S1NSE0 = 0 | ARM_MMU_IDX_NOTLB,
+ARMMMUIdx_S1NSE1 = 1 | ARM_MMU_IDX_NOTLB,
 } ARMMMUIdx;
 
+/* Bit macros for the core-mmu-index values for each index,
+ * for use when calling tlb_flush_by_mmuidx() and friends.
+ */
+typedef enum ARMMMUIdxBit {
+ARMMMUIdxBit_S12NSE0 = 1 << 0,
+ARMMMUIdxBit_S12NSE1 = 1 << 1,
+ARMMMUIdxBit_S1E2 = 1 << 2,
+ARMMMUIdxBit_S1E3 = 1 << 3,
+ARMMMUIdxBit_S1SE0 = 1 << 4,
+ARMMMUIdxBit_S1SE1 = 1 << 5,
+ARMMMUIdxBit_S2NS = 1 << 6,
+} ARMMMUIdxBit;
+
 #define MMU_USER_IDX 0
 
+static inline int arm_to_core_mmu_idx(ARMMMUIdx mmu_idx)
+{
+return mmu_idx & ARM_MMU_IDX_COREIDX_MASK;
+}
+
+static inline ARMMMUIdx core_to_arm_mmu_idx(CPUARMState *env, int mmu_idx)
+{
+return mmu_idx | ARM_MMU_IDX_A;
+}
+
 /* Return the exception level we're running at if this is our mmu_idx */
 static inline int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx)
 {
-assert(mmu_idx < ARMMMUIdx_S2NS);
-return mmu_idx & 3;
+switch (mmu_idx & ARM_MMU_IDX_TYPE_MASK) {
+case ARM_MMU_IDX_A:
+return mmu_idx & 3;
+default:
+g_assert_not_reached();
+}
 }
 
 /* Determine the current mmu_idx to use for normal loads/stores */
@@ -2075,7 +2118,7 @@ static inline int cpu_mmu_index(CPUARMState *env, bool 
ifetch)
 int el = arm_current_el(env);
 
 if (el < 2 && arm_is_secure_below_el3(env)) {
-return ARMMMUIdx_S1SE0 + el;
+return

[Qemu-devel] [PATCH 11/13] armv7m: Classify faults as MemManage or BusFault

2017-04-25 Thread Peter Maydell

From: Michael Davidsaver 

General logic is that operations stopped by the MPU are MemManage,
and those which go through the MPU and are caught by the unassigned
handler are BusFault. Distinguish these by looking at the
exception.fsr values, and set the CFSR bits and (if appropriate)
fill in the BFAR or MMFAR with the exception address.

Signed-off-by: Michael Davidsaver 
[PMM: i-side faults do not set BFAR/MMFAR, only d-side;
 added some CPU_LOG_INT logging]
Signed-off-by: Peter Maydell 
---
 target/arm/helper.c | 45 ++---
 1 file changed, 42 insertions(+), 3 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index 51662ad..49b6d01 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -6342,10 +6342,49 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs)
 break;
 case EXCP_PREFETCH_ABORT:
 case EXCP_DATA_ABORT:
-/* TODO: if we implemented the MPU registers, this is where we
- * should set the MMFAR, etc from exception.fsr and exception.vaddress.
+/* Note that for M profile we don't have a guest facing FSR, but
+ * the env->exception.fsr will be populated by the code that
+ * raises the fault, in the A profile short-descriptor format.
  */
-armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM);
+switch (env->exception.fsr & 0xf) {
+case 0x8: /* External Abort */
+switch (cs->exception_index) {
+case EXCP_PREFETCH_ABORT:
+env->v7m.cfsr |= R_V7M_CFSR_PRECISERR_MASK;
+qemu_log_mask(CPU_LOG_INT, "...with CFSR.PRECISERR\n");
+break;
+case EXCP_DATA_ABORT:
+env->v7m.cfsr |=
+(R_V7M_CFSR_IBUSERR_MASK | R_V7M_CFSR_BFARVALID_MASK);
+env->v7m.bfar = env->exception.vaddress;
+qemu_log_mask(CPU_LOG_INT,
+  "...with CFSR.IBUSERR and BFAR 0x%x\n",
+  env->v7m.bfar);
+break;
+}
+armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_BUS);
+break;
+default:
+/* All other FSR values are either MPU faults or "can't happen
+ * for M profile" cases.
+ */
+switch (cs->exception_index) {
+case EXCP_PREFETCH_ABORT:
+env->v7m.cfsr |= R_V7M_CFSR_IACCVIOL_MASK;
+qemu_log_mask(CPU_LOG_INT, "...with CFSR.IACCVIOL\n");
+break;
+case EXCP_DATA_ABORT:
+env->v7m.cfsr |=
+(R_V7M_CFSR_DACCVIOL_MASK | R_V7M_CFSR_MMARVALID_MASK);
+env->v7m.mmfar = env->exception.vaddress;
+qemu_log_mask(CPU_LOG_INT,
+  "...with CFSR.DACCVIOL and MMFAR 0x%x\n",
+  env->v7m.mmfar);
+break;
+}
+armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_MEM);
+break;
+}
 break;
 case EXCP_BKPT:
 if (semihosting_enabled()) {
-- 
2.7.4

[Qemu-devel] [PATCH 10/13] arm: All M profile cores are PMSA

2017-04-25 Thread Peter Maydell

All M profile CPUs are PMSA, so set the feature bit.
(We haven't actually implemented the M profile MPU register
interface yet, but setting this feature bit gets us closer
to correct behaviour for the MPU-disabled case.)

Signed-off-by: Peter Maydell 
---
 target/arm/cpu.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 8e57498..df8b835 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -543,6 +543,14 @@ static void arm_cpu_post_init(Object *obj)
 {
 ARMCPU *cpu = ARM_CPU(obj);
 
+/* M profile implies PMSA. We have to do this here rather than
+ * in realize with the other feature-implication checks because
+ * we look at the PMSA bit to see if we should add some properties.
+ */
+if (arm_feature(&cpu->env, ARM_FEATURE_M)) {
+set_feature(&cpu->env, ARM_FEATURE_PMSA);
+}
+
 if (arm_feature(&cpu->env, ARM_FEATURE_CBAR) ||
 arm_feature(&cpu->env, ARM_FEATURE_CBAR_RO)) {
 qdev_property_add_static(DEVICE(obj), &arm_cpu_reset_cbar_property,
-- 
2.7.4

[Qemu-devel] [PATCH 01/13] arm: Use the mmu_idx we're passed in arm_cpu_do_unaligned_access()

2017-04-25 Thread Peter Maydell

When identifying the DFSR format for an alignment fault, use
the mmu index that we are passed, rather than calling cpu_mmu_index()
to get the mmu index for the current CPU state. This doesn't actually
make any difference since the only cases where the current MMU index
differs from the index used for the load are the "unprivileged
load/store" instructions, and in that case the mmu index may
differ but the translation regime is the same (apart from the
"use from Hyp mode" case which is UNPREDICTABLE).
However it's the more logical thing to do.

Signed-off-by: Peter Maydell 
---
 target/arm/op_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index 156b825..de24815 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -208,7 +208,7 @@ void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr,
 /* the DFSR for an alignment fault depends on whether we're using
  * the LPAE long descriptor format, or the short descriptor format
  */
-if (arm_s1_regime_using_lpae_format(env, cpu_mmu_index(env, false))) {
+if (arm_s1_regime_using_lpae_format(env, mmu_idx)) {
 env->exception.fsr = (1 << 9) | 0x21;
 } else {
 env->exception.fsr = 0x1;
-- 
2.7.4

[Qemu-devel] [PATCH 13/13] arm: Implement HFNMIENA support for M profile MPU

2017-04-25 Thread Peter Maydell

Implement HFNMIENA support for the M profile MPU. This bit controls
whether the MPU is treated as enabled when executing at execution
priorities of less than zero (in NMI, HardFault or with the FAULTMASK
bit set).

Doing this requires us to use a different MMU index for "running
at execution priority < 0", because we will have different
access permissions for that case versus the normal case.

Signed-off-by: Peter Maydell 
---
 target/arm/cpu.h   | 24 +++-
 target/arm/helper.c| 18 +-
 target/arm/translate.c |  1 +
 3 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index bbdd064..2e873e8 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -2043,6 +2043,18 @@ static inline bool arm_excp_unmasked(CPUState *cs, 
unsigned int excp_idx,
  * for the accesses done as part of a stage 1 page table walk, rather than
  * having to walk the stage 2 page table over and over.)
  *
+ * R profile CPUs have an MPU, but can use the same set of MMU indexes
+ * as A profile. They only need to distinguish NS EL0 and NS EL1 (and
+ * NS EL2 if we ever model a Cortex-R52).
+ *
+ * M profile CPUs are rather different as they do not have a true MMU.
+ * They have the following different MMU indexes:
+ *  User
+ *  Privileged
+ *  Execution priority negative (this is like privileged, but the
+ *  MPU HFNMIENA bit means that it may have different access permission
+ *  check results to normal privileged code, so can't share a TLB).
+ *
  * The ARMMMUIdx and the mmu index value used by the core QEMU TLB code
  * are not quite the same -- different CPU types (most notably M profile
  * vs A/R profile) would like to use MMU indexes with different semantics,
@@ -2078,6 +2090,7 @@ typedef enum ARMMMUIdx {
 ARMMMUIdx_S2NS = 6 | ARM_MMU_IDX_A,
 ARMMMUIdx_MUser = 0 | ARM_MMU_IDX_M,
 ARMMMUIdx_MPriv = 1 | ARM_MMU_IDX_M,
+ARMMMUIdx_MNegPri = 2 | ARM_MMU_IDX_M,
 /* Indexes below here don't have TLBs and are used only for AT system
  * instructions or for the first stage of an S12 page table walk.
  */
@@ -2098,6 +2111,7 @@ typedef enum ARMMMUIdxBit {
 ARMMMUIdxBit_S2NS = 1 << 6,
 ARMMMUIdxBit_MUser = 1 << 0,
 ARMMMUIdxBit_MPriv = 1 << 1,
+ARMMMUIdxBit_MNegPri = 1 << 2,
 } ARMMMUIdxBit;
 
 #define MMU_USER_IDX 0
@@ -2123,7 +2137,7 @@ static inline int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx)
 case ARM_MMU_IDX_A:
 return mmu_idx & 3;
 case ARM_MMU_IDX_M:
-return mmu_idx & 1;
+return mmu_idx == ARMMMUIdx_MUser ? 0 : 1;
 default:
 g_assert_not_reached();
 }
@@ -2137,6 +2151,14 @@ static inline int cpu_mmu_index(CPUARMState *env, bool 
ifetch)
 if (arm_feature(env, ARM_FEATURE_M)) {
 ARMMMUIdx mmu_idx = el == 0 ? ARMMMUIdx_MUser : ARMMMUIdx_MPriv;
 
+/* Execution priority is negative if FAULTMASK is set or
+ * we're in a HardFault or NMI handler.
+ */
+if ((env->v7m.exception > 0 && env->v7m.exception <= 3)
+|| env->daif & PSTATE_F) {
+return arm_to_core_mmu_idx(ARMMMUIdx_MNegPri);
+}
+
 return arm_to_core_mmu_idx(mmu_idx);
 }
 
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 5bf706d..3aae52a 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -7037,6 +7037,7 @@ static inline uint32_t regime_el(CPUARMState *env, 
ARMMMUIdx mmu_idx)
 case ARMMMUIdx_S1NSE0:
 case ARMMMUIdx_S1NSE1:
 case ARMMMUIdx_MPriv:
+case ARMMMUIdx_MNegPri:
 case ARMMMUIdx_MUser:
 return 1;
 default:
@@ -7055,6 +7056,7 @@ static inline bool regime_is_secure(CPUARMState *env, 
ARMMMUIdx mmu_idx)
 case ARMMMUIdx_S1E2:
 case ARMMMUIdx_S2NS:
 case ARMMMUIdx_MPriv:
+case ARMMMUIdx_MNegPri:
 case ARMMMUIdx_MUser:
 return false;
 case ARMMMUIdx_S1E3:
@@ -7077,7 +7079,21 @@ static inline bool 
regime_translation_disabled(CPUARMState *env,
ARMMMUIdx mmu_idx)
 {
 if (arm_feature(env, ARM_FEATURE_M)) {
-return !(env->v7m.mpu_ctrl & R_V7M_MPU_CTRL_ENABLE_MASK);
+switch (env->v7m.mpu_ctrl &
+(R_V7M_MPU_CTRL_ENABLE_MASK | R_V7M_MPU_CTRL_HFNMIENA_MASK)) {
+case R_V7M_MPU_CTRL_ENABLE_MASK:
+/* Enabled, but not for HardFault and NMI */
+return mmu_idx == ARMMMUIdx_MNegPri;
+case R_V7M_MPU_CTRL_ENABLE_MASK | R_V7M_MPU_CTRL_HFNMIENA_MASK:
+/* Enabled for all cases */
+return false;
+case 0:
+default:
+/* HFNMIENA set and ENABLE clear is UNPREDICTABLE, but
+ * we warned about that in armv7m_nvic.c when the guest set it.
+ */
+return true;
+}
 }
 
 if (mmu_idx == ARMMMUIdx_S2NS) {
diff --git a/target/arm/translate.c b/target/arm/translate.c
index ac905dd..ae6646c 100644
--- a/target/arm/translate.c

[Qemu-devel] [PATCH 06/13] arm: Don't let no-MPU PMSA cores write to SCTLR.M

2017-04-25 Thread Peter Maydell

If the CPU is a PMSA config with no MPU implemented, then the
SCTLR.M bit should be RAZ/WI, so that the guest can never
turn on the non-existent MPU.

Signed-off-by: Peter Maydell 
---
 target/arm/helper.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index 404bfdb..f0f25c8 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -3258,6 +3258,11 @@ static void sctlr_write(CPUARMState *env, const 
ARMCPRegInfo *ri,
 return;
 }
 
+if (arm_feature(env, ARM_FEATURE_PMSA) && !cpu->has_mpu) {
+/* M bit is RAZ/WI for PMSA with no MPU implemented */
+value &= ~SCTLR_M;
+}
+
 raw_write(env, ri, value);
 /* ??? Lots of these bits are not implemented.  */
 /* This may enable/disable the MMU, so do a TLB flush.  */
-- 
2.7.4

[Qemu-devel] [PATCH 03/13] arm: Use different ARMMMUIdx values for M profile

2017-04-25 Thread Peter Maydell

Make M profile use completely separate ARMMMUIdx values from
those that A profile CPUs use. This is a prelude to adding
support for the MPU and for v8M, which together will require
6 MMU indexes which don't map cleanly onto the A profile
uses:
 non secure User
 non secure Privileged
 non secure Privileged, execution priority < 0
 secure User
 secure Privileged
 secure Privileged, execution priority < 0

Signed-off-by: Peter Maydell 
---
 target/arm/cpu.h   | 21 +++--
 target/arm/helper.c|  5 +
 target/arm/translate.c |  3 +++
 3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index e1f4856..253565b 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -2055,8 +2055,9 @@ static inline bool arm_excp_unmasked(CPUState *cs, 
unsigned int excp_idx,
  * of the AT/ATS operations.
  * The values used are carefully arranged to make mmu_idx => EL lookup easy.
  */
-#define ARM_MMU_IDX_A 0x10 /* A profile (and M profile, for the moment) */
+#define ARM_MMU_IDX_A 0x10 /* A profile */
 #define ARM_MMU_IDX_NOTLB 0x20 /* does not have a TLB */
+#define ARM_MMU_IDX_M 0x40 /* M profile */
 
 #define ARM_MMU_IDX_TYPE_MASK (~0x7)
 #define ARM_MMU_IDX_COREIDX_MASK 0x7
@@ -2069,6 +2070,8 @@ typedef enum ARMMMUIdx {
 ARMMMUIdx_S1SE0 = 4 | ARM_MMU_IDX_A,
 ARMMMUIdx_S1SE1 = 5 | ARM_MMU_IDX_A,
 ARMMMUIdx_S2NS = 6 | ARM_MMU_IDX_A,
+ARMMMUIdx_MUser = 0 | ARM_MMU_IDX_M,
+ARMMMUIdx_MPriv = 1 | ARM_MMU_IDX_M,
 /* Indexes below here don't have TLBs and are used only for AT system
  * instructions or for the first stage of an S12 page table walk.
  */
@@ -2087,6 +2090,8 @@ typedef enum ARMMMUIdxBit {
 ARMMMUIdxBit_S1SE0 = 1 << 4,
 ARMMMUIdxBit_S1SE1 = 1 << 5,
 ARMMMUIdxBit_S2NS = 1 << 6,
+ARMMMUIdxBit_MUser = 1 << 0,
+ARMMMUIdxBit_MPriv = 1 << 1,
 } ARMMMUIdxBit;
 
 #define MMU_USER_IDX 0
@@ -2098,7 +2103,11 @@ static inline int arm_to_core_mmu_idx(ARMMMUIdx mmu_idx)
 
 static inline ARMMMUIdx core_to_arm_mmu_idx(CPUARMState *env, int mmu_idx)
 {
-return mmu_idx | ARM_MMU_IDX_A;
+if (arm_feature(env, ARM_FEATURE_M)) {
+return mmu_idx | ARM_MMU_IDX_M;
+} else {
+return mmu_idx | ARM_MMU_IDX_A;
+}
 }
 
 /* Return the exception level we're running at if this is our mmu_idx */
@@ -2107,6 +2116,8 @@ static inline int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx)
 switch (mmu_idx & ARM_MMU_IDX_TYPE_MASK) {
 case ARM_MMU_IDX_A:
 return mmu_idx & 3;
+case ARM_MMU_IDX_M:
+return mmu_idx & 1;
 default:
 g_assert_not_reached();
 }
@@ -2117,6 +2128,12 @@ static inline int cpu_mmu_index(CPUARMState *env, bool 
ifetch)
 {
 int el = arm_current_el(env);
 
+if (arm_feature(env, ARM_FEATURE_M)) {
+ARMMMUIdx mmu_idx = el == 0 ? ARMMMUIdx_MUser : ARMMMUIdx_MPriv;
+
+return arm_to_core_mmu_idx(mmu_idx);
+}
+
 if (el < 2 && arm_is_secure_below_el3(env)) {
 return arm_to_core_mmu_idx(ARMMMUIdx_S1SE0 + el);
 }
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 520adcc..791332c 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -6992,6 +6992,8 @@ static inline uint32_t regime_el(CPUARMState *env, 
ARMMMUIdx mmu_idx)
 case ARMMMUIdx_S1SE1:
 case ARMMMUIdx_S1NSE0:
 case ARMMMUIdx_S1NSE1:
+case ARMMMUIdx_MPriv:
+case ARMMMUIdx_MUser:
 return 1;
 default:
 g_assert_not_reached();
@@ -7008,6 +7010,8 @@ static inline bool regime_is_secure(CPUARMState *env, 
ARMMMUIdx mmu_idx)
 case ARMMMUIdx_S1NSE1:
 case ARMMMUIdx_S1E2:
 case ARMMMUIdx_S2NS:
+case ARMMMUIdx_MPriv:
+case ARMMMUIdx_MUser:
 return false;
 case ARMMMUIdx_S1E3:
 case ARMMMUIdx_S1SE0:
@@ -7146,6 +7150,7 @@ static inline bool regime_is_user(CPUARMState *env, 
ARMMMUIdx mmu_idx)
 switch (mmu_idx) {
 case ARMMMUIdx_S1SE0:
 case ARMMMUIdx_S1NSE0:
+case ARMMMUIdx_MUser:
 return true;
 default:
 return false;
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 8d509a2..ac905dd 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -161,6 +161,9 @@ static inline int get_a32_user_mem_index(DisasContext *s)
 case ARMMMUIdx_S1SE0:
 case ARMMMUIdx_S1SE1:
 return arm_to_core_mmu_idx(ARMMMUIdx_S1SE0);
+case ARMMMUIdx_MUser:
+case ARMMMUIdx_MPriv:
+return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
 case ARMMMUIdx_S2NS:
 default:
 g_assert_not_reached();
-- 
2.7.4

[Qemu-devel] [PATCH 00/13] armv7m: Implement MPU support

2017-04-25 Thread Peter Maydell

This patchset implements support for the MPU in our v7M cores. 
Support is on the same level as that for the R profile MPU: it works,
but regions smaller than 1K in size are not supported. It likely
has some missing corner-case features.

The patchset can be divided into three parts:

 * patches 1..3 are the RFC I sent out yesterday which refactors the
   mmuidx handling so that M profile can use different semantics for
   the mmu indexes (only very minor change to the RFC: I used some
   symbolic constants rather than hardcoding masks with 7 and ~7,
   tweaked a few expressions, etc)
 * patches 4..7 clean up our handling of whether the MPU
   exists or not, since we weren't consistent about whether
   ARM_FEATURE_MPU meant "PMSA, not VMSA" or "PMSA and MPU is
   present".  We rename the feature bit to ARM_FEATURE_PMSA and use
   the has_mpu flag to indicate whether a PMSA core has an MPU
   implemented or not
 * patches 8..13 implement the MPU support proper.  Most of this is
   Michael Davidsaver's code, but I've tidied it up, fixed a few
   bugs, and reimplemented the HFNMIENA support

Testing has been light -- I have a few basic MPU tests at
https://git.linaro.org/people/peter.maydell/m-profile-tests.git
but otherwise don't have anything to hand that exercises the MPU.

I wanted to get this patchset out to the list before I go off
on my break; I will come back and follow up on review comments
when I get back in June.

thanks
-- PMM

Michael Davidsaver (4):
  armv7m: Improve "-d mmu" tracing for PMSAv7 MPU
  armv7m: Implement M profile default memory map
  armv7m: Classify faults as MemManage or BusFault
  arm: add MPU support to M profile CPUs

Peter Maydell (9):
  arm: Use the mmu_idx we're passed in arm_cpu_do_unaligned_access()
  arm: Add support for M profile CPUs having different MMU index
semantics
  arm: Use different ARMMMUIdx values for M profile
  arm: Clean up handling of no-MPU PMSA CPUs
  arm: Don't clear ARM_FEATURE_PMSA for no-mpu configs
  arm: Don't let no-MPU PMSA cores write to SCTLR.M
  arm: Remove unnecessary check on cpu->pmsav7_dregion
  arm: All M profile cores are PMSA
  arm: Implement HFNMIENA support for M profile MPU

 target/arm/cpu.h   | 118 ++--
 target/arm/translate.h |   2 +-
 hw/intc/armv7m_nvic.c  | 104 ++
 target/arm/cpu.c   |  26 +++-
 target/arm/helper.c| 332 +++--
 target/arm/machine.c   |   7 +-
 target/arm/op_helper.c |   3 +-
 target/arm/translate-a64.c |  18 ++-
 target/arm/translate.c |  14 +-
 9 files changed, 484 insertions(+), 140 deletions(-)

-- 
2.7.4

[Qemu-devel] [PATCH 05/13] arm: Don't clear ARM_FEATURE_PMSA for no-mpu configs

2017-04-25 Thread Peter Maydell

Fix the handling of QOM properties for PMSA CPUs with no MPU:

Allow no-MPU to be specified by either:
 * has-mpu = false
 * pmsav7_dregion = 0
and make setting one imply the other. Don't clear the PMSA
feature bit in this situation.

Signed-off-by: Peter Maydell 
---
 target/arm/cpu.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index f17e279..8e57498 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -757,8 +757,14 @@ static void arm_cpu_realizefn(DeviceState *dev, Error 
**errp)
 cpu->id_pfr1 &= ~0xf000;
 }
 
+/* MPU can be configured out of a PMSA CPU either by setting has-mpu
+ * to false or by setting pmsav7-dregion to 0.
+ */
 if (!cpu->has_mpu) {
-unset_feature(ARM_FEATURE_PMSA);
+cpu->pmsav7_dregion = 0;
+}
+if (cpu->pmsav7_dregion == 0) {
+cpu->has_mpu = false;
 }
 
 if (arm_feature(env, ARM_FEATURE_PMSA) &&
-- 
2.7.4

Re: [Qemu-devel] [BUG] QEMU crashes with dpdk virtio pmd

2017-04-25 Thread Jason Wang




On 2017年04月25日 19:37, wangyunjian wrote:

q->tx_bh is freed in virtio_net_del_queue() when the virtio queues are
removed because the guest doesn't support multiqueue. But it might still be
referenced elsewhere (e.g. virtio_net_set_status()), so it needs to be set
to NULL.

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 7d091c9..98bd683 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1522,9 +1522,12 @@ static void virtio_net_del_queue(VirtIONet *n, int index)
  if (q->tx_timer) {
  timer_del(q->tx_timer);
  timer_free(q->tx_timer);
+q->tx_timer = NULL;
  } else {
  qemu_bh_delete(q->tx_bh);
+q->tx_bh = NULL;
  }
+q->tx_waiting = 0;
  virtio_del_queue(vdev, index * 2 + 1);
  }


Thanks a lot for the fix.

Two questions:

- If virtio_net_set_status() is the only function that may access tx_bh, 
it looks like setting tx_waiting to zero is sufficient?

- Can you post a formal patch for this?

Thanks


From: wangyunjian
Sent: Monday, April 24, 2017 6:10 PM
To: qemu-devel@nongnu.org; Michael S. Tsirkin ; 'Jason Wang' 

Cc: wangyunjian ; caihe 
Subject: [Qemu-devel][BUG] QEMU crashes with dpdk virtio pmd

Qemu crashes, with pre-condition:
vm xml config with multiqueue, and the vm's driver virtio-net support 
multi-queue

reproduce steps:
i. start dpdk testpmd in VM with the virtio nic
ii. stop testpmd
iii. reboot the VM

This bug was introduced by commit "f9d6dbf0  remove virtio queues if the guest doesn't support 
multiqueue".

Qemu version: QEMU emulator version 2.9.50 (v2.9.0-137-g32c7e0a)
VM DPDK version:  DPDK-1.6.1

Call Trace:
#0  0x7f60881fe5d7 in raise () from /usr/lib64/libc.so.6
#1  0x7f60881ffcc8 in abort () from /usr/lib64/libc.so.6
#2  0x7f608823e2f7 in __libc_message () from /usr/lib64/libc.so.6
#3  0x7f60882456d3 in _int_free () from /usr/lib64/libc.so.6
#4  0x7f608900158f in g_free () from /usr/lib64/libglib-2.0.so.0
#5  0x7f6088fea32c in iter_remove_or_steal () from 
/usr/lib64/libglib-2.0.so.0
#6  0x7f608edc0986 in object_property_del_all (obj=0x7f6091e74800) at 
qom/object.c:410
#7  object_finalize (data=0x7f6091e74800) at qom/object.c:467
#8  object_unref (obj=obj@entry=0x7f6091e74800) at qom/object.c:903
#9  0x7f608eaf1fd3 in phys_section_destroy (mr=0x7f6091e74800) at 
git/qemu/exec.c:1154
#10 phys_sections_free (map=0x7f6090b72bb0) at git/qemu/exec.c:1163
#11 address_space_dispatch_free (d=0x7f6090b72b90) at git/qemu/exec.c:2514
#12 0x7f608ee91ace in call_rcu_thread (opaque=<optimized out>) at 
util/rcu.c:272
#13 0x7f6089b0ddc5 in start_thread () from /usr/lib64/libpthread.so.0
#14 0x7f60882bf71d in clone () from /usr/lib64/libc.so.6

Call Trace:
#0  0x7fdccaeb9790 in ?? ()
#1  0x7fdcd82d09fc in object_property_del_all (obj=0x7fdcdb8acf60) at 
qom/object.c:405
#2  object_finalize (data=0x7fdcdb8acf60) at qom/object.c:467
#3  object_unref (obj=obj@entry=0x7fdcdb8acf60) at qom/object.c:903
#4  0x7fdcd8001fd3 in phys_section_destroy (mr=0x7fdcdb8acf60) at 
git/qemu/exec.c:1154
#5  phys_sections_free (map=0x7fdcdc86aa00) at git/qemu/exec.c:1163
#6  address_space_dispatch_free (d=0x7fdcdc86a9e0) at git/qemu/exec.c:2514
#7  0x7fdcd83a1ace in call_rcu_thread (opaque=<optimized out>) at 
util/rcu.c:272
#8  0x7fdcd301ddc5 in start_thread () from /usr/lib64/libpthread.so.0
#9  0x7fdcd17cf71d in clone () from /usr/lib64/libc.so.6

Re: [Qemu-devel] [PATCH 1/2] migration: Move check_migratable() into qdev.c

2017-04-25 Thread Laurent Vivier

On 25/04/2017 12:17, Juan Quintela wrote:
> The function is only used once, and nothing else in migration knows
> about objects.  Create the function vmstate_device_is_migratable() in
> savevm.c that really does the bit that is related to migration.
> 
> Signed-off-by: Juan Quintela 

Reviewed-by: Laurent Vivier 

> ---
>  hw/core/qdev.c| 15 ++-
>  include/migration/migration.h |  3 ---
>  include/migration/vmstate.h   |  2 ++
>  migration/migration.c | 15 ---
>  migration/savevm.c| 10 ++
>  stubs/vmstate.c   |  5 ++---
>  6 files changed, 28 insertions(+), 22 deletions(-)
> 
> diff --git a/hw/core/qdev.c b/hw/core/qdev.c
> index 02b632f..17ff638 100644
> --- a/hw/core/qdev.c
> +++ b/hw/core/qdev.c
> @@ -37,7 +37,7 @@
>  #include "hw/boards.h"
>  #include "hw/sysbus.h"
>  #include "qapi-event.h"
> -#include "migration/migration.h"
> +#include "migration/vmstate.h"
>  
>  bool qdev_hotplug = false;
>  static bool qdev_hot_added = false;
> @@ -861,6 +861,19 @@ static bool device_get_realized(Object *obj, Error 
> **errp)
>  return dev->realized;
>  }
>  
> +static int check_migratable(Object *obj, Error **err)
> +{
> +DeviceClass *dc = DEVICE_GET_CLASS(obj);
> +if (!vmstate_device_is_migratable(dc->vmsd)) {
> +error_setg(err, "Device %s is not migratable, but "
> +   "--only-migratable was specified",
> +   object_get_typename(obj));
> +return -1;
> +}
> +
> +return 0;
> +}
> +
>  static void device_set_realized(Object *obj, bool value, Error **errp)
>  {
>  DeviceState *dev = DEVICE(obj);
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index ba1a16c..dfeca38 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -22,7 +22,6 @@
>  #include "qapi-types.h"
>  #include "exec/cpu-common.h"
>  #include "qemu/coroutine_int.h"
> -#include "qom/object.h"
>  
>  #define QEMU_VM_FILE_MAGIC   0x5145564d
>  #define QEMU_VM_FILE_VERSION_COMPAT  0x0002
> @@ -292,8 +291,6 @@ int migrate_add_blocker(Error *reason, Error **errp);
>   */
>  void migrate_del_blocker(Error *reason);
>  
> -int check_migratable(Object *obj, Error **err);
> -
>  bool migrate_release_ram(void);
>  bool migrate_postcopy_ram(void);
>  bool migrate_zero_blocks(void);
> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
> index dad3984..9452dec 100644
> --- a/include/migration/vmstate.h
> +++ b/include/migration/vmstate.h
> @@ -1049,4 +1049,6 @@ int64_t self_announce_delay(int round)
>  
>  void dump_vmstate_json_to_file(FILE *out_fp);
>  
> +bool vmstate_device_is_migratable(const VMStateDescription *vmsd);
> +
>  #endif
> diff --git a/migration/migration.c b/migration/migration.c
> index 353f272..5447cab 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -1158,21 +1158,6 @@ void migrate_del_blocker(Error *reason)
>  migration_blockers = g_slist_remove(migration_blockers, reason);
>  }
>  
> -int check_migratable(Object *obj, Error **err)
> -{
> -DeviceClass *dc = DEVICE_GET_CLASS(obj);
> -if (only_migratable && dc->vmsd) {
> -if (dc->vmsd->unmigratable) {
> -error_setg(err, "Device %s is not migratable, but "
> -   "--only-migratable was specified",
> -   object_get_typename(obj));
> -return -1;
> -}
> -}
> -
> -return 0;
> -}
> -
>  void qmp_migrate_incoming(const char *uri, Error **errp)
>  {
>  Error *local_err = NULL;
> diff --git a/migration/savevm.c b/migration/savevm.c
> index 03ae1bd..7421a67 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -2480,3 +2480,13 @@ void vmstate_register_ram_global(MemoryRegion *mr)
>  {
>  vmstate_register_ram(mr, NULL);
>  }
> +
> +bool vmstate_device_is_migratable(const VMStateDescription *vmsd)
> +{
> +if (only_migratable && vmsd) {
> +if (vmsd->unmigratable) {
> +return false;
> +}
> +}
> +return true;
> +}
> diff --git a/stubs/vmstate.c b/stubs/vmstate.c
> index 6d52f29..5af824b 100644
> --- a/stubs/vmstate.c
> +++ b/stubs/vmstate.c
> @@ -1,7 +1,6 @@
>  #include "qemu/osdep.h"
>  #include "qemu-common.h"
>  #include "migration/vmstate.h"
> -#include "migration/migration.h"
>  
>  const VMStateDescription vmstate_dummy = {};
>  
> @@ -21,7 +20,7 @@ void vmstate_unregister(DeviceState *dev,
>  {
>  }
>  
> -int check_migratable(Object *obj, Error **err)
> +bool vmstate_device_is_migratable(const VMStateDescription *vmsd)
>  {
> -return 0;
> +return true;
>  }
>

Re: [Qemu-devel] [PATCH] COLO-compare: Add compare_lock avoid comparison conflict

2017-04-25 Thread Zhang Chen




On 04/20/2017 02:40 PM, Jason Wang wrote:



On 2017年04月20日 14:36, Zhang Chen wrote:



On 04/20/2017 02:20 PM, Hailiang Zhang wrote:

On 2017/4/20 12:32, Zhang Chen wrote:

When network traffic is heavy, compare_pri_rs_finalize() and
compare_sec_rs_finalize() have a chance to conflict.
Both of them call colo_compare_connection() to compare packets,
but if a secondary packet arrives during the compare_pri_rs_finalize()
comparison and triggers compare_sec_rs_finalize(), that packet will be
handled twice. If the packets are the same, the pkt will be double freed.


Interesting. If I'm right, this should not happen, because all the
comparison work is done in the colo compare thread, so there is no chance
of accessing the connect_list concurrently.  Besides, even if packets from
both the primary and the secondary arrive at the same time, it should only
be handled once; we will handle it with the later-arrived one,

No ?


No, my tests often trigger this bug; you can use a UDP server and
client to test it.


13517@1492648526.850246:colo_compare_main : packet same and release 
packet
13517@1492648526.850304:colo_compare_main : packet same and release 
packet
*** glibc detected *** 
/home/zhangchen/qemu-colo-apr14/x86_64-softmmu/qemu-system-x86_64: 
double free or corruption (out): 0x56a75210 ***

=== Backtrace: =
/lib64/libc.so.6(+0x76628)[0x753d6628]
/lib64/libc.so.6(cfree+0x6c)[0x753db5cc]


Thanks
Zhang Chen


I agree that you should check whether or not they are running in the 
same thread.




I found they are not running in the same thread. I have reviewed the
relevant code but could not find out why, although we do the same job for
pri_chr_in and sec_chr_in, they are running in different threads.

Anyone can tell me the reason?

Log:

Breakpoint 5, compare_pri_chr_in (opaque=0x77fd1010, 
buf=0x73eb0950 "", size=8)

at net/colo-compare.c:591
591{
(gdb) info thread
  Id   Target Id Frame
  18   Thread 0x7fff70bff700 (LWP 27864) "qemu-system-x86" 
0x756e4ae7 in sem_timedwait ()

   from /lib64/libpthread.so.0
  9Thread 0x7fff6f1ff700 (LWP 27748) "qemu-system-x86" 
0x756e4a00 in sem_wait ()

   from /lib64/libpthread.so.0
  7Thread 0x7fff701ff700 (LWP 27746) "qemu-system-x86" 
0x756e261c in pthread_cond_wait@@GLIBC_2.3.2 () from 
/lib64/libpthread.so.0
  5Thread 0x72dbf700 (LWP 27743) "qemu-system-x86" 
0x754320a7 in ioctl ()

   from /lib64/libc.so.6
  4Thread 0x735c0700 (LWP 27742) "qemu-system-x86" 
0x756e5dbd in sendmsg ()

   from /lib64/libpthread.so.0
* 3Thread 0x73eb2700 (LWP 27741) "qemu-system-x86" 
compare_pri_chr_in (opaque=0x77fd1010,

buf=0x73eb0950 "", size=8) at net/colo-compare.c:591
  2Thread 0x746b3700 (LWP 27729) "qemu-system-x86" 
0x75436789 in syscall ()

   from /lib64/libc.so.6
  1Thread 0x77fb7a80 (LWP 27725) "qemu-system-x86" 
0x756e5294 in __lll_lock_wait ()

   from /lib64/libpthread.so.0
(gdb)  bt
#0  compare_pri_chr_in (opaque=0x77fd1010, buf=0x73eb0950 "", 
size=8) at net/colo-compare.c:591
#1  0x55c60fba in qemu_chr_be_write_impl (s=0x5684a630, 
buf=0x73eb0950 "", len=8)

at chardev/char.c:284
#2  0x55c6102f in qemu_chr_be_write (s=0x5684a630, 
buf=0x73eb0950 "", len=8)

at chardev/char.c:296
#3  0x55c6a056 in tcp_chr_read (chan=0x5684aa30, 
cond=G_IO_IN, opaque=0x5684a630)

at chardev/char-socket.c:414
#4  0x55c83dbc in qio_channel_fd_source_dispatch 
(source=0x568d8b80, callback=
0x55c69ebf , user_data=0x5684a630) at 
io/channel-watch.c:84
#5  0x760c460a in g_main_context_dispatch () from 
/usr/lib64/libglib-2.0.so.0

#6  0x760c7e88 in ?? () from /usr/lib64/libglib-2.0.so.0
#7  0x760c835d in g_main_loop_run () from 
/usr/lib64/libglib-2.0.so.0
#8  0x55b82e22 in colo_compare_thread (opaque=0x77fd1010) at 
net/colo-compare.c:703

#9  0x756de7b6 in start_thread () from /lib64/libpthread.so.0
#10 0x75439d6d in clone () from /lib64/libc.so.6
#11 0x in ?? ()
(gdb) c
Continuing.
[Switching to Thread 0x77fb7a80 (LWP 27725)]

Breakpoint 6, compare_sec_chr_in (opaque=0x77fd1010, 
buf=0x7fffc590 "", size=1088)

at net/colo-compare.c:608
608{
(gdb) info thread
  Id   Target Id Frame
  18   Thread 0x7fff70bff700 (LWP 27864) "qemu-system-x86" (Exiting) 
0x756de9b3 in start_thread

() from /lib64/libpthread.so.0
  9Thread 0x7fff6f1ff700 (LWP 27748) "qemu-system-x86" 
0x756e4a00 in sem_wait ()

   from /lib64/libpthread.so.0
  7Thread 0x7fff701ff700 (LWP 27746) "qemu-system-x86" 
0x756e261c in pthread_cond_wait@@GLIBC_2.3.2 () from 
/lib64/libpthread.so.0
  5Thread 0x72dbf700 (LWP 27743) "qemu-system-x86" 
0x754320a7 in ioctl ()

   from /lib64/libc.so.6
  4Thread 0x735c0700 (LWP 27742) "qemu-system-x86" 
0x754320a7 in ioctl

Re: [Qemu-devel] [virtio-comment] Re: [RFC 1/2] spec/vhost-user: Introduce secondary channel for slave initiated requests

2017-04-25 Thread Maxime Coquelin

Hi Wei,

On 04/24/2017 10:05 AM, Wei Wang wrote:

On 04/14/2017 05:03 PM, Marc-André Lureau wrote:

Hi

On Tue, Apr 11, 2017 at 5:53 PM Maxime Coquelin 
> wrote:

Hi Marc-André,

On 04/11/2017 03:06 PM, Marc-André Lureau wrote:
> Hi
>
> On Tue, Apr 11, 2017 at 12:10 PM Maxime Coquelin
> 
>> wrote:
>
> This vhost-user specification update aims at enabling the
> slave to send requests to the master using a dedicated socket
> created by the master.
>
> It can be used for example when the slave implements a device
> IOTLB to send cache miss requests to the master.
>
> The message types list is updated with an "Initiator" field to
> indicate for each type whether the master and/or slave can
> initiate the request.
>
> Signed-off-by: Maxime Coquelin 
> >>
>
>
> This is very similar to a patch I proposed for shutdown slave
initiated
> requests:
> https://lists.gnu.org/archive/html/qemu-devel/2016-04/msg00095.html

Indeed, thanks for pointing this out, I wasn't aware of your series.

I find your proposal of having dedicated messages types
(VHOST_USER_SLAVE_*) cleaner.

ok

Are you ok if I take over your patch, and replace
VHOST_USER_SET_SLAVE_FD with VHOST_USER_SET_SLAVE_REQ_FD?

They are very similar, I suggest you update your patch with the best 
of both.

I suppose you came to the same conclusion as me that trying to make 
the communication both ways on the same fd would be quite difficult, 
although it's a bit strange that the qemu implementation forces the 
design of the protocol in some direction.

--

When would you get the implementation patch ready? Thanks.

I sent second version of the RFC on April 14th, which comprises the
implementation:
https://lists.gnu.org/archive/html/qemu-devel/2017-04/msg02467.html

Cheers,
Maxime

Re: [Qemu-devel] [PATCH] ram: Split dirty bitmap by RAMBlock

2017-04-25 Thread Hailiang Zhang


On 2017/4/25 18:11, Juan Quintela wrote:

Both the ram bitmap and the unsent bitmap are split by RAMBlock.

Signed-off-by: Juan Quintela 
---
  include/exec/ram_addr.h  |  13 +-
  include/migration/postcopy-ram.h |   3 -
  migration/postcopy-ram.c |   5 +-
  migration/ram.c  | 257 +++
  4 files changed, 109 insertions(+), 169 deletions(-)

diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index 6436a41..c56b35b 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -39,6 +39,14 @@ struct RAMBlock {
  QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
  int fd;
  size_t page_size;
+/* dirty bitmap used during migration */
+unsigned long *bmap;
+/* bitmap of pages that haven't been sent even once
+ * only maintained and used in postcopy at the moment
+ * where it's used to send the dirtymap at the start
+ * of the postcopy phase
+ */
+unsigned long *unsentmap;
  };
  
  static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)

@@ -360,16 +368,15 @@ static inline void 
cpu_physical_memory_clear_dirty_range(ram_addr_t start,
  
  
  static inline

-uint64_t cpu_physical_memory_sync_dirty_bitmap(unsigned long *dest,
-   RAMBlock *rb,
+uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
 ram_addr_t start,
 ram_addr_t length,
 uint64_t *real_dirty_pages)
  {
  ram_addr_t addr;
-start = rb->offset + start;
  unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);
  uint64_t num_dirty = 0;
+unsigned long *dest = rb->bmap;
  
  /* start address is aligned at the start of a word? */

  if (((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) {
diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
index 8e036b9..4c25f03 100644
--- a/include/migration/postcopy-ram.h
+++ b/include/migration/postcopy-ram.h
@@ -43,12 +43,9 @@ int postcopy_ram_prepare_discard(MigrationIncomingState 
*mis);
  
  /*

   * Called at the start of each RAMBlock by the bitmap code.
- * 'offset' is the bitmap offset of the named RAMBlock in the migration
- * bitmap.
   * Returns a new PDS
   */
  PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
- unsigned long offset,
   const char *name);
  
  /*

diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 85fd8d7..e3f4a37 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -33,7 +33,6 @@
  
  struct PostcopyDiscardState {

  const char *ramblock_name;
-uint64_t offset; /* Bitmap entry for the 1st bit of this RAMBlock */
  uint16_t cur_entry;
  /*
   * Start and length of a discard range (bytes)
@@ -717,14 +716,12 @@ void *postcopy_get_tmp_page(MigrationIncomingState *mis)
   * returns: a new PDS.
   */
  PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
- unsigned long offset,
   const char *name)
  {
  PostcopyDiscardState *res = g_malloc0(sizeof(PostcopyDiscardState));
  
  if (res) {

  res->ramblock_name = name;
-res->offset = offset;
  }
  
  return res;

@@ -745,7 +742,7 @@ void postcopy_discard_send_range(MigrationState *ms, 
PostcopyDiscardState *pds,
  {
  size_t tp_size = qemu_target_page_size();
  /* Convert to byte offsets within the RAM block */
-pds->start_list[pds->cur_entry] = (start - pds->offset) * tp_size;
+pds->start_list[pds->cur_entry] = start  * tp_size;
  pds->length_list[pds->cur_entry] = length * tp_size;
  trace_postcopy_discard_send_range(pds->ramblock_name, start, length);
  pds->cur_entry++;
diff --git a/migration/ram.c b/migration/ram.c
index f48664e..d99f6e2 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -138,19 +138,6 @@ out:
  return ret;
  }
  
-struct RAMBitmap {

-struct rcu_head rcu;
-/* Main migration bitmap */
-unsigned long *bmap;
-/* bitmap of pages that haven't been sent even once
- * only maintained and used in postcopy at the moment
- * where it's used to send the dirtymap at the start
- * of the postcopy phase
- */
-unsigned long *unsentmap;
-};
-typedef struct RAMBitmap RAMBitmap;
-
  /*
   * An outstanding page request, on the source, having been received
   * and queued
@@ -220,8 +207,6 @@ struct RAMState {
  uint64_t postcopy_requests;
  /* protects modification of the bitmap */
  QemuMutex bitmap_mutex;
-/* Ram Bitmap protected by RCU */
-RAMBitmap *ram_bitmap;
  /* The RAMBlock used in the last src_page_requests */

Re: [Qemu-devel] [PATCH 3/6] migration: add UFFD_FEATURE_THREAD_ID feature support

2017-04-25 Thread Alexey Perevalov


On 04/25/2017 02:14 PM, Dr. David Alan Gilbert wrote:

* Alexey (a.pereva...@samsung.com) wrote:

+ Andrea Arcangeli

On Mon, Apr 24, 2017 at 06:10:02PM +0100, Dr. David Alan Gilbert wrote:

* Alexey (a.pereva...@samsung.com) wrote:

On Mon, Apr 24, 2017 at 04:12:29PM +0800, Peter Xu wrote:

On Fri, Apr 21, 2017 at 06:22:12PM +0300, Alexey wrote:

On Fri, Apr 21, 2017 at 11:24:54AM +0100, Dr. David Alan Gilbert wrote:

* Alexey Perevalov (a.pereva...@samsung.com) wrote:

The userfaultfd mechanism is able to provide the process thread id
if the client requests it with the UFFDIO_API ioctl.

Signed-off-by: Alexey Perevalov 

There seem to be two parts to this:
   a) Adding the mis parameter to ufd_version_check
   b) Asking for the feature

Please split it into two patches.

Also


---
  include/migration/postcopy-ram.h |  2 +-
  migration/migration.c|  2 +-
  migration/postcopy-ram.c | 12 ++--
  migration/savevm.c   |  2 +-
  4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
index 8e036b9..809f6db 100644
--- a/include/migration/postcopy-ram.h
+++ b/include/migration/postcopy-ram.h
@@ -14,7 +14,7 @@
  #define QEMU_POSTCOPY_RAM_H
  
  /* Return true if the host supports everything we need to do postcopy-ram */

-bool postcopy_ram_supported_by_host(void);
+bool postcopy_ram_supported_by_host(MigrationIncomingState *mis);
  
  /*

   * Make all of RAM sensitive to accesses to areas that haven't yet been 
written
diff --git a/migration/migration.c b/migration/migration.c
index ad4036f..79f6425 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -802,7 +802,7 @@ void 
qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
   * special support.
   */
  if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
-!postcopy_ram_supported_by_host()) {
+!postcopy_ram_supported_by_host(NULL)) {
  /* postcopy_ram_supported_by_host will have emitted a more
   * detailed message
   */
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index dc80dbb..70f0480 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -60,13 +60,13 @@ struct PostcopyDiscardState {
  #include 
  #include 
  
-static bool ufd_version_check(int ufd)

+static bool ufd_version_check(int ufd, MigrationIncomingState *mis)
  {
  struct uffdio_api api_struct;
  uint64_t ioctl_mask;
  
  api_struct.api = UFFD_API;

-api_struct.features = 0;
+api_struct.features = UFFD_FEATURE_THREAD_ID;
  if (ioctl(ufd, UFFDIO_API, &api_struct)) {
  error_report("postcopy_ram_supported_by_host: UFFDIO_API failed: %s",
   strerror(errno));

You're not actually using the 'mis' here - what I'd expected was
something that was going to check if the UFFDIO_API return said that it really
had the feature, and if so store a flag in the MIS somewhere.

Also, I'm not sure it's right to set 'api_struct.features' on the input - what
happens if this is run on an old kernel - we don't want postcopy to fail on
an old kernel without your feature.
I'm not 100% sure of the interface, but I think the way it works is you set
features = 0 before the call, and then check the api_struct.features in the
return - in the same way that I check for UFFD_FEATURE_MISSING_HUGETLBFS.


We need to ask kernel about that feature,
right,
kernel returns back available features
uffdio_api.features = UFFD_API_FEATURES
but it also stores requested features

I feel like this does not go against Dave's comment; maybe we just need
to send the UFFDIO_API twice? Like:

Yes, ioctl with UFFDIO_API will fail on an old kernel if we request
e.g. UFFD_FEATURE_THREAD_ID or another new feature.

So in general we need a per-feature request, for better error handling.

No, we don't need to - I think the way the kernel works is that you pass
features = 0 in, and it sets api_struct.features on the way out;
so if you always pass 0 in, you can then just check the features that
it returns.


Without explicitly setting UFFD_FEATURE_THREAD_ID, ptid will not be sent back
to user space.

Also it's impossible to call ioctl UFFD_API more than once, because the
internal state of userfault_ctx inside the kernel changes
UFFD_STATE_WAIT_API -> UFFD_STATE_RUNNING,
but ioctl UFFD_API expects UFFD_STATE_WAIT_API
^^^

So it looks like there is no way to provide backward compatibility for old kernels.
I don't even know what to do with new kernels, because the point of extension
should be for new kernels (e.g. I want to add a new feature in the future,
UFFD_FEATURE_ALLOW_PADDING, which will allow UFFD_COPY for a smaller page
size than was registered).
So what should we do in this case: add a new UFFD feature, like
UFFD_FEATURE_ALLOW_CALL_API_AGAIN (allow setting a not-always/non-persistent
feature, like UFFD_FEATURE_THREAD_ID)

or just remove condition in kernel while

[Qemu-devel] [PULL 2/2] s390x/misc_helper.c: wrap s390_virtio_hypercall in BQL

2017-04-25 Thread Alexander Graf

From: Aurelien Jarno 

s390_virtio_hypercall can trigger IO events and interrupts, most notably
when using virtio-ccw devices.

Reviewed-by: Alexander Graf 
Signed-off-by: Aurelien Jarno 
Reviewed-by: Philippe Mathieu-Daudé 
Fixes: 278f5e98c647 ("s390x/misc_helper.c: wrap IO instructions in BQL")
Signed-off-by: Alexander Graf 
---
 target/s390x/misc_helper.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c
index 83d3894..eca8244 100644
--- a/target/s390x/misc_helper.c
+++ b/target/s390x/misc_helper.c
@@ -288,7 +288,9 @@ void HELPER(diag)(CPUS390XState *env, uint32_t r1, uint32_t 
r3, uint32_t num)
 switch (num) {
 case 0x500:
 /* KVM hypercall */
+qemu_mutex_lock_iothread();
 r = s390_virtio_hypercall(env);
+qemu_mutex_unlock_iothread();
 break;
 case 0x44:
 /* yield */
-- 
1.8.5.6

[Qemu-devel] [PULL 1/2] target-s390x: Mask the SIGP order_code to 8bit.

2017-04-25 Thread Alexander Graf

From: Philipp Kern 

According to "CPU Signaling and Response", "Signal-Processor Orders",
the order field is bit position 56-63. Without this, the Linux
guest kernel is sometimes unable to stop emulation and enters
an infinite loop of "XXX unknown sigp: 0x0005".

Signed-off-by: Philipp Kern 
Reviewed-by: Thomas Huth 
[agraf: add comment according to email]
Signed-off-by: Alexander Graf 
---
 target/s390x/misc_helper.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c
index 93b0e61..83d3894 100644
--- a/target/s390x/misc_helper.c
+++ b/target/s390x/misc_helper.c
@@ -515,7 +515,8 @@ uint32_t HELPER(sigp)(CPUS390XState *env, uint64_t 
order_code, uint32_t r1,
 /* Remember: Use "R1 or R1 + 1, whichever is the odd-numbered register"
as parameter (input). Status (output) is always R1. */
 
-switch (order_code) {
+/* sigp contains the order code in bit positions 56-63, mask it here. */
+switch (order_code & 0xff) {
 case SIGP_SET_ARCH:
 /* switch arch */
 break;
-- 
1.8.5.6

Re: [Qemu-devel] [PATCH v8] Allow setting NUMA distance for different NUMA nodes

2017-04-25 Thread Igor Mammedov

On Tue, 25 Apr 2017 09:52:33 +0800
He Chen  wrote:

> This patch is going to add SLIT table support in QEMU, and provides
> additional option `dist` for command `-numa` to allow user set vNUMA
> distance by QEMU command.
> 
> With this patch, when a user wants to create a guest that contains
> several vNUMA nodes and also wants to set distance among those nodes,
> the QEMU command would like:
> 
> ```
> -numa node,nodeid=0,cpus=0 \
> -numa node,nodeid=1,cpus=1 \
> -numa node,nodeid=2,cpus=2 \
> -numa node,nodeid=3,cpus=3 \
> -numa dist,src=0,dst=1,val=21 \
> -numa dist,src=0,dst=2,val=31 \
> -numa dist,src=0,dst=3,val=41 \
> -numa dist,src=1,dst=2,val=21 \
> -numa dist,src=1,dst=3,val=31 \
> -numa dist,src=2,dst=3,val=21 \
> ```
> 
> Signed-off-by: He Chen 

Reviewed-by: Igor Mammedov 


> 
> ---
> Changes since v7:
> * Remove unnecessary node present check.
> * Minor improvement on prompt message.
> 
> Changes since v6:
> * Split validate_numa_distance into 2 separate functions.
> * Add comments before validate and complete numa distance functions.
> 
> Changes since v5:
> * Made the generation of the SLIT dependent on `have_numa_distance`.
> * Doc refinement.
> ---
>  hw/acpi/aml-build.c |  26 ++
>  hw/i386/acpi-build.c|   4 ++
>  include/hw/acpi/aml-build.h |   1 +
>  include/sysemu/numa.h   |   2 +
>  include/sysemu/sysemu.h |   4 ++
>  numa.c  | 124 
> 
>  qapi-schema.json|  30 ++-
>  qemu-options.hx |  16 +-
>  8 files changed, 204 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
> index c6f2032..be496c8 100644
> --- a/hw/acpi/aml-build.c
> +++ b/hw/acpi/aml-build.c
> @@ -24,6 +24,7 @@
>  #include "hw/acpi/aml-build.h"
>  #include "qemu/bswap.h"
>  #include "qemu/bitops.h"
> +#include "sysemu/numa.h"
>  
>  static GArray *build_alloc_array(void)
>  {
> @@ -1609,3 +1610,28 @@ void build_srat_memory(AcpiSratMemoryAffinity 
> *numamem, uint64_t base,
>  numamem->base_addr = cpu_to_le64(base);
>  numamem->range_length = cpu_to_le64(len);
>  }
> +
> +/*
> + * ACPI spec 5.2.17 System Locality Distance Information Table
> + * (Revision 2.0 or later)
> + */
> +void build_slit(GArray *table_data, BIOSLinker *linker)
> +{
> +int slit_start, i, j;
> +slit_start = table_data->len;
> +
> +acpi_data_push(table_data, sizeof(AcpiTableHeader));
> +
> +build_append_int_noprefix(table_data, nb_numa_nodes, 8);
> +for (i = 0; i < nb_numa_nodes; i++) {
> +for (j = 0; j < nb_numa_nodes; j++) {
> +assert(numa_info[i].distance[j]);
> +build_append_int_noprefix(table_data, numa_info[i].distance[j], 
> 1);
> +}
> +}
> +
> +build_header(linker, table_data,
> + (void *)(table_data->data + slit_start),
> + "SLIT",
> + table_data->len - slit_start, 1, NULL, NULL);
> +}
> diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
> index 2073108..2458ebc 100644
> --- a/hw/i386/acpi-build.c
> +++ b/hw/i386/acpi-build.c
> @@ -2678,6 +2678,10 @@ void acpi_build(AcpiBuildTables *tables, MachineState 
> *machine)
>  if (pcms->numa_nodes) {
>  acpi_add_table(table_offsets, tables_blob);
>  build_srat(tables_blob, tables->linker, machine);
> +if (have_numa_distance) {
> +acpi_add_table(table_offsets, tables_blob);
> +build_slit(tables_blob, tables->linker);
> +}
>  }
>  if (acpi_get_mcfg()) {
>  acpi_add_table(table_offsets, tables_blob);
> diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
> index 00c21f1..329a0d0 100644
> --- a/include/hw/acpi/aml-build.h
> +++ b/include/hw/acpi/aml-build.h
> @@ -389,4 +389,5 @@ GCC_FMT_ATTR(2, 3);
>  void build_srat_memory(AcpiSratMemoryAffinity *numamem, uint64_t base,
> uint64_t len, int node, MemoryAffinityFlags flags);
>  
> +void build_slit(GArray *table_data, BIOSLinker *linker);
>  #endif
> diff --git a/include/sysemu/numa.h b/include/sysemu/numa.h
> index 8f09dcf..0ea1bc0 100644
> --- a/include/sysemu/numa.h
> +++ b/include/sysemu/numa.h
> @@ -8,6 +8,7 @@
>  #include "hw/boards.h"
>  
>  extern int nb_numa_nodes;   /* Number of NUMA nodes */
> +extern bool have_numa_distance;
>  
>  struct numa_addr_range {
>  ram_addr_t mem_start;
> @@ -21,6 +22,7 @@ typedef struct node_info {
>  struct HostMemoryBackend *node_memdev;
>  bool present;
>  QLIST_HEAD(, numa_addr_range) addr; /* List to store address ranges */
> +uint8_t distance[MAX_NODES];
>  } NodeInfo;
>  
>  extern NodeInfo numa_info[MAX_NODES];
> diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
> index 576c7ce..6999545 100644
> --- a/include/sysemu/sysemu.h
> +++ b/include/sysemu/sysemu.h
> @@ -169,6 +169,10 @@ extern int

[Qemu-devel] [PULL 0/2] s390 patch queue 2017-04-25

2017-04-25 Thread Alexander Graf

Hi Peter,

This is my current patch queue for s390.  Please pull.

Alex


The following changes since commit f4b5b021c847669b1c78050aea26fe9abceef6dd:

  Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into 
staging (2017-04-25 09:21:54 +0100)

are available in the git repository at:


  git://github.com/agraf/qemu.git tags/signed-s390-for-upstream

for you to fetch changes up to 2cf9953beebd194a432ebd567399807d9b1f6a4d:

  s390x/misc_helper.c: wrap s390_virtio_hypercall in BQL (2017-04-25 13:39:43 
+0200)


Patch queue for s390 - 2017-04-25

Two simple fixes this time around:

  - fix BQL for s390 virtio target
  - Fix SIGP emulation


Aurelien Jarno (1):
  s390x/misc_helper.c: wrap s390_virtio_hypercall in BQL

Philipp Kern (1):
  target-s390x: Mask the SIGP order_code to 8bit.

 target/s390x/misc_helper.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

Re: [Qemu-devel] [PATCH RESEND v2 01/18] net/colo: Add notifier/callback related helpers for filter

2017-04-25 Thread Jason Wang




On 2017年04月22日 16:35, zhanghailiang wrote:

We will use this notifier to help COLO to notify filter object
to do something, like do checkpoint, or process failover event.

Cc: Jason Wang
Signed-off-by: zhanghailiang
Signed-off-by: Zhang Chen
Signed-off-by: Li Zhijian
---
  net/colo.c | 105 +
  net/colo.h |  19 +++
  2 files changed, 124 insertions(+)


I went through this series and I feel that the code duplicates (at least
part of) the functionality of IOThread (iothread.c). So I came to the idea of
switching to IOThread for the COLO comparing thread; then you can use aio bh
to do the inter-process communication.


Thoughts?

Thanks

Re: [Qemu-devel] [BUG] QEMU crashes with dpdk virtio pmd

2017-04-25 Thread wangyunjian

The q->tx_bh is freed in the virtio_net_del_queue() function when virtio queues
are removed because the guest doesn't support multiqueue. But it might still be
referenced by others (e.g. virtio_net_set_status()), so it needs to be set to NULL.
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 7d091c9..98bd683 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1522,9 +1522,12 @@ static void virtio_net_del_queue(VirtIONet *n, int index)
 if (q->tx_timer) {
 timer_del(q->tx_timer);
 timer_free(q->tx_timer);
+q->tx_timer = NULL;
 } else {
 qemu_bh_delete(q->tx_bh);
+q->tx_bh = NULL;
 }
+q->tx_waiting = 0;
 virtio_del_queue(vdev, index * 2 + 1);
 }

From: wangyunjian 
Sent: Monday, April 24, 2017 6:10 PM
To: qemu-devel@nongnu.org; Michael S. Tsirkin ; 'Jason Wang' 

Cc: wangyunjian ; caihe 
Subject: [Qemu-devel][BUG] QEMU crashes with dpdk virtio pmd 

Qemu crashes, with pre-condition:
vm xml config with multiqueue, and the vm's driver virtio-net support 
multi-queue

reproduce steps:
i. start dpdk testpmd in VM with the virtio nic
ii. stop testpmd
iii. reboot the VM

The problem was introduced by commit "f9d6dbf0  remove virtio queues if the
guest doesn't support multiqueue".

Qemu version: QEMU emulator version 2.9.50 (v2.9.0-137-g32c7e0a)
VM DPDK version:  DPDK-1.6.1

Call Trace:
#0  0x7f60881fe5d7 in raise () from /usr/lib64/libc.so.6
#1  0x7f60881ffcc8 in abort () from /usr/lib64/libc.so.6
#2  0x7f608823e2f7 in __libc_message () from /usr/lib64/libc.so.6
#3  0x7f60882456d3 in _int_free () from /usr/lib64/libc.so.6
#4  0x7f608900158f in g_free () from /usr/lib64/libglib-2.0.so.0
#5  0x7f6088fea32c in iter_remove_or_steal () from 
/usr/lib64/libglib-2.0.so.0
#6  0x7f608edc0986 in object_property_del_all (obj=0x7f6091e74800) at 
qom/object.c:410
#7  object_finalize (data=0x7f6091e74800) at qom/object.c:467
#8  object_unref (obj=obj@entry=0x7f6091e74800) at qom/object.c:903
#9  0x7f608eaf1fd3 in phys_section_destroy (mr=0x7f6091e74800) at 
git/qemu/exec.c:1154
#10 phys_sections_free (map=0x7f6090b72bb0) at git/qemu/exec.c:1163
#11 address_space_dispatch_free (d=0x7f6090b72b90) at git/qemu/exec.c:2514
#12 0x7f608ee91ace in call_rcu_thread (opaque=<optimized out>) at 
util/rcu.c:272
#13 0x7f6089b0ddc5 in start_thread () from /usr/lib64/libpthread.so.0
#14 0x7f60882bf71d in clone () from /usr/lib64/libc.so.6

Call Trace:
#0  0x7fdccaeb9790 in ?? ()
#1  0x7fdcd82d09fc in object_property_del_all (obj=0x7fdcdb8acf60) at 
qom/object.c:405
#2  object_finalize (data=0x7fdcdb8acf60) at qom/object.c:467
#3  object_unref (obj=obj@entry=0x7fdcdb8acf60) at qom/object.c:903
#4  0x7fdcd8001fd3 in phys_section_destroy (mr=0x7fdcdb8acf60) at 
git/qemu/exec.c:1154
#5  phys_sections_free (map=0x7fdcdc86aa00) at git/qemu/exec.c:1163
#6  address_space_dispatch_free (d=0x7fdcdc86a9e0) at git/qemu/exec.c:2514
#7  0x7fdcd83a1ace in call_rcu_thread (opaque=<optimized out>) at 
util/rcu.c:272
#8  0x7fdcd301ddc5 in start_thread () from /usr/lib64/libpthread.so.0
#9  0x7fdcd17cf71d in clone () from /usr/lib64/libc.so.6

Re: [Qemu-devel] [PATCH v2] hw/arm/exynos: Add generic SDHCI devices

2017-04-25 Thread Peter Maydell

On 22 April 2017 at 20:07, Krzysztof Kozlowski  wrote:
> Exynos4210 has four SD/MMC controllers supporting:
>  - SD Standard Host Specification Version 2.0,
>  - MMC Specification Version 4.3,
>  - SDIO Card Specification Version 2.0,
>  - DMA and ADMA.
>
> Add emulation of SDHCI devices which allows accessing storage through SD
> cards. Differences from real hardware:
>  - Devices are shipped with eMMC memory, not SD card.
>  - The Exynos4210 SDHCI has few more registers, e.g. for
>controlling the clocks, additional status (0x80, 0x84, 0x8c). These
>are not implemented.
>
> Testing on smdkc210 machine with "-drive file=FILE,if=sd,bus=0,index=2".
>
> Signed-off-by: Krzysztof Kozlowski 

Applied to master, thanks (since this is the only thing in my
ARM to-review queue...)

-- PMM

Re: [Qemu-devel] [PATCH 1/3] colo-compare: serialize compare thread's initialization with main thread

2017-04-25 Thread Jason Wang




On 2017年04月25日 17:59, Hailiang Zhang wrote:

On 2017/4/25 16:41, Jason Wang wrote:


On 2017年04月24日 14:03, Hailiang Zhang wrote:

On 2017/4/24 12:10, Jason Wang wrote:

On 2017年04月20日 15:46, zhanghailiang wrote:

We call qemu_chr_fe_set_handlers() in colo-compare thread, it is used
to detach watched fd from default main context, so it has chance to
handle the same watched fd with main thread concurrently, which will
trigger an error report:
"qemu-char.c:918: io_watch_poll_finalize: Assertion `iwp->src ==
((void *)0)' failed."

Any way to prevent the fd from being handled by the main thread before creating
the colo thread? Using a semaphore does not seem elegant.

So how about calling qemu_mutex_lock_iothread() before
qemu_chr_fe_set_handlers() ?
Looks better, but I need more information, e.g. how can the main thread
touch it?


Hmm, this happened only occasionally, and we didn't catch the first
place (backtrace) where the fd was removed from being watched, but from the
code logic, we found there could be such cases:
tcp_chr_write (Or tcp_chr_read/tcp_chr_sync_read/chr_disconnect)
 ->tcp_chr_disconnect (Or char_socket_finalize)
->tcp_chr_free_connection
  -> remove_fd_in_watch(chr);

Anyway, it needs protection from being freed twice.

Thanks,
Hailiang


Still a little bit confused. The question is how could the main thread still
call tcp_chr_write or the others in the above case?


Thanks


Thanks

.

Re: [Qemu-devel] [PATCH v2 07/13] tcg/i386: implement goto_ptr op

2017-04-25 Thread Richard Henderson


On 04/25/2017 09:53 AM, Emilio G. Cota wrote:

  static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
  {
+static const TCGTargetOpDef ri = { .args_ct_str = { "ri" } };
  static const TCGTargetOpDef ri_r = { .args_ct_str = { "ri", "r" } };
  static const TCGTargetOpDef re_r = { .args_ct_str = { "re", "r" } };
  static const TCGTargetOpDef qi_r = { .args_ct_str = { "qi", "r" } };
@@ -2324,6 +2333,9 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode 
op)
  case INDEX_op_st_i64:
      return &re_r;
  
+case INDEX_op_goto_ptr:

+return &ri;
+


This is incorrect.  You only handle register inputs (i.e. just "r") in your 
implementation.  Indeed, that's also the only thing that makes sense.



r~

Re: [Qemu-devel] question about block size and virtual disks

2017-04-25 Thread Kevin Wolf

Am 21.04.2017 um 18:26 hat Chris Friesen geschrieben:
> On 04/20/2017 03:21 PM, Eric Blake wrote:
> >On 04/20/2017 04:03 PM, Chris Friesen wrote:
> 
> >>Also, does the 4KB block size get "passed-through" to the guest somehow
> >>so that the guest knows it needs to use 4KB blocks, or does that need to
> >>be explicitly specified via virtio-blk-pci.logical_block_size and/or
> >>virtio-blk-pci.physical_block_size parameters?  (Assuming I'm using
> >>virtio-blk-pci.)
> >
> >Again, qemu should be passing the advertisement of host properties down
> >to the guest insofar as possible (so a good guest will see that the
> >hardware is 4k only and will not try to make 512-byte requests), but at
> >the same time, qemu should handle guests that are so old that they are
> >blissfully unaware of the hardware advertisements and send 512-byte
> >requests anyway.  Of course, such guests are penalized with
> >read-modify-write delays when submitting 512-byte IO.  But explicitly
> >stating available parameters is always the wisest course of action, if
> >you don't want to rely on defaults changing underneath you.
> 
> I did an experiment with qemu-kvm-ev-2.6.0-28.el7_3.6.1, using
> -drive cache=none,aio=native and an LVM volume as the storage.
> 
> The guest saw the logical/physical block size as 512B, even though
> on the host both were 4KB.
> 
> Unless something is being lost in the LVM layer (which is possible)
> it appears that qemu defaults to 512B block size unless explicitly
> told otherwise.

Yes, this is the default as it is the most compatible setting (qemu
didn't support 4k sectors initially).

If you know that your guest can handle 4k sectors just fine, it is
recommended to explicitly set both options to 4k to avoid expensive
512 byte sector emulation. If you need to boot from the disk with BIOS,
you won't be able to change the logical block size, but you can still
try setting the physical block size to 4k.

In practice, it shouldn't make too much of a difference because the
OS will work only in complete file system blocks and/or pages, which are
already 4k, so even on a disk with 512 byte sectors, you should mostly
see 4k aligned requests that don't need emulation in qemu.

Kevin

Re: [Qemu-devel] [PATCH] target-s390x: Mask the SIGP order_code to 8bit.

2017-04-25 Thread Alexander Graf


On 04/25/2017 01:21 PM, Philipp Kern wrote:

On 2017-04-25 11:51, Richard Henderson wrote:

On 04/24/2017 10:25 AM, Alexander Graf wrote:

On 24.04.17 00:32, Aurelien Jarno wrote:

From: Philipp Kern 

According to "CPU Signaling and Response", "Signal-Processor Orders",
the order field is bit position 56-63. Without this, the Linux
guest kernel is sometimes unable to stop emulation and enters
an infinite loop of "XXX unknown sigp: 0x0005".

Signed-off-by: Philipp Kern 
Signed-off-by: Aurelien Jarno 
---
 target/s390x/misc_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

This patch was sent by Philipp Kern a long time ago, and it seems to
have been lost. I am resending it, as it is still useful.

diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c
index 3bf09ea222..4946b56ab3 100644
--- a/target/s390x/misc_helper.c
+++ b/target/s390x/misc_helper.c
@@ -534,7 +534,7 @@ uint32_t HELPER(sigp)(CPUS390XState *env, 
uint64_t order_code, uint32_t r1,
 /* Remember: Use "R1 or R1 + 1, whichever is the odd-numbered 
register"

as parameter (input). Status (output) is always R1. */

-switch (order_code) {
+switch (order_code & 0xff) {


This definitely needs a comment above the mask. Ideally I'd love to 
just change the function prototype to pass order_code as uint8_t, 
but I don't think that's possible with the TCG glue.


Correct.  We'll need to leave the mask here.


I shall point out that Alexander merged it into the s390-next tree 
when I first sent it but that was never merged into qemu proper. I 
don't think there's a problem in adding a comment that says what the 
commit description says right there, like this:


/* sigp contains the order code in bit positions 56-63, mask it here. */


Ouch, you're right. Let me fix that up and send out a pull request.


Alex

[Qemu-devel] [PULL V2 8/8] COLO-compare: Optimize tcp compare trace event

2017-04-25 Thread Jason Wang

From: Zhang Chen 

Merge the two trace events into one and adjust the print format to make
it easier to read. Rename trace_colo_compare_pkt_info_src/dst
to trace_colo_compare_tcp_info.

Signed-off-by: Zhang Chen 
Signed-off-by: Jason Wang 
---
 net/colo-compare.c | 29 +
 net/trace-events   |  3 +--
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index 4ab80b1..03ddebe 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -264,18 +264,23 @@ static int colo_packet_compare_tcp(Packet *spkt, Packet 
*ppkt)
 res = -1;
 }
 
-if (res != 0 && trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) {
-trace_colo_compare_pkt_info_src(inet_ntoa(ppkt->ip->ip_src),
-ntohl(stcp->th_seq),
-ntohl(stcp->th_ack),
-res, stcp->th_flags,
-spkt->size);
-
-trace_colo_compare_pkt_info_dst(inet_ntoa(ppkt->ip->ip_dst),
-ntohl(ptcp->th_seq),
-ntohl(ptcp->th_ack),
-res, ptcp->th_flags,
-ppkt->size);
+if (res && trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) {
+char ip_src[20], ip_dst[20];
+
+strcpy(ip_src, inet_ntoa(ppkt->ip->ip_src));
+strcpy(ip_dst, inet_ntoa(ppkt->ip->ip_dst));
+
+trace_colo_compare_tcp_info(ip_src,
+ip_dst,
+ntohl(ptcp->th_seq),
+ntohl(stcp->th_seq),
+ntohl(ptcp->th_ack),
+ntohl(stcp->th_ack),
+res,
+ptcp->th_flags,
+stcp->th_flags,
+ppkt->size,
+spkt->size);
 
 qemu_hexdump((char *)ppkt->data, stderr,
  "colo-compare ppkt", ppkt->size);
diff --git a/net/trace-events b/net/trace-events
index 35198bc..123cb28 100644
--- a/net/trace-events
+++ b/net/trace-events
@@ -13,8 +13,7 @@ colo_compare_icmp_miscompare(const char *sta, int size) ": %s 
= %d"
 colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, 
const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, 
spkt size = %d, ip_src = %s, ip_dst = %s"
 colo_old_packet_check_found(int64_t old_time) "%" PRId64
 colo_compare_miscompare(void) ""
-colo_compare_pkt_info_src(const char *src, uint32_t sseq, uint32_t sack, int 
res, uint32_t sflag, int ssize) "src/dst: %s s: seq/ack=%u/%u res=%d flags=%x 
spkt_size: %d\n"
-colo_compare_pkt_info_dst(const char *dst, uint32_t dseq, uint32_t dack, int 
res, uint32_t dflag, int dsize) "src/dst: %s d: seq/ack=%u/%u res=%d flags=%x 
dpkt_size: %d\n"
+colo_compare_tcp_info(const char *src, const char *dst, uint32_t pseq, 
uint32_t sseq, uint32_t pack, uint32_t sack, int res, uint32_t pflag, uint32_t 
sflag, int psize, int ssize) "src/dst: %s/%s pseq/sseq:%u/%u pack/sack:%u/%u 
res=%d pflags/sflag:%x/%x psize/ssize:%d/%d \n"
 
 # net/filter-rewriter.c
 colo_filter_rewriter_debug(void) ""
-- 
2.7.4

[Qemu-devel] [PULL V2 5/8] aspeed: add a FTGMAC100 nic

2017-04-25 Thread Jason Wang

From: Cédric Le Goater 

There is a second NIC but we do not use it for the moment. We use the
'aspeed' property to tune the definition of the end of ring buffer bit
for the Aspeed SoCs.

Signed-off-by: Cédric Le Goater 
Signed-off-by: Jason Wang 
---
 hw/arm/aspeed_soc.c | 21 +
 include/hw/arm/aspeed_soc.h |  2 ++
 2 files changed, 23 insertions(+)

diff --git a/hw/arm/aspeed_soc.c b/hw/arm/aspeed_soc.c
index 571e4f0..4937e2b 100644
--- a/hw/arm/aspeed_soc.c
+++ b/hw/arm/aspeed_soc.c
@@ -19,6 +19,7 @@
 #include "hw/char/serial.h"
 #include "qemu/log.h"
 #include "hw/i2c/aspeed_i2c.h"
+#include "net/net.h"
 
 #define ASPEED_SOC_UART_5_BASE  0x00184000
 #define ASPEED_SOC_IOMEM_SIZE   0x0020
@@ -33,6 +34,8 @@
 #define ASPEED_SOC_TIMER_BASE   0x1E782000
 #define ASPEED_SOC_WDT_BASE 0x1E785000
 #define ASPEED_SOC_I2C_BASE 0x1E78A000
+#define ASPEED_SOC_ETH1_BASE0x1E66
+#define ASPEED_SOC_ETH2_BASE0x1E68
 
 static const int uart_irqs[] = { 9, 32, 33, 34, 10 };
 static const int timer_irqs[] = { 16, 17, 18, 35, 36, 37, 38, 39, };
@@ -175,6 +178,10 @@ static void aspeed_soc_init(Object *obj)
 object_initialize(>wdt, sizeof(s->wdt), TYPE_ASPEED_WDT);
 object_property_add_child(obj, "wdt", OBJECT(>wdt), NULL);
 qdev_set_parent_bus(DEVICE(>wdt), sysbus_get_default());
+
+object_initialize(>ftgmac100, sizeof(s->ftgmac100), TYPE_FTGMAC100);
+object_property_add_child(obj, "ftgmac100", OBJECT(>ftgmac100), NULL);
+qdev_set_parent_bus(DEVICE(>ftgmac100), sysbus_get_default());
 }
 
 static void aspeed_soc_realize(DeviceState *dev, Error **errp)
@@ -299,6 +306,20 @@ static void aspeed_soc_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 sysbus_mmio_map(SYS_BUS_DEVICE(>wdt), 0, ASPEED_SOC_WDT_BASE);
+
+/* Net */
+qdev_set_nic_properties(DEVICE(>ftgmac100), _table[0]);
+object_property_set_bool(OBJECT(>ftgmac100), true, "aspeed", );
+object_property_set_bool(OBJECT(>ftgmac100), true, "realized",
+ _err);
+error_propagate(, local_err);
+if (err) {
+error_propagate(errp, err);
+return;
+}
+sysbus_mmio_map(SYS_BUS_DEVICE(>ftgmac100), 0, ASPEED_SOC_ETH1_BASE);
+sysbus_connect_irq(SYS_BUS_DEVICE(>ftgmac100), 0,
+   qdev_get_gpio_in(DEVICE(>vic), 2));
 }
 
 static void aspeed_soc_class_init(ObjectClass *oc, void *data)
diff --git a/include/hw/arm/aspeed_soc.h b/include/hw/arm/aspeed_soc.h
index dbec0c1..4c5fc66 100644
--- a/include/hw/arm/aspeed_soc.h
+++ b/include/hw/arm/aspeed_soc.h
@@ -20,6 +20,7 @@
 #include "hw/i2c/aspeed_i2c.h"
 #include "hw/ssi/aspeed_smc.h"
 #include "hw/watchdog/wdt_aspeed.h"
+#include "hw/net/ftgmac100.h"
 
 #define ASPEED_SPIS_NUM  2
 
@@ -39,6 +40,7 @@ typedef struct AspeedSoCState {
 AspeedSMCState spi[ASPEED_SPIS_NUM];
 AspeedSDMCState sdmc;
 AspeedWDTState wdt;
+FTGMAC100State ftgmac100;
 } AspeedSoCState;
 
 #define TYPE_ASPEED_SOC "aspeed-soc"
-- 
2.7.4

[Qemu-devel] [PULL V2 3/8] net: add FTGMAC100 support

2017-04-25 Thread Jason Wang

From: Cédric Le Goater 

The FTGMAC100 device is an Ethernet controller with DMA function that
can be found on Aspeed SoCs (which include NCSI).

It is fully compliant with IEEE 802.3 specification for 10/100 Mbps
Ethernet and IEEE 802.3z specification for 1000 Mbps Ethernet and
includes Reduced Media Independent Interface (RMII) and Reduced
Gigabit Media Independent Interface (RGMII) interfaces. It adopts an
AHB bus interface and integrates a link list DMA engine with direct
M-Bus accesses for transmitting and receiving packets. It has
independent TX/RX fifos, supports half and full duplex (1000 Mbps mode
only supports full duplex), flow control for full duplex and
backpressure for half duplex.

The FTGMAC100 also implements IP, TCP, UDP checksum offloads and
supports IEEE 802.1Q VLAN tag insertion and removal. It offers a
high-priority transmit queue for QoS and CoS applications.

This model is backed with a RealTek 8211E PHY which is the chip found
on the AST2500 EVB. It is complete enough to satisfy two different
Linux drivers and a U-Boot driver. The following features are not supported:

 - IEEE 802.1Q VLAN
 - High Priority Transmit Queue
 - Wake-On-LAN functions

The code is based on the Coldfire Fast Ethernet Controller model.

Signed-off-by: Cédric Le Goater 
Signed-off-by: Jason Wang 
---
 default-configs/arm-softmmu.mak |1 +
 hw/net/Makefile.objs|1 +
 hw/net/ftgmac100.c  | 1003 +++
 include/hw/net/ftgmac100.h  |   60 +++
 4 files changed, 1065 insertions(+)
 create mode 100644 hw/net/ftgmac100.c
 create mode 100644 include/hw/net/ftgmac100.h

diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
index 1e3bd2b..78d7af0 100644
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -29,6 +29,7 @@ CONFIG_LAN9118=y
 CONFIG_SMC91C111=y
 CONFIG_ALLWINNER_EMAC=y
 CONFIG_IMX_FEC=y
+CONFIG_FTGMAC100=y
 CONFIG_DS1338=y
 CONFIG_PFLASH_CFI01=y
 CONFIG_PFLASH_CFI02=y
diff --git a/hw/net/Makefile.objs b/hw/net/Makefile.objs
index 6a95d92..5ddaffe 100644
--- a/hw/net/Makefile.objs
+++ b/hw/net/Makefile.objs
@@ -26,6 +26,7 @@ common-obj-$(CONFIG_IMX_FEC) += imx_fec.o
 common-obj-$(CONFIG_CADENCE) += cadence_gem.o
 common-obj-$(CONFIG_STELLARIS_ENET) += stellaris_enet.o
 common-obj-$(CONFIG_LANCE) += lance.o
+common-obj-$(CONFIG_FTGMAC100) += ftgmac100.o
 
 obj-$(CONFIG_ETRAXFS) += etraxfs_eth.o
 obj-$(CONFIG_COLDFIRE) += mcf_fec.o
diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c
new file mode 100644
index 000..c35f368
--- /dev/null
+++ b/hw/net/ftgmac100.c
@@ -0,0 +1,1003 @@
+/*
+ * Faraday FTGMAC100 Gigabit Ethernet
+ *
+ * Copyright (C) 2016-2017, IBM Corporation.
+ *
+ * Based on Coldfire Fast Ethernet Controller emulation.
+ *
+ * Copyright (c) 2007 CodeSourcery.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/net/ftgmac100.h"
+#include "sysemu/dma.h"
+#include "qemu/log.h"
+#include "net/checksum.h"
+#include "net/eth.h"
+#include "hw/net/mii.h"
+
+/* For crc32 */
+#include 
+
+/*
+ * FTGMAC100 registers
+ */
+#define FTGMAC100_ISR 0x00
+#define FTGMAC100_IER 0x04
+#define FTGMAC100_MAC_MADR0x08
+#define FTGMAC100_MAC_LADR0x0c
+#define FTGMAC100_MATH0   0x10
+#define FTGMAC100_MATH1   0x14
+#define FTGMAC100_NPTXPD  0x18
+#define FTGMAC100_RXPD0x1C
+#define FTGMAC100_NPTXR_BADR  0x20
+#define FTGMAC100_RXR_BADR0x24
+#define FTGMAC100_HPTXPD  0x28
+#define FTGMAC100_HPTXR_BADR  0x2c
+#define FTGMAC100_ITC 0x30
+#define FTGMAC100_APTC0x34
+#define FTGMAC100_DBLAC   0x38
+#define FTGMAC100_REVR0x40
+#define FTGMAC100_FEAR1   0x44
+#define FTGMAC100_RBSR0x4c
+#define FTGMAC100_TPAFCR  0x48
+
+#define FTGMAC100_MACCR   0x50
+#define FTGMAC100_MACSR   0x54
+#define FTGMAC100_PHYCR   0x60
+#define FTGMAC100_PHYDATA 0x64
+#define FTGMAC100_FCR 0x68
+
+/*
+ * Interrupt status register & interrupt enable register
+ */
+#define FTGMAC100_INT_RPKT_BUF(1 << 0)
+#define FTGMAC100_INT_RPKT_FIFO   (1 << 1)
+#define FTGMAC100_INT_NO_RXBUF(1 << 2)
+#define FTGMAC100_INT_RPKT_LOST   (1 << 3)
+#define FTGMAC100_INT_XPKT_ETH(1 << 4)
+#define FTGMAC100_INT_XPKT_FIFO   (1 << 5)
+#define FTGMAC100_INT_NO_NPTXBUF  (1 << 6)
+#define FTGMAC100_INT_XPKT_LOST   (1 << 7)
+#define FTGMAC100_INT_AHB_ERR (1 << 8)
+#define FTGMAC100_INT_PHYSTS_CHG  (1 << 9)
+#define FTGMAC100_INT_NO_HPTXBUF  (1 << 10)
+
+/*
+ * Automatic polling timer control register
+ */
+#define FTGMAC100_APTC_RXPOLL_CNT(x)((x) & 0xf)
+#define FTGMAC100_APTC_RXPOLL_TIME_SEL  (1 << 4)
+#define FTGMAC100_APTC_TXPOLL_CNT(x)(((x) >> 8) & 0xf)
+#define

[Qemu-devel] [PULL V2 7/8] COLO-compare: Optimize tcp compare for option field

2017-04-25 Thread Jason Wang

From: Zhang Chen 

In this patch we add support for packets that have a TCP options field.
Add a check for the TCP options field: if the packet has an options
field, we just skip it and compare the TCP payload. This avoids
unnecessary checkpoints and improves performance.

Signed-off-by: Zhang Chen 
Signed-off-by: Jason Wang 
---
 net/colo-compare.c | 27 ++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index 9b09cfc..4ab80b1 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -233,7 +233,32 @@ static int colo_packet_compare_tcp(Packet *spkt, Packet 
*ppkt)
 spkt->ip->ip_sum = ppkt->ip->ip_sum;
 }
 
-if (ptcp->th_sum == stcp->th_sum) {
+/*
+ * Check tcp header length for tcp option field.
+ * th_off > 5 means this tcp packet have options field.
+ * The tcp options maybe always different.
+ * for example:
+ * From RFC 7323.
+ * TCP Timestamps option (TSopt):
+ * Kind: 8
+ *
+ * Length: 10 bytes
+ *
+ *+---+---+-+-+
+ *|Kind=8 |  10   |   TS Value (TSval)  |TS Echo Reply (TSecr)|
+ *+---+---+-+-+
+ *   1   1  4 4
+ *
+ * In this case the primary guest's timestamp always different with
+ * the secondary guest's timestamp. COLO just focus on payload,
+ * so we just need skip this field.
+ */
+if (ptcp->th_off > 5) {
+ptrdiff_t tcp_offset;
+tcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data
+ + (ptcp->th_off * 4);
+res = colo_packet_compare_common(ppkt, spkt, tcp_offset);
+} else if (ptcp->th_sum == stcp->th_sum) {
 res = colo_packet_compare_common(ppkt, spkt, ETH_HLEN);
 } else {
 res = -1;
-- 
2.7.4

[Qemu-devel] [PULL V2 6/8] slirp: add a fake NC-SI backend

2017-04-25 Thread Jason Wang

From: Cédric Le Goater 

NC-SI (Network Controller Sideband Interface) enables a BMC to manage
a set of NICs on a system. This model takes the simplest approach and
reverses the NC-SI packets to pretend a NIC is present and exercise
the Linux driver.

The NCSI header file (ncsi-pkt.h) comes from mainline Linux and was
untabified.

Signed-off-by: Cédric Le Goater 
Reviewed-by: Philippe Mathieu-Daudé 
Acked-by: Samuel Thibault 
Signed-off-by: Jason Wang 
---
 include/net/eth.h   |   1 +
 slirp/Makefile.objs |   2 +-
 slirp/ncsi-pkt.h| 419 
 slirp/ncsi.c| 130 
 slirp/slirp.c   |   4 +
 slirp/slirp.h   |   3 +
 6 files changed, 558 insertions(+), 1 deletion(-)
 create mode 100644 slirp/ncsi-pkt.h
 create mode 100644 slirp/ncsi.c

diff --git a/include/net/eth.h b/include/net/eth.h
index afeb45b..09054a5 100644
--- a/include/net/eth.h
+++ b/include/net/eth.h
@@ -209,6 +209,7 @@ struct tcp_hdr {
 #define ETH_P_IPV6(0x86dd)
 #define ETH_P_VLAN(0x8100)
 #define ETH_P_DVLAN   (0x88a8)
+#define ETH_P_NCSI(0x88f8)
 #define ETH_P_UNKNOWN (0x)
 #define VLAN_VID_MASK 0x0fff
 #define IP_HEADER_VERSION_4   (4)
diff --git a/slirp/Makefile.objs b/slirp/Makefile.objs
index 1baa1f1..28049b0 100644
--- a/slirp/Makefile.objs
+++ b/slirp/Makefile.objs
@@ -2,4 +2,4 @@ common-obj-y = cksum.o if.o ip_icmp.o ip6_icmp.o ip6_input.o 
ip6_output.o \
ip_input.o ip_output.o dnssearch.o dhcpv6.o
 common-obj-y += slirp.o mbuf.o misc.o sbuf.o socket.o tcp_input.o tcp_output.o
 common-obj-y += tcp_subr.o tcp_timer.o udp.o udp6.o bootp.o tftp.o arp_table.o 
\
-ndp_table.o
+ndp_table.o ncsi.o
diff --git a/slirp/ncsi-pkt.h b/slirp/ncsi-pkt.h
new file mode 100644
index 000..ea07d1c
--- /dev/null
+++ b/slirp/ncsi-pkt.h
@@ -0,0 +1,419 @@
+/*
+ * Copyright Gavin Shan, IBM Corporation 2016.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef NCSI_PKT_H
+#define NCSI_PKT_H
+
+/* from linux/net/ncsi/ncsi-pkt.h */
+#define __be32 uint32_t
+#define __be16 uint16_t
+
+struct ncsi_pkt_hdr {
+unsigned char mc_id;/* Management controller ID */
+unsigned char revision; /* NCSI version - 0x01  */
+unsigned char reserved; /* Reserved */
+unsigned char id;   /* Packet sequence number   */
+unsigned char type; /* Packet type  */
+unsigned char channel;  /* Network controller ID*/
+__be16length;   /* Payload length   */
+__be32reserved1[2]; /* Reserved */
+};
+
+struct ncsi_cmd_pkt_hdr {
+struct ncsi_pkt_hdr common; /* Common NCSI packet header */
+};
+
+struct ncsi_rsp_pkt_hdr {
+struct ncsi_pkt_hdr common; /* Common NCSI packet header */
+__be16  code;   /* Response code */
+__be16  reason; /* Response reason   */
+};
+
+struct ncsi_aen_pkt_hdr {
+struct ncsi_pkt_hdr common;   /* Common NCSI packet header */
+unsigned char   reserved2[3]; /* Reserved  */
+unsigned char   type; /* AEN packet type   */
+};
+
+/* NCSI common command packet */
+struct ncsi_cmd_pkt {
+struct ncsi_cmd_pkt_hdr cmd;  /* Command header */
+__be32  checksum; /* Checksum   */
+unsigned char   pad[26];
+};
+
+struct ncsi_rsp_pkt {
+struct ncsi_rsp_pkt_hdr rsp;  /* Response header */
+__be32  checksum; /* Checksum*/
+unsigned char   pad[22];
+};
+
+/* Select Package */
+struct ncsi_cmd_sp_pkt {
+struct ncsi_cmd_pkt_hdr cmd;/* Command header */
+unsigned char   reserved[3];/* Reserved   */
+unsigned char   hw_arbitration; /* HW arbitration */
+__be32  checksum;   /* Checksum   */
+unsigned char   pad[22];
+};
+
+/* Disable Channel */
+struct ncsi_cmd_dc_pkt {
+struct ncsi_cmd_pkt_hdr cmd; /* Command header  */
+unsigned char   reserved[3]; /* Reserved*/
+unsigned char   ald; /* Allow link down */
+__be32  checksum;/* Checksum*/
+unsigned char   pad[22];
+};
+
+/* Reset Channel */
+struct ncsi_cmd_rc_pkt {
+struct ncsi_cmd_pkt_hdr cmd;  /* Command header */
+__be32

[Qemu-devel] [PULL V2 1/8] colo-compare: Fix old packet check bug.

2017-04-25 Thread Jason Wang

From: Zhang Chen 

If colo-compare finds one old packet, we can notify the COLO frame to
do a checkpoint; there is no need to continue looking for more old
packets here.

Signed-off-by: Zhang Chen 
Signed-off-by: Jason Wang 
---
 net/colo-compare.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index 54e6d40..9b09cfc 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -372,10 +372,9 @@ static int colo_old_packet_check_one(Packet *pkt, int64_t 
*check_time)
 }
 }
 
-static void colo_old_packet_check_one_conn(void *opaque,
-   void *user_data)
+static int colo_old_packet_check_one_conn(Connection *conn,
+  void *user_data)
 {
-Connection *conn = opaque;
 GList *result = NULL;
 int64_t check_time = REGULAR_PACKET_CHECK_MS;
 
@@ -386,7 +385,10 @@ static void colo_old_packet_check_one_conn(void *opaque,
 if (result) {
 /* do checkpoint will flush old packet */
 /* TODO: colo_notify_checkpoint();*/
+return 0;
 }
+
+return 1;
 }
 
 /*
@@ -398,7 +400,12 @@ static void colo_old_packet_check(void *opaque)
 {
 CompareState *s = opaque;
 
-g_queue_foreach(>conn_list, colo_old_packet_check_one_conn, NULL);
+/*
+ * If we find one old packet, stop finding job and notify
+ * COLO frame do checkpoint.
+ */
+g_queue_find_custom(>conn_list, NULL,
+(GCompareFunc)colo_old_packet_check_one_conn);
 }
 
 /*
-- 
2.7.4

[Qemu-devel] [PULL V2 0/8] Net patches

2017-04-25 Thread Jason Wang

The following changes since commit 32c7e0ab755745e961f1772e95cac381cc68769d:

  Merge remote-tracking branch 'remotes/juanquintela/tags/migration/20170421' 
into staging (2017-04-21 15:59:27 +0100)

are available in the git repository at:

  https://github.com/jasowang/qemu.git tags/net-pull-request

for you to fetch changes up to 0fc8aec7de64f2bf83a274a2a38b938ce03425d2:

  COLO-compare: Optimize tcp compare trace event (2017-04-25 19:17:25 +0800)



Changes from V1:
- Fix clang warnings


Cédric Le Goater (5):
  hw/net: add MII definitions
  net: add FTGMAC100 support
  net/ftgmac100: add a 'aspeed' property
  aspeed: add a FTGMAC100 nic
  slirp: add a fake NC-SI backend

Zhang Chen (3):
  colo-compare: Fix old packet check bug.
  COLO-compare: Optimize tcp compare for option field
  COLO-compare: Optimize tcp compare trace event

 default-configs/arm-softmmu.mak |1 +
 hw/arm/aspeed_soc.c |   21 +
 hw/net/Makefile.objs|1 +
 hw/net/ftgmac100.c  | 1016 +++
 include/hw/arm/aspeed_soc.h |2 +
 include/hw/net/ftgmac100.h  |   64 +++
 include/hw/net/mii.h|   71 ++-
 include/net/eth.h   |1 +
 net/colo-compare.c  |   69 ++-
 net/trace-events|3 +-
 slirp/Makefile.objs |2 +-
 slirp/ncsi-pkt.h|  419 
 slirp/ncsi.c|  130 +
 slirp/slirp.c   |4 +
 slirp/slirp.h   |3 +
 15 files changed, 1770 insertions(+), 37 deletions(-)
 create mode 100644 hw/net/ftgmac100.c
 create mode 100644 include/hw/net/ftgmac100.h
 create mode 100644 slirp/ncsi-pkt.h
 create mode 100644 slirp/ncsi.c

[Qemu-devel] [PULL V2 4/8] net/ftgmac100: add a 'aspeed' property

2017-04-25 Thread Jason Wang

From: Cédric Le Goater 

The Aspeed SoCs have a different definition of the end of the ring
buffer bit. Add a property to specify which set of bits should be used
by the NIC.

Signed-off-by: Cédric Le Goater 
Signed-off-by: Jason Wang 
---
 hw/net/ftgmac100.c | 17 +++--
 include/hw/net/ftgmac100.h |  4 
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c
index c35f368..3c36ab9 100644
--- a/hw/net/ftgmac100.c
+++ b/hw/net/ftgmac100.c
@@ -126,6 +126,7 @@
 #define FTGMAC100_TXDES0_CRC_ERR (1 << 19)
 #define FTGMAC100_TXDES0_LTS (1 << 28)
 #define FTGMAC100_TXDES0_FTS (1 << 29)
+#define FTGMAC100_TXDES0_EDOTR_ASPEED(1 << 30)
 #define FTGMAC100_TXDES0_TXDMA_OWN   (1 << 31)
 
 #define FTGMAC100_TXDES1_VLANTAG_CI(x)   ((x) & 0x)
@@ -154,6 +155,7 @@
 #define FTGMAC100_RXDES0_PAUSE_FRAME (1 << 25)
 #define FTGMAC100_RXDES0_LRS (1 << 28)
 #define FTGMAC100_RXDES0_FRS (1 << 29)
+#define FTGMAC100_RXDES0_EDORR_ASPEED(1 << 30)
 #define FTGMAC100_RXDES0_RXPKT_RDY   (1 << 31)
 
 #define FTGMAC100_RXDES1_VLANTAG_CI  0x
@@ -462,7 +464,7 @@ static void ftgmac100_do_tx(FTGMAC100State *s, uint32_t 
tx_ring,
 /* Write back the modified descriptor.  */
 ftgmac100_write_bd(, addr);
 /* Advance to the next descriptor.  */
-if (bd.des0 & FTGMAC100_TXDES0_EDOTR) {
+if (bd.des0 & s->txdes0_edotr) {
 addr = tx_ring;
 } else {
 addr += sizeof(FTGMAC100Desc);
@@ -880,7 +882,7 @@ static ssize_t ftgmac100_receive(NetClientState *nc, const 
uint8_t *buf,
 s->isr |= FTGMAC100_INT_RPKT_FIFO;
 }
 ftgmac100_write_bd(, addr);
-if (bd.des0 & FTGMAC100_RXDES0_EDORR) {
+if (bd.des0 & s->rxdes0_edorr) {
 addr = s->rx_ring;
 } else {
 addr += sizeof(FTGMAC100Desc);
@@ -921,6 +923,14 @@ static void ftgmac100_realize(DeviceState *dev, Error 
**errp)
 FTGMAC100State *s = FTGMAC100(dev);
 SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
 
+if (s->aspeed) {
+s->txdes0_edotr = FTGMAC100_TXDES0_EDOTR_ASPEED;
+s->rxdes0_edorr = FTGMAC100_RXDES0_EDORR_ASPEED;
+} else {
+s->txdes0_edotr = FTGMAC100_TXDES0_EDOTR;
+s->rxdes0_edorr = FTGMAC100_RXDES0_EDORR;
+}
+
 memory_region_init_io(>iomem, OBJECT(dev), _ops, s,
   TYPE_FTGMAC100, 0x2000);
 sysbus_init_mmio(sbd, >iomem);
@@ -967,11 +977,14 @@ static const VMStateDescription vmstate_ftgmac100 = {
 VMSTATE_UINT32(phy_advertise, FTGMAC100State),
 VMSTATE_UINT32(phy_int, FTGMAC100State),
 VMSTATE_UINT32(phy_int_mask, FTGMAC100State),
+VMSTATE_UINT32(txdes0_edotr, FTGMAC100State),
+VMSTATE_UINT32(rxdes0_edorr, FTGMAC100State),
 VMSTATE_END_OF_LIST()
 }
 };
 
 static Property ftgmac100_properties[] = {
+DEFINE_PROP_BOOL("aspeed", FTGMAC100State, aspeed, false),
 DEFINE_NIC_PROPERTIES(FTGMAC100State, conf),
 DEFINE_PROP_END_OF_LIST(),
 };
diff --git a/include/hw/net/ftgmac100.h b/include/hw/net/ftgmac100.h
index 962a718..d9bc589 100644
--- a/include/hw/net/ftgmac100.h
+++ b/include/hw/net/ftgmac100.h
@@ -55,6 +55,10 @@ typedef struct FTGMAC100State {
 uint32_t phy_advertise;
 uint32_t phy_int;
 uint32_t phy_int_mask;
+
+bool aspeed;
+uint32_t txdes0_edotr;
+uint32_t rxdes0_edorr;
 } FTGMAC100State;
 
 #endif
-- 
2.7.4

[Qemu-devel] [PULL V2 2/8] hw/net: add MII definitions

2017-04-25 Thread Jason Wang

From: Cédric Le Goater 

This adds comments on the Basic mode control and status registers bit
definitions. It also adds a couple of bits for 1000BASE-T and the
RealTek 8211E PHY for the FTGMAC100 model to use.

Signed-off-by: Cédric Le Goater 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Jason Wang 
---
 include/hw/net/mii.h | 71 +++-
 1 file changed, 53 insertions(+), 18 deletions(-)

diff --git a/include/hw/net/mii.h b/include/hw/net/mii.h
index 9fdd7bb..6ce48a6 100644
--- a/include/hw/net/mii.h
+++ b/include/hw/net/mii.h
@@ -22,13 +22,20 @@
 #define MII_H
 
 /* PHY registers */
-#define MII_BMCR0
-#define MII_BMSR1
-#define MII_PHYID1  2
-#define MII_PHYID2  3
-#define MII_ANAR4
-#define MII_ANLPAR  5
-#define MII_ANER6
+#define MII_BMCR0  /* Basic mode control register */
+#define MII_BMSR1  /* Basic mode status register */
+#define MII_PHYID1  2  /* ID register 1 */
+#define MII_PHYID2  3  /* ID register 2 */
+#define MII_ANAR4  /* Autonegotiation advertisement */
+#define MII_ANLPAR  5  /* Autonegotiation lnk partner abilities */
+#define MII_ANER6  /* Autonegotiation expansion */
+#define MII_ANNP7  /* Autonegotiation next page */
+#define MII_ANLPRNP 8  /* Autonegotiation link partner rx next page */
+#define MII_CTRL10009  /* 1000BASE-T control */
+#define MII_STAT100010 /* 1000BASE-T status */
+#define MII_MDDACR  13 /* MMD access control */
+#define MII_MDDAADR 14 /* MMD access address data */
+#define MII_EXTSTAT 15 /* Extended Status */
 #define MII_NSR 16
 #define MII_LBREMR  17
 #define MII_REC 18
@@ -38,19 +45,33 @@
 /* PHY registers fields */
 #define MII_BMCR_RESET  (1 << 15)
 #define MII_BMCR_LOOPBACK   (1 << 14)
-#define MII_BMCR_SPEED  (1 << 13)
-#define MII_BMCR_AUTOEN (1 << 12)
-#define MII_BMCR_FD (1 << 8)
+#define MII_BMCR_SPEED100   (1 << 13)  /* LSB of Speed (100) */
+#define MII_BMCR_SPEED  MII_BMCR_SPEED100
+#define MII_BMCR_AUTOEN (1 << 12) /* Autonegotiation enable */
+#define MII_BMCR_PDOWN  (1 << 11) /* Enable low power state */
+#define MII_BMCR_ISOLATE(1 << 10) /* Isolate data paths from MII */
+#define MII_BMCR_ANRESTART  (1 << 9)  /* Auto negotiation restart */
+#define MII_BMCR_FD (1 << 8)  /* Set duplex mode */
+#define MII_BMCR_CTST   (1 << 7)  /* Collision test */
+#define MII_BMCR_SPEED1000  (1 << 6)  /* MSB of Speed (1000) */
 
-#define MII_BMSR_100TX_FD   (1 << 14)
-#define MII_BMSR_100TX_HD   (1 << 13)
-#define MII_BMSR_10T_FD (1 << 12)
-#define MII_BMSR_10T_HD (1 << 11)
-#define MII_BMSR_MFPS   (1 << 6)
-#define MII_BMSR_AN_COMP(1 << 5)
-#define MII_BMSR_AUTONEG(1 << 3)
-#define MII_BMSR_LINK_ST(1 << 2)
+#define MII_BMSR_100TX_FD   (1 << 14) /* Can do 100mbps, full-duplex */
+#define MII_BMSR_100TX_HD   (1 << 13) /* Can do 100mbps, half-duplex */
+#define MII_BMSR_10T_FD (1 << 12) /* Can do 10mbps, full-duplex */
+#define MII_BMSR_10T_HD (1 << 11) /* Can do 10mbps, half-duplex */
+#define MII_BMSR_100T2_FD   (1 << 10) /* Can do 100mbps T2, full-duplex */
+#define MII_BMSR_100T2_HD   (1 << 9)  /* Can do 100mbps T2, half-duplex */
+#define MII_BMSR_EXTSTAT(1 << 8)  /* Extended status in register 15 */
+#define MII_BMSR_MFPS   (1 << 6)  /* MII Frame Preamble Suppression */
+#define MII_BMSR_AN_COMP(1 << 5)  /* Auto-negotiation complete */
+#define MII_BMSR_RFAULT (1 << 4)  /* Remote fault */
+#define MII_BMSR_AUTONEG(1 << 3)  /* Able to do auto-negotiation */
+#define MII_BMSR_LINK_ST(1 << 2)  /* Link status */
+#define MII_BMSR_JABBER (1 << 1)  /* Jabber detected */
+#define MII_BMSR_EXTCAP (1 << 0)  /* Ext-reg capability */
 
+#define MII_ANAR_PAUSE_ASYM (1 << 11) /* Try for asymetric pause */
+#define MII_ANAR_PAUSE  (1 << 10) /* Try for pause */
 #define MII_ANAR_TXFD   (1 << 8)
 #define MII_ANAR_TX (1 << 7)
 #define MII_ANAR_10FD   (1 << 6)
@@ -58,17 +79,31 @@
 #define MII_ANAR_CSMACD (1 << 0)
 
 #define MII_ANLPAR_ACK  (1 << 14)
+#define MII_ANLPAR_PAUSEASY (1 << 11) /* can pause asymmetrically */
+#define MII_ANLPAR_PAUSE(1 << 10) /* can pause */
 #define MII_ANLPAR_TXFD (1 << 8)
 #define MII_ANLPAR_TX   (1 << 7)
 #define MII_ANLPAR_10FD (1 << 6)
 #define MII_ANLPAR_10   (1 << 5)
 #define MII_ANLPAR_CSMACD   (1 << 0)
 
+#define MII_ANER_NWAY   (1 << 0) /* Can do N-way auto-nego */
+
+#define MII_CTRL1000_FULL   (1 << 9)  /* 1000BASE-T full duplex */
+#define MII_CTRL1000_HALF   (1 << 8)  /* 1000BASE-T half duplex */
+
+#define MII_STAT1000_FULL   (1 << 11) /* 1000BASE-T full duplex */
+#define MII_STAT1000_HALF   (1 << 10) /* 1000BASE-T half

Re: [Qemu-devel] [PATCH] tcg: Initialize return value after exit_atomic

2017-04-25 Thread Richard Henderson


On 04/25/2017 01:21 PM, Nikunj A Dadhania wrote:

Richard Henderson  writes:


Users of tcg_gen_atomic_cmpxchg and do_atomic_op rightfully utilize
the output.  Even though this code is dead, it gets translated, and
without the initialization we encounter a tcg_error.

Reported-by: Nikunj A Dadhania 
Signed-off-by: Richard Henderson 


With this the tcg_error goes away.

But then powernv skiboot code [1] enters into infinite loop. Basically,
in target/ppc/translate.c:gen_conditional_store(), setcond_tl will
always fail, and CRF_EQ_BIT will never be set, the lock will never be
taken.


The setcond_tl *shouldn't* always fail.  If that's the case, then we have 
another bug in the !parallel_cpus code path for gen_conditional_store.



r~

Re: [Qemu-devel] [PATCH v2 07/13] tcg/i386: implement goto_ptr op

2017-04-25 Thread Richard Henderson


On 04/25/2017 09:53 AM, Emilio G. Cota wrote:

+case INDEX_op_goto_ptr:
+/* save target address into new register */
+tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_EDX, a0);
+/* set return value to 0 */
+tgen_arithr(s, ARITH_XOR, TCG_REG_EAX, TCG_REG_EAX);
+/* jmp to the target address (could be epilogue) */
+tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_EDX);
+break;


I've just thought of an improvement to be made here -- move the xor after the 
jump (and therefore just before the "normal" epilogue) like so.



r~


diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index f6fb03e..f636557 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -1907,12 +1907,8 @@ static inline void tcg_out_op
 s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
 break;
 case INDEX_op_goto_ptr:
-/* save target address into new register */
-tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_EDX, a0);
-/* set return value to 0 */
-tgen_arithr(s, ARITH_XOR, TCG_REG_EAX, TCG_REG_EAX);
-/* jmp to the target address (could be epilogue) */
-tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_EDX);
+/* jmp to the given host address (could be epilogue) */
+tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0);
 break;
 case INDEX_op_br:
 tcg_out_jxx(s, JCC_JMP, arg_label(a0), 0);
@@ -2579,10 +2575,13 @@ static void tcg_target_qemu_prologue
 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
 #endif

-/* TB epilogue */
-tb_ret_addr = s->code_ptr;
+/* Return path for goto_ptr.  Set return value to 0, a-la exit_tb,
+   and fall through to the rest of the epilogue.  */
 s->code_gen_epilogue = s->code_ptr;
+tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_EAX, 0);

+/* TB epilogue */
+tb_ret_addr = s->code_ptr;
 tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);

 for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {

Re: [Qemu-devel] [PATCH 2/6] util: introduce glib-helper.c

2017-04-25 Thread Dr. David Alan Gilbert

* Peter Maydell (peter.mayd...@linaro.org) wrote:
> On 21 April 2017 at 16:10, Alexey  wrote:
> > Hello, thank you for such a detailed comment,
> >
> > On Fri, Apr 21, 2017 at 11:27:55AM +0100, Peter Maydell wrote:
> 
> >> Can we have a proper doc comment format comment, please,
> >> since this is now a function available to all of QEMU?
> >>
> >> > +gint g_int_cmp64(gconstpointer a, gconstpointer b,
> >> > +gpointer __attribute__((unused)) user_data);
> >>
> >> What is this actually for? Looking at the original uses
> >> I can tell that this is a GCompareDataFunc function, but
> >> the comment should tell me that.
> > I looked at another functions comments in QEMU, I didn't find
> > some common style, and decided keep it as is. Maybe I omitted some
> > best practice here.
> 
> See include/qemu/bitops.h for an example of the comment style.
> More important than just the style is that the comment
> should clearly explain the purpose of the function in detail.
> 
> Certainly many of our existing functions are poorly documented,
> but we're trying to raise the bar gradually here.
> 
> > yes, it was copy pasted,
> > right now, after mingw build check I think to use intptr_t as a type
> > for comparison in this function or even keep gpointer and merge these two
> > functions into _direct_.
> > I saw intptr_t is widely used in QEMU.
> >
> > The intent of this function was a comparator for case when client code
> > want to keep integers in pointer field. xen_disk.c uses UINT32 so it
> > wasn't a problem, but migration uses 64 address (kernel provides it in
> > __u64, long long), so on 32 platform it's a problem.
> 
> Code which tries to put a genuinely 64 bit value into a pointer
> is buggy and needs to be fixed. I'm not clear if that is the
> case here, or if the ABI from the kernel guarantees that the
> value is really a pointer type and fits in uintptr_t / gpointer.

It's a (probably masked) HVA, so always a valid pointer.

Dave

> I don't think we need more than one of these functions.
> 
> >> This is also missing the copyright line.
> > Yes, maybe it was better for me to ask before send.
> > I found in util files with reference to GNU GPL, version 2, like
> > in this file, also I found that
> >
> >  * This library is free software; you can redistribute it and/or
> >  * modify it under the terms of the GNU Lesser General Public
> >  * License as published by the Free Software Foundation; either
> >  * version 2 of the License, or (at your option) any later version.
> >
> > So I just copied copyright reference from glib-compat.h.
> 
> Yes, that's the license statement, which is fine. What is
> missing is the copyright line, which in glib-compat.h looks
> like:
>  Copyright IBM, Corp. 2013
> 
> For code you write, you want either your personal or (more likely)
> a Samsung copyright line -- check with your company about what
> their preferred form is.
> 
> thanks
> -- PMM
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [Qemu-devel] [PATCH] tcg: Initialize return value after exit_atomic

2017-04-25 Thread Nikunj A Dadhania

Richard Henderson  writes:

> Users of tcg_gen_atomic_cmpxchg and do_atomic_op rightfully utilize
> the output.  Even though this code is dead, it gets translated, and
> without the initialization we encounter a tcg_error.
>
> Reported-by: Nikunj A Dadhania 
> Signed-off-by: Richard Henderson 

With this the tcg_error goes away.

But then powernv skiboot code [1] enters into infinite loop. Basically,
in target/ppc/translate.c:gen_conditional_store(), setcond_tl will
always fail, and CRF_EQ_BIT will never be set, the lock will never be
taken.

So "make check" still fails at powernv serial test.

./configure --target-list=ppc64-softmmu  --cc=clang --host-cc=clang && make && 
make check

> ---
>  tcg/tcg-op.c | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
> index 95a39b7..6b1f415 100644
> --- a/tcg/tcg-op.c
> +++ b/tcg/tcg-op.c
> @@ -2861,6 +2861,9 @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv 
> addr, TCGv_i64 cmpv,
>  #endif
>  #else
>  gen_helper_exit_atomic(tcg_ctx.tcg_env);
> +/* Produce a result, so that we have a well-formed opcode stream
> +   with respect to uses of the result in the (dead) code following.  
> */
> +tcg_gen_movi_i64(retv, 0);
>  #endif /* CONFIG_ATOMIC64 */
>  } else {
>  TCGv_i32 c32 = tcg_temp_new_i32();
> @@ -2966,6 +2969,9 @@ static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, 
> TCGv_i64 val,
>  #endif
>  #else
>  gen_helper_exit_atomic(tcg_ctx.tcg_env);
> +/* Produce a result, so that we have a well-formed opcode stream
> +   with respect to uses of the result in the (dead) code following.  
> */
> +tcg_gen_movi_i64(ret, 0);
>  #endif /* CONFIG_ATOMIC64 */
>  } else {
>  TCGv_i32 v32 = tcg_temp_new_i32();
> -- 

Regards,
Nikunj

1. https://github.com/open-power/skiboot/blob/master/asm/lock.S#L36

Re: [Qemu-devel] [PATCH] target-s390x: Mask the SIGP order_code to 8bit.

2017-04-25 Thread Philipp Kern


On 2017-04-25 11:51, Richard Henderson wrote:

On 04/24/2017 10:25 AM, Alexander Graf wrote:

On 24.04.17 00:32, Aurelien Jarno wrote:

From: Philipp Kern 

According to "CPU Signaling and Response", "Signal-Processor Orders",
the order field is bit position 56-63. Without this, the Linux
guest kernel is sometimes unable to stop emulation and enters
an infinite loop of "XXX unknown sigp: 0x0005".

Signed-off-by: Philipp Kern 
Signed-off-by: Aurelien Jarno 
---
 target/s390x/misc_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

This patch was sent by Philipp Kern a long time ago, and it seems to have
been lost. I am resending it, as it is still useful.

diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c
index 3bf09ea222..4946b56ab3 100644
--- a/target/s390x/misc_helper.c
+++ b/target/s390x/misc_helper.c
@@ -534,7 +534,7 @@ uint32_t HELPER(sigp)(CPUS390XState *env, 
uint64_t order_code, uint32_t r1,
 /* Remember: Use "R1 or R1 + 1, whichever is the odd-numbered 
register"

as parameter (input). Status (output) is always R1. */

-switch (order_code) {
+switch (order_code & 0xff) {


This definitely needs a comment above the mask. Ideally I'd love to 
just change the function prototype to pass order_code as uint8_t, but 
I don't think that's possible with the TCG glue.


Correct.  We'll need to leave the mask here.


I shall point out that Alexander merged it into the s390-next tree when 
I first sent it but that was never merged into qemu proper. I don't 
think there's a problem in adding a comment that says what the commit 
description says right there, like this:


/* sigp contains the order code in bit positions 56-63, mask it here. */

Kind regards
Philipp Kern

Re: [Qemu-devel] [PATCH v2 13/13] tb-hash: improve tb_jmp_cache hash function in user mode

2017-04-25 Thread Richard Henderson


On 04/25/2017 09:53 AM, Emilio G. Cota wrote:

Optimizations to cross-page chaining and indirect branches make
performance more sensitive to the hit rate of tb_jmp_cache.
The constraint of reserving some bits for the page number
lowers the achievable quality of the hashing function.


Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH RESEND v2 08/18] ram/COLO: Record the dirty pages that SVM received

2017-04-25 Thread Hailiang Zhang


On 2017/4/25 2:29, Juan Quintela wrote:

zhanghailiang  wrote:

We record the address of the dirty pages that received,
it will help flushing pages that cached into SVM.

Here, it is a trick, we record dirty pages by re-using migration
dirty bitmap. In the later patch, we will start the dirty log
for SVM, just like migration, in this way, we can record both
the dirty pages caused by PVM and SVM, we only flush those dirty
pages from RAM cache while do checkpoint.

Cc: Juan Quintela 
Signed-off-by: zhanghailiang 
Reviewed-by: Dr. David Alan Gilbert 
---
  migration/ram.c | 29 +
  1 file changed, 29 insertions(+)

diff --git a/migration/ram.c b/migration/ram.c
index 05d1b06..0653a24 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2268,6 +2268,9 @@ static inline void *host_from_ram_block_offset(RAMBlock 
*block,
  static inline void *colo_cache_from_block_offset(RAMBlock *block,
   ram_addr_t offset)
  {
+unsigned long *bitmap;
+long k;
+
  if (!offset_in_ramblock(block, offset)) {
  return NULL;
  }
@@ -2276,6 +2279,17 @@ static inline void 
*colo_cache_from_block_offset(RAMBlock *block,
   __func__, block->idstr);
  return NULL;
  }
+
+k = (memory_region_get_ram_addr(block->mr) + offset) >> TARGET_PAGE_BITS;
+bitmap = atomic_rcu_read(&ram_state.ram_bitmap)->bmap;
+/*
+* During colo checkpoint, we need bitmap of these migrated pages.
+* It helps us to decide which pages in ram cache should be flushed
+* into VM's RAM later.
+*/
+if (!test_and_set_bit(k, bitmap)) {
+ram_state.migration_dirty_pages++;
+}
  return block->colo_cache + offset;
  }
  
@@ -2752,6 +2766,15 @@ int colo_init_ram_cache(void)

  memcpy(block->colo_cache, block->host, block->used_length);
  }
  rcu_read_unlock();
+/*
+* Record the dirty pages that sent by PVM, we use this dirty bitmap 
together
+* with to decide which page in cache should be flushed into SVM's RAM. Here
+* we use the same name 'ram_bitmap' as for migration.
+*/
+ram_state.ram_bitmap = g_new0(RAMBitmap, 1);
+ram_state.ram_bitmap->bmap = bitmap_new(last_ram_page());
+ram_state.migration_dirty_pages = 0;
+
  return 0;
  
  out_locked:

@@ -2770,6 +2793,12 @@ out_locked:
  void colo_release_ram_cache(void)
  {
  RAMBlock *block;
+RAMBitmap *bitmap = ram_state.ram_bitmap;
+
+atomic_rcu_set(&ram_state.ram_bitmap, NULL);
+if (bitmap) {
+call_rcu(bitmap, migration_bitmap_free, rcu);
+}
  
  rcu_read_lock();

  QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {

You can see my Split bitmap patches, I am splitting the dirty bitmap per
block, I think that it shouldn't make your life more difficult, but
please take a look.


OK, I'll look at it.


I am wondering if it is faster/easier to use the page_cache.c that
xbzrle uses to store the dirty pages instead of copying the whole
RAMBlocks, but I don't really know.


Hmm,  Yes, it takes long time (depends on VM's memory size) to backup the whole 
VM's memory data into cache.
And we can reduce the time by backup page one by one while loading the page 
during the first live migration round,
because we can know  if users enable COLO at the beginning of the first 
migration stage.
I'd like to send those optimization later in another series...

Thanks,
Hailiang



Thanks, Juan.

.

Re: [Qemu-devel] [PATCH v2 02/13] exec-all: inline tb_from_jmp_cache

2017-04-25 Thread Richard Henderson


On 04/25/2017 09:53 AM, Emilio G. Cota wrote:

+/* tb_hash_func() in tb-hash.h needs tb_page_addr_t, defined above */
+#include "tb-hash.h"
+


This causes an include loop (I think), and quite a few targets fail to build.
Are you using --target-list in your testing?


r~

Re: [Qemu-devel] [PATCH 3/6] migration: add UFFD_FEATURE_THREAD_ID feature support

2017-04-25 Thread Dr. David Alan Gilbert

* Alexey (a.pereva...@samsung.com) wrote:
> + Andrea Arcangeli
> 
> On Mon, Apr 24, 2017 at 06:10:02PM +0100, Dr. David Alan Gilbert wrote:
> > * Alexey (a.pereva...@samsung.com) wrote:
> > > On Mon, Apr 24, 2017 at 04:12:29PM +0800, Peter Xu wrote:
> > > > On Fri, Apr 21, 2017 at 06:22:12PM +0300, Alexey wrote:
> > > > > On Fri, Apr 21, 2017 at 11:24:54AM +0100, Dr. David Alan Gilbert 
> > > > > wrote:
> > > > > > * Alexey Perevalov (a.pereva...@samsung.com) wrote:
> > > > > > > Userfaultfd mechanism is able to provide process thread id,
> > > > > > > > in case when client request it with UFFD_API ioctl.
> > > > > > > 
> > > > > > > Signed-off-by: Alexey Perevalov 
> > > > > > 
> > > > > > There seem to be two parts to this:
> > > > > >   a) Adding the mis parameter to ufd_version_check
> > > > > >   b) Asking for the feature
> > > > > > 
> > > > > > Please split it into two patches.
> > > > > > 
> > > > > > Also
> > > > > > 
> > > > > > > ---
> > > > > > >  include/migration/postcopy-ram.h |  2 +-
> > > > > > >  migration/migration.c|  2 +-
> > > > > > >  migration/postcopy-ram.c | 12 ++--
> > > > > > >  migration/savevm.c   |  2 +-
> > > > > > >  4 files changed, 9 insertions(+), 9 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/include/migration/postcopy-ram.h 
> > > > > > > b/include/migration/postcopy-ram.h
> > > > > > > index 8e036b9..809f6db 100644
> > > > > > > --- a/include/migration/postcopy-ram.h
> > > > > > > +++ b/include/migration/postcopy-ram.h
> > > > > > > @@ -14,7 +14,7 @@
> > > > > > >  #define QEMU_POSTCOPY_RAM_H
> > > > > > >  
> > > > > > >  /* Return true if the host supports everything we need to do 
> > > > > > > postcopy-ram */
> > > > > > > -bool postcopy_ram_supported_by_host(void);
> > > > > > > +bool postcopy_ram_supported_by_host(MigrationIncomingState *mis);
> > > > > > >  
> > > > > > >  /*
> > > > > > >   * Make all of RAM sensitive to accesses to areas that haven't 
> > > > > > > yet been written
> > > > > > > diff --git a/migration/migration.c b/migration/migration.c
> > > > > > > index ad4036f..79f6425 100644
> > > > > > > --- a/migration/migration.c
> > > > > > > +++ b/migration/migration.c
> > > > > > > @@ -802,7 +802,7 @@ void 
> > > > > > > qmp_migrate_set_capabilities(MigrationCapabilityStatusList 
> > > > > > > *params,
> > > > > > >   * special support.
> > > > > > >   */
> > > > > > >  if (!old_postcopy_cap && 
> > > > > > > runstate_check(RUN_STATE_INMIGRATE) &&
> > > > > > > -!postcopy_ram_supported_by_host()) {
> > > > > > > +!postcopy_ram_supported_by_host(NULL)) {
> > > > > > >  /* postcopy_ram_supported_by_host will have emitted 
> > > > > > > a more
> > > > > > >   * detailed message
> > > > > > >   */
> > > > > > > diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> > > > > > > index dc80dbb..70f0480 100644
> > > > > > > --- a/migration/postcopy-ram.c
> > > > > > > +++ b/migration/postcopy-ram.c
> > > > > > > @@ -60,13 +60,13 @@ struct PostcopyDiscardState {
> > > > > > >  #include 
> > > > > > >  #include 
> > > > > > >  
> > > > > > > -static bool ufd_version_check(int ufd)
> > > > > > > +static bool ufd_version_check(int ufd, MigrationIncomingState 
> > > > > > > *mis)
> > > > > > >  {
> > > > > > >  struct uffdio_api api_struct;
> > > > > > >  uint64_t ioctl_mask;
> > > > > > >  
> > > > > > >  api_struct.api = UFFD_API;
> > > > > > > -api_struct.features = 0;
> > > > > > > +api_struct.features = UFFD_FEATURE_THREAD_ID;
> > > > > > > >  if (ioctl(ufd, UFFDIO_API, &api_struct)) {
> > > > > > >  error_report("postcopy_ram_supported_by_host: UFFDIO_API 
> > > > > > > failed: %s",
> > > > > > >   strerror(errno));
> > > > > > 
> > > > > > You're not actually using the 'mis' here - what I'd expected was
> > > > > > something that was going to check if the UFFDIO_API return said 
> > > > > > that it really
> > > > > > had the feature, and if so store a flag in the MIS somewhere.
> > > > > > 
> > > > > > Also, I'm not sure it's right to set 'api_struct.features' on the 
> > > > > > input - what
> > > > > > happens if this is run on an old kernel - we don't want postcopy to 
> > > > > > fail on
> > > > > > an old kernel without your feature.
> > > > > > I'm not 100% sure of the interface, but I think the way it works is 
> > > > > > you set
> > > > > > features = 0 before the call, and then check the 
> > > > > > api_struct.features in the
> > > > > > return - in the same way that I check for 
> > > > > > UFFD_FEATURE_MISSING_HUGETLBFS.
> > > > > > 
> > > > > We need to ask kernel about that feature,
> > > > > right,
> > > > > kernel returns back available features
> > > > > uffdio_api.features = UFFD_API_FEATURES
> > > > > but it also stores requested features
> > > > 
> > > > I feel like this does not against Dave's comment, maybe we just

Re: [Qemu-devel] [PATCH v2 09/13] target/arm: optimize indirect branches with TCG's goto_ptr

2017-04-25 Thread Richard Henderson


On 04/25/2017 09:53 AM, Emilio G. Cota wrote:

+
+gen_jr = false;
+gen_helper_lookup_tb_ptr(ptr, cpu_env, cpu_R[15]);
+tcg_gen_goto_ptr(ptr);
+tcg_temp_free_ptr(ptr);
+break;


Likewise doesn't compile for aarch64.


r~

Re: [Qemu-devel] [PATCH v2 08/13] target/arm: optimize cross-page block chaining in softmmu

2017-04-25 Thread Richard Henderson


On 04/25/2017 09:53 AM, Emilio G. Cota wrote:

Instead of unconditionally exiting to the exec loop, use the
lookup_tb_ptr helper to jump to the target if it is valid.
As long as the hit rate in tb_jmp_cache remains high, this
will improve performance.

Perf impact: see the next commit's log.

Signed-off-by: Emilio G. Cota 
---
  target/arm/translate.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/target/arm/translate.c b/target/arm/translate.c
index e32e38c..574cf70 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -4085,8 +4085,12 @@ static inline void gen_goto_tb(DisasContext *s, int n, 
target_ulong dest)
  gen_set_pc_im(s, dest);
  tcg_gen_exit_tb((uintptr_t)s->tb + n);
  } else {
+TCGv_ptr ptr = tcg_temp_new_ptr();
+
  gen_set_pc_im(s, dest);
-tcg_gen_exit_tb(0);
+gen_helper_lookup_tb_ptr(ptr, cpu_env, cpu_R[15]);
+tcg_gen_goto_ptr(ptr);
+tcg_temp_free_ptr(ptr);
  }


This does not compile for aarch64.  You need to tcg_gen_extu_i32_tl first.


r~

Re: [Qemu-devel] [PATCH RESEND v2 07/18] COLO: Load dirty pages into SVM's RAM cache firstly

2017-04-25 Thread Hailiang Zhang


On 2017/4/25 2:27, Juan Quintela wrote:

zhanghailiang  wrote:

We should not load PVM's state directly into SVM, because there maybe some
errors happen when SVM is receiving data, which will break SVM.

We need to ensure receiving all data before load the state into SVM. We use
an extra memory to cache these data (PVM's ram). The ram cache in secondary side
is initially the same as SVM/PVM's memory. And in the process of checkpoint,
we cache the dirty pages of PVM into this ram cache firstly, so this ram cache
always the same as PVM's memory at every checkpoint, then we flush this cached 
ram
to SVM after we receive all PVM's state.

Cc: Dr. David Alan Gilbert 
Signed-off-by: zhanghailiang 
Signed-off-by: Li Zhijian 
---
v2:
- Move colo_init_ram_cache() and colo_release_ram_cache() out of
   incoming thread since both of them need the global lock, if we keep
   colo_release_ram_cache() in incoming thread, there are potential
   dead-lock.
- Remove bool ram_cache_enable flag, use migration_incoming_in_state() instead.
- Remove the Reviewed-by tag because of the above changes.



+out_locked:
+QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+if (block->colo_cache) {
+qemu_anon_ram_free(block->colo_cache, block->used_length);
+block->colo_cache = NULL;
+}
+}
+
+rcu_read_unlock();
+return -errno;
+}
+
+/* The global lock must be held when calling this helper */
+void colo_release_ram_cache(void)
+{
+RAMBlock *block;
+
+rcu_read_lock();
+QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+if (block->colo_cache) {
+qemu_anon_ram_free(block->colo_cache, block->used_length);
+block->colo_cache = NULL;
+}
+}
+rcu_read_unlock();
+}

Create a function from the creation/removal?  We have exactly two copies
of the same code.  Right now the code inside the function is very small,
but it could be bigger, no?


Yes, we add more codes  in next patch (patch 08).  :)


Later, Juan.


.

Re: [Qemu-devel] [PATCH v2 06/13] tcg: add goto_ptr opcode

2017-04-25 Thread Richard Henderson


On 04/25/2017 09:53 AM, Emilio G. Cota wrote:

@@ -1138,6 +1138,7 @@ void tcg_dump_ops(TCGContext *s)
  }
  switch (c) {
  case INDEX_op_set_label:
+case INDEX_op_goto_tb:
  case INDEX_op_br:
  case INDEX_op_brcond_i32:
  case INDEX_op_brcond_i64:


This is wrong, and causes crashes when dumping.  Nor should goto_ptr be here, 
so I don't know what you were after.



r~

Re: [Qemu-devel] [PATCH RESEND v2 04/18] COLO: integrate colo compare with colo frame

2017-04-25 Thread Hailiang Zhang


On 2017/4/25 2:18, Juan Quintela wrote:

zhanghailiang  wrote:

For COLO FT, both the PVM and SVM run at the same time,
only sync the state while it needs.

So here, let SVM runs while not doing checkpoint, change
DEFAULT_MIGRATE_X_CHECKPOINT_DELAY to 200*100.

Besides, we forgot to release colo_checkpoint_semd and
colo_delay_timer, fix them here.

Cc: Jason Wang 
Signed-off-by: zhanghailiang 
Reviewed-by: Dr. David Alan Gilbert 




diff --git a/migration/migration.c b/migration/migration.c
index 353f272..2ade2aa 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -70,7 +70,7 @@
  /* The delay time (in ms) between two COLO checkpoints
   * Note: Please change this default value to 1 when we support hybrid 
mode.
   */
-#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200
+#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
  
  static NotifierList migration_state_notifiers =

  NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);

1000 or 200 * 100

Please, fix value or comment?


OK, will fix in next version, thanks.


Later, Juan.

.

Re: [Qemu-devel] [PATCH v2 05/13] tcg-runtime: add lookup_tb_ptr helper

2017-04-25 Thread Richard Henderson


On 04/25/2017 09:53 AM, Emilio G. Cota wrote:

Signed-off-by: Emilio G. Cota 
---
  tcg-runtime.c | 7 +++
  tcg/tcg-runtime.h | 2 ++
  tcg/tcg.h | 1 +
  3 files changed, 10 insertions(+)


Modulo what I mentioned earlier about maybe directly inlining tb_from_jmp_cache,

Reviewed-by: Richard Henderson 


r~

Re: [Qemu-devel] [PATCH v2 03/13] tcg: enforce 64-byte alignment of TCGContext

2017-04-25 Thread Richard Henderson


On 04/25/2017 09:53 AM, Emilio G. Cota wrote:

This will allow us to prevent cache line false sharing in TCGContext.

Before:
$ objdump -t  build/x86_64-linux-user/qemu-x86_64 | grep tcg_ctx
003ea820 g O .bss   000152d8  tcg_ctx

After:
$ objdump -t  build/x86_64-linux-user/qemu-x86_64 | grep tcg_ctx
003ea880 g O .bss   00015300  tcg_ctx

Signed-off-by: Emilio G. Cota 
---
  tcg/tcg.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/tcg.h b/tcg/tcg.h
index 6c216bb..5fdbfe3 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -727,7 +727,7 @@ struct TCGContext {
  
  uint16_t gen_insn_end_off[TCG_MAX_INSNS];

  target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
-};
+} QEMU_ALIGNED(64);


Let's drop the alignment and structure re-arrangement for now and focus on the 
task of goto_ptr.



r~

Re: [Qemu-devel] [PATCH v2 02/13] exec-all: inline tb_from_jmp_cache

2017-04-25 Thread Richard Henderson


On 04/25/2017 09:53 AM, Emilio G. Cota wrote:

The inline improves performance, as shown in upcoming commits' logs.

This commit is kept separate to ease review, since the inclusion
of tb-hash.h might be controversial. The problem here, which was
introduced before this commit, is that tb_hash_func() depends on
page_addr_t: this defeats the original purpose of tb-hash.h,
which was to be self-contained and CPU-agnostic.

Signed-off-by: Emilio G. Cota
---
  cpu-exec.c  | 19 ---
  include/exec/exec-all.h | 24 +++-
  2 files changed, 23 insertions(+), 20 deletions(-)


Is there a reason we shouldn't just inline this code directly into 
HELPER(lookup_tb_ptr)?  I think that would save a bit of churn, and I can't 
think of any other reason we'd want to use this function.



r~

Re: [Qemu-devel] DMG chunk size independence

2017-04-25 Thread Ashijeet Acharya

Hi,

cc'ing Peter Wu in...

Currently I have completed the task for zlib, uncompressed and zeroed
chunks in a DMG file using the approach we discussed earlier.
Unfortunately, this approach is not appropriate for bz2 chunks since
we cannot restart our decompression from the access point we cached
since bz2 decompression checks for a special magic key 'BZh' before it
starts decompressing. Since our cached point can be pointing to any
random location inside the compressed stream and not necessarily the
start of a "block", dmg_uncompress_bz2_do() fails with an error value
BZ_DATA_ERROR_MAGIC (-5) and thus our approach fails.
This blog post here explains this limitation too ->
https://blastedbio.blogspot.in/2011/11/random-access-to-bzip2.html

Now, there is an interesting thing I found out about bz2 compressed
streams i.e. the size of a compressed block varies from 0 to a max of
900 KiB. This is guaranteed and can be verified because each block has
a 4 byte header attached to it at the beginning in which the first
three bytes are the magic key "BZh" followed by a number from 1-9.
These help us find the max size that block will have as the size
increments by 100KiB for each value (eg. BZh3 has a max of 300KiB).

Now the wikipedia page here
(https://en.wikipedia.org/wiki/Bzip2#File_format) states that a 900KiB
block can expand to a max of 46MiB in its uncompressed form. Thus we
need not worry about QEMU allocating wild sized memory at once as we
have a limit of 64MiB as of now and stick to the approach of
decompressing the whole block every time we enter it. This solves our
problem of caching an access point and ultimately failing with this
error value BZ_DATA_ERROR_MAGIC (-5).

I am hesitant in this approach because I am not sure yet that "blocks"
and "chunks" mean the same thing and are just two different
terminologies (i.e. chunks == blocks) OR chunks are made up of blocks
(i.e chunks = x * blocks).

I approached Peter Wu (who worked on DMG a few years ago) about this
and he's not sure either.

(Peter, you may skip this part as I already explained you this earlier :-) )
I did a little naive test of my own, where I downloaded one of the bz2
DMG images and tried reading it with a HEX editor.

First, I manually calculated the size between the appearance of two
sequential magic keys ('BZh') offsets which marked the length of a
block starting at the offset of first magic key. Next I compared it to
the size of the corresponding chunk whose size (s->lengths[chunk]) we
get by reading the mish blocks and all that stuff while opening the
image in QEMU, and interestingly both the sizes appeared to be equal.
I repeated it for quite a few chunks and this test stayed valid for
all.

Peter thinks we cannot rely on this test thus I wouldn't mind more
views on it...

Ashijeet

Re: [Qemu-devel] [PATCH 4/6] migration: calculate downtime on dst side

2017-04-25 Thread Alexey Perevalov


On 04/25/2017 01:25 PM, Peter Xu wrote:

On Tue, Apr 25, 2017 at 01:10:30PM +0300, Alexey Perevalov wrote:

On 04/25/2017 11:24 AM, Peter Xu wrote:

On Fri, Apr 14, 2017 at 04:17:18PM +0300, Alexey Perevalov wrote:

[...]


+/*
+ * This function calculates downtime per cpu and trace it
+ *
+ *  Also it calculates total downtime as an interval's overlap,
+ *  for many vCPU.
+ *
+ *  The approach is following:
+ *  Initially intervals are represented in tree where key is
+ *  pagefault address, and values:
+ *   begin - page fault time
+ *   end   - page load time
+ *   cpus  - bit mask shows affected cpus
+ *
+ *  To calculate overlap on all cpus, intervals converted into
+ *  array of points in time (downtime_points), the size of
+ *  array is 2 * number of nodes in tree of intervals (2 array
+ *  elements per one in element of interval).
+ *  Each element is marked as end (E) or as start (S) of interval.
+ *  The overlap downtime will be calculated for SE, only in case
+ *  there is sequence S(0..N)E(M) for every vCPU.
+ *
+ * As example we have 3 CPU
+ *
+ *  S1E1   S1   E1
+ * -***xxx***> CPU1
+ *
+ * S2E2
+ * xxx---> CPU2
+ *
+ * S3E3
+ * xxx---> CPU3
+ *
+ * We have sequence S1,S2,E1,S3,S1,E2,E3,E1
+ * S2,E1 - doesn't match condition due to sequence S1,S2,E1 doesn't include 
CPU3
+ * S3,S1,E2 - sequence includes all CPUs, in this case overlap will be S1,E2
+ * Legend of picture is following: * - means downtime per vCPU
+ * x - means overlapped downtime
+ */

Not sure whether I get the point in this patch... iiuc we defined the
downtime here as the period when all vcpus are halted, right?

If so, I have a few questions:

- will this algorithm consume lots of memory? since I see we have one
   trace object per fault page address

I don't think, it consumes too much, one DowntimeDuration
takes (if I'm using bitmap_try_new, in this patch set I used pointer to
uint64_t array to keep bitmap array,
but I'm going to use include/qemu/bitmap.h, it works with pointers to long)

(2 * int64 + ROUND_UP((smp_cpus + BITS_PER_BYTE * sizeof(long) - 1) /
(BITS_PER_BYTE * sizeof(long))) * sizeof(long))
so it's about 16 + at least 4 bytes, per page fault,
Lets assume we migration 256 vCPU and 256 Gb of ram and that ram is based on
4Kb pages - it's really bad case
16 + ((256 + 8 * 4 - 1) / ( 8 * 4 )) * 4 = 52 bytes
(256 * 1024 * 1024 * 1024)/(4 * 1024) = 67108864 page faults, but not all of
these pages will be pagefaulted, due to
page pre-fetching
67108864 * 52 = 3489660928 bytes (3.5 Gb for that operation),
but I have a doubt, who will use 4Kb pages for 256 Gb, probably
2Mb or 1G huge page will be chosen on x86, on ARM or other architecture it
could be another values.

Hmm, it looks still big though...


- do we need to protect the tree to make sure there's no insertion
   when doing the calculation?

I asked the same question when sent RFC patches,
the answer here is no, we should not, due to right now,
it's only one socket and one listen thread (maybe in future,
it will be required, maybe after multi fd patch set),
and calculation is doing synchronously right after migration complete.

Okay.


- if the only thing we want here is the "total downtime", whether
   below would work? (assuming N is vcpu numbers)

   a. define array cpu_fault_addr[N], to store current faulted address
  for each vcpu. When vcpu X is running, cpu_fault_addr[X] should
  be 0.

   b. when page fault happens on vcpu A, setup cpu_fault_addr[A] with
  corresponding fault address.

at this time we need to check whether a fault has happened on all other vCPUs,
and if it has, mark the current time as the start of total vCPU downtime.


   c. when page copy finished, loop over cpu_fault_addr[] to see
  whether that matches any, clear corresponding element if matched.

so when page copy finished and mark for total vCPU is set,
yes that interval is a part of total downtime.

   Then, we can just measure the period when cpu_fault_addr[] is all
   set (by tracing at both b. and c.). Can this work?

Yes, it works, but it's better to keep time - cpu_fault_time,
address is not important here, it doesn't matter the reason of pagefault.

We still need the addresses? So that when we do COPY, we can check the
new page address against these stored ones, to know which vcpus to
clear the bit.

Frankly speaking, we do need them, because there is no other way to determine
the vCPU at COPY time.




2 vCPU could fault due to access to one page, ok, it's not a problem, just
store
time when it was faulted.
Looks like it's better algorithm, with lesser complexity,
thank you a lot.

My pleasure. Thanks,




--
Best regards,
Alexey Perevalov

[Qemu-devel] [PATCH 10/11] pc-bios/s390-ccw: add boot entry selection to El Torito routine

2017-04-25 Thread Cornelia Huck

From: "Eugene (jno) Dvurechenski" 

If there is no LOADPARM given or '0' specified, then IPL the first
matched entry. Otherwise IPL the matching entry of that number.

Signed-off-by: Eugene (jno) Dvurechenski 
Signed-off-by: Farhan Ali 
Reviewed-by: Christian Borntraeger 
Signed-off-by: Cornelia Huck 
---
 pc-bios/s390-ccw/bootmap.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c
index e974350b6f..523fa78c5f 100644
--- a/pc-bios/s390-ccw/bootmap.c
+++ b/pc-bios/s390-ccw/bootmap.c
@@ -655,6 +655,7 @@ static IsoBcSection *find_iso_bc_entry(void)
 IsoBcEntry *e = (IsoBcEntry *)sec;
 uint32_t offset = find_iso_bc();
 int i;
+unsigned int loadparm = get_loadparm_index();
 
 if (!offset) {
 return NULL;
@@ -675,7 +676,11 @@ static IsoBcSection *find_iso_bc_entry(void)
 for (i = 1; i < ISO_BC_ENTRY_PER_SECTOR; i++) {
 if (e[i].id == ISO_BC_BOOTABLE_SECTION) {
 if (is_iso_bc_entry_compatible(&e[i].body.sect)) {
-return &e[i].body.sect;
+if (loadparm <= 1) {
+/* found, default, or unspecified */
+return &e[i].body.sect;
+}
+loadparm--;
 }
 }
 }
-- 
2.11.0

[Qemu-devel] [PATCH 09/11] pc-bios/s390-ccw: add boot entry selection for ECKD DASD

2017-04-25 Thread Cornelia Huck

From: Farhan Ali 

1. change a bit definition of ScsiMbr to allow an array of pointers
2. add loadparm fetch to boot script processing
3. apply loadparm index to boot entry selection, if any

Initial patch from Eugene (jno) Dvurechenski.

Signed-off-by: Eugene (jno) Dvurechenski 
Signed-off-by: Farhan Ali 
Reviewed-by: Christian Borntraeger 
Signed-off-by: Cornelia Huck 
---
 pc-bios/s390-ccw/bootmap.c | 16 +++-
 pc-bios/s390-ccw/bootmap.h |  2 +-
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c
index e39e67e07b..e974350b6f 100644
--- a/pc-bios/s390-ccw/bootmap.c
+++ b/pc-bios/s390-ccw/bootmap.c
@@ -183,15 +183,21 @@ static block_number_t load_eckd_segments(block_number_t 
blk, uint64_t *address)
 static void run_eckd_boot_script(block_number_t mbr_block_nr)
 {
 int i;
+unsigned int loadparm = get_loadparm_index();
 block_number_t block_nr;
 uint64_t address;
-ScsiMbr *scsi_mbr = (void *)sec;
+ScsiMbr *bte = (void *)sec; /* Eckd bootmap table entry */
 BootMapScript *bms = (void *)sec;
 
+debug_print_int("loadparm", loadparm);
+IPL_assert(loadparm < 31, "loadparm value greater than"
+   " maximum number of boot entries allowed");
+
 memset(sec, FREE_SPACE_FILLER, sizeof(sec));
 read_block(mbr_block_nr, sec, "Cannot read MBR");
 
-block_nr = eckd_block_num((void *)&(scsi_mbr->blockptr));
+block_nr = eckd_block_num((void *)&(bte->blockptr[loadparm]));
+IPL_assert(block_nr != -1, "No Boot Map");
 
 memset(sec, FREE_SPACE_FILLER, sizeof(sec));
 read_block(block_nr, sec, "Cannot read Boot Map Script");
@@ -459,11 +465,11 @@ static void ipl_scsi(void)
 debug_print_int("MBR Version", mbr->version_id);
 IPL_check(mbr->version_id == 1,
   "Unknown MBR layout version, assuming version 1");
-debug_print_int("program table", mbr->blockptr.blockno);
-IPL_assert(mbr->blockptr.blockno, "No Program Table");
+debug_print_int("program table", mbr->blockptr[0].blockno);
+IPL_assert(mbr->blockptr[0].blockno, "No Program Table");
 
 /* Parse the program table */
-read_block(mbr->blockptr.blockno, sec,
+read_block(mbr->blockptr[0].blockno, sec,
"Error reading Program Table");
 
 IPL_assert(magic_match(sec, ZIPL_MAGIC), "No zIPL magic in PT");
diff --git a/pc-bios/s390-ccw/bootmap.h b/pc-bios/s390-ccw/bootmap.h
index 9073de2238..7f367820f3 100644
--- a/pc-bios/s390-ccw/bootmap.h
+++ b/pc-bios/s390-ccw/bootmap.h
@@ -70,7 +70,7 @@ typedef struct ScsiMbr {
 uint8_t magic[4];
 uint32_t version_id;
 uint8_t reserved[8];
-ScsiBlockPtr blockptr;
+ScsiBlockPtr blockptr[];
 } __attribute__ ((packed)) ScsiMbr;
 
 #define ZIPL_MAGIC  "zIPL"
-- 
2.11.0

[Qemu-devel] [PATCH 08/11] pc-bios/s390-ccw: provide entry selection on LOADPARM for SCSI disk

2017-04-25 Thread Cornelia Huck

From: Farhan Ali 

Fix SCSI bootmap interpreter to make use of any specified entry of the
Program Table using the leftmost numeric value from the LOADPARM, if specified.

Initial patch from Eugene (jno) Dvurechenski.

Signed-off-by: Eugene (jno) Dvurechenski 
Signed-off-by: Farhan Ali 
Reviewed-by: Christian Borntraeger 
Signed-off-by: Cornelia Huck 
---
 pc-bios/s390-ccw/bootmap.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c
index b21c877b53..e39e67e07b 100644
--- a/pc-bios/s390-ccw/bootmap.c
+++ b/pc-bios/s390-ccw/bootmap.c
@@ -444,7 +444,8 @@ static void ipl_scsi(void)
 uint8_t *ns, *ns_end;
 int program_table_entries = 0;
 const int pte_len = sizeof(ScsiBlockPtr);
-ScsiBlockPtr *prog_table_entry;
+ScsiBlockPtr *prog_table_entry = NULL;
+unsigned int loadparm = get_loadparm_index();
 
 /* Grab the MBR */
 memset(sec, FREE_SPACE_FILLER, sizeof(sec));
@@ -467,6 +468,7 @@ static void ipl_scsi(void)
 
 IPL_assert(magic_match(sec, ZIPL_MAGIC), "No zIPL magic in PT");
 
+debug_print_int("loadparm index", loadparm);
 ns_end = sec + virtio_get_block_size();
 for (ns = (sec + pte_len); (ns + pte_len) < ns_end; ns += pte_len) {
 prog_table_entry = (ScsiBlockPtr *)ns;
@@ -475,16 +477,15 @@ static void ipl_scsi(void)
 }
 
 program_table_entries++;
+if (program_table_entries == loadparm + 1) {
+break; /* selected entry found */
+}
 }
 
 debug_print_int("program table entries", program_table_entries);
 
 IPL_assert(program_table_entries != 0, "Empty Program Table");
 
-/* Run the default entry */
-
-prog_table_entry = (ScsiBlockPtr *)(sec + pte_len);
-
 zipl_run(prog_table_entry); /* no return */
 }
 
-- 
2.11.0

[Qemu-devel] [PATCH 11/11] pc-bios/s390-ccw.img: update image

2017-04-25 Thread Cornelia Huck

Contains the following commits:

- pc-bios/s390-ccw: Make ebcdic/ascii conversion public
- pc-bios/s390-ccw: get LOADPARM stored in SCP Read Info
- pc-bios/s390-ccw: provide a function to interpret LOADPARM value
- pc-bios/s390-ccw: provide entry selection on LOADPARM for SCSI disk
- pc-bios/s390-ccw: add boot entry selection for ECKD DASD
- pc-bios/s390-ccw: add boot entry selection to El Torito routine

Signed-off-by: Cornelia Huck 
---
 pc-bios/s390-ccw.img | Bin 26456 -> 26472 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/pc-bios/s390-ccw.img b/pc-bios/s390-ccw.img
index 
2a4adfa654844040dbc84e90a9c2e0af56fdb9fa..0b01d49495c607b67d3f1b2359395534631deb88
 100644
GIT binary patch
delta 10797
zcmZ`<3wTu3wch8x_
zD;l!JNA=cJp~sI}8Vp`RQAdj{qQDr%qEe+Du%+wH+<*yW`M$|sXYYSK
z_S$Q&{m40SROvjbG-oH-8io0I&0=ZiHqY;37nPfDi^{j18f`aU)c0ko@{7EA2h67L
zm&$`?z4D+8D35!8a93w>t>@gzC(a%##1!1iV>q@kXlECJ?zved_HOA`Mk2e{fCx39gsNEt%=d
zm)>5qN~^NHLLCRE`NA}CUdYZ76YGFO#McqVw
zqOi}X@QUdbTCAR782WhQQlMFgaW|E-*cSm8A%PmJUMQkBs}{SSBNV+!Ow>0B5oiT;
zd}(9#ikS=o5rxr^~qL9RGkvB)XD)sDlxOvM{P3Kl@1*cpur4{TS0fAv*UTL@N~`
zN3+)cjnCos`SxrHX^yp`nN$}4y-EuTQu%-;5=o^i
zDdZs|NhNxBAms7XiESiV{5~m9Z!+46c5Ky0vGL<;v{v?#^AV~~UQHN59`cL$pR$R!
zGxNoAkE0HzL3Dr=?+kg#2vS|;4GL2IfERsr?z};;;{8rBmW*KTI=#pE6HCjc_k^!a
zN$7}MSVP*LW>EmdUkH(PmO0)S#V;2b{t2`4_$C}DI@imfKL)}=dCIHzT>ZOzIeMFt
zBInpv3`o9(JNhL-20kkTwqkW`K%Td0{)CLc8N)Ct^d3w}enP?UDl>#Kw^HCt{uxGB
zP%xl>LNx0)rz2SS3cl_%L+t1Ty4=agpxi;ABNTfgi6hVnDY;UX+^%Eu=^ZvFU*cVfN)bbCtB=;
z9$QL?tdx)gFV?Qly6wVh>+bqv*k_uq{T9LX3}!$r@h08S8}EtVm5WxjD^E-i2_bK*
z5Zk(v7O(q?Y2g5j
z_n_up`9(~XXVE-1LgV;LCgB-8fN{qV#UD{y++kKZj#OUVA?sq@NsGqnPY5M+D%=WL
zKFG?KkH_Aw9FbRIYqgA(%r%Q;z0dRvqO*6igeOQz;-QI7xtLLp1lV{K0zNU9OTD^>G~|8kct6QXe7mu
zKr3nolGn43A|!r|-@S&3T2L@~x5qR60n^b$XWdCgT{*=_UKrB-DbyVEEkiq)?q=v>hQ7nb
zEoS2yjJ?}c2hxGjw!PYstv49YZ{Ov}LHXS47Ke&*M`^QT5OFV_!F)c1{Bxop
z@YSP!G7U}E=lXszE#yJ1)xUjLcnQ;9{q3JIfU$H-m*}R?C`(ry+CMZ4g
z`iy3*NygtpTC+uDlYbUluk+6!o?sm9X}{qvHnO}r(0@*;r>?fC=V6x8A>CK
zvY=6hl6D^7A;$UY+F-nXP(^l9
z^M?2KhV2jgVj%v6{~FsckuVfv*|u3sk7UyxXVYvot*y_{@;vQPa^JnM99~s
z3hP22tZY)2Vj-J^J|V?)?VLAw9|GtQR%K%_5`7hrip@wlr`(rtcOu;HU5cvI}X6
z#}wSLT@K<436kNfXcWHOTKqhMrCPh%skJ7fUidM|?D|2Yje6>Sk9uuyCpzsU(|=$O
zxXA;Te^HZBE3WHJgYguU*rtGB4*ujQ@|KX>Npa;UE`Di67kWW%tu8_vftRK)_E5
zdC|}KoRw6Ed=O*pN?{9?W|2hxY3p@bO(NXeV5}sy>jU#+;rwiU3vjr6h=+U2UGEdSY^*h-lxGtdLwr$Zi!shZMy0Y}9OYx<0g
zdOUbJA%}r?jGpEdExTa0e=-I*)wqI6PU_Wrh
z_ps>UEc#Iv9fee&?6-?y*r-@BO(;!bM*<`3$y|Ljjby|~}F2BaQ!w|6bHgYiun=)5V)8H=)?PbHH=K1Qc5ag{~>2P
z2dfu9mFt~@{9S3AFvmDSySA`JYuLI1EPyDG%k)1PhmHaF25__Cv0<)(*>TJX;yb5efKrfp?2A7+}j5$>;<^!=dWp*7T(b^D!j8LHvGF55!THUn3Bc3H_|C~{zEJ+m213A
zCx*1(JoMQu3}+Iys$Q%xV^Dq#3$DlwsrebaLm6I2Yjwy6)mCiic!M*4k!(xN9+etJ
zf~@b6Z<%_R#HX0_pK%h3V%6g)9_JUyE2(b({6Pe?pYNY1PGkWH*-5Of6HGr%v^$pi
z`RzUWYd8`$iH(#cQtUO@IWBYr>wCTA+`%08Gxu?(dDD^F#oUXwE5U|7>_dSpQdQ63
zcNokQAQg;IRVLfqnSN(IH{Fw;DGsV^~Ta3~nO?o_N8_7$*yMPc;>ESI=;HPbyrr`|@2_9|W8TP~iA5;kt?GQ;Y)*+{?h-
z+cKtrR9roY{BE8BA;`4``MoC4iXN0O+gtK%TBg5}$Jutde<+g=aq}*=^Tu7

[Qemu-devel] [PATCH 02/11] hw/s390x/ipl: enable LOADPARM in IPIB for a boot device

2017-04-25 Thread Cornelia Huck

From: Farhan Ali 

Insert the LOADPARM value to the IPL Information Parameter Block.

An IPL Information Parameter Block is created when "bootindex" is
specified for a device. If a user specifies "loadparm=", then we
store the loadparm value in the created IPIB for that boot device.

Initial patch from Eugene (jno) Dvurechenski.

Signed-off-by: Eugene (jno) Dvurechenski 
Signed-off-by: Farhan Ali 
Reviewed-by: Christian Borntraeger 
Signed-off-by: Cornelia Huck 
---
 hw/s390x/ipl.c | 31 +--
 hw/s390x/ipl.h |  3 +++
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
index 7978c7d52a..0711ee927a 100644
--- a/hw/s390x/ipl.c
+++ b/hw/s390x/ipl.c
@@ -17,8 +17,10 @@
 #include "cpu.h"
 #include "elf.h"
 #include "hw/loader.h"
+#include "hw/boards.h"
 #include "hw/s390x/virtio-ccw.h"
 #include "hw/s390x/css.h"
+#include "hw/s390x/ebcdic.h"
 #include "ipl.h"
 #include "qemu/error-report.h"
 
@@ -243,7 +245,6 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl)
 ipl->iplb.pbt = S390_IPL_TYPE_CCW;
 ipl->iplb.ccw.devno = cpu_to_be16(ccw_dev->sch->devno);
 ipl->iplb.ccw.ssid = ccw_dev->sch->ssid & 3;
-return true;
 } else if (sd) {
 SCSIBus *bus = scsi_bus_from_device(sd);
 VirtIOSCSI *vdev = container_of(bus, VirtIOSCSI, bus);
@@ -259,13 +260,39 @@ static bool s390_gen_initial_iplb(S390IPLState *ipl)
 ipl->iplb.scsi.channel = cpu_to_be16(sd->channel);
 ipl->iplb.scsi.devno = cpu_to_be16(ccw_dev->sch->devno);
 ipl->iplb.scsi.ssid = ccw_dev->sch->ssid & 3;
-return true;
+} else {
+return false; /* unknown device */
+}
+
+if (!s390_ipl_set_loadparm(ipl->iplb.loadparm)) {
+ipl->iplb.flags |= DIAG308_FLAGS_LP_VALID;
 }
+return true;
 }
 
 return false;
 }
 
+int s390_ipl_set_loadparm(uint8_t *loadparm)
+{
+MachineState *machine = MACHINE(qdev_get_machine());
+char *lp = object_property_get_str(OBJECT(machine), "loadparm", NULL);
+
+if (lp) {
+int i;
+
+/* lp is an uppercase string without leading/embedded spaces */
+for (i = 0; i < 8 && lp[i]; i++) {
+loadparm[i] = ascii2ebcdic[(uint8_t) lp[i]];
+}
+
+g_free(lp);
+return 0;
+}
+
+return -1;
+}
+
 static int load_netboot_image(Error **errp)
 {
 S390IPLState *ipl = get_ipl_device();
diff --git a/hw/s390x/ipl.h b/hw/s390x/ipl.h
index 46930e4c64..8a705e0428 100644
--- a/hw/s390x/ipl.h
+++ b/hw/s390x/ipl.h
@@ -57,6 +57,8 @@ struct IplBlockQemuScsi {
 } QEMU_PACKED;
 typedef struct IplBlockQemuScsi IplBlockQemuScsi;
 
+#define DIAG308_FLAGS_LP_VALID 0x80
+
 union IplParameterBlock {
 struct {
 uint32_t len;
@@ -82,6 +84,7 @@ union IplParameterBlock {
 } QEMU_PACKED;
 typedef union IplParameterBlock IplParameterBlock;
 
+int s390_ipl_set_loadparm(uint8_t *loadparm);
 void s390_ipl_update_diag308(IplParameterBlock *iplb);
 void s390_ipl_prepare_cpu(S390CPU *cpu);
 IplParameterBlock *s390_ipl_get_iplb(void);
-- 
2.11.0

[Qemu-devel] [PATCH 05/11] pc-bios/s390-ccw: Make ebcdic/ascii conversion public

2017-04-25 Thread Cornelia Huck

From: "Eugene (jno) Dvurechenski" 

Make the ebcdic_to_ascii function public to the rest of the
"bios" code, as the volume label is no longer the only thing
to be converted.

Signed-off-by: Eugene (jno) Dvurechenski 
Signed-off-by: Farhan Ali 
Reviewed-by: Christian Borntraeger 
Signed-off-by: Cornelia Huck 
---
 pc-bios/s390-ccw/bootmap.h  | 22 --
 pc-bios/s390-ccw/main.c | 11 +++
 pc-bios/s390-ccw/s390-ccw.h | 13 +
 3 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/pc-bios/s390-ccw/bootmap.h b/pc-bios/s390-ccw/bootmap.h
index bea168714b..9073de2238 100644
--- a/pc-bios/s390-ccw/bootmap.h
+++ b/pc-bios/s390-ccw/bootmap.h
@@ -264,28 +264,6 @@ typedef enum {
 
 /* utility code below */
 
-static const unsigned char ebc2asc[256] =
-  /* 0123456789abcdef0123456789abcdef */
-"" /* 1F */
-"" /* 3F */
-" ...<(+|&.!$*);." /* 5F first.chr.here.is.real.space 
*/
-"-/.,%_>?.`:#@'=\""/* 7F */
-".abcdefghi...jklmnopqr.." /* 9F */
-"..stuvwxyz.." /* BF */
-".ABCDEFGHI...JKLMNOPQR.." /* DF */
-"..STUVWXYZ..0123456789..";/* FF */
-
-static inline void ebcdic_to_ascii(const char *src,
-   char *dst,
-   unsigned int size)
-{
-unsigned int i;
-for (i = 0; i < size; i++) {
-unsigned c = src[i];
-dst[i] = ebc2asc[c];
-}
-}
-
 static inline void print_volser(const void *volser)
 {
 char ascii[8];
diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c
index 0946766d86..393d732353 100644
--- a/pc-bios/s390-ccw/main.c
+++ b/pc-bios/s390-ccw/main.c
@@ -15,6 +15,17 @@ char stack[PAGE_SIZE * 8] 
__attribute__((__aligned__(PAGE_SIZE)));
 static SubChannelId blk_schid = { .one = 1 };
 IplParameterBlock iplb __attribute__((__aligned__(PAGE_SIZE)));
 
+const unsigned char ebc2asc[256] =
+  /* 0123456789abcdef0123456789abcdef */
+"" /* 1F */
+"" /* 3F */
+" ...<(+|&.!$*);." /* 5F first.chr.here.is.real.space 
*/
+"-/.,%_>?.`:#@'=\""/* 7F */
+".abcdefghi...jklmnopqr.." /* 9F */
+"..stuvwxyz.." /* BF */
+".ABCDEFGHI...JKLMNOPQR.." /* DF */
+"..STUVWXYZ..0123456789..";/* FF */
+
 /*
  * Priniciples of Operations (SA22-7832-09) chapter 17 requires that
  * a subsystem-identification is at 184-187 and bytes 188-191 are zero
diff --git a/pc-bios/s390-ccw/s390-ccw.h b/pc-bios/s390-ccw/s390-ccw.h
index ded67bcbc6..4e0aab27d3 100644
--- a/pc-bios/s390-ccw/s390-ccw.h
+++ b/pc-bios/s390-ccw/s390-ccw.h
@@ -189,4 +189,17 @@ static inline void IPL_check(bool term, const char 
*message)
 }
 }
 
+extern const unsigned char ebc2asc[256];
+static inline void ebcdic_to_ascii(const char *src,
+   char *dst,
+   unsigned int size)
+{
+unsigned int i;
+
+for (i = 0; i < size; i++) {
+unsigned c = src[i];
+dst[i] = ebc2asc[c];
+}
+}
+
 #endif /* S390_CCW_H */
-- 
2.11.0

[Qemu-devel] [PATCH 07/11] pc-bios/s390-ccw: provide a function to interpret LOADPARM value

2017-04-25 Thread Cornelia Huck

From: Farhan Ali 

The LOADPARM value is fetched from SCP Read Info, but it's applied
only at the phase of bootmap interpretation. So let's read the LOADPARM
value and store it. Also provide a parsing function to detect numbers in
the LOADPARM which can be used during bootmap interpretation.

Remove a stray whitespace.

Initial patch from Eugene (jno) Dvurechenski.

Signed-off-by: Eugene (jno) Dvurechenski 
Signed-off-by: Farhan Ali 
Signed-off-by: Cornelia Huck 
---
 pc-bios/s390-ccw/main.c | 27 ++-
 pc-bios/s390-ccw/s390-ccw.h |  1 +
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c
index 393d732353..1cacc1b46f 100644
--- a/pc-bios/s390-ccw/main.c
+++ b/pc-bios/s390-ccw/main.c
@@ -14,6 +14,7 @@
 char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE)));
 static SubChannelId blk_schid = { .one = 1 };
 IplParameterBlock iplb __attribute__((__aligned__(PAGE_SIZE)));
+static char loadparm[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
 
 const unsigned char ebc2asc[256] =
   /* 0123456789abcdef0123456789abcdef */
@@ -40,7 +41,6 @@ void write_subsystem_identification(void)
 *zeroes = 0;
 }
 
-
 void panic(const char *string)
 {
 sclp_print(string);
@@ -48,6 +48,26 @@ void panic(const char *string)
 while (1) { }
 }
 
+unsigned int get_loadparm_index(void)
+{
+const char *lp = loadparm;
+int i;
+unsigned int idx = 0;
+
+for (i = 0; i < 8; i++) {
+char c = lp[i];
+
+if (c < '0' || c > '9') {
+break;
+}
+
+idx *= 10;
+idx += c - '0';
+}
+
+return idx;
+}
+
 static bool find_dev(Schib *schib, int dev_no)
 {
 int i, r;
@@ -84,6 +104,7 @@ static void virtio_setup(void)
 int ssid;
 bool found = false;
 uint16_t dev_no;
+char ldp[] = "LOADPARM=[]\n";
 VDev *vdev = virtio_get_device();
 
 /*
@@ -93,6 +114,10 @@ static void virtio_setup(void)
  */
 enable_mss_facility();
 
+sclp_get_loadparm_ascii(loadparm);
+memcpy(ldp + 10, loadparm, 8);
+sclp_print(ldp);
+
 if (store_iplb()) {
 switch (iplb.pbt) {
 case S390_IPL_TYPE_CCW:
diff --git a/pc-bios/s390-ccw/s390-ccw.h b/pc-bios/s390-ccw/s390-ccw.h
index 903d2ce816..07d8cbcb20 100644
--- a/pc-bios/s390-ccw/s390-ccw.h
+++ b/pc-bios/s390-ccw/s390-ccw.h
@@ -62,6 +62,7 @@ void consume_sclp_int(void);
 void panic(const char *string);
 void write_subsystem_identification(void);
 extern char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE)));
+unsigned int get_loadparm_index(void);
 
 /* sclp.c */
 void sclp_print(const char *string);
-- 
2.11.0

[Qemu-devel] [PATCH 04/11] util/qemu-config: Add loadparm to qemu machine_opts

2017-04-25 Thread Cornelia Huck

From: Farhan Ali 

Add S390CcwMachineState machine parameter "loadparm" to qemu machine_opts so
libvirt can query for it.

Signed-off-by: Farhan Ali 
Signed-off-by: Cornelia Huck 
---
 util/qemu-config.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/util/qemu-config.c b/util/qemu-config.c
index 5527100a01..405dd1a1d7 100644
--- a/util/qemu-config.c
+++ b/util/qemu-config.c
@@ -227,6 +227,12 @@ static QemuOptsList machine_opts = {
 .name = "dea-key-wrap",
 .type = QEMU_OPT_BOOL,
 .help = "enable/disable DEA key wrapping using the CPACF wrapping 
key",
+},{
+.name = "loadparm",
+.type = QEMU_OPT_STRING,
+.help = "Up to 8 chars in set of [A-Za-z0-9. ](lower case chars"
+" converted to upper case) to pass to machine"
+" loader, boot manager, and guest kernel",
 },
 { /* End of list */ }
 }
-- 
2.11.0

[Qemu-devel] [PATCH 06/11] pc-bios/s390-ccw: get LOADPARM stored in SCP Read Info

2017-04-25 Thread Cornelia Huck

From: Farhan Ali 

Obtain the loadparm value stored in SCP Read Info by performing
a SCLP Read Info request.

Rename sclp-ascii.c to sclp.c to reflect the changed scope of
the file.

Signed-off-by: Farhan Ali 
Reviewed-by: Christian Borntraeger 
Signed-off-by: Cornelia Huck 
---
 pc-bios/s390-ccw/Makefile |  2 +-
 pc-bios/s390-ccw/s390-ccw.h   |  3 ++-
 pc-bios/s390-ccw/{sclp-ascii.c => sclp.c} | 12 
 pc-bios/s390-ccw/sclp.h   |  2 ++
 4 files changed, 17 insertions(+), 2 deletions(-)
 rename pc-bios/s390-ccw/{sclp-ascii.c => sclp.c} (87%)

diff --git a/pc-bios/s390-ccw/Makefile b/pc-bios/s390-ccw/Makefile
index 0339c24789..79a46b6735 100644
--- a/pc-bios/s390-ccw/Makefile
+++ b/pc-bios/s390-ccw/Makefile
@@ -9,7 +9,7 @@ $(call set-vpath, $(SRC_PATH)/pc-bios/s390-ccw)
 
 .PHONY : all clean build-all
 
-OBJECTS = start.o main.o bootmap.o sclp-ascii.o virtio.o virtio-scsi.o
+OBJECTS = start.o main.o bootmap.o sclp.o virtio.o virtio-scsi.o
 QEMU_CFLAGS := $(filter -W%, $(QEMU_CFLAGS))
 QEMU_CFLAGS += -ffreestanding -fno-delete-null-pointer-checks -msoft-float
 QEMU_CFLAGS += -march=z900 -fPIE -fno-strict-aliasing
diff --git a/pc-bios/s390-ccw/s390-ccw.h b/pc-bios/s390-ccw/s390-ccw.h
index 4e0aab27d3..903d2ce816 100644
--- a/pc-bios/s390-ccw/s390-ccw.h
+++ b/pc-bios/s390-ccw/s390-ccw.h
@@ -63,9 +63,10 @@ void panic(const char *string);
 void write_subsystem_identification(void);
 extern char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE)));
 
-/* sclp-ascii.c */
+/* sclp.c */
 void sclp_print(const char *string);
 void sclp_setup(void);
+void sclp_get_loadparm_ascii(char *loadparm);
 
 /* virtio.c */
 unsigned long virtio_load_direct(ulong rec_list1, ulong rec_list2,
diff --git a/pc-bios/s390-ccw/sclp-ascii.c b/pc-bios/s390-ccw/sclp.c
similarity index 87%
rename from pc-bios/s390-ccw/sclp-ascii.c
rename to pc-bios/s390-ccw/sclp.c
index dc1c3e4f4d..a1639baed7 100644
--- a/pc-bios/s390-ccw/sclp-ascii.c
+++ b/pc-bios/s390-ccw/sclp.c
@@ -80,3 +80,15 @@ void sclp_print(const char *str)
 
 sclp_service_call(SCLP_CMD_WRITE_EVENT_DATA, sccb);
 }
+
+void sclp_get_loadparm_ascii(char *loadparm)
+{
+
+ReadInfo *sccb = (void *)_sccb;
+
+memset((char *)_sccb, 0, sizeof(ReadInfo));
+sccb->h.length = sizeof(ReadInfo);
+if (!sclp_service_call(SCLP_CMDW_READ_SCP_INFO, sccb)) {
+ebcdic_to_ascii((char *) sccb->loadparm, loadparm, 8);
+}
+}
diff --git a/pc-bios/s390-ccw/sclp.h b/pc-bios/s390-ccw/sclp.h
index 3cbfb78930..0dd987ff5d 100644
--- a/pc-bios/s390-ccw/sclp.h
+++ b/pc-bios/s390-ccw/sclp.h
@@ -55,6 +55,8 @@ typedef struct ReadInfo {
 SCCBHeader h;
 uint16_t rnmax;
 uint8_t rnsize;
+uint8_t reserved[13];
+uint8_t loadparm[8];
 } __attribute__((packed)) ReadInfo;
 
 typedef struct SCCB {
-- 
2.11.0

[Qemu-devel] [PATCH 01/11] hw/s390x: provide loadparm property for the machine

2017-04-25 Thread Cornelia Huck

From: Farhan Ali 

In order to specify the LOADPARM value one may now add ",loadparm=xxx"
parameter to the "-machine s390-ccw-virtio" option.

The property setter will normalize and check the value provided much
like the way the HMC does.

The value is stored, but not used at the moment.

Initial patch from Eugene (jno) Dvurechenski.

Signed-off-by: Eugene (jno) Dvurechenski 
Signed-off-by: Farhan Ali 
Reviewed-by: Christian Borntraeger 
Signed-off-by: Cornelia Huck 
---
 hw/s390x/s390-virtio-ccw.c | 37 +
 include/hw/s390x/s390-virtio-ccw.h |  1 +
 2 files changed, 38 insertions(+)

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 04bd0ebe40..fdd4384ff0 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -274,6 +274,36 @@ bool cpu_model_allowed(void)
 return true;
 }
 
+static char *machine_get_loadparm(Object *obj, Error **errp)
+{
+S390CcwMachineState *ms = S390_CCW_MACHINE(obj);
+
+return g_memdup(ms->loadparm, sizeof(ms->loadparm));
+}
+
+static void machine_set_loadparm(Object *obj, const char *val, Error **errp)
+{
+S390CcwMachineState *ms = S390_CCW_MACHINE(obj);
+int i;
+
+for (i = 0; i < sizeof(ms->loadparm) && val[i]; i++) {
+uint8_t c = toupper(val[i]); /* mimic HMC */
+
+if (('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') || (c == '.') ||
+(c == ' ')) {
+ms->loadparm[i] = c;
+} else {
+error_setg(errp, "LOADPARM: invalid character '%c' (ASCII 0x%02x)",
+   c, c);
+return;
+}
+}
+
+for (; i < sizeof(ms->loadparm); i++) {
+ms->loadparm[i] = ' '; /* pad right with spaces */
+}
+}
+
 static inline void s390_machine_initfn(Object *obj)
 {
 object_property_add_bool(obj, "aes-key-wrap",
@@ -291,6 +321,13 @@ static inline void s390_machine_initfn(Object *obj)
 "enable/disable DEA key wrapping using the CPACF wrapping key",
 NULL);
 object_property_set_bool(obj, true, "dea-key-wrap", NULL);
+object_property_add_str(obj, "loadparm",
+machine_get_loadparm, machine_set_loadparm, NULL);
+object_property_set_description(obj, "loadparm",
+"Up to 8 chars in set of [A-Za-z0-9. ] (lower case chars converted"
+" to upper case) to pass to machine loader, boot manager,"
+" and guest kernel",
+NULL);
 }
 
 static const TypeInfo ccw_machine_info = {
diff --git a/include/hw/s390x/s390-virtio-ccw.h 
b/include/hw/s390x/s390-virtio-ccw.h
index 6ecae00386..7b8a3e4d74 100644
--- a/include/hw/s390x/s390-virtio-ccw.h
+++ b/include/hw/s390x/s390-virtio-ccw.h
@@ -28,6 +28,7 @@ typedef struct S390CcwMachineState {
 /*< public >*/
 bool aes_key_wrap;
 bool dea_key_wrap;
+uint8_t loadparm[8];
 } S390CcwMachineState;
 
 typedef struct S390CcwMachineClass {
-- 
2.11.0

[Qemu-devel] [PATCH 03/11] hw/s390x/sclp: update LOADPARM in SCP Info

2017-04-25 Thread Cornelia Huck

From: Farhan Ali 

LOADPARM has two copies:
1. in SCP Information Block
2. in IPL Information Parameter Block

So, update SCLP intrinsics now. We always store LOADPARM in SCP
information block even if we don't have a valid IPL Information
Parameter Block.

Initial patch from Eugene (jno) Dvurechenski.

Signed-off-by: Eugene (jno) Dvurechenski 
Signed-off-by: Farhan Ali 
Reviewed-by: Christian Borntraeger 
Signed-off-by: Cornelia Huck 
---
 hw/s390x/sclp.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c
index e741da1141..b4f6dd58dd 100644
--- a/hw/s390x/sclp.c
+++ b/hw/s390x/sclp.c
@@ -23,6 +23,7 @@
 #include "hw/s390x/sclp.h"
 #include "hw/s390x/event-facility.h"
 #include "hw/s390x/s390-pci-bus.h"
+#include "hw/s390x/ipl.h"
 
 static inline SCLPDevice *get_sclp_device(void)
 {
@@ -57,6 +58,7 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb)
 int cpu_count = 0;
 int rnsize, rnmax;
 int slots = MIN(machine->ram_slots, s390_get_memslot_count(kvm_state));
+IplParameterBlock *ipib = s390_ipl_get_iplb();
 
 CPU_FOREACH(cpu) {
 cpu_count++;
@@ -129,6 +131,13 @@ static void read_SCP_info(SCLPDevice *sclp, SCCB *sccb)
 read_info->rnmax2 = cpu_to_be64(rnmax);
 }
 
+if (ipib && ipib->flags & DIAG308_FLAGS_LP_VALID) {
+memcpy(&read_info->loadparm, &ipib->loadparm,
+   sizeof(read_info->loadparm));
+} else {
+s390_ipl_set_loadparm(read_info->loadparm);
+}
+
 sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_READ_COMPLETION);
 }
 
-- 
2.11.0

[Qemu-devel] [PATCH 00/11] s390x: support for LOADPARM

2017-04-25 Thread Cornelia Huck

This patchset implements the LOADPARM machine property. This is
exposed via SCLP and diagnose 308 to the guest. It will be used
by the bios to select a boot entry; guest operating systems can
use it as well.

Cornelia Huck (1):
  pc-bios/s390-ccw.img: update image

Eugene (jno) Dvurechenski (2):
  pc-bios/s390-ccw: Make ebcdic/ascii conversion public
  pc-bios/s390-ccw: add boot entry selection to El Torito routine

Farhan Ali (8):
  hw/s390x: provide loadparm property for the machine
  hw/s390x/ipl: enable LOADPARM in IPIB for a boot device
  hw/s390x/sclp: update LOADPARM in SCP Info
  util/qemu-config: Add loadparm to qemu machine_opts
  pc-bios/s390-ccw: get LOADPARM stored in SCP Read Info
  pc-bios/s390-ccw: provide a function to interpret LOADPARM value
  pc-bios/s390-ccw: provide entry selection on LOADPARM for SCSI disk
  pc-bios/s390-ccw: add boot entry selection for ECKD DASD

 hw/s390x/ipl.c|  31 ++--
 hw/s390x/ipl.h|   3 +++
 hw/s390x/s390-virtio-ccw.c|  37 +
 hw/s390x/sclp.c   |   9 +++
 include/hw/s390x/s390-virtio-ccw.h|   1 +
 pc-bios/s390-ccw.img  | Bin 26456 -> 26472 bytes
 pc-bios/s390-ccw/Makefile |   2 +-
 pc-bios/s390-ccw/bootmap.c|  34 +-
 pc-bios/s390-ccw/bootmap.h|  24 +--
 pc-bios/s390-ccw/main.c   |  38 +-
 pc-bios/s390-ccw/s390-ccw.h   |  17 -
 pc-bios/s390-ccw/{sclp-ascii.c => sclp.c} |  12 ++
 pc-bios/s390-ccw/sclp.h   |   2 ++
 util/qemu-config.c|   6 +
 14 files changed, 177 insertions(+), 39 deletions(-)
 rename pc-bios/s390-ccw/{sclp-ascii.c => sclp.c} (87%)

-- 
2.11.0

[Qemu-devel] [PATCH] tcg: Initialize return value after exit_atomic

2017-04-25 Thread Richard Henderson

Users of tcg_gen_atomic_cmpxchg and do_atomic_op rightfully utilize
the output.  Even though this code is dead, it gets translated, and
without the initialization we encounter a tcg_error.

Reported-by: Nikunj A Dadhania 
Signed-off-by: Richard Henderson 
---
 tcg/tcg-op.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 95a39b7..6b1f415 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -2861,6 +2861,9 @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, 
TCGv_i64 cmpv,
 #endif
 #else
 gen_helper_exit_atomic(tcg_ctx.tcg_env);
+/* Produce a result, so that we have a well-formed opcode stream
+   with respect to uses of the result in the (dead) code following.  */
+tcg_gen_movi_i64(retv, 0);
 #endif /* CONFIG_ATOMIC64 */
 } else {
 TCGv_i32 c32 = tcg_temp_new_i32();
@@ -2966,6 +2969,9 @@ static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, 
TCGv_i64 val,
 #endif
 #else
 gen_helper_exit_atomic(tcg_ctx.tcg_env);
+/* Produce a result, so that we have a well-formed opcode stream
+   with respect to uses of the result in the (dead) code following.  */
+tcg_gen_movi_i64(ret, 0);
 #endif /* CONFIG_ATOMIC64 */
 } else {
 TCGv_i32 v32 = tcg_temp_new_i32();
-- 
2.9.3

[Qemu-devel] [PULL 1/4] hmp: gpa2hva and gpa2hpa hostaddr command

2017-04-25 Thread Dr. David Alan Gilbert (git)

From: Paolo Bonzini 

These commands are useful when testing machine-check passthrough.
gpa2hva is useful to inject a MADV_HWPOISON madvise from gdb, while
gpa2hpa is useful to inject an error with the mce-inject kernel
module.

Signed-off-by: Paolo Bonzini 
Message-Id: <1490021158-4469-1-git-send-email-pbonz...@redhat.com>
Signed-off-by: Paolo Bonzini 
Message-Id: <20170420133058.12911-1-pbonz...@redhat.com>
Reviewed-by: Dr. David Alan Gilbert 
Signed-off-by: Dr. David Alan Gilbert 
---
 hmp-commands.hx |  32 ++
 monitor.c   | 101 
 2 files changed, 133 insertions(+)

diff --git a/hmp-commands.hx b/hmp-commands.hx
index 88192817b2..0aca984261 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -526,6 +526,38 @@ Dump 80 16 bit values at the start of the video memory.
 ETEXI
 
 {
+.name   = "gpa2hva",
+.args_type  = "addr:l",
+.params = "addr",
+.help   = "print the host virtual address corresponding to a guest 
physical address",
+.cmd= hmp_gpa2hva,
+},
+
+STEXI
+@item gpa2hva @var{addr}
+@findex gpa2hva
+Print the host virtual address at which the guest's physical address @var{addr}
+is mapped.
+ETEXI
+
+#ifdef CONFIG_LINUX
+{
+.name   = "gpa2hpa",
+.args_type  = "addr:l",
+.params = "addr",
+.help   = "print the host physical address corresponding to a 
guest physical address",
+.cmd= hmp_gpa2hpa,
+},
+#endif
+
+STEXI
+@item gpa2hpa @var{addr}
+@findex gpa2hpa
+Print the host physical address at which the guest's physical address 
@var{addr}
+is mapped.
+ETEXI
+
+{
 .name   = "p|print",
 .args_type  = "fmt:/,val:l",
 .params = "/fmt expr",
diff --git a/monitor.c b/monitor.c
index be282ecb80..a27dc8003f 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1421,6 +1421,107 @@ static void hmp_physical_memory_dump(Monitor *mon, 
const QDict *qdict)
 memory_dump(mon, count, format, size, addr, 1);
 }
 
+static void *gpa2hva(MemoryRegion **p_mr, hwaddr addr, Error **errp)
+{
+MemoryRegionSection mrs = memory_region_find(get_system_memory(),
+ addr, 1);
+
+if (!mrs.mr) {
+error_setg(errp, "No memory is mapped at address 0x%" HWADDR_PRIx, 
addr);
+return NULL;
+}
+
+if (!memory_region_is_ram(mrs.mr) && !memory_region_is_romd(mrs.mr)) {
+error_setg(errp, "Memory at address 0x%" HWADDR_PRIx "is not RAM", 
addr);
+memory_region_unref(mrs.mr);
+return NULL;
+}
+
+*p_mr = mrs.mr;
+return qemu_map_ram_ptr(mrs.mr->ram_block, mrs.offset_within_region);
+}
+
+static void hmp_gpa2hva(Monitor *mon, const QDict *qdict)
+{
+hwaddr addr = qdict_get_int(qdict, "addr");
+Error *local_err = NULL;
+MemoryRegion *mr = NULL;
+void *ptr;
+
+ptr = gpa2hva(&mr, addr, &local_err);
+if (local_err) {
+error_report_err(local_err);
+return;
+}
+
+monitor_printf(mon, "Host virtual address for 0x%" HWADDR_PRIx
+   " (%s) is %p\n",
+   addr, mr->name, ptr);
+
+memory_region_unref(mr);
+}
+
+#ifdef CONFIG_LINUX
+static uint64_t vtop(void *ptr, Error **errp)
+{
+uint64_t pinfo;
+uint64_t ret = -1;
+uintptr_t addr = (uintptr_t) ptr;
+uintptr_t pagesize = getpagesize();
+off_t offset = addr / pagesize * sizeof(pinfo);
+int fd;
+
+fd = open("/proc/self/pagemap", O_RDONLY);
+if (fd == -1) {
+error_setg_errno(errp, errno, "Cannot open /proc/self/pagemap");
+return -1;
+}
+
+/* Force copy-on-write if necessary.  */
+atomic_add((uint8_t *)ptr, 0);
+
+if (pread(fd, &pinfo, sizeof(pinfo), offset) != sizeof(pinfo)) {
+error_setg_errno(errp, errno, "Cannot read pagemap");
+goto out;
+}
+if ((pinfo & (1ull << 63)) == 0) {
+error_setg(errp, "Page not present");
+goto out;
+}
+ret = ((pinfo & 0x007full) * pagesize) | (addr & (pagesize - 
1));
+
+out:
+close(fd);
+return ret;
+}
+
+static void hmp_gpa2hpa(Monitor *mon, const QDict *qdict)
+{
+hwaddr addr = qdict_get_int(qdict, "addr");
+Error *local_err = NULL;
+MemoryRegion *mr = NULL;
+void *ptr;
+uint64_t physaddr;
+
+ptr = gpa2hva(&mr, addr, &local_err);
+if (local_err) {
+error_report_err(local_err);
+return;
+}
+
+physaddr = vtop(ptr, &local_err);
+if (local_err) {
+error_report_err(local_err);
+} else {
+monitor_printf(mon, "Host physical address for 0x%" HWADDR_PRIx
+   " (%s) is 0x%" PRIx64 "\n",
+   addr, mr->name, (uint64_t) physaddr);
+}
+
+memory_region_unref(mr);
+}
+#endif
+
 static void do_print(Monitor *mon, const QDict *qdict)
 {

[Qemu-devel] [PULL 2/4] libqtest: Ignore QMP events when parsing the response for HMP commands

2017-04-25 Thread Dr. David Alan Gilbert (git)

From: Thomas Huth 

When running certain HMP commands (like "device_del") via QMP, we
can sometimes get a QMP event in the response first, so that the
"g_assert(ret)" statement in qtest_hmp() triggers and the test
fails. Fix this by ignoring such QMP events while looking for the
real return value from QMP.

Signed-off-by: Thomas Huth 
Message-Id: <1490860207-8302-2-git-send-email-th...@redhat.com>
Reviewed-by: Markus Armbruster 
Signed-off-by: Dr. David Alan Gilbert 
  Added note to qtest_hmp/qtest_hmpv's header description to say
  it discards events
---
 tests/libqtest.c | 6 ++
 tests/libqtest.h | 4 +++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/tests/libqtest.c b/tests/libqtest.c
index 99b1195355..0b0bf1d460 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -588,6 +588,12 @@ char *qtest_hmpv(QTestState *s, const char *fmt, va_list 
ap)
  " 'arguments': {'command-line': %s}}",
  cmd);
 ret = g_strdup(qdict_get_try_str(resp, "return"));
+while (ret == NULL && qdict_get_try_str(resp, "event")) {
+/* Ignore asynchronous QMP events */
+QDECREF(resp);
+resp = qtest_qmp_receive(s);
+ret = g_strdup(qdict_get_try_str(resp, "return"));
+}
 g_assert(ret);
 QDECREF(resp);
 g_free(cmd);
diff --git a/tests/libqtest.h b/tests/libqtest.h
index 2c9962d94f..ee237448da 100644
--- a/tests/libqtest.h
+++ b/tests/libqtest.h
@@ -132,11 +132,12 @@ void qtest_qmp_eventwait(QTestState *s, const char 
*event);
 QDict *qtest_qmp_eventwait_ref(QTestState *s, const char *event);
 
 /**
- * qtest_hmpv:
+ * qtest_hmp:
  * @s: #QTestState instance to operate on.
  * @fmt...: HMP command to send to QEMU
  *
  * Send HMP command to QEMU via QMP's human-monitor-command.
+ * QMP events are discarded.
  *
  * Returns: the command's output.  The caller should g_free() it.
  */
@@ -149,6 +150,7 @@ char *qtest_hmp(QTestState *s, const char *fmt, ...);
  * @ap: HMP command arguments
  *
  * Send HMP command to QEMU via QMP's human-monitor-command.
+ * QMP events are discarded.
  *
  * Returns: the command's output.  The caller should g_free() it.
  */
-- 
2.12.2

[Qemu-devel] [PULL 4/4] tests: Add a tester for HMP commands

2017-04-25 Thread Dr. David Alan Gilbert (git)

From: Thomas Huth 

HMP commands do not get any automatic testing yet, so on certain
QEMU machines, some HMP commands were causing crashes in the past.
Thus we should test HMP commands in our test suite, too, to avoid
that such problems creep in again in the future.

Signed-off-by: Thomas Huth 
Message-Id: <1493097407-20482-1-git-send-email-th...@redhat.com>
Reviewed-by: Dr. David Alan Gilbert 
Signed-off-by: Dr. David Alan Gilbert 
---
 tests/Makefile.include |   2 +
 tests/test-hmp.c   | 161 +
 2 files changed, 163 insertions(+)
 create mode 100644 tests/test-hmp.c

diff --git a/tests/Makefile.include b/tests/Makefile.include
index 579ec07cce..31931c0d77 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -331,6 +331,7 @@ check-qtest-xtensaeb-y = $(check-qtest-xtensa-y)
 check-qtest-s390x-y = tests/boot-serial-test$(EXESUF)
 
 check-qtest-generic-y += tests/qom-test$(EXESUF)
+check-qtest-generic-y += tests/test-hmp$(EXESUF)
 
 qapi-schema += alternate-any.json
 qapi-schema += alternate-array.json
@@ -720,6 +721,7 @@ tests/tpci200-test$(EXESUF): tests/tpci200-test.o
 tests/display-vga-test$(EXESUF): tests/display-vga-test.o
 tests/ipoctal232-test$(EXESUF): tests/ipoctal232-test.o
 tests/qom-test$(EXESUF): tests/qom-test.o
+tests/test-hmp$(EXESUF): tests/test-hmp.o
 tests/drive_del-test$(EXESUF): tests/drive_del-test.o $(libqos-pc-obj-y)
 tests/qdev-monitor-test$(EXESUF): tests/qdev-monitor-test.o $(libqos-pc-obj-y)
 tests/nvme-test$(EXESUF): tests/nvme-test.o
diff --git a/tests/test-hmp.c b/tests/test-hmp.c
new file mode 100644
index 00..99e35ec15a
--- /dev/null
+++ b/tests/test-hmp.c
@@ -0,0 +1,161 @@
+/*
+ * Test HMP commands.
+ *
+ * Copyright (c) 2017 Red Hat Inc.
+ *
+ * Author:
+ *Thomas Huth 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2
+ * or later. See the COPYING file in the top-level directory.
+ *
+ * This test calls some HMP commands for all machines that the current
+ * QEMU binary provides, to check whether they terminate successfully
+ * (i.e. do not crash QEMU).
+ */
+
+#include "qemu/osdep.h"
+#include "libqtest.h"
+
+static int verbose;
+
+static const char *hmp_cmds[] = {
+"boot_set ndc",
+"chardev-add null,id=testchardev1",
+"chardev-remove testchardev1",
+"commit all",
+"cpu-add 1",
+"cpu 0",
+"device_add ?",
+"device_add usb-mouse,id=mouse1",
+"mouse_button 7",
+"mouse_move 10 10",
+"mouse_button 0",
+"device_del mouse1",
+"dump-guest-memory /dev/null 0 4096",
+"gdbserver",
+"host_net_add user id=net0",
+"hostfwd_add tcp::43210-:43210",
+"hostfwd_remove tcp::43210-:43210",
+"host_net_remove 0 net0",
+"i /w 0",
+"log all",
+"log none",
+"memsave 0 4096 \"/dev/null\"",
+"migrate_set_cache_size 1",
+"migrate_set_downtime 1",
+"migrate_set_speed 1",
+"netdev_add user,id=net1",
+"set_link net1 off",
+"set_link net1 on",
+"netdev_del net1",
+"nmi",
+"o /w 0 0x1234",
+"object_add memory-backend-ram,id=mem1,size=256M",
+"object_del mem1",
+"pmemsave 0 4096 \"/dev/null\"",
+"p $pc + 8",
+"qom-list /",
+"qom-set /machine initrd test",
+"screendump /dev/null",
+"sendkey x",
+"singlestep on",
+"wavcapture /dev/null",
+"stopcapture 0",
+"sum 0 512",
+"x /8i 0x100",
+"xp /16x 0",
+NULL
+};
+
+/* Run through the list of pre-defined commands */
+static void test_commands(void)
+{
+char *response;
+int i;
+
+for (i = 0; hmp_cmds[i] != NULL; i++) {
+if (verbose) {
+fprintf(stderr, "\t%s\n", hmp_cmds[i]);
+}
+response = hmp(hmp_cmds[i]);
+g_free(response);
+}
+
+}
+
+/* Run through all info commands and call them blindly (without arguments) */
+static void test_info_commands(void)
+{
+char *resp, *info, *info_buf, *endp;
+
+info_buf = info = hmp("help info");
+
+while (*info) {
+/* Extract the info command, ignore parameters and description */
+g_assert(strncmp(info, "info ", 5) == 0);
+endp = strchr(&info[5], ' ');
+g_assert(endp != NULL);
+*endp = '\0';
+/* Now run the info command */
+if (verbose) {
+fprintf(stderr, "\t%s\n", info);
+}
+resp = hmp(info);
+g_free(resp);
+/* And move forward to the next line */
+info = strchr(endp + 1, '\n');
+if (!info) {
+break;
+}
+info += 1;
+}
+
+g_free(info_buf);
+}
+
+static void test_machine(gconstpointer data)
+{
+const char *machine = data;
+char *args;
+
+args = g_strdup_printf("-S -M %s", machine);
+qtest_start(args);
+
+test_info_commands();
+test_commands();
+
+qtest_end();
+g_free(args);
+g_free((void *)data);
+}
+

[Qemu-devel] [PULL 3/4] libqtest: Add a generic function to run a callback function for every machine

2017-04-25 Thread Dr. David Alan Gilbert (git)

From: Thomas Huth 

Some tests need to run single tests for every available machine of the
current QEMU binary. To avoid code duplication, let's extract this
code that deals with 'query-machines' into a separate function.

Signed-off-by: Thomas Huth 
Message-Id: <1490860207-8302-3-git-send-email-th...@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Dr. David Alan Gilbert 
---
 tests/libqtest.c| 30 +
 tests/libqtest.h|  8 +
 tests/pc-cpu-test.c | 95 -
 tests/qom-test.c| 36 
 4 files changed, 80 insertions(+), 89 deletions(-)

diff --git a/tests/libqtest.c b/tests/libqtest.c
index 0b0bf1d460..512c150266 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -946,3 +946,33 @@ bool qtest_big_endian(QTestState *s)
 {
 return s->big_endian;
 }
+
+void qtest_cb_for_every_machine(void (*cb)(const char *machine))
+{
+QDict *response, *minfo;
+QList *list;
+const QListEntry *p;
+QObject *qobj;
+QString *qstr;
+const char *mname;
+
+qtest_start("-machine none");
+response = qmp("{ 'execute': 'query-machines' }");
+g_assert(response);
+list = qdict_get_qlist(response, "return");
+g_assert(list);
+
+for (p = qlist_first(list); p; p = qlist_next(p)) {
+minfo = qobject_to_qdict(qlist_entry_obj(p));
+g_assert(minfo);
+qobj = qdict_get(minfo, "name");
+g_assert(qobj);
+qstr = qobject_to_qstring(qobj);
+g_assert(qstr);
+mname = qstring_get_str(qstr);
+cb(mname);
+}
+
+qtest_end();
+QDECREF(response);
+}
diff --git a/tests/libqtest.h b/tests/libqtest.h
index ee237448da..38bc1e9953 100644
--- a/tests/libqtest.h
+++ b/tests/libqtest.h
@@ -919,4 +919,12 @@ void qmp_fd_send(int fd, const char *fmt, ...);
 QDict *qmp_fdv(int fd, const char *fmt, va_list ap);
 QDict *qmp_fd(int fd, const char *fmt, ...);
 
+/**
+ * qtest_cb_for_every_machine:
+ * @cb: Pointer to the callback function
+ *
+ *  Call a callback function for every name of all available machines.
+ */
+void qtest_cb_for_every_machine(void (*cb)(const char *machine));
+
 #endif
diff --git a/tests/pc-cpu-test.c b/tests/pc-cpu-test.c
index c3a2633d3c..c4211a4e85 100644
--- a/tests/pc-cpu-test.c
+++ b/tests/pc-cpu-test.c
@@ -79,69 +79,46 @@ static void test_data_free(gpointer data)
 g_free(pc);
 }
 
-static void add_pc_test_cases(void)
+static void add_pc_test_case(const char *mname)
 {
-QDict *response, *minfo;
-QList *list;
-const QListEntry *p;
-QObject *qobj;
-QString *qstr;
-const char *mname;
 char *path;
 PCTestData *data;
 
-qtest_start("-machine none");
-response = qmp("{ 'execute': 'query-machines' }");
-g_assert(response);
-list = qdict_get_qlist(response, "return");
-g_assert(list);
-
-for (p = qlist_first(list); p; p = qlist_next(p)) {
-minfo = qobject_to_qdict(qlist_entry_obj(p));
-g_assert(minfo);
-qobj = qdict_get(minfo, "name");
-g_assert(qobj);
-qstr = qobject_to_qstring(qobj);
-g_assert(qstr);
-mname = qstring_get_str(qstr);
-if (!g_str_has_prefix(mname, "pc-")) {
-continue;
-}
-data = g_malloc(sizeof(PCTestData));
-data->machine = g_strdup(mname);
-data->cpu_model = "Haswell"; /* 1.3+ theoretically */
-data->sockets = 1;
-data->cores = 3;
-data->threads = 2;
-data->maxcpus = data->sockets * data->cores * data->threads * 2;
-if (g_str_has_suffix(mname, "-1.4") ||
-(strcmp(mname, "pc-1.3") == 0) ||
-(strcmp(mname, "pc-1.2") == 0) ||
-(strcmp(mname, "pc-1.1") == 0) ||
-(strcmp(mname, "pc-1.0") == 0) ||
-(strcmp(mname, "pc-0.15") == 0) ||
-(strcmp(mname, "pc-0.14") == 0) ||
-(strcmp(mname, "pc-0.13") == 0) ||
-(strcmp(mname, "pc-0.12") == 0) ||
-(strcmp(mname, "pc-0.11") == 0) ||
-(strcmp(mname, "pc-0.10") == 0)) {
-path = g_strdup_printf("cpu/%s/init/%ux%ux%u=%u",
-   mname, data->sockets, data->cores,
-   data->threads, data->maxcpus);
-qtest_add_data_func_full(path, data, test_pc_without_cpu_add,
- test_data_free);
-g_free(path);
-} else {
-path = g_strdup_printf("cpu/%s/add/%ux%ux%u=%u",
-   mname, data->sockets, data->cores,
-   data->threads, data->maxcpus);
-qtest_add_data_func_full(path, data, test_pc_with_cpu_add,
- test_data_free);
-g_free(path);
-}
+if (!g_str_has_prefix(mname, "pc-")) {
+return;
+}
+data

[Qemu-devel] [PULL 0/4] hmp queue

2017-04-25 Thread Dr. David Alan Gilbert (git)

From: "Dr. David Alan Gilbert" <dgilb...@redhat.com>

The following changes since commit f4b5b021c847669b1c78050aea26fe9abceef6dd:

  Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into 
staging (2017-04-25 09:21:54 +0100)

are available in the git repository at:

  git://github.com/dagrh/qemu.git tags/pull-hmp-20170425

for you to fetch changes up to 1eb8e78dd1cd4e0b4170fd42f6d8882c867f334b:

  tests: Add a tester for HMP commands (2017-04-25 11:26:52 +0100)


HMP pull with fixed test/strcmp case


Paolo Bonzini (1):
  hmp: gpa2hva and gpa2hpa hostaddr command

Thomas Huth (3):
  libqtest: Ignore QMP events when parsing the response for HMP commands
  libqtest: Add a generic function to run a callback function for every 
machine
  tests: Add a tester for HMP commands

 hmp-commands.hx|  32 ++
 monitor.c  | 101 +++
 tests/Makefile.include |   2 +
 tests/libqtest.c   |  36 +++
 tests/libqtest.h   |  12 +++-
 tests/pc-cpu-test.c|  95 +++--
 tests/qom-test.c   |  36 ++-
 tests/test-hmp.c   | 161 +
 8 files changed, 385 insertions(+), 90 deletions(-)
 create mode 100644 tests/test-hmp.c

Re: [Qemu-devel] [Qemu-devel RFC v2 1/4] msf2: Add Smartfusion2 System timer

2017-04-25 Thread sundeep subbaraya

Hi Alistair,

On Mon, Apr 24, 2017 at 11:14 PM, Alistair Francis  wrote:
 +
 +isr = !!(st->regs[R_RIS] & TIMER_RIS_ACK);
 +ier = !!(st->regs[R_CTRL] & TIMER_CTRL_INTR);
 +
 +qemu_set_irq(st->irq, (ier && isr));
 +}
 +
 +static uint64_t
 +timer_read(void *opaque, hwaddr addr, unsigned int size)
 +{
 +struct timerblock *t = opaque;
 +struct msf2_timer *st;
 +uint32_t r = 0;
 +unsigned int timer;
 +int isr;
 +int ier;
 +
 +addr >>= 2;
 +timer = timer_from_addr(addr);
 +st = &t->timers[timer];
 +
 +if (timer) {
 +addr -= 6;
 +}
>>>
>>> Isn't this timer logic just checking if (addr >> 2) == R_MAX and if it
>>> is set (addr >> 2) back to zero? This seems an overly complex way to
>>> check that.
>> I did not get you clearly. Do you want me to write like this:
>> unsigned int timer = 0;
>>
>> addr >>= 2;
>> if (addr >= R_MAX) {
>> timer = 1;
>> addr =  addr - R_MAX;
>> }
>
> Yeah, I think this is clearer than what you had earlier.
>
> Although why do you have to subtract R_MAX, shouldn't it just be an
> error if accessing values larger than R_MAX?

Sorry I forgot about replying to this in earlier mail.
There are two independent timer blocks accessing same base address.
Based on offset passed in read/write functions we figure out
which block has to be handled.
0x0 to 0x14 -> timer1
0x18 to 0x2C -> timer2
Here R_MAX is 0x18 hence addr >= R_MAX is valid and refers to timer2.
However, I missed the bounds check 0 < addr < 0x2C. I will add that
check to the read and write functions.

Thanks,
Sundeep
>
>>
>>>
 +
 +switch (addr) {
 +case R_VAL:
 +r = ptimer_get_count(st->ptimer);
 +D(qemu_log("msf2_timer t=%d read counter=%x\n", timer, r));
 +break;
 +
 +case R_MIS:
 +isr = !!(st->regs[R_RIS] & TIMER_RIS_ACK);
 +ier = !!(st->regs[R_CTRL] & TIMER_CTRL_INTR);
 +r = ier && isr;
 +break;
 +
 +default:
 +if (addr < ARRAY_SIZE(st->regs)) {
 +r = st->regs[addr];
 +}
 +break;
 +}
 +D(fprintf(stderr, "%s timer=%d %x=%x\n", __func__, timer, addr * 4, 
 r));
 +return r;
 +}
 +
 +static void timer_update(struct msf2_timer *st)
 +{
 +uint64_t count;
 +
 +D(fprintf(stderr, "%s timer=%d\n", __func__, st->nr));
 +
 +if (!(st->regs[R_CTRL] & TIMER_CTRL_ENBL)) {
 +ptimer_stop(st->ptimer);
 +return;
 +}
 +
 +count = st->regs[R_LOADVAL];
 +ptimer_set_limit(st->ptimer, count, 1);
 +ptimer_run(st->ptimer, 1);
 +}
>>>
>>> The update function should be above the read/write functions.
>>>
>> Ok I will change
>>
 +
 +static void
 +timer_write(void *opaque, hwaddr addr,
 +uint64_t val64, unsigned int size)
 +{
 +struct timerblock *t = opaque;
 +struct msf2_timer *st;
 +unsigned int timer;
 +uint32_t value = val64;
 +
 +addr >>= 2;
 +timer = timer_from_addr(addr);
 +st = &t->timers[timer];
 +D(fprintf(stderr, "%s addr=%x val=%x (timer=%d)\n",
 + __func__, addr * 4, value, timer));
 +
 +if (timer) {
 +addr -= 6;
 +}
>>>
>>> Same comment from the read function.
>>>
 +
 +switch (addr) {
 +case R_CTRL:
 +st->regs[R_CTRL] = value;
 +timer_update(st);
 +break;
 +
 +case R_RIS:
 +if (value & TIMER_RIS_ACK) {
 +st->regs[R_RIS] &= ~TIMER_RIS_ACK;
 +}
 +break;
 +
 +case R_LOADVAL:
 +st->regs[R_LOADVAL] = value;
 +if (st->regs[R_CTRL] & TIMER_CTRL_ENBL) {
 +timer_update(st);
 +}
 +break;
 +
 +case R_BGLOADVAL:
 +st->regs[R_BGLOADVAL] = value;
 +st->regs[R_LOADVAL] = value;
 +break;
 +
 +case R_VAL:
 +case R_MIS:
 +break;
 +
 +default:
 +if (addr < ARRAY_SIZE(st->regs)) {
 +st->regs[addr] = value;
 +}
 +break;
 +}
 +timer_update_irq(st);
 +}
 +
 +static const MemoryRegionOps timer_ops = {
 +.read = timer_read,
 +.write = timer_write,
 +.endianness = DEVICE_NATIVE_ENDIAN,
 +.valid = {
 +.min_access_size = 4,
 +.max_access_size = 4
 +}
 +};
 +
 +static void timer_hit(void *opaque)
 +{
 +struct msf2_timer *st = opaque;
 +D(fprintf(stderr, "%s %d\n", __func__, st->nr));
 +st->regs[R_RIS] |= TIMER_RIS_ACK;
 +
 +if (!(st->regs[R_CTRL] &

[Qemu-devel] [PATCH 2/3] migration: Remove use of old MigrationParams

2017-04-25 Thread Juan Quintela

The previous patch switched block migration over to migration
capabilities.  Note that we continue to accept the old command line flags
of the migrate command for the time being.  Remove the set_params method,
as it is now empty.

Signed-off-by: Juan Quintela 
---
 include/migration/migration.h |  3 +--
 migration/block.c | 17 ++---
 migration/colo.c  |  3 ---
 migration/migration.c |  8 +---
 migration/savevm.c|  2 --
 5 files changed, 8 insertions(+), 25 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 618ab0e..2917baa 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -42,8 +42,7 @@
 extern int only_migratable;
 
 struct MigrationParams {
-bool blk;
-bool shared;
+bool unused; /* C don't allow empty structs */
 };
 
 /* Messages sent on the return path from destination to source */
diff --git a/migration/block.c b/migration/block.c
index 060087f..fcfa823 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -94,9 +94,6 @@ typedef struct BlkMigBlock {
 } BlkMigBlock;
 
 typedef struct BlkMigState {
-/* Written during setup phase.  Can be read without a lock.  */
-int blk_enable;
-int shared_base;
 QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
 int64_t total_sector_sum;
 bool zero_blocks;
@@ -425,7 +422,7 @@ static int init_blk_migration(QEMUFile *f)
 bmds->bulk_completed = 0;
 bmds->total_sectors = sectors;
 bmds->completed_sectors = 0;
-bmds->shared_base = block_mig_state.shared_base;
+bmds->shared_base = migrate_use_block_shared();
 
 assert(i < num_bs);
 bmds_bs[i].bmds = bmds;
@@ -994,22 +991,12 @@ static int block_load(QEMUFile *f, void *opaque, int 
version_id)
 return 0;
 }
 
-static void block_set_params(const MigrationParams *params, void *opaque)
-{
-block_mig_state.blk_enable = params->blk;
-block_mig_state.shared_base = params->shared;
-
-/* shared base means that blk_enable = 1 */
-block_mig_state.blk_enable |= params->shared;
-}
-
 static bool block_is_active(void *opaque)
 {
-return block_mig_state.blk_enable == 1;
+return migrate_use_block_enabled();
 }
 
 static SaveVMHandlers savevm_block_handlers = {
-.set_params = block_set_params,
 .save_live_setup = block_save_setup,
 .save_live_iterate = block_save_iterate,
 .save_live_complete_precopy = block_save_complete,
diff --git a/migration/colo.c b/migration/colo.c
index c19eb3f..5c6c2f0 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -332,9 +332,6 @@ static int colo_do_checkpoint_transaction(MigrationState *s,
 goto out;
 }
 
-/* Disable block migration */
-s->params.blk = 0;
-s->params.shared = 0;
 qemu_savevm_state_header(fb);
 qemu_savevm_state_begin(fb, &s->params);
 qemu_mutex_lock_iothread();
diff --git a/migration/migration.c b/migration/migration.c
index 775b24c..9b96f1a 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -787,6 +787,10 @@ void 
qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
 s->enabled_capabilities[cap->value->capability] = cap->value->state;
 }
 
+if (s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK_SHARED]) {
+s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK_ENABLED] = true;
+}
+
 if (migrate_postcopy_ram()) {
 if (migrate_use_compression()) {
 /* The decompression threads asynchronously write into RAM
@@ -1214,9 +1218,6 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
 MigrationParams params;
 const char *p;
 
-params.blk = has_blk && blk;
-params.shared = has_inc && inc;
-
 if (migration_is_setup_or_active(s->state) ||
 s->state == MIGRATION_STATUS_CANCELLING ||
 s->state == MIGRATION_STATUS_COLO) {
@@ -1239,6 +1240,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
 }
 
 if (has_inc && inc) {
+migrate_set_block_enabled(s);
 migrate_set_block_shared(s);
 }
 
diff --git a/migration/savevm.c b/migration/savevm.c
index 0c01988..102b11d 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1233,8 +1233,6 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
 {
 int ret;
 MigrationParams params = {
-.blk = 0,
-.shared = 0
 };
 MigrationState *ms = migrate_init();
 MigrationStatus status;
-- 
2.9.3

[Qemu-devel] [PATCH 1/3] migration: Create block capabilities for shared and enable

2017-04-25 Thread Juan Quintela

Those two capabilities were added through the command line.  Notice that
we just created them.  This is just the boilerplate.

Signed-off-by: Juan Quintela 
Reviewed-by: Eric Blake 
---
 include/migration/migration.h |  3 +++
 migration/migration.c | 36 
 qapi-schema.json  |  7 ++-
 3 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index dfeca38..618ab0e 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -307,6 +307,9 @@ bool migrate_colo_enabled(void);
 
 int64_t xbzrle_cache_resize(int64_t new_size);
 
+bool migrate_use_block_enabled(void);
+bool migrate_use_block_shared(void);
+
 bool migrate_use_compression(void);
 int migrate_compress_level(void);
 int migrate_compress_threads(void);
diff --git a/migration/migration.c b/migration/migration.c
index 5447cab..775b24c 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1195,6 +1195,16 @@ bool migration_is_blocked(Error **errp)
 return false;
 }
 
+static void migrate_set_block_shared(MigrationState *s)
+{
+s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK_SHARED] = true;
+}
+
+static void migrate_set_block_enabled(MigrationState *s)
+{
+s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK_ENABLED] = true;
+}
+
 void qmp_migrate(const char *uri, bool has_blk, bool blk,
  bool has_inc, bool inc, bool has_detach, bool detach,
  Error **errp)
@@ -1224,6 +1234,14 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
 
 s = migrate_init();
 
+if (has_blk && blk) {
+migrate_set_block_enabled(s);
+}
+
+if (has_inc && inc) {
+migrate_set_block_shared(s);
+}
+
 if (strstart(uri, "tcp:", &p)) {
 tcp_start_outgoing_migration(s, p, &local_err);
 #ifdef CONFIG_RDMA
@@ -1419,6 +1437,24 @@ int64_t migrate_xbzrle_cache_size(void)
 return s->xbzrle_cache_size;
 }
 
+bool migrate_use_block_enabled(void)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK_ENABLED];
+}
+
+bool migrate_use_block_shared(void)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK_SHARED];
+}
+
 /* migration thread support */
 /*
  * Something bad happened to the RP stream, mark an error
diff --git a/qapi-schema.json b/qapi-schema.json
index 01b087f..e963bb3 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -894,11 +894,16 @@
 # @release-ram: if enabled, qemu will free the migrated ram pages on the source
 #during postcopy-ram migration. (since 2.9)
 #
+# @block-enabled: enable block migration (Since 2.10)
+#
+# @block-shared: enable block shared migration (Since 2.10)
+#
 # Since: 1.2
 ##
 { 'enum': 'MigrationCapability',
   'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
-   'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram'] }
+   'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram',
+   'block-enabled', 'block-shared' ] }
 
 ##
 # @MigrationCapabilityStatus:
-- 
2.9.3

[Qemu-devel] [PATCH 3/3] migration: Remove old MigrationParams

2017-04-25 Thread Juan Quintela

Not used anymore after moving block migration to use capabilities.

Signed-off-by: Juan Quintela 
---
 include/migration/migration.h | 10 ++
 include/migration/vmstate.h   |  1 -
 include/qemu/typedefs.h   |  1 -
 include/sysemu/sysemu.h   |  3 +--
 migration/colo.c  |  2 +-
 migration/migration.c |  8 +++-
 migration/savevm.c| 16 +++-
 7 files changed, 10 insertions(+), 31 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 2917baa..3495162 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -41,10 +41,6 @@
 /* for vl.c */
 extern int only_migratable;
 
-struct MigrationParams {
-bool unused; /* C don't allow empty structs */
-};
-
 /* Messages sent on the return path from destination to source */
 enum mig_rp_message_type {
 MIG_RP_MSG_INVALID = 0,  /* Must be 0 */
@@ -134,12 +130,10 @@ struct MigrationState
 QEMUBH *cleanup_bh;
 QEMUFile *to_dst_file;
 
-/* New style params from 'migrate-set-parameters' */
+/* params from 'migrate-set-parameters' */
 MigrationParameters parameters;
 
 int state;
-/* Old style params from 'migrate' command */
-MigrationParams params;
 
 /* State related to return path */
 struct {
@@ -229,7 +223,7 @@ void migrate_fd_connect(MigrationState *s);
 
 void add_migration_state_change_notifier(Notifier *notify);
 void remove_migration_state_change_notifier(Notifier *notify);
-MigrationState *migrate_init(const MigrationParams *params);
+MigrationState *migrate_init(void);
 bool migration_is_blocked(Error **errp);
 bool migration_in_setup(MigrationState *);
 bool migration_is_idle(void);
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 9452dec..4396d7e 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -37,7 +37,6 @@ typedef int LoadStateHandler(QEMUFile *f, void *opaque, int 
version_id);
 
 typedef struct SaveVMHandlers {
 /* This runs inside the iothread lock.  */
-void (*set_params)(const MigrationParams *params, void * opaque);
 SaveStateHandler *save_state;
 
 void (*cleanup)(void *opaque);
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index f08d327..a388243 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -49,7 +49,6 @@ typedef struct MemoryRegion MemoryRegion;
 typedef struct MemoryRegionCache MemoryRegionCache;
 typedef struct MemoryRegionSection MemoryRegionSection;
 typedef struct MigrationIncomingState MigrationIncomingState;
-typedef struct MigrationParams MigrationParams;
 typedef struct MigrationState MigrationState;
 typedef struct Monitor Monitor;
 typedef struct MonitorDef MonitorDef;
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 058d5eb..3340202 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -102,8 +102,7 @@ enum qemu_vm_cmd {
 #define MAX_VM_CMD_PACKAGED_SIZE (1ul << 24)
 
 bool qemu_savevm_state_blocked(Error **errp);
-void qemu_savevm_state_begin(QEMUFile *f,
- const MigrationParams *params);
+void qemu_savevm_state_begin(QEMUFile *f);
 void qemu_savevm_state_header(QEMUFile *f);
 int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy);
 void qemu_savevm_state_cleanup(void);
diff --git a/migration/colo.c b/migration/colo.c
index 5c6c2f0..75e8807 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -333,7 +333,7 @@ static int colo_do_checkpoint_transaction(MigrationState *s,
 }
 
 qemu_savevm_state_header(fb);
-qemu_savevm_state_begin(fb, &s->params);
+qemu_savevm_state_begin(fb);
 qemu_mutex_lock_iothread();
 qemu_savevm_state_complete_precopy(fb, false);
 qemu_mutex_unlock_iothread();
diff --git a/migration/migration.c b/migration/migration.c
index 9b96f1a..f094079 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1102,7 +1102,7 @@ bool migration_is_idle(void)
 return false;
 }
 
-MigrationState *migrate_init(const MigrationParams *params)
+MigrationState *migrate_init(void)
 {
 MigrationState *s = migrate_get_current();
 
@@ -1116,7 +1116,6 @@ MigrationState *migrate_init(const MigrationParams 
*params)
 s->cleanup_bh = 0;
 s->to_dst_file = NULL;
 s->state = MIGRATION_STATUS_NONE;
-s->params = *params;
 s->rp_state.from_dst_file = NULL;
 s->rp_state.error = false;
 s->mbps = 0.0;
@@ -1215,7 +1214,6 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
 {
 Error *local_err = NULL;
 MigrationState *s = migrate_get_current();
-MigrationParams params;
 const char *p;
 
 if (migration_is_setup_or_active(s->state) ||
@@ -1233,7 +1231,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
 return;
 }
 
-s = migrate_init(&params);
+s = migrate_init();
 
 if (has_blk && blk) {
 migrate_set_block_enabled(s);
@@ -1966,7 +1964,7 @@ static void

[Qemu-devel] [PATCH 0/3] Remove old MigrationParams

2017-04-25 Thread Juan Quintela

Hi

Once upon a time there were MigrationParams (only used for block migration)
and then MigrationParameters, used for everything else.  This series:

- creates migration capabilities for the block parameters
- makes the migrate command line parameters use those capabilities
- removes MigrationParams completely

Please, review.


Juan Quintela (3):
  migration: Create block capabilities for shared and enable
  migration: Remove use of old MigrationParams
  migration: Remove old MigrationParams

 include/migration/migration.h | 14 +---
 include/migration/vmstate.h   |  1 -
 include/qemu/typedefs.h   |  1 -
 include/sysemu/sysemu.h   |  3 +--
 migration/block.c | 17 ++
 migration/colo.c  |  5 +
 migration/migration.c | 52 ---
 migration/savevm.c| 18 +++
 qapi-schema.json  |  7 +-
 9 files changed, 62 insertions(+), 56 deletions(-)

-- 
2.9.3

Re: [Qemu-devel] [PATCH 4/6] migration: calculate downtime on dst side

2017-04-25 Thread Peter Xu

On Tue, Apr 25, 2017 at 01:10:30PM +0300, Alexey Perevalov wrote:
> On 04/25/2017 11:24 AM, Peter Xu wrote:
> >On Fri, Apr 14, 2017 at 04:17:18PM +0300, Alexey Perevalov wrote:
> >
> >[...]
> >
> >>+/*
> >>+ * This function calculates downtime per cpu and trace it
> >>+ *
> >>+ *  Also it calculates total downtime as an interval's overlap,
> >>+ *  for many vCPU.
> >>+ *
> >>+ *  The approach is following:
> >>+ *  Initially intervals are represented in tree where key is
> >>+ *  pagefault address, and values:
> >>+ *   begin - page fault time
> >>+ *   end   - page load time
> >>+ *   cpus  - bit mask shows affected cpus
> >>+ *
> >>+ *  To calculate overlap on all cpus, intervals converted into
> >>+ *  array of points in time (downtime_points), the size of
> >>+ *  array is 2 * number of nodes in tree of intervals (2 array
> >>+ *  elements per one in element of interval).
> >>+ *  Each element is marked as end (E) or as start (S) of interval.
> >>+ *  The overlap downtime will be calculated for SE, only in case
> >>+ *  there is sequence S(0..N)E(M) for every vCPU.
> >>+ *
> >>+ * As example we have 3 CPU
> >>+ *
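> >>+ *      S1         E1          S1               E1
> >>+ * -----***********------------xxx**************---------------> CPU1
> >>+ *
> >>+ *             S2                 E2
> >>+ * ------------****************xxx-----------------------------> CPU2
> >>+ *
> >>+ *                         S3            E3
> >>+ * ------------------------****xxx*******----------------------> CPU3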
> >>+ *
> >>+ * We have sequence S1,S2,E1,S3,S1,E2,E3,E1
> >>+ * S2,E1 - doesn't match condition due to sequence S1,S2,E1 doesn't 
> >>include CPU3
> >>+ * S3,S1,E2 - sequenece includes all CPUs, in this case overlap will be 
> >>S1,E2
> >>+ * Legend of picture is following: * - means downtime per vCPU
> >>+ * x - means overlapped downtime
> >>+ */
> >Not sure whether I get the point in this patch... iiuc we defined the
> >downtime here as the period when all vcpus are halted, right?
> >
> >If so, I have a few questions:
> >
> >- will this algorithm consume lots of memory? since I see we have one
> >   trace object per fault page address
> I don't think, it consumes too much, one DowntimeDuration
> takes (if I'm using bitmap_try_new, in this patch set I used pointer to
> uint64_t array to keep bitmap array,
> but I'm going to use include/qemu/bitmap.h, it works with pointers to long)
> 
> (2 * sizeof(int64_t) + ((smp_cpus + BITS_PER_BYTE * sizeof(long) - 1) /
> (BITS_PER_BYTE * sizeof(long))) * sizeof(long))
> so it's about 16 + at least 4 bytes, per page fault,
> Let's assume we migrate 256 vCPUs and 256 Gb of RAM, and that the RAM is
> based on 4Kb pages - it's a really bad case
> 16 + ((256 + 8 * 4 - 1) / ( 8 * 4 )) * 4 = 52 bytes
> (256 * 1024 * 1024 * 1024)/(4 * 1024) = 67108864 page faults, but not all of
> these pages will be pagefaulted, due to
> page pre-fetching
> 67108864 * 52 = 3489660928 bytes (3.5 Gb for that operation),
> but I have a doubt, who will use 4Kb pages for 256 Gb, probably
> 2Mb or 1G huge page will be chosen on x86, on ARM or other architecture it
> could be another values.

Hmm, it looks still big though...

> 
> >
> >- do we need to protect the tree to make sure there's no insertion
> >   when doing the calculation?
> I asked the same question when sent RFC patches,
> the answer here is no, we should not, due to right now,
> it's only one socket and one listen thread (maybe in future,
> it will be required, maybe after multi fd patch set),
> and calculation is doing synchronously right after migration complete.

Okay.

> 
> >
> >- if the only thing we want here is the "total downtime", whether
> >   below would work? (assuming N is vcpu numbers)
> >
> >   a. define array cpu_fault_addr[N], to store current faulted address
> >  for each vcpu. When vcpu X is running, cpu_fault_addr[X] should
> >  be 0.
> >
> >   b. when page fault happens on vcpu A, setup cpu_fault_addr[A] with
> >  corresponding fault address.
> at this time we need to check whether a fault is pending on all the other
> vCPUs, and if so mark the current time as the start of total vCPU downtime.
> 
> >   c. when page copy finished, loop over cpu_fault_addr[] to see
> >  whether that matches any, clear corresponding element if matched.
> so when page copy finished and mark for total vCPU is set,
> yes that interval is a part of total downtime.
> >
> >   Then, we can just measure the period when cpu_fault_addr[] is all
> >   set (by tracing at both b. and c.). Can this work?
> Yes, it works, but it's better to keep time - cpu_fault_time,
> address is not important here, it doesn't matter the reason of pagefault.

We still need the addresses? So that when we do COPY, we can check the
new page address against these stored ones, to know which vcpus to
clear the bit.

> 2 vCPU could fault due to access to one page, ok, it's not a problem, just
> store
> time when it

[Qemu-devel] [PATCH 6/6] migration: Pass Error ** argument to {save, load}_vmstate

2017-04-25 Thread Juan Quintela

This way we use the "normal" way of printing errors for hmp commands.

--
Paolo suggestion

Signed-off-by: Juan Quintela 
---
 hmp.c|  9 +++--
 include/sysemu/sysemu.h  |  4 ++--
 migration/savevm.c   | 51 
 replay/replay-snapshot.c |  6 --
 vl.c |  4 +++-
 5 files changed, 41 insertions(+), 33 deletions(-)

diff --git a/hmp.c b/hmp.c
index bd7b1ca..d81f71e 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1273,17 +1273,22 @@ void hmp_loadvm(Monitor *mon, const QDict *qdict)
 {
 int saved_vm_running  = runstate_is_running();
 const char *name = qdict_get_str(qdict, "name");
+Error *err = NULL;
 
 vm_stop(RUN_STATE_RESTORE_VM);
 
-if (load_vmstate(name) == 0 && saved_vm_running) {
+if (load_vmstate(name, ) == 0 && saved_vm_running) {
 vm_start();
 }
+hmp_handle_error(mon, );
 }
 
 void hmp_savevm(Monitor *mon, const QDict *qdict)
 {
-save_vmstate(qdict_get_try_str(qdict, "name"));
+Error *err = NULL;
+
+save_vmstate(qdict_get_try_str(qdict, "name"), );
+hmp_handle_error(mon, );
 }
 
 void hmp_delvm(Monitor *mon, const QDict *qdict)
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 15656b7..058d5eb 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -75,8 +75,8 @@ void qemu_remove_exit_notifier(Notifier *notify);
 void qemu_add_machine_init_done_notifier(Notifier *notify);
 void qemu_remove_machine_init_done_notifier(Notifier *notify);
 
-int save_vmstate(const char *name);
-int load_vmstate(const char *name);
+int save_vmstate(const char *name, Error **errp);
+int load_vmstate(const char *name, Error **errp);
 
 void qemu_announce_self(void);
 
diff --git a/migration/savevm.c b/migration/savevm.c
index 8dd4306..0c01988 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2066,7 +2066,7 @@ int qemu_loadvm_state(QEMUFile *f)
 return ret;
 }
 
-int save_vmstate(const char *name)
+int save_vmstate(const char *name, Error **errp)
 {
 BlockDriverState *bs, *bs1;
 QEMUSnapshotInfo sn1, *sn = , old_sn1, *old_sn = _sn1;
@@ -2076,29 +2076,27 @@ int save_vmstate(const char *name)
 uint64_t vm_state_size;
 qemu_timeval tv;
 struct tm tm;
-Error *local_err = NULL;
 AioContext *aio_context;
 
 if (!bdrv_all_can_snapshot()) {
-error_report("Device '%s' is writable but does not support snapshots",
- bdrv_get_device_name(bs));
+error_setg(errp, "Device '%s' is writable but does not support "
+   "snapshots", bdrv_get_device_name(bs));
 return ret;
 }
 
 /* Delete old snapshots of the same name */
 if (name) {
-ret = bdrv_all_delete_snapshot(name, , _err);
+ret = bdrv_all_delete_snapshot(name, , errp);
 if (ret < 0) {
-error_reportf_err(local_err,
-  "Error while deleting snapshot on device '%s': ",
-  bdrv_get_device_name(bs1));
+error_prepend(errp, "Error while deleting snapshot on device "
+  "'%s': ", bdrv_get_device_name(bs1));
 return ret;
 }
 }
 
 bs = bdrv_all_find_vmstate_bs();
 if (bs == NULL) {
-error_report("No block device can accept snapshots");
+error_setg(errp, "No block device can accept snapshots");
 return ret;
 }
 aio_context = bdrv_get_aio_context(bs);
@@ -2107,7 +2105,7 @@ int save_vmstate(const char *name)
 
 ret = global_state_store();
 if (ret) {
-error_report("Error saving global state");
+error_setg(errp, "Error saving global state");
 return ret;
 }
 vm_stop(RUN_STATE_SAVE_VM);
@@ -2139,21 +2137,20 @@ int save_vmstate(const char *name)
 /* save the VM state */
 f = qemu_fopen_bdrv(bs, 1);
 if (!f) {
-error_report("Could not open VM state file");
+error_setg(errp, "Could not open VM state file");
 goto the_end;
 }
-ret = qemu_savevm_state(f, _err);
+ret = qemu_savevm_state(f, errp);
 vm_state_size = qemu_ftell(f);
 qemu_fclose(f);
 if (ret < 0) {
-error_report_err(local_err);
 goto the_end;
 }
 
 ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, );
 if (ret < 0) {
-error_report("Error while creating snapshot on '%s'",
- bdrv_get_device_name(bs));
+error_setg(errp, "Error while creating snapshot on '%s'",
+   bdrv_get_device_name(bs));
 goto the_end;
 }
 
@@ -2226,7 +2223,7 @@ void qmp_xen_load_devices_state(const char *filename, 
Error **errp)
 migration_incoming_state_destroy();
 }
 
-int load_vmstate(const char *name)
+int load_vmstate(const char *name, Error **errp)
 {
 BlockDriverState *bs, *bs_vm_state;
 QEMUSnapshotInfo sn;
@@ -2236,20 +2233,22 @@ int load_vmstate(const char *name)

[Qemu-devel] [PATCH 5/6] monitor: Move hmp_info_snapshots from savevm.c to hmp.c

2017-04-25 Thread Juan Quintela

It only uses block/* functions, nothing from migration.

Signed-off-by: Juan Quintela 
---
 hmp.c   | 143 ++
 hmp.h   |   1 +
 include/sysemu/sysemu.h |   1 -
 migration/savevm.c  | 147 
 4 files changed, 144 insertions(+), 148 deletions(-)

diff --git a/hmp.c b/hmp.c
index bb739ce..bd7b1ca 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1299,6 +1299,149 @@ void hmp_delvm(Monitor *mon, const QDict *qdict)
 }
 }
 
+void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
+{
+BlockDriverState *bs, *bs1;
+BdrvNextIterator it1;
+QEMUSnapshotInfo *sn_tab, *sn;
+bool no_snapshot = true;
+int nb_sns, i;
+int total;
+int *global_snapshots;
+AioContext *aio_context;
+
+typedef struct SnapshotEntry {
+QEMUSnapshotInfo sn;
+QTAILQ_ENTRY(SnapshotEntry) next;
+} SnapshotEntry;
+
+typedef struct ImageEntry {
+const char *imagename;
+QTAILQ_ENTRY(ImageEntry) next;
+QTAILQ_HEAD(, SnapshotEntry) snapshots;
+} ImageEntry;
+
+QTAILQ_HEAD(, ImageEntry) image_list =
+QTAILQ_HEAD_INITIALIZER(image_list);
+
+ImageEntry *image_entry, *next_ie;
+SnapshotEntry *snapshot_entry;
+
+bs = bdrv_all_find_vmstate_bs();
+if (!bs) {
+monitor_printf(mon, "No available block device supports snapshots\n");
+return;
+}
+aio_context = bdrv_get_aio_context(bs);
+
+aio_context_acquire(aio_context);
+nb_sns = bdrv_snapshot_list(bs, _tab);
+aio_context_release(aio_context);
+
+if (nb_sns < 0) {
+monitor_printf(mon, "bdrv_snapshot_list: error %d\n", nb_sns);
+return;
+}
+
+for (bs1 = bdrv_first(); bs1; bs1 = bdrv_next()) {
+int bs1_nb_sns = 0;
+ImageEntry *ie;
+SnapshotEntry *se;
+AioContext *ctx = bdrv_get_aio_context(bs1);
+
+aio_context_acquire(ctx);
+if (bdrv_can_snapshot(bs1)) {
+sn = NULL;
+bs1_nb_sns = bdrv_snapshot_list(bs1, );
+if (bs1_nb_sns > 0) {
+no_snapshot = false;
+ie = g_new0(ImageEntry, 1);
+ie->imagename = bdrv_get_device_name(bs1);
+QTAILQ_INIT(>snapshots);
+QTAILQ_INSERT_TAIL(_list, ie, next);
+for (i = 0; i < bs1_nb_sns; i++) {
+se = g_new0(SnapshotEntry, 1);
+se->sn = sn[i];
+QTAILQ_INSERT_TAIL(>snapshots, se, next);
+}
+}
+g_free(sn);
+}
+aio_context_release(ctx);
+}
+
+if (no_snapshot) {
+monitor_printf(mon, "There is no snapshot available.\n");
+return;
+}
+
+global_snapshots = g_new0(int, nb_sns);
+total = 0;
+for (i = 0; i < nb_sns; i++) {
+SnapshotEntry *next_sn;
+if (bdrv_all_find_snapshot(sn_tab[i].name, ) == 0) {
+global_snapshots[total] = i;
+total++;
+QTAILQ_FOREACH(image_entry, _list, next) {
+QTAILQ_FOREACH_SAFE(snapshot_entry, _entry->snapshots,
+next, next_sn) {
+if (!strcmp(sn_tab[i].name, snapshot_entry->sn.name)) {
+QTAILQ_REMOVE(_entry->snapshots, snapshot_entry,
+  next);
+g_free(snapshot_entry);
+}
+}
+}
+}
+}
+
+monitor_printf(mon, "List of snapshots present on all disks:\n");
+
+if (total > 0) {
+bdrv_snapshot_dump((fprintf_function)monitor_printf, mon, NULL);
+monitor_printf(mon, "\n");
+for (i = 0; i < total; i++) {
+sn = _tab[global_snapshots[i]];
+/* The ID is not guaranteed to be the same on all images, so
+ * overwrite it.
+ */
+pstrcpy(sn->id_str, sizeof(sn->id_str), "--");
+bdrv_snapshot_dump((fprintf_function)monitor_printf, mon, sn);
+monitor_printf(mon, "\n");
+}
+} else {
+monitor_printf(mon, "None\n");
+}
+
+QTAILQ_FOREACH(image_entry, _list, next) {
+if (QTAILQ_EMPTY(_entry->snapshots)) {
+continue;
+}
+monitor_printf(mon,
+   "\nList of partial (non-loadable) snapshots on '%s':\n",
+   image_entry->imagename);
+bdrv_snapshot_dump((fprintf_function)monitor_printf, mon, NULL);
+monitor_printf(mon, "\n");
+QTAILQ_FOREACH(snapshot_entry, _entry->snapshots, next) {
+bdrv_snapshot_dump((fprintf_function)monitor_printf, mon,
+   _entry->sn);
+monitor_printf(mon, "\n");
+}
+}
+
+QTAILQ_FOREACH_SAFE(image_entry, _list, next, next_ie) {
+SnapshotEntry *next_sn;
+

[Qemu-devel] [PATCH 4/6] monitor: Move hmp_delvm from savevm.c to hmp.c

2017-04-25 Thread Juan Quintela

It really uses block/* stuff, not migration code.

Signed-off-by: Juan Quintela 
---
 hmp.c   | 13 +
 hmp.h   |  1 +
 include/sysemu/sysemu.h |  1 -
 migration/savevm.c  | 13 -
 4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/hmp.c b/hmp.c
index a82a952..bb739ce 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1286,6 +1286,19 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
 save_vmstate(qdict_get_try_str(qdict, "name"));
 }
 
+void hmp_delvm(Monitor *mon, const QDict *qdict)
+{
+BlockDriverState *bs;
+Error *err;
+const char *name = qdict_get_str(qdict, "name");
+
+if (bdrv_all_delete_snapshot(name, , ) < 0) {
+error_reportf_err(err,
+  "Error while deleting snapshot on device '%s': ",
+  bdrv_get_device_name(bs));
+}
+}
+
 void hmp_migrate_cancel(Monitor *mon, const QDict *qdict)
 {
 qmp_migrate_cancel(NULL);
diff --git a/hmp.h b/hmp.h
index b302c8d..6a402b1 100644
--- a/hmp.h
+++ b/hmp.h
@@ -65,6 +65,7 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict);
 void hmp_drive_backup(Monitor *mon, const QDict *qdict);
 void hmp_loadvm(Monitor *mon, const QDict *qdict);
 void hmp_savevm(Monitor *mon, const QDict *qdict);
+void hmp_delvm(Monitor *mon, const QDict *qdict);
 void hmp_migrate_cancel(Monitor *mon, const QDict *qdict);
 void hmp_migrate_incoming(Monitor *mon, const QDict *qdict);
 void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict);
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 914c36c..e4f355ceb 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -77,7 +77,6 @@ void qemu_remove_machine_init_done_notifier(Notifier *notify);
 
 int save_vmstate(const char *name);
 int load_vmstate(const char *name);
-void hmp_delvm(Monitor *mon, const QDict *qdict);
 void hmp_info_snapshots(Monitor *mon, const QDict *qdict);
 
 void qemu_announce_self(void);
diff --git a/migration/savevm.c b/migration/savevm.c
index bbff4d8..acd304b 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2304,19 +2304,6 @@ int load_vmstate(const char *name)
 return 0;
 }
 
-void hmp_delvm(Monitor *mon, const QDict *qdict)
-{
-BlockDriverState *bs;
-Error *err;
-const char *name = qdict_get_str(qdict, "name");
-
-if (bdrv_all_delete_snapshot(name, , ) < 0) {
-error_reportf_err(err,
-  "Error while deleting snapshot on device '%s': ",
-  bdrv_get_device_name(bs));
-}
-}
-
 void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
 {
 BlockDriverState *bs, *bs1;
-- 
2.9.3

[Qemu-devel] [PATCH 2/6] monitor: Move hmp_loadvm from monitor.c to hmp.c

2017-04-25 Thread Juan Quintela

We are going to move the rest of hmp snapshots functions there instead
of monitor.c.

Signed-off-by: Juan Quintela 
---
 hmp.c | 13 +
 hmp.h |  1 +
 monitor.c | 13 -
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/hmp.c b/hmp.c
index ab407d6..f6b8738 100644
--- a/hmp.c
+++ b/hmp.c
@@ -19,6 +19,7 @@
 #include "net/eth.h"
 #include "sysemu/char.h"
 #include "sysemu/block-backend.h"
+#include "sysemu/sysemu.h"
 #include "qemu/config-file.h"
 #include "qemu/option.h"
 #include "qemu/timer.h"
@@ -1268,6 +1269,18 @@ void hmp_snapshot_delete_blkdev_internal(Monitor *mon, 
const QDict *qdict)
 hmp_handle_error(mon, );
 }
 
+void hmp_loadvm(Monitor *mon, const QDict *qdict)
+{
+int saved_vm_running  = runstate_is_running();
+const char *name = qdict_get_str(qdict, "name");
+
+vm_stop(RUN_STATE_RESTORE_VM);
+
+if (load_vmstate(name) == 0 && saved_vm_running) {
+vm_start();
+}
+}
+
 void hmp_migrate_cancel(Monitor *mon, const QDict *qdict)
 {
 qmp_migrate_cancel(NULL);
diff --git a/hmp.h b/hmp.h
index 799fd37..385332c 100644
--- a/hmp.h
+++ b/hmp.h
@@ -63,6 +63,7 @@ void hmp_snapshot_blkdev_internal(Monitor *mon, const QDict 
*qdict);
 void hmp_snapshot_delete_blkdev_internal(Monitor *mon, const QDict *qdict);
 void hmp_drive_mirror(Monitor *mon, const QDict *qdict);
 void hmp_drive_backup(Monitor *mon, const QDict *qdict);
+void hmp_loadvm(Monitor *mon, const QDict *qdict);
 void hmp_migrate_cancel(Monitor *mon, const QDict *qdict);
 void hmp_migrate_incoming(Monitor *mon, const QDict *qdict);
 void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict);
diff --git a/monitor.c b/monitor.c
index be282ec..d02900d 100644
--- a/monitor.c
+++ b/monitor.c
@@ -37,7 +37,6 @@
 #include "net/slirp.h"
 #include "sysemu/char.h"
 #include "ui/qemu-spice.h"
-#include "sysemu/sysemu.h"
 #include "sysemu/numa.h"
 #include "monitor/monitor.h"
 #include "qemu/config-file.h"
@@ -1843,18 +1842,6 @@ void qmp_closefd(const char *fdname, Error **errp)
 error_setg(errp, QERR_FD_NOT_FOUND, fdname);
 }
 
-static void hmp_loadvm(Monitor *mon, const QDict *qdict)
-{
-int saved_vm_running  = runstate_is_running();
-const char *name = qdict_get_str(qdict, "name");
-
-vm_stop(RUN_STATE_RESTORE_VM);
-
-if (load_vmstate(name) == 0 && saved_vm_running) {
-vm_start();
-}
-}
-
 int monitor_get_fd(Monitor *mon, const char *fdname, Error **errp)
 {
 mon_fd_t *monfd;
-- 
2.9.3

[Qemu-devel] [PATCH 0/6] Move snapshots commands to hmp

2017-04-25 Thread Juan Quintela

Hi

This series:
- Move snapshots commands to hmp.c, as they don't have code for migration
- Make them work with errors in a modern way instead of writing to the monitor
- make paolo happy and use hmp_handle_error

Later, Juan.

Juan Quintela (6):
  monitor: Remove monitor parameter from save_vmstate
  monitor: Move hmp_loadvm from monitor.c to hmp.c
  monitor: Move hmp_savevm from savevm.c to hmp.c
  monitor: Move hmp_delvm from savevm.c to hmp.c
  monitor: Move hmp_info_snapshots from savevm.c to hmp.c
  migration: Pass Error ** argument to {save,load}_vmstate

 hmp.c| 179 +++
 hmp.h|   4 +
 include/sysemu/sysemu.h  |   7 +-
 migration/savevm.c   | 216 ++-
 monitor.c|  13 ---
 replay/replay-snapshot.c |   6 +-
 vl.c |   4 +-
 7 files changed, 217 insertions(+), 212 deletions(-)

-- 
2.9.3

[Qemu-devel] [PATCH 1/6] monitor: Remove monitor parameter from save_vmstate

2017-04-25 Thread Juan Quintela

load_vmstate() already uses error_report(), so be consistent.  There is
an identical error message in load_vmstate() that ends in a
period; remove that period.

Signed-off-by: Juan Quintela 
---
 include/sysemu/sysemu.h  |  2 +-
 migration/savevm.c   | 20 ++--
 replay/replay-snapshot.c |  2 +-
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 16175f7..b6daf9d 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -76,7 +76,7 @@ void qemu_add_machine_init_done_notifier(Notifier *notify);
 void qemu_remove_machine_init_done_notifier(Notifier *notify);
 
 void hmp_savevm(Monitor *mon, const QDict *qdict);
-int save_vmstate(Monitor *mon, const char *name);
+int save_vmstate(const char *name);
 int load_vmstate(const char *name);
 void hmp_delvm(Monitor *mon, const QDict *qdict);
 void hmp_info_snapshots(Monitor *mon, const QDict *qdict);
diff --git a/migration/savevm.c b/migration/savevm.c
index 7421a67..ff934aa 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2070,7 +2070,7 @@ int qemu_loadvm_state(QEMUFile *f)
 return ret;
 }
 
-int save_vmstate(Monitor *mon, const char *name)
+int save_vmstate(const char *name)
 {
 BlockDriverState *bs, *bs1;
 QEMUSnapshotInfo sn1, *sn = , old_sn1, *old_sn = _sn1;
@@ -2084,8 +2084,8 @@ int save_vmstate(Monitor *mon, const char *name)
 AioContext *aio_context;
 
 if (!bdrv_all_can_snapshot()) {
-monitor_printf(mon, "Device '%s' is writable but does not "
-   "support snapshots.\n", bdrv_get_device_name(bs));
+error_report("Device '%s' is writable but does not support snapshots",
+ bdrv_get_device_name(bs));
 return ret;
 }
 
@@ -2102,7 +2102,7 @@ int save_vmstate(Monitor *mon, const char *name)
 
 bs = bdrv_all_find_vmstate_bs();
 if (bs == NULL) {
-monitor_printf(mon, "No block device can accept snapshots\n");
+error_report("No block device can accept snapshots");
 return ret;
 }
 aio_context = bdrv_get_aio_context(bs);
@@ -2111,7 +2111,7 @@ int save_vmstate(Monitor *mon, const char *name)
 
 ret = global_state_store();
 if (ret) {
-monitor_printf(mon, "Error saving global state\n");
+error_report("Error saving global state");
 return ret;
 }
 vm_stop(RUN_STATE_SAVE_VM);
@@ -2143,7 +2143,7 @@ int save_vmstate(Monitor *mon, const char *name)
 /* save the VM state */
 f = qemu_fopen_bdrv(bs, 1);
 if (!f) {
-monitor_printf(mon, "Could not open VM state file\n");
+error_report("Could not open VM state file");
 goto the_end;
 }
 ret = qemu_savevm_state(f, _err);
@@ -2156,8 +2156,8 @@ int save_vmstate(Monitor *mon, const char *name)
 
 ret = bdrv_all_create_snapshot(sn, bs, vm_state_size, );
 if (ret < 0) {
-monitor_printf(mon, "Error while creating snapshot on '%s'\n",
-   bdrv_get_device_name(bs));
+error_report("Error while creating snapshot on '%s'",
+ bdrv_get_device_name(bs));
 goto the_end;
 }
 
@@ -2173,7 +2173,7 @@ int save_vmstate(Monitor *mon, const char *name)
 
 void hmp_savevm(Monitor *mon, const QDict *qdict)
 {
-save_vmstate(mon, qdict_get_try_str(qdict, "name"));
+save_vmstate(qdict_get_try_str(qdict, "name"));
 }
 
 void qmp_xen_save_devices_state(const char *filename, Error **errp)
@@ -2245,7 +2245,7 @@ int load_vmstate(const char *name)
 MigrationIncomingState *mis = migration_incoming_get_current();
 
 if (!bdrv_all_can_snapshot()) {
-error_report("Device '%s' is writable but does not support snapshots.",
+error_report("Device '%s' is writable but does not support snapshots",
  bdrv_get_device_name(bs));
 return -ENOTSUP;
 }
diff --git a/replay/replay-snapshot.c b/replay/replay-snapshot.c
index 65e2d37..8cced46 100644
--- a/replay/replay-snapshot.c
+++ b/replay/replay-snapshot.c
@@ -64,7 +64,7 @@ void replay_vmstate_init(void)
 {
 if (replay_snapshot) {
 if (replay_mode == REPLAY_MODE_RECORD) {
-if (save_vmstate(cur_mon, replay_snapshot) != 0) {
+if (save_vmstate(replay_snapshot) != 0) {
 error_report("Could not create snapshot for icount record");
 exit(1);
 }
-- 
2.9.3

[Qemu-devel] [PATCH 3/6] monitor: Move hmp_savevm from savevm.c to hmp.c

2017-04-25 Thread Juan Quintela

It is a monitor command, and has nothing migration specific in it.

Signed-off-by: Juan Quintela 
---
 hmp.c   | 5 +
 hmp.h   | 1 +
 include/sysemu/sysemu.h | 1 -
 migration/savevm.c  | 5 -
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/hmp.c b/hmp.c
index f6b8738..a82a952 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1281,6 +1281,11 @@ void hmp_loadvm(Monitor *mon, const QDict *qdict)
 }
 }
 
+void hmp_savevm(Monitor *mon, const QDict *qdict)
+{
+save_vmstate(qdict_get_try_str(qdict, "name"));
+}
+
 void hmp_migrate_cancel(Monitor *mon, const QDict *qdict)
 {
 qmp_migrate_cancel(NULL);
diff --git a/hmp.h b/hmp.h
index 385332c..b302c8d 100644
--- a/hmp.h
+++ b/hmp.h
@@ -64,6 +64,7 @@ void hmp_snapshot_delete_blkdev_internal(Monitor *mon, const 
QDict *qdict);
 void hmp_drive_mirror(Monitor *mon, const QDict *qdict);
 void hmp_drive_backup(Monitor *mon, const QDict *qdict);
 void hmp_loadvm(Monitor *mon, const QDict *qdict);
+void hmp_savevm(Monitor *mon, const QDict *qdict);
 void hmp_migrate_cancel(Monitor *mon, const QDict *qdict);
 void hmp_migrate_incoming(Monitor *mon, const QDict *qdict);
 void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict);
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index b6daf9d..914c36c 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -75,7 +75,6 @@ void qemu_remove_exit_notifier(Notifier *notify);
 void qemu_add_machine_init_done_notifier(Notifier *notify);
 void qemu_remove_machine_init_done_notifier(Notifier *notify);
 
-void hmp_savevm(Monitor *mon, const QDict *qdict);
 int save_vmstate(const char *name);
 int load_vmstate(const char *name);
 void hmp_delvm(Monitor *mon, const QDict *qdict);
diff --git a/migration/savevm.c b/migration/savevm.c
index ff934aa..bbff4d8 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2171,11 +2171,6 @@ int save_vmstate(const char *name)
 return ret;
 }
 
-void hmp_savevm(Monitor *mon, const QDict *qdict)
-{
-save_vmstate(qdict_get_try_str(qdict, "name"));
-}
-
 void qmp_xen_save_devices_state(const char *filename, Error **errp)
 {
 QEMUFile *f;
-- 
2.9.3

Re: [Qemu-devel] [PATCH v2 00/25] qmp: add async command type

2017-04-25 Thread Kevin Wolf

Am 24.04.2017 um 21:10 hat Markus Armbruster geschrieben:
> With 2.9 out of the way, how can we make progress on this one?
> 
> I can see two ways to get asynchronous QMP commands accepted:
> 
> 1. We break QMP compatibility in QEMU 3.0 and convert all long-running
>tasks from "synchronous command + event" to "asynchronous command".
> 
>This is design option 1 quoted below.  *If* we decide to leave
>compatibility behind for 3.0, *and* we decide we like the
>asynchronous sufficiently better to put in the work, we can do it.
> 
>I guess there's nothing to do here until we decide on breaking
>compatibility in 3.0.
> 
> 2. We don't break QMP compatibility, but we add asynchronous commands
>anyway, because we decide that's how we want to do "jobs".
> 
>This is design option 3 quoted below.  As I said, I dislike its lack
>of orthogonality.  But if asynchronous commands help us get jobs
>done, I can bury my dislike.

I don't think async commands are attractive at all for doing jobs. I
feel they bring up more questions than they answer, for example, what
happens if libvirt crashes and then reconnects? Which monitor connection
does get the reply for an async command sent on the now disconnected
one?

We already have a model for doing long-running jobs, and as far as I'm
aware, it's working and we're not fighting limitations of the design. So
what are we even trying to solve here? In the context of jobs, async
commands feel like a solution in need of a problem to me.

Things may look a bit different in typically quick, but potentially
long-running commands. That is, anything that we currently execute
synchronously while holding the BQL, but that involves I/O and could
therefore take a while (impacting the performance of the VM) or even
block indefinitely.

The first problem (we're holding the lock too long) can be addressed by
making things async just inside qemu and we don't need to expose the
change on the QMP level. The second one (blocking indefinitely) requires
being async on the QMP level if we want the monitor to be responsive
even if we're using an image on an NFS server that went down.

On the other hand, using the traditional job infrastructure is way over
the top if all you want to do is 'query-block', so we need something
different for making it async. And if a client disconnects, the
'query-block' result can just be thrown away, it's much simpler than
actual jobs.

So where I can see advantages for a new async command type is not for
converting real long-running commands like block jobs, but only for the
typically, but not necessarily quick operations. At the same time it is
where you're rightfully afraid that the less common case might not
receive much testing in management tools.

In the end, I'm unsure whether async commands are a good idea, I can see
good arguments for both stances. But I'm almost certain that they are
the wrong tool for jobs.

Kevin

[Qemu-devel] [PATCH 1/2] migration: Move check_migratable() into qdev.c

2017-04-25 Thread Juan Quintela

The function is only used once, and nothing else in migration knows
about objects.  Create the function vmstate_device_is_migratable() in
savevm.c that really does the bit that is related to migration.

Signed-off-by: Juan Quintela 
---
 hw/core/qdev.c| 15 ++-
 include/migration/migration.h |  3 ---
 include/migration/vmstate.h   |  2 ++
 migration/migration.c | 15 ---
 migration/savevm.c| 10 ++
 stubs/vmstate.c   |  5 ++---
 6 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 02b632f..17ff638 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -37,7 +37,7 @@
 #include "hw/boards.h"
 #include "hw/sysbus.h"
 #include "qapi-event.h"
-#include "migration/migration.h"
+#include "migration/vmstate.h"
 
 bool qdev_hotplug = false;
 static bool qdev_hot_added = false;
@@ -861,6 +861,19 @@ static bool device_get_realized(Object *obj, Error **errp)
 return dev->realized;
 }
 
+static int check_migratable(Object *obj, Error **err)
+{
+DeviceClass *dc = DEVICE_GET_CLASS(obj);
+if (!vmstate_device_is_migratable(dc->vmsd)) {
+error_setg(err, "Device %s is not migratable, but "
+   "--only-migratable was specified",
+   object_get_typename(obj));
+return -1;
+}
+
+return 0;
+}
+
 static void device_set_realized(Object *obj, bool value, Error **errp)
 {
 DeviceState *dev = DEVICE(obj);
diff --git a/include/migration/migration.h b/include/migration/migration.h
index ba1a16c..dfeca38 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -22,7 +22,6 @@
 #include "qapi-types.h"
 #include "exec/cpu-common.h"
 #include "qemu/coroutine_int.h"
-#include "qom/object.h"
 
 #define QEMU_VM_FILE_MAGIC   0x5145564d
 #define QEMU_VM_FILE_VERSION_COMPAT  0x0002
@@ -292,8 +291,6 @@ int migrate_add_blocker(Error *reason, Error **errp);
  */
 void migrate_del_blocker(Error *reason);
 
-int check_migratable(Object *obj, Error **err);
-
 bool migrate_release_ram(void);
 bool migrate_postcopy_ram(void);
 bool migrate_zero_blocks(void);
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index dad3984..9452dec 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -1049,4 +1049,6 @@ int64_t self_announce_delay(int round)
 
 void dump_vmstate_json_to_file(FILE *out_fp);
 
+bool vmstate_device_is_migratable(const VMStateDescription *vmsd);
+
 #endif
diff --git a/migration/migration.c b/migration/migration.c
index 353f272..5447cab 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1158,21 +1158,6 @@ void migrate_del_blocker(Error *reason)
 migration_blockers = g_slist_remove(migration_blockers, reason);
 }
 
-int check_migratable(Object *obj, Error **err)
-{
-DeviceClass *dc = DEVICE_GET_CLASS(obj);
-if (only_migratable && dc->vmsd) {
-if (dc->vmsd->unmigratable) {
-error_setg(err, "Device %s is not migratable, but "
-   "--only-migratable was specified",
-   object_get_typename(obj));
-return -1;
-}
-}
-
-return 0;
-}
-
 void qmp_migrate_incoming(const char *uri, Error **errp)
 {
 Error *local_err = NULL;
diff --git a/migration/savevm.c b/migration/savevm.c
index 03ae1bd..7421a67 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2480,3 +2480,13 @@ void vmstate_register_ram_global(MemoryRegion *mr)
 {
 vmstate_register_ram(mr, NULL);
 }
+
+bool vmstate_device_is_migratable(const VMStateDescription *vmsd)
+{
+if (only_migratable && vmsd) {
+if (vmsd->unmigratable) {
+return false;
+}
+}
+return true;
+}
diff --git a/stubs/vmstate.c b/stubs/vmstate.c
index 6d52f29..5af824b 100644
--- a/stubs/vmstate.c
+++ b/stubs/vmstate.c
@@ -1,7 +1,6 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "migration/vmstate.h"
-#include "migration/migration.h"
 
 const VMStateDescription vmstate_dummy = {};
 
@@ -21,7 +20,7 @@ void vmstate_unregister(DeviceState *dev,
 {
 }
 
-int check_migratable(Object *obj, Error **err)
+bool vmstate_device_is_migratable(const VMStateDescription *vmsd)
 {
-return 0;
+return true;
 }
-- 
2.9.3

[Qemu-devel] [PATCH 2/2] migration: to_dst_file at that point is NULL

2017-04-25 Thread Juan Quintela

We have just arrived here via:

migration.c: qemu_migrate()
  
  s = migrate_init() <- puts it to NULL
  
  {tcp,unix}_start_outgoing_migration ->
 socket_outgoing_migration
migration_channel_connect()
   sets to_dst_file

If tls is enabled, we do another round through
migrate_channel_tls_connect(), but we only set it up if there is no
error.  So we don't need the assignment.  I am removing it so that the
following patches can remove the knowledge about MigrationState from
those two files.

Signed-off-by: Juan Quintela 
---
 migration/socket.c | 1 -
 migration/tls.c| 1 -
 2 files changed, 2 deletions(-)

diff --git a/migration/socket.c b/migration/socket.c
index 13966f1..dc88812 100644
--- a/migration/socket.c
+++ b/migration/socket.c
@@ -79,7 +79,6 @@ static void socket_outgoing_migration(QIOTask *task,
 
 if (qio_task_propagate_error(task, )) {
 trace_migration_socket_outgoing_error(error_get_pretty(err));
-data->s->to_dst_file = NULL;
 migrate_fd_error(data->s, err);
 error_free(err);
 } else {
diff --git a/migration/tls.c b/migration/tls.c
index 45bec44..a33ecb7 100644
--- a/migration/tls.c
+++ b/migration/tls.c
@@ -116,7 +116,6 @@ static void migration_tls_outgoing_handshake(QIOTask *task,
 
 if (qio_task_propagate_error(task, )) {
 trace_migration_tls_outgoing_handshake_error(error_get_pretty(err));
-s->to_dst_file = NULL;
 migrate_fd_error(s, err);
 error_free(err);
 } else {
-- 
2.9.3

[Qemu-devel] [PATCH 0/2] Misc migration fixes

2017-04-25 Thread Juan Quintela

Hi

These are independent of all my series, so I am sending them here.

- check_migratable needs to know about objects, device classes, etc., that
  migration code doesn't care about.  So move it to qdev.c.

- to_dst_file is set to NULL before this call, just remove it.  Long
  term idea is that nothing outside migration.c should know about
  members of MigrationState.

Please, review.

Juan Quintela (2):
  migration: Move check_migratable() into qdev.c
  migration: to_dst_file at that point is NULL

 hw/core/qdev.c| 15 ++-
 include/migration/migration.h |  3 ---
 include/migration/vmstate.h   |  2 ++
 migration/migration.c | 15 ---
 migration/savevm.c| 10 ++
 migration/socket.c|  1 -
 migration/tls.c   |  1 -
 stubs/vmstate.c   |  5 ++---
 8 files changed, 28 insertions(+), 24 deletions(-)

-- 
2.9.3

< 1 2 3 4 5 >

301 - 400 of 464 matches

Mail list logo