from:"Vijay Kilari"

Re: [Qemu-devel] [PATCH] hw/intc/arm_gicv3_kvm: Check KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS in reset

2017-03-28 Thread Vijay Kilari

Hi Eric,

On Tue, Mar 28, 2017 at 7:28 PM, Eric Auger  wrote:
> KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS needs to be checked before
> attempting to read ICC_CTLR_EL1; otherwise kernel versions not
> exposing this kvm device group will be incompatible with qemu 2.9.
>
> Fixes: 07a5628  ("hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers")
> Signed-off-by: Eric Auger 
> Reported-by: Prakash B 
>
> ---
>
> I understand the ICC_CTLR_EL1 state only is used in the put() function
> which is used for migration
> ---
>  hw/intc/arm_gicv3_kvm.c | 17 +++--
>  1 file changed, 11 insertions(+), 6 deletions(-)
>
> diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
> index 81f0403..4c3a88e 100644
> --- a/hw/intc/arm_gicv3_kvm.c
> +++ b/hw/intc/arm_gicv3_kvm.c
> @@ -614,12 +614,6 @@ static void arm_gicv3_icc_reset(CPUARMState *env, const 
> ARMCPRegInfo *ri)
>  s = c->gic;
>  cpu = ARM_CPU(c->cpu);
>
> -/* Initialize to actual HW supported configuration */
> -kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
> -  KVM_VGIC_ATTR(ICC_CTLR_EL1, cpu->mp_affinity),
> -  &c->icc_ctlr_el1[GICV3_NS], false);
> -
> -c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS];
>  c->icc_pmr_el1 = 0;
>  c->icc_bpr[GICV3_G0] = GIC_MIN_BPR;
>  c->icc_bpr[GICV3_G1] = GIC_MIN_BPR;
> @@ -628,6 +622,17 @@ static void arm_gicv3_icc_reset(CPUARMState *env, const 
> ARMCPRegInfo *ri)
>  c->icc_sre_el1 = 0x7;
>  memset(c->icc_apr, 0, sizeof(c->icc_apr));
>  memset(c->icc_igrpen, 0, sizeof(c->icc_igrpen));
> +
> +if (!kvm_device_check_attr(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
> +  KVM_VGIC_ATTR(ICC_CTLR_EL1, 
> cpu->mp_affinity))) {
> +return;
> +}

 Can't we rely on the GICv3 migration blocker here, provided it is set before this reset?

> +/* Initialize to actual HW supported configuration */
> +kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
> +  KVM_VGIC_ATTR(ICC_CTLR_EL1, cpu->mp_affinity),
> +  &c->icc_ctlr_el1[GICV3_NS], false);
> +
> +c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS];
>  }
>
>  static void kvm_arm_gicv3_reset(DeviceState *dev)
> --
> 2.5.5
>

[Qemu-devel] [PATCH v9 2/5] hw/intc/arm_gicv3_kvm: Add ICC_SRE_EL1 register to vmstate

2017-02-23 Thread vijay . kilari

From: Vijaya Kumar K 

To save and restore the ICC_SRE_EL1 register, introduce a vmstate
subsection that is transferred only if the value is not 0x7.
Also initialize icc_sre_el1 to 0x7 in the pre_load
function.

Signed-off-by: Vijaya Kumar K 
---
 hw/intc/arm_gicv3_common.c | 36 
 include/hw/intc/arm_gicv3_common.h |  1 +
 2 files changed, 37 insertions(+)

diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index 16b9b0f..5b0e456 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -70,6 +70,38 @@ static const VMStateDescription vmstate_gicv3_cpu_virt = {
 }
 };
 
+static int icc_sre_el1_reg_pre_load(void *opaque)
+{
+GICv3CPUState *cs = opaque;
+
+   /*
+* If the sre_el1 subsection is not transferred this
+* means SRE_EL1 is 0x7 (which might not be the same as
+* our reset value).
+*/
+cs->icc_sre_el1 = 0x7;
+return 0;
+}
+
+static bool icc_sre_el1_reg_needed(void *opaque)
+{
+GICv3CPUState *cs = opaque;
+
+return cs->icc_sre_el1 != 7;
+}
+
+const VMStateDescription vmstate_gicv3_cpu_sre_el1 = {
+.name = "arm_gicv3_cpu/sre_el1",
+.version_id = 1,
+.minimum_version_id = 1,
+.pre_load = icc_sre_el1_reg_pre_load,
+.needed = icc_sre_el1_reg_needed,
+.fields = (VMStateField[]) {
+VMSTATE_UINT64(icc_sre_el1, GICv3CPUState),
+VMSTATE_END_OF_LIST()
+}
+};
+
 static const VMStateDescription vmstate_gicv3_cpu = {
 .name = "arm_gicv3_cpu",
 .version_id = 1,
@@ -100,6 +132,10 @@ static const VMStateDescription vmstate_gicv3_cpu = {
 .subsections = (const VMStateDescription * []) {
 &vmstate_gicv3_cpu_virt,
 NULL
+},
+.subsections = (const VMStateDescription * []) {
+&vmstate_gicv3_cpu_sre_el1,
+NULL
 }
 };
 
diff --git a/include/hw/intc/arm_gicv3_common.h 
b/include/hw/intc/arm_gicv3_common.h
index 4156051..bccdfe1 100644
--- a/include/hw/intc/arm_gicv3_common.h
+++ b/include/hw/intc/arm_gicv3_common.h
@@ -172,6 +172,7 @@ struct GICv3CPUState {
 uint8_t gicr_ipriorityr[GIC_INTERNAL];
 
 /* CPU interface */
+uint64_t icc_sre_el1;
 uint64_t icc_ctlr_el1[2];
 uint64_t icc_pmr_el1;
 uint64_t icc_bpr[3];
-- 
1.9.1

[Qemu-devel] [PATCH v9 5/5] hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers

2017-02-23 Thread vijay . kilari

From: Vijaya Kumar K 

Reset CPU interface registers of GICv3 when CPU is reset.
For this, ARMCPRegInfo struct is registered with one ICC
register whose resetfn is called when cpu is reset.

All the ICC registers are reset under one single register
reset function instead of calling resetfn for each ICC
register.

Signed-off-by: Vijaya Kumar K 
---
 hw/intc/arm_gicv3_kvm.c | 60 +
 1 file changed, 60 insertions(+)

diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index cda1af4..81f0403 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -604,6 +604,32 @@ static void kvm_arm_gicv3_get(GICv3State *s)
 }
 }
 
+static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ARMCPU *cpu;
+GICv3State *s;
+GICv3CPUState *c;
+
+c = (GICv3CPUState *)env->gicv3state;
+s = c->gic;
+cpu = ARM_CPU(c->cpu);
+
+/* Initialize to actual HW supported configuration */
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+  KVM_VGIC_ATTR(ICC_CTLR_EL1, cpu->mp_affinity),
+  &c->icc_ctlr_el1[GICV3_NS], false);
+
+c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS];
+c->icc_pmr_el1 = 0;
+c->icc_bpr[GICV3_G0] = GIC_MIN_BPR;
+c->icc_bpr[GICV3_G1] = GIC_MIN_BPR;
+c->icc_bpr[GICV3_G1NS] = GIC_MIN_BPR;
+
+c->icc_sre_el1 = 0x7;
+memset(c->icc_apr, 0, sizeof(c->icc_apr));
+memset(c->icc_igrpen, 0, sizeof(c->icc_igrpen));
+}
+
 static void kvm_arm_gicv3_reset(DeviceState *dev)
 {
 GICv3State *s = ARM_GICV3_COMMON(dev);
@@ -621,6 +647,34 @@ static void kvm_arm_gicv3_reset(DeviceState *dev)
 kvm_arm_gicv3_put(s);
 }
 
+/*
+ * The GIC CPU interface registers need to be reset on CPU reset.
+ * To have arm_gicv3_icc_reset() called on CPU reset, we register
+ * the ARMCPRegInfo below. As we reset the whole CPU interface from a
+ * single register's reset function, we define only one CPU interface
+ * register instead of defining all of them.
+ */
+static const ARMCPRegInfo gicv3_cpuif_reginfo[] = {
+{ .name = "ICC_CTLR_EL1", .state = ARM_CP_STATE_BOTH,
+  .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 12, .opc2 = 4,
+  /*
+   * If ARM_CP_NOP is used, resetfn is not called,
+   * So ARM_CP_NO_RAW is appropriate type.
+   */
+  .type = ARM_CP_NO_RAW,
+  .access = PL1_RW,
+  .readfn = arm_cp_read_zero,
+  .writefn = arm_cp_write_ignore,
+  /*
+   * We hang the whole cpu interface reset routine off here
+   * rather than parcelling it out into one little function
+   * per register
+   */
+  .resetfn = arm_gicv3_icc_reset,
+},
+REGINFO_SENTINEL
+};
+
 static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
 {
 GICv3State *s = KVM_ARM_GICV3(dev);
@@ -644,6 +698,12 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error 
**errp)
 
 gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL);
 
+for (i = 0; i < s->num_cpu; i++) {
+ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i));
+
+define_arm_cp_regs(cpu, gicv3_cpuif_reginfo);
+}
+
 /* Try to create the device via the device control API */
 s->dev_fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_ARM_VGIC_V3, false);
 if (s->dev_fd < 0) {
-- 
1.9.1

[Qemu-devel] [PATCH v9 0/5] GICv3 live migration support

2017-02-23 Thread vijay . kilari

From: Vijaya Kumar K 

This series introduces support for GICv3 live migration with
the new VGIC implementation in the 4.7-rc3 kernel.
In this series, patch 1 of the previous implementation
is ported.
https://lists.nongnu.org/archive/html/qemu-devel/2015-10/msg05284.html

Patch 2 is based on the implementation below.
http://patchwork.ozlabs.org/patch/626746/

Latest kernel patches
https://www.spinics.net/lists/arm-kernel/msg558046.html

This API definition is as per version of VGICv3 specification
in linux kernel Documentation/virtual/kvm/devices/arm-vgic-v3.txt

Tested Live migration of Idle VM running with 4 VCPUs and 8GB RAM.

v8 => v9:
 - Updated icc_sre_el1_reg_needed() return condition to
   cs->icc_sre_el1 != 0x7;
 - Dropped assert in arm_gicv3_icc_reset()
 - Added comments at required places

v7 => v8:
 - Introduced vmstate subsection to add icc_ctrl_el1 register to
   VMStateDescription
 - Introduced new function gicv3_set_gicv3state() in arm_gicv3_cpuif.c
   to update gicv3state variable in CPUARMState struct.
 - Used arm_cp_read_zero & arm_cp_write_ignore for ARMCPRegInfo[].

v6 => v7:
 - Rebased on top of v2.8.0-rc4 release.
 - Added patch to add icc_ctrl_el1 to vmstruct before live migration
   patch.
 - Added patch to add gicv3state variable to CPUARMState struct to
   store GICv3CPUState pointer.
 - Added patch to register ARMCPRegInfo[] struct and reset on CPU reset.

v5 => v6:
 - Added separate patch for resetting ICC* registers
 - Added separate patch for save and restore of ICC_CTLR_EL1
 - Dropped translate_fn mechanism and coded open functions
   for edge_trigger and priority save and restore.
 - Save and Restore APnR registers based on ICC_CTLR_EL1.PRIBITS

v4 => v5:
 - Initialized ICC registers before reset.

v3 => v4:
 - Reintroduced offset GICR_SGI_OFFSET
 - Implement save and restore of ICC_SRE_EL1
 - Updated kvm.h header file in sync with KVM v4 patches

v2 => v3:
 - Dropped offset GICR_SGI_OFFSET
 - Implement save/restore of irq line level using
   KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO
 - Fixed bug with save/restore of edge_trigger
Vijaya Kumar K (5):
  kernel: Add definitions for GICv3 attributes
  hw/intc/arm_gicv3_kvm: Add ICC_SRE_EL1 register to vmstate
  hw/intc/arm_gicv3_kvm: Implement get/put functions
  target-arm: Add GICv3CPUState in CPUARMState struct
  hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers

 hw/intc/arm_gicv3_common.c |  38 +++
 hw/intc/arm_gicv3_cpuif.c  |   8 +
 hw/intc/arm_gicv3_kvm.c| 629 -
 hw/intc/gicv3_internal.h   |   3 +
 include/hw/intc/arm_gicv3_common.h |   1 +
 linux-headers/asm-arm/kvm.h|  12 +
 linux-headers/asm-arm64/kvm.h  |  12 +
 target/arm/cpu.h   |   2 +
 8 files changed, 691 insertions(+), 14 deletions(-)

-- 
1.9.1

[Qemu-devel] [PATCH v9 3/5] hw/intc/arm_gicv3_kvm: Implement get/put functions

2017-02-23 Thread vijay . kilari

From: Vijaya Kumar K 

This actually implements pre_save and post_load methods for in-kernel
vGICv3.

Signed-off-by: Pavel Fedin 
Signed-off-by: Peter Maydell 
[PMM:
 * use decimal, not 0bnnn
 * fixed typo in names of ICC_APR0R_EL1 and ICC_AP1R_EL1
 * completely rearranged the get and put functions to read and write
   the state in a natural order, rather than mixing distributor and
   redistributor state together]
Signed-off-by: Vijaya Kumar K 
[Vijay:
 * Update macro KVM_VGIC_ATTR
 * Use 32 bit access for gicd and gicr
 * GICD_IROUTER, GICD_TYPER, GICR_PROPBASER and GICR_PENDBASER reg
   access  are changed from 64-bit to 32-bit access
 * Add ICC_SRE_EL1 save and restore
 * Dropped translate_fn mechanism and coded functions to handle
   save and restore of edge_trigger and priority
 * Number of APnR register saved/restored based on number of
   priority bits supported]
Reviewed-by: Peter Maydell 
---
---
 hw/intc/arm_gicv3_kvm.c  | 573 +--
 hw/intc/gicv3_internal.h |   1 +
 2 files changed, 558 insertions(+), 16 deletions(-)

diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index d69dc47..cda1af4 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -23,8 +23,10 @@
 #include "qapi/error.h"
 #include "hw/intc/arm_gicv3_common.h"
 #include "hw/sysbus.h"
+#include "qemu/error-report.h"
 #include "sysemu/kvm.h"
 #include "kvm_arm.h"
+#include "gicv3_internal.h"
 #include "vgic_common.h"
 #include "migration/migration.h"
 
@@ -44,6 +46,32 @@
 #define KVM_ARM_GICV3_GET_CLASS(obj) \
  OBJECT_GET_CLASS(KVMARMGICv3Class, (obj), TYPE_KVM_ARM_GICV3)
 
+#define   KVM_DEV_ARM_VGIC_SYSREG(op0, op1, crn, crm, op2) \
+ (ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
+  ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
+  ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
+  ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
+  ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
+
+#define ICC_PMR_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 4, 6, 0)
+#define ICC_BPR0_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 3)
+#define ICC_AP0R_EL1(n) \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 4 | n)
+#define ICC_AP1R_EL1(n) \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 9, n)
+#define ICC_BPR1_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 3)
+#define ICC_CTLR_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 4)
+#define ICC_SRE_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 5)
+#define ICC_IGRPEN0_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 6)
+#define ICC_IGRPEN1_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 7)
+
 typedef struct KVMARMGICv3Class {
 ARMGICv3CommonClass parent_class;
 DeviceRealize parent_realize;
@@ -57,16 +85,523 @@ static void kvm_arm_gicv3_set_irq(void *opaque, int irq, 
int level)
 kvm_arm_gic_set_irq(s->num_irq, irq, level);
 }
 
+#define KVM_VGIC_ATTR(reg, typer) \
+((typer & KVM_DEV_ARM_VGIC_V3_MPIDR_MASK) | (reg))
+
+static inline void kvm_gicd_access(GICv3State *s, int offset,
+   uint32_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+  KVM_VGIC_ATTR(offset, 0),
+  val, write);
+}
+
+static inline void kvm_gicr_access(GICv3State *s, int offset, int cpu,
+   uint32_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+  KVM_VGIC_ATTR(offset, s->cpu[cpu].gicr_typer),
+  val, write);
+}
+
+static inline void kvm_gicc_access(GICv3State *s, uint64_t reg, int cpu,
+   uint64_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+  KVM_VGIC_ATTR(reg, s->cpu[cpu].gicr_typer),
+  val, write);
+}
+
+static inline void kvm_gic_line_level_access(GICv3State *s, int irq, int cpu,
+ uint32_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+  KVM_VGIC_ATTR(irq, s->cpu[cpu].gicr_typer) |
+  (VGIC_LEVEL_INFO_LINE_LEVEL <<
+   KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT),
+  val, write);
+}
+
+/* Loop through each distributor IRQ related register; since bits
+ * corresponding to SPIs and PPIs are RAZ/WI when affinity routing
+ * is enabled, we skip those.
+ */
+#define for_each_dist_irq_reg(_irq, _max, _field_width) \
+for (_irq = GIC_INTERNAL; _irq < _max; _irq += (32 / _field_width))
+
+static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
+{
+uint32_t reg, *field;
+int irq;
+
+

[Qemu-devel] [PATCH v9 1/5] kernel: Add definitions for GICv3 attributes

2017-02-23 Thread vijay . kilari

From: Vijaya Kumar K 

This temporary patch adds kernel API definitions.
Use proper header update procedure after these features
are released.

Signed-off-by: Pavel Fedin 
Signed-off-by: Vijaya Kumar K 
---
 linux-headers/asm-arm/kvm.h   | 12 
 linux-headers/asm-arm64/kvm.h | 12 
 2 files changed, 24 insertions(+)

diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h
index 2fb7859..1798c93 100644
--- a/linux-headers/asm-arm/kvm.h
+++ b/linux-headers/asm-arm/kvm.h
@@ -179,10 +179,22 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS  2
 #define   KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK  (0xffULL << 
KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
+   (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << 
KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define   KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS   3
 #define KVM_DEV_ARM_VGIC_GRP_CTRL   4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+   (0x3fULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
+#define VGIC_LEVEL_INFO_LINE_LEVEL 0
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT0
 
 /* KVM_IRQ_LINE irq field index values */
diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index fd5a276..b3f02ce 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -201,10 +201,22 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS  2
 #define   KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK  (0xffULL << 
KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
+   (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << 
KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define   KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS   3
 #define KVM_DEV_ARM_VGIC_GRP_CTRL  4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+   (0x3fULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
+#define VGIC_LEVEL_INFO_LINE_LEVEL 0
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT   0
 
 /* Device Control API on vcpu fd */
-- 
1.9.1

[Qemu-devel] [PATCH v9 4/5] target-arm: Add GICv3CPUState in CPUARMState struct

2017-02-23 Thread vijay . kilari

From: Vijaya Kumar K 

Add gicv3state void pointer to CPUARMState struct
to store GICv3CPUState.

For use cases such as CPU reset, we need to reset
the GICv3CPUState of the CPU. In such scenarios, this pointer
comes in handy.

Signed-off-by: Vijaya Kumar K 
Reviewed-by: Peter Maydell 
---
 hw/intc/arm_gicv3_common.c | 2 ++
 hw/intc/arm_gicv3_cpuif.c  | 8 
 hw/intc/gicv3_internal.h   | 2 ++
 target/arm/cpu.h   | 2 ++
 4 files changed, 14 insertions(+)

diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index 5b0e456..c6493d6 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -252,6 +252,8 @@ static void arm_gicv3_common_realize(DeviceState *dev, 
Error **errp)
 
 s->cpu[i].cpu = cpu;
 s->cpu[i].gic = s;
+/* Store GICv3CPUState in CPUARMState gicv3state pointer */
+gicv3_set_gicv3state(cpu, &s->cpu[i]);
 
 /* Pre-construct the GICR_TYPER:
  * For our implementation:
diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
index c25ee03..7849783 100644
--- a/hw/intc/arm_gicv3_cpuif.c
+++ b/hw/intc/arm_gicv3_cpuif.c
@@ -18,6 +18,14 @@
 #include "gicv3_internal.h"
 #include "cpu.h"
 
+void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s)
+{
+ARMCPU *arm_cpu = ARM_CPU(cpu);
+CPUARMState *env = &arm_cpu->env;
+
+env->gicv3state = (void *)s;
+};
+
 static GICv3CPUState *icc_cs_from_env(CPUARMState *env)
 {
 /* Given the CPU, find the right GICv3CPUState struct.
diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h
index 457118e..05303a5 100644
--- a/hw/intc/gicv3_internal.h
+++ b/hw/intc/gicv3_internal.h
@@ -408,4 +408,6 @@ static inline void 
gicv3_cache_all_target_cpustates(GICv3State *s)
 }
 }
 
+void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s);
+
 #endif /* QEMU_ARM_GICV3_INTERNAL_H */
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 0956a54..d2eb7bf 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -517,6 +517,8 @@ typedef struct CPUARMState {
 
 void *nvic;
 const struct arm_boot_info *boot_info;
+/* Store GICv3CPUState to access from this struct */
+void *gicv3state;
 } CPUARMState;
 
 /**
-- 
1.9.1

Re: [Qemu-devel] [PATCH v8 2/5] hw/intc/arm_gicv3_kvm: Add ICC_SRE_EL1 register to vmstate

2017-02-22 Thread Vijay Kilari

Hi Christoffer,

On Mon, Feb 20, 2017 at 3:21 PM, Peter Maydell <peter.mayd...@linaro.org> wrote:
> On 20 February 2017 at 06:21, Vijay Kilari <vijay.kil...@gmail.com> wrote:
>> Hi Peter,
>>
>> On Fri, Feb 17, 2017 at 7:25 PM, Peter Maydell <peter.mayd...@linaro.org> 
>> wrote:
> [on the guest-visible ICC_SRE_EL1 value]
>>> Is there a situation where KVM might allow a value other
>>> than 0x7?
>>
>> In KVM, the SRE_EL1 value is 0x1. During save, value
>> read from KVM is 0x1 though we reset to 0x7.
>
> 0x1 means "System Register Interface enabled, IRQ
> bypass enabled, FIQ bypass enabled". This seems
> rather a weird setting, because it means "the GICv3
> CPU interface functionality is disabled and the GICv3
> should signal interrupts via legacy IRQ and FIQ".
> Does KVM really support IRQ/FIQ bypass and does Linux
> really leave it enabled rather than turning it off
> by writing the value to 1?
>
> My expectation was that the KVM GICv3 emulation would
> make these bits RAO/WI like the TCG implementation.
> Is there maybe a bug in the kernel side where it
> doesn't implement bypass but has made these bits be
> RAZ/WI rather than RAO/WI ?

Do you have any inputs on this?

Re: [Qemu-devel] [PATCH v8 2/5] hw/intc/arm_gicv3_kvm: Add ICC_SRE_EL1 register to vmstate

2017-02-19 Thread Vijay Kilari

Hi Peter,

On Fri, Feb 17, 2017 at 7:25 PM, Peter Maydell  wrote:
> On 17 February 2017 at 06:31,   wrote:
>> From: Vijaya Kumar K 
>>
>> To Save and Restore ICC_SRE_EL1 register introduce vmstate
>> subsection and load only if non-zero.
>> Also initialize icc_sre_el1 to 0x7 in pre_load
>> function.
>>
>> Signed-off-by: Vijaya Kumar K 
>> ---
>>  hw/intc/arm_gicv3_common.c | 32 
>>  include/hw/intc/arm_gicv3_common.h |  1 +
>>  2 files changed, 33 insertions(+)
>>
>> diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
>> index 16b9b0f..e62480e 100644
>> --- a/hw/intc/arm_gicv3_common.c
>> +++ b/hw/intc/arm_gicv3_common.c
>> @@ -70,6 +70,34 @@ static const VMStateDescription vmstate_gicv3_cpu_virt = {
>>  }
>>  };
>>
>> +static int icc_sre_el1_reg_pre_load(void *opaque)
>> +{
>> +GICv3CPUState *cs = opaque;
>> +
>> +/* By default enable SRE and disable IRQ & FIQ bypass. */
>> +cs->icc_sre_el1 = 0x7;
>
> Why do we need the pre_load function? I would have
> expected that reset would have given us these defaults
> already.
>
>> +return 0;
>> +}
>> +
>> +static bool icc_sre_el1_reg_needed(void *opaque)
>> +{
>> +GICv3CPUState *cs = opaque;
>> +
>> +return cs->icc_sre_el1 != 0;
>
> I expected this to say "we need to transfer the value if
> it isn't 0x7" (since the current situation of migration
> is "we assume that the value is 0x7").
>
> Something should probably fail inbound migration for TCG
> if the value isn't 0x7, for that matter.
>
> Is there a situation where KVM might allow a value other
> than 0x7?

In KVM, the SRE_EL1 value is 0x1. During save, value
read from KVM is 0x1 though we reset to 0x7.

>
>> +}
>> +
>> +const VMStateDescription vmstate_gicv3_cpu_sre_el1 = {
>> +.name = "arm_gicv3_cpu/sre_el1",
>> +.version_id = 1,
>> +.minimum_version_id = 1,
>> +.pre_load = icc_sre_el1_reg_pre_load,
>> +.needed = icc_sre_el1_reg_needed,
>> +.fields = (VMStateField[]) {
>> +VMSTATE_UINT64(icc_sre_el1, GICv3CPUState),
>> +VMSTATE_END_OF_LIST()
>> +}
>> +};
>> +
>>  static const VMStateDescription vmstate_gicv3_cpu = {
>>  .name = "arm_gicv3_cpu",
>>  .version_id = 1,
>> @@ -100,6 +128,10 @@ static const VMStateDescription vmstate_gicv3_cpu = {
>>  .subsections = (const VMStateDescription * []) {
>>  &vmstate_gicv3_cpu_virt,
>>  NULL
>> +},
>> +.subsections = (const VMStateDescription * []) {
>> +&vmstate_gicv3_cpu_sre_el1,
>> +NULL
>>  }
>>  };
>>
>> diff --git a/include/hw/intc/arm_gicv3_common.h 
>> b/include/hw/intc/arm_gicv3_common.h
>> index 4156051..bccdfe1 100644
>> --- a/include/hw/intc/arm_gicv3_common.h
>> +++ b/include/hw/intc/arm_gicv3_common.h
>> @@ -172,6 +172,7 @@ struct GICv3CPUState {
>>  uint8_t gicr_ipriorityr[GIC_INTERNAL];
>>
>>  /* CPU interface */
>> +uint64_t icc_sre_el1;
>>  uint64_t icc_ctlr_el1[2];
>>  uint64_t icc_pmr_el1;
>>  uint64_t icc_bpr[3];
>> --
>> 1.9.1
>
> thanks
> -- PMM

Re: [Qemu-devel] [PATCH v7 RESEND 2/5] hw/intc/arm_gicv3_kvm: Add ICC_SRE_EL1 register to vmstate

2017-02-17 Thread Vijay Kilari

On Fri, Feb 17, 2017 at 2:30 PM, Auger Eric <eric.au...@redhat.com> wrote:
> Hi Vijaya,
>
> On 13/02/2017 13:17, Vijay Kilari wrote:
>> On Tue, Feb 7, 2017 at 8:09 PM, Peter Maydell <peter.mayd...@linaro.org> 
>> wrote:
>>> On 31 January 2017 at 16:22,  <vijay.kil...@gmail.com> wrote:
>>>> From: Vijaya Kumar K <vijaya.ku...@cavium.com>
>>>>
>>>> To Save and Restore ICC_SRE_EL1 register Add ICC_SRE_EL1 register
>>>> to vmstate and GICv3CPUState struct.
>>>>
>>>> Signed-off-by: Vijaya Kumar K <vijaya.ku...@cavium.com>
>>>> ---
>>>>  hw/intc/arm_gicv3_common.c | 1 +
>>>>  include/hw/intc/arm_gicv3_common.h | 1 +
>>>>  2 files changed, 2 insertions(+)
>>>>
>>>> diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
>>>> index 0f8c4b8..f3245d9 100644
>>>> --- a/hw/intc/arm_gicv3_common.c
>>>> +++ b/hw/intc/arm_gicv3_common.c
>>>> @@ -68,6 +68,7 @@ static const VMStateDescription vmstate_gicv3_cpu = {
>>>>  VMSTATE_UINT32(gicr_igrpmodr0, GICv3CPUState),
>>>>  VMSTATE_UINT32(gicr_nsacr, GICv3CPUState),
>>>>  VMSTATE_UINT8_ARRAY(gicr_ipriorityr, GICv3CPUState, GIC_INTERNAL),
>>>> +VMSTATE_UINT64(icc_sre_el1, GICv3CPUState),
>>>>  VMSTATE_UINT64_ARRAY(icc_ctlr_el1, GICv3CPUState, 2),
>>>>  VMSTATE_UINT64(icc_pmr_el1, GICv3CPUState),
>>>>  VMSTATE_UINT64_ARRAY(icc_bpr, GICv3CPUState, 3),
>>>> diff --git a/include/hw/intc/arm_gicv3_common.h 
>>>> b/include/hw/intc/arm_gicv3_common.h
>>>> index 341a311..183c7f8 100644
>>>> --- a/include/hw/intc/arm_gicv3_common.h
>>>> +++ b/include/hw/intc/arm_gicv3_common.h
>>>> @@ -166,6 +166,7 @@ struct GICv3CPUState {
>>>>  uint8_t gicr_ipriorityr[GIC_INTERNAL];
>>>>
>>>>  /* CPU interface */
>>>> +uint64_t icc_sre_el1;
>>>>  uint64_t icc_ctlr_el1[2];
>>>>  uint64_t icc_pmr_el1;
>>>>  uint64_t icc_bpr[3];
>>>
>>> This breaks migration compatibility for TCG using GICv3; you
>>> need to do something here with a VMState subsection so
>>> the new register is only transferred if it's non-zero.
>>
>> So, you mean to put a check in kvm_arm_gicv3_put() and
>> kvm_arm_gicv3_get() to check for non-zero value?
>> icc_sre_el1 is always non-zero: it is reset to 0xf in TCG and 0x7 in KVM mode.
> In hw/intc/arm_gicv3_cpuif.c we have
> { .name = "ICC_SRE_EL1", .state = ARM_CP_STATE_BOTH,
> ../..
>   .resetvalue = 0x7,
> },
> where did you find the TCG reset value equal to 0xF? I am not able to
> find it.

Sorry, I was referring to ICC_SRE_EL2/3. 0x7 is correct.

[Qemu-devel] [PATCH v8 3/5] hw/intc/arm_gicv3_kvm: Implement get/put functions

2017-02-16 Thread vijay . kilari

From: Vijaya Kumar K 

This actually implements pre_save and post_load methods for in-kernel
vGICv3.

Signed-off-by: Pavel Fedin 
Signed-off-by: Peter Maydell 
[PMM:
 * use decimal, not 0bnnn
 * fixed typo in names of ICC_APR0R_EL1 and ICC_AP1R_EL1
 * completely rearranged the get and put functions to read and write
   the state in a natural order, rather than mixing distributor and
   redistributor state together]
Signed-off-by: Vijaya Kumar K 
[Vijay:
 * Update macro KVM_VGIC_ATTR
 * Use 32 bit access for gicd and gicr
 * GICD_IROUTER, GICD_TYPER, GICR_PROPBASER and GICR_PENDBASER reg
   access  are changed from 64-bit to 32-bit access
 * Add ICC_SRE_EL1 save and restore
 * Dropped translate_fn mechanism and coded functions to handle
   save and restore of edge_trigger and priority
 * Number of APnR register saved/restored based on number of
   priority bits supported]
Reviewed-by: Peter Maydell 
---
---
 hw/intc/arm_gicv3_kvm.c  | 573 +--
 hw/intc/gicv3_internal.h |   1 +
 2 files changed, 558 insertions(+), 16 deletions(-)

diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index d69dc47..cda1af4 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -23,8 +23,10 @@
 #include "qapi/error.h"
 #include "hw/intc/arm_gicv3_common.h"
 #include "hw/sysbus.h"
+#include "qemu/error-report.h"
 #include "sysemu/kvm.h"
 #include "kvm_arm.h"
+#include "gicv3_internal.h"
 #include "vgic_common.h"
 #include "migration/migration.h"
 
@@ -44,6 +46,32 @@
 #define KVM_ARM_GICV3_GET_CLASS(obj) \
  OBJECT_GET_CLASS(KVMARMGICv3Class, (obj), TYPE_KVM_ARM_GICV3)
 
+#define   KVM_DEV_ARM_VGIC_SYSREG(op0, op1, crn, crm, op2) \
+ (ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
+  ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
+  ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
+  ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
+  ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
+
+#define ICC_PMR_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 4, 6, 0)
+#define ICC_BPR0_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 3)
+#define ICC_AP0R_EL1(n) \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 4 | n)
+#define ICC_AP1R_EL1(n) \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 9, n)
+#define ICC_BPR1_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 3)
+#define ICC_CTLR_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 4)
+#define ICC_SRE_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 5)
+#define ICC_IGRPEN0_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 6)
+#define ICC_IGRPEN1_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 7)
+
 typedef struct KVMARMGICv3Class {
 ARMGICv3CommonClass parent_class;
 DeviceRealize parent_realize;
@@ -57,16 +85,523 @@ static void kvm_arm_gicv3_set_irq(void *opaque, int irq, 
int level)
 kvm_arm_gic_set_irq(s->num_irq, irq, level);
 }
 
+#define KVM_VGIC_ATTR(reg, typer) \
+((typer & KVM_DEV_ARM_VGIC_V3_MPIDR_MASK) | (reg))
+
+static inline void kvm_gicd_access(GICv3State *s, int offset,
+   uint32_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+  KVM_VGIC_ATTR(offset, 0),
+  val, write);
+}
+
+static inline void kvm_gicr_access(GICv3State *s, int offset, int cpu,
+   uint32_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+  KVM_VGIC_ATTR(offset, s->cpu[cpu].gicr_typer),
+  val, write);
+}
+
+static inline void kvm_gicc_access(GICv3State *s, uint64_t reg, int cpu,
+   uint64_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+  KVM_VGIC_ATTR(reg, s->cpu[cpu].gicr_typer),
+  val, write);
+}
+
+static inline void kvm_gic_line_level_access(GICv3State *s, int irq, int cpu,
+ uint32_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+  KVM_VGIC_ATTR(irq, s->cpu[cpu].gicr_typer) |
+  (VGIC_LEVEL_INFO_LINE_LEVEL <<
+   KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT),
+  val, write);
+}
+
+/* Loop through each distributor IRQ related register; since bits
+ * corresponding to SPIs and PPIs are RAZ/WI when affinity routing
+ * is enabled, we skip those.
+ */
+#define for_each_dist_irq_reg(_irq, _max, _field_width) \
+for (_irq = GIC_INTERNAL; _irq < _max; _irq += (32 / _field_width))
+
+static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
+{
+uint32_t reg, *field;
+int irq;
+
+

[Qemu-devel] [PATCH v8 4/5] target-arm: Add GICv3CPUState in CPUARMState struct

2017-02-16 Thread vijay . kilari

From: Vijaya Kumar K 

Add gicv3state void pointer to CPUARMState struct
to store GICv3CPUState.

For use cases such as CPU reset, we need to reset
the GICv3CPUState of the CPU. In such scenarios, this pointer
comes in handy.

This patch takes care of GICv3 only.

Signed-off-by: Vijaya Kumar K 
---
 hw/intc/arm_gicv3_common.c | 2 ++
 hw/intc/arm_gicv3_cpuif.c  | 8 
 hw/intc/gicv3_internal.h   | 2 ++
 target/arm/cpu.h   | 2 ++
 4 files changed, 14 insertions(+)

diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index e62480e..79a5bd9 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -248,6 +248,8 @@ static void arm_gicv3_common_realize(DeviceState *dev, 
Error **errp)
 
 s->cpu[i].cpu = cpu;
 s->cpu[i].gic = s;
+/* Store GICv3CPUState in CPUARMState gicv3state pointer */
+gicv3_set_gicv3state(cpu, &s->cpu[i]);
 
 /* Pre-construct the GICR_TYPER:
  * For our implementation:
diff --git a/hw/intc/arm_gicv3_cpuif.c b/hw/intc/arm_gicv3_cpuif.c
index c25ee03..7849783 100644
--- a/hw/intc/arm_gicv3_cpuif.c
+++ b/hw/intc/arm_gicv3_cpuif.c
@@ -18,6 +18,14 @@
 #include "gicv3_internal.h"
 #include "cpu.h"
 
+void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s)
+{
+ARMCPU *arm_cpu = ARM_CPU(cpu);
+CPUARMState *env = &arm_cpu->env;
+
+env->gicv3state = (void *)s;
+};
+
 static GICv3CPUState *icc_cs_from_env(CPUARMState *env)
 {
 /* Given the CPU, find the right GICv3CPUState struct.
diff --git a/hw/intc/gicv3_internal.h b/hw/intc/gicv3_internal.h
index 457118e..05303a5 100644
--- a/hw/intc/gicv3_internal.h
+++ b/hw/intc/gicv3_internal.h
@@ -408,4 +408,6 @@ static inline void 
gicv3_cache_all_target_cpustates(GICv3State *s)
 }
 }
 
+void gicv3_set_gicv3state(CPUState *cpu, GICv3CPUState *s);
+
 #endif /* QEMU_ARM_GICV3_INTERNAL_H */
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 0956a54..d2eb7bf 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -517,6 +517,8 @@ typedef struct CPUARMState {
 
 void *nvic;
 const struct arm_boot_info *boot_info;
+/* Store GICv3CPUState to access from this struct */
+void *gicv3state;
 } CPUARMState;
 
 /**
-- 
1.9.1

[Qemu-devel] [PATCH v8 1/5] kernel: Add definitions for GICv3 attributes

2017-02-16 Thread vijay . kilari

From: Vijaya Kumar K 

This temporary patch adds kernel API definitions.
Use proper header update procedure after these features
are released.

Signed-off-by: Pavel Fedin 
Signed-off-by: Vijaya Kumar K 
---
 linux-headers/asm-arm/kvm.h   | 12 
 linux-headers/asm-arm64/kvm.h | 12 
 2 files changed, 24 insertions(+)

diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h
index 2fb7859..1798c93 100644
--- a/linux-headers/asm-arm/kvm.h
+++ b/linux-headers/asm-arm/kvm.h
@@ -179,10 +179,22 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS  2
 #define   KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK  (0xffULL << 
KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
+   (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << 
KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define   KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS   3
 #define KVM_DEV_ARM_VGIC_GRP_CTRL   4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+   (0x3fULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
+#define VGIC_LEVEL_INFO_LINE_LEVEL 0
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT0
 
 /* KVM_IRQ_LINE irq field index values */
diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index fd5a276..b3f02ce 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -201,10 +201,22 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS  2
 #define   KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK  (0xffULL << 
KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
+   (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << 
KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define   KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS   3
 #define KVM_DEV_ARM_VGIC_GRP_CTRL  4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+   (0x3fULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
+#define VGIC_LEVEL_INFO_LINE_LEVEL 0
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT   0
 
 /* Device Control API on vcpu fd */
-- 
1.9.1

[Qemu-devel] [PATCH v8 5/5] hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers

2017-02-16 Thread vijay . kilari

From: Vijaya Kumar K 

Reset CPU interface registers of GICv3 when CPU is reset.
For this, ARMCPRegInfo struct is registered with one ICC
register whose resetfn is called when cpu is reset.

All the ICC registers are reset under one single register
reset function instead of calling resetfn for each ICC
register.

Signed-off-by: Vijaya Kumar K 
---
 hw/intc/arm_gicv3_kvm.c | 58 +
 1 file changed, 58 insertions(+)

diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index cda1af4..6377dc3 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -604,6 +604,34 @@ static void kvm_arm_gicv3_get(GICv3State *s)
 }
 }
 
+static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ARMCPU *cpu;
+GICv3State *s;
+GICv3CPUState *c;
+
+c = (GICv3CPUState *)env->gicv3state;
+assert(!(!c || !c->cpu || !c->gic));
+
+s = c->gic;
+cpu = ARM_CPU(c->cpu);
+
+/* Initialize to actual HW supported configuration */
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+  KVM_VGIC_ATTR(ICC_CTLR_EL1, cpu->mp_affinity),
+  &c->icc_ctlr_el1[GICV3_NS], false);
+
+c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS];
+c->icc_pmr_el1 = 0;
+c->icc_bpr[GICV3_G0] = GIC_MIN_BPR;
+c->icc_bpr[GICV3_G1] = GIC_MIN_BPR;
+c->icc_bpr[GICV3_G1NS] = GIC_MIN_BPR;
+
+c->icc_sre_el1 = 0x7;
+memset(c->icc_apr, 0, sizeof(c->icc_apr));
+memset(c->icc_igrpen, 0, sizeof(c->icc_igrpen));
+}
+
 static void kvm_arm_gicv3_reset(DeviceState *dev)
 {
 GICv3State *s = ARM_GICV3_COMMON(dev);
@@ -621,6 +649,30 @@ static void kvm_arm_gicv3_reset(DeviceState *dev)
 kvm_arm_gicv3_put(s);
 }
 
+/*
+ * The CPU interface registers of the GIC need to be reset on CPU reset.
+ * To have arm_gicv3_icc_reset() called on CPU reset, we register the
+ * ARMCPRegInfo below. As we reset the whole CPU interface from a single
+ * register's reset function, we define only one CPU interface register
+ * instead of defining all of them.
+ */
+static const ARMCPRegInfo gicv3_cpuif_reginfo[] = {
+{ .name = "ICC_CTLR_EL1", .state = ARM_CP_STATE_BOTH,
+  .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 12, .opc2 = 4,
+  .type = ARM_CP_NO_RAW,
+  .access = PL1_RW,
+  .readfn = arm_cp_read_zero,
+  .writefn = arm_cp_write_ignore,
+  /*
+   * We hang the whole cpu interface reset routine off here
+   * rather than parcelling it out into one little function
+   * per register
+   */
+  .resetfn = arm_gicv3_icc_reset,
+},
+REGINFO_SENTINEL
+};
+
 static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
 {
 GICv3State *s = KVM_ARM_GICV3(dev);
@@ -644,6 +696,12 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error 
**errp)
 
 gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL);
 
+for (i = 0; i < s->num_cpu; i++) {
+ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i));
+
+define_arm_cp_regs(cpu, gicv3_cpuif_reginfo);
+}
+
 /* Try to create the device via the device control API */
 s->dev_fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_ARM_VGIC_V3, false);
 if (s->dev_fd < 0) {
-- 
1.9.1

[Qemu-devel] [PATCH v8 2/5] hw/intc/arm_gicv3_kvm: Add ICC_SRE_EL1 register to vmstate

2017-02-16 Thread vijay . kilari

From: Vijaya Kumar K 

To save and restore the ICC_SRE_EL1 register, introduce a vmstate
subsection and load it only if non-zero.
Also initialize icc_sre_el1 to 0x7 in the pre_load
function.

Signed-off-by: Vijaya Kumar K 
---
 hw/intc/arm_gicv3_common.c | 32 
 include/hw/intc/arm_gicv3_common.h |  1 +
 2 files changed, 33 insertions(+)

diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index 16b9b0f..e62480e 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -70,6 +70,34 @@ static const VMStateDescription vmstate_gicv3_cpu_virt = {
 }
 };
 
+static int icc_sre_el1_reg_pre_load(void *opaque)
+{
+GICv3CPUState *cs = opaque;
+
+/* By default enable SRE and disable IRQ & FIQ bypass. */
+cs->icc_sre_el1 = 0x7;
+return 0;
+}
+
+static bool icc_sre_el1_reg_needed(void *opaque)
+{
+GICv3CPUState *cs = opaque;
+
+return cs->icc_sre_el1 != 0;
+}
+
+const VMStateDescription vmstate_gicv3_cpu_sre_el1 = {
+.name = "arm_gicv3_cpu/sre_el1",
+.version_id = 1,
+.minimum_version_id = 1,
+.pre_load = icc_sre_el1_reg_pre_load,
+.needed = icc_sre_el1_reg_needed,
+.fields = (VMStateField[]) {
+VMSTATE_UINT64(icc_sre_el1, GICv3CPUState),
+VMSTATE_END_OF_LIST()
+}
+};
+
 static const VMStateDescription vmstate_gicv3_cpu = {
 .name = "arm_gicv3_cpu",
 .version_id = 1,
@@ -100,6 +128,10 @@ static const VMStateDescription vmstate_gicv3_cpu = {
 .subsections = (const VMStateDescription * []) {
 &vmstate_gicv3_cpu_virt,
 NULL
+},
+.subsections = (const VMStateDescription * []) {
+&vmstate_gicv3_cpu_sre_el1,
+NULL
 }
 };
 
diff --git a/include/hw/intc/arm_gicv3_common.h 
b/include/hw/intc/arm_gicv3_common.h
index 4156051..bccdfe1 100644
--- a/include/hw/intc/arm_gicv3_common.h
+++ b/include/hw/intc/arm_gicv3_common.h
@@ -172,6 +172,7 @@ struct GICv3CPUState {
 uint8_t gicr_ipriorityr[GIC_INTERNAL];
 
 /* CPU interface */
+uint64_t icc_sre_el1;
 uint64_t icc_ctlr_el1[2];
 uint64_t icc_pmr_el1;
 uint64_t icc_bpr[3];
-- 
1.9.1

[Qemu-devel] [PATCH v8 0/5] GICv3 live migration support

2017-02-16 Thread vijay . kilari

From: Vijaya Kumar K 

This series introduces support for GICv3 live migration with
new VGIC implementation in 4.7-rc3 kernel.
In this series, patch 1 of the previous implementation
is ported.
https://lists.nongnu.org/archive/html/qemu-devel/2015-10/msg05284.html

Patch 2, is based on below implementation.
http://patchwork.ozlabs.org/patch/626746/

Latest kernel patches
https://www.spinics.net/lists/arm-kernel/msg558046.html

This API definition is as per version of VGICv3 specification
in linux kernel Documentation/virtual/kvm/devices/arm-vgic-v3.txt

Tested Live migration of Idle VM running with 4 VCPUs and 8GB RAM.

v7 => v8:
 - Introduced vmstate subsection to add icc_sre_el1 register to
   VMStateDescription
 - Introduced new function gicv3_set_gicv3state() in arm_gicv3_cpuif.c
   to update gicv3state variable in CPUARMState struct.
 - Used arm_cp_read_zero & arm_cp_write_ignore for ARMCPRegInfo[].

v6 => v7:
 - Rebased on top of v2.8.0-rc4 release.
 - Added patch to add icc_sre_el1 to vmstate before live migration
   patch.
 - Added patch to add gicv3state variable to CPUARMState struct to
   store GICv3CPUState pointer.
 - Added patch to register ARMCPRegInfo[] struct and reset on CPU reset.

v5 => v6:
 - Added separate patch for resetting ICC* registers
 - Added separate patch for save and restore of ICC_CTLR_EL1
 - Dropped translate_fn mechanism and coded open functions
   for edge_trigger and priority save and restore.
 - Save and Restore APnR registers based on ICC_CTLR_EL1.PRIBITS

v4 => v5:
 - Initialized ICC registers before reset.

v3 => v4:
 - Reintroduced offset GICR_SGI_OFFSET
 - Implement save and restore of ICC_SRE_EL1
 - Updated kvm.h header file in sync with KVM v4 patches

v2 => v3:
 - Dropped offset GICR_SGI_OFFSET
 - Implement save/restore of irq line level using
   KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO
 - Fixed bug with save/restore of edge_trigger
Vijaya Kumar K (5):
  kernel: Add definitions for GICv3 attributes
  hw/intc/arm_gicv3_kvm: Add ICC_SRE_EL1 register to vmstate
  hw/intc/arm_gicv3_kvm: Implement get/put functions
  target-arm: Add GICv3CPUState in CPUARMState struct
  hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers

 hw/intc/arm_gicv3_common.c |  34 ++
 hw/intc/arm_gicv3_cpuif.c  |   8 +
 hw/intc/arm_gicv3_kvm.c| 627 -
 hw/intc/gicv3_internal.h   |   3 +
 include/hw/intc/arm_gicv3_common.h |   1 +
 linux-headers/asm-arm/kvm.h|  12 +
 linux-headers/asm-arm64/kvm.h  |  12 +
 target/arm/cpu.h   |   2 +
 8 files changed, 685 insertions(+), 14 deletions(-)

-- 
1.9.1

Re: [Qemu-devel] [PATCH v7 RESEND 5/5] hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers

2017-02-16 Thread Vijay Kilari

On Thu, Feb 16, 2017 at 3:39 PM, Peter Maydell <peter.mayd...@linaro.org> wrote:
> On 16 February 2017 at 09:54, Vijay Kilari <vijay.kil...@gmail.com> wrote:
>> On Tue, Feb 7, 2017 at 8:19 PM, Peter Maydell <peter.mayd...@linaro.org> 
>> wrote:
>>> If you want to do nothing in a read/write function there are
>>> already arm_cp_read_zero and arm_cp_write_ignore functions for
>>> this. But using the ARM_CP_NOP flag is better still.
>>
>> With ARM_CP_NOP qemu fails to boot.
>> qemu-system-aarch64: KVM_SET_DEVICE_ATTR failed: Invalid argument
>> Group 6 attr 0xc665
>
> Not clear to me why using ARM_CP_NOP should result in a KVM
> call failure -- can you dig further into why that is happening?

If ARM_CP_NOP is set, the .resetfn function is not called, and thereby KVM fails
when SRE_EL1 is written with 0x0.

>
> thanks
> -- PMM

Re: [Qemu-devel] [PATCH v7 RESEND 5/5] hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers

2017-02-16 Thread Vijay Kilari

On Tue, Feb 7, 2017 at 8:19 PM, Peter Maydell  wrote:
> On 31 January 2017 at 16:23,   wrote:
>> From: Vijaya Kumar K 
>>
>> Reset CPU interface registers of GICv3 when CPU is reset.
>> For this, ARMCPRegInfo struct is registered with one ICC
>> register whose resetfn is called when cpu is reset.
>>
>> All the ICC registers are reset under one single register
>> reset function instead of calling resetfn for each ICC
>> register.
>>
>> Signed-off-by: Vijaya Kumar K 
>> ---
>>  hw/intc/arm_gicv3_kvm.c | 69 
>> +
>>  1 file changed, 69 insertions(+)
>>
>> diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
>> index f91e0ac..c3f38aa 100644
>> --- a/hw/intc/arm_gicv3_kvm.c
>> +++ b/hw/intc/arm_gicv3_kvm.c
>> @@ -604,6 +604,39 @@ static void kvm_arm_gicv3_get(GICv3State *s)
>>  }
>>  }
>>
>> +static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri)
>> +{
>> +ARMCPU *cpu;
>> +GICv3State *s;
>> +GICv3CPUState *c;
>> +
>> +c = (GICv3CPUState *)env->gicv3state;
>> +if (!c || !c->cpu || !c->gic) {
>
> We should assert this kind of thing, not just silently do nothing.
> Or just assume it's true, because if it's not then we'll segfault
> immediately below which is just as clear an indication of where
> the bug is as asserting.

OK
>
>> +return;
>> +}
>> +
>> +s = c->gic;
>> +if (!s) {
>
> You've already checked this once.
>
>> +return;
>> +}
>> +
>> +cpu = ARM_CPU(c->cpu);
>> +/* Initialize to actual HW supported configuration */
>> +kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
>> +  KVM_VGIC_ATTR(ICC_CTLR_EL1, cpu->mp_affinity),
>> +  &c->icc_ctlr_el1[GICV3_NS], false);
>> +
>> +c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS];
>> +c->icc_pmr_el1 = 0;
>> +c->icc_bpr[GICV3_G0] = GIC_MIN_BPR;
>> +c->icc_bpr[GICV3_G1] = GIC_MIN_BPR;
>> +c->icc_bpr[GICV3_G1NS] = GIC_MIN_BPR;
>> +
>> +c->icc_sre_el1 = 0x7;
>> +memset(c->icc_apr, 0, sizeof(c->icc_apr));
>> +memset(c->icc_igrpen, 0, sizeof(c->icc_igrpen));
>> +}
>> +
>>  static void kvm_arm_gicv3_reset(DeviceState *dev)
>>  {
>>  GICv3State *s = ARM_GICV3_COMMON(dev);
>> @@ -621,6 +654,41 @@ static void kvm_arm_gicv3_reset(DeviceState *dev)
>>  kvm_arm_gicv3_put(s);
>>  }
>>
>> +static uint64_t icc_cp_reg_read(CPUARMState *env, const ARMCPRegInfo *ri)
>> +{
>> +return 0;
>> +}
>> +
>> +static void icc_cp_reg_write(CPUARMState *env, const ARMCPRegInfo *ri,
>> +  uint64_t value)
>> +{
>> +return;
>> +}
>
> If you want to do nothing in a read/write function there are
> already arm_cp_read_zero and arm_cp_write_ignore functions for
> this. But using the ARM_CP_NOP flag is better still.

With ARM_CP_NOP qemu fails to boot.
qemu-system-aarch64: KVM_SET_DEVICE_ATTR failed: Invalid argument
Group 6 attr 0xc665

>
>> +
>> +/*
>> + * CPU interface registers of GIC needs to be reset on CPU reset.
>> + * For the calling arm_gicv3_icc_reset() on CPU reset, we register
>> + * below ARMCPRegInfo. As we reset the whole cpu interface under single
>> + * register reset, we define only one register of CPU interface instead
>> + * of defining all the registers.
>> + */
>> +static const ARMCPRegInfo gicv3_cpuif_reginfo[] = {
>> +{ .name = "ICC_CTLR_EL1", .state = ARM_CP_STATE_BOTH,
>> +  .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 12, .opc2 = 4,
>> +  .type = ARM_CP_NO_RAW,
>> +  .access = PL1_RW,
>> +  .readfn = icc_cp_reg_read,
>> +  .writefn = icc_cp_reg_write,
>> +  /*
>> +   * We hang the whole cpu interface reset routine off here
>> +   * rather than parcelling it out into one little function
>> +   * per register
>> +   */
>> +  .resetfn = arm_gicv3_icc_reset,
>> +},
>> +REGINFO_SENTINEL
>> +};
>> +
>>  static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
>>  {
>>  GICv3State *s = KVM_ARM_GICV3(dev);
>> @@ -650,6 +718,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, 
>> Error **errp)
>>
>>  /* Store GICv3CPUState in CPUARMState gicv3state pointer */
>>  env->gicv3state = (void *)&s->cpu[i];
>> +define_arm_cp_regs(cpu, gicv3_cpuif_reginfo);
>>  }
>>
>>  /* Try to create the device via the device control API */
>> --
>> 1.9.1
>>
>
> thanks
> -- PMM

Re: [Qemu-devel] [PATCH v7 RESEND 2/5] hw/intc/arm_gicv3_kvm: Add ICC_SRE_EL1 register to vmstate

2017-02-13 Thread Vijay Kilari

On Tue, Feb 7, 2017 at 8:09 PM, Peter Maydell  wrote:
> On 31 January 2017 at 16:22,   wrote:
>> From: Vijaya Kumar K 
>>
>> To Save and Restore ICC_SRE_EL1 register Add ICC_SRE_EL1 register
>> to vmstate and GICv3CPUState struct.
>>
>> Signed-off-by: Vijaya Kumar K 
>> ---
>>  hw/intc/arm_gicv3_common.c | 1 +
>>  include/hw/intc/arm_gicv3_common.h | 1 +
>>  2 files changed, 2 insertions(+)
>>
>> diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
>> index 0f8c4b8..f3245d9 100644
>> --- a/hw/intc/arm_gicv3_common.c
>> +++ b/hw/intc/arm_gicv3_common.c
>> @@ -68,6 +68,7 @@ static const VMStateDescription vmstate_gicv3_cpu = {
>>  VMSTATE_UINT32(gicr_igrpmodr0, GICv3CPUState),
>>  VMSTATE_UINT32(gicr_nsacr, GICv3CPUState),
>>  VMSTATE_UINT8_ARRAY(gicr_ipriorityr, GICv3CPUState, GIC_INTERNAL),
>> +VMSTATE_UINT64(icc_sre_el1, GICv3CPUState),
>>  VMSTATE_UINT64_ARRAY(icc_ctlr_el1, GICv3CPUState, 2),
>>  VMSTATE_UINT64(icc_pmr_el1, GICv3CPUState),
>>  VMSTATE_UINT64_ARRAY(icc_bpr, GICv3CPUState, 3),
>> diff --git a/include/hw/intc/arm_gicv3_common.h 
>> b/include/hw/intc/arm_gicv3_common.h
>> index 341a311..183c7f8 100644
>> --- a/include/hw/intc/arm_gicv3_common.h
>> +++ b/include/hw/intc/arm_gicv3_common.h
>> @@ -166,6 +166,7 @@ struct GICv3CPUState {
>>  uint8_t gicr_ipriorityr[GIC_INTERNAL];
>>
>>  /* CPU interface */
>> +uint64_t icc_sre_el1;
>>  uint64_t icc_ctlr_el1[2];
>>  uint64_t icc_pmr_el1;
>>  uint64_t icc_bpr[3];
>
> This breaks migration compatibility for TCG using GICv3; you
> need to do something here with a VMState subsection so
> the new register is only transferred if it's non-zero.

So, you mean to put a check in kvm_arm_gicv3_put() and
kvm_arm_gicv3_get() to check for a non-zero value?
icc_sre_el1 is always non-zero: it is reset to 0xf in TCG and 0x7 in KVM mode.

>
> thanks
> -- PMM

[Qemu-devel] [PATCH v7 RESEND 4/5] target-arm: Add GICv3CPUState in CPUARMState struct

2017-01-31 Thread vijay . kilari

From: Vijaya Kumar K 

Add a gicv3state void pointer to the CPUARMState struct
to store the GICv3CPUState.

In use cases such as CPU reset, we need to reset the
GICv3CPUState of the CPU. In such a scenario, this pointer
comes in handy.

This patch takes care of GICv3 only.

Signed-off-by: Vijaya Kumar K 
---
 hw/intc/arm_gicv3_kvm.c | 8 
 target-arm/cpu.h| 2 ++
 2 files changed, 10 insertions(+)

diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 77af32d..f91e0ac 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -644,6 +644,14 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error 
**errp)
 
 gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL);
 
+for (i = 0; i < s->num_cpu; i++) {
+ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i));
+CPUARMState *env = &cpu->env;
+
+/* Store GICv3CPUState in CPUARMState gicv3state pointer */
+env->gicv3state = (void *)&s->cpu[i];
+}
+
 /* Try to create the device via the device control API */
 s->dev_fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_ARM_VGIC_V3, false);
 if (s->dev_fd < 0) {
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index ca5c849..b1ca064 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -507,6 +507,8 @@ typedef struct CPUARMState {
 
 void *nvic;
 const struct arm_boot_info *boot_info;
+/* Store GICv3CPUState to access from this struct */
+void *gicv3state;
 } CPUARMState;
 
 /**
-- 
1.9.1

[Qemu-devel] [PATCH v7 RESEND 2/5] hw/intc/arm_gicv3_kvm: Add ICC_SRE_EL1 register to vmstate

2017-01-31 Thread vijay . kilari

From: Vijaya Kumar K 

To save and restore the ICC_SRE_EL1 register, add it
to the vmstate and to the GICv3CPUState struct.

Signed-off-by: Vijaya Kumar K 
---
 hw/intc/arm_gicv3_common.c | 1 +
 include/hw/intc/arm_gicv3_common.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index 0f8c4b8..f3245d9 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -68,6 +68,7 @@ static const VMStateDescription vmstate_gicv3_cpu = {
 VMSTATE_UINT32(gicr_igrpmodr0, GICv3CPUState),
 VMSTATE_UINT32(gicr_nsacr, GICv3CPUState),
 VMSTATE_UINT8_ARRAY(gicr_ipriorityr, GICv3CPUState, GIC_INTERNAL),
+VMSTATE_UINT64(icc_sre_el1, GICv3CPUState),
 VMSTATE_UINT64_ARRAY(icc_ctlr_el1, GICv3CPUState, 2),
 VMSTATE_UINT64(icc_pmr_el1, GICv3CPUState),
 VMSTATE_UINT64_ARRAY(icc_bpr, GICv3CPUState, 3),
diff --git a/include/hw/intc/arm_gicv3_common.h 
b/include/hw/intc/arm_gicv3_common.h
index 341a311..183c7f8 100644
--- a/include/hw/intc/arm_gicv3_common.h
+++ b/include/hw/intc/arm_gicv3_common.h
@@ -166,6 +166,7 @@ struct GICv3CPUState {
 uint8_t gicr_ipriorityr[GIC_INTERNAL];
 
 /* CPU interface */
+uint64_t icc_sre_el1;
 uint64_t icc_ctlr_el1[2];
 uint64_t icc_pmr_el1;
 uint64_t icc_bpr[3];
-- 
1.9.1

[Qemu-devel] [PATCH v7 RESEND 5/5] hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers

2017-01-31 Thread vijay . kilari

From: Vijaya Kumar K 

Reset CPU interface registers of GICv3 when CPU is reset.
For this, ARMCPRegInfo struct is registered with one ICC
register whose resetfn is called when cpu is reset.

All the ICC registers are reset under one single register
reset function instead of calling resetfn for each ICC
register.

Signed-off-by: Vijaya Kumar K 
---
 hw/intc/arm_gicv3_kvm.c | 69 +
 1 file changed, 69 insertions(+)

diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index f91e0ac..c3f38aa 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -604,6 +604,39 @@ static void kvm_arm_gicv3_get(GICv3State *s)
 }
 }
 
+static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ARMCPU *cpu;
+GICv3State *s;
+GICv3CPUState *c;
+
+c = (GICv3CPUState *)env->gicv3state;
+if (!c || !c->cpu || !c->gic) {
+return;
+}
+
+s = c->gic;
+if (!s) {
+return;
+}
+
+cpu = ARM_CPU(c->cpu);
+/* Initialize to actual HW supported configuration */
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+  KVM_VGIC_ATTR(ICC_CTLR_EL1, cpu->mp_affinity),
+  &c->icc_ctlr_el1[GICV3_NS], false);
+
+c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS];
+c->icc_pmr_el1 = 0;
+c->icc_bpr[GICV3_G0] = GIC_MIN_BPR;
+c->icc_bpr[GICV3_G1] = GIC_MIN_BPR;
+c->icc_bpr[GICV3_G1NS] = GIC_MIN_BPR;
+
+c->icc_sre_el1 = 0x7;
+memset(c->icc_apr, 0, sizeof(c->icc_apr));
+memset(c->icc_igrpen, 0, sizeof(c->icc_igrpen));
+}
+
 static void kvm_arm_gicv3_reset(DeviceState *dev)
 {
 GICv3State *s = ARM_GICV3_COMMON(dev);
@@ -621,6 +654,41 @@ static void kvm_arm_gicv3_reset(DeviceState *dev)
 kvm_arm_gicv3_put(s);
 }
 
+static uint64_t icc_cp_reg_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+return 0;
+}
+
+static void icc_cp_reg_write(CPUARMState *env, const ARMCPRegInfo *ri,
+  uint64_t value)
+{
+return;
+}
+
+/*
+ * The CPU interface registers of the GIC need to be reset on CPU reset.
+ * To have arm_gicv3_icc_reset() called on CPU reset, we register the
+ * ARMCPRegInfo below. As we reset the whole CPU interface from a single
+ * register's reset function, we define only one CPU interface register
+ * instead of defining all of them.
+ */
+static const ARMCPRegInfo gicv3_cpuif_reginfo[] = {
+{ .name = "ICC_CTLR_EL1", .state = ARM_CP_STATE_BOTH,
+  .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 12, .opc2 = 4,
+  .type = ARM_CP_NO_RAW,
+  .access = PL1_RW,
+  .readfn = icc_cp_reg_read,
+  .writefn = icc_cp_reg_write,
+  /*
+   * We hang the whole cpu interface reset routine off here
+   * rather than parcelling it out into one little function
+   * per register
+   */
+  .resetfn = arm_gicv3_icc_reset,
+},
+REGINFO_SENTINEL
+};
+
 static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
 {
 GICv3State *s = KVM_ARM_GICV3(dev);
@@ -650,6 +718,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error 
**errp)
 
 /* Store GICv3CPUState in CPUARMState gicv3state pointer */
 env->gicv3state = (void *)&s->cpu[i];
+define_arm_cp_regs(cpu, gicv3_cpuif_reginfo);
 }
 
 /* Try to create the device via the device control API */
-- 
1.9.1

[Qemu-devel] [PATCH v7 RESEND 3/5] hw/intc/arm_gicv3_kvm: Implement get/put functions

2017-01-31 Thread vijay . kilari

From: Vijaya Kumar K 

This actually implements pre_save and post_load methods for in-kernel
vGICv3.

Signed-off-by: Pavel Fedin 
Signed-off-by: Peter Maydell 
[PMM:
 * use decimal, not 0bnnn
 * fixed typo in names of ICC_APR0R_EL1 and ICC_AP1R_EL1
 * completely rearranged the get and put functions to read and write
   the state in a natural order, rather than mixing distributor and
   redistributor state together]
Signed-off-by: Vijaya Kumar K 
[Vijay:
 * Update macro KVM_VGIC_ATTR
 * Use 32 bit access for gicd and gicr
 * GICD_IROUTER, GICD_TYPER, GICR_PROPBASER and GICR_PENDBASER reg
   access  are changed from 64-bit to 32-bit access
 * Add ICC_SRE_EL1 save and restore
 * Dropped translate_fn mechanism and coded functions to handle
   save and restore of edge_trigger and priority
 * Number of APnR register saved/restored based on number of
   priority bits supported]
Reviewed-by: Peter Maydell 
---
---
 hw/intc/arm_gicv3_kvm.c  | 563 ++-
 hw/intc/gicv3_internal.h |   1 +
 2 files changed, 553 insertions(+), 11 deletions(-)

diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 199a439..77af32d 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -23,8 +23,10 @@
 #include "qapi/error.h"
 #include "hw/intc/arm_gicv3_common.h"
 #include "hw/sysbus.h"
+#include "qemu/error-report.h"
 #include "sysemu/kvm.h"
 #include "kvm_arm.h"
+#include "gicv3_internal.h"
 #include "vgic_common.h"
 #include "migration/migration.h"
 
@@ -44,6 +46,32 @@
 #define KVM_ARM_GICV3_GET_CLASS(obj) \
  OBJECT_GET_CLASS(KVMARMGICv3Class, (obj), TYPE_KVM_ARM_GICV3)
 
+#define   KVM_DEV_ARM_VGIC_SYSREG(op0, op1, crn, crm, op2) \
+ (ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
+  ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
+  ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
+  ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
+  ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
+
+#define ICC_PMR_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 4, 6, 0)
+#define ICC_BPR0_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 3)
+#define ICC_AP0R_EL1(n) \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 4 | n)
+#define ICC_AP1R_EL1(n) \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 9, n)
+#define ICC_BPR1_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 3)
+#define ICC_CTLR_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 4)
+#define ICC_SRE_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 5)
+#define ICC_IGRPEN0_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 6)
+#define ICC_IGRPEN1_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 7)
+
 typedef struct KVMARMGICv3Class {
 ARMGICv3CommonClass parent_class;
 DeviceRealize parent_realize;
@@ -57,16 +85,523 @@ static void kvm_arm_gicv3_set_irq(void *opaque, int irq, 
int level)
 kvm_arm_gic_set_irq(s->num_irq, irq, level);
 }
 
+#define KVM_VGIC_ATTR(reg, typer) \
+((typer & KVM_DEV_ARM_VGIC_V3_MPIDR_MASK) | (reg))
+
+static inline void kvm_gicd_access(GICv3State *s, int offset,
+   uint32_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+  KVM_VGIC_ATTR(offset, 0),
+  val, write);
+}
+
+static inline void kvm_gicr_access(GICv3State *s, int offset, int cpu,
+   uint32_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+  KVM_VGIC_ATTR(offset, s->cpu[cpu].gicr_typer),
+  val, write);
+}
+
+static inline void kvm_gicc_access(GICv3State *s, uint64_t reg, int cpu,
+   uint64_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+  KVM_VGIC_ATTR(reg, s->cpu[cpu].gicr_typer),
+  val, write);
+}
+
+static inline void kvm_gic_line_level_access(GICv3State *s, int irq, int cpu,
+ uint32_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+  KVM_VGIC_ATTR(irq, s->cpu[cpu].gicr_typer) |
+  (VGIC_LEVEL_INFO_LINE_LEVEL <<
+   KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT),
+  val, write);
+}
+
+/* Loop through each distributor IRQ related register; since bits
+ * corresponding to SPIs and PPIs are RAZ/WI when affinity routing
+ * is enabled, we skip those.
+ */
+#define for_each_dist_irq_reg(_irq, _max, _field_width) \
+for (_irq = GIC_INTERNAL; _irq < _max; _irq += (32 / _field_width))
+
+static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
+{
+uint32_t reg, *field;
+int irq;
+
+

[Qemu-devel] [PATCH v7 RESEND 0/5] GICv3 live migration support

2017-01-31 Thread vijay . kilari

From: Vijaya Kumar K 

This series introduces support for GICv3 live migration with
new VGIC implementation in 4.7-rc3 kernel.
In this series, patch 1 of the previous implementation
is ported.
https://lists.nongnu.org/archive/html/qemu-devel/2015-10/msg05284.html

Patch 2, is based on below implementation.
http://patchwork.ozlabs.org/patch/626746/

Latest kernel patches
https://www.spinics.net/lists/arm-kernel/msg558046.html

This API definition is as per version of VGICv3 specification
in linux kernel Documentation/virtual/kvm/devices/arm-vgic-v3.txt

Tested Live migration of Idle VM running with 4 VCPUs and 8GB RAM.

v6 => v7:
 - Rebased on top of v2.8.0-rc4 release.
 - Added patch to add icc_sre_el1 to vmstate before live migration
   patch.
 - Added patch to add gicv3state variable to CPUARMState struct to
   store GICv3CPUState pointer.
 - Added patch to register ARMCPRegInfo[] struct and reset on CPU reset.

v5 => v6:
 - Added separate patch for resetting ICC* registers
 - Added separate patch for save and restore of ICC_CTLR_EL1
 - Dropped translate_fn mechanism and coded open functions
   for edge_trigger and priority save and restore.
 - Save and Restore APnR registers based on ICC_CTLR_EL1.PRIBITS

v4 => v5:
 - Initialized ICC registers before reset.

v3 => v4:
 - Reintroduced offset GICR_SGI_OFFSET
 - Implement save and restore of ICC_SRE_EL1
 - Updated kvm.h header file in sync with KVM v4 patches

v2 => v3:
 - Dropped offset GICR_SGI_OFFSET
 - Implement save/restore of irq line level using
   KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO
 - Fixed bug with save/restore of edge_trigger

Vijaya Kumar K (5):
  kernel: Add definitions for GICv3 attributes
  hw/intc/arm_gicv3_kvm: Add ICC_SRE_EL1 register to vmstate
  hw/intc/arm_gicv3_kvm: Implement get/put functions
  target-arm: Add GICv3CPUState in CPUARMState struct
  hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers

 hw/intc/arm_gicv3_common.c |   1 +
 hw/intc/arm_gicv3_kvm.c| 640 -
 hw/intc/gicv3_internal.h   |   1 +
 include/hw/intc/arm_gicv3_common.h |   1 +
 linux-headers/asm-arm/kvm.h|  12 +
 linux-headers/asm-arm64/kvm.h  |  12 +
 target-arm/cpu.h   |   2 +
 7 files changed, 658 insertions(+), 11 deletions(-)

-- 
1.9.1

[Qemu-devel] [PATCH v7 RESEND 1/5] kernel: Add definitions for GICv3 attributes

2017-01-31 Thread vijay . kilari

From: Vijaya Kumar K 

This temporary patch adds kernel API definitions.
Use proper header update procedure after these features
are released.

Signed-off-by: Pavel Fedin 
Signed-off-by: Vijaya Kumar K 
---
 linux-headers/asm-arm/kvm.h   | 12 
 linux-headers/asm-arm64/kvm.h | 12 
 2 files changed, 24 insertions(+)

diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h
index 541268c..1ba7d8d 100644
--- a/linux-headers/asm-arm/kvm.h
+++ b/linux-headers/asm-arm/kvm.h
@@ -172,10 +172,22 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS  2
 #define   KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK  (0xffULL << 
KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
+   (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << 
KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define   KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS   3
 #define KVM_DEV_ARM_VGIC_GRP_CTRL   4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+   (0x3fULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
+#define VGIC_LEVEL_INFO_LINE_LEVEL 0
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT0
 
 /* KVM_IRQ_LINE irq field index values */
diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index fd5a276..b3f02ce 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -201,10 +201,22 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS  2
 #define   KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK  (0xffULL << 
KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
+   (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << 
KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define   KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS   3
 #define KVM_DEV_ARM_VGIC_GRP_CTRL  4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+   (0x3fULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
+#define VGIC_LEVEL_INFO_LINE_LEVEL 0
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT   0
 
 /* Device Control API on vcpu fd */
-- 
1.9.1

Re: [Qemu-devel] [PATCH v7 0/4] GICv3 live migration support

2017-01-31 Thread Vijay Kilari

Please ignore this patch series; I missed one patch of the series.
I will resend the full patch series.


On Tue, Jan 31, 2017 at 9:35 PM,   wrote:
> From: Vijaya Kumar K 
>
> This series introduces support for GICv3 live migration with
> new VGIC implementation in 4.7-rc3 kernel.
> In this series, patch 1 of the previous implementation
> are ported.
> https://lists.nongnu.org/archive/html/qemu-devel/2015-10/msg05284.html
>
> Patch 2, is based on below implementation.
> http://patchwork.ozlabs.org/patch/626746/
>
> Latest kernel patches
> https://www.spinics.net/lists/arm-kernel/msg558046.html
>
> This API definition is as per version of VGICv3 specification
> in linux kernel Documentation/virtual/kvm/devices/arm-vgic-v3.txt
>
> Tested Live migration of Idle VM running with 4 VCPUs and 8GB RAM.
>
> v6 => v7:
>  - Added patch to add icc_ctrl_el1 to vmstruct before live migration
>patch
>  - Added patch to add gicv3state variable to CPUARMState struct to
>store GICv3CPUState pointer.
>  - Added patch to register ARMCPRegInfo[] struct and reset on CPU reset.
>
> v5 => v6:
>  - Added separate patch for Reseting ICC* register
>  - Added seperate patch for save and restore of ICC_CTLR_EL1
>  - Dropped translate_fn mechanism and coded open functions
>for edge_trigger and priority save and restore.
>  - Save and Restore APnR registers based on ICC_CTLR_EL1.PRIBITS
>
> v4 => v5:
>  - Initialized ICC registers before reset.
>
> v3 => v4:
>  - Reintroduced offset GICR_SGI_OFFSET
>  - Implement save and restore of ICC_SRE_EL1
>  - Updated kvm.h header file in sync with KVM v4 patches
>
> v2 => v3:
>  - Dropped offset GICR_SGI_OFFSET
>  - Implement save/restore of irq line level using
>KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO
>  - Fixed bug with save/restore of edge_trigger
>
> Vijaya Kumar K (4):
>   hw/intc/arm_gicv3_kvm: Add ICC_SRE_EL1 register to vmstate
>   hw/intc/arm_gicv3_kvm: Implement get/put functions
>   target-arm: Add GICv3CPUState in CPUARMState struct
>   hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers
>
>  hw/intc/arm_gicv3_common.c |   1 +
>  hw/intc/arm_gicv3_kvm.c| 640 
> -
>  hw/intc/gicv3_internal.h   |   1 +
>  include/hw/intc/arm_gicv3_common.h |   1 +
>  target-arm/cpu.h   |   2 +
>  5 files changed, 634 insertions(+), 11 deletions(-)
>
> --
> 1.9.1
>

[Qemu-devel] [PATCH v7 1/4] hw/intc/arm_gicv3_kvm: Add ICC_SRE_EL1 register to vmstate

2017-01-31 Thread vijay . kilari

From: Vijaya Kumar K 

To save and restore the ICC_SRE_EL1 register, add it
to the vmstate and to the GICv3CPUState struct.

Signed-off-by: Vijaya Kumar K 
---
 hw/intc/arm_gicv3_common.c | 1 +
 include/hw/intc/arm_gicv3_common.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index 0f8c4b8..f3245d9 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -68,6 +68,7 @@ static const VMStateDescription vmstate_gicv3_cpu = {
 VMSTATE_UINT32(gicr_igrpmodr0, GICv3CPUState),
 VMSTATE_UINT32(gicr_nsacr, GICv3CPUState),
 VMSTATE_UINT8_ARRAY(gicr_ipriorityr, GICv3CPUState, GIC_INTERNAL),
+VMSTATE_UINT64(icc_sre_el1, GICv3CPUState),
 VMSTATE_UINT64_ARRAY(icc_ctlr_el1, GICv3CPUState, 2),
 VMSTATE_UINT64(icc_pmr_el1, GICv3CPUState),
 VMSTATE_UINT64_ARRAY(icc_bpr, GICv3CPUState, 3),
diff --git a/include/hw/intc/arm_gicv3_common.h 
b/include/hw/intc/arm_gicv3_common.h
index 341a311..183c7f8 100644
--- a/include/hw/intc/arm_gicv3_common.h
+++ b/include/hw/intc/arm_gicv3_common.h
@@ -166,6 +166,7 @@ struct GICv3CPUState {
 uint8_t gicr_ipriorityr[GIC_INTERNAL];
 
 /* CPU interface */
+uint64_t icc_sre_el1;
 uint64_t icc_ctlr_el1[2];
 uint64_t icc_pmr_el1;
 uint64_t icc_bpr[3];
-- 
1.9.1

[Qemu-devel] [PATCH v7 4/4] hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers

2017-01-31 Thread vijay . kilari

From: Vijaya Kumar K 

Reset CPU interface registers of GICv3 when CPU is reset.
For this, ARMCPRegInfo struct is registered with one ICC
register whose resetfn is called when cpu is reset.

All the ICC registers are reset under one single register
reset function instead of calling resetfn for each ICC
register.

Signed-off-by: Vijaya Kumar K 
---
 hw/intc/arm_gicv3_kvm.c | 69 +
 1 file changed, 69 insertions(+)

diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index f91e0ac..c3f38aa 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -604,6 +604,39 @@ static void kvm_arm_gicv3_get(GICv3State *s)
 }
 }
 
+static void arm_gicv3_icc_reset(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+ARMCPU *cpu;
+GICv3State *s;
+GICv3CPUState *c;
+
+c = (GICv3CPUState *)env->gicv3state;
+if (!c || !c->cpu || !c->gic) {
+return;
+}
+
+s = c->gic;
+if (!s) {
+return;
+}
+
+cpu = ARM_CPU(c->cpu);
+/* Initialize to actual HW supported configuration */
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+  KVM_VGIC_ATTR(ICC_CTLR_EL1, cpu->mp_affinity),
+  &c->icc_ctlr_el1[GICV3_NS], false);
+
+c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS];
+c->icc_pmr_el1 = 0;
+c->icc_bpr[GICV3_G0] = GIC_MIN_BPR;
+c->icc_bpr[GICV3_G1] = GIC_MIN_BPR;
+c->icc_bpr[GICV3_G1NS] = GIC_MIN_BPR;
+
+c->icc_sre_el1 = 0x7;
+memset(c->icc_apr, 0, sizeof(c->icc_apr));
+memset(c->icc_igrpen, 0, sizeof(c->icc_igrpen));
+}
+
 static void kvm_arm_gicv3_reset(DeviceState *dev)
 {
 GICv3State *s = ARM_GICV3_COMMON(dev);
@@ -621,6 +654,41 @@ static void kvm_arm_gicv3_reset(DeviceState *dev)
 kvm_arm_gicv3_put(s);
 }
 
+static uint64_t icc_cp_reg_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+return 0;
+}
+
+static void icc_cp_reg_write(CPUARMState *env, const ARMCPRegInfo *ri,
+  uint64_t value)
+{
+return;
+}
+
+/*
+ * CPU interface registers of the GIC need to be reset on CPU reset.
+ * To have arm_gicv3_icc_reset() called on CPU reset, we register
+ * the ARMCPRegInfo below. As we reset the whole cpu interface under a single
+ * register reset, we define only one register of the CPU interface instead
+ * of defining all the registers.
+ */
+static const ARMCPRegInfo gicv3_cpuif_reginfo[] = {
+{ .name = "ICC_CTLR_EL1", .state = ARM_CP_STATE_BOTH,
+  .opc0 = 3, .opc1 = 0, .crn = 12, .crm = 12, .opc2 = 4,
+  .type = ARM_CP_NO_RAW,
+  .access = PL1_RW,
+  .readfn = icc_cp_reg_read,
+  .writefn = icc_cp_reg_write,
+  /*
+   * We hang the whole cpu interface reset routine off here
+   * rather than parcelling it out into one little function
+   * per register
+   */
+  .resetfn = arm_gicv3_icc_reset,
+},
+REGINFO_SENTINEL
+};
+
 static void kvm_arm_gicv3_realize(DeviceState *dev, Error **errp)
 {
 GICv3State *s = KVM_ARM_GICV3(dev);
@@ -650,6 +718,7 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error 
**errp)
 
 /* Store GICv3CPUState in CPUARMState gicv3state pointer */
 env->gicv3state = (void *)&s->cpu[i];
+define_arm_cp_regs(cpu, gicv3_cpuif_reginfo);
 }
 
 /* Try to create the device via the device control API */
-- 
1.9.1

[Qemu-devel] [PATCH v7 2/4] hw/intc/arm_gicv3_kvm: Implement get/put functions

2017-01-31 Thread vijay . kilari

From: Vijaya Kumar K 

This actually implements pre_save and post_load methods for in-kernel
vGICv3.

Signed-off-by: Pavel Fedin 
Signed-off-by: Peter Maydell 
[PMM:
 * use decimal, not 0bnnn
 * fixed typo in names of ICC_APR0R_EL1 and ICC_AP1R_EL1
 * completely rearranged the get and put functions to read and write
   the state in a natural order, rather than mixing distributor and
   redistributor state together]
Signed-off-by: Vijaya Kumar K 
[Vijay:
 * Update macro KVM_VGIC_ATTR
 * Use 32 bit access for gicd and gicr
 * GICD_IROUTER, GICD_TYPER, GICR_PROPBASER and GICR_PENDBASER reg
   access  are changed from 64-bit to 32-bit access
 * Add ICC_SRE_EL1 save and restore
 * Dropped translate_fn mechanism and coded functions to handle
   save and restore of edge_trigger and priority
 * Number of APnR register saved/restored based on number of
   priority bits supported]
Reviewed-by: Peter Maydell 
---
---
 hw/intc/arm_gicv3_kvm.c  | 563 ++-
 hw/intc/gicv3_internal.h |   1 +
 2 files changed, 553 insertions(+), 11 deletions(-)

diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 199a439..77af32d 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -23,8 +23,10 @@
 #include "qapi/error.h"
 #include "hw/intc/arm_gicv3_common.h"
 #include "hw/sysbus.h"
+#include "qemu/error-report.h"
 #include "sysemu/kvm.h"
 #include "kvm_arm.h"
+#include "gicv3_internal.h"
 #include "vgic_common.h"
 #include "migration/migration.h"
 
@@ -44,6 +46,32 @@
 #define KVM_ARM_GICV3_GET_CLASS(obj) \
  OBJECT_GET_CLASS(KVMARMGICv3Class, (obj), TYPE_KVM_ARM_GICV3)
 
+#define   KVM_DEV_ARM_VGIC_SYSREG(op0, op1, crn, crm, op2) \
+ (ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
+  ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
+  ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
+  ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
+  ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
+
+#define ICC_PMR_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 4, 6, 0)
+#define ICC_BPR0_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 3)
+#define ICC_AP0R_EL1(n) \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 4 | n)
+#define ICC_AP1R_EL1(n) \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 9, n)
+#define ICC_BPR1_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 3)
+#define ICC_CTLR_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 4)
+#define ICC_SRE_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 5)
+#define ICC_IGRPEN0_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 6)
+#define ICC_IGRPEN1_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 7)
+
 typedef struct KVMARMGICv3Class {
 ARMGICv3CommonClass parent_class;
 DeviceRealize parent_realize;
@@ -57,16 +85,523 @@ static void kvm_arm_gicv3_set_irq(void *opaque, int irq, 
int level)
 kvm_arm_gic_set_irq(s->num_irq, irq, level);
 }
 
+#define KVM_VGIC_ATTR(reg, typer) \
+((typer & KVM_DEV_ARM_VGIC_V3_MPIDR_MASK) | (reg))
+
+static inline void kvm_gicd_access(GICv3State *s, int offset,
+   uint32_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+  KVM_VGIC_ATTR(offset, 0),
+  val, write);
+}
+
+static inline void kvm_gicr_access(GICv3State *s, int offset, int cpu,
+   uint32_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+  KVM_VGIC_ATTR(offset, s->cpu[cpu].gicr_typer),
+  val, write);
+}
+
+static inline void kvm_gicc_access(GICv3State *s, uint64_t reg, int cpu,
+   uint64_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+  KVM_VGIC_ATTR(reg, s->cpu[cpu].gicr_typer),
+  val, write);
+}
+
+static inline void kvm_gic_line_level_access(GICv3State *s, int irq, int cpu,
+ uint32_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+  KVM_VGIC_ATTR(irq, s->cpu[cpu].gicr_typer) |
+  (VGIC_LEVEL_INFO_LINE_LEVEL <<
+   KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT),
+  val, write);
+}
+
+/* Loop through each distributor IRQ related register; since bits
+ * corresponding to SPIs and PPIs are RAZ/WI when affinity routing
+ * is enabled, we skip those.
+ */
+#define for_each_dist_irq_reg(_irq, _max, _field_width) \
+for (_irq = GIC_INTERNAL; _irq < _max; _irq += (32 / _field_width))
+
+static void kvm_dist_get_priority(GICv3State *s, uint32_t offset, uint8_t *bmp)
+{
+uint32_t reg, *field;
+int irq;
+
+

[Qemu-devel] [PATCH v7 3/4] target-arm: Add GICv3CPUState in CPUARMState struct

2017-01-31 Thread vijay . kilari

From: Vijaya Kumar K 

Add a gicv3state void pointer to the CPUARMState struct
to store the GICv3CPUState.

In use cases like CPU reset, we need to reset the
GICv3CPUState of the CPU. In such scenarios, this pointer
comes in handy.

This patch takes care of GICv3 only.

Signed-off-by: Vijaya Kumar K 
---
 hw/intc/arm_gicv3_kvm.c | 8 
 target-arm/cpu.h| 2 ++
 2 files changed, 10 insertions(+)

diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 77af32d..f91e0ac 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -644,6 +644,14 @@ static void kvm_arm_gicv3_realize(DeviceState *dev, Error 
**errp)
 
 gicv3_init_irqs_and_mmio(s, kvm_arm_gicv3_set_irq, NULL);
 
+for (i = 0; i < s->num_cpu; i++) {
+ARMCPU *cpu = ARM_CPU(qemu_get_cpu(i));
+CPUARMState *env = &cpu->env;
+
+/* Store GICv3CPUState in CPUARMState gicv3state pointer */
+env->gicv3state = (void *)&s->cpu[i];
+}
+
 /* Try to create the device via the device control API */
 s->dev_fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_ARM_VGIC_V3, false);
 if (s->dev_fd < 0) {
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index ca5c849..b1ca064 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -507,6 +507,8 @@ typedef struct CPUARMState {
 
 void *nvic;
 const struct arm_boot_info *boot_info;
+/* Store GICv3CPUState to access from this struct */
+void *gicv3state;
 } CPUARMState;
 
 /**
-- 
1.9.1

[Qemu-devel] [PATCH v7 0/4] GICv3 live migration support

2017-01-31 Thread vijay . kilari

From: Vijaya Kumar K 

This series introduces support for GICv3 live migration with
new VGIC implementation in 4.7-rc3 kernel.
In this series, patch 1 of the previous implementation
are ported.
https://lists.nongnu.org/archive/html/qemu-devel/2015-10/msg05284.html

Patch 2, is based on below implementation.
http://patchwork.ozlabs.org/patch/626746/

Latest kernel patches
https://www.spinics.net/lists/arm-kernel/msg558046.html

This API definition is as per version of VGICv3 specification
in linux kernel Documentation/virtual/kvm/devices/arm-vgic-v3.txt

Tested Live migration of Idle VM running with 4 VCPUs and 8GB RAM.

v6 => v7:
 - Added patch to add icc_ctrl_el1 to vmstruct before live migration
   patch
 - Added patch to add gicv3state variable to CPUARMState struct to
   store GICv3CPUState pointer.
 - Added patch to register ARMCPRegInfo[] struct and reset on CPU reset.

v5 => v6:
 - Added separate patch for Reseting ICC* register
 - Added seperate patch for save and restore of ICC_CTLR_EL1
 - Dropped translate_fn mechanism and coded open functions
   for edge_trigger and priority save and restore.
 - Save and Restore APnR registers based on ICC_CTLR_EL1.PRIBITS

v4 => v5:
 - Initialized ICC registers before reset.

v3 => v4:
 - Reintroduced offset GICR_SGI_OFFSET
 - Implement save and restore of ICC_SRE_EL1
 - Updated kvm.h header file in sync with KVM v4 patches

v2 => v3:
 - Dropped offset GICR_SGI_OFFSET
 - Implement save/restore of irq line level using
   KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO
 - Fixed bug with save/restore of edge_trigger

Vijaya Kumar K (4):
  hw/intc/arm_gicv3_kvm: Add ICC_SRE_EL1 register to vmstate
  hw/intc/arm_gicv3_kvm: Implement get/put functions
  target-arm: Add GICv3CPUState in CPUARMState struct
  hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers

 hw/intc/arm_gicv3_common.c |   1 +
 hw/intc/arm_gicv3_kvm.c| 640 -
 hw/intc/gicv3_internal.h   |   1 +
 include/hw/intc/arm_gicv3_common.h |   1 +
 target-arm/cpu.h   |   2 +
 5 files changed, 634 insertions(+), 11 deletions(-)

-- 
1.9.1

Re: [Qemu-devel] [RFC 3/4] hw/intc/arm_gicv3_its: Implement state save/restore

2017-01-26 Thread Vijay Kilari

Hi Eric,

On Thu, Jan 26, 2017 at 2:49 PM, Eric Auger  wrote:
> We need to handle both registers and ITS tables. While
> register handling is standard, ITS table handling is more
> challenging since the kernel API is devised so that the
> tables are flushed into guest RAM and not in vmstate buffers.
>
> Flushing the ITS tables on device pre_save() is too late
> since the guest RAM had already been saved at this point.
>
> Table flushing needs to happen when we are sure the vcpus
> are stopped and before the last dirty page saving. The
> right point is RUN_STATE_FINISH_MIGRATE but sometimes the
> VM gets stopped before migration launch so let's simply
> flush the tables each time the VM gets stopped.
>
> For regular ITS registers we just can use vmstate pre_save
> and post_load callbacks.
>
> Signed-off-by: Eric Auger 
>
> ---
> ---
>  hw/intc/arm_gicv3_its_common.c |  8 
>  hw/intc/arm_gicv3_its_kvm.c| 86 
> ++
>  include/hw/intc/arm_gicv3_its_common.h |  6 +++
>  3 files changed, 100 insertions(+)
>
> diff --git a/hw/intc/arm_gicv3_its_common.c b/hw/intc/arm_gicv3_its_common.c
> index 9d67c5c..75b9f04 100644
> --- a/hw/intc/arm_gicv3_its_common.c
> +++ b/hw/intc/arm_gicv3_its_common.c
> @@ -49,6 +49,14 @@ static const VMStateDescription vmstate_its = {
>  .pre_save = gicv3_its_pre_save,
>  .post_load = gicv3_its_post_load,
>  .unmigratable = true,
> +.fields = (VMStateField[]) {
> +VMSTATE_UINT32(ctlr, GICv3ITSState),
> +VMSTATE_UINT64(cbaser, GICv3ITSState),
> +VMSTATE_UINT64(cwriter, GICv3ITSState),
> +VMSTATE_UINT64(creadr, GICv3ITSState),
> +VMSTATE_UINT64_ARRAY(baser, GICv3ITSState, 8),
> +VMSTATE_END_OF_LIST()
> +},
>  };
>
>  static MemTxResult gicv3_its_trans_read(void *opaque, hwaddr offset,
> diff --git a/hw/intc/arm_gicv3_its_kvm.c b/hw/intc/arm_gicv3_its_kvm.c
> index fc246e0..3f8017d 100644
> --- a/hw/intc/arm_gicv3_its_kvm.c
> +++ b/hw/intc/arm_gicv3_its_kvm.c
> @@ -53,6 +53,24 @@ static int kvm_its_send_msi(GICv3ITSState *s, uint32_t 
> value, uint16_t devid)
>  return kvm_vm_ioctl(kvm_state, KVM_SIGNAL_MSI, );
>  }
>
> +/**
> + * vm_change_state_handler - VM change state callback aiming at flushing
> + * ITS tables into guest RAM
> + *
> + * The tables get flushed to guest RAM whenever the VM gets stopped.
> + */
> +static void vm_change_state_handler(void *opaque, int running,
> +RunState state)
> +{
> +GICv3ITSState *s = (GICv3ITSState *)opaque;
> +
> +if (running) {
> +return;
> +}
> +kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ITS_TABLES,
> +  0, NULL, false);
> +}
> +
>  static void kvm_arm_its_realize(DeviceState *dev, Error **errp)
>  {
>  GICv3ITSState *s = ARM_GICV3_ITS_COMMON(dev);
> @@ -83,6 +101,8 @@ static void kvm_arm_its_realize(DeviceState *dev, Error 
> **errp)
>  kvm_msi_use_devid = true;
>  kvm_gsi_direct_mapping = false;
>  kvm_msi_via_irqfd_allowed = kvm_irqfds_enabled();
> +
> +qemu_add_vm_change_state_handler(vm_change_state_handler, s);
>  }
>
>  static void kvm_arm_its_init(Object *obj)
> @@ -96,6 +116,70 @@ static void kvm_arm_its_init(Object *obj)
>   _abort);
>  }
>
> +/**
> + * kvm_arm_its_get - handles the saving of ITS registers.
> + * ITS tables, being flushed into guest RAM needs to be saved before
> + * the pre_save() callback, hence the migration state change notifiers
> + */
> +static void kvm_arm_its_get(GICv3ITSState *s)
> +{
> +uint64_t reg;
> +int i;
> +

 Don't we need to check for LPI support before save/restore?
I mean, read GITS_TYPER and check for LPI support?

> +for (i = 0; i < 8; i++) {
> +kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS,
> +  GITS_BASER + i * 8, >baser[i], false);
> +}
> +
> +kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS,
> +  GITS_CTLR, , false);
> +s->ctlr = extract64(reg, 0, 32);
> +
> +kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS,
> +  GITS_CBASER, >cbaser, false);
> +
> +kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS,
> +  GITS_CREADR, >creadr, false);
> +
> +kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS,
> +  GITS_CWRITER, >cwriter, false);
> +}
> +
> +/**
> + * kvm_arm_its_put - Restore both the ITS registers and guest RAM tables
> + * ITS tables, being flushed into guest RAM needs to be saved before
> + * the pre_save() callback. The restoration order matters since there
> + * are dependencies between register settings, as specified by the
> + * architecture specification
> + */
> +static void kvm_arm_its_put(GICv3ITSState *s)
> +{
> +uint64_t reg;
> +int i;
> +
> +/* must be written

Re: [Qemu-devel] [RFC 2/4] hw/intc/arm_gicv3_kvm: Rename KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS

2017-01-26 Thread Vijay Kilari

Hi Eric,

On Thu, Jan 26, 2017 at 2:49 PM, Eric Auger  wrote:
> Rename KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS into KVM_DEV_ARM_VGIC_CPU_SYSREGS
> as exposed in the kernel user API and pulled by update-linux-headers.sh.

I will fix it in my next qemu patch series.
I have updated the kernel to use KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS in the
latest kernel patch
version, v11, so you can drop this patch.

>
> Signed-off-by: Eric Auger 
>
> ---
>
> KVM_DEV_ARM_VGIC_CPU_SYSREGS may be fixed at kernel level instead
> ---
>  hw/intc/arm_gicv3_kvm.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
> index 267c2d6..d8ecbc3 100644
> --- a/hw/intc/arm_gicv3_kvm.c
> +++ b/hw/intc/arm_gicv3_kvm.c
> @@ -108,7 +108,7 @@ static inline void kvm_gicr_access(GICv3State *s, int 
> offset, int cpu,
>  static inline void kvm_gicc_access(GICv3State *s, uint64_t reg, int cpu,
> uint64_t *val, bool write)
>  {
> -kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
> +kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_CPU_SYSREGS,
>KVM_VGIC_ATTR(reg, s->cpu[cpu].gicr_typer),
>val, write);
>  }
> --
> 2.5.5
>

Re: [Qemu-devel] [PATCH v5 2/3] utils: Add helper to read arm MIDR_EL1 register

2016-12-19 Thread Vijay Kilari

On Fri, Dec 16, 2016 at 7:34 PM, Peter Maydell  wrote:
> On 7 December 2016 at 17:06,   wrote:
>> From: Vijaya Kumar K 
>>
>> Add helper API to read MIDR_EL1 registers to fetch
>> cpu identification information. This helps in
>> adding errata's and architecture specific features.
>>
>> This is implemented only for arm architecture.
>>
>> Signed-off-by: Vijaya Kumar K 
>> ---
>>  include/qemu/aarch64-cpuid.h | 38 
>>  util/Makefile.objs   |  1 +
>>  util/aarch64-cpuid.c | 52 
>> 
>>  3 files changed, 91 insertions(+)
>>
>> diff --git a/include/qemu/aarch64-cpuid.h b/include/qemu/aarch64-cpuid.h
>> new file mode 100644
>> index 000..fb88ed8
>> --- /dev/null
>> +++ b/include/qemu/aarch64-cpuid.h
>> @@ -0,0 +1,38 @@
>> +#ifndef QEMU_AARCH64_CPUID_H
>> +#define QEMU_AARCH64_CPUID_H
>> +
>> +#if defined(__aarch64__) && defined(CONFIG_LINUX)
>> +#define MIDR_IMPLEMENTER_SHIFT  24
>> +#define MIDR_IMPLEMENTER_MASK   (0xffULL << MIDR_IMPLEMENTER_SHIFT)
>> +#define MIDR_ARCHITECTURE_SHIFT 16
>> +#define MIDR_ARCHITECTURE_MASK  (0xf << MIDR_ARCHITECTURE_SHIFT)
>> +#define MIDR_PARTNUM_SHIFT  4
>> +#define MIDR_PARTNUM_MASK   (0xfff << MIDR_PARTNUM_SHIFT)
>> +
>> +#define MIDR_CPU_PART(imp, partnum) \
>> +(((imp) << MIDR_IMPLEMENTER_SHIFT)  | \
>> +(0xf<< MIDR_ARCHITECTURE_SHIFT) | \
>> +((partnum)  << MIDR_PARTNUM_SHIFT))
>> +
>> +#define ARM_CPU_IMP_CAVIUM0x43
>> +#define CAVIUM_CPU_PART_THUNDERX  0x0A1
>> +
>> +#define MIDR_THUNDERX_PASS2  \
>> +   MIDR_CPU_PART(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
>> +#define CPU_MODEL_MASK  (MIDR_IMPLEMENTER_MASK | MIDR_ARCHITECTURE_MASK | \
>> + MIDR_PARTNUM_MASK)
>> +
>> +uint64_t get_aarch64_cpu_id(void);
>> +bool is_thunderx_pass2_cpu(void);
>> +#else
>> +static inline uint64_t get_aarch64_cpu_id(void)
>> +{
>> +return 0;
>> +}
>> +
>> +static inline bool is_thunderx_pass2_cpu(void)
>> +{
>> +return false;
>> +}
>> +#endif
>> +#endif
>> diff --git a/util/Makefile.objs b/util/Makefile.objs
>> index ad0f9c7..a9585c9 100644
>> --- a/util/Makefile.objs
>> +++ b/util/Makefile.objs
>> @@ -36,3 +36,4 @@ util-obj-y += log.o
>>  util-obj-y += qdist.o
>>  util-obj-y += qht.o
>>  util-obj-y += range.o
>> +util-obj-$(CONFIG_LINUX) += aarch64-cpuid.o
>> diff --git a/util/aarch64-cpuid.c b/util/aarch64-cpuid.c
>> new file mode 100644
>> index 000..575f52e
>> --- /dev/null
>> +++ b/util/aarch64-cpuid.c
>> @@ -0,0 +1,52 @@
>> +/*
>> + * Dealing with arm cpu identification information.
>> + *
>> + * Copyright (C) 2016 Cavium, Inc.
>> + *
>> + * Authors:
>> + *  Vijaya Kumar K 
>> + *
>> + * This work is licensed under the terms of the GNU LGPL, version 2.1
>> + * or later.  See the COPYING.LIB file in the top-level directory.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qemu/cutils.h"
>> +#include "qemu/aarch64-cpuid.h"
>> +
>> +#if defined(__aarch64__)
>> +static uint64_t qemu_read_aarch64_midr_el1(void)
>> +{
>> +const char *file = 
>> "/sys/devices/system/cpu/cpu0/regs/identification/midr_el1";
>
> If CPU0 happens to be offline (eg hot-unplugged) then this file
> won't exist, and we'll fail to identify any MIDR value.

I wrongly assumed that cpu0 cannot be hot-unplugged on arm64.
At least on our platform, it is not allowed.

One solution I can think of is to get the currently running cpu using sched_getcpu()
and fetch the MIDR from that cpu's path,
OR read /sys/devices/system/cpu/online and find an online cpu.

>
> The API as designed here also doesn't seem to consider
> the idea of big.LITTLE systems -- if there are multiple
> CPUs with different MIDRs, which one should we return here?

Yes, handling big.LITTLE configurations is a limitation here.
It was discussed in the initial version of this patch series.

https://lists.gnu.org/archive/html/qemu-devel/2016-05/msg01221.html

(From use case point of view, we require only Implementer ID, which
 won't be different for big.LITTLE configuration. I agree that this generic
 function should work for other use cases as well).

So I will add a comment here.

>
>> +char *buf;
>> +uint64_t midr = 0;
>> +
>> +if (!g_file_get_contents(file, &buf, 0, NULL)) {
>> +goto out;
>> +}
>> +
>> +if (qemu_strtoull(buf, NULL, 0, &midr) < 0) {
>> +midr = 0;
>> +goto out;
>> +}
>> +
>> +out:
>> +g_free(buf);
>> +
>> +return midr;
>
> thanks
> -- PMM

[Qemu-devel] [PATCH v5 3/3] utils: Add prefetch for Thunderx platform

2016-12-07 Thread vijay . kilari

From: Vijaya Kumar K 

The ThunderX pass2 chip requires an explicit prefetch
instruction to give a prefetch hint.

To speed up live migration on the ThunderX platform,
a prefetch instruction is added in the zero buffer check
function. The results below show the live migration time improvement
with the prefetch instruction. A VM with 4 VCPUs and 8GB RAM is migrated.

Code for decoding cache size is taken from Richard's patch.

With 1K page size and without prefetch
==
Migration status: completed
total time: 13556 milliseconds
downtime: 380 milliseconds
setup: 15 milliseconds
transferred ram: 265557 kbytes
throughput: 160.51 mbps
remaining ram: 0 kbytes
total ram: 8519872 kbytes
duplicate: 8344672 pages
skipped: 0 pages
normal: 190724 pages
normal bytes: 190724 kbytes
dirty sync count: 3

With 1K page size and with prefetch
===
Migration status: completed
total time: 8218 milliseconds
downtime: 395 milliseconds
setup: 15 milliseconds
transferred ram: 274484 kbytes
throughput: 273.67 mbps
remaining ram: 0 kbytes
total ram: 8519872 kbytes
duplicate: 8341921 pages
skipped: 0 pages
normal: 199606 pages
normal bytes: 199606 kbytes
dirty sync count: 3
(qemu)

With 4K page size and without prefetch
==
Migration status: completed
total time: 11121 milliseconds
downtime: 372 milliseconds
setup: 5 milliseconds
transferred ram: 231777 kbytes
throughput: 170.77 mbps
remaining ram: 0 kbytes
total ram: 8519872 kbytes
duplicate: 2082158 pages
skipped: 0 pages
normal: 53265 pages
normal bytes: 213060 kbytes
dirty sync count: 3

With 4K page size and with prefetch
===
Migration status: completed
total time: 5893 milliseconds
downtime: 359 milliseconds
setup: 5 milliseconds
transferred ram: 225795 kbytes
throughput: 313.96 mbps
remaining ram: 0 kbytes
total ram: 8519872 kbytes
duplicate: 2081903 pages
skipped: 0 pages
normal: 51773 pages
normal bytes: 207092 kbytes
dirty sync count: 3

Signed-off-by: Vijaya Kumar K 
---
 util/bufferiszero.c | 37 +++--
 1 file changed, 35 insertions(+), 2 deletions(-)

diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index 421d945..ed3b31d 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -25,6 +25,11 @@
 #include "qemu-common.h"
 #include "qemu/cutils.h"
 #include "qemu/bswap.h"
+#include "qemu/aarch64-cpuid.h"
+
+static uint32_t cache_line_size = 64;
+static uint32_t prefetch_line_dist = 1;
+static uint32_t prefetch_distance = 8;
 
 static bool
 buffer_zero_int(const void *buf, size_t len)
@@ -49,7 +54,7 @@ buffer_zero_int(const void *buf, size_t len)
 const uint64_t *e = (uint64_t *)(((uintptr_t)buf + len) & -8);
 
 for (; p + 8 <= e; p += 8) {
-__builtin_prefetch(p + 8, 0, 0);
+__builtin_prefetch(p + prefetch_distance, 0, 0);
 if (t) {
 return false;
 }
@@ -293,17 +298,45 @@ bool test_buffer_is_zero_next_accel(void)
 }
 #endif
 
+static void __attribute__((constructor)) init_cache_size(void)
+{
+#if defined(__aarch64__)
+uint64_t t;
+
+/* Use the DZP block size as a proxy for the cacheline size,
+   since the latter is not available to userspace.  This seems
+   to work in practice for existing implementations.  */
+asm("mrs %0, dczid_el0" : "=r"(t));
+if ((1 << ((t & 0xf) + 2)) >= 128) {
+cache_line_size = 128;
+}
+#endif
+
+get_aarch64_cpu_id();
+if (is_thunderx_pass2_cpu()) {
+prefetch_line_dist = 3;
+prefetch_distance = (prefetch_line_dist * cache_line_size) /
+ sizeof(uint64_t);
+}
+}
+
 /*
  * Checks if a buffer is all zeroes
  */
 bool buffer_is_zero(const void *buf, size_t len)
 {
+int i;
+uint32_t prefetch_distance_bytes;
+
 if (unlikely(len == 0)) {
 return true;
 }
 
 /* Fetch the beginning of the buffer while we select the accelerator.  */
-__builtin_prefetch(buf, 0, 0);
+prefetch_distance_bytes = prefetch_line_dist * cache_line_size;
+for (i = 0; i < prefetch_distance_bytes && i < len; i += cache_line_size) {
+__builtin_prefetch(buf + i, 0, 0);
+}
 
 /* Use an optimized zero check if possible.  Note that this also
includes a check for an unrolled loop over 64-bit integers.  */
-- 
1.9.1

[Qemu-devel] [PATCH v5 2/3] utils: Add helper to read arm MIDR_EL1 register

2016-12-07 Thread vijay . kilari

From: Vijaya Kumar K 

Add a helper API to read the MIDR_EL1 register to fetch
cpu identification information. This helps in
adding errata and architecture-specific features.

This is implemented only for arm architecture.

Signed-off-by: Vijaya Kumar K 
---
 include/qemu/aarch64-cpuid.h | 38 
 util/Makefile.objs   |  1 +
 util/aarch64-cpuid.c | 52 
 3 files changed, 91 insertions(+)

diff --git a/include/qemu/aarch64-cpuid.h b/include/qemu/aarch64-cpuid.h
new file mode 100644
index 000..fb88ed8
--- /dev/null
+++ b/include/qemu/aarch64-cpuid.h
@@ -0,0 +1,38 @@
+#ifndef QEMU_AARCH64_CPUID_H
+#define QEMU_AARCH64_CPUID_H
+
+#if defined(__aarch64__) && defined(CONFIG_LINUX)
+#define MIDR_IMPLEMENTER_SHIFT  24
+#define MIDR_IMPLEMENTER_MASK   (0xffULL << MIDR_IMPLEMENTER_SHIFT)
+#define MIDR_ARCHITECTURE_SHIFT 16
+#define MIDR_ARCHITECTURE_MASK  (0xf << MIDR_ARCHITECTURE_SHIFT)
+#define MIDR_PARTNUM_SHIFT  4
+#define MIDR_PARTNUM_MASK   (0xfff << MIDR_PARTNUM_SHIFT)
+
+#define MIDR_CPU_PART(imp, partnum) \
+(((imp) << MIDR_IMPLEMENTER_SHIFT)  | \
+(0xf<< MIDR_ARCHITECTURE_SHIFT) | \
+((partnum)  << MIDR_PARTNUM_SHIFT))
+
+#define ARM_CPU_IMP_CAVIUM0x43
+#define CAVIUM_CPU_PART_THUNDERX  0x0A1
+
+#define MIDR_THUNDERX_PASS2  \
+   MIDR_CPU_PART(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
+#define CPU_MODEL_MASK  (MIDR_IMPLEMENTER_MASK | MIDR_ARCHITECTURE_MASK | \
+ MIDR_PARTNUM_MASK)
+
+uint64_t get_aarch64_cpu_id(void);
+bool is_thunderx_pass2_cpu(void);
+#else
+static inline uint64_t get_aarch64_cpu_id(void)
+{
+return 0;
+}
+
+static inline bool is_thunderx_pass2_cpu(void)
+{
+return false;
+}
+#endif
+#endif
diff --git a/util/Makefile.objs b/util/Makefile.objs
index ad0f9c7..a9585c9 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -36,3 +36,4 @@ util-obj-y += log.o
 util-obj-y += qdist.o
 util-obj-y += qht.o
 util-obj-y += range.o
+util-obj-$(CONFIG_LINUX) += aarch64-cpuid.o
diff --git a/util/aarch64-cpuid.c b/util/aarch64-cpuid.c
new file mode 100644
index 0000000..575f52e
--- /dev/null
+++ b/util/aarch64-cpuid.c
@@ -0,0 +1,52 @@
+/*
+ * Dealing with arm cpu identification information.
+ *
+ * Copyright (C) 2016 Cavium, Inc.
+ *
+ * Authors:
+ *  Vijaya Kumar K 
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later.  See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "qemu/aarch64-cpuid.h"
+
+#if defined(__aarch64__)
+static uint64_t qemu_read_aarch64_midr_el1(void)
+{
+const char *file = 
"/sys/devices/system/cpu/cpu0/regs/identification/midr_el1";
+char *buf;
+uint64_t midr = 0;
+
+if (!g_file_get_contents(file, &buf, 0, NULL)) {
+goto out;
+}
+
+if (qemu_strtoull(buf, NULL, 0, &midr) < 0) {
+midr = 0;
+goto out;
+}
+
+out:
+g_free(buf);
+
+return midr;
+}
+
+static uint64_t aarch64_midr_val;
+uint64_t get_aarch64_cpu_id(void)
+{
+aarch64_midr_val = qemu_read_aarch64_midr_el1();
+aarch64_midr_val &= CPU_MODEL_MASK;
+
+return aarch64_midr_val;
+}
+
+bool is_thunderx_pass2_cpu(void)
+{
+return aarch64_midr_val == MIDR_THUNDERX_PASS2;
+}
+#endif
-- 
1.9.1

[Qemu-devel] [PATCH v5 0/3] Live migration optimization for Thunderx platform

2016-12-07 Thread vijay . kilari

From: Vijaya Kumar K 

The CPU MIDR_EL1 register is exposed to userspace for arm64
with the below patch.
https://lkml.org/lkml/2016/7/8/467

Thunderx platform requires explicit prefetch instruction to
provide prefetch hint. Using MIDR_EL1 information, provided
by above kernel patch, prefetch is executed if the platform
is Thunderx.

The results of live migration time improvement is provided
in commit message of patch 3.

Note: A size check to avoid prefetching beyond the end of the
page is not added. Making this check is counterproductive for
the performance of live migration.

v4 => v5:
   - Compile util/aarch64-cpuid.c when CONFIG_LINUX enabled
   - Added stubs include/qemu/aarch64-cpuid.h if __aarch64__ and
 CONFIG_LINUX are not enabled.
v3 => v4:
   - Dropped allocation of memory for buf in
 qemu_read_aarch64_midr_el1()
   - Moved MIDR reg definitions to header file
   - Dropped arm64 and thunder specific code from generic
 function.

v2 => v3:
   - Rebased on top of richard's patches.
   - Consider cache line size and line number to prefetch
   - Passed optional parameters to __builtin_prefetch
v1 => v2:
   - Rename util/cpuinfo.c as util/aarch64-cpuid.c
   - Introduced header file include/qemu/aarch64-cpuid.h
   - Place all arch specific code under define __aarch64__ and
 CONFIG_LINUX.
   - Used builtin_prefetch() to add prefetch instruction.
   - Moved arch specific changes out of generic code
   - Dropped prefetching 5th cache line.

Vijaya Kumar K (3):
  cutils: Set __builtin_prefetch optional parameters
  utils: Add helper to read arm MIDR_EL1 register
  utils: Add prefetch for Thunderx platform

 include/qemu/aarch64-cpuid.h | 38 
 util/Makefile.objs   |  1 +
 util/aarch64-cpuid.c | 52 
 util/bufferiszero.c  | 43 +++-
 4 files changed, 129 insertions(+), 5 deletions(-)
 create mode 100644 include/qemu/aarch64-cpuid.h
 create mode 100644 util/aarch64-cpuid.c

-- 
1.9.1

[Qemu-devel] [PATCH v5 1/3] cutils: Set __builtin_prefetch optional parameters

2016-12-07 Thread vijay . kilari

From: Vijaya Kumar K 

Set the optional parameters of __builtin_prefetch(), which specify
rw and locality, to 0. For checking whether a buffer is zero, set rw
to read and temporal locality to 0.

On arm64, __builtin_prefetch(addr) generates 'prfm pldl1keep',
whereas __builtin_prefetch(addr, 0, 0) generates the 'prfm pldl1strm'
instruction, which is optimal for this use case.

Signed-off-by: Vijaya Kumar K 
Reviewed-by: Richard Henderson 
---
 util/bufferiszero.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index eb974b7..421d945 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -49,7 +49,7 @@ buffer_zero_int(const void *buf, size_t len)
 const uint64_t *e = (uint64_t *)(((uintptr_t)buf + len) & -8);
 
 for (; p + 8 <= e; p += 8) {
-__builtin_prefetch(p + 8);
+__builtin_prefetch(p + 8, 0, 0);
 if (t) {
 return false;
 }
@@ -86,7 +86,7 @@ buffer_zero_sse2(const void *buf, size_t len)
 
 /* Loop over 16-byte aligned blocks of 64.  */
 while (likely(p <= e)) {
-__builtin_prefetch(p);
+__builtin_prefetch(p, 0, 0);
 t = _mm_cmpeq_epi8(t, zero);
 if (unlikely(_mm_movemask_epi8(t) != 0xFFFF)) {
 return false;
@@ -127,7 +127,7 @@ buffer_zero_sse4(const void *buf, size_t len)
 
 /* Loop over 16-byte aligned blocks of 64.  */
 while (likely(p <= e)) {
-__builtin_prefetch(p);
+__builtin_prefetch(p, 0, 0);
 if (unlikely(!_mm_testz_si128(t, t))) {
 return false;
 }
@@ -162,7 +162,7 @@ buffer_zero_avx2(const void *buf, size_t len)
 if (likely(p <= e)) {
 /* Loop over 32-byte aligned blocks of 128.  */
 do {
-__builtin_prefetch(p);
+__builtin_prefetch(p, 0, 0);
 if (unlikely(!_mm256_testz_si256(t, t))) {
 return false;
 }
@@ -303,7 +303,7 @@ bool buffer_is_zero(const void *buf, size_t len)
 }
 
 /* Fetch the beginning of the buffer while we select the accelerator.  */
-__builtin_prefetch(buf);
+__builtin_prefetch(buf, 0, 0);
 
 /* Use an optimized zero check if possible.  Note that this also
includes a check for an unrolled loop over 64-bit integers.  */
-- 
1.9.1

Re: [Qemu-devel] [PATCH v6 4/4] hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers

2016-12-07 Thread Vijay Kilari

Hi Peter,

On Thu, Dec 1, 2016 at 3:40 PM, Vijay Kilari <vijay.kil...@gmail.com> wrote:
> On Wed, Nov 30, 2016 at 10:29 PM, Peter Maydell
> <peter.mayd...@linaro.org> wrote:
>> On 30 November 2016 at 16:23, Vijay Kilari <vijay.kil...@gmail.com> wrote:
>>> On Mon, Nov 28, 2016 at 10:05 PM, Peter Maydell
>>> <peter.mayd...@linaro.org> wrote:
>>>> Still I would prefer it if we did this with the same
>>>> mechanism for both TCG and KVM. A generic mechanism for
>>>> "let the CPU reset trigger reset of many other devices in the
>>>> system" isn't widely useful because real hardware doesn't
>>>> have that kind of action-at-a-distance behaviour.
>>>
>>> To make direct call from arm_cpu_reset() to reset CPUIF,
>>> I could not find a way to get GICv3CPUState from CPUARMState or
>>> ARMCPU struct.
>>
>> You don't want to directly call from arm_cpu_reset().
>> Coprocessor regs registered via cpregs can have
>> reset functions, which get called automatically.
>> This is what the TCG gicv3 code already does to reset
>> the CPU i/f, the relevant code just needs to be
>> arranged so it's used for KVM too.
>
> Yes, the reset functions of cpregs get CPUARMState as parameter
> and still we cannot fetch GICv3CPUState from it.

I propose adding a new field to CPUARMState to store the
GICv3CPUState, so that it can be accessed when the cpregs reset is called.
Is that OK?

>
> The TCG code in arm_gicv3_cpuif.c is rely on el_hook to get
> GICv3CPUState.
>>
>>> Any idea how to get GICv3CPUState?
>>>
>>> In  hw/intc/arm_gicv3_cpuif.c implementation,
>>> el_hook function is registered to fetch GICv3CPUState
>>> from CPUARMState struct, but it is for TCG
>>
>> Yes, you don't need the el hook.
>
> Without this is there a way to get GICv3CPUState for KVM?
> I am not familiar with this code.
>
>>
>> thanks
>> -- PMM

Re: [Qemu-devel] [PATCH v6 4/4] hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers

2016-12-01 Thread Vijay Kilari

On Wed, Nov 30, 2016 at 10:29 PM, Peter Maydell
<peter.mayd...@linaro.org> wrote:
> On 30 November 2016 at 16:23, Vijay Kilari <vijay.kil...@gmail.com> wrote:
>> On Mon, Nov 28, 2016 at 10:05 PM, Peter Maydell
>> <peter.mayd...@linaro.org> wrote:
>>> Still I would prefer it if we did this with the same
>>> mechanism for both TCG and KVM. A generic mechanism for
>>> "let the CPU reset trigger reset of many other devices in the
>>> system" isn't widely useful because real hardware doesn't
>>> have that kind of action-at-a-distance behaviour.
>>
>> To make direct call from arm_cpu_reset() to reset CPUIF,
>> I could not find a way to get GICv3CPUState from CPUARMState or
>> ARMCPU struct.
>
> You don't want to directly call from arm_cpu_reset().
> Coprocessor regs registered via cpregs can have
> reset functions, which get called automatically.
> This is what the TCG gicv3 code already does to reset
> the CPU i/f, the relevant code just needs to be
> arranged so it's used for KVM too.

Yes, the reset functions of cpregs get CPUARMState as parameter
and still we cannot fetch GICv3CPUState from it.

The TCG code in arm_gicv3_cpuif.c relies on el_hook to get
GICv3CPUState.
>
>> Any idea how to get GICv3CPUState?
>>
>> In  hw/intc/arm_gicv3_cpuif.c implementation,
>> el_hook function is registered to fetch GICv3CPUState
>> from CPUARMState struct, but it is for TCG
>
> Yes, you don't need the el hook.

Without this is there a way to get GICv3CPUState for KVM?
I am not familiar with this code.

>
> thanks
> -- PMM

Re: [Qemu-devel] [PATCH v6 4/4] hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers

2016-11-30 Thread Vijay Kilari

On Mon, Nov 28, 2016 at 10:05 PM, Peter Maydell
<peter.mayd...@linaro.org> wrote:
> On 28 November 2016 at 16:01, Vijay Kilari <vijay.kil...@gmail.com> wrote:
>> On Mon, Nov 28, 2016 at 6:31 PM, Peter Maydell <peter.mayd...@linaro.org> 
>> wrote:
>>> On 23 November 2016 at 12:39,  <vijay.kil...@gmail.com> wrote:
>>>> From: Vijaya Kumar K <vijaya.ku...@cavium.com>
>>>>
>>>> Reset CPU interface registers of GICv3 when CPU is reset.
>>>> For this, object interface is used, which is called from
>>>> arm_cpu_reset function.
>>>>
>>>> Signed-off-by: Vijaya Kumar K <vijaya.ku...@cavium.com>
>>>
>>> This approach doesn't handle the SMP case correctly --
>>> when a CPU is reset then the CPU interface for that CPU
>>> (and only that CPU) should be reset. Your code will
>>> reset every CPU interface every time any CPU is reset.
>>
>> arm_cpu_reset is not called when particular cpu is reset?.
>> Is it called for all cpus?.
>
> It's called to reset a particular CPU (so it will be called
> once for each CPU).
>
>> OR object_child_foreach_recursive() is calling to reset cpu
>> interfaces of
>> all cpus?.
>
> It does "look through the whole graph of objects in the
> simulation and call the function on anything in the
> graph that implements the interface". I've just seen that
> your code is doing "ignore the call if the CPU that
> triggered this isn't the one we care about", though --
> I missed that the first time reading the code.
>
> Still I would prefer it if we did this with the same
> mechanism for both TCG and KVM. A generic mechanism for
> "let the CPU reset trigger reset of many other devices in the
> system" isn't widely useful because real hardware doesn't
> have that kind of action-at-a-distance behaviour.

To make direct call from arm_cpu_reset() to reset CPUIF,
I could not find a way to get GICv3CPUState from CPUARMState or
ARMCPU struct.

Any idea how to get GICv3CPUState?

In  hw/intc/arm_gicv3_cpuif.c implementation,
el_hook function is registered to fetch GICv3CPUState from CPUARMState
struct, but it is for TCG

>
> thanks
> -- PMM

Re: [Qemu-devel] [PATCH v6 4/4] hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers

2016-11-28 Thread Vijay Kilari

On Mon, Nov 28, 2016 at 6:31 PM, Peter Maydell  wrote:
> On 23 November 2016 at 12:39,   wrote:
>> From: Vijaya Kumar K 
>>
>> Reset CPU interface registers of GICv3 when CPU is reset.
>> For this, object interface is used, which is called from
>> arm_cpu_reset function.
>>
>> Signed-off-by: Vijaya Kumar K 
>
> This approach doesn't handle the SMP case correctly --
> when a CPU is reset then the CPU interface for that CPU
> (and only that CPU) should be reset. Your code will
> reset every CPU interface every time any CPU is reset.

arm_cpu_reset is not called when particular cpu is reset?.
Is it called for all cpus?.
OR object_child_foreach_recursive() is calling to reset cpu interfaces of
all cpus?.

>
> I think it would be better to use the same approach that
> the arm_gicv3_cpuif.c code uses to arrange for cpu i/f
> registers to be reset, perhaps by moving the appropriate
> parts of that code into the common source file.
>
> Having the reset state depend implicitly on the kernel's
> internal state (as you have here for the ICC_CTLR_EL1
> state) is something I'm a bit unsure about -- what goes
> wrong if you don't do that?

When the VM boots, kvm_arm_gicv3_reset() writes all
the GIC registers with their reset values. The kernel does not allow writing
ICC_CTLR_EL1 with zeros because it validates the value against the
hardware-supported values. The same applies to SRE_EL1.

>
> thanks
> -- PMM

Re: [Qemu-devel] [PATCH v6 1/4] kernel: Add definitions for GICv3 attributes

2016-11-25 Thread Vijay Kilari

On Fri, Nov 25, 2016 at 1:27 PM, Auger Eric  wrote:
> Hi Vijay,
>
> On 23/11/2016 13:39, vijay.kil...@gmail.com wrote:
>> From: Vijaya Kumar K 
>>
>> This temporary patch adds kernel API definitions. Use proper header update
>> procedure after these features are released.
>
> Did you send the complete v6 series? I only see 1/4 and 4/4 of this v6
> (https://lists.gnu.org/archive/html/qemu-devel/2016-11/threads.html#04318)?
> Did I miss something?

Strange!. Yes, I have sent complete series.

>
> Thanks
>
> Eric
>>
>> Signed-off-by: Pavel Fedin 
>> Signed-off-by: Vijaya Kumamr K 
>> ---
>>  linux-headers/asm-arm/kvm.h   | 13 +
>>  linux-headers/asm-arm64/kvm.h | 13 +
>>  2 files changed, 26 insertions(+)
>>
>> diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h
>> index 541268c..e3dd0e1 100644
>> --- a/linux-headers/asm-arm/kvm.h
>> +++ b/linux-headers/asm-arm/kvm.h
>> @@ -172,10 +172,23 @@ struct kvm_arch_memory_slot {
>>  #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS2
>>  #define   KVM_DEV_ARM_VGIC_CPUID_SHIFT   32
>>  #define   KVM_DEV_ARM_VGIC_CPUID_MASK(0xffULL << 
>> KVM_DEV_ARM_VGIC_CPUID_SHIFT)
>> +#define   KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
>> +#define   KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
>> +   (0xULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
>>  #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT  0
>>  #define   KVM_DEV_ARM_VGIC_OFFSET_MASK   (0xULL << 
>> KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
>> +#define   KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0x)
>>  #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
>>  #define KVM_DEV_ARM_VGIC_GRP_CTRL   4
>> +#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
>> +#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
>> +#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
>> +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
>> +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
>> +   (0x3fULL << 
>> KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
>> +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
>> +#define VGIC_LEVEL_INFO_LINE_LEVEL 0
>> +
>>  #define   KVM_DEV_ARM_VGIC_CTRL_INIT0
>>
>>  /* KVM_IRQ_LINE irq field index values */
>> diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
>> index fd5a276..6698bdd 100644
>> --- a/linux-headers/asm-arm64/kvm.h
>> +++ b/linux-headers/asm-arm64/kvm.h
>> @@ -201,10 +201,23 @@ struct kvm_arch_memory_slot {
>>  #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS2
>>  #define   KVM_DEV_ARM_VGIC_CPUID_SHIFT   32
>>  #define   KVM_DEV_ARM_VGIC_CPUID_MASK(0xffULL << 
>> KVM_DEV_ARM_VGIC_CPUID_SHIFT)
>> +#define   KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
>> +#define   KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
>> +   (0xULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
>>  #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT  0
>>  #define   KVM_DEV_ARM_VGIC_OFFSET_MASK   (0xULL << 
>> KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
>> +#define   KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0x)
>>  #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
>>  #define KVM_DEV_ARM_VGIC_GRP_CTRL4
>> +#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
>> +#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
>> +#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
>> +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
>> +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
>> +   (0x3fULL << 
>> KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
>> +#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
>> +#define VGIC_LEVEL_INFO_LINE_LEVEL   0
>> +
>>  #define   KVM_DEV_ARM_VGIC_CTRL_INIT 0
>>
>>  /* Device Control API on vcpu fd */
>>

[Qemu-devel] [PATCH v6 4/4] hw/intc/arm_gicv3_kvm: Reset GICv3 cpu interface registers

2016-11-23 Thread vijay . kilari

From: Vijaya Kumar K 

Reset CPU interface registers of GICv3 when CPU is reset.
For this, object interface is used, which is called from
arm_cpu_reset function.

Signed-off-by: Vijaya Kumar K 
---
 hw/intc/arm_gicv3_kvm.c| 37 +
 include/hw/arm/linux-boot-if.h | 28 
 target-arm/cpu.c   | 31 +++
 3 files changed, 96 insertions(+)

diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 77af32d..267c2d6 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -29,6 +29,7 @@
 #include "gicv3_internal.h"
 #include "vgic_common.h"
 #include "migration/migration.h"
+#include "hw/arm/linux-boot-if.h"
 
 #ifdef DEBUG_GICV3_KVM
 #define DPRINTF(fmt, ...) \
@@ -604,6 +605,36 @@ static void kvm_arm_gicv3_get(GICv3State *s)
 }
 }
 
+static void  arm_gicv3_reset_cpuif(ARMDeviceResetIf *obj,
+  unsigned int cpu_num)
+{
+GICv3CPUState *c;
+GICv3State *s = ARM_GICV3_COMMON(obj);
+
+if (!s && !s->cpu) {
+return;
+}
+
+c = &s->cpu[cpu_num];
+if (!c) {
+return;
+}
+
+/* Initialize to actual HW supported configuration */
+kvm_gicc_access(s, ICC_CTLR_EL1, cpu_num,
+&c->icc_ctlr_el1[GICV3_NS], false);
+
+c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS];
+c->icc_pmr_el1 = 0;
+c->icc_bpr[GICV3_G0] = GIC_MIN_BPR;
+c->icc_bpr[GICV3_G1] = GIC_MIN_BPR;
+c->icc_bpr[GICV3_G1NS] = GIC_MIN_BPR;
+
+c->icc_sre_el1 = 0x7;
+memset(c->icc_apr, 0, sizeof(c->icc_apr));
+memset(c->icc_igrpen, 0, sizeof(c->icc_igrpen));
+}
+
 static void kvm_arm_gicv3_reset(DeviceState *dev)
 {
 GICv3State *s = ARM_GICV3_COMMON(dev);
@@ -688,6 +719,7 @@ static void kvm_arm_gicv3_class_init(ObjectClass *klass, 
void *data)
 DeviceClass *dc = DEVICE_CLASS(klass);
 ARMGICv3CommonClass *agcc = ARM_GICV3_COMMON_CLASS(klass);
 KVMARMGICv3Class *kgc = KVM_ARM_GICV3_CLASS(klass);
+ARMDeviceResetIfClass *adrifc = ARM_DEVICE_RESET_IF_CLASS(klass);
 
 agcc->pre_save = kvm_arm_gicv3_get;
 agcc->post_load = kvm_arm_gicv3_put;
@@ -695,6 +727,7 @@ static void kvm_arm_gicv3_class_init(ObjectClass *klass, 
void *data)
 kgc->parent_reset = dc->reset;
 dc->realize = kvm_arm_gicv3_realize;
 dc->reset = kvm_arm_gicv3_reset;
+adrifc->arm_device_reset = arm_gicv3_reset_cpuif;
 }
 
 static const TypeInfo kvm_arm_gicv3_info = {
@@ -703,6 +736,10 @@ static const TypeInfo kvm_arm_gicv3_info = {
 .instance_size = sizeof(GICv3State),
 .class_init = kvm_arm_gicv3_class_init,
 .class_size = sizeof(KVMARMGICv3Class),
+.interfaces = (InterfaceInfo []) {
+{ TYPE_ARM_DEVICE_RESET_IF },
+{ },
+},
 };
 
 static void kvm_arm_gicv3_register_types(void)
diff --git a/include/hw/arm/linux-boot-if.h b/include/hw/arm/linux-boot-if.h
index aba4479..4a8affd 100644
--- a/include/hw/arm/linux-boot-if.h
+++ b/include/hw/arm/linux-boot-if.h
@@ -40,4 +40,32 @@ typedef struct ARMLinuxBootIfClass {
 void (*arm_linux_init)(ARMLinuxBootIf *obj, bool secure_boot);
 } ARMLinuxBootIfClass;
 
+#define TYPE_ARM_DEVICE_RESET_IF "arm-device-reset-if"
+#define ARM_DEVICE_RESET_IF_CLASS(klass) \
+OBJECT_CLASS_CHECK(ARMDeviceResetIfClass, (klass), 
TYPE_ARM_DEVICE_RESET_IF)
+#define ARM_DEVICE_RESET_IF_GET_CLASS(obj) \
+OBJECT_GET_CLASS(ARMDeviceResetIfClass, (obj), TYPE_ARM_DEVICE_RESET_IF)
+#define ARM_DEVICE_RESET_IF(obj) \
+INTERFACE_CHECK(ARMDeviceResetIf, (obj), TYPE_ARM_DEVICE_RESET_IF)
+
+typedef struct ARMDeviceResetIf {
+/*< private >*/
+Object parent_obj;
+} ARMDeviceResetIf;
+
+typedef struct ARMDeviceResetIfClass {
+/*< private >*/
+InterfaceClass parent_class;
+
+/*< public >*/
+/** arm_device_reset: Reset the device when the CPU is reset.
+ * Some device registers, like the GICv3 cpu interface registers,
+ * must be reset when the CPU is reset rather than when the GICv3
+ * device is reset. This callback is called when arm_cpu_reset is called.
+ *
+ * @obj: the object implementing this interface
+ * @cpu_num: CPU number being reset
+ */
+void (*arm_device_reset)(ARMDeviceResetIf *obj, unsigned int cpu_num);
+} ARMDeviceResetIfClass;
 #endif
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 99f0dbe..44806be 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -30,6 +30,7 @@
 #include "hw/loader.h"
 #endif
 #include "hw/arm/arm.h"
+#include "hw/arm/linux-boot-if.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/kvm.h"
 #include "kvm_arm.h"
@@ -113,6 +114,21 @@ static void cp_reg_check_reset(gpointer key, gpointer 
value,  gpointer opaque)
 assert(oldvalue == newvalue);
 }
 
+static int do_arm_device_reset(Object *obj, void *opaque)
+{
+if (object_dynamic_cast(obj, TYPE_ARM_DEVICE_RESET_IF)) {
+ARMDeviceResetIf *adrif =

[Qemu-devel] [PATCH v6 1/4] kernel: Add definitions for GICv3 attributes

2016-11-23 Thread vijay . kilari

From: Vijaya Kumar K 

This temporary patch adds kernel API definitions. Use proper header update
procedure after these features are released.

Signed-off-by: Pavel Fedin 
Signed-off-by: Vijaya Kumamr K 
---
 linux-headers/asm-arm/kvm.h   | 13 +
 linux-headers/asm-arm64/kvm.h | 13 +
 2 files changed, 26 insertions(+)

diff --git a/linux-headers/asm-arm/kvm.h b/linux-headers/asm-arm/kvm.h
index 541268c..e3dd0e1 100644
--- a/linux-headers/asm-arm/kvm.h
+++ b/linux-headers/asm-arm/kvm.h
@@ -172,10 +172,23 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS  2
 #define   KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK  (0xffULL << 
KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
+   (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << 
KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define   KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS   3
 #define KVM_DEV_ARM_VGIC_GRP_CTRL   4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+   (0x3fULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
+#define VGIC_LEVEL_INFO_LINE_LEVEL 0
+
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT0
 
 /* KVM_IRQ_LINE irq field index values */
diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index fd5a276..6698bdd 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -201,10 +201,23 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS  2
 #define   KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK  (0xffULL << 
KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
+   (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << 
KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define   KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS   3
 #define KVM_DEV_ARM_VGIC_GRP_CTRL  4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+   (0x3fULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
+#define VGIC_LEVEL_INFO_LINE_LEVEL 0
+
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT   0
 
 /* Device Control API on vcpu fd */
-- 
1.9.1

Re: [Qemu-devel] [RFC PATCH v5 2/2] hw/intc/arm_gicv3_kvm: Implement get/put functions

2016-11-09 Thread Vijay Kilari

On Fri, Oct 7, 2016 at 9:00 PM, Peter Maydell  wrote:
> On 20 September 2016 at 07:55,   wrote:
>> From: Vijaya Kumar K 
>>
>> This actually implements pre_save and post_load methods for in-kernel
>> vGICv3.
>>
>> Signed-off-by: Pavel Fedin 
>> Signed-off-by: Peter Maydell 
>> Signed-off-by: Vijaya Kumamr K 
>> [PMM:
>>  * use decimal, not 0bnnn
>>  * fixed typo in names of ICC_APR0R_EL1 and ICC_AP1R_EL1
>>  * completely rearranged the get and put functions to read and write
>>the state in a natural order, rather than mixing distributor and
>>redistributor state together]
>> [Vijay:
>>  * Update macro KVM_VGIC_ATTR
>>  * Use 32 bit access for gicd and gicr
>>  * GICD_IROUTER, GICD_TYPER, GICR_PROPBASER and GICR_PENDBASER reg
>>access  are changed from 64-bit to 32-bit access
>>  * s->edge_trigger stores only even bits value of an irq config.
>>Update translate_edge_trigger() accordingly.
>>  * Add ICC_SRE_EL1 save and restore
>>  * Initialized ICC registers during reset
>
> These sorts of [] changes should go above the sign-off
> of the person who did them, to indicate where in the
> chain they happened. Also, yours is missing the closing ].
>
>> ---
>> ---
>
>> +/* Translate from the in-kernel field for an IRQ value to/from the qemu
>> + * representation. Note that these are only expected to be used for
>> + * SPIs (that is, for interrupts whose state is in the distributor
>> + * rather than the redistributor).
>> + */
>> +typedef void (*vgic_translate_fn)(GICv3State *s, int irq,
>> +  uint32_t *field, bool to_kernel);
>> +
>> +static void translate_edge_trigger(GICv3State *s, int irq,
>> +uint32_t *field, bool to_kernel)
>> +{
>> +/*
>> + * s->edge_trigger stores only even bits value of an irq config.
>> + * Consider only even bits and translate accordingly.
>> + */
>> +if (to_kernel) {
>> +*field = gicv3_gicd_edge_trigger_test(s, irq);
>> +*field = (*field << 1) & 3;
>> +} else {
>> +*field = (*field >> 1) & 1;
>> +gicv3_gicd_edge_trigger_replace(s, irq, *field);
>> +}
>> +}
>
> I would prefer that we just open-coded a for-loop for these,
> as then you can use half_shuffle32 and half_unshuffle32 to
> deal with the bits 32 at a time.

You mean to completely drop this translate_fn which is called from
kvm_dist_put/get() and have a direct function to handle edge_trigger?

>
>> +
>> +static void translate_priority(GICv3State *s, int irq,
>> +   uint32_t *field, bool to_kernel)
>> +{
>> +if (to_kernel) {
>> +*field = s->gicd_ipriority[irq];
>> +} else {
>> +s->gicd_ipriority[irq] = *field;
>> +}
>> +}
>
> Similarly, this would be better with open-coded for loops.
> Then we can dump the translate_fn machinery entirely.
>
>> +
>> +static void kvm_arm_gicv3_reset_reg(GICv3State *s)
>> +{
>> +int ncpu;
>> +
>> +for (ncpu = 0; ncpu < s->num_cpu; ncpu++) {
>> +GICv3CPUState *c = &s->cpu[ncpu];
>> +
>> +/* Initialize to actual HW supported configuration */
>> +kvm_gicc_access(s, ICC_CTLR_EL1, ncpu,
>> +&c->icc_ctlr_el1[GICV3_NS], false);
>> +
>> +c->icc_ctlr_el1[GICV3_S] = c->icc_ctlr_el1[GICV3_NS];
>> +c->icc_pmr_el1 = 0;
>> +c->icc_bpr[GICV3_G0] = GIC_MIN_BPR;
>> +c->icc_bpr[GICV3_G1] = GIC_MIN_BPR;
>> +c->icc_bpr[GICV3_G1NS] = GIC_MIN_BPR;
>> +
>> +c->icc_sre_el1 = 0x7;
>> +memset(c->icc_apr, 0, sizeof(c->icc_apr));
>> +memset(c->icc_igrpen, 0, sizeof(c->icc_igrpen));
>> +}
>
> This shouldn't be in this patch. If we need to fix reset we
> should do it as a separate patch.
>
> Also this isn't the right place, really, because the CPU interface
> registers need to be reset when the CPU is reset, not when
> the GIC device is reset.

To make GIC cpuif registers to reset upon cpu reset,
I propose to add Interface for gicv3_common class and
call this interface from arm_cpu_reset() similar to
ARMLinuxBootIf. This will be more generic way rather
than searching for gicv3 object and reset the registers

>
>>  }
>
>>  static void kvm_arm_gicv3_class_init(ObjectClass *klass, void *data)
>> diff --git a/include/hw/intc/arm_gicv3_common.h 
>> b/include/hw/intc/arm_gicv3_common.h
>> index 341a311..183c7f8 100644
>> --- a/include/hw/intc/arm_gicv3_common.h
>> +++ b/include/hw/intc/arm_gicv3_common.h
>> @@ -166,6 +166,7 @@ struct GICv3CPUState {
>>  uint8_t gicr_ipriorityr[GIC_INTERNAL];
>>
>>  /* CPU interface */
>> +uint64_t icc_sre_el1;
>
> Where has this come from? If we need to add a new field then it

This was part of review comment from Christoffer to add icc_sre_el1
save and restore

> needs to be in a different patch (and we need to make sure we
OK. I will spin a new patch

> add it

Re: [Qemu-devel] [PATCH v4 2/3] utils: Add helper to read arm MIDR_EL1 register

2016-11-04 Thread Vijay Kilari

Hi Peter

On Fri, Oct 28, 2016 at 3:39 PM, Vijay Kilari <vijay.kil...@gmail.com> wrote:
> On Fri, Oct 28, 2016 at 2:33 PM, Peter Maydell <peter.mayd...@linaro.org> 
> wrote:
>> On 28 October 2016 at 08:00, Vijay Kilari <vijay.kil...@gmail.com> wrote:
>>> On Thu, Oct 27, 2016 at 9:33 PM, Peter Maydell <peter.mayd...@linaro.org> 
>>> wrote:
>>>> On 25 October 2016 at 13:12,  <vijay.kil...@gmail.com> wrote:
>>>>> From: Vijaya Kumar K <vijaya.ku...@cavium.com>
>>>>>
>>>>> Add helper API to read MIDR_EL1 registers to fetch
>>>>> cpu identification information. This helps in
>>>>> adding errata's and architecture specific features.
>>>>>
>>>>> This is implemented only for arm architecture.
>>>>>
>>>>> Signed-off-by: Vijaya Kumar K <vijaya.ku...@cavium.com>
>>
>>>>> diff --git a/util/Makefile.objs b/util/Makefile.objs
>>>>> index 36c7dcc..d14a455 100644
>>>>> --- a/util/Makefile.objs
>>>>> +++ b/util/Makefile.objs
>>>>> @@ -37,3 +37,4 @@ util-obj-y += log.o
>>>>>  util-obj-y += qdist.o
>>>>>  util-obj-y += qht.o
>>>>>  util-obj-y += range.o
>>>>> +util-obj-y += aarch64-cpuid.o
>>
>>>>
>>>>> +#include "qemu/cutils.h"
>>>>> +#include "qemu/aarch64-cpuid.h"
>>>>> +
>>>>> +#if defined(__aarch64__)
>>>>> +static uint64_t qemu_read_aarch64_midr_el1(void)
>>>>> +{
>>>>> +#ifdef CONFIG_LINUX
>>>>
>>>> When will CONFIG_LINUX not be defined but __aarch64__ is?
>>>   The contents of this file is compiled only for aarch64
>>
>> Your makefile change compiles it for everything.
>>
>>> and hence
>>> all the contents are under this __aarch64__.
>>> Also the code is only for linux, have added CONFIG_LINUX.
>>
>> ...and you haven't answered the question: in what
>> circumstances could __aarch64__ be defined but
>> CONFIG_LINUX is not, ie why is there any point in
>> checking both defines?
>
> Ok. You mean __aarch64__ and __linux__ both are defined by gcc.
> we can rely on __aarch64__ define  here?.
>
> AFAIK, the caller of this function, bufferiszero.c, is compiled
> for everything. In case bufferiszero.c is compiled for something other
> than linux on aarch64, compilation might fail. In such a case, the header
> file needs to have dummy/empty functions.

What do you suggest for this?

Regards
Vijay

Re: [Qemu-devel] [PATCH v4 2/3] utils: Add helper to read arm MIDR_EL1 register

2016-10-28 Thread Vijay Kilari

On Fri, Oct 28, 2016 at 2:33 PM, Peter Maydell <peter.mayd...@linaro.org> wrote:
> On 28 October 2016 at 08:00, Vijay Kilari <vijay.kil...@gmail.com> wrote:
>> On Thu, Oct 27, 2016 at 9:33 PM, Peter Maydell <peter.mayd...@linaro.org> 
>> wrote:
>>> On 25 October 2016 at 13:12,  <vijay.kil...@gmail.com> wrote:
>>>> From: Vijaya Kumar K <vijaya.ku...@cavium.com>
>>>>
>>>> Add helper API to read MIDR_EL1 registers to fetch
>>>> cpu identification information. This helps in
>>>> adding errata's and architecture specific features.
>>>>
>>>> This is implemented only for arm architecture.
>>>>
>>>> Signed-off-by: Vijaya Kumar K <vijaya.ku...@cavium.com>
>
>>>> diff --git a/util/Makefile.objs b/util/Makefile.objs
>>>> index 36c7dcc..d14a455 100644
>>>> --- a/util/Makefile.objs
>>>> +++ b/util/Makefile.objs
>>>> @@ -37,3 +37,4 @@ util-obj-y += log.o
>>>>  util-obj-y += qdist.o
>>>>  util-obj-y += qht.o
>>>>  util-obj-y += range.o
>>>> +util-obj-y += aarch64-cpuid.o
>
>>>
>>>> +#include "qemu/cutils.h"
>>>> +#include "qemu/aarch64-cpuid.h"
>>>> +
>>>> +#if defined(__aarch64__)
>>>> +static uint64_t qemu_read_aarch64_midr_el1(void)
>>>> +{
>>>> +#ifdef CONFIG_LINUX
>>>
>>> When will CONFIG_LINUX not be defined but __aarch64__ is?
>>   The contents of this file is compiled only for aarch64
>
> Your makefile change compiles it for everything.
>
>> and hence
>> all the contents are under this __aarch64__.
>> Also the code is only for linux, have added CONFIG_LINUX.
>
> ...and you haven't answered the question: in what
> circumstances could __aarch64__ be defined but
> CONFIG_LINUX is not, ie why is there any point in
> checking both defines?

Ok. You mean that both __aarch64__ and __linux__ are defined by gcc.
Can we rely on the __aarch64__ define alone here?

AFAIK, bufferiszero.c, the caller of this function, is compiled
for everything. If bufferiszero.c is compiled for aarch64 on a host other
than Linux, compilation might fail. In that case, the header file
needs to provide dummy/empty functions.

Re: [Qemu-devel] [PATCH v4 2/3] utils: Add helper to read arm MIDR_EL1 register

2016-10-28 Thread Vijay Kilari

On Thu, Oct 27, 2016 at 9:33 PM, Peter Maydell  wrote:
> On 25 October 2016 at 13:12,   wrote:
>> From: Vijaya Kumar K 
>>
>> Add helper API to read MIDR_EL1 registers to fetch
>> cpu identification information. This helps in
>> adding errata's and architecture specific features.
>>
>> This is implemented only for arm architecture.
>>
>> Signed-off-by: Vijaya Kumar K 
>> ---
>>  include/qemu/aarch64-cpuid.h | 29 +
>>  util/Makefile.objs   |  1 +
>>  util/aarch64-cpuid.c | 61 
>> 
>>  3 files changed, 91 insertions(+)
>>
>> diff --git a/include/qemu/aarch64-cpuid.h b/include/qemu/aarch64-cpuid.h
>> new file mode 100644
>> index 000..8f776e8
>> --- /dev/null
>> +++ b/include/qemu/aarch64-cpuid.h
>> @@ -0,0 +1,29 @@
>> +#ifndef QEMU_AARCH64_CPUID_H
>> +#define QEMU_AARCH64_CPUID_H
>> +
>> +#if defined(__aarch64__)
>> +#define MIDR_IMPLEMENTER_SHIFT  24
>> +#define MIDR_IMPLEMENTER_MASK   (0xffULL << MIDR_IMPLEMENTER_SHIFT)
>> +#define MIDR_ARCHITECTURE_SHIFT 16
>> +#define MIDR_ARCHITECTURE_MASK  (0xf << MIDR_ARCHITECTURE_SHIFT)
>> +#define MIDR_PARTNUM_SHIFT  4
>> +#define MIDR_PARTNUM_MASK   (0xfff << MIDR_PARTNUM_SHIFT)
>> +
>> +#define MIDR_CPU_PART(imp, partnum) \
>> +(((imp) << MIDR_IMPLEMENTER_SHIFT)  | \
>> +(0xf<< MIDR_ARCHITECTURE_SHIFT) | \
>> +((partnum)  << MIDR_PARTNUM_SHIFT))
>> +
>> +#define ARM_CPU_IMP_CAVIUM0x43
>> +#define CAVIUM_CPU_PART_THUNDERX  0x0A1
>> +
>> +#define MIDR_THUNDERX_PASS2  \
>> +   MIDR_CPU_PART(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
>> +#define CPU_MODEL_MASK  (MIDR_IMPLEMENTER_MASK | MIDR_ARCHITECTURE_MASK | \
>> + MIDR_PARTNUM_MASK)
>> +
>> +uint64_t get_aarch64_cpu_id(void);
>> +bool is_thunderx_pass2_cpu(void);
>> +#endif
>> +
>> +#endif
>> diff --git a/util/Makefile.objs b/util/Makefile.objs
>> index 36c7dcc..d14a455 100644
>> --- a/util/Makefile.objs
>> +++ b/util/Makefile.objs
>> @@ -37,3 +37,4 @@ util-obj-y += log.o
>>  util-obj-y += qdist.o
>>  util-obj-y += qht.o
>>  util-obj-y += range.o
>> +util-obj-y += aarch64-cpuid.o
>> diff --git a/util/aarch64-cpuid.c b/util/aarch64-cpuid.c
>> new file mode 100644
>> index 000..536ece1
>> --- /dev/null
>> +++ b/util/aarch64-cpuid.c
>> @@ -0,0 +1,61 @@
>> +/*
>> + * Dealing with arm cpu identification information.
>> + *
>> + * Copyright (C) 2016 Cavium, Inc.
>> + *
>> + * Authors:
>> + *  Vijaya Kumar K 
>> + *
>> + * This work is licensed under the terms of the GNU LGPL, version 2.1
>> + * or later.  See the COPYING.LIB file in the top-level directory.
>> + */
>> +
>> +#include 
>> +#include "qemu/osdep.h"
>
> osdep.h must always be the first #include, before anything else.
>
> What do we need math.h for anyway?
>
>> +#include "qemu-common.h"
>
> What do we need qemu-common.h for ?
>
>> +#include "qemu/cutils.h"
>> +#include "qemu/aarch64-cpuid.h"
>> +
>> +#if defined(__aarch64__)
>> +static uint64_t qemu_read_aarch64_midr_el1(void)
>> +{
>> +#ifdef CONFIG_LINUX
>
> When will CONFIG_LINUX not be defined but __aarch64__ is?
  The contents of this file are compiled only for aarch64, and hence
all the contents are under __aarch64__.
Also, the code is only for Linux, so I have added CONFIG_LINUX.

Re: [Qemu-devel] [PATCH v3 2/3] utils: Add helper to read arm MIDR_EL1 register

2016-10-24 Thread Vijay Kilari

On Mon, Oct 24, 2016 at 3:09 PM, Dr. David Alan Gilbert
 wrote:
> * vijay.kil...@gmail.com (vijay.kil...@gmail.com) wrote:
>> From: Vijaya Kumar K 
>>
>> Add helper API to read MIDR_EL1 registers to fetch
>> cpu identification information. This helps in
>> adding errata's and architecture specific features.
>>
>> This is implemented only for arm architecture.
>>
>> Signed-off-by: Vijaya Kumar K 
>> ---
>>  include/qemu/aarch64-cpuid.h |  9 +
>>  util/Makefile.objs   |  1 +
>>  util/aarch64-cpuid.c | 87 
>> 
>
> It feels like there should be somewhere else to put this very ARM specific 
> thing
> that in util/ - not sure where though.

  IIRC, I tried that, but libutil is built before the arch code is compiled.
So this cannot be put
outside of the util folder.

[Qemu-devel] [PATCH v3 3/3] utils: Add prefetch for Thunderx platform

2016-10-24 Thread vijay . kilari

From: Vijaya Kumar K 

The Thunderx pass2 chip requires an explicit prefetch
instruction to give a prefetch hint.

To speed up live migration on the Thunderx platform, a
prefetch instruction is added in the zero buffer check
function. The results below show the live migration time improvement
with the prefetch instruction. A VM with 4 VCPUs and 8GB RAM is migrated.

Without prefetch, the total migration time is ~13 seconds;
with prefetch, the total migration time is 9.5 seconds.

The code for decoding the cache size is taken from Richard's
patch.

Signed-off-by: Vijaya Kumar K 
---
 util/bufferiszero.c | 37 -
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index 421d945..f50b8df 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -25,6 +25,10 @@
 #include "qemu-common.h"
 #include "qemu/cutils.h"
 #include "qemu/bswap.h"
+#include <math.h>
+
+static uint32_t cache_line_factor = 1;
+static uint32_t prefetch_line_dist = 1;
 
 static bool
 buffer_zero_int(const void *buf, size_t len)
@@ -49,7 +53,8 @@ buffer_zero_int(const void *buf, size_t len)
 const uint64_t *e = (uint64_t *)(((uintptr_t)buf + len) & -8);
 
 for (; p + 8 <= e; p += 8) {
-__builtin_prefetch(p + 8, 0, 0);
+__builtin_prefetch(p +
+   (8 * cache_line_factor * prefetch_line_dist), 0, 0);
 if (t) {
 return false;
 }
@@ -293,6 +298,30 @@ bool test_buffer_is_zero_next_accel(void)
 }
 #endif
 
+#if defined(__aarch64__)
+#include "qemu/aarch64-cpuid.h"
+
+static void __attribute__((constructor)) aarch64_init_cache_size(void)
+{
+uint64_t t;
+
+/* Use the DZP block size as a proxy for the cacheline size,
+   since the later is not available to userspace.  This seems
+   to work in practice for existing implementations.  */
+asm("mrs %0, dczid_el0" : "=r"(t));
+if (pow(2, (t & 0xf)) * 4 >= 128) {
+cache_line_factor = 2;
+} else {
+cache_line_factor = 1;
+}
+
+get_aarch64_cpu_id();
+if (is_thunderx_pass2_cpu()) {
+prefetch_line_dist = 3;
+}
+}
+#endif
+
 /*
  * Checks if a buffer is all zeroes
  */
@@ -305,6 +334,12 @@ bool buffer_is_zero(const void *buf, size_t len)
 /* Fetch the beginning of the buffer while we select the accelerator.  */
 __builtin_prefetch(buf, 0, 0);
 
+#if defined(__aarch64__)
+if (is_thunderx_pass2_cpu()) {
+__builtin_prefetch(buf + 16, 0, 0);
+__builtin_prefetch(buf + 32, 0, 0);
+}
+#endif
 /* Use an optimized zero check if possible.  Note that this also
includes a check for an unrolled loop over 64-bit integers.  */
 return select_accel_fn(buf, len);
-- 
1.9.1

[Qemu-devel] [PATCH v3 0/3] Live migration optimization for Thunderx platform

2016-10-23 Thread vijay . kilari

From: Vijaya Kumar K 

The CPU MIDR_EL1 register is exposed to userspace for arm64
with the below patch.
https://lkml.org/lkml/2016/7/8/467

Thunderx platform requires explicit prefetch instruction to
provide prefetch hint. Using MIDR_EL1 information, provided
by above kernel patch, prefetch is executed if the platform
is Thunderx.

The live migration time improvement results are provided
in the commit message of patch 3.

Note: A size check to avoid prefetching beyond the end of the page is
not added. Adding this check is counterproductive for the
performance of live migration.

v2 => v3:
   - Rebased on top of richard's patches.
   - Consider cache line size and line number to prefetch
   - Passed optional parameters to __builtin_prefetch
v1 => v2:
   - Rename util/cpuinfo.c as util/aarch64-cpuid.c
   - Introduced header file include/qemu/aarch64-cpuid.h
   - Place all arch specific code under define __aarch64__ and
 CONFIG_LINUX.
   - Used builtin_prefetch() to add prefetch instruction.
   - Moved arch specific changes out of generic code
   - Dropped prefetching 5th cache line.

Vijaya Kumar K (3):
  cutils: Set __builtin_prefetch optional parameters
  utils: Add helper to read arm MIDR_EL1 register
  utils: Add prefetch for Thunderx platform

 include/qemu/aarch64-cpuid.h |  9 +
 util/Makefile.objs   |  1 +
 util/aarch64-cpuid.c | 87 
 util/bufferiszero.c  | 45 ---
 4 files changed, 137 insertions(+), 5 deletions(-)
 create mode 100644 include/qemu/aarch64-cpuid.h
 create mode 100644 util/aarch64-cpuid.c

-- 
1.9.1

[Qemu-devel] [PATCH v3 2/3] utils: Add helper to read arm MIDR_EL1 register

2016-10-23 Thread vijay . kilari

From: Vijaya Kumar K 

Add a helper API to read the MIDR_EL1 register to fetch
CPU identification information. This helps in
adding errata and architecture-specific features.

This is implemented only for arm architecture.

Signed-off-by: Vijaya Kumar K 
---
 include/qemu/aarch64-cpuid.h |  9 +
 util/Makefile.objs   |  1 +
 util/aarch64-cpuid.c | 87 
 3 files changed, 97 insertions(+)

diff --git a/include/qemu/aarch64-cpuid.h b/include/qemu/aarch64-cpuid.h
new file mode 100644
index 000..dbcb5ff
--- /dev/null
+++ b/include/qemu/aarch64-cpuid.h
@@ -0,0 +1,9 @@
+#ifndef QEMU_AARCH64_CPUID_H
+#define QEMU_AARCH64_CPUID_H
+
+#if defined(__aarch64__)
+uint64_t get_aarch64_cpu_id(void);
+bool is_thunderx_pass2_cpu(void);
+#endif
+
+#endif
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 36c7dcc..d14a455 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -37,3 +37,4 @@ util-obj-y += log.o
 util-obj-y += qdist.o
 util-obj-y += qht.o
 util-obj-y += range.o
+util-obj-y += aarch64-cpuid.o
diff --git a/util/aarch64-cpuid.c b/util/aarch64-cpuid.c
new file mode 100644
index 000..a6352ad
--- /dev/null
+++ b/util/aarch64-cpuid.c
@@ -0,0 +1,87 @@
+/*
+ * Dealing with arm cpu identification information.
+ *
+ * Copyright (C) 2016 Cavium, Inc.
+ *
+ * Authors:
+ *  Vijaya Kumar K 
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later.  See the COPYING.LIB file in the top-level directory.
+ */
+
+#include <math.h>
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/cutils.h"
+#include "qemu/aarch64-cpuid.h"
+
+#if defined(__aarch64__)
+#define MIDR_IMPLEMENTER_SHIFT  24
+#define MIDR_IMPLEMENTER_MASK   (0xffULL << MIDR_IMPLEMENTER_SHIFT)
+#define MIDR_ARCHITECTURE_SHIFT 16
+#define MIDR_ARCHITECTURE_MASK  (0xf << MIDR_ARCHITECTURE_SHIFT)
+#define MIDR_PARTNUM_SHIFT  4
+#define MIDR_PARTNUM_MASK   (0xfff << MIDR_PARTNUM_SHIFT)
+
+#define MIDR_CPU_PART(imp, partnum) \
+(((imp) << MIDR_IMPLEMENTER_SHIFT)  | \
+(0xf<< MIDR_ARCHITECTURE_SHIFT) | \
+((partnum)  << MIDR_PARTNUM_SHIFT))
+
+#define ARM_CPU_IMP_CAVIUM0x43
+#define CAVIUM_CPU_PART_THUNDERX  0x0A1
+
+#define MIDR_THUNDERX_PASS2  \
+   MIDR_CPU_PART(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
+#define CPU_MODEL_MASK  (MIDR_IMPLEMENTER_MASK | MIDR_ARCHITECTURE_MASK | \
+ MIDR_PARTNUM_MASK)
+
+static uint64_t qemu_read_aarch64_midr_el1(void)
+{
+#ifdef CONFIG_LINUX
+const char *file = 
"/sys/devices/system/cpu/cpu0/regs/identification/midr_el1";
+char *buf;
+uint64_t midr = 0;
+
+#define BUF_SIZE 32
+buf = g_malloc0(BUF_SIZE);
+if (!buf) {
+return 0;
+}
+
+if (!g_file_get_contents(file, &buf, 0, NULL)) {
+goto out;
+}
+
+if (qemu_strtoull(buf, NULL, 0, &midr) < 0) {
+goto out;
+}
+
+out:
+g_free(buf);
+
+return midr;
+#else
+return 0;
+#endif
+}
+
+static uint64_t aarch64_midr_val;
+uint64_t get_aarch64_cpu_id(void)
+{
+#ifdef CONFIG_LINUX
+aarch64_midr_val = qemu_read_aarch64_midr_el1();
+aarch64_midr_val &= CPU_MODEL_MASK;
+
+return aarch64_midr_val;
+#else
+return 0;
+#endif
+}
+
+bool is_thunderx_pass2_cpu(void)
+{
+return aarch64_midr_val == MIDR_THUNDERX_PASS2;
+}
+#endif
-- 
1.9.1

[Qemu-devel] [PATCH v3 1/3] cutils: Set __builtin_prefetch optional parameters

2016-10-23 Thread vijay . kilari

From: Vijaya Kumar K 

Set the optional parameters of __builtin_prefetch(), which specify
rw and locality, to 0. For checking whether a buffer is zero, set rw to read
and temporal locality to 0.

On arm64, __builtin_prefetch(addr) generates 'prfm pldl1keep',
whereas __builtin_prefetch(addr, 0, 0) generates the 'prfm pldl1strm'
instruction, which is optimal for this use case.

Signed-off-by: Vijaya Kumar K 
---
 util/bufferiszero.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index eb974b7..421d945 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -49,7 +49,7 @@ buffer_zero_int(const void *buf, size_t len)
 const uint64_t *e = (uint64_t *)(((uintptr_t)buf + len) & -8);
 
 for (; p + 8 <= e; p += 8) {
-__builtin_prefetch(p + 8);
+__builtin_prefetch(p + 8, 0, 0);
 if (t) {
 return false;
 }
@@ -86,7 +86,7 @@ buffer_zero_sse2(const void *buf, size_t len)
 
 /* Loop over 16-byte aligned blocks of 64.  */
 while (likely(p <= e)) {
-__builtin_prefetch(p);
+__builtin_prefetch(p, 0, 0);
 t = _mm_cmpeq_epi8(t, zero);
 if (unlikely(_mm_movemask_epi8(t) != 0xFFFF)) {
 return false;
@@ -127,7 +127,7 @@ buffer_zero_sse4(const void *buf, size_t len)
 
 /* Loop over 16-byte aligned blocks of 64.  */
 while (likely(p <= e)) {
-__builtin_prefetch(p);
+__builtin_prefetch(p, 0, 0);
 if (unlikely(!_mm_testz_si128(t, t))) {
 return false;
 }
@@ -162,7 +162,7 @@ buffer_zero_avx2(const void *buf, size_t len)
 if (likely(p <= e)) {
 /* Loop over 32-byte aligned blocks of 128.  */
 do {
-__builtin_prefetch(p);
+__builtin_prefetch(p, 0, 0);
 if (unlikely(!_mm256_testz_si256(t, t))) {
 return false;
 }
@@ -303,7 +303,7 @@ bool buffer_is_zero(const void *buf, size_t len)
 }
 
 /* Fetch the beginning of the buffer while we select the accelerator.  */
-__builtin_prefetch(buf);
+__builtin_prefetch(buf, 0, 0);
 
 /* Use an optimized zero check if possible.  Note that this also
includes a check for an unrolled loop over 64-bit integers.  */
-- 
1.9.1

Re: [Qemu-devel] Fw: [Qemu-arm] [PATCH v2 0/6] Runtime pagesize computation

2016-10-07 Thread Vijay Kilari

Hi Peter,

On Fri, Oct 7, 2016 at 7:50 PM, Peter Maydell <peter.mayd...@linaro.org> wrote:
> On 19 July 2016 at 12:04, Peter Maydell <peter.mayd...@linaro.org> wrote:
>> On 19 July 2016 at 12:01, Vijay Kilari <vijay.kil...@gmail.com> wrote:
>>> Hi Peter,
>>>
>>>   Any update on this patch set. Is it merged?
>>
>> No. It's an RFC patchset really, and it was posted during softfreeze,
>> so I never expected or intended it to be merged before the 2.7
>> release. It also needs a respin to address some of the review comments
>> I got on it.
>>
>> What would be useful in getting it further forward would be
>> some good performance benchmarking of more than just migration.
>> In particular whether it improves the speed in TCG emulation
>> mode of aarch64 guests and whether that assert in the definition
>> of TARGET_PAGE_BITS is particularly performance-draining.
>
> If I do a respin of this patchset is anybody willing to do
> the actual perf benchmarking of the TCG emulation?
> Otherwise this is unlikely to go into 2.8.

 I can help you. Let me know the details of the benchmarking to be done.
Also, a reference or details on running arm64 guests in TCG mode
would be helpful.

Regards
Vijay

[Qemu-devel] [RFC PATCH v4 1/2] kernel: Add definitions for GICv3 attributes

2016-09-12 Thread vijay . kilari

From: Vijaya Kumar K 

This temporary patch adds kernel API definitions. Use proper header update
procedure after these features are released.

Signed-off-by: Pavel Fedin 
Signed-off-by: Vijaya Kumar K 
---
 linux-headers/asm-arm64/kvm.h | 13 +
 1 file changed, 13 insertions(+)

diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index 7d82d1f..3dc0860 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -199,10 +199,23 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS  2
 #define   KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK  (0xffULL << 
KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
+#define   KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
+   (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << 
KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define   KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS   3
 #define KVM_DEV_ARM_VGIC_GRP_CTRL  4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+   (0x3fULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
+#define VGIC_LEVEL_INFO_LINE_LEVEL 0
+
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT   0
 
 /* Device Control API on vcpu fd */
-- 
1.9.1

[Qemu-devel] [RFC PATCH v4 2/2] hw/intc/arm_gicv3_kvm: Implement get/put functions

2016-09-12 Thread vijay . kilari

From: Vijaya Kumar K 

This actually implements pre_save and post_load methods for in-kernel
vGICv3.

Signed-off-by: Pavel Fedin 
Signed-off-by: Peter Maydell 
Signed-off-by: Vijaya Kumar K 
[PMM:
 * use decimal, not 0bnnn
 * fixed typo in names of ICC_APR0R_EL1 and ICC_AP1R_EL1
 * completely rearranged the get and put functions to read and write
   the state in a natural order, rather than mixing distributor and
   redistributor state together]
[Vijay:
 * Update macro KVM_VGIC_ATTR
 * Use 32 bit access for gicd and gicr
 * GICD_IROUTER, GICD_TYPER, GICR_PROPBASER and GICR_PENDBASER reg
   access  are changed from 64-bit to 32-bit access
 * s->edge_trigger stores only even bits value of an irq config.
   Update translate_edge_trigger() accordingly.
 * Add ICC_SRE_EL1 save and restore]
---
---
 hw/intc/arm_gicv3_kvm.c| 530 -
 include/hw/intc/arm_gicv3_common.h |   1 +
 2 files changed, 521 insertions(+), 10 deletions(-)

diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 711fde3..af3fedf 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -23,8 +23,11 @@
 #include "qapi/error.h"
 #include "hw/intc/arm_gicv3_common.h"
 #include "hw/sysbus.h"
+#include "migration/migration.h"
+#include "qemu/error-report.h"
 #include "sysemu/kvm.h"
 #include "kvm_arm.h"
+#include "gicv3_internal.h"
 #include "vgic_common.h"
 #include "migration/migration.h"
 
@@ -44,6 +47,32 @@
 #define KVM_ARM_GICV3_GET_CLASS(obj) \
  OBJECT_GET_CLASS(KVMARMGICv3Class, (obj), TYPE_KVM_ARM_GICV3)
 
+#define   KVM_DEV_ARM_VGIC_SYSREG(op0, op1, crn, crm, op2) \
+ (ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
+  ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
+  ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
+  ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
+  ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
+
+#define ICC_PMR_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 4, 6, 0)
+#define ICC_BPR0_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 3)
+#define ICC_AP0R_EL1(n) \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 4 | n)
+#define ICC_AP1R_EL1(n) \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 9, n)
+#define ICC_BPR1_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 3)
+#define ICC_CTLR_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 4)
+#define ICC_SRE_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 5)
+#define ICC_IGRPEN0_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 6)
+#define ICC_IGRPEN1_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 7)
+
 typedef struct KVMARMGICv3Class {
 ARMGICv3CommonClass parent_class;
 DeviceRealize parent_realize;
@@ -57,16 +86,491 @@ static void kvm_arm_gicv3_set_irq(void *opaque, int irq, 
int level)
 kvm_arm_gic_set_irq(s->num_irq, irq, level);
 }
 
+#define KVM_VGIC_ATTR(reg, typer) \
+((typer & KVM_DEV_ARM_VGIC_V3_MPIDR_MASK) | (reg))
+
+static inline void kvm_gicd_access(GICv3State *s, int offset,
+   uint32_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+  KVM_VGIC_ATTR(offset, 0),
+  val, write);
+}
+
+static inline void kvm_gicr_access(GICv3State *s, int offset, int cpu,
+   uint32_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+  KVM_VGIC_ATTR(offset, s->cpu[cpu].gicr_typer),
+  val, write);
+}
+
+static inline void kvm_gicc_access(GICv3State *s, uint64_t reg, int cpu,
+   uint64_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+  KVM_VGIC_ATTR(reg, s->cpu[cpu].gicr_typer),
+  val, write);
+}
+
+static inline void kvm_gic_line_level_access(GICv3State *s, int irq, int cpu,
+ uint32_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+  KVM_VGIC_ATTR(irq, s->cpu[cpu].gicr_typer) |
+  (VGIC_LEVEL_INFO_LINE_LEVEL <<
+   KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT),
+  val, write);
+}
+
+/* Translate from the in-kernel field for an IRQ value to/from the qemu
+ * representation. Note that these are only expected to be used for
+ * SPIs (that is, for interrupts whose state is in the distributor
+ * rather than the redistributor).
+ */
+typedef void (*vgic_translate_fn)(GICv3State *s, int irq,
+  uint32_t *field, bool to_kernel);
+
+static void translate_edge_trigger(GICv3State *s, int irq,
+uint32_t *field, bool to_kernel)
+{
+/*
+ * s->edge_trigger stores only even bits

[Qemu-devel] [RFC PATCH v4 0/2] GICv3 live migration support

2016-09-12 Thread vijay . kilari

From: Vijaya Kumar K 

This series introduces support for GICv3 live migration with
new VGIC implementation in 4.7-rc3 kernel.
In this series, patch 1 of the previous implementation
is ported.
https://lists.nongnu.org/archive/html/qemu-devel/2015-10/msg05284.html

Patch 2, is based on below implementation.
http://patchwork.ozlabs.org/patch/626746/

Kernel patches which implement this functionality are:
http://www.spinics.net/lists/arm-kernel/msg519596.html

This API definition follows this version of the VGICv3 specification:
http://lists.infradead.org/pipermail/linux-arm-kernel/2016-July/445611.html

Patch 1 of this series will be synced with KVM patches
in next revision.

Tested Live migration of Idle VM running with 4 VCPUs and 8GB RAM.

v3 => v4:
 - Reintroduced offset GICR_SGI_OFFSET
 - Implement save and restore of ICC_SRE_EL1
 - Updated kvm.h header file in sync with KVM v4 patches

v2 => v3:
 - Dropped offset GICR_SGI_OFFSET
 - Implement save/restore of irq line level using
   KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO
 - Fixed bug with save/restore of edge_trigger

Vijaya Kumar K (2):
  kernel: Add definitions for GICv3 attributes
  hw/intc/arm_gicv3_kvm: Implement get/put functions

 hw/intc/arm_gicv3_kvm.c| 530 -
 include/hw/intc/arm_gicv3_common.h |   1 +
 linux-headers/asm-arm64/kvm.h  |  13 +
 3 files changed, 534 insertions(+), 10 deletions(-)

-- 
1.9.1

Re: [Qemu-devel] [PATCH 0/7] Improve buffer_is_zero

2016-08-25 Thread Vijay Kilari

On Thu, Aug 25, 2016 at 12:07 PM, Vijay Kilari <vijay.kil...@gmail.com> wrote:
> Hi Richard,
>
>   Migration fails on arm64 with these patches.
> On the destination VM, follow errors are appearing.
>
> qemu-system-aarch64: VQ 0 size 0x400 Guest index 0x0 inconsistent with
> Host index 0x1937: delta 0xe6c9
> qemu-system-aarch64: error while loading state for instance 0x0 of
> device 'virtio-mmio@0a003e00/virtio-net'
> qemu-system-aarch64: load of migration failed: Operation not permitted
> qemu-system-aarch64: network script /etc/qemu-ifdown failed with status 256

With the changes below, migration works fine on arm64.

diff --git a/util/cutils.c b/util/cutils.c
index 30fac02..9bbf31f 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -170,6 +170,7 @@ static bool __attribute__((noinline))
 \
 NAME(const void *buf, size_t len)   \
 {   \
 const void *end = buf + len;\
+const VECTYPE zero = (VECTYPE){0};  \
 do {\
 const VECTYPE *p = buf; \
 VECTYPE t;  \
@@ -185,7 +186,7 @@ NAME(const void *buf, size_t len)
 \
 } else {\
 link_error();   \
 }   \
-if (unlikely(!ZERO(t))) {   \
+if (unlikely(!ZERO(t, zero))) { \
 return false;   \
 }   \
 buf += SIZE;\
@@ -227,7 +228,7 @@ buffer_zero_base(const void *buf, size_t len)
 return true;
 }
-#define IDENT_ZERO(X)  (X)
+#define IDENT_ZERO(X1, X2)  (X1 == X2)
 ACCEL_BUFFER_ZERO(buffer_zero_int, 4*sizeof(long), long, IDENT_ZERO)

 static bool select_accel_int(const void *buf, size_t len)
@@ -511,7 +512,9 @@ static bool select_accel_fn(const void *buf, size_t len)
 #elif defined(__aarch64__)
 #include "arm_neon.h"

-#define DO_ZERO(X)  (vgetq_lane_u64((X), 0) | vgetq_lane_u64((X), 1))
+#define DO_ZERO(X1, X2) \
+((vgetq_lane_u64(X1, 0) == vgetq_lane_u64(X2, 0)) && \
+ (vgetq_lane_u64(X1, 1) == vgetq_lane_u64(X2, 1)))
 ACCEL_BUFFER_ZERO(buffer_zero_neon_64, 64, uint64x2_t, DO_ZERO)
 ACCEL_BUFFER_ZERO(buffer_zero_neon_128, 128, uint64x2_t, DO_ZERO)

@@ -526,7 +529,7 @@ static void __attribute__((constructor))
init_buffer_zero_accel(void)
since the later is not available to userspace.  This seems
to work in practice for existing implementations.  */
 asm("mrs %0, dczid_el0" : "=r"(t));
-if ((t & 15) * 16 >= 128) {
+if (pow(2, (t & 0xf)) * 4 >= 128) {
 buffer_zero_line_mask = 128 - 1;
 buffer_zero_accel = buffer_zero_neon_128;
 } else {


>
> Regards
> Vijay
>
>
> On Wed, Aug 24, 2016 at 9:47 AM, Richard Henderson <r...@twiddle.net> wrote:
>> Patches 1-3 remove the use of ifunc from the implementation.
>>
>> Patch 5 adjusts the x86 implementation a bit more to take
>> advantage of ptest (in sse4.1) and unaligned accesses (in avx1).
>>
>> Patches 2 and 6 are the result of my conversation with Vijaya
>> Kumar with respect to ThunderX.
>>
>> Patch 7 is the result of seeing some really really horrible code
>> produced for ppc64le (gcc 4.9 and mainline).
>>
>> This has had limited testing.  What I don't know is the best way
>> to benchmark this -- the only way I know to trigger this is via
>> the console, by hand, which doesn't make for reasonable timing.
>>
>>
>> r~
>>
>>
>> Richard Henderson (7):
>>   cutils: Remove SPLAT macro
>>   cutils: Export only buffer_is_zero
>>   cutils: Rearrange buffer_is_zero acceleration
>>   cutils: Add generic prefetch
>>   cutils: Rewrite x86 buffer zero checking
>>   cutils: Rewrite aarch64 buffer zero checking
>>   cutils: Rewrite ppc buffer zero checking
>>
>>  configure |  21 +-
>>  include/qemu/cutils.h |   2 -
>>  migration/ram.c   |   2 +-
>>  migration/rdma.c  |   5 +-
>>  util/cutils.c | 526 
>> +-
>>  5 files changed, 352 insertions(+), 204 deletions(-)
>>
>> --
>> 2.7.4
>>

Re: [Qemu-devel] [PATCH 0/7] Improve buffer_is_zero

2016-08-25 Thread Vijay Kilari

Hi Richard,

  Migration fails on arm64 with these patches.
On the destination VM, the following errors appear.

qemu-system-aarch64: VQ 0 size 0x400 Guest index 0x0 inconsistent with
Host index 0x1937: delta 0xe6c9
qemu-system-aarch64: error while loading state for instance 0x0 of
device 'virtio-mmio@0a003e00/virtio-net'
qemu-system-aarch64: load of migration failed: Operation not permitted
qemu-system-aarch64: network script /etc/qemu-ifdown failed with status 256

Regards
Vijay


On Wed, Aug 24, 2016 at 9:47 AM, Richard Henderson  wrote:
> Patches 1-3 remove the use of ifunc from the implementation.
>
> Patch 5 adjusts the x86 implementation a bit more to take
> advantage of ptest (in sse4.1) and unaligned accesses (in avx1).
>
> Patches 2 and 6 are the result of my conversation with Vijaya
> Kumar with respect to ThunderX.
>
> Patch 7 is the result of seeing some really really horrible code
> produced for ppc64le (gcc 4.9 and mainline).
>
> This has had limited testing.  What I don't know is the best way
> to benchmark this -- the only way I know to trigger this is via
> the console, by hand, which doesn't make for reasonable timing.
>
>
> r~
>
>
> Richard Henderson (7):
>   cutils: Remove SPLAT macro
>   cutils: Export only buffer_is_zero
>   cutils: Rearrange buffer_is_zero acceleration
>   cutils: Add generic prefetch
>   cutils: Rewrite x86 buffer zero checking
>   cutils: Rewrite aarch64 buffer zero checking
>   cutils: Rewrite ppc buffer zero checking
>
>  configure |  21 +-
>  include/qemu/cutils.h |   2 -
>  migration/ram.c   |   2 +-
>  migration/rdma.c  |   5 +-
>  util/cutils.c | 526 
> +-
>  5 files changed, 352 insertions(+), 204 deletions(-)
>
> --
> 2.7.4
>

[Qemu-devel] [RFC PATCH v3 2/2] hw/intc/arm_gicv3_kvm: Implement get/put functions

2016-08-24 Thread vijay . kilari

From: Vijaya Kumar K 

This actually implements pre_save and post_load methods for in-kernel
vGICv3.

Signed-off-by: Pavel Fedin 
Signed-off-by: Peter Maydell 
[PMM:
 * use decimal, not 0bnnn
 * fixed typo in names of ICC_APR0R_EL1 and ICC_AP1R_EL1
 * completely rearranged the get and put functions to read and write
   the state in a natural order, rather than mixing distributor and
   redistributor state together]
Signed-off-by: Vijaya Kumar K 
[Vijay:
 * Update macro KVM_VGIC_ATTR
 * Use 32 bit access for gicd and gicr
 * GICD_IROUTER, GICD_TYPER, GICR_PROPBASER and GICR_PENDBASER reg
   access  are changed from 64-bit to 32-bit access
 * s->edge_trigger stores only even bits value of an irq config.
   Update translate_edge_trigger() accordingly.
 * Drop GICR_SGI_OFFSET for GICR* registers address]
---
 hw/intc/arm_gicv3_kvm.c  | 518 ++-
 hw/intc/gicv3_internal.h |  29 ++-
 2 files changed, 520 insertions(+), 27 deletions(-)

diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 711fde3..24ba41e 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -23,8 +23,11 @@
 #include "qapi/error.h"
 #include "hw/intc/arm_gicv3_common.h"
 #include "hw/sysbus.h"
+#include "migration/migration.h"
+#include "qemu/error-report.h"
 #include "sysemu/kvm.h"
 #include "kvm_arm.h"
+#include "gicv3_internal.h"
 #include "vgic_common.h"
 #include "migration/migration.h"
 
@@ -44,6 +47,23 @@
 #define KVM_ARM_GICV3_GET_CLASS(obj) \
  OBJECT_GET_CLASS(KVMARMGICv3Class, (obj), TYPE_KVM_ARM_GICV3)
 
+#define ICC_PMR_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 4, 6, 0)
+#define ICC_BPR0_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 3)
+#define ICC_AP0R_EL1(n) \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 4 | n)
+#define ICC_AP1R_EL1(n) \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 9, n)
+#define ICC_BPR1_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 3)
+#define ICC_CTLR_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 4)
+#define ICC_IGRPEN0_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 6)
+#define ICC_IGRPEN1_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 7)
+
 typedef struct KVMARMGICv3Class {
 ARMGICv3CommonClass parent_class;
 DeviceRealize parent_realize;
@@ -57,16 +77,488 @@ static void kvm_arm_gicv3_set_irq(void *opaque, int irq, 
int level)
 kvm_arm_gic_set_irq(s->num_irq, irq, level);
 }
 
+#define KVM_VGIC_ATTR(reg, typer) \
+((typer & KVM_DEV_ARM_VGIC_V3_CPUID_MASK) | (reg))
+
+static inline void kvm_gicd_access(GICv3State *s, int offset,
+   uint32_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+  KVM_VGIC_ATTR(offset, 0),
+  val, write);
+}
+
+static inline void kvm_gicr_access(GICv3State *s, int offset, int cpu,
+   uint32_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+  KVM_VGIC_ATTR(offset, s->cpu[cpu].gicr_typer),
+  val, write);
+}
+
+static inline void kvm_gicc_access(GICv3State *s, uint64_t reg, int cpu,
+   uint64_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+  KVM_VGIC_ATTR(reg, s->cpu[cpu].gicr_typer),
+  val, write);
+}
+
+static inline void kvm_gic_line_level_access(GICv3State *s, int irq, int cpu,
+ uint32_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+  KVM_VGIC_ATTR(irq, s->cpu[cpu].gicr_typer) |
+  (KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_VAL <<
+   KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT),
+  val, write);
+}
+
+/* Translate from the in-kernel field for an IRQ value to/from the qemu
+ * representation. Note that these are only expected to be used for
+ * SPIs (that is, for interrupts whose state is in the distributor
+ * rather than the redistributor).
+ */
+typedef void (*vgic_translate_fn)(GICv3State *s, int irq,
+  uint32_t *field, bool to_kernel);
+
+static void translate_edge_trigger(GICv3State *s, int irq,
+uint32_t *field, bool to_kernel)
+{
+/*
+ * s->edge_trigger stores only even bits value of an irq config.
+ * Consider only even bits and translate accordingly.
+ */
+if (to_kernel) {
+*field = gicv3_gicd_edge_trigger_test(s, irq);
+*field = (*field << 1) & 3;
+} else {
+*field = (*field >> 1) & 1;
+gicv3_gicd_edge_trigger_replace(s, irq, *field);
+}
+}
+
+static void translate_priority(GICv3State *s, int irq,
+   uint32_t *field, bool to_kernel)
+{
+if (to_kernel) {

[Qemu-devel] [RFC PATCH v3 1/2] kernel: Add definitions for GICv3 attributes

2016-08-24 Thread vijay . kilari

From: Vijaya Kumar K 

This temporary patch adds kernel API definitions. Use proper header update
procedure after these features are released.

Signed-off-by: Pavel Fedin 
---
 linux-headers/asm-arm64/kvm.h | 24 +---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index 7d82d1f..dd6c09a 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -180,14 +180,14 @@ struct kvm_arch_memory_slot {
KVM_REG_ARM64_SYSREG_ ## n ## _MASK)
 
 #define __ARM64_SYS_REG(op0,op1,crn,crm,op2) \
-   (KVM_REG_ARM64 | KVM_REG_ARM64_SYSREG | \
-   ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
+   (ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
 
-#define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64)
+#define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_ARM64 | \
+   KVM_REG_SIZE_U64 | KVM_REG_ARM64_SYSREG)
 
 #define KVM_REG_ARM_TIMER_CTL  ARM64_SYS_REG(3, 3, 14, 3, 1)
 #define KVM_REG_ARM_TIMER_CNT  ARM64_SYS_REG(3, 3, 14, 3, 2)
@@ -199,10 +199,28 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS  2
 #define   KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK  (0xffULL << 
KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_V3_CPUID_MASK \
+  (0xffffffffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << 
KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define   KVM_DEV_ARM_VGIC_SYSREG_MASK (KVM_REG_ARM64_SYSREG_OP0_MASK | \
+KVM_REG_ARM64_SYSREG_OP1_MASK | \
+KVM_REG_ARM64_SYSREG_CRN_MASK | \
+KVM_REG_ARM64_SYSREG_CRM_MASK | \
+KVM_REG_ARM64_SYSREG_OP2_MASK)
+#define   KVM_DEV_ARM_VGIC_SYSREG(op0,op1,crn,crm,op2) \
+   __ARM64_SYS_REG(op0,op1,crn,crm,op2)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS   3
 #define KVM_DEV_ARM_VGIC_GRP_CTRL  4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 9
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+   (0x7fULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x1ff
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_VAL   1
+
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT   0
 
 /* Device Control API on vcpu fd */
-- 
1.9.1

[Qemu-devel] [RFC PATCH v3 0/2] GICv3 live migration support

2016-08-24 Thread vijay . kilari

From: Vijaya Kumar K 

This series introduces support for GICv3 live migration with
new VGIC implementation in 4.7-rc3 kernel.
In this series, patch 1 of the previous implementation
is ported.
https://lists.nongnu.org/archive/html/qemu-devel/2015-10/msg05284.html

Patch 2, is based on below implementation.
http://patchwork.ozlabs.org/patch/626746/

Kernel patches which implement this functionality are:
http://www.spinics.net/lists/arm-kernel/msg519596.html

This API definition is as per version of VGICv3 specification
http://lists.infradead.org/pipermail/linux-arm-kernel/2016-July/445611.html

Patch 1 of this series will be synced with KVM patches
in next revision.

Tested Live migration of Idle VM running with 4 VCPUs and 8GB RAM.

v2 => v3:
 - Dropped offset GICR_SGI_OFFSET
 - Implement save/restore of irq line level using
   KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO
 - Fixed bug with save/restore of edge_trigger

Vijaya Kumar K (2):
  kernel: Add definitions for GICv3 attributes
  hw/intc/arm_gicv3_kvm: Implement get/put functions

 hw/intc/arm_gicv3_kvm.c   | 518 +-
 hw/intc/gicv3_internal.h  |  29 +--
 linux-headers/asm-arm64/kvm.h |  24 +-
 3 files changed, 541 insertions(+), 30 deletions(-)

-- 
1.9.1

Re: [Qemu-devel] [RFC PATCH v2 1/2] utils: Add helper to read arm MIDR_EL1 register

2016-08-19 Thread Vijay Kilari

On Thu, Aug 18, 2016 at 8:26 PM, Peter Maydell  wrote:
> On 18 August 2016 at 15:46, Richard Henderson  wrote:
>> On 08/18/2016 07:14 AM, Peter Maydell wrote:
>>> While we're on the subject, can somebody explain to me why we
>>> use ifuncs at all? I couldn't work out why it would be better than
>>> just using a straightforward function pointer -- when I tried single
>>> stepping through things the ifunc approach still seemed to indirect
>>> through some table or other so it wasn't actually resolving to
>>> a direct function call anyway.
>
>> No reason, I suppose.
>>
>> It's particularly helpful for libraries, where we don't really want the
>> overhead of the initialization when it's not used.
>
> Ah, I see.
>
>> But (1) we don't have many of these and (2) we really don't care *that* much
>> about startup time.
>>
>> So a simple function pointer initialized by a constructor has the same
>> effect.
>

 cutils does not have any initialization function that could initialize
a function pointer (or act as a constructor)
for the zero-check function.

Also, creating a separate function that repeats most of the code just for prefetch does
not look good. So I suggest putting the prefetch check outside the for loop and
having two copies of the loop, one with and one without prefetch.

I profiled and found that a single check inside the loop adds a 100ms delay
to an 8GB RAM migration. So moving the check outside the loop is enough.

Ex:

   if (need_prefetch()) {

   prefetch_vector(p, 0);

for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR;
 i < len / sizeof(VECTYPE);
 i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) {

prefetch_vector_loop(p, i);

VECTYPE tmp0 = VEC_OR(p[i + 0], p[i + 1]);
VECTYPE tmp1 = VEC_OR(p[i + 2], p[i + 3]);
VECTYPE tmp2 = VEC_OR(p[i + 4], p[i + 5]);
VECTYPE tmp3 = VEC_OR(p[i + 6], p[i + 7]);
   VECTYPE tmp01 = VEC_OR(tmp0, tmp1);
   VECTYPE tmp23 = VEC_OR(tmp2, tmp3);
if (!ALL_EQ(VEC_OR(tmp01, tmp23), zero)) {
break;
}
}

} else {

for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR;
 i < len / sizeof(VECTYPE);
 i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) {

VECTYPE tmp0 = VEC_OR(p[i + 0], p[i + 1]);
VECTYPE tmp1 = VEC_OR(p[i + 2], p[i + 3]);
VECTYPE tmp2 = VEC_OR(p[i + 4], p[i + 5]);
VECTYPE tmp3 = VEC_OR(p[i + 6], p[i + 7]);
   VECTYPE tmp01 = VEC_OR(tmp0, tmp1);
   VECTYPE tmp23 = VEC_OR(tmp2, tmp3);
if (!ALL_EQ(VEC_OR(tmp01, tmp23), zero)) {
break;
}
}
}

Also, if you want to make prefetch common for all arm64 platforms, note that
the ThunderX cache line is 128 bytes, so the prefetch is performed
at 128-byte intervals. If the platform has a 64-byte cache line, then this
prefetch will fill only a 64-byte line instead of the 128 bytes required for the loop.

> That seems like it would be a worthwhile change since
> (a) I think it's easier to understand than ifunc magic
> (b) it means we don't unnecessarily restrict ourselves to a libc
> with ifunc support (musl libc doesn't do ifuncs, for instance)
>
> thanks
> -- PMM

Re: [Qemu-devel] [RFC PATCH v2 1/2] utils: Add helper to read arm MIDR_EL1 register

2016-08-18 Thread Vijay Kilari

On Thu, Aug 18, 2016 at 2:20 PM, Paolo Bonzini <pbonz...@redhat.com> wrote:
>
>
> On 18/08/2016 09:56, Vijay Kilari wrote:
>> The get_aarch_cpu_id() has check " if (unlikely(!cpu_info_read)) ".
>> If we call get_aarch_cpu_id() from is_thunderx_pass2_cpu() which is
>> called from inside the loop, we will be adding one additional check.
>
> On the other hand, you are making an assumption that the caller of
> is_thunderx_pass2_cpu() calls get_aarch64_cpu_id() first, and not
> documenting it anywhere.
>
> And given that you shouldn't call _any_ function from inside such a hot
> loop, your solution is inferior on both counts.

Yes, but I could not think of a better way to get rid of this check. However,
as Richard suggested (in another email), we could drop this check and let prefetch
be called for all arm64 architectures. But I don't have any other
arm64 platform
on which to check the impact of it.

>
> Paolo
>
>> What I observed is having extra check inside the loop is adding 100 to
>> 200ms overhead
>> on live migration time. So I added this variable extra is_thunderx_cpu
>> static variable
>> to make it simple single check.
>

Re: [Qemu-devel] [RFC PATCH v2 1/2] utils: Add helper to read arm MIDR_EL1 register

2016-08-18 Thread Vijay Kilari

On Wed, Aug 17, 2016 at 7:09 PM, Paolo Bonzini  wrote:
>
>
> On 16/08/2016 14:02, vijay.kil...@gmail.com wrote:
>> From: Vijaya Kumar K 
>>
>> Add helper API to read MIDR_EL1 registers to fetch
>> cpu identification information. This helps in
>> adding errata's and architecture specific features.
>>
>> This is implemented only for arm architecture.
>>
>> Signed-off-by: Vijaya Kumar K 
>> ---
>>  include/qemu/aarch64-cpuid.h |  9 +
>>  util/Makefile.objs   |  1 +
>>  util/aarch64-cpuid.c | 94 
>> 
>>  3 files changed, 104 insertions(+)
>>
>> diff --git a/include/qemu/aarch64-cpuid.h b/include/qemu/aarch64-cpuid.h
>> new file mode 100644
>> index 000..3c11057
>> --- /dev/null
>> +++ b/include/qemu/aarch64-cpuid.h
>> @@ -0,0 +1,9 @@
>> +#ifndef QEMU_AARCH64_CPUID_H
>> +#define QEMU_AARCH64_CPUID_H
>> +
>> +#if defined (__aarch64__)
>> +uint64_t get_aarch64_cpu_id(void);
>> +bool is_thunderx_pass2_cpu(void);
>> +#endif
>> +
>> +#endif
>> diff --git a/util/Makefile.objs b/util/Makefile.objs
>> index 96cb1e0..aa07bc3 100644
>> --- a/util/Makefile.objs
>> +++ b/util/Makefile.objs
>> @@ -35,3 +35,4 @@ util-obj-y += log.o
>>  util-obj-y += qdist.o
>>  util-obj-y += qht.o
>>  util-obj-y += range.o
>> +util-obj-y += aarch64-cpuid.o
>> diff --git a/util/aarch64-cpuid.c b/util/aarch64-cpuid.c
>> new file mode 100644
>> index 000..42af704
>> --- /dev/null
>> +++ b/util/aarch64-cpuid.c
>> @@ -0,0 +1,94 @@
>> +/*
>> + * Dealing with arm cpu identification information.
>> + *
>> + * Copyright (C) 2016 Cavium, Inc.
>> + *
>> + * Authors:
>> + *  Vijaya Kumar K 
>> + *
>> + * This work is licensed under the terms of the GNU LGPL, version 2.1
>> + * or later.  See the COPYING.LIB file in the top-level directory.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qemu-common.h"
>> +#include "qemu/cutils.h"
>> +#include "qemu/aarch64-cpuid.h"
>> +
>> +#if defined (__aarch64__)
>> +#define MIDR_IMPLEMENTER_SHIFT  24
>> +#define MIDR_IMPLEMENTER_MASK   (0xffULL << MIDR_IMPLEMENTER_SHIFT)
>> +#define MIDR_ARCHITECTURE_SHIFT 16
>> +#define MIDR_ARCHITECTURE_MASK  (0xf << MIDR_ARCHITECTURE_SHIFT)
>> +#define MIDR_PARTNUM_SHIFT  4
>> +#define MIDR_PARTNUM_MASK   (0xfff << MIDR_PARTNUM_SHIFT)
>> +
>> +#define MIDR_CPU_PART(imp, partnum) \
>> +(((imp) << MIDR_IMPLEMENTER_SHIFT)  | \
>> +(0xf<< MIDR_ARCHITECTURE_SHIFT) | \
>> +((partnum)  << MIDR_PARTNUM_SHIFT))
>> +
>> +#define ARM_CPU_IMP_CAVIUM0x43
>> +#define CAVIUM_CPU_PART_THUNDERX  0x0A1
>> +
>> +#define MIDR_THUNDERX_PASS2  \
>> +   MIDR_CPU_PART(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
>> +#define CPU_MODEL_MASK  (MIDR_IMPLEMENTER_MASK | MIDR_ARCHITECTURE_MASK | \
>> + MIDR_PARTNUM_MASK)
>> +
>> +static uint64_t qemu_read_aarch64_midr_el1(void)
>> +{
>> +#ifdef CONFIG_LINUX
>> +const char *file = 
>> "/sys/devices/system/cpu/cpu0/regs/identification/midr_el1";
>> +char *buf;
>> +uint64_t midr = 0;
>> +
>> +#define BUF_SIZE 32
>> +buf = g_malloc0(BUF_SIZE);
>> +if (!buf) {
>> +return 0;
>> +}
>> +
>> +if (!g_file_get_contents(file, , 0, NULL)) {
>> +goto out;
>> +}
>> +
>> +if (qemu_strtoull(buf, NULL, 0, ) < 0) {
>> +goto out;
>> +}
>> +
>> +out:
>> +g_free(buf);
>> +
>> +return midr;
>> +#else
>> +return 0;
>> +#endif
>> +}
>> +
>> +static bool is_thunderx_cpu;
>> +static uint64_t aarch64_midr_val;
>> +uint64_t get_aarch64_cpu_id(void)
>> +{
>> +#ifdef CONFIG_LINUX
>> +static bool cpu_info_read;
>> +
>> +if (unlikely(!cpu_info_read)) {
>> +aarch64_midr_val = qemu_read_aarch64_midr_el1();
>> +aarch64_midr_val &= CPU_MODEL_MASK;
>> +cpu_info_read = 1;
>> +if (aarch64_midr_val == MIDR_THUNDERX_PASS2) {
>> +is_thunderx_cpu = 1;
>> +}
>> +}
>> +return aarch64_midr_val;
>> +#else
>> +return 0;
>> +#endif
>> +}
>> +
>> +bool is_thunderx_pass2_cpu(void)
>> +{
>> +   return is_thunderx_cpu;
>
> This can be:
>
>return get_aarch64_cpu_id() == MIDR_THUNDERX_PASS2;
>
> without the is_thunderx_cpu variable.

get_aarch64_cpu_id() has the check "if (unlikely(!cpu_info_read))".
If we call get_aarch64_cpu_id() from is_thunderx_pass2_cpu(), which is
called from inside the loop, we will be adding one additional check.

What I observed is that having an extra check inside the loop adds 100 to
200ms of overhead
to live migration time. So I added the extra is_thunderx_cpu
static variable
to make it a simple single check.

>
> Paolo
>
>> +}
>> +#endif
>>

Re: [Qemu-devel] [RFC PATCH v2 2/2] utils: Add prefetch for Thunderx platform

2016-08-16 Thread Vijay Kilari

On Tue, Aug 16, 2016 at 11:32 PM, Richard Henderson  wrote:
> On 08/16/2016 05:02 AM, vijay.kil...@gmail.com wrote:
>>
>> +static inline void prefetch_vector_loop(const VECTYPE *p, int index)
>> +{
>> +#if defined(__aarch64__)
>> +if (is_thunderx_pass2_cpu()) {
>> +/* Prefetch 4 cache lines ahead from index */
>> +VEC_PREFETCH(p, index + (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR
>> * 4));
>> +}
>> +#endif
>> +}
>
>
> Oh come now.  This is even worse than before.  A function call protecting a
> mere prefetch within the main body of an inner loop?
>
> Did you not understand what I was asking for?

No, could you please detail the problem?

>
>
> r~

[Qemu-devel] [RFC PATCH v2 2/2] utils: Add prefetch for Thunderx platform

2016-08-16 Thread vijay . kilari

From: Vijaya Kumar K 

Thunderx pass2 chip requires explicit prefetch
instruction to give prefetch hint.

To speed up live migration on Thunderx platform,
prefetch instruction is added in zero buffer check
function.

The below results show live migration time improvement
with prefetch instruction with 1K and 4K page size.
VM with 4 VCPUs, 8GB RAM is migrated.

1K page size, no prefetch
=
Migration status: completed
total time: 13012 milliseconds
downtime: 10 milliseconds
setup: 15 milliseconds
transferred ram: 268131 kbytes
throughput: 168.84 mbps
remaining ram: 0 kbytes
total ram: 8519872 kbytes
duplicate: 8338072 pages
skipped: 0 pages
normal: 193335 pages
normal bytes: 193335 kbytes
dirty sync count: 4

1K page size with prefetch
=
Migration status: completed
total time: 7493 milliseconds
downtime: 71 milliseconds
setup: 16 milliseconds
transferred ram: 269666 kbytes
throughput: 294.88 mbps
remaining ram: 0 kbytes
total ram: 8519872 kbytes
duplicate: 8340596 pages
skipped: 0 pages
normal: 194837 pages
normal bytes: 194837 kbytes
dirty sync count: 3

4K page size with no prefetch
=
Migration status: completed
total time: 10456 milliseconds
downtime: 49 milliseconds
setup: 5 milliseconds
transferred ram: 231726 kbytes
throughput: 181.59 mbps
remaining ram: 0 kbytes
total ram: 8519872 kbytes
duplicate: 2079914 pages
skipped: 0 pages
normal: 53257 pages
normal bytes: 213028 kbytes
dirty sync count: 3

4K page size with prefetch
==
Migration status: completed
total time: 3937 milliseconds
downtime: 23 milliseconds
setup: 5 milliseconds
transferred ram: 229283 kbytes
throughput: 477.19 mbps
remaining ram: 0 kbytes
total ram: 8519872 kbytes
duplicate: 2079775 pages
skipped: 0 pages
normal: 52648 pages
normal bytes: 210592 kbytes
dirty sync count: 3

Signed-off-by: Vijaya Kumar K 
---
 util/cutils.c | 31 +++
 1 file changed, 31 insertions(+)

diff --git a/util/cutils.c b/util/cutils.c
index 7505fda..342d1e3 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -186,11 +186,14 @@ int qemu_fdatasync(int fd)
 #define VEC_OR(v1, v2) (_mm_or_si128(v1, v2))
 #elif defined(__aarch64__)
 #include "arm_neon.h"
+#include "qemu/aarch64-cpuid.h"
 #define VECTYPEuint64x2_t
 #define ALL_EQ(v1, v2) \
 ((vgetq_lane_u64(v1, 0) == vgetq_lane_u64(v2, 0)) && \
  (vgetq_lane_u64(v1, 1) == vgetq_lane_u64(v2, 1)))
 #define VEC_OR(v1, v2) ((v1) | (v2))
+#define VEC_PREFETCH(base, index) \
+__builtin_prefetch(&base[index], 0, 0);
 #else
 #define VECTYPEunsigned long
 #define SPLAT(p)   (*(p) * (~0UL / 255))
@@ -200,6 +203,29 @@ int qemu_fdatasync(int fd)
 
 #define BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR 8
 
+static inline void prefetch_vector(const VECTYPE *p, int index)
+{
+#if defined(__aarch64__)
+get_aarch64_cpu_id();
+if (is_thunderx_pass2_cpu()) {
+/* Prefetch first 3 cache lines */
+VEC_PREFETCH(p, index + BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR);
+VEC_PREFETCH(p, index + (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * 
2));
+VEC_PREFETCH(p, index + (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * 
3));
+}
+#endif
+}
+
+static inline void prefetch_vector_loop(const VECTYPE *p, int index)
+{
+#if defined(__aarch64__)
+if (is_thunderx_pass2_cpu()) {
+/* Prefetch 4 cache lines ahead from index */
+VEC_PREFETCH(p, index + (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * 
4));
+}
+#endif
+}
+
 static bool
 can_use_buffer_find_nonzero_offset_inner(const void *buf, size_t len)
 {
@@ -246,9 +272,14 @@ static size_t buffer_find_nonzero_offset_inner(const void 
*buf, size_t len)
 }
 }
 
+prefetch_vector(p, 0);
+
 for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR;
  i < len / sizeof(VECTYPE);
  i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) {
+
+prefetch_vector_loop(p, i);
+
 VECTYPE tmp0 = VEC_OR(p[i + 0], p[i + 1]);
 VECTYPE tmp1 = VEC_OR(p[i + 2], p[i + 3]);
 VECTYPE tmp2 = VEC_OR(p[i + 4], p[i + 5]);
-- 
1.9.1

[Qemu-devel] [RFC PATCH v2 1/2] utils: Add helper to read arm MIDR_EL1 register

2016-08-16 Thread vijay . kilari

From: Vijaya Kumar K 

Add helper API to read MIDR_EL1 registers to fetch
cpu identification information. This helps in
adding errata's and architecture specific features.

This is implemented only for arm architecture.

Signed-off-by: Vijaya Kumar K 
---
 include/qemu/aarch64-cpuid.h |  9 +
 util/Makefile.objs   |  1 +
 util/aarch64-cpuid.c | 94 
 3 files changed, 104 insertions(+)

diff --git a/include/qemu/aarch64-cpuid.h b/include/qemu/aarch64-cpuid.h
new file mode 100644
index 000..3c11057
--- /dev/null
+++ b/include/qemu/aarch64-cpuid.h
@@ -0,0 +1,9 @@
+#ifndef QEMU_AARCH64_CPUID_H
+#define QEMU_AARCH64_CPUID_H
+
+#if defined (__aarch64__)
+uint64_t get_aarch64_cpu_id(void);
+bool is_thunderx_pass2_cpu(void);
+#endif
+
+#endif
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 96cb1e0..aa07bc3 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -35,3 +35,4 @@ util-obj-y += log.o
 util-obj-y += qdist.o
 util-obj-y += qht.o
 util-obj-y += range.o
+util-obj-y += aarch64-cpuid.o
diff --git a/util/aarch64-cpuid.c b/util/aarch64-cpuid.c
new file mode 100644
index 000..42af704
--- /dev/null
+++ b/util/aarch64-cpuid.c
@@ -0,0 +1,94 @@
+/*
+ * Dealing with arm cpu identification information.
+ *
+ * Copyright (C) 2016 Cavium, Inc.
+ *
+ * Authors:
+ *  Vijaya Kumar K 
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later.  See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/cutils.h"
+#include "qemu/aarch64-cpuid.h"
+
+#if defined (__aarch64__)
+#define MIDR_IMPLEMENTER_SHIFT  24
+#define MIDR_IMPLEMENTER_MASK   (0xffULL << MIDR_IMPLEMENTER_SHIFT)
+#define MIDR_ARCHITECTURE_SHIFT 16
+#define MIDR_ARCHITECTURE_MASK  (0xf << MIDR_ARCHITECTURE_SHIFT)
+#define MIDR_PARTNUM_SHIFT  4
+#define MIDR_PARTNUM_MASK   (0xfff << MIDR_PARTNUM_SHIFT)
+
+#define MIDR_CPU_PART(imp, partnum) \
+(((imp) << MIDR_IMPLEMENTER_SHIFT)  | \
+(0xf<< MIDR_ARCHITECTURE_SHIFT) | \
+((partnum)  << MIDR_PARTNUM_SHIFT))
+
+#define ARM_CPU_IMP_CAVIUM0x43
+#define CAVIUM_CPU_PART_THUNDERX  0x0A1
+
+#define MIDR_THUNDERX_PASS2  \
+   MIDR_CPU_PART(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
+#define CPU_MODEL_MASK  (MIDR_IMPLEMENTER_MASK | MIDR_ARCHITECTURE_MASK | \
+ MIDR_PARTNUM_MASK)
+
+static uint64_t qemu_read_aarch64_midr_el1(void)
+{
+#ifdef CONFIG_LINUX
+const char *file = 
"/sys/devices/system/cpu/cpu0/regs/identification/midr_el1";
+char *buf;
+uint64_t midr = 0;
+
+#define BUF_SIZE 32
+buf = g_malloc0(BUF_SIZE);
+if (!buf) {
+return 0;
+}
+
+if (!g_file_get_contents(file, &buf, 0, NULL)) {
+goto out;
+}
+
+if (qemu_strtoull(buf, NULL, 0, &midr) < 0) {
+goto out;
+}
+
+out:
+g_free(buf);
+
+return midr;
+#else
+return 0;
+#endif
+}
+
+static bool is_thunderx_cpu;
+static uint64_t aarch64_midr_val;
+uint64_t get_aarch64_cpu_id(void)
+{
+#ifdef CONFIG_LINUX
+static bool cpu_info_read;
+
+if (unlikely(!cpu_info_read)) {
+aarch64_midr_val = qemu_read_aarch64_midr_el1();
+aarch64_midr_val &= CPU_MODEL_MASK;
+cpu_info_read = 1;
+if (aarch64_midr_val == MIDR_THUNDERX_PASS2) {
+is_thunderx_cpu = 1;
+}
+}
+return aarch64_midr_val;
+#else
+return 0;
+#endif
+}
+
+bool is_thunderx_pass2_cpu(void)
+{
+   return is_thunderx_cpu;
+}
+#endif
-- 
1.9.1

[Qemu-devel] [RFC PATCH v2 0/2] Live migration optimization for Thunderx platform

2016-08-16 Thread vijay . kilari

From: Vijaya Kumar K 

The CPU MIDR_EL1 register is exposed to userspace for arm64
with the below patch.
https://lkml.org/lkml/2016/7/8/467

Thunderx platform requires explicit prefetch instruction to
provide prefetch hint. Using MIDR_EL1 information, provided
by above kernel patch, prefetch is executed if the platform
is Thunderx.

The results of the live migration time improvement are provided
in the commit message of patch 2.

Note: A bounds check to avoid prefetching beyond the end of the page
is not added. Making this check is counterproductive for the
performance of live migration.

v1 => v2:
   - Rename util/cpuinfo.c as util/aarch64-cpuid.c
   - Introduced header file include/qemu/aarch64-cpuid.h
   - Place all arch specific code under define __aarch64__ and
 CONFIG_LINUX.
   - Used builtin_prefetch() to add prefetch instruction.
   - Moved arch specific changes out of generic code
   - Dropped prefetching 5th cache line.

Vijaya Kumar K (2):
  utils: Add helper to read arm MIDR_EL1 register
  utils: Add prefetch for Thunderx platform

 include/qemu/aarch64-cpuid.h |  9 +
 util/Makefile.objs   |  1 +
 util/aarch64-cpuid.c | 94 
 util/cutils.c| 31 +++
 4 files changed, 135 insertions(+)
 create mode 100644 include/qemu/aarch64-cpuid.h
 create mode 100644 util/aarch64-cpuid.c

-- 
1.9.1

Re: [Qemu-devel] [RFC PATCH v1 2/2] utils: Add prefetch for Thunderx platform

2016-08-12 Thread Vijay Kilari

On Sat, Aug 6, 2016 at 3:47 PM, Richard Henderson  wrote:
> On 08/02/2016 03:50 PM, vijay.kil...@gmail.com wrote:
>>
>> +#define VEC_PREFETCH(base, index) \
>> +asm volatile ("prfm pldl1strm, [%x[a]]\n" : :
>> [a]"r"([(index)]))
>
>
> Is this not __builtin_prefetch(base + index) ?
>
> I.e. you can defined this generically for all targets.

__builtin_prefetch() is available only in gcc 5.3 for arm64.

>
>> +#if defined (__aarch64__)
>> +do_prefetch = is_thunder_pass2_cpu();
>> +if (do_prefetch) {
>> +VEC_PREFETCH(p, 8);
>> +VEC_PREFETCH(p, 16);
>> +VEC_PREFETCH(p, 24);
>> +}
>> +#endif
>> +
>>  for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR;
>>   i < len / sizeof(VECTYPE);
>>   i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) {
>> +
>> +#if defined (__aarch64__)
>> +if (do_prefetch) {
>> +VEC_PREFETCH(p, i+32);
>> +VEC_PREFETCH(p, i+40);
>> +}
>> +#endif
>> +
>
>
> Surely we shouldn't be adding a conditional around a prefetch inside of the
> inner loop.  Does it really hurt to perform this prefetch for all aarch64
> cpus?

prefetch is only required for thunderx pass2 platform. I will remove this
condition check.

>
> I'll note that you're also prefetching too much, off the end of the block,
> and that you're probably not prefetching far enough.  You'd need to break
> off the last iteration(s) of the loop.
>
> I'll note that you're also prefetching too close.  The loop operates on
> 8*vecsize units.  In the case of aarch64, 128 byte units.  Both i+32 and

128 unit is specific to thunder. I will move this to thunder
specific function

> i+40 are within the current loop.  I believe you want to prefetch at
>

I am dropping i+40

>   i + BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * N
>
> where N is the number of iterations in advance to be fetched.  Probably N is
> 1 or 2, unless the memory controller is really slow.
>
>
> r~

Re: [Qemu-devel] [RFC PATCH v2 2/2] hw/intc/arm_gicv3_kvm: Implement get/put functions

2016-08-09 Thread Vijay Kilari

On Mon, Aug 8, 2016 at 10:27 PM, Peter Maydell  wrote:
> On 8 August 2016 at 17:51,   wrote:
>> From: Vijaya Kumar K 
>>
>> This actually implements pre_save and post_load methods for in-kernel
>> vGICv3.
>>
>> Signed-off-by: Pavel Fedin 
>> Signed-off-by: Peter Maydell 
>> [PMM:
>>  * use decimal, not 0bnnn
>>  * fixed typo in names of ICC_APR0R_EL1 and ICC_AP1R_EL1
>>  * completely rearranged the get and put functions to read and write
>>the state in a natural order, rather than mixing distributor and
>>redistributor state together]
>> Signed-off-by: Vijaya Kumar K 
>> [Vijay:
>>  * Update macro KVM_VGIC_ATTR
>>  * Use 32 bit access for gicd and gicr
>>  * GICD_IROUTER, GICD_TYPER, GICR_PROPBASER and GICR_PENDBASER reg
>>access  are changed from 64-bit to 32-bit access]
>> ---
>
>> +// TODO: there is no kernel API for reading/writing c->level
>
> We have now defined this API so this code should use it.

Do you mean saving and restoring the kernel's irq->line_level?
I don't see any API defined in the new vgic to read the line level.

The irq pending information is updated when line_level changes in the
kernel. Hence pending (ispendr) holds the pending status
of the interrupt. Do you think the line level still needs to be saved and restored?

>
>> +// TODO: there is no kernel API for reading/writing s->level
>
> Also here (and similarly in the _get function).
>
> thanks
> -- PMM

[Qemu-devel] [RFC PATCH v2 1/2] kernel: Add definitions for GICv3 attributes

2016-08-08 Thread vijay . kilari

From: Vijaya Kumar K 

This temporary patch adds kernel API definitions. Use proper header update
procedure after these features are released.

Signed-off-by: Pavel Fedin 
---
 linux-headers/asm-arm64/kvm.h | 17 ++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index 7d82d1f..396c6f3 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -180,14 +180,14 @@ struct kvm_arch_memory_slot {
KVM_REG_ARM64_SYSREG_ ## n ## _MASK)
 
 #define __ARM64_SYS_REG(op0,op1,crn,crm,op2) \
-   (KVM_REG_ARM64 | KVM_REG_ARM64_SYSREG | \
-   ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
+   (ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
 
-#define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64)
+#define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_ARM64 | \
+   KVM_REG_SIZE_U64 | KVM_REG_ARM64_SYSREG)
 
 #define KVM_REG_ARM_TIMER_CTL  ARM64_SYS_REG(3, 3, 14, 3, 1)
 #define KVM_REG_ARM_TIMER_CNT  ARM64_SYS_REG(3, 3, 14, 3, 2)
@@ -199,10 +199,21 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS  2
 #define   KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
 #define   KVM_DEV_ARM_VGIC_CPUID_MASK  (0xffULL << 
KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_V3_CPUID_MASK \
+  (0xULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT0
 #define   KVM_DEV_ARM_VGIC_OFFSET_MASK (0xULL << 
KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define   KVM_DEV_ARM_VGIC_SYSREG_MASK (KVM_REG_ARM64_SYSREG_OP0_MASK | \
+KVM_REG_ARM64_SYSREG_OP1_MASK | \
+KVM_REG_ARM64_SYSREG_CRN_MASK | \
+KVM_REG_ARM64_SYSREG_CRM_MASK | \
+KVM_REG_ARM64_SYSREG_OP2_MASK)
+#define   KVM_DEV_ARM_VGIC_SYSREG(op0,op1,crn,crm,op2) \
+   __ARM64_SYS_REG(op0,op1,crn,crm,op2)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS   3
 #define KVM_DEV_ARM_VGIC_GRP_CTRL  4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT   0
 
 /* Device Control API on vcpu fd */
-- 
1.9.1

[Qemu-devel] [RFC PATCH v2 2/2] hw/intc/arm_gicv3_kvm: Implement get/put functions

2016-08-08 Thread vijay . kilari

From: Vijaya Kumar K 

This actually implements pre_save and post_load methods for in-kernel
vGICv3.

Signed-off-by: Pavel Fedin 
Signed-off-by: Peter Maydell 
[PMM:
 * use decimal, not 0bnnn
 * fixed typo in names of ICC_APR0R_EL1 and ICC_AP1R_EL1
 * completely rearranged the get and put functions to read and write
   the state in a natural order, rather than mixing distributor and
   redistributor state together]
Signed-off-by: Vijaya Kumar K 
[Vijay:
 * Update macro KVM_VGIC_ATTR
 * Use 32 bit access for gicd and gicr
 * GICD_IROUTER, GICD_TYPER, GICR_PROPBASER and GICR_PENDBASER reg
   access  are changed from 64-bit to 32-bit access]
---
 hw/intc/arm_gicv3_kvm.c | 474 +++-
 1 file changed, 464 insertions(+), 10 deletions(-)

diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 711fde3..0f84c86 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -23,8 +23,11 @@
 #include "qapi/error.h"
 #include "hw/intc/arm_gicv3_common.h"
 #include "hw/sysbus.h"
+#include "migration/migration.h"
+#include "qemu/error-report.h"
 #include "sysemu/kvm.h"
 #include "kvm_arm.h"
+#include "gicv3_internal.h"
 #include "vgic_common.h"
 #include "migration/migration.h"
 
@@ -44,6 +47,23 @@
 #define KVM_ARM_GICV3_GET_CLASS(obj) \
  OBJECT_GET_CLASS(KVMARMGICv3Class, (obj), TYPE_KVM_ARM_GICV3)
 
+#define ICC_PMR_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 4, 6, 0)
+#define ICC_BPR0_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 3)
+#define ICC_AP0R_EL1(n) \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 8, 4 | n)
+#define ICC_AP1R_EL1(n) \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 9, n)
+#define ICC_BPR1_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 3)
+#define ICC_CTLR_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 4)
+#define ICC_IGRPEN0_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 6)
+#define ICC_IGRPEN1_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(3, 0, 12, 12, 7)
+
 typedef struct KVMARMGICv3Class {
 ARMGICv3CommonClass parent_class;
 DeviceRealize parent_realize;
@@ -57,16 +77,444 @@ static void kvm_arm_gicv3_set_irq(void *opaque, int irq, 
int level)
 kvm_arm_gic_set_irq(s->num_irq, irq, level);
 }
 
+#define KVM_VGIC_ATTR(reg, typer) \
+((typer & KVM_DEV_ARM_VGIC_V3_CPUID_MASK) | (reg))
+
+static inline void kvm_gicd_access(GICv3State *s, int offset,
+   uint32_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+  KVM_VGIC_ATTR(offset, 0),
+  val, write);
+}
+
+static inline void kvm_gicr_access(GICv3State *s, int offset, int cpu,
+   uint32_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+  KVM_VGIC_ATTR(offset, s->cpu[cpu].gicr_typer),
+  val, write);
+}
+
+static inline void kvm_gicc_access(GICv3State *s, uint64_t reg, int cpu,
+   uint64_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+  KVM_VGIC_ATTR(reg, s->cpu[cpu].gicr_typer),
+  val, write);
+}
+
+/* Translate from the in-kernel field for an IRQ value to/from the qemu
+ * representation. Note that these are only expected to be used for
+ * SPIs (that is, for interrupts whose state is in the distributor
+ * rather than the redistributor).
+ */
+typedef void (*vgic_translate_fn)(GICv3State *s, int irq,
+  uint32_t *field, bool to_kernel);
+
+static void translate_edge_trigger(GICv3State *s, int irq,
+uint32_t *field, bool to_kernel)
+{
+if (to_kernel) { \
+*field = gicv3_gicd_edge_trigger_test(s, irq);   \
+} else { \
+gicv3_gicd_edge_trigger_replace(s, irq, *field); \
+}\
+}
+
+static void translate_priority(GICv3State *s, int irq,
+   uint32_t *field, bool to_kernel)
+{
+if (to_kernel) {
+*field = s->gicd_ipriority[irq];
+} else {
+s->gicd_ipriority[irq] = *field;
+}
+}
+
+/* Loop through each distributor IRQ related register; since bits
+ * corresponding to SPIs and PPIs are RAZ/WI when affinity routing
+ * is enabled, we skip those.
+ */
+#define for_each_dist_irq_reg(_irq, _max, _field_width) \
+for (_irq = GIC_INTERNAL; _irq < _max; _irq += (32 / _field_width))
+
+/* Read a register group from the kernel VGIC */
+static void kvm_dist_get(GICv3State *s, uint32_t offset, int width,
+ vgic_translate_fn translate_fn)
+{
+uint32_t reg;
+int j;
+int irq;
+uint32_t field;
+int regsz = 32 / width; /* irqs

[Qemu-devel] [RFC PATCH v2 0/2] GICv3 live migration support

2016-08-08 Thread vijay . kilari

From: Vijaya Kumar K 

This series introduces support for GICv3 live migration with
new VGIC implementation in 4.7-rc3 kernel.
In this series, patch 1 of the previous implementation
is ported.
https://lists.nongnu.org/archive/html/qemu-devel/2015-10/msg05284.html

Patch 2, is based on below implementation.
http://patchwork.ozlabs.org/patch/626746/

Kernel patches which implement this functionality are:
http://www.spinics.net/lists/arm-kernel/msg519596.html

This API definition is as per version of VGICv3 specification
http://lists.infradead.org/pipermail/linux-arm-kernel/2016-July/445611.html

Patch 1 of this series will be synced with KVM patches
in next revision.

Tested Live migration of Idle VM running with 4 VCPUs and 8GB RAM.

Vijaya Kumar K (2):
  kernel: Add definitions for GICv3 attributes
  hw/intc/arm_gicv3_kvm: Implement get/put functions

 hw/intc/arm_gicv3_kvm.c   | 474 +-
 linux-headers/asm-arm64/kvm.h |  17 +-
 2 files changed, 478 insertions(+), 13 deletions(-)

-- 
1.9.1

Re: [Qemu-devel] [RFC PATCH v1 1/2] utils: Add helper to read arm MIDR_EL1 register

2016-08-04 Thread Vijay Kilari

Hi Paolo,

On Tue, Aug 2, 2016 at 4:18 PM, Paolo Bonzini <pbonz...@redhat.com> wrote:
> - Original Message -
>> From: "vijay kilari" <vijay.kil...@gmail.com>
>> To: qemu-...@nongnu.org, "peter maydell" <peter.mayd...@linaro.org>, 
>> pbonz...@redhat.com
>> Cc: qemu-devel@nongnu.org, "Prasun Kapoor" <prasun.kap...@cavium.com>, 
>> "vijay kilari" <vijay.kil...@gmail.com>,
>> "Vijaya Kumar K" <vijaya.ku...@cavium.com>
>> Sent: Tuesday, August 2, 2016 12:20:15 PM
>> Subject: [RFC PATCH v1 1/2] utils: Add helper to read arm MIDR_EL1 register
>>
>> From: Vijaya Kumar K <vijaya.ku...@cavium.com>
>>
>> Add helper API to read MIDR_EL1 registers to fetch
>> cpu identification information. This helps in
>> adding errata's and architecture specific features.
>>
>> This is implemented only for arm architecture.
>>
>> Signed-off-by: Vijaya Kumar K <vijaya.ku...@cavium.com>
>> ---
>>  include/qemu-common.h |1 +
>>  util/Makefile.objs|1 +
>>  util/cpuinfo.c|   52
>>  +
>>  3 files changed, 54 insertions(+)
>>
>> diff --git a/include/qemu-common.h b/include/qemu-common.h
>> index 1f2cb94..62ad674 100644
>> --- a/include/qemu-common.h
>> +++ b/include/qemu-common.h
>> @@ -134,4 +134,5 @@ void page_size_init(void);
>>   * returned. */
>>  bool dump_in_progress(void);
>>
>> +long int qemu_read_cpuid_info(void);
>
> First, please avoid adding to include/qemu-common.h (it really should
> go away).
>
> Second, this is too generic a name.  Please call it something like
> qemu_read_aarch64_midr_el1.

OK
>
> Third, it's probably a bad idea to call this function from generic code, so
> make it static and add the detection function from patch 2/2 already here.
> By making it static, it's also possible to define it only if CONFIG_LINUX
> is defined; the ThunderX detection will then return false if !CONFIG_LINUX.
>

  You mean to say, move contents of this patch to util/cutils.c and make it
static and define it under __aarch64__ and CONFIG_LINUX?

> Thanks,
>
> Paolo
>
>>  #endif
>> diff --git a/util/Makefile.objs b/util/Makefile.objs
>> index 96cb1e0..9d25a72 100644
>> --- a/util/Makefile.objs
>> +++ b/util/Makefile.objs
>> @@ -35,3 +35,4 @@ util-obj-y += log.o
>>  util-obj-y += qdist.o
>>  util-obj-y += qht.o
>>  util-obj-y += range.o
>> +util-obj-y += cpuinfo.o
>> diff --git a/util/cpuinfo.c b/util/cpuinfo.c
>> new file mode 100644
>> index 0000000..3ba7194
>> --- /dev/null
>> +++ b/util/cpuinfo.c
>> @@ -0,0 +1,52 @@
>> +/*
>> + * Dealing with arm cpu identification information.
>> + *
>> + * Copyright (C) 2016 Cavium, Inc.
>> + *
>> + * Authors:
>> + *  Vijaya Kumar K <vijaya.ku...@cavium.com>
>> + *
>> + * This work is licensed under the terms of the GNU LGPL, version 2.1
>> + * or later.  See the COPYING.LIB file in the top-level directory.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qemu-common.h"
>> +#include "qemu/cutils.h"
>> +
>> +#if defined(__aarch64__)
>> +
>> +long int qemu_read_cpuid_info(void)
>> +{
>> +FILE *fp;
>> +char *buf;
>> +long int midr = 0;
>> +#define BUF_SIZE 32
>> +
>> +fp = fopen("/sys/devices/system/cpu/cpu0/regs/identification/midr_el1",
>> +   "r");
>> +if (!fp) {
>> +return 0;
>> +}
>> +
>> +buf = g_malloc0(BUF_SIZE);
>> +if (!buf) {
>> +fclose(fp);
>> +return 0;
>> +}
>> +
>> +if (buf != fgets(buf, BUF_SIZE - 1, fp)) {
>> +goto out;
>> +}
>> +
>> +if (qemu_strtol(buf, NULL, 0, &midr) < 0) {
>> +goto out;
>> +}
>> +
>> +out:
>> +g_free(buf);
>> +fclose(fp);
>> +
>> +return midr;
>> +}
>> +#endif
>> --
>> 1.7.9.5
>>
>>

[Qemu-devel] [RFC PATCH v1 2/2] utils: Add prefetch for Thunderx platform

2016-08-02 Thread vijay . kilari

From: Vijaya Kumar K 

Thunderx pass2 chip requires explicit prefetch
instruction to give prefetch hint.

To speed up live migration on Thunderx platform,
prefetch instruction is added in zero buffer check
function.

The below results show live migration time improvement
with prefetch instruction with 1K and 4K page size.
VM with 4 VCPUs, 8GB RAM is migrated.

1K page size, no prefetch
=
Migration status: completed
total time: 13012 milliseconds
downtime: 10 milliseconds
setup: 15 milliseconds
transferred ram: 268131 kbytes
throughput: 168.84 mbps
remaining ram: 0 kbytes
total ram: 8519872 kbytes
duplicate: 8338072 pages
skipped: 0 pages
normal: 193335 pages
normal bytes: 193335 kbytes
dirty sync count: 4

1K page size with prefetch
=
Migration status: completed
total time: 7493 milliseconds
downtime: 71 milliseconds
setup: 16 milliseconds
transferred ram: 269666 kbytes
throughput: 294.88 mbps
remaining ram: 0 kbytes
total ram: 8519872 kbytes
duplicate: 8340596 pages
skipped: 0 pages
normal: 194837 pages
normal bytes: 194837 kbytes
dirty sync count: 3

4K page size with no prefetch
=
Migration status: completed
total time: 10456 milliseconds
downtime: 49 milliseconds
setup: 5 milliseconds
transferred ram: 231726 kbytes
throughput: 181.59 mbps
remaining ram: 0 kbytes
total ram: 8519872 kbytes
duplicate: 2079914 pages
skipped: 0 pages
normal: 53257 pages
normal bytes: 213028 kbytes
dirty sync count: 3

4K page size with prefetch
==
Migration status: completed
total time: 3937 milliseconds
downtime: 23 milliseconds
setup: 5 milliseconds
transferred ram: 229283 kbytes
throughput: 477.19 mbps
remaining ram: 0 kbytes
total ram: 8519872 kbytes
duplicate: 2079775 pages
skipped: 0 pages
normal: 52648 pages
normal bytes: 210592 kbytes
dirty sync count: 3

Signed-off-by: Vijaya Kumar K 
---
 include/qemu-common.h |1 +
 util/cpuinfo.c|   38 ++
 util/cutils.c |   22 ++
 3 files changed, 61 insertions(+)

diff --git a/include/qemu-common.h b/include/qemu-common.h
index 62ad674..3d8a32c 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -135,4 +135,5 @@ void page_size_init(void);
 bool dump_in_progress(void);
 
 long int qemu_read_cpuid_info(void);
+bool is_thunder_pass2_cpu(void);
 #endif
diff --git a/util/cpuinfo.c b/util/cpuinfo.c
index 3ba7194..0e72a34 100644
--- a/util/cpuinfo.c
+++ b/util/cpuinfo.c
@@ -16,6 +16,26 @@
 
 #if defined(__aarch64__)
 
+#define MIDR_IMPLEMENTER_SHIFT  24
+#define MIDR_IMPLEMENTER_MASK   (0xffULL << MIDR_IMPLEMENTER_SHIFT)
+#define MIDR_ARCHITECTURE_SHIFT 16
+#define MIDR_ARCHITECTURE_MASK  (0xf << MIDR_ARCHITECTURE_SHIFT)
+#define MIDR_PARTNUM_SHIFT  4
+#define MIDR_PARTNUM_MASK   (0xfff << MIDR_PARTNUM_SHIFT)
+
+#define MIDR_CPU_PART(imp, partnum) \
+(((imp) << MIDR_IMPLEMENTER_SHIFT)  | \
+(0xf<< MIDR_ARCHITECTURE_SHIFT) | \
+((partnum)  << MIDR_PARTNUM_SHIFT))
+
+#define ARM_CPU_IMP_CAVIUM0x43
+#define CAVIUM_CPU_PART_THUNDERX  0x0A1
+
+#define MIDR_THUNDERX  \
+   MIDR_CPU_PART(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
+#define CPU_MODEL_MASK  (MIDR_IMPLEMENTER_MASK | MIDR_ARCHITECTURE_MASK | \
+ MIDR_PARTNUM_MASK)
+
 long int qemu_read_cpuid_info(void)
 {
 FILE *fp;
@@ -49,4 +69,22 @@ out:
 
 return midr;
 }
+
+bool is_thunder_pass2_cpu(void)
+{
+static bool cpu_info_read;
+static long int midr_thunder_val;
+
+if (!cpu_info_read) {
+midr_thunder_val = qemu_read_cpuid_info();
+midr_thunder_val &= CPU_MODEL_MASK;
+cpu_info_read = 1;
+}
+
+if (midr_thunder_val == MIDR_THUNDERX) {
+return 1;
+} else {
+return 0;
+}
+}
 #endif
diff --git a/util/cutils.c b/util/cutils.c
index 7505fda..66c816b 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -191,6 +191,8 @@ int qemu_fdatasync(int fd)
 ((vgetq_lane_u64(v1, 0) == vgetq_lane_u64(v2, 0)) && \
  (vgetq_lane_u64(v1, 1) == vgetq_lane_u64(v2, 1)))
 #define VEC_OR(v1, v2) ((v1) | (v2))
+#define VEC_PREFETCH(base, index) \
+asm volatile ("prfm pldl1strm, [%x[a]]\n" : : [a]"r"(&base[(index)]))
 #else
 #define VECTYPEunsigned long
 #define SPLAT(p)   (*(p) * (~0UL / 255))
@@ -233,6 +235,9 @@ static size_t buffer_find_nonzero_offset_inner(const void 
*buf, size_t len)
 const VECTYPE *p = buf;
 const VECTYPE zero = (VECTYPE){0};
 size_t i;
+#if defined (__aarch64__)
+bool do_prefetch;
+#endif
 
 assert(can_use_buffer_find_nonzero_offset_inner(buf, len));
 
@@ -246,9 +251,26 @@ static size_t buffer_find_nonzero_offset_inner(const void 
*buf, size_t len)
 }
 }
 
+#if defined (__aarch64__)
+do_prefetch = is_thunder_pass2_cpu();
+if (do_prefetch) {

[Qemu-devel] [RFC PATCH v1 1/2] utils: Add helper to read arm MIDR_EL1 register

2016-08-02 Thread vijay . kilari

From: Vijaya Kumar K 

Add helper API to read MIDR_EL1 registers to fetch
cpu identification information. This helps in
adding errata's and architecture specific features.

This is implemented only for arm architecture.

Signed-off-by: Vijaya Kumar K 
---
 include/qemu-common.h |1 +
 util/Makefile.objs|1 +
 util/cpuinfo.c|   52 +
 3 files changed, 54 insertions(+)

diff --git a/include/qemu-common.h b/include/qemu-common.h
index 1f2cb94..62ad674 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -134,4 +134,5 @@ void page_size_init(void);
  * returned. */
 bool dump_in_progress(void);
 
+long int qemu_read_cpuid_info(void);
 #endif
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 96cb1e0..9d25a72 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -35,3 +35,4 @@ util-obj-y += log.o
 util-obj-y += qdist.o
 util-obj-y += qht.o
 util-obj-y += range.o
+util-obj-y += cpuinfo.o
diff --git a/util/cpuinfo.c b/util/cpuinfo.c
new file mode 100644
index 0000000..3ba7194
--- /dev/null
+++ b/util/cpuinfo.c
@@ -0,0 +1,52 @@
+/*
+ * Dealing with arm cpu identification information.
+ *
+ * Copyright (C) 2016 Cavium, Inc.
+ *
+ * Authors:
+ *  Vijaya Kumar K 
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later.  See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/cutils.h"
+
+#if defined(__aarch64__)
+
+long int qemu_read_cpuid_info(void)
+{
+FILE *fp;
+char *buf;
+long int midr = 0;
+#define BUF_SIZE 32
+
+fp = fopen("/sys/devices/system/cpu/cpu0/regs/identification/midr_el1",
+   "r");
+if (!fp) {
+return 0;
+}
+
+buf = g_malloc0(BUF_SIZE);
+if (!buf) {
+fclose(fp);
+return 0;
+}
+
+if (buf != fgets(buf, BUF_SIZE - 1, fp)) {
+goto out;
+}
+
+if (qemu_strtol(buf, NULL, 0, &midr) < 0) {
+goto out;
+}
+
+out:
+g_free(buf);
+fclose(fp);
+
+return midr;
+}
+#endif
-- 
1.7.9.5

[Qemu-devel] [RFC PATCH v1 0/2] Live migration optimization for Thunderx platform

2016-08-02 Thread vijay . kilari

From: Vijaya Kumar K 

The CPU MIDR_EL1 register is exposed to userspace for arm64
with the below patch.
https://lkml.org/lkml/2016/7/8/467

Thunderx platform requires explicit prefetch instruction to
provide prefetch hint. Using MIDR_EL1 information, provided
by above kernel patch, prefetch is executed if the platform
is Thunderx.

The results of live migration time improvement is provided
in commit message of patch 2.

Vijaya Kumar K (2):
  utils: Add helper to read arm MIDR_EL1 register
  utils: Add prefetch for Thunderx platform

 include/qemu-common.h |2 ++
 util/Makefile.objs|1 +
 util/cpuinfo.c|   90 +
 util/cutils.c |   22 
 4 files changed, 115 insertions(+)
 create mode 100644 util/cpuinfo.c

-- 
1.7.9.5

[Qemu-devel] [RFC PATCH v1 2/2] hw/intc/arm_gicv3_kvm: Implement get/put functions

2016-07-26 Thread vijay . kilari

From: Vijaya Kumar K 

This actually implements pre_save and post_load methods for in-kernel
vGICv3.

Signed-off-by: Pavel Fedin 
Signed-off-by: Vijaya Kumar K 
[Vijay: - Adjusted macros to handle gicr variables
 - Used gicr_typer for affinity
 - Made all GICD/GICR registers access as 32-bit.
 - All ICC register access as 64 bit.
 ]
---
 hw/intc/arm_gicv3_common.c |3 +
 hw/intc/arm_gicv3_kvm.c|  496 +++-
 hw/intc/gicv3_internal.h   |  113 
 include/hw/intc/arm_gicv3_common.h |1 +
 4 files changed, 607 insertions(+), 6 deletions(-)

diff --git a/hw/intc/arm_gicv3_common.c b/hw/intc/arm_gicv3_common.c
index 0f8c4b8..55102f7 100644
--- a/hw/intc/arm_gicv3_common.c
+++ b/hw/intc/arm_gicv3_common.c
@@ -54,6 +54,7 @@ static const VMStateDescription vmstate_gicv3_cpu = {
 .version_id = 1,
 .minimum_version_id = 1,
 .fields = (VMStateField[]) {
+VMSTATE_BOOL(cpu_enabled, GICv3CPUState),
 VMSTATE_UINT32(level, GICv3CPUState),
 VMSTATE_UINT32(gicr_ctlr, GICv3CPUState),
 VMSTATE_UINT32_ARRAY(gicr_statusr, GICv3CPUState, 2),
@@ -64,6 +65,7 @@ static const VMStateDescription vmstate_gicv3_cpu = {
 VMSTATE_UINT32(gicr_ienabler0, GICv3CPUState),
 VMSTATE_UINT32(gicr_ipendr0, GICv3CPUState),
 VMSTATE_UINT32(gicr_iactiver0, GICv3CPUState),
+VMSTATE_UINT32(level, GICv3CPUState),
 VMSTATE_UINT32(edge_trigger, GICv3CPUState),
 VMSTATE_UINT32(gicr_igrpmodr0, GICv3CPUState),
 VMSTATE_UINT32(gicr_nsacr, GICv3CPUState),
@@ -220,6 +222,7 @@ static void arm_gicv3_common_reset(DeviceState *dev)
 for (i = 0; i < s->num_cpu; i++) {
 GICv3CPUState *cs = &s->cpu[i];
 
+cs->cpu_enabled = false;
 cs->level = 0;
 cs->gicr_ctlr = 0;
 cs->gicr_statusr[GICV3_S] = 0;
diff --git a/hw/intc/arm_gicv3_kvm.c b/hw/intc/arm_gicv3_kvm.c
index 711fde3..9b35165 100644
--- a/hw/intc/arm_gicv3_kvm.c
+++ b/hw/intc/arm_gicv3_kvm.c
@@ -23,8 +23,11 @@
 #include "qapi/error.h"
 #include "hw/intc/arm_gicv3_common.h"
 #include "hw/sysbus.h"
+#include "migration/migration.h"
+#include "qemu/error-report.h"
 #include "sysemu/kvm.h"
 #include "kvm_arm.h"
+#include "gicv3_internal.h"
 #include "vgic_common.h"
 #include "migration/migration.h"
 
@@ -44,6 +47,23 @@
 #define KVM_ARM_GICV3_GET_CLASS(obj) \
  OBJECT_GET_CLASS(KVMARMGICv3Class, (obj), TYPE_KVM_ARM_GICV3)
 
+#define ICC_PMR_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(0b11, 0b000, 0b0100, 0b0110, 0b000)
+#define ICC_BPR0_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(0b11, 0b000, 0b1100, 0b1000, 0b011)
+#define ICC_APR0_EL1(n) \
+KVM_DEV_ARM_VGIC_SYSREG(0b11, 0b000, 0b1100, 0b1000, 0b100 | n)
+#define ICC_APR1_EL1(n) \
+KVM_DEV_ARM_VGIC_SYSREG(0b11, 0b000, 0b1100, 0b1001, 0b000 | n)
+#define ICC_BPR1_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(0b11, 0b000, 0b1100, 0b1100, 0b011)
+#define ICC_CTLR_EL1\
+KVM_DEV_ARM_VGIC_SYSREG(0b11, 0b000, 0b1100, 0b1100, 0b100)
+#define ICC_IGRPEN0_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(0b11, 0b000, 0b1100, 0b1100, 0b110)
+#define ICC_IGRPEN1_EL1 \
+KVM_DEV_ARM_VGIC_SYSREG(0b11, 0b000, 0b1100, 0b1100, 0b111)
+
 typedef struct KVMARMGICv3Class {
 ARMGICv3CommonClass parent_class;
 DeviceRealize parent_realize;
@@ -57,16 +77,469 @@ static void kvm_arm_gicv3_set_irq(void *opaque, int irq, 
int level)
 kvm_arm_gic_set_irq(s->num_irq, irq, level);
 }
 
+#define KVM_VGIC_ATTR(reg, cpuaff) \
+((cpuaff << KVM_DEV_ARM_VGIC_CPUID_SHIFT) | (reg))
+
+static inline void kvm_gicd_access(GICv3State *s, int offset, int cpu,
+   uint32_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+   KVM_VGIC_ATTR(offset, ((s->cpu[cpu].gicr_typer >> 32) & 
0xffffffff)),
+   val, write);
+}
+
+static inline void kvm_gicr_access(GICv3State *s, int offset, int cpu,
+   uint32_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+   KVM_VGIC_ATTR(offset, ((s->cpu[cpu].gicr_typer >> 32) & 
0xffffffff)),
+   val, write);
+}
+
+static inline void kvm_gicc_access(GICv3State *s, uint64_t reg, int cpu,
+   uint64_t *val, bool write)
+{
+kvm_device_access(s->dev_fd, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS,
+   KVM_VGIC_ATTR(reg, ((s->cpu[cpu].gicr_typer >> 32) & 0xffffffff)),
+   val, write);
+}
+
+/*
+ * Translate from the in-kernel field for an IRQ value to/from the qemu
+ * representation.
+ */
+typedef void (*vgic_translate_fn)(GICv3State *s, int irq, int cpu,
+  uint32_t *field, bool to_kernel);
+
+/* synthetic translate function used for clear/set registers to completely
+ * clear a setting using a clear-register before setting the remaining bits
+ * using a

[Qemu-devel] [RFC PATCH v1 1/2] kernel: Add definitions for GICv3 attributes

2016-07-26 Thread vijay . kilari

From: Vijaya Kumar K 

This temporary patch adds kernel API definitions. Use proper header update
procedure after these features are released.

Signed-off-by: Pavel Fedin 
---
 linux-headers/asm-arm64/kvm.h |   22 +-
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/linux-headers/asm-arm64/kvm.h b/linux-headers/asm-arm64/kvm.h
index 7d82d1f..9a21242 100644
--- a/linux-headers/asm-arm64/kvm.h
+++ b/linux-headers/asm-arm64/kvm.h
@@ -180,14 +180,14 @@ struct kvm_arch_memory_slot {
KVM_REG_ARM64_SYSREG_ ## n ## _MASK)
 
 #define __ARM64_SYS_REG(op0,op1,crn,crm,op2) \
-   (KVM_REG_ARM64 | KVM_REG_ARM64_SYSREG | \
-   ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
+   (ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
 
-#define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64)
+#define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_ARM64 | \
+   KVM_REG_SIZE_U64 | KVM_REG_ARM64_SYSREG)
 
 #define KVM_REG_ARM_TIMER_CTL  ARM64_SYS_REG(3, 3, 14, 3, 1)
 #define KVM_REG_ARM_TIMER_CNT  ARM64_SYS_REG(3, 3, 14, 3, 2)
@@ -197,12 +197,24 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_ADDR  0
 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
 #define KVM_DEV_ARM_VGIC_GRP_CPU_REGS  2
+#define   KVM_DEV_ARM_VGIC_64BIT(1ULL << 63)
 #define   KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
-#define   KVM_DEV_ARM_VGIC_CPUID_MASK  (0xffULL << 
KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_CPUID_MASK  \
+  (0xffffffffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
 #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT0
-#define   KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << 
KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define   KVM_DEV_ARM_VGIC_OFFSET_MASK \
+  (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define   KVM_DEV_ARM_VGIC_SYSREG_MASK (KVM_REG_ARM64_SYSREG_OP0_MASK | \
+KVM_REG_ARM64_SYSREG_OP1_MASK | \
+KVM_REG_ARM64_SYSREG_CRN_MASK | \
+KVM_REG_ARM64_SYSREG_CRM_MASK | \
+KVM_REG_ARM64_SYSREG_OP2_MASK)
+#define   KVM_DEV_ARM_VGIC_SYSREG(op0,op1,crn,crm,op2) \
+   __ARM64_SYS_REG(op0,op1,crn,crm,op2)
 #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS   3
 #define KVM_DEV_ARM_VGIC_GRP_CTRL  4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
 #define   KVM_DEV_ARM_VGIC_CTRL_INIT   0
 
 /* Device Control API on vcpu fd */
-- 
1.7.9.5

[Qemu-devel] [RFC PATCH v1 0/2] GICv3 live migration support

2016-07-26 Thread vijay . kilari

From: Vijaya Kumar K 

This series introduces support for GICv3 live migration with
new VGIC implementation in 4.7-rc3 kernel.
In this series, patch 2 & 3 of the previous implementation
are ported.
https://lists.nongnu.org/archive/html/qemu-devel/2015-10/msg05284.html

Kernel patches which implement this functionality are:
http://www.spinics.net/lists/arm-kernel/msg519596.html

This API definition is as per draft version of VGICv3 specification
https://lists.cs.columbia.edu/pipermail/kvmarm/2016-May/020355.html

Patch 1 of this series will be synced with KVM patches
in next revision.

Tested Live migration of Idle VM running with 4 VCPUs and 8GB RAM.

Vijaya Kumar K (2):
  kernel: Add definitions for GICv3 attributes
  hw/intc/arm_gicv3_kvm: Implement get/put functions

 hw/intc/arm_gicv3_common.c |3 +
 hw/intc/arm_gicv3_kvm.c|  496 +++-
 hw/intc/gicv3_internal.h   |  113 
 include/hw/intc/arm_gicv3_common.h |1 +
 linux-headers/asm-arm64/kvm.h  |   22 +-
 5 files changed, 624 insertions(+), 11 deletions(-)

-- 
1.7.9.5

Re: [Qemu-devel] Fw: [Qemu-arm] [PATCH v2 0/6] Runtime pagesize computation

2016-07-19 Thread Vijay Kilari

Hi Peter,

  Any update on this patch set? Is it merged?

On Wed, Jun 29, 2016 at 12:30 PM, Vijay Kilari <vijay.kil...@gmail.com> wrote:
> On Wed, Jun 29, 2016 at 12:24 PM, Kumar, Vijaya <vijaya.ku...@cavium.com> 
> wrote:
>>
>>
>>
>> 
>> From: Peter Maydell <peter.mayd...@linaro.org>
>> Sent: Tuesday, June 28, 2016 1:46 PM
>> To: qemu-arm; QEMU Developers
>> Cc: Paolo Bonzini; Kumar, Vijaya; Patch Tracking
>> Subject: Re: [Qemu-arm] [PATCH v2 0/6] Runtime pagesize computation
>>
>> On 21 June 2016 at 18:09, Peter Maydell <peter.mayd...@linaro.org> wrote:
>>> This set of patches is a development based on the ones from Vijaya:
>>> the general idea is similar but I have tried to improve the interface
>>> for defining the page size a bit.  I've also tweaked patches 2 and 3
>>> to address code review comments.
>>
>>> NB: I have only very lightly tested these and haven't attempted
>>> to measure performance at all. There is an assert() in the
>>> definition of TARGET_PAGE_BITS which is good for making sure
>>> it isn't used before it's valid but not so good for speed.
>>
>> Vijaya, are you in a position to test this patchset for
>> performance? Presumably you have a test case benchmark you're
>> looking to improve here?
>
> I have tested the patches and the test case that I was trying was
> Live migration of Idle VM on arm64 platform.
> VM migrated is with 4 VCPUS and 8GB RAM running CentOS.
>
> With page bits 10 (1K), the live migration time is 5.8 sec
>
> capabilities: xbzrle: off rdma-pin-all: off auto-converge: off
> zero-blocks: off compress: off events: off x-postcopy-ram: off
> Migration status: completed
> total time: 5857 milliseconds
> downtime: 102 milliseconds
> setup: 14 milliseconds
> transferred ram: 336081 kbytes
> throughput: 470.21 mbps
> remaining ram: 0 kbytes
> total ram: 8519872 kbytes
> duplicate: 8271539 pages
> skipped: 0 pages
> normal: 261340 pages
> normal bytes: 261340 kbytes
> dirty sync count: 3
>
> With page bits 12 (4K), live migration time is 2.9 sec
>
> capabilities: xbzrle: off rdma-pin-all: off auto-converge: off
> zero-blocks: off compress: off events: off x-postcopy-ram: off
> Migration status: completed
> total time: 2974 milliseconds
> downtime: 76 milliseconds
> setup: 5 milliseconds
> transferred ram: 301327 kbytes
> throughput: 830.30 mbps
> remaining ram: 0 kbytes
> total ram: 8519872 kbytes
> duplicate: 2062398 pages
> skipped: 0 pages
> normal: 70662 pages
> normal bytes: 282648 kbytes
> dirty sync count: 3
>
> Regards
> Vijay
>>
>> thanks
>> -- PMM

Re: [Qemu-devel] [PATCH v3 1/1] target-arm: Use Neon for zero checking

2016-07-05 Thread Vijay Kilari

On Sat, Jul 2, 2016 at 3:37 AM, Richard Henderson  wrote:
> On 06/30/2016 06:45 AM, Peter Maydell wrote:
>>
>> On 29 June 2016 at 09:47,   wrote:
>>>
>>> From: Vijay 
>>>
>>> Use Neon instructions to perform zero checking of
>>> buffer. This helps in reducing total migration time.
>>
>>
>>> diff --git a/util/cutils.c b/util/cutils.c
>>> index 5830a68..4779403 100644
>>> --- a/util/cutils.c
>>> +++ b/util/cutils.c
>>> @@ -184,6 +184,13 @@ int qemu_fdatasync(int fd)
>>>  #define SPLAT(p)   _mm_set1_epi8(*(p))
>>>  #define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) ==
>>> 0xFFFF)
>>>  #define VEC_OR(v1, v2) (_mm_or_si128(v1, v2))
>>> +#elif __aarch64__
>>> +#include "arm_neon.h"
>>> +#define VECTYPEuint64x2_t
>>> +#define ALL_EQ(v1, v2) \
>>> +((vgetq_lane_u64(v1, 0) == vgetq_lane_u64(v2, 0)) && \
>>> + (vgetq_lane_u64(v1, 1) == vgetq_lane_u64(v2, 1)))
>>> +#define VEC_OR(v1, v2) ((v1) | (v2))
>>
>>
>> Should be '#elif defined(__aarch64__)'. I have made this
>> tweak and put this patch in target-arm.next.
>
>
> Consider
>
> #define VECTYPEuint32x4_t
> #define ALL_EQ(v1, v2) (vmaxvq_u32((v1) ^ (v2)) == 0)
>
>
> which compiles down to
>
>   1c:   6e211c00eor v0.16b, v0.16b, v1.16b
>   20:   6eb0a800umaxv   s0, v0.4s
>   24:   1e26fmovw0, s0
>   28:   6b1f001fcmp w0, wzr
>   2c:   1a9f17e0csetw0, eq
>   30:   d65f03c0ret

For me this code compiles as below and migration time is ~100ms more.

See below 3 trials of migration time

  7039cc:   6eb0a800umaxv   s0, v0.4s
  7039d0:   0e043c02mov w2, v0.s[0]
  7039d4:   35c2cbnzw2, 7039ec 
  7039d8:   91002084add x4, x4, #0x8
  7039dc:   91020063add x3, x3, #0x80
  7039e0:   eb01009fcmp x4, x1

(qemu) info migrate
capabilities: xbzrle: off rdma-pin-all: off auto-converge: off
zero-blocks: off compress: off events: off x-postcopy-ram: off
Migration status: completed
total time: 3070 milliseconds
downtime: 55 milliseconds
setup: 4 milliseconds
transferred ram: 300637 kbytes
throughput: 802.49 mbps
remaining ram: 0 kbytes
total ram: 8519872 kbytes
duplicate: 2062834 pages
skipped: 0 pages
normal: 70489 pages
normal bytes: 281956 kbytes
dirty sync count: 3

(qemu) info migrate
capabilities: xbzrle: off rdma-pin-all: off auto-converge: off
zero-blocks: off compress: off events: off x-postcopy-ram: off
Migration status: completed
total time: 3067 milliseconds
downtime: 47 milliseconds
setup: 5 milliseconds
transferred ram: 290277 kbytes
throughput: 775.61 mbps
remaining ram: 0 kbytes
total ram: 8519872 kbytes
duplicate: 2064185 pages
skipped: 0 pages
normal: 67901 pages
normal bytes: 271604 kbytes
dirty sync count: 3
(qemu)

(qemu) info migrate
capabilities: xbzrle: off rdma-pin-all: off auto-converge: off
zero-blocks: off compress: off events: off x-postcopy-ram: off
Migration status: completed
total time: 3067 milliseconds
downtime: 34 milliseconds
setup: 5 milliseconds
transferred ram: 294614 kbytes
throughput: 787.19 mbps
remaining ram: 0 kbytes
total ram: 8519872 kbytes
duplicate: 2063365 pages
skipped: 0 pages
normal: 68985 pages
normal bytes: 275940 kbytes
dirty sync count: 3

>
> vs
>
>   34:   4e083c20mov x0, v1.d[0]
>   38:   4e083c01mov x1, v0.d[0]
>   3c:   eb3fcmp x1, x0
>   40:   5280mov w0, #0
>   44:   5440b.eq4c 
>   48:   d65f03c0ret
>   4c:   4e183c20mov x0, v1.d[1]
>   50:   4e183c01mov x1, v0.d[1]
>   54:   eb3fcmp x1, x0
>   58:   1a9f17e0csetw0, eq
>   5c:   d65f03c0ret
>

My patch compiles to below code and takes ~100ms less time

#define VECTYPEuint64x2_t
#define ALL_EQ(v1, v2) \
((vgetq_lane_u64(v1, 0) == vgetq_lane_u64(v2, 0)) && \
 (vgetq_lane_u64(v1, 1) == vgetq_lane_u64(v2, 1)))

  7039d0:   4e083c02    mov     x2, v0.d[0]
  7039d4:   b5000102    cbnz    x2, 7039f4
  7039d8:   4e183c02    mov     x2, v0.d[1]
  7039dc:   b50000c2    cbnz    x2, 7039f4
  7039e0:   91002084    add     x4, x4, #0x8
  7039e4:   91020063    add     x3, x3, #0x80
  7039e8:   eb04003f    cmp     x1, x4

capabilities: xbzrle: off rdma-pin-all: off auto-converge: off
zero-blocks: off compress: off events: off x-postcopy-ram: off
Migration status: completed
total time: 2973 milliseconds
downtime: 67 milliseconds
setup: 5 milliseconds
transferred ram: 293659 kbytes
throughput: 809.45 mbps
remaining ram: 0 kbytes
total ram: 8519872 kbytes
duplicate: 2062791 pages
skipped: 0 pages
normal: 68748 pages
normal bytes: 274992 kbytes
dirty sync count: 3
(qemu)

capabilities: xbzrle: off rdma-pin-all: off auto-converge: off
zero-blocks: off compress: off

Re: [Qemu-devel] Fw: [Qemu-arm] [PATCH v2 0/6] Runtime pagesize computation

2016-06-29 Thread Vijay Kilari

On Wed, Jun 29, 2016 at 12:24 PM, Kumar, Vijaya  wrote:
>
>
>
> 
> From: Peter Maydell 
> Sent: Tuesday, June 28, 2016 1:46 PM
> To: qemu-arm; QEMU Developers
> Cc: Paolo Bonzini; Kumar, Vijaya; Patch Tracking
> Subject: Re: [Qemu-arm] [PATCH v2 0/6] Runtime pagesize computation
>
> On 21 June 2016 at 18:09, Peter Maydell  wrote:
>> This set of patches is a development based on the ones from Vijaya:
>> the general idea is similar but I have tried to improve the interface
>> for defining the page size a bit.  I've also tweaked patches 2 and 3
>> to address code review comments.
>
>> NB: I have only very lightly tested these and haven't attempted
>> to measure performance at all. There is an assert() in the
>> definition of TARGET_PAGE_BITS which is good for making sure
>> it isn't used before it's valid but not so good for speed.
>
> Vijaya, are you in a position to test this patchset for
> performance? Presumably you have a test case benchmark you're
> looking to improve here?

I have tested the patches. The test case I used was
live migration of an idle VM on an arm64 platform.
The migrated VM has 4 VCPUs and 8GB RAM and runs CentOS.

With page bits 10 (1K), the live migration time is 5.8 sec

capabilities: xbzrle: off rdma-pin-all: off auto-converge: off
zero-blocks: off compress: off events: off x-postcopy-ram: off
Migration status: completed
total time: 5857 milliseconds
downtime: 102 milliseconds
setup: 14 milliseconds
transferred ram: 336081 kbytes
throughput: 470.21 mbps
remaining ram: 0 kbytes
total ram: 8519872 kbytes
duplicate: 8271539 pages
skipped: 0 pages
normal: 261340 pages
normal bytes: 261340 kbytes
dirty sync count: 3

With page bits 12 (4K), live migration time is 2.9 sec

capabilities: xbzrle: off rdma-pin-all: off auto-converge: off
zero-blocks: off compress: off events: off x-postcopy-ram: off
Migration status: completed
total time: 2974 milliseconds
downtime: 76 milliseconds
setup: 5 milliseconds
transferred ram: 301327 kbytes
throughput: 830.30 mbps
remaining ram: 0 kbytes
total ram: 8519872 kbytes
duplicate: 2062398 pages
skipped: 0 pages
normal: 70662 pages
normal bytes: 282648 kbytes
dirty sync count: 3

Regards
Vijay
>
> thanks
> -- PMM

Re: [Qemu-devel] Fw: [Qemu-arm] [PATCH v2 2/6] exec.c: Remove static allocation of sub_section of sub_page

2016-06-22 Thread Vijay Kilari

> 
> From: Qemu-arm  on
> behalf of Peter Maydell 
> Sent: Tuesday, June 21, 2016 10:39 PM
> To: qemu-...@nongnu.org; qemu-devel@nongnu.org
> Cc: Paolo Bonzini; Kumar, Vijaya; patc...@linaro.org
> Subject: [Qemu-arm] [PATCH v2 2/6] exec.c: Remove static allocation of
> sub_section of sub_page
>
> From: Vijaya Kumar K 
>
> Allocate sub_section dynamically. Remove dependency
> on TARGET_PAGE_SIZE to make run-time page size detection
> for arm platforms.
>
> Signed-off-by: Vijaya Kumar K 
> Message-id: 1465808915-4887-3-git-send-email-vija...@caviumnetworks.com
> [PMM: use flexible array member rather than separate malloc
>  so we don't need an extra pointer deref when using it]
> Signed-off-by: Peter Maydell 
> ---
>  exec.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/exec.c b/exec.c
> index 0122ef7..8eaeb0c 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -153,7 +153,7 @@ typedef struct subpage_t {
>  MemoryRegion iomem;
>  AddressSpace *as;
>  hwaddr base;
> -uint16_t sub_section[TARGET_PAGE_SIZE];
> +uint16_t sub_section[];
   NIT: A comment noting that this variable must be the last member of
this struct would be helpful.

>  } subpage_t;
>
>  #define PHYS_SECTION_UNASSIGNED 0
> @@ -2270,8 +2270,7 @@ static subpage_t *subpage_init(AddressSpace *as,
> hwaddr base)
>  {
>  subpage_t *mmio;
>
> -mmio = g_malloc0(sizeof(subpage_t));
> -
> +mmio = g_malloc0(sizeof(subpage_t) + TARGET_PAGE_SIZE *
> sizeof(uint16_t));

NIT: sizeof(mmio->sub_section[0]) looks better than sizeof(uint16_t)

>  mmio->as = as;
>  mmio->base = base;
>  memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
> --
> 1.9.1
>
>

Re: [Qemu-devel] [RFC PATCH v1 4/4] target-arm: Compute page size based on ARM target cpu type

2016-06-17 Thread Vijay Kilari

On Fri, Jun 17, 2016 at 4:00 PM, Peter Maydell <peter.mayd...@linaro.org> wrote:
> On 17 June 2016 at 11:20, Vijay Kilari <vijay.kil...@gmail.com> wrote:
>> On Fri, Jun 17, 2016 at 1:12 AM, Richard Henderson <r...@twiddle.net> wrote:
>>> On 06/14/2016 04:36 AM, Peter Maydell wrote:
>>>> It would be better to delay the point at which we allocate
>>>> the data structures which care about page size, rather than
>>>> moving init of the CPU earlier.
>>>
>>> It would be *best* if we could re-initialize and re-allocate these data
>>> structures so that we can follow the current page size as it changes.
>>>
>>> Yes, this might require flushing just about everything, but it's the kind of
>>> thing that's likely to happen only at system startup.  After that, 
>>> everything
>>> benefits from having the correct (larger) page size.
>>
>> I tried shuffling the memory initialization code after cpu initialization.
>> but it was full mess. So could not proceed further.
>>
>> However, I tried early creation cpu objects instead of doing it from
>> machvirt_init.
>> With this, initfn of the cpu model is called earlier and feature set is 
>> updated.
>> With that I could fetch arm architecture info. Based on this we can
>> choose page size.
>
> This won't work, because machvirt_init needs to be able to specify
> properties of the CPU. I think we need to solve the issues with
> dynamically reinitializing and reallocating the data structures,
> as rth suggests.

In early init, only the cpu object is created; no properties of the CPU
are specified at that point.
machvirt_init will then reuse the already-created cpu object and specify
the properties of the CPU.

Regards
Vijay

Re: [Qemu-devel] [RFC PATCH v1 4/4] target-arm: Compute page size based on ARM target cpu type

2016-06-17 Thread Vijay Kilari

On Fri, Jun 17, 2016 at 1:12 AM, Richard Henderson  wrote:
> On 06/14/2016 04:36 AM, Peter Maydell wrote:
>> It would be better to delay the point at which we allocate
>> the data structures which care about page size, rather than
>> moving init of the CPU earlier.
>
> It would be *best* if we could re-initialize and re-allocate these data
> structures so that we can follow the current page size as it changes.
>
> Yes, this might require flushing just about everything, but it's the kind of
> thing that's likely to happen only at system startup.  After that, everything
> benefits from having the correct (larger) page size.

I tried moving the memory initialization code after cpu initialization,
but it was a complete mess, so I could not proceed further.

However, I tried creating the cpu objects early instead of doing it from
machvirt_init.
With this, the initfn of the cpu model is called earlier and the feature set is updated.
With that I could fetch the arm architecture info. Based on this we can
choose the page size.
I can send out an RFC patch.

>
>> Also we should consider what happens if we have decided
>> the page size is X, and then a CPU is hotplugged which
>> requires a page size Y where Y < X.
>
> Is that a real possibility?  Or trivially true because the new cpu has yet to
> be initialized by the OS to use the regular OS page size.
>
>
> r~

Re: [Qemu-devel] [RFC PATCH v1 2/4] exec.c: Remove static allocation of sub_section of sub_page

2016-06-17 Thread Vijay Kilari

Hi Paolo,

On Mon, Jun 13, 2016 at 3:22 PM, Paolo Bonzini  wrote:
>
>
> On 13/06/2016 11:08, vija...@caviumnetworks.com wrote:
>> From: Vijaya Kumar K 
>>
>> Allocate sub_section dynamically. Remove dependency
>> on TARGET_PAGE_SIZE to make run-time page size detection
>> for arm platforms.
>>
>> Signed-off-by: Vijaya Kumar K 
>> ---
>>  exec.c |5 +++--
>>  1 file changed, 3 insertions(+), 2 deletions(-)
>>
>> diff --git a/exec.c b/exec.c
>> index a9d465b..e803a41 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -154,7 +154,7 @@ typedef struct subpage_t {
>>  MemoryRegion iomem;
>>  AddressSpace *as;
>>  hwaddr base;
>> -uint16_t sub_section[TARGET_PAGE_SIZE];
>> +uint16_t *sub_section;
>
> Please make this a flexible array member instead, so that you can avoid
> the extra pointer dereference.

What do you mean by a flexible array member? Please give more info.

Re: [Qemu-devel] [RFC PATCH v1 4/4] target-arm: Compute page size based on ARM target cpu type

2016-06-14 Thread Vijay Kilari

On Mon, Jun 13, 2016 at 3:40 PM, Peter Maydell  wrote:
> On 13 June 2016 at 10:43, Peter Maydell  wrote:
>> On 13 June 2016 at 10:08,   wrote:
>>> +/*
>>> + * Holds TARGET_AARCH_64_PAGE_BITS or TARGET_ARM_PAGE_BITS
>>> + * based on the the cpu type emulated at runtime.
>>> + */
>>> +static uint32_t target_page_bits;
>>
>> The CPU page size is not specific to the 'virt' board, so this
>> is the wrong place to do this. You should identify the
>> page size in arm_cpu_realizefn() based on the set of feature
>> bits the CPU has: anything with ARM_FEATURE_V7 has a 4K page
>> table (this includes a lot of 32-bit CPUs).

  cpu_init and cpu_realizefn() of the required cpu model are called in
machvirt_init(),
which is quite late in the initialization sequence.
cpu_exec_init_all(), which calls memory_map_init(), is called at a very
early stage,
and that is where the TARGET_PAGE_BITS information is required.

In order to get the feature information of the CPU early, one option is to
create the cpu object
early and initialize it. This means moving some cpu object creation and
initialization
code out of machvirt_init().

>
> Actually that should be "with ARM_FEATURE_V7 and not
> ARM_FEATURE_MPU", or we'll break the PMSA code.
>
> Note that you'll also need to handle systems where the
> different CPUs in it disagree about the preferred target
> page size -- the xlnx-ep108 board can have both
> Cortex-A53 (prefers 4K) and Cortex-R5 (prefers 1K) CPUs in it.
> "Use the smallest value required by any CPU on the board"
> is probably the best approach.

How are the -cpu options passed for the xlnx-ep108 board on the qemu command line?

>
> thanks
> -- PMM

Re: [Qemu-devel] [RFC PATCH v1 1/2] target-arm: Update page size for aarch64

2016-05-31 Thread Vijay Kilari

Hi Peter

On Wed, Apr 6, 2016 at 8:31 PM, Vijay Kilari <vijay.kil...@gmail.com> wrote:
> On Mon, Apr 4, 2016 at 10:14 PM, Peter Maydell <peter.mayd...@linaro.org> 
> wrote:
>> On 4 April 2016 at 17:40, Vijay Kilari <vijay.kil...@gmail.com> wrote:
>>> On Mon, Apr 4, 2016 at 7:14 PM, Peter Maydell <peter.mayd...@linaro.org> 
>>> wrote:
>>>> I agree that this would definitely improve performance (both for
>>>> migration and for emulated guests), but I'm afraid this breaks
>>>> running 32-bit ARMv5 and ARMv7M guests with this QEMU binary,
>>>> so we can't do this. If we want to allow the minimum page size to
>>>> be bigger than 1K for AArch64 CPUs then we need to make it a
>>>> runtime settable thing rather than compile-time (which is not
>>>> an entirely trivial thing).
>>>
>>> Do you mean to say that based on -cpu type qemu option
>>> choose the page size at runtime?
>>
>> If you want to avoid defining TARGET_PAGE_SIZE to the
>> lowest-common-denominator 1K, then yes, you'd need to
>> choose it at runtime. That could be painful to implement.
>
> Had a look at it. Needs some changes in common code as well.
> I will send this as a separate patch series and drop this patch
> from this series.

The L1 page table size and the L1 shift are dependent on TARGET_PAGE_BITS (the page size),
as shown in the code snippet below from translate-all.c:

/* The bits remaining after N lower levels of page tables.  */
#define V_L1_BITS_REM \
((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS)

#if V_L1_BITS_REM < 4
#define V_L1_BITS  (V_L1_BITS_REM + V_L2_BITS)
#else
#define V_L1_BITS  V_L1_BITS_REM
#endif

#define V_L1_SIZE  ((target_ulong)1 << V_L1_BITS)

#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)

/* The bottom level has pointers to PageDesc */
static void *l1_map[V_L1_SIZE];

How about adding a CONFIG_PAGE_SIZE option to configure?

Re: [Qemu-devel] [RFC PATCH v2 2/3] utils: Add cpuinfo helper to fetch /proc/cpuinfo

2016-05-08 Thread Vijay Kilari

Hi Suzuki/Peter,

On Wed, Apr 13, 2016 at 5:59 PM, Suzuki K Poulose
<suzuki.poul...@arm.com> wrote:
> On 13/04/16 10:54, Vijay Kilari wrote:
>>
>> On Mon, Apr 11, 2016 at 3:07 PM, Suzuki K Poulose
>> <suzuki.poul...@arm.com> wrote:
>>>
>>> On 11/04/16 07:52, Vijay Kilari wrote:
>
>
>>
>> Hi Suzuki,
>>
>>   The last 5 patches are not compiling on v4.4. Looks like your patch
>> series is not merged completely. Can you please
>> rebase your patches and let me know.
>>
>
> Could you please give the tree below a try ?
>
> git://linux-arm.org/linux-skp.git cpu-ftr/v3-4.3-rc4

This works.
Now the question is: are your patches getting merged anytime soon?
If not, I prefer to go with /proc/cpuinfo.

Another solution is to look for /sys/devices/system/cpu/cpu$ID/identification/midr
and, if it is not available, fall back to /proc/cpuinfo.

Regards
Vijay

Re: [Qemu-devel] [RFC PATCH v2 2/3] utils: Add cpuinfo helper to fetch /proc/cpuinfo

2016-04-13 Thread Vijay Kilari

On Mon, Apr 11, 2016 at 3:07 PM, Suzuki K Poulose
<suzuki.poul...@arm.com> wrote:
> On 11/04/16 07:52, Vijay Kilari wrote:
>>
>> Adding Suzuki Poulose.
>>
>> Hi Suzuki,
>>
>> On Fri, Apr 8, 2016 at 3:13 PM, Peter Maydell <peter.mayd...@linaro.org>
>> wrote:
>>>
>>> On 8 April 2016 at 07:21, Vijay Kilari <vijay.kil...@gmail.com> wrote:
>>>>
>>>> On Thu, Apr 7, 2016 at 5:15 PM, Peter Maydell <peter.mayd...@linaro.org>
>>>> wrote:
>>>>>
>>>>> I'm told there are kernel patches in progress to get this sort
>>>>> of information in a maintainable way to userspace, which are
>>>>> currently somewhat stalled due to lack of anybody who wants to
>>>>> consume it. If you have a use case then you should probably
>>>>> flag it up with the kernel devs

Hi Peter,

   Looks like getting Suzuki's patches merged might take some time.
I propose to use /proc/cpuinfo for now and later I can move to using
Suzuki's way.


>>>>
>>>>
>>>> Can you please give references to those patches/discussion?
>>>
>>>
>>> I'm told the most recent thread is https://lkml.org/lkml/2015/10/5/517
>>> (and that most of the patches in that series have gone in, except
>>> for the last 4 or 5 which implement the ABI).
>>
>>
>> Can you please throw some light on what is the status of ABI to
>> read cpu information in user space.
>> I wanted to know cpu implementer, part number in QEMU utils
>> to add prefetches to speed up live migration for Thunderx platform.
>>
>
> As for the patch series, except for that last 5 patches (which actually
> implements
> the ABI), the infrastructure patches have been merged in v4.4.
>
> We are awaiting feedback from possible consumers like toolchain (gcc,
> glibc).
> If you think this will be suitable for you, thats good to know. There is
> documentation available in the last patch in the above series. Could you
> please
> try the series (on v4.4, which would be easier, by simply picking up the
> last
> 5 patches) and let us know if that works for you ?

Hi Suzuki,

 The last 5 patches are not compiling on v4.4. Looks like your patch
series is not merged completely. Can you please
rebase your patches and let me know.

Re: [Qemu-devel] [RFC PATCH v2 2/3] utils: Add cpuinfo helper to fetch /proc/cpuinfo

2016-04-11 Thread Vijay Kilari

Adding Suzuki Poulose.

Hi Suzuki,

On Fri, Apr 8, 2016 at 3:13 PM, Peter Maydell <peter.mayd...@linaro.org> wrote:
> On 8 April 2016 at 07:21, Vijay Kilari <vijay.kil...@gmail.com> wrote:
>> On Thu, Apr 7, 2016 at 5:15 PM, Peter Maydell <peter.mayd...@linaro.org> 
>> wrote:
>>> I'm told there are kernel patches in progress to get this sort
>>> of information in a maintainable way to userspace, which are
>>> currently somewhat stalled due to lack of anybody who wants to
>>> consume it. If you have a use case then you should probably
>>> flag it up with the kernel devs.
>>
>> Can you please give references to those patches/discussion?
>
> I'm told the most recent thread is https://lkml.org/lkml/2015/10/5/517
> (and that most of the patches in that series have gone in, except
> for the last 4 or 5 which implement the ABI).

Can you please shed some light on the status of the ABI for
reading cpu information from user space?
I want to know the cpu implementer and part number in the QEMU utils code,
in order to add prefetches that speed up live migration on the ThunderX platform.

Re: [Qemu-devel] [RFC PATCH v2 2/3] utils: Add cpuinfo helper to fetch /proc/cpuinfo

2016-04-08 Thread Vijay Kilari

Hi Peter,

On Thu, Apr 7, 2016 at 5:15 PM, Peter Maydell <peter.mayd...@linaro.org> wrote:
> On 7 April 2016 at 11:56, Vijay Kilari <vijay.kil...@gmail.com> wrote:
>> On Thu, Apr 7, 2016 at 3:41 PM, Peter Maydell <peter.mayd...@linaro.org> 
>> wrote:
>>> On 7 April 2016 at 10:58,  <vija...@caviumnetworks.com> wrote:
>>>> From: Vijaya Kumar K <vijaya.ku...@caviumnetworks.com>
>>>>
>>>> utils cannot read target cpu information to
>>>> fetch cpu information to implement cpu specific
>>>> features or erratas. For this parse /proc/cpuinfo
>>>> and fetch cpu information.
>>>>
>>>> For now this helper only fetches cpu information
>>>> for arm architectures.
>>>
>>> As I understand it /proc/cpuinfo is intended only for
>>> humans to read. Please don't write code to parse it;
>>> find a different way to get this information instead
>>> if you really need it.
>
>> Also unlike x86 there is no cpuid.h where we can get cpu identification
>> information for arm64.
>
> I'm told there are kernel patches in progress to get this sort
> of information in a maintainable way to userspace, which are
> currently somewhat stalled due to lack of anybody who wants to
> consume it. If you have a use case then you should probably
> flag it up with the kernel devs.

Can you please give references to those patches/discussion?

>
> That said, I think we should probably hold off on this
> discussion until we have clearer benchmarking info that
> demonstrates that doing these prefetches really does make
> a significant difference. I would much prefer to have a


The ThunderX pass2 board does not have hardware prefetch, so
explicit software prefetch instructions are required for this platform.
Here are the benchmarking results, with and without prefetch,
for an idle VM with 4 VCPUs and 8GB RAM.

Without prefetch, total migration time is 8.2 seconds.
With prefetch, total migration time is 2.7 seconds.

Without prefetch:


(qemu) info migrate
capabilities: xbzrle: off rdma-pin-all: off auto-converge: off
zero-blocks: off compress: off events: off x-postcopy-ram: off
Migration status: completed
total time: 8217 milliseconds
downtime: 86 milliseconds
setup: 4 milliseconds
transferred ram: 212624 kbytes
throughput: 212.08 mbps
remaining ram: 0 kbytes
total ram: 8520128 kbytes
duplicate: 2085805 pages
skipped: 0 pages
normal: 48478 pages
normal bytes: 193912 kbytes
dirty sync count: 3

With prefetch:

(qemu) info migrate
capabilities: xbzrle: off rdma-pin-all: off auto-converge: off
zero-blocks: off compress: off events: off x-postcopy-ram: off
Migration status: completed
total time: 2744 milliseconds
downtime: 48 milliseconds
setup: 5 milliseconds
transferred ram: 213526 kbytes
throughput: 637.76 mbps
remaining ram: 0 kbytes
total ram: 8520128 kbytes
duplicate: 2085014 pages
skipped: 0 pages
normal: 48705 pages
normal bytes: 194820 kbytes
dirty sync count: 3

> single aarch64 routine that works for everybody, rather
> than a thunderx-only special case.

Now, I found that the existing generic function
buffer_find_nonzero_offset_inner()
can be made to work with Neon, so there is no need for the special function
buffer_find_nonzero_offset_neon() that this patch series creates for arm64.
However, prefetch code still needs to be added for performance
reasons.

Re: [Qemu-devel] [RFC PATCH v2 2/3] utils: Add cpuinfo helper to fetch /proc/cpuinfo

2016-04-07 Thread Vijay Kilari

On Thu, Apr 7, 2016 at 3:41 PM, Peter Maydell  wrote:
> On 7 April 2016 at 10:58,   wrote:
>> From: Vijaya Kumar K 
>>
>> utils cannot read target cpu information to
>> fetch cpu information to implement cpu specific
>> features or erratas. For this parse /proc/cpuinfo
>> and fetch cpu information.
>>
>> For now this helper only fetches cpu information
>> for arm architectures.
>
> As I understand it /proc/cpuinfo is intended only for
> humans to read. Please don't write code to parse it;
> find a different way to get this information instead
> if you really need it.

  The utils code cannot have any dependency on target-specific code.
libqemuutil.a is compiled and linked before the target-specific
code is compiled.

  Also, the utils functions neither have a cpu object from which to fetch
cpu identification information (e.g. MIDR in the case of arm), nor can
they make any ioctl to read cpu information
from qemu.

Also unlike x86 there is no cpuid.h where we can get cpu identification
information for arm64.

So, I think userspace process can rely on /proc/cpuinfo for
fetching cpu information.

>
> (I'm not really happy about such specific-to-a-particular-vendor
> patches in QEMU anyway; we should have migration code that
> works acceptably for any implementation.)
>
> thanks
> -- PMM

Re: [Qemu-devel] [RFC PATCH v1 1/2] target-arm: Update page size for aarch64

2016-04-06 Thread Vijay Kilari

On Mon, Apr 4, 2016 at 10:14 PM, Peter Maydell <peter.mayd...@linaro.org> wrote:
> On 4 April 2016 at 17:40, Vijay Kilari <vijay.kil...@gmail.com> wrote:
>> On Mon, Apr 4, 2016 at 7:14 PM, Peter Maydell <peter.mayd...@linaro.org> 
>> wrote:
>>> I agree that this would definitely improve performance (both for
>>> migration and for emulated guests), but I'm afraid this breaks
>>> running 32-bit ARMv5 and ARMv7M guests with this QEMU binary,
>>> so we can't do this. If we want to allow the minimum page size to
>>> be bigger than 1K for AArch64 CPUs then we need to make it a
>>> runtime settable thing rather than compile-time (which is not
>>> an entirely trivial thing).
>>
>> Do you mean to say that based on -cpu type qemu option
>> choose the page size at runtime?
>
> If you want to avoid defining TARGET_PAGE_SIZE to the
> lowest-common-denominator 1K, then yes, you'd need to
> choose it at runtime. That could be painful to implement.

Had a look at it. Needs some changes in common code as well.
I will send this as a separate patch series and drop this patch
from this series.

>
> thanks
> -- PMM

Re: [Qemu-devel] [RFC PATCH v1 2/2] target-arm: Use Neon for zero checking

2016-04-06 Thread Vijay Kilari

On Tue, Apr 5, 2016 at 8:06 PM, Peter Maydell  wrote:
> On 4 April 2016 at 14:39,   wrote:
>> From: Vijay 
>>
>> Use Neon instructions to perform zero checking of
>> buffer. This is helps in reducing downtime during
>> live migration.
>>
>> Signed-off-by: Vijaya Kumar K 
>> ---
>>  util/cutils.c |   81 
>> +
>>  1 file changed, 81 insertions(+)
>>
>> diff --git a/util/cutils.c b/util/cutils.c
>> index 43d1afb..d343b9a 100644
>> --- a/util/cutils.c
>> +++ b/util/cutils.c
>> @@ -352,6 +352,87 @@ static void 
>> *can_use_buffer_find_nonzero_offset_ifunc(void)
>>  return func;
>>  }
>>  #pragma GCC pop_options
>> +
>> +#elif defined __aarch64__
>> +#include "arm_neon.h"
>
> Can we rely on all compilers having this, or do we need to
> test in configure?

GCC and armcc support the same intrinsics. Both need arm_neon.h
to be included.

>
>> +
>> +#define NEON_VECTYPE   uint64x2_t
>> +#define NEON_LOAD_N_ORR(v1, v2)    vorrq_u64(vld1q_u64(&v1), vld1q_u64(&v2))
>> +#define NEON_ORR(v1, v2)   vorrq_u64(v1, v2)
>> +#define NEON_EQ_ZERO(v1) \
>> +((vgetq_lane_u64(vceqzq_u64(v1), 0) == 0) || \
>> + (vgetq_lane_u64(vceqzq_u64(v1), 1)) == 0)
>
> The intrinsics are a bit confusing, but shouldn't we be
> testing that both lanes of v1 are 0, rather than whether
> either of them is? (so "&&", not "||").

The above check is correct. vceqzq_u64() sets all bits of a result lane to 1
if the corresponding input lane is 0. So if either lane of the result is 0,
the buffer is non-zero. I think redefining this macro as below would be
better, as it avoids vceqzq_u64():

#define NEON_NOT_EQ_ZERO(v1) \
((vgetq_lane_u64(v1, 0) != 0) || (vgetq_lane_u64(v1, 1)) != 0)

>
>> +
>> +#define BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR_NEON 16
>> +
>> +/*
>> + * Zero page/buffer checking using SIMD(Neon)
>> + */
>> +
>> +static bool
>> +can_use_buffer_find_nonzero_offset_neon(const void *buf, size_t len)
>> +{
>> +return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR_NEON
>> +   * sizeof(NEON_VECTYPE)) == 0
>> +&& ((uintptr_t) buf) % sizeof(NEON_VECTYPE) == 0);
>> +}
>> +
>> +static size_t buffer_find_nonzero_offset_neon(const void *buf, size_t len)
>> +{
>> +size_t i;
>> +NEON_VECTYPE d0, d1, d2, d3, d4, d5, d6;
>> +NEON_VECTYPE d7, d8, d9, d10, d11, d12, d13, d14;
>> +uint64_t const *data = buf;
>> +
>> +assert(can_use_buffer_find_nonzero_offset_neon(buf, len));
>> +len /= sizeof(unsigned long);
>> +
>> +for (i = 0; i < len; i += 32) {
>> +d0 = NEON_LOAD_N_ORR(data[i], data[i + 2]);
>> +d1 = NEON_LOAD_N_ORR(data[i + 4], data[i + 6]);
>> +d2 = NEON_LOAD_N_ORR(data[i + 8], data[i + 10]);
>> +d3 = NEON_LOAD_N_ORR(data[i + 12], data[i + 14]);
>> +d4 = NEON_ORR(d0, d1);
>> +d5 = NEON_ORR(d2, d3);
>> +d6 = NEON_ORR(d4, d5);
>> +
>> +d7 = NEON_LOAD_N_ORR(data[i + 16], data[i + 18]);
>> +d8 = NEON_LOAD_N_ORR(data[i + 20], data[i + 22]);
>> +d9 = NEON_LOAD_N_ORR(data[i + 24], data[i + 26]);
>> +d10 = NEON_LOAD_N_ORR(data[i + 28], data[i + 30]);
>> +d11 = NEON_ORR(d7, d8);
>> +d12 = NEON_ORR(d9, d10);
>> +d13 = NEON_ORR(d11, d12);
>> +
>> +d14 = NEON_ORR(d6, d13);
>> +if (NEON_EQ_ZERO(d14)) {
>> +break;
>> +}
>> +}
>
> Both the other optimised find_nonzero implementations in this
> file have two loops, not just one. Is it OK that this
> implementation has only a single loop?
>
> Paolo: do you know why we have two loops in the other
> implementations?

Paolo was right, as he mentioned in the previous email.
But I don't see much benefit with two loops, so I restricted it to
one loop.

>
>> +
>> +return i * sizeof(unsigned long);
>> +}
>> +
>> +static inline bool neon_support(void)
>> +{
>> +/*
>> + * Check if neon feature is supported.
>> + * By default neon is supported for aarch64.
>> + */
>> +return true;
>> +}
>
> There doesn't seem much point in this. We can assume Neon exists
> on any CPU we're going to run on (it's part of the ABI, the kernel
> assumes it, etc etc). So you can just implement the functions without
> the indirection functions below.
>
 Hmm. One reason was that compilation fails if we don't call the
can_use_buffer_find_nonzero_offset_inner() function from inside the Neon
implementation.
So I added this, similar to the Intel AVX2 code. I also thought that if any
platform does not implement
Neon, that case can be handled simply by changing this function.

>> +
>> +bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len)
>> +{
>> +return neon_support() ? can_use_buffer_find_nonzero_offset_neon(buf, 
>> len) :
>> +   can_use_buffer_find_nonzero_offset_inner(buf, len);
>> +}
>> +
>> +size_t buffer_find_nonzero_offset(const void *buf, size_t len)
>> +{
>> +return neon_support() ?

Re: [Qemu-devel] [RFC PATCH v1 1/2] target-arm: Update page size for aarch64

2016-04-04 Thread Vijay Kilari

On Mon, Apr 4, 2016 at 7:14 PM, Peter Maydell  wrote:
> On 4 April 2016 at 14:39,   wrote:
>> From: Vijay 
>>
>> Set target page size to minimum 4K for aarch64.
>> This helps to reduce live migration downtime significantly.
>>
>> Signed-off-by: Vijaya Kumar K 
>> ---
>>  target-arm/cpu.h |7 +++
>>  1 file changed, 7 insertions(+)
>>
>> diff --git a/target-arm/cpu.h b/target-arm/cpu.h
>> index 066ff67..2e4b48f 100644
>> --- a/target-arm/cpu.h
>> +++ b/target-arm/cpu.h
>> @@ -1562,11 +1562,18 @@ bool write_cpustate_to_list(ARMCPU *cpu);
>>  #if defined(CONFIG_USER_ONLY)
>>  #define TARGET_PAGE_BITS 12
>>  #else
>> +/*
>> + * Aarch64 support minimum 4K page size
>> + */
>> +#if defined(TARGET_AARCH64)
>> +#define TARGET_PAGE_BITS 12
>
> I agree that this would definitely improve performance (both for
> migration and for emulated guests), but I'm afraid this breaks
> running 32-bit ARMv5 and ARMv7M guests with this QEMU binary,
> so we can't do this. If we want to allow the minimum page size to
> be bigger than 1K for AArch64 CPUs then we need to make it a
> runtime settable thing rather than compile-time (which is not
> an entirely trivial thing).

Do you mean that the page size should be chosen at runtime,
based on the -cpu type option passed to qemu?

>
>> +#else
>>  /* The ARM MMU allows 1k pages.  */
>>  /* ??? Linux doesn't actually use these, and they're deprecated in recent
>> architecture revisions.  Maybe a configure option to disable them.  */
>>  #define TARGET_PAGE_BITS 10
>>  #endif
>> +#endif
>>
>>  #if defined(TARGET_AARCH64)
>>  #  define TARGET_PHYS_ADDR_SPACE_BITS 48
>
> thanks
> -- PMM

99 matches

Mail list logo