RE: [PATCH v3] KVM: nVMX: nested TPR shadow/threshold emulation

2014-08-05 Thread Zhang, Yang Z
Wanpeng Li wrote on 2014-08-04:
 This patch fixes bug https://bugzilla.kernel.org/show_bug.cgi?id=61411
 
 The TPR shadow/threshold feature is important for speeding up Windows guests.
 Besides, it is a mandatory feature for certain VMMs.
 
 We map the virtual APIC page address and TPR threshold from the L1 VMCS. If a
 TPR_BELOW_THRESHOLD VM exit is triggered by the L2 guest and L1 is interested
 in it, we inject it into the L1 VMM for handling.
 
 Signed-off-by: Wanpeng Li wanpeng...@linux.intel.com
 ---
 v2 - v3:
  * nested vm entry failure if both tpr shadow and cr8 exiting bits are not set
 v1 - v2:
  * don't take L0's virtualize APIC accesses setting into account
  * virtual_apic_page do exactly the same thing that is done for
 apic_access_page
  * add the tpr threshold field to the read-write fields for shadow VMCS
 
  arch/x86/kvm/vmx.c | 38 --
  1 file changed, 36 insertions(+), 2 deletions(-)
 
 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index c604f3c..7a56e2c 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -379,6 +379,7 @@ struct nested_vmx {
* we must keep them pinned while L2 runs.
*/
   struct page *apic_access_page;
 + struct page *virtual_apic_page;
   u64 msr_ia32_feature_control;
 
   struct hrtimer preemption_timer;
 @@ -533,6 +534,7 @@ static int max_shadow_read_only_fields =
   ARRAY_SIZE(shadow_read_only_fields);
 
  static unsigned long shadow_read_write_fields[] = {
 + TPR_THRESHOLD,
   GUEST_RIP,
   GUEST_RSP,
   GUEST_CR0,
 @@ -2330,7 +2332,7 @@ static __init void
 nested_vmx_setup_ctls_msrs(void)
   CPU_BASED_MOV_DR_EXITING |
 CPU_BASED_UNCOND_IO_EXITING |
   CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
   CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING |
 - CPU_BASED_PAUSE_EXITING |
 + CPU_BASED_PAUSE_EXITING | CPU_BASED_TPR_SHADOW |
   CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
   /*
* We can allow some features even when not supported by the
 @@ -6148,6 +6150,10 @@ static void free_nested(struct vcpu_vmx *vmx)
   nested_release_page(vmx->nested.apic_access_page);
   vmx->nested.apic_access_page = 0;
   }
 + if (vmx->nested.virtual_apic_page) {
 + nested_release_page(vmx->nested.virtual_apic_page);
 + vmx->nested.virtual_apic_page = 0;
 + }
 
   nested_free_all_saved_vmcss(vmx);
  }
 @@ -6936,7 +6942,7 @@ static bool nested_vmx_exit_handled(struct
 kvm_vcpu *vcpu)
   case EXIT_REASON_MCE_DURING_VMENTRY:
   return 0;
   case EXIT_REASON_TPR_BELOW_THRESHOLD:
 - return 1;
 + return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
   case EXIT_REASON_APIC_ACCESS:
   return nested_cpu_has2(vmcs12,
   SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
 @@ -7057,6 +7063,9 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 
  static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
  {
 + if (is_guest_mode(vcpu))
 + return;
 +
   if (irr == -1 || tpr < irr) {
   vmcs_write32(TPR_THRESHOLD, 0);
   return;
 @@ -8024,6 +8033,27 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu,
 struct vmcs12 *vmcs12)
   exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
   exec_control &= ~CPU_BASED_TPR_SHADOW;
   exec_control |= vmcs12->cpu_based_vm_exec_control;
 +
 + if (exec_control & CPU_BASED_TPR_SHADOW) {
 + if (vmx->nested.virtual_apic_page)
 + nested_release_page(vmx->nested.virtual_apic_page);
 + vmx->nested.virtual_apic_page =
 +    nested_get_page(vcpu, vmcs12->virtual_apic_page_addr);
 + if (!vmx->nested.virtual_apic_page)
 + exec_control &=
 + ~CPU_BASED_TPR_SHADOW;
 + else
 + vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
 + page_to_phys(vmx->nested.virtual_apic_page));
 +
 + if (!(exec_control & CPU_BASED_TPR_SHADOW) &&
 +     !((exec_control & CPU_BASED_CR8_LOAD_EXITING) &&
 +       (exec_control & CPU_BASED_CR8_STORE_EXITING)))
 + nested_vmx_failValid(vcpu,
 VMXERR_ENTRY_INVALID_CONTROL_FIELD);

I think this is not correct. The vmx->nested.virtual_apic_page may not be valid
for two reasons:
1. The virtual_apic_page_addr is not a valid gfn. In this case, the VMX failure
must be injected into L1 unconditionally, regardless of the setting of CR8
load/store exiting.
2. The virtual_apic_page has been swapped out by L0. In this case, we should not
inject a failure into L1.
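
For illustration, a hedged sketch of the kind of check being described here (the
helper name and the exact tests are assumptions for discussion, not the final patch):

/*
 * Sketch only: decide whether a missing virtual-APIC page is L1's fault.
 * Return true when the nested entry should fail with
 * VMXERR_ENTRY_INVALID_CONTROL_FIELD; return false when L0 simply failed
 * to pin an otherwise valid page, in which case no failure should be
 * injected into L1.
 */
static bool nested_vapic_addr_is_l1_error(struct kvm_vcpu *vcpu, u64 vapic_addr)
{
	if (!IS_ALIGNED(vapic_addr, PAGE_SIZE))
		return true;	/* case 1: malformed address supplied by L1 */
	if (!kvm_is_visible_gfn(vcpu->kvm, vapic_addr >> PAGE_SHIFT))
		return true;	/* case 1: gfn outside L1's memory map */
	return false;		/* case 2: valid gfn, page just not resident/pinned */
}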

 +
 + vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
 + }

Missing else here:
If L2 owns the APIC and doesn't use TPR_SHADOW, we need to set up vmcs02
based on vmcs01. For example, if vmcs01 is using TPR_SHADOW, then vmcs02
must set

[PATCH 2/5] kvmtool: ARM64: Fix compile error for aarch64

2014-08-05 Thread Anup Patel
The __ARM64_SYS_REG() macro is already defined in uapi/asm/kvm.h
of Linux-3.16-rcX, hence remove it from arm/aarch64/kvm-cpu.c.

Signed-off-by: Pranavkumar Sawargaonkar pranavku...@linaro.org
Signed-off-by: Anup Patel anup.pa...@linaro.org
---
 tools/kvm/arm/aarch64/kvm-cpu.c |   11 ---
 1 file changed, 11 deletions(-)

diff --git a/tools/kvm/arm/aarch64/kvm-cpu.c b/tools/kvm/arm/aarch64/kvm-cpu.c
index 71a2a3a..545171b 100644
--- a/tools/kvm/arm/aarch64/kvm-cpu.c
+++ b/tools/kvm/arm/aarch64/kvm-cpu.c
@@ -19,17 +19,6 @@
	(((x) << KVM_REG_ARM64_SYSREG_ ## n ## _SHIFT) & \
	 KVM_REG_ARM64_SYSREG_ ## n ## _MASK)
 
-#define __ARM64_SYS_REG(op0,op1,crn,crm,op2)   \
-   (KVM_REG_ARM64 | KVM_REG_SIZE_U64   |   \
-KVM_REG_ARM64_SYSREG   |   \
-ARM64_SYS_REG_SHIFT_MASK(op0, OP0) |   \
-ARM64_SYS_REG_SHIFT_MASK(op1, OP1) |   \
-ARM64_SYS_REG_SHIFT_MASK(crn, CRN) |   \
-ARM64_SYS_REG_SHIFT_MASK(crm, CRM) |   \
-ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
-
-#define ARM64_SYS_REG(...) __ARM64_SYS_REG(__VA_ARGS__)
-
 unsigned long kvm_cpu__get_vcpu_mpidr(struct kvm_cpu *vcpu)
 {
struct kvm_one_reg reg;
-- 
1.7.9.5



[PATCH 0/5] kvmtool: ARM/ARM64: Misc updates

2014-08-05 Thread Anup Patel
This patchset updates KVMTOOL to use some of the features
supported by Linux-3.16 KVM ARM/ARM64, such as:

1. Target CPU == Host using KVM_ARM_PREFERRED_TARGET vm ioctl
2. Target CPU type Potenza for using KVMTOOL on X-Gene
3. PSCI v0.2 support for Aarch32 and Aarch64 guest
4. System event exit reason

Anup Patel (5):
  kvmtool: ARM: Use KVM_ARM_PREFERRED_TARGET vm ioctl to determine
target cpu
  kvmtool: ARM64: Fix compile error for aarch64
  kvmtool: ARM64: Add target type potenza for aarch64
  kvmtool: Handle exit reason KVM_EXIT_SYSTEM_EVENT
  kvmtool: ARM/ARM64: Provide PSCI-0.2 guest when in-kernel KVM
supports it

 tools/kvm/arm/aarch64/arm-cpu.c |9 -
 tools/kvm/arm/aarch64/kvm-cpu.c |   11 ---
 tools/kvm/arm/fdt.c |   39 +--
 tools/kvm/arm/kvm-cpu.c |   26 +-
 tools/kvm/kvm-cpu.c |6 ++
 5 files changed, 68 insertions(+), 23 deletions(-)

-- 
1.7.9.5



[PATCH 1/5] kvmtool: ARM: Use KVM_ARM_PREFERRED_TARGET vm ioctl to determine target cpu

2014-08-05 Thread Anup Patel
Instead of trying out each and every target type, we should use the
KVM_ARM_PREFERRED_TARGET vm ioctl to determine the target type
for KVM ARM/ARM64.

We bail out if the target type returned by the KVM_ARM_PREFERRED_TARGET
vm ioctl is not known to kvmtool.

Signed-off-by: Pranavkumar Sawargaonkar pranavku...@linaro.org
Signed-off-by: Anup Patel anup.pa...@linaro.org
---
 tools/kvm/arm/kvm-cpu.c |   21 -
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/tools/kvm/arm/kvm-cpu.c b/tools/kvm/arm/kvm-cpu.c
index aeaa4cf..7478f8f 100644
--- a/tools/kvm/arm/kvm-cpu.c
+++ b/tools/kvm/arm/kvm-cpu.c
@@ -34,6 +34,7 @@ struct kvm_cpu *kvm_cpu__arch_init(struct kvm *kvm, unsigned long cpu_id)
struct kvm_cpu *vcpu;
int coalesced_offset, mmap_size, err = -1;
unsigned int i;
+   struct kvm_vcpu_init preferred_init;
struct kvm_vcpu_init vcpu_init = {
.features = ARM_VCPU_FEATURE_FLAGS(kvm, cpu_id)
};
@@ -46,6 +47,10 @@ struct kvm_cpu *kvm_cpu__arch_init(struct kvm *kvm, unsigned long cpu_id)
	if (vcpu->vcpu_fd < 0)
		die_perror("KVM_CREATE_VCPU ioctl");
 
+	err = ioctl(kvm->vm_fd, KVM_ARM_PREFERRED_TARGET, &preferred_init);
+	if (err < 0)
+		die_perror("KVM_ARM_PREFERRED_TARGET ioctl");
+
	mmap_size = ioctl(kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
	if (mmap_size < 0)
		die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");
@@ -55,17 +60,22 @@ struct kvm_cpu *kvm_cpu__arch_init(struct kvm *kvm, unsigned long cpu_id)
	if (vcpu->kvm_run == MAP_FAILED)
		die("unable to mmap vcpu fd");
 
-   /* Find an appropriate target CPU type. */
+   /* Match preferred target CPU type. */
+   target = NULL;
	for (i = 0; i < ARRAY_SIZE(kvm_arm_targets); ++i) {
		if (!kvm_arm_targets[i])
			continue;
-		target = kvm_arm_targets[i];
-		vcpu_init.target = target->id;
-		err = ioctl(vcpu->vcpu_fd, KVM_ARM_VCPU_INIT, &vcpu_init);
-		if (!err)
+		if (kvm_arm_targets[i]->id == preferred_init.target) {
+			target = kvm_arm_targets[i];
			break;
+		}
+	}
+	if (!target) {
		die("preferred target not available\n");
	}
 
+	vcpu_init.target = preferred_init.target;
+	err = ioctl(vcpu->vcpu_fd, KVM_ARM_VCPU_INIT, &vcpu_init);
	if (err || target->init(vcpu))
		die("Unable to initialise ARM vcpu");
 
@@ -81,6 +91,7 @@ struct kvm_cpu *kvm_cpu__arch_init(struct kvm *kvm, unsigned long cpu_id)
	vcpu->cpu_type		= target->id;
	vcpu->cpu_compatible	= target->compatible;
	vcpu->is_running	= true;
+
return vcpu;
 }
 
-- 
1.7.9.5



[PATCH 5/5] kvmtool: ARM/ARM64: Provide PSCI-0.2 guest when in-kernel KVM supports it

2014-08-05 Thread Anup Patel
If in-kernel KVM supports PSCI-0.2 emulation, then we should set the
KVM_ARM_VCPU_PSCI_0_2 feature for each guest VCPU and also
provide "arm,psci-0.2", "arm,psci" as the PSCI compatible strings.

This patch updates kvm_cpu__arch_init() and setup_fdt() as
per above.

Signed-off-by: Pranavkumar Sawargaonkar pranavku...@linaro.org
Signed-off-by: Anup Patel anup.pa...@linaro.org
---
 tools/kvm/arm/fdt.c |   39 +--
 tools/kvm/arm/kvm-cpu.c |5 +
 2 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/tools/kvm/arm/fdt.c b/tools/kvm/arm/fdt.c
index 186a718..93849cf2 100644
--- a/tools/kvm/arm/fdt.c
+++ b/tools/kvm/arm/fdt.c
@@ -13,6 +13,7 @@
 #include <linux/byteorder.h>
 #include <linux/kernel.h>
 #include <linux/sizes.h>
+#include <linux/psci.h>
 
 static char kern_cmdline[COMMAND_LINE_SIZE];
 
@@ -162,12 +163,38 @@ static int setup_fdt(struct kvm *kvm)
 
/* PSCI firmware */
	_FDT(fdt_begin_node(fdt, "psci"));
-	_FDT(fdt_property_string(fdt, "compatible", "arm,psci"));
-	_FDT(fdt_property_string(fdt, "method", "hvc"));
-	_FDT(fdt_property_cell(fdt, "cpu_suspend", KVM_PSCI_FN_CPU_SUSPEND));
-	_FDT(fdt_property_cell(fdt, "cpu_off", KVM_PSCI_FN_CPU_OFF));
-	_FDT(fdt_property_cell(fdt, "cpu_on", KVM_PSCI_FN_CPU_ON));
-	_FDT(fdt_property_cell(fdt, "migrate", KVM_PSCI_FN_MIGRATE));
+	if (kvm__supports_extension(kvm, KVM_CAP_ARM_PSCI_0_2)) {
+		const char compatible[] = "arm,psci-0.2\0arm,psci";
+		_FDT(fdt_property(fdt, "compatible",
+				  compatible, sizeof(compatible)));
+		_FDT(fdt_property_string(fdt, "method", "hvc"));
+		if (kvm->cfg.arch.aarch32_guest) {
+			_FDT(fdt_property_cell(fdt, "cpu_suspend",
+					PSCI_0_2_FN_CPU_SUSPEND));
+			_FDT(fdt_property_cell(fdt, "cpu_off",
+					PSCI_0_2_FN_CPU_OFF));
+			_FDT(fdt_property_cell(fdt, "cpu_on",
+					PSCI_0_2_FN_CPU_ON));
+			_FDT(fdt_property_cell(fdt, "migrate",
+					PSCI_0_2_FN_MIGRATE));
+		} else {
+			_FDT(fdt_property_cell(fdt, "cpu_suspend",
+					PSCI_0_2_FN64_CPU_SUSPEND));
+			_FDT(fdt_property_cell(fdt, "cpu_off",
+					PSCI_0_2_FN_CPU_OFF));
+			_FDT(fdt_property_cell(fdt, "cpu_on",
+					PSCI_0_2_FN64_CPU_ON));
+			_FDT(fdt_property_cell(fdt, "migrate",
+					PSCI_0_2_FN64_MIGRATE));
+		}
+	} else {
+		_FDT(fdt_property_string(fdt, "compatible", "arm,psci"));
+		_FDT(fdt_property_string(fdt, "method", "hvc"));
+		_FDT(fdt_property_cell(fdt, "cpu_suspend", KVM_PSCI_FN_CPU_SUSPEND));
+		_FDT(fdt_property_cell(fdt, "cpu_off", KVM_PSCI_FN_CPU_OFF));
+		_FDT(fdt_property_cell(fdt, "cpu_on", KVM_PSCI_FN_CPU_ON));
+		_FDT(fdt_property_cell(fdt, "migrate", KVM_PSCI_FN_MIGRATE));
+   }
_FDT(fdt_end_node(fdt));
 
/* Finalise. */
diff --git a/tools/kvm/arm/kvm-cpu.c b/tools/kvm/arm/kvm-cpu.c
index 7478f8f..76c28a0 100644
--- a/tools/kvm/arm/kvm-cpu.c
+++ b/tools/kvm/arm/kvm-cpu.c
@@ -74,6 +74,11 @@ struct kvm_cpu *kvm_cpu__arch_init(struct kvm *kvm, unsigned long cpu_id)
		die("preferred target not available\n");
}
 
+   /* Set KVM_ARM_VCPU_PSCI_0_2 if available */
+   if (kvm__supports_extension(kvm, KVM_CAP_ARM_PSCI_0_2)) {
+		vcpu_init.features[0] |= (1UL << KVM_ARM_VCPU_PSCI_0_2);
+   }
+
vcpu_init.target = preferred_init.target;
	err = ioctl(vcpu->vcpu_fd, KVM_ARM_VCPU_INIT, &vcpu_init);
	if (err || target->init(vcpu))
-- 
1.7.9.5



[PATCH 3/5] kvmtool: ARM64: Add target type potenza for aarch64

2014-08-05 Thread Anup Patel
The VCPU target type KVM_ARM_TARGET_XGENE_POTENZA is available
in Linux-3.16-rcX or higher, hence register an aarch64 target
type for it.

This patch enables us to run KVMTOOL on X-Gene Potenza host.

Signed-off-by: Pranavkumar Sawargaonkar pranavku...@linaro.org
Signed-off-by: Anup Patel anup.pa...@linaro.org
---
 tools/kvm/arm/aarch64/arm-cpu.c |9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tools/kvm/arm/aarch64/arm-cpu.c b/tools/kvm/arm/aarch64/arm-cpu.c
index ce5ea2f..ce526e3 100644
--- a/tools/kvm/arm/aarch64/arm-cpu.c
+++ b/tools/kvm/arm/aarch64/arm-cpu.c
@@ -41,10 +41,17 @@ static struct kvm_arm_target target_cortex_a57 = {
.init   = arm_cpu__vcpu_init,
 };
 
+static struct kvm_arm_target target_potenza = {
+   .id = KVM_ARM_TARGET_XGENE_POTENZA,
+	.compatible	= "arm,arm-v8",
+   .init   = arm_cpu__vcpu_init,
+};
+
 static int arm_cpu__core_init(struct kvm *kvm)
 {
return (kvm_cpu__register_kvm_arm_target(target_aem_v8) ||
kvm_cpu__register_kvm_arm_target(target_foundation_v8) ||
-   kvm_cpu__register_kvm_arm_target(target_cortex_a57));
+   kvm_cpu__register_kvm_arm_target(target_cortex_a57) ||
+   kvm_cpu__register_kvm_arm_target(target_potenza));
 }
 core_init(arm_cpu__core_init);
-- 
1.7.9.5



[PATCH 4/5] kvmtool: Handle exit reason KVM_EXIT_SYSTEM_EVENT

2014-08-05 Thread Anup Patel
The KVM_EXIT_SYSTEM_EVENT exit reason was added to define
architecture independent system-wide events for a Guest.

Currently, it is used by in-kernel PSCI-0.2 emulation of
KVM ARM/ARM64 to inform user space about PSCI SYSTEM_OFF
or PSCI SYSTEM_RESET request.

For now, we simply treat all system-wide guest events as
the same and shut down the guest upon KVM_EXIT_SYSTEM_EVENT.
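
A hedged sketch of how the handler could later tell the two event types apart,
inside the same exit-reason switch shown in the diff below (the reset path is an
assumption, not part of this patch):

	case KVM_EXIT_SYSTEM_EVENT:
		switch (cpu->kvm_run->system_event.type) {
		case KVM_SYSTEM_EVENT_RESET:
			/* a fuller implementation might restart the guest here;
			 * for now fall through and shut down */
		case KVM_SYSTEM_EVENT_SHUTDOWN:
		default:
			goto exit_kvm;
		}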

Signed-off-by: Pranavkumar Sawargaonkar pranavku...@linaro.org
Signed-off-by: Anup Patel anup.pa...@linaro.org
---
 tools/kvm/kvm-cpu.c |6 ++
 1 file changed, 6 insertions(+)

diff --git a/tools/kvm/kvm-cpu.c b/tools/kvm/kvm-cpu.c
index ee0a8ec..e20ee4b 100644
--- a/tools/kvm/kvm-cpu.c
+++ b/tools/kvm/kvm-cpu.c
@@ -160,6 +160,12 @@ int kvm_cpu__start(struct kvm_cpu *cpu)
goto exit_kvm;
case KVM_EXIT_SHUTDOWN:
goto exit_kvm;
+   case KVM_EXIT_SYSTEM_EVENT:
+   /*
+		 * Treat both SHUTDOWN & RESET system events
+* as shutdown request.
+*/
+   goto exit_kvm;
default: {
bool ret;
 
-- 
1.7.9.5



Re: [PATCH 0/5] kvmtool: ARM/ARM64: Misc updates

2014-08-05 Thread Anup Patel
On 5 August 2014 14:19, Anup Patel anup.pa...@linaro.org wrote:
 This patchset updates KVMTOOL to use some of the features
 supported by Linux-3.16 KVM ARM/ARM64, such as:

 1. Target CPU == Host using KVM_ARM_PREFERRED_TARGET vm ioctl
 2. Target CPU type Potenza for using KVMTOOL on X-Gene
 3. PSCI v0.2 support for Aarch32 and Aarch64 guest
 4. System event exit reason

 Anup Patel (5):
   kvmtool: ARM: Use KVM_ARM_PREFERRED_TARGET vm ioctl to determine
 target cpu
   kvmtool: ARM64: Fix compile error for aarch64
   kvmtool: ARM64: Add target type potenza for aarch64
   kvmtool: Handle exit reason KVM_EXIT_SYSTEM_EVENT
   kvmtool: ARM/ARM64: Provide PSCI-0.2 guest when in-kernel KVM
 supports it

  tools/kvm/arm/aarch64/arm-cpu.c |9 -
  tools/kvm/arm/aarch64/kvm-cpu.c |   11 ---
  tools/kvm/arm/fdt.c |   39 
 +--
  tools/kvm/arm/kvm-cpu.c |   26 +-
  tools/kvm/kvm-cpu.c |6 ++
  5 files changed, 68 insertions(+), 23 deletions(-)

 --
 1.7.9.5


Hi All,

This patchset is tested on X-Gene Mustang and Foundation v8 model.

Regards,
Anup


[RFC PATCH 1/6] ARM64: Move PMU register related defines to asm/pmu.h

2014-08-05 Thread Anup Patel
To use the ARMv8 PMU related register defines from the KVM code,
we move the relevant definitions to the asm/pmu.h include file.

We also add #ifndef __ASSEMBLY__ guards so that asm/pmu.h can be
included from assembly code.

Signed-off-by: Anup Patel anup.pa...@linaro.org
Signed-off-by: Pranavkumar Sawargaonkar pranavku...@linaro.org
---
 arch/arm64/include/asm/pmu.h   |   44 
 arch/arm64/kernel/perf_event.c |   32 -
 2 files changed, 44 insertions(+), 32 deletions(-)

diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
index e6f0878..f49cc72 100644
--- a/arch/arm64/include/asm/pmu.h
+++ b/arch/arm64/include/asm/pmu.h
@@ -19,6 +19,49 @@
 #ifndef __ASM_PMU_H
 #define __ASM_PMU_H
 
+/*
+ * Per-CPU PMCR: config reg
+ */
+#define ARMV8_PMCR_E		(1 << 0) /* Enable all counters */
+#define ARMV8_PMCR_P		(1 << 1) /* Reset all counters */
+#define ARMV8_PMCR_C		(1 << 2) /* Cycle counter reset */
+#define ARMV8_PMCR_D		(1 << 3) /* CCNT counts every 64th cpu cycle */
+#define ARMV8_PMCR_X		(1 << 4) /* Export to ETM */
+#define ARMV8_PMCR_DP		(1 << 5) /* Disable CCNT if non-invasive debug*/
+#define	ARMV8_PMCR_N_SHIFT	11	 /* Number of counters supported */
+#define	ARMV8_PMCR_N_MASK	0x1f
+#define	ARMV8_PMCR_MASK		0x3f	 /* Mask for writable bits */
+
+/*
+ * PMCNTEN: counters enable reg
+ */
+#define	ARMV8_CNTEN_MASK	0xffffffff	/* Mask for writable bits */
+
+/*
+ * PMINTEN: counters interrupt enable reg
+ */
+#define	ARMV8_INTEN_MASK	0xffffffff	/* Mask for writable bits */
+
+/*
+ * PMOVSR: counters overflow flag status reg
+ */
+#define	ARMV8_OVSR_MASK		0xffffffff	/* Mask for writable bits */
+#define	ARMV8_OVERFLOWED_MASK	ARMV8_OVSR_MASK
+
+/*
+ * PMXEVTYPER: Event selection reg
+ */
+#define	ARMV8_EVTYPE_MASK	0xc80003ff	/* Mask for writable bits */
+#define	ARMV8_EVTYPE_EVENT	0x3ff		/* Mask for EVENT bits */
+
+/*
+ * Event filters for PMUv3
+ */
+#define	ARMV8_EXCLUDE_EL1	(1 << 31)
+#define	ARMV8_EXCLUDE_EL0	(1 << 30)
+#define	ARMV8_INCLUDE_EL2	(1 << 27)
+
+#ifndef __ASSEMBLY__
 #ifdef CONFIG_HW_PERF_EVENTS
 
 /* The events for a given PMU register set. */
@@ -79,4 +122,5 @@ int armpmu_event_set_period(struct perf_event *event,
int idx);
 
 #endif /* CONFIG_HW_PERF_EVENTS */
+#endif /* __ASSEMBLY__ */
 #endif /* __ASM_PMU_H */
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index baf5afb..47dfb8b 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -810,38 +810,6 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 #define	ARMV8_IDX_TO_COUNTER(x)	\
	(((x) - ARMV8_IDX_COUNTER0) & ARMV8_COUNTER_MASK)
 
-/*
- * Per-CPU PMCR: config reg
- */
-#define ARMV8_PMCR_E		(1 << 0) /* Enable all counters */
-#define ARMV8_PMCR_P		(1 << 1) /* Reset all counters */
-#define ARMV8_PMCR_C		(1 << 2) /* Cycle counter reset */
-#define ARMV8_PMCR_D		(1 << 3) /* CCNT counts every 64th cpu cycle */
-#define ARMV8_PMCR_X		(1 << 4) /* Export to ETM */
-#define ARMV8_PMCR_DP		(1 << 5) /* Disable CCNT if non-invasive debug*/
-#define	ARMV8_PMCR_N_SHIFT	11	 /* Number of counters supported */
-#define	ARMV8_PMCR_N_MASK	0x1f
-#define	ARMV8_PMCR_MASK		0x3f	 /* Mask for writable bits */
-
-/*
- * PMOVSR: counters overflow flag status reg
- */
-#define	ARMV8_OVSR_MASK		0xffffffff	/* Mask for writable bits */
-#define	ARMV8_OVERFLOWED_MASK	ARMV8_OVSR_MASK
-
-/*
- * PMXEVTYPER: Event selection reg
- */
-#define	ARMV8_EVTYPE_MASK	0xc80003ff	/* Mask for writable bits */
-#define	ARMV8_EVTYPE_EVENT	0x3ff		/* Mask for EVENT bits */
-
-/*
- * Event filters for PMUv3
- */
-#define	ARMV8_EXCLUDE_EL1	(1 << 31)
-#define	ARMV8_EXCLUDE_EL0	(1 << 30)
-#define	ARMV8_INCLUDE_EL2	(1 << 27)
-
 static inline u32 armv8pmu_pmcr_read(void)
 {
u32 val;
-- 
1.7.9.5



[RFC PATCH 6/6] ARM64: KVM: Upgrade to lazy context switch of PMU registers

2014-08-05 Thread Anup Patel
A full context switch of all PMU registers for both host and
guest can make the KVM world-switch very expensive.

This patch improves the current PMU context switch by implementing
a lazy context switch of PMU registers.

To achieve this, we trap all PMU register accesses and use a
per-VCPU dirty flag to keep track of whether the guest has updated
PMU registers or not. If the VCPU's PMU registers are dirty or its
PMCR_EL0.E bit is set, then we do a full context switch for both
host and guest.
(This is very similar to the lazy world switch for debug registers:
http://lists.infradead.org/pipermail/linux-arm-kernel/2014-July/271040.html)

Also, we always trap-and-emulate PMCR_EL0 to fake the number of event
counters available to the guest. For this PMCR_EL0 trap-and-emulate to
work correctly, we always save/restore PMCR_EL0 for both host and
guest, whereas the other PMU registers are saved/restored based
on the PMU dirty flag.
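
The dirty-flag idea in C form, as a hedged sketch (the names mirror the patch
below, but these helpers themselves are illustrative and not part of the series):

/* Any trapped PMU register write marks the VCPU's PMU state dirty. */
static void kvm_pmu_set_dirty(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pmu_flags |= KVM_ARM64_PMU_DIRTY;
}

/* The world switch saves/restores the full PMU register file only when the
 * guest actually touched the PMU or left counting enabled. */
static bool kvm_pmu_needs_full_switch(struct kvm_vcpu *vcpu, u64 guest_pmcr)
{
	return (vcpu->arch.pmu_flags & KVM_ARM64_PMU_DIRTY) ||
	       (guest_pmcr & ARMV8_PMCR_E);
}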

Signed-off-by: Anup Patel anup.pa...@linaro.org
Signed-off-by: Pranavkumar Sawargaonkar pranavku...@linaro.org
---
 arch/arm64/include/asm/kvm_asm.h  |3 +
 arch/arm64/include/asm/kvm_host.h |3 +
 arch/arm64/kernel/asm-offsets.c   |1 +
 arch/arm64/kvm/hyp.S  |   63 --
 arch/arm64/kvm/sys_regs.c |  248 +++--
 5 files changed, 298 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 93be21f..47b7fcd 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -132,6 +132,9 @@
 #define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
 #define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
 
+#define KVM_ARM64_PMU_DIRTY_SHIFT	0
+#define KVM_ARM64_PMU_DIRTY		(1 << KVM_ARM64_PMU_DIRTY_SHIFT)
+
 #ifndef __ASSEMBLY__
 struct kvm;
 struct kvm_vcpu;
diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index ae4cdb2..4dba2a3 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -117,6 +117,9 @@ struct kvm_vcpu_arch {
/* Timer state */
struct arch_timer_cpu timer_cpu;
 
+   /* PMU flags */
+   u64 pmu_flags;
+
/* PMU state */
struct pmu_cpu pmu_cpu;
 
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 053dc3e..4234794 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -140,6 +140,7 @@ int main(void)
   DEFINE(VGIC_CPU_NR_LR,   offsetof(struct vgic_cpu, nr_lr));
   DEFINE(KVM_VTTBR,offsetof(struct kvm, arch.vttbr));
   DEFINE(KVM_VGIC_VCTRL,   offsetof(struct kvm, arch.vgic.vctrl_base));
+  DEFINE(VCPU_PMU_FLAGS,   offsetof(struct kvm_vcpu, arch.pmu_flags));
   DEFINE(VCPU_PMU_IRQ_PENDING, offsetof(struct kvm_vcpu, 
arch.pmu_cpu.irq_pending));
 #endif
 #ifdef CONFIG_ARM64_CPU_SUSPEND
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 6b41c01..5f9ccee 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -443,6 +443,9 @@ __kvm_hyp_code_start:
and x5, x4, #~(ARMV8_PMCR_E)// Clear PMCR_EL0.E
msr pmcr_el0, x5// This will stop all counters
 
+   ldr x5, [x0, #VCPU_PMU_FLAGS] // Only save if dirty flag set
+   tbz x5, #KVM_ARM64_PMU_DIRTY_SHIFT, 1f
+
mov x3, #0
	ubfx	x4, x4, #ARMV8_PMCR_N_SHIFT, #5	// Number of event counters
cmp x4, #0  // Skip if no event counters
@@ -731,7 +734,7 @@ __kvm_hyp_code_start:
msr mdccint_el1, x21
 .endm
 
-.macro restore_pmu
+.macro restore_pmu, is_vcpu_pmu
// x2: base address for cpu context
	// x3: mask of counters allowed in EL0 & EL1
	// x4: number of event counters allowed in EL0 & EL1
@@ -741,16 +744,19 @@ __kvm_hyp_code_start:
cmp x5, #1  // Must be PMUv3 else skip
bne 1f
 
+   ldr x5, [x0, #VCPU_PMU_FLAGS] // Only restore if dirty flag set
+   tbz x5, #KVM_ARM64_PMU_DIRTY_SHIFT, 2f
+
mov x3, #0
mrs x4, pmcr_el0
	ubfx	x4, x4, #ARMV8_PMCR_N_SHIFT, #5	// Number of event counters
cmp x4, #0  // Skip if no event counters
-   beq 2f
+   beq 3f
sub x4, x4, #1  // Last event counter is reserved
mov x3, #1
lsl x3, x3, x4
sub x3, x3, #1
-2:	orr	x3, x3, #(1 << 31)	// Mask of event counters
+3:	orr	x3, x3, #(1 << 31)	// Mask of event counters
 
ldr x5, [x2, #CPU_SYSREG_OFFSET(PMCCFILTR_EL0)]
msr pmccfiltr_el0, x5   // Restore PMCCFILTR_EL0
@@ -772,15 +778,15 @@ __kvm_hyp_code_start:
lsl x5, x4, #4
add x5, x5, #CPU_SYSREG_OFFSET(PMEVCNTR0_EL0)
add x5, x2, x5
-3: cmp x4, #0
-   beq 4f
+4: cmp x4, #0
+   beq 5f
sub x4, x4, #1
ldp x6, x7, [x5, #-16]!
msr 

[RFC PATCH 4/6] ARM/ARM64: KVM: Add common code PMU IRQ routing

2014-08-05 Thread Anup Patel
This patch introduces common PMU IRQ routing code for
KVM ARM and KVM ARM64 under virt/kvm/arm directory.

The virtual PMU IRQ number for each Guest VCPU will be
provided by user space using the 'set device address' vm ioctl
with parameters:
dev_id = KVM_ARM_DEVICE_PMU
type = VCPU number
addr = PMU IRQ number for the VCPU

The low-level context switching code of KVM ARM/ARM64
will determine the state of the VCPU PMU IRQ and store it in the
irq_pending flag when saving the PMU context for the VCPU.

The common PMU IRQ routing code will inject the virtual PMU
IRQ based on the irq_pending flag, and it will also clear
the irq_pending flag.
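
A minimal userspace sketch of this routing, assuming the standard
KVM_ARM_SET_DEVICE_ADDR encoding (device id in the upper half of the id field,
type in the lower half); the helper is illustrative and KVM_ARM_DEVICE_PMU is
the new id introduced by this patch:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Tell KVM which IRQ number acts as the PMU IRQ for one VCPU. */
static int set_vcpu_pmu_irq(int vm_fd, unsigned int vcpu_idx, __u64 pmu_irq)
{
	struct kvm_arm_device_addr dev_addr = {
		/* dev_id = KVM_ARM_DEVICE_PMU, type = VCPU number */
		.id   = ((__u64)KVM_ARM_DEVICE_PMU << KVM_ARM_DEVICE_ID_SHIFT) | vcpu_idx,
		.addr = pmu_irq,	/* PMU IRQ number for this VCPU */
	};

	return ioctl(vm_fd, KVM_ARM_SET_DEVICE_ADDR, &dev_addr);
}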

Signed-off-by: Anup Patel anup.pa...@linaro.org
Signed-off-by: Pranavkumar Sawargaonkar pranavku...@linaro.org
---
 arch/arm/include/asm/kvm_host.h   |9 
 arch/arm/include/uapi/asm/kvm.h   |1 +
 arch/arm/kvm/arm.c|6 +++
 arch/arm/kvm/reset.c  |4 ++
 arch/arm64/include/asm/kvm_host.h |9 
 arch/arm64/include/uapi/asm/kvm.h |1 +
 arch/arm64/kvm/Kconfig|7 +++
 arch/arm64/kvm/Makefile   |1 +
 arch/arm64/kvm/reset.c|4 ++
 include/kvm/arm_pmu.h |   52 ++
 virt/kvm/arm/pmu.c|  105 +
 11 files changed, 199 insertions(+)
 create mode 100644 include/kvm/arm_pmu.h
 create mode 100644 virt/kvm/arm/pmu.c

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 193ceaf..a6a778f 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -24,6 +24,7 @@
 #include <asm/kvm_mmio.h>
 #include <asm/fpstate.h>
 #include <kvm/arm_arch_timer.h>
+#include <kvm/arm_pmu.h>
 
 #if defined(CONFIG_KVM_ARM_MAX_VCPUS)
 #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
@@ -53,6 +54,9 @@ struct kvm_arch {
/* Timer */
struct arch_timer_kvm   timer;
 
+   /* PMU */
+   struct pmu_kvm  pmu;
+
/*
 * Anything that is not used directly from assembly code goes
 * here.
@@ -118,8 +122,13 @@ struct kvm_vcpu_arch {
 
/* VGIC state */
struct vgic_cpu vgic_cpu;
+
+   /* Timer state */
struct arch_timer_cpu timer_cpu;
 
+   /* PMU state */
+   struct pmu_cpu pmu_cpu;
+
/*
 * Anything that is not used directly from assembly code goes
 * here.
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index e6ebdd3..b21e6eb 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -75,6 +75,7 @@ struct kvm_regs {
 
 /* Supported device IDs */
 #define KVM_ARM_DEVICE_VGIC_V2 0
+#define KVM_ARM_DEVICE_PMU 1
 
 /* Supported VGIC address types  */
 #define KVM_VGIC_V2_ADDR_TYPE_DIST 0
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 3c82b37..04130f5 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -140,6 +140,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
kvm_timer_init(kvm);
 
+   kvm_pmu_init(kvm);
+
/* Mark the initial VMID generation invalid */
kvm-arch.vmid_gen = 0;
 
@@ -567,6 +569,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
	if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
local_irq_enable();
kvm_timer_sync_hwstate(vcpu);
+   kvm_pmu_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
continue;
}
@@ -601,6 +604,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
 */
 
kvm_timer_sync_hwstate(vcpu);
+   kvm_pmu_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
 
ret = handle_exit(vcpu, run, ret);
@@ -794,6 +798,8 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
if (!vgic_present)
return -ENXIO;
		return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
+	case KVM_ARM_DEVICE_PMU:
+		return kvm_pmu_addr(kvm, type, &dev_addr->addr, true);
default:
return -ENODEV;
}
diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c
index f558c07..42e6996 100644
--- a/arch/arm/kvm/reset.c
+++ b/arch/arm/kvm/reset.c
@@ -28,6 +28,7 @@
 #include <asm/kvm_coproc.h>
 
 #include <kvm/arm_arch_timer.h>
+#include <kvm/arm_pmu.h>
 
 /**
  * Cortex-A15 and Cortex-A7 Reset Values
@@ -79,5 +80,8 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
/* Reset arch_timer context */
kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq);
 
+   /* Reset pmu context */
+   kvm_pmu_vcpu_reset(vcpu);
+
return 0;
 }
diff --git a/arch/arm64/include/asm/kvm_host.h 

[RFC PATCH 3/6] ARM: perf: Re-enable overflow interrupt from interrupt handler

2014-08-05 Thread Anup Patel
A hypervisor will typically mask the overflow interrupt before
forwarding it to Guest Linux, hence we need to re-enable the overflow
interrupt after clearing it in Guest Linux. Also, this re-enabling
of the overflow interrupt does no harm in non-virtualized scenarios.

Signed-off-by: Pranavkumar Sawargaonkar pranavku...@linaro.org
Signed-off-by: Anup Patel anup.pa...@linaro.org
---
 arch/arm/kernel/perf_event_v7.c |8 
 1 file changed, 8 insertions(+)

diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 1d37568..581cca5 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -1355,6 +1355,14 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void 
*dev)
if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
continue;
 
+   /*
+* If we are running under a hypervisor such as KVM then
+* hypervisor will mask the interrupt before forwarding
+* it to Guest Linux hence re-enable interrupt for the
+* overflowed counter.
+*/
+   armv7_pmnc_enable_intens(idx);
+
		hwc = &event->hw;
		armpmu_event_update(event);
		perf_sample_data_init(&data, 0, hwc->last_period);
-- 
1.7.9.5



[RFC PATCH 0/6] ARM64: KVM: PMU infrastructure support

2014-08-05 Thread Anup Patel
This patchset enables PMU virtualization in KVM ARM64. The
Guest can now directly use the PMU available on the host HW.

The virtual PMU IRQ injection for Guest VCPUs is managed by a
small piece of code shared between KVM ARM and KVM ARM64. The
virtual PMU IRQ number will be based on the Guest machine model, and
user space will provide it using the 'set device address' vm ioctl.

The second-to-last patch of this series implements a full context
switch of PMU registers, which switches all PMU registers on every
KVM world-switch.

The last patch implements a lazy context switch of PMU registers,
which is very similar to the lazy debug context switch.
(Refer, 
http://lists.infradead.org/pipermail/linux-arm-kernel/2014-July/271040.html)

Also, we reserve the last PMU event counter for EL2 mode, which
will not be accessible from Host or Guest EL1 mode. This
reserved EL2 mode PMU event counter can be used for profiling
KVM world-switch and other EL2 mode functions.

All testing has been done using KVMTOOL on X-Gene Mustang and
the Foundation v8 Model for both Aarch32 and Aarch64 guests.

Anup Patel (6):
  ARM64: Move PMU register related defines to asm/pmu.h
  ARM64: perf: Re-enable overflow interrupt from interrupt handler
  ARM: perf: Re-enable overflow interrupt from interrupt handler
  ARM/ARM64: KVM: Add common code PMU IRQ routing
  ARM64: KVM: Implement full context switch of PMU registers
  ARM64: KVM: Upgrade to lazy context switch of PMU registers

 arch/arm/include/asm/kvm_host.h   |9 +
 arch/arm/include/uapi/asm/kvm.h   |1 +
 arch/arm/kernel/perf_event_v7.c   |8 +
 arch/arm/kvm/arm.c|6 +
 arch/arm/kvm/reset.c  |4 +
 arch/arm64/include/asm/kvm_asm.h  |   39 +++-
 arch/arm64/include/asm/kvm_host.h |   12 ++
 arch/arm64/include/asm/pmu.h  |   44 +
 arch/arm64/include/uapi/asm/kvm.h |1 +
 arch/arm64/kernel/asm-offsets.c   |2 +
 arch/arm64/kernel/perf_event.c|   40 +---
 arch/arm64/kvm/Kconfig|7 +
 arch/arm64/kvm/Makefile   |1 +
 arch/arm64/kvm/hyp-init.S |   15 ++
 arch/arm64/kvm/hyp.S  |  209 +++-
 arch/arm64/kvm/reset.c|4 +
 arch/arm64/kvm/sys_regs.c |  385 +
 include/kvm/arm_pmu.h |   52 +
 virt/kvm/arm/pmu.c|  105 ++
 19 files changed, 870 insertions(+), 74 deletions(-)
 create mode 100644 include/kvm/arm_pmu.h
 create mode 100644 virt/kvm/arm/pmu.c

-- 
1.7.9.5



[RFC PATCH 2/6] ARM64: perf: Re-enable overflow interrupt from interrupt handler

2014-08-05 Thread Anup Patel
A hypervisor will typically mask the overflow interrupt before
forwarding it to Guest Linux, hence we need to re-enable the overflow
interrupt after clearing it in Guest Linux. Also, this re-enabling
of the overflow interrupt does no harm in non-virtualized scenarios.

Signed-off-by: Pranavkumar Sawargaonkar pranavku...@linaro.org
Signed-off-by: Anup Patel anup.pa...@linaro.org
---
 arch/arm64/kernel/perf_event.c |8 
 1 file changed, 8 insertions(+)

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 47dfb8b..19fb140 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -1076,6 +1076,14 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void 
*dev)
if (!armv8pmu_counter_has_overflowed(pmovsr, idx))
continue;
 
+   /*
+* If we are running under a hypervisor such as KVM then
+* hypervisor will mask the interrupt before forwarding
+* it to Guest Linux hence re-enable interrupt for the
+* overflowed counter.
+*/
+   armv8pmu_enable_intens(idx);
+
		hwc = &event->hw;
		armpmu_event_update(event, hwc, idx);
		perf_sample_data_init(&data, 0, hwc->last_period);
-- 
1.7.9.5



[RFC PATCH 5/6] ARM64: KVM: Implement full context switch of PMU registers

2014-08-05 Thread Anup Patel
This patch implements the following:
1. Save/restore all PMU registers for both Guest and Host in the
KVM world switch.
2. Reserve the last PMU event counter for performance analysis in
EL2 mode. To achieve this, we fake the number of event counters available
to the Guest by trapping PMCR_EL0 register accesses and programming
MDCR_EL2.HPMN with the number of PMU event counters minus one.
3. Clear and mask overflowed interrupts when saving the PMU context
for the Guest. The Guest will re-enable overflowed interrupts when
processing the virtual PMU interrupt.

With this patch we have direct access to all PMU registers from the
Guest, and we only trap-and-emulate PMCR_EL0 accesses to fake the number
of PMU event counters exposed to the Guest.
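
A hedged illustration of the PMCR_EL0 trap-and-emulate idea; only the masking
scheme follows the description above, the helper itself is illustrative:

/* Present the guest with one event counter fewer than the hardware reports,
 * so the last counter stays reserved for EL2 profiling. */
static u64 fake_guest_pmcr(u64 hw_pmcr)
{
	u64 nr = (hw_pmcr >> ARMV8_PMCR_N_SHIFT) & ARMV8_PMCR_N_MASK;

	if (nr > 0)
		nr--;		/* hide the counter reserved for EL2 */

	hw_pmcr &= ~((u64)ARMV8_PMCR_N_MASK << ARMV8_PMCR_N_SHIFT);
	return hw_pmcr | (nr << ARMV8_PMCR_N_SHIFT);
}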

Signed-off-by: Anup Patel anup.pa...@linaro.org
Signed-off-by: Pranavkumar Sawargaonkar pranavku...@linaro.org
---
 arch/arm64/include/asm/kvm_asm.h |   36 ++--
 arch/arm64/kernel/asm-offsets.c  |1 +
 arch/arm64/kvm/hyp-init.S|   15 
 arch/arm64/kvm/hyp.S |  168 +++-
 arch/arm64/kvm/sys_regs.c|  175 --
 5 files changed, 343 insertions(+), 52 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 993a7db..93be21f 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -53,15 +53,27 @@
 #define DBGWVR0_EL1	71	/* Debug Watchpoint Value Registers (0-15) */
 #define DBGWVR15_EL1	86
 #define MDCCINT_EL1	87	/* Monitor Debug Comms Channel Interrupt Enable Reg */
+#define PMCR_EL0	88	/* Performance Monitors Control Register */
+#define PMOVSSET_EL0	89	/* Performance Monitors Overflow Flag Status Set Register */
+#define PMCCNTR_EL0	90	/* Cycle Counter Register */
+#define PMSELR_EL0	91	/* Performance Monitors Event Counter Selection Register */
+#define PMEVCNTR0_EL0	92	/* Performance Monitors Event Counter Register (0-30) */
+#define PMEVTYPER0_EL0	93	/* Performance Monitors Event Type Register (0-30) */
+#define PMEVCNTR30_EL0	152
+#define PMEVTYPER30_EL0	153
+#define PMCNTENSET_EL0	154	/* Performance Monitors Count Enable Set Register */
+#define PMINTENSET_EL1	155	/* Performance Monitors Interrupt Enable Set Register */
+#define PMUSERENR_EL0	156	/* Performance Monitors User Enable Register */
+#define PMCCFILTR_EL0	157	/* Cycle Count Filter Register */
 
 /* 32bit specific registers. Keep them at the end of the range */
-#define	DACR32_EL2	88	/* Domain Access Control Register */
-#define	IFSR32_EL2	89	/* Instruction Fault Status Register */
-#define	FPEXC32_EL2	90	/* Floating-Point Exception Control Register */
-#define	DBGVCR32_EL2	91	/* Debug Vector Catch Register */
-#define	TEECR32_EL1	92	/* ThumbEE Configuration Register */
-#define	TEEHBR32_EL1	93	/* ThumbEE Handler Base Register */
-#define	NR_SYS_REGS	94
+#define	DACR32_EL2	158	/* Domain Access Control Register */
+#define	IFSR32_EL2	159	/* Instruction Fault Status Register */
+#define	FPEXC32_EL2	160	/* Floating-Point Exception Control Register */
+#define	DBGVCR32_EL2	161	/* Debug Vector Catch Register */
+#define	TEECR32_EL1	162	/* ThumbEE Configuration Register */
+#define	TEEHBR32_EL1	163	/* ThumbEE Handler Base Register */
+#define	NR_SYS_REGS	164
 
 /* 32bit mapping */
 #define c0_MPIDR   (MPIDR_EL1 * 2) /* MultiProcessor ID Register */
@@ -83,6 +95,13 @@
 #define c6_IFAR(c6_DFAR + 1)   /* Instruction Fault Address 
Register */
 #define c7_PAR (PAR_EL1 * 2)   /* Physical Address Register */
 #define c7_PAR_high(c7_PAR + 1)/* PAR top 32 bits */
+#define c9_PMCR(PMCR_EL0 * 2)  /* Performance Monitors Control 
Register */
+#define c9_PMOVSSET(PMOVSSET_EL0 * 2)
+#define c9_PMCCNTR (PMCCNTR_EL0 * 2)
+#define c9_PMSELR  (PMSELR_EL0 * 2)
+#define c9_PMCNTENSET  (PMCNTENSET_EL0 * 2)
+#define c9_PMINTENSET  (PMINTENSET_EL1 * 2)
+#define c9_PMUSERENR   (PMUSERENR_EL0 * 2)
 #define c10_PRRR   (MAIR_EL1 * 2)  /* Primary Region Remap Register */
 #define c10_NMRR   (c10_PRRR + 1)  /* Normal Memory Remap Register */
 #define c12_VBAR   (VBAR_EL1 * 2)  /* Vector Base Address Register */
@@ -93,6 +112,9 @@
 #define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */
 #define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */
 #define c14_CNTKCTL(CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */
+#define c14_PMEVCNTR0  (PMEVCNTR0_EL0 * 2)
+#define c14_PMEVTYPR0  (PMEVTYPER0_EL0 * 2)
+#define c14_PMCCFILTR  (PMCCFILTR_EL0 * 2)
 
 #define cp14_DBGDSCRext(MDSCR_EL1 * 2)
 #define cp14_DBGBCR0   (DBGBCR0_EL1 * 2)
diff --git a/arch/arm64/kernel/asm-offsets.c 

Re: [RFC PATCH 0/6] ARM64: KVM: PMU infrastructure support

2014-08-05 Thread Anup Patel
On 5 August 2014 15:02, Anup Patel apa...@apm.com wrote:
 On Tue, Aug 5, 2014 at 2:54 PM, Anup Patel anup.pa...@linaro.org wrote:
 This patchset enables PMU virtualization in KVM ARM64. The
 Guest can now directly use PMU available on the host HW.

 The virtual PMU IRQ injection for Guest VCPUs is managed by
 small piece of code shared between KVM ARM and KVM ARM64. The
 virtual PMU IRQ number will be based on Guest machine model and
 user space will provide it using set device address vm ioctl.

 The second last patch of this series implements full context
 switch of PMU registers which will context switch all PMU
 registers on every KVM world-switch.

 The last patch implements a lazy context switch of PMU registers
 which is very similar to lazy debug context switch.
 (Refer, 
 http://lists.infradead.org/pipermail/linux-arm-kernel/2014-July/271040.html)

 Also, we reserve last PMU event counter for EL2 mode which
 will not be accessible from Host and Guest EL1 mode. This
 reserved EL2 mode PMU event counter can be used for profiling
 KVM world-switch and other EL2 mode functions.

 All testing have been done using KVMTOOL on X-Gene Mustang and
 Foundation v8 Model for both Aarch32 and Aarch64 guest.

 Anup Patel (6):
   ARM64: Move PMU register related defines to asm/pmu.h
   ARM64: perf: Re-enable overflow interrupt from interrupt handler
   ARM: perf: Re-enable overflow interrupt from interrupt handler
   ARM/ARM64: KVM: Add common code PMU IRQ routing
   ARM64: KVM: Implement full context switch of PMU registers
   ARM64: KVM: Upgrade to lazy context switch of PMU registers

  arch/arm/include/asm/kvm_host.h   |9 +
  arch/arm/include/uapi/asm/kvm.h   |1 +
  arch/arm/kernel/perf_event_v7.c   |8 +
  arch/arm/kvm/arm.c|6 +
  arch/arm/kvm/reset.c  |4 +
  arch/arm64/include/asm/kvm_asm.h  |   39 +++-
  arch/arm64/include/asm/kvm_host.h |   12 ++
  arch/arm64/include/asm/pmu.h  |   44 +
  arch/arm64/include/uapi/asm/kvm.h |1 +
  arch/arm64/kernel/asm-offsets.c   |2 +
  arch/arm64/kernel/perf_event.c|   40 +---
  arch/arm64/kvm/Kconfig|7 +
  arch/arm64/kvm/Makefile   |1 +
  arch/arm64/kvm/hyp-init.S |   15 ++
  arch/arm64/kvm/hyp.S  |  209 +++-
  arch/arm64/kvm/reset.c|4 +
  arch/arm64/kvm/sys_regs.c |  385 
 +
  include/kvm/arm_pmu.h |   52 +
  virt/kvm/arm/pmu.c|  105 ++
  19 files changed, 870 insertions(+), 74 deletions(-)
  create mode 100644 include/kvm/arm_pmu.h
  create mode 100644 virt/kvm/arm/pmu.c

 --
 1.7.9.5

 CONFIDENTIALITY NOTICE: This e-mail message, including any attachments,
 is for the sole use of the intended recipient(s) and contains information
 that is confidential and proprietary to Applied Micro Circuits Corporation 
 or its subsidiaries.
 It is to be used solely for the purpose of furthering the parties' business 
 relationship.
 All unauthorized review, use, disclosure or distribution is prohibited.
 If you are not the intended recipient, please contact the sender by reply 
 e-mail
 and destroy all copies of the original message.

Please ignore this notice, it accidentally sneaked in.

--
Anup



 Hi All,

 Please apply attached patch to KVMTOOL on-top-of my
 recent KVMTOOL patchset for trying this patchset using
 KVMTOOL.

 Regards,
 Anup


Re: [RFC PATCH 0/6] ARM64: KVM: PMU infrastructure support

2014-08-05 Thread Anup Patel
On Tue, Aug 5, 2014 at 2:54 PM, Anup Patel anup.pa...@linaro.org wrote:
 This patchset enables PMU virtualization in KVM ARM64. The
 Guest can now directly use PMU available on the host HW.

 The virtual PMU IRQ injection for Guest VCPUs is managed by
 small piece of code shared between KVM ARM and KVM ARM64. The
 virtual PMU IRQ number will be based on Guest machine model and
 user space will provide it using set device address vm ioctl.

 The second last patch of this series implements full context
 switch of PMU registers which will context switch all PMU
 registers on every KVM world-switch.

 The last patch implements a lazy context switch of PMU registers
 which is very similar to lazy debug context switch.
 (Refer, 
 http://lists.infradead.org/pipermail/linux-arm-kernel/2014-July/271040.html)

 Also, we reserve last PMU event counter for EL2 mode which
 will not be accessible from Host and Guest EL1 mode. This
 reserved EL2 mode PMU event counter can be used for profiling
 KVM world-switch and other EL2 mode functions.

 All testing have been done using KVMTOOL on X-Gene Mustang and
 Foundation v8 Model for both Aarch32 and Aarch64 guest.

 Anup Patel (6):
   ARM64: Move PMU register related defines to asm/pmu.h
   ARM64: perf: Re-enable overflow interrupt from interrupt handler
   ARM: perf: Re-enable overflow interrupt from interrupt handler
   ARM/ARM64: KVM: Add common code PMU IRQ routing
   ARM64: KVM: Implement full context switch of PMU registers
   ARM64: KVM: Upgrade to lazy context switch of PMU registers

  arch/arm/include/asm/kvm_host.h   |9 +
  arch/arm/include/uapi/asm/kvm.h   |1 +
  arch/arm/kernel/perf_event_v7.c   |8 +
  arch/arm/kvm/arm.c|6 +
  arch/arm/kvm/reset.c  |4 +
  arch/arm64/include/asm/kvm_asm.h  |   39 +++-
  arch/arm64/include/asm/kvm_host.h |   12 ++
  arch/arm64/include/asm/pmu.h  |   44 +
  arch/arm64/include/uapi/asm/kvm.h |1 +
  arch/arm64/kernel/asm-offsets.c   |2 +
  arch/arm64/kernel/perf_event.c|   40 +---
  arch/arm64/kvm/Kconfig|7 +
  arch/arm64/kvm/Makefile   |1 +
  arch/arm64/kvm/hyp-init.S |   15 ++
  arch/arm64/kvm/hyp.S  |  209 +++-
  arch/arm64/kvm/reset.c|4 +
  arch/arm64/kvm/sys_regs.c |  385 
 +
  include/kvm/arm_pmu.h |   52 +
  virt/kvm/arm/pmu.c|  105 ++
  19 files changed, 870 insertions(+), 74 deletions(-)
  create mode 100644 include/kvm/arm_pmu.h
  create mode 100644 virt/kvm/arm/pmu.c

 --
 1.7.9.5

 CONFIDENTIALITY NOTICE: This e-mail message, including any attachments,
 is for the sole use of the intended recipient(s) and contains information
 that is confidential and proprietary to Applied Micro Circuits Corporation or 
 its subsidiaries.
 It is to be used solely for the purpose of furthering the parties' business 
 relationship.
 All unauthorized review, use, disclosure or distribution is prohibited.
 If you are not the intended recipient, please contact the sender by reply 
 e-mail
 and destroy all copies of the original message.


Hi All,

Please apply attached patch to KVMTOOL on-top-of my
recent KVMTOOL patchset for trying this patchset using
KVMTOOL.

Regards,
Anup
From c16a3265992ba8159ab1da6d589026c0aa0914ba Mon Sep 17 00:00:00 2001
From: Anup Patel anup.pa...@linaro.org
Date: Mon, 4 Aug 2014 16:45:44 +0530
Subject: [RFC PATCH] kvmtool: ARM/ARM64: Add PMU node to generated guest DTB.

This patch informs the KVM ARM/ARM64 in-kernel PMU virtualization
code about the PMU irq numbers for each guest VCPU using the 'set
device address' vm ioctl.

We also add a PMU node to the generated guest DTB to inform the guest
about the PMU irq numbers. For now, we have assumed PPI17 as the
PMU IRQ of the KVMTOOL guest.
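
For reference, a PMU node for PPI17 built with the libfdt helpers already used in
arm/fdt.c might look roughly like the hedged sketch below; the "arm,armv8-pmuv3"
compatible string and the interrupt cells are assumptions based on the usual PMU
binding, not text from this patch:

	u32 irq_prop[] = {
		cpu_to_fdt32(1),	/* PPI class interrupt */
		cpu_to_fdt32(17 - 16),	/* PPI17 => PPI cell value 1 */
		cpu_to_fdt32(4),	/* level triggered, active high (assumed) */
	};

	_FDT(fdt_begin_node(fdt, "pmu"));
	_FDT(fdt_property_string(fdt, "compatible", "arm,armv8-pmuv3"));
	_FDT(fdt_property(fdt, "interrupts", irq_prop, sizeof(irq_prop)));
	_FDT(fdt_end_node(fdt));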

Signed-off-by: Pranavkumar Sawargaonkar pranavku...@linaro.org
Signed-off-by: Anup Patel anup.pa...@linaro.org
---
 tools/kvm/Makefile |3 ++-
 tools/kvm/arm/fdt.c|4 +++
 tools/kvm/arm/include/arm-common/pmu.h |   10 +++
 tools/kvm/arm/pmu.c|   45 
 4 files changed, 61 insertions(+), 1 deletion(-)
 create mode 100644 tools/kvm/arm/include/arm-common/pmu.h
 create mode 100644 tools/kvm/arm/pmu.c

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index fba60f1..59b75c4 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -158,7 +158,8 @@ endif
 
 # ARM
 OBJS_ARM_COMMON		:= arm/fdt.o arm/gic.o arm/ioport.o arm/irq.o \
-			   arm/kvm.o arm/kvm-cpu.o arm/pci.o arm/timer.o
+			   arm/kvm.o arm/kvm-cpu.o arm/pci.o arm/timer.o \
+			   arm/pmu.o
 HDRS_ARM_COMMON		:= arm/include
 ifeq ($(ARCH), arm)
 	DEFINES		+= -DCONFIG_ARM
diff --git a/tools/kvm/arm/fdt.c b/tools/kvm/arm/fdt.c
index 93849cf2..42b0a67 100644
--- a/tools/kvm/arm/fdt.c
+++ b/tools/kvm/arm/fdt.c
@@ -5,6 +5,7 @@
 #include kvm/virtio-mmio.h
 
 #include 

Re: kvm-unit-tests failures

2014-08-05 Thread Paolo Bonzini
On 04/08/2014 19:02, Chris J Arges wrote:
 All tests below are done with the kvm tip (fresh as of today) merged
 with 3.16-rc4.

Sorry, I had missed the list of failures.

 * Failures:
 - pmu
 As suggested in comment 2 on this bug:
 https://bugzilla.redhat.com/show_bug.cgi?id=1079796
 Adding the -cpu host allows this test to PASS.

Correct, I thought unittests.cfg already did that.

 - tsc_adjust
 Here again adding -cpu host allows this to pass.

Looks like QEMU doesn't know the tsc_adjust flag at all.  CCing Eduardo.

 - pcid
 Here again adding -cpu host allows this to pass.

It passes here, and fails with -cpu host:

PASS: CPUID consistency
FAIL: Test on PCID when enabled
PASS: Test on INVPCID when disabled

Looks like the "Test on PCID when enabled" case has problems, and the
-cpu flag is a red herring.

 - msr (HANG)
 ./x86-run x86/msr.flat -smp 1
 
 $ sudo ./x86-run x86/msr.flat -smp 1
 qemu-system-x86_64 -enable-kvm -device pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4 -display none -serial stdio -device pci-testdev -kernel x86/msr.flat -smp 1
 enabling apic
 enabling apic
 enabling apic
 
 enabling apic repeats and adding -cpu host seems to have no effect.
 Interestingly enough on an Ubuntu 3.13 series kernel this PASSes.

The APICBASE register is already tested by the apic testcase, and the
simple-minded tests in msr.flat actually trigger a general protection
fault nowadays because the quality of the emulation improved.  I'm
dropping the APICBASE tests from msr.flat.

 - vmx (HANG)
 Test suite: interrupt
 PASS: direct interrupt while running guest
 PASS: intercepted interrupt while running guest
 PASS: direct interrupt + hlt
 
 Here it just hangs.

It fails here for me.  It could be related to APICv, can you try with
the enable_apicv=0 parameter to kvm-intel.ko?

Paolo


[PATCH kvm-unit-tests] x86: unittests.cfg: add -cpu host to pmu and tsc_adjust

2014-08-05 Thread Paolo Bonzini
vPMU is only available with -cpu host, and so is TSC_ADJUST because
the QEMU CPU models forgot about it.

Adjust these two tests to use the option.

Reported-by: Chris J Arges chris.j.ar...@canonical.com
Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 x86/unittests.cfg | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/x86/unittests.cfg b/x86/unittests.cfg
index d78fe0e..6d3e23a 100644
--- a/x86/unittests.cfg
+++ b/x86/unittests.cfg
@@ -91,6 +91,7 @@ file = msr.flat
 
 [pmu]
 file = pmu.flat
+extra_params = -cpu host
 
 [port80]
 file = port80.flat
@@ -109,6 +110,7 @@ file = tsc.flat
 
 [tsc_adjust]
 file = tsc_adjust.flat
+extra_params = -cpu host
 
 [xsave]
 file = xsave.flat
-- 
1.9.3



[PATCH kvm-unit-tests] x86: msr: remove invalid test

2014-08-05 Thread Paolo Bonzini
The APICBASE register is already tested by the apic testcase,
and the simple-minded tests in msr.flat actually trigger a
general protection fault nowadays.  Just drop the tests, and
the test now passes.

Reported-by: Chris J Arges chris.j.ar...@canonical.com
Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 x86/msr.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/x86/msr.c b/x86/msr.c
index db08a8d..ec4710e 100644
--- a/x86/msr.c
+++ b/x86/msr.c
@@ -19,12 +19,6 @@ struct msr_info {
 
 struct msr_info msr_info[] =
 {
-    { .index = 0x001b, .name = "MSR_IA32_APICBASE",
-      .val_pairs = {
-            { .valid = 1, .value = 0x56789900, .expected = 0x56789900},
-            { .valid = 1, .value = 0x56789D01, .expected = 0x56789D01},
-        }
-    },
     { .index = 0x0174, .name = "IA32_SYSENTER_CS",
   .val_pairs = {{ .valid = 1, .value = 0x1234, .expected = 0x1234}}
 },
-- 
1.9.3



Re: When I boot two virtio-rng devices, guest will hang

2014-08-05 Thread Amos Kong
3.16 (guest hangs with two rng devices)
3.16 + quick fix (can start up with two rng devices) (hotplug issue 1 + hotplug issue 2 exist)
latest torvalds/linux.git + Amit's 4 patches (can start up with two rng devices) (only hotplug issue 2 exists)

However, the 4 patches also fixed the hang issue; the hotplug issue was only
fixed a little. The hotplug issue is affected by the backend, or maybe it's not
a real issue, because the rng device can be hot-removed after the dd process is
killed.


Hotplug issue 1:
  1. boot up guest with two rng device (rng0 uses /dev/urandom, rng1 uses 
/dev/random)
  2. read data by dd in guest
  3 (option 1). hot-remove rng0, then hot-remove rng1 - result: _only rng1_ 
can't be removed until dd process is killed
  3 (option 2). hot-remove rng1, then hot-remove rng0 - result: two devices 
can be removed successfully, dd process will exit automatically.

  If we use /dev/urandom for rng0 and rng1, _rng0 & rng1_ can be removed, dd
process will exit automatically.

Hotplug issue 2:
  If we use /dev/random for rng0 and rng1, _rng0 & rng1_ can't be removed until
dd process is killed.

Hotplug issue 3:
  If we use /dev/random for rng0 and rng1, _only rng1_ can't be removed until 
dd process is killed.


(The difference between /dev/random and /dev/urandom is the speed.)

Thanks, Amos


[PATCH kvm-unit-tests] x86: pcid: fix clearing of CR0.PG

2014-08-05 Thread Paolo Bonzini
The test was trying to set CR0.PG, not clear it (and it was
already set, even).

Reported-by: Chris J Arges chris.j.ar...@canonical.com
Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 x86/pcid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/x86/pcid.c b/x86/pcid.c
index 164e9a1..e3ccfdb 100644
--- a/x86/pcid.c
+++ b/x86/pcid.c
@@ -53,7 +53,7 @@ void test_pcid_enabled(void)
 goto report;
 
 /* try clearing CR0.PG when CR4.PCIDE=1, #GP expected */
-if (write_cr0_checking(cr0 | X86_CR0_PG) != GP_VECTOR)
+    if (write_cr0_checking(cr0 & ~X86_CR0_PG) != GP_VECTOR)
 goto report;
 
 write_cr4(cr4);
-- 
1.8.3.1



[PATCH] KVM: PPC: drop duplicate tracepoint

2014-08-05 Thread Paolo Bonzini
Commit 29577fc00ba4 (KVM: PPC: HV: Remove generic instruction emulation)
caused a build failure:

arch/powerpc/kvm/kvm-pr.o:(__tracepoints+0xa8): multiple definition of 
`__tracepoint_kvm_ppc_instr'
arch/powerpc/kvm/kvm.o:(__tracepoints+0x1c0): first defined here

due to a duplicate definition of the tracepoint in trace.h and
trace_pr.h.  Because the tracepoint is still used by Book3S HV
code, and because the PR code does include trace.h, just remove
the duplicate definition from trace_pr.h, and export it from
kvm.o.

Reported-by: Stephen Rothwell s...@canb.auug.org.au
Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 arch/powerpc/kvm/emulate.c  |  2 ++
 arch/powerpc/kvm/trace_pr.h | 20 
 2 files changed, 2 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index e96b50d0bdab..2325168ad1ff 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -300,3 +300,5 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct 
kvm_vcpu *vcpu)
return emulated;
 }
 EXPORT_SYMBOL_GPL(kvmppc_emulate_instruction);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr);
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h
index a674f090dfb8..e1357cd8dc1f 100644
--- a/arch/powerpc/kvm/trace_pr.h
+++ b/arch/powerpc/kvm/trace_pr.h
@@ -291,26 +291,6 @@ TRACE_EVENT(kvm_unmap_hva,
	TP_printk("unmap hva 0x%lx\n", __entry->hva)
 );
 
-TRACE_EVENT(kvm_ppc_instr,
-   TP_PROTO(unsigned int inst, unsigned long _pc, unsigned int emulate),
-   TP_ARGS(inst, _pc, emulate),
-
-   TP_STRUCT__entry(
-   __field(unsigned int,   inst)
-   __field(unsigned long,  pc  )
-   __field(unsigned int,   emulate )
-   ),
-
-   TP_fast_assign(
-		__entry->inst		= inst;
-		__entry->pc		= _pc;
-		__entry->emulate	= emulate;
-	),
-
-	TP_printk("inst %u pc 0x%lx emulate %u\n",
-		  __entry->inst, __entry->pc, __entry->emulate)
-);
-
 #endif /* _TRACE_KVM_H */
 
 /* This part must be outside protection */
-- 
1.8.3.1



[PATCH v3 0/5] KVM: PPC: Book3e: AltiVec support

2014-08-05 Thread Mihai Caraman
Add KVM Book3e AltiVec support and enable e6500 core.

Changes:

v3:
 - use distinct SPE/AltiVec exception handlers
 - make ONE_REG AltiVec support powerpc generic
 - add ONE_REG IVORs support

v2:
 - integrate Paul's FP/VMX/VSX changes that landed in kvm-ppc-queue
   in January and take into account feedback

Mihai Caraman (5):
  KVM: PPC: Book3e: Increase FPU laziness
  KVM: PPC: Book3e: Add AltiVec support
  KVM: PPC: Move ONE_REG AltiVec support to powerpc
  KVM: PPC: Booke: Add ONE_REG IVORs support
  KVM: PPC: Book3e: Enable e6500 core

 arch/powerpc/include/uapi/asm/kvm.h   |  29 +++
 arch/powerpc/kvm/book3s.c | 151 +---
 arch/powerpc/kvm/booke.c  | 331 --
 arch/powerpc/kvm/booke.h  |  39 +---
 arch/powerpc/kvm/bookehv_interrupts.S |  10 +-
 arch/powerpc/kvm/e500.c   |  42 -
 arch/powerpc/kvm/e500_emulate.c   |  18 ++
 arch/powerpc/kvm/e500mc.c |  44 -
 arch/powerpc/kvm/powerpc.c|  97 ++
 9 files changed, 554 insertions(+), 207 deletions(-)

-- 
1.7.11.7



[PATCH v3 2/5] KVM: PPC: Book3e: Add AltiVec support

2014-08-05 Thread Mihai Caraman
Add KVM Book3e AltiVec support. KVM Book3e FPU support gracefully reuses the host
infrastructure, so follow the same approach for AltiVec.

Keep SPE/AltiVec exception handlers distinct using CONFIG_KVM_E500V2.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
 - use distinct SPE/AltiVec exception handlers

v2:
 - integrate Paul's FP/VMX/VSX changes

 arch/powerpc/kvm/booke.c  | 73 +++
 arch/powerpc/kvm/booke.h  |  5 +++
 arch/powerpc/kvm/bookehv_interrupts.S | 10 +++--
 arch/powerpc/kvm/e500_emulate.c   | 18 +
 4 files changed, 102 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 0c6f616..c5cca09 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -168,6 +168,40 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
 #endif
 }
 
+/*
+ * Simulate AltiVec unavailable fault to load guest state
+ * from thread to AltiVec unit.
+ * It requires to be called with preemption disabled.
+ */
+static inline void kvmppc_load_guest_altivec(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ALTIVEC
+   if (cpu_has_feature(CPU_FTR_ALTIVEC)) {
+   if (!(current->thread.regs->msr & MSR_VEC)) {
+   enable_kernel_altivec();
+   load_vr_state(&vcpu->arch.vr);
+   current->thread.vr_save_area = &vcpu->arch.vr;
+   current->thread.regs->msr |= MSR_VEC;
+   }
+   }
+#endif
+}
+
+/*
+ * Save guest vcpu AltiVec state into thread.
+ * It requires to be called with preemption disabled.
+ */
+static inline void kvmppc_save_guest_altivec(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ALTIVEC
+   if (cpu_has_feature(CPU_FTR_ALTIVEC)) {
+   if (current->thread.regs->msr & MSR_VEC)
+   giveup_altivec(current);
+   current->thread.vr_save_area = NULL;
+   }
+#endif
+}
+
 static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
 {
/* Synchronize guest's desire to get debug interrupts into shadow MSR */
@@ -375,9 +409,14 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu 
*vcpu,
case BOOKE_IRQPRIO_ITLB_MISS:
case BOOKE_IRQPRIO_SYSCALL:
case BOOKE_IRQPRIO_FP_UNAVAIL:
+#ifdef CONFIG_KVM_E500V2
case BOOKE_IRQPRIO_SPE_UNAVAIL:
case BOOKE_IRQPRIO_SPE_FP_DATA:
case BOOKE_IRQPRIO_SPE_FP_ROUND:
+#else
+   case BOOKE_IRQPRIO_ALTIVEC_UNAVAIL:
+   case BOOKE_IRQPRIO_ALTIVEC_ASSIST:
+#endif
case BOOKE_IRQPRIO_AP_UNAVAIL:
allowed = 1;
msr_mask = MSR_CE | MSR_ME | MSR_DE;
@@ -693,6 +732,17 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu)
kvmppc_load_guest_fp(vcpu);
 #endif
 
+#ifdef CONFIG_ALTIVEC
+   /* Save userspace AltiVec state in stack */
+   if (cpu_has_feature(CPU_FTR_ALTIVEC))
+   enable_kernel_altivec();
+   /*
+* Since we can't trap on MSR_VEC in GS-mode, we consider the guest
+* as always using the AltiVec.
+*/
+   kvmppc_load_guest_altivec(vcpu);
+#endif
+
/* Switch to guest debug context */
debug = vcpu->arch.shadow_dbg_reg;
switch_booke_debug_regs(&debug);
@@ -715,6 +765,10 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu)
kvmppc_save_guest_fp(vcpu);
 #endif
 
+#ifdef CONFIG_ALTIVEC
+   kvmppc_save_guest_altivec(vcpu);
+#endif
+
 out:
vcpu->mode = OUTSIDE_GUEST_MODE;
return ret;
@@ -999,6 +1053,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
 
+#ifdef CONFIG_KVM_E500V2
 #ifdef CONFIG_SPE
case BOOKE_INTERRUPT_SPE_UNAVAIL: {
if (vcpu->arch.shared->msr & MSR_SPE)
@@ -1040,7 +1095,24 @@ int kvmppc_handle_exit(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
run->hw.hardware_exit_reason = exit_nr;
r = RESUME_HOST;
break;
+#endif /* !CONFIG_SPE */
+#else
+/*
+ * On cores with Vector category, KVM is loaded only if CONFIG_ALTIVEC,
+ * see kvmppc_core_check_processor_compat().
+ */
+#ifdef CONFIG_ALTIVEC
+   case BOOKE_INTERRUPT_ALTIVEC_UNAVAIL:
+   kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_UNAVAIL);
+   r = RESUME_GUEST;
+   break;
+
+   case BOOKE_INTERRUPT_ALTIVEC_ASSIST:
+   kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_ASSIST);
+   r = RESUME_GUEST;
+   break;
 #endif
+#endif /* !CONFIG_KVM_E500V2 */
 
case BOOKE_INTERRUPT_DATA_STORAGE:
kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear,
@@ -1217,6 +1289,7 @@ out:
/* interrupts now hard-disabled */
kvmppc_fix_ee_before_entry();
kvmppc_load_guest_fp(vcpu);
+   

[PATCH v3 3/5] KVM: PPC: Move ONE_REG AltiVec support to powerpc

2014-08-05 Thread Mihai Caraman
Make ONE_REG AltiVec support common across server and embedded implementations
by moving the kvm_vcpu_ioctl_get_one_reg() and kvm_vcpu_ioctl_set_one_reg()
functions to the powerpc layer.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
 - make ONE_REG AltiVec support powerpc generic

v2:
 - add comment describing VCSR register representation in KVM vs kernel

 arch/powerpc/include/uapi/asm/kvm.h |   5 ++
 arch/powerpc/kvm/book3s.c   | 151 +++-
 arch/powerpc/kvm/booke.c|  85 
 arch/powerpc/kvm/powerpc.c  |  97 +++
 4 files changed, 179 insertions(+), 159 deletions(-)

diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index e0e49db..7a27ff0 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -476,6 +476,11 @@ struct kvm_get_htab_header {
 
 /* FP and vector status/control registers */
 #define KVM_REG_PPC_FPSCR  (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80)
+/*
+ * VSCR register is documented as a 32-bit register in the ISA, but it can
+ * only be accessed via a vector register. Expose VSCR as a 32-bit register
+ * even though the kernel represents it as a 128-bit vector.
+ */
 #define KVM_REG_PPC_VSCR   (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81)
 
 /* Virtual processor areas */
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index dd03f6b..1b5adda 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -535,174 +535,111 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, 
struct kvm_fpu *fpu)
return -ENOTSUPP;
 }
 
-int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+   union kvmppc_one_reg *val)
 {
-   int r;
-   union kvmppc_one_reg val;
-   int size;
+   int r = 0;
long int i;
 
-   size = one_reg_size(reg->id);
-   if (size > sizeof(val))
-   return -EINVAL;
-
-   r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);
+   r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val);
if (r == -EINVAL) {
r = 0;
-   switch (reg->id) {
+   switch (id) {
case KVM_REG_PPC_DAR:
-   val = get_reg_val(reg->id, kvmppc_get_dar(vcpu));
+   *val = get_reg_val(id, kvmppc_get_dar(vcpu));
    break;
    case KVM_REG_PPC_DSISR:
-   val = get_reg_val(reg->id, kvmppc_get_dsisr(vcpu));
+   *val = get_reg_val(id, kvmppc_get_dsisr(vcpu));
    break;
    case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
-   i = reg->id - KVM_REG_PPC_FPR0;
-   val = get_reg_val(reg->id, VCPU_FPR(vcpu, i));
+   i = id - KVM_REG_PPC_FPR0;
+   *val = get_reg_val(id, VCPU_FPR(vcpu, i));
    break;
    case KVM_REG_PPC_FPSCR:
-   val = get_reg_val(reg->id, vcpu->arch.fp.fpscr);
-   break;
-#ifdef CONFIG_ALTIVEC
-   case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
-   if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
-   r = -ENXIO;
-   break;
-   }
-   val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
+   *val = get_reg_val(id, vcpu->arch.fp.fpscr);
    break;
-   case KVM_REG_PPC_VSCR:
-   if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
-   r = -ENXIO;
-   break;
-   }
-   val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
-   break;
-   case KVM_REG_PPC_VRSAVE:
-   val = get_reg_val(reg->id, vcpu->arch.vrsave);
-   break;
-#endif /* CONFIG_ALTIVEC */
 #ifdef CONFIG_VSX
case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
if (cpu_has_feature(CPU_FTR_VSX)) {
-   long int i = reg->id - KVM_REG_PPC_VSR0;
-   val.vsxval[0] = vcpu->arch.fp.fpr[i][0];
-   val.vsxval[1] = vcpu->arch.fp.fpr[i][1];
+   i = id - KVM_REG_PPC_VSR0;
+   val->vsxval[0] = vcpu->arch.fp.fpr[i][0];
+   val->vsxval[1] = vcpu->arch.fp.fpr[i][1];
} else {
r = -ENXIO;
}
break;
 #endif /* CONFIG_VSX */
-   case KVM_REG_PPC_DEBUG_INST: {
-   u32 opcode = INS_TW;
-   r = 

[PATCH v3 5/5] KVM: PPC: Book3E: Enable e6500 core

2014-08-05 Thread Mihai Caraman
Now that AltiVec support is in place enable e6500 core.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v2-v3:
 - no changes

 arch/powerpc/kvm/e500mc.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 19dd927..aa48dc3 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -177,6 +177,16 @@ int kvmppc_core_check_processor_compat(void)
r = 0;
else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0)
r = 0;
+#ifdef CONFIG_ALTIVEC
+   /*
+    * Since guests have the privilege to enable AltiVec, we need AltiVec
+    * support in the host to save/restore their context.
+    * Don't use CPU_FTR_ALTIVEC to identify cores with AltiVec unit
+    * because it's cleared in the absence of CONFIG_ALTIVEC!
+    */
+   else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0)
+   r = 0;
+#endif
else
r = -ENOTSUPP;
 
-- 
1.7.11.7



[PATCH v3 4/5] KVM: PPC: Booke: Add ONE_REG IVORs support

2014-08-05 Thread Mihai Caraman
Add ONE_REG support for the IVORs, with IVORs 0-15 and 35 common to all Booke cores.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
 - new patch

 arch/powerpc/include/uapi/asm/kvm.h |  24 +++
 arch/powerpc/kvm/booke.c| 132 
 arch/powerpc/kvm/e500.c |  42 +++-
 arch/powerpc/kvm/e500mc.c   |  32 +
 4 files changed, 228 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index 7a27ff0..174fed0 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -563,6 +563,30 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_WORT   (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9)
 #define KVM_REG_PPC_SPRG9  (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba)
 
+/* Booke IVOR registers */
+#define KVM_REG_PPC_IVOR0  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xc0)
+#define KVM_REG_PPC_IVOR1  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xc1)
+#define KVM_REG_PPC_IVOR2  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xc2)
+#define KVM_REG_PPC_IVOR3  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xc3)
+#define KVM_REG_PPC_IVOR4  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xc4)
+#define KVM_REG_PPC_IVOR5  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xc5)
+#define KVM_REG_PPC_IVOR6  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xc6)
+#define KVM_REG_PPC_IVOR7  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xc7)
+#define KVM_REG_PPC_IVOR8  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xc8)
+#define KVM_REG_PPC_IVOR9  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xc9)
+#define KVM_REG_PPC_IVOR10 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xca)
+#define KVM_REG_PPC_IVOR11 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xcb)
+#define KVM_REG_PPC_IVOR12 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xcc)
+#define KVM_REG_PPC_IVOR13 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xcd)
+#define KVM_REG_PPC_IVOR14 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xce)
+#define KVM_REG_PPC_IVOR15 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xcf)
+#define KVM_REG_PPC_IVOR32 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xd0)
+#define KVM_REG_PPC_IVOR33 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xd1)
+#define KVM_REG_PPC_IVOR34 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xd2)
+#define KVM_REG_PPC_IVOR35 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xd3)
+#define KVM_REG_PPC_IVOR36 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xd4)
+#define KVM_REG_PPC_IVOR37 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xd5)
+
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
  */
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 4fe7f68..ffa82a5 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1565,6 +1565,72 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
int r = 0;
 
switch (id) {
+   case KVM_REG_PPC_IVOR0:
+   *val = get_reg_val(id,
+   vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]);
+   break;
+   case KVM_REG_PPC_IVOR1:
+   *val = get_reg_val(id,
+   vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]);
+   break;
+   case KVM_REG_PPC_IVOR2:
+   *val = get_reg_val(id,
+   vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]);
+   break;
+   case KVM_REG_PPC_IVOR3:
+   *val = get_reg_val(id,
+   vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]);
+   break;
+   case KVM_REG_PPC_IVOR4:
+   *val = get_reg_val(id,
+   vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]);
+   break;
+   case KVM_REG_PPC_IVOR5:
+   *val = get_reg_val(id,
+   vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]);
+   break;
+   case KVM_REG_PPC_IVOR6:
+   *val = get_reg_val(id, vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]);
+   break;
+   case KVM_REG_PPC_IVOR7:
+   *val = get_reg_val(id,
+   vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]);
+   break;
+   case KVM_REG_PPC_IVOR8:
+   *val = get_reg_val(id,
+   vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]);
+   break;
+   case KVM_REG_PPC_IVOR9:
+   *val = get_reg_val(id,
+   vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]);
+   break;
+   case KVM_REG_PPC_IVOR10:
+   *val = get_reg_val(id,
+   vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]);
+   break;
+   case KVM_REG_PPC_IVOR11:
+   *val = get_reg_val(id, vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]);
+   break;
+   case KVM_REG_PPC_IVOR12:
+   *val = get_reg_val(id,
+   vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]);
+   break;
+   case KVM_REG_PPC_IVOR13:
+

[PATCH v3 1/5] KVM: PPC: Book3e: Increase FPU laziness

2014-08-05 Thread Mihai Caraman
Increase FPU laziness by calling kvmppc_load_guest_fp() just before
returning to the guest instead of on each sched-in. Without this improvement
an interrupt may also claim the floating point unit and corrupt the guest state.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
 - no changes

v2:
 - remove fpu_active
 - add descriptive comments

 arch/powerpc/kvm/booke.c  | 43 ---
 arch/powerpc/kvm/booke.h  | 34 --
 arch/powerpc/kvm/e500mc.c |  2 --
 3 files changed, 36 insertions(+), 43 deletions(-)

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index b4c89fa..0c6f616 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -124,6 +124,40 @@ static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
 }
 #endif
 
+/*
+ * Load up guest vcpu FP state if it's needed.
+ * It also set the MSR_FP in thread so that host know
+ * we're holding FPU, and then host can help to save
+ * guest vcpu FP state if other threads require to use FPU.
+ * This simulates an FP unavailable fault.
+ *
+ * It requires to be called with preemption disabled.
+ */
+static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PPC_FPU
+   if (!(current->thread.regs->msr & MSR_FP)) {
+   enable_kernel_fp();
+   load_fp_state(&vcpu->arch.fp);
+   current->thread.fp_save_area = &vcpu->arch.fp;
+   current->thread.regs->msr |= MSR_FP;
+   }
+#endif
+}
+
+/*
+ * Save guest vcpu FP state into thread.
+ * It requires to be called with preemption disabled.
+ */
+static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PPC_FPU
+   if (current->thread.regs->msr & MSR_FP)
+   giveup_fpu(current);
+   current->thread.fp_save_area = NULL;
+#endif
+}
+
 static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
 {
 #if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV)
@@ -654,12 +688,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu)
 
/*
 * Since we can't trap on MSR_FP in GS-mode, we consider the guest
-* as always using the FPU.  Kernel usage of FP (via
-* enable_kernel_fp()) in this thread must not occur while
-* vcpu->fpu_active is set.
+* as always using the FPU.
 */
-   vcpu->fpu_active = 1;
-
kvmppc_load_guest_fp(vcpu);
 #endif
 
@@ -683,8 +713,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu)
 
 #ifdef CONFIG_PPC_FPU
kvmppc_save_guest_fp(vcpu);
-
-   vcpu->fpu_active = 0;
 #endif
 
 out:
@@ -1188,6 +1216,7 @@ out:
else {
/* interrupts now hard-disabled */
kvmppc_fix_ee_before_entry();
+   kvmppc_load_guest_fp(vcpu);
}
}
 
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index f753543..e73d513 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -116,40 +116,6 @@ extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu 
*vcpu, int sprn,
 extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
  ulong *spr_val);
 
-/*
- * Load up guest vcpu FP state if it's needed.
- * It also set the MSR_FP in thread so that host know
- * we're holding FPU, and then host can help to save
- * guest vcpu FP state if other threads require to use FPU.
- * This simulates an FP unavailable fault.
- *
- * It requires to be called with preemption disabled.
- */
-static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_PPC_FPU
-   if (vcpu->fpu_active && !(current->thread.regs->msr & MSR_FP)) {
-   enable_kernel_fp();
-   load_fp_state(&vcpu->arch.fp);
-   current->thread.fp_save_area = &vcpu->arch.fp;
-   current->thread.regs->msr |= MSR_FP;
-   }
-#endif
-}
-
-/*
- * Save guest vcpu FP state into thread.
- * It requires to be called with preemption disabled.
- */
-static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_PPC_FPU
-   if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP))
-   giveup_fpu(current);
-   current->thread.fp_save_area = NULL;
-#endif
-}
-
 static inline void kvmppc_clear_dbsr(void)
 {
mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 164bad2..67c06eb 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -145,8 +145,6 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu 
*vcpu, int cpu)
kvmppc_e500_tlbil_all(vcpu_e500);
__get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] = vcpu;
}
-
-   kvmppc_load_guest_fp(vcpu);
 }
 
 static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu)
-- 
1.7.11.7


Re: [PATCH v3] KVM: nVMX: nested TPR shadow/threshold emulation

2014-08-05 Thread Paolo Bonzini
On 05/08/2014 09:56, Zhang, Yang Z wrote:
 Wanpeng Li wrote on 2014-08-04:
 This patch fix bug https://bugzilla.kernel.org/show_bug.cgi?id=61411

 TPR shadow/threshold feature is important to speed up the Windows guest.
 Besides, it is a must feature for certain VMM.

 We map virtual APIC page address and TPR threshold from L1 VMCS. If
 TPR_BELOW_THRESHOLD VM exit is triggered by L2 guest and L1 interested
 in, we inject it into L1 VMM for handling.

 Signed-off-by: Wanpeng Li wanpeng...@linux.intel.com
 ---
 v2 - v3:
  * nested vm entry failure if both tpr shadow and cr8 exiting bits are not 
 set
 v1 - v2:
  * don't take L0's virtualize APIC accesses setting into account
  * virtual_apic_page do exactly the same thing that is done for
 apic_access_page
  * add the tpr threshold field to the read-write fields for shadow VMCS

  arch/x86/kvm/vmx.c | 38 --
  1 file changed, 36 insertions(+), 2 deletions(-)

 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index c604f3c..7a56e2c 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -379,6 +379,7 @@ struct nested_vmx {
   * we must keep them pinned while L2 runs.
   */
  struct page *apic_access_page;
 +struct page *virtual_apic_page;
  u64 msr_ia32_feature_control;

  struct hrtimer preemption_timer;
 @@ -533,6 +534,7 @@ static int max_shadow_read_only_fields =
  ARRAY_SIZE(shadow_read_only_fields);

  static unsigned long shadow_read_write_fields[] = {
 +TPR_THRESHOLD,
  GUEST_RIP,
  GUEST_RSP,
  GUEST_CR0,
 @@ -2330,7 +2332,7 @@ static __init void
 nested_vmx_setup_ctls_msrs(void)
  CPU_BASED_MOV_DR_EXITING |
 CPU_BASED_UNCOND_IO_EXITING |
  CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
  CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING |
 -CPU_BASED_PAUSE_EXITING |
 +CPU_BASED_PAUSE_EXITING | CPU_BASED_TPR_SHADOW |
  CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
  /*
   * We can allow some features even when not supported by the
 @@ -6148,6 +6150,10 @@ static void free_nested(struct vcpu_vmx *vmx)
  nested_release_page(vmx-nested.apic_access_page);
  vmx-nested.apic_access_page = 0;
  }
 +if (vmx-nested.virtual_apic_page) {
 +nested_release_page(vmx-nested.virtual_apic_page);
 +vmx-nested.virtual_apic_page = 0;
 +}

  nested_free_all_saved_vmcss(vmx);
  }
 @@ -6936,7 +6942,7 @@ static bool nested_vmx_exit_handled(struct
 kvm_vcpu *vcpu)
  case EXIT_REASON_MCE_DURING_VMENTRY:
  return 0;
  case EXIT_REASON_TPR_BELOW_THRESHOLD:
 -return 1;
 +return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
  case EXIT_REASON_APIC_ACCESS:
  return nested_cpu_has2(vmcs12,
  SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
 @@ -7057,6 +7063,9 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)

  static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
  {
 +if (is_guest_mode(vcpu))
 +return;
 +
  if (irr == -1 || tpr  irr) {
  vmcs_write32(TPR_THRESHOLD, 0);
  return;
 @@ -8024,6 +8033,27 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu,
 struct vmcs12 *vmcs12)
  exec_control = ~CPU_BASED_VIRTUAL_NMI_PENDING;
  exec_control = ~CPU_BASED_TPR_SHADOW;
  exec_control |= vmcs12-cpu_based_vm_exec_control;
 +
 +if (exec_control  CPU_BASED_TPR_SHADOW) {
 +if (vmx-nested.virtual_apic_page)
 +nested_release_page(vmx-nested.virtual_apic_page);
 +vmx-nested.virtual_apic_page =
 +   nested_get_page(vcpu, vmcs12-virtual_apic_page_addr);
 +if (!vmx-nested.virtual_apic_page)
 +exec_control =
 +~CPU_BASED_TPR_SHADOW;
 +else
 +vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
 +page_to_phys(vmx-nested.virtual_apic_page));
 +
 +if (!(exec_control  CPU_BASED_TPR_SHADOW) 
 +!((exec_control  CPU_BASED_CR8_LOAD_EXITING) 
 +(exec_control  CPU_BASED_CR8_STORE_EXITING)))
 +nested_vmx_failValid(vcpu,
 VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 
 I think this is not correct. The vmx->nested.virtual_apic_page may not be valid 
 for two reasons:
 1. The virtual_apic_page_addr is not a valid gfn. In this case, the VMX 
 failure must be injected to L1 unconditionally, regardless of the setting of 
 the CR8 load/store exiting controls.

You're right that accesses to the APIC-access page may also end up
writing to the virtual-APIC page.  Hence, if the virtualize APIC
accesses setting is 1 in the secondary exec controls, you also have to
fail the vmentry.

Doing it unconditionally is not correct, but it is the simplest thing to
do and it would be okay with a comment, I think.

 2. The 

Re: [PATCH v2 2/2] KVM: nVMX: fix acknowledge interrupt on exit when APICv is in use

2014-08-05 Thread Paolo Bonzini
On 05/08/2014 06:42, Wanpeng Li wrote:
 After commit 77b0f5d (KVM: nVMX: Ack and write vector info to intr_info
 if L1 asks us to), Acknowledge interrupt on exit behavior can be
 emulated. To do so, KVM will ask the APIC for the interrupt vector
 during a nested vmexit if VM_EXIT_ACK_INTR_ON_EXIT is set.  With APICv,
 kvm_get_apic_interrupt would return -1 and give the following WARNING:
 
 Call Trace:
  [81493563] dump_stack+0x49/0x5e
  [8103f0eb] warn_slowpath_common+0x7c/0x96
  [a059709a] ? nested_vmx_vmexit+0xa4/0x233 [kvm_intel]
  [8103f11a] warn_slowpath_null+0x15/0x17
  [a059709a] nested_vmx_vmexit+0xa4/0x233 [kvm_intel]
  [a0594295] ? nested_vmx_exit_handled+0x6a/0x39e [kvm_intel]
  [a0537931] ? kvm_apic_has_interrupt+0x80/0xd5 [kvm]
  [a05972ec] vmx_check_nested_events+0xc3/0xd3 [kvm_intel]
  [a051ebe9] inject_pending_event+0xd0/0x16e [kvm]
  [a051efa0] vcpu_enter_guest+0x319/0x704 [kvm]
 
 If APIC-v is enabled, all interrupts to L1 are delivered through APIC-v.
 But when L2 is running, an external interrupt will cause an L1 vmexit with
 reason "external interrupt". L1 will then pick up the interrupt through
 vmcs12. When L1 acks the interrupt, since APIC-v is enabled while
 L1 is running, the APIC-v hardware will still do the vEOI update. The problem
 is that the interrupt was not delivered through the APIC-v hardware, which means
 SVI/RVI/vPPR are not set, but the hardware requires them when doing the vEOI
 update. The solution is that, when L1 picks up the interrupt
 from vmcs12, the hypervisor helps to update SVI/RVI/vPPR to make
 sure the following vEOI update and vPPR update work correctly.
 
 Also, since the interrupt is delivered through vmcs12, the APIC-v hardware will
 not clear vIRR, and the hypervisor needs to clear it before L1 runs.
 
 Suggested-by: Paolo Bonzini pbonz...@redhat.com
 Suggested-by: Zhang, Yang Z yang.z.zh...@intel.com
 Tested-by: Liu, RongrongX rongrongx@intel.com
 Signed-off-by: Wanpeng Li wanpeng...@linux.intel.com
 ---
 v1 - v2:
  * reusing kvm_get_apic_interrupt here (by modifying kvm_cpu_get_interrupt, 
apic_set_isr and apic_clear_irr)
 
  arch/x86/kvm/irq.c   |  2 +-
  arch/x86/kvm/lapic.c | 52 
 +++-
  2 files changed, 40 insertions(+), 14 deletions(-)
 
 diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
 index bd0da43..a1ec6a5 100644
 --- a/arch/x86/kvm/irq.c
 +++ b/arch/x86/kvm/irq.c
 @@ -108,7 +108,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
  
   vector = kvm_cpu_get_extint(v);
  
 - if (kvm_apic_vid_enabled(v->kvm) || vector != -1)
 + if (vector != -1)
   return vector;  /* PIC */
  
   return kvm_get_apic_interrupt(v);   /* APIC */
 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
 index 3855103..08e8a89 100644
 --- a/arch/x86/kvm/lapic.c
 +++ b/arch/x86/kvm/lapic.c
 @@ -352,25 +352,46 @@ static inline int apic_find_highest_irr(struct 
 kvm_lapic *apic)
  
  static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
  {
 - apic->irr_pending = false;
 + struct kvm_vcpu *vcpu;
 +
 + vcpu = apic->vcpu;
 +
   apic_clear_vector(vec, apic->regs + APIC_IRR);
 - if (apic_search_irr(apic) != -1)
 - apic->irr_pending = true;
 + if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
 + /* try to update RVI */
 + kvm_make_request(KVM_REQ_EVENT, vcpu);
 + else {
 + vec = apic_search_irr(apic);
 + apic->irr_pending = (vec != -1);
 + }
  }
  
  static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
  {
 - /* Note that we never get here with APIC virtualization enabled.  */
 + struct kvm_vcpu *vcpu;
 +
 + if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
 + return;
 +
 + vcpu = apic->vcpu;
 
 - if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
 - ++apic->isr_count;
 - BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
   /*
 -  * ISR (in service register) bit is set when injecting an interrupt.
 -  * The highest vector is injected. Thus the latest bit set matches
 -  * the highest bit in ISR.
 +  * With APIC virtualization enabled, all caching is disabled
 +  * because the processor can modify ISR under the hood.  Instead
 +  * just set SVI.
*/
 - apic->highest_isr_cache = vec;
 + if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
 + kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec);
 + else {
 + ++apic->isr_count;
 + BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
 + /*
 +  * ISR (in service register) bit is set when injecting an 
 interrupt.
 +  * The highest vector is injected. Thus the latest bit set 
 matches
 +  * the highest bit in ISR.
 +  */
 + apic->highest_isr_cache = vec;
 +   

Re: Query: Is it possible to lose interrupts between vhost and virtio_net during migration?

2014-08-05 Thread Zhangjie (HZ)
On 2014/8/5 17:49, Michael S. Tsirkin wrote:
 On Tue, Aug 05, 2014 at 02:29:28PM +0800, Zhangjie (HZ) wrote:
 Jason is right, the new order is not the cause of the network being unreachable.
 Changing the order does not seem to work. After about 40 runs, the problem occurs 
 again.
 Maybe there is other hidden reasons for that.
I modified the code to change the order myself yesterday.
This result is about my code.
 
 To make sure, you tested the patch that I posted to list:
 vhost_net: stop guest notifiers after backend?
 
 Please confirm.
 
OK, I will test with your patch vhost_net: stop guest notifiers after backend.

-- 
Best Wishes!
Zhang Jie



Re: [PATCH 0/6] IRQFD without IRQ routing, enabled for XICS

2014-08-05 Thread Paolo Bonzini
On 30/06/2014 12:51, Paul Mackerras wrote:
 This series of patches provides a way to implement IRQFD support
 without having to implement IRQ routing, and adds IRQFD support for
 the XICS interrupt controller emulation.  (XICS is the interrupt
 controller defined for the pSeries machine type, used on IBM POWER
 servers).
 
 The basic approach is to make it easy for code other than irqchip.c to
 provide a mapping from a global interrupt number (GSI) to an irq
 routing entry (struct kvm_kernel_irq_routing_entry).  To make the
 lifetime of this routing entry easier to manage, we change the IRQFD
 code to keep a copy of the routing entry (for the MSI fast-path case)
 rather than a pointer to the routing entry.  Since the copy can't be
 updated atomically, we add a seqcount_t to make sure that when reading
 it we get a copy that hasn't been half-way updated.
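 For readers unfamiliar with the pattern, the consumer side of such a
 seqcount-protected copy looks roughly like this (a sketch only; the
 struct and field names, irqfd, irq_entry and irq_entry_sc, are my
 assumptions about the series, not quoted from it):
 
 	/* Retry until a consistent snapshot of the cached route is read. */
 	static void irqfd_copy_routing(struct my_irqfd *irqfd,
 				       struct kvm_kernel_irq_routing_entry *entry)
 	{
 		unsigned seq;
 
 		do {
 			seq = read_seqcount_begin(&irqfd->irq_entry_sc);
 			*entry = irqfd->irq_entry;
 		} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
 	}
 
 The writer updates the copy between write_seqcount_begin()/end(), so a
 reader that races with an update simply retries instead of taking a lock.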
 
 Next we replace the hard-coded accesses outside irqchip.c to the
 fields of the kvm_irq_routing_table struct with calls to accessor
 functions in irqchip.c, namely kvm_irq_map_gsi() and
 kvm_irq_map_chip_pin().  That enables us to move all references to the
 kvm_irq_routing_table struct, and the definition of that struct, into
 irqchip.c.
 
 Then we move the irq notifier implementation from irqchip.c into
 eventfd.c and add a separate Kconfig option to enable IRQFD.  With
 that we can enable IRQFD without irq routing, which we achieve by
 compiling in eventfd.c but not irqchip.c, and providing an alternative
 implementation of kvm_irq_map_gsi() and kvm_irq_map_chip_pin().
 
 The last patch does that for XICS.  With this series I can use
 vhost-net with KVM guests, and I see the TCP bandwidth between guest
 and host on a POWER8 machine go from around 700MB/s to over 2GB/s.
 
 I would like to see this go into 3.17.
 
  arch/ia64/kvm/Kconfig|   1 +
  arch/powerpc/kvm/Kconfig |   3 +
  arch/powerpc/kvm/book3s_hv_rm_xics.c |   5 ++
  arch/powerpc/kvm/book3s_xics.c   |  55 +++---
  arch/powerpc/kvm/book3s_xics.h   |   2 +
  arch/powerpc/kvm/mpic.c  |   4 +-
  arch/s390/kvm/Kconfig|   1 +
  arch/s390/kvm/interrupt.c|   3 +-
  arch/x86/kvm/Kconfig |   1 +
  include/linux/kvm_host.h |  43 ---
  virt/kvm/Kconfig |   3 +
  virt/kvm/eventfd.c   | 134 
 ++-
  virt/kvm/irq_comm.c  |  24 +++
  virt/kvm/irqchip.c   |  98 ++---
  virt/kvm/kvm_main.c  |   2 +-
  15 files changed, 227 insertions(+), 152 deletions(-)
 

Applied for 3.17, thanks Eric and Cornelia for testing.

Paolo


Re: [PATCH] KVM: PPC: drop duplicate tracepoint

2014-08-05 Thread Paolo Bonzini
On 05/08/2014 12:35, Paolo Bonzini wrote:
 diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
 index e96b50d0bdab..2325168ad1ff 100644
 --- a/arch/powerpc/kvm/emulate.c
 +++ b/arch/powerpc/kvm/emulate.c
 @@ -300,3 +300,5 @@ int kvmppc_emulate_instruction(struct kvm_run *run, 
 struct kvm_vcpu *vcpu)
   return emulated;
  }
  EXPORT_SYMBOL_GPL(kvmppc_emulate_instruction);
 +
 +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr);

Nope, this is in kvm-pr.ko so it is not enough... replaced with

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 288b4bb05cbd..4c79284b58be 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -1347,3 +1347,5 @@ void kvm_arch_exit(void)
 {

 }
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr);


Paolo


Re: [PATCH v2 2/2] KVM: nVMX: fix acknowledge interrupt on exit when APICv is in use

2014-08-05 Thread Felipe Reyes

Hi,

On 08/05/2014 01:04 PM, Paolo Bonzini wrote:

On 05/08/2014 06:42, Wanpeng Li wrote:

After commit 77b0f5d (KVM: nVMX: Ack and write vector info to intr_info
if L1 asks us to), Acknowledge interrupt on exit behavior can be
emulated. To do so, KVM will ask the APIC for the interrupt vector if
during a nested vmexit if VM_EXIT_ACK_INTR_ON_EXIT is set.  With APICv,
kvm_get_apic_interrupt would return -1 and give the following WARNING:

Call Trace:
  [81493563] dump_stack+0x49/0x5e
  [8103f0eb] warn_slowpath_common+0x7c/0x96
  [a059709a] ? nested_vmx_vmexit+0xa4/0x233 [kvm_intel]
  [8103f11a] warn_slowpath_null+0x15/0x17
  [a059709a] nested_vmx_vmexit+0xa4/0x233 [kvm_intel]
  [a0594295] ? nested_vmx_exit_handled+0x6a/0x39e [kvm_intel]
  [a0537931] ? kvm_apic_has_interrupt+0x80/0xd5 [kvm]
  [a05972ec] vmx_check_nested_events+0xc3/0xd3 [kvm_intel]
  [a051ebe9] inject_pending_event+0xd0/0x16e [kvm]
  [a051efa0] vcpu_enter_guest+0x319/0x704 [kvm]

If enabling APIC-v, all interrupts to L1 are delivered through APIC-v.
But when L2 is running, external interrupt will casue L1 vmexit with
reason external interrupt. Then L1 will pick up the interrupt through
vmcs12. when L1 ack the interrupt, since the APIC-v is enabled when
L1 is running, so APIC-v hardware still will do vEOI updating. The problem
is that the interrupt is delivered not through APIC-v hardware, this means
SVI/RVI/vPPR are not setting, but hardware required them when doing vEOI
updating. The solution is that, when L1 tried to pick up the interrupt
from vmcs12, then hypervisor will help to update the SVI/RVI/vPPR to make
sure the following vEOI updating and vPPR updating corrently.

Also, since interrupt is delivered through vmcs12, so APIC-v hardware will
not cleare vIRR and hypervisor need to clear it before L1 running.

Suggested-by: Paolo Bonzini pbonz...@redhat.com
Suggested-by: Zhang, Yang Z yang.z.zh...@intel.com
Tested-by: Liu, RongrongX rongrongx@intel.com
Signed-off-by: Wanpeng Li wanpeng...@linux.intel.com
---
v1 - v2:
  * reusing kvm_get_apic_interrupt here (by modifying kvm_cpu_get_interrupt,
apic_set_isr and apic_clear_irr)

  arch/x86/kvm/irq.c   |  2 +-
  arch/x86/kvm/lapic.c | 52 +++-
  2 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index bd0da43..a1ec6a5 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -108,7 +108,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)

vector = kvm_cpu_get_extint(v);

-   if (kvm_apic_vid_enabled(v-kvm) || vector != -1)
+   if (vector != -1)
return vector;  /* PIC */

return kvm_get_apic_interrupt(v);   /* APIC */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3855103..08e8a89 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -352,25 +352,46 @@ static inline int apic_find_highest_irr(struct kvm_lapic 
*apic)

  static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
  {
-   apic-irr_pending = false;
+   struct kvm_vcpu *vcpu;
+
+   vcpu = apic-vcpu;
+
apic_clear_vector(vec, apic-regs + APIC_IRR);
-   if (apic_search_irr(apic) != -1)
-   apic-irr_pending = true;
+   if (unlikely(kvm_apic_vid_enabled(vcpu-kvm)))
+   /* try to update RVI */
+   kvm_make_request(KVM_REQ_EVENT, vcpu);
+   else {
+   vec = apic_search_irr(apic);
+   apic-irr_pending = (vec != -1);
+   }
  }

  static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
  {
-   /* Note that we never get here with APIC virtualization enabled.  */
+   struct kvm_vcpu *vcpu;
+
+   if (__apic_test_and_set_vector(vec, apic-regs + APIC_ISR))
+   return;
+
+   vcpu = apic-vcpu;

-   if (!__apic_test_and_set_vector(vec, apic-regs + APIC_ISR))
-   ++apic-isr_count;
-   BUG_ON(apic-isr_count  MAX_APIC_VECTOR);
/*
-* ISR (in service register) bit is set when injecting an interrupt.
-* The highest vector is injected. Thus the latest bit set matches
-* the highest bit in ISR.
+* With APIC virtualization enabled, all caching is disabled
+* because the processor can modify ISR under the hood.  Instead
+* just set SVI.
 */
-   apic-highest_isr_cache = vec;
+   if (unlikely(kvm_apic_vid_enabled(vcpu-kvm)))
+   kvm_x86_ops-hwapic_isr_update(vcpu-kvm, vec);
+   else {
+   ++apic-isr_count;
+   BUG_ON(apic-isr_count  MAX_APIC_VECTOR);
+   /*
+* ISR (in service register) bit is set when injecting an 
interrupt.
+* The highest vector is injected. Thus the latest bit set 
matches
+* the highest bit in ISR.
+*/
+   

Re: [PATCH v2 2/2] KVM: nVMX: fix acknowledge interrupt on exit when APICv is in use

2014-08-05 Thread Wanpeng Li
On Tue, Aug 05, 2014 at 02:39:05PM +0200, Felipe Reyes wrote:
Hi,

On 08/05/2014 01:04 PM, Paolo Bonzini wrote:
On 05/08/2014 06:42, Wanpeng Li wrote:
After commit 77b0f5d (KVM: nVMX: Ack and write vector info to intr_info
if L1 asks us to), Acknowledge interrupt on exit behavior can be
emulated. To do so, KVM will ask the APIC for the interrupt vector if
during a nested vmexit if VM_EXIT_ACK_INTR_ON_EXIT is set.  With APICv,
kvm_get_apic_interrupt would return -1 and give the following WARNING:

Call Trace:
  [81493563] dump_stack+0x49/0x5e
  [8103f0eb] warn_slowpath_common+0x7c/0x96
  [a059709a] ? nested_vmx_vmexit+0xa4/0x233 [kvm_intel]
  [8103f11a] warn_slowpath_null+0x15/0x17
  [a059709a] nested_vmx_vmexit+0xa4/0x233 [kvm_intel]
  [a0594295] ? nested_vmx_exit_handled+0x6a/0x39e [kvm_intel]
  [a0537931] ? kvm_apic_has_interrupt+0x80/0xd5 [kvm]
  [a05972ec] vmx_check_nested_events+0xc3/0xd3 [kvm_intel]
  [a051ebe9] inject_pending_event+0xd0/0x16e [kvm]
  [a051efa0] vcpu_enter_guest+0x319/0x704 [kvm]

If enabling APIC-v, all interrupts to L1 are delivered through APIC-v.
But when L2 is running, external interrupt will casue L1 vmexit with
reason external interrupt. Then L1 will pick up the interrupt through
vmcs12. when L1 ack the interrupt, since the APIC-v is enabled when
L1 is running, so APIC-v hardware still will do vEOI updating. The problem
is that the interrupt is delivered not through APIC-v hardware, this means
SVI/RVI/vPPR are not setting, but hardware required them when doing vEOI
updating. The solution is that, when L1 tried to pick up the interrupt
from vmcs12, then hypervisor will help to update the SVI/RVI/vPPR to make
sure the following vEOI updating and vPPR updating corrently.

Also, since interrupt is delivered through vmcs12, so APIC-v hardware will
not cleare vIRR and hypervisor need to clear it before L1 running.

Suggested-by: Paolo Bonzini pbonz...@redhat.com
Suggested-by: Zhang, Yang Z yang.z.zh...@intel.com
Tested-by: Liu, RongrongX rongrongx@intel.com
Signed-off-by: Wanpeng Li wanpeng...@linux.intel.com
---
v1 - v2:
  * reusing kvm_get_apic_interrupt here (by modifying kvm_cpu_get_interrupt,
apic_set_isr and apic_clear_irr)

  arch/x86/kvm/irq.c   |  2 +-
  arch/x86/kvm/lapic.c | 52 
 +++-
  2 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index bd0da43..a1ec6a5 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -108,7 +108,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)

 vector = kvm_cpu_get_extint(v);

-if (kvm_apic_vid_enabled(v-kvm) || vector != -1)
+if (vector != -1)
 return vector;  /* PIC */

 return kvm_get_apic_interrupt(v);   /* APIC */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3855103..08e8a89 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -352,25 +352,46 @@ static inline int apic_find_highest_irr(struct 
kvm_lapic *apic)

  static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
  {
-apic-irr_pending = false;
+struct kvm_vcpu *vcpu;
+
+vcpu = apic-vcpu;
+
 apic_clear_vector(vec, apic-regs + APIC_IRR);
-if (apic_search_irr(apic) != -1)
-apic-irr_pending = true;
+if (unlikely(kvm_apic_vid_enabled(vcpu-kvm)))
+/* try to update RVI */
+kvm_make_request(KVM_REQ_EVENT, vcpu);
+else {
+vec = apic_search_irr(apic);
+apic-irr_pending = (vec != -1);
+}
  }

  static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
  {
-/* Note that we never get here with APIC virtualization enabled.  */
+struct kvm_vcpu *vcpu;
+
+if (__apic_test_and_set_vector(vec, apic-regs + APIC_ISR))
+return;
+
+vcpu = apic-vcpu;

-if (!__apic_test_and_set_vector(vec, apic-regs + APIC_ISR))
-++apic-isr_count;
-BUG_ON(apic-isr_count  MAX_APIC_VECTOR);
 /*
- * ISR (in service register) bit is set when injecting an interrupt.
- * The highest vector is injected. Thus the latest bit set matches
- * the highest bit in ISR.
+ * With APIC virtualization enabled, all caching is disabled
+ * because the processor can modify ISR under the hood.  Instead
+ * just set SVI.
  */
-apic-highest_isr_cache = vec;
+if (unlikely(kvm_apic_vid_enabled(vcpu-kvm)))
+kvm_x86_ops-hwapic_isr_update(vcpu-kvm, vec);
+else {
+++apic-isr_count;
+BUG_ON(apic-isr_count  MAX_APIC_VECTOR);
+/*
+ * ISR (in service register) bit is set when injecting an 
interrupt.
+ * The highest vector is injected. Thus the latest bit set 
matches
+ * the highest bit in ISR.
+ */
+apic-highest_isr_cache = vec;
+}
  }

  static inline int 

[RFC] Synchronizing TSC of All vCPU

2014-08-05 Thread Zhanghailiang
Hi,

Each vCPU has its own TSC. A desirable property of such a system is that 
the TSCs of all of the vCPUs in a VM are exactly synchronized.
If the TSCs are exactly synchronized, when software reads the TSC of vCPU A and 
then reads the TSC of vCPU B,
the read on B is guaranteed to be a larger value than the read on A. If the 
TSCs are not exactly synchronized, 
this is not necessarily true.

Now in KVM, we can't guarantee that all the vCPUs' TSCs are exactly synchronized,
because we can't guarantee that all vCPU threads are scheduled at the same time
after the VM starts.
The offsets between the vCPUs' TSCs get worse when the host is under heavy load.

So is it necessary to sync all vCPUs' TSCs when the guest starts? Maybe we could
set the tsc_offset to the same value
when each vCPU is created. Is that acceptable?
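
A minimal sketch of the idea, for illustration only; the per-VM field
init_tsc and the exact hook are hypothetical, and I am only assuming the
existing native_read_tsc()/write_tsc_offset() primitives as building blocks:

	/*
	 * Illustrative only: give every vCPU the same TSC offset at creation
	 * time so the guest TSCs start out aligned.  kvm->arch.init_tsc is a
	 * hypothetical field, not something that exists today.
	 */
	static void kvm_set_common_tsc_offset(struct kvm_vcpu *vcpu)
	{
		struct kvm *kvm = vcpu->kvm;

		/* Sample the host TSC once, when the first vCPU is created. */
		if (!kvm->arch.init_tsc)
			kvm->arch.init_tsc = native_read_tsc();

		/* guest_tsc = host_tsc + offset, so all guest TSCs start near 0. */
		kvm_x86_ops->write_tsc_offset(vcpu, 0 - kvm->arch.init_tsc);
	}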


Thanks,
zhanghailiang


Re: KVM call for agenda for 2014-08-05

2014-08-05 Thread Juan Quintela
Juan Quintela quint...@redhat.com wrote:
 Resent, this time with the right mailing lists.

 Thanks to Markus for noticing.

 Later, Juan.

 Juan Quintela quint...@redhat.com wrote:

No agenda, no call.  Sorry for the late advice :-(

Later, Juan.


Re: [PATCH v3 2/9] arm/arm64: KVM: vgic: switch to dynamic allocation

2014-08-05 Thread Christoffer Dall
On Tue, Jul 08, 2014 at 12:09:01PM +0100, Marc Zyngier wrote:
 So far, all the VGIC data structures are statically defined by the
 *maximum* number of vcpus and interrupts it supports. It means that
 we always have to oversize it to cater for the worst case.
 
 Start by changing the data structures to be dynamically sizeable,
 and allocate them at runtime.
 
 The sizes are still very static though.
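 
 Concretely, the private/shared pointers introduced below imply an
 allocator along these lines (a sketch under the assumption that both
 parts are carved out of a single allocation; not quoted from the patch,
 whose allocator is truncated in this archive):
 
 	static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs)
 	{
 		int nr_longs;
 
 		/* one unsigned long per vCPU, plus the shared-IRQ bitmap */
 		nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);
 
 		b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL);
 		if (!b->private)
 			return -ENOMEM;
 
 		b->shared = b->private + nr_cpus;
 
 		return 0;
 	}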
 
 Signed-off-by: Marc Zyngier marc.zyng...@arm.com
 ---
  arch/arm/kvm/arm.c |   3 +
  include/kvm/arm_vgic.h |  44 ++
  virt/kvm/arm/vgic.c| 232 
 ++---
  3 files changed, 231 insertions(+), 48 deletions(-)
 
 diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
 index 3c82b37..782632e 100644
 --- a/arch/arm/kvm/arm.c
 +++ b/arch/arm/kvm/arm.c
 @@ -182,6 +182,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm->vcpus[i] = NULL;
   }
   }
 +
 + kvm_vgic_destroy(kvm);
  }
  
  int kvm_dev_ioctl_check_extension(long ext)
 @@ -290,6 +292,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
  {
   kvm_mmu_free_memory_caches(vcpu);
   kvm_timer_vcpu_terminate(vcpu);
 + kvm_vgic_vcpu_destroy(vcpu);
   kmem_cache_free(kvm_vcpu_cache, vcpu);
  }
  
 diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
 index 35b0c12..2246f4c 100644
 --- a/include/kvm/arm_vgic.h
 +++ b/include/kvm/arm_vgic.h
 @@ -54,19 +54,24 @@
   * - a bunch of shared interrupts (SPI)
   */
  struct vgic_bitmap {
 - union {
 - u32 reg[VGIC_NR_PRIVATE_IRQS / 32];
 - DECLARE_BITMAP(reg_ul, VGIC_NR_PRIVATE_IRQS);
 - } percpu[VGIC_MAX_CPUS];
 - union {
 - u32 reg[VGIC_NR_SHARED_IRQS / 32];
 - DECLARE_BITMAP(reg_ul, VGIC_NR_SHARED_IRQS);
 - } shared;
 + /*
 +  * - One UL per VCPU for private interrupts (assumes UL is at
 +  * least 32 bits)

nit: consider indenting the second line two spaces.

 +  * - As many UL as necessary for shared interrupts.
 +  */

strictly speaking we're not documenting where in the allocated array one
is supposed to find the state for VCPU0 or where in the array one would
find the bit for IRQ 32 (I assume it's vgic_bitmap->private[0] and
vgic_bitmap->shared[0] & 1, respectively).  Maybe I'm being ridiculous
and this is obvious.
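
For what it's worth, my reading of the layout is the following (a sketch,
names illustrative, not taken from the patch):

	/*
	 * Private IRQ i of vCPU c is bit i of b->private[c]; shared IRQ n
	 * (n >= VGIC_NR_PRIVATE_IRQS) is bit (n - VGIC_NR_PRIVATE_IRQS) of
	 * the b->shared bitmap.
	 */
	static int vgic_bitmap_get(struct vgic_bitmap *b, int cpuid, int irq)
	{
		if (irq < VGIC_NR_PRIVATE_IRQS)
			return test_bit(irq, &b->private[cpuid]);

		return test_bit(irq - VGIC_NR_PRIVATE_IRQS, b->shared);
	}

Spelling out that indexing in the comment would remove the guesswork.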

 + int nr_cpus;
 + unsigned long *private;
 + unsigned long *shared;
  };
  
  struct vgic_bytemap {
 - u32 percpu[VGIC_MAX_CPUS][VGIC_NR_PRIVATE_IRQS / 4];
 - u32 shared[VGIC_NR_SHARED_IRQS  / 4];
 + /*
 +  * - 8 u32 per VCPU for private interrupts
 +  * - As many u32 as necessary for shared interrupts.
 +  */
 + int nr_cpus;

why are we adding this field?  I can understand if you wanted to have
the data structure be self-contained, but then the destroy code should
rely on it.  As far as I can see, the patches only ever set this value
and never read it...

 + u32 *private;
 + u32 *shared;
  };
  
  struct kvm_vcpu;
 @@ -127,6 +132,9 @@ struct vgic_dist {
   boolin_kernel;
   boolready;
  
 + int nr_cpus;
 + int nr_irqs;
 +
   /* Virtual control interface mapping */
   void __iomem*vctrl_base;
  
 @@ -152,15 +160,15 @@ struct vgic_dist {
   /* Level/edge triggered */
   struct vgic_bitmap  irq_cfg;
  
 - /* Source CPU per SGI and target CPU */
 - u8  irq_sgi_sources[VGIC_MAX_CPUS][VGIC_NR_SGIS];
 + /* Source CPU per SGI and target CPU : 16 bytes per CPU */
 + u8  *irq_sgi_sources;

the data layout definition is not complete, I think you're grouping this
per CPU, not the other way around.  Can you be slightly more specific in
the comment?

(I think I asked about this in my original review way back too.)

  
   /* Target CPU for each IRQ */
 - u8  irq_spi_cpu[VGIC_NR_SHARED_IRQS];
 - struct vgic_bitmap  irq_spi_target[VGIC_MAX_CPUS];
 + u8  *irq_spi_cpu;
 + struct vgic_bitmap  *irq_spi_target;

As I commented before, it gets really hard to know what these are for
when the array sizes are gone. Can we clarify that each byte in
irq_spi_cpu gives you the (mask or number?) of the target CPU, indexed
by irq_num - 32?

could we add something like the above before the irq_spi_target:

/* Reverse lookup of irq_spi_cpu for faster compute pending */ ?

I know we have accessors, but if we find bugs in any of that it's really
helpful to know what the intention behind this was.

  
   /* Bitmap indicating which CPU has something pending */
 - unsigned long   irq_pending_on_cpu;
 + unsigned long   *irq_pending_on_cpu;
  #endif
  };
  
 @@ -190,11 +198,11 @@ struct vgic_v3_cpu_if {
  struct vgic_cpu {
  #ifdef CONFIG_KVM_ARM_VGIC
   /* per IRQ to LR mapping */
 - u8  

Re: [PATCH v3 3/9] arm/arm64: KVM: vgic: Parametrize VGIC_NR_SHARED_IRQS

2014-08-05 Thread Christoffer Dall
On Tue, Jul 08, 2014 at 12:09:02PM +0100, Marc Zyngier wrote:
 Having a dynamic number of supported interrupts means that we
 cannot rely on VGIC_NR_SHARED_IRQS being fixed anymore.
 
 Instead, make it take the distributor structure as a parameter,
 so it can return the right value.
 
 Signed-off-by: Marc Zyngier marc.zyng...@arm.com
 ---
  include/kvm/arm_vgic.h |  1 -
  virt/kvm/arm/vgic.c| 16 +++-
  2 files changed, 11 insertions(+), 6 deletions(-)
 
 diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
 index 2246f4c..b8a6337 100644
 --- a/include/kvm/arm_vgic.h
 +++ b/include/kvm/arm_vgic.h
 @@ -29,7 +29,6 @@
  #define VGIC_NR_SGIS 16
  #define VGIC_NR_PPIS 16
  #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS)
 -#define VGIC_NR_SHARED_IRQS  (VGIC_NR_IRQS - VGIC_NR_PRIVATE_IRQS)
  #define VGIC_MAX_CPUSKVM_MAX_VCPUS
  
  #define VGIC_V2_MAX_LRS  (1  6)
 diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
 index 754d1cd..6f7cf85 100644
 --- a/virt/kvm/arm/vgic.c
 +++ b/virt/kvm/arm/vgic.c
 @@ -995,11 +995,17 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, 
 u32 reg)
   }
  }
  
 +static int vgic_nr_shared_irqs(struct vgic_dist *dist)
 +{
 + return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
 +}
 +
  static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
  {
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
   unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
   unsigned long pending_private, pending_shared;
 + int shared = vgic_nr_shared_irqs(dist);

nit: prefer nr_shared

   int vcpu_id;
  
vcpu_id = vcpu->vcpu_id;
 @@ -1012,15 +1018,15 @@ static int compute_pending_for_cpu(struct kvm_vcpu 
 *vcpu)
  
   pending = vgic_bitmap_get_shared_map(&dist->irq_state);
   enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
 - bitmap_and(pend_shared, pending, enabled, VGIC_NR_SHARED_IRQS);
 + bitmap_and(pend_shared, pending, enabled, shared);
   bitmap_and(pend_shared, pend_shared,
  vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
 -VGIC_NR_SHARED_IRQS);
 +shared);
 
   pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
 - pending_shared = find_first_bit(pend_shared, VGIC_NR_SHARED_IRQS);
 + pending_shared = find_first_bit(pend_shared, shared);
   return (pending_private < VGIC_NR_PRIVATE_IRQS ||
 - pending_shared < VGIC_NR_SHARED_IRQS);
 + pending_shared < vgic_nr_shared_irqs(dist));
  }
  
  /*
 @@ -1277,7 +1283,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu 
 *vcpu)
   }
  
   /* SPIs */
 - for_each_set_bit(i, vgic_cpu->pending_shared, VGIC_NR_SHARED_IRQS) {
 + for_each_set_bit(i, vgic_cpu->pending_shared, 
 vgic_nr_shared_irqs(dist)) {
   if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
   overflow = 1;
   }
 -- 
 2.0.0
 

There are a number of places in patch 2 where you do nr_irqs -
VGIC_NR_PRIVATE_IRQS which you could change to use this as well now.

Otherwise:

Reviewed-by: Christoffer Dall christoffer.d...@linaro.org


Re: [PATCH v3 6/9] arm/arm64: KVM: vgic: kill VGIC_NR_IRQS

2014-08-05 Thread Christoffer Dall
On Tue, Jul 08, 2014 at 12:09:05PM +0100, Marc Zyngier wrote:
 Nuke VGIC_NR_IRQS entirely, now that the distributor instance
 contains the number of IRQ allocated to this GIC.
 
 Also add VGIC_NR_IRQS_LEGACY to preserve the current API.
 
 Signed-off-by: Marc Zyngier marc.zyng...@arm.com
 ---
  include/kvm/arm_vgic.h |  6 +++---
  virt/kvm/arm/vgic.c| 17 +++--
  2 files changed, 14 insertions(+), 9 deletions(-)
 
 diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
 index 98ab604..9feb7fe 100644
 --- a/include/kvm/arm_vgic.h
 +++ b/include/kvm/arm_vgic.h
 @@ -25,7 +25,7 @@
  #include linux/spinlock.h
  #include linux/types.h
  
 -#define VGIC_NR_IRQS 256
 +#define VGIC_NR_IRQS_LEGACY  256
  #define VGIC_NR_SGIS 16
  #define VGIC_NR_PPIS 16
  #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS)
 @@ -39,11 +39,11 @@
  #error   Invalid number of CPU interfaces
  #endif
  
-#if (VGIC_NR_IRQS & 31)
+#if (VGIC_NR_IRQS_LEGACY & 31)
 #error VGIC_NR_IRQS must be a multiple of 32
 #endif
 
-#if (VGIC_NR_IRQS > VGIC_MAX_IRQS)
+#if (VGIC_NR_IRQS_LEGACY > VGIC_MAX_IRQS)
 #error VGIC_NR_IRQS must be <= 1024
  #endif
  
 diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
 index b2ef7ff..47a14a1 100644
 --- a/virt/kvm/arm/vgic.c
 +++ b/virt/kvm/arm/vgic.c
 @@ -395,7 +395,7 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
  
   case 4: /* GICD_TYPER */
   reg  = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
 - reg |= (VGIC_NR_IRQS >> 5) - 1;
 + reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1;
   vgic_reg_access(mmio, reg, word_offset,
   ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
   break;
 @@ -1186,13 +1186,14 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu 
 *vcpu)
  static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
  {
   struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 + struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
   struct vgic_lr vlr;
   int lr;
  
   /* Sanitize the input... */
   BUG_ON(sgi_source_id & ~7);
   BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS);
 - BUG_ON(irq >= VGIC_NR_IRQS);
 + BUG_ON(irq >= dist->nr_irqs);
  
   kvm_debug("Queue IRQ%d\n", irq);
  
 @@ -1409,7 +1410,7 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu 
 *vcpu)
  
   vlr = vgic_get_lr(vcpu, lr);
  
 - BUG_ON(vlr.irq >= VGIC_NR_IRQS);
 + BUG_ON(vlr.irq >= dist->nr_irqs);
   vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
   }
  
 @@ -1620,7 +1621,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
   if (vcpu->vcpu_id >= dist->nr_cpus)
   return -EBUSY;
  
 - for (i = 0; i < VGIC_NR_IRQS; i++) {
 + for (i = 0; i < dist->nr_irqs; i++) {
   if (i < VGIC_NR_PPIS)
   vgic_bitmap_set_irq_val(&dist->irq_enabled,
   vcpu->vcpu_id, i, 1);
 @@ -1759,7 +1760,11 @@ static int vgic_init_maps(struct kvm *kvm)
   int ret, i;
  
 - nr_cpus = dist->nr_cpus = KVM_MAX_VCPUS;
 - nr_irqs = dist->nr_irqs = VGIC_NR_IRQS;
 +
 + if (!dist->nr_irqs)
 + dist->nr_irqs = VGIC_NR_IRQS_LEGACY;
 +
 + nr_irqs = dist->nr_irqs;
  
   ret  = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);
   ret |= vgic_init_bitmap(&dist->irq_state, nr_cpus, nr_irqs);
 @@ -1841,7 +1846,7 @@ int kvm_vgic_init(struct kvm *kvm)
   goto out;
   }
  
 - for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4)
 + for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4)
   vgic_set_target_reg(kvm, 0, i);
  
   kvm->arch.vgic.ready = true;
 -- 
 2.0.0
 

Reviewed-by: Christoffer Dall christoffer.d...@linaro.org


Re: [PATCH v3 5/9] arm/arm64: KVM: vgic: handle out-of-range MMIO accesses

2014-08-05 Thread Christoffer Dall
On Tue, Jul 08, 2014 at 12:09:04PM +0100, Marc Zyngier wrote:
 Now that we can (almost) dynamically size the number of interrupts,
 we're facing an interesting issue:
 
 We have to evaluate at runtime whether or not an access hits a valid
 register, based on the sizing of this particular instance of the
 distributor. Furthermore, the GIC spec says that accessing a reserved
 register is RAZ/WI.
 
 For this, add a new field to our range structure, indicating the number
 of bits a single interrupt uses. That allows us to find out whether or
 not the access is in range.
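 
 In other words, the check boils down to something like this (a sketch of
 the idea only; the exact helper added by the patch is truncated in this
 archive and may differ):
 
 	static bool vgic_validate_access(const struct vgic_dist *dist,
 					 const struct mmio_range *range,
 					 unsigned long offset)
 	{
 		int irq;
 
 		if (!range->bits_per_irq)
 			return true;	/* not an irq-indexed register */
 
 		/* which interrupt does this offset address? */
 		irq = offset * 8 / range->bits_per_irq;
 
 		return irq < dist->nr_irqs;
 	}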
 
 Signed-off-by: Marc Zyngier marc.zyng...@arm.com
 ---
  include/kvm/arm_vgic.h |  3 ++-
  virt/kvm/arm/vgic.c| 56 
 --
  2 files changed, 47 insertions(+), 12 deletions(-)
 
 diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
 index 99ad8af..98ab604 100644
 --- a/include/kvm/arm_vgic.h
 +++ b/include/kvm/arm_vgic.h
 @@ -32,6 +32,7 @@
  
 #define VGIC_V2_MAX_LRS  (1 << 6)
  #define VGIC_V3_MAX_LRS  16
 +#define VGIC_MAX_IRQS1024
  
  /* Sanity checks... */
  #if (KVM_MAX_VCPUS  8)
 @@ -42,7 +43,7 @@
  #error VGIC_NR_IRQS must be a multiple of 32
  #endif
  
 -#if (VGIC_NR_IRQS > 1024)
 +#if (VGIC_NR_IRQS > VGIC_MAX_IRQS)
  #error VGIC_NR_IRQS must be <= 1024
  #endif
  
 diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
 index 3cb667c..b2ef7ff 100644
 --- a/virt/kvm/arm/vgic.c
 +++ b/virt/kvm/arm/vgic.c
 @@ -804,6 +804,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
  struct mmio_range {
   phys_addr_t base;
   unsigned long len;
 + int bits_per_irq;
   bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
   phys_addr_t offset);
  };
 @@ -812,56 +813,67 @@ static const struct mmio_range vgic_dist_ranges[] = {
   {
   .base   = GIC_DIST_CTRL,
   .len= 12,
 + .bits_per_irq   = 0,
   .handle_mmio= handle_mmio_misc,
   },
   {
   .base   = GIC_DIST_IGROUP,
 - .len= VGIC_NR_IRQS / 8,
 + .len= VGIC_MAX_IRQS / 8,
 + .bits_per_irq   = 1,
   .handle_mmio= handle_mmio_raz_wi,
   },
   {
   .base   = GIC_DIST_ENABLE_SET,
 - .len= VGIC_NR_IRQS / 8,
 + .len= VGIC_MAX_IRQS / 8,
 + .bits_per_irq   = 1,
   .handle_mmio= handle_mmio_set_enable_reg,
   },
   {
   .base   = GIC_DIST_ENABLE_CLEAR,
 - .len= VGIC_NR_IRQS / 8,
 + .len= VGIC_MAX_IRQS / 8,
 + .bits_per_irq   = 1,
   .handle_mmio= handle_mmio_clear_enable_reg,
   },
   {
   .base   = GIC_DIST_PENDING_SET,
 - .len= VGIC_NR_IRQS / 8,
 + .len= VGIC_MAX_IRQS / 8,
 + .bits_per_irq   = 1,
   .handle_mmio= handle_mmio_set_pending_reg,
   },
   {
   .base   = GIC_DIST_PENDING_CLEAR,
 - .len= VGIC_NR_IRQS / 8,
 + .len= VGIC_MAX_IRQS / 8,
 + .bits_per_irq   = 1,
   .handle_mmio= handle_mmio_clear_pending_reg,
   },
   {
   .base   = GIC_DIST_ACTIVE_SET,
 - .len= VGIC_NR_IRQS / 8,
 + .len= VGIC_MAX_IRQS / 8,
 + .bits_per_irq   = 1,
   .handle_mmio= handle_mmio_raz_wi,
   },
   {
   .base   = GIC_DIST_ACTIVE_CLEAR,
 - .len= VGIC_NR_IRQS / 8,
 + .len= VGIC_MAX_IRQS / 8,
 + .bits_per_irq   = 1,
   .handle_mmio= handle_mmio_raz_wi,
   },
   {
   .base   = GIC_DIST_PRI,
 - .len= VGIC_NR_IRQS,
 + .len= VGIC_MAX_IRQS,
 + .bits_per_irq   = 8,
   .handle_mmio= handle_mmio_priority_reg,
   },
   {
   .base   = GIC_DIST_TARGET,
 - .len= VGIC_NR_IRQS,
 + .len= VGIC_MAX_IRQS,
 + .bits_per_irq   = 8,
   .handle_mmio= handle_mmio_target_reg,
   },
   {
   .base   = GIC_DIST_CONFIG,
 - .len= VGIC_NR_IRQS / 4,
 + .len= VGIC_MAX_IRQS / 4,
 + .bits_per_irq   = 2,
   .handle_mmio= handle_mmio_cfg_reg,
   },
   {
 @@ -899,6 +911,22 @@ struct mmio_range *find_matching_range(const struct 
 mmio_range *ranges,
   return NULL;
  }
  
 +static bool vgic_validate_access(const struct vgic_dist *dist,
 +  const struct mmio_range 
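 
A minimal sketch of the kind of check the new bits_per_irq field enables
(illustration only, not the patch's actual code; it assumes 'offset' is the
byte offset into the per-IRQ register block):

static bool vgic_access_in_range(int nr_irqs, int bits_per_irq,
				 unsigned long offset)
{
	/* bits_per_irq == 0 marks registers that are not per-IRQ */
	if (bits_per_irq == 0)
		return true;

	/* each interrupt consumes bits_per_irq bits of this register block */
	return offset < (unsigned long)nr_irqs * bits_per_irq / 8;
}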

Re: KVM on ARM64

2014-08-05 Thread Joel Schopp

On 08/04/2014 07:35 PM, Mathew Li wrote:
 Hi,

 I have a quick question. How do we add a hard disk to the qemu ARM VM?

 I tried:

 qemu-system-aarch64 -machine virt -hda disk.img -kernel image -initrd 
 initrd.img

 qemu-system-aarch64 -machine virt -sd disk.img -kernel image -initrd 
 initrd.img

 qemu-system-aarch64 -machine virt -mtdblock disk.img -kernel image
 -initrd initrd.img

 Nothing seems to work. I am not able to see any disk (i.e. dev/sdX)
 inside guest OS.
I've been running something like this:

qemu-system-aarch64 -smp 1 --enable-kvm -nographic \
    -netdev tap,id=t0,ifname=tap0,script=no,downscript=no,vhost=on \
    -device virtio-net-device,netdev=t0,id=nic0 \
    -kernel /extra/rootfs/boot/Image \
    -drive file=/extra/rootfs.img,id=fs -device virtio-blk-device,drive=fs \
    -m 512 -M virt -cpu host \
    -append "console=ttyAMA0 console=ttyS0 root=/dev/vda"


On my system -smp 2 or higher hangs in the guest kernel.


Re: [PATCH v3 4/9] arm/arm64: KVM: vgic: kill VGIC_MAX_CPUS

2014-08-05 Thread Christoffer Dall
On Tue, Jul 08, 2014 at 12:09:03PM +0100, Marc Zyngier wrote:
 We now have the information about the number of CPU interfaces in
 the distributor itself. Let's get rid of VGIC_MAX_CPUS, and just
 rely on KVM_MAX_VCPUS where we don't have the choice. Yet.
 
 Signed-off-by: Marc Zyngier marc.zyng...@arm.com
 ---
  include/kvm/arm_vgic.h | 3 +--
  virt/kvm/arm/vgic.c| 6 +++---
  2 files changed, 4 insertions(+), 5 deletions(-)
 
 diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
 index b8a6337..99ad8af 100644
 --- a/include/kvm/arm_vgic.h
 +++ b/include/kvm/arm_vgic.h
 @@ -29,13 +29,12 @@
  #define VGIC_NR_SGIS 16
  #define VGIC_NR_PPIS 16
  #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS)
  -#define VGIC_MAX_CPUS		KVM_MAX_VCPUS
   
   #define VGIC_V2_MAX_LRS		(1 << 6)
   #define VGIC_V3_MAX_LRS		16
   
   /* Sanity checks... */
  -#if (VGIC_MAX_CPUS > 8)
  +#if (KVM_MAX_VCPUS > 8)
   #error	Invalid number of CPU interfaces
   #endif
  
 diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
 index 6f7cf85..3cb667c 100644
 --- a/virt/kvm/arm/vgic.c
 +++ b/virt/kvm/arm/vgic.c
 @@ -1206,7 +1206,7 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int 
 irq)
  
   	sources = *vgic_get_sgi_sources(dist, vcpu_id, irq);
   
  -	for_each_set_bit(c, &sources, VGIC_MAX_CPUS) {
  +	for_each_set_bit(c, &sources, dist->nr_cpus) {
   		if (vgic_queue_irq(vcpu, c, irq))
   			clear_bit(c, &sources);
   	}
  @@ -1583,7 +1583,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
   	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
   	int i;
   
  -	if (vcpu->vcpu_id >= VGIC_MAX_CPUS)
  +	if (vcpu->vcpu_id >= dist->nr_cpus)
   		return -EBUSY;
   
   	for (i = 0; i < VGIC_NR_IRQS; i++) {
  @@ -1724,7 +1724,7 @@ static int vgic_init_maps(struct kvm *kvm)
   	int nr_cpus, nr_irqs;
   	int ret, i;
   
  -	nr_cpus = dist->nr_cpus = VGIC_MAX_CPUS;
  +	nr_cpus = dist->nr_cpus = KVM_MAX_VCPUS;
   	nr_irqs = dist->nr_irqs = VGIC_NR_IRQS;
   
   	ret  = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);
 -- 
 2.0.0
 

Reviewed-by: Christoffer Dall christoffer.d...@linaro.org


[PATCH/RFC] KVM: track pid for VCPU only on KVM_RUN ioctl

2014-08-05 Thread Christian Borntraeger
We currently track the pid of the task that runs the VCPU in
vcpu_load. Since we call vcpu_load for all kinds of ioctls on a
CPU, this causes hiccups due to synchronize_rcu if one CPU is
modified by another CPU or the main thread (e.g. initialization,
reset). We track the pid only for the purpose of yielding, so
let's update the pid only in the KVM_RUN ioctl.

In addition, don't do a synchronize_rcu on startup (pid == 0).

This speeds up guest boot time on s390 noticeably for some configs, e.g.
HZ=100, no full state tracking, 64 guest cpus, 32 host cpus.

Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
CC: Rik van Riel r...@redhat.com
CC: Raghavendra K T raghavendra...@linux.vnet.ibm.com
CC: Michael Mueller m...@linux.vnet.ibm.com
---
 virt/kvm/kvm_main.c | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9ae9135..ebc8f54 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -124,14 +124,6 @@ int vcpu_load(struct kvm_vcpu *vcpu)
 
 	if (mutex_lock_killable(&vcpu->mutex))
 		return -EINTR;
-	if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
-		/* The thread running this VCPU changed. */
-		struct pid *oldpid = vcpu->pid;
-		struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
-		rcu_assign_pointer(vcpu->pid, newpid);
-		synchronize_rcu();
-		put_pid(oldpid);
-	}
 	cpu = get_cpu();
 	preempt_notifier_register(&vcpu->preempt_notifier);
 	kvm_arch_vcpu_load(vcpu, cpu);
@@ -1991,6 +1983,15 @@ static long kvm_vcpu_ioctl(struct file *filp,
 		r = -EINVAL;
 		if (arg)
 			goto out;
+		if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
+			/* The thread running this VCPU changed. */
+			struct pid *oldpid = vcpu->pid;
+			struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
+			rcu_assign_pointer(vcpu->pid, newpid);
+			if (oldpid)
+				synchronize_rcu();
+			put_pid(oldpid);
+		}
 		r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
 		trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
 		break;
-- 
1.8.4.2



Re: kvm-unit-tests failures

2014-08-05 Thread Eduardo Habkost
On Tue, Aug 05, 2014 at 12:26:26PM +0200, Paolo Bonzini wrote:
 Il 04/08/2014 19:02, Chris J Arges ha scritto:
  All tests below are done with the kvm tip (fresh as of today) merged
  with 3.16-rc4.
 
 Sorry, I had missed the list of failures.
 
  * Failures:
  - pmu
  As suggested in comment 2 on this bug:
  https://bugzilla.redhat.com/show_bug.cgi?id=1079796
  Adding the -cpu host allows this test to PASS.
 
 Correct, I thought unittests.cfg already did that.
 
  - tsc_adjust
  Here again adding -cpu host allows this to pass.
 
 Looks like QEMU doesn't know the tsc_adjust flag at all.  CCing Eduardo.

QEMU doesn't know the flag name yet (although we have migration support for the
MSR since 2012).

Note that with the new migratable=yes default on QEMU 2.1, an unknown feature
won't be enabled even when using -cpu host, and will require -cpu
host,migratable=no.

I see a bug in the unit test code: it is calling cpuid(7) without checking if
(cpuid(0).a >= 7) first, so it is checking meaningless CPUID bits when
cpuid_level < 7.  This causes the following problem:

  [kvm-unit-tests/master=]$ ./x86-run x86/tsc_adjust.flat -smp 2 -cpu 
qemu64,level=7
  qemu-kvm -enable-kvm -device pc-testdev -device 
isa-debug-exit,iobase=0xf4,iosize=0x4 -display none -serial stdio -device 
pci-testdev -kernel x86/tsc_adjust.flat -smp 2 -cpu qemu64,level=7
  enabling apic
  enabling apic
  success: IA32_TSC_ADJUST feature not enabled
  Return value from qemu: 1
  [kvm-unit-tests/master=]$ ./x86-run x86/tsc_adjust.flat -smp 2 -cpu 
qemu64,level=4
  qemu-kvm -enable-kvm -device pc-testdev -device 
isa-debug-exit,iobase=0xf4,iosize=0x4 -display none -serial stdio -device 
pci-testdev -kernel x86/tsc_adjust.flat -smp 2 -cpu qemu64,level=4
  enabling apic
  enabling apic
  failure: IA32_TSC_ADJUST msr read / write incorrect
  failure: TSC did not adjust for IA32_TSC_ADJUST value
  failure: IA32_TSC_ADJUST msr incorrectly adjusted on tsc write
  Return value from qemu: 3
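
A minimal sketch of the missing guard, using the test suite's cpuid helpers
(exact helper names assumed here):

static bool have_tsc_adjust(void)
{
	/* leaf 7 is only meaningful if the max basic leaf is >= 7 */
	if (cpuid(0).a < 7)
		return false;

	/* CPUID.(EAX=7,ECX=0):EBX bit 1 = IA32_TSC_ADJUST */
	return cpuid_indexed(7, 0).b & (1 << 1);
}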

-- 
Eduardo


Re: [PATCH v3 8/9] arm/arm64: KVM: vgic: make number of irqs a configurable attribute

2014-08-05 Thread Christoffer Dall
On Tue, Jul 08, 2014 at 12:09:07PM +0100, Marc Zyngier wrote:
 In order to make the number of interrupts configurable, use the new
 fancy device management API to add KVM_DEV_ARM_VGIC_GRP_NR_IRQS as
 a VGIC configurable attribute.
 
 Userspace can now specify the exact size of the GIC (by increments
 of 32 interrupts).
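 
 For reference, a hedged userspace sketch of setting the attribute (it assumes
 the new KVM_DEV_ARM_VGIC_GRP_NR_IRQS definition is visible and that vgic_fd
 comes from KVM_CREATE_DEVICE):
 
 #include <stdint.h>
 #include <sys/ioctl.h>
 #include <linux/kvm.h>
 
 static int vgic_set_nr_irqs(int vgic_fd, uint32_t nr_irqs)
 {
 	struct kvm_device_attr attr = {
 		.group = KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
 		.attr  = 0,
 		/* the kernel reads a u32 from this user address */
 		.addr  = (uint64_t)(unsigned long)&nr_irqs,
 	};
 
 	/* nr_irqs must be a multiple of 32, and at most 1024 */
 	return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
 }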
 
 Signed-off-by: Marc Zyngier marc.zyng...@arm.com
 ---
  arch/arm/include/uapi/asm/kvm.h   |  1 +
  arch/arm64/include/uapi/asm/kvm.h |  1 +
  virt/kvm/arm/vgic.c   | 29 +
  3 files changed, 31 insertions(+)
 
 diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
 index e6ebdd3..8b51c1a 100644
 --- a/arch/arm/include/uapi/asm/kvm.h
 +++ b/arch/arm/include/uapi/asm/kvm.h
 @@ -173,6 +173,7 @@ struct kvm_arch_memory_slot {
  #define   KVM_DEV_ARM_VGIC_CPUID_MASK(0xffULL  
 KVM_DEV_ARM_VGIC_CPUID_SHIFT)
  #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT  0
  #define   KVM_DEV_ARM_VGIC_OFFSET_MASK   (0xULL  
 KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
 +#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
  
  /* KVM_IRQ_LINE irq field index values */
  #define KVM_ARM_IRQ_TYPE_SHIFT   24
 diff --git a/arch/arm64/include/uapi/asm/kvm.h 
 b/arch/arm64/include/uapi/asm/kvm.h
 index e633ff8..b5cd6ed 100644
 --- a/arch/arm64/include/uapi/asm/kvm.h
 +++ b/arch/arm64/include/uapi/asm/kvm.h
 @@ -159,6 +159,7 @@ struct kvm_arch_memory_slot {
  #define   KVM_DEV_ARM_VGIC_CPUID_MASK(0xffULL  
 KVM_DEV_ARM_VGIC_CPUID_SHIFT)
  #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT  0
  #define   KVM_DEV_ARM_VGIC_OFFSET_MASK   (0xULL  
 KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
 +#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
  
  /* KVM_IRQ_LINE irq field index values */
  #define KVM_ARM_IRQ_TYPE_SHIFT   24
 diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
 index 708aed9..aee10da 100644
 --- a/virt/kvm/arm/vgic.c
 +++ b/virt/kvm/arm/vgic.c
 @@ -2208,6 +2208,28 @@ static int vgic_set_attr(struct kvm_device *dev, 
 struct kvm_device_attr *attr)
  
   return vgic_attr_regs_access(dev, attr, reg, true);
   }
  +	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
  +		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
  +		u32 val;
  +		int ret = 0;
  +
  +		if (get_user(val, uaddr))
  +			return -EFAULT;
  +
  +		if (val > 1024 || (val & 31))
  +			return -EINVAL;

VGIC_MAX_IRQS ?

  +
  +		mutex_lock(&dev->kvm->lock);
  +
  +		if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
  +			ret = -EBUSY;
  +		else
  +			dev->kvm->arch.vgic.nr_irqs = val;
  +
  +		mutex_unlock(&dev->kvm->lock);
  +
  +		return ret;
  +	}
  
   }
  
 @@ -2244,6 +2266,11 @@ static int vgic_get_attr(struct kvm_device *dev, 
 struct kvm_device_attr *attr)
   r = put_user(reg, uaddr);
   break;
   }
  +	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
  +		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
  +		r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr);
  +		break;
  +	}
  
   }
  
 @@ -2280,6 +2307,8 @@ static int vgic_has_attr(struct kvm_device *dev, struct 
 kvm_device_attr *attr)
   case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
   offset = attr-attr  KVM_DEV_ARM_VGIC_OFFSET_MASK;
   return vgic_has_attr_regs(vgic_cpu_ranges, offset);
 + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
 + return 0;
   }
   return -ENXIO;
  }
 -- 
 2.0.0
 

Please add Documentation of this ABI extension to:
Documentation/virtual/kvm/devices/arm-vgic.txt


Re: [PATCH v3 7/9] arm/arm64: KVM: vgic: delay vgic allocation until init time

2014-08-05 Thread Christoffer Dall
On Tue, Jul 08, 2014 at 12:09:06PM +0100, Marc Zyngier wrote:
 It is now quite easy to delay the allocation of the vgic tables
 until we actually require it to be up and running (when the first

the first starting ?

 starting to kick around).
 
 This allow us to allocate memory for the exact number of CPUs we
 have. As nobody configures the number of interrupts just yet,
 use a fallback to VGIC_NR_IRQS_LEGACY.
 
 Signed-off-by: Marc Zyngier marc.zyng...@arm.com
 ---
  arch/arm/kvm/arm.c |  7 ---
  include/kvm/arm_vgic.h |  1 -
  virt/kvm/arm/vgic.c| 42 +-
  3 files changed, 29 insertions(+), 21 deletions(-)
 
 diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
 index 782632e..9b3957d 100644
 --- a/arch/arm/kvm/arm.c
 +++ b/arch/arm/kvm/arm.c
 @@ -308,16 +308,9 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
  
  int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
  {
 - int ret;
 -
   /* Force users to call KVM_ARM_VCPU_INIT */
   vcpu-arch.target = -1;
  
 - /* Set up VGIC */
 - ret = kvm_vgic_vcpu_init(vcpu);
 - if (ret)
 - return ret;
 -
   /* Set up the timer */
   kvm_timer_vcpu_init(vcpu);
  
 diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
 index 9feb7fe..311a0f0 100644
 --- a/include/kvm/arm_vgic.h
 +++ b/include/kvm/arm_vgic.h
 @@ -233,7 +233,6 @@ int kvm_vgic_hyp_init(void);
  int kvm_vgic_init(struct kvm *kvm);
  int kvm_vgic_create(struct kvm *kvm);
  void kvm_vgic_destroy(struct kvm *kvm);
 -int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
  void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
  void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
  void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
 diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
 index 47a14a1..708aed9 100644
 --- a/virt/kvm/arm/vgic.c
 +++ b/virt/kvm/arm/vgic.c
 @@ -1612,15 +1612,12 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, 
 int nr_irqs)
   * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
   * this vcpu and enable the VGIC for this VCPU
   */
 -int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 +static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
  {
   struct vgic_cpu *vgic_cpu = vcpu-arch.vgic_cpu;
   struct vgic_dist *dist = vcpu-kvm-arch.vgic;
   int i;
  
 - if (vcpu-vcpu_id = dist-nr_cpus)
 - return -EBUSY;
 -
   for (i = 0; i  dist-nr_irqs; i++) {
   if (i  VGIC_NR_PPIS)
   vgic_bitmap_set_irq_val(dist-irq_enabled,
 @@ -1640,8 +1637,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
   vgic_cpu-nr_lr = vgic-nr_lr;
  
   vgic_enable(vcpu);
 -
 - return 0;
  }
  
  static void vgic_init_maintenance_interrupt(void *info)
 @@ -1759,8 +1754,17 @@ static int vgic_init_maps(struct kvm *kvm)
   int nr_cpus, nr_irqs;
   int ret, i;
  
  -	nr_cpus = dist->nr_cpus = KVM_MAX_VCPUS;
  +	if (dist->nr_cpus)	/* Already allocated */
  +		return 0;
  +
  +	nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
  +	if (!nr_cpus)		/* No vcpus? Can't be good... */
  +		return -EINVAL;
   
  +	/*
  +	 * If nobody configured the number of interrupts, use the
  +	 * legacy one.
  +	 */
   	if (!dist->nr_irqs)
   		dist->nr_irqs = VGIC_NR_IRQS_LEGACY;
  
 @@ -1804,6 +1808,9 @@ static int vgic_init_maps(struct kvm *kvm)
   }
   }
  
 + for (i = VGIC_NR_PRIVATE_IRQS; i  dist-nr_irqs; i += 4)
 + vgic_set_target_reg(kvm, 0, i);
 +
  out:
   if (ret)
   kvm_vgic_destroy(kvm);
 @@ -1822,6 +1829,7 @@ out:
   */
  int kvm_vgic_init(struct kvm *kvm)
  {
 + struct kvm_vcpu *vcpu;
   int ret = 0, i;
  
   if (!irqchip_in_kernel(kvm))
 @@ -1839,6 +1847,12 @@ int kvm_vgic_init(struct kvm *kvm)
   goto out;
   }
  
  +	ret = vgic_init_maps(kvm);
  +	if (ret) {
  +		kvm_err("Unable to allocate maps\n");
  +		goto out;
  +	}
 +
   ret = kvm_phys_addr_ioremap(kvm, kvm-arch.vgic.vgic_cpu_base,
   vgic-vcpu_base, KVM_VGIC_V2_CPU_SIZE);
   if (ret) {
 @@ -1846,11 +1860,13 @@ int kvm_vgic_init(struct kvm *kvm)
   goto out;
   }
  
  -	for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4)
  -		vgic_set_target_reg(kvm, 0, i);
  +	kvm_for_each_vcpu(i, vcpu, kvm)
  +		kvm_vgic_vcpu_init(vcpu);
   
   	kvm->arch.vgic.ready = true;
   out:
  +	if (ret)
  +		kvm_vgic_destroy(kvm);
   	mutex_unlock(&kvm->lock);
   return ret;
  }
 @@ -1891,10 +1907,6 @@ int kvm_vgic_create(struct kvm *kvm)
   kvm-arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
   kvm-arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
  
 - ret = vgic_init_maps(kvm);
 - if (ret)
 - kvm_err(Unable to allocate maps\n);
 -
  out_unlock:
   for (; vcpu_lock_idx = 0; 

Re: [PATCH v3 9/9] arm64: KVM: vgic: deal with GIC sub-page alignment

2014-08-05 Thread Christoffer Dall
On Tue, Jul 08, 2014 at 12:09:08PM +0100, Marc Zyngier wrote:
 The GIC CPU interface is always 4k aligned. If the host is using
 64k pages, it is critical to place the guest's GICC interface at the
 same relative alignment as the host's GICV. Failing to do so makes it
 impossible for the guest to deal with interrupts.
 
 Add a KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET attribute for the VGIC, allowing
 userspace to retrieve the GICV offset within a page. It then becomes
 trivial to adjust the GICC base address for the guest.
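 
 A hedged userspace sketch of querying the new attribute (it assumes the uapi
 definition from this patch is visible and that vgic_fd comes from
 KVM_CREATE_DEVICE):
 
 #include <stdint.h>
 #include <sys/ioctl.h>
 #include <linux/kvm.h>
 
 static int vgic_get_gicv_page_offset(int vgic_fd, uint32_t *offset)
 {
 	struct kvm_device_attr attr = {
 		.group = KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET,
 		.attr  = 0,
 		.addr  = (uint64_t)(unsigned long)offset,
 	};
 
 	/* on success, *offset is the GICV offset within its host page */
 	return ioctl(vgic_fd, KVM_GET_DEVICE_ATTR, &attr);
 }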
 
 Signed-off-by: Marc Zyngier marc.zyng...@arm.com
 ---
  arch/arm/include/uapi/asm/kvm.h   | 1 +
  arch/arm64/include/uapi/asm/kvm.h | 1 +
  virt/kvm/arm/vgic.c   | 7 +++
  3 files changed, 9 insertions(+)
 
 diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
 index 8b51c1a..056b782 100644
 --- a/arch/arm/include/uapi/asm/kvm.h
 +++ b/arch/arm/include/uapi/asm/kvm.h
 @@ -174,6 +174,7 @@ struct kvm_arch_memory_slot {
  #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT  0
  #define   KVM_DEV_ARM_VGIC_OFFSET_MASK   (0xULL  
 KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
  #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
 +#define KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET 4
  
  /* KVM_IRQ_LINE irq field index values */
  #define KVM_ARM_IRQ_TYPE_SHIFT   24
 diff --git a/arch/arm64/include/uapi/asm/kvm.h 
 b/arch/arm64/include/uapi/asm/kvm.h
 index b5cd6ed..5513de4 100644
 --- a/arch/arm64/include/uapi/asm/kvm.h
 +++ b/arch/arm64/include/uapi/asm/kvm.h
 @@ -160,6 +160,7 @@ struct kvm_arch_memory_slot {
  #define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT  0
  #define   KVM_DEV_ARM_VGIC_OFFSET_MASK   (0xULL  
 KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
  #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
 +#define KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET 4
  
  /* KVM_IRQ_LINE irq field index values */
  #define KVM_ARM_IRQ_TYPE_SHIFT   24
 diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
 index aee10da..1e60981 100644
 --- a/virt/kvm/arm/vgic.c
 +++ b/virt/kvm/arm/vgic.c
 @@ -2271,6 +2271,12 @@ static int vgic_get_attr(struct kvm_device *dev, 
 struct kvm_device_attr *attr)
   r = put_user(dev-kvm-arch.vgic.nr_irqs, uaddr);
   break;
   }
  +	case KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET: {
  +		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
  +		u32 val = vgic->vcpu_base & ~PAGE_MASK;
  +		r = put_user(val, uaddr);
  +		break;
  +	}
  
   }
  
 @@ -2308,6 +2314,7 @@ static int vgic_has_attr(struct kvm_device *dev, struct 
 kvm_device_attr *attr)
   offset = attr-attr  KVM_DEV_ARM_VGIC_OFFSET_MASK;
   return vgic_has_attr_regs(vgic_cpu_ranges, offset);
   case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
 + case KVM_DEV_ARM_VGIC_GRP_ADDR_OFFSET:
   return 0;
   }
   return -ENXIO;
 -- 
 2.0.0
 

Also here, add documentation to the fancy ABI:
Documentation/virtual/kvm/devices/arm-vgic.txt

When rebased onto the recent patches this will never return anything
else than 0 right?  Otherwise KVM would have failed to initialize and
bailed out.  What is our solution for this problem again?

-Christoffer


[PATCH] virtio-rng: complete have_data completion in removing device

2014-08-05 Thread Amos Kong
When we try to hot-remove a busy virtio-rng device from the QEMU monitor,
the device can't be hot-removed, because the virtio-rng driver hangs at
wait_for_completion_killable().

This patch fixes the hang by completing the have_data completion before
unregistering the virtio-rng device.

Signed-off-by: Amos Kong ak...@redhat.com
Cc: sta...@vger.kernel.org
---
 drivers/char/hw_random/virtio-rng.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/char/hw_random/virtio-rng.c 
b/drivers/char/hw_random/virtio-rng.c
index 0027137..416b15c 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -137,6 +137,7 @@ static void remove_common(struct virtio_device *vdev)
 	struct virtrng_info *vi = vdev->priv;
 
 	vdev->config->reset(vdev);
+	complete(&vi->have_data);
 	vi->busy = false;
 	if (vi->hwrng_register_done)
 		hwrng_unregister(&vi->hwrng);
-- 
1.9.3



Re: When I boot two virtio-rng devices, guest will hang

2014-08-05 Thread Amos Kong
On Tue, Aug 05, 2014 at 06:28:54PM +0800, Amos Kong wrote:
 3.16 (guest hangs with two rng devices)
 3.16 + quick fix (can start up with two rng devices) (hotplug issue 1 +
 hotplug issue 2 exist)
 latest torvalds/linux.git + Amit's 4 patches (can start up with two rng devices)
 (only hotplug issue 2 exists)
 
 However, the 4 patches also fixed the hang issue, and the hotplug issue was
 improved a little.
 The hotplug issue is affected by the backend, or maybe it's not a real issue,
 because the rng device can be hot-removed after the dd process is killed.
 
 
 Hotplug issue 1:
   1. boot up guest with two rng devices (rng0 uses /dev/urandom, rng1 uses
 /dev/random)
   2. read data by dd in guest
   3 (option 1). hot-remove rng0, then hot-remove rng1 - result: _only rng1_
 can't be removed until the dd process is killed
   3 (option 2). hot-remove rng1, then hot-remove rng0 - result: both devices
 can be removed successfully, and the dd process will exit automatically.
 
   If we use /dev/urandom for rng0 and rng1, _rng0 & rng1_ can be removed, and
 the dd process will exit automatically.
 
 Hotplug issue 2:
   If we use /dev/random for rng0 and rng1, _rng0 & rng1_ can't be removed
 until the dd process is killed.
 
 Hotplug issue 3:
   If we use /dev/random for rng0 and rng1, _only rng1_ can't be removed until
 the dd process is killed.

Hi Amit,

I finally found the root problem and posted a fix to upstream:
  
http://lists.linuxfoundation.org/pipermail/virtualization/2014-August/027049.html

It can help to fix the hotplug issues on 3.16 & the latest kernel, so
the stable kernel is CCed.

 (The difference between /dev/random and /dev/urandom is the speed.)
 
 Thanks, Amos

-- 
Amos.


Re: kvm-unit-tests failures

2014-08-05 Thread Chris J Arges


On 08/05/2014 05:26 AM, Paolo Bonzini wrote:
 Il 04/08/2014 19:02, Chris J Arges ha scritto:
 All tests below are done with the kvm tip (fresh as of today) merged
 with 3.16-rc4.
 
Updated information with the three new kvm-unit-tests patches you've posted.

 Sorry, I had missed the list of failures.
 
 * Failures:
 - pmu
 As suggested in comment 2 on this bug:
 https://bugzilla.redhat.com/show_bug.cgi?id=1079796
 Adding the -cpu host allows this test to PASS.
 
 Correct, I thought unittests.cfg already did that.
 

Ok with your latest patches, the test case runs, but fails on a few
other tests:
⟫ sudo ./x86-run x86/pmu.flat -smp 1 -cpu host | grep FAIL
FAIL: fixed-2
FAIL: all counters

 - tsc_adjust
 Here again adding -cpu host allows this to pass.
 
 Looks like QEMU doesn't know the tsc_adjust flag at all.  CCing Eduardo.
 
 - pcid
 Here again adding -cpu host allows this to pass.
 
 It passes here, and fails with -cpu host:
 
 PASS: CPUID consistency
 FAIL: Test on PCID when enabled
 PASS: Test on INVPCID when disabled
 
 Looks like the Test on PCID when enabled has problems, and the
 -cpu flag is a red herring.
 
This test now passes.

 - msr (HANG)
 ./x86-run x86/msr.flat -smp 1

 $ sudo ./x86-run x86/msr.flat -smp 1qemu-system-x86_64 -enable-kvm
 -device pc-testdev -device isa-debug-exit,iobase=0xf4,iosize=0x4
 -display none -serial stdio -device pci-testdev -kernel x86/msr.flat -smp 1
 enabling apic
 enabling apic
 enabling apic

 enabling apic repeats and adding -cpu host seems to have no effect.
 Interestingly enough on an Ubuntu 3.13 series kernel this PASSes.
 
 The APICBASE register is already tested by the apic testcase, and the
 simple-minded tests in msr.flat actually trigger a general protection
 fault nowadays because the quality of the emulation improved.  I'm
 dropping the APICBASE tests from msr.flat.
 
This test now passes.

 - vmx (HANG)
 Test suite: interrupt
 PASS: direct interrupt while running guest
 PASS: intercepted interrupt while running guest
 PASS: direct interrupt + hlt

 Here it just hangs.
 
 It fails here for me.  It could be related to APICv, can you try with
 the enable_apic=0 parameter to kvm-intel.ko?
 
 Paolo

With enable_apicv=0, this test case just FAILs for me.

⟫ sudo ./x86-run x86/vmx.flat -smp 1 -cpu host,+vmx | grep FAIL
FAIL: EPT violation - paging structure
FAIL: intercepted interrupt + hlt

I'll try to dig a bit more, but things are looking much better! Thanks
for the help and patches.
--chris j arges


Re: [PATCH v2] kvm: x86: fix stale mmio cache bug

2014-08-05 Thread David Matlack
On Mon, Aug 4, 2014 at 5:31 PM, Wanpeng Li wanpeng...@linux.intel.com wrote:
 Hi David,
 On Mon, Aug 04, 2014 at 02:10:20PM -0700, David Matlack wrote:
The following events can lead to an incorrect KVM_EXIT_MMIO bubbling
up to userspace:

(1) Guest accesses gpa X without a memory slot. The gfn is cached in
struct kvm_vcpu_arch (mmio_gfn). On Intel EPT-enabled hosts, KVM sets
the SPTE write-execute-noread so that future accesses cause
EPT_MISCONFIGs.

(2) Host userspace creates a memory slot via KVM_SET_USER_MEMORY_REGION
covering the page just accessed.


 One question:

 Who trigger host userspace creates a mmio memslot? It will be created
 just after first mmio #PF?

Devices such as vga can be in modes where their memory behaves
like ram and using a memslot to back the memory makes sense. In
other modes, reading and writing to vga memory has side-effects
and so mmio makes sense (delete memslot). Switching between these
modes is a guest initiated event.


Re: [PATCH 5/5 v2] KVM: PPC: BOOKE: Emulate debug registers and exception

2014-08-05 Thread Scott Wood
On Mon, 2014-08-04 at 22:41 -0500, Bhushan Bharat-R65777 wrote:
 
  -Original Message-
  From: Wood Scott-B07421
  Sent: Tuesday, August 05, 2014 4:23 AM
  To: Bhushan Bharat-R65777
  Cc: ag...@suse.de; kvm-...@vger.kernel.org; kvm@vger.kernel.org; Yoder 
  Stuart-
  B08248
  Subject: Re: [PATCH 5/5 v2] KVM: PPC: BOOKE: Emulate debug registers and
  exception
  
  On Mon, 2014-08-04 at 13:32 +0530, Bharat Bhushan wrote:
   @@ -735,7 +745,27 @@ static int kvmppc_handle_debug(struct kvm_run *run,
  struct kvm_vcpu *vcpu)
 	struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
 	u32 dbsr = vcpu->arch.dbsr;
  
   -	/* Clear guest dbsr (vcpu->arch.dbsr).
   +	if (vcpu->guest_debug == 0) {
   +		/*
   +		 * Debug resources belong to Guest.
   +		 * Imprecise debug events are not injected
   +		 */
   +		if (dbsr & DBSR_IDE)
   +			return RESUME_GUEST;
  
  This is incorrect.  DBSR_IDE shouldn't *cause* an injection, but it 
  shouldn't
  inhibit it either.
 
 Will this work ?
   If ((dbsr & DBSR_IDE) && !(dbsr & ~DBSR_IDE))
   	Return RESUME_GUEST; 

I suppose it could, but it would be cleaner to just change dbsr to
(dbsr & ~DBSR_IDE) in the next if-statement (maybe factoring out each
term of that if-statement to variables to make it more readable).
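
Something like the following helper (a sketch only, relying on the existing
u32 and DBSR_IDE definitions) would make that if-statement read naturally:

/* true if dbsr carries anything besides the imprecise-event bit */
static bool dbsr_has_precise_events(u32 dbsr)
{
	return (dbsr & ~DBSR_IDE) != 0;
}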

   @@ -828,6 +858,8 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu
  *vcpu,
 case BOOKE_INTERRUPT_DEBUG:
 /* Save DBSR before preemption is enabled */
 	vcpu->arch.dbsr = mfspr(SPRN_DBSR);
   +	/* MASK out DBSR_MRR */
   +	vcpu->arch.dbsr &= ~DBSR_MRR;
 kvmppc_clear_dbsr();
 break;
 }
  
  DBSR[MRR] can only be set once per host system reset.  There's no need to 
  filter
  it out here; just make sure the host clears it at some point before this 
  point.
 
 Can you please suggest where ? somewhere in KVM initialization ?

Sure, KVM init works given that there's no real reason for non-KVM code
to care.

  The MRR value doesn't currently survive past kvmppc_clear_dbsr(), so this 
  isn't
  helping to preserve it for the host's benefit...
  
   @@ -1858,6 +1890,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct
   kvm_vcpu *vcpu,
  
 	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
 		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
   +		vcpu->arch.dbg_reg.dbcr0 = 0;
  
  Again, it's not clear why we need shadow debug registers here.  Just in 
  case we
  implement something that can't be implemented isn't a good reason to keep
  complexity around.
 
 One reason was that setting EDM in guest visible register, For this we
 need shadow_reg is used to save/restore state in h/w register (which
 does not have DBCR0_EDM) but debug_reg have DBCR0_EDM.

If that's the only reason, then I'd get rid of the shadow and just OR in
DBCR0_EDM when reading the register, if vcpu->guest_debug is nonzero.
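
i.e. something along these lines (a sketch, assuming the dbg_reg field name
from the patch):

static u32 kvmppc_get_dbcr0(struct kvm_vcpu *vcpu)
{
	u32 dbcr0 = vcpu->arch.dbg_reg.dbcr0;

	/* report EDM whenever userspace owns the debug resources */
	if (vcpu->guest_debug)
		dbcr0 |= DBCR0_EDM;

	return dbcr0;
}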

-Scott




Re: [PATCH v2] kvm: x86: fix stale mmio cache bug

2014-08-05 Thread David Matlack
On Mon, Aug 4, 2014 at 8:36 PM, Xiao Guangrong
xiaoguangr...@linux.vnet.ibm.com wrote:
 On 08/05/2014 05:10 AM, David Matlack wrote:

 This patch fixes the issue by doing the following:
   - Tag the mmio cache with the memslot generation and use it to
 validate mmio cache lookups.
   - Extend vcpu_clear_mmio_info to clear mmio_gfn in addition to
 mmio_gva, since both can be used to fast path mmio faults.
   - In mmu_sync_roots, unconditionally clear the mmio cache since
 even direct_map (e.g. tdp) hosts use it.

 It's not needed.

 direct map only uses gpa (and never cache gva) and
 vcpu_clear_mmio_info only clears gva.

Ok thanks for the clarification.

 +static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
 + gva_t gva, gfn_t gfn, unsigned access)
 +{
  +	vcpu->arch.mmio_gen = kvm_current_mmio_generation(vcpu->kvm);
 +
 + /*
 +  * Ensure that the mmio_gen is set before the rest of the cache entry.
 +  * Otherwise we might see a new generation number attached to an old
 +  * cache entry if creating/deleting a memslot races with mmio caching.
 +  * The inverse case is possible (old generation number with new cache
 +  * info), but that is safe. The next access will just miss the cache
 +  * when it should have hit.
 +  */
 + smp_wmb();

 The memory barrier can't help us, consider this scenario:

 CPU 0  CPU 1
 page-fault
 see gpa is not mapped in memslot

   create new memslot containing gpa from Qemu
   update the slots's generation number
 cache mmio info

 !!! later when vcpu accesses gpa again
 it will cause mmio-exit.

Ah! Thanks for catching my mistake.

 The easy way to fix this is that we update slots's generation-number
 after synchronize_srcu_expedited when memslot is being updated that
 ensures other sides can see the new generation-number only after
 finishing update.

It would be possible for a vcpu to see an inconsistent kvm_memslots struct
(new set of slots with an old generation number). Is that not an issue?

We could just use the generation number that is stored in the
spte. The only downside (that I can see) is that handle_abnormal_pfn()
calls vcpu_cache_mmio_info() but does not have access to the spte.
So presumably we'd have to do a page table walk.
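
For what it's worth, a toy model (not the kernel's actual spte layout) of
embedding a truncated generation in the MMIO marker itself, so staleness can
be detected at lookup time:

#include <stdbool.h>
#include <stdint.h>

#define MMIO_GEN_BITS	19
#define MMIO_GEN_MASK	((UINT64_C(1) << MMIO_GEN_BITS) - 1)

static uint64_t mmio_mark(uint64_t gfn, uint64_t memslot_gen)
{
	/* low bits: truncated generation; high bits: the gfn */
	return (gfn << MMIO_GEN_BITS) | (memslot_gen & MMIO_GEN_MASK);
}

static bool mmio_mark_is_current(uint64_t mark, uint64_t memslot_gen)
{
	/* a memslot update bumps the generation and invalidates old marks */
	return (mark & MMIO_GEN_MASK) == (memslot_gen & MMIO_GEN_MASK);
}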


[PATCH v2 2/4] vfio: spapr: Fix build error

2014-08-05 Thread Alexey Kardashevskiy
From: Gavin Shan gws...@linux.vnet.ibm.com

The VFIO related components could be built as dynamic modules.
Unfortunately, CONFIG_EEH can't be configured to m. The patch
fixes the build errors when configuring VFIO related components
as dynamic modules as follows:

  CC [M]  drivers/vfio/vfio_iommu_spapr_tce.o
In file included from drivers/vfio/vfio.c:33:0:
include/linux/vfio.h:101:43: warning: ‘struct pci_dev’ declared \
inside parameter list [enabled by default]
:
  WRAParch/powerpc/boot/zImage.pseries
  WRAParch/powerpc/boot/zImage.maple
  WRAParch/powerpc/boot/zImage.pmac
  WRAParch/powerpc/boot/zImage.epapr
  MODPOST 1818 modules
ERROR: .vfio_spapr_iommu_eeh_ioctl [drivers/vfio/vfio_iommu_spapr_tce.ko]\
undefined!
ERROR: .vfio_spapr_pci_eeh_open [drivers/vfio/pci/vfio-pci.ko] undefined!
ERROR: .vfio_spapr_pci_eeh_release [drivers/vfio/pci/vfio-pci.ko] undefined!

Reported-by: Alexey Kardashevskiy a...@ozlabs.ru
Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
[removed include pci.h in vfio.c]
Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
---
Changes:
v2:
* removed #include linux/pci.h from vfio.c and tested

---
 drivers/vfio/Makefile | 4 ++--
 drivers/vfio/vfio_spapr_eeh.c | 6 ++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 50e30bc..4891cca 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_VFIO) += vfio.o
 obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
-obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
-obj-$(CONFIG_EEH) += vfio_spapr_eeh.o
+obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o \
+ vfio_spapr_eeh.o
 obj-$(CONFIG_VFIO_PCI) += pci/
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index f834b4c..1a93e83 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -14,15 +14,19 @@
 #include <asm/eeh.h>
 
 /* We might build address mapping here for fast path later */
+#ifdef CONFIG_EEH
+
 int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
 {
return eeh_dev_open(pdev);
 }
+EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open);
 
 void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
 {
eeh_dev_release(pdev);
 }
+EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_release);
 
 long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
unsigned int cmd, unsigned long arg)
@@ -85,3 +89,5 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
 
return ret;
 }
+EXPORT_SYMBOL_GPL(vfio_spapr_iommu_eeh_ioctl);
+#endif /* CONFIG_EEH */
-- 
2.0.0



[PATCH v2 3/4] vfio_spapr_eeh: Enable compile as a module

2014-08-05 Thread Alexey Kardashevskiy
This adds necessary declarations to the module, otherwise multiple
dynamic linker errors happen:

vfio_spapr_eeh: Unknown symbol eeh_pe_set_option (err 0)
vfio_spapr_eeh: Unknown symbol eeh_pe_configure (err 0)
vfio_spapr_eeh: Unknown symbol eeh_pe_reset (err 0)
vfio_spapr_eeh: Unknown symbol eeh_pe_get_state (err 0)
vfio_spapr_eeh: Unknown symbol eeh_iommu_group_to_pe (err 0)
vfio_spapr_eeh: Unknown symbol eeh_dev_open (err 0)
vfio_spapr_eeh: Unknown symbol eeh_pe_set_option (err 0)
vfio_spapr_eeh: Unknown symbol eeh_pe_configure (err 0)
vfio_spapr_eeh: Unknown symbol eeh_pe_reset (err 0)
vfio_spapr_eeh: Unknown symbol eeh_pe_get_state (err 0)
vfio_spapr_eeh: Unknown symbol eeh_iommu_group_to_pe (err 0)
vfio_spapr_eeh: Unknown symbol eeh_dev_open (err 0)

Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
---
 drivers/vfio/vfio_spapr_eeh.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index 1a93e83..a942dd0 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -9,10 +9,15 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/module.h>
 #include <linux/uaccess.h>
 #include <linux/vfio.h>
 #include <asm/eeh.h>
 
+#define DRIVER_VERSION  "0.1"
+#define DRIVER_AUTHOR   "Gavin Shan, IBM Corporation"
+#define DRIVER_DESC     "VFIO IOMMU SPAPR EEH"
+
 /* We might build address mapping here for fast path later */
 #ifdef CONFIG_EEH
 
@@ -91,3 +96,8 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
 }
 EXPORT_SYMBOL_GPL(vfio_spapr_iommu_eeh_ioctl);
 #endif /* CONFIG_EEH */
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
-- 
2.0.0



[PATCH v2 4/4] vfio_pci: spapr: Enable VFIO if EEH is not supported

2014-08-05 Thread Alexey Kardashevskiy
The existing vfio_pci_open() fails if there is no EEH support for PCI.
This breaks POWER7's P5IOC2 PHB support which this patch brings back.

It is a warning because this should not normally happen on supported
configurations such as POWER8+IODA so we always want to see it in dmesg.
It is _once() as the message may be triggered by non-privileged userspace
and we do not want to pollute dmesg.

Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
---
Changes:
v2:
* discussed with Ben Herrenschmidt and did s/pr_warn/pr_warn_once/
* updated commit log
---
 drivers/vfio/pci/vfio_pci.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index e2ee80f..68dc8da 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -179,10 +179,8 @@ static int vfio_pci_open(void *device_data)
goto error;
 
ret = vfio_spapr_pci_eeh_open(vdev-pdev);
-   if (ret) {
-   vfio_pci_disable(vdev);
-   goto error;
-   }
+   if (ret)
+   pr_warn_once("EEH is not supported\n");
}
 
return 0;
-- 
2.0.0



[PATCH v2 0/4] vfio: eeh: spapr: Compile and compatibility fixes

2014-08-05 Thread Alexey Kardashevskiy
Here is what I have in my stash which would be nice to have in 3.17.

Alexey Kardashevskiy (2):
  vfio_spapr_eeh: Enable compile as a module
  vfio_pci: spapr: Enable VFIO if EEH is not supported

Gavin Shan (2):
  powerpc/eeh: Export eeh_iommu_group_to_pe()
  vfio: spapr: Fix build error

 arch/powerpc/kernel/eeh.c |  1 +
 drivers/vfio/Makefile |  4 ++--
 drivers/vfio/pci/vfio_pci.c   |  6 ++
 drivers/vfio/vfio_spapr_eeh.c | 16 
 4 files changed, 21 insertions(+), 6 deletions(-)

-- 
2.0.0



[PATCH v2 1/4] powerpc/eeh: Export eeh_iommu_group_to_pe()

2014-08-05 Thread Alexey Kardashevskiy
From: Gavin Shan gws...@linux.vnet.ibm.com

The function is used by VFIO driver, which might be built as a
dynamic module.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
---
 arch/powerpc/kernel/eeh.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 6043879..59a64f8 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1254,6 +1254,7 @@ struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group 
*group)
 
 	return edev->pe;
 }
+EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe);
 
 #endif /* CONFIG_IOMMU_API */
 
-- 
2.0.0



Re: [PATCH v2 2/4] vfio: spapr: Fix build error

2014-08-05 Thread Alex Williamson
On Wed, 2014-08-06 at 12:48 +1000, Alexey Kardashevskiy wrote:
 From: Gavin Shan gws...@linux.vnet.ibm.com
 
 The VFIO related components could be built as dynamic modules.
 Unfortunately, CONFIG_EEH can't be configured to m. The patch
 fixes the build errors when configuring VFIO related components
 as dynamic modules as follows:
 
   CC [M]  drivers/vfio/vfio_iommu_spapr_tce.o
 In file included from drivers/vfio/vfio.c:33:0:
 include/linux/vfio.h:101:43: warning: ‘struct pci_dev’ declared \
 inside parameter list [enabled by default]
 :
   WRAParch/powerpc/boot/zImage.pseries
   WRAParch/powerpc/boot/zImage.maple
   WRAParch/powerpc/boot/zImage.pmac
   WRAParch/powerpc/boot/zImage.epapr
   MODPOST 1818 modules
 ERROR: .vfio_spapr_iommu_eeh_ioctl [drivers/vfio/vfio_iommu_spapr_tce.ko]\
 undefined!
 ERROR: .vfio_spapr_pci_eeh_open [drivers/vfio/pci/vfio-pci.ko] undefined!
 ERROR: .vfio_spapr_pci_eeh_release [drivers/vfio/pci/vfio-pci.ko] undefined!
 
 Reported-by: Alexey Kardashevskiy a...@ozlabs.ru
 Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
 [removed include pci.h in vfio.c]
 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
 ---
 Changes:
 v2:
 * removed #include linux/pci.h from vfio.c and tested

I also commented regarding the ifdef around all of vfio_spapr_eeh.c:

Why not add a new CONFIG_VFIO_SPAPR_EEH option to handle this
instead?

Did you disagree?  The ifdef is pretty ugly.

 ---
  drivers/vfio/Makefile | 4 ++--
  drivers/vfio/vfio_spapr_eeh.c | 6 ++
  2 files changed, 8 insertions(+), 2 deletions(-)
 
 diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
 index 50e30bc..4891cca 100644
 --- a/drivers/vfio/Makefile
 +++ b/drivers/vfio/Makefile
 @@ -1,5 +1,5 @@
  obj-$(CONFIG_VFIO) += vfio.o
  obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
 -obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
 -obj-$(CONFIG_EEH) += vfio_spapr_eeh.o
 +obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o \
 +   vfio_spapr_eeh.o
  obj-$(CONFIG_VFIO_PCI) += pci/
 diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
 index f834b4c..1a93e83 100644
 --- a/drivers/vfio/vfio_spapr_eeh.c
 +++ b/drivers/vfio/vfio_spapr_eeh.c
 @@ -14,15 +14,19 @@
  #include asm/eeh.h
  
  /* We might build address mapping here for fast path later */
 +#ifdef CONFIG_EEH
 +
  int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
  {
   return eeh_dev_open(pdev);
  }
 +EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open);
  
  void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
  {
   eeh_dev_release(pdev);
  }
 +EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_release);
  
  long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
   unsigned int cmd, unsigned long arg)
 @@ -85,3 +89,5 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
  
   return ret;
  }
 +EXPORT_SYMBOL_GPL(vfio_spapr_iommu_eeh_ioctl);
 +#endif /* CONFIG_EEH */





Re: [PATCH v2] kvm: x86: fix stale mmio cache bug

2014-08-05 Thread Xiao Guangrong
On 08/06/2014 06:39 AM, David Matlack wrote:
 On Mon, Aug 4, 2014 at 8:36 PM, Xiao Guangrong
 xiaoguangr...@linux.vnet.ibm.com wrote:
 On 08/05/2014 05:10 AM, David Matlack wrote:

 This patch fixes the issue by doing the following:
   - Tag the mmio cache with the memslot generation and use it to
 validate mmio cache lookups.
   - Extend vcpu_clear_mmio_info to clear mmio_gfn in addition to
 mmio_gva, since both can be used to fast path mmio faults.
   - In mmu_sync_roots, unconditionally clear the mmio cache since
 even direct_map (e.g. tdp) hosts use it.

 It's not needed.

 direct map only uses gpa (and never cache gva) and
 vcpu_clear_mmio_info only clears gva.
 
 Ok thanks for the clarification.
 
 +static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
 + gva_t gva, gfn_t gfn, unsigned access)
 +{
  +	vcpu->arch.mmio_gen = kvm_current_mmio_generation(vcpu->kvm);
 +
 + /*
 +  * Ensure that the mmio_gen is set before the rest of the cache entry.
 +  * Otherwise we might see a new generation number attached to an old
 +  * cache entry if creating/deleting a memslot races with mmio caching.
 +  * The inverse case is possible (old generation number with new cache
 +  * info), but that is safe. The next access will just miss the cache
 +  * when it should have hit.
 +  */
 + smp_wmb();

 The memory barrier can't help us, consider this scenario:

 CPU 0  CPU 1
 page-fault
 see gpa is not mapped in memslot

   create new memslot containing gpa from Qemu
   update the slots's generation number
 cache mmio info

 !!! later when vcpu accesses gpa again
 it will cause mmio-exit.
 
 Ah! Thanks for catching my mistake.
 
 The easy way to fix this is that we update slots's generation-number
 after synchronize_srcu_expedited when memslot is being updated that
 ensures other sides can see the new generation-number only after
 finishing update.
 
 It would be possible for a vcpu to see an inconsistent kvm_memslots struct
 (new set of slots with an old generation number). Is that not an issue?

In this case, checking generation-number will fail, we will goto the slow path
to handle mmio access - that's very rare, so i think it's ok.

 
 We could just use the generation number that is stored in the
 spte. The only downside (that I can see) is that handle_abnormal_pfn()
 calls vcpu_cache_mmio_info() but does not have access to the spte.
 So presumably we'd have to do a page table walk.

The issue is not only in vcpu_cache_mmio_info but also in
mark_mmio_spte() where we may cache new generation-number and old memslots
info.
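
As a toy model of the generation check being discussed (illustration only,
plain C, not the kernel code):

#include <stdbool.h>
#include <stdint.h>

struct mmio_cache {
	uint64_t gen;		/* memslot generation at fill time */
	uint64_t gva;
	uint64_t gfn;
	unsigned int access;
};

static void mmio_cache_fill(struct mmio_cache *c, uint64_t cur_gen,
			    uint64_t gva, uint64_t gfn, unsigned int access)
{
	c->gen = cur_gen;
	c->gva = gva;
	c->gfn = gfn;
	c->access = access;
}

static bool mmio_cache_hit(const struct mmio_cache *c, uint64_t cur_gen,
			   uint64_t gva)
{
	/* a stale generation simply forces the slow path */
	return c->gen == cur_gen && c->gva == gva;
}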



Re: [PATCH v2 4/4] vfio_pci: spapr: Enable VFIO if EEH is not supported

2014-08-05 Thread Alex Williamson
On Wed, 2014-08-06 at 12:48 +1000, Alexey Kardashevskiy wrote:
 The existing vfio_pci_open() fails if there is no EEH support for PCI.
 This breaks POWER7's P5IOC2 PHB support which this patch brings back.
 
 It is a warning because this should not normally happen on supported
 configurations such as POWER8+IODA so we always want to see it in dmesg.
 It is _once() as the message may be triggered by non-privileged userspace
 and we do not want to pollute dmesg.
 
 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
 ---
 Changes:
 v2:
 * discussed with Ben Herrenschmidt and did s/pr_warn/pr_warn_once/
 * updated commit log
 ---
  drivers/vfio/pci/vfio_pci.c | 6 ++
  1 file changed, 2 insertions(+), 4 deletions(-)
 
 diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
 index e2ee80f..68dc8da 100644
 --- a/drivers/vfio/pci/vfio_pci.c
 +++ b/drivers/vfio/pci/vfio_pci.c
 @@ -179,10 +179,8 @@ static int vfio_pci_open(void *device_data)
   goto error;
  
   ret = vfio_spapr_pci_eeh_open(vdev-pdev);
 - if (ret) {
 - vfio_pci_disable(vdev);
 - goto error;
 - }
 + if (ret)
 + pr_warn_once(EEH is not supported\n);
   }
  
   return 0;

Now the next question, what's the point of vfio_spapr_pci_eeh_open()
returning a value?  Couldn't it return void now and this warning can go
into eeh specific code?  Thanks,

Alex



Re: [PATCH v2 2/4] vfio: spapr: Fix build error

2014-08-05 Thread Gavin Shan
On Tue, Aug 05, 2014 at 09:12:50PM -0600, Alex Williamson wrote:
On Wed, 2014-08-06 at 12:48 +1000, Alexey Kardashevskiy wrote:
 From: Gavin Shan gws...@linux.vnet.ibm.com
 
 The VFIO related components could be built as dynamic modules.
 Unfortunately, CONFIG_EEH can't be configured to m. The patch
 fixes the build errors when configuring VFIO related components
 as dynamic modules as follows:
 
   CC [M]  drivers/vfio/vfio_iommu_spapr_tce.o
 In file included from drivers/vfio/vfio.c:33:0:
 include/linux/vfio.h:101:43: warning: ‘struct pci_dev’ declared \
 inside parameter list [enabled by default]
 :
   WRAParch/powerpc/boot/zImage.pseries
   WRAParch/powerpc/boot/zImage.maple
   WRAParch/powerpc/boot/zImage.pmac
   WRAParch/powerpc/boot/zImage.epapr
   MODPOST 1818 modules
 ERROR: .vfio_spapr_iommu_eeh_ioctl [drivers/vfio/vfio_iommu_spapr_tce.ko]\
 undefined!
 ERROR: .vfio_spapr_pci_eeh_open [drivers/vfio/pci/vfio-pci.ko] undefined!
 ERROR: .vfio_spapr_pci_eeh_release [drivers/vfio/pci/vfio-pci.ko] 
 undefined!
 
 Reported-by: Alexey Kardashevskiy a...@ozlabs.ru
 Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
 [removed include pci.h in vfio.c]
 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
 ---
 Changes:
 v2:
 * removed #include linux/pci.h from vfio.c and tested

Appreciated for Alexey's help on it :)


I also commented regarding the ifdef around all of vfio_spapr_eeh.c:

Why not add a new CONFIG_VFIO_SPAPR_EEH option to handle this
instead?

Did you disagree?  The ifdef is pretty ugly.


I'll introduce CONFIG_VFIO_SPAPR_EEH.

Thanks,
Gavin

 ---
  drivers/vfio/Makefile | 4 ++--
  drivers/vfio/vfio_spapr_eeh.c | 6 ++
  2 files changed, 8 insertions(+), 2 deletions(-)
 
 diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
 index 50e30bc..4891cca 100644
 --- a/drivers/vfio/Makefile
 +++ b/drivers/vfio/Makefile
 @@ -1,5 +1,5 @@
  obj-$(CONFIG_VFIO) += vfio.o
  obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
 -obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
 -obj-$(CONFIG_EEH) += vfio_spapr_eeh.o
 +obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o \
 +  vfio_spapr_eeh.o
  obj-$(CONFIG_VFIO_PCI) += pci/
 diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
 index f834b4c..1a93e83 100644
 --- a/drivers/vfio/vfio_spapr_eeh.c
 +++ b/drivers/vfio/vfio_spapr_eeh.c
 @@ -14,15 +14,19 @@
  #include asm/eeh.h
  
  /* We might build address mapping here for fast path later */
 +#ifdef CONFIG_EEH
 +
  int vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
  {
  return eeh_dev_open(pdev);
  }
 +EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open);
  
  void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
  {
  eeh_dev_release(pdev);
  }
 +EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_release);
  
  long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
  unsigned int cmd, unsigned long arg)
 @@ -85,3 +89,5 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
  
  return ret;
  }
 +EXPORT_SYMBOL_GPL(vfio_spapr_iommu_eeh_ioctl);
 +#endif /* CONFIG_EEH */






Re: [PATCH v2 4/4] vfio_pci: spapr: Enable VFIO if EEH is not supported

2014-08-05 Thread Benjamin Herrenschmidt
On Tue, 2014-08-05 at 21:44 -0600, Alex Williamson wrote:
ret = vfio_spapr_pci_eeh_open(vdev-pdev);
  - if (ret) {
  - vfio_pci_disable(vdev);
  - goto error;
  - }
  + if (ret)
  + pr_warn_once(EEH is not supported\n);
}
   
return 0;
 
 Now the next question, what's the point of vfio_spapr_pci_eeh_open()
 returning a value?  Couldn't it return void now and this warning can
 go into eeh specific code?  Thanks,

In order to call vfio_pci_disable() when that happens ?

Cheers,
Ben.
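
If the caller no longer disables the device on failure (as in this v2), the
void variant Alex suggests could look like this (a sketch, not merged code):

void vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
{
	/* failure is no longer fatal; just note it once */
	if (eeh_dev_open(pdev))
		pr_warn_once("EEH is not supported\n");
}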




[PATCH] KVM: PPC: drop duplicate tracepoint

2014-08-05 Thread Paolo Bonzini
Commit 29577fc00ba4 (KVM: PPC: HV: Remove generic instruction emulation)
caused a build failure:

arch/powerpc/kvm/kvm-pr.o:(__tracepoints+0xa8): multiple definition of 
`__tracepoint_kvm_ppc_instr'
arch/powerpc/kvm/kvm.o:(__tracepoints+0x1c0): first defined here

due to a duplicate definition of the tracepoint in trace.h and
trace_pr.h.  Because the tracepoint is still used by Book3S HV
code, and because the PR code does include trace.h, just remove
the duplicate definition from trace_pr.h, and export it from
kvm.o.

Reported-by: Stephen Rothwell s...@canb.auug.org.au
Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 arch/powerpc/kvm/emulate.c  |  2 ++
 arch/powerpc/kvm/trace_pr.h | 20 
 2 files changed, 2 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index e96b50d0bdab..2325168ad1ff 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -300,3 +300,5 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct 
kvm_vcpu *vcpu)
return emulated;
 }
 EXPORT_SYMBOL_GPL(kvmppc_emulate_instruction);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr);
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h
index a674f090dfb8..e1357cd8dc1f 100644
--- a/arch/powerpc/kvm/trace_pr.h
+++ b/arch/powerpc/kvm/trace_pr.h
@@ -291,26 +291,6 @@ TRACE_EVENT(kvm_unmap_hva,
 	TP_printk("unmap hva 0x%lx\n", __entry->hva)
 );
 
-TRACE_EVENT(kvm_ppc_instr,
-	TP_PROTO(unsigned int inst, unsigned long _pc, unsigned int emulate),
-	TP_ARGS(inst, _pc, emulate),
-
-	TP_STRUCT__entry(
-		__field(	unsigned int,	inst		)
-		__field(	unsigned long,	pc		)
-		__field(	unsigned int,	emulate		)
-	),
-
-	TP_fast_assign(
-		__entry->inst		= inst;
-		__entry->pc		= _pc;
-		__entry->emulate	= emulate;
-	),
-
-	TP_printk("inst %u pc 0x%lx emulate %u\n",
-		  __entry->inst, __entry->pc, __entry->emulate)
-);
-
 #endif /* _TRACE_KVM_H */
 
 /* This part must be outside protection */
-- 
1.8.3.1



[PATCH v3 0/5] KVM: PPC: Book3e: AltiVec support

2014-08-05 Thread Mihai Caraman
Add KVM Book3e AltiVec support and enable e6500 core.

Changes:

v3:
 - use distinct SPE/AltiVec exception handlers
 - make ONE_REG AltiVec support powerpc generic
 - add ONE_REG IVORs support

v2:
 - integrate Paul's FP/VMX/VSX changes that landed in kvm-ppc-queue
   in January and take into account feedback

Mihai Caraman (5):
  KVM: PPC: Book3e: Increase FPU laziness
  KVM: PPC: Book3e: Add AltiVec support
  KVM: PPC: Move ONE_REG AltiVec support to powerpc
  KVM: PPC: Booke: Add ONE_REG IVORs support
  KVM: PPC: Book3e: Enable e6500 core

 arch/powerpc/include/uapi/asm/kvm.h   |  29 +++
 arch/powerpc/kvm/book3s.c | 151 +---
 arch/powerpc/kvm/booke.c  | 331 --
 arch/powerpc/kvm/booke.h  |  39 +---
 arch/powerpc/kvm/bookehv_interrupts.S |  10 +-
 arch/powerpc/kvm/e500.c   |  42 -
 arch/powerpc/kvm/e500_emulate.c   |  18 ++
 arch/powerpc/kvm/e500mc.c |  44 -
 arch/powerpc/kvm/powerpc.c|  97 ++
 9 files changed, 554 insertions(+), 207 deletions(-)

-- 
1.7.11.7



[PATCH v3 2/5] KVM: PPC: Book3e: Add AltiVec support

2014-08-05 Thread Mihai Caraman
Add KVM Book3e AltiVec support. KVM Book3e FPU support gracefully reuses the host
infrastructure, so follow the same approach for AltiVec.

Keep SPE/AltiVec exception handlers distinct using CONFIG_KVM_E500V2.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
 - use distinct SPE/AltiVec exception handlers

v2:
 - integrate Paul's FP/VMX/VSX changes

 arch/powerpc/kvm/booke.c  | 73 +++
 arch/powerpc/kvm/booke.h  |  5 +++
 arch/powerpc/kvm/bookehv_interrupts.S | 10 +++--
 arch/powerpc/kvm/e500_emulate.c   | 18 +
 4 files changed, 102 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 0c6f616..c5cca09 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -168,6 +168,40 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
 #endif
 }
 
+/*
+ * Simulate AltiVec unavailable fault to load guest state
+ * from thread to AltiVec unit.
+ * It requires to be called with preemption disabled.
+ */
+static inline void kvmppc_load_guest_altivec(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ALTIVEC
+   if (cpu_has_feature(CPU_FTR_ALTIVEC)) {
+   if (!(current->thread.regs->msr & MSR_VEC)) {
+   enable_kernel_altivec();
+   load_vr_state(&vcpu->arch.vr);
+   current->thread.vr_save_area = &vcpu->arch.vr;
+   current->thread.regs->msr |= MSR_VEC;
+   }
+   }
+#endif
+}
+
+/*
+ * Save guest vcpu AltiVec state into thread.
+ * It requires to be called with preemption disabled.
+ */
+static inline void kvmppc_save_guest_altivec(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ALTIVEC
+   if (cpu_has_feature(CPU_FTR_ALTIVEC)) {
+   if (current->thread.regs->msr & MSR_VEC)
+   giveup_altivec(current);
+   current->thread.vr_save_area = NULL;
+   }
+#endif
+}
+
 static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
 {
/* Synchronize guest's desire to get debug interrupts into shadow MSR */
@@ -375,9 +409,14 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu 
*vcpu,
case BOOKE_IRQPRIO_ITLB_MISS:
case BOOKE_IRQPRIO_SYSCALL:
case BOOKE_IRQPRIO_FP_UNAVAIL:
+#ifdef CONFIG_KVM_E500V2
case BOOKE_IRQPRIO_SPE_UNAVAIL:
case BOOKE_IRQPRIO_SPE_FP_DATA:
case BOOKE_IRQPRIO_SPE_FP_ROUND:
+#else
+   case BOOKE_IRQPRIO_ALTIVEC_UNAVAIL:
+   case BOOKE_IRQPRIO_ALTIVEC_ASSIST:
+#endif
case BOOKE_IRQPRIO_AP_UNAVAIL:
allowed = 1;
msr_mask = MSR_CE | MSR_ME | MSR_DE;
@@ -693,6 +732,17 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu)
kvmppc_load_guest_fp(vcpu);
 #endif
 
+#ifdef CONFIG_ALTIVEC
+   /* Save userspace AltiVec state in stack */
+   if (cpu_has_feature(CPU_FTR_ALTIVEC))
+   enable_kernel_altivec();
+   /*
+* Since we can't trap on MSR_VEC in GS-mode, we consider the guest
+* as always using the AltiVec.
+*/
+   kvmppc_load_guest_altivec(vcpu);
+#endif
+
/* Switch to guest debug context */
	debug = vcpu->arch.shadow_dbg_reg;
	switch_booke_debug_regs(&debug);
@@ -715,6 +765,10 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu)
kvmppc_save_guest_fp(vcpu);
 #endif
 
+#ifdef CONFIG_ALTIVEC
+   kvmppc_save_guest_altivec(vcpu);
+#endif
+
 out:
	vcpu->mode = OUTSIDE_GUEST_MODE;
return ret;
@@ -999,6 +1053,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
 
+#ifdef CONFIG_KVM_E500V2
 #ifdef CONFIG_SPE
case BOOKE_INTERRUPT_SPE_UNAVAIL: {
	if (vcpu->arch.shared->msr & MSR_SPE)
@@ -1040,7 +1095,24 @@ int kvmppc_handle_exit(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
	run->hw.hardware_exit_reason = exit_nr;
r = RESUME_HOST;
break;
+#endif /* !CONFIG_SPE */
+#else
+/*
+ * On cores with Vector category, KVM is loaded only if CONFIG_ALTIVEC,
+ * see kvmppc_core_check_processor_compat().
+ */
+#ifdef CONFIG_ALTIVEC
+   case BOOKE_INTERRUPT_ALTIVEC_UNAVAIL:
+   kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_UNAVAIL);
+   r = RESUME_GUEST;
+   break;
+
+   case BOOKE_INTERRUPT_ALTIVEC_ASSIST:
+   kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_ASSIST);
+   r = RESUME_GUEST;
+   break;
 #endif
+#endif /* !CONFIG_KVM_E500V2 */
 
case BOOKE_INTERRUPT_DATA_STORAGE:
	kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear,
@@ -1217,6 +1289,7 @@ out:
/* interrupts now hard-disabled */
kvmppc_fix_ee_before_entry();
kvmppc_load_guest_fp(vcpu);
+   

[PATCH v3 3/5] KVM: PPC: Move ONE_REG AltiVec support to powerpc

2014-08-05 Thread Mihai Caraman
Make ONE_REG AltiVec support common across server and embedded implementations
by moving the kvm_vcpu_ioctl_get_one_reg() and kvm_vcpu_ioctl_set_one_reg()
functions to the powerpc layer.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
 - make ONE_REG AltiVec support powerpc generic

v2:
 - add comment describing VCSR register representation in KVM vs kernel

 arch/powerpc/include/uapi/asm/kvm.h |   5 ++
 arch/powerpc/kvm/book3s.c   | 151 +++-
 arch/powerpc/kvm/booke.c|  85 
 arch/powerpc/kvm/powerpc.c  |  97 +++
 4 files changed, 179 insertions(+), 159 deletions(-)

diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index e0e49db..7a27ff0 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -476,6 +476,11 @@ struct kvm_get_htab_header {
 
 /* FP and vector status/control registers */
 #define KVM_REG_PPC_FPSCR  (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80)
+/*
+ * VSCR register is documented as a 32-bit register in the ISA, but it can
+ * only be accesses via a vector register. Expose VSCR as a 32-bit register
+ * even though the kernel represents it as a 128-bit vector.
+ */
 #define KVM_REG_PPC_VSCR   (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81)
 
 /* Virtual processor areas */
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index dd03f6b..1b5adda 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -535,174 +535,111 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, 
struct kvm_fpu *fpu)
return -ENOTSUPP;
 }
 
-int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+   union kvmppc_one_reg *val)
 {
-   int r;
-   union kvmppc_one_reg val;
-   int size;
+   int r = 0;
long int i;
 
-   size = one_reg_size(reg->id);
-   if (size > sizeof(val))
-   return -EINVAL;
-
-   r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);
+   r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val);
if (r == -EINVAL) {
r = 0;
-   switch (reg->id) {
+   switch (id) {
 case KVM_REG_PPC_DAR:
-   val = get_reg_val(reg->id, kvmppc_get_dar(vcpu));
+   *val = get_reg_val(id, kvmppc_get_dar(vcpu));
 break;
 case KVM_REG_PPC_DSISR:
-   val = get_reg_val(reg->id, kvmppc_get_dsisr(vcpu));
+   *val = get_reg_val(id, kvmppc_get_dsisr(vcpu));
 break;
 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
-   i = reg->id - KVM_REG_PPC_FPR0;
-   val = get_reg_val(reg->id, VCPU_FPR(vcpu, i));
+   i = id - KVM_REG_PPC_FPR0;
+   *val = get_reg_val(id, VCPU_FPR(vcpu, i));
 break;
 case KVM_REG_PPC_FPSCR:
-   val = get_reg_val(reg->id, vcpu->arch.fp.fpscr);
-   break;
-#ifdef CONFIG_ALTIVEC
-   case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
-   if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
-   r = -ENXIO;
-   break;
-   }
-   val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
+   *val = get_reg_val(id, vcpu->arch.fp.fpscr);
 break;
-   case KVM_REG_PPC_VSCR:
-   if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
-   r = -ENXIO;
-   break;
-   }
-   val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
-   break;
-   case KVM_REG_PPC_VRSAVE:
-   val = get_reg_val(reg->id, vcpu->arch.vrsave);
-   break;
-#endif /* CONFIG_ALTIVEC */
 #ifdef CONFIG_VSX
case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
if (cpu_has_feature(CPU_FTR_VSX)) {
-   long int i = reg->id - KVM_REG_PPC_VSR0;
-   val.vsxval[0] = vcpu->arch.fp.fpr[i][0];
-   val.vsxval[1] = vcpu->arch.fp.fpr[i][1];
+   i = id - KVM_REG_PPC_VSR0;
+   val->vsxval[0] = vcpu->arch.fp.fpr[i][0];
+   val->vsxval[1] = vcpu->arch.fp.fpr[i][1];
} else {
r = -ENXIO;
}
break;
 #endif /* CONFIG_VSX */
-   case KVM_REG_PPC_DEBUG_INST: {
-   u32 opcode = INS_TW;
-   r = 

[PATCH v3 5/5] KVM: PPC: Book3E: Enable e6500 core

2014-08-05 Thread Mihai Caraman
Now that AltiVec support is in place enable e6500 core.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v2-v3:
 - no changes

 arch/powerpc/kvm/e500mc.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 19dd927..aa48dc3 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -177,6 +177,16 @@ int kvmppc_core_check_processor_compat(void)
r = 0;
	else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0)
		r = 0;
+#ifdef CONFIG_ALTIVEC
+	/*
+	 * Since guests have the privilege to enable AltiVec, we need AltiVec
+	 * support in the host to save/restore their context.
+	 * Don't use CPU_FTR_ALTIVEC to identify cores with AltiVec unit
+	 * because it's cleared in the absence of CONFIG_ALTIVEC!
+	 */
+	else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0)
+		r = 0;
+#endif
else
r = -ENOTSUPP;
 
-- 
1.7.11.7



[PATCH v3 4/5] KVM: PPC: Booke: Add ONE_REG IVORs support

2014-08-05 Thread Mihai Caraman
Add ONE_REG IVOR support, with IVORs 0-15 and 35 common to Booke.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
 - new patch

 arch/powerpc/include/uapi/asm/kvm.h |  24 +++
 arch/powerpc/kvm/booke.c| 132 
 arch/powerpc/kvm/e500.c |  42 +++-
 arch/powerpc/kvm/e500mc.c   |  32 +
 4 files changed, 228 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index 7a27ff0..174fed0 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -563,6 +563,30 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_WORT   (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9)
 #define KVM_REG_PPC_SPRG9  (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba)
 
+/* Booke IVOR registers */
+#define KVM_REG_PPC_IVOR0  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xc0)
+#define KVM_REG_PPC_IVOR1  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xc1)
+#define KVM_REG_PPC_IVOR2  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xc2)
+#define KVM_REG_PPC_IVOR3  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xc3)
+#define KVM_REG_PPC_IVOR4  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xc4)
+#define KVM_REG_PPC_IVOR5  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xc5)
+#define KVM_REG_PPC_IVOR6  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xc6)
+#define KVM_REG_PPC_IVOR7  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xc7)
+#define KVM_REG_PPC_IVOR8  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xc8)
+#define KVM_REG_PPC_IVOR9  (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xc9)
+#define KVM_REG_PPC_IVOR10 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xca)
+#define KVM_REG_PPC_IVOR11 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xcb)
+#define KVM_REG_PPC_IVOR12 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xcc)
+#define KVM_REG_PPC_IVOR13 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xcd)
+#define KVM_REG_PPC_IVOR14 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xce)
+#define KVM_REG_PPC_IVOR15 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xcf)
+#define KVM_REG_PPC_IVOR32 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xd0)
+#define KVM_REG_PPC_IVOR33 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xd1)
+#define KVM_REG_PPC_IVOR34 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xd2)
+#define KVM_REG_PPC_IVOR35 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xd3)
+#define KVM_REG_PPC_IVOR36 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xd4)
+#define KVM_REG_PPC_IVOR37 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xd5)
+
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
  */
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 4fe7f68..ffa82a5 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1565,6 +1565,72 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
int r = 0;
 
switch (id) {
+   case KVM_REG_PPC_IVOR0:
+   *val = get_reg_val(id,
+   vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]);
+   break;
+   case KVM_REG_PPC_IVOR1:
+   *val = get_reg_val(id,
+   vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]);
+   break;
+   case KVM_REG_PPC_IVOR2:
+   *val = get_reg_val(id,
+   vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]);
+   break;
+   case KVM_REG_PPC_IVOR3:
+   *val = get_reg_val(id,
+   vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]);
+   break;
+   case KVM_REG_PPC_IVOR4:
+   *val = get_reg_val(id,
+   vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]);
+   break;
+   case KVM_REG_PPC_IVOR5:
+   *val = get_reg_val(id,
+   vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]);
+   break;
+   case KVM_REG_PPC_IVOR6:
+   *val = get_reg_val(id, vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]);
+   break;
+   case KVM_REG_PPC_IVOR7:
+   *val = get_reg_val(id,
+   vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]);
+   break;
+   case KVM_REG_PPC_IVOR8:
+   *val = get_reg_val(id,
+   vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]);
+   break;
+   case KVM_REG_PPC_IVOR9:
+   *val = get_reg_val(id,
+   vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]);
+   break;
+   case KVM_REG_PPC_IVOR10:
+   *val = get_reg_val(id,
+   vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]);
+   break;
+   case KVM_REG_PPC_IVOR11:
+   *val = get_reg_val(id, vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]);
+   break;
+   case KVM_REG_PPC_IVOR12:
+   *val = get_reg_val(id,
+   vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]);
+   break;
+   case KVM_REG_PPC_IVOR13:
+

[PATCH v3 1/5] KVM: PPC: Book3e: Increase FPU laziness

2014-08-05 Thread Mihai Caraman
Increase FPU laziness by calling kvmppc_load_guest_fp() just before
returning to the guest instead of on each sched-in. Without this improvement
an interrupt may also claim the floating point unit, corrupting guest state.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
---
v3:
 - no changes

v2:
 - remove fpu_active
 - add descriptive comments

 arch/powerpc/kvm/booke.c  | 43 ---
 arch/powerpc/kvm/booke.h  | 34 --
 arch/powerpc/kvm/e500mc.c |  2 --
 3 files changed, 36 insertions(+), 43 deletions(-)

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index b4c89fa..0c6f616 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -124,6 +124,40 @@ static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
 }
 #endif
 
+/*
+ * Load up guest vcpu FP state if it's needed.
+ * It also set the MSR_FP in thread so that host know
+ * we're holding FPU, and then host can help to save
+ * guest vcpu FP state if other threads require to use FPU.
+ * This simulates an FP unavailable fault.
+ *
+ * It requires to be called with preemption disabled.
+ */
+static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PPC_FPU
+   if (!(current->thread.regs->msr & MSR_FP)) {
+   enable_kernel_fp();
+   load_fp_state(&vcpu->arch.fp);
+   current->thread.fp_save_area = &vcpu->arch.fp;
+   current->thread.regs->msr |= MSR_FP;
+   }
+#endif
+}
+
+/*
+ * Save guest vcpu FP state into thread.
+ * It requires to be called with preemption disabled.
+ */
+static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PPC_FPU
+   if (current->thread.regs->msr & MSR_FP)
+   giveup_fpu(current);
+   current->thread.fp_save_area = NULL;
+#endif
+}
+
 static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
 {
 #if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV)
@@ -654,12 +688,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu)
 
/*
 * Since we can't trap on MSR_FP in GS-mode, we consider the guest
-* as always using the FPU.  Kernel usage of FP (via
-* enable_kernel_fp()) in this thread must not occur while
-* vcpu->fpu_active is set.
+* as always using the FPU.
 */
-   vcpu->fpu_active = 1;
-
kvmppc_load_guest_fp(vcpu);
 #endif
 
@@ -683,8 +713,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu)
 
 #ifdef CONFIG_PPC_FPU
kvmppc_save_guest_fp(vcpu);
-
-   vcpu->fpu_active = 0;
 #endif
 
 out:
@@ -1188,6 +1216,7 @@ out:
else {
/* interrupts now hard-disabled */
kvmppc_fix_ee_before_entry();
+   kvmppc_load_guest_fp(vcpu);
}
}
 
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index f753543..e73d513 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -116,40 +116,6 @@ extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu 
*vcpu, int sprn,
 extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
  ulong *spr_val);
 
-/*
- * Load up guest vcpu FP state if it's needed.
- * It also set the MSR_FP in thread so that host know
- * we're holding FPU, and then host can help to save
- * guest vcpu FP state if other threads require to use FPU.
- * This simulates an FP unavailable fault.
- *
- * It requires to be called with preemption disabled.
- */
-static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_PPC_FPU
-   if (vcpu->fpu_active && !(current->thread.regs->msr & MSR_FP)) {
-   enable_kernel_fp();
-   load_fp_state(&vcpu->arch.fp);
-   current->thread.fp_save_area = &vcpu->arch.fp;
-   current->thread.regs->msr |= MSR_FP;
-   }
-#endif
-}
-
-/*
- * Save guest vcpu FP state into thread.
- * It requires to be called with preemption disabled.
- */
-static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_PPC_FPU
-   if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP))
-   giveup_fpu(current);
-   current->thread.fp_save_area = NULL;
-#endif
-}
-
 static inline void kvmppc_clear_dbsr(void)
 {
mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 164bad2..67c06eb 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -145,8 +145,6 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu 
*vcpu, int cpu)
kvmppc_e500_tlbil_all(vcpu_e500);
	__get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] = vcpu;
}
-
-   kvmppc_load_guest_fp(vcpu);
 }
 
 static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu)
-- 
1.7.11.7


Re: [PATCH 0/6] IRQFD without IRQ routing, enabled for XICS

2014-08-05 Thread Paolo Bonzini
Il 30/06/2014 12:51, Paul Mackerras ha scritto:
 This series of patches provides a way to implement IRQFD support
 without having to implement IRQ routing, and adds IRQFD support for
 the XICS interrupt controller emulation.  (XICS is the interrupt
 controller defined for the pSeries machine type, used on IBM POWER
 servers).
 
 The basic approach is to make it easy for code other than irqchip.c to
 provide a mapping from a global interrupt number (GSI) to an irq
 routing entry (struct kvm_kernel_irq_routing_entry).  To make the
 lifetime of this routing entry easier to manage, we change the IRQFD
 code to keep a copy of the routing entry (for the MSI fast-path case)
 rather than a pointer to the routing entry.  Since the copy can't be
 updated atomically, we add a seqcount_t to make sure that when reading
 it we get a copy that hasn't been half-way updated.
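
(A minimal sketch of the copy-plus-seqcount idea described above; the struct and function names are invented for illustration, only seqcount_t and its read/write helpers are the real kernel primitives.)

#include <linux/seqlock.h>
#include <linux/kvm_host.h>

struct irqfd_sketch {
	seqcount_t irq_entry_sc;
	struct kvm_kernel_irq_routing_entry irq_entry;	/* a copy, not a pointer */
};

/* Writer side: refresh the cached copy under the seqcount */
static void irqfd_sketch_update(struct irqfd_sketch *irqfd,
				const struct kvm_kernel_irq_routing_entry *e)
{
	write_seqcount_begin(&irqfd->irq_entry_sc);
	irqfd->irq_entry = *e;
	write_seqcount_end(&irqfd->irq_entry_sc);
}

/* Reader side (e.g. the MSI fast path): retry until the copy is consistent */
static void irqfd_sketch_read(struct irqfd_sketch *irqfd,
			      struct kvm_kernel_irq_routing_entry *out)
{
	unsigned int seq;

	do {
		seq = read_seqcount_begin(&irqfd->irq_entry_sc);
		*out = irqfd->irq_entry;
	} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
}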
 
 Next we replace the hard-coded accesses outside irqchip.c to the
 fields of the kvm_irq_routing_table struct with calls to accessor
 functions in irqchip.c, namely kvm_irq_map_gsi() and
 kvm_irq_map_chip_pin().  That enables us to move all references to the
 kvm_irq_routing_table struct, and the definition of that struct, into
 irqchip.c.
 
 Then we move the irq notifier implementation from irqchip.c into
 eventfd.c and add a separate Kconfig option to enable IRQFD.  With
 that we can enable IRQFD without irq routing, which we achieve by
 compiling in eventfd.c but not irqchip.c, and providing an alternative
 implementation of kvm_irq_map_gsi() and kvm_irq_map_chip_pin().
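
(Sketch of what an alternative, routing-table-free kvm_irq_map_gsi() might look like for an identity-mapped controller such as XICS, assuming a signature along these lines; the body is illustrative and not the code from the XICS patch.)

#include <linux/kvm_host.h>

int kvm_irq_map_gsi(struct kvm *kvm,
		    struct kvm_kernel_irq_routing_entry *entries, int gsi)
{
	/* One identity entry: the GSI is the controller's interrupt number */
	entries->gsi = gsi;
	entries->type = KVM_IRQ_ROUTING_IRQCHIP;
	entries->irqchip.irqchip = 0;
	entries->irqchip.pin = gsi;
	return 1;	/* number of entries filled in */
}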
 
 The last patch does that for XICS.  With this series I can use
 vhost-net with KVM guests, and I see the TCP bandwidth between guest
 and host on a POWER8 machine go from around 700MB/s to over 2GB/s.
 
 I would like to see this go into 3.17.
 
  arch/ia64/kvm/Kconfig|   1 +
  arch/powerpc/kvm/Kconfig |   3 +
  arch/powerpc/kvm/book3s_hv_rm_xics.c |   5 ++
  arch/powerpc/kvm/book3s_xics.c   |  55 +++---
  arch/powerpc/kvm/book3s_xics.h   |   2 +
  arch/powerpc/kvm/mpic.c  |   4 +-
  arch/s390/kvm/Kconfig|   1 +
  arch/s390/kvm/interrupt.c|   3 +-
  arch/x86/kvm/Kconfig |   1 +
  include/linux/kvm_host.h |  43 ---
  virt/kvm/Kconfig |   3 +
  virt/kvm/eventfd.c   | 134 
 ++-
  virt/kvm/irq_comm.c  |  24 +++
  virt/kvm/irqchip.c   |  98 ++---
  virt/kvm/kvm_main.c  |   2 +-
  15 files changed, 227 insertions(+), 152 deletions(-)
 

Applied for 3.17, thanks Eric and Cornelia for testing.

Paolo


Re: [PATCH] KVM: PPC: drop duplicate tracepoint

2014-08-05 Thread Paolo Bonzini
Il 05/08/2014 12:35, Paolo Bonzini ha scritto:
 diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
 index e96b50d0bdab..2325168ad1ff 100644
 --- a/arch/powerpc/kvm/emulate.c
 +++ b/arch/powerpc/kvm/emulate.c
 @@ -300,3 +300,5 @@ int kvmppc_emulate_instruction(struct kvm_run *run, 
 struct kvm_vcpu *vcpu)
   return emulated;
  }
  EXPORT_SYMBOL_GPL(kvmppc_emulate_instruction);
 +
 +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr);

Nope, this is in kvm-pr.ko so it is not enough... replaced with

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 288b4bb05cbd..4c79284b58be 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -1347,3 +1347,5 @@ void kvm_arch_exit(void)
 {

 }
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr);


Paolo


Re: [PATCH 5/5 v2] KVM: PPC: BOOKE: Emulate debug registers and exception

2014-08-05 Thread Scott Wood
On Mon, 2014-08-04 at 22:41 -0500, Bhushan Bharat-R65777 wrote:
 
  -Original Message-
  From: Wood Scott-B07421
  Sent: Tuesday, August 05, 2014 4:23 AM
  To: Bhushan Bharat-R65777
  Cc: ag...@suse.de; kvm-ppc@vger.kernel.org; k...@vger.kernel.org; Yoder 
  Stuart-
  B08248
  Subject: Re: [PATCH 5/5 v2] KVM: PPC: BOOKE: Emulate debug registers and
  exception
  
  On Mon, 2014-08-04 at 13:32 +0530, Bharat Bhushan wrote:
   @@ -735,7 +745,27 @@ static int kvmppc_handle_debug(struct kvm_run *run,
  struct kvm_vcpu *vcpu)
	struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
	u32 dbsr = vcpu->arch.dbsr;
  
  - /* Clear guest dbsr (vcpu->arch.dbsr).
  + if (vcpu->guest_debug == 0) {
  + /*
  +  * Debug resources belong to Guest.
  +  * Imprecise debug event are not injected
  +  */
  + if (dbsr & DBSR_IDE)
  + return RESUME_GUEST;
  
  This is incorrect.  DBSR_IDE shouldn't *cause* an injection, but it 
  shouldn't
  inhibit it either.
 
 Will this work ?
   if ((dbsr & DBSR_IDE) && !(dbsr & ~DBSR_IDE))
   	return RESUME_GUEST;

I suppose it could, but it would be cleaner to just change dbsr to
(dbsr & ~DBSR_IDE) in the next if-statement (maybe factoring out each
 term of that if-statement to variables to make it more readable).
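
(Putting the two points together, the start of kvmppc_handle_debug() could end up looking roughly like the sketch below; kvmppc_core_queue_debug() is a placeholder name for whatever actually injects the debug interrupt, not necessarily the real helper.)

	u32 dbsr = vcpu->arch.dbsr;
	bool imprecise_only = !(dbsr & ~DBSR_IDE);

	if (vcpu->guest_debug == 0) {
		/*
		 * Debug resources belong to the guest.  DBSR_IDE alone does
		 * not cause an injection, but it must not suppress one either.
		 */
		if (!imprecise_only)
			kvmppc_core_queue_debug(vcpu);	/* placeholder name */
		return RESUME_GUEST;
	}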

   @@ -828,6 +858,8 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu
  *vcpu,
 case BOOKE_INTERRUPT_DEBUG:
 /* Save DBSR before preemption is enabled */
	vcpu->arch.dbsr = mfspr(SPRN_DBSR);
  + /* MASK out DBSR_MRR */
  + vcpu->arch.dbsr &= ~DBSR_MRR;
 kvmppc_clear_dbsr();
 break;
 }
  
  DBSR[MRR] can only be set once per host system reset.  There's no need to 
  filter
  it out here; just make sure the host clears it at some point before this 
  point.
 
 Can you please suggest where ? somewhere in KVM initialization ?

Sure, KVM init works given that there's no real reason for non-KVM code
to care.
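
(Something along these lines in the booke module init would do it; the placement inside kvmppc_booke_init() is only an illustration of clearing it once, early, rather than per exit.)

int __init kvmppc_booke_init(void)
{
	/* ... existing exception handler setup ... */

	/* DBSR[MRR] is set once per host reset; clear it before guests run */
	mtspr(SPRN_DBSR, DBSR_MRR);

	return 0;
}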

  The MRR value doesn't currently survive past kvmppc_clear_dbsr(), so this 
  isn't
  helping to preserve it for the host's benefit...
  
   @@ -1858,6 +1890,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct
   kvm_vcpu *vcpu,
  
 if (!(dbg-control  KVM_GUESTDBG_ENABLE)) {
	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
  + vcpu->arch.dbg_reg.dbcr0 = 0;
  
  Again, it's not clear why we need shadow debug registers here.  Just in 
  case we
  implement something that can't be implemented isn't a good reason to keep
  complexity around.
 
 One reason was to set EDM in the guest-visible register. For this,
 shadow_reg is used to save/restore state in the h/w register (which
 does not have DBCR0_EDM), while debug_reg has DBCR0_EDM.

If that's the only reason, then I'd get rid of the shadow and just OR in
DBCR0_EDM when reading the register, if vcpu->guest_debug is nonzero.
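
(A sketch of that alternative: no shadow copy, just report EDM as set whenever userspace owns the debug resources. The accessor name is made up; the real change would more likely be folded into the existing DBCR0 read path.)

static unsigned long kvmppc_get_guest_dbcr0(struct kvm_vcpu *vcpu)
{
	unsigned long dbcr0 = vcpu->arch.dbg_reg.dbcr0;

	if (vcpu->guest_debug)
		dbcr0 |= DBCR0_EDM;	/* debug resources owned by the host */

	return dbcr0;
}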

-Scott

