[PATCH] KVM: nVMX: Add support for rdtscp

2015-03-23 Thread Jan Kiszka
From: Jan Kiszka jan.kis...@siemens.com

If the guest CPU is supposed to support rdtscp and the host has rdtscp
enabled in the secondary execution controls, we can also expose this
feature to L1. Just extend nested_vmx_exit_handled to properly route
EXIT_REASON_RDTSCP.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---
 arch/x86/include/uapi/asm/vmx.h | 1 +
 arch/x86/kvm/vmx.c  | 6 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index c5f1a1d..1fe9218 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -67,6 +67,7 @@
 #define EXIT_REASON_EPT_VIOLATION   48
 #define EXIT_REASON_EPT_MISCONFIG   49
 #define EXIT_REASON_INVEPT  50
+#define EXIT_REASON_RDTSCP  51
 #define EXIT_REASON_PREEMPTION_TIMER52
 #define EXIT_REASON_INVVPID 53
 #define EXIT_REASON_WBINVD  54
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 50c675b..7875e9b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2467,6 +2467,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx 
*vmx)
vmx-nested.nested_vmx_secondary_ctls_low = 0;
vmx-nested.nested_vmx_secondary_ctls_high =
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+   SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
@@ -7510,7 +7511,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
case EXIT_REASON_RDPMC:
return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
-   case EXIT_REASON_RDTSC:
+   case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
@@ -8517,6 +8518,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
exec_control);
}
}
+   if (!vmx-rdtscp_enabled)
+   vmx-nested.nested_vmx_secondary_ctls_high =
+   ~SECONDARY_EXEC_RDTSCP;
}
 
/* Exposing INVPCID only when PCID is exposed */
-- 
2.1.4



signature.asc
Description: OpenPGP digital signature


[PATCH v2] KVM: nVMX: Add support for rdtscp

2015-03-23 Thread Jan Kiszka
From: Jan Kiszka jan.kis...@siemens.com

If the guest CPU is supposed to support rdtscp and the host has rdtscp
enabled in the secondary execution controls, we can also expose this
feature to L1. Just extend nested_vmx_exit_handled to properly route
EXIT_REASON_RDTSCP.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---

Changes in v2 (thinko in test scenario...):
 - respect L1's setting of SECONDARY_EXEC_RDTSCP

 arch/x86/include/uapi/asm/vmx.h | 1 +
 arch/x86/kvm/vmx.c  | 9 +++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index c5f1a1d..1fe9218 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -67,6 +67,7 @@
 #define EXIT_REASON_EPT_VIOLATION   48
 #define EXIT_REASON_EPT_MISCONFIG   49
 #define EXIT_REASON_INVEPT  50
+#define EXIT_REASON_RDTSCP  51
 #define EXIT_REASON_PREEMPTION_TIMER52
 #define EXIT_REASON_INVVPID 53
 #define EXIT_REASON_WBINVD  54
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 50c675b..45e0a6b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2467,6 +2467,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx 
*vmx)
vmx-nested.nested_vmx_secondary_ctls_low = 0;
vmx-nested.nested_vmx_secondary_ctls_high =
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+   SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
@@ -7510,7 +7511,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
case EXIT_REASON_RDPMC:
return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
-   case EXIT_REASON_RDTSC:
+   case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
@@ -8517,6 +8518,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
exec_control);
}
}
+   if (!vmx-rdtscp_enabled)
+   vmx-nested.nested_vmx_secondary_ctls_high =
+   ~SECONDARY_EXEC_RDTSCP;
}
 
/* Exposing INVPCID only when PCID is exposed */
@@ -9146,8 +9150,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct 
vmcs12 *vmcs12)
exec_control = ~SECONDARY_EXEC_RDTSCP;
/* Take the following fields only from vmcs12 */
exec_control = ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+ SECONDARY_EXEC_RDTSCP |
  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-  SECONDARY_EXEC_APIC_REGISTER_VIRT);
+ SECONDARY_EXEC_APIC_REGISTER_VIRT);
if (nested_cpu_has(vmcs12,
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
exec_control |= vmcs12-secondary_vm_exec_control;
-- 
2.1.4



signature.asc
Description: OpenPGP digital signature


Re: [PATCH 02/20] MIPS: Clear [MSA]FPE CSR.Cause after notify_die()

2015-03-23 Thread Ralf Baechle
On Wed, Mar 11, 2015 at 02:44:38PM +0000, James Hogan wrote:

Acked-by: Ralf Baechle r...@linux-mips.org

Feel free to merge this through the KVM tree along with the remainder of
the series.

  Ralf
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 08/12] KVM: arm/arm64: implement kvm_io_bus MMIO handling for the VGIC

2015-03-23 Thread Andre Przywara
Currently we use a lot of VGIC specific code to do the MMIO
dispatching.
Use the previous reworks to add kvm_io_bus style MMIO handlers.

Those are not yet called by the MMIO abort handler, and the actual
VGIC emulator functions do not make use of them yet; both will be
enabled by the following patches.

Signed-off-by: Andre Przywara andre.przyw...@arm.com
---
 include/kvm/arm_vgic.h |9 
 virt/kvm/arm/vgic.c|  129 
 virt/kvm/arm/vgic.h|7 +++
 3 files changed, 145 insertions(+)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 9092fad..f90140c 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -24,6 +24,7 @@
 #include linux/irqreturn.h
 #include linux/spinlock.h
 #include linux/types.h
+#include kvm/iodev.h
 
 #define VGIC_NR_IRQS_LEGACY256
 #define VGIC_NR_SGIS   16
@@ -147,6 +148,14 @@ struct vgic_vm_ops {
int (*map_resources)(struct kvm *, const struct vgic_params *);
 };
 
+struct vgic_io_device {
+   gpa_t addr;
+   int len;
+   const struct vgic_io_range *reg_ranges;
+   struct kvm_vcpu *redist_vcpu;
+   struct kvm_io_device dev;
+};
+
 struct vgic_dist {
spinlock_t  lock;
boolin_kernel;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 8802ad7..e968179 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -32,6 +32,8 @@
 #include asm/kvm_arm.h
 #include asm/kvm_mmu.h
 #include trace/events/kvm.h
+#include asm/kvm.h
+#include kvm/iodev.h
 
 /*
  * How the whole thing works (courtesy of Christoffer Dall):
@@ -837,6 +839,66 @@ bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct 
kvm_run *run,
 }
 
 /**
+ * vgic_handle_mmio_access - handle an in-kernel MMIO access
+ * This is called by the read/write KVM IO device wrappers below.
+ * @vcpu:  pointer to the vcpu performing the access
+ * @this:  pointer to the KVM IO device in charge
+ * @addr:  guest physical address of the access
+ * @len:   size of the access
+ * @val:   pointer to the data region
+ * @is_write:  read or write access
+ *
+ * returns true if the MMIO access could be performed
+ */
+static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu,
+  struct kvm_io_device *this, gpa_t addr,
+  int len, void *val, bool is_write)
+{
+   struct vgic_dist *dist = vcpu-kvm-arch.vgic;
+   struct vgic_io_device *iodev = container_of(this,
+   struct vgic_io_device, dev);
+   struct kvm_run *run = vcpu-run;
+   const struct vgic_io_range *range;
+   struct kvm_exit_mmio mmio;
+   bool updated_state;
+   gpa_t offset;
+
+   offset = addr - iodev-addr;
+   range = vgic_find_range(iodev-reg_ranges, len, offset);
+   if (unlikely(!range || !range-handle_mmio)) {
+   pr_warn(Unhandled access %d %08llx %d\n, is_write, addr, len);
+   return -ENXIO;
+   }
+
+   mmio.phys_addr = addr;
+   mmio.len = len;
+   mmio.is_write = is_write;
+   if (is_write)
+   memcpy(mmio.data, val, len);
+   mmio.private = iodev-redist_vcpu;
+
+   spin_lock(dist-lock);
+   offset -= range-base;
+   if (vgic_validate_access(dist, range, offset)) {
+   updated_state = call_range_handler(vcpu, mmio, offset, range);
+   if (!is_write)
+   memcpy(val, mmio.data, len);
+   } else {
+   if (!is_write)
+   memset(val, 0, len);
+   updated_state = false;
+   }
+   spin_unlock(dist-lock);
+   kvm_prepare_mmio(run, mmio);
+   kvm_handle_mmio_return(vcpu, run);
+
+   if (updated_state)
+   vgic_kick_vcpus(vcpu-kvm);
+
+   return 0;
+}
+
+/**
  * vgic_handle_mmio - handle an in-kernel MMIO access for the GIC emulation
  * @vcpu:  pointer to the vcpu performing the access
  * @run:   pointer to the kvm_run structure
@@ -860,6 +922,73 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct 
kvm_run *run,
return vcpu-kvm-arch.vgic.vm_ops.handle_mmio(vcpu, run, mmio);
 }
 
+static int vgic_handle_mmio_read(struct kvm_vcpu *vcpu,
+struct kvm_io_device *this,
+gpa_t addr, int len, void *val)
+{
+   return vgic_handle_mmio_access(vcpu, this, addr, len, val, false);
+}
+
+static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *this,
+ gpa_t addr, int len, const void *val)
+{
+   return vgic_handle_mmio_access(vcpu, this, addr, len, (void *)val,
+  true);
+}
+
+struct kvm_io_device_ops vgic_io_ops = {
+   .read   = vgic_handle_mmio_read,
+   .write  = vgic_handle_mmio_write,
+};
+
+/**
+ * 

[PATCH v2 05/12] KVM: arm/arm64: rename struct kvm_mmio_range to vgic_io_range

2015-03-23 Thread Andre Przywara
The name kvm_mmio_range is a bit bold, given that it only covers
the VGIC's MMIO ranges. To avoid confusion with kvm_io_range, rename
it to vgic_io_range.

Signed-off-by: Andre Przywara andre.przyw...@arm.com
Acked-by: Christoffer Dall christoffer.d...@linaro.org
---
 virt/kvm/arm/vgic-v2-emul.c |6 +++---
 virt/kvm/arm/vgic-v3-emul.c |8 
 virt/kvm/arm/vgic.c |   18 +-
 virt/kvm/arm/vgic.h |   12 ++--
 4 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c
index c818662..ddb3135 100644
--- a/virt/kvm/arm/vgic-v2-emul.c
+++ b/virt/kvm/arm/vgic-v2-emul.c
@@ -319,7 +319,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false);
 }
 
-static const struct kvm_mmio_range vgic_dist_ranges[] = {
+static const struct vgic_io_range vgic_dist_ranges[] = {
{
.base   = GIC_DIST_CTRL,
.len= 12,
@@ -647,7 +647,7 @@ static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu,
  * CPU Interface Register accesses - these are not accessed by the VM, but by
  * user space for saving and restoring VGIC state.
  */
-static const struct kvm_mmio_range vgic_cpu_ranges[] = {
+static const struct vgic_io_range vgic_cpu_ranges[] = {
{
.base   = GIC_CPU_CTRL,
.len= 12,
@@ -674,7 +674,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
 struct kvm_device_attr *attr,
 u32 *reg, bool is_write)
 {
-   const struct kvm_mmio_range *r = NULL, *ranges;
+   const struct vgic_io_range *r = NULL, *ranges;
phys_addr_t offset;
int ret, cpuid, c;
struct kvm_vcpu *vcpu, *tmp_vcpu;
diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c
index b3f1546..14943e3 100644
--- a/virt/kvm/arm/vgic-v3-emul.c
+++ b/virt/kvm/arm/vgic-v3-emul.c
@@ -340,7 +340,7 @@ static bool handle_mmio_idregs(struct kvm_vcpu *vcpu,
return false;
 }
 
-static const struct kvm_mmio_range vgic_v3_dist_ranges[] = {
+static const struct vgic_io_range vgic_v3_dist_ranges[] = {
{
.base   = GICD_CTLR,
.len= 0x04,
@@ -570,7 +570,7 @@ static bool handle_mmio_cfg_reg_redist(struct kvm_vcpu 
*vcpu,
return vgic_handle_cfg_reg(reg, mmio, offset);
 }
 
-static const struct kvm_mmio_range vgic_redist_sgi_ranges[] = {
+static const struct vgic_io_range vgic_redist_sgi_ranges[] = {
{
.base   = GICR_IGROUPR0,
.len= 0x04,
@@ -676,7 +676,7 @@ static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu,
return false;
 }
 
-static const struct kvm_mmio_range vgic_redist_ranges[] = {
+static const struct vgic_io_range vgic_redist_ranges[] = {
{
.base   = GICR_CTLR,
.len= 0x04,
@@ -726,7 +726,7 @@ static bool vgic_v3_handle_mmio(struct kvm_vcpu *vcpu, 
struct kvm_run *run,
unsigned long rdbase = dist-vgic_redist_base;
int nrcpus = atomic_read(vcpu-kvm-online_vcpus);
int vcpu_id;
-   const struct kvm_mmio_range *mmio_range;
+   const struct vgic_io_range *mmio_range;
 
if (is_in_range(mmio-phys_addr, mmio-len, dbase, GIC_V3_DIST_SIZE)) {
return vgic_handle_mmio_range(vcpu, run, mmio,
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index ffd937c..21a3550 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -712,11 +712,11 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 }
 
 const
-struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges,
-  struct kvm_exit_mmio *mmio,
-  phys_addr_t offset)
+struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
 {
-   const struct kvm_mmio_range *r = ranges;
+   const struct vgic_io_range *r = ranges;
 
while (r-len) {
if (offset = r-base 
@@ -729,7 +729,7 @@ struct kvm_mmio_range *vgic_find_range(const struct 
kvm_mmio_range *ranges,
 }
 
 static bool vgic_validate_access(const struct vgic_dist *dist,
-const struct kvm_mmio_range *range,
+const struct vgic_io_range *range,
 unsigned long offset)
 {
int irq;
@@ -757,7 +757,7 @@ static bool vgic_validate_access(const struct vgic_dist 
*dist,
 static bool call_range_handler(struct kvm_vcpu *vcpu,
   struct kvm_exit_mmio *mmio,
   unsigned long offset,
-  const struct 

[PATCH v2 02/12] KVM: move iodev.h from virt/kvm/ to include/kvm

2015-03-23 Thread Andre Przywara
iodev.h contains definitions for the kvm_io_bus framework. This is
needed both by the generic KVM code in virt/kvm as well as by
architecture specific code under arch/. Putting the header file in
virt/kvm and using local includes in the architecture part seems at
least dodgy to me, so let's move the file into include/kvm, so that a
more natural #include <kvm/iodev.h> can be used by all of the code.
This also solves a problem later when using struct kvm_io_device
in arm_vgic.h.
Fixing up the FSF address in the GPL header and a wrong include path
on the way.

Signed-off-by: Andre Przywara andre.przyw...@arm.com
Acked-by: Christoffer Dall christoffer.d...@linaro.org
---
 arch/powerpc/kvm/mpic.c   |2 +-
 arch/x86/kvm/i8254.h  |2 +-
 arch/x86/kvm/ioapic.h |2 +-
 arch/x86/kvm/irq.h|2 +-
 arch/x86/kvm/lapic.h  |2 +-
 include/kvm/iodev.h   |   76 
 virt/kvm/coalesced_mmio.c |2 +-
 virt/kvm/eventfd.c|2 +-
 virt/kvm/iodev.h  |   77 -
 virt/kvm/kvm_main.c   |2 +-
 10 files changed, 84 insertions(+), 85 deletions(-)
 create mode 100644 include/kvm/iodev.h
 delete mode 100644 virt/kvm/iodev.h

diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index 8542f07..4703fad 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -34,7 +34,7 @@
 #include asm/kvm_para.h
 #include asm/kvm_host.h
 #include asm/kvm_ppc.h
-#include iodev.h
+#include kvm/iodev.h
 
 #define MAX_CPU 32
 #define MAX_SRC 256
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index dd1b16b..c84990b 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -3,7 +3,7 @@
 
 #include linux/kthread.h
 
-#include iodev.h
+#include kvm/iodev.h
 
 struct kvm_kpit_channel_state {
u32 count; /* can be 65536 */
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index c2e36d9..d9e02ca 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -3,7 +3,7 @@
 
 #include linux/kvm_host.h
 
-#include iodev.h
+#include kvm/iodev.h
 
 struct kvm;
 struct kvm_vcpu;
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2d03568..ad68c73 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -27,7 +27,7 @@
 #include linux/kvm_host.h
 #include linux/spinlock.h
 
-#include iodev.h
+#include kvm/iodev.h
 #include ioapic.h
 #include lapic.h
 
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 0bc6c65..e284c28 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -1,7 +1,7 @@
 #ifndef __KVM_X86_LAPIC_H
 #define __KVM_X86_LAPIC_H
 
-#include iodev.h
+#include kvm/iodev.h
 
 #include linux/kvm_host.h
 
diff --git a/include/kvm/iodev.h b/include/kvm/iodev.h
new file mode 100644
index 000..a6d208b
--- /dev/null
+++ b/include/kvm/iodev.h
@@ -0,0 +1,76 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/.
+ */
+
+#ifndef __KVM_IODEV_H__
+#define __KVM_IODEV_H__
+
+#include linux/kvm_types.h
+#include linux/errno.h
+
+struct kvm_io_device;
+struct kvm_vcpu;
+
+/**
+ * kvm_io_device_ops are called under kvm slots_lock.
+ * read and write handlers return 0 if the transaction has been handled,
+ * or non-zero to have it passed to the next device.
+ **/
+struct kvm_io_device_ops {
+   int (*read)(struct kvm_vcpu *vcpu,
+   struct kvm_io_device *this,
+   gpa_t addr,
+   int len,
+   void *val);
+   int (*write)(struct kvm_vcpu *vcpu,
+struct kvm_io_device *this,
+gpa_t addr,
+int len,
+const void *val);
+   void (*destructor)(struct kvm_io_device *this);
+};
+
+
+struct kvm_io_device {
+   const struct kvm_io_device_ops *ops;
+};
+
+static inline void kvm_iodevice_init(struct kvm_io_device *dev,
+const struct kvm_io_device_ops *ops)
+{
+   dev-ops = ops;
+}
+
+static inline int kvm_iodevice_read(struct kvm_vcpu *vcpu,
+   struct kvm_io_device *dev, gpa_t addr,
+   int l, void *v)
+{
+   return dev-ops-read ? dev-ops-read(vcpu, dev, addr, l, v)
+   : -EOPNOTSUPP;
+}
+
+static inline int kvm_iodevice_write(struct kvm_vcpu *vcpu,
+struct kvm_io_device 

[PATCH v2 06/12] KVM: mark kvm->buses as empty once they were destroyed

2015-03-23 Thread Andre Przywara
In kvm_destroy_vm() we call kvm_io_bus_destroy() pretty early,
especially before calling kvm_arch_destroy_vm(). To avoid
unregistering devices from the already destroyed bus, let's mark
the bus with NULL to let other users know it has been destroyed
already.
This avoids a crash on a VM shutdown with the VGIC using the
kvm_io_bus later (the unregistering is in there to be able to roll
back a faulting init).

Signed-off-by: Andre Przywara andre.przyw...@arm.com
---
 virt/kvm/kvm_main.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8c7ab0b..6f164eb 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -604,8 +604,10 @@ static void kvm_destroy_vm(struct kvm *kvm)
list_del(kvm-vm_list);
spin_unlock(kvm_lock);
kvm_free_irq_routing(kvm);
-   for (i = 0; i  KVM_NR_BUSES; i++)
+   for (i = 0; i  KVM_NR_BUSES; i++) {
kvm_io_bus_destroy(kvm-buses[i]);
+   kvm-buses[i] = NULL;
+   }
kvm_coalesced_mmio_free(kvm);
 #if defined(CONFIG_MMU_NOTIFIER)  defined(KVM_ARCH_WANT_MMU_NOTIFIER)
mmu_notifier_unregister(kvm-mmu_notifier, kvm-mm);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 03/12] KVM: arm/arm64: remove now unneeded include directory from Makefile

2015-03-23 Thread Andre Przywara
virt/kvm was never really a good include directory for anything else
than locally included headers.
With the move of iodev.h there is no need anymore to add this
directory to the compiler's include path, so remove it from the arm
and arm64 kvm Makefiles.

Signed-off-by: Andre Przywara andre.przyw...@arm.com
Acked-by: Christoffer Dall christoffer.d...@linaro.org
---
 arch/arm/kvm/Makefile   |2 +-
 arch/arm64/kvm/Makefile |2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index a093bf1..139e46c 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -7,7 +7,7 @@ ifeq ($(plus_virt),+virt)
plus_virt_def := -DREQUIRES_VIRT=1
 endif
 
-ccflags-y += -Ivirt/kvm -Iarch/arm/kvm
+ccflags-y += -Iarch/arm/kvm
 CFLAGS_arm.o := -I. $(plus_virt_def)
 CFLAGS_mmu.o := -I.
 
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index b22c636..d5904f8 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-ccflags-y += -Ivirt/kvm -Iarch/arm64/kvm
+ccflags-y += -Iarch/arm64/kvm
 CFLAGS_arm.o := -I.
 CFLAGS_mmu.o := -I.
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 09/12] KVM: arm/arm64: prepare GICv2 emulation to be handled by kvm_io_bus

2015-03-23 Thread Andre Przywara
Using the framework provided by the recent vgic.c changes we register
a kvm_io_bus device when initializing the virtual GICv2.

Signed-off-by: Andre Przywara andre.przyw...@arm.com
---
 include/kvm/arm_vgic.h  |1 +
 virt/kvm/arm/vgic-v2-emul.c |   13 +
 virt/kvm/arm/vgic.c |   17 +
 3 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index f90140c..4523984 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -251,6 +251,7 @@ struct vgic_dist {
unsigned long   *irq_active_on_cpu;
 
struct vgic_vm_ops  vm_ops;
+   struct vgic_io_device   dist_iodev;
 };
 
 struct vgic_v2_cpu_if {
diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c
index 1dd183e..69f27c8 100644
--- a/virt/kvm/arm/vgic-v2-emul.c
+++ b/virt/kvm/arm/vgic-v2-emul.c
@@ -506,6 +506,7 @@ static bool vgic_v2_queue_sgi(struct kvm_vcpu *vcpu, int 
irq)
 static int vgic_v2_map_resources(struct kvm *kvm,
 const struct vgic_params *params)
 {
+   struct vgic_dist *dist = kvm-arch.vgic;
int ret = 0;
 
if (!irqchip_in_kernel(kvm))
@@ -516,13 +517,17 @@ static int vgic_v2_map_resources(struct kvm *kvm,
if (vgic_ready(kvm))
goto out;
 
-   if (IS_VGIC_ADDR_UNDEF(kvm-arch.vgic.vgic_dist_base) ||
-   IS_VGIC_ADDR_UNDEF(kvm-arch.vgic.vgic_cpu_base)) {
+   if (IS_VGIC_ADDR_UNDEF(dist-vgic_dist_base) ||
+   IS_VGIC_ADDR_UNDEF(dist-vgic_cpu_base)) {
kvm_err(Need to set vgic cpu and dist addresses first\n);
ret = -ENXIO;
goto out;
}
 
+   vgic_register_kvm_io_dev(kvm, dist-vgic_dist_base,
+KVM_VGIC_V2_DIST_SIZE,
+vgic_dist_ranges, -1, dist-dist_iodev);
+
/*
 * Initialize the vgic if this hasn't already been done on demand by
 * accessing the vgic state from userspace.
@@ -533,7 +538,7 @@ static int vgic_v2_map_resources(struct kvm *kvm,
goto out;
}
 
-   ret = kvm_phys_addr_ioremap(kvm, kvm-arch.vgic.vgic_cpu_base,
+   ret = kvm_phys_addr_ioremap(kvm, dist-vgic_cpu_base,
params-vcpu_base, KVM_VGIC_V2_CPU_SIZE,
true);
if (ret) {
@@ -541,7 +546,7 @@ static int vgic_v2_map_resources(struct kvm *kvm,
goto out;
}
 
-   kvm-arch.vgic.ready = true;
+   dist-ready = true;
 out:
if (ret)
kvm_vgic_destroy(kvm);
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index e968179..9a732d0 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -989,6 +989,21 @@ int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, 
int len,
return ret;
 }
 
+static void vgic_unregister_kvm_io_dev(struct kvm *kvm)
+{
+   struct vgic_dist *dist = kvm-arch.vgic;
+
+   if (!dist || !kvm-buses[KVM_MMIO_BUS])
+   return;
+
+   mutex_lock(kvm-slots_lock);
+   /* We could get here without ever having registered a device. */
+   if (dist-dist_iodev.dev.ops)
+   kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
+ dist-dist_iodev.dev);
+   mutex_unlock(kvm-slots_lock);
+}
+
 static int vgic_nr_shared_irqs(struct vgic_dist *dist)
 {
return dist-nr_irqs - VGIC_NR_PRIVATE_IRQS;
@@ -1705,6 +1720,8 @@ void kvm_vgic_destroy(struct kvm *kvm)
struct kvm_vcpu *vcpu;
int i;
 
+   vgic_unregister_kvm_io_dev(kvm);
+
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_vgic_vcpu_destroy(vcpu);
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 11/12] KVM: arm/arm64: rework MMIO abort handling to use KVM MMIO bus

2015-03-23 Thread Andre Przywara
Currently we have struct kvm_exit_mmio for encapsulating MMIO abort
data to be passed on from syndrome decoding all the way down to the
VGIC register handlers. Now as we switch the MMIO handling to be
routed through the KVM MMIO bus, it does not make sense anymore to
use that structure already from the beginning. So we put the data into
kvm_run very early and use that encapsulation till the MMIO bus call.
Then we fill kvm_exit_mmio in the VGIC only, making it a VGIC private
structure. That way we replace the data buffer in that structure
with a pointer pointing to a single location in kvm_run, so we get
rid of some copying on the way.
I didn't bother to rename kvm_exit_mmio (to vgic_mmio or something),
because that touches a lot of code lines without any good reason.

This is based on an original patch by Nikolay.

Signed-off-by: Andre Przywara andre.przyw...@arm.com
Cc: Nikolay Nikolaev n.nikol...@virtualopensystems.com
---
 arch/arm/include/asm/kvm_mmio.h   |   22 --
 arch/arm/kvm/mmio.c   |   60 ++---
 arch/arm64/include/asm/kvm_mmio.h |   22 --
 include/kvm/arm_vgic.h|3 --
 virt/kvm/arm/vgic.c   |   18 +++
 virt/kvm/arm/vgic.h   |8 +
 6 files changed, 55 insertions(+), 78 deletions(-)

diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h
index 3f83db2..d8e90c8 100644
--- a/arch/arm/include/asm/kvm_mmio.h
+++ b/arch/arm/include/asm/kvm_mmio.h
@@ -28,28 +28,6 @@ struct kvm_decode {
bool sign_extend;
 };
 
-/*
- * The in-kernel MMIO emulation code wants to use a copy of run-mmio,
- * which is an anonymous type. Use our own type instead.
- */
-struct kvm_exit_mmio {
-   phys_addr_t phys_addr;
-   u8  data[8];
-   u32 len;
-   boolis_write;
-   void*private;
-};
-
-static inline void kvm_prepare_mmio(struct kvm_run *run,
-   struct kvm_exit_mmio *mmio)
-{
-   run-mmio.phys_addr = mmio-phys_addr;
-   run-mmio.len   = mmio-len;
-   run-mmio.is_write  = mmio-is_write;
-   memcpy(run-mmio.data, mmio-data, mmio-len);
-   run-exit_reason= KVM_EXIT_MMIO;
-}
-
 int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 phys_addr_t fault_ipa);
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 5d3bfc0..bb2ab44 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -122,7 +122,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
 }
 
 static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
- struct kvm_exit_mmio *mmio)
+ struct kvm_run *run)
 {
unsigned long rt;
int len;
@@ -148,9 +148,9 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t 
fault_ipa,
sign_extend = kvm_vcpu_dabt_issext(vcpu);
rt = kvm_vcpu_dabt_get_rd(vcpu);
 
-   mmio-is_write = is_write;
-   mmio-phys_addr = fault_ipa;
-   mmio-len = len;
+   run-mmio.is_write = is_write;
+   run-mmio.phys_addr = fault_ipa;
+   run-mmio.len = len;
vcpu-arch.mmio_decode.sign_extend = sign_extend;
vcpu-arch.mmio_decode.rt = rt;
 
@@ -162,23 +162,49 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t 
fault_ipa,
return 0;
 }
 
+/**
+ * handle_kernel_mmio - handle an in-kernel MMIO access
+ * @vcpu:  pointer to the vcpu performing the access
+ * @run:   pointer to the kvm_run structure
+ *
+ * returns true if the MMIO access has been performed in kernel space,
+ * and false if it needs to be emulated in user space.
+ */
+static bool handle_kernel_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+   int ret;
+
+   if (run-mmio.is_write) {
+   ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, run-mmio.phys_addr,
+  run-mmio.len, run-mmio.data);
+
+   } else {
+   ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, run-mmio.phys_addr,
+ run-mmio.len, run-mmio.data);
+   }
+   if (!ret) {
+   kvm_handle_mmio_return(vcpu, run);
+   return true;
+   }
+
+   return false;
+}
+
 int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 phys_addr_t fault_ipa)
 {
-   struct kvm_exit_mmio mmio;
unsigned long data;
unsigned long rt;
int ret;
 
/*
-* Prepare MMIO operation. First stash it in a private
-* structure that we can use for in-kernel emulation. If the
-* kernel can't handle it, copy it into run-mmio and let user
-* space do its magic.
+* Prepare MMIO operation. First put the MMIO data into run-mmio.
+* Then try if some in-kernel emulation feels 

[PATCH v2 01/12] KVM: Redesign kvm_io_bus_ API to pass VCPU structure to the callbacks.

2015-03-23 Thread Andre Przywara
From: Nikolay Nikolaev n.nikol...@virtualopensystems.com

This is needed in e.g. ARM vGIC emulation, where the MMIO handling
depends on the VCPU that does the access.

Signed-off-by: Nikolay Nikolaev n.nikol...@virtualopensystems.com
Signed-off-by: Andre Przywara andre.przyw...@arm.com
Acked-by: Paolo Bonzini pbonz...@redhat.com
Acked-by: Christoffer Dall christoffer.d...@linaro.org
---
 arch/powerpc/kvm/mpic.c|   10 ++
 arch/powerpc/kvm/powerpc.c |4 ++--
 arch/s390/kvm/diag.c   |2 +-
 arch/x86/kvm/i8254.c   |   14 +-
 arch/x86/kvm/i8259.c   |   12 ++--
 arch/x86/kvm/ioapic.c  |8 
 arch/x86/kvm/lapic.c   |4 ++--
 arch/x86/kvm/vmx.c |2 +-
 arch/x86/kvm/x86.c |   13 +++--
 include/linux/kvm_host.h   |   10 +-
 virt/kvm/coalesced_mmio.c  |5 +++--
 virt/kvm/eventfd.c |4 ++--
 virt/kvm/iodev.h   |   23 +++
 virt/kvm/kvm_main.c|   32 
 14 files changed, 79 insertions(+), 64 deletions(-)

diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index 39b3a8f..8542f07 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -1374,8 +1374,9 @@ static int kvm_mpic_write_internal(struct openpic *opp, 
gpa_t addr, u32 val)
return -ENXIO;
 }
 
-static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr,
-int len, void *ptr)
+static int kvm_mpic_read(struct kvm_vcpu *vcpu,
+struct kvm_io_device *this,
+gpa_t addr, int len, void *ptr)
 {
struct openpic *opp = container_of(this, struct openpic, mmio);
int ret;
@@ -1415,8 +1416,9 @@ static int kvm_mpic_read(struct kvm_io_device *this, 
gpa_t addr,
return ret;
 }
 
-static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr,
- int len, const void *ptr)
+static int kvm_mpic_write(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *this,
+ gpa_t addr, int len, const void *ptr)
 {
struct openpic *opp = container_of(this, struct openpic, mmio);
int ret;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 27c0fac..24bfe40 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -807,7 +807,7 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu 
*vcpu,
 
idx = srcu_read_lock(vcpu-kvm-srcu);
 
-   ret = kvm_io_bus_read(vcpu-kvm, KVM_MMIO_BUS, run-mmio.phys_addr,
+   ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, run-mmio.phys_addr,
  bytes, run-mmio.data);
 
srcu_read_unlock(vcpu-kvm-srcu, idx);
@@ -880,7 +880,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
 
idx = srcu_read_lock(vcpu-kvm-srcu);
 
-   ret = kvm_io_bus_write(vcpu-kvm, KVM_MMIO_BUS, run-mmio.phys_addr,
+   ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, run-mmio.phys_addr,
   bytes, run-mmio.data);
 
srcu_read_unlock(vcpu-kvm-srcu, idx);
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 9254aff..329ec75 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -213,7 +213,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
 * - gpr 3 contains the virtqueue index (passed as datamatch)
 * - gpr 4 contains the index on the bus (optionally)
 */
-   ret = kvm_io_bus_write_cookie(vcpu-kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
+   ret = kvm_io_bus_write_cookie(vcpu, KVM_VIRTIO_CCW_NOTIFY_BUS,
  vcpu-run-s.regs.gprs[2]  0x,
  8, vcpu-run-s.regs.gprs[3],
  vcpu-run-s.regs.gprs[4]);
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 298781d..4dce6f8 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -443,7 +443,8 @@ static inline int pit_in_range(gpa_t addr)
(addr  KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
 }
 
-static int pit_ioport_write(struct kvm_io_device *this,
+static int pit_ioport_write(struct kvm_vcpu *vcpu,
+   struct kvm_io_device *this,
gpa_t addr, int len, const void *data)
 {
struct kvm_pit *pit = dev_to_pit(this);
@@ -519,7 +520,8 @@ static int pit_ioport_write(struct kvm_io_device *this,
return 0;
 }
 
-static int pit_ioport_read(struct kvm_io_device *this,
+static int pit_ioport_read(struct kvm_vcpu *vcpu,
+  struct kvm_io_device *this,
   gpa_t addr, int len, void *data)
 {
struct kvm_pit *pit = dev_to_pit(this);
@@ -589,7 +591,8 @@ static int pit_ioport_read(struct kvm_io_device *this,
return 0;
 }
 
-static int speaker_ioport_write(struct kvm_io_device *this,
+static int 

[PATCH v2 12/12] KVM: arm/arm64: remove now obsolete VGIC specific MMIO handling code

2015-03-23 Thread Andre Przywara
With all of the virtual GIC emulation code now being registered with
the kvm_io_bus, we can remove all of the old MMIO handling code and
its dispatching functionality.

Signed-off-by: Andre Przywara andre.przyw...@arm.com
---
 include/kvm/arm_vgic.h  |2 --
 virt/kvm/arm/vgic-v2-emul.c |   19 
 virt/kvm/arm/vgic-v3-emul.c |   39 
 virt/kvm/arm/vgic.c |   71 ---
 virt/kvm/arm/vgic.h |5 ---
 5 files changed, 136 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 14853d8..9503664 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -140,8 +140,6 @@ struct vgic_params {
 };
 
 struct vgic_vm_ops {
-   bool(*handle_mmio)(struct kvm_vcpu *, struct kvm_run *,
-  struct kvm_exit_mmio *);
bool(*queue_sgi)(struct kvm_vcpu *, int irq);
void(*add_sgi_source)(struct kvm_vcpu *, int irq, int source);
int (*init_model)(struct kvm *);
diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c
index 69f27c8..12cb1361 100644
--- a/virt/kvm/arm/vgic-v2-emul.c
+++ b/virt/kvm/arm/vgic-v2-emul.c
@@ -404,24 +404,6 @@ static const struct vgic_io_range vgic_dist_ranges[] = {
{}
 };
 
-static bool vgic_v2_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-   struct kvm_exit_mmio *mmio)
-{
-   unsigned long base = vcpu-kvm-arch.vgic.vgic_dist_base;
-
-   if (!is_in_range(mmio-phys_addr, mmio-len, base,
-KVM_VGIC_V2_DIST_SIZE))
-   return false;
-
-   /* GICv2 does not support accesses wider than 32 bits */
-   if (mmio-len  4) {
-   kvm_inject_dabt(vcpu, mmio-phys_addr);
-   return true;
-   }
-
-   return vgic_handle_mmio_range(vcpu, run, mmio, vgic_dist_ranges, base);
-}
-
 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
 {
struct kvm *kvm = vcpu-kvm;
@@ -575,7 +557,6 @@ void vgic_v2_init_emulation(struct kvm *kvm)
 {
struct vgic_dist *dist = kvm-arch.vgic;
 
-   dist-vm_ops.handle_mmio = vgic_v2_handle_mmio;
dist-vm_ops.queue_sgi = vgic_v2_queue_sgi;
dist-vm_ops.add_sgi_source = vgic_v2_add_sgi_source;
dist-vm_ops.init_model = vgic_v2_init_model;
diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c
index 35679d1..4d443a07 100644
--- a/virt/kvm/arm/vgic-v3-emul.c
+++ b/virt/kvm/arm/vgic-v3-emul.c
@@ -710,45 +710,7 @@ static const struct vgic_io_range vgic_redist_ranges[] = {
{},
 };
 
-/*
- * This function splits accesses between the distributor and the two
- * redistributor parts (private/SPI). As each redistributor is accessible
- * from any CPU, we have to determine the affected VCPU by taking the faulting
- * address into account. We then pass this VCPU to the handler function via
- * the private parameter.
- */
 #define SGI_BASE_OFFSET SZ_64K
-static bool vgic_v3_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-   struct kvm_exit_mmio *mmio)
-{
-   struct vgic_dist *dist = vcpu-kvm-arch.vgic;
-   unsigned long dbase = dist-vgic_dist_base;
-   unsigned long rdbase = dist-vgic_redist_base;
-   int nrcpus = atomic_read(vcpu-kvm-online_vcpus);
-   int vcpu_id;
-   const struct vgic_io_range *mmio_range;
-
-   if (is_in_range(mmio-phys_addr, mmio-len, dbase, GIC_V3_DIST_SIZE)) {
-   return vgic_handle_mmio_range(vcpu, run, mmio,
- vgic_v3_dist_ranges, dbase);
-   }
-
-   if (!is_in_range(mmio-phys_addr, mmio-len, rdbase,
-   GIC_V3_REDIST_SIZE * nrcpus))
-   return false;
-
-   vcpu_id = (mmio-phys_addr - rdbase) / GIC_V3_REDIST_SIZE;
-   rdbase += (vcpu_id * GIC_V3_REDIST_SIZE);
-   mmio-private = kvm_get_vcpu(vcpu-kvm, vcpu_id);
-
-   if (mmio-phys_addr = rdbase + SGI_BASE_OFFSET) {
-   rdbase += SGI_BASE_OFFSET;
-   mmio_range = vgic_redist_sgi_ranges;
-   } else {
-   mmio_range = vgic_redist_ranges;
-   }
-   return vgic_handle_mmio_range(vcpu, run, mmio, mmio_range, rdbase);
-}
 
 static bool vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, int irq)
 {
@@ -864,7 +826,6 @@ void vgic_v3_init_emulation(struct kvm *kvm)
 {
struct vgic_dist *dist = kvm-arch.vgic;
 
-   dist-vm_ops.handle_mmio = vgic_v3_handle_mmio;
dist-vm_ops.queue_sgi = vgic_v3_queue_sgi;
dist-vm_ops.add_sgi_source = vgic_v3_add_sgi_source;
dist-vm_ops.init_model = vgic_v3_init_model;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 2598fe8..df25cf8 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -785,53 +785,6 @@ static bool call_range_handler(struct kvm_vcpu *vcpu,
 }
 
 /**
- * vgic_handle_mmio_range - handle an in-kernel MMIO access
- * @vcpu:  pointer to 

[PATCH v2 00/12] KVM: arm/arm64: move VGIC MMIO to kvm_io_bus

2015-03-23 Thread Andre Przywara
This series converts the VGIC MMIO handling routines to the generic
kvm_io_bus framework. The framework is needed for the ioeventfd
functionality, some people on the list wanted to see the VGIC
converted over to use it, too.
Besides moving to a generic framework instead of relying on
an ARM-specific one, we also clean up quite a bit of code and get rid of
some unnecessary copying.
Along the way the MMIO abort handling for ARM has changed quite a bit,
so please have a closer look and test it on your setup if possible.

Based on the v1 review I addressed Christoffer's minor comments, but
also heavily changed [11/12]: KVM: ARM: on IO mem abort - route the
call to KVM MMIO bus to get rid of the now unnecessary copying and
the usage of kvm_exit_mmio in that early stage. See the respective
commit message for more details.

The series is loosely based on Nikolay's work[1], thanks especially
for the tedious first patch.
I totally reworked Nikolay's 3/5 to avoid adding another MMIO handling
layer on top of the already quite convoluted VGIC MMIO handling.
Also Nikolay's 2/5 got extended and changed significantly, which is why
I dropped his Signed-off-by.

Unfortunately kvm_io_bus lacks an opaque pointer to pass in some data,
so I worked around this by using container_of.
Now for every struct kvm_mmio_range array a KVM I/O device is
registered (one for VGICv2, 2*nr_vcpus + 1 for VGICv3), using the
struct kvm_io_device variable as an anchor into the new
struct vgic_io_device. This one holds the base address, the
vgic_io_range pointer and (in case of the GICv3 redistributor) the
associated vCPU, so that we can access all instance-specific data
easily.

Patch 2 moves the iodev.h header file around, which solves a problem
when embedding a struct in arm_vgic.h later. That looks like a nice
cleanup anyway, so I added two patches to remove the compiler switch
that adds virt/kvm as an include directory. This has been tested for
arm/arm64 and x86. As soon as I get around to compile-testing the other
architectures, I can send out the respective patches for those, too.

Patches 5-7 tweak the existing code a bit to make it fit for the
conversion.
Patch 8 contains the framework for the new handling, while
patch 9 and 10 enable the GICv2 and GICv3 emulation, respectively.
Patch 11 finally switches over to the new kvm_io_bus handling,
reworking the early ARM KVM MMIO handling quite a bit. Patch 12
removes the now unneeded code. I split this up to ease reviewing, I
could merge patches as well if needed.

The series goes on top of the kvmarm.git/next branch and was briefly
tested on an arm64 model with a GICv2 and a GICv3 guest and on Midway
(GICv2 guest).

Cheers,
Andre.

[1] https://lists.cs.columbia.edu/pipermail/kvmarm/2015-January/013379.html

Andre Przywara (11):
  KVM: move iodev.h from virt/kvm/ to include/kvm
  KVM: arm/arm64: remove now unneeded include directory from Makefile
  KVM: x86: remove now unneeded include directory from Makefile
  KVM: arm/arm64: rename struct kvm_mmio_range to vgic_io_range
  KVM: mark kvm-buses as empty once they were destroyed
  KVM: arm/arm64: simplify vgic_find_range() and callers
  KVM: arm/arm64: implement kvm_io_bus MMIO handling for the VGIC
  KVM: arm/arm64: prepare GICv2 emulation to be handled by kvm_io_bus
  KVM: arm/arm64: prepare GICv3 emulation to use kvm_io_bus MMIO
handling
  KVM: arm/arm64: rework MMIO abort handling to use KVM MMIO bus
  KVM: arm/arm64: remove now obsolete VGIC specific MMIO handling code

Nikolay Nikolaev (1):
  KVM: Redesign kvm_io_bus_ API to pass VCPU structure to the
callbacks.

 arch/arm/include/asm/kvm_mmio.h   |   22 
 arch/arm/kvm/Makefile |2 +-
 arch/arm/kvm/mmio.c   |   60 ---
 arch/arm64/include/asm/kvm_mmio.h |   22 
 arch/arm64/kvm/Makefile   |2 +-
 arch/powerpc/kvm/mpic.c   |   12 ++-
 arch/powerpc/kvm/powerpc.c|4 +-
 arch/s390/kvm/diag.c  |2 +-
 arch/x86/kvm/Makefile |2 +-
 arch/x86/kvm/i8254.c  |   14 ++-
 arch/x86/kvm/i8254.h  |2 +-
 arch/x86/kvm/i8259.c  |   12 +--
 arch/x86/kvm/ioapic.c |8 +-
 arch/x86/kvm/ioapic.h |2 +-
 arch/x86/kvm/irq.h|2 +-
 arch/x86/kvm/lapic.c  |4 +-
 arch/x86/kvm/lapic.h  |2 +-
 arch/x86/kvm/vmx.c|2 +-
 arch/x86/kvm/x86.c|   13 +--
 include/kvm/arm_vgic.h|   16 ++-
 include/kvm/iodev.h   |   76 +
 include/linux/kvm_host.h  |   10 +-
 virt/kvm/arm/vgic-v2-emul.c   |   40 +++
 virt/kvm/arm/vgic-v3-emul.c   |   79 +++---
 virt/kvm/arm/vgic.c   |  211 +
 virt/kvm/arm/vgic.h   |   29 +++--
 virt/kvm/coalesced_mmio.c |7 +-
 virt/kvm/eventfd.c|6 +-
 virt/kvm/iodev.h  |   70 
 

[PATCH v2 04/12] KVM: x86: remove now unneeded include directory from Makefile

2015-03-23 Thread Andre Przywara
virt/kvm was never really a good include directory for anything other
than locally included headers.
With the move of iodev.h there is no longer any need to add this
directory to the compiler's include path, so remove it from the x86 kvm
Makefile.

Signed-off-by: Andre Przywara andre.przyw...@arm.com
---
 arch/x86/kvm/Makefile |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 08f790d..16e8f96 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -1,5 +1,5 @@
 
-ccflags-y += -Ivirt/kvm -Iarch/x86/kvm
+ccflags-y += -Iarch/x86/kvm
 
 CFLAGS_x86.o := -I.
 CFLAGS_svm.o := -I.
-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 07/12] KVM: arm/arm64: simplify vgic_find_range() and callers

2015-03-23 Thread Andre Przywara
The vgic_find_range() function in vgic.c takes a struct kvm_exit_mmio
argument, but actually only uses the length field in there. Since we
need to get rid of that structure in that part of the code anyway,
let's rework the function (and its callers) to pass the length
argument to the function directly.

Signed-off-by: Andre Przywara andre.przyw...@arm.com
Reviewed-by: Christoffer Dall christoffer.d...@linaro.org
---
 virt/kvm/arm/vgic-v2-emul.c |2 +-
 virt/kvm/arm/vgic.c |   22 --
 virt/kvm/arm/vgic.h |3 +--
 3 files changed, 10 insertions(+), 17 deletions(-)

diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c
index ddb3135..1dd183e 100644
--- a/virt/kvm/arm/vgic-v2-emul.c
+++ b/virt/kvm/arm/vgic-v2-emul.c
@@ -715,7 +715,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
default:
BUG();
}
-   r = vgic_find_range(ranges, mmio, offset);
+   r = vgic_find_range(ranges, 4, offset);
 
if (unlikely(!r || !r-handle_mmio)) {
ret = -ENXIO;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 21a3550..8802ad7 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -713,16 +713,13 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 
 const
 struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges,
- struct kvm_exit_mmio *mmio,
- phys_addr_t offset)
+ int len, gpa_t offset)
 {
-   const struct vgic_io_range *r = ranges;
-
-   while (r-len) {
-   if (offset = r-base 
-   (offset + mmio-len) = (r-base + r-len))
-   return r;
-   r++;
+   while (ranges-len) {
+   if (offset = ranges-base 
+   (offset + len) = (ranges-base + ranges-len))
+   return ranges;
+   ranges++;
}
 
return NULL;
@@ -813,7 +810,7 @@ bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct 
kvm_run *run,
unsigned long offset;
 
offset = mmio-phys_addr - mmio_base;
-   range = vgic_find_range(ranges, mmio, offset);
+   range = vgic_find_range(ranges, mmio-len, offset);
if (unlikely(!range || !range-handle_mmio)) {
pr_warn(Unhandled access %d %08llx %d\n,
mmio-is_write, mmio-phys_addr, mmio-len);
@@ -1986,10 +1983,7 @@ int vgic_get_common_attr(struct kvm_device *dev, struct 
kvm_device_attr *attr)
 
 int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset)
 {
-   struct kvm_exit_mmio dev_attr_mmio;
-
-   dev_attr_mmio.len = 4;
-   if (vgic_find_range(ranges, dev_attr_mmio, offset))
+   if (vgic_find_range(ranges, 4, offset))
return 0;
else
return -ENXIO;
diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h
index 6fccb96..01aa622 100644
--- a/virt/kvm/arm/vgic.h
+++ b/virt/kvm/arm/vgic.h
@@ -90,8 +90,7 @@ static inline bool is_in_range(phys_addr_t addr, unsigned 
long len,
 
 const
 struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges,
- struct kvm_exit_mmio *mmio,
- phys_addr_t offset);
+ int len, gpa_t offset);
 
 bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run,
struct kvm_exit_mmio *mmio,
-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 10/12] KVM: arm/arm64: prepare GICv3 emulation to use kvm_io_bus MMIO handling

2015-03-23 Thread Andre Przywara
Using the framework provided by the recent vgic.c changes, we
register a kvm_io_bus device on mapping the virtual GICv3 resources.
The distributor mapping is pretty straightforward, but the
redistributors need some more love, since they need to be tagged with
the respective redistributor (read: VCPU) they are connected with.
We use the kvm_io_bus framework to register two devices per VCPU, as
each block is handled independently by the VGIC code.

Signed-off-by: Andre Przywara andre.przyw...@arm.com
---
 include/kvm/arm_vgic.h  |1 +
 virt/kvm/arm/vgic-v3-emul.c |   34 +-
 virt/kvm/arm/vgic.c |   18 ++
 3 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 4523984..d6705f4 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -252,6 +252,7 @@ struct vgic_dist {
 
struct vgic_vm_ops  vm_ops;
struct vgic_io_device   dist_iodev;
+   struct vgic_io_device   *redist_iodevs;
 };
 
 struct vgic_v2_cpu_if {
diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c
index 14943e3..35679d1 100644
--- a/virt/kvm/arm/vgic-v3-emul.c
+++ b/virt/kvm/arm/vgic-v3-emul.c
@@ -766,6 +766,9 @@ static int vgic_v3_map_resources(struct kvm *kvm,
 {
int ret = 0;
struct vgic_dist *dist = kvm-arch.vgic;
+   gpa_t rdbase = dist-vgic_redist_base;
+   struct vgic_io_device *iodevs = NULL;
+   int i;
 
if (!irqchip_in_kernel(kvm))
return 0;
@@ -791,7 +794,36 @@ static int vgic_v3_map_resources(struct kvm *kvm,
goto out;
}
 
-   kvm-arch.vgic.ready = true;
+   ret = vgic_register_kvm_io_dev(kvm, dist-vgic_dist_base,
+  GIC_V3_DIST_SIZE, vgic_v3_dist_ranges,
+  -1, dist-dist_iodev);
+   if (ret)
+   goto out;
+
+   iodevs = kcalloc(dist-nr_cpus * 2, sizeof(iodevs[0]), GFP_KERNEL);
+   if (!iodevs) {
+   ret = -ENOMEM;
+   goto out;
+   }
+
+   /* kvm_vgic_destroy() will take care of destroying the devices later. */
+   for (i = 0; i  dist-nr_cpus; i++) {
+   ret = vgic_register_kvm_io_dev(kvm, rdbase,
+  SZ_64K, vgic_redist_ranges,
+  i, iodevs[i * 2]);
+   if (ret)
+   goto out;
+   ret = vgic_register_kvm_io_dev(kvm, rdbase + SGI_BASE_OFFSET,
+  SZ_64K, vgic_redist_sgi_ranges,
+  i, iodevs[i * 2 + 1]);
+   if (ret)
+   goto out;
+   rdbase += GIC_V3_REDIST_SIZE;
+   }
+
+   dist-redist_iodevs = iodevs;
+   dist-ready = true;
+
 out:
if (ret)
kvm_vgic_destroy(kvm);
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 9a732d0..9cbb55f4 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -992,6 +992,8 @@ int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, 
int len,
 static void vgic_unregister_kvm_io_dev(struct kvm *kvm)
 {
struct vgic_dist *dist = kvm-arch.vgic;
+   struct vgic_io_device *iodevs;
+   int i;
 
if (!dist || !kvm-buses[KVM_MMIO_BUS])
return;
@@ -1001,6 +1003,22 @@ static void vgic_unregister_kvm_io_dev(struct kvm *kvm)
if (dist-dist_iodev.dev.ops)
kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
  dist-dist_iodev.dev);
+
+   iodevs = dist-redist_iodevs;
+   if (iodevs) {
+   for (i = 0; i  dist-nr_cpus * 2; i++) {
+   /*
+* Because of a failed initialization we could get here
+* without ever having registered a device.
+*/
+   if (!iodevs[i].dev.ops)
+   continue;
+   kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
+ iodevs[i].dev);
+   }
+   kfree(iodevs);
+   dist-redist_iodevs = NULL;
+   }
mutex_unlock(kvm-slots_lock);
 }
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RESEND PATCH] KVM: PPC: Book3S HV: Deliver machine check with MSR(RI=0) to guest as MCE.

2015-03-23 Thread Mahesh J Salgaonkar
From: Mahesh Salgaonkar mah...@linux.vnet.ibm.com

For the machine check interrupt that happens while we are in the guest,
kvm layer attempts the recovery, and then delivers the machine check interrupt
directly to the guest if recovery fails. On successful recovery we go back to
normal functioning of the guest. But there can be cases where a machine check
interrupt can happen with MSR(RI=0) while we are in the guest. This means
MC interrupt is unrecoverable and we have to deliver a machine check to the
guest since the machine check interrupt might have trashed valid values in
SRR0/1. The current implementation does not handle this case, causing the guest
to crash with "Bad kernel stack pointer" instead of a machine check oops message.

[26281.490060] Bad kernel stack pointer 3fff9ccce5b0 at c000490c
[26281.490434] Oops: Bad kernel stack pointer, sig: 6 [#1]
[26281.490472] SMP NR_CPUS=2048 NUMA pSeries

This patch fixes this issue by checking MSR(RI=0) in KVM layer and forwarding
unrecoverable interrupt to guest which then panics with proper machine check
Oops message.

Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com
Acked-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |   12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index bb94e6f..258f46d 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2063,7 +2063,6 @@ machine_check_realmode:
mr  r3, r9  /* get vcpu pointer */
bl  kvmppc_realmode_machine_check
nop
-   cmpdi   r3, 0   /* Did we handle MCE ? */
ld  r9, HSTATE_KVM_VCPU(r13)
li  r12, BOOK3S_INTERRUPT_MACHINE_CHECK
/*
@@ -2076,13 +2075,18 @@ machine_check_realmode:
 * The old code used to return to host for unhandled errors which
 * was causing guest to hang with soft lockups inside guest and
 * makes it difficult to recover guest instance.
+*
+* if we receive machine check with MSR(RI=0) then deliver it to
+* guest as machine check causing guest to crash.
 */
-   ld  r10, VCPU_PC(r9)
ld  r11, VCPU_MSR(r9)
+   andi.   r10, r11, MSR_RI/* check for unrecoverable exception */
+   beq 1f  /* Deliver a machine check to guest */
+   ld  r10, VCPU_PC(r9)
+   cmpdi   r3, 0   /* Did we handle MCE ? */
bne 2f  /* Continue guest execution. */
/* If not, deliver a machine check.  SRR0/1 are already set */
-   li  r10, BOOK3S_INTERRUPT_MACHINE_CHECK
-   ld  r11, VCPU_MSR(r9)
+1: li  r10, BOOK3S_INTERRUPT_MACHINE_CHECK
bl  kvmppc_msr_interrupt
 2: b   fast_interrupt_c_return
 

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 2/2] drivers/vfio: Support EEH error injection

2015-03-23 Thread Alex Williamson
On Mon, 2015-03-23 at 16:20 +1100, Gavin Shan wrote:
 On Mon, Mar 23, 2015 at 04:10:20PM +1100, David Gibson wrote:
 On Mon, Mar 23, 2015 at 04:03:59PM +1100, Gavin Shan wrote:
  On Mon, Mar 23, 2015 at 02:43:03PM +1100, David Gibson wrote:
  On Mon, Mar 23, 2015 at 12:56:36PM +1100, Gavin Shan wrote:
   On Mon, Mar 23, 2015 at 12:39:45PM +1100, David Gibson wrote:
   On Sat, Mar 21, 2015 at 06:58:45AM +1100, Gavin Shan wrote:
The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
to inject the specified EEH error, which is represented by
(struct vfio_eeh_pe_err), to the indicated PE for testing purpose.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
   
   Reviewed-by: David Gibson da...@gibson.dropbear.id.au
   
---
 Documentation/vfio.txt| 12 
 drivers/vfio/vfio_spapr_eeh.c | 10 ++
 include/uapi/linux/vfio.h | 36 
+++-
 3 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index 96978ec..c6e11a3 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -385,6 +385,18 @@ The code flow from the example above should be 
slightly changed:
 
 
 
+/* Inject EEH error, which is expected to be caused by 32-bits
+ * config load.
+ */
+pe_op.op = VFIO_EEH_PE_INJECT_ERR;
+pe_op.err.type = VFIO_EEH_ERR_TYPE_32;
+pe_op.err.func = VFIO_EEH_ERR_FUNC_LD_CFG_ADDR;
+pe_op.err.addr = 0ul;
+pe_op.err.mask = 0ul;
+ioctl(container, VFIO_EEH_PE_OP, pe_op);
+
+
+
 /* When 0xFF's returned from reading PCI config space or IO BARs
  * of the PCI device. Check the PE's state to see if that has 
been
  * frozen.
diff --git a/drivers/vfio/vfio_spapr_eeh.c 
b/drivers/vfio/vfio_spapr_eeh.c
index 5fa42db..38edeb4 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct 
iommu_group *group,
 case VFIO_EEH_PE_CONFIGURE:
 ret = eeh_pe_configure(pe);
 break;
+case VFIO_EEH_PE_INJECT_ERR:
+minsz = offsetofend(struct vfio_eeh_pe_op, 
err.mask);
+if (op.argsz  minsz)
+return -EINVAL;
+if (copy_from_user(op, (void __user *)arg, 
minsz))
+return -EFAULT;
+
+ret = eeh_pe_inject_err(pe, op.err.type, 
op.err.func,
+op.err.addr, 
op.err.mask);
+break;
 default:
 ret = -EINVAL;
 }
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 82889c3..f68e962 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -468,12 +468,23 @@ struct vfio_iommu_spapr_tce_info {
  * - unfreeze IO/DMA for frozen PE;
  * - read PE state;
  * - reset PE;
- * - configure PE.
+ * - configure PE;
+ * - inject EEH error.
  */
+struct vfio_eeh_pe_err {
+__u32 type;
+__u32 func;
+__u64 addr;
+__u64 mask;
+};
+
 struct vfio_eeh_pe_op {
 __u32 argsz;
 __u32 flags;
 __u32 op;
+union {
+struct vfio_eeh_pe_err err;
+};
 };
 
 #define VFIO_EEH_PE_DISABLE 0   /* Disable EEH 
functionality */
@@ -490,6 +501,29 @@ struct vfio_eeh_pe_op {
 #define VFIO_EEH_PE_RESET_HOT   6   /* Assert hot 
reset  */
 #define VFIO_EEH_PE_RESET_FUNDAMENTAL   7   /* Assert 
fundamental reset  */
 #define VFIO_EEH_PE_CONFIGURE   8   /* PE 
configuration  */
+#define VFIO_EEH_PE_INJECT_ERR  9   /* Inject EEH 
error  */
+#define  VFIO_EEH_ERR_TYPE_32   0   /* 32-bits EEH 
error type*/
+#define  VFIO_EEH_ERR_TYPE_64   1   /* 64-bits EEH 
error type*/
+#define  VFIO_EEH_ERR_FUNC_LD_MEM_ADDR  0   /* 
Memory load  */
+#define  VFIO_EEH_ERR_FUNC_LD_MEM_DATA  1
+#define  VFIO_EEH_ERR_FUNC_LD_IO_ADDR   2   /* IO 
load  */
+#define  VFIO_EEH_ERR_FUNC_LD_IO_DATA   3
+#define  VFIO_EEH_ERR_FUNC_LD_CFG_ADDR  4   /* 
Config load  */
+#define  VFIO_EEH_ERR_FUNC_LD_CFG_DATA  5
+#define  VFIO_EEH_ERR_FUNC_ST_MEM_ADDR  6   /* 
Memory store */
+#define  VFIO_EEH_ERR_FUNC_ST_MEM_DATA  7
+#define  VFIO_EEH_ERR_FUNC_ST_IO_ADDR   8

[RESEND PATCH v2] powerpc/book3s: Fix the MCE code to use CONFIG_KVM_BOOK3S_64_HANDLER

2015-03-23 Thread Mahesh J Salgaonkar
From: Mahesh Salgaonkar mah...@linux.vnet.ibm.com

commit id 2ba9f0d changed CONFIG_KVM_BOOK3S_64_HV to tristate to allow
HV/PR bits to be built as modules. But the MCE code still depends on
CONFIG_KVM_BOOK3S_64_HV which is wrong. When user selects
CONFIG_KVM_BOOK3S_64_HV=m to build HV/PR bits as a separate module the
relevant MCE code gets excluded.

This patch fixes the MCE code to use CONFIG_KVM_BOOK3S_64_HANDLER. This
makes sure that the relevant MCE code is included when HV/PR bits
are built as separate modules.

Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com
Acked-by: Paul Mackerras pau...@samba.org
Cc: sta...@vger.kernel.org  # v3.14+
---
 arch/powerpc/kernel/exceptions-64s.S |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index c2df815..9519e6b 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1408,7 +1408,7 @@ machine_check_handle_early:
bne 9f  /* continue in V mode if we are. */
 
 5:
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/*
 * We are coming from kernel context. Check if we are coming from
 * guest. if yes, then we can continue. We will fall through

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RESEND PATCH v2] powerpc/book3s: Fix the MCE code to use CONFIG_KVM_BOOK3S_64_HANDLER

2015-03-23 Thread Mahesh J Salgaonkar
From: Mahesh Salgaonkar mah...@linux.vnet.ibm.com

commit id 2ba9f0d changed CONFIG_KVM_BOOK3S_64_HV to tristate to allow
HV/PR bits to be built as modules. But the MCE code still depends on
CONFIG_KVM_BOOK3S_64_HV which is wrong. When user selects
CONFIG_KVM_BOOK3S_64_HV=m to build HV/PR bits as a separate module the
relevant MCE code gets excluded.

This patch fixes the MCE code to use CONFIG_KVM_BOOK3S_64_HANDLER. This
makes sure that the relevant MCE code is included when HV/PR bits
are built as separate modules.

Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com
Acked-by: Paul Mackerras pau...@samba.org
Cc: sta...@vger.kernel.org  # v3.14+
---
 arch/powerpc/kernel/exceptions-64s.S |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index c2df815..9519e6b 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1408,7 +1408,7 @@ machine_check_handle_early:
bne 9f  /* continue in V mode if we are. */
 
 5:
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/*
 * We are coming from kernel context. Check if we are coming from
 * guest. if yes, then we can continue. We will fall through

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


iscsi multipath failure with libvirtError: Failed to open file '/dev/mapper/Mar': No such file or directory

2015-03-23 Thread mad Engineer
hello All,
  I know the issue is related to libvirt, but I don't know
where to ask.

i have centos 6.6 running KVM as compute node in openstack icehouse

when i try to attach volume to instance it shows

2596: error : virStorageFileGetMetadataRecurse:952 : Failed to open
file '/dev/mapper/Mar': No such file or directory

in libvirt log

This does not always happen, but when it happens no one is able to
attach a volume to an instance.


using EMC VNX as storage backend.


multipath.conf


# Skip the files under /dev that are definitely not FC/iSCSI devices
# Different system may need different customization
devnode ^(ram|raw|loop|fd|md|dm-|sr|scd|st)[0-9]*
devnode ^hd[a-z][0-9]*
devnode ^cciss!c[0-9]d[0-9]*[p[0-9]*]

# Skip LUNZ device from VNX
device {
vendor DGC
product LUNZ
}
}

defaults {
user_friendly_names no
flush_on_last_del yes
}

devices {
# Device attributed for EMC CLARiiON and VNX series ALUA
device {
vendor DGC
product .*
product_blacklist LUNZ
path_grouping_policy group_by_prio
path_selector round-robin 0
path_checker emc_clariion
features 1 queue_if_no_path
hardware_handler 1 alua
prio alua
failback immediate
}
}


Can anyone help me with this issue?
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM live migration i/o error

2015-03-23 Thread Stefan Hajnoczi
On Fri, Mar 20, 2015 at 12:34:59PM +0100, Francesc Guasch wrote:
 On Fri, Mar 20, 2015 at 10:03:20AM +0000, Stefan Hajnoczi wrote:
 
 Hi Stefan, thank you very much for answering me.
 
  On Wed, Mar 18, 2015 at 04:53:28PM +0100, Francesc Guasch wrote:
   I have three Ubuntu Server 14.04 trusty with KVM. Two of
   them are HP servers and one is Dell. Both brands run fine
   the KVM virtual servers, and I can do live migration between
   the HPs. But I get I/O errors in the vda when I migrate to
   or from the Dell server.
   
   I have shared storage with NFS, mounted the same way in all
   of them:
   
   As soon as it starts in the origin console I spot I/O error
   messages, when it finishes I got them in the console in the
   destination server. The file system is read only and I have to
   shut it down hard.
   
   end request I/O error, /dev/vda, sector 8790327
  
  origin console == guest's console?
 
 Yes, I mean I open two consoles with virt-manager, one in
 the origin host and another one in the destination
  
  I/O errors starting while the guest is still running on the migration
  source host is strange.  I wonder if something happened to the NFS file
  related to file permissions or SELinux labels?
 
 I think I found something checking SELinux. ls -Z and getfattr
 return nothing. But ps -eZ showed something very different
 in the Dell server.
 
 This is in the HP server:
 /usr/sbin/libvirtd  1034 ?11:51:44 libvirtd
 libvirt-09540b5d-82 701  ?05:28:40 qemu-system-x86
 unconfined  1?00:01:00 init
 
 In the Dell server init is confined in lxc and there are also
 lxc-start processes.
 
 /usr/sbin/libvirtd  1622 ?05:07:07 libvirtd
 libvirt-8a0f9087-32d... 29926 ?   00:00:01 qemu-system-x86
 lxc-container-default   1774 ?00:00:00 init
 /usr/bin/lxc-start  1763 ?00:00:00 lxc-start
 
 There is also LXC installed in that server ! Maybe that is messing
 with kvm. The qemu processes look fine to me but there is a chance
 the problem comes from there.
 
 I could move the LXC somewhere else or I can keep it there to
 try to fix this issue. What do you advise I should do now?

I suggest asking on the libvirt mailing list: libvirt-l...@redhat.com


pgpk8pbvBvTGx.pgp
Description: PGP signature


Re: Windows 7 guest installer does not detect drive if physical partition used instead of disk file.

2015-03-23 Thread Stefan Hajnoczi
On Sat, Mar 21, 2015 at 01:50:46AM +0800, Emmanuel Noobadmin wrote:
 Running
 3.18.9-200.fc21.x86_64
 qemu 2:2.1.3-3.fc21
 libvirt 1.2.9.2-1.fc21
 System is a Thinkpad X250 with Intel i7-5600u Broadwell GT2
 
 I'm trying to replace the Win7 installation on my laptop with Fedora
 21 and virtualizing Windows 7 for work purposes. I'd prefer to give
 the guest its own NTFS partition instead of using a file for both
 performance and ease of potential recovery.
 
 So I've set aside unpartitioned space on the hard disk and added
 /dev/sda to the virt-manager storage pool, created a new volume and
 assigned it to the guest as an IDE drive. Unfortunately, the Windows 7
 installer does not see this drive despite being IDE and not virtio.
 If I use a qcow2 file as the drive, the installer has no problems
 detecting it.
 
 To eliminate virt-manager from the equation, I've also tried to do a
 very basic install using virt-install with similar results, the
 physical partition cannot be detected regardless of bus type
 (IDE/SATA/virtio) even with the signed Redhat virtio drivers loaded by
 the installer.
 
 I was unable to find any similar issues or solutions online except a 2
 year old thread on linuxquestions which quoted that we must specify
 the whole disk instead of a partition. However, I cannot find the
 source of that quote.
 http://www.linuxquestions.org/questions/linux-virtualization-and-cloud-90/qemu-kvm-on-a-real-partition-947162/
 
 Is this really the case and the reason why Windows 7 cannot see the
 physical partition or there is something else I am doing wrong?

I have CCed the libvirt mailing list, since KVM is a component here but
your question seems to be mainly about libvirt, virt-manager,
virt-install, etc.

It sounds like you want an NTFS partition on /dev/sda.  That requires
passing the whole /dev/sda drive to the guest - and the Windows
installer might overwrite your GRUB Master Boot Record.  Be careful when
trying to do this.

Also keep in mind that the virtual machine's hardware and your physical
hardware are probably quite different (different chipsets, PCI devices,
etc).  Windows might not be happy booting on the physical host if it was
installed under KVM, and vice versa.  This is known as
physical-to-virtual (p2v) migration and means some tweaks or driver
installs may be necessary to make Windows run after switching.

Stefan


pgprBcrq2t8NW.pgp
Description: PGP signature


Re: [PATCH 07/23] KVM: PPC: Book3S: Allow reuse of vCPU object

2015-03-23 Thread Bharata B Rao
On Sat, Mar 21, 2015 at 8:28 PM, Alexander Graf ag...@suse.de wrote:


 On 20.03.15 16:51, Bharata B Rao wrote:
 On Fri, Mar 20, 2015 at 12:34:18PM +0100, Alexander Graf wrote:


 On 20.03.15 12:26, Paul Mackerras wrote:
 On Fri, Mar 20, 2015 at 12:01:32PM +0100, Alexander Graf wrote:


 On 20.03.15 10:39, Paul Mackerras wrote:
 From: Bharata B Rao bhar...@linux.vnet.ibm.com

 Since KVM isn't equipped to handle closure of vcpu fd from 
 userspace(QEMU)
 correctly, certain work arounds have to be employed to allow reuse of
 vcpu array slot in KVM during cpu hot plug/unplug from guest. One such
 proposed workaround is to park the vcpu fd in userspace during cpu unplug
 and reuse it later during next hotplug.

 More details can be found here:
 KVM: https://www.mail-archive.com/kvm@vger.kernel.org/msg102839.html
 QEMU: http://lists.gnu.org/archive/html/qemu-devel/2014-12/msg00859.html

 In order to support this workaround with PowerPC KVM, don't create or
 initialize ICP if the vCPU is found to be already associated with an ICP.

 Signed-off-by: Bharata B Rao bhar...@linux.vnet.ibm.com
 Signed-off-by: Paul Mackerras pau...@samba.org

 This probably makes some sense, but please make sure that user space has
 some way to figure out whether hotplug works at all.

 Bharata is working on the qemu side of all this, so I assume he has
 that covered.

 Well, so far the kernel doesn't expose anything he can query, so I
 suppose he just blindly assumes that older host kernels will randomly
 break and nobody cares. I'd rather prefer to see a CAP exposed that qemu
 can check on.

 I see that you have already taken this into your tree. I have an updated
 patch to expose a CAP. If the below patch looks ok, then let me know how
 you would prefer to take this patch in.

 Regards,
 Bharata.

 KVM: PPC: BOOK3S: Allow reuse of vCPU object

 From: Bharata B Rao bhar...@linux.vnet.ibm.com

 Since KVM isn't equipped to handle closure of vcpu fd from userspace(QEMU)
 correctly, certain work arounds have to be employed to allow reuse of
 vcpu array slot in KVM during cpu hot plug/unplug from guest. One such
 proposed workaround is to park the vcpu fd in userspace during cpu unplug
 and reuse it later during next hotplug.

 More details can be found here:
 KVM: https://www.mail-archive.com/kvm@vger.kernel.org/msg102839.html
 QEMU: http://lists.gnu.org/archive/html/qemu-devel/2014-12/msg00859.html

 In order to support this workaround with PowerPC KVM, don't create or
 initialize ICP if the vCPU is found to be already associated with an ICP.
 User space (QEMU) can reuse the vCPU after checking for the availability
 of KVM_CAP_SPAPR_REUSE_VCPU capability.

 Signed-off-by: Bharata B Rao bhar...@linux.vnet.ibm.com
 ---
  arch/powerpc/kvm/book3s_xics.c |9 +++--
  arch/powerpc/kvm/powerpc.c |   12 
  include/uapi/linux/kvm.h   |1 +
  3 files changed, 20 insertions(+), 2 deletions(-)

 diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
 index a4a8d9f..ead3a35 100644
 --- a/arch/powerpc/kvm/book3s_xics.c
 +++ b/arch/powerpc/kvm/book3s_xics.c
 @@ -1313,8 +1313,13 @@ int kvmppc_xics_connect_vcpu(struct kvm_device *dev, 
 struct kvm_vcpu *vcpu,
   return -EPERM;
   if (xics-kvm != vcpu-kvm)
   return -EPERM;
 - if (vcpu-arch.irq_type)
 - return -EBUSY;
 +
 + /*
 +  * If irq_type is already set, don't reinitialize but
 +  * return success allowing this vcpu to be reused.
 +  */
 + if (vcpu-arch.irq_type != KVMPPC_IRQ_DEFAULT)
 + return 0;

   r = kvmppc_xics_create_icp(vcpu, xcpu);
   if (!r)
 diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
 index 27c0fac..5b7007c 100644
 --- a/arch/powerpc/kvm/powerpc.c
 +++ b/arch/powerpc/kvm/powerpc.c
 @@ -564,6 +564,18 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long 
 ext)
   r = 1;
   break;
  #endif
 + case KVM_CAP_SPAPR_REUSE_VCPU:
 + /*
 +  * Kernel currently doesn't support closing of vCPU fd from
 +  * user space (QEMU) correctly. Hence the option available
 +  * is to park the vCPU fd in user space whenever a guest
 +  * CPU is hot removed and reuse the same later when another
 +  * guest CPU is hotplugged. This capability determines whether
 +  * it is safe to assume if parking of vCPU fd and reuse from
 +  * user space works for sPAPR guests.

 I don't see how the code you're changing here has anything to do with
 parking vcpus. It's all about being able to call connect on an already
 connected vcpu and not erroring out. Please reflect this in the cap name
 and description.

 You also need to update Documentation/virtual/kvm/api.txt.

 Furthermore, thinking about this a bit more, I might still miss the
 exact case why you need this. Why is QEMU issuing a connect again? Could
 it maybe just not do it?

Thinking 

Re: [PATCH 07/23] KVM: PPC: Book3S: Allow reuse of vCPU object

2015-03-23 Thread Bharata B Rao
On Sat, Mar 21, 2015 at 8:28 PM, Alexander Graf ag...@suse.de wrote:


 On 20.03.15 16:51, Bharata B Rao wrote:
 On Fri, Mar 20, 2015 at 12:34:18PM +0100, Alexander Graf wrote:


 On 20.03.15 12:26, Paul Mackerras wrote:
 On Fri, Mar 20, 2015 at 12:01:32PM +0100, Alexander Graf wrote:


 On 20.03.15 10:39, Paul Mackerras wrote:
 From: Bharata B Rao bhar...@linux.vnet.ibm.com

 Since KVM isn't equipped to handle closure of vcpu fd from 
 userspace(QEMU)
 correctly, certain work arounds have to be employed to allow reuse of
 vcpu array slot in KVM during cpu hot plug/unplug from guest. One such
 proposed workaround is to park the vcpu fd in userspace during cpu unplug
 and reuse it later during next hotplug.

 More details can be found here:
 KVM: https://www.mail-archive.com/kvm@vger.kernel.org/msg102839.html
 QEMU: http://lists.gnu.org/archive/html/qemu-devel/2014-12/msg00859.html

 In order to support this workaround with PowerPC KVM, don't create or
 initialize ICP if the vCPU is found to be already associated with an ICP.

 Signed-off-by: Bharata B Rao bhar...@linux.vnet.ibm.com
 Signed-off-by: Paul Mackerras pau...@samba.org

 This probably makes some sense, but please make sure that user space has
 some way to figure out whether hotplug works at all.

 Bharata is working on the qemu side of all this, so I assume he has
 that covered.

 Well, so far the kernel doesn't expose anything he can query, so I
 suppose he just blindly assumes that older host kernels will randomly
 break and nobody cares. I'd rather prefer to see a CAP exposed that qemu
 can check on.

 I see that you have already taken this into your tree. I have an updated
 patch to expose a CAP. If the below patch looks ok, then let me know how
 you would prefer to take this patch in.

 Regards,
 Bharata.

 KVM: PPC: BOOK3S: Allow reuse of vCPU object

 From: Bharata B Rao bhar...@linux.vnet.ibm.com

 Since KVM isn't equipped to handle closure of vcpu fd from userspace(QEMU)
 correctly, certain work arounds have to be employed to allow reuse of
 vcpu array slot in KVM during cpu hot plug/unplug from guest. One such
 proposed workaround is to park the vcpu fd in userspace during cpu unplug
 and reuse it later during next hotplug.

 More details can be found here:
 KVM: https://www.mail-archive.com/kvm@vger.kernel.org/msg102839.html
 QEMU: http://lists.gnu.org/archive/html/qemu-devel/2014-12/msg00859.html

 In order to support this workaround with PowerPC KVM, don't create or
 initialize ICP if the vCPU is found to be already associated with an ICP.
 User space (QEMU) can reuse the vCPU after checking for the availability
 of KVM_CAP_SPAPR_REUSE_VCPU capability.

 Signed-off-by: Bharata B Rao bhar...@linux.vnet.ibm.com
 ---
  arch/powerpc/kvm/book3s_xics.c |9 +++--
  arch/powerpc/kvm/powerpc.c |   12 
  include/uapi/linux/kvm.h   |1 +
  3 files changed, 20 insertions(+), 2 deletions(-)

 diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
 index a4a8d9f..ead3a35 100644
 --- a/arch/powerpc/kvm/book3s_xics.c
 +++ b/arch/powerpc/kvm/book3s_xics.c
 @@ -1313,8 +1313,13 @@ int kvmppc_xics_connect_vcpu(struct kvm_device *dev, 
 struct kvm_vcpu *vcpu,
   return -EPERM;
   if (xics-kvm != vcpu-kvm)
   return -EPERM;
 - if (vcpu-arch.irq_type)
 - return -EBUSY;
 +
 + /*
 +  * If irq_type is already set, don't reinitialize but
 +  * return success allowing this vcpu to be reused.
 +  */
 + if (vcpu-arch.irq_type != KVMPPC_IRQ_DEFAULT)
 + return 0;

   r = kvmppc_xics_create_icp(vcpu, xcpu);
   if (!r)
 diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
 index 27c0fac..5b7007c 100644
 --- a/arch/powerpc/kvm/powerpc.c
 +++ b/arch/powerpc/kvm/powerpc.c
 @@ -564,6 +564,18 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long 
 ext)
   r = 1;
   break;
  #endif
 + case KVM_CAP_SPAPR_REUSE_VCPU:
 + /*
 +  * Kernel currently doesn't support closing of vCPU fd from
 +  * user space (QEMU) correctly. Hence the option available
 +  * is to park the vCPU fd in user space whenever a guest
 +  * CPU is hot removed and reuse the same later when another
 +  * guest CPU is hotplugged. This capability determines whether
 +  * it is safe to assume if parking of vCPU fd and reuse from
 +  * user space works for sPAPR guests.

 I don't see how the code you're changing here has anything to do with
 parking vcpus. It's all about being able to call connect on an already
 connected vcpu and not erroring out. Please reflect this in the cap name
 and description.

 You also need to update Documentation/virtual/kvm/api.txt.

 Furthermore, thinking about this a bit more, I might still miss the
 exact case why you need this. Why is QEMU issuing a connect again? Could
 it maybe just not do it?

Thinking 

Re: virtio fixes pull for 4.0?

2015-03-23 Thread Pawel Moll
On Mon, 2015-03-09 at 07:13 +0000, Rusty Russell wrote:
  virtio_mmio: generation support
  virtio_mmio: fix endian-ness for mmio these two are waiting for ack by 
  Pawel
 
  These two fix bugs in virtio 1.0 code for mmio.
  Host code for that was AFAIK not posted, so I can't test properly.
  Pawel?
 
 I'm waiting on Acks for these two.

Right, sorry about being silent for a while - I forked and was on
paternity leave...

Will go through the thread and respond today.

Pawel

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] PCI passthrough of 40G ethernet interface (Openstack/KVM)

2015-03-23 Thread Stefan Assmann
On 20.03.2015 21:55, jacob jacob wrote:
 On Thu, Mar 19, 2015 at 10:18 AM, Stefan Assmann sassm...@redhat.com wrote:
 On 19.03.2015 15:04, jacob jacob wrote:
 Hi Stefan,
 have you been able to get PCI passthrough working without any issues
 after the upgrade?

 My XL710 fails to transfer regular TCP traffic (netperf). If that works
 for you then you're already one step ahead of me. Afraid I can't help
 you there.
 
 I have data transfer working when trying the test runs on the host
 itself. Are you seeing problems when directly trying the TCP traffic
 from the host itself?

Correct.

 The issues that i am seeing are specific to the case when the devices
 are passed via PCI passthrough into the VM.
 
 Any ideas whether this would be a kvm/qemu or i40e driver issue?
 (Updating to the latest firmware and using latest i40e driver didn't
 seem to help.)

Hard to say, that's probably something for Intel to look into.

  Stefan
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [kvm-ppc:kvm-ppc-queue 7/9] ERROR: .__spin_yield [arch/powerpc/kvm/kvm.ko] undefined!

2015-03-23 Thread Alexander Graf


On 23.03.15 04:03, Michael Ellerman wrote:
 On Mon, 2015-03-23 at 14:00 +1100, Paul Mackerras wrote:
 On Fri, Mar 20, 2015 at 08:07:53PM +0800, kbuild test robot wrote:
 tree:   git://github.com/agraf/linux-2.6.git kvm-ppc-queue
 head:   9b1daf3cfba1801768aa41b1b6ad0b653844241f
 commit: aba777f5ce0accb4c6a277e671de0330752954e8 [7/9] KVM: PPC: Book3S HV: 
 Convert ICS mutex lock to spin lock
 config: powerpc-defconfig (attached as .config)
 reproduce:
   wget 
 https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
  -O ~/bin/make.cross
   chmod +x ~/bin/make.cross
   git checkout aba777f5ce0accb4c6a277e671de0330752954e8
   # save the attached .config to linux build tree
   make.cross ARCH=powerpc 

 All error/warnings:

 ERROR: .__spin_yield [arch/powerpc/kvm/kvm.ko] undefined!

 Yes, this is the patch that depends on the powerpc: Export
 __spin_yield patch that Suresh posted to linuxppc-...@ozlabs.org and
 I acked.

 I think the best thing at this stage is probably for Alex to take that
 patch through his tree, assuming Michael is OK with that.
 
 Fine by me.
 
 Acked-by: Michael Ellerman m...@ellerman.id.au

Awesome, thanks, applied to kvm-ppc-queue.


Alex
--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 07/23] KVM: PPC: Book3S: Allow reuse of vCPU object

2015-03-23 Thread Alexander Graf


On 23.03.15 08:50, Bharata B Rao wrote:
 On Sat, Mar 21, 2015 at 8:28 PM, Alexander Graf ag...@suse.de wrote:


 On 20.03.15 16:51, Bharata B Rao wrote:
 On Fri, Mar 20, 2015 at 12:34:18PM +0100, Alexander Graf wrote:


 On 20.03.15 12:26, Paul Mackerras wrote:
 On Fri, Mar 20, 2015 at 12:01:32PM +0100, Alexander Graf wrote:


 On 20.03.15 10:39, Paul Mackerras wrote:
 From: Bharata B Rao bhar...@linux.vnet.ibm.com

 Since KVM isn't equipped to handle closure of vcpu fd from 
 userspace(QEMU)
 correctly, certain work arounds have to be employed to allow reuse of
 vcpu array slot in KVM during cpu hot plug/unplug from guest. One such
 proposed workaround is to park the vcpu fd in userspace during cpu 
 unplug
 and reuse it later during next hotplug.

 More details can be found here:
 KVM: https://www.mail-archive.com/kvm@vger.kernel.org/msg102839.html
 QEMU: http://lists.gnu.org/archive/html/qemu-devel/2014-12/msg00859.html

 In order to support this workaround with PowerPC KVM, don't create or
 initialize ICP if the vCPU is found to be already associated with an 
 ICP.

 Signed-off-by: Bharata B Rao bhar...@linux.vnet.ibm.com
 Signed-off-by: Paul Mackerras pau...@samba.org

 This probably makes some sense, but please make sure that user space has
 some way to figure out whether hotplug works at all.

 Bharata is working on the qemu side of all this, so I assume he has
 that covered.

 Well, so far the kernel doesn't expose anything he can query, so I
 suppose he just blindly assumes that older host kernels will randomly
 break and nobody cares. I'd rather prefer to see a CAP exposed that qemu
 can check on.

 I see that you have already taken this into your tree. I have an updated
 patch to expose a CAP. If the below patch looks ok, then let me know how
 you would prefer to take this patch in.

 Regards,
 Bharata.

 KVM: PPC: BOOK3S: Allow reuse of vCPU object

 From: Bharata B Rao bhar...@linux.vnet.ibm.com

 Since KVM isn't equipped to handle closure of vcpu fd from userspace(QEMU)
 correctly, certain work arounds have to be employed to allow reuse of
 vcpu array slot in KVM during cpu hot plug/unplug from guest. One such
 proposed workaround is to park the vcpu fd in userspace during cpu unplug
 and reuse it later during next hotplug.

 More details can be found here:
 KVM: https://www.mail-archive.com/kvm@vger.kernel.org/msg102839.html
 QEMU: http://lists.gnu.org/archive/html/qemu-devel/2014-12/msg00859.html

 In order to support this workaround with PowerPC KVM, don't create or
 initialize ICP if the vCPU is found to be already associated with an ICP.
 User space (QEMU) can reuse the vCPU after checking for the availability
 of KVM_CAP_SPAPR_REUSE_VCPU capability.

 Signed-off-by: Bharata B Rao bhar...@linux.vnet.ibm.com
 ---
  arch/powerpc/kvm/book3s_xics.c |9 +++--
  arch/powerpc/kvm/powerpc.c |   12 
  include/uapi/linux/kvm.h   |1 +
  3 files changed, 20 insertions(+), 2 deletions(-)

 diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
 index a4a8d9f..ead3a35 100644
 --- a/arch/powerpc/kvm/book3s_xics.c
 +++ b/arch/powerpc/kvm/book3s_xics.c
 @@ -1313,8 +1313,13 @@ int kvmppc_xics_connect_vcpu(struct kvm_device *dev, 
 struct kvm_vcpu *vcpu,
   return -EPERM;
   if (xics-kvm != vcpu-kvm)
   return -EPERM;
 - if (vcpu-arch.irq_type)
 - return -EBUSY;
 +
 + /*
 +  * If irq_type is already set, don't reinitialize but
 +  * return success allowing this vcpu to be reused.
 +  */
 + if (vcpu-arch.irq_type != KVMPPC_IRQ_DEFAULT)
 + return 0;

   r = kvmppc_xics_create_icp(vcpu, xcpu);
   if (!r)
 diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
 index 27c0fac..5b7007c 100644
 --- a/arch/powerpc/kvm/powerpc.c
 +++ b/arch/powerpc/kvm/powerpc.c
 @@ -564,6 +564,18 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long 
 ext)
   r = 1;
   break;
  #endif
 + case KVM_CAP_SPAPR_REUSE_VCPU:
 + /*
 +  * Kernel currently doesn't support closing of vCPU fd from
 +  * user space (QEMU) correctly. Hence the option available
 +  * is to park the vCPU fd in user space whenever a guest
 +  * CPU is hot removed and reuse the same later when another
 +  * guest CPU is hotplugged. This capability determines whether
 +  * it is safe to assume if parking of vCPU fd and reuse from
 +  * user space works for sPAPR guests.

 I don't see how the code you're changing here has anything to do with
 parking vcpus. It's all about being able to call connect on an already
 connected vcpu and not erroring out. Please reflect this in the cap name
 and description.

 You also need to update Documentation/virtual/kvm/api.txt.

 Furthermore, thinking about this a bit more, I might still miss the
 exact case why you need this. Why is QEMU issuing a connect 

Re: [PATCH 07/23] KVM: PPC: Book3S: Allow reuse of vCPU object

2015-03-23 Thread Alexander Graf


On 23.03.15 08:50, Bharata B Rao wrote:
 On Sat, Mar 21, 2015 at 8:28 PM, Alexander Graf ag...@suse.de wrote:


 On 20.03.15 16:51, Bharata B Rao wrote:
 On Fri, Mar 20, 2015 at 12:34:18PM +0100, Alexander Graf wrote:


 On 20.03.15 12:26, Paul Mackerras wrote:
 On Fri, Mar 20, 2015 at 12:01:32PM +0100, Alexander Graf wrote:


 On 20.03.15 10:39, Paul Mackerras wrote:
 From: Bharata B Rao bhar...@linux.vnet.ibm.com

 Since KVM isn't equipped to handle closure of vcpu fd from 
 userspace(QEMU)
 correctly, certain work arounds have to be employed to allow reuse of
 vcpu array slot in KVM during cpu hot plug/unplug from guest. One such
 proposed workaround is to park the vcpu fd in userspace during cpu 
 unplug
 and reuse it later during next hotplug.

 More details can be found here:
 KVM: https://www.mail-archive.com/kvm@vger.kernel.org/msg102839.html
 QEMU: http://lists.gnu.org/archive/html/qemu-devel/2014-12/msg00859.html

 In order to support this workaround with PowerPC KVM, don't create or
 initialize ICP if the vCPU is found to be already associated with an 
 ICP.

 Signed-off-by: Bharata B Rao bhar...@linux.vnet.ibm.com
 Signed-off-by: Paul Mackerras pau...@samba.org

 This probably makes some sense, but please make sure that user space has
 some way to figure out whether hotplug works at all.

 Bharata is working on the qemu side of all this, so I assume he has
 that covered.

 Well, so far the kernel doesn't expose anything he can query, so I
 suppose he just blindly assumes that older host kernels will randomly
 break and nobody cares. I'd rather prefer to see a CAP exposed that qemu
 can check on.

 I see that you have already taken this into your tree. I have an updated
 patch to expose a CAP. If the below patch looks ok, then let me know how
 you would prefer to take this patch in.

 Regards,
 Bharata.

 KVM: PPC: BOOK3S: Allow reuse of vCPU object

 From: Bharata B Rao bhar...@linux.vnet.ibm.com

 Since KVM isn't equipped to handle closure of vcpu fd from userspace(QEMU)
 correctly, certain work arounds have to be employed to allow reuse of
 vcpu array slot in KVM during cpu hot plug/unplug from guest. One such
 proposed workaround is to park the vcpu fd in userspace during cpu unplug
 and reuse it later during next hotplug.

 More details can be found here:
 KVM: https://www.mail-archive.com/kvm@vger.kernel.org/msg102839.html
 QEMU: http://lists.gnu.org/archive/html/qemu-devel/2014-12/msg00859.html

 In order to support this workaround with PowerPC KVM, don't create or
 initialize ICP if the vCPU is found to be already associated with an ICP.
 User space (QEMU) can reuse the vCPU after checking for the availability
 of KVM_CAP_SPAPR_REUSE_VCPU capability.

 Signed-off-by: Bharata B Rao bhar...@linux.vnet.ibm.com
 ---
  arch/powerpc/kvm/book3s_xics.c |9 +++--
  arch/powerpc/kvm/powerpc.c |   12 
  include/uapi/linux/kvm.h   |1 +
  3 files changed, 20 insertions(+), 2 deletions(-)

 diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
 index a4a8d9f..ead3a35 100644
 --- a/arch/powerpc/kvm/book3s_xics.c
 +++ b/arch/powerpc/kvm/book3s_xics.c
 @@ -1313,8 +1313,13 @@ int kvmppc_xics_connect_vcpu(struct kvm_device *dev, 
 struct kvm_vcpu *vcpu,
   return -EPERM;
   if (xics-kvm != vcpu-kvm)
   return -EPERM;
 - if (vcpu-arch.irq_type)
 - return -EBUSY;
 +
 + /*
 +  * If irq_type is already set, don't reinitialize but
 +  * return success allowing this vcpu to be reused.
 +  */
 + if (vcpu-arch.irq_type != KVMPPC_IRQ_DEFAULT)
 + return 0;

   r = kvmppc_xics_create_icp(vcpu, xcpu);
   if (!r)
 diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
 index 27c0fac..5b7007c 100644
 --- a/arch/powerpc/kvm/powerpc.c
 +++ b/arch/powerpc/kvm/powerpc.c
 @@ -564,6 +564,18 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long 
 ext)
   r = 1;
   break;
  #endif
 + case KVM_CAP_SPAPR_REUSE_VCPU:
 + /*
 +  * Kernel currently doesn't support closing of vCPU fd from
 +  * user space (QEMU) correctly. Hence the option available
 +  * is to park the vCPU fd in user space whenever a guest
 +  * CPU is hot removed and reuse the same later when another
 +  * guest CPU is hotplugged. This capability determines whether
 +  * it is safe to assume if parking of vCPU fd and reuse from
 +  * user space works for sPAPR guests.

 I don't see how the code you're changing here has anything to do with
 parking vcpus. It's all about being able to call connect on an already
 connected vcpu and not erroring out. Please reflect this in the cap name
 and description.

 You also need to update Documentation/virtual/kvm/api.txt.

 Furthermore, thinking about this a bit more, I might still miss the
 exact case why you need this. Why is QEMU issuing a connect 

Re: [kvm-ppc:kvm-ppc-queue 7/9] ERROR: .__spin_yield [arch/powerpc/kvm/kvm.ko] undefined!

2015-03-23 Thread Alexander Graf


On 23.03.15 04:03, Michael Ellerman wrote:
 On Mon, 2015-03-23 at 14:00 +1100, Paul Mackerras wrote:
 On Fri, Mar 20, 2015 at 08:07:53PM +0800, kbuild test robot wrote:
 tree:   git://github.com/agraf/linux-2.6.git kvm-ppc-queue
 head:   9b1daf3cfba1801768aa41b1b6ad0b653844241f
 commit: aba777f5ce0accb4c6a277e671de0330752954e8 [7/9] KVM: PPC: Book3S HV: 
 Convert ICS mutex lock to spin lock
 config: powerpc-defconfig (attached as .config)
 reproduce:
   wget 
 https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
  -O ~/bin/make.cross
   chmod +x ~/bin/make.cross
   git checkout aba777f5ce0accb4c6a277e671de0330752954e8
   # save the attached .config to linux build tree
   make.cross ARCH=powerpc 

 All error/warnings:

 ERROR: .__spin_yield [arch/powerpc/kvm/kvm.ko] undefined!

 Yes, this is the patch that depends on the powerpc: Export
 __spin_yield patch that Suresh posted to linuxppc-...@ozlabs.org and
 I acked.

 I think the best thing at this stage is probably for Alex to take that
 patch through his tree, assuming Michael is OK with that.
 
 Fine by me.
 
 Acked-by: Michael Ellerman m...@ellerman.id.au

Awesome, thanks, applied to kvm-ppc-queue.


Alex
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Patch v5] x86: irq_comm: Add check for RH bit in kvm_set_msi_irq

2015-03-23 Thread Radim Krčmář
2015-03-20 11:50-0600, James Sullivan:
 On 03/20/2015 09:22 AM, James Sullivan wrote:
  On 03/20/2015 09:15 AM, Radim Krčmář wrote:
  2015-03-19 16:51-0600, James Sullivan:
  I played around with native_compose_msi_msg and discovered the following:
 
  * dm=0, rh=0 = Physical Destination Mode
  * dm=0, rh=1 = Failed delivery
  * dm=1, rh=0 = Logical Destination Mode, No Redirection
  * dm=1, rh=1 = Logical Destination Mode, Redirection
 
  Great!  (What CPU family was that?)
 
  
  This was on Intel x86_64 (Core i5-3210m, 'Ivy Bridge').

Thanks, it's possible that the behavior of chipsets changed since the
report on Intel's forum ...
(Lowest priority behaved differently before QPI, so it might coincide.)

  I'm still wondering about last sentence from that link, the
  parenthesised part to be exact,
The reference to the APIC ID being 0xff is because 0xff is broadcast
and lowest priority (what the RH bit really is for X86) is illegal
with broadcast.
 
  Can you also check if RH=1 does something to delivery mode?
 
 I haven't seen any changes in the MSI Data Register for any values of RH,
 but I don't have a great sample size (one machine with one set of PCI 
 devices),
 so if anyone else can confirm that I would appreciate it.

I meant if the delivery mode from data register isn't ignored with RH=1,
and the message delivered as if lowest-priority was set there.
(Decided by having something else than fixed or lowest-priority there.)

 Worth noting that low prio delivery was used across the board for my PCI 
 devices
 regardless of RH=1 or 0, so it doesn't seem to be de facto the case that the 
 RH
 bit's only purpose is for lowprio delivery on x86.

Yeah, afaik, it can be done with lowest priority delivery mode on ia64
too, so I have a hard time finding RH's intended purpose.

 Again, need to have some more
 PCI devices to test against to confirm anything.

It's impossible to test everything, and there is no conflict if we have
at most one data point ;)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 11/12] KVM: arm/arm64: rework MMIO abort handling to use KVM MMIO bus

2015-03-23 Thread Nikolay Nikolaev
On Mon, Mar 23, 2015 at 5:58 PM, Andre Przywara andre.przyw...@arm.com wrote:

 Currently we have struct kvm_exit_mmio for encapsulating MMIO abort
 data to be passed on from syndrome decoding all the way down to the
 VGIC register handlers. Now as we switch the MMIO handling to be
 routed through the KVM MMIO bus, it does not make sense anymore to
 use that structure already from the beginning. So we put the data into
 kvm_run very early and use that encapsulation till the MMIO bus call.
 Then we fill kvm_exit_mmio in the VGIC only, making it a VGIC private
 structure. On that way we replace the data buffer in that structure
 with a pointer pointing to a single location in kvm_run, so we get
 rid of some copying on the way.
 I didn't bother to rename kvm_exit_mmio (to vgic_mmio or something),
I would vote for the renaming.

Otherwise the patch looks much cleaner and straightforward than what
it was before.

Nikolay Nikolaev

 because that touches a lot of code lines without any good reason.

 This is based on an original patch by Nikolay.

 Signed-off-by: Andre Przywara andre.przyw...@arm.com
 Cc: Nikolay Nikolaev n.nikol...@virtualopensystems.com
 ---
  arch/arm/include/asm/kvm_mmio.h   |   22 --
  arch/arm/kvm/mmio.c   |   60 
 ++---
  arch/arm64/include/asm/kvm_mmio.h |   22 --
  include/kvm/arm_vgic.h|3 --
  virt/kvm/arm/vgic.c   |   18 +++
  virt/kvm/arm/vgic.h   |8 +
  6 files changed, 55 insertions(+), 78 deletions(-)

 diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h
 index 3f83db2..d8e90c8 100644
 --- a/arch/arm/include/asm/kvm_mmio.h
 +++ b/arch/arm/include/asm/kvm_mmio.h
 @@ -28,28 +28,6 @@ struct kvm_decode {
 bool sign_extend;
  };

 -/*
 - * The in-kernel MMIO emulation code wants to use a copy of run-mmio,
 - * which is an anonymous type. Use our own type instead.
 - */
 -struct kvm_exit_mmio {
 -   phys_addr_t phys_addr;
 -   u8  data[8];
 -   u32 len;
 -   boolis_write;
 -   void*private;
 -};
 -
 -static inline void kvm_prepare_mmio(struct kvm_run *run,
 -   struct kvm_exit_mmio *mmio)
 -{
 -   run-mmio.phys_addr = mmio-phys_addr;
 -   run-mmio.len   = mmio-len;
 -   run-mmio.is_write  = mmio-is_write;
 -   memcpy(run-mmio.data, mmio-data, mmio-len);
 -   run-exit_reason= KVM_EXIT_MMIO;
 -}
 -
  int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
  int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
  phys_addr_t fault_ipa);
 diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
 index 5d3bfc0..bb2ab44 100644
 --- a/arch/arm/kvm/mmio.c
 +++ b/arch/arm/kvm/mmio.c
 @@ -122,7 +122,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct 
 kvm_run *run)
  }

  static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 - struct kvm_exit_mmio *mmio)
 + struct kvm_run *run)
  {
 unsigned long rt;
 int len;
 @@ -148,9 +148,9 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t 
 fault_ipa,
 sign_extend = kvm_vcpu_dabt_issext(vcpu);
 rt = kvm_vcpu_dabt_get_rd(vcpu);

 -   mmio-is_write = is_write;
 -   mmio-phys_addr = fault_ipa;
 -   mmio-len = len;
 +   run-mmio.is_write = is_write;
 +   run-mmio.phys_addr = fault_ipa;
 +   run-mmio.len = len;
 vcpu-arch.mmio_decode.sign_extend = sign_extend;
 vcpu-arch.mmio_decode.rt = rt;

 @@ -162,23 +162,49 @@ static int decode_hsr(struct kvm_vcpu *vcpu, 
 phys_addr_t fault_ipa,
 return 0;
  }

 +/**
 + * handle_kernel_mmio - handle an in-kernel MMIO access
 + * @vcpu:  pointer to the vcpu performing the access
 + * @run:   pointer to the kvm_run structure
 + *
 + * returns true if the MMIO access has been performed in kernel space,
 + * and false if it needs to be emulated in user space.
 + */
 +static bool handle_kernel_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run)
 +{
 +   int ret;
 +
 +   if (run-mmio.is_write) {
 +   ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, 
 run-mmio.phys_addr,
 +  run-mmio.len, run-mmio.data);
 +
 +   } else {
 +   ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, run-mmio.phys_addr,
 + run-mmio.len, run-mmio.data);
 +   }
 +   if (!ret) {
 +   kvm_handle_mmio_return(vcpu, run);
 +   return true;
 +   }
 +
 +   return false;
 +}
 +
  int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
  phys_addr_t fault_ipa)
  {
 -   struct kvm_exit_mmio mmio;
 unsigned long data;
 unsigned long rt;
 int ret;

 /*
 -* Prepare MMIO operation. 

Re: [PATCH v2 00/12] KVM: arm/arm64: move VGIC MMIO to kvm_io_bus

2015-03-23 Thread Nikolay Nikolaev
On Mon, Mar 23, 2015 at 5:58 PM, Andre Przywara andre.przyw...@arm.com wrote:
 This series converts the VGIC MMIO handling routines to the generic
 kvm_io_bus framework. The framework is needed for the ioeventfd
 functionality, some people on the list wanted to see the VGIC
 converted over to use it, too.
 Besides moving to a generic framework instead of relying on
 an ARM specific one, we also clean up quite some code and get rid of
 some unnecessary copying.
 Along the way the MMIO abort handling for ARM has changed quite a bit,
 so please have a closer look and test it on your setup if possible.

 Based on the v1 review I addressed Christoffer's minor comments, but
 also heavily changed [11/12]: KVM: ARM: on IO mem abort - route the
 call to KVM MMIO bus to get rid of the now unnecessary copying and
 the usage of kvm_exit_mmio in that early stage. See the respective
 commit message for more details.

 The series is loosely based on Nikolay's work[1], thanks especially
 for the tedious first patch.
 I totally reworked Nikolay's 3/5 to avoid adding another MMIO handling
 layer on top of the already quite convoluted VGIC MMIO handling.
 Also Nikolay's 2/5 got extended and changed significantly, that's why
 I dropped his Signed-off-by.

 Unfortunately kvm_io_bus lacks an opaque pointer to pass in some data,
 so I worked around this by using container_of.
 Now for every struct kvm_mmio_range array a KVM I/O device is
 registered (one for VGICv2, 2*nr_vcpus + 1 for VGICv3), using the
 struct kvm_io_device variable as an anchor into the new
 struct vgic_io_device. This one holds the base address, the
 vgic_io_range pointer and (in case of the GICv3 redistributor) the
 associated vCPU, so that we can access all instance-specific data
 easily.

 Patch 2 moves the iodev.h header file around, that solves a problem
 when embedding a struct in arm_vgic.h later. That looks like a nice
 cleanup anyway, so I added two patches to remove the compiler switch
 to add virt/kvm as a include directory. This has been tested for
 arm/arm64 and x86. As soon as I get around to compile-test the other
 architectures, I can send out the respective patches for those, too.

 Patches 5-7 tweak the existing code a bit to make it fit for the
 conversion.
 Patch 8 contains the framework for the new handling, while
 patch 9 and 10 enable the GICv2 and GICv3 emulation, respectively.
 Patch 11 finally switches over to the new kvm_io_bus handling,
 reworking the early ARM KVM MMIO handling quite a bit. Patch 12
 removes the now unneeded code. I split this up to ease reviewing, I
 could merge patches as well if needed.

Shall we add here also the last 2 patches from my series that actually enable
the eventfd compilation and KVM_CAP_IOEVENTFD? Or should I send them separately?

regards,
Nikolay Nikolaev

 The series goes on top of the kvmarm.git/next branch and was briefly
 tested on an arm64 model with a GICv2 and a GICv3 guest and on Midway
 (GICv2 guest).

 Cheers,
 Andre.

 [1] https://lists.cs.columbia.edu/pipermail/kvmarm/2015-January/013379.html

 Andre Przywara (11):
   KVM: move iodev.h from virt/kvm/ to include/kvm
   KVM: arm/arm64: remove now unneeded include directory from Makefile
   KVM: x86: remove now unneeded include directory from Makefile
   KVM: arm/arm64: rename struct kvm_mmio_range to vgic_io_range
   KVM: mark kvm-buses as empty once they were destroyed
   KVM: arm/arm64: simplify vgic_find_range() and callers
   KVM: arm/arm64: implement kvm_io_bus MMIO handling for the VGIC
   KVM: arm/arm64: prepare GICv2 emulation to be handled by kvm_io_bus
   KVM: arm/arm64: prepare GICv3 emulation to use kvm_io_bus MMIO
 handling
   KVM: arm/arm64: rework MMIO abort handling to use KVM MMIO bus
   KVM: arm/arm64: remove now obsolete VGIC specific MMIO handling code

 Nikolay Nikolaev (1):
   KVM: Redesign kvm_io_bus_ API to pass VCPU structure to the
 callbacks.

  arch/arm/include/asm/kvm_mmio.h   |   22 
  arch/arm/kvm/Makefile |2 +-
  arch/arm/kvm/mmio.c   |   60 ---
  arch/arm64/include/asm/kvm_mmio.h |   22 
  arch/arm64/kvm/Makefile   |2 +-
  arch/powerpc/kvm/mpic.c   |   12 ++-
  arch/powerpc/kvm/powerpc.c|4 +-
  arch/s390/kvm/diag.c  |2 +-
  arch/x86/kvm/Makefile |2 +-
  arch/x86/kvm/i8254.c  |   14 ++-
  arch/x86/kvm/i8254.h  |2 +-
  arch/x86/kvm/i8259.c  |   12 +--
  arch/x86/kvm/ioapic.c |8 +-
  arch/x86/kvm/ioapic.h |2 +-
  arch/x86/kvm/irq.h|2 +-
  arch/x86/kvm/lapic.c  |4 +-
  arch/x86/kvm/lapic.h  |2 +-
  arch/x86/kvm/vmx.c|2 +-
  arch/x86/kvm/x86.c|   13 +--
  include/kvm/arm_vgic.h|   16 ++-
  include/kvm/iodev.h   |   76 +
  include/linux/kvm_host.h  |   10 +-
  virt/kvm/arm/vgic-v2-emul.c

[RESEND PATCH] KVM: PPC: Book3S HV: Deliver machine check with MSR(RI=0) to guest as MCE.

2015-03-23 Thread Mahesh J Salgaonkar
From: Mahesh Salgaonkar mah...@linux.vnet.ibm.com

For the machine check interrupt that happens while we are in the guest,
kvm layer attempts the recovery, and then delivers the machine check interrupt
directly to the guest if recovery fails. On successful recovery we go back to
normal functioning of the guest. But there can be cases where a machine check
interrupt can happen with MSR(RI=0) while we are in the guest. This means
MC interrupt is unrecoverable and we have to deliver a machine check to the
guest since the machine check interrupt might have trashed valid values in
SRR0/1. The current implementation does not handle this case, causing the guest
to crash with Bad kernel stack pointer instead of a machine check oops message.

[26281.490060] Bad kernel stack pointer 3fff9ccce5b0 at c000490c
[26281.490434] Oops: Bad kernel stack pointer, sig: 6 [#1]
[26281.490472] SMP NR_CPUS=2048 NUMA pSeries

This patch fixes this issue by checking MSR(RI=0) in KVM layer and forwarding
unrecoverable interrupt to guest which then panics with proper machine check
Oops message.

Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com
Acked-by: Paul Mackerras pau...@samba.org
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |   12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index bb94e6f..258f46d 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2063,7 +2063,6 @@ machine_check_realmode:
mr  r3, r9  /* get vcpu pointer */
bl  kvmppc_realmode_machine_check
nop
-   cmpdi   r3, 0   /* Did we handle MCE ? */
ld  r9, HSTATE_KVM_VCPU(r13)
li  r12, BOOK3S_INTERRUPT_MACHINE_CHECK
/*
@@ -2076,13 +2075,18 @@ machine_check_realmode:
 * The old code used to return to host for unhandled errors which
 * was causing guest to hang with soft lockups inside guest and
 * makes it difficult to recover guest instance.
+*
+* if we receive machine check with MSR(RI=0) then deliver it to
+* guest as machine check causing guest to crash.
 */
-   ld  r10, VCPU_PC(r9)
ld  r11, VCPU_MSR(r9)
+   andi.   r10, r11, MSR_RI/* check for unrecoverable exception */
+   beq 1f  /* Deliver a machine check to guest */
+   ld  r10, VCPU_PC(r9)
+   cmpdi   r3, 0   /* Did we handle MCE ? */
bne 2f  /* Continue guest execution. */
/* If not, deliver a machine check.  SRR0/1 are already set */
-   li  r10, BOOK3S_INTERRUPT_MACHINE_CHECK
-   ld  r11, VCPU_MSR(r9)
+1: li  r10, BOOK3S_INTERRUPT_MACHINE_CHECK
bl  kvmppc_msr_interrupt
 2: b   fast_interrupt_c_return
 

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 2/6] target-arm: kvm: save/restore mp state

2015-03-23 Thread Alex Bennée
This adds the saving and restore of the current Multi-Processing state
of the machine. While the KVM_GET/SET_MP_STATE API exposes a number of
potential states for x86 we only use two for ARM. Either the process is
running or not. We then save this state into the cpu_powered TCG state
to avoid changing the serialisation format.

Signed-off-by: Alex Bennée alex.ben...@linaro.org

---
v2
  - make mpstate field runtime dependent (kvm_enabled())
  - drop initial KVM_CAP_MP_STATE requirement
  - re-use cpu_powered instead of new field

v4
  - s/HALTED/STOPPED/
  - move code from machine.c to kvm.

diff --git a/target-arm/kvm.c b/target-arm/kvm.c
index 72c1fa1..a74832c 100644
--- a/target-arm/kvm.c
+++ b/target-arm/kvm.c
@@ -458,6 +458,46 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu)
 }
 }
 
+/*
+ * Update KVM's MP_STATE based on what QEMU thinks it is
+ */
+int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu)
+{
+if (kvm_check_extension(CPU(cpu)-kvm_state, KVM_CAP_MP_STATE)) {
+struct kvm_mp_state mp_state = {
+.mp_state =
+cpu-powered_off ? KVM_MP_STATE_STOPPED : KVM_MP_STATE_RUNNABLE
+};
+int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, mp_state);
+if (ret) {
+fprintf(stderr, %s: failed to set MP_STATE %d/%s\n,
+__func__, ret, strerror(ret));
+return -1;
+}
+}
+
+return 0;
+}
+
+/*
+ * Sync the KVM MP_STATE into QEMU
+ */
+int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu)
+{
+if (kvm_check_extension(CPU(cpu)-kvm_state, KVM_CAP_MP_STATE)) {
+struct kvm_mp_state mp_state;
+int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MP_STATE, mp_state);
+if (ret) {
+fprintf(stderr, %s: failed to get MP_STATE %d/%s\n,
+__func__, ret, strerror(ret));
+abort();
+}
+cpu-powered_off = (mp_state.mp_state == KVM_MP_STATE_STOPPED);
+}
+
+return 0;
+}
+
 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
 {
 }
diff --git a/target-arm/kvm32.c b/target-arm/kvm32.c
index 94030d1..49b6bab 100644
--- a/target-arm/kvm32.c
+++ b/target-arm/kvm32.c
@@ -356,6 +356,8 @@ int kvm_arch_put_registers(CPUState *cs, int level)
 return EINVAL;
 }
 
+kvm_arm_sync_mpstate_to_kvm(cpu);
+
 return ret;
 }
 
@@ -427,5 +429,7 @@ int kvm_arch_get_registers(CPUState *cs)
  */
 write_list_to_cpustate(cpu);
 
+kvm_arm_sync_mpstate_to_qemu(cpu);
+
 return 0;
 }
diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
index 8cf3a62..fed03f2 100644
--- a/target-arm/kvm64.c
+++ b/target-arm/kvm64.c
@@ -211,6 +211,8 @@ int kvm_arch_put_registers(CPUState *cs, int level)
 return EINVAL;
 }
 
+kvm_arm_sync_mpstate_to_kvm(cpu);
+
 /* TODO:
  * FP state
  */
@@ -310,6 +312,8 @@ int kvm_arch_get_registers(CPUState *cs)
  */
 write_list_to_cpustate(cpu);
 
+kvm_arm_sync_mpstate_to_qemu(cpu);
+
 /* TODO: other registers */
 return ret;
 }
diff --git a/target-arm/kvm_arm.h b/target-arm/kvm_arm.h
index 455dea3..7b75758 100644
--- a/target-arm/kvm_arm.h
+++ b/target-arm/kvm_arm.h
@@ -162,6 +162,24 @@ typedef struct ARMHostCPUClass {
  */
 bool kvm_arm_get_host_cpu_features(ARMHostCPUClass *ahcc);
 
+
+/**
+ * kvm_arm_sync_mpstate_to_kvm
+ * @cpu: ARMCPU
+ *
+ * If supported set the KVM MP_STATE based on QEMUs migration data.
+ */
+int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu);
+
+/**
+ * kvm_arm_sync_mpstate_to_qemu
+ * @cpu: ARMCPU
+ *
+ * If supported get the MP_STATE from KVM and store in QEMUs migration
+ * data.
+ */
+int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu);
+
 #endif
 
 #endif
-- 
2.3.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 6/6] target-arm: cpu.h document why env-spsr exists

2015-03-23 Thread Alex Bennée
I was getting very confused about the duplication of state so wanted to
make it explicit.

Signed-off-by: Alex Bennée alex.ben...@linaro.org

diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 083211c..6dc1799 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -155,6 +155,11 @@ typedef struct CPUARMState {
This contains all the other bits.  Use cpsr_{read,write} to access
the whole CPSR.  */
 uint32_t uncached_cpsr;
+/* The spsr is a alias for spsr_elN where N is the current
+ * exception level. It is provided for here so the TCG msr/mrs
+ * implementation can access one register. Care needs to be taken
+ * to ensure the banked_spsr[] is also updated.
+ */
 uint32_t spsr;
 
 /* Banked registers.  */
-- 
2.3.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 5/6] target-arm: kvm64 fix save/restore of SPSR regs

2015-03-23 Thread Alex Bennée
The current code was negatively indexing the cpu state array and not
synchronizing banked spsr register state with the current mode's spsr
state, causing occasional failures with migration.

Some munging is done to take care of the aarch64 mapping and also to
ensure the most current value of the spsr is updated to the banked
registers (relevant for KVM-TCG migration).

Signed-off-by: Alex Bennée alex.ben...@linaro.org

---
v2 (ajb)
  - minor tweaks and clarifications
v3
  - Use the correct bank index function for setting/getting env-spsr
  - only deal with spsrs in elevated exception levels
v4
 - try and make commentary clearer
 - ensure env-banked_spsr[0] = env-spsr before we sync
v5
 - fix banking index now banking fixed
 - keep wide spacing on [ ] forms
 - claimed authorship

diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
index 857e970..5270fa7 100644
--- a/target-arm/kvm64.c
+++ b/target-arm/kvm64.c
@@ -139,6 +139,7 @@ int kvm_arch_put_registers(CPUState *cs, int level)
 uint64_t val;
 int i;
 int ret;
+unsigned int el;
 
 ARMCPU *cpu = ARM_CPU(cs);
 CPUARMState *env = cpu-env;
@@ -205,9 +206,24 @@ int kvm_arch_put_registers(CPUState *cs, int level)
 return ret;
 }
 
+/* Saved Program State Registers
+ *
+ * Before we restore from the banked_spsr[] array we need to
+ * ensure that any modifications to env-spsr are correctly
+ * reflected in the banks.
+ */
+el = arm_current_el(env);
+if (el  0) {
+i = is_a64(env) ?
+aarch64_banked_spsr_index(el) :
+bank_number(env-uncached_cpsr  CPSR_M);
+env-banked_spsr[i] = env-spsr;
+}
+
+/* KVM 0-4 map to QEMU banks 1-5 */
 for (i = 0; i  KVM_NR_SPSR; i++) {
 reg.id = AARCH64_CORE_REG(spsr[i]);
-reg.addr = (uintptr_t) env-banked_spsr[i - 1];
+reg.addr = (uintptr_t) env-banked_spsr[i + 1];
 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, reg);
 if (ret) {
 return ret;
@@ -253,11 +269,13 @@ int kvm_arch_put_registers(CPUState *cs, int level)
 return ret;
 }
 
+
 int kvm_arch_get_registers(CPUState *cs)
 {
 struct kvm_one_reg reg;
 uint64_t val;
 uint32_t fpr;
+unsigned int el;
 int i;
 int ret;
 
@@ -330,15 +348,27 @@ int kvm_arch_get_registers(CPUState *cs)
 return ret;
 }
 
+/* Fetch the SPSR registers
+ *
+ * KVM SPSRs 0-4 map to QEMU banks 1-5
+ */
 for (i = 0; i  KVM_NR_SPSR; i++) {
 reg.id = AARCH64_CORE_REG(spsr[i]);
-reg.addr = (uintptr_t) env-banked_spsr[i - 1];
+reg.addr = (uintptr_t) env-banked_spsr[i + 1];
 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, reg);
 if (ret) {
 return ret;
 }
 }
 
+el = arm_current_el(env);
+if (el  0) {
+i = is_a64(env) ?
+aarch64_banked_spsr_index(el) :
+bank_number(env-uncached_cpsr  CPSR_M);
+env-spsr = env-banked_spsr[i];
+}
+
 /* Advanced SIMD and FP registers
  * We map Qn = regs[2n+1]:regs[2n]
  */
-- 
2.3.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 0/6] QEMU ARM64 Migration Fixes

2015-03-23 Thread Alex Bennée
Hi,

Following some review comments (and a patch) from Peter I've re-spun
this series:

v5
  - Added Peter's SPSR_EL1 state fix for architectural mapping
  - As a result SPSR save/restore no longer needs any munging
  - FP register save/restore re-done to deal with float128 mapping
  - Some minor [ spaces ] added

I submitted the kernel side of this on Friday

Branch: https://github.com/stsquad/qemu/tree/migration/fixes-v5
Kernel: 
https://git.linaro.org/people/alex.bennee/linux.git/shortlog/refs/heads/migration/kvmarm-fixes-for-4.0-v3

Alex Bennée (5):
  target-arm: kvm: save/restore mp state
  hw/intc: arm_gic_kvm.c restore config first
  target-arm: kvm64 sync FP register state
  target-arm: kvm64 fix save/restore of SPSR regs
  target-arm: cpu.h document why env-spsr exists

Peter Maydell (1):
  target-arm: Store SPSR_EL1 state in banked_spsr[1] (SPSR_svc)

 hw/intc/arm_gic_kvm.c   |   7 ++-
 target-arm/cpu.h|   5 +++
 target-arm/helper-a64.c |   2 +-
 target-arm/helper.c |   2 +-
 target-arm/internals.h  |   5 ++-
 target-arm/kvm.c|  40 +
 target-arm/kvm32.c  |   4 ++
 target-arm/kvm64.c  | 111 +---
 target-arm/kvm_arm.h|  18 
 9 files changed, 184 insertions(+), 10 deletions(-)

-- 
2.3.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] KVM: nVMX: Add support for rdtscp

2015-03-23 Thread Bandan Das
Jan Kiszka jan.kis...@web.de writes:
...
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -2467,6 +2467,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx 
 *vmx)
   vmx-nested.nested_vmx_secondary_ctls_low = 0;
   vmx-nested.nested_vmx_secondary_ctls_high =
   SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
 + SECONDARY_EXEC_RDTSCP |
   SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
   SECONDARY_EXEC_APIC_REGISTER_VIRT |
   SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 @@ -7510,7 +7511,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu 
 *vcpu)
   return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
   case EXIT_REASON_RDPMC:
   return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
 - case EXIT_REASON_RDTSC:
 + case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
   return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
   case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
   case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
 @@ -8517,6 +8518,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
   exec_control);
   }
   }
 + if (!vmx-rdtscp_enabled)
 + vmx-nested.nested_vmx_secondary_ctls_high =
 + ~SECONDARY_EXEC_RDTSCP;
No need to do this if nested is not enabled? Or just
an if (nested) in the prior if/else block should be enough, I think.

Bandan
   }
  
   /* Exposing INVPCID only when PCID is exposed */
 @@ -9146,8 +9150,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, 
 struct vmcs12 *vmcs12)
   exec_control = ~SECONDARY_EXEC_RDTSCP;
   /* Take the following fields only from vmcs12 */
   exec_control = ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
 +   SECONDARY_EXEC_RDTSCP |
 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 -  SECONDARY_EXEC_APIC_REGISTER_VIRT);
 +   SECONDARY_EXEC_APIC_REGISTER_VIRT);
   if (nested_cpu_has(vmcs12,
   CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
   exec_control |= vmcs12-secondary_vm_exec_control;
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Windows 7 guest installer does not detect drive if physical partition used instead of disk file.

2015-03-23 Thread Emmanuel Noobadmin
On 3/23/15, Stefan Hajnoczi stefa...@gmail.com wrote:
 I have CCed the libvirt mailing list, since KVM is a component here but
 your question seems to be mainly about libvirt, virt-manager,
 virt-install, etc.

Apologies for posting to the wrong list, I assumed it would be KVM
related as the guest could run but could not see the drive.

More information
1. install guest with /dev/sdxx as virtio device (the problem case)
- installer does not see any drive
- load drivers on Redhat virtio cdrom
- installer still does not see any drive

2. Install guest with qcow2 disk file as virtio device
- as previous scenario but installer sees drives after installing drivers

3. install guest with qcow2 disk file as IDE device
- complete installation
- add /dev/sdxx as virtio disk
- go to Windows Device Manager and update virtio driver for unknown controller
- Windows sees /dev/sdxx after driver installed


 It sounds like you want an NTFS partition on /dev/sda.  That requires
 passing the whole /dev/sda drive to the guest - and the Windows
 installer might overwrite your GRUB Master Boot Record.  Be careful when
 trying to do this.

Yes, I wanted to give Windows its own native partition that could be
read directly if I had to yank the disk and put it into a Windows
machine. Is this why #3 works but not #1? That as long as I want to
install Windows directly to an NTFS partition on /dev/sda, it is
required that I pass the whole drive to Windows?
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 3/6] hw/intc: arm_gic_kvm.c restore config first

2015-03-23 Thread Alex Bennée
As there is logic to deal with the difference between edge and level
triggered interrupts in the kernel we must ensure it knows the
configuration of the IRQs before we restore the pending state.

Signed-off-by: Alex Bennée alex.ben...@linaro.org
Acked-by: Christoffer Dall christoffer.d...@linaro.org

diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
index 0d20750..e2512f1 100644
--- a/hw/intc/arm_gic_kvm.c
+++ b/hw/intc/arm_gic_kvm.c
@@ -370,6 +370,11 @@ static void kvm_arm_gic_put(GICState *s)
  * the appropriate CPU interfaces in the kernel) */
 kvm_dist_put(s, 0x800, 8, s-num_irq, translate_targets);
 
+/* irq_state[n].trigger - GICD_ICFGRn
+ * (restore targets before pending IRQs so we treat level/edge
+ * correctly */
+kvm_dist_put(s, 0xc00, 2, s-num_irq, translate_trigger);
+
 /* irq_state[n].pending + irq_state[n].level - GICD_ISPENDRn */
 kvm_dist_put(s, 0x280, 1, s-num_irq, translate_clear);
 kvm_dist_put(s, 0x200, 1, s-num_irq, translate_pending);
@@ -378,8 +383,6 @@ static void kvm_arm_gic_put(GICState *s)
 kvm_dist_put(s, 0x380, 1, s-num_irq, translate_clear);
 kvm_dist_put(s, 0x300, 1, s-num_irq, translate_active);
 
-/* irq_state[n].trigger - GICD_ICFRn */
-kvm_dist_put(s, 0xc00, 2, s-num_irq, translate_trigger);
 
 /* s-priorityX[irq] - ICD_IPRIORITYRn */
 kvm_dist_put(s, 0x400, 8, s-num_irq, translate_priority);
-- 
2.3.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 4/6] target-arm: kvm64 sync FP register state

2015-03-23 Thread Alex Bennée
For migration to work we need to sync all of the register state. This is
especially noticeable when GCC starts using FP registers as spill
registers even with integer programs.

Signed-off-by: Alex Bennée alex.ben...@linaro.org

---

v4:
  - fixed merge conflicts
  - rm superfluous reg.id++
v5:
  - use interim float128 to deal with endianness
  - correctly map into vfp.regs[]
  - fix spacing around []s

diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
index fed03f2..857e970 100644
--- a/target-arm/kvm64.c
+++ b/target-arm/kvm64.c
@@ -126,9 +126,16 @@ bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx)
 #define AARCH64_CORE_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
  KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
 
+#define AARCH64_SIMD_CORE_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U128 | \
+ KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
+
+#define AARCH64_SIMD_CTRL_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U32 | \
+ KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
+
 int kvm_arch_put_registers(CPUState *cs, int level)
 {
 struct kvm_one_reg reg;
+uint32_t fpr;
 uint64_t val;
 int i;
 int ret;
@@ -207,15 +214,42 @@ int kvm_arch_put_registers(CPUState *cs, int level)
 }
 }
 
+/* Advanced SIMD and FP registers
+ * We map Qn = regs[2n+1]:regs[2n]
+ */
+for (i = 0; i  32; i++) {
+int rd = i  1;
+float128 fp_val = make_float128(env-vfp.regs[rd + 1],
+env-vfp.regs[rd]);
+reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
+reg.addr = (uintptr_t)(fp_val);
+ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, reg);
+if (ret) {
+return ret;
+}
+}
+
+reg.addr = (uintptr_t)(fpr);
+fpr = vfp_get_fpsr(env);
+reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
+ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, reg);
+if (ret) {
+return ret;
+}
+
+fpr = vfp_get_fpcr(env);
+reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
+ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, reg);
+if (ret) {
+return ret;
+}
+
 if (!write_list_to_kvmstate(cpu)) {
 return EINVAL;
 }
 
 kvm_arm_sync_mpstate_to_kvm(cpu);
 
-/* TODO:
- * FP state
- */
 return ret;
 }
 
@@ -223,6 +257,7 @@ int kvm_arch_get_registers(CPUState *cs)
 {
 struct kvm_one_reg reg;
 uint64_t val;
+uint32_t fpr;
 int i;
 int ret;
 
@@ -304,6 +339,38 @@ int kvm_arch_get_registers(CPUState *cs)
 }
 }
 
+/* Advanced SIMD and FP registers
+ * We map Qn = regs[2n+1]:regs[2n]
+ */
+for (i = 0; i  32; i++) {
+float128 fp_val;
+reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
+reg.addr = (uintptr_t)(fp_val);
+ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, reg);
+if (ret) {
+return ret;
+} else {
+int rd = i  1;
+env-vfp.regs[rd + 1] = fp_val.high;
+env-vfp.regs[rd] = fp_val.low;
+}
+}
+
+reg.addr = (uintptr_t)(fpr);
+reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
+ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, reg);
+if (ret) {
+return ret;
+}
+vfp_set_fpsr(env, fpr);
+
+reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
+ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, reg);
+if (ret) {
+return ret;
+}
+vfp_set_fpcr(env, fpr);
+
 if (!write_kvmstate_to_list(cpu)) {
 return EINVAL;
 }
-- 
2.3.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v5 1/6] target-arm: Store SPSR_EL1 state in banked_spsr[1] (SPSR_svc)

2015-03-23 Thread Alex Bennée
From: Peter Maydell peter.mayd...@linaro.org

The AArch64 SPSR_EL1 register is architecturally mandated to
be mapped to the AArch32 SPSR_svc register. This means its
state should live in QEMU's env-banked_spsr[1] field.
Correct the various places in the code that incorrectly
put it in banked_spsr[0].

Signed-off-by: Peter Maydell peter.mayd...@linaro.org

diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index 7e0d038..861f6fa 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -523,7 +523,7 @@ void aarch64_cpu_do_interrupt(CPUState *cs)
 aarch64_save_sp(env, arm_current_el(env));
 env-elr_el[new_el] = env-pc;
 } else {
-env-banked_spsr[0] = cpsr_read(env);
+env-banked_spsr[aarch64_banked_spsr_index(new_el)] = cpsr_read(env);
 if (!env-thumb) {
 env-cp15.esr_el[new_el] |= 1  25;
 }
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 10886c5..d77c6de 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -2438,7 +2438,7 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
 { .name = SPSR_EL1, .state = ARM_CP_STATE_AA64,
   .type = ARM_CP_ALIAS,
   .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 0,
-  .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[0]) },
+  .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[1]) },
 /* We rely on the access checks not allowing the guest to write to the
  * state field when SPSel indicates that it's being used as the stack
  * pointer.
diff --git a/target-arm/internals.h b/target-arm/internals.h
index bb171a7..2cc3017 100644
--- a/target-arm/internals.h
+++ b/target-arm/internals.h
@@ -82,11 +82,14 @@ static inline void arm_log_exception(int idx)
 
 /*
  * For AArch64, map a given EL to an index in the banked_spsr array.
+ * Note that this mapping and the AArch32 mapping defined in bank_number()
+ * must agree such that the AArch64-AArch32 SPSRs have the architecturally
+ * mandated mapping between each other.
  */
 static inline unsigned int aarch64_banked_spsr_index(unsigned int el)
 {
 static const unsigned int map[4] = {
-[1] = 0, /* EL1.  */
+[1] = 1, /* EL1.  */
 [2] = 6, /* EL2.  */
 [3] = 7, /* EL3.  */
 };
-- 
2.3.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] KVM: nVMX: Add support for rdtscp

2015-03-23 Thread Jan Kiszka
On 2015-03-23 18:01, Bandan Das wrote:
 Jan Kiszka jan.kis...@web.de writes:
 ...
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -2467,6 +2467,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx 
 *vmx)
  vmx-nested.nested_vmx_secondary_ctls_low = 0;
  vmx-nested.nested_vmx_secondary_ctls_high =
  SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
 +SECONDARY_EXEC_RDTSCP |
  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
  SECONDARY_EXEC_APIC_REGISTER_VIRT |
  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 @@ -7510,7 +7511,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu 
 *vcpu)
  return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
  case EXIT_REASON_RDPMC:
  return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
 -case EXIT_REASON_RDTSC:
 +case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
  return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
  case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
  case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
 @@ -8517,6 +8518,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
  exec_control);
  }
  }
 +if (!vmx-rdtscp_enabled)
 +vmx-nested.nested_vmx_secondary_ctls_high =
 +~SECONDARY_EXEC_RDTSCP;
 No need to do this if nested is not enabled ? Or just
 a if (nested) in the prior if else loop should be enough I think.

I can add this - but this is far away from being a hotpath. What would
be the benefit?

Thanks,
Jan




signature.asc
Description: OpenPGP digital signature


[PATCH v3] KVM: nVMX: Add support for rdtscp

2015-03-23 Thread Jan Kiszka
From: Jan Kiszka jan.kis...@siemens.com

If the guest CPU is supposed to support rdtscp and the host has rdtscp
enabled in the secondary execution controls, we can also expose this
feature to L1. Just extend nested_vmx_exit_handled to properly route
EXIT_REASON_RDTSCP.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---

Changes in v3:
 - avoid needlessly touching vmx->nested if nested is off

 arch/x86/include/uapi/asm/vmx.h | 1 +
 arch/x86/kvm/vmx.c  | 9 +++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index c5f1a1d..1fe9218 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -67,6 +67,7 @@
 #define EXIT_REASON_EPT_VIOLATION   48
 #define EXIT_REASON_EPT_MISCONFIG   49
 #define EXIT_REASON_INVEPT  50
+#define EXIT_REASON_RDTSCP  51
 #define EXIT_REASON_PREEMPTION_TIMER52
 #define EXIT_REASON_INVVPID 53
 #define EXIT_REASON_WBINVD  54
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 50c675b..fdd9f8b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2467,6 +2467,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx 
*vmx)
vmx-nested.nested_vmx_secondary_ctls_low = 0;
vmx-nested.nested_vmx_secondary_ctls_high =
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+   SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
@@ -7510,7 +7511,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
case EXIT_REASON_RDPMC:
return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
-   case EXIT_REASON_RDTSC:
+   case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
@@ -8517,6 +8518,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
exec_control);
}
}
+   if (nested && !vmx->rdtscp_enabled)
+   vmx->nested.nested_vmx_secondary_ctls_high &=
+   ~SECONDARY_EXEC_RDTSCP;
}
 
/* Exposing INVPCID only when PCID is exposed */
@@ -9146,8 +9150,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct 
vmcs12 *vmcs12)
exec_control = ~SECONDARY_EXEC_RDTSCP;
/* Take the following fields only from vmcs12 */
exec_control = ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+ SECONDARY_EXEC_RDTSCP |
  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-  SECONDARY_EXEC_APIC_REGISTER_VIRT);
+ SECONDARY_EXEC_APIC_REGISTER_VIRT);
if (nested_cpu_has(vmcs12,
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
exec_control |= vmcs12-secondary_vm_exec_control;
-- 
2.1.4



signature.asc
Description: OpenPGP digital signature


Re: [PATCH v2] KVM: nVMX: Add support for rdtscp

2015-03-23 Thread Bandan Das
Jan Kiszka jan.kis...@web.de writes:

 On 2015-03-23 18:01, Bandan Das wrote:
 Jan Kiszka jan.kis...@web.de writes:
 ...
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -2467,6 +2467,7 @@ static void nested_vmx_setup_ctls_msrs(struct 
 vcpu_vmx *vmx)
 vmx-nested.nested_vmx_secondary_ctls_low = 0;
 vmx-nested.nested_vmx_secondary_ctls_high =
 SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
 +   SECONDARY_EXEC_RDTSCP |
 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
 SECONDARY_EXEC_APIC_REGISTER_VIRT |
 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 @@ -7510,7 +7511,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu 
 *vcpu)
 return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
 case EXIT_REASON_RDPMC:
 return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
 -   case EXIT_REASON_RDTSC:
 +   case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
 return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
 case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
 case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
 @@ -8517,6 +8518,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 exec_control);
 }
 }
 +   if (!vmx-rdtscp_enabled)
 +   vmx-nested.nested_vmx_secondary_ctls_high =
 +   ~SECONDARY_EXEC_RDTSCP;
 No need to do this if nested is not enabled ? Or just
 a if (nested) in the prior if else loop should be enough I think.

 I can add this - but this is far away from being a hotpath. What would
 be the benefit?

Right, definitely not a hotpath, just seems unnecessary if nested is not 
enabled.

Bandan
 Thanks,
 Jan
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [GIT PULL 00/11] KVM: s390: Features and fixes for 4.1 (kvm/next)

2015-03-23 Thread Marcelo Tosatti
On Wed, Mar 18, 2015 at 12:43:58PM +0100, Christian Borntraeger wrote:
 Paolo, Marcelo,
 
 here is the followup pull request. As Marcelo has not yet pushed out
 queue or next to git.kernel.org, this request is based on the previous
 s390 pull request and should merge without conflicts.
 
 For details see tag description.
 
 Christian
 
 The following changes since commit 13211ea7b47db3d8ee2ff258a9a973a6d3aa3d43:
 
   KVM: s390: Enable vector support for capable guest (2015-03-06 13:49:35 
 +0100)
 
 are available in the git repository at:
 
   git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git  
 tags/kvm-s390-next-20150318
 
 for you to fetch changes up to 18280d8b4bcd4a2b174ee3cd748166c6190acacb:
 
   KVM: s390: represent SIMD cap in kvm facility (2015-03-17 16:33:14 +0100)

Pulled, thanks.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


x86: kvm: Revert "remove sched notifier for cross-cpu migrations"

2015-03-23 Thread Marcelo Tosatti

The following point:

2. per-CPU pvclock time info is updated if the
   underlying CPU changes.

is not true anymore since "KVM: x86: update pvclock area conditionally,
on cpu migration".

Add task migration notification back.

Problem noticed by Andy Lutomirski.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
CC: sta...@kernel.org # 3.11+

diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index d6b078e..25b1cc0 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -95,6 +95,7 @@ unsigned __pvclock_read_cycles(const struct 
pvclock_vcpu_time_info *src,
 
 struct pvclock_vsyscall_time_info {
struct pvclock_vcpu_time_info pvti;
+   u32 migrate_count;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
 #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 2f355d2..e5ecd20 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -141,7 +141,46 @@ void pvclock_read_wallclock(struct pvclock_wall_clock 
*wall_clock,
set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
 }
 
+static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
+
+static struct pvclock_vsyscall_time_info *
+pvclock_get_vsyscall_user_time_info(int cpu)
+{
+   if (!pvclock_vdso_info) {
+   BUG();
+   return NULL;
+   }
+
+   return pvclock_vdso_info[cpu];
+}
+
+struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
+{
+   return pvclock_get_vsyscall_user_time_info(cpu)-pvti;
+}
+
 #ifdef CONFIG_X86_64
+static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
+   void *v)
+{
+   struct task_migration_notifier *mn = v;
+   struct pvclock_vsyscall_time_info *pvti;
+
+   pvti = pvclock_get_vsyscall_user_time_info(mn-from_cpu);
+
+   /* this is NULL when pvclock vsyscall is not initialized */
+   if (unlikely(pvti == NULL))
+   return NOTIFY_DONE;
+
+   pvti-migrate_count++;
+
+   return NOTIFY_DONE;
+}
+
+static struct notifier_block pvclock_migrate = {
+   .notifier_call = pvclock_task_migrate,
+};
+
 /*
  * Initialize the generic pvclock vsyscall state.  This will allocate
  * a/some page(s) for the per-vcpu pvclock information, set up a
@@ -155,12 +194,17 @@ int __init pvclock_init_vsyscall(struct 
pvclock_vsyscall_time_info *i,
 
WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
 
+   pvclock_vdso_info = i;
+
for (idx = 0; idx = (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
__set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
 __pa(i) + (idx*PAGE_SIZE),
 PAGE_KERNEL_VVAR);
}
 
+
+   register_task_migration_notifier(pvclock_migrate);
+
return 0;
 }
 #endif
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 9793322..3093376 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -82,18 +82,15 @@ static notrace cycle_t vread_pvclock(int *mode)
cycle_t ret;
u64 last;
u32 version;
+   u32 migrate_count;
u8 flags;
unsigned cpu, cpu1;
 
 
/*
-* Note: hypervisor must guarantee that:
-* 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
-* 2. that per-CPU pvclock time info is updated if the
-*underlying CPU changes.
-* 3. that version is increased whenever underlying CPU
-*changes.
-*
+* When looping to get a consistent (time-info, tsc) pair, we
+* also need to deal with the possibility we can switch vcpus,
+* so make sure we always re-fetch time-info for the current vcpu.
 */
do {
cpu = __getcpu()  VGETCPU_CPU_MASK;
@@ -104,6 +101,8 @@ static notrace cycle_t vread_pvclock(int *mode)
 
pvti = get_pvti(cpu);
 
+   migrate_count = pvti-migrate_count;
+
version = __pvclock_read_cycles(pvti-pvti, ret, flags);
 
/*
@@ -115,7 +114,8 @@ static notrace cycle_t vread_pvclock(int *mode)
cpu1 = __getcpu()  VGETCPU_CPU_MASK;
} while (unlikely(cpu != cpu1 ||
  (pvti-pvti.version  1) ||
- pvti-pvti.version != version));
+ pvti-pvti.version != version ||
+ pvti-migrate_count != migrate_count));
 
if (unlikely(!(flags  PVCLOCK_TSC_STABLE_BIT)))
*mode = VCLOCK_NONE;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6d77432..be98910 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -176,6 +176,14 @@ extern void get_iowait_load(unsigned long *nr_waiters, 
unsigned long *load);
 extern void calc_global_load(unsigned long ticks);
 extern void update_cpu_load_nohz(void);
 

Re: x86: kvm: Revert remove sched notifier for cross-cpu migrations

2015-03-23 Thread Andy Lutomirski
On Mon, Mar 23, 2015 at 4:21 PM, Marcelo Tosatti mtosa...@redhat.com wrote:

 The following point:

 2. per-CPU pvclock time info is updated if the
underlying CPU changes.

 Is not true anymore since KVM: x86: update pvclock area conditionally,
 on cpu migration.

 Add task migration notification back.

IMO this is a pretty big hammer to use to work around what appears to
be a bug in the host, but I guess that's okay.

It's also unfortunate in another regard: it seems non-obvious to me
how to use this without reading the cpu number twice in the vdso.  On
the other hand, unless we have a global pvti, or at least a global
indication of TSC stability, I don't see how to do that even with the
host bug fixed.

Grumble.

On a more useful note, could you rename migrate_count to
migrate_from_count, since that's what it is?

--Andy
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Patch v5] x86: irq_comm: Add check for RH bit in kvm_set_msi_irq

2015-03-23 Thread James Sullivan
On 03/23/2015 03:13 PM, Radim Krčmář wrote:
 2015-03-20 11:50-0600, James Sullivan:
 On 03/20/2015 09:22 AM, James Sullivan wrote:
 On 03/20/2015 09:15 AM, Radim Krčmář wrote:
 2015-03-19 16:51-0600, James Sullivan:
 I played around with native_compose_msi_msg and discovered the following:

 * dm=0, rh=0 => Physical Destination Mode
 * dm=0, rh=1 => Failed delivery
 * dm=1, rh=0 => Logical Destination Mode, No Redirection
 * dm=1, rh=1 => Logical Destination Mode, Redirection

 Great!  (What CPU family was that?)


 This was on Intel x86_64 (Core i5-3210m, 'Ivy Bridge').
 
 Thanks, it's possible that the behavior of chipsets changed since the
 report on Intel's forum ...
 (Lowest priority behaved differently before QPI, so it might coincide.)
 
 I'm still wondering about last sentence from that link, the
 parenthesised part to be exact,
   The reference to the APIC ID being 0xff is because 0xff is broadcast
   and lowest priority (what the RH bit really is for X86) is illegal
   with broadcast.

 Can you also check if RH=1 does something to delivery mode?

 I haven't seen any changes in the MSI Data Register for any values of RH,
 but I don't have a great sample size (one machine with one set of PCI 
 devices),
 so if anyone else can confirm that I would appreciate it.
 
 I meant if the delivery mode from data register isn't ignored with RH=1,
 and the message delivered as if lowest-priority was set there.
 (Decided by having something else than fixed or lowest-priority there.)
 

Hmm, any thoughts on how I could test for that?

 Worth noting that low prio delivery was used across the board for my PCI 
 devices
 regardless of RH=1 or 0, so it doesn't seem to be de facto the case that the 
 RH
 bit's only purpose is for lowprio delivery on x86.
 
 Yeah, afaik, it can be done with lowest priority delivery mode on ia64
 too, so I have a hard time finding RH's intended purpose.
 
Again, need to have some 
 more
 PCI devices to test against to confirm anything.
 
 It's impossible to test everything, and there is no conflict if we have
 at most one data point ;)
 

Very true :)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 2/2] drivers/vfio: Support EEH error injection

2015-03-23 Thread Gavin Shan
On Mon, Mar 23, 2015 at 10:14:59AM -0600, Alex Williamson wrote:
On Mon, 2015-03-23 at 16:20 +1100, Gavin Shan wrote:
 On Mon, Mar 23, 2015 at 04:10:20PM +1100, David Gibson wrote:
 On Mon, Mar 23, 2015 at 04:03:59PM +1100, Gavin Shan wrote:
  On Mon, Mar 23, 2015 at 02:43:03PM +1100, David Gibson wrote:
  On Mon, Mar 23, 2015 at 12:56:36PM +1100, Gavin Shan wrote:
   On Mon, Mar 23, 2015 at 12:39:45PM +1100, David Gibson wrote:
   On Sat, Mar 21, 2015 at 06:58:45AM +1100, Gavin Shan wrote:
The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
to inject the specified EEH error, which is represented by
(struct vfio_eeh_pe_err), to the indicated PE for testing purpose.

Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com
   
   Reviewed-by: David Gibson da...@gibson.dropbear.id.au
   
---
 Documentation/vfio.txt| 12 
 drivers/vfio/vfio_spapr_eeh.c | 10 ++
 include/uapi/linux/vfio.h | 36 
+++-
 3 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index 96978ec..c6e11a3 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -385,6 +385,18 @@ The code flow from the example above should be 
slightly changed:
 

 
+   /* Inject EEH error, which is expected to be caused by 32-bits
+* config load.
+*/
+   pe_op.op = VFIO_EEH_PE_INJECT_ERR;
+   pe_op.err.type = VFIO_EEH_ERR_TYPE_32;
+   pe_op.err.func = VFIO_EEH_ERR_FUNC_LD_CFG_ADDR;
+   pe_op.err.addr = 0ul;
+   pe_op.err.mask = 0ul;
+   ioctl(container, VFIO_EEH_PE_OP, pe_op);
+
+   
+
/* When 0xFF's returned from reading PCI config space or IO BARs
 * of the PCI device. Check the PE's state to see if that has 
been
 * frozen.
diff --git a/drivers/vfio/vfio_spapr_eeh.c 
b/drivers/vfio/vfio_spapr_eeh.c
index 5fa42db..38edeb4 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct 
iommu_group *group,
case VFIO_EEH_PE_CONFIGURE:
ret = eeh_pe_configure(pe);
break;
+   case VFIO_EEH_PE_INJECT_ERR:
+   minsz = offsetofend(struct vfio_eeh_pe_op, 
err.mask);
+   if (op.argsz  minsz)
+   return -EINVAL;
+   if (copy_from_user(op, (void __user *)arg, 
minsz))
+   return -EFAULT;
+
+   ret = eeh_pe_inject_err(pe, op.err.type, 
op.err.func,
+   op.err.addr, 
op.err.mask);
+   break;
default:
ret = -EINVAL;
}
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 82889c3..f68e962 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -468,12 +468,23 @@ struct vfio_iommu_spapr_tce_info {
  * - unfreeze IO/DMA for frozen PE;
  * - read PE state;
  * - reset PE;
- * - configure PE.
+ * - configure PE;
+ * - inject EEH error.
  */
+struct vfio_eeh_pe_err {
+   __u32 type;
+   __u32 func;
+   __u64 addr;
+   __u64 mask;
+};
+
 struct vfio_eeh_pe_op {
__u32 argsz;
__u32 flags;
__u32 op;
+   union {
+   struct vfio_eeh_pe_err err;
+   };
 };
 
 #define VFIO_EEH_PE_DISABLE0   /* Disable EEH 
functionality */
@@ -490,6 +501,29 @@ struct vfio_eeh_pe_op {
 #define VFIO_EEH_PE_RESET_HOT  6   /* Assert hot 
reset  */
 #define VFIO_EEH_PE_RESET_FUNDAMENTAL  7   /* Assert 
fundamental reset  */
 #define VFIO_EEH_PE_CONFIGURE  8   /* PE 
configuration  */
+#define VFIO_EEH_PE_INJECT_ERR 9   /* Inject EEH 
error  */
+#define  VFIO_EEH_ERR_TYPE_32  0   /* 32-bits EEH 
error type*/
+#define  VFIO_EEH_ERR_TYPE_64  1   /* 64-bits EEH 
error type*/
+#define  VFIO_EEH_ERR_FUNC_LD_MEM_ADDR 0   /* 
Memory load  */
+#define  VFIO_EEH_ERR_FUNC_LD_MEM_DATA 1
+#define  VFIO_EEH_ERR_FUNC_LD_IO_ADDR  2   /* IO 
load  */
+#define  VFIO_EEH_ERR_FUNC_LD_IO_DATA  3
+#define  VFIO_EEH_ERR_FUNC_LD_CFG_ADDR 4   /* 
Config load  */
+#define  VFIO_EEH_ERR_FUNC_LD_CFG_DATA 5
+#define  VFIO_EEH_ERR_FUNC_ST_MEM_ADDR 6   /* 
Memory store */
+#define  VFIO_EEH_ERR_FUNC_ST_MEM_DATA 7
+#define  VFIO_EEH_ERR_FUNC_ST_IO_ADDR 

Re: [PATCH] KVM: x86: call irq notifiers with directed EOI

2015-03-23 Thread Marcelo Tosatti
On Wed, Mar 18, 2015 at 07:38:22PM +0100, Radim Krčmář wrote:
 kvm_ioapic_update_eoi() wasn't called if directed EOI was enabled.
 We need to do that for irq notifiers.  (Like with edge interrupts.)
 
 Fix it by skipping EOI broadcast only.
 
 Bug: https://bugzilla.kernel.org/show_bug.cgi?id=82211
 Signed-off-by: Radim Krčmář rkrc...@redhat.com
 ---
  arch/x86/kvm/ioapic.c | 4 +++-
  arch/x86/kvm/lapic.c  | 3 +--
  2 files changed, 4 insertions(+), 3 deletions(-)

Applied to master, thanks.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: x86: inline kvm_ioapic_handles_vector()

2015-03-23 Thread Marcelo Tosatti
On Thu, Mar 19, 2015 at 09:52:41PM +0100, Radim Krčmář wrote:
 An overhead from function call is not appropriate for its size and
 frequency of execution.
 
 Suggested-by: Paolo Bonzini pbonz...@redhat.com
 Signed-off-by: Radim Krčmář rkrc...@redhat.com
 ---
   I'm not very fond of that smp_rmb(): there is no real synchronization
   against update_handled_vectors(), 

Yes, because the guest OS should provide synchronization (it should
shutdown interrupts before attempting to modify IOAPIC table).

The smp_wmb is necessary.

  so the only point I see is to drop
   cached value of handled_vectors, which seems like bad use of LFENCE.

test_bit has volatile on *addr, so I don't see why the smp_rmb is
necessary at all.

Applied, thanks.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


x86: kvm: rename migrate_count variable

2015-03-23 Thread Marcelo Tosatti

As that's more indicative of the variable's usage.

Suggested by Andy Lutomirski.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 25b1cc0..1c1b474 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -95,7 +95,7 @@ unsigned __pvclock_read_cycles(const struct 
pvclock_vcpu_time_info *src,
 
 struct pvclock_vsyscall_time_info {
struct pvclock_vcpu_time_info pvti;
-   u32 migrate_count;
+   u32 migrate_from_count;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
 #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index e5ecd20..8eaf04b 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -172,7 +172,7 @@ static int pvclock_task_migrate(struct notifier_block *nb, 
unsigned long l,
if (unlikely(pvti == NULL))
return NOTIFY_DONE;
 
-   pvti-migrate_count++;
+   pvti-migrate_from_count++;
 
return NOTIFY_DONE;
 }
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 3093376..ef8bb76 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -82,7 +82,7 @@ static notrace cycle_t vread_pvclock(int *mode)
cycle_t ret;
u64 last;
u32 version;
-   u32 migrate_count;
+   u32 migrate_from_count;
u8 flags;
unsigned cpu, cpu1;
 
@@ -101,7 +101,7 @@ static notrace cycle_t vread_pvclock(int *mode)
 
pvti = get_pvti(cpu);
 
-   migrate_count = pvti-migrate_count;
+   migrate_from_count = pvti-migrate_from_count;
 
version = __pvclock_read_cycles(pvti-pvti, ret, flags);
 
@@ -115,7 +115,7 @@ static notrace cycle_t vread_pvclock(int *mode)
} while (unlikely(cpu != cpu1 ||
  (pvti-pvti.version  1) ||
  pvti-pvti.version != version ||
- pvti-migrate_count != migrate_count));
+ pvti-migrate_from_count != migrate_from_count));
 
if (unlikely(!(flags  PVCLOCK_TSC_STABLE_BIT)))
*mode = VCLOCK_NONE;

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 2/2] drivers/vfio: Support EEH error injection

2015-03-23 Thread Gavin Shan
On Tue, Mar 24, 2015 at 12:22:25PM +1100, David Gibson wrote:
On Tue, Mar 24, 2015 at 09:47:54AM +1100, Gavin Shan wrote:
 On Mon, Mar 23, 2015 at 10:14:59AM -0600, Alex Williamson wrote:
 On Mon, 2015-03-23 at 16:20 +1100, Gavin Shan wrote:
  On Mon, Mar 23, 2015 at 04:10:20PM +1100, David Gibson wrote:
  On Mon, Mar 23, 2015 at 04:03:59PM +1100, Gavin Shan wrote:
   On Mon, Mar 23, 2015 at 02:43:03PM +1100, David Gibson wrote:
   On Mon, Mar 23, 2015 at 12:56:36PM +1100, Gavin Shan wrote:
On Mon, Mar 23, 2015 at 12:39:45PM +1100, David Gibson wrote:
On Sat, Mar 21, 2015 at 06:58:45AM +1100, Gavin Shan wrote:
 The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
 to inject the specified EEH error, which is represented by
 (struct vfio_eeh_pe_err), to the indicated PE for testing 
 purpose.
 
 Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com

Reviewed-by: David Gibson da...@gibson.dropbear.id.au

 ---
  Documentation/vfio.txt| 12 
  drivers/vfio/vfio_spapr_eeh.c | 10 ++
  include/uapi/linux/vfio.h | 36 
 +++-
  3 files changed, 57 insertions(+), 1 deletion(-)
 
 diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
 index 96978ec..c6e11a3 100644
 --- a/Documentation/vfio.txt
 +++ b/Documentation/vfio.txt
 @@ -385,6 +385,18 @@ The code flow from the example above should 
 be slightly changed:
  
  
  
 +/* Inject EEH error, which is expected to be caused by 
 32-bits
 + * config load.
 + */
 +pe_op.op = VFIO_EEH_PE_INJECT_ERR;
 +pe_op.err.type = VFIO_EEH_ERR_TYPE_32;
 +pe_op.err.func = VFIO_EEH_ERR_FUNC_LD_CFG_ADDR;
 +pe_op.err.addr = 0ul;
 +pe_op.err.mask = 0ul;
 +ioctl(container, VFIO_EEH_PE_OP, pe_op);
 +
 +
 +
  /* When 0xFF's returned from reading PCI config space 
 or IO BARs
   * of the PCI device. Check the PE's state to see if 
 that has been
   * frozen.
 diff --git a/drivers/vfio/vfio_spapr_eeh.c 
 b/drivers/vfio/vfio_spapr_eeh.c
 index 5fa42db..38edeb4 100644
 --- a/drivers/vfio/vfio_spapr_eeh.c
 +++ b/drivers/vfio/vfio_spapr_eeh.c
 @@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct 
 iommu_group *group,
  case VFIO_EEH_PE_CONFIGURE:
  ret = eeh_pe_configure(pe);
  break;
 +case VFIO_EEH_PE_INJECT_ERR:
 +minsz = offsetofend(struct 
 vfio_eeh_pe_op, err.mask);
 +if (op.argsz  minsz)
 +return -EINVAL;
 +if (copy_from_user(op, (void __user 
 *)arg, minsz))
 +return -EFAULT;
 +
 +ret = eeh_pe_inject_err(pe, 
 op.err.type, op.err.func,
 +op.err.addr, 
 op.err.mask);
 +break;
  default:
  ret = -EINVAL;
  }
 diff --git a/include/uapi/linux/vfio.h 
 b/include/uapi/linux/vfio.h
 index 82889c3..f68e962 100644
 --- a/include/uapi/linux/vfio.h
 +++ b/include/uapi/linux/vfio.h
 @@ -468,12 +468,23 @@ struct vfio_iommu_spapr_tce_info {
   * - unfreeze IO/DMA for frozen PE;
   * - read PE state;
   * - reset PE;
 - * - configure PE.
 + * - configure PE;
 + * - inject EEH error.
   */
 +struct vfio_eeh_pe_err {
 +__u32 type;
 +__u32 func;
 +__u64 addr;
 +__u64 mask;
 +};
 +
  struct vfio_eeh_pe_op {
  __u32 argsz;
  __u32 flags;
  __u32 op;
 +union {
 +struct vfio_eeh_pe_err err;
 +};
  };
  
  #define VFIO_EEH_PE_DISABLE 0   /* Disable EEH 
 functionality */
 @@ -490,6 +501,29 @@ struct vfio_eeh_pe_op {
  #define VFIO_EEH_PE_RESET_HOT   6   /* Assert hot 
 reset  */
  #define VFIO_EEH_PE_RESET_FUNDAMENTAL   7   /* Assert 
 fundamental reset  */
  #define VFIO_EEH_PE_CONFIGURE   8   /* PE 
 configuration  */
 +#define VFIO_EEH_PE_INJECT_ERR  9   /* Inject EEH 
 error  */
 +#define  VFIO_EEH_ERR_TYPE_32   0   /* 32-bits EEH 
 error type*/
 +#define  VFIO_EEH_ERR_TYPE_64   1   /* 64-bits EEH 
 error type*/
 +#define  VFIO_EEH_ERR_FUNC_LD_MEM_ADDR  0   /* 
 Memory load  */
 +#define  VFIO_EEH_ERR_FUNC_LD_MEM_DATA  1
 +#define  VFIO_EEH_ERR_FUNC_LD_IO_ADDR   

Re: [PATCH v3 2/2] drivers/vfio: Support EEH error injection

2015-03-23 Thread David Gibson
On Tue, Mar 24, 2015 at 09:47:54AM +1100, Gavin Shan wrote:
 On Mon, Mar 23, 2015 at 10:14:59AM -0600, Alex Williamson wrote:
 On Mon, 2015-03-23 at 16:20 +1100, Gavin Shan wrote:
  On Mon, Mar 23, 2015 at 04:10:20PM +1100, David Gibson wrote:
  On Mon, Mar 23, 2015 at 04:03:59PM +1100, Gavin Shan wrote:
   On Mon, Mar 23, 2015 at 02:43:03PM +1100, David Gibson wrote:
   On Mon, Mar 23, 2015 at 12:56:36PM +1100, Gavin Shan wrote:
On Mon, Mar 23, 2015 at 12:39:45PM +1100, David Gibson wrote:
On Sat, Mar 21, 2015 at 06:58:45AM +1100, Gavin Shan wrote:
 The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
 to inject the specified EEH error, which is represented by
 (struct vfio_eeh_pe_err), to the indicated PE for testing purpose.
 
 Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com

Reviewed-by: David Gibson da...@gibson.dropbear.id.au

 ---
  Documentation/vfio.txt| 12 
  drivers/vfio/vfio_spapr_eeh.c | 10 ++
  include/uapi/linux/vfio.h | 36 
 +++-
  3 files changed, 57 insertions(+), 1 deletion(-)
 
 diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
 index 96978ec..c6e11a3 100644
 --- a/Documentation/vfio.txt
 +++ b/Documentation/vfio.txt
 @@ -385,6 +385,18 @@ The code flow from the example above should 
 be slightly changed:
  
   
  
 + /* Inject EEH error, which is expected to be caused by 32-bits
 +  * config load.
 +  */
 + pe_op.op = VFIO_EEH_PE_INJECT_ERR;
 + pe_op.err.type = VFIO_EEH_ERR_TYPE_32;
 + pe_op.err.func = VFIO_EEH_ERR_FUNC_LD_CFG_ADDR;
 + pe_op.err.addr = 0ul;
 + pe_op.err.mask = 0ul;
 + ioctl(container, VFIO_EEH_PE_OP, pe_op);
 +
 + 
 +
   /* When 0xFF's returned from reading PCI config space or IO BARs
* of the PCI device. Check the PE's state to see if that has 
 been
* frozen.
 diff --git a/drivers/vfio/vfio_spapr_eeh.c 
 b/drivers/vfio/vfio_spapr_eeh.c
 index 5fa42db..38edeb4 100644
 --- a/drivers/vfio/vfio_spapr_eeh.c
 +++ b/drivers/vfio/vfio_spapr_eeh.c
 @@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct 
 iommu_group *group,
   case VFIO_EEH_PE_CONFIGURE:
   ret = eeh_pe_configure(pe);
   break;
 + case VFIO_EEH_PE_INJECT_ERR:
 + minsz = offsetofend(struct vfio_eeh_pe_op, 
 err.mask);
 + if (op.argsz  minsz)
 + return -EINVAL;
 + if (copy_from_user(op, (void __user *)arg, 
 minsz))
 + return -EFAULT;
 +
 + ret = eeh_pe_inject_err(pe, op.err.type, 
 op.err.func,
 + op.err.addr, 
 op.err.mask);
 + break;
   default:
   ret = -EINVAL;
   }
 diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
 index 82889c3..f68e962 100644
 --- a/include/uapi/linux/vfio.h
 +++ b/include/uapi/linux/vfio.h
 @@ -468,12 +468,23 @@ struct vfio_iommu_spapr_tce_info {
   * - unfreeze IO/DMA for frozen PE;
   * - read PE state;
   * - reset PE;
 - * - configure PE.
 + * - configure PE;
 + * - inject EEH error.
   */
 +struct vfio_eeh_pe_err {
 + __u32 type;
 + __u32 func;
 + __u64 addr;
 + __u64 mask;
 +};
 +
  struct vfio_eeh_pe_op {
   __u32 argsz;
   __u32 flags;
   __u32 op;
 + union {
 + struct vfio_eeh_pe_err err;
 + };
  };
  
  #define VFIO_EEH_PE_DISABLE  0   /* Disable EEH 
 functionality */
 @@ -490,6 +501,29 @@ struct vfio_eeh_pe_op {
  #define VFIO_EEH_PE_RESET_HOT6   /* Assert hot 
 reset  */
  #define VFIO_EEH_PE_RESET_FUNDAMENTAL7   /* Assert 
 fundamental reset  */
  #define VFIO_EEH_PE_CONFIGURE8   /* PE 
 configuration  */
 +#define VFIO_EEH_PE_INJECT_ERR   9   /* Inject EEH 
 error  */
 +#define  VFIO_EEH_ERR_TYPE_320   /* 32-bits EEH 
 error type*/
 +#define  VFIO_EEH_ERR_TYPE_641   /* 64-bits EEH 
 error type*/
 +#define  VFIO_EEH_ERR_FUNC_LD_MEM_ADDR   0   /* 
 Memory load  */
 +#define  VFIO_EEH_ERR_FUNC_LD_MEM_DATA   1
 +#define  VFIO_EEH_ERR_FUNC_LD_IO_ADDR2   /* IO 
 load  */
 +#define  VFIO_EEH_ERR_FUNC_LD_IO_DATA3
 +#define  VFIO_EEH_ERR_FUNC_LD_CFG_ADDR   4   /* 
 Config load  */
 +#define  VFIO_EEH_ERR_FUNC_LD_CFG_DATA   5
 +#define  VFIO_EEH_ERR_FUNC_ST_MEM_ADDR   6   /* 
 Memory store */
 +#define