[PATCH] KVM: nVMX: Add support for rdtscp
From: Jan Kiszka jan.kis...@siemens.com If the guest CPU is supposed to support rdtscp and the host has rdtscp enabled in the secondary execution controls, we can also expose this feature to L1. Just extend nested_vmx_exit_handled to properly route EXIT_REASON_RDTSCP. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- arch/x86/include/uapi/asm/vmx.h | 1 + arch/x86/kvm/vmx.c | 6 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index c5f1a1d..1fe9218 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -67,6 +67,7 @@ #define EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_MISCONFIG 49 #define EXIT_REASON_INVEPT 50 +#define EXIT_REASON_RDTSCP 51 #define EXIT_REASON_PREEMPTION_TIMER52 #define EXIT_REASON_INVVPID 53 #define EXIT_REASON_WBINVD 54 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 50c675b..7875e9b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2467,6 +2467,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx-nested.nested_vmx_secondary_ctls_low = 0; vmx-nested.nested_vmx_secondary_ctls_high = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | @@ -7510,7 +7511,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); case EXIT_REASON_RDPMC: return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING); - case EXIT_REASON_RDTSC: + case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP: return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING); case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD: @@ -8517,6 +8518,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) exec_control); } } + if (!vmx-rdtscp_enabled) + vmx-nested.nested_vmx_secondary_ctls_high = + 
~SECONDARY_EXEC_RDTSCP; } /* Exposing INVPCID only when PCID is exposed */ -- 2.1.4 signature.asc Description: OpenPGP digital signature
[PATCH v2] KVM: nVMX: Add support for rdtscp
From: Jan Kiszka jan.kis...@siemens.com If the guest CPU is supposed to support rdtscp and the host has rdtscp enabled in the secondary execution controls, we can also expose this feature to L1. Just extend nested_vmx_exit_handled to properly route EXIT_REASON_RDTSCP. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- Changes in v2 (thinko in test scenario...): - respect L1's setting of SECONDARY_EXEC_RDTSCP arch/x86/include/uapi/asm/vmx.h | 1 + arch/x86/kvm/vmx.c | 9 +++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index c5f1a1d..1fe9218 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -67,6 +67,7 @@ #define EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_MISCONFIG 49 #define EXIT_REASON_INVEPT 50 +#define EXIT_REASON_RDTSCP 51 #define EXIT_REASON_PREEMPTION_TIMER52 #define EXIT_REASON_INVVPID 53 #define EXIT_REASON_WBINVD 54 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 50c675b..45e0a6b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2467,6 +2467,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx-nested.nested_vmx_secondary_ctls_low = 0; vmx-nested.nested_vmx_secondary_ctls_high = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | @@ -7510,7 +7511,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); case EXIT_REASON_RDPMC: return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING); - case EXIT_REASON_RDTSC: + case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP: return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING); case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD: @@ -8517,6 +8518,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) 
exec_control); } } + if (!vmx-rdtscp_enabled) + vmx-nested.nested_vmx_secondary_ctls_high = + ~SECONDARY_EXEC_RDTSCP; } /* Exposing INVPCID only when PCID is exposed */ @@ -9146,8 +9150,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) exec_control = ~SECONDARY_EXEC_RDTSCP; /* Take the following fields only from vmcs12 */ exec_control = ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | - SECONDARY_EXEC_APIC_REGISTER_VIRT); + SECONDARY_EXEC_APIC_REGISTER_VIRT); if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) exec_control |= vmcs12-secondary_vm_exec_control; -- 2.1.4 signature.asc Description: OpenPGP digital signature
Re: [PATCH 02/20] MIPS: Clear [MSA]FPE CSR.Cause after notify_die()
On Wed, Mar 11, 2015 at 02:44:38PM +, James Hogan wrote: Acked-by: Ralf Baechle r...@linux-mips.org Feel free to merge this through the KVM tree along with the remainder of the series. Ralf -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 08/12] KVM: arm/arm64: implement kvm_io_bus MMIO handling for the VGIC
Currently we use a lot of VGIC specific code to do the MMIO dispatching. Use the previous reworks to add kvm_io_bus style MMIO handlers. Those are not yet called by the MMIO abort handler, also the actual VGIC emulator function do not make use of it yet, but will be enabled with the following patches. Signed-off-by: Andre Przywara andre.przyw...@arm.com --- include/kvm/arm_vgic.h |9 virt/kvm/arm/vgic.c| 129 virt/kvm/arm/vgic.h|7 +++ 3 files changed, 145 insertions(+) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 9092fad..f90140c 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -24,6 +24,7 @@ #include linux/irqreturn.h #include linux/spinlock.h #include linux/types.h +#include kvm/iodev.h #define VGIC_NR_IRQS_LEGACY256 #define VGIC_NR_SGIS 16 @@ -147,6 +148,14 @@ struct vgic_vm_ops { int (*map_resources)(struct kvm *, const struct vgic_params *); }; +struct vgic_io_device { + gpa_t addr; + int len; + const struct vgic_io_range *reg_ranges; + struct kvm_vcpu *redist_vcpu; + struct kvm_io_device dev; +}; + struct vgic_dist { spinlock_t lock; boolin_kernel; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 8802ad7..e968179 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -32,6 +32,8 @@ #include asm/kvm_arm.h #include asm/kvm_mmu.h #include trace/events/kvm.h +#include asm/kvm.h +#include kvm/iodev.h /* * How the whole thing works (courtesy of Christoffer Dall): @@ -837,6 +839,66 @@ bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, } /** + * vgic_handle_mmio_access - handle an in-kernel MMIO access + * This is called by the read/write KVM IO device wrappers below. 
+ * @vcpu: pointer to the vcpu performing the access + * @this: pointer to the KVM IO device in charge + * @addr: guest physical address of the access + * @len: size of the access + * @val: pointer to the data region + * @is_write: read or write access + * + * returns true if the MMIO access could be performed + */ +static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, + int len, void *val, bool is_write) +{ + struct vgic_dist *dist = vcpu-kvm-arch.vgic; + struct vgic_io_device *iodev = container_of(this, + struct vgic_io_device, dev); + struct kvm_run *run = vcpu-run; + const struct vgic_io_range *range; + struct kvm_exit_mmio mmio; + bool updated_state; + gpa_t offset; + + offset = addr - iodev-addr; + range = vgic_find_range(iodev-reg_ranges, len, offset); + if (unlikely(!range || !range-handle_mmio)) { + pr_warn(Unhandled access %d %08llx %d\n, is_write, addr, len); + return -ENXIO; + } + + mmio.phys_addr = addr; + mmio.len = len; + mmio.is_write = is_write; + if (is_write) + memcpy(mmio.data, val, len); + mmio.private = iodev-redist_vcpu; + + spin_lock(dist-lock); + offset -= range-base; + if (vgic_validate_access(dist, range, offset)) { + updated_state = call_range_handler(vcpu, mmio, offset, range); + if (!is_write) + memcpy(val, mmio.data, len); + } else { + if (!is_write) + memset(val, 0, len); + updated_state = false; + } + spin_unlock(dist-lock); + kvm_prepare_mmio(run, mmio); + kvm_handle_mmio_return(vcpu, run); + + if (updated_state) + vgic_kick_vcpus(vcpu-kvm); + + return 0; +} + +/** * vgic_handle_mmio - handle an in-kernel MMIO access for the GIC emulation * @vcpu: pointer to the vcpu performing the access * @run: pointer to the kvm_run structure @@ -860,6 +922,73 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, return vcpu-kvm-arch.vgic.vm_ops.handle_mmio(vcpu, run, mmio); } +static int vgic_handle_mmio_read(struct kvm_vcpu *vcpu, +struct kvm_io_device *this, +gpa_t addr, int len, 
void *val) +{ + return vgic_handle_mmio_access(vcpu, this, addr, len, val, false); +} + +static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, const void *val) +{ + return vgic_handle_mmio_access(vcpu, this, addr, len, (void *)val, + true); +} + +struct kvm_io_device_ops vgic_io_ops = { + .read = vgic_handle_mmio_read, + .write = vgic_handle_mmio_write, +}; + +/** + *
[PATCH v2 05/12] KVM: arm/arm64: rename struct kvm_mmio_range to vgic_io_range
The name kvm_mmio_range is a bit bold, given that it only covers the VGIC's MMIO ranges. To avoid confusion with kvm_io_range, rename it to vgic_io_range. Signed-off-by: Andre Przywara andre.przyw...@arm.com Acked-by: Christoffer Dall christoffer.d...@linaro.org --- virt/kvm/arm/vgic-v2-emul.c |6 +++--- virt/kvm/arm/vgic-v3-emul.c |8 virt/kvm/arm/vgic.c | 18 +- virt/kvm/arm/vgic.h | 12 ++-- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c index c818662..ddb3135 100644 --- a/virt/kvm/arm/vgic-v2-emul.c +++ b/virt/kvm/arm/vgic-v2-emul.c @@ -319,7 +319,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false); } -static const struct kvm_mmio_range vgic_dist_ranges[] = { +static const struct vgic_io_range vgic_dist_ranges[] = { { .base = GIC_DIST_CTRL, .len= 12, @@ -647,7 +647,7 @@ static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu, * CPU Interface Register accesses - these are not accessed by the VM, but by * user space for saving and restoring VGIC state. 
*/ -static const struct kvm_mmio_range vgic_cpu_ranges[] = { +static const struct vgic_io_range vgic_cpu_ranges[] = { { .base = GIC_CPU_CTRL, .len= 12, @@ -674,7 +674,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev, struct kvm_device_attr *attr, u32 *reg, bool is_write) { - const struct kvm_mmio_range *r = NULL, *ranges; + const struct vgic_io_range *r = NULL, *ranges; phys_addr_t offset; int ret, cpuid, c; struct kvm_vcpu *vcpu, *tmp_vcpu; diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c index b3f1546..14943e3 100644 --- a/virt/kvm/arm/vgic-v3-emul.c +++ b/virt/kvm/arm/vgic-v3-emul.c @@ -340,7 +340,7 @@ static bool handle_mmio_idregs(struct kvm_vcpu *vcpu, return false; } -static const struct kvm_mmio_range vgic_v3_dist_ranges[] = { +static const struct vgic_io_range vgic_v3_dist_ranges[] = { { .base = GICD_CTLR, .len= 0x04, @@ -570,7 +570,7 @@ static bool handle_mmio_cfg_reg_redist(struct kvm_vcpu *vcpu, return vgic_handle_cfg_reg(reg, mmio, offset); } -static const struct kvm_mmio_range vgic_redist_sgi_ranges[] = { +static const struct vgic_io_range vgic_redist_sgi_ranges[] = { { .base = GICR_IGROUPR0, .len= 0x04, @@ -676,7 +676,7 @@ static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu, return false; } -static const struct kvm_mmio_range vgic_redist_ranges[] = { +static const struct vgic_io_range vgic_redist_ranges[] = { { .base = GICR_CTLR, .len= 0x04, @@ -726,7 +726,7 @@ static bool vgic_v3_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, unsigned long rdbase = dist-vgic_redist_base; int nrcpus = atomic_read(vcpu-kvm-online_vcpus); int vcpu_id; - const struct kvm_mmio_range *mmio_range; + const struct vgic_io_range *mmio_range; if (is_in_range(mmio-phys_addr, mmio-len, dbase, GIC_V3_DIST_SIZE)) { return vgic_handle_mmio_range(vcpu, run, mmio, diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index ffd937c..21a3550 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -712,11 +712,11 @@ void 
vgic_unqueue_irqs(struct kvm_vcpu *vcpu) } const -struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) +struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) { - const struct kvm_mmio_range *r = ranges; + const struct vgic_io_range *r = ranges; while (r-len) { if (offset = r-base @@ -729,7 +729,7 @@ struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges, } static bool vgic_validate_access(const struct vgic_dist *dist, -const struct kvm_mmio_range *range, +const struct vgic_io_range *range, unsigned long offset) { int irq; @@ -757,7 +757,7 @@ static bool vgic_validate_access(const struct vgic_dist *dist, static bool call_range_handler(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, unsigned long offset, - const struct
[PATCH v2 02/12] KVM: move iodev.h from virt/kvm/ to include/kvm
iodev.h contains definitions for the kvm_io_bus framework. This is needed both by the generic KVM code in virt/kvm as well as by architecture specific code under arch/. Putting the header file in virt/kvm and using local includes in the architecture part seems at least dodgy to me, so let's move the file into include/kvm, so that a more natural #include kvm/iodev.h can be used by all of the code. This also solves a problem later when using struct kvm_io_device in arm_vgic.h. Fixing up the FSF address in the GPL header and a wrong include path on the way. Signed-off-by: Andre Przywara andre.przyw...@arm.com Acked-by: Christoffer Dall christoffer.d...@linaro.org --- arch/powerpc/kvm/mpic.c |2 +- arch/x86/kvm/i8254.h |2 +- arch/x86/kvm/ioapic.h |2 +- arch/x86/kvm/irq.h|2 +- arch/x86/kvm/lapic.h |2 +- include/kvm/iodev.h | 76 virt/kvm/coalesced_mmio.c |2 +- virt/kvm/eventfd.c|2 +- virt/kvm/iodev.h | 77 - virt/kvm/kvm_main.c |2 +- 10 files changed, 84 insertions(+), 85 deletions(-) create mode 100644 include/kvm/iodev.h delete mode 100644 virt/kvm/iodev.h diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 8542f07..4703fad 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -34,7 +34,7 @@ #include asm/kvm_para.h #include asm/kvm_host.h #include asm/kvm_ppc.h -#include iodev.h +#include kvm/iodev.h #define MAX_CPU 32 #define MAX_SRC 256 diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index dd1b16b..c84990b 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -3,7 +3,7 @@ #include linux/kthread.h -#include iodev.h +#include kvm/iodev.h struct kvm_kpit_channel_state { u32 count; /* can be 65536 */ diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index c2e36d9..d9e02ca 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -3,7 +3,7 @@ #include linux/kvm_host.h -#include iodev.h +#include kvm/iodev.h struct kvm; struct kvm_vcpu; diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 
2d03568..ad68c73 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -27,7 +27,7 @@ #include linux/kvm_host.h #include linux/spinlock.h -#include iodev.h +#include kvm/iodev.h #include ioapic.h #include lapic.h diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 0bc6c65..e284c28 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -1,7 +1,7 @@ #ifndef __KVM_X86_LAPIC_H #define __KVM_X86_LAPIC_H -#include iodev.h +#include kvm/iodev.h #include linux/kvm_host.h diff --git a/include/kvm/iodev.h b/include/kvm/iodev.h new file mode 100644 index 000..a6d208b --- /dev/null +++ b/include/kvm/iodev.h @@ -0,0 +1,76 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + */ + +#ifndef __KVM_IODEV_H__ +#define __KVM_IODEV_H__ + +#include linux/kvm_types.h +#include linux/errno.h + +struct kvm_io_device; +struct kvm_vcpu; + +/** + * kvm_io_device_ops are called under kvm slots_lock. + * read and write handlers return 0 if the transaction has been handled, + * or non-zero to have it passed to the next device. 
+ **/ +struct kvm_io_device_ops { + int (*read)(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, + int len, + void *val); + int (*write)(struct kvm_vcpu *vcpu, +struct kvm_io_device *this, +gpa_t addr, +int len, +const void *val); + void (*destructor)(struct kvm_io_device *this); +}; + + +struct kvm_io_device { + const struct kvm_io_device_ops *ops; +}; + +static inline void kvm_iodevice_init(struct kvm_io_device *dev, +const struct kvm_io_device_ops *ops) +{ + dev-ops = ops; +} + +static inline int kvm_iodevice_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, gpa_t addr, + int l, void *v) +{ + return dev-ops-read ? dev-ops-read(vcpu, dev, addr, l, v) + : -EOPNOTSUPP; +} + +static inline int kvm_iodevice_write(struct kvm_vcpu *vcpu, +struct kvm_io_device
[PATCH v2 06/12] KVM: mark kvm-buses as empty once they were destroyed
In kvm_destroy_vm() we call kvm_io_bus_destroy() pretty early, especially before calling kvm_arch_destroy_vm(). To avoid unregistering devices from the already destroyed bus, let's mark the bus with NULL to let other users know it has been destroyed already. This avoids a crash on a VM shutdown with the VGIC using the kvm_io_bus later (the unregistering is in there to be able to roll back a faulting init). Signed-off-by: Andre Przywara andre.przyw...@arm.com --- virt/kvm/kvm_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8c7ab0b..6f164eb 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -604,8 +604,10 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	list_del(&kvm->vm_list);
 	spin_unlock(&kvm_lock);
 	kvm_free_irq_routing(kvm);
-	for (i = 0; i < KVM_NR_BUSES; i++)
+	for (i = 0; i < KVM_NR_BUSES; i++) {
 		kvm_io_bus_destroy(kvm->buses[i]);
+		kvm->buses[i] = NULL;
+	}
 	kvm_coalesced_mmio_free(kvm);
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
-- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 03/12] KVM: arm/arm64: remove now unneeded include directory from Makefile
virt/kvm was never really a good include directory for anything else than locally included headers. With the move of iodev.h there is no need anymore to add this directory the compiler's include path, so remove it from the arm and arm64 kvm Makefile. Signed-off-by: Andre Przywara andre.przyw...@arm.com Acked-by: Christoffer Dall christoffer.d...@linaro.org --- arch/arm/kvm/Makefile |2 +- arch/arm64/kvm/Makefile |2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index a093bf1..139e46c 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -7,7 +7,7 @@ ifeq ($(plus_virt),+virt) plus_virt_def := -DREQUIRES_VIRT=1 endif -ccflags-y += -Ivirt/kvm -Iarch/arm/kvm +ccflags-y += -Iarch/arm/kvm CFLAGS_arm.o := -I. $(plus_virt_def) CFLAGS_mmu.o := -I. diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index b22c636..d5904f8 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -2,7 +2,7 @@ # Makefile for Kernel-based Virtual Machine module # -ccflags-y += -Ivirt/kvm -Iarch/arm64/kvm +ccflags-y += -Iarch/arm64/kvm CFLAGS_arm.o := -I. CFLAGS_mmu.o := -I. -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 09/12] KVM: arm/arm64: prepare GICv2 emulation to be handled by kvm_io_bus
Using the framework provided by the recent vgic.c changes we register a kvm_io_bus device when initializing the virtual GICv2. Signed-off-by: Andre Przywara andre.przyw...@arm.com --- include/kvm/arm_vgic.h |1 + virt/kvm/arm/vgic-v2-emul.c | 13 + virt/kvm/arm/vgic.c | 17 + 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index f90140c..4523984 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -251,6 +251,7 @@ struct vgic_dist { unsigned long *irq_active_on_cpu; struct vgic_vm_ops vm_ops; + struct vgic_io_device dist_iodev; }; struct vgic_v2_cpu_if { diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c index 1dd183e..69f27c8 100644 --- a/virt/kvm/arm/vgic-v2-emul.c +++ b/virt/kvm/arm/vgic-v2-emul.c @@ -506,6 +506,7 @@ static bool vgic_v2_queue_sgi(struct kvm_vcpu *vcpu, int irq) static int vgic_v2_map_resources(struct kvm *kvm, const struct vgic_params *params) { + struct vgic_dist *dist = kvm-arch.vgic; int ret = 0; if (!irqchip_in_kernel(kvm)) @@ -516,13 +517,17 @@ static int vgic_v2_map_resources(struct kvm *kvm, if (vgic_ready(kvm)) goto out; - if (IS_VGIC_ADDR_UNDEF(kvm-arch.vgic.vgic_dist_base) || - IS_VGIC_ADDR_UNDEF(kvm-arch.vgic.vgic_cpu_base)) { + if (IS_VGIC_ADDR_UNDEF(dist-vgic_dist_base) || + IS_VGIC_ADDR_UNDEF(dist-vgic_cpu_base)) { kvm_err(Need to set vgic cpu and dist addresses first\n); ret = -ENXIO; goto out; } + vgic_register_kvm_io_dev(kvm, dist-vgic_dist_base, +KVM_VGIC_V2_DIST_SIZE, +vgic_dist_ranges, -1, dist-dist_iodev); + /* * Initialize the vgic if this hasn't already been done on demand by * accessing the vgic state from userspace. 
@@ -533,7 +538,7 @@ static int vgic_v2_map_resources(struct kvm *kvm, goto out; } - ret = kvm_phys_addr_ioremap(kvm, kvm-arch.vgic.vgic_cpu_base, + ret = kvm_phys_addr_ioremap(kvm, dist-vgic_cpu_base, params-vcpu_base, KVM_VGIC_V2_CPU_SIZE, true); if (ret) { @@ -541,7 +546,7 @@ static int vgic_v2_map_resources(struct kvm *kvm, goto out; } - kvm-arch.vgic.ready = true; + dist-ready = true; out: if (ret) kvm_vgic_destroy(kvm); diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index e968179..9a732d0 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -989,6 +989,21 @@ int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len, return ret; } +static void vgic_unregister_kvm_io_dev(struct kvm *kvm) +{ + struct vgic_dist *dist = kvm-arch.vgic; + + if (!dist || !kvm-buses[KVM_MMIO_BUS]) + return; + + mutex_lock(kvm-slots_lock); + /* We could get here without ever having registered a device. */ + if (dist-dist_iodev.dev.ops) + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, + dist-dist_iodev.dev); + mutex_unlock(kvm-slots_lock); +} + static int vgic_nr_shared_irqs(struct vgic_dist *dist) { return dist-nr_irqs - VGIC_NR_PRIVATE_IRQS; @@ -1705,6 +1720,8 @@ void kvm_vgic_destroy(struct kvm *kvm) struct kvm_vcpu *vcpu; int i; + vgic_unregister_kvm_io_dev(kvm); + kvm_for_each_vcpu(i, vcpu, kvm) kvm_vgic_vcpu_destroy(vcpu); -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 11/12] KVM: arm/arm64: rework MMIO abort handling to use KVM MMIO bus
Currently we have struct kvm_exit_mmio for encapsulating MMIO abort data to be passed on from syndrome decoding all the way down to the VGIC register handlers. Now as we switch the MMIO handling to be routed through the KVM MMIO bus, it does not make sense anymore to use that structure already from the beginning. So we put the data into kvm_run very early and use that encapsulation till the MMIO bus call. Then we fill kvm_exit_mmio in the VGIC only, making it a VGIC private structure. On that way we replace the data buffer in that structure with a pointer pointing to a single location in kvm_run, so we get rid of some copying on the way. I didn't bother to rename kvm_exit_mmio (to vgic_mmio or something), because that touches a lot of code lines without any good reason. This is based on an original patch by Nikolay. Signed-off-by: Andre Przywara andre.przyw...@arm.com Cc: Nikolay Nikolaev n.nikol...@virtualopensystems.com --- arch/arm/include/asm/kvm_mmio.h | 22 -- arch/arm/kvm/mmio.c | 60 ++--- arch/arm64/include/asm/kvm_mmio.h | 22 -- include/kvm/arm_vgic.h|3 -- virt/kvm/arm/vgic.c | 18 +++ virt/kvm/arm/vgic.h |8 + 6 files changed, 55 insertions(+), 78 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h index 3f83db2..d8e90c8 100644 --- a/arch/arm/include/asm/kvm_mmio.h +++ b/arch/arm/include/asm/kvm_mmio.h @@ -28,28 +28,6 @@ struct kvm_decode { bool sign_extend; }; -/* - * The in-kernel MMIO emulation code wants to use a copy of run-mmio, - * which is an anonymous type. Use our own type instead. 
- */ -struct kvm_exit_mmio { - phys_addr_t phys_addr; - u8 data[8]; - u32 len; - boolis_write; - void*private; -}; - -static inline void kvm_prepare_mmio(struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - run-mmio.phys_addr = mmio-phys_addr; - run-mmio.len = mmio-len; - run-mmio.is_write = mmio-is_write; - memcpy(run-mmio.data, mmio-data, mmio-len); - run-exit_reason= KVM_EXIT_MMIO; -} - int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, phys_addr_t fault_ipa); diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 5d3bfc0..bb2ab44 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -122,7 +122,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) } static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - struct kvm_exit_mmio *mmio) + struct kvm_run *run) { unsigned long rt; int len; @@ -148,9 +148,9 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, sign_extend = kvm_vcpu_dabt_issext(vcpu); rt = kvm_vcpu_dabt_get_rd(vcpu); - mmio-is_write = is_write; - mmio-phys_addr = fault_ipa; - mmio-len = len; + run-mmio.is_write = is_write; + run-mmio.phys_addr = fault_ipa; + run-mmio.len = len; vcpu-arch.mmio_decode.sign_extend = sign_extend; vcpu-arch.mmio_decode.rt = rt; @@ -162,23 +162,49 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return 0; } +/** + * handle_kernel_mmio - handle an in-kernel MMIO access + * @vcpu: pointer to the vcpu performing the access + * @run: pointer to the kvm_run structure + * + * returns true if the MMIO access has been performed in kernel space, + * and false if it needs to be emulated in user space. 
+ */ +static bool handle_kernel_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + int ret; + + if (run-mmio.is_write) { + ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, run-mmio.phys_addr, + run-mmio.len, run-mmio.data); + + } else { + ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, run-mmio.phys_addr, + run-mmio.len, run-mmio.data); + } + if (!ret) { + kvm_handle_mmio_return(vcpu, run); + return true; + } + + return false; +} + int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, phys_addr_t fault_ipa) { - struct kvm_exit_mmio mmio; unsigned long data; unsigned long rt; int ret; /* -* Prepare MMIO operation. First stash it in a private -* structure that we can use for in-kernel emulation. If the -* kernel can't handle it, copy it into run-mmio and let user -* space do its magic. +* Prepare MMIO operation. First put the MMIO data into run-mmio. +* Then try if some in-kernel emulation feels
[PATCH v2 01/12] KVM: Redesign kvm_io_bus_ API to pass VCPU structure to the callbacks.
From: Nikolay Nikolaev n.nikol...@virtualopensystems.com This is needed in e.g. ARM vGIC emulation, where the MMIO handling depends on the VCPU that does the access. Signed-off-by: Nikolay Nikolaev n.nikol...@virtualopensystems.com Signed-off-by: Andre Przywara andre.przyw...@arm.com Acked-by: Paolo Bonzini pbonz...@redhat.com Acked-by: Christoffer Dall christoffer.d...@linaro.org --- arch/powerpc/kvm/mpic.c| 10 ++ arch/powerpc/kvm/powerpc.c |4 ++-- arch/s390/kvm/diag.c |2 +- arch/x86/kvm/i8254.c | 14 +- arch/x86/kvm/i8259.c | 12 ++-- arch/x86/kvm/ioapic.c |8 arch/x86/kvm/lapic.c |4 ++-- arch/x86/kvm/vmx.c |2 +- arch/x86/kvm/x86.c | 13 +++-- include/linux/kvm_host.h | 10 +- virt/kvm/coalesced_mmio.c |5 +++-- virt/kvm/eventfd.c |4 ++-- virt/kvm/iodev.h | 23 +++ virt/kvm/kvm_main.c| 32 14 files changed, 79 insertions(+), 64 deletions(-) diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 39b3a8f..8542f07 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -1374,8 +1374,9 @@ static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val) return -ENXIO; } -static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr, -int len, void *ptr) +static int kvm_mpic_read(struct kvm_vcpu *vcpu, +struct kvm_io_device *this, +gpa_t addr, int len, void *ptr) { struct openpic *opp = container_of(this, struct openpic, mmio); int ret; @@ -1415,8 +1416,9 @@ static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr, return ret; } -static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr, - int len, const void *ptr) +static int kvm_mpic_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, const void *ptr) { struct openpic *opp = container_of(this, struct openpic, mmio); int ret; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 27c0fac..24bfe40 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -807,7 +807,7 @@ int kvmppc_handle_load(struct 
kvm_run *run, struct kvm_vcpu *vcpu, idx = srcu_read_lock(vcpu-kvm-srcu); - ret = kvm_io_bus_read(vcpu-kvm, KVM_MMIO_BUS, run-mmio.phys_addr, + ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, run-mmio.phys_addr, bytes, run-mmio.data); srcu_read_unlock(vcpu-kvm-srcu, idx); @@ -880,7 +880,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, idx = srcu_read_lock(vcpu-kvm-srcu); - ret = kvm_io_bus_write(vcpu-kvm, KVM_MMIO_BUS, run-mmio.phys_addr, + ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, run-mmio.phys_addr, bytes, run-mmio.data); srcu_read_unlock(vcpu-kvm-srcu, idx); diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 9254aff..329ec75 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -213,7 +213,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) * - gpr 3 contains the virtqueue index (passed as datamatch) * - gpr 4 contains the index on the bus (optionally) */ - ret = kvm_io_bus_write_cookie(vcpu-kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, + ret = kvm_io_bus_write_cookie(vcpu, KVM_VIRTIO_CCW_NOTIFY_BUS, vcpu-run-s.regs.gprs[2] 0x, 8, vcpu-run-s.regs.gprs[3], vcpu-run-s.regs.gprs[4]); diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 298781d..4dce6f8 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -443,7 +443,8 @@ static inline int pit_in_range(gpa_t addr) (addr KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH)); } -static int pit_ioport_write(struct kvm_io_device *this, +static int pit_ioport_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, int len, const void *data) { struct kvm_pit *pit = dev_to_pit(this); @@ -519,7 +520,8 @@ static int pit_ioport_write(struct kvm_io_device *this, return 0; } -static int pit_ioport_read(struct kvm_io_device *this, +static int pit_ioport_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, int len, void *data) { struct kvm_pit *pit = dev_to_pit(this); @@ -589,7 +591,8 @@ static int pit_ioport_read(struct kvm_io_device *this, return 
0; } -static int speaker_ioport_write(struct kvm_io_device *this, +static int
[PATCH v2 12/12] KVM: arm/arm64: remove now obsolete VGIC specific MMIO handling code
With all of the virtual GIC emulation code now being registered with the kvm_io_bus, we can remove all of the old MMIO handling code and its dispatching functionality. Signed-off-by: Andre Przywara andre.przyw...@arm.com --- include/kvm/arm_vgic.h |2 -- virt/kvm/arm/vgic-v2-emul.c | 19 virt/kvm/arm/vgic-v3-emul.c | 39 virt/kvm/arm/vgic.c | 71 --- virt/kvm/arm/vgic.h |5 --- 5 files changed, 136 deletions(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 14853d8..9503664 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -140,8 +140,6 @@ struct vgic_params { }; struct vgic_vm_ops { - bool(*handle_mmio)(struct kvm_vcpu *, struct kvm_run *, - struct kvm_exit_mmio *); bool(*queue_sgi)(struct kvm_vcpu *, int irq); void(*add_sgi_source)(struct kvm_vcpu *, int irq, int source); int (*init_model)(struct kvm *); diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c index 69f27c8..12cb1361 100644 --- a/virt/kvm/arm/vgic-v2-emul.c +++ b/virt/kvm/arm/vgic-v2-emul.c @@ -404,24 +404,6 @@ static const struct vgic_io_range vgic_dist_ranges[] = { {} }; -static bool vgic_v2_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - unsigned long base = vcpu-kvm-arch.vgic.vgic_dist_base; - - if (!is_in_range(mmio-phys_addr, mmio-len, base, -KVM_VGIC_V2_DIST_SIZE)) - return false; - - /* GICv2 does not support accesses wider than 32 bits */ - if (mmio-len 4) { - kvm_inject_dabt(vcpu, mmio-phys_addr); - return true; - } - - return vgic_handle_mmio_range(vcpu, run, mmio, vgic_dist_ranges, base); -} - static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) { struct kvm *kvm = vcpu-kvm; @@ -575,7 +557,6 @@ void vgic_v2_init_emulation(struct kvm *kvm) { struct vgic_dist *dist = kvm-arch.vgic; - dist-vm_ops.handle_mmio = vgic_v2_handle_mmio; dist-vm_ops.queue_sgi = vgic_v2_queue_sgi; dist-vm_ops.add_sgi_source = vgic_v2_add_sgi_source; dist-vm_ops.init_model = vgic_v2_init_model; diff --git 
a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c index 35679d1..4d443a07 100644 --- a/virt/kvm/arm/vgic-v3-emul.c +++ b/virt/kvm/arm/vgic-v3-emul.c @@ -710,45 +710,7 @@ static const struct vgic_io_range vgic_redist_ranges[] = { {}, }; -/* - * This function splits accesses between the distributor and the two - * redistributor parts (private/SPI). As each redistributor is accessible - * from any CPU, we have to determine the affected VCPU by taking the faulting - * address into account. We then pass this VCPU to the handler function via - * the private parameter. - */ #define SGI_BASE_OFFSET SZ_64K -static bool vgic_v3_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - struct vgic_dist *dist = vcpu-kvm-arch.vgic; - unsigned long dbase = dist-vgic_dist_base; - unsigned long rdbase = dist-vgic_redist_base; - int nrcpus = atomic_read(vcpu-kvm-online_vcpus); - int vcpu_id; - const struct vgic_io_range *mmio_range; - - if (is_in_range(mmio-phys_addr, mmio-len, dbase, GIC_V3_DIST_SIZE)) { - return vgic_handle_mmio_range(vcpu, run, mmio, - vgic_v3_dist_ranges, dbase); - } - - if (!is_in_range(mmio-phys_addr, mmio-len, rdbase, - GIC_V3_REDIST_SIZE * nrcpus)) - return false; - - vcpu_id = (mmio-phys_addr - rdbase) / GIC_V3_REDIST_SIZE; - rdbase += (vcpu_id * GIC_V3_REDIST_SIZE); - mmio-private = kvm_get_vcpu(vcpu-kvm, vcpu_id); - - if (mmio-phys_addr = rdbase + SGI_BASE_OFFSET) { - rdbase += SGI_BASE_OFFSET; - mmio_range = vgic_redist_sgi_ranges; - } else { - mmio_range = vgic_redist_ranges; - } - return vgic_handle_mmio_range(vcpu, run, mmio, mmio_range, rdbase); -} static bool vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, int irq) { @@ -864,7 +826,6 @@ void vgic_v3_init_emulation(struct kvm *kvm) { struct vgic_dist *dist = kvm-arch.vgic; - dist-vm_ops.handle_mmio = vgic_v3_handle_mmio; dist-vm_ops.queue_sgi = vgic_v3_queue_sgi; dist-vm_ops.add_sgi_source = vgic_v3_add_sgi_source; dist-vm_ops.init_model = vgic_v3_init_model; diff 
--git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 2598fe8..df25cf8 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -785,53 +785,6 @@ static bool call_range_handler(struct kvm_vcpu *vcpu, } /** - * vgic_handle_mmio_range - handle an in-kernel MMIO access - * @vcpu: pointer to
[PATCH v2 00/12] KVM: arm/arm64: move VGIC MMIO to kvm_io_bus
This series converts the VGIC MMIO handling routines to the generic kvm_io_bus framework. The framework is needed for the ioeventfd functionality, some people on the list wanted to see the VGIC converted over to use it, too. Beside from now moving to a generic framework instead of relying on an ARM specific one we also clean up quite some code and get rid of some unnecessary copying. On that way the MMIO abort handling for ARM has changed quite a bit, so please have a closer look and test it on your setup if possible. Based on the v1 review I addressed Christoffer's minor comments, but also heavily changed [11/12]: KVM: ARM: on IO mem abort - route the call to KVM MMIO bus to get rid of the now unnecessary copying and the usage of kvm_exit_mmio in that early stage. See the respective commit message for more details. The series is loosely based on Nikolay's work[1], thanks especially for the tedious first patch. I totally reworked Nikolay's 3/5 to avoid adding another MMIO handling layer on top of the already quite convoluted VGIC MMIO handling. Also Nikolay's 2/5 get extended and changed significantly, that's why I dropped his Signed-off-by. Unfortunately kvm_io_bus lacks an opaque pointer to pass in some data, so I worked around this by using container_of. Now for every struct kvm_mmio_range array a KVM I/O device is registered (one for VGICv2, 2*nr_vcpus + 1 for VGICv3), using the struct kvm_io_device variable as an anchor into the new struct vgic_io_device. This one holds the base address, the vgic_io_range pointer and (in case of the GICv3 redistributor) the associated vCPU, so that we can access all instance-specific data easily. Patch 2 moves the iodev.h header file around, that solves a problem when embedding a struct in arm_vgic.h later. That looks like a nice cleanup anyway, so I added two patches to remove the compiler switch to add virt/kvm as a include directory. This has been tested for arm/arm64 and x86. 
As soon as I get around to compile-test the other architectures, I can send out the respective patches for those, too. Patches 5-7 tweak the existing code a bit to make it fit for the conversion. Patch 8 contains the framework for the new handling, while patch 9 and 10 enable the GICv2 and GICv3 emulation, respectively. Patch 11 finally switches over to the new kvm_io_bus handling, reworking the early ARM KVM MMIO handling quite a bit. Patch 12 removes the now unneeded code. I split this up to ease reviewing, I could merge patches as well if needed. The series goes on top of the kvmarm.git/next branch and was briefly tested on an arm64 model with a GICv2 and a GICv3 guest and on Midway (GICv2 guest). Cheers, Andre. [1] https://lists.cs.columbia.edu/pipermail/kvmarm/2015-January/013379.html Andre Przywara (11): KVM: move iodev.h from virt/kvm/ to include/kvm KVM: arm/arm64: remove now unneeded include directory from Makefile KVM: x86: remove now unneeded include directory from Makefile KVM: arm/arm64: rename struct kvm_mmio_range to vgic_io_range KVM: mark kvm-buses as empty once they were destroyed KVM: arm/arm64: simplify vgic_find_range() and callers KVM: arm/arm64: implement kvm_io_bus MMIO handling for the VGIC KVM: arm/arm64: prepare GICv2 emulation to be handled by kvm_io_bus KVM: arm/arm64: prepare GICv3 emulation to use kvm_io_bus MMIO handling KVM: arm/arm64: rework MMIO abort handling to use KVM MMIO bus KVM: arm/arm64: remove now obsolete VGIC specific MMIO handling code Nikolay Nikolaev (1): KVM: Redesign kvm_io_bus_ API to pass VCPU structure to the callbacks. 
arch/arm/include/asm/kvm_mmio.h | 22 arch/arm/kvm/Makefile |2 +- arch/arm/kvm/mmio.c | 60 --- arch/arm64/include/asm/kvm_mmio.h | 22 arch/arm64/kvm/Makefile |2 +- arch/powerpc/kvm/mpic.c | 12 ++- arch/powerpc/kvm/powerpc.c|4 +- arch/s390/kvm/diag.c |2 +- arch/x86/kvm/Makefile |2 +- arch/x86/kvm/i8254.c | 14 ++- arch/x86/kvm/i8254.h |2 +- arch/x86/kvm/i8259.c | 12 +-- arch/x86/kvm/ioapic.c |8 +- arch/x86/kvm/ioapic.h |2 +- arch/x86/kvm/irq.h|2 +- arch/x86/kvm/lapic.c |4 +- arch/x86/kvm/lapic.h |2 +- arch/x86/kvm/vmx.c|2 +- arch/x86/kvm/x86.c| 13 +-- include/kvm/arm_vgic.h| 16 ++- include/kvm/iodev.h | 76 + include/linux/kvm_host.h | 10 +- virt/kvm/arm/vgic-v2-emul.c | 40 +++ virt/kvm/arm/vgic-v3-emul.c | 79 +++--- virt/kvm/arm/vgic.c | 211 + virt/kvm/arm/vgic.h | 29 +++-- virt/kvm/coalesced_mmio.c |7 +- virt/kvm/eventfd.c|6 +- virt/kvm/iodev.h | 70
[PATCH v2 04/12] KVM: x86: remove now unneeded include directory from Makefile
virt/kvm was never really a good include directory for anything other than locally included headers. With the move of iodev.h there is no need anymore to add this directory to the compiler's include path, so remove it from the x86 kvm Makefile. Signed-off-by: Andre Przywara andre.przyw...@arm.com --- arch/x86/kvm/Makefile |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 08f790d..16e8f96 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -1,5 +1,5 @@ -ccflags-y += -Ivirt/kvm -Iarch/x86/kvm +ccflags-y += -Iarch/x86/kvm CFLAGS_x86.o := -I. CFLAGS_svm.o := -I. -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 07/12] KVM: arm/arm64: simplify vgic_find_range() and callers
The vgic_find_range() function in vgic.c takes a struct kvm_exit_mmio argument, but actually only used the length field in there. Since we need to get rid of that structure in that part of the code anyway, let's rework the function (and it's callers) to pass the length argument to the function directly. Signed-off-by: Andre Przywara andre.przyw...@arm.com Reviewed-by: Christoffer Dall christoffer.d...@linaro.org --- virt/kvm/arm/vgic-v2-emul.c |2 +- virt/kvm/arm/vgic.c | 22 -- virt/kvm/arm/vgic.h |3 +-- 3 files changed, 10 insertions(+), 17 deletions(-) diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c index ddb3135..1dd183e 100644 --- a/virt/kvm/arm/vgic-v2-emul.c +++ b/virt/kvm/arm/vgic-v2-emul.c @@ -715,7 +715,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev, default: BUG(); } - r = vgic_find_range(ranges, mmio, offset); + r = vgic_find_range(ranges, 4, offset); if (unlikely(!r || !r-handle_mmio)) { ret = -ENXIO; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 21a3550..8802ad7 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -713,16 +713,13 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) const struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) + int len, gpa_t offset) { - const struct vgic_io_range *r = ranges; - - while (r-len) { - if (offset = r-base - (offset + mmio-len) = (r-base + r-len)) - return r; - r++; + while (ranges-len) { + if (offset = ranges-base + (offset + len) = (ranges-base + ranges-len)) + return ranges; + ranges++; } return NULL; @@ -813,7 +810,7 @@ bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, unsigned long offset; offset = mmio-phys_addr - mmio_base; - range = vgic_find_range(ranges, mmio, offset); + range = vgic_find_range(ranges, mmio-len, offset); if (unlikely(!range || !range-handle_mmio)) { pr_warn(Unhandled access %d %08llx %d\n, mmio-is_write, mmio-phys_addr, mmio-len); @@ 
-1986,10 +1983,7 @@ int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr) int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset) { - struct kvm_exit_mmio dev_attr_mmio; - - dev_attr_mmio.len = 4; - if (vgic_find_range(ranges, dev_attr_mmio, offset)) + if (vgic_find_range(ranges, 4, offset)) return 0; else return -ENXIO; diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h index 6fccb96..01aa622 100644 --- a/virt/kvm/arm/vgic.h +++ b/virt/kvm/arm/vgic.h @@ -90,8 +90,7 @@ static inline bool is_in_range(phys_addr_t addr, unsigned long len, const struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges, - struct kvm_exit_mmio *mmio, - phys_addr_t offset); + int len, gpa_t offset); bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_exit_mmio *mmio, -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 10/12] KVM: arm/arm64: prepare GICv3 emulation to use kvm_io_bus MMIO handling
Using the framework provided by the recent vgic.c changes, we register a kvm_io_bus device on mapping the virtual GICv3 resources. The distributor mapping is pretty straight forward, but the redistributors need some more love, since they need to be tagged with the respective redistributor (read: VCPU) they are connected with. We use the kvm_io_bus framework to register two devices per VCPU, as each block is handled independently by the VGIC code. Signed-off-by: Andre Przywara andre.przyw...@arm.com --- include/kvm/arm_vgic.h |1 + virt/kvm/arm/vgic-v3-emul.c | 34 +- virt/kvm/arm/vgic.c | 18 ++ 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 4523984..d6705f4 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -252,6 +252,7 @@ struct vgic_dist { struct vgic_vm_ops vm_ops; struct vgic_io_device dist_iodev; + struct vgic_io_device *redist_iodevs; }; struct vgic_v2_cpu_if { diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c index 14943e3..35679d1 100644 --- a/virt/kvm/arm/vgic-v3-emul.c +++ b/virt/kvm/arm/vgic-v3-emul.c @@ -766,6 +766,9 @@ static int vgic_v3_map_resources(struct kvm *kvm, { int ret = 0; struct vgic_dist *dist = kvm-arch.vgic; + gpa_t rdbase = dist-vgic_redist_base; + struct vgic_io_device *iodevs = NULL; + int i; if (!irqchip_in_kernel(kvm)) return 0; @@ -791,7 +794,36 @@ static int vgic_v3_map_resources(struct kvm *kvm, goto out; } - kvm-arch.vgic.ready = true; + ret = vgic_register_kvm_io_dev(kvm, dist-vgic_dist_base, + GIC_V3_DIST_SIZE, vgic_v3_dist_ranges, + -1, dist-dist_iodev); + if (ret) + goto out; + + iodevs = kcalloc(dist-nr_cpus * 2, sizeof(iodevs[0]), GFP_KERNEL); + if (!iodevs) { + ret = -ENOMEM; + goto out; + } + + /* kvm_vgic_destroy() will take care of destroying the devices later. 
*/ + for (i = 0; i dist-nr_cpus; i++) { + ret = vgic_register_kvm_io_dev(kvm, rdbase, + SZ_64K, vgic_redist_ranges, + i, iodevs[i * 2]); + if (ret) + goto out; + ret = vgic_register_kvm_io_dev(kvm, rdbase + SGI_BASE_OFFSET, + SZ_64K, vgic_redist_sgi_ranges, + i, iodevs[i * 2 + 1]); + if (ret) + goto out; + rdbase += GIC_V3_REDIST_SIZE; + } + + dist-redist_iodevs = iodevs; + dist-ready = true; + out: if (ret) kvm_vgic_destroy(kvm); diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 9a732d0..9cbb55f4 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -992,6 +992,8 @@ int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len, static void vgic_unregister_kvm_io_dev(struct kvm *kvm) { struct vgic_dist *dist = kvm-arch.vgic; + struct vgic_io_device *iodevs; + int i; if (!dist || !kvm-buses[KVM_MMIO_BUS]) return; @@ -1001,6 +1003,22 @@ static void vgic_unregister_kvm_io_dev(struct kvm *kvm) if (dist-dist_iodev.dev.ops) kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, dist-dist_iodev.dev); + + iodevs = dist-redist_iodevs; + if (iodevs) { + for (i = 0; i dist-nr_cpus * 2; i++) { + /* +* Because of a failed initialization we could get here +* without ever having registered a device. +*/ + if (!iodevs[i].dev.ops) + continue; + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, + iodevs[i].dev); + } + kfree(iodevs); + dist-redist_iodevs = NULL; + } mutex_unlock(kvm-slots_lock); } -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RESEND PATCH] KVM: PPC: Book3S HV: Deliver machine check with MSR(RI=0) to guest as MCE.
From: Mahesh Salgaonkar mah...@linux.vnet.ibm.com For the machine check interrupt that happens while we are in the guest, kvm layer attempts the recovery, and then delivers the machine check interrupt directly to the guest if recovery fails. On successful recovery we go back to normal functioning of the guest. But there can be cases where a machine check interrupt can happen with MSR(RI=0) while we are in the guest. This means MC interrupt is unrecoverable and we have to deliver a machine check to the guest since the machine check interrupt might have trashed valid values in SRR0/1. The current implementation do not handle this case, causing guest to crash with Bad kernel stack pointer instead of machine check oops message. [26281.490060] Bad kernel stack pointer 3fff9ccce5b0 at c000490c [26281.490434] Oops: Bad kernel stack pointer, sig: 6 [#1] [26281.490472] SMP NR_CPUS=2048 NUMA pSeries This patch fixes this issue by checking MSR(RI=0) in KVM layer and forwarding unrecoverable interrupt to guest which then panics with proper machine check Oops message. Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com Acked-by: Paul Mackerras pau...@samba.org --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 12 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index bb94e6f..258f46d 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2063,7 +2063,6 @@ machine_check_realmode: mr r3, r9 /* get vcpu pointer */ bl kvmppc_realmode_machine_check nop - cmpdi r3, 0 /* Did we handle MCE ? */ ld r9, HSTATE_KVM_VCPU(r13) li r12, BOOK3S_INTERRUPT_MACHINE_CHECK /* @@ -2076,13 +2075,18 @@ machine_check_realmode: * The old code used to return to host for unhandled errors which * was causing guest to hang with soft lockups inside guest and * makes it difficult to recover guest instance. 
+* +* if we receive machine check with MSR(RI=0) then deliver it to +* guest as machine check causing guest to crash. */ - ld r10, VCPU_PC(r9) ld r11, VCPU_MSR(r9) + andi. r10, r11, MSR_RI/* check for unrecoverable exception */ + beq 1f /* Deliver a machine check to guest */ + ld r10, VCPU_PC(r9) + cmpdi r3, 0 /* Did we handle MCE ? */ bne 2f /* Continue guest execution. */ /* If not, deliver a machine check. SRR0/1 are already set */ - li r10, BOOK3S_INTERRUPT_MACHINE_CHECK - ld r11, VCPU_MSR(r9) +1: li r10, BOOK3S_INTERRUPT_MACHINE_CHECK bl kvmppc_msr_interrupt 2: b fast_interrupt_c_return -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 2/2] drivers/vfio: Support EEH error injection
On Mon, 2015-03-23 at 16:20 +1100, Gavin Shan wrote: On Mon, Mar 23, 2015 at 04:10:20PM +1100, David Gibson wrote: On Mon, Mar 23, 2015 at 04:03:59PM +1100, Gavin Shan wrote: On Mon, Mar 23, 2015 at 02:43:03PM +1100, David Gibson wrote: On Mon, Mar 23, 2015 at 12:56:36PM +1100, Gavin Shan wrote: On Mon, Mar 23, 2015 at 12:39:45PM +1100, David Gibson wrote: On Sat, Mar 21, 2015 at 06:58:45AM +1100, Gavin Shan wrote: The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR) to inject the specified EEH error, which is represented by (struct vfio_eeh_pe_err), to the indicated PE for testing purpose. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com Reviewed-by: David Gibson da...@gibson.dropbear.id.au --- Documentation/vfio.txt| 12 drivers/vfio/vfio_spapr_eeh.c | 10 ++ include/uapi/linux/vfio.h | 36 +++- 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index 96978ec..c6e11a3 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt @@ -385,6 +385,18 @@ The code flow from the example above should be slightly changed: +/* Inject EEH error, which is expected to be caused by 32-bits + * config load. + */ +pe_op.op = VFIO_EEH_PE_INJECT_ERR; +pe_op.err.type = VFIO_EEH_ERR_TYPE_32; +pe_op.err.func = VFIO_EEH_ERR_FUNC_LD_CFG_ADDR; +pe_op.err.addr = 0ul; +pe_op.err.mask = 0ul; +ioctl(container, VFIO_EEH_PE_OP, pe_op); + + + /* When 0xFF's returned from reading PCI config space or IO BARs * of the PCI device. Check the PE's state to see if that has been * frozen. 
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c index 5fa42db..38edeb4 100644 --- a/drivers/vfio/vfio_spapr_eeh.c +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, case VFIO_EEH_PE_CONFIGURE: ret = eeh_pe_configure(pe); break; +case VFIO_EEH_PE_INJECT_ERR: +minsz = offsetofend(struct vfio_eeh_pe_op, err.mask); +if (op.argsz minsz) +return -EINVAL; +if (copy_from_user(op, (void __user *)arg, minsz)) +return -EFAULT; + +ret = eeh_pe_inject_err(pe, op.err.type, op.err.func, +op.err.addr, op.err.mask); +break; default: ret = -EINVAL; } diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 82889c3..f68e962 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -468,12 +468,23 @@ struct vfio_iommu_spapr_tce_info { * - unfreeze IO/DMA for frozen PE; * - read PE state; * - reset PE; - * - configure PE. + * - configure PE; + * - inject EEH error. */ +struct vfio_eeh_pe_err { +__u32 type; +__u32 func; +__u64 addr; +__u64 mask; +}; + struct vfio_eeh_pe_op { __u32 argsz; __u32 flags; __u32 op; +union { +struct vfio_eeh_pe_err err; +}; }; #define VFIO_EEH_PE_DISABLE 0 /* Disable EEH functionality */ @@ -490,6 +501,29 @@ struct vfio_eeh_pe_op { #define VFIO_EEH_PE_RESET_HOT 6 /* Assert hot reset */ #define VFIO_EEH_PE_RESET_FUNDAMENTAL 7 /* Assert fundamental reset */ #define VFIO_EEH_PE_CONFIGURE 8 /* PE configuration */ +#define VFIO_EEH_PE_INJECT_ERR 9 /* Inject EEH error */ +#define VFIO_EEH_ERR_TYPE_32 0 /* 32-bits EEH error type*/ +#define VFIO_EEH_ERR_TYPE_64 1 /* 64-bits EEH error type*/ +#define VFIO_EEH_ERR_FUNC_LD_MEM_ADDR 0 /* Memory load */ +#define VFIO_EEH_ERR_FUNC_LD_MEM_DATA 1 +#define VFIO_EEH_ERR_FUNC_LD_IO_ADDR 2 /* IO load */ +#define VFIO_EEH_ERR_FUNC_LD_IO_DATA 3 +#define VFIO_EEH_ERR_FUNC_LD_CFG_ADDR 4 /* Config load */ +#define VFIO_EEH_ERR_FUNC_LD_CFG_DATA 5 +#define VFIO_EEH_ERR_FUNC_ST_MEM_ADDR 6 /* Memory store */ +#define 
VFIO_EEH_ERR_FUNC_ST_MEM_DATA 7 +#define VFIO_EEH_ERR_FUNC_ST_IO_ADDR 8
[RESEND PATCH v2] powerpc/book3s: Fix the MCE code to use CONFIG_KVM_BOOK3S_64_HANDLER
From: Mahesh Salgaonkar mah...@linux.vnet.ibm.com commit id 2ba9f0d changed CONFIG_KVM_BOOK3S_64_HV to tristate to allow HV/PR bits to be built as modules. But the MCE code still depends on CONFIG_KVM_BOOK3S_64_HV which is wrong. When user selects CONFIG_KVM_BOOK3S_64_HV=m to build HV/PR bits as a separate module the relevant MCE code gets excluded. This patch fixes the MCE code to use CONFIG_KVM_BOOK3S_64_HANDLER. This makes sure that the relevant MCE code is included when HV/PR bits are built as a separate modules. Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com Acked-by: Paul Mackerras pau...@samba.org Cc: sta...@vger.kernel.org # v3.14+ --- arch/powerpc/kernel/exceptions-64s.S |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index c2df815..9519e6b 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1408,7 +1408,7 @@ machine_check_handle_early: bne 9f /* continue in V mode if we are. */ 5: -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER /* * We are coming from kernel context. Check if we are coming from * guest. if yes, then we can continue. We will fall through -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RESEND PATCH v2] powerpc/book3s: Fix the MCE code to use CONFIG_KVM_BOOK3S_64_HANDLER
From: Mahesh Salgaonkar mah...@linux.vnet.ibm.com commit id 2ba9f0d changed CONFIG_KVM_BOOK3S_64_HV to tristate to allow HV/PR bits to be built as modules. But the MCE code still depends on CONFIG_KVM_BOOK3S_64_HV which is wrong. When user selects CONFIG_KVM_BOOK3S_64_HV=m to build HV/PR bits as a separate module the relevant MCE code gets excluded. This patch fixes the MCE code to use CONFIG_KVM_BOOK3S_64_HANDLER. This makes sure that the relevant MCE code is included when HV/PR bits are built as a separate modules. Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com Acked-by: Paul Mackerras pau...@samba.org Cc: sta...@vger.kernel.org # v3.14+ --- arch/powerpc/kernel/exceptions-64s.S |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index c2df815..9519e6b 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1408,7 +1408,7 @@ machine_check_handle_early: bne 9f /* continue in V mode if we are. */ 5: -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER /* * We are coming from kernel context. Check if we are coming from * guest. if yes, then we can continue. We will fall through -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
iSCSI multipath failure with libvirtError: Failed to open file '/dev/mapper/Mar': No such file or directory
hello All, I know the issue is related to libvirt,but i dont know where to ask. i have centos 6.6 running KVM as compute node in openstack icehouse when i try to attach volume to instance it shows 2596: error : virStorageFileGetMetadataRecurse:952 : Failed to open file '/dev/mapper/Mar': No such file or directory in libvirt log This does not always happen when it happens no one will be able to attach volume to instance using EMC VNX as storage backend. multipath.conf # Skip the files uner /dev that are definitely not FC/iSCSI devices # Different system may need different customization devnode ^(ram|raw|loop|fd|md|dm-|sr|scd|st)[0-9]* devnode ^hd[a-z][0-9]* devnode ^cciss!c[0-9]d[0-9]*[p[0-9]*] # Skip LUNZ device from VNX device { vendor DGC product LUNZ } } defaults { user_friendly_names no flush_on_last_del yes } devices { # Device attributed for EMC CLARiiON and VNX series ALUA device { vendor DGC product .* product_blacklist LUNZ path_grouping_policy group_by_prio path_selector round-robin 0 path_checker emc_clariion features 1 queue_if_no_path hardware_handler 1 alua prio alua failback immediate } } Can any one help me with this issue -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM live migration I/O error
On Fri, Mar 20, 2015 at 12:34:59PM +0100, Francesc Guasch wrote: On Fri, Mar 20, 2015 at 10:03:20AM +, Stefan Hajnoczi wrote: Hi Stefan, thank you very much for answering me. On Wed, Mar 18, 2015 at 04:53:28PM +0100, Francesc Guasch wrote: I have three Ubuntu Server 14.04 trusty with KVM. Two of them are HP servers and one is Dell. Both brands run fine the KVM virtual servers, and I can do live migration between the HPs. But I get I/O errors in the vda when I migrate to or from the Dell server. I have shared storage with NFS, mounted the same way in all of them: As soon as it starts in the origin console I spot I/O error messages, when it finishes I got them in the console in the destination server. The file system is read only and I have to shut it down hard. end request I/O error, /dev/vda, sector 8790327 origin console == guest's console? Yes, I mean I open two consoles with virt-manager, one in the origin host and another one in the destination I/O errors starting while the guest is still running on the migration source host is strange. I wonder if something happened to the NFS file related to file permissions or SELinux labels? I think I found something checking SELinux. ls -Z and getfattr return nothing. But ps -eZ showed something very different in the Dell server. This is in the HP server: /usr/sbin/libvirtd 1034 ?11:51:44 libvirtd libvirt-09540b5d-82 701 ?05:28:40 qemu-system-x86 unconfined 1?00:01:00 init In the Dell server init is confined in lxc and there are also lxc-start processes. /usr/sbin/libvirtd 1622 ?05:07:07 libvirtd libvirt-8a0f9087-32d... 29926 ? 00:00:01 qemu-system-x86 lxc-container-default 1774 ?00:00:00 init /usr/bin/lxc-start 1763 ?00:00:00 lxc-start There is also LXC installed in that server ! Maybe that is messing with kvm. The qemu processes look fine to me but there is a chance the problem comes from there. I could move the LXC somewhere else or I can keep it there to try to fix this issue. What do you advice I should do now ? 
I suggest asking on the libvirt mailing list: libvirt-l...@redhat.com pgpk8pbvBvTGx.pgp Description: PGP signature
Re: Windows 7 guest installer does not detect drive if physical partition used instead of disk file.
On Sat, Mar 21, 2015 at 01:50:46AM +0800, Emmanuel Noobadmin wrote: Running 3.18.9-200.fc21.x86_64 qemu 2:2.1.3-3.fc21 libvirt 1.2.9.2-1.fc21 System is a Thinkpad X250 with Intel i7-5600u Broadwell GT2 I'm trying to replace the Win7 installation on my laptop with Fedora 21 and virtualizing Windows 7 for work purposes. I'd prefer to give the guest its own NTFS partition instead of using a file for both performance and ease of potential recovery. So I've set aside unpartitioned space on the hard disk and added /dev/sda to the virt-manager storage pool, created a new volume and assigned it to the guest as an IDE drive. Unfortunately, the Windows 7 installer does not see this drive despite being IDE and not virtio. If I use a qcow2 file as the drive, the installer has no problems detecting it. To eliminate virt-manager from the equation, I've also tried to do a very basic install using virt-install with similar results, the physical partition cannot be detected regardless of bus type (IDE/SATA/virtio) even with the signed Redhat virtio drivers loaded by the installer. I was unable to find any similar issues or solutions online except a 2 year old thread on linuxquestions which quoted that we must specify the whole disk instead of a partition. However, I cannot find the source of that quote. http://www.linuxquestions.org/questions/linux-virtualization-and-cloud-90/qemu-kvm-on-a-real-partition-947162/ Is this really the case and the reason why Windows 7 cannot see the physical partition or there is something else I am doing wrong? I have CCed the libvirt mailing list, since KVM is a component here but your question seems to be mainly about libvirt, virt-manager, virt-install, etc. It sounds like you want an NTFS partition on /dev/sda. That requires passing the whole /dev/sda drive to the guest - and the Windows installer might overwrite your GRUB Master Boot Record. Be careful when trying to do this. 
Also keep in mind that the virtual machine's hardware and your physical hardware are probably quiet different (different chipsets, PCI devices, etc). Windows might not be happy booting on the physical host if it was installed under KVM, and vice versa. This is known as physical-to-virtual (p2v) migration and means some tweaks or driver installs may be necessary to make Windows run after switching. Stefan pgprBcrq2t8NW.pgp Description: PGP signature
Re: [PATCH 07/23] KVM: PPC: Book3S: Allow reuse of vCPU object
On Sat, Mar 21, 2015 at 8:28 PM, Alexander Graf ag...@suse.de wrote: On 20.03.15 16:51, Bharata B Rao wrote: On Fri, Mar 20, 2015 at 12:34:18PM +0100, Alexander Graf wrote: On 20.03.15 12:26, Paul Mackerras wrote: On Fri, Mar 20, 2015 at 12:01:32PM +0100, Alexander Graf wrote: On 20.03.15 10:39, Paul Mackerras wrote: From: Bharata B Rao bhar...@linux.vnet.ibm.com Since KVM isn't equipped to handle closure of vcpu fd from userspace(QEMU) correctly, certain work arounds have to be employed to allow reuse of vcpu array slot in KVM during cpu hot plug/unplug from guest. One such proposed workaround is to park the vcpu fd in userspace during cpu unplug and reuse it later during next hotplug. More details can be found here: KVM: https://www.mail-archive.com/kvm@vger.kernel.org/msg102839.html QEMU: http://lists.gnu.org/archive/html/qemu-devel/2014-12/msg00859.html In order to support this workaround with PowerPC KVM, don't create or initialize ICP if the vCPU is found to be already associated with an ICP. Signed-off-by: Bharata B Rao bhar...@linux.vnet.ibm.com Signed-off-by: Paul Mackerras pau...@samba.org This probably makes some sense, but please make sure that user space has some way to figure out whether hotplug works at all. Bharata is working on the qemu side of all this, so I assume he has that covered. Well, so far the kernel doesn't expose anything he can query, so I suppose he just blindly assumes that older host kernels will randomly break and nobody cares. I'd rather prefer to see a CAP exposed that qemu can check on. I see that you have already taken this into your tree. I have an updated patch to expose a CAP. If the below patch looks ok, then let me know how you would prefer to take this patch in. Regards, Bharata. 
KVM: PPC: BOOK3S: Allow reuse of vCPU object From: Bharata B Rao bhar...@linux.vnet.ibm.com Since KVM isn't equipped to handle closure of vcpu fd from userspace(QEMU) correctly, certain work arounds have to be employed to allow reuse of vcpu array slot in KVM during cpu hot plug/unplug from guest. One such proposed workaround is to park the vcpu fd in userspace during cpu unplug and reuse it later during next hotplug. More details can be found here: KVM: https://www.mail-archive.com/kvm@vger.kernel.org/msg102839.html QEMU: http://lists.gnu.org/archive/html/qemu-devel/2014-12/msg00859.html In order to support this workaround with PowerPC KVM, don't create or initialize ICP if the vCPU is found to be already associated with an ICP. User space (QEMU) can reuse the vCPU after checking for the availability of KVM_CAP_SPAPR_REUSE_VCPU capability. Signed-off-by: Bharata B Rao bhar...@linux.vnet.ibm.com --- arch/powerpc/kvm/book3s_xics.c |9 +++-- arch/powerpc/kvm/powerpc.c | 12 include/uapi/linux/kvm.h |1 + 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index a4a8d9f..ead3a35 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -1313,8 +1313,13 @@ int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, return -EPERM; if (xics-kvm != vcpu-kvm) return -EPERM; - if (vcpu-arch.irq_type) - return -EBUSY; + + /* + * If irq_type is already set, don't reinialize but + * return success allowing this vcpu to be reused. 
+ */ + if (vcpu-arch.irq_type != KVMPPC_IRQ_DEFAULT) + return 0; r = kvmppc_xics_create_icp(vcpu, xcpu); if (!r) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 27c0fac..5b7007c 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -564,6 +564,18 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; #endif + case KVM_CAP_SPAPR_REUSE_VCPU: + /* + * Kernel currently doesn't support closing of vCPU fd from + * user space (QEMU) correctly. Hence the option available + * is to park the vCPU fd in user space whenever a guest + * CPU is hot removed and reuse the same later when another + * guest CPU is hotplugged. This capability determines whether + * it is safe to assume if parking of vCPU fd and reuse from + * user space works for sPAPR guests. I don't see how the code you're changing here has anything to do with parking vcpus. It's all about being able to call connect on an already connected vcpu and not erroring out. Please reflect this in the cap name and description. You also need to update Documentation/virtual/kvm/api.txt. Furthermore, thinking about this a bit more, I might still miss the exact case why you need this. Why is QEMU issuing a connect again? Could it maybe just not do it? Thinking
Re: [PATCH 07/23] KVM: PPC: Book3S: Allow reuse of vCPU object
On Sat, Mar 21, 2015 at 8:28 PM, Alexander Graf ag...@suse.de wrote: On 20.03.15 16:51, Bharata B Rao wrote: On Fri, Mar 20, 2015 at 12:34:18PM +0100, Alexander Graf wrote: On 20.03.15 12:26, Paul Mackerras wrote: On Fri, Mar 20, 2015 at 12:01:32PM +0100, Alexander Graf wrote: On 20.03.15 10:39, Paul Mackerras wrote: From: Bharata B Rao bhar...@linux.vnet.ibm.com Since KVM isn't equipped to handle closure of vcpu fd from userspace(QEMU) correctly, certain work arounds have to be employed to allow reuse of vcpu array slot in KVM during cpu hot plug/unplug from guest. One such proposed workaround is to park the vcpu fd in userspace during cpu unplug and reuse it later during next hotplug. More details can be found here: KVM: https://www.mail-archive.com/kvm@vger.kernel.org/msg102839.html QEMU: http://lists.gnu.org/archive/html/qemu-devel/2014-12/msg00859.html In order to support this workaround with PowerPC KVM, don't create or initialize ICP if the vCPU is found to be already associated with an ICP. Signed-off-by: Bharata B Rao bhar...@linux.vnet.ibm.com Signed-off-by: Paul Mackerras pau...@samba.org This probably makes some sense, but please make sure that user space has some way to figure out whether hotplug works at all. Bharata is working on the qemu side of all this, so I assume he has that covered. Well, so far the kernel doesn't expose anything he can query, so I suppose he just blindly assumes that older host kernels will randomly break and nobody cares. I'd rather prefer to see a CAP exposed that qemu can check on. I see that you have already taken this into your tree. I have an updated patch to expose a CAP. If the below patch looks ok, then let me know how you would prefer to take this patch in. Regards, Bharata. 
KVM: PPC: BOOK3S: Allow reuse of vCPU object From: Bharata B Rao bhar...@linux.vnet.ibm.com Since KVM isn't equipped to handle closure of vcpu fd from userspace(QEMU) correctly, certain work arounds have to be employed to allow reuse of vcpu array slot in KVM during cpu hot plug/unplug from guest. One such proposed workaround is to park the vcpu fd in userspace during cpu unplug and reuse it later during next hotplug. More details can be found here: KVM: https://www.mail-archive.com/kvm@vger.kernel.org/msg102839.html QEMU: http://lists.gnu.org/archive/html/qemu-devel/2014-12/msg00859.html In order to support this workaround with PowerPC KVM, don't create or initialize ICP if the vCPU is found to be already associated with an ICP. User space (QEMU) can reuse the vCPU after checking for the availability of KVM_CAP_SPAPR_REUSE_VCPU capability. Signed-off-by: Bharata B Rao bhar...@linux.vnet.ibm.com --- arch/powerpc/kvm/book3s_xics.c |9 +++-- arch/powerpc/kvm/powerpc.c | 12 include/uapi/linux/kvm.h |1 + 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index a4a8d9f..ead3a35 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -1313,8 +1313,13 @@ int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, return -EPERM; if (xics-kvm != vcpu-kvm) return -EPERM; - if (vcpu-arch.irq_type) - return -EBUSY; + + /* + * If irq_type is already set, don't reinialize but + * return success allowing this vcpu to be reused. 
+ */ + if (vcpu-arch.irq_type != KVMPPC_IRQ_DEFAULT) + return 0; r = kvmppc_xics_create_icp(vcpu, xcpu); if (!r) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 27c0fac..5b7007c 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -564,6 +564,18 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; #endif + case KVM_CAP_SPAPR_REUSE_VCPU: + /* + * Kernel currently doesn't support closing of vCPU fd from + * user space (QEMU) correctly. Hence the option available + * is to park the vCPU fd in user space whenever a guest + * CPU is hot removed and reuse the same later when another + * guest CPU is hotplugged. This capability determines whether + * it is safe to assume if parking of vCPU fd and reuse from + * user space works for sPAPR guests. I don't see how the code you're changing here has anything to do with parking vcpus. It's all about being able to call connect on an already connected vcpu and not erroring out. Please reflect this in the cap name and description. You also need to update Documentation/virtual/kvm/api.txt. Furthermore, thinking about this a bit more, I might still miss the exact case why you need this. Why is QEMU issuing a connect again? Could it maybe just not do it? Thinking
Re: virtio fixes pull for 4.0?
On Mon, 2015-03-09 at 07:13 +, Rusty Russell wrote: virtio_mmio: generation support virtio_mmio: fix endian-ness for mmio these two are waiting for ack by Pawel These two fix bugs in virtio 1.0 code for mmio. Host code for that was AFAIK not posted, so I can't test properly. Pawel? I'm waiting on Acks for these two. Right, sorry about being silent for a while - I forked and was on paternity leave... Will go through the thread and respond today. Pawel -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] PCI passthrough of 40G ethernet interface (Openstack/KVM)
On 20.03.2015 21:55, jacob jacob wrote: On Thu, Mar 19, 2015 at 10:18 AM, Stefan Assmann sassm...@redhat.com wrote: On 19.03.2015 15:04, jacob jacob wrote: Hi Stefan, have you been able to get PCI passthrough working without any issues after the upgrade? My XL710 fails to transfer regular TCP traffic (netperf). If that works for you then you're already one step ahead of me. Afraid I can't help you there. I have data transfer working when trying the test runs on the host itself. Are you seeing problems when directly trying the TCP traffic from the host itself? Correct. The issues that i am seeing are specific to the case when the devices are passed via PCI passthrough into the VM. Any ideas whether this would be a kvm/qemu or i40e driver issue? (Updating to the latest firmware and using latest i40e driver didn't seem to help.) Hard to say, that's probably something for Intel to look into. Stefan -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [kvm-ppc:kvm-ppc-queue 7/9] ERROR: .__spin_yield [arch/powerpc/kvm/kvm.ko] undefined!
On 23.03.15 04:03, Michael Ellerman wrote: On Mon, 2015-03-23 at 14:00 +1100, Paul Mackerras wrote: On Fri, Mar 20, 2015 at 08:07:53PM +0800, kbuild test robot wrote: tree: git://github.com/agraf/linux-2.6.git kvm-ppc-queue head: 9b1daf3cfba1801768aa41b1b6ad0b653844241f commit: aba777f5ce0accb4c6a277e671de0330752954e8 [7/9] KVM: PPC: Book3S HV: Convert ICS mutex lock to spin lock config: powerpc-defconfig (attached as .config) reproduce: wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross git checkout aba777f5ce0accb4c6a277e671de0330752954e8 # save the attached .config to linux build tree make.cross ARCH=powerpc All error/warnings: ERROR: .__spin_yield [arch/powerpc/kvm/kvm.ko] undefined! Yes, this is the patch that depends on the powerpc: Export __spin_yield patch that Suresh posted to linuxppc-...@ozlabs.org and I acked. I think the best thing at this stage is probably for Alex to take that patch through his tree, assuming Michael is OK with that. Fine by me. Acked-by: Michael Ellerman m...@ellerman.id.au Awesome, thanks, applied to kvm-ppc-queue. Alex -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 07/23] KVM: PPC: Book3S: Allow reuse of vCPU object
On 23.03.15 08:50, Bharata B Rao wrote: On Sat, Mar 21, 2015 at 8:28 PM, Alexander Graf ag...@suse.de wrote: On 20.03.15 16:51, Bharata B Rao wrote: On Fri, Mar 20, 2015 at 12:34:18PM +0100, Alexander Graf wrote: On 20.03.15 12:26, Paul Mackerras wrote: On Fri, Mar 20, 2015 at 12:01:32PM +0100, Alexander Graf wrote: On 20.03.15 10:39, Paul Mackerras wrote: From: Bharata B Rao bhar...@linux.vnet.ibm.com Since KVM isn't equipped to handle closure of vcpu fd from userspace(QEMU) correctly, certain work arounds have to be employed to allow reuse of vcpu array slot in KVM during cpu hot plug/unplug from guest. One such proposed workaround is to park the vcpu fd in userspace during cpu unplug and reuse it later during next hotplug. More details can be found here: KVM: https://www.mail-archive.com/kvm@vger.kernel.org/msg102839.html QEMU: http://lists.gnu.org/archive/html/qemu-devel/2014-12/msg00859.html In order to support this workaround with PowerPC KVM, don't create or initialize ICP if the vCPU is found to be already associated with an ICP. Signed-off-by: Bharata B Rao bhar...@linux.vnet.ibm.com Signed-off-by: Paul Mackerras pau...@samba.org This probably makes some sense, but please make sure that user space has some way to figure out whether hotplug works at all. Bharata is working on the qemu side of all this, so I assume he has that covered. Well, so far the kernel doesn't expose anything he can query, so I suppose he just blindly assumes that older host kernels will randomly break and nobody cares. I'd rather prefer to see a CAP exposed that qemu can check on. I see that you have already taken this into your tree. I have an updated patch to expose a CAP. If the below patch looks ok, then let me know how you would prefer to take this patch in. Regards, Bharata. 
KVM: PPC: BOOK3S: Allow reuse of vCPU object From: Bharata B Rao bhar...@linux.vnet.ibm.com Since KVM isn't equipped to handle closure of vcpu fd from userspace(QEMU) correctly, certain work arounds have to be employed to allow reuse of vcpu array slot in KVM during cpu hot plug/unplug from guest. One such proposed workaround is to park the vcpu fd in userspace during cpu unplug and reuse it later during next hotplug. More details can be found here: KVM: https://www.mail-archive.com/kvm@vger.kernel.org/msg102839.html QEMU: http://lists.gnu.org/archive/html/qemu-devel/2014-12/msg00859.html In order to support this workaround with PowerPC KVM, don't create or initialize ICP if the vCPU is found to be already associated with an ICP. User space (QEMU) can reuse the vCPU after checking for the availability of KVM_CAP_SPAPR_REUSE_VCPU capability. Signed-off-by: Bharata B Rao bhar...@linux.vnet.ibm.com --- arch/powerpc/kvm/book3s_xics.c |9 +++-- arch/powerpc/kvm/powerpc.c | 12 include/uapi/linux/kvm.h |1 + 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index a4a8d9f..ead3a35 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -1313,8 +1313,13 @@ int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, return -EPERM; if (xics-kvm != vcpu-kvm) return -EPERM; - if (vcpu-arch.irq_type) - return -EBUSY; + + /* + * If irq_type is already set, don't reinialize but + * return success allowing this vcpu to be reused. 
+ */ + if (vcpu-arch.irq_type != KVMPPC_IRQ_DEFAULT) + return 0; r = kvmppc_xics_create_icp(vcpu, xcpu); if (!r) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 27c0fac..5b7007c 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -564,6 +564,18 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; #endif + case KVM_CAP_SPAPR_REUSE_VCPU: + /* + * Kernel currently doesn't support closing of vCPU fd from + * user space (QEMU) correctly. Hence the option available + * is to park the vCPU fd in user space whenever a guest + * CPU is hot removed and reuse the same later when another + * guest CPU is hotplugged. This capability determines whether + * it is safe to assume if parking of vCPU fd and reuse from + * user space works for sPAPR guests. I don't see how the code you're changing here has anything to do with parking vcpus. It's all about being able to call connect on an already connected vcpu and not erroring out. Please reflect this in the cap name and description. You also need to update Documentation/virtual/kvm/api.txt. Furthermore, thinking about this a bit more, I might still miss the exact case why you need this. Why is QEMU issuing a connect
Re: [PATCH 07/23] KVM: PPC: Book3S: Allow reuse of vCPU object
On 23.03.15 08:50, Bharata B Rao wrote: On Sat, Mar 21, 2015 at 8:28 PM, Alexander Graf ag...@suse.de wrote: On 20.03.15 16:51, Bharata B Rao wrote: On Fri, Mar 20, 2015 at 12:34:18PM +0100, Alexander Graf wrote: On 20.03.15 12:26, Paul Mackerras wrote: On Fri, Mar 20, 2015 at 12:01:32PM +0100, Alexander Graf wrote: On 20.03.15 10:39, Paul Mackerras wrote: From: Bharata B Rao bhar...@linux.vnet.ibm.com Since KVM isn't equipped to handle closure of vcpu fd from userspace(QEMU) correctly, certain work arounds have to be employed to allow reuse of vcpu array slot in KVM during cpu hot plug/unplug from guest. One such proposed workaround is to park the vcpu fd in userspace during cpu unplug and reuse it later during next hotplug. More details can be found here: KVM: https://www.mail-archive.com/kvm@vger.kernel.org/msg102839.html QEMU: http://lists.gnu.org/archive/html/qemu-devel/2014-12/msg00859.html In order to support this workaround with PowerPC KVM, don't create or initialize ICP if the vCPU is found to be already associated with an ICP. Signed-off-by: Bharata B Rao bhar...@linux.vnet.ibm.com Signed-off-by: Paul Mackerras pau...@samba.org This probably makes some sense, but please make sure that user space has some way to figure out whether hotplug works at all. Bharata is working on the qemu side of all this, so I assume he has that covered. Well, so far the kernel doesn't expose anything he can query, so I suppose he just blindly assumes that older host kernels will randomly break and nobody cares. I'd rather prefer to see a CAP exposed that qemu can check on. I see that you have already taken this into your tree. I have an updated patch to expose a CAP. If the below patch looks ok, then let me know how you would prefer to take this patch in. Regards, Bharata. 
KVM: PPC: BOOK3S: Allow reuse of vCPU object From: Bharata B Rao bhar...@linux.vnet.ibm.com Since KVM isn't equipped to handle closure of vcpu fd from userspace(QEMU) correctly, certain work arounds have to be employed to allow reuse of vcpu array slot in KVM during cpu hot plug/unplug from guest. One such proposed workaround is to park the vcpu fd in userspace during cpu unplug and reuse it later during next hotplug. More details can be found here: KVM: https://www.mail-archive.com/kvm@vger.kernel.org/msg102839.html QEMU: http://lists.gnu.org/archive/html/qemu-devel/2014-12/msg00859.html In order to support this workaround with PowerPC KVM, don't create or initialize ICP if the vCPU is found to be already associated with an ICP. User space (QEMU) can reuse the vCPU after checking for the availability of KVM_CAP_SPAPR_REUSE_VCPU capability. Signed-off-by: Bharata B Rao bhar...@linux.vnet.ibm.com --- arch/powerpc/kvm/book3s_xics.c |9 +++-- arch/powerpc/kvm/powerpc.c | 12 include/uapi/linux/kvm.h |1 + 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index a4a8d9f..ead3a35 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -1313,8 +1313,13 @@ int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, return -EPERM; if (xics-kvm != vcpu-kvm) return -EPERM; - if (vcpu-arch.irq_type) - return -EBUSY; + + /* + * If irq_type is already set, don't reinialize but + * return success allowing this vcpu to be reused. 
+ */ + if (vcpu-arch.irq_type != KVMPPC_IRQ_DEFAULT) + return 0; r = kvmppc_xics_create_icp(vcpu, xcpu); if (!r) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 27c0fac..5b7007c 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -564,6 +564,18 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = 1; break; #endif + case KVM_CAP_SPAPR_REUSE_VCPU: + /* + * Kernel currently doesn't support closing of vCPU fd from + * user space (QEMU) correctly. Hence the option available + * is to park the vCPU fd in user space whenever a guest + * CPU is hot removed and reuse the same later when another + * guest CPU is hotplugged. This capability determines whether + * it is safe to assume if parking of vCPU fd and reuse from + * user space works for sPAPR guests. I don't see how the code you're changing here has anything to do with parking vcpus. It's all about being able to call connect on an already connected vcpu and not erroring out. Please reflect this in the cap name and description. You also need to update Documentation/virtual/kvm/api.txt. Furthermore, thinking about this a bit more, I might still miss the exact case why you need this. Why is QEMU issuing a connect
Re: [kvm-ppc:kvm-ppc-queue 7/9] ERROR: .__spin_yield [arch/powerpc/kvm/kvm.ko] undefined!
On 23.03.15 04:03, Michael Ellerman wrote: On Mon, 2015-03-23 at 14:00 +1100, Paul Mackerras wrote: On Fri, Mar 20, 2015 at 08:07:53PM +0800, kbuild test robot wrote: tree: git://github.com/agraf/linux-2.6.git kvm-ppc-queue head: 9b1daf3cfba1801768aa41b1b6ad0b653844241f commit: aba777f5ce0accb4c6a277e671de0330752954e8 [7/9] KVM: PPC: Book3S HV: Convert ICS mutex lock to spin lock config: powerpc-defconfig (attached as .config) reproduce: wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross git checkout aba777f5ce0accb4c6a277e671de0330752954e8 # save the attached .config to linux build tree make.cross ARCH=powerpc All error/warnings: ERROR: .__spin_yield [arch/powerpc/kvm/kvm.ko] undefined! Yes, this is the patch that depends on the powerpc: Export __spin_yield patch that Suresh posted to linuxppc-...@ozlabs.org and I acked. I think the best thing at this stage is probably for Alex to take that patch through his tree, assuming Michael is OK with that. Fine by me. Acked-by: Michael Ellerman m...@ellerman.id.au Awesome, thanks, applied to kvm-ppc-queue. Alex -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Patch v5] x86: irq_comm: Add check for RH bit in kvm_set_msi_irq
2015-03-20 11:50-0600, James Sullivan: On 03/20/2015 09:22 AM, James Sullivan wrote: On 03/20/2015 09:15 AM, Radim Krčmář wrote: 2015-03-19 16:51-0600, James Sullivan: I played around with native_compose_msi_msg and discovered the following: * dm=0, rh=0 = Physical Destination Mode * dm=0, rh=1 = Failed delivery * dm=1, rh=0 = Logical Destination Mode, No Redirection * dm=1, rh=1 = Logical Destination Mode, Redirection Great! (What CPU family was that?) This was on Intel x86_64 (Core i5-3210m, 'Ivy Bridge'). Thanks, it's possible that the behavior of chipsets changed since the report on Intel's forum ... (Lowest priority behaved differently before QPI, so it might coincide.) I'm still wondering about last sentence from that link, the parenthesised part to be exact, The reference to the APIC ID being 0xff is because 0xff is broadcast and lowest priority (what the RH bit really is for X86) is illegal with broadcast. Can you also check if RH=1 does something to delivery mode? I haven't seen any changes in the MSI Data Register for any values of RH, but I don't have a great sample size (one machine with one set of PCI devices), so if anyone else can confirm that I would appreciate it. I meant if the delivery mode from data register isn't ignored with RH=1, and the message delivered as if lowest-priority was set there. (Decided by having something else than fixed or lowest-priority there.) Worth noting that low prio delivery was used across the board for my PCI devices regardless of RH=1 or 0, so it doesn't seem to be de facto the case that the RH bit's only purpose is for lowprio delivery on x86. Yeah, afaik, it can be done with lowest priority delivery mode on ia64 too, so I have a hard time finding RH's intended purpose. Again, need to have some more PCI devices to test against to confirm anything. 
It's impossible to test everything, and there is no conflict if we have at most one data point ;) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2 11/12] KVM: arm/arm64: rework MMIO abort handling to use KVM MMIO bus
On Mon, Mar 23, 2015 at 5:58 PM, Andre Przywara andre.przyw...@arm.com wrote: Currently we have struct kvm_exit_mmio for encapsulating MMIO abort data to be passed on from syndrome decoding all the way down to the VGIC register handlers. Now as we switch the MMIO handling to be routed through the KVM MMIO bus, it does not make sense anymore to use that structure already from the beginning. So we put the data into kvm_run very early and use that encapsulation till the MMIO bus call. Then we fill kvm_exit_mmio in the VGIC only, making it a VGIC private structure. On that way we replace the data buffer in that structure with a pointer pointing to a single location in kvm_run, so we get rid of some copying on the way. I didn't bother to rename kvm_exit_mmio (to vgic_mmio or something), I would vote for the renaming. Otherwise the patch looks much cleaner and straightforward than what it was before. Nikolay Nikolaev because that touches a lot of code lines without any good reason. This is based on an original patch by Nikolay. Signed-off-by: Andre Przywara andre.przyw...@arm.com Cc: Nikolay Nikolaev n.nikol...@virtualopensystems.com --- arch/arm/include/asm/kvm_mmio.h | 22 -- arch/arm/kvm/mmio.c | 60 ++--- arch/arm64/include/asm/kvm_mmio.h | 22 -- include/kvm/arm_vgic.h|3 -- virt/kvm/arm/vgic.c | 18 +++ virt/kvm/arm/vgic.h |8 + 6 files changed, 55 insertions(+), 78 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h index 3f83db2..d8e90c8 100644 --- a/arch/arm/include/asm/kvm_mmio.h +++ b/arch/arm/include/asm/kvm_mmio.h @@ -28,28 +28,6 @@ struct kvm_decode { bool sign_extend; }; -/* - * The in-kernel MMIO emulation code wants to use a copy of run-mmio, - * which is an anonymous type. Use our own type instead. 
- */ -struct kvm_exit_mmio { - phys_addr_t phys_addr; - u8 data[8]; - u32 len; - boolis_write; - void*private; -}; - -static inline void kvm_prepare_mmio(struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - run-mmio.phys_addr = mmio-phys_addr; - run-mmio.len = mmio-len; - run-mmio.is_write = mmio-is_write; - memcpy(run-mmio.data, mmio-data, mmio-len); - run-exit_reason= KVM_EXIT_MMIO; -} - int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, phys_addr_t fault_ipa); diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 5d3bfc0..bb2ab44 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -122,7 +122,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) } static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - struct kvm_exit_mmio *mmio) + struct kvm_run *run) { unsigned long rt; int len; @@ -148,9 +148,9 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, sign_extend = kvm_vcpu_dabt_issext(vcpu); rt = kvm_vcpu_dabt_get_rd(vcpu); - mmio-is_write = is_write; - mmio-phys_addr = fault_ipa; - mmio-len = len; + run-mmio.is_write = is_write; + run-mmio.phys_addr = fault_ipa; + run-mmio.len = len; vcpu-arch.mmio_decode.sign_extend = sign_extend; vcpu-arch.mmio_decode.rt = rt; @@ -162,23 +162,49 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return 0; } +/** + * handle_kernel_mmio - handle an in-kernel MMIO access + * @vcpu: pointer to the vcpu performing the access + * @run: pointer to the kvm_run structure + * + * returns true if the MMIO access has been performed in kernel space, + * and false if it needs to be emulated in user space. 
+ */ +static bool handle_kernel_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + int ret; + + if (run-mmio.is_write) { + ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, run-mmio.phys_addr, + run-mmio.len, run-mmio.data); + + } else { + ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, run-mmio.phys_addr, + run-mmio.len, run-mmio.data); + } + if (!ret) { + kvm_handle_mmio_return(vcpu, run); + return true; + } + + return false; +} + int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, phys_addr_t fault_ipa) { - struct kvm_exit_mmio mmio; unsigned long data; unsigned long rt; int ret; /* -* Prepare MMIO operation.
Re: [PATCH v2 00/12] KVM: arm/arm64: move VGIC MMIO to kvm_io_bus
On Mon, Mar 23, 2015 at 5:58 PM, Andre Przywara andre.przyw...@arm.com wrote: This series converts the VGIC MMIO handling routines to the generic kvm_io_bus framework. The framework is needed for the ioeventfd functionality, some people on the list wanted to see the VGIC converted over to use it, too. Beside from now moving to a generic framework instead of relying on an ARM specific one we also clean up quite some code and get rid of some unnecessary copying. On that way the MMIO abort handling for ARM has changed quite a bit, so please have a closer look and test it on your setup if possible. Based on the v1 review I addressed Christoffer's minor comments, but also heavily changed [11/12]: KVM: ARM: on IO mem abort - route the call to KVM MMIO bus to get rid of the now unnecessary copying and the usage of kvm_exit_mmio in that early stage. See the respective commit message for more details. The series is loosely based on Nikolay's work[1], thanks especially for the tedious first patch. I totally reworked Nikolay's 3/5 to avoid adding another MMIO handling layer on top of the already quite convoluted VGIC MMIO handling. Also Nikolay's 2/5 get extended and changed significantly, that's why I dropped his Signed-off-by. Unfortunately kvm_io_bus lacks an opaque pointer to pass in some data, so I worked around this by using container_of. Now for every struct kvm_mmio_range array a KVM I/O device is registered (one for VGICv2, 2*nr_vcpus + 1 for VGICv3), using the struct kvm_io_device variable as an anchor into the new struct vgic_io_device. This one holds the base address, the vgic_io_range pointer and (in case of the GICv3 redistributor) the associated vCPU, so that we can access all instance-specific data easily. Patch 2 moves the iodev.h header file around, that solves a problem when embedding a struct in arm_vgic.h later. That looks like a nice cleanup anyway, so I added two patches to remove the compiler switch to add virt/kvm as a include directory. 
This has been tested for arm/arm64 and x86. As soon as I get around to compile-test the other architectures, I can send out the respective patches for those, too. Patches 5-7 tweak the existing code a bit to make it fit for the conversion. Patch 8 contains the framework for the new handling, while patch 9 and 10 enable the GICv2 and GICv3 emulation, respectively. Patch 11 finally switches over to the new kvm_io_bus handling, reworking the early ARM KVM MMIO handling quite a bit. Patch 12 removes the now unneeded code. I split this up to ease reviewing, I could merge patches as well if needed. Shall we add here also the last 2 patches from my series that actually enable the eventfd compilation and KVM_CAP_IOEVENTFD? Or should I send them separately? regards, Nikolay Nikolaev The series goes on top of the kvmarm.git/next branch and was briefly tested on an arm64 model with a GICv2 and a GICv3 guest and on Midway (GICv2 guest). Cheers, Andre. [1] https://lists.cs.columbia.edu/pipermail/kvmarm/2015-January/013379.html Andre Przywara (11): KVM: move iodev.h from virt/kvm/ to include/kvm KVM: arm/arm64: remove now unneeded include directory from Makefile KVM: x86: remove now unneeded include directory from Makefile KVM: arm/arm64: rename struct kvm_mmio_range to vgic_io_range KVM: mark kvm-buses as empty once they were destroyed KVM: arm/arm64: simplify vgic_find_range() and callers KVM: arm/arm64: implement kvm_io_bus MMIO handling for the VGIC KVM: arm/arm64: prepare GICv2 emulation to be handled by kvm_io_bus KVM: arm/arm64: prepare GICv3 emulation to use kvm_io_bus MMIO handling KVM: arm/arm64: rework MMIO abort handling to use KVM MMIO bus KVM: arm/arm64: remove now obsolete VGIC specific MMIO handling code Nikolay Nikolaev (1): KVM: Redesign kvm_io_bus_ API to pass VCPU structure to the callbacks. 
arch/arm/include/asm/kvm_mmio.h | 22 arch/arm/kvm/Makefile |2 +- arch/arm/kvm/mmio.c | 60 --- arch/arm64/include/asm/kvm_mmio.h | 22 arch/arm64/kvm/Makefile |2 +- arch/powerpc/kvm/mpic.c | 12 ++- arch/powerpc/kvm/powerpc.c|4 +- arch/s390/kvm/diag.c |2 +- arch/x86/kvm/Makefile |2 +- arch/x86/kvm/i8254.c | 14 ++- arch/x86/kvm/i8254.h |2 +- arch/x86/kvm/i8259.c | 12 +-- arch/x86/kvm/ioapic.c |8 +- arch/x86/kvm/ioapic.h |2 +- arch/x86/kvm/irq.h|2 +- arch/x86/kvm/lapic.c |4 +- arch/x86/kvm/lapic.h |2 +- arch/x86/kvm/vmx.c|2 +- arch/x86/kvm/x86.c| 13 +-- include/kvm/arm_vgic.h| 16 ++- include/kvm/iodev.h | 76 + include/linux/kvm_host.h | 10 +- virt/kvm/arm/vgic-v2-emul.c
[RESEND PATCH] KVM: PPC: Book3S HV: Deliver machine check with MSR(RI=0) to guest as MCE.
From: Mahesh Salgaonkar mah...@linux.vnet.ibm.com For the machine check interrupt that happens while we are in the guest, kvm layer attempts the recovery, and then delivers the machine check interrupt directly to the guest if recovery fails. On successful recovery we go back to normal functioning of the guest. But there can be cases where a machine check interrupt can happen with MSR(RI=0) while we are in the guest. This means MC interrupt is unrecoverable and we have to deliver a machine check to the guest since the machine check interrupt might have trashed valid values in SRR0/1. The current implementation do not handle this case, causing guest to crash with Bad kernel stack pointer instead of machine check oops message. [26281.490060] Bad kernel stack pointer 3fff9ccce5b0 at c000490c [26281.490434] Oops: Bad kernel stack pointer, sig: 6 [#1] [26281.490472] SMP NR_CPUS=2048 NUMA pSeries This patch fixes this issue by checking MSR(RI=0) in KVM layer and forwarding unrecoverable interrupt to guest which then panics with proper machine check Oops message. Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com Acked-by: Paul Mackerras pau...@samba.org --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 12 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index bb94e6f..258f46d 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2063,7 +2063,6 @@ machine_check_realmode: mr r3, r9 /* get vcpu pointer */ bl kvmppc_realmode_machine_check nop - cmpdi r3, 0 /* Did we handle MCE ? */ ld r9, HSTATE_KVM_VCPU(r13) li r12, BOOK3S_INTERRUPT_MACHINE_CHECK /* @@ -2076,13 +2075,18 @@ machine_check_realmode: * The old code used to return to host for unhandled errors which * was causing guest to hang with soft lockups inside guest and * makes it difficult to recover guest instance. 
+* +* if we receive machine check with MSR(RI=0) then deliver it to +* guest as machine check causing guest to crash. */ - ld r10, VCPU_PC(r9) ld r11, VCPU_MSR(r9) + andi. r10, r11, MSR_RI/* check for unrecoverable exception */ + beq 1f /* Deliver a machine check to guest */ + ld r10, VCPU_PC(r9) + cmpdi r3, 0 /* Did we handle MCE ? */ bne 2f /* Continue guest execution. */ /* If not, deliver a machine check. SRR0/1 are already set */ - li r10, BOOK3S_INTERRUPT_MACHINE_CHECK - ld r11, VCPU_MSR(r9) +1: li r10, BOOK3S_INTERRUPT_MACHINE_CHECK bl kvmppc_msr_interrupt 2: b fast_interrupt_c_return -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 2/6] target-arm: kvm: save/restore mp state
This adds the saving and restore of the current Multi-Processing state of the machine. While the KVM_GET/SET_MP_STATE API exposes a number of potential states for x86 we only use two for ARM. Either the process is running or not. We then save this state into the cpu_powered TCG state to avoid changing the serialisation format. Signed-off-by: Alex Bennée alex.ben...@linaro.org --- v2 - make mpstate field runtime dependant (kvm_enabled()) - drop initial KVM_CAP_MP_STATE requirement - re-use cpu_powered instead of new field v4 - s/HALTED/STOPPED/ - move code from machine.c to kvm. diff --git a/target-arm/kvm.c b/target-arm/kvm.c index 72c1fa1..a74832c 100644 --- a/target-arm/kvm.c +++ b/target-arm/kvm.c @@ -458,6 +458,46 @@ void kvm_arm_reset_vcpu(ARMCPU *cpu) } } +/* + * Update KVM's MP_STATE based on what QEMU thinks it is + */ +int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu) +{ +if (kvm_check_extension(CPU(cpu)-kvm_state, KVM_CAP_MP_STATE)) { +struct kvm_mp_state mp_state = { +.mp_state = +cpu-powered_off ? 
KVM_MP_STATE_STOPPED : KVM_MP_STATE_RUNNABLE +}; +int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, mp_state); +if (ret) { +fprintf(stderr, %s: failed to set MP_STATE %d/%s\n, +__func__, ret, strerror(ret)); +return -1; +} +} + +return 0; +} + +/* + * Sync the KVM MP_STATE into QEMU + */ +int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu) +{ +if (kvm_check_extension(CPU(cpu)-kvm_state, KVM_CAP_MP_STATE)) { +struct kvm_mp_state mp_state; +int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MP_STATE, mp_state); +if (ret) { +fprintf(stderr, %s: failed to get MP_STATE %d/%s\n, +__func__, ret, strerror(ret)); +abort(); +} +cpu-powered_off = (mp_state.mp_state == KVM_MP_STATE_STOPPED); +} + +return 0; +} + void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run) { } diff --git a/target-arm/kvm32.c b/target-arm/kvm32.c index 94030d1..49b6bab 100644 --- a/target-arm/kvm32.c +++ b/target-arm/kvm32.c @@ -356,6 +356,8 @@ int kvm_arch_put_registers(CPUState *cs, int level) return EINVAL; } +kvm_arm_sync_mpstate_to_kvm(cpu); + return ret; } @@ -427,5 +429,7 @@ int kvm_arch_get_registers(CPUState *cs) */ write_list_to_cpustate(cpu); +kvm_arm_sync_mpstate_to_qemu(cpu); + return 0; } diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c index 8cf3a62..fed03f2 100644 --- a/target-arm/kvm64.c +++ b/target-arm/kvm64.c @@ -211,6 +211,8 @@ int kvm_arch_put_registers(CPUState *cs, int level) return EINVAL; } +kvm_arm_sync_mpstate_to_kvm(cpu); + /* TODO: * FP state */ @@ -310,6 +312,8 @@ int kvm_arch_get_registers(CPUState *cs) */ write_list_to_cpustate(cpu); +kvm_arm_sync_mpstate_to_qemu(cpu); + /* TODO: other registers */ return ret; } diff --git a/target-arm/kvm_arm.h b/target-arm/kvm_arm.h index 455dea3..7b75758 100644 --- a/target-arm/kvm_arm.h +++ b/target-arm/kvm_arm.h @@ -162,6 +162,24 @@ typedef struct ARMHostCPUClass { */ bool kvm_arm_get_host_cpu_features(ARMHostCPUClass *ahcc); + +/** + * kvm_arm_sync_mpstate_to_kvm + * @cpu: ARMCPU + * + * If supported set the KVM MP_STATE based on 
QEMUs migration data. + */ +int kvm_arm_sync_mpstate_to_kvm(ARMCPU *cpu); + +/** + * kvm_arm_sync_mpstate_to_qemu + * @cpu: ARMCPU + * + * If supported get the MP_STATE from KVM and store in QEMUs migration + * data. + */ +int kvm_arm_sync_mpstate_to_qemu(ARMCPU *cpu); + #endif #endif -- 2.3.2 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 6/6] target-arm: cpu.h document why env->spsr exists
I was getting very confused about the duplication of state so wanted to make it explicit. Signed-off-by: Alex Bennée alex.ben...@linaro.org diff --git a/target-arm/cpu.h b/target-arm/cpu.h index 083211c..6dc1799 100644 --- a/target-arm/cpu.h +++ b/target-arm/cpu.h @@ -155,6 +155,11 @@ typedef struct CPUARMState { This contains all the other bits. Use cpsr_{read,write} to access the whole CPSR. */ uint32_t uncached_cpsr; +/* The spsr is a alias for spsr_elN where N is the current + * exception level. It is provided for here so the TCG msr/mrs + * implementation can access one register. Care needs to be taken + * to ensure the banked_spsr[] is also updated. + */ uint32_t spsr; /* Banked registers. */ -- 2.3.2 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 5/6] target-arm: kvm64 fix save/restore of SPSR regs
The current code was negatively indexing the cpu state array and not synchronizing banked spsr register state with the current mode's spsr state, causing occasional failures with migration. Some munging is done to take care of the aarch64 mapping and also to ensure the most current value of the spsr is updated to the banked registers (relevant for KVM-TCG migration). Signed-off-by: Alex Bennée alex.ben...@linaro.org --- v2 (ajb) - minor tweaks and clarifications v3 - Use the correct bank index function for setting/getting env-spsr - only deal with spsrs in elevated exception levels v4 - try and make commentary clearer - ensure env-banked_spsr[0] = env-spsr before we sync v5 - fix banking index now banking fixed - keep wide spacing on [ ] forms - claimed authorship diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c index 857e970..5270fa7 100644 --- a/target-arm/kvm64.c +++ b/target-arm/kvm64.c @@ -139,6 +139,7 @@ int kvm_arch_put_registers(CPUState *cs, int level) uint64_t val; int i; int ret; +unsigned int el; ARMCPU *cpu = ARM_CPU(cs); CPUARMState *env = cpu-env; @@ -205,9 +206,24 @@ int kvm_arch_put_registers(CPUState *cs, int level) return ret; } +/* Saved Program State Registers + * + * Before we restore from the banked_spsr[] array we need to + * ensure that any modifications to env-spsr are correctly + * reflected in the banks. + */ +el = arm_current_el(env); +if (el 0) { +i = is_a64(env) ? 
+aarch64_banked_spsr_index(el) : +bank_number(env-uncached_cpsr CPSR_M); +env-banked_spsr[i] = env-spsr; +} + +/* KVM 0-4 map to QEMU banks 1-5 */ for (i = 0; i KVM_NR_SPSR; i++) { reg.id = AARCH64_CORE_REG(spsr[i]); -reg.addr = (uintptr_t) env-banked_spsr[i - 1]; +reg.addr = (uintptr_t) env-banked_spsr[i + 1]; ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, reg); if (ret) { return ret; @@ -253,11 +269,13 @@ int kvm_arch_put_registers(CPUState *cs, int level) return ret; } + int kvm_arch_get_registers(CPUState *cs) { struct kvm_one_reg reg; uint64_t val; uint32_t fpr; +unsigned int el; int i; int ret; @@ -330,15 +348,27 @@ int kvm_arch_get_registers(CPUState *cs) return ret; } +/* Fetch the SPSR registers + * + * KVM SPSRs 0-4 map to QEMU banks 1-5 + */ for (i = 0; i KVM_NR_SPSR; i++) { reg.id = AARCH64_CORE_REG(spsr[i]); -reg.addr = (uintptr_t) env-banked_spsr[i - 1]; +reg.addr = (uintptr_t) env-banked_spsr[i + 1]; ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, reg); if (ret) { return ret; } } +el = arm_current_el(env); +if (el 0) { +i = is_a64(env) ? +aarch64_banked_spsr_index(el) : +bank_number(env-uncached_cpsr CPSR_M); +env-spsr = env-banked_spsr[i]; +} + /* Advanced SIMD and FP registers * We map Qn = regs[2n+1]:regs[2n] */ -- 2.3.2 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 0/6] QEMU ARM64 Migration Fixes
Hi, Following some review comments (and a patch) from Peter I've re-spun this series: v5 - Added Peter's SPSR_EL1 state fix for architectural mapping - As a result SPSR save/restore no longer does munge - FP register save/restore re-done to deal float128 mapping - Some minor [ spaces ] added I submitted the kernel side of this on Friday Branch: https://github.com/stsquad/qemu/tree/migration/fixes-v5 Kernel: https://git.linaro.org/people/alex.bennee/linux.git/shortlog/refs/heads/migration/kvmarm-fixes-for-4.0-v3 Alex Bennée (5): target-arm: kvm: save/restore mp state hw/intc: arm_gic_kvm.c restore config first target-arm: kvm64 sync FP register state target-arm: kvm64 fix save/restore of SPSR regs target-arm: cpu.h document why env-spsr exists Peter Maydell (1): target-arm: Store SPSR_EL1 state in banked_spsr[1] (SPSR_svc) hw/intc/arm_gic_kvm.c | 7 ++- target-arm/cpu.h| 5 +++ target-arm/helper-a64.c | 2 +- target-arm/helper.c | 2 +- target-arm/internals.h | 5 ++- target-arm/kvm.c| 40 + target-arm/kvm32.c | 4 ++ target-arm/kvm64.c | 111 +--- target-arm/kvm_arm.h| 18 9 files changed, 184 insertions(+), 10 deletions(-) -- 2.3.2 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] KVM: nVMX: Add support for rdtscp
Jan Kiszka jan.kis...@web.de writes: ... --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2467,6 +2467,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx-nested.nested_vmx_secondary_ctls_low = 0; vmx-nested.nested_vmx_secondary_ctls_high = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | @@ -7510,7 +7511,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); case EXIT_REASON_RDPMC: return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING); - case EXIT_REASON_RDTSC: + case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP: return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING); case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD: @@ -8517,6 +8518,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) exec_control); } } + if (!vmx-rdtscp_enabled) + vmx-nested.nested_vmx_secondary_ctls_high = + ~SECONDARY_EXEC_RDTSCP; No need to do this if nested is not enabled ? Or just a if (nested) in the prior if else loop should be enough I think. Bandan } /* Exposing INVPCID only when PCID is exposed */ @@ -9146,8 +9150,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) exec_control = ~SECONDARY_EXEC_RDTSCP; /* Take the following fields only from vmcs12 */ exec_control = ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | - SECONDARY_EXEC_APIC_REGISTER_VIRT); + SECONDARY_EXEC_APIC_REGISTER_VIRT); if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) exec_control |= vmcs12-secondary_vm_exec_control; -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Windows 7 guest installer does not detect drive if physical partition used instead of disk file.
On 3/23/15, Stefan Hajnoczi stefa...@gmail.com wrote: I have CCed the libvirt mailing list, since KVM is a component here but your question seems to be mainly about libvirt, virt-manager, virt-install, etc. Apologies for posting to the wrong list, I assumed it would be KVM related as the guest could run but could not see the drive. More information 1. install guest with /dev/sdxx as virtio device (the problem case) - installer does not see any drive - load drivers on Redhat virtio cdrom - installer still does not see any drive 2. Install guest with qcow2 disk file as virtio device - as previous scenario but installer see drives after installing drivers 3. install guest with qcow2 disk file as IDE device - complete installation - add /dev/sdxx as virtio disk - goto Windows Device Manager and update virtio driver for unknown controller - Windows see /dev/sdxx after driver installed It sounds like you want an NTFS partition on /dev/sda. That requires passing the whole /dev/sda drive to the guest - and the Windows installer might overwrite your GRUB Master Boot Record. Be careful when trying to do this. Yes, I wanted to give Windows its own native partition that could be read directly if I had to yank the disk and put it into a Windows machine. Is this why #3 works but not #1? That as long as I want to install Windows directly to an NTFS partition on/dev/sda, it is required that I pass the whole drive to Windows? -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 3/6] hw/intc: arm_gic_kvm.c restore config first
As there is logic to deal with the difference between edge and level triggered interrupts in the kernel we must ensure it knows the configuration of the IRQs before we restore the pending state. Signed-off-by: Alex Bennée alex.ben...@linaro.org Acked-by: Christoffer Dall christoffer.d...@linaro.org diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c index 0d20750..e2512f1 100644 --- a/hw/intc/arm_gic_kvm.c +++ b/hw/intc/arm_gic_kvm.c @@ -370,6 +370,11 @@ static void kvm_arm_gic_put(GICState *s) * the appropriate CPU interfaces in the kernel) */ kvm_dist_put(s, 0x800, 8, s-num_irq, translate_targets); +/* irq_state[n].trigger - GICD_ICFGRn + * (restore targets before pending IRQs so we treat level/edge + * correctly */ +kvm_dist_put(s, 0xc00, 2, s-num_irq, translate_trigger); + /* irq_state[n].pending + irq_state[n].level - GICD_ISPENDRn */ kvm_dist_put(s, 0x280, 1, s-num_irq, translate_clear); kvm_dist_put(s, 0x200, 1, s-num_irq, translate_pending); @@ -378,8 +383,6 @@ static void kvm_arm_gic_put(GICState *s) kvm_dist_put(s, 0x380, 1, s-num_irq, translate_clear); kvm_dist_put(s, 0x300, 1, s-num_irq, translate_active); -/* irq_state[n].trigger - GICD_ICFRn */ -kvm_dist_put(s, 0xc00, 2, s-num_irq, translate_trigger); /* s-priorityX[irq] - ICD_IPRIORITYRn */ kvm_dist_put(s, 0x400, 8, s-num_irq, translate_priority); -- 2.3.2 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 4/6] target-arm: kvm64 sync FP register state
For migration to work we need to sync all of the register state. This is especially noticeable when GCC starts using FP registers as spill registers even with integer programs. Signed-off-by: Alex Bennée alex.ben...@linaro.org --- v4: - fixed merge conflicts - rm superfluous reg.id++ v5: - use interim float128 to deal with endianess - correctly map into vfp.regs[] - fix spacing around []s diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c index fed03f2..857e970 100644 --- a/target-arm/kvm64.c +++ b/target-arm/kvm64.c @@ -126,9 +126,16 @@ bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx) #define AARCH64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) +#define AARCH64_SIMD_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U128 | \ + KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) + +#define AARCH64_SIMD_CTRL_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U32 | \ + KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) + int kvm_arch_put_registers(CPUState *cs, int level) { struct kvm_one_reg reg; +uint32_t fpr; uint64_t val; int i; int ret; @@ -207,15 +214,42 @@ int kvm_arch_put_registers(CPUState *cs, int level) } } +/* Advanced SIMD and FP registers + * We map Qn = regs[2n+1]:regs[2n] + */ +for (i = 0; i 32; i++) { +int rd = i 1; +float128 fp_val = make_float128(env-vfp.regs[rd + 1], +env-vfp.regs[rd]); +reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]); +reg.addr = (uintptr_t)(fp_val); +ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, reg); +if (ret) { +return ret; +} +} + +reg.addr = (uintptr_t)(fpr); +fpr = vfp_get_fpsr(env); +reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr); +ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, reg); +if (ret) { +return ret; +} + +fpr = vfp_get_fpcr(env); +reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr); +ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, reg); +if (ret) { +return ret; +} + if (!write_list_to_kvmstate(cpu)) { return EINVAL; } kvm_arm_sync_mpstate_to_kvm(cpu); -/* TODO: - * FP state - */ return ret; } @@ -223,6 +257,7 @@ int 
kvm_arch_get_registers(CPUState *cs) { struct kvm_one_reg reg; uint64_t val; +uint32_t fpr; int i; int ret; @@ -304,6 +339,38 @@ int kvm_arch_get_registers(CPUState *cs) } } +/* Advanced SIMD and FP registers + * We map Qn = regs[2n+1]:regs[2n] + */ +for (i = 0; i 32; i++) { +float128 fp_val; +reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]); +reg.addr = (uintptr_t)(fp_val); +ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, reg); +if (ret) { +return ret; +} else { +int rd = i 1; +env-vfp.regs[rd + 1] = fp_val.high; +env-vfp.regs[rd] = fp_val.low; +} +} + +reg.addr = (uintptr_t)(fpr); +reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr); +ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, reg); +if (ret) { +return ret; +} +vfp_set_fpsr(env, fpr); + +reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr); +ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, reg); +if (ret) { +return ret; +} +vfp_set_fpcr(env, fpr); + if (!write_kvmstate_to_list(cpu)) { return EINVAL; } -- 2.3.2 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v5 1/6] target-arm: Store SPSR_EL1 state in banked_spsr[1] (SPSR_svc)
From: Peter Maydell peter.mayd...@linaro.org The AArch64 SPSR_EL1 register is architecturally mandated to be mapped to the AArch32 SPSR_svc register. This means its state should live in QEMU's env-banked_spsr[1] field. Correct the various places in the code that incorrectly put it in banked_spsr[0]. Signed-off-by: Peter Maydell peter.mayd...@linaro.org diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c index 7e0d038..861f6fa 100644 --- a/target-arm/helper-a64.c +++ b/target-arm/helper-a64.c @@ -523,7 +523,7 @@ void aarch64_cpu_do_interrupt(CPUState *cs) aarch64_save_sp(env, arm_current_el(env)); env-elr_el[new_el] = env-pc; } else { -env-banked_spsr[0] = cpsr_read(env); +env-banked_spsr[aarch64_banked_spsr_index(new_el)] = cpsr_read(env); if (!env-thumb) { env-cp15.esr_el[new_el] |= 1 25; } diff --git a/target-arm/helper.c b/target-arm/helper.c index 10886c5..d77c6de 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -2438,7 +2438,7 @@ static const ARMCPRegInfo v8_cp_reginfo[] = { { .name = SPSR_EL1, .state = ARM_CP_STATE_AA64, .type = ARM_CP_ALIAS, .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 0, .opc2 = 0, - .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[0]) }, + .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, banked_spsr[1]) }, /* We rely on the access checks not allowing the guest to write to the * state field when SPSel indicates that it's being used as the stack * pointer. diff --git a/target-arm/internals.h b/target-arm/internals.h index bb171a7..2cc3017 100644 --- a/target-arm/internals.h +++ b/target-arm/internals.h @@ -82,11 +82,14 @@ static inline void arm_log_exception(int idx) /* * For AArch64, map a given EL to an index in the banked_spsr array. + * Note that this mapping and the AArch32 mapping defined in bank_number() + * must agree such that the AArch64-AArch32 SPSRs have the architecturally + * mandated mapping between each other. 
*/ static inline unsigned int aarch64_banked_spsr_index(unsigned int el) { static const unsigned int map[4] = { -[1] = 0, /* EL1. */ +[1] = 1, /* EL1. */ [2] = 6, /* EL2. */ [3] = 7, /* EL3. */ }; -- 2.3.2 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] KVM: nVMX: Add support for rdtscp
On 2015-03-23 18:01, Bandan Das wrote: Jan Kiszka jan.kis...@web.de writes: ... --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2467,6 +2467,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx-nested.nested_vmx_secondary_ctls_low = 0; vmx-nested.nested_vmx_secondary_ctls_high = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | +SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | @@ -7510,7 +7511,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); case EXIT_REASON_RDPMC: return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING); -case EXIT_REASON_RDTSC: +case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP: return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING); case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD: @@ -8517,6 +8518,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) exec_control); } } +if (!vmx-rdtscp_enabled) +vmx-nested.nested_vmx_secondary_ctls_high = +~SECONDARY_EXEC_RDTSCP; No need to do this if nested is not enabled ? Or just a if (nested) in the prior if else loop should be enough I think. I can add this - but this is far away from being a hotpath. What would be the benefit? Thanks, Jan signature.asc Description: OpenPGP digital signature
[PATCH v3] KVM: nVMX: Add support for rdtscp
From: Jan Kiszka jan.kis...@siemens.com If the guest CPU is supposed to support rdtscp and the host has rdtscp enabled in the secondary execution controls, we can also expose this feature to L1. Just extend nested_vmx_exit_handled to properly route EXIT_REASON_RDTSCP. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- Changes in v3: - avoid needlessly touching vmx-nested if nested is off arch/x86/include/uapi/asm/vmx.h | 1 + arch/x86/kvm/vmx.c | 9 +++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index c5f1a1d..1fe9218 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -67,6 +67,7 @@ #define EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_MISCONFIG 49 #define EXIT_REASON_INVEPT 50 +#define EXIT_REASON_RDTSCP 51 #define EXIT_REASON_PREEMPTION_TIMER52 #define EXIT_REASON_INVVPID 53 #define EXIT_REASON_WBINVD 54 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 50c675b..fdd9f8b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2467,6 +2467,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx-nested.nested_vmx_secondary_ctls_low = 0; vmx-nested.nested_vmx_secondary_ctls_high = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | @@ -7510,7 +7511,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); case EXIT_REASON_RDPMC: return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING); - case EXIT_REASON_RDTSC: + case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP: return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING); case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD: @@ -8517,6 +8518,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) exec_control); } } + if (nested 
!vmx-rdtscp_enabled) + vmx-nested.nested_vmx_secondary_ctls_high = + ~SECONDARY_EXEC_RDTSCP; } /* Exposing INVPCID only when PCID is exposed */ @@ -9146,8 +9150,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) exec_control = ~SECONDARY_EXEC_RDTSCP; /* Take the following fields only from vmcs12 */ exec_control = ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | - SECONDARY_EXEC_APIC_REGISTER_VIRT); + SECONDARY_EXEC_APIC_REGISTER_VIRT); if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) exec_control |= vmcs12-secondary_vm_exec_control; -- 2.1.4 signature.asc Description: OpenPGP digital signature
Re: [PATCH v2] KVM: nVMX: Add support for rdtscp
Jan Kiszka jan.kis...@web.de writes: On 2015-03-23 18:01, Bandan Das wrote: Jan Kiszka jan.kis...@web.de writes: ... --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2467,6 +2467,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx-nested.nested_vmx_secondary_ctls_low = 0; vmx-nested.nested_vmx_secondary_ctls_high = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | @@ -7510,7 +7511,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); case EXIT_REASON_RDPMC: return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING); - case EXIT_REASON_RDTSC: + case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP: return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING); case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD: @@ -8517,6 +8518,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) exec_control); } } + if (!vmx-rdtscp_enabled) + vmx-nested.nested_vmx_secondary_ctls_high = + ~SECONDARY_EXEC_RDTSCP; No need to do this if nested is not enabled ? Or just a if (nested) in the prior if else loop should be enough I think. I can add this - but this is far away from being a hotpath. What would be the benefit? Right, definitely not a hotpath, just seems unnecessary if nested is not enabled. Bandan Thanks, Jan -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [GIT PULL 00/11] KVM: s390: Features and fixes for 4.1 (kvm/next)
On Wed, Mar 18, 2015 at 12:43:58PM +0100, Christian Borntraeger wrote: Paolo, Marcelo, here is the followup pull request. As Marcelo has not yet pushed out queue or next to git.kernel.org, this request is based on the previous s390 pull request and should merge without conflicts. For details see tag description. Christian The following changes since commit 13211ea7b47db3d8ee2ff258a9a973a6d3aa3d43: KVM: s390: Enable vector support for capable guest (2015-03-06 13:49:35 +0100) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git tags/kvm-s390-next-20150318 for you to fetch changes up to 18280d8b4bcd4a2b174ee3cd748166c6190acacb: KVM: s390: represent SIMD cap in kvm facility (2015-03-17 16:33:14 +0100) Pulled, thanks. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
x86: kvm: Revert "remove sched notifier for cross-cpu migrations"
The following point: 2. per-CPU pvclock time info is updated if the underlying CPU changes. Is not true anymore since KVM: x86: update pvclock area conditionally, on cpu migration. Add task migration notification back. Problem noticed by Andy Lutomirski. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com CC: sta...@kernel.org # 3.11+ diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index d6b078e..25b1cc0 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -95,6 +95,7 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, struct pvclock_vsyscall_time_info { struct pvclock_vcpu_time_info pvti; + u32 migrate_count; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 2f355d2..e5ecd20 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -141,7 +141,46 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); } +static struct pvclock_vsyscall_time_info *pvclock_vdso_info; + +static struct pvclock_vsyscall_time_info * +pvclock_get_vsyscall_user_time_info(int cpu) +{ + if (!pvclock_vdso_info) { + BUG(); + return NULL; + } + + return pvclock_vdso_info[cpu]; +} + +struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu) +{ + return pvclock_get_vsyscall_user_time_info(cpu)-pvti; +} + #ifdef CONFIG_X86_64 +static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l, + void *v) +{ + struct task_migration_notifier *mn = v; + struct pvclock_vsyscall_time_info *pvti; + + pvti = pvclock_get_vsyscall_user_time_info(mn-from_cpu); + + /* this is NULL when pvclock vsyscall is not initialized */ + if (unlikely(pvti == NULL)) + return NOTIFY_DONE; + + pvti-migrate_count++; + + return NOTIFY_DONE; +} + +static struct notifier_block pvclock_migrate = { + .notifier_call 
= pvclock_task_migrate, +}; + /* * Initialize the generic pvclock vsyscall state. This will allocate * a/some page(s) for the per-vcpu pvclock information, set up a @@ -155,12 +194,17 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i, WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); + pvclock_vdso_info = i; + for (idx = 0; idx = (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, __pa(i) + (idx*PAGE_SIZE), PAGE_KERNEL_VVAR); } + + register_task_migration_notifier(pvclock_migrate); + return 0; } #endif diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 9793322..3093376 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -82,18 +82,15 @@ static notrace cycle_t vread_pvclock(int *mode) cycle_t ret; u64 last; u32 version; + u32 migrate_count; u8 flags; unsigned cpu, cpu1; /* -* Note: hypervisor must guarantee that: -* 1. cpu ID number maps 1:1 to per-CPU pvclock time info. -* 2. that per-CPU pvclock time info is updated if the -*underlying CPU changes. -* 3. that version is increased whenever underlying CPU -*changes. -* +* When looping to get a consistent (time-info, tsc) pair, we +* also need to deal with the possibility we can switch vcpus, +* so make sure we always re-fetch time-info for the current vcpu. 
*/ do { cpu = __getcpu() & VGETCPU_CPU_MASK; @@ -104,6 +101,8 @@ static notrace cycle_t vread_pvclock(int *mode) pvti = get_pvti(cpu); + migrate_count = pvti->migrate_count; + version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); /* @@ -115,7 +114,8 @@ static notrace cycle_t vread_pvclock(int *mode) cpu1 = __getcpu() & VGETCPU_CPU_MASK; } while (unlikely(cpu != cpu1 || (pvti->pvti.version & 1) || - pvti->pvti.version != version)); + pvti->pvti.version != version || + pvti->migrate_count != migrate_count)); if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) *mode = VCLOCK_NONE; diff --git a/include/linux/sched.h b/include/linux/sched.h index 6d77432..be98910 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -176,6 +176,14 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); extern void calc_global_load(unsigned long ticks); extern void update_cpu_load_nohz(void);
Re: x86: kvm: Revert "remove sched notifier for cross-cpu migrations"
On Mon, Mar 23, 2015 at 4:21 PM, Marcelo Tosatti mtosa...@redhat.com wrote: The following point: 2. per-CPU pvclock time info is updated if the underlying CPU changes. Is not true anymore since commit "KVM: x86: update pvclock area conditionally, on cpu migration". Add task migration notification back. IMO this is a pretty big hammer to use to work around what appears to be a bug in the host, but I guess that's okay. It's also unfortunate in another regard: it seems non-obvious to me how to use this without reading the cpu number twice in the vdso. On the other hand, unless we have a global pvti, or at least a global indication of TSC stability, I don't see how to do that even with the host bug fixed. Grumble. On a more useful note, could you rename migrate_count to migrate_from_count, since that's what it is? --Andy -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Patch v5] x86: irq_comm: Add check for RH bit in kvm_set_msi_irq
On 03/23/2015 03:13 PM, Radim Krčmář wrote: 2015-03-20 11:50-0600, James Sullivan: On 03/20/2015 09:22 AM, James Sullivan wrote: On 03/20/2015 09:15 AM, Radim Krčmář wrote: 2015-03-19 16:51-0600, James Sullivan: I played around with native_compose_msi_msg and discovered the following: * dm=0, rh=0 = Physical Destination Mode * dm=0, rh=1 = Failed delivery * dm=1, rh=0 = Logical Destination Mode, No Redirection * dm=1, rh=1 = Logical Destination Mode, Redirection Great! (What CPU family was that?) This was on Intel x86_64 (Core i5-3210m, 'Ivy Bridge'). Thanks, it's possible that the behavior of chipsets changed since the report on Intel's forum ... (Lowest priority behaved differently before QPI, so it might coincide.) I'm still wondering about last sentence from that link, the parenthesised part to be exact, The reference to the APIC ID being 0xff is because 0xff is broadcast and lowest priority (what the RH bit really is for X86) is illegal with broadcast. Can you also check if RH=1 does something to delivery mode? I haven't seen any changes in the MSI Data Register for any values of RH, but I don't have a great sample size (one machine with one set of PCI devices), so if anyone else can confirm that I would appreciate it. I meant if the delivery mode from data register isn't ignored with RH=1, and the message delivered as if lowest-priority was set there. (Decided by having something else than fixed or lowest-priority there.) Hmm, any thoughts on how I could test for that? Worth noting that low prio delivery was used across the board for my PCI devices regardless of RH=1 or 0, so it doesn't seem to be de facto the case that the RH bit's only purpose is for lowprio delivery on x86. Yeah, afaik, it can be done with lowest priority delivery mode on ia64 too, so I have a hard time finding RH's intended purpose. Again, need to have some more PCI devices to test against to confirm anything. 
It's impossible to test everything, and there is no conflict if we have at most one data point ;) Very true :) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 2/2] drivers/vfio: Support EEH error injection
On Mon, Mar 23, 2015 at 10:14:59AM -0600, Alex Williamson wrote: On Mon, 2015-03-23 at 16:20 +1100, Gavin Shan wrote: On Mon, Mar 23, 2015 at 04:10:20PM +1100, David Gibson wrote: On Mon, Mar 23, 2015 at 04:03:59PM +1100, Gavin Shan wrote: On Mon, Mar 23, 2015 at 02:43:03PM +1100, David Gibson wrote: On Mon, Mar 23, 2015 at 12:56:36PM +1100, Gavin Shan wrote: On Mon, Mar 23, 2015 at 12:39:45PM +1100, David Gibson wrote: On Sat, Mar 21, 2015 at 06:58:45AM +1100, Gavin Shan wrote: The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR) to inject the specified EEH error, which is represented by (struct vfio_eeh_pe_err), to the indicated PE for testing purpose. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com Reviewed-by: David Gibson da...@gibson.dropbear.id.au --- Documentation/vfio.txt| 12 drivers/vfio/vfio_spapr_eeh.c | 10 ++ include/uapi/linux/vfio.h | 36 +++- 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index 96978ec..c6e11a3 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt @@ -385,6 +385,18 @@ The code flow from the example above should be slightly changed: + /* Inject EEH error, which is expected to be caused by 32-bits +* config load. +*/ + pe_op.op = VFIO_EEH_PE_INJECT_ERR; + pe_op.err.type = VFIO_EEH_ERR_TYPE_32; + pe_op.err.func = VFIO_EEH_ERR_FUNC_LD_CFG_ADDR; + pe_op.err.addr = 0ul; + pe_op.err.mask = 0ul; + ioctl(container, VFIO_EEH_PE_OP, pe_op); + + + /* When 0xFF's returned from reading PCI config space or IO BARs * of the PCI device. Check the PE's state to see if that has been * frozen. 
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c index 5fa42db..38edeb4 100644 --- a/drivers/vfio/vfio_spapr_eeh.c +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, case VFIO_EEH_PE_CONFIGURE: ret = eeh_pe_configure(pe); break; + case VFIO_EEH_PE_INJECT_ERR: + minsz = offsetofend(struct vfio_eeh_pe_op, err.mask); + if (op.argsz minsz) + return -EINVAL; + if (copy_from_user(op, (void __user *)arg, minsz)) + return -EFAULT; + + ret = eeh_pe_inject_err(pe, op.err.type, op.err.func, + op.err.addr, op.err.mask); + break; default: ret = -EINVAL; } diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 82889c3..f68e962 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -468,12 +468,23 @@ struct vfio_iommu_spapr_tce_info { * - unfreeze IO/DMA for frozen PE; * - read PE state; * - reset PE; - * - configure PE. + * - configure PE; + * - inject EEH error. */ +struct vfio_eeh_pe_err { + __u32 type; + __u32 func; + __u64 addr; + __u64 mask; +}; + struct vfio_eeh_pe_op { __u32 argsz; __u32 flags; __u32 op; + union { + struct vfio_eeh_pe_err err; + }; }; #define VFIO_EEH_PE_DISABLE0 /* Disable EEH functionality */ @@ -490,6 +501,29 @@ struct vfio_eeh_pe_op { #define VFIO_EEH_PE_RESET_HOT 6 /* Assert hot reset */ #define VFIO_EEH_PE_RESET_FUNDAMENTAL 7 /* Assert fundamental reset */ #define VFIO_EEH_PE_CONFIGURE 8 /* PE configuration */ +#define VFIO_EEH_PE_INJECT_ERR 9 /* Inject EEH error */ +#define VFIO_EEH_ERR_TYPE_32 0 /* 32-bits EEH error type*/ +#define VFIO_EEH_ERR_TYPE_64 1 /* 64-bits EEH error type*/ +#define VFIO_EEH_ERR_FUNC_LD_MEM_ADDR 0 /* Memory load */ +#define VFIO_EEH_ERR_FUNC_LD_MEM_DATA 1 +#define VFIO_EEH_ERR_FUNC_LD_IO_ADDR 2 /* IO load */ +#define VFIO_EEH_ERR_FUNC_LD_IO_DATA 3 +#define VFIO_EEH_ERR_FUNC_LD_CFG_ADDR 4 /* Config load */ +#define VFIO_EEH_ERR_FUNC_LD_CFG_DATA 5 +#define VFIO_EEH_ERR_FUNC_ST_MEM_ADDR 6 /* Memory 
store */ +#define VFIO_EEH_ERR_FUNC_ST_MEM_DATA 7 +#define VFIO_EEH_ERR_FUNC_ST_IO_ADDR
Re: [PATCH] KVM: x86: call irq notifiers with directed EOI
On Wed, Mar 18, 2015 at 07:38:22PM +0100, Radim Krčmář wrote: kvm_ioapic_update_eoi() wasn't called if directed EOI was enabled. We need to do that for irq notifiers. (Like with edge interrupts.) Fix it by skipping EOI broadcast only. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=82211 Signed-off-by: Radim Krčmář rkrc...@redhat.com --- arch/x86/kvm/ioapic.c | 4 +++- arch/x86/kvm/lapic.c | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) Applied to master, thanks. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: x86: inline kvm_ioapic_handles_vector()
On Thu, Mar 19, 2015 at 09:52:41PM +0100, Radim Krčmář wrote: An overhead from function call is not appropriate for its size and frequency of execution. Suggested-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Radim Krčmář rkrc...@redhat.com --- I'm not very fond of that smp_rmb(): there is no real synchronization against update_handled_vectors(), Yes, because the guest OS should provide synchronization (it should shutdown interrupts before attempting to modify IOAPIC table). The smp_wmb is necessary. so the only point I see is to drop cached value of handled_vectors, which seems like bad use of LFENCE. test_bit has volatile on *addr, so don't see why the smp_rmb is necessary at all. Applied, thanks. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
x86: kvm: rename migrate_count variable
As thats more indicative of the variables usage. Suggested by Andy Lutomirski. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 25b1cc0..1c1b474 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -95,7 +95,7 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, struct pvclock_vsyscall_time_info { struct pvclock_vcpu_time_info pvti; - u32 migrate_count; + u32 migrate_from_count; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index e5ecd20..8eaf04b 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -172,7 +172,7 @@ static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l, if (unlikely(pvti == NULL)) return NOTIFY_DONE; - pvti-migrate_count++; + pvti-migrate_from_count++; return NOTIFY_DONE; } diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 3093376..ef8bb76 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -82,7 +82,7 @@ static notrace cycle_t vread_pvclock(int *mode) cycle_t ret; u64 last; u32 version; - u32 migrate_count; + u32 migrate_from_count; u8 flags; unsigned cpu, cpu1; @@ -101,7 +101,7 @@ static notrace cycle_t vread_pvclock(int *mode) pvti = get_pvti(cpu); - migrate_count = pvti-migrate_count; + migrate_from_count = pvti-migrate_from_count; version = __pvclock_read_cycles(pvti-pvti, ret, flags); @@ -115,7 +115,7 @@ static notrace cycle_t vread_pvclock(int *mode) } while (unlikely(cpu != cpu1 || (pvti-pvti.version 1) || pvti-pvti.version != version || - pvti-migrate_count != migrate_count)); + pvti-migrate_from_count != migrate_from_count)); if (unlikely(!(flags PVCLOCK_TSC_STABLE_BIT))) *mode = VCLOCK_NONE; -- To unsubscribe from this list: send the line unsubscribe kvm in the body of 
a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 2/2] drivers/vfio: Support EEH error injection
On Tue, Mar 24, 2015 at 12:22:25PM +1100, David Gibson wrote: On Tue, Mar 24, 2015 at 09:47:54AM +1100, Gavin Shan wrote: On Mon, Mar 23, 2015 at 10:14:59AM -0600, Alex Williamson wrote: On Mon, 2015-03-23 at 16:20 +1100, Gavin Shan wrote: On Mon, Mar 23, 2015 at 04:10:20PM +1100, David Gibson wrote: On Mon, Mar 23, 2015 at 04:03:59PM +1100, Gavin Shan wrote: On Mon, Mar 23, 2015 at 02:43:03PM +1100, David Gibson wrote: On Mon, Mar 23, 2015 at 12:56:36PM +1100, Gavin Shan wrote: On Mon, Mar 23, 2015 at 12:39:45PM +1100, David Gibson wrote: On Sat, Mar 21, 2015 at 06:58:45AM +1100, Gavin Shan wrote: The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR) to inject the specified EEH error, which is represented by (struct vfio_eeh_pe_err), to the indicated PE for testing purpose. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com Reviewed-by: David Gibson da...@gibson.dropbear.id.au --- Documentation/vfio.txt| 12 drivers/vfio/vfio_spapr_eeh.c | 10 ++ include/uapi/linux/vfio.h | 36 +++- 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index 96978ec..c6e11a3 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt @@ -385,6 +385,18 @@ The code flow from the example above should be slightly changed: +/* Inject EEH error, which is expected to be caused by 32-bits + * config load. + */ +pe_op.op = VFIO_EEH_PE_INJECT_ERR; +pe_op.err.type = VFIO_EEH_ERR_TYPE_32; +pe_op.err.func = VFIO_EEH_ERR_FUNC_LD_CFG_ADDR; +pe_op.err.addr = 0ul; +pe_op.err.mask = 0ul; +ioctl(container, VFIO_EEH_PE_OP, pe_op); + + + /* When 0xFF's returned from reading PCI config space or IO BARs * of the PCI device. Check the PE's state to see if that has been * frozen. 
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c index 5fa42db..38edeb4 100644 --- a/drivers/vfio/vfio_spapr_eeh.c +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, case VFIO_EEH_PE_CONFIGURE: ret = eeh_pe_configure(pe); break; +case VFIO_EEH_PE_INJECT_ERR: +minsz = offsetofend(struct vfio_eeh_pe_op, err.mask); +if (op.argsz minsz) +return -EINVAL; +if (copy_from_user(op, (void __user *)arg, minsz)) +return -EFAULT; + +ret = eeh_pe_inject_err(pe, op.err.type, op.err.func, +op.err.addr, op.err.mask); +break; default: ret = -EINVAL; } diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 82889c3..f68e962 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -468,12 +468,23 @@ struct vfio_iommu_spapr_tce_info { * - unfreeze IO/DMA for frozen PE; * - read PE state; * - reset PE; - * - configure PE. + * - configure PE; + * - inject EEH error. */ +struct vfio_eeh_pe_err { +__u32 type; +__u32 func; +__u64 addr; +__u64 mask; +}; + struct vfio_eeh_pe_op { __u32 argsz; __u32 flags; __u32 op; +union { +struct vfio_eeh_pe_err err; +}; }; #define VFIO_EEH_PE_DISABLE 0 /* Disable EEH functionality */ @@ -490,6 +501,29 @@ struct vfio_eeh_pe_op { #define VFIO_EEH_PE_RESET_HOT 6 /* Assert hot reset */ #define VFIO_EEH_PE_RESET_FUNDAMENTAL 7 /* Assert fundamental reset */ #define VFIO_EEH_PE_CONFIGURE 8 /* PE configuration */ +#define VFIO_EEH_PE_INJECT_ERR 9 /* Inject EEH error */ +#define VFIO_EEH_ERR_TYPE_32 0 /* 32-bits EEH error type*/ +#define VFIO_EEH_ERR_TYPE_64 1 /* 64-bits EEH error type*/ +#define VFIO_EEH_ERR_FUNC_LD_MEM_ADDR 0 /* Memory load */ +#define VFIO_EEH_ERR_FUNC_LD_MEM_DATA 1 +#define VFIO_EEH_ERR_FUNC_LD_IO_ADDR
Re: [PATCH v3 2/2] drivers/vfio: Support EEH error injection
On Tue, Mar 24, 2015 at 09:47:54AM +1100, Gavin Shan wrote: On Mon, Mar 23, 2015 at 10:14:59AM -0600, Alex Williamson wrote: On Mon, 2015-03-23 at 16:20 +1100, Gavin Shan wrote: On Mon, Mar 23, 2015 at 04:10:20PM +1100, David Gibson wrote: On Mon, Mar 23, 2015 at 04:03:59PM +1100, Gavin Shan wrote: On Mon, Mar 23, 2015 at 02:43:03PM +1100, David Gibson wrote: On Mon, Mar 23, 2015 at 12:56:36PM +1100, Gavin Shan wrote: On Mon, Mar 23, 2015 at 12:39:45PM +1100, David Gibson wrote: On Sat, Mar 21, 2015 at 06:58:45AM +1100, Gavin Shan wrote: The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR) to inject the specified EEH error, which is represented by (struct vfio_eeh_pe_err), to the indicated PE for testing purpose. Signed-off-by: Gavin Shan gws...@linux.vnet.ibm.com Reviewed-by: David Gibson da...@gibson.dropbear.id.au --- Documentation/vfio.txt| 12 drivers/vfio/vfio_spapr_eeh.c | 10 ++ include/uapi/linux/vfio.h | 36 +++- 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index 96978ec..c6e11a3 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt @@ -385,6 +385,18 @@ The code flow from the example above should be slightly changed: + /* Inject EEH error, which is expected to be caused by 32-bits + * config load. + */ + pe_op.op = VFIO_EEH_PE_INJECT_ERR; + pe_op.err.type = VFIO_EEH_ERR_TYPE_32; + pe_op.err.func = VFIO_EEH_ERR_FUNC_LD_CFG_ADDR; + pe_op.err.addr = 0ul; + pe_op.err.mask = 0ul; + ioctl(container, VFIO_EEH_PE_OP, pe_op); + + + /* When 0xFF's returned from reading PCI config space or IO BARs * of the PCI device. Check the PE's state to see if that has been * frozen. 
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c index 5fa42db..38edeb4 100644 --- a/drivers/vfio/vfio_spapr_eeh.c +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, case VFIO_EEH_PE_CONFIGURE: ret = eeh_pe_configure(pe); break; + case VFIO_EEH_PE_INJECT_ERR: + minsz = offsetofend(struct vfio_eeh_pe_op, err.mask); + if (op.argsz minsz) + return -EINVAL; + if (copy_from_user(op, (void __user *)arg, minsz)) + return -EFAULT; + + ret = eeh_pe_inject_err(pe, op.err.type, op.err.func, + op.err.addr, op.err.mask); + break; default: ret = -EINVAL; } diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 82889c3..f68e962 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -468,12 +468,23 @@ struct vfio_iommu_spapr_tce_info { * - unfreeze IO/DMA for frozen PE; * - read PE state; * - reset PE; - * - configure PE. + * - configure PE; + * - inject EEH error. */ +struct vfio_eeh_pe_err { + __u32 type; + __u32 func; + __u64 addr; + __u64 mask; +}; + struct vfio_eeh_pe_op { __u32 argsz; __u32 flags; __u32 op; + union { + struct vfio_eeh_pe_err err; + }; }; #define VFIO_EEH_PE_DISABLE 0 /* Disable EEH functionality */ @@ -490,6 +501,29 @@ struct vfio_eeh_pe_op { #define VFIO_EEH_PE_RESET_HOT6 /* Assert hot reset */ #define VFIO_EEH_PE_RESET_FUNDAMENTAL7 /* Assert fundamental reset */ #define VFIO_EEH_PE_CONFIGURE8 /* PE configuration */ +#define VFIO_EEH_PE_INJECT_ERR 9 /* Inject EEH error */ +#define VFIO_EEH_ERR_TYPE_320 /* 32-bits EEH error type*/ +#define VFIO_EEH_ERR_TYPE_641 /* 64-bits EEH error type*/ +#define VFIO_EEH_ERR_FUNC_LD_MEM_ADDR 0 /* Memory load */ +#define VFIO_EEH_ERR_FUNC_LD_MEM_DATA 1 +#define VFIO_EEH_ERR_FUNC_LD_IO_ADDR2 /* IO load */ +#define VFIO_EEH_ERR_FUNC_LD_IO_DATA3 +#define VFIO_EEH_ERR_FUNC_LD_CFG_ADDR 4 /* Config load */ +#define VFIO_EEH_ERR_FUNC_LD_CFG_DATA 5 +#define VFIO_EEH_ERR_FUNC_ST_MEM_ADDR 6 /* Memory store */ 
+#define