Re: [PATCH 09/11] kvm/x86: distinguish hyper-v guest crash notification
On 22/06/2015 18:05, Denis V. Lunev wrote: From: Andrey Smetanin asmeta...@virtuozzo.com Previous patches allowes userspace to setup Hyper-V crash ctl msr. This msr should expose HV_X64_MSR_CRASH_CTL_NOTIFY value to Hyper-V guest to allow to send crash data. Unfortunately Hyper-V guest notifies hardware about crash by writing the same HV_X64_MSR_CRASH_CTL_NOTIFY value into crash ctl msr. Thus both user space and guest writes inside ctl msr the same value and this patch distingiush the moment of actual guest crash by checking host initiated value from msr info. Signed-off-by: Andrey Smetanin asmeta...@virtuozzo.com Signed-off-by: Denis V. Lunev d...@openvz.org CC: Paolo Bonzini pbonz...@redhat.com CC: Gleb Natapov g...@kernel.org --- arch/x86/kvm/hyperv.c | 17 + arch/x86/kvm/hyperv.h | 2 +- arch/x86/kvm/x86.c| 3 ++- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 6b18015..f49502a 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -54,12 +54,12 @@ static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata) return 0; } -static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data) +static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host) { struct kvm_arch_hyperv *hv = vcpu-kvm-arch.hyperv; hv-hv_crash_ctl = data; - if ((data HV_X64_MSR_CRASH_CTL_NOTIFY)) { + if ((data HV_X64_MSR_CRASH_CTL_NOTIFY) !host) { vcpu_debug(vcpu, hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n, hv-hv_crash_param[0], hv-hv_crash_param[1], @@ -99,7 +99,8 @@ static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu, return 0; } -static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) +static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, + u32 msr, u64 data, bool host) { struct kvm *kvm = vcpu-kvm; struct kvm_arch_hyperv *hv = kvm-arch.hyperv; @@ -156,7 +157,7 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) msr - HV_X64_MSR_CRASH_P0, 
data); case HV_X64_MSR_CRASH_CTL: - return kvm_hv_msr_set_crash_ctl(vcpu, data); + return kvm_hv_msr_set_crash_ctl(vcpu, data, host); default: vcpu_unimpl(vcpu, Hyper-V unimpl wrmsr: 0x%x data 0x%llx\n, msr, data); @@ -165,7 +166,7 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) return 0; } -static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data) +static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) { struct kvm_vcpu_arch_hyperv *hv = vcpu-arch.hyperv; @@ -278,17 +279,17 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) return 0; } -int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) +int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) { if (kvm_hv_msr_partition_wide(msr)) { int r; mutex_lock(vcpu-kvm-lock); - r = kvm_hv_set_msr_pw(vcpu, msr, data); + r = kvm_hv_set_msr_pw(vcpu, msr, data, host); mutex_unlock(vcpu-kvm-lock); return r; } else - return kvm_hv_set_msr(vcpu, msr, data); + return kvm_hv_set_msr(vcpu, msr, data, host); } int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 39aee93..dc49527 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -22,7 +22,7 @@ #ifndef __ARCH_X86_KVM_HYPERV_H__ #define __ARCH_X86_KVM_HYPERV_H__ -int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); +int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host); int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); bool kvm_hv_hypercall_enabled(struct kvm *kvm); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 111fa83..db4eecb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2210,7 +2210,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: case HV_X64_MSR_CRASH_P0 ... 
HV_X64_MSR_CRASH_P4: case HV_X64_MSR_CRASH_CTL: - return kvm_hv_set_msr_common(vcpu, msr, data); + return kvm_hv_set_msr_common(vcpu, msr, data, + msr_info-host_initiated); break; case MSR_IA32_BBL_CR_CTL3: /* Drop writes to this legacy MSR -- see rdmsr This has to be squashed
Re: [PATCH 10/11] qemu/kvm: kvm hyper-v based guest crash event handling
On 22/06/2015 18:05, Denis V. Lunev wrote: +void qemu_system_guest_panicked(void) +{ +qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE, error_abort); +vm_stop(RUN_STATE_GUEST_PANICKED); +} + Please call this in pvpanic.c and target-s390x/kvm.c (replacing the guest_panicked function in that file there) as well. @@ -2540,6 +2573,53 @@ static bool host_supports_vmx(void) return ecx CPUID_EXT_VMX; } +int kvm_arch_handle_hv_crash(CPUState *cs) +{ +X86CPU *cpu = X86_CPU(cs); +CPUX86State *env = cpu-env; +struct { +struct kvm_msrs info; +struct kvm_msr_entry entries[HV_X64_MSR_CRASH_PARAMS + 1]; +} msr_data; +struct kvm_msr_entry *msrs = msr_data.entries; +int ret, n, i; + +if (!has_msr_hv_crash) { +return -EINVAL; +} + +for (n = 0; n HV_X64_MSR_CRASH_PARAMS; n++) { +msrs[n].index = HV_X64_MSR_CRASH_P0 + n; +} + +msrs[n++].index = HV_X64_MSR_CRASH_CTL; +msr_data.info = (struct kvm_msrs) { +.nmsrs = n, +}; + +ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, msr_data); +if (ret 0) { +return ret; +} + +for (i = 0; i ret; i++) { +uint32_t index = msrs[i].index; + +switch (index) { +case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: +env-msr_hv_crash_prm[index - HV_X64_MSR_CRASH_P0] = msrs[i].data; +break; +case HV_X64_MSR_CRASH_CTL: +env-msr_hv_crash_ctl = msrs[i].data; +break; +default: +break; +} +} + +return 0; +} + Is this necessary? The call to cpu_synchronize_all_states in qemu_savevm_state_complete should be enough. If necessary, you can call it from qemu_system_guest_panicked instead of special casing the crash MSRs here. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in
Re: [PATCH 11/11] qemu/kvm: mark in cpu state that hyper-v crash occurred
On 22/06/15 19:33, Andreas Färber wrote: Am 22.06.2015 um 18:27 schrieb Paolo Bonzini: On the other hand, I wonder if current_cpu is available in qemu_system_guest_panicked. If so, you could add the field to the generic CPUState struct and migrate it as a subsection of vmstate_cpu_common. Hm, not sure whether it is. Would that work with the two ways we use vmstate_cpu_common though? I.e., can a nested VMState struct (VMSTATE_CPU()) have subsections? Regards, Andreas we'd better squash to avoid troubles. Any other issues? -- To unsubscribe from this list: send the line unsubscribe kvm in
Re: [PATCH] MAINTAINERS: Add vfio-platform sub-maintainer
Acked-by: Baptiste Reynal b.rey...@virtualopensystems.com On Thu, Jun 18, 2015 at 8:06 PM, Alex Williamson alex.william...@redhat.com wrote: Add Baptiste Reynal as the VFIO platform driver sub-maintainer. Signed-off-by: Alex Williamson alex.william...@redhat.com Cc: Baptiste Reynal b.rey...@virtualopensystems.com --- MAINTAINERS |6 ++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index d8afd29..c6bf7f6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10545,6 +10545,12 @@ F: drivers/vfio/ F: include/linux/vfio.h F: include/uapi/linux/vfio.h +VFIO PLATFORM DRIVER +M: Baptiste Reynal b.rey...@virtualopensystems.com +L: kvm@vger.kernel.org +S: Maintained +F: drivers/vfio/platform/ + VIDEOBUF2 FRAMEWORK M: Pawel Osciak pa...@osciak.com M: Marek Szyprowski m.szyprow...@samsung.com -- To unsubscribe from this list: send the line unsubscribe kvm in
Re: [PATCH 10/11] qemu/kvm: kvm hyper-v based guest crash event handling
Another one... On 22/06/2015 18:05, Denis V. Lunev wrote: +case KVM_SYSTEM_EVENT_CRASH: +if (run-system_event.flags KVM_SYSTEM_EVENT_FL_HV_CRASH) { +kvm_arch_handle_hv_crash(cpu); +} +qemu_system_guest_panicked(); Please call kvm_arch_handle_crash(cpu); qemu_system_guest_panicked(); here, and check the HV_CRASH flag inside x86 specific code. Paolo +ret = 0; +break; -- To unsubscribe from this list: send the line unsubscribe kvm in
KVM call for agenda for 2015-06-23 (late call)
Hi Please, send any topic that you are interested in covering. Call details: By popular demand, a google calendar public entry with it https://www.google.com/calendar/embed?src=dG9iMXRqcXAzN3Y4ZXZwNzRoMHE4a3BqcXNAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ (Let me know if you have any problems with the calendar entry. I just gave up about getting right at the same time CEST, CET, EDT and DST). If you need phone number details, contact me privately Thanks, Juan. -- To unsubscribe from this list: send the line unsubscribe kvm in
Re: [RFC 1/6] KVM: api: add kvm_irq_routing_extended_msi
Hi Eric, On 18/06/15 18:40, Eric Auger wrote: On ARM, the MSI msg (address and data) comes along with out-of-band device ID information. The device ID encodes the device that composes the MSI msg. Let's create a new routing entry structure that enables to encode that information on top of standard MSI message Signed-off-by: Eric Auger eric.au...@linaro.org --- Documentation/virtual/kvm/api.txt | 9 + include/uapi/linux/kvm.h | 9 + 2 files changed, 18 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index d20fd94..bcec91e 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1419,6 +1419,7 @@ struct kvm_irq_routing_entry { struct kvm_irq_routing_irqchip irqchip; struct kvm_irq_routing_msi msi; struct kvm_irq_routing_s390_adapter adapter; + struct kvm_irq_routing_extended_msi ext_msi; __u32 pad[8]; } u; }; @@ -1427,6 +1428,7 @@ struct kvm_irq_routing_entry { #define KVM_IRQ_ROUTING_IRQCHIP 1 #define KVM_IRQ_ROUTING_MSI 2 #define KVM_IRQ_ROUTING_S390_ADAPTER 3 +#define KVM_IRQ_ROUTING_EXTENDED_MSI 4 No flags are specified so far, the corresponding field must be set to zero. @@ -1442,6 +1444,13 @@ struct kvm_irq_routing_msi { __u32 pad; }; +struct kvm_irq_routing_extended_msi { + __u32 address_lo; + __u32 address_hi; + __u32 data; + __u32 devid; +}; + I wonder if we could re-use the existing struct kvm_irq_routing_msi, which has an u32 pad field already. Since we use a different type number, this should not break. Admittedly not the nicest thing, but reduces interface bloat: struct kvm_irq_routing_msi { __u32 address_lo; __u32 address_hi; __u32 data; union { __u32 pad; __u32 devid; }; }; to maintain backward compatibility on the userland source level. Cheers, Andre. 
struct kvm_irq_routing_s390_adapter { __u64 ind_addr; __u64 summary_addr; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 2a23705..e3f65a0 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -829,6 +829,13 @@ struct kvm_irq_routing_msi { __u32 pad; }; +struct kvm_irq_routing_extended_msi { + __u32 address_lo; + __u32 address_hi; + __u32 data; + __u32 devid; +}; + struct kvm_irq_routing_s390_adapter { __u64 ind_addr; __u64 summary_addr; @@ -841,6 +848,7 @@ struct kvm_irq_routing_s390_adapter { #define KVM_IRQ_ROUTING_IRQCHIP 1 #define KVM_IRQ_ROUTING_MSI 2 #define KVM_IRQ_ROUTING_S390_ADAPTER 3 +#define KVM_IRQ_ROUTING_EXTENDED_MSI 4 struct kvm_irq_routing_entry { __u32 gsi; @@ -851,6 +859,7 @@ struct kvm_irq_routing_entry { struct kvm_irq_routing_irqchip irqchip; struct kvm_irq_routing_msi msi; struct kvm_irq_routing_s390_adapter adapter; + struct kvm_irq_routing_extended_msi ext_msi; __u32 pad[8]; } u; }; -- To unsubscribe from this list: send the line unsubscribe kvm in
[3.16.y-ckt stable] Patch MIPS: KVM: Do not sign extend on unsigned MMIO load has been added to staging queue
This is a note to let you know that I have just added a patch titled MIPS: KVM: Do not sign extend on unsigned MMIO load to the linux-3.16.y-queue branch of the 3.16.y-ckt extended stable tree which can be found at: http://kernel.ubuntu.com/git/ubuntu/linux.git/log/?h=linux-3.16.y-queue This patch is scheduled to be released in version 3.16.7-ckt14. If you, or anyone else, feels it should not be added to this tree, please reply to this email. For more information about the 3.16.y-ckt tree, see https://wiki.ubuntu.com/Kernel/Dev/ExtendedStable Thanks. -Luis -- From 4c4ae634491a443d4ec81217de248b194be8ad7a Mon Sep 17 00:00:00 2001 From: Nicholas Mc Guire hof...@osadl.org Date: Thu, 7 May 2015 14:47:50 +0200 Subject: MIPS: KVM: Do not sign extend on unsigned MMIO load commit ed9244e6c534612d2b5ae47feab2f55a0d4b4ced upstream. Fix possible unintended sign extension in unsigned MMIO loads by casting to uint16_t in the case of mmio_needed != 2. Signed-off-by: Nicholas Mc Guire hof...@osadl.org Reviewed-by: James Hogan james.ho...@imgtec.com Tested-by: James Hogan james.ho...@imgtec.com Cc: Gleb Natapov g...@kernel.org Cc: Paolo Bonzini pbonz...@redhat.com Cc: kvm@vger.kernel.org Cc: linux-m...@linux-mips.org Cc: linux-ker...@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/9985/ Signed-off-by: Ralf Baechle r...@linux-mips.org [ luis: backported to 3.16: - file rename: emulate.c - kvm_mips_emul.c ] Signed-off-by: Luis Henriques luis.henriq...@canonical.com --- arch/mips/kvm/kvm_mips_emul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kvm/kvm_mips_emul.c b/arch/mips/kvm/kvm_mips_emul.c index 2071472bc3c4..18b4e2fdae33 100644 --- a/arch/mips/kvm/kvm_mips_emul.c +++ b/arch/mips/kvm/kvm_mips_emul.c @@ -2130,7 +2130,7 @@ kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) if (vcpu-mmio_needed == 2) *gpr = *(int16_t *) run-mmio.data; else - *gpr = *(int16_t *) run-mmio.data; + *gpr = *(uint16_t *)run-mmio.data; 
break; case 1: -- To unsubscribe from this list: send the line unsubscribe kvm in
[PATCH 03/11] kvm: add hyper-v crash msrs constants
From: Andrey Smetanin asmeta...@virtuozzo.com Added Hyper-V crash msrs HV_X64_MSR_CRASH* constants. Signed-off-by: Andrey Smetanin asmeta...@virtuozzo.com Signed-off-by: Denis V. Lunev d...@openvz.org CC: Paolo Bonzini pbonz...@redhat.com CC: Gleb Natapov g...@kernel.org --- arch/x86/include/uapi/asm/hyperv.h | 15 +++ 1 file changed, 15 insertions(+) diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index ce6068d..a1be4ba 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -199,6 +199,21 @@ #define HV_X64_MSR_STIMER3_CONFIG 0x40B6 #define HV_X64_MSR_STIMER3_COUNT 0x40B7 +/* Hypev-V guest crash notification MSR's */ +#define HV_X64_MSR_CRASH_P00x4100 +#define HV_X64_MSR_CRASH_P10x4101 +#define HV_X64_MSR_CRASH_P20x4102 +#define HV_X64_MSR_CRASH_P30x4103 +#define HV_X64_MSR_CRASH_P40x4104 +#define HV_X64_MSR_CRASH_CTL 0x4105 +#define HV_X64_MSR_CRASH_CTL_NOTIFY(1ULL 63) +#define HV_X64_MSR_CRASH_CTL_CONTENTS \ + (HV_X64_MSR_CRASH_CTL_NOTIFY) + +#define HV_X64_MSR_CRASH_PARAMS\ + (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0)) + + #define HV_X64_MSR_HYPERCALL_ENABLE0x0001 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT12 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \ -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe kvm in
[PATCH 02/11] kvm: introduce vcpu_debug = kvm_debug + vcpu context
From: Andrey Smetanin asmeta...@virtuozzo.com vcpu_debug is a useful macro like kvm_debug and additionally includes vcpu context into output. Signed-off-by: Andrey Smetanin asmeta...@virtuozzo.com Signed-off-by: Denis V. Lunev d...@openvz.org CC: Paolo Bonzini pbonz...@redhat.com CC: Gleb Natapov g...@kernel.org --- include/linux/kvm_host.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ad45054..7ee3a90 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -411,6 +411,9 @@ struct kvm { #define vcpu_unimpl(vcpu, fmt, ...)\ kvm_pr_unimpl(vcpu%i fmt, (vcpu)-vcpu_id, ## __VA_ARGS__) +#define vcpu_debug(vcpu, fmt, ...) \ + kvm_debug(vcpu%i fmt, (vcpu)-vcpu_id, ## __VA_ARGS__) + static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) { smp_rmb(); -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe kvm in
[PATCH 05/11] kvm: added KVM_REQ_HV_CRASH value to notify qemu about Hyper-V crash
From: Andrey Smetanin asmeta...@virtuozzo.com Added KVM_REQ_HV_CRASH - vcpu request used for notify user space(QEMU) about Hyper-v crash. Signed-off-by: Andrey Smetanin asmeta...@virtuozzo.com Signed-off-by: Denis V. Lunev d...@openvz.org CC: Paolo Bonzini pbonz...@redhat.com CC: Gleb Natapov g...@kernel.org --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7ee3a90..f1a3977b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -134,6 +134,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_ENABLE_IBS23 #define KVM_REQ_DISABLE_IBS 24 #define KVM_REQ_APIC_PAGE_RELOAD 25 +#define KVM_REQ_HV_CRASH 26 #define KVM_USERSPACE_IRQ_SOURCE_ID0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe kvm in
[PATCH 11/11] qemu/kvm: mark in cpu state that hyper-v crash occurred
From: Andrey Smetanin asmeta...@virtuozzo.com It's usually impossible to understand from Hyper-V crash msr's that crash happened because ctl msr always contains the same value HV_X64_MSR_CRASH_CTL_NOTIFY. To solve it add a particalar value hv_crash_occurred inside CPU state and migrate this value with crash msr's. Signed-off-by: Andrey Smetanin asmeta...@virtuozzo.com Signed-off-by: Denis V. Lunev d...@openvz.org CC: Paolo Bonzini pbonz...@redhat.com CC: Andreas Färber afaer...@suse.de --- target-i386/cpu.h | 1 + target-i386/kvm.c | 1 + target-i386/machine.c | 1 + 3 files changed, 3 insertions(+) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 474a93e..2958cdc 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -907,6 +907,7 @@ typedef struct CPUX86State { uint64_t msr_hv_tsc; uint64_t msr_hv_crash_prm[HV_X64_MSR_CRASH_PARAMS]; uint64_t msr_hv_crash_ctl; +uint8_t hv_crash_occurred; /* exception/interrupt handling */ int error_code; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 690677b..2c8d00f 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -2616,6 +2616,7 @@ int kvm_arch_handle_hv_crash(CPUState *cs) break; } } +env-hv_crash_occurred = 1; return 0; } diff --git a/target-i386/machine.c b/target-i386/machine.c index 15b3f31..4f72ba8 100644 --- a/target-i386/machine.c +++ b/target-i386/machine.c @@ -679,6 +679,7 @@ static const VMStateDescription vmstate_msr_hyperv_crash = { VMSTATE_UINT64(env.msr_hv_crash_ctl, X86CPU), VMSTATE_UINT64_ARRAY(env.msr_hv_crash_prm, X86CPU, HV_X64_MSR_CRASH_PARAMS), +VMSTATE_UINT8(env.hv_crash_occurred, X86CPU), VMSTATE_END_OF_LIST() } }; -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe kvm in
[PATCH 09/11] kvm/x86: distinguish hyper-v guest crash notification
From: Andrey Smetanin asmeta...@virtuozzo.com Previous patches allowes userspace to setup Hyper-V crash ctl msr. This msr should expose HV_X64_MSR_CRASH_CTL_NOTIFY value to Hyper-V guest to allow to send crash data. Unfortunately Hyper-V guest notifies hardware about crash by writing the same HV_X64_MSR_CRASH_CTL_NOTIFY value into crash ctl msr. Thus both user space and guest writes inside ctl msr the same value and this patch distingiush the moment of actual guest crash by checking host initiated value from msr info. Signed-off-by: Andrey Smetanin asmeta...@virtuozzo.com Signed-off-by: Denis V. Lunev d...@openvz.org CC: Paolo Bonzini pbonz...@redhat.com CC: Gleb Natapov g...@kernel.org --- arch/x86/kvm/hyperv.c | 17 + arch/x86/kvm/hyperv.h | 2 +- arch/x86/kvm/x86.c| 3 ++- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 6b18015..f49502a 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -54,12 +54,12 @@ static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata) return 0; } -static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data) +static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host) { struct kvm_arch_hyperv *hv = vcpu-kvm-arch.hyperv; hv-hv_crash_ctl = data; - if ((data HV_X64_MSR_CRASH_CTL_NOTIFY)) { + if ((data HV_X64_MSR_CRASH_CTL_NOTIFY) !host) { vcpu_debug(vcpu, hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n, hv-hv_crash_param[0], hv-hv_crash_param[1], @@ -99,7 +99,8 @@ static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu, return 0; } -static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) +static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, +u32 msr, u64 data, bool host) { struct kvm *kvm = vcpu-kvm; struct kvm_arch_hyperv *hv = kvm-arch.hyperv; @@ -156,7 +157,7 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) msr - HV_X64_MSR_CRASH_P0, data); case HV_X64_MSR_CRASH_CTL: - return 
kvm_hv_msr_set_crash_ctl(vcpu, data); + return kvm_hv_msr_set_crash_ctl(vcpu, data, host); default: vcpu_unimpl(vcpu, Hyper-V unimpl wrmsr: 0x%x data 0x%llx\n, msr, data); @@ -165,7 +166,7 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) return 0; } -static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data) +static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) { struct kvm_vcpu_arch_hyperv *hv = vcpu-arch.hyperv; @@ -278,17 +279,17 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) return 0; } -int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) +int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) { if (kvm_hv_msr_partition_wide(msr)) { int r; mutex_lock(vcpu-kvm-lock); - r = kvm_hv_set_msr_pw(vcpu, msr, data); + r = kvm_hv_set_msr_pw(vcpu, msr, data, host); mutex_unlock(vcpu-kvm-lock); return r; } else - return kvm_hv_set_msr(vcpu, msr, data); + return kvm_hv_set_msr(vcpu, msr, data, host); } int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 39aee93..dc49527 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -22,7 +22,7 @@ #ifndef __ARCH_X86_KVM_HYPERV_H__ #define __ARCH_X86_KVM_HYPERV_H__ -int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); +int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host); int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); bool kvm_hv_hypercall_enabled(struct kvm *kvm); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 111fa83..db4eecb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2210,7 +2210,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: case HV_X64_MSR_CRASH_P0 ... 
HV_X64_MSR_CRASH_P4: case HV_X64_MSR_CRASH_CTL: - return kvm_hv_set_msr_common(vcpu, msr, data); + return kvm_hv_set_msr_common(vcpu, msr, data, +msr_info-host_initiated); break; case MSR_IA32_BBL_CR_CTL3: /* Drop writes to this legacy MSR -- see rdmsr -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe kvm in
[PATCH 08/11] kvm/x86: add sending hyper-v crash notification to user space
From: Andrey Smetanin asmeta...@virtuozzo.com Sending of notification is done by exiting vcpu to user space if KVM_REQ_HV_CRASH is set for vcpu. kvm_run structure will contain system_event with type equals to KVM_SYSTEM_EVENT_CRASH and flag KVM_SYSTEM_EVENT_FL_HV_CRASH to clarify that the crash occures inside Hyper-V based guest. Signed-off-by: Andrey Smetanin asmeta...@virtuozzo.com Signed-off-by: Denis V. Lunev d...@openvz.org CC: Paolo Bonzini pbonz...@redhat.com CC: Gleb Natapov g...@kernel.org --- arch/x86/kvm/x86.c | 8 include/uapi/linux/kvm.h | 2 ++ 2 files changed, 10 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2046b78..111fa83 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6027,6 +6027,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu_scan_ioapic(vcpu); if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) kvm_vcpu_reload_apic_access_page(vcpu); + if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) { + vcpu-run-exit_reason = KVM_EXIT_SYSTEM_EVENT; + vcpu-run-system_event.type = KVM_SYSTEM_EVENT_CRASH; + vcpu-run-system_event.flags = + KVM_SYSTEM_EVENT_FL_HV_CRASH; + r = 0; + goto out; + } } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 4b60056..22b6cca 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -317,6 +317,8 @@ struct kvm_run { struct { #define KVM_SYSTEM_EVENT_SHUTDOWN 1 #define KVM_SYSTEM_EVENT_RESET 2 +#define KVM_SYSTEM_EVENT_CRASH 3 +#define KVM_SYSTEM_EVENT_FL_HV_CRASH(1ULL 0) __u32 type; __u64 flags; } system_event; -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe kvm in
[PATCH 01/11] kvm/x86: move Hyper-V MSR's/hypercall code into hyperv.c file
From: Andrey Smetanin asmeta...@virtuozzo.com This patch introduces Hyper-V related source code file - hyperv.c and per vm and per vcpu hyperv context structures. All Hyper-V MSR's and hypercall code moved into hyperv.c. All hyper-v kvm/vcpu fields moved into appropriate hyperv context structures. Copyrights and authors information copied from x86.c to hyperv.c. Signed-off-by: Andrey Smetanin asmeta...@virtuozzo.com Signed-off-by: Denis V. Lunev d...@openvz.org CC: Paolo Bonzini pbonz...@redhat.com CC: Gleb Natapov g...@kernel.org --- arch/x86/include/asm/kvm_host.h | 20 ++- arch/x86/kvm/Makefile | 2 +- arch/x86/kvm/hyperv.c | 303 arch/x86/kvm/hyperv.h | 31 arch/x86/kvm/lapic.h| 2 +- arch/x86/kvm/x86.c | 263 +- arch/x86/kvm/x86.h | 5 + 7 files changed, 358 insertions(+), 268 deletions(-) create mode 100644 arch/x86/kvm/hyperv.c create mode 100644 arch/x86/kvm/hyperv.h diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f4a555b..717a03c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -344,6 +344,11 @@ enum { KVM_DEBUGREG_RELOAD = 4, }; +/* Hyper-V per vcpu emulation context */ +struct kvm_vcpu_arch_hyperv { + u64 hv_vapic; +}; + struct kvm_vcpu_arch { /* * rip and regs accesses must go through @@ -498,8 +503,7 @@ struct kvm_vcpu_arch { /* used for guest single stepping over the given code position */ unsigned long singlestep_rip; - /* fields used by HYPER-V emulation */ - u64 hv_vapic; + struct kvm_vcpu_arch_hyperv hyperv; cpumask_var_t wbinvd_dirty_mask; @@ -570,6 +574,13 @@ struct kvm_apic_map { struct kvm_lapic *logical_map[16][16]; }; +/* Hyper-V emulation context */ +struct kvm_arch_hyperv { + u64 hv_guest_os_id; + u64 hv_hypercall; + u64 hv_tsc_page; +}; + struct kvm_arch { unsigned int n_used_mmu_pages; unsigned int n_requested_mmu_pages; @@ -627,10 +638,7 @@ struct kvm_arch { /* reads protected by irq_srcu, writes by irq_lock */ struct hlist_head mask_notifier_list; - /* fields used by 
HYPER-V emulation */ - u64 hv_guest_os_id; - u64 hv_hypercall; - u64 hv_tsc_page; + struct kvm_arch_hyperv hyperv; #ifdef CONFIG_KVM_MMU_AUDIT int audit_point; diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 16e8f96..944c8c8 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -12,7 +12,7 @@ kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ - i8254.o ioapic.o irq_comm.o cpuid.o pmu.o + i8254.o ioapic.o irq_comm.o cpuid.o pmu.o hyperv.o kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)+= assigned-dev.o iommu.o kvm-intel-y+= vmx.o kvm-amd-y += svm.o diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c new file mode 100644 index 000..0d24d9a --- /dev/null +++ b/arch/x86/kvm/hyperv.c @@ -0,0 +1,303 @@ +/* + * KVM Microsoft Hyper-V emulation + * + * Copyright (C) 2006 Qumranet, Inc. + * Copyright (C) 2008 Qumranet, Inc. + * Copyright IBM Corporation, 2008 + * Copyright 2010 Red Hat, Inc. and/or its affiliates. + * Copyright (C) 2015 Andrey Smetanin asmeta...@virtuozzo.com + * + * Authors: + * Avi Kivity a...@qumranet.com + * Yaniv Kamay ya...@qumranet.com + * Amit Shahamit.s...@qumranet.com + * Ben-Ami Yassour ben...@il.ibm.com + * Andrey Smetanin asmeta...@virtuozzo.com + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ * + */ + +#include x86.h +#include lapic.h +#include hyperv.h + +#include linux/kvm_host.h +#include trace/events/kvm.h + +#include trace.h + +static bool kvm_hv_msr_partition_wide(u32 msr) +{ + bool r = false; + + switch (msr) { + case HV_X64_MSR_GUEST_OS_ID: + case HV_X64_MSR_HYPERCALL: + case HV_X64_MSR_REFERENCE_TSC: + case HV_X64_MSR_TIME_REF_COUNT: + r = true; + break; + } + + return r; +} + +static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) +{ + struct kvm *kvm = vcpu-kvm; + struct kvm_arch_hyperv *hv = kvm-arch.hyperv; + + switch (msr) { + case HV_X64_MSR_GUEST_OS_ID: + hv-hv_guest_os_id = data; + /* setting guest os id to zero disables hypercall page */ + if (!hv-hv_guest_os_id) + hv-hv_hypercall = ~HV_X64_MSR_HYPERCALL_ENABLE; + break; + case HV_X64_MSR_HYPERCALL: { + u64 gfn; + unsigned long addr;
[PATCH 07/11] kvm/x86: added hyper-v crash data and ctl msr's get/set'ers
From: Andrey Smetanin asmeta...@virtuozzo.com Added hyper-v crash msr's(HV_X64_MSR_CRASH*) data and control geters and setters. Signed-off-by: Andrey Smetanin asmeta...@virtuozzo.com Signed-off-by: Denis V. Lunev d...@openvz.org CC: Paolo Bonzini pbonz...@redhat.com CC: Gleb Natapov g...@kernel.org --- arch/x86/kvm/hyperv.c | 66 +++ arch/x86/kvm/x86.c| 4 2 files changed, 70 insertions(+) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index f65fb622..0a7d373 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -46,6 +46,59 @@ static bool kvm_hv_msr_partition_wide(u32 msr) return r; } +static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata) +{ + struct kvm_arch_hyperv *hv = vcpu-kvm-arch.hyperv; + + *pdata = hv-hv_crash_ctl; + return 0; +} + +static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data) +{ + struct kvm_arch_hyperv *hv = vcpu-kvm-arch.hyperv; + + hv-hv_crash_ctl = data; + if ((data HV_X64_MSR_CRASH_CTL_NOTIFY)) { + vcpu_debug(vcpu, hv crash (0x%llx 0x%llx 0x%llx 0x%llx + 0x%llx)\n, hv-hv_crash_param[0], + hv-hv_crash_param[1], + hv-hv_crash_param[2], + hv-hv_crash_param[3], + hv-hv_crash_param[4]); + + /* Send notification about crash to user space */ + kvm_make_request(KVM_REQ_HV_CRASH, vcpu); + return 0; + } + + return 0; +} + +static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, +u32 index, u64 data) +{ + struct kvm_arch_hyperv *hv = vcpu-kvm-arch.hyperv; + + if (WARN_ON_ONCE(index = ARRAY_SIZE(hv-hv_crash_param))) + return -EINVAL; + + hv-hv_crash_param[index] = data; + return 0; +} + +static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu, +u32 index, u64 *pdata) +{ + struct kvm_arch_hyperv *hv = vcpu-kvm-arch.hyperv; + + if (WARN_ON_ONCE(index = ARRAY_SIZE(hv-hv_crash_param))) + return -EINVAL; + + *pdata = hv-hv_crash_param[index]; + return 0; +} + static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) { struct kvm *kvm = vcpu-kvm; @@ -98,6 +152,12 @@ static int 
kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) mark_page_dirty(kvm, gfn); break; } + case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: + return kvm_hv_msr_set_crash_data(vcpu, +msr - HV_X64_MSR_CRASH_P0, +data); + case HV_X64_MSR_CRASH_CTL: + return kvm_hv_msr_set_crash_ctl(vcpu, data); default: vcpu_unimpl(vcpu, Hyper-V unimpl wrmsr: 0x%x data 0x%llx\n, msr, data); @@ -170,6 +230,12 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case HV_X64_MSR_REFERENCE_TSC: data = hv-hv_tsc_page; break; + case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: + return kvm_hv_msr_get_crash_data(vcpu, +msr - HV_X64_MSR_CRASH_P0, +pdata); + case HV_X64_MSR_CRASH_CTL: + return kvm_hv_msr_get_crash_ctl(vcpu, pdata); default: vcpu_unimpl(vcpu, Hyper-V unhandled rdmsr: 0x%x\n, msr); return 1; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2755c37..2046b78 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2208,6 +2208,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) */ break; case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: + case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: + case HV_X64_MSR_CRASH_CTL: return kvm_hv_set_msr_common(vcpu, msr, data); break; case MSR_IA32_BBL_CR_CTL3: @@ -2451,6 +2453,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) data = 0x2000; break; case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: + case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: + case HV_X64_MSR_CRASH_CTL: return kvm_hv_get_msr_common(vcpu, msr, pdata); break; case MSR_IA32_BBL_CR_CTL3: -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe kvm in
[PATCH 04/11] kvm/x86: added hyper-v crash msrs into kvm hyperv context
From: Andrey Smetanin asmeta...@virtuozzo.com Added kvm hyperv context hv crash variables as storage of hyper-v crash msrs. Signed-off-by: Andrey Smetanin asmeta...@virtuozzo.com Signed-off-by: Denis V. Lunev d...@openvz.org CC: Paolo Bonzini pbonz...@redhat.com CC: Gleb Natapov g...@kernel.org --- arch/x86/include/asm/kvm_host.h | 4 1 file changed, 4 insertions(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 717a03c..578816a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -579,6 +579,10 @@ struct kvm_arch_hyperv { u64 hv_guest_os_id; u64 hv_hypercall; u64 hv_tsc_page; + + /* Hyper-v based guest crash (NT kernel bugcheck) parameters */ + u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS]; + u64 hv_crash_ctl; }; struct kvm_arch { -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe kvm in
[PATCH 10/11] qemu/kvm: kvm hyper-v based guest crash event handling
From: Andrey Smetanin asmeta...@virtuozzo.com KVM Hyper-V based guests can notify hypervisor about occurred guest crash. This patch does handling of KVM crash event by sending to libvirt guest panic event that allows to gather guest crash dump by QEMU/LIBVIRT. Add support of HV_X64_MSR_CRASH_P0-P4, HV_X64_MSR_CRASH_CTL msrs. The idea is to provide functionality equal to pvpanic device without QEMU guest agent for Windows. The idea is borrowed from Linux HyperV bus driver and validated against Windows 2k12. Signed-off-by: Andrey Smetanin asmeta...@virtuozzo.com Signed-off-by: Denis V. Lunev d...@openvz.org CC: Paolo Bonzini pbonz...@redhat.com CC: Andreas Färber afaer...@suse.de --- include/sysemu/kvm.h | 2 ++ include/sysemu/sysemu.h| 1 + kvm-all.c | 7 linux-headers/asm-x86/hyperv.h | 16 + linux-headers/linux/kvm.h | 2 ++ target-arm/kvm.c | 5 +++ target-i386/cpu-qom.h | 1 + target-i386/cpu.c | 1 + target-i386/cpu.h | 3 ++ target-i386/kvm.c | 80 ++ target-i386/machine.c | 23 target-mips/kvm.c | 5 +++ target-ppc/kvm.c | 5 +++ target-s390x/kvm.c | 5 +++ vl.c | 6 15 files changed, 162 insertions(+) diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index f459fbd..3c0fa02 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -257,6 +257,8 @@ extern const KVMCapabilityInfo kvm_arch_required_capabilities[]; void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run); MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run); +int kvm_arch_handle_hv_crash(CPUState *cpu); + int kvm_arch_handle_exit(CPUState *cpu, struct kvm_run *run); int kvm_arch_process_async_events(CPUState *cpu); diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index df80951..70164c9 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -68,6 +68,7 @@ int qemu_reset_requested_get(void); void qemu_system_killed(int signal, pid_t pid); void qemu_devices_reset(void); void qemu_system_reset(bool report); +void qemu_system_guest_panicked(void); void 
qemu_add_exit_notifier(Notifier *notify); void qemu_remove_exit_notifier(Notifier *notify); diff --git a/kvm-all.c b/kvm-all.c index 53e01d4..1528fb5 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1844,6 +1844,13 @@ int kvm_cpu_exec(CPUState *cpu) qemu_system_reset_request(); ret = EXCP_INTERRUPT; break; +case KVM_SYSTEM_EVENT_CRASH: +if (run-system_event.flags KVM_SYSTEM_EVENT_FL_HV_CRASH) { +kvm_arch_handle_hv_crash(cpu); +} +qemu_system_guest_panicked(); +ret = 0; +break; default: DPRINTF(kvm_arch_handle_exit\n); ret = kvm_arch_handle_exit(cpu, run); diff --git a/linux-headers/asm-x86/hyperv.h b/linux-headers/asm-x86/hyperv.h index ce6068d..aec7d27 100644 --- a/linux-headers/asm-x86/hyperv.h +++ b/linux-headers/asm-x86/hyperv.h @@ -108,6 +108,8 @@ #define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE (1 4) /* Support for a virtual guest idle state is available */ #define HV_X64_GUEST_IDLE_STATE_AVAILABLE (1 5) +/* Guest crash data handler available */ +#define HV_X64_GUEST_CRASH_MSR_AVAILABLE (1 10) /* * Implementation recommendations. 
Indicates which behaviors the hypervisor @@ -199,6 +201,20 @@ #define HV_X64_MSR_STIMER3_CONFIG 0x40B6 #define HV_X64_MSR_STIMER3_COUNT 0x40B7 +/* Hypev-V guest crash notification MSR's */ +#define HV_X64_MSR_CRASH_P00x4100 +#define HV_X64_MSR_CRASH_P10x4101 +#define HV_X64_MSR_CRASH_P20x4102 +#define HV_X64_MSR_CRASH_P30x4103 +#define HV_X64_MSR_CRASH_P40x4104 +#define HV_X64_MSR_CRASH_CTL 0x4105 +#define HV_X64_MSR_CRASH_CTL_NOTIFY(1ULL 63) +#define HV_X64_MSR_CRASH_CTL_CONTENTS \ + (HV_X64_MSR_CRASH_CTL_NOTIFY) + +#define HV_X64_MSR_CRASH_PARAMS\ + (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0)) + #define HV_X64_MSR_HYPERCALL_ENABLE0x0001 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT12 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \ diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index fad9e5c..46cb7e0 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -317,6 +317,8 @@ struct kvm_run { struct { #define KVM_SYSTEM_EVENT_SHUTDOWN 1 #define KVM_SYSTEM_EVENT_RESET 2 +#define KVM_SYSTEM_EVENT_CRASH 3 +#define KVM_SYSTEM_EVENT_FL_HV_CRASH(1ULL
[PATCH 06/11] kvm/x86: mark hyper-v crash msrs as partition wide
From: Andrey Smetanin asmeta...@virtuozzo.com Hyper-V crash msr's a per VM, not per vcpu, so mark them as partition wide. Signed-off-by: Andrey Smetanin asmeta...@virtuozzo.com Signed-off-by: Denis V. Lunev d...@openvz.org CC: Paolo Bonzini pbonz...@redhat.com CC: Gleb Natapov g...@kernel.org --- arch/x86/kvm/hyperv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 0d24d9a..f65fb622 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -37,6 +37,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) case HV_X64_MSR_HYPERCALL: case HV_X64_MSR_REFERENCE_TSC: case HV_X64_MSR_TIME_REF_COUNT: + case HV_X64_MSR_CRASH_CTL: + case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: r = true; break; } -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe kvm in
[PATCH v2 0/11] HyperV equivalent of pvpanic driver
Windows 2012 guests can notify hypervisor about occurred guest crash (Windows bugcheck(BSOD)) by writing specific Hyper-V msrs. This patch does handling of this MSR's by KVM and sending notification to user space that allows to gather Windows guest crash dump by QEMU/LIBVIRT. The idea is to provide functionality equal to pvpanic device without QEMU guest agent for Windows. The idea is borrowed from Linux HyperV bus driver and validated against Windows 2k12. Changes from v1: * hyperv code move to hyperv.c * added read handlers of crash data msrs * added per vm and per cpu hyperv context structures * added saving crash msrs inside qemu cpu state * added qemu fetch and update of crash msrs * added qemu crash msrs store in cpu state and it's migration Signed-off-by: Andrey Smetanin asmeta...@virtuozzo.com Signed-off-by: Denis V. Lunev d...@openvz.org CC: Gleb Natapov g...@kernel.org CC: Paolo Bonzini pbonz...@redhat.com -- To unsubscribe from this list: send the line unsubscribe kvm in
Re: [PATCH 11/11] qemu/kvm: mark in cpu state that hyper-v crash occurred
On 22/06/2015 18:23, Andreas Färber wrote: @@ -679,6 +679,7 @@ static const VMStateDescription vmstate_msr_hyperv_crash = { VMSTATE_UINT64(env.msr_hv_crash_ctl, X86CPU), VMSTATE_UINT64_ARRAY(env.msr_hv_crash_prm, X86CPU, HV_X64_MSR_CRASH_PARAMS), +VMSTATE_UINT8(env.hv_crash_occurred, X86CPU), VMSTATE_END_OF_LIST() } }; This looks like a migration format breakage. You probably need to squash it with the preceding patch so that the cpu/msr_hyperv_crash subsection does not change in size between commits. Just incrementing the version is not an option for subsections, I think? We don't usually care about migration format within the same upstream release, but yes that would be better. On the other hand, I wonder if current_cpu is available in qemu_system_guest_panicked. If so, you could add the field to the generic CPUState struct and migrate it as a subsection of vmstate_cpu_common. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in
Re: [PATCH 11/11] qemu/kvm: mark in cpu state that hyper-v crash occurred
Am 22.06.2015 um 18:05 schrieb Denis V. Lunev: From: Andrey Smetanin asmeta...@virtuozzo.com It's usually impossible to understand from Hyper-V crash msr's that crash happened because ctl msr always contains the same value HV_X64_MSR_CRASH_CTL_NOTIFY. To solve it add a particalar value hv_crash_occurred inside CPU state and migrate this value with crash msr's. Signed-off-by: Andrey Smetanin asmeta...@virtuozzo.com Signed-off-by: Denis V. Lunev d...@openvz.org CC: Paolo Bonzini pbonz...@redhat.com CC: Andreas Färber afaer...@suse.de --- [...] diff --git a/target-i386/machine.c b/target-i386/machine.c index 15b3f31..4f72ba8 100644 --- a/target-i386/machine.c +++ b/target-i386/machine.c @@ -679,6 +679,7 @@ static const VMStateDescription vmstate_msr_hyperv_crash = { VMSTATE_UINT64(env.msr_hv_crash_ctl, X86CPU), VMSTATE_UINT64_ARRAY(env.msr_hv_crash_prm, X86CPU, HV_X64_MSR_CRASH_PARAMS), +VMSTATE_UINT8(env.hv_crash_occurred, X86CPU), VMSTATE_END_OF_LIST() } }; This looks like a migration format breakage. You probably need to squash it with the preceding patch so that the cpu/msr_hyperv_crash subsection does not change in size between commits. Just incrementing the version is not an option for subsections, I think? Regards, Andreas -- SUSE Linux GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany GF: Felix Imendörffer, Jane Smithard, Dilip Upmanyu, Graham Norton; HRB 21284 (AG Nürnberg) -- To unsubscribe from this list: send the line unsubscribe kvm in
Re: [PATCH 11/11] qemu/kvm: mark in cpu state that hyper-v crash occurred
Am 22.06.2015 um 18:27 schrieb Paolo Bonzini: On the other hand, I wonder if current_cpu is available in qemu_system_guest_panicked. If so, you could add the field to the generic CPUState struct and migrate it as a subsection of vmstate_cpu_common. Hm, not sure whether it is. Would that work with the two ways we use vmstate_cpu_common though? I.e., can a nested VMState struct (VMSTATE_CPU()) have subsections? Regards, Andreas -- SUSE Linux GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany GF: Felix Imendörffer, Jane Smithard, Dilip Upmanyu, Graham Norton; HRB 21284 (AG Nürnberg) -- To unsubscribe from this list: send the line unsubscribe kvm in
Re: [PATCH 11/11] qemu/kvm: mark in cpu state that hyper-v crash occurred
On 22/06/2015 18:33, Andreas Färber wrote: On the other hand, I wonder if current_cpu is available in qemu_system_guest_panicked. If so, you could add the field to the generic CPUState struct and migrate it as a subsection of vmstate_cpu_common. Hm, not sure whether it is. It should be... Would that work with the two ways we use vmstate_cpu_common though? I.e., can a nested VMState struct (VMSTATE_CPU()) have subsections? Yes, it can. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in
Re: Hang on reboot in FreeBSD guest on Linux KVM host
John Nielsen li...@jnielsen.net writes: On Jun 17, 2014, at 10:48 AM, John Nielsen li...@jnielsen.net wrote: On Jun 17, 2014, at 12:05 AM, Gleb Natapov g...@kernel.org wrote: On Tue, Jun 17, 2014 at 06:21:23AM +0200, Paolo Bonzini wrote: Il 16/06/2014 18:47, John Nielsen ha scritto: On Jun 16, 2014, at 10:39 AM, Paolo Bonzini pbonz...@redhat.com wrote: Il 16/06/2014 18:09, John Nielsen ha scritto: The only substantial difference on the hardware side is the CPU. The hosts where the problem occurs use Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz, while the hosts that don't show the problem use the prior revision, Intel(R) Xeon(R) CPU E5-2650 0 @ 2.00GHz. Can you do grep . /sys/module/kvm_intel/parameters/* on both hosts please? No differences that I can see. Output below. Not really: Working host: Intel(R) Xeon(R) CPU E5-2650 0 @ 2.00GHz # grep . /sys/module/kvm_intel/parameters/* /sys/module/kvm_intel/parameters/enable_apicv:N Problem host: Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz # grep . /sys/module/kvm_intel/parameters/* /sys/module/kvm_intel/parameters/enable_apicv:Y So we have a clue. Let me study the code more, I'll try to get back with a suggestion. Wow, can't believe I missed that. Good catch! Does disabling apicv on E5-2650 v2 make reboot problem go away? Yes it does! 
# modprobe kvm_intel /sys/module/kvm_intel/parameters/enable_apicv:Y # /usr/bin/qemu-system-x86_64 -machine accel=kvm -name bsdtest -m 512 -smp 2,sockets=1,cores=1,threads=2 -drive file=./20140613_FreeBSD_9.2-RELEASE_ufs.qcow2,if=none,id=drive0,format=qcow2 -device virtio-blk-pci,scsi=off,drive=drive0 -vnc 0.0.0.0:0 -net none [problem occurs] # rmmod kvm_intel # modprobe kvm_intel enable_apicv=N /sys/module/kvm_intel/parameters/enable_apicv:N # /usr/bin/qemu-system-x86_64 -machine accel=kvm -name bsdtest -m 512 -smp 2,sockets=1,cores=1,threads=2 -drive file=./20140613_FreeBSD_9.2-RELEASE_ufs.qcow2,if=none,id=drive0,format=qcow2 -device virtio-blk-pci,scsi=off,drive=drive0 -vnc 0.0.0.0:0 -net none [problem does not occur] Thank you. This both narrows the problem considerably and provides an acceptable workaround. It would still be nice to see it fixed, of course. Keep me CC'ed as I'm not on the KVM list. I’m resurrecting an old thread since I haven’t heard anything in a while. Has anyone looked in to the KVM+apicv bug documented above as well as here: https://bugs.launchpad.net/qemu/+bug/1329956 ? If appropriate, where should I go to file a KVM bug (since this isn’t really Qemu’s problem)? Hi John, does this happen with the latest upstream kernel version ? Bandan Thanks, JN-- To unsubscribe from this list: send the line unsubscribe kvm in -- To unsubscribe from this list: send the line unsubscribe kvm in
Re: [PATCH 09/11] kvm/x86: distinguish hyper-v guest crash notification
On Mon, Jun 22, 2015 at 9:05 AM, Denis V. Lunev d...@openvz.org wrote: From: Andrey Smetanin asmeta...@virtuozzo.com Previous patches allowes userspace to setup Hyper-V crash ctl msr. This msr should expose HV_X64_MSR_CRASH_CTL_NOTIFY value to Hyper-V guest to allow to send crash data. Unfortunately Hyper-V guest notifies hardware about crash by writing the same HV_X64_MSR_CRASH_CTL_NOTIFY value into crash ctl msr. Thus both user space and guest writes inside ctl msr the same value and this patch distingiush the moment of actual guest crash by checking host initiated value from msr info. Signed-off-by: Andrey Smetanin asmeta...@virtuozzo.com Signed-off-by: Denis V. Lunev d...@openvz.org CC: Paolo Bonzini pbonz...@redhat.com CC: Gleb Natapov g...@kernel.org --- arch/x86/kvm/hyperv.c | 17 + arch/x86/kvm/hyperv.h | 2 +- arch/x86/kvm/x86.c| 3 ++- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 6b18015..f49502a 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -54,12 +54,12 @@ static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata) return 0; } -static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data) +static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host) { struct kvm_arch_hyperv *hv = vcpu-kvm-arch.hyperv; hv-hv_crash_ctl = data; Should we allow hv_crash_ctl to be set if !host? It's a small detail, but it doesn't seem like the guest should be able to disable crash actions that userspace has enabled in hv_crash_ctl. 
- if ((data HV_X64_MSR_CRASH_CTL_NOTIFY)) { + if ((data HV_X64_MSR_CRASH_CTL_NOTIFY) !host) { vcpu_debug(vcpu, hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n, hv-hv_crash_param[0], hv-hv_crash_param[1], @@ -99,7 +99,8 @@ static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu, return 0; } -static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) +static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, +u32 msr, u64 data, bool host) { struct kvm *kvm = vcpu-kvm; struct kvm_arch_hyperv *hv = kvm-arch.hyperv; @@ -156,7 +157,7 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) msr - HV_X64_MSR_CRASH_P0, data); case HV_X64_MSR_CRASH_CTL: - return kvm_hv_msr_set_crash_ctl(vcpu, data); + return kvm_hv_msr_set_crash_ctl(vcpu, data, host); default: vcpu_unimpl(vcpu, Hyper-V unimpl wrmsr: 0x%x data 0x%llx\n, msr, data); @@ -165,7 +166,7 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) return 0; } -static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data) +static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) { struct kvm_vcpu_arch_hyperv *hv = vcpu-arch.hyperv; @@ -278,17 +279,17 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) return 0; } -int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) +int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) { if (kvm_hv_msr_partition_wide(msr)) { int r; mutex_lock(vcpu-kvm-lock); - r = kvm_hv_set_msr_pw(vcpu, msr, data); + r = kvm_hv_set_msr_pw(vcpu, msr, data, host); mutex_unlock(vcpu-kvm-lock); return r; } else - return kvm_hv_set_msr(vcpu, msr, data); + return kvm_hv_set_msr(vcpu, msr, data, host); } int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 39aee93..dc49527 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -22,7 +22,7 @@ #ifndef __ARCH_X86_KVM_HYPERV_H__ #define 
__ARCH_X86_KVM_HYPERV_H__ -int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data); +int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host); int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); bool kvm_hv_hypercall_enabled(struct kvm *kvm); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 111fa83..db4eecb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2210,7 +2210,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: case HV_X64_MSR_CRASH_CTL: - return kvm_hv_set_msr_common(vcpu, msr, data); +
Re: Hang on reboot in FreeBSD guest on Linux KVM host
On Jun 22, 2015, at 3:48 PM, Bandan Das b...@redhat.com wrote: John Nielsen li...@jnielsen.net writes: On Jun 17, 2014, at 10:48 AM, John Nielsen li...@jnielsen.net wrote: On Jun 17, 2014, at 12:05 AM, Gleb Natapov g...@kernel.org wrote: On Tue, Jun 17, 2014 at 06:21:23AM +0200, Paolo Bonzini wrote: Il 16/06/2014 18:47, John Nielsen ha scritto: On Jun 16, 2014, at 10:39 AM, Paolo Bonzini pbonz...@redhat.com wrote: Il 16/06/2014 18:09, John Nielsen ha scritto: The only substantial difference on the hardware side is the CPU. The hosts where the problem occurs use Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz, while the hosts that don't show the problem use the prior revision, Intel(R) Xeon(R) CPU E5-2650 0 @ 2.00GHz. Can you do grep . /sys/module/kvm_intel/parameters/* on both hosts please? No differences that I can see. Output below. Not really: Working host: Intel(R) Xeon(R) CPU E5-2650 0 @ 2.00GHz # grep . /sys/module/kvm_intel/parameters/* /sys/module/kvm_intel/parameters/enable_apicv:N Problem host: Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz # grep . /sys/module/kvm_intel/parameters/* /sys/module/kvm_intel/parameters/enable_apicv:Y So we have a clue. Let me study the code more, I'll try to get back with a suggestion. Wow, can't believe I missed that. Good catch! Does disabling apicv on E5-2650 v2 make reboot problem go away? Yes it does! 
# modprobe kvm_intel /sys/module/kvm_intel/parameters/enable_apicv:Y # /usr/bin/qemu-system-x86_64 -machine accel=kvm -name bsdtest -m 512 -smp 2,sockets=1,cores=1,threads=2 -drive file=./20140613_FreeBSD_9.2-RELEASE_ufs.qcow2,if=none,id=drive0,format=qcow2 -device virtio-blk-pci,scsi=off,drive=drive0 -vnc 0.0.0.0:0 -net none [problem occurs] # rmmod kvm_intel # modprobe kvm_intel enable_apicv=N /sys/module/kvm_intel/parameters/enable_apicv:N # /usr/bin/qemu-system-x86_64 -machine accel=kvm -name bsdtest -m 512 -smp 2,sockets=1,cores=1,threads=2 -drive file=./20140613_FreeBSD_9.2-RELEASE_ufs.qcow2,if=none,id=drive0,format=qcow2 -device virtio-blk-pci,scsi=off,drive=drive0 -vnc 0.0.0.0:0 -net none [problem does not occur] Thank you. This both narrows the problem considerably and provides an acceptable workaround. It would still be nice to see it fixed, of course. Keep me CC'ed as I'm not on the KVM list. I’m resurrecting an old thread since I haven’t heard anything in a while. Has anyone looked in to the KVM+apicv bug documented above as well as here: https://bugs.launchpad.net/qemu/+bug/1329956 ? If appropriate, where should I go to file a KVM bug (since this isn’t really Qemu’s problem)? Hi John, does this happen with the latest upstream kernel version ? I know for sure it happens with 4.0.4 and I’m not aware of any newer changes that would affect it.-- To unsubscribe from this list: send the line unsubscribe kvm in
Re: [PATCH 07/11] kvm/x86: added hyper-v crash data and ctl msrs' getters/setters
On Mon, Jun 22, 2015 at 9:05 AM, Denis V. Lunev d...@openvz.org wrote: From: Andrey Smetanin asmeta...@virtuozzo.com Added hyper-v crash msr's(HV_X64_MSR_CRASH*) data and control geters and setters. Signed-off-by: Andrey Smetanin asmeta...@virtuozzo.com Signed-off-by: Denis V. Lunev d...@openvz.org CC: Paolo Bonzini pbonz...@redhat.com CC: Gleb Natapov g...@kernel.org --- arch/x86/kvm/hyperv.c | 66 +++ arch/x86/kvm/x86.c| 4 2 files changed, 70 insertions(+) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index f65fb622..0a7d373 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -46,6 +46,59 @@ static bool kvm_hv_msr_partition_wide(u32 msr) return r; } +static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata) +{ + struct kvm_arch_hyperv *hv = vcpu-kvm-arch.hyperv; + + *pdata = hv-hv_crash_ctl; I see that this is implemented so that userspace controls the Hyper-V crash capabilities that are enabled - userspace must set HV_X64_MSR_CRASH_CTL_NOTIFY before the guest reads HV_X64_MSR_CRASH_CTL. I just want to check that you considered an alternative: a simpler implementation would have the crash capabilities statically defined by kvm, and HV_X64_MSR_CRASH_CTL_CONTENTS could just be returned here (and hv_crash_ctl could be removed from struct kvm_arch_hyperv). The current implementation is potentially more flexible but makes the MSR handling a little more awkward since the host_initiated bool needs to be passed around (patch 09). I guess either approach seems ok to me. Also, if this patchset is used then it looks like HV_X64_MSR_CRASH_CTL_CONTENTS can be removed. 
+ return 0; +} + +static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data) +{ + struct kvm_arch_hyperv *hv = vcpu-kvm-arch.hyperv; + + hv-hv_crash_ctl = data; + if ((data HV_X64_MSR_CRASH_CTL_NOTIFY)) { + vcpu_debug(vcpu, hv crash (0x%llx 0x%llx 0x%llx 0x%llx + 0x%llx)\n, hv-hv_crash_param[0], + hv-hv_crash_param[1], + hv-hv_crash_param[2], + hv-hv_crash_param[3], + hv-hv_crash_param[4]); + + /* Send notification about crash to user space */ + kvm_make_request(KVM_REQ_HV_CRASH, vcpu); + return 0; Returning from here seems unnecessary - if more crash capabilities are added in the future, the guest might want to invoke multiple capabilities at once, so we'd want to check for those here too. + } + + return 0; +} + +static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, +u32 index, u64 data) +{ + struct kvm_arch_hyperv *hv = vcpu-kvm-arch.hyperv; + + if (WARN_ON_ONCE(index = ARRAY_SIZE(hv-hv_crash_param))) + return -EINVAL; + + hv-hv_crash_param[index] = data; + return 0; +} + +static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu, +u32 index, u64 *pdata) +{ + struct kvm_arch_hyperv *hv = vcpu-kvm-arch.hyperv; + + if (WARN_ON_ONCE(index = ARRAY_SIZE(hv-hv_crash_param))) + return -EINVAL; + + *pdata = hv-hv_crash_param[index]; + return 0; +} + static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) { struct kvm *kvm = vcpu-kvm; @@ -98,6 +152,12 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) mark_page_dirty(kvm, gfn); break; } + case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: + return kvm_hv_msr_set_crash_data(vcpu, +msr - HV_X64_MSR_CRASH_P0, +data); + case HV_X64_MSR_CRASH_CTL: + return kvm_hv_msr_set_crash_ctl(vcpu, data); default: vcpu_unimpl(vcpu, Hyper-V unimpl wrmsr: 0x%x data 0x%llx\n, msr, data); @@ -170,6 +230,12 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case HV_X64_MSR_REFERENCE_TSC: data = hv-hv_tsc_page; break; + case HV_X64_MSR_CRASH_P0 ... 
HV_X64_MSR_CRASH_P4: + return kvm_hv_msr_get_crash_data(vcpu, +msr - HV_X64_MSR_CRASH_P0, +pdata); + case HV_X64_MSR_CRASH_CTL: + return kvm_hv_msr_get_crash_ctl(vcpu, pdata); default: vcpu_unimpl(vcpu, Hyper-V unhandled rdmsr: 0x%x\n, msr); return 1; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2755c37..2046b78 100644 --- a/arch/x86/kvm/x86.c +++
Re: [PATCH 3/5] vhost: support upto 509 memory regions
On Fri, 19 Jun 2015 18:33:39 +0200 Michael S. Tsirkin m...@redhat.com wrote: On Fri, Jun 19, 2015 at 06:26:27PM +0200, Paolo Bonzini wrote: On 19/06/2015 18:20, Michael S. Tsirkin wrote: We could, but I/O is just an example. It can be I/O, a network ring, whatever. We cannot audit all address_space_map uses. No need to audit them all: defer device_add using an hva range until address_space_unmap drops using hvas in range drops reference count to 0. That could be forever. You certainly don't want to lockup the monitor forever just because a device model isn't too friendly to memory hot-unplug. We can defer the addition, no need to lockup the monitor. That's why you need to audit them (also, it's perfectly in the device model's right to use address_space_unmap this way: it's the guest that's buggy and leaves a dangling reference to a region before unplugging it). Paolo Then maybe it's not too bad that the guest will crash because the memory was unmapped. So far HVA is unusable even if we will make this assumption and let guest crash. virt_net doesn't work with it anyway, translation of GPA to HVA for descriptors works as expected (correctly) but vhost+HVA hack backed virtio still can't send/received packets. That's why I prefer to merge kernel solution first as a stable and not introducing any issues solution. And work on userspace approach on top of that. Hopefully it could be done but we still would need time to iron out side effects/issues it causes or could cause so that fix became stable enough for production. -- To unsubscribe from this list: send the line unsubscribe kvm in
Re: [PATCH] arm64/kvm: Add generic v8 KVM target
On Fri, 19 Jun 2015 21:31:27 +0100 Timur Tabi ti...@codeaurora.org wrote: On 06/17/2015 04:00 AM, Suzuki K. Poulose wrote: genericv8_target_table); kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA, genericv8_target_table); + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_GENERIC_V8, + genericv8_target_table); Shouldn't you also remove all of the previous lines that return genericv8_target_table? No. KVM_ARM_TARGET_* are part of the uapi, and existing userspace knows about them. You can't just drop them. What you *could* do would be to map the existing targets to the generic one in a way that leaves userspace blissfully unaware of the underlying change (for example, KVM_ARM_PREFERRED_TARGET should still return KVM_ARM_TARGET_XGENE_POTENZA on an XGene platform, and KVM_ARM_VCPU_INIT should still be accept it). What would be the gain of such a mapping? Added complexity, hardly anything else. We're much better off considering the generic target as a new one, both inside the kernel and in the view we export to userspace. Thanks, M. -- Jazz is not dead. It just smells funny. -- To unsubscribe from this list: send the line unsubscribe kvm in
[PATCH v3 04/11] KVM: arm: common infrastructure for handling AArch32 CP14/CP15
As we're about to trap a bunch of CP14 registers, let's rework the CP15 handling so it can be generalized and work with multiple tables. Signed-off-by: Zhichao Huang zhichao.hu...@linaro.org --- arch/arm/kvm/coproc.c | 176 ++--- arch/arm/kvm/interrupts_head.S | 2 +- 2 files changed, 112 insertions(+), 66 deletions(-) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 9d283d9..d23395b 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -375,6 +375,9 @@ static const struct coproc_reg cp15_regs[] = { { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, }; +static const struct coproc_reg cp14_regs[] = { +}; + /* Target specific emulation tables */ static struct kvm_coproc_target_table *target_tables[KVM_ARM_NUM_TARGETS]; @@ -424,47 +427,75 @@ static const struct coproc_reg *find_reg(const struct coproc_params *params, return NULL; } -static int emulate_cp15(struct kvm_vcpu *vcpu, - const struct coproc_params *params) +/* + * emulate_cp -- tries to match a cp14/cp15 access in a handling table, + *and call the corresponding trap handler. + * + * @params: pointer to the descriptor of the access + * @table: array of trap descriptors + * @num: size of the trap descriptor array + * + * Return 0 if the access has been handled, and -1 if not. + */ +static int emulate_cp(struct kvm_vcpu *vcpu, + const struct coproc_params *params, + const struct coproc_reg *table, + size_t num) { - size_t num; - const struct coproc_reg *table, *r; - - trace_kvm_emulate_cp15_imp(params-Op1, params-Rt1, params-CRn, - params-CRm, params-Op2, params-is_write); + const struct coproc_reg *r; - table = get_target_table(vcpu-arch.target, num); + if (!table) + return -1; /* Not handled */ - /* Search target-specific then generic table. */ r = find_reg(params, table, num); - if (!r) - r = find_reg(params, cp15_regs, ARRAY_SIZE(cp15_regs)); - if (likely(r)) { + if (r) { /* If we don't have an accessor, we should never get here! 
*/ BUG_ON(!r-access); if (likely(r-access(vcpu, params, r))) { /* Skip instruction, since it was emulated */ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); - return 1; } - /* If access function fails, it should complain. */ - } else { - kvm_err(Unsupported guest CP15 access at: %08lx\n, - *vcpu_pc(vcpu)); - print_cp_instr(params); + + /* Handled */ + return 0; } + + /* Not handled */ + return -1; +} + +static void unhandled_cp_access(struct kvm_vcpu *vcpu, + const struct coproc_params *params) +{ + u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); + int cp; + + switch (hsr_ec) { + case HSR_EC_CP15_32: + case HSR_EC_CP15_64: + cp = 15; + break; + case HSR_EC_CP14_MR: + case HSR_EC_CP14_64: + cp = 14; + break; + default: + WARN_ON((cp = -1)); + } + + kvm_err(Unsupported guest CP%d access at: %08lx\n, + cp, *vcpu_pc(vcpu)); + print_cp_instr(params); kvm_inject_undefined(vcpu); - return 1; } -/** - * kvm_handle_cp15_64 -- handles a mrrc/mcrr trap on a guest CP15 access - * @vcpu: The VCPU pointer - * @run: The kvm_run struct - */ -int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) +int kvm_handle_cp_64(struct kvm_vcpu *vcpu, + const struct coproc_reg *global, + size_t nr_global, + const struct coproc_reg *target_specific, + size_t nr_specific) { struct coproc_params params; @@ -478,7 +509,13 @@ int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run) params.Rt2 = (kvm_vcpu_get_hsr(vcpu) 10) 0xf; params.CRm = 0; - return emulate_cp15(vcpu, params); + if (!emulate_cp(vcpu, params, target_specific, nr_specific)) + return 1; + if (!emulate_cp(vcpu, params, global, nr_global)) + return 1; + + unhandled_cp_access(vcpu, params); + return 1; } static void reset_coproc_regs(struct kvm_vcpu *vcpu, @@ -491,12 +528,11 @@ static void reset_coproc_regs(struct kvm_vcpu *vcpu, table[i].reset(vcpu, table[i]); } -/** - * kvm_handle_cp15_32 -- handles a mrc/mcr trap on a guest CP15 access - * @vcpu: The VCPU pointer - * @run: The kvm_run struct - */ -int 
kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
[PATCH v3 05/11] KVM: arm: check ordering of all system register tables
We now have multiple tables for the various system registers we trap. Make sure we check the order of all of them, as it is critical that we get the order right (been there, done that...). Signed-off-by: Zhichao Huang zhichao.hu...@linaro.org --- arch/arm/kvm/coproc.c | 26 +- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index d23395b..16d5f69 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -737,6 +737,9 @@ static struct coproc_reg invariant_cp15[] = { { CRn( 0), CRm( 0), Op1( 0), Op2( 3), is32, NULL, get_TLBTR }, { CRn( 0), CRm( 0), Op1( 0), Op2( 6), is32, NULL, get_REVIDR }, + { CRn( 0), CRm( 0), Op1( 1), Op2( 1), is32, NULL, get_CLIDR }, + { CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR }, + { CRn( 0), CRm( 1), Op1( 0), Op2( 0), is32, NULL, get_ID_PFR0 }, { CRn( 0), CRm( 1), Op1( 0), Op2( 1), is32, NULL, get_ID_PFR1 }, { CRn( 0), CRm( 1), Op1( 0), Op2( 2), is32, NULL, get_ID_DFR0 }, @@ -752,9 +755,6 @@ static struct coproc_reg invariant_cp15[] = { { CRn( 0), CRm( 2), Op1( 0), Op2( 3), is32, NULL, get_ID_ISAR3 }, { CRn( 0), CRm( 2), Op1( 0), Op2( 4), is32, NULL, get_ID_ISAR4 }, { CRn( 0), CRm( 2), Op1( 0), Op2( 5), is32, NULL, get_ID_ISAR5 }, - - { CRn( 0), CRm( 0), Op1( 1), Op2( 1), is32, NULL, get_CLIDR }, - { CRn( 0), CRm( 0), Op1( 1), Op2( 7), is32, NULL, get_AIDR }, }; /* @@ -1297,13 +1297,29 @@ int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) return write_demux_regids(uindices); } +static int check_sysreg_table(const struct coproc_reg *table, unsigned int n) +{ + unsigned int i; + + for (i = 1; i n; i++) { + if (cmp_reg(table[i-1], table[i]) = 0) { + kvm_err(sys_reg table %p out of order (%d)\n, + table, i - 1); + return 1; + } + } + + return 0; +} + void kvm_coproc_table_init(void) { unsigned int i; /* Make sure tables are unique and in order. 
*/ - for (i = 1; i ARRAY_SIZE(cp15_regs); i++) - BUG_ON(cmp_reg(cp15_regs[i-1], cp15_regs[i]) = 0); + BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs))); + BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs))); + BUG_ON(check_sysreg_table(invariant_cp15, ARRAY_SIZE(invariant_cp15))); /* We abuse the reset function to overwrite the table itself. */ for (i = 0; i ARRAY_SIZE(invariant_cp15); i++) -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in
[PATCH v3 06/11] KVM: arm: add trap handlers for 32-bit debug registers
Add handlers for all the 32-bit debug registers. Signed-off-by: Zhichao Huang zhichao.hu...@linaro.org --- arch/arm/include/asm/kvm_asm.h | 12 arch/arm/include/asm/kvm_host.h | 3 + arch/arm/kernel/asm-offsets.c | 1 + arch/arm/kvm/coproc.c | 122 4 files changed, 138 insertions(+) diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 25410b2..ba65e05 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -52,6 +52,18 @@ #define c10_AMAIR1 30 /* Auxilary Memory Attribute Indirection Reg1 */ #define NR_CP15_REGS 31 /* Number of regs (incl. invalid) */ +/* 0 is reserved as an invalid value. */ +#define cp14_DBGBVR0 1 /* Debug Breakpoint Control Registers (0-15) */ +#define cp14_DBGBVR15 16 +#define cp14_DBGBCR0 17 /* Debug Breakpoint Value Registers (0-15) */ +#define cp14_DBGBCR15 32 +#define cp14_DBGWVR0 33 /* Debug Watchpoint Control Registers (0-15) */ +#define cp14_DBGWVR15 48 +#define cp14_DBGWCR0 49 /* Debug Watchpoint Value Registers (0-15) */ +#define cp14_DBGWCR15 64 +#define cp14_DBGDSCRext65 /* Debug Status and Control external */ +#define NR_CP14_REGS 66 /* Number of regs (incl. invalid) */ + #define ARM_EXCEPTION_RESET 0 #define ARM_EXCEPTION_UNDEFINED 1 #define ARM_EXCEPTION_SOFTWARE2 diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index d71607c..3d16820 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -124,6 +124,9 @@ struct kvm_vcpu_arch { struct vgic_cpu vgic_cpu; struct arch_timer_cpu timer_cpu; + /* System control coprocessor (cp14) */ + u32 cp14[NR_CP14_REGS]; + /* * Anything that is not used directly from assembly code goes * here. 
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 871b826..9158de0 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -172,6 +172,7 @@ int main(void) #ifdef CONFIG_KVM_ARM_HOST DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); DEFINE(VCPU_MIDR,offsetof(struct kvm_vcpu, arch.midr)); + DEFINE(VCPU_CP14,offsetof(struct kvm_vcpu, arch.cp14)); DEFINE(VCPU_CP15,offsetof(struct kvm_vcpu, arch.cp15)); DEFINE(VCPU_VFP_GUEST, offsetof(struct kvm_vcpu, arch.vfp_guest)); DEFINE(VCPU_VFP_HOST,offsetof(struct kvm_vcpu, arch.host_cpu_context)); diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 16d5f69..59b65b7 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -220,6 +220,47 @@ bool access_vm_reg(struct kvm_vcpu *vcpu, return true; } +static bool trap_debug32(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p-is_write) + vcpu-arch.cp14[r-reg] = *vcpu_reg(vcpu, p-Rt1); + else + *vcpu_reg(vcpu, p-Rt1) = vcpu-arch.cp14[r-reg]; + + return true; +} + +/* DBGIDR (RO) Debug ID */ +static bool trap_dbgidr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + u32 val; + + if (p-is_write) + return ignore_write(vcpu, p); + + ARM_DBG_READ(c0, c0, 0, val); + *vcpu_reg(vcpu, p-Rt1) = val; + + return true; +} + +/* DBGDSCRint (RO) Debug Status and Control Register */ +static bool trap_dbgdscr(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p-is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p-Rt1) = vcpu-arch.cp14[r-reg]; + + return true; +} + /* * We could trap ID_DFR0 and tell the guest we don't support performance * monitoring. 
Unfortunately the patch to make the kernel check ID_DFR0 was @@ -375,7 +416,88 @@ static const struct coproc_reg cp15_regs[] = { { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, }; +#define DBG_BCR_BVR_WCR_WVR(n) \ + /* DBGBVRn */ \ + { CRn( 0), CRm((n)), Op1( 0), Op2( 4), is32,\ + trap_debug32, reset_val, (cp14_DBGBVR0 + (n)), 0 }, \ + /* DBGBCRn */ \ + { CRn( 0), CRm((n)), Op1( 0), Op2( 5), is32,\ + trap_debug32, reset_val, (cp14_DBGBCR0 + (n)), 0 }, \ + /* DBGWVRn */ \ + { CRn( 0), CRm((n)), Op1( 0), Op2( 6), is32,\ + trap_debug32, reset_val, (cp14_DBGWVR0 + (n)), 0 }, \ + /* DBGWCRn */ \ + { CRn( 0), CRm((n)), Op1(
[PATCH v3 00/11] KVM: arm: debug infrastructure support
This patch series adds debug support, a key feature missing from the KVM/armv7 port. The main idea is borrowed from ARM64, which is to keep track of whether the debug registers are dirty (changed by the guest) or not. In this case, perform the usual save/restore dance, for one run only. It means we only have a penalty if a guest is actively using the debug registers. The amount of registers is properly frightening, but CPUs actually only implement a subset of them. Also, there is a number of registers we don't bother emulating (things having to do with external debug and OSlock). External debug is when you actually plug a physical JTAG into the CPU. OSlock is a way to prevent other software from playing with the debug registers. My understanding is that it is only useful in combination with the external debug. In both cases, implementing support for this is probably not worth the effort, at least for the time being. This has been tested on a Cortex-A15 platform, running 32bit guests. The patches for this series are based off v4.1-rc8 and can be found at: https://git.linaro.org/people/zhichao.huang/linux.git branch: guest-debug/4.1-rc8-v3 From v2 [2]: - Delete the debug mode enabling/disabling strategy - Add missing cp14/cp15 trace events From v1 [1]: - Added missing cp14 reset functions - Disable debug mode if we don't need it to reduce unnecessary switch [1]: https://lists.cs.columbia.edu/pipermail/kvmarm/2015-May/014729.html [2]: https://lists.cs.columbia.edu/pipermail/kvmarm/2015-May/014847.html Zhichao Huang (11): KVM: arm: plug guest debug exploit KVM: arm: rename pm_fake handler to trap_raz_wi KVM: arm: enable to use the ARM_DSCR_MDBGEN macro from KVM assembly code KVM: arm: common infrastructure for handling AArch32 CP14/CP15 KVM: arm: check ordering of all system register tables KVM: arm: add trap handlers for 32-bit debug registers KVM: arm: add trap handlers for 64-bit debug registers KVM: arm: implement dirty bit mechanism for debug registers KVM: arm:
implement lazy world switch for debug registers KVM: arm: add a trace event for cp14 traps KVM: arm: enable trapping of all debug registers arch/arm/include/asm/hw_breakpoint.h | 54 ++--- arch/arm/include/asm/kvm_asm.h | 15 ++ arch/arm/include/asm/kvm_coproc.h| 3 +- arch/arm/include/asm/kvm_host.h | 6 + arch/arm/kernel/asm-offsets.c| 2 + arch/arm/kvm/coproc.c| 407 ++- arch/arm/kvm/handle_exit.c | 4 +- arch/arm/kvm/interrupts.S| 16 ++ arch/arm/kvm/interrupts_head.S | 313 ++- arch/arm/kvm/trace.h | 30 +++ 10 files changed, 762 insertions(+), 88 deletions(-) -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in
[PATCH v3 02/11] KVM: arm: rename pm_fake handler to trap_raz_wi
pm_fake doesn't quite describe what the handler does (ignoring writes and returning 0 for reads). As we're about to use it (a lot) in a different context, rename it with an (admittedly cryptic) name that makes sense for all users. Signed-off-by: Zhichao Huang zhichao.hu...@linaro.org Reviewed-by: Alex Bennee alex.ben...@linaro.org --- arch/arm/kvm/coproc.c | 34 -- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 2e12760..9d283d9 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -229,7 +229,7 @@ bool access_vm_reg(struct kvm_vcpu *vcpu, * must always support PMCCNTR (the cycle counter): we just RAZ/WI for * all PM registers, which doesn't crash the guest kernel at least. */ -static bool pm_fake(struct kvm_vcpu *vcpu, +static bool trap_raz_wi(struct kvm_vcpu *vcpu, const struct coproc_params *p, const struct coproc_reg *r) { @@ -239,19 +239,19 @@ static bool pm_fake(struct kvm_vcpu *vcpu, return read_zero(vcpu, p); } -#define access_pmcr pm_fake -#define access_pmcntenset pm_fake -#define access_pmcntenclr pm_fake -#define access_pmovsr pm_fake -#define access_pmselr pm_fake -#define access_pmceid0 pm_fake -#define access_pmceid1 pm_fake -#define access_pmccntr pm_fake -#define access_pmxevtyper pm_fake -#define access_pmxevcntr pm_fake -#define access_pmuserenr pm_fake -#define access_pmintenset pm_fake -#define access_pmintenclr pm_fake +#define access_pmcr trap_raz_wi +#define access_pmcntenset trap_raz_wi +#define access_pmcntenclr trap_raz_wi +#define access_pmovsr trap_raz_wi +#define access_pmselr trap_raz_wi +#define access_pmceid0 trap_raz_wi +#define access_pmceid1 trap_raz_wi +#define access_pmccntr trap_raz_wi +#define access_pmxevtyper trap_raz_wi +#define access_pmxevcntr trap_raz_wi +#define access_pmuserenr trap_raz_wi +#define access_pmintenset trap_raz_wi +#define access_pmintenclr trap_raz_wi /* Architected CP15 registers.
* CRn denotes the primary register number, but is copied to the CRm in the @@ -532,8 +532,7 @@ int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run) params.Rt2 = (kvm_vcpu_get_hsr(vcpu) 10) 0xf; params.CRm = 0; - /* raz_wi */ - (void)pm_fake(vcpu, params, NULL); + (void)trap_raz_wi(vcpu, params, NULL); /* handled */ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); @@ -559,8 +558,7 @@ int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run) params.Op2 = (kvm_vcpu_get_hsr(vcpu) 17) 0x7; params.Rt2 = 0; - /* raz_wi */ - (void)pm_fake(vcpu, params, NULL); + (void)trap_raz_wi(vcpu, params, NULL); /* handled */ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in
[PATCH v3 03/11] KVM: arm: enable to use the ARM_DSCR_MDBGEN macro from KVM assembly code
Add #ifndef __ASSEMBLY__ in hw_breakpoint.h, in order to use the ARM_DSCR_MDBGEN macro from KVM assembly code. Signed-off-by: Zhichao Huang zhichao.hu...@linaro.org Reviewed-by: Alex Bennee alex.ben...@linaro.org --- arch/arm/include/asm/hw_breakpoint.h | 54 +++- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/arch/arm/include/asm/hw_breakpoint.h b/arch/arm/include/asm/hw_breakpoint.h index 8e427c7..f2f4c61 100644 --- a/arch/arm/include/asm/hw_breakpoint.h +++ b/arch/arm/include/asm/hw_breakpoint.h @@ -3,6 +3,8 @@ #ifdef __KERNEL__ +#ifndef __ASSEMBLY__ + struct task_struct; #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -44,6 +46,33 @@ static inline void decode_ctrl_reg(u32 reg, ctrl-mismatch = reg 0x1; } +struct notifier_block; +struct perf_event; +struct pmu; + +extern struct pmu perf_ops_bp; +extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, + int *gen_len, int *gen_type); +extern int arch_check_bp_in_kernelspace(struct perf_event *bp); +extern int arch_validate_hwbkpt_settings(struct perf_event *bp); +extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, + unsigned long val, void *data); + +extern u8 arch_get_debug_arch(void); +extern u8 arch_get_max_wp_len(void); +extern void clear_ptrace_hw_breakpoint(struct task_struct *tsk); + +int arch_install_hw_breakpoint(struct perf_event *bp); +void arch_uninstall_hw_breakpoint(struct perf_event *bp); +void hw_breakpoint_pmu_read(struct perf_event *bp); +int hw_breakpoint_slots(int type); + +#else +static inline void clear_ptrace_hw_breakpoint(struct task_struct *tsk) {} + +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ +#endif /* __ASSEMBLY */ + /* Debug architecture numbers. */ #define ARM_DEBUG_ARCH_RESERVED0 /* In case of ptrace ABI updates. 
*/ #define ARM_DEBUG_ARCH_V6 1 @@ -110,30 +139,5 @@ static inline void decode_ctrl_reg(u32 reg, asm volatile(mcr p14, 0, %0, #N , #M , #OP2 : : r (VAL));\ } while (0) -struct notifier_block; -struct perf_event; -struct pmu; - -extern struct pmu perf_ops_bp; -extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl, - int *gen_len, int *gen_type); -extern int arch_check_bp_in_kernelspace(struct perf_event *bp); -extern int arch_validate_hwbkpt_settings(struct perf_event *bp); -extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, - unsigned long val, void *data); - -extern u8 arch_get_debug_arch(void); -extern u8 arch_get_max_wp_len(void); -extern void clear_ptrace_hw_breakpoint(struct task_struct *tsk); - -int arch_install_hw_breakpoint(struct perf_event *bp); -void arch_uninstall_hw_breakpoint(struct perf_event *bp); -void hw_breakpoint_pmu_read(struct perf_event *bp); -int hw_breakpoint_slots(int type); - -#else -static inline void clear_ptrace_hw_breakpoint(struct task_struct *tsk) {} - -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif /* __KERNEL__ */ #endif /* _ARM_HW_BREAKPOINT_H */ -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in
[PATCH v3 09/11] KVM: arm: implement lazy world switch for debug registers
Implement switching of the debug registers. While the number of registers is massive, CPUs usually don't implement them all (A15 has 6 breakpoints and 4 watchpoints, which gives us a total of 22 registers only). Notice that, for ARMv7, if the CONFIG_HAVE_HW_BREAKPOINT is set in the guest, debug is always actively in use (ARM_DSCR_MDBGEN set). We have to do the save/restore dance in this case, because the host and the guest might use their respective debug registers at any moment. If the CONFIG_HAVE_HW_BREAKPOINT is not set, and if no one flagged the debug registers as dirty, we only save/resotre DBGDSCR. Signed-off-by: Zhichao Huang zhichao.hu...@linaro.org --- arch/arm/kvm/interrupts.S | 16 +++ arch/arm/kvm/interrupts_head.S | 249 - 2 files changed, 263 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 79caf79..d626275 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -116,6 +116,12 @@ ENTRY(__kvm_vcpu_run) read_cp15_state store_to_vcpu = 0 write_cp15_state read_from_vcpu = 1 + @ Store hardware CP14 state and load guest state + compute_debug_state 1f + bl __save_host_debug_regs + bl __restore_guest_debug_regs + +1: @ If the host kernel has not been configured with VFPv3 support, @ then it is safer if we deny guests from using it as well. #ifdef CONFIG_VFPv3 @@ -201,6 +207,16 @@ after_vfp_restore: mrc p15, 0, r2, c0, c0, 5 mcr p15, 4, r2, c0, c0, 5 + @ Store guest CP14 state and restore host state + skip_debug_state 1f + bl __save_guest_debug_regs + bl __restore_host_debug_regs + /* Clear the dirty flag for the next run, as all the state has +* already been saved. Note that we nuke the whole 32bit word. +* If we ever add more flags, we'll have to be more careful... 
+*/ + clear_debug_dirty_bit +1: @ Store guest CP15 state and restore host state read_cp15_state store_to_vcpu = 1 write_cp15_state read_from_vcpu = 0 diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 5662c39..ed406be 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -7,6 +7,7 @@ #define VCPU_USR_SP(VCPU_USR_REG(13)) #define VCPU_USR_LR(VCPU_USR_REG(14)) #define CP15_OFFSET(_cp15_reg_idx) (VCPU_CP15 + (_cp15_reg_idx * 4)) +#define CP14_OFFSET(_cp14_reg_idx) (VCPU_CP14 + ((_cp14_reg_idx) * 4)) /* * Many of these macros need to access the VCPU structure, which is always @@ -168,8 +169,7 @@ vcpu.reqr0 @ vcpu pointer always in r0 * Clobbers *all* registers. */ .macro restore_guest_regs - /* reset DBGDSCR to disable debug mode */ - mov r2, #0 + ldr r2, [vcpu, #CP14_OFFSET(cp14_DBGDSCRext)] mcr p14, 0, r2, c0, c2, 2 restore_guest_regs_mode svc, #VCPU_SVC_REGS @@ -250,6 +250,10 @@ vcpu .reqr0 @ vcpu pointer always in r0 save_guest_regs_mode abt, #VCPU_ABT_REGS save_guest_regs_mode und, #VCPU_UND_REGS save_guest_regs_mode irq, #VCPU_IRQ_REGS + + /* DBGDSCR reg */ + mrc p14, 0, r2, c0, c1, 0 + str r2, [vcpu, #CP14_OFFSET(cp14_DBGDSCRext)] .endm /* Reads cp15 registers from hardware and stores them in memory @@ -449,6 +453,231 @@ vcpu .reqr0 @ vcpu pointer always in r0 str r5, [vcpu, #VCPU_DEBUG_FLAGS] .endm +/* Assume r11/r12 in used, clobbers r2-r10 */ +.macro cp14_read_and_push Op2 skip_num + cmp \skip_num, #8 + // if (skip_num = 8) then skip c8-c15 directly + bge 1f + adr r2, 9998f + add r2, r2, \skip_num, lsl #2 + bx r2 +1: + adr r2, f + sub r3, \skip_num, #8 + add r2, r2, r3, lsl #2 + bx r2 +9998: + mrc p14, 0, r10, c0, c15, \Op2 + mrc p14, 0, r9, c0, c14, \Op2 + mrc p14, 0, r8, c0, c13, \Op2 + mrc p14, 0, r7, c0, c12, \Op2 + mrc p14, 0, r6, c0, c11, \Op2 + mrc p14, 0, r5, c0, c10, \Op2 + mrc p14, 0, r4, c0, c9, \Op2 + mrc p14, 0, r3, c0, c8, \Op2 + push{r3-r10} +: + mrc p14, 0, r10, c0, c7, \Op2 + mrc 
p14, 0, r9, c0, c6, \Op2 + mrc p14, 0, r8, c0, c5, \Op2 + mrc p14, 0, r7, c0, c4, \Op2 + mrc p14, 0, r6, c0, c3, \Op2 + mrc p14, 0, r5, c0, c2, \Op2 + mrc p14, 0, r4, c0, c1, \Op2 + mrc p14, 0, r3, c0, c0, \Op2 + push{r3-r10} +.endm + +/* Assume r11/r12 in used, clobbers r2-r10 */ +.macro cp14_pop_and_write Op2 skip_num + cmp \skip_num, #8 + // if (skip_num = 8) then skip c8-c15 directly + bge 1f + adr r2, 9998f + add r2, r2, \skip_num, lsl #2
[PATCH v3 07/11] KVM: arm: add trap handlers for 64-bit debug registers
Add handlers for all the 64-bit debug registers. There is an overlap between 32 and 64bit registers. Make sure that 64-bit registers precede 32-bit ones. Signed-off-by: Zhichao Huang zhichao.hu...@linaro.org --- arch/arm/kvm/coproc.c | 12 1 file changed, 12 insertions(+) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 59b65b7..648 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -435,9 +435,17 @@ static const struct coproc_reg cp15_regs[] = { { CRn(15), CRm( 0), Op1( 4), Op2( 0), is32, access_cbar}, }; /* + * Architected CP14 registers. + * * Trapped cp14 registers. We generally ignore most of the external * debug, on the principle that they don't really make sense to a * guest. Revisit this one day, whould this principle change. + * + * CRn denotes the primary register number, but is copied to the CRm in the + * user space API for 64-bit register access in line with the terminology used + * in the ARM ARM. + * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit + *registers preceding 32-bit ones. */ static const struct coproc_reg cp14_regs[] = { /* DBGIDR */ @@ -445,10 +453,14 @@ static const struct coproc_reg cp14_regs[] = { /* DBGDTRRXext */ { CRn( 0), CRm( 0), Op1( 0), Op2( 2), is32, trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(0), + /* DBGDRAR (64bit) */ + { CRn( 0), CRm( 1), Op1( 0), Op2( 0), is64, trap_raz_wi}, /* DBGDSCRint */ { CRn( 0), CRm( 1), Op1( 0), Op2( 0), is32, trap_dbgdscr, NULL, cp14_DBGDSCRext }, DBG_BCR_BVR_WCR_WVR(1), + /* DBGDSAR (64bit) */ + { CRn( 0), CRm( 2), Op1( 0), Op2( 0), is64, trap_raz_wi}, /* DBGDSCRext */ { CRn( 0), CRm( 2), Op1( 0), Op2( 2), is32, trap_debug32, reset_val, cp14_DBGDSCRext, 0 }, -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in
[PATCH v3 08/11] KVM: arm: implement dirty bit mechanism for debug registers
The trapping code keeps track of the state of the debug registers, allowing for the switch code to implement a lazy switching strategy. Signed-off-by: Zhichao Huang zhichao.hu...@linaro.org --- arch/arm/include/asm/kvm_asm.h | 3 +++ arch/arm/include/asm/kvm_host.h | 3 +++ arch/arm/kernel/asm-offsets.c | 1 + arch/arm/kvm/coproc.c | 39 -- arch/arm/kvm/interrupts_head.S | 42 + 5 files changed, 86 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index ba65e05..4fb64cf 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -64,6 +64,9 @@ #define cp14_DBGDSCRext65 /* Debug Status and Control external */ #define NR_CP14_REGS 66 /* Number of regs (incl. invalid) */ +#define KVM_ARM_DEBUG_DIRTY_SHIFT 0 +#define KVM_ARM_DEBUG_DIRTY(1 KVM_ARM_DEBUG_DIRTY_SHIFT) + #define ARM_EXCEPTION_RESET 0 #define ARM_EXCEPTION_UNDEFINED 1 #define ARM_EXCEPTION_SOFTWARE2 diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 3d16820..09b54bf 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -127,6 +127,9 @@ struct kvm_vcpu_arch { /* System control coprocessor (cp14) */ u32 cp14[NR_CP14_REGS]; + /* Debug state */ + u32 debug_flags; + /* * Anything that is not used directly from assembly code goes * here. 
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 9158de0..e876109 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -185,6 +185,7 @@ int main(void) DEFINE(VCPU_FIQ_REGS,offsetof(struct kvm_vcpu, arch.regs.fiq_regs)); DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc)); DEFINE(VCPU_CPSR,offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_cpsr)); + DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags)); DEFINE(VCPU_HCR, offsetof(struct kvm_vcpu, arch.hcr)); DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); DEFINE(VCPU_HSR, offsetof(struct kvm_vcpu, arch.fault.hsr)); diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 648..fc0c2ef 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -220,14 +220,49 @@ bool access_vm_reg(struct kvm_vcpu *vcpu, return true; } +/* + * We want to avoid world-switching all the DBG registers all the + * time: + * + * - If we've touched any debug register, it is likely that we're + * going to touch more of them. It then makes sense to disable the + * traps and start doing the save/restore dance + * - If debug is active (ARM_DSCR_MDBGEN set), it is then mandatory + * to save/restore the registers, as the guest depends on them. + * + * For this, we use a DIRTY bit, indicating the guest has modified the + * debug registers, used as follow: + * + * On guest entry: + * - If the dirty bit is set (because we're coming back from trapping), + * disable the traps, save host registers, restore guest registers. + * - If debug is actively in use (ARM_DSCR_MDBGEN set), + * set the dirty bit, disable the traps, save host registers, + * restore guest registers. + * - Otherwise, enable the traps + * + * On guest exit: + * - If the dirty bit is set, save guest registers, restore host + * registers and clear the dirty bit. This ensure that the host can + * now use the debug registers. 
+ * + * Notice: + * - For ARMv7, if the CONFIG_HAVE_HW_BREAKPOINT is set in the guest, + * debug is always actively in use (ARM_DSCR_MDBGEN set). + * We have to do the save/restore dance in this case, because the + * host and the guest might use their respective debug registers + * at any moment. + */ static bool trap_debug32(struct kvm_vcpu *vcpu, const struct coproc_params *p, const struct coproc_reg *r) { - if (p-is_write) + if (p-is_write) { vcpu-arch.cp14[r-reg] = *vcpu_reg(vcpu, p-Rt1); - else + vcpu-arch.debug_flags |= KVM_ARM_DEBUG_DIRTY; + } else { *vcpu_reg(vcpu, p-Rt1) = vcpu-arch.cp14[r-reg]; + } return true; } diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index a20b9ad..5662c39 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -1,4 +1,6 @@ #include linux/irqchip/arm-gic.h +#include asm/hw_breakpoint.h +#include asm/kvm_asm.h #include asm/assembler.h #define VCPU_USR_REG(_reg_nr) (VCPU_USR_REGS + (_reg_nr * 4)) @@ -407,6 +409,46 @@ vcpu .reqr0 @ vcpu pointer always in r0 mcr p15, 2, r12, c0, c0, 0 @ CSSELR .endm +/* Assume vcpu pointer in vcpu reg, clobbers r5 */
Re: [PATCH 1/2] arm64: qcom: Add define for ARMv8 implementer (MIDR)
On Fri, Jun 19, 2015 at 05:28:53PM -0500, Timur Tabi wrote: On 06/15/2015 05:59 AM, Catalin Marinas wrote: I think this patch together with the second one could go through the kvm tree. For the core arm64 part: Acked-by: Catalin Marinascatalin.mari...@arm.com Suzuki Poulose posted a patch that adds generic support for ARMv8 KVM targets. I want to drop my second patch, so can we pick up this first patch through the ARM tree? So if the second patch is no longer needed, what's using this patch? I would defer merging it until actually required in some part of the kernel. -- Catalin -- To unsubscribe from this list: send the line unsubscribe kvm in
Re: [PATCH v2 11/15] KVM: MTRR: sort variable MTRRs
On 17/06/2015 18:11, Paolo Bonzini wrote: Also, this loop looks weird. Is this what you wanted? list_for_each_entry(tmp, mtrr_state-head, node) if (cur-base = tmp-base) break; list_add_tail(cur-node, tmp-node); If so, can you look at kvm/queue and see if it is okay for you (so that we can get the series in 4.2)? Ping? If I don't get testing results before Wednesday, I'll drop this series from the 4.2 pull request. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in
[PATCH v3 01/11] KVM: arm: plug guest debug exploit
Hardware debugging in guests is not intercepted currently, it means that a malicious guest can bring down the entire machine by writing to the debug registers. This patch enable trapping of all debug registers, preventing the guests to access the debug registers. This patch also disable the debug mode(DBGDSCR) in the guest world all the time, preventing the guests to mess with the host state. However, it is a precursor for later patches which will need to do more to world switch debug states while necessary. Cc: sta...@vger.kernel.org Signed-off-by: Zhichao Huang zhichao.hu...@linaro.org --- arch/arm/include/asm/kvm_coproc.h | 3 +- arch/arm/kvm/coproc.c | 60 +++ arch/arm/kvm/handle_exit.c| 4 +-- arch/arm/kvm/interrupts_head.S| 13 - 4 files changed, 70 insertions(+), 10 deletions(-) diff --git a/arch/arm/include/asm/kvm_coproc.h b/arch/arm/include/asm/kvm_coproc.h index 4917c2f..e74ab0f 100644 --- a/arch/arm/include/asm/kvm_coproc.h +++ b/arch/arm/include/asm/kvm_coproc.h @@ -31,7 +31,8 @@ void kvm_register_target_coproc_table(struct kvm_coproc_target_table *table); int kvm_handle_cp10_id(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_cp_0_13_access(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run); -int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run); diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index f3d88dc..2e12760 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -91,12 +91,6 @@ int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } -int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - 
kvm_inject_undefined(vcpu); - return 1; -} - static void reset_mpidr(struct kvm_vcpu *vcpu, const struct coproc_reg *r) { /* @@ -519,6 +513,60 @@ int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run) return emulate_cp15(vcpu, params); } +/** + * kvm_handle_cp14_64 -- handles a mrrc/mcrr trap on a guest CP14 access + * @vcpu: The VCPU pointer + * @run: The kvm_run struct + */ +int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + struct coproc_params params; + + params.CRn = (kvm_vcpu_get_hsr(vcpu) 1) 0xf; + params.Rt1 = (kvm_vcpu_get_hsr(vcpu) 5) 0xf; + params.is_write = ((kvm_vcpu_get_hsr(vcpu) 1) == 0); + params.is_64bit = true; + + params.Op1 = (kvm_vcpu_get_hsr(vcpu) 16) 0xf; + params.Op2 = 0; + params.Rt2 = (kvm_vcpu_get_hsr(vcpu) 10) 0xf; + params.CRm = 0; + + /* raz_wi */ + (void)pm_fake(vcpu, params, NULL); + + /* handled */ + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + return 1; +} + +/** + * kvm_handle_cp14_32 -- handles a mrc/mcr trap on a guest CP14 access + * @vcpu: The VCPU pointer + * @run: The kvm_run struct + */ +int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + struct coproc_params params; + + params.CRm = (kvm_vcpu_get_hsr(vcpu) 1) 0xf; + params.Rt1 = (kvm_vcpu_get_hsr(vcpu) 5) 0xf; + params.is_write = ((kvm_vcpu_get_hsr(vcpu) 1) == 0); + params.is_64bit = false; + + params.CRn = (kvm_vcpu_get_hsr(vcpu) 10) 0xf; + params.Op1 = (kvm_vcpu_get_hsr(vcpu) 14) 0x7; + params.Op2 = (kvm_vcpu_get_hsr(vcpu) 17) 0x7; + params.Rt2 = 0; + + /* raz_wi */ + (void)pm_fake(vcpu, params, NULL); + + /* handled */ + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + return 1; +} + /** * Userspace API */ diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 95f12b2..357ad1b 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -104,9 +104,9 @@ static exit_handle_fn arm_exit_handlers[] = { [HSR_EC_WFI]= kvm_handle_wfx, [HSR_EC_CP15_32]= kvm_handle_cp15_32, 
[HSR_EC_CP15_64]= kvm_handle_cp15_64, - [HSR_EC_CP14_MR]= kvm_handle_cp14_access, + [HSR_EC_CP14_MR]= kvm_handle_cp14_32, [HSR_EC_CP14_LS]= kvm_handle_cp14_load_store, - [HSR_EC_CP14_64]= kvm_handle_cp14_access, + [HSR_EC_CP14_64]= kvm_handle_cp14_64, [HSR_EC_CP_0_13]= kvm_handle_cp_0_13_access, [HSR_EC_CP10_ID]= kvm_handle_cp10_id, [HSR_EC_SVC_HYP]= handle_svc_hyp,
Re: Nested EPT Write Protection
On 19/06/2015 20:57, Hu Yaohui wrote: One more thing, for the standard guest VM which uses EPT, What's the usage of gfn field in the struct kvm_mmu_page? Since it uses EPT, a single shadow page should have no relation with any of the guest physical page, right? The gfn is the same value that you can find in bits 12 to MAXPHYADDR-1 of the EPT page table entry. Paolo According to the source code, each allocated shadow page struct kvm_mmu_page got its gfn field filled. -- To unsubscribe from this list: send the line unsubscribe kvm in
Re: [PATCH 3/5] vhost: support upto 509 memory regions
On 22/06/2015 09:10, Igor Mammedov wrote: So far HVA is unusable even if we will make this assumption and let guest crash. virt_net doesn't work with it anyway, translation of GPA to HVA for descriptors works as expected (correctly) but vhost+HVA hack backed virtio still can't send/receive packets. That's why I prefer to merge kernel solution first as a stable and not introducing any issues solution. And work on userspace approach on top of that. Also, let's do some math. Let's assume 3 network devices per VM, one vhost device per queue, one queue per VCPU per network device. Let's assume the host is overcommitted 3:1. Thus we have 3*3=9 times vhost devices as we have physical CPUs. We're thus talking about 108K per physical CPU. From a relative point of view, and assuming 1 GB of memory per physical CPU (pretty low amount if you're overcommitting CPU 3:1), this is 0.01% of the total memory. From an absolute point of view, it takes a system with 60 physical CPUs to reach the same memory usage as the vmlinuz binary of a typical distro kernel (not counting the modules). Paolo Hopefully it could be done but we still would need time to iron out side effects/issues it causes or could cause so that fix became stable enough for production. -- To unsubscribe from this list: send the line unsubscribe kvm in
Re: [PATCH 0/2] KVM: PPC: Book3S HV: Dynamic micro-threading/split-core
On 22/06/2015 02:09, Paul Mackerras wrote: On Wed, Jun 17, 2015 at 07:30:09PM +0200, Laurent Vivier wrote: Tested-by: Laurent Vivier lviv...@redhat.com Performance is better, but Paul could you explain why it is better if I disable dynamic micro-threading ? Did I miss something ? My test system is an IBM Power S822L. I run two guests with 8 vCPUs (-smp 8,sockets=8,cores=1,threads=1) both attached on the same core (with pinning option of virt-manager). Then, I measure the time needed to compile a kernel in parallel in both guests with make -j 16. My kernel without micro-threading: real37m23.424s real37m24.959s user167m31.474suser165m44.142s sys 113m26.195ssys 113m45.072s With micro-threading patches (PATCH 1+2): target_smt_mode 0 [in fact It was 8 here, but it should behave like 0, as it is max threads/sub-core] dynamic_mt_modes 6 real32m13.338s real 32m26.652s user139m21.181suser 140m20.994s sys 77m35.339s sys 78m16.599s It's better, but if I disable dynamic micro-threading (but PATCH 1+2): target_smt_mode 0 dynamic_mt_modes 0 real30m49.100s real 30m48.161s user144m22.989suser 142m53.886s sys 65m4.942s sys 66m8.159s it's even better. I think what's happening here is that with dynamic_mt_modes=0 the system alternates between the two guests, whereas with dynamic_mt_modes=6 it will spend some of the time running both guests simultaneously in two-way split mode. Since you have two compute-bound guests that each have threads=1 and 8 vcpus, it can fill up the core either way. In that case it is more efficient to fill up the core with vcpus from one guest and not have to split the core, firstly because you avoid the split/unsplit latency and secondly because the threads run a little faster in whole-core mode than in split-core. Thank you for the explanation. So it has more sense to have vCPUs with threads ? It seems: I did same tests with 4 vCPUs x 2 threads x 2 guests concurrently on one 8 threaded bare metal core. 
target_smt_mode 0 dynamic_mt_modes 0 real35m33.142s real35m44.967s user167m16.971s user163m14.320s sys 84m19.618s sys 90m38.978s target_smt_mode 0 dynamic_mt_modes 6 real26m41.993s real25m54.270s user130m31.362s user127m55.145s sys 58m17.378s sys 55m10.202s In this case, it really improves performance. Laurent -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in
Re: [PATCH 0/2] KVM: PPC: Book3S HV: Dynamic micro-threading/split-core
On 22/06/2015 02:09, Paul Mackerras wrote: On Wed, Jun 17, 2015 at 07:30:09PM +0200, Laurent Vivier wrote: Tested-by: Laurent Vivier lviv...@redhat.com Performance is better, but Paul could you explain why it is better if I disable dynamic micro-threading ? Did I miss something ? My test system is an IBM Power S822L. I run two guests with 8 vCPUs (-smp 8,sockets=8,cores=1,threads=1) both attached on the same core (with pinning option of virt-manager). Then, I measure the time needed to compile a kernel in parallel in both guests with make -j 16. My kernel without micro-threading: real37m23.424s real37m24.959s user167m31.474suser165m44.142s sys 113m26.195ssys 113m45.072s With micro-threading patches (PATCH 1+2): target_smt_mode 0 [in fact It was 8 here, but it should behave like 0, as it is max threads/sub-core] dynamic_mt_modes 6 real32m13.338s real 32m26.652s user139m21.181suser 140m20.994s sys 77m35.339s sys 78m16.599s It's better, but if I disable dynamic micro-threading (but PATCH 1+2): target_smt_mode 0 dynamic_mt_modes 0 real30m49.100s real 30m48.161s user144m22.989suser 142m53.886s sys 65m4.942s sys 66m8.159s it's even better. I think what's happening here is that with dynamic_mt_modes=0 the system alternates between the two guests, whereas with dynamic_mt_modes=6 it will spend some of the time running both guests simultaneously in two-way split mode. Since you have two compute-bound guests that each have threads=1 and 8 vcpus, it can fill up the core either way. In that case it is more efficient to fill up the core with vcpus from one guest and not have to split the core, firstly because you avoid the split/unsplit latency and secondly because the threads run a little faster in whole-core mode than in split-core. Thank you for the explanation. So it has more sense to have vCPUs with threads ? It seems: I did same tests with 4 vCPUs x 2 threads x 2 guests concurrently on one 8 threaded bare metal core. 
target_smt_mode 0 dynamic_mt_modes 0 real35m33.142s real35m44.967s user167m16.971s user163m14.320s sys 84m19.618s sys 90m38.978s target_smt_mode 0 dynamic_mt_modes 6 real26m41.993s real25m54.270s user130m31.362s user127m55.145s sys 58m17.378s sys 55m10.202s In this case, it really improves performance. Laurent -- To unsubscribe from this list: send the line unsubscribe kvm in
[PATCH v3 10/11] KVM: arm: add a trace event for cp14 traps
There are too many cp15 traps, so we don't reuse the cp15 trace event but add a new trace event to trace the access of debug registers. Signed-off-by: Zhichao Huang zhichao.hu...@linaro.org --- arch/arm/kvm/coproc.c | 14 ++ arch/arm/kvm/trace.h | 30 ++ 2 files changed, 44 insertions(+) diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index fc0c2ef..42b720a 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -678,6 +678,13 @@ int kvm_handle_cp_64(struct kvm_vcpu *vcpu, params.Rt2 = (kvm_vcpu_get_hsr(vcpu) 10) 0xf; params.CRm = 0; + if (global == cp14_regs) + trace_kvm_emulate_cp14_imp(params.Op1, params.Rt1, params.CRn, + params.CRm, params.Op2, params.is_write); + else + trace_kvm_emulate_cp15_imp(params.Op1, params.Rt1, params.CRn, + params.CRm, params.Op2, params.is_write); + if (!emulate_cp(vcpu, params, target_specific, nr_specific)) return 1; if (!emulate_cp(vcpu, params, global, nr_global)) @@ -715,6 +722,13 @@ int kvm_handle_cp_32(struct kvm_vcpu *vcpu, params.Op2 = (kvm_vcpu_get_hsr(vcpu) 17) 0x7; params.Rt2 = 0; + if (global == cp14_regs) + trace_kvm_emulate_cp14_imp(params.Op1, params.Rt1, params.CRn, + params.CRm, params.Op2, params.is_write); + else + trace_kvm_emulate_cp15_imp(params.Op1, params.Rt1, params.CRn, + params.CRm, params.Op2, params.is_write); + if (!emulate_cp(vcpu, params, target_specific, nr_specific)) return 1; if (!emulate_cp(vcpu, params, global, nr_global)) diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h index 0ec3539..988da03 100644 --- a/arch/arm/kvm/trace.h +++ b/arch/arm/kvm/trace.h @@ -159,6 +159,36 @@ TRACE_EVENT(kvm_emulate_cp15_imp, __entry-CRm, __entry-Op2) ); +/* Architecturally implementation defined CP14 register access */ +TRACE_EVENT(kvm_emulate_cp14_imp, + TP_PROTO(unsigned long Op1, unsigned long Rt1, unsigned long CRn, +unsigned long CRm, unsigned long Op2, bool is_write), + TP_ARGS(Op1, Rt1, CRn, CRm, Op2, is_write), + + TP_STRUCT__entry( + __field(unsigned int, Op1 ) + 
__field(unsigned int, Rt1 ) + __field(unsigned int, CRn ) + __field(unsigned int, CRm ) + __field(unsigned int, Op2 ) + __field(bool, is_write) + ), + + TP_fast_assign( + __entry-is_write = is_write; + __entry-Op1= Op1; + __entry-Rt1= Rt1; + __entry-CRn= CRn; + __entry-CRm= CRm; + __entry-Op2= Op2; + ), + + TP_printk(Implementation defined CP14: %s\tp14, %u, r%u, c%u, c%u, %u, + (__entry-is_write) ? mcr : mrc, + __entry-Op1, __entry-Rt1, __entry-CRn, + __entry-CRm, __entry-Op2) +); + TRACE_EVENT(kvm_wfx, TP_PROTO(unsigned long vcpu_pc, bool is_wfe), TP_ARGS(vcpu_pc, is_wfe), -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in
[PATCH v3 11/11] KVM: arm: enable trapping of all debug registers
Enable trapping of the debug registers, allowing guests to use the debug infrastructure. Signed-off-by: Zhichao Huang zhichao.hu...@linaro.org --- arch/arm/kvm/interrupts_head.S | 15 +-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index ed406be..107bda4 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -886,10 +886,21 @@ ARM_BE8(rev r6, r6 ) .endm /* Configures the HDCR (Hyp Debug Configuration Register) on entry/return - * (hardware reset value is 0) */ + * (hardware reset value is 0) + * + * Clobbers r2-r4 + */ .macro set_hdcr operation mrc p15, 4, r2, c1, c1, 1 - ldr r3, =(HDCR_TPM|HDCR_TPMCR) + ldr r3, =(HDCR_TPM|HDCR_TPMCR|HDCR_TDRA|HDCR_TDOSA) + + // Check for KVM_ARM_DEBUG_DIRTY, and set debug to trap + // if not dirty. + ldr r4, [vcpu, #VCPU_DEBUG_FLAGS] + cmp r4, #KVM_ARM_DEBUG_DIRTY + beq 1f + orr r3, r3, #HDCR_TDA +1: .if \operation == vmentry orr r2, r2, r3 @ Trap some perfmon accesses .else -- 1.7.12.4 -- To unsubscribe from this list: send the line unsubscribe kvm in
[PATCH 1/1] KVM: s390: clear floating interrupt bitmap and parameters
From: Jens Freimann jf...@linux.vnet.ibm.com commit 6d3da24141 (KVM: s390: deliver floating interrupts in order of priority) introduced a regression for the reset handling. We don't clear the bitmap of pending floating interrupts and interrupt parameters. This could result in stale interrupts even after a reset. Let's fix this by clearing the pending bitmap and the parameters for service and machine check interrupts. Cc: sta...@vger.kernel.org # 4.1 Signed-off-by: Jens Freimann jf...@linux.vnet.ibm.com Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com --- arch/s390/kvm/interrupt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 322ef9c..480b975 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1621,6 +1621,9 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm) int i; spin_lock(fi-lock); + fi-pending_irqs = 0; + memset(fi-srv_signal, 0, sizeof(fi-srv_signal)); + memset(fi-mchk, 0, sizeof(fi-mchk)); for (i = 0; i FIRQ_LIST_COUNT; i++) clear_irq_list(fi-lists[i]); for (i = 0; i FIRQ_MAX_COUNT; i++) -- 2.3.0 -- To unsubscribe from this list: send the line unsubscribe kvm in
[PATCH 0/1] KVM: s390: One small fix for post 4.1
Paolo, here is a small fixup for KVM on s390. It is also necessary for 4.1 which I am a bit late for - so cc stable. No pull request as it is only one patch. Jens Freimann (1): KVM: s390: clear floating interrupt bitmap and parameters arch/s390/kvm/interrupt.c | 3 +++ 1 file changed, 3 insertions(+) -- 2.3.0 -- To unsubscribe from this list: send the line unsubscribe kvm in
Re: [RFC 0/6] KVM: arm/arm64: gsi routing support
Hi Pavel, On 06/19/2015 08:37 AM, Pavel Fedin wrote: Hello! The series therefore allows and mandates the usage of KVM_SET_GSI_ROUTING ioctl along with KVM_IRQFD. If the userspace does not define any routing table, no irqfd injection can happen. The user-space can use KVM_CAP_IRQ_ROUTING to detect whether a routing table is needed. Yesterday, half-sleeping in the train back home, i've got a simple idea how to resolve conflicts with existing static GSI-SPI routing without bringing in any more inconsistencies. So far, in current implementation GSI is an SPI index (let alone KVM_IRQ_LINE, because it's already another story on ARM). In order to maintain this convention we could simply implement default routing which sets all GSIs to corresponding SPI pins. So, if the userland never cares about KVM_SET_GSI_ROUTING, everything works as before. But it will be possible to re-route GSIs to MSI. It will perfectly work because SPI signaling is used with GICv2m, and MSI with GICv3(+), which cannot be used at the same time. I agree with you and I suggested the same approach in my cover letter. Since applying GSI routing to KVM_IRQ_LINE is quite problematic, I would be also in favour to forbid userspace GSI routing setting and implement it kernel-side. Userspace would only be allowed to define MSI routing entries. I will respin accordingly and validate it further with qemu. Best Regards Eric Kind regards, Pavel Fedin Expert Engineer Samsung Electronics Research center Russia -- To unsubscribe from this list: send the line unsubscribe kvm in
Re: [RFC 0/6] KVM: arm/arm64: gsi routing support
Hi Eric, I briefly looked over the series, the patches itself look good overall. I have one or two comments on the actual code, but want to discuss the general approach first (more a dump of some first thoughts): On 18/06/15 18:40, Eric Auger wrote: With the advent of GICv3 ITS in-kernel emulation, KVM GSI routing appears to be requested. More specifically MSI routing is needed. irqchip routing does not sound to be really useful on arm but usage of MSI routing also mandates to integrate irqchip routing. The initial implementation of irqfd on arm must be upgraded with the integration of kvm irqchip.c code and the implementation of its standard hooks in the architecture specific part. The series therefore allows and mandates the usage of KVM_SET_GSI_ROUTING ioctl along with KVM_IRQFD. If the userspace does not define any routing table, no irqfd injection can happen. The user-space can use KVM_CAP_IRQ_ROUTING to detect whether a routing table is needed. for irqchip routing, the convention is, only SPI can be injected and the SPI ID corresponds to irqchip.pin + 32. For MSI routing the interrupt ID matches the MSI msg data. API evolve to support associating a device ID to a routine entry. So if I get this right, in a guest ITS case we have now three different IRQ name spaces: a) the LPI number, which is guest internal. The ITS driver in the guest maintains it. We can track assignments and changes when handling the MAPVI command in the host kernel, but this would stay in the kernel, as I don't see an efficient way of propagating this to userland. b) the GSI number, which is used in communication between userland and the host kernel. The guest kernel does not know about this at all. Also the ioctl requires us to set the routing for _all_ GSIs, and I read it that it assumes starting at GSI 0. So we cannot even pretend to have LPIs here, because we would need at least 8192 empty entries then, not to speak of the possibly sparse allocation above. 
So we have a completely distinct name space here. c) The DevID:EvID pair, which actually identifies an IRQ in all the three regimes and is the only authoritative ID. So that means we need to maintain the connection between all the three, somehow duplicating the whole ITS mapping again to map GSIs to DevID:EvID. So I wonder if we could use DevID:EvID directly. The KVM_IRQFD ioctl struct has some space, so we could put the DevID into the pad area. Also (more forward-looking) KVM_CAP_ASSIGN_DEV_IRQ identifies guest IRQs by an u32, but again there is quite some padding area available. In general I am a bit reluctant to introduce just another level of complexity to the already quite convoluted way of doing IRQs and MSIs on ARM(64), that's why I will investigate if we can use DevID:EvID to refer to an interrupt. So far, Andre. Known Issues of this RFC: - One of the biggest is the API inconsistencies on ARM. Blame me. Routing should apply to KVM_IRQ_LINE ioctl which is not the case yet in this series. It only applies to irqfd. on x86 typically this KVM_IRQ_LINE is plugged onto irqchip.c kvm_set_irq whereas on ARM we inject directly through kvm_vgic_inject_irq x on arm/arm64 gsi has a specific structure: bits: | 31 ... 24 | 23 ... 16 | 15...0 | field: | irq_type | vcpu_index | irq_id | where irq_id matches the Interrupt ID - for KVM_IRQFD without routing (current implementation) the gsi field corresponds to an SPI index = irq_id (above) -32. - as far as understand qemu integration, gsi is supposed to be within [0, KVM_MAX_IRQ_ROUTES]. Difficult to use KVM_IRQ_LINE gsi. - to be defined what we choose as a convention with irqchip routing is applied: gsi - irqchip input pin. - Or shouldn't we simply rule out any userspace irqchip routing and stick to MSI routing? we could define a fixed identity in-kernel irqchip mapping and only offer MSI routing. - static allocation of chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; arbitrary put KVM_IRQCHIP_NUM_PINS = 1020 - 32 (SPI count). 
On s390 this is even bigger. Currently tested on irqchip routing only (Calxeda midway only), ie NOT TESTED on MSI routing yet. This is a very preliminary RFC to ease the discussion. Code can be found at https://git.linaro.org/people/eric.auger/linux.git/shortlog/refs/heads/v4.1-rc8-gsi-routing-rfc It applies on Andre's [PATCH 00/13] arm64: KVM: GICv3 ITS emulation (http://www.spinics.net/lists/kvm/msg117402.html) Eric Auger (6): KVM: api: add kvm_irq_routing_extended_msi KVM: kvm_host: add kvm_extended_msi KVM: irqchip: convey devid to kvm_set_msi KVM: arm/arm64: enable irqchip routing KVM: arm/arm64: enable MSI routing KVM: arm: implement kvm_set_msi by gsi direct mapping Documentation/virtual/kvm/api.txt | 20 ++-- arch/arm/include/asm/kvm_host.h | 2 + arch/arm/kvm/Kconfig | 3 ++ arch/arm/kvm/Makefile | 2 +-
Re: [PATCH] arm64/kvm: Add generic v8 KVM target
On 17 June 2015 at 10:00, Suzuki K. Poulose suzuki.poul...@arm.com wrote: From: Suzuki K. Poulose suzuki.poul...@arm.com This patch adds a generic ARM v8 KVM target cpu type for use by the new CPUs which eventually ends up using the common sys_reg table. For backward compatibility the existing targets have been preserved. Any new target CPU that can be covered by generic v8 sys_reg tables should make use of the new generic target. How do you intend this to work for cross-host migration? Is the idea that the kernel guarantees that generic looks 100% the same to the guest regardless of host hardware? I'm not sure that can be made to work, given impdef differences in ID register values, bp/wp registers, and so on. Given that, it seems to me that we still need to provide KVM_ARM_TARGET_$THISCPU defines so userspace can request a specific guest CPU flavour; so what does this patch provide that isn't already provided by just having userspace query for the preferred CPU type as it does already? thanks -- PMM -- To unsubscribe from this list: send the line unsubscribe kvm in
Re: [RFC 0/6] KVM: arm/arm64: gsi routing support
On 06/22/2015 10:40 AM, Andre Przywara wrote: Hi Eric, I briefly looked over the series, the patches itself look good overall. I have one or two comments on the actual code, but want to discuss the general approach first (more a dump of some first thoughts): On 18/06/15 18:40, Eric Auger wrote: With the advent of GICv3 ITS in-kernel emulation, KVM GSI routing appears to be requested. More specifically MSI routing is needed. irqchip routing does not sound to be really useful on arm but usage of MSI routing also mandates to integrate irqchip routing. The initial implementation of irqfd on arm must be upgraded with the integration of kvm irqchip.c code and the implementation of its standard hooks in the architecture specific part. The series therefore allows and mandates the usage of KVM_SET_GSI_ROUTING ioctl along with KVM_IRQFD. If the userspace does not define any routing table, no irqfd injection can happen. The user-space can use KVM_CAP_IRQ_ROUTING to detect whether a routing table is needed. for irqchip routing, the convention is, only SPI can be injected and the SPI ID corresponds to irqchip.pin + 32. For MSI routing the interrupt ID matches the MSI msg data. API evolve to support associating a device ID to a routine entry. So if I get this right, in a guest ITS case we have now three different IRQ name spaces: a) the LPI number, which is guest internal. The ITS driver in the guest maintains it. We can track assignments and changes when handling the MAPVI command in the host kernel, but this would stay in the kernel, as I don't see an efficient way of propagating this to userland. b) the GSI number, which is used in communication between userland and the host kernel. The guest kernel does not know about this at all. Also the ioctl requires us to set the routing for _all_ GSIs, and I read it that it assumes starting at GSI 0. all injected GSI must effectively have a routing entry in KVM. Starting at 0 that's not requested. 
At qemu level there's just the constaint gsi fits between [0, max route number]. So we cannot even pretend to have LPIs here, because we would need at least 8192 empty entries then, not to speak of the possibly sparse allocation above. So we have a completely distinct name space here. What is done currently at qemu level for other archs - if I understand it correctly - is there is static GSI routing for standard IRQ. For MSI irqfd setup they use spare gsi number not yet used for GSI routing max route number. So this is sparse for MSI but not for standard IRQs. Effectively we do not plan to have GSI routing for LPIs but only MSI routing. c) The DevID:EvID pair, which actually identifies an IRQ in all the three regimes and is the only authoritative ID. So that means we need to maintain the connection between all the three, somehow duplicating the whole ITS mapping again to map GSIs to DevID:EvID. Currently the KVM routing table indeed stores GSI/DevID:EvID mapping. So I wonder if we could use DevID:EvID directly. The KVM_IRQFD ioctl struct has some space, so we could put the DevID into the pad area. Also (more forward-looking) KVM_CAP_ASSIGN_DEV_IRQ identifies guest IRQs by an u32, but again there is quite some padding area available. ASSIGN_DEV_IRQ is a deprecated feature. We should not use that API I think. Eric In general I am a bit reluctant to introduce just another level of complexity to the already quite convoluted way of doing IRQs and MSIs on ARM(64), that's why I will investigate if we can use DevID:EvID to refer to an interrupt. So far, Andre. Known Issues of this RFC: - One of the biggest is the API inconsistencies on ARM. Blame me. Routing should apply to KVM_IRQ_LINE ioctl which is not the case yet in this series. It only applies to irqfd. on x86 typically this KVM_IRQ_LINE is plugged onto irqchip.c kvm_set_irq whereas on ARM we inject directly through kvm_vgic_inject_irq x on arm/arm64 gsi has a specific structure: bits: | 31 ... 24 | 23 ... 
16 | 15...0 | field: | irq_type | vcpu_index | irq_id | where irq_id matches the Interrupt ID - for KVM_IRQFD without routing (current implementation) the gsi field corresponds to an SPI index = irq_id (above) -32. - as far as understand qemu integration, gsi is supposed to be within [0, KVM_MAX_IRQ_ROUTES]. Difficult to use KVM_IRQ_LINE gsi. - to be defined what we choose as a convention with irqchip routing is applied: gsi - irqchip input pin. - Or shouldn't we simply rule out any userspace irqchip routing and stick to MSI routing? we could define a fixed identity in-kernel irqchip mapping and only offer MSI routing. - static allocation of chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; arbitrary put KVM_IRQCHIP_NUM_PINS = 1020 - 32 (SPI count). On s390 this is even bigger. Currently tested on irqchip routing only (Calxeda midway only), ie NOT TESTED on MSI routing yet. This is a
Re: [PATCH 11/11] qemu/kvm: mark in cpu state that hyper-v crash occured
Am 22.06.2015 um 18:36 schrieb Paolo Bonzini: On 22/06/2015 18:33, Andreas Färber wrote: On the other hand, I wonder if current_cpu is available in qemu_system_guest_panicked. If so, you could add the field to the generic CPUState struct and migrate it as a subsection of vmstate_cpu_common. Hm, not sure whether it is. It should be... Obviously depends on the call site. :) At some point in cpu-exec.c, current_cpu gets set to NULL. So the function would at least deserve a comment on when (not to) use it. Cheers, Andreas -- SUSE Linux GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany GF: Felix Imendörffer, Jane Smithard, Dilip Upmanyu, Graham Norton; HRB 21284 (AG Nürnberg) -- To unsubscribe from this list: send the line unsubscribe kvm in
Re: Hang on reboot in FreeBSD guest on Linux KVM host
On Jun 17, 2014, at 10:48 AM, John Nielsen li...@jnielsen.net wrote: On Jun 17, 2014, at 12:05 AM, Gleb Natapov g...@kernel.org wrote: On Tue, Jun 17, 2014 at 06:21:23AM +0200, Paolo Bonzini wrote: Il 16/06/2014 18:47, John Nielsen ha scritto: On Jun 16, 2014, at 10:39 AM, Paolo Bonzini pbonz...@redhat.com wrote: Il 16/06/2014 18:09, John Nielsen ha scritto: The only substantial difference on the hardware side is the CPU. The hosts where the problem occurs use Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz, while the hosts that don't show the problem use the prior revision, Intel(R) Xeon(R) CPU E5-2650 0 @ 2.00GHz. Can you do grep . /sys/module/kvm_intel/parameters/* on both hosts please? No differences that I can see. Output below. Not really: Working host: Intel(R) Xeon(R) CPU E5-2650 0 @ 2.00GHz # grep . /sys/module/kvm_intel/parameters/* /sys/module/kvm_intel/parameters/enable_apicv:N Problem host: Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz # grep . /sys/module/kvm_intel/parameters/* /sys/module/kvm_intel/parameters/enable_apicv:Y So we have a clue. Let me study the code more, I'll try to get back with a suggestion. Wow, can't believe I missed that. Good catch! Does disabling apicv on E5-2650 v2 make reboot problem go away? Yes it does! # modprobe kvm_intel /sys/module/kvm_intel/parameters/enable_apicv:Y # /usr/bin/qemu-system-x86_64 -machine accel=kvm -name bsdtest -m 512 -smp 2,sockets=1,cores=1,threads=2 -drive file=./20140613_FreeBSD_9.2-RELEASE_ufs.qcow2,if=none,id=drive0,format=qcow2 -device virtio-blk-pci,scsi=off,drive=drive0 -vnc 0.0.0.0:0 -net none [problem occurs] # rmmod kvm_intel # modprobe kvm_intel enable_apicv=N /sys/module/kvm_intel/parameters/enable_apicv:N # /usr/bin/qemu-system-x86_64 -machine accel=kvm -name bsdtest -m 512 -smp 2,sockets=1,cores=1,threads=2 -drive file=./20140613_FreeBSD_9.2-RELEASE_ufs.qcow2,if=none,id=drive0,format=qcow2 -device virtio-blk-pci,scsi=off,drive=drive0 -vnc 0.0.0.0:0 -net none [problem does not occur] Thank you. 
This both narrows the problem considerably and provides an acceptable workaround. It would still be nice to see it fixed, of course. Keep me CC'ed as I'm not on the KVM list. I’m resurrecting an old thread since I haven’t heard anything in a while. Has anyone looked in to the KVM+apicv bug documented above as well as here: https://bugs.launchpad.net/qemu/+bug/1329956 ? If appropriate, where should I go to file a KVM bug (since this isn’t really Qemu’s problem)? Thanks, JN-- To unsubscribe from this list: send the line unsubscribe kvm in
Re: [PATCH v5] i386: Introduce ARAT CPU feature
On 6/22/15 1:38 AM, Jan Kiszka wrote: On 2015-06-18 22:21, Eduardo Habkost wrote: On Sun, Jun 07, 2015 at 11:15:08AM +0200, Jan Kiszka wrote: From: Jan Kiszka jan.kis...@siemens.com ARAT signals that the APIC timer does not stop in power saving states. As our APICs are emulated, it's fine to expose this feature to guests, at least when asking for KVM host features or with CPU types that include the flag. The exact model number that introduced the feature is not known, but reports can be found that it's at least available since Sandy Bridge. Signed-off-by: Jan Kiszka jan.kis...@siemens.com The code looks good now, but: what are the real consequences of enabling/disabling the flag? What exactly guests use it for? Isn't this going to make guests have additional expectations about the APIC timer that may be broken when live-migrating or pausing the VM? ARAT only refers to stopping of the timer in certain power states (which we do not even emulate IIRC). In that case, the OS is under risk of sleeping forever, thus need to look for a different wakeup source. HPET will always be the default broadcast event device I think. Regards, Wanpeng Li Live-migration or VM pausing are external effects on all timers of the guest, not only the APIC. However, none of them cause a wakeup miss - provided the host decides to resume the guest eventually. Jan -- To unsubscribe from this list: send the line unsubscribe kvm in
Re: [PATCH v2 11/15] KVM: MTRR: sort variable MTRRs
On 06/22/2015 07:24 PM, Paolo Bonzini wrote: On 17/06/2015 18:11, Paolo Bonzini wrote: Also, this loop looks weird. Is this what you wanted? list_for_each_entry(tmp, mtrr_state-head, node) if (cur-base = tmp-base) break; list_add_tail(cur-node, tmp-node); If so, can you look at kvm/queue and see if it is okay for you (so that we can get the series in 4.2)? Ping? If I don't get testing results before Wednesday, I'll drop this series from the 4.2 pull request. Paolo, sorry for the delay. Your changes are good to me. Thanks! -- To unsubscribe from this list: send the line unsubscribe kvm in
Re: [PATCH v5] i386: Introduce ARAT CPU feature
On 2015-06-23 04:50, Wanpeng Li wrote: On 6/22/15 1:38 AM, Jan Kiszka wrote: On 2015-06-18 22:21, Eduardo Habkost wrote: On Sun, Jun 07, 2015 at 11:15:08AM +0200, Jan Kiszka wrote: From: Jan Kiszka jan.kis...@siemens.com ARAT signals that the APIC timer does not stop in power saving states. As our APICs are emulated, it's fine to expose this feature to guests, at least when asking for KVM host features or with CPU types that include the flag. The exact model number that introduced the feature is not known, but reports can be found that it's at least available since Sandy Bridge. Signed-off-by: Jan Kiszka jan.kis...@siemens.com The code looks good now, but: what are the real consequences of enabling/disabling the flag? What exactly guests use it for? Isn't this going to make guests have additional expectations about the APIC timer that may be broken when live-migrating or pausing the VM? ARAT only refers to stopping of the timer in certain power states (which we do not even emulate IIRC). In that case, the OS is under risk of sleeping forever, thus need to look for a different wakeup source. HPET will always be the default broadcast event device I think. But it's unused (under Linux) if per-cpu clockevents are unaffected by CLOCK_EVT_FEAT_C3STOP (x86-only none-feature), i.e. have ARAT set. And other guests may have other strategies to deal with missing ARAT. Again, the scenario for me was not a regular setup but some Jailhouse boot of Linux where neither a HPET nor a PIT are available as broadcast sources and Linux therefore refuses to switch to hires mode - in contrast to running on real hardware. Jan signature.asc Description: OpenPGP digital signature
Re: [PATCH 1/2] arm64: qcom: Add define for ARMv8 implementer (MIDR)
Catalin Marinas wrote: So if the second patch is no longer needed, what's using this patch? I would defer merging it until actually required in some part of the kernel. Fair enough. -- Sent by an employee of the Qualcomm Innovation Center, Inc. The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, hosted by The Linux Foundation. -- To unsubscribe from this list: send the line unsubscribe kvm in
Re: Nested EPT Write Protection
On 22/06/2015 15:28, Hu Yaohui wrote: */2504 pseudo_gfn = base_addr PAGE_SHIFT; 2505 sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, 2506 iterator.level - 1, 2507 1, ACC_ALL, iterator.sptep);/* 2508 if (!sp) { 2509 pgprintk(nonpaging_map: ENOMEM\n); 2510 kvm_release_pfn_clean(pfn); 2511 return -ENOMEM; 2512 } . /code it will get a pseudo_gfn to allocate a kvm_mmu_page. What if a pseudo_gfn itself causes a tdp_page_fault? Will it make the corresponding EPT page table entry marked as readonly also? If tdp_page_fault is used (meaning non-nested KVM: nested KVM uses ept_page_fault instead), sp-unsync is always true: /* in kvm_mmu_get_page - __direct_map passes direct == true */ if (!direct) { if (rmap_write_protect(vcpu, gfn)) kvm_flush_remote_tlbs(vcpu-kvm); if (level PT_PAGE_TABLE_LEVEL need_sync) kvm_sync_pages(vcpu, gfn); account_shadowed(vcpu-kvm, sp); } so mmu_need_write_protect always returns false. Note that higher in kvm_mmu_get_page there is another conditional: if (!need_sync sp-unsync) need_sync = true; but it only applies to the !direct case. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in