Hello,
I am trying to enable the use of the NMI watchdog within a Linux guest running
under KVM. I have done this by allowing the guest direct access to the perfmon
MSRs, using the MSR_BITMAP field in the VMCS region.
Most of the time the NMI watchdog test in the guest fails, even though the
guest does receive a finite number of NMIs. Occasionally, however, it works!
Whenever it fails, I get this vmwrite error:
vmwrite error: reg 4016 value 80000202 (err 164061)
I have a few questions.
1. How are NMIs supposed to be delivered to the guest? I did this by adding a
new op to kvm_x86_ops.
2. How am I supposed to handle the perfmon MSRs? Direct access may pose
problems during migration, but I am not sure how costly emulating them behind
an abstraction would be.
I have not yet considered saving the MSRs on vmexit to allow multiple VMs to
use them. I think I can do that easily once I get this working.
Here's the code. Please tell me what dumb mistake I am making.
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c
b/arch/x86/kernel/cpu/perfctr-watchdog.c
index c02541e..276048a 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -342,7 +342,7 @@ static const struct wd_ops k7_wd_ops = {
#define P6_EVNTSEL_INT (1 << 20)
#define P6_EVNTSEL_OS (1 << 17)
#define P6_EVNTSEL_USR (1 << 16)
-#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
+#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x3C
#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
static int setup_p6_watchdog(unsigned nmi_hz)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2cbee94..73e9361 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -25,6 +25,8 @@
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <linux/module.h>
+#include <linux/kdebug.h>
+#include <linux/notifier.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/page.h>
@@ -740,9 +742,12 @@ static void apic_mmio_write(struct kvm_io_device *this,
apic_set_reg(apic, APIC_ICR2, val & 0xff000000);
break;
+ case APIC_LVTPC:
+ /* Enable PC NMI*/
+ if (val == APIC_DM_NMI)
+ apic_write(APIC_LVTPC,val);
case APIC_LVTT:
case APIC_LVTTHMR:
- case APIC_LVTPC:
case APIC_LVT0:
case APIC_LVT1:
case APIC_LVTERR:
@@ -790,6 +795,18 @@ static int apic_mmio_range(struct kvm_io_device *this,
gpa_t addr)
return ret;
}
+/*
+ * Die-notifier callback that forwards a host NMI to a guest.
+ *
+ * Only DIE_NMI and DIE_NMI_IPI events are acted on; every other event on the
+ * die chain is passed along untouched so that its real owner still sees it.
+ * The NMI is injected into vcpu 0 of the first VM on vm_list.  If there is no
+ * VM, or that VM has no vcpu 0, the event is left for the rest of the chain.
+ *
+ * Returns NOTIFY_STOP when the NMI was handed to a guest, NOTIFY_DONE
+ * otherwise.
+ *
+ * NOTE(review): vm_list is read here without any locking -- confirm that this
+ * is safe against concurrent VM creation and destruction.
+ */
+static int nmi_notify(struct notifier_block *self, unsigned long val,
+		      void *data)
+{
+	struct kvm *kvm;
+	struct kvm_vcpu *vcpu;
+
+	/* Do not swallow die events that are not NMIs. */
+	if (val != DIE_NMI && val != DIE_NMI_IPI)
+		return NOTIFY_DONE;
+
+	/* list_entry() on an empty list would yield a bogus struct kvm. */
+	if (list_empty(&vm_list))
+		return NOTIFY_DONE;
+
+	kvm = list_entry(vm_list.next, struct kvm, vm_list);
+	vcpu = kvm->vcpus[0];
+	if (!vcpu)
+		return NOTIFY_DONE;
+
+	kvm_x86_ops->inject_nmi(vcpu);
+	return NOTIFY_STOP;
+}
+
+/* Registration object that hooks nmi_notify() into a notifier chain. */
+static struct notifier_block nmi_notifier = {
+	.notifier_call = nmi_notify,
+};
+
void kvm_free_lapic(struct kvm_vcpu *vcpu)
{
if (!vcpu->arch.apic)
@@ -801,6 +818,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
__free_page(vcpu->arch.apic->regs_page);
kfree(vcpu->arch.apic);
+ unregister_die_notifier(&nmi_notifier);
}
/*
@@ -1005,6 +1023,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
apic->dev.in_range = apic_mmio_range;
apic->dev.private = apic;
+ register_die_notifier(&nmi_notifier);
return 0;
nomem_free_apic:
kfree(apic);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 00a00e4..fcffab1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -89,6 +89,7 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
static struct page *vmx_io_bitmap_a;
static struct page *vmx_io_bitmap_b;
+static struct page *vmx_msr_bitmap;
static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -982,7 +983,7 @@ static __init int setup_vmcs_config(struct vmcs_config
*vmcs_conf)
u32 _vmexit_control = 0;
u32 _vmentry_control = 0;
- min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
+ min = PIN_BASED_EXT_INTR_MASK;
opt = 0;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
&_pin_based_exec_control) < 0)
@@ -994,8 +995,10 @@ static __init int setup_vmcs_config(struct vmcs_config
*vmcs_conf)
CPU_BASED_CR8_STORE_EXITING |
#endif
CPU_BASED_USE_IO_BITMAPS |
+ CPU_BASED_USE_MSR_BITMAPS |
CPU_BASED_MOV_DR_EXITING |
CPU_BASED_USE_TSC_OFFSETING;
+
opt = CPU_BASED_TPR_SHADOW |
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
@@ -1568,6 +1571,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a));
vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b));
+ /* MSR BITMAP */
+ vmcs_write64(MSR_BITMAP, page_to_phys(vmx_msr_bitmap));
+
vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
/* Control */
@@ -1786,6 +1792,14 @@ out:
return ret;
}
+/*
+ * Request an NMI for the guest by writing vector 2, type NMI and the valid
+ * bit into the VM-entry interruption-information field.  Nothing is written
+ * until the vcpu has been launched.
+ *
+ * NOTE(review): the caller is a die notifier, so this can run in NMI context
+ * on whichever CPU took the NMI.  Presumably the VMCS write only succeeds
+ * when this vcpu's VMCS is the one loaded on that CPU -- confirm.  The value
+ * 80000202 in the reported vmwrite error looks like this field's contents.
+ */
+static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	u32 intr_info = INTR_INFO_VALID_MASK | INTR_TYPE_NMI | 2;
+
+	if (!vmx->launched)
+		return;
+
+	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
+}
+
static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2686,6 +2700,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.exception_injected = vmx_exception_injected,
.inject_pending_irq = vmx_intr_assist,
.inject_pending_vectors = do_interrupt_requests,
+ .inject_nmi = vmx_inject_nmi,
.set_tss_addr = vmx_set_tss_addr,
};
@@ -2700,7 +2715,11 @@ static int __init vmx_init(void)
return -ENOMEM;
vmx_io_bitmap_b = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
- if (!vmx_io_bitmap_b) {
+ if (!vmx_io_bitmap_b)
+ r = -ENOMEM;
+
+ vmx_msr_bitmap = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+ if (!vmx_msr_bitmap) {
r = -ENOMEM;
goto out;
}
@@ -2718,6 +2737,15 @@ static int __init vmx_init(void)
memset(iova, 0xff, PAGE_SIZE);
kunmap(vmx_io_bitmap_b);
+ iova = kmap(vmx_msr_bitmap);
+ memset(iova, 0xff, PAGE_SIZE);
+ /* Enable direct access to first perfmon MSR */
+ clear_bit(MSR_P6_PERFCTR0, iova);
+ clear_bit(MSR_P6_EVNTSEL0, iova);
+ clear_bit(MSR_P6_PERFCTR0, iova + 2048);
+ clear_bit(MSR_P6_EVNTSEL0, iova + 2048);
+ kunmap(vmx_msr_bitmap);
+
set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE);
@@ -2730,8 +2758,9 @@ static int __init vmx_init(void)
return 0;
out1:
- __free_page(vmx_io_bitmap_b);
+ __free_page(vmx_msr_bitmap);
out:
+ __free_page(vmx_io_bitmap_b);
__free_page(vmx_io_bitmap_a);
return r;
}
@@ -2740,6 +2769,7 @@ static void __exit vmx_exit(void)
{
__free_page(vmx_io_bitmap_b);
__free_page(vmx_io_bitmap_a);
+ __free_page(vmx_msr_bitmap);
kvm_exit();
}
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 436ce0f..1b6d6a8 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -242,6 +242,7 @@ enum vmcs_field {
#define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK
#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
+#define INTR_TYPE_NMI (2 << 8)
#define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */
#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 67ae307..f17248d 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -387,6 +387,7 @@ struct kvm_x86_ops {
void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
bool has_error_code, u32 error_code);
bool (*exception_injected)(struct kvm_vcpu *vcpu);
+ void (*inject_nmi)(struct kvm_vcpu *vcpu);
void (*inject_pending_irq)(struct kvm_vcpu *vcpu);
void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
struct kvm_run *run);
---
thanks,
balaji rao
-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
kvm-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/kvm-devel