Hi,
I have been trying to improve fork performance of OpenBSD/amd64 on KVM.
It turns out that when I increase the number of CPUs of a VM from 1 to 3,
a fork+exit micro benchmark is slowed down by a factor of 7.
The main reason for this seems to be a very large number of cross-CPU TLB
flushes (about 4 per fork+exit). Each IPI causes several VM exits which
are expensive. To reduce this, I have been trying to use paravirtualized
interfaces provided by KVM and optimize some other things. These changes
are mostly activated by a new pseudo device paravirt (which has the
advantage that one can use UKC to tweak things without recompiling).
However, some changes remain in effect even when not running on a hypervisor
(or when paravirt is disabled). For example, x86_ipi() will always use a
function pointer to dispatch to the appropriate implementation.
Is this the right way forward? Or would you prefer a compile-time
option, and maybe ship a bsd.mp.paravirt kernel in addition to bsd+bsd.mp?
The attached patch speeds up the fork+exit micro benchmark by a factor of
3 on a 3 CPU system. And the time to build a kernel with -j4 on a 4 CPU
system is also reduced by about 20%:
current:
real 1m50.089s
user 4m46.240s
sys 1m29.510s
current+paravirt:
real 1m29.313s
user 4m54.720s
sys 0m45.100s
BTW, why does amd64 use the APTE mapping/unmapping dance in pmap despite
the memory being available in the direct map area all the time?
Cheers,
Stefan
diff --git a/sys/arch/amd64/amd64/cpu.c b/sys/arch/amd64/amd64/cpu.c
index 88725f7..248ebb8 100644
--- a/sys/arch/amd64/amd64/cpu.c
+++ b/sys/arch/amd64/amd64/cpu.c
@@ -83,6 +83,7 @@
#include <machine/pmap.h>
#include <machine/vmparam.h>
#include <machine/mpbiosvar.h>
+#include <machine/paravirtvar.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>
#include <machine/segments.h>
@@ -569,6 +570,19 @@ cpu_init(struct cpu_info *ci)
ci->ci_flags |= CPUF_RUNNING;
tlbflushg();
#endif
+#if NPARAVIRT > 0	/* register this CPU's PV EOI word with KVM */
+	if (kvm_pv_eoi_enabled) {
+		paddr_t pa;
+		ci->ci_kvm_pv_eoi = 0;
+		if (pmap_extract(pmap_kernel(), (vaddr_t)&ci->ci_kvm_pv_eoi,
+		    &pa) && ((uint64_t)pa & 0x3) == 0) {
+			wrmsr(MSR_KVM_EOI_EN, (uint64_t)pa | 1); /* bit 0 = enable */
+		} else {
+			printf("could not get phys addr for MSR_KVM_EOI_EN, disabling pv_eoi\n");
+			kvm_pv_eoi_enabled = 0;
+		}
+	}
+#endif
}
diff --git a/sys/arch/amd64/amd64/genassym.cf b/sys/arch/amd64/amd64/genassym.cf
index e13a477..ab20329 100644
--- a/sys/arch/amd64/amd64/genassym.cf
+++ b/sys/arch/amd64/amd64/genassym.cf
@@ -114,6 +114,9 @@ member CPU_INFO_MUTEX_LEVEL ci_mutex_level
endif
member CPU_INFO_GDT ci_gdt
member CPU_INFO_TSS ci_tss
+if NPARAVIRT > 0
+member CPU_INFO_KVM_PV_EOI ci_kvm_pv_eoi
+endif
struct intrsource
member is_recurse
diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c
index d09e3fc..857af4b 100644
--- a/sys/arch/amd64/amd64/lapic.c
+++ b/sys/arch/amd64/amd64/lapic.c
@@ -45,6 +45,7 @@
#include <machine/pmap.h>
#include <machine/vmparam.h>
#include <machine/mpbiosvar.h>
+#include <machine/paravirtvar.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>
#include <machine/segments.h>
@@ -235,20 +236,42 @@ lapic_boot_init(paddr_t lapic_base)
lapic_map(lapic_base);
#ifdef MULTIPROCESSOR
- idt_allocmap[LAPIC_IPI_VECTOR] = 1;
- idt_vec_set(LAPIC_IPI_VECTOR, Xintr_lapic_ipi);
- idt_allocmap[LAPIC_IPI_INVLTLB] = 1;
- idt_vec_set(LAPIC_IPI_INVLTLB, Xipi_invltlb);
- idt_allocmap[LAPIC_IPI_INVLPG] = 1;
- idt_vec_set(LAPIC_IPI_INVLPG, Xipi_invlpg);
- idt_allocmap[LAPIC_IPI_INVLRANGE] = 1;
- idt_vec_set(LAPIC_IPI_INVLRANGE, Xipi_invlrange);
+#if NPARAVIRT > 0
+ if (kvm_pv_eoi_enabled) {
+ idt_allocmap[LAPIC_IPI_VECTOR] = 1;
+ idt_vec_set(LAPIC_IPI_VECTOR, Xintr_lapic_ipi_kvm_pv_eoi);
+ idt_allocmap[LAPIC_IPI_INVLTLB] = 1;
+ idt_vec_set(LAPIC_IPI_INVLTLB, Xipi_invltlb_kvm_pv_eoi);
+ idt_allocmap[LAPIC_IPI_INVLPG] = 1;
+ idt_vec_set(LAPIC_IPI_INVLPG, Xipi_invlpg_kvm_pv_eoi);
+ idt_allocmap[LAPIC_IPI_INVLRANGE] = 1;
+ idt_vec_set(LAPIC_IPI_INVLRANGE, Xipi_invlrange_kvm_pv_eoi);
+ }
+ else
+#endif
+ {
+ idt_allocmap[LAPIC_IPI_VECTOR] = 1;
+ idt_vec_set(LAPIC_IPI_VECTOR, Xintr_lapic_ipi);
+ idt_allocmap[LAPIC_IPI_INVLTLB] = 1;
+ idt_vec_set(LAPIC_IPI_INVLTLB, Xipi_invltlb);
+ idt_allocmap[LAPIC_IPI_INVLPG] = 1;
+ idt_vec_set(LAPIC_IPI_INVLPG, Xipi_invlpg);
+ idt_allocmap[LAPIC_IPI_INVLRANGE] = 1;
+ idt_vec_set(LAPIC_IPI_INVLRANGE, Xipi_invlrange);
+ }
#endif
idt_allocmap[LAPIC_SPURIOUS_VECTOR] = 1;
idt_vec_set(LAPIC_SPURIOUS_VECTOR, Xintrspurious);
idt_allocmap[LAPIC_TIMER_VECTOR] = 1;
+#if NPARAVIRT > 0
+ if (kvm_pv_eoi_enabled)
+ idt_vec_set(LAPIC_TIMER_VECTOR, Xintr_lapic_ltimer_kvm_pv_eoi);
+ else
+ idt_vec_set(LAPIC_TIMER_VECTOR, Xintr_lapic_ltimer);
+#else
idt_vec_set(LAPIC_TIMER_VECTOR, Xintr_lapic_ltimer);
+#endif
evcount_attach(&clk_count, "clock", &clk_irq);
#ifdef MULTIPROCESSOR
@@ -502,8 +525,12 @@ x86_ipi_init(int target)
return (i82489_readreg(LAPIC_ICRLO) & LAPIC_DLSTAT_BUSY) ? EBUSY : 0;
}
+#if NPARAVIRT > 0
int
+default_x86_ipi(int vec, int target, int dl)
+#else
x86_ipi(int vec, int target, int dl)
+#endif
{
int result, s;
@@ -519,12 +546,50 @@ x86_ipi(int vec, int target, int dl)
i82489_icr_wait();
+#ifndef DIAGNOSTIC
result = (i82489_readreg(LAPIC_ICRLO) & LAPIC_DLSTAT_BUSY) ? EBUSY : 0;
+#else
+ result = 0;
+#endif
splx(s);
return result;
}
+
+#if NPARAVIRT > 0
+/*
+ * Send an IPI with a single write to MSR_HV_X64_ICR instead of the two
+ * memory-mapped ICR register writes.  Always returns 0.
+ */
+int
+kvm_x86_ipi(int vec, int target, int dl)
+{
+	uint64_t data;
+
+	/* Unsigned math: shifting a signed int into bit 31 is undefined. */
+	data = (uint64_t)((uint32_t)target << LAPIC_ID_SHIFT) << 32;
+	data |= (uint32_t)((target & LAPIC_DEST_MASK) | vec | dl |
+	    LAPIC_LVL_ASSERT);
+
+	/*
+	 * Under KVM with in-kernel lapic, i82489_icr_wait() is not necessary.
+	 * Omitting it saves several vm exits.
+	 * XXX Check what happens without in-kernel lapic
+	 * XXX Check what happens on AMD
+	 *
+	 * Using the MSR causes only one vm exit as opposed to two exits when
+	 * writing the two halves of the ICR register.  Also, MSRs are cheaper
+	 * than MMIO writes on CPUs lacking the APIC virtualization feature.
+	 * XXX detect MSR support
+	 */
+	wrmsr(MSR_HV_X64_ICR, data);
+	return 0;
+}
+
+int (*x86_ipi_func)(int, int, int) = default_x86_ipi;
+#endif /* NPARAVIRT */
+
#endif /* MULTIPROCESSOR */
diff --git a/sys/arch/amd64/amd64/machdep.c b/sys/arch/amd64/amd64/machdep.c
index 77c1a64..d79abda 100644
--- a/sys/arch/amd64/amd64/machdep.c
+++ b/sys/arch/amd64/amd64/machdep.c
@@ -101,6 +101,7 @@
#include <machine/cpu.h>
#include <machine/cpufunc.h>
+#include <machine/paravirtvar.h>
#include <machine/pio.h>
#include <machine/psl.h>
#include <machine/reg.h>
@@ -779,7 +780,8 @@ boot(int howto)
}
if_downall();
- delay(4*1000000); /* XXX */
+ if (!running_on_hypervisor())
+ delay(4*1000000); /* XXX */
uvm_shutdown();
splhigh(); /* Disable interrupts. */
@@ -803,7 +805,8 @@ haltsys:
extern int acpi_enabled;
if (acpi_enabled) {
- delay(500000);
+ if (!running_on_hypervisor())
+ delay(500000);
if (howto & RB_POWERDOWN)
acpi_powerdown();
}
@@ -817,7 +820,7 @@ haltsys:
}
printf("rebooting...\n");
- if (cpureset_delay > 0)
+ if (cpureset_delay > 0 && !running_on_hypervisor())
delay(cpureset_delay * 1000);
cpu_reset();
for(;;) ;
diff --git a/sys/arch/amd64/amd64/mainbus.c b/sys/arch/amd64/amd64/mainbus.c
index 2742ca0..7a4def3 100644
--- a/sys/arch/amd64/amd64/mainbus.c
+++ b/sys/arch/amd64/amd64/mainbus.c
@@ -49,6 +49,7 @@
#include "bios.h"
#include "mpbios.h"
#include "vmt.h"
+#include "paravirt.h"
#include <machine/cpuvar.h>
#include <machine/i82093var.h>
@@ -151,6 +152,13 @@ mainbus_attach(struct device *parent, struct device *self, void *aux)
printf("\n");
+#if NPARAVIRT > 0
+ {
+ mba.mba_bios.ba_name = "paravirt";
+ config_found(self, &mba.mba_bios, mainbus_print);
+ }
+#endif
+
#if NBIOS > 0
{
mba.mba_bios.ba_name = "bios";
diff --git a/sys/arch/amd64/amd64/paravirt.c b/sys/arch/amd64/amd64/paravirt.c
new file mode 100644
index 0000000..0ce3ee9
--- /dev/null
+++ b/sys/arch/amd64/amd64/paravirt.c
@@ -0,0 +1,94 @@
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <machine/paravirtvar.h>
+#include <machine/cpu.h>
+#include <machine/specialreg.h>
+#include <machine/biosvar.h>
+#include <machine/cpuvar.h>
+
+char hypervisor_signature[13];	/* NOTE(review): not referenced elsewhere in this patch */
+uint32_t kvm_features;	/* regs[0] of the KVM features CPUID leaf, read in paravirt_attach() */
+uint32_t kvm_cpuid_base = 0;	/* CPUID leaf holding the KVM signature; 0 = not found */
+uint32_t hyperv_cpuid_base = 0;	/* CPUID leaf holding the Hyper-V signature; 0 = not found */
+int kvm_pv_eoi_enabled = 0;	/* set to 1 by paravirt_attach() when KVM advertises PV EOI */
+
+
+struct paravirt_softc {	/* per-device state: nothing beyond the generic device header */
+ struct device sc_dev;
+};
+
+int paravirt_probe(struct device *, void *, void *);
+void paravirt_attach(struct device *, struct device *, void *);
+int paravirt_guess(void);
+
+struct cfattach paravirt_ca = {	/* autoconf attachment glue for paravirt */
+ sizeof(struct paravirt_softc),	/* size of the softc */
+ paravirt_probe,	/* match function */
+ paravirt_attach,	/* attach function */
+ NULL,	/* presumably the detach hook: none (TODO confirm field order) */
+ NULL	/* presumably the activate hook: none (TODO confirm field order) */
+};
+
+struct cfdriver paravirt_cd = {	/* driver record; paravirt_probe() reads cd_ndevs and cd_name */
+ NULL, "paravirt", DV_DULL
+};
+
+int
+paravirt_probe(struct device *parent, void *match, void *aux)
+{
+	const struct bios_attach_args *args = aux;
+	/* Match only while no instance exists and the offered name is ours. */
+	int none_yet = (paravirt_cd.cd_ndevs == 0);
+	return (none_yet && strcmp(args->ba_name, paravirt_cd.cd_name) == 0);
+}
+
+int
+kvm_has_feature(int feature)	/* non-zero if that bit is set in kvm_features */
+{
+	return ((1UL << feature) & kvm_features);
+}
+
+int
+running_on_hypervisor(void)	/* non-zero once a KVM/Hyper-V leaf was seen */
+{
+	return (kvm_cpuid_base != 0 || hyperv_cpuid_base != 0);
+}
+
+/*
+ * Detect KVM/Hyper-V via the hypervisor CPUID leaves and print the result.
+ * On KVM, use the MSR-based x86_ipi() (MP only) and PV EOI if advertised.
+ */
+void
+paravirt_attach(struct device *parent, struct device *self, void *aux)
+{
+	uint32_t base, regs[4];
+
+	for (base = CPUID_HYPERVISOR_SIGNATURE_START;
+	    base < CPUID_HYPERVISOR_SIGNATURE_END;
+	    base += CPUID_HYPERVISOR_SIGNATURE_STEP) {
+		/* XXX CPUID_LEAF()??? */
+		CPUID(base, regs[0], regs[1], regs[2], regs[3]);
+		if (memcmp(&regs[1], "KVMKVMKVM\0\0\0", 12) == 0) {
+			kvm_cpuid_base = base;
+			printf(" KVM");
+			CPUID(base + CPUID_OFFSET_KVM_FEATURES, regs[0],
+			    regs[1], regs[2], regs[3]);
+			kvm_features = regs[0];
+		} else if (memcmp(&regs[1], "Microsoft Hv", 12) == 0) {
+			hyperv_cpuid_base = base;
+			printf(" Hyper-V");
+		}
+	}
+
+	if (kvm_cpuid_base != 0) {
+#ifdef MULTIPROCESSOR	/* x86_ipi_func is only defined in MP kernels */
+		printf(" KVM:Optimized-IPI");
+		x86_ipi_func = kvm_x86_ipi;
+#endif
+		if (kvm_has_feature(KVM_FEATURE_PV_EOI)) {
+			printf(" KVM:PV_EOI");
+			kvm_pv_eoi_enabled = 1;
+		}
+	}
+	printf("\n");
+}
diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
index 19fa93e..3f4ac1b 100644
--- a/sys/arch/amd64/amd64/vector.S
+++ b/sys/arch/amd64/amd64/vector.S
@@ -77,6 +77,7 @@
#include <machine/trap.h>
#include <machine/intr.h>
#include <machine/psl.h>
+#include <machine/paravirtvar.h>
#include "ioapic.h"
#include "lapic.h"
@@ -319,6 +320,18 @@ IDTVEC(recurse_lapic_ipi)
pushq $T_ASTFLT
INTRENTRY
jmp 1f
+IDTVEC(intr_lapic_ipi_kvm_pv_eoi)
+ pushq $0
+ pushq $T_ASTFLT
+ INTRENTRY
+ btr $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)
+ jc 3f
+ movl $0,_C_LABEL(local_apic)+LAPIC_EOI
+3:
+ movl CPUVAR(ILEVEL),%ebx
+ cmpl $IPL_IPI,%ebx
+ jae 2f
+ jmp 1f
IDTVEC(intr_lapic_ipi)
pushq $0
pushq $T_ASTFLT
@@ -344,7 +357,7 @@ IDTVEC(resume_lapic_ipi)
IDTVEC(ipi_invltlb)
pushq %rax
- ioapic_asm_ack()
+ ioapic_asm_ack_no_swapgs()
movq %cr3, %rax
movq %rax, %cr3
@@ -358,7 +371,7 @@ IDTVEC(ipi_invltlb)
IDTVEC(ipi_invlpg)
pushq %rax
- ioapic_asm_ack()
+ ioapic_asm_ack_no_swapgs()
movq tlb_shoot_addr1, %rax
invlpg (%rax)
@@ -373,7 +386,7 @@ IDTVEC(ipi_invlrange)
pushq %rax
pushq %rdx
- ioapic_asm_ack()
+ ioapic_asm_ack_no_swapgs()
movq tlb_shoot_addr1, %rax
movq tlb_shoot_addr2, %rdx
@@ -389,6 +402,85 @@ IDTVEC(ipi_invlrange)
popq %rax
iretq
+IDTVEC(ipi_invltlb_kvm_pv_eoi)	/* full TLB flush IPI, PV EOI variant */
+ testq $SEL_UPL,8(%rsp)	/* interrupted user mode? (presumably the saved %cs) */
+ je 1f	/* no: take the path without swapgs */
+ swapgs	/* CPUVAR() needs swapgs first when coming from user mode */
+ btr $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)	/* CF = old PV EOI bit; bit cleared */
+ swapgs	/* switch back before returning; CF must survive until 2: */
+ jmp 2f
+1:
+ btr $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)	/* same test-and-clear, kernel-mode path */
+2:
+ jc 3f	/* bit was set: skip the LAPIC EOI write */
+ movl $0,(_C_LABEL(local_apic)+LAPIC_EOI)(%rip)	/* otherwise EOI via the local APIC */
+3:
+ pushq %rax
+
+ movq %cr3, %rax	/* rewrite %cr3 with its own value to flush the TLB */
+ movq %rax, %cr3
+
+ lock	/* atomically decrement tlb_shoot_wait */
+ decq tlb_shoot_wait
+
+ popq %rax
+ iretq
+
+IDTVEC(ipi_invlpg_kvm_pv_eoi)	/* single-page TLB flush IPI, PV EOI variant */
+ testq $SEL_UPL,8(%rsp)	/* interrupted user mode? (presumably the saved %cs) */
+ je 1f	/* no: take the path without swapgs */
+ swapgs	/* CPUVAR() needs swapgs first when coming from user mode */
+ btr $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)	/* CF = old PV EOI bit; bit cleared */
+ swapgs	/* switch back before returning; CF must survive until 2: */
+ jmp 2f
+1:
+ btr $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)	/* same test-and-clear, kernel-mode path */
+2:
+ jc 3f	/* bit was set: skip the LAPIC EOI write */
+ movl $0,(_C_LABEL(local_apic)+LAPIC_EOI)(%rip)	/* otherwise EOI via the local APIC */
+3:
+ pushq %rax
+
+ movq tlb_shoot_addr1, %rax	/* address of the page to invalidate */
+ invlpg (%rax)
+
+ lock	/* atomically decrement tlb_shoot_wait */
+ decq tlb_shoot_wait
+
+ popq %rax
+ iretq
+
+IDTVEC(ipi_invlrange_kvm_pv_eoi)	/* address-range TLB flush IPI, PV EOI variant */
+ testq $SEL_UPL,8(%rsp)	/* interrupted user mode? (presumably the saved %cs) */
+ je 1f	/* no: take the path without swapgs */
+ swapgs	/* CPUVAR() needs swapgs first when coming from user mode */
+ btr $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)	/* CF = old PV EOI bit; bit cleared */
+ swapgs	/* switch back before returning; CF must survive until 2: */
+ jmp 2f
+1:
+ btr $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)	/* same test-and-clear, kernel-mode path */
+2:
+ jc 3f	/* bit was set: skip the LAPIC EOI write */
+ movl $0,(_C_LABEL(local_apic)+LAPIC_EOI)(%rip)	/* otherwise EOI via the local APIC */
+3:
+ pushq %rax
+ pushq %rdx
+
+ movq tlb_shoot_addr1, %rax	/* start of the range */
+ movq tlb_shoot_addr2, %rdx	/* end of the range (exclusive, see jb below) */
+4: invlpg (%rax)	/* invalidate one page per iteration */
+ addq $PAGE_SIZE, %rax
+ cmpq %rdx, %rax
+ jb 4b	/* loop while %rax < %rdx (unsigned) */
+
+ lock	/* atomically decrement tlb_shoot_wait */
+ decq tlb_shoot_wait
+
+ popq %rdx
+ popq %rax
+ iretq
+
+
#endif /* MULTIPROCESSOR */
/*
@@ -400,6 +492,18 @@ IDTVEC(recurse_lapic_ltimer)
pushq $T_ASTFLT
INTRENTRY
jmp 1f
+IDTVEC(intr_lapic_ltimer_kvm_pv_eoi)
+ pushq $0
+ pushq $T_ASTFLT
+ INTRENTRY
+ btr $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)
+ jc 3f
+ movl $0,_C_LABEL(local_apic)+LAPIC_EOI
+3:
+ movl CPUVAR(ILEVEL),%ebx
+ cmpl $IPL_CLOCK,%ebx
+ jae 2f
+ jmp 1f
IDTVEC(intr_lapic_ltimer)
pushq $0
pushq $T_ASTFLT
diff --git a/sys/arch/amd64/conf/GENERIC b/sys/arch/amd64/conf/GENERIC
index 07cad3b..ba0ccba 100644
--- a/sys/arch/amd64/conf/GENERIC
+++ b/sys/arch/amd64/conf/GENERIC
@@ -38,6 +38,8 @@ isa0 at amdpcib?
isa0 at tcpcib?
pci* at mainbus0
+paravirt0 at mainbus0
+
acpi0 at bios0
acpitimer* at acpi?
acpihpet* at acpi?
diff --git a/sys/arch/amd64/conf/files.amd64 b/sys/arch/amd64/conf/files.amd64
index f283f4c..d352aec 100644
--- a/sys/arch/amd64/conf/files.amd64
+++ b/sys/arch/amd64/conf/files.amd64
@@ -80,6 +80,10 @@ device mainbus: isabus, pcibus, mainbus
attach mainbus at root
file arch/amd64/amd64/mainbus.c mainbus
+device paravirt
+attach paravirt at mainbus
+file arch/amd64/amd64/paravirt.c paravirt needs-flag
+
device bios {}
attach bios at mainbus
file arch/amd64/amd64/bios.c bios needs-flag
diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h
index 4495ed1..6b2cb15 100644
--- a/sys/arch/amd64/include/cpu.h
+++ b/sys/arch/amd64/include/cpu.h
@@ -46,6 +46,7 @@
#include <machine/segments.h>
#include <machine/cacheinfo.h>
#include <machine/intrdefs.h>
+#include "paravirt.h"
#ifdef MULTIPROCESSOR
#include <machine/i82489reg.h>
@@ -144,6 +145,9 @@ struct cpu_info {
struct ksensordev ci_sensordev;
struct ksensor ci_sensor;
+#if NPARAVIRT > 0
+ u_int32_t ci_kvm_pv_eoi;
+#endif
#ifdef GPROF
struct gmonparam *ci_gmon;
#endif
diff --git a/sys/arch/amd64/include/cpuvar.h b/sys/arch/amd64/include/cpuvar.h
index 8a75da5..5f19e27 100644
--- a/sys/arch/amd64/include/cpuvar.h
+++ b/sys/arch/amd64/include/cpuvar.h
@@ -64,6 +64,8 @@
* SUCH DAMAGE.
*/
+#include "paravirt.h"
+
struct cpu_functions {
int (*start)(struct cpu_info *);
int (*stop)(struct cpu_info *);
@@ -87,7 +89,17 @@ struct cpu_attach_args {
#ifdef _KERNEL
+#if NPARAVIRT > 0
+extern int (*x86_ipi_func)(int,int,int);
+int kvm_x86_ipi(int vec, int target, int dl);
+int default_x86_ipi(int vec, int target, int dl);
+static inline int x86_ipi(int vec, int target, int dl) {
+ return x86_ipi_func(vec, target, dl);
+}
+#else
int x86_ipi(int,int,int);
+#endif
+
void x86_self_ipi(int);
int x86_ipi_init(int);
diff --git a/sys/arch/amd64/include/i82093reg.h b/sys/arch/amd64/include/i82093reg.h
index e4ab947..9275b2e 100644
--- a/sys/arch/amd64/include/i82093reg.h
+++ b/sys/arch/amd64/include/i82093reg.h
@@ -112,9 +112,26 @@
#ifdef _KERNEL
-#define ioapic_asm_ack(num) \
+#include <machine/paravirtvar.h>
+#define ioapic_asm_ack_no_swapgs(num) \
movl $0,(_C_LABEL(local_apic)+LAPIC_EOI)(%rip)
+#if NPARAVIRT > 0
+/*
+ * This is only usable if swapgs has already been called (e.g. by INTRENTRY).
+ */
+#define ioapic_asm_ack(num) \
+ btr $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI) ;\
+ jc 78f ;\
+ ioapic_asm_ack_no_swapgs(num) ;\
+78:
+
+#else
+
+#define ioapic_asm_ack(num) ioapic_asm_ack_no_swapgs(num)
+
+#endif
+
#ifdef MULTIPROCESSOR
#ifdef notyet
diff --git a/sys/arch/amd64/include/i82489var.h b/sys/arch/amd64/include/i82489var.h
index dd50af5..f926c43 100644
--- a/sys/arch/amd64/include/i82489var.h
+++ b/sys/arch/amd64/include/i82489var.h
@@ -33,6 +33,8 @@
#ifndef _MACHINE_I82489VAR_H_
#define _MACHINE_I82489VAR_H_
+#include "paravirt.h"
+
/*
* Software definitions belonging to Local APIC driver.
*/
@@ -73,6 +75,9 @@ extern void Xintrspurious(void);
* Vector used for inter-processor interrupts.
*/
extern void Xintr_lapic_ipi(void);
+#if NPARAVIRT > 0
+extern void Xintr_lapic_ipi_kvm_pv_eoi(void);
+#endif
extern void Xrecurse_lapic_ipi(void);
extern void Xresume_lapic_ipi(void);
#define LAPIC_IPI_VECTOR 0xe0
@@ -88,12 +93,20 @@ extern void Xresume_lapic_ipi(void);
extern void Xipi_invltlb(void);
extern void Xipi_invlpg(void);
extern void Xipi_invlrange(void);
+#if NPARAVIRT > 0
+extern void Xipi_invltlb_kvm_pv_eoi(void);
+extern void Xipi_invlpg_kvm_pv_eoi(void);
+extern void Xipi_invlrange_kvm_pv_eoi(void);
+#endif
/*
* Vector used for local apic timer interrupts.
*/
extern void Xintr_lapic_ltimer(void);
+#if NPARAVIRT > 0
+extern void Xintr_lapic_ltimer_kvm_pv_eoi(void);
+#endif
extern void Xresume_lapic_ltimer(void);
extern void Xrecurse_lapic_ltimer(void);
#define LAPIC_TIMER_VECTOR 0xc0
diff --git a/sys/arch/amd64/include/paravirtvar.h b/sys/arch/amd64/include/paravirtvar.h
new file mode 100644
index 0000000..ab37e3b
--- /dev/null
+++ b/sys/arch/amd64/include/paravirtvar.h
@@ -0,0 +1,43 @@
+#ifndef _MACHINE_PARAVIRT_H_
+#define _MACHINE_PARAVIRT_H_
+
+#include "paravirt.h"
+
+#define CPUID_HYPERVISOR_SIGNATURE_START 0x40000000
+#define CPUID_HYPERVISOR_SIGNATURE_END 0x40010000
+#define CPUID_HYPERVISOR_SIGNATURE_STEP 0x100
+
+#define CPUID_OFFSET_KVM_FEATURES 0x1
+
+#define KVM_FEATURE_CLOCKSOURCE 0 /* deprecated */
+#define KVM_FEATURE_NOP_IO_DELAY 1
+#define KVM_FEATURE_MMU_OP 2 /* deprecated */
+#define KVM_FEATURE_CLOCKSOURCE2 3
+#define KVM_FEATURE_ASYNC_PF 4
+#define KVM_FEATURE_STEAL_TIME 5
+#define KVM_FEATURE_PV_EOI 6
+#define KVM_FEATURE_PV_UNHALT 7
+#define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24
+
+#define MSR_KVM_EOI_EN 0x4b564d04
+
+#define MSR_HV_X64_EOI 0x40000070
+#define MSR_HV_X64_ICR 0x40000071
+#define MSR_HV_X64_TPR 0x40000072
+
+#define KVM_PV_EOI_BIT 0
+
+#ifndef _LOCORE
+
+extern int kvm_pv_eoi_enabled;
+int kvm_has_feature(int feature);
+
+#if NPARAVIRT > 0
+int running_on_hypervisor(void);
+#else
+#define running_on_hypervisor() 0
+#endif /* NPARAVIRT */
+
+#endif /* !_LOCORE */
+
+#endif /* _MACHINE_PARAVIRT_H_ */