Hi,

I have been trying to increase fork performance of openbsd/amd64 on KVM. 
It turns out that when I increase the number of CPUs of a VM from 1 to 3, 
a fork+exit micro benchmark is slowed down by a factor of 7.

The main reason for this seems to be a very large number of cross-CPU TLB 
flushes (about 4 per fork+exit). Each IPI causes several VM exits which 
are expensive. To reduce this, I have been trying to use paravirtualized 
interfaces provided by KVM and optimize some other things. These changes 
are mostly activated by a new pseudo device paravirt (which has the 
advantage that one can use UKC to tweak things without recompiling). 
However, some changes remain in effect even when not running on a 
hypervisor (or when paravirt is disabled). For example, x86_ipi() will 
use a function pointer to dispatch to the appropriate implementation.

Is this the right way forward? Or would you prefer a compile-time 
option and maybe ship a bsd.mp.paravirt kernel in addition to bsd+bsd.mp?


The attached patch speeds up the fork+exit micro benchmark by a factor of 
3 on a 3 CPU system. And the time to build a kernel with -j4 on a 4 CPU 
system is also reduced by about 20%:

current:
real    1m50.089s
user    4m46.240s
sys     1m29.510s

current+paravirt:
real    1m29.313s
user    4m54.720s
sys     0m45.100s



BTW, why does amd64 use the APTE mapping/unmapping dance in pmap despite 
the memory being available in the direct map area all the time?


Cheers,
Stefan


diff --git a/sys/arch/amd64/amd64/cpu.c b/sys/arch/amd64/amd64/cpu.c
index 88725f7..248ebb8 100644
--- a/sys/arch/amd64/amd64/cpu.c
+++ b/sys/arch/amd64/amd64/cpu.c
@@ -83,6 +83,7 @@
 #include <machine/pmap.h>
 #include <machine/vmparam.h>
 #include <machine/mpbiosvar.h>
+#include <machine/paravirtvar.h>
 #include <machine/pcb.h>
 #include <machine/specialreg.h>
 #include <machine/segments.h>
@@ -569,6 +570,19 @@ cpu_init(struct cpu_info *ci)
        ci->ci_flags |= CPUF_RUNNING;
        tlbflushg();
 #endif
+#if NPARAVIRT > 0
+       if (kvm_pv_eoi_enabled) {
+               paddr_t pa;
+               ci->ci_kvm_pv_eoi = 0;
+               if (pmap_extract(pmap_kernel(), (vaddr_t)&ci->ci_kvm_pv_eoi, &pa) &&
+                   ((uint64_t)pa & 0x3) == 0) {
+                       wrmsr(MSR_KVM_EOI_EN, (1 | (uint64_t)pa) );
+               } else {
+                       printf("could not get phys addr for MSR_KVM_EOI_EN, disabling pv_eoi\n");
+                       kvm_pv_eoi_enabled = 0;
+               }
+       }
+#endif
 }
 
 
diff --git a/sys/arch/amd64/amd64/genassym.cf b/sys/arch/amd64/amd64/genassym.cf
index e13a477..ab20329 100644
--- a/sys/arch/amd64/amd64/genassym.cf
+++ b/sys/arch/amd64/amd64/genassym.cf
@@ -114,6 +114,9 @@ member      CPU_INFO_MUTEX_LEVEL    ci_mutex_level
 endif
 member CPU_INFO_GDT            ci_gdt
 member CPU_INFO_TSS            ci_tss
+if NPARAVIRT > 0
+member CPU_INFO_KVM_PV_EOI     ci_kvm_pv_eoi
+endif
 
 struct intrsource
 member is_recurse
diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c
index d09e3fc..857af4b 100644
--- a/sys/arch/amd64/amd64/lapic.c
+++ b/sys/arch/amd64/amd64/lapic.c
@@ -45,6 +45,7 @@
 #include <machine/pmap.h>
 #include <machine/vmparam.h>
 #include <machine/mpbiosvar.h>
+#include <machine/paravirtvar.h>
 #include <machine/pcb.h>
 #include <machine/specialreg.h>
 #include <machine/segments.h>
@@ -235,20 +236,42 @@ lapic_boot_init(paddr_t lapic_base)
        lapic_map(lapic_base);
 
 #ifdef MULTIPROCESSOR
-       idt_allocmap[LAPIC_IPI_VECTOR] = 1;
-       idt_vec_set(LAPIC_IPI_VECTOR, Xintr_lapic_ipi);
-       idt_allocmap[LAPIC_IPI_INVLTLB] = 1;
-       idt_vec_set(LAPIC_IPI_INVLTLB, Xipi_invltlb);
-       idt_allocmap[LAPIC_IPI_INVLPG] = 1;
-       idt_vec_set(LAPIC_IPI_INVLPG, Xipi_invlpg);
-       idt_allocmap[LAPIC_IPI_INVLRANGE] = 1;
-       idt_vec_set(LAPIC_IPI_INVLRANGE, Xipi_invlrange);
+#if NPARAVIRT > 0
+       if (kvm_pv_eoi_enabled) {
+               idt_allocmap[LAPIC_IPI_VECTOR] = 1;
+               idt_vec_set(LAPIC_IPI_VECTOR, Xintr_lapic_ipi_kvm_pv_eoi);
+               idt_allocmap[LAPIC_IPI_INVLTLB] = 1;
+               idt_vec_set(LAPIC_IPI_INVLTLB, Xipi_invltlb_kvm_pv_eoi);
+               idt_allocmap[LAPIC_IPI_INVLPG] = 1;
+               idt_vec_set(LAPIC_IPI_INVLPG, Xipi_invlpg_kvm_pv_eoi);
+               idt_allocmap[LAPIC_IPI_INVLRANGE] = 1;
+               idt_vec_set(LAPIC_IPI_INVLRANGE, Xipi_invlrange_kvm_pv_eoi);
+       }
+       else
+#endif
+       {
+               idt_allocmap[LAPIC_IPI_VECTOR] = 1;
+               idt_vec_set(LAPIC_IPI_VECTOR, Xintr_lapic_ipi);
+               idt_allocmap[LAPIC_IPI_INVLTLB] = 1;
+               idt_vec_set(LAPIC_IPI_INVLTLB, Xipi_invltlb);
+               idt_allocmap[LAPIC_IPI_INVLPG] = 1;
+               idt_vec_set(LAPIC_IPI_INVLPG, Xipi_invlpg);
+               idt_allocmap[LAPIC_IPI_INVLRANGE] = 1;
+               idt_vec_set(LAPIC_IPI_INVLRANGE, Xipi_invlrange);
+       }
 #endif
        idt_allocmap[LAPIC_SPURIOUS_VECTOR] = 1;
        idt_vec_set(LAPIC_SPURIOUS_VECTOR, Xintrspurious);
 
        idt_allocmap[LAPIC_TIMER_VECTOR] = 1;
+#if NPARAVIRT > 0
+       if (kvm_pv_eoi_enabled)
+               idt_vec_set(LAPIC_TIMER_VECTOR, Xintr_lapic_ltimer_kvm_pv_eoi);
+       else
+               idt_vec_set(LAPIC_TIMER_VECTOR, Xintr_lapic_ltimer);
+#else
        idt_vec_set(LAPIC_TIMER_VECTOR, Xintr_lapic_ltimer);
+#endif
 
        evcount_attach(&clk_count, "clock", &clk_irq);
 #ifdef MULTIPROCESSOR
@@ -502,8 +525,12 @@ x86_ipi_init(int target)
        return (i82489_readreg(LAPIC_ICRLO) & LAPIC_DLSTAT_BUSY) ? EBUSY : 0;
 }
 
+#if NPARAVIRT > 0
 int
+default_x86_ipi(int vec, int target, int dl)
+#else
 x86_ipi(int vec, int target, int dl)
+#endif
 {
        int result, s;
 
@@ -519,12 +546,50 @@ x86_ipi(int vec, int target, int dl)
 
        i82489_icr_wait();
 
+#ifndef DIAGNOSTIC
        result = (i82489_readreg(LAPIC_ICRLO) & LAPIC_DLSTAT_BUSY) ? EBUSY : 0;
+#else
+       result = 0;
+#endif
 
        splx(s);
 
        return result;
 }
+
+#if NPARAVIRT > 0
+int
+kvm_x86_ipi(int vec, int target, int dl)
+{
+       uint64_t data = target << LAPIC_ID_SHIFT;
+       data <<= 32;
+       data |= (target & LAPIC_DEST_MASK) | vec | dl | LAPIC_LVL_ASSERT;
+
+       /*
+        * Under KVM with in-kernel lapic, i82489_icr_wait() is not necessary.
+        * Omitting it saves several vm exits.
+        * XXX Check what happens without in-kernel lapic
+        * XXX Check what happens on AMD
+        */
+
+       /*
+        * Using the MSR causes only one vm exit as opposed to two exits when
+        * writing the two halves of the ICR register.
+        *
+        * Also, MSRs are cheaper than MMIO writes on CPUs lacking the APIC
+        * virtualization feature.
+        *
+        * XXX detect MSR support
+        */
+
+       wrmsr(MSR_HV_X64_ICR, data);
+
+       return 0;
+}
+
+int (*x86_ipi_func)(int, int, int) = default_x86_ipi;
+#endif /* NPARAVIRT */
+
 #endif /* MULTIPROCESSOR */
 
 
diff --git a/sys/arch/amd64/amd64/machdep.c b/sys/arch/amd64/amd64/machdep.c
index 77c1a64..d79abda 100644
--- a/sys/arch/amd64/amd64/machdep.c
+++ b/sys/arch/amd64/amd64/machdep.c
@@ -101,6 +101,7 @@
 
 #include <machine/cpu.h>
 #include <machine/cpufunc.h>
+#include <machine/paravirtvar.h>
 #include <machine/pio.h>
 #include <machine/psl.h>
 #include <machine/reg.h>
@@ -779,7 +780,8 @@ boot(int howto)
        }
        if_downall();
 
-       delay(4*1000000);       /* XXX */
+       if (!running_on_hypervisor())
+               delay(4*1000000);       /* XXX */
 
        uvm_shutdown();
        splhigh();              /* Disable interrupts. */
@@ -803,7 +805,8 @@ haltsys:
                extern int acpi_enabled;
 
                if (acpi_enabled) {
-                       delay(500000);
+                       if (!running_on_hypervisor())
+                               delay(500000);
                        if (howto & RB_POWERDOWN)
                                acpi_powerdown();
                }
@@ -817,7 +820,7 @@ haltsys:
        }
 
        printf("rebooting...\n");
-       if (cpureset_delay > 0)
+       if (cpureset_delay > 0 && !running_on_hypervisor())
                delay(cpureset_delay * 1000);
        cpu_reset();
        for(;;) ;
diff --git a/sys/arch/amd64/amd64/mainbus.c b/sys/arch/amd64/amd64/mainbus.c
index 2742ca0..7a4def3 100644
--- a/sys/arch/amd64/amd64/mainbus.c
+++ b/sys/arch/amd64/amd64/mainbus.c
@@ -49,6 +49,7 @@
 #include "bios.h"
 #include "mpbios.h"
 #include "vmt.h"
+#include "paravirt.h"
 
 #include <machine/cpuvar.h>
 #include <machine/i82093var.h>
@@ -151,6 +152,13 @@ mainbus_attach(struct device *parent, struct device *self, void *aux)
 
        printf("\n");
 
+#if NPARAVIRT > 0
+       {
+               mba.mba_bios.ba_name = "paravirt";
+               config_found(self, &mba.mba_bios, mainbus_print);
+       }
+#endif
+
 #if NBIOS > 0
        {
                mba.mba_bios.ba_name = "bios";
diff --git a/sys/arch/amd64/amd64/paravirt.c b/sys/arch/amd64/amd64/paravirt.c
new file mode 100644
index 0000000..0ce3ee9
--- /dev/null
+++ b/sys/arch/amd64/amd64/paravirt.c
@@ -0,0 +1,94 @@
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <machine/paravirtvar.h>
+#include <machine/cpu.h>
+#include <machine/specialreg.h>
+#include <machine/biosvar.h>
+#include <machine/cpuvar.h>
+
+char           hypervisor_signature[13];
+uint32_t       kvm_features;
+uint32_t       kvm_cpuid_base = 0;
+uint32_t       hyperv_cpuid_base = 0;
+int            kvm_pv_eoi_enabled = 0;
+
+
+struct paravirt_softc {
+       struct  device sc_dev;
+};
+
+int paravirt_probe(struct device *, void *, void *);
+void paravirt_attach(struct device *, struct device *, void *);
+int paravirt_guess(void);
+
+struct cfattach paravirt_ca = {
+       sizeof(struct paravirt_softc),
+       paravirt_probe,
+       paravirt_attach,
+       NULL,
+       NULL
+};
+
+struct cfdriver paravirt_cd = {
+       NULL, "paravirt", DV_DULL
+};
+
+int
+paravirt_probe(struct device *parent, void *match, void *aux)
+{
+       struct bios_attach_args *bia = aux;
+       if (paravirt_cd.cd_ndevs || strcmp(bia->ba_name, paravirt_cd.cd_name))
+               return 0;
+       return 1;
+}
+
+int
+kvm_has_feature(int feature)
+{
+       return (kvm_features & (1UL << feature));
+}
+
+int
+running_on_hypervisor()
+{
+       return (kvm_cpuid_base != 0 || hyperv_cpuid_base != 0);
+}
+
+void
+paravirt_attach(struct device *parent, struct device *self, void *aux)
+{
+       // struct paravirt_softc *sc = (struct paravirt_softc *)self;
+       // uint32_t flags = sc->sc_dev.dv_cfdata->cf_flags;
+       uint32_t regs[4];
+       uint32_t base;
+       // struct cpu_info *ci = curcpu();
+
+       for (base = CPUID_HYPERVISOR_SIGNATURE_START;
+           base < CPUID_HYPERVISOR_SIGNATURE_END;
+           base += CPUID_HYPERVISOR_SIGNATURE_STEP) {
+               // XXX CPUID_LEAF()???
+               CPUID(base, regs[0], regs[1], regs[2], regs[3]);
+               if (memcmp(&regs[1], "KVMKVMKVM\0\0\0", 12) == 0) {
+                       kvm_cpuid_base = base;
+                       printf(" KVM");
+                       CPUID(base + CPUID_OFFSET_KVM_FEATURES, regs[0], regs[1], regs[2], regs[3]);
+                       kvm_features = regs[0];
+               } else if (memcmp(&regs[1], "Microsoft Hv", 12) == 0) {
+                       hyperv_cpuid_base = base;
+                       printf(" Hyper-V");
+               }
+       }
+
+       if (kvm_cpuid_base != 0) {
+               printf(" KVM:Optimized-IPI");
+               x86_ipi_func = kvm_x86_ipi;
+
+               if (kvm_has_feature(KVM_FEATURE_PV_EOI)) {
+                       printf(" KVM:PV_EOI");
+                       kvm_pv_eoi_enabled = 1;
+               }
+
+       }
+       printf("\n");
+
+}
diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
index 19fa93e..3f4ac1b 100644
--- a/sys/arch/amd64/amd64/vector.S
+++ b/sys/arch/amd64/amd64/vector.S
@@ -77,6 +77,7 @@
 #include <machine/trap.h>
 #include <machine/intr.h>
 #include <machine/psl.h>
+#include <machine/paravirtvar.h>
 
 #include "ioapic.h"
 #include "lapic.h"
@@ -319,6 +320,18 @@ IDTVEC(recurse_lapic_ipi)
        pushq   $T_ASTFLT
        INTRENTRY               
        jmp     1f
+IDTVEC(intr_lapic_ipi_kvm_pv_eoi)
+       pushq   $0
+       pushq   $T_ASTFLT
+       INTRENTRY
+       btr     $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)
+       jc      3f
+       movl    $0,_C_LABEL(local_apic)+LAPIC_EOI
+3:
+       movl    CPUVAR(ILEVEL),%ebx
+       cmpl    $IPL_IPI,%ebx
+       jae     2f
+       jmp     1f
 IDTVEC(intr_lapic_ipi)
        pushq   $0              
        pushq   $T_ASTFLT
@@ -344,7 +357,7 @@ IDTVEC(resume_lapic_ipi)
 IDTVEC(ipi_invltlb)
        pushq   %rax
 
-       ioapic_asm_ack()
+       ioapic_asm_ack_no_swapgs()
 
        movq    %cr3, %rax
        movq    %rax, %cr3
@@ -358,7 +371,7 @@ IDTVEC(ipi_invltlb)
 IDTVEC(ipi_invlpg)
        pushq   %rax
 
-       ioapic_asm_ack()
+       ioapic_asm_ack_no_swapgs()
 
        movq    tlb_shoot_addr1, %rax
        invlpg  (%rax)
@@ -373,7 +386,7 @@ IDTVEC(ipi_invlrange)
        pushq   %rax
        pushq   %rdx
 
-       ioapic_asm_ack()
+       ioapic_asm_ack_no_swapgs()
 
        movq    tlb_shoot_addr1, %rax
        movq    tlb_shoot_addr2, %rdx
@@ -389,6 +402,85 @@ IDTVEC(ipi_invlrange)
        popq    %rax
        iretq
 
+IDTVEC(ipi_invltlb_kvm_pv_eoi)
+        testq   $SEL_UPL,8(%rsp)
+        je      1f
+       swapgs
+       btr     $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)
+       swapgs
+       jmp 2f
+1:
+       btr     $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)
+2:
+       jc      3f
+       movl    $0,(_C_LABEL(local_apic)+LAPIC_EOI)(%rip)
+3:
+       pushq   %rax
+
+       movq    %cr3, %rax
+       movq    %rax, %cr3
+
+       lock
+       decq    tlb_shoot_wait
+
+       popq    %rax
+       iretq
+
+IDTVEC(ipi_invlpg_kvm_pv_eoi)
+        testq   $SEL_UPL,8(%rsp)
+        je      1f
+       swapgs
+       btr     $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)
+       swapgs
+       jmp 2f
+1:
+       btr     $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)
+2:
+       jc      3f
+       movl    $0,(_C_LABEL(local_apic)+LAPIC_EOI)(%rip)
+3:
+       pushq   %rax
+
+       movq    tlb_shoot_addr1, %rax
+       invlpg  (%rax)
+
+       lock
+       decq    tlb_shoot_wait
+
+       popq    %rax
+       iretq
+
+IDTVEC(ipi_invlrange_kvm_pv_eoi)
+        testq   $SEL_UPL,8(%rsp)
+        je      1f
+       swapgs
+       btr     $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)
+       swapgs
+       jmp 2f
+1:
+       btr     $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)
+2:
+       jc      3f
+       movl    $0,(_C_LABEL(local_apic)+LAPIC_EOI)(%rip)
+3:
+       pushq   %rax
+       pushq   %rdx
+
+       movq    tlb_shoot_addr1, %rax
+       movq    tlb_shoot_addr2, %rdx
+4:     invlpg  (%rax)
+       addq    $PAGE_SIZE, %rax
+       cmpq    %rdx, %rax
+       jb      4b
+
+       lock
+       decq    tlb_shoot_wait
+
+       popq    %rdx
+       popq    %rax
+       iretq
+
+
 #endif /* MULTIPROCESSOR */
        
        /*
@@ -400,6 +492,18 @@ IDTVEC(recurse_lapic_ltimer)
        pushq   $T_ASTFLT
        INTRENTRY               
        jmp     1f
+IDTVEC(intr_lapic_ltimer_kvm_pv_eoi)
+       pushq   $0
+       pushq   $T_ASTFLT
+       INTRENTRY
+       btr     $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)
+       jc      3f
+       movl    $0,_C_LABEL(local_apic)+LAPIC_EOI
+3:
+       movl    CPUVAR(ILEVEL),%ebx
+       cmpl    $IPL_CLOCK,%ebx
+       jae     2f
+       jmp     1f
 IDTVEC(intr_lapic_ltimer)
        pushq   $0              
        pushq   $T_ASTFLT
diff --git a/sys/arch/amd64/conf/GENERIC b/sys/arch/amd64/conf/GENERIC
index 07cad3b..ba0ccba 100644
--- a/sys/arch/amd64/conf/GENERIC
+++ b/sys/arch/amd64/conf/GENERIC
@@ -38,6 +38,8 @@ isa0  at amdpcib?
 isa0   at tcpcib?
 pci*   at mainbus0
 
+paravirt0 at mainbus0
+
 acpi0          at bios0
 acpitimer*     at acpi?
 acpihpet*      at acpi?
diff --git a/sys/arch/amd64/conf/files.amd64 b/sys/arch/amd64/conf/files.amd64
index f283f4c..d352aec 100644
--- a/sys/arch/amd64/conf/files.amd64
+++ b/sys/arch/amd64/conf/files.amd64
@@ -80,6 +80,10 @@ device       mainbus: isabus, pcibus, mainbus
 attach mainbus at root
 file   arch/amd64/amd64/mainbus.c              mainbus
 
+device paravirt
+attach paravirt at mainbus
+file   arch/amd64/amd64/paravirt.c             paravirt needs-flag
+
 device bios {}
 attach bios at mainbus
 file   arch/amd64/amd64/bios.c                 bios needs-flag
diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h
index 4495ed1..6b2cb15 100644
--- a/sys/arch/amd64/include/cpu.h
+++ b/sys/arch/amd64/include/cpu.h
@@ -46,6 +46,7 @@
 #include <machine/segments.h>
 #include <machine/cacheinfo.h>
 #include <machine/intrdefs.h>
+#include "paravirt.h"
 
 #ifdef MULTIPROCESSOR
 #include <machine/i82489reg.h>
@@ -144,6 +145,9 @@ struct cpu_info {
 
        struct ksensordev       ci_sensordev;
        struct ksensor          ci_sensor;
+#if NPARAVIRT > 0
+       u_int32_t               ci_kvm_pv_eoi;
+#endif
 #ifdef GPROF
        struct gmonparam        *ci_gmon;
 #endif
diff --git a/sys/arch/amd64/include/cpuvar.h b/sys/arch/amd64/include/cpuvar.h
index 8a75da5..5f19e27 100644
--- a/sys/arch/amd64/include/cpuvar.h
+++ b/sys/arch/amd64/include/cpuvar.h
@@ -64,6 +64,8 @@
  * SUCH DAMAGE.
  */
 
+#include "paravirt.h"
+
 struct cpu_functions {
        int (*start)(struct cpu_info *);
        int (*stop)(struct cpu_info *);
@@ -87,7 +89,17 @@ struct cpu_attach_args {
 
 #ifdef _KERNEL
 
+#if NPARAVIRT > 0
+extern int (*x86_ipi_func)(int,int,int);
+int kvm_x86_ipi(int vec, int target, int dl);
+int default_x86_ipi(int vec, int target, int dl);
+static inline int x86_ipi(int vec, int target, int dl) {
+       return x86_ipi_func(vec, target, dl);
+}
+#else
 int x86_ipi(int,int,int);
+#endif
+
 void x86_self_ipi(int);
 int x86_ipi_init(int);
 
diff --git a/sys/arch/amd64/include/i82093reg.h b/sys/arch/amd64/include/i82093reg.h
index e4ab947..9275b2e 100644
--- a/sys/arch/amd64/include/i82093reg.h
+++ b/sys/arch/amd64/include/i82093reg.h
@@ -112,9 +112,26 @@
 
 #ifdef _KERNEL
 
-#define ioapic_asm_ack(num) \
+#include <machine/paravirtvar.h>
+#define ioapic_asm_ack_no_swapgs(num) \
        movl    $0,(_C_LABEL(local_apic)+LAPIC_EOI)(%rip)
 
+#if NPARAVIRT > 0
+/*
+ * This is only usable if swapgs has already been called (e.g. by INTRENTRY).
+ */
+#define ioapic_asm_ack(num)                                    \
+       btr     $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)              ;\
+       jc      78f                                             ;\
+       ioapic_asm_ack_no_swapgs(num)                           ;\
+78:
+
+#else
+
+#define ioapic_asm_ack(num)             ioapic_asm_ack_no_swapgs(num)
+
+#endif
+
 #ifdef MULTIPROCESSOR
 
 #ifdef notyet
diff --git a/sys/arch/amd64/include/i82489var.h b/sys/arch/amd64/include/i82489var.h
index dd50af5..f926c43 100644
--- a/sys/arch/amd64/include/i82489var.h
+++ b/sys/arch/amd64/include/i82489var.h
@@ -33,6 +33,8 @@
 #ifndef _MACHINE_I82489VAR_H_
 #define _MACHINE_I82489VAR_H_
 
+#include "paravirt.h"
+
 /*
  * Software definitions belonging to Local APIC driver.
  */
@@ -73,6 +75,9 @@ extern void Xintrspurious(void);
  * Vector used for inter-processor interrupts.
  */
 extern void Xintr_lapic_ipi(void);
+#if NPARAVIRT > 0
+extern void Xintr_lapic_ipi_kvm_pv_eoi(void);
+#endif
 extern void Xrecurse_lapic_ipi(void);
 extern void Xresume_lapic_ipi(void);
 #define LAPIC_IPI_VECTOR                       0xe0
@@ -88,12 +93,20 @@ extern void Xresume_lapic_ipi(void);
 extern void Xipi_invltlb(void);
 extern void Xipi_invlpg(void);
 extern void Xipi_invlrange(void);
+#if NPARAVIRT > 0
+extern void Xipi_invltlb_kvm_pv_eoi(void);
+extern void Xipi_invlpg_kvm_pv_eoi(void);
+extern void Xipi_invlrange_kvm_pv_eoi(void);
+#endif
 
 /*
  * Vector used for local apic timer interrupts.
  */
 
 extern void Xintr_lapic_ltimer(void);
+#if NPARAVIRT > 0
+extern void Xintr_lapic_ltimer_kvm_pv_eoi(void);
+#endif
 extern void Xresume_lapic_ltimer(void);
 extern void Xrecurse_lapic_ltimer(void);
 #define LAPIC_TIMER_VECTOR             0xc0
diff --git a/sys/arch/amd64/include/paravirtvar.h b/sys/arch/amd64/include/paravirtvar.h
new file mode 100644
index 0000000..ab37e3b
--- /dev/null
+++ b/sys/arch/amd64/include/paravirtvar.h
@@ -0,0 +1,43 @@
+#ifndef _MACHINE_PARAVIRT_H_
+#define _MACHINE_PARAVIRT_H_
+
+#include "paravirt.h"
+
+#define        CPUID_HYPERVISOR_SIGNATURE_START        0x40000000
+#define        CPUID_HYPERVISOR_SIGNATURE_END          0x40010000
+#define        CPUID_HYPERVISOR_SIGNATURE_STEP         0x100
+
+#define        CPUID_OFFSET_KVM_FEATURES               0x1
+
+#define        KVM_FEATURE_CLOCKSOURCE                 0       /* deprecated */
+#define        KVM_FEATURE_NOP_IO_DELAY                1
+#define        KVM_FEATURE_MMU_OP                      2       /* deprecated */
+#define        KVM_FEATURE_CLOCKSOURCE2                3
+#define        KVM_FEATURE_ASYNC_PF                    4
+#define        KVM_FEATURE_STEAL_TIME                  5
+#define        KVM_FEATURE_PV_EOI                      6
+#define        KVM_FEATURE_PV_UNHALT                   7
+#define        KVM_FEATURE_CLOCKSOURCE_STABLE_BIT      24
+
+#define        MSR_KVM_EOI_EN                          0x4b564d04
+
+#define        MSR_HV_X64_EOI                          0x40000070
+#define        MSR_HV_X64_ICR                          0x40000071
+#define        MSR_HV_X64_TPR                          0x40000072
+
+#define KVM_PV_EOI_BIT                         0
+
+#ifndef _LOCORE
+
+extern int kvm_pv_eoi_enabled;
+int kvm_has_feature(int feature);
+
+#if NPARAVIRT > 0
+int running_on_hypervisor(void);
+#else
+#define running_on_hypervisor()                0
+#endif /* NPARAVIRT */
+
+#endif /* !_LOCORE */
+
+#endif /* _MACHINE_PARAVIRT_H_ */

Reply via email to