Author: neel
Date: Tue Jan  7 21:04:49 2014
New Revision: 260410
URL: http://svnweb.freebsd.org/changeset/base/260410

Log:
  Use the 'Virtual Interrupt Delivery' feature of Intel VT-x if supported by
  hardware. It is possible to turn this feature off and fall back to software
  emulation of the APIC by setting the tunable hw.vmm.vmx.use_apic_vid to 0.
  
  We now start handling two new types of VM-exits:
  
  APIC-access: This is a fault-like VM-exit and is triggered when an APIC
  register access is not accelerated (e.g. the APIC timer CCR). In response
  we emulate the instruction that triggered the APIC-access exit.
  
  APIC-write: This is a trap-like VM-exit which does not require any
  instruction emulation, but it does require the hypervisor to emulate the
  access to the specified register (e.g. the icrlo register).
  
  Introduce 'vlapic_ops' which are function pointers to vector the various
  vlapic operations into processor-dependent code. The 'Virtual Interrupt
  Delivery' feature installs 'ops' for setting the IRR bits in the virtual
  APIC page and for returning whether any interrupts are pending for this vcpu.
  
  Tested on an "Intel Xeon E5-2620 v2" courtesy of Allan Jude at ScaleEngine.

Modified:
  head/sys/amd64/vmm/intel/vmcs.h
  head/sys/amd64/vmm/intel/vmx.c
  head/sys/amd64/vmm/intel/vmx_controls.h
  head/sys/amd64/vmm/io/vlapic.c
  head/sys/amd64/vmm/io/vlapic_priv.h
  head/sys/amd64/vmm/vmm.c

Modified: head/sys/amd64/vmm/intel/vmcs.h
==============================================================================
--- head/sys/amd64/vmm/intel/vmcs.h     Tue Jan  7 20:36:15 2014        
(r260409)
+++ head/sys/amd64/vmm/intel/vmcs.h     Tue Jan  7 21:04:49 2014        
(r260410)
@@ -107,6 +107,7 @@ vmcs_write(uint32_t encoding, uint64_t v
 #define        VMCS_GUEST_GS_SELECTOR          0x0000080A
 #define        VMCS_GUEST_LDTR_SELECTOR        0x0000080C
 #define        VMCS_GUEST_TR_SELECTOR          0x0000080E
+#define        VMCS_GUEST_INTR_STATUS          0x00000810
 
 /* 16-bit host-state fields */
 #define        VMCS_HOST_ES_SELECTOR           0x00000C00
@@ -129,6 +130,10 @@ vmcs_write(uint32_t encoding, uint64_t v
 #define        VMCS_VIRTUAL_APIC               0x00002012
 #define        VMCS_APIC_ACCESS                0x00002014
 #define        VMCS_EPTP                       0x0000201A
+#define        VMCS_EOI_EXIT0                  0x0000201C
+#define        VMCS_EOI_EXIT1                  0x0000201E
+#define        VMCS_EOI_EXIT2                  0x00002020
+#define        VMCS_EOI_EXIT3                  0x00002022
 
 /* 64-bit read-only fields */
 #define        VMCS_GUEST_PHYSICAL_ADDRESS     0x00002400
@@ -310,7 +315,7 @@ vmcs_write(uint32_t encoding, uint64_t v
 #define EXIT_REASON_PAUSE              40
 #define EXIT_REASON_MCE                        41
 #define EXIT_REASON_TPR                        43
-#define EXIT_REASON_APIC               44
+#define EXIT_REASON_APIC_ACCESS                44
 #define EXIT_REASON_GDTR_IDTR          46
 #define EXIT_REASON_LDTR_TR            47
 #define EXIT_REASON_EPT_FAULT          48
@@ -321,6 +326,7 @@ vmcs_write(uint32_t encoding, uint64_t v
 #define EXIT_REASON_INVVPID            53
 #define EXIT_REASON_WBINVD             54
 #define EXIT_REASON_XSETBV             55
+#define        EXIT_REASON_APIC_WRITE          56
 
 /*
  * VMCS interrupt information fields
@@ -360,4 +366,15 @@ vmcs_write(uint32_t encoding, uint64_t v
 #define        EPT_VIOLATION_GLA_VALID         (1UL << 7)
 #define        EPT_VIOLATION_XLAT_VALID        (1UL << 8)
 
+/*
+ * Exit qualification for APIC-access VM exit
+ */
+#define        APIC_ACCESS_OFFSET(qual)        ((qual) & 0xFFF)
+#define        APIC_ACCESS_TYPE(qual)          (((qual) >> 12) & 0xF)
+
+/*
+ * Exit qualification for APIC-write VM exit
+ */
+#define        APIC_WRITE_OFFSET(qual)         ((qual) & 0xFFF)
+
 #endif

Modified: head/sys/amd64/vmm/intel/vmx.c
==============================================================================
--- head/sys/amd64/vmm/intel/vmx.c      Tue Jan  7 20:36:15 2014        
(r260409)
+++ head/sys/amd64/vmm/intel/vmx.c      Tue Jan  7 21:04:49 2014        
(r260410)
@@ -166,12 +166,25 @@ static int cap_pause_exit;
 static int cap_unrestricted_guest;
 static int cap_monitor_trap;
 static int cap_invpcid;
- 
+
+static int virtual_interrupt_delivery;
+SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, virtual_interrupt_delivery, CTLFLAG_RD,
+    &virtual_interrupt_delivery, 0, "APICv virtual interrupt delivery 
support");
+
 static struct unrhdr *vpid_unr;
 static u_int vpid_alloc_failed;
 SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed, CTLFLAG_RD,
            &vpid_alloc_failed, 0, NULL);
 
+/*
+ * Use the last page below 4GB as the APIC access address. This address is
+ * occupied by the boot firmware so it is guaranteed that it will not conflict
+ * with a page in system memory.
+ */
+#define        APIC_ACCESS_ADDRESS     0xFFFFF000
+
+static void vmx_inject_pir(struct vlapic *vlapic);
+
 #ifdef KTR
 static const char *
 exit_reason_to_str(int reason)
@@ -261,8 +274,8 @@ exit_reason_to_str(int reason)
                return "mce";
        case EXIT_REASON_TPR:
                return "tpr";
-       case EXIT_REASON_APIC:
-               return "apic";
+       case EXIT_REASON_APIC_ACCESS:
+               return "apic-access";
        case EXIT_REASON_GDTR_IDTR:
                return "gdtridtr";
        case EXIT_REASON_LDTR_TR:
@@ -283,6 +296,8 @@ exit_reason_to_str(int reason)
                return "wbinvd";
        case EXIT_REASON_XSETBV:
                return "xsetbv";
+       case EXIT_REASON_APIC_WRITE:
+               return "apic-write";
        default:
                snprintf(reasonbuf, sizeof(reasonbuf), "%d", reason);
                return (reasonbuf);
@@ -461,9 +476,9 @@ vmx_restore(void)
 static int
 vmx_init(void)
 {
-       int error;
+       int error, use_tpr_shadow;
        uint64_t fixed0, fixed1, feature_control;
-       uint32_t tmp;
+       uint32_t tmp, procbased2_vid_bits;
 
        /* CPUID.1:ECX[bit 5] must be 1 for processor to support VMX */
        if (!(cpu_feature2 & CPUID2_VMX)) {
@@ -597,6 +612,31 @@ vmx_init(void)
            MSR_VMX_PROCBASED_CTLS2, PROCBASED2_ENABLE_INVPCID, 0,
            &tmp) == 0);
 
+       /*
+        * Check support for virtual interrupt delivery.
+        */
+       procbased2_vid_bits = (PROCBASED2_VIRTUALIZE_APIC_ACCESSES |
+           PROCBASED2_VIRTUALIZE_X2APIC_MODE |
+           PROCBASED2_APIC_REGISTER_VIRTUALIZATION |
+           PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY);
+
+       use_tpr_shadow = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
+           MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_USE_TPR_SHADOW, 0,
+           &tmp) == 0);
+
+       error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2,
+           procbased2_vid_bits, 0, &tmp);
+       if (error == 0 && use_tpr_shadow) {
+               virtual_interrupt_delivery = 1;
+               TUNABLE_INT_FETCH("hw.vmm.vmx.use_apic_vid",
+                   &virtual_interrupt_delivery);
+       }
+
+       if (virtual_interrupt_delivery) {
+               procbased_ctls |= PROCBASED_USE_TPR_SHADOW;
+               procbased_ctls2 |= procbased2_vid_bits;
+               procbased_ctls2 &= ~PROCBASED2_VIRTUALIZE_X2APIC_MODE;
+       }
 
        /* Initialize EPT */
        error = ept_init();
@@ -743,6 +783,13 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
 
        vpid_alloc(vpid, VM_MAXCPU);
 
+       if (virtual_interrupt_delivery) {
+               error = vm_map_mmio(vm, DEFAULT_APIC_BASE, PAGE_SIZE,
+                   APIC_ACCESS_ADDRESS);
+               /* XXX this should really return an error to the caller */
+               KASSERT(error == 0, ("vm_map_mmio(apicbase) error %d", error));
+       }
+
        for (i = 0; i < VM_MAXCPU; i++) {
                vmcs = &vmx->vmcs[i];
                vmcs->identifier = vmx_revision();
@@ -766,6 +813,15 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
                error += vmwrite(VMCS_ENTRY_CTLS, entry_ctls);
                error += vmwrite(VMCS_MSR_BITMAP, vtophys(vmx->msr_bitmap));
                error += vmwrite(VMCS_VPID, vpid[i]);
+               if (virtual_interrupt_delivery) {
+                       error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS);
+                       error += vmwrite(VMCS_VIRTUAL_APIC,
+                           vtophys(&vmx->apic_page[i]));
+                       error += vmwrite(VMCS_EOI_EXIT0, 0);
+                       error += vmwrite(VMCS_EOI_EXIT1, 0);
+                       error += vmwrite(VMCS_EOI_EXIT2, 0);
+                       error += vmwrite(VMCS_EOI_EXIT3, 0);
+               }
                VMCLEAR(vmcs);
                KASSERT(error == 0, ("vmx_vminit: error customizing the vmcs"));
 
@@ -988,6 +1044,11 @@ vmx_inject_interrupts(struct vmx *vmx, i
        if (vmx_inject_nmi(vmx, vcpu))
                return;
 
+       if (virtual_interrupt_delivery) {
+               vmx_inject_pir(vlapic);
+               return;
+       }
+
        /* Ask the local apic for a vector to inject */
        if (!vlapic_pending_intr(vlapic, &vector))
                return;
@@ -1181,10 +1242,140 @@ ept_emulation_fault(uint64_t ept_qual)
 }
 
 static int
+vmx_handle_apic_write(struct vlapic *vlapic, uint64_t qual)
+{
+       int error, handled, offset;
+       bool retu;
+
+       if (!virtual_interrupt_delivery)
+               return (UNHANDLED);
+
+       handled = 1;
+       offset = APIC_WRITE_OFFSET(qual);
+       switch (offset) {
+       case APIC_OFFSET_ID:
+               vlapic_id_write_handler(vlapic);
+               break;
+       case APIC_OFFSET_LDR:
+               vlapic_ldr_write_handler(vlapic);
+               break;
+       case APIC_OFFSET_DFR:
+               vlapic_dfr_write_handler(vlapic);
+               break;
+       case APIC_OFFSET_SVR:
+               vlapic_svr_write_handler(vlapic);
+               break;
+       case APIC_OFFSET_ESR:
+               vlapic_esr_write_handler(vlapic);
+               break;
+       case APIC_OFFSET_ICR_LOW:
+               retu = false;
+               error = vlapic_icrlo_write_handler(vlapic, &retu);
+               if (error != 0 || retu)
+                       handled = 0;
+               break;
+       case APIC_OFFSET_CMCI_LVT:
+       case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
+               vlapic_lvt_write_handler(vlapic, offset);
+               break;
+       case APIC_OFFSET_TIMER_ICR:
+               vlapic_icrtmr_write_handler(vlapic);
+               break;
+       case APIC_OFFSET_TIMER_DCR:
+               vlapic_dcr_write_handler(vlapic);
+               break;
+       default:
+               handled = 0;
+               break;
+       }
+       return (handled);
+}
+
+static bool
+apic_access_fault(uint64_t gpa)
+{
+
+       if (virtual_interrupt_delivery &&
+           (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE))
+               return (true);
+       else
+               return (false);
+}
+
+static int
+vmx_handle_apic_access(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit)
+{
+       uint64_t qual;
+       int access_type, offset, allowed;
+
+       if (!virtual_interrupt_delivery)
+               return (UNHANDLED);
+
+       qual = vmexit->u.vmx.exit_qualification;
+       access_type = APIC_ACCESS_TYPE(qual);
+       offset = APIC_ACCESS_OFFSET(qual);
+
+       allowed = 0;
+       if (access_type == 0) {
+               /*
+                * Read data access to the following registers is expected.
+                */
+               switch (offset) {
+               case APIC_OFFSET_APR:
+               case APIC_OFFSET_PPR:
+               case APIC_OFFSET_RRR:
+               case APIC_OFFSET_CMCI_LVT:
+               case APIC_OFFSET_TIMER_CCR:
+                       allowed = 1;
+                       break;
+               default:
+                       break;
+               }
+       } else if (access_type == 1) {
+               /*
+                * Write data access to the following registers is expected.
+                */
+               switch (offset) {
+               case APIC_OFFSET_VER:
+               case APIC_OFFSET_APR:
+               case APIC_OFFSET_PPR:
+               case APIC_OFFSET_RRR:
+               case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
+               case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
+               case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
+               case APIC_OFFSET_CMCI_LVT:
+               case APIC_OFFSET_TIMER_CCR:
+                       allowed = 1;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       if (allowed) {
+               vmexit->exitcode = VM_EXITCODE_INST_EMUL;
+               vmexit->u.inst_emul.gpa = DEFAULT_APIC_BASE + offset;
+               vmexit->u.inst_emul.gla = VIE_INVALID_GLA;
+               vmexit->u.inst_emul.cr3 = vmcs_guest_cr3();
+       }
+
+       /*
+        * Regardless of whether the APIC-access is allowed this handler
+        * always returns UNHANDLED:
+        * - if the access is allowed then it is handled by emulating the
+        *   instruction that caused the VM-exit (outside the critical section)
+        * - if the access is not allowed then it will be converted to an
+        *   exitcode of VM_EXITCODE_VMX and will be dealt with in userland.
+        */
+       return (UNHANDLED);
+}
+
+static int
 vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 {
        int error, handled;
        struct vmxctx *vmxctx;
+       struct vlapic *vlapic;
        uint32_t eax, ecx, edx, idtvec_info, idtvec_err, reason;
        uint64_t qual, gpa;
        bool retu;
@@ -1209,7 +1400,7 @@ vmx_exit_process(struct vmx *vmx, int vc
        switch (reason) {
        case EXIT_REASON_EPT_FAULT:
        case EXIT_REASON_EPT_MISCONFIG:
-       case EXIT_REASON_APIC:
+       case EXIT_REASON_APIC_ACCESS:
        case EXIT_REASON_TASK_SWITCH:
        case EXIT_REASON_EXCEPTION:
                idtvec_info = vmcs_idt_vectoring_info();
@@ -1331,7 +1522,7 @@ vmx_exit_process(struct vmx *vmx, int vc
                 * this must be an instruction that accesses MMIO space.
                 */
                gpa = vmcs_gpa();
-               if (vm_mem_allocated(vmx->vm, gpa)) {
+               if (vm_mem_allocated(vmx->vm, gpa) || apic_access_fault(gpa)) {
                        vmexit->exitcode = VM_EXITCODE_PAGING;
                        vmexit->u.paging.gpa = gpa;
                        vmexit->u.paging.fault_type = ept_fault_type(qual);
@@ -1342,6 +1533,18 @@ vmx_exit_process(struct vmx *vmx, int vc
                        vmexit->u.inst_emul.cr3 = vmcs_guest_cr3();
                }
                break;
+       case EXIT_REASON_APIC_ACCESS:
+               handled = vmx_handle_apic_access(vmx, vcpu, vmexit);
+               break;
+       case EXIT_REASON_APIC_WRITE:
+               /*
+                * APIC-write VM exit is trap-like so the %rip is already
+                * pointing to the next instruction.
+                */
+               vmexit->inst_length = 0;
+               vlapic = vm_lapic(vmx->vm, vcpu);
+               handled = vmx_handle_apic_write(vlapic, qual);
+               break;
        default:
                vmm_stat_incr(vmx->vm, vcpu, VMEXIT_UNKNOWN, 1);
                break;
@@ -1532,6 +1735,9 @@ vmx_vmcleanup(void *arg)
        int i, error;
        struct vmx *vmx = arg;
 
+       if (virtual_interrupt_delivery)
+               vm_unmap_mmio(vmx->vm, DEFAULT_APIC_BASE, PAGE_SIZE);
+
        for (i = 0; i < VM_MAXCPU; i++)
                vpid_free(vmx->state[i].vpid);
 
@@ -1895,6 +2101,195 @@ vmx_setcap(void *arg, int vcpu, int type
         return (retval);
 }
 
+/*
+ * Posted Interrupt Descriptor (described in section 29.6 of the Intel SDM).
+ */
+struct pir_desc {
+       uint64_t        pir[4];
+       uint64_t        pending;
+       uint64_t        unused[3];
+} __aligned(64);
+CTASSERT(sizeof(struct pir_desc) == 64);
+
+struct vlapic_vtx {
+       struct vlapic   vlapic;
+       struct pir_desc pir_desc;
+};
+
+#define        VMX_CTR_PIR(vm, vcpuid, pir_desc, notify, vector, level, msg)   
\
+do {                                                                   \
+       VCPU_CTR2(vm, vcpuid, msg " assert %s-triggered vector %d",     \
+           level ? "level" : "edge", vector);                          \
+       VCPU_CTR1(vm, vcpuid, msg " pir0 0x%016lx", pir_desc->pir[0]);  \
+       VCPU_CTR1(vm, vcpuid, msg " pir1 0x%016lx", pir_desc->pir[1]);  \
+       VCPU_CTR1(vm, vcpuid, msg " pir2 0x%016lx", pir_desc->pir[2]);  \
+       VCPU_CTR1(vm, vcpuid, msg " pir3 0x%016lx", pir_desc->pir[3]);  \
+       VCPU_CTR1(vm, vcpuid, msg " notify: %s", notify ? "yes" : "no");\
+} while (0)
+
+/*
+ * vlapic->ops handlers that utilize the APICv hardware assist described in
+ * Chapter 29 of the Intel SDM.
+ */
+static int
+vmx_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
+{
+       struct vlapic_vtx *vlapic_vtx;
+       struct pir_desc *pir_desc;
+       uint64_t mask;
+       int idx, notify;
+
+       /*
+        * XXX need to deal with level triggered interrupts
+        */
+       vlapic_vtx = (struct vlapic_vtx *)vlapic;
+       pir_desc = &vlapic_vtx->pir_desc;
+
+       /*
+        * Keep track of interrupt requests in the PIR descriptor. This is
+        * because the virtual APIC page pointed to by the VMCS cannot be
+        * modified if the vcpu is running.
+        */
+       idx = vector / 64;
+       mask = 1UL << (vector % 64);
+       atomic_set_long(&pir_desc->pir[idx], mask);
+       notify = atomic_cmpset_long(&pir_desc->pending, 0, 1);
+
+       VMX_CTR_PIR(vlapic->vm, vlapic->vcpuid, pir_desc, notify, vector,
+           level, "vmx_set_intr_ready");
+       return (notify);
+}
+
+static int
+vmx_pending_intr(struct vlapic *vlapic, int *vecptr)
+{
+       struct vlapic_vtx *vlapic_vtx;
+       struct pir_desc *pir_desc;
+       struct LAPIC *lapic;
+       uint64_t pending, pirval;
+       uint32_t ppr, vpr;
+       int i;
+
+       /*
+        * This function is only expected to be called from the 'HLT' exit
+        * handler which does not care about the vector that is pending.
+        */
+       KASSERT(vecptr == NULL, ("vmx_pending_intr: vecptr must be NULL"));
+
+       vlapic_vtx = (struct vlapic_vtx *)vlapic;
+       pir_desc = &vlapic_vtx->pir_desc;
+
+       pending = atomic_load_acq_long(&pir_desc->pending);
+       if (!pending)
+               return (0);     /* common case */
+
+       /*
+        * If there is an interrupt pending then it will be recognized only
+        * if its priority is greater than the processor priority.
+        *
+        * Special case: if the processor priority is zero then any pending
+        * interrupt will be recognized.
+        */
+       lapic = vlapic->apic_page;
+       ppr = lapic->ppr & 0xf0;
+       if (ppr == 0)
+               return (1);
+
+       VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "HLT with non-zero PPR %d",
+           lapic->ppr);
+
+       for (i = 3; i >= 0; i--) {
+               pirval = pir_desc->pir[i];
+               if (pirval != 0) {
+                       vpr = (i * 64 + flsl(pirval) - 1) & 0xf0;
+                       return (vpr > ppr);
+               }
+       }
+       return (0);
+}
+
+static void
+vmx_intr_accepted(struct vlapic *vlapic, int vector)
+{
+
+       panic("vmx_intr_accepted: not expected to be called");
+}
+
+/*
+ * Transfer the pending interrupts in the PIR descriptor to the IRR
+ * in the virtual APIC page.
+ */
+static void
+vmx_inject_pir(struct vlapic *vlapic)
+{
+       struct vlapic_vtx *vlapic_vtx;
+       struct pir_desc *pir_desc;
+       struct LAPIC *lapic;
+       uint64_t val, pirval;
+       int rvi, pirbase;
+       uint16_t intr_status_old, intr_status_new;
+
+       vlapic_vtx = (struct vlapic_vtx *)vlapic;
+       pir_desc = &vlapic_vtx->pir_desc;
+       if (atomic_cmpset_long(&pir_desc->pending, 1, 0) == 0) {
+               VCPU_CTR0(vlapic->vm, vlapic->vcpuid, "vmx_inject_pir: "
+                   "no posted interrupt pending");
+               return;
+       }
+
+       pirval = 0;
+       lapic = vlapic->apic_page;
+
+       val = atomic_readandclear_long(&pir_desc->pir[0]);
+       if (val != 0) {
+               lapic->irr0 |= val;
+               lapic->irr1 |= val >> 32;
+               pirbase = 0;
+               pirval = val;
+       }
+
+       val = atomic_readandclear_long(&pir_desc->pir[1]);
+       if (val != 0) {
+               lapic->irr2 |= val;
+               lapic->irr3 |= val >> 32;
+               pirbase = 64;
+               pirval = val;
+       }
+
+       val = atomic_readandclear_long(&pir_desc->pir[2]);
+       if (val != 0) {
+               lapic->irr4 |= val;
+               lapic->irr5 |= val >> 32;
+               pirbase = 128;
+               pirval = val;
+       }
+
+       val = atomic_readandclear_long(&pir_desc->pir[3]);
+       if (val != 0) {
+               lapic->irr6 |= val;
+               lapic->irr7 |= val >> 32;
+               pirbase = 192;
+               pirval = val;
+       }
+       VLAPIC_CTR_IRR(vlapic, "vmx_inject_pir");
+
+       /*
+        * Update RVI so the processor can evaluate pending virtual
+        * interrupts on VM-entry.
+        */
+       if (pirval != 0) {
+               rvi = pirbase + flsl(pirval) - 1;
+               intr_status_old = vmcs_read(VMCS_GUEST_INTR_STATUS);
+               intr_status_new = (intr_status_old & 0xFF00) | rvi;
+               if (intr_status_new > intr_status_old) {
+                       vmcs_write(VMCS_GUEST_INTR_STATUS, intr_status_new);
+                       VCPU_CTR2(vlapic->vm, vlapic->vcpuid, "vmx_inject_pir: "
+                           "guest_intr_status changed from 0x%04x to 0x%04x",
+                           intr_status_old, intr_status_new);
+               }
+       }
+}
+
 static struct vlapic *
 vmx_vlapic_init(void *arg, int vcpuid)
 {
@@ -1903,11 +2298,17 @@ vmx_vlapic_init(void *arg, int vcpuid)
        
        vmx = arg;
 
-       vlapic = malloc(sizeof(struct vlapic), M_VLAPIC, M_WAITOK | M_ZERO);
+       vlapic = malloc(sizeof(struct vlapic_vtx), M_VLAPIC, M_WAITOK | M_ZERO);
        vlapic->vm = vmx->vm;
        vlapic->vcpuid = vcpuid;
        vlapic->apic_page = (struct LAPIC *)&vmx->apic_page[vcpuid];
 
+       if (virtual_interrupt_delivery) {
+               vlapic->ops.set_intr_ready = vmx_set_intr_ready;
+               vlapic->ops.pending_intr = vmx_pending_intr;
+               vlapic->ops.intr_accepted = vmx_intr_accepted;
+       }
+
        vlapic_init(vlapic);
 
        return (vlapic);

Modified: head/sys/amd64/vmm/intel/vmx_controls.h
==============================================================================
--- head/sys/amd64/vmm/intel/vmx_controls.h     Tue Jan  7 20:36:15 2014        
(r260409)
+++ head/sys/amd64/vmm/intel/vmx_controls.h     Tue Jan  7 21:04:49 2014        
(r260410)
@@ -34,6 +34,7 @@
 #define        PINBASED_NMI_EXITING            (1 << 3)
 #define        PINBASED_VIRTUAL_NMI            (1 << 5)
 #define        PINBASED_PREMPTION_TIMER        (1 << 6)
+#define        PINBASED_POSTED_INTERRUPT       (1 << 7)
 
 /* Primary Processor-Based VM-Execution Controls */
 #define        PROCBASED_INT_WINDOW_EXITING    (1 << 2)
@@ -59,16 +60,18 @@
 #define        PROCBASED_SECONDARY_CONTROLS    (1U << 31)
 
 /* Secondary Processor-Based VM-Execution Controls */
-#define        PROCBASED2_VIRTUALIZE_APIC      (1 << 0)
-#define        PROCBASED2_ENABLE_EPT           (1 << 1)
-#define        PROCBASED2_DESC_TABLE_EXITING   (1 << 2)
-#define        PROCBASED2_ENABLE_RDTSCP        (1 << 3)
-#define        PROCBASED2_VIRTUALIZE_X2APIC    (1 << 4)
-#define        PROCBASED2_ENABLE_VPID          (1 << 5)
-#define        PROCBASED2_WBINVD_EXITING       (1 << 6)
-#define        PROCBASED2_UNRESTRICTED_GUEST   (1 << 7)
-#define        PROCBASED2_PAUSE_LOOP_EXITING   (1 << 10)
-#define        PROCBASED2_ENABLE_INVPCID       (1 << 12)
+#define        PROCBASED2_VIRTUALIZE_APIC_ACCESSES     (1 << 0)
+#define        PROCBASED2_ENABLE_EPT                   (1 << 1)
+#define        PROCBASED2_DESC_TABLE_EXITING           (1 << 2)
+#define        PROCBASED2_ENABLE_RDTSCP                (1 << 3)
+#define        PROCBASED2_VIRTUALIZE_X2APIC_MODE       (1 << 4)
+#define        PROCBASED2_ENABLE_VPID                  (1 << 5)
+#define        PROCBASED2_WBINVD_EXITING               (1 << 6)
+#define        PROCBASED2_UNRESTRICTED_GUEST           (1 << 7)
+#define        PROCBASED2_APIC_REGISTER_VIRTUALIZATION (1 << 8)
+#define        PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY   (1 << 9)
+#define        PROCBASED2_PAUSE_LOOP_EXITING           (1 << 10)
+#define        PROCBASED2_ENABLE_INVPCID               (1 << 12)
 
 /* VM Exit Controls */
 #define        VM_EXIT_SAVE_DEBUG_CONTROLS     (1 << 2)

Modified: head/sys/amd64/vmm/io/vlapic.c
==============================================================================
--- head/sys/amd64/vmm/io/vlapic.c      Tue Jan  7 20:36:15 2014        
(r260409)
+++ head/sys/amd64/vmm/io/vlapic.c      Tue Jan  7 21:04:49 2014        
(r260410)
@@ -54,43 +54,6 @@ __FBSDID("$FreeBSD$");
 #include "vlapic_priv.h"
 #include "vioapic.h"
 
-#define        VLAPIC_CTR0(vlapic, format)                                     
\
-       VCPU_CTR0((vlapic)->vm, (vlapic)->vcpuid, format)
-
-#define        VLAPIC_CTR1(vlapic, format, p1)                                 
\
-       VCPU_CTR1((vlapic)->vm, (vlapic)->vcpuid, format, p1)
-
-#define        VLAPIC_CTR2(vlapic, format, p1, p2)                             
\
-       VCPU_CTR2((vlapic)->vm, (vlapic)->vcpuid, format, p1, p2)
-
-#define        VLAPIC_CTR_IRR(vlapic, msg)                                     
\
-do {                                                                   \
-       uint32_t *irrptr = &(vlapic)->apic_page->irr0;                  \
-       irrptr[0] = irrptr[0];  /* silence compiler */                  \
-       VLAPIC_CTR1((vlapic), msg " irr0 0x%08x", irrptr[0 << 2]);      \
-       VLAPIC_CTR1((vlapic), msg " irr1 0x%08x", irrptr[1 << 2]);      \
-       VLAPIC_CTR1((vlapic), msg " irr2 0x%08x", irrptr[2 << 2]);      \
-       VLAPIC_CTR1((vlapic), msg " irr3 0x%08x", irrptr[3 << 2]);      \
-       VLAPIC_CTR1((vlapic), msg " irr4 0x%08x", irrptr[4 << 2]);      \
-       VLAPIC_CTR1((vlapic), msg " irr5 0x%08x", irrptr[5 << 2]);      \
-       VLAPIC_CTR1((vlapic), msg " irr6 0x%08x", irrptr[6 << 2]);      \
-       VLAPIC_CTR1((vlapic), msg " irr7 0x%08x", irrptr[7 << 2]);      \
-} while (0)
-
-#define        VLAPIC_CTR_ISR(vlapic, msg)                                     
\
-do {                                                                   \
-       uint32_t *isrptr = &(vlapic)->apic_page->isr0;                  \
-       isrptr[0] = isrptr[0];  /* silence compiler */                  \
-       VLAPIC_CTR1((vlapic), msg " isr0 0x%08x", isrptr[0 << 2]);      \
-       VLAPIC_CTR1((vlapic), msg " isr1 0x%08x", isrptr[1 << 2]);      \
-       VLAPIC_CTR1((vlapic), msg " isr2 0x%08x", isrptr[2 << 2]);      \
-       VLAPIC_CTR1((vlapic), msg " isr3 0x%08x", isrptr[3 << 2]);      \
-       VLAPIC_CTR1((vlapic), msg " isr4 0x%08x", isrptr[4 << 2]);      \
-       VLAPIC_CTR1((vlapic), msg " isr5 0x%08x", isrptr[5 << 2]);      \
-       VLAPIC_CTR1((vlapic), msg " isr6 0x%08x", isrptr[6 << 2]);      \
-       VLAPIC_CTR1((vlapic), msg " isr7 0x%08x", isrptr[7 << 2]);      \
-} while (0)
-
 #define        PRIO(x)                 ((x) >> 4)
 
 #define VLAPIC_VERSION         (16)
@@ -312,6 +275,9 @@ vlapic_set_intr_ready(struct vlapic *vla
                return (1);
        }
 
+       if (vlapic->ops.set_intr_ready)
+               return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level));
+
        idx = (vector / 32) * 4;
        mask = 1 << (vector % 32);
 
@@ -1040,6 +1006,9 @@ vlapic_pending_intr(struct vlapic *vlapi
        int              idx, i, bitpos, vector;
        uint32_t        *irrptr, val;
 
+       if (vlapic->ops.pending_intr)
+               return ((*vlapic->ops.pending_intr)(vlapic, vecptr));
+
        irrptr = &lapic->irr0;
 
        /*
@@ -1071,6 +1040,9 @@ vlapic_intr_accepted(struct vlapic *vlap
        uint32_t        *irrptr, *isrptr;
        int             idx, stk_top;
 
+       if (vlapic->ops.intr_accepted)
+               return ((*vlapic->ops.intr_accepted)(vlapic, vector));
+
        /*
         * clear the ready bit for vector being accepted in irr 
         * and set the vector as in service in isr.
@@ -1469,7 +1441,10 @@ vlapic_post_intr(struct vlapic *vlapic, 
         * If neither of these features are available then fallback to
         * sending an IPI to 'hostcpu'.
         */
-       ipi_cpu(hostcpu, vmm_ipinum);
+       if (vlapic->ops.post_intr)
+               (*vlapic->ops.post_intr)(vlapic, hostcpu);
+       else
+               ipi_cpu(hostcpu, vmm_ipinum);
 }
 
 bool

Modified: head/sys/amd64/vmm/io/vlapic_priv.h
==============================================================================
--- head/sys/amd64/vmm/io/vlapic_priv.h Tue Jan  7 20:36:15 2014        
(r260409)
+++ head/sys/amd64/vmm/io/vlapic_priv.h Tue Jan  7 21:04:49 2014        
(r260410)
@@ -82,6 +82,43 @@
 #define APIC_OFFSET_TIMER_CCR  0x390   /* Timer's Current Count        */
 #define APIC_OFFSET_TIMER_DCR  0x3E0   /* Timer's Divide Configuration */
 
+#define        VLAPIC_CTR0(vlapic, format)                                     
\
+       VCPU_CTR0((vlapic)->vm, (vlapic)->vcpuid, format)
+
+#define        VLAPIC_CTR1(vlapic, format, p1)                                 
\
+       VCPU_CTR1((vlapic)->vm, (vlapic)->vcpuid, format, p1)
+
+#define        VLAPIC_CTR2(vlapic, format, p1, p2)                             
\
+       VCPU_CTR2((vlapic)->vm, (vlapic)->vcpuid, format, p1, p2)
+
+#define        VLAPIC_CTR_IRR(vlapic, msg)                                     
\
+do {                                                                   \
+       uint32_t *irrptr = &(vlapic)->apic_page->irr0;                  \
+       irrptr[0] = irrptr[0];  /* silence compiler */                  \
+       VLAPIC_CTR1((vlapic), msg " irr0 0x%08x", irrptr[0 << 2]);      \
+       VLAPIC_CTR1((vlapic), msg " irr1 0x%08x", irrptr[1 << 2]);      \
+       VLAPIC_CTR1((vlapic), msg " irr2 0x%08x", irrptr[2 << 2]);      \
+       VLAPIC_CTR1((vlapic), msg " irr3 0x%08x", irrptr[3 << 2]);      \
+       VLAPIC_CTR1((vlapic), msg " irr4 0x%08x", irrptr[4 << 2]);      \
+       VLAPIC_CTR1((vlapic), msg " irr5 0x%08x", irrptr[5 << 2]);      \
+       VLAPIC_CTR1((vlapic), msg " irr6 0x%08x", irrptr[6 << 2]);      \
+       VLAPIC_CTR1((vlapic), msg " irr7 0x%08x", irrptr[7 << 2]);      \
+} while (0)
+
+#define        VLAPIC_CTR_ISR(vlapic, msg)                                     
\
+do {                                                                   \
+       uint32_t *isrptr = &(vlapic)->apic_page->isr0;                  \
+       isrptr[0] = isrptr[0];  /* silence compiler */                  \
+       VLAPIC_CTR1((vlapic), msg " isr0 0x%08x", isrptr[0 << 2]);      \
+       VLAPIC_CTR1((vlapic), msg " isr1 0x%08x", isrptr[1 << 2]);      \
+       VLAPIC_CTR1((vlapic), msg " isr2 0x%08x", isrptr[2 << 2]);      \
+       VLAPIC_CTR1((vlapic), msg " isr3 0x%08x", isrptr[3 << 2]);      \
+       VLAPIC_CTR1((vlapic), msg " isr4 0x%08x", isrptr[4 << 2]);      \
+       VLAPIC_CTR1((vlapic), msg " isr5 0x%08x", isrptr[5 << 2]);      \
+       VLAPIC_CTR1((vlapic), msg " isr6 0x%08x", isrptr[6 << 2]);      \
+       VLAPIC_CTR1((vlapic), msg " isr7 0x%08x", isrptr[7 << 2]);      \
+} while (0)
+
 enum boot_state {
        BS_INIT,
        BS_SIPI,
@@ -95,10 +132,20 @@ enum boot_state {
 
 #define VLAPIC_MAXLVT_INDEX    APIC_LVT_CMCI
 
+struct vlapic;
+
+struct vlapic_ops {
+       int (*set_intr_ready)(struct vlapic *vlapic, int vector, bool level);
+       int (*pending_intr)(struct vlapic *vlapic, int *vecptr);
+       void (*intr_accepted)(struct vlapic *vlapic, int vector);
+       void (*post_intr)(struct vlapic *vlapic, int hostcpu);
+};
+
 struct vlapic {
        struct vm               *vm;
        int                     vcpuid;
        struct LAPIC            *apic_page;
+       struct vlapic_ops       ops;
 
        uint32_t                esr_pending;
        int                     esr_firing;

Modified: head/sys/amd64/vmm/vmm.c
==============================================================================
--- head/sys/amd64/vmm/vmm.c    Tue Jan  7 20:36:15 2014        (r260409)
+++ head/sys/amd64/vmm/vmm.c    Tue Jan  7 21:04:49 2014        (r260410)
@@ -321,6 +321,7 @@ vm_create(const char *name, struct vm **
 
        vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
        strcpy(vm->name, name);
+       vm->vmspace = vmspace;
        vm->cookie = VMINIT(vm, vmspace_pmap(vmspace));
        vm->vioapic = vioapic_init(vm);
        vm->vhpet = vhpet_init(vm);
@@ -331,7 +332,6 @@ vm_create(const char *name, struct vm **
        }
 
        vm_activate_cpu(vm, BSP);
-       vm->vmspace = vmspace;
 
        *retvm = vm;
        return (0);
_______________________________________________
[email protected] mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "[email protected]"

Reply via email to