There are a couple of parts to it: - vmrunkernel was not posting the IRQ properly; it wasn't setting the outstanding notification bit.
- We need to self_ipi when that bit is set. We had previously lost a race when poking the guest pcore (IPI was sent, but not received while in a VM). We just resend the IPI. For ease of access, I now store the posted_irq_desc in the GPC. Signed-off-by: Barret Rhoden <[email protected]> --- kern/arch/x86/process64.c | 6 ++++++ kern/arch/x86/ros/vmm.h | 2 +- kern/arch/x86/ros/vmx.h | 2 ++ kern/arch/x86/vmm/intel/vmx.c | 4 +++- kern/arch/x86/vmm/vmm.h | 1 + tests/vmm/vmrunkernel.c | 29 +++++------------------------ 6 files changed, 18 insertions(+), 26 deletions(-) diff --git a/kern/arch/x86/process64.c b/kern/arch/x86/process64.c index 102c2e11a088..cddee5eab2c2 100644 --- a/kern/arch/x86/process64.c +++ b/kern/arch/x86/process64.c @@ -114,6 +114,12 @@ static void __attribute__((noreturn)) proc_pop_vmtf(struct vm_trapframe *tf) /* cr2 is not part of the VMCS state; we need to save/restore it manually */ lcr2(tf->tf_cr2); vmcs_write(VM_ENTRY_INTR_INFO_FIELD, tf->tf_trap_inject); + /* Someone may have tried poking the guest and posting an IRQ, but the IPI + * missed (concurrent vmexit). In these cases, the 'outstanding + * notification' bit should still be set, and we can resend the IPI. This + * will arrive after we vmenter, since IRQs are currently disabled. */ + if (test_bit(VMX_POSTED_OUTSTANDING_NOTIF, gpc->posted_irq_desc)) + send_self_ipi(I_POKE_CORE); /* vmlaunch/resume can fail, so we need to be able to return from this. * Thus we can't clobber rsp via the popq style of setting the registers. * Likewise, we don't want to lose rbp via the clobber list. diff --git a/kern/arch/x86/ros/vmm.h b/kern/arch/x86/ros/vmm.h index 04624fc9648d..216d8e376254 100644 --- a/kern/arch/x86/ros/vmm.h +++ b/kern/arch/x86/ros/vmm.h @@ -11,7 +11,7 @@ /* Initialization data provided by the userspace part of the VMM when setting * up a guest physical core (vmx vcpu). 
*/ struct vmm_gpcore_init { - void *pir_addr; + void *posted_irq_desc; void *vapic_addr; void *apic_addr; }; diff --git a/kern/arch/x86/ros/vmx.h b/kern/arch/x86/ros/vmx.h index a233cf47ef11..30924af2c98c 100644 --- a/kern/arch/x86/ros/vmx.h +++ b/kern/arch/x86/ros/vmx.h @@ -411,6 +411,8 @@ enum vmcs_field { #define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ #define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */ +#define VMX_POSTED_OUTSTANDING_NOTIF 256 + /* GUEST_INTERRUPTIBILITY_INFO flags. */ #define GUEST_INTR_STATE_STI 0x00000001 #define GUEST_INTR_STATE_MOV_SS 0x00000002 diff --git a/kern/arch/x86/vmm/intel/vmx.c b/kern/arch/x86/vmm/intel/vmx.c index 7b3e00fd8302..7d173afb0969 100644 --- a/kern/arch/x86/vmm/intel/vmx.c +++ b/kern/arch/x86/vmm/intel/vmx.c @@ -943,7 +943,7 @@ static int vmx_setup_initial_guest_state(struct proc *p, /* Initialize parts based on the users info. If one of them fails, we'll do * the others but then error out. */ - ret |= vmcs_set_pgaddr(p, gpci->pir_addr, POSTED_INTR_DESC_ADDR); + ret |= vmcs_set_pgaddr(p, gpci->posted_irq_desc, POSTED_INTR_DESC_ADDR); ret |= vmcs_set_pgaddr(p, gpci->vapic_addr, VIRTUAL_APIC_PAGE_ADDR); ret |= vmcs_set_pgaddr(p, gpci->apic_addr, APIC_ACCESS_ADDR); @@ -1132,6 +1132,8 @@ struct guest_pcore *create_guest_pcore(struct proc *p, ret = vmx_setup_initial_guest_state(p, gpci); vmx_unload_guest_pcore(gpc); + gpc->posted_irq_desc = gpci->posted_irq_desc; + if (!ret) return gpc; diff --git a/kern/arch/x86/vmm/vmm.h b/kern/arch/x86/vmm/vmm.h index 5a5a0fe71cc5..1ce2f92719c3 100644 --- a/kern/arch/x86/vmm/vmm.h +++ b/kern/arch/x86/vmm/vmm.h @@ -19,6 +19,7 @@ static inline int cpu_has_svm(const char **msg) struct guest_pcore { int cpu; struct proc *proc; + unsigned long *posted_irq_desc; struct msr_autoload { unsigned nr; struct vmx_msr_entry guest[NR_AUTOLOAD_MSRS]; diff --git a/tests/vmm/vmrunkernel.c b/tests/vmm/vmrunkernel.c index f9105682db23..2dd3778dd343 100644 --- 
a/tests/vmm/vmrunkernel.c +++ b/tests/vmm/vmrunkernel.c @@ -519,7 +519,7 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr) static void pir_dump() { - unsigned long *pir_ptr = (unsigned long *)gpci.pir_addr; + unsigned long *pir_ptr = gpci.posted_irq_desc; int i; fprintf(stderr, "-------Begin PIR dump-------\n"); for (i = 0; i < 8; i++){ @@ -530,28 +530,9 @@ static void pir_dump() static void set_posted_interrupt(int vector) { - unsigned long *bit_vec; - int bit_offset; - int i, j; - unsigned long *pir = (unsigned long *)gpci.pir_addr; - // Move to the correct location to set our bit. - bit_vec = pir + vector/(sizeof(unsigned long)*8); - bit_offset = vector%(sizeof(unsigned long)*8); - if(debug) fprintf(stderr, "%s: Pre set PIR dump\n", __func__); - if(debug) pir_dump(); - if (debug) - vapic_status_dump(stderr, gpci.vapic_addr); - if(debug) fprintf(stderr, "%s: Setting pir bit offset %d at 0x%p\n", __func__, - bit_offset, bit_vec); - test_and_set_bit(bit_offset, bit_vec); - - // Set outstanding notification bit - /*bit_vec = pir + 4; - fprintf(stderr, "%s: Setting pir bit offset 0 at 0x%p", __func__, - bit_vec); - test_and_set_bit(0, bit_vec);*/ - - if(debug) pir_dump(); + test_and_set_bit(vector, gpci.posted_irq_desc); + /* LOCKed instruction provides the mb() */ + test_and_set_bit(VMX_POSTED_OUTSTANDING_NOTIF, gpci.posted_irq_desc); } int main(int argc, char **argv) @@ -753,7 +734,7 @@ int main(int argc, char **argv) hexdump(stdout, r, a-(void *)r); a = (void *)(((unsigned long)a + 0xfff) & ~0xfff); - gpci.pir_addr = a; + gpci.posted_irq_desc = a; memset(a, 0, 4096); a += 4096; gpci.vapic_addr = a; -- 2.7.0.rc3.207.g0ac5344 -- You received this message because you are subscribed to the Google Groups "Akaros" group. To unsubscribe from this group and stop receiving emails from it, send an email to [email protected]. To post to this group, send email to [email protected]. For more options, visit https://groups.google.com/d/optout.
