Posted IRQs in VMX are a lot like poking the guest pcore, so we'll just use a syscall for it.
There's a bit of nastiness with error handling. So far, it's a real pain to find out whether a posted IRQ landed on the VM, and to handle the case where it didn't. (When the POKE IRQ lands and the core wasn't a VM, how do we know for certain which VM we were supposed to interrupt without doing something painful?) The general Akaros philosophy here is to post a bit in memory and poke spuriously. When it comes to notifying vcores, we set notif_pending, send a (possibly spurious) __notify, and if we missed it, we'll see the notif_pending the next time we __startcore. Hopefully we can do something similar with posted IRQs. This also cleans up all of the vmctl hacks, none of which are needed anymore. Reinstall your kernel headers. Signed-off-by: Barret Rhoden <[email protected]> --- kern/arch/x86/vmm/intel/vmx.c | 9 --------- kern/arch/x86/vmm/vmm.c | 36 +++++++++++++++++++++++++----------- kern/arch/x86/vmm/vmm.h | 2 +- kern/drivers/dev/cons.c | 12 ------------ kern/include/ros/bits/syscall.h | 1 + kern/src/syscall.c | 6 ++++++ tests/vmm/vmrunkernel.c | 14 +++----------- 7 files changed, 36 insertions(+), 44 deletions(-) diff --git a/kern/arch/x86/vmm/intel/vmx.c b/kern/arch/x86/vmm/intel/vmx.c index 132bb166e5a2..7b3e00fd8302 100644 --- a/kern/arch/x86/vmm/intel/vmx.c +++ b/kern/arch/x86/vmm/intel/vmx.c @@ -1155,15 +1155,6 @@ static void vmx_step_instruction(void) { vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); } -int vmx_interrupt_notify(struct vmctl *v) -{ - /* Assume we want to IPI guest pcore 0 (which vmctl controlled). 
*/ - int vm_core = current->vmm.guest_pcores[0]->cpu; - - send_ipi(vm_core, I_POKE_CORE); - return 0; -} - /** * __vmx_enable - low-level enable of VMX mode on the current CPU * @vmxon_buf: an opaque buffer for use as the VMXON region diff --git a/kern/arch/x86/vmm/vmm.c b/kern/arch/x86/vmm/vmm.c index 6d183aab0a08..708d729d5ffd 100644 --- a/kern/arch/x86/vmm/vmm.c +++ b/kern/arch/x86/vmm/vmm.c @@ -61,17 +61,6 @@ void vmm_pcpu_init(void) printk("vmm_pcpu_init failed\n"); } -int vm_post_interrupt(struct vmctl *v) -{ - int vmx_interrupt_notify(struct vmctl *v); - if (current->vmm.amd) { - return -1; - } else { - return vmx_interrupt_notify(v); - } - return -1; -} - /* Initializes a process to run virtual machine contexts, returning the number * initialized, optionally setting errno */ int vmm_struct_init(struct proc *p, unsigned int nr_guest_pcores, @@ -140,6 +129,31 @@ void __vmm_struct_cleanup(struct proc *p) vmm->vmmcp = FALSE; } +int vmm_poke_guest(struct proc *p, int guest_pcoreid) +{ + struct guest_pcore *gpc; + int pcoreid; + + gpc = lookup_guest_pcore(p, guest_pcoreid); + if (!gpc) { + set_error(ENOENT, "Bad guest_pcoreid %d", guest_pcoreid); + return -1; + } + /* We're doing an unlocked peek; it could change immediately. This is a + * best effort service. */ + pcoreid = ACCESS_ONCE(gpc->cpu); + if (pcoreid == -1) { + /* So we know that we'll miss the poke for the posted IRQ. We could + * return an error. However, error handling for this case isn't + * particularly helpful (yet). The absence of the error does not mean + * the IRQ was posted. We'll still return 0, meaning "the user didn't + * mess up; we tried." 
*/ + return 0; + } + send_ipi(pcoreid, I_POKE_CORE); + return 0; +} + struct guest_pcore *lookup_guest_pcore(struct proc *p, int guest_pcoreid) { /* nr_guest_pcores is written once at setup and never changed */ diff --git a/kern/arch/x86/vmm/vmm.h b/kern/arch/x86/vmm/vmm.h index b4430dc294ee..5a5a0fe71cc5 100644 --- a/kern/arch/x86/vmm/vmm.h +++ b/kern/arch/x86/vmm/vmm.h @@ -61,8 +61,8 @@ void vmm_pcpu_init(void); int vmm_struct_init(struct proc *p, unsigned int nr_guest_pcores, struct vmm_gpcore_init *gpcis, int flags); void __vmm_struct_cleanup(struct proc *p); +int vmm_poke_guest(struct proc *p, int guest_pcoreid); -int vm_post_interrupt(struct vmctl *v); int intel_vmx_start(int id); int intel_vmx_setup(int nvmcs); diff --git a/kern/drivers/dev/cons.c b/kern/drivers/dev/cons.c index f1dc09db1ff3..72dc5e22509e 100644 --- a/kern/drivers/dev/cons.c +++ b/kern/drivers/dev/cons.c @@ -605,7 +605,6 @@ enum { Qsysstat, Qtime, Quser, - Qvmctl, Qzero, }; @@ -639,7 +638,6 @@ static struct dirtab consdir[] = { {"sysstat", {Qsysstat}, 0, 0666}, {"time", {Qtime}, NUMSIZE + 3 * VLNUMSIZE, 0664}, {"user", {Quser}, 0, 0666}, - {"vmctl", {Qvmctl}, 0, 0666}, {"zero", {Qzero}, 0, 0444}, }; @@ -1095,16 +1093,6 @@ static long conswrite(struct chan *c, void *va, long n, int64_t off) error(EPERM, "Cannot write to config QID"); break; - case Qvmctl: - memmove(&vmctl, a, sizeof(vmctl)); - if ((offset >> 12) ==1) { - ret = vm_post_interrupt(&vmctl); - n = ret; - //printk("vm_interrupt_notify returns %d\n", ret); - } else { - error(EINVAL, "Bad vmctl command"); - } - break; case Qsysctl: //if (!iseve()) error(EPERM, ERROR_FIXME); cb = parsecmd(a, n); diff --git a/kern/include/ros/bits/syscall.h b/kern/include/ros/bits/syscall.h index 4612885b71ac..509041c41ebf 100644 --- a/kern/include/ros/bits/syscall.h +++ b/kern/include/ros/bits/syscall.h @@ -46,6 +46,7 @@ #define SYS_vc_entry 35 #define SYS_nanosleep 36 #define SYS_pop_ctx 37 +#define SYS_vmm_poke_guest 38 /* FS Syscalls */ #define 
SYS_read 100 diff --git a/kern/src/syscall.c b/kern/src/syscall.c index b5cd624ddae1..b9e99dc8528f 100644 --- a/kern/src/syscall.c +++ b/kern/src/syscall.c @@ -1438,6 +1438,11 @@ static int sys_vmm_setup(struct proc *p, unsigned int nr_guest_pcores, return vmm_struct_init(p, nr_guest_pcores, gpcis, flags); } +static int sys_vmm_poke_guest(struct proc *p, int guest_pcoreid) +{ + return vmm_poke_guest(p, guest_pcoreid); +} + /* Pokes the ksched for the given resource for target_pid. If the target pid * == 0, we just poke for the calling process. The common case is poking for * self, so we avoid the lookup. @@ -2581,6 +2586,7 @@ const struct sys_table_entry syscall_table[] = { #endif [SYS_change_to_m] = {(syscall_t)sys_change_to_m, "change_to_m"}, [SYS_vmm_setup] = {(syscall_t)sys_vmm_setup, "vmm_setup"}, + [SYS_vmm_poke_guest] = {(syscall_t)sys_vmm_poke_guest, "vmm_poke_guest"}, [SYS_poke_ksched] = {(syscall_t)sys_poke_ksched, "poke_ksched"}, [SYS_abort_sysc] = {(syscall_t)sys_abort_sysc, "abort_sysc"}, [SYS_abort_sysc_fd] = {(syscall_t)sys_abort_sysc_fd, "abort_sysc_fd"}, diff --git a/tests/vmm/vmrunkernel.c b/tests/vmm/vmrunkernel.c index f99560581d83..f9105682db23 100644 --- a/tests/vmm/vmrunkernel.c +++ b/tests/vmm/vmrunkernel.c @@ -326,11 +326,9 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr); void *timer_thread(void *arg) { - int fd = open("#cons/vmctl", O_RDWR), ret; - while (1) { set_posted_interrupt(0xef); - pwrite(fd, &vmctl, sizeof(vmctl), 1<<12); + ros_syscall(SYS_vmm_poke_guest, 0, 0, 0, 0, 0, 0); uthread_usleep(1); } } @@ -413,8 +411,6 @@ void *consin(void *arg) int timer_started = 0; pthread_t timerthread_struct; - int fd = open("#cons/vmctl", O_RDWR), ret; - if (debug) fprintf(stderr, "Spin on console being read, print num queues, halt\n"); for(num = 0;! 
quit;num++) { @@ -455,7 +451,7 @@ void *consin(void *arg) set_posted_interrupt(0xE5); virtio_mmio_set_vring_irq(); - pwrite(fd, &vmctl, sizeof(vmctl), 1<<12); + ros_syscall(SYS_vmm_poke_guest, 0, 0, 0, 0, 0, 0); /*if (!timer_started && mcp) { // Start up timer thread if (pthread_create(&timerthread_struct, NULL, timer_thread, NULL)) { @@ -574,7 +570,7 @@ int main(int argc, char **argv) int vmmflags = 0; // Disabled probably forever. VMM_VMCALL_PRINTF; uint64_t entry = 0x1200000, kerneladdress = 0x1200000; int nr_gpcs = 1; - int fd = open("#cons/vmctl", O_RDWR), ret; + int ret; void * xp; int kfd = -1; static char cmd[512]; @@ -620,10 +616,6 @@ int main(int argc, char **argv) ((uint32_t *)a_page)[0x30/4] = 0xDEADBEEF; - if (fd < 0) { - perror("#cons/sysctl"); - exit(1); - } argc--,argv++; // switches ... // Sorry, I don't much like the gnu opt parsing code. -- 2.7.0.rc3.207.g0ac5344 -- You received this message because you are subscribed to the Google Groups "Akaros" group. To unsubscribe from this group and stop receiving emails from it, send an email to [email protected]. To post to this group, send email to [email protected]. For more options, visit https://groups.google.com/d/optout.
