[akaros] [PATCH 4/5] VMM: Add a syscall to poke a guest pcore (XCC)

Barret Rhoden Wed, 10 Feb 2016 11:33:06 -0800

Posted IRQs in VMX are a lot like poking the guest pcore, so we'll just use
a syscall for it.


There's a bit of nastiness with error handling.  So far, it's a real pain
to find out if a posted IRQ landed on the VM and to handle it if it didn't.
(When the POKE IRQ lands and the core wasn't a VM, how do we know for
certain which VM we were supposed to interrupt, without doing something
painful?).

The general Akaros philosophy here is to post a bit in memory and poke
spuriously.  When it comes to notifying vcores, we set notif_pending, send
a (possibly spurious) __notify, and if we missed it, we'll see the
notif_pending the next time we __startcore.  Hopefully we can do something
similar with posted IRQs.

This also cleans up all of the vmctl hacks, none of which are needed
anymore.

Reinstall your kernel headers.

Signed-off-by: Barret Rhoden <[email protected]>
---
 kern/arch/x86/vmm/intel/vmx.c   |  9 ---------
 kern/arch/x86/vmm/vmm.c         | 36 +++++++++++++++++++++++++-----------
 kern/arch/x86/vmm/vmm.h         |  2 +-
 kern/drivers/dev/cons.c         | 12 ------------
 kern/include/ros/bits/syscall.h |  1 +
 kern/src/syscall.c              |  6 ++++++
 tests/vmm/vmrunkernel.c         | 14 +++-----------
 7 files changed, 36 insertions(+), 44 deletions(-)

diff --git a/kern/arch/x86/vmm/intel/vmx.c b/kern/arch/x86/vmm/intel/vmx.c
index 132bb166e5a2..7b3e00fd8302 100644
--- a/kern/arch/x86/vmm/intel/vmx.c
+++ b/kern/arch/x86/vmm/intel/vmx.c
@@ -1155,15 +1155,6 @@ static void vmx_step_instruction(void) {
                    vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
 }
 
-int vmx_interrupt_notify(struct vmctl *v)
-{
-       /* Assume we want to IPI guest pcore 0 (which vmctl controlled). */
-       int vm_core = current->vmm.guest_pcores[0]->cpu;
-
-       send_ipi(vm_core, I_POKE_CORE);
-       return 0;
-}
-
 /**
  * __vmx_enable - low-level enable of VMX mode on the current CPU
  * @vmxon_buf: an opaque buffer for use as the VMXON region
diff --git a/kern/arch/x86/vmm/vmm.c b/kern/arch/x86/vmm/vmm.c
index 6d183aab0a08..708d729d5ffd 100644
--- a/kern/arch/x86/vmm/vmm.c
+++ b/kern/arch/x86/vmm/vmm.c
@@ -61,17 +61,6 @@ void vmm_pcpu_init(void)
        printk("vmm_pcpu_init failed\n");
 }
 
-int vm_post_interrupt(struct vmctl *v)
-{
-       int vmx_interrupt_notify(struct vmctl *v);
-       if (current->vmm.amd) {
-               return -1;
-       } else {
-               return vmx_interrupt_notify(v);
-       }
-       return -1;
-}
-
 /* Initializes a process to run virtual machine contexts, returning the number
  * initialized, optionally setting errno */
 int vmm_struct_init(struct proc *p, unsigned int nr_guest_pcores,
@@ -140,6 +129,31 @@ void __vmm_struct_cleanup(struct proc *p)
        vmm->vmmcp = FALSE;
 }
 
+int vmm_poke_guest(struct proc *p, int guest_pcoreid)
+{
+       struct guest_pcore *gpc;
+       int pcoreid;
+
+       gpc = lookup_guest_pcore(p, guest_pcoreid);
+       if (!gpc) {
+               set_error(ENOENT, "Bad guest_pcoreid %d", guest_pcoreid);
+               return -1;
+       }
+       /* We're doing an unlocked peek; it could change immediately.  This is a
+        * best effort service. */
+       pcoreid = ACCESS_ONCE(gpc->cpu);
+       if (pcoreid == -1) {
+               /* So we know that we'll miss the poke for the posted IRQ.  We could
+                * return an error.  However, error handling for this case isn't
+                * particularly helpful (yet).  The absence of the error does not mean
+                * the IRQ was posted.  We'll still return 0, meaning "the user didn't
+                * mess up; we tried." */
+               return 0;
+       }
+       send_ipi(pcoreid, I_POKE_CORE);
+       return 0;
+}
+
 struct guest_pcore *lookup_guest_pcore(struct proc *p, int guest_pcoreid)
 {
        /* nr_guest_pcores is written once at setup and never changed */
diff --git a/kern/arch/x86/vmm/vmm.h b/kern/arch/x86/vmm/vmm.h
index b4430dc294ee..5a5a0fe71cc5 100644
--- a/kern/arch/x86/vmm/vmm.h
+++ b/kern/arch/x86/vmm/vmm.h
@@ -61,8 +61,8 @@ void vmm_pcpu_init(void);
 int vmm_struct_init(struct proc *p, unsigned int nr_guest_pcores,
                     struct vmm_gpcore_init *gpcis, int flags);
 void __vmm_struct_cleanup(struct proc *p);
+int vmm_poke_guest(struct proc *p, int guest_pcoreid);
 
-int vm_post_interrupt(struct vmctl *v);
 int intel_vmx_start(int id);
 int intel_vmx_setup(int nvmcs);
 
diff --git a/kern/drivers/dev/cons.c b/kern/drivers/dev/cons.c
index f1dc09db1ff3..72dc5e22509e 100644
--- a/kern/drivers/dev/cons.c
+++ b/kern/drivers/dev/cons.c
@@ -605,7 +605,6 @@ enum {
        Qsysstat,
        Qtime,
        Quser,
-       Qvmctl,
        Qzero,
 };
 
@@ -639,7 +638,6 @@ static struct dirtab consdir[] = {
        {"sysstat", {Qsysstat}, 0, 0666},
        {"time", {Qtime}, NUMSIZE + 3 * VLNUMSIZE, 0664},
        {"user", {Quser}, 0, 0666},
-       {"vmctl", {Qvmctl}, 0, 0666},
        {"zero", {Qzero}, 0, 0444},
 };
 
@@ -1095,16 +1093,6 @@ static long conswrite(struct chan *c, void *va, long n, int64_t off)
                        error(EPERM, "Cannot write to config QID");
                        break;
 
-               case Qvmctl:
-                       memmove(&vmctl, a, sizeof(vmctl));
-                       if ((offset >> 12) ==1) {
-                               ret = vm_post_interrupt(&vmctl);
-                               n = ret;
-                               //printk("vm_interrupt_notify returns %d\n", ret);
-                       } else {
-                               error(EINVAL, "Bad vmctl command");
-                       }
-                       break;
                case Qsysctl:
                        //if (!iseve()) error(EPERM, ERROR_FIXME);
                        cb = parsecmd(a, n);
diff --git a/kern/include/ros/bits/syscall.h b/kern/include/ros/bits/syscall.h
index 4612885b71ac..509041c41ebf 100644
--- a/kern/include/ros/bits/syscall.h
+++ b/kern/include/ros/bits/syscall.h
@@ -46,6 +46,7 @@
 #define SYS_vc_entry                           35
 #define SYS_nanosleep                          36
 #define SYS_pop_ctx                                    37
+#define SYS_vmm_poke_guest                     38
 
 /* FS Syscalls */
 #define SYS_read                               100
diff --git a/kern/src/syscall.c b/kern/src/syscall.c
index b5cd624ddae1..b9e99dc8528f 100644
--- a/kern/src/syscall.c
+++ b/kern/src/syscall.c
@@ -1438,6 +1438,11 @@ static int sys_vmm_setup(struct proc *p, unsigned int nr_guest_pcores,
        return vmm_struct_init(p, nr_guest_pcores, gpcis, flags);
 }
 
+static int sys_vmm_poke_guest(struct proc *p, int guest_pcoreid)
+{
+       return vmm_poke_guest(p, guest_pcoreid);
+}
+
 /* Pokes the ksched for the given resource for target_pid.  If the target pid
  * == 0, we just poke for the calling process.  The common case is poking for
  * self, so we avoid the lookup. 
@@ -2581,6 +2586,7 @@ const struct sys_table_entry syscall_table[] = {
 #endif
        [SYS_change_to_m] = {(syscall_t)sys_change_to_m, "change_to_m"},
        [SYS_vmm_setup] = {(syscall_t)sys_vmm_setup, "vmm_setup"},
+       [SYS_vmm_poke_guest] = {(syscall_t)sys_vmm_poke_guest, "vmm_poke_guest"},
        [SYS_poke_ksched] = {(syscall_t)sys_poke_ksched, "poke_ksched"},
        [SYS_abort_sysc] = {(syscall_t)sys_abort_sysc, "abort_sysc"},
        [SYS_abort_sysc_fd] = {(syscall_t)sys_abort_sysc_fd, "abort_sysc_fd"},
diff --git a/tests/vmm/vmrunkernel.c b/tests/vmm/vmrunkernel.c
index f99560581d83..f9105682db23 100644
--- a/tests/vmm/vmrunkernel.c
+++ b/tests/vmm/vmrunkernel.c
@@ -326,11 +326,9 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr);
 
 void *timer_thread(void *arg)
 {
-       int fd = open("#cons/vmctl", O_RDWR), ret;
-
        while (1) {
                set_posted_interrupt(0xef);
-               pwrite(fd, &vmctl, sizeof(vmctl), 1<<12);
+               ros_syscall(SYS_vmm_poke_guest, 0, 0, 0, 0, 0, 0);
                uthread_usleep(1);
        }
 }
@@ -413,8 +411,6 @@ void *consin(void *arg)
        int timer_started = 0;
        pthread_t timerthread_struct;
 
-       int fd = open("#cons/vmctl", O_RDWR), ret;
-       
        if (debug) fprintf(stderr, "Spin on console being read, print num queues, halt\n");
 
        for(num = 0;! quit;num++) {
@@ -455,7 +451,7 @@ void *consin(void *arg)
                set_posted_interrupt(0xE5);
                virtio_mmio_set_vring_irq();
 
-               pwrite(fd, &vmctl, sizeof(vmctl), 1<<12);
+               ros_syscall(SYS_vmm_poke_guest, 0, 0, 0, 0, 0, 0);
                /*if (!timer_started && mcp) {
                        // Start up timer thread
                        if (pthread_create(&timerthread_struct, NULL, timer_thread, NULL)) {
@@ -574,7 +570,7 @@ int main(int argc, char **argv)
        int vmmflags = 0; // Disabled probably forever. VMM_VMCALL_PRINTF;
        uint64_t entry = 0x1200000, kerneladdress = 0x1200000;
        int nr_gpcs = 1;
-       int fd = open("#cons/vmctl", O_RDWR), ret;
+       int ret;
        void * xp;
        int kfd = -1;
        static char cmd[512];
@@ -620,10 +616,6 @@ int main(int argc, char **argv)
        ((uint32_t *)a_page)[0x30/4] = 0xDEADBEEF;
 
 
-       if (fd < 0) {
-               perror("#cons/sysctl");
-               exit(1);
-       }
        argc--,argv++;
        // switches ...
        // Sorry, I don't much like the gnu opt parsing code.
-- 
2.7.0.rc3.207.g0ac5344

-- 
You received this message because you are subscribed to the Google Groups 
"Akaros" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
For more options, visit https://groups.google.com/d/optout.

[akaros] [PATCH 4/5] VMM: Add a syscall to poke a guest pcore (XCC)

Reply via email to