This patch reworks the IO thread to use signalfd() instead of sigtimedwait().
This will eliminate the need to use SIGIO everywhere.  In this version of the
patch, we use signalfd() when it's available.  When it isn't available, we
instead use a pipe() that is written to in each signal handler.

I've tested Windows and Linux guests with SMP without seeing any obvious
regressions.

Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>

diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c
index 9a9bf59..0c7f49f 100644
--- a/qemu/qemu-kvm.c
+++ b/qemu/qemu-kvm.c
@@ -12,6 +12,9 @@ int kvm_allowed = 1;
 int kvm_irqchip = 1;
 int kvm_pit = 1;
 
+#include "qemu-common.h"
+#include "console.h"
+
 #include <string.h>
 #include "hw/hw.h"
 #include "sysemu.h"
@@ -38,14 +41,6 @@ __thread struct vcpu_info *vcpu;
 
 static int qemu_system_ready;
 
-struct qemu_kvm_signal_table {
-    sigset_t sigset;
-    sigset_t negsigset;
-};
-
-static struct qemu_kvm_signal_table io_signal_table;
-static struct qemu_kvm_signal_table vcpu_signal_table;
-
 #define SIG_IPI (SIGRTMIN+4)
 
 struct vcpu_info {
@@ -169,37 +164,23 @@ static int has_work(CPUState *env)
     return kvm_arch_has_work(env);
 }
 
-static int kvm_process_signal(int si_signo)
-{
-    struct sigaction sa;
-
-    switch (si_signo) {
-    case SIGUSR2:
-        pthread_cond_signal(&qemu_aio_cond);
-        break;
-    case SIGALRM:
-    case SIGIO:
-        sigaction(si_signo, NULL, &sa);
-        sa.sa_handler(si_signo);
-        break;
-    }
-
-    return 1;
-}
-
-static int kvm_eat_signal(struct qemu_kvm_signal_table *waitset, CPUState *env,
-                          int timeout)
+static int kvm_eat_signal(CPUState *env, int timeout)
 {
     struct timespec ts;
     int r, e, ret = 0;
     siginfo_t siginfo;
+    sigset_t waitset;
 
     ts.tv_sec = timeout / 1000;
     ts.tv_nsec = (timeout % 1000) * 1000000;
-    r = sigtimedwait(&waitset->sigset, &siginfo, &ts);
+    sigemptyset(&waitset);
+    sigaddset(&waitset, SIG_IPI);
+
+    r = sigtimedwait(&waitset, &siginfo, &ts);
     if (r == -1 && (errno == EAGAIN || errno == EINTR) && !timeout)
        return 0;
     e = errno;
+
     pthread_mutex_lock(&qemu_mutex);
     if (env && vcpu)
         cpu_single_env = vcpu->env;
@@ -208,7 +189,7 @@ static int kvm_eat_signal(struct qemu_kvm_signal_table 
*waitset, CPUState *env,
        exit(1);
     }
     if (r != -1)
-        ret = kvm_process_signal(siginfo.si_signo);
+       ret = 1;
 
     if (env && vcpu_info[env->cpu_index].stop) {
        vcpu_info[env->cpu_index].stop = 0;
@@ -224,14 +205,13 @@ static int kvm_eat_signal(struct qemu_kvm_signal_table 
*waitset, CPUState *env,
 static void kvm_eat_signals(CPUState *env, int timeout)
 {
     int r = 0;
-    struct qemu_kvm_signal_table *waitset = &vcpu_signal_table;
 
-    while (kvm_eat_signal(waitset, env, 0))
+    while (kvm_eat_signal(env, 0))
        r = 1;
     if (!r && timeout) {
-       r = kvm_eat_signal(waitset, env, timeout);
+       r = kvm_eat_signal(env, timeout);
        if (r)
-           while (kvm_eat_signal(waitset, env, 0))
+           while (kvm_eat_signal(env, 0))
                ;
     }
 }
@@ -264,9 +244,7 @@ static void pause_all_threads(void)
        pthread_kill(vcpu_info[i].thread, SIG_IPI);
     }
     while (!all_threads_paused()) {
-       pthread_mutex_unlock(&qemu_mutex);
-       kvm_eat_signal(&io_signal_table, NULL, 1000);
-       pthread_mutex_lock(&qemu_mutex);
+       main_loop_wait(1000);
        cpu_single_env = NULL;
     }
 }
@@ -307,6 +285,13 @@ static void setup_kernel_sigmask(CPUState *env)
 {
     sigset_t set;
 
+    sigemptyset(&set);
+    sigaddset(&set, SIGUSR1);
+    sigaddset(&set, SIGUSR2);
+    sigaddset(&set, SIGIO);
+    sigaddset(&set, SIGALRM);
+    sigprocmask(SIG_BLOCK, &set, NULL);
+
     sigprocmask(SIG_BLOCK, NULL, &set);
     sigdelset(&set, SIG_IPI);
     
@@ -343,7 +328,7 @@ static int kvm_main_loop_cpu(CPUState *env)
     cpu_single_env = env;
     while (1) {
        while (!has_work(env))
-           kvm_main_loop_wait(env, 10);
+           kvm_main_loop_wait(env, 1000);
        if (env->interrupt_request & CPU_INTERRUPT_HARD)
            env->hflags &= ~HF_HALTED_MASK;
        if (!kvm_irqchip_in_kernel(kvm_context) && info->sipi_needed)
@@ -391,18 +376,6 @@ static void *ap_main_loop(void *_env)
     return NULL;
 }
 
-static void qemu_kvm_init_signal_table(struct qemu_kvm_signal_table *sigtab)
-{
-    sigemptyset(&sigtab->sigset);
-    sigfillset(&sigtab->negsigset);
-}
-
-static void kvm_add_signal(struct qemu_kvm_signal_table *sigtab, int signum)
-{
-    sigaddset(&sigtab->sigset, signum);
-    sigdelset(&sigtab->negsigset, signum);
-}
-
 void kvm_init_new_ap(int cpu, CPUState *env)
 {
     pthread_create(&vcpu_info[cpu].thread, NULL, ap_main_loop, env);
@@ -411,28 +384,12 @@ void kvm_init_new_ap(int cpu, CPUState *env)
        pthread_cond_wait(&qemu_vcpu_cond, &qemu_mutex);
 }
 
-static void qemu_kvm_init_signal_tables(void)
-{
-    qemu_kvm_init_signal_table(&io_signal_table);
-    qemu_kvm_init_signal_table(&vcpu_signal_table);
-
-    kvm_add_signal(&io_signal_table, SIGIO);
-    kvm_add_signal(&io_signal_table, SIGALRM);
-    kvm_add_signal(&io_signal_table, SIGUSR1);
-    kvm_add_signal(&io_signal_table, SIGUSR2);
-
-    kvm_add_signal(&vcpu_signal_table, SIG_IPI);
-
-    sigprocmask(SIG_BLOCK, &io_signal_table.sigset, NULL);
-}
-
 int kvm_init_ap(void)
 {
 #ifdef TARGET_I386
     kvm_tpr_opt_setup();
 #endif
     qemu_add_vm_change_state_handler(kvm_vm_state_change_handler, NULL);
-    qemu_kvm_init_signal_tables();
 
     signal(SIG_IPI, sig_ipi_handler);
     return 0;
@@ -444,25 +401,235 @@ void qemu_kvm_notify_work(void)
         pthread_kill(io_thread, SIGUSR1);
 }
 
-/*
- * The IO thread has all signals that inform machine events
- * blocked (io_signal_table), so it won't get interrupted
- * while processing in main_loop_wait().
+static int received_signal;
+
+/* QEMU relies on periodically breaking out of select via EINTR to poll for IO
+   and timer signals.  Since we're now using a file descriptor to handle
+   signals, select() won't be interrupted by a signal.  We need to forcefully
+   break the select() loop when a signal is received, hence
+   kvm_check_received_signal(). */
+
+int kvm_check_received_signal(void)
+{
+    if (received_signal) {
+       received_signal = 0;
+       return 1;
+    }
+
+    return 0;
+}
+
+#if defined(SYS_signalfd)
+#if !defined(HAVE_signalfd)
+#include <linux/signalfd.h>
+
+static int signalfd(int fd, const sigset_t *mask, int flags)
+{
+    if (flags) {
+       errno = EINVAL;
+       return -1;
+    }
+
+    return syscall(SYS_signalfd, fd, mask, _NSIG / 8);
+}
+#endif
+
+/* If we have signalfd, we mask out the signals we want to handle and then
+ * use signalfd to listen for them.  We rely on whatever the current signal
+ * handler is to dispatch the signals when we receive them.
  */
 
+static void sigfd_handler(void *opaque)
+{
+    int fd = (unsigned long)opaque;
+    struct signalfd_siginfo info;
+    struct sigaction action;
+    ssize_t len;
+
+    while (1) {
+       do {
+           len = read(fd, &info, sizeof(info));
+       } while (len == -1 && errno == EINTR);
+
+       if (len == -1 && errno == EAGAIN)
+           break;
+
+       if (len != sizeof(info)) {
+           printf("read from sigfd returned %ld: %m\n", len);
+           return;
+       }
+
+       sigaction(info.ssi_signo, NULL, &action);
+       if (action.sa_handler)
+           action.sa_handler(info.ssi_signo);
+
+       if (info.ssi_signo == SIGUSR2)
+           pthread_cond_signal(&qemu_aio_cond); 
+    }
+
+    received_signal = 1;
+}
+
+static int setup_signal_handlers(int nr_signals, ...)
+{
+    sigset_t mask;
+    va_list ap;
+    int i, fd;
+
+    sigemptyset(&mask);
+
+    va_start(ap, nr_signals);
+    for (i = 0; i < nr_signals; i++) {
+       int signo = va_arg(ap, int);
+
+       sigaddset(&mask, signo);
+    }
+    va_end(ap);
+
+    sigprocmask(SIG_BLOCK, &mask, NULL);
+
+    fd = signalfd(-1, &mask, 0);
+    if (fd == -1)
+       return -1;
+
+    fcntl(fd, F_SETFL, O_NONBLOCK);
+
+    qemu_set_fd_handler2(fd, NULL, sigfd_handler, NULL,
+                        (void *)(unsigned long)fd);
+
+    return 0;
+}
+#else
+struct kvm_sighandler
+{
+    int signo;
+    void (*action)(int);
+};
+
+static int nr_sighandlers;
+static struct kvm_sighandler *sighandlers;
+static int sigpipefd;
+
+/* If we don't have signalfd, we don't mask out the signals we want to receive.
+ * To avoid the signal/select race, we use a pipe() that we write to from the
+ * signal handler.  As a consequence, we save off the signal handler to perform
+ * dispatch.
+ */
+
+static void kvm_sighandler(int signo)
+{
+    char buffer[4];
+    size_t offset = 0;
+
+    memcpy(&buffer, &signo, 4);
+    while (offset < 4) {
+       ssize_t len;
+
+       len = write(sigpipefd, buffer + offset, 4 - offset);
+       if (len == -1 && errno == EINTR)
+           continue;
+
+       if (len < 1)
+           return;
+
+       offset += len;
+    }
+}
+
+static void sigfd_handler(void *opaque)
+{
+    int fd = (unsigned long)opaque;
+    char buffer[4];
+    int signo, i;
+    size_t offset = 0;
+
+    while (1) {
+       while (offset < 4) {
+           ssize_t len;
+
+           len = read(fd, buffer + offset, 4 - offset);
+           if (len == -1 && errno == EINTR)
+               continue;
+
+           if (len == -1 && errno == EAGAIN)
+               return;
+
+           if (len < 1) {
+               fprintf(stderr, "unexpected error in sigfd_handler\n");
+               exit(1);
+           }
+
+           offset += len;
+       }
+
+       offset = 0;
+
+       memcpy(&signo, buffer, 4);
+       for (i = 0; i < nr_sighandlers; i++) {
+           if (sighandlers[i].signo == signo) {
+               if (sighandlers[i].action)
+                   sighandlers[i].action(signo);
+               break;
+           }
+       }
+
+       if (signo == SIGUSR2)
+           pthread_cond_signal(&qemu_aio_cond); 
+    }
+
+    received_signal = 1;
+}
+
+static int setup_signal_handlers(int nr_signals, ...)
+{
+    va_list ap;
+    int fds[2];
+    int i;
+
+    if (pipe(fds) == -1)
+       return -1;
+
+    sigpipefd = fds[1];
+
+    nr_sighandlers = nr_signals;
+    sighandlers = qemu_malloc(nr_sighandlers * sizeof(sighandlers[0]));
+
+    fcntl(fds[0], F_SETFL, O_NONBLOCK);
+    fcntl(fds[1], F_SETFL, O_NONBLOCK);
+
+    va_start(ap, nr_signals);
+    for (i = 0; i < nr_signals; i++) {
+       int signo = va_arg(ap, int);
+
+       sighandlers[i].signo = signo;
+       sighandlers[i].action = signal(signo, kvm_sighandler);
+    }
+    va_end(ap);
+
+    qemu_set_fd_handler2(fds[0], NULL, sigfd_handler, NULL,
+                        (void *)(unsigned long)fds[0]);
+
+    return 0;
+}
+#endif
+
 int kvm_main_loop(void)
 {
     io_thread = pthread_self();
     qemu_system_ready = 1;
+
+    setup_signal_handlers(4, SIGIO, SIGALRM, SIGUSR1, SIGUSR2);
+
     pthread_mutex_unlock(&qemu_mutex);
 
     pthread_cond_broadcast(&qemu_system_cond);
 
+    pthread_mutex_lock(&qemu_mutex);
+
+    cpu_single_env = NULL;
+
     while (1) {
-        kvm_eat_signal(&io_signal_table, NULL, 1000);
-        pthread_mutex_lock(&qemu_mutex);
-        cpu_single_env = NULL;
-        main_loop_wait(0);
+        main_loop_wait(1000);
         if (qemu_shutdown_requested())
             break;
         else if (qemu_powerdown_requested())
@@ -471,7 +638,6 @@ int kvm_main_loop(void)
             pthread_kill(vcpu_info[0].thread, SIG_IPI);
             qemu_kvm_reset_requested = 1;
         }
-        pthread_mutex_unlock(&qemu_mutex);
     }
 
     pause_all_threads();
@@ -834,10 +1000,7 @@ void qemu_kvm_aio_wait(void)
     CPUState *cpu_single = cpu_single_env;
 
     if (!cpu_single_env) {
-        pthread_mutex_unlock(&qemu_mutex);
-        kvm_eat_signal(&io_signal_table, NULL, 1000);
-        pthread_mutex_lock(&qemu_mutex);
-        cpu_single_env = NULL;
+       main_loop_wait(1000);
     } else {
         pthread_cond_wait(&qemu_aio_cond, &qemu_mutex);
         cpu_single_env = cpu_single;
@@ -864,3 +1027,14 @@ void kvm_cpu_destroy_phys_mem(target_phys_addr_t 
start_addr,
 {
     kvm_destroy_phys_mem(kvm_context, start_addr, size);
 }
+
+void kvm_mutex_unlock(void)
+{
+    pthread_mutex_unlock(&qemu_mutex);
+}
+
+void kvm_mutex_lock(void)
+{
+    pthread_mutex_lock(&qemu_mutex);
+    cpu_single_env = NULL;
+}
diff --git a/qemu/qemu-kvm.h b/qemu/qemu-kvm.h
index 024a653..bcab82c 100644
--- a/qemu/qemu-kvm.h
+++ b/qemu/qemu-kvm.h
@@ -97,4 +97,28 @@ extern kvm_context_t kvm_context;
 #define qemu_kvm_pit_in_kernel() (0)
 #endif
 
+void kvm_mutex_unlock(void);
+void kvm_mutex_lock(void);
+
+static inline void kvm_sleep_begin(void)
+{
+    if (kvm_enabled())
+       kvm_mutex_unlock();
+}
+
+static inline void kvm_sleep_end(void)
+{
+    if (kvm_enabled())
+       kvm_mutex_lock();
+}
+
+int kvm_check_received_signal(void);
+
+static inline int kvm_received_signal(void)
+{
+    if (kvm_enabled())
+       return kvm_check_received_signal();
+    return 0;
+}
+
 #endif
diff --git a/qemu/vl.c b/qemu/vl.c
index 74be059..1192759 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -7836,6 +7836,23 @@ void qemu_system_powerdown_request(void)
         cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT);
 }
 
+static int qemu_select(int max_fd, fd_set *rfds, fd_set *wfds, fd_set *xfds,
+                      struct timeval *tv)
+{
+    int ret;
+
+    /* KVM holds a mutex while QEMU code is running, we need hooks to
+       release the mutex whenever QEMU code sleeps. */
+
+    kvm_sleep_begin();
+
+    ret = select(max_fd, rfds, wfds, xfds, tv);
+
+    kvm_sleep_end();
+
+    return ret;
+}
+
 void main_loop_wait(int timeout)
 {
     IOHandlerRecord *ioh;
@@ -7907,11 +7924,12 @@ void main_loop_wait(int timeout)
         }
     }
 
-    tv.tv_sec = 0;
 #ifdef _WIN32
+    tv.tv_sec = 0;
     tv.tv_usec = 0;
 #else
-    tv.tv_usec = timeout * 1000;
+    tv.tv_sec = timeout / 1000;
+    tv.tv_usec = (timeout % 1000) * 1000;
 #endif
 #if defined(CONFIG_SLIRP)
     if (slirp_inited) {
@@ -7919,7 +7937,7 @@ void main_loop_wait(int timeout)
     }
 #endif
  moreio:
-    ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv);
+    ret = qemu_select(nfds + 1, &rfds, &wfds, &xfds, &tv);
     if (ret > 0) {
         IOHandlerRecord **pioh;
         int more = 0;
@@ -7948,7 +7966,7 @@ void main_loop_wait(int timeout)
             } else
                 pioh = &ioh->next;
         }
-        if (more)
+        if (more && !kvm_received_signal())
             goto moreio;
     }
 #if defined(CONFIG_SLIRP)

-------------------------------------------------------------------------
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

Reply via email to