Il Mon, Aug 13, 2007 at 05:42:55PM +0300, Dan Kenigsberg ha scritto: > "Dynamic ticks" in Qemu: have a SIGALRM generated only when it is > needed, instead of every 1 millisecond. This patch requires that the > host supports high resolution timers, since it arms a POSIX timer to the > nearest Qemu timer's expiry time (which might be rather near). > > I tried to send a previous version of this patch yesterday, but luckily > it seems to have been eaten by qemu-devel list. I'd be happy to hear > your comments about it.
I like it ;) I have some comments (and a reworked patch at the end): > Index: vl.c > =================================================================== > RCS file: /sources/qemu/qemu/vl.c,v > retrieving revision 1.323 > diff -u -r1.323 vl.c > --- vl.c 29 Jul 2007 17:57:25 -0000 1.323 > +++ vl.c 13 Aug 2007 14:18:19 -0000 > @@ -793,6 +793,15 @@ > /* frequency of the times() clock tick */ > static int timer_freq; > #endif > +#ifdef DYNAMIC_TICKS > +/* If DYNAMIC_TICKS is defined (and use_dynamic_ticks selected) qemu does not > + * attepmt to generate SIGALRM at a constant rate. Rather, the system timer > is > + * set to generate SIGALRM only when it is needed. DYNAMIC_TICKS reduces the > + * number of SIGALRMs sent to idle dynamic-ticked guests. */ > +static timer_t host_timer; > +static void rearm_host_timer(void); > +static int use_dynamic_ticks = 1; > +#endif I'd make use_dynamic_ticks a static inline helper, in this way gcc will still optimize it away when not need and you can remove a lot of #ifdefs from the code. > > QEMUClock *qemu_new_clock(int type) > { > @@ -838,6 +847,10 @@ > } > pt = &t->next; > } > +#ifdef DYNAMIC_TICKS > + if (use_dynamic_ticks) > + rearm_host_timer(); > +#endif Like here > } > > /* modify the current timer so that it will be fired when current_time > @@ -897,6 +910,7 @@ > /* run the callback (the timer list can be modified) */ > ts->cb(ts->opaque); > } > + rearm_host_timer(); > } > > int64_t qemu_get_clock(QEMUClock *clock) > @@ -1004,7 +1018,12 @@ > last_clock = ti; > } > #endif > - if (qemu_timer_expired(active_timers[QEMU_TIMER_VIRTUAL], > + > + if ( > +#ifdef DYNAMIC_TICKS > + use_dynamic_ticks || > +#endif > + qemu_timer_expired(active_timers[QEMU_TIMER_VIRTUAL], > qemu_get_clock(vm_clock)) || > qemu_timer_expired(active_timers[QEMU_TIMER_REALTIME], > qemu_get_clock(rt_clock))) { and here (this one is pretty ugly btw) > @@ -1109,21 +1128,37 @@ > act.sa_handler = host_alarm_handler; > sigaction(SIGALRM, &act, NULL); > > - itv.it_interval.tv_sec = 0; > - itv.it_interval.tv_usec = 999; /* for i386 kernel 2.6 to get 1 ms */ > - itv.it_value.tv_sec = 0; > - itv.it_value.tv_usec = 10 * 1000; > - setitimer(ITIMER_REAL, &itv, NULL); > - /* we probe the tick duration of the kernel to inform the user if > - the emulated kernel requested a too high timer frequency */ > - getitimer(ITIMER_REAL, &itv); > +#ifdef DYNAMIC_TICKS > + if (use_dynamic_ticks) { > + struct sigevent ev; > + ev.sigev_value.sival_int = 0; > + ev.sigev_notify = SIGEV_SIGNAL; > + ev.sigev_signo = SIGALRM; > + if (timer_create(CLOCK_REALTIME, &ev, &host_timer)) > + perror("timer_create"); Should this fail you end up with a non-working QEMU. It's pretty easy to recover though, running this code: > + } else > +#endif /* DYNAMIC_TICKS */ > + { > + itv.it_interval.tv_sec = 0; > + itv.it_interval.tv_usec = 999; /* for i386 kernel 2.6 to get 1 > ms */ > + itv.it_value.tv_sec = 0; > + itv.it_value.tv_usec = 10 * 1000; > + setitimer(ITIMER_REAL, &itv, NULL); > + /* we probe the tick duration of the kernel to inform the user if > + the emulated kernel requested a too high timer frequency */ > + getitimer(ITIMER_REAL, &itv); > + } > > #if defined(__linux__) > /* XXX: force /dev/rtc usage because even 2.6 kernels may not > have timers with 1 ms resolution. The correct solution will > be to use the POSIX real time timers available in recent > 2.6 kernels */ > - if (itv.it_interval.tv_usec > 1000 || 1) { > + if ( > +#ifdef DYNAMIC_TICKS > + !use_dynamic_ticks && helper ;) > +#endif > + (itv.it_interval.tv_usec > 1000 || 1)) { Plus, in this way you change the behaviour from "always try RTC under Linux" to "don't use RTC is dynticks is enabled". Is this what you really want? Furthermore: I'm not sure about the cost of reprogramming the CMOS RTC, but with HPET should be feasible to program the timer in one-shot mode. > /* try to use /dev/rtc to have a faster timer */ > if (start_rtc_timer() < 0) > goto use_itimer; > @@ -6287,6 +6322,10 @@ > cpu_enable_ticks(); > vm_running = 1; > vm_state_notify(1); > +#ifdef DYNAMIC_TICKS > + if (use_dynamic_ticks) > + rearm_host_timer(); > +#endif > } > } > > @@ -6708,6 +6747,9 @@ > #ifdef TARGET_SPARC > "-prom-env variable=value set OpenBIOS nvram variables\n" > #endif > +#if defined(DYNAMIC_TICKS) > + "-no-dyntick don't use dynamic ticks\n" > +#endif > "\n" > "During emulation, the following keys are useful:\n" > "ctrl-alt-f toggle full screen\n" > @@ -6805,6 +6847,9 @@ > QEMU_OPTION_name, > QEMU_OPTION_prom_env, > QEMU_OPTION_old_param, > +#ifdef DYNAMIC_TICKS > + QEMU_OPTION_no_dyntick, > +#endif > }; > > typedef struct QEMUOption { > @@ -6909,6 +6954,9 @@ > #if defined(TARGET_ARM) > { "old-param", 0, QEMU_OPTION_old_param }, > #endif > +#if defined(DYNAMIC_TICKS) > + { "no-dyntick", 0, QEMU_OPTION_no_dyntick }, > +#endif > { NULL }, > }; > > @@ -7137,6 +7185,53 @@ > > #define MAX_NET_CLIENTS 32 > > + > +#ifdef DYNAMIC_TICKS > +/* call host_alarm_handler just when the nearest QEMUTimer expires */ > +/* expire_time is measured in nanosec for vm_clock > + * but in millisec for rt_clock */ > +static void rearm_host_timer(void) > +{ > + struct itimerspec timeout; > + int64_t nearest_delta_us = INT64_MAX; > + > + if (active_timers[QEMU_TIMER_REALTIME] || > + active_timers[QEMU_TIMER_VIRTUAL]) { > + int64_t vmdelta_us, current_us; > + if (active_timers[QEMU_TIMER_REALTIME]) { > + nearest_delta_us = > (active_timers[QEMU_TIMER_REALTIME]->expire_time - > qemu_get_clock(rt_clock))*1000; > + } > + > + if (active_timers[QEMU_TIMER_VIRTUAL]) { > + vmdelta_us = (active_timers[QEMU_TIMER_VIRTUAL]->expire_time - > qemu_get_clock(vm_clock)+999)/1000; /* round up */ > + if (vmdelta_us < nearest_delta_us) { > + nearest_delta_us = vmdelta_us; > + } > + } > + /* Avoid arming the timer to negative, zero, or too low values */ > + /* MIN_TIMER_REARM_US should be optimized */ > +#define MIN_TIMER_REARM_US 250 > + if (nearest_delta_us <= MIN_TIMER_REARM_US) { > + nearest_delta_us = MIN_TIMER_REARM_US; > + } > + > + /* check whether a timer is already running */ > + if (timer_gettime(host_timer, &timeout)) > + perror("gettime"); Hum. > + current_us = timeout.it_value.tv_sec * 1000000 + > timeout.it_value.tv_nsec/1000; > + if (current_us && current_us <= nearest_delta_us) > + return; > + > + timeout.it_interval.tv_sec = 0; > + timeout.it_interval.tv_nsec = 0; /* 0 for one-shot timer */ > + timeout.it_value.tv_sec = nearest_delta_us / 1000000; > + timeout.it_value.tv_nsec = nearest_delta_us % 1000000 * 1000; > + if (timer_settime(host_timer, 0 /* RELATIVE */, &timeout, NULL)) > + perror("settime"); Hum. Hopefully those 2 functions won't fail after timer_create() succeeded; still error handling should be improved. I can't see an easy way to recover, maybe the right thing to do is printing the error and closing the VM (instead of leaving it half-dead). > + } > +} > +#endif /* DYNAMIC_TICKS */ > + > int main(int argc, char **argv) > { > #ifdef CONFIG_GDBSTUB > @@ -7687,6 +7782,12 @@ > #ifdef TARGET_ARM > case QEMU_OPTION_old_param: > old_param = 1; > + break; > +#endif > +#if defined(DYNAMIC_TICKS) > + case QEMU_OPTION_no_dyntick: > + use_dynamic_ticks = 0; > + break; > #endif > } > } I've implemented some of my suggestions in the following patch - rebased to kvm-userspace current git since it's easier to test (...ok, I'm lazy - but you get the idea): diff --git a/qemu/configure b/qemu/configure index 365b7fb..38373db 100755 --- a/qemu/configure +++ b/qemu/configure @@ -262,6 +262,8 @@ for opt do ;; --enable-uname-release=*) uname_release="$optarg" ;; + --disable-dynamic-ticks) dynamic_ticks="no" + ;; esac done @@ -788,6 +790,9 @@ echo "TARGET_DIRS=$target_list" >> $config_mak if [ "$build_docs" = "yes" ] ; then echo "BUILD_DOCS=yes" >> $config_mak fi +if test "$dynamic_ticks" != "no" ; then + echo "#define DYNAMIC_TICKS 1" >> $config_h +fi # XXX: suppress that if [ "$bsd" = "yes" ] ; then diff --git a/qemu/vl.c b/qemu/vl.c index 4b73f77..05639d7 100644 --- a/qemu/vl.c +++ b/qemu/vl.c @@ -759,6 +759,35 @@ static unsigned int period = 1; static int timer_freq; #endif +#ifdef DYNAMIC_TICKS +/* If DYNAMIC_TICKS is defined (and use_dynamic_ticks selected) qemu does not + * attepmt to generate SIGALRM at a constant rate. Rather, the system timer is + * set to generate SIGALRM only when it is needed. DYNAMIC_TICKS reduces the + * number of SIGALRMs sent to idle dynamic-ticked guests. */ + +static void rearm_host_timer(void); +static int dynticks_create_timer(void); + +static int use_dynticks = 1; + +static int use_dynamic_ticks() { + return use_dynticks; +} + +#else + +static void rearm_host_timer(void) {} + +static int dynticks_create_timer(void) { + return 0; +} + +static int use_dynamic_ticks() { + return 0; +} + +#endif + QEMUClock *qemu_new_clock(int type) { QEMUClock *clock; @@ -803,6 +832,7 @@ void qemu_del_timer(QEMUTimer *ts) } pt = &t->next; } + rearm_host_timer(); } /* modify the current timer so that it will be fired when current_time @@ -862,6 +892,7 @@ static void qemu_run_timers(QEMUTimer **ptimer_head, int64_t current_time) /* run the callback (the timer list can be modified) */ ts->cb(ts->opaque); } + rearm_host_timer(); } int64_t qemu_get_clock(QEMUClock *clock) @@ -969,7 +1000,8 @@ static void host_alarm_handler(int host_signum) last_clock = ti; } #endif - if (qemu_timer_expired(active_timers[QEMU_TIMER_VIRTUAL], + if (use_dynamic_ticks() || + qemu_timer_expired(active_timers[QEMU_TIMER_VIRTUAL], qemu_get_clock(vm_clock)) || qemu_timer_expired(active_timers[QEMU_TIMER_REALTIME], qemu_get_clock(rt_clock))) { @@ -1126,21 +1158,26 @@ static void init_timer_alarm(void) act.sa_handler = host_alarm_handler; sigaction(SIGALRM, &act, NULL); - itv.it_interval.tv_sec = 0; - itv.it_interval.tv_usec = 999; /* for i386 kernel 2.6 to get 1 ms */ - itv.it_value.tv_sec = 0; - itv.it_value.tv_usec = 10 * 1000; - setitimer(ITIMER_REAL, &itv, NULL); - /* we probe the tick duration of the kernel to inform the user if - the emulated kernel requested a too high timer frequency */ - getitimer(ITIMER_REAL, &itv); + if (use_dynamic_ticks() && dynticks_create_timer()) { + /* dynticks disabled or failed to create a timer, fallback + * to old code. + */ + itv.it_interval.tv_sec = 0; + itv.it_interval.tv_usec = 999; /* for i386 kernel 2.6 to get 1 ms */ + itv.it_value.tv_sec = 0; + itv.it_value.tv_usec = 10 * 1000; + setitimer(ITIMER_REAL, &itv, NULL); + /* we probe the tick duration of the kernel to inform the user if + the emulated kernel requested a too high timer frequency */ + getitimer(ITIMER_REAL, &itv); + } #if defined(__linux__) /* XXX: force /dev/rtc usage because even 2.6 kernels may not have timers with 1 ms resolution. The correct solution will be to use the POSIX real time timers available in recent 2.6 kernels */ - if (itv.it_interval.tv_usec > 1000 || 1) { + if (!use_dynamic_ticks() && (itv.it_interval.tv_usec > 1000 || 1)) { /* try to use /dev/rtc to have a faster timer */ if ((!use_rtc && !use_hpet) || (start_rtc_timer() < 0)) goto use_itimer; @@ -6110,6 +6147,7 @@ void vm_start(void) cpu_enable_ticks(); vm_running = 1; vm_state_notify(1); + rearm_host_timer(); } } @@ -6536,6 +6574,9 @@ void help(void) "-no-rtc don't use /dev/rtc for timer alarm (do use gettimeofday)\n" "-use-hpet use /dev/hpet (HPET) instead of RTC for timer alarm\n" #endif +#ifdef DYNAMIC_TICKS + "-no-dyntick don't use dynamic ticks\n" +#endif "-option-rom rom load a file, rom, into the option ROM space\n" "\n" "During emulation, the following keys are useful:\n" @@ -6630,6 +6671,9 @@ enum { QEMU_OPTION_use_hpet, #endif QEMU_OPTION_cpu_vendor, +#ifdef DYNAMIC_TICKS + QEMU_OPTION_no_dyntick, +#endif }; typedef struct QEMUOption { @@ -6728,6 +6772,9 @@ const QEMUOption qemu_options[] = { { "use-hpet", 0, QEMU_OPTION_use_hpet }, #endif { "cpu-vendor", HAS_ARG, QEMU_OPTION_cpu_vendor }, +#if defined(DYNAMIC_TICKS) + { "no-dyntick", 0, QEMU_OPTION_no_dyntick }, +#endif { NULL }, }; @@ -6932,6 +6979,78 @@ static BOOL WINAPI qemu_ctrl_handler(DWORD type) #define MAX_NET_CLIENTS 32 +#ifdef DYNAMIC_TICKS + +static timer_t host_timer; + +static int dynticks_create_timer() { + struct sigevent ev; + + ev.sigev_value.sival_int = 0; + ev.sigev_notify = SIGEV_SIGNAL; + ev.sigev_signo = SIGALRM; + + if (timer_create(CLOCK_REALTIME, &ev, &host_timer)) { + perror("timer_create"); + + /* disable dynticks */ + fprintf(stderr, "Dynamic Ticks disabled\n"); + use_dynticks = 0; + + return -1; + } + + return 0; +} + +/* call host_alarm_handler just when the nearest QEMUTimer expires */ +/* expire_time is measured in nanosec for vm_clock but in millisec + * for rt_clock + */ +static void rearm_host_timer(void) { + struct itimerspec timeout; + int64_t nearest_delta_us = INT64_MAX; + + if (!use_dynamic_ticks()) + return; + + if (active_timers[QEMU_TIMER_REALTIME] || + active_timers[QEMU_TIMER_VIRTUAL]) { + int64_t vmdelta_us, current_us; + + if (active_timers[QEMU_TIMER_REALTIME]) + nearest_delta_us = (active_timers[QEMU_TIMER_REALTIME]->expire_time - qemu_get_clock(rt_clock))*1000; + + if (active_timers[QEMU_TIMER_VIRTUAL]) { + /* round up */ + vmdelta_us = (active_timers[QEMU_TIMER_VIRTUAL]->expire_time - qemu_get_clock(vm_clock)+999)/1000; + if (vmdelta_us < nearest_delta_us) + nearest_delta_us = vmdelta_us; + } + + /* Avoid arming the timer to negative, zero, or too low values */ + /* MIN_TIMER_REARM_US should be optimized */ +#define MIN_TIMER_REARM_US 250 + if (nearest_delta_us <= MIN_TIMER_REARM_US) + nearest_delta_us = MIN_TIMER_REARM_US; + + /* check whether a timer is already running */ + if (timer_gettime(host_timer, &timeout)) + perror("gettime"); + current_us = timeout.it_value.tv_sec * 1000000 + timeout.it_value.tv_nsec/1000; + if (current_us && current_us <= nearest_delta_us) + return; + + timeout.it_interval.tv_sec = 0; + timeout.it_interval.tv_nsec = 0; /* 0 for one-shot timer */ + timeout.it_value.tv_sec = nearest_delta_us / 1000000; + timeout.it_value.tv_nsec = nearest_delta_us % 1000000 * 1000; + if (timer_settime(host_timer, 0 /* RELATIVE */, &timeout, NULL)) + perror("settime"); + } +} +#endif /* DYNAMIC_TICKS */ + static int saved_argc; static char **saved_argv; @@ -7457,6 +7576,11 @@ int main(int argc, char **argv) case QEMU_OPTION_cpu_vendor: cpu_vendor_string = optarg; break; +#ifdef DYNAMIC_TICKS + case QEMU_OPTION_no_dyntick: + use_dynticks = 0; + break; +#endif } } } Luca -- Se non puoi convincerli, confondili.