Hi Aubrey, Can you run this patch through SPECPower? It looks good here in other testing.
Sorry I have not tried your patch yet on our test machine. I will experiment with it Friday. :-) Thank you! Bill On 12/04/08 17:29, Li, Aubrey wrote: > Bill Holler wrote: > > >> Hi Tesla Dev, >> >> I have been experimenting not going into C3 or C2 when >> the number of non-idle CPUs in the cpu-partition exceeds >> a threshold. This is an attempt to regain the high load performance. >> These numbers are on a 2-socket system. >> >> With C3 threshold = 40% active and C2 threshold = 60% active >> the libmicro fork_1000 benchmark completes in 80/100 the time. >> >> With C3 threshold = 20% active and C2 threshold = 30% active >> the libmicro fork_1000 benchmark completes in 50/100 the time. >> These numbers are still about 80/100 slower than with c-states >> totally disabled. >> >> >> I am also going to experiment with cpu idle/wakeup rate. >> I suspect this may be more important for performance than >> the number of active cpus. >> >> > > How do you calculate the active percent? > Are you using a polling mechanism? > Looking forward to reviewing the code... > > Thanks, > -Aubrey > _______________________________________________ > tesla-dev mailing list > tesla-dev at opensolaris.org > http://mail.opensolaris.org/mailman/listinfo/tesla-dev > bash-3.2$ hg pdiff Enter passphrase for key '/home/bholler/.ssh/id_dsa': diff -r dcaed6292fbd -r 7f93d91e9bbf usr/src/uts/i86pc/io/cpudrv/cpu_idle.c --- a/usr/src/uts/i86pc/io/cpudrv/cpu_idle.c Tue Dec 02 22:15:09 2008 -0800 +++ b/usr/src/uts/i86pc/io/cpudrv/cpu_idle.c Thu Dec 04 20:10:58 2008 -0800 @@ -40,6 +40,7 @@ #include <sys/dtrace.h> #include <sys/sdt.h> #include <sys/callb.h> +#include <sys/cpuvar.h> extern void cpu_idle_adaptive(void); @@ -47,6 +48,7 @@ static void cpu_idle_fini(cpudrv_devstate_t *); static boolean_t cpu_deep_idle_callb(void *arg, int code); static boolean_t cpu_idle_cpr_callb(void *arg, int code); +static int cpu_rate_throttle(int cs_type); /* * Interfaces for modules implementing Intel's deep c-state. 
@@ -56,6 +58,19 @@ cpu_idle_init, cpu_idle_fini, }; + +/* + * Tunables for C3 and C2 cstate throttling based on idle rate. + * cpu_idle_count[cpuid] contains the idle count during the last time interval + * of period cpu_idle_period ending at time cpu_idle_count_time[cpuid]. + * cpu_idle_count_new[cpuid] contains the current count. + */ +hrtime_t cpu_idle_count_time[NCPU]; +uint_t cpu_idle_count[NCPU]; +uint_t cpu_idle_count_new[NCPU]; +uint_t cpu_idle_period = 10 * 1000 * 1000; /* 10 milliseconds */ +uint_t cpu_idle_rate1 = 0x10; /* C2 wakeup rate throttle */ +uint_t cpu_idle_rate2 = 0x30; /* C3 wakeup rate throttle */ static kmutex_t cpu_idle_callb_mutex; static callb_id_t cpu_deep_idle_callb_id; @@ -513,6 +528,45 @@ } /* + * Throttle deep cstate to C2 or C1 when idle frequency is above a threshold + */ +static int +cpu_rate_throttle(int cs_type) +{ + hrtime_t now, start; + processorid_t cpuid = CPU->cpu_id; + int i; + + ++cpu_idle_count_new[cpuid]; + + now = tsc_read(); + scalehrtime(&now); + + start = cpu_idle_count_time[cpuid]; + if (now > start + cpu_idle_period) { + cpu_idle_count[cpuid] = cpu_idle_count_new[cpuid]; + cpu_idle_count_new[cpuid] = 0; + cpu_idle_count_time[cpuid] = now; + } + + switch (cs_type) { + case CPU_ACPI_C2: + /* FALLTHROUGH */ + case CPU_ACPI_C3: + if (cpu_idle_count[cpuid] > cpu_idle_rate1) + return (CPU_ACPI_C1); + + if (cpu_idle_count[cpuid] > cpu_idle_rate2) + return (CPU_ACPI_C2); + + break; + default: + break; + } + return (cs_type); +} + +/* * Idle the present CPU, deep c-state is supported */ void @@ -522,6 +576,7 @@ struct machcpu *mcpu = &(cp->cpu_m); int instance = cp->cpu_id; uint16_t cs_type = mcpu->mcpu_idle_type; + uint16_t act_cs_type; cpudrv_devstate_t *cpudsp; cpu_acpi_handle_t handle; cpu_acpi_cstate_t *cstate; @@ -564,10 +619,12 @@ if (cpu_max_cstates > CPU_MAX_CSTATES) cpu_max_cstates = CPU_MAX_CSTATES; + act_cs_type = cpu_rate_throttle(cs_type); + cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle); - cstate += 
cs_type - 1; + cstate += act_cs_type - 1; - switch (cs_type) { + switch (act_cs_type) { case CPU_ACPI_C1: start = tsc_read(); (*non_deep_idle_cpu)(); -------------- next part -------------- A non-text attachment was scrubbed... Name: idle_rate.bundle Type: application/octet-stream Size: 1257 bytes Desc: not available URL: <http://mail.opensolaris.org/pipermail/tesla-dev/attachments/20081204/dbc8595d/attachment.obj>
