Hi people,

After a (very) long time of silence on this, here's another go at it. This
time, I basically started from scratch and used a bit of code by Christiano
Haesbaert, which had been posted to tech@ a while ago, to detect the CPU
topology on amd64 and take it into account when moving processes between
CPUs.

This version has a single queue per CPU, getting rid of a) the single
system-wide runqueue and b) the queue for expired processes. This simplifies
things a bit and performs just as well as my previous versions (the only
difference is the order in which expired procs get selected for running on a
CPU). One advantage is that process selection is O(log n) in the number of
processes on the CPU's runqueue and depends neither on the total number of
processes in the system nor on the number of expired processes in the
runqueue.
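
To illustrate the data structure, here's a tiny standalone userland sketch
of the idea: the runqueue is an RB tree ordered by deadline, so inserting a
process and picking the next one to run are both O(log n). The struct name
and the deadline values below are made up for the example; the actual patch
further down does the same thing with struct proc and <sys/tree.h>.

/* Userland sketch of a deadline-ordered runqueue (illustration only). */
#include <sys/tree.h>
#include <sys/time.h>
#include <stdio.h>

struct fake_proc {
	RB_ENTRY(fake_proc) entry;
	struct timeval deadline;
	int pid;
};

static int
deadline_cmp(struct fake_proc *a, struct fake_proc *b)
{
	if (a == b)
		return (0);
	/* Earlier deadline sorts first, ties go to the right. */
	return (timercmp(&a->deadline, &b->deadline, <) ? -1 : 1);
}

RB_HEAD(fake_runq, fake_proc);
RB_GENERATE_STATIC(fake_runq, fake_proc, entry, deadline_cmp)

int
main(void)
{
	struct fake_runq runq = RB_INITIALIZER(&runq);
	struct fake_proc p[3] = {
		{ .deadline = { 0, 300 }, .pid = 1 },
		{ .deadline = { 0, 100 }, .pid = 2 },
		{ .deadline = { 0, 200 }, .pid = 3 },
	};
	struct fake_proc *next;
	int i;

	for (i = 0; i < 3; i++)
		RB_INSERT(fake_runq, &runq, &p[i]);	/* O(log n) insert */

	/* sched_chooseproc() equivalent: the earliest deadline wins. */
	next = RB_MIN(fake_runq, &runq);
	printf("next: pid %d\n", next->pid);		/* prints pid 2 */
	RB_REMOVE(fake_runq, &runq, next);
	return (0);
}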

The factors for the cost of moving a process between hardware threads, CPU
dies and CPU packages are guesses for now; I think they will have to be tuned
further. Sadly, I haven't had access to a multiprocessor machine with a more
diverse architecture than "a bunch of cores on the same die".
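
Just to make the effect of those factors concrete, here's a rough sketch of
how the final cost is scaled in sched_proc_to_cpu_cost(): the cost computed
from load, priority and resident set size is multiplied by a per-package or
per-core factor. The base cost of 10 below is a made-up number for the
example; the factors are the current guesses from the patch.

/* Illustration of the topology cost factors; the base cost is made up. */
#include <stdio.h>

int sched_cost_diffcore = 2;	/* cost factor for moving to a different core */
int sched_cost_diffpkg = 3;	/* cost factor for moving to a different package */

static int
topo_scale(int cost, int same_pkg, int same_core)
{
	if (!same_pkg)
		cost *= sched_cost_diffpkg;
	else if (!same_core)
		cost *= sched_cost_diffcore;
	return (cost);
}

int
main(void)
{
	printf("same core:      %d\n", topo_scale(10, 1, 1));	/* 10 */
	printf("other core:     %d\n", topo_scale(10, 1, 0));	/* 20 */
	printf("other package:  %d\n", topo_scale(10, 0, 0));	/* 30 */
	return (0);
}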

I tested this on some more machines than before: a Core i5, an i7 and my
Core 2 Duo, and on all of them the (perceived) interactivity improved. The
simplest benchmark I used was playing back a 1080p version of Big Buck Bunny
with mplayer. All machines I tested on had Intel graphics, one GM965 (on the
Core 2 Duo), the others Sandy Bridge devices. On all of them playback was
smoother, with the improvement most visible on the i7. With the default
scheduler, watching the movie was a big pain due to heavy frame dropping;
with my patch, the movie was watchable, with frame dropping only (barely)
noticeable in scenes with a lot of movement.

As before, I'm looking forward to any comments you might have, especially
cool benchmark ideas or the like.

-- 
    Gregor Best
Index: arch/amd64/amd64/identcpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/identcpu.c,v
retrieving revision 1.39
diff -u -r1.39 identcpu.c
--- arch/amd64/amd64/identcpu.c 19 Sep 2012 20:19:31 -0000      1.39
+++ arch/amd64/amd64/identcpu.c 4 Oct 2012 21:27:55 -0000
@@ -202,6 +202,8 @@
 
 void via_nano_setup(struct cpu_info *ci);
 
+void cpu_topology(struct cpu_info *ci);
+
 void
 via_nano_setup(struct cpu_info *ci)
 {
@@ -470,4 +472,123 @@
                sensordev_install(&ci->ci_sensordev);
 #endif
        }
+
+       cpu_topology(ci);
+}
+
+/*
+ * Base 2 logarithm of an int. Returns 0 for 0 (yeye, I know).
+ */
+static int
+log2(unsigned int i)
+{
+       int ret = 0;
+
+       while (i >>= 1)
+               ret++;
+
+       return (ret);
+}
+
+static int
+mask_width(u_int x)
+{
+       int bit;
+       int mask;
+       int powerof2;
+
+       powerof2 = ((x - 1) & x) == 0;
+       mask = (x << (1 - powerof2)) - 1;
+
+       /* fls */
+       if (mask == 0)
+               return (0);
+       for (bit = 1; mask != 1; bit++)
+               mask = (unsigned int)mask >> 1;
+
+       return (bit);
+}
+
+/*
+ * Build up the cpu topology for the given cpu; must run on that cpu.
+ */
+void
+cpu_topology(struct cpu_info *ci)
+{
+       u_int32_t eax, ebx, ecx, edx;
+       u_int32_t apicid, max_apicid, max_coreid;
+       u_int32_t smt_bits, core_bits, pkg_bits;
+       u_int32_t smt_mask, core_mask, pkg_mask;
+
+       /* We need at least apicid at CPUID 1 */
+       CPUID(0, eax, ebx, ecx, edx);
+       if (eax < 1)
+               goto no_topology;
+
+       /* Initial apicid */
+       CPUID(1, eax, ebx, ecx, edx);
+       apicid = (ebx >> 24) & 0xff;
+
+       if (strcmp(cpu_vendor, "AuthenticAMD") == 0) {
+               /* We need at least apicid at CPUID 0x80000008 */
+               CPUID(0x80000000, eax, ebx, ecx, edx);
+               if (eax < 0x80000008)
+                       goto no_topology;
+
+               CPUID(0x80000008, eax, ebx, ecx, edx);
+               core_bits = (ecx >> 12) & 0xf;
+               if (core_bits == 0)
+                       goto no_topology;
+               /* So coreidsize 2 gives 3, 3 gives 7... */
+               core_mask = (1 << core_bits) - 1;
+               /* Core id is the least significant considering mask */
+               ci->ci_core_id = apicid & core_mask;
+               /* Pkg id is the upper remaining bits */
+               ci->ci_pkg_id = apicid & ~core_mask;
+               ci->ci_pkg_id >>= core_bits;
+       } else if (strcmp(cpu_vendor, "GenuineIntel") == 0) {
+               /* We only support leaf 1/4 detection */
+               CPUID(0, eax, ebx, ecx, edx);
+               if (eax < 4)
+                       goto no_topology;
+               /* Get max_apicid */
+               CPUID(1, eax, ebx, ecx, edx);
+               max_apicid = (ebx >> 16) & 0xff;
+               /* Get max_coreid */
+               CPUID2(4, 0, eax, ebx, ecx, edx);
+               max_coreid = ((eax >> 26) & 0x3f) + 1;
+               /* SMT */
+               smt_bits = mask_width(max_apicid / max_coreid);
+               smt_mask = (1 << smt_bits) - 1;
+               /* Core */
+               core_bits = log2(max_coreid);
+               core_mask = (1 << (core_bits + smt_bits)) - 1;
+               core_mask ^= smt_mask;
+               /* Pkg */
+               pkg_bits = core_bits + smt_bits;
+               pkg_mask = -1 << core_bits;
+
+               ci->ci_smt_id = apicid & smt_mask;
+               ci->ci_core_id = (apicid & core_mask) >> smt_bits;
+               ci->ci_pkg_id = (apicid & pkg_mask) >> pkg_bits;
+       } else
+               goto no_topology;
+#ifdef DEBUG
+       printf("cpu%d: smt %u, core %u, pkg %u "
+               "(apicid 0x%x, max_apicid 0x%x, max_coreid 0x%x, smt_bits 0x%x, smt_mask 0x%x, "
+               "core_bits 0x%x, core_mask 0x%x, pkg_bits 0x%x, pkg_mask 0x%x)\n",
+               ci->ci_cpuid, ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id,
+               apicid, max_apicid, max_coreid, smt_bits, smt_mask, core_bits,
+               core_mask, pkg_bits, pkg_mask);
+#else
+       printf("cpu%d: smt %u, core %u, package %u\n", ci->ci_cpuid,
+               ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id);
+
+#endif
+       return;
+       /* We can't map, so consider ci_core_id as ci_cpuid */
+no_topology:
+       ci->ci_smt_id  = 0;
+       ci->ci_core_id = ci->ci_cpuid;
+       ci->ci_pkg_id  = 0;
 }
Index: arch/amd64/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
retrieving revision 1.73
diff -u -r1.73 cpu.h
--- arch/amd64/include/cpu.h    17 Apr 2012 16:02:33 -0000      1.73
+++ arch/amd64/include/cpu.h    4 Oct 2012 21:27:55 -0000
@@ -101,6 +101,11 @@
        u_int32_t       ci_cflushsz;
        u_int64_t       ci_tsc_freq;
 
+#define ARCH_HAVE_CPU_TOPOLOGY
+       u_int32_t       ci_smt_id;
+       u_int32_t       ci_core_id;
+       u_int32_t       ci_pkg_id;
+
        struct cpu_functions *ci_func;
        void (*cpu_setup)(struct cpu_info *);
        void (*ci_info)(struct cpu_info *);
Index: arch/amd64/include/specialreg.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/specialreg.h,v
retrieving revision 1.22
diff -u -r1.22 specialreg.h
--- arch/amd64/include/specialreg.h     24 Aug 2012 02:49:23 -0000      1.22
+++ arch/amd64/include/specialreg.h     4 Oct 2012 21:27:55 -0000
@@ -209,10 +209,14 @@
 #define        CPUID2MODEL(cpuid)      (((cpuid) >> 4) & 15)
 #define        CPUID2STEPPING(cpuid)   ((cpuid) & 15)
 
-#define        CPUID(code, eax, ebx, ecx, edx)                         \
+#define CPUID2(eax_code, ecx_code, eax, ebx, ecx, edx)         \
        __asm("cpuid"                                           \
-           : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)    \
-           : "a" (code));
+       : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)    \
+       : "a" (eax_code), "c" (ecx_code));
+
+#define CPUID(code, eax, ebx, ecx, edx)                                \
+       CPUID2(code, 0, eax, ebx, ecx, edx)
+
 #define        CPUID_LEAF(code, leaf, eax, ebx, ecx, edx)              \
        __asm("cpuid"                                           \
            : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)    \
Index: kern/kern_clock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clock.c,v
retrieving revision 1.75
diff -u -r1.75 kern_clock.c
--- kern/kern_clock.c   2 Aug 2012 03:18:48 -0000       1.75
+++ kern/kern_clock.c   4 Oct 2012 21:27:58 -0000
@@ -233,7 +233,7 @@
        if (stathz == 0)
                statclock(frame);
 
-       if (--ci->ci_schedstate.spc_rrticks <= 0)
+       if (p && (--(p->p_rrticks) <= 0))
                roundrobin(ci);
 
        /*
Index: kern/kern_proc.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_proc.c,v
retrieving revision 1.48
diff -u -r1.48 kern_proc.c
--- kern/kern_proc.c    10 Apr 2012 15:50:52 -0000      1.48
+++ kern/kern_proc.c    4 Oct 2012 21:27:58 -0000
@@ -398,8 +398,6 @@
            p->p_comm, p->p_pid, pst, p->p_flag, P_BITS);
        (*pr)("    pri=%u, usrpri=%u, nice=%d\n",
            p->p_priority, p->p_usrpri, p->p_p->ps_nice);
-       (*pr)("    forw=%p, list=%p,%p\n",
-           TAILQ_NEXT(p, p_runq), p->p_list.le_next, p->p_list.le_prev);
        (*pr)("    process=%p user=%p, vmspace=%p\n",
            p->p_p, p->p_addr, p->p_vmspace);
        (*pr)("    estcpu=%u, cpticks=%d, pctcpu=%u.%u, swtime=%u\n",
Index: kern/kern_sched.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.27
diff -u -r1.27 kern_sched.c
--- kern/kern_sched.c   10 Jul 2012 18:20:37 -0000      1.27
+++ kern/kern_sched.c   4 Oct 2012 21:27:58 -0000
@@ -24,11 +24,22 @@
 #include <sys/resourcevar.h>
 #include <sys/signalvar.h>
 #include <sys/mutex.h>
+#include <sys/tree.h>
 
 #include <uvm/uvm_extern.h>
 
 #include <sys/malloc.h>
 
+static int
+sched_cmp_proc(struct proc *a, struct proc *b) {
+       if (a == b)
+               return 0;
+       if (timercmp(&(a->p_deadline), &(b->p_deadline), <))
+               return -1;
+       return 1;
+}
+
+RB_GENERATE_STATIC(prochead, proc, p_runq, sched_cmp_proc);
 
 void sched_kthreads_create(void *);
 
@@ -79,10 +90,8 @@
 sched_init_cpu(struct cpu_info *ci)
 {
        struct schedstate_percpu *spc = &ci->ci_schedstate;
-       int i;
 
-       for (i = 0; i < SCHED_NQS; i++)
-               TAILQ_INIT(&spc->spc_qs[i]);
+       RB_INIT(&spc->spc_runq);
 
        spc->spc_idleproc = NULL;
 
@@ -158,18 +167,17 @@
 
                cpuset_add(&sched_idle_cpus, ci);
                cpu_idle_enter();
-               while (spc->spc_whichqs == 0) {
-                       if (spc->spc_schedflags & SPCF_SHOULDHALT &&
-                           (spc->spc_schedflags & SPCF_HALTED) == 0) {
-                               cpuset_del(&sched_idle_cpus, ci);
-                               SCHED_LOCK(s);
-                               atomic_setbits_int(&spc->spc_schedflags,
-                                   spc->spc_whichqs ? 0 : SPCF_HALTED);
-                               SCHED_UNLOCK(s);
-                               wakeup(spc);
-                       }
-                       cpu_idle_cycle();
+
+               if (spc->spc_schedflags & SPCF_SHOULDHALT &&
+                        (spc->spc_schedflags & SPCF_HALTED) == 0) {
+                       cpuset_del(&sched_idle_cpus, ci);
+                       SCHED_LOCK(s);
+                       atomic_setbits_int(&spc->spc_schedflags, SPCF_HALTED);
+                       SCHED_UNLOCK(s);
+                       wakeup(spc);
                }
+               cpu_idle_cycle();
+
                cpu_idle_leave();
                cpuset_del(&sched_idle_cpus, ci);
        }
@@ -222,14 +230,13 @@
 setrunqueue(struct proc *p)
 {
        struct schedstate_percpu *spc;
-       int queue = p->p_priority >> 2;
 
        SCHED_ASSERT_LOCKED();
        spc = &p->p_cpu->ci_schedstate;
        spc->spc_nrun++;
 
-       TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
-       spc->spc_whichqs |= (1 << queue);
+       KASSERT(!RB_FIND(prochead, &spc->spc_runq, p));
+       RB_INSERT(prochead, &spc->spc_runq, p);
        cpuset_add(&sched_queued_cpus, p->p_cpu);
 
        if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
@@ -240,38 +247,29 @@
 remrunqueue(struct proc *p)
 {
        struct schedstate_percpu *spc;
-       int queue = p->p_priority >> 2;
 
        SCHED_ASSERT_LOCKED();
        spc = &p->p_cpu->ci_schedstate;
        spc->spc_nrun--;
 
-       TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
-       if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
-               spc->spc_whichqs &= ~(1 << queue);
-               if (spc->spc_whichqs == 0)
-                       cpuset_del(&sched_queued_cpus, p->p_cpu);
-       }
+       RB_REMOVE(prochead, &spc->spc_runq, p);
+       if (RB_EMPTY(&spc->spc_runq))
+               cpuset_del(&sched_queued_cpus, p->p_cpu);
 }
 
 struct proc *
 sched_chooseproc(void)
 {
        struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
-       struct proc *p;
-       int queue;
+       struct proc *p, *p_tmp = NULL;
 
        SCHED_ASSERT_LOCKED();
 
        if (spc->spc_schedflags & SPCF_SHOULDHALT) {
-               if (spc->spc_whichqs) {
-                       for (queue = 0; queue < SCHED_NQS; queue++) {
-                               TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
-                                       remrunqueue(p);
-                                       p->p_cpu = sched_choosecpu(p);
-                                       setrunqueue(p);
-                               }
-                       }
+               RB_FOREACH_SAFE(p, prochead, &spc->spc_runq, p_tmp) {
+                       remrunqueue(p);
+                       p->p_cpu = sched_choosecpu(p);
+                       setrunqueue(p);
                }
                p = spc->spc_idleproc;
                KASSERT(p);
@@ -280,17 +278,14 @@
                return (p);
        }
 
-again:
-       if (spc->spc_whichqs) {
-               queue = ffs(spc->spc_whichqs) - 1;
-               p = TAILQ_FIRST(&spc->spc_qs[queue]);
+       if (!RB_EMPTY(&spc->spc_runq)) {
+               p = RB_MIN(prochead, &spc->spc_runq);
                remrunqueue(p);
                sched_noidle++;
                KASSERT(p->p_stat == SRUN);
        } else if ((p = sched_steal_proc(curcpu())) == NULL) {
-               p = spc->spc_idleproc;
-               if (p == NULL) {
-                        int s;
+               while ((p = spc->spc_idleproc) == NULL) {
+                       int s;
                        /*
                         * We get here if someone decides to switch during
                         * boot before forking kthreads, bleh.
@@ -302,8 +297,7 @@
                        spl0();
                        delay(10);
                        SCHED_LOCK(s);
-                       goto again;
-                }
+               }
                KASSERT(p);
                p->p_stat = SRUN;
        } 
@@ -441,15 +435,13 @@
 
        while ((ci = cpuset_first(&set)) != NULL) {
                struct proc *p;
-               int queue;
                int cost;
 
                cpuset_del(&set, ci);
 
                spc = &ci->ci_schedstate;
 
-               queue = ffs(spc->spc_whichqs) - 1;
-               TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
+               RB_FOREACH(p, prochead, &spc->spc_runq) {
                        if (p->p_flag & P_CPUPEG)
                                continue;
 
@@ -502,6 +494,10 @@
 int sched_cost_priority = 1;
 int sched_cost_runnable = 3;
 int sched_cost_resident = 1;
+#ifdef ARCH_HAVE_CPU_TOPOLOGY
+int sched_cost_diffcore = 2; /* cost for moving to a different core */
+int sched_cost_diffpkg = 3; /* cost for moving to a different package */
+#endif
 
 int
 sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p)
@@ -541,6 +537,13 @@
                    log2(pmap_resident_count(p->p_vmspace->vm_map.pmap));
                cost -= l2resident * sched_cost_resident;
        }
+
+#ifdef ARCH_HAVE_CPU_TOPOLOGY
+       if (p->p_cpu->ci_pkg_id != ci->ci_pkg_id)
+               cost *= sched_cost_diffpkg;
+       else if (p->p_cpu->ci_core_id != ci->ci_core_id)
+               cost *= sched_cost_diffcore;
+#endif
 
        return (cost);
 }
Index: kern/kern_synch.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_synch.c,v
retrieving revision 1.104
diff -u -r1.104 kern_synch.c
--- kern/kern_synch.c   21 Aug 2012 19:51:58 -0000      1.104
+++ kern/kern_synch.c   4 Oct 2012 21:27:58 -0000
@@ -205,7 +205,7 @@
        p->p_wmesg = wmesg;
        p->p_slptime = 0;
        p->p_priority = prio & PRIMASK;
-       TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq);
+       TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_slpq);
 }
 
 void
@@ -342,7 +342,7 @@
 unsleep(struct proc *p)
 {
        if (p->p_wchan) {
-               TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_runq);
+               TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_slpq);
                p->p_wchan = NULL;
        }
 }
@@ -361,7 +361,7 @@
        SCHED_LOCK(s);
        qp = &slpque[LOOKUP(ident)];
        for (p = TAILQ_FIRST(qp); p != NULL && n != 0; p = pnext) {
-               pnext = TAILQ_NEXT(p, p_runq);
+               pnext = TAILQ_NEXT(p, p_slpq);
 #ifdef DIAGNOSTIC
                if (p->p_stat != SSLEEP && p->p_stat != SSTOP)
                        panic("wakeup: p_stat is %d", (int)p->p_stat);
@@ -369,7 +369,7 @@
                if (p->p_wchan == ident) {
                        --n;
                        p->p_wchan = 0;
-                       TAILQ_REMOVE(qp, p, p_runq);
+                       TAILQ_REMOVE(qp, p, p_slpq);
                        if (p->p_stat == SSLEEP)
                                setrunnable(p);
                }
Index: kern/sched_bsd.c
===================================================================
RCS file: /cvs/src/sys/kern/sched_bsd.c,v
retrieving revision 1.30
diff -u -r1.30 sched_bsd.c
--- kern/sched_bsd.c    9 Jul 2012 17:27:32 -0000       1.30
+++ kern/sched_bsd.c    4 Oct 2012 21:27:58 -0000
@@ -77,20 +77,18 @@
 
        timeout_set(&schedcpu_to, schedcpu, &schedcpu_to);
 
-       rrticks_init = hz / 10;
+       rrticks_init = hz / 20;
        schedcpu(&schedcpu_to);
 }
 
 /*
- * Force switch among equal priority processes every 100ms.
+ * Force switch among equal priority processes every 50ms.
  */
 void
 roundrobin(struct cpu_info *ci)
 {
        struct schedstate_percpu *spc = &ci->ci_schedstate;
 
-       spc->spc_rrticks = rrticks_init;
-
        if (ci->ci_curproc != NULL) {
                if (spc->spc_schedflags & SPCF_SEENRR) {
                        /*
@@ -252,8 +250,7 @@
                resetpriority(p);
                if (p->p_priority >= PUSER) {
                        if (p->p_stat == SRUN &&
-                           (p->p_priority / SCHED_PPQ) !=
-                           (p->p_usrpri / SCHED_PPQ)) {
+                           p->p_priority == p->p_usrpri) {
                                remrunqueue(p);
                                p->p_priority = p->p_usrpri;
                                setrunqueue(p);
@@ -304,6 +301,7 @@
        SCHED_LOCK(s);
        p->p_priority = p->p_usrpri;
        p->p_stat = SRUN;
+       generate_deadline(p, 1);
        setrunqueue(p);
        p->p_ru.ru_nvcsw++;
        mi_switch();
@@ -332,6 +330,7 @@
        p->p_priority = p->p_usrpri;
        p->p_stat = SRUN;
        p->p_cpu = sched_choosecpu(p);
+       generate_deadline(p, 0);
        setrunqueue(p);
        p->p_ru.ru_nivcsw++;
        mi_switch();
@@ -531,8 +530,7 @@
 
        SCHED_ASSERT_LOCKED();
 
-       newpriority = PUSER + p->p_estcpu +
-           NICE_WEIGHT * (p->p_p->ps_nice - NZERO);
+       newpriority = PUSER + p->p_estcpu + (p->p_p->ps_nice - NZERO);
        newpriority = min(newpriority, MAXPRI);
        p->p_usrpri = newpriority;
        resched_proc(p, p->p_usrpri);
@@ -564,4 +562,33 @@
        if (p->p_priority >= PUSER)
                p->p_priority = p->p_usrpri;
        SCHED_UNLOCK(s);
+}
+
+void
+generate_deadline(struct proc *p, char voluntary) {
+       /*
+       * For nice values between 0 and 39 inclusively, the offset lies between
+       * 32 and 1280 milliseconds for a machine with hz=100. That means that
+       * processes with nice value=0 (i.e. -20 in userland) will be executed
+       * 32 milliseconds in the future at the latest. Processes with very
+       * little priority will be executed 1.28 seconds in the future at the very
+       * latest. The shift is done to ensure that the lowest possible offset is
+       * larger than the timeslice, in order to make sure that the scheduler does
+       * not degenerate to round robin behaviour when more than just a few processes
+       * with high priority are started.
+       *
+       * If the process voluntarily yielded its CPU, we reward it by halving its
+       * deadline offset.
+       */
+       unsigned int offset_msec = ((p->p_p->ps_nice + 1) * rrticks_init) << (voluntary ? 2 : 3);
+       struct timeval offset = {
+               .tv_sec  = offset_msec / 1000,
+               .tv_usec = (offset_msec % 1000) * 1000  /* ms -> us */
+       };
+       struct timeval now;
+       microuptime(&now);
+
+       timeradd(&now, &offset, &(p->p_deadline));
+       if (!voluntary)
+               p->p_rrticks = rrticks_init;
 }
Index: sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.163
diff -u -r1.163 proc.h
--- sys/proc.h  11 Sep 2012 15:44:19 -0000      1.163
+++ sys/proc.h  4 Oct 2012 21:27:58 -0000
@@ -247,8 +247,9 @@
 #define        PS_EXITING      _P_EXITING
 
 struct proc {
-       TAILQ_ENTRY(proc) p_runq;
+       TAILQ_ENTRY(proc) p_slpq;
        LIST_ENTRY(proc) p_list;        /* List of all processes. */
+       RB_ENTRY(proc) p_runq;
 
        struct  process *p_p;           /* The process of this thread. */
        TAILQ_ENTRY(proc) p_thr_link;/* Threads in a process linkage. */
@@ -280,6 +281,8 @@
        int     p_sigwait;      /* signal handled by sigwait() */
 
        /* scheduling */
+       int p_rrticks;          /* ticks until roundrobin() */
+       struct timeval p_deadline; /* deadline used to order the runqueue */
        u_int   p_estcpu;        /* Time averaged value of p_cpticks. */
        int     p_cpticks;       /* Ticks of cpu time. */
        fixpt_t p_pctcpu;        /* %cpu for this process during p_swtime */
Index: sys/sched.h
===================================================================
RCS file: /cvs/src/sys/sys/sched.h,v
retrieving revision 1.30
diff -u -r1.30 sched.h
--- sys/sched.h 16 Nov 2011 20:50:19 -0000      1.30
+++ sys/sched.h 4 Oct 2012 21:27:58 -0000
@@ -70,6 +70,7 @@
 #define        _SYS_SCHED_H_
 
 #include <sys/queue.h>
+#include <sys/tree.h>
 
 /*
  * Posix defines a <sched.h> which may want to include <sys/sched.h>
@@ -99,7 +100,6 @@
        u_int spc_schedticks;           /* ticks for schedclock() */
        u_int64_t spc_cp_time[CPUSTATES]; /* CPU state statistics */
        u_char spc_curpriority;         /* usrpri of curproc */
-       int spc_rrticks;                /* ticks until roundrobin() */
        int spc_pscnt;                  /* prof/stat counter */
        int spc_psdiv;                  /* prof/stat divisor */ 
        struct proc *spc_idleproc;      /* idle proc for this cpu */
@@ -107,8 +107,7 @@
        u_int spc_nrun;                 /* procs on the run queues */
        fixpt_t spc_ldavg;              /* shortest load avg. for this cpu */
 
-       TAILQ_HEAD(prochead, proc) spc_qs[SCHED_NQS];
-       volatile uint32_t spc_whichqs;
+       RB_HEAD(prochead, proc) spc_runq;
 
 #ifdef notyet
        struct proc *spc_reaper;        /* dead proc reaper */
@@ -125,9 +124,7 @@
 #define SPCF_SHOULDHALT                0x0004  /* CPU should be vacated */
 #define SPCF_HALTED            0x0008  /* CPU has been halted */
 
-#define        SCHED_PPQ       (128 / SCHED_NQS)       /* priorities per queue */
-#define NICE_WEIGHT 2                  /* priorities per nice level */
-#define        ESTCPULIM(e) min((e), NICE_WEIGHT * PRIO_MAX - SCHED_PPQ)
+#define        ESTCPULIM(e) min((e), PRIO_MAX)
 
 extern int schedhz;                    /* ideally: 16 */
 extern int rrticks_init;               /* ticks per roundrobin() */
@@ -152,13 +149,14 @@
 void cpu_idle_cycle(void);
 void cpu_idle_leave(void);
 void sched_peg_curproc(struct cpu_info *ci);
+void generate_deadline(struct proc *, char);
 
 #ifdef MULTIPROCESSOR
 void sched_start_secondary_cpus(void);
 void sched_stop_secondary_cpus(void);
 #endif
 
-#define curcpu_is_idle()       (curcpu()->ci_schedstate.spc_whichqs == 0)
+#define curcpu_is_idle()       (RB_EMPTY(&curcpu()->ci_schedstate.spc_runq))
 
 void sched_init_runqueues(void);
 void setrunqueue(struct proc *);
