Hi people, attached is a patch that basically rewrites the CPU scheduler. It replaces the multilevel feedback queue currently employed with an earliest effective deadline first approach, similar to the BrainFuck Scheduler for Linux.
My main motivation for rewriting the scheduler was mplayer getting killed all the time because it went over its share of CPU resources, something which cannot happen with the patch I wrote. The algorithm employed simply assigns a (soft) deadline to each process, which is calculated based solely on nice value, and selects the next process to execute based on the earliest deadline in the runqueue. If more than one deadline is missed, the first one encountered in FIFO order (FIFO with respect to insertion into the runqueue) with a deadline smaller than the current time is selected. This prevents processes from yielding a small time before their timeslice expires to fool the scheduler into leaving them in their respective priority queue, thus preventing multiple processes from teaming up to starve others of CPU runtime. I did a few benchmarks to test the difference between the original and my patched scheduler to see whether there were any improvements. The tool I used was sysbench (with both CPU and Threads benchmarks). There are a few graphs of the tests at http://gurkenfunk.de/projects/openbsd-eevdf, but it looks as if my patches improve latency on workloads with high CPU usage, little I/O and a relatively large number of threads. I'm not aiming for a "yeah, nice, we'll merge it" on this, but rather for suggestions as to whether it's worth anyone's time to pursue this further. Oh, also, with the patch, the scheduler code is 225 lines shorter and IMHO a bit easier to understand. -- Gregor Best
? sys/.git
? sys/.gitignore
? sys/vim.core
? sys/arch/alpha/stand/installboot/installboot.8.manlint
? sys/arch/alpha/stand/setnetbootinfo/setnetbootinfo.8.manlint
? sys/arch/amd64/amd64/.acpi_machdep.c.swp
? sys/arch/amd64/include/.cpu.h.swp
? sys/arch/armish/stand/boot/boot.8.manlint
? sys/arch/aviion/stand/a2coff/a2coff.8.manlint
? sys/arch/hppa/stand/boot/boot.8.manlint
? sys/arch/hppa/stand/mkboot/mkboot.8.manlint
? sys/arch/hppa64/stand/boot/boot.8.manlint
? sys/arch/hppa64/stand/mkboot/mkboot.8.manlint
? sys/arch/i386/stand/biosboot/biosboot.8.manlint
? sys/arch/i386/stand/boot/boot.8.manlint
? sys/arch/i386/stand/cdboot/cdboot.8.manlint
? sys/arch/i386/stand/installboot/installboot.8.manlint
? sys/arch/i386/stand/pxeboot/pxeboot.8.manlint
? sys/arch/landisk/stand/boot/boot.8.manlint
? sys/arch/landisk/stand/mbr/mbr.8.manlint
? sys/arch/landisk/stand/xxboot/xxboot.8.manlint
? sys/arch/mvme68k/stand/installboot/installboot.8.manlint
? sys/arch/mvme88k/stand/installboot/installboot.8.manlint
? sys/arch/sgi/stand/sgivol/sgivol.8.manlint
? sys/arch/socppc/stand/boot/boot.8.manlint
? sys/arch/socppc/stand/mkboot/mkboot.8.manlint
? sys/arch/sparc/stand/installboot/installboot.8.manlint
? sys/arch/sparc64/stand/installboot/installboot.8.manlint
? sys/arch/zaurus/stand/zboot/boot.8.manlint
? sys/dev/microcode/atmel/atu-at76c503-i3863-ext
? sys/dev/microcode/atmel/atu-at76c503-i3863-int
? sys/dev/microcode/atmel/atu-at76c503-rfmd-acc-ext
? sys/dev/microcode/atmel/atu-at76c503-rfmd-acc-int
? sys/dev/microcode/atmel/atu-at76c505-rfmd-ext
? sys/dev/microcode/atmel/atu-at76c505-rfmd-int
? sys/dev/microcode/atmel/atu-intersil-ext
? sys/dev/microcode/atmel/atu-intersil-int
? sys/dev/microcode/atmel/atu-rfmd-ext
? sys/dev/microcode/atmel/atu-rfmd-int
? sys/dev/microcode/atmel/atu-rfmd2958-ext
? sys/dev/microcode/atmel/atu-rfmd2958-int
? sys/dev/microcode/atmel/atu-rfmd2958smc-ext
? sys/dev/microcode/atmel/atu-rfmd2958smc-int
? sys/dev/microcode/atmel/build
? sys/dev/microcode/bnx/bnx-b06
? sys/dev/microcode/bnx/bnx-b09
? sys/dev/microcode/bnx/bnx-rv2p
? sys/dev/microcode/bnx/bnx-xi-rv2p
? sys/dev/microcode/bnx/bnx-xi90-rv2p
? sys/dev/microcode/bnx/build
? sys/dev/microcode/cirruslogic/build
? sys/dev/microcode/cirruslogic/cs4280
? sys/dev/microcode/fxp/build
? sys/dev/microcode/fxp/fxp-d101a
? sys/dev/microcode/fxp/fxp-d101b0
? sys/dev/microcode/fxp/fxp-d101ma
? sys/dev/microcode/fxp/fxp-d101s
? sys/dev/microcode/fxp/fxp-d102
? sys/dev/microcode/fxp/fxp-d102c
? sys/dev/microcode/fxp/fxp-d102e
? sys/dev/microcode/kue/build
? sys/dev/microcode/kue/kue
? sys/dev/microcode/myx/build
? sys/dev/microcode/myx/myx-eth_z8e
? sys/dev/microcode/myx/myx-ethp_z8e
? sys/dev/microcode/ral/build
? sys/dev/microcode/ral/ral-rt2561
? sys/dev/microcode/ral/ral-rt2561s
? sys/dev/microcode/ral/ral-rt2661
? sys/dev/microcode/ral/ral-rt2860
? sys/dev/microcode/rum/build
? sys/dev/microcode/rum/rum-rt2573
? sys/dev/microcode/rum/run-rt2870
? sys/dev/microcode/rum/run-rt3071
? sys/dev/microcode/tht/build
? sys/dev/microcode/tht/tht
? sys/dev/microcode/tigon/build
? sys/dev/microcode/tigon/tigon1
? sys/dev/microcode/tigon/tigon2
? sys/dev/microcode/tusb3410/build
? sys/dev/microcode/tusb3410/tusb3410
? sys/dev/microcode/typhoon/3c990
? sys/dev/microcode/typhoon/build
? sys/dev/microcode/udl/build
? sys/dev/microcode/udl/udl_huffman
? sys/dev/microcode/uyap/build
? sys/dev/microcode/uyap/uyap
? sys/dev/microcode/yds/build
? sys/dev/microcode/yds/yds
? sys/dev/microcode/zydas/build
? sys/dev/microcode/zydas/zd1211
? sys/dev/microcode/zydas/zd1211b
? sys/kern/.subr_hibernate.c.swp
Index: sys/kern/init_main.c
===================================================================
RCS file: /cvs/src/sys/kern/init_main.c,v
retrieving revision 1.180
diff -u -r1.180 init_main.c
--- sys/kern/init_main.c 7 Jul 2011 18:00:33 -0000 1.180
+++ sys/kern/init_main.c 12 Oct 2011 12:42:54 -0000
@@ -337,7 +337,7 @@
(void)chgproccnt(0, 1);
/* Initialize run queues */
- sched_init_runqueues();
+ sched_init_runqueue();
sleep_queue_init();
sched_init_cpu(curcpu());
Index: sys/kern/kern_clock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clock.c,v
retrieving revision 1.72
diff -u -r1.72 kern_clock.c
--- sys/kern/kern_clock.c 7 Mar 2011 07:07:13 -0000 1.72
+++ sys/kern/kern_clock.c 12 Oct 2011 12:42:54 -0000
@@ -241,7 +241,11 @@
if (stathz == 0)
statclock(frame);
- if (--ci->ci_schedstate.spc_rrticks <= 0)
+ /*
+ * If the currently running process went over its round robin tick quota,
+ * preempt it.
+ */
+ if (p && (--(p->p_rrticks) <= 0))
roundrobin(ci);
/*
Index: sys/kern/kern_fork.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_fork.c,v
retrieving revision 1.128
diff -u -r1.128 kern_fork.c
--- sys/kern/kern_fork.c 7 Jul 2011 18:00:33 -0000 1.128
+++ sys/kern/kern_fork.c 12 Oct 2011 12:42:54 -0000
@@ -190,6 +190,8 @@
atomic_setbits_int(&pr->ps_flags, PS_CONTROLT);
newproc->p_p = pr;
+
+ generate_deadline(newproc);
}
/* print the 'table full' message once per 10 seconds */
@@ -432,7 +434,7 @@
getmicrotime(&p2->p_stats->p_start);
p2->p_acflag = AFORK;
p2->p_stat = SRUN;
- p2->p_cpu = sched_choosecpu_fork(p1, flags);
+ p2->p_cpu = sched_choosecpu_fork(p1);
setrunqueue(p2);
SCHED_UNLOCK(s);
Index: sys/kern/kern_sched.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.23
diff -u -r1.23 kern_sched.c
--- sys/kern/kern_sched.c 6 Jul 2011 21:41:37 -0000 1.23
+++ sys/kern/kern_sched.c 12 Oct 2011 12:42:54 -0000
@@ -35,6 +35,8 @@
int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
struct proc *sched_steal_proc(struct cpu_info *);
+TAILQ_HEAD(prochead, proc) sched_runqueue;
+
/*
* To help choosing which cpu should run which process we keep track
* of cpus which are currently idle and which cpus have processes
@@ -69,10 +71,6 @@
sched_init_cpu(struct cpu_info *ci)
{
struct schedstate_percpu *spc = &ci->ci_schedstate;
- int i;
-
- for (i = 0; i < SCHED_NQS; i++)
- TAILQ_INIT(&spc->spc_qs[i]);
spc->spc_idleproc = NULL;
@@ -106,6 +104,7 @@
{
struct schedstate_percpu *spc;
struct proc *p = curproc;
+ struct proc *dead;
struct cpu_info *ci = v;
int s;
@@ -120,7 +119,6 @@
SCHED_LOCK(s);
cpuset_add(&sched_idle_cpus, ci);
p->p_stat = SSLEEP;
- p->p_cpu = ci;
atomic_setbits_int(&p->p_flag, P_CPUPEG);
mi_switch();
cpuset_del(&sched_idle_cpus, ci);
@@ -130,38 +128,27 @@
KASSERT(curproc == spc->spc_idleproc);
while (1) {
- while (!curcpu_is_idle()) {
- struct proc *dead;
-
- SCHED_LOCK(s);
- p->p_stat = SSLEEP;
- mi_switch();
- SCHED_UNLOCK(s);
-
- while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
- LIST_REMOVE(dead, p_hash);
- exit2(dead);
- }
- }
+ while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
+ LIST_REMOVE(dead, p_hash);
+ exit2(dead);
+ }
+ if ((spc->spc_schedflags & SPCF_SHOULDHALT) && !(spc->spc_schedflags &
SPCF_HALTED))
+ atomic_setbits_int(&spc->spc_schedflags, SPCF_HALTED);
splassert(IPL_NONE);
cpuset_add(&sched_idle_cpus, ci);
+
cpu_idle_enter();
- while (spc->spc_whichqs == 0) {
- if (spc->spc_schedflags & SPCF_SHOULDHALT &&
- (spc->spc_schedflags & SPCF_HALTED) == 0) {
- cpuset_del(&sched_idle_cpus, ci);
- SCHED_LOCK(s);
- atomic_setbits_int(&spc->spc_schedflags,
- spc->spc_whichqs ? 0 : SPCF_HALTED);
- SCHED_UNLOCK(s);
- wakeup(spc);
- }
- cpu_idle_cycle();
- }
+ cpu_idle_cycle();
cpu_idle_leave();
+
cpuset_del(&sched_idle_cpus, ci);
+
+ SCHED_LOCK(s);
+ p->p_stat = SSLEEP;
+ mi_switch();
+ SCHED_UNLOCK(s);
}
}
@@ -204,22 +191,21 @@
* Run queue management.
*/
void
-sched_init_runqueues(void)
+sched_init_runqueue(void)
{
+ TAILQ_INIT(&sched_runqueue);
}
void
setrunqueue(struct proc *p)
{
struct schedstate_percpu *spc;
- int queue = p->p_priority >> 2;
SCHED_ASSERT_LOCKED();
spc = &p->p_cpu->ci_schedstate;
spc->spc_nrun++;
- TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
- spc->spc_whichqs |= (1 << queue);
+ TAILQ_INSERT_TAIL(&sched_runqueue, p, p_runq);
cpuset_add(&sched_queued_cpus, p->p_cpu);
if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
@@ -230,54 +216,47 @@
remrunqueue(struct proc *p)
{
struct schedstate_percpu *spc;
- int queue = p->p_priority >> 2;
SCHED_ASSERT_LOCKED();
spc = &p->p_cpu->ci_schedstate;
spc->spc_nrun--;
- TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
- if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
- spc->spc_whichqs &= ~(1 << queue);
- if (spc->spc_whichqs == 0)
- cpuset_del(&sched_queued_cpus, p->p_cpu);
- }
+ TAILQ_REMOVE(&sched_runqueue, p, p_runq);
}
struct proc *
sched_chooseproc(void)
{
struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
- struct proc *p;
- int queue;
+ struct proc *p = NULL;
+ struct proc *p_new = NULL;
+ struct timeval now;
SCHED_ASSERT_LOCKED();
if (spc->spc_schedflags & SPCF_SHOULDHALT) {
- if (spc->spc_whichqs) {
- for (queue = 0; queue < SCHED_NQS; queue++) {
- TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
- remrunqueue(p);
- p->p_cpu = sched_choosecpu(p);
- setrunqueue(p);
- }
- }
- }
p = spc->spc_idleproc;
KASSERT(p);
p->p_stat = SRUN;
return (p);
}
-again:
- if (spc->spc_whichqs) {
- queue = ffs(spc->spc_whichqs) - 1;
- p = TAILQ_FIRST(&spc->spc_qs[queue]);
- remrunqueue(p);
- } else if ((p = sched_steal_proc(curcpu())) == NULL) {
- p = spc->spc_idleproc;
- if (p == NULL) {
- int s;
+ if (!TAILQ_EMPTY(&sched_runqueue)) {
+ microuptime(&now);
+ p = TAILQ_FIRST(&sched_runqueue);
+ TAILQ_FOREACH(p_new, &sched_runqueue, p_runq) {
+ if (timercmp(&(p_new->deadline), &now, <)) {
+ p = p_new;
+ break;
+ }
+ if (timercmp(&(p_new->deadline), &(p->deadline), <))
+ p = p_new;
+ }
+ remrunqueue(p);
+ p->p_cpu = curcpu();
+ } else {
+ while ((p = spc->spc_idleproc) == NULL) {
+ int s;
/*
* We get here if someone decides to switch during
* boot before forking kthreads, bleh.
@@ -289,11 +268,13 @@
spl0();
delay(10);
SCHED_LOCK(s);
- goto again;
- }
+ }
+
KASSERT(p);
+ }
+
+ if (p)
p->p_stat = SRUN;
- }
return (p);
}
@@ -307,7 +288,7 @@
uint64_t sched_nomigrations;
struct cpu_info *
-sched_choosecpu_fork(struct proc *parent, int flags)
+sched_choosecpu_fork(struct proc *parent)
{
struct cpu_info *choice = NULL;
fixpt_t load, best_load = ~0;
@@ -315,22 +296,6 @@
struct cpu_info *ci;
struct cpuset set;
-#if 0
- /*
- * XXX
- * Don't do this until we have a painless way to move the cpu in exec.
- * Preferably when nuking the old pmap and getting a new one on a
- * new cpu.
- */
- /*
- * PPWAIT forks are simple. We know that the parent will not
- * run until we exec and choose another cpu, so we just steal its
- * cpu.
- */
- if (flags & FORK_PPWAIT)
- return (parent->p_cpu);
-#endif
-
/*
* Look at all cpus that are currently idle and have nothing queued.
* If there are none, pick the one with least queued procs first,
@@ -361,9 +326,6 @@
sched_choosecpu(struct proc *p)
{
struct cpu_info *choice = NULL;
- int last_cost = INT_MAX;
- struct cpu_info *ci;
- struct cpuset set;
/*
* If pegged to a cpu, don't allow it to move.
@@ -371,169 +333,18 @@
if (p->p_flag & P_CPUPEG)
return (p->p_cpu);
+ /*
+ * Else, just pretend we forked a new process.
+ */
sched_choose++;
- /*
- * Look at all cpus that are currently idle and have nothing queued.
- * If there are none, pick the cheapest of those.
- * (idle + queued could mean that the cpu is handling an interrupt
- * at this moment and haven't had time to leave idle yet).
- */
- cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
-
- /*
- * First, just check if our current cpu is in that set, if it is,
- * this is simple.
- * Also, our cpu might not be idle, but if it's the current cpu
- * and it has nothing else queued and we're curproc, take it.
- */
- if (cpuset_isset(&set, p->p_cpu) ||
- (p->p_cpu == curcpu() && p->p_cpu->ci_schedstate.spc_nrun == 0 &&
- curproc == p)) {
- sched_wasidle++;
- return (p->p_cpu);
- }
-
- if (cpuset_first(&set) == NULL)
- cpuset_copy(&set, &sched_all_cpus);
-
- while ((ci = cpuset_first(&set)) != NULL) {
- int cost = sched_proc_to_cpu_cost(ci, p);
-
- if (choice == NULL || cost < last_cost) {
- choice = ci;
- last_cost = cost;
- }
- cpuset_del(&set, ci);
- }
-
- if (p->p_cpu != choice)
- sched_nmigrations++;
- else
- sched_nomigrations++;
+ choice = sched_choosecpu_fork(p);
+ if (p->p_cpu != choice)
+ sched_nmigrations++;
+ else
+ sched_nomigrations++;
- return (choice);
-}
-
-/*
- * Attempt to steal a proc from some cpu.
- */
-struct proc *
-sched_steal_proc(struct cpu_info *self)
-{
- struct schedstate_percpu *spc;
- struct proc *best = NULL;
- int bestcost = INT_MAX;
- struct cpu_info *ci;
- struct cpuset set;
-
- cpuset_copy(&set, &sched_queued_cpus);
-
- while ((ci = cpuset_first(&set)) != NULL) {
- struct proc *p;
- int queue;
- int cost;
-
- cpuset_del(&set, ci);
-
- spc = &ci->ci_schedstate;
-
- queue = ffs(spc->spc_whichqs) - 1;
- TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
- if (p->p_flag & P_CPUPEG)
- continue;
-
- cost = sched_proc_to_cpu_cost(self, p);
-
- if (best == NULL || cost < bestcost) {
- best = p;
- bestcost = cost;
- }
- }
- }
- if (best == NULL)
- return (NULL);
-
- spc = &best->p_cpu->ci_schedstate;
- remrunqueue(best);
- best->p_cpu = self;
-
- sched_stolen++;
-
- return (best);
-}
-
-/*
- * Base 2 logarithm of an int. returns 0 for 0 (yeye, I know).
- */
-static int
-log2(unsigned int i)
-{
- int ret = 0;
-
- while (i >>= 1)
- ret++;
-
- return (ret);
-}
-
-/*
- * Calculate the cost of moving the proc to this cpu.
- *
- * What we want is some guesstimate of how much "performance" it will
- * cost us to move the proc here. Not just for caches and TLBs and NUMA
- * memory, but also for the proc itself. A highly loaded cpu might not
- * be the best candidate for this proc since it won't get run.
- *
- * Just total guesstimates for now.
- */
-
-int sched_cost_load = 1;
-int sched_cost_priority = 1;
-int sched_cost_runnable = 3;
-int sched_cost_resident = 1;
-
-int
-sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p)
-{
- struct schedstate_percpu *spc;
- int l2resident = 0;
- int cost;
-
- spc = &ci->ci_schedstate;
-
- cost = 0;
-
- /*
- * First, account for the priority of the proc we want to move.
- * More willing to move, the lower the priority of the destination
- * and the higher the priority of the proc.
- */
- if (!cpuset_isset(&sched_idle_cpus, ci)) {
- cost += (p->p_priority - spc->spc_curpriority) *
- sched_cost_priority;
- cost += sched_cost_runnable;
- }
- if (cpuset_isset(&sched_queued_cpus, ci)) {
- cost += spc->spc_nrun * sched_cost_runnable;
- }
-
- /*
- * Higher load on the destination means we don't want to go there.
- */
- cost += ((sched_cost_load * spc->spc_ldavg) >> FSHIFT);
-
- /*
- * If the proc is on this cpu already, lower the cost by how much
- * it has been running and an estimate of its footprint.
- */
- if (p->p_cpu == ci && p->p_slptime == 0) {
- l2resident =
- log2(pmap_resident_count(p->p_vmspace->vm_map.pmap));
- cost -= l2resident * sched_cost_resident;
- }
-
- return (cost);
+ return choice;
}
/*
@@ -580,7 +391,9 @@
{
CPU_INFO_ITERATOR cii;
struct cpu_info *ci;
+ int s;
+ SCHED_LOCK(s);
/*
* Make sure we stop the secondary CPUs.
*/
@@ -589,21 +402,25 @@
if (CPU_IS_PRIMARY(ci))
continue;
- cpuset_del(&sched_all_cpus, ci);
+
atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDHALT);
- }
- CPU_INFO_FOREACH(cii, ci) {
+ cpuset_del(&sched_all_cpus, ci);
+ }
+ CPU_INFO_FOREACH(cii, ci) {
struct schedstate_percpu *spc = &ci->ci_schedstate;
- struct sleep_state sls;
-
+ struct sleep_state sls;
if (CPU_IS_PRIMARY(ci))
continue;
- while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
- sleep_setup(&sls, spc, PZERO, "schedstate");
- sleep_finish(&sls,
- (spc->spc_schedflags & SPCF_HALTED) == 0);
- }
+
+ SCHED_UNLOCK(s);
+ while (!(spc->spc_schedflags & SPCF_HALTED)) {
+ sleep_setup(&sls, spc, PZERO, "schedstate");
+ sleep_setup_timeout(&sls, 10);
+ sleep_finish(&sls, 1);
+ }
+ SCHED_LOCK(s);
}
+ SCHED_UNLOCK(s);
}
#endif
Index: sys/kern/sched_bsd.c
===================================================================
RCS file: /cvs/src/sys/kern/sched_bsd.c,v
retrieving revision 1.27
diff -u -r1.27 sched_bsd.c
--- sys/kern/sched_bsd.c 7 Jul 2011 18:00:33 -0000 1.27
+++ sys/kern/sched_bsd.c 12 Oct 2011 12:42:54 -0000
@@ -77,33 +77,20 @@
timeout_set(&schedcpu_to, schedcpu, &schedcpu_to);
- rrticks_init = hz / 10;
+ rrticks_init = (hz / 100);
schedcpu(&schedcpu_to);
}
/*
- * Force switch among equal priority processes every 100ms.
+ * Force switch among equal priority processes every 10ms.
*/
void
roundrobin(struct cpu_info *ci)
{
struct schedstate_percpu *spc = &ci->ci_schedstate;
- spc->spc_rrticks = rrticks_init;
-
if (ci->ci_curproc != NULL) {
- if (spc->spc_schedflags & SPCF_SEENRR) {
- /*
- * The process has already been through a roundrobin
- * without switching and may be hogging the CPU.
- * Indicate that the process should yield.
- */
- atomic_setbits_int(&spc->spc_schedflags,
- SPCF_SHOULDYIELD);
- } else {
- atomic_setbits_int(&spc->spc_schedflags,
- SPCF_SEENRR);
- }
+ atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDYIELD);
}
if (spc->spc_nrun)
@@ -204,7 +191,6 @@
struct timeout *to = (struct timeout *)arg;
fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
struct proc *p;
- int s;
unsigned int newcpu;
int phz;
@@ -233,7 +219,7 @@
*/
if (p->p_slptime > 1)
continue;
- SCHED_LOCK(s);
+
/*
* p_pctcpu is only for ps.
*/
@@ -250,17 +236,6 @@
newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu);
p->p_estcpu = newcpu;
resetpriority(p);
- if (p->p_priority >= PUSER) {
- if (p->p_stat == SRUN &&
- (p->p_priority / SCHED_PPQ) !=
- (p->p_usrpri / SCHED_PPQ)) {
- remrunqueue(p);
- p->p_priority = p->p_usrpri;
- setrunqueue(p);
- } else
- p->p_priority = p->p_usrpri;
- }
- SCHED_UNLOCK(s);
}
uvm_meter();
wakeup(&lbolt);
@@ -278,8 +253,6 @@
unsigned int newcpu = p->p_estcpu;
fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
- SCHED_ASSERT_LOCKED();
-
if (p->p_slptime > 5 * loadfac)
p->p_estcpu = 0;
else {
@@ -334,6 +307,7 @@
p->p_cpu = sched_choosecpu(p);
setrunqueue(p);
p->p_stats->p_ru.ru_nivcsw++;
+ generate_deadline(p);
mi_switch();
SCHED_UNLOCK(s);
}
@@ -344,7 +318,6 @@
struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
struct proc *p = curproc;
struct proc *nextproc;
- struct rlimit *rlim;
struct timeval tv;
#ifdef MULTIPROCESSOR
int hold_count;
@@ -372,38 +345,16 @@
* process was running, and add that to its total so far.
*/
microuptime(&tv);
- if (timercmp(&tv, &spc->spc_runtime, <)) {
-#if 0
- printf("uptime is not monotonic! "
- "tv=%lu.%06lu, runtime=%lu.%06lu\n",
- tv.tv_sec, tv.tv_usec, spc->spc_runtime.tv_sec,
- spc->spc_runtime.tv_usec);
-#endif
- } else {
+ if (timercmp(&tv, &spc->spc_runtime, >)) {
timersub(&tv, &spc->spc_runtime, &tv);
timeradd(&p->p_rtime, &tv, &p->p_rtime);
}
/*
- * Check if the process exceeds its cpu resource allocation.
- * If over max, kill it.
- */
- rlim = &p->p_rlimit[RLIMIT_CPU];
- if ((rlim_t)p->p_rtime.tv_sec >= rlim->rlim_cur) {
- if ((rlim_t)p->p_rtime.tv_sec >= rlim->rlim_max) {
- psignal(p, SIGKILL);
- } else {
- psignal(p, SIGXCPU);
- if (rlim->rlim_cur < rlim->rlim_max)
- rlim->rlim_cur += 5;
- }
- }
-
- /*
* Process is about to yield the CPU; clear the appropriate
* scheduling flags.
*/
- atomic_clearbits_int(&spc->spc_schedflags, SPCF_SWITCHCLEAR);
+ atomic_clearbits_int(&spc->spc_schedflags, SPCF_SHOULDYIELD);
nextproc = sched_chooseproc();
@@ -435,9 +386,8 @@
* be running on a new CPU now, so don't use the cache'd
* schedstate_percpu pointer.
*/
- KASSERT(p->p_cpu == curcpu());
-
- microuptime(&p->p_cpu->ci_schedstate.spc_runtime);
+ if (p->p_cpu == curcpu())
+ microuptime(&p->p_cpu->ci_schedstate.spc_runtime);
#ifdef MULTIPROCESSOR
/*
@@ -524,21 +474,31 @@
/*
* Compute the priority of a process when running in user mode.
- * Arrange to reschedule if the resulting priority is better
- * than that of the current process.
*/
void
resetpriority(struct proc *p)
{
- unsigned int newpriority;
+ p->p_usrpri = min(((20 + (p->p_p->ps_nice))) << 1, MAXPRI);
+}
- SCHED_ASSERT_LOCKED();
+/*
+ * Generate a new virtual deadline for a process. The deadline is a soft
+ * one and has no purpose besides being used for choosing the next process
+ * to run. Also resets the number of round robin ticks available to the
+ * process.
+ */
+void
+generate_deadline(struct proc *p) {
+ unsigned int prio_ratio = ((20 + (p->p_p->ps_nice)) << 5) + 1;
+ struct timeval offset = {
+ .tv_sec = 0,
+ .tv_usec = (rrticks_init * prio_ratio)
+ };
+ struct timeval now;
+ microuptime(&now);
- newpriority = PUSER + p->p_estcpu +
- NICE_WEIGHT * (p->p_p->ps_nice - NZERO);
- newpriority = min(newpriority, MAXPRI);
- p->p_usrpri = newpriority;
- resched_proc(p, p->p_usrpri);
+ timeradd(&now, &offset, &(p->deadline));
+ p->p_rrticks = rrticks_init;
}
/*
@@ -559,12 +519,8 @@
void
schedclock(struct proc *p)
{
- int s;
-
- SCHED_LOCK(s);
p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
resetpriority(p);
if (p->p_priority >= PUSER)
p->p_priority = p->p_usrpri;
- SCHED_UNLOCK(s);
}
Index: sys/sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.143
diff -u -r1.143 proc.h
--- sys/sys/proc.h 20 Sep 2011 10:07:37 -0000 1.143
+++ sys/sys/proc.h 12 Oct 2011 12:42:54 -0000
@@ -237,6 +237,8 @@
/* scheduling */
+ struct timeval deadline; /* virtual deadline used for scheduling */
+ int p_rrticks; /* number of ticks this process is allowed to stay on a
processor */
u_int p_estcpu; /* Time averaged value of p_cpticks. */
int p_cpticks; /* Ticks of cpu time. */
fixpt_t p_pctcpu; /* %cpu for this process during p_swtime */
Index: sys/sys/sched.h
===================================================================
RCS file: /cvs/src/sys/sys/sched.h,v
retrieving revision 1.29
diff -u -r1.29 sched.h
--- sys/sys/sched.h 7 Jul 2011 18:00:33 -0000 1.29
+++ sys/sys/sched.h 12 Oct 2011 12:42:54 -0000
@@ -87,8 +87,6 @@
#define CP_IDLE 4
#define CPUSTATES 5
-#define SCHED_NQS 32 /* 32 run queues. */
-
/*
* Per-CPU scheduler state.
* XXX - expose to userland for now.
@@ -99,7 +97,6 @@
u_int spc_schedticks; /* ticks for schedclock() */
u_int64_t spc_cp_time[CPUSTATES]; /* CPU state statistics */
u_char spc_curpriority; /* usrpri of curproc */
- int spc_rrticks; /* ticks until roundrobin() */
int spc_pscnt; /* prof/stat counter */
int spc_psdiv; /* prof/stat divisor */
struct proc *spc_idleproc; /* idle proc for this cpu */
@@ -107,9 +104,6 @@
u_int spc_nrun; /* procs on the run queues */
fixpt_t spc_ldavg; /* shortest load avg. for this cpu */
- TAILQ_HEAD(prochead, proc) spc_qs[SCHED_NQS];
- volatile uint32_t spc_whichqs;
-
#ifdef notyet
struct proc *spc_reaper; /* dead proc reaper */
#endif
@@ -119,18 +113,16 @@
#ifdef _KERNEL
/* spc_flags */
-#define SPCF_SEENRR 0x0001 /* process has seen roundrobin() */
-#define SPCF_SHOULDYIELD 0x0002 /* process should yield the CPU */
-#define SPCF_SWITCHCLEAR (SPCF_SEENRR|SPCF_SHOULDYIELD)
-#define SPCF_SHOULDHALT 0x0004 /* CPU should be vacated */
-#define SPCF_HALTED 0x0008 /* CPU has been halted */
+#define SPCF_SHOULDYIELD 0x0001 /* process should yield the CPU */
+#define SPCF_SHOULDHALT 0x0002 /* CPU should be vacated */
+#define SPCF_HALTED 0x0004 /* CPU has been halted */
-#define SCHED_PPQ (128 / SCHED_NQS) /* priorities per queue
*/
#define NICE_WEIGHT 2 /* priorities per nice level */
-#define ESTCPULIM(e) min((e), NICE_WEIGHT * PRIO_MAX - SCHED_PPQ)
+#define ESTCPULIM(e) min((e), NICE_WEIGHT * PRIO_MAX)
extern int schedhz; /* ideally: 16 */
extern int rrticks_init; /* ticks per roundrobin() */
+extern struct cpuset sched_idle_cpus;
struct proc;
void schedclock(struct proc *);
@@ -146,20 +138,22 @@
void cpu_switchto(struct proc *, struct proc *);
struct proc *sched_chooseproc(void);
struct cpu_info *sched_choosecpu(struct proc *);
-struct cpu_info *sched_choosecpu_fork(struct proc *parent, int);
+struct cpu_info *sched_choosecpu_fork(struct proc *);
void cpu_idle_enter(void);
void cpu_idle_cycle(void);
void cpu_idle_leave(void);
void sched_peg_curproc(struct cpu_info *ci);
+void generate_deadline(struct proc *);
+
#ifdef MULTIPROCESSOR
void sched_start_secondary_cpus(void);
void sched_stop_secondary_cpus(void);
#endif
-#define curcpu_is_idle() (curcpu()->ci_schedstate.spc_whichqs == 0)
+#define curcpu_is_idle() (cpuset_isset(&sched_idle_cpus, curcpu()))
-void sched_init_runqueues(void);
+void sched_init_runqueue(void);
void setrunqueue(struct proc *);
void remrunqueue(struct proc *);
pgpWr0F1jeVGk.pgp
Description: PGP signature
